diff options
Diffstat (limited to 'net')
448 files changed, 11286 insertions, 6479 deletions
diff --git a/net/atm/pppoatm.c b/net/atm/pppoatm.c index 2574aae3e066..e3c422dc533a 100644 --- a/net/atm/pppoatm.c +++ b/net/atm/pppoatm.c @@ -228,7 +228,7 @@ static void pppoatm_push(struct atm_vcc *atmvcc, struct sk_buff *skb) error: kfree_skb(skb); - ppp_input_error(&pvcc->chan, 0); + ppp_input_error(&pvcc->chan); } static int pppoatm_may_send(struct pppoatm_vcc *pvcc, int size) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 11d3ad8d2551..3a0592599086 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -3083,6 +3083,7 @@ static int abort_conn_sync(struct hci_dev *hdev, void *data) int hci_abort_conn(struct hci_conn *conn, u8 reason) { struct hci_dev *hdev = conn->hdev; + int err; /* If abort_reason has already been set it means the connection is * already being aborted so don't attempt to overwrite it. @@ -3119,7 +3120,8 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) * as a result to MGMT_OP_DISCONNECT/MGMT_OP_UNPAIR which does * already queue its callback on cmd_sync_work. */ - return hci_cmd_sync_run_once(hdev, abort_conn_sync, conn, NULL); + err = hci_cmd_sync_run_once(hdev, abort_conn_sync, conn, NULL); + return (err == -EEXIST) ? 
0 : err; } void hci_setup_tx_timestamp(struct sk_buff *skb, size_t key_offset, diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 01f8ceeb1c0c..c46c1236ebfa 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3917,8 +3917,8 @@ static void hci_isodata_packet(struct hci_dev *hdev, struct sk_buff *skb) err = iso_recv(hdev, handle, skb, flags); if (err == -ENOENT) - bt_dev_err(hdev, "ISO packet for unknown connection handle %d", - handle); + bt_dev_err_ratelimited(hdev, "ISO packet for unknown connection handle %d", + handle); else if (err) bt_dev_dbg(hdev, "ISO packet recv for handle %d failed: %d", handle, err); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 3ebc5e6d45d9..b2ee6b6a0f56 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3340,8 +3340,6 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, memcpy(conn->dev_class, ev->dev_class, 3); - hci_dev_unlock(hdev); - if (ev->link_type == ACL_LINK || (!(flags & HCI_PROTO_DEFER) && !lmp_esco_capable(hdev))) { struct hci_cp_accept_conn_req cp; @@ -3375,7 +3373,6 @@ static void hci_conn_request_evt(struct hci_dev *hdev, void *data, hci_connect_cfm(conn, 0); } - return; unlock: hci_dev_unlock(hdev); } @@ -5498,9 +5495,11 @@ static void hci_user_passkey_notify_evt(struct hci_dev *hdev, void *data, bt_dev_dbg(hdev, ""); + hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); if (!conn) - return; + goto unlock; conn->passkey_notify = __le32_to_cpu(ev->passkey); conn->passkey_entered = 0; @@ -5509,6 +5508,9 @@ static void hci_user_passkey_notify_evt(struct hci_dev *hdev, void *data, mgmt_user_passkey_notify(hdev, &conn->dst, conn->type, conn->dst_type, conn->passkey_notify, conn->passkey_entered); + +unlock: + hci_dev_unlock(hdev); } static void hci_keypress_notify_evt(struct hci_dev *hdev, void *data, @@ -5519,14 +5521,16 @@ static void hci_keypress_notify_evt(struct hci_dev *hdev, void 
*data, bt_dev_dbg(hdev, ""); + hci_dev_lock(hdev); + conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr); if (!conn) - return; + goto unlock; switch (ev->type) { case HCI_KEYPRESS_STARTED: conn->passkey_entered = 0; - return; + goto unlock; case HCI_KEYPRESS_ENTERED: conn->passkey_entered++; @@ -5541,13 +5545,16 @@ static void hci_keypress_notify_evt(struct hci_dev *hdev, void *data, break; case HCI_KEYPRESS_COMPLETED: - return; + goto unlock; } if (hci_dev_test_flag(hdev, HCI_MGMT)) mgmt_user_passkey_notify(hdev, &conn->dst, conn->type, conn->dst_type, conn->passkey_notify, conn->passkey_entered); + +unlock: + hci_dev_unlock(hdev); } static void hci_simple_pair_complete_evt(struct hci_dev *hdev, void *data, diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 919ec275dd23..fd3aacdea512 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -825,7 +825,7 @@ int hci_cmd_sync_run_once(struct hci_dev *hdev, hci_cmd_sync_work_func_t func, void *data, hci_cmd_sync_work_destroy_t destroy) { if (hci_cmd_sync_lookup_entry(hdev, func, data, destroy)) - return 0; + return -EEXIST; return hci_cmd_sync_run(hdev, func, data, destroy); } diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 95c65fece39b..77dec104a9c3 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -5473,7 +5473,13 @@ static inline int l2cap_ecred_reconf_rsp(struct l2cap_conn *conn, if (chan->ident != cmd->ident) continue; + l2cap_chan_hold(chan); + l2cap_chan_lock(chan); + l2cap_chan_del(chan, ECONNRESET); + + l2cap_chan_unlock(chan); + l2cap_chan_put(chan); } return 0; @@ -6705,6 +6711,13 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) return -ENOBUFS; } + if (skb->len > chan->mps) { + BT_ERR("Too big LE L2CAP MPS: len %u > %u", skb->len, + chan->mps); + l2cap_send_disconn_req(chan, ECONNRESET); + return -ENOBUFS; + } + chan->rx_credits--; BT_DBG("chan %p: rx_credits %u -> %u", chan, 
chan->rx_credits + 1, chan->rx_credits); @@ -6733,7 +6746,7 @@ static int l2cap_ecred_data_rcv(struct l2cap_chan *chan, struct sk_buff *skb) if (sdu_len > chan->imtu) { BT_ERR("Too big LE L2CAP SDU length: len %u > %u", - skb->len, sdu_len); + sdu_len, chan->imtu); l2cap_send_disconn_req(chan, ECONNRESET); err = -EMSGSIZE; goto failed; diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index b84587811ef4..18826d4b9c0b 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -1045,7 +1045,8 @@ static int sco_sock_setsockopt(struct socket *sock, int level, int optname, codecs = (void *)buffer; - if (codecs->num_codecs > 1) { + if (codecs->num_codecs != 1 || + optlen < struct_size(codecs, codecs, codecs->num_codecs)) { hci_dev_put(hdev); err = -EINVAL; break; diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig index 3c8ded7d3e84..318715c8fc9b 100644 --- a/net/bridge/Kconfig +++ b/net/bridge/Kconfig @@ -7,7 +7,6 @@ config BRIDGE tristate "802.1d Ethernet Bridging" select LLC select STP - depends on IPV6 || IPV6=n help If you say Y here, then your Linux box will be able to act as an Ethernet bridge, which means that the different Ethernet segments it diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c index 6b5595868a39..0c8a06cdd46f 100644 --- a/net/bridge/br_arp_nd_proxy.c +++ b/net/bridge/br_arp_nd_proxy.c @@ -17,7 +17,6 @@ #include <linux/if_vlan.h> #include <linux/inetdevice.h> #include <net/addrconf.h> -#include <net/ipv6_stubs.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ip6_checksum.h> #endif @@ -459,7 +458,7 @@ void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, return; } - n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, vlandev); + n = neigh_lookup(&nd_tbl, &msg->target, vlandev); if (n) { struct net_bridge_fdb_entry *f; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index f7502e62dd35..a35ceae0a6f2 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -518,6 +518,7 @@ void 
br_dev_setup(struct net_device *dev) ether_addr_copy(br->group_addr, eth_stp_addr); br->stp_enabled = BR_NO_STP; + br->stp_mode = BR_STP_MODE_AUTO; br->group_fwd_mask = BR_GROUPFWD_DEFAULT; br->group_fwd_mask_required = BR_GROUPFWD_DEFAULT; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 083e2fe96441..0ab1c94db4b9 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -32,6 +32,7 @@ #include <net/ip.h> #include <net/ipv6.h> +#include <net/ip6_route.h> #include <net/addrconf.h> #include <net/dst_metadata.h> #include <net/route.h> @@ -890,7 +891,6 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff } if (IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) && skb->protocol == htons(ETH_P_IPV6)) { - const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); struct brnf_frag_data *data; if (br_validate_ipv6(net, skb)) @@ -906,15 +906,9 @@ static int br_nf_dev_queue_xmit(struct net *net, struct sock *sk, struct sk_buff skb_copy_from_linear_data_offset(skb, -data->size, data->mac, data->size); - if (v6ops) { - ret = v6ops->fragment(net, sk, skb, br_nf_push_frag_xmit); - local_unlock_nested_bh(&brnf_frag_data_storage.bh_lock); - return ret; - } + ret = ip6_fragment(net, sk, skb, br_nf_push_frag_xmit); local_unlock_nested_bh(&brnf_frag_data_storage.bh_lock); - - kfree_skb(skb); - return -EMSGSIZE; + return ret; } nf_bridge_info_free(skb); return br_dev_queue_push_xmit(net, sk, skb); diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index 76ce70b4e7f3..d8548428929e 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -30,6 +30,7 @@ #include <net/ip.h> #include <net/ipv6.h> +#include <net/ip6_route.h> #include <net/addrconf.h> #include <net/route.h> #include <net/netfilter/br_netfilter.h> @@ -95,15 +96,13 @@ br_nf_ipv6_daddr_was_changed(const struct sk_buff *skb, /* PF_BRIDGE/PRE_ROUTING: Undo the changes made for ip6tables * PREROUTING and 
continue the bridge PRE_ROUTING hook. See comment - * for br_nf_pre_routing_finish(), same logic is used here but - * equivalent IPv6 function ip6_route_input() called indirectly. + * for br_nf_pre_routing_finish(), same logic is used here. */ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = nf_bridge_info_get(skb); struct rtable *rt; struct net_device *dev = skb->dev, *br_indev; - const struct nf_ipv6_ops *v6ops = nf_get_ipv6_ops(); br_indev = nf_bridge_get_physindev(skb, net); if (!br_indev) { @@ -120,7 +119,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc nf_bridge->in_prerouting = 0; if (br_nf_ipv6_daddr_was_changed(skb, nf_bridge)) { skb_dst_drop(skb); - v6ops->route_input(skb); + ip6_route_input(skb); if (skb_dst(skb)->error) { kfree_skb(skb); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 0264730938f4..6fd5386a1d64 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1270,6 +1270,9 @@ static const struct nla_policy br_policy[IFLA_BR_MAX + 1] = { NLA_POLICY_EXACT_LEN(sizeof(struct br_boolopt_multi)), [IFLA_BR_FDB_N_LEARNED] = { .type = NLA_REJECT }, [IFLA_BR_FDB_MAX_LEARNED] = { .type = NLA_U32 }, + [IFLA_BR_STP_MODE] = NLA_POLICY_RANGE(NLA_U32, + BR_STP_MODE_AUTO, + BR_STP_MODE_MAX), }; static int br_changelink(struct net_device *brdev, struct nlattr *tb[], @@ -1306,6 +1309,23 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[], return err; } + if (data[IFLA_BR_STP_MODE]) { + u32 mode = nla_get_u32(data[IFLA_BR_STP_MODE]); + + if (mode != br->stp_mode) { + bool stp_off = br->stp_enabled == BR_NO_STP || + (data[IFLA_BR_STP_STATE] && + !nla_get_u32(data[IFLA_BR_STP_STATE])); + + if (!stp_off) { + NL_SET_ERR_MSG_MOD(extack, + "Can't change STP mode while STP is enabled"); + return -EBUSY; + } + } + br->stp_mode = mode; + } + if (data[IFLA_BR_STP_STATE]) { u32 stp_enabled = 
nla_get_u32(data[IFLA_BR_STP_STATE]); @@ -1634,6 +1654,7 @@ static size_t br_get_size(const struct net_device *brdev) nla_total_size(sizeof(u8)) + /* IFLA_BR_NF_CALL_ARPTABLES */ #endif nla_total_size(sizeof(struct br_boolopt_multi)) + /* IFLA_BR_MULTI_BOOLOPT */ + nla_total_size(sizeof(u32)) + /* IFLA_BR_STP_MODE */ 0; } @@ -1686,7 +1707,8 @@ static int br_fill_info(struct sk_buff *skb, const struct net_device *brdev) nla_put(skb, IFLA_BR_MULTI_BOOLOPT, sizeof(bm), &bm) || nla_put_u32(skb, IFLA_BR_FDB_N_LEARNED, atomic_read(&br->fdb_n_learned)) || - nla_put_u32(skb, IFLA_BR_FDB_MAX_LEARNED, br->fdb_max_learned)) + nla_put_u32(skb, IFLA_BR_FDB_MAX_LEARNED, br->fdb_max_learned) || + nla_put_u32(skb, IFLA_BR_STP_MODE, br->stp_mode)) return -EMSGSIZE; #ifdef CONFIG_BRIDGE_VLAN_FILTERING diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 9b55d38ea9ed..361a9b84451e 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -182,6 +182,7 @@ enum { BR_VLFLAG_MCAST_ENABLED = BIT(2), BR_VLFLAG_GLOBAL_MCAST_ENABLED = BIT(3), BR_VLFLAG_NEIGH_SUPPRESS_ENABLED = BIT(4), + BR_VLFLAG_TAGGING_BY_SWITCHDEV = BIT(5), }; /** @@ -522,6 +523,8 @@ struct net_bridge { unsigned char topology_change; unsigned char topology_change_detected; u16 root_port; + u8 stp_mode; + bool stp_helper_active; unsigned long max_age; unsigned long hello_time; unsigned long forward_delay; @@ -2234,6 +2237,8 @@ void br_switchdev_mdb_notify(struct net_device *dev, int type); int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags, bool changed, struct netlink_ext_ack *extack); +int br_switchdev_port_vlan_no_foreign_add(struct net_device *dev, u16 vid, u16 flags, + bool changed, struct netlink_ext_ack *extack); int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid); void br_switchdev_init(struct net_bridge *br); @@ -2317,6 +2322,13 @@ static inline int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, return -EOPNOTSUPP; } +static inline int 
br_switchdev_port_vlan_no_foreign_add(struct net_device *dev, u16 vid, + u16 flags, bool changed, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + static inline int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid) { return -EOPNOTSUPP; diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index cc4b27ff1b08..28c1d3f7e22f 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -149,7 +149,9 @@ static void br_stp_start(struct net_bridge *br) { int err = -ENOENT; - if (net_eq(dev_net(br->dev), &init_net)) + /* AUTO mode: try bridge-stp helper in init_net only */ + if (br->stp_mode == BR_STP_MODE_AUTO && + net_eq(dev_net(br->dev), &init_net)) err = br_stp_call_user(br, "start"); if (err && err != -ENOENT) @@ -162,8 +164,9 @@ static void br_stp_start(struct net_bridge *br) else if (br->bridge_forward_delay > BR_MAX_FORWARD_DELAY) __br_set_forward_delay(br, BR_MAX_FORWARD_DELAY); - if (!err) { + if (br->stp_mode == BR_STP_MODE_USER || !err) { br->stp_enabled = BR_USER_STP; + br->stp_helper_active = !err; br_debug(br, "userspace STP started\n"); } else { br->stp_enabled = BR_KERNEL_STP; @@ -180,12 +183,14 @@ static void br_stp_start(struct net_bridge *br) static void br_stp_stop(struct net_bridge *br) { - int err; - if (br->stp_enabled == BR_USER_STP) { - err = br_stp_call_user(br, "stop"); - if (err) - br_err(br, "failed to stop userspace STP (%d)\n", err); + if (br->stp_helper_active) { + int err = br_stp_call_user(br, "stop"); + + if (err) + br_err(br, "failed to stop userspace STP (%d)\n", err); + br->stp_helper_active = false; + } /* To start timers on any ports left in blocking */ spin_lock_bh(&br->lock); diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 4fac002922d2..18b558a931ad 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -190,6 +190,21 @@ int br_switchdev_port_vlan_add(struct net_device *dev, u16 vid, u16 flags, return switchdev_port_obj_add(dev, &v.obj, extack); } 
+int br_switchdev_port_vlan_no_foreign_add(struct net_device *dev, u16 vid, u16 flags, + bool changed, struct netlink_ext_ack *extack) +{ + struct switchdev_obj_port_vlan v = { + .obj.orig_dev = dev, + .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, + .obj.flags = SWITCHDEV_F_NO_FOREIGN, + .flags = flags, + .vid = vid, + .changed = changed, + }; + + return switchdev_port_obj_add(dev, &v.obj, extack); +} + int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid) { struct switchdev_obj_port_vlan v = { diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 326933b455b3..84a180927eb7 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -109,6 +109,11 @@ static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, /* Try switchdev op first. In case it is not supported, fallback to * 8021q add. */ + err = br_switchdev_port_vlan_no_foreign_add(dev, v->vid, flags, false, extack); + if (err != -EOPNOTSUPP) { + v->priv_flags |= BR_VLFLAG_ADDED_BY_SWITCHDEV | BR_VLFLAG_TAGGING_BY_SWITCHDEV; + return err; + } err = br_switchdev_port_vlan_add(dev, v->vid, flags, false, extack); if (err == -EOPNOTSUPP) return vlan_vid_add(dev, br->vlan_proto, v->vid); @@ -1491,7 +1496,7 @@ int br_vlan_fill_forward_path_mode(struct net_bridge *br, if (path->bridge.vlan_mode == DEV_PATH_BR_VLAN_TAG) path->bridge.vlan_mode = DEV_PATH_BR_VLAN_KEEP; - else if (v->priv_flags & BR_VLFLAG_ADDED_BY_SWITCHDEV) + else if (v->priv_flags & BR_VLFLAG_TAGGING_BY_SWITCHDEV) path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG_HW; else path->bridge.vlan_mode = DEV_PATH_BR_VLAN_UNTAG; diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c index b7af36bbd306..7763e78abb00 100644 --- a/net/bridge/netfilter/nft_meta_bridge.c +++ b/net/bridge/netfilter/nft_meta_bridge.c @@ -112,7 +112,6 @@ static const struct nft_expr_ops nft_meta_bridge_get_ops = { .eval = nft_meta_bridge_get_eval, .init = nft_meta_bridge_get_init, .dump = nft_meta_get_dump, - .reduce = 
nft_meta_get_reduce, }; static void nft_meta_bridge_set_eval(const struct nft_expr *expr, @@ -159,24 +158,6 @@ static int nft_meta_bridge_set_init(const struct nft_ctx *ctx, return 0; } -static bool nft_meta_bridge_set_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - int i; - - for (i = 0; i < NFT_REG32_NUM; i++) { - if (!track->regs[i].selector) - continue; - - if (track->regs[i].selector->ops != &nft_meta_bridge_get_ops) - continue; - - __nft_reg_track_cancel(track, i); - } - - return false; -} - static int nft_meta_bridge_set_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -202,7 +183,6 @@ static const struct nft_expr_ops nft_meta_bridge_set_ops = { .init = nft_meta_bridge_set_init, .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, - .reduce = nft_meta_bridge_set_reduce, .validate = nft_meta_bridge_set_validate, }; diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index 1cb5c16e97b7..cd2b04236a99 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -184,7 +184,6 @@ static const struct nft_expr_ops nft_reject_bridge_ops = { .init = nft_reject_init, .dump = nft_reject_dump, .validate = nft_reject_bridge_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_reject_bridge_type __read_mostly = { diff --git a/net/caif/cfsrvl.c b/net/caif/cfsrvl.c index 171fa32ada85..d687fd0b4ed3 100644 --- a/net/caif/cfsrvl.c +++ b/net/caif/cfsrvl.c @@ -191,10 +191,20 @@ bool cfsrvl_phyid_match(struct cflayer *layer, int phyid) void caif_free_client(struct cflayer *adap_layer) { + struct cflayer *serv_layer; struct cfsrvl *servl; - if (adap_layer == NULL || adap_layer->dn == NULL) + + if (!adap_layer) + return; + + serv_layer = adap_layer->dn; + if (!serv_layer) return; - servl = container_obj(adap_layer->dn); + + layer_set_dn(adap_layer, NULL); + layer_set_up(serv_layer, NULL); + + servl = 
container_obj(serv_layer); servl->release(&servl->layer); } EXPORT_SYMBOL(caif_free_client); diff --git a/net/can/bcm.c b/net/can/bcm.c index 5a4801699250..a4bef2c48a55 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -363,7 +363,6 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, struct sockaddr_can *addr; struct sock *sk = op->sk; unsigned int datalen = head->nframes * op->cfsiz; - int err; unsigned int *pflags; enum skb_drop_reason reason; @@ -420,8 +419,8 @@ static void bcm_send_to_user(struct bcm_op *op, struct bcm_msg_head *head, addr->can_family = AF_CAN; addr->can_ifindex = op->rx_ifindex; - err = sock_queue_rcv_skb_reason(sk, skb, &reason); - if (err < 0) { + reason = sock_queue_rcv_skb_reason(sk, skb); + if (reason) { struct bcm_sock *bo = bcm_sk(sk); sk_skb_reason_drop(sk, skb, reason); diff --git a/net/can/isotp.c b/net/can/isotp.c index 2770f43f4951..c48b4a818297 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -291,7 +291,8 @@ static void isotp_rcv_skb(struct sk_buff *skb, struct sock *sk) addr->can_family = AF_CAN; addr->can_ifindex = skb->dev->ifindex; - if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) + reason = sock_queue_rcv_skb_reason(sk, skb); + if (reason) sk_skb_reason_drop(sk, skb, reason); } diff --git a/net/can/j1939/socket.c b/net/can/j1939/socket.c index 0502b030d238..50a598ef5fd4 100644 --- a/net/can/j1939/socket.c +++ b/net/can/j1939/socket.c @@ -333,7 +333,8 @@ static void j1939_sk_recv_one(struct j1939_sock *jsk, struct sk_buff *oskb) if (skb->sk) skcb->msg_flags |= MSG_DONTROUTE; - if (sock_queue_rcv_skb_reason(&jsk->sk, skb, &reason) < 0) + reason = sock_queue_rcv_skb_reason(&jsk->sk, skb); + if (reason) sk_skb_reason_drop(&jsk->sk, skb, reason); } diff --git a/net/can/raw.c b/net/can/raw.c index eee244ffc31e..a26942e78e68 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -207,7 +207,8 @@ static void raw_rcv(struct sk_buff *oskb, void *data) if (oskb->sk == sk) *pflags |= MSG_CONFIRM; - if 
(sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) + reason = sock_queue_rcv_skb_reason(sk, skb); + if (reason) sk_skb_reason_drop(sk, skb, reason); } @@ -361,6 +362,14 @@ static int raw_notifier(struct notifier_block *nb, unsigned long msg, return NOTIFY_DONE; } +static void raw_sock_destruct(struct sock *sk) +{ + struct raw_sock *ro = raw_sk(sk); + + free_percpu(ro->uniq); + can_sock_destruct(sk); +} + static int raw_init(struct sock *sk) { struct raw_sock *ro = raw_sk(sk); @@ -387,6 +396,8 @@ static int raw_init(struct sock *sk) if (unlikely(!ro->uniq)) return -ENOMEM; + sk->sk_destruct = raw_sock_destruct; + /* set notifier */ spin_lock(&raw_notifier_lock); list_add_tail(&ro->notifier, &raw_notifier_list); @@ -436,7 +447,6 @@ static int raw_release(struct socket *sock) ro->bound = 0; ro->dev = NULL; ro->count = 0; - free_percpu(ro->uniq); sock_orphan(sk); sock->sk = NULL; @@ -760,7 +770,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, } static int raw_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) + sockopt_t *opt) { struct sock *sk = sock->sk; struct raw_sock *ro = raw_sk(sk); @@ -770,8 +780,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, if (level != SOL_CAN_RAW) return -EINVAL; - if (get_user(len, optlen)) - return -EFAULT; + len = opt->optlen; if (len < 0) return -EINVAL; @@ -787,12 +796,12 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, if (len < fsize) { /* return -ERANGE and needed space in optlen */ err = -ERANGE; - if (put_user(fsize, optlen)) - err = -EFAULT; + opt->optlen = fsize; } else { if (len > fsize) len = fsize; - if (copy_to_user(optval, ro->filter, len)) + if (copy_to_iter(ro->filter, len, + &opt->iter_out) != len) err = -EFAULT; } } else { @@ -801,7 +810,7 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, release_sock(sk); if (!err) - err = put_user(len, optlen); + opt->optlen = len; 
return err; } case CAN_RAW_ERR_FILTER: @@ -845,16 +854,16 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, if (len < sizeof(ro->raw_vcid_opts)) { /* return -ERANGE and needed space in optlen */ err = -ERANGE; - if (put_user(sizeof(ro->raw_vcid_opts), optlen)) - err = -EFAULT; + opt->optlen = sizeof(ro->raw_vcid_opts); } else { if (len > sizeof(ro->raw_vcid_opts)) len = sizeof(ro->raw_vcid_opts); - if (copy_to_user(optval, &ro->raw_vcid_opts, len)) + if (copy_to_iter(&ro->raw_vcid_opts, len, + &opt->iter_out) != len) err = -EFAULT; } if (!err) - err = put_user(len, optlen); + opt->optlen = len; return err; } case CAN_RAW_JOIN_FILTERS: @@ -868,9 +877,8 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, return -ENOPROTOOPT; } - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, val, len)) + opt->optlen = len; + if (copy_to_iter(val, len, &opt->iter_out) != len) return -EFAULT; return 0; } @@ -1077,7 +1085,7 @@ static const struct proto_ops raw_ops = { .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = raw_setsockopt, - .getsockopt = raw_getsockopt, + .getsockopt_iter = raw_getsockopt, .sendmsg = raw_sendmsg, .recvmsg = raw_recvmsg, .mmap = sock_no_mmap, diff --git a/net/core/dev.c b/net/core/dev.c index 831129f2a69b..e59f6025067c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1060,16 +1060,18 @@ struct net_device *dev_get_by_napi_id(unsigned int napi_id) * This helper is intended for locking net_device after it has been looked up * using a lockless lookup helper. Lock prevents the instance from going away. 
*/ -struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net) +struct net_device * +netdev_put_lock(struct net_device *dev, struct net *net, + netdevice_tracker *tracker) { netdev_lock(dev); if (dev->reg_state > NETREG_REGISTERED || dev->moving_ns || !net_eq(dev_net(dev), net)) { netdev_unlock(dev); - dev_put(dev); + netdev_put(dev, tracker); return NULL; } - dev_put(dev); + netdev_put(dev, tracker); return dev; } @@ -1182,7 +1184,6 @@ void netdev_copy_name(struct net_device *dev, char *name) strscpy(name, dev->name, IFNAMSIZ); } while (read_seqretry(&netdev_rename_lock, seq)); } -EXPORT_IPV6_MOD_GPL(netdev_copy_name); /** * netdev_get_name - get a netdevice name, knowing its ifindex. @@ -1312,7 +1313,6 @@ struct net_device *netdev_get_by_flags_rcu(struct net *net, netdevice_tracker *t return NULL; } -EXPORT_IPV6_MOD(netdev_get_by_flags_rcu); /** * dev_valid_name - check if name is okay for network device @@ -1731,6 +1731,7 @@ int netif_open(struct net_device *dev, struct netlink_ext_ack *extack) return ret; } +EXPORT_SYMBOL(netif_open); static void __dev_close_many(struct list_head *head) { @@ -1756,7 +1757,7 @@ static void __dev_close_many(struct list_head *head) smp_mb__after_atomic(); /* Commit netif_running(). 
*/ } - dev_deactivate_many(head); + dev_deactivate_many(head, true); list_for_each_entry(dev, head, close_list) { const struct net_device_ops *ops = dev->netdev_ops; @@ -1837,7 +1838,6 @@ void netif_disable_lro(struct net_device *dev) netdev_unlock_ops(lower_dev); } } -EXPORT_IPV6_MOD(netif_disable_lro); /** * dev_disable_gro_hw - disable HW Generic Receive Offload on a device @@ -4103,15 +4103,16 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d } EXPORT_SYMBOL_GPL(validate_xmit_skb_list); -static void qdisc_pkt_len_segs_init(struct sk_buff *skb) +static enum skb_drop_reason qdisc_pkt_len_segs_init(struct sk_buff *skb) { struct skb_shared_info *shinfo = skb_shinfo(skb); + unsigned int hdr_len, tlen; u16 gso_segs; qdisc_skb_cb(skb)->pkt_len = skb->len; if (!shinfo->gso_size) { qdisc_skb_cb(skb)->pkt_segs = 1; - return; + return SKB_NOT_DROPPED_YET; } qdisc_skb_cb(skb)->pkt_segs = gso_segs = shinfo->gso_segs; @@ -4119,44 +4120,49 @@ static void qdisc_pkt_len_segs_init(struct sk_buff *skb) /* To get more precise estimation of bytes sent on wire, * we add to pkt_len the headers size of all segments */ - if (skb_transport_header_was_set(skb)) { - unsigned int hdr_len; - /* mac layer + network layer */ - if (!skb->encapsulation) - hdr_len = skb_transport_offset(skb); - else - hdr_len = skb_inner_transport_offset(skb); - - /* + transport layer */ - if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { - const struct tcphdr *th; - struct tcphdr _tcphdr; + /* mac layer + network layer */ + if (!skb->encapsulation) { + if (unlikely(!skb_transport_header_was_set(skb))) + return SKB_NOT_DROPPED_YET; + hdr_len = skb_transport_offset(skb); + } else { + hdr_len = skb_inner_transport_offset(skb); + } + /* + transport layer */ + if (likely(shinfo->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))) { + const struct tcphdr *th; - th = skb_header_pointer(skb, hdr_len, - sizeof(_tcphdr), &_tcphdr); - if (likely(th)) - hdr_len += 
__tcp_hdrlen(th); - } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { - struct udphdr _udphdr; + if (!pskb_may_pull(skb, hdr_len + sizeof(struct tcphdr))) + return SKB_DROP_REASON_SKB_BAD_GSO; - if (skb_header_pointer(skb, hdr_len, - sizeof(_udphdr), &_udphdr)) - hdr_len += sizeof(struct udphdr); - } + th = (const struct tcphdr *)(skb->data + hdr_len); + tlen = __tcp_hdrlen(th); + if (tlen < sizeof(*th)) + return SKB_DROP_REASON_SKB_BAD_GSO; + hdr_len += tlen; + if (!pskb_may_pull(skb, hdr_len)) + return SKB_DROP_REASON_SKB_BAD_GSO; + } else if (shinfo->gso_type & SKB_GSO_UDP_L4) { + if (!pskb_may_pull(skb, hdr_len + sizeof(struct udphdr))) + return SKB_DROP_REASON_SKB_BAD_GSO; + hdr_len += sizeof(struct udphdr); + } - if (unlikely(shinfo->gso_type & SKB_GSO_DODGY)) { - int payload = skb->len - hdr_len; + /* prior pskb_may_pull() might have changed skb->head. */ + shinfo = skb_shinfo(skb); + if (unlikely(shinfo->gso_type & SKB_GSO_DODGY)) { + int payload = skb->len - hdr_len; - /* Malicious packet. */ - if (payload <= 0) - return; - gso_segs = DIV_ROUND_UP(payload, shinfo->gso_size); - shinfo->gso_segs = gso_segs; - qdisc_skb_cb(skb)->pkt_segs = gso_segs; - } - qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; + /* Malicious packet. 
*/ + if (payload <= 0) + return SKB_DROP_REASON_SKB_BAD_GSO; + gso_segs = DIV_ROUND_UP(payload, shinfo->gso_size); + shinfo->gso_segs = gso_segs; + qdisc_skb_cb(skb)->pkt_segs = gso_segs; } + qdisc_skb_cb(skb)->pkt_len += (gso_segs - 1) * hdr_len; + return SKB_NOT_DROPPED_YET; } static int dev_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *q, @@ -4183,7 +4189,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, qdisc_calculate_pkt_len(skb, q); - tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_DROP); + tcf_set_qdisc_drop_reason(skb, QDISC_DROP_GENERIC); if (q->flags & TCQ_F_NOLOCK) { if (q->flags & TCQ_F_CAN_BYPASS && nolock_qdisc_is_empty(q) && @@ -4291,8 +4297,8 @@ unlock: spin_unlock(root_lock); free_skbs: - tcf_kfree_skb_list(to_free); - tcf_kfree_skb_list(to_free2); + tcf_kfree_skb_list(to_free, q, txq, dev); + tcf_kfree_skb_list(to_free2, q, txq, dev); return rc; } @@ -4761,9 +4767,10 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) { struct net_device *dev = skb->dev; struct netdev_queue *txq = NULL; - struct Qdisc *q; - int rc = -ENOMEM; + enum skb_drop_reason reason; + int cpu, rc = -ENOMEM; bool again = false; + struct Qdisc *q; skb_reset_mac_header(skb); skb_assert_len(skb); @@ -4772,6 +4779,12 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) (SKBTX_SCHED_TSTAMP | SKBTX_BPF))) __skb_tstamp_tx(skb, NULL, NULL, skb->sk, SCM_TSTAMP_SCHED); + reason = qdisc_pkt_len_segs_init(skb); + if (unlikely(reason)) { + dev_core_stats_tx_dropped_inc(dev); + kfree_skb_reason(skb, reason); + return -EINVAL; + } /* Disable soft irqs for various locks below. Also * stops preemption for RCU. 
*/ @@ -4779,7 +4792,6 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) skb_update_prio(skb); - qdisc_pkt_len_segs_init(skb); tcx_set_ingress(skb, false); #ifdef CONFIG_NET_EGRESS if (static_branch_unlikely(&egress_needed_key)) { @@ -4832,59 +4844,62 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) * Check this and shot the lock. It is not prone from deadlocks. *Either shot noqueue qdisc, it is even simpler 8) */ - if (dev->flags & IFF_UP) { - int cpu = smp_processor_id(); /* ok because BHs are off */ + if (unlikely(!(dev->flags & IFF_UP))) { + reason = SKB_DROP_REASON_DEV_READY; + goto drop; + } - if (!netif_tx_owned(txq, cpu)) { - bool is_list = false; + cpu = smp_processor_id(); /* ok because BHs are off */ - if (dev_xmit_recursion()) - goto recursion_alert; + if (likely(!netif_tx_owned(txq, cpu))) { + bool is_list = false; - skb = validate_xmit_skb(skb, dev, &again); - if (!skb) - goto out; + if (dev_xmit_recursion()) + goto recursion_alert; - HARD_TX_LOCK(dev, txq, cpu); + skb = validate_xmit_skb(skb, dev, &again); + if (!skb) + goto out; - if (!netif_xmit_stopped(txq)) { - is_list = !!skb->next; + HARD_TX_LOCK(dev, txq, cpu); - dev_xmit_recursion_inc(); - skb = dev_hard_start_xmit(skb, dev, txq, &rc); - dev_xmit_recursion_dec(); + if (!netif_xmit_stopped(txq)) { + is_list = !!skb->next; - /* GSO segments a single SKB into - * a list of frames. TCP expects error - * to mean none of the data was sent. - */ - if (is_list) - rc = NETDEV_TX_OK; - } - HARD_TX_UNLOCK(dev, txq); - if (!skb) /* xmit completed */ - goto out; + dev_xmit_recursion_inc(); + skb = dev_hard_start_xmit(skb, dev, txq, &rc); + dev_xmit_recursion_dec(); - net_crit_ratelimited("Virtual device %s asks to queue packet!\n", - dev->name); - /* NETDEV_TX_BUSY or queue was stopped */ - if (!is_list) - rc = -ENETDOWN; - } else { - /* Recursion is detected! It is possible, - * unfortunately + /* GSO segments a single SKB into a list of frames. 
+ * TCP expects error to mean none of the data was sent. */ -recursion_alert: - net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", - dev->name); - rc = -ENETDOWN; + if (is_list) + rc = NETDEV_TX_OK; } + HARD_TX_UNLOCK(dev, txq); + if (!skb) /* xmit completed */ + goto out; + + net_crit_ratelimited("Virtual device %s asks to queue packet!\n", + dev->name); + /* NETDEV_TX_BUSY or queue was stopped */ + if (!is_list) + rc = -ENETDOWN; + } else { + /* Recursion is detected! It is possible unfortunately. */ +recursion_alert: + net_crit_ratelimited("Dead loop on virtual device %s (net %llu), fix it urgently!\n", + dev->name, dev_net(dev)->net_cookie); + + rc = -ENETDOWN; } + reason = SKB_DROP_REASON_RECURSION_LIMIT; +drop: rcu_read_unlock_bh(); dev_core_stats_tx_dropped_inc(dev); - kfree_skb_list(skb); + kfree_skb_list_reason(skb, reason); return rc; out: rcu_read_unlock_bh(); @@ -4982,16 +4997,16 @@ EXPORT_SYMBOL(rps_needed); struct static_key_false rfs_needed __read_mostly; EXPORT_SYMBOL(rfs_needed); -static u32 rfs_slot(u32 hash, const struct rps_dev_flow_table *flow_table) +static u32 rfs_slot(u32 hash, rps_tag_ptr tag_ptr) { - return hash_32(hash, flow_table->log); + return hash_32(hash, rps_tag_to_log(tag_ptr)); } #ifdef CONFIG_RFS_ACCEL /** * rps_flow_is_active - check whether the flow is recently active. * @rflow: Specific flow to check activity. - * @flow_table: per-queue flowtable that @rflow belongs to. + * @log: ilog2(hashsize). * @cpu: CPU saved in @rflow. * * If the CPU has processed many packets since the flow's last activity @@ -5000,7 +5015,7 @@ static u32 rfs_slot(u32 hash, const struct rps_dev_flow_table *flow_table) * Return: true if flow was recently active. 
*/ static bool rps_flow_is_active(struct rps_dev_flow *rflow, - struct rps_dev_flow_table *flow_table, + u8 log, unsigned int cpu) { unsigned int flow_last_active; @@ -5013,7 +5028,7 @@ static bool rps_flow_is_active(struct rps_dev_flow *rflow, flow_last_active = READ_ONCE(rflow->last_qtail); return (int)(sd_input_head - flow_last_active) < - (int)(10 << flow_table->log); + (int)(10 << log); } #endif @@ -5025,9 +5040,10 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, u32 head; #ifdef CONFIG_RFS_ACCEL struct netdev_rx_queue *rxqueue; - struct rps_dev_flow_table *flow_table; + struct rps_dev_flow *flow_table; struct rps_dev_flow *old_rflow; struct rps_dev_flow *tmp_rflow; + rps_tag_ptr q_tag_ptr; unsigned int tmp_cpu; u16 rxq_index; u32 flow_id; @@ -5042,16 +5058,18 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, goto out; rxqueue = dev->_rx + rxq_index; - flow_table = rcu_dereference(rxqueue->rps_flow_table); - if (!flow_table) + q_tag_ptr = READ_ONCE(rxqueue->rps_flow_table); + if (!q_tag_ptr) goto out; - flow_id = rfs_slot(hash, flow_table); - tmp_rflow = &flow_table->flows[flow_id]; + flow_id = rfs_slot(hash, q_tag_ptr); + flow_table = rps_tag_to_table(q_tag_ptr); + tmp_rflow = flow_table + flow_id; tmp_cpu = READ_ONCE(tmp_rflow->cpu); if (READ_ONCE(tmp_rflow->filter) != RPS_NO_FILTER) { - if (rps_flow_is_active(tmp_rflow, flow_table, + if (rps_flow_is_active(tmp_rflow, + rps_tag_to_log(q_tag_ptr), tmp_cpu)) { if (hash != READ_ONCE(tmp_rflow->hash) || next_cpu == tmp_cpu) @@ -5089,9 +5107,8 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow **rflowp) { - const struct rps_sock_flow_table *sock_flow_table; struct netdev_rx_queue *rxqueue = dev->_rx; - struct rps_dev_flow_table *flow_table; + rps_tag_ptr global_tag_ptr, q_tag_ptr; struct rps_map *map; int cpu = -1; u32 tcpu; @@ -5112,9 +5129,9 @@ static int get_rps_cpu(struct net_device *dev, 
struct sk_buff *skb, /* Avoid computing hash if RFS/RPS is not active for this rxqueue */ - flow_table = rcu_dereference(rxqueue->rps_flow_table); + q_tag_ptr = READ_ONCE(rxqueue->rps_flow_table); map = rcu_dereference(rxqueue->rps_map); - if (!flow_table && !map) + if (!q_tag_ptr && !map) goto done; skb_reset_network_header(skb); @@ -5122,16 +5139,21 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, if (!hash) goto done; - sock_flow_table = rcu_dereference(net_hotdata.rps_sock_flow_table); - if (flow_table && sock_flow_table) { + global_tag_ptr = READ_ONCE(net_hotdata.rps_sock_flow_table); + if (q_tag_ptr && global_tag_ptr) { + struct rps_sock_flow_table *sock_flow_table; + struct rps_dev_flow *flow_table; struct rps_dev_flow *rflow; u32 next_cpu; + u32 flow_id; u32 ident; /* First check into global flow table if there is a match. * This READ_ONCE() pairs with WRITE_ONCE() from rps_record_sock_flow(). */ - ident = READ_ONCE(sock_flow_table->ents[hash & sock_flow_table->mask]); + flow_id = hash & rps_tag_to_mask(global_tag_ptr); + sock_flow_table = rps_tag_to_table(global_tag_ptr); + ident = READ_ONCE(sock_flow_table[flow_id].ent); if ((ident ^ hash) & ~net_hotdata.rps_cpu_mask) goto try_rps; @@ -5140,7 +5162,9 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, /* OK, now we know there is a match, * we can look at the local (per receive queue) flow table */ - rflow = &flow_table->flows[rfs_slot(hash, flow_table)]; + flow_id = rfs_slot(hash, q_tag_ptr); + flow_table = rps_tag_to_table(q_tag_ptr); + rflow = flow_table + flow_id; tcpu = rflow->cpu; /* @@ -5200,19 +5224,23 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, u16 filter_id) { struct netdev_rx_queue *rxqueue = dev->_rx + rxq_index; - struct rps_dev_flow_table *flow_table; + struct rps_dev_flow *flow_table; struct rps_dev_flow *rflow; + rps_tag_ptr q_tag_ptr; bool expire = true; + u8 log; rcu_read_lock(); - flow_table = 
rcu_dereference(rxqueue->rps_flow_table); - if (flow_table && flow_id < (1UL << flow_table->log)) { + q_tag_ptr = READ_ONCE(rxqueue->rps_flow_table); + log = rps_tag_to_log(q_tag_ptr); + if (q_tag_ptr && flow_id < (1UL << log)) { unsigned int cpu; - rflow = &flow_table->flows[flow_id]; + flow_table = rps_tag_to_table(q_tag_ptr); + rflow = flow_table + flow_id; cpu = READ_ONCE(rflow->cpu); if (READ_ONCE(rflow->filter) == filter_id && - rps_flow_is_active(rflow, flow_table, cpu)) + rps_flow_is_active(rflow, log, cpu)) expire = false; } rcu_read_unlock(); @@ -5825,7 +5853,7 @@ static __latent_entropy void net_tx_action(void) to_free = qdisc_run(q); if (root_lock) spin_unlock(root_lock); - tcf_kfree_skb_list(to_free); + tcf_kfree_skb_list(to_free, q, NULL, qdisc_dev(q)); } rcu_read_unlock(); @@ -8132,7 +8160,8 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev, { struct netdev_adjacent *upper; - WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_rtnl_is_held()); + WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held() && + !lockdep_rtnl_is_held()); upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list); @@ -12316,10 +12345,8 @@ static void dev_memory_provider_uninstall(struct net_device *dev) for (i = 0; i < dev->real_num_rx_queues; i++) { struct netdev_rx_queue *rxq = &dev->_rx[i]; - struct pp_memory_provider_params *p = &rxq->mp_params; - if (p->mp_ops && p->mp_ops->uninstall) - p->mp_ops->uninstall(rxq->mp_params.mp_priv, rxq); + __netif_mp_uninstall_rxq(rxq, &rxq->mp_params); } } @@ -12352,6 +12379,12 @@ static void netif_close_many_and_unlock_cond(struct list_head *close_head) #endif } +bool unregister_netdevice_queued(const struct net_device *dev) +{ + ASSERT_RTNL(); + return !list_empty(&dev->unreg_list); +} + void unregister_netdevice_many_notify(struct list_head *head, u32 portid, const struct nlmsghdr *nlh) { diff --git a/net/core/dev.h b/net/core/dev.h index 781619e76b3e..628bdaebf0ca 100644 --- a/net/core/dev.h +++ 
b/net/core/dev.h @@ -12,6 +12,7 @@ struct net; struct netlink_ext_ack; struct netdev_queue_config; struct cpumask; +struct pp_memory_provider_params; /* Random bits of netdevice that don't need to be exposed */ #define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */ @@ -30,7 +31,15 @@ struct napi_struct * netdev_napi_by_id_lock(struct net *net, unsigned int napi_id); struct net_device *dev_get_by_napi_id(unsigned int napi_id); -struct net_device *__netdev_put_lock(struct net_device *dev, struct net *net); +struct net_device *netdev_put_lock(struct net_device *dev, struct net *net, + netdevice_tracker *tracker); + +static inline struct net_device * +__netdev_put_lock(struct net_device *dev, struct net *net) +{ + return netdev_put_lock(dev, net, NULL); +} + struct net_device * netdev_xa_find_lock(struct net *net, struct net_device *dev, unsigned long *index); @@ -96,6 +105,16 @@ int netdev_queue_config_validate(struct net_device *dev, int rxq_idx, struct netdev_queue_config *qcfg, struct netlink_ext_ack *extack); +bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx); +bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx); +bool netif_is_queue_leasee(const struct net_device *dev); + +void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq, + const struct pp_memory_provider_params *p); + +void netif_rxq_cleanup_unlease(struct netdev_rx_queue *phys_rxq, + struct netdev_rx_queue *virt_rxq); + /* netdev management, shared between various uAPI entry points */ struct netdev_name_node { struct hlist_node hlist; diff --git a/net/core/devmem.c b/net/core/devmem.c index 69d79aee07ef..cde4c89bc146 100644 --- a/net/core/devmem.c +++ b/net/core/devmem.c @@ -145,7 +145,7 @@ void net_devmem_unbind_dmabuf(struct net_devmem_dmabuf_binding *binding) rxq_idx = get_netdev_rx_queue_index(rxq); - __net_mp_close_rxq(binding->dev, rxq_idx, &mp_params); + netif_mp_close_rxq(binding->dev, rxq_idx, &mp_params); } 
percpu_ref_kill(&binding->ref); @@ -163,7 +163,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, u32 xa_idx; int err; - err = __net_mp_open_rxq(dev, rxq_idx, &mp_params, extack); + err = netif_mp_open_rxq(dev, rxq_idx, &mp_params, extack); if (err) return err; @@ -176,7 +176,7 @@ int net_devmem_bind_dmabuf_to_queue(struct net_device *dev, u32 rxq_idx, return 0; err_close_rxq: - __net_mp_close_rxq(dev, rxq_idx, &mp_params); + netif_mp_close_rxq(dev, rxq_idx, &mp_params); return err; } diff --git a/net/core/failover.c b/net/core/failover.c index 0eb2e0ec875b..11bb183c7a1b 100644 --- a/net/core/failover.c +++ b/net/core/failover.c @@ -59,7 +59,7 @@ static int failover_slave_register(struct net_device *slave_dev) if (!failover_dev) goto done; - if (fops && fops->slave_pre_register && + if (fops->slave_pre_register && fops->slave_pre_register(slave_dev, failover_dev)) goto done; @@ -82,7 +82,7 @@ static int failover_slave_register(struct net_device *slave_dev) slave_dev->priv_flags |= (IFF_FAILOVER_SLAVE | IFF_NO_ADDRCONF); - if (fops && fops->slave_register && + if (fops->slave_register && !fops->slave_register(slave_dev, failover_dev)) return NOTIFY_OK; @@ -115,7 +115,7 @@ int failover_slave_unregister(struct net_device *slave_dev) if (!failover_dev) goto done; - if (fops && fops->slave_pre_unregister && + if (fops->slave_pre_unregister && fops->slave_pre_unregister(slave_dev, failover_dev)) goto done; @@ -123,7 +123,7 @@ int failover_slave_unregister(struct net_device *slave_dev) netdev_upper_dev_unlink(slave_dev, failover_dev); slave_dev->priv_flags &= ~(IFF_FAILOVER_SLAVE | IFF_NO_ADDRCONF); - if (fops && fops->slave_unregister && + if (fops->slave_unregister && !fops->slave_unregister(slave_dev, failover_dev)) return NOTIFY_OK; @@ -149,7 +149,7 @@ static int failover_slave_link_change(struct net_device *slave_dev) if (!netif_running(failover_dev)) goto done; - if (fops && fops->slave_link_change && + if (fops->slave_link_change && 
!fops->slave_link_change(slave_dev, failover_dev)) return NOTIFY_OK; @@ -174,7 +174,7 @@ static int failover_slave_name_change(struct net_device *slave_dev) if (!netif_running(failover_dev)) goto done; - if (fops && fops->slave_name_change && + if (fops->slave_name_change && !fops->slave_name_change(slave_dev, failover_dev)) return NOTIFY_OK; @@ -244,7 +244,7 @@ struct failover *failover_register(struct net_device *dev, { struct failover *failover; - if (dev->type != ARPHRD_ETHER) + if (dev->type != ARPHRD_ETHER || !ops) return ERR_PTR(-EINVAL); failover = kzalloc_obj(*failover); diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c index 5cdca49b1d7c..6bb2cc7e88ca 100644 --- a/net/core/fib_notifier.c +++ b/net/core/fib_notifier.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/rtnetlink.h> #include <linux/notifier.h> #include <linux/rcupdate.h> diff --git a/net/core/filter.c b/net/core/filter.c index 78b548158fb0..fcfcb72663ca 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -73,7 +73,6 @@ #include <net/seg6.h> #include <net/seg6_local.h> #include <net/lwtunnel.h> -#include <net/ipv6_stubs.h> #include <net/bpf_sk_storage.h> #include <net/transp_v6.h> #include <linux/btf_ids.h> @@ -122,20 +121,20 @@ EXPORT_SYMBOL_GPL(copy_bpf_fprog_from_user); * @sk: sock associated with &sk_buff * @skb: buffer to filter * @cap: limit on how short the eBPF program may trim the packet - * @reason: record drop reason on errors (negative return value) * * Run the eBPF program and then cut skb->data to correct size returned by * the program. If pkt_len is 0 we toss packet. If skb->len is smaller * than pkt_len we keep whole skb->data. This is the socket level * wrapper to bpf_prog_run. It returns 0 if the packet should - * be accepted or -EPERM if the packet should be tossed. + * be accepted or a drop_reason if the packet should be tossed. 
* */ -int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, - unsigned int cap, enum skb_drop_reason *reason) +enum skb_drop_reason +sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap) { - int err; + enum skb_drop_reason drop_reason; struct sk_filter *filter; + int err; /* * If the skb was allocated from pfmemalloc reserves, only @@ -144,21 +143,17 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, */ if (skb_pfmemalloc(skb) && !sock_flag(sk, SOCK_MEMALLOC)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP); - *reason = SKB_DROP_REASON_PFMEMALLOC; - return -ENOMEM; + return SKB_DROP_REASON_PFMEMALLOC; } err = BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb); - if (err) { - *reason = SKB_DROP_REASON_SOCKET_FILTER; - return err; - } + if (err) + return SKB_DROP_REASON_SOCKET_FILTER; err = security_sock_rcv_skb(sk, skb); - if (err) { - *reason = SKB_DROP_REASON_SECURITY_HOOK; - return err; - } + if (err) + return SKB_DROP_REASON_SECURITY_HOOK; + drop_reason = 0; rcu_read_lock(); filter = rcu_dereference(sk->sk_filter); if (filter) { @@ -170,11 +165,11 @@ int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, skb->sk = save_sk; err = pkt_len ? 
pskb_trim(skb, max(cap, pkt_len)) : -EPERM; if (err) - *reason = SKB_DROP_REASON_SOCKET_FILTER; + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; } rcu_read_unlock(); - return err; + return drop_reason; } EXPORT_SYMBOL(sk_filter_trim_cap); @@ -2279,7 +2274,7 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, .saddr = ip6h->saddr, }; - dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); + dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL); if (IS_ERR(dst)) goto out_drop; @@ -3257,13 +3252,6 @@ static const struct bpf_func_proto bpf_skb_vlan_pop_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto) -{ - skb->protocol = htons(proto); - if (skb_valid_dst(skb)) - skb_dst_drop(skb); -} - static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) { /* Caller already did skb_cow() with meta_len+len as headroom, @@ -3362,7 +3350,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) shinfo->gso_type |= SKB_GSO_DODGY; } - bpf_skb_change_protocol(skb, ETH_P_IPV6); + skb->protocol = htons(ETH_P_IPV6); skb_clear_hash(skb); return 0; @@ -3393,7 +3381,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) shinfo->gso_type |= SKB_GSO_DODGY; } - bpf_skb_change_protocol(skb, ETH_P_IP); + skb->protocol = htons(ETH_P_IP); skb_clear_hash(skb); return 0; @@ -3441,7 +3429,13 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto, */ ret = bpf_skb_proto_xlat(skb, proto); bpf_compute_data_pointers(skb); - return ret; + if (ret) + return ret; + + if (skb_valid_dst(skb)) + skb_dst_drop(skb); + + return 0; } static const struct bpf_func_proto bpf_skb_change_proto_proto = { @@ -3583,12 +3577,13 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, } /* Match skb->protocol to new outer l3 protocol */ - if (skb->protocol == htons(ETH_P_IP) && - flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) - bpf_skb_change_protocol(skb, ETH_P_IPV6); - else if 
(skb->protocol == htons(ETH_P_IPV6) && - flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) - bpf_skb_change_protocol(skb, ETH_P_IP); + if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) + skb->protocol = htons(ETH_P_IPV6); + else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) + skb->protocol = htons(ETH_P_IP); + + if (skb_valid_dst(skb)) + skb_dst_drop(skb); } if (skb_is_gso(skb)) { @@ -3616,6 +3611,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff, static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, u64 flags) { + bool decap = flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK; int ret; if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO | @@ -3638,13 +3634,16 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff, if (unlikely(ret < 0)) return ret; - /* Match skb->protocol to new outer l3 protocol */ - if (skb->protocol == htons(ETH_P_IP) && - flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) - bpf_skb_change_protocol(skb, ETH_P_IPV6); - else if (skb->protocol == htons(ETH_P_IPV6) && - flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) - bpf_skb_change_protocol(skb, ETH_P_IP); + if (decap) { + /* Match skb->protocol to new outer l3 protocol */ + if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) + skb->protocol = htons(ETH_P_IPV6); + else if (flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) + skb->protocol = htons(ETH_P_IP); + + if (skb_valid_dst(skb)) + skb_dst_drop(skb); + } if (skb_is_gso(skb)) { struct skb_shared_info *shinfo = skb_shinfo(skb); @@ -4395,6 +4394,8 @@ u32 xdp_master_redirect(struct xdp_buff *xdp) struct net_device *master, *slave; master = netdev_master_upper_dev_get_rcu(xdp->rxq->dev); + if (unlikely(!(master->flags & IFF_UP))) + return XDP_ABORTED; slave = master->netdev_ops->ndo_xdp_get_xmit_slave(master, xdp); if (slave && slave != xdp->rxq->dev) { /* The target device is different from the receiving device, so @@ -5577,12 +5578,12 @@ static int sol_ipv6_sockopt(struct sock *sk, int optname, } if (getopt) - return ipv6_bpf_stub->ipv6_getsockopt(sk, SOL_IPV6, 
optname, - KERNEL_SOCKPTR(optval), - KERNEL_SOCKPTR(optlen)); + return do_ipv6_getsockopt(sk, SOL_IPV6, optname, + KERNEL_SOCKPTR(optval), + KERNEL_SOCKPTR(optlen)); - return ipv6_bpf_stub->ipv6_setsockopt(sk, SOL_IPV6, optname, - KERNEL_SOCKPTR(optval), *optlen); + return do_ipv6_setsockopt(sk, SOL_IPV6, optname, + KERNEL_SOCKPTR(optval), *optlen); } static int __bpf_setsockopt(struct sock *sk, int level, int optname, @@ -5981,9 +5982,6 @@ static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = { .arg2_type = ARG_ANYTHING, }; -const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; -EXPORT_SYMBOL_GPL(ipv6_bpf_stub); - BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, int, addr_len) { @@ -6007,11 +6005,9 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, return err; if (((struct sockaddr_in6 *)addr)->sin6_port == htons(0)) flags |= BIND_FORCE_ADDRESS_NO_PORT; - /* ipv6_bpf_stub cannot be NULL, since it's called from - * bpf_cgroup_inet6_connect hook and ipv6 is already loaded - */ - return ipv6_bpf_stub->inet6_bind(sk, (struct sockaddr_unsized *)addr, - addr_len, flags); + + return __inet6_bind(sk, (struct sockaddr_unsized *)addr, + addr_len, flags); #endif /* CONFIG_IPV6 */ } #endif /* CONFIG_INET */ @@ -6099,9 +6095,9 @@ static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, u32 mtu) static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, u32 flags, bool check_mtu) { + struct neighbour *neigh = NULL; struct fib_nh_common *nhc; struct in_device *in_dev; - struct neighbour *neigh; struct net_device *dev; struct fib_result res; struct flowi4 fl4; @@ -6221,8 +6217,8 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (likely(nhc->nhc_gw_family != AF_INET6)) neigh = __ipv4_neigh_lookup_noref(dev, (__force u32)params->ipv4_dst); - else - neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst); + else if 
(IS_ENABLED(CONFIG_IPV6)) + neigh = __ipv6_neigh_lookup_noref(dev, params->ipv6_dst); if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID)) return BPF_FIB_LKUP_RET_NO_NEIGH; @@ -6290,12 +6286,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, params->tbid = 0; } - tb = ipv6_stub->fib6_get_table(net, tbid); + tb = fib6_get_table(net, tbid); if (unlikely(!tb)) return BPF_FIB_LKUP_RET_NOT_FWDED; - err = ipv6_stub->fib6_table_lookup(net, tb, oif, &fl6, &res, - strict); + err = fib6_table_lookup(net, tb, oif, &fl6, &res, strict); } else { if (flags & BPF_FIB_LOOKUP_MARK) fl6.flowi6_mark = params->mark; @@ -6305,7 +6300,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, fl6.flowi6_tun_key.tun_id = 0; fl6.flowi6_uid = sock_net_uid(net, NULL); - err = ipv6_stub->fib6_lookup(net, oif, &fl6, &res, strict); + err = fib6_lookup(net, oif, &fl6, &res, strict); } if (unlikely(err || IS_ERR_OR_NULL(res.f6i) || @@ -6326,11 +6321,11 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, return BPF_FIB_LKUP_RET_NOT_FWDED; } - ipv6_stub->fib6_select_path(net, &res, &fl6, fl6.flowi6_oif, - fl6.flowi6_oif != 0, NULL, strict); + fib6_select_path(net, &res, &fl6, fl6.flowi6_oif, + fl6.flowi6_oif != 0, NULL, strict); if (check_mtu) { - mtu = ipv6_stub->ip6_mtu_from_fib6(&res, dst, src); + mtu = ip6_mtu_from_fib6(&res, dst, src); if (params->tot_len > mtu) { params->mtu_result = mtu; /* union with tot_len */ return BPF_FIB_LKUP_RET_FRAG_NEEDED; @@ -6351,9 +6346,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params, if (res.f6i->fib6_prefsrc.plen) { *src = res.f6i->fib6_prefsrc.addr; } else { - err = ipv6_bpf_stub->ipv6_dev_get_saddr(net, dev, - &fl6.daddr, 0, - src); + err = ipv6_dev_get_saddr(net, dev, &fl6.daddr, 0, src); if (err) return BPF_FIB_LKUP_RET_NO_SRC_ADDR; } @@ -6365,7 +6358,7 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup 
*params, /* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is * not needed here. */ - neigh = __ipv6_neigh_lookup_noref_stub(dev, dst); + neigh = __ipv6_neigh_lookup_noref(dev, dst); if (!neigh || !(READ_ONCE(neigh->nud_state) & NUD_VALID)) return BPF_FIB_LKUP_RET_NO_NEIGH; memcpy(params->dmac, neigh->ha, ETH_ALEN); @@ -6889,7 +6882,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, else sk = __udp4_lib_lookup(net, src4, tuple->ipv4.sport, dst4, tuple->ipv4.dport, - dif, sdif, net->ipv4.udp_table, NULL); + dif, sdif, NULL); #if IS_ENABLED(CONFIG_IPV6) } else { struct in6_addr *src6 = (struct in6_addr *)&tuple->ipv6.saddr; @@ -6900,12 +6893,10 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, src6, tuple->ipv6.sport, dst6, ntohs(tuple->ipv6.dport), dif, sdif, &refcounted); - else if (likely(ipv6_bpf_stub)) - sk = ipv6_bpf_stub->udp6_lib_lookup(net, - src6, tuple->ipv6.sport, - dst6, tuple->ipv6.dport, - dif, sdif, - net->ipv4.udp_table, NULL); + else if (likely(ipv6_mod_enabled())) + sk = __udp6_lib_lookup(net, src6, tuple->ipv6.sport, + dst6, tuple->ipv6.dport, + dif, sdif, NULL); #endif } @@ -7591,7 +7582,7 @@ BPF_CALL_5(bpf_tcp_check_syncookie, struct sock *, sk, void *, iph, u32, iph_len ret = __cookie_v4_check((struct iphdr *)iph, th); break; -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) case 6: if (unlikely(iph_len < sizeof(struct ipv6hdr))) return -EINVAL; @@ -7661,7 +7652,7 @@ BPF_CALL_5(bpf_tcp_gen_syncookie, struct sock *, sk, void *, iph, u32, iph_len, mss = tcp_v4_get_syncookie(sk, iph, th, &cookie); break; -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) case 6: if (unlikely(iph_len < sizeof(struct ipv6hdr))) return -EINVAL; @@ -8027,7 +8018,7 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = { BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph, struct tcphdr *, th, u32, th_len) { -#if IS_BUILTIN(CONFIG_IPV6) +#if 
IS_ENABLED(CONFIG_IPV6) const u16 mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); u32 cookie; @@ -8079,7 +8070,7 @@ static const struct bpf_func_proto bpf_tcp_raw_check_syncookie_ipv4_proto = { BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv6, struct ipv6hdr *, iph, struct tcphdr *, th) { -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) if (__cookie_v6_check(iph, th) > 0) return 0; @@ -10581,10 +10572,11 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, si->dst_reg, si->dst_reg, \ offsetof(OBJ, OBJ_FIELD)); \ if (si->dst_reg == si->src_reg) { \ - *insn++ = BPF_JMP_A(1); \ + *insn++ = BPF_JMP_A(2); \ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, \ temp)); \ + *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); \ } \ } while (0) @@ -10618,10 +10610,11 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, si->dst_reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, sk));\ if (si->dst_reg == si->src_reg) { \ - *insn++ = BPF_JMP_A(1); \ + *insn++ = BPF_JMP_A(2); \ *insn++ = BPF_LDX_MEM(BPF_DW, reg, si->src_reg, \ offsetof(struct bpf_sock_ops_kern, \ temp)); \ + *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); \ } \ } while (0) @@ -11965,7 +11958,7 @@ BPF_CALL_1(bpf_skc_to_tcp_timewait_sock, struct sock *, sk) return (unsigned long)sk; #endif -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_TIME_WAIT) return (unsigned long)sk; #endif @@ -11988,7 +11981,7 @@ BPF_CALL_1(bpf_skc_to_tcp_request_sock, struct sock *, sk) return (unsigned long)sk; #endif -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) if (sk && sk->sk_prot == &tcpv6_prot && sk->sk_state == TCP_NEW_SYN_RECV) return (unsigned long)sk; #endif @@ -12251,7 +12244,7 @@ __bpf_kfunc int bpf_sk_assign_tcp_reqsk(struct __sk_buff *s, struct sock *sk, ops = &tcp_request_sock_ops; min_mss = 536; break; -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) 
case htons(ETH_P_IPV6): ops = &tcp6_request_sock_ops; min_mss = IPV6_MIN_MTU - 60; diff --git a/net/core/hotdata.c b/net/core/hotdata.c index a6db36580817..b0f51a726a3d 100644 --- a/net/core/hotdata.c +++ b/net/core/hotdata.c @@ -27,4 +27,3 @@ struct net_hotdata net_hotdata __cacheline_aligned = { EXPORT_SYMBOL(net_hotdata); struct net_aligned_data net_aligned_data; -EXPORT_IPV6_MOD(net_aligned_data); diff --git a/net/core/link_watch.c b/net/core/link_watch.c index 25c455c10a01..ff2c1d4538ef 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -181,7 +181,7 @@ static void linkwatch_do_dev(struct net_device *dev) if (netif_carrier_ok(dev)) dev_activate(dev); else - dev_deactivate(dev); + dev_deactivate(dev, true); netif_state_change(dev); } diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 9f40be0c3e71..f71ef82a5f3d 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -13,7 +13,6 @@ #include <net/gre.h> #include <net/ip.h> #include <net/ip6_route.h> -#include <net/ipv6_stubs.h> struct bpf_lwt_prog { struct bpf_prog *prog; @@ -103,7 +102,12 @@ static int bpf_lwt_input_reroute(struct sk_buff *skb) dev_put(dev); } else if (skb->protocol == htons(ETH_P_IPV6)) { skb_dst_drop(skb); - err = ipv6_stub->ipv6_route_input(skb); + if (IS_ENABLED(CONFIG_IPV6)) { + ip6_route_input(skb); + err = skb_dst(skb)->error; + } else { + err = -EAFNOSUPPORT; + } } else { err = -EAFNOSUPPORT; } @@ -233,7 +237,7 @@ static int bpf_lwt_xmit_reroute(struct sk_buff *skb) fl6.daddr = iph6->daddr; fl6.saddr = iph6->saddr; - dst = ipv6_stub->ipv6_dst_lookup_flow(net, skb->sk, &fl6, NULL); + dst = ip6_dst_lookup_flow(net, skb->sk, &fl6, NULL); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto err; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index c56a4e7bf790..9e12524b67fa 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -778,7 +778,6 @@ struct pneigh_entry *pneigh_lookup(struct neigh_table *tbl, return NULL; } -EXPORT_IPV6_MOD(pneigh_lookup); int 
pneigh_create(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev, diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b9740a397f55..3318b5666e43 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1060,31 +1060,23 @@ out: static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, char *buf) { - struct rps_dev_flow_table *flow_table; unsigned long val = 0; + rps_tag_ptr tag_ptr; - rcu_read_lock(); - flow_table = rcu_dereference(queue->rps_flow_table); - if (flow_table) - val = 1UL << flow_table->log; - rcu_read_unlock(); + tag_ptr = READ_ONCE(queue->rps_flow_table); + if (tag_ptr) + val = 1UL << rps_tag_to_log(tag_ptr); return sysfs_emit(buf, "%lu\n", val); } -static void rps_dev_flow_table_release(struct rcu_head *rcu) -{ - struct rps_dev_flow_table *table = container_of(rcu, - struct rps_dev_flow_table, rcu); - vfree(table); -} - static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, const char *buf, size_t len) { + rps_tag_ptr otag, tag_ptr = 0UL; + struct rps_dev_flow *table; unsigned long mask, count; - struct rps_dev_flow_table *table, *old_table; - static DEFINE_SPINLOCK(rps_dev_flow_lock); + size_t sz; int rc; if (!capable(CAP_NET_ADMIN)) @@ -1101,41 +1093,36 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, */ while ((mask | (mask >> 1)) != mask) mask |= (mask >> 1); - /* On 64 bit arches, must check mask fits in table->mask (u32), - * and on 32bit arches, must check - * RPS_DEV_FLOW_TABLE_SIZE(mask + 1) doesn't overflow. - */ -#if BITS_PER_LONG > 32 - if (mask > (unsigned long)(u32)mask) - return -EINVAL; -#else - if (mask > (ULONG_MAX - RPS_DEV_FLOW_TABLE_SIZE(1)) - / sizeof(struct rps_dev_flow)) { - /* Enforce a limit to prevent overflow */ + + /* Do not accept too large tables. 
*/ + if (mask > (INT_MAX / sizeof(*table) - 1)) return -EINVAL; - } -#endif - table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(mask + 1)); + + sz = max_t(size_t, sizeof(*table) * (mask + 1), + PAGE_SIZE); + if (sz <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) || + is_power_of_2(sizeof(*table))) + table = kvmalloc(sz, GFP_KERNEL); + else + table = vmalloc(sz); if (!table) return -ENOMEM; - - table->log = ilog2(mask) + 1; + tag_ptr = (rps_tag_ptr)table; + if (rps_tag_to_log(tag_ptr)) { + pr_err_once("store_rps_dev_flow_table_cnt() got a non page aligned allocation.\n"); + kvfree(table); + return -ENOMEM; + } + tag_ptr |= (ilog2(mask) + 1); for (count = 0; count <= mask; count++) { - table->flows[count].cpu = RPS_NO_CPU; - table->flows[count].filter = RPS_NO_FILTER; + table[count].cpu = RPS_NO_CPU; + table[count].filter = RPS_NO_FILTER; } - } else { - table = NULL; } - spin_lock(&rps_dev_flow_lock); - old_table = rcu_dereference_protected(queue->rps_flow_table, - lockdep_is_held(&rps_dev_flow_lock)); - rcu_assign_pointer(queue->rps_flow_table, table); - spin_unlock(&rps_dev_flow_lock); - - if (old_table) - call_rcu(&old_table->rcu, rps_dev_flow_table_release); + otag = xchg(&queue->rps_flow_table, tag_ptr); + if (otag) + kvfree_rcu_mightsleep(rps_tag_to_table(otag)); return len; } @@ -1161,8 +1148,8 @@ static void rx_queue_release(struct kobject *kobj) { struct netdev_rx_queue *queue = to_rx_queue(kobj); #ifdef CONFIG_RPS + rps_tag_ptr tag_ptr; struct rps_map *map; - struct rps_dev_flow_table *flow_table; map = rcu_dereference_protected(queue->rps_map, 1); if (map) { @@ -1170,11 +1157,9 @@ static void rx_queue_release(struct kobject *kobj) kfree_rcu(map, rcu); } - flow_table = rcu_dereference_protected(queue->rps_flow_table, 1); - if (flow_table) { - RCU_INIT_POINTER(queue->rps_flow_table, NULL); - call_rcu(&flow_table->rcu, rps_dev_flow_table_release); - } + tag_ptr = xchg(&queue->rps_flow_table, 0UL); + if (tag_ptr) + kvfree_rcu_mightsleep(rps_tag_to_table(tag_ptr)); #endif 
memset(kobj, 0, sizeof(*kobj)); @@ -1754,7 +1739,7 @@ static ssize_t xps_queue_show(struct net_device *dev, unsigned int index, out_no_maps: rcu_read_unlock(); - len = bitmap_print_to_pagebuf(false, buf, mask, nr_ids); + len = sysfs_emit(buf, "%*pb\n", nr_ids, mask); bitmap_free(mask); return len < PAGE_SIZE ? len : -EINVAL; diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index e938f25e8e86..38e2e3ffd0bd 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -13,4 +13,5 @@ int netdev_change_owner(struct net_device *, const struct net *net_old, extern struct mutex rps_default_mask_mutex; +DECLARE_STATIC_KEY_FALSE(skb_defer_disable_key); #endif diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 24aa10a1d0ea..d9dafe24f57e 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -411,7 +411,7 @@ static __net_init int preinit_net(struct net *net, struct user_namespace *user_n ref_tracker_dir_init(&net->refcnt_tracker, 128, "net_refcnt"); ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net_notrefcnt"); - get_random_bytes(&net->hash_mix, sizeof(u32)); + net->hash_mix = get_random_u32(); net->dev_base_seq = 1; net->user_ns = user_ns; diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c index ba673e81716f..81aecb5d3bc5 100644 --- a/net/core/netdev-genl-gen.c +++ b/net/core/netdev-genl-gen.c @@ -28,6 +28,12 @@ static const struct netlink_range_validation netdev_a_napi_defer_hard_irqs_range }; /* Common nested types */ +const struct nla_policy netdev_lease_nl_policy[NETDEV_A_LEASE_NETNS_ID + 1] = { + [NETDEV_A_LEASE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), + [NETDEV_A_LEASE_QUEUE] = NLA_POLICY_NESTED(netdev_queue_id_nl_policy), + [NETDEV_A_LEASE_NETNS_ID] = NLA_POLICY_MIN(NLA_S32, 0), +}; + const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1] = { [NETDEV_A_PAGE_POOL_ID] = NLA_POLICY_FULL_RANGE(NLA_UINT, &netdev_a_page_pool_id_range), [NETDEV_A_PAGE_POOL_IFINDEX] = 
NLA_POLICY_FULL_RANGE(NLA_U32, &netdev_a_page_pool_ifindex_range), @@ -107,6 +113,13 @@ static const struct nla_policy netdev_bind_tx_nl_policy[NETDEV_A_DMABUF_FD + 1] [NETDEV_A_DMABUF_FD] = { .type = NLA_U32, }, }; +/* NETDEV_CMD_QUEUE_CREATE - do */ +static const struct nla_policy netdev_queue_create_nl_policy[NETDEV_A_QUEUE_LEASE + 1] = { + [NETDEV_A_QUEUE_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1), + [NETDEV_A_QUEUE_TYPE] = NLA_POLICY_MAX(NLA_U32, 1), + [NETDEV_A_QUEUE_LEASE] = NLA_POLICY_NESTED(netdev_lease_nl_policy), +}; + /* Ops table for netdev */ static const struct genl_split_ops netdev_nl_ops[] = { { @@ -205,6 +218,13 @@ static const struct genl_split_ops netdev_nl_ops[] = { .maxattr = NETDEV_A_DMABUF_FD, .flags = GENL_CMD_CAP_DO, }, + { + .cmd = NETDEV_CMD_QUEUE_CREATE, + .doit = netdev_nl_queue_create_doit, + .policy = netdev_queue_create_nl_policy, + .maxattr = NETDEV_A_QUEUE_LEASE, + .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, + }, }; static const struct genl_multicast_group netdev_nl_mcgrps[] = { diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h index cffc08517a41..d71b435d72c1 100644 --- a/net/core/netdev-genl-gen.h +++ b/net/core/netdev-genl-gen.h @@ -14,6 +14,7 @@ #include <net/netdev_netlink.h> /* Common nested types */ +extern const struct nla_policy netdev_lease_nl_policy[NETDEV_A_LEASE_NETNS_ID + 1]; extern const struct nla_policy netdev_page_pool_info_nl_policy[NETDEV_A_PAGE_POOL_IFINDEX + 1]; extern const struct nla_policy netdev_queue_id_nl_policy[NETDEV_A_QUEUE_TYPE + 1]; @@ -36,6 +37,7 @@ int netdev_nl_qstats_get_dumpit(struct sk_buff *skb, int netdev_nl_bind_rx_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_napi_set_doit(struct sk_buff *skb, struct genl_info *info); int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info); +int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info); enum { NETDEV_NLGRP_MGMT, diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c 
index 470fabbeacd9..b8f6076d8007 100644 --- a/net/core/netdev-genl.c +++ b/net/core/netdev-genl.c @@ -387,10 +387,92 @@ static int nla_put_napi_id(struct sk_buff *skb, const struct napi_struct *napi) } static int +netdev_nl_queue_fill_lease(struct sk_buff *rsp, struct net_device *netdev, + u32 q_idx, u32 q_type) +{ + struct net_device *orig_netdev = netdev; + struct nlattr *nest_lease, *nest_queue; + struct netdev_rx_queue *rxq; + struct net *net, *peer_net; + + rxq = __netif_get_rx_queue_lease(&netdev, &q_idx, NETIF_PHYS_TO_VIRT); + if (!rxq || orig_netdev == netdev) + return 0; + + nest_lease = nla_nest_start(rsp, NETDEV_A_QUEUE_LEASE); + if (!nest_lease) + goto nla_put_failure; + + nest_queue = nla_nest_start(rsp, NETDEV_A_LEASE_QUEUE); + if (!nest_queue) + goto nla_put_failure; + if (nla_put_u32(rsp, NETDEV_A_QUEUE_ID, q_idx)) + goto nla_put_failure; + if (nla_put_u32(rsp, NETDEV_A_QUEUE_TYPE, q_type)) + goto nla_put_failure; + nla_nest_end(rsp, nest_queue); + + if (nla_put_u32(rsp, NETDEV_A_LEASE_IFINDEX, + READ_ONCE(netdev->ifindex))) + goto nla_put_failure; + + rcu_read_lock(); + peer_net = dev_net_rcu(netdev); + net = dev_net_rcu(orig_netdev); + if (!net_eq(net, peer_net)) { + s32 id = peernet2id_alloc(net, peer_net, GFP_ATOMIC); + + if (nla_put_s32(rsp, NETDEV_A_LEASE_NETNS_ID, id)) + goto nla_put_failure_unlock; + } + rcu_read_unlock(); + nla_nest_end(rsp, nest_lease); + return 0; + +nla_put_failure_unlock: + rcu_read_unlock(); +nla_put_failure: + return -ENOMEM; +} + +static int +__netdev_nl_queue_fill_mp(struct sk_buff *rsp, struct netdev_rx_queue *rxq) +{ + struct pp_memory_provider_params *params = &rxq->mp_params; + + if (params->mp_ops && + params->mp_ops->nl_fill(params->mp_priv, rsp, rxq)) + return -EMSGSIZE; + +#ifdef CONFIG_XDP_SOCKETS + if (rxq->pool) + if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) + return -EMSGSIZE; +#endif + return 0; +} + +static int +netdev_nl_queue_fill_mp(struct sk_buff *rsp, struct net_device *netdev, + struct 
netdev_rx_queue *rxq) +{ + struct netdev_rx_queue *hw_rxq; + int ret; + + hw_rxq = rxq->lease; + if (!hw_rxq || !netif_is_queue_leasee(netdev)) + return __netdev_nl_queue_fill_mp(rsp, rxq); + + netdev_lock(hw_rxq->dev); + ret = __netdev_nl_queue_fill_mp(rsp, hw_rxq); + netdev_unlock(hw_rxq->dev); + return ret; +} + +static int netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, u32 q_idx, u32 q_type, const struct genl_info *info) { - struct pp_memory_provider_params *params; struct netdev_rx_queue *rxq; struct netdev_queue *txq; void *hdr; @@ -409,17 +491,10 @@ netdev_nl_queue_fill_one(struct sk_buff *rsp, struct net_device *netdev, rxq = __netif_get_rx_queue(netdev, q_idx); if (nla_put_napi_id(rsp, rxq->napi)) goto nla_put_failure; - - params = &rxq->mp_params; - if (params->mp_ops && - params->mp_ops->nl_fill(params->mp_priv, rsp, rxq)) + if (netdev_nl_queue_fill_lease(rsp, netdev, q_idx, q_type)) + goto nla_put_failure; + if (netdev_nl_queue_fill_mp(rsp, netdev, rxq)) goto nla_put_failure; -#ifdef CONFIG_XDP_SOCKETS - if (rxq->pool) - if (nla_put_empty_nest(rsp, NETDEV_A_QUEUE_XSK)) - goto nla_put_failure; -#endif - break; case NETDEV_QUEUE_TYPE_TX: txq = netdev_get_tx_queue(netdev, q_idx); @@ -918,7 +993,8 @@ netdev_nl_get_dma_dev(struct net_device *netdev, unsigned long *rxq_bitmap, for_each_set_bit(rxq_idx, rxq_bitmap, netdev->real_num_rx_queues) { struct device *rxq_dma_dev; - rxq_dma_dev = netdev_queue_get_dma_dev(netdev, rxq_idx); + rxq_dma_dev = netdev_queue_get_dma_dev(netdev, rxq_idx, + NETDEV_QUEUE_TYPE_RX); if (dma_dev && rxq_dma_dev != dma_dev) { NL_SET_ERR_MSG_FMT(extack, "DMA device mismatch between queue %u and %u (multi-PF device?)", rxq_idx, prev_rxq_idx); @@ -1095,7 +1171,7 @@ int netdev_nl_bind_tx_doit(struct sk_buff *skb, struct genl_info *info) goto err_unlock_netdev; } - dma_dev = netdev_queue_get_dma_dev(netdev, 0); + dma_dev = netdev_queue_get_dma_dev(netdev, 0, NETDEV_QUEUE_TYPE_TX); binding = 
net_devmem_bind_dmabuf(netdev, dma_dev, DMA_TO_DEVICE, dmabuf_fd, priv, info->extack); if (IS_ERR(binding)) { @@ -1120,6 +1196,173 @@ err_genlmsg_free: return err; } +int netdev_nl_queue_create_doit(struct sk_buff *skb, struct genl_info *info) +{ + const int qmaxtype = ARRAY_SIZE(netdev_queue_id_nl_policy) - 1; + const int lmaxtype = ARRAY_SIZE(netdev_lease_nl_policy) - 1; + int err, ifindex, ifindex_lease, queue_id, queue_id_lease; + struct nlattr *qtb[ARRAY_SIZE(netdev_queue_id_nl_policy)]; + struct nlattr *ltb[ARRAY_SIZE(netdev_lease_nl_policy)]; + struct netdev_rx_queue *rxq, *rxq_lease; + struct net_device *dev, *dev_lease; + netdevice_tracker dev_tracker; + s32 netns_lease = -1; + struct nlattr *nest; + struct sk_buff *rsp; + struct net *net; + void *hdr; + + if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_IFINDEX) || + GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_TYPE) || + GENL_REQ_ATTR_CHECK(info, NETDEV_A_QUEUE_LEASE)) + return -EINVAL; + if (nla_get_u32(info->attrs[NETDEV_A_QUEUE_TYPE]) != + NETDEV_QUEUE_TYPE_RX) { + NL_SET_BAD_ATTR(info->extack, info->attrs[NETDEV_A_QUEUE_TYPE]); + return -EINVAL; + } + + ifindex = nla_get_u32(info->attrs[NETDEV_A_QUEUE_IFINDEX]); + + nest = info->attrs[NETDEV_A_QUEUE_LEASE]; + err = nla_parse_nested(ltb, lmaxtype, nest, + netdev_lease_nl_policy, info->extack); + if (err < 0) + return err; + if (NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_IFINDEX) || + NL_REQ_ATTR_CHECK(info->extack, nest, ltb, NETDEV_A_LEASE_QUEUE)) + return -EINVAL; + if (ltb[NETDEV_A_LEASE_NETNS_ID]) { + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + netns_lease = nla_get_s32(ltb[NETDEV_A_LEASE_NETNS_ID]); + } + + ifindex_lease = nla_get_u32(ltb[NETDEV_A_LEASE_IFINDEX]); + + nest = ltb[NETDEV_A_LEASE_QUEUE]; + err = nla_parse_nested(qtb, qmaxtype, nest, + netdev_queue_id_nl_policy, info->extack); + if (err < 0) + return err; + if (NL_REQ_ATTR_CHECK(info->extack, nest, qtb, NETDEV_A_QUEUE_ID) || + NL_REQ_ATTR_CHECK(info->extack, nest, qtb, 
NETDEV_A_QUEUE_TYPE)) + return -EINVAL; + if (nla_get_u32(qtb[NETDEV_A_QUEUE_TYPE]) != NETDEV_QUEUE_TYPE_RX) { + NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_TYPE]); + return -EINVAL; + } + + queue_id_lease = nla_get_u32(qtb[NETDEV_A_QUEUE_ID]); + + rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!rsp) + return -ENOMEM; + + hdr = genlmsg_iput(rsp, info); + if (!hdr) { + err = -EMSGSIZE; + goto err_genlmsg_free; + } + + /* Locking order is always from the virtual to the physical device + * since this is also the same order when applications open the + * memory provider later on. + */ + dev = netdev_get_by_index_lock(genl_info_net(info), ifindex); + if (!dev) { + err = -ENODEV; + goto err_genlmsg_free; + } + if (!netdev_can_create_queue(dev, info->extack)) { + err = -EINVAL; + goto err_unlock_dev; + } + + net = genl_info_net(info); + if (netns_lease >= 0) { + net = get_net_ns_by_id(net, netns_lease); + if (!net) { + err = -ENONET; + goto err_unlock_dev; + } + } + + dev_lease = netdev_get_by_index(net, ifindex_lease, &dev_tracker, + GFP_KERNEL); + if (!dev_lease) { + err = -ENODEV; + goto err_put_netns; + } + if (!netdev_can_lease_queue(dev_lease, info->extack)) { + netdev_put(dev_lease, &dev_tracker); + err = -EINVAL; + goto err_put_netns; + } + + dev_lease = netdev_put_lock(dev_lease, net, &dev_tracker); + if (!dev_lease) { + err = -ENODEV; + goto err_put_netns; + } + if (queue_id_lease >= dev_lease->real_num_rx_queues) { + err = -ERANGE; + NL_SET_BAD_ATTR(info->extack, qtb[NETDEV_A_QUEUE_ID]); + goto err_unlock_dev_lease; + } + if (netdev_queue_busy(dev_lease, queue_id_lease, NETDEV_QUEUE_TYPE_RX, + info->extack)) { + err = -EBUSY; + goto err_unlock_dev_lease; + } + + rxq_lease = __netif_get_rx_queue(dev_lease, queue_id_lease); + rxq = __netif_get_rx_queue(dev, dev->real_num_rx_queues - 1); + + /* Leasing queues from different physical devices is currently + * not supported. 
Capabilities such as XDP features and DMA + * device may differ between physical devices, and computing + * a correct intersection for the virtual device is not yet + * implemented. + */ + if (rxq->lease && rxq->lease->dev != dev_lease) { + err = -EOPNOTSUPP; + NL_SET_ERR_MSG(info->extack, + "Leasing queues from different devices not supported"); + goto err_unlock_dev_lease; + } + + queue_id = dev->queue_mgmt_ops->ndo_queue_create(dev, info->extack); + if (queue_id < 0) { + err = queue_id; + goto err_unlock_dev_lease; + } + rxq = __netif_get_rx_queue(dev, queue_id); + + netdev_rx_queue_lease(rxq, rxq_lease); + + nla_put_u32(rsp, NETDEV_A_QUEUE_ID, queue_id); + genlmsg_end(rsp, hdr); + + netdev_unlock(dev_lease); + netdev_unlock(dev); + if (netns_lease >= 0) + put_net(net); + + return genlmsg_reply(rsp, info); + +err_unlock_dev_lease: + netdev_unlock(dev_lease); +err_put_netns: + if (netns_lease >= 0) + put_net(net); +err_unlock_dev: + netdev_unlock(dev); +err_genlmsg_free: + nlmsg_free(rsp); + return err; +} + void netdev_nl_sock_priv_init(struct netdev_nl_sock *priv) { INIT_LIST_HEAD(&priv->bindings); diff --git a/net/core/netdev_queues.c b/net/core/netdev_queues.c index 251f27a8307f..73fb28087a93 100644 --- a/net/core/netdev_queues.c +++ b/net/core/netdev_queues.c @@ -1,27 +1,118 @@ // SPDX-License-Identifier: GPL-2.0-or-later #include <net/netdev_queues.h> +#include <net/netdev_rx_queue.h> +#include <net/xdp_sock_drv.h> + +#include "dev.h" + +static struct device * +__netdev_queue_get_dma_dev(struct net_device *dev, unsigned int idx) +{ + const struct netdev_queue_mgmt_ops *queue_ops = dev->queue_mgmt_ops; + struct device *dma_dev; + + if (queue_ops && queue_ops->ndo_queue_get_dma_dev) + dma_dev = queue_ops->ndo_queue_get_dma_dev(dev, idx); + else + dma_dev = dev->dev.parent; + + return dma_dev && dma_dev->dma_mask ? 
dma_dev : NULL; +} /** * netdev_queue_get_dma_dev() - get dma device for zero-copy operations * @dev: net_device * @idx: queue index + * @type: queue type (RX or TX) * - * Get dma device for zero-copy operations to be used for this queue. - * When such device is not available or valid, the function will return NULL. + * Get dma device for zero-copy operations to be used for this queue. If + * the queue is an RX queue leased from a physical queue, we retrieve the + * physical queue's dma device. When the dma device is not available or + * valid, the function will return NULL. * * Return: Device or NULL on error */ -struct device *netdev_queue_get_dma_dev(struct net_device *dev, int idx) +struct device *netdev_queue_get_dma_dev(struct net_device *dev, + unsigned int idx, + enum netdev_queue_type type) { - const struct netdev_queue_mgmt_ops *queue_ops = dev->queue_mgmt_ops; + struct netdev_rx_queue *hw_rxq; struct device *dma_dev; - if (queue_ops && queue_ops->ndo_queue_get_dma_dev) - dma_dev = queue_ops->ndo_queue_get_dma_dev(dev, idx); - else - dma_dev = dev->dev.parent; + netdev_ops_assert_locked(dev); - return dma_dev && dma_dev->dma_mask ? dma_dev : NULL; + /* Only RX side supports queue leasing today. 
*/ + if (type != NETDEV_QUEUE_TYPE_RX || !netif_rxq_is_leased(dev, idx)) + return __netdev_queue_get_dma_dev(dev, idx); + if (!netif_is_queue_leasee(dev)) + return NULL; + + hw_rxq = __netif_get_rx_queue(dev, idx)->lease; + + netdev_lock(hw_rxq->dev); + idx = get_netdev_rx_queue_index(hw_rxq); + dma_dev = __netdev_queue_get_dma_dev(hw_rxq->dev, idx); + netdev_unlock(hw_rxq->dev); + + return dma_dev; } +bool netdev_can_create_queue(const struct net_device *dev, + struct netlink_ext_ack *extack) +{ + if (dev->dev.parent) { + NL_SET_ERR_MSG(extack, "Device is not a virtual device"); + return false; + } + if (!dev->queue_mgmt_ops || + !dev->queue_mgmt_ops->ndo_queue_create) { + NL_SET_ERR_MSG(extack, "Device does not support queue creation"); + return false; + } + if (dev->real_num_rx_queues < 1 || + dev->real_num_tx_queues < 1) { + NL_SET_ERR_MSG(extack, "Device must have at least one real queue"); + return false; + } + return true; +} + +bool netdev_can_lease_queue(const struct net_device *dev, + struct netlink_ext_ack *extack) +{ + if (!dev->dev.parent) { + NL_SET_ERR_MSG(extack, "Lease device is a virtual device"); + return false; + } + if (!netif_device_present(dev)) { + NL_SET_ERR_MSG(extack, "Lease device has been removed from the system"); + return false; + } + if (!dev->queue_mgmt_ops) { + NL_SET_ERR_MSG(extack, "Lease device does not support queue management operations"); + return false; + } + return true; +} + +bool netdev_queue_busy(struct net_device *dev, unsigned int idx, + enum netdev_queue_type type, + struct netlink_ext_ack *extack) +{ + if (xsk_get_pool_from_qid(dev, idx)) { + NL_SET_ERR_MSG(extack, "Device queue in use by AF_XDP"); + return true; + } + if (type == NETDEV_QUEUE_TYPE_TX) + return false; + if (netif_rxq_is_leased(dev, idx)) { + NL_SET_ERR_MSG(extack, "Device queue in use due to queue leasing"); + return true; + } + if (netif_rxq_has_mp(dev, idx)) { + NL_SET_ERR_MSG(extack, "Device queue in use by memory provider"); + return true; + } + 
return false; +} diff --git a/net/core/netdev_rx_queue.c b/net/core/netdev_rx_queue.c index 05fd2875d725..de4dac4c88b3 100644 --- a/net/core/netdev_rx_queue.c +++ b/net/core/netdev_rx_queue.c @@ -10,15 +10,91 @@ #include "dev.h" #include "page_pool_priv.h" -/* See also page_pool_is_unreadable() */ -bool netif_rxq_has_unreadable_mp(struct net_device *dev, int idx) +void netdev_rx_queue_lease(struct netdev_rx_queue *rxq_dst, + struct netdev_rx_queue *rxq_src) +{ + netdev_assert_locked(rxq_src->dev); + netdev_assert_locked(rxq_dst->dev); + + netdev_hold(rxq_src->dev, &rxq_src->lease_tracker, GFP_KERNEL); + + WRITE_ONCE(rxq_src->lease, rxq_dst); + WRITE_ONCE(rxq_dst->lease, rxq_src); +} + +void netdev_rx_queue_unlease(struct netdev_rx_queue *rxq_dst, + struct netdev_rx_queue *rxq_src) +{ + netdev_assert_locked(rxq_dst->dev); + netdev_assert_locked(rxq_src->dev); + + netif_rxq_cleanup_unlease(rxq_src, rxq_dst); + + WRITE_ONCE(rxq_src->lease, NULL); + WRITE_ONCE(rxq_dst->lease, NULL); + + netdev_put(rxq_src->dev, &rxq_src->lease_tracker); +} + +bool netif_rxq_is_leased(struct net_device *dev, unsigned int rxq_idx) +{ + if (rxq_idx < dev->real_num_rx_queues) + return READ_ONCE(__netif_get_rx_queue(dev, rxq_idx)->lease); + return false; +} + +/* Virtual devices eligible for leasing have no dev->dev.parent, while + * physical devices always have one. Use this to enforce the correct + * lease traversal direction. 
+ */ +static bool netif_lease_dir_ok(const struct net_device *dev, + enum netif_lease_dir dir) +{ + if (dir == NETIF_VIRT_TO_PHYS && !dev->dev.parent) + return true; + if (dir == NETIF_PHYS_TO_VIRT && dev->dev.parent) + return true; + return false; +} + +bool netif_is_queue_leasee(const struct net_device *dev) { - struct netdev_rx_queue *rxq = __netif_get_rx_queue(dev, idx); + return netif_lease_dir_ok(dev, NETIF_VIRT_TO_PHYS); +} - return !!rxq->mp_params.mp_ops; +struct netdev_rx_queue * +__netif_get_rx_queue_lease(struct net_device **dev, unsigned int *rxq_idx, + enum netif_lease_dir dir) +{ + struct net_device *orig_dev = *dev; + struct netdev_rx_queue *rxq = __netif_get_rx_queue(orig_dev, *rxq_idx); + + if (rxq->lease) { + if (!netif_lease_dir_ok(orig_dev, dir)) + return NULL; + rxq = rxq->lease; + *rxq_idx = get_netdev_rx_queue_index(rxq); + *dev = rxq->dev; + } + return rxq; +} + +/* See also page_pool_is_unreadable() */ +bool netif_rxq_has_unreadable_mp(struct net_device *dev, unsigned int rxq_idx) +{ + if (rxq_idx < dev->real_num_rx_queues) + return __netif_get_rx_queue(dev, rxq_idx)->mp_params.mp_ops; + return false; } EXPORT_SYMBOL(netif_rxq_has_unreadable_mp); +bool netif_rxq_has_mp(struct net_device *dev, unsigned int rxq_idx) +{ + if (rxq_idx < dev->real_num_rx_queues) + return __netif_get_rx_queue(dev, rxq_idx)->mp_params.mp_priv; + return false; +} + static int netdev_rx_queue_reconfig(struct net_device *dev, unsigned int rxq_idx, struct netdev_queue_config *qcfg_old, @@ -108,9 +184,9 @@ int netdev_rx_queue_restart(struct net_device *dev, unsigned int rxq_idx) } EXPORT_SYMBOL_NS_GPL(netdev_rx_queue_restart, "NETDEV_INTERNAL"); -int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, - const struct pp_memory_provider_params *p, - struct netlink_ext_ack *extack) +static int __netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *p, + struct netlink_ext_ack *extack) { const struct 
netdev_queue_mgmt_ops *qops = dev->queue_mgmt_ops; struct netdev_queue_config qcfg[2]; @@ -120,12 +196,6 @@ int __net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, if (!qops) return -EOPNOTSUPP; - if (rxq_idx >= dev->real_num_rx_queues) { - NL_SET_ERR_MSG(extack, "rx queue index out of range"); - return -ERANGE; - } - rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues); - if (dev->cfg->hds_config != ETHTOOL_TCP_DATA_SPLIT_ENABLED) { NL_SET_ERR_MSG(extack, "tcp-data-split is disabled"); return -EINVAL; @@ -172,28 +242,47 @@ err_clear_mp: return ret; } -int net_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, - struct pp_memory_provider_params *p) +int netif_mp_open_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *p, + struct netlink_ext_ack *extack) { int ret; + if (!netdev_need_ops_lock(dev)) + return -EOPNOTSUPP; + + if (rxq_idx >= dev->real_num_rx_queues) { + NL_SET_ERR_MSG(extack, "rx queue index out of range"); + return -ERANGE; + } + rxq_idx = array_index_nospec(rxq_idx, dev->real_num_rx_queues); + + if (!netif_rxq_is_leased(dev, rxq_idx)) + return __netif_mp_open_rxq(dev, rxq_idx, p, extack); + + if (!__netif_get_rx_queue_lease(&dev, &rxq_idx, NETIF_VIRT_TO_PHYS)) { + NL_SET_ERR_MSG(extack, "rx queue leased to a virtual netdev"); + return -EBUSY; + } + if (!dev->dev.parent) { + NL_SET_ERR_MSG(extack, "rx queue belongs to a virtual netdev"); + return -EOPNOTSUPP; + } + netdev_lock(dev); - ret = __net_mp_open_rxq(dev, rxq_idx, p, NULL); + ret = __netif_mp_open_rxq(dev, rxq_idx, p, extack); netdev_unlock(dev); return ret; } -void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, - const struct pp_memory_provider_params *old_p) +static void __netif_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *old_p) { struct netdev_queue_config qcfg[2]; struct netdev_rx_queue *rxq; int err; - if (WARN_ON_ONCE(ifq_idx >= 
dev->real_num_rx_queues)) - return; - - rxq = __netif_get_rx_queue(dev, ifq_idx); + rxq = __netif_get_rx_queue(dev, rxq_idx); /* Callers holding a netdev ref may get here after we already * went thru shutdown via dev_memory_provider_uninstall(). @@ -206,18 +295,55 @@ void __net_mp_close_rxq(struct net_device *dev, unsigned int ifq_idx, rxq->mp_params.mp_priv != old_p->mp_priv)) return; - netdev_queue_config(dev, ifq_idx, &qcfg[0]); + netdev_queue_config(dev, rxq_idx, &qcfg[0]); memset(&rxq->mp_params, 0, sizeof(rxq->mp_params)); - netdev_queue_config(dev, ifq_idx, &qcfg[1]); + netdev_queue_config(dev, rxq_idx, &qcfg[1]); - err = netdev_rx_queue_reconfig(dev, ifq_idx, &qcfg[0], &qcfg[1]); + err = netdev_rx_queue_reconfig(dev, rxq_idx, &qcfg[0], &qcfg[1]); WARN_ON(err && err != -ENETDOWN); } -void net_mp_close_rxq(struct net_device *dev, unsigned ifq_idx, - struct pp_memory_provider_params *old_p) +void netif_mp_close_rxq(struct net_device *dev, unsigned int rxq_idx, + const struct pp_memory_provider_params *old_p) { + if (WARN_ON_ONCE(rxq_idx >= dev->real_num_rx_queues)) + return; + if (!netif_rxq_is_leased(dev, rxq_idx)) + return __netif_mp_close_rxq(dev, rxq_idx, old_p); + + if (!__netif_get_rx_queue_lease(&dev, &rxq_idx, NETIF_VIRT_TO_PHYS)) { + WARN_ON_ONCE(1); + return; + } netdev_lock(dev); - __net_mp_close_rxq(dev, ifq_idx, old_p); + __netif_mp_close_rxq(dev, rxq_idx, old_p); netdev_unlock(dev); } + +void __netif_mp_uninstall_rxq(struct netdev_rx_queue *rxq, + const struct pp_memory_provider_params *p) +{ + if (p->mp_ops && p->mp_ops->uninstall) + p->mp_ops->uninstall(p->mp_priv, rxq); +} + +/* Clean up memory provider state when a queue lease is torn down. If + * a memory provider was installed on the physical queue via the lease, + * close it now. The memory provider is a property of the queue itself, + * and it was _guaranteed_ to be installed on the physical queue via + * the lease redirection. 
The extra __netif_mp_close_rxq is needed + * since the physical queue can outlive the virtual queue in the lease + * case, so it needs to be reconfigured to clear the memory provider. + */ +void netif_rxq_cleanup_unlease(struct netdev_rx_queue *phys_rxq, + struct netdev_rx_queue *virt_rxq) +{ + struct pp_memory_provider_params *p = &phys_rxq->mp_params; + unsigned int rxq_idx = get_netdev_rx_queue_index(phys_rxq); + + if (!p->mp_ops) + return; + + __netif_mp_uninstall_rxq(virt_rxq, p); + __netif_mp_close_rxq(phys_rxq->dev, rxq_idx, p); +} diff --git a/net/core/scm.c b/net/core/scm.c index a29aa8fb8065..eec13f50ecaf 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -318,8 +318,10 @@ void put_cmsg_scm_timestamping64(struct msghdr *msg, struct scm_timestamping_int int i; for (i = 0; i < ARRAY_SIZE(tss.ts); i++) { - tss.ts[i].tv_sec = tss_internal->ts[i].tv_sec; - tss.ts[i].tv_nsec = tss_internal->ts[i].tv_nsec; + struct timespec64 tv = ktime_to_timespec64(tss_internal->ts[i]); + + tss.ts[i].tv_sec = tv.tv_sec; + tss.ts[i].tv_nsec = tv.tv_nsec; } put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_NEW, sizeof(tss), &tss); @@ -332,8 +334,10 @@ void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_inter int i; for (i = 0; i < ARRAY_SIZE(tss.ts); i++) { - tss.ts[i].tv_sec = tss_internal->ts[i].tv_sec; - tss.ts[i].tv_nsec = tss_internal->ts[i].tv_nsec; + struct timespec64 tv = ktime_to_timespec64(tss_internal->ts[i]); + + tss.ts[i].tv_sec = tv.tv_sec; + tss.ts[i].tv_nsec = tv.tv_nsec; } put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPING_OLD, sizeof(tss), &tss); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 43ee86dcf2ea..7dad68e3b518 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -94,6 +94,7 @@ #include "dev.h" #include "devmem.h" +#include "net-sysfs.h" #include "netmem_priv.h" #include "sock_destructor.h" @@ -105,10 +106,9 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init; #define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(max(MAX_TCP_HEADER, 
\ GRO_MAX_HEAD_PAD)) -/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two. - * This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique - * size, and we can differentiate heads from skb_small_head_cache - * vs system slabs by looking at their size (skb_end_offset()). +/* SKB_SMALL_HEAD_CACHE_SIZE is the size used for the skbuff_small_head + * kmem_cache. The non-power-of-2 padding is kept for historical reasons and + * to avoid potential collisions with generic kmalloc bucket sizes. */ #define SKB_SMALL_HEAD_CACHE_SIZE \ (is_power_of_2(SKB_SMALL_HEAD_SIZE) ? \ @@ -891,17 +891,6 @@ skb_fail: } EXPORT_SYMBOL(napi_alloc_skb); -void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, - int off, int size, unsigned int truesize) -{ - DEBUG_NET_WARN_ON_ONCE(size > truesize); - - skb_fill_netmem_desc(skb, i, netmem, off, size); - skb->len += size; - skb->data_len += size; - skb->truesize += truesize; -} -EXPORT_SYMBOL(skb_add_rx_frag_netmem); void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, unsigned int truesize) @@ -1081,7 +1070,7 @@ static int skb_pp_frag_ref(struct sk_buff *skb) return 0; } -static void skb_kfree_head(void *head, unsigned int end_offset) +static void skb_kfree_head(void *head) { kfree(head); } @@ -1095,7 +1084,7 @@ static void skb_free_head(struct sk_buff *skb) return; skb_free_frag(head); } else { - skb_kfree_head(head, skb_end_offset(skb)); + skb_kfree_head(head); } } @@ -1527,7 +1516,8 @@ void napi_consume_skb(struct sk_buff *skb, int budget) DEBUG_NET_WARN_ON_ONCE(!in_softirq()); - if (skb->alloc_cpu != smp_processor_id() && !skb_shared(skb)) { + if (!static_branch_unlikely(&skb_defer_disable_key) && + skb->alloc_cpu != smp_processor_id() && !skb_shared(skb)) { skb_release_head_state(skb); return skb_attempt_defer_free(skb); } @@ -2370,7 +2360,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, return 0; nofrags: - skb_kfree_head(data, size); + skb_kfree_head(data); nodata: return 
-ENOMEM; } @@ -2416,20 +2406,6 @@ int __skb_unclone_keeptruesize(struct sk_buff *skb, gfp_t pri) if (likely(skb_end_offset(skb) == saved_end_offset)) return 0; - /* We can not change skb->end if the original or new value - * is SKB_SMALL_HEAD_HEADROOM, as it might break skb_kfree_head(). - */ - if (saved_end_offset == SKB_SMALL_HEAD_HEADROOM || - skb_end_offset(skb) == SKB_SMALL_HEAD_HEADROOM) { - /* We think this path should not be taken. - * Add a temporary trace to warn us just in case. - */ - pr_err_once("__skb_unclone_keeptruesize() skb_end_offset() %u -> %u\n", - saved_end_offset, skb_end_offset(skb)); - WARN_ON_ONCE(1); - return 0; - } - shinfo = skb_shinfo(skb); /* We are about to change back skb->end, @@ -5142,7 +5118,7 @@ static const u8 skb_ext_type_len[] = { #endif }; -static __always_inline unsigned int skb_ext_total_length(void) +static __always_inline __no_profile unsigned int skb_ext_total_length(void) { unsigned int l = SKB_EXT_CHUNKSIZEOF(struct skb_ext); int i; @@ -5153,12 +5129,10 @@ static __always_inline unsigned int skb_ext_total_length(void) return l; } -static void skb_extensions_init(void) +static noinline void __init __no_profile skb_extensions_init(void) { BUILD_BUG_ON(SKB_EXT_NUM > 8); -#if !IS_ENABLED(CONFIG_KCOV_INSTRUMENT_ALL) BUILD_BUG_ON(skb_ext_total_length() > 255); -#endif skbuff_ext_cache = kmem_cache_create("skbuff_ext_cache", SKB_EXT_ALIGN_VALUE * skb_ext_total_length(), @@ -6824,7 +6798,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off, if (skb_cloned(skb)) { /* drop the old head gracefully */ if (skb_orphan_frags(skb, gfp_mask)) { - skb_kfree_head(data, size); + skb_kfree_head(data); return -ENOMEM; } for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) @@ -6931,7 +6905,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, memcpy((struct skb_shared_info *)(data + size), skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0])); if (skb_orphan_frags(skb, gfp_mask)) { - 
skb_kfree_head(data, size); + skb_kfree_head(data); return -ENOMEM; } shinfo = (struct skb_shared_info *)(data + size); @@ -6967,7 +6941,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off, /* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */ if (skb_has_frag_list(skb)) kfree_skb_list(skb_shinfo(skb)->frag_list); - skb_kfree_head(data, size); + skb_kfree_head(data); return -ENOMEM; } skb_release_data(skb, SKB_CONSUMED); @@ -7264,6 +7238,8 @@ static void kfree_skb_napi_cache(struct sk_buff *skb) local_bh_enable(); } +DEFINE_STATIC_KEY_FALSE(skb_defer_disable_key); + /** * skb_attempt_defer_free - queue skb for remote freeing * @skb: buffer @@ -7280,6 +7256,9 @@ void skb_attempt_defer_free(struct sk_buff *skb) bool kick; int cpu; + if (static_branch_unlikely(&skb_defer_disable_key)) + goto nodefer; + /* zero copy notifications should not be delayed. */ if (skb_zcopy(skb)) goto nodefer; diff --git a/net/core/sock.c b/net/core/sock.c index 5976100a9d55..b37b664b6eb9 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -520,43 +520,36 @@ int __sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(__sock_queue_rcv_skb); -int sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb, - enum skb_drop_reason *reason) +enum skb_drop_reason +sock_queue_rcv_skb_reason(struct sock *sk, struct sk_buff *skb) { enum skb_drop_reason drop_reason; int err; - err = sk_filter_reason(sk, skb, &drop_reason); - if (err) - goto out; + drop_reason = sk_filter_reason(sk, skb); + if (drop_reason) + return drop_reason; err = __sock_queue_rcv_skb(sk, skb); switch (err) { case -ENOMEM: - drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF; - break; + return SKB_DROP_REASON_SOCKET_RCVBUFF; case -ENOBUFS: - drop_reason = SKB_DROP_REASON_PROTO_MEM; - break; - default: - drop_reason = SKB_NOT_DROPPED_YET; - break; + return SKB_DROP_REASON_PROTO_MEM; } -out: - if (reason) - *reason = drop_reason; - return err; + return SKB_NOT_DROPPED_YET; } 
EXPORT_SYMBOL(sock_queue_rcv_skb_reason); int __sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested, unsigned int trim_cap, bool refcounted) { - enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; + enum skb_drop_reason reason; int rc = NET_RX_SUCCESS; int err; - if (sk_filter_trim_cap(sk, skb, trim_cap, &reason)) + reason = sk_filter_trim_cap(sk, skb, trim_cap); + if (reason) goto discard_and_relse; skb->dev = NULL; @@ -973,6 +966,8 @@ EXPORT_SYMBOL(sock_set_keepalive); static void __sock_set_rcvbuf(struct sock *sk, int val) { + struct socket *sock = sk->sk_socket; + /* Ensure val * 2 fits into an int, to prevent max_t() from treating it * as a negative value. */ @@ -990,6 +985,13 @@ static void __sock_set_rcvbuf(struct sock *sk, int val) * we actually used in getsockopt is the most desirable behavior. */ WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF)); + + if (sock) { + const struct proto_ops *ops = READ_ONCE(sock->ops); + + if (ops->set_rcvbuf) + ops->set_rcvbuf(sk, sk->sk_rcvbuf); + } } void sock_set_rcvbuf(struct sock *sk, int val) @@ -2583,6 +2585,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority, sk_set_socket(newsk, NULL); sk_tx_queue_clear(newsk); + sk_rx_queue_clear(newsk); RCU_INIT_POINTER(newsk->sk_wq, NULL); if (newsk->sk_prot->sockets_allocated) @@ -3175,7 +3178,7 @@ bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) } EXPORT_SYMBOL(sk_page_frag_refill); -void __lock_sock(struct sock *sk) +static void __lock_sock(struct sock *sk) __releases(&sk->sk_lock.slock) __acquires(&sk->sk_lock.slock) { @@ -3774,14 +3777,30 @@ void sock_init_data(struct socket *sock, struct sock *sk) } EXPORT_SYMBOL(sock_init_data); -void lock_sock_nested(struct sock *sk, int subclass) +void noinline lock_sock_nested(struct sock *sk, int subclass) { /* The sk_lock has mutex_lock() semantics here. 
*/ mutex_acquire(&sk->sk_lock.dep_map, subclass, 0, _RET_IP_); might_sleep(); +#ifdef CONFIG_64BIT + if (sizeof(struct slock_owned) == sizeof(long)) { + socket_lock_t tmp = { + .slock = __SPIN_LOCK_UNLOCKED(tmp.slock), + .owned = 1, + }; + socket_lock_t old = { + .slock = __SPIN_LOCK_UNLOCKED(old.slock), + .owned = 0, + }; + + if (likely(try_cmpxchg(&sk->sk_lock.combined, + &old.combined, tmp.combined))) + return; + } +#endif spin_lock_bh(&sk->sk_lock.slock); - if (sock_owned_by_user_nocheck(sk)) + if (unlikely(sock_owned_by_user_nocheck(sk))) __lock_sock(sk); sk->sk_lock.owned = 1; spin_unlock_bh(&sk->sk_lock.slock); @@ -3791,16 +3810,18 @@ EXPORT_SYMBOL(lock_sock_nested); void release_sock(struct sock *sk) { spin_lock_bh(&sk->sk_lock.slock); - if (sk->sk_backlog.tail) - __release_sock(sk); - if (sk->sk_prot->release_cb) - INDIRECT_CALL_INET_1(sk->sk_prot->release_cb, - tcp_release_cb, sk); + if (unlikely(sk->sk_backlog.tail)) + __release_sock(sk); + if (sk->sk_prot->release_cb) { + if (!tcp_release_cb_cond(sk)) + sk->sk_prot->release_cb(sk); + } sock_release_ownership(sk); - if (waitqueue_active(&sk->sk_lock.wq)) + if (unlikely(waitqueue_active(&sk->sk_lock.wq))) wake_up(&sk->sk_lock.wq); + spin_unlock_bh(&sk->sk_lock.slock); } EXPORT_SYMBOL(release_sock); @@ -3810,7 +3831,7 @@ bool __lock_sock_fast(struct sock *sk) __acquires(&sk->sk_lock.slock) might_sleep(); spin_lock_bh(&sk->sk_lock.slock); - if (!sock_owned_by_user_nocheck(sk)) { + if (likely(!sock_owned_by_user_nocheck(sk))) { /* * Fast path return with bottom halves disabled and * sock::sk_lock.slock held. 
@@ -3951,13 +3972,8 @@ int sock_common_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; - int addr_len = 0; - int err; - err = sk->sk_prot->recvmsg(sk, msg, size, flags, &addr_len); - if (err >= 0) - msg->msg_namelen = addr_len; - return err; + return sk->sk_prot->recvmsg(sk, msg, size, flags); } EXPORT_SYMBOL(sock_common_recvmsg); diff --git a/net/core/sock_diag.c b/net/core/sock_diag.c index c83335c62360..f67accd60675 100644 --- a/net/core/sock_diag.c +++ b/net/core/sock_diag.c @@ -1,5 +1,4 @@ -/* License: GPL */ - +// SPDX-License-Identifier: GPL-2.0 #include <linux/filter.h> #include <linux/mutex.h> #include <linux/socket.h> diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 03aea10073f0..b508618bfc12 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -138,68 +138,76 @@ done: static int rps_sock_flow_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { + struct rps_sock_flow_table *o_sock_table, *sock_table; + static DEFINE_MUTEX(sock_flow_mutex); + rps_tag_ptr o_tag_ptr, tag_ptr; unsigned int orig_size, size; - int ret, i; struct ctl_table tmp = { .data = &size, .maxlen = sizeof(size), .mode = table->mode }; - struct rps_sock_flow_table *orig_sock_table, *sock_table; - static DEFINE_MUTEX(sock_flow_mutex); + void *tofree = NULL; + int ret, i; + u8 log; mutex_lock(&sock_flow_mutex); - orig_sock_table = rcu_dereference_protected( - net_hotdata.rps_sock_flow_table, - lockdep_is_held(&sock_flow_mutex)); - size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0; + o_tag_ptr = tag_ptr = net_hotdata.rps_sock_flow_table; + + size = o_tag_ptr ? 
rps_tag_to_mask(o_tag_ptr) + 1 : 0; + o_sock_table = rps_tag_to_table(o_tag_ptr); + orig_size = size; ret = proc_dointvec(&tmp, write, buffer, lenp, ppos); - if (write) { - if (size) { - if (size > 1<<29) { - /* Enforce limit to prevent overflow */ + if (!write) + goto unlock; + + if (size) { + if (size > 1<<29) { + /* Enforce limit to prevent overflow */ + mutex_unlock(&sock_flow_mutex); + return -EINVAL; + } + sock_table = o_sock_table; + size = roundup_pow_of_two(size); + if (size != orig_size) { + sock_table = vmalloc_huge(size * sizeof(*sock_table), + GFP_KERNEL); + if (!sock_table) { mutex_unlock(&sock_flow_mutex); - return -EINVAL; - } - size = roundup_pow_of_two(size); - if (size != orig_size) { - sock_table = - vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size)); - if (!sock_table) { - mutex_unlock(&sock_flow_mutex); - return -ENOMEM; - } - net_hotdata.rps_cpu_mask = - roundup_pow_of_two(nr_cpu_ids) - 1; - sock_table->mask = size - 1; - } else - sock_table = orig_sock_table; - - for (i = 0; i < size; i++) - sock_table->ents[i] = RPS_NO_CPU; - } else - sock_table = NULL; - - if (sock_table != orig_sock_table) { - rcu_assign_pointer(net_hotdata.rps_sock_flow_table, - sock_table); - if (sock_table) { - static_branch_inc(&rps_needed); - static_branch_inc(&rfs_needed); - } - if (orig_sock_table) { - static_branch_dec(&rps_needed); - static_branch_dec(&rfs_needed); - kvfree_rcu(orig_sock_table, rcu); + return -ENOMEM; } + net_hotdata.rps_cpu_mask = + roundup_pow_of_two(nr_cpu_ids) - 1; + log = ilog2(size); + tag_ptr = (rps_tag_ptr)sock_table | log; + } + + for (i = 0; i < size; i++) + sock_table[i].ent = RPS_NO_CPU; + } else { + sock_table = NULL; + tag_ptr = 0UL; + } + if (tag_ptr != o_tag_ptr) { + smp_store_release(&net_hotdata.rps_sock_flow_table, tag_ptr); + if (sock_table) { + static_branch_inc(&rps_needed); + static_branch_inc(&rfs_needed); + } + if (o_sock_table) { + static_branch_dec(&rps_needed); + static_branch_dec(&rfs_needed); + tofree = o_sock_table; } } 
+unlock: mutex_unlock(&sock_flow_mutex); + kvfree_rcu_mightsleep(tofree); return ret; } #endif /* CONFIG_RPS */ @@ -341,6 +349,29 @@ static int proc_do_rss_key(const struct ctl_table *table, int write, return proc_dostring(&fake_table, write, buffer, lenp, ppos); } +static int proc_do_skb_defer_max(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + static DEFINE_MUTEX(skb_defer_max_mutex); + int ret, oval, nval; + + mutex_lock(&skb_defer_max_mutex); + + oval = !net_hotdata.sysctl_skb_defer_max; + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + nval = !net_hotdata.sysctl_skb_defer_max; + + if (nval != oval) { + if (nval) + static_branch_enable(&skb_defer_disable_key); + else + static_branch_disable(&skb_defer_disable_key); + } + + mutex_unlock(&skb_defer_max_mutex); + return ret; +} + #ifdef CONFIG_BPF_JIT static int proc_dointvec_minmax_bpf_enable(const struct ctl_table *table, int write, void *buffer, size_t *lenp, @@ -642,7 +673,7 @@ static struct ctl_table net_core_table[] = { .data = &net_hotdata.sysctl_skb_defer_max, .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_do_skb_defer_max, .extra1 = SYSCTL_ZERO, }, }; diff --git a/net/core/tso.c b/net/core/tso.c index 6df997b9076e..347b3856ddb9 100644 --- a/net/core/tso.c +++ b/net/core/tso.c @@ -3,6 +3,7 @@ #include <linux/if_vlan.h> #include <net/ip.h> #include <net/tso.h> +#include <linux/dma-mapping.h> #include <linux/unaligned.h> void tso_build_hdr(const struct sk_buff *skb, char *hdr, struct tso_t *tso, @@ -87,3 +88,271 @@ int tso_start(struct sk_buff *skb, struct tso_t *tso) return hdr_len; } EXPORT_SYMBOL(tso_start); + +static int tso_dma_iova_try(struct device *dev, struct tso_dma_map *map, + phys_addr_t phys, size_t linear_len, + size_t total_len, size_t *offset) +{ + const struct sk_buff *skb; + unsigned int nr_frags; + int i; + + if (!dma_iova_try_alloc(dev, &map->iova_state, phys, 
total_len)) + return 1; + + skb = map->skb; + nr_frags = skb_shinfo(skb)->nr_frags; + + if (linear_len) { + if (dma_iova_link(dev, &map->iova_state, + phys, *offset, linear_len, + DMA_TO_DEVICE, 0)) + goto iova_fail; + map->linear_len = linear_len; + *offset += linear_len; + } + + for (i = 0; i < nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + unsigned int frag_len = skb_frag_size(frag); + + if (dma_iova_link(dev, &map->iova_state, + skb_frag_phys(frag), *offset, + frag_len, DMA_TO_DEVICE, 0)) { + map->nr_frags = i; + goto iova_fail; + } + map->frags[i].len = frag_len; + *offset += frag_len; + map->nr_frags = i + 1; + } + + if (dma_iova_sync(dev, &map->iova_state, 0, total_len)) + goto iova_fail; + + return 0; + +iova_fail: + dma_iova_destroy(dev, &map->iova_state, *offset, + DMA_TO_DEVICE, 0); + memset(&map->iova_state, 0, sizeof(map->iova_state)); + + /* reset map state */ + map->frag_idx = -1; + map->offset = 0; + map->linear_len = 0; + map->nr_frags = 0; + + return 1; +} + +/** + * tso_dma_map_init - DMA-map GSO payload regions + * @map: map struct to initialize + * @dev: device for DMA mapping + * @skb: the GSO skb + * @hdr_len: per-segment header length in bytes + * + * DMA-maps the linear payload (after headers) and all frags. + * Prefers the DMA IOVA API (one contiguous mapping, one IOTLB sync); + * falls back to per-region dma_map_phys() when IOVA is not available. + * Positions the iterator at byte 0 of the payload. + * + * Return: 0 on success, -ENOMEM on DMA mapping failure (partial mappings + * are cleaned up internally). 
+ */ +int tso_dma_map_init(struct tso_dma_map *map, struct device *dev, + const struct sk_buff *skb, unsigned int hdr_len) +{ + unsigned int linear_len = skb_headlen(skb) - hdr_len; + unsigned int nr_frags = skb_shinfo(skb)->nr_frags; + size_t total_len = skb->len - hdr_len; + size_t offset = 0; + phys_addr_t phys; + int i; + + map->dev = dev; + map->skb = skb; + map->hdr_len = hdr_len; + map->frag_idx = -1; + map->offset = 0; + map->iova_offset = 0; + map->total_len = total_len; + map->linear_len = 0; + map->nr_frags = 0; + memset(&map->iova_state, 0, sizeof(map->iova_state)); + + if (!total_len) + return 0; + + if (linear_len) + phys = virt_to_phys(skb->data + hdr_len); + else + phys = skb_frag_phys(&skb_shinfo(skb)->frags[0]); + + if (tso_dma_iova_try(dev, map, phys, linear_len, total_len, &offset)) { + /* IOVA path failed, map state was reset. Fallback to + * per-region dma_map_phys() + */ + if (linear_len) { + map->linear_dma = dma_map_phys(dev, phys, linear_len, + DMA_TO_DEVICE, 0); + if (dma_mapping_error(dev, map->linear_dma)) + return -ENOMEM; + map->linear_len = linear_len; + } + + for (i = 0; i < nr_frags; i++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + unsigned int frag_len = skb_frag_size(frag); + + map->frags[i].len = frag_len; + map->frags[i].dma = dma_map_phys(dev, skb_frag_phys(frag), + frag_len, DMA_TO_DEVICE, 0); + if (dma_mapping_error(dev, map->frags[i].dma)) { + tso_dma_map_cleanup(map); + return -ENOMEM; + } + map->nr_frags = i + 1; + } + } + + if (linear_len == 0 && nr_frags > 0) + map->frag_idx = 0; + + return 0; +} +EXPORT_SYMBOL(tso_dma_map_init); + +/** + * tso_dma_map_cleanup - unmap all DMA regions in a tso_dma_map + * @map: the map to clean up + * + * Handles both IOVA and fallback paths. For IOVA, calls + * dma_iova_destroy(). For fallback, unmaps each region individually. 
+ */ +void tso_dma_map_cleanup(struct tso_dma_map *map) +{ + int i; + + if (dma_use_iova(&map->iova_state)) { + dma_iova_destroy(map->dev, &map->iova_state, map->total_len, + DMA_TO_DEVICE, 0); + memset(&map->iova_state, 0, sizeof(map->iova_state)); + } else { + if (map->linear_len) + dma_unmap_phys(map->dev, map->linear_dma, + map->linear_len, DMA_TO_DEVICE, 0); + + for (i = 0; i < map->nr_frags; i++) + dma_unmap_phys(map->dev, map->frags[i].dma, + map->frags[i].len, DMA_TO_DEVICE, 0); + } + + map->linear_len = 0; + map->nr_frags = 0; +} +EXPORT_SYMBOL(tso_dma_map_cleanup); + +/** + * tso_dma_map_count - count descriptors for a payload range + * @map: the payload map + * @len: number of payload bytes in this segment + * + * Counts how many contiguous DMA region chunks the next @len bytes + * will span, without advancing the iterator. On the IOVA path this + * is always 1 (contiguous). On the fallback path, uses region sizes + * from the current position. + * + * Return: the number of descriptors needed for @len bytes of payload. + */ +unsigned int tso_dma_map_count(struct tso_dma_map *map, unsigned int len) +{ + unsigned int offset = map->offset; + int idx = map->frag_idx; + unsigned int count = 0; + + if (!len) + return 0; + + if (dma_use_iova(&map->iova_state)) + return 1; + + while (len > 0) { + unsigned int region_len, chunk; + + if (idx == -1) + region_len = map->linear_len; + else + region_len = map->frags[idx].len; + + chunk = min(len, region_len - offset); + len -= chunk; + count++; + offset = 0; + idx++; + } + + return count; +} +EXPORT_SYMBOL(tso_dma_map_count); + +/** + * tso_dma_map_next - yield the next DMA address range + * @map: the payload map + * @addr: output DMA address + * @chunk_len: output chunk length + * @mapping_len: full DMA mapping length when this chunk starts a new + * mapping region, or 0 when continuing a previous one. + * On the IOVA path this is always 0 (driver must not + * do per-region unmaps; use tso_dma_map_cleanup instead). 
+ * @seg_remaining: bytes left in current segment + * + * Yields the next (dma_addr, chunk_len) pair and advances the iterator. + * On the IOVA path, the entire payload is contiguous so each segment + * is always a single chunk. + * + * Return: true if a chunk was yielded, false when @seg_remaining is 0. + */ +bool tso_dma_map_next(struct tso_dma_map *map, dma_addr_t *addr, + unsigned int *chunk_len, unsigned int *mapping_len, + unsigned int seg_remaining) +{ + unsigned int region_len, chunk; + + if (!seg_remaining) + return false; + + /* IOVA path: contiguous DMA range, no region boundaries */ + if (dma_use_iova(&map->iova_state)) { + *addr = map->iova_state.addr + map->iova_offset; + *chunk_len = seg_remaining; + *mapping_len = 0; + map->iova_offset += seg_remaining; + return true; + } + + /* Fallback path: per-region iteration */ + + if (map->frag_idx == -1) { + region_len = map->linear_len; + chunk = min(seg_remaining, region_len - map->offset); + *addr = map->linear_dma + map->offset; + } else { + region_len = map->frags[map->frag_idx].len; + chunk = min(seg_remaining, region_len - map->offset); + *addr = map->frags[map->frag_idx].dma + map->offset; + } + + *mapping_len = (map->offset == 0) ? 
region_len : 0; + *chunk_len = chunk; + map->offset += chunk; + + if (map->offset >= region_len) { + map->frag_idx++; + map->offset = 0; + } + + return true; +} +EXPORT_SYMBOL(tso_dma_map_next); diff --git a/net/devlink/Makefile b/net/devlink/Makefile index 000da622116a..8f2adb5e5836 100644 --- a/net/devlink/Makefile +++ b/net/devlink/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 obj-y := core.o netlink.o netlink_gen.o dev.o port.o sb.o dpipe.o \ - resource.o param.o region.o health.o trap.o rate.o linecard.o + resource.o param.o region.o health.o trap.o rate.o linecard.o sh_dev.o diff --git a/net/devlink/core.c b/net/devlink/core.c index d8e509a669bf..eeb6a71f5f56 100644 --- a/net/devlink/core.c +++ b/net/devlink/core.c @@ -248,6 +248,24 @@ struct device *devlink_to_dev(const struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_to_dev); +const char *devlink_bus_name(const struct devlink *devlink) +{ + return devlink->dev ? devlink->dev->bus->name : DEVLINK_INDEX_BUS_NAME; +} +EXPORT_SYMBOL_GPL(devlink_bus_name); + +const char *devlink_dev_name(const struct devlink *devlink) +{ + return devlink->dev ? 
dev_name(devlink->dev) : devlink->dev_name_index; +} +EXPORT_SYMBOL_GPL(devlink_dev_name); + +const char *devlink_dev_driver_name(const struct devlink *devlink) +{ + return devlink->dev_driver->name; +} +EXPORT_SYMBOL_GPL(devlink_dev_driver_name); + struct net *devlink_net(const struct devlink *devlink) { return read_pnet(&devlink->_net); @@ -311,7 +329,10 @@ static void devlink_release(struct work_struct *work) mutex_destroy(&devlink->lock); lockdep_unregister_key(&devlink->lock_key); - put_device(devlink->dev); + if (devlink->dev) + put_device(devlink->dev); + else + kfree(devlink->dev_name_index); kvfree(devlink); } @@ -321,13 +342,15 @@ void devlink_put(struct devlink *devlink) queue_rcu_work(system_percpu_wq, &devlink->rwork); } -struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp) +static struct devlink *__devlinks_xa_find_get(struct net *net, + unsigned long *indexp, + unsigned long end) { struct devlink *devlink = NULL; rcu_read_lock(); retry: - devlink = xa_find(&devlinks, indexp, ULONG_MAX, DEVLINK_REGISTERED); + devlink = xa_find(&devlinks, indexp, end, DEVLINK_REGISTERED); if (!devlink) goto unlock; @@ -346,6 +369,16 @@ next: goto retry; } +struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp) +{ + return __devlinks_xa_find_get(net, indexp, ULONG_MAX); +} + +struct devlink *devlinks_xa_lookup_get(struct net *net, unsigned long index) +{ + return __devlinks_xa_find_get(net, &index, index); +} + /** * devl_register - Register devlink instance * @devlink: devlink @@ -394,27 +427,15 @@ void devlink_unregister(struct devlink *devlink) } EXPORT_SYMBOL_GPL(devlink_unregister); -/** - * devlink_alloc_ns - Allocate new devlink instance resources - * in specific namespace - * - * @ops: ops - * @priv_size: size of user private data - * @net: net namespace - * @dev: parent device - * - * Allocate new devlink instance resources, including devlink index - * and name. 
- */ -struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, - size_t priv_size, struct net *net, - struct device *dev) +struct devlink *__devlink_alloc(const struct devlink_ops *ops, size_t priv_size, + struct net *net, struct device *dev, + const struct device_driver *dev_driver) { struct devlink *devlink; static u32 last_id; int ret; - WARN_ON(!ops || !dev); + WARN_ON(!ops || !dev_driver); if (!devlink_reload_actions_valid(ops)) return NULL; @@ -427,8 +448,16 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, if (ret < 0) goto err_xa_alloc; - devlink->dev = get_device(dev); + if (dev) { + devlink->dev = get_device(dev); + } else { + devlink->dev_name_index = kasprintf(GFP_KERNEL, "%u", devlink->index); + if (!devlink->dev_name_index) + goto err_kasprintf; + } + devlink->ops = ops; + devlink->dev_driver = dev_driver; xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC); xa_init_flags(&devlink->params, XA_FLAGS_ALLOC); xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC); @@ -452,10 +481,32 @@ struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, return devlink; +err_kasprintf: + xa_erase(&devlinks, devlink->index); err_xa_alloc: kvfree(devlink); return NULL; } + +/** + * devlink_alloc_ns - Allocate new devlink instance resources + * in specific namespace + * + * @ops: ops + * @priv_size: size of user private data + * @net: net namespace + * @dev: parent device + * + * Allocate new devlink instance resources, including devlink index + * and name. 
+ */ +struct devlink *devlink_alloc_ns(const struct devlink_ops *ops, + size_t priv_size, struct net *net, + struct device *dev) +{ + WARN_ON(!dev); + return __devlink_alloc(ops, priv_size, net, dev, dev->driver); +} EXPORT_SYMBOL_GPL(devlink_alloc_ns); /** diff --git a/net/devlink/dev.c b/net/devlink/dev.c index e3a36de4f4ae..57b2b8f03543 100644 --- a/net/devlink/dev.c +++ b/net/devlink/dev.c @@ -453,7 +453,8 @@ int devlink_reload(struct devlink *devlink, struct net *dest_net, * (e.g., PCI reset) and to close possible races between these * operations and probe/remove. */ - device_lock_assert(devlink->dev); + if (devlink->dev) + device_lock_assert(devlink->dev); memcpy(remote_reload_stats, devlink->stats.remote_reload_stats, sizeof(remote_reload_stats)); @@ -854,7 +855,7 @@ int devlink_info_version_running_put_ext(struct devlink_info_req *req, } EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext); -static int devlink_nl_driver_info_get(struct device_driver *drv, +static int devlink_nl_driver_info_get(const struct device_driver *drv, struct devlink_info_req *req) { if (!drv) @@ -872,7 +873,6 @@ devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink, enum devlink_command cmd, u32 portid, u32 seq, int flags, struct netlink_ext_ack *extack) { - struct device *dev = devlink_to_dev(devlink); struct devlink_info_req req = {}; void *hdr; int err; @@ -892,7 +892,7 @@ devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink, goto err_cancel_msg; } - err = devlink_nl_driver_info_get(dev->driver, &req); + err = devlink_nl_driver_info_get(devlink->dev_driver, &req); if (err) goto err_cancel_msg; diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h index 1377864383bc..e4e48ee2da5a 100644 --- a/net/devlink/devl_internal.h +++ b/net/devlink/devl_internal.h @@ -49,6 +49,8 @@ struct devlink { struct xarray snapshot_ids; struct devlink_dev_stats stats; struct device *dev; + const char *dev_name_index; + const struct device_driver *dev_driver; 
possible_net_t _net; /* Serializes access to devlink instance specific objects such as * port, sb, dpipe, resource, params, region, traps and more. @@ -66,6 +68,19 @@ struct devlink { extern struct xarray devlinks; extern struct genl_family devlink_nl_family; +struct devlink *__devlink_alloc(const struct devlink_ops *ops, size_t priv_size, + struct net *net, struct device *dev, + const struct device_driver *dev_driver); + +#define devl_warn(devlink, format, args...) \ + do { \ + if ((devlink)->dev) \ + dev_warn((devlink)->dev, format, ##args); \ + else \ + pr_warn("devlink (%s): " format, \ + devlink_dev_name(devlink), ##args); \ + } while (0) + /* devlink instances are open to the access from the user space after * devlink_register() call. Such logical barrier allows us to have certain * expectations related to locking. @@ -90,6 +105,7 @@ extern struct genl_family devlink_nl_family; for (index = 0; (devlink = devlinks_xa_find_get(net, &index)); index++) struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp); +struct devlink *devlinks_xa_lookup_get(struct net *net, unsigned long index); static inline bool __devl_is_registered(struct devlink *devlink) { @@ -104,7 +120,7 @@ static inline bool devl_is_registered(struct devlink *devlink) static inline void devl_dev_lock(struct devlink *devlink, bool dev_lock) { - if (dev_lock) + if (dev_lock && devlink->dev) device_lock(devlink->dev); devl_lock(devlink); } @@ -112,7 +128,7 @@ static inline void devl_dev_lock(struct devlink *devlink, bool dev_lock) static inline void devl_dev_unlock(struct devlink *devlink, bool dev_lock) { devl_unlock(devlink); - if (dev_lock) + if (dev_lock && devlink->dev) device_unlock(devlink->dev); } @@ -148,6 +164,11 @@ struct devlink_nl_dump_state { struct { u64 dump_ts; }; + /* DEVLINK_CMD_RESOURCE_DUMP */ + struct { + u32 index; + bool index_valid; + } port_ctx; }; }; @@ -174,9 +195,11 @@ devlink_dump_state(struct netlink_callback *cb) static inline int 
devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink) { - if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name)) + if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink_bus_name(devlink))) + return -EMSGSIZE; + if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, devlink_dev_name(devlink))) return -EMSGSIZE; - if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev))) + if (nla_put_uint(msg, DEVLINK_ATTR_INDEX, devlink->index)) return -EMSGSIZE; return 0; } @@ -202,6 +225,8 @@ struct devlink_obj_desc { const char *dev_name; unsigned int port_index; bool port_index_valid; + unsigned int devlink_index; + bool devlink_index_valid; long data[]; }; @@ -209,8 +234,10 @@ static inline void devlink_nl_obj_desc_init(struct devlink_obj_desc *desc, struct devlink *devlink) { memset(desc, 0, sizeof(*desc)); - desc->bus_name = devlink->dev->bus->name; - desc->dev_name = dev_name(devlink->dev); + desc->bus_name = devlink_bus_name(devlink); + desc->dev_name = devlink_dev_name(devlink); + desc->devlink_index = devlink->index; + desc->devlink_index_valid = true; } static inline void devlink_nl_obj_desc_port_set(struct devlink_obj_desc *desc, diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c index 593605c1b1ef..ae4afc739678 100644 --- a/net/devlink/netlink.c +++ b/net/devlink/netlink.c @@ -73,13 +73,19 @@ int devlink_nl_notify_filter_set_doit(struct sk_buff *skb, flt->dev_name = pos; } + if (attrs[DEVLINK_ATTR_INDEX]) { + flt->devlink_index = nla_get_uint(attrs[DEVLINK_ATTR_INDEX]); + flt->devlink_index_valid = true; + } + if (attrs[DEVLINK_ATTR_PORT_INDEX]) { flt->port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]); flt->port_index_valid = true; } /* Don't attach empty filter. 
*/ - if (!flt->bus_name && !flt->dev_name && !flt->port_index_valid) { + if (!flt->bus_name && !flt->dev_name && + !flt->devlink_index_valid && !flt->port_index_valid) { kfree(flt); flt = NULL; } @@ -100,6 +106,9 @@ int devlink_nl_notify_filter_set_doit(struct sk_buff *skb, static bool devlink_obj_desc_match(const struct devlink_obj_desc *desc, const struct devlink_obj_desc *flt) { + if (desc->devlink_index_valid && flt->devlink_index_valid && + desc->devlink_index != flt->devlink_index) + return false; if (desc->bus_name && flt->bus_name && strcmp(desc->bus_name, flt->bus_name)) return false; @@ -186,24 +195,48 @@ devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs, char *busname; char *devname; + if (attrs[DEVLINK_ATTR_INDEX]) { + if (attrs[DEVLINK_ATTR_BUS_NAME] || + attrs[DEVLINK_ATTR_DEV_NAME]) + return ERR_PTR(-EINVAL); + index = nla_get_u32(attrs[DEVLINK_ATTR_INDEX]); + devlink = devlinks_xa_lookup_get(net, index); + if (!devlink) + return ERR_PTR(-ENODEV); + goto found; + } + if (!attrs[DEVLINK_ATTR_BUS_NAME] || !attrs[DEVLINK_ATTR_DEV_NAME]) return ERR_PTR(-EINVAL); busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]); devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]); + if (!strcmp(busname, DEVLINK_INDEX_BUS_NAME)) { + if (kstrtoul(devname, 10, &index)) + return ERR_PTR(-ENODEV); + devlink = devlinks_xa_lookup_get(net, index); + if (!devlink) + return ERR_PTR(-ENODEV); + goto found; + } + devlinks_xa_for_each_registered_get(net, index, devlink) { - if (strcmp(devlink->dev->bus->name, busname) == 0 && - strcmp(dev_name(devlink->dev), devname) == 0) { - devl_dev_lock(devlink, dev_lock); - if (devl_is_registered(devlink)) - return devlink; - devl_dev_unlock(devlink, dev_lock); - } + if (strcmp(devlink_bus_name(devlink), busname) == 0 && + strcmp(devlink_dev_name(devlink), devname) == 0) + goto found; devlink_put(devlink); } return ERR_PTR(-ENODEV); + +found: + devl_dev_lock(devlink, dev_lock); + if (devl_is_registered(devlink)) + return devlink; + 
devl_dev_unlock(devlink, dev_lock); + devlink_put(devlink); + return ERR_PTR(-ENODEV); } static int __devlink_nl_pre_doit(struct sk_buff *skb, struct genl_info *info, @@ -337,6 +370,8 @@ static int devlink_nl_inst_iter_dumpit(struct sk_buff *msg, /* restart sub-object walk for the next instance */ state->idx = 0; + state->port_ctx.index = 0; + state->port_ctx.index_valid = false; } if (err != -EMSGSIZE) @@ -352,7 +387,8 @@ int devlink_nl_dumpit(struct sk_buff *msg, struct netlink_callback *cb, int flags = NLM_F_MULTI; if (attrs && - (attrs[DEVLINK_ATTR_BUS_NAME] || attrs[DEVLINK_ATTR_DEV_NAME])) + (attrs[DEVLINK_ATTR_BUS_NAME] || attrs[DEVLINK_ATTR_DEV_NAME] || + attrs[DEVLINK_ATTR_INDEX])) return devlink_nl_inst_single_dumpit(msg, cb, flags, dump_one, attrs); else diff --git a/net/devlink/netlink_gen.c b/net/devlink/netlink_gen.c index f4c61c2b4f22..81899786fd98 100644 --- a/net/devlink/netlink_gen.c +++ b/net/devlink/netlink_gen.c @@ -11,6 +11,11 @@ #include <uapi/linux/devlink.h> +/* Integer value ranges */ +static const struct netlink_range_validation devlink_attr_index_range = { + .max = U32_MAX, +}; + /* Sparse enums validation callbacks */ static int devlink_attr_param_type_validate(const struct nlattr *attr, @@ -56,37 +61,42 @@ const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_I }; /* DEVLINK_CMD_GET - do */ -static const struct nla_policy devlink_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_PORT_GET - do */ -static const struct nla_policy devlink_port_get_do_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { +static const struct nla_policy devlink_port_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = 
NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_PORT_GET - dump */ -static const struct nla_policy devlink_port_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_port_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_PORT_SET - do */ -static const struct nla_policy devlink_port_set_nl_policy[DEVLINK_ATTR_PORT_FUNCTION + 1] = { +static const struct nla_policy devlink_port_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_PORT_TYPE] = NLA_POLICY_MAX(NLA_U16, 3), [DEVLINK_ATTR_PORT_FUNCTION] = NLA_POLICY_NESTED(devlink_dl_port_function_nl_policy), }; /* DEVLINK_CMD_PORT_NEW - do */ -static const struct nla_policy devlink_port_new_nl_policy[DEVLINK_ATTR_PORT_PCI_SF_NUMBER + 1] = { +static const struct nla_policy devlink_port_new_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_PORT_FLAVOUR] = NLA_POLICY_MAX(NLA_U16, 7), [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16, }, @@ -95,58 +105,66 @@ static const struct nla_policy devlink_port_new_nl_policy[DEVLINK_ATTR_PORT_PCI_ }; /* DEVLINK_CMD_PORT_DEL - do */ -static const struct nla_policy 
devlink_port_del_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { +static const struct nla_policy devlink_port_del_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_PORT_SPLIT - do */ -static const struct nla_policy devlink_port_split_nl_policy[DEVLINK_ATTR_PORT_SPLIT_COUNT + 1] = { +static const struct nla_policy devlink_port_split_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_PORT_UNSPLIT - do */ -static const struct nla_policy devlink_port_unsplit_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { +static const struct nla_policy devlink_port_unsplit_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_SB_GET - do */ -static const struct nla_policy devlink_sb_get_do_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = { +static const struct nla_policy devlink_sb_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_SB_GET - dump */ -static const struct nla_policy devlink_sb_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct 
nla_policy devlink_sb_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_SB_POOL_GET - do */ -static const struct nla_policy devlink_sb_pool_get_do_nl_policy[DEVLINK_ATTR_SB_POOL_INDEX + 1] = { +static const struct nla_policy devlink_sb_pool_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, }, }; /* DEVLINK_CMD_SB_POOL_GET - dump */ -static const struct nla_policy devlink_sb_pool_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_sb_pool_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_SB_POOL_SET - do */ -static const struct nla_policy devlink_sb_pool_set_nl_policy[DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE + 1] = { +static const struct nla_policy devlink_sb_pool_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, }, [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = NLA_POLICY_MAX(NLA_U8, 1), @@ -154,24 +172,27 @@ static const struct nla_policy devlink_sb_pool_set_nl_policy[DEVLINK_ATTR_SB_POO }; /* DEVLINK_CMD_SB_PORT_POOL_GET - do */ -static const struct nla_policy 
devlink_sb_port_pool_get_do_nl_policy[DEVLINK_ATTR_SB_POOL_INDEX + 1] = { +static const struct nla_policy devlink_sb_port_pool_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, }, }; /* DEVLINK_CMD_SB_PORT_POOL_GET - dump */ -static const struct nla_policy devlink_sb_port_pool_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_sb_port_pool_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_SB_PORT_POOL_SET - do */ -static const struct nla_policy devlink_sb_port_pool_set_nl_policy[DEVLINK_ATTR_SB_THRESHOLD + 1] = { +static const struct nla_policy devlink_sb_port_pool_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, }, @@ -179,9 +200,10 @@ static const struct nla_policy devlink_sb_port_pool_set_nl_policy[DEVLINK_ATTR_S }; /* DEVLINK_CMD_SB_TC_POOL_BIND_GET - do */ -static const struct nla_policy devlink_sb_tc_pool_bind_get_do_nl_policy[DEVLINK_ATTR_SB_TC_INDEX + 1] = { +static const struct nla_policy devlink_sb_tc_pool_bind_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = 
NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_POOL_TYPE] = NLA_POLICY_MAX(NLA_U8, 1), @@ -189,15 +211,17 @@ static const struct nla_policy devlink_sb_tc_pool_bind_get_do_nl_policy[DEVLINK_ }; /* DEVLINK_CMD_SB_TC_POOL_BIND_GET - dump */ -static const struct nla_policy devlink_sb_tc_pool_bind_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_sb_tc_pool_bind_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_SB_TC_POOL_BIND_SET - do */ -static const struct nla_policy devlink_sb_tc_pool_bind_set_nl_policy[DEVLINK_ATTR_SB_TC_INDEX + 1] = { +static const struct nla_policy devlink_sb_tc_pool_bind_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16, }, @@ -207,80 +231,100 @@ static const struct nla_policy devlink_sb_tc_pool_bind_set_nl_policy[DEVLINK_ATT }; /* DEVLINK_CMD_SB_OCC_SNAPSHOT - do */ -static const struct nla_policy devlink_sb_occ_snapshot_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = { +static const struct nla_policy devlink_sb_occ_snapshot_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, }; /* 
DEVLINK_CMD_SB_OCC_MAX_CLEAR - do */ -static const struct nla_policy devlink_sb_occ_max_clear_nl_policy[DEVLINK_ATTR_SB_INDEX + 1] = { +static const struct nla_policy devlink_sb_occ_max_clear_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_ESWITCH_GET - do */ -static const struct nla_policy devlink_eswitch_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_eswitch_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_ESWITCH_SET - do */ -static const struct nla_policy devlink_eswitch_set_nl_policy[DEVLINK_ATTR_ESWITCH_ENCAP_MODE + 1] = { +static const struct nla_policy devlink_eswitch_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_MAX(NLA_U16, 2), [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = NLA_POLICY_MAX(NLA_U8, 3), [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = NLA_POLICY_MAX(NLA_U8, 1), }; /* DEVLINK_CMD_DPIPE_TABLE_GET - do */ -static const struct nla_policy devlink_dpipe_table_get_nl_policy[DEVLINK_ATTR_DPIPE_TABLE_NAME + 1] = { +static const struct nla_policy devlink_dpipe_table_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING, }, 
}; /* DEVLINK_CMD_DPIPE_ENTRIES_GET - do */ -static const struct nla_policy devlink_dpipe_entries_get_nl_policy[DEVLINK_ATTR_DPIPE_TABLE_NAME + 1] = { +static const struct nla_policy devlink_dpipe_entries_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_DPIPE_HEADERS_GET - do */ -static const struct nla_policy devlink_dpipe_headers_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_dpipe_headers_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_DPIPE_TABLE_COUNTERS_SET - do */ -static const struct nla_policy devlink_dpipe_table_counters_set_nl_policy[DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED + 1] = { +static const struct nla_policy devlink_dpipe_table_counters_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8, }, }; /* DEVLINK_CMD_RESOURCE_SET - do */ -static const struct nla_policy devlink_resource_set_nl_policy[DEVLINK_ATTR_RESOURCE_SIZE + 1] = { +static const struct nla_policy devlink_resource_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_RESOURCE_ID] = { 
.type = NLA_U64, }, [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64, }, }; /* DEVLINK_CMD_RESOURCE_DUMP - do */ -static const struct nla_policy devlink_resource_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_resource_dump_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { + [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), + [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, +}; + +/* DEVLINK_CMD_RESOURCE_DUMP - dump */ +static const struct nla_policy devlink_resource_dump_dump_nl_policy[DEVLINK_ATTR_RESOURCE_SCOPE_MASK + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), + [DEVLINK_ATTR_RESOURCE_SCOPE_MASK] = NLA_POLICY_MASK(NLA_U32, 0x3), }; /* DEVLINK_CMD_RELOAD - do */ -static const struct nla_policy devlink_reload_nl_policy[DEVLINK_ATTR_RELOAD_LIMITS + 1] = { +static const struct nla_policy devlink_reload_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, 1, 2), [DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(6), [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32, }, @@ -289,22 +333,25 @@ static const struct nla_policy devlink_reload_nl_policy[DEVLINK_ATTR_RELOAD_LIMI }; /* DEVLINK_CMD_PARAM_GET - do */ -static const struct nla_policy devlink_param_get_do_nl_policy[DEVLINK_ATTR_PARAM_NAME + 1] = { +static const struct nla_policy devlink_param_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + 
[DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_PARAM_GET - dump */ -static const struct nla_policy devlink_param_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_param_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_PARAM_SET - do */ -static const struct nla_policy devlink_param_set_nl_policy[DEVLINK_ATTR_PARAM_RESET_DEFAULT + 1] = { +static const struct nla_policy devlink_param_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_PARAM_TYPE] = NLA_POLICY_VALIDATE_FN(NLA_U8, &devlink_attr_param_type_validate), [DEVLINK_ATTR_PARAM_VALUE_CMODE] = NLA_POLICY_MAX(NLA_U8, 2), @@ -312,41 +359,46 @@ static const struct nla_policy devlink_param_set_nl_policy[DEVLINK_ATTR_PARAM_RE }; /* DEVLINK_CMD_REGION_GET - do */ -static const struct nla_policy devlink_region_get_do_nl_policy[DEVLINK_ATTR_REGION_NAME + 1] = { +static const struct nla_policy devlink_region_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_REGION_GET - dump */ -static const struct nla_policy devlink_region_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy 
devlink_region_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_REGION_NEW - do */ -static const struct nla_policy devlink_region_new_nl_policy[DEVLINK_ATTR_REGION_SNAPSHOT_ID + 1] = { +static const struct nla_policy devlink_region_new_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_REGION_DEL - do */ -static const struct nla_policy devlink_region_del_nl_policy[DEVLINK_ATTR_REGION_SNAPSHOT_ID + 1] = { +static const struct nla_policy devlink_region_del_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_REGION_READ - dump */ -static const struct nla_policy devlink_region_read_nl_policy[DEVLINK_ATTR_REGION_DIRECT + 1] = { +static const struct nla_policy devlink_region_read_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_REGION_SNAPSHOT_ID] 
= { .type = NLA_U32, }, @@ -356,44 +408,50 @@ static const struct nla_policy devlink_region_read_nl_policy[DEVLINK_ATTR_REGION }; /* DEVLINK_CMD_PORT_PARAM_GET - do */ -static const struct nla_policy devlink_port_param_get_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { +static const struct nla_policy devlink_port_param_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_PORT_PARAM_SET - do */ -static const struct nla_policy devlink_port_param_set_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { +static const struct nla_policy devlink_port_param_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_INFO_GET - do */ -static const struct nla_policy devlink_info_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_info_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_HEALTH_REPORTER_GET - do */ -static const struct nla_policy devlink_health_reporter_get_do_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { +static const struct nla_policy devlink_health_reporter_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, 
[DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_HEALTH_REPORTER_GET - dump */ -static const struct nla_policy devlink_health_reporter_get_dump_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { +static const struct nla_policy devlink_health_reporter_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_HEALTH_REPORTER_SET - do */ -static const struct nla_policy devlink_health_reporter_set_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD + 1] = { +static const struct nla_policy devlink_health_reporter_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64, }, @@ -403,137 +461,155 @@ static const struct nla_policy devlink_health_reporter_set_nl_policy[DEVLINK_ATT }; /* DEVLINK_CMD_HEALTH_REPORTER_RECOVER - do */ -static const struct nla_policy devlink_health_reporter_recover_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { +static const struct nla_policy devlink_health_reporter_recover_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE - do */ -static const struct nla_policy 
devlink_health_reporter_diagnose_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { +static const struct nla_policy devlink_health_reporter_diagnose_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET - dump */ -static const struct nla_policy devlink_health_reporter_dump_get_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { +static const struct nla_policy devlink_health_reporter_dump_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_HEALTH_REPORTER_DUMP_CLEAR - do */ -static const struct nla_policy devlink_health_reporter_dump_clear_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { +static const struct nla_policy devlink_health_reporter_dump_clear_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_FLASH_UPDATE - do */ -static const struct nla_policy devlink_flash_update_nl_policy[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK + 1] = { +static const struct nla_policy devlink_flash_update_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] 
= { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] = NLA_POLICY_BITFIELD32(3), }; /* DEVLINK_CMD_TRAP_GET - do */ -static const struct nla_policy devlink_trap_get_do_nl_policy[DEVLINK_ATTR_TRAP_NAME + 1] = { +static const struct nla_policy devlink_trap_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_TRAP_GET - dump */ -static const struct nla_policy devlink_trap_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_trap_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_TRAP_SET - do */ -static const struct nla_policy devlink_trap_set_nl_policy[DEVLINK_ATTR_TRAP_ACTION + 1] = { +static const struct nla_policy devlink_trap_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_TRAP_ACTION] = NLA_POLICY_MAX(NLA_U8, 2), }; /* DEVLINK_CMD_TRAP_GROUP_GET - do */ -static const struct nla_policy devlink_trap_group_get_do_nl_policy[DEVLINK_ATTR_TRAP_GROUP_NAME + 1] = { +static const struct nla_policy devlink_trap_group_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { 
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_TRAP_GROUP_GET - dump */ -static const struct nla_policy devlink_trap_group_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_trap_group_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_TRAP_GROUP_SET - do */ -static const struct nla_policy devlink_trap_group_set_nl_policy[DEVLINK_ATTR_TRAP_POLICER_ID + 1] = { +static const struct nla_policy devlink_trap_group_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_TRAP_ACTION] = NLA_POLICY_MAX(NLA_U8, 2), [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_TRAP_POLICER_GET - do */ -static const struct nla_policy devlink_trap_policer_get_do_nl_policy[DEVLINK_ATTR_TRAP_POLICER_ID + 1] = { +static const struct nla_policy devlink_trap_policer_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_TRAP_POLICER_GET - dump */ -static const struct nla_policy devlink_trap_policer_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy 
devlink_trap_policer_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_TRAP_POLICER_SET - do */ -static const struct nla_policy devlink_trap_policer_set_nl_policy[DEVLINK_ATTR_TRAP_POLICER_BURST + 1] = { +static const struct nla_policy devlink_trap_policer_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32, }, [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64, }, [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64, }, }; /* DEVLINK_CMD_HEALTH_REPORTER_TEST - do */ -static const struct nla_policy devlink_health_reporter_test_nl_policy[DEVLINK_ATTR_HEALTH_REPORTER_NAME + 1] = { +static const struct nla_policy devlink_health_reporter_test_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_RATE_GET - do */ -static const struct nla_policy devlink_rate_get_do_nl_policy[DEVLINK_ATTR_RATE_NODE_NAME + 1] = { +static const struct nla_policy devlink_rate_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, }, }; /* 
DEVLINK_CMD_RATE_GET - dump */ -static const struct nla_policy devlink_rate_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_rate_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_RATE_SET - do */ -static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TC_BWS + 1] = { +static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64, }, [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64, }, @@ -544,9 +620,10 @@ static const struct nla_policy devlink_rate_set_nl_policy[DEVLINK_ATTR_RATE_TC_B }; /* DEVLINK_CMD_RATE_NEW - do */ -static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TC_BWS + 1] = { +static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64, }, [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64, }, @@ -557,55 +634,62 @@ static const struct nla_policy devlink_rate_new_nl_policy[DEVLINK_ATTR_RATE_TC_B }; /* DEVLINK_CMD_RATE_DEL - do */ -static const struct nla_policy devlink_rate_del_nl_policy[DEVLINK_ATTR_RATE_NODE_NAME + 1] = { +static const struct nla_policy devlink_rate_del_nl_policy[DEVLINK_ATTR_INDEX + 1] = { 
[DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_LINECARD_GET - do */ -static const struct nla_policy devlink_linecard_get_do_nl_policy[DEVLINK_ATTR_LINECARD_INDEX + 1] = { +static const struct nla_policy devlink_linecard_get_do_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32, }, }; /* DEVLINK_CMD_LINECARD_GET - dump */ -static const struct nla_policy devlink_linecard_get_dump_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_linecard_get_dump_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_LINECARD_SET - do */ -static const struct nla_policy devlink_linecard_set_nl_policy[DEVLINK_ATTR_LINECARD_TYPE + 1] = { +static const struct nla_policy devlink_linecard_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32, }, [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING, }, }; /* DEVLINK_CMD_SELFTESTS_GET - do */ -static const struct nla_policy devlink_selftests_get_nl_policy[DEVLINK_ATTR_DEV_NAME + 1] = { +static const struct nla_policy devlink_selftests_get_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, 
[DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), }; /* DEVLINK_CMD_SELFTESTS_RUN - do */ -static const struct nla_policy devlink_selftests_run_nl_policy[DEVLINK_ATTR_SELFTESTS + 1] = { +static const struct nla_policy devlink_selftests_run_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_SELFTESTS] = NLA_POLICY_NESTED(devlink_dl_selftest_id_nl_policy), }; /* DEVLINK_CMD_NOTIFY_FILTER_SET - do */ -static const struct nla_policy devlink_notify_filter_set_nl_policy[DEVLINK_ATTR_PORT_INDEX + 1] = { +static const struct nla_policy devlink_notify_filter_set_nl_policy[DEVLINK_ATTR_INDEX + 1] = { [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING, }, [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING, }, + [DEVLINK_ATTR_INDEX] = NLA_POLICY_FULL_RANGE(NLA_UINT, &devlink_attr_index_range), [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32, }, }; /* Ops table for devlink */ -const struct genl_split_ops devlink_nl_ops[74] = { +const struct genl_split_ops devlink_nl_ops[75] = { { .cmd = DEVLINK_CMD_GET, .validate = GENL_DONT_VALIDATE_STRICT, @@ -613,7 +697,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_get_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { @@ -629,14 +713,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_port_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_port_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_PORT_GET, .dumpit = devlink_nl_port_get_dumpit, .policy = devlink_port_get_dump_nl_policy, - 
.maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -646,7 +730,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_port_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_port_set_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_FUNCTION, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -656,7 +740,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_port_new_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_port_new_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_PCI_SF_NUMBER, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -666,7 +750,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_port_del_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_port_del_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -676,7 +760,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_port_split_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_port_split_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_SPLIT_COUNT, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -686,7 +770,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_port_unsplit_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_port_unsplit_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -696,14 +780,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_sb_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_SB_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_SB_GET, .dumpit = devlink_nl_sb_get_dumpit, .policy = 
devlink_sb_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -713,14 +797,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_pool_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_sb_pool_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_SB_POOL_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_SB_POOL_GET, .dumpit = devlink_nl_sb_pool_get_dumpit, .policy = devlink_sb_pool_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -730,7 +814,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_pool_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_sb_pool_set_nl_policy, - .maxattr = DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -740,14 +824,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_port_pool_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_sb_port_pool_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_SB_POOL_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_SB_PORT_POOL_GET, .dumpit = devlink_nl_sb_port_pool_get_dumpit, .policy = devlink_sb_port_pool_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -757,7 +841,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_port_pool_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_sb_port_pool_set_nl_policy, - .maxattr = DEVLINK_ATTR_SB_THRESHOLD, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -767,14 +851,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_tc_pool_bind_get_doit, .post_doit = devlink_nl_post_doit, .policy = 
devlink_sb_tc_pool_bind_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_SB_TC_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET, .dumpit = devlink_nl_sb_tc_pool_bind_get_dumpit, .policy = devlink_sb_tc_pool_bind_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -784,7 +868,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_tc_pool_bind_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_sb_tc_pool_bind_set_nl_policy, - .maxattr = DEVLINK_ATTR_SB_TC_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -794,7 +878,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_occ_snapshot_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_sb_occ_snapshot_nl_policy, - .maxattr = DEVLINK_ATTR_SB_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -804,7 +888,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_sb_occ_max_clear_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_sb_occ_max_clear_nl_policy, - .maxattr = DEVLINK_ATTR_SB_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -814,7 +898,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_eswitch_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_eswitch_get_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -824,7 +908,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_eswitch_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_eswitch_set_nl_policy, - .maxattr = DEVLINK_ATTR_ESWITCH_ENCAP_MODE, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -834,7 +918,7 @@ const struct genl_split_ops 
devlink_nl_ops[74] = { .doit = devlink_nl_dpipe_table_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_dpipe_table_get_nl_policy, - .maxattr = DEVLINK_ATTR_DPIPE_TABLE_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { @@ -844,7 +928,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_dpipe_entries_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_dpipe_entries_get_nl_policy, - .maxattr = DEVLINK_ATTR_DPIPE_TABLE_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { @@ -854,7 +938,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_dpipe_headers_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_dpipe_headers_get_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { @@ -864,7 +948,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_dpipe_table_counters_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_dpipe_table_counters_set_nl_policy, - .maxattr = DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -874,27 +958,34 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_resource_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_resource_set_nl_policy, - .maxattr = DEVLINK_ATTR_RESOURCE_SIZE, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_RESOURCE_DUMP, .validate = GENL_DONT_VALIDATE_STRICT, - .pre_doit = devlink_nl_pre_doit, + .pre_doit = devlink_nl_pre_doit_port_optional, .doit = devlink_nl_resource_dump_doit, .post_doit = devlink_nl_post_doit, - .policy = devlink_resource_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .policy = devlink_resource_dump_do_nl_policy, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { + .cmd = DEVLINK_CMD_RESOURCE_DUMP, + .dumpit = 
devlink_nl_resource_dump_dumpit, + .policy = devlink_resource_dump_dump_nl_policy, + .maxattr = DEVLINK_ATTR_RESOURCE_SCOPE_MASK, + .flags = GENL_CMD_CAP_DUMP, + }, + { .cmd = DEVLINK_CMD_RELOAD, .validate = GENL_DONT_VALIDATE_STRICT, .pre_doit = devlink_nl_pre_doit_dev_lock, .doit = devlink_nl_reload_doit, .post_doit = devlink_nl_post_doit_dev_lock, .policy = devlink_reload_nl_policy, - .maxattr = DEVLINK_ATTR_RELOAD_LIMITS, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -904,14 +995,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_param_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_param_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_PARAM_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_PARAM_GET, .dumpit = devlink_nl_param_get_dumpit, .policy = devlink_param_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -921,7 +1012,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_param_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_param_set_nl_policy, - .maxattr = DEVLINK_ATTR_PARAM_RESET_DEFAULT, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -931,14 +1022,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_region_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_region_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_REGION_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_REGION_GET, .dumpit = devlink_nl_region_get_dumpit, .policy = devlink_region_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -948,7 +1039,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_region_new_doit, .post_doit = devlink_nl_post_doit, .policy = 
devlink_region_new_nl_policy, - .maxattr = DEVLINK_ATTR_REGION_SNAPSHOT_ID, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -958,7 +1049,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_region_del_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_region_del_nl_policy, - .maxattr = DEVLINK_ATTR_REGION_SNAPSHOT_ID, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -966,7 +1057,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .validate = GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = devlink_nl_region_read_dumpit, .policy = devlink_region_read_nl_policy, - .maxattr = DEVLINK_ATTR_REGION_DIRECT, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP, }, { @@ -976,7 +1067,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_port_param_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_port_param_get_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { @@ -992,7 +1083,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_port_param_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_port_param_set_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1002,7 +1093,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_info_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_info_get_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { @@ -1018,14 +1109,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_health_reporter_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_health_reporter_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd 
= DEVLINK_CMD_HEALTH_REPORTER_GET, .dumpit = devlink_nl_health_reporter_get_dumpit, .policy = devlink_health_reporter_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -1035,7 +1126,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_health_reporter_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_health_reporter_set_nl_policy, - .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_BURST_PERIOD, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1045,7 +1136,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_health_reporter_recover_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_health_reporter_recover_nl_policy, - .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1055,7 +1146,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_health_reporter_diagnose_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_health_reporter_diagnose_nl_policy, - .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1063,7 +1154,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .validate = GENL_DONT_VALIDATE_DUMP_STRICT, .dumpit = devlink_nl_health_reporter_dump_get_dumpit, .policy = devlink_health_reporter_dump_get_nl_policy, - .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DUMP, }, { @@ -1073,7 +1164,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_health_reporter_dump_clear_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_health_reporter_dump_clear_nl_policy, - .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1083,7 +1174,7 @@ const 
struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_flash_update_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_flash_update_nl_policy, - .maxattr = DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1093,14 +1184,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_trap_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_trap_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_TRAP_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_TRAP_GET, .dumpit = devlink_nl_trap_get_dumpit, .policy = devlink_trap_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -1110,7 +1201,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_trap_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_trap_set_nl_policy, - .maxattr = DEVLINK_ATTR_TRAP_ACTION, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1120,14 +1211,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_trap_group_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_trap_group_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_TRAP_GROUP_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_TRAP_GROUP_GET, .dumpit = devlink_nl_trap_group_get_dumpit, .policy = devlink_trap_group_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -1137,7 +1228,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_trap_group_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_trap_group_set_nl_policy, - .maxattr = DEVLINK_ATTR_TRAP_POLICER_ID, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1147,14 +1238,14 @@ const struct genl_split_ops 
devlink_nl_ops[74] = { .doit = devlink_nl_trap_policer_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_trap_policer_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_TRAP_POLICER_ID, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_TRAP_POLICER_GET, .dumpit = devlink_nl_trap_policer_get_dumpit, .policy = devlink_trap_policer_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -1164,7 +1255,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_trap_policer_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_trap_policer_set_nl_policy, - .maxattr = DEVLINK_ATTR_TRAP_POLICER_BURST, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1174,7 +1265,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_health_reporter_test_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_health_reporter_test_nl_policy, - .maxattr = DEVLINK_ATTR_HEALTH_REPORTER_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1184,14 +1275,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_rate_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_rate_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_RATE_NODE_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_RATE_GET, .dumpit = devlink_nl_rate_get_dumpit, .policy = devlink_rate_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -1201,7 +1292,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_rate_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_rate_set_nl_policy, - .maxattr = DEVLINK_ATTR_RATE_TC_BWS, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1211,7 +1302,7 @@ const struct 
genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_rate_new_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_rate_new_nl_policy, - .maxattr = DEVLINK_ATTR_RATE_TC_BWS, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1221,7 +1312,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_rate_del_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_rate_del_nl_policy, - .maxattr = DEVLINK_ATTR_RATE_NODE_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1231,14 +1322,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_linecard_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_linecard_get_do_nl_policy, - .maxattr = DEVLINK_ATTR_LINECARD_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { .cmd = DEVLINK_CMD_LINECARD_GET, .dumpit = devlink_nl_linecard_get_dumpit, .policy = devlink_linecard_get_dump_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DUMP, }, { @@ -1248,7 +1339,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_linecard_set_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_linecard_set_nl_policy, - .maxattr = DEVLINK_ATTR_LINECARD_TYPE, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, }, { @@ -1258,7 +1349,7 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_selftests_get_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_selftests_get_nl_policy, - .maxattr = DEVLINK_ATTR_DEV_NAME, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, { @@ -1274,14 +1365,14 @@ const struct genl_split_ops devlink_nl_ops[74] = { .doit = devlink_nl_selftests_run_doit, .post_doit = devlink_nl_post_doit, .policy = devlink_selftests_run_nl_policy, - .maxattr = DEVLINK_ATTR_SELFTESTS, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_ADMIN_PERM | GENL_CMD_CAP_DO, 
}, { .cmd = DEVLINK_CMD_NOTIFY_FILTER_SET, .doit = devlink_nl_notify_filter_set_doit, .policy = devlink_notify_filter_set_nl_policy, - .maxattr = DEVLINK_ATTR_PORT_INDEX, + .maxattr = DEVLINK_ATTR_INDEX, .flags = GENL_CMD_CAP_DO, }, }; diff --git a/net/devlink/netlink_gen.h b/net/devlink/netlink_gen.h index 2817d53a0eba..20034b0929a8 100644 --- a/net/devlink/netlink_gen.h +++ b/net/devlink/netlink_gen.h @@ -18,17 +18,17 @@ extern const struct nla_policy devlink_dl_rate_tc_bws_nl_policy[DEVLINK_RATE_TC_ extern const struct nla_policy devlink_dl_selftest_id_nl_policy[DEVLINK_ATTR_SELFTEST_ID_FLASH + 1]; /* Ops table for devlink */ -extern const struct genl_split_ops devlink_nl_ops[74]; +extern const struct genl_split_ops devlink_nl_ops[75]; int devlink_nl_pre_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); int devlink_nl_pre_doit_port(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); -int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops, - struct sk_buff *skb, struct genl_info *info); int devlink_nl_pre_doit_port_optional(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); +int devlink_nl_pre_doit_dev_lock(const struct genl_split_ops *ops, + struct sk_buff *skb, struct genl_info *info); void devlink_nl_post_doit(const struct genl_split_ops *ops, struct sk_buff *skb, struct genl_info *info); @@ -80,6 +80,8 @@ int devlink_nl_dpipe_table_counters_set_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_resource_set_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_resource_dump_doit(struct sk_buff *skb, struct genl_info *info); +int devlink_nl_resource_dump_dumpit(struct sk_buff *skb, + struct netlink_callback *cb); int devlink_nl_reload_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_param_get_doit(struct sk_buff *skb, struct genl_info *info); int devlink_nl_param_get_dumpit(struct sk_buff *skb, diff --git 
a/net/devlink/port.c b/net/devlink/port.c index 93d8a25bb920..485029d43428 100644 --- a/net/devlink/port.c +++ b/net/devlink/port.c @@ -220,8 +220,9 @@ size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port) { struct devlink *devlink = devlink_port->devlink; - return nla_total_size(strlen(devlink->dev->bus->name) + 1) /* DEVLINK_ATTR_BUS_NAME */ - + nla_total_size(strlen(dev_name(devlink->dev)) + 1) /* DEVLINK_ATTR_DEV_NAME */ + return nla_total_size(strlen(devlink_bus_name(devlink)) + 1) /* DEVLINK_ATTR_BUS_NAME */ + + nla_total_size(strlen(devlink_dev_name(devlink)) + 1) /* DEVLINK_ATTR_DEV_NAME */ + + nla_total_size(8) /* DEVLINK_ATTR_INDEX */ + nla_total_size(4); /* DEVLINK_ATTR_PORT_INDEX */ } @@ -975,7 +976,7 @@ static void devlink_port_type_warn(struct work_struct *work) struct devlink_port *port = container_of(to_delayed_work(work), struct devlink_port, type_warn_dw); - dev_warn(port->devlink->dev, "Type was not set for devlink port."); + devl_warn(port->devlink, "Type was not set for devlink port."); } static bool devlink_port_type_should_warn(struct devlink_port *devlink_port) @@ -1024,6 +1025,7 @@ void devlink_port_init(struct devlink *devlink, return; devlink_port->devlink = devlink; INIT_LIST_HEAD(&devlink_port->region_list); + INIT_LIST_HEAD(&devlink_port->resource_list); devlink_port->initialized = true; } EXPORT_SYMBOL_GPL(devlink_port_init); @@ -1041,6 +1043,7 @@ EXPORT_SYMBOL_GPL(devlink_port_init); void devlink_port_fini(struct devlink_port *devlink_port) { WARN_ON(!list_empty(&devlink_port->region_list)); + WARN_ON(!list_empty(&devlink_port->resource_list)); } EXPORT_SYMBOL_GPL(devlink_port_fini); @@ -1241,9 +1244,9 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port, */ void devlink_port_type_eth_set(struct devlink_port *devlink_port) { - dev_warn(devlink_port->devlink->dev, - "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n", - 
devlink_port->index); + devl_warn(devlink_port->devlink, + "devlink port type for port %d set to Ethernet without a software interface reference, device type not supported by the kernel?\n", + devlink_port->index); __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_ETH, NULL); } EXPORT_SYMBOL_GPL(devlink_port_type_eth_set); @@ -1272,9 +1275,9 @@ EXPORT_SYMBOL_GPL(devlink_port_type_ib_set); void devlink_port_type_clear(struct devlink_port *devlink_port) { if (devlink_port->type == DEVLINK_PORT_TYPE_ETH) - dev_warn(devlink_port->devlink->dev, - "devlink port type for port %d cleared without a software interface reference, device type not supported by the kernel?\n", - devlink_port->index); + devl_warn(devlink_port->devlink, + "devlink port type for port %d cleared without a software interface reference, device type not supported by the kernel?\n", + devlink_port->index); __devlink_port_type_set(devlink_port, DEVLINK_PORT_TYPE_NOTSET, NULL); } EXPORT_SYMBOL_GPL(devlink_port_type_clear); diff --git a/net/devlink/resource.c b/net/devlink/resource.c index 351835a710b1..3d2f42bc2fb5 100644 --- a/net/devlink/resource.c +++ b/net/devlink/resource.c @@ -36,15 +36,16 @@ struct devlink_resource { }; static struct devlink_resource * -devlink_resource_find(struct devlink *devlink, - struct devlink_resource *resource, u64 resource_id) +__devlink_resource_find(struct list_head *resource_list_head, + struct devlink_resource *resource, + u64 resource_id) { struct list_head *resource_list; if (resource) resource_list = &resource->resource_list; else - resource_list = &devlink->resource_list; + resource_list = resource_list_head; list_for_each_entry(resource, resource_list, list) { struct devlink_resource *child_resource; @@ -52,14 +53,23 @@ devlink_resource_find(struct devlink *devlink, if (resource->id == resource_id) return resource; - child_resource = devlink_resource_find(devlink, resource, - resource_id); + child_resource = __devlink_resource_find(resource_list_head, + 
resource, + resource_id); if (child_resource) return child_resource; } return NULL; } +static struct devlink_resource * +devlink_resource_find(struct devlink *devlink, + struct devlink_resource *resource, u64 resource_id) +{ + return __devlink_resource_find(&devlink->resource_list, + resource, resource_id); +} + static void devlink_resource_validate_children(struct devlink_resource *resource) { @@ -213,11 +223,38 @@ nla_put_failure: return -EMSGSIZE; } +static int devlink_resource_list_fill(struct sk_buff *skb, + struct devlink *devlink, + struct list_head *resource_list_head, + int *idx) +{ + struct devlink_resource *resource; + int i = 0; + int err; + + list_for_each_entry(resource, resource_list_head, list) { + if (i < *idx) { + i++; + continue; + } + err = devlink_resource_put(devlink, skb, resource); + if (err) { + *idx = i; + return err; + } + i++; + } + *idx = 0; + return 0; +} + static int devlink_resource_fill(struct genl_info *info, enum devlink_command cmd, int flags) { + struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = info->user_ptr[0]; struct devlink_resource *resource; + struct list_head *resource_list; struct nlattr *resources_attr; struct sk_buff *skb = NULL; struct nlmsghdr *nlh; @@ -226,7 +263,9 @@ static int devlink_resource_fill(struct genl_info *info, int i; int err; - resource = list_first_entry(&devlink->resource_list, + resource_list = devlink_port ? 
+ &devlink_port->resource_list : &devlink->resource_list; + resource = list_first_entry(resource_list, struct devlink_resource, list); start_again: err = devlink_nl_msg_reply_and_new(&skb, info); @@ -242,6 +281,9 @@ start_again: if (devlink_nl_put_handle(skb, devlink)) goto nla_put_failure; + if (devlink_port && + nla_put_u32(skb, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) + goto nla_put_failure; resources_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_RESOURCE_LIST); @@ -250,7 +292,7 @@ start_again: incomplete = false; i = 0; - list_for_each_entry_from(resource, &devlink->resource_list, list) { + list_for_each_entry_from(resource, resource_list, list) { err = devlink_resource_put(devlink, skb, resource); if (err) { if (!i) @@ -284,14 +326,133 @@ err_resource_put: int devlink_nl_resource_dump_doit(struct sk_buff *skb, struct genl_info *info) { + struct devlink_port *devlink_port = info->user_ptr[1]; struct devlink *devlink = info->user_ptr[0]; + struct list_head *resource_list; + + if (info->attrs[DEVLINK_ATTR_PORT_INDEX] && !devlink_port) + return -ENODEV; - if (list_empty(&devlink->resource_list)) + resource_list = devlink_port ? + &devlink_port->resource_list : &devlink->resource_list; + if (list_empty(resource_list)) return -EOPNOTSUPP; return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0); } +static int +devlink_resource_dump_fill_one(struct sk_buff *skb, struct devlink *devlink, + struct devlink_port *devlink_port, + struct netlink_callback *cb, int flags, int *idx) +{ + struct list_head *resource_list; + struct nlattr *resources_attr; + int start_idx = *idx; + void *hdr; + int err; + + resource_list = devlink_port ? 
+ &devlink_port->resource_list : &devlink->resource_list; + + if (list_empty(resource_list)) + return 0; + + err = -EMSGSIZE; + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, + &devlink_nl_family, flags, DEVLINK_CMD_RESOURCE_DUMP); + if (!hdr) + return err; + + if (devlink_nl_put_handle(skb, devlink)) + goto nla_put_failure; + if (devlink_port && + nla_put_u32(skb, DEVLINK_ATTR_PORT_INDEX, devlink_port->index)) + goto nla_put_failure; + + resources_attr = nla_nest_start_noflag(skb, DEVLINK_ATTR_RESOURCE_LIST); + if (!resources_attr) + goto nla_put_failure; + + err = devlink_resource_list_fill(skb, devlink, resource_list, idx); + if (err) { + if (*idx == start_idx) + goto resource_list_cancel; + nla_nest_end(skb, resources_attr); + genlmsg_end(skb, hdr); + return err; + } + nla_nest_end(skb, resources_attr); + genlmsg_end(skb, hdr); + return 0; + +resource_list_cancel: + nla_nest_cancel(skb, resources_attr); +nla_put_failure: + genlmsg_cancel(skb, hdr); + return err; +} + +static int +devlink_nl_resource_dump_one(struct sk_buff *skb, struct devlink *devlink, + struct netlink_callback *cb, int flags) +{ + struct devlink_nl_dump_state *state = devlink_dump_state(cb); + const struct genl_info *info = genl_info_dump(cb); + struct devlink_port *devlink_port; + struct nlattr *scope_attr = NULL; + unsigned long port_idx; + u32 scope = 0; + int err; + + if (info->attrs && info->attrs[DEVLINK_ATTR_RESOURCE_SCOPE_MASK]) { + scope_attr = info->attrs[DEVLINK_ATTR_RESOURCE_SCOPE_MASK]; + scope = nla_get_u32(scope_attr); + if (!scope) { + NL_SET_ERR_MSG_ATTR(info->extack, scope_attr, + "empty resource scope selection"); + return -EINVAL; + } + } + + if (!state->port_ctx.index_valid && + (!scope || (scope & DEVLINK_RESOURCE_SCOPE_DEV))) { + err = devlink_resource_dump_fill_one(skb, devlink, NULL, + cb, flags, &state->idx); + if (err) + return err; + state->idx = 0; + } + + if (scope && !(scope & DEVLINK_RESOURCE_SCOPE_PORT)) + goto out; + /* Check in case 
port was removed between dump callbacks. */ + if (state->port_ctx.index_valid && + !xa_load(&devlink->ports, state->port_ctx.index)) + state->idx = 0; + state->port_ctx.index_valid = true; + xa_for_each_start(&devlink->ports, port_idx, devlink_port, + state->port_ctx.index) { + err = devlink_resource_dump_fill_one(skb, devlink, devlink_port, + cb, flags, &state->idx); + if (err) { + state->port_ctx.index = port_idx; + return err; + } + state->idx = 0; + } +out: + state->port_ctx.index_valid = false; + state->port_ctx.index = 0; + return 0; +} + +int devlink_nl_resource_dump_dumpit(struct sk_buff *skb, + struct netlink_callback *cb) +{ + return devlink_nl_dumpit(skb, cb, devlink_nl_resource_dump_one); +} + int devlink_resources_validate(struct devlink *devlink, struct devlink_resource *resource, struct genl_info *info) @@ -314,26 +475,12 @@ int devlink_resources_validate(struct devlink *devlink, return err; } -/** - * devl_resource_register - devlink resource register - * - * @devlink: devlink - * @resource_name: resource's name - * @resource_size: resource's size - * @resource_id: resource's id - * @parent_resource_id: resource's parent id - * @size_params: size parameters - * - * Generic resources should reuse the same names across drivers. 
- * Please see the generic resources list at: - * Documentation/networking/devlink/devlink-resource.rst - */ -int devl_resource_register(struct devlink *devlink, - const char *resource_name, - u64 resource_size, - u64 resource_id, - u64 parent_resource_id, - const struct devlink_resource_size_params *size_params) +static int +__devl_resource_register(struct devlink *devlink, + struct list_head *resource_list_head, + const char *resource_name, u64 resource_size, + u64 resource_id, u64 parent_resource_id, + const struct devlink_resource_size_params *params) { struct devlink_resource *resource; struct list_head *resource_list; @@ -343,7 +490,8 @@ int devl_resource_register(struct devlink *devlink, top_hierarchy = parent_resource_id == DEVLINK_RESOURCE_ID_PARENT_TOP; - resource = devlink_resource_find(devlink, NULL, resource_id); + resource = __devlink_resource_find(resource_list_head, NULL, + resource_id); if (resource) return -EEXIST; @@ -352,12 +500,13 @@ int devl_resource_register(struct devlink *devlink, return -ENOMEM; if (top_hierarchy) { - resource_list = &devlink->resource_list; + resource_list = resource_list_head; } else { struct devlink_resource *parent_resource; - parent_resource = devlink_resource_find(devlink, NULL, - parent_resource_id); + parent_resource = __devlink_resource_find(resource_list_head, + NULL, + parent_resource_id); if (parent_resource) { resource_list = &parent_resource->resource_list; resource->parent = parent_resource; @@ -372,46 +521,78 @@ int devl_resource_register(struct devlink *devlink, resource->size_new = resource_size; resource->id = resource_id; resource->size_valid = true; - memcpy(&resource->size_params, size_params, - sizeof(resource->size_params)); + memcpy(&resource->size_params, params, sizeof(resource->size_params)); INIT_LIST_HEAD(&resource->resource_list); list_add_tail(&resource->list, resource_list); return 0; } + +/** + * devl_resource_register - devlink resource register + * + * @devlink: devlink + * 
@resource_name: resource's name + * @resource_size: resource's size + * @resource_id: resource's id + * @parent_resource_id: resource's parent id + * @params: size parameters + * + * Generic resources should reuse the same names across drivers. + * Please see the generic resources list at: + * Documentation/networking/devlink/devlink-resource.rst + * + * Return: 0 on success, negative error code otherwise. + */ +int devl_resource_register(struct devlink *devlink, const char *resource_name, + u64 resource_size, u64 resource_id, + u64 parent_resource_id, + const struct devlink_resource_size_params *params) +{ + return __devl_resource_register(devlink, &devlink->resource_list, + resource_name, resource_size, + resource_id, parent_resource_id, + params); +} EXPORT_SYMBOL_GPL(devl_resource_register); -static void devlink_resource_unregister(struct devlink *devlink, - struct devlink_resource *resource) +static void devlink_resource_unregister(struct devlink_resource *resource) { struct devlink_resource *tmp, *child_resource; list_for_each_entry_safe(child_resource, tmp, &resource->resource_list, list) { - devlink_resource_unregister(devlink, child_resource); + devlink_resource_unregister(child_resource); list_del(&child_resource->list); kfree(child_resource); } } -/** - * devl_resources_unregister - free all resources - * - * @devlink: devlink - */ -void devl_resources_unregister(struct devlink *devlink) +static void +__devl_resources_unregister(struct devlink *devlink, + struct list_head *resource_list_head) { struct devlink_resource *tmp, *child_resource; lockdep_assert_held(&devlink->lock); - list_for_each_entry_safe(child_resource, tmp, &devlink->resource_list, + list_for_each_entry_safe(child_resource, tmp, resource_list_head, list) { - devlink_resource_unregister(devlink, child_resource); + devlink_resource_unregister(child_resource); list_del(&child_resource->list); kfree(child_resource); } } + +/** + * devl_resources_unregister - free all resources + * + * 
@devlink: devlink + */ +void devl_resources_unregister(struct devlink *devlink) +{ + __devl_resources_unregister(devlink, &devlink->resource_list); +} EXPORT_SYMBOL_GPL(devl_resources_unregister); /** @@ -502,3 +683,46 @@ void devl_resource_occ_get_unregister(struct devlink *devlink, resource->occ_get_priv = NULL; } EXPORT_SYMBOL_GPL(devl_resource_occ_get_unregister); + +/** + * devl_port_resource_register - devlink port resource register + * + * @devlink_port: devlink port + * @resource_name: resource's name + * @resource_size: resource's size + * @resource_id: resource's id + * @parent_resource_id: resource's parent id + * @params: size parameters + * + * Generic resources should reuse the same names across drivers. + * Please see the generic resources list at: + * Documentation/networking/devlink/devlink-resource.rst + * + * Return: 0 on success, negative error code otherwise. + */ +int +devl_port_resource_register(struct devlink_port *devlink_port, + const char *resource_name, + u64 resource_size, u64 resource_id, + u64 parent_resource_id, + const struct devlink_resource_size_params *params) +{ + return __devl_resource_register(devlink_port->devlink, + &devlink_port->resource_list, + resource_name, resource_size, + resource_id, parent_resource_id, + params); +} +EXPORT_SYMBOL_GPL(devl_port_resource_register); + +/** + * devl_port_resources_unregister - unregister all devlink port resources + * + * @devlink_port: devlink port + */ +void devl_port_resources_unregister(struct devlink_port *devlink_port) +{ + __devl_resources_unregister(devlink_port->devlink, + &devlink_port->resource_list); +} +EXPORT_SYMBOL_GPL(devl_port_resources_unregister); diff --git a/net/devlink/sh_dev.c b/net/devlink/sh_dev.c new file mode 100644 index 000000000000..85acce97e788 --- /dev/null +++ b/net/devlink/sh_dev.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
*/ + +#include <net/devlink.h> + +#include "devl_internal.h" + +static LIST_HEAD(shd_list); +static DEFINE_MUTEX(shd_mutex); /* Protects shd_list and shd->list */ + +/* This structure represents a shared devlink instance, + * there is one created per identifier (e.g., serial number). + */ +struct devlink_shd { + struct list_head list; /* Node in shd list */ + const char *id; /* Identifier string (e.g., serial number) */ + refcount_t refcount; /* Reference count */ + size_t priv_size; /* Size of driver private data */ + char priv[] __aligned(NETDEV_ALIGN) __counted_by(priv_size); +}; + +static struct devlink_shd *devlink_shd_lookup(const char *id) +{ + struct devlink_shd *shd; + + list_for_each_entry(shd, &shd_list, list) { + if (!strcmp(shd->id, id)) + return shd; + } + + return NULL; +} + +static struct devlink_shd *devlink_shd_create(const char *id, + const struct devlink_ops *ops, + size_t priv_size, + const struct device_driver *driver) +{ + struct devlink_shd *shd; + struct devlink *devlink; + + devlink = __devlink_alloc(ops, sizeof(struct devlink_shd) + priv_size, + &init_net, NULL, driver); + if (!devlink) + return NULL; + shd = devlink_priv(devlink); + + shd->id = kstrdup(id, GFP_KERNEL); + if (!shd->id) + goto err_devlink_free; + shd->priv_size = priv_size; + refcount_set(&shd->refcount, 1); + + devl_lock(devlink); + devl_register(devlink); + devl_unlock(devlink); + + list_add_tail(&shd->list, &shd_list); + + return shd; + +err_devlink_free: + devlink_free(devlink); + return NULL; +} + +static void devlink_shd_destroy(struct devlink_shd *shd) +{ + struct devlink *devlink = priv_to_devlink(shd); + + list_del(&shd->list); + devl_lock(devlink); + devl_unregister(devlink); + devl_unlock(devlink); + kfree(shd->id); + devlink_free(devlink); +} + +/** + * devlink_shd_get - Get or create a shared devlink instance + * @id: Identifier string (e.g., serial number) for the shared instance + * @ops: Devlink operations structure + * @priv_size: Size of private data 
structure + * @driver: Driver associated with the shared devlink instance + * + * Get an existing shared devlink instance identified by @id, or create + * a new one if it doesn't exist. Return the devlink instance with a + * reference held. The caller must call devlink_shd_put() when done. + * + * All callers sharing the same @id must pass identical @ops, @priv_size + * and @driver. A mismatch triggers a warning and returns NULL. + * + * Return: Pointer to the shared devlink instance on success, + * NULL on failure + */ +struct devlink *devlink_shd_get(const char *id, + const struct devlink_ops *ops, + size_t priv_size, + const struct device_driver *driver) +{ + struct devlink *devlink; + struct devlink_shd *shd; + + mutex_lock(&shd_mutex); + + shd = devlink_shd_lookup(id); + if (!shd) { + shd = devlink_shd_create(id, ops, priv_size, driver); + goto unlock; + } + + devlink = priv_to_devlink(shd); + if (WARN_ON_ONCE(devlink->ops != ops || + shd->priv_size != priv_size || + devlink->dev_driver != driver)) { + shd = NULL; + goto unlock; + } + refcount_inc(&shd->refcount); + +unlock: + mutex_unlock(&shd_mutex); + return shd ? priv_to_devlink(shd) : NULL; +} +EXPORT_SYMBOL_GPL(devlink_shd_get); + +/** + * devlink_shd_put - Release a reference on a shared devlink instance + * @devlink: Shared devlink instance + * + * Release a reference on a shared devlink instance obtained via + * devlink_shd_get(). + */ +void devlink_shd_put(struct devlink *devlink) +{ + struct devlink_shd *shd; + + mutex_lock(&shd_mutex); + shd = devlink_priv(devlink); + if (refcount_dec_and_test(&shd->refcount)) + devlink_shd_destroy(shd); + mutex_unlock(&shd_mutex); +} +EXPORT_SYMBOL_GPL(devlink_shd_put); + +/** + * devlink_shd_get_priv - Get private data from shared devlink instance + * @devlink: Devlink instance + * + * Returns a pointer to the driver's private data structure within + * the shared devlink instance. 
+ * + * Return: Pointer to private data + */ +void *devlink_shd_get_priv(struct devlink *devlink) +{ + struct devlink_shd *shd = devlink_priv(devlink); + + return shd->priv; +} +EXPORT_SYMBOL_GPL(devlink_shd_get_priv); diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index c42ddd85ff1f..c3c8c3240ef9 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later /* Key type used to cache DNS lookups made by the kernel * * See Documentation/networking/dns_resolver.rst @@ -7,19 +8,6 @@ * Steve French (sfrench@us.ibm.com) * Wang Lei (wang840925@gmail.com) * David Howells (dhowells@redhat.com) - * - * This library is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, see <http://www.gnu.org/licenses/>. 
*/ #include <linux/module.h> #include <linux/moduleparam.h> @@ -215,7 +203,7 @@ store_result: kdebug("store result"); prep->quotalen = result_len; - upayload = kmalloc(sizeof(*upayload) + result_len + 1, GFP_KERNEL); + upayload = kmalloc_flex(*upayload, data, result_len + 1); if (!upayload) { kleave(" = -ENOMEM"); return -ENOMEM; diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index 53da62984447..e1c09d7b8200 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later /* Upcall routine, designed to work as a key type and working through * /sbin/request-key to contact userspace when handling DNS queries. * @@ -20,19 +21,6 @@ * For example to use this module to query AFSDB RR: * * create dns_resolver afsdb:* * /sbin/dns.afsdb %k - * - * This library is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/module.h> diff --git a/net/dns_resolver/internal.h b/net/dns_resolver/internal.h index 0c570d40e4d6..d0d8edcea092 100644 --- a/net/dns_resolver/internal.h +++ b/net/dns_resolver/internal.h @@ -1,21 +1,9 @@ +/* SPDX-License-Identifier: LGPL-2.1-or-later */ /* * Copyright (c) 2010 Wang Lei * Author(s): Wang Lei (wang840925@gmail.com). All Rights Reserved. 
* * Internal DNS Rsolver stuff - * - * This library is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published - * by the Free Software Foundation; either version 2.1 of the License, or - * (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public License - * along with this library; if not, see <http://www.gnu.org/licenses/>. */ #include <linux/compiler.h> diff --git a/net/dsa/tag_mxl862xx.c b/net/dsa/tag_mxl862xx.c index 01f215868271..8daefeb8d49d 100644 --- a/net/dsa/tag_mxl862xx.c +++ b/net/dsa/tag_mxl862xx.c @@ -86,6 +86,9 @@ static struct sk_buff *mxl862_tag_rcv(struct sk_buff *skb, return NULL; } + if (likely(!is_link_local_ether_addr(eth_hdr(skb)->h_dest))) + dsa_default_offload_fwd_mark(skb); + /* remove the MxL862xx special tag between the MAC addresses and the * current ethertype field. 
*/ diff --git a/net/dsa/tag_rtl8_4.c b/net/dsa/tag_rtl8_4.c index 2464545da4d2..852c6b88079a 100644 --- a/net/dsa/tag_rtl8_4.c +++ b/net/dsa/tag_rtl8_4.c @@ -17,8 +17,8 @@ * | (8-bit) | (8-bit) | * | Protocol [0x04] | REASON | b * |-----------------------------------+-----------------------------------| y - * | (1) | (1) | (2) | (1) | (3) | (1) | (1) | (1) | (5) | t - * | FID_EN | X | FID | PRI_EN | PRI | KEEP | X | LEARN_DIS | X | e + * | (1) | (3) | (1) | (3) | (1) | (1) | (1) | (5) | t + * | EFID_EN | EFID | PRI_EN | PRI | KEEP | VSEL | LEARN_DIS | VIDX | e * |-----------------------------------+-----------------------------------| s * | (1) | (15-bit) | | * | ALLOW | TX/RX | v @@ -32,19 +32,22 @@ * EtherType | note that Realtek uses the same EtherType for * | other incompatible tag formats (e.g. tag_rtl4_a.c) * Protocol | 0x04: indicates that this tag conforms to this format - * X | reserved * ------------+------------- * REASON | reason for forwarding packet to CPU * | 0: packet was forwarded or flooded to CPU * | 80: packet was trapped to CPU - * FID_EN | 1: packet has an FID - * | 0: no FID - * FID | FID of packet (if FID_EN=1) + * EFID_EN | 1: packet has an EFID + * | 0: no EFID + * EFID | Extended filter ID (EFID) of packet (if EFID_EN=1) * PRI_EN | 1: force priority of packet * | 0: don't force priority * PRI | priority of packet (if PRI_EN=1) * KEEP | preserve packet VLAN tag format + * VSEL | 0: switch should classify packet according to VLAN tag + * | 1: switch should classify packet according to VLAN membership + * | configuration with index VIDX * LEARN_DIS | don't learn the source MAC address of the packet + * VIDX | index of a VLAN membership configuration to use with VSEL * ALLOW | 1: treat TX/RX field as an allowance port mask, meaning the * | packet may only be forwarded to ports specified in the * | mask @@ -96,6 +99,7 @@ #define RTL8_4_REASON_TRAP 80 #define RTL8_4_LEARN_DIS BIT(5) +#define RTL8_4_KEEP BIT(7) #define RTL8_4_TX GENMASK(3, 0) 
#define RTL8_4_RX GENMASK(10, 0) @@ -111,8 +115,9 @@ static void rtl8_4_write_tag(struct sk_buff *skb, struct net_device *dev, /* Set Protocol; zero REASON */ tag16[1] = htons(FIELD_PREP(RTL8_4_PROTOCOL, RTL8_4_PROTOCOL_RTL8365MB)); - /* Zero FID_EN, FID, PRI_EN, PRI, KEEP; set LEARN_DIS */ - tag16[2] = htons(FIELD_PREP(RTL8_4_LEARN_DIS, 1)); + /* Zero EFID_EN, EFID, PRI_EN, PRI, VSEL, VIDX; set KEEP, LEARN_DIS */ + tag16[2] = htons(FIELD_PREP(RTL8_4_LEARN_DIS, 1) | + FIELD_PREP(RTL8_4_KEEP, 1)); /* Zero ALLOW; set RX (CPU->switch) forwarding port mask */ tag16[3] = htons(FIELD_PREP(RTL8_4_RX, dsa_xmit_port_mask(skb, dev))); diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index f0883357d12e..8bb98d3ea3db 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -2,8 +2,9 @@ #include <linux/ethtool_netlink.h> #include <linux/bitmap.h> -#include "netlink.h" + #include "bitset.h" +#include "netlink.h" /* Some bitmaps are internally represented as an array of unsigned long, some * as an array of u32 (some even as single u32 for now). To avoid the need of diff --git a/net/ethtool/bitset.h b/net/ethtool/bitset.h index c2c2e0051d00..07bc547d47a8 100644 --- a/net/ethtool/bitset.h +++ b/net/ethtool/bitset.h @@ -3,6 +3,9 @@ #ifndef _NET_ETHTOOL_BITSET_H #define _NET_ETHTOOL_BITSET_H +#include <linux/ethtool.h> +#include <linux/netlink.h> + #define ETHNL_MAX_BITSET_SIZE S16_MAX typedef const char (*const ethnl_string_array_t)[ETH_GSTRING_LEN]; diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c index 0364b8fb577b..8d375dac2a40 100644 --- a/net/ethtool/cabletest.c +++ b/net/ethtool/cabletest.c @@ -3,8 +3,9 @@ #include <linux/phy.h> #include <linux/ethtool_netlink.h> #include <net/netdev_lock.h> -#include "netlink.h" + #include "common.h" +#include "netlink.h" /* 802.3 standard allows 100 meters for BaseT cables. 
However longer * cables might work, depending on the quality of the cables and the diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index ca4f80282448..64ef8cff2005 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -1,9 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only -#include <net/xdp_sock_drv.h> +#include <net/netdev_queues.h> -#include "netlink.h" #include "common.h" +#include "netlink.h" struct channels_req_info { struct ethnl_req_info base; @@ -109,7 +109,7 @@ ethnl_set_channels_validate(struct ethnl_req_info *req_info, static int ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info) { - unsigned int from_channel, old_total, i; + unsigned int old_combined, old_rx, old_tx, i; bool mod = false, mod_combined = false; struct net_device *dev = req_info->dev; struct ethtool_channels channels = {}; @@ -118,8 +118,9 @@ ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info) int ret; dev->ethtool_ops->get_channels(dev, &channels); - old_total = channels.combined_count + - max(channels.rx_count, channels.tx_count); + old_combined = channels.combined_count; + old_rx = channels.rx_count; + old_tx = channels.tx_count; ethnl_update_u32(&channels.rx_count, tb[ETHTOOL_A_CHANNELS_RX_COUNT], &mod); @@ -169,14 +170,19 @@ ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info) if (ret) return ret; - /* Disabling channels, query zero-copy AF_XDP sockets */ - from_channel = channels.combined_count + - min(channels.rx_count, channels.tx_count); - for (i = from_channel; i < old_total; i++) - if (xsk_get_pool_from_qid(dev, i)) { - GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing zerocopy AF_XDP sockets"); + /* ensure channels are not busy at the moment */ + for (i = channels.combined_count + channels.rx_count; + i < old_combined + old_rx; i++) { + if (netdev_queue_busy(dev, i, NETDEV_QUEUE_TYPE_RX, + info->extack)) return -EINVAL; - } + } + for (i = 
channels.combined_count + channels.tx_count; + i < old_combined + old_tx; i++) { + if (netdev_queue_busy(dev, i, NETDEV_QUEUE_TYPE_TX, + info->extack)) + return -EINVAL; + } ret = dev->ethtool_ops->set_channels(dev, &channels); return ret < 0 ? ret : 1; diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c index 3e18ca1ccc5e..1e2c5c7048a8 100644 --- a/net/ethtool/coalesce.c +++ b/net/ethtool/coalesce.c @@ -1,8 +1,9 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/dim.h> -#include "netlink.h" + #include "common.h" +#include "netlink.h" struct coalesce_req_info { struct ethnl_req_info base; @@ -118,6 +119,8 @@ static int coalesce_reply_size(const struct ethnl_req_info *req_base, nla_total_size(sizeof(u32)) + /* _TX_AGGR_MAX_BYTES */ nla_total_size(sizeof(u32)) + /* _TX_AGGR_MAX_FRAMES */ nla_total_size(sizeof(u32)) + /* _TX_AGGR_TIME_USECS */ + nla_total_size(sizeof(u32)) + /* _RX_CQE_FRAMES */ + nla_total_size(sizeof(u32)) + /* _RX_CQE_NSECS */ total_modersz * 2; /* _{R,T}X_PROFILE */ } @@ -269,7 +272,11 @@ static int coalesce_fill_reply(struct sk_buff *skb, coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES, kcoal->tx_aggr_max_frames, supported) || coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS, - kcoal->tx_aggr_time_usecs, supported)) + kcoal->tx_aggr_time_usecs, supported) || + coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_CQE_FRAMES, + kcoal->rx_cqe_frames, supported) || + coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_CQE_NSECS, + kcoal->rx_cqe_nsecs, supported)) return -EMSGSIZE; if (!req_base->dev || !req_base->dev->irq_moder) @@ -338,6 +345,8 @@ const struct nla_policy ethnl_coalesce_set_policy[] = { [ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES] = { .type = NLA_U32 }, [ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS] = { .type = NLA_U32 }, + [ETHTOOL_A_COALESCE_RX_CQE_FRAMES] = { .type = NLA_U32 }, + [ETHTOOL_A_COALESCE_RX_CQE_NSECS] = { .type = NLA_U32 }, 
[ETHTOOL_A_COALESCE_RX_PROFILE] = NLA_POLICY_NESTED(coalesce_profile_policy), [ETHTOOL_A_COALESCE_TX_PROFILE] = @@ -570,6 +579,10 @@ __ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info, tb[ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES], &mod); ethnl_update_u32(&kernel_coalesce.tx_aggr_time_usecs, tb[ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS], &mod); + ethnl_update_u32(&kernel_coalesce.rx_cqe_frames, + tb[ETHTOOL_A_COALESCE_RX_CQE_FRAMES], &mod); + ethnl_update_u32(&kernel_coalesce.rx_cqe_nsecs, + tb[ETHTOOL_A_COALESCE_RX_CQE_NSECS], &mod); if (dev->irq_moder && dev->irq_moder->profile_flags & DIM_PROFILE_RX) { ret = ethnl_update_profile(dev, &dev->irq_moder->rx_profile, diff --git a/net/ethtool/common.c b/net/ethtool/common.c index e252cf20c22f..84ec88dee05c 100644 --- a/net/ethtool/common.c +++ b/net/ethtool/common.c @@ -8,8 +8,8 @@ #include <linux/phy_link_topology.h> #include <net/netdev_queues.h> -#include "netlink.h" #include "common.h" +#include "netlink.h" #include "../core/dev.h" @@ -1204,6 +1204,189 @@ void ethtool_rxfh_context_lost(struct net_device *dev, u32 context_id) } EXPORT_SYMBOL(ethtool_rxfh_context_lost); +bool netif_is_rxfh_configured(const struct net_device *dev) +{ + return dev->ethtool->rss_indir_user_size; +} +EXPORT_SYMBOL(netif_is_rxfh_configured); + +/** + * ethtool_rxfh_indir_lost - Notify core that the RSS indirection table was lost + * @dev: network device + * + * Drivers should call this when the device can no longer maintain the + * user-configured indirection table, typically after a HW fault recovery + * that reduced the maximum queue count. Marks the default RSS context + * indirection table as unconfigured and sends an %ETHTOOL_MSG_RSS_NTF + * notification. 
+ */ +void ethtool_rxfh_indir_lost(struct net_device *dev) +{ + WARN_ONCE(!rtnl_is_locked() && + !lockdep_is_held_type(&dev->ethtool->rss_lock, -1), + "RSS context lock assertion failed\n"); + + netdev_err(dev, "device error, RSS indirection table lost\n"); + dev->ethtool->rss_indir_user_size = 0; + ethtool_rss_notify(dev, ETHTOOL_MSG_RSS_NTF, 0); +} +EXPORT_SYMBOL(ethtool_rxfh_indir_lost); + +static bool ethtool_rxfh_is_periodic(const u32 *tbl, u32 old_size, u32 new_size) +{ + u32 i; + + for (i = new_size; i < old_size; i++) + if (tbl[i] != tbl[i % new_size]) + return false; + return true; +} + +static bool ethtool_rxfh_can_resize(const u32 *tbl, u32 old_size, u32 new_size, + u32 user_size) +{ + if (new_size == old_size) + return true; + + if (!user_size) + return true; + + if (new_size < old_size) { + if (new_size < user_size) + return false; + if (old_size % new_size) + return false; + if (!ethtool_rxfh_is_periodic(tbl, old_size, new_size)) + return false; + return true; + } + + if (new_size % old_size) + return false; + return true; +} + +/* Resize without validation; caller must have called can_resize first */ +static void ethtool_rxfh_resize(u32 *tbl, u32 old_size, u32 new_size) +{ + u32 i; + + /* Grow: replicate existing pattern; shrink is a no-op on the data */ + for (i = old_size; i < new_size; i++) + tbl[i] = tbl[i % old_size]; +} + +/** + * ethtool_rxfh_indir_can_resize - Check if context 0 indir table can resize + * @dev: network device + * @tbl: indirection table + * @old_size: current number of entries in the table + * @new_size: desired number of entries + * + * Validate that @tbl can be resized from @old_size to @new_size without + * data loss. Uses the user_size floor from context 0. When user_size is + * zero the table is not user-configured and resize always succeeds. + * Read-only; does not modify the table. + * + * Return: true if resize is possible, false otherwise. 
+ */ +bool ethtool_rxfh_indir_can_resize(struct net_device *dev, const u32 *tbl, + u32 old_size, u32 new_size) +{ + return ethtool_rxfh_can_resize(tbl, old_size, new_size, + dev->ethtool->rss_indir_user_size); +} +EXPORT_SYMBOL(ethtool_rxfh_indir_can_resize); + +/** + * ethtool_rxfh_indir_resize - Fold or unfold context 0 indirection table + * @dev: network device + * @tbl: indirection table (must have room for max(old_size, new_size) entries) + * @old_size: current number of entries in the table + * @new_size: desired number of entries + * + * Resize the default RSS context indirection table in place. Caller + * must have validated with ethtool_rxfh_indir_can_resize() first. + */ +void ethtool_rxfh_indir_resize(struct net_device *dev, u32 *tbl, + u32 old_size, u32 new_size) +{ + if (!dev->ethtool->rss_indir_user_size) + return; + + ethtool_rxfh_resize(tbl, old_size, new_size); +} +EXPORT_SYMBOL(ethtool_rxfh_indir_resize); + +/** + * ethtool_rxfh_ctxs_can_resize - Validate resize for all RSS contexts + * @dev: network device + * @new_indir_size: new indirection table size + * + * Validate that the indirection tables of all non-default RSS contexts + * can be resized to @new_indir_size. Read-only; does not modify any + * context. Intended to be paired with ethtool_rxfh_ctxs_resize(). + * + * Return: 0 if all contexts can be resized, negative errno on failure. 
+ */ +int ethtool_rxfh_ctxs_can_resize(struct net_device *dev, u32 new_indir_size) +{ + struct ethtool_rxfh_context *ctx; + unsigned long context; + int ret = 0; + + if (!dev->ethtool_ops->rxfh_indir_space || + new_indir_size > dev->ethtool_ops->rxfh_indir_space) + return -EINVAL; + + mutex_lock(&dev->ethtool->rss_lock); + xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { + u32 *indir = ethtool_rxfh_context_indir(ctx); + + if (!ethtool_rxfh_can_resize(indir, ctx->indir_size, + new_indir_size, + ctx->indir_user_size)) { + ret = -EINVAL; + goto unlock; + } + } +unlock: + mutex_unlock(&dev->ethtool->rss_lock); + return ret; +} +EXPORT_SYMBOL(ethtool_rxfh_ctxs_can_resize); + +/** + * ethtool_rxfh_ctxs_resize - Resize all RSS context indirection tables + * @dev: network device + * @new_indir_size: new indirection table size + * + * Resize the indirection table of every non-default RSS context to + * @new_indir_size. Caller must have validated with + * ethtool_rxfh_ctxs_can_resize() first. An %ETHTOOL_MSG_RSS_NTF is + * sent for each resized context. + * + * Notifications are sent outside the RSS lock to avoid holding the + * mutex during notification delivery. 
+ */ +void ethtool_rxfh_ctxs_resize(struct net_device *dev, u32 new_indir_size) +{ + struct ethtool_rxfh_context *ctx; + unsigned long context; + + mutex_lock(&dev->ethtool->rss_lock); + xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { + ethtool_rxfh_resize(ethtool_rxfh_context_indir(ctx), + ctx->indir_size, new_indir_size); + ctx->indir_size = new_indir_size; + } + mutex_unlock(&dev->ethtool->rss_lock); + + xa_for_each(&dev->ethtool->rss_ctx, context, ctx) + ethtool_rss_notify(dev, ETHTOOL_MSG_RSS_NTF, context); +} +EXPORT_SYMBOL(ethtool_rxfh_ctxs_resize); + enum ethtool_link_medium ethtool_str_to_medium(const char *str) { int i; diff --git a/net/ethtool/debug.c b/net/ethtool/debug.c index 0b2dea56d461..6043916b440e 100644 --- a/net/ethtool/debug.c +++ b/net/ethtool/debug.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" struct debug_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/eee.c b/net/ethtool/eee.c index bf398973eb8a..50d6fcd3661b 100644 --- a/net/ethtool/eee.c +++ b/net/ethtool/eee.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" struct eee_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c index 3b8209e930fd..a557e3996c85 100644 --- a/net/ethtool/eeprom.c +++ b/net/ethtool/eeprom.c @@ -2,8 +2,9 @@ #include <linux/ethtool.h> #include <linux/sfp.h> -#include "netlink.h" + #include "common.h" +#include "netlink.h" struct eeprom_req_info { struct ethnl_req_info base; @@ -149,15 +150,17 @@ err_free: return ret; } -static int eeprom_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb, +static int eeprom_parse_request(struct ethnl_req_info *req_info, + const struct genl_info *info, + struct nlattr **tb, struct netlink_ext_ack *extack) { struct 
eeprom_req_info *request = MODULE_EEPROM_REQINFO(req_info); - if (!tb[ETHTOOL_A_MODULE_EEPROM_OFFSET] || - !tb[ETHTOOL_A_MODULE_EEPROM_LENGTH] || - !tb[ETHTOOL_A_MODULE_EEPROM_PAGE] || - !tb[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS]) + if (GENL_REQ_ATTR_CHECK(info, ETHTOOL_A_MODULE_EEPROM_OFFSET) || + GENL_REQ_ATTR_CHECK(info, ETHTOOL_A_MODULE_EEPROM_LENGTH) || + GENL_REQ_ATTR_CHECK(info, ETHTOOL_A_MODULE_EEPROM_PAGE) || + GENL_REQ_ATTR_CHECK(info, ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS)) return -EINVAL; request->i2c_address = nla_get_u8(tb[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS]); diff --git a/net/ethtool/features.c b/net/ethtool/features.c index f2217983be2b..d9455b30aec9 100644 --- a/net/ethtool/features.c +++ b/net/ethtool/features.c @@ -2,9 +2,9 @@ #include <net/netdev_lock.h> -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" struct features_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/fec.c b/net/ethtool/fec.c index 4669e74cbcaa..e2d539271060 100644 --- a/net/ethtool/fec.c +++ b/net/ethtool/fec.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" struct fec_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index ff4b4780d6af..bd97f9b9bf18 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -27,12 +27,13 @@ #include <linux/net.h> #include <linux/pm_runtime.h> #include <linux/utsname.h> +#include <linux/ethtool_netlink.h> #include <net/devlink.h> #include <net/ipv6.h> -#include <net/xdp_sock_drv.h> #include <net/flow_offload.h> #include <net/netdev_lock.h> -#include <linux/ethtool_netlink.h> +#include <net/netdev_queues.h> + #include "common.h" /* State held across locks and calls for commands which have devlink fallback */ @@ -1404,9 +1405,9 @@ static noinline_for_stack int ethtool_set_rxfh_indir(struct net_device *dev, /* indicate 
whether rxfh was set to default */ if (user_size == 0) - dev->priv_flags &= ~IFF_RXFH_CONFIGURED; + dev->ethtool->rss_indir_user_size = 0; else - dev->priv_flags |= IFF_RXFH_CONFIGURED; + dev->ethtool->rss_indir_user_size = rxfh_dev.indir_size; out_unlock: mutex_unlock(&dev->ethtool->rss_lock); @@ -1721,9 +1722,9 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if (!rxfh_dev.rss_context) { /* indicate whether rxfh was set to default */ if (rxfh.indir_size == 0) - dev->priv_flags &= ~IFF_RXFH_CONFIGURED; + dev->ethtool->rss_indir_user_size = 0; else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) - dev->priv_flags |= IFF_RXFH_CONFIGURED; + dev->ethtool->rss_indir_user_size = dev_indir_size; } /* Update rss_ctx tracking */ if (rxfh_dev.rss_delete) { @@ -1736,6 +1737,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, ctx->indir_configured = rxfh.indir_size && rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE; + ctx->indir_user_size = dev_indir_size; } if (rxfh_dev.key) { memcpy(ethtool_rxfh_context_key(ctx), rxfh_dev.key, @@ -2248,7 +2250,6 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, void __user *useraddr) { struct ethtool_channels channels, curr = { .cmd = ETHTOOL_GCHANNELS }; - u16 from_channel, to_channel; unsigned int i; int ret; @@ -2282,13 +2283,17 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev, if (ret) return ret; - /* Disabling channels, query zero-copy AF_XDP sockets */ - from_channel = channels.combined_count + - min(channels.rx_count, channels.tx_count); - to_channel = curr.combined_count + max(curr.rx_count, curr.tx_count); - for (i = from_channel; i < to_channel; i++) - if (xsk_get_pool_from_qid(dev, i)) + /* Disabling channels, query busy queues (AF_XDP, queue leasing) */ + for (i = channels.combined_count + channels.rx_count; + i < curr.combined_count + curr.rx_count; i++) { + if (netdev_queue_busy(dev, i, NETDEV_QUEUE_TYPE_RX, NULL)) return 
-EINVAL; + } + for (i = channels.combined_count + channels.tx_count; + i < curr.combined_count + curr.tx_count; i++) { + if (netdev_queue_busy(dev, i, NETDEV_QUEUE_TYPE_TX, NULL)) + return -EINVAL; + } ret = dev->ethtool_ops->set_channels(dev, &channels); if (!ret) diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c index 30b8ce275159..244ff92e2ff9 100644 --- a/net/ethtool/linkinfo.c +++ b/net/ethtool/linkinfo.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" #include "common.h" +#include "netlink.h" struct linkinfo_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index 259cd9ef1f2a..30d703531652 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" /* LINKMODES_GET */ diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c index 05a5f72c99fa..8a5985fd7712 100644 --- a/net/ethtool/linkstate.c +++ b/net/ethtool/linkstate.c @@ -1,10 +1,11 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" -#include "common.h" #include <linux/phy.h> #include <linux/phylib_stubs.h> +#include "common.h" +#include "netlink.h" + struct linkstate_req_info { struct ethnl_req_info base; }; diff --git a/net/ethtool/module.c b/net/ethtool/module.c index 0a761bf4771e..cad2eb25b5a4 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -6,10 +6,10 @@ #include <net/devlink.h> #include <net/netdev_lock.h> -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" #include "module_fw.h" +#include "netlink.h" struct module_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/mse.c b/net/ethtool/mse.c index 8cb3fc5e7be4..e91b74430f76 100644 --- a/net/ethtool/mse.c +++ b/net/ethtool/mse.c @@ -4,8 +4,8 @@ #include <linux/phy.h> #include <linux/slab.h> -#include "netlink.h" 
#include "common.h" +#include "netlink.h" /* Channels A-D only; WORST and LINK are exclusive alternatives */ #define PHY_MSE_CHANNEL_COUNT 4 diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 6e5f0f4f815a..5046023a30b1 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -6,8 +6,9 @@ #include <linux/ethtool_netlink.h> #include <linux/phy_link_topology.h> #include <linux/pm_runtime.h> -#include "netlink.h" + #include "module_fw.h" +#include "netlink.h" static struct genl_family ethtool_genl_family; @@ -461,7 +462,8 @@ static int ethnl_default_parse(struct ethnl_req_info *req_info, return ret; if (request_ops->parse_request) { - ret = request_ops->parse_request(req_info, tb, info->extack); + ret = request_ops->parse_request(req_info, info, tb, + info->extack); if (ret < 0) goto err_dev; } diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 89010eaa67df..aaf6f2468768 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -396,6 +396,7 @@ struct ethnl_request_ops { u8 set_ntf_cmd; int (*parse_request)(struct ethnl_req_info *req_info, + const struct genl_info *info, struct nlattr **tb, struct netlink_ext_ack *extack); int (*prepare_data)(const struct ethnl_req_info *req_info, diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c index 0f9af1e66548..ccaec8e878b2 100644 --- a/net/ethtool/pause.c +++ b/net/ethtool/pause.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" #include "common.h" +#include "netlink.h" struct pause_req_info { struct ethnl_req_info base; @@ -28,6 +28,7 @@ const struct nla_policy ethnl_pause_get_policy[] = { }; static int pause_parse_request(struct ethnl_req_info *req_base, + const struct genl_info *info, struct nlattr **tb, struct netlink_ext_ack *extack) { @@ -130,7 +131,9 @@ static int pause_put_stats(struct sk_buff *skb, if (ethtool_put_stat(skb, pause_stats->tx_pause_frames, ETHTOOL_A_PAUSE_STAT_TX_FRAMES, pad) || ethtool_put_stat(skb, 
pause_stats->rx_pause_frames, - ETHTOOL_A_PAUSE_STAT_RX_FRAMES, pad)) + ETHTOOL_A_PAUSE_STAT_RX_FRAMES, pad) || + ethtool_put_stat(skb, pause_stats->tx_pause_storm_events, + ETHTOOL_A_PAUSE_STAT_TX_PAUSE_STORM_EVENTS, pad)) goto err_cancel; nla_nest_end(skb, nest); diff --git a/net/ethtool/phc_vclocks.c b/net/ethtool/phc_vclocks.c index cadaabed60bd..15146e38ab27 100644 --- a/net/ethtool/phc_vclocks.c +++ b/net/ethtool/phc_vclocks.c @@ -2,8 +2,8 @@ /* * Copyright 2021 NXP */ -#include "netlink.h" #include "common.h" +#include "netlink.h" struct phc_vclocks_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/phy.c b/net/ethtool/phy.c index 68372bef4b2f..d4e6887055ab 100644 --- a/net/ethtool/phy.c +++ b/net/ethtool/phy.c @@ -3,14 +3,14 @@ * Copyright 2023 Bootlin * */ -#include "common.h" -#include "netlink.h" - #include <linux/phy.h> #include <linux/phy_link_topology.h> #include <linux/sfp.h> #include <net/netdev_lock.h> +#include "common.h" +#include "netlink.h" + struct phy_req_info { struct ethnl_req_info base; }; diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c index e1f7820a6158..91f0c4233298 100644 --- a/net/ethtool/plca.c +++ b/net/ethtool/plca.c @@ -3,8 +3,8 @@ #include <linux/phy.h> #include <linux/ethtool_netlink.h> -#include "netlink.h" #include "common.h" +#include "netlink.h" struct plca_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/privflags.c b/net/ethtool/privflags.c index 297be6a13ab9..46a4d2a43ba8 100644 --- a/net/ethtool/privflags.c +++ b/net/ethtool/privflags.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" struct privflags_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c index 24def9c9dd54..2eb9bdc2dcb9 100644 --- a/net/ethtool/pse-pd.c +++ b/net/ethtool/pse-pd.c @@ -6,14 +6,15 @@ // Copyright (c) 2022 Pengutronix, Oleksij Rempel 
<kernel@pengutronix.de> // -#include "common.h" -#include "linux/pse-pd/pse.h" -#include "netlink.h" #include <linux/ethtool_netlink.h> #include <linux/ethtool.h> #include <linux/export.h> #include <linux/phy.h> +#include "common.h" +#include "linux/pse-pd/pse.h" +#include "netlink.h" + struct pse_req_info { struct ethnl_req_info base; }; diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c index aeedd5ec6b8c..0fd5dcc3729f 100644 --- a/net/ethtool/rings.c +++ b/net/ethtool/rings.c @@ -2,8 +2,8 @@ #include <net/netdev_queues.h> -#include "netlink.h" #include "common.h" +#include "netlink.h" struct rings_req_info { struct ethnl_req_info base; diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c index da5934cceb07..353110b862ab 100644 --- a/net/ethtool/rss.c +++ b/net/ethtool/rss.c @@ -2,8 +2,8 @@ #include <net/netdev_lock.h> -#include "netlink.h" #include "common.h" +#include "netlink.h" struct rss_req_info { struct ethnl_req_info base; @@ -66,7 +66,9 @@ const struct nla_policy ethnl_rss_get_policy[] = { }; static int -rss_parse_request(struct ethnl_req_info *req_info, struct nlattr **tb, +rss_parse_request(struct ethnl_req_info *req_info, + const struct genl_info *info, + struct nlattr **tb, struct netlink_ext_ack *extack) { struct rss_req_info *request = RSS_REQINFO(req_info); @@ -686,7 +688,7 @@ rss_set_prep_indir(struct net_device *dev, struct genl_info *info, *mod |= memcmp(rxfh->indir, data->indir_table, data->indir_size); - return 0; + return user_size; err_free: kfree(rxfh->indir); @@ -833,6 +835,7 @@ ethnl_rss_set(struct ethnl_req_info *req_info, struct genl_info *info) struct nlattr **tb = info->attrs; struct rss_reply_data data = {}; const struct ethtool_ops *ops; + u32 indir_user_size; int ret; ops = dev->ethtool_ops; @@ -845,8 +848,9 @@ ethnl_rss_set(struct ethnl_req_info *req_info, struct genl_info *info) rxfh.rss_context = request->rss_context; ret = rss_set_prep_indir(dev, info, &data, &rxfh, &indir_reset, &mod); - if (ret) + if (ret < 0) goto 
exit_clean_data; + indir_user_size = ret; indir_mod = !!tb[ETHTOOL_A_RSS_INDIR]; rxfh.hfunc = data.hfunc; @@ -889,12 +893,15 @@ ethnl_rss_set(struct ethnl_req_info *req_info, struct genl_info *info) if (ret) goto exit_unlock; - if (ctx) + if (ctx) { rss_set_ctx_update(ctx, tb, &data, &rxfh); - else if (indir_reset) - dev->priv_flags &= ~IFF_RXFH_CONFIGURED; - else if (indir_mod) - dev->priv_flags |= IFF_RXFH_CONFIGURED; + if (indir_user_size) + ctx->indir_user_size = indir_user_size; + } else if (indir_reset) { + dev->ethtool->rss_indir_user_size = 0; + } else if (indir_mod) { + dev->ethtool->rss_indir_user_size = indir_user_size; + } exit_unlock: mutex_unlock(&dev->ethtool->rss_lock); @@ -999,6 +1006,7 @@ int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info) const struct ethtool_ops *ops; struct rss_req_info req = {}; struct net_device *dev; + u32 indir_user_size; struct sk_buff *rsp; void *hdr; u32 limit; @@ -1035,8 +1043,9 @@ int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info) goto exit_ops; ret = rss_set_prep_indir(dev, info, &data, &rxfh, &indir_dflt, &mod); - if (ret) + if (ret < 0) goto exit_clean_data; + indir_user_size = ret; ethnl_update_u8(&rxfh.hfunc, tb[ETHTOOL_A_RSS_HFUNC], &mod); @@ -1080,6 +1089,7 @@ int ethnl_rss_create_doit(struct sk_buff *skb, struct genl_info *info) /* Store the config from rxfh to Xarray.. */ rss_set_ctx_update(ctx, tb, &data, &rxfh); + ctx->indir_user_size = indir_user_size; /* .. copy from Xarray to data. 
*/ __rss_prepare_ctx(dev, &data, ctx); diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c index 3ca8eb2a3b31..9b0d8cb07675 100644 --- a/net/ethtool/stats.c +++ b/net/ethtool/stats.c @@ -3,9 +3,9 @@ #include <linux/phy.h> #include <linux/phylib_stubs.h> -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" struct stats_req_info { struct ethnl_req_info base; @@ -99,6 +99,7 @@ const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_SRC + 1] = { }; static int stats_parse_request(struct ethnl_req_info *req_base, + const struct genl_info *info, struct nlattr **tb, struct netlink_ext_ack *extack) { diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index f6a67109beda..bb1e829ba099 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -2,8 +2,9 @@ #include <linux/ethtool.h> #include <linux/phy.h> -#include "netlink.h" + #include "common.h" +#include "netlink.h" struct strset_info { bool per_dev; @@ -189,6 +190,7 @@ static const struct nla_policy strset_stringsets_policy[] = { }; static int strset_parse_request(struct ethnl_req_info *req_base, + const struct genl_info *info, struct nlattr **tb, struct netlink_ext_ack *extack) { @@ -441,7 +443,8 @@ static int strset_fill_set(struct sk_buff *skb, if (strset_fill_string(skb, set_info, i) < 0) goto nla_put_failure; } - nla_nest_end(skb, strings_attr); + if (nla_nest_end_safe(skb, strings_attr) < 0) + goto nla_put_failure; } nla_nest_end(skb, stringset_attr); diff --git a/net/ethtool/tsconfig.c b/net/ethtool/tsconfig.c index e49e612a68c2..e4f518e49d4c 100644 --- a/net/ethtool/tsconfig.c +++ b/net/ethtool/tsconfig.c @@ -3,11 +3,11 @@ #include <linux/net_tstamp.h> #include <linux/ptp_clock_kernel.h> -#include "netlink.h" -#include "common.h" #include "bitset.h" -#include "../core/dev.h" +#include "common.h" +#include "netlink.h" #include "ts.h" +#include "../core/dev.h" struct tsconfig_req_info { struct ethnl_req_info base; diff --git 
a/net/ethtool/tsinfo.c b/net/ethtool/tsinfo.c index c0145c752d2f..a865f0fdd26b 100644 --- a/net/ethtool/tsinfo.c +++ b/net/ethtool/tsinfo.c @@ -6,9 +6,9 @@ #include <linux/ptp_clock_kernel.h> #include <net/netdev_lock.h> -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" #include "ts.h" struct tsinfo_req_info { @@ -70,7 +70,9 @@ int ts_parse_hwtst_provider(const struct nlattr *nest, } static int -tsinfo_parse_request(struct ethnl_req_info *req_base, struct nlattr **tb, +tsinfo_parse_request(struct ethnl_req_info *req_base, + const struct genl_info *info, + struct nlattr **tb, struct netlink_ext_ack *extack) { struct tsinfo_req_info *req = TSINFO_REQINFO(req_base); diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c index a39d8000d808..60f9a1d8535e 100644 --- a/net/ethtool/wol.c +++ b/net/ethtool/wol.c @@ -1,8 +1,8 @@ // SPDX-License-Identifier: GPL-2.0-only -#include "netlink.h" -#include "common.h" #include "bitset.h" +#include "common.h" +#include "netlink.h" struct wol_req_info { struct ethnl_req_info base; diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index fd2fea25eff0..5555b71ab19b 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -618,7 +618,7 @@ static const struct device_type hsr_type = { .name = "hsr", }; -static struct hsr_proto_ops hsr_ops = { +static const struct hsr_proto_ops hsr_ops = { .send_sv_frame = send_hsr_supervision_frame, .create_tagged_frame = hsr_create_tagged_frame, .get_untagged_frame = hsr_get_untagged_frame, @@ -628,7 +628,7 @@ static struct hsr_proto_ops hsr_ops = { .register_frame_out = hsr_register_frame_out, }; -static struct hsr_proto_ops prp_ops = { +static const struct hsr_proto_ops prp_ops = { .send_sv_frame = send_prp_supervision_frame, .create_tagged_frame = prp_create_tagged_frame, .get_untagged_frame = prp_get_untagged_frame, @@ -744,7 +744,7 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], hsr->proto_ops = 
&hsr_ops; } - /* Make sure we recognize frames from ourselves in hsr_rcv() */ + /* Make sure we recognize frames from ourselves in hsr_handle_frame() */ res = hsr_create_self_node(hsr, hsr_dev->dev_addr, slave[1]->dev_addr); if (res < 0) diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index aefc9b6936ba..0aca859c88cb 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -184,7 +184,7 @@ struct sk_buff *hsr_get_untagged_frame(struct hsr_frame_info *frame, create_stripped_skb_hsr(frame->skb_hsr, frame); else netdev_warn_once(port->dev, - "Unexpected frame received in hsr_get_untagged_frame()\n"); + "Unexpected frame received in %s()\n", __func__); if (!frame->skb_std) return NULL; diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index d41863593674..d09875b33588 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -71,8 +71,8 @@ bool hsr_is_node_in_db(struct list_head *node_db, return !!find_node_by_addr_A(node_db, addr); } -/* Helper for device init; the self_node is used in hsr_rcv() to recognize - * frames from self that's been looped over the HSR ring. +/* Helper for device init; the self_node is used in hsr_handle_frame() to + * recognize frames from self that's been looped over the HSR ring. 
*/ int hsr_create_self_node(struct hsr_priv *hsr, const unsigned char addr_a[ETH_ALEN], diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c index bc94b07101d8..33951d9bd3c5 100644 --- a/net/hsr/hsr_main.c +++ b/net/hsr/hsr_main.c @@ -89,7 +89,7 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event, } } - /* Make sure we recognize frames from ourselves in hsr_rcv() */ + /* Make sure we recognize frames from ourselves in hsr_handle_frame() */ port = hsr_port_get_hsr(hsr, HSR_PT_SLAVE_B); res = hsr_create_self_node(hsr, master->dev->dev_addr, diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h index 33b0d2460c9b..134e4f3fff60 100644 --- a/net/hsr/hsr_main.h +++ b/net/hsr/hsr_main.h @@ -202,7 +202,7 @@ struct hsr_priv { enum hsr_version prot_version; /* Indicate if HSRv0, HSRv1 or PRPv1 */ spinlock_t seqnr_lock; /* locking for sequence_nr */ spinlock_t list_lock; /* locking for node list */ - struct hsr_proto_ops *proto_ops; + const struct hsr_proto_ops *proto_ops; #define PRP_LAN_ID 0x5 /* 0x1010 for A and 0x1011 for B. 
Bit 0 is set * based on SLAVE_A or SLAVE_B */ diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index 44f83c8c56a7..d9af9e65f72f 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -243,7 +243,11 @@ void hsr_del_port(struct hsr_port *port) if (!port->hsr->fwd_offloaded) dev_set_promiscuity(port->dev, -1); netdev_upper_dev_unlink(port->dev, master->dev); - eth_hw_addr_set(port->dev, port->original_macaddress); + if (hsr->prot_version == PRP_V1 && + port->type == HSR_PT_SLAVE_B) { + eth_hw_addr_set(port->dev, port->original_macaddress); + call_netdevice_notifiers(NETDEV_CHANGEADDR, port->dev); + } } kfree_rcu(port, rcu); diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index e542fbe113e7..85dce296d751 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -313,7 +313,7 @@ out: } static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { size_t copied = 0; int err = -EOPNOTSUPP; @@ -703,7 +703,7 @@ out: } static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { size_t copied = 0; int err = -EOPNOTSUPP; @@ -737,7 +737,7 @@ static int dgram_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, saddr->family = AF_IEEE802154; ieee802154_addr_to_sa(&saddr->addr, &mac_cb(skb)->source); - *addr_len = sizeof(*saddr); + msg->msg_namelen = sizeof(*saddr); } if (ro->want_lqi) { diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index df922f9f5289..21e5164e30db 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -191,7 +191,7 @@ config NET_IP_TUNNEL config NET_IPGRE tristate "IP: GRE tunnels over IP" - depends on (IPV6 || IPV6=n) && NET_IPGRE_DEMUX + depends on NET_IPGRE_DEMUX select NET_IP_TUNNEL help Tunneling means encapsulating data of one protocol type within @@ -303,7 +303,6 @@ config SYN_COOKIES config NET_IPVTI tristate "Virtual (secure) IP: tunneling" - depends on IPV6 || IPV6=n select INET_TUNNEL select 
NET_IP_TUNNEL select XFRM @@ -439,7 +438,7 @@ config INET_TCP_DIAG config INET_UDP_DIAG tristate "UDP: socket monitoring interface" - depends on INET_DIAG && (IPV6 || IPV6=n) + depends on INET_DIAG default n help Support for UDP socket monitoring interface used by the ss tool. @@ -447,7 +446,7 @@ config INET_UDP_DIAG config INET_RAW_DIAG tristate "RAW: socket monitoring interface" - depends on INET_DIAG && (IPV6 || IPV6=n) + depends on INET_DIAG default n help Support for RAW socket monitoring interface used by the ss tool. @@ -750,7 +749,7 @@ config TCP_AO select CRYPTO select CRYPTO_LIB_UTILS select TCP_SIGPOOL - depends on 64BIT && IPV6 != m # seq-number extension needs WRITE_ONCE(u64) + depends on 64BIT # seq-number extension needs WRITE_ONCE(u64) help TCP-AO specifies the use of stronger Message Authentication Codes (MACs), protects against replays for long-lived TCP connections, and diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 18108a6f0499..7f9f98813986 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -10,7 +10,7 @@ obj-y := route.o inetpeer.o protocol.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ tcp_recovery.o tcp_ulp.o \ - tcp_offload.o tcp_plb.o datagram.o raw.o udp.o udplite.o \ + tcp_offload.o tcp_plb.o datagram.o raw.o udp.o \ udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o fib_notifier.o \ inet_fragment.o ping.o ip_tunnel_core.o gre_offload.o \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c7731e300a44..0e62032e76b1 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -104,7 +104,6 @@ #include <net/tcp.h> #include <net/psp.h> #include <net/udp.h> -#include <net/udplite.h> #include <net/ping.h> #include <linux/skbuff.h> #include <net/sock.h> @@ -858,11 +857,13 @@ EXPORT_SYMBOL_GPL(inet_send_prepare); int inet_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = 
sock->sk; + const struct proto *prot; if (unlikely(inet_send_prepare(sk))) return -EAGAIN; - return INDIRECT_CALL_2(sk->sk_prot->sendmsg, tcp_sendmsg, udp_sendmsg, + prot = READ_ONCE(sk->sk_prot); + return INDIRECT_CALL_2(prot->sendmsg, tcp_sendmsg, udp_sendmsg, sk, msg, size); } EXPORT_SYMBOL(inet_sendmsg); @@ -882,23 +883,18 @@ void inet_splice_eof(struct socket *sock) } EXPORT_SYMBOL_GPL(inet_splice_eof); -INDIRECT_CALLABLE_DECLARE(int udp_recvmsg(struct sock *, struct msghdr *, - size_t, int, int *)); int inet_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = sock->sk; - int addr_len = 0; - int err; + const struct proto *prot; if (likely(!(flags & MSG_ERRQUEUE))) sock_rps_record_flow(sk); - err = INDIRECT_CALL_2(sk->sk_prot->recvmsg, tcp_recvmsg, udp_recvmsg, - sk, msg, size, flags, &addr_len); - if (err >= 0) - msg->msg_namelen = addr_len; - return err; + prot = READ_ONCE(sk->sk_prot); + return INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udp_recvmsg, + sk, msg, size, flags); } EXPORT_SYMBOL(inet_recvmsg); @@ -1095,6 +1091,7 @@ const struct proto_ops inet_stream_ops = { .compat_ioctl = inet_compat_ioctl, #endif .set_rcvlowat = tcp_set_rcvlowat, + .set_rcvbuf = tcp_set_rcvbuf, }; EXPORT_SYMBOL(inet_stream_ops); @@ -1583,15 +1580,15 @@ __be32 inet_current_timestamp(void) } EXPORT_SYMBOL(inet_current_timestamp); -int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) +int inet_recv_error(struct sock *sk, struct msghdr *msg, int len) { unsigned int family = READ_ONCE(sk->sk_family); if (family == AF_INET) - return ip_recv_error(sk, msg, len, addr_len); + return ip_recv_error(sk, msg, len); #if IS_ENABLED(CONFIG_IPV6) if (family == AF_INET6) - return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len); + return pingv6_ops.ipv6_recv_error(sk, msg, len); #endif return -EINVAL; } @@ -1737,9 +1734,6 @@ static __net_init int ipv4_mib_init_net(struct net *net) net->mib.udp_statistics = alloc_percpu(struct 
udp_mib); if (!net->mib.udp_statistics) goto err_udp_mib; - net->mib.udplite_statistics = alloc_percpu(struct udp_mib); - if (!net->mib.udplite_statistics) - goto err_udplite_mib; net->mib.icmp_statistics = alloc_percpu(struct icmp_mib); if (!net->mib.icmp_statistics) goto err_icmp_mib; @@ -1753,8 +1747,6 @@ static __net_init int ipv4_mib_init_net(struct net *net) err_icmpmsg_mib: free_percpu(net->mib.icmp_statistics); err_icmp_mib: - free_percpu(net->mib.udplite_statistics); -err_udplite_mib: free_percpu(net->mib.udp_statistics); err_udp_mib: free_percpu(net->mib.net_statistics); @@ -1770,7 +1762,6 @@ static __net_exit void ipv4_mib_exit_net(struct net *net) { kfree(net->mib.icmpmsg_statistics); free_percpu(net->mib.icmp_statistics); - free_percpu(net->mib.udplite_statistics); free_percpu(net->mib.udp_statistics); free_percpu(net->mib.net_statistics); free_percpu(net->mib.ip_statistics); @@ -1986,9 +1977,6 @@ static int __init inet_init(void) /* Setup UDP memory threshold */ udp_init(); - /* Add UDP-Lite (RFC 3828) */ - udplite4_register(); - raw_init(); ping_init(); diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c index e01492234b0b..008edc7f6688 100644 --- a/net/ipv4/bpf_tcp_ca.c +++ b/net/ipv4/bpf_tcp_ca.c @@ -272,6 +272,10 @@ static void bpf_tcp_ca_cwnd_event(struct sock *sk, enum tcp_ca_event ev) { } +static void bpf_tcp_ca_cwnd_event_tx_start(struct sock *sk) +{ +} + static void bpf_tcp_ca_in_ack_event(struct sock *sk, u32 flags) { } @@ -313,6 +317,7 @@ static struct tcp_congestion_ops __bpf_ops_tcp_congestion_ops = { .cong_avoid = bpf_tcp_ca_cong_avoid, .set_state = bpf_tcp_ca_set_state, .cwnd_event = bpf_tcp_ca_cwnd_event, + .cwnd_event_tx_start = bpf_tcp_ca_cwnd_event_tx_start, .in_ack_event = bpf_tcp_ca_in_ack_event, .pkts_acked = bpf_tcp_ca_pkts_acked, .min_tso_segs = bpf_tcp_ca_min_tso_segs, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 537bb6c315d2..58fe7cb69545 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ 
-2063,12 +2063,50 @@ static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = { [IFLA_INET_CONF] = { .type = NLA_NESTED }, }; +static const struct nla_policy inet_devconf_policy[IPV4_DEVCONF_MAX + 1] = { + [IPV4_DEVCONF_FORWARDING] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_MC_FORWARDING] = { .type = NLA_REJECT }, + [IPV4_DEVCONF_PROXY_ARP] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_ACCEPT_REDIRECTS] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_SECURE_REDIRECTS] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_SEND_REDIRECTS] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_SHARED_MEDIA] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_RP_FILTER] = NLA_POLICY_RANGE(NLA_U32, 0, 2), + [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_BOOTP_RELAY] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_LOG_MARTIANS] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_TAG] = { .type = NLA_U32 }, + [IPV4_DEVCONF_ARPFILTER] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_MEDIUM_ID] = NLA_POLICY_MIN(NLA_S32, -1), + [IPV4_DEVCONF_NOXFRM] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_NOPOLICY] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_FORCE_IGMP_VERSION] = NLA_POLICY_RANGE(NLA_U32, 0, 3), + [IPV4_DEVCONF_ARP_ANNOUNCE] = NLA_POLICY_RANGE(NLA_U32, 0, 2), + [IPV4_DEVCONF_ARP_IGNORE] = NLA_POLICY_RANGE(NLA_U32, 0, 8), + [IPV4_DEVCONF_PROMOTE_SECONDARIES] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_ARP_ACCEPT] = NLA_POLICY_RANGE(NLA_U32, 0, 2), + [IPV4_DEVCONF_ARP_NOTIFY] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_ACCEPT_LOCAL] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_SRC_VMARK] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_PROXY_ARP_PVLAN] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_ROUTE_LOCALNET] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_BC_FORWARDING] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_IGMPV2_UNSOLICITED_REPORT_INTERVAL] = { .type = 
NLA_U32 }, + [IPV4_DEVCONF_IGMPV3_UNSOLICITED_REPORT_INTERVAL] = { .type = NLA_U32 }, + [IPV4_DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN] = + NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_DROP_UNICAST_IN_L2_MULTICAST] = + NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_DROP_GRATUITOUS_ARP] = NLA_POLICY_RANGE(NLA_U32, 0, 1), + [IPV4_DEVCONF_ARP_EVICT_NOCARRIER] = NLA_POLICY_RANGE(NLA_U32, 0, 1), +}; + static int inet_validate_link_af(const struct net_device *dev, const struct nlattr *nla, struct netlink_ext_ack *extack) { - struct nlattr *a, *tb[IFLA_INET_MAX+1]; - int err, rem; + struct nlattr *tb[IFLA_INET_MAX + 1], *nested_tb[IPV4_DEVCONF_MAX + 1]; + int err; if (dev && !__in_dev_get_rtnl(dev)) return -EAFNOSUPPORT; @@ -2079,15 +2117,12 @@ static int inet_validate_link_af(const struct net_device *dev, return err; if (tb[IFLA_INET_CONF]) { - nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) { - int cfgid = nla_type(a); + err = nla_parse_nested(nested_tb, IPV4_DEVCONF_MAX, + tb[IFLA_INET_CONF], inet_devconf_policy, + extack); - if (nla_len(a) < 4) - return -EINVAL; - - if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX) - return -EINVAL; - } + if (err < 0) + return err; } return 0; diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 01cb587866d8..3e8fadc28798 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -585,9 +585,8 @@ static int fib_detect_death(struct fib_info *fi, int order, if (likely(nhc->nhc_gw_family == AF_INET)) n = neigh_lookup(&arp_tbl, &nhc->nhc_gw.ipv4, nhc->nhc_dev); - else if (nhc->nhc_gw_family == AF_INET6) - n = neigh_lookup(ipv6_stub->nd_tbl, &nhc->nhc_gw.ipv6, - nhc->nhc_dev); + else if (IS_ENABLED(CONFIG_IPV6) && nhc->nhc_gw_family == AF_INET6) + n = neigh_lookup(&nd_tbl, &nhc->nhc_gw.ipv6, nhc->nhc_dev); else n = NULL; @@ -1083,7 +1082,7 @@ static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh, struct fib6_nh fib6_nh = {}; int err; - err = ipv6_stub->fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, 
extack); + err = fib6_nh_init(net, &fib6_nh, &cfg, GFP_KERNEL, extack); if (!err) { nh->fib_nh_dev = fib6_nh.fib_nh_dev; netdev_hold(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, @@ -1091,7 +1090,7 @@ static int fib_check_nh_v6_gw(struct net *net, struct fib_nh *nh, nh->fib_nh_oif = nh->fib_nh_dev->ifindex; nh->fib_nh_scope = RT_SCOPE_LINK; - ipv6_stub->fib6_nh_release(&fib6_nh); + fib6_nh_release(&fib6_nh); } return err; @@ -2147,9 +2146,10 @@ static bool fib_good_nh(const struct fib_nh *nh) if (likely(nh->fib_nh_gw_family == AF_INET)) n = __ipv4_neigh_lookup_noref(nh->fib_nh_dev, (__force u32)nh->fib_nh_gw4); - else if (nh->fib_nh_gw_family == AF_INET6) - n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, - &nh->fib_nh_gw6); + else if (IS_ENABLED(CONFIG_IPV6) && + nh->fib_nh_gw_family == AF_INET6) + n = __ipv6_neigh_lookup_noref(nh->fib_nh_dev, + &nh->fib_nh_gw6); else n = NULL; if (n) diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c index 3baaa4df7e42..5bae3cf7fe76 100644 --- a/net/ipv4/fou_core.c +++ b/net/ipv4/fou_core.c @@ -1150,8 +1150,7 @@ static int gue_err(struct sk_buff *skb, u32 info) * recursion. Besides, this kind of encapsulation can't even be * configured currently. Discard this. 
*/ - if (guehdr->proto_ctype == IPPROTO_UDP || - guehdr->proto_ctype == IPPROTO_UDPLITE) + if (guehdr->proto_ctype == IPPROTO_UDP) return -EOPNOTSUPP; skb_set_transport_header(skb, -(int)sizeof(struct icmphdr)); diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index dafd68f3436a..96fd7dc6d82d 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -159,14 +159,18 @@ static int gre_rcv(struct sk_buff *skb) rcu_read_lock(); proto = rcu_dereference(gre_proto[ver]); if (!proto || !proto->handler) - goto drop_unlock; + goto drop_nohandler; ret = proto->handler(skb); rcu_read_unlock(); return ret; -drop_unlock: +drop_nohandler: rcu_read_unlock(); + dev_core_stats_rx_nohandler_inc(skb->dev); + kfree_skb(skb); + return NET_RX_DROP; drop: + dev_core_stats_rx_dropped_inc(skb->dev); kfree_skb(skb); return NET_RX_DROP; } diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4e2a6c70dcd8..2f4fac22d1ab 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -263,7 +263,6 @@ bool icmp_global_allow(struct net *net) } return true; } -EXPORT_SYMBOL(icmp_global_allow); void icmp_global_consume(struct net *net) { @@ -273,7 +272,6 @@ void icmp_global_consume(struct net *net) if (credits) atomic_sub(credits, &net->ipv4.icmp_global_credit); } -EXPORT_SYMBOL(icmp_global_consume); static bool icmpv4_mask_allow(struct net *net, int type, int code) { @@ -591,7 +589,6 @@ static struct rtable *icmp_route_lookup(struct net *net, struct flowi4 *fl4, rt2 = dst_rtable(dst2); if (!IS_ERR(dst2)) { dst_release(&rt->dst); - memcpy(fl4, &fl4_dec, sizeof(*fl4)); rt = rt2; } else if (PTR_ERR(dst2) == -EPERM) { if (rt) @@ -1345,14 +1342,7 @@ bool icmp_build_probe(struct sk_buff *skb, struct icmphdr *icmphdr) case ICMP_AFI_IP6: if (iio->ident.addr.ctype3_hdr.addrlen != sizeof(struct in6_addr)) goto send_mal_query; - dev = ipv6_stub->ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev); - /* - * If IPv6 identifier lookup is unavailable, silently - * discard the request instead of 
misreporting NO_IF. - */ - if (IS_ERR(dev)) - return false; - + dev = ipv6_dev_find(net, &iio->ident.addr.ip_addr.ipv6_addr, dev); dev_hold(dev); break; #endif @@ -1386,7 +1376,6 @@ send_mal_query: icmphdr->code = ICMP_EXT_CODE_MAL_QUERY; return true; } -EXPORT_SYMBOL_GPL(icmp_build_probe); /* * Handle ICMP Timestamp requests. @@ -1608,7 +1597,6 @@ void ip_icmp_error_rfc4884(const struct sk_buff *skb, if (!ip_icmp_error_rfc4884_validate(skb, off)) out->flags |= SO_EE_RFC4884_FLAG_INVALID; } -EXPORT_SYMBOL_GPL(ip_icmp_error_rfc4884); int icmp_err(struct sk_buff *skb, u32 info) { @@ -1736,8 +1724,8 @@ static int __net_init icmp_sk_init(struct net *net) net->ipv4.sysctl_icmp_ratemask = 0x1818; net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr = 0; net->ipv4.sysctl_icmp_errors_extension_mask = 0; - net->ipv4.sysctl_icmp_msgs_per_sec = 1000; - net->ipv4.sysctl_icmp_msgs_burst = 50; + net->ipv4.sysctl_icmp_msgs_per_sec = 10000; + net->ipv4.sysctl_icmp_msgs_burst = 10000; return 0; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e961936b6be7..4ac3ae1bc1af 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -107,7 +107,6 @@ bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, ipv6_only_sock(sk2), match_wildcard, match_wildcard); } -EXPORT_SYMBOL(inet_rcv_saddr_equal); bool inet_rcv_saddr_any(const struct sock *sk) { @@ -710,7 +709,6 @@ out_err: arg->err = error; return NULL; } -EXPORT_SYMBOL(inet_csk_accept); /* * Using different timers for retransmit, delayed acks and probes @@ -1022,7 +1020,6 @@ void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req inet_csk_reqsk_queue_drop(sk, req); reqsk_put(req); } -EXPORT_IPV6_MOD(inet_csk_reqsk_queue_drop_and_put); static void reqsk_timer_handler(struct timer_list *t) { @@ -1523,7 +1520,6 @@ skip_child_forget: } WARN_ON_ONCE(sk->sk_ack_backlog); } -EXPORT_SYMBOL_GPL(inet_csk_listen_stop); static struct dst_entry 
*inet_csk_rebuild_route(struct sock *sk, struct flowi *fl) { diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 9d215485b5c7..34b77aa87d0a 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -241,7 +241,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, inet_diag_msg_common_fill(r, sk); r->idiag_state = sk->sk_state; - r->idiag_timer = 0; + r->idiag_timer = IDIAG_TIMER_OFF; r->idiag_retrans = 0; r->idiag_expires = 0; @@ -284,20 +284,25 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, if (icsk_pending == ICSK_TIME_RETRANS || icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk_pending == ICSK_TIME_LOSS_PROBE) { - r->idiag_timer = 1; + r->idiag_timer = IDIAG_TIMER_ON; r->idiag_retrans = READ_ONCE(icsk->icsk_retransmits); r->idiag_expires = jiffies_delta_to_msecs(tcp_timeout_expires(sk) - jiffies); } else if (icsk_pending == ICSK_TIME_PROBE0) { - r->idiag_timer = 4; + r->idiag_timer = IDIAG_TIMER_PROBE0; r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out); r->idiag_expires = jiffies_delta_to_msecs(tcp_timeout_expires(sk) - jiffies); } else if (timer_pending(&icsk->icsk_keepalive_timer)) { - r->idiag_timer = 2; + r->idiag_timer = IDIAG_TIMER_KEEPALIVE; r->idiag_retrans = READ_ONCE(icsk->icsk_probes_out); r->idiag_expires = jiffies_delta_to_msecs(icsk->icsk_keepalive_timer.expires - jiffies); + } else if ((READ_ONCE(icsk->icsk_ack.pending) & ICSK_ACK_TIMER) && + timer_pending(&icsk->icsk_delack_timer)) { + r->idiag_timer = IDIAG_TIMER_DELACK; + r->idiag_expires = + jiffies_delta_to_msecs(icsk_delack_timeout(icsk) - jiffies); } if ((ext & (1 << (INET_DIAG_INFO - 1))) && handler->idiag_info_size) { diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 9bfccc283fa6..3e795547b40c 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -16,6 +16,7 @@ #include <linux/wait.h> #include <linux/vmalloc.h> #include <linux/memblock.h> +#include <linux/gcd.h> #include 
<net/addrconf.h> #include <net/inet_connection_sock.h> @@ -30,12 +31,16 @@ #include <net/sock_reuseport.h> #include <net/tcp.h> +static void inet_init_ehash_secret(void) +{ + net_get_random_sleepable_once(&inet_ehash_secret, + sizeof(inet_ehash_secret)); +} + u32 inet_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) { - net_get_random_once(&inet_ehash_secret, sizeof(inet_ehash_secret)); - return lport + __inet_ehashfn(laddr, 0, faddr, fport, inet_ehash_secret + net_hash_mix(net)); } @@ -753,7 +758,6 @@ bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) } return ok; } -EXPORT_IPV6_MOD(inet_ehash_nolisten); static int inet_reuseport_add_sock(struct sock *sk, struct inet_listen_hashbucket *ilb) @@ -793,6 +797,13 @@ int inet_hash(struct sock *sk) local_bh_enable(); return 0; } + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + inet6_init_ehash_secret(); +#endif + inet_init_ehash_secret(); + WARN_ON(!sk_unhashed(sk)); ilb2 = inet_lhash2_bucket_sk(hashinfo, sk); @@ -814,7 +825,6 @@ unlock: return err; } -EXPORT_IPV6_MOD(inet_hash); void inet_unhash(struct sock *sk) { @@ -847,7 +857,6 @@ void inet_unhash(struct sock *sk) spin_unlock_bh(lock); } } -EXPORT_IPV6_MOD(inet_unhash); static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb, const struct net *net, unsigned short port, @@ -1010,14 +1019,12 @@ int inet_bhash2_update_saddr(struct sock *sk, void *saddr, int family) { return __inet_bhash2_update_saddr(sk, saddr, family, false); } -EXPORT_IPV6_MOD(inet_bhash2_update_saddr); void inet_bhash2_reset_saddr(struct sock *sk) { if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) __inet_bhash2_update_saddr(sk, NULL, 0, true); } -EXPORT_IPV6_MOD(inet_bhash2_reset_saddr); /* RFC 6056 3.3.4. 
Algorithm 4: Double-Hash Port Selection Algorithm * Note that we use 32bit integers (vs RFC 'short integers') @@ -1046,12 +1053,12 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, struct net *net = sock_net(sk); struct inet_bind2_bucket *tb2; struct inet_bind_bucket *tb; + int step, scan_step, l3mdev; + u32 index, max_rand_step; bool tb_created = false; u32 remaining, offset; int ret, i, low, high; bool local_ports; - int step, l3mdev; - u32 index; if (port) { local_bh_disable(); @@ -1065,6 +1072,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, local_ports = inet_sk_get_local_port_range(sk, &low, &high); step = local_ports ? 1 : 2; + scan_step = step; + max_rand_step = READ_ONCE(net->ipv4.sysctl_ip_local_port_step_width); high++; /* [32768, 60999] -> [32768, 61000[ */ remaining = high - low; @@ -1083,9 +1092,28 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, */ if (!local_ports) offset &= ~1U; + + if (max_rand_step && remaining > 1) { + u32 range = remaining / step; + u32 upper_bound; + + upper_bound = min(range, max_rand_step); + scan_step = get_random_u32_inclusive(1, upper_bound); + while (gcd(scan_step, range) != 1) { + scan_step++; + /* if both scan_step and range are even gcd won't be 1 */ + if (!(scan_step & 1) && !(range & 1)) + scan_step++; + if (unlikely(scan_step > upper_bound)) { + scan_step = 1; + break; + } + } + scan_step *= step; + } other_parity_scan: port = low + offset; - for (i = 0; i < remaining; i += step, port += step) { + for (i = 0; i < remaining; i += step, port += scan_step) { if (unlikely(port >= high)) port -= remaining; if (inet_is_local_reserved_port(net, port)) @@ -1239,6 +1267,8 @@ int inet_hash_connect(struct inet_timewait_death_row *death_row, if (!inet_sk(sk)->inet_num) port_offset = inet_sk_port_offset(sk); + inet_init_ehash_secret(); + hash_port0 = inet_ehashfn(net, inet->inet_rcv_saddr, 0, inet->inet_daddr, inet->inet_dport); @@ -1246,22 +1276,13 @@ int 
inet_hash_connect(struct inet_timewait_death_row *death_row, __inet_check_established); } -static void init_hashinfo_lhash2(struct inet_hashinfo *h) -{ - int i; - - for (i = 0; i <= h->lhash2_mask; i++) { - spin_lock_init(&h->lhash2[i].lock); - INIT_HLIST_NULLS_HEAD(&h->lhash2[i].nulls_head, - i + LISTENING_NULLS_BASE); - } -} - void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, unsigned long numentries, int scale, unsigned long low_limit, unsigned long high_limit) { + unsigned int i; + h->lhash2 = alloc_large_system_hash(name, sizeof(*h->lhash2), numentries, @@ -1271,7 +1292,12 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, &h->lhash2_mask, low_limit, high_limit); - init_hashinfo_lhash2(h); + + for (i = 0; i <= h->lhash2_mask; i++) { + spin_lock_init(&h->lhash2[i].lock); + INIT_HLIST_NULLS_HEAD(&h->lhash2[i].nulls_head, + i + LISTENING_NULLS_BASE); + } /* this one is used for source ports of outgoing connections */ table_perturb = alloc_large_system_hash("Table-perturb", @@ -1282,20 +1308,6 @@ void __init inet_hashinfo2_init(struct inet_hashinfo *h, const char *name, INET_TABLE_PERTURB_SIZE); } -int inet_hashinfo2_init_mod(struct inet_hashinfo *h) -{ - h->lhash2 = kmalloc_objs(*h->lhash2, INET_LHTABLE_SIZE); - if (!h->lhash2) - return -ENOMEM; - - h->lhash2_mask = INET_LHTABLE_SIZE - 1; - /* INET_LHTABLE_SIZE must be a power of 2 */ - BUG_ON(INET_LHTABLE_SIZE & h->lhash2_mask); - - init_hashinfo_lhash2(h); - return 0; -} - int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) { unsigned int locksz = sizeof(spinlock_t); diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 7b1e0a2d6906..d8083b9033c2 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -1,8 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 /* * INETPEER - A storage for permanent information about peers * - * This source is covered by the GNU GPL, the same as all kernel sources. - * * Authors: Andrey V. 
Savochkin <saw@msu.ru> */ @@ -60,7 +59,6 @@ void inet_peer_base_init(struct inet_peer_base *bp) seqlock_init(&bp->lock); bp->total = 0; } -EXPORT_IPV6_MOD_GPL(inet_peer_base_init); #define PEER_MAX_GC 32 @@ -218,7 +216,6 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, return p; } -EXPORT_IPV6_MOD_GPL(inet_getpeer); void inet_putpeer(struct inet_peer *p) { @@ -269,7 +266,6 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) WRITE_ONCE(peer->rate_tokens, token); return rc; } -EXPORT_IPV6_MOD(inet_peer_xrlim_allow); void inetpeer_invalidate_tree(struct inet_peer_base *base) { @@ -286,4 +282,3 @@ void inetpeer_invalidate_tree(struct inet_peer_base *base) base->total = 0; } -EXPORT_IPV6_MOD(inetpeer_invalidate_tree); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 35f0baa99d40..169e2921a851 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -468,6 +468,7 @@ static int gre_rcv(struct sk_buff *skb) out: icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); drop: + dev_core_stats_rx_dropped_inc(skb->dev); kfree_skb(skb); return 0; } diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 19d3141dad1f..9860178752b8 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -319,6 +319,45 @@ static bool ip_can_use_hint(const struct sk_buff *skb, const struct iphdr *iph, ip_hdr(hint)->tos == iph->tos; } +static int tcp_v4_early_demux(struct sk_buff *skb) +{ + struct net *net = dev_net_rcu(skb->dev); + const struct iphdr *iph; + const struct tcphdr *th; + struct sock *sk; + + if (skb->pkt_type != PACKET_HOST) + return 0; + + if (!pskb_may_pull(skb, skb_transport_offset(skb) + + sizeof(struct tcphdr))) + return 0; + + iph = ip_hdr(skb); + th = tcp_hdr(skb); + + if (th->doff < sizeof(struct tcphdr) / 4) + return 0; + + sk = __inet_lookup_established(net, iph->saddr, th->source, + iph->daddr, ntohs(th->dest), + skb->skb_iif, inet_sdif(skb)); + if (sk) { + skb->sk = sk; + skb->destructor = sock_edemux; + if (sk_fullsock(sk)) 
{ + struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); + + if (dst) + dst = dst_check(dst, 0); + if (dst && + sk->sk_rx_dst_ifindex == skb->skb_iif) + skb_dst_set_noref(skb, dst); + } + } + return 0; +} + static int ip_rcv_finish_core(struct net *net, struct sk_buff *skb, struct net_device *dev, const struct sk_buff *hint) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 697e18242d6c..a55ef327ec93 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -520,7 +520,7 @@ static bool ipv4_datagram_support_cmsg(const struct sock *sk, /* * Handle MSG_ERRQUEUE */ -int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) +int ip_recv_error(struct sock *sk, struct msghdr *msg, int len) { struct sock_exterr_skb *serr; struct sk_buff *skb; @@ -557,7 +557,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr->addr_offset); sin->sin_port = serr->port; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); - *addr_len = sizeof(*sin); + msg->msg_namelen = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index 5683c328990f..2667f53482bd 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -60,12 +60,12 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, if (unlikely(dev_recursion_level() > IP_TUNNEL_RECURSION_LIMIT)) { if (dev) { - net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n", - dev->name); + net_crit_ratelimited("Dead loop on virtual device %s (net %llu), fix it urgently!\n", + dev->name, dev_net(dev)->net_cookie); DEV_STATS_INC(dev, tx_errors); } ip_rt_put(rt); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_DROP_REASON_RECURSION_LIMIT); return; } diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 131382c388e9..8a08d09b4c30 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -102,7 +102,8 @@ static 
DEFINE_SPINLOCK(mfc_unres_lock); static struct kmem_cache *mrt_cachep __ro_after_init; static struct mr_table *ipmr_new_table(struct net *net, u32 id); -static void ipmr_free_table(struct mr_table *mrt); +static void ipmr_free_table(struct mr_table *mrt, + struct list_head *dev_kill_list); static void ip_mr_forward(struct net *net, struct mr_table *mrt, struct net_device *dev, struct sk_buff *skb, @@ -112,7 +113,8 @@ static int ipmr_cache_report(const struct mr_table *mrt, static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, int cmd); static void igmpmsg_netlink_event(const struct mr_table *mrt, struct sk_buff *pkt); -static void mroute_clean_tables(struct mr_table *mrt, int flags); +static void mroute_clean_tables(struct mr_table *mrt, int flags, + struct list_head *dev_kill_list); static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES @@ -250,6 +252,7 @@ static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = { static int __net_init ipmr_rules_init(struct net *net) { struct fib_rules_ops *ops; + LIST_HEAD(dev_kill_list); struct mr_table *mrt; int err; @@ -273,9 +276,7 @@ static int __net_init ipmr_rules_init(struct net *net) return 0; err2: - rtnl_lock(); - ipmr_free_table(mrt); - rtnl_unlock(); + ipmr_free_table(mrt, &dev_kill_list); err1: fib_rules_unregister(ops); return err; @@ -283,14 +284,18 @@ err1: static void __net_exit ipmr_rules_exit(struct net *net) { + fib_rules_unregister(net->ipv4.mr_rules_ops); +} + +static void __net_exit ipmr_rules_exit_rtnl(struct net *net, + struct list_head *dev_kill_list) +{ struct mr_table *mrt, *next; - ASSERT_RTNL(); list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { list_del(&mrt->list); - ipmr_free_table(mrt); + ipmr_free_table(mrt, dev_kill_list); } - fib_rules_unregister(net->ipv4.mr_rules_ops); } static int ipmr_rules_dump(struct net *net, struct notifier_block *nb, @@ -348,8 +353,13 @@ static int __net_init 
ipmr_rules_init(struct net *net) static void __net_exit ipmr_rules_exit(struct net *net) { - ASSERT_RTNL(); - ipmr_free_table(net->ipv4.mrt); +} + +static void __net_exit ipmr_rules_exit_rtnl(struct net *net, + struct list_head *dev_kill_list) +{ + ipmr_free_table(net->ipv4.mrt, dev_kill_list); + net->ipv4.mrt = NULL; } @@ -424,17 +434,22 @@ static struct mr_table *ipmr_new_table(struct net *net, u32 id) ipmr_expire_process, ipmr_new_table_set); } -static void ipmr_free_table(struct mr_table *mrt) +static void ipmr_free_table(struct mr_table *mrt, struct list_head *dev_kill_list) { struct net *net = read_pnet(&mrt->net); + LIST_HEAD(ipmr_dev_kill_list); WARN_ON_ONCE(!mr_can_free_table(net)); timer_shutdown_sync(&mrt->ipmr_expire_timer); mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_VIFS_STATIC | - MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC); + MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC, + &ipmr_dev_kill_list); rhltable_destroy(&mrt->mfc_hash); kfree(mrt); + + WARN_ON_ONCE(!net_initialized(net) && !list_empty(&ipmr_dev_kill_list)); + list_splice(&ipmr_dev_kill_list, dev_kill_list); } /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ @@ -1196,7 +1211,6 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) struct net *net = read_pnet(&mrt->net); struct mfc_cache *c; - /* The entries are added/deleted only under RTNL */ rcu_read_lock(); c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, mfc->mfcc_mcastgrp.s_addr, parent); @@ -1223,7 +1237,6 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, if (mfc->mfcc_parent >= MAXVIFS) return -ENFILE; - /* The entries are added/deleted only under RTNL */ rcu_read_lock(); c = ipmr_cache_find_parent(mrt, mfc->mfcc_origin.s_addr, mfc->mfcc_mcastgrp.s_addr, parent); @@ -1293,12 +1306,12 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, } /* Close the multicast socket, and clear the vif tables etc */ -static void mroute_clean_tables(struct mr_table 
*mrt, int flags) +static void mroute_clean_tables(struct mr_table *mrt, int flags, + struct list_head *dev_kill_list) { struct net *net = read_pnet(&mrt->net); - struct mr_mfc *c, *tmp; struct mfc_cache *cache; - LIST_HEAD(list); + struct mr_mfc *c, *tmp; int i; /* Shut down all active vif entries */ @@ -1308,13 +1321,14 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags) !(flags & MRT_FLUSH_VIFS_STATIC)) || (!(mrt->vif_table[i].flags & VIFF_STATIC) && !(flags & MRT_FLUSH_VIFS))) continue; - vif_delete(mrt, i, 0, &list); + vif_delete(mrt, i, 0, dev_kill_list); } - unregister_netdevice_many(&list); } /* Wipe the cache */ if (flags & (MRT_FLUSH_MFC | MRT_FLUSH_MFC_STATIC)) { + mutex_lock(&net->ipv4.mfc_mutex); + list_for_each_entry_safe(c, tmp, &mrt->mfc_cache_list, list) { if (((c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC_STATIC)) || (!(c->mfc_flags & MFC_STATIC) && !(flags & MRT_FLUSH_MFC))) @@ -1327,6 +1341,8 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags) mroute_netlink_event(mrt, cache, RTM_DELROUTE); mr_cache_put(c); } + + mutex_unlock(&net->ipv4.mfc_mutex); } if (flags & MRT_FLUSH_MFC) { @@ -1349,9 +1365,11 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags) static void mrtsock_destruct(struct sock *sk) { struct net *net = sock_net(sk); + LIST_HEAD(dev_kill_list); struct mr_table *mrt; rtnl_lock(); + ipmr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; @@ -1360,9 +1378,13 @@ static void mrtsock_destruct(struct sock *sk) NETCONFA_IFINDEX_ALL, net->ipv4.devconf_all); RCU_INIT_POINTER(mrt->mroute_sk, NULL); - mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC); + mroute_clean_tables(mrt, MRT_FLUSH_VIFS | MRT_FLUSH_MFC, + &dev_kill_list); } } + + unregister_netdevice_many(&dev_kill_list); + rtnl_unlock(); } @@ -1478,14 +1500,21 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, } if (parent == 0) parent = 
mfc.mfcc_parent; + + mutex_lock(&net->ipv4.mfc_mutex); + if (optname == MRT_DEL_MFC || optname == MRT_DEL_MFC_PROXY) ret = ipmr_mfc_delete(mrt, &mfc, parent); else ret = ipmr_mfc_add(net, mrt, &mfc, sk == rtnl_dereference(mrt->mroute_sk), parent); + + mutex_unlock(&net->ipv4.mfc_mutex); break; - case MRT_FLUSH: + case MRT_FLUSH: { + LIST_HEAD(dev_kill_list); + if (optlen != sizeof(val)) { ret = -EINVAL; break; @@ -1494,8 +1523,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, ret = -EFAULT; break; } - mroute_clean_tables(mrt, val); + + mroute_clean_tables(mrt, val, &dev_kill_list); + unregister_netdevice_many(&dev_kill_list); break; + } /* Control PIM assert. */ case MRT_ASSERT: if (optlen != sizeof(val)) { @@ -1506,7 +1538,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, ret = -EFAULT; break; } - mrt->mroute_do_assert = val; + WRITE_ONCE(mrt->mroute_do_assert, val); break; case MRT_PIM: if (!ipmr_pimsm_enabled()) { @@ -1525,9 +1557,9 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, sockptr_t optval, do_wrvifwhole = (val == IGMPMSG_WRVIFWHOLE); val = !!val; if (val != mrt->mroute_do_pim) { - mrt->mroute_do_pim = val; - mrt->mroute_do_assert = val; - mrt->mroute_do_wrvifwhole = do_wrvifwhole; + WRITE_ONCE(mrt->mroute_do_pim, val); + WRITE_ONCE(mrt->mroute_do_assert, val); + WRITE_ONCE(mrt->mroute_do_wrvifwhole, do_wrvifwhole); } break; case MRT_TABLE: @@ -1610,10 +1642,10 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, sockptr_t optval, case MRT_PIM: if (!ipmr_pimsm_enabled()) return -ENOPROTOOPT; - val = mrt->mroute_do_pim; + val = READ_ONCE(mrt->mroute_do_pim); break; case MRT_ASSERT: - val = mrt->mroute_do_assert; + val = READ_ONCE(mrt->mroute_do_assert); break; default: return -ENOPROTOOPT; @@ -2037,20 +2069,20 @@ static void ip_mr_forward(struct net *net, struct mr_table *mrt, atomic_long_inc(&c->_c.mfc_un.res.wrong_if); - if (true_vifi >= 0 && mrt->mroute_do_assert && + if (true_vifi 
>= 0 && READ_ONCE(mrt->mroute_do_assert) && /* pimsm uses asserts, when switching from RPT to SPT, * so that we cannot check that packet arrived on an oif. * It is bad, but otherwise we would need to move pretty * large chunk of pimd to kernel. Ough... --ANK */ - (mrt->mroute_do_pim || + (READ_ONCE(mrt->mroute_do_pim) || c->_c.mfc_un.res.ttls[true_vifi] < 255) && time_after(jiffies, c->_c.mfc_un.res.last_assert + MFC_ASSERT_THRESH)) { c->_c.mfc_un.res.last_assert = jiffies; ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF); - if (mrt->mroute_do_wrvifwhole) + if (READ_ONCE(mrt->mroute_do_wrvifwhole)) ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRVIFWHOLE); } @@ -2358,7 +2390,7 @@ int pim_rcv_v1(struct sk_buff *skb) mrt = ipmr_rt_fib_lookup(net, skb); if (IS_ERR(mrt)) goto drop; - if (!mrt->mroute_do_pim || + if (!READ_ONCE(mrt->mroute_do_pim) || pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) goto drop; @@ -2510,7 +2542,7 @@ static int _ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, cmd, flags); } -static size_t mroute_msgsize(bool unresolved, int maxvif) +static size_t mroute_msgsize(bool unresolved) { size_t len = NLMSG_ALIGN(sizeof(struct rtmsg)) @@ -2523,7 +2555,7 @@ static size_t mroute_msgsize(bool unresolved, int maxvif) len = len + nla_total_size(4) /* RTA_IIF */ + nla_total_size(0) /* RTA_MULTIPATH */ - + maxvif * NLA_ALIGN(sizeof(struct rtnexthop)) + + MAXVIFS * NLA_ALIGN(sizeof(struct rtnexthop)) /* RTA_MFC_STATS */ + nla_total_size_64bit(sizeof(struct rta_mfc_stats)) ; @@ -2538,8 +2570,7 @@ static void mroute_netlink_event(struct mr_table *mrt, struct mfc_cache *mfc, struct sk_buff *skb; int err = -ENOBUFS; - skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS, - mrt->maxvif), + skb = nlmsg_new(mroute_msgsize(mfc->_c.mfc_parent >= MAXVIFS), GFP_ATOMIC); if (!skb) goto errout; @@ -2681,9 +2712,9 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, { struct net *net = 
sock_net(in_skb->sk); struct nlattr *tb[RTA_MAX + 1]; - struct sk_buff *skb = NULL; struct mfc_cache *cache; struct mr_table *mrt; + struct sk_buff *skb; __be32 src, grp; u32 tableid; int err; @@ -2696,39 +2727,40 @@ static int ipmr_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, grp = nla_get_in_addr_default(tb[RTA_DST], 0); tableid = nla_get_u32_default(tb[RTA_TABLE], 0); + skb = nlmsg_new(mroute_msgsize(false), GFP_KERNEL); + if (!skb) { + err = -ENOBUFS; + goto errout; + } + + rcu_read_lock(); + mrt = __ipmr_get_table(net, tableid ? tableid : RT_TABLE_DEFAULT); if (!mrt) { err = -ENOENT; - goto errout_free; + goto errout_unlock; } - /* entries are added/deleted only under RTNL */ - rcu_read_lock(); cache = ipmr_cache_find(mrt, src, grp); - rcu_read_unlock(); if (!cache) { err = -ENOENT; - goto errout_free; - } - - skb = nlmsg_new(mroute_msgsize(false, mrt->maxvif), GFP_KERNEL); - if (!skb) { - err = -ENOBUFS; - goto errout_free; + goto errout_unlock; } err = ipmr_fill_mroute(mrt, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, cache, RTM_NEWROUTE, 0); if (err < 0) - goto errout_free; + goto errout_unlock; - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); + rcu_read_unlock(); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: return err; -errout_free: +errout_unlock: + rcu_read_unlock(); kfree_skb(skb); goto errout; } @@ -2736,15 +2768,17 @@ errout_free: static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) { struct fib_dump_filter filter = { - .rtnl_held = true, + .rtnl_held = false, }; int err; + rcu_read_lock(); + if (cb->strict_check) { err = ip_valid_fib_dump_req(sock_net(skb->sk), cb->nlh, &filter, cb); if (err < 0) - return err; + goto out; } if (filter.table_id) { @@ -2752,19 +2786,28 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) mrt = __ipmr_get_table(sock_net(skb->sk), filter.table_id); if (!mrt) { - if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR) - 
return skb->len; + if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR) { + err = skb->len; + goto out; + } NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist"); - return -ENOENT; + err = -ENOENT; + goto out; } + err = mr_table_dump(mrt, skb, cb, _ipmr_fill_mroute, &mfc_unres_lock, &filter); - return skb->len ? : err; + err = skb->len ? : err; + goto out; } - return mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter, - _ipmr_fill_mroute, &mfc_unres_lock, &filter); + err = mr_rtm_dumproute(skb, cb, ipmr_mr_table_iter, + _ipmr_fill_mroute, &mfc_unres_lock, &filter); +out: + rcu_read_unlock(); + + return err; } static const struct nla_policy rtm_ipmr_policy[RTA_MAX + 1] = { @@ -2808,10 +2851,10 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, { struct net_device *dev = NULL; u32 tblid = RT_TABLE_DEFAULT; + int ret, rem, iif = 0; struct mr_table *mrt; struct nlattr *attr; struct rtmsg *rtm; - int ret, rem; ret = nlmsg_validate_deprecated(nlh, sizeof(*rtm), RTA_MAX, rtm_ipmr_policy, extack); @@ -2838,11 +2881,7 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, mfcc->mfcc_mcastgrp.s_addr = nla_get_be32(attr); break; case RTA_IIF: - dev = __dev_get_by_index(net, nla_get_u32(attr)); - if (!dev) { - ret = -ENODEV; - goto out; - } + iif = nla_get_u32(attr); break; case RTA_MULTIPATH: if (ipmr_nla_get_ttls(attr, mfcc) < 0) { @@ -2858,16 +2897,30 @@ static int rtm_to_ipmr_mfcc(struct net *net, struct nlmsghdr *nlh, break; } } + + rcu_read_lock(); + mrt = __ipmr_get_table(net, tblid); if (!mrt) { ret = -ENOENT; - goto out; + goto unlock; } + + if (iif) { + dev = dev_get_by_index_rcu(net, iif); + if (!dev) { + ret = -ENODEV; + goto unlock; + } + + mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); + } + *mrtret = mrt; *mrtsock = rtm->rtm_protocol == RTPROT_MROUTED ? 
1 : 0; - if (dev) - mfcc->mfcc_parent = ipmr_find_vif(mrt, dev); +unlock: + rcu_read_unlock(); out: return ret; } @@ -2877,21 +2930,26 @@ static int ipmr_rtm_route(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); - int ret, mrtsock, parent; - struct mr_table *tbl; + int ret, mrtsock = 0, parent; + struct mr_table *tbl = NULL; struct mfcctl mfcc; - mrtsock = 0; - tbl = NULL; ret = rtm_to_ipmr_mfcc(net, nlh, &mfcc, &mrtsock, &tbl, extack); if (ret < 0) return ret; parent = ret ? mfcc.mfcc_parent : -1; + + mutex_lock(&net->ipv4.mfc_mutex); + if (nlh->nlmsg_type == RTM_NEWROUTE) - return ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); + ret = ipmr_mfc_add(net, tbl, &mfcc, mrtsock, parent); else - return ipmr_mfc_delete(tbl, &mfcc, parent); + ret = ipmr_mfc_delete(tbl, &mfcc, parent); + + mutex_unlock(&net->ipv4.mfc_mutex); + + return ret; } static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) @@ -2901,12 +2959,13 @@ static bool ipmr_fill_table(struct mr_table *mrt, struct sk_buff *skb) if (nla_put_u32(skb, IPMRA_TABLE_ID, mrt->id) || nla_put_u32(skb, IPMRA_TABLE_CACHE_RES_QUEUE_LEN, queue_len) || nla_put_s32(skb, IPMRA_TABLE_MROUTE_REG_VIF_NUM, - mrt->mroute_reg_vif_num) || + READ_ONCE(mrt->mroute_reg_vif_num)) || nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_ASSERT, - mrt->mroute_do_assert) || - nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, mrt->mroute_do_pim) || + READ_ONCE(mrt->mroute_do_assert)) || + nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_PIM, + READ_ONCE(mrt->mroute_do_pim)) || nla_put_u8(skb, IPMRA_TABLE_MROUTE_DO_WRVIFWHOLE, - mrt->mroute_do_wrvifwhole)) + READ_ONCE(mrt->mroute_do_wrvifwhole))) return false; return true; @@ -2919,7 +2978,7 @@ static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) struct vif_device *vif; vif = &mrt->vif_table[vifid]; - vif_dev = rtnl_dereference(vif->dev); + vif_dev = vif_dev_read(vif); /* if the VIF doesn't exist just continue */ if 
(!vif_dev) return true; @@ -2928,16 +2987,16 @@ static bool ipmr_fill_vif(struct mr_table *mrt, u32 vifid, struct sk_buff *skb) if (!vif_nest) return false; - if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, vif_dev->ifindex) || + if (nla_put_u32(skb, IPMRA_VIFA_IFINDEX, READ_ONCE(vif_dev->ifindex)) || nla_put_u32(skb, IPMRA_VIFA_VIF_ID, vifid) || nla_put_u16(skb, IPMRA_VIFA_FLAGS, vif->flags) || - nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, vif->bytes_in, + nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_IN, READ_ONCE(vif->bytes_in), IPMRA_VIFA_PAD) || - nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, vif->bytes_out, + nla_put_u64_64bit(skb, IPMRA_VIFA_BYTES_OUT, READ_ONCE(vif->bytes_out), IPMRA_VIFA_PAD) || - nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, vif->pkt_in, + nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_IN, READ_ONCE(vif->pkt_in), IPMRA_VIFA_PAD) || - nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, vif->pkt_out, + nla_put_u64_64bit(skb, IPMRA_VIFA_PACKETS_OUT, READ_ONCE(vif->pkt_out), IPMRA_VIFA_PAD) || nla_put_be32(skb, IPMRA_VIFA_LOCAL_ADDR, vif->local) || nla_put_be32(skb, IPMRA_VIFA_REMOTE_ADDR, vif->remote)) { @@ -2992,6 +3051,8 @@ static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; s_e = cb->args[1]; + rcu_read_lock(); + ipmr_for_each_table(mrt, net) { struct nlattr *vifs, *af; struct ifinfomsg *hdr; @@ -3026,7 +3087,7 @@ static int ipmr_rtm_dumplink(struct sk_buff *skb, struct netlink_callback *cb) nlmsg_end(skb, nlh); goto out; } - for (i = 0; i < mrt->maxvif; i++) { + for (i = 0; i < READ_ONCE(mrt->maxvif); i++) { if (e < s_e) goto skip_entry; if (!ipmr_fill_vif(mrt, i, skb)) { @@ -3048,6 +3109,8 @@ skip_table: } out: + rcu_read_unlock(); + cb->args[1] = e; cb->args[0] = t; @@ -3185,7 +3248,7 @@ static const struct net_protocol pim_protocol = { static unsigned int ipmr_seq_read(const struct net *net) { - return READ_ONCE(net->ipv4.ipmr_seq) + ipmr_rules_seq_read(net); + return atomic_read(&net->ipv4.ipmr_seq) + 
ipmr_rules_seq_read(net); } static int ipmr_dump(struct net *net, struct notifier_block *nb, @@ -3206,7 +3269,7 @@ static int __net_init ipmr_notifier_init(struct net *net) { struct fib_notifier_ops *ops; - net->ipv4.ipmr_seq = 0; + atomic_set(&net->ipv4.ipmr_seq, 0); ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net); if (IS_ERR(ops)) @@ -3225,8 +3288,11 @@ static void __net_exit ipmr_notifier_exit(struct net *net) /* Setup for IP multicast routing */ static int __net_init ipmr_net_init(struct net *net) { + LIST_HEAD(dev_kill_list); int err; + mutex_init(&net->ipv4.mfc_mutex); + err = ipmr_notifier_init(net); if (err) goto ipmr_notifier_fail; @@ -3250,9 +3316,8 @@ static int __net_init ipmr_net_init(struct net *net) proc_cache_fail: remove_proc_entry("ip_mr_vif", net->proc_net); proc_vif_fail: - rtnl_lock(); + ipmr_rules_exit_rtnl(net, &dev_kill_list); ipmr_rules_exit(net); - rtnl_unlock(); #endif ipmr_rules_fail: ipmr_notifier_exit(net); @@ -3266,34 +3331,32 @@ static void __net_exit ipmr_net_exit(struct net *net) remove_proc_entry("ip_mr_cache", net->proc_net); remove_proc_entry("ip_mr_vif", net->proc_net); #endif + ipmr_rules_exit(net); ipmr_notifier_exit(net); } -static void __net_exit ipmr_net_exit_batch(struct list_head *net_list) +static void __net_exit ipmr_net_exit_rtnl(struct net *net, + struct list_head *dev_kill_list) { - struct net *net; - - rtnl_lock(); - list_for_each_entry(net, net_list, exit_list) - ipmr_rules_exit(net); - rtnl_unlock(); + ipmr_rules_exit_rtnl(net, dev_kill_list); } static struct pernet_operations ipmr_net_ops = { .init = ipmr_net_init, .exit = ipmr_net_exit, - .exit_batch = ipmr_net_exit_batch, + .exit_rtnl = ipmr_net_exit_rtnl, }; static const struct rtnl_msg_handler ipmr_rtnl_msg_handlers[] __initconst = { {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETLINK, - .dumpit = ipmr_rtm_dumplink}, + .dumpit = ipmr_rtm_dumplink, .flags = RTNL_FLAG_DUMP_UNLOCKED}, {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_NEWROUTE, - 
.doit = ipmr_rtm_route}, + .doit = ipmr_rtm_route, .flags = RTNL_FLAG_DOIT_UNLOCKED}, {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_DELROUTE, - .doit = ipmr_rtm_route}, + .doit = ipmr_rtm_route, .flags = RTNL_FLAG_DOIT_UNLOCKED}, {.protocol = RTNL_FAMILY_IPMR, .msgtype = RTM_GETROUTE, - .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute}, + .doit = ipmr_rtm_getroute, .dumpit = ipmr_rtm_dumproute, + .flags = RTNL_FLAG_DOIT_UNLOCKED | RTNL_FLAG_DUMP_UNLOCKED}, }; int __init ip_mr_init(void) diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index 2d62526406ca..37a3c144276c 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Linux multicast routing support * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation */ @@ -26,7 +27,6 @@ void vif_device_init(struct vif_device *v, else v->link = dev->ifindex; } -EXPORT_SYMBOL(vif_device_init); struct mr_table * mr_table_alloc(struct net *net, u32 id, @@ -59,7 +59,6 @@ mr_table_alloc(struct net *net, u32 id, table_set(mrt, net); return mrt; } -EXPORT_SYMBOL(mr_table_alloc); void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent) { @@ -73,7 +72,6 @@ void *mr_mfc_find_parent(struct mr_table *mrt, void *hasharg, int parent) return NULL; } -EXPORT_SYMBOL(mr_mfc_find_parent); void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi) { @@ -88,7 +86,6 @@ void *mr_mfc_find_any_parent(struct mr_table *mrt, int vifi) return NULL; } -EXPORT_SYMBOL(mr_mfc_find_any_parent); void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg) { @@ -108,7 +105,6 @@ void *mr_mfc_find_any(struct mr_table *mrt, int vifi, void *hasharg) return mr_mfc_find_any_parent(mrt, vifi); } -EXPORT_SYMBOL(mr_mfc_find_any); #ifdef CONFIG_PROC_FS void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos) @@ -123,7 +119,6 @@ void *mr_vif_seq_idx(struct net *net, struct mr_vif_iter *iter, loff_t pos) } return NULL; } 
-EXPORT_SYMBOL(mr_vif_seq_idx); void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) { @@ -142,7 +137,6 @@ void *mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos) } return NULL; } -EXPORT_SYMBOL(mr_vif_seq_next); void *mr_mfc_seq_idx(struct net *net, struct mr_mfc_iter *it, loff_t pos) @@ -167,7 +161,6 @@ void *mr_mfc_seq_idx(struct net *net, it->cache = NULL; return NULL; } -EXPORT_SYMBOL(mr_mfc_seq_idx); void *mr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos) @@ -202,7 +195,6 @@ end_of_list: return NULL; } -EXPORT_SYMBOL(mr_mfc_seq_next); #endif int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, @@ -223,7 +215,7 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, rcu_read_lock(); vif_dev = rcu_dereference(mrt->vif_table[c->mfc_parent].dev); - if (vif_dev && nla_put_u32(skb, RTA_IIF, vif_dev->ifindex) < 0) { + if (vif_dev && nla_put_u32(skb, RTA_IIF, READ_ONCE(vif_dev->ifindex)) < 0) { rcu_read_unlock(); return -EMSGSIZE; } @@ -252,7 +244,7 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, nhp->rtnh_flags = 0; nhp->rtnh_hops = c->mfc_un.res.ttls[ct]; - nhp->rtnh_ifindex = vif_dev->ifindex; + nhp->rtnh_ifindex = READ_ONCE(vif_dev->ifindex); nhp->rtnh_len = sizeof(*nhp); } } @@ -274,7 +266,6 @@ int mr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, rtm->rtm_type = RTN_MULTICAST; return 1; } -EXPORT_SYMBOL(mr_fill_mroute); static bool mr_mfc_uses_dev(const struct mr_table *mrt, const struct mr_mfc *c, @@ -346,7 +337,6 @@ out: cb->args[1] = e; return err; } -EXPORT_SYMBOL(mr_table_dump); int mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb, struct mr_table *(*iter)(struct net *net, @@ -389,7 +379,6 @@ next_table: return skb->len; } -EXPORT_SYMBOL(mr_rtm_dumproute); int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, int (*rules_dump)(struct net *net, @@ -443,4 +432,3 @@ int mr_dump(struct net *net, struct notifier_block *nb, unsigned short family, 
return 0; } -EXPORT_SYMBOL(mr_dump); diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c index c1463add48c4..ad40762a8b38 100644 --- a/net/ipv4/metrics.c +++ b/net/ipv4/metrics.c @@ -88,4 +88,3 @@ struct dst_metrics *ip_fib_metrics_init(struct nlattr *fc_mx, return fib_metrics; } -EXPORT_IPV6_MOD_GPL(ip_fib_metrics_init); diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index ce310eb779e0..ce9e1bfa4259 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -1,7 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * IPv4 specific functions of netfilter core * - * Rusty Russell (C) 2000 -- This code is GPL. + * Rusty Russell (C) 2000 * Patrick McHardy (C) 2006-2012 */ #include <linux/kernel.h> diff --git a/net/ipv4/netfilter/nft_dup_ipv4.c b/net/ipv4/netfilter/nft_dup_ipv4.c index ef5dd88107dd..d53a65ddbd7b 100644 --- a/net/ipv4/netfilter/nft_dup_ipv4.c +++ b/net/ipv4/netfilter/nft_dup_ipv4.c @@ -76,7 +76,6 @@ static const struct nft_expr_ops nft_dup_ipv4_ops = { .eval = nft_dup_ipv4_eval, .init = nft_dup_ipv4_init, .dump = nft_dup_ipv4_dump, - .reduce = NFT_REDUCE_READONLY, }; static const struct nla_policy nft_dup_ipv4_policy[NFTA_DUP_MAX + 1] = { diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 82af6cd76d13..9d0c6d75109b 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -163,7 +163,6 @@ static const struct nft_expr_ops nft_fib4_type_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, - .reduce = nft_fib_reduce, }; static const struct nft_expr_ops nft_fib4_ops = { @@ -173,7 +172,6 @@ static const struct nft_expr_ops nft_fib4_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, - .reduce = nft_fib_reduce, }; static const struct nft_expr_ops * diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c index 6cb213bb7256..55fc23a8f7a7 100644 --- 
a/net/ipv4/netfilter/nft_reject_ipv4.c +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -45,7 +45,6 @@ static const struct nft_expr_ops nft_reject_ipv4_ops = { .init = nft_reject_init, .dump = nft_reject_dump, .validate = nft_reject_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_reject_ipv4_type __read_mostly = { diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index 2c9036c719b6..904a060a7330 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -10,7 +10,7 @@ #include <linux/slab.h> #include <linux/vmalloc.h> #include <net/arp.h> -#include <net/ipv6_stubs.h> +#include <net/ip6_route.h> #include <net/lwtunnel.h> #include <net/ndisc.h> #include <net/nexthop.h> @@ -510,7 +510,7 @@ static void nexthop_free_single(struct nexthop *nh) fib_nh_release(nh->net, &nhi->fib_nh); break; case AF_INET6: - ipv6_stub->fib6_nh_release(&nhi->fib6_nh); + fib6_nh_release(&nhi->fib6_nh); break; } kfree(nhi); @@ -1382,7 +1382,7 @@ static bool ipv6_good_nh(const struct fib6_nh *nh) rcu_read_lock(); - n = __ipv6_neigh_lookup_noref_stub(nh->fib_nh_dev, &nh->fib_nh_gw6); + n = __ipv6_neigh_lookup_noref(nh->fib_nh_dev, &nh->fib_nh_gw6); if (n) state = READ_ONCE(n->nud_state); @@ -1416,7 +1416,7 @@ static bool nexthop_is_good_nh(const struct nexthop *nh) case AF_INET: return ipv4_good_nh(&nhi->fib_nh); case AF_INET6: - return ipv6_good_nh(&nhi->fib6_nh); + return IS_ENABLED(CONFIG_IPV6) && ipv6_good_nh(&nhi->fib6_nh); } return false; @@ -2166,8 +2166,8 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh) fib6_info_hold(f6i); spin_unlock_bh(&nh->lock); - ipv6_stub->ip6_del_rt(net, f6i, - !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)); + ip6_del_rt(net, f6i, + !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode)); spin_lock_bh(&nh->lock); } @@ -2223,8 +2223,11 @@ static void nh_rt_cache_flush(struct net *net, struct nexthop *nh, if (!list_empty(&nh->fi_list)) rt_cache_flush(net); - list_for_each_entry(f6i, &nh->f6i_list, nh_list) - 
ipv6_stub->fib6_update_sernum(net, f6i); + list_for_each_entry(f6i, &nh->f6i_list, nh_list) { + spin_lock_bh(&f6i->fib6_table->tb6_lock); + fib6_update_sernum_upto_root(net, f6i); + spin_unlock_bh(&f6i->fib6_table->tb6_lock); + } /* if an IPv6 group was replaced, we have to release all old * dsts to make sure all refcounts are released @@ -2238,7 +2241,7 @@ static void nh_rt_cache_flush(struct net *net, struct nexthop *nh, struct nh_info *nhi = rtnl_dereference(nhge->nh->nh_info); if (nhi->family == AF_INET6) - ipv6_stub->fib6_nh_release_dsts(&nhi->fib6_nh); + fib6_nh_release_dsts(&nhi->fib6_nh); } } @@ -2519,7 +2522,7 @@ static void __nexthop_replace_notify(struct net *net, struct nexthop *nh, } list_for_each_entry(f6i, &nh->f6i_list, nh_list) - ipv6_stub->fib6_rt_update(net, f6i, info); + fib6_rt_update(net, f6i, info); } /* send RTM_NEWROUTE with REPLACE flag set for all FIB entries @@ -2892,13 +2895,12 @@ static int nh_create_ipv6(struct net *net, struct nexthop *nh, fib6_cfg.fc_flags |= RTF_GATEWAY; /* sets nh_dev if successful */ - err = ipv6_stub->fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL, - extack); + err = fib6_nh_init(net, fib6_nh, &fib6_cfg, GFP_KERNEL, extack); if (err) { /* IPv6 is not enabled, don't call fib6_nh_release */ if (err == -EAFNOSUPPORT) goto out; - ipv6_stub->fib6_nh_release(fib6_nh); + fib6_nh_release(fib6_nh); } else { nh->nh_flags = fib6_nh->fib_nh_flags; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index bc4b43e52303..d36f1e273fde 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -56,7 +56,6 @@ struct ping_table { static struct ping_table ping_table; struct pingv6_ops pingv6_ops; -EXPORT_IPV6_MOD_GPL(pingv6_ops); static inline u32 ping_hashfn(const struct net *net, u32 num, u32 mask) { @@ -139,7 +138,6 @@ fail: spin_unlock(&ping_table.lock); return -EADDRINUSE; } -EXPORT_IPV6_MOD_GPL(ping_get_port); void ping_unhash(struct sock *sk) { @@ -154,7 +152,6 @@ void ping_unhash(struct sock *sk) } spin_unlock(&ping_table.lock); 
} -EXPORT_IPV6_MOD_GPL(ping_unhash); /* Called under rcu_read_lock() */ static struct sock *ping_lookup(struct net *net, struct sk_buff *skb, u16 ident) @@ -278,7 +275,6 @@ out_release_group: put_group_info(group_info); return ret; } -EXPORT_IPV6_MOD_GPL(ping_init_sock); void ping_close(struct sock *sk, long timeout) { @@ -288,7 +284,6 @@ void ping_close(struct sock *sk, long timeout) sk_common_release(sk); } -EXPORT_IPV6_MOD_GPL(ping_close); static int ping_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) @@ -468,7 +463,6 @@ out: pr_debug("ping_v4_bind -> %d\n", err); return err; } -EXPORT_IPV6_MOD_GPL(ping_bind); /* * Is this a supported type of ICMP message? @@ -601,7 +595,6 @@ void ping_err(struct sk_buff *skb, int offset, u32 info) out: return; } -EXPORT_IPV6_MOD_GPL(ping_err); /* * Copy and checksum an ICMP Echo packet from user space into a buffer @@ -631,7 +624,6 @@ int ping_getfrag(void *from, char *to, return 0; } -EXPORT_IPV6_MOD_GPL(ping_getfrag); static int ping_v4_push_pending_frames(struct sock *sk, struct pingfakehdr *pfh, struct flowi4 *fl4) @@ -692,7 +684,6 @@ int ping_common_sendmsg(int family, struct msghdr *msg, size_t len, return 0; } -EXPORT_IPV6_MOD_GPL(ping_common_sendmsg); static int ping_v4_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { @@ -848,8 +839,7 @@ do_confirm: goto out; } -int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, - int *addr_len) +int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags) { struct inet_sock *isk = inet_sk(sk); int family = sk->sk_family; @@ -864,7 +854,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, goto out; if (flags & MSG_ERRQUEUE) - return inet_recv_error(sk, msg, len, addr_len); + return inet_recv_error(sk, msg, len); skb = skb_recv_datagram(sk, flags, &err); if (!skb) @@ -892,7 +882,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, sin->sin_port = 0 /* 
skb->h.uh->source */; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - *addr_len = sizeof(*sin); + msg->msg_namelen = sizeof(*sin); } if (inet_cmsg_flags(isk)) @@ -913,7 +903,7 @@ int ping_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, inet6_iif(skb)); - *addr_len = sizeof(*sin6); + msg->msg_namelen = sizeof(*sin6); } if (inet6_sk(sk)->rxopt.all) @@ -937,7 +927,6 @@ out: pr_debug("ping_recvmsg -> %d\n", err); return err; } -EXPORT_IPV6_MOD_GPL(ping_recvmsg); static enum skb_drop_reason __ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) @@ -946,7 +935,8 @@ static enum skb_drop_reason __ping_queue_rcv_skb(struct sock *sk, pr_debug("ping_queue_rcv_skb(sk=%p,sk->num=%d,skb=%p)\n", inet_sk(sk), inet_sk(sk)->inet_num, skb); - if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { + reason = sock_queue_rcv_skb_reason(sk, skb); + if (reason) { sk_skb_reason_drop(sk, skb, reason); pr_debug("ping_queue_rcv_skb -> failed\n"); return reason; @@ -958,7 +948,6 @@ int ping_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) { return __ping_queue_rcv_skb(sk, skb) ? -1 : 0; } -EXPORT_IPV6_MOD_GPL(ping_queue_rcv_skb); /* @@ -986,7 +975,6 @@ enum skb_drop_reason ping_rcv(struct sk_buff *skb) kfree_skb_reason(skb, SKB_DROP_REASON_NO_SOCKET); return SKB_DROP_REASON_NO_SOCKET; } -EXPORT_IPV6_MOD_GPL(ping_rcv); struct proto ping_prot = { .name = "PING", @@ -1008,7 +996,6 @@ struct proto ping_prot = { .put_port = ping_unhash, .obj_size = sizeof(struct inet_sock), }; -EXPORT_IPV6_MOD(ping_prot); #ifdef CONFIG_PROC_FS @@ -1073,7 +1060,6 @@ void *ping_seq_start(struct seq_file *seq, loff_t *pos, sa_family_t family) return *pos ? 
ping_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } -EXPORT_IPV6_MOD_GPL(ping_seq_start); static void *ping_v4_seq_start(struct seq_file *seq, loff_t *pos) { @@ -1092,14 +1078,12 @@ void *ping_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; return sk; } -EXPORT_IPV6_MOD_GPL(ping_seq_next); void ping_seq_stop(struct seq_file *seq, void *v) __releases(ping_table.lock) { spin_unlock(&ping_table.lock); } -EXPORT_IPV6_MOD_GPL(ping_seq_stop); static void ping_v4_format_sock(struct sock *sp, struct seq_file *f, int bucket) diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 974afc4ecbe2..bfc06d1713ec 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -35,7 +35,6 @@ #include <net/mptcp.h> #include <net/proto_memory.h> #include <net/udp.h> -#include <net/udplite.h> #include <linux/bottom_half.h> #include <linux/inetdevice.h> #include <linux/proc_fs.h> @@ -65,8 +64,6 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "UDP: inuse %d mem %ld\n", sock_prot_inuse_get(net, &udp_prot), proto_memory_allocated(&udp_prot)); - seq_printf(seq, "UDPLITE: inuse %d\n", - sock_prot_inuse_get(net, &udplite_prot)); seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(net, &raw_prot)); seq_printf(seq, "FRAG: inuse %u memory %lu\n", @@ -447,19 +444,6 @@ static int snmp_seq_show_tcp_udp(struct seq_file *seq, void *v) for (i = 0; i < udp_cnt; i++) seq_printf(seq, " %lu", buff[i]); - memset(buff, 0, udp_cnt * sizeof(unsigned long)); - - /* the UDP and UDP-Lite MIBs are the same */ - seq_puts(seq, "\nUdpLite:"); - snmp_get_cpu_field_batch_cnt(buff, snmp4_udp_list, - udp_cnt, - net->mib.udplite_statistics); - for (i = 0; i < udp_cnt; i++) - seq_printf(seq, " %s", snmp4_udp_list[i].name); - seq_puts(seq, "\nUdpLite:"); - for (i = 0; i < udp_cnt; i++) - seq_printf(seq, " %lu", buff[i]); - seq_putc(seq, '\n'); return 0; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index bcc99ced1ade..5aaf9c62c8e1 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -300,7 
+300,8 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb) /* Charge it to the socket. */ ipv4_pktinfo_prepare(sk, skb, true); - if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { + reason = sock_queue_rcv_skb_reason(sk, skb); + if (reason) { sk_skb_reason_drop(sk, skb, reason); return NET_RX_DROP; } @@ -739,7 +740,7 @@ out: */ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct inet_sock *inet = inet_sk(sk); size_t copied = 0; @@ -751,7 +752,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, goto out; if (flags & MSG_ERRQUEUE) { - err = ip_recv_error(sk, msg, len, addr_len); + err = ip_recv_error(sk, msg, len); goto out; } @@ -777,7 +778,7 @@ static int raw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, sin->sin_addr.s_addr = ip_hdr(skb)->saddr; sin->sin_port = 0; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); - *addr_len = sizeof(*sin); + msg->msg_namelen = sizeof(*sin); } if (inet_cmsg_flags(inet)) ip_cmsg_recv(msg, skb); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 463236e0dc2d..bc1296f0ea69 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -446,8 +446,8 @@ static void ipv4_confirm_neigh(const struct dst_entry *dst, const void *daddr) if (rt->rt_gw_family == AF_INET) { pkey = (const __be32 *)&rt->rt_gw4; - } else if (rt->rt_gw_family == AF_INET6) { - return __ipv6_confirm_neigh_stub(dev, &rt->rt_gw6); + } else if (IS_ENABLED(CONFIG_IPV6) && rt->rt_gw_family == AF_INET6) { + return __ipv6_confirm_neigh(dev, &rt->rt_gw6); } else if (!daddr || (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST | RTCF_LOCAL))) { diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index fc3affd9c801..df479277fb80 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -223,7 +223,6 @@ struct sock *tcp_get_cookie_sock(struct sock *sk, struct sk_buff *skb, return NULL; } -EXPORT_IPV6_MOD(tcp_get_cookie_sock); /* * when syncookies 
are in effect and tcp timestamps are enabled we stored @@ -260,7 +259,6 @@ bool cookie_timestamp_decode(const struct net *net, return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0; } -EXPORT_IPV6_MOD(cookie_timestamp_decode); static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, struct request_sock *req) @@ -286,7 +284,6 @@ static int cookie_tcp_reqsk_init(struct sock *sk, struct sk_buff *skb, treq->rcv_isn = ntohl(th->seq) - 1; treq->snt_isn = ntohl(th->ack_seq) - 1; treq->syn_tos = TCP_SKB_CB(skb)->ip_dsfield; - treq->req_usec_ts = false; #if IS_ENABLED(CONFIG_MPTCP) treq->is_mptcp = sk_is_mptcp(sk); @@ -312,7 +309,6 @@ struct request_sock *cookie_bpf_check(struct sock *sk, struct sk_buff *skb) return req; } -EXPORT_IPV6_MOD_GPL(cookie_bpf_check); #endif struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, @@ -349,11 +345,11 @@ struct request_sock *cookie_tcp_reqsk_alloc(const struct request_sock_ops *ops, ireq->wscale_ok = tcp_opt->wscale_ok; ireq->ecn_ok = !!(tcp_opt->rcv_tsecr & TS_OPT_ECN); + treq->req_usec_ts = false; treq->ts_off = tsoff; return req; } -EXPORT_IPV6_MOD_GPL(cookie_tcp_reqsk_alloc); static struct request_sock *cookie_tcp_check(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5654cc9c8a0b..d8bdb1bdbff1 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -824,6 +824,13 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = ipv4_local_port_range, }, { + .procname = "ip_local_port_step_width", + .maxlen = sizeof(u32), + .data = &init_net.ipv4.sysctl_ip_local_port_step_width, + .mode = 0644, + .proc_handler = proc_douintvec, + }, + { .procname = "ip_local_reserved_ports", .data = &init_net.ipv4.sysctl_local_reserved_ports, .maxlen = 65536, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 202a4e57a218..1a494d18c5fd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -303,7 +303,6 @@ 
DEFINE_PER_CPU(u32, tcp_tw_isn); EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn); long sysctl_tcp_mem[3] __read_mostly; -EXPORT_IPV6_MOD(sysctl_tcp_mem); DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc); EXPORT_PER_CPU_SYMBOL_GPL(tcp_memory_per_cpu_fw_alloc); @@ -317,7 +316,6 @@ EXPORT_SYMBOL(tcp_have_smc); * Current number of TCP sockets. */ struct percpu_counter tcp_sockets_allocated ____cacheline_aligned_in_smp; -EXPORT_IPV6_MOD(tcp_sockets_allocated); /* * Pressure flag: try to collapse. @@ -341,7 +339,6 @@ void tcp_enter_memory_pressure(struct sock *sk) if (!cmpxchg(&tcp_memory_pressure, 0, val)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES); } -EXPORT_IPV6_MOD_GPL(tcp_enter_memory_pressure); void tcp_leave_memory_pressure(struct sock *sk) { @@ -354,7 +351,6 @@ void tcp_leave_memory_pressure(struct sock *sk) NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURESCHRONO, jiffies_to_msecs(jiffies - val)); } -EXPORT_IPV6_MOD_GPL(tcp_leave_memory_pressure); /* Convert seconds to retransmits based on initial and max timeout */ static u8 secs_to_retrans(int seconds, int timeout, int rto_max) @@ -418,7 +414,6 @@ void tcp_md5_destruct_sock(struct sock *sk) static_branch_slow_dec_deferred(&tcp_md5_needed); } } -EXPORT_IPV6_MOD_GPL(tcp_md5_destruct_sock); #endif /* Address-family independent initialization for a tcp_sock. 
@@ -486,7 +481,6 @@ void tcp_init_sock(struct sock *sk) sk_sockets_allocated_inc(sk); xa_init_flags(&sk->sk_user_frags, XA_FLAGS_ALLOC1); } -EXPORT_IPV6_MOD(tcp_init_sock); static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc) { @@ -691,7 +685,6 @@ int tcp_ioctl(struct sock *sk, int cmd, int *karg) *karg = answ; return 0; } -EXPORT_IPV6_MOD(tcp_ioctl); void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb) { @@ -895,9 +888,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, release_sock(sk); lock_sock(sk); - if (sk->sk_err || sk->sk_state == TCP_CLOSE || - (sk->sk_shutdown & RCV_SHUTDOWN) || - signal_pending(current)) + if (tcp_recv_should_stop(sk)) break; } @@ -908,7 +899,6 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, return ret; } -EXPORT_IPV6_MOD(tcp_splice_read); /* We allow to exceed memory limits for FIN packets to expedite * connection tear down and (memory) recovery. @@ -1483,7 +1473,6 @@ void tcp_splice_eof(struct socket *sock) tcp_push(sk, 0, mss_now, tp->nonagle, size_goal); release_sock(sk); } -EXPORT_IPV6_MOD_GPL(tcp_splice_eof); /* * Handle reading urgent data. 
BSD has very simple semantics for @@ -1795,7 +1784,6 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) } return copied; } -EXPORT_IPV6_MOD(tcp_read_skb); void tcp_read_done(struct sock *sk, size_t len) { @@ -1840,7 +1828,6 @@ int tcp_peek_len(struct socket *sock) { return tcp_inq(sock->sk); } -EXPORT_IPV6_MOD(tcp_peek_len); /* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */ int tcp_set_rcvlowat(struct sock *sk, int val) @@ -1870,20 +1857,10 @@ int tcp_set_rcvlowat(struct sock *sk, int val) } return 0; } -EXPORT_IPV6_MOD(tcp_set_rcvlowat); -void tcp_update_recv_tstamps(struct sk_buff *skb, - struct scm_timestamping_internal *tss) +void tcp_set_rcvbuf(struct sock *sk, int val) { - if (skb->tstamp) - tss->ts[0] = ktime_to_timespec64(skb->tstamp); - else - tss->ts[0] = (struct timespec64) {0}; - - if (skb_hwtstamps(skb)->hwtstamp) - tss->ts[2] = ktime_to_timespec64(skb_hwtstamps(skb)->hwtstamp); - else - tss->ts[2] = (struct timespec64) {0}; + tcp_set_window_clamp(sk, tcp_win_from_space(sk, val)); } #ifdef CONFIG_MMU @@ -1903,7 +1880,6 @@ int tcp_mmap(struct file *file, struct socket *sock, vma->vm_ops = &tcp_vm_ops; return 0; } -EXPORT_IPV6_MOD(tcp_mmap); static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb, u32 *offset_frag) @@ -2377,22 +2353,23 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, { int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); u32 tsflags = READ_ONCE(sk->sk_tsflags); - bool has_timestamping = false; - if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) { + if (tss->ts[0]) { if (sock_flag(sk, SOCK_RCVTSTAMP)) { + struct timespec64 tv = ktime_to_timespec64(tss->ts[0]); + if (sock_flag(sk, SOCK_RCVTSTAMPNS)) { if (new_tstamp) { struct __kernel_timespec kts = { - .tv_sec = tss->ts[0].tv_sec, - .tv_nsec = tss->ts[0].tv_nsec, + .tv_sec = tv.tv_sec, + .tv_nsec = tv.tv_nsec, }; put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_NEW, sizeof(kts), &kts); } else { struct __kernel_old_timespec ts_old = { 
- .tv_sec = tss->ts[0].tv_sec, - .tv_nsec = tss->ts[0].tv_nsec, + .tv_sec = tv.tv_sec, + .tv_nsec = tv.tv_nsec, }; put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMPNS_OLD, sizeof(ts_old), &ts_old); @@ -2400,41 +2377,37 @@ void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, } else { if (new_tstamp) { struct __kernel_sock_timeval stv = { - .tv_sec = tss->ts[0].tv_sec, - .tv_usec = tss->ts[0].tv_nsec / 1000, + .tv_sec = tv.tv_sec, + .tv_usec = tv.tv_nsec / 1000, }; put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_NEW, sizeof(stv), &stv); } else { - struct __kernel_old_timeval tv = { - .tv_sec = tss->ts[0].tv_sec, - .tv_usec = tss->ts[0].tv_nsec / 1000, + struct __kernel_old_timeval otv = { + .tv_sec = tv.tv_sec, + .tv_usec = tv.tv_nsec / 1000, }; put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP_OLD, - sizeof(tv), &tv); + sizeof(otv), &otv); } } } - if (tsflags & SOF_TIMESTAMPING_SOFTWARE && + if (!(tsflags & SOF_TIMESTAMPING_SOFTWARE && (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE || - !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) - has_timestamping = true; - else - tss->ts[0] = (struct timespec64) {0}; + !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER)))) + tss->ts[0] = 0; } - if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) { - if (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE && + if (tss->ts[2]) { + if (!(tsflags & SOF_TIMESTAMPING_RAW_HARDWARE && (tsflags & SOF_TIMESTAMPING_RX_HARDWARE || - !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) - has_timestamping = true; - else - tss->ts[2] = (struct timespec64) {0}; + !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER)))) + tss->ts[2] = 0; } - if (has_timestamping) { - tss->ts[1] = (struct timespec64) {0}; + if (tss->ts[0] | tss->ts[2]) { + tss->ts[1] = 0; if (sock_flag(sk, SOCK_TSTAMP_NEW)) put_cmsg_scm_timestamping64(msg, tss); else @@ -2785,10 +2758,7 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, if (copied) { if (!timeo || - sk->sk_err || - sk->sk_state == TCP_CLOSE || - (sk->sk_shutdown & RCV_SHUTDOWN) || - 
signal_pending(current)) + tcp_recv_should_stop(sk)) break; } else { if (sock_flag(sk, SOCK_DONE)) @@ -2962,14 +2932,13 @@ recv_sndq: goto out; } -int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, - int *addr_len) +int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags) { int cmsg_flags = 0, ret; struct scm_timestamping_internal tss; if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); + return inet_recv_error(sk, msg, len); if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue) && @@ -2992,7 +2961,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, } return ret; } -EXPORT_IPV6_MOD(tcp_recvmsg); void tcp_set_state(struct sock *sk, int state) { @@ -3122,7 +3090,6 @@ void tcp_shutdown(struct sock *sk, int how) tcp_send_fin(sk); } } -EXPORT_IPV6_MOD(tcp_shutdown); int tcp_orphan_count_sum(void) { @@ -3579,6 +3546,7 @@ static int tcp_repair_set_window(struct tcp_sock *tp, sockptr_t optbuf, int len) tp->rcv_wnd = opt.rcv_wnd; tp->rcv_wup = opt.rcv_wup; + tp->rcv_mwnd_seq = opt.rcv_wup + opt.rcv_wnd; return 0; } @@ -3634,7 +3602,6 @@ static int tcp_repair_options_est(struct sock *sk, sockptr_t optbuf, } DEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled); -EXPORT_IPV6_MOD(tcp_tx_delay_enabled); static void tcp_enable_tx_delay(struct sock *sk, int val) { @@ -4219,7 +4186,6 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, optval, optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } -EXPORT_IPV6_MOD(tcp_setsockopt); static void tcp_get_info_chrono_stats(const struct tcp_sock *tp, struct tcp_info *info) @@ -4882,7 +4848,6 @@ bool tcp_bpf_bypass_getsockopt(int level, int optname) return false; } -EXPORT_IPV6_MOD(tcp_bpf_bypass_getsockopt); int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) @@ -4896,7 +4861,6 @@ int tcp_getsockopt(struct sock *sk, int 
level, int optname, char __user *optval, return do_tcp_getsockopt(sk, level, optname, USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); } -EXPORT_IPV6_MOD(tcp_getsockopt); #ifdef CONFIG_TCP_MD5SIG void tcp_md5_hash_skb_data(struct md5_ctx *ctx, const struct sk_buff *skb, @@ -4927,7 +4891,6 @@ void tcp_md5_hash_skb_data(struct md5_ctx *ctx, const struct sk_buff *skb, skb_walk_frags(skb, frag_iter) tcp_md5_hash_skb_data(ctx, frag_iter, 0); } -EXPORT_IPV6_MOD(tcp_md5_hash_skb_data); void tcp_md5_hash_key(struct md5_ctx *ctx, const struct tcp_md5sig_key *key) @@ -4939,7 +4902,6 @@ void tcp_md5_hash_key(struct md5_ctx *ctx, */ data_race(({ md5_update(ctx, key->key, keylen), 0; })); } -EXPORT_IPV6_MOD(tcp_md5_hash_key); /* Called with rcu_read_lock() */ static enum skb_drop_reason @@ -4989,6 +4951,59 @@ tcp_inbound_md5_hash(const struct sock *sk, const struct sk_buff *skb, #endif +#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) +/* + * Parse Signature options + */ +int tcp_do_parse_auth_options(const struct tcphdr *th, + const u8 **md5_hash, const u8 **ao_hash) +{ + int length = (th->doff << 2) - sizeof(*th); + const u8 *ptr = (const u8 *)(th + 1); + unsigned int minlen = TCPOLEN_MD5SIG; + + if (IS_ENABLED(CONFIG_TCP_AO)) + minlen = sizeof(struct tcp_ao_hdr) + 1; + + *md5_hash = NULL; + *ao_hash = NULL; + + /* If not enough data remaining, we can short cut */ + while (length >= minlen) { + int opcode = *ptr++; + int opsize; + + switch (opcode) { + case TCPOPT_EOL: + return 0; + case TCPOPT_NOP: + length--; + continue; + default: + opsize = *ptr++; + if (opsize < 2 || opsize > length) + return -EINVAL; + if (opcode == TCPOPT_MD5SIG) { + if (opsize != TCPOLEN_MD5SIG) + return -EINVAL; + if (unlikely(*md5_hash || *ao_hash)) + return -EEXIST; + *md5_hash = ptr; + } else if (opcode == TCPOPT_AO) { + if (opsize <= sizeof(struct tcp_ao_hdr)) + return -EINVAL; + if (unlikely(*md5_hash || *ao_hash)) + return -EEXIST; + *ao_hash = ptr; + } + } + ptr += opsize - 2; + length -= 
opsize; + } + return 0; +} +#endif + /* Called with rcu_read_lock() */ enum skb_drop_reason tcp_inbound_hash(struct sock *sk, const struct request_sock *req, @@ -5055,7 +5070,6 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req, return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family, l3index, md5_location); } -EXPORT_IPV6_MOD_GPL(tcp_inbound_hash); void tcp_done(struct sock *sk) { @@ -5239,6 +5253,7 @@ static void __init tcp_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ecn_bytes); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_mwnd_seq); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_tstamp); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rx_opt); diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c index 760941e55153..1ddc20a399b0 100644 --- a/net/ipv4/tcp_bbr.c +++ b/net/ipv4/tcp_bbr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Bottleneck Bandwidth and RTT (BBR) congestion control * * BBR congestion control computes the sending rate based on the delivery @@ -329,12 +330,12 @@ static void bbr_save_cwnd(struct sock *sk) bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp)); } -__bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) +__bpf_kfunc static void bbr_cwnd_event_tx_start(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct bbr *bbr = inet_csk_ca(sk); - if (event == CA_EVENT_TX_START && tp->app_limited) { + if (tp->app_limited) { bbr->idle_restart = 1; bbr->ack_epoch_mstamp = tp->tcp_mstamp; bbr->ack_epoch_acked = 0; @@ -1148,7 +1149,7 @@ static struct tcp_congestion_ops tcp_bbr_cong_ops __read_mostly = { .cong_control = bbr_main, .sndbuf_expand = bbr_sndbuf_expand, .undo_cwnd = bbr_undo_cwnd, - .cwnd_event = 
bbr_cwnd_event, + .cwnd_event_tx_start = bbr_cwnd_event_tx_start, .ssthresh = bbr_ssthresh, .min_tso_segs = bbr_min_tso_segs, .get_info = bbr_get_info, @@ -1160,7 +1161,7 @@ BTF_ID_FLAGS(func, bbr_init) BTF_ID_FLAGS(func, bbr_main) BTF_ID_FLAGS(func, bbr_sndbuf_expand) BTF_ID_FLAGS(func, bbr_undo_cwnd) -BTF_ID_FLAGS(func, bbr_cwnd_event) +BTF_ID_FLAGS(func, bbr_cwnd_event_tx_start) BTF_ID_FLAGS(func, bbr_ssthresh) BTF_ID_FLAGS(func, bbr_min_tso_segs) BTF_ID_FLAGS(func, bbr_set_state) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 813d2e498c93..cc0bd73f36b6 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -221,8 +221,7 @@ static bool is_next_msg_fin(struct sk_psock *psock) static int tcp_bpf_recvmsg_parser(struct sock *sk, struct msghdr *msg, size_t len, - int flags, - int *addr_len) + int flags) { int peek = flags & MSG_PEEK; struct sk_psock *psock; @@ -232,14 +231,14 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk, u32 seq; if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); + return inet_recv_error(sk, msg, len); if (!len) return 0; psock = sk_psock_get(sk); if (unlikely(!psock)) - return tcp_recvmsg(sk, msg, len, flags, addr_len); + return tcp_recvmsg(sk, msg, len, flags); lock_sock(sk); tcp = tcp_sk(sk); @@ -352,24 +351,24 @@ static int tcp_bpf_ioctl(struct sock *sk, int cmd, int *karg) } static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct sk_psock *psock; int copied, ret; if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); + return inet_recv_error(sk, msg, len); if (!len) return 0; psock = sk_psock_get(sk); if (unlikely(!psock)) - return tcp_recvmsg(sk, msg, len, flags, addr_len); + return tcp_recvmsg(sk, msg, len, flags); if (!skb_queue_empty(&sk->sk_receive_queue) && sk_psock_queue_empty(psock)) { sk_psock_put(sk, psock); - return tcp_recvmsg(sk, msg, len, flags, addr_len); + return 
tcp_recvmsg(sk, msg, len, flags); } lock_sock(sk); msg_bytes_ready: @@ -389,7 +388,7 @@ msg_bytes_ready: goto msg_bytes_ready; release_sock(sk); sk_psock_put(sk, psock); - return tcp_recvmsg(sk, msg, len, flags, addr_len); + return tcp_recvmsg(sk, msg, len, flags); } copied = -EAGAIN; } diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 76c23675ae50..ab78b5ae8d0e 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -139,24 +139,21 @@ __bpf_kfunc static void cubictcp_init(struct sock *sk) tcp_sk(sk)->snd_ssthresh = initial_ssthresh; } -__bpf_kfunc static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) +__bpf_kfunc static void cubictcp_cwnd_event_tx_start(struct sock *sk) { - if (event == CA_EVENT_TX_START) { - struct bictcp *ca = inet_csk_ca(sk); - u32 now = tcp_jiffies32; - s32 delta; - - delta = now - tcp_sk(sk)->lsndtime; - - /* We were application limited (idle) for a while. - * Shift epoch_start to keep cwnd growth to cubic curve. - */ - if (ca->epoch_start && delta > 0) { - ca->epoch_start += delta; - if (after(ca->epoch_start, now)) - ca->epoch_start = now; - } - return; + struct bictcp *ca = inet_csk_ca(sk); + u32 now = tcp_jiffies32; + s32 delta; + + delta = now - tcp_sk(sk)->lsndtime; + + /* We were application limited (idle) for a while. + * Shift epoch_start to keep cwnd growth to cubic curve. 
+ */ + if (ca->epoch_start && delta > 0) { + ca->epoch_start += delta; + if (after(ca->epoch_start, now)) + ca->epoch_start = now; } } @@ -481,7 +478,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { .cong_avoid = cubictcp_cong_avoid, .set_state = cubictcp_state, .undo_cwnd = tcp_reno_undo_cwnd, - .cwnd_event = cubictcp_cwnd_event, + .cwnd_event_tx_start = cubictcp_cwnd_event_tx_start, .pkts_acked = cubictcp_acked, .owner = THIS_MODULE, .name = "cubic", @@ -492,7 +489,7 @@ BTF_ID_FLAGS(func, cubictcp_init) BTF_ID_FLAGS(func, cubictcp_recalc_ssthresh) BTF_ID_FLAGS(func, cubictcp_cong_avoid) BTF_ID_FLAGS(func, cubictcp_state) -BTF_ID_FLAGS(func, cubictcp_cwnd_event) +BTF_ID_FLAGS(func, cubictcp_cwnd_event_tx_start) BTF_ID_FLAGS(func, cubictcp_acked) BTF_KFUNCS_END(tcp_cubic_check_kfunc_ids) diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c index 03abe0848420..96c99999e09d 100644 --- a/net/ipv4/tcp_dctcp.c +++ b/net/ipv4/tcp_dctcp.c @@ -203,15 +203,19 @@ __bpf_kfunc static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) tcp_plb_update_state_upon_rto(sk, &ca->plb); dctcp_react_to_loss(sk); break; - case CA_EVENT_TX_START: - tcp_plb_check_rehash(sk, &ca->plb); /* Maybe rehash when inflight is 0 */ - break; default: /* Don't care for the rest. 
*/ break; } } +__bpf_kfunc static void dctcp_cwnd_event_tx_start(struct sock *sk) +{ + struct dctcp *ca = inet_csk_ca(sk); + + tcp_plb_check_rehash(sk, &ca->plb); /* Maybe rehash when inflight is 0 */ +} + static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info) { @@ -252,6 +256,7 @@ static struct tcp_congestion_ops dctcp __read_mostly = { .init = dctcp_init, .in_ack_event = dctcp_update_alpha, .cwnd_event = dctcp_cwnd_event, + .cwnd_event_tx_start = dctcp_cwnd_event_tx_start, .ssthresh = dctcp_ssthresh, .cong_avoid = tcp_reno_cong_avoid, .undo_cwnd = dctcp_cwnd_undo, @@ -275,6 +280,7 @@ BTF_KFUNCS_START(tcp_dctcp_check_kfunc_ids) BTF_ID_FLAGS(func, dctcp_init) BTF_ID_FLAGS(func, dctcp_update_alpha) BTF_ID_FLAGS(func, dctcp_cwnd_event) +BTF_ID_FLAGS(func, dctcp_cwnd_event_tx_start) BTF_ID_FLAGS(func, dctcp_ssthresh) BTF_ID_FLAGS(func, dctcp_cwnd_undo) BTF_ID_FLAGS(func, dctcp_state) diff --git a/net/ipv4/tcp_dctcp.h b/net/ipv4/tcp_dctcp.h index 4b0259111d81..f13f8d770576 100644 --- a/net/ipv4/tcp_dctcp.h +++ b/net/ipv4/tcp_dctcp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _TCP_DCTCP_H #define _TCP_DCTCP_H diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 7935702e394b..ba1fdbe9807f 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -212,7 +212,7 @@ static int tcp_twsk_diag_fill(struct sock *sk, r->idiag_retrans = 0; r->idiag_state = READ_ONCE(tw->tw_substate); - r->idiag_timer = 3; + r->idiag_timer = IDIAG_TIMER_TIMEWAIT; tmo = tw->tw_timer.expires - jiffies; r->idiag_expires = jiffies_delta_to_msecs(tmo); r->idiag_rqueue = 0; @@ -247,7 +247,7 @@ static int tcp_req_diag_fill(struct sock *sk, struct sk_buff *skb, r = nlmsg_data(nlh); inet_diag_msg_common_fill(r, sk); r->idiag_state = TCP_SYN_RECV; - r->idiag_timer = 1; + r->idiag_timer = IDIAG_TIMER_ON; r->idiag_retrans = READ_ONCE(reqsk->num_retrans); BUILD_BUG_ON(offsetof(struct inet_request_sock, ir_cookie) != diff --git 
a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index 9fdc19accafd..471c78be5513 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -377,6 +377,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, tcp_rsk(req)->rcv_nxt = tp->rcv_nxt; tp->rcv_wup = tp->rcv_nxt; + tp->rcv_mwnd_seq = tp->rcv_wup + tp->rcv_wnd; /* tcp_conn_request() is sending the SYNACK, * and queues the child into listener accept queue. */ @@ -558,7 +559,6 @@ bool tcp_fastopen_defer_connect(struct sock *sk, int *err) } return false; } -EXPORT_IPV6_MOD(tcp_fastopen_defer_connect); /* * The following code block is to deal with middle box issues with TFO: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index cba89733d121..021f745747c5 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -814,7 +814,6 @@ void tcp_initialize_rcv_mss(struct sock *sk) inet_csk(sk)->icsk_ack.rcv_mss = hint; } -EXPORT_IPV6_MOD(tcp_initialize_rcv_mss); /* Receiver "autotuning" code. * @@ -3172,7 +3171,6 @@ void tcp_simple_retransmit(struct sock *sk) */ tcp_non_congestion_loss_retransmit(sk); } -EXPORT_IPV6_MOD(tcp_simple_retransmit); void tcp_enter_recovery(struct sock *sk, bool ece_ack) { @@ -3552,7 +3550,7 @@ void tcp_rearm_rto(struct sock *sk) /* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. 
*/ static void tcp_set_xmit_timer(struct sock *sk) { - if (!tcp_schedule_loss_probe(sk, true)) + if (!tcp_sk(sk)->packets_out || !tcp_schedule_loss_probe(sk, true)) tcp_rearm_rto(sk); } @@ -4714,60 +4712,6 @@ static bool tcp_fast_parse_options(const struct net *net, return true; } -#if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) -/* - * Parse Signature options - */ -int tcp_do_parse_auth_options(const struct tcphdr *th, - const u8 **md5_hash, const u8 **ao_hash) -{ - int length = (th->doff << 2) - sizeof(*th); - const u8 *ptr = (const u8 *)(th + 1); - unsigned int minlen = TCPOLEN_MD5SIG; - - if (IS_ENABLED(CONFIG_TCP_AO)) - minlen = sizeof(struct tcp_ao_hdr) + 1; - - *md5_hash = NULL; - *ao_hash = NULL; - - /* If not enough data remaining, we can short cut */ - while (length >= minlen) { - int opcode = *ptr++; - int opsize; - - switch (opcode) { - case TCPOPT_EOL: - return 0; - case TCPOPT_NOP: - length--; - continue; - default: - opsize = *ptr++; - if (opsize < 2 || opsize > length) - return -EINVAL; - if (opcode == TCPOPT_MD5SIG) { - if (opsize != TCPOLEN_MD5SIG) - return -EINVAL; - if (unlikely(*md5_hash || *ao_hash)) - return -EEXIST; - *md5_hash = ptr; - } else if (opcode == TCPOPT_AO) { - if (opsize <= sizeof(struct tcp_ao_hdr)) - return -EINVAL; - if (unlikely(*md5_hash || *ao_hash)) - return -EEXIST; - *ao_hash = ptr; - } - } - ptr += opsize - 2; - length -= opsize; - } - return 0; -} -EXPORT_SYMBOL(tcp_do_parse_auth_options); -#endif - /* Sorry, PAWS as specified is broken wrt. pure-ACKs -DaveM * * It is not fatal. 
If this ACK does _not_ change critical state (seqs, window) @@ -4862,20 +4806,18 @@ static enum skb_drop_reason tcp_sequence(const struct sock *sk, const struct tcphdr *th) { const struct tcp_sock *tp = tcp_sk(sk); - u32 seq_limit; if (before(end_seq, tp->rcv_wup)) return SKB_DROP_REASON_TCP_OLD_SEQUENCE; - seq_limit = tp->rcv_nxt + tcp_receive_window(tp); - if (unlikely(after(end_seq, seq_limit))) { + if (unlikely(after(end_seq, tp->rcv_nxt + tcp_max_receive_window(tp)))) { /* Some stacks are known to handle FIN incorrectly; allow the * FIN to extend beyond the window and check it in detail later. */ - if (!after(end_seq - th->fin, seq_limit)) + if (!after(end_seq - th->fin, tp->rcv_nxt + tcp_receive_window(tp))) return SKB_NOT_DROPPED_YET; - if (after(seq, seq_limit)) + if (after(seq, tp->rcv_nxt + tcp_max_receive_window(tp))) return SKB_DROP_REASON_TCP_INVALID_SEQUENCE; /* Only accept this packet if receive queue is empty. */ @@ -4899,7 +4841,6 @@ void tcp_done_with_error(struct sock *sk, int err) if (!sock_flag(sk, SOCK_DEAD)) sk_error_report(sk); } -EXPORT_IPV6_MOD(tcp_done_with_error); /* When we get a reset we do this. */ void tcp_reset(struct sock *sk, struct sk_buff *skb) @@ -5734,6 +5675,7 @@ drop: if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt + tcp_receive_window(tp))) { reason = SKB_DROP_REASON_TCP_OVERWINDOW; + NET_INC_STATS(sock_net(sk), LINUX_MIB_BEYOND_WINDOW); goto out_of_window; } @@ -6720,7 +6662,6 @@ csum_error: discard: tcp_drop_reason(sk, skb, reason); } -EXPORT_IPV6_MOD(tcp_rcv_established); void tcp_init_transfer(struct sock *sk, int bpf_op, struct sk_buff *skb) { @@ -6957,6 +6898,7 @@ consume: */ WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1); tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; + tp->rcv_mwnd_seq = tp->rcv_wup + tp->rcv_wnd; /* RFC1323: The window in SYN & SYN/ACK segments is * never scaled. 
@@ -7069,6 +7011,7 @@ consume: WRITE_ONCE(tp->rcv_nxt, TCP_SKB_CB(skb)->seq + 1); WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); tp->rcv_wup = TCP_SKB_CB(skb)->seq + 1; + tp->rcv_mwnd_seq = tp->rcv_wup + tp->rcv_wnd; /* RFC1323: The window in SYN & SYN/ACK segments is * never scaled. @@ -7437,7 +7380,6 @@ consume: __kfree_skb(skb); return 0; } -EXPORT_IPV6_MOD(tcp_rcv_state_process); static inline void pr_drop_req(struct request_sock *req, __u16 port, int family) { @@ -7635,7 +7577,6 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops, return mss; } -EXPORT_IPV6_MOD_GPL(tcp_get_syncookie_mss); int tcp_conn_request(struct request_sock_ops *rsk_ops, const struct tcp_request_sock_ops *af_ops, @@ -7717,7 +7658,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, goto drop_and_free; if (tmp_opt.tstamp_ok || (!want_cookie && !isn)) - st = af_ops->init_seq_and_ts_off(net, skb); + st = INDIRECT_CALL_INET(af_ops->init_seq_and_ts_off, + tcp_v6_init_seq_and_ts_off, + tcp_v4_init_seq_and_ts_off, + net, skb); if (tmp_opt.tstamp_ok) { tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); @@ -7815,4 +7759,3 @@ drop: tcp_listendrop(sk); return 0; } -EXPORT_IPV6_MOD(tcp_conn_request); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 3b9826d89953..8fc24c3743c5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -105,7 +105,7 @@ static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = { static DEFINE_MUTEX(tcp_exit_batch_mutex); -static union tcp_seq_and_ts_off +INDIRECT_CALLABLE_SCOPE union tcp_seq_and_ts_off tcp_v4_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb) { return secure_tcp_seq_and_ts_off(net, @@ -201,7 +201,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) return 0; } -EXPORT_IPV6_MOD_GPL(tcp_twsk_unique); static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) @@ -362,7 +361,6 @@ failure: inet->inet_dport = 0; return err; } -EXPORT_IPV6_MOD(tcp_v4_connect); /* 
* This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191. @@ -402,7 +400,6 @@ void tcp_v4_mtu_reduced(struct sock *sk) tcp_simple_retransmit(sk); } /* else let the usual retransmit timer handle it */ } -EXPORT_IPV6_MOD(tcp_v4_mtu_reduced); static void do_redirect(struct sk_buff *skb, struct sock *sk) { @@ -436,7 +433,6 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort) } reqsk_put(req); } -EXPORT_IPV6_MOD(tcp_req_err); /* TCP-LD (RFC 6069) logic */ void tcp_ld_RTO_revert(struct sock *sk, u32 seq) @@ -475,7 +471,6 @@ void tcp_ld_RTO_revert(struct sock *sk, u32 seq) tcp_retransmit_timer(sk); } } -EXPORT_IPV6_MOD(tcp_ld_RTO_revert); /* * This routine is called by the ICMP module when it gets some @@ -660,24 +655,6 @@ out: return 0; } -void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr) -{ - struct tcphdr *th = tcp_hdr(skb); - - th->check = ~tcp_v4_check(skb->len, saddr, daddr, 0); - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct tcphdr, check); -} - -/* This routine computes an IPv4 TCP checksum. 
*/ -void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) -{ - const struct inet_sock *inet = inet_sk(sk); - - __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); -} -EXPORT_IPV6_MOD(tcp_v4_send_check); - #define REPLY_OPTIONS_LEN (MAX_TCP_OPTION_SPACE / sizeof(__be32)) static bool tcp_v4_ao_sign_reset(const struct sock *sk, struct sk_buff *skb, @@ -1240,7 +1217,6 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) */ DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_md5_needed, HZ); -EXPORT_IPV6_MOD(tcp_md5_needed); static bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key *new) { @@ -1299,7 +1275,6 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, } return best_match; } -EXPORT_IPV6_MOD(__tcp_md5_do_lookup); static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk, const union tcp_md5_addr *addr, @@ -1346,7 +1321,6 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr; return tcp_md5_do_lookup(sk, l3index, addr, AF_INET); } -EXPORT_IPV6_MOD(tcp_v4_md5_lookup); static int tcp_md5sig_info_add(struct sock *sk, gfp_t gfp) { @@ -1441,7 +1415,6 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, return __tcp_md5_do_add(sk, addr, family, prefixlen, l3index, flags, newkey, newkeylen, GFP_KERNEL); } -EXPORT_IPV6_MOD(tcp_md5_do_add); int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, @@ -1469,7 +1442,6 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, key->flags, key->key, key->keylen, sk_gfp_mask(sk, GFP_ATOMIC)); } -EXPORT_IPV6_MOD(tcp_md5_key_copy); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags) @@ -1484,7 +1456,6 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, kfree_rcu(key, rcu); return 0; } 
-EXPORT_IPV6_MOD(tcp_md5_do_del); void tcp_clear_md5_list(struct sock *sk) { @@ -1622,7 +1593,6 @@ tcp_v4_md5_hash_skb(char *md5_hash, const struct tcp_md5sig_key *key, tcp_md5_hash_key(&ctx, key); md5_final(&ctx, md5_hash); } -EXPORT_IPV6_MOD(tcp_v4_md5_hash_skb); #endif @@ -1692,7 +1662,6 @@ drop: tcp_listendrop(sk); return 0; } -EXPORT_IPV6_MOD(tcp_v4_conn_request); /* @@ -1818,7 +1787,6 @@ put_and_exit: tcp_done(newsk); goto exit; } -EXPORT_IPV6_MOD(tcp_v4_syn_recv_sock); static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) { @@ -1873,7 +1841,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); - if (dst) { + if (dst && unlikely(dst != skb_dst(skb))) { if (sk->sk_rx_dst_ifindex != skb->skb_iif || !INDIRECT_CALL_1(dst->ops->check, ipv4_dst_check, dst, 0)) { @@ -1932,46 +1900,7 @@ err_discard: } EXPORT_SYMBOL(tcp_v4_do_rcv); -int tcp_v4_early_demux(struct sk_buff *skb) -{ - struct net *net = dev_net_rcu(skb->dev); - const struct iphdr *iph; - const struct tcphdr *th; - struct sock *sk; - - if (skb->pkt_type != PACKET_HOST) - return 0; - - if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) - return 0; - - iph = ip_hdr(skb); - th = tcp_hdr(skb); - - if (th->doff < sizeof(struct tcphdr) / 4) - return 0; - - sk = __inet_lookup_established(net, iph->saddr, th->source, - iph->daddr, ntohs(th->dest), - skb->skb_iif, inet_sdif(skb)); - if (sk) { - skb->sk = sk; - skb->destructor = sock_edemux; - if (sk_fullsock(sk)) { - struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); - - if (dst) - dst = dst_check(dst, 0); - if (dst && - sk->sk_rx_dst_ifindex == skb->skb_iif) - skb_dst_set_noref(skb, dst); - } - } - return 0; -} - -bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, - enum skb_drop_reason *reason) +enum skb_drop_reason tcp_add_backlog(struct sock *sk, struct sk_buff *skb) { u32 tail_gso_size, tail_gso_segs; struct skb_shared_info *shinfo; @@ 
-1999,10 +1928,9 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, if (unlikely(tcp_checksum_complete(skb))) { bh_unlock_sock(sk); trace_tcp_bad_csum(skb); - *reason = SKB_DROP_REASON_TCP_CSUM; __TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS); __TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS); - return true; + return SKB_DROP_REASON_TCP_CSUM; } /* Attempt coalescing to last skb in backlog, even if we are @@ -2076,7 +2004,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGCOALESCE); kfree_skb_partial(skb, fragstolen); - return false; + return SKB_NOT_DROPPED_YET; } __skb_push(skb, hdrlen); @@ -2101,17 +2029,14 @@ no_coalesce: if (unlikely(err)) { bh_unlock_sock(sk); if (err == -ENOMEM) { - *reason = SKB_DROP_REASON_PFMEMALLOC; __NET_INC_STATS(sock_net(sk), LINUX_MIB_PFMEMALLOCDROP); - } else { - *reason = SKB_DROP_REASON_SOCKET_BACKLOG; - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP); + return SKB_DROP_REASON_PFMEMALLOC; } - return true; + __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPBACKLOGDROP); + return SKB_DROP_REASON_SOCKET_BACKLOG; } - return false; + return SKB_NOT_DROPPED_YET; } -EXPORT_IPV6_MOD(tcp_add_backlog); static void tcp_v4_restore_cb(struct sk_buff *skb) { @@ -2235,7 +2160,8 @@ lookup: } refcounted = true; nsk = NULL; - if (!tcp_filter(sk, skb, &drop_reason)) { + drop_reason = tcp_filter(sk, skb); + if (!drop_reason) { th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); tcp_v4_fill_cb(skb, iph, th); @@ -2296,7 +2222,8 @@ process: nf_reset_ct(skb); - if (tcp_filter(sk, skb, &drop_reason)) + drop_reason = tcp_filter(sk, skb); + if (drop_reason) goto discard_and_relse; th = (const struct tcphdr *)skb->data; @@ -2318,7 +2245,8 @@ process: if (!sock_owned_by_user(sk)) { ret = tcp_v4_do_rcv(sk, skb); } else { - if (tcp_add_backlog(sk, skb, &drop_reason)) + drop_reason = tcp_add_backlog(sk, skb); + if (drop_reason) goto discard_and_relse; } bh_unlock_sock(sk); @@ -2423,11 
+2351,9 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) sk->sk_rx_dst_ifindex = skb->skb_iif; } } -EXPORT_IPV6_MOD(inet_sk_rx_dst_set); const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, - .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v4_conn_request, @@ -2437,7 +2363,6 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .getsockopt = ip_getsockopt, .mtu_reduced = tcp_v4_mtu_reduced, }; -EXPORT_IPV6_MOD(ipv4_specific); #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { @@ -2530,7 +2455,6 @@ void tcp_v4_destroy_sock(struct sock *sk) sk_sockets_allocated_dec(sk); } -EXPORT_IPV6_MOD(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS /* Proc filesystem TCP sock list dumping. */ @@ -2766,7 +2690,6 @@ out: st->last_pos = *pos; return rc; } -EXPORT_IPV6_MOD(tcp_seq_start); void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { @@ -2797,7 +2720,6 @@ out: st->last_pos = *pos; return rc; } -EXPORT_IPV6_MOD(tcp_seq_next); void tcp_seq_stop(struct seq_file *seq, void *v) { @@ -2815,7 +2737,6 @@ void tcp_seq_stop(struct seq_file *seq, void *v) break; } } -EXPORT_IPV6_MOD(tcp_seq_stop); static void get_openreq4(const struct request_sock *req, struct seq_file *f, int i) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index dafb63b923d0..199f0b579e89 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -287,7 +287,6 @@ out_put: inet_twsk_put(tw); return TCP_TW_SUCCESS; } -EXPORT_IPV6_MOD(tcp_timewait_state_process); static void tcp_time_wait_init(struct sock *sk, struct tcp_timewait_sock *tcptw) { @@ -523,7 +522,6 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) tcp_set_ca_state(sk, TCP_CA_Open); } -EXPORT_IPV6_MOD_GPL(tcp_ca_openreq_child); static void smc_check_reset_syn_req(const struct 
tcp_sock *oldtp, struct request_sock *req, @@ -604,6 +602,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->window_clamp = req->rsk_window_clamp; newtp->rcv_ssthresh = req->rsk_rcv_wnd; newtp->rcv_wnd = req->rsk_rcv_wnd; + newtp->rcv_mwnd_seq = newtp->rcv_wup + req->rsk_rcv_wnd; newtp->rx_opt.wscale_ok = ireq->wscale_ok; if (newtp->rx_opt.wscale_ok) { newtp->rx_opt.snd_wscale = ireq->snd_wscale; @@ -975,7 +974,6 @@ embryonic_reset: } return NULL; } -EXPORT_IPV6_MOD(tcp_check_req); /* * Queue segment on the new socket if the new socket is active, @@ -1017,4 +1015,3 @@ enum skb_drop_reason tcp_child_process(struct sock *parent, struct sock *child, sock_put(child); return reason; } -EXPORT_IPV6_MOD(tcp_child_process); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 326b58ff1118..8e99687526a6 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -272,7 +272,6 @@ void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, WRITE_ONCE(*__window_clamp, min_t(__u32, U16_MAX << (*rcv_wscale), window_clamp)); } -EXPORT_IPV6_MOD(tcp_select_initial_window); /* Chose a new window to advertise, update state in tcp_sock for the * socket, and return result with RFC1323 scaling applied. The return @@ -293,6 +292,7 @@ static u16 tcp_select_window(struct sock *sk) tp->pred_flags = 0; tp->rcv_wnd = 0; tp->rcv_wup = tp->rcv_nxt; + tcp_update_max_rcv_wnd_seq(tp); return 0; } @@ -316,6 +316,7 @@ static u16 tcp_select_window(struct sock *sk) tp->rcv_wnd = new_win; tp->rcv_wup = tp->rcv_nxt; + tcp_update_max_rcv_wnd_seq(tp); /* Make sure we do not exceed the maximum possible * scaled window. 
@@ -429,14 +430,18 @@ static void smc_options_write(__be32 *ptr, u16 *options) } struct tcp_out_options { + /* Following group is cleared in __tcp_transmit_skb() */ + struct_group(cleared, + u16 mss; /* 0 to disable */ + u8 bpf_opt_len; /* length of BPF hdr option */ + u8 num_sack_blocks; /* number of SACK blocks to include */ + ); + + /* Caution: following fields are not cleared in __tcp_transmit_skb() */ u16 options; /* bit field of OPTION_* */ - u16 mss; /* 0 to disable */ u8 ws; /* window scale, 0 to disable */ - u8 num_sack_blocks; /* number of SACK blocks to include */ u8 num_accecn_fields:7, /* number of AccECN fields needed */ use_synack_ecn_bytes:1; /* Use synack_ecn_bytes or not */ - u8 hash_size; /* bytes in hash_location */ - u8 bpf_opt_len; /* length of BPF hdr option */ __u8 *hash_location; /* temporary pointer, overloaded */ __u32 tsval, tsecr; /* need to include OPTION_TS */ struct tcp_fastopen_cookie *fastopen_cookie; /* Fast open cookie */ @@ -965,6 +970,8 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, struct tcp_fastopen_request *fastopen = tp->fastopen_req; bool timestamps; + opts->options = 0; + /* Better than switch (key.type) as it has static branches */ if (tcp_key_is_md5(key)) { timestamps = false; @@ -1314,11 +1321,6 @@ static void tcp_tsq_workfn(struct work_struct *work) } } -#define TCP_DEFERRED_ALL (TCPF_TSQ_DEFERRED | \ - TCPF_WRITE_TIMER_DEFERRED | \ - TCPF_DELACK_TIMER_DEFERRED | \ - TCPF_MTU_REDUCED_DEFERRED | \ - TCPF_ACK_DEFERRED) /** * tcp_release_cb - tcp release_sock() callback * @sk: socket @@ -1358,7 +1360,6 @@ void tcp_release_cb(struct sock *sk) if ((flags & TCPF_ACK_DEFERRED) && inet_csk_ack_scheduled(sk)) tcp_send_ack(sk); } -EXPORT_IPV6_MOD(tcp_release_cb); void __init tcp_tsq_work_init(void) { @@ -1496,7 +1497,23 @@ static void tcp_rate_skb_sent(struct sock *sk, struct sk_buff *skb) INDIRECT_CALLABLE_DECLARE(int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)); 
INDIRECT_CALLABLE_DECLARE(int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)); -INDIRECT_CALLABLE_DECLARE(void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb)); + +/* This routine computes an IPv4 TCP checksum. */ +static void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) +{ + const struct inet_sock *inet = inet_sk(sk); + + __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); +} + +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ip6_checksum.h> + +static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) +{ + __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr); +} +#endif /* This routine actually transmits TCP packets queued in by * tcp_do_sendmsg(). This is used by both the initial @@ -1549,7 +1566,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, inet = inet_sk(sk); tcb = TCP_SKB_CB(skb); - memset(&opts, 0, sizeof(opts)); + memset(&opts.cleared, 0, sizeof(opts.cleared)); tcp_get_current_key(sk, &key); if (unlikely(tcb->tcp_flags & TCPHDR_SYN)) { @@ -1659,9 +1676,12 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, /* BPF prog is the last one writing header option */ bpf_skops_write_hdr_opt(sk, skb, NULL, NULL, 0, &opts); - INDIRECT_CALL_INET(icsk->icsk_af_ops->send_check, - tcp_v6_send_check, tcp_v4_send_check, - sk, skb); +#if IS_ENABLED(CONFIG_IPV6) + if (likely(icsk->icsk_af_ops->net_header_len == sizeof(struct ipv6hdr))) + tcp_v6_send_check(sk, skb); + else +#endif + tcp_v4_send_check(sk, skb); if (likely(tcb->tcp_flags & TCPHDR_ACK)) tcp_event_ack_sent(sk, rcv_nxt); @@ -2001,7 +2021,6 @@ int tcp_mtu_to_mss(struct sock *sk, int pmtu) return __tcp_mtu_to_mss(sk, pmtu) - (tcp_sk(sk)->tcp_header_len - sizeof(struct tcphdr)); } -EXPORT_IPV6_MOD(tcp_mtu_to_mss); /* Inverse of above */ int tcp_mss_to_mtu(struct sock *sk, int mss) @@ -2074,7 +2093,6 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu) return mss_now; } -EXPORT_IPV6_MOD(tcp_sync_mss); 
/* Compute the current effective MSS, taking SACKs and IP options, * and even PMTU discovery events into account. @@ -2878,30 +2896,6 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, return false; } -static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new) -{ - const u32 now = tcp_jiffies32; - enum tcp_chrono old = tp->chrono_type; - - if (old > TCP_CHRONO_UNSPEC) - tp->chrono_stat[old - 1] += now - tp->chrono_start; - tp->chrono_start = now; - tp->chrono_type = new; -} - -void tcp_chrono_start(struct sock *sk, const enum tcp_chrono type) -{ - struct tcp_sock *tp = tcp_sk(sk); - - /* If there are multiple conditions worthy of tracking in a - * chronograph then the highest priority enum takes precedence - * over the other conditions. So that if something "more interesting" - * starts happening, stop the previous chrono and start a new one. - */ - if (type > tp->chrono_type) - tcp_chrono_set(tp, type); -} - void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type) { struct tcp_sock *tp = tcp_sk(sk); @@ -3116,7 +3110,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) * not in loss recovery, that are either limited by cwnd or application. 
*/ if ((early_retrans != 3 && early_retrans != 4) || - !tp->packets_out || !tcp_is_sack(tp) || + !tcp_is_sack(tp) || (icsk->icsk_ca_state != TCP_CA_Open && icsk->icsk_ca_state != TCP_CA_CWR)) return false; @@ -4079,7 +4073,6 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, return skb; } -EXPORT_IPV6_MOD(tcp_make_synack); static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst) { @@ -4169,6 +4162,7 @@ static void tcp_connect_init(struct sock *sk) else tp->rcv_tstamp = tcp_jiffies32; tp->rcv_wup = tp->rcv_nxt; + tp->rcv_mwnd_seq = tp->rcv_nxt + tp->rcv_wnd; WRITE_ONCE(tp->copied_seq, tp->rcv_nxt); inet_csk(sk)->icsk_rto = tcp_timeout_init(sk); @@ -4659,4 +4653,3 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) } return res; } -EXPORT_IPV6_MOD(tcp_rtx_synack); diff --git a/net/ipv4/tcp_plb.c b/net/ipv4/tcp_plb.c index 4bcf7eff95e3..68ccdb9a5412 100644 --- a/net/ipv4/tcp_plb.c +++ b/net/ipv4/tcp_plb.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Protective Load Balancing (PLB) * * PLB was designed to reduce link load imbalance across datacenter diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 5a14a53a3c9e..ea99988795e7 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -774,7 +774,6 @@ void tcp_set_keepalive(struct sock *sk, int val) else if (!val) tcp_delete_keepalive_timer(sk); } -EXPORT_IPV6_MOD_GPL(tcp_set_keepalive); static void tcp_keepalive_timer(struct timer_list *t) { diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 786848ad37ea..950a66966059 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -151,12 +151,17 @@ EXPORT_SYMBOL_GPL(tcp_vegas_state); */ void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event) { - if (event == CA_EVENT_CWND_RESTART || - event == CA_EVENT_TX_START) + if (event == CA_EVENT_CWND_RESTART) tcp_vegas_init(sk); } EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); +void 
tcp_vegas_cwnd_event_tx_start(struct sock *sk) +{ + tcp_vegas_init(sk); +} +EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event_tx_start); + static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) { return min(tp->snd_ssthresh, tcp_snd_cwnd(tp)); @@ -314,6 +319,7 @@ static struct tcp_congestion_ops tcp_vegas __read_mostly = { .pkts_acked = tcp_vegas_pkts_acked, .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, + .cwnd_event_tx_start = tcp_vegas_cwnd_event_tx_start, .get_info = tcp_vegas_get_info, .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h index 4f24d0e37d9c..602af8e600c7 100644 --- a/net/ipv4/tcp_vegas.h +++ b/net/ipv4/tcp_vegas.h @@ -20,6 +20,7 @@ void tcp_vegas_init(struct sock *sk); void tcp_vegas_state(struct sock *sk, u8 ca_state); void tcp_vegas_pkts_acked(struct sock *sk, const struct ack_sample *sample); void tcp_vegas_cwnd_event(struct sock *sk, enum tcp_ca_event event); +void tcp_vegas_cwnd_event_tx_start(struct sock *sk); size_t tcp_vegas_get_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info); diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 366ff6f214b2..1b2e1b947901 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -112,10 +112,15 @@ static void tcp_veno_state(struct sock *sk, u8 ca_state) */ static void tcp_veno_cwnd_event(struct sock *sk, enum tcp_ca_event event) { - if (event == CA_EVENT_CWND_RESTART || event == CA_EVENT_TX_START) + if (event == CA_EVENT_CWND_RESTART) tcp_veno_init(sk); } +static void tcp_veno_cwnd_event_tx_start(struct sock *sk) +{ + tcp_veno_init(sk); +} + static void tcp_veno_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); @@ -213,6 +218,7 @@ static struct tcp_congestion_ops tcp_veno __read_mostly = { .pkts_acked = tcp_veno_pkts_acked, .set_state = tcp_veno_state, .cwnd_event = tcp_veno_cwnd_event, + .cwnd_event_tx_start = tcp_veno_cwnd_event_tx_start, .owner = THIS_MODULE, .name = "veno", diff --git 
a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index 18b07ff5d20e..b22b3dccd05e 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -212,6 +212,7 @@ static struct tcp_congestion_ops tcp_yeah __read_mostly = { .cong_avoid = tcp_yeah_cong_avoid, .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, + .cwnd_event_tx_start = tcp_vegas_cwnd_event_tx_start, .get_info = tcp_vegas_get_info, .pkts_acked = tcp_vegas_pkts_acked, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 05a53f7aef33..0ac2bf4f8759 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -98,8 +98,10 @@ #include <linux/skbuff.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> +#include <net/aligned_data.h> #include <net/net_namespace.h> #include <net/icmp.h> +#include <net/inet_common.h> #include <net/inet_hashtables.h> #include <net/ip.h> #include <net/ip_tunnels.h> @@ -112,20 +114,15 @@ #include <linux/btf_ids.h> #include <trace/events/skb.h> #include <net/busy_poll.h> -#include "udp_impl.h" #include <net/sock_reuseport.h> #include <net/addrconf.h> #include <net/udp_tunnel.h> #include <net/gro.h> -#if IS_ENABLED(CONFIG_IPV6) -#include <net/ipv6_stubs.h> -#endif #include <net/rps.h> struct udp_table udp_table __read_mostly; long sysctl_udp_mem[3] __read_mostly; -EXPORT_IPV6_MOD(sysctl_udp_mem); DEFINE_PER_CPU(int, udp_memory_per_cpu_fw_alloc); EXPORT_PER_CPU_SYMBOL_GPL(udp_memory_per_cpu_fw_alloc); @@ -133,11 +130,6 @@ EXPORT_PER_CPU_SYMBOL_GPL(udp_memory_per_cpu_fw_alloc); #define MAX_UDP_PORTS 65536 #define PORTS_PER_CHAIN (MAX_UDP_PORTS / UDP_HTABLE_SIZE_MIN_PERNET) -static struct udp_table *udp_get_table_prot(struct sock *sk) -{ - return sk->sk_prot->h.udp_table ? 
: sock_net(sk)->ipv4.udp_table; -} - static int udp_lib_lport_inuse(struct net *net, __u16 num, const struct udp_hslot *hslot, unsigned long *bitmap, @@ -229,7 +221,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot) } /** - * udp_lib_get_port - UDP/-Lite port lookup for IPv4 and IPv6 + * udp_lib_get_port - UDP port lookup for IPv4 and IPv6 * * @sk: socket struct in question * @snum: port number to look up @@ -239,11 +231,13 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot) int udp_lib_get_port(struct sock *sk, unsigned short snum, unsigned int hash2_nulladdr) { - struct udp_table *udptable = udp_get_table_prot(sk); struct udp_hslot *hslot, *hslot2; struct net *net = sock_net(sk); + struct udp_table *udptable; int error = -EADDRINUSE; + udptable = net->ipv4.udp_table; + if (!snum) { DECLARE_BITMAP(bitmap, PORTS_PER_CHAIN); unsigned short first, last; @@ -351,9 +345,8 @@ fail_unlock: fail: return error; } -EXPORT_IPV6_MOD(udp_lib_get_port); -int udp_v4_get_port(struct sock *sk, unsigned short snum) +static int udp_v4_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = ipv4_portaddr_hash(sock_net(sk), htonl(INADDR_ANY), snum); @@ -365,10 +358,10 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, hash2_nulladdr); } -static int compute_score(struct sock *sk, const struct net *net, - __be32 saddr, __be16 sport, - __be32 daddr, unsigned short hnum, - int dif, int sdif) +static __always_inline int +compute_score(struct sock *sk, const struct net *net, + __be32 saddr, __be16 sport, __be32 daddr, + unsigned short hnum, int dif, int sdif) { int score; struct inet_sock *inet; @@ -417,7 +410,6 @@ u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, return __inet_ehashfn(laddr, lport, faddr, fport, udp_ehash_secret + net_hash_mix(net)); } -EXPORT_IPV6_MOD(udp_ehashfn); /** * udp4_lib_lookup1() - Simplified lookup using primary 
hash (destination port) @@ -508,8 +500,8 @@ rescore: continue; /* compute_score is too long of a function to be - * inlined, and calling it again here yields - * measurable overhead for some + * inlined twice here, and calling it uninlined + * here yields measurable overhead for some * workloads. Work around it by jumping * backwards to rescore 'result'. */ @@ -652,7 +644,6 @@ void udp_lib_hash4(struct sock *sk, u16 hash) spin_unlock_bh(&hslot->lock); } -EXPORT_IPV6_MOD(udp_lib_hash4); /* call with sock lock */ void udp4_hash4(struct sock *sk) @@ -668,16 +659,16 @@ void udp4_hash4(struct sock *sk) udp_lib_hash4(sk, hash); } -EXPORT_IPV6_MOD(udp4_hash4); #endif /* CONFIG_BASE_SMALL */ /* UDP is nearly always wildcards out the wazoo, it makes no sense to try * harder than this. -DaveM */ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr, - __be16 sport, __be32 daddr, __be16 dport, int dif, - int sdif, struct udp_table *udptable, struct sk_buff *skb) + __be16 sport, __be32 daddr, __be16 dport, + int dif, int sdif, struct sk_buff *skb) { + struct udp_table *udptable = net->ipv4.udp_table; unsigned short hnum = ntohs(dport); struct udp_hslot *hslot2; struct sock *result, *sk; @@ -701,8 +692,7 @@ struct sock *__udp4_lib_lookup(const struct net *net, __be32 saddr, goto done; /* Lookup redirect from BPF */ - if (static_branch_unlikely(&bpf_sk_lookup_enabled) && - udptable == net->ipv4.udp_table) { + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { sk = inet_lookup_run_sk_lookup(net, IPPROTO_UDP, skb, sizeof(struct udphdr), saddr, sport, daddr, hnum, dif, udp_ehashfn); @@ -744,14 +734,13 @@ done: EXPORT_SYMBOL_GPL(__udp4_lib_lookup); static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, - __be16 sport, __be16 dport, - struct udp_table *udptable) + __be16 sport, __be16 dport) { const struct iphdr *iph = ip_hdr(skb); return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport, iph->daddr, dport, inet_iif(skb), - inet_sdif(skb), 
udptable, skb); + inet_sdif(skb), skb); } struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, @@ -759,14 +748,12 @@ struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb, { const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; const struct iphdr *iph = (struct iphdr *)(skb->data + offset); - struct net *net = dev_net(skb->dev); int iif, sdif; inet_get_iif_sdif(skb, &iif, &sdif); - return __udp4_lib_lookup(net, iph->saddr, sport, - iph->daddr, dport, iif, - sdif, net->ipv4.udp_table, NULL); + return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport, + iph->daddr, dport, iif, sdif, NULL); } /* Must be called under rcu_read_lock(). @@ -778,8 +765,7 @@ struct sock *udp4_lib_lookup(const struct net *net, __be32 saddr, __be16 sport, { struct sock *sk; - sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport, - dif, 0, net->ipv4.udp_table, NULL); + sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport, dif, 0, NULL); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; @@ -808,11 +794,9 @@ static inline bool __udp_is_mcast_sock(struct net *net, const struct sock *sk, } DEFINE_STATIC_KEY_FALSE(udp_encap_needed_key); -EXPORT_IPV6_MOD(udp_encap_needed_key); #if IS_ENABLED(CONFIG_IPV6) DEFINE_STATIC_KEY_FALSE(udpv6_encap_needed_key); -EXPORT_IPV6_MOD(udpv6_encap_needed_key); #endif void udp_encap_enable(void) @@ -869,7 +853,6 @@ static int __udp4_lib_err_encap_no_sk(struct sk_buff *skb, u32 info) static struct sock *__udp4_lib_err_encap(struct net *net, const struct iphdr *iph, struct udphdr *uh, - struct udp_table *udptable, struct sock *sk, struct sk_buff *skb, u32 info) { @@ -897,8 +880,7 @@ static struct sock *__udp4_lib_err_encap(struct net *net, } sk = __udp4_lib_lookup(net, iph->daddr, uh->source, - iph->saddr, uh->dest, skb->dev->ifindex, 0, - udptable, NULL); + iph->saddr, uh->dest, skb->dev->ifindex, 0, NULL); if (sk) { up = udp_sk(sk); @@ -927,29 +909,28 @@ out: * header points to the first 8 bytes of the 
udp header. We need * to find the appropriate port. */ - -int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) +int udp_err(struct sk_buff *skb, u32 info) { - struct inet_sock *inet; const struct iphdr *iph = (const struct iphdr *)skb->data; - struct udphdr *uh = (struct udphdr *)(skb->data+(iph->ihl<<2)); const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; + struct net *net = dev_net(skb->dev); + struct inet_sock *inet; bool tunnel = false; + struct udphdr *uh; struct sock *sk; int harderr; int err; - struct net *net = dev_net(skb->dev); + uh = (struct udphdr *)(skb->data + (iph->ihl << 2)); sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, iph->saddr, uh->source, skb->dev->ifindex, - inet_sdif(skb), udptable, NULL); + inet_sdif(skb), NULL); if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) { /* No socket for error: try tunnels before discarding */ if (static_branch_unlikely(&udp_encap_needed_key)) { - sk = __udp4_lib_err_encap(net, iph, uh, udptable, sk, skb, - info); + sk = __udp4_lib_err_encap(net, iph, uh, sk, skb, info); if (!sk) return 0; } else @@ -1022,11 +1003,6 @@ out: return 0; } -int udp_err(struct sk_buff *skb, u32 info) -{ - return __udp4_lib_err(skb, info, dev_net(skb->dev)->ipv4.udp_table); -} - /* * Throw away all pending data and cancel the corking. Socket is locked. 
*/ @@ -1040,7 +1016,6 @@ void udp_flush_pending_frames(struct sock *sk) ip_flush_pending_frames(sk); } } -EXPORT_IPV6_MOD(udp_flush_pending_frames); /** * udp4_hwcsum - handle outgoing HW checksumming @@ -1118,20 +1093,19 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, struct inet_cork *cork) { struct sock *sk = skb->sk; - struct inet_sock *inet = inet_sk(sk); + int offset, len, datalen; struct udphdr *uh; int err; - int is_udplite = IS_UDPLITE(sk); - int offset = skb_transport_offset(skb); - int len = skb->len - offset; - int datalen = len - sizeof(*uh); - __wsum csum = 0; + + offset = skb_transport_offset(skb); + len = skb->len - offset; + datalen = len - sizeof(*uh); /* * Create a UDP header */ uh = udp_hdr(skb); - uh->source = inet->inet_sport; + uh->source = inet_sk(sk)->inet_sport; uh->dest = fl4->fl4_dport; uh->len = htons(len); uh->check = 0; @@ -1152,7 +1126,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, kfree_skb(skb); return -EINVAL; } - if (is_udplite || dst_xfrm(skb_dst(skb))) { + if (dst_xfrm(skb_dst(skb))) { kfree_skb(skb); return -EIO; } @@ -1168,26 +1142,18 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, } } - if (is_udplite) /* UDP-Lite */ - csum = udplite_csum(skb); - - else if (sk->sk_no_check_tx) { /* UDP csum off */ - + if (sk->sk_no_check_tx) { /* UDP csum off */ skb->ip_summed = CHECKSUM_NONE; goto send; - } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ csum_partial: - udp4_hwcsum(skb, fl4->saddr, fl4->daddr); goto send; - - } else - csum = udp_csum(skb); + } /* add protocol-dependent pseudo-header */ uh->check = csum_tcpudp_magic(fl4->saddr, fl4->daddr, len, - sk->sk_protocol, csum); + IPPROTO_UDP, udp_csum(skb)); if (uh->check == 0) uh->check = CSUM_MANGLED_0; @@ -1196,13 +1162,12 @@ send: if (unlikely(err)) { if (err == -ENOBUFS && !inet_test_bit(RECVERR, sk)) { - UDP_INC_STATS(sock_net(sk), - UDP_MIB_SNDBUFERRORS, is_udplite); + 
UDP_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS); err = 0; } - } else - UDP_INC_STATS(sock_net(sk), - UDP_MIB_OUTDATAGRAMS, is_udplite); + } else { + UDP_INC_STATS(sock_net(sk), UDP_MIB_OUTDATAGRAMS); + } return err; } @@ -1228,7 +1193,6 @@ out: WRITE_ONCE(up->pending, 0); return err; } -EXPORT_IPV6_MOD(udp_push_pending_frames); static int __udp_cmsg_send(struct cmsghdr *cmsg, u16 *gso_size) { @@ -1265,30 +1229,26 @@ int udp_cmsg_send(struct sock *sk, struct msghdr *msg, u16 *gso_size) return need_ip; } -EXPORT_IPV6_MOD_GPL(udp_cmsg_send); int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { + int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; DEFINE_RAW_FLEX(struct ip_options_rcu, opt_copy, opt.__data, IP_OPTIONS_DATA_FIXED_SIZE); + DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); + int ulen = len, free = 0, connected = 0; struct inet_sock *inet = inet_sk(sk); struct udp_sock *up = udp_sk(sk); - DECLARE_SOCKADDR(struct sockaddr_in *, usin, msg->msg_name); + __be32 daddr, faddr, saddr; + struct rtable *rt = NULL; struct flowi4 fl4_stack; - struct flowi4 *fl4; - int ulen = len; struct ipcm_cookie ipc; - struct rtable *rt = NULL; - int free = 0; - int connected = 0; - __be32 daddr, faddr, saddr; - u8 scope; - __be16 dport; - int err, is_udplite = IS_UDPLITE(sk); - int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; - int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); struct sk_buff *skb; + struct flowi4 *fl4; + __be16 dport; int uc_index; + u8 scope; + int err; if (len > 0xFFFF) return -EMSGSIZE; @@ -1300,8 +1260,6 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (msg->msg_flags & MSG_OOB) /* Mirror BSD error message compatibility */ return -EOPNOTSUPP; - getfrag = is_udplite ? 
udplite_getfrag : ip_generic_getfrag; - fl4 = &inet->cork.fl.u.ip4; if (READ_ONCE(up->pending)) { /* @@ -1443,7 +1401,7 @@ int udp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) flowi4_init_output(fl4, ipc.oif, ipc.sockc.mark, ipc.tos & INET_DSCP_MASK, scope, - sk->sk_protocol, flow_flags, faddr, saddr, + IPPROTO_UDP, flow_flags, faddr, saddr, dport, inet->inet_sport, sk_uid(sk)); @@ -1477,7 +1435,7 @@ back_from_confirm: if (!corkreq) { struct inet_cork cork; - skb = ip_make_skb(sk, fl4, getfrag, msg, ulen, + skb = ip_make_skb(sk, fl4, ip_generic_getfrag, msg, ulen, sizeof(struct udphdr), &ipc, &rt, &cork, msg->msg_flags); err = PTR_ERR(skb); @@ -1508,7 +1466,7 @@ back_from_confirm: do_append_data: up->len += ulen; - err = ip_append_data(sk, fl4, getfrag, msg, ulen, + err = ip_append_data(sk, fl4, ip_generic_getfrag, msg, ulen, sizeof(struct udphdr), &ipc, &rt, corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) @@ -1533,10 +1491,9 @@ out_free: * things). We could add another new stat but at least for now that * seems like overkill. 
*/ - if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP_INC_STATS(sock_net(sk), - UDP_MIB_SNDBUFERRORS, is_udplite); - } + if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) + UDP_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS); + return err; do_confirm: @@ -1562,7 +1519,6 @@ void udp_splice_eof(struct socket *sock) udp_push_pending_frames(sk); release_sock(sk); } -EXPORT_IPV6_MOD_GPL(udp_splice_eof); #define UDP_SKB_IS_STATELESS 0x80000000 @@ -1677,7 +1633,6 @@ void udp_skb_destructor(struct sock *sk, struct sk_buff *skb) prefetch(&skb->data); udp_rmem_release(sk, udp_skb_truesize(skb), 1, false); } -EXPORT_IPV6_MOD(udp_skb_destructor); /* as above, but the caller held the rx queue lock, too */ static void udp_skb_dtor_locked(struct sock *sk, struct sk_buff *skb) @@ -1830,7 +1785,6 @@ drop: udp_drops_inc(sk); return err; } -EXPORT_IPV6_MOD_GPL(__udp_enqueue_schedule_skb); void udp_destruct_common(struct sock *sk) { @@ -1847,7 +1801,6 @@ void udp_destruct_common(struct sock *sk) udp_rmem_release(sk, total, 0, true); kfree(up->udp_prod_queue); } -EXPORT_IPV6_MOD_GPL(udp_destruct_common); static void udp_destruct_sock(struct sock *sk) { @@ -1855,7 +1808,7 @@ static void udp_destruct_sock(struct sock *sk) inet_sock_destruct(sk); } -int udp_init_sock(struct sock *sk) +static int udp_init_sock(struct sock *sk) { int res = udp_lib_init_sock(sk); @@ -1885,7 +1838,6 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) skb_release_head_state(skb); __consume_stateless_skb(skb); } -EXPORT_IPV6_MOD_GPL(skb_consume_udp); static struct sk_buff *__first_packet_length(struct sock *sk, struct sk_buff_head *rcvq, @@ -1895,10 +1847,10 @@ static struct sk_buff *__first_packet_length(struct sock *sk, while ((skb = skb_peek(rcvq)) != NULL) { if (udp_lib_checksum_complete(skb)) { - __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, - IS_UDPLITE(sk)); - __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, - IS_UDPLITE(sk)); + struct 
net *net = sock_net(sk); + + __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS); + __UDP_INC_STATS(net, UDP_MIB_INERRORS); udp_drops_inc(sk); __skb_unlink(skb, rcvq); *total += skb->truesize; @@ -1967,7 +1919,6 @@ int udp_ioctl(struct sock *sk, int cmd, int *karg) return 0; } -EXPORT_IPV6_MOD(udp_ioctl); struct sk_buff *__skb_recv_udp(struct sock *sk, unsigned int flags, int *off, int *err) @@ -2050,11 +2001,10 @@ try_again: return err; if (udp_lib_checksum_complete(skb)) { - int is_udplite = IS_UDPLITE(sk); struct net *net = sock_net(sk); - __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, is_udplite); - __UDP_INC_STATS(net, UDP_MIB_INERRORS, is_udplite); + __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS); + __UDP_INC_STATS(net, UDP_MIB_INERRORS); udp_drops_inc(sk); kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM); goto try_again; @@ -2063,26 +2013,25 @@ try_again: WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk)); return recv_actor(sk, skb); } -EXPORT_IPV6_MOD(udp_read_skb); /* * This should be easy, if there is something there we * return it, otherwise we block. */ -int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, - int *addr_len) +INDIRECT_CALLABLE_SCOPE +int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags) { - struct inet_sock *inet = inet_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name); - struct sk_buff *skb; - unsigned int ulen, copied; int off, err, peeking = flags & MSG_PEEK; - int is_udplite = IS_UDPLITE(sk); + struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); bool checksum_valid = false; + unsigned int ulen, copied; + struct sk_buff *skb; if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len, addr_len); + return ip_recv_error(sk, msg, len); try_again: off = sk_peek_offset(sk, flags); @@ -2097,14 +2046,10 @@ try_again: else if (copied < ulen) msg->msg_flags |= MSG_TRUNC; - /* - * If checksum is needed at all, try to do it while copying the - * data. 
If the data is truncated, or if we only want a partial - * coverage checksum (UDP-Lite), do it before the copy. + /* If checksum is needed at all, try to do it while copying the + * data. If the data is truncated, do it before the copy. */ - - if (copied < ulen || peeking || - (is_udplite && UDP_SKB_CB(skb)->partial_cov)) { + if (copied < ulen || peeking) { checksum_valid = udp_skb_csum_unnecessary(skb) || !__udp_lib_checksum_complete(skb); if (!checksum_valid) @@ -2126,16 +2071,14 @@ try_again: if (unlikely(err)) { if (!peeking) { udp_drops_inc(sk); - UDP_INC_STATS(sock_net(sk), - UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS(net, UDP_MIB_INERRORS); } kfree_skb(skb); return err; } if (!peeking) - UDP_INC_STATS(sock_net(sk), - UDP_MIB_INDATAGRAMS, is_udplite); + UDP_INC_STATS(net, UDP_MIB_INDATAGRAMS); sock_recv_cmsgs(msg, sk, skb); @@ -2145,11 +2088,11 @@ try_again: sin->sin_port = udp_hdr(skb)->source; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - *addr_len = sizeof(*sin); + msg->msg_namelen = sizeof(*sin); BPF_CGROUP_RUN_PROG_UDP4_RECVMSG_LOCK(sk, (struct sockaddr *)sin, - addr_len); + &msg->msg_namelen); } if (udp_test_bit(GRO_ENABLED, sk)) @@ -2168,8 +2111,8 @@ try_again: csum_copy_err: if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags, udp_skb_destructor)) { - UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); - UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS(net, UDP_MIB_CSUMERRORS); + UDP_INC_STATS(net, UDP_MIB_INERRORS); } kfree_skb_reason(skb, SKB_DROP_REASON_UDP_CSUM); @@ -2191,7 +2134,6 @@ int udp_pre_connect(struct sock *sk, struct sockaddr_unsized *uaddr, return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr, &addr_len); } -EXPORT_IPV6_MOD(udp_pre_connect); static int udp_connect(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len) @@ -2241,17 +2183,17 @@ int udp_disconnect(struct sock *sk, int flags) release_sock(sk); return 0; } 
-EXPORT_IPV6_MOD(udp_disconnect); void udp_lib_unhash(struct sock *sk) { if (sk_hashed(sk)) { - struct udp_table *udptable = udp_get_table_prot(sk); struct udp_hslot *hslot, *hslot2; + struct net *net = sock_net(sk); + struct udp_table *udptable; sock_rps_delete_flow(sk); - hslot = udp_hashslot(udptable, sock_net(sk), - udp_sk(sk)->udp_port_hash); + udptable = net->ipv4.udp_table; + hslot = udp_hashslot(udptable, net, udp_sk(sk)->udp_port_hash); hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); spin_lock_bh(&hslot->lock); @@ -2260,7 +2202,7 @@ void udp_lib_unhash(struct sock *sk) if (sk_del_node_init_rcu(sk)) { hslot->count--; inet_sk(sk)->inet_num = 0; - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1); + sock_prot_inuse_add(net, sk->sk_prot, -1); spin_lock(&hslot2->lock); hlist_del_init_rcu(&udp_sk(sk)->udp_portaddr_node); @@ -2272,7 +2214,6 @@ void udp_lib_unhash(struct sock *sk) spin_unlock_bh(&hslot->lock); } } -EXPORT_IPV6_MOD(udp_lib_unhash); /* * inet_rcv_saddr was changed, we must rehash secondary hash @@ -2280,11 +2221,12 @@ EXPORT_IPV6_MOD(udp_lib_unhash); void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) { if (sk_hashed(sk)) { - struct udp_table *udptable = udp_get_table_prot(sk); struct udp_hslot *hslot, *hslot2, *nhslot2; + struct net *net = sock_net(sk); + struct udp_table *udptable; - hslot = udp_hashslot(udptable, sock_net(sk), - udp_sk(sk)->udp_port_hash); + udptable = net->ipv4.udp_table; + hslot = udp_hashslot(udptable, net, udp_sk(sk)->udp_port_hash); hslot2 = udp_hashslot2(udptable, udp_sk(sk)->udp_portaddr_hash); nhslot2 = udp_hashslot2(udptable, newhash); @@ -2341,9 +2283,8 @@ void udp_lib_rehash(struct sock *sk, u16 newhash, u16 newhash4) udp_sk(sk)->udp_portaddr_hash = newhash; } } -EXPORT_IPV6_MOD(udp_lib_rehash); -void udp_v4_rehash(struct sock *sk) +static void udp_v4_rehash(struct sock *sk) { u16 new_hash = ipv4_portaddr_hash(sock_net(sk), inet_sk(sk)->inet_rcv_saddr, @@ -2369,20 +2310,18 @@ static int 
__udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { - int is_udplite = IS_UDPLITE(sk); + struct net *net = sock_net(sk); int drop_reason; /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) { - UDP_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, - is_udplite); + UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS); drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF; } else { - UDP_INC_STATS(sock_net(sk), UDP_MIB_MEMERRORS, - is_udplite); + UDP_INC_STATS(net, UDP_MIB_MEMERRORS); drop_reason = SKB_DROP_REASON_PROTO_MEM; } - UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + UDP_INC_STATS(net, UDP_MIB_INERRORS); trace_udp_fail_queue_rcv_skb(rc, sk, skb); sk_skb_reason_drop(sk, skb, drop_reason); return -1; @@ -2403,7 +2342,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; struct udp_sock *up = udp_sk(sk); - int is_udplite = IS_UDPLITE(sk); + struct net *net = sock_net(sk); /* * Charge it to the socket, dropping if the queue is full. @@ -2440,9 +2379,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) ret = encap_rcv(sk, skb); if (ret <= 0) { - __UDP_INC_STATS(sock_net(sk), - UDP_MIB_INDATAGRAMS, - is_udplite); + __UDP_INC_STATS(net, UDP_MIB_INDATAGRAMS); return -ret; } } @@ -2450,48 +2387,13 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* FALLTHROUGH -- it's a UDP Packet */ } - /* - * UDP-Lite specific tests, ignored on UDP sockets - */ - if (unlikely(udp_test_bit(UDPLITE_RECV_CC, sk) && - UDP_SKB_CB(skb)->partial_cov)) { - u16 pcrlen = READ_ONCE(up->pcrlen); - - /* - * MIB statistics other than incrementing the error count are - * disabled for the following two types of errors: these depend - * on the application settings, not on the functioning of the - * protocol stack as such. 
- * - * RFC 3828 here recommends (sec 3.3): "There should also be a - * way ... to ... at least let the receiving application block - * delivery of packets with coverage values less than a value - * provided by the application." - */ - if (pcrlen == 0) { /* full coverage was set */ - net_dbg_ratelimited("UDPLite: partial coverage %d while full coverage %d requested\n", - UDP_SKB_CB(skb)->cscov, skb->len); - goto drop; - } - /* The next case involves violating the min. coverage requested - * by the receiver. This is subtle: if receiver wants x and x is - * greater than the buffersize/MTU then receiver will complain - * that it wants x while sender emits packets of smaller size y. - * Therefore the above ...()->partial_cov statement is essential. - */ - if (UDP_SKB_CB(skb)->cscov < pcrlen) { - net_dbg_ratelimited("UDPLite: coverage %d too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, pcrlen); - goto drop; - } - } - prefetch(&sk->sk_rmem_alloc); if (rcu_access_pointer(sk->sk_filter) && udp_lib_checksum_complete(skb)) goto csum_error; - if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr), &drop_reason)) + drop_reason = sk_filter_trim_cap(sk, skb, sizeof(struct udphdr)); + if (drop_reason) goto drop; udp_csum_pull_header(skb); @@ -2501,9 +2403,9 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) csum_error: drop_reason = SKB_DROP_REASON_UDP_CSUM; - __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); + __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS); drop: - __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + __UDP_INC_STATS(net, UDP_MIB_INERRORS); udp_drops_inc(sk); sk_skb_reason_drop(sk, skb, drop_reason); return -1; @@ -2545,7 +2447,6 @@ bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) } return false; } -EXPORT_IPV6_MOD(udp_sk_rx_dst_set); /* * Multicasts and broadcasts go to each listener. 
@@ -2554,19 +2455,24 @@ EXPORT_IPV6_MOD(udp_sk_rx_dst_set); */ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, struct udphdr *uh, - __be32 saddr, __be32 daddr, - struct udp_table *udptable, - int proto) + __be32 saddr, __be32 daddr) { - struct sock *sk, *first = NULL; + struct udp_table *udptable = net->ipv4.udp_table; + unsigned int hash2, hash2_any, offset; unsigned short hnum = ntohs(uh->dest); - struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); - unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); - unsigned int offset = offsetof(typeof(*sk), sk_node); + struct sock *sk, *first = NULL; int dif = skb->dev->ifindex; int sdif = inet_sdif(skb); struct hlist_node *node; + struct udp_hslot *hslot; struct sk_buff *nskb; + bool use_hash2; + + hash2_any = 0; + hash2 = 0; + hslot = udp_hashslot(udptable, net, hnum); + use_hash2 = hslot->count > 10; + offset = offsetof(typeof(*sk), sk_node); if (use_hash2) { hash2_any = ipv4_portaddr_hash(net, htonl(INADDR_ANY), hnum) & @@ -2590,10 +2496,8 @@ start_lookup: if (unlikely(!nskb)) { udp_drops_inc(sk); - __UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS, - IS_UDPLITE(sk)); - __UDP_INC_STATS(net, UDP_MIB_INERRORS, - IS_UDPLITE(sk)); + __UDP_INC_STATS(net, UDP_MIB_RCVBUFERRORS); + __UDP_INC_STATS(net, UDP_MIB_INERRORS); continue; } if (udp_queue_rcv_skb(sk, nskb) > 0) @@ -2611,8 +2515,7 @@ start_lookup: consume_skb(skb); } else { kfree_skb(skb); - __UDP_INC_STATS(net, UDP_MIB_IGNOREDMULTI, - proto == IPPROTO_UDPLITE); + __UDP_INC_STATS(net, UDP_MIB_IGNOREDMULTI); } return 0; } @@ -2622,29 +2525,14 @@ start_lookup: * Otherwise, csum completion requires checksumming packet body, * including udp header and folding it to skb->csum. 
*/ -static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, - int proto) +static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh) { int err; - UDP_SKB_CB(skb)->partial_cov = 0; - UDP_SKB_CB(skb)->cscov = skb->len; - - if (proto == IPPROTO_UDPLITE) { - err = udplite_checksum_init(skb, uh); - if (err) - return err; - - if (UDP_SKB_CB(skb)->partial_cov) { - skb->csum = inet_compute_pseudo(skb, proto); - return 0; - } - } - /* Note, we are only interested in != 0 or == 0, thus the * force to int. */ - err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + err = (__force int)skb_checksum_init_zero_check(skb, IPPROTO_UDP, uh->check, inet_compute_pseudo); if (err) return err; @@ -2672,7 +2560,7 @@ static int udp_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, { int ret; - if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) + if (inet_get_convert_csum(sk) && uh->check) skb_checksum_try_convert(skb, IPPROTO_UDP, inet_compute_pseudo); ret = udp_queue_rcv_skb(sk, skb); @@ -2689,15 +2577,14 @@ static int udp_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, * All we need to do is get the socket, and then do a checksum. */ -int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, - int proto) +int udp_rcv(struct sk_buff *skb) { + struct rtable *rt = skb_rtable(skb); + struct net *net = dev_net(skb->dev); struct sock *sk = NULL; - struct udphdr *uh; unsigned short ulen; - struct rtable *rt = skb_rtable(skb); __be32 saddr, daddr; - struct net *net = dev_net(skb->dev); + struct udphdr *uh; bool refcounted; int drop_reason; @@ -2717,14 +2604,17 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (ulen > skb->len) goto short_packet; - if (proto == IPPROTO_UDP) { - /* UDP validates ulen. 
*/ - if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) + if (ulen < sizeof(*uh)) + goto short_packet; + + if (ulen < skb->len) { + if (pskb_trim_rcsum(skb, ulen)) goto short_packet; + uh = udp_hdr(skb); } - if (udp4_csum_init(skb, uh, proto)) + if (udp4_csum_init(skb, uh)) goto csum_error; sk = inet_steal_sock(net, skb, sizeof(struct udphdr), saddr, uh->source, daddr, uh->dest, @@ -2746,10 +2636,9 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, } if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) - return __udp4_lib_mcast_deliver(net, skb, uh, - saddr, daddr, udptable, proto); + return __udp4_lib_mcast_deliver(net, skb, uh, saddr, daddr); - sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest, udptable); + sk = __udp4_lib_lookup_skb(skb, uh->source, uh->dest); if (sk) return udp_unicast_rcv_skb(sk, skb, uh); no_sk: @@ -2762,7 +2651,7 @@ no_sk: goto csum_error; drop_reason = SKB_DROP_REASON_NO_SOCKET; - __UDP_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + __UDP_INC_STATS(net, UDP_MIB_NOPORTS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); /* @@ -2774,8 +2663,7 @@ no_sk: short_packet: drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL; - net_dbg_ratelimited("UDP%s: short packet: From %pI4:%u %d/%d to %pI4:%u\n", - proto == IPPROTO_UDPLITE ? "Lite" : "", + net_dbg_ratelimited("UDP: short packet: From %pI4:%u %d/%d to %pI4:%u\n", &saddr, ntohs(uh->source), ulen, skb->len, &daddr, ntohs(uh->dest)); @@ -2787,13 +2675,12 @@ csum_error: * the network is concerned, anyway) as per 4.1.3.4 (MUST). */ drop_reason = SKB_DROP_REASON_UDP_CSUM; - net_dbg_ratelimited("UDP%s: bad checksum. From %pI4:%u to %pI4:%u ulen %d\n", - proto == IPPROTO_UDPLITE ? "Lite" : "", + net_dbg_ratelimited("UDP: bad checksum. 
From %pI4:%u to %pI4:%u ulen %d\n", &saddr, ntohs(uh->source), &daddr, ntohs(uh->dest), ulen); - __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); + __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS); drop: - __UDP_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + __UDP_INC_STATS(net, UDP_MIB_INERRORS); sk_skb_reason_drop(sk, skb, drop_reason); return 0; } @@ -2931,12 +2818,7 @@ enum skb_drop_reason udp_v4_early_demux(struct sk_buff *skb) return SKB_NOT_DROPPED_YET; } -int udp_rcv(struct sk_buff *skb) -{ - return __udp4_lib_rcv(skb, dev_net(skb->dev)->ipv4.udp_table, IPPROTO_UDP); -} - -void udp_destroy_sock(struct sock *sk) +static void udp_destroy_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); bool slow = lock_sock_fast(sk); @@ -2971,7 +2853,7 @@ static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family, if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) { if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6) - new_gro_receive = ipv6_stub->xfrm6_gro_udp_encap_rcv; + new_gro_receive = xfrm6_gro_udp_encap_rcv; else new_gro_receive = xfrm4_gro_udp_encap_rcv; @@ -3000,7 +2882,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, struct udp_sock *up = udp_sk(sk); int val, valbool; int err = 0; - int is_udplite = IS_UDPLITE(sk); if (level == SOL_SOCKET) { err = sk_setsockopt(sk, level, optname, optval, optlen); @@ -3044,7 +2925,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) WRITE_ONCE(up->encap_rcv, - ipv6_stub->xfrm6_udp_encap_rcv); + xfrm6_udp_encap_rcv); else #endif WRITE_ONCE(up->encap_rcv, @@ -3087,36 +2968,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, sockopt_release_sock(sk); break; - /* - * UDP-Lite's partial checksum coverage (RFC 3828). - */ - /* The sender sets actual checksum coverage length via this option. - * The case coverage > packet length is handled by send module. 
*/ - case UDPLITE_SEND_CSCOV: - if (!is_udplite) /* Disable the option on UDP sockets */ - return -ENOPROTOOPT; - if (val != 0 && val < 8) /* Illegal coverage: use default (8) */ - val = 8; - else if (val > USHRT_MAX) - val = USHRT_MAX; - WRITE_ONCE(up->pcslen, val); - udp_set_bit(UDPLITE_SEND_CC, sk); - break; - - /* The receiver specifies a minimum checksum coverage value. To make - * sense, this should be set to at least 8 (as done below). If zero is - * used, this again means full checksum coverage. */ - case UDPLITE_RECV_CSCOV: - if (!is_udplite) /* Disable the option on UDP sockets */ - return -ENOPROTOOPT; - if (val != 0 && val < 8) /* Avoid silly minimal values. */ - val = 8; - else if (val > USHRT_MAX) - val = USHRT_MAX; - WRITE_ONCE(up->pcrlen, val); - udp_set_bit(UDPLITE_RECV_CC, sk); - break; - default: err = -ENOPROTOOPT; break; @@ -3124,12 +2975,11 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return err; } -EXPORT_IPV6_MOD(udp_lib_setsockopt); -int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, - unsigned int optlen) +static int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, + unsigned int optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET) + if (level == SOL_UDP || level == SOL_SOCKET) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_push_pending_frames); @@ -3175,16 +3025,6 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, val = udp_test_bit(GRO_ENABLED, sk); break; - /* The following two cannot be changed on UDP sockets, the return is - * always 0 (which corresponds to the full checksum coverage of UDP). 
*/ - case UDPLITE_SEND_CSCOV: - val = READ_ONCE(up->pcslen); - break; - - case UDPLITE_RECV_CSCOV: - val = READ_ONCE(up->pcrlen); - break; - default: return -ENOPROTOOPT; } @@ -3195,12 +3035,11 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, return -EFAULT; return 0; } -EXPORT_IPV6_MOD(udp_lib_getsockopt); -int udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) +static int udp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (level == SOL_UDP) return udp_lib_getsockopt(sk, level, optname, optval, optlen); return ip_getsockopt(sk, level, optname, optval, optlen); } @@ -3237,7 +3076,6 @@ __poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait) return mask; } -EXPORT_IPV6_MOD(udp_poll); int udp_abort(struct sock *sk, int err) { @@ -3260,7 +3098,6 @@ out: return 0; } -EXPORT_IPV6_MOD_GPL(udp_abort); struct proto udp_prot = { .name = "UDP", @@ -3293,7 +3130,6 @@ struct proto udp_prot = { .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp_sock), - .h.udp_table = NULL, .diag_destroy = udp_abort, }; EXPORT_SYMBOL(udp_prot); @@ -3314,19 +3150,6 @@ static bool seq_sk_match(struct seq_file *seq, const struct sock *sk) #ifdef CONFIG_BPF_SYSCALL static const struct seq_operations bpf_iter_udp_seq_ops; #endif -static struct udp_table *udp_get_table_seq(struct seq_file *seq, - struct net *net) -{ - const struct udp_seq_afinfo *afinfo; - -#ifdef CONFIG_BPF_SYSCALL - if (seq->op == &bpf_iter_udp_seq_ops) - return net->ipv4.udp_table; -#endif - - afinfo = pde_data(file_inode(seq->file)); - return afinfo->udp_table ? 
: net->ipv4.udp_table; -} static struct sock *udp_get_first(struct seq_file *seq, int start) { @@ -3335,7 +3158,7 @@ static struct sock *udp_get_first(struct seq_file *seq, int start) struct udp_table *udptable; struct sock *sk; - udptable = udp_get_table_seq(seq, net); + udptable = net->ipv4.udp_table; for (state->bucket = start; state->bucket <= udptable->mask; ++state->bucket) { @@ -3367,7 +3190,7 @@ static struct sock *udp_get_next(struct seq_file *seq, struct sock *sk) } while (sk && !seq_sk_match(seq, sk)); if (!sk) { - udptable = udp_get_table_seq(seq, net); + udptable = net->ipv4.udp_table; if (state->bucket <= udptable->mask) spin_unlock_bh(&udptable->hash[state->bucket].lock); @@ -3394,7 +3217,6 @@ void *udp_seq_start(struct seq_file *seq, loff_t *pos) return *pos ? udp_get_idx(seq, *pos-1) : SEQ_START_TOKEN; } -EXPORT_IPV6_MOD(udp_seq_start); void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { @@ -3408,19 +3230,17 @@ void *udp_seq_next(struct seq_file *seq, void *v, loff_t *pos) ++*pos; return sk; } -EXPORT_IPV6_MOD(udp_seq_next); void udp_seq_stop(struct seq_file *seq, void *v) { struct udp_iter_state *state = seq->private; struct udp_table *udptable; - udptable = udp_get_table_seq(seq, seq_file_net(seq)); + udptable = seq_file_net(seq)->ipv4.udp_table; if (state->bucket <= udptable->mask) spin_unlock_bh(&udptable->hash[state->bucket].lock); } -EXPORT_IPV6_MOD(udp_seq_stop); /* ------------------------------------------------------------------------ */ static void udp4_format_sock(struct sock *sp, struct seq_file *f, @@ -3444,7 +3264,7 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, sk_drops_read(sp)); } -int udp4_seq_show(struct seq_file *seq, void *v) +static int udp4_seq_show(struct seq_file *seq, void *v) { seq_setwidth(seq, 127); if (v == SEQ_START_TOKEN) @@ -3519,7 +3339,7 @@ static struct sock *bpf_iter_udp_batch(struct seq_file *seq) if (iter->cur_sk == iter->end_sk) state->bucket++; - udptable = 
udp_get_table_seq(seq, net); + udptable = net->ipv4.udp_table; again: /* New batch for the next bucket. @@ -3748,17 +3568,15 @@ static unsigned short seq_file_family(const struct seq_file *seq) return afinfo->family; } -const struct seq_operations udp_seq_ops = { +static const struct seq_operations udp_seq_ops = { .start = udp_seq_start, .next = udp_seq_next, .stop = udp_seq_stop, .show = udp4_seq_show, }; -EXPORT_IPV6_MOD(udp_seq_ops); static struct udp_seq_afinfo udp4_seq_afinfo = { .family = AF_INET, - .udp_table = NULL, }; static int __net_init udp4_proc_init_net(struct net *net) @@ -3808,7 +3626,7 @@ static int __init set_uhash_entries(char *str) } __setup("uhash_entries=", set_uhash_entries); -void __init udp_table_init(struct udp_table *table, const char *name) +static void __init udp_table_init(struct udp_table *table, const char *name) { unsigned int i, slot_size; diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index 779a3a03762f..9f33b07b1481 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -7,18 +7,16 @@ #include <net/inet_common.h> #include <asm/ioctls.h> -#include "udp_impl.h" - static struct proto *udpv6_prot_saved __read_mostly; static int sk_udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) - return udpv6_prot_saved->recvmsg(sk, msg, len, flags, addr_len); + return udpv6_prot_saved->recvmsg(sk, msg, len, flags); #endif - return udp_prot.recvmsg(sk, msg, len, flags, addr_len); + return udp_prot.recvmsg(sk, msg, len, flags); } static bool udp_sk_has_data(struct sock *sk) @@ -61,23 +59,23 @@ static int udp_msg_wait_data(struct sock *sk, struct sk_psock *psock, } static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct sk_psock *psock; int copied, ret; if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); + return 
inet_recv_error(sk, msg, len); if (!len) return 0; psock = sk_psock_get(sk); if (unlikely(!psock)) - return sk_udp_recvmsg(sk, msg, len, flags, addr_len); + return sk_udp_recvmsg(sk, msg, len, flags); if (!psock_has_data(psock)) { - ret = sk_udp_recvmsg(sk, msg, len, flags, addr_len); + ret = sk_udp_recvmsg(sk, msg, len, flags); goto out; } @@ -92,7 +90,7 @@ msg_bytes_ready: if (data) { if (psock_has_data(psock)) goto msg_bytes_ready; - ret = sk_udp_recvmsg(sk, msg, len, flags, addr_len); + ret = sk_udp_recvmsg(sk, msg, len, flags); goto out; } copied = -EAGAIN; diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 6e491c720c90..f4b24e628cf8 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -10,7 +10,6 @@ #include <linux/inet_diag.h> #include <linux/udp.h> #include <net/udp.h> -#include <net/udplite.h> #include <linux/sock_diag.h> static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, @@ -25,31 +24,32 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, net_admin); } -static int udp_dump_one(struct udp_table *tbl, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *req) +static int udp_diag_dump_one(struct netlink_callback *cb, + const struct inet_diag_req_v2 *req) { struct sk_buff *in_skb = cb->skb; - int err; struct sock *sk = NULL; struct sk_buff *rep; - struct net *net = sock_net(in_skb->sk); + struct net *net; + int err; + + net = sock_net(in_skb->sk); rcu_read_lock(); if (req->sdiag_family == AF_INET) /* src and dst are swapped for historical reasons */ sk = __udp4_lib_lookup(net, - req->id.idiag_src[0], req->id.idiag_sport, - req->id.idiag_dst[0], req->id.idiag_dport, - req->id.idiag_if, 0, tbl, NULL); + req->id.idiag_src[0], req->id.idiag_sport, + req->id.idiag_dst[0], req->id.idiag_dport, + req->id.idiag_if, 0, NULL); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) sk = __udp6_lib_lookup(net, - (struct in6_addr *)req->id.idiag_src, - req->id.idiag_sport, - (struct in6_addr 
*)req->id.idiag_dst, - req->id.idiag_dport, - req->id.idiag_if, 0, tbl, NULL); + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + req->id.idiag_if, 0, NULL); #endif if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; @@ -86,14 +86,15 @@ out_nosk: return err; } -static void udp_dump(struct udp_table *table, struct sk_buff *skb, - struct netlink_callback *cb, - const struct inet_diag_req_v2 *r) +static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, + const struct inet_diag_req_v2 *r) { bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct net *net = sock_net(skb->sk); int num, s_num, slot, s_slot; + struct udp_table *table; + table = net->ipv4.udp_table; s_slot = cb->args[0]; num = s_num = cb->args[1]; @@ -140,18 +141,6 @@ done: cb->args[1] = num; } -static void udp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r) -{ - udp_dump(sock_net(cb->skb->sk)->ipv4.udp_table, skb, cb, r); -} - -static int udp_diag_dump_one(struct netlink_callback *cb, - const struct inet_diag_req_v2 *req) -{ - return udp_dump_one(sock_net(cb->skb->sk)->ipv4.udp_table, cb, req); -} - static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *info) { @@ -160,9 +149,8 @@ static void udp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, } #ifdef CONFIG_INET_DIAG_DESTROY -static int __udp_diag_destroy(struct sk_buff *in_skb, - const struct inet_diag_req_v2 *req, - struct udp_table *tbl) +static int udp_diag_destroy(struct sk_buff *in_skb, + const struct inet_diag_req_v2 *req) { struct net *net = sock_net(in_skb->sk); struct sock *sk; @@ -172,25 +160,24 @@ static int __udp_diag_destroy(struct sk_buff *in_skb, if (req->sdiag_family == AF_INET) sk = __udp4_lib_lookup(net, - req->id.idiag_dst[0], req->id.idiag_dport, - req->id.idiag_src[0], req->id.idiag_sport, - req->id.idiag_if, 0, tbl, NULL); + 
req->id.idiag_dst[0], req->id.idiag_dport, + req->id.idiag_src[0], req->id.idiag_sport, + req->id.idiag_if, 0, NULL); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) { if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_src)) sk = __udp4_lib_lookup(net, - req->id.idiag_dst[3], req->id.idiag_dport, - req->id.idiag_src[3], req->id.idiag_sport, - req->id.idiag_if, 0, tbl, NULL); - + req->id.idiag_dst[3], req->id.idiag_dport, + req->id.idiag_src[3], req->id.idiag_sport, + req->id.idiag_if, 0, NULL); else sk = __udp6_lib_lookup(net, - (struct in6_addr *)req->id.idiag_dst, - req->id.idiag_dport, - (struct in6_addr *)req->id.idiag_src, - req->id.idiag_sport, - req->id.idiag_if, 0, tbl, NULL); + (struct in6_addr *)req->id.idiag_dst, + req->id.idiag_dport, + (struct in6_addr *)req->id.idiag_src, + req->id.idiag_sport, + req->id.idiag_if, 0, NULL); } #endif else { @@ -217,19 +204,6 @@ static int __udp_diag_destroy(struct sk_buff *in_skb, return err; } - -static int udp_diag_destroy(struct sk_buff *in_skb, - const struct inet_diag_req_v2 *req) -{ - return __udp_diag_destroy(in_skb, req, sock_net(in_skb->sk)->ipv4.udp_table); -} - -static int udplite_diag_destroy(struct sk_buff *in_skb, - const struct inet_diag_req_v2 *req) -{ - return __udp_diag_destroy(in_skb, req, &udplite_table); -} - #endif static const struct inet_diag_handler udp_diag_handler = { @@ -244,50 +218,13 @@ static const struct inet_diag_handler udp_diag_handler = { #endif }; -static void udplite_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, - const struct inet_diag_req_v2 *r) -{ - udp_dump(&udplite_table, skb, cb, r); -} - -static int udplite_diag_dump_one(struct netlink_callback *cb, - const struct inet_diag_req_v2 *req) -{ - return udp_dump_one(&udplite_table, cb, req); -} - -static const struct inet_diag_handler udplite_diag_handler = { - .owner = THIS_MODULE, - .dump = udplite_diag_dump, - .dump_one = 
udplite_diag_dump_one, - .idiag_get_info = udp_diag_get_info, - .idiag_type = IPPROTO_UDPLITE, - .idiag_info_size = 0, -#ifdef CONFIG_INET_DIAG_DESTROY - .destroy = udplite_diag_destroy, -#endif -}; - static int __init udp_diag_init(void) { - int err; - - err = inet_diag_register(&udp_diag_handler); - if (err) - goto out; - err = inet_diag_register(&udplite_diag_handler); - if (err) - goto out_lite; -out: - return err; -out_lite: - inet_diag_unregister(&udp_diag_handler); - goto out; + return inet_diag_register(&udp_diag_handler); } static void __exit udp_diag_exit(void) { - inet_diag_unregister(&udplite_diag_handler); inet_diag_unregister(&udp_diag_handler); } @@ -296,4 +233,3 @@ module_exit(udp_diag_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("UDP socket monitoring via SOCK_DIAG"); MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-17 /* AF_INET - IPPROTO_UDP */); -MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, 2-136 /* AF_INET - IPPROTO_UDPLITE */); diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h deleted file mode 100644 index c7142213fc21..000000000000 --- a/net/ipv4/udp_impl.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _UDP4_IMPL_H -#define _UDP4_IMPL_H -#include <net/aligned_data.h> -#include <net/udp.h> -#include <net/udplite.h> -#include <net/protocol.h> -#include <net/inet_common.h> - -int __udp4_lib_rcv(struct sk_buff *, struct udp_table *, int); -int __udp4_lib_err(struct sk_buff *, u32, struct udp_table *); - -int udp_v4_get_port(struct sock *sk, unsigned short snum); -void udp_v4_rehash(struct sock *sk); - -int udp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, - unsigned int optlen); -int udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); - -int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, - int *addr_len); -void udp_destroy_sock(struct sock *sk); - -#ifdef CONFIG_PROC_FS -int 
udp4_seq_show(struct seq_file *seq, void *v); -#endif -#endif /* _UDP4_IMPL_H */ diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 6b1654c1ad4a..a0813d425b71 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -343,7 +343,6 @@ out_unlock: return segs; } -EXPORT_SYMBOL(skb_udp_tunnel_segment); static void __udpv4_gso_segment_csum(struct sk_buff *seg, __be32 *oldip, __be32 *newip, @@ -635,7 +634,6 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, } return segs; } -EXPORT_SYMBOL_GPL(__udp_gso_segment); static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, netdev_features_t features) @@ -852,7 +850,6 @@ out: skb_gro_flush_final(skb, pp, flush); return pp; } -EXPORT_SYMBOL(udp_gro_receive); static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport, __be16 dport) @@ -869,8 +866,7 @@ static struct sock *udp4_gro_lookup_skb(struct sk_buff *skb, __be16 sport, inet_get_iif_sdif(skb, &iif, &sdif); return __udp4_lib_lookup(net, iph->saddr, sport, - iph->daddr, dport, iif, - sdif, net->ipv4.udp_table, NULL); + iph->daddr, dport, iif, sdif, NULL); } INDIRECT_CALLABLE_SCOPE @@ -958,7 +954,6 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff, return err; } -EXPORT_SYMBOL(udp_gro_complete); INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff) { diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c deleted file mode 100644 index 826e9e79eb19..000000000000 --- a/net/ipv4/udplite.c +++ /dev/null @@ -1,135 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * UDPLITE An implementation of the UDP-Lite protocol (RFC 3828). 
- * - * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk> - * - * Changes: - * Fixes: - */ - -#define pr_fmt(fmt) "UDPLite: " fmt - -#include <linux/export.h> -#include <linux/proc_fs.h> -#include "udp_impl.h" - -struct udp_table udplite_table __read_mostly; -EXPORT_SYMBOL(udplite_table); - -/* Designate sk as UDP-Lite socket */ -static int udplite_sk_init(struct sock *sk) -{ - pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " - "please contact the netdev mailing list\n"); - return udp_init_sock(sk); -} - -static int udplite_rcv(struct sk_buff *skb) -{ - return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); -} - -static int udplite_err(struct sk_buff *skb, u32 info) -{ - return __udp4_lib_err(skb, info, &udplite_table); -} - -static const struct net_protocol udplite_protocol = { - .handler = udplite_rcv, - .err_handler = udplite_err, - .no_policy = 1, -}; - -struct proto udplite_prot = { - .name = "UDP-Lite", - .owner = THIS_MODULE, - .close = udp_lib_close, - .connect = ip4_datagram_connect, - .disconnect = udp_disconnect, - .ioctl = udp_ioctl, - .init = udplite_sk_init, - .destroy = udp_destroy_sock, - .setsockopt = udp_setsockopt, - .getsockopt = udp_getsockopt, - .sendmsg = udp_sendmsg, - .recvmsg = udp_recvmsg, - .hash = udp_lib_hash, - .unhash = udp_lib_unhash, - .rehash = udp_v4_rehash, - .get_port = udp_v4_get_port, - - .memory_allocated = &net_aligned_data.udp_memory_allocated, - .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, - - .sysctl_mem = sysctl_udp_mem, - .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), - .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), - .obj_size = sizeof(struct udp_sock), - .h.udp_table = &udplite_table, -}; -EXPORT_SYMBOL(udplite_prot); - -static struct inet_protosw udplite4_protosw = { - .type = SOCK_DGRAM, - .protocol = IPPROTO_UDPLITE, - .prot = &udplite_prot, - .ops = &inet_dgram_ops, - .flags = INET_PROTOSW_PERMANENT, -}; - -#ifdef CONFIG_PROC_FS 
-static struct udp_seq_afinfo udplite4_seq_afinfo = { - .family = AF_INET, - .udp_table = &udplite_table, -}; - -static int __net_init udplite4_proc_init_net(struct net *net) -{ - if (!proc_create_net_data("udplite", 0444, net->proc_net, &udp_seq_ops, - sizeof(struct udp_iter_state), &udplite4_seq_afinfo)) - return -ENOMEM; - return 0; -} - -static void __net_exit udplite4_proc_exit_net(struct net *net) -{ - remove_proc_entry("udplite", net->proc_net); -} - -static struct pernet_operations udplite4_net_ops = { - .init = udplite4_proc_init_net, - .exit = udplite4_proc_exit_net, -}; - -static __init int udplite4_proc_init(void) -{ - return register_pernet_subsys(&udplite4_net_ops); -} -#else -static inline int udplite4_proc_init(void) -{ - return 0; -} -#endif - -void __init udplite4_register(void) -{ - udp_table_init(&udplite_table, "UDP-Lite"); - if (proto_register(&udplite_prot, 1)) - goto out_register_err; - - if (inet_add_protocol(&udplite_protocol, IPPROTO_UDPLITE) < 0) - goto out_unregister_proto; - - inet_register_protosw(&udplite4_protosw); - - if (udplite4_proc_init()) - pr_err("%s: Cannot register /proc!\n", __func__); - return; - -out_unregister_proto: - proto_unregister(&udplite_prot); -out_register_err: - pr_crit("%s: Cannot add UDP-Lite protocol\n", __func__); -} diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index b8f9a8c0302e..c024aa77f25b 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -3,9 +3,8 @@ # IPv6 configuration # -# IPv6 as module will cause a CRASH if you try to unload it menuconfig IPV6 - tristate "The IPv6 protocol" + bool "The IPv6 protocol" default y select CRYPTO_LIB_SHA1 help @@ -17,9 +16,6 @@ menuconfig IPV6 Documentation/networking/ipv6.rst and read the HOWTO at <https://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/> - To compile this protocol support as a module, choose M here: the - module will be called ipv6. 
- if IPV6 config IPV6_ROUTER_PREF diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 0492f1a0b491..2c9ce2ccbde1 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -7,7 +7,7 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-y := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ - route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ + route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ udp_offload.o seg6.o fib6_notifier.o rpl.o ioam6.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 22c5cdffeae7..5476b6536eb7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3586,15 +3586,15 @@ static int fixup_permanent_addr(struct net *net, struct fib6_info *f6i, *prev; f6i = addrconf_f6i_alloc(net, idev, &ifp->addr, false, - GFP_ATOMIC, NULL); + GFP_KERNEL, NULL); if (IS_ERR(f6i)) return PTR_ERR(f6i); /* ifp->rt can be accessed outside of rtnl */ - spin_lock(&ifp->lock); + spin_lock_bh(&ifp->lock); prev = ifp->rt; ifp->rt = f6i; - spin_unlock(&ifp->lock); + spin_unlock_bh(&ifp->lock); fib6_info_release(prev); } @@ -3602,7 +3602,7 @@ static int fixup_permanent_addr(struct net *net, if (!(ifp->flags & IFA_F_NOPREFIXROUTE)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->rt_priority, idev->dev, 0, 0, - GFP_ATOMIC); + GFP_KERNEL); } if (ifp->state == INET6_IFADDR_STATE_PREDAD) @@ -3613,29 +3613,36 @@ static int fixup_permanent_addr(struct net *net, static void addrconf_permanent_addr(struct net *net, struct net_device *dev) { - struct inet6_ifaddr *ifp, *tmp; + struct inet6_ifaddr *ifp; + LIST_HEAD(tmp_addr_list); struct inet6_dev *idev; + /* Mutual exclusion with other if_list_aux users. 
*/ + ASSERT_RTNL(); + idev = __in6_dev_get(dev); if (!idev) return; write_lock_bh(&idev->lock); + list_for_each_entry(ifp, &idev->addr_list, if_list) { + if (ifp->flags & IFA_F_PERMANENT) + list_add_tail(&ifp->if_list_aux, &tmp_addr_list); + } + write_unlock_bh(&idev->lock); - list_for_each_entry_safe(ifp, tmp, &idev->addr_list, if_list) { - if ((ifp->flags & IFA_F_PERMANENT) && - fixup_permanent_addr(net, idev, ifp) < 0) { - write_unlock_bh(&idev->lock); + while (!list_empty(&tmp_addr_list)) { + ifp = list_first_entry(&tmp_addr_list, + struct inet6_ifaddr, if_list_aux); + list_del(&ifp->if_list_aux); + if (fixup_permanent_addr(net, idev, ifp) < 0) { net_info_ratelimited("%s: Failed to add prefix route for address %pI6c; dropping\n", idev->dev->name, &ifp->addr); in6_ifa_hold(ifp); ipv6_del_addr(ifp); - write_lock_bh(&idev->lock); } } - - write_unlock_bh(&idev->lock); } static int addrconf_notify(struct notifier_block *this, unsigned long event, diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index c008d21925d7..fa27a90ab3cd 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -6,7 +6,6 @@ #include <linux/export.h> #include <net/ipv6.h> -#include <net/ipv6_stubs.h> #include <net/addrconf.h> #include <net/ip.h> @@ -129,96 +128,6 @@ int inet6addr_validator_notifier_call_chain(unsigned long val, void *v) } EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain); -static struct dst_entry *eafnosupport_ipv6_dst_lookup_flow(struct net *net, - const struct sock *sk, - struct flowi6 *fl6, - const struct in6_addr *final_dst) -{ - return ERR_PTR(-EAFNOSUPPORT); -} - -static int eafnosupport_ipv6_route_input(struct sk_buff *skb) -{ - return -EAFNOSUPPORT; -} - -static struct fib6_table *eafnosupport_fib6_get_table(struct net *net, u32 id) -{ - return NULL; -} - -static int -eafnosupport_fib6_table_lookup(struct net *net, struct fib6_table *table, - int oif, struct flowi6 *fl6, - struct fib6_result *res, int flags) -{ - return -EAFNOSUPPORT; 
-} - -static int -eafnosupport_fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, - struct fib6_result *res, int flags) -{ - return -EAFNOSUPPORT; -} - -static void -eafnosupport_fib6_select_path(const struct net *net, struct fib6_result *res, - struct flowi6 *fl6, int oif, bool have_oif_match, - const struct sk_buff *skb, int strict) -{ -} - -static u32 -eafnosupport_ip6_mtu_from_fib6(const struct fib6_result *res, - const struct in6_addr *daddr, - const struct in6_addr *saddr) -{ - return 0; -} - -static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, - struct fib6_config *cfg, gfp_t gfp_flags, - struct netlink_ext_ack *extack) -{ - NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel"); - return -EAFNOSUPPORT; -} - -static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt, - bool skip_notify) -{ - return -EAFNOSUPPORT; -} - -static int eafnosupport_ipv6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, - int (*output)(struct net *, struct sock *, struct sk_buff *)) -{ - kfree_skb(skb); - return -EAFNOSUPPORT; -} - -static struct net_device *eafnosupport_ipv6_dev_find(struct net *net, const struct in6_addr *addr, - struct net_device *dev) -{ - return ERR_PTR(-EAFNOSUPPORT); -} - -const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) { - .ipv6_dst_lookup_flow = eafnosupport_ipv6_dst_lookup_flow, - .ipv6_route_input = eafnosupport_ipv6_route_input, - .fib6_get_table = eafnosupport_fib6_get_table, - .fib6_table_lookup = eafnosupport_fib6_table_lookup, - .fib6_lookup = eafnosupport_fib6_lookup, - .fib6_select_path = eafnosupport_fib6_select_path, - .ip6_mtu_from_fib6 = eafnosupport_ip6_mtu_from_fib6, - .fib6_nh_init = eafnosupport_fib6_nh_init, - .ip6_del_rt = eafnosupport_ip6_del_rt, - .ipv6_fragment = eafnosupport_ipv6_fragment, - .ipv6_dev_find = eafnosupport_ipv6_dev_find, -}; -EXPORT_SYMBOL_GPL(ipv6_stub); - /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ const 
struct in6_addr in6addr_loopback __aligned(BITS_PER_LONG/8) = IN6ADDR_LOOPBACK_INIT; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4cbd45b68088..0a88b376141d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -38,12 +38,10 @@ #include <linux/inet.h> #include <linux/netdevice.h> #include <linux/icmpv6.h> -#include <linux/netfilter_ipv6.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/udp.h> -#include <net/udplite.h> #include <net/tcp.h> #include <net/ping.h> #include <net/protocol.h> @@ -52,7 +50,6 @@ #include <net/transp_v6.h> #include <net/ip6_route.h> #include <net/addrconf.h> -#include <net/ipv6_stubs.h> #include <net/ndisc.h> #ifdef CONFIG_IPV6_TUNNEL #include <net/ip6_tunnel.h> @@ -71,10 +68,6 @@ #include "ip6_offload.h" -MODULE_AUTHOR("Cast of dozens"); -MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); -MODULE_LICENSE("GPL"); - /* The inetsw6 table contains everything that inet6_create needs to * build a new socket. */ @@ -269,8 +262,8 @@ out_sk_release: goto out; } -static int __inet6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len, - u32 flags) +int __inet6_bind(struct sock *sk, struct sockaddr_unsized *uaddr, int addr_len, + u32 flags) { struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr; struct inet_sock *inet = inet_sk(sk); @@ -636,8 +629,6 @@ int inet6_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) EXPORT_SYMBOL_GPL(inet6_compat_ioctl); #endif /* CONFIG_COMPAT */ -INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *, - size_t)); int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct sock *sk = sock->sk; @@ -652,26 +643,19 @@ int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) sk, msg, size); } -INDIRECT_CALLABLE_DECLARE(int udpv6_recvmsg(struct sock *, struct msghdr *, - size_t, int, int *)); int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct sock *sk = 
sock->sk; const struct proto *prot; - int addr_len = 0; - int err; if (likely(!(flags & MSG_ERRQUEUE))) sock_rps_record_flow(sk); /* IPV6_ADDRFORM can change sk->sk_prot under us. */ prot = READ_ONCE(sk->sk_prot); - err = INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, - sk, msg, size, flags, &addr_len); - if (err >= 0) - msg->msg_namelen = addr_len; - return err; + return INDIRECT_CALL_2(prot->recvmsg, tcp_recvmsg, udpv6_recvmsg, + sk, msg, size, flags); } const struct proto_ops inet6_stream_ops = { @@ -706,6 +690,7 @@ const struct proto_ops inet6_stream_ops = { .compat_ioctl = inet6_compat_ioctl, #endif .set_rcvlowat = tcp_set_rcvlowat, + .set_rcvbuf = tcp_set_rcvbuf, }; EXPORT_SYMBOL_GPL(inet6_stream_ops); @@ -896,9 +881,7 @@ static int __net_init ipv6_init_mibs(struct net *net) net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib); if (!net->mib.udp_stats_in6) return -ENOMEM; - net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib); - if (!net->mib.udplite_stats_in6) - goto err_udplite_mib; + net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib); if (!net->mib.ipv6_statistics) goto err_ip_mib; @@ -909,10 +892,10 @@ static int __net_init ipv6_init_mibs(struct net *net) u64_stats_init(&af_inet6_stats->syncp); } - net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib); if (!net->mib.icmpv6_statistics) goto err_icmp_mib; + net->mib.icmpv6msg_statistics = kzalloc_obj(struct icmpv6msg_mib); if (!net->mib.icmpv6msg_statistics) goto err_icmpmsg_mib; @@ -923,8 +906,6 @@ err_icmpmsg_mib: err_icmp_mib: free_percpu(net->mib.ipv6_statistics); err_ip_mib: - free_percpu(net->mib.udplite_stats_in6); -err_udplite_mib: free_percpu(net->mib.udp_stats_in6); return -ENOMEM; } @@ -932,7 +913,6 @@ err_udplite_mib: static void ipv6_cleanup_mibs(struct net *net) { free_percpu(net->mib.udp_stats_in6); - free_percpu(net->mib.udplite_stats_in6); free_percpu(net->mib.ipv6_statistics); free_percpu(net->mib.icmpv6_statistics); kfree(net->mib.icmpv6msg_statistics); 
@@ -1015,50 +995,6 @@ static struct pernet_operations inet6_net_ops = { .exit = inet6_net_exit, }; -static int ipv6_route_input(struct sk_buff *skb) -{ - ip6_route_input(skb); - return skb_dst(skb)->error; -} - -static const struct ipv6_stub ipv6_stub_impl = { - .ipv6_sock_mc_join = ipv6_sock_mc_join, - .ipv6_sock_mc_drop = ipv6_sock_mc_drop, - .ipv6_dst_lookup_flow = ip6_dst_lookup_flow, - .ipv6_route_input = ipv6_route_input, - .fib6_get_table = fib6_get_table, - .fib6_table_lookup = fib6_table_lookup, - .fib6_lookup = fib6_lookup, - .fib6_select_path = fib6_select_path, - .ip6_mtu_from_fib6 = ip6_mtu_from_fib6, - .fib6_nh_init = fib6_nh_init, - .fib6_nh_release = fib6_nh_release, - .fib6_nh_release_dsts = fib6_nh_release_dsts, - .fib6_update_sernum = fib6_update_sernum_stub, - .fib6_rt_update = fib6_rt_update, - .ip6_del_rt = ip6_del_rt, - .udpv6_encap_enable = udpv6_encap_enable, - .ndisc_send_na = ndisc_send_na, -#if IS_ENABLED(CONFIG_XFRM) - .xfrm6_local_rxpmtu = xfrm6_local_rxpmtu, - .xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv, - .xfrm6_gro_udp_encap_rcv = xfrm6_gro_udp_encap_rcv, - .xfrm6_rcv_encap = xfrm6_rcv_encap, -#endif - .nd_tbl = &nd_tbl, - .ipv6_fragment = ip6_fragment, - .ipv6_dev_find = ipv6_dev_find, - .ip6_xmit = ip6_xmit, -}; - -static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { - .inet6_bind = __inet6_bind, - .udp6_lib_lookup = __udp6_lib_lookup, - .ipv6_setsockopt = do_ipv6_setsockopt, - .ipv6_getsockopt = do_ipv6_getsockopt, - .ipv6_dev_get_saddr = ipv6_dev_get_saddr, -}; - static int __init inet6_init(void) { struct list_head *r; @@ -1085,13 +1021,9 @@ static int __init inet6_init(void) if (err) goto out_unregister_tcp_proto; - err = proto_register(&udplitev6_prot, 1); - if (err) - goto out_unregister_udp_proto; - err = proto_register(&rawv6_prot, 1); if (err) - goto out_unregister_udplite_proto; + goto out_unregister_udp_proto; err = proto_register(&pingv6_prot, 1); if (err) @@ -1134,16 +1066,11 @@ static int __init inet6_init(void) 
if (err) goto igmp_fail; - err = ipv6_netfilter_init(); - if (err) - goto netfilter_fail; /* Create /proc/foo6 entries. */ #ifdef CONFIG_PROC_FS err = -ENOMEM; if (raw6_proc_init()) goto proc_raw6_fail; - if (udplite6_proc_init()) - goto proc_udplite6_fail; if (ipv6_misc_proc_init()) goto proc_misc6_fail; if (if6_proc_init()) @@ -1179,10 +1106,6 @@ static int __init inet6_init(void) if (err) goto udpv6_fail; - err = udplitev6_init(); - if (err) - goto udplitev6_fail; - err = udpv6_offload_init(); if (err) goto udpv6_offload_fail; @@ -1225,10 +1148,6 @@ static int __init inet6_init(void) goto sysctl_fail; #endif - /* ensure that ipv6 stubs are visible only after ipv6 is ready */ - wmb(); - ipv6_stub = &ipv6_stub_impl; - ipv6_bpf_stub = &ipv6_bpf_stub_impl; out: return err; @@ -1253,8 +1172,6 @@ ipv6_packet_fail: tcpv6_fail: udpv6_offload_exit(); udpv6_offload_fail: - udplitev6_exit(); -udplitev6_fail: udpv6_exit(); udpv6_fail: ipv6_frag_exit(); @@ -1276,13 +1193,9 @@ ip6_route_fail: proc_if6_fail: ipv6_misc_proc_exit(); proc_misc6_fail: - udplite6_proc_exit(); -proc_udplite6_fail: raw6_proc_exit(); proc_raw6_fail: #endif - ipv6_netfilter_fini(); -netfilter_fail: igmp6_cleanup(); igmp_fail: ndisc_cleanup(); @@ -1301,14 +1214,10 @@ out_unregister_ping_proto: proto_unregister(&pingv6_prot); out_unregister_raw_proto: proto_unregister(&rawv6_prot); -out_unregister_udplite_proto: - proto_unregister(&udplitev6_prot); out_unregister_udp_proto: proto_unregister(&udpv6_prot); out_unregister_tcp_proto: proto_unregister(&tcpv6_prot); goto out; } -module_init(inet6_init); - -MODULE_ALIAS_NETPROTO(PF_INET6); +device_initcall(inet6_init); diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 2d7b59732f7e..ca3605acb433 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -452,7 +452,7 @@ static bool ip6_datagram_support_cmsg(struct sk_buff *skb, /* * Handle MSG_ERRQUEUE */ -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) +int 
ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; @@ -503,7 +503,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) &sin->sin6_addr); sin->sin6_scope_id = 0; } - *addr_len = sizeof(*sin); + msg->msg_namelen = sizeof(*sin); } memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); @@ -545,8 +545,7 @@ EXPORT_SYMBOL_GPL(ipv6_recv_error); /* * Handle IPV6_RECVPATHMTU */ -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, - int *addr_len) +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; @@ -579,7 +578,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, sin->sin6_port = 0; sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id; sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr; - *addr_len = sizeof(*sin); + msg->msg_namelen = sizeof(*sin); } put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c index 949b72610df7..64cd4ed8864c 100644 --- a/net/ipv6/fib6_notifier.c +++ b/net/ipv6/fib6_notifier.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/notifier.h> #include <linux/socket.h> #include <linux/kernel.h> diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index fd5f7112a51f..e1b2b4fa6e18 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -92,6 +92,9 @@ int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, return err; } +#if IS_MODULE(CONFIG_NFT_FIB_IPV6) +EXPORT_SYMBOL_GPL(fib6_lookup); +#endif struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, const struct sk_buff *skb, diff --git a/net/ipv6/fou6.c b/net/ipv6/fou6.c index 430518ae26fa..157765259e2f 100644 --- a/net/ipv6/fou6.c +++ b/net/ipv6/fou6.c @@ -141,8 +141,7 @@ static int gue6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, * 
recursion. Besides, this kind of encapsulation can't even be * configured currently. Discard this. */ - if (guehdr->proto_ctype == IPPROTO_UDP || - guehdr->proto_ctype == IPPROTO_UDPLITE) + if (guehdr->proto_ctype == IPPROTO_UDP) return -EOPNOTSUPP; skb_set_transport_header(skb, -(int)sizeof(struct icmp6hdr)); diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index d5d23a9296ea..799d9e9ac45d 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -1291,13 +1291,8 @@ int __init icmpv6_init(void) if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) goto fail; - err = inet6_register_icmp_sender(icmp6_send); - if (err) - goto sender_reg_err; return 0; -sender_reg_err: - inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); fail: pr_err("Failed to register ICMP6 protocol\n"); return err; @@ -1305,7 +1300,6 @@ fail: void icmpv6_cleanup(void) { - inet6_unregister_icmp_sender(icmp6_send); inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6); } diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index b8d43ed4689d..e71571455c8a 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/errno.h> #include <linux/ip.h> #include <linux/kernel.h> diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 11fc2f7de2fe..37534e116899 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -56,8 +56,8 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, return dst; } -static struct dst_entry *inet6_csk_route_socket(struct sock *sk, - struct flowi6 *fl6) +struct dst_entry *inet6_csk_route_socket(struct sock *sk, + struct flowi6 *fl6) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -118,18 +118,3 @@ int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused return res; } EXPORT_SYMBOL_GPL(inet6_csk_xmit); - -struct dst_entry *inet6_csk_update_pmtu(struct sock 
*sk, u32 mtu) -{ - struct flowi6 *fl6 = &inet_sk(sk)->cork.fl.u.ip6; - struct dst_entry *dst; - - dst = inet6_csk_route_socket(sk, fl6); - - if (IS_ERR(dst)) - return NULL; - dst->ops->update_pmtu(dst, sk, NULL, mtu, true); - - dst = inet6_csk_route_socket(sk, fl6); - return IS_ERR(dst) ? NULL : dst; -} diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 182d38e6d6d8..b111b51d69fc 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -23,20 +23,55 @@ #include <net/sock_reuseport.h> #include <net/tcp.h> +void inet6_init_ehash_secret(void) +{ + net_get_random_sleepable_once(&inet6_ehash_secret, + sizeof(inet6_ehash_secret)); + net_get_random_sleepable_once(&tcp_ipv6_hash_secret, + sizeof(tcp_ipv6_hash_secret)); +} + u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport) { - u32 lhash, fhash; + u32 a, b, c; + + /* + * Please look at jhash() implementation for reference. + * Hash laddr + faddr + lport/fport + net_hash_mix. + * Notes: + * We combine laddr[0] (high order 32 bits of local address) + * with net_hash_mix() to hash a multiple of 3 words. + * + * We do not include JHASH_INITVAL + 36 contribution + * to initial values of a, b, c. 
+ */ - net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret)); - net_get_random_once(&tcp_ipv6_hash_secret, sizeof(tcp_ipv6_hash_secret)); + a = b = c = tcp_ipv6_hash_secret; - lhash = (__force u32)laddr->s6_addr32[3]; - fhash = __ipv6_addr_jhash(faddr, tcp_ipv6_hash_secret); + a += (__force u32)laddr->s6_addr32[0] ^ net_hash_mix(net); + b += (__force u32)laddr->s6_addr32[1]; + c += (__force u32)laddr->s6_addr32[2]; + __jhash_mix(a, b, c); - return lport + __inet6_ehashfn(lhash, 0, fhash, fport, - inet6_ehash_secret + net_hash_mix(net)); + a += (__force u32)laddr->s6_addr32[3]; + b += (__force u32)faddr->s6_addr32[0]; + c += (__force u32)faddr->s6_addr32[1]; + __jhash_mix(a, b, c); + + a += (__force u32)faddr->s6_addr32[2]; + b += (__force u32)faddr->s6_addr32[3]; + c += (__force u32)fport; + __jhash_final(a, b, c); + + /* Note: We need to add @lport instead of fully hashing it. + * See commits 9544d60a2605 ("inet: change lport contribution + * to inet_ehashfn() and inet6_ehashfn()") and d4438ce68bf1 + * ("inet: call inet6_ehashfn() once from inet6_hash_connect()") + * for references. 
+ */ + return lport + c; } EXPORT_SYMBOL_GPL(inet6_ehashfn); @@ -363,6 +398,8 @@ int inet6_hash_connect(struct inet_timewait_death_row *death_row, if (!inet_sk(sk)->inet_num) port_offset = inet6_sk_port_offset(sk); + inet6_init_ehash_secret(); + hash_port0 = inet6_ehashfn(net, daddr, 0, saddr, inet->inet_dport); return __inet_hash_connect(death_row, sk, port_offset, hash_port0, diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index 377717045f8f..e1a594873675 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <net/ip.h> +#include <net/ip6_checksum.h> #include <net/udp.h> -#include <net/udplite.h> #include <asm/checksum.h> #ifndef _HAVE_ARCH_IPV6_CSUM @@ -62,53 +62,6 @@ __sum16 csum_ipv6_magic(const struct in6_addr *saddr, EXPORT_SYMBOL(csum_ipv6_magic); #endif -int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) -{ - int err; - - UDP_SKB_CB(skb)->partial_cov = 0; - UDP_SKB_CB(skb)->cscov = skb->len; - - if (proto == IPPROTO_UDPLITE) { - err = udplite_checksum_init(skb, uh); - if (err) - return err; - - if (UDP_SKB_CB(skb)->partial_cov) { - skb->csum = ip6_compute_pseudo(skb, proto); - return 0; - } - } - - /* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels) - * we accept a checksum of zero here. When we find the socket - * for the UDP packet we'll check if that socket allows zero checksum - * for IPv6 (set by socket option). - * - * Note, we are only interested in != 0 or == 0, thus the - * force to int. - */ - err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, - ip6_compute_pseudo); - if (err) - return err; - - if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) { - /* If SW calculated the value, we know it's bad */ - if (skb->csum_complete_sw) - return 1; - - /* HW says the value is bad. Let's validate that. - * skb->csum is no longer the full packet checksum, - * so don't treat is as such. 
- */ - skb_checksum_complete_unset(skb); - } - - return 0; -} -EXPORT_SYMBOL(udp6_csum_init); - /* Function to set UDP checksum for an IPv6 UDP packet. This is intended * for the simple case like when setting the checksum for a UDP tunnel. */ diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 45ef4d65dcbc..b897b3c5023b 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -342,6 +342,9 @@ int fib6_lookup(struct net *net, int oif, struct flowi6 *fl6, return fib6_table_lookup(net, net->ipv6.fib6_main_tbl, oif, fl6, res, flags); } +#if IS_MODULE(CONFIG_NFT_FIB_IPV6) +EXPORT_SYMBOL_GPL(fib6_lookup); +#endif static void __net_init fib6_tables_init(struct net *net) { @@ -1413,14 +1416,6 @@ void fib6_update_sernum_upto_root(struct net *net, struct fib6_info *rt) __fib6_update_sernum_upto_root(rt, fib6_new_sernum(net)); } -/* allow ipv4 to update sernum via ipv6_stub */ -void fib6_update_sernum_stub(struct net *net, struct fib6_info *f6i) -{ - spin_lock_bh(&f6i->fib6_table->tb6_lock); - fib6_update_sernum_upto_root(net, f6i); - spin_unlock_bh(&f6i->fib6_table->tb6_lock); -} - /* * Add routing information to the routing tree. 
* <destination addr>/<source addr> @@ -2779,7 +2774,7 @@ static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v) rcu_read_unlock(); } -#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL) +#if defined(CONFIG_BPF_SYSCALL) static int ipv6_route_prog_seq_show(struct bpf_prog *prog, struct bpf_iter_meta *meta, void *v) diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index dafcc0dcd77a..63fc8556b475 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -593,6 +593,7 @@ static int gre_rcv(struct sk_buff *skb) out: icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); drop: + dev_core_stats_rx_dropped_inc(skb->dev); kfree_skb(skb); return 0; } diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c index 233914b63bdb..e43ea9492332 100644 --- a/net/ipv6/ip6_icmp.c +++ b/net/ipv6/ip6_icmp.c @@ -7,47 +7,8 @@ #include <net/ipv6.h> -#if IS_ENABLED(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) && IS_ENABLED(CONFIG_NF_NAT) -#if !IS_BUILTIN(CONFIG_IPV6) - -static ip6_icmp_send_t __rcu *ip6_icmp_send; - -int inet6_register_icmp_sender(ip6_icmp_send_t *fn) -{ - return (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, NULL, fn) == NULL) ? - 0 : -EBUSY; -} -EXPORT_SYMBOL(inet6_register_icmp_sender); - -int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn) -{ - int ret; - - ret = (cmpxchg((ip6_icmp_send_t **)&ip6_icmp_send, fn, NULL) == fn) ? 
- 0 : -EINVAL; - - synchronize_net(); - - return ret; -} -EXPORT_SYMBOL(inet6_unregister_icmp_sender); - -void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, - const struct inet6_skb_parm *parm) -{ - ip6_icmp_send_t *send; - - rcu_read_lock(); - send = rcu_dereference(ip6_icmp_send); - if (send) - send(skb, type, code, info, NULL, parm); - rcu_read_unlock(); -} -EXPORT_SYMBOL(__icmpv6_send); -#endif - -#if IS_ENABLED(CONFIG_NF_NAT) #include <net/netfilter/nf_conntrack.h> void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) { @@ -60,7 +21,7 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) ct = nf_ct_get(skb_in, &ctinfo); if (!ct || !(READ_ONCE(ct->status) & IPS_NAT_MASK)) { - __icmpv6_send(skb_in, type, code, info, &parm); + icmp6_send(skb_in, type, code, info, NULL, &parm); return; } @@ -76,11 +37,10 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info) orig_ip = ipv6_hdr(skb_in)->saddr; dir = CTINFO2DIR(ctinfo); ipv6_hdr(skb_in)->saddr = ct->tuplehash[dir].tuple.src.u3.in6; - __icmpv6_send(skb_in, type, code, info, &parm); + icmp6_send(skb_in, type, code, info, NULL, &parm); ipv6_hdr(skb_in)->saddr = orig_ip; out: consume_skb(cloned_skb); } EXPORT_SYMBOL(icmpv6_ndo_send); #endif -#endif diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 2bcb981c91aa..967b07aeb683 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -44,6 +44,46 @@ #include <net/xfrm.h> #include <net/inet_ecn.h> #include <net/dst_metadata.h> +#include <net/inet6_hashtables.h> + +static void tcp_v6_early_demux(struct sk_buff *skb) +{ + struct net *net = dev_net_rcu(skb->dev); + const struct ipv6hdr *hdr; + const struct tcphdr *th; + struct sock *sk; + + if (skb->pkt_type != PACKET_HOST) + return; + + if (!pskb_may_pull(skb, skb_transport_offset(skb) + + sizeof(struct tcphdr))) + return; + + hdr = ipv6_hdr(skb); + th = tcp_hdr(skb); + + if (th->doff < sizeof(struct tcphdr) / 4) + 
return; + + /* Note : We use inet6_iif() here, not tcp_v6_iif() */ + sk = __inet6_lookup_established(net, &hdr->saddr, th->source, + &hdr->daddr, ntohs(th->dest), + inet6_iif(skb), inet6_sdif(skb)); + if (sk) { + skb->sk = sk; + skb->destructor = sock_edemux; + if (sk_fullsock(sk)) { + struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); + + if (dst) + dst = dst_check(dst, sk->sk_rx_dst_cookie); + if (dst && + sk->sk_rx_dst_ifindex == skb->skb_iif) + skb_dst_set_noref(skb, dst); + } + } +} static void ip6_rcv_finish_core(struct net *net, struct sock *sk, struct sk_buff *skb) diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index bd7f780e37a5..d8072ad6b8c4 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -286,7 +286,7 @@ not_same_flow: if (likely(proto == IPPROTO_TCP)) pp = tcp6_gro_receive(head, skb); -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) else if (likely(proto == IPPROTO_UDP)) pp = udp6_gro_receive(head, skb); #endif @@ -346,7 +346,7 @@ INDIRECT_CALLABLE_SCOPE int ipv6_gro_complete(struct sk_buff *skb, int nhoff) if (likely(ops == &net_hotdata.tcpv6_offload)) return tcp6_gro_complete(skb, nhoff); -#if IS_BUILTIN(CONFIG_IPV6) +#if IS_ENABLED(CONFIG_IPV6) if (ops == &net_hotdata.udpv6_offload) return udp6_gro_complete(skb, nhoff); #endif diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 8e2a6b28cea7..7e92909ab5be 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -259,6 +259,27 @@ bool ip6_autoflowlabel(struct net *net, const struct sock *sk) return inet6_test_bit(AUTOFLOWLABEL, sk); } +int ip6_dst_hoplimit(struct dst_entry *dst) +{ + int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); + + rcu_read_lock(); + if (hoplimit == 0) { + struct net_device *dev = dst_dev_rcu(dst); + struct inet6_dev *idev; + + idev = __in6_dev_get(dev); + if (idev) + hoplimit = READ_ONCE(idev->cnf.hop_limit); + else + hoplimit = READ_ONCE(dev_net(dev)->ipv6.devconf_all->hop_limit); + } + rcu_read_unlock(); + + 
return hoplimit; +} +EXPORT_SYMBOL(ip6_dst_hoplimit); + /* * xmit an sk_buff (used by TCP and SCTP) * Note : socket lock is not held for SYNACK packets, but might be modified @@ -873,6 +894,11 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, __be32 frag_id; u8 *prevhdr, nexthdr = 0; + if (!ipv6_mod_enabled()) { + kfree_skb(skb); + return -EAFNOSUPPORT; + } + err = ip6_find_1stfragopt(skb, &prevhdr); if (err < 0) goto fail; @@ -1045,6 +1071,7 @@ fail: kfree_skb(skb); return err; } +EXPORT_SYMBOL_GPL(ip6_fragment); static inline int ip6_rt_check(const struct rt6key *rt_key, const struct in6_addr *fl_addr, @@ -1256,6 +1283,8 @@ struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, st struct dst_entry *dst = NULL; int err; + if (!ipv6_mod_enabled()) + return ERR_PTR(-EAFNOSUPPORT); err = ip6_dst_lookup_tail(net, sk, &dst, fl6); if (err) return ERR_PTR(err); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 0b53488a9229..46bc06506470 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -96,9 +96,6 @@ static inline int ip6_tnl_mpls_supported(void) return IS_ENABLED(CONFIG_MPLS); } -#define for_each_ip6_tunnel_rcu(start) \ - for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) - /** * ip6_tnl_lookup - fetch tunnel matching the end-point addresses * @net: network namespace @@ -121,7 +118,7 @@ ip6_tnl_lookup(struct net *net, int link, struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct in6_addr any; - for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + for_each_ip_tunnel_rcu(t, ip6n->tnls_r_l[hash]) { if (!ipv6_addr_equal(local, &t->parms.laddr) || !ipv6_addr_equal(remote, &t->parms.raddr) || !(t->dev->flags & IFF_UP)) @@ -135,7 +132,7 @@ ip6_tnl_lookup(struct net *net, int link, memset(&any, 0, sizeof(any)); hash = HASH(&any, local); - for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + for_each_ip_tunnel_rcu(t, ip6n->tnls_r_l[hash]) { if (!ipv6_addr_equal(local, 
&t->parms.laddr) || !ipv6_addr_any(&t->parms.raddr) || !(t->dev->flags & IFF_UP)) @@ -148,7 +145,7 @@ ip6_tnl_lookup(struct net *net, int link, } hash = HASH(remote, &any); - for_each_ip6_tunnel_rcu(ip6n->tnls_r_l[hash]) { + for_each_ip_tunnel_rcu(t, ip6n->tnls_r_l[hash]) { if (!ipv6_addr_equal(remote, &t->parms.raddr) || !ipv6_addr_any(&t->parms.laddr) || !(t->dev->flags & IFF_UP)) diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index cef3e0210744..405ef1cb8864 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -162,8 +162,7 @@ struct dst_entry *udp_tunnel6_dst_lookup(struct sk_buff *skb, fl6.fl6_dport = dport; fl6.flowlabel = ip6_make_flowinfo(dsfield, key->label); - dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6, - NULL); + dst = ip6_dst_lookup_flow(net, sock->sk, &fl6, NULL); if (IS_ERR(dst)) { netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr); return ERR_PTR(-ENETUNREACH); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index e047a4680ab0..85010ff21c98 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1280,7 +1280,7 @@ static int ip6mr_device_event(struct notifier_block *this, static unsigned int ip6mr_seq_read(const struct net *net) { - return READ_ONCE(net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net); + return atomic_read(&net->ipv6.ipmr_seq) + ip6mr_rules_seq_read(net); } static int ip6mr_dump(struct net *net, struct notifier_block *nb, @@ -1305,7 +1305,7 @@ static int __net_init ip6mr_notifier_init(struct net *net) { struct fib_notifier_ops *ops; - net->ipv6.ipmr_seq = 0; + atomic_set(&net->ipv6.ipmr_seq, 0); ops = fib_notifier_ops_register(&ip6mr_notifier_ops_template, net); if (IS_ERR(ops)) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 02c4cab60c69..b4c977434c2e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -45,7 +45,6 @@ #include <net/inet_common.h> #include <net/tcp.h> #include <net/udp.h> -#include <net/udplite.h> #include <net/xfrm.h> 
#include <net/compat.h> #include <net/seg6.h> @@ -563,10 +562,8 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (sk->sk_type == SOCK_RAW) break; - if (sk->sk_protocol == IPPROTO_UDP || - sk->sk_protocol == IPPROTO_UDPLITE) { - struct udp_sock *up = udp_sk(sk); - if (up->pending == AF_INET6) { + if (sk->sk_protocol == IPPROTO_UDP) { + if (udp_sk(sk)->pending == AF_INET6) { retv = -EBUSY; break; } @@ -607,16 +604,11 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, WRITE_ONCE(sk->sk_family, PF_INET); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { - struct proto *prot = &udp_prot; - - if (sk->sk_protocol == IPPROTO_UDPLITE) - prot = &udplite_prot; - sock_prot_inuse_add(net, sk->sk_prot, -1); - sock_prot_inuse_add(net, prot, 1); + sock_prot_inuse_add(net, &udp_prot, 1); /* Paired with READ_ONCE(sk->sk_prot) in inet6_dgram_ops */ - WRITE_ONCE(sk->sk_prot, prot); + WRITE_ONCE(sk->sk_prot, &udp_prot); WRITE_ONCE(sk->sk_socket->ops, &inet_dgram_ops); WRITE_ONCE(sk->sk_family, PF_INET); } @@ -1098,7 +1090,6 @@ int do_ipv6_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case IPV6_ADDRFORM: if (sk->sk_protocol != IPPROTO_UDP && - sk->sk_protocol != IPPROTO_UDPLITE && sk->sk_protocol != IPPROTO_TCP) return -ENOPROTOOPT; if (sk->sk_state != TCP_ESTABLISHED) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 186e60c79214..e7ad13c5bd26 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -576,6 +576,7 @@ void ndisc_send_na(struct net_device *dev, const struct in6_addr *daddr, ndisc_send_skb(skb, daddr, src_addr); } +EXPORT_SYMBOL_GPL(ndisc_send_na); static void ndisc_send_unsol_na(struct net_device *dev) { diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 46540a5a4331..6d80f85e55fa 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -1,7 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * IPv6 specific functions of netfilter core * - * Rusty Russell (C) 2000 -- This code is 
GPL. + * Rusty Russell (C) 2000 * Patrick McHardy (C) 2006-2012 */ #include <linux/kernel.h> @@ -85,21 +86,6 @@ int ip6_route_me_harder(struct net *net, struct sock *sk_partial, struct sk_buff } EXPORT_SYMBOL(ip6_route_me_harder); -static int nf_ip6_reroute(struct sk_buff *skb, - const struct nf_queue_entry *entry) -{ - struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); - - if (entry->state.hook == NF_INET_LOCAL_OUT) { - const struct ipv6hdr *iph = ipv6_hdr(skb); - if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || - !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || - skb->mark != rt_info->mark) - return ip6_route_me_harder(entry->state.net, entry->state.sk, skb); - } - return 0; -} - int __nf_ip6_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict) { @@ -242,36 +228,3 @@ blackhole: return 0; } EXPORT_SYMBOL_GPL(br_ip6_fragment); - -static const struct nf_ipv6_ops ipv6ops = { -#if IS_MODULE(CONFIG_IPV6) - .chk_addr = ipv6_chk_addr, - .route_me_harder = ip6_route_me_harder, - .dev_get_saddr = ipv6_dev_get_saddr, - .route = __nf_ip6_route, -#if IS_ENABLED(CONFIG_SYN_COOKIES) - .cookie_init_sequence = __cookie_v6_init_sequence, - .cookie_v6_check = __cookie_v6_check, -#endif -#endif - .route_input = ip6_route_input, - .fragment = ip6_fragment, - .reroute = nf_ip6_reroute, -#if IS_MODULE(CONFIG_IPV6) - .br_fragment = br_ip6_fragment, -#endif -}; - -int __init ipv6_netfilter_init(void) -{ - RCU_INIT_POINTER(nf_ipv6_ops, &ipv6ops); - return 0; -} - -/* This can be called from inet6_init() on errors, so it cannot - * be marked __exit. 
-DaveM - */ -void ipv6_netfilter_fini(void) -{ - RCU_INIT_POINTER(nf_ipv6_ops, NULL); -} diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c index da69a27e8332..bbb684f9964c 100644 --- a/net/ipv6/netfilter/ip6t_eui64.c +++ b/net/ipv6/netfilter/ip6t_eui64.c @@ -7,6 +7,7 @@ #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ipv6.h> +#include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/netfilter/x_tables.h> @@ -21,8 +22,10 @@ eui64_mt6(const struct sk_buff *skb, struct xt_action_param *par) { unsigned char eui64[8]; - if (!(skb_mac_header(skb) >= skb->head && - skb_mac_header(skb) + ETH_HLEN <= skb->data)) { + if (!skb->dev || skb->dev->type != ARPHRD_ETHER) + return false; + + if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN) { par->hotdrop = true; return false; } diff --git a/net/ipv6/netfilter/nft_dup_ipv6.c b/net/ipv6/netfilter/nft_dup_ipv6.c index 492a811828a7..95ec27b3971c 100644 --- a/net/ipv6/netfilter/nft_dup_ipv6.c +++ b/net/ipv6/netfilter/nft_dup_ipv6.c @@ -74,7 +74,6 @@ static const struct nft_expr_ops nft_dup_ipv6_ops = { .eval = nft_dup_ipv6_eval, .init = nft_dup_ipv6_init, .dump = nft_dup_ipv6_dump, - .reduce = NFT_REDUCE_READONLY, }; static const struct nla_policy nft_dup_ipv6_policy[NFTA_DUP_MAX + 1] = { diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 421036a3605b..8b2dba88ee96 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -52,7 +52,13 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, fl6->flowlabel = (*(__be32 *)iph) & IPV6_FLOWINFO_MASK; fl6->flowi6_l3mdev = nft_fib_l3mdev_master_ifindex_rcu(pkt, dev); - return lookup_flags; + return lookup_flags | RT6_LOOKUP_F_DST_NOREF; +} + +static int nft_fib6_lookup(struct net *net, struct flowi6 *fl6, + struct fib6_result *res, int flags) +{ + return fib6_lookup(net, fl6->flowi6_oif, fl6, res, flags); } static u32 
__nft_fib6_eval_type(const struct nft_fib *priv, @@ -60,13 +66,14 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, struct ipv6hdr *iph) { const struct net_device *dev = NULL; + struct fib6_result res = {}; int route_err, addrtype; - struct rt6_info *rt; struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_proto = pkt->tprot, .flowi6_uid = sock_net_uid(nft_net(pkt), NULL), }; + int lookup_flags; u32 ret = 0; if (priv->flags & NFTA_FIB_F_IIF) @@ -74,29 +81,23 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, else if (priv->flags & NFTA_FIB_F_OIF) dev = nft_out(pkt); - nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); + lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); if (dev && nf_ipv6_chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) ret = RTN_LOCAL; - route_err = nf_ip6_route(nft_net(pkt), (struct dst_entry **)&rt, - flowi6_to_flowi(&fl6), false); + route_err = nft_fib6_lookup(nft_net(pkt), &fl6, &res, lookup_flags); if (route_err) goto err; - if (rt->rt6i_flags & RTF_REJECT) { - route_err = rt->dst.error; - dst_release(&rt->dst); - goto err; - } + if (res.fib6_flags & RTF_REJECT) + return res.fib6_type; - if (ipv6_anycast_destination((struct dst_entry *)rt, &fl6.daddr)) + if (__ipv6_anycast_destination(&res.f6i->fib6_dst, res.fib6_flags, &fl6.daddr)) ret = RTN_ANYCAST; - else if (!dev && rt->rt6i_flags & RTF_LOCAL) + else if (!dev && res.fib6_flags & RTF_LOCAL) ret = RTN_LOCAL; - dst_release(&rt->dst); - if (ret) return ret; @@ -152,6 +153,33 @@ static bool nft_fib_v6_skip_icmpv6(const struct sk_buff *skb, u8 next, const str return ipv6_addr_type(&iph->daddr) & IPV6_ADDR_LINKLOCAL; } +static bool nft_fib6_info_nh_dev_match(const struct net_device *nh_dev, + const struct net_device *dev) +{ + return nh_dev == dev || + l3mdev_master_ifindex_rcu(nh_dev) == dev->ifindex; +} + +static bool nft_fib6_info_nh_uses_dev(struct fib6_info *rt, + const struct net_device *dev) +{ + const struct net_device *nh_dev; + struct fib6_info 
*iter; + + nh_dev = fib6_info_nh_dev(rt); + if (nft_fib6_info_nh_dev_match(nh_dev, dev)) + return true; + + list_for_each_entry(iter, &rt->fib6_siblings, fib6_siblings) { + nh_dev = fib6_info_nh_dev(iter); + + if (nft_fib6_info_nh_dev_match(nh_dev, dev)) + return true; + } + + return false; +} + void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { @@ -160,14 +188,14 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct net_device *found = NULL; const struct net_device *oif = NULL; u32 *dest = ®s->data[priv->dreg]; + struct fib6_result res = {}; struct ipv6hdr *iph, _iph; struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_proto = pkt->tprot, .flowi6_uid = sock_net_uid(nft_net(pkt), NULL), }; - struct rt6_info *rt; - int lookup_flags; + int lookup_flags, ret; if (nft_fib_can_skip(pkt)) { nft_fib_store_result(dest, priv, nft_in(pkt)); @@ -193,26 +221,17 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph); *dest = 0; - rt = (void *)ip6_route_lookup(nft_net(pkt), &fl6, pkt->skb, - lookup_flags); - if (rt->dst.error) - goto put_rt_err; - - /* Should not see RTF_LOCAL here */ - if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) - goto put_rt_err; + ret = nft_fib6_lookup(nft_net(pkt), &fl6, &res, lookup_flags); + if (ret || res.fib6_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) + return; if (!oif) { - found = rt->rt6i_idev->dev; + found = fib6_info_nh_dev(res.f6i); } else { - if (oif == rt->rt6i_idev->dev || - l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == oif->ifindex) + if (nft_fib6_info_nh_uses_dev(res.f6i, oif)) found = oif; } - nft_fib_store_result(dest, priv, found); - put_rt_err: - ip6_rt_put(rt); } EXPORT_SYMBOL_GPL(nft_fib6_eval); @@ -225,7 +244,6 @@ static const struct nft_expr_ops nft_fib6_type_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = 
nft_fib_validate, - .reduce = nft_fib_reduce, }; static const struct nft_expr_ops nft_fib6_ops = { @@ -235,7 +253,6 @@ static const struct nft_expr_ops nft_fib6_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, - .reduce = nft_fib_reduce, }; static const struct nft_expr_ops * diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index 5c61294f410e..ed69c768797e 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -46,7 +46,6 @@ static const struct nft_expr_ops nft_reject_ipv6_ops = { .init = nft_reject_init, .dump = nft_reject_dump, .validate = nft_reject_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_reject_ipv6_type __read_mostly = { diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index cba1684a3f30..64b1eeb79b57 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -100,29 +100,6 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) } EXPORT_SYMBOL(ip6_find_1stfragopt); -#if IS_ENABLED(CONFIG_IPV6) -int ip6_dst_hoplimit(struct dst_entry *dst) -{ - int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); - - rcu_read_lock(); - if (hoplimit == 0) { - struct net_device *dev = dst_dev_rcu(dst); - struct inet6_dev *idev; - - idev = __in6_dev_get(dev); - if (idev) - hoplimit = READ_ONCE(idev->cnf.hop_limit); - else - hoplimit = READ_ONCE(dev_net(dev)->ipv6.devconf_all->hop_limit); - } - rcu_read_unlock(); - - return hoplimit; -} -EXPORT_SYMBOL(ip6_dst_hoplimit); -#endif - int __ip6_local_out(struct net *net, struct sock *sk, struct sk_buff *skb) { ipv6_set_payload_len(ipv6_hdr(skb), skb->len - sizeof(struct ipv6hdr)); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index e4afc651731a..6e90d0bf9f3d 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -24,8 +24,7 @@ #include <net/ping.h> /* Compatibility glue so we can support IPv6 when it's compiled as a module */ -static int 
dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, - int *addr_len) +static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) { return -EAFNOSUPPORT; } diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 73296f38c252..813013ca4e75 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -39,8 +39,6 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(net, &tcpv6_prot)); seq_printf(seq, "UDP6: inuse %d\n", sock_prot_inuse_get(net, &udpv6_prot)); - seq_printf(seq, "UDPLITE6: inuse %d\n", - sock_prot_inuse_get(net, &udplitev6_prot)); seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(net, &rawv6_prot)); seq_printf(seq, "FRAG6: inuse %u memory %lu\n", @@ -110,17 +108,6 @@ static const struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_ITEM("Udp6MemErrors", UDP_MIB_MEMERRORS), }; -static const struct snmp_mib snmp6_udplite6_list[] = { - SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS), - SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS), - SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS), - SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS), - SNMP_MIB_ITEM("UdpLite6RcvbufErrors", UDP_MIB_RCVBUFERRORS), - SNMP_MIB_ITEM("UdpLite6SndbufErrors", UDP_MIB_SNDBUFERRORS), - SNMP_MIB_ITEM("UdpLite6InCsumErrors", UDP_MIB_CSUMERRORS), - SNMP_MIB_ITEM("UdpLite6MemErrors", UDP_MIB_MEMERRORS), -}; - static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib) { char name[32]; @@ -228,9 +215,6 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) snmp6_seq_show_item(seq, net->mib.udp_stats_in6, NULL, snmp6_udp6_list, ARRAY_SIZE(snmp6_udp6_list)); - snmp6_seq_show_item(seq, net->mib.udplite_stats_in6, - NULL, snmp6_udplite6_list, - ARRAY_SIZE(snmp6_udplite6_list)); return 0; } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 27a268059168..3cc58698cbbd 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -369,7 +369,8 @@ static inline int rawv6_rcv_skb(struct sock *sk, 
struct sk_buff *skb) /* Charge it to the socket. */ skb_dst_drop(skb); - if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) { + reason = sock_queue_rcv_skb_reason(sk, skb); + if (reason) { sk_skb_reason_drop(sk, skb, reason); return NET_RX_DROP; } @@ -432,7 +433,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) */ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); @@ -444,10 +445,10 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return -EOPNOTSUPP; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len, addr_len); + return ipv6_recv_error(sk, msg, len); if (np->rxopt.bits.rxpmtu && READ_ONCE(np->rxpmtu)) - return ipv6_recv_rxpmtu(sk, msg, len, addr_len); + return ipv6_recv_rxpmtu(sk, msg, len); skb = skb_recv_datagram(sk, flags, &err); if (!skb) @@ -481,7 +482,7 @@ static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, sin6->sin6_flowinfo = 0; sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr, inet6_iif(skb)); - *addr_len = sizeof(*sin6); + msg->msg_namelen = sizeof(*sin6); } sock_recv_cmsgs(msg, sk, skb); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 25ec8001898d..11f9144bebbe 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -132,6 +132,9 @@ static int ip6_frag_queue(struct net *net, /* note that if prob_offset is set, the skb is freed elsewhere, * we do not free it here. */ + inet_frag_kill(&fq->q, refs); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASMFAILS); return -1; } @@ -163,6 +166,9 @@ static int ip6_frag_queue(struct net *net, * this case. 
-DaveM */ *prob_offset = offsetof(struct ipv6hdr, payload_len); + inet_frag_kill(&fq->q, refs); + __IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASMFAILS); return -1; } if (end > fq->q.len) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index cb521700cee7..19eb6b702227 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2655,6 +2655,7 @@ void ip6_route_input(struct sk_buff *skb) skb_dst_set_noref(skb, ip6_route_input_lookup(net, skb->dev, &fl6, skb, flags)); } +EXPORT_SYMBOL_GPL(ip6_route_input); INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table, @@ -3585,6 +3586,11 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh, struct inet6_dev *idev = NULL; int err; + if (!ipv6_mod_enabled()) { + NL_SET_ERR_MSG(extack, "IPv6 support not enabled in kernel"); + return -EAFNOSUPPORT; + } + fib6_nh->fib_nh_family = AF_INET6; #ifdef CONFIG_IPV6_ROUTER_PREF fib6_nh->last_probe = jiffies; @@ -6826,7 +6832,6 @@ void __init ip6_route_init_special_entries(void) #endif } -#if IS_BUILTIN(CONFIG_IPV6) #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt) @@ -6860,7 +6865,6 @@ static void bpf_iter_unregister(void) bpf_iter_unreg_target(&ipv6_route_reg_info); } #endif -#endif static const struct rtnl_msg_handler ip6_route_rtnl_msg_handlers[] __initconst_or_module = { {.owner = THIS_MODULE, .protocol = PF_INET6, .msgtype = RTM_NEWROUTE, @@ -6921,13 +6925,11 @@ int __init ip6_route_init(void) if (ret) goto out_register_late_subsys; -#if IS_BUILTIN(CONFIG_IPV6) #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) ret = bpf_iter_register(); if (ret) goto out_register_late_subsys; #endif -#endif for_each_possible_cpu(cpu) { struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); @@ -6961,11 +6963,9 @@ out_kmem_cache: void ip6_route_cleanup(void) { -#if IS_BUILTIN(CONFIG_IPV6) #if 
defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS) bpf_iter_unregister(); #endif -#endif unregister_netdevice_notifier(&ip6_route_dev_notifier); unregister_pernet_subsys(&ip6_route_net_late_ops); fib6_rules_cleanup(); diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index d6a0f7df9080..97b50d9b1365 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -50,6 +50,7 @@ static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo) struct seg6_lwt { struct dst_cache cache_input; struct dst_cache cache_output; + struct in6_addr tunsrc; struct seg6_iptunnel_encap tuninfo[]; }; @@ -66,6 +67,7 @@ seg6_encap_lwtunnel(struct lwtunnel_state *lwt) static const struct nla_policy seg6_iptunnel_policy[SEG6_IPTUNNEL_MAX + 1] = { [SEG6_IPTUNNEL_SRH] = { .type = NLA_BINARY }, + [SEG6_IPTUNNEL_SRC] = NLA_POLICY_EXACT_LEN(sizeof(struct in6_addr)), }; static int nla_put_srh(struct sk_buff *skb, int attrtype, @@ -88,23 +90,32 @@ static int nla_put_srh(struct sk_buff *skb, int attrtype, } static void set_tun_src(struct net *net, struct net_device *dev, - struct in6_addr *daddr, struct in6_addr *saddr) + struct in6_addr *daddr, struct in6_addr *saddr, + struct in6_addr *route_tunsrc) { struct seg6_pernet_data *sdata = seg6_pernet(net); struct in6_addr *tun_src; - rcu_read_lock(); - - tun_src = rcu_dereference(sdata->tun_src); - - if (!ipv6_addr_any(tun_src)) { - memcpy(saddr, tun_src, sizeof(struct in6_addr)); + /* Priority order to select tunnel source address: + * 1. per route source address (if configured) + * 2. per network namespace source address (if configured) + * 3. 
dynamic resolution + */ + if (route_tunsrc && !ipv6_addr_any(route_tunsrc)) { + memcpy(saddr, route_tunsrc, sizeof(struct in6_addr)); } else { - ipv6_dev_get_saddr(net, dev, daddr, IPV6_PREFER_SRC_PUBLIC, - saddr); - } + rcu_read_lock(); + tun_src = rcu_dereference(sdata->tun_src); + + if (!ipv6_addr_any(tun_src)) { + memcpy(saddr, tun_src, sizeof(struct in6_addr)); + } else { + ipv6_dev_get_saddr(net, dev, daddr, + IPV6_PREFER_SRC_PUBLIC, saddr); + } - rcu_read_unlock(); + rcu_read_unlock(); + } } /* Compute flowlabel for outer IPv6 header */ @@ -126,7 +137,8 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb, } static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, - int proto, struct dst_entry *cache_dst) + int proto, struct dst_entry *cache_dst, + struct in6_addr *route_tunsrc) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst_dev(dst); @@ -183,7 +195,7 @@ static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, isrh->nexthdr = proto; hdr->daddr = isrh->segments[isrh->first_segment]; - set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc); #ifdef CONFIG_IPV6_SEG6_HMAC if (sr_has_hmac(isrh)) { @@ -203,14 +215,15 @@ static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) { - return __seg6_do_srh_encap(skb, osrh, proto, NULL); + return __seg6_do_srh_encap(skb, osrh, proto, NULL, NULL); } EXPORT_SYMBOL_GPL(seg6_do_srh_encap); /* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */ static int seg6_do_srh_encap_red(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto, - struct dst_entry *cache_dst) + struct dst_entry *cache_dst, + struct in6_addr *route_tunsrc) { __u8 first_seg = osrh->first_segment; struct dst_entry *dst 
= skb_dst(skb); @@ -273,7 +286,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb, if (skip_srh) { hdr->nexthdr = proto; - set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc); goto out; } @@ -309,7 +322,7 @@ static int seg6_do_srh_encap_red(struct sk_buff *skb, srcaddr: isrh->nexthdr = proto; - set_tun_src(net, dev, &hdr->daddr, &hdr->saddr); + set_tun_src(net, dev, &hdr->daddr, &hdr->saddr, route_tunsrc); #ifdef CONFIG_IPV6_SEG6_HMAC if (unlikely(!skip_srh && sr_has_hmac(isrh))) { @@ -384,9 +397,11 @@ static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst) { struct dst_entry *dst = skb_dst(skb); struct seg6_iptunnel_encap *tinfo; + struct seg6_lwt *slwt; int proto, err = 0; - tinfo = seg6_encap_lwtunnel(dst->lwtstate); + slwt = seg6_lwt_lwtunnel(dst->lwtstate); + tinfo = slwt->tuninfo; switch (tinfo->mode) { case SEG6_IPTUN_MODE_INLINE: @@ -411,11 +426,11 @@ static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst) return -EINVAL; if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP) - err = __seg6_do_srh_encap(skb, tinfo->srh, - proto, cache_dst); + err = __seg6_do_srh_encap(skb, tinfo->srh, proto, + cache_dst, &slwt->tunsrc); else - err = seg6_do_srh_encap_red(skb, tinfo->srh, - proto, cache_dst); + err = seg6_do_srh_encap_red(skb, tinfo->srh, proto, + cache_dst, &slwt->tunsrc); if (err) return err; @@ -437,12 +452,12 @@ static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst) if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP) err = __seg6_do_srh_encap(skb, tinfo->srh, - IPPROTO_ETHERNET, - cache_dst); + IPPROTO_ETHERNET, cache_dst, + &slwt->tunsrc); else err = seg6_do_srh_encap_red(skb, tinfo->srh, - IPPROTO_ETHERNET, - cache_dst); + IPPROTO_ETHERNET, cache_dst, + &slwt->tunsrc); if (err) return err; @@ -679,6 +694,10 @@ static int seg6_build_state(struct net *net, struct nlattr *nla, if (family != AF_INET6) return -EINVAL; + if (tb[SEG6_IPTUNNEL_SRC]) { + 
NL_SET_ERR_MSG(extack, "incompatible mode for tunsrc"); + return -EINVAL; + } break; case SEG6_IPTUN_MODE_ENCAP: break; @@ -712,6 +731,18 @@ static int seg6_build_state(struct net *net, struct nlattr *nla, memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); + if (tb[SEG6_IPTUNNEL_SRC]) { + slwt->tunsrc = nla_get_in6_addr(tb[SEG6_IPTUNNEL_SRC]); + + if (ipv6_addr_any(&slwt->tunsrc) || + ipv6_addr_is_multicast(&slwt->tunsrc) || + ipv6_addr_loopback(&slwt->tunsrc)) { + NL_SET_ERR_MSG(extack, "invalid tunsrc address"); + err = -EINVAL; + goto err_destroy_output; + } + } + newts->type = LWTUNNEL_ENCAP_SEG6; newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT; @@ -724,6 +755,8 @@ static int seg6_build_state(struct net *net, struct nlattr *nla, return 0; +err_destroy_output: + dst_cache_destroy(&slwt->cache_output); err_destroy_input: dst_cache_destroy(&slwt->cache_input); err_free_newts: @@ -743,29 +776,46 @@ static int seg6_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + struct seg6_lwt *slwt = seg6_lwt_lwtunnel(lwtstate); if (nla_put_srh(skb, SEG6_IPTUNNEL_SRH, tuninfo)) return -EMSGSIZE; + if (!ipv6_addr_any(&slwt->tunsrc) && + nla_put_in6_addr(skb, SEG6_IPTUNNEL_SRC, &slwt->tunsrc)) + return -EMSGSIZE; + return 0; } static int seg6_encap_nlsize(struct lwtunnel_state *lwtstate) { struct seg6_iptunnel_encap *tuninfo = seg6_encap_lwtunnel(lwtstate); + struct seg6_lwt *slwt = seg6_lwt_lwtunnel(lwtstate); + int nlsize; + + nlsize = nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo)); + + if (!ipv6_addr_any(&slwt->tunsrc)) + nlsize += nla_total_size(sizeof(slwt->tunsrc)); - return nla_total_size(SEG6_IPTUN_ENCAP_SIZE(tuninfo)); + return nlsize; } static int seg6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) { struct seg6_iptunnel_encap *a_hdr = seg6_encap_lwtunnel(a); struct seg6_iptunnel_encap *b_hdr = seg6_encap_lwtunnel(b); + struct seg6_lwt *a_slwt = seg6_lwt_lwtunnel(a); + 
struct seg6_lwt *b_slwt = seg6_lwt_lwtunnel(b); int len = SEG6_IPTUN_ENCAP_SIZE(a_hdr); if (len != SEG6_IPTUN_ENCAP_SIZE(b_hdr)) return 1; + if (!ipv6_addr_equal(&a_slwt->tunsrc, &b_slwt->tunsrc)) + return 1; + return memcmp(a_hdr, b_hdr, len); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 6a7b8abb0477..201347b4e127 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -21,6 +21,7 @@ #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> +#include <linux/string.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> @@ -256,9 +257,9 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, if (parms->name[0]) { if (!dev_valid_name(parms->name)) goto failed; - strscpy(name, parms->name, IFNAMSIZ); + strscpy(name, parms->name); } else { - strcpy(name, "sit%d"); + strscpy(name, "sit%d"); } dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ipip6_tunnel_setup); @@ -275,7 +276,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, goto failed_free; if (!parms->name[0]) - strcpy(parms->name, dev->name); + strscpy(parms->name, dev->name); return nt; @@ -308,7 +309,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __u struct ip_tunnel_prl kprl, *kp; struct ip_tunnel_prl_entry *prl; unsigned int cmax, c = 0, ca, len; - int ret = 0; + int ret; if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) return -EINVAL; @@ -1442,7 +1443,7 @@ static int ipip6_tunnel_init(struct net_device *dev) int err; tunnel->dev = dev; - strcpy(tunnel->parms.name, dev->name); + strscpy(tunnel->parms.name, dev->name); ipip6_tunnel_bind_dev(dev); @@ -1863,7 +1864,7 @@ static int __net_init sit_init_net(struct net *net) ipip6_tunnel_clone_6rd(sitn->fb_tunnel_dev, sitn); ipip6_fb_tunnel_init(sitn->fb_tunnel_dev); - strcpy(t->parms.name, sitn->fb_tunnel_dev->name); + strscpy(t->parms.name, sitn->fb_tunnel_dev->name); return 0; err_reg_dev: diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 
823bf4fff963..2c3f7a739709 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -105,7 +105,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) } } -static union tcp_seq_and_ts_off +INDIRECT_CALLABLE_SCOPE union tcp_seq_and_ts_off tcp_v6_init_seq_and_ts_off(const struct net *net, const struct sk_buff *skb) { return secure_tcpv6_seq_and_ts_off(net, @@ -325,7 +325,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr_unsized *uaddr, inet->inet_dport); if (!tp->write_seq) WRITE_ONCE(tp->write_seq, st.seq); - tp->tsoffset = st.ts_off; + WRITE_ONCE(tp->tsoffset, st.ts_off); } if (tcp_fastopen_defer_connect(sk, &err)) @@ -348,6 +348,21 @@ failure: return err; } +static struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) +{ + struct flowi6 *fl6 = &inet_sk(sk)->cork.fl.u.ip6; + struct dst_entry *dst; + + dst = inet6_csk_route_socket(sk, fl6); + + if (IS_ERR(dst)) + return NULL; + dst->ops->update_pmtu(dst, sk, NULL, mtu, true); + + dst = inet6_csk_route_socket(sk, fl6); + return IS_ERR(dst) ? 
NULL : dst; +} + static void tcp_v6_mtu_reduced(struct sock *sk) { struct dst_entry *dst; @@ -1581,7 +1596,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) sock_rps_save_rxhash(sk, skb); sk_mark_napi_id(sk, skb); - if (dst) { + if (dst && unlikely(dst != skb_dst(skb))) { if (sk->sk_rx_dst_ifindex != skb->skb_iif || INDIRECT_CALL_1(dst->ops->check, ip6_dst_check, dst, sk->sk_rx_dst_cookie) == NULL) { @@ -1779,7 +1794,8 @@ lookup: } refcounted = true; nsk = NULL; - if (!tcp_filter(sk, skb, &drop_reason)) { + drop_reason = tcp_filter(sk, skb); + if (!drop_reason) { th = (const struct tcphdr *)skb->data; hdr = ipv6_hdr(skb); tcp_v6_fill_cb(skb, hdr, th); @@ -1840,7 +1856,8 @@ process: nf_reset_ct(skb); - if (tcp_filter(sk, skb, &drop_reason)) + drop_reason = tcp_filter(sk, skb); + if (drop_reason) goto discard_and_relse; th = (const struct tcphdr *)skb->data; @@ -1862,7 +1879,8 @@ process: if (!sock_owned_by_user(sk)) { ret = tcp_v6_do_rcv(sk, skb); } else { - if (tcp_add_backlog(sk, skb, &drop_reason)) + drop_reason = tcp_add_backlog(sk, skb); + if (drop_reason) goto discard_and_relse; } bh_unlock_sock(sk); @@ -1957,56 +1975,12 @@ do_time_wait: goto discard_it; } -void tcp_v6_early_demux(struct sk_buff *skb) -{ - struct net *net = dev_net_rcu(skb->dev); - const struct ipv6hdr *hdr; - const struct tcphdr *th; - struct sock *sk; - - if (skb->pkt_type != PACKET_HOST) - return; - - if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) - return; - - hdr = ipv6_hdr(skb); - th = tcp_hdr(skb); - - if (th->doff < sizeof(struct tcphdr) / 4) - return; - - /* Note : We use inet6_iif() here, not tcp_v6_iif() */ - sk = __inet6_lookup_established(net, &hdr->saddr, th->source, - &hdr->daddr, ntohs(th->dest), - inet6_iif(skb), inet6_sdif(skb)); - if (sk) { - skb->sk = sk; - skb->destructor = sock_edemux; - if (sk_fullsock(sk)) { - struct dst_entry *dst = rcu_dereference(sk->sk_rx_dst); - - if (dst) - dst = dst_check(dst, sk->sk_rx_dst_cookie); - if 
(dst && - sk->sk_rx_dst_ifindex == skb->skb_iif) - skb_dst_set_noref(skb, dst); - } - } -} - static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), }; -INDIRECT_CALLABLE_SCOPE void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb) -{ - __tcp_v6_send_check(skb, &sk->sk_v6_rcv_saddr, &sk->sk_v6_daddr); -} - const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, - .send_check = tcp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, .sk_rx_dst_set = inet6_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, @@ -2038,7 +2012,6 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { */ static const struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, - .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 010b909275dd..15e032194ecc 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -37,6 +37,7 @@ #include <trace/events/udp.h> #include <net/addrconf.h> +#include <net/aligned_data.h> #include <net/ndisc.h> #include <net/protocol.h> #include <net/transp_v6.h> @@ -57,7 +58,6 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <trace/events/skb.h> -#include "udp_impl.h" static void udpv6_destruct_sock(struct sock *sk) { @@ -65,7 +65,7 @@ static void udpv6_destruct_sock(struct sock *sk) inet6_sock_destruct(sk); } -int udpv6_init_sock(struct sock *sk) +static int udpv6_init_sock(struct sock *sk) { int res = udp_lib_init_sock(sk); @@ -95,7 +95,7 @@ u32 udp6_ehashfn(const struct net *net, udp6_ehash_secret + net_hash_mix(net)); } -int udp_v6_get_port(struct sock *sk, unsigned short snum) +static int udp_v6_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = ipv6_portaddr_hash(sock_net(sk), &in6addr_any, snum); @@ -107,7 +107,7 @@ int udp_v6_get_port(struct 
sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, hash2_nulladdr); } -void udp_v6_rehash(struct sock *sk) +static void udp_v6_rehash(struct sock *sk) { u16 new_hash = ipv6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, @@ -127,10 +127,11 @@ void udp_v6_rehash(struct sock *sk) udp_lib_rehash(sk, new_hash, new_hash4); } -static int compute_score(struct sock *sk, const struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, unsigned short hnum, - int dif, int sdif) +static __always_inline int +compute_score(struct sock *sk, const struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, unsigned short hnum, + int dif, int sdif) { int bound_dev_if, score; struct inet_sock *inet; @@ -260,8 +261,8 @@ rescore: continue; /* compute_score is too long of a function to be - * inlined, and calling it again here yields - * measurable overhead for some + * inlined twice here, and calling it uninlined + * here yields measurable overhead for some * workloads. Work around it by jumping * backwards to rescore 'result'. 
*/ @@ -344,9 +345,9 @@ static void udp6_hash4(struct sock *sk) struct sock *__udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, __be16 dport, - int dif, int sdif, struct udp_table *udptable, - struct sk_buff *skb) + int dif, int sdif, struct sk_buff *skb) { + struct udp_table *udptable = net->ipv4.udp_table; unsigned short hnum = ntohs(dport); struct udp_hslot *hslot2; struct sock *result, *sk; @@ -370,8 +371,7 @@ struct sock *__udp6_lib_lookup(const struct net *net, goto done; /* Lookup redirect from BPF */ - if (static_branch_unlikely(&bpf_sk_lookup_enabled) && - udptable == net->ipv4.udp_table) { + if (static_branch_unlikely(&bpf_sk_lookup_enabled)) { sk = inet6_lookup_run_sk_lookup(net, IPPROTO_UDP, skb, sizeof(struct udphdr), saddr, sport, daddr, hnum, dif, udp6_ehashfn); @@ -407,14 +407,13 @@ done: EXPORT_SYMBOL_GPL(__udp6_lib_lookup); static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, - __be16 sport, __be16 dport, - struct udp_table *udptable) + __be16 sport, __be16 dport) { const struct ipv6hdr *iph = ipv6_hdr(skb); return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), - inet6_sdif(skb), udptable, skb); + inet6_sdif(skb), skb); } struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, @@ -422,14 +421,12 @@ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb, { const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation]; const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset); - struct net *net = dev_net(skb->dev); int iif, sdif; inet6_get_iif_sdif(skb, &iif, &sdif); - return __udp6_lib_lookup(net, &iph->saddr, sport, - &iph->daddr, dport, iif, - sdif, net->ipv4.udp_table, NULL); + return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, + &iph->daddr, dport, iif, sdif, NULL); } /* Must be called under rcu_read_lock(). 
@@ -441,8 +438,7 @@ struct sock *udp6_lib_lookup(const struct net *net, const struct in6_addr *saddr { struct sock *sk; - sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport, - dif, 0, net->ipv4.udp_table, NULL); + sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, 0, NULL); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; @@ -464,24 +460,23 @@ static int udp6_skb_len(struct sk_buff *skb) * return it, otherwise we block. */ +INDIRECT_CALLABLE_SCOPE int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { + int off, is_udp4, err, peeking = flags & MSG_PEEK; struct ipv6_pinfo *np = inet6_sk(sk); struct inet_sock *inet = inet_sk(sk); - struct sk_buff *skb; - unsigned int ulen, copied; - int off, err, peeking = flags & MSG_PEEK; - int is_udplite = IS_UDPLITE(sk); struct udp_mib __percpu *mib; bool checksum_valid = false; - int is_udp4; + unsigned int ulen, copied; + struct sk_buff *skb; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len, addr_len); + return ipv6_recv_error(sk, msg, len); if (np->rxopt.bits.rxpmtu && READ_ONCE(np->rxpmtu)) - return ipv6_recv_rxpmtu(sk, msg, len, addr_len); + return ipv6_recv_rxpmtu(sk, msg, len); try_again: off = sk_peek_offset(sk, flags); @@ -499,14 +494,10 @@ try_again: is_udp4 = (skb->protocol == htons(ETH_P_IP)); mib = __UDPX_MIB(sk, is_udp4); - /* - * If checksum is needed at all, try to do it while copying the - * data. If the data is truncated, or if we only want a partial - * coverage checksum (UDP-Lite), do it before the copy. + /* If checksum is needed at all, try to do it while copying the + * data. If the data is truncated, do it before the copy. 
*/ - - if (copied < ulen || peeking || - (is_udplite && UDP_SKB_CB(skb)->partial_cov)) { + if (copied < ulen || peeking) { checksum_valid = udp_skb_csum_unnecessary(skb) || !__udp_lib_checksum_complete(skb); if (!checksum_valid) @@ -553,11 +544,11 @@ try_again: ipv6_iface_scope_id(&sin6->sin6_addr, inet6_iif(skb)); } - *addr_len = sizeof(*sin6); + msg->msg_namelen = sizeof(*sin6); BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, (struct sockaddr *)sin6, - addr_len); + &msg->msg_namelen); } if (udp_test_bit(GRO_ENABLED, sk)) @@ -648,7 +639,6 @@ static int __udp6_lib_err_encap_no_sk(struct sk_buff *skb, static struct sock *__udp6_lib_err_encap(struct net *net, const struct ipv6hdr *hdr, int offset, struct udphdr *uh, - struct udp_table *udptable, struct sock *sk, struct sk_buff *skb, struct inet6_skb_parm *opt, @@ -679,7 +669,7 @@ static struct sock *__udp6_lib_err_encap(struct net *net, sk = __udp6_lib_lookup(net, &hdr->daddr, uh->source, &hdr->saddr, uh->dest, - inet6_iif(skb), 0, udptable, skb); + inet6_iif(skb), 0, skb); if (sk) { up = udp_sk(sk); @@ -700,29 +690,28 @@ out: return sk; } -int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info, - struct udp_table *udptable) +static int udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) { - struct ipv6_pinfo *np; const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; - const struct in6_addr *saddr = &hdr->saddr; - const struct in6_addr *daddr = seg6_get_daddr(skb, opt) ? : &hdr->daddr; - struct udphdr *uh = (struct udphdr *)(skb->data+offset); + struct udphdr *uh = (struct udphdr *)(skb->data + offset); + const struct in6_addr *saddr, *daddr; + struct net *net = dev_net(skb->dev); + struct ipv6_pinfo *np; bool tunnel = false; struct sock *sk; int harderr; int err; - struct net *net = dev_net(skb->dev); + daddr = seg6_get_daddr(skb, opt) ? 
: &hdr->daddr; + saddr = &hdr->saddr; sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, - inet6_iif(skb), inet6_sdif(skb), udptable, NULL); + inet6_iif(skb), inet6_sdif(skb), NULL); if (!sk || READ_ONCE(udp_sk(sk)->encap_type)) { /* No socket for error: try tunnels before discarding */ if (static_branch_unlikely(&udpv6_encap_needed_key)) { - sk = __udp6_lib_err_encap(net, hdr, offset, uh, - udptable, sk, skb, + sk = __udp6_lib_err_encap(net, hdr, offset, uh, sk, skb, opt, type, code, info); if (!sk) return 0; @@ -794,20 +783,18 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { - int is_udplite = IS_UDPLITE(sk); enum skb_drop_reason drop_reason; + struct net *net = sock_net(sk); /* Note that an ENOMEM error is charged twice */ if (rc == -ENOMEM) { - UDP6_INC_STATS(sock_net(sk), - UDP_MIB_RCVBUFERRORS, is_udplite); + UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS); drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF; } else { - UDP6_INC_STATS(sock_net(sk), - UDP_MIB_MEMERRORS, is_udplite); + UDP6_INC_STATS(net, UDP_MIB_MEMERRORS); drop_reason = SKB_DROP_REASON_PROTO_MEM; } - UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + UDP6_INC_STATS(net, UDP_MIB_INERRORS); trace_udp_fail_queue_rcv_skb(rc, sk, skb); sk_skb_reason_drop(sk, skb, drop_reason); return -1; @@ -816,19 +803,11 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) return 0; } -static __inline__ int udpv6_err(struct sk_buff *skb, - struct inet6_skb_parm *opt, u8 type, - u8 code, int offset, __be32 info) -{ - return __udp6_lib_err(skb, opt, type, code, offset, info, - dev_net(skb->dev)->ipv4.udp_table); -} - static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; struct udp_sock *up = udp_sk(sk); - int is_udplite = IS_UDPLITE(sk); + struct net *net = sock_net(sk); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, 
skb)) { drop_reason = SKB_DROP_REASON_XFRM_POLICY; @@ -862,9 +841,7 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) ret = encap_rcv(sk, skb); if (ret <= 0) { - __UDP6_INC_STATS(sock_net(sk), - UDP_MIB_INDATAGRAMS, - is_udplite); + __UDP6_INC_STATS(net, UDP_MIB_INDATAGRAMS); return -ret; } } @@ -872,31 +849,13 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* FALLTHROUGH -- it's a UDP Packet */ } - /* - * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). - */ - if (unlikely(udp_test_bit(UDPLITE_RECV_CC, sk) && - UDP_SKB_CB(skb)->partial_cov)) { - u16 pcrlen = READ_ONCE(up->pcrlen); - - if (pcrlen == 0) { /* full coverage was set */ - net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n", - UDP_SKB_CB(skb)->cscov, skb->len); - goto drop; - } - if (UDP_SKB_CB(skb)->cscov < pcrlen) { - net_dbg_ratelimited("UDPLITE6: coverage %d too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, pcrlen); - goto drop; - } - } - prefetch(&sk->sk_rmem_alloc); if (rcu_access_pointer(sk->sk_filter) && udp_lib_checksum_complete(skb)) goto csum_error; - if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr), &drop_reason)) + drop_reason = sk_filter_trim_cap(sk, skb, sizeof(struct udphdr)); + if (drop_reason) goto drop; udp_csum_pull_header(skb); @@ -907,9 +866,9 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) csum_error: drop_reason = SKB_DROP_REASON_UDP_CSUM; - __UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); + __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS); drop: - __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); + __UDP6_INC_STATS(net, UDP_MIB_INERRORS); udp_drops_inc(sk); sk_skb_reason_drop(sk, skb, drop_reason); return -1; @@ -976,19 +935,26 @@ static void udp6_csum_zero_error(struct sk_buff *skb) * so we don't need to lock the hashes. 
*/ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, - const struct in6_addr *saddr, const struct in6_addr *daddr, - struct udp_table *udptable, int proto) + const struct in6_addr *saddr, + const struct in6_addr *daddr) { - struct sock *sk, *first = NULL; + struct udp_table *udptable = net->ipv4.udp_table; const struct udphdr *uh = udp_hdr(skb); + unsigned int hash2, hash2_any, offset; unsigned short hnum = ntohs(uh->dest); - struct udp_hslot *hslot = udp_hashslot(udptable, net, hnum); - unsigned int offset = offsetof(typeof(*sk), sk_node); - unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); - int dif = inet6_iif(skb); + struct sock *sk, *first = NULL; int sdif = inet6_sdif(skb); + int dif = inet6_iif(skb); struct hlist_node *node; + struct udp_hslot *hslot; struct sk_buff *nskb; + bool use_hash2; + + hash2_any = 0; + hash2 = 0; + hslot = udp_hashslot(udptable, net, hnum); + use_hash2 = hslot->count > 10; + offset = offsetof(typeof(*sk), sk_node); if (use_hash2) { hash2_any = ipv6_portaddr_hash(net, &in6addr_any, hnum) & @@ -1016,10 +982,8 @@ start_lookup: nskb = skb_clone(skb, GFP_ATOMIC); if (unlikely(!nskb)) { udp_drops_inc(sk); - __UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS, - IS_UDPLITE(sk)); - __UDP6_INC_STATS(net, UDP_MIB_INERRORS, - IS_UDPLITE(sk)); + __UDP6_INC_STATS(net, UDP_MIB_RCVBUFERRORS); + __UDP6_INC_STATS(net, UDP_MIB_INERRORS); continue; } @@ -1038,8 +1002,7 @@ start_lookup: consume_skb(skb); } else { kfree_skb(skb); - __UDP6_INC_STATS(net, UDP_MIB_IGNOREDMULTI, - proto == IPPROTO_UDPLITE); + __UDP6_INC_STATS(net, UDP_MIB_IGNOREDMULTI); } return 0; } @@ -1058,7 +1021,7 @@ static int udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, { int ret; - if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) + if (inet_get_convert_csum(sk) && uh->check) skb_checksum_try_convert(skb, IPPROTO_UDP, ip6_compute_pseudo); ret = udpv6_queue_rcv_skb(sk, skb); @@ -1069,8 +1032,39 @@ static int 
udp6_unicast_rcv_skb(struct sock *sk, struct sk_buff *skb, return 0; } -int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, - int proto) +static int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh) +{ + int err; + + /* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels) + * we accept a checksum of zero here. When we find the socket + * for the UDP packet we'll check if that socket allows zero checksum + * for IPv6 (set by socket option). + * + * Note, we are only interested in != 0 or == 0, thus the + * force to int. + */ + err = (__force int)skb_checksum_init_zero_check(skb, IPPROTO_UDP, uh->check, + ip6_compute_pseudo); + if (err) + return err; + + if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) { + /* If SW calculated the value, we know it's bad */ + if (skb->csum_complete_sw) + return 1; + + /* HW says the value is bad. Let's validate that. + * skb->csum is no longer the full packet checksum, + * so don't treat is as such. + */ + skb_checksum_complete_unset(skb); + } + + return 0; +} + +INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb) { enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED; const struct in6_addr *saddr, *daddr; @@ -1091,26 +1085,23 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (ulen > skb->len) goto short_packet; - if (proto == IPPROTO_UDP) { - /* UDP validates ulen. 
*/ + /* Check for jumbo payload */ + if (ulen == 0) + ulen = skb->len; - /* Check for jumbo payload */ - if (ulen == 0) - ulen = skb->len; + if (ulen < sizeof(*uh)) + goto short_packet; - if (ulen < sizeof(*uh)) + if (ulen < skb->len) { + if (pskb_trim_rcsum(skb, ulen)) goto short_packet; - if (ulen < skb->len) { - if (pskb_trim_rcsum(skb, ulen)) - goto short_packet; - saddr = &ipv6_hdr(skb)->saddr; - daddr = &ipv6_hdr(skb)->daddr; - uh = udp_hdr(skb); - } + saddr = &ipv6_hdr(skb)->saddr; + daddr = &ipv6_hdr(skb)->daddr; + uh = udp_hdr(skb); } - if (udp6_csum_init(skb, uh, proto)) + if (udp6_csum_init(skb, uh)) goto csum_error; /* Check if the socket is already available, e.g. due to early demux */ @@ -1142,11 +1133,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, * Multicast receive code */ if (ipv6_addr_is_multicast(daddr)) - return __udp6_lib_mcast_deliver(net, skb, - saddr, daddr, udptable, proto); + return __udp6_lib_mcast_deliver(net, skb, saddr, daddr); /* Unicast */ - sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); + sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest); if (sk) { if (!uh->check && !udp_get_no_check6_rx(sk)) goto report_csum_error; @@ -1165,7 +1155,7 @@ no_sk: if (udp_lib_checksum_complete(skb)) goto csum_error; - __UDP6_INC_STATS(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + __UDP6_INC_STATS(net, UDP_MIB_NOPORTS); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); sk_skb_reason_drop(sk, skb, reason); @@ -1174,8 +1164,7 @@ no_sk: short_packet: if (reason == SKB_DROP_REASON_NOT_SPECIFIED) reason = SKB_DROP_REASON_PKT_TOO_SMALL; - net_dbg_ratelimited("UDP%sv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", - proto == IPPROTO_UDPLITE ? 
"-Lite" : "", + net_dbg_ratelimited("UDPv6: short packet: From [%pI6c]:%u %d/%d to [%pI6c]:%u\n", saddr, ntohs(uh->source), ulen, skb->len, daddr, ntohs(uh->dest)); @@ -1186,9 +1175,9 @@ report_csum_error: csum_error: if (reason == SKB_DROP_REASON_NOT_SPECIFIED) reason = SKB_DROP_REASON_UDP_CSUM; - __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS, proto == IPPROTO_UDPLITE); + __UDP6_INC_STATS(net, UDP_MIB_CSUMERRORS); discard: - __UDP6_INC_STATS(net, UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + __UDP6_INC_STATS(net, UDP_MIB_INERRORS); sk_skb_reason_drop(sk, skb, reason); return 0; } @@ -1262,11 +1251,6 @@ void udp_v6_early_demux(struct sk_buff *skb) } } -INDIRECT_CALLABLE_SCOPE int udpv6_rcv(struct sk_buff *skb) -{ - return __udp6_lib_rcv(skb, dev_net(skb->dev)->ipv4.udp_table, IPPROTO_UDP); -} - /* * Throw away all pending data and cancel the corking. Socket is locked. */ @@ -1371,13 +1355,13 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, struct inet_cork *cork) { struct sock *sk = skb->sk; + int offset, len, datalen; struct udphdr *uh; int err = 0; - int is_udplite = IS_UDPLITE(sk); - __wsum csum = 0; - int offset = skb_transport_offset(skb); - int len = skb->len - offset; - int datalen = len - sizeof(*uh); + + offset = skb_transport_offset(skb); + len = skb->len - offset; + datalen = len - sizeof(*uh); /* * Create a UDP header @@ -1404,7 +1388,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (is_udplite || dst_xfrm(skb_dst(skb))) { + if (dst_xfrm(skb_dst(skb))) { kfree_skb(skb); return -EIO; } @@ -1420,21 +1404,18 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, } } - if (is_udplite) - csum = udplite_csum(skb); - else if (udp_get_no_check6_tx(sk)) { /* UDP csum disabled */ + if (udp_get_no_check6_tx(sk)) { /* UDP csum disabled */ skb->ip_summed = CHECKSUM_NONE; goto send; } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ csum_partial: 
udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, len); goto send; - } else - csum = udp_csum(skb); + } /* add protocol-dependent pseudo-header */ uh->check = csum_ipv6_magic(&fl6->saddr, &fl6->daddr, - len, fl6->flowi6_proto, csum); + len, IPPROTO_UDP, udp_csum(skb)); if (uh->check == 0) uh->check = CSUM_MANGLED_0; @@ -1442,13 +1423,11 @@ send: err = ip6_send_skb(skb); if (unlikely(err)) { if (err == -ENOBUFS && !inet6_test_bit(RECVERR6, sk)) { - UDP6_INC_STATS(sock_net(sk), - UDP_MIB_SNDBUFERRORS, is_udplite); + UDP6_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS); err = 0; } } else { - UDP6_INC_STATS(sock_net(sk), - UDP_MIB_OUTDATAGRAMS, is_udplite); + UDP6_INC_STATS(sock_net(sk), UDP_MIB_OUTDATAGRAMS); } return err; } @@ -1476,27 +1455,26 @@ out: int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { - struct ipv6_txoptions opt_space; - struct udp_sock *up = udp_sk(sk); - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); + int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); - struct in6_addr *daddr, *final_p, final; - struct ipv6_txoptions *opt = NULL; struct ipv6_txoptions *opt_to_free = NULL; + struct in6_addr *daddr, *final_p, final; struct ip6_flowlabel *flowlabel = NULL; + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct ipv6_txoptions *opt = NULL; + struct udp_sock *up = udp_sk(sk); + struct ipv6_txoptions opt_space; + int addr_len = msg->msg_namelen; struct inet_cork_full cork; - struct flowi6 *fl6 = &cork.fl.u.ip6; - struct dst_entry *dst; struct ipcm6_cookie ipc6; - int addr_len = msg->msg_namelen; bool connected = false; + struct dst_entry *dst; + struct flowi6 *fl6; int ulen = len; - int corkreq = udp_test_bit(CORK, sk) || msg->msg_flags & MSG_MORE; int err; - int is_udplite = IS_UDPLITE(sk); - int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); + fl6 = &cork.fl.u.ip6; 
ipcm6_init_sk(&ipc6, sk); ipc6.gso_size = READ_ONCE(up->gso_size); @@ -1555,7 +1533,6 @@ do_udp_sendmsg: if (len > INT_MAX - sizeof(struct udphdr)) return -EMSGSIZE; - getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; if (READ_ONCE(up->pending)) { if (READ_ONCE(up->pending) == AF_INET) return udp_sendmsg(sk, msg, len); @@ -1657,7 +1634,7 @@ do_udp_sendmsg: opt = ipv6_fixup_options(&opt_space, opt); ipc6.opt = opt; - fl6->flowi6_proto = sk->sk_protocol; + fl6->flowi6_proto = IPPROTO_UDP; fl6->flowi6_mark = ipc6.sockc.mark; fl6->daddr = *daddr; if (ipv6_addr_any(&fl6->saddr) && !ipv6_addr_any(&np->saddr)) @@ -1724,7 +1701,7 @@ back_from_confirm: if (!corkreq) { struct sk_buff *skb; - skb = ip6_make_skb(sk, getfrag, msg, ulen, + skb = ip6_make_skb(sk, ip_generic_getfrag, msg, ulen, sizeof(struct udphdr), &ipc6, dst_rt6_info(dst), msg->msg_flags, &cork); @@ -1750,8 +1727,9 @@ back_from_confirm: do_append_data: up->len += ulen; - err = ip6_append_data(sk, getfrag, msg, ulen, sizeof(struct udphdr), - &ipc6, fl6, dst_rt6_info(dst), + err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen, + sizeof(struct udphdr), &ipc6, fl6, + dst_rt6_info(dst), corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); if (err) udp_v6_flush_pending_frames(sk); @@ -1778,10 +1756,9 @@ out_no_dst: * things). We could add another new stat but at least for now that * seems like overkill. 
*/ - if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP6_INC_STATS(sock_net(sk), - UDP_MIB_SNDBUFERRORS, is_udplite); - } + if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) + UDP6_INC_STATS(sock_net(sk), UDP_MIB_SNDBUFERRORS); + return err; do_confirm: @@ -1808,7 +1785,7 @@ static void udpv6_splice_eof(struct socket *sock) release_sock(sk); } -void udpv6_destroy_sock(struct sock *sk) +static void udpv6_destroy_sock(struct sock *sk) { struct udp_sock *up = udp_sk(sk); lock_sock(sk); @@ -1836,20 +1813,20 @@ void udpv6_destroy_sock(struct sock *sk) /* * Socket option code for UDP */ -int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, - unsigned int optlen) +static int udpv6_setsockopt(struct sock *sk, int level, int optname, + sockptr_t optval, unsigned int optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE || level == SOL_SOCKET) + if (level == SOL_UDP || level == SOL_SOCKET) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_v6_push_pending_frames); return ipv6_setsockopt(sk, level, optname, optval, optlen); } -int udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) +static int udpv6_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) { - if (level == SOL_UDP || level == SOL_UDPLITE) + if (level == SOL_UDP) return udp_lib_getsockopt(sk, level, optname, optval, optlen); return ipv6_getsockopt(sk, level, optname, optval, optlen); } @@ -1857,7 +1834,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS -int udp6_seq_show(struct seq_file *seq, void *v) +static int udp6_seq_show(struct seq_file *seq, void *v) { if (v == SEQ_START_TOKEN) { seq_puts(seq, IPV6_SEQ_DGRAM_HEADER); @@ -1872,17 +1849,15 @@ int udp6_seq_show(struct seq_file *seq, void *v) return 0; } -const struct 
seq_operations udp6_seq_ops = { +static const struct seq_operations udp6_seq_ops = { .start = udp_seq_start, .next = udp_seq_next, .stop = udp_seq_stop, .show = udp6_seq_show, }; -EXPORT_SYMBOL(udp6_seq_ops); static struct udp_seq_afinfo udp6_seq_afinfo = { .family = AF_INET6, - .udp_table = NULL, }; int __net_init udp6_proc_init(struct net *net) @@ -1934,7 +1909,6 @@ struct proto udpv6_prot = { .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), .obj_size = sizeof(struct udp6_sock), .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6), - .h.udp_table = NULL, .diag_destroy = udp_abort, }; diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h deleted file mode 100644 index 8a406be25a3a..000000000000 --- a/net/ipv6/udp_impl.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _UDP6_IMPL_H -#define _UDP6_IMPL_H -#include <net/aligned_data.h> -#include <net/udp.h> -#include <net/udplite.h> -#include <net/protocol.h> -#include <net/addrconf.h> -#include <net/inet_common.h> -#include <net/transp_v6.h> - -int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int); -int __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int, - __be32, struct udp_table *); - -int udpv6_init_sock(struct sock *sk); -int udp_v6_get_port(struct sock *sk, unsigned short snum); -void udp_v6_rehash(struct sock *sk); - -int udpv6_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen); -int udpv6_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, - unsigned int optlen); -int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); -int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags, - int *addr_len); -void udpv6_destroy_sock(struct sock *sk); - -#ifdef CONFIG_PROC_FS -int udp6_seq_show(struct seq_file *seq, void *v); -#endif -#endif /* _UDP6_IMPL_H */ diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 
e003b8494dc0..778afc7453ce 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -128,8 +128,7 @@ static struct sock *udp6_gro_lookup_skb(struct sk_buff *skb, __be16 sport, inet6_get_iif_sdif(skb, &iif, &sdif); return __udp6_lib_lookup(net, &iph->saddr, sport, - &iph->daddr, dport, iif, - sdif, net->ipv4.udp_table, NULL); + &iph->daddr, dport, iif, sdif, NULL); } struct sk_buff *udp6_gro_receive(struct list_head *head, struct sk_buff *skb) diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c deleted file mode 100644 index e867721cda4d..000000000000 --- a/net/ipv6/udplite.c +++ /dev/null @@ -1,139 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * UDPLITEv6 An implementation of the UDP-Lite protocol over IPv6. - * See also net/ipv4/udplite.c - * - * Authors: Gerrit Renker <gerrit@erg.abdn.ac.uk> - * - * Changes: - * Fixes: - */ -#define pr_fmt(fmt) "UDPLite6: " fmt - -#include <linux/export.h> -#include <linux/proc_fs.h> -#include "udp_impl.h" - -static int udplitev6_sk_init(struct sock *sk) -{ - pr_warn_once("UDP-Lite is deprecated and scheduled to be removed in 2025, " - "please contact the netdev mailing list\n"); - return udpv6_init_sock(sk); -} - -static int udplitev6_rcv(struct sk_buff *skb) -{ - return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); -} - -static int udplitev6_err(struct sk_buff *skb, - struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) -{ - return __udp6_lib_err(skb, opt, type, code, offset, info, - &udplite_table); -} - -static const struct inet6_protocol udplitev6_protocol = { - .handler = udplitev6_rcv, - .err_handler = udplitev6_err, - .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, -}; - -struct proto udplitev6_prot = { - .name = "UDPLITEv6", - .owner = THIS_MODULE, - .close = udp_lib_close, - .connect = ip6_datagram_connect, - .disconnect = udp_disconnect, - .ioctl = udp_ioctl, - .init = udplitev6_sk_init, - .destroy = udpv6_destroy_sock, - .setsockopt = udpv6_setsockopt, - 
.getsockopt = udpv6_getsockopt, - .sendmsg = udpv6_sendmsg, - .recvmsg = udpv6_recvmsg, - .hash = udp_lib_hash, - .unhash = udp_lib_unhash, - .rehash = udp_v6_rehash, - .get_port = udp_v6_get_port, - - .memory_allocated = &net_aligned_data.udp_memory_allocated, - .per_cpu_fw_alloc = &udp_memory_per_cpu_fw_alloc, - - .sysctl_mem = sysctl_udp_mem, - .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_udp_wmem_min), - .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_udp_rmem_min), - .obj_size = sizeof(struct udp6_sock), - .ipv6_pinfo_offset = offsetof(struct udp6_sock, inet6), - .h.udp_table = &udplite_table, -}; - -static struct inet_protosw udplite6_protosw = { - .type = SOCK_DGRAM, - .protocol = IPPROTO_UDPLITE, - .prot = &udplitev6_prot, - .ops = &inet6_dgram_ops, - .flags = INET_PROTOSW_PERMANENT, -}; - -int __init udplitev6_init(void) -{ - int ret; - - ret = inet6_add_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); - if (ret) - goto out; - - ret = inet6_register_protosw(&udplite6_protosw); - if (ret) - goto out_udplitev6_protocol; -out: - return ret; - -out_udplitev6_protocol: - inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); - goto out; -} - -void udplitev6_exit(void) -{ - inet6_unregister_protosw(&udplite6_protosw); - inet6_del_protocol(&udplitev6_protocol, IPPROTO_UDPLITE); -} - -#ifdef CONFIG_PROC_FS -static struct udp_seq_afinfo udplite6_seq_afinfo = { - .family = AF_INET6, - .udp_table = &udplite_table, -}; - -static int __net_init udplite6_proc_init_net(struct net *net) -{ - if (!proc_create_net_data("udplite6", 0444, net->proc_net, - &udp6_seq_ops, sizeof(struct udp_iter_state), - &udplite6_seq_afinfo)) - return -ENOMEM; - return 0; -} - -static void __net_exit udplite6_proc_exit_net(struct net *net) -{ - remove_proc_entry("udplite6", net->proc_net); -} - -static struct pernet_operations udplite6_net_ops = { - .init = udplite6_proc_init_net, - .exit = udplite6_proc_exit_net, -}; - -int __init udplite6_proc_init(void) -{ - return 
register_pernet_subsys(&udplite6_net_ops); -} - -void udplite6_proc_exit(void) -{ - unregister_pernet_subsys(&udplite6_net_ops); -} -#endif diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 6554d2cffc19..72dfccd4e3d5 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -127,6 +127,8 @@ static inline void low_nmcpy(unsigned char *dst, char *src) * if the socket data len is > 7, the function returns 8. * * Use this function to allocate socket buffers to store iucv message data. + * + * Returns: Length of the IUCV message. */ static inline size_t iucv_msg_length(struct iucv_message *msg) { @@ -145,7 +147,7 @@ static inline size_t iucv_msg_length(struct iucv_message *msg) * @state: first iucv sk state * @state2: second iucv sk state * - * Returns true if the socket in either in the first or second state. + * Returns: true if the socket is either in the first or second state. */ static int iucv_sock_in_state(struct sock *sk, int state, int state2) { @@ -156,9 +158,9 @@ static int iucv_sock_in_state(struct sock *sk, int state, int state2) * iucv_below_msglim() - function to check if messages can be sent * @sk: sock structure * - * Returns true if the send queue length is lower than the message limit. - * Always returns true if the socket is not connected (no iucv path for - * checking the message limit). + * Returns: true, if either the socket is not connected (no iucv path for + * checking the message limit) or if the send queue length is lower + * than the message limit. */ static inline int iucv_below_msglim(struct sock *sk) { @@ -883,7 +885,7 @@ static int iucv_sock_getname(struct socket *sock, struct sockaddr *addr, * list and the socket data len at index 7 (last byte). * See also iucv_msg_length(). * - * Returns the error code from the iucv_message_send() call. + * Returns: the return code from the iucv_message_send() call. 
*/ static int iucv_send_iprm(struct iucv_path *path, struct iucv_message *msg, struct sk_buff *skb) diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c index 6641c49b09ac..e03bc4a74f67 100644 --- a/net/iucv/iucv.c +++ b/net/iucv/iucv.c @@ -687,6 +687,8 @@ __free_cpumask: * * @pathid: path identification number. * @userdata: 16-bytes of user data. + * + * Returns: 0 on success, the result of the CP b2f0 IUCV call. */ static int iucv_sever_pathid(u16 pathid, u8 *userdata) { @@ -1092,6 +1094,8 @@ EXPORT_SYMBOL(iucv_message_purge); * * Internal function used by iucv_message_receive and __iucv_message_receive * to receive RMDATA data stored in struct iucv_message. + * + * Returns: 0 */ static int iucv_message_receive_iprmdata(struct iucv_path *path, struct iucv_message *msg, @@ -1852,6 +1856,8 @@ static enum cpuhp_state iucv_online; /** * iucv_init - Allocates and initializes various data structures. + * + * Returns: 0 on success, return code on failure. */ static int __init iucv_init(void) { diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig index b7856748e960..0de178d5baba 100644 --- a/net/l2tp/Kconfig +++ b/net/l2tp/Kconfig @@ -5,7 +5,6 @@ menuconfig L2TP tristate "Layer Two Tunneling Protocol (L2TP)" - depends on (IPV6 || IPV6=n) depends on INET select NET_UDP_TUNNEL help diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index cf0b66f4fb29..a4956ef9574c 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -14,6 +14,7 @@ #include <linux/in.h> #include <linux/etherdevice.h> #include <linux/spinlock.h> +#include <linux/string.h> #include <net/sock.h> #include <net/ip.h> #include <net/icmp.h> @@ -235,10 +236,10 @@ static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel, int rc; if (cfg->ifname) { - strscpy(name, cfg->ifname, IFNAMSIZ); + strscpy(name, cfg->ifname); name_assign_type = NET_NAME_USER; } else { - strcpy(name, L2TP_ETH_DEV_NAME); + strscpy(name, L2TP_ETH_DEV_NAME); name_assign_type = NET_NAME_ENUM; } diff --git a/net/l2tp/l2tp_ip.c 
b/net/l2tp/l2tp_ip.c index cac1ff59cb83..acb2570c11f6 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -537,7 +537,7 @@ no_route: } static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg, - size_t len, int flags, int *addr_len) + size_t len, int flags) { struct inet_sock *inet = inet_sk(sk); size_t copied = 0; @@ -570,7 +570,7 @@ static int l2tp_ip_recvmsg(struct sock *sk, struct msghdr *msg, sin->sin_addr.s_addr = ip_hdr(skb)->saddr; sin->sin_port = 0; memset(&sin->sin_zero, 0, sizeof(sin->sin_zero)); - *addr_len = sizeof(*sin); + msg->msg_namelen = sizeof(*sin); } if (inet_cmsg_flags(inet)) ip_cmsg_recv(msg, skb); diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 05a396ba6a3e..bdaae1b64d25 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -679,7 +679,7 @@ do_confirm: } static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_l2tpip6 *, lsa, msg->msg_name); @@ -691,7 +691,7 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, goto out; if (flags & MSG_ERRQUEUE) - return ipv6_recv_error(sk, msg, len, addr_len); + return ipv6_recv_error(sk, msg, len); skb = skb_recv_datagram(sk, flags, &err); if (!skb) @@ -719,7 +719,7 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, lsa->l2tp_conn_id = 0; if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL) lsa->l2tp_scope_id = inet6_iif(skb); - *addr_len = sizeof(*lsa); + msg->msg_namelen = sizeof(*lsa); } if (np->rxopt.all) diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index ae4543d5597b..99d6582f41de 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -787,11 +787,12 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr_unsized *userva goto out_no_ppp; } - /* The only header we need to worry about is the L2TP - * header. 
This size is different depending on whether - * sequence numbers are enabled for the data channel. + /* Reserve enough headroom for the L2TP header with sequence numbers, + * which is the largest possible. This is used by the PPP layer to set + * the net device's hard_header_len at registration, which must be + * sufficient regardless of whether sequence numbers are enabled later. */ - po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; + po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_SEQ; po->chan.private = sk; po->chan.ops = &pppol2tp_chan_ops; @@ -1176,12 +1177,6 @@ static int pppol2tp_session_setsockopt(struct sock *sk, break; } session->send_seq = !!val; - { - struct pppox_sock *po = pppox_sk(sk); - - po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ : - PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; - } l2tp_session_set_header_len(session, session->tunnel->version, session->tunnel->encap); break; diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index b0e392eb7753..20c3135b73ea 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -18,7 +18,6 @@ mac80211-y := \ iface.o \ link.o \ rate.o \ - michael.o \ tkip.o \ aes_cmac.o \ aes_gmac.o \ @@ -36,7 +35,7 @@ mac80211-y := \ tdls.o \ ocb.o \ airtime.o \ - eht.o uhr.o + eht.o uhr.o nan.o mac80211-$(CONFIG_MAC80211_LEDS) += led.o mac80211-$(CONFIG_MAC80211_DEBUGFS) += \ diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index f301a8622bee..0140ac826b23 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2025 Intel Corporation + * Copyright (C) 2018-2026 Intel Corporation */ /** @@ -94,7 +94,8 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, /* check if this is a self generated aggregation halt */ if (initiator == WLAN_BACK_RECIPIENT && tx) ieee80211_send_delba(sta->sdata, sta->sta.addr, - tid, 
WLAN_BACK_RECIPIENT, reason); + tid, WLAN_BACK_RECIPIENT, reason, + ieee80211_s1g_use_ndp_ba(sta->sdata, sta)); /* * return here in case tid_rx is not assigned - which will happen if @@ -240,6 +241,7 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid, struct sk_buff *skb; struct ieee80211_mgmt *mgmt; bool amsdu = ieee80211_hw_check(&local->hw, SUPPORTS_AMSDU_IN_AMPDU); + bool use_ndp = ieee80211_s1g_use_ndp_ba(sdata, sta); u16 capab; skb = dev_alloc_skb(sizeof(*mgmt) + @@ -251,19 +253,21 @@ static void ieee80211_send_addba_resp(struct sta_info *sta, u8 *da, u16 tid, skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = ieee80211_mgmt_ba(skb, da, sdata); - skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_resp)); + skb_put(skb, 2 + sizeof(mgmt->u.action.addba_resp)); mgmt->u.action.category = WLAN_CATEGORY_BACK; - mgmt->u.action.u.addba_resp.action_code = WLAN_ACTION_ADDBA_RESP; - mgmt->u.action.u.addba_resp.dialog_token = dialog_token; + mgmt->u.action.action_code = use_ndp ? 
+ WLAN_ACTION_NDP_ADDBA_RESP : WLAN_ACTION_ADDBA_RESP; + + mgmt->u.action.addba_resp.dialog_token = dialog_token; capab = u16_encode_bits(amsdu, IEEE80211_ADDBA_PARAM_AMSDU_MASK); capab |= u16_encode_bits(policy, IEEE80211_ADDBA_PARAM_POLICY_MASK); capab |= u16_encode_bits(tid, IEEE80211_ADDBA_PARAM_TID_MASK); capab |= u16_encode_bits(buf_size, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK); - mgmt->u.action.u.addba_resp.capab = cpu_to_le16(capab); - mgmt->u.action.u.addba_resp.timeout = cpu_to_le16(timeout); - mgmt->u.action.u.addba_resp.status = cpu_to_le16(status); + mgmt->u.action.addba_resp.capab = cpu_to_le16(capab); + mgmt->u.action.addba_resp.timeout = cpu_to_le16(timeout); + mgmt->u.action.addba_resp.status = cpu_to_le16(status); if (sta->sta.valid_links || sta->sta.deflink.he_cap.has_he) ieee80211_add_addbaext(skb, req_addba_ext_data, buf_size); @@ -275,6 +279,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, u8 dialog_token, u16 timeout, u16 start_seq_num, u16 ba_policy, u16 tid, u16 buf_size, bool tx, bool auto_seq, + bool req_ndp, const u8 addba_ext_data) { struct ieee80211_local *local = sta->sdata->local; @@ -300,6 +305,18 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, goto end; } + if (tx && ieee80211_s1g_use_ndp_ba(sta->sdata, sta) && !req_ndp) { + /* + * According to IEEE 802.11-2024: Inform S1G originator + * ADDBA rejected as NDP BlockAck is preferred + */ + status = WLAN_STATUS_REJECTED_NDP_BLOCK_ACK_SUGGESTED; + ht_dbg(sta->sdata, + "Rejecting AddBA Req from %pM tid %u - require NDP BlockAck\n", + sta->sta.addr, tid); + goto end; + } + if (!sta->sta.valid_links && !sta->sta.deflink.ht_cap.ht_supported && !sta->sta.deflink.he_cap.has_he && @@ -473,31 +490,33 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, struct ieee80211_mgmt *mgmt, size_t len) { + bool req_ndp = mgmt->u.action.action_code == WLAN_ACTION_NDP_ADDBA_REQ; u16 capab, tid, timeout, ba_policy, buf_size, start_seq_num; u8 dialog_token, 
addba_ext_data; /* extract session parameters from addba request frame */ - dialog_token = mgmt->u.action.u.addba_req.dialog_token; - timeout = le16_to_cpu(mgmt->u.action.u.addba_req.timeout); + dialog_token = mgmt->u.action.addba_req.dialog_token; + timeout = le16_to_cpu(mgmt->u.action.addba_req.timeout); start_seq_num = - le16_to_cpu(mgmt->u.action.u.addba_req.start_seq_num) >> 4; + le16_to_cpu(mgmt->u.action.addba_req.start_seq_num) >> 4; - capab = le16_to_cpu(mgmt->u.action.u.addba_req.capab); + capab = le16_to_cpu(mgmt->u.action.addba_req.capab); ba_policy = (capab & IEEE80211_ADDBA_PARAM_POLICY_MASK) >> 1; tid = (capab & IEEE80211_ADDBA_PARAM_TID_MASK) >> 2; buf_size = (capab & IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK) >> 6; addba_ext_data = ieee80211_retrieve_addba_ext_data(sta, - mgmt->u.action.u.addba_req.variable, + mgmt->u.action.addba_req.variable, len - offsetof(typeof(*mgmt), - u.action.u.addba_req.variable), + u.action.addba_req.variable), &buf_size); __ieee80211_start_rx_ba_session(sta, dialog_token, timeout, start_seq_num, ba_policy, tid, - buf_size, true, false, addba_ext_data); + buf_size, true, false, + req_ndp, addba_ext_data); } void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index 93b47a7ba9c4..4833b46770b6 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2024 Intel Corporation + * Copyright (C) 2018-2026 Intel Corporation */ #include <linux/ieee80211.h> @@ -60,7 +60,7 @@ static void ieee80211_send_addba_request(struct sta_info *sta, u16 tid, u8 dialog_token, u16 start_seq_num, - u16 agg_size, u16 timeout) + u16 agg_size, u16 timeout, bool ndp) { struct ieee80211_sub_if_data *sdata = sta->sdata; struct ieee80211_local *local = sdata->local; @@ -68,7 +68,7 @@ static void 
ieee80211_send_addba_request(struct sta_info *sta, u16 tid, struct ieee80211_mgmt *mgmt; u16 capab; - skb = dev_alloc_skb(sizeof(*mgmt) + + skb = dev_alloc_skb(IEEE80211_MIN_ACTION_SIZE(addba_req) + 2 + sizeof(struct ieee80211_addba_ext_ie) + local->hw.extra_tx_headroom); if (!skb) @@ -77,21 +77,22 @@ static void ieee80211_send_addba_request(struct sta_info *sta, u16 tid, skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = ieee80211_mgmt_ba(skb, sta->sta.addr, sdata); - skb_put(skb, 1 + sizeof(mgmt->u.action.u.addba_req)); + skb_put(skb, 2 + sizeof(mgmt->u.action.addba_req)); mgmt->u.action.category = WLAN_CATEGORY_BACK; - mgmt->u.action.u.addba_req.action_code = WLAN_ACTION_ADDBA_REQ; + mgmt->u.action.action_code = ndp ? + WLAN_ACTION_NDP_ADDBA_REQ : WLAN_ACTION_ADDBA_REQ; - mgmt->u.action.u.addba_req.dialog_token = dialog_token; + mgmt->u.action.addba_req.dialog_token = dialog_token; capab = IEEE80211_ADDBA_PARAM_AMSDU_MASK; capab |= IEEE80211_ADDBA_PARAM_POLICY_MASK; capab |= u16_encode_bits(tid, IEEE80211_ADDBA_PARAM_TID_MASK); capab |= u16_encode_bits(agg_size, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK); - mgmt->u.action.u.addba_req.capab = cpu_to_le16(capab); + mgmt->u.action.addba_req.capab = cpu_to_le16(capab); - mgmt->u.action.u.addba_req.timeout = cpu_to_le16(timeout); - mgmt->u.action.u.addba_req.start_seq_num = + mgmt->u.action.addba_req.timeout = cpu_to_le16(timeout); + mgmt->u.action.addba_req.start_seq_num = cpu_to_le16(start_seq_num << 4); if (sta->sta.deflink.he_cap.has_he) @@ -484,7 +485,8 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta, /* send AddBA request */ ieee80211_send_addba_request(sta, tid, tid_tx->dialog_token, - tid_tx->ssn, buf_size, tid_tx->timeout); + tid_tx->ssn, buf_size, tid_tx->timeout, + tid_tx->ndp); WARN_ON(test_and_set_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)); } @@ -521,6 +523,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid) */ synchronize_net(); + tid_tx->ndp = 
ieee80211_s1g_use_ndp_ba(sdata, sta); params.ssn = sta->tid_seq[tid] >> 4; ret = drv_ampdu_action(local, sdata, &params); tid_tx->ssn = params.ssn; @@ -638,7 +641,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, sdata->vif.type != NL80211_IFTYPE_MESH_POINT && sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sdata->vif.type != NL80211_IFTYPE_AP && - sdata->vif.type != NL80211_IFTYPE_ADHOC) + sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_NAN_DATA) return -EINVAL; if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { @@ -940,7 +944,9 @@ void ieee80211_stop_tx_ba_cb(struct sta_info *sta, int tid, if (send_delba) ieee80211_send_delba(sdata, sta->sta.addr, tid, - WLAN_BACK_INITIATOR, WLAN_REASON_QSTA_NOT_USE); + WLAN_BACK_INITIATOR, + WLAN_REASON_QSTA_NOT_USE, + tid_tx->ndp); } void ieee80211_stop_tx_ba_cb_irqsafe(struct ieee80211_vif *vif, @@ -978,15 +984,15 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, lockdep_assert_wiphy(sta->local->hw.wiphy); - capab = le16_to_cpu(mgmt->u.action.u.addba_resp.capab); + capab = le16_to_cpu(mgmt->u.action.addba_resp.capab); amsdu = capab & IEEE80211_ADDBA_PARAM_AMSDU_MASK; tid = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_TID_MASK); buf_size = u16_get_bits(capab, IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK); ieee80211_retrieve_addba_ext_data(sta, - mgmt->u.action.u.addba_resp.variable, + mgmt->u.action.addba_resp.variable, len - offsetof(typeof(*mgmt), - u.action.u.addba_resp.variable), + u.action.addba_resp.variable), &buf_size); buf_size = min(buf_size, local->hw.max_tx_aggregation_subframes); @@ -999,7 +1005,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, if (!tid_tx) return; - if (mgmt->u.action.u.addba_resp.dialog_token != tid_tx->dialog_token) { + if (mgmt->u.action.addba_resp.dialog_token != tid_tx->dialog_token) { ht_dbg(sta->sdata, "wrong addBA response token, %pM tid %d\n", sta->sta.addr, tid); return; @@ -1029,7 +1035,7 @@ void
ieee80211_process_addba_resp(struct ieee80211_local *local, * is set to 0, the Buffer Size subfield is set to a value * of at least 1. */ - if (le16_to_cpu(mgmt->u.action.u.addba_resp.status) + if (le16_to_cpu(mgmt->u.action.addba_resp.status) == WLAN_STATUS_SUCCESS && buf_size) { if (test_and_set_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { @@ -1046,7 +1052,7 @@ void ieee80211_process_addba_resp(struct ieee80211_local *local, sta->ampdu_mlme.addba_req_num[tid] = 0; tid_tx->timeout = - le16_to_cpu(mgmt->u.action.u.addba_resp.timeout); + le16_to_cpu(mgmt->u.action.addba_resp.timeout); if (tid_tx->timeout) { mod_timer(&tid_tx->session_timer, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b85375ceb575..7b77d57c9f96 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -281,10 +281,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy, if (params->use_4addr == ifmgd->use_4addr) return 0; - /* FIXME: no support for 4-addr MLO yet */ - if (ieee80211_vif_is_mld(&sdata->vif)) - return -EOPNOTSUPP; - sdata->u.mgd.use_4addr = params->use_4addr; if (!ifmgd->associated) return 0; @@ -330,7 +326,6 @@ static void ieee80211_stop_p2p_device(struct wiphy *wiphy, static void ieee80211_nan_conf_free(struct cfg80211_nan_conf *conf) { - kfree(conf->cluster_id); kfree(conf->extra_nan_attrs); kfree(conf->vendor_elems); memset(conf, 0, sizeof(*conf)); @@ -372,9 +367,6 @@ static int ieee80211_nan_conf_copy(struct cfg80211_nan_conf *dst, memcpy(&dst->band_cfgs, &src->band_cfgs, sizeof(dst->band_cfgs)); - kfree(dst->cluster_id); - dst->cluster_id = NULL; - kfree(dst->extra_nan_attrs); dst->extra_nan_attrs = NULL; dst->extra_nan_attrs_len = 0; @@ -383,12 +375,8 @@ static int ieee80211_nan_conf_copy(struct cfg80211_nan_conf *dst, dst->vendor_elems = NULL; dst->vendor_elems_len = 0; - if (src->cluster_id) { - dst->cluster_id = kmemdup(src->cluster_id, ETH_ALEN, - GFP_KERNEL); - if (!dst->cluster_id) - goto no_mem; - } + if (is_zero_ether_addr(dst->cluster_id)) + 
ether_addr_copy(dst->cluster_id, src->cluster_id); if (src->extra_nan_attrs && src->extra_nan_attrs_len) { dst->extra_nan_attrs = kmemdup(src->extra_nan_attrs, @@ -510,12 +498,15 @@ static int ieee80211_add_nan_func(struct wiphy *wiphy, if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; - spin_lock_bh(&sdata->u.nan.func_lock); + if (WARN_ON(wiphy->nan_capa.flags & WIPHY_NAN_FLAGS_USERSPACE_DE)) + return -EOPNOTSUPP; + + spin_lock_bh(&sdata->u.nan.de.func_lock); - ret = idr_alloc(&sdata->u.nan.function_inst_ids, + ret = idr_alloc(&sdata->u.nan.de.function_inst_ids, nan_func, 1, sdata->local->hw.max_nan_de_entries + 1, GFP_ATOMIC); - spin_unlock_bh(&sdata->u.nan.func_lock); + spin_unlock_bh(&sdata->u.nan.de.func_lock); if (ret < 0) return ret; @@ -526,10 +517,10 @@ static int ieee80211_add_nan_func(struct wiphy *wiphy, ret = drv_add_nan_func(sdata->local, sdata, nan_func); if (ret) { - spin_lock_bh(&sdata->u.nan.func_lock); - idr_remove(&sdata->u.nan.function_inst_ids, + spin_lock_bh(&sdata->u.nan.de.func_lock); + idr_remove(&sdata->u.nan.de.function_inst_ids, nan_func->instance_id); - spin_unlock_bh(&sdata->u.nan.func_lock); + spin_unlock_bh(&sdata->u.nan.de.func_lock); } return ret; @@ -542,9 +533,9 @@ ieee80211_find_nan_func_by_cookie(struct ieee80211_sub_if_data *sdata, struct cfg80211_nan_func *func; int id; - lockdep_assert_held(&sdata->u.nan.func_lock); + lockdep_assert_held(&sdata->u.nan.de.func_lock); - idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) { + idr_for_each_entry(&sdata->u.nan.de.function_inst_ids, func, id) { if (func->cookie == cookie) return func; } @@ -563,13 +554,16 @@ static void ieee80211_del_nan_func(struct wiphy *wiphy, !ieee80211_sdata_running(sdata)) return; - spin_lock_bh(&sdata->u.nan.func_lock); + if (WARN_ON(wiphy->nan_capa.flags & WIPHY_NAN_FLAGS_USERSPACE_DE)) + return; + + spin_lock_bh(&sdata->u.nan.de.func_lock); func = ieee80211_find_nan_func_by_cookie(sdata, cookie); if (func) instance_id = func->instance_id; 
- spin_unlock_bh(&sdata->u.nan.func_lock); + spin_unlock_bh(&sdata->u.nan.de.func_lock); if (instance_id) drv_del_nan_func(sdata->local, sdata, instance_id); @@ -616,11 +610,11 @@ static int ieee80211_set_tx(struct ieee80211_sub_if_data *sdata, return ret; } -static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, +static int ieee80211_add_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr, struct key_params *params) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, false); struct ieee80211_local *local = sdata->local; @@ -704,6 +698,8 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, break; case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: + case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: /* Keys without a station are used for TX only */ if (sta && test_sta_flag(sta, WLAN_STA_MFP)) key->conf.flags |= IEEE80211_KEY_FLAG_RX_MGMT; @@ -720,7 +716,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: - case NL80211_IFTYPE_NAN: case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: @@ -798,11 +793,11 @@ ieee80211_lookup_key(struct ieee80211_sub_if_data *sdata, int link_id, return NULL; } -static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, +static int ieee80211_del_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; struct ieee80211_key *key; @@ -817,7 +812,7 @@ static int 
ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, return 0; } -static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, +static int ieee80211_get_key(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_idx, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, @@ -833,7 +828,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, int err = -ENOENT; struct ieee80211_key_seq kseq = {}; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); + sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); rcu_read_lock(); @@ -937,10 +932,10 @@ static int ieee80211_config_default_key(struct wiphy *wiphy, } static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, int link_id, u8 key_idx) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, true); @@ -953,10 +948,10 @@ static int ieee80211_config_default_mgmt_key(struct wiphy *wiphy, } static int ieee80211_config_default_beacon_key(struct wiphy *wiphy, - struct net_device *dev, + struct wireless_dev *wdev, int link_id, u8 key_idx) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_link_data *link = ieee80211_link_or_deflink(sdata, link_id, true); @@ -1000,10 +995,10 @@ void sta_set_rate_info_tx(struct sta_info *sta, rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; } -static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, +static int ieee80211_dump_station(struct wiphy *wiphy, struct wireless_dev *wdev, int idx, u8 *mac, struct station_info *sinfo) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local 
*local = sdata->local; struct sta_info *sta; int ret = -ENOENT; @@ -1035,10 +1030,11 @@ static int ieee80211_dump_survey(struct wiphy *wiphy, struct net_device *dev, return drv_get_survey(local, idx, survey); } -static int ieee80211_get_station(struct wiphy *wiphy, struct net_device *dev, +static int ieee80211_get_station(struct wiphy *wiphy, + struct wireless_dev *wdev, const u8 *mac, struct station_info *sinfo) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = sdata->local; struct sta_info *sta; int ret = -ENOENT; @@ -2077,7 +2073,7 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, enum sta_link_apply_mode mode, struct link_station_parameters *params) { - struct ieee80211_supported_band *sband; + struct ieee80211_supported_band *sband = NULL; struct ieee80211_sub_if_data *sdata = sta->sdata; u32 link_id = params->link_id < 0 ? 0 : params->link_id; struct ieee80211_link_data *link = @@ -2085,6 +2081,9 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, struct link_sta_info *link_sta = rcu_dereference_protected(sta->link[link_id], lockdep_is_held(&local->hw.wiphy->mtx)); + const struct ieee80211_sta_ht_cap *own_ht_cap; + const struct ieee80211_sta_vht_cap *own_vht_cap; + const struct ieee80211_sta_he_cap *own_he_cap; bool changes = params->link_mac || params->txpwr_set || params->supported_rates_len || @@ -2114,10 +2113,27 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, if (!link || !link_sta) return -EINVAL; - sband = ieee80211_get_link_sband(link); - if (!sband) + /* + * We should not have any changes in NDI station, its capabilities are + * copied from the NMI sta + */ + if (WARN_ON(sdata->vif.type == NL80211_IFTYPE_NAN_DATA)) return -EINVAL; + if (sdata->vif.type == NL80211_IFTYPE_NAN) { + own_ht_cap = &local->hw.wiphy->nan_capa.phy.ht; + own_vht_cap = 
&local->hw.wiphy->nan_capa.phy.vht; + own_he_cap = &local->hw.wiphy->nan_capa.phy.he; + } else { + sband = ieee80211_get_link_sband(link); + if (!sband) + return -EINVAL; + + own_ht_cap = &sband->ht_cap; + own_vht_cap = &sband->vht_cap; + own_he_cap = ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); + } + if (params->link_mac) { if (mode == STA_LINK_MODE_NEW) { memcpy(link_sta->addr, params->link_mac, ETH_ALEN); @@ -2139,6 +2155,27 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, return ret; } + if (sdata->vif.type == NL80211_IFTYPE_NAN) { + static const u8 all_ofdm_rates[] = { + 0x0c, 0x12, 0x18, 0x24, 0x30, 0x48, 0x60, 0x6c + }; + + /* Set the same supported_rates for all bands */ + for (int i = 0; i < NUM_NL80211_BANDS; i++) { + struct ieee80211_supported_band *tmp = + sdata->local->hw.wiphy->bands[i]; + + if ((i != NL80211_BAND_2GHZ && i != NL80211_BAND_5GHZ) || + !tmp) + continue; + + if (!ieee80211_parse_bitrates(tmp, all_ofdm_rates, + sizeof(all_ofdm_rates), + &link_sta->pub->supp_rates[i])) + return -EINVAL; + } + } + if (params->supported_rates && params->supported_rates_len && !ieee80211_parse_bitrates(sband, params->supported_rates, @@ -2147,21 +2184,24 @@ static int sta_link_apply_parameters(struct ieee80211_local *local, return -EINVAL; if (params->ht_capa) - ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, + ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, own_ht_cap, params->ht_capa, link_sta); /* VHT can override some HT caps such as the A-MSDU max length */ if (params->vht_capa) ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, + own_vht_cap, params->vht_capa, NULL, link_sta); if (params->he_capa) - ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, - (void *)params->he_capa, - params->he_capa_len, - (void *)params->he_6ghz_capa, - link_sta); + _ieee80211_he_cap_ie_to_sta_he_cap(sdata, + own_he_cap, + (void *)params->he_capa, + params->he_capa_len, + (sband && sband->band == NL80211_BAND_6GHZ) ? 
+ (void *)params->he_6ghz_capa : NULL, + link_sta); if (params->he_capa && params->eht_capa) ieee80211_eht_cap_ie_to_sta_eht_cap(sdata, sband, @@ -2348,6 +2388,32 @@ static int sta_apply_parameters(struct ieee80211_local *local, if (params->airtime_weight) sta->airtime_weight = params->airtime_weight; + if (params->nmi_mac) { + struct ieee80211_sub_if_data *nmi = + rcu_dereference_wiphy(local->hw.wiphy, + sdata->u.nan_data.nmi); + struct sta_info *nmi_sta; + + if (WARN_ON(!nmi)) + return -EINVAL; + + nmi_sta = sta_info_get(nmi, params->nmi_mac); + if (!nmi_sta) + return -ENOENT; + rcu_assign_pointer(sta->sta.nmi, &nmi_sta->sta); + + /* For NAN_DATA stations, copy capabilities from the NMI station */ + if (!nmi_sta->deflink.pub->ht_cap.ht_supported) + return -EINVAL; + + sta->deflink.pub->ht_cap = nmi_sta->deflink.pub->ht_cap; + sta->deflink.pub->vht_cap = nmi_sta->deflink.pub->vht_cap; + sta->deflink.pub->he_cap = nmi_sta->deflink.pub->he_cap; + memcpy(&sta->deflink.pub->supp_rates, + &nmi_sta->deflink.pub->supp_rates, + sizeof(sta->deflink.pub->supp_rates)); + } + /* set the STA state after all sta info from usermode has been set */ if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) || set & BIT(NL80211_STA_FLAG_ASSOCIATED)) { @@ -2363,7 +2429,7 @@ static int sta_apply_parameters(struct ieee80211_local *local, return 0; } -static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, +static int ieee80211_add_station(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params) { @@ -2381,7 +2447,7 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, sdata->vif.type != NL80211_IFTYPE_AP) return -EINVAL; } else - sdata = IEEE80211_DEV_TO_SUB_IF(dev); + sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (ether_addr_equal(mac, sdata->vif.addr)) return -EINVAL; @@ -2432,15 +2498,23 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, test_sta_flag(sta, WLAN_STA_ASSOC)) 
rate_control_rate_init_all_links(sta); - return sta_info_insert(sta); + err = sta_info_insert(sta); + + /* + * ieee80211_nan_update_ndi_carrier was called from sta_apply_parameters, + * but then we did not have the STA in the list. + */ + if (!err && sdata->vif.type == NL80211_IFTYPE_NAN_DATA) + ieee80211_nan_update_ndi_carrier(sta->sdata); + return err; } -static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, +static int ieee80211_del_station(struct wiphy *wiphy, struct wireless_dev *wdev, struct station_del_parameters *params) { struct ieee80211_sub_if_data *sdata; - sdata = IEEE80211_DEV_TO_SUB_IF(dev); + sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); if (params->mac) return sta_info_destroy_addr_bss(sdata, params->mac); @@ -2449,11 +2523,70 @@ static int ieee80211_del_station(struct wiphy *wiphy, struct net_device *dev, return 0; } +static int ieee80211_set_sta_4addr(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct sta_info *sta) +{ + struct ieee80211_vif *vif = &sdata->vif; + struct wiphy *wiphy = local->hw.wiphy; + struct ieee80211_sub_if_data *master; + struct ieee80211_bss_conf *link_conf; + struct wireless_dev *wdev; + unsigned long master_iter; + int link_id; + int err; + + lockdep_assert_wiphy(local->hw.wiphy); + + if (sdata->u.vlan.sta) + return -EBUSY; + + wdev = &sdata->wdev; + master = container_of(sdata->bss, + struct ieee80211_sub_if_data, + u.ap); + + if (sta->sta.valid_links) { + u16 sta_links = sta->sta.valid_links; + u16 new_links = master->vif.valid_links & sta_links; + u16 orig_links = wdev->valid_links; + + wdev->valid_links = new_links; + + err = ieee80211_vif_set_links(sdata, new_links, 0); + if (err) { + wdev->valid_links = orig_links; + return err; + } + + master_iter = master->vif.valid_links; + + for_each_set_bit(link_id, &master_iter, + IEEE80211_MLD_MAX_NUM_LINKS) { + if (!(sta_links & BIT(link_id))) { + eth_zero_addr(wdev->links[link_id].addr); + } else { + link_conf = 
wiphy_dereference(wiphy, + vif->link_conf[link_id]); + + ether_addr_copy(wdev->links[link_id].addr, + link_conf->bssid); + } + } + } + + rcu_assign_pointer(sdata->u.vlan.sta, sta); + __ieee80211_check_fast_rx_iface(sdata); + drv_sta_set_4addr(local, sta->sdata, &sta->sta, true); + + return 0; +} + static int ieee80211_change_station(struct wiphy *wiphy, - struct net_device *dev, const u8 *mac, + struct wireless_dev *wdev, const u8 *mac, struct station_parameters *params) { - struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); struct ieee80211_local *local = wiphy_priv(wiphy); struct sta_info *sta; struct ieee80211_sub_if_data *vlansdata; @@ -2493,6 +2626,12 @@ static int ieee80211_change_station(struct wiphy *wiphy, else statype = CFG80211_STA_AP_CLIENT_UNASSOC; break; + case NL80211_IFTYPE_NAN: + statype = CFG80211_STA_NAN_MGMT; + break; + case NL80211_IFTYPE_NAN_DATA: + statype = CFG80211_STA_NAN_DATA; + break; default: return -EOPNOTSUPP; } @@ -2505,12 +2644,10 @@ static int ieee80211_change_station(struct wiphy *wiphy, vlansdata = IEEE80211_DEV_TO_SUB_IF(params->vlan); if (params->vlan->ieee80211_ptr->use_4addr) { - if (vlansdata->u.vlan.sta) - return -EBUSY; + err = ieee80211_set_sta_4addr(local, vlansdata, sta); + if (err) + return err; - rcu_assign_pointer(vlansdata->u.vlan.sta, sta); - __ieee80211_check_fast_rx_iface(vlansdata); - drv_sta_set_4addr(local, sta->sdata, &sta->sta, true); } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN && @@ -2531,6 +2668,14 @@ static int ieee80211_change_station(struct wiphy *wiphy, } } + /* NAN capabilties should not change */ + if (statype == CFG80211_STA_NAN_DATA && + sta->deflink.pub->ht_cap.ht_supported && + (params->link_sta_params.ht_capa || + params->link_sta_params.vht_capa || + params->link_sta_params.he_capa)) + return -EINVAL; + err = sta_apply_parameters(local, sta, params); if (err) return err; @@ -4614,7 +4759,9 @@ 
static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_tx_info *info; struct sta_info *sta; struct ieee80211_chanctx_conf *chanctx_conf; + struct ieee80211_bss_conf *conf; enum nl80211_band band; + u8 link_id; int ret; /* the lock is needed to assign the cookie later */ @@ -4629,12 +4776,35 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, qos = sta->sta.wme; - chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); - if (WARN_ON(!chanctx_conf)) { - ret = -EINVAL; - goto unlock; + if (ieee80211_vif_is_mld(&sdata->vif)) { + if (sta->sta.mlo) { + link_id = IEEE80211_LINK_UNSPECIFIED; + } else { + /* + * For non-MLO clients connected to an AP MLD, band + * information is not used; instead, sta->deflink is + * used to send packets. + */ + link_id = sta->deflink.link_id; + + conf = rcu_dereference(sdata->vif.link_conf[link_id]); + + if (unlikely(!conf)) { + ret = -ENOLINK; + goto unlock; + } + } + /* MLD transmissions must not rely on the band */ + band = 0; + } else { + chanctx_conf = rcu_dereference(sdata->vif.bss_conf.chanctx_conf); + if (WARN_ON(!chanctx_conf)) { + ret = -EINVAL; + goto unlock; + } + band = chanctx_conf->def.chan->band; + link_id = 0; } - band = chanctx_conf->def.chan->band; if (qos) { fc = cpu_to_le16(IEEE80211_FTYPE_DATA | @@ -4661,8 +4831,13 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, nullfunc->frame_control = fc; nullfunc->duration_id = 0; memcpy(nullfunc->addr1, sta->sta.addr, ETH_ALEN); - memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); - memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); + if (ieee80211_vif_is_mld(&sdata->vif) && !sta->sta.mlo) { + memcpy(nullfunc->addr2, conf->addr, ETH_ALEN); + memcpy(nullfunc->addr3, conf->addr, ETH_ALEN); + } else { + memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); + memcpy(nullfunc->addr3, sdata->vif.addr, ETH_ALEN); + } nullfunc->seq_ctrl = 0; info = IEEE80211_SKB_CB(skb); @@ 
-4671,6 +4846,8 @@ static int ieee80211_probe_client(struct wiphy *wiphy, struct net_device *dev, IEEE80211_TX_INTFL_NL80211_FRAME_TX; info->band = band; + info->control.flags |= u32_encode_bits(link_id, + IEEE80211_TX_CTRL_MLO_LINK); skb_set_queue_mapping(skb, IEEE80211_AC_VO); skb->priority = 7; if (qos) @@ -4861,18 +5038,22 @@ void ieee80211_nan_func_terminated(struct ieee80211_vif *vif, if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) return; - spin_lock_bh(&sdata->u.nan.func_lock); + if (WARN_ON(sdata->local->hw.wiphy->nan_capa.flags & + WIPHY_NAN_FLAGS_USERSPACE_DE)) + return; + + spin_lock_bh(&sdata->u.nan.de.func_lock); - func = idr_find(&sdata->u.nan.function_inst_ids, inst_id); + func = idr_find(&sdata->u.nan.de.function_inst_ids, inst_id); if (WARN_ON(!func)) { - spin_unlock_bh(&sdata->u.nan.func_lock); + spin_unlock_bh(&sdata->u.nan.de.func_lock); return; } cookie = func->cookie; - idr_remove(&sdata->u.nan.function_inst_ids, inst_id); + idr_remove(&sdata->u.nan.de.function_inst_ids, inst_id); - spin_unlock_bh(&sdata->u.nan.func_lock); + spin_unlock_bh(&sdata->u.nan.de.func_lock); cfg80211_free_nan_func(func); @@ -4891,16 +5072,20 @@ void ieee80211_nan_func_match(struct ieee80211_vif *vif, if (WARN_ON(vif->type != NL80211_IFTYPE_NAN)) return; - spin_lock_bh(&sdata->u.nan.func_lock); + if (WARN_ON(sdata->local->hw.wiphy->nan_capa.flags & + WIPHY_NAN_FLAGS_USERSPACE_DE)) + return; + + spin_lock_bh(&sdata->u.nan.de.func_lock); - func = idr_find(&sdata->u.nan.function_inst_ids, match->inst_id); + func = idr_find(&sdata->u.nan.de.function_inst_ids, match->inst_id); if (WARN_ON(!func)) { - spin_unlock_bh(&sdata->u.nan.func_lock); + spin_unlock_bh(&sdata->u.nan.de.func_lock); return; } match->cookie = func->cookie; - spin_unlock_bh(&sdata->u.nan.func_lock); + spin_unlock_bh(&sdata->u.nan.de.func_lock); cfg80211_nan_match(ieee80211_vif_to_wdev(vif), match, gfp); } @@ -5396,9 +5581,6 @@ static int ieee80211_add_intf_link(struct wiphy *wiphy, 
lockdep_assert_wiphy(sdata->local->hw.wiphy); - if (wdev->use_4addr) - return -EOPNOTSUPP; - return ieee80211_vif_set_links(sdata, wdev->valid_links, 0); } @@ -5553,6 +5735,30 @@ ieee80211_set_epcs(struct wiphy *wiphy, struct net_device *dev, bool enable) return ieee80211_mgd_set_epcs(sdata, enable); } +static int +ieee80211_set_local_nan_sched(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_local_sched *sched) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + + lockdep_assert_wiphy(wiphy); + + return ieee80211_nan_set_local_sched(sdata, sched); +} + +static int +ieee80211_set_peer_nan_sched(struct wiphy *wiphy, + struct wireless_dev *wdev, + struct cfg80211_nan_peer_sched *sched) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); + + lockdep_assert_wiphy(sdata->local->hw.wiphy); + + return ieee80211_nan_set_peer_sched(sdata, sched); +} + const struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -5669,4 +5875,6 @@ const struct cfg80211_ops mac80211_config_ops = { .get_radio_mask = ieee80211_get_radio_mask, .assoc_ml_reconf = ieee80211_assoc_ml_reconf, .set_epcs = ieee80211_set_epcs, + .nan_set_local_sched = ieee80211_set_local_nan_sched, + .nan_set_peer_sched = ieee80211_set_peer_nan_sched, }; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 05f45e66999b..fda692316f08 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * mac80211 - channel management - * Copyright 2020 - 2025 Intel Corporation + * Copyright 2020-2026 Intel Corporation */ #include <linux/nl80211.h> @@ -16,6 +16,8 @@ struct ieee80211_chanctx_user_iter { struct ieee80211_chan_req *chanreq; struct ieee80211_sub_if_data *sdata; struct ieee80211_link_data *link; + struct ieee80211_nan_channel *nan_channel; + int nan_channel_next_idx; enum nl80211_iftype iftype; bool reserved, 
radar_required, done; enum { @@ -31,20 +33,38 @@ enum ieee80211_chanctx_iter_type { CHANCTX_ITER_ASSIGNED, }; -static void ieee80211_chanctx_user_iter_next(struct ieee80211_local *local, - struct ieee80211_chanctx *ctx, - struct ieee80211_chanctx_user_iter *iter, - enum ieee80211_chanctx_iter_type type, - bool start) +static bool +ieee80211_chanctx_user_iter_next_nan_channel(struct ieee80211_chanctx *ctx, + struct ieee80211_chanctx_user_iter *iter) { - lockdep_assert_wiphy(local->hw.wiphy); + /* Start from the next index after current position */ + for (int i = iter->nan_channel_next_idx; + i < ARRAY_SIZE(iter->sdata->vif.cfg.nan_sched.channels); i++) { + struct ieee80211_nan_channel *nan_channel = + &iter->sdata->vif.cfg.nan_sched.channels[i]; - if (start) { - memset(iter, 0, sizeof(*iter)); - goto next_interface; + if (!nan_channel->chanreq.oper.chan) + continue; + + if (nan_channel->chanctx_conf != &ctx->conf) + continue; + + iter->nan_channel = nan_channel; + iter->nan_channel_next_idx = i + 1; + iter->chanreq = &nan_channel->chanreq; + iter->link = NULL; + iter->reserved = false; + iter->radar_required = false; + return true; } + return false; +} -next_link: +static bool +ieee80211_chanctx_user_iter_next_link(struct ieee80211_chanctx *ctx, + struct ieee80211_chanctx_user_iter *iter, + enum ieee80211_chanctx_iter_type type) +{ for (int link_id = iter->link ? 
iter->link->link_id : 0; link_id < ARRAY_SIZE(iter->sdata->link); link_id++) { @@ -64,7 +84,7 @@ next_link: iter->reserved = false; iter->radar_required = link->radar_required; iter->chanreq = &link->conf->chanreq; - return; + return true; } fallthrough; case CHANCTX_ITER_POS_RESERVED: @@ -77,7 +97,7 @@ next_link: link->reserved_radar_required; iter->chanreq = &link->reserved; - return; + return true; } fallthrough; case CHANCTX_ITER_POS_DONE: @@ -85,6 +105,33 @@ next_link: continue; } } + return false; +} + +static void +ieee80211_chanctx_user_iter_next(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + struct ieee80211_chanctx_user_iter *iter, + enum ieee80211_chanctx_iter_type type, + bool start) +{ + bool found; + + lockdep_assert_wiphy(local->hw.wiphy); + + if (start) { + memset(iter, 0, sizeof(*iter)); + goto next_interface; + } + +next_user: + if (iter->iftype == NL80211_IFTYPE_NAN) + found = ieee80211_chanctx_user_iter_next_nan_channel(ctx, iter); + else + found = ieee80211_chanctx_user_iter_next_link(ctx, iter, type); + + if (found) + return; next_interface: /* next (or first) interface */ @@ -97,10 +144,18 @@ next_interface: if (iter->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) continue; + /* NAN channels don't reserve channel context */ + if (iter->sdata->vif.type == NL80211_IFTYPE_NAN && + type == CHANCTX_ITER_RESERVED) + continue; + + iter->nan_channel = NULL; iter->link = NULL; - iter->per_link = CHANCTX_ITER_POS_ASSIGNED; iter->iftype = iter->sdata->vif.type; - goto next_link; + iter->chanreq = NULL; + iter->per_link = CHANCTX_ITER_POS_ASSIGNED; + iter->nan_channel_next_idx = 0; + goto next_user; } iter->done = true; @@ -133,8 +188,8 @@ next_interface: CHANCTX_ITER_ALL, \ false)) -static int ieee80211_chanctx_num_assigned(struct ieee80211_local *local, - struct ieee80211_chanctx *ctx) +int ieee80211_chanctx_num_assigned(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx) { struct ieee80211_chanctx_user_iter iter; int num = 
0; @@ -166,6 +221,13 @@ int ieee80211_chanctx_refcount(struct ieee80211_local *local, for_each_chanctx_user_all(local, ctx, &iter) num++; + /* + * This ctx is in the process of getting used, + * take it into consideration + */ + if (ctx->will_be_used) + num++; + return num; } @@ -239,27 +301,48 @@ ieee80211_chanreq_compatible(const struct ieee80211_chan_req *a, return tmp; } +/* + * When checking for compatible, check against all the links using + * the chanctx (except the one passed that might be changing) to + * allow changes to the AP's bandwidth for wider bandwidth OFDMA + * purposes, which wouldn't be treated as compatible by checking + * against the chanctx's oper/ap chandefs. + */ static const struct ieee80211_chan_req * -ieee80211_chanctx_compatible(struct ieee80211_chanctx *ctx, - const struct ieee80211_chan_req *req, - struct ieee80211_chan_req *tmp) +_ieee80211_chanctx_compatible(struct ieee80211_local *local, + struct ieee80211_link_data *skip_link, + struct ieee80211_chanctx *ctx, + const struct ieee80211_chan_req *req, + struct ieee80211_chan_req *tmp) { - const struct ieee80211_chan_req *ret; - struct ieee80211_chan_req tmp2; + const struct ieee80211_chan_req *ret = req; + struct ieee80211_chanctx_user_iter iter; - *tmp = (struct ieee80211_chan_req){ - .oper = ctx->conf.def, - .ap = ctx->conf.ap, - }; + lockdep_assert_wiphy(local->hw.wiphy); + + for_each_chanctx_user_all(local, ctx, &iter) { + if (iter.link && iter.link == skip_link) + continue; + + ret = ieee80211_chanreq_compatible(ret, iter.chanreq, tmp); + if (!ret) + return NULL; + } - ret = ieee80211_chanreq_compatible(tmp, req, &tmp2); - if (!ret) - return NULL; *tmp = *ret; return tmp; } static const struct ieee80211_chan_req * +ieee80211_chanctx_compatible(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + const struct ieee80211_chan_req *req, + struct ieee80211_chan_req *tmp) +{ + return _ieee80211_chanctx_compatible(local, NULL, ctx, req, tmp); +} + +static const struct 
ieee80211_chan_req * ieee80211_chanctx_reserved_chanreq(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, const struct ieee80211_chan_req *req, @@ -293,7 +376,7 @@ ieee80211_chanctx_non_reserved_chandef(struct ieee80211_local *local, lockdep_assert_wiphy(local->hw.wiphy); for_each_chanctx_user_assigned(local, ctx, &iter) { - if (iter.link->reserved_chanctx) + if (iter.link && iter.link->reserved_chanctx) continue; comp_def = ieee80211_chanreq_compatible(iter.chanreq, @@ -427,73 +510,105 @@ ieee80211_get_max_required_bw(struct ieee80211_link_data *link) } static enum nl80211_chan_width +ieee80211_get_width_of_link(struct ieee80211_link_data *link) +{ + struct ieee80211_local *local = link->sdata->local; + + switch (link->sdata->vif.type) { + case NL80211_IFTYPE_STATION: + if (!link->sdata->vif.cfg.assoc) { + /* + * The AP's sta->bandwidth may not yet be set + * at this point (pre-association), so simply + * take the width from the chandef. We cannot + * have TDLS peers yet (only after association). 
+ */ + return link->conf->chanreq.oper.width; + } + /* + * otherwise just use min_def like in AP, depending on what + * we currently think the AP STA (and possibly TDLS peers) + * require(s) + */ + fallthrough; + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_AP_VLAN: + return ieee80211_get_max_required_bw(link); + case NL80211_IFTYPE_P2P_DEVICE: + break; + case NL80211_IFTYPE_MONITOR: + WARN_ON_ONCE(!ieee80211_hw_check(&local->hw, + NO_VIRTUAL_MONITOR)); + fallthrough; + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_OCB: + return link->conf->chanreq.oper.width; + case NL80211_IFTYPE_WDS: + case NL80211_IFTYPE_UNSPECIFIED: + case NUM_NL80211_IFTYPES: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: + WARN_ON_ONCE(1); + break; + } + + /* Take the lowest possible, so it won't change the max width */ + return NL80211_CHAN_WIDTH_20_NOHT; +} + +static enum nl80211_chan_width +ieee80211_get_width_of_chanctx_user(struct ieee80211_chanctx_user_iter *iter) +{ + if (iter->link) + return ieee80211_get_width_of_link(iter->link); + + if (WARN_ON_ONCE(!iter->nan_channel || iter->reserved)) + return NL80211_CHAN_WIDTH_20_NOHT; + + return iter->nan_channel->chanreq.oper.width; +} + +static enum nl80211_chan_width ieee80211_get_chanctx_max_required_bw(struct ieee80211_local *local, struct ieee80211_chanctx *ctx, struct ieee80211_link_data *rsvd_for, bool check_reserved) { - struct ieee80211_sub_if_data *sdata; - struct ieee80211_link_data *link; enum nl80211_chan_width max_bw = NL80211_CHAN_WIDTH_20_NOHT; + struct ieee80211_chanctx_user_iter iter; + struct ieee80211_sub_if_data *sdata; + enum nl80211_chan_width width; if (WARN_ON(check_reserved && rsvd_for)) return ctx->conf.def.width; - for_each_sdata_link(local, link) { - enum nl80211_chan_width width = NL80211_CHAN_WIDTH_20_NOHT; - - if (check_reserved) { - if (link->reserved_chanctx != ctx) - continue; - } else if (link 
!= rsvd_for && - rcu_access_pointer(link->conf->chanctx_conf) != &ctx->conf) - continue; - - switch (link->sdata->vif.type) { - case NL80211_IFTYPE_STATION: - if (!link->sdata->vif.cfg.assoc) { - /* - * The AP's sta->bandwidth may not yet be set - * at this point (pre-association), so simply - * take the width from the chandef. We cannot - * have TDLS peers yet (only after association). - */ - width = link->conf->chanreq.oper.width; - break; - } - /* - * otherwise just use min_def like in AP, depending on what - * we currently think the AP STA (and possibly TDLS peers) - * require(s) - */ - fallthrough; - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_AP_VLAN: - width = ieee80211_get_max_required_bw(link); - break; - case NL80211_IFTYPE_P2P_DEVICE: - case NL80211_IFTYPE_NAN: - continue; - case NL80211_IFTYPE_MONITOR: - WARN_ON_ONCE(!ieee80211_hw_check(&local->hw, - NO_VIRTUAL_MONITOR)); - fallthrough; - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_MESH_POINT: - case NL80211_IFTYPE_OCB: - width = link->conf->chanreq.oper.width; - break; - case NL80211_IFTYPE_WDS: - case NL80211_IFTYPE_UNSPECIFIED: - case NUM_NL80211_IFTYPES: - case NL80211_IFTYPE_P2P_CLIENT: - case NL80211_IFTYPE_P2P_GO: - WARN_ON_ONCE(1); + /* When this is true we only care about the reserving links */ + if (check_reserved) { + for_each_chanctx_user_reserved(local, ctx, &iter) { + width = ieee80211_get_width_of_chanctx_user(&iter); + max_bw = max(max_bw, width); } + goto check_monitor; + } + /* Consider all assigned links */ + for_each_chanctx_user_assigned(local, ctx, &iter) { + width = ieee80211_get_width_of_chanctx_user(&iter); max_bw = max(max_bw, width); } + if (!rsvd_for || + rsvd_for->sdata == rcu_access_pointer(local->monitor_sdata)) + goto check_monitor; + + /* Consider the link for which this chanctx is reserved/going to be assigned */ + width = ieee80211_get_width_of_link(rsvd_for); + max_bw = max(max_bw, width); + +check_monitor: /* use the configured bandwidth in case of monitor 
interface */ sdata = wiphy_dereference(local->hw.wiphy, local->monitor_sdata); if (sdata && @@ -731,10 +846,9 @@ static void ieee80211_change_chanctx(struct ieee80211_local *local, _ieee80211_change_chanctx(local, ctx, old_ctx, chanreq, NULL); } -/* Note: if successful, the returned chanctx is reserved for the link */ +/* Note: if successful, the returned chanctx will_be_used flag is set */ static struct ieee80211_chanctx * ieee80211_find_chanctx(struct ieee80211_local *local, - struct ieee80211_link_data *link, const struct ieee80211_chan_req *chanreq, enum ieee80211_chanctx_mode mode) { @@ -746,9 +860,6 @@ ieee80211_find_chanctx(struct ieee80211_local *local, if (mode == IEEE80211_CHANCTX_EXCLUSIVE) return NULL; - if (WARN_ON(link->reserved_chanctx)) - return NULL; - list_for_each_entry(ctx, &local->chanctx_list, list) { const struct ieee80211_chan_req *compat; @@ -758,7 +869,8 @@ ieee80211_find_chanctx(struct ieee80211_local *local, if (ctx->mode == IEEE80211_CHANCTX_EXCLUSIVE) continue; - compat = ieee80211_chanctx_compatible(ctx, chanreq, &tmp); + compat = ieee80211_chanctx_compatible(local, ctx, chanreq, + &tmp); if (!compat) continue; @@ -768,12 +880,12 @@ ieee80211_find_chanctx(struct ieee80211_local *local, continue; /* - * Reserve the chanctx temporarily, as the driver might change + * Mark the chanctx as will be used, as the driver might change * active links during callbacks we make into it below and/or * later during assignment, which could (otherwise) cause the * context to actually be removed. 
*/ - link->reserved_chanctx = ctx; + ctx->will_be_used = true; ieee80211_change_chanctx(local, ctx, ctx, compat); @@ -898,7 +1010,10 @@ ieee80211_new_chanctx(struct ieee80211_local *local, kfree(ctx); return ERR_PTR(err); } - /* We ignored a driver error, see _ieee80211_set_active_links */ + /* + * We ignored a driver error, see _ieee80211_set_active_links and/or + * ieee80211_nan_set_local_sched + */ WARN_ON_ONCE(err && !local->in_reconfig); list_add_rcu(&ctx->list, &local->chanctx_list); @@ -919,9 +1034,9 @@ static void ieee80211_del_chanctx(struct ieee80211_local *local, ieee80211_remove_wbrf(local, &ctx->conf.def); } -static void ieee80211_free_chanctx(struct ieee80211_local *local, - struct ieee80211_chanctx *ctx, - bool skip_idle_recalc) +void ieee80211_free_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + bool skip_idle_recalc) { lockdep_assert_wiphy(local->hw.wiphy); @@ -1116,6 +1231,7 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_OCB: + case NL80211_IFTYPE_NAN: break; default: continue; @@ -1126,6 +1242,15 @@ void ieee80211_recalc_smps_chanctx(struct ieee80211_local *local, break; } + if (iter.nan_channel) { + rx_chains_dynamic = rx_chains_static = + iter.nan_channel->needed_rx_chains; + break; + } + + if (!iter.link) + continue; + switch (iter.link->smps_mode) { default: WARN_ONCE(1, "Invalid SMPS mode %d\n", @@ -1196,6 +1321,10 @@ __ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link, list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { struct ieee80211_bss_conf *vlan_conf; + if (vlan->vif.valid_links && + !(vlan->vif.valid_links & BIT(link_id))) + continue; + vlan_conf = wiphy_dereference(local->hw.wiphy, vlan->vif.link_conf[link_id]); if (WARN_ON(!vlan_conf)) @@ -1416,6 +1545,7 @@ ieee80211_link_chanctx_reservation_complete(struct ieee80211_link_data *link) case NL80211_IFTYPE_P2P_GO: case 
NL80211_IFTYPE_P2P_DEVICE: case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: case NUM_NL80211_IFTYPES: WARN_ON(1); break; @@ -1438,6 +1568,10 @@ ieee80211_link_update_chanreq(struct ieee80211_link_data *link, list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { struct ieee80211_bss_conf *vlan_conf; + if (vlan->vif.valid_links && + !(vlan->vif.valid_links & BIT(link_id))) + continue; + vlan_conf = wiphy_dereference(sdata->local->hw.wiphy, vlan->vif.link_conf[link_id]); if (WARN_ON(!vlan_conf)) @@ -1733,7 +1867,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) for_each_chanctx_user_assigned(local, ctx->replace_ctx, &iter) { n_assigned++; - if (iter.link->reserved_chanctx) { + if (iter.link && iter.link->reserved_chanctx) { n_reserved++; if (iter.link->reserved_ready) n_ready++; @@ -1989,6 +2123,36 @@ void __ieee80211_link_release_channel(struct ieee80211_link_data *link, ieee80211_vif_use_reserved_switch(local); } +struct ieee80211_chanctx * +ieee80211_find_or_create_chanctx(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_chan_req *chanreq, + enum ieee80211_chanctx_mode mode, + bool assign_on_failure, + bool *reused_ctx) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_chanctx *ctx; + int radio_idx; + + lockdep_assert_wiphy(local->hw.wiphy); + + ctx = ieee80211_find_chanctx(local, chanreq, mode); + if (ctx) { + *reused_ctx = true; + return ctx; + } + + *reused_ctx = false; + + if (!ieee80211_find_available_radio(local, chanreq, + sdata->wdev.radio_mask, + &radio_idx)) + return ERR_PTR(-EBUSY); + + return ieee80211_new_chanctx(local, chanreq, mode, + assign_on_failure, radio_idx); +} + int _ieee80211_link_use_channel(struct ieee80211_link_data *link, const struct ieee80211_chan_req *chanreq, enum ieee80211_chanctx_mode mode, @@ -1998,8 +2162,7 @@ int _ieee80211_link_use_channel(struct ieee80211_link_data *link, struct ieee80211_local *local = sdata->local; struct ieee80211_chanctx *ctx; 
u8 radar_detect_width = 0; - bool reserved = false; - int radio_idx; + bool reused_ctx = false; int ret; lockdep_assert_wiphy(local->hw.wiphy); @@ -2027,17 +2190,8 @@ int _ieee80211_link_use_channel(struct ieee80211_link_data *link, if (!local->in_reconfig) __ieee80211_link_release_channel(link, false); - ctx = ieee80211_find_chanctx(local, link, chanreq, mode); - /* Note: context is now reserved */ - if (ctx) - reserved = true; - else if (!ieee80211_find_available_radio(local, chanreq, - sdata->wdev.radio_mask, - &radio_idx)) - ctx = ERR_PTR(-EBUSY); - else - ctx = ieee80211_new_chanctx(local, chanreq, mode, - assign_on_failure, radio_idx); + ctx = ieee80211_find_or_create_chanctx(sdata, chanreq, mode, + assign_on_failure, &reused_ctx); if (IS_ERR(ctx)) { ret = PTR_ERR(ctx); goto out; @@ -2047,10 +2201,13 @@ int _ieee80211_link_use_channel(struct ieee80211_link_data *link, ret = ieee80211_assign_link_chanctx(link, ctx, assign_on_failure); - if (reserved) { - /* remove reservation */ - WARN_ON(link->reserved_chanctx != ctx); - link->reserved_chanctx = NULL; + /* + * In case an existing channel context is being used, we marked it as + * will_be_used, now that it is assigned - clear this indication + */ + if (reused_ctx) { + WARN_ON(!ctx->will_be_used); + ctx->will_be_used = false; } if (ret) { @@ -2130,40 +2287,6 @@ int ieee80211_link_use_reserved_context(struct ieee80211_link_data *link) return 0; } -/* - * This is similar to ieee80211_chanctx_compatible(), but rechecks - * against all the links actually using it (except the one that's - * passed, since that one is changing). - * This is done in order to allow changes to the AP's bandwidth for - * wider bandwidth OFDMA purposes, which wouldn't be treated as - * compatible by ieee80211_chanctx_recheck() but is OK if the link - * requesting the update is the only one using it. 
- */ -static const struct ieee80211_chan_req * -ieee80211_chanctx_recheck(struct ieee80211_local *local, - struct ieee80211_link_data *skip_link, - struct ieee80211_chanctx *ctx, - const struct ieee80211_chan_req *req, - struct ieee80211_chan_req *tmp) -{ - const struct ieee80211_chan_req *ret = req; - struct ieee80211_chanctx_user_iter iter; - - lockdep_assert_wiphy(local->hw.wiphy); - - for_each_chanctx_user_all(local, ctx, &iter) { - if (iter.link == skip_link) - continue; - - ret = ieee80211_chanreq_compatible(ret, iter.chanreq, tmp); - if (!ret) - return NULL; - } - - *tmp = *ret; - return tmp; -} - int ieee80211_link_change_chanreq(struct ieee80211_link_data *link, const struct ieee80211_chan_req *chanreq, u64 *changed) @@ -2200,7 +2323,7 @@ int ieee80211_link_change_chanreq(struct ieee80211_link_data *link, ctx = container_of(conf, struct ieee80211_chanctx, conf); - compat = ieee80211_chanctx_recheck(local, link, ctx, chanreq, &tmp); + compat = _ieee80211_chanctx_compatible(local, link, ctx, chanreq, &tmp); if (!compat) return -EINVAL; diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 687a66cd4943..5a1831b08677 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -486,6 +486,7 @@ static const char *hw_flag_names[] = { FLAG(DISALLOW_PUNCTURING), FLAG(HANDLES_QUIET_CSA), FLAG(STRICT), + FLAG(SUPPORTS_NDP_BLOCKACK), #undef FLAG }; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 51bf3c7822a7..f1c0b87fddd5 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1793,4 +1793,25 @@ static inline int drv_set_eml_op_mode(struct ieee80211_sub_if_data *sdata, return ret; } +static inline int +drv_nan_peer_sched_changed(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct sta_info *sta) +{ + int ret; + + might_sleep(); + lockdep_assert_wiphy(local->hw.wiphy); + check_sdata_in_driver(sdata); + + if (!local->ops->nan_peer_sched_changed) + return -EOPNOTSUPP; + + 
trace_drv_nan_peer_sched_changed(local, sdata, &sta->sta); + ret = local->ops->nan_peer_sched_changed(&local->hw, &sta->sta); + trace_drv_return_int(local, ret); + + return ret; +} + #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/eht.c b/net/mac80211/eht.c index 078e1e23d8d1..768bfc4e737d 100644 --- a/net/mac80211/eht.c +++ b/net/mac80211/eht.c @@ -108,7 +108,7 @@ static void ieee80211_send_eml_op_mode_notif(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *req, int opt_len) { - int len = offsetofend(struct ieee80211_mgmt, u.action.u.eml_omn); + int len = IEEE80211_MIN_ACTION_SIZE(eml_omn); struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; struct sk_buff *skb; @@ -127,16 +127,15 @@ ieee80211_send_eml_op_mode_notif(struct ieee80211_sub_if_data *sdata, memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT; - mgmt->u.action.u.eml_omn.action_code = - WLAN_PROTECTED_EHT_ACTION_EML_OP_MODE_NOTIF; - mgmt->u.action.u.eml_omn.dialog_token = - req->u.action.u.eml_omn.dialog_token; - mgmt->u.action.u.eml_omn.control = req->u.action.u.eml_omn.control & + mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_EML_OP_MODE_NOTIF; + mgmt->u.action.eml_omn.dialog_token = + req->u.action.eml_omn.dialog_token; + mgmt->u.action.eml_omn.control = req->u.action.eml_omn.control & ~(IEEE80211_EML_CTRL_EMLSR_PARAM_UPDATE | IEEE80211_EML_CTRL_INDEV_COEX_ACT); /* Copy optional fields from the received notification frame */ - memcpy(mgmt->u.action.u.eml_omn.variable, - req->u.action.u.eml_omn.variable, opt_len); + memcpy(mgmt->u.action.eml_omn.variable, + req->u.action.eml_omn.variable, opt_len); ieee80211_tx_skb(sdata, skb); } @@ -144,14 +143,14 @@ ieee80211_send_eml_op_mode_notif(struct ieee80211_sub_if_data *sdata, void ieee80211_rx_eml_op_mode_notif(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { - int len = offsetofend(struct ieee80211_mgmt, u.action.u.eml_omn); + int len = 
IEEE80211_MIN_ACTION_SIZE(eml_omn); enum nl80211_iftype type = ieee80211_vif_type_p2p(&sdata->vif); struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); const struct wiphy_iftype_ext_capab *ift_ext_capa; struct ieee80211_mgmt *mgmt = (void *)skb->data; struct ieee80211_local *local = sdata->local; - u8 control = mgmt->u.action.u.eml_omn.control; - u8 *ptr = mgmt->u.action.u.eml_omn.variable; + u8 control = mgmt->u.action.eml_omn.control; + u8 *ptr = mgmt->u.action.eml_omn.variable; struct ieee80211_eml_params eml_params = { .link_id = status->link_id, .control = control, diff --git a/net/mac80211/he.c b/net/mac80211/he.c index f7b05e59374c..a3e16a5bec22 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -108,14 +108,13 @@ static void ieee80211_he_mcs_intersection(__le16 *he_own_rx, __le16 *he_peer_rx, } void -ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, - const u8 *he_cap_ie, u8 he_cap_len, - const struct ieee80211_he_6ghz_capa *he_6ghz_capa, - struct link_sta_info *link_sta) +_ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_sta_he_cap *own_he_cap_ptr, + const u8 *he_cap_ie, u8 he_cap_len, + const struct ieee80211_he_6ghz_capa *he_6ghz_capa, + struct link_sta_info *link_sta) { struct ieee80211_sta_he_cap *he_cap = &link_sta->pub->he_cap; - const struct ieee80211_sta_he_cap *own_he_cap_ptr; struct ieee80211_sta_he_cap own_he_cap; struct ieee80211_he_cap_elem *he_cap_ie_elem = (void *)he_cap_ie; u8 he_ppe_size; @@ -125,12 +124,11 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, memset(he_cap, 0, sizeof(*he_cap)); - if (!he_cap_ie) + if (!he_cap_ie || !own_he_cap_ptr || !own_he_cap_ptr->has_he) return; - own_he_cap_ptr = - ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); - if (!own_he_cap_ptr) + /* NDI station are using the capabilities from the NMI station */ + if (WARN_ON_ONCE(sdata->vif.type == 
NL80211_IFTYPE_NAN_DATA)) return; own_he_cap = *own_he_cap_ptr; @@ -162,9 +160,10 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, he_cap->has_he = true; link_sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(link_sta); - link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta); + if (sdata->vif.type != NL80211_IFTYPE_NAN) + link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta); - if (sband->band == NL80211_BAND_6GHZ && he_6ghz_capa) + if (he_6ghz_capa) ieee80211_update_from_he_6ghz_capa(he_6ghz_capa, link_sta); ieee80211_he_mcs_intersection(&own_he_cap.he_mcs_nss_supp.rx_mcs_80, @@ -208,6 +207,23 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, } void +ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, + struct ieee80211_supported_band *sband, + const u8 *he_cap_ie, u8 he_cap_len, + const struct ieee80211_he_6ghz_capa *he_6ghz_capa, + struct link_sta_info *link_sta) +{ + const struct ieee80211_sta_he_cap *own_he_cap = + ieee80211_get_he_iftype_cap_vif(sband, &sdata->vif); + + _ieee80211_he_cap_ie_to_sta_he_cap(sdata, own_he_cap, he_cap_ie, + he_cap_len, + (sband->band == NL80211_BAND_6GHZ) ? 
+ he_6ghz_capa : NULL, + link_sta); +} + +void ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif, const struct ieee80211_he_operation *he_op_ie) { diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 1c82a28b03de..97719298e038 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation * Copyright 2017 Intel Deutschland GmbH - * Copyright(c) 2020-2025 Intel Corporation + * Copyright(c) 2020-2026 Intel Corporation */ #include <linux/ieee80211.h> @@ -136,7 +136,7 @@ void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, + const struct ieee80211_sta_ht_cap *own_cap_ptr, const struct ieee80211_ht_cap *ht_cap_ie, struct link_sta_info *link_sta) { @@ -151,12 +151,16 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, memset(&ht_cap, 0, sizeof(ht_cap)); - if (!ht_cap_ie || !sband->ht_cap.ht_supported) + if (!ht_cap_ie || !own_cap_ptr->ht_supported) goto apply; + /* NDI station are using the capabilities from the NMI station */ + if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_NAN_DATA)) + return 0; + ht_cap.ht_supported = true; - own_cap = sband->ht_cap; + own_cap = *own_cap_ptr; /* * If user has specified capability over-rides, take care @@ -254,10 +258,17 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, rcu_read_lock(); link_conf = rcu_dereference(sdata->vif.link_conf[link_sta->link_id]); - if (WARN_ON(!link_conf)) + if (WARN_ON(!link_conf)) { width = NL80211_CHAN_WIDTH_20_NOHT; - else + } else if (sdata->vif.type == NL80211_IFTYPE_NAN || + sdata->vif.type == NL80211_IFTYPE_NAN_DATA) { + /* In NAN, link_sta->bandwidth is invalid since NAN operates on + * multiple channels. Just take the maximum. 
+ */ + width = NL80211_CHAN_WIDTH_320; + } else { width = link_conf->chanreq.oper.width; + } switch (width) { default: @@ -285,7 +296,9 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, IEEE80211_STA_RX_BW_40 : IEEE80211_STA_RX_BW_20; if (sta->sdata->vif.type == NL80211_IFTYPE_AP || - sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) { + sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN || + sta->sdata->vif.type == NL80211_IFTYPE_NAN || + sta->sdata->vif.type == NL80211_IFTYPE_NAN_DATA) { enum ieee80211_smps_mode smps_mode; switch ((ht_cap.cap & IEEE80211_HT_CAP_SM_PS) @@ -379,7 +392,7 @@ void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work) sta->ampdu_mlme.tid_rx_manage_offl)) __ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid, IEEE80211_MAX_AMPDU_BUF_HT, - false, true, 0); + false, true, false, 0); if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS, sta->ampdu_mlme.tid_rx_manage_offl)) @@ -455,29 +468,32 @@ void ieee80211_ba_session_work(struct wiphy *wiphy, struct wiphy_work *work) void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const u8 *da, u16 tid, - u16 initiator, u16 reason_code) + u16 initiator, u16 reason_code, + bool use_ndp) { struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u16 params; - skb = dev_alloc_skb(sizeof(*mgmt) + local->hw.extra_tx_headroom); + skb = dev_alloc_skb(IEEE80211_MIN_ACTION_SIZE(delba) + + local->hw.extra_tx_headroom); if (!skb) return; skb_reserve(skb, local->hw.extra_tx_headroom); mgmt = ieee80211_mgmt_ba(skb, da, sdata); - skb_put(skb, 1 + sizeof(mgmt->u.action.u.delba)); + skb_put(skb, 2 + sizeof(mgmt->u.action.delba)); mgmt->u.action.category = WLAN_CATEGORY_BACK; - mgmt->u.action.u.delba.action_code = WLAN_ACTION_DELBA; + mgmt->u.action.action_code = use_ndp ? 
+ WLAN_ACTION_NDP_DELBA : WLAN_ACTION_DELBA; params = (u16)(initiator << 11); /* bit 11 initiator */ params |= (u16)(tid << 12); /* bit 15:12 TID number */ - mgmt->u.action.u.delba.params = cpu_to_le16(params); - mgmt->u.action.u.delba.reason_code = cpu_to_le16(reason_code); + mgmt->u.action.delba.params = cpu_to_le16(params); + mgmt->u.action.delba.reason_code = cpu_to_le16(reason_code); ieee80211_tx_skb(sdata, skb); } @@ -489,14 +505,14 @@ void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, u16 tid, params; u16 initiator; - params = le16_to_cpu(mgmt->u.action.u.delba.params); + params = le16_to_cpu(mgmt->u.action.delba.params); tid = (params & IEEE80211_DELBA_PARAM_TID_MASK) >> 12; initiator = (params & IEEE80211_DELBA_PARAM_INITIATOR_MASK) >> 11; ht_dbg_ratelimited(sdata, "delba from %pM (%s) tid %d reason code %d\n", mgmt->sa, initiator ? "initiator" : "recipient", tid, - le16_to_cpu(mgmt->u.action.u.delba.reason_code)); + le16_to_cpu(mgmt->u.action.delba.reason_code)); if (initiator == WLAN_BACK_INITIATOR) __ieee80211_stop_rx_ba_session(sta, tid, WLAN_BACK_INITIATOR, 0, @@ -530,20 +546,20 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, struct ieee80211_tx_info *info; u8 status_link_id = link_id < 0 ? 
0 : link_id; - /* 27 = header + category + action + smps mode */ - skb = dev_alloc_skb(27 + local->hw.extra_tx_headroom); + skb = dev_alloc_skb(IEEE80211_MIN_ACTION_SIZE(ht_smps) + + local->hw.extra_tx_headroom); if (!skb) return -ENOMEM; skb_reserve(skb, local->hw.extra_tx_headroom); - action_frame = skb_put(skb, 27); + action_frame = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(ht_smps)); memcpy(action_frame->da, da, ETH_ALEN); memcpy(action_frame->sa, sdata->dev->dev_addr, ETH_ALEN); memcpy(action_frame->bssid, bssid, ETH_ALEN); action_frame->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); action_frame->u.action.category = WLAN_CATEGORY_HT; - action_frame->u.action.u.ht_smps.action = WLAN_HT_ACTION_SMPS; + action_frame->u.action.action_code = WLAN_HT_ACTION_SMPS; switch (smps) { case IEEE80211_SMPS_AUTOMATIC: case IEEE80211_SMPS_NUM_MODES: @@ -551,15 +567,15 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, smps = IEEE80211_SMPS_OFF; fallthrough; case IEEE80211_SMPS_OFF: - action_frame->u.action.u.ht_smps.smps_control = + action_frame->u.action.ht_smps.smps_control = WLAN_HT_SMPS_CONTROL_DISABLED; break; case IEEE80211_SMPS_STATIC: - action_frame->u.action.u.ht_smps.smps_control = + action_frame->u.action.ht_smps.smps_control = WLAN_HT_SMPS_CONTROL_STATIC; break; case IEEE80211_SMPS_DYNAMIC: - action_frame->u.action.u.ht_smps.smps_control = + action_frame->u.action.ht_smps.smps_control = WLAN_HT_SMPS_CONTROL_DYNAMIC; break; } diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index 168f84a1353b..97292ff51475 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -9,7 +9,7 @@ * Copyright 2009, Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright(c) 2016 Intel Deutschland GmbH - * Copyright(c) 2018-2025 Intel Corporation + * Copyright(c) 2018-2026 Intel Corporation */ #include <linux/delay.h> @@ -888,19 +888,11 @@ 
ieee80211_rx_mgmt_spectrum_mgmt(struct ieee80211_sub_if_data *sdata, struct ieee80211_rx_status *rx_status, struct ieee802_11_elems *elems) { - int required_len; - - if (len < IEEE80211_MIN_ACTION_SIZE + 1) + if (len < IEEE80211_MIN_ACTION_SIZE(chan_switch)) return; /* CSA is the only action we handle for now */ - if (mgmt->u.action.u.measurement.action_code != - WLAN_ACTION_SPCT_CHL_SWITCH) - return; - - required_len = IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.chan_switch); - if (len < required_len) + if (mgmt->u.action.action_code != WLAN_ACTION_SPCT_CHL_SWITCH) return; if (!sdata->vif.bss_conf.csa_active) @@ -1022,7 +1014,8 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, ieee80211_chandef_ht_oper(elems->ht_operation, &chandef); memcpy(&htcap_ie, elems->ht_cap_elem, sizeof(htcap_ie)); - rates_updated |= ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, + rates_updated |= ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, + &sband->ht_cap, &htcap_ie, &sta->deflink); @@ -1041,6 +1034,7 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata, &chandef); memcpy(&cap_ie, elems->vht_cap_elem, sizeof(cap_ie)); ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, + &sband->vht_cap, &cap_ie, NULL, &sta->deflink); if (memcmp(&cap, &sta->sta.deflink.vht_cap, sizeof(cap))) @@ -1133,7 +1127,7 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, if (ieee80211_have_rx_timestamp(rx_status)) { /* time when timestamp field was received */ rx_timestamp = - ieee80211_calculate_rx_timestamp(local, rx_status, + ieee80211_calculate_rx_timestamp(&local->hw, rx_status, len + FCS_LEN, 24); } else { /* @@ -1613,12 +1607,12 @@ void ieee80211_ibss_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, case WLAN_CATEGORY_SPECTRUM_MGMT: ies_len = skb->len - offsetof(struct ieee80211_mgmt, - u.action.u.chan_switch.variable); + u.action.chan_switch.variable); if (ies_len < 0) break; - elems = 
ieee802_11_parse_elems(mgmt->u.action.u.chan_switch.variable, + elems = ieee802_11_parse_elems(mgmt->u.action.chan_switch.variable, ies_len, IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index e60b814dd89e..2a693406294b 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -256,6 +256,8 @@ struct ieee80211_rx_data { u8 pn[IEEE80211_CCMP_PN_LEN]; } ccm_gcm; }; + + u8 link_addrs[3 * ETH_ALEN]; }; struct ieee80211_csa_settings { @@ -926,6 +928,9 @@ struct ieee80211_chanctx { bool radar_detected; + /* This chanctx is in process of getting used */ + bool will_be_used; + /* MUST be last - ends in a flexible-array member. */ struct ieee80211_chanctx_conf conf; }; @@ -982,16 +987,33 @@ struct ieee80211_if_mntr { * * @conf: current NAN configuration * @started: true iff NAN is started - * @func_lock: lock for @func_inst_ids - * @function_inst_ids: a bitmap of available instance_id's + * @de: Discovery Engine state (only valid if !WIPHY_NAN_FLAGS_USERSPACE_DE) + * @de.func_lock: lock for @de.function_inst_ids + * @de.function_inst_ids: a bitmap of available instance_id's + * @removed_channels: bitmap of channels that should be removed from the NAN + * schedule once the deferred schedule update is completed. */ struct ieee80211_if_nan { struct cfg80211_nan_conf conf; bool started; - /* protects function_inst_ids */ - spinlock_t func_lock; - struct idr function_inst_ids; + struct { + /* protects function_inst_ids */ + spinlock_t func_lock; + struct idr function_inst_ids; + } de; + + DECLARE_BITMAP(removed_channels, IEEE80211_NAN_MAX_CHANNELS); +}; + +/** + * struct ieee80211_if_nan_data - NAN data path state + * + * @nmi: pointer to the NAN management interface sdata. Used for data path, + * hence RCU. 
+ */ +struct ieee80211_if_nan_data { + struct ieee80211_sub_if_data __rcu *nmi; }; struct ieee80211_link_data_managed { @@ -1192,6 +1214,7 @@ struct ieee80211_sub_if_data { struct ieee80211_if_ocb ocb; struct ieee80211_if_mntr mntr; struct ieee80211_if_nan nan; + struct ieee80211_if_nan_data nan_data; } u; struct ieee80211_link_data deflink; @@ -1917,10 +1940,6 @@ ieee80211_vif_get_num_mcast_if(struct ieee80211_sub_if_data *sdata) return -1; } -u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, - struct ieee80211_rx_status *status, - unsigned int mpdu_len, - unsigned int mpdu_offset); int ieee80211_hw_config(struct ieee80211_local *local, int radio_idx, u32 changed); int ieee80211_hw_conf_chan(struct ieee80211_local *local); @@ -2020,6 +2039,14 @@ int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata, int ieee80211_mesh_finish_csa(struct ieee80211_sub_if_data *sdata, u64 *changed); +/* NAN code */ +int ieee80211_nan_set_local_sched(struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_local_sched *sched); +int ieee80211_nan_set_peer_sched(struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_peer_sched *sched); +void ieee80211_nan_free_peer_sched(struct ieee80211_nan_peer_sched *sched); +void ieee80211_nan_update_ndi_carrier(struct ieee80211_sub_if_data *ndi_sdata); + /* scan/BSS handling */ void ieee80211_scan_work(struct wiphy *wiphy, struct wiphy_work *work); int ieee80211_request_ibss_scan(struct ieee80211_sub_if_data *sdata, @@ -2183,12 +2210,13 @@ void ieee80211_aggr_check(struct ieee80211_sub_if_data *sdata, void ieee80211_apply_htcap_overrides(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_ht_cap *ht_cap); bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, - struct ieee80211_supported_band *sband, + const struct ieee80211_sta_ht_cap *own_cap, const struct ieee80211_ht_cap *ht_cap_ie, struct link_sta_info *link_sta); void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, const 
u8 *da, u16 tid, - u16 initiator, u16 reason_code); + u16 initiator, u16 reason_code, + bool use_ndp); int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, const u8 *bssid, int link_id); @@ -2204,6 +2232,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, u8 dialog_token, u16 timeout, u16 start_seq_num, u16 ba_policy, u16 tid, u16 buf_size, bool tx, bool auto_seq, + bool req_ndp, const u8 addba_ext_data); void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, enum ieee80211_agg_stop_reason reason); @@ -2266,6 +2295,7 @@ void ieee80211_ht_handle_chanwidth_notif(struct ieee80211_local *local, void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, + const struct ieee80211_sta_vht_cap *own_vht_cap, const struct ieee80211_vht_cap *vht_cap_ie, const struct ieee80211_vht_cap *vht_cap_ie2, struct link_sta_info *link_sta); @@ -2306,6 +2336,12 @@ ieee80211_sta_rx_bw_to_chan_width(struct link_sta_info *sta); /* HE */ void +_ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_sta_he_cap *own_he_cap, + const u8 *he_cap_ie, u8 he_cap_len, + const struct ieee80211_he_6ghz_capa *he_6ghz_capa, + struct link_sta_info *link_sta); +void ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, const u8 *he_cap_ie, u8 he_cap_len, @@ -2329,6 +2365,8 @@ void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata, void ieee80211_s1g_cap_to_sta_s1g_cap(struct ieee80211_sub_if_data *sdata, const struct ieee80211_s1g_cap *s1g_cap_ie, struct link_sta_info *link_sta); +bool ieee80211_s1g_use_ndp_ba(const struct ieee80211_sub_if_data *sdata, + const struct sta_info *sta); /* Spectrum management */ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, @@ -2797,7 +2835,17 @@ int ieee80211_max_num_channels(struct 
ieee80211_local *local, int radio_idx); u32 ieee80211_get_radio_mask(struct wiphy *wiphy, struct net_device *dev); void ieee80211_recalc_chanctx_chantype(struct ieee80211_local *local, struct ieee80211_chanctx *ctx); - +struct ieee80211_chanctx * +ieee80211_find_or_create_chanctx(struct ieee80211_sub_if_data *sdata, + const struct ieee80211_chan_req *chanreq, + enum ieee80211_chanctx_mode mode, + bool assign_on_failure, + bool *reused_ctx); +void ieee80211_free_chanctx(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx, + bool skip_idle_recalc); +int ieee80211_chanctx_num_assigned(struct ieee80211_local *local, + struct ieee80211_chanctx *ctx); /* TDLS */ int ieee80211_tdls_mgmt(struct wiphy *wiphy, struct net_device *dev, const u8 *peer, int link_id, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 676b2a43c9f2..95b779c4d627 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -362,6 +362,17 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, return -EBUSY; /* + * A NAN DATA interface is correlated to the NAN + * (management) one + */ + if (iftype == NL80211_IFTYPE_NAN_DATA && + nsdata->vif.type == NL80211_IFTYPE_NAN) { + if (!nsdata->u.nan.started) + return -EINVAL; + rcu_assign_pointer(sdata->u.nan_data.nmi, nsdata); + } + + /* * Allow only a single IBSS interface to be up at any * time. This is restricted because beacon distribution * cannot work properly if both are in the same IBSS. 
@@ -398,13 +409,6 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, nsdata->vif.type)) return -ENOTUNIQ; - /* No support for VLAN with MLO yet */ - if (iftype == NL80211_IFTYPE_AP_VLAN && - sdata->wdev.use_4addr && - nsdata->vif.type == NL80211_IFTYPE_AP && - nsdata->vif.valid_links) - return -EOPNOTSUPP; - /* * can only add VLANs to enabled APs */ @@ -475,6 +479,7 @@ static int ieee80211_open(struct net_device *dev) static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_down) { struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *iter; unsigned long flags; struct sk_buff_head freeq; struct sk_buff *skb, *tmp; @@ -523,12 +528,14 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do * (because if we remove a STA after ops->remove_interface() * the driver will have removed the vif info already!) * - * For AP_VLANs stations may exist since there's nothing else that - * would have removed them, but in other modes there shouldn't - * be any stations. + * For AP_VLANs, NAN and NAN_DATA stations may exist since there's + * nothing else that would have removed them, but in other modes there + * shouldn't be any stations. 
*/ flushed = sta_info_flush(sdata, -1); - WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_AP_VLAN && flushed > 0); + WARN_ON_ONCE(sdata->vif.type != NL80211_IFTYPE_AP_VLAN && + sdata->vif.type != NL80211_IFTYPE_NAN && + sdata->vif.type != NL80211_IFTYPE_NAN_DATA && flushed > 0); /* don't count this interface for allmulti while it is down */ if (sdata->flags & IEEE80211_SDATA_ALLMULTI) @@ -621,17 +628,30 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do } break; case NL80211_IFTYPE_NAN: + /* Check if any open NAN_DATA interfaces */ + list_for_each_entry(iter, &local->interfaces, list) { + WARN_ON(iter->vif.type == NL80211_IFTYPE_NAN_DATA && + ieee80211_sdata_running(iter)); + } + /* clean all the functions */ - spin_lock_bh(&sdata->u.nan.func_lock); + if (!(local->hw.wiphy->nan_capa.flags & + WIPHY_NAN_FLAGS_USERSPACE_DE)) { + spin_lock_bh(&sdata->u.nan.de.func_lock); + + idr_for_each_entry(&sdata->u.nan.de.function_inst_ids, + func, i) { + idr_remove(&sdata->u.nan.de.function_inst_ids, i); + cfg80211_free_nan_func(func); + } + idr_destroy(&sdata->u.nan.de.function_inst_ids); - idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, i) { - idr_remove(&sdata->u.nan.function_inst_ids, i); - cfg80211_free_nan_func(func); + spin_unlock_bh(&sdata->u.nan.de.func_lock); } - idr_destroy(&sdata->u.nan.function_inst_ids); - - spin_unlock_bh(&sdata->u.nan.func_lock); break; + case NL80211_IFTYPE_NAN_DATA: + RCU_INIT_POINTER(sdata->u.nan_data.nmi, NULL); + fallthrough; default: wiphy_work_cancel(sdata->local->hw.wiphy, &sdata->work); /* @@ -682,6 +702,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_do if (sdata->vif.txq) ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq)); + if (sdata->vif.txq_mgmt) + ieee80211_txq_purge(sdata->local, + to_txq_info(sdata->vif.txq_mgmt)); + sdata->bss = NULL; if (local->open_count == 0) @@ -878,6 +902,14 @@ static void ieee80211_teardown_sdata(struct 
ieee80211_sub_if_data *sdata) ieee80211_vif_clear_links(sdata); ieee80211_link_stop(&sdata->deflink); + + if (sdata->vif.type == NL80211_IFTYPE_NAN) { + struct ieee80211_nan_sched_cfg *nan_sched = + &sdata->vif.cfg.nan_sched; + + for (int i = 0; i < ARRAY_SIZE(nan_sched->channels); i++) + WARN_ON(nan_sched->channels[i].chanreq.oper.chan); + } } static void ieee80211_uninit(struct net_device *dev) @@ -1222,14 +1254,14 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local, } } - set_bit(SDATA_STATE_RUNNING, &sdata->state); - ret = ieee80211_check_queues(sdata, NL80211_IFTYPE_MONITOR); if (ret) { kfree(sdata); return ret; } + set_bit(SDATA_STATE_RUNNING, &sdata->state); + mutex_lock(&local->iflist_mtx); rcu_assign_pointer(local->monitor_sdata, sdata); mutex_unlock(&local->iflist_mtx); @@ -1242,6 +1274,7 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local, mutex_unlock(&local->iflist_mtx); synchronize_net(); drv_remove_interface(local, sdata); + clear_bit(SDATA_STATE_RUNNING, &sdata->state); kfree(sdata); return ret; } @@ -1360,8 +1393,6 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) break; } case NL80211_IFTYPE_AP: - sdata->bss = &sdata->u.ap; - break; case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_MONITOR: @@ -1371,6 +1402,10 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) case NL80211_IFTYPE_NAN: /* no special treatment */ break; + case NL80211_IFTYPE_NAN_DATA: + if (WARN_ON(!rcu_access_pointer(sdata->u.nan_data.nmi))) + return -ENOLINK; + break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_P2P_CLIENT: @@ -1386,8 +1421,13 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) local->reconfig_failure = false; res = drv_start(local); - if (res) - goto err_del_bss; + if (res) { + /* + * no need to worry about AP_VLAN/NAN_DATA cleanup since + * in that case we can't have open_count == 0 + */ + return res; + } 
ieee80211_led_radio(local, true); ieee80211_mod_tpt_led_trig(local, IEEE80211_TPT_LEDTRIG_FL_RADIO, 0); @@ -1458,6 +1498,9 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) netif_carrier_on(dev); list_add_tail_rcu(&sdata->u.mntr.list, &local->mon_list); break; + case NL80211_IFTYPE_AP: + sdata->bss = &sdata->u.ap; + fallthrough; default: if (coming_up) { ieee80211_del_virtual_monitor(local); @@ -1500,6 +1543,7 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) case NL80211_IFTYPE_AP: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_OCB: + case NL80211_IFTYPE_NAN_DATA: netif_carrier_off(dev); break; case NL80211_IFTYPE_P2P_DEVICE: @@ -1546,12 +1590,12 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) err_stop: if (!local->open_count) drv_stop(local, false); - err_del_bss: - sdata->bss = NULL; + if (sdata->vif.type == NL80211_IFTYPE_NAN_DATA) + RCU_INIT_POINTER(sdata->u.nan_data.nmi, NULL); if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) list_del(&sdata->u.vlan.list); - /* might already be clear but that doesn't matter */ - clear_bit(SDATA_STATE_RUNNING, &sdata->state); + /* Might not be initialized yet, but it is harmless */ + sdata->bss = NULL; return res; } @@ -1579,16 +1623,19 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, sta = sta_info_get_bss(sdata, mgmt->sa); if (sta) { - switch (mgmt->u.action.u.addba_req.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_ACTION_ADDBA_REQ: + case WLAN_ACTION_NDP_ADDBA_REQ: ieee80211_process_addba_request(local, sta, mgmt, len); break; case WLAN_ACTION_ADDBA_RESP: + case WLAN_ACTION_NDP_ADDBA_RESP: ieee80211_process_addba_resp(local, sta, mgmt, len); break; case WLAN_ACTION_DELBA: + case WLAN_ACTION_NDP_DELBA: ieee80211_process_delba(sdata, sta, mgmt, len); break; @@ -1599,9 +1646,9 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, } } else if (ieee80211_is_action(mgmt->frame_control) && 
mgmt->u.action.category == WLAN_CATEGORY_HT) { - switch (mgmt->u.action.u.ht_smps.action) { + switch (mgmt->u.action.action_code) { case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: { - u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth; + u8 chanwidth = mgmt->u.action.ht_notify_cw.chanwidth; struct ieee80211_rx_status *status; struct link_sta_info *link_sta; struct sta_info *sta; @@ -1628,7 +1675,7 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, } } else if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_VHT) { - switch (mgmt->u.action.u.vht_group_notif.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_VHT_ACTION_OPMODE_NOTIF: { struct ieee80211_rx_status *status; enum nl80211_band band; @@ -1637,7 +1684,7 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, status = IEEE80211_SKB_RXCB(skb); band = status->band; - opmode = mgmt->u.action.u.vht_opmode_notif.operating_mode; + opmode = mgmt->u.action.vht_opmode_notif.operating_mode; sta = sta_info_get_bss(sdata, mgmt->sa); @@ -1658,7 +1705,7 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, } } else if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_S1G) { - switch (mgmt->u.action.u.s1g.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_S1G_TWT_TEARDOWN: case WLAN_S1G_TWT_SETUP: ieee80211_s1g_rx_twt_action(sdata, skb); @@ -1669,7 +1716,7 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, } else if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_PROTECTED_EHT) { if (sdata->vif.type == NL80211_IFTYPE_AP) { - switch (mgmt->u.action.u.eml_omn.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_PROTECTED_EHT_ACTION_EML_OP_MODE_NOTIF: ieee80211_rx_eml_op_mode_notif(sdata, skb); break; @@ -1677,7 +1724,7 @@ static void ieee80211_iface_process_skb(struct ieee80211_local *local, break; } 
} else if (sdata->vif.type == NL80211_IFTYPE_STATION) { - switch (mgmt->u.action.u.ttlm_req.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_PROTECTED_EHT_ACTION_TTLM_REQ: ieee80211_process_neg_ttlm_req(sdata, mgmt, skb->len); @@ -1765,7 +1812,7 @@ static void ieee80211_iface_process_status(struct ieee80211_sub_if_data *sdata, if (ieee80211_is_action(mgmt->frame_control) && mgmt->u.action.category == WLAN_CATEGORY_S1G) { - switch (mgmt->u.action.u.s1g.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_S1G_TWT_TEARDOWN: case WLAN_S1G_TWT_SETUP: ieee80211_s1g_status_twt_action(sdata, skb); @@ -1929,14 +1976,19 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, MONITOR_FLAG_OTHER_BSS; break; case NL80211_IFTYPE_NAN: - idr_init(&sdata->u.nan.function_inst_ids); - spin_lock_init(&sdata->u.nan.func_lock); + if (!(sdata->local->hw.wiphy->nan_capa.flags & + WIPHY_NAN_FLAGS_USERSPACE_DE)) { + idr_init(&sdata->u.nan.de.function_inst_ids); + spin_lock_init(&sdata->u.nan.de.func_lock); + } sdata->vif.bss_conf.bssid = sdata->vif.addr; break; case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_DEVICE: sdata->vif.bss_conf.bssid = sdata->vif.addr; break; + case NL80211_IFTYPE_NAN_DATA: + break; case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_WDS: case NUM_NL80211_IFTYPES: @@ -2212,10 +2264,16 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, lockdep_assert_wiphy(local->hw.wiphy); if (type == NL80211_IFTYPE_P2P_DEVICE || type == NL80211_IFTYPE_NAN) { + int size = ALIGN(sizeof(*sdata) + local->hw.vif_data_size, + sizeof(void *)); struct wireless_dev *wdev; + int txq_size = 0; + + if (type == NL80211_IFTYPE_NAN) + txq_size = sizeof(struct txq_info) + + local->hw.txq_data_size; - sdata = kzalloc(sizeof(*sdata) + local->hw.vif_data_size, - GFP_KERNEL); + sdata = kzalloc(size + txq_size, GFP_KERNEL); if (!sdata) return -ENOMEM; wdev = &sdata->wdev; @@ -2225,6 +2283,16 @@ int ieee80211_if_add(struct 
ieee80211_local *local, const char *name, ieee80211_assign_perm_addr(local, wdev->address, type); memcpy(sdata->vif.addr, wdev->address, ETH_ALEN); ether_addr_copy(sdata->vif.bss_conf.addr, sdata->vif.addr); + + /* + * Add a management TXQ for NAN devices which includes frames + * that will only be transmitted during discovery windows (DWs) + */ + if (type == NL80211_IFTYPE_NAN) { + txqi = (struct txq_info *)((unsigned long)sdata + size); + ieee80211_txq_init(sdata, NULL, txqi, + IEEE80211_NUM_TIDS); + } } else { int size = ALIGN(sizeof(*sdata) + local->hw.vif_data_size, sizeof(void *)); @@ -2375,6 +2443,10 @@ void ieee80211_if_remove(struct ieee80211_sub_if_data *sdata) if (sdata->vif.txq) ieee80211_txq_purge(sdata->local, to_txq_info(sdata->vif.txq)); + if (sdata->vif.txq_mgmt) + ieee80211_txq_purge(sdata->local, + to_txq_info(sdata->vif.txq_mgmt)); + synchronize_rcu(); cfg80211_unregister_wdev(&sdata->wdev); diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 03bfca27d205..93e290dd783f 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -14,26 +14,38 @@ static void ieee80211_update_apvlan_links(struct ieee80211_sub_if_data *sdata) { + unsigned long rem = ~sdata->vif.valid_links & + GENMASK(IEEE80211_MLD_MAX_NUM_LINKS - 1, 0); + struct ieee80211_local *local = sdata->local; + unsigned long add = sdata->vif.valid_links; + struct wiphy *wiphy = local->hw.wiphy; struct ieee80211_sub_if_data *vlan; struct ieee80211_link_data *link; - u16 ap_bss_links = sdata->vif.valid_links; - u16 new_links, vlan_links; - unsigned long add; + struct sta_info *sta; list_for_each_entry(vlan, &sdata->u.ap.vlans, u.vlan.list) { int link_id; - /* No support for 4addr with MLO yet */ - if (vlan->wdev.use_4addr) - return; + if (vlan->wdev.use_4addr) { + sta = wiphy_dereference(wiphy, + vlan->u.vlan.sta); + if (sta) + add = add & sta->sta.valid_links; + } - vlan_links = vlan->vif.valid_links; + if (add == vlan->vif.valid_links) + continue; - new_links = ap_bss_links; + 
for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { + vlan->wdev.valid_links |= BIT(link_id); + ether_addr_copy(vlan->wdev.links[link_id].addr, + sdata->wdev.links[link_id].addr); + } - add = new_links & ~vlan_links; - if (!add) - continue; + for_each_set_bit(link_id, &rem, IEEE80211_MLD_MAX_NUM_LINKS) { + vlan->wdev.valid_links &= ~BIT(link_id); + eth_zero_addr(vlan->wdev.links[link_id].addr); + } ieee80211_vif_set_links(vlan, add, 0); @@ -96,8 +108,13 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata, ap_bss = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); - ap_bss_conf = sdata_dereference(ap_bss->vif.link_conf[link_id], - ap_bss); + + if (deflink) + ap_bss_conf = &ap_bss->vif.bss_conf; + else + ap_bss_conf = sdata_dereference(ap_bss->vif.link_conf[link_id], + ap_bss); + memcpy(link_conf, ap_bss_conf, sizeof(*link_conf)); } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 616f86b1a7e4..f47dd58770ad 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -915,6 +915,7 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len, wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_TXQS); wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_RRM); + wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_IEEE8021X_AUTH); wiphy->bss_priv_size = sizeof(struct ieee80211_bss); @@ -1117,6 +1118,19 @@ ieee80211_ifcomb_check(const struct ieee80211_iface_combination *c, int n_comb) return true; } +static void ieee80211_create_default_chandef(struct cfg80211_chan_def *chandef, + struct ieee80211_channel *chan) +{ + *chandef = (struct cfg80211_chan_def) { + .chan = chan, + .width = chan->band == NL80211_BAND_S1GHZ ? 
+ NL80211_CHAN_WIDTH_1 : + NL80211_CHAN_WIDTH_20_NOHT, + .center_freq1 = chan->center_freq, + .freq1_offset = chan->freq_offset, + }; +} + int ieee80211_register_hw(struct ieee80211_hw *hw) { struct ieee80211_local *local = hw_to_local(hw); @@ -1143,7 +1157,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) if (WARN_ON(local->hw.wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN) && - (!local->ops->start_nan || !local->ops->stop_nan))) + ((!local->ops->start_nan || !local->ops->stop_nan) || + (local->hw.wiphy->nan_capa.flags & WIPHY_NAN_FLAGS_USERSPACE_DE && + (local->ops->add_nan_func || local->ops->del_nan_func))))) return -EINVAL; if (hw->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO) { @@ -1260,9 +1276,8 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) /* if none found then use the first anyway */ if (i == sband->n_channels) i = 0; - cfg80211_chandef_create(&dflt_chandef, - &sband->channels[i], - NL80211_CHAN_NO_HT); + ieee80211_create_default_chandef(&dflt_chandef, + &sband->channels[i]); /* init channel we're on */ local->monitor_chanreq.oper = dflt_chandef; if (local->emulate_chanctx) { @@ -1597,6 +1612,15 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) local->sband_allocated |= BIT(band); } + /* + * mac80211 supports EPPKE, if the driver supports (Re)Association + * frame encryption + */ + if (wiphy_ext_feature_isset(local->hw.wiphy, + NL80211_EXT_FEATURE_ASSOC_FRAME_ENCRYPTION)) + wiphy_ext_feature_set(local->hw.wiphy, + NL80211_EXT_FEATURE_EPPKE); + result = wiphy_register(local->hw.wiphy); if (result < 0) goto fail_wiphy_register; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 8fdbdf9ba2a9..04578447df9b 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. 
- * Copyright (C) 2018 - 2025 Intel Corporation + * Copyright (C) 2018-2026 Intel Corporation * Authors: Luis Carlos Cobo <luisca@cozybit.com> * Javier Cardona <javier@cozybit.com> */ @@ -19,8 +19,7 @@ static struct kmem_cache *rm_cache; bool mesh_action_is_path_sel(struct ieee80211_mgmt *mgmt) { - return (mgmt->u.action.u.mesh_action.action_code == - WLAN_MESH_ACTION_HWMP_PATH_SELECTION); + return mgmt->u.action.action_code == WLAN_MESH_ACTION_HWMP_PATH_SELECTION; } void ieee80211s_init(void) @@ -1621,13 +1620,12 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, size_t baselen; u8 *pos; - if (mgmt->u.action.u.measurement.action_code != - WLAN_ACTION_SPCT_CHL_SWITCH) + if (mgmt->u.action.action_code != WLAN_ACTION_SPCT_CHL_SWITCH) return; - pos = mgmt->u.action.u.chan_switch.variable; + pos = mgmt->u.action.chan_switch.variable; baselen = offsetof(struct ieee80211_mgmt, - u.action.u.chan_switch.variable); + u.action.chan_switch.variable); elems = ieee802_11_parse_elems(pos, len - baselen, IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION, @@ -1673,7 +1671,7 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata, { switch (mgmt->u.action.category) { case WLAN_CATEGORY_SELF_PROTECTED: - switch (mgmt->u.action.u.self_prot.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_SP_MESH_PEERING_OPEN: case WLAN_SP_MESH_PEERING_CLOSE: case WLAN_SP_MESH_PEERING_CONFIRM: diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index 98d5aaa36d00..9d89ebcce1c1 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. 
- * Copyright (C) 2019, 2021-2023, 2025 Intel Corporation + * Copyright (C) 2019, 2021-2023, 2025-2026 Intel Corporation * Author: Luis Carlos Cobo <luisca@cozybit.com> */ @@ -105,12 +105,11 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, u32 lifetime, u32 metric, u32 preq_id, struct ieee80211_sub_if_data *sdata) { + int hdr_len = IEEE80211_MIN_ACTION_SIZE(mesh_action); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u8 *pos, ie_len; - int hdr_len = offsetofend(struct ieee80211_mgmt, - u.action.u.mesh_action); skb = dev_alloc_skb(local->tx_headroom + hdr_len + @@ -127,8 +126,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, /* BSSID == SA */ memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_MESH_ACTION; - mgmt->u.action.u.mesh_action.action_code = - WLAN_MESH_ACTION_HWMP_PATH_SELECTION; + mgmt->u.action.action_code = WLAN_MESH_ACTION_HWMP_PATH_SELECTION; switch (action) { case MPATH_PREQ: @@ -237,13 +235,12 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, u8 ttl, const u8 *target, u32 target_sn, u16 target_rcode, const u8 *ra) { + int hdr_len = IEEE80211_MIN_ACTION_SIZE(mesh_action); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_mgmt *mgmt; u8 *pos, ie_len; - int hdr_len = offsetofend(struct ieee80211_mgmt, - u.action.u.mesh_action); if (time_before(jiffies, ifmsh->next_perr)) return -EAGAIN; @@ -265,8 +262,7 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, /* BSSID == SA */ memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_MESH_ACTION; - mgmt->u.action.u.mesh_action.action_code = - WLAN_MESH_ACTION_HWMP_PATH_SELECTION; + mgmt->u.action.action_code = WLAN_MESH_ACTION_HWMP_PATH_SELECTION; ie_len = 15; pos = skb_put(skb, 2 + ie_len); *pos++ = WLAN_EID_PERR; @@ -938,7 +934,7 
@@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, struct sta_info *sta; /* need action_code */ - if (len < IEEE80211_MIN_ACTION_SIZE + 1) + if (len < IEEE80211_MIN_ACTION_SIZE(mesh_action)) return; rcu_read_lock(); @@ -949,8 +945,8 @@ void mesh_rx_path_sel_frame(struct ieee80211_sub_if_data *sdata, } rcu_read_unlock(); - baselen = (u8 *) mgmt->u.action.u.mesh_action.variable - (u8 *) mgmt; - elems = ieee802_11_parse_elems(mgmt->u.action.u.mesh_action.variable, + baselen = mgmt->u.action.mesh_action.variable - (u8 *)mgmt; + elems = ieee802_11_parse_elems(mgmt->u.action.mesh_action.variable, len - baselen, IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION, diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index 04c931cd2063..7cbab90c8784 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (c) 2008, 2009 open80211s Ltd. - * Copyright (C) 2019, 2021-2025 Intel Corporation + * Copyright (C) 2019, 2021-2026 Intel Corporation * Author: Luis Carlos Cobo <luisca@cozybit.com> */ #include <linux/gfp.h> @@ -13,7 +13,7 @@ #include "rate.h" #include "mesh.h" -#define PLINK_CNF_AID(mgmt) ((mgmt)->u.action.u.self_prot.variable + 2) +#define PLINK_CNF_AID(mgmt) ((mgmt)->u.action.self_prot.variable + 2) #define PLINK_GET_LLID(p) (p + 2) #define PLINK_GET_PLID(p) (p + 4) @@ -215,6 +215,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, enum ieee80211_self_protected_actioncode action, u8 *da, u16 llid, u16 plid, u16 reason) { + int hdr_len = IEEE80211_MIN_ACTION_SIZE(self_prot); struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_tx_info *info; @@ -223,7 +224,6 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, u16 peering_proto = 0; u8 *pos, ie_len = 4; u8 ie_len_he_cap, ie_len_eht_cap; - int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot); int err = -ENOMEM; ie_len_he_cap 
= ieee80211_ie_len_he_cap(sdata); @@ -260,7 +260,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_SELF_PROTECTED; - mgmt->u.action.u.self_prot.action_code = action; + mgmt->u.action.action_code = action; if (action != WLAN_SP_MESH_PEERING_CLOSE) { struct ieee80211_supported_band *sband; @@ -450,12 +450,13 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata, changed |= IEEE80211_RC_SUPP_RATES_CHANGED; sta->sta.deflink.supp_rates[sband->band] = rates; - if (ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, + if (ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, &sband->ht_cap, elems->ht_cap_elem, &sta->deflink)) changed |= IEEE80211_RC_BW_CHANGED; ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, + &sband->vht_cap, elems->vht_cap_elem, NULL, &sta->deflink); @@ -711,7 +712,7 @@ void mesh_plink_timer(struct timer_list *t) "Mesh plink for %pM (retry, timeout): %d %d\n", sta->sta.addr, sta->mesh->plink_retries, sta->mesh->plink_timeout); - get_random_bytes(&rand, sizeof(u32)); + rand = get_random_u32(); sta->mesh->plink_timeout = sta->mesh->plink_timeout + rand % sta->mesh->plink_timeout; ++sta->mesh->plink_retries; @@ -1141,7 +1142,7 @@ mesh_process_plink_frame(struct ieee80211_sub_if_data *sdata, return; } - ftype = mgmt->u.action.u.self_prot.action_code; + ftype = mgmt->u.action.action_code; if ((ftype == WLAN_SP_MESH_PEERING_OPEN && ie_len != 4) || (ftype == WLAN_SP_MESH_PEERING_CONFIRM && ie_len != 6) || (ftype == WLAN_SP_MESH_PEERING_CLOSE && ie_len != 6 @@ -1224,8 +1225,8 @@ void mesh_rx_plink_frame(struct ieee80211_sub_if_data *sdata, size_t baselen; u8 *baseaddr; - /* need action_code, aux */ - if (len < IEEE80211_MIN_ACTION_SIZE + 3) + /* need aux */ + if (len < IEEE80211_MIN_ACTION_SIZE(self_prot) + 1) return; if (sdata->u.mesh.user_mpm) @@ -1238,10 +1239,9 @@ void mesh_rx_plink_frame(struct 
ieee80211_sub_if_data *sdata, return; } - baseaddr = mgmt->u.action.u.self_prot.variable; - baselen = (u8 *) mgmt->u.action.u.self_prot.variable - (u8 *) mgmt; - if (mgmt->u.action.u.self_prot.action_code == - WLAN_SP_MESH_PEERING_CONFIRM) { + baseaddr = mgmt->u.action.self_prot.variable; + baselen = mgmt->u.action.self_prot.variable - (u8 *)mgmt; + if (mgmt->u.action.action_code == WLAN_SP_MESH_PEERING_CONFIRM) { baseaddr += 4; baselen += 4; diff --git a/net/mac80211/mesh_sync.c b/net/mac80211/mesh_sync.c index 3a66b4cefca7..24a68eef7db8 100644 --- a/net/mac80211/mesh_sync.c +++ b/net/mac80211/mesh_sync.c @@ -103,7 +103,7 @@ mesh_sync_offset_rx_bcn_presp(struct ieee80211_sub_if_data *sdata, u16 stype, * section. */ if (ieee80211_have_rx_timestamp(rx_status)) - t_r = ieee80211_calculate_rx_timestamp(local, rx_status, + t_r = ieee80211_calculate_rx_timestamp(&local->hw, rx_status, len + FCS_LEN, 24); else t_r = drv_get_tsf(local, sdata); diff --git a/net/mac80211/michael.h b/net/mac80211/michael.h deleted file mode 100644 index a7fdb8e84615..000000000000 --- a/net/mac80211/michael.h +++ /dev/null @@ -1,22 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Michael MIC implementation - optimized for TKIP MIC operations - * Copyright 2002-2003, Instant802 Networks, Inc. 
- */ - -#ifndef MICHAEL_H -#define MICHAEL_H - -#include <linux/types.h> -#include <linux/ieee80211.h> - -#define MICHAEL_MIC_LEN 8 - -struct michael_mic_ctx { - u32 l, r; -}; - -void michael_mic(const u8 *key, struct ieee80211_hdr *hdr, - const u8 *data, size_t data_len, u8 *mic); - -#endif /* MICHAEL_H */ diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 810bea1aacc5..160ae65a5c64 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -216,6 +216,24 @@ ieee80211_determine_ap_chan(struct ieee80211_sub_if_data *sdata, return IEEE80211_CONN_MODE_LEGACY; } + if (eht_oper && ieee80211_hw_check(&sdata->local->hw, STRICT)) { + struct cfg80211_chan_def he_chandef = *chandef; + + if (!ieee80211_chandef_he_6ghz_oper(sdata->local, + he_oper, NULL, + &he_chandef)) { + sdata_info(sdata, + "bad HE operation in EHT AP\n"); + return IEEE80211_CONN_MODE_LEGACY; + } + + if (!cfg80211_chandef_compatible(chandef, + &he_chandef)) { + sdata_info(sdata, "HE/EHT incompatible\n"); + return IEEE80211_CONN_MODE_LEGACY; + } + } + if (mode <= IEEE80211_CONN_MODE_EHT) return mode; goto check_uhr; @@ -2496,9 +2514,9 @@ void ieee80211_send_4addr_nullfunc(struct ieee80211_local *local, fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC | IEEE80211_FCTL_FROMDS | IEEE80211_FCTL_TODS); nullfunc->frame_control = fc; - memcpy(nullfunc->addr1, sdata->deflink.u.mgd.bssid, ETH_ALEN); + memcpy(nullfunc->addr1, sdata->vif.cfg.ap_addr, ETH_ALEN); memcpy(nullfunc->addr2, sdata->vif.addr, ETH_ALEN); - memcpy(nullfunc->addr3, sdata->deflink.u.mgd.bssid, ETH_ALEN); + memcpy(nullfunc->addr3, sdata->vif.cfg.ap_addr, ETH_ALEN); memcpy(nullfunc->addr4, sdata->vif.addr, ETH_ALEN); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; @@ -4920,7 +4938,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - u16 auth_alg, auth_transaction, status_code; + u16 
auth_alg, auth_transaction, status_code, encap_len; struct ieee80211_event event = { .type = MLME_EVENT, .u.mlme.data = AUTH_EVENT, @@ -4929,6 +4947,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, .subtype = IEEE80211_STYPE_AUTH, }; bool sae_need_confirm = false; + bool auth_fail = false; lockdep_assert_wiphy(sdata->local->hw.wiphy); @@ -4945,6 +4964,15 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, auth_transaction = le16_to_cpu(mgmt->u.auth.auth_transaction); status_code = le16_to_cpu(mgmt->u.auth.status_code); + /* + * IEEE 802.1X Authentication: + * Header + Authentication Algorithm Number(2 byte) + Authentication + * Transaction Sequence Number(2 byte) + Status Code(2 byte) + + * Encapsulation Length(2 byte). + */ + if (auth_alg == WLAN_AUTH_IEEE8021X && len < 24 + 8) + return; + info.link_id = ifmgd->auth_data->link_id; if (auth_alg != ifmgd->auth_data->algorithm || @@ -4960,7 +4988,24 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, goto notify_driver; } - if (status_code != WLAN_STATUS_SUCCESS) { + switch (auth_alg) { + case WLAN_AUTH_IEEE8021X: + if (status_code != WLAN_STATUS_SUCCESS && + status_code != WLAN_STATUS_8021X_AUTH_SUCCESS) + auth_fail = true; + + if (!auth_fail) { + /* Indicates length of encapsulated EAPOL PDU */ + encap_len = get_unaligned_le16(mgmt->u.auth.variable); + } + break; + default: + if (status_code != WLAN_STATUS_SUCCESS) + auth_fail = true; + break; + } + + if (auth_fail) { cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len); if (auth_alg == WLAN_AUTH_SAE && @@ -4997,6 +5042,7 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, case WLAN_AUTH_FILS_SK_PFS: case WLAN_AUTH_FILS_PK: case WLAN_AUTH_EPPKE: + case WLAN_AUTH_IEEE8021X: break; case WLAN_AUTH_SHARED_KEY: if (ifmgd->auth_data->expected_transaction != 4) { @@ -5017,8 +5063,37 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata, if 
(ifmgd->auth_data->algorithm != WLAN_AUTH_SAE || (auth_transaction == 2 && ifmgd->auth_data->expected_transaction == 2)) { - if (!ieee80211_mark_sta_auth(sdata)) - return; /* ignore frame -- wait for timeout */ + switch (ifmgd->auth_data->algorithm) { + case WLAN_AUTH_IEEE8021X: + /* + * IEEE 802.1X authentication: + * - When the full EAP handshake completes over the + * Authentication process, the responder sets the + * Status Code to WLAN_STATUS_8021X_AUTH_SUCCESS as + * specified in "IEEE P802.11bi/D4.0, 12.16.5". + * + * - In the PMKSA caching case, only two Authentication + * frames are exchanged if the responder (e.g., AP) + * identifies a valid PMKSA, then as specified in + * "IEEE P802.11bi/D4.0, 12.16.8.3", the responder + * shall set the Status Code to SUCCESS in the final + * Authentication frame and must not include an + * encapsulated EAPOL PDU. + * + * Both conditions are treated as successful + * authentication, so mark the state to Authenticated. + */ + if (status_code != WLAN_STATUS_8021X_AUTH_SUCCESS && + !(status_code == WLAN_STATUS_SUCCESS && + encap_len == 0)) + break; + fallthrough; + default: + if (!ieee80211_mark_sta_auth(sdata)) + return; /* ignore frame -- wait for timeout */ + + break; + } } else if (ifmgd->auth_data->algorithm == WLAN_AUTH_SAE && auth_transaction == 1) { sae_need_confirm = true; @@ -5511,7 +5586,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, /* Set up internal HT/VHT capabilities */ if (elems->ht_cap_elem && link->u.mgd.conn.mode >= IEEE80211_CONN_MODE_HT) - ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, sband, + ieee80211_ht_cap_ie_to_sta_ht_cap(sdata, &sband->ht_cap, elems->ht_cap_elem, link_sta); @@ -5547,6 +5622,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, } ieee80211_vht_cap_ie_to_sta_vht_cap(sdata, sband, + &sband->vht_cap, elems->vht_cap_elem, bss_vht_cap, link_sta); rcu_read_unlock(); @@ -6009,7 +6085,8 @@ ieee80211_determine_our_sta_mode(struct 
ieee80211_sub_if_data *sdata, if (is_5ghz && !(vht_cap.cap & (IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ | - IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ))) { + IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ | + IEEE80211_VHT_CAP_EXT_NSS_BW_MASK))) { conn->bw_limit = IEEE80211_CONN_BW_LIMIT_80; mlme_link_id_dbg(sdata, link_id, "no VHT 160 MHz capability on 5 GHz, limiting to 80 MHz"); @@ -6664,7 +6741,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, sdata_info(sdata, "RX %sssocResp from %pM (capab=0x%x status=%d aid=%d)\n", reassoc ? "Rea" : "A", assoc_data->ap_addr, - capab_info, status_code, (u16)(aid & ~(BIT(15) | BIT(14)))); + capab_info, status_code, aid); ifmgd->broken_ap = false; @@ -7001,6 +7078,7 @@ static void ieee80211_ml_reconf_work(struct wiphy *wiphy, container_of(work, struct ieee80211_sub_if_data, u.mgd.ml_reconf_work.work); u16 new_valid_links, new_active_links, new_dormant_links; + struct sta_info *sta; int ret; if (!sdata->u.mgd.removed_links) @@ -7036,6 +7114,16 @@ static void ieee80211_ml_reconf_work(struct wiphy *wiphy, } } + sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr); + if (sta) { + unsigned long removed_links = sdata->u.mgd.removed_links; + unsigned int link_id; + + for_each_set_bit(link_id, &removed_links, + IEEE80211_MLD_MAX_NUM_LINKS) + ieee80211_sta_remove_link(sta, link_id); + } + new_dormant_links = sdata->vif.dormant_links & ~sdata->u.mgd.removed_links; ret = ieee80211_vif_set_links(sdata, new_valid_links, @@ -7900,7 +7988,7 @@ ieee80211_send_neg_ttlm_req(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; struct sk_buff *skb; - int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_req); + int hdr_len = IEEE80211_MIN_ACTION_SIZE(ttlm_req); int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 + 2 * 2 * IEEE80211_TTLM_NUM_TIDS; @@ -7917,9 +8005,8 @@ ieee80211_send_neg_ttlm_req(struct ieee80211_sub_if_data 
*sdata, memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT; - mgmt->u.action.u.ttlm_req.action_code = - WLAN_PROTECTED_EHT_ACTION_TTLM_REQ; - mgmt->u.action.u.ttlm_req.dialog_token = dialog_token; + mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_TTLM_REQ; + mgmt->u.action.ttlm_req.dialog_token = dialog_token; ieee80211_neg_ttlm_add_suggested_map(skb, neg_ttlm); ieee80211_tx_skb(sdata, skb); } @@ -7969,7 +8056,7 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; struct sk_buff *skb; - int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.ttlm_res); + int hdr_len = IEEE80211_MIN_ACTION_SIZE(ttlm_res); int ttlm_max_len = 2 + 1 + sizeof(struct ieee80211_ttlm_elem) + 1 + 2 * 2 * IEEE80211_TTLM_NUM_TIDS; u16 status_code; @@ -7987,9 +8074,8 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT; - mgmt->u.action.u.ttlm_res.action_code = - WLAN_PROTECTED_EHT_ACTION_TTLM_RES; - mgmt->u.action.u.ttlm_res.dialog_token = dialog_token; + mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_TTLM_RES; + mgmt->u.action.ttlm_res.dialog_token = dialog_token; switch (ttlm_res) { default: WARN_ON(1); @@ -8006,7 +8092,7 @@ ieee80211_send_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, break; } - mgmt->u.action.u.ttlm_res.status_code = cpu_to_le16(status_code); + mgmt->u.action.ttlm_res.status_code = cpu_to_le16(status_code); ieee80211_tx_skb(sdata, skb); } @@ -8106,10 +8192,9 @@ void ieee80211_process_neg_ttlm_req(struct ieee80211_sub_if_data *sdata, if (!ieee80211_vif_is_mld(&sdata->vif)) return; - dialog_token = mgmt->u.action.u.ttlm_req.dialog_token; - ies_len = len - offsetof(struct ieee80211_mgmt, - u.action.u.ttlm_req.variable); - elems = 
ieee802_11_parse_elems(mgmt->u.action.u.ttlm_req.variable, + dialog_token = mgmt->u.action.ttlm_req.dialog_token; + ies_len = len - IEEE80211_MIN_ACTION_SIZE(ttlm_req); + elems = ieee802_11_parse_elems(mgmt->u.action.ttlm_req.variable, ies_len, IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION, @@ -8160,8 +8245,7 @@ void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, size_t len) { if (!ieee80211_vif_is_mld(&sdata->vif) || - mgmt->u.action.u.ttlm_req.dialog_token != - sdata->u.mgd.dialog_token_alloc) + mgmt->u.action.ttlm_res.dialog_token != sdata->u.mgd.dialog_token_alloc) return; wiphy_delayed_work_cancel(sdata->local->hw.wiphy, @@ -8175,7 +8259,7 @@ void ieee80211_process_neg_ttlm_res(struct ieee80211_sub_if_data *sdata, * This can be better implemented in the future, to handle request * rejections. */ - if (le16_to_cpu(mgmt->u.action.u.ttlm_res.status_code) != WLAN_STATUS_SUCCESS) + if (le16_to_cpu(mgmt->u.action.ttlm_res.status_code) != WLAN_STATUS_SUCCESS) __ieee80211_disconnect(sdata); } @@ -8208,12 +8292,11 @@ static void ieee80211_teardown_ttlm_work(struct wiphy *wiphy, void ieee80211_send_teardown_neg_ttlm(struct ieee80211_vif *vif) { + int frame_len = IEEE80211_MIN_ACTION_SIZE(ttlm_tear_down); struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; struct sk_buff *skb; - int frame_len = offsetofend(struct ieee80211_mgmt, - u.action.u.ttlm_tear_down); struct ieee80211_tx_info *info; skb = dev_alloc_skb(local->hw.extra_tx_headroom + frame_len); @@ -8229,8 +8312,7 @@ void ieee80211_send_teardown_neg_ttlm(struct ieee80211_vif *vif) memcpy(mgmt->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT; - mgmt->u.action.u.ttlm_tear_down.action_code = - WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN; + mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN; info = IEEE80211_SKB_CB(skb); 
info->flags |= IEEE80211_TX_CTL_REQ_TX_STATUS; @@ -8313,13 +8395,13 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, case WLAN_CATEGORY_SPECTRUM_MGMT: ies_len = skb->len - offsetof(struct ieee80211_mgmt, - u.action.u.chan_switch.variable); + u.action.chan_switch.variable); if (ies_len < 0) break; /* CSA IE cannot be overridden, no need for BSSID */ - elems = ieee802_11_parse_elems(mgmt->u.action.u.chan_switch.variable, + elems = ieee802_11_parse_elems(mgmt->u.action.chan_switch.variable, ies_len, IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION, @@ -8341,7 +8423,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, case WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION: ies_len = skb->len - offsetof(struct ieee80211_mgmt, - u.action.u.ext_chan_switch.variable); + u.action.ext_chan_switch.variable); if (ies_len < 0) break; @@ -8350,7 +8432,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, * extended CSA IE can't be overridden, no need for * BSSID */ - elems = ieee802_11_parse_elems(mgmt->u.action.u.ext_chan_switch.variable, + elems = ieee802_11_parse_elems(mgmt->u.action.ext_chan_switch.variable, ies_len, IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION, @@ -8367,7 +8449,7 @@ void ieee80211_sta_rx_queued_mgmt(struct ieee80211_sub_if_data *sdata, /* for the handling code pretend it was an IE */ elems->ext_chansw_ie = - &mgmt->u.action.u.ext_chan_switch.data; + &mgmt->u.action.ext_chan_switch.data; ieee80211_sta_process_chanswitch(link, rx_status->mactime, @@ -8441,7 +8523,8 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata) return -ETIMEDOUT; } - if (auth_data->algorithm == WLAN_AUTH_SAE) + if (auth_data->algorithm == WLAN_AUTH_SAE || + auth_data->algorithm == WLAN_AUTH_EPPKE) info.duration = jiffies_to_msecs(IEEE80211_AUTH_TIMEOUT_SAE); info.link_id = auth_data->link_id; @@ -8460,6 +8543,10 @@ static int ieee80211_auth(struct ieee80211_sub_if_data *sdata) } else if (auth_data->algorithm == 
WLAN_AUTH_EPPKE) { trans = auth_data->trans; status = auth_data->status; + } else if (auth_data->algorithm == WLAN_AUTH_IEEE8021X) { + trans = auth_data->trans; + status = auth_data->status; + auth_data->expected_transaction = trans + 1; } if (ieee80211_hw_check(&local->hw, REPORTS_TX_ACK_STATUS)) @@ -9117,7 +9204,8 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, } if (ifmgd->auth_data && - ifmgd->auth_data->algorithm == WLAN_AUTH_EPPKE) + (ifmgd->auth_data->algorithm == WLAN_AUTH_EPPKE || + ifmgd->auth_data->algorithm == WLAN_AUTH_IEEE8021X)) new_sta->sta.epp_peer = true; new_sta->sta.mlo = mlo; @@ -9377,6 +9465,9 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, case NL80211_AUTHTYPE_EPPKE: auth_alg = WLAN_AUTH_EPPKE; break; + case NL80211_AUTHTYPE_IEEE8021X: + auth_alg = WLAN_AUTH_IEEE8021X; + break; default: return -EOPNOTSUPP; } @@ -9402,7 +9493,8 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata, if (req->auth_data_len >= 4) { if (req->auth_type == NL80211_AUTHTYPE_SAE || - req->auth_type == NL80211_AUTHTYPE_EPPKE) { + req->auth_type == NL80211_AUTHTYPE_EPPKE || + req->auth_type == NL80211_AUTHTYPE_IEEE8021X) { __le16 *pos = (__le16 *) req->auth_data; auth_data->trans = le16_to_cpu(pos[0]); @@ -9767,10 +9859,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) size += req->links[i].elems_len; - /* FIXME: no support for 4-addr MLO yet */ - if (sdata->u.mgd.use_4addr && req->link_id >= 0) - return -EOPNOTSUPP; - assoc_data = kzalloc(size, GFP_KERNEL); if (!assoc_data) return -ENOMEM; @@ -10359,28 +10447,30 @@ void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata, u8 *pos; if (!ieee80211_vif_is_mld(&sdata->vif) || - len < offsetofend(typeof(*mgmt), u.action.u.ml_reconf_resp) || - mgmt->u.action.u.ml_reconf_resp.dialog_token != - sdata->u.mgd.reconf.dialog_token || + len < IEEE80211_MIN_ACTION_SIZE(ml_reconf_resp) || + 
mgmt->u.action.ml_reconf_resp.dialog_token != + sdata->u.mgd.reconf.dialog_token || !sta_changed_links) return; - pos = mgmt->u.action.u.ml_reconf_resp.variable; - len -= offsetofend(typeof(*mgmt), u.action.u.ml_reconf_resp); + pos = mgmt->u.action.ml_reconf_resp.variable; + len -= offsetofend(typeof(*mgmt), u.action.ml_reconf_resp); - /* each status duple is 3 octets */ - if (len < mgmt->u.action.u.ml_reconf_resp.count * 3) { + if (len < mgmt->u.action.ml_reconf_resp.count * + sizeof(struct ieee80211_ml_reconf_status)) { sdata_info(sdata, "mlo: reconf: unexpected len=%zu, count=%u\n", - len, mgmt->u.action.u.ml_reconf_resp.count); + len, mgmt->u.action.ml_reconf_resp.count); goto disconnect; } link_mask = sta_changed_links; - for (i = 0; i < mgmt->u.action.u.ml_reconf_resp.count; i++) { - u16 status = get_unaligned_le16(pos + 1); + for (i = 0; i < mgmt->u.action.ml_reconf_resp.count; i++) { + struct ieee80211_ml_reconf_status *reconf_status = (void *)pos; + u16 status = le16_to_cpu(reconf_status->status); - link_id = *pos; + link_id = u8_get_bits(reconf_status->info, + IEEE80211_ML_RECONF_LINK_ID_MASK); if (!(link_mask & BIT(link_id))) { sdata_info(sdata, @@ -10415,8 +10505,8 @@ void ieee80211_process_ml_reconf_resp(struct ieee80211_sub_if_data *sdata, sdata->u.mgd.reconf.added_links &= ~BIT(link_id); } - pos += 3; - len -= 3; + pos += sizeof(*reconf_status); + len -= sizeof(*reconf_status); } if (link_mask) { @@ -10662,8 +10752,7 @@ ieee80211_build_ml_reconf_req(struct ieee80211_sub_if_data *sdata, return NULL; skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = skb_put_zero(skb, offsetofend(struct ieee80211_mgmt, - u.action.u.ml_reconf_req)); + mgmt = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(ml_reconf_req)); /* Add the MAC header */ mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | @@ -10674,12 +10763,11 @@ ieee80211_build_ml_reconf_req(struct ieee80211_sub_if_data *sdata, /* Add the action frame fixed fields */ mgmt->u.action.category = 
WLAN_CATEGORY_PROTECTED_EHT; - mgmt->u.action.u.ml_reconf_req.action_code = - WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_REQ; + mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_REQ; /* allocate a dialog token and store it */ sdata->u.mgd.reconf.dialog_token = ++sdata->u.mgd.dialog_token_alloc; - mgmt->u.action.u.ml_reconf_req.dialog_token = + mgmt->u.action.ml_reconf_req.dialog_token = sdata->u.mgd.reconf.dialog_token; /* Add the ML reconfiguration element and the common information */ @@ -11049,11 +11137,10 @@ static bool ieee80211_mgd_epcs_supp(struct ieee80211_sub_if_data *sdata) int ieee80211_mgd_set_epcs(struct ieee80211_sub_if_data *sdata, bool enable) { + int frame_len = IEEE80211_MIN_ACTION_SIZE(epcs) + (enable ? 1 : 0); struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; struct sk_buff *skb; - int frame_len = offsetofend(struct ieee80211_mgmt, - u.action.u.epcs) + (enable ? 1 : 0); if (!ieee80211_mgd_epcs_supp(sdata)) return -EINVAL; @@ -11082,15 +11169,15 @@ int ieee80211_mgd_set_epcs(struct ieee80211_sub_if_data *sdata, bool enable) mgmt->u.action.category = WLAN_CATEGORY_PROTECTED_EHT; if (enable) { - u8 *pos = mgmt->u.action.u.epcs.variable; + u8 *pos = mgmt->u.action.epcs.variable; - mgmt->u.action.u.epcs.action_code = + mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_REQ; *pos = ++sdata->u.mgd.dialog_token_alloc; sdata->u.mgd.epcs.dialog_token = *pos; } else { - mgmt->u.action.u.epcs.action_code = + mgmt->u.action.action_code = WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_TEARDOWN; ieee80211_epcs_teardown(sdata); @@ -11179,7 +11266,7 @@ void ieee80211_process_epcs_ena_resp(struct ieee80211_sub_if_data *sdata, return; /* Handle dialog token and status code */ - pos = mgmt->u.action.u.epcs.variable; + pos = mgmt->u.action.epcs.variable; dialog_token = *pos; status_code = get_unaligned_le16(pos + 1); @@ -11201,8 +11288,7 @@ void ieee80211_process_epcs_ena_resp(struct ieee80211_sub_if_data *sdata, 
return; pos += IEEE80211_EPCS_ENA_RESP_BODY_LEN; - ies_len = len - offsetof(struct ieee80211_mgmt, - u.action.u.epcs.variable) - + ies_len = len - IEEE80211_MIN_ACTION_SIZE(epcs) - IEEE80211_EPCS_ENA_RESP_BODY_LEN; elems = ieee802_11_parse_elems(pos, ies_len, diff --git a/net/mac80211/nan.c b/net/mac80211/nan.c new file mode 100644 index 000000000000..4e262b624521 --- /dev/null +++ b/net/mac80211/nan.c @@ -0,0 +1,710 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * NAN mode implementation + * Copyright(c) 2025-2026 Intel Corporation + */ +#include <net/mac80211.h> + +#include "ieee80211_i.h" +#include "driver-ops.h" +#include "sta_info.h" + +static void +ieee80211_nan_init_channel(struct ieee80211_nan_channel *nan_channel, + struct cfg80211_nan_channel *cfg_nan_channel) +{ + memset(nan_channel, 0, sizeof(*nan_channel)); + + nan_channel->chanreq.oper = cfg_nan_channel->chandef; + memcpy(nan_channel->channel_entry, cfg_nan_channel->channel_entry, + sizeof(nan_channel->channel_entry)); + nan_channel->needed_rx_chains = cfg_nan_channel->rx_nss; +} + +static void +ieee80211_nan_update_channel(struct ieee80211_local *local, + struct ieee80211_nan_channel *nan_channel, + struct cfg80211_nan_channel *cfg_nan_channel, + bool deferred) +{ + struct ieee80211_chanctx_conf *conf; + bool reducing_nss; + + if (WARN_ON(!cfg80211_chandef_identical(&nan_channel->chanreq.oper, + &cfg_nan_channel->chandef))) + return; + + if (WARN_ON(memcmp(nan_channel->channel_entry, + cfg_nan_channel->channel_entry, + sizeof(nan_channel->channel_entry)))) + return; + + if (nan_channel->needed_rx_chains == cfg_nan_channel->rx_nss) + return; + + reducing_nss = nan_channel->needed_rx_chains > cfg_nan_channel->rx_nss; + nan_channel->needed_rx_chains = cfg_nan_channel->rx_nss; + + conf = nan_channel->chanctx_conf; + + /* + * If we are adding NSSs, we need to be ready before notifying the peer, + * if we are reducing NSSs, we need to wait until the peer is notified. 
+ */ + if (!conf || (deferred && reducing_nss)) + return; + + ieee80211_recalc_smps_chanctx(local, container_of(conf, + struct ieee80211_chanctx, + conf)); +} + +static int +ieee80211_nan_use_chanctx(struct ieee80211_sub_if_data *sdata, + struct ieee80211_nan_channel *nan_channel, + bool assign_on_failure) +{ + struct ieee80211_chanctx *ctx; + bool reused_ctx; + + if (!nan_channel->chanreq.oper.chan) + return -EINVAL; + + if (ieee80211_check_combinations(sdata, &nan_channel->chanreq.oper, + IEEE80211_CHANCTX_SHARED, 0, -1)) + return -EBUSY; + + ctx = ieee80211_find_or_create_chanctx(sdata, &nan_channel->chanreq, + IEEE80211_CHANCTX_SHARED, + assign_on_failure, + &reused_ctx); + if (IS_ERR(ctx)) + return PTR_ERR(ctx); + + nan_channel->chanctx_conf = &ctx->conf; + + /* + * In case an existing channel context is being used, we marked it as + * will_be_used, now that it is assigned - clear this indication + */ + if (reused_ctx) { + WARN_ON(!ctx->will_be_used); + ctx->will_be_used = false; + } + ieee80211_recalc_chanctx_min_def(sdata->local, ctx); + ieee80211_recalc_smps_chanctx(sdata->local, ctx); + + return 0; +} + +static void +ieee80211_nan_update_peer_channels(struct ieee80211_sub_if_data *sdata, + struct ieee80211_chanctx_conf *removed_conf) +{ + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + lockdep_assert_wiphy(local->hw.wiphy); + + list_for_each_entry(sta, &local->sta_list, list) { + struct ieee80211_nan_peer_sched *peer_sched; + int write_idx = 0; + bool updated = false; + + if (sta->sdata != sdata) + continue; + + peer_sched = sta->sta.nan_sched; + if (!peer_sched) + continue; + + /* NULL out map slots for channels being removed */ + for (int i = 0; i < peer_sched->n_channels; i++) { + if (peer_sched->channels[i].chanctx_conf != removed_conf) + continue; + + for (int m = 0; m < CFG80211_NAN_MAX_PEER_MAPS; m++) { + struct ieee80211_nan_peer_map *map = + &peer_sched->maps[m]; + + if (map->map_id == CFG80211_NAN_INVALID_MAP_ID) + 
continue; + + for (int s = 0; s < ARRAY_SIZE(map->slots); s++) + if (map->slots[s] == &peer_sched->channels[i]) + map->slots[s] = NULL; + } + } + + /* Compact channels array, removing those with removed_conf */ + for (int i = 0; i < peer_sched->n_channels; i++) { + if (peer_sched->channels[i].chanctx_conf == removed_conf) { + updated = true; + continue; + } + + if (write_idx != i) { + /* Update map pointers before moving */ + for (int m = 0; m < CFG80211_NAN_MAX_PEER_MAPS; m++) { + struct ieee80211_nan_peer_map *map = + &peer_sched->maps[m]; + + if (map->map_id == CFG80211_NAN_INVALID_MAP_ID) + continue; + + for (int s = 0; s < ARRAY_SIZE(map->slots); s++) + if (map->slots[s] == &peer_sched->channels[i]) + map->slots[s] = &peer_sched->channels[write_idx]; + } + + peer_sched->channels[write_idx] = peer_sched->channels[i]; + } + write_idx++; + } + + /* Clear any remaining entries at the end */ + for (int i = write_idx; i < peer_sched->n_channels; i++) + memset(&peer_sched->channels[i], 0, sizeof(peer_sched->channels[i])); + + peer_sched->n_channels = write_idx; + + if (updated) + drv_nan_peer_sched_changed(local, sdata, sta); + } +} + +static void +ieee80211_nan_remove_channel(struct ieee80211_sub_if_data *sdata, + struct ieee80211_nan_channel *nan_channel) +{ + struct ieee80211_chanctx_conf *conf; + struct ieee80211_chanctx *ctx; + struct ieee80211_nan_sched_cfg *sched_cfg = &sdata->vif.cfg.nan_sched; + + if (WARN_ON(!nan_channel)) + return; + + lockdep_assert_wiphy(sdata->local->hw.wiphy); + + if (!nan_channel->chanreq.oper.chan) + return; + + for (int slot = 0; slot < ARRAY_SIZE(sched_cfg->schedule); slot++) + if (sched_cfg->schedule[slot] == nan_channel) + sched_cfg->schedule[slot] = NULL; + + conf = nan_channel->chanctx_conf; + + /* If any peer nan schedule uses this chanctx, update them */ + if (conf) + ieee80211_nan_update_peer_channels(sdata, conf); + + memset(nan_channel, 0, sizeof(*nan_channel)); + + /* Update the driver before (possibly) releasing the 
channel context */ + drv_vif_cfg_changed(sdata->local, sdata, BSS_CHANGED_NAN_LOCAL_SCHED); + + /* Channel might not have a chanctx if it was ULWed */ + if (!conf) + return; + + ctx = container_of(conf, struct ieee80211_chanctx, conf); + + if (ieee80211_chanctx_num_assigned(sdata->local, ctx) > 0) { + ieee80211_recalc_chanctx_chantype(sdata->local, ctx); + ieee80211_recalc_smps_chanctx(sdata->local, ctx); + ieee80211_recalc_chanctx_min_def(sdata->local, ctx); + } + + if (ieee80211_chanctx_refcount(sdata->local, ctx) == 0) + ieee80211_free_chanctx(sdata->local, ctx, false); +} + +static void +ieee80211_nan_update_all_ndi_carriers(struct ieee80211_local *local) +{ + struct ieee80211_sub_if_data *sdata; + + lockdep_assert_wiphy(local->hw.wiphy); + + /* Iterate all interfaces and update carrier for NDI interfaces */ + list_for_each_entry(sdata, &local->interfaces, list) { + if (!ieee80211_sdata_running(sdata) || + sdata->vif.type != NL80211_IFTYPE_NAN_DATA) + continue; + + ieee80211_nan_update_ndi_carrier(sdata); + } +} + +static struct ieee80211_nan_channel * +ieee80211_nan_find_free_channel(struct ieee80211_nan_sched_cfg *sched_cfg) +{ + for (int i = 0; i < ARRAY_SIZE(sched_cfg->channels); i++) { + if (!sched_cfg->channels[i].chanreq.oper.chan) + return &sched_cfg->channels[i]; + } + + return NULL; +} + +int ieee80211_nan_set_local_sched(struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_local_sched *sched) +{ + struct ieee80211_nan_channel *sched_idx_to_chan[IEEE80211_NAN_MAX_CHANNELS] = {}; + struct ieee80211_nan_sched_cfg *sched_cfg = &sdata->vif.cfg.nan_sched; + struct ieee80211_nan_sched_cfg backup_sched; + int ret; + + if (sched->n_channels > IEEE80211_NAN_MAX_CHANNELS) + return -EOPNOTSUPP; + + if (sched->nan_avail_blob_len > IEEE80211_NAN_AVAIL_BLOB_MAX_LEN) + return -EINVAL; + + /* + * If a deferred schedule update is pending completion, new updates are + * not allowed. 
Only allow to configure an empty schedule so NAN can be + * stopped in the middle of a deferred update. This is fine because + * empty schedule means the local NAN device will not be available for + * peers anymore so there is no need to update peers about a new + * schedule. + */ + if (WARN_ON(sched_cfg->deferred && sched->n_channels)) + return -EBUSY; + + bitmap_zero(sdata->u.nan.removed_channels, IEEE80211_NAN_MAX_CHANNELS); + + memcpy(backup_sched.schedule, sched_cfg->schedule, + sizeof(backup_sched.schedule)); + memcpy(backup_sched.channels, sched_cfg->channels, + sizeof(backup_sched.channels)); + memcpy(backup_sched.avail_blob, sched_cfg->avail_blob, + sizeof(backup_sched.avail_blob)); + backup_sched.avail_blob_len = sched_cfg->avail_blob_len; + + memcpy(sched_cfg->avail_blob, sched->nan_avail_blob, + sched->nan_avail_blob_len); + sched_cfg->avail_blob_len = sched->nan_avail_blob_len; + + /* + * Remove channels that are no longer in the new schedule to free up + * resources before adding new channels. For deferred schedule, channels + * will be removed when the schedule is applied. 
+ * Create a mapping from sched index to sched_cfg channel + */ + for (int i = 0; i < ARRAY_SIZE(sched_cfg->channels); i++) { + bool still_needed = false; + + if (!sched_cfg->channels[i].chanreq.oper.chan) + continue; + + for (int j = 0; j < sched->n_channels; j++) { + if (cfg80211_chandef_identical(&sched_cfg->channels[i].chanreq.oper, + &sched->nan_channels[j].chandef)) { + sched_idx_to_chan[j] = + &sched_cfg->channels[i]; + still_needed = true; + break; + } + } + + if (!still_needed) { + __set_bit(i, sdata->u.nan.removed_channels); + if (!sched->deferred) + ieee80211_nan_remove_channel(sdata, + &sched_cfg->channels[i]); + } + } + + for (int i = 0; i < sched->n_channels; i++) { + struct ieee80211_nan_channel *chan = sched_idx_to_chan[i]; + + if (chan) { + ieee80211_nan_update_channel(sdata->local, chan, + &sched->nan_channels[i], + sched->deferred); + } else { + chan = ieee80211_nan_find_free_channel(sched_cfg); + if (WARN_ON(!chan)) { + ret = -EINVAL; + goto err; + } + + sched_idx_to_chan[i] = chan; + ieee80211_nan_init_channel(chan, + &sched->nan_channels[i]); + + ret = ieee80211_nan_use_chanctx(sdata, chan, false); + if (ret) { + memset(chan, 0, sizeof(*chan)); + goto err; + } + } + } + + for (int s = 0; s < ARRAY_SIZE(sched_cfg->schedule); s++) { + if (sched->schedule[s] < ARRAY_SIZE(sched_idx_to_chan)) + sched_cfg->schedule[s] = + sched_idx_to_chan[sched->schedule[s]]; + else + sched_cfg->schedule[s] = NULL; + } + + sched_cfg->deferred = sched->deferred; + + drv_vif_cfg_changed(sdata->local, sdata, BSS_CHANGED_NAN_LOCAL_SCHED); + + /* + * For deferred update, don't update NDI carriers yet as the new + * schedule is not yet applied so common slots don't change. The NDI + * carrier will be updated once the driver notifies the new schedule is + * applied. 
+ */ + if (sched_cfg->deferred) + return 0; + + ieee80211_nan_update_all_ndi_carriers(sdata->local); + bitmap_zero(sdata->u.nan.removed_channels, IEEE80211_NAN_MAX_CHANNELS); + + return 0; +err: + /* Remove newly added channels */ + for (int i = 0; i < ARRAY_SIZE(sched_cfg->channels); i++) { + struct cfg80211_chan_def *chan_def = + &sched_cfg->channels[i].chanreq.oper; + + if (!chan_def->chan) + continue; + + if (!cfg80211_chandef_identical(&backup_sched.channels[i].chanreq.oper, + chan_def)) + ieee80211_nan_remove_channel(sdata, + &sched_cfg->channels[i]); + } + + /* Re-add all backed up channels */ + for (int i = 0; i < ARRAY_SIZE(backup_sched.channels); i++) { + struct ieee80211_nan_channel *chan = &sched_cfg->channels[i]; + + *chan = backup_sched.channels[i]; + + /* + * For deferred update, no channels were removed and the channel + * context didn't change, so nothing else to do. + */ + if (!chan->chanctx_conf || sched->deferred) + continue; + + if (test_bit(i, sdata->u.nan.removed_channels)) { + /* Clear the stale chanctx pointer */ + chan->chanctx_conf = NULL; + /* + * We removed the newly added channels so we don't lack + * resources. So the only reason that this would fail + * is a FW error which we ignore. Therefore, this + * should never fail. + */ + WARN_ON(ieee80211_nan_use_chanctx(sdata, chan, true)); + } else { + struct ieee80211_chanctx_conf *conf = chan->chanctx_conf; + + /* FIXME: detect no-op? 
*/ + /* Channel was not removed but may have been updated */ + ieee80211_recalc_smps_chanctx(sdata->local, + container_of(conf, + struct ieee80211_chanctx, + conf)); + } + } + + memcpy(sched_cfg->schedule, backup_sched.schedule, + sizeof(backup_sched.schedule)); + memcpy(sched_cfg->avail_blob, backup_sched.avail_blob, + sizeof(backup_sched.avail_blob)); + sched_cfg->avail_blob_len = backup_sched.avail_blob_len; + sched_cfg->deferred = false; + bitmap_zero(sdata->u.nan.removed_channels, IEEE80211_NAN_MAX_CHANNELS); + + drv_vif_cfg_changed(sdata->local, sdata, BSS_CHANGED_NAN_LOCAL_SCHED); + ieee80211_nan_update_all_ndi_carriers(sdata->local); + return ret; +} + +void ieee80211_nan_sched_update_done(struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_nan_sched_cfg *sched_cfg = &vif->cfg.nan_sched; + unsigned int i; + + lockdep_assert_wiphy(sdata->local->hw.wiphy); + + if (WARN_ON(!sched_cfg->deferred)) + return; + + ieee80211_nan_update_all_ndi_carriers(sdata->local); + + /* + * Clear the deferred flag before removing channels. Removing channels + * will trigger another schedule update to the driver, and there is no + * need for this update to be deferred since removed channels are not + * part of the schedule anymore, so no need to notify peers about + * removing them. + */ + sched_cfg->deferred = false; + + for (i = 0; i < ARRAY_SIZE(sched_cfg->channels); i++) { + struct ieee80211_nan_channel *chan = &sched_cfg->channels[i]; + struct ieee80211_chanctx_conf *conf = chan->chanctx_conf; + + if (!chan->chanreq.oper.chan) + continue; + + if (test_bit(i, sdata->u.nan.removed_channels)) + ieee80211_nan_remove_channel(sdata, chan); + else if (conf) + /* + * We might have called this already for some channels, + * but this knows to handle a no-op. 
+ */ + ieee80211_recalc_smps_chanctx(sdata->local, + container_of(conf, + struct ieee80211_chanctx, + conf)); + } + + bitmap_zero(sdata->u.nan.removed_channels, IEEE80211_NAN_MAX_CHANNELS); + cfg80211_nan_sched_update_done(ieee80211_vif_to_wdev(vif), true, + GFP_KERNEL); +} +EXPORT_SYMBOL(ieee80211_nan_sched_update_done); + +void ieee80211_nan_free_peer_sched(struct ieee80211_nan_peer_sched *sched) +{ + if (!sched) + return; + + kfree(sched->init_ulw); + kfree(sched); +} + +static int +ieee80211_nan_init_peer_channel(struct ieee80211_sub_if_data *sdata, + const struct sta_info *sta, + const struct cfg80211_nan_channel *cfg_chan, + struct ieee80211_nan_channel *new_chan) +{ + struct ieee80211_nan_sched_cfg *sched_cfg = &sdata->vif.cfg.nan_sched; + + /* Find compatible local channel */ + for (int j = 0; j < ARRAY_SIZE(sched_cfg->channels); j++) { + struct ieee80211_nan_channel *local_chan = + &sched_cfg->channels[j]; + const struct cfg80211_chan_def *compat; + + if (!local_chan->chanreq.oper.chan) + continue; + + compat = cfg80211_chandef_compatible(&local_chan->chanreq.oper, + &cfg_chan->chandef); + if (!compat) + continue; + + /* compat is the wider chandef, and we want the narrower one */ + new_chan->chanreq.oper = compat == &local_chan->chanreq.oper ? + cfg_chan->chandef : local_chan->chanreq.oper; + new_chan->needed_rx_chains = min(local_chan->needed_rx_chains, + cfg_chan->rx_nss); + new_chan->chanctx_conf = local_chan->chanctx_conf; + + break; + } + + /* + * nl80211 already validated that each peer channel is compatible + * with at least one local channel, so this should never happen. 
+ */ + if (WARN_ON(!new_chan->chanreq.oper.chan)) + return -EINVAL; + + memcpy(new_chan->channel_entry, cfg_chan->channel_entry, + sizeof(new_chan->channel_entry)); + + return 0; +} + +static void +ieee80211_nan_init_peer_map(struct ieee80211_nan_peer_sched *peer_sched, + const struct cfg80211_nan_peer_map *cfg_map, + struct ieee80211_nan_peer_map *new_map) +{ + new_map->map_id = cfg_map->map_id; + + if (new_map->map_id == CFG80211_NAN_INVALID_MAP_ID) + return; + + /* Set up the slots array */ + for (int slot = 0; slot < ARRAY_SIZE(new_map->slots); slot++) { + u8 chan_idx = cfg_map->schedule[slot]; + + if (chan_idx < peer_sched->n_channels) + new_map->slots[slot] = &peer_sched->channels[chan_idx]; + } +} + +/* + * Check if the local schedule and a peer schedule have at least one common + * slot - a slot where both schedules are active on compatible channels. + */ +static bool +ieee80211_nan_has_common_slots(struct ieee80211_sub_if_data *sdata, + struct ieee80211_nan_peer_sched *peer_sched) +{ + for (int slot = 0; slot < CFG80211_NAN_SCHED_NUM_TIME_SLOTS; slot++) { + struct ieee80211_nan_channel *local_chan = + sdata->vif.cfg.nan_sched.schedule[slot]; + + if (!local_chan || !local_chan->chanctx_conf) + continue; + + /* Check all peer maps for this slot */ + for (int m = 0; m < CFG80211_NAN_MAX_PEER_MAPS; m++) { + struct ieee80211_nan_peer_map *map = &peer_sched->maps[m]; + struct ieee80211_nan_channel *peer_chan; + + if (map->map_id == CFG80211_NAN_INVALID_MAP_ID) + continue; + + peer_chan = map->slots[slot]; + if (!peer_chan) + continue; + + if (local_chan->chanctx_conf == peer_chan->chanctx_conf) + return true; + } + } + + return false; +} + +void ieee80211_nan_update_ndi_carrier(struct ieee80211_sub_if_data *ndi_sdata) +{ + struct ieee80211_local *local = ndi_sdata->local; + struct ieee80211_sub_if_data *nmi_sdata; + struct sta_info *sta; + + lockdep_assert_wiphy(local->hw.wiphy); + + if (WARN_ON(ndi_sdata->vif.type != NL80211_IFTYPE_NAN_DATA || + 
!ndi_sdata->dev) || !ieee80211_sdata_running(ndi_sdata)) + return; + + nmi_sdata = wiphy_dereference(local->hw.wiphy, ndi_sdata->u.nan_data.nmi); + if (WARN_ON(!nmi_sdata)) + return; + + list_for_each_entry(sta, &local->sta_list, list) { + struct ieee80211_sta *nmi_sta; + + if (sta->sdata != ndi_sdata || + !test_sta_flag(sta, WLAN_STA_AUTHORIZED)) + continue; + + nmi_sta = wiphy_dereference(local->hw.wiphy, sta->sta.nmi); + if (WARN_ON(!nmi_sta) || !nmi_sta->nan_sched) + continue; + + if (ieee80211_nan_has_common_slots(nmi_sdata, nmi_sta->nan_sched)) { + netif_carrier_on(ndi_sdata->dev); + return; + } + } + + netif_carrier_off(ndi_sdata->dev); +} + +static void +ieee80211_nan_update_peer_ndis_carrier(struct ieee80211_local *local, + struct sta_info *nmi_sta) +{ + struct sta_info *sta; + + lockdep_assert_wiphy(local->hw.wiphy); + + list_for_each_entry(sta, &local->sta_list, list) { + if (rcu_access_pointer(sta->sta.nmi) == &nmi_sta->sta) + ieee80211_nan_update_ndi_carrier(sta->sdata); + } +} + +int ieee80211_nan_set_peer_sched(struct ieee80211_sub_if_data *sdata, + struct cfg80211_nan_peer_sched *sched) +{ + struct ieee80211_nan_peer_sched *new_sched, *old_sched, *to_free; + struct sta_info *sta; + int ret; + + lockdep_assert_wiphy(sdata->local->hw.wiphy); + + if (!sdata->u.nan.started) + return -EINVAL; + + sta = sta_info_get(sdata, sched->peer_addr); + if (!sta) + return -ENOENT; + + new_sched = kzalloc(struct_size(new_sched, channels, sched->n_channels), + GFP_KERNEL); + if (!new_sched) + return -ENOMEM; + + to_free = new_sched; + + new_sched->seq_id = sched->seq_id; + new_sched->committed_dw = sched->committed_dw; + new_sched->max_chan_switch = sched->max_chan_switch; + new_sched->n_channels = sched->n_channels; + + if (sched->ulw_size && sched->init_ulw) { + new_sched->init_ulw = kmemdup(sched->init_ulw, sched->ulw_size, + GFP_KERNEL); + if (!new_sched->init_ulw) { + ret = -ENOMEM; + goto out; + } + new_sched->ulw_size = sched->ulw_size; + } + + for (int i = 0; 
i < sched->n_channels; i++) { + ret = ieee80211_nan_init_peer_channel(sdata, sta, + &sched->nan_channels[i], + &new_sched->channels[i]); + if (ret) + goto out; + } + + for (int m = 0; m < ARRAY_SIZE(sched->maps); m++) + ieee80211_nan_init_peer_map(new_sched, &sched->maps[m], + &new_sched->maps[m]); + + /* Install the new schedule before calling the driver */ + old_sched = sta->sta.nan_sched; + sta->sta.nan_sched = new_sched; + + ret = drv_nan_peer_sched_changed(sdata->local, sdata, sta); + if (ret) { + /* Revert to old schedule */ + sta->sta.nan_sched = old_sched; + goto out; + } + + ieee80211_nan_update_peer_ndis_carrier(sdata->local, sta); + + /* Success - free old schedule */ + to_free = old_sched; + ret = 0; + +out: + ieee80211_nan_free_peer_sched(to_free); + return ret; +} diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 62745ca00e06..b73ef3adfcc5 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1849,20 +1849,7 @@ minstrel_ht_rate_update(void *priv, struct ieee80211_supported_band *sband, static void * minstrel_ht_alloc_sta(void *priv, struct ieee80211_sta *sta, gfp_t gfp) { - struct ieee80211_supported_band *sband; - struct minstrel_ht_sta *mi; - struct minstrel_priv *mp = priv; - struct ieee80211_hw *hw = mp->hw; - int max_rates = 0; - int i; - - for (i = 0; i < NUM_NL80211_BANDS; i++) { - sband = hw->wiphy->bands[i]; - if (sband && sband->n_bitrates > max_rates) - max_rates = sband->n_bitrates; - } - - return kzalloc_obj(*mi, gfp); + return kzalloc_obj(struct minstrel_ht_sta, gfp); } static void diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 11d6c56c9d7e..3e5d1c47a5b0 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -274,7 +274,7 @@ static void ieee80211_handle_mu_mimo_mon(struct ieee80211_sub_if_data *sdata, if (!sdata) return; - BUILD_BUG_ON(sizeof(action) != IEEE80211_MIN_ACTION_SIZE + 1); + BUILD_BUG_ON(sizeof(action) != 
IEEE80211_MIN_ACTION_SIZE(action_code)); if (skb->len < rtap_space + sizeof(action) + VHT_MUMIMO_GROUPS_DATA_LEN) @@ -404,7 +404,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, while ((pos - (u8 *)rthdr) & 7) *pos++ = 0; put_unaligned_le64( - ieee80211_calculate_rx_timestamp(local, status, + ieee80211_calculate_rx_timestamp(&local->hw, status, mpdulen, 0), pos); rthdr->it_present |= cpu_to_le32(BIT(IEEE80211_RADIOTAP_TSFT)); @@ -1162,7 +1162,7 @@ static ieee80211_rx_result ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx) u8 category; /* make sure category field is present */ - if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE) + if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE(category)) return RX_DROP_U_RUNT_ACTION; mgmt = (struct ieee80211_mgmt *)hdr; @@ -1475,7 +1475,9 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx, !test_and_set_bit(tid, rx->sta->ampdu_mlme.unexpected_agg)) ieee80211_send_delba(rx->sdata, rx->sta->sta.addr, tid, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_REQUIRE_SETUP); + WLAN_REASON_QSTA_REQUIRE_SETUP, + ieee80211_s1g_use_ndp_ba(rx->sdata, + rx->sta)); goto dont_reorder; } @@ -1587,6 +1589,25 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) if (ieee80211_vif_is_mesh(&rx->sdata->vif)) return ieee80211_rx_mesh_check(rx); + /* + * Wi-Fi Aware (TM) 4.0 specification 6.2.5: + * For NAN_DATA, unicast data frames must have A2 (source) + * assigned to an active NDP. If not the frame must be dropped + * and NAN Data Path termination frame should be sent. Notify + * user space so it can do so. 
+ */ + if (rx->sdata->vif.type == NL80211_IFTYPE_NAN_DATA) { + if (ieee80211_is_data(hdr->frame_control) && + !is_multicast_ether_addr(hdr->addr1) && + (!rx->sta || !test_sta_flag(rx->sta, WLAN_STA_ASSOC))) { + if (cfg80211_rx_spurious_frame(rx->sdata->dev, hdr->addr2, + rx->link_id, GFP_ATOMIC)) + return RX_DROP_U_SPURIOUS_NOTIF; + return RX_DROP_U_SPURIOUS; + } + return RX_CONTINUE; + } + if (unlikely((ieee80211_is_data(hdr->frame_control) || ieee80211_is_pspoll(hdr->frame_control)) && rx->sdata->vif.type != NL80211_IFTYPE_ADHOC && @@ -3372,7 +3393,9 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames) !test_and_set_bit(tid, rx->sta->ampdu_mlme.unexpected_agg)) ieee80211_send_delba(rx->sdata, rx->sta->sta.addr, tid, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_REQUIRE_SETUP); + WLAN_REASON_QSTA_REQUIRE_SETUP, + ieee80211_s1g_use_ndp_ba(rx->sdata, + rx->sta)); tid_agg_rx = rcu_dereference(rx->sta->ampdu_mlme.tid_rx[tid]); if (!tid_agg_rx) @@ -3422,7 +3445,7 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, return; } - if (len < 24 + 1 + sizeof(resp->u.action.u.sa_query)) { + if (len < IEEE80211_MIN_ACTION_SIZE(sa_query)) { /* Too short SA Query request frame */ return; } @@ -3432,17 +3455,16 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, return; skb_reserve(skb, local->hw.extra_tx_headroom); - resp = skb_put_zero(skb, 24); + resp = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(sa_query)); memcpy(resp->da, sdata->vif.cfg.ap_addr, ETH_ALEN); memcpy(resp->sa, sdata->vif.addr, ETH_ALEN); memcpy(resp->bssid, sdata->vif.cfg.ap_addr, ETH_ALEN); resp->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); - skb_put(skb, 1 + sizeof(resp->u.action.u.sa_query)); resp->u.action.category = WLAN_CATEGORY_SA_QUERY; - resp->u.action.u.sa_query.action = WLAN_ACTION_SA_QUERY_RESPONSE; - memcpy(resp->u.action.u.sa_query.trans_id, - mgmt->u.action.u.sa_query.trans_id, + 
resp->u.action.action_code = WLAN_ACTION_SA_QUERY_RESPONSE; + memcpy(resp->u.action.sa_query.trans_id, + mgmt->u.action.sa_query.trans_id, WLAN_SA_QUERY_TR_ID_LEN); ieee80211_tx_skb(sdata, skb); @@ -3516,7 +3538,7 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) /* drop too small action frames */ if (ieee80211_is_action(mgmt->frame_control) && - rx->skb->len < IEEE80211_MIN_ACTION_SIZE) + rx->skb->len < IEEE80211_MIN_ACTION_SIZE(category)) return RX_DROP_U_RUNT_ACTION; /* Drop non-broadcast Beacon frames */ @@ -3565,29 +3587,28 @@ ieee80211_process_rx_twt_action(struct ieee80211_rx_data *rx) if (!rx->sta) return false; - switch (mgmt->u.action.u.s1g.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_S1G_TWT_SETUP: { struct ieee80211_twt_setup *twt; - if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE + - 1 + /* action code */ + if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE(action_code) + sizeof(struct ieee80211_twt_setup) + 2 /* TWT req_type agrt */) break; - twt = (void *)mgmt->u.action.u.s1g.variable; + twt = (void *)mgmt->u.action.s1g.variable; if (twt->element_id != WLAN_EID_S1G_TWT) break; - if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE + - 4 + /* action code + token + tlv */ + if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE(action_code) + + 3 + /* token + tlv */ twt->length) break; return true; /* queue the frame */ } case WLAN_S1G_TWT_TEARDOWN: - if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE + 2) + if (rx->skb->len < IEEE80211_MIN_ACTION_SIZE(action_code) + 1) break; return true; /* queue the frame */ @@ -3632,10 +3653,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) break; /* verify action & smps_control/chanwidth are present */ - if (len < IEEE80211_MIN_ACTION_SIZE + 2) + if (len < IEEE80211_MIN_ACTION_SIZE(ht_smps)) goto invalid; - switch (mgmt->u.action.u.ht_smps.action) { + switch (mgmt->u.action.action_code) { case WLAN_HT_ACTION_SMPS: { struct ieee80211_supported_band *sband; enum ieee80211_smps_mode smps_mode; @@ -3646,7 +3667,7 
@@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto handled; /* convert to HT capability */ - switch (mgmt->u.action.u.ht_smps.smps_control) { + switch (mgmt->u.action.ht_smps.smps_control) { case WLAN_HT_SMPS_CONTROL_DISABLED: smps_mode = IEEE80211_SMPS_OFF; break; @@ -3679,7 +3700,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto handled; } case WLAN_HT_ACTION_NOTIFY_CHANWIDTH: { - u8 chanwidth = mgmt->u.action.u.ht_notify_cw.chanwidth; + u8 chanwidth = mgmt->u.action.ht_notify_cw.chanwidth; if (chanwidth != IEEE80211_HT_CHANWIDTH_20MHZ && chanwidth != IEEE80211_HT_CHANWIDTH_ANY) @@ -3699,7 +3720,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) break; case WLAN_CATEGORY_PUBLIC: case WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION: - if (len < IEEE80211_MIN_ACTION_SIZE + 1) + if (len < IEEE80211_MIN_ACTION_SIZE(action_code)) goto invalid; if (sdata->vif.type != NL80211_IFTYPE_STATION) break; @@ -3707,11 +3728,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) break; if (!ether_addr_equal(mgmt->bssid, sdata->deflink.u.mgd.bssid)) break; - if (mgmt->u.action.u.ext_chan_switch.action_code != + if (mgmt->u.action.action_code != WLAN_PUB_ACTION_EXT_CHANSW_ANN) break; - if (len < offsetof(struct ieee80211_mgmt, - u.action.u.ext_chan_switch.variable)) + if (len < IEEE80211_MIN_ACTION_SIZE(ext_chan_switch)) goto invalid; goto queue; case WLAN_CATEGORY_VHT: @@ -3723,18 +3743,18 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) break; /* verify action code is present */ - if (len < IEEE80211_MIN_ACTION_SIZE + 1) + if (len < IEEE80211_MIN_ACTION_SIZE(action_code)) goto invalid; - switch (mgmt->u.action.u.vht_opmode_notif.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_VHT_ACTION_OPMODE_NOTIF: { /* verify opmode is present */ - if (len < IEEE80211_MIN_ACTION_SIZE + 2) + if (len < IEEE80211_MIN_ACTION_SIZE(vht_opmode_notif)) goto invalid; goto queue; } case WLAN_VHT_ACTION_GROUPID_MGMT: { - if (len < IEEE80211_MIN_ACTION_SIZE + 
25) + if (len < IEEE80211_MIN_ACTION_SIZE(vht_group_notif)) goto invalid; goto queue; } @@ -3747,27 +3767,28 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) sdata->vif.type != NL80211_IFTYPE_MESH_POINT && sdata->vif.type != NL80211_IFTYPE_AP_VLAN && sdata->vif.type != NL80211_IFTYPE_AP && - sdata->vif.type != NL80211_IFTYPE_ADHOC) + sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_NAN_DATA) break; /* verify action_code is present */ - if (len < IEEE80211_MIN_ACTION_SIZE + 1) + if (len < IEEE80211_MIN_ACTION_SIZE(action_code)) break; - switch (mgmt->u.action.u.addba_req.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_ACTION_ADDBA_REQ: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.addba_req))) + case WLAN_ACTION_NDP_ADDBA_REQ: + if (len < IEEE80211_MIN_ACTION_SIZE(addba_req)) goto invalid; break; case WLAN_ACTION_ADDBA_RESP: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.addba_resp))) + case WLAN_ACTION_NDP_ADDBA_RESP: + if (len < IEEE80211_MIN_ACTION_SIZE(addba_resp)) goto invalid; break; case WLAN_ACTION_DELBA: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.delba))) + case WLAN_ACTION_NDP_DELBA: + if (len < IEEE80211_MIN_ACTION_SIZE(delba)) goto invalid; break; default: @@ -3777,16 +3798,15 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto queue; case WLAN_CATEGORY_SPECTRUM_MGMT: /* verify action_code is present */ - if (len < IEEE80211_MIN_ACTION_SIZE + 1) + if (len < IEEE80211_MIN_ACTION_SIZE(action_code)) break; - switch (mgmt->u.action.u.measurement.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_ACTION_SPCT_MSR_REQ: if (status->band != NL80211_BAND_5GHZ) break; - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.measurement))) + if (len < IEEE80211_MIN_ACTION_SIZE(measurement)) break; if (sdata->vif.type != NL80211_IFTYPE_STATION) @@ -3796,8 +3816,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) 
goto handled; case WLAN_ACTION_SPCT_CHL_SWITCH: { u8 *bssid; - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.chan_switch))) + if (len < IEEE80211_MIN_ACTION_SIZE(chan_switch)) break; if (sdata->vif.type != NL80211_IFTYPE_STATION && @@ -3822,11 +3841,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } break; case WLAN_CATEGORY_SELF_PROTECTED: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.self_prot.action_code))) + if (len < IEEE80211_MIN_ACTION_SIZE(self_prot)) break; - switch (mgmt->u.action.u.self_prot.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_SP_MESH_PEERING_OPEN: case WLAN_SP_MESH_PEERING_CLOSE: case WLAN_SP_MESH_PEERING_CONFIRM: @@ -3844,8 +3862,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } break; case WLAN_CATEGORY_MESH_ACTION: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.mesh_action.action_code))) + if (len < IEEE80211_MIN_ACTION_SIZE(action_code)) break; if (!ieee80211_vif_is_mesh(&sdata->vif)) @@ -3855,11 +3872,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) break; goto queue; case WLAN_CATEGORY_S1G: - if (len < offsetofend(typeof(*mgmt), - u.action.u.s1g.action_code)) + if (len < IEEE80211_MIN_ACTION_SIZE(action_code)) break; - switch (mgmt->u.action.u.s1g.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_S1G_TWT_SETUP: case WLAN_S1G_TWT_TEARDOWN: if (ieee80211_process_rx_twt_action(rx)) @@ -3870,33 +3886,29 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } break; case WLAN_CATEGORY_PROTECTED_EHT: - if (len < offsetofend(typeof(*mgmt), - u.action.u.ttlm_req.action_code)) + if (len < IEEE80211_MIN_ACTION_SIZE(action_code)) break; - switch (mgmt->u.action.u.ttlm_req.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_PROTECTED_EHT_ACTION_TTLM_REQ: if (sdata->vif.type != NL80211_IFTYPE_STATION) break; - if (len < offsetofend(typeof(*mgmt), - u.action.u.ttlm_req)) + if (len < IEEE80211_MIN_ACTION_SIZE(ttlm_req)) 
goto invalid; goto queue; case WLAN_PROTECTED_EHT_ACTION_TTLM_RES: if (sdata->vif.type != NL80211_IFTYPE_STATION) break; - if (len < offsetofend(typeof(*mgmt), - u.action.u.ttlm_res)) + if (len < IEEE80211_MIN_ACTION_SIZE(ttlm_res)) goto invalid; goto queue; case WLAN_PROTECTED_EHT_ACTION_TTLM_TEARDOWN: if (sdata->vif.type != NL80211_IFTYPE_STATION) break; - if (len < offsetofend(typeof(*mgmt), - u.action.u.ttlm_tear_down)) + if (len < IEEE80211_MIN_ACTION_SIZE(ttlm_tear_down)) goto invalid; goto queue; case WLAN_PROTECTED_EHT_ACTION_LINK_RECONFIG_RESP: @@ -3906,34 +3918,29 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) /* The reconfiguration response action frame must * least one 'Status Duple' entry (3 octets) */ - if (len < - offsetofend(typeof(*mgmt), - u.action.u.ml_reconf_resp) + 3) + if (len < IEEE80211_MIN_ACTION_SIZE(ml_reconf_resp) + 3) goto invalid; goto queue; case WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_RESP: if (sdata->vif.type != NL80211_IFTYPE_STATION) break; - if (len < offsetofend(typeof(*mgmt), - u.action.u.epcs) + - IEEE80211_EPCS_ENA_RESP_BODY_LEN) + if (len < IEEE80211_MIN_ACTION_SIZE(epcs) + + IEEE80211_EPCS_ENA_RESP_BODY_LEN) goto invalid; goto queue; case WLAN_PROTECTED_EHT_ACTION_EPCS_ENABLE_TEARDOWN: if (sdata->vif.type != NL80211_IFTYPE_STATION) break; - if (len < offsetofend(typeof(*mgmt), - u.action.u.epcs)) + if (len < IEEE80211_MIN_ACTION_SIZE(epcs)) goto invalid; goto queue; case WLAN_PROTECTED_EHT_ACTION_EML_OP_MODE_NOTIF: if (sdata->vif.type != NL80211_IFTYPE_AP) break; - if (len < offsetofend(typeof(*mgmt), - u.action.u.eml_omn)) + if (len < IEEE80211_MIN_ACTION_SIZE(eml_omn)) goto invalid; goto queue; default: @@ -4015,11 +4022,10 @@ ieee80211_rx_h_action_post_userspace(struct ieee80211_rx_data *rx) switch (mgmt->u.action.category) { case WLAN_CATEGORY_SA_QUERY: - if (len < (IEEE80211_MIN_ACTION_SIZE + - sizeof(mgmt->u.action.u.sa_query))) + if (len < IEEE80211_MIN_ACTION_SIZE(sa_query)) break; - switch 
(mgmt->u.action.u.sa_query.action) { + switch (mgmt->u.action.action_code) { case WLAN_ACTION_SA_QUERY_REQUEST: if (sdata->vif.type != NL80211_IFTYPE_STATION) break; @@ -4483,6 +4489,9 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) u8 *bssid = ieee80211_get_bssid(hdr, skb->len, sdata->vif.type); bool multicast = is_multicast_ether_addr(hdr->addr1) || ieee80211_is_s1g_beacon(hdr->frame_control); + static const u8 nan_network_id[ETH_ALEN] __aligned(2) = { + 0x51, 0x6F, 0x9A, 0x01, 0x00, 0x00 + }; switch (sdata->vif.type) { case NL80211_IFTYPE_STATION: @@ -4611,6 +4620,10 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) (ieee80211_is_auth(hdr->frame_control) && ether_addr_equal(sdata->vif.addr, hdr->addr1)); case NL80211_IFTYPE_NAN: + if (ieee80211_has_tods(hdr->frame_control) || + ieee80211_has_fromds(hdr->frame_control)) + return false; + /* Accept only frames that are addressed to the NAN cluster * (based on the Cluster ID). From these frames, accept only * action frames or authentication frames that are addressed to @@ -4621,6 +4634,36 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx) (ieee80211_is_public_action(hdr, skb->len) || (ieee80211_is_auth(hdr->frame_control) && ether_addr_equal(sdata->vif.addr, hdr->addr1))); + case NL80211_IFTYPE_NAN_DATA: + if (ieee80211_has_tods(hdr->frame_control) || + ieee80211_has_fromds(hdr->frame_control)) + return false; + + if (ieee80211_is_data(hdr->frame_control)) { + struct ieee80211_sub_if_data *nmi; + + nmi = rcu_dereference(sdata->u.nan_data.nmi); + if (!nmi) + return false; + + if (!ether_addr_equal(nmi->wdev.u.nan.cluster_id, + hdr->addr3)) + return false; + + return multicast || + ether_addr_equal(sdata->vif.addr, hdr->addr1); + } + + /* Non-public action frames (unicast or multicast) */ + if (ieee80211_is_action(hdr->frame_control) && + !ieee80211_is_public_action(hdr, skb->len) && + (ether_addr_equal(nan_network_id, hdr->addr1) || + 
ether_addr_equal(sdata->vif.addr, hdr->addr1))) + return true; + + /* Unicast secure management frames */ + return ether_addr_equal(sdata->vif.addr, hdr->addr1) && + ieee80211_is_unicast_robust_mgmt_frame(skb); default: break; } @@ -5127,6 +5170,11 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, hdr = (struct ieee80211_hdr *)rx->skb->data; } + /* Store a copy of the pre-translated link addresses for SW crypto */ + if (unlikely(is_unicast_ether_addr(hdr->addr1) && + !ieee80211_is_data(hdr->frame_control))) + memcpy(rx->link_addrs, &hdr->addrs, 3 * ETH_ALEN); + if (unlikely(rx->sta && rx->sta->sta.mlo) && is_unicast_ether_addr(hdr->addr1) && !ieee80211_is_probe_resp(hdr->frame_control) && diff --git a/net/mac80211/s1g.c b/net/mac80211/s1g.c index 1f68df6e8067..5af4a0c6c642 100644 --- a/net/mac80211/s1g.c +++ b/net/mac80211/s1g.c @@ -2,7 +2,7 @@ /* * S1G handling * Copyright(c) 2020 Adapt-IP - * Copyright (C) 2023 Intel Corporation + * Copyright (C) 2023, 2026 Intel Corporation */ #include <linux/ieee80211.h> #include <net/mac80211.h> @@ -27,14 +27,14 @@ bool ieee80211_s1g_is_twt_setup(struct sk_buff *skb) if (likely(mgmt->u.action.category != WLAN_CATEGORY_S1G)) return false; - return mgmt->u.action.u.s1g.action_code == WLAN_S1G_TWT_SETUP; + return mgmt->u.action.action_code == WLAN_S1G_TWT_SETUP; } static void ieee80211_s1g_send_twt_setup(struct ieee80211_sub_if_data *sdata, const u8 *da, const u8 *bssid, struct ieee80211_twt_setup *twt) { - int len = IEEE80211_MIN_ACTION_SIZE + 4 + twt->length; + int len = IEEE80211_MIN_ACTION_SIZE(s1g) + 3 + twt->length; struct ieee80211_local *local = sdata->local; struct ieee80211_mgmt *mgmt; struct sk_buff *skb; @@ -52,8 +52,8 @@ ieee80211_s1g_send_twt_setup(struct ieee80211_sub_if_data *sdata, const u8 *da, memcpy(mgmt->bssid, bssid, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_S1G; - mgmt->u.action.u.s1g.action_code = WLAN_S1G_TWT_SETUP; - memcpy(mgmt->u.action.u.s1g.variable, twt, 3 + 
twt->length); + mgmt->u.action.action_code = WLAN_S1G_TWT_SETUP; + memcpy(mgmt->u.action.s1g.variable, twt, 3 + twt->length); IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | IEEE80211_TX_INTFL_MLME_CONN_TX | @@ -71,12 +71,12 @@ ieee80211_s1g_send_twt_teardown(struct ieee80211_sub_if_data *sdata, u8 *id; skb = dev_alloc_skb(local->hw.extra_tx_headroom + - IEEE80211_MIN_ACTION_SIZE + 2); + IEEE80211_MIN_ACTION_SIZE(s1g) + 1); if (!skb) return; skb_reserve(skb, local->hw.extra_tx_headroom); - mgmt = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE + 2); + mgmt = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(s1g) + 1); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); memcpy(mgmt->da, da, ETH_ALEN); @@ -84,8 +84,8 @@ ieee80211_s1g_send_twt_teardown(struct ieee80211_sub_if_data *sdata, memcpy(mgmt->bssid, bssid, ETH_ALEN); mgmt->u.action.category = WLAN_CATEGORY_S1G; - mgmt->u.action.u.s1g.action_code = WLAN_S1G_TWT_TEARDOWN; - id = (u8 *)mgmt->u.action.u.s1g.variable; + mgmt->u.action.action_code = WLAN_S1G_TWT_TEARDOWN; + id = (u8 *)mgmt->u.action.s1g.variable; *id = flowid; IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT | @@ -98,7 +98,7 @@ ieee80211_s1g_rx_twt_setup(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (void *)skb->data; - struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.u.s1g.variable; + struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.s1g.variable; struct ieee80211_twt_params *twt_agrt = (void *)twt->params; twt_agrt->req_type &= cpu_to_le16(~IEEE80211_TWT_REQTYPE_REQUEST); @@ -128,7 +128,7 @@ ieee80211_s1g_rx_twt_teardown(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; drv_twt_teardown_request(sdata->local, sdata, &sta->sta, - mgmt->u.action.u.s1g.variable[0]); + mgmt->u.action.s1g.variable[0]); } static void @@ -136,7 +136,7 @@ 
ieee80211_s1g_tx_twt_setup_fail(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct sk_buff *skb) { struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *)skb->data; - struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.u.s1g.variable; + struct ieee80211_twt_setup *twt = (void *)mgmt->u.action.s1g.variable; struct ieee80211_twt_params *twt_agrt = (void *)twt->params; u8 flowid = le16_get_bits(twt_agrt->req_type, IEEE80211_TWT_REQTYPE_FLOWID); @@ -160,7 +160,7 @@ void ieee80211_s1g_rx_twt_action(struct ieee80211_sub_if_data *sdata, if (!sta) return; - switch (mgmt->u.action.u.s1g.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_S1G_TWT_SETUP: ieee80211_s1g_rx_twt_setup(sdata, sta, skb); break; @@ -185,7 +185,7 @@ void ieee80211_s1g_status_twt_action(struct ieee80211_sub_if_data *sdata, if (!sta) return; - switch (mgmt->u.action.u.s1g.action_code) { + switch (mgmt->u.action.action_code) { case WLAN_S1G_TWT_SETUP: /* process failed twt setup frames */ ieee80211_s1g_tx_twt_setup_fail(sdata, sta, skb); @@ -220,3 +220,11 @@ void ieee80211_s1g_cap_to_sta_s1g_cap(struct ieee80211_sub_if_data *sdata, ieee80211_sta_recalc_aggregates(&link_sta->sta->sta); } + +bool ieee80211_s1g_use_ndp_ba(const struct ieee80211_sub_if_data *sdata, + const struct sta_info *sta) +{ + return sdata->vif.cfg.s1g && + ieee80211_hw_check(&sdata->local->hw, SUPPORTS_NDP_BLOCKACK) && + (sta && sta->sta.deflink.s1g_cap.s1g); +} diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 4823c8d45639..eeff230bd909 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -216,7 +216,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, if (link_conf) { bss_meta.parent_tsf = - ieee80211_calculate_rx_timestamp(local, + ieee80211_calculate_rx_timestamp(&local->hw, rx_status, len + FCS_LEN, 24); diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index 7422888d3640..e2eaf8d8d7ff 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c 
@@ -9,7 +9,7 @@ * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2008, Intel Corporation * Copyright 2008, Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2018, 2020, 2022-2024 Intel Corporation + * Copyright (C) 2018, 2020, 2022-2024, 2026 Intel Corporation */ #include <linux/ieee80211.h> @@ -409,35 +409,30 @@ static void ieee80211_send_refuse_measurement_request(struct ieee80211_sub_if_da struct sk_buff *skb; struct ieee80211_mgmt *msr_report; - skb = dev_alloc_skb(sizeof(*msr_report) + local->hw.extra_tx_headroom + - sizeof(struct ieee80211_msrment_ie)); + skb = dev_alloc_skb(IEEE80211_MIN_ACTION_SIZE(measurement) + + local->hw.extra_tx_headroom); if (!skb) return; skb_reserve(skb, local->hw.extra_tx_headroom); - msr_report = skb_put_zero(skb, 24); + msr_report = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(measurement)); memcpy(msr_report->da, da, ETH_ALEN); memcpy(msr_report->sa, sdata->vif.addr, ETH_ALEN); memcpy(msr_report->bssid, bssid, ETH_ALEN); msr_report->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); - skb_put(skb, 1 + sizeof(msr_report->u.action.u.measurement)); msr_report->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT; - msr_report->u.action.u.measurement.action_code = - WLAN_ACTION_SPCT_MSR_RPRT; - msr_report->u.action.u.measurement.dialog_token = dialog_token; + msr_report->u.action.action_code = WLAN_ACTION_SPCT_MSR_RPRT; - msr_report->u.action.u.measurement.element_id = WLAN_EID_MEASURE_REPORT; - msr_report->u.action.u.measurement.length = + msr_report->u.action.measurement.dialog_token = dialog_token; + msr_report->u.action.measurement.element_id = WLAN_EID_MEASURE_REPORT; + msr_report->u.action.measurement.length = sizeof(struct ieee80211_msrment_ie); - - memset(&msr_report->u.action.u.measurement.msr_elem, 0, - sizeof(struct ieee80211_msrment_ie)); - msr_report->u.action.u.measurement.msr_elem.token = request_ie->token; - msr_report->u.action.u.measurement.msr_elem.mode |= + 
msr_report->u.action.measurement.msr_elem.token = request_ie->token; + msr_report->u.action.measurement.msr_elem.mode |= IEEE80211_SPCT_MSR_RPRT_MODE_REFUSED; - msr_report->u.action.u.measurement.msr_elem.type = request_ie->type; + msr_report->u.action.measurement.msr_elem.type = request_ie->type; ieee80211_tx_skb(sdata, skb); } @@ -454,7 +449,7 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata, * TODO: Answer basic measurement as unmeasured */ ieee80211_send_refuse_measurement_request(sdata, - &mgmt->u.action.u.measurement.msr_elem, + &mgmt->u.action.measurement.msr_elem, mgmt->sa, mgmt->bssid, - mgmt->u.action.u.measurement.dialog_token); + mgmt->u.action.measurement.dialog_token); } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index dd51a578fbc5..4c31ef8817ce 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -795,6 +795,7 @@ struct sta_info *sta_info_alloc_with_link(struct ieee80211_sub_if_data *sdata, static int sta_info_insert_check(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; + struct ieee80211_sta *same_addr_sta; lockdep_assert_wiphy(sdata->local->hw.wiphy); @@ -810,13 +811,18 @@ static int sta_info_insert_check(struct sta_info *sta) !is_valid_ether_addr(sta->sta.addr))) return -EINVAL; + if (!ieee80211_hw_check(&sdata->local->hw, NEEDS_UNIQUE_STA_ADDR)) + return 0; + /* The RCU read lock is required by rhashtable due to * asynchronous resize/rehash. We also require the mutex * for correctness. 
*/ rcu_read_lock(); - if (ieee80211_hw_check(&sdata->local->hw, NEEDS_UNIQUE_STA_ADDR) && - ieee80211_find_sta_by_ifaddr(&sdata->local->hw, sta->addr, NULL)) { + same_addr_sta = ieee80211_find_sta_by_ifaddr(&sdata->local->hw, + sta->addr, NULL); + /* For NAN, a peer can re-use */ + if (same_addr_sta && same_addr_sta != rcu_access_pointer(sta->sta.nmi)) { rcu_read_unlock(); return -ENOTUNIQ; } @@ -974,7 +980,7 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) } sinfo->generation = local->sta_generation; - cfg80211_new_sta(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); + cfg80211_new_sta(&sdata->wdev, sta->sta.addr, sinfo, GFP_KERNEL); kfree(sinfo); sta_dbg(sdata, "Inserted STA %pM\n", sta->sta.addr); @@ -1294,6 +1300,21 @@ static int __must_check __sta_info_destroy_part1(struct sta_info *sta) lockdep_assert_wiphy(local->hw.wiphy); + if (sdata->vif.type == NL80211_IFTYPE_NAN) { + struct sta_info *sta_iter, *tmp; + + /* Remove all NDI stations associated with this NMI STA */ + list_for_each_entry_safe(sta_iter, tmp, &local->sta_list, list) { + if (rcu_access_pointer(sta_iter->sta.nmi) != &sta->sta) + continue; + sta_info_destroy_addr(sta_iter->sdata, sta_iter->addr); + } + + /* Free and clear the local peer schedule */ + ieee80211_nan_free_peer_sched(sta->sta.nan_sched); + sta->sta.nan_sched = NULL; + } + /* * Before removing the station from the driver and * rate control, it might still start new aggregation @@ -1433,6 +1454,8 @@ static int _sta_info_move_state(struct sta_info *sta, } else if (sta->sta_state == IEEE80211_STA_AUTHORIZED) { ieee80211_vif_dec_num_mcast(sta->sdata); clear_bit(WLAN_STA_AUTHORIZED, &sta->_flags); + if (sta->sdata->vif.type == NL80211_IFTYPE_NAN_DATA) + ieee80211_nan_update_ndi_carrier(sta->sdata); /* * If we have encryption offload, flush (station) queues @@ -1461,6 +1484,8 @@ static int _sta_info_move_state(struct sta_info *sta, set_bit(WLAN_STA_AUTHORIZED, &sta->_flags); ieee80211_check_fast_xmit(sta); 
ieee80211_check_fast_rx(sta); + if (sta->sdata->vif.type == NL80211_IFTYPE_NAN_DATA) + ieee80211_nan_update_ndi_carrier(sta->sdata); } if (sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN || sta->sdata->vif.type == NL80211_IFTYPE_AP) @@ -1557,7 +1582,7 @@ static void __sta_info_destroy_part2(struct sta_info *sta, bool recalc) sta_dbg(sdata, "Removed STA %pM\n", sta->sta.addr); - cfg80211_del_sta_sinfo(sdata->dev, sta->sta.addr, sinfo, GFP_KERNEL); + cfg80211_del_sta_sinfo(&sdata->wdev, sta->sta.addr, sinfo, GFP_KERNEL); kfree(sinfo); ieee80211_sta_debugfs_remove(sta); diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 2875ef7d7946..3e5d003bd31f 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -171,6 +171,7 @@ struct sta_info; * @bar_pending: BAR needs to be re-sent * @amsdu: support A-MSDU within A-MDPU * @ssn: starting sequence number of the session + * @ndp: this session is using NDP Block ACKs * * This structure's lifetime is managed by RCU, assignments to * the array holding it must hold the aggregation mutex. @@ -199,6 +200,7 @@ struct tid_ampdu_tx { u16 failed_bar_ssn; bool bar_pending; bool amsdu; + bool ndp; u8 tid; }; @@ -503,14 +505,14 @@ struct ieee80211_fragment_cache { * @status_stats.ack_signal_filled: last ACK signal validity * @status_stats.avg_ack_signal: average ACK signal * @cur_max_bandwidth: maximum bandwidth to use for TX to the station, - * taken from HT/VHT capabilities or VHT operating mode notification + * taken from HT/VHT capabilities or VHT operating mode notification. + * Invalid for NAN since that is operating on multiple bands. 
* @rx_omi_bw_rx: RX OMI bandwidth restriction to apply for RX * @rx_omi_bw_tx: RX OMI bandwidth restriction to apply for TX * @rx_omi_bw_staging: RX OMI bandwidth restriction to apply later * during finalize * @debugfs_dir: debug filesystem directory dentry * @pub: public (driver visible) link STA data - * TODO Move other link params from sta_info as required for MLD operation */ struct link_sta_info { u8 addr[ETH_ALEN]; diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c index 1dca2fae05a5..44bba7c3e603 100644 --- a/net/mac80211/tdls.c +++ b/net/mac80211/tdls.c @@ -6,7 +6,7 @@ * Copyright 2014, Intel Corporation * Copyright 2014 Intel Mobile Communications GmbH * Copyright 2015 - 2016 Intel Deutschland GmbH - * Copyright (C) 2019, 2021-2025 Intel Corporation + * Copyright (C) 2019, 2021-2026 Intel Corporation */ #include <linux/ieee80211.h> @@ -879,28 +879,23 @@ ieee80211_prep_tdls_direct(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_mgmt *mgmt; - mgmt = skb_put_zero(skb, 24); + if (action_code != WLAN_PUB_ACTION_TDLS_DISCOVER_RES) + return -EINVAL; + + mgmt = skb_put_zero(skb, IEEE80211_MIN_ACTION_SIZE(tdls_discover_resp)); memcpy(mgmt->da, peer, ETH_ALEN); memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN); memcpy(mgmt->bssid, link->u.mgd.bssid, ETH_ALEN); mgmt->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); - switch (action_code) { - case WLAN_PUB_ACTION_TDLS_DISCOVER_RES: - skb_put(skb, 1 + sizeof(mgmt->u.action.u.tdls_discover_resp)); - mgmt->u.action.category = WLAN_CATEGORY_PUBLIC; - mgmt->u.action.u.tdls_discover_resp.action_code = - WLAN_PUB_ACTION_TDLS_DISCOVER_RES; - mgmt->u.action.u.tdls_discover_resp.dialog_token = - dialog_token; - mgmt->u.action.u.tdls_discover_resp.capability = - cpu_to_le16(ieee80211_get_tdls_sta_capab(link, - status_code)); - break; - default: - return -EINVAL; - } + mgmt->u.action.category = WLAN_CATEGORY_PUBLIC; + 
mgmt->u.action.action_code = WLAN_PUB_ACTION_TDLS_DISCOVER_RES; + + mgmt->u.action.tdls_discover_resp.dialog_token = dialog_token; + mgmt->u.action.tdls_discover_resp.capability = + cpu_to_le16(ieee80211_get_tdls_sta_capab(link, + status_code)); return 0; } diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index c04d4547e8f4..71cf88039bd4 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018 - 2024 Intel Corporation + * Copyright (C) 2018-2024, 2026 Intel Corporation */ #if !defined(__MAC80211_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) @@ -37,64 +37,46 @@ #define VIF_PR_FMT " vif:%s(%d%s)" #define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" -#define CHANDEF_ENTRY __field(u32, control_freq) \ - __field(u32, freq_offset) \ - __field(u32, chan_width) \ - __field(u32, center_freq1) \ - __field(u32, freq1_offset) \ - __field(u32, center_freq2) -#define CHANDEF_ASSIGN(c) \ - __entry->control_freq = (c) ? ((c)->chan ? (c)->chan->center_freq : 0) : 0; \ - __entry->freq_offset = (c) ? ((c)->chan ? (c)->chan->freq_offset : 0) : 0; \ - __entry->chan_width = (c) ? (c)->width : 0; \ - __entry->center_freq1 = (c) ? (c)->center_freq1 : 0; \ - __entry->freq1_offset = (c) ? (c)->freq1_offset : 0; \ - __entry->center_freq2 = (c) ? (c)->center_freq2 : 0; -#define CHANDEF_PR_FMT " chandef(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)" -#define CHANDEF_PR_ARG __entry->control_freq, __entry->freq_offset, __entry->chan_width, \ - __entry->center_freq1, __entry->freq1_offset, __entry->center_freq2 - -#define MIN_CHANDEF_ENTRY \ - __field(u32, min_control_freq) \ - __field(u32, min_freq_offset) \ - __field(u32, min_chan_width) \ - __field(u32, min_center_freq1) \ - __field(u32, min_freq1_offset) \ - __field(u32, min_center_freq2) - -#define MIN_CHANDEF_ASSIGN(c) \ - __entry->min_control_freq = (c)->chan ? 
(c)->chan->center_freq : 0; \ - __entry->min_freq_offset = (c)->chan ? (c)->chan->freq_offset : 0; \ - __entry->min_chan_width = (c)->width; \ - __entry->min_center_freq1 = (c)->center_freq1; \ - __entry->min_freq1_offset = (c)->freq1_offset; \ - __entry->min_center_freq2 = (c)->center_freq2; -#define MIN_CHANDEF_PR_FMT " mindef(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)" -#define MIN_CHANDEF_PR_ARG __entry->min_control_freq, __entry->min_freq_offset, \ - __entry->min_chan_width, \ - __entry->min_center_freq1, __entry->min_freq1_offset, \ - __entry->min_center_freq2 - -#define AP_CHANDEF_ENTRY \ - __field(u32, ap_control_freq) \ - __field(u32, ap_freq_offset) \ - __field(u32, ap_chan_width) \ - __field(u32, ap_center_freq1) \ - __field(u32, ap_freq1_offset) \ - __field(u32, ap_center_freq2) - -#define AP_CHANDEF_ASSIGN(c) \ - __entry->ap_control_freq = (c)->chan ? (c)->chan->center_freq : 0;\ - __entry->ap_freq_offset = (c)->chan ? (c)->chan->freq_offset : 0;\ - __entry->ap_chan_width = (c)->chan ? (c)->width : 0; \ - __entry->ap_center_freq1 = (c)->chan ? (c)->center_freq1 : 0; \ - __entry->ap_freq1_offset = (c)->chan ? (c)->freq1_offset : 0; \ - __entry->ap_center_freq2 = (c)->chan ? (c)->center_freq2 : 0; -#define AP_CHANDEF_PR_FMT " ap(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz)" -#define AP_CHANDEF_PR_ARG __entry->ap_control_freq, __entry->ap_freq_offset, \ - __entry->ap_chan_width, \ - __entry->ap_center_freq1, __entry->ap_freq1_offset, \ - __entry->ap_center_freq2 +#define __CHANDEF_ENTRY(n) \ + __field(u32, n##control_freq) \ + __field(u32, n##freq_offset) \ + __field(u32, n##chan_width) \ + __field(u32, n##center_freq1) \ + __field(u32, n##freq1_offset) \ + __field(u32, n##center_freq2) \ + __field(u16, n##punctured) +#define __CHANDEF_ASSIGN(n, c) \ + __entry->n##control_freq = (c) && (c)->chan ? \ + (c)->chan->center_freq : 0; \ + __entry->n##freq_offset = (c) && (c)->chan ? \ + (c)->chan->freq_offset : 0; \ + __entry->n##chan_width = (c) ? 
(c)->width : 0; \ + __entry->n##center_freq1 = (c) ? (c)->center_freq1 : 0; \ + __entry->n##freq1_offset = (c) ? (c)->freq1_offset : 0; \ + __entry->n##center_freq2 = (c) ? (c)->center_freq2 : 0; \ + __entry->n##punctured = (c) ? (c)->punctured : 0; +#define __CHANDEF_PR_FMT(n) \ + " " #n "(%d.%03d MHz,width:%d,center: %d.%03d/%d MHz, punct:0x%x)" +#define __CHANDEF_PR_ARG(n) \ + __entry->n##control_freq, __entry->n##freq_offset, \ + __entry->n##chan_width, __entry->n##center_freq1, \ + __entry->n##freq1_offset, __entry->n##center_freq2, \ + __entry->n##punctured + +#define CHANDEF_ENTRY __CHANDEF_ENTRY() +#define CHANDEF_ASSIGN(c) __CHANDEF_ASSIGN(, c) +#define CHANDEF_PR_FMT __CHANDEF_PR_FMT(chandef) +#define CHANDEF_PR_ARG __CHANDEF_PR_ARG() + +#define MIN_CHANDEF_ENTRY __CHANDEF_ENTRY(min) +#define MIN_CHANDEF_ASSIGN(c) __CHANDEF_ASSIGN(min, c) +#define MIN_CHANDEF_PR_FMT __CHANDEF_PR_FMT(mindef) +#define MIN_CHANDEF_PR_ARG __CHANDEF_PR_ARG(min) + +#define AP_CHANDEF_ENTRY __CHANDEF_ENTRY(ap) +#define AP_CHANDEF_ASSIGN(c) __CHANDEF_ASSIGN(ap, c) +#define AP_CHANDEF_PR_FMT __CHANDEF_PR_FMT(ap) +#define AP_CHANDEF_PR_ARG __CHANDEF_PR_ARG(ap) #define CHANCTX_ENTRY CHANDEF_ENTRY \ MIN_CHANDEF_ENTRY \ @@ -1796,9 +1778,8 @@ TRACE_EVENT(drv_switch_vif_chanctx, SWITCH_ENTRY_ASSIGN(vif.vif_type, vif->type); SWITCH_ENTRY_ASSIGN(vif.p2p, vif->p2p); SWITCH_ENTRY_ASSIGN(link_id, link_conf->link_id); - strncpy(local_vifs[i].vif.vif_name, - sdata->name, - sizeof(local_vifs[i].vif.vif_name)); + strscpy_pad(local_vifs[i].vif.vif_name, + sdata->name); SWITCH_ENTRY_ASSIGN(old_chandef.control_freq, old_ctx->def.chan->center_freq); SWITCH_ENTRY_ASSIGN(old_chandef.freq_offset, @@ -3385,6 +3366,37 @@ TRACE_EVENT(drv_set_eml_op_mode, ) ); +TRACE_EVENT(drv_nan_peer_sched_changed, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + + TP_ARGS(local, sdata, sta), + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + STA_ENTRY + 
__array(u8, map_ids, CFG80211_NAN_MAX_PEER_MAPS) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + STA_ASSIGN; + for (int i = 0; i < CFG80211_NAN_MAX_PEER_MAPS; i++) + __entry->map_ids[i] = sta->nan_sched ? + sta->nan_sched->maps[i].map_id : + 0xff; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT + " map_ids=[%u, %u]", + LOCAL_PR_ARG, VIF_PR_ARG, STA_PR_ARG, + __entry->map_ids[0], __entry->map_ids[1] + ) +); + #endif /* !__MAC80211_DRIVER_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index b7aedaab8483..b487d2330f25 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -287,10 +287,7 @@ ieee80211_tx_h_check_assoc(struct ieee80211_tx_data *tx) * active scan) are allowed, all other frames should not be * sent and we should not get here, but if we do * nonetheless, drop them to avoid sending them - * off-channel. See the link below and - * ieee80211_start_scan() for more. - * - * http://article.gmane.org/gmane.linux.kernel.wireless.general/30089 + * off-channel. See __ieee80211_start_scan() for more. */ return TX_DROP; @@ -1316,13 +1313,19 @@ static struct txq_info *ieee80211_get_txq(struct ieee80211_local *local, unlikely(!ieee80211_is_data_present(hdr->frame_control))) { if ((!ieee80211_is_mgmt(hdr->frame_control) || ieee80211_is_bufferable_mmpdu(skb) || - vif->type == NL80211_IFTYPE_STATION) && + vif->type == NL80211_IFTYPE_STATION || + vif->type == NL80211_IFTYPE_NAN || + vif->type == NL80211_IFTYPE_NAN_DATA) && sta && sta->uploaded) { /* * This will be NULL if the driver didn't set the * opt-in hardware flag. 
*/ txq = sta->sta.txq[IEEE80211_NUM_TIDS]; + } else if ((!ieee80211_is_mgmt(hdr->frame_control) || + ieee80211_is_bufferable_mmpdu(skb)) && + !sta) { + txq = vif->txq_mgmt; } } else if (sta) { u8 tid = skb->priority & IEEE80211_QOS_CTL_TID_MASK; @@ -1515,9 +1518,15 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, txqi->txq.vif = &sdata->vif; if (!sta) { - sdata->vif.txq = &txqi->txq; - txqi->txq.tid = 0; - txqi->txq.ac = IEEE80211_AC_BE; + txqi->txq.tid = tid; + + if (tid == IEEE80211_NUM_TIDS) { + sdata->vif.txq_mgmt = &txqi->txq; + txqi->txq.ac = IEEE80211_AC_VO; + } else { + sdata->vif.txq = &txqi->txq; + txqi->txq.ac = IEEE80211_AC_BE; + } return; } @@ -2534,6 +2543,13 @@ int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata, if (!sta) return -ENOLINK; break; + case NL80211_IFTYPE_NAN_DATA: + if (is_multicast_ether_addr(skb->data)) { + *sta_out = ERR_PTR(-ENOENT); + return 0; + } + sta = sta_info_get(sdata, skb->data); + break; default: return -EINVAL; } @@ -2827,18 +2843,37 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata, memcpy(hdr.addr3, sdata->u.ibss.bssid, ETH_ALEN); hdrlen = 24; break; + case NL80211_IFTYPE_NAN_DATA: { + struct ieee80211_sub_if_data *nmi; + + /* DA SA Cluster ID */ + memcpy(hdr.addr1, skb->data, ETH_ALEN); + memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); + nmi = rcu_dereference(sdata->u.nan_data.nmi); + if (!nmi) { + ret = -ENOTCONN; + goto free; + } + memcpy(hdr.addr3, nmi->wdev.u.nan.cluster_id, ETH_ALEN); + hdrlen = 24; + break; + } default: ret = -EINVAL; goto free; } if (!chanctx_conf) { - if (!ieee80211_vif_is_mld(&sdata->vif)) { + if (sdata->vif.type == NL80211_IFTYPE_NAN_DATA) { + /* NAN operates on multiple bands */ + band = NUM_NL80211_BANDS; + } else if (!ieee80211_vif_is_mld(&sdata->vif)) { ret = -ENOTCONN; goto free; + } else { + /* MLD transmissions must not rely on the band */ + band = 0; } - /* MLD transmissions must not rely on the band */ - band = 0; } else { 
band = chanctx_conf->def.chan->band; } @@ -5317,6 +5352,38 @@ static int ieee80211_beacon_protect(struct sk_buff *skb, return 0; } +int ieee80211_encrypt_tx_skb(struct sk_buff *skb) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + struct ieee80211_sub_if_data *sdata; + struct sk_buff *check_skb; + struct ieee80211_tx_data tx; + ieee80211_tx_result res; + + if (!info->control.hw_key) + return 0; + + memset(&tx, 0, sizeof(tx)); + tx.key = container_of(info->control.hw_key, struct ieee80211_key, conf); + /* NULL it out now so we do full SW crypto */ + info->control.hw_key = NULL; + __skb_queue_head_init(&tx.skbs); + __skb_queue_tail(&tx.skbs, skb); + + sdata = IEEE80211_DEV_TO_SUB_IF(skb->dev); + tx.sdata = sdata; + tx.local = sdata->local; + res = ieee80211_tx_h_encrypt(&tx); + check_skb = __skb_dequeue(&tx.skbs); + /* we may crash after this, but it'd be a bug in crypto */ + WARN_ON(check_skb != skb); + if (WARN_ON_ONCE(res != TX_CONTINUE)) + return -EINVAL; + + return 0; +} +EXPORT_SYMBOL_GPL(ieee80211_encrypt_tx_skb); + static void ieee80211_beacon_get_finish(struct ieee80211_hw *hw, struct ieee80211_vif *vif, @@ -5839,21 +5906,28 @@ out: EXPORT_SYMBOL(ieee80211_proberesp_get); struct sk_buff *ieee80211_get_fils_discovery_tmpl(struct ieee80211_hw *hw, - struct ieee80211_vif *vif) + struct ieee80211_vif *vif, + unsigned int link_id) { struct sk_buff *skb = NULL; struct fils_discovery_data *tmpl = NULL; struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_link_data *link; if (sdata->vif.type != NL80211_IFTYPE_AP) return NULL; - rcu_read_lock(); - tmpl = rcu_dereference(sdata->deflink.u.ap.fils_discovery); - if (!tmpl) { - rcu_read_unlock(); + if (link_id >= IEEE80211_MLD_MAX_NUM_LINKS) + return NULL; + + guard(rcu)(); + link = rcu_dereference(sdata->link[link_id]); + if (!link) + return NULL; + + tmpl = rcu_dereference(link->u.ap.fils_discovery); + if (!tmpl) return NULL; - } skb = dev_alloc_skb(sdata->local->hw.extra_tx_headroom 
+ tmpl->len); if (skb) { @@ -5861,28 +5935,34 @@ struct sk_buff *ieee80211_get_fils_discovery_tmpl(struct ieee80211_hw *hw, skb_put_data(skb, tmpl->data, tmpl->len); } - rcu_read_unlock(); return skb; } EXPORT_SYMBOL(ieee80211_get_fils_discovery_tmpl); struct sk_buff * ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, - struct ieee80211_vif *vif) + struct ieee80211_vif *vif, + unsigned int link_id) { struct sk_buff *skb = NULL; struct unsol_bcast_probe_resp_data *tmpl = NULL; struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_link_data *link; if (sdata->vif.type != NL80211_IFTYPE_AP) return NULL; - rcu_read_lock(); - tmpl = rcu_dereference(sdata->deflink.u.ap.unsol_bcast_probe_resp); - if (!tmpl) { - rcu_read_unlock(); + if (link_id >= IEEE80211_MLD_MAX_NUM_LINKS) + return NULL; + + guard(rcu)(); + link = rcu_dereference(sdata->link[link_id]); + if (!link) + return NULL; + + tmpl = rcu_dereference(link->u.ap.unsol_bcast_probe_resp); + if (!tmpl) return NULL; - } skb = dev_alloc_skb(sdata->local->hw.extra_tx_headroom + tmpl->len); if (skb) { @@ -5890,7 +5970,6 @@ ieee80211_get_unsol_bcast_probe_resp_tmpl(struct ieee80211_hw *hw, skb_put_data(skb, tmpl->data, tmpl->len); } - rcu_read_unlock(); return skb; } EXPORT_SYMBOL(ieee80211_get_unsol_bcast_probe_resp_tmpl); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b2e6c8b98381..b093bc203c81 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -325,7 +325,7 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac) struct ieee80211_vif *vif = &sdata->vif; struct fq *fq = &local->fq; struct ps_data *ps = NULL; - struct txq_info *txqi; + struct txq_info *txqi = NULL; struct sta_info *sta; int i; @@ -344,37 +344,49 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac) for (i = 0; i < ARRAY_SIZE(sta->sta.txq); i++) { struct ieee80211_txq *txq = sta->sta.txq[i]; + struct txq_info *sta_txqi; if (!txq) continue; - 
txqi = to_txq_info(txq); + sta_txqi = to_txq_info(txq); if (ac != txq->ac) continue; if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY, - &txqi->flags)) + &sta_txqi->flags)) continue; spin_unlock(&fq->lock); - drv_wake_tx_queue(local, txqi); + drv_wake_tx_queue(local, sta_txqi); spin_lock(&fq->lock); } } - if (!vif->txq) - goto out; + if (vif->txq) { + txqi = to_txq_info(vif->txq); - txqi = to_txq_info(vif->txq); + /* txq and txq_mgmt are mutually exclusive */ + WARN_ON_ONCE(vif->txq_mgmt); - if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY, &txqi->flags) || - (ps && atomic_read(&ps->num_sta_ps)) || ac != vif->txq->ac) - goto out; + if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY, &txqi->flags) || + (ps && atomic_read(&ps->num_sta_ps)) || + ac != vif->txq->ac) + txqi = NULL; + } else if (vif->txq_mgmt) { + txqi = to_txq_info(vif->txq_mgmt); + + if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY, &txqi->flags) || + ac != vif->txq_mgmt->ac) + txqi = NULL; + } spin_unlock(&fq->lock); - drv_wake_tx_queue(local, txqi); + if (txqi) + drv_wake_tx_queue(local, txqi); + local_bh_enable(); return; out: @@ -1732,16 +1744,12 @@ static void ieee80211_reconfig_stations(struct ieee80211_sub_if_data *sdata) } } -static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) +static int +ieee80211_reconfig_nan_offload_de(struct ieee80211_sub_if_data *sdata) { struct cfg80211_nan_func *func, **funcs; int res, id, i = 0; - res = drv_start_nan(sdata->local, sdata, - &sdata->u.nan.conf); - if (WARN_ON(res)) - return res; - funcs = kzalloc_objs(*funcs, sdata->local->hw.max_nan_de_entries + 1); if (!funcs) return -ENOMEM; @@ -1750,12 +1758,12 @@ static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) * This is a little bit ugly. We need to call a potentially sleeping * callback for each NAN function, so we can't hold the spinlock. 
*/ - spin_lock_bh(&sdata->u.nan.func_lock); + spin_lock_bh(&sdata->u.nan.de.func_lock); - idr_for_each_entry(&sdata->u.nan.function_inst_ids, func, id) + idr_for_each_entry(&sdata->u.nan.de.function_inst_ids, func, id) funcs[i++] = func; - spin_unlock_bh(&sdata->u.nan.func_lock); + spin_unlock_bh(&sdata->u.nan.de.func_lock); for (i = 0; funcs[i]; i++) { res = drv_add_nan_func(sdata->local, sdata, funcs[i]); @@ -1767,6 +1775,77 @@ static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) } kfree(funcs); + return res; +} + +static int ieee80211_reconfig_nan(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + struct ieee80211_sub_if_data *ndi_sdata; + struct sta_info *sta; + int res; + + res = drv_start_nan(local, sdata, &sdata->u.nan.conf); + if (WARN_ON(res)) + return res; + + if (!(sdata->local->hw.wiphy->nan_capa.flags & WIPHY_NAN_FLAGS_USERSPACE_DE)) + return ieee80211_reconfig_nan_offload_de(sdata); + + drv_vif_cfg_changed(sdata->local, sdata, BSS_CHANGED_NAN_LOCAL_SCHED); + + /* Now we can add all the NDIs to the driver */ + list_for_each_entry(ndi_sdata, &local->interfaces, list) { + if (ndi_sdata->vif.type == NL80211_IFTYPE_NAN_DATA) { + res = drv_add_interface(local, ndi_sdata); + if (WARN_ON(res)) + return res; + } + } + + /* Add NMI stations (stations on the NAN interface) */ + list_for_each_entry(sta, &local->sta_list, list) { + enum ieee80211_sta_state state; + + if (!sta->uploaded || sta->sdata != sdata) + continue; + + for (state = IEEE80211_STA_NOTEXIST; state < sta->sta_state; + state++) { + res = drv_sta_state(local, sdata, sta, state, + state + 1); + if (WARN_ON(res)) + return res; + } + + /* Add peer schedules for NMI stations that have them */ + if (!sta->sta.nan_sched) + continue; + + res = drv_nan_peer_sched_changed(local, sdata, sta); + if (WARN_ON(res)) + return res; + } + + /* Add NDI stations (stations on NAN_DATA interfaces) */ + list_for_each_entry(sta, &local->sta_list, list) { + enum 
ieee80211_sta_state state; + + if (!sta->uploaded || + sta->sdata->vif.type != NL80211_IFTYPE_NAN_DATA) + continue; + + if (WARN_ON(!sta->sta.nmi)) + continue; + + for (state = IEEE80211_STA_NOTEXIST; state < sta->sta_state; + state++) { + res = drv_sta_state(local, sta->sdata, sta, state, + state + 1); + if (WARN_ON(res)) + return res; + } + } return 0; } @@ -1921,6 +2000,9 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (sdata->vif.type == NL80211_IFTYPE_MONITOR && !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) continue; + /* These vifs can't be added before NAN was started */ + if (sdata->vif.type == NL80211_IFTYPE_NAN_DATA) + continue; if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && ieee80211_sdata_running(sdata)) { res = drv_add_interface(local, sdata); @@ -1938,6 +2020,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) if (sdata->vif.type == NL80211_IFTYPE_MONITOR && !ieee80211_hw_check(&local->hw, NO_VIRTUAL_MONITOR)) continue; + if (sdata->vif.type == NL80211_IFTYPE_NAN_DATA) + continue; if (sdata->vif.type != NL80211_IFTYPE_AP_VLAN && ieee80211_sdata_running(sdata)) drv_remove_interface(local, sdata); @@ -2021,6 +2105,10 @@ int ieee80211_reconfig(struct ieee80211_local *local) case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: break; + case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: + /* NAN stations are handled later */ + break; case NL80211_IFTYPE_ADHOC: if (sdata->vif.cfg.ibss_joined) WARN_ON(drv_join_ibss(local, sdata)); @@ -2118,6 +2206,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) return res; } break; + case NL80211_IFTYPE_NAN_DATA: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_P2P_DEVICE: @@ -3410,20 +3499,7 @@ u8 ieee80211_mcs_to_chains(const struct ieee80211_mcs_info *mcs) return 1; } -/** - * ieee80211_calculate_rx_timestamp - calculate timestamp in frame - * @local: mac80211 hw info struct - * @status: RX status - * @mpdu_len: total MPDU length (including 
FCS) - * @mpdu_offset: offset into MPDU to calculate timestamp at - * - * This function calculates the RX timestamp at the given MPDU offset, taking - * into account what the RX timestamp was. An offset of 0 will just normalize - * the timestamp to TSF at beginning of MPDU reception. - * - * Returns: the calculated timestamp - */ -u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, +u64 ieee80211_calculate_rx_timestamp(struct ieee80211_hw *hw, struct ieee80211_rx_status *status, unsigned int mpdu_len, unsigned int mpdu_offset) @@ -3542,7 +3618,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, case RX_ENC_LEGACY: { struct ieee80211_supported_band *sband; - sband = local->hw.wiphy->bands[status->band]; + sband = hw->wiphy->bands[status->band]; ri.legacy = sband->bitrates[status->rate_idx].bitrate; if (mactime_plcp_start) { @@ -3574,6 +3650,7 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, return ts; } +EXPORT_SYMBOL_GPL(ieee80211_calculate_rx_timestamp); /* Cancel CAC for the interfaces under the specified @local. If @ctx is * also provided, only the interfaces using that ctx will be canceled. 
@@ -3766,12 +3843,11 @@ again: int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, struct cfg80211_csa_settings *csa_settings) { + int hdr_len = IEEE80211_MIN_ACTION_SIZE(chan_switch); struct sk_buff *skb; struct ieee80211_mgmt *mgmt; struct ieee80211_local *local = sdata->local; int freq; - int hdr_len = offsetofend(struct ieee80211_mgmt, - u.action.u.chan_switch); u8 *pos; if (sdata->vif.type != NL80211_IFTYPE_ADHOC && @@ -3800,7 +3876,7 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, memcpy(mgmt->bssid, ifibss->bssid, ETH_ALEN); } mgmt->u.action.category = WLAN_CATEGORY_SPECTRUM_MGMT; - mgmt->u.action.u.chan_switch.action_code = WLAN_ACTION_SPCT_CHL_SWITCH; + mgmt->u.action.action_code = WLAN_ACTION_SPCT_CHL_SWITCH; pos = skb_put(skb, 5); *pos++ = WLAN_EID_CHANNEL_SWITCH; /* EID */ *pos++ = 3; /* IE length */ diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index b099d79e8fbb..f3bb5a561a38 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -4,7 +4,7 @@ * * Portions of this file * Copyright(c) 2015 - 2016 Intel Deutschland GmbH - * Copyright (C) 2018 - 2024 Intel Corporation + * Copyright (C) 2018 - 2026 Intel Corporation */ #include <linux/ieee80211.h> @@ -115,6 +115,7 @@ void ieee80211_apply_vhtcap_overrides(struct ieee80211_sub_if_data *sdata, void ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, + const struct ieee80211_sta_vht_cap *own_vht_cap, const struct ieee80211_vht_cap *vht_cap_ie, const struct ieee80211_vht_cap *vht_cap_ie2, struct link_sta_info *link_sta) @@ -122,7 +123,6 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta_vht_cap *vht_cap = &link_sta->pub->vht_cap; struct ieee80211_sta_vht_cap own_cap; u32 cap_info, i; - bool have_80mhz; u32 mpdu_len; memset(vht_cap, 0, sizeof(*vht_cap)); @@ -130,22 +130,29 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, if 
(!link_sta->pub->ht_cap.ht_supported) return; - if (!vht_cap_ie || !sband->vht_cap.vht_supported) + if (!vht_cap_ie || !own_vht_cap->vht_supported) return; - /* Allow VHT if at least one channel on the sband supports 80 MHz */ - have_80mhz = false; - for (i = 0; i < sband->n_channels; i++) { - if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED | - IEEE80211_CHAN_NO_80MHZ)) - continue; + /* NDI station are using the capabilities from the NMI station */ + if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_NAN_DATA)) + return; - have_80mhz = true; - break; - } + if (sband) { + /* Allow VHT if at least one channel on the sband supports 80 MHz */ + bool have_80mhz = false; - if (!have_80mhz) - return; + for (i = 0; i < sband->n_channels; i++) { + if (sband->channels[i].flags & (IEEE80211_CHAN_DISABLED | + IEEE80211_CHAN_NO_80MHZ)) + continue; + + have_80mhz = true; + break; + } + + if (!have_80mhz) + return; + } /* * A VHT STA must support 40 MHz, but if we verify that here @@ -156,7 +163,7 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, vht_cap->vht_supported = true; - own_cap = sband->vht_cap; + own_cap = *own_vht_cap; /* * If user has specified capability overrides, take care * of that if the station we're setting up is the AP that @@ -317,7 +324,8 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, IEEE80211_STA_RX_BW_160; } - link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta); + if (sdata->vif.type != NL80211_IFTYPE_NAN) + link_sta->pub->bandwidth = ieee80211_sta_cur_vht_bw(link_sta); /* * Work around the Cisco 9115 FW 17.3 bug by taking the min of @@ -370,6 +378,10 @@ __ieee80211_sta_cap_rx_bw(struct link_sta_info *link_sta, } else { struct ieee80211_bss_conf *link_conf; + if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_NAN_DATA || + sdata->vif.type == NL80211_IFTYPE_NAN)) + return IEEE80211_STA_RX_BW_20; + rcu_read_lock(); link_conf = rcu_dereference(sdata->vif.link_conf[link_id]); band = 
link_conf->chanreq.oper.chan->band; @@ -515,6 +527,11 @@ _ieee80211_sta_cur_vht_bw(struct link_sta_info *link_sta, } else { struct ieee80211_bss_conf *link_conf; + /* NAN operates on multiple channels so a chandef must be given */ + if (WARN_ON_ONCE(sta->sdata->vif.type == NL80211_IFTYPE_NAN || + sta->sdata->vif.type == NL80211_IFTYPE_NAN_DATA)) + return IEEE80211_STA_RX_BW_20; + rcu_read_lock(); link_conf = rcu_dereference(sta->sdata->vif.link_conf[link_sta->link_id]); if (WARN_ON_ONCE(!link_conf)) { @@ -723,17 +740,17 @@ void ieee80211_process_mu_groups(struct ieee80211_sub_if_data *sdata, if (!link_conf->mu_mimo_owner) return; - if (!memcmp(mgmt->u.action.u.vht_group_notif.position, + if (!memcmp(mgmt->u.action.vht_group_notif.position, link_conf->mu_group.position, WLAN_USER_POSITION_LEN) && - !memcmp(mgmt->u.action.u.vht_group_notif.membership, + !memcmp(mgmt->u.action.vht_group_notif.membership, link_conf->mu_group.membership, WLAN_MEMBERSHIP_LEN)) return; memcpy(link_conf->mu_group.membership, - mgmt->u.action.u.vht_group_notif.membership, + mgmt->u.action.vht_group_notif.membership, WLAN_MEMBERSHIP_LEN); memcpy(link_conf->mu_group.position, - mgmt->u.action.u.vht_group_notif.position, + mgmt->u.action.vht_group_notif.position, WLAN_USER_POSITION_LEN); ieee80211_link_info_change_notify(sdata, link, diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 59324b367bdd..be3a2e95303c 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -18,7 +18,6 @@ #include <crypto/utils.h> #include "ieee80211_i.h" -#include "michael.h" #include "tkip.h" #include "aes_ccm.h" #include "aes_cmac.h" @@ -315,7 +314,8 @@ ieee80211_crypto_tkip_decrypt(struct ieee80211_rx_data *rx) * Calculate AAD for CCMP/GCMP, returning qos_tid since we * need that in CCMP also for b_0. 
*/ -static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad, bool spp_amsdu) +static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad, bool spp_amsdu, + bool aad_nonce_computed) { struct ieee80211_hdr *hdr = (void *)skb->data; __le16 mask_fc; @@ -358,7 +358,8 @@ static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad, bool spp_amsdu) * FC | A1 | A2 | A3 | SC | [A4] | [QC] */ put_unaligned_be16(len_a, &aad[0]); put_unaligned(mask_fc, (__le16 *)&aad[2]); - memcpy(&aad[4], &hdr->addrs, 3 * ETH_ALEN); + if (!aad_nonce_computed) + memcpy(&aad[4], &hdr->addrs, 3 * ETH_ALEN); /* Mask Seq#, leave Frag# */ aad[22] = *((u8 *) &hdr->seq_ctrl) & 0x0f; @@ -377,10 +378,10 @@ static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad, bool spp_amsdu) } static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad, - bool spp_amsdu) + bool spp_amsdu, bool aad_nonce_computed) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - u8 qos_tid = ccmp_gcmp_aad(skb, aad, spp_amsdu); + u8 qos_tid = ccmp_gcmp_aad(skb, aad, spp_amsdu, aad_nonce_computed); /* In CCM, the initial vectors (IV) used for CTR mode encryption and CBC * mode authentication are not allowed to collide, yet both are derived @@ -395,7 +396,8 @@ static void ccmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *b_0, u8 *aad, * Nonce Flags: Priority (b0..b3) | Management (b4) | Reserved (b5..b7) */ b_0[1] = qos_tid | (ieee80211_is_mgmt(hdr->frame_control) << 4); - memcpy(&b_0[2], hdr->addr2, ETH_ALEN); + if (!aad_nonce_computed) + memcpy(&b_0[2], hdr->addr2, ETH_ALEN); memcpy(&b_0[8], pn, IEEE80211_CCMP_PN_LEN); } @@ -488,7 +490,8 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, pos += IEEE80211_CCMP_HDR_LEN; ccmp_special_blocks(skb, pn, b_0, aad, - key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU); + key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU, + false); return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len, skb_put(skb, mic_len)); } @@ -566,9 +569,22 @@ 
ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, if (!(status->flag & RX_FLAG_DECRYPTED)) { u8 aad[2 * AES_BLOCK_SIZE]; u8 b_0[AES_BLOCK_SIZE]; + bool aad_nonce_computed = false; + + if (is_unicast_ether_addr(hdr->addr1) && + !ieee80211_is_data(hdr->frame_control)) { + /* AAD computation */ + memcpy(&aad[4], rx->link_addrs, 3 * ETH_ALEN); + /* Nonce computation */ + ether_addr_copy(&b_0[2], + &rx->link_addrs[ETH_ALEN]); + aad_nonce_computed = true; + } + /* hardware didn't decrypt/verify MIC */ ccmp_special_blocks(skb, pn, b_0, aad, - key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU); + key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU, + aad_nonce_computed); if (ieee80211_aes_ccm_decrypt( key->u.ccmp.tfm, b_0, aad, @@ -593,14 +609,15 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, } static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad, - bool spp_amsdu) + bool spp_amsdu, bool aad_nonce_computed) { struct ieee80211_hdr *hdr = (void *)skb->data; - memcpy(j_0, hdr->addr2, ETH_ALEN); + if (!aad_nonce_computed) + memcpy(j_0, hdr->addr2, ETH_ALEN); memcpy(&j_0[ETH_ALEN], pn, IEEE80211_GCMP_PN_LEN); - ccmp_gcmp_aad(skb, aad, spp_amsdu); + ccmp_gcmp_aad(skb, aad, spp_amsdu, aad_nonce_computed); } static inline void gcmp_pn2hdr(u8 *hdr, const u8 *pn, int key_id) @@ -690,7 +707,8 @@ static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) pos += IEEE80211_GCMP_HDR_LEN; gcmp_special_blocks(skb, pn, j_0, aad, - key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU); + key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU, + false); return ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len, skb_put(skb, IEEE80211_GCMP_MIC_LEN)); } @@ -763,9 +781,21 @@ ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) if (!(status->flag & RX_FLAG_DECRYPTED)) { u8 aad[2 * AES_BLOCK_SIZE]; u8 j_0[AES_BLOCK_SIZE]; + bool aad_nonce_computed = false; + + if (is_unicast_ether_addr(hdr->addr1) && + 
!ieee80211_is_data(hdr->frame_control)) { + /* AAD computation */ + memcpy(&aad[4], rx->link_addrs, 3 * ETH_ALEN); + /* Nonce computation */ + ether_addr_copy(&j_0[0], + &rx->link_addrs[ETH_ALEN]); + aad_nonce_computed = true; + } /* hardware didn't decrypt/verify MIC */ gcmp_special_blocks(skb, pn, j_0, aad, - key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU); + key->conf.flags & IEEE80211_KEY_FLAG_SPP_AMSDU, + aad_nonce_computed); if (ieee80211_aes_gcm_decrypt( key->u.gcmp.tfm, j_0, aad, diff --git a/net/mctp/route.c b/net/mctp/route.c index 59ad60b88563..26fb8c6bbad2 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -880,9 +880,25 @@ static bool mctp_rt_compare_exact(struct mctp_route *rt1, rt1->max == rt2->max; } +static mctp_eid_t mctp_dev_saddr(struct mctp_dev *dev) +{ + mctp_eid_t addr = MCTP_ADDR_NULL; + unsigned long flags; + + spin_lock_irqsave(&dev->addrs_lock, flags); + if (dev->num_addrs) { + /* use the outbound interface's first address as our source */ + addr = dev->addrs[0]; + } + spin_unlock_irqrestore(&dev->addrs_lock, flags); + + return addr; +} + /* must only be called on a direct route, as the final output hop */ static void mctp_dst_from_route(struct mctp_dst *dst, mctp_eid_t eid, - unsigned int mtu, struct mctp_route *route) + mctp_eid_t saddr, unsigned int mtu, + struct mctp_route *route) { mctp_dev_hold(route->dev); dst->nexthop = eid; @@ -892,6 +908,7 @@ static void mctp_dst_from_route(struct mctp_dst *dst, mctp_eid_t eid, dst->mtu = min(dst->mtu, mtu); dst->halen = 0; dst->output = route->output; + dst->saddr = saddr; } int mctp_dst_from_extaddr(struct mctp_dst *dst, struct net *net, int ifindex, @@ -924,6 +941,7 @@ int mctp_dst_from_extaddr(struct mctp_dst *dst, struct net *net, int ifindex, dst->halen = halen; dst->output = mctp_dst_output; dst->nexthop = 0; + dst->saddr = mctp_dev_saddr(dev); memcpy(dst->haddr, haddr, halen); rc = 0; @@ -978,8 +996,14 @@ int mctp_route_lookup(struct net *net, unsigned int dnet, mtu = mtu ?: 
rt->mtu; if (rt->dst_type == MCTP_ROUTE_DIRECT) { + mctp_eid_t saddr = mctp_dev_saddr(rt->dev); + + /* cannot do gateway-ed routes without a src */ + if (saddr == MCTP_ADDR_NULL && depth != 0) + break; + if (dst) - mctp_dst_from_route(dst, daddr, mtu, rt); + mctp_dst_from_route(dst, daddr, saddr, mtu, rt); rc = 0; break; @@ -993,29 +1017,22 @@ int mctp_route_lookup(struct net *net, unsigned int dnet, return rc; } -static int mctp_route_lookup_null(struct net *net, struct net_device *dev, - struct mctp_dst *dst) +static int mctp_dst_input_null(struct net *net, struct net_device *dev, + struct mctp_dst *dst) { - int rc = -EHOSTUNREACH; - struct mctp_route *rt; - rcu_read_lock(); + dst->dev = __mctp_dev_get(dev); + rcu_read_unlock(); - list_for_each_entry_rcu(rt, &net->mctp.routes, list) { - if (rt->dst_type != MCTP_ROUTE_DIRECT || rt->type != RTN_LOCAL) - continue; - - if (rt->dev->dev != dev) - continue; - - mctp_dst_from_route(dst, 0, 0, rt); - rc = 0; - break; - } + if (!dst->dev) + return -EHOSTUNREACH; - rcu_read_unlock(); + dst->mtu = READ_ONCE(dev->mtu); + dst->halen = 0; + dst->output = mctp_dst_input; + dst->nexthop = 0; - return rc; + return 0; } static int mctp_do_fragment_route(struct mctp_dst *dst, struct sk_buff *skb, @@ -1037,6 +1054,13 @@ static int mctp_do_fragment_route(struct mctp_dst *dst, struct sk_buff *skb, return -EMSGSIZE; } + /* within MTU? 
avoid the copy, send original skb */ + if (skb->len <= mtu) { + hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | + MCTP_HDR_FLAG_EOM | tag; + return dst->output(dst, skb); + } + /* keep same headroom as the original skb */ headroom = skb_headroom(skb); @@ -1109,43 +1133,25 @@ int mctp_local_output(struct sock *sk, struct mctp_dst *dst, struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); struct mctp_sk_key *key; struct mctp_hdr *hdr; - unsigned long flags; unsigned int netid; - unsigned int mtu; - mctp_eid_t saddr; - int rc; u8 tag; KUNIT_STATIC_STUB_REDIRECT(mctp_local_output, sk, dst, skb, daddr, req_tag); - rc = -ENODEV; - - spin_lock_irqsave(&dst->dev->addrs_lock, flags); - if (dst->dev->num_addrs == 0) { - rc = -EHOSTUNREACH; - } else { - /* use the outbound interface's first address as our source */ - saddr = dst->dev->addrs[0]; - rc = 0; - } - spin_unlock_irqrestore(&dst->dev->addrs_lock, flags); netid = READ_ONCE(dst->dev->net); - if (rc) - goto out_release; - if (req_tag & MCTP_TAG_OWNER) { if (req_tag & MCTP_TAG_PREALLOC) key = mctp_lookup_prealloc_tag(msk, netid, daddr, req_tag, &tag); else - key = mctp_alloc_local_tag(msk, netid, saddr, daddr, - false, &tag); + key = mctp_alloc_local_tag(msk, netid, dst->saddr, + daddr, false, &tag); if (IS_ERR(key)) { - rc = PTR_ERR(key); - goto out_release; + kfree_skb(skb); + return PTR_ERR(key); } mctp_skb_set_flow(skb, key); /* done with the key in this scope */ @@ -1168,24 +1174,10 @@ int mctp_local_output(struct sock *sk, struct mctp_dst *dst, hdr = mctp_hdr(skb); hdr->ver = 1; hdr->dest = daddr; - hdr->src = saddr; - - mtu = dst->mtu; - - if (skb->len + sizeof(struct mctp_hdr) <= mtu) { - hdr->flags_seq_tag = MCTP_HDR_FLAG_SOM | - MCTP_HDR_FLAG_EOM | tag; - rc = dst->output(dst, skb); - } else { - rc = mctp_do_fragment_route(dst, skb, mtu, tag); - } + hdr->src = dst->saddr; /* route output functions consume the skb, even on error */ - skb = NULL; - -out_release: - kfree_skb(skb); - return rc; + return 
mctp_do_fragment_route(dst, skb, dst->mtu, tag); } /* route management */ @@ -1370,7 +1362,7 @@ static int mctp_pkttype_receive(struct sk_buff *skb, struct net_device *dev, /* NULL EID, but addressed to our physical address */ if (rc && mh->dest == MCTP_ADDR_NULL && skb->pkt_type == PACKET_HOST) - rc = mctp_route_lookup_null(net, dev, &dst); + rc = mctp_dst_input_null(net, dev, &dst); if (rc) goto err_drop; diff --git a/net/mctp/test/route-test.c b/net/mctp/test/route-test.c index 75ea96c10e49..e1033643fab0 100644 --- a/net/mctp/test/route-test.c +++ b/net/mctp/test/route-test.c @@ -63,6 +63,10 @@ static void mctp_test_fragment(struct kunit *test) if (!skb2) break; + /* avoid copying single-skb messages */ + if (first && last) + KUNIT_EXPECT_PTR_EQ(test, skb, skb2); + hdr2 = mctp_hdr(skb2); tag_mask = MCTP_HDR_TAG_MASK | MCTP_HDR_FLAG_TO; @@ -170,7 +174,9 @@ static void mctp_rx_input_test_to_desc(const struct mctp_rx_input_test *t, KUNIT_ARRAY_PARAM(mctp_rx_input, mctp_rx_input_tests, mctp_rx_input_test_to_desc); -/* set up a local dev, route on EID 8, and a socket listening on type 0 */ +/* set up a local dev (with addr 8), route on EID 8, and a socket listening on + * type 0 + */ static void __mctp_route_test_init(struct kunit *test, struct mctp_test_dev **devp, struct mctp_dst *dst, @@ -187,6 +193,10 @@ static void __mctp_route_test_init(struct kunit *test, if (netid != MCTP_NET_ANY) WRITE_ONCE(dev->mdev->net, netid); + dev->mdev->addrs = kmalloc_objs(u8, 1, GFP_KERNEL); + dev->mdev->num_addrs = 1; + dev->mdev->addrs[0] = 8; + mctp_test_dst_setup(test, dst, dev, 68); rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); @@ -904,6 +914,48 @@ static void mctp_test_route_input_cloned_frag(struct kunit *test) __mctp_route_test_fini(test, dev, &dst, sock); } +/* check we can receive an incoming packet with the null EID as daddr, when + * no RTN_LOCAL routes are present. 
+ */ +static void mctp_test_route_input_null_eid(struct kunit *test) +{ + struct mctp_hdr hdr = RX_HDR(1, 10, 0, FL_S | FL_E | FL_TO); + struct sk_buff *skb_pkt, *skb_sk; + struct mctp_test_dev *dev; + struct sockaddr_mctp addr; + struct socket *sock; + u8 type = 0; + int rc; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); + KUNIT_ASSERT_EQ(test, rc, 0); + + addr.smctp_family = AF_MCTP; + addr.smctp_network = MCTP_NET_ANY; + addr.smctp_addr.s_addr = MCTP_ADDR_ANY; + addr.smctp_type = type; + rc = kernel_bind(sock, (struct sockaddr_unsized *)&addr, sizeof(addr)); + KUNIT_ASSERT_EQ(test, rc, 0); + + skb_pkt = mctp_test_create_skb_data(&hdr, &type); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb_pkt); + + skb_pkt->dev = dev->ndev; + skb_pkt->pkt_type = PACKET_HOST; + + mctp_pkttype_receive(skb_pkt, dev->ndev, &mctp_packet_type, NULL); + + skb_sk = skb_recv_datagram(sock->sk, MSG_DONTWAIT, &rc); + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, skb_sk); + + skb_free_datagram(sock->sk, skb_sk); + sock_release(sock); + mctp_test_destroy_dev(dev); +} + #if IS_ENABLED(CONFIG_MCTP_FLOWS) static void mctp_test_flow_init(struct kunit *test, @@ -924,11 +976,6 @@ static void mctp_test_flow_init(struct kunit *test, */ __mctp_route_test_init(test, &dev, dst, sock, MCTP_NET_ANY); - /* Assign a single EID. 
->addrs is freed on mctp netdev release */ - dev->mdev->addrs = kmalloc(sizeof(u8), GFP_KERNEL); - dev->mdev->num_addrs = 1; - dev->mdev->addrs[0] = 8; - skb = alloc_skb(len + sizeof(struct mctp_hdr) + 1, GFP_KERNEL); KUNIT_ASSERT_TRUE(test, skb); __mctp_cb(skb); @@ -1054,8 +1101,6 @@ static void mctp_test_route_output_key_create(struct kunit *test) KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); WRITE_ONCE(dev->mdev->net, netid); - mctp_test_dst_setup(test, &dst, dev, 68); - rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); KUNIT_ASSERT_EQ(test, rc, 0); @@ -1063,6 +1108,8 @@ static void mctp_test_route_output_key_create(struct kunit *test) dev->mdev->num_addrs = 1; dev->mdev->addrs[0] = src_eid; + mctp_test_dst_setup(test, &dst, dev, 68); + skb = alloc_skb(sizeof(struct mctp_hdr) + 1 + len, GFP_KERNEL); KUNIT_ASSERT_TRUE(test, skb); __mctp_cb(skb); @@ -1161,7 +1208,7 @@ static void mctp_test_route_gw_lookup(struct kunit *test) struct mctp_test_dev *dev; int rc; - dev = mctp_test_create_dev(); + dev = mctp_test_create_dev_with_addr(8); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); /* 8 (local) -> 10 (gateway) via 9 (direct) */ @@ -1191,7 +1238,7 @@ static void mctp_test_route_gw_loop(struct kunit *test) struct mctp_test_dev *dev; int rc; - dev = mctp_test_create_dev(); + dev = mctp_test_create_dev_with_addr(8); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); /* two routes using each other as the gw */ @@ -1250,7 +1297,7 @@ static void mctp_test_route_gw_mtu(struct kunit *test) unsigned int netid; int rc; - dev = mctp_test_create_dev(); + dev = mctp_test_create_dev_with_addr(8); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); dev->ndev->mtu = mtus->dev; mdev = dev->mdev; @@ -1565,6 +1612,117 @@ cleanup: __mctp_route_test_fini(test, dev, &dst, sock_ty0); } +static void mctp_test_route_output_direct_no_eids(struct kunit *test) +{ + struct mctp_dst dst = { 0 }; + struct sk_buff *skb, *skb2; + struct mctp_test_route *rt; + struct mctp_test_dev *dev; + struct socket *sock; + const 
int len = 2; + int rc; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + rt = mctp_test_create_route_direct(&init_net, dev->mdev, 9, 68); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt); + + rc = mctp_route_lookup(&init_net, dev->mdev->net, 9, &dst); + KUNIT_ASSERT_EQ(test, rc, 0); + + rc = sock_create_kern(&init_net, AF_MCTP, SOCK_DGRAM, 0, &sock); + KUNIT_ASSERT_EQ(test, rc, 0); + + skb = alloc_skb(sizeof(struct mctp_hdr) + 1 + len, GFP_KERNEL); + KUNIT_ASSERT_TRUE(test, skb); + __mctp_cb(skb); + skb_reserve(skb, sizeof(struct mctp_hdr) + 1 + len); + memset(skb_put(skb, len), 0, len); + + rc = mctp_local_output(sock->sk, &dst, skb, 9, MCTP_TAG_OWNER); + KUNIT_ASSERT_EQ(test, rc, 0); + + KUNIT_ASSERT_EQ(test, dev->pkts.qlen, 1); + + skb2 = skb_dequeue(&dev->pkts); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb2); + + kfree_skb(skb2); + sock_release(sock); + mctp_dst_release(&dst); + mctp_test_route_destroy(test, rt); + mctp_test_destroy_dev(dev); +} + +static void mctp_test_route_output_gw_no_eids(struct kunit *test) +{ + struct mctp_test_route *rt1, *rt2; + struct mctp_test_dev *dev; + struct mctp_dst dst = { 0 }; + int rc; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + /* route: direct to bridge */ + rt1 = mctp_test_create_route_direct(&init_net, dev->mdev, 9, 68); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt1); + + /* route: bridge gw to final dest */ + rt2 = mctp_test_create_route_gw(&init_net, dev->mdev->net, 10, 9, 0); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, rt2); + + /* route lookup should fail, due to no source address on dev */ + rc = mctp_route_lookup(&init_net, dev->mdev->net, 10, &dst); + KUNIT_ASSERT_NE(test, rc, 0); + + mctp_test_route_destroy(test, rt1); + mctp_test_route_destroy(test, rt2); + mctp_test_destroy_dev(dev); +} + +static void mctp_test_route_output_extaddr_no_eids(struct kunit *test) +{ + struct mctp_dst dst = { 0 }; + struct sk_buff *skb, *skb2; + struct mctp_test_dev *dev; + struct socket 
*sock; + const int len = 1; + struct net *net; + int rc; + + dev = mctp_test_create_dev(); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, dev); + + net = dev_net(dev->ndev); + + rc = mctp_dst_from_extaddr(&dst, net, dev->ndev->ifindex, 0, NULL); + KUNIT_ASSERT_EQ(test, rc, 0); + + rc = sock_create_kern(net, AF_MCTP, SOCK_DGRAM, 0, &sock); + KUNIT_ASSERT_EQ(test, rc, 0); + + skb = alloc_skb(sizeof(struct mctp_hdr) + 1 + len, GFP_KERNEL); + KUNIT_ASSERT_TRUE(test, skb); + __mctp_cb(skb); + skb_reserve(skb, sizeof(struct mctp_hdr) + 1 + len); + memset(skb_put(skb, len), 0, len); + + rc = mctp_local_output(sock->sk, &dst, skb, 9, MCTP_TAG_OWNER); + KUNIT_ASSERT_EQ(test, rc, 0); + + KUNIT_ASSERT_EQ(test, dev->pkts.qlen, 1); + + skb2 = skb_dequeue(&dev->pkts); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, skb2); + + kfree_skb(skb2); + sock_release(sock); + mctp_dst_release(&dst); + mctp_test_destroy_dev(dev); +} + static struct kunit_case mctp_test_cases[] = { KUNIT_CASE_PARAM(mctp_test_fragment, mctp_frag_gen_params), KUNIT_CASE_PARAM(mctp_test_rx_input, mctp_rx_input_gen_params), @@ -1577,6 +1735,7 @@ static struct kunit_case mctp_test_cases[] = { KUNIT_CASE(mctp_test_route_input_sk_fail_frag), KUNIT_CASE(mctp_test_route_input_multiple_nets_bind), KUNIT_CASE(mctp_test_route_input_multiple_nets_key), + KUNIT_CASE(mctp_test_route_input_null_eid), KUNIT_CASE(mctp_test_packet_flow), KUNIT_CASE(mctp_test_fragment_flow), KUNIT_CASE(mctp_test_route_output_key_create), @@ -1587,6 +1746,9 @@ static struct kunit_case mctp_test_cases[] = { KUNIT_CASE_PARAM(mctp_test_route_gw_mtu, mctp_route_gw_mtu_gen_params), KUNIT_CASE(mctp_test_route_gw_output), KUNIT_CASE_PARAM(mctp_test_bind_lookup, mctp_bind_lookup_gen_params), + KUNIT_CASE(mctp_test_route_output_direct_no_eids), + KUNIT_CASE(mctp_test_route_output_gw_no_eids), + KUNIT_CASE(mctp_test_route_output_extaddr_no_eids), {} }; diff --git a/net/mctp/test/utils.c b/net/mctp/test/utils.c index 97afe8cd2b05..c3987d5ade7a 100644 --- 
a/net/mctp/test/utils.c +++ b/net/mctp/test/utils.c @@ -80,6 +80,26 @@ struct mctp_test_dev *mctp_test_create_dev(void) return __mctp_test_create_dev(0, NULL); } +struct mctp_test_dev *mctp_test_create_dev_with_addr(mctp_eid_t addr) +{ + struct mctp_test_dev *dev; + + dev = __mctp_test_create_dev(0, NULL); + if (!dev) + return NULL; + + dev->mdev->addrs = kmalloc_objs(u8, 1, GFP_KERNEL); + if (!dev->mdev->addrs) { + mctp_test_destroy_dev(dev); + return NULL; + } + + dev->mdev->num_addrs = 1; + dev->mdev->addrs[0] = addr; + + return dev; +} + struct mctp_test_dev *mctp_test_create_dev_lladdr(unsigned short lladdr_len, const unsigned char *lladdr) { @@ -171,6 +191,8 @@ struct mctp_test_route *mctp_test_create_route_gw(struct net *net, void mctp_test_dst_setup(struct kunit *test, struct mctp_dst *dst, struct mctp_test_dev *dev, unsigned int mtu) { + unsigned long flags; + KUNIT_EXPECT_NOT_ERR_OR_NULL(test, dev); memset(dst, 0, sizeof(*dst)); @@ -179,6 +201,11 @@ void mctp_test_dst_setup(struct kunit *test, struct mctp_dst *dst, __mctp_dev_get(dst->dev->dev); dst->mtu = mtu; dst->output = mctp_test_dst_output; + dst->saddr = MCTP_ADDR_NULL; + spin_lock_irqsave(&dev->mdev->addrs_lock, flags); + if (dev->mdev->num_addrs) + dst->saddr = dev->mdev->addrs[0]; + spin_unlock_irqrestore(&dev->mdev->addrs_lock, flags); } void mctp_test_route_destroy(struct kunit *test, struct mctp_test_route *rt) diff --git a/net/mctp/test/utils.h b/net/mctp/test/utils.h index 4cc90c9da4d1..47603732e6a5 100644 --- a/net/mctp/test/utils.h +++ b/net/mctp/test/utils.h @@ -42,6 +42,7 @@ struct mctp_test_bind_setup { }; struct mctp_test_dev *mctp_test_create_dev(void); +struct mctp_test_dev *mctp_test_create_dev_with_addr(mctp_eid_t eid); struct mctp_test_dev *mctp_test_create_dev_lladdr(unsigned short lladdr_len, const unsigned char *lladdr); void mctp_test_destroy_dev(struct mctp_test_dev *dev); diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 18d3da8ab384..26340a7306b5 100644 --- 
a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -24,7 +24,6 @@ #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> #endif -#include <net/ipv6_stubs.h> #include <net/rtnh.h> #include "internal.h" @@ -656,12 +655,9 @@ static struct net_device *inet6_fib_lookup_dev(struct net *net, struct dst_entry *dst; struct flowi6 fl6; - if (!ipv6_stub) - return ERR_PTR(-EAFNOSUPPORT); - memset(&fl6, 0, sizeof(fl6)); memcpy(&fl6.daddr, addr, sizeof(struct in6_addr)); - dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); + dst = ip6_dst_lookup_flow(net, NULL, &fl6, NULL); if (IS_ERR(dst)) return ERR_CAST(dst); diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 43df4293f58b..8a1c5698983c 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -1076,6 +1076,7 @@ static void rwin_update(struct mptcp_sock *msk, struct sock *ssk, * resync. */ tp->rcv_wnd += mptcp_rcv_wnd - subflow->rcv_wnd_sent; + tcp_update_max_rcv_wnd_seq(tp); subflow->rcv_wnd_sent = mptcp_rcv_wnd; } @@ -1338,8 +1339,9 @@ raise_win: */ rcv_wnd_new = rcv_wnd_old; win = rcv_wnd_old - ack_seq; - tp->rcv_wnd = min_t(u64, win, U32_MAX); - new_win = tp->rcv_wnd; + new_win = min_t(u64, win, U32_MAX); + tp->rcv_wnd = new_win; + tcp_update_max_rcv_wnd_seq(tp); /* Make sure we do not exceed the maximum possible * scaled window. diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 614c3f583ca0..fbffd3a43fe8 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -879,6 +879,32 @@ static bool move_skbs_to_msk(struct mptcp_sock *msk, struct sock *ssk) return moved; } +static void mptcp_rcv_rtt_update(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow) +{ + const struct tcp_sock *tp = tcp_sk(subflow->tcp_sock); + u32 rtt_us = tp->rcv_rtt_est.rtt_us; + int id; + + /* Update once per subflow per rcvwnd to avoid touching the msk + * too often. 
+ */ + if (!rtt_us || tp->rcv_rtt_est.seq == subflow->prev_rtt_seq) + return; + + subflow->prev_rtt_seq = tp->rcv_rtt_est.seq; + + /* Pairs with READ_ONCE() in mptcp_rtt_us_est(). */ + id = msk->rcv_rtt_est.next_sample; + WRITE_ONCE(msk->rcv_rtt_est.samples[id], rtt_us); + if (++msk->rcv_rtt_est.next_sample == MPTCP_RTT_SAMPLES) + msk->rcv_rtt_est.next_sample = 0; + + /* EWMA among the incoming subflows */ + msk->scaling_ratio = ((msk->scaling_ratio << 3) - msk->scaling_ratio + + tp->scaling_ratio) >> 3; +} + void mptcp_data_ready(struct sock *sk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); @@ -892,6 +918,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) return; mptcp_data_lock(sk); + mptcp_rcv_rtt_update(msk, subflow); if (!sock_owned_by_user(sk)) { /* Wake-up the reader only for in-sequence data */ if (move_skbs_to_msk(msk, ssk) && mptcp_epollin_ready(sk)) @@ -1005,7 +1032,8 @@ static bool mptcp_frag_can_collapse_to(const struct mptcp_sock *msk, const struct page_frag *pfrag, const struct mptcp_data_frag *df) { - return df && pfrag->page == df->page && + return df && !df->eor && + pfrag->page == df->page && pfrag->size - pfrag->offset > 0 && pfrag->offset == (df->offset + df->data_len) && df->data_seq + df->data_len == msk->write_seq; @@ -1147,6 +1175,7 @@ mptcp_carve_data_frag(const struct mptcp_sock *msk, struct page_frag *pfrag, dfrag->offset = offset + sizeof(struct mptcp_data_frag); dfrag->already_sent = 0; dfrag->page = pfrag->page; + dfrag->eor = 0; return dfrag; } @@ -1408,6 +1437,13 @@ out: mptcp_update_infinite_map(msk, ssk, mpext); trace_mptcp_sendmsg_frag(mpext); mptcp_subflow_ctx(ssk)->rel_write_seq += copy; + + /* if this is the last chunk of a dfrag with MSG_EOR set, + * mark the skb to prevent coalescing with subsequent data. 
+ */ + if (dfrag->eor && info->sent + copy >= dfrag->data_len) + TCP_SKB_CB(skb)->eor = 1; + return copy; } @@ -1868,7 +1904,8 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) long timeo; /* silently ignore everything else */ - msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | MSG_FASTOPEN; + msg->msg_flags &= MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL | + MSG_FASTOPEN | MSG_EOR; lock_sock(sk); @@ -1975,8 +2012,16 @@ wait_for_memory: goto do_error; } - if (copied) + if (copied) { + /* mark the last dfrag with EOR if MSG_EOR was set */ + if (msg->msg_flags & MSG_EOR) { + struct mptcp_data_frag *dfrag = mptcp_pending_tail(sk); + + if (dfrag) + dfrag->eor = 1; + } __mptcp_push_pending(sk, msg->msg_flags); + } out: release_sock(sk); @@ -2077,7 +2122,6 @@ static void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) msk->rcvspace_init = 1; msk->rcvq_space.copied = 0; - msk->rcvq_space.rtt_us = 0; /* initial rcv_space offering made to peer */ msk->rcvq_space.space = min_t(u32, tp->rcv_wnd, @@ -2088,15 +2132,15 @@ static void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk) /* receive buffer autotuning. See tcp_rcv_space_adjust for more information. * - * Only difference: Use highest rtt estimate of the subflows in use. + * Only difference: Use lowest rtt estimate of the subflows in use, see + * mptcp_rcv_rtt_update() and mptcp_rtt_us_est(). 
*/ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) { struct mptcp_subflow_context *subflow; struct sock *sk = (struct sock *)msk; - u8 scaling_ratio = U8_MAX; - u32 time, advmss = 1; - u64 rtt_us, mstamp; + u32 time, rtt_us; + u64 mstamp; msk_owned_by_me(msk); @@ -2111,36 +2155,17 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) mstamp = mptcp_stamp(); time = tcp_stamp_us_delta(mstamp, READ_ONCE(msk->rcvq_space.time)); - rtt_us = msk->rcvq_space.rtt_us; - if (rtt_us && time < (rtt_us >> 3)) + rtt_us = mptcp_rtt_us_est(msk); + if (rtt_us == U32_MAX || time < (rtt_us >> 3)) return; - rtt_us = 0; - mptcp_for_each_subflow(msk, subflow) { - const struct tcp_sock *tp; - u64 sf_rtt_us; - u32 sf_advmss; - - tp = tcp_sk(mptcp_subflow_tcp_sock(subflow)); - - sf_rtt_us = READ_ONCE(tp->rcv_rtt_est.rtt_us); - sf_advmss = READ_ONCE(tp->advmss); - - rtt_us = max(sf_rtt_us, rtt_us); - advmss = max(sf_advmss, advmss); - scaling_ratio = min(tp->scaling_ratio, scaling_ratio); - } - - msk->rcvq_space.rtt_us = rtt_us; - msk->scaling_ratio = scaling_ratio; - if (time < (rtt_us >> 3) || rtt_us == 0) - return; - - if (msk->rcvq_space.copied <= msk->rcvq_space.space) + copied = msk->rcvq_space.copied; + copied -= mptcp_inq_hint(sk); + if (copied <= msk->rcvq_space.space) goto new_measure; trace_mptcp_rcvbuf_grow(sk, time); - if (mptcp_rcvbuf_grow(sk, msk->rcvq_space.copied)) { + if (mptcp_rcvbuf_grow(sk, copied)) { /* Make subflows follow along. 
If we do not do this, we * get drops at subflow level if skbs can't be moved to * the mptcp rx queue fast enough (announced rcv_win can @@ -2154,7 +2179,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied) slow = lock_sock_fast(ssk); /* subflows can be added before tcp_init_transfer() */ if (tcp_sk(ssk)->rcvq_space.space) - tcp_rcvbuf_grow(ssk, msk->rcvq_space.copied); + tcp_rcvbuf_grow(ssk, copied); unlock_sock_fast(ssk, slow); } } @@ -2262,7 +2287,7 @@ static unsigned int mptcp_inq_hint(const struct sock *sk) } static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct mptcp_sock *msk = mptcp_sk(sk); struct scm_timestamping_internal tss; @@ -2272,7 +2297,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, /* MSG_ERRQUEUE is really a no-op till we support IP_RECVERR */ if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); + return inet_recv_error(sk, msg, len); lock_sock(sk); if (unlikely(sk->sk_state == TCP_LISTEN)) { @@ -2315,11 +2340,8 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, break; if (copied) { - if (sk->sk_err || - sk->sk_state == TCP_CLOSE || - (sk->sk_shutdown & RCV_SHUTDOWN) || - !timeo || - signal_pending(current)) + if (tcp_recv_should_stop(sk) || + !timeo) break; } else { if (sk->sk_err) { @@ -3000,6 +3022,7 @@ static void __mptcp_init_sock(struct sock *sk) msk->timer_ival = TCP_RTO_MIN; msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; msk->backlog_len = 0; + mptcp_init_rtt_est(msk); WRITE_ONCE(msk->first, NULL); inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; @@ -3445,6 +3468,7 @@ static int mptcp_disconnect(struct sock *sk, int flags) msk->bytes_retrans = 0; msk->rcvspace_init = 0; msk->fastclosing = 0; + mptcp_init_rtt_est(msk); /* for fallback's sake */ WRITE_ONCE(msk->ack_seq, 0); @@ -4502,9 +4526,7 @@ static ssize_t mptcp_splice_read(struct socket *sock, loff_t *ppos, 
release_sock(sk); lock_sock(sk); - if (sk->sk_err || sk->sk_state == TCP_CLOSE || - (sk->sk_shutdown & RCV_SHUTDOWN) || - signal_pending(current)) + if (tcp_recv_should_stop(sk)) break; } @@ -4616,6 +4638,12 @@ void __init mptcp_proto_init(void) inet_register_protosw(&mptcp_protosw); BUILD_BUG_ON(sizeof(struct mptcp_skb_cb) > sizeof_field(struct sk_buff, cb)); + + /* struct mptcp_data_frag: 'overhead' corresponds to the alignment + * (ALIGN(1, sizeof(long)) - 1, so 8-1) + the struct's size + */ + BUILD_BUG_ON(ALIGN(1, sizeof(long)) - 1 + sizeof(struct mptcp_data_frag) + > U8_MAX); } #if IS_ENABLED(CONFIG_MPTCP_IPV6) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index ec15e503da8b..e4f5aba24da7 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -263,11 +263,19 @@ struct mptcp_data_frag { u64 data_seq; u16 data_len; u16 offset; - u16 overhead; + u8 overhead; + u8 eor; /* currently using 1 bit */ u16 already_sent; struct page *page; }; +/* Arbitrary compromise between as low as possible to react timely to subflow + * close event and as big as possible to avoid being fouled by biased large + * samples due to peer sending data on a different subflow WRT to the incoming + * ack. + */ +#define MPTCP_RTT_SAMPLES 5 + /* MPTCP connection sock */ struct mptcp_sock { /* inet_connection_sock must be the first member */ @@ -340,11 +348,17 @@ struct mptcp_sock { */ struct mptcp_pm_data pm; struct mptcp_sched_ops *sched; + + /* Most recent rtt_us observed by in use incoming subflows. 
*/ + struct { + u32 samples[MPTCP_RTT_SAMPLES]; + u32 next_sample; + } rcv_rtt_est; + struct { int space; /* bytes copied in last measurement window */ int copied; /* bytes copied in this measurement window */ u64 time; /* start time of measurement window */ - u64 rtt_us; /* last maximum rtt of subflows */ } rcvq_space; u8 scaling_ratio; bool allow_subflows; @@ -422,6 +436,27 @@ static inline struct mptcp_data_frag *mptcp_send_head(const struct sock *sk) return msk->first_pending; } +static inline void mptcp_init_rtt_est(struct mptcp_sock *msk) +{ + int i; + + for (i = 0; i < MPTCP_RTT_SAMPLES; ++i) + msk->rcv_rtt_est.samples[i] = U32_MAX; + msk->rcv_rtt_est.next_sample = 0; + msk->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; +} + +static inline u32 mptcp_rtt_us_est(const struct mptcp_sock *msk) +{ + u32 rtt_us = READ_ONCE(msk->rcv_rtt_est.samples[0]); + int i; + + /* Lockless access of collected samples. */ + for (i = 1; i < MPTCP_RTT_SAMPLES; ++i) + rtt_us = min(rtt_us, READ_ONCE(msk->rcv_rtt_est.samples[i])); + return rtt_us; +} + static inline struct mptcp_data_frag *mptcp_send_next(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -523,6 +558,7 @@ struct mptcp_subflow_context { u32 map_data_len; __wsum map_data_csum; u32 map_csum_len; + u32 prev_rtt_seq; u32 request_mptcp : 1, /* send MP_CAPABLE */ request_join : 1, /* send MP_JOIN */ request_bkup : 1, diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 4ff5863aa9fd..e2cb9d23e4a0 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -72,7 +72,7 @@ static void subflow_req_create_thmac(struct mptcp_subflow_request_sock *subflow_ struct mptcp_sock *msk = subflow_req->msk; u8 hmac[SHA256_DIGEST_SIZE]; - get_random_bytes(&subflow_req->local_nonce, sizeof(u32)); + subflow_req->local_nonce = get_random_u32(); subflow_generate_hmac(READ_ONCE(msk->local_key), READ_ONCE(msk->remote_key), @@ -1639,7 +1639,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, ssk = 
sf->sk; subflow = mptcp_subflow_ctx(ssk); do { - get_random_bytes(&subflow->local_nonce, sizeof(u32)); + subflow->local_nonce = get_random_u32(); } while (!subflow->local_nonce); /* if 'IPADDRANY', the ID will be set later, after the routing */ @@ -2200,7 +2200,6 @@ void __init mptcp_subflow_v6_init(void) subflow_v6m_specific = subflow_v6_specific; subflow_v6m_specific.queue_xmit = ipv4_specific.queue_xmit; - subflow_v6m_specific.send_check = ipv4_specific.send_check; subflow_v6m_specific.net_header_len = ipv4_specific.net_header_len; subflow_v6m_specific.mtu_reduced = ipv4_specific.mtu_reduced; subflow_v6m_specific.rebuild_header = subflow_rebuild_header; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 6cdc994fdc8a..682c675125fc 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -209,17 +209,6 @@ config NF_CT_PROTO_SCTP If unsure, say Y. -config NF_CT_PROTO_UDPLITE - bool 'UDP-Lite protocol connection tracking support' - depends on NETFILTER_ADVANCED - default y - help - With this option enabled, the layer 3 independent connection - tracking code will be able to do state tracking on UDP-Lite - connections. - - If unsure, say Y. - config NF_CONNTRACK_AMANDA tristate "Amanda backup protocol support" depends on NETFILTER_ADVANCED @@ -249,7 +238,6 @@ config NF_CONNTRACK_FTP config NF_CONNTRACK_H323 tristate "H.323 protocol support" - depends on IPV6 || IPV6=n depends on NETFILTER_ADVANCED help H.323 is a VoIP signalling protocol from ITU-T. 
As one of the most @@ -589,7 +577,6 @@ config NFT_QUOTA config NFT_REJECT default m if NETFILTER_ADVANCED=n tristate "Netfilter nf_tables reject support" - depends on !NF_TABLES_INET || (IPV6!=m || m) help This option adds the "reject" expression that you can use to explicitly deny and notify via TCP reset/ICMP informational errors @@ -636,7 +623,6 @@ config NFT_XFRM config NFT_SOCKET tristate "Netfilter nf_tables socket match support" - depends on IPV6 || IPV6=n select NF_SOCKET_IPV4 select NF_SOCKET_IPV6 if NF_TABLES_IPV6 help @@ -652,7 +638,6 @@ config NFT_OSF config NFT_TPROXY tristate "Netfilter nf_tables tproxy support" - depends on IPV6 || IPV6=n select NF_DEFRAG_IPV4 select NF_DEFRAG_IPV6 if NF_TABLES_IPV6 select NF_TPROXY_IPV4 @@ -1071,7 +1056,6 @@ config NETFILTER_XT_TARGET_MASQUERADE config NETFILTER_XT_TARGET_TEE tristate '"TEE" - packet cloning to alternate destination' depends on NETFILTER_ADVANCED - depends on IPV6 || IPV6=n depends on !NF_CONNTRACK || NF_CONNTRACK depends on IP6_NF_IPTABLES || !IP6_NF_IPTABLES select NF_DUP_IPV4 @@ -1084,7 +1068,6 @@ config NETFILTER_XT_TARGET_TPROXY tristate '"TPROXY" target transparent proxying support' depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED - depends on IPV6 || IPV6=n depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n depends on IP_NF_MANGLE || NFT_COMPAT select NF_DEFRAG_IPV4 @@ -1126,7 +1109,6 @@ config NETFILTER_XT_TARGET_SECMARK config NETFILTER_XT_TARGET_TCPMSS tristate '"TCPMSS" target support' - depends on IPV6 || IPV6=n default m if NETFILTER_ADVANCED=n help This option adds a `TCPMSS' target, which allows you to alter the @@ -1581,7 +1563,6 @@ config NETFILTER_XT_MATCH_SOCKET tristate '"socket" match support' depends on NETFILTER_XTABLES depends on NETFILTER_ADVANCED - depends on IPV6 || IPV6=n depends on IP6_NF_IPTABLES || IP6_NF_IPTABLES=n select NF_SOCKET_IPV4 select NF_SOCKET_IPV6 if IP6_NF_IPTABLES diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 
11a702065bab..675a1034b340 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -1,10 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* netfilter.c: look after the filters for various protocols. * Heavily influenced by the old firewall.c by David Bonn and Alan Cox. * * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any * way. - * - * This code is GPL. */ #include <linux/kernel.h> #include <linux/netfilter.h> @@ -28,9 +27,6 @@ #include "nf_internals.h" -const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly; -EXPORT_SYMBOL_GPL(nf_ipv6_ops); - #ifdef CONFIG_JUMP_LABEL struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; EXPORT_SYMBOL(nf_hooks_needed); diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 2c625e0f49ec..752f59ef8744 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -11,6 +11,7 @@ #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/errno.h> +#include <linux/if_arp.h> #include <linux/if_ether.h> #include <linux/netlink.h> #include <linux/jiffies.h> @@ -220,8 +221,8 @@ bitmap_ipmac_kadt(struct ip_set *set, const struct sk_buff *skb, return -IPSET_ERR_BITMAP_RANGE; /* Backward compatibility: we don't check the second flag */ - if (skb_mac_header(skb) < skb->head || - (skb_mac_header(skb) + ETH_HLEN) > skb->data) + if (!skb->dev || skb->dev->type != ARPHRD_ETHER || + !skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN) return -EINVAL; e.id = ip_to_id(map, ip); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index d0c9fe59c67d..c5a26236a0bb 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -985,7 +985,7 @@ static const struct nla_policy ip_set_create_policy[IPSET_ATTR_CMD_MAX + 1] = { .len = IPSET_MAXNAMELEN - 1 }, [IPSET_ATTR_TYPENAME] = { .type = NLA_NUL_STRING, .len = IPSET_MAXNAMELEN - 1}, - 
[IPSET_ATTR_REVISION] = { .type = NLA_U8 }, + [IPSET_ATTR_REVISION] = NLA_POLICY_MAX(NLA_U8, IPSET_REVISION_MAX), [IPSET_ATTR_FAMILY] = { .type = NLA_U8 }, [IPSET_ATTR_DATA] = { .type = NLA_NESTED }, }; diff --git a/net/netfilter/ipset/ip_set_hash_ipmac.c b/net/netfilter/ipset/ip_set_hash_ipmac.c index 467c59a83c0a..b9a2681e2488 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmac.c +++ b/net/netfilter/ipset/ip_set_hash_ipmac.c @@ -11,6 +11,7 @@ #include <linux/skbuff.h> #include <linux/errno.h> #include <linux/random.h> +#include <linux/if_arp.h> #include <linux/if_ether.h> #include <net/ip.h> #include <net/ipv6.h> @@ -89,8 +90,8 @@ hash_ipmac4_kadt(struct ip_set *set, const struct sk_buff *skb, struct hash_ipmac4_elem e = { .ip = 0, { .foo[0] = 0, .foo[1] = 0 } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - if (skb_mac_header(skb) < skb->head || - (skb_mac_header(skb) + ETH_HLEN) > skb->data) + if (!skb->dev || skb->dev->type != ARPHRD_ETHER || + !skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN) return -EINVAL; if (opt->flags & IPSET_DIM_TWO_SRC) @@ -205,8 +206,8 @@ hash_ipmac6_kadt(struct ip_set *set, const struct sk_buff *skb, }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - if (skb_mac_header(skb) < skb->head || - (skb_mac_header(skb) + ETH_HLEN) > skb->data) + if (!skb->dev || skb->dev->type != ARPHRD_ETHER || + !skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN) return -EINVAL; if (opt->flags & IPSET_DIM_TWO_SRC) diff --git a/net/netfilter/ipset/ip_set_hash_mac.c b/net/netfilter/ipset/ip_set_hash_mac.c index 718814730acf..41a122591fe2 100644 --- a/net/netfilter/ipset/ip_set_hash_mac.c +++ b/net/netfilter/ipset/ip_set_hash_mac.c @@ -8,6 +8,7 @@ #include <linux/etherdevice.h> #include <linux/skbuff.h> #include <linux/errno.h> +#include <linux/if_arp.h> #include <linux/if_ether.h> #include <net/netlink.h> @@ -77,8 +78,8 @@ hash_mac4_kadt(struct ip_set *set, const struct sk_buff *skb, struct 
hash_mac4_elem e = { { .foo[0] = 0, .foo[1] = 0 } }; struct ip_set_ext ext = IP_SET_INIT_KEXT(skb, opt, set); - if (skb_mac_header(skb) < skb->head || - (skb_mac_header(skb) + ETH_HLEN) > skb->data) + if (!skb->dev || skb->dev->type != ARPHRD_ETHER || + !skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN) return -EINVAL; if (opt->flags & IPSET_DIM_ONE_SRC) diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 005c1134d756..2082bfb2d93c 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -47,31 +47,12 @@ static int ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS; module_param_named(conn_tab_bits, ip_vs_conn_tab_bits, int, 0444); MODULE_PARM_DESC(conn_tab_bits, "Set connections' hash size"); -/* size and mask values */ +/* Max table size */ int ip_vs_conn_tab_size __read_mostly; -static int ip_vs_conn_tab_mask __read_mostly; - -/* - * Connection hash table: for input and output packets lookups of IPVS - */ -static struct hlist_head *ip_vs_conn_tab __read_mostly; /* SLAB cache for IPVS connections */ static struct kmem_cache *ip_vs_conn_cachep __read_mostly; -/* counter for no client port connections */ -static atomic_t ip_vs_conn_no_cport_cnt = ATOMIC_INIT(0); - -/* random value for IPVS connection hash */ -static unsigned int ip_vs_conn_rnd __read_mostly; - -/* - * Fine locking granularity for big connection hash table - */ -#define CT_LOCKARRAY_BITS 5 -#define CT_LOCKARRAY_SIZE (1<<CT_LOCKARRAY_BITS) -#define CT_LOCKARRAY_MASK (CT_LOCKARRAY_SIZE-1) - /* We need an addrstrlen that works with or without v6 */ #ifdef CONFIG_IP_VS_IPV6 #define IP_VS_ADDRSTRLEN INET6_ADDRSTRLEN @@ -79,23 +60,102 @@ static unsigned int ip_vs_conn_rnd __read_mostly; #define IP_VS_ADDRSTRLEN (8+1) #endif -struct ip_vs_aligned_lock +/* Connection hashing: + * - hash (add conn) and unhash (del conn) are safe for RCU readers walking + * the bucket, they will not jump to another bucket or hash table and to miss + * 
conns + * - rehash (fill cport) hashes the conn to new bucket or even new table, + * so we use seqcount to retry lookups on buckets where we delete + * conns (unhash) because after hashing their next ptr can point to another + * bucket or hash table + * - hash table resize works like rehash but always rehashes into new table + * - bit lock on bucket serializes all operations that modify the chain + * - cp->lock protects conn fields like cp->flags, cp->dest + */ + +/* Lock conn_tab bucket for conn hash/unhash, not for rehash */ +static __always_inline void +conn_tab_lock(struct ip_vs_rht *t, struct ip_vs_conn *cp, u32 hash_key, + u32 hash_key2, bool use2, bool new_hash, + struct hlist_bl_head **head_ret, struct hlist_bl_head **head2_ret) { - spinlock_t l; -} __attribute__((__aligned__(SMP_CACHE_BYTES))); + struct hlist_bl_head *head, *head2; + u32 hash_key_new, hash_key_new2; + struct ip_vs_rht *t2 = t; + u32 idx, idx2; + + idx = hash_key & t->mask; + if (use2) + idx2 = hash_key2 & t->mask; + else + idx2 = idx; + if (!new_hash) { + /* We need to lock the bucket in the right table */ + +retry: + if (!ip_vs_rht_same_table(t, hash_key)) { + /* It is already moved to new table */ + t = rcu_dereference(t->new_tbl); + /* Rehashing works in two steps and we may detect + * both nodes in different tables, use idx/idx2 + * for proper lock ordering for heads. + */ + idx = hash_key & t->mask; + idx |= IP_VS_RHT_TABLE_ID_MASK; + } + if (use2) { + if (!ip_vs_rht_same_table(t2, hash_key2)) { + /* It is already moved to new table */ + t2 = rcu_dereference(t2->new_tbl); + idx2 = hash_key2 & t2->mask; + idx2 |= IP_VS_RHT_TABLE_ID_MASK; + } + } else { + idx2 = idx; + } + } -/* lock array for conn table */ -static struct ip_vs_aligned_lock -__ip_vs_conntbl_lock_array[CT_LOCKARRAY_SIZE] __cacheline_aligned; + head = t->buckets + (hash_key & t->mask); + head2 = use2 ? 
t2->buckets + (hash_key2 & t2->mask) : head; -static inline void ct_write_lock_bh(unsigned int key) -{ - spin_lock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); + local_bh_disable(); + /* Do not touch seqcount, this is a safe operation */ + + if (idx <= idx2) { + hlist_bl_lock(head); + if (head != head2) + hlist_bl_lock(head2); + } else { + hlist_bl_lock(head2); + hlist_bl_lock(head); + } + if (!new_hash) { + /* Ensure hash_key is read under lock */ + hash_key_new = READ_ONCE(cp->hn0.hash_key); + hash_key_new2 = READ_ONCE(cp->hn1.hash_key); + /* Hash changed ? */ + if (hash_key != hash_key_new || + (hash_key2 != hash_key_new2 && use2)) { + if (head != head2) + hlist_bl_unlock(head2); + hlist_bl_unlock(head); + local_bh_enable(); + hash_key = hash_key_new; + hash_key2 = hash_key_new2; + goto retry; + } + } + *head_ret = head; + *head2_ret = head2; } -static inline void ct_write_unlock_bh(unsigned int key) +static inline void conn_tab_unlock(struct hlist_bl_head *head, + struct hlist_bl_head *head2) { - spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); + if (head != head2) + hlist_bl_unlock(head2); + hlist_bl_unlock(head); + local_bh_enable(); } static void ip_vs_conn_expire(struct timer_list *t); @@ -103,48 +163,70 @@ static void ip_vs_conn_expire(struct timer_list *t); /* * Returns hash value for IPVS connection entry */ -static unsigned int ip_vs_conn_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, - const union nf_inet_addr *addr, - __be16 port) +static u32 ip_vs_conn_hashkey(struct ip_vs_rht *t, int af, unsigned int proto, + const union nf_inet_addr *addr, __be16 port, + const union nf_inet_addr *laddr, __be16 lport) { + u64 a = (u32)proto << 16 | (__force u32)port; + u64 d; + #ifdef CONFIG_IP_VS_IPV6 - if (af == AF_INET6) - return (jhash_3words(jhash(addr, 16, ip_vs_conn_rnd), - (__force u32)port, proto, ip_vs_conn_rnd) ^ - ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask; + if (af == AF_INET6) { + u64 b = (u64)addr->all[0] 
<< 32 | addr->all[1]; + u64 c = (u64)addr->all[2] << 32 | addr->all[3]; + + a |= (u64)laddr->all[2] << 32 ^ (__force u32)lport; + c ^= laddr->all[1]; + d = (u64)laddr->all[0] << 32 | laddr->all[3]; + return (u32)siphash_4u64(a, b, c, d, &t->hash_key); + } #endif - return (jhash_3words((__force u32)addr->ip, (__force u32)port, proto, - ip_vs_conn_rnd) ^ - ((size_t)ipvs>>8)) & ip_vs_conn_tab_mask; + a |= (u64)addr->all[0] << 32; + d = (u64)laddr->all[0] << 32 | (__force u32)lport; + return (u32)siphash_2u64(a, d, &t->hash_key); } static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, - bool inverse) + struct ip_vs_rht *t, bool inverse) { + const union nf_inet_addr *laddr; const union nf_inet_addr *addr; + __be16 lport; __be16 port; if (p->pe_data && p->pe->hashkey_raw) - return p->pe->hashkey_raw(p, ip_vs_conn_rnd, inverse) & - ip_vs_conn_tab_mask; + return p->pe->hashkey_raw(p, t, inverse); if (likely(!inverse)) { addr = p->caddr; port = p->cport; + laddr = p->vaddr; + lport = p->vport; } else { addr = p->vaddr; port = p->vport; + laddr = p->caddr; + lport = p->cport; } - return ip_vs_conn_hashkey(p->ipvs, p->af, p->protocol, addr, port); + return ip_vs_conn_hashkey(t, p->af, p->protocol, addr, port, laddr, + lport); } -static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) +static unsigned int ip_vs_conn_hashkey_conn(struct ip_vs_rht *t, + const struct ip_vs_conn *cp, + bool out) { struct ip_vs_conn_param p; - ip_vs_conn_fill_param(cp->ipvs, cp->af, cp->protocol, - &cp->caddr, cp->cport, NULL, 0, &p); + if (!out) + ip_vs_conn_fill_param(cp->ipvs, cp->af, cp->protocol, + &cp->caddr, cp->cport, &cp->vaddr, + cp->vport, &p); + else + ip_vs_conn_fill_param(cp->ipvs, cp->af, cp->protocol, + &cp->daddr, cp->dport, &cp->caddr, + cp->cport, &p); if (cp->pe) { p.pe = cp->pe; @@ -152,31 +234,51 @@ static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) p.pe_data_len = cp->pe_data_len; } - return 
ip_vs_conn_hashkey_param(&p, false); + return ip_vs_conn_hashkey_param(&p, t, out); } -/* - * Hashes ip_vs_conn in ip_vs_conn_tab by netns,proto,addr,port. +/* Hashes ip_vs_conn in conn_tab * returns bool success. */ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) { - unsigned int hash; + struct netns_ipvs *ipvs = cp->ipvs; + struct hlist_bl_head *head, *head2; + u32 hash_key, hash_key2; + struct ip_vs_rht *t; + u32 hash, hash2; + bool use2; int ret; if (cp->flags & IP_VS_CONN_F_ONE_PACKET) return 0; - /* Hash by protocol, client address and port */ - hash = ip_vs_conn_hashkey_conn(cp); + /* New entries go into recent table */ + t = rcu_dereference(ipvs->conn_tab); + t = rcu_dereference(t->new_tbl); - ct_write_lock_bh(hash); + hash = ip_vs_conn_hashkey_conn(t, cp, false); + hash_key = ip_vs_rht_build_hash_key(t, hash); + if (ip_vs_conn_use_hash2(cp)) { + hash2 = ip_vs_conn_hashkey_conn(t, cp, true); + hash_key2 = ip_vs_rht_build_hash_key(t, hash2); + use2 = true; + } else { + hash_key2 = hash_key; + use2 = false; + } + conn_tab_lock(t, cp, hash_key, hash_key2, use2, true /* new_hash */, + &head, &head2); spin_lock(&cp->lock); if (!(cp->flags & IP_VS_CONN_F_HASHED)) { cp->flags |= IP_VS_CONN_F_HASHED; + WRITE_ONCE(cp->hn0.hash_key, hash_key); + WRITE_ONCE(cp->hn1.hash_key, hash_key2); refcount_inc(&cp->refcnt); - hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]); + hlist_bl_add_head_rcu(&cp->hn0.node, head); + if (use2) + hlist_bl_add_head_rcu(&cp->hn1.node, head2); ret = 1; } else { pr_err("%s(): request for already hashed, called from %pS\n", @@ -185,75 +287,64 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) } spin_unlock(&cp->lock); - ct_write_unlock_bh(hash); - - return ret; -} - + conn_tab_unlock(head, head2); -/* - * UNhashes ip_vs_conn from ip_vs_conn_tab. - * returns bool success. Caller should hold conn reference. 
- */ -static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) -{ - unsigned int hash; - int ret; - - /* unhash it and decrease its reference counter */ - hash = ip_vs_conn_hashkey_conn(cp); - - ct_write_lock_bh(hash); - spin_lock(&cp->lock); - - if (cp->flags & IP_VS_CONN_F_HASHED) { - hlist_del_rcu(&cp->c_list); - cp->flags &= ~IP_VS_CONN_F_HASHED; - refcount_dec(&cp->refcnt); - ret = 1; - } else - ret = 0; - - spin_unlock(&cp->lock); - ct_write_unlock_bh(hash); + /* Schedule resizing if load increases */ + if (atomic_read(&ipvs->conn_count) > t->u_thresh && + !test_and_set_bit(IP_VS_WORK_CONN_RESIZE, &ipvs->work_flags)) + mod_delayed_work(system_unbound_wq, &ipvs->conn_resize_work, 0); return ret; } -/* Try to unlink ip_vs_conn from ip_vs_conn_tab. +/* Try to unlink ip_vs_conn from conn_tab. * returns bool success. */ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) { - unsigned int hash; + struct netns_ipvs *ipvs = cp->ipvs; + struct hlist_bl_head *head, *head2; + u32 hash_key, hash_key2; + struct ip_vs_rht *t; bool ret = false; + bool use2; if (cp->flags & IP_VS_CONN_F_ONE_PACKET) return refcount_dec_if_one(&cp->refcnt); - hash = ip_vs_conn_hashkey_conn(cp); + rcu_read_lock(); + + t = rcu_dereference(ipvs->conn_tab); + hash_key = READ_ONCE(cp->hn0.hash_key); + hash_key2 = READ_ONCE(cp->hn1.hash_key); + use2 = ip_vs_conn_use_hash2(cp); - ct_write_lock_bh(hash); + conn_tab_lock(t, cp, hash_key, hash_key2, use2, false /* new_hash */, + &head, &head2); spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { /* Decrease refcnt and unlink conn only if we are last user */ if (refcount_dec_if_one(&cp->refcnt)) { - hlist_del_rcu(&cp->c_list); + hlist_bl_del_rcu(&cp->hn0.node); + if (use2) + hlist_bl_del_rcu(&cp->hn1.node); cp->flags &= ~IP_VS_CONN_F_HASHED; ret = true; } } spin_unlock(&cp->lock); - ct_write_unlock_bh(hash); + conn_tab_unlock(head, head2); + + rcu_read_unlock(); return ret; } /* - * Gets ip_vs_conn associated with supplied parameters 
in the ip_vs_conn_tab. + * Gets ip_vs_conn associated with supplied parameters in the conn_tab. * Called for pkts coming from OUTside-to-INside. * p->caddr, p->cport: pkt source address (foreign host) * p->vaddr, p->vport: pkt dest address (load balancer) @@ -261,26 +352,42 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) static inline struct ip_vs_conn * __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) { - unsigned int hash; + DECLARE_IP_VS_RHT_WALK_BUCKET_RCU(); + struct netns_ipvs *ipvs = p->ipvs; + struct ip_vs_conn_hnode *hn; + struct hlist_bl_head *head; + struct ip_vs_rht *t, *pt; + struct hlist_bl_node *e; struct ip_vs_conn *cp; - - hash = ip_vs_conn_hashkey_param(p, false); + u32 hash, hash_key; rcu_read_lock(); - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (p->cport == cp->cport && p->vport == cp->vport && - cp->af == p->af && - ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && - ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && - ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && - p->protocol == cp->protocol && - cp->ipvs == p->ipvs) { - if (!__ip_vs_conn_get(cp)) - continue; - /* HIT */ - rcu_read_unlock(); - return cp; + ip_vs_rht_for_each_table_rcu(ipvs->conn_tab, t, pt) { + hash = ip_vs_conn_hashkey_param(p, t, false); + hash_key = ip_vs_rht_build_hash_key(t, hash); + ip_vs_rht_walk_bucket_rcu(t, hash_key, head) { + hlist_bl_for_each_entry_rcu(hn, e, head, node) { + if (READ_ONCE(hn->hash_key) != hash_key || + hn->dir != 0) + continue; + cp = ip_vs_hn0_to_conn(hn); + if (p->cport == cp->cport && + p->vport == cp->vport && cp->af == p->af && + ip_vs_addr_equal(p->af, p->caddr, + &cp->caddr) && + ip_vs_addr_equal(p->af, p->vaddr, + &cp->vaddr) && + (!p->cport ^ + (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && + p->protocol == cp->protocol) { + if (__ip_vs_conn_get(cp)) { + /* HIT */ + rcu_read_unlock(); + return cp; + } + } + } } } @@ -294,10 +401,16 @@ struct ip_vs_conn *ip_vs_conn_in_get(const struct 
ip_vs_conn_param *p) struct ip_vs_conn *cp; cp = __ip_vs_conn_in_get(p); - if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) { - struct ip_vs_conn_param cport_zero_p = *p; - cport_zero_p.cport = 0; - cp = __ip_vs_conn_in_get(&cport_zero_p); + if (!cp) { + struct netns_ipvs *ipvs = p->ipvs; + int af_id = ip_vs_af_index(p->af); + + if (atomic_read(&ipvs->no_cport_conns[af_id])) { + struct ip_vs_conn_param cport_zero_p = *p; + + cport_zero_p.cport = 0; + cp = __ip_vs_conn_in_get(&cport_zero_p); + } } IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n", @@ -347,37 +460,53 @@ EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto); /* Get reference to connection template */ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) { - unsigned int hash; + DECLARE_IP_VS_RHT_WALK_BUCKET_RCU(); + struct netns_ipvs *ipvs = p->ipvs; + struct ip_vs_conn_hnode *hn; + struct hlist_bl_head *head; + struct ip_vs_rht *t, *pt; + struct hlist_bl_node *e; struct ip_vs_conn *cp; - - hash = ip_vs_conn_hashkey_param(p, false); + u32 hash, hash_key; rcu_read_lock(); - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (unlikely(p->pe_data && p->pe->ct_match)) { - if (cp->ipvs != p->ipvs) - continue; - if (p->pe == cp->pe && p->pe->ct_match(p, cp)) { - if (__ip_vs_conn_get(cp)) - goto out; + ip_vs_rht_for_each_table_rcu(ipvs->conn_tab, t, pt) { + hash = ip_vs_conn_hashkey_param(p, t, false); + hash_key = ip_vs_rht_build_hash_key(t, hash); + ip_vs_rht_walk_bucket_rcu(t, hash_key, head) { + hlist_bl_for_each_entry_rcu(hn, e, head, node) { + if (READ_ONCE(hn->hash_key) != hash_key || + hn->dir != 0) + continue; + cp = ip_vs_hn0_to_conn(hn); + if (unlikely(p->pe_data && p->pe->ct_match)) { + if (p->pe == cp->pe && + p->pe->ct_match(p, cp) && + __ip_vs_conn_get(cp)) + goto out; + continue; + } + if (cp->af == p->af && + ip_vs_addr_equal(p->af, p->caddr, + &cp->caddr) && + /* protocol should only be IPPROTO_IP if + * p->vaddr is a fwmark + */ + ip_vs_addr_equal(p->protocol == 
IPPROTO_IP ? + AF_UNSPEC : p->af, + p->vaddr, &cp->vaddr) && + p->vport == cp->vport && + p->cport == cp->cport && + cp->flags & IP_VS_CONN_F_TEMPLATE && + p->protocol == cp->protocol && + cp->dport != htons(0xffff)) { + if (__ip_vs_conn_get(cp)) + goto out; + } } - continue; } - if (cp->af == p->af && - ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && - /* protocol should only be IPPROTO_IP if - * p->vaddr is a fwmark */ - ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC : - p->af, p->vaddr, &cp->vaddr) && - p->vport == cp->vport && p->cport == cp->cport && - cp->flags & IP_VS_CONN_F_TEMPLATE && - p->protocol == cp->protocol && - cp->ipvs == p->ipvs) { - if (__ip_vs_conn_get(cp)) - goto out; - } } cp = NULL; @@ -393,58 +522,68 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) return cp; } -/* Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. +/* Gets ip_vs_conn associated with supplied parameters in the conn_tab. * Called for pkts coming from inside-to-OUTside. 
* p->caddr, p->cport: pkt source address (inside host) * p->vaddr, p->vport: pkt dest address (foreign host) */ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) { - unsigned int hash; - struct ip_vs_conn *cp, *ret=NULL; + DECLARE_IP_VS_RHT_WALK_BUCKET_RCU(); + struct netns_ipvs *ipvs = p->ipvs; const union nf_inet_addr *saddr; + struct ip_vs_conn_hnode *hn; + struct hlist_bl_head *head; + struct ip_vs_rht *t, *pt; + struct hlist_bl_node *e; + struct ip_vs_conn *cp; + u32 hash, hash_key; __be16 sport; - /* - * Check for "full" addressed entries - */ - hash = ip_vs_conn_hashkey_param(p, true); - rcu_read_lock(); - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (p->vport != cp->cport) - continue; + ip_vs_rht_for_each_table_rcu(ipvs->conn_tab, t, pt) { + hash = ip_vs_conn_hashkey_param(p, t, true); + hash_key = ip_vs_rht_build_hash_key(t, hash); + ip_vs_rht_walk_bucket_rcu(t, hash_key, head) { + hlist_bl_for_each_entry_rcu(hn, e, head, node) { + /* dir can be 0 for DR/TUN */ + if (READ_ONCE(hn->hash_key) != hash_key) + continue; + cp = ip_vs_hn_to_conn(hn); + if (p->vport != cp->cport) + continue; - if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { - sport = cp->vport; - saddr = &cp->vaddr; - } else { - sport = cp->dport; - saddr = &cp->daddr; - } + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) { + sport = cp->vport; + saddr = &cp->vaddr; + } else { + sport = cp->dport; + saddr = &cp->daddr; + } - if (p->cport == sport && cp->af == p->af && - ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && - ip_vs_addr_equal(p->af, p->caddr, saddr) && - p->protocol == cp->protocol && - cp->ipvs == p->ipvs) { - if (!__ip_vs_conn_get(cp)) - continue; - /* HIT */ - ret = cp; - break; + if (p->cport == sport && cp->af == p->af && + ip_vs_addr_equal(p->af, p->vaddr, + &cp->caddr) && + ip_vs_addr_equal(p->af, p->caddr, saddr) && + p->protocol == cp->protocol) { + if (__ip_vs_conn_get(cp)) + goto out; + } + } } } + cp = NULL; +out: 
rcu_read_unlock(); IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n", ip_vs_proto_name(p->protocol), IP_VS_DBG_ADDR(p->af, p->caddr), ntohs(p->cport), IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport), - ret ? "hit" : "not hit"); + cp ? "hit" : "not hit"); - return ret; + return cp; } struct ip_vs_conn * @@ -489,20 +628,289 @@ void ip_vs_conn_put(struct ip_vs_conn *cp) */ void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) { - if (ip_vs_conn_unhash(cp)) { - spin_lock_bh(&cp->lock); - if (cp->flags & IP_VS_CONN_F_NO_CPORT) { - atomic_dec(&ip_vs_conn_no_cport_cnt); + struct hlist_bl_head *head, *head2, *head_new; + bool use2 = ip_vs_conn_use_hash2(cp); + struct netns_ipvs *ipvs = cp->ipvs; + int af_id = ip_vs_af_index(cp->af); + u32 hash_r = 0, hash_key_r = 0; + struct ip_vs_rht *t, *tp, *t2; + struct ip_vs_conn_hnode *hn; + u32 hash_key, hash_key_new; + struct ip_vs_conn_param p; + int ntbl; + int dir; + + /* No packets from inside, so we can do it in 2 steps. */ + dir = use2 ? 1 : 0; + +next_dir: + if (dir) + ip_vs_conn_fill_param(ipvs, cp->af, cp->protocol, &cp->daddr, + cp->dport, &cp->caddr, cport, &p); + else + ip_vs_conn_fill_param(ipvs, cp->af, cp->protocol, &cp->caddr, + cport, &cp->vaddr, cp->vport, &p); + hn = dir ? &cp->hn1 : &cp->hn0; + ntbl = 0; + + /* Attempt to rehash cp safely, by informing seqcount readers */ + t = rcu_dereference(ipvs->conn_tab); + hash_key = READ_ONCE(hn->hash_key); + tp = NULL; + +retry: + /* Moved to new table ? */ + if (!ip_vs_rht_same_table(t, hash_key)) { + t = rcu_dereference(t->new_tbl); + ntbl++; + /* We are lost? */ + if (ntbl >= 2) + return; + } + + /* Rehashing during resize? 
Use the recent table for adds */ + t2 = rcu_dereference(t->new_tbl); + /* Calc new hash once per table */ + if (tp != t2) { + hash_r = ip_vs_conn_hashkey_param(&p, t2, dir); + hash_key_r = ip_vs_rht_build_hash_key(t2, hash_r); + tp = t2; + } + head = t->buckets + (hash_key & t->mask); + head2 = t2->buckets + (hash_key_r & t2->mask); + head_new = head2; + + if (head > head2 && t == t2) + swap(head, head2); + + /* Lock seqcount only for the old bucket, even if we are on new table + * because it affects the del operation, not the adding. + */ + spin_lock_bh(&t->lock[hash_key & t->lock_mask].l); + preempt_disable_nested(); + write_seqcount_begin(&t->seqc[hash_key & t->seqc_mask]); + + /* Lock buckets in same (increasing) order */ + hlist_bl_lock(head); + if (head != head2) + hlist_bl_lock(head2); + + /* Ensure hash_key is read under lock */ + hash_key_new = READ_ONCE(hn->hash_key); + /* Racing with another rehashing ? */ + if (unlikely(hash_key != hash_key_new)) { + if (head != head2) + hlist_bl_unlock(head2); + hlist_bl_unlock(head); + write_seqcount_end(&t->seqc[hash_key & t->seqc_mask]); + preempt_enable_nested(); + spin_unlock_bh(&t->lock[hash_key & t->lock_mask].l); + hash_key = hash_key_new; + goto retry; + } + + spin_lock(&cp->lock); + if ((cp->flags & IP_VS_CONN_F_NO_CPORT) && + (cp->flags & IP_VS_CONN_F_HASHED)) { + /* We do not recalc hash_key_r under lock, we assume the + * parameters in cp do not change, i.e. cport is + * the only possible change. + */ + WRITE_ONCE(hn->hash_key, hash_key_r); + if (!use2) + WRITE_ONCE(cp->hn1.hash_key, hash_key_r); + /* For dir=1 we do not check in flags if hn is already + * rehashed but this check will do it. 
+ */ + if (head != head2) { + hlist_bl_del_rcu(&hn->node); + hlist_bl_add_head_rcu(&hn->node, head_new); + } + if (!dir) { + atomic_dec(&ipvs->no_cport_conns[af_id]); cp->flags &= ~IP_VS_CONN_F_NO_CPORT; cp->cport = cport; } - spin_unlock_bh(&cp->lock); - - /* hash on new dport */ - ip_vs_conn_hash(cp); } + spin_unlock(&cp->lock); + + if (head != head2) + hlist_bl_unlock(head2); + hlist_bl_unlock(head); + write_seqcount_end(&t->seqc[hash_key & t->seqc_mask]); + preempt_enable_nested(); + spin_unlock_bh(&t->lock[hash_key & t->lock_mask].l); + if (dir--) + goto next_dir; +} + +/* Get default load factor to map conn_count/u_thresh to t->size */ +static int ip_vs_conn_default_load_factor(struct netns_ipvs *ipvs) +{ + int factor; + + if (net_eq(ipvs->net, &init_net)) + factor = -3; + else + factor = -1; + /* Double hashing adds twice more nodes for NAT */ + factor--; + return factor; +} + +/* Get the desired conn_tab size */ +int ip_vs_conn_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, + int lfactor) +{ + return ip_vs_rht_desired_size(ipvs, t, atomic_read(&ipvs->conn_count), + lfactor, IP_VS_CONN_TAB_MIN_BITS, + ip_vs_conn_tab_bits); } +/* Allocate conn_tab */ +struct ip_vs_rht *ip_vs_conn_tab_alloc(struct netns_ipvs *ipvs, int buckets, + int lfactor) +{ + struct ip_vs_rht *t; + int scounts, locks; + + /* scounts: affects readers during resize */ + scounts = clamp(buckets >> 6, 1, 256); + /* locks: based on parallel IP_VS_CONN_F_NO_CPORT operations + resize */ + locks = clamp(8, 1, scounts); + + t = ip_vs_rht_alloc(buckets, scounts, locks); + if (!t) + return NULL; + t->lfactor = lfactor; + ip_vs_rht_set_thresholds(t, t->size, lfactor, IP_VS_CONN_TAB_MIN_BITS, + ip_vs_conn_tab_bits); + return t; +} + +/* conn_tab resizer work */ +static void conn_resize_work_handler(struct work_struct *work) +{ + struct hlist_bl_head *head, *head2; + unsigned int resched_score = 0; + struct hlist_bl_node *cn, *nn; + struct ip_vs_rht *t, *t_new; + struct ip_vs_conn_hnode 
*hn; + struct netns_ipvs *ipvs; + struct ip_vs_conn *cp; + bool more_work = false; + u32 hash, hash_key; + int limit = 0; + int new_size; + int lfactor; + u32 bucket; + + ipvs = container_of(work, struct netns_ipvs, conn_resize_work.work); + + /* Allow work to be queued again */ + clear_bit(IP_VS_WORK_CONN_RESIZE, &ipvs->work_flags); + t = rcu_dereference_protected(ipvs->conn_tab, 1); + /* Do nothing if table is removed */ + if (!t) + goto out; + /* New table needs to be registered? BUG! */ + if (t != rcu_dereference_protected(t->new_tbl, 1)) + goto out; + + lfactor = sysctl_conn_lfactor(ipvs); + /* Should we resize ? */ + new_size = ip_vs_conn_desired_size(ipvs, t, lfactor); + if (new_size == t->size && lfactor == t->lfactor) + goto out; + + t_new = ip_vs_conn_tab_alloc(ipvs, new_size, lfactor); + if (!t_new) { + more_work = true; + goto out; + } + /* Flip the table_id */ + t_new->table_id = t->table_id ^ IP_VS_RHT_TABLE_ID_MASK; + + rcu_assign_pointer(t->new_tbl, t_new); + + /* Wait RCU readers to see the new table, we do not want new + * conns to go into old table and to be left there. + */ + synchronize_rcu(); + + ip_vs_rht_for_each_bucket(t, bucket, head) { +same_bucket: + if (++limit >= 16) { + if (resched_score >= 100) { + resched_score = 0; + cond_resched(); + } + limit = 0; + } + if (hlist_bl_empty(head)) { + resched_score++; + continue; + } + /* Preemption calls ahead... 
*/ + resched_score = 0; + + /* seqcount_t usage considering PREEMPT_RT rules: + * - other writers (SoftIRQ) => serialize with spin_lock_bh + * - readers (SoftIRQ) => disable BHs + * - readers (processes) => preemption should be disabled + */ + spin_lock_bh(&t->lock[bucket & t->lock_mask].l); + preempt_disable_nested(); + write_seqcount_begin(&t->seqc[bucket & t->seqc_mask]); + hlist_bl_lock(head); + + hlist_bl_for_each_entry_safe(hn, cn, nn, head, node) { + cp = ip_vs_hn_to_conn(hn); + hash = ip_vs_conn_hashkey_conn(t_new, cp, hn->dir); + hash_key = ip_vs_rht_build_hash_key(t_new, hash); + + head2 = t_new->buckets + (hash & t_new->mask); + hlist_bl_lock(head2); + /* t_new->seqc are not used at this stage, we race + * only with add/del, so only lock the bucket. + */ + hlist_bl_del_rcu(&hn->node); + WRITE_ONCE(hn->hash_key, hash_key); + /* Keep both hash keys in sync if no double hashing */ + if (!ip_vs_conn_use_hash2(cp)) + WRITE_ONCE(cp->hn1.hash_key, hash_key); + hlist_bl_add_head_rcu(&hn->node, head2); + hlist_bl_unlock(head2); + /* Too long chain? Do it in steps */ + if (++limit >= 64) + break; + } + + hlist_bl_unlock(head); + write_seqcount_end(&t->seqc[bucket & t->seqc_mask]); + preempt_enable_nested(); + spin_unlock_bh(&t->lock[bucket & t->lock_mask].l); + if (limit >= 64) + goto same_bucket; + } + + rcu_assign_pointer(ipvs->conn_tab, t_new); + /* Inform readers that new table is installed */ + smp_mb__before_atomic(); + atomic_inc(&ipvs->conn_tab_changes); + + /* RCU readers should not see more than two tables in chain. + * To prevent new table to be attached wait here instead of + * freeing the old table in RCU callback. + */ + synchronize_rcu(); + ip_vs_rht_free(t); + +out: + /* Monitor if we need to shrink table */ + queue_delayed_work(system_unbound_wq, &ipvs->conn_resize_work, + more_work ? 1 : 2 * HZ); +} /* * Bind a connection entry with the corresponding packet_xmit. 
@@ -786,17 +1194,11 @@ int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest) IP_VS_DBG_ADDR(ct->daf, &ct->daddr), ntohs(ct->dport)); - /* - * Invalidate the connection template + /* Invalidate the connection template. Prefer to avoid + * rehashing, it will move it as first in chain, so use + * only dport as indication, it is not a hash key. */ - if (ct->vport != htons(0xffff)) { - if (ip_vs_conn_unhash(ct)) { - ct->dport = htons(0xffff); - ct->vport = htons(0xffff); - ct->cport = 0; - ip_vs_conn_hash(ct); - } - } + ct->dport = htons(0xffff); /* * Simply decrease the refcnt of the template, @@ -891,8 +1293,11 @@ static void ip_vs_conn_expire(struct timer_list *t) if (unlikely(cp->app != NULL)) ip_vs_unbind_app(cp); ip_vs_unbind_dest(cp); - if (cp->flags & IP_VS_CONN_F_NO_CPORT) - atomic_dec(&ip_vs_conn_no_cport_cnt); + if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { + int af_id = ip_vs_af_index(cp->af); + + atomic_dec(&ipvs->no_cport_conns[af_id]); + } if (cp->flags & IP_VS_CONN_F_ONE_PACKET) ip_vs_conn_rcu_free(&cp->rcu_head); else @@ -934,7 +1339,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) /* - * Create a new connection entry and hash it into the ip_vs_conn_tab + * Create a new connection entry and hash it into the conn_tab */ struct ip_vs_conn * ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, @@ -952,10 +1357,13 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, return NULL; } - INIT_HLIST_NODE(&cp->c_list); + INIT_HLIST_BL_NODE(&cp->hn0.node); + INIT_HLIST_BL_NODE(&cp->hn1.node); timer_setup(&cp->timer, ip_vs_conn_expire, 0); cp->ipvs = ipvs; + cp->hn0.dir = 0; cp->af = p->af; + cp->hn1.dir = 1; cp->daf = dest_af; cp->protocol = p->protocol; ip_vs_addr_set(p->af, &cp->caddr, p->caddr); @@ -999,8 +1407,11 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, cp->out_seq.delta = 0; atomic_inc(&ipvs->conn_count); - if (flags & IP_VS_CONN_F_NO_CPORT) - atomic_inc(&ip_vs_conn_no_cport_cnt); + if 
(unlikely(flags & IP_VS_CONN_F_NO_CPORT)) { + int af_id = ip_vs_af_index(cp->af); + + atomic_inc(&ipvs->no_cport_conns[af_id]); + } /* Bind the connection with a destination server */ cp->dest = NULL; @@ -1033,7 +1444,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, if (ip_vs_conntrack_enabled(ipvs)) cp->flags |= IP_VS_CONN_F_NFCT; - /* Hash it in the ip_vs_conn_tab finally */ + /* Hash it in the conn_tab finally */ ip_vs_conn_hash(cp); return cp; @@ -1045,32 +1456,50 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, #ifdef CONFIG_PROC_FS struct ip_vs_iter_state { struct seq_net_private p; - unsigned int bucket; + struct ip_vs_rht *t; + int gen; + u32 bucket; unsigned int skip_elems; }; -static void *ip_vs_conn_array(struct ip_vs_iter_state *iter) +static void *ip_vs_conn_array(struct seq_file *seq) { + struct ip_vs_iter_state *iter = seq->private; + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); + struct ip_vs_rht *t = iter->t; + struct ip_vs_conn_hnode *hn; + struct hlist_bl_node *e; int idx; - struct ip_vs_conn *cp; - for (idx = iter->bucket; idx < ip_vs_conn_tab_size; idx++) { + if (!t) + return NULL; + for (idx = iter->bucket; idx < t->size; idx++) { unsigned int skip = 0; - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { + hlist_bl_for_each_entry_rcu(hn, e, &t->buckets[idx], node) { /* __ip_vs_conn_get() is not needed by * ip_vs_conn_seq_show and ip_vs_conn_sync_seq_show */ + if (!ip_vs_rht_same_table(t, READ_ONCE(hn->hash_key))) + break; + if (hn->dir != 0) + continue; if (skip >= iter->skip_elems) { iter->bucket = idx; - return cp; + return hn; } ++skip; } + if (!(idx & 31)) { + cond_resched_rcu(); + /* New table installed ? 
*/ + if (iter->gen != atomic_read(&ipvs->conn_tab_changes)) + break; + } iter->skip_elems = 0; - cond_resched_rcu(); } iter->bucket = idx; @@ -1081,38 +1510,52 @@ static void *ip_vs_conn_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { struct ip_vs_iter_state *iter = seq->private; + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); rcu_read_lock(); + iter->gen = atomic_read(&ipvs->conn_tab_changes); + smp_rmb(); /* ipvs->conn_tab and conn_tab_changes */ + iter->t = rcu_dereference(ipvs->conn_tab); if (*pos == 0) { iter->skip_elems = 0; iter->bucket = 0; return SEQ_START_TOKEN; } - return ip_vs_conn_array(iter); + return ip_vs_conn_array(seq); } static void *ip_vs_conn_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct ip_vs_conn *cp = v; struct ip_vs_iter_state *iter = seq->private; - struct hlist_node *e; + struct ip_vs_conn_hnode *hn = v; + struct hlist_bl_node *e; + struct ip_vs_rht *t; ++*pos; if (v == SEQ_START_TOKEN) - return ip_vs_conn_array(iter); + return ip_vs_conn_array(seq); + + t = iter->t; + if (!t) + return NULL; /* more on same hash chain? */ - e = rcu_dereference(hlist_next_rcu(&cp->c_list)); - if (e) { + hlist_bl_for_each_entry_continue_rcu(hn, e, node) { + /* Our cursor was moved to new table ? 
*/ + if (!ip_vs_rht_same_table(t, READ_ONCE(hn->hash_key))) + break; + if (hn->dir != 0) + continue; iter->skip_elems++; - return hlist_entry(e, struct ip_vs_conn, c_list); + return hn; } iter->skip_elems = 0; iter->bucket++; - return ip_vs_conn_array(iter); + return ip_vs_conn_array(seq); } static void ip_vs_conn_seq_stop(struct seq_file *seq, void *v) @@ -1128,14 +1571,12 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n"); else { - const struct ip_vs_conn *cp = v; - struct net *net = seq_file_net(seq); + struct ip_vs_conn_hnode *hn = v; + const struct ip_vs_conn *cp = ip_vs_hn0_to_conn(hn); char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; size_t len = 0; char dbuf[IP_VS_ADDRSTRLEN]; - if (!net_eq(cp->ipvs->net, net)) - return 0; if (cp->pe_data) { pe_data[0] = ' '; len = strlen(cp->pe->name); @@ -1207,10 +1648,6 @@ static int ip_vs_conn_sync_seq_show(struct seq_file *seq, void *v) "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Origin Expires\n"); else { const struct ip_vs_conn *cp = v; - struct net *net = seq_file_net(seq); - - if (!net_eq(cp->ipvs->net, net)) - return 0; #ifdef CONFIG_IP_VS_IPV6 if (cp->daf == AF_INET6) @@ -1257,6 +1694,7 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = { }; #endif +#ifdef CONFIG_SYSCTL /* Randomly drop connection entries before running out of memory * Can be used for DATA and CTL conns. For TPL conns there are exceptions: @@ -1266,12 +1704,7 @@ static const struct seq_operations ip_vs_conn_sync_seq_ops = { */ static inline int todrop_entry(struct ip_vs_conn *cp) { - /* - * The drop rate array needs tuning for real environments. - * Called from timer bh only => no locking - */ - static const signed char todrop_rate[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; - static signed char todrop_counter[9] = {0}; + struct netns_ipvs *ipvs = cp->ipvs; int i; /* if the conn entry hasn't lasted for 60 seconds, don't drop it. 
@@ -1280,15 +1713,17 @@ static inline int todrop_entry(struct ip_vs_conn *cp) if (time_before(cp->timeout + jiffies, cp->timer.expires + 60*HZ)) return 0; - /* Don't drop the entry if its number of incoming packets is not - located in [0, 8] */ + /* Drop only conns with number of incoming packets in [1..8] range */ i = atomic_read(&cp->in_pkts); - if (i > 8 || i < 0) return 0; + if (i > 8 || i < 1) + return 0; - if (!todrop_rate[i]) return 0; - if (--todrop_counter[i] > 0) return 0; + i--; + if (--ipvs->dropentry_counters[i] > 0) + return 0; - todrop_counter[i] = todrop_rate[i]; + /* Prefer to drop conns with less number of incoming packets */ + ipvs->dropentry_counters[i] = i + 1; return 1; } @@ -1302,22 +1737,33 @@ static inline bool ip_vs_conn_ops_mode(struct ip_vs_conn *cp) return svc && (svc->flags & IP_VS_SVC_F_ONEPACKET); } -/* Called from keventd and must protect itself from softirqs */ void ip_vs_random_dropentry(struct netns_ipvs *ipvs) { - int idx; + struct ip_vs_conn_hnode *hn; + struct hlist_bl_node *e; struct ip_vs_conn *cp; + struct ip_vs_rht *t; + unsigned int r; + int idx; + r = get_random_u32(); rcu_read_lock(); + t = rcu_dereference(ipvs->conn_tab); + if (!t) + goto out; /* * Randomly scan 1/32 of the whole table every second */ - for (idx = 0; idx < (ip_vs_conn_tab_size>>5); idx++) { - unsigned int hash = get_random_u32() & ip_vs_conn_tab_mask; + for (idx = 0; idx < (t->size >> 5); idx++) { + unsigned int hash = (r + idx) & t->mask; - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[hash], c_list) { - if (cp->ipvs != ipvs) + /* Don't care if due to moved entry we jump to another bucket + * and even to new table + */ + hlist_bl_for_each_entry_rcu(hn, e, &t->buckets[hash], node) { + if (hn->dir != 0) continue; + cp = ip_vs_hn0_to_conn(hn); if (atomic_read(&cp->n_control)) continue; if (cp->flags & IP_VS_CONN_F_TEMPLATE) { @@ -1364,27 +1810,43 @@ drop: IP_VS_DBG(4, "drop connection\n"); ip_vs_conn_del(cp); } - cond_resched_rcu(); + if (!(idx & 31)) { + 
cond_resched_rcu(); + t = rcu_dereference(ipvs->conn_tab); + if (!t) + goto out; + } } + +out: rcu_read_unlock(); } +#endif - -/* - * Flush all the connection entries in the ip_vs_conn_tab - */ +/* Flush all the connection entries in the conn_tab */ static void ip_vs_conn_flush(struct netns_ipvs *ipvs) { - int idx; + DECLARE_IP_VS_RHT_WALK_BUCKETS_SAFE_RCU(); struct ip_vs_conn *cp, *cp_c; + struct ip_vs_conn_hnode *hn; + struct hlist_bl_head *head; + struct ip_vs_rht *t, *p; + struct hlist_bl_node *e; + + if (!rcu_dereference_protected(ipvs->conn_tab, 1)) + return; + cancel_delayed_work_sync(&ipvs->conn_resize_work); + if (!atomic_read(&ipvs->conn_count)) + goto unreg; flush_again: + /* Rely on RCU grace period while accessing cp after ip_vs_conn_del */ rcu_read_lock(); - for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { - if (cp->ipvs != ipvs) + ip_vs_rht_walk_buckets_safe_rcu(ipvs->conn_tab, head) { + hlist_bl_for_each_entry_rcu(hn, e, head, node) { + if (hn->dir != 0) continue; + cp = ip_vs_hn0_to_conn(hn); if (atomic_read(&cp->n_control)) continue; cp_c = cp->control; @@ -1405,21 +1867,51 @@ flush_again: schedule(); goto flush_again; } + +unreg: + /* Unregister the hash table and release it after RCU grace period. + * This is needed because other works may not be stopped yet and + * they may walk the tables. 
+ */ + t = rcu_dereference_protected(ipvs->conn_tab, 1); + rcu_assign_pointer(ipvs->conn_tab, NULL); + /* Inform readers that conn_tab is changed */ + smp_mb__before_atomic(); + atomic_inc(&ipvs->conn_tab_changes); + while (1) { + p = rcu_dereference_protected(t->new_tbl, 1); + call_rcu(&t->rcu_head, ip_vs_rht_rcu_free); + if (p == t) + break; + t = p; + } } #ifdef CONFIG_SYSCTL void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs) { - int idx; + DECLARE_IP_VS_RHT_WALK_BUCKETS_RCU(); + unsigned int resched_score = 0; struct ip_vs_conn *cp, *cp_c; + struct ip_vs_conn_hnode *hn; + struct hlist_bl_head *head; struct ip_vs_dest *dest; + struct hlist_bl_node *e; + int old_gen, new_gen; + if (!atomic_read(&ipvs->conn_count)) + return; + old_gen = atomic_read(&ipvs->conn_tab_changes); rcu_read_lock(); - for (idx = 0; idx < ip_vs_conn_tab_size; idx++) { - hlist_for_each_entry_rcu(cp, &ip_vs_conn_tab[idx], c_list) { - if (cp->ipvs != ipvs) - continue; +repeat: + smp_rmb(); /* ipvs->conn_tab and conn_tab_changes */ + ip_vs_rht_walk_buckets_rcu(ipvs->conn_tab, head) { + hlist_bl_for_each_entry_rcu(hn, e, head, node) { + if (hn->dir != 0) + continue; + cp = ip_vs_hn0_to_conn(hn); + resched_score++; dest = cp->dest; if (!dest || (dest->flags & IP_VS_DEST_F_AVAILABLE)) continue; @@ -1434,13 +1926,25 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs) IP_VS_DBG(4, "del controlling connection\n"); ip_vs_conn_del(cp_c); } + resched_score += 10; + } + resched_score++; + if (resched_score >= 100) { + resched_score = 0; + cond_resched_rcu(); + /* netns clean up started, abort delayed work */ + if (!READ_ONCE(ipvs->enable)) + goto out; + new_gen = atomic_read(&ipvs->conn_tab_changes); + /* New table installed ? 
*/ + if (old_gen != new_gen) { + old_gen = new_gen; + goto repeat; + } } - cond_resched_rcu(); - - /* netns clean up started, abort delayed work */ - if (!READ_ONCE(ipvs->enable)) - break; } + +out: rcu_read_unlock(); } #endif @@ -1450,7 +1954,15 @@ void ip_vs_expire_nodest_conn_flush(struct netns_ipvs *ipvs) */ int __net_init ip_vs_conn_net_init(struct netns_ipvs *ipvs) { + int idx; + atomic_set(&ipvs->conn_count, 0); + for (idx = 0; idx < IP_VS_AF_MAX; idx++) + atomic_set(&ipvs->no_cport_conns[idx], 0); + INIT_DELAYED_WORK(&ipvs->conn_resize_work, conn_resize_work_handler); + RCU_INIT_POINTER(ipvs->conn_tab, NULL); + atomic_set(&ipvs->conn_tab_changes, 0); + ipvs->sysctl_conn_lfactor = ip_vs_conn_default_load_factor(ipvs); #ifdef CONFIG_PROC_FS if (!proc_create_net("ip_vs_conn", 0, ipvs->net->proc_net, @@ -1486,56 +1998,36 @@ void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs) int __init ip_vs_conn_init(void) { + int min = IP_VS_CONN_TAB_MIN_BITS; + int max = IP_VS_CONN_TAB_MAX_BITS; size_t tab_array_size; int max_avail; -#if BITS_PER_LONG > 32 - int max = 27; -#else - int max = 20; -#endif - int min = 8; - int idx; max_avail = order_base_2(totalram_pages()) + PAGE_SHIFT; - max_avail -= 2; /* ~4 in hash row */ + /* 64-bit: 27 bits at 64GB, 32-bit: 20 bits at 512MB */ + max_avail += 1; /* hash table loaded at 50% */ max_avail -= 1; /* IPVS up to 1/2 of mem */ max_avail -= order_base_2(sizeof(struct ip_vs_conn)); max = clamp(max_avail, min, max); ip_vs_conn_tab_bits = clamp(ip_vs_conn_tab_bits, min, max); ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits; - ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1; /* * Allocate the connection hash table and initialize its list heads */ tab_array_size = array_size(ip_vs_conn_tab_size, - sizeof(*ip_vs_conn_tab)); - ip_vs_conn_tab = kvmalloc_objs(*ip_vs_conn_tab, ip_vs_conn_tab_size); - if (!ip_vs_conn_tab) - return -ENOMEM; + sizeof(struct hlist_bl_head)); /* Allocate ip_vs_conn slab cache */ ip_vs_conn_cachep = 
KMEM_CACHE(ip_vs_conn, SLAB_HWCACHE_ALIGN); - if (!ip_vs_conn_cachep) { - kvfree(ip_vs_conn_tab); + if (!ip_vs_conn_cachep) return -ENOMEM; - } pr_info("Connection hash table configured (size=%d, memory=%zdKbytes)\n", ip_vs_conn_tab_size, tab_array_size / 1024); IP_VS_DBG(0, "Each connection entry needs %zd bytes at least\n", sizeof(struct ip_vs_conn)); - for (idx = 0; idx < ip_vs_conn_tab_size; idx++) - INIT_HLIST_HEAD(&ip_vs_conn_tab[idx]); - - for (idx = 0; idx < CT_LOCKARRAY_SIZE; idx++) { - spin_lock_init(&__ip_vs_conntbl_lock_array[idx].l); - } - - /* calculate the random value for connection hash */ - get_random_bytes(&ip_vs_conn_rnd, sizeof(ip_vs_conn_rnd)); - return 0; } @@ -1545,5 +2037,4 @@ void ip_vs_conn_cleanup(void) rcu_barrier(); /* Release the empty cache */ kmem_cache_destroy(ip_vs_conn_cachep); - kvfree(ip_vs_conn_tab); } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 90d56f92c0f6..f5b7a2047291 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -117,6 +117,185 @@ void ip_vs_init_hash_table(struct list_head *table, int rows) INIT_LIST_HEAD(&table[rows]); } +/* IPVS Resizable Hash Tables: + * - list_bl buckets with bit lock + * + * Goals: + * - RCU lookup for entry can run in parallel with add/del/move operations + * - hash keys can be on non-contiguous memory + * - support entries with duplicate keys + * - unlink entries without lookup, use the saved table and bucket id + * - resizing can trigger on load change or depending on key refresh period + * - customizable load factor to balance between speed and memory usage + * - add/del/move operations should be allowed for any context + * + * Resizing: + * - new table is attached to the current table and all entries are moved + * with new hash key. Finally, the new table is installed as current one and + * the old table is released after RCU grace period. 
+ * - RCU read-side critical sections will walk two tables while resizing is + * in progress + * - new entries are added to the new table + * - entries will be deleted from the old or from the new table, the table_id + * can be saved into entry as part of the hash key to know where the entry is + * hashed + * - move operations may delay readers or to cause retry for the modified + * bucket. As result, searched entry will be found but walkers that operate + * on multiple entries may see same entry twice if bucket walking is retried. + * - for fast path the number of entries (load) can be compared to u_thresh + * and l_thresh to decide when to trigger table growing/shrinking. They + * are calculated based on load factor (shift count), negative value allows + * load to be below 100% to reduce collisions by maintaining larger table + * while positive value tolerates collisions by using smaller table and load + * above 100%: u_thresh(load) = size * (2 ^ lfactor) + * + * Locking: + * - lock: protect seqc if other context except resizer can move entries + * - seqc: seqcount_t, delay/retry readers while entries are moved to + * new table on resizing + * - bit lock: serialize bucket modifications + * - writers may use other locking mechanisms to serialize operations for + * resizing, moving and installing new tables + */ + +void ip_vs_rht_free(struct ip_vs_rht *t) +{ + kvfree(t->buckets); + kvfree(t->seqc); + kvfree(t->lock); + kfree(t); +} + +void ip_vs_rht_rcu_free(struct rcu_head *head) +{ + struct ip_vs_rht *t; + + t = container_of(head, struct ip_vs_rht, rcu_head); + ip_vs_rht_free(t); +} + +struct ip_vs_rht *ip_vs_rht_alloc(int buckets, int scounts, int locks) +{ + struct ip_vs_rht *t = kzalloc(sizeof(*t), GFP_KERNEL); + int i; + + if (!t) + return NULL; + if (scounts) { + int ml = roundup_pow_of_two(nr_cpu_ids); + + scounts = min(scounts, buckets); + scounts = min(scounts, ml); + t->seqc = kvmalloc_array(scounts, sizeof(*t->seqc), GFP_KERNEL); + if (!t->seqc) + goto 
err; + for (i = 0; i < scounts; i++) + seqcount_init(&t->seqc[i]); + + if (locks) { + locks = min(locks, scounts); + t->lock = kvmalloc_array(locks, sizeof(*t->lock), + GFP_KERNEL); + if (!t->lock) + goto err; + for (i = 0; i < locks; i++) + spin_lock_init(&t->lock[i].l); + } + } + + t->buckets = kvmalloc_array(buckets, sizeof(*t->buckets), GFP_KERNEL); + if (!t->buckets) + goto err; + for (i = 0; i < buckets; i++) + INIT_HLIST_BL_HEAD(&t->buckets[i]); + t->mask = buckets - 1; + t->size = buckets; + t->seqc_mask = scounts - 1; + t->lock_mask = locks - 1; + t->u_thresh = buckets; + t->l_thresh = buckets >> 4; + t->bits = order_base_2(buckets); + /* new_tbl points to self if no new table is filled */ + RCU_INIT_POINTER(t->new_tbl, t); + get_random_bytes(&t->hash_key, sizeof(t->hash_key)); + return t; + +err: + ip_vs_rht_free(t); + return NULL; +} + +/* Get the desired table size for n entries based on current table size and + * by using the formula size = n / (2^lfactor) + * lfactor: shift value for the load factor: + * - >0: u_thresh=size << lfactor, for load factor above 100% + * - <0: u_thresh=size >> -lfactor, for load factor below 100% + * - 0: for load factor of 100% + */ +int ip_vs_rht_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, int n, + int lfactor, int min_bits, int max_bits) +{ + if (!t) + return 1 << min_bits; + n = roundup_pow_of_two(n); + if (lfactor < 0) { + int factor = min(-lfactor, max_bits); + + n = min(n, 1 << (max_bits - factor)); + n <<= factor; + } else { + n = min(n >> lfactor, 1 << max_bits); + } + if (lfactor != t->lfactor) + return clamp(n, 1 << min_bits, 1 << max_bits); + if (n > t->size) + return n; + if (n > t->size >> 4) + return t->size; + /* Shrink but keep it n * 2 to prevent frequent resizing */ + return clamp(n << 1, 1 << min_bits, 1 << max_bits); +} + +/* Set thresholds based on table size and load factor: + * u_thresh = size * (2^lfactor) + * l_thresh = u_thresh / 16 + * u_thresh/l_thresh can be used to check if 
load triggers a table grow/shrink + */ +void ip_vs_rht_set_thresholds(struct ip_vs_rht *t, int size, int lfactor, + int min_bits, int max_bits) +{ + if (size >= 1 << max_bits) + t->u_thresh = INT_MAX; /* stop growing */ + else if (lfactor <= 0) + t->u_thresh = size >> min(-lfactor, max_bits); + else + t->u_thresh = min(size, 1 << (30 - lfactor)) << lfactor; + + /* l_thresh: shrink when load is 16 times lower, can be 0 */ + if (size >= 1 << max_bits) + t->l_thresh = (1 << max_bits) >> 4; + else if (size > 1 << min_bits) + t->l_thresh = t->u_thresh >> 4; + else + t->l_thresh = 0; /* stop shrinking */ +} + +/* Return hash value for local info (fast, insecure) */ +u32 ip_vs_rht_hash_linfo(struct ip_vs_rht *t, int af, + const union nf_inet_addr *addr, u32 v1, u32 v2) +{ + u32 v3; + +#ifdef CONFIG_IP_VS_IPV6 + if (af == AF_INET6) + v3 = ipv6_addr_hash(&addr->in6); + else +#endif + v3 = addr->all[0]; + + return jhash_3words(v1, v2, v3, (u32)t->hash_key.key[0]); +} + static inline void ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) { @@ -1400,7 +1579,7 @@ ip_vs_out_hook(void *priv, struct sk_buff *skb, const struct nf_hook_state *stat return handle_response(af, skb, pd, cp, &iph, hooknum); /* Check for real-server-started requests */ - if (atomic_read(&ipvs->conn_out_counter)) { + if (atomic_read(&ipvs->conn_out_counter[ip_vs_af_index(af)])) { /* Currently only for UDP: * connection oriented protocols typically use * ephemeral ports for outgoing connections, so diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 2aaf50f52c8e..6632daa87ded 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -29,6 +29,7 @@ #include <linux/netfilter.h> #include <linux/netfilter_ipv4.h> #include <linux/mutex.h> +#include <linux/rcupdate_wait.h> #include <net/net_namespace.h> #include <linux/nsproxy.h> @@ -48,7 +49,7 @@ MODULE_ALIAS_GENL_FAMILY(IPVS_GENL_NAME); -DEFINE_MUTEX(__ip_vs_mutex); /* Serialize configuration 
with sockopt/netlink */ +static struct lock_class_key __ipvs_service_key; /* sysctl variables */ @@ -280,6 +281,20 @@ unlock: mutex_unlock(&ipvs->est_mutex); } +static int get_conn_tab_size(struct netns_ipvs *ipvs) +{ + const struct ip_vs_rht *t; + int size = 0; + + rcu_read_lock(); + t = rcu_dereference(ipvs->conn_tab); + if (t) + size = t->size; + rcu_read_unlock(); + + return size; +} + int ip_vs_use_count_inc(void) { @@ -293,58 +308,59 @@ ip_vs_use_count_dec(void) } -/* - * Hash table: for virtual service lookups +/* Service hashing: + * Operation Locking order + * --------------------------------------------------------------------------- + * add table service_mutex, svc_resize_sem(W) + * del table service_mutex + * move between tables svc_resize_sem(W), seqcount_t(W), bit lock + * add/del service service_mutex, bit lock + * find service RCU, seqcount_t(R) + * walk services(blocking) service_mutex, svc_resize_sem(R) + * walk services(non-blocking) RCU, seqcount_t(R) + * + * - new tables are linked/unlinked under service_mutex and svc_resize_sem + * - new table is linked on resizing and all operations can run in parallel + * in 2 tables until the new table is registered as current one + * - two contexts can modify buckets: config and table resize, both in + * process context + * - only table resizer can move entries, so we do not protect t->seqc[] + * items with t->lock[] + * - lookups occur under RCU lock and seqcount reader lock to detect if + * services are moved to new table + * - move operations may disturb readers: find operation will not miss entries + * but walkers may see same entry twice if they are forced to retry chains + * - walkers using cond_resched_rcu() on !PREEMPT_RCU may need to hold + * service_mutex to disallow new tables to be installed or to check + * svc_table_changes and repeat the RCU read section if new table is installed */ -#define IP_VS_SVC_TAB_BITS 8 -#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS) -#define IP_VS_SVC_TAB_MASK 
(IP_VS_SVC_TAB_SIZE - 1) - -/* the service table hashed by <protocol, addr, port> */ -static struct hlist_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE]; -/* the service table hashed by fwmark */ -static struct hlist_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE]; - /* * Returns hash value for virtual service */ -static inline unsigned int -ip_vs_svc_hashkey(struct netns_ipvs *ipvs, int af, unsigned int proto, +static inline u32 +ip_vs_svc_hashval(struct ip_vs_rht *t, int af, unsigned int proto, const union nf_inet_addr *addr, __be16 port) { - unsigned int porth = ntohs(port); - __be32 addr_fold = addr->ip; - __u32 ahash; - -#ifdef CONFIG_IP_VS_IPV6 - if (af == AF_INET6) - addr_fold = addr->ip6[0]^addr->ip6[1]^ - addr->ip6[2]^addr->ip6[3]; -#endif - ahash = ntohl(addr_fold); - ahash ^= ((size_t) ipvs >> 8); - - return (proto ^ ahash ^ (porth >> IP_VS_SVC_TAB_BITS) ^ porth) & - IP_VS_SVC_TAB_MASK; + return ip_vs_rht_hash_linfo(t, af, addr, ntohs(port), proto); } /* * Returns hash value of fwmark for virtual service lookup */ -static inline unsigned int ip_vs_svc_fwm_hashkey(struct netns_ipvs *ipvs, __u32 fwmark) +static inline u32 ip_vs_svc_fwm_hashval(struct ip_vs_rht *t, int af, + __u32 fwmark) { - return (((size_t)ipvs>>8) ^ fwmark) & IP_VS_SVC_TAB_MASK; + return jhash_2words(fwmark, af, (u32)t->hash_key.key[0]); } -/* - * Hashes a service in the ip_vs_svc_table by <netns,proto,addr,port> - * or in the ip_vs_svc_fwm_table by fwmark. - * Should be called with locked tables. 
- */ +/* Hashes a service in the svc_table by <proto,addr,port> or by fwmark */ static int ip_vs_svc_hash(struct ip_vs_service *svc) { - unsigned int hash; + struct netns_ipvs *ipvs = svc->ipvs; + struct hlist_bl_head *head; + struct ip_vs_rht *t; + u32 hash; if (svc->flags & IP_VS_SVC_F_HASHED) { pr_err("%s(): request for already hashed, called from %pS\n", @@ -352,50 +368,81 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) return 0; } + /* increase its refcnt because it is referenced by the svc table */ + atomic_inc(&svc->refcnt); + + /* New entries go into recent table */ + t = rcu_dereference_protected(ipvs->svc_table, 1); + t = rcu_dereference_protected(t->new_tbl, 1); + if (svc->fwmark == 0) { /* - * Hash it by <netns,protocol,addr,port> in ip_vs_svc_table + * Hash it by <protocol,addr,port> */ - hash = ip_vs_svc_hashkey(svc->ipvs, svc->af, svc->protocol, + hash = ip_vs_svc_hashval(t, svc->af, svc->protocol, &svc->addr, svc->port); - hlist_add_head_rcu(&svc->s_list, &ip_vs_svc_table[hash]); } else { /* - * Hash it by fwmark in svc_fwm_table + * Hash it by fwmark */ - hash = ip_vs_svc_fwm_hashkey(svc->ipvs, svc->fwmark); - hlist_add_head_rcu(&svc->f_list, &ip_vs_svc_fwm_table[hash]); + hash = ip_vs_svc_fwm_hashval(t, svc->af, svc->fwmark); } - + head = t->buckets + (hash & t->mask); + hlist_bl_lock(head); + WRITE_ONCE(svc->hash_key, ip_vs_rht_build_hash_key(t, hash)); svc->flags |= IP_VS_SVC_F_HASHED; - /* increase its refcnt because it is referenced by the svc table */ - atomic_inc(&svc->refcnt); + hlist_bl_add_head_rcu(&svc->s_list, head); + hlist_bl_unlock(head); + return 1; } /* - * Unhashes a service from svc_table / svc_fwm_table. + * Unhashes a service from svc_table. * Should be called with locked tables. 
*/ static int ip_vs_svc_unhash(struct ip_vs_service *svc) { + struct netns_ipvs *ipvs = svc->ipvs; + struct hlist_bl_head *head; + struct ip_vs_rht *t; + u32 hash_key2; + u32 hash_key; + if (!(svc->flags & IP_VS_SVC_F_HASHED)) { pr_err("%s(): request for unhash flagged, called from %pS\n", __func__, __builtin_return_address(0)); return 0; } - if (svc->fwmark == 0) { - /* Remove it from the svc_table table */ - hlist_del_rcu(&svc->s_list); + t = rcu_dereference_protected(ipvs->svc_table, 1); + hash_key = READ_ONCE(svc->hash_key); + /* We need to lock the bucket in the right table */ + if (ip_vs_rht_same_table(t, hash_key)) { + head = t->buckets + (hash_key & t->mask); + hlist_bl_lock(head); + /* Ensure hash_key is read under lock */ + hash_key2 = READ_ONCE(svc->hash_key); + /* Moved to new table ? */ + if (hash_key != hash_key2) { + hlist_bl_unlock(head); + t = rcu_dereference_protected(t->new_tbl, 1); + head = t->buckets + (hash_key2 & t->mask); + hlist_bl_lock(head); + } } else { - /* Remove it from the svc_fwm_table table */ - hlist_del_rcu(&svc->f_list); + /* It is already moved to new table */ + t = rcu_dereference_protected(t->new_tbl, 1); + head = t->buckets + (hash_key & t->mask); + hlist_bl_lock(head); } + /* Remove it from svc_table */ + hlist_bl_del_rcu(&svc->s_list); svc->flags &= ~IP_VS_SVC_F_HASHED; atomic_dec(&svc->refcnt); + hlist_bl_unlock(head); return 1; } @@ -407,20 +454,29 @@ static inline struct ip_vs_service * __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { - unsigned int hash; + DECLARE_IP_VS_RHT_WALK_BUCKET_RCU(); + struct hlist_bl_head *head; struct ip_vs_service *svc; - - /* Check for "full" addressed entries */ - hash = ip_vs_svc_hashkey(ipvs, af, protocol, vaddr, vport); - - hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[hash], s_list) { - if ((svc->af == af) - && ip_vs_addr_equal(af, &svc->addr, vaddr) - && (svc->port == vport) - && (svc->protocol == protocol) - && 
(svc->ipvs == ipvs)) { - /* HIT */ - return svc; + struct ip_vs_rht *t, *p; + struct hlist_bl_node *e; + u32 hash, hash_key; + + ip_vs_rht_for_each_table_rcu(ipvs->svc_table, t, p) { + /* Check for "full" addressed entries */ + hash = ip_vs_svc_hashval(t, af, protocol, vaddr, vport); + + hash_key = ip_vs_rht_build_hash_key(t, hash); + ip_vs_rht_walk_bucket_rcu(t, hash_key, head) { + hlist_bl_for_each_entry_rcu(svc, e, head, s_list) { + if (READ_ONCE(svc->hash_key) == hash_key && + svc->af == af && + ip_vs_addr_equal(af, &svc->addr, vaddr) && + svc->port == vport && + svc->protocol == protocol && !svc->fwmark) { + /* HIT */ + return svc; + } + } } } @@ -434,17 +490,26 @@ __ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u16 protocol, static inline struct ip_vs_service * __ip_vs_svc_fwm_find(struct netns_ipvs *ipvs, int af, __u32 fwmark) { - unsigned int hash; + DECLARE_IP_VS_RHT_WALK_BUCKET_RCU(); + struct hlist_bl_head *head; struct ip_vs_service *svc; - - /* Check for fwmark addressed entries */ - hash = ip_vs_svc_fwm_hashkey(ipvs, fwmark); - - hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[hash], f_list) { - if (svc->fwmark == fwmark && svc->af == af - && (svc->ipvs == ipvs)) { - /* HIT */ - return svc; + struct ip_vs_rht *t, *p; + struct hlist_bl_node *e; + u32 hash, hash_key; + + ip_vs_rht_for_each_table_rcu(ipvs->svc_table, t, p) { + /* Check for fwmark addressed entries */ + hash = ip_vs_svc_fwm_hashval(t, af, fwmark); + + hash_key = ip_vs_rht_build_hash_key(t, hash); + ip_vs_rht_walk_bucket_rcu(t, hash_key, head) { + hlist_bl_for_each_entry_rcu(svc, e, head, s_list) { + if (READ_ONCE(svc->hash_key) == hash_key && + svc->fwmark == fwmark && svc->af == af) { + /* HIT */ + return svc; + } + } } } @@ -456,35 +521,42 @@ struct ip_vs_service * ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol, const union nf_inet_addr *vaddr, __be16 vport) { - struct ip_vs_service *svc; + struct ip_vs_service *svc = NULL; + int af_id = 
ip_vs_af_index(af); /* * Check the table hashed by fwmark first */ - if (fwmark) { + if (fwmark && atomic_read(&ipvs->fwm_services[af_id])) { svc = __ip_vs_svc_fwm_find(ipvs, af, fwmark); if (svc) goto out; } + if (!atomic_read(&ipvs->nonfwm_services[af_id])) + goto out; + /* * Check the table hashed by <protocol,addr,port> * for "full" addressed entries */ svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, vport); + if (svc) + goto out; - if (!svc && protocol == IPPROTO_TCP && - atomic_read(&ipvs->ftpsvc_counter) && + if (protocol == IPPROTO_TCP && + atomic_read(&ipvs->ftpsvc_counter[af_id]) && (vport == FTPDATA || !inet_port_requires_bind_service(ipvs->net, ntohs(vport)))) { /* * Check if ftp service entry exists, the packet * might belong to FTP data connections. */ svc = __ip_vs_service_find(ipvs, af, protocol, vaddr, FTPPORT); + if (svc) + goto out; } - if (svc == NULL - && atomic_read(&ipvs->nullsvc_counter)) { + if (atomic_read(&ipvs->nullsvc_counter[af_id])) { /* * Check if the catch-all port (port zero) exists */ @@ -500,6 +572,220 @@ ip_vs_service_find(struct netns_ipvs *ipvs, int af, __u32 fwmark, __u16 protocol return svc; } +/* Return the number of registered services */ +static int ip_vs_get_num_services(struct netns_ipvs *ipvs) +{ + int ns = 0, ni = IP_VS_AF_MAX; + + while (--ni >= 0) + ns += atomic_read(&ipvs->num_services[ni]); + return ns; +} + +/* Get default load factor to map num_services/u_thresh to t->size */ +static int ip_vs_svc_default_load_factor(struct netns_ipvs *ipvs) +{ + int factor; + + if (net_eq(ipvs->net, &init_net)) + factor = -3; /* grow if load is above 12.5% */ + else + factor = -2; /* grow if load is above 25% */ + return factor; +} + +/* Get the desired svc_table size */ +static int ip_vs_svc_desired_size(struct netns_ipvs *ipvs, struct ip_vs_rht *t, + int lfactor) +{ + return ip_vs_rht_desired_size(ipvs, t, ip_vs_get_num_services(ipvs), + lfactor, IP_VS_SVC_TAB_MIN_BITS, + IP_VS_SVC_TAB_MAX_BITS); +} + +/* Allocate 
svc_table */ +static struct ip_vs_rht *ip_vs_svc_table_alloc(struct netns_ipvs *ipvs, + int buckets, int lfactor) +{ + struct ip_vs_rht *t; + int scounts, locks; + + /* No frequent lookups to race with resizing, so use max of 64 + * seqcounts. Only resizer moves entries, so use 0 locks. + */ + scounts = clamp(buckets >> 4, 1, 64); + locks = 0; + + t = ip_vs_rht_alloc(buckets, scounts, locks); + if (!t) + return NULL; + t->lfactor = lfactor; + ip_vs_rht_set_thresholds(t, t->size, lfactor, IP_VS_SVC_TAB_MIN_BITS, + IP_VS_SVC_TAB_MAX_BITS); + return t; +} + +/* svc_table resizer work */ +static void svc_resize_work_handler(struct work_struct *work) +{ + struct hlist_bl_head *head, *head2; + struct ip_vs_rht *t_free = NULL; + unsigned int resched_score = 0; + struct hlist_bl_node *cn, *nn; + struct ip_vs_rht *t, *t_new; + struct ip_vs_service *svc; + struct netns_ipvs *ipvs; + bool more_work = true; + seqcount_t *sc; + int limit = 0; + int new_size; + int lfactor; + u32 bucket; + + ipvs = container_of(work, struct netns_ipvs, svc_resize_work.work); + + if (!down_write_trylock(&ipvs->svc_resize_sem)) + goto out; + if (!mutex_trylock(&ipvs->service_mutex)) + goto unlock_sem; + more_work = false; + clear_bit(IP_VS_WORK_SVC_RESIZE, &ipvs->work_flags); + if (!READ_ONCE(ipvs->enable) || + test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags)) + goto unlock_m; + t = rcu_dereference_protected(ipvs->svc_table, 1); + /* Do nothing if table is removed */ + if (!t) + goto unlock_m; + /* New table needs to be registered? BUG! */ + if (t != rcu_dereference_protected(t->new_tbl, 1)) + goto unlock_m; + + lfactor = sysctl_svc_lfactor(ipvs); + /* Should we resize ? 
*/ + new_size = ip_vs_svc_desired_size(ipvs, t, lfactor); + if (new_size == t->size && lfactor == t->lfactor) + goto unlock_m; + + t_new = ip_vs_svc_table_alloc(ipvs, new_size, lfactor); + if (!t_new) { + more_work = true; + goto unlock_m; + } + /* Flip the table_id */ + t_new->table_id = t->table_id ^ IP_VS_RHT_TABLE_ID_MASK; + + rcu_assign_pointer(t->new_tbl, t_new); + /* Allow add/del to new_tbl while moving from old table */ + mutex_unlock(&ipvs->service_mutex); + + ip_vs_rht_for_each_bucket(t, bucket, head) { +same_bucket: + if (++limit >= 16) { + if (!READ_ONCE(ipvs->enable) || + test_bit(IP_VS_WORK_SVC_NORESIZE, + &ipvs->work_flags)) + goto unlock_sem; + if (resched_score >= 100) { + resched_score = 0; + cond_resched(); + } + limit = 0; + } + if (hlist_bl_empty(head)) { + resched_score++; + continue; + } + /* Preemption calls ahead... */ + resched_score = 0; + + sc = &t->seqc[bucket & t->seqc_mask]; + /* seqcount_t usage considering PREEMPT_RT rules: + * - we are the only writer => preemption can be allowed + * - readers (SoftIRQ) => disable BHs + * - readers (processes) => preemption should be disabled + */ + local_bh_disable(); + preempt_disable_nested(); + write_seqcount_begin(sc); + hlist_bl_lock(head); + + hlist_bl_for_each_entry_safe(svc, cn, nn, head, s_list) { + u32 hash; + + /* New hash for the new table */ + if (svc->fwmark == 0) { + /* Hash it by <protocol,addr,port> */ + hash = ip_vs_svc_hashval(t_new, svc->af, + svc->protocol, + &svc->addr, svc->port); + } else { + /* Hash it by fwmark */ + hash = ip_vs_svc_fwm_hashval(t_new, svc->af, + svc->fwmark); + } + hlist_bl_del_rcu(&svc->s_list); + head2 = t_new->buckets + (hash & t_new->mask); + + hlist_bl_lock(head2); + WRITE_ONCE(svc->hash_key, + ip_vs_rht_build_hash_key(t_new, hash)); + /* t_new->seqc are not used at this stage, we race + * only with add/del, so only lock the bucket. + */ + hlist_bl_add_head_rcu(&svc->s_list, head2); + hlist_bl_unlock(head2); + /* Too long chain? 
Do it in steps */ + if (++limit >= 64) + break; + } + + hlist_bl_unlock(head); + write_seqcount_end(sc); + preempt_enable_nested(); + local_bh_enable(); + if (limit >= 64) + goto same_bucket; + } + + /* Tables can be switched only under service_mutex */ + while (!mutex_trylock(&ipvs->service_mutex)) { + cond_resched(); + if (!READ_ONCE(ipvs->enable) || + test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags)) + goto unlock_sem; + } + if (!READ_ONCE(ipvs->enable) || + test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags)) + goto unlock_m; + + rcu_assign_pointer(ipvs->svc_table, t_new); + /* Inform readers that new table is installed */ + smp_mb__before_atomic(); + atomic_inc(&ipvs->svc_table_changes); + t_free = t; + +unlock_m: + mutex_unlock(&ipvs->service_mutex); + +unlock_sem: + up_write(&ipvs->svc_resize_sem); + + if (t_free) { + /* RCU readers should not see more than two tables in chain. + * To prevent new table to be attached wait here instead of + * freeing the old table in RCU callback. 
+ */ + synchronize_rcu(); + ip_vs_rht_free(t_free); + } + +out: + if (!READ_ONCE(ipvs->enable) || !more_work || + test_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags)) + return; + queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work, 1); +} static inline void __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc) @@ -829,7 +1115,6 @@ static void ip_vs_dest_free(struct ip_vs_dest *dest) { struct ip_vs_service *svc = rcu_dereference_protected(dest->svc, 1); - __ip_vs_dst_cache_reset(dest); __ip_vs_svc_put(svc); call_rcu(&dest->rcu_head, ip_vs_dest_rcu_free); } @@ -1032,10 +1317,6 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, dest->af = udest->af; - spin_lock_bh(&dest->dst_lock); - __ip_vs_dst_cache_reset(dest); - spin_unlock_bh(&dest->dst_lock); - if (add) { list_add_rcu(&dest->n_list, &svc->destinations); svc->num_dests++; @@ -1043,6 +1324,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, if (sched && sched->add_dest) sched->add_dest(svc, dest); } else { + spin_lock_bh(&dest->dst_lock); + __ip_vs_dst_cache_reset(dest); + spin_unlock_bh(&dest->dst_lock); + sched = rcu_dereference_protected(svc->scheduler, 1); if (sched && sched->upd_dest) sched->upd_dest(svc, dest); @@ -1277,6 +1562,10 @@ static void __ip_vs_unlink_dest(struct ip_vs_service *svc, { dest->flags &= ~IP_VS_DEST_F_AVAILABLE; + spin_lock_bh(&dest->dst_lock); + __ip_vs_dst_cache_reset(dest); + spin_unlock_bh(&dest->dst_lock); + /* * Remove it from the d-linked destination list. 
*/ @@ -1367,11 +1656,14 @@ static int ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, struct ip_vs_service **svc_p) { - int ret = 0; struct ip_vs_scheduler *sched = NULL; - struct ip_vs_pe *pe = NULL; + struct ip_vs_rht *tc_new = NULL; + struct ip_vs_rht *t, *t_new = NULL; + int af_id = ip_vs_af_index(u->af); struct ip_vs_service *svc = NULL; + struct ip_vs_pe *pe = NULL; int ret_hooks = -1; + int ret = 0; /* increase the module use count */ if (!ip_vs_use_count_inc()) @@ -1413,8 +1705,30 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, } #endif - if ((u->af == AF_INET && !ipvs->num_services) || - (u->af == AF_INET6 && !ipvs->num_services6)) { + t = rcu_dereference_protected(ipvs->svc_table, 1); + if (!t) { + int lfactor = sysctl_svc_lfactor(ipvs); + int new_size = ip_vs_svc_desired_size(ipvs, NULL, lfactor); + + t_new = ip_vs_svc_table_alloc(ipvs, new_size, lfactor); + if (!t_new) { + ret = -ENOMEM; + goto out_err; + } + } + + if (!rcu_dereference_protected(ipvs->conn_tab, 1)) { + int lfactor = sysctl_conn_lfactor(ipvs); + int new_size = ip_vs_conn_desired_size(ipvs, NULL, lfactor); + + tc_new = ip_vs_conn_tab_alloc(ipvs, new_size, lfactor); + if (!tc_new) { + ret = -ENOMEM; + goto out_err; + } + } + + if (!atomic_read(&ipvs->num_services[af_id])) { ret = ip_vs_register_hooks(ipvs, u->af); if (ret < 0) goto out_err; @@ -1458,27 +1772,43 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, if (ret < 0) goto out_err; + if (t_new) { + clear_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags); + rcu_assign_pointer(ipvs->svc_table, t_new); + t_new = NULL; + } + if (tc_new) { + rcu_assign_pointer(ipvs->conn_tab, tc_new); + tc_new = NULL; + } + /* Update the virtual service counters */ if (svc->port == FTPPORT) - atomic_inc(&ipvs->ftpsvc_counter); - else if (svc->port == 0) - atomic_inc(&ipvs->nullsvc_counter); + atomic_inc(&ipvs->ftpsvc_counter[af_id]); + else if (!svc->port && 
!svc->fwmark) + atomic_inc(&ipvs->nullsvc_counter[af_id]); if (pe && pe->conn_out) - atomic_inc(&ipvs->conn_out_counter); + atomic_inc(&ipvs->conn_out_counter[af_id]); /* Bind the ct retriever */ RCU_INIT_POINTER(svc->pe, pe); pe = NULL; - /* Count only IPv4 services for old get/setsockopt interface */ - if (svc->af == AF_INET) - ipvs->num_services++; - else if (svc->af == AF_INET6) - ipvs->num_services6++; + if (svc->fwmark) + atomic_inc(&ipvs->fwm_services[af_id]); + else + atomic_inc(&ipvs->nonfwm_services[af_id]); + atomic_inc(&ipvs->num_services[af_id]); /* Hash the service into the service table */ ip_vs_svc_hash(svc); + /* Schedule resize work */ + if (t && ip_vs_get_num_services(ipvs) > t->u_thresh && + !test_and_set_bit(IP_VS_WORK_SVC_RESIZE, &ipvs->work_flags)) + queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work, + 1); + *svc_p = svc; if (!READ_ONCE(ipvs->enable)) { @@ -1493,6 +1823,10 @@ ip_vs_add_service(struct netns_ipvs *ipvs, struct ip_vs_service_user_kern *u, out_err: + if (tc_new) + ip_vs_rht_free(tc_new); + if (t_new) + ip_vs_rht_free(t_new); if (ret_hooks >= 0) ip_vs_unregister_hooks(ipvs, u->af); if (svc != NULL) { @@ -1519,6 +1853,8 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) struct ip_vs_pe *pe = NULL, *old_pe = NULL; int ret = 0; bool new_pe_conn_out, old_pe_conn_out; + struct netns_ipvs *ipvs = svc->ipvs; + int af_id = ip_vs_af_index(svc->af); /* * Lookup the scheduler, by 'u->sched_name' @@ -1587,9 +1923,9 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) new_pe_conn_out = (pe && pe->conn_out) ? true : false; old_pe_conn_out = (old_pe && old_pe->conn_out) ? 
true : false; if (new_pe_conn_out && !old_pe_conn_out) - atomic_inc(&svc->ipvs->conn_out_counter); + atomic_inc(&ipvs->conn_out_counter[af_id]); if (old_pe_conn_out && !new_pe_conn_out) - atomic_dec(&svc->ipvs->conn_out_counter); + atomic_dec(&ipvs->conn_out_counter[af_id]); } out: @@ -1609,16 +1945,15 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) struct ip_vs_scheduler *old_sched; struct ip_vs_pe *old_pe; struct netns_ipvs *ipvs = svc->ipvs; + int af_id = ip_vs_af_index(svc->af); - if (svc->af == AF_INET) { - ipvs->num_services--; - if (!ipvs->num_services) - ip_vs_unregister_hooks(ipvs, svc->af); - } else if (svc->af == AF_INET6) { - ipvs->num_services6--; - if (!ipvs->num_services6) - ip_vs_unregister_hooks(ipvs, svc->af); - } + atomic_dec(&ipvs->num_services[af_id]); + if (!atomic_read(&ipvs->num_services[af_id])) + ip_vs_unregister_hooks(ipvs, svc->af); + if (svc->fwmark) + atomic_dec(&ipvs->fwm_services[af_id]); + else + atomic_dec(&ipvs->nonfwm_services[af_id]); ip_vs_stop_estimator(svc->ipvs, &svc->stats); @@ -1630,7 +1965,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) /* Unbind persistence engine, keep svc->pe */ old_pe = rcu_dereference_protected(svc->pe, 1); if (old_pe && old_pe->conn_out) - atomic_dec(&ipvs->conn_out_counter); + atomic_dec(&ipvs->conn_out_counter[af_id]); ip_vs_pe_put(old_pe); /* @@ -1645,9 +1980,9 @@ static void __ip_vs_del_service(struct ip_vs_service *svc, bool cleanup) * Update the virtual service counters */ if (svc->port == FTPPORT) - atomic_dec(&ipvs->ftpsvc_counter); - else if (svc->port == 0) - atomic_dec(&ipvs->nullsvc_counter); + atomic_dec(&ipvs->ftpsvc_counter[af_id]); + else if (!svc->port && !svc->fwmark) + atomic_dec(&ipvs->nullsvc_counter[af_id]); /* * Free the service if nobody refers to it @@ -1679,10 +2014,38 @@ static void ip_vs_unlink_service(struct ip_vs_service *svc, bool cleanup) */ static int ip_vs_del_service(struct ip_vs_service *svc) { + struct 
netns_ipvs *ipvs; + struct ip_vs_rht *t, *p; + int ns; + if (svc == NULL) return -EEXIST; + ipvs = svc->ipvs; ip_vs_unlink_service(svc, false); - + t = rcu_dereference_protected(ipvs->svc_table, 1); + + /* Drop the table if no more services */ + ns = ip_vs_get_num_services(ipvs); + if (!ns) { + /* Stop the resizer and drop the tables */ + set_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags); + cancel_delayed_work_sync(&ipvs->svc_resize_work); + if (t) { + rcu_assign_pointer(ipvs->svc_table, NULL); + while (1) { + p = rcu_dereference_protected(t->new_tbl, 1); + call_rcu(&t->rcu_head, ip_vs_rht_rcu_free); + if (p == t) + break; + t = p; + } + } + } else if (ns <= t->l_thresh && + !test_and_set_bit(IP_VS_WORK_SVC_RESIZE, + &ipvs->work_flags)) { + queue_delayed_work(system_unbound_wq, &ipvs->svc_resize_work, + 1); + } return 0; } @@ -1692,32 +2055,37 @@ static int ip_vs_del_service(struct ip_vs_service *svc) */ static int ip_vs_flush(struct netns_ipvs *ipvs, bool cleanup) { - int idx; + DECLARE_IP_VS_RHT_WALK_BUCKETS(); + struct hlist_bl_head *head; struct ip_vs_service *svc; - struct hlist_node *n; - - /* - * Flush the service table hashed by <netns,protocol,addr,port> - */ - for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry_safe(svc, n, &ip_vs_svc_table[idx], - s_list) { - if (svc->ipvs == ipvs) + struct hlist_bl_node *ne; + struct hlist_bl_node *e; + struct ip_vs_rht *t, *p; + + /* Stop the resizer and drop the tables */ + if (!test_and_set_bit(IP_VS_WORK_SVC_NORESIZE, &ipvs->work_flags)) + cancel_delayed_work_sync(&ipvs->svc_resize_work); + /* No resizer, so now we have exclusive write access */ + + if (ip_vs_get_num_services(ipvs)) { + ip_vs_rht_walk_buckets(ipvs->svc_table, head) { + hlist_bl_for_each_entry_safe(svc, e, ne, head, s_list) ip_vs_unlink_service(svc, cleanup); } } - /* - * Flush the service table hashed by fwmark - */ - for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry_safe(svc, n, &ip_vs_svc_fwm_table[idx], - 
f_list) { - if (svc->ipvs == ipvs) - ip_vs_unlink_service(svc, cleanup); + /* Unregister the hash table and release it after RCU grace period */ + t = rcu_dereference_protected(ipvs->svc_table, 1); + if (t) { + rcu_assign_pointer(ipvs->svc_table, NULL); + while (1) { + p = rcu_dereference_protected(t->new_tbl, 1); + call_rcu(&t->rcu_head, ip_vs_rht_rcu_free); + if (p == t) + break; + t = p; } } - return 0; } @@ -1731,12 +2099,12 @@ void ip_vs_service_nets_cleanup(struct list_head *net_list) struct net *net; /* Check for "full" addressed entries */ - mutex_lock(&__ip_vs_mutex); list_for_each_entry(net, net_list, exit_list) { ipvs = net_ipvs(net); + mutex_lock(&ipvs->service_mutex); ip_vs_flush(ipvs, true); + mutex_unlock(&ipvs->service_mutex); } - mutex_unlock(&__ip_vs_mutex); } /* Put all references for device (dst_cache) */ @@ -1767,41 +2135,47 @@ static int ip_vs_dst_event(struct notifier_block *this, unsigned long event, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct net *net = dev_net(dev); struct netns_ipvs *ipvs = net_ipvs(net); + DECLARE_IP_VS_RHT_WALK_BUCKETS_RCU(); + unsigned int resched_score = 0; + struct hlist_bl_head *head; struct ip_vs_service *svc; + struct hlist_bl_node *e; struct ip_vs_dest *dest; - unsigned int idx; + int old_gen, new_gen; if (event != NETDEV_DOWN || !ipvs) return NOTIFY_DONE; IP_VS_DBG(3, "%s() dev=%s\n", __func__, dev->name); - mutex_lock(&__ip_vs_mutex); - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - if (svc->ipvs == ipvs) { - list_for_each_entry(dest, &svc->destinations, - n_list) { - ip_vs_forget_dev(dest, dev); - } + + old_gen = atomic_read(&ipvs->svc_table_changes); + + rcu_read_lock(); + +repeat: + smp_rmb(); /* ipvs->svc_table and svc_table_changes */ + ip_vs_rht_walk_buckets_rcu(ipvs->svc_table, head) { + hlist_bl_for_each_entry_rcu(svc, e, head, s_list) { + list_for_each_entry_rcu(dest, &svc->destinations, + n_list) { + 
ip_vs_forget_dev(dest, dev); + resched_score += 10; } + resched_score++; } - - hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (svc->ipvs == ipvs) { - list_for_each_entry(dest, &svc->destinations, - n_list) { - ip_vs_forget_dev(dest, dev); - } + resched_score++; + if (resched_score >= 100) { + resched_score = 0; + cond_resched_rcu(); + new_gen = atomic_read(&ipvs->svc_table_changes); + /* New table installed ? */ + if (old_gen != new_gen) { + old_gen = new_gen; + goto repeat; } - } } + rcu_read_unlock(); - spin_lock_bh(&ipvs->dest_trash_lock); - list_for_each_entry(dest, &ipvs->dest_trash, t_list) { - ip_vs_forget_dev(dest, dev); - } - spin_unlock_bh(&ipvs->dest_trash_lock); - mutex_unlock(&__ip_vs_mutex); return NOTIFY_DONE; } @@ -1821,23 +2195,28 @@ static int ip_vs_zero_service(struct ip_vs_service *svc) static int ip_vs_zero_all(struct netns_ipvs *ipvs) { - int idx; + DECLARE_IP_VS_RHT_WALK_BUCKETS_RCU(); + unsigned int resched_score = 0; + struct hlist_bl_head *head; struct ip_vs_service *svc; + struct hlist_bl_node *e; - for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { - if (svc->ipvs == ipvs) - ip_vs_zero_service(svc); - } - } + rcu_read_lock(); - for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - if (svc->ipvs == ipvs) - ip_vs_zero_service(svc); + ip_vs_rht_walk_buckets_rcu(ipvs->svc_table, head) { + hlist_bl_for_each_entry_rcu(svc, e, head, s_list) { + ip_vs_zero_service(svc); + resched_score += 10; + } + resched_score++; + if (resched_score >= 100) { + resched_score = 0; + cond_resched_rcu(); } } + rcu_read_unlock(); + ip_vs_zero_stats(&ipvs->tot_stats->s); return 0; } @@ -2066,6 +2445,60 @@ static int ipvs_proc_run_estimation(const struct ctl_table *table, int write, return ret; } +static int ipvs_proc_conn_lfactor(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct 
netns_ipvs *ipvs = table->extra2; + int *valp = table->data; + int val = *valp; + int ret; + + struct ctl_table tmp_table = { + .data = &val, + .maxlen = sizeof(int), + }; + + ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos); + if (write && ret >= 0) { + if (val < -8 || val > 8) { + ret = -EINVAL; + } else { + *valp = val; + if (rcu_access_pointer(ipvs->conn_tab)) + mod_delayed_work(system_unbound_wq, + &ipvs->conn_resize_work, 0); + } + } + return ret; +} + +static int ipvs_proc_svc_lfactor(const struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct netns_ipvs *ipvs = table->extra2; + int *valp = table->data; + int val = *valp; + int ret; + + struct ctl_table tmp_table = { + .data = &val, + .maxlen = sizeof(int), + }; + + ret = proc_dointvec(&tmp_table, write, buffer, lenp, ppos); + if (write && ret >= 0) { + if (val < -8 || val > 8) { + ret = -EINVAL; + } else { + *valp = val; + if (rcu_access_pointer(ipvs->svc_table)) + mod_delayed_work(system_unbound_wq, + &ipvs->svc_resize_work, 0); + } + } + return ret; +} + /* * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/) * Do not change order or insert new entries without @@ -2254,6 +2687,18 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = ipvs_proc_est_nice, }, + { + .procname = "conn_lfactor", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = ipvs_proc_conn_lfactor, + }, + { + .procname = "svc_lfactor", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = ipvs_proc_svc_lfactor, + }, #ifdef CONFIG_IP_VS_DEBUG { .procname = "debug_level", @@ -2271,8 +2716,8 @@ static struct ctl_table vs_vars[] = { struct ip_vs_iter { struct seq_net_private p; /* Do not move this, netns depends upon it*/ - struct hlist_head *table; - int bucket; + struct ip_vs_rht *t; + u32 bucket; }; /* @@ -2293,55 +2738,51 @@ static inline const char *ip_vs_fwd_name(unsigned int flags) } } - -/* Get the Nth entry in the two lists */ +/* Do not expect consistent 
view during add, del and move(table resize). + * We may miss entries and even show duplicates. + */ static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos) { - struct net *net = seq_file_net(seq); - struct netns_ipvs *ipvs = net_ipvs(net); struct ip_vs_iter *iter = seq->private; - int idx; + struct ip_vs_rht *t = iter->t; struct ip_vs_service *svc; + struct hlist_bl_node *e; + int idx; - /* look in hash by protocol */ - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry_rcu(svc, &ip_vs_svc_table[idx], s_list) { - if ((svc->ipvs == ipvs) && pos-- == 0) { - iter->table = ip_vs_svc_table; - iter->bucket = idx; - return svc; - } - } - } - - /* keep looking in fwmark */ - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry_rcu(svc, &ip_vs_svc_fwm_table[idx], - f_list) { - if ((svc->ipvs == ipvs) && pos-- == 0) { - iter->table = ip_vs_svc_fwm_table; + if (!t) + return NULL; + for (idx = 0; idx < t->size; idx++) { + hlist_bl_for_each_entry_rcu(svc, e, &t->buckets[idx], s_list) { + if (!ip_vs_rht_same_table(t, READ_ONCE(svc->hash_key))) + break; + if (pos-- == 0) { iter->bucket = idx; return svc; } } } - return NULL; } static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos) __acquires(RCU) { + struct ip_vs_iter *iter = seq->private; + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); + rcu_read_lock(); + iter->t = rcu_dereference(ipvs->svc_table); return *pos ? 
ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN; } static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) { - struct hlist_node *e; - struct ip_vs_iter *iter; struct ip_vs_service *svc; + struct ip_vs_iter *iter; + struct hlist_bl_node *e; + struct ip_vs_rht *t; ++*pos; if (v == SEQ_START_TOKEN) @@ -2349,39 +2790,25 @@ static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos) svc = v; iter = seq->private; + t = iter->t; + if (!t) + return NULL; - if (iter->table == ip_vs_svc_table) { - /* next service in table hashed by protocol */ - e = rcu_dereference(hlist_next_rcu(&svc->s_list)); - if (e) - return hlist_entry(e, struct ip_vs_service, s_list); - - while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { - hlist_for_each_entry_rcu(svc, - &ip_vs_svc_table[iter->bucket], - s_list) { - return svc; - } - } - - iter->table = ip_vs_svc_fwm_table; - iter->bucket = -1; - goto scan_fwmark; + hlist_bl_for_each_entry_continue_rcu(svc, e, s_list) { + /* Our cursor was moved to new table ? 
*/ + if (!ip_vs_rht_same_table(t, READ_ONCE(svc->hash_key))) + break; + return svc; } - /* next service in hashed by fwmark */ - e = rcu_dereference(hlist_next_rcu(&svc->f_list)); - if (e) - return hlist_entry(e, struct ip_vs_service, f_list); - - scan_fwmark: - while (++iter->bucket < IP_VS_SVC_TAB_SIZE) { - hlist_for_each_entry_rcu(svc, - &ip_vs_svc_fwm_table[iter->bucket], - f_list) + while (++iter->bucket < t->size) { + hlist_bl_for_each_entry_rcu(svc, e, &t->buckets[iter->bucket], + s_list) { + if (!ip_vs_rht_same_table(t, READ_ONCE(svc->hash_key))) + break; return svc; + } } - return NULL; } @@ -2394,26 +2821,24 @@ static void ip_vs_info_seq_stop(struct seq_file *seq, void *v) static int ip_vs_info_seq_show(struct seq_file *seq, void *v) { + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); + if (v == SEQ_START_TOKEN) { seq_printf(seq, "IP Virtual Server version %d.%d.%d (size=%d)\n", - NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); + NVERSION(IP_VS_VERSION_CODE), get_conn_tab_size(ipvs)); seq_puts(seq, "Prot LocalAddress:Port Scheduler Flags\n"); seq_puts(seq, " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n"); } else { - struct net *net = seq_file_net(seq); - struct netns_ipvs *ipvs = net_ipvs(net); const struct ip_vs_service *svc = v; - const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); char *sched_name = sched ? 
sched->name : "none"; - if (svc->ipvs != ipvs) - return 0; - if (iter->table == ip_vs_svc_table) { + if (!svc->fwmark) { #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) seq_printf(seq, "%s [%pI6]:%04X %s ", @@ -2565,6 +2990,144 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v) return 0; } + +static int ip_vs_status_show(struct seq_file *seq, void *v) +{ + struct net *net = seq_file_single_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); + unsigned int resched_score = 0; + struct ip_vs_conn_hnode *hn; + struct hlist_bl_head *head; + struct ip_vs_service *svc; + struct ip_vs_rht *t, *pt; + struct hlist_bl_node *e; + int old_gen, new_gen; + u32 counts[8]; + u32 bucket; + int count; + u32 sum1; + u32 sum; + int i; + + rcu_read_lock(); + + t = rcu_dereference(ipvs->conn_tab); + + seq_printf(seq, "Conns:\t%d\n", atomic_read(&ipvs->conn_count)); + seq_printf(seq, "Conn buckets:\t%d (%d bits, lfactor %d)\n", + t ? t->size : 0, t ? t->bits : 0, t ? t->lfactor : 0); + + if (!atomic_read(&ipvs->conn_count)) + goto after_conns; + old_gen = atomic_read(&ipvs->conn_tab_changes); + +repeat_conn: + smp_rmb(); /* ipvs->conn_tab and conn_tab_changes */ + memset(counts, 0, sizeof(counts)); + ip_vs_rht_for_each_table_rcu(ipvs->conn_tab, t, pt) { + for (bucket = 0; bucket < t->size; bucket++) { + DECLARE_IP_VS_RHT_WALK_BUCKET_RCU(); + + count = 0; + resched_score++; + ip_vs_rht_walk_bucket_rcu(t, bucket, head) { + count = 0; + hlist_bl_for_each_entry_rcu(hn, e, head, node) + count++; + } + resched_score += count; + if (resched_score >= 100) { + resched_score = 0; + cond_resched_rcu(); + new_gen = atomic_read(&ipvs->conn_tab_changes); + /* New table installed ? 
*/ + if (old_gen != new_gen) { + old_gen = new_gen; + goto repeat_conn; + } + } + counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++; + } + } + for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++) + sum += counts[i]; + sum1 = sum - counts[0]; + seq_printf(seq, "Conn buckets empty:\t%u (%lu%%)\n", + counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U)); + for (i = 1; i < ARRAY_SIZE(counts); i++) { + if (!counts[i]) + continue; + seq_printf(seq, "Conn buckets len-%d:\t%u (%lu%%)\n", + i, counts[i], + (unsigned long)counts[i] * 100 / max(sum1, 1U)); + } + +after_conns: + t = rcu_dereference(ipvs->svc_table); + + count = ip_vs_get_num_services(ipvs); + seq_printf(seq, "Services:\t%d\n", count); + seq_printf(seq, "Service buckets:\t%d (%d bits, lfactor %d)\n", + t ? t->size : 0, t ? t->bits : 0, t ? t->lfactor : 0); + + if (!count) + goto after_svc; + old_gen = atomic_read(&ipvs->svc_table_changes); + +repeat_svc: + smp_rmb(); /* ipvs->svc_table and svc_table_changes */ + memset(counts, 0, sizeof(counts)); + ip_vs_rht_for_each_table_rcu(ipvs->svc_table, t, pt) { + for (bucket = 0; bucket < t->size; bucket++) { + DECLARE_IP_VS_RHT_WALK_BUCKET_RCU(); + + count = 0; + resched_score++; + ip_vs_rht_walk_bucket_rcu(t, bucket, head) { + count = 0; + hlist_bl_for_each_entry_rcu(svc, e, head, + s_list) + count++; + } + resched_score += count; + if (resched_score >= 100) { + resched_score = 0; + cond_resched_rcu(); + new_gen = atomic_read(&ipvs->svc_table_changes); + /* New table installed ? 
*/ + if (old_gen != new_gen) { + old_gen = new_gen; + goto repeat_svc; + } + } + counts[min(count, (int)ARRAY_SIZE(counts) - 1)]++; + } + } + for (sum = 0, i = 0; i < ARRAY_SIZE(counts); i++) + sum += counts[i]; + sum1 = sum - counts[0]; + seq_printf(seq, "Service buckets empty:\t%u (%lu%%)\n", + counts[0], (unsigned long)counts[0] * 100 / max(sum, 1U)); + for (i = 1; i < ARRAY_SIZE(counts); i++) { + if (!counts[i]) + continue; + seq_printf(seq, "Service buckets len-%d:\t%u (%lu%%)\n", + i, counts[i], + (unsigned long)counts[i] * 100 / max(sum1, 1U)); + } + +after_svc: + seq_printf(seq, "Stats thread slots:\t%d (max %lu)\n", + ipvs->est_kt_count, ipvs->est_max_threads); + seq_printf(seq, "Stats chain max len:\t%d\n", ipvs->est_chain_max); + seq_printf(seq, "Stats thread ests:\t%d\n", + ipvs->est_chain_max * IPVS_EST_CHAIN_FACTOR * + IPVS_EST_NTICKS); + + rcu_read_unlock(); + return 0; +} + #endif /* @@ -2735,7 +3298,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len) return ret; } - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); if (cmd == IP_VS_SO_SET_FLUSH) { /* Flush the virtual service */ ret = ip_vs_flush(ipvs, false); @@ -2832,7 +3395,7 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, sockptr_t ptr, unsigned int len) } out_unlock: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); return ret; } @@ -2864,15 +3427,20 @@ __ip_vs_get_service_entries(struct netns_ipvs *ipvs, const struct ip_vs_get_services *get, struct ip_vs_get_services __user *uptr) { - int idx, count=0; - struct ip_vs_service *svc; struct ip_vs_service_entry entry; + DECLARE_IP_VS_RHT_WALK_BUCKETS(); + struct hlist_bl_head *head; + struct ip_vs_service *svc; + struct hlist_bl_node *e; + int count = 0; int ret = 0; - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) { + lockdep_assert_held(&ipvs->svc_resize_sem); + /* All service modifications are disabled, go ahead */ + 
ip_vs_rht_walk_buckets(ipvs->svc_table, head) { + hlist_bl_for_each_entry(svc, e, head, s_list) { /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET || (svc->ipvs != ipvs)) + if (svc->af != AF_INET) continue; if (count >= get->num_services) @@ -2888,24 +3456,6 @@ __ip_vs_get_service_entries(struct netns_ipvs *ipvs, } } - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) { - /* Only expose IPv4 entries to old interface */ - if (svc->af != AF_INET || (svc->ipvs != ipvs)) - continue; - - if (count >= get->num_services) - goto out; - memset(&entry, 0, sizeof(entry)); - ip_vs_copy_service(&entry, svc); - if (copy_to_user(&uptr->entrytable[count], - &entry, sizeof(entry))) { - ret = -EFAULT; - goto out; - } - count++; - } - } out: return ret; } @@ -3060,14 +3610,43 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } - mutex_lock(&__ip_vs_mutex); + if (cmd == IP_VS_SO_GET_SERVICES) { + struct ip_vs_get_services *get; + size_t size; + + get = (struct ip_vs_get_services *)arg; + size = struct_size(get, entrytable, get->num_services); + if (*len != size) { + pr_err("length: %u != %zu\n", *len, size); + return -EINVAL; + } + /* Protect against table resizer moving the entries. + * Try reverse locking, so that we do not hold the mutex + * while waiting for semaphore. 
+ */ + while (1) { + ret = down_read_killable(&ipvs->svc_resize_sem); + if (ret < 0) + return ret; + if (mutex_trylock(&ipvs->service_mutex)) + break; + up_read(&ipvs->svc_resize_sem); + cond_resched(); + } + ret = __ip_vs_get_service_entries(ipvs, get, user); + up_read(&ipvs->svc_resize_sem); + mutex_unlock(&ipvs->service_mutex); + return ret; + } + + mutex_lock(&ipvs->service_mutex); switch (cmd) { case IP_VS_SO_GET_VERSION: { char buf[64]; sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)", - NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size); + NVERSION(IP_VS_VERSION_CODE), get_conn_tab_size(ipvs)); if (copy_to_user(user, buf, strlen(buf)+1) != 0) { ret = -EFAULT; goto out; @@ -3079,30 +3658,16 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) case IP_VS_SO_GET_INFO: { struct ip_vs_getinfo info; + info.version = IP_VS_VERSION_CODE; - info.size = ip_vs_conn_tab_size; - info.num_services = ipvs->num_services; + info.size = get_conn_tab_size(ipvs); + info.num_services = + atomic_read(&ipvs->num_services[IP_VS_AF_INET]); if (copy_to_user(user, &info, sizeof(info)) != 0) ret = -EFAULT; } break; - case IP_VS_SO_GET_SERVICES: - { - struct ip_vs_get_services *get; - size_t size; - - get = (struct ip_vs_get_services *)arg; - size = struct_size(get, entrytable, get->num_services); - if (*len != size) { - pr_err("length: %u != %zu\n", *len, size); - ret = -EINVAL; - goto out; - } - ret = __ip_vs_get_service_entries(ipvs, get, user); - } - break; - case IP_VS_SO_GET_SERVICE: { struct ip_vs_service_entry *entry; @@ -3159,7 +3724,7 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) } out: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); return ret; } @@ -3338,9 +3903,9 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, goto nla_put_failure; } - sched = rcu_dereference_protected(svc->scheduler, 1); + sched = rcu_dereference(svc->scheduler); sched_name = sched ? 
sched->name : "none"; - pe = rcu_dereference_protected(svc->pe, 1); + pe = rcu_dereference(svc->pe); if (nla_put_string(skb, IPVS_SVC_ATTR_SCHED_NAME, sched_name) || (pe && nla_put_string(skb, IPVS_SVC_ATTR_PE_NAME, pe->name)) || nla_put(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags) || @@ -3388,27 +3953,20 @@ nla_put_failure: static int ip_vs_genl_dump_services(struct sk_buff *skb, struct netlink_callback *cb) { - int idx = 0, i; - int start = cb->args[0]; - struct ip_vs_service *svc; + DECLARE_IP_VS_RHT_WALK_BUCKETS_SAFE_RCU(); struct net *net = sock_net(skb->sk); struct netns_ipvs *ipvs = net_ipvs(net); + struct hlist_bl_head *head; + struct ip_vs_service *svc; + struct hlist_bl_node *e; + int start = cb->args[0]; + int idx = 0; - mutex_lock(&__ip_vs_mutex); - for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - hlist_for_each_entry(svc, &ip_vs_svc_table[i], s_list) { - if (++idx <= start || (svc->ipvs != ipvs)) - continue; - if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { - idx--; - goto nla_put_failure; - } - } - } - - for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) { - hlist_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) { - if (++idx <= start || (svc->ipvs != ipvs)) + down_read(&ipvs->svc_resize_sem); + rcu_read_lock(); + ip_vs_rht_walk_buckets_safe_rcu(ipvs->svc_table, head) { + hlist_bl_for_each_entry_rcu(svc, e, head, s_list) { + if (++idx <= start) continue; if (ip_vs_genl_dump_service(skb, svc, cb) < 0) { idx--; @@ -3418,7 +3976,8 @@ static int ip_vs_genl_dump_services(struct sk_buff *skb, } nla_put_failure: - mutex_unlock(&__ip_vs_mutex); + rcu_read_unlock(); + up_read(&ipvs->svc_resize_sem); cb->args[0] = idx; return skb->len; @@ -3474,13 +4033,11 @@ static int ip_vs_genl_parse_service(struct netns_ipvs *ipvs, usvc->fwmark = 0; } - rcu_read_lock(); if (usvc->fwmark) svc = __ip_vs_svc_fwm_find(ipvs, usvc->af, usvc->fwmark); else svc = __ip_vs_service_find(ipvs, usvc->af, usvc->protocol, &usvc->addr, usvc->port); - rcu_read_unlock(); *ret_svc = svc; /* If a 
full entry was requested, check for the additional fields */ @@ -3607,7 +4164,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, struct net *net = sock_net(skb->sk); struct netns_ipvs *ipvs = net_ipvs(net); - mutex_lock(&__ip_vs_mutex); + rcu_read_lock(); /* Try to find the service for which to dump destinations */ if (nlmsg_parse_deprecated(cb->nlh, GENL_HDRLEN, attrs, IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy, cb->extack)) @@ -3619,7 +4176,7 @@ static int ip_vs_genl_dump_dests(struct sk_buff *skb, goto out_err; /* Dump the destinations */ - list_for_each_entry(dest, &svc->destinations, n_list) { + list_for_each_entry_rcu(dest, &svc->destinations, n_list) { if (++idx <= start) continue; if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) { @@ -3632,7 +4189,7 @@ nla_put_failure: cb->args[0] = idx; out_err: - mutex_unlock(&__ip_vs_mutex); + rcu_read_unlock(); return skb->len; } @@ -3915,7 +4472,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) cmd = info->genlhdr->cmd; - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); if (cmd == IPVS_CMD_FLUSH) { ret = ip_vs_flush(ipvs, false); @@ -3935,9 +4492,12 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE) need_full_svc = true; + /* We use function that requires RCU lock (hlist_bl) */ + rcu_read_lock(); ret = ip_vs_genl_parse_service(ipvs, &usvc, info->attrs[IPVS_CMD_ATTR_SERVICE], need_full_svc, &svc); + rcu_read_unlock(); if (ret) goto out; @@ -4027,7 +4587,7 @@ static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info) } out: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); return ret; } @@ -4057,7 +4617,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - mutex_lock(&__ip_vs_mutex); + rcu_read_lock(); reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd); if (reply == NULL) @@ -4109,7 
+4669,7 @@ static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info) if (nla_put_u32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE) || nla_put_u32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE, - ip_vs_conn_tab_size)) + get_conn_tab_size(ipvs))) goto nla_put_failure; break; } @@ -4125,7 +4685,7 @@ nla_put_failure: out_err: nlmsg_free(msg); out: - mutex_unlock(&__ip_vs_mutex); + rcu_read_unlock(); return ret; } @@ -4242,6 +4802,7 @@ static struct genl_family ip_vs_genl_family __ro_after_init = { .small_ops = ip_vs_genl_ops, .n_small_ops = ARRAY_SIZE(ip_vs_genl_ops), .resv_start_op = IPVS_CMD_FLUSH + 1, + .parallel_ops = 1, }; static int __init ip_vs_genl_register(void) @@ -4358,6 +4919,16 @@ static int __net_init ip_vs_control_net_init_sysctl(struct netns_ipvs *ipvs) tbl[idx].extra2 = ipvs; tbl[idx++].data = &ipvs->sysctl_est_nice; + if (unpriv) + tbl[idx].mode = 0444; + tbl[idx].extra2 = ipvs; + tbl[idx++].data = &ipvs->sysctl_conn_lfactor; + + if (unpriv) + tbl[idx].mode = 0444; + tbl[idx].extra2 = ipvs; + tbl[idx++].data = &ipvs->sysctl_svc_lfactor; + #ifdef CONFIG_IP_VS_DEBUG /* Global sysctls must be ro in non-init netns */ if (!net_eq(net, &init_net)) @@ -4424,6 +4995,13 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) int ret = -ENOMEM; int idx; + /* Initialize service_mutex, svc_table per netns */ + __mutex_init(&ipvs->service_mutex, "ipvs->service_mutex", &__ipvs_service_key); + init_rwsem(&ipvs->svc_resize_sem); + INIT_DELAYED_WORK(&ipvs->svc_resize_work, svc_resize_work_handler); + atomic_set(&ipvs->svc_table_changes, 0); + RCU_INIT_POINTER(ipvs->svc_table, NULL); + /* Initialize rs_table */ for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++) INIT_HLIST_HEAD(&ipvs->rs_table[idx]); @@ -4431,11 +5009,17 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) INIT_LIST_HEAD(&ipvs->dest_trash); spin_lock_init(&ipvs->dest_trash_lock); timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0); - atomic_set(&ipvs->ftpsvc_counter, 
0); - atomic_set(&ipvs->nullsvc_counter, 0); - atomic_set(&ipvs->conn_out_counter, 0); + for (idx = 0; idx < IP_VS_AF_MAX; idx++) { + atomic_set(&ipvs->num_services[idx], 0); + atomic_set(&ipvs->fwm_services[idx], 0); + atomic_set(&ipvs->nonfwm_services[idx], 0); + atomic_set(&ipvs->ftpsvc_counter[idx], 0); + atomic_set(&ipvs->nullsvc_counter[idx], 0); + atomic_set(&ipvs->conn_out_counter[idx], 0); + } INIT_DELAYED_WORK(&ipvs->est_reload_work, est_reload_work_handler); + ipvs->sysctl_svc_lfactor = ip_vs_svc_default_load_factor(ipvs); /* procfs stats */ ipvs->tot_stats = kzalloc_obj(*ipvs->tot_stats); @@ -4455,6 +5039,9 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) ipvs->net->proc_net, ip_vs_stats_percpu_show, NULL)) goto err_percpu; + if (!proc_create_net_single("ip_vs_status", 0, ipvs->net->proc_net, + ip_vs_status_show, NULL)) + goto err_status; #endif ret = ip_vs_control_net_init_sysctl(ipvs); @@ -4465,6 +5052,9 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) err: #ifdef CONFIG_PROC_FS + remove_proc_entry("ip_vs_status", ipvs->net->proc_net); + +err_status: remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); err_percpu: @@ -4490,6 +5080,7 @@ void __net_exit ip_vs_control_net_cleanup(struct netns_ipvs *ipvs) ip_vs_control_net_cleanup_sysctl(ipvs); cancel_delayed_work_sync(&ipvs->est_reload_work); #ifdef CONFIG_PROC_FS + remove_proc_entry("ip_vs_status", ipvs->net->proc_net); remove_proc_entry("ip_vs_stats_percpu", ipvs->net->proc_net); remove_proc_entry("ip_vs_stats", ipvs->net->proc_net); remove_proc_entry("ip_vs", ipvs->net->proc_net); @@ -4528,17 +5119,8 @@ void ip_vs_unregister_nl_ioctl(void) int __init ip_vs_control_init(void) { - int idx; int ret; - /* Initialize svc_table, ip_vs_svc_fwm_table */ - for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) { - INIT_HLIST_HEAD(&ip_vs_svc_table[idx]); - INIT_HLIST_HEAD(&ip_vs_svc_fwm_table[idx]); - } - - smp_wmb(); /* Do we really need it now ? 
*/ - ret = register_netdevice_notifier(&ip_vs_dst_notifier); if (ret < 0) return ret; diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index b17de33314da..433ba3cab58c 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -602,7 +602,7 @@ static void ip_vs_est_drain_temp_list(struct netns_ipvs *ipvs) while (1) { int max = 16; - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); while (max-- > 0) { est = hlist_entry_safe(ipvs->est_temp_list.first, @@ -622,12 +622,12 @@ static void ip_vs_est_drain_temp_list(struct netns_ipvs *ipvs) } goto unlock; } - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); cond_resched(); } unlock: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); } /* Calculate limits for all kthreads */ @@ -647,9 +647,9 @@ static int ip_vs_est_calc_limits(struct netns_ipvs *ipvs, int *chain_max) u64 val; INIT_HLIST_HEAD(&chain); - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); kd = ipvs->est_kt_arr[0]; - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); s = kd ? kd->calc_stats : NULL; if (!s) goto out; @@ -748,7 +748,7 @@ static void ip_vs_est_calc_phase(struct netns_ipvs *ipvs) if (!ip_vs_est_calc_limits(ipvs, &chain_max)) return; - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); /* Stop all other tasks, so that we can immediately move the * estimators to est_temp_list without RCU grace period @@ -815,9 +815,9 @@ walk_chain: /* Give chance estimators to be added (to est_temp_list) * and deleted (releasing kthread contexts) */ - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); cond_resched(); - mutex_lock(&__ip_vs_mutex); + mutex_lock(&ipvs->service_mutex); /* Current kt released ? 
*/ if (id >= ipvs->est_kt_count) @@ -893,7 +893,7 @@ unlock2: mutex_unlock(&ipvs->est_mutex); unlock: - mutex_unlock(&__ip_vs_mutex); + mutex_unlock(&ipvs->service_mutex); } void ip_vs_zero_estimator(struct ip_vs_stats *stats) diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 85f31d71e29a..0c83c7b69581 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -132,9 +132,9 @@ static bool ip_vs_sip_ct_match(const struct ip_vs_conn_param *p, } static u32 ip_vs_sip_hashkey_raw(const struct ip_vs_conn_param *p, - u32 initval, bool inverse) + struct ip_vs_rht *t, bool inverse) { - return jhash(p->pe_data, p->pe_data_len, initval); + return jhash(p->pe_data, p->pe_data_len, (u32)t->hash_key.key[0]); } static int ip_vs_sip_show_pe_data(const struct ip_vs_conn *cp, char *buf) diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index b2ba3befbd55..93038abbf5e0 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1755,6 +1755,28 @@ int start_sync_thread(struct netns_ipvs *ipvs, struct ipvs_sync_daemon_cfg *c, if (!ip_vs_use_count_inc()) return -ENOPROTOOPT; + /* Backup server can be started without services just to sync conns, + * make sure conn_tab is created even if ipvs->enable is 0. 
+ */ + if (state == IP_VS_STATE_BACKUP) { + mutex_lock(&ipvs->service_mutex); + if (!rcu_dereference_protected(ipvs->conn_tab, 1)) { + int lfactor = sysctl_conn_lfactor(ipvs); + int new_size = ip_vs_conn_desired_size(ipvs, NULL, + lfactor); + struct ip_vs_rht *tc_new; + + tc_new = ip_vs_conn_tab_alloc(ipvs, new_size, lfactor); + if (!tc_new) { + mutex_unlock(&ipvs->service_mutex); + result = -ENOMEM; + goto out_module; + } + rcu_assign_pointer(ipvs->conn_tab, tc_new); + } + mutex_unlock(&ipvs->service_mutex); + } + /* Do not hold one mutex and then to block on another */ for (;;) { rtnl_lock(); @@ -1922,6 +1944,7 @@ out_early: mutex_unlock(&ipvs->sync_mutex); rtnl_unlock(); +out_module: /* decrease the module use count */ ip_vs_use_count_dec(); return result; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 3601eb86d025..0fb5162992e5 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -336,9 +336,11 @@ __ip_vs_get_out_rt(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, goto err_unreach; } /* It is forbidden to attach dest->dest_dst if - * device is going down. + * device is going down or if server is removed and + * stored in dest_trash. */ - if (!rt_dev_is_down(dst_dev_rcu(&rt->dst))) + if (!rt_dev_is_down(dst_dev_rcu(&rt->dst)) && + dest->flags & IP_VS_DEST_F_AVAILABLE) __ip_vs_dst_set(dest, dest_dst, &rt->dst, 0); else noref = 0; @@ -513,9 +515,11 @@ __ip_vs_get_out_rt_v6(struct netns_ipvs *ipvs, int skb_af, struct sk_buff *skb, rt = dst_rt6_info(dst); cookie = rt6_get_cookie(rt); /* It is forbidden to attach dest->dest_dst if - * device is going down. + * device is going down or if server is removed and + * stored in dest_trash. 
*/ - if (!rt_dev_is_down(dst_dev_rcu(&rt->dst))) + if (!rt_dev_is_down(dst_dev_rcu(&rt->dst)) && + dest->flags & IP_VS_DEST_F_AVAILABLE) __ip_vs_dst_set(dest, dest_dst, &rt->dst, cookie); else noref = 0; diff --git a/net/netfilter/nf_conntrack_amanda.c b/net/netfilter/nf_conntrack_amanda.c index c0132559f6af..d2c09e8dd872 100644 --- a/net/netfilter/nf_conntrack_amanda.c +++ b/net/netfilter/nf_conntrack_amanda.c @@ -37,13 +37,7 @@ MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); module_param(ts_algo, charp, 0400); MODULE_PARM_DESC(ts_algo, "textsearch algorithm to use (default kmp)"); -unsigned int (__rcu *nf_nat_amanda_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp) - __read_mostly; +nf_nat_amanda_hook_fn __rcu *nf_nat_amanda_hook __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_amanda_hook); enum amanda_strings { @@ -98,7 +92,7 @@ static int amanda_help(struct sk_buff *skb, u_int16_t len; __be16 port; int ret = NF_ACCEPT; - typeof(nf_nat_amanda_hook) nf_nat_amanda; + nf_nat_amanda_hook_fn *nf_nat_amanda; /* Only look at packets from the Amanda server */ if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 27ce5fda8993..b08189226320 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -323,9 +323,6 @@ nf_ct_get_tuple(const struct sk_buff *skb, #endif case IPPROTO_TCP: case IPPROTO_UDP: -#ifdef CONFIG_NF_CT_PROTO_UDPLITE - case IPPROTO_UDPLITE: -#endif #ifdef CONFIG_NF_CT_PROTO_SCTP case IPPROTO_SCTP: #endif @@ -1987,11 +1984,6 @@ static int nf_conntrack_handle_packet(struct nf_conn *ct, case IPPROTO_ICMPV6: return nf_conntrack_icmpv6_packet(ct, skb, ctinfo, state); #endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE - case IPPROTO_UDPLITE: - return nf_conntrack_udplite_packet(ct, skb, dataoff, - ctinfo, state); -#endif #ifdef 
CONFIG_NF_CT_PROTO_SCTP case IPPROTO_SCTP: return nf_conntrack_sctp_packet(ct, skb, dataoff, diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 5e00f9123c38..de83bf9e6c61 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -43,13 +43,7 @@ module_param_array(ports, ushort, &ports_c, 0400); static bool loose; module_param(loose, bool, 0600); -unsigned int (__rcu *nf_nat_ftp_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - enum nf_ct_ftp_type type, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp); +nf_nat_ftp_hook_fn __rcu *nf_nat_ftp_hook; EXPORT_SYMBOL_GPL(nf_nat_ftp_hook); static int try_rfc959(const char *, size_t, struct nf_conntrack_man *, @@ -385,7 +379,7 @@ static int help(struct sk_buff *skb, struct nf_conntrack_man cmd = {}; unsigned int i; int found = 0, ends_in_nl; - typeof(nf_nat_ftp_hook) nf_nat_ftp; + nf_nat_ftp_hook_fn *nf_nat_ftp; /* Until there's been traffic both ways, don't look in packets. */ if (ctinfo != IP_CT_ESTABLISHED && diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 7b1497ed97d2..6830c9da3507 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -21,7 +21,6 @@ #if H323_TRACE #define TAB_SIZE 4 -#define IFTHEN(cond, act) if(cond){act;} #ifdef __KERNEL__ #define PRINT printk #else @@ -29,7 +28,6 @@ #endif #define FNAME(name) name, #else -#define IFTHEN(cond, act) #define PRINT(fmt, args...) 
#define FNAME(name) #endif @@ -276,7 +274,7 @@ static unsigned int get_uint(struct bitstr *bs, int b) static int decode_nul(struct bitstr *bs, const struct field_t *f, char *base, int level) { - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); return H323_ERROR_NONE; } @@ -284,7 +282,7 @@ static int decode_nul(struct bitstr *bs, const struct field_t *f, static int decode_bool(struct bitstr *bs, const struct field_t *f, char *base, int level) { - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); INC_BIT(bs); if (nf_h323_error_boundary(bs, 0, 0)) @@ -297,7 +295,7 @@ static int decode_oid(struct bitstr *bs, const struct field_t *f, { int len; - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); BYTE_ALIGN(bs); if (nf_h323_error_boundary(bs, 1, 0)) @@ -316,7 +314,7 @@ static int decode_int(struct bitstr *bs, const struct field_t *f, { unsigned int len; - PRINT("%*.s%s", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s", level * TAB_SIZE, " ", f->name); switch (f->sz) { case BYTE: /* Range == 256 */ @@ -363,7 +361,7 @@ static int decode_int(struct bitstr *bs, const struct field_t *f, static int decode_enum(struct bitstr *bs, const struct field_t *f, char *base, int level) { - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); if ((f->attr & EXT) && get_bit(bs)) { INC_BITS(bs, 7); @@ -381,7 +379,7 @@ static int decode_bitstr(struct bitstr *bs, const struct field_t *f, { unsigned int len; - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); BYTE_ALIGN(bs); switch (f->sz) { @@ -417,7 +415,7 @@ static int decode_numstr(struct bitstr *bs, const struct field_t *f, { unsigned int len; - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); /* 2 <= Range <= 255 */ if 
(nf_h323_error_boundary(bs, 0, f->sz)) @@ -437,7 +435,7 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f, { unsigned int len; - PRINT("%*.s%s", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s", level * TAB_SIZE, " ", f->name); switch (f->sz) { case FIXD: /* Range == 1 */ @@ -445,11 +443,6 @@ static int decode_octstr(struct bitstr *bs, const struct field_t *f, BYTE_ALIGN(bs); if (base && (f->attr & DECODE)) { /* The IP Address */ - IFTHEN(f->lb == 4, - PRINT(" = %d.%d.%d.%d:%d", - bs->cur[0], bs->cur[1], - bs->cur[2], bs->cur[3], - bs->cur[4] * 256 + bs->cur[5])); *((unsigned int *)(base + f->offset)) = bs->cur - bs->buf; } @@ -490,7 +483,7 @@ static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, { unsigned int len; - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); switch (f->sz) { case BYTE: /* Range == 256 */ @@ -522,7 +515,7 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, const struct field_t *son; unsigned char *beg = NULL; - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); /* Decode? */ base = (base && (f->attr & DECODE)) ? 
base + f->offset : NULL; @@ -544,7 +537,7 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, /* Decode the root components */ for (i = opt = 0, son = f->fields; i < f->lb; i++, son++) { if (son->attr & STOP) { - PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", + PRINT("%*s%s\n", (level + 1) * TAB_SIZE, " ", son->name); return H323_ERROR_STOP; } @@ -562,7 +555,7 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, if (nf_h323_error_boundary(bs, len, 0)) return H323_ERROR_BOUND; if (!base || !(son->attr & DECODE)) { - PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, + PRINT("%*s%s\n", (level + 1) * TAB_SIZE, " ", son->name); bs->cur += len; continue; @@ -615,7 +608,7 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, } if (son->attr & STOP) { - PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", + PRINT("%*s%s\n", (level + 1) * TAB_SIZE, " ", son->name); return H323_ERROR_STOP; } @@ -629,7 +622,7 @@ static int decode_seq(struct bitstr *bs, const struct field_t *f, if (nf_h323_error_boundary(bs, len, 0)) return H323_ERROR_BOUND; if (!base || !(son->attr & DECODE)) { - PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", + PRINT("%*s%s\n", (level + 1) * TAB_SIZE, " ", son->name); bs->cur += len; continue; @@ -655,7 +648,7 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f, const struct field_t *son; unsigned char *beg = NULL; - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); /* Decode? */ base = (base && (f->attr & DECODE)) ? 
base + f->offset : NULL; @@ -710,7 +703,7 @@ static int decode_seqof(struct bitstr *bs, const struct field_t *f, if (nf_h323_error_boundary(bs, len, 0)) return H323_ERROR_BOUND; if (!base || !(son->attr & DECODE)) { - PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, + PRINT("%*s%s\n", (level + 1) * TAB_SIZE, " ", son->name); bs->cur += len; continue; @@ -751,7 +744,7 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f, const struct field_t *son; unsigned char *beg = NULL; - PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); + PRINT("%*s%s\n", level * TAB_SIZE, " ", f->name); /* Decode? */ base = (base && (f->attr & DECODE)) ? base + f->offset : NULL; @@ -792,7 +785,7 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f, /* Transfer to son level */ son = &f->fields[type]; if (son->attr & STOP) { - PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", son->name); + PRINT("%*s%s\n", (level + 1) * TAB_SIZE, " ", son->name); return H323_ERROR_STOP; } @@ -804,7 +797,7 @@ static int decode_choice(struct bitstr *bs, const struct field_t *f, if (nf_h323_error_boundary(bs, len, 0)) return H323_ERROR_BOUND; if (!base || !(son->attr & DECODE)) { - PRINT("%*.s%s\n", (level + 1) * TAB_SIZE, " ", + PRINT("%*s%s\n", (level + 1) * TAB_SIZE, " ", son->name); bs->cur += len; return H323_ERROR_NONE; diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index b8e6d724acd1..522183b9a604 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -30,13 +30,7 @@ static unsigned int dcc_timeout __read_mostly = 300; static char *irc_buffer; static DEFINE_SPINLOCK(irc_buffer_lock); -unsigned int (__rcu *nf_nat_irc_hook)(struct sk_buff *skb, - enum ip_conntrack_info ctinfo, - unsigned int protoff, - unsigned int matchoff, - unsigned int matchlen, - struct nf_conntrack_expect *exp) - __read_mostly; +nf_nat_irc_hook_fn __rcu *nf_nat_irc_hook __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_irc_hook); #define HELPER_NAME "irc" 
@@ -122,7 +116,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, __be16 port; int i, ret = NF_ACCEPT; char *addr_beg_p, *addr_end_p; - typeof(nf_nat_irc_hook) nf_nat_irc; + nf_nat_irc_hook_fn *nf_nat_irc; unsigned int datalen; /* If packet is coming from IRC server */ diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a20cd82446c5..eda5fe4a75c8 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Connection tracking via netlink socket. Allows for user space * protocol helpers and general trouble making from userspace. * @@ -10,9 +11,6 @@ * generally made possible by Network Robots, Inc. (www.networkrobots.com) * * Further development of this code funded by Astaro AG (http://www.astaro.com) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. */ #include <linux/init.h> diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index bc1d96686b9c..50ddd3d613e1 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -103,9 +103,6 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto) #ifdef CONFIG_NF_CT_PROTO_SCTP case IPPROTO_SCTP: return &nf_conntrack_l4proto_sctp; #endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE - case IPPROTO_UDPLITE: return &nf_conntrack_l4proto_udplite; -#endif #ifdef CONFIG_NF_CT_PROTO_GRE case IPPROTO_GRE: return &nf_conntrack_l4proto_gre; #endif diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 0030fbe8885c..cc9b7e5e1935 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -129,91 +129,6 @@ int nf_conntrack_udp_packet(struct nf_conn *ct, return NF_ACCEPT; } -#ifdef CONFIG_NF_CT_PROTO_UDPLITE -static void udplite_error_log(const struct 
sk_buff *skb, - const struct nf_hook_state *state, - const char *msg) -{ - nf_l4proto_log_invalid(skb, state, IPPROTO_UDPLITE, "%s", msg); -} - -static bool udplite_error(struct sk_buff *skb, - unsigned int dataoff, - const struct nf_hook_state *state) -{ - unsigned int udplen = skb->len - dataoff; - const struct udphdr *hdr; - struct udphdr _hdr; - unsigned int cscov; - - /* Header is too small? */ - hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); - if (!hdr) { - udplite_error_log(skb, state, "short packet"); - return true; - } - - cscov = ntohs(hdr->len); - if (cscov == 0) { - cscov = udplen; - } else if (cscov < sizeof(*hdr) || cscov > udplen) { - udplite_error_log(skb, state, "invalid checksum coverage"); - return true; - } - - /* UDPLITE mandates checksums */ - if (!hdr->check) { - udplite_error_log(skb, state, "checksum missing"); - return true; - } - - /* Checksum invalid? Ignore. */ - if (state->hook == NF_INET_PRE_ROUTING && - state->net->ct.sysctl_checksum && - nf_checksum_partial(skb, state->hook, dataoff, cscov, IPPROTO_UDP, - state->pf)) { - udplite_error_log(skb, state, "bad checksum"); - return true; - } - - return false; -} - -/* Returns verdict for packet, and may modify conntracktype */ -int nf_conntrack_udplite_packet(struct nf_conn *ct, - struct sk_buff *skb, - unsigned int dataoff, - enum ip_conntrack_info ctinfo, - const struct nf_hook_state *state) -{ - unsigned int *timeouts; - - if (udplite_error(skb, dataoff, state)) - return -NF_ACCEPT; - - timeouts = nf_ct_timeout_lookup(ct); - if (!timeouts) - timeouts = udp_get_timeouts(nf_ct_net(ct)); - - /* If we've seen traffic both ways, this is some kind of UDP - stream. Extend timeout. 
*/ - if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { - nf_ct_refresh_acct(ct, ctinfo, skb, - timeouts[UDP_CT_REPLIED]); - - if (unlikely((ct->status & IPS_NAT_CLASH))) - return NF_ACCEPT; - - /* Also, more likely to be important, and not a probe */ - if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_ASSURED, ct); - } else { - nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[UDP_CT_UNREPLIED]); - } - return NF_ACCEPT; -} -#endif - #ifdef CONFIG_NF_CONNTRACK_TIMEOUT #include <linux/netfilter/nfnetlink.h> @@ -299,26 +214,3 @@ const struct nf_conntrack_l4proto nf_conntrack_l4proto_udp = }, #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ }; - -#ifdef CONFIG_NF_CT_PROTO_UDPLITE -const struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite = -{ - .l4proto = IPPROTO_UDPLITE, - .allow_clash = true, -#if IS_ENABLED(CONFIG_NF_CT_NETLINK) - .tuple_to_nlattr = nf_ct_port_tuple_to_nlattr, - .nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, - .nlattr_tuple_size = nf_ct_port_nlattr_tuple_size, - .nla_policy = nf_ct_port_nla_policy, -#endif -#ifdef CONFIG_NF_CONNTRACK_TIMEOUT - .ctnl_timeout = { - .nlattr_to_obj = udp_timeout_nlattr_to_obj, - .obj_to_nlattr = udp_timeout_obj_to_nlattr, - .nlattr_max = CTA_TIMEOUT_UDP_MAX, - .obj_size = sizeof(unsigned int) * CTA_TIMEOUT_UDP_MAX, - .nla_policy = udp_timeout_nla_policy, - }, -#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ -}; -#endif diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 939502ff7c87..182cfb119448 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -869,9 +869,8 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, saddr = &ct->tuplehash[!dir].tuple.src.u3; } else if (sip_external_media) { struct net_device *dev = skb_dst(skb)->dev; - struct net *net = dev_net(dev); - struct flowi fl; struct dst_entry *dst = NULL; + struct flowi fl; memset(&fl, 0, sizeof(fl)); diff --git a/net/netfilter/nf_conntrack_snmp.c 
b/net/netfilter/nf_conntrack_snmp.c index 387dd6e58f88..7b7eed43c54f 100644 --- a/net/netfilter/nf_conntrack_snmp.c +++ b/net/netfilter/nf_conntrack_snmp.c @@ -25,17 +25,14 @@ static unsigned int timeout __read_mostly = 30; module_param(timeout, uint, 0400); MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); -int (__rcu *nf_nat_snmp_hook)(struct sk_buff *skb, - unsigned int protoff, - struct nf_conn *ct, - enum ip_conntrack_info ctinfo); +nf_nat_snmp_hook_fn __rcu *nf_nat_snmp_hook; EXPORT_SYMBOL_GPL(nf_nat_snmp_hook); static int snmp_conntrack_help(struct sk_buff *skb, unsigned int protoff, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { - typeof(nf_nat_snmp_hook) nf_nat_snmp; + nf_nat_snmp_hook_fn *nf_nat_snmp; nf_conntrack_broadcast_help(skb, ct, ctinfo, timeout); diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 207b240b14e5..be2953c7d702 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -61,7 +61,6 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, ntohs(tuple->src.u.tcp.port), ntohs(tuple->dst.u.tcp.port)); break; - case IPPROTO_UDPLITE: case IPPROTO_UDP: seq_printf(s, "sport=%hu dport=%hu ", ntohs(tuple->src.u.udp.port), @@ -277,7 +276,6 @@ static const char* l4proto_name(u16 proto) case IPPROTO_UDP: return "udp"; case IPPROTO_GRE: return "gre"; case IPPROTO_SCTP: return "sctp"; - case IPPROTO_UDPLITE: return "udplite"; case IPPROTO_ICMPV6: return "icmpv6"; } diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 89e9914e5d03..a2e6833a0bf7 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -32,10 +32,7 @@ static unsigned int ports_c; module_param_array(ports, ushort, &ports_c, 0400); MODULE_PARM_DESC(ports, "Port numbers of TFTP servers"); -unsigned int (__rcu *nf_nat_tftp_hook)(struct sk_buff *skb, - enum ip_conntrack_info 
ctinfo, - struct nf_conntrack_expect *exp) - __read_mostly; +nf_nat_tftp_hook_fn __rcu *nf_nat_tftp_hook __read_mostly; EXPORT_SYMBOL_GPL(nf_nat_tftp_hook); static int tftp_help(struct sk_buff *skb, @@ -48,7 +45,7 @@ static int tftp_help(struct sk_buff *skb, struct nf_conntrack_expect *exp; struct nf_conntrack_tuple *tuple; unsigned int ret = NF_ACCEPT; - typeof(nf_nat_tftp_hook) nf_nat_tftp; + nf_nat_tftp_hook_fn *nf_nat_tftp; tfh = skb_header_pointer(skb, protoff + sizeof(struct udphdr), sizeof(_tftph), &_tftph); diff --git a/net/netfilter/nf_dup_netdev.c b/net/netfilter/nf_dup_netdev.c index fab8b9011098..e348fb90b8dc 100644 --- a/net/netfilter/nf_dup_netdev.c +++ b/net/netfilter/nf_dup_netdev.c @@ -95,7 +95,10 @@ int nft_fwd_dup_netdev_offload(struct nft_offload_ctx *ctx, if (!dev) return -EOPNOTSUPP; - entry = &flow->rule->action.entries[ctx->num_actions++]; + entry = nft_flow_action_entry_next(ctx, flow); + if (!entry) + return -E2BIG; + entry->id = id; entry->dev = dev; diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 93d0aa7f8fcc..002ec15d988b 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c index 41503847d9d7..7a8952b049d1 100644 --- a/net/netfilter/nf_log_syslog.c +++ b/net/netfilter/nf_log_syslog.c @@ -78,7 +78,10 @@ dump_arp_packet(struct nf_log_buf *m, else logflags = NF_LOG_DEFAULT_MASK; - if (logflags & NF_LOG_MACDECODE) { + if ((logflags & NF_LOG_MACDECODE) && + skb->dev && skb->dev->type == ARPHRD_ETHER && + skb_mac_header_was_set(skb) && + skb_mac_header_len(skb) >= ETH_HLEN) { nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ", eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest); nf_log_dump_vlan(m, skb); @@ -165,18 +168,26 @@ static struct nf_logger nf_arp_logger 
__read_mostly = { static void nf_log_dump_sk_uid_gid(struct net *net, struct nf_log_buf *m, struct sock *sk) { + const struct socket *sock; + const struct file *file; + if (!sk || !sk_fullsock(sk) || !net_eq(net, sock_net(sk))) return; - read_lock_bh(&sk->sk_callback_lock); - if (sk->sk_socket && sk->sk_socket->file) { - const struct cred *cred = sk->sk_socket->file->f_cred; + /* The sk pointer remains valid as long as the skb is. The sk_socket and + * file pointer may become NULL if the socket is closed. Both structures + * (including file->cred) are RCU freed which means they can be accessed + * within a RCU read section. + */ + sock = READ_ONCE(sk->sk_socket); + file = sock ? READ_ONCE(sock->file) : NULL; + if (file) { + const struct cred *cred = file->f_cred; nf_log_buf_add(m, "UID=%u GID=%u ", from_kuid_munged(&init_user_ns, cred->fsuid), from_kgid_munged(&init_user_ns, cred->fsgid)); } - read_unlock_bh(&sk->sk_callback_lock); } static noinline_for_stack int @@ -789,6 +800,9 @@ static void dump_mac_header(struct nf_log_buf *m, switch (dev->type) { case ARPHRD_ETHER: + if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN) + return; + nf_log_buf_add(m, "MACSRC=%pM MACDST=%pM ", eth_hdr(skb)->h_source, eth_hdr(skb)->h_dest); nf_log_dump_vlan(m, skb); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index 3b5434e4ec9c..83b2b5e9759a 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -68,7 +68,6 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb, fl4->daddr = t->dst.u3.ip; if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == IPPROTO_SCTP) fl4->fl4_dport = t->dst.u.all; } @@ -79,7 +78,6 @@ static void nf_nat_ipv4_decode_session(struct sk_buff *skb, fl4->saddr = t->src.u3.ip; if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == 
IPPROTO_SCTP) fl4->fl4_sport = t->src.u.all; } @@ -99,7 +97,6 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb, fl6->daddr = t->dst.u3.in6; if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == IPPROTO_SCTP) fl6->fl6_dport = t->dst.u.all; } @@ -110,7 +107,6 @@ static void nf_nat_ipv6_decode_session(struct sk_buff *skb, fl6->saddr = t->src.u3.in6; if (t->dst.protonum == IPPROTO_TCP || t->dst.protonum == IPPROTO_UDP || - t->dst.protonum == IPPROTO_UDPLITE || t->dst.protonum == IPPROTO_SCTP) fl6->fl6_sport = t->src.u.all; } @@ -415,7 +411,6 @@ static bool l4proto_in_range(const struct nf_conntrack_tuple *tuple, case IPPROTO_GRE: /* all fall though */ case IPPROTO_TCP: case IPPROTO_UDP: - case IPPROTO_UDPLITE: case IPPROTO_SCTP: if (maniptype == NF_NAT_MANIP_SRC) port = tuple->src.u.all; @@ -612,7 +607,6 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple, goto find_free_id; #endif case IPPROTO_UDP: - case IPPROTO_UDPLITE: case IPPROTO_TCP: case IPPROTO_SCTP: if (maniptype == NF_NAT_MANIP_SRC) diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c index a5a23c03fda9..4de6e0a51701 100644 --- a/net/netfilter/nf_nat_masquerade.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -220,23 +220,6 @@ static struct notifier_block masq_inet_notifier = { }; #if IS_ENABLED(CONFIG_IPV6) -static int -nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev, - const struct in6_addr *daddr, unsigned int srcprefs, - struct in6_addr *saddr) -{ -#ifdef CONFIG_IPV6_MODULE - const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops(); - - if (!v6_ops) - return -EHOSTUNREACH; - - return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr); -#else - return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr); -#endif -} - unsigned int nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, const struct net_device *out) @@ -251,8 
+234,8 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range, WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY))); - if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out, - &ipv6_hdr(skb)->daddr, 0, &src) < 0) + if (ipv6_dev_get_saddr(nf_ct_net(ct), out, + &ipv6_hdr(skb)->daddr, 0, &src) < 0) return NF_DROP; nat = nf_ct_nat_ext_add(ct); diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 97c0f841fc96..07f51fe75fbe 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -79,23 +79,6 @@ static bool udp_manip_pkt(struct sk_buff *skb, return true; } -static bool udplite_manip_pkt(struct sk_buff *skb, - unsigned int iphdroff, unsigned int hdroff, - const struct nf_conntrack_tuple *tuple, - enum nf_nat_manip_type maniptype) -{ -#ifdef CONFIG_NF_CT_PROTO_UDPLITE - struct udphdr *hdr; - - if (skb_ensure_writable(skb, hdroff + sizeof(*hdr))) - return false; - - hdr = (struct udphdr *)(skb->data + hdroff); - __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, true); -#endif - return true; -} - static bool sctp_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, unsigned int hdroff, @@ -287,9 +270,6 @@ static bool l4proto_manip_pkt(struct sk_buff *skb, case IPPROTO_UDP: return udp_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); - case IPPROTO_UDPLITE: - return udplite_manip_pkt(skb, iphdroff, hdroff, - tuple, maniptype); case IPPROTO_SCTP: return sctp_manip_pkt(skb, iphdroff, hdroff, tuple, maniptype); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 7f12e56e6e52..a6c81c04b3a5 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * Rusty Russell (C)2000 -- This code is GPL. 
* Patrick McHardy (c) 2006-2012 diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8c42247a176c..8537b94653d3 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -936,58 +936,6 @@ static int nft_delflowtable(struct nft_ctx *ctx, return 0; } -static void __nft_reg_track_clobber(struct nft_regs_track *track, u8 dreg) -{ - int i; - - for (i = track->regs[dreg].num_reg; i > 0; i--) - __nft_reg_track_cancel(track, dreg - i); -} - -static void __nft_reg_track_update(struct nft_regs_track *track, - const struct nft_expr *expr, - u8 dreg, u8 num_reg) -{ - track->regs[dreg].selector = expr; - track->regs[dreg].bitwise = NULL; - track->regs[dreg].num_reg = num_reg; -} - -void nft_reg_track_update(struct nft_regs_track *track, - const struct nft_expr *expr, u8 dreg, u8 len) -{ - unsigned int regcount; - int i; - - __nft_reg_track_clobber(track, dreg); - - regcount = DIV_ROUND_UP(len, NFT_REG32_SIZE); - for (i = 0; i < regcount; i++, dreg++) - __nft_reg_track_update(track, expr, dreg, i); -} -EXPORT_SYMBOL_GPL(nft_reg_track_update); - -void nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg, u8 len) -{ - unsigned int regcount; - int i; - - __nft_reg_track_clobber(track, dreg); - - regcount = DIV_ROUND_UP(len, NFT_REG32_SIZE); - for (i = 0; i < regcount; i++, dreg++) - __nft_reg_track_cancel(track, dreg); -} -EXPORT_SYMBOL_GPL(nft_reg_track_cancel); - -void __nft_reg_track_cancel(struct nft_regs_track *track, u8 dreg) -{ - track->regs[dreg].selector = NULL; - track->regs[dreg].bitwise = NULL; - track->regs[dreg].num_reg = 0; -} -EXPORT_SYMBOL_GPL(__nft_reg_track_cancel); - /* * Tables */ @@ -1164,7 +1112,7 @@ static __be16 nft_base_seq_be16(const struct net *net) static const struct nla_policy nft_table_policy[NFTA_TABLE_MAX + 1] = { [NFTA_TABLE_NAME] = { .type = NLA_STRING, .len = NFT_TABLE_MAXNAMELEN - 1 }, - [NFTA_TABLE_FLAGS] = { .type = NLA_U32 }, + [NFTA_TABLE_FLAGS] = NLA_POLICY_MASK(NLA_BE32, 
NFT_TABLE_F_MASK), [NFTA_TABLE_HANDLE] = { .type = NLA_U64 }, [NFTA_TABLE_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN } @@ -1930,7 +1878,7 @@ static const struct nla_policy nft_chain_policy[NFTA_CHAIN_MAX + 1] = { [NFTA_CHAIN_TYPE] = { .type = NLA_STRING, .len = NFT_MODULE_AUTOLOAD_LIMIT }, [NFTA_CHAIN_COUNTERS] = { .type = NLA_NESTED }, - [NFTA_CHAIN_FLAGS] = { .type = NLA_U32 }, + [NFTA_CHAIN_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_CHAIN_FLAGS), [NFTA_CHAIN_ID] = { .type = NLA_U32 }, [NFTA_CHAIN_USERDATA] = { .type = NLA_BINARY, .len = NFT_USERDATA_MAXLEN }, @@ -4132,7 +4080,6 @@ int nft_chain_validate(const struct nft_ctx *ctx, struct nft_chain *chain) nft_chain_vstate_update(ctx, chain); return 0; } -EXPORT_SYMBOL_GPL(nft_chain_validate); static int nft_table_validate(struct net *net, const struct nft_table *table) { @@ -4650,7 +4597,16 @@ static const struct nla_policy nft_set_policy[NFTA_SET_MAX + 1] = { .len = NFT_TABLE_MAXNAMELEN - 1 }, [NFTA_SET_NAME] = { .type = NLA_STRING, .len = NFT_SET_MAXNAMELEN - 1 }, - [NFTA_SET_FLAGS] = { .type = NLA_U32 }, + [NFTA_SET_FLAGS] = NLA_POLICY_MASK(NLA_BE32, + NFT_SET_ANONYMOUS | + NFT_SET_CONSTANT | + NFT_SET_INTERVAL | + NFT_SET_MAP | + NFT_SET_TIMEOUT | + NFT_SET_EVAL | + NFT_SET_OBJECT | + NFT_SET_CONCAT | + NFT_SET_EXPR), [NFTA_SET_KEY_TYPE] = { .type = NLA_U32 }, [NFTA_SET_KEY_LEN] = { .type = NLA_U32 }, [NFTA_SET_DATA_TYPE] = { .type = NLA_U32 }, @@ -4748,7 +4704,6 @@ struct nft_set *nft_set_lookup_global(const struct net *net, } return set; } -EXPORT_SYMBOL_GPL(nft_set_lookup_global); static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, const char *name) @@ -5821,7 +5776,6 @@ bind: return 0; } -EXPORT_SYMBOL_GPL(nf_tables_bind_set); static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, bool event) @@ -5900,7 +5854,6 @@ void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set) 
nft_use_inc_restore(&set->use); } -EXPORT_SYMBOL_GPL(nf_tables_activate_set); void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding, @@ -5940,14 +5893,12 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, phase == NFT_TRANS_COMMIT); } } -EXPORT_SYMBOL_GPL(nf_tables_deactivate_set); void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set) { if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) nft_set_destroy(ctx, set); } -EXPORT_SYMBOL_GPL(nf_tables_destroy_set); const struct nft_set_ext_type nft_set_ext_types[] = { [NFT_SET_EXT_KEY] = { @@ -5987,7 +5938,8 @@ const struct nft_set_ext_type nft_set_ext_types[] = { static const struct nla_policy nft_set_elem_policy[NFTA_SET_ELEM_MAX + 1] = { [NFTA_SET_ELEM_KEY] = { .type = NLA_NESTED }, [NFTA_SET_ELEM_DATA] = { .type = NLA_NESTED }, - [NFTA_SET_ELEM_FLAGS] = { .type = NLA_U32 }, + [NFTA_SET_ELEM_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_SET_ELEM_INTERVAL_END | + NFT_SET_ELEM_CATCHALL), [NFTA_SET_ELEM_TIMEOUT] = { .type = NLA_U64 }, [NFTA_SET_ELEM_EXPIRATION] = { .type = NLA_U64 }, [NFTA_SET_ELEM_USERDATA] = { .type = NLA_BINARY, @@ -6785,7 +6737,6 @@ void nft_set_elem_destroy(const struct nft_set *set, __nft_set_elem_destroy(&ctx, set, elem_priv, destroy_expr); } -EXPORT_SYMBOL_GPL(nft_set_elem_destroy); /* Drop references and destroy. Called from abort path. 
*/ static void nft_trans_set_elem_destroy(const struct nft_ctx *ctx, struct nft_trans_elem *te) @@ -6912,7 +6863,6 @@ struct nft_set_ext *nft_set_catchall_lookup(const struct net *net, return NULL; } -EXPORT_SYMBOL_GPL(nft_set_catchall_lookup); static int nft_setelem_catchall_insert(const struct net *net, struct nft_set *set, @@ -8032,7 +7982,6 @@ out: rcu_read_unlock(); return ERR_PTR(-ENOENT); } -EXPORT_SYMBOL_GPL(nft_obj_lookup); static struct nft_object *nft_obj_lookup_byhandle(const struct nft_table *table, const struct nlattr *nla, @@ -8710,7 +8659,7 @@ static const struct nla_policy nft_flowtable_policy[NFTA_FLOWTABLE_MAX + 1] = { .len = NFT_NAME_MAXLEN - 1 }, [NFTA_FLOWTABLE_HOOK] = { .type = NLA_NESTED }, [NFTA_FLOWTABLE_HANDLE] = { .type = NLA_U64 }, - [NFTA_FLOWTABLE_FLAGS] = { .type = NLA_U32 }, + [NFTA_FLOWTABLE_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_FLOWTABLE_MASK), }; struct nft_flowtable *nft_flowtable_lookup(const struct net *net, @@ -10176,16 +10125,9 @@ void nf_tables_trans_destroy_flush_work(struct net *net) } EXPORT_SYMBOL_GPL(nf_tables_trans_destroy_flush_work); -static bool nft_expr_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - return false; -} - static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *chain) { const struct nft_expr *expr, *last; - struct nft_regs_track track = {}; unsigned int size, data_size; void *data, *data_boundary; struct nft_rule_dp *prule; @@ -10222,15 +10164,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha return -ENOMEM; size = 0; - track.last = nft_expr_last(rule); nft_rule_for_each_expr(expr, last, rule) { - track.cur = expr; - - if (nft_expr_reduce(&track, expr)) { - expr = track.cur; - continue; - } - if (WARN_ON_ONCE(data + size + expr->ops->size > data_boundary)) return -ENOMEM; @@ -11853,7 +11787,6 @@ void nft_data_release(const struct nft_data *data, enum nft_data_types type) WARN_ON(1); } } 
-EXPORT_SYMBOL_GPL(nft_data_release); int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, enum nft_data_types type, unsigned int len) @@ -11880,7 +11813,6 @@ int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, nla_nest_end(skb, nest); return err; } -EXPORT_SYMBOL_GPL(nft_data_dump); static void __nft_release_hook(struct net *net, struct nft_table *table) { diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 6557a4018c09..5ddd5b6e135f 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -151,7 +151,7 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, unsigned char *ptr; if (priv->base == NFT_PAYLOAD_NETWORK_HEADER) - ptr = skb_network_header(skb); + ptr = skb_network_header(skb) + pkt->nhoff; else { if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) return false; diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index e62a0dea24ea..47f3ed441f64 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Netfilter messages via netlink socket. Allows for user space * protocol helpers and general trouble making from userspace. * @@ -9,9 +10,6 @@ * generally made possible by Network Robots, Inc. (www.networkrobots.com) * * Further development of this code funded by Astaro AG (http://www.astaro.com) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. 
*/ #include <linux/module.h> diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 2bfaa773d82f..8ff1e0ad5cb0 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -373,7 +373,7 @@ static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = { [NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 }, [NFACCT_BYTES] = { .type = NLA_U64 }, [NFACCT_PKTS] = { .type = NLA_U64 }, - [NFACCT_FLAGS] = { .type = NLA_U32 }, + [NFACCT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFACCT_F_QUOTA), [NFACCT_QUOTA] = { .type = NLA_U64 }, [NFACCT_FILTER] = {.type = NLA_NESTED }, }; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index d545fa459455..0d16ad82d70c 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -165,7 +165,7 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, static const struct nla_policy nfnl_cthelper_expect_policy_set[NFCTH_POLICY_SET_MAX+1] = { - [NFCTH_POLICY_SET_NUM] = { .type = NLA_U32, }, + [NFCTH_POLICY_SET_NUM] = NLA_POLICY_MAX(NLA_BE32, NF_CT_MAX_EXPECT_CLASSES), }; static int diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index fd8652aa7e88..dca6826af7de 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -457,7 +457,6 @@ static int cttimeout_default_get(struct sk_buff *skb, timeouts = nf_tcp_pernet(info->net)->timeouts; break; case IPPROTO_UDP: - case IPPROTO_UDPLITE: timeouts = nf_udp_pernet(info->net)->timeouts; break; case IPPROTO_ICMPV6: diff --git a/net/netfilter/nfnetlink_hook.c b/net/netfilter/nfnetlink_hook.c index 531706982859..5623c18fcd12 100644 --- a/net/netfilter/nfnetlink_hook.c +++ b/net/netfilter/nfnetlink_hook.c @@ -24,7 +24,7 @@ #include <net/sock.h> static const struct nla_policy nfnl_hook_nla_policy[NFNLA_HOOK_MAX + 1] = { - [NFNLA_HOOK_HOOKNUM] = { .type = NLA_U32 }, + 
[NFNLA_HOOK_HOOKNUM] = NLA_POLICY_MAX(NLA_BE32, 255), [NFNLA_HOOK_PRIORITY] = { .type = NLA_U32 }, [NFNLA_HOOK_DEV] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 0db908518b2f..2439cbbd5b26 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -401,7 +401,7 @@ nfulnl_timer(struct timer_list *t) static u32 nfulnl_get_bridge_size(const struct sk_buff *skb) { - u32 size = 0; + u32 mac_len, size = 0; if (!skb_mac_header_was_set(skb)) return 0; @@ -412,14 +412,17 @@ static u32 nfulnl_get_bridge_size(const struct sk_buff *skb) size += nla_total_size(sizeof(u16)); /* tag */ } - if (skb->network_header > skb->mac_header) - size += nla_total_size(skb->network_header - skb->mac_header); + mac_len = skb_mac_header_len(skb); + if (mac_len > 0) + size += nla_total_size(mac_len); return size; } static int nfulnl_put_bridge(struct nfulnl_instance *inst, const struct sk_buff *skb) { + u32 mac_len; + if (!skb_mac_header_was_set(skb)) return 0; @@ -437,12 +440,10 @@ static int nfulnl_put_bridge(struct nfulnl_instance *inst, const struct sk_buff nla_nest_end(inst->skb, nest); } - if (skb->mac_header < skb->network_header) { - int len = (int)(skb->network_header - skb->mac_header); - - if (nla_put(inst->skb, NFULA_L2HDR, len, skb_mac_header(skb))) - goto nla_put_failure; - } + mac_len = skb_mac_header_len(skb); + if (mac_len > 0 && + nla_put(inst->skb, NFULA_L2HDR, mac_len, skb_mac_header(skb))) + goto nla_put_failure; return 0; @@ -611,19 +612,26 @@ __build_packet_message(struct nfnl_log_net *log, /* UID */ sk = skb->sk; if (sk && sk_fullsock(sk)) { - read_lock_bh(&sk->sk_callback_lock); - if (sk->sk_socket && sk->sk_socket->file) { - struct file *file = sk->sk_socket->file; + const struct socket *sock; + const struct file *file; + + /* The sk pointer remains valid as long as the skb is. + * The sk_socket and file pointer may become NULL + * if the socket is closed. 
+ * Both structures (including file->cred) are RCU freed + * which means they can be accessed within a RCU read section. + */ + sock = READ_ONCE(sk->sk_socket); + file = sock ? READ_ONCE(sock->file) : NULL; + if (file) { const struct cred *cred = file->f_cred; struct user_namespace *user_ns = inst->peer_user_ns; __be32 uid = htonl(from_kuid_munged(user_ns, cred->fsuid)); __be32 gid = htonl(from_kgid_munged(user_ns, cred->fsgid)); - read_unlock_bh(&sk->sk_callback_lock); if (nla_put_be32(inst->skb, NFULA_UID, uid) || nla_put_be32(inst->skb, NFULA_GID, gid)) goto nla_put_failure; - } else - read_unlock_bh(&sk->sk_callback_lock); + } } /* local sequence number */ @@ -872,7 +880,9 @@ static const struct nla_policy nfula_cfg_policy[NFULA_CFG_MAX+1] = { [NFULA_CFG_TIMEOUT] = { .type = NLA_U32 }, [NFULA_CFG_QTHRESH] = { .type = NLA_U32 }, [NFULA_CFG_NLBUFSIZ] = { .type = NLA_U32 }, - [NFULA_CFG_FLAGS] = { .type = NLA_U16 }, + [NFULA_CFG_FLAGS] = NLA_POLICY_MASK(NLA_BE16, NFULNL_CFG_F_SEQ | + NFULNL_CFG_F_SEQ_GLOBAL | + NFULNL_CFG_F_CONNTRACK), }; static int nfulnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info, diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 45d9ad231a92..d64ce21c7b55 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -293,7 +293,7 @@ bool nf_osf_find(const struct sk_buff *skb, EXPORT_SYMBOL_GPL(nf_osf_find); static const struct nla_policy nfnl_osf_policy[OSF_ATTR_MAX + 1] = { - [OSF_ATTR_FINGER] = { .len = sizeof(struct nf_osf_user_finger) }, + [OSF_ATTR_FINGER] = NLA_POLICY_EXACT_LEN(sizeof(struct nf_osf_user_finger)), }; static int nfnl_osf_add_callback(struct sk_buff *skb, diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 8e02f84784da..58304fd1f70f 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -319,9 +319,25 @@ static int nf_ip_reroute(struct sk_buff *skb, const struct nf_queue_entry *entry return 0; 
} +static int nf_ip6_reroute(struct sk_buff *skb, + const struct nf_queue_entry *entry) +{ + struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); + + if (entry->state.hook == NF_INET_LOCAL_OUT) { + const struct ipv6hdr *iph = ipv6_hdr(skb); + + if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || + !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || + skb->mark != rt_info->mark) + return nf_ip6_route_me_harder(entry->state.net, + entry->state.sk, skb); + } + return 0; +} + static int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) { - const struct nf_ipv6_ops *v6ops; int ret = 0; switch (entry->state.pf) { @@ -329,9 +345,7 @@ static int nf_reroute(struct sk_buff *skb, struct nf_queue_entry *entry) ret = nf_ip_reroute(skb, entry); break; case AF_INET6: - v6ops = rcu_dereference(nf_ipv6_ops); - if (v6ops) - ret = v6ops->reroute(skb, entry); + ret = nf_ip6_reroute(skb, entry); break; } return ret; @@ -508,14 +522,23 @@ nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet, static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk) { + const struct socket *sock; + const struct file *file; const struct cred *cred; if (!sk_fullsock(sk)) return 0; - read_lock_bh(&sk->sk_callback_lock); - if (sk->sk_socket && sk->sk_socket->file) { - cred = sk->sk_socket->file->f_cred; + /* The sk pointer remains valid as long as the skb is. + * The sk_socket and file pointer may become NULL + * if the socket is closed. + * Both structures (including file->cred) are RCU freed + * which means they can be accessed within a RCU read section. + */ + sock = READ_ONCE(sk->sk_socket); + file = sock ? 
READ_ONCE(sock->file) : NULL; + if (file) { + cred = file->f_cred; if (nla_put_be32(skb, NFQA_UID, htonl(from_kuid_munged(&init_user_ns, cred->fsuid)))) goto nla_put_failure; @@ -523,11 +546,9 @@ static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk) htonl(from_kgid_munged(&init_user_ns, cred->fsgid)))) goto nla_put_failure; } - read_unlock_bh(&sk->sk_callback_lock); return 0; nla_put_failure: - read_unlock_bh(&sk->sk_callback_lock); return -1; } @@ -548,15 +569,8 @@ static int nfqnl_get_sk_secctx(struct sk_buff *skb, struct lsm_context *ctx) { int seclen = 0; #if IS_ENABLED(CONFIG_NETWORK_SECMARK) - - if (!skb || !sk_fullsock(skb->sk)) - return 0; - - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->secmark) seclen = security_secid_to_secctx(skb->secmark, ctx); - read_unlock_bh(&skb->sk->sk_callback_lock); #endif return seclen; } @@ -565,6 +579,7 @@ static u32 nfqnl_get_bridge_size(struct nf_queue_entry *entry) { struct sk_buff *entskb = entry->skb; u32 nlalen = 0; + u32 mac_len; if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb)) return 0; @@ -573,9 +588,9 @@ static u32 nfqnl_get_bridge_size(struct nf_queue_entry *entry) nlalen += nla_total_size(nla_total_size(sizeof(__be16)) + nla_total_size(sizeof(__be16))); - if (entskb->network_header > entskb->mac_header) - nlalen += nla_total_size((entskb->network_header - - entskb->mac_header)); + mac_len = skb_mac_header_len(entskb); + if (mac_len > 0) + nlalen += nla_total_size(mac_len); return nlalen; } @@ -583,6 +598,7 @@ static u32 nfqnl_get_bridge_size(struct nf_queue_entry *entry) static int nfqnl_put_bridge(struct nf_queue_entry *entry, struct sk_buff *skb) { struct sk_buff *entskb = entry->skb; + u32 mac_len; if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb)) return 0; @@ -601,12 +617,10 @@ static int nfqnl_put_bridge(struct nf_queue_entry *entry, struct sk_buff *skb) nla_nest_end(skb, nest); } - if (entskb->mac_header < entskb->network_header) { - int len = 
(int)(entskb->network_header - entskb->mac_header); - - if (nla_put(skb, NFQA_L2HDR, len, skb_mac_header(entskb))) - goto nla_put_failure; - } + mac_len = skb_mac_header_len(entskb); + if (mac_len > 0 && + nla_put(skb, NFQA_L2HDR, mac_len, skb_mac_header(entskb))) + goto nla_put_failure; return 0; @@ -990,13 +1004,13 @@ nf_queue_entry_dup(struct nf_queue_entry *e) static void nf_bridge_adjust_skb_data(struct sk_buff *skb) { if (nf_bridge_info_get(skb)) - __skb_push(skb, skb->network_header - skb->mac_header); + __skb_push(skb, skb_mac_header_len(skb)); } static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) { if (nf_bridge_info_get(skb)) - __skb_pull(skb, skb->network_header - skb->mac_header); + __skb_pull(skb, skb_mac_header_len(skb)); } #else #define nf_bridge_adjust_skb_data(s) do {} while (0) @@ -1455,8 +1469,7 @@ static int nfqa_parse_bridge(struct nf_queue_entry *entry, } if (nfqa[NFQA_L2HDR]) { - int mac_header_len = entry->skb->network_header - - entry->skb->mac_header; + u32 mac_header_len = skb_mac_header_len(entry->skb); if (mac_header_len != nla_len(nfqa[NFQA_L2HDR])) return -EINVAL; @@ -1548,7 +1561,7 @@ static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = { [NFQA_CFG_PARAMS] = { .len = sizeof(struct nfqnl_msg_config_params) }, [NFQA_CFG_QUEUE_MAXLEN] = { .type = NLA_U32 }, [NFQA_CFG_MASK] = { .type = NLA_U32 }, - [NFQA_CFG_FLAGS] = { .type = NLA_U32 }, + [NFQA_CFG_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFQA_CFG_F_MAX - 1), }; static const struct nf_queue_handler nfqh = { diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c index d550910aabec..13808e9cd999 100644 --- a/net/netfilter/nft_bitwise.c +++ b/net/netfilter/nft_bitwise.c @@ -125,9 +125,9 @@ void nft_bitwise_eval(const struct nft_expr *expr, } static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = { - [NFTA_BITWISE_SREG] = { .type = NLA_U32 }, - [NFTA_BITWISE_SREG2] = { .type = NLA_U32 }, - [NFTA_BITWISE_DREG] = { .type = NLA_U32 }, + 
[NFTA_BITWISE_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), + [NFTA_BITWISE_SREG2] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), + [NFTA_BITWISE_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_BITWISE_LEN] = { .type = NLA_U32 }, [NFTA_BITWISE_MASK] = { .type = NLA_NESTED }, [NFTA_BITWISE_XOR] = { .type = NLA_NESTED }, @@ -391,61 +391,12 @@ static int nft_bitwise_offload(struct nft_offload_ctx *ctx, return 0; } -static bool nft_bitwise_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_bitwise *priv = nft_expr_priv(expr); - const struct nft_bitwise *bitwise; - unsigned int regcount; - u8 dreg; - int i; - - if (!track->regs[priv->sreg].selector) - return false; - - bitwise = nft_expr_priv(track->regs[priv->dreg].selector); - if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector && - track->regs[priv->sreg].num_reg == 0 && - track->regs[priv->dreg].bitwise && - track->regs[priv->dreg].bitwise->ops == expr->ops && - priv->sreg == bitwise->sreg && - priv->sreg2 == bitwise->sreg2 && - priv->dreg == bitwise->dreg && - priv->op == bitwise->op && - priv->len == bitwise->len && - !memcmp(&priv->mask, &bitwise->mask, sizeof(priv->mask)) && - !memcmp(&priv->xor, &bitwise->xor, sizeof(priv->xor)) && - !memcmp(&priv->data, &bitwise->data, sizeof(priv->data))) { - track->cur = expr; - return true; - } - - if (track->regs[priv->sreg].bitwise || - track->regs[priv->sreg].num_reg != 0) { - nft_reg_track_cancel(track, priv->dreg, priv->len); - return false; - } - - if (priv->sreg != priv->dreg) { - nft_reg_track_update(track, track->regs[priv->sreg].selector, - priv->dreg, priv->len); - } - - dreg = priv->dreg; - regcount = DIV_ROUND_UP(priv->len, NFT_REG32_SIZE); - for (i = 0; i < regcount; i++, dreg++) - track->regs[dreg].bitwise = expr; - - return false; -} - static const struct nft_expr_ops nft_bitwise_ops = { .type = &nft_bitwise_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise)), .eval = nft_bitwise_eval, 
.init = nft_bitwise_init, .dump = nft_bitwise_dump, - .reduce = nft_bitwise_reduce, .offload = nft_bitwise_offload, }; @@ -548,48 +499,12 @@ static int nft_bitwise_fast_offload(struct nft_offload_ctx *ctx, return 0; } -static bool nft_bitwise_fast_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_bitwise_fast_expr *priv = nft_expr_priv(expr); - const struct nft_bitwise_fast_expr *bitwise; - - if (!track->regs[priv->sreg].selector) - return false; - - bitwise = nft_expr_priv(track->regs[priv->dreg].selector); - if (track->regs[priv->sreg].selector == track->regs[priv->dreg].selector && - track->regs[priv->dreg].bitwise && - track->regs[priv->dreg].bitwise->ops == expr->ops && - priv->sreg == bitwise->sreg && - priv->dreg == bitwise->dreg && - priv->mask == bitwise->mask && - priv->xor == bitwise->xor) { - track->cur = expr; - return true; - } - - if (track->regs[priv->sreg].bitwise) { - nft_reg_track_cancel(track, priv->dreg, NFT_REG32_SIZE); - return false; - } - - if (priv->sreg != priv->dreg) { - track->regs[priv->dreg].selector = - track->regs[priv->sreg].selector; - } - track->regs[priv->dreg].bitwise = expr; - - return false; -} - const struct nft_expr_ops nft_bitwise_fast_ops = { .type = &nft_bitwise_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_bitwise_fast_expr)), .eval = NULL, /* inlined */ .init = nft_bitwise_fast_init, .dump = nft_bitwise_fast_dump, - .reduce = nft_bitwise_fast_reduce, .offload = nft_bitwise_fast_offload, }; @@ -626,22 +541,3 @@ struct nft_expr_type nft_bitwise_type __read_mostly = { .maxattr = NFTA_BITWISE_MAX, .owner = THIS_MODULE, }; - -bool nft_expr_reduce_bitwise(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_expr *last = track->last; - const struct nft_expr *next; - - if (expr == last) - return false; - - next = nft_expr_next(expr); - if (next->ops == &nft_bitwise_ops) - return nft_bitwise_reduce(track, next); - else if (next->ops == &nft_bitwise_fast_ops) 
- return nft_bitwise_fast_reduce(track, next); - - return false; -} -EXPORT_SYMBOL_GPL(nft_expr_reduce_bitwise); diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c index af9206a3afd1..e00dddfa2fc0 100644 --- a/net/netfilter/nft_byteorder.c +++ b/net/netfilter/nft_byteorder.c @@ -87,8 +87,8 @@ void nft_byteorder_eval(const struct nft_expr *expr, } static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = { - [NFTA_BYTEORDER_SREG] = { .type = NLA_U32 }, - [NFTA_BYTEORDER_DREG] = { .type = NLA_U32 }, + [NFTA_BYTEORDER_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), + [NFTA_BYTEORDER_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_BYTEORDER_OP] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_BYTEORDER_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_BYTEORDER_SIZE] = NLA_POLICY_MAX(NLA_BE32, 255), @@ -170,23 +170,12 @@ nla_put_failure: return -1; } -static bool nft_byteorder_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - struct nft_byteorder *priv = nft_expr_priv(expr); - - nft_reg_track_cancel(track, priv->dreg, priv->len); - - return false; -} - static const struct nft_expr_ops nft_byteorder_ops = { .type = &nft_byteorder_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_byteorder)), .eval = nft_byteorder_eval, .init = nft_byteorder_init, .dump = nft_byteorder_dump, - .reduce = nft_byteorder_reduce, }; struct nft_expr_type nft_byteorder_type __read_mostly = { diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c index 041426e3bdbf..e04e689f2228 100644 --- a/net/netfilter/nft_chain_filter.c +++ b/net/netfilter/nft_chain_filter.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/init.h> #include <linux/kernel.h> #include <linux/netdevice.h> diff --git a/net/netfilter/nft_cmp.c b/net/netfilter/nft_cmp.c index 2605f43737bc..e085c2a00b70 100644 --- a/net/netfilter/nft_cmp.c +++ b/net/netfilter/nft_cmp.c @@ -64,7 +64,7 @@ mismatch: } static const struct nla_policy 
nft_cmp_policy[NFTA_CMP_MAX + 1] = { - [NFTA_CMP_SREG] = { .type = NLA_U32 }, + [NFTA_CMP_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_CMP_OP] = { .type = NLA_U32 }, [NFTA_CMP_DATA] = { .type = NLA_NESTED }, }; @@ -190,7 +190,6 @@ static const struct nft_expr_ops nft_cmp_ops = { .eval = nft_cmp_eval, .init = nft_cmp_init, .dump = nft_cmp_dump, - .reduce = NFT_REDUCE_READONLY, .offload = nft_cmp_offload, }; @@ -282,7 +281,6 @@ const struct nft_expr_ops nft_cmp_fast_ops = { .eval = NULL, /* inlined */ .init = nft_cmp_fast_init, .dump = nft_cmp_fast_dump, - .reduce = NFT_REDUCE_READONLY, .offload = nft_cmp_fast_offload, }; @@ -376,7 +374,6 @@ const struct nft_expr_ops nft_cmp16_fast_ops = { .eval = NULL, /* inlined */ .init = nft_cmp16_fast_init, .dump = nft_cmp16_fast_dump, - .reduce = NFT_REDUCE_READONLY, .offload = nft_cmp16_fast_offload, }; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 27cc983a7cdf..decc725a33c2 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -195,7 +195,7 @@ static void target_compat_from_user(struct xt_target *t, void *in, void *out) static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1] = { [NFTA_RULE_COMPAT_PROTO] = { .type = NLA_U32 }, - [NFTA_RULE_COMPAT_FLAGS] = { .type = NLA_U32 }, + [NFTA_RULE_COMPAT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_RULE_COMPAT_F_MASK), }; static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv) @@ -778,14 +778,6 @@ static const struct nfnetlink_subsystem nfnl_compat_subsys = { static struct nft_expr_type nft_match_type; -static bool nft_match_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct xt_match *match = expr->ops->data; - - return strcmp(match->name, "comment") == 0; -} - static const struct nft_expr_ops * nft_match_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -828,7 +820,6 @@ nft_match_select_ops(const struct nft_ctx *ctx, ops->dump 
= nft_match_dump; ops->validate = nft_match_validate; ops->data = match; - ops->reduce = nft_match_reduce; matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); if (matchsize > NFT_MATCH_LARGE_THRESH) { @@ -918,7 +909,6 @@ nft_target_select_ops(const struct nft_ctx *ctx, ops->dump = nft_target_dump; ops->validate = nft_target_validate; ops->data = target; - ops->reduce = NFT_REDUCE_READONLY; if (family == NFPROTO_BRIDGE) ops->eval = nft_target_eval_bridge; diff --git a/net/netfilter/nft_connlimit.c b/net/netfilter/nft_connlimit.c index 43357f99eb00..46b31d78abc6 100644 --- a/net/netfilter/nft_connlimit.c +++ b/net/netfilter/nft_connlimit.c @@ -159,7 +159,7 @@ static int nft_connlimit_obj_dump(struct sk_buff *skb, static const struct nla_policy nft_connlimit_policy[NFTA_CONNLIMIT_MAX + 1] = { [NFTA_CONNLIMIT_COUNT] = { .type = NLA_U32 }, - [NFTA_CONNLIMIT_FLAGS] = { .type = NLA_U32 }, + [NFTA_CONNLIMIT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_CONNLIMIT_F_INV), }; static struct nft_object_type nft_connlimit_obj_type; @@ -258,7 +258,6 @@ static const struct nft_expr_ops nft_connlimit_ops = { .destroy_clone = nft_connlimit_destroy_clone, .dump = nft_connlimit_dump, .gc = nft_connlimit_gc, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_connlimit_type __read_mostly = { diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 169ae93688bc..3fa6369790f4 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -313,7 +313,6 @@ static const struct nft_expr_ops nft_counter_ops = { .destroy_clone = nft_counter_destroy, .dump = nft_counter_dump, .clone = nft_counter_clone, - .reduce = NFT_REDUCE_READONLY, .offload = nft_counter_offload, .offload_stats = nft_counter_offload_stats, }; diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 04c74ccf9b84..60ee8d932fcb 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -336,10 +336,10 @@ static void nft_ct_set_eval(const struct nft_expr *expr, } 
static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { - [NFTA_CT_DREG] = { .type = NLA_U32 }, + [NFTA_CT_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_CT_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), - [NFTA_CT_DIRECTION] = { .type = NLA_U8 }, - [NFTA_CT_SREG] = { .type = NLA_U32 }, + [NFTA_CT_DIRECTION] = NLA_POLICY_MAX(NLA_U8, IP_CT_DIR_REPLY), + [NFTA_CT_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), }; #ifdef CONFIG_NF_CONNTRACK_ZONES @@ -700,29 +700,6 @@ nla_put_failure: return -1; } -static bool nft_ct_get_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_ct *priv = nft_expr_priv(expr); - const struct nft_ct *ct; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - ct = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->key != ct->key) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return nft_expr_reduce_bitwise(track, expr); -} - static int nft_ct_set_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { @@ -757,27 +734,8 @@ static const struct nft_expr_ops nft_ct_get_ops = { .init = nft_ct_get_init, .destroy = nft_ct_get_destroy, .dump = nft_ct_get_dump, - .reduce = nft_ct_get_reduce, }; -static bool nft_ct_set_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - int i; - - for (i = 0; i < NFT_REG32_NUM; i++) { - if (!track->regs[i].selector) - continue; - - if (track->regs[i].selector->ops != &nft_ct_get_ops) - continue; - - __nft_reg_track_cancel(track, i); - } - - return false; -} - #ifdef CONFIG_MITIGATION_RETPOLINE static const struct nft_expr_ops nft_ct_get_fast_ops = { .type = &nft_ct_type, @@ -786,7 +744,6 @@ static const struct nft_expr_ops nft_ct_get_fast_ops = { .init = nft_ct_get_init, .destroy = nft_ct_get_destroy, .dump = nft_ct_get_dump, - .reduce = 
nft_ct_set_reduce, }; #endif @@ -797,7 +754,6 @@ static const struct nft_expr_ops nft_ct_set_ops = { .init = nft_ct_set_init, .destroy = nft_ct_set_destroy, .dump = nft_ct_set_dump, - .reduce = nft_ct_set_reduce, }; #ifdef CONFIG_NF_CONNTRACK_ZONES @@ -808,7 +764,6 @@ static const struct nft_expr_ops nft_ct_set_zone_ops = { .init = nft_ct_set_init, .destroy = nft_ct_set_destroy, .dump = nft_ct_set_dump, - .reduce = nft_ct_set_reduce, }; #endif @@ -878,7 +833,6 @@ static const struct nft_expr_ops nft_notrack_ops = { .type = &nft_notrack_type, .size = NFT_EXPR_SIZE(0), .eval = nft_notrack_eval, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_notrack_type __read_mostly = { @@ -1298,7 +1252,6 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx, switch (priv->l4proto) { case IPPROTO_TCP: case IPPROTO_UDP: - case IPPROTO_UDPLITE: case IPPROTO_DCCP: case IPPROTO_SCTP: break; diff --git a/net/netfilter/nft_dup_netdev.c b/net/netfilter/nft_dup_netdev.c index 0573f96ce079..06866799e946 100644 --- a/net/netfilter/nft_dup_netdev.c +++ b/net/netfilter/nft_dup_netdev.c @@ -80,7 +80,6 @@ static const struct nft_expr_ops nft_dup_netdev_ops = { .eval = nft_dup_netdev_eval, .init = nft_dup_netdev_init, .dump = nft_dup_netdev_dump, - .reduce = NFT_REDUCE_READONLY, .offload = nft_dup_netdev_offload, .offload_action = nft_dup_netdev_offload_action, }; diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 9123277be03c..ee9d3e7b1ecf 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -163,7 +163,8 @@ static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = { [NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 }, [NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 }, [NFTA_DYNSET_EXPR] = { .type = NLA_NESTED }, - [NFTA_DYNSET_FLAGS] = { .type = NLA_U32 }, + [NFTA_DYNSET_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_DYNSET_F_INV | + NFT_DYNSET_F_EXPR), [NFTA_DYNSET_EXPRESSIONS] = { .type = NLA_NESTED }, }; @@ -429,7 +430,6 
@@ static const struct nft_expr_ops nft_dynset_ops = { .activate = nft_dynset_activate, .deactivate = nft_dynset_deactivate, .dump = nft_dynset_dump, - .reduce = NFT_REDUCE_READONLY, }; struct nft_expr_type nft_dynset_type __read_mostly = { diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 7eedf4e3ae9c..0407d6f708ae 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -486,13 +486,13 @@ err: #endif static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { - [NFTA_EXTHDR_DREG] = { .type = NLA_U32 }, + [NFTA_EXTHDR_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 }, [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 }, [NFTA_EXTHDR_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), - [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 }, + [NFTA_EXTHDR_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_EXTHDR_F_PRESENT), [NFTA_EXTHDR_OP] = NLA_POLICY_MAX(NLA_BE32, 255), - [NFTA_EXTHDR_SREG] = { .type = NLA_U32 }, + [NFTA_EXTHDR_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), }; static int nft_exthdr_init(const struct nft_ctx *ctx, @@ -702,40 +702,12 @@ static int nft_exthdr_dump_strip(struct sk_buff *skb, return nft_exthdr_dump_common(skb, priv); } -static bool nft_exthdr_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_exthdr *priv = nft_expr_priv(expr); - const struct nft_exthdr *exthdr; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - exthdr = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->type != exthdr->type || - priv->op != exthdr->op || - priv->flags != exthdr->flags || - priv->offset != exthdr->offset || - priv->len != exthdr->len) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return nft_expr_reduce_bitwise(track, expr); -} - static const struct nft_expr_ops 
nft_exthdr_ipv6_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), .eval = nft_exthdr_ipv6_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, - .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops nft_exthdr_ipv4_ops = { @@ -744,7 +716,6 @@ static const struct nft_expr_ops nft_exthdr_ipv4_ops = { .eval = nft_exthdr_ipv4_eval, .init = nft_exthdr_ipv4_init, .dump = nft_exthdr_dump, - .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops nft_exthdr_tcp_ops = { @@ -753,7 +724,6 @@ static const struct nft_expr_ops nft_exthdr_tcp_ops = { .eval = nft_exthdr_tcp_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, - .reduce = nft_exthdr_reduce, }; static const struct nft_expr_ops nft_exthdr_tcp_set_ops = { @@ -762,7 +732,6 @@ static const struct nft_expr_ops nft_exthdr_tcp_set_ops = { .eval = nft_exthdr_tcp_set_eval, .init = nft_exthdr_tcp_set_init, .dump = nft_exthdr_dump_set, - .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops nft_exthdr_tcp_strip_ops = { @@ -771,7 +740,6 @@ static const struct nft_expr_ops nft_exthdr_tcp_strip_ops = { .eval = nft_exthdr_tcp_strip_eval, .init = nft_exthdr_tcp_strip_init, .dump = nft_exthdr_dump_strip, - .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops nft_exthdr_sctp_ops = { @@ -780,7 +748,6 @@ static const struct nft_expr_ops nft_exthdr_sctp_ops = { .eval = nft_exthdr_sctp_eval, .init = nft_exthdr_init, .dump = nft_exthdr_dump, - .reduce = nft_exthdr_reduce, }; #ifdef CONFIG_NFT_EXTHDR_DCCP @@ -790,7 +757,6 @@ static const struct nft_expr_ops nft_exthdr_dccp_ops = { .eval = nft_exthdr_dccp_eval, .init = nft_exthdr_dccp_init, .dump = nft_exthdr_dump, - .reduce = nft_exthdr_reduce, }; #endif @@ -830,6 +796,9 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx, break; #ifdef CONFIG_NFT_EXTHDR_DCCP case NFT_EXTHDR_OP_DCCP: + pr_warn_once("The dccp option matching is deprecated and scheduled to be removed in 2027.\n" + "Please contact the 
netfilter-devel mailing list or update your nftables rules.\n"); + if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_dccp_ops; break; diff --git a/net/netfilter/nft_fib.c b/net/netfilter/nft_fib.c index 96e02a83c045..327a5f33659c 100644 --- a/net/netfilter/nft_fib.c +++ b/net/netfilter/nft_fib.c @@ -19,7 +19,7 @@ NFTA_FIB_F_PRESENT) const struct nla_policy nft_fib_policy[NFTA_FIB_MAX + 1] = { - [NFTA_FIB_DREG] = { .type = NLA_U32 }, + [NFTA_FIB_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_FIB_RESULT] = { .type = NLA_U32 }, [NFTA_FIB_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFTA_FIB_F_ALL), @@ -162,48 +162,6 @@ void nft_fib_store_result(void *reg, const struct nft_fib *priv, } EXPORT_SYMBOL_GPL(nft_fib_store_result); -bool nft_fib_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_fib *priv = nft_expr_priv(expr); - unsigned int len = NFT_REG32_SIZE; - const struct nft_fib *fib; - - switch (priv->result) { - case NFT_FIB_RESULT_OIF: - break; - case NFT_FIB_RESULT_OIFNAME: - if (priv->flags & NFTA_FIB_F_PRESENT) - len = NFT_REG32_SIZE; - else - len = IFNAMSIZ; - break; - case NFT_FIB_RESULT_ADDRTYPE: - break; - default: - WARN_ON_ONCE(1); - break; - } - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, len); - return false; - } - - fib = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->result != fib->result || - priv->flags != fib->flags) { - nft_reg_track_update(track, expr, priv->dreg, len); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return false; -} -EXPORT_SYMBOL_GPL(nft_fib_reduce); - MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Query routing table from nftables"); MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); diff --git a/net/netfilter/nft_fib_inet.c b/net/netfilter/nft_fib_inet.c index 666a3741d20b..a88d44e163d1 100644 --- a/net/netfilter/nft_fib_inet.c +++ b/net/netfilter/nft_fib_inet.c @@ -49,7 +49,6 @@ static const 
struct nft_expr_ops nft_fib_inet_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, - .reduce = nft_fib_reduce, }; static struct nft_expr_type nft_fib_inet_type __read_mostly = { diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c index 9121ec64e918..3f3478abd845 100644 --- a/net/netfilter/nft_fib_netdev.c +++ b/net/netfilter/nft_fib_netdev.c @@ -58,7 +58,6 @@ static const struct nft_expr_ops nft_fib_netdev_ops = { .init = nft_fib_init, .dump = nft_fib_dump, .validate = nft_fib_validate, - .reduce = nft_fib_reduce, }; static struct nft_expr_type nft_fib_netdev_type __read_mostly = { diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 179d0e59e2b5..32b4281038dd 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -225,7 +225,6 @@ static const struct nft_expr_ops nft_flow_offload_ops = { .destroy = nft_flow_offload_destroy, .validate = nft_flow_offload_validate, .dump = nft_flow_offload_dump, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_flow_offload_type __read_mostly = { diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c index 152a9fb4d23a..4bce36c3a6a0 100644 --- a/net/netfilter/nft_fwd_netdev.c +++ b/net/netfilter/nft_fwd_netdev.c @@ -116,6 +116,11 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, goto out; } iph = ip_hdr(skb); + if (iph->ttl <= 1) { + verdict = NF_DROP; + goto out; + } + ip_decrease_ttl(iph); neigh_table = NEIGH_ARP_TABLE; break; @@ -132,6 +137,11 @@ static void nft_fwd_neigh_eval(const struct nft_expr *expr, goto out; } ip6h = ipv6_hdr(skb); + if (ip6h->hop_limit <= 1) { + verdict = NF_DROP; + goto out; + } + ip6h->hop_limit--; neigh_table = NEIGH_ND_TABLE; break; @@ -218,7 +228,6 @@ static const struct nft_expr_ops nft_fwd_neigh_netdev_ops = { .init = nft_fwd_neigh_init, .dump = nft_fwd_neigh_dump, .validate = nft_fwd_validate, - .reduce = NFT_REDUCE_READONLY, 
}; static const struct nft_expr_ops nft_fwd_netdev_ops = { @@ -228,7 +237,6 @@ static const struct nft_expr_ops nft_fwd_netdev_ops = { .init = nft_fwd_netdev_init, .dump = nft_fwd_netdev_dump, .validate = nft_fwd_validate, - .reduce = NFT_REDUCE_READONLY, .offload = nft_fwd_netdev_offload, .offload_action = nft_fwd_netdev_offload_action, }; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 5d034bbb6913..3bacc9b53789 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -58,8 +58,8 @@ static void nft_symhash_eval(const struct nft_expr *expr, } static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = { - [NFTA_HASH_SREG] = { .type = NLA_U32 }, - [NFTA_HASH_DREG] = { .type = NLA_U32 }, + [NFTA_HASH_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), + [NFTA_HASH_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_HASH_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_HASH_MODULUS] = { .type = NLA_U32 }, [NFTA_HASH_SEED] = { .type = NLA_U32 }, @@ -166,16 +166,6 @@ nla_put_failure: return -1; } -static bool nft_jhash_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_jhash *priv = nft_expr_priv(expr); - - nft_reg_track_cancel(track, priv->dreg, sizeof(u32)); - - return false; -} - static int nft_symhash_dump(struct sk_buff *skb, const struct nft_expr *expr, bool reset) { @@ -196,30 +186,6 @@ nla_put_failure: return -1; } -static bool nft_symhash_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - struct nft_symhash *priv = nft_expr_priv(expr); - struct nft_symhash *symhash; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, sizeof(u32)); - return false; - } - - symhash = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->offset != symhash->offset || - priv->modulus != symhash->modulus) { - nft_reg_track_update(track, expr, priv->dreg, sizeof(u32)); - return false; - } - - if 
(!track->regs[priv->dreg].bitwise) - return true; - - return false; -} - static struct nft_expr_type nft_hash_type; static const struct nft_expr_ops nft_jhash_ops = { .type = &nft_hash_type, @@ -227,7 +193,6 @@ static const struct nft_expr_ops nft_jhash_ops = { .eval = nft_jhash_eval, .init = nft_jhash_init, .dump = nft_jhash_dump, - .reduce = nft_jhash_reduce, }; static const struct nft_expr_ops nft_symhash_ops = { @@ -236,7 +201,6 @@ static const struct nft_expr_ops nft_symhash_ops = { .eval = nft_symhash_eval, .init = nft_symhash_init, .dump = nft_symhash_dump, - .reduce = nft_symhash_reduce, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 02ee5fb69871..d00eb2eb30e4 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -25,7 +25,7 @@ void nft_immediate_eval(const struct nft_expr *expr, } static const struct nla_policy nft_immediate_policy[NFTA_IMMEDIATE_MAX + 1] = { - [NFTA_IMMEDIATE_DREG] = { .type = NLA_U32 }, + [NFTA_IMMEDIATE_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_IMMEDIATE_DATA] = { .type = NLA_NESTED }, }; @@ -279,7 +279,9 @@ static int nft_immediate_offload_verdict(struct nft_offload_ctx *ctx, struct flow_action_entry *entry; const struct nft_data *data; - entry = &flow->rule->action.entries[ctx->num_actions++]; + entry = nft_flow_action_entry_next(ctx, flow); + if (!entry) + return -E2BIG; data = &priv->data; switch (data->verdict.code) { @@ -320,17 +322,6 @@ static bool nft_immediate_offload_action(const struct nft_expr *expr) return false; } -static bool nft_immediate_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_immediate_expr *priv = nft_expr_priv(expr); - - if (priv->dreg != NFT_REG_VERDICT) - nft_reg_track_cancel(track, priv->dreg, priv->dlen); - - return false; -} - static const struct nft_expr_ops nft_imm_ops = { .type = &nft_imm_type, .size = NFT_EXPR_SIZE(sizeof(struct 
nft_immediate_expr)), @@ -341,7 +332,6 @@ static const struct nft_expr_ops nft_imm_ops = { .destroy = nft_immediate_destroy, .dump = nft_immediate_dump, .validate = nft_immediate_validate, - .reduce = nft_immediate_reduce, .offload = nft_immediate_offload, .offload_action = nft_immediate_offload_action, }; diff --git a/net/netfilter/nft_inner.c b/net/netfilter/nft_inner.c index c4569d4b9228..03ffb1159fc1 100644 --- a/net/netfilter/nft_inner.c +++ b/net/netfilter/nft_inner.c @@ -321,7 +321,7 @@ err: static const struct nla_policy nft_inner_policy[NFTA_INNER_MAX + 1] = { [NFTA_INNER_NUM] = { .type = NLA_U32 }, - [NFTA_INNER_FLAGS] = { .type = NLA_U32 }, + [NFTA_INNER_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_INNER_MASK), [NFTA_INNER_HDRSIZE] = { .type = NLA_U32 }, [NFTA_INNER_TYPE] = { .type = NLA_U32 }, [NFTA_INNER_EXPR] = { .type = NLA_NESTED }, diff --git a/net/netfilter/nft_last.c b/net/netfilter/nft_last.c index 20706be12807..996e03a4ad0f 100644 --- a/net/netfilter/nft_last.c +++ b/net/netfilter/nft_last.c @@ -125,7 +125,6 @@ static const struct nft_expr_ops nft_last_ops = { .destroy = nft_last_destroy, .clone = nft_last_clone, .dump = nft_last_dump, - .reduce = NFT_REDUCE_READONLY, }; struct nft_expr_type nft_last_type __read_mostly = { diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index f3b1f791942b..167d99b1447f 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -189,7 +189,7 @@ static const struct nla_policy nft_limit_policy[NFTA_LIMIT_MAX + 1] = { [NFTA_LIMIT_UNIT] = { .type = NLA_U64 }, [NFTA_LIMIT_BURST] = { .type = NLA_U32 }, [NFTA_LIMIT_TYPE] = { .type = NLA_U32 }, - [NFTA_LIMIT_FLAGS] = { .type = NLA_U32 }, + [NFTA_LIMIT_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_LIMIT_F_INV), }; static int nft_limit_pkts_init(const struct nft_ctx *ctx, @@ -243,7 +243,6 @@ static const struct nft_expr_ops nft_limit_pkts_ops = { .destroy = nft_limit_pkts_destroy, .clone = nft_limit_pkts_clone, .dump = nft_limit_pkts_dump, - .reduce = 
NFT_REDUCE_READONLY, }; static void nft_limit_bytes_eval(const struct nft_expr *expr, @@ -299,7 +298,6 @@ static const struct nft_expr_ops nft_limit_bytes_ops = { .dump = nft_limit_bytes_dump, .clone = nft_limit_bytes_clone, .destroy = nft_limit_bytes_destroy, - .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_log.c b/net/netfilter/nft_log.c index bf01cf8a8907..0d868eea6257 100644 --- a/net/netfilter/nft_log.c +++ b/net/netfilter/nft_log.c @@ -69,7 +69,7 @@ static const struct nla_policy nft_log_policy[NFTA_LOG_MAX + 1] = { [NFTA_LOG_SNAPLEN] = { .type = NLA_U32 }, [NFTA_LOG_QTHRESHOLD] = { .type = NLA_U16 }, [NFTA_LOG_LEVEL] = { .type = NLA_U32 }, - [NFTA_LOG_FLAGS] = { .type = NLA_U32 }, + [NFTA_LOG_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NF_LOG_MASK), }; static int nft_log_modprobe(struct net *net, enum nf_log_type t) @@ -235,7 +235,6 @@ static const struct nft_expr_ops nft_log_ops = { .init = nft_log_init, .destroy = nft_log_destroy, .dump = nft_log_dump, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_log_type __read_mostly = { diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index fc2d7c5d83c8..9fafe5afc490 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -125,8 +125,8 @@ static const struct nla_policy nft_lookup_policy[NFTA_LOOKUP_MAX + 1] = { [NFTA_LOOKUP_SET] = { .type = NLA_STRING, .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_LOOKUP_SET_ID] = { .type = NLA_U32 }, - [NFTA_LOOKUP_SREG] = { .type = NLA_U32 }, - [NFTA_LOOKUP_DREG] = { .type = NLA_U32 }, + [NFTA_LOOKUP_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), + [NFTA_LOOKUP_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_LOOKUP_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_LOOKUP_F_INV), }; @@ -266,17 +266,6 @@ static int nft_lookup_validate(const struct nft_ctx *ctx, return 0; } -static bool nft_lookup_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct 
nft_lookup *priv = nft_expr_priv(expr); - - if (priv->set->flags & NFT_SET_MAP) - nft_reg_track_cancel(track, priv->dreg, priv->set->dlen); - - return false; -} - static const struct nft_expr_ops nft_lookup_ops = { .type = &nft_lookup_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), @@ -287,7 +276,6 @@ static const struct nft_expr_ops nft_lookup_ops = { .destroy = nft_lookup_destroy, .dump = nft_lookup_dump, .validate = nft_lookup_validate, - .reduce = nft_lookup_reduce, }; struct nft_expr_type nft_lookup_type __read_mostly = { diff --git a/net/netfilter/nft_masq.c b/net/netfilter/nft_masq.c index 868bd4d73555..2b01128737a3 100644 --- a/net/netfilter/nft_masq.c +++ b/net/netfilter/nft_masq.c @@ -143,7 +143,6 @@ static const struct nft_expr_ops nft_masq_ipv4_ops = { .destroy = nft_masq_ipv4_destroy, .dump = nft_masq_dump, .validate = nft_masq_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_masq_ipv4_type __read_mostly = { @@ -171,7 +170,6 @@ static const struct nft_expr_ops nft_masq_ipv6_ops = { .destroy = nft_masq_ipv6_destroy, .dump = nft_masq_dump, .validate = nft_masq_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_masq_ipv6_type __read_mostly = { @@ -213,7 +211,6 @@ static const struct nft_expr_ops nft_masq_inet_ops = { .destroy = nft_masq_inet_destroy, .dump = nft_masq_dump, .validate = nft_masq_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_masq_inet_type __read_mostly = { diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c index 05cd1e6e6a2f..5b25851381e5 100644 --- a/net/netfilter/nft_meta.c +++ b/net/netfilter/nft_meta.c @@ -23,6 +23,8 @@ #include <net/tcp_states.h> /* for TCP_TIME_WAIT */ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables_ipv4.h> +#include <net/netfilter/nf_tables_ipv6.h> #include <net/netfilter/nft_meta.h> #include <net/netfilter/nf_tables_offload.h> @@ -131,33 
+133,36 @@ nft_meta_get_eval_skugid(enum nft_meta_keys key, u32 *dest, const struct nft_pktinfo *pkt) { - struct sock *sk = skb_to_full_sk(pkt->skb); - struct socket *sock; + const struct sock *sk = skb_to_full_sk(pkt->skb); + const struct socket *sock; + const struct file *file; if (!sk || !sk_fullsock(sk) || !net_eq(nft_net(pkt), sock_net(sk))) return false; - read_lock_bh(&sk->sk_callback_lock); - sock = sk->sk_socket; - if (!sock || !sock->file) { - read_unlock_bh(&sk->sk_callback_lock); + /* The sk pointer remains valid as long as the skb is. The sk_socket and + * file pointer may become NULL if the socket is closed. Both structures + * (including file->cred) are RCU freed which means they can be accessed + * within a RCU read section. + */ + sock = READ_ONCE(sk->sk_socket); + file = sock ? READ_ONCE(sock->file) : NULL; + if (!file) return false; - } switch (key) { case NFT_META_SKUID: *dest = from_kuid_munged(sock_net(sk)->user_ns, - sock->file->f_cred->fsuid); + file->f_cred->fsuid); break; case NFT_META_SKGID: *dest = from_kgid_munged(sock_net(sk)->user_ns, - sock->file->f_cred->fsgid); + file->f_cred->fsgid); break; default: break; } - read_unlock_bh(&sk->sk_callback_lock); return true; } @@ -306,6 +311,54 @@ nft_meta_get_eval_sdifname(u32 *dest, const struct nft_pktinfo *pkt) nft_meta_store_ifname(dest, dev); } +static void nft_meta_pktinfo_may_update(struct nft_pktinfo *pkt) +{ + struct sk_buff *skb = pkt->skb; + struct vlan_ethhdr *veth; + __be16 ethertype; + int nhoff; + + /* Is this an IP packet? Then, skip. */ + if (pkt->flags) + return; + + /* ... else maybe an IP packet over PPPoE or Q-in-Q? 
*/ + switch (skb->protocol) { + case htons(ETH_P_8021Q): + if (!pskb_may_pull(skb, skb_mac_offset(skb) + sizeof(*veth))) + return; + + veth = (struct vlan_ethhdr *)skb_mac_header(skb); + nhoff = VLAN_HLEN; + ethertype = veth->h_vlan_encapsulated_proto; + break; + case htons(ETH_P_PPP_SES): + if (!nf_flow_pppoe_proto(skb, &ethertype)) + return; + + nhoff = PPPOE_SES_HLEN; + break; + default: + return; + } + + nhoff += skb_network_offset(skb); + switch (ethertype) { + case htons(ETH_P_IP): + if (__nft_set_pktinfo_ipv4_validate(pkt, nhoff)) + nft_set_pktinfo_unspec(pkt); + break; + case htons(ETH_P_IPV6): + if (__nft_set_pktinfo_ipv6_validate(pkt, nhoff)) + nft_set_pktinfo_unspec(pkt); + break; + default: + break; + } + + pkt->ethertype = ethertype; +} + void nft_meta_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -319,12 +372,14 @@ void nft_meta_get_eval(const struct nft_expr *expr, *dest = skb->len; break; case NFT_META_PROTOCOL: - nft_reg_store16(dest, (__force u16)skb->protocol); + nft_meta_pktinfo_may_update((struct nft_pktinfo *)pkt); + nft_reg_store16(dest, (__force u16)pkt->ethertype); break; case NFT_META_NFPROTO: nft_reg_store8(dest, nft_pf(pkt)); break; case NFT_META_L4PROTO: + nft_meta_pktinfo_may_update((struct nft_pktinfo *)pkt); if (!(pkt->flags & NFT_PKTINFO_L4PROTO)) goto err; nft_reg_store8(dest, pkt->tprot); @@ -457,9 +512,9 @@ void nft_meta_set_eval(const struct nft_expr *expr, EXPORT_SYMBOL_GPL(nft_meta_set_eval); const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = { - [NFTA_META_DREG] = { .type = NLA_U32 }, + [NFTA_META_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_META_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), - [NFTA_META_SREG] = { .type = NLA_U32 }, + [NFTA_META_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), }; EXPORT_SYMBOL_GPL(nft_meta_policy); @@ -742,60 +797,16 @@ static int nft_meta_get_offload(struct nft_offload_ctx *ctx, return 0; } -bool nft_meta_get_reduce(struct 
nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_meta *priv = nft_expr_priv(expr); - const struct nft_meta *meta; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - meta = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->key != meta->key || - priv->dreg != meta->dreg) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return nft_expr_reduce_bitwise(track, expr); -} -EXPORT_SYMBOL_GPL(nft_meta_get_reduce); - static const struct nft_expr_ops nft_meta_get_ops = { .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), .eval = nft_meta_get_eval, .init = nft_meta_get_init, .dump = nft_meta_get_dump, - .reduce = nft_meta_get_reduce, .validate = nft_meta_get_validate, .offload = nft_meta_get_offload, }; -static bool nft_meta_set_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - int i; - - for (i = 0; i < NFT_REG32_NUM; i++) { - if (!track->regs[i].selector) - continue; - - if (track->regs[i].selector->ops != &nft_meta_get_ops) - continue; - - __nft_reg_track_cancel(track, i); - } - - return false; -} - static const struct nft_expr_ops nft_meta_set_ops = { .type = &nft_meta_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_meta)), @@ -803,7 +814,6 @@ static const struct nft_expr_ops nft_meta_set_ops = { .init = nft_meta_set_init, .destroy = nft_meta_set_destroy, .dump = nft_meta_set_dump, - .reduce = nft_meta_set_reduce, .validate = nft_meta_set_validate, }; diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c index 6e21f72c5b57..e32cd9fbc7c2 100644 --- a/net/netfilter/nft_nat.c +++ b/net/netfilter/nft_nat.c @@ -320,7 +320,6 @@ static const struct nft_expr_ops nft_nat_ops = { .destroy = nft_nat_destroy, .dump = nft_nat_dump, .validate = nft_nat_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct 
nft_expr_type nft_nat_type __read_mostly = { @@ -351,7 +350,6 @@ static const struct nft_expr_ops nft_nat_inet_ops = { .destroy = nft_nat_destroy, .dump = nft_nat_dump, .validate = nft_nat_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_inet_nat_type __read_mostly = { diff --git a/net/netfilter/nft_numgen.c b/net/netfilter/nft_numgen.c index 0a39e51ec9b7..b0c802370159 100644 --- a/net/netfilter/nft_numgen.c +++ b/net/netfilter/nft_numgen.c @@ -43,7 +43,7 @@ static void nft_ng_inc_eval(const struct nft_expr *expr, } static const struct nla_policy nft_ng_policy[NFTA_NG_MAX + 1] = { - [NFTA_NG_DREG] = { .type = NLA_U32 }, + [NFTA_NG_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_NG_MODULUS] = { .type = NLA_U32 }, [NFTA_NG_TYPE] = { .type = NLA_U32 }, [NFTA_NG_OFFSET] = { .type = NLA_U32 }, @@ -84,16 +84,6 @@ err: return err; } -static bool nft_ng_inc_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_ng_inc *priv = nft_expr_priv(expr); - - nft_reg_track_cancel(track, priv->dreg, NFT_REG32_SIZE); - - return false; -} - static int nft_ng_dump(struct sk_buff *skb, enum nft_registers dreg, u32 modulus, enum nft_ng_types type, u32 offset) { @@ -178,16 +168,6 @@ static int nft_ng_random_dump(struct sk_buff *skb, priv->offset); } -static bool nft_ng_random_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_ng_random *priv = nft_expr_priv(expr); - - nft_reg_track_cancel(track, priv->dreg, NFT_REG32_SIZE); - - return false; -} - static struct nft_expr_type nft_ng_type; static const struct nft_expr_ops nft_ng_inc_ops = { .type = &nft_ng_type, @@ -196,7 +176,6 @@ static const struct nft_expr_ops nft_ng_inc_ops = { .init = nft_ng_inc_init, .destroy = nft_ng_inc_destroy, .dump = nft_ng_inc_dump, - .reduce = nft_ng_inc_reduce, }; static const struct nft_expr_ops nft_ng_random_ops = { @@ -205,7 +184,6 @@ static const struct nft_expr_ops nft_ng_random_ops = { .eval = 
nft_ng_random_eval, .init = nft_ng_random_init, .dump = nft_ng_random_dump, - .reduce = nft_ng_random_reduce, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 1a62e384766a..249ded517446 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -123,7 +123,6 @@ static const struct nft_expr_ops nft_objref_ops = { .deactivate = nft_objref_deactivate, .dump = nft_objref_dump, .validate = nft_objref_validate, - .reduce = NFT_REDUCE_READONLY, }; struct nft_objref_map { @@ -245,7 +244,6 @@ static const struct nft_expr_ops nft_objref_map_ops = { .destroy = nft_objref_map_destroy, .dump = nft_objref_map_dump, .validate = nft_objref_map_validate, - .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops * @@ -267,7 +265,7 @@ static const struct nla_policy nft_objref_policy[NFTA_OBJREF_MAX + 1] = { [NFTA_OBJREF_IMM_NAME] = { .type = NLA_STRING, .len = NFT_OBJ_MAXNAMELEN - 1 }, [NFTA_OBJREF_IMM_TYPE] = { .type = NLA_U32 }, - [NFTA_OBJREF_SET_SREG] = { .type = NLA_U32 }, + [NFTA_OBJREF_SET_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_OBJREF_SET_NAME] = { .type = NLA_STRING, .len = NFT_SET_MAXNAMELEN - 1 }, [NFTA_OBJREF_SET_ID] = { .type = NLA_U32 }, diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 1c0b493ef0a9..18003433476c 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -12,9 +12,9 @@ struct nft_osf { }; static const struct nla_policy nft_osf_policy[NFTA_OSF_MAX + 1] = { - [NFTA_OSF_DREG] = { .type = NLA_U32 }, + [NFTA_OSF_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_OSF_TTL] = { .type = NLA_U8 }, - [NFTA_OSF_FLAGS] = { .type = NLA_U32 }, + [NFTA_OSF_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_OSF_F_VERSION), }; static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, @@ -127,30 +127,6 @@ static int nft_osf_validate(const struct nft_ctx *ctx, return nft_chain_validate_hooks(ctx->chain, hooks); } -static bool 
nft_osf_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - struct nft_osf *priv = nft_expr_priv(expr); - struct nft_osf *osf; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, NFT_OSF_MAXGENRELEN); - return false; - } - - osf = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->flags != osf->flags || - priv->ttl != osf->ttl) { - nft_reg_track_update(track, expr, priv->dreg, NFT_OSF_MAXGENRELEN); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return false; -} - static struct nft_expr_type nft_osf_type; static const struct nft_expr_ops nft_osf_op = { .eval = nft_osf_eval, @@ -159,7 +135,6 @@ static const struct nft_expr_ops nft_osf_op = { .dump = nft_osf_dump, .type = &nft_osf_type, .validate = nft_osf_validate, - .reduce = nft_osf_reduce, }; static struct nft_expr_type nft_osf_type __read_mostly = { diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index b0214418f75a..01e13e5255a9 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -183,7 +183,7 @@ void nft_payload_eval(const struct nft_expr *expr, offset = skb_mac_header(skb) - skb->data; break; case NFT_PAYLOAD_NETWORK_HEADER: - offset = skb_network_offset(skb); + offset = skb_network_offset(skb) + pkt->nhoff; break; case NFT_PAYLOAD_TRANSPORT_HEADER: if (!(pkt->flags & NFT_PKTINFO_L4PROTO) || pkt->fragoff) @@ -209,14 +209,14 @@ err: } static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { - [NFTA_PAYLOAD_SREG] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), + [NFTA_PAYLOAD_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, [NFTA_PAYLOAD_OFFSET] = { .type = NLA_BE32 }, [NFTA_PAYLOAD_LEN] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, [NFTA_PAYLOAD_CSUM_OFFSET] = 
NLA_POLICY_MAX(NLA_BE32, 255), - [NFTA_PAYLOAD_CSUM_FLAGS] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_PAYLOAD_L4CSUM_PSEUDOHDR), }; static int nft_payload_init(const struct nft_ctx *ctx, @@ -250,31 +250,6 @@ nla_put_failure: return -1; } -static bool nft_payload_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_payload *priv = nft_expr_priv(expr); - const struct nft_payload *payload; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - payload = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->base != payload->base || - priv->offset != payload->offset || - priv->len != payload->len) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return nft_expr_reduce_bitwise(track, expr); -} - static bool nft_payload_offload_mask(struct nft_offload_reg *reg, u32 priv_len, u32 field_len) { @@ -578,7 +553,6 @@ static const struct nft_expr_ops nft_payload_ops = { .eval = nft_payload_eval, .init = nft_payload_init, .dump = nft_payload_dump, - .reduce = nft_payload_reduce, .offload = nft_payload_offload, }; @@ -588,7 +562,6 @@ const struct nft_expr_ops nft_payload_fast_ops = { .eval = nft_payload_eval, .init = nft_payload_init, .dump = nft_payload_dump, - .reduce = nft_payload_reduce, .offload = nft_payload_offload, }; @@ -1012,32 +985,12 @@ nla_put_failure: return -1; } -static bool nft_payload_set_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - int i; - - for (i = 0; i < NFT_REG32_NUM; i++) { - if (!track->regs[i].selector) - continue; - - if (track->regs[i].selector->ops != &nft_payload_ops && - track->regs[i].selector->ops != &nft_payload_fast_ops) - continue; - - __nft_reg_track_cancel(track, i); - } - - return false; -} - static const struct nft_expr_ops nft_payload_set_ops = { .type = 
&nft_payload_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_payload_set)), .eval = nft_payload_set_eval, .init = nft_payload_set_init, .dump = nft_payload_set_dump, - .reduce = nft_payload_set_reduce, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_queue.c b/net/netfilter/nft_queue.c index 344fe311878f..b83d209db886 100644 --- a/net/netfilter/nft_queue.c +++ b/net/netfilter/nft_queue.c @@ -95,7 +95,7 @@ static int nft_queue_validate(const struct nft_ctx *ctx, static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = { [NFTA_QUEUE_NUM] = { .type = NLA_U16 }, [NFTA_QUEUE_TOTAL] = { .type = NLA_U16 }, - [NFTA_QUEUE_FLAGS] = { .type = NLA_U16 }, + [NFTA_QUEUE_FLAGS] = NLA_POLICY_MASK(NLA_BE16, NFT_QUEUE_FLAG_MASK), [NFTA_QUEUE_SREG_QNUM] = { .type = NLA_U32 }, }; @@ -191,7 +191,6 @@ static const struct nft_expr_ops nft_queue_ops = { .init = nft_queue_init, .dump = nft_queue_dump, .validate = nft_queue_validate, - .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops nft_queue_sreg_ops = { @@ -201,7 +200,6 @@ static const struct nft_expr_ops nft_queue_sreg_ops = { .init = nft_queue_sreg_init, .dump = nft_queue_sreg_dump, .validate = nft_queue_validate, - .reduce = NFT_REDUCE_READONLY, }; static const struct nft_expr_ops * diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 2390a993aed9..6ed7c4409706 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -46,7 +46,7 @@ static inline void nft_quota_do_eval(struct nft_quota *priv, static const struct nla_policy nft_quota_policy[NFTA_QUOTA_MAX + 1] = { [NFTA_QUOTA_BYTES] = { .type = NLA_U64 }, - [NFTA_QUOTA_FLAGS] = { .type = NLA_U32 }, + [NFTA_QUOTA_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_QUOTA_F_INV), [NFTA_QUOTA_CONSUMED] = { .type = NLA_U64 }, }; @@ -266,7 +266,6 @@ static const struct nft_expr_ops nft_quota_ops = { .destroy = nft_quota_destroy, .clone = nft_quota_clone, .dump = nft_quota_dump, - .reduce = NFT_REDUCE_READONLY, }; static 
struct nft_expr_type nft_quota_type __read_mostly = { diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c index ea382f7bbd78..f8a1641afccf 100644 --- a/net/netfilter/nft_range.c +++ b/net/netfilter/nft_range.c @@ -41,7 +41,7 @@ void nft_range_eval(const struct nft_expr *expr, } static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = { - [NFTA_RANGE_SREG] = { .type = NLA_U32 }, + [NFTA_RANGE_SREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_RANGE_OP] = NLA_POLICY_MAX(NLA_BE32, 255), [NFTA_RANGE_FROM_DATA] = { .type = NLA_NESTED }, [NFTA_RANGE_TO_DATA] = { .type = NLA_NESTED }, @@ -138,7 +138,6 @@ static const struct nft_expr_ops nft_range_ops = { .eval = nft_range_eval, .init = nft_range_init, .dump = nft_range_dump, - .reduce = NFT_REDUCE_READONLY, }; struct nft_expr_type nft_range_type __read_mostly = { diff --git a/net/netfilter/nft_redir.c b/net/netfilter/nft_redir.c index 95eedad85c83..58ae802db8f5 100644 --- a/net/netfilter/nft_redir.c +++ b/net/netfilter/nft_redir.c @@ -146,7 +146,6 @@ static const struct nft_expr_ops nft_redir_ipv4_ops = { .destroy = nft_redir_ipv4_destroy, .dump = nft_redir_dump, .validate = nft_redir_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_redir_ipv4_type __read_mostly = { @@ -174,7 +173,6 @@ static const struct nft_expr_ops nft_redir_ipv6_ops = { .destroy = nft_redir_ipv6_destroy, .dump = nft_redir_dump, .validate = nft_redir_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_redir_ipv6_type __read_mostly = { @@ -203,7 +201,6 @@ static const struct nft_expr_ops nft_redir_inet_ops = { .destroy = nft_redir_inet_destroy, .dump = nft_redir_dump, .validate = nft_redir_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_redir_inet_type __read_mostly = { diff --git a/net/netfilter/nft_reject_inet.c b/net/netfilter/nft_reject_inet.c index 49020e67304a..dcae83ddc32e 100644 --- a/net/netfilter/nft_reject_inet.c +++ 
b/net/netfilter/nft_reject_inet.c @@ -79,7 +79,6 @@ static const struct nft_expr_ops nft_reject_inet_ops = { .init = nft_reject_init, .dump = nft_reject_dump, .validate = nft_reject_inet_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_reject_inet_type __read_mostly = { diff --git a/net/netfilter/nft_reject_netdev.c b/net/netfilter/nft_reject_netdev.c index 2558ce1505d9..b53e81e4ca75 100644 --- a/net/netfilter/nft_reject_netdev.c +++ b/net/netfilter/nft_reject_netdev.c @@ -158,7 +158,6 @@ static const struct nft_expr_ops nft_reject_netdev_ops = { .init = nft_reject_init, .dump = nft_reject_dump, .validate = nft_reject_netdev_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_reject_netdev_type __read_mostly = { diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index dc50b9a5bd68..e23cd4759851 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -103,7 +103,7 @@ err: } static const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = { - [NFTA_RT_DREG] = { .type = NLA_U32 }, + [NFTA_RT_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_RT_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), }; @@ -195,7 +195,6 @@ static const struct nft_expr_ops nft_rt_get_ops = { .init = nft_rt_get_init, .dump = nft_rt_get_dump, .validate = nft_rt_validate, - .reduce = NFT_REDUCE_READONLY, }; struct nft_expr_type nft_rt_type __read_mostly = { diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 7fd24e0cc428..50d4a4f04309 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -452,8 +452,6 @@ static struct nft_pipapo_elem *pipapo_get_slow(const struct nft_pipapo_match *m, pipapo_and_field_buckets_4bit(f, res_map, data); NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4; - data += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f); - /* Now populate the bitmap for the next field, unless this is * the last field, in which case return the matched 'ext' * pointer if any. 
@@ -498,7 +496,7 @@ next_match: map_index = !map_index; swap(res_map, fill_map); - data += NFT_PIPAPO_GROUPS_PADDING(f); + data += NFT_PIPAPO_GROUPS_PADDED_SIZE(f); } __local_unlock_nested_bh(&scratch->bh_lock); diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h index 9aee9a9eaeb7..b82abb03576e 100644 --- a/net/netfilter/nft_set_pipapo.h +++ b/net/netfilter/nft_set_pipapo.h @@ -42,9 +42,6 @@ /* Fields are padded to 32 bits in input registers */ #define NFT_PIPAPO_GROUPS_PADDED_SIZE(f) \ (round_up((f)->groups / NFT_PIPAPO_GROUPS_PER_BYTE(f), sizeof(u32))) -#define NFT_PIPAPO_GROUPS_PADDING(f) \ - (NFT_PIPAPO_GROUPS_PADDED_SIZE(f) - (f)->groups / \ - NFT_PIPAPO_GROUPS_PER_BYTE(f)) /* Number of buckets given by 2 ^ n, with n bucket bits */ #define NFT_PIPAPO_BUCKETS(bb) (1 << (bb)) diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c index 6395982e4d95..dad265807b8b 100644 --- a/net/netfilter/nft_set_pipapo_avx2.c +++ b/net/netfilter/nft_set_pipapo_avx2.c @@ -1041,7 +1041,6 @@ nothing: * @map: Previous match result, used as initial bitmap * @fill: Destination bitmap to be filled with current match result * @f: Field, containing lookup and mapping tables - * @offset: Ignore buckets before the given index, no bits are filled there * @pkt: Packet data, pointer to input nftables register * @first: If this is the first field, don't source previous result * @last: Last field: stop at the first match and return bit index @@ -1056,32 +1055,19 @@ nothing: static int nft_pipapo_avx2_lookup_slow(const struct nft_pipapo_match *mdata, unsigned long *map, unsigned long *fill, const struct nft_pipapo_field *f, - int offset, const u8 *pkt, + const u8 *pkt, bool first, bool last) { - unsigned long bsize = f->bsize; - int i, ret = -1, b; - if (first) pipapo_resmap_init(mdata, map); - for (i = offset; i < bsize; i++) { - if (f->bb == 8) - pipapo_and_field_buckets_8bit(f, map, pkt); - else - pipapo_and_field_buckets_4bit(f, map, 
pkt); - NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4; - - b = pipapo_refill(map, bsize, f->rules, fill, f->mt, last); + if (f->bb == 8) + pipapo_and_field_buckets_8bit(f, map, pkt); + else + pipapo_and_field_buckets_4bit(f, map, pkt); + NFT_PIPAPO_GROUP_BITS_ARE_8_OR_4; - if (last) - return b; - - if (ret == -1) - ret = b / XSAVE_YMM_SIZE; - } - - return ret; + return pipapo_refill(map, f->bsize, f->rules, fill, f->mt, last); } /** @@ -1201,7 +1187,7 @@ struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m, NFT_SET_PIPAPO_AVX2_LOOKUP(8, 16); } else { ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f, - ret, data, + data, first, last); } } else { @@ -1217,7 +1203,7 @@ struct nft_pipapo_elem *pipapo_get_avx2(const struct nft_pipapo_match *m, NFT_SET_PIPAPO_AVX2_LOOKUP(4, 32); } else { ret = nft_pipapo_avx2_lookup_slow(m, res, fill, f, - ret, data, + data, first, last); } } diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 737c339decd0..560fbe6e3f75 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -697,21 +697,14 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, cond_resched(); - write_lock_bh(&priv->lock); + write_lock(&priv->lock); err = __nft_rbtree_insert(net, set, rbe, elem_priv, tstamp); - write_unlock_bh(&priv->lock); + write_unlock(&priv->lock); } while (err == -EAGAIN); return err; } -static void nft_rbtree_erase(struct nft_rbtree *priv, struct nft_rbtree_elem *rbe) -{ - write_lock_bh(&priv->lock); - rb_erase(&rbe->node, &priv->root); - write_unlock_bh(&priv->lock); -} - static void nft_rbtree_remove(const struct net *net, const struct nft_set *set, struct nft_elem_priv *elem_priv) @@ -719,7 +712,9 @@ static void nft_rbtree_remove(const struct net *net, struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv); struct nft_rbtree *priv = nft_set_priv(set); - nft_rbtree_erase(priv, rbe); + write_lock(&priv->lock); + rb_erase(&rbe->node, &priv->root); + 
write_unlock(&priv->lock); } static void nft_rbtree_activate(const struct net *net, @@ -880,9 +875,9 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, nft_rbtree_do_walk(ctx, set, iter); break; case NFT_ITER_READ: - read_lock_bh(&priv->lock); + read_lock(&priv->lock); nft_rbtree_do_walk(ctx, set, iter); - read_unlock_bh(&priv->lock); + read_unlock(&priv->lock); break; default: iter->err = -EINVAL; @@ -918,14 +913,14 @@ static void nft_rbtree_gc_scan(struct nft_set *set) /* end element needs to be removed first, it has * no timeout extension. */ - write_lock_bh(&priv->lock); + write_lock(&priv->lock); if (rbe_end) { nft_rbtree_gc_elem_move(net, set, priv, rbe_end); rbe_end = NULL; } nft_rbtree_gc_elem_move(net, set, priv, rbe); - write_unlock_bh(&priv->lock); + write_unlock(&priv->lock); } priv->last_gc = jiffies; diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c index 36affbb697c2..a146a45d7531 100644 --- a/net/netfilter/nft_socket.c +++ b/net/netfilter/nft_socket.c @@ -163,7 +163,7 @@ out_put_sk: static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = { [NFTA_SOCKET_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), - [NFTA_SOCKET_DREG] = { .type = NLA_U32 }, + [NFTA_SOCKET_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), [NFTA_SOCKET_LEVEL] = NLA_POLICY_MAX(NLA_BE32, 255), }; @@ -249,31 +249,6 @@ static int nft_socket_dump(struct sk_buff *skb, return 0; } -static bool nft_socket_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_socket *priv = nft_expr_priv(expr); - const struct nft_socket *socket; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - socket = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->key != socket->key || - priv->dreg != socket->dreg || - priv->level != socket->level) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - if 
(!track->regs[priv->dreg].bitwise) - return true; - - return nft_expr_reduce_bitwise(track, expr); -} - static int nft_socket_validate(const struct nft_ctx *ctx, const struct nft_expr *expr) { @@ -296,7 +271,6 @@ static const struct nft_expr_ops nft_socket_ops = { .init = nft_socket_init, .dump = nft_socket_dump, .validate = nft_socket_validate, - .reduce = nft_socket_reduce, }; static struct nft_expr_type nft_socket_type __read_mostly = { diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c index b71ef18b0e8c..7641f249614c 100644 --- a/net/netfilter/nft_synproxy.c +++ b/net/netfilter/nft_synproxy.c @@ -17,8 +17,8 @@ struct nft_synproxy { static const struct nla_policy nft_synproxy_policy[NFTA_SYNPROXY_MAX + 1] = { [NFTA_SYNPROXY_MSS] = { .type = NLA_U16 }, - [NFTA_SYNPROXY_WSCALE] = { .type = NLA_U8 }, - [NFTA_SYNPROXY_FLAGS] = { .type = NLA_U32 }, + [NFTA_SYNPROXY_WSCALE] = NLA_POLICY_MAX(NLA_U8, TCP_MAX_WSCALE), + [NFTA_SYNPROXY_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NF_SYNPROXY_OPT_MASK), }; static void nft_synproxy_tcp_options(struct synproxy_options *opts, @@ -292,7 +292,6 @@ static const struct nft_expr_ops nft_synproxy_ops = { .dump = nft_synproxy_dump, .type = &nft_synproxy_type, .validate = nft_synproxy_validate, - .reduce = NFT_REDUCE_READONLY, }; static struct nft_expr_type nft_synproxy_type __read_mostly = { diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c index 50481280abd2..f2101af8c867 100644 --- a/net/netfilter/nft_tproxy.c +++ b/net/netfilter/nft_tproxy.c @@ -331,7 +331,6 @@ static const struct nft_expr_ops nft_tproxy_ops = { .init = nft_tproxy_init, .destroy = nft_tproxy_destroy, .dump = nft_tproxy_dump, - .reduce = NFT_REDUCE_READONLY, .validate = nft_tproxy_validate, }; diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c index a12486ae089d..0b987bc2132a 100644 --- a/net/netfilter/nft_tunnel.c +++ b/net/netfilter/nft_tunnel.c @@ -67,8 +67,8 @@ static void nft_tunnel_get_eval(const struct 
nft_expr *expr, static const struct nla_policy nft_tunnel_policy[NFTA_TUNNEL_MAX + 1] = { [NFTA_TUNNEL_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), - [NFTA_TUNNEL_DREG] = { .type = NLA_U32 }, - [NFTA_TUNNEL_MODE] = NLA_POLICY_MAX(NLA_BE32, 255), + [NFTA_TUNNEL_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), + [NFTA_TUNNEL_MODE] = NLA_POLICY_MAX(NLA_BE32, NFT_TUNNEL_MODE_MAX), }; static int nft_tunnel_get_init(const struct nft_ctx *ctx, @@ -124,31 +124,6 @@ nla_put_failure: return -1; } -static bool nft_tunnel_get_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_tunnel *priv = nft_expr_priv(expr); - const struct nft_tunnel *tunnel; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - tunnel = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->key != tunnel->key || - priv->dreg != tunnel->dreg || - priv->mode != tunnel->mode) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return false; -} - static struct nft_expr_type nft_tunnel_type; static const struct nft_expr_ops nft_tunnel_get_ops = { .type = &nft_tunnel_type, @@ -156,7 +131,6 @@ static const struct nft_expr_ops nft_tunnel_get_ops = { .eval = nft_tunnel_get_eval, .init = nft_tunnel_get_init, .dump = nft_tunnel_get_dump, - .reduce = nft_tunnel_get_reduce, }; static struct nft_expr_type nft_tunnel_type __read_mostly = { @@ -434,7 +408,7 @@ static const struct nla_policy nft_tunnel_key_policy[NFTA_TUNNEL_KEY_MAX + 1] = [NFTA_TUNNEL_KEY_IP] = { .type = NLA_NESTED, }, [NFTA_TUNNEL_KEY_IP6] = { .type = NLA_NESTED, }, [NFTA_TUNNEL_KEY_ID] = { .type = NLA_U32, }, - [NFTA_TUNNEL_KEY_FLAGS] = { .type = NLA_U32, }, + [NFTA_TUNNEL_KEY_FLAGS] = NLA_POLICY_MASK(NLA_BE32, NFT_TUNNEL_F_MASK), [NFTA_TUNNEL_KEY_TOS] = { .type = NLA_U8, }, [NFTA_TUNNEL_KEY_TTL] = { .type = NLA_U8, }, [NFTA_TUNNEL_KEY_SPORT] 
= { .type = NLA_U16, }, diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c index 3210cfc966ab..65a75d88e5f0 100644 --- a/net/netfilter/nft_xfrm.c +++ b/net/netfilter/nft_xfrm.c @@ -17,9 +17,9 @@ static const struct nla_policy nft_xfrm_policy[NFTA_XFRM_MAX + 1] = { [NFTA_XFRM_KEY] = NLA_POLICY_MAX(NLA_BE32, 255), - [NFTA_XFRM_DIR] = { .type = NLA_U8 }, - [NFTA_XFRM_SPNUM] = NLA_POLICY_MAX(NLA_BE32, 255), - [NFTA_XFRM_DREG] = { .type = NLA_U32 }, + [NFTA_XFRM_DIR] = NLA_POLICY_MAX(NLA_U8, XFRM_POLICY_OUT), + [NFTA_XFRM_SPNUM] = NLA_POLICY_MAX(NLA_BE32, XFRM_MAX_DEPTH - 1), + [NFTA_XFRM_DREG] = NLA_POLICY_MAX(NLA_BE32, NFT_REG32_MAX), }; struct nft_xfrm { @@ -259,32 +259,6 @@ static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *e return nft_chain_validate_hooks(ctx->chain, hooks); } -static bool nft_xfrm_reduce(struct nft_regs_track *track, - const struct nft_expr *expr) -{ - const struct nft_xfrm *priv = nft_expr_priv(expr); - const struct nft_xfrm *xfrm; - - if (!nft_reg_track_cmp(track, expr, priv->dreg)) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - xfrm = nft_expr_priv(track->regs[priv->dreg].selector); - if (priv->key != xfrm->key || - priv->dreg != xfrm->dreg || - priv->dir != xfrm->dir || - priv->spnum != xfrm->spnum) { - nft_reg_track_update(track, expr, priv->dreg, priv->len); - return false; - } - - if (!track->regs[priv->dreg].bitwise) - return true; - - return nft_expr_reduce_bitwise(track, expr); -} - static struct nft_expr_type nft_xfrm_type; static const struct nft_expr_ops nft_xfrm_get_ops = { .type = &nft_xfrm_type, @@ -293,7 +267,6 @@ static const struct nft_expr_ops nft_xfrm_get_ops = { .init = nft_xfrm_get_init, .dump = nft_xfrm_get_dump, .validate = nft_xfrm_validate, - .reduce = nft_xfrm_reduce, }; static struct nft_expr_type nft_xfrm_type __read_mostly = { diff --git a/net/netfilter/utils.c b/net/netfilter/utils.c index 008419db815a..29c4dcc362c7 100644 --- 
a/net/netfilter/utils.c +++ b/net/netfilter/utils.c @@ -163,7 +163,6 @@ EXPORT_SYMBOL_GPL(nf_checksum_partial); int nf_route(struct net *net, struct dst_entry **dst, struct flowi *fl, bool strict, unsigned short family) { - const struct nf_ipv6_ops *v6ops __maybe_unused; int ret = 0; switch (family) { diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index b39017c80548..9f837fb5ceb4 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -819,13 +819,17 @@ EXPORT_SYMBOL_GPL(xt_compat_match_to_user); /* non-compat version may have padding after verdict */ struct compat_xt_standard_target { - struct compat_xt_entry_target t; - compat_uint_t verdict; + /* Must be last as it ends in a flexible-array member. */ + TRAILING_OVERLAP(struct compat_xt_entry_target, t, data, + compat_uint_t verdict; + ); }; struct compat_xt_error_target { - struct compat_xt_entry_target t; - char errorname[XT_FUNCTION_MAXNAMELEN]; + /* Must be last as it ends in a flexible-array member. */ + TRAILING_OVERLAP(struct compat_xt_entry_target, t, data, + char errorname[XT_FUNCTION_MAXNAMELEN]; + ); }; int xt_compat_check_entry_offsets(const void *base, const char *elems, diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 2aabdcea8707..1c6ffc7f1622 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -1,5 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 /* Kernel module to match connection tracking byte counter. - * GPL (C) 2002 Martin Devera (devik@cdi.cz). + * (C) 2002 Martin Devera (devik@cdi.cz). */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index 848287ab79cf..42df9e175aff 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0-or-later /* * netfilter module to limit the number of parallel tcp * connections per IP address. 
@@ -9,7 +10,7 @@ * based on ... * * Kernel module to match connection tracking information. - * GPL (C) 1999 Rusty Russell (rusty@rustcorp.com.au). + * (C) 1999 Rusty Russell (rusty@rustcorp.com.au). */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c index 037ab93e25d0..3db81e041af9 100644 --- a/net/netfilter/xt_dccp.c +++ b/net/netfilter/xt_dccp.c @@ -159,6 +159,9 @@ static int __init dccp_mt_init(void) { int ret; + pr_warn_once("The DCCP match is deprecated and scheduled to be removed in 2027.\n" + "Please contact the netfilter-devel mailing list or update your iptables rules\n"); + /* doff is 8 bits, so the maximum option size is (4*256). Don't put * this in BSS since DaveM is worried about locked TLB's for kernel * BSS. */ diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c index c1a70f8f0441..4a12a757ecbf 100644 --- a/net/netfilter/xt_hl.c +++ b/net/netfilter/xt_hl.c @@ -6,6 +6,7 @@ * Hop Limit matching module * (C) 2001-2002 Maciej Soltysiak <solt@dns.toxicfilms.tv> */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/ip.h> #include <linux/ipv6.h> @@ -22,6 +23,18 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("ipt_ttl"); MODULE_ALIAS("ip6t_hl"); +static int ttl_mt_check(const struct xt_mtchk_param *par) +{ + const struct ipt_ttl_info *info = par->matchinfo; + + if (info->mode > IPT_TTL_GT) { + pr_err("Unknown TTL match mode: %d\n", info->mode); + return -EINVAL; + } + + return 0; +} + static bool ttl_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct ipt_ttl_info *info = par->matchinfo; @@ -41,6 +54,18 @@ static bool ttl_mt(const struct sk_buff *skb, struct xt_action_param *par) return false; } +static int hl_mt6_check(const struct xt_mtchk_param *par) +{ + const struct ip6t_hl_info *info = par->matchinfo; + + if (info->mode > IP6T_HL_GT) { + pr_err("Unknown Hop Limit match mode: %d\n", info->mode); + return -EINVAL; + } + + return 0; +} + static bool hl_mt6(const 
struct sk_buff *skb, struct xt_action_param *par) { const struct ip6t_hl_info *info = par->matchinfo; @@ -65,6 +90,7 @@ static struct xt_match hl_mt_reg[] __read_mostly = { .name = "ttl", .revision = 0, .family = NFPROTO_IPV4, + .checkentry = ttl_mt_check, .match = ttl_mt, .matchsize = sizeof(struct ipt_ttl_info), .me = THIS_MODULE, @@ -73,6 +99,7 @@ static struct xt_match hl_mt_reg[] __read_mostly = { .name = "hl", .revision = 0, .family = NFPROTO_IPV6, + .checkentry = hl_mt6_check, .match = hl_mt6, .matchsize = sizeof(struct ip6t_hl_info), .me = THIS_MODULE, diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c index 81649da57ba5..4798cd2ca26e 100644 --- a/net/netfilter/xt_mac.c +++ b/net/netfilter/xt_mac.c @@ -29,9 +29,7 @@ static bool mac_mt(const struct sk_buff *skb, struct xt_action_param *par) if (skb->dev == NULL || skb->dev->type != ARPHRD_ETHER) return false; - if (skb_mac_header(skb) < skb->head) - return false; - if (skb_mac_header(skb) + ETH_HLEN > skb->data) + if (!skb_mac_header_was_set(skb) || skb_mac_header_len(skb) < ETH_HLEN) return false; ret = ether_addr_equal(eth_hdr(skb)->h_source, info->srcaddr); ret ^= info->invert; diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 50332888c8d2..5bfb4843df66 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -63,11 +63,12 @@ static bool owner_mt(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_owner_match_info *info = par->matchinfo; - const struct file *filp; struct sock *sk = skb_to_full_sk(skb); struct net *net = xt_net(par); + const struct socket *sock; + const struct file *filp; - if (!sk || !sk->sk_socket || !net_eq(net, sock_net(sk))) + if (!sk || !READ_ONCE(sk->sk_socket) || !net_eq(net, sock_net(sk))) return (info->match ^ info->invert) == 0; else if (info->match & info->invert & XT_OWNER_SOCKET) /* @@ -76,23 +77,25 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) */ return false; - 
read_lock_bh(&sk->sk_callback_lock); - filp = sk->sk_socket ? sk->sk_socket->file : NULL; - if (filp == NULL) { - read_unlock_bh(&sk->sk_callback_lock); + /* The sk pointer remains valid as long as the skb is. The sk_socket and + * file pointer may become NULL if the socket is closed. Both structures + * (including file->cred) are RCU freed which means they can be accessed + * within a RCU read section. + */ + sock = READ_ONCE(sk->sk_socket); + filp = sock ? READ_ONCE(sock->file) : NULL; + if (filp == NULL) return ((info->match ^ info->invert) & (XT_OWNER_UID | XT_OWNER_GID)) == 0; - } if (info->match & XT_OWNER_UID) { kuid_t uid_min = make_kuid(net->user_ns, info->uid_min); kuid_t uid_max = make_kuid(net->user_ns, info->uid_max); + if ((uid_gte(filp->f_cred->fsuid, uid_min) && uid_lte(filp->f_cred->fsuid, uid_max)) ^ - !(info->invert & XT_OWNER_UID)) { - read_unlock_bh(&sk->sk_callback_lock); + !(info->invert & XT_OWNER_UID)) return false; - } } if (info->match & XT_OWNER_GID) { @@ -117,13 +120,10 @@ owner_mt(const struct sk_buff *skb, struct xt_action_param *par) } } - if (match ^ !(info->invert & XT_OWNER_GID)) { - read_unlock_bh(&sk->sk_callback_lock); + if (match ^ !(info->invert & XT_OWNER_GID)) return false; - } } - read_unlock_bh(&sk->sk_callback_lock); return true; } diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index 343e65f377d4..53997771013f 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -107,6 +107,28 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) return -EINVAL; } +#define X(memb) strnlen(info->memb, sizeof(info->memb)) >= sizeof(info->memb) + if (info->bitmask & XT_PHYSDEV_OP_IN) { + if (info->physindev[0] == '\0') + return -EINVAL; + if (X(physindev)) + return -ENAMETOOLONG; + } + + if (info->bitmask & XT_PHYSDEV_OP_OUT) { + if (info->physoutdev[0] == '\0') + return -EINVAL; + + if (X(physoutdev)) + return -ENAMETOOLONG; + } + + if (X(in_mask)) + return -ENAMETOOLONG; + if 
(X(out_mask)) + return -ENAMETOOLONG; +#undef X + if (!brnf_probed) { brnf_probed = true; request_module("br_netfilter"); diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 76e01f292aaf..811e53bee408 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -168,52 +168,41 @@ static int socket_mt_enable_defrag(struct net *net, int family) static int socket_mt_v1_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo1 *info = (struct xt_socket_mtinfo1 *) par->matchinfo; - int err; - - err = socket_mt_enable_defrag(par->net, par->family); - if (err) - return err; if (info->flags & ~XT_SOCKET_FLAGS_V1) { pr_info_ratelimited("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1); return -EINVAL; } - return 0; + + return socket_mt_enable_defrag(par->net, par->family); } static int socket_mt_v2_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo2 *info = (struct xt_socket_mtinfo2 *) par->matchinfo; - int err; - - err = socket_mt_enable_defrag(par->net, par->family); - if (err) - return err; if (info->flags & ~XT_SOCKET_FLAGS_V2) { pr_info_ratelimited("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2); return -EINVAL; } - return 0; + + return socket_mt_enable_defrag(par->net, par->family); } static int socket_mt_v3_check(const struct xt_mtchk_param *par) { const struct xt_socket_mtinfo3 *info = (struct xt_socket_mtinfo3 *)par->matchinfo; - int err; - err = socket_mt_enable_defrag(par->net, par->family); - if (err) - return err; if (info->flags & ~XT_SOCKET_FLAGS_V3) { pr_info_ratelimited("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V3); return -EINVAL; } - return 0; + + return socket_mt_enable_defrag(par->net, par->family); } static void socket_mt_destroy(const struct xt_mtdtor_param *par) diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index d9d74011bb64..2065fce8ef81 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -1,3 +1,4 @@ +// 
SPDX-License-Identifier: GPL-2.0 /* * xt_time * Copyright © CC Computer Consultants GmbH, 2007 @@ -6,8 +7,6 @@ * This is a module which is used for time matching * It is using some modified code from dietlibc (localtime() function) * that you can find at https://www.fefe.de/dietlibc/ - * This file is distributed under the terms of the GNU General Public - * License (GPL). Copies of the GPL can be obtained from gnu.org/gpl. */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index aba847902be5..2aeb0680807d 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2108,8 +2108,8 @@ int __netlink_change_ngroups(struct sock *sk, unsigned int groups) * This changes the number of multicast groups that are available * on a certain netlink family. Note that it is not possible to * change the number of groups to below 32. Also note that it does - * not implicitly call netlink_clear_multicast_users() when the - * number of groups is reduced. + * not implicitly clear listeners from groups that are removed when + * the number of groups is reduced. * * @sk: The kernel netlink socket, as returned by netlink_kernel_create(). * @groups: The new number of groups. 
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index a23d4c51c089..d251d894afd4 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -92,10 +92,8 @@ static unsigned long mc_group_start = 0x3 | BIT(GENL_ID_CTRL) | static unsigned long *mc_groups = &mc_group_start; static unsigned long mc_groups_longs = 1; -/* We need the last attribute with non-zero ID therefore a 2-entry array */ static struct nla_policy genl_policy_reject_all[] = { { .type = NLA_REJECT }, - { .type = NLA_REJECT }, }; static int genl_ctrl_event(int event, const struct genl_family *family, @@ -106,13 +104,10 @@ static void genl_op_fill_in_reject_policy(const struct genl_family *family, struct genl_ops *op) { - BUILD_BUG_ON(ARRAY_SIZE(genl_policy_reject_all) - 1 != 1); - if (op->policy || op->cmd < family->resv_start_op) return; op->policy = genl_policy_reject_all; - op->maxattr = 1; } static void @@ -123,7 +118,6 @@ genl_op_fill_in_reject_policy_split(const struct genl_family *family, return; op->policy = genl_policy_reject_all; - op->maxattr = 1; } static const struct genl_family *genl_family_find_byid(unsigned int id) @@ -250,6 +244,7 @@ genl_get_cmd_split(u32 cmd, u8 flag, const struct genl_family *family, if (family->split_ops[i].cmd == cmd && family->split_ops[i].flags & flag) { *op = family->split_ops[i]; + genl_op_fill_in_reject_policy_split(family, op); return 0; } @@ -934,12 +929,17 @@ genl_family_rcv_msg_attrs_parse(const struct genl_family *family, struct nlattr **attrbuf; int err; - if (!ops->maxattr) + if (!ops->policy) return NULL; - attrbuf = kmalloc_objs(struct nlattr *, ops->maxattr + 1); - if (!attrbuf) - return ERR_PTR(-ENOMEM); + if (ops->maxattr) { + attrbuf = kmalloc_objs(struct nlattr *, ops->maxattr + 1); + if (!attrbuf) + return ERR_PTR(-ENOMEM); + } else { + /* Reject all policy, __nlmsg_parse() will just validate */ + attrbuf = NULL; + } err = __nlmsg_parse(nlh, hdrlen, attrbuf, ops->maxattr, ops->policy, validate, extack); diff --git 
a/net/netlink/policy.c b/net/netlink/policy.c index f39cd7cc4fb5..08b006c48f06 100644 --- a/net/netlink/policy.c +++ b/net/netlink/policy.c @@ -31,7 +31,7 @@ static int add_policy(struct netlink_policy_dump_state **statep, struct netlink_policy_dump_state *state = *statep; unsigned int old_n_alloc, n_alloc, i; - if (!policy || !maxtype) + if (!policy) return 0; for (i = 0; i < state->n_alloc; i++) { @@ -85,7 +85,7 @@ int netlink_policy_dump_get_policy_idx(struct netlink_policy_dump_state *state, { unsigned int i; - if (WARN_ON(!policy || !maxtype)) + if (WARN_ON(!policy)) return 0; for (i = 0; i < state->n_alloc; i++) { diff --git a/net/nfc/digital_technology.c b/net/nfc/digital_technology.c index 63f1b721c71d..ae63c5eb06fa 100644 --- a/net/nfc/digital_technology.c +++ b/net/nfc/digital_technology.c @@ -424,6 +424,12 @@ static void digital_in_recv_sdd_res(struct nfc_digital_dev *ddev, void *arg, size = 4; } + if (target->nfcid1_len + size > NFC_NFCID1_MAXSIZE) { + PROTOCOL_ERR("4.7.2.1"); + rc = -EPROTO; + goto exit; + } + memcpy(target->nfcid1 + target->nfcid1_len, sdd_res->nfcid1 + offset, size); target->nfcid1_len += size; diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 366d7566308c..db5bc6a878dd 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -1091,6 +1091,7 @@ static void nfc_llcp_recv_hdlc(struct nfc_llcp_local *local, if (sk->sk_state == LLCP_CLOSED) { release_sock(sk); nfc_llcp_sock_put(llcp_sock); + return; } /* Pass the payload upstream */ @@ -1182,6 +1183,7 @@ static void nfc_llcp_recv_disc(struct nfc_llcp_local *local, if (sk->sk_state == LLCP_CLOSED) { release_sock(sk); nfc_llcp_sock_put(llcp_sock); + return; } if (sk->sk_state == LLCP_CONNECTED) { diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 792ca44a461d..140388a18ae0 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -21,6 +21,7 @@ #include <net/ip.h> #include <net/ipv6.h> #include <net/ip6_fib.h> +#include 
<net/ip6_route.h> #include <net/checksum.h> #include <net/dsfield.h> #include <net/mpls.h> @@ -810,7 +811,7 @@ static void ovs_fragment(struct net *net, struct vport *vport, skb_dst_set_noref(skb, &ovs_rt.dst); IP6CB(skb)->frag_max_size = mru; - ipv6_stub->ipv6_fragment(net, skb->sk, skb, ovs_vport_output); + ip6_fragment(net, skb->sk, skb, ovs_vport_output); refdst_drop(orig_dst); } else { WARN_ONCE(1, "Failed fragment ->%s: eth=%04x, MRU=%d, MTU=%d.", diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 61c6a5f77c2e..67d5b8c0fe79 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -167,7 +167,7 @@ static struct table_instance *table_instance_alloc(int new_size) ti->n_buckets = new_size; ti->node_ver = 0; - get_random_bytes(&ti->hash_seed, sizeof(u32)); + ti->hash_seed = get_random_u32(); return ti; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index f642a95996e1..4b043241fd56 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -49,6 +49,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/ethtool.h> +#include <linux/uio.h> #include <linux/filter.h> #include <linux/types.h> #include <linux/mm.h> @@ -4051,7 +4052,7 @@ packet_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, } static int packet_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) + sockopt_t *opt) { int len; int val, lv = sizeof(val); @@ -4065,8 +4066,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, if (level != SOL_PACKET) return -ENOPROTOOPT; - if (get_user(len, optlen)) - return -EFAULT; + len = opt->optlen; if (len < 0) return -EINVAL; @@ -4115,7 +4115,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, len = sizeof(int); if (len < sizeof(int)) return -EINVAL; - if (copy_from_user(&val, optval, len)) + if (copy_from_iter(&val, len, &opt->iter_in) != len) return -EFAULT; 
switch (val) { case TPACKET_V1: @@ -4171,9 +4171,8 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, if (len > lv) len = lv; - if (put_user(len, optlen)) - return -EFAULT; - if (copy_to_user(optval, data, len)) + opt->optlen = len; + if (copy_to_iter(data, len, &opt->iter_out) != len) return -EFAULT; return 0; } @@ -4672,7 +4671,7 @@ static const struct proto_ops packet_ops = { .listen = sock_no_listen, .shutdown = sock_no_shutdown, .setsockopt = packet_setsockopt, - .getsockopt = packet_getsockopt, + .getsockopt_iter = packet_getsockopt, .sendmsg = packet_sendmsg, .recvmsg = packet_recvmsg, .mmap = packet_mmap, diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 976fe250b509..22cf23f06832 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -109,7 +109,7 @@ static int pn_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) } static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct sk_buff *skb = NULL; struct sockaddr_pn sa; @@ -143,7 +143,7 @@ static int pn_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (msg->msg_name != NULL) { __sockaddr_check_size(sizeof(sa)); memcpy(msg->msg_name, &sa, sizeof(sa)); - *addr_len = sizeof(sa); + msg->msg_namelen = sizeof(sa); } out: diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 120e711ea78c..4dbf0914df7d 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -1262,7 +1262,7 @@ struct sk_buff *pep_read(struct sock *sk) } static int pep_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct sk_buff *skb; int err; diff --git a/net/psp/psp_main.c b/net/psp/psp_main.c index d4c04c923c5a..9508b6c38003 100644 --- a/net/psp/psp_main.c +++ b/net/psp/psp_main.c @@ -202,7 +202,7 @@ static void psp_write_headers(struct net *net, struct sk_buff *skb, __be32 spi, * reciprocal divide. 
*/ hash ^= hash << 16; - uh->source = htons((((u64)hash * (max - min)) >> 32) + min); + uh->source = htons(reciprocal_scale(hash, max - min + 1) + min); } else { uh->source = udp_flow_src_port(net, skb, 0, 0, false); } diff --git a/net/psp/psp_sock.c b/net/psp/psp_sock.c index a85b0ed88842..07dc4cf741f3 100644 --- a/net/psp/psp_sock.c +++ b/net/psp/psp_sock.c @@ -291,4 +291,3 @@ void psp_reply_set_decrypted(const struct sock *sk, struct sk_buff *skb) skb->decrypted = 1; rcu_read_unlock(); } -EXPORT_IPV6_MOD_GPL(psp_reply_set_decrypted); diff --git a/net/qrtr/af_qrtr.c b/net/qrtr/af_qrtr.c index d77e9c8212da..7cec6a7859b0 100644 --- a/net/qrtr/af_qrtr.c +++ b/net/qrtr/af_qrtr.c @@ -361,7 +361,7 @@ static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, } hdr->size = cpu_to_le32(len); - hdr->confirm_rx = !!confirm_rx; + hdr->confirm_rx = cpu_to_le32(!!confirm_rx); rc = skb_put_padto(skb, ALIGN(len, 4) + sizeof(*hdr)); @@ -462,7 +462,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) cb->type = le32_to_cpu(v1->type); cb->src_node = le32_to_cpu(v1->src_node_id); cb->src_port = le32_to_cpu(v1->src_port_id); - cb->confirm_rx = !!v1->confirm_rx; + cb->confirm_rx = !!le32_to_cpu(v1->confirm_rx); cb->dst_node = le32_to_cpu(v1->dst_node_id); cb->dst_port = le32_to_cpu(v1->dst_port_id); diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c index 3203b2220860..b3f9bbcf9ab9 100644 --- a/net/qrtr/ns.c +++ b/net/qrtr/ns.c @@ -22,8 +22,10 @@ static struct { struct socket *sock; struct sockaddr_qrtr bcast_sq; struct list_head lookups; + u32 lookup_count; struct workqueue_struct *workqueue; struct work_struct work; + void (*saved_data_ready)(struct sock *sk); int local_node; } qrtr_ns; @@ -67,8 +69,19 @@ struct qrtr_server { struct qrtr_node { unsigned int id; struct xarray servers; + u32 server_count; }; +/* Max nodes, server, lookup limits are chosen based on the current platform + * requirements. 
If the requirement changes in the future, these values can be + * increased. + */ +#define QRTR_NS_MAX_NODES 64 +#define QRTR_NS_MAX_SERVERS 256 +#define QRTR_NS_MAX_LOOKUPS 64 + +static u8 node_count; + static struct qrtr_node *node_get(unsigned int node_id) { struct qrtr_node *node; @@ -77,6 +90,11 @@ static struct qrtr_node *node_get(unsigned int node_id) if (node) return node; + if (node_count >= QRTR_NS_MAX_NODES) { + pr_err_ratelimited("QRTR clients exceed max node limit!\n"); + return NULL; + } + /* If node didn't exist, allocate and insert it to the tree */ node = kzalloc_obj(*node); if (!node) @@ -90,6 +108,8 @@ static struct qrtr_node *node_get(unsigned int node_id) return NULL; } + node_count++; + return node; } @@ -229,6 +249,17 @@ static struct qrtr_server *server_add(unsigned int service, if (!service || !port) return NULL; + node = node_get(node_id); + if (!node) + return NULL; + + /* Make sure the new servers per port are capped at the maximum value */ + old = xa_load(&node->servers, port); + if (!old && node->server_count >= QRTR_NS_MAX_SERVERS) { + pr_err_ratelimited("QRTR client node %u exceeds max server limit!\n", node_id); + return NULL; + } + srv = kzalloc_obj(*srv); if (!srv) return NULL; @@ -238,10 +269,6 @@ static struct qrtr_server *server_add(unsigned int service, srv->node = node_id; srv->port = port; - node = node_get(node_id); - if (!node) - goto err; - /* Delete the old server on the same port */ old = xa_store(&node->servers, port, srv, GFP_KERNEL); if (old) { @@ -252,6 +279,8 @@ static struct qrtr_server *server_add(unsigned int service, } else { kfree(old); } + } else { + node->server_count++; } trace_qrtr_ns_server_add(srv->service, srv->instance, @@ -292,6 +321,7 @@ static int server_del(struct qrtr_node *node, unsigned int port, bool bcast) } kfree(srv); + node->server_count--; return 0; } @@ -341,7 +371,7 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) struct qrtr_node *node; unsigned long index; struct kvec iv; - int 
ret; + int ret = 0; iv.iov_base = &pkt; iv.iov_len = sizeof(pkt); @@ -356,8 +386,10 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) /* Advertise the removal of this client to all local servers */ local_node = node_get(qrtr_ns.local_node); - if (!local_node) - return 0; + if (!local_node) { + ret = 0; + goto delete_node; + } memset(&pkt, 0, sizeof(pkt)); pkt.cmd = cpu_to_le32(QRTR_TYPE_BYE); @@ -374,10 +406,19 @@ static int ctrl_cmd_bye(struct sockaddr_qrtr *from) ret = kernel_sendmsg(qrtr_ns.sock, &msg, &iv, 1, sizeof(pkt)); if (ret < 0 && ret != -ENODEV) { pr_err("failed to send bye cmd\n"); - return ret; + goto delete_node; } } - return 0; + + /* Ignore -ENODEV */ + ret = 0; + +delete_node: + xa_erase(&nodes, from->sq_node); + kfree(node); + node_count--; + + return ret; } static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, @@ -417,6 +458,7 @@ static int ctrl_cmd_del_client(struct sockaddr_qrtr *from, list_del(&lookup->li); kfree(lookup); + qrtr_ns.lookup_count--; } /* Remove the server belonging to this port but don't broadcast @@ -534,6 +576,11 @@ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, if (from->sq_node != qrtr_ns.local_node) return -EINVAL; + if (qrtr_ns.lookup_count >= QRTR_NS_MAX_LOOKUPS) { + pr_err_ratelimited("QRTR client node exceeds max lookup limit!\n"); + return -ENOSPC; + } + lookup = kzalloc_obj(*lookup); if (!lookup) return -ENOMEM; @@ -542,6 +589,7 @@ static int ctrl_cmd_new_lookup(struct sockaddr_qrtr *from, lookup->service = service; lookup->instance = instance; list_add_tail(&lookup->li, &qrtr_ns.lookups); + qrtr_ns.lookup_count++; memset(&filter, 0, sizeof(filter)); filter.service = service; @@ -582,6 +630,7 @@ static void ctrl_cmd_del_lookup(struct sockaddr_qrtr *from, list_del(&lookup->li); kfree(lookup); + qrtr_ns.lookup_count--; } } @@ -670,7 +719,7 @@ static void qrtr_ns_worker(struct work_struct *work) } if (ret < 0) - pr_err("failed while handling packet from %d:%d", + pr_err_ratelimited("failed while 
handling packet from %d:%d", sq.sq_node, sq.sq_port); } @@ -709,6 +758,7 @@ int qrtr_ns_init(void) goto err_sock; } + qrtr_ns.saved_data_ready = qrtr_ns.sock->sk->sk_data_ready; qrtr_ns.sock->sk->sk_data_ready = qrtr_ns_data_ready; sq.sq_port = QRTR_PORT_CTRL; @@ -749,6 +799,10 @@ int qrtr_ns_init(void) return 0; err_wq: + write_lock_bh(&qrtr_ns.sock->sk->sk_callback_lock); + qrtr_ns.sock->sk->sk_data_ready = qrtr_ns.saved_data_ready; + write_unlock_bh(&qrtr_ns.sock->sk->sk_callback_lock); + destroy_workqueue(qrtr_ns.workqueue); err_sock: sock_release(qrtr_ns.sock); @@ -758,7 +812,12 @@ EXPORT_SYMBOL_GPL(qrtr_ns_init); void qrtr_ns_remove(void) { + write_lock_bh(&qrtr_ns.sock->sk->sk_callback_lock); + qrtr_ns.sock->sk->sk_data_ready = qrtr_ns.saved_data_ready; + write_unlock_bh(&qrtr_ns.sock->sk->sk_callback_lock); + cancel_work_sync(&qrtr_ns.work); + synchronize_net(); destroy_workqueue(qrtr_ns.workqueue); /* sock_release() expects the two references that were put during diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index b396c673dfaf..76f625986a7f 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -357,7 +357,8 @@ static int rds_cong_monitor(struct rds_sock *rs, sockptr_t optval, int optlen) return ret; } -static int rds_set_transport(struct rds_sock *rs, sockptr_t optval, int optlen) +static int rds_set_transport(struct net *net, struct rds_sock *rs, + sockptr_t optval, int optlen) { int t_type; @@ -373,6 +374,10 @@ static int rds_set_transport(struct rds_sock *rs, sockptr_t optval, int optlen) if (t_type < 0 || t_type >= RDS_TRANS_COUNT) return -EINVAL; + /* RDS/IB is restricted to the initial network namespace */ + if (t_type != RDS_TRANS_TCP && !net_eq(net, &init_net)) + return -EPROTOTYPE; + rs->rs_transport = rds_trans_get(t_type); return rs->rs_transport ? 
0 : -ENOPROTOOPT; @@ -433,6 +438,7 @@ static int rds_setsockopt(struct socket *sock, int level, int optname, sockptr_t optval, unsigned int optlen) { struct rds_sock *rs = rds_sk_to_rs(sock->sk); + struct net *net = sock_net(sock->sk); int ret; if (level != SOL_RDS) { @@ -461,7 +467,7 @@ static int rds_setsockopt(struct socket *sock, int level, int optname, break; case SO_RDS_TRANSPORT: lock_sock(sock->sk); - ret = rds_set_transport(rs, optval, optlen); + ret = rds_set_transport(net, rs, optval, optlen); release_sock(sock->sk); break; case SO_TIMESTAMP_OLD: diff --git a/net/rds/ib.c b/net/rds/ib.c index ac6affa33ce7..39f87272e071 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -401,8 +401,8 @@ static void rds6_ib_ic_info(struct socket *sock, unsigned int len, * allowed to influence which paths have priority. We could call userspace * asserting this policy "routing". */ -static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, - __u32 scope_id) +static int rds_ib_laddr_check_cm(struct net *net, const struct in6_addr *addr, + __u32 scope_id) { int ret; struct rdma_cm_id *cm_id; @@ -487,6 +487,26 @@ out: return ret; } +static int rds_ib_laddr_check(struct net *net, const struct in6_addr *addr, + __u32 scope_id) +{ + struct rds_ib_device *rds_ibdev = NULL; + + /* RDS/IB is restricted to the initial network namespace */ + if (!net_eq(net, &init_net)) + return -EPROTOTYPE; + + if (ipv6_addr_v4mapped(addr)) { + rds_ibdev = rds_ib_get_device(addr->s6_addr32[3]); + if (rds_ibdev) { + rds_ib_dev_put(rds_ibdev); + return 0; + } + } + + return rds_ib_laddr_check_cm(net, addr, scope_id); +} + static void rds_ib_unregister_client(void) { ib_unregister_client(&rds_ib_client); diff --git a/net/rds/ib.h b/net/rds/ib.h index 8ef3178ed4d6..5ff346a1e8ba 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -381,6 +381,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, __rds_ib_conn_error(conn, KERN_WARNING "RDS/IB: " fmt) /* ib_rdma.c */ +struct 
rds_ib_device *rds_ib_get_device(__be32 ipaddr); int rds_ib_update_ipaddr(struct rds_ib_device *rds_ibdev, struct in6_addr *ipaddr); void rds_ib_add_conn(struct rds_ib_device *rds_ibdev, struct rds_connection *conn); diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 2cfec252eeac..9594ea245f7f 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -43,7 +43,7 @@ struct workqueue_struct *rds_ib_mr_wq; static void rds_ib_odp_mr_worker(struct work_struct *work); -static struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) +struct rds_ib_device *rds_ib_get_device(__be32 ipaddr) { struct rds_ib_device *rds_ibdev; struct rds_ib_ipaddr *i_ipaddr; diff --git a/net/rds/send.c b/net/rds/send.c index a1039e422a38..d8b14ff9d366 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -284,7 +284,7 @@ restart: * * cp_xmit_rm holds a ref while we're sending this message down * the connection. We can use this ref while holding the - * send_sem.. rds_send_reset() is serialized with it. + * send_sem.. rds_send_path_reset() is serialized with it. 
*/ if (!rm) { unsigned int len; diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c index 0276b393f0e5..ca4f217ef3d3 100644 --- a/net/rose/rose_in.c +++ b/net/rose/rose_in.c @@ -101,7 +101,6 @@ static int rose_state2_machine(struct sock *sk, struct sk_buff *skb, int framety */ static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int frametype, int ns, int nr, int q, int d, int m) { - enum skb_drop_reason dr; /* ignored */ struct rose_sock *rose = rose_sk(sk); int queued = 0; @@ -163,7 +162,7 @@ static int rose_state3_machine(struct sock *sk, struct sk_buff *skb, int framety rose_frames_acked(sk, nr); if (ns == rose->vr) { rose_start_idletimer(sk); - if (!sk_filter_trim_cap(sk, skb, ROSE_MIN_LEN, &dr) && + if (!sk_filter_trim_cap(sk, skb, ROSE_MIN_LEN) && __sock_queue_rcv_skb(sk, skb) == 0) { rose->vr = (rose->vr + 1) % ROSE_MODULUS; queued = 1; @@ -271,6 +270,13 @@ int rose_process_rx_frame(struct sock *sk, struct sk_buff *skb) frametype = rose_decode(skb, &ns, &nr, &q, &d, &m); + /* + * ROSE_CLEAR_REQUEST carries cause and diagnostic in bytes 3..4. + * Reject a malformed frame that is too short to contain them. + */ + if (frametype == ROSE_CLEAR_REQUEST && skb->len < 5) + return 0; + switch (rose->state) { case ROSE_STATE_1: queued = rose_state1_machine(sk, skb, frametype); diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index f60b81c66078..43416b3026fb 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -25,7 +25,7 @@ if AF_RXRPC config AF_RXRPC_IPV6 bool "IPv6 support for RxRPC" - depends on (IPV6 = m && AF_RXRPC = m) || (IPV6 = y && AF_RXRPC) + depends on IPV6 help Say Y here to allow AF_RXRPC to use IPV6 UDP as well as IPV4 UDP as its network transport. 
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 870e59bf06af..88cad087f13b 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -16,8 +16,6 @@ #include <net/udp.h> #include "ar-internal.h" -extern int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); - ssize_t do_udp_sendmsg(struct socket *socket, struct msghdr *msg, size_t len) { struct sockaddr *sa = msg->msg_name; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 7d5e50c921a0..6158e13c98d3 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -328,9 +328,13 @@ static int tcf_ct_flow_table_get(struct net *net, struct tcf_ct_params *params) int err = -ENOMEM; mutex_lock(&zones_mutex); - ct_ft = rhashtable_lookup_fast(&zones_ht, &key, zones_params); - if (ct_ft && refcount_inc_not_zero(&ct_ft->ref)) + rcu_read_lock(); + ct_ft = rhashtable_lookup(&zones_ht, &key, zones_params); + if (ct_ft && refcount_inc_not_zero(&ct_ft->ref)) { + rcu_read_unlock(); goto out_unlock; + } + rcu_read_unlock(); ct_ft = kzalloc_obj(*ct_ft); if (!ct_ft) diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 26070c892305..88f8a32fab2b 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -59,18 +59,14 @@ struct fl_flow_key { struct flow_dissector_key_eth_addrs eth; struct flow_dissector_key_vlan vlan; struct flow_dissector_key_vlan cvlan; - union { - struct flow_dissector_key_ipv4_addrs ipv4; - struct flow_dissector_key_ipv6_addrs ipv6; - }; + struct flow_dissector_key_ipv4_addrs ipv4; + struct flow_dissector_key_ipv6_addrs ipv6; struct flow_dissector_key_ports tp; struct flow_dissector_key_icmp icmp; struct flow_dissector_key_arp arp; struct flow_dissector_key_keyid enc_key_id; - union { - struct flow_dissector_key_ipv4_addrs enc_ipv4; - struct flow_dissector_key_ipv6_addrs enc_ipv6; - }; + struct flow_dissector_key_ipv4_addrs enc_ipv4; + struct flow_dissector_key_ipv6_addrs enc_ipv6; struct flow_dissector_key_ports enc_tp; struct flow_dissector_key_mpls mpls; 
struct flow_dissector_key_tcp tcp; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 23884ef8b80c..646a730dca93 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -74,9 +74,13 @@ TC_INDIRECT_SCOPE int fw_classify(struct sk_buff *skb, } } } else { - struct Qdisc *q = tcf_block_q(tp->chain->block); + struct Qdisc *q; /* Old method: classify the packet using its skb mark. */ + if (tcf_block_shared(tp->chain->block)) + return -1; + + q = tcf_block_q(tp->chain->block); if (id && (TC_H_MAJ(id) == 0 || !(TC_H_MAJ(id ^ q->handle)))) { res->classid = id; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 9241c025aa74..8f30cc82181d 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -852,7 +852,10 @@ static struct tc_u_knode *u32_init_knode(struct net *net, struct tcf_proto *tp, /* Similarly success statistics must be moved as pointers */ new->pcpu_success = n->pcpu_success; #endif - memcpy(&new->sel, s, struct_size(s, keys, s->nkeys)); + unsafe_memcpy(&new->sel, s, struct_size(s, keys, s->nkeys), + /* A composite flex-array structure destination, + * which was correctly sized with kzalloc_flex(), + * above. 
*/); if (tcf_exts_init(&new->exts, net, TCA_U32_ACT, TCA_U32_POLICE)) { kfree(new); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index cc43e3f7574f..ed869a5ffc73 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1120,7 +1120,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, } if (dev->flags & IFF_UP) - dev_deactivate(dev); + dev_deactivate(dev, false); qdisc_offload_graft_root(dev, new, old, extack); @@ -2479,7 +2479,8 @@ static struct pernet_operations psched_net_ops = { }; #if IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) -DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper); +DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper_act); +DEFINE_STATIC_KEY_FALSE(tc_skip_wrapper_cls); #endif static const struct rtnl_msg_handler psched_rtnl_msg_handlers[] __initconst = { diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 9efe23f8371b..ffea9fbd522d 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -497,13 +497,13 @@ static bool cobalt_queue_empty(struct cobalt_vars *vars, /* Call this with a freshly dequeued packet for possible congestion marking. * Returns true as an instruction to drop the packet, false for delivery. 
*/ -static enum skb_drop_reason cobalt_should_drop(struct cobalt_vars *vars, - struct cobalt_params *p, - ktime_t now, - struct sk_buff *skb, - u32 bulk_flows) +static enum qdisc_drop_reason cobalt_should_drop(struct cobalt_vars *vars, + struct cobalt_params *p, + ktime_t now, + struct sk_buff *skb, + u32 bulk_flows) { - enum skb_drop_reason reason = SKB_NOT_DROPPED_YET; + enum qdisc_drop_reason reason = QDISC_DROP_UNSPEC; bool next_due, over_target; ktime_t schedule; u64 sojourn; @@ -548,7 +548,7 @@ static enum skb_drop_reason cobalt_should_drop(struct cobalt_vars *vars, if (next_due && vars->dropping) { /* Use ECN mark if possible, otherwise drop */ if (!(vars->ecn_marked = INET_ECN_set_ce(skb))) - reason = SKB_DROP_REASON_QDISC_CONGESTED; + reason = QDISC_DROP_CONGESTED; vars->count++; if (!vars->count) @@ -571,14 +571,14 @@ static enum skb_drop_reason cobalt_should_drop(struct cobalt_vars *vars, } /* Simple BLUE implementation. Lack of ECN is deliberate. */ - if (vars->p_drop && reason == SKB_NOT_DROPPED_YET && + if (vars->p_drop && reason == QDISC_DROP_UNSPEC && get_random_u32() < vars->p_drop) - reason = SKB_DROP_REASON_CAKE_FLOOD; + reason = QDISC_DROP_FLOOD_PROTECTION; /* Overload the drop_next field as an activity timeout */ if (!vars->count) vars->drop_next = ktime_add_ns(now, p->interval); - else if (ktime_to_ns(schedule) > 0 && reason == SKB_NOT_DROPPED_YET) + else if (ktime_to_ns(schedule) > 0 && reason == QDISC_DROP_UNSPEC) vars->drop_next = now; return reason; @@ -1604,7 +1604,7 @@ static unsigned int cake_drop(struct Qdisc *sch, struct sk_buff **to_free) if (q->config->rate_flags & CAKE_FLAG_INGRESS) cake_advance_shaper(q, b, skb, now, true); - qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT); + qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT); sch->q.qlen--; cake_heapify(q, 0); @@ -2004,7 +2004,7 @@ static struct sk_buff *cake_dequeue(struct Qdisc *sch) { struct cake_sched_data *q = qdisc_priv(sch); struct 
cake_tin_data *b = &q->tins[q->cur_tin]; - enum skb_drop_reason reason; + enum qdisc_drop_reason reason; ktime_t now = ktime_get(); struct cake_flow *flow; struct list_head *head; @@ -2223,7 +2223,7 @@ retry: !!(q->config->rate_flags & CAKE_FLAG_INGRESS))); /* Last packet in queue may be marked, shouldn't be dropped */ - if (reason == SKB_NOT_DROPPED_YET || !flow->head) + if (reason == QDISC_DROP_UNSPEC || !flow->head) break; /* drop this packet, get another one */ diff --git a/net/sched/sch_codel.c b/net/sched/sch_codel.c index c6551578f1cf..317aae0ec7bd 100644 --- a/net/sched/sch_codel.c +++ b/net/sched/sch_codel.c @@ -52,7 +52,7 @@ static void drop_func(struct sk_buff *skb, void *ctx) { struct Qdisc *sch = ctx; - qdisc_dequeue_drop(sch, skb, SKB_DROP_REASON_QDISC_CONGESTED); + qdisc_dequeue_drop(sch, skb, QDISC_DROP_CONGESTED); qdisc_qstats_drop(sch); } @@ -85,9 +85,8 @@ static int codel_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, return qdisc_enqueue_tail(skb, sch); } q = qdisc_priv(sch); - q->drop_overlimit++; - return qdisc_drop_reason(skb, sch, to_free, - SKB_DROP_REASON_QDISC_OVERLIMIT); + WRITE_ONCE(q->drop_overlimit, q->drop_overlimit + 1); + return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT); } static const struct nla_policy codel_policy[TCA_CODEL_MAX + 1] = { @@ -222,18 +221,18 @@ static int codel_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { const struct codel_sched_data *q = qdisc_priv(sch); struct tc_codel_xstats st = { - .maxpacket = q->stats.maxpacket, - .count = q->vars.count, - .lastcount = q->vars.lastcount, - .drop_overlimit = q->drop_overlimit, - .ldelay = codel_time_to_us(q->vars.ldelay), - .dropping = q->vars.dropping, - .ecn_mark = q->stats.ecn_mark, - .ce_mark = q->stats.ce_mark, + .maxpacket = READ_ONCE(q->stats.maxpacket), + .count = READ_ONCE(q->vars.count), + .lastcount = READ_ONCE(q->vars.lastcount), + .drop_overlimit = READ_ONCE(q->drop_overlimit), + .ldelay = 
codel_time_to_us(READ_ONCE(q->vars.ldelay)), + .dropping = READ_ONCE(q->vars.dropping), + .ecn_mark = READ_ONCE(q->stats.ecn_mark), + .ce_mark = READ_ONCE(q->stats.ce_mark), }; - if (q->vars.dropping) { - codel_tdiff_t delta = q->vars.drop_next - codel_get_time(); + if (st.dropping) { + codel_tdiff_t delta = READ_ONCE(q->vars.drop_next) - codel_get_time(); if (delta >= 0) st.drop_next = codel_time_to_us(delta); diff --git a/net/sched/sch_dualpi2.c b/net/sched/sch_dualpi2.c index 6d7e6389758d..fe6f5e889625 100644 --- a/net/sched/sch_dualpi2.c +++ b/net/sched/sch_dualpi2.c @@ -393,13 +393,11 @@ static int dualpi2_enqueue_skb(struct sk_buff *skb, struct Qdisc *sch, qdisc_qstats_overlimit(sch); if (skb_in_l_queue(skb)) qdisc_qstats_overlimit(q->l_queue); - return qdisc_drop_reason(skb, sch, to_free, - SKB_DROP_REASON_QDISC_OVERLIMIT); + return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT); } if (q->drop_early && must_drop(sch, q, skb)) { - qdisc_drop_reason(skb, sch, to_free, - SKB_DROP_REASON_QDISC_CONGESTED); + qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_CONGESTED); return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS; } @@ -573,11 +571,11 @@ static int do_step_aqm(struct dualpi2_sched_data *q, struct sk_buff *skb, } static void drop_and_retry(struct dualpi2_sched_data *q, struct sk_buff *skb, - struct Qdisc *sch, enum skb_drop_reason reason) + struct Qdisc *sch, enum qdisc_drop_reason reason) { ++q->deferred_drops_cnt; q->deferred_drops_len += qdisc_pkt_len(skb); - kfree_skb_reason(skb, reason); + qdisc_dequeue_drop(sch, skb, reason); qdisc_qstats_drop(sch); } @@ -592,15 +590,13 @@ static struct sk_buff *dualpi2_qdisc_dequeue(struct Qdisc *sch) while ((skb = dequeue_packet(sch, q, &credit_change, now))) { if (!q->drop_early && must_drop(sch, q, skb)) { - drop_and_retry(q, skb, sch, - SKB_DROP_REASON_QDISC_CONGESTED); + drop_and_retry(q, skb, sch, QDISC_DROP_CONGESTED); continue; } if (skb_in_l_queue(skb) && do_step_aqm(q, skb, now)) { 
qdisc_qstats_drop(q->l_queue); - drop_and_retry(q, skb, sch, - SKB_DROP_REASON_DUALPI2_STEP_DROP); + drop_and_retry(q, skb, sch, QDISC_DROP_L4S_STEP_NON_ECN); continue; } @@ -917,6 +913,8 @@ static int dualpi2_init(struct Qdisc *sch, struct nlattr *opt, struct dualpi2_sched_data *q = qdisc_priv(sch); int err; + sch->flags |= TCQ_F_DEQUEUE_DROPS; + q->l_queue = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, 1), extack); if (!q->l_queue) diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 05084c9af48e..f2edcf872981 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -539,8 +539,6 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb, return unlikely((s64)skb->tstamp > (s64)(now + q->horizon)); } -#define FQDR(reason) SKB_DROP_REASON_FQ_##reason - static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -552,8 +550,7 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, band = fq_prio2band(q->prio2band, skb->priority & TC_PRIO_MAX); if (unlikely(q->band_pkt_count[band] >= sch->limit)) { q->stat_band_drops[band]++; - return qdisc_drop_reason(skb, sch, to_free, - FQDR(BAND_LIMIT)); + return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_BAND_LIMIT); } now = ktime_get_ns(); @@ -565,7 +562,7 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (q->horizon_drop) { q->stat_horizon_drops++; return qdisc_drop_reason(skb, sch, to_free, - FQDR(HORIZON_LIMIT)); + QDISC_DROP_HORIZON_LIMIT); } q->stat_horizon_caps++; skb->tstamp = now + q->horizon; @@ -579,7 +576,7 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (unlikely(f->qlen >= q->flow_plimit)) { q->stat_flows_plimit++; return qdisc_drop_reason(skb, sch, to_free, - FQDR(FLOW_LIMIT)); + QDISC_DROP_FLOW_LIMIT); } if (fq_flow_is_detached(f)) { @@ -604,7 +601,6 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch, return NET_XMIT_SUCCESS; } -#undef FQDR static void 
fq_check_throttled(struct fq_sched_data *q, u64 now) { diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 8181b52dd9a8..2a3d758f67ab 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -168,7 +168,7 @@ static unsigned int fq_codel_drop(struct Qdisc *sch, unsigned int max_packets, skb = dequeue_head(flow); len += qdisc_pkt_len(skb); mem += get_codel_cb(skb)->mem_usage; - tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_OVERLIMIT); + tcf_set_qdisc_drop_reason(skb, QDISC_DROP_OVERLIMIT); __qdisc_drop(skb, to_free); } while (++i < max_packets && len < threshold); @@ -275,7 +275,7 @@ static void drop_func(struct sk_buff *skb, void *ctx) { struct Qdisc *sch = ctx; - qdisc_dequeue_drop(sch, skb, SKB_DROP_REASON_QDISC_CONGESTED); + qdisc_dequeue_drop(sch, skb, QDISC_DROP_CONGESTED); qdisc_qstats_drop(sch); } diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index d8ac3519e937..154c70f489f2 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -130,7 +130,7 @@ static inline void flow_queue_add(struct fq_pie_flow *flow, static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_OVERLIMIT; + enum qdisc_drop_reason reason = QDISC_DROP_OVERLIMIT; struct fq_pie_sched_data *q = qdisc_priv(sch); struct fq_pie_flow *sel_flow; int ret; @@ -162,7 +162,7 @@ static int fq_pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, q->overmemory++; } - reason = SKB_DROP_REASON_QDISC_CONGESTED; + reason = QDISC_DROP_CONGESTED; if (!pie_drop_early(sch, &q->p_params, &sel_flow->vars, sel_flow->backlog, skb->len)) { diff --git a/net/sched/sch_frag.c b/net/sched/sch_frag.c index d1d87dce7f3f..75ee52750919 100644 --- a/net/sched/sch_frag.c +++ b/net/sched/sch_frag.c @@ -6,6 +6,7 @@ #include <net/dst.h> #include <net/ip.h> #include <net/ip6_fib.h> +#include <net/ip6_route.h> struct sch_frag_data { unsigned long dst; @@ -127,8 +128,7 @@ 
static int sch_fragment(struct net *net, struct sk_buff *skb, skb_dst_set_noref(skb, &sch_frag_rt.dst); IP6CB(skb)->frag_max_size = mru; - ret = ipv6_stub->ipv6_fragment(net, skb->sk, skb, - sch_frag_xmit); + ret = ip6_fragment(net, skb->sk, skb, sch_frag_xmit); local_unlock_nested_bh(&sch_frag_data_storage.bh_lock); refdst_drop(orig_dst); } else { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 9e726c3bd86b..a93321db8fd7 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -25,11 +25,11 @@ #include <linux/skb_array.h> #include <linux/if_macvlan.h> #include <linux/bpf.h> +#include <trace/events/qdisc.h> #include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/dst.h> #include <net/hotdata.h> -#include <trace/events/qdisc.h> #include <trace/events/net.h> #include <net/xfrm.h> @@ -37,6 +37,31 @@ const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops; EXPORT_SYMBOL(default_qdisc_ops); +void __tcf_kfree_skb_list(struct sk_buff *skb, struct Qdisc *q, + struct netdev_queue *txq, struct net_device *dev) +{ + while (skb) { + u32 reason = tc_skb_cb(skb)->drop_reason; + struct sk_buff *next = skb->next; + enum skb_drop_reason skb_reason; + + prefetch(next); + /* TC classifier and qdisc share drop_reason storage. + * Check subsystem mask to identify qdisc drop reasons, + * else pass through skb_drop_reason set by TC classifier. 
+ */ + if ((reason & SKB_DROP_REASON_SUBSYS_MASK) == __QDISC_DROP_REASON) { + trace_qdisc_drop(q, txq, dev, skb, (enum qdisc_drop_reason)reason); + skb_reason = SKB_DROP_REASON_QDISC_DROP; + } else { + skb_reason = (enum skb_drop_reason)reason; + } + kfree_skb_reason(skb, skb_reason); + skb = next; + } +} +EXPORT_SYMBOL(__tcf_kfree_skb_list); + static void qdisc_maybe_clear_missed(struct Qdisc *q, const struct netdev_queue *txq) { @@ -741,7 +766,7 @@ static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc, err = skb_array_produce(q, skb); if (unlikely(err)) { - tcf_set_drop_reason(skb, SKB_DROP_REASON_QDISC_OVERLIMIT); + tcf_set_qdisc_drop_reason(skb, QDISC_DROP_OVERLIMIT); if (qdisc_is_percpu_stats(qdisc)) return qdisc_drop_cpu(skb, qdisc, to_free); @@ -825,7 +850,7 @@ static void pfifo_fast_reset(struct Qdisc *qdisc) continue; while ((skb = __skb_array_consume(q)) != NULL) - kfree_skb(skb); + rtnl_kfree_skbs(skb, skb); } if (qdisc_is_percpu_stats(qdisc)) { @@ -1318,11 +1343,12 @@ static bool some_qdisc_is_busy(struct net_device *dev) /** * dev_deactivate_many - deactivate transmissions on several devices * @head: list of devices to deactivate + * @reset_needed: qdisc should be reset if true. * * This function returns only when all outstanding transmissions * have completed, unless all devices are in dismantle phase. 
*/ -void dev_deactivate_many(struct list_head *head) +void dev_deactivate_many(struct list_head *head, bool reset_needed) { bool sync_needed = false; struct net_device *dev; @@ -1341,11 +1367,14 @@ void dev_deactivate_many(struct list_head *head) if (sync_needed) synchronize_net(); - list_for_each_entry(dev, head, close_list) { - netdev_for_each_tx_queue(dev, dev_reset_queue, NULL); + if (reset_needed) { + list_for_each_entry(dev, head, close_list) { + netdev_for_each_tx_queue(dev, dev_reset_queue, NULL); - if (dev_ingress_queue(dev)) - dev_reset_queue(dev, dev_ingress_queue(dev), NULL); + if (dev_ingress_queue(dev)) + dev_reset_queue(dev, dev_ingress_queue(dev), + NULL); + } } /* Wait for outstanding qdisc_run calls. */ @@ -1360,12 +1389,12 @@ void dev_deactivate_many(struct list_head *head) } } -void dev_deactivate(struct net_device *dev) +void dev_deactivate(struct net_device *dev, bool reset_needed) { LIST_HEAD(single); list_add(&dev->close_list, &single); - dev_deactivate_many(&single); + dev_deactivate_many(&single, reset_needed); list_del(&single); } EXPORT_SYMBOL(dev_deactivate); @@ -1421,7 +1450,7 @@ int dev_qdisc_change_tx_queue_len(struct net_device *dev) int ret = 0; if (up) - dev_deactivate(dev); + dev_deactivate(dev, false); for (i = 0; i < dev->num_tx_queues; i++) { ret = qdisc_change_tx_queue_len(dev, &dev->_tx[i]); diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c index 6706faba95b9..36d0cafac206 100644 --- a/net/sched/sch_gred.c +++ b/net/sched/sch_gred.c @@ -251,10 +251,10 @@ static int gred_enqueue(struct sk_buff *skb, struct Qdisc *sch, q->stats.pdrop++; drop: - return qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_OVERLIMIT); + return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT); congestion_drop: - qdisc_drop_reason(skb, sch, to_free, SKB_DROP_REASON_QDISC_CONGESTED); + qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_CONGESTED); return NET_XMIT_CN; } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 
cf6cd4ccfa20..eb12381795ce 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1387,7 +1387,7 @@ htb_graft_helper(struct netdev_queue *dev_queue, struct Qdisc *new_q) struct Qdisc *old_q; if (dev->flags & IFF_UP) - dev_deactivate(dev); + dev_deactivate(dev, false); old_q = dev_graft_qdisc(dev_queue, new_q); if (new_q) new_q->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT; @@ -1421,7 +1421,7 @@ static void htb_offload_move_qdisc(struct Qdisc *sch, struct htb_class *cl_old, struct Qdisc *qdisc; if (dev->flags & IFF_UP) - dev_deactivate(dev); + dev_deactivate(dev, false); qdisc = dev_graft_qdisc(queue_old, NULL); WARN_ON(qdisc != cl_old->leaf.q); } diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 0ed199fa18f0..a0133a7b9d3b 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -201,7 +201,7 @@ static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, struct net_device *dev = qdisc_dev(sch); if (dev->flags & IFF_UP) - dev_deactivate(dev); + dev_deactivate(dev, false); *old = dev_graft_qdisc(dev_queue, new); if (new) diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index b83276409416..002add5ce9e0 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -469,7 +469,7 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, return -EINVAL; if (dev->flags & IFF_UP) - dev_deactivate(dev); + dev_deactivate(dev, false); *old = dev_graft_qdisc(dev_queue, new); diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 0a377313b6a9..16f3f629cb8e 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -85,7 +85,7 @@ EXPORT_SYMBOL_GPL(pie_drop_early); static int pie_qdisc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_OVERLIMIT; + enum qdisc_drop_reason reason = QDISC_DROP_OVERLIMIT; struct pie_sched_data *q = qdisc_priv(sch); bool enqueue = false; @@ -94,7 +94,7 @@ static int pie_qdisc_enqueue(struct 
sk_buff *skb, struct Qdisc *sch, goto out; } - reason = SKB_DROP_REASON_QDISC_CONGESTED; + reason = QDISC_DROP_CONGESTED; if (!pie_drop_early(sch, &q->params, &q->vars, sch->qstats.backlog, skb->len)) { diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 479c42d11083..c8d3d09f15e3 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -70,7 +70,7 @@ static int red_use_nodrop(struct red_sched_data *q) static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_CONGESTED; + enum qdisc_drop_reason reason = QDISC_DROP_CONGESTED; struct red_sched_data *q = qdisc_priv(sch); struct Qdisc *child = q->qdisc; unsigned int len; @@ -108,7 +108,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch, break; case RED_HARD_MARK: - reason = SKB_DROP_REASON_QDISC_OVERLIMIT; + reason = QDISC_DROP_OVERLIMIT; qdisc_qstats_overlimit(sch); if (red_use_harddrop(q) || !red_use_ecn(q)) { q->stats.forced_drop++; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index d2835f1168e1..013738662128 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -280,7 +280,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { - enum skb_drop_reason reason = SKB_DROP_REASON_QDISC_OVERLIMIT; + enum qdisc_drop_reason reason = QDISC_DROP_OVERLIMIT; struct sfb_sched_data *q = qdisc_priv(sch); unsigned int len = qdisc_pkt_len(skb); struct Qdisc *child = q->qdisc; @@ -381,7 +381,7 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, } r = get_random_u16() & SFB_MAX_PROB; - reason = SKB_DROP_REASON_QDISC_CONGESTED; + reason = QDISC_DROP_CONGESTED; if (unlikely(r < p_min)) { if (unlikely(p_min > SFB_MAX_PROB / 2)) { diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 503d7d3ca081..c3f3181dba54 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -302,7 +302,7 @@ drop: sfq_dec(q, x); sch->q.qlen--; 
qdisc_qstats_backlog_dec(sch, skb); - qdisc_drop(skb, sch, to_free); + qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_OVERLIMIT); return len; } @@ -363,7 +363,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) if (x == SFQ_EMPTY_SLOT) { x = q->dep[0].next; /* get a free slot */ if (x >= SFQ_MAX_FLOWS) - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_MAXFLOWS); q->ht[hash] = x; slot = &q->slots[x]; slot->hash = hash; @@ -420,14 +420,14 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) if (slot->qlen >= q->maxdepth) { congestion_drop: if (!sfq_headdrop(q)) - return qdisc_drop(skb, sch, to_free); + return qdisc_drop_reason(skb, sch, to_free, QDISC_DROP_FLOW_LIMIT); /* We know we have at least one packet in queue */ head = slot_dequeue_head(slot); delta = qdisc_pkt_len(head) - qdisc_pkt_len(skb); sch->qstats.backlog -= delta; slot->backlog -= delta; - qdisc_drop(head, sch, to_free); + qdisc_drop_reason(head, sch, to_free, QDISC_DROP_FLOW_LIMIT); slot_queue_add(slot, skb); qdisc_tree_reduce_backlog(sch, 0, delta); diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index f721c03514f6..8e3752811950 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -2184,7 +2184,7 @@ static int taprio_graft(struct Qdisc *sch, unsigned long cl, return -EINVAL; if (dev->flags & IFF_UP) - dev_deactivate(dev); + dev_deactivate(dev, false); /* In offload mode, the child Qdisc is directly attached to the netdev * TX queue, and thus, we need to keep its refcount elevated in order diff --git a/net/sctp/Kconfig b/net/sctp/Kconfig index e947646a380c..fc989a3791b3 100644 --- a/net/sctp/Kconfig +++ b/net/sctp/Kconfig @@ -6,7 +6,6 @@ menuconfig IP_SCTP tristate "The SCTP Protocol" depends on INET - depends on IPV6 || IPV6=n select CRYPTO_LIB_SHA1 select CRYPTO_LIB_SHA256 select CRYPTO_LIB_UTILS diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index 
f5a7d5a38755..a024c0843247 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -201,6 +201,7 @@ new_skb: cb->chunk = head_cb->chunk; cb->af = head_cb->af; + cb->encap_port = head_cb->encap_port; } } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 53a5c027f8e3..cd15b695607e 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -261,9 +261,11 @@ static int sctp_v6_xmit(struct sk_buff *skb, struct sctp_transport *t) skb_set_inner_ipproto(skb, IPPROTO_SCTP); label = ip6_make_flowlabel(sock_net(sk), skb, fl6->flowlabel, true, fl6); + local_bh_disable(); udp_tunnel6_xmit_skb(dst, sk, skb, NULL, &fl6->saddr, &fl6->daddr, tclass, ip6_dst_hoplimit(dst), label, sctp_sk(sk)->udp_port, t->encap_port, false, 0); + local_bh_enable(); return 0; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 828a59b8e7bf..5800e7ee7ea0 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1070,10 +1070,12 @@ static inline int sctp_v4_xmit(struct sk_buff *skb, struct sctp_transport *t) skb_reset_inner_mac_header(skb); skb_reset_inner_transport_header(skb); skb_set_inner_ipproto(skb, IPPROTO_SCTP); + local_bh_disable(); udp_tunnel_xmit_skb(dst_rtable(dst), sk, skb, fl4->saddr, fl4->daddr, dscp, ip4_dst_hoplimit(dst), df, sctp_sk(sk)->udp_port, t->encap_port, false, false, 0); + local_bh_enable(); return 0; } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 2c0017d058d4..de86ac088289 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -2727,7 +2727,7 @@ __u32 sctp_generate_tag(const struct sctp_endpoint *ep) __u32 x; do { - get_random_bytes(&x, sizeof(__u32)); + x = get_random_u32(); } while (x == 0); return x; @@ -2738,7 +2738,7 @@ __u32 sctp_generate_tsn(const struct sctp_endpoint *ep) { __u32 retval; - get_random_bytes(&retval, sizeof(__u32)); + retval = get_random_u32(); return retval; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 05fb00c9c335..d2665bbd41a2 100644 --- a/net/sctp/socket.c +++ 
b/net/sctp/socket.c @@ -2087,7 +2087,7 @@ static int sctp_skb_pull(struct sk_buff *skb, int len) * 5 for complete description of the flags. */ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct sctp_ulpevent *event = NULL; struct sctp_sock *sp = sctp_sk(sk); @@ -2096,11 +2096,11 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int err = 0; int skb_len; - pr_debug("%s: sk:%p, msghdr:%p, len:%zd, flags:0x%x, addr_len:%p)\n", - __func__, sk, msg, len, flags, addr_len); + pr_debug("%s: sk:%p, msghdr:%p, len:%zd, flags:0x%x)\n", + __func__, sk, msg, len, flags); if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); + return inet_recv_error(sk, msg, len); if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) @@ -2141,9 +2141,9 @@ static int sctp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, sock_recv_cmsgs(msg, sk, head_skb); if (sctp_ulpevent_is_notification(event)) { msg->msg_flags |= MSG_NOTIFICATION; - sp->pf->event_msgname(event, msg->msg_name, addr_len); + sp->pf->event_msgname(event, msg->msg_name, &msg->msg_namelen); } else { - sp->pf->skb_msgname(head_skb, msg->msg_name, addr_len); + sp->pf->skb_msgname(head_skb, msg->msg_name, &msg->msg_namelen); } /* Check if we allow SCTP_NXTINFO. 
*/ diff --git a/net/socket.c b/net/socket.c index 89ad750ea536..22a412fdec07 100644 --- a/net/socket.c +++ b/net/socket.c @@ -77,6 +77,7 @@ #include <linux/mount.h> #include <linux/pseudo_fs.h> #include <linux/security.h> +#include <linux/uio.h> #include <linux/syscalls.h> #include <linux/compat.h> #include <linux/kmod.h> @@ -280,23 +281,18 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, BUG_ON(klen > sizeof(struct sockaddr_storage)); - if (can_do_masked_user_access()) - ulen = masked_user_access_begin(ulen); - else if (!user_access_begin(ulen, 4)) - return -EFAULT; - - unsafe_get_user(len, ulen, efault_end); - - if (len > klen) - len = klen; - /* - * "fromlen shall refer to the value before truncation.." - * 1003.1g - */ - if (len >= 0) - unsafe_put_user(klen, ulen, efault_end); + scoped_user_rw_access_size(ulen, 4, efault_end) { + unsafe_get_user(len, ulen, efault_end); - user_access_end(); + if (len > klen) + len = klen; + /* + * "fromlen shall refer to the value before truncation.." 
+ * 1003.1g + */ + if (len >= 0) + unsafe_put_user(klen, ulen, efault_end); + } if (len) { if (len < 0) @@ -309,7 +305,6 @@ static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, return 0; efault_end: - user_access_end(); return -EFAULT; } @@ -977,11 +972,10 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, { int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); int new_tstamp = sock_flag(sk, SOCK_TSTAMP_NEW); - struct scm_timestamping_internal tss; - int empty = 1, false_tstamp = 0; struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb); - int if_index; + struct scm_timestamping_internal tss; + int if_index, false_tstamp = 0; ktime_t hwtstamp; u32 tsflags; @@ -1026,12 +1020,12 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, memset(&tss, 0, sizeof(tss)); tsflags = READ_ONCE(sk->sk_tsflags); - if ((tsflags & SOF_TIMESTAMPING_SOFTWARE && - (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE || - skb_is_err_queue(skb) || - !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) && - ktime_to_timespec64_cond(skb->tstamp, tss.ts + 0)) - empty = 0; + if (tsflags & SOF_TIMESTAMPING_SOFTWARE && + (tsflags & SOF_TIMESTAMPING_RX_SOFTWARE || + skb_is_err_queue(skb) || + !(tsflags & SOF_TIMESTAMPING_OPT_RX_FILTER))) + tss.ts[0] = skb->tstamp; + if (shhwtstamps && (tsflags & SOF_TIMESTAMPING_RAW_HARDWARE && (tsflags & SOF_TIMESTAMPING_RX_HARDWARE || @@ -1048,15 +1042,15 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, hwtstamp = ptp_convert_timestamp(&hwtstamp, READ_ONCE(sk->sk_bind_phc)); - if (ktime_to_timespec64_cond(hwtstamp, tss.ts + 2)) { - empty = 0; + if (hwtstamp) { + tss.ts[2] = hwtstamp; if ((tsflags & SOF_TIMESTAMPING_OPT_PKTINFO) && !skb_is_err_queue(skb)) put_ts_pktinfo(msg, skb, if_index); } } - if (!empty) { + if (tss.ts[0] | tss.ts[2]) { if (sock_flag(sk, SOCK_TSTAMP_NEW)) put_cmsg_scm_timestamping64(msg, &tss); else @@ -2421,11 +2415,45 @@ SYSCALL_DEFINE5(setsockopt, int, fd, int, level, int, optname, 
INDIRECT_CALLABLE_DECLARE(bool tcp_bpf_bypass_getsockopt(int level, int optname)); +/* + * Initialize a sockopt_t from sockptr optval/optlen, setting up iov_iter + * for both input and output directions. + * It is important to remember that both iov points to the same data, but, + * .iter_in is read-only and .iter_out is write-only by the protocol callbacks + */ +static int sockptr_to_sockopt(sockopt_t *opt, sockptr_t optval, + sockptr_t optlen, struct kvec *kvec) +{ + int koptlen; + + if (copy_from_sockptr(&koptlen, optlen, sizeof(int))) + return -EFAULT; + + if (koptlen < 0) + return -EINVAL; + + if (optval.is_kernel) { + kvec->iov_base = optval.kernel; + kvec->iov_len = koptlen; + iov_iter_kvec(&opt->iter_out, ITER_DEST, kvec, 1, koptlen); + iov_iter_kvec(&opt->iter_in, ITER_SOURCE, kvec, 1, koptlen); + } else { + iov_iter_ubuf(&opt->iter_out, ITER_DEST, optval.user, koptlen); + iov_iter_ubuf(&opt->iter_in, ITER_SOURCE, optval.user, + koptlen); + } + opt->optlen = koptlen; + + return 0; +} + int do_sock_getsockopt(struct socket *sock, bool compat, int level, int optname, sockptr_t optval, sockptr_t optlen) { int max_optlen __maybe_unused = 0; const struct proto_ops *ops; + struct kvec kvec; + sockopt_t opt; int err; err = security_socket_getsockopt(sock, level, optname); @@ -2438,15 +2466,28 @@ int do_sock_getsockopt(struct socket *sock, bool compat, int level, ops = READ_ONCE(sock->ops); if (level == SOL_SOCKET) { err = sk_getsockopt(sock->sk, level, optname, optval, optlen); - } else if (unlikely(!ops->getsockopt)) { - err = -EOPNOTSUPP; - } else { + } else if (ops->getsockopt_iter) { + err = sockptr_to_sockopt(&opt, optval, optlen, &kvec); + if (err) + return err; + + err = ops->getsockopt_iter(sock, level, optname, &opt); + + /* Always write back optlen, even on failure. Some protocols + * (e.g. CAN raw) return -ERANGE and set optlen to the + * required buffer size so userspace can discover it. 
+ */ + if (copy_to_sockptr(optlen, &opt.optlen, sizeof(int))) + return -EFAULT; + } else if (ops->getsockopt) { if (WARN_ONCE(optval.is_kernel || optlen.is_kernel, "Invalid argument type")) return -EOPNOTSUPP; err = ops->getsockopt(sock, level, optname, optval.user, optlen.user); + } else { + err = -EOPNOTSUPP; } if (!compat) diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index fe0e76fdd1f1..a23f4b4dfc67 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -45,6 +45,14 @@ static void strp_abort_strp(struct strparser *strp, int err) strp->stopped = 1; + if (strp->skb_head) { + kfree_skb(strp->skb_head); + strp->skb_head = NULL; + } + + strp->skb_nextp = NULL; + strp->need_bytes = 0; + if (strp->sk) { struct sock *sk = strp->sk; diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index b55df183e6d5..474df25e96a0 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -760,7 +760,7 @@ static int __switchdev_handle_port_obj_add(struct net_device *dev, /* Event is neither on a bridge nor a LAG. Check whether it is on an * interface that is in a bridge with us. */ - if (!foreign_dev_check_cb) + if (!foreign_dev_check_cb || port_obj_info->obj->flags & SWITCHDEV_F_NO_FOREIGN) return err; br = netdev_master_upper_dev_get(dev); diff --git a/net/tipc/Kconfig b/net/tipc/Kconfig index bb0d71eb02a6..18f62135e47b 100644 --- a/net/tipc/Kconfig +++ b/net/tipc/Kconfig @@ -6,7 +6,6 @@ menuconfig TIPC tristate "The TIPC Protocol" depends on INET - depends on IPV6 || IPV6=n help The Transparent Inter Process Communication (TIPC) protocol is specially designed for intra cluster communication. 
This protocol diff --git a/net/tipc/node.c b/net/tipc/node.c index af442a5ef8f3..97aa970a0d83 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1275,7 +1275,7 @@ void tipc_node_check_dest(struct net *net, u32 addr, goto exit; if_name = strchr(b->name, ':') + 1; - get_random_bytes(&session, sizeof(u16)); + session = get_random_u16(); if (!tipc_link_create(net, if_name, b->identity, b->tolerance, b->net_plane, b->mtu, b->priority, b->min_win, b->max_win, session, diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index 2b8e385d1e51..2c66b356025a 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -44,7 +44,6 @@ #include <net/sock.h> #include <net/ip.h> #include <net/udp_tunnel.h> -#include <net/ipv6_stubs.h> #include <linux/tipc_netlink.h> #include "core.h" #include "addr.h" @@ -207,9 +206,8 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, .saddr = src->ipv6, .flowi6_proto = IPPROTO_UDP }; - ndst = ipv6_stub->ipv6_dst_lookup_flow(net, - ub->ubsock->sk, - &fl6, NULL); + ndst = ip6_dst_lookup_flow(net, ub->ubsock->sk, + &fl6, NULL); if (IS_ERR(ndst)) { err = PTR_ERR(ndst); goto tx_error; @@ -418,8 +416,7 @@ static int enable_mcast(struct udp_bearer *ub, struct udp_media_addr *remote) #if IS_ENABLED(CONFIG_IPV6) } else { lock_sock(sk); - err = ipv6_stub->ipv6_sock_mc_join(sk, ub->ifindex, - &remote->ipv6); + err = ipv6_sock_mc_join(sk, ub->ifindex, &remote->ipv6); release_sock(sk); #endif } diff --git a/net/tls/Kconfig b/net/tls/Kconfig index ce8d56a19187..a25bf57f2673 100644 --- a/net/tls/Kconfig +++ b/net/tls/Kconfig @@ -8,7 +8,6 @@ config TLS select CRYPTO select CRYPTO_AES select CRYPTO_GCM - select STREAM_PARSER select NET_SOCK_MSG default n help diff --git a/net/tls/tls.h b/net/tls/tls.h index 2f86baeb71fc..e8f81a006520 100644 --- a/net/tls/tls.h +++ b/net/tls/tls.h @@ -161,7 +161,7 @@ void tls_sw_free_resources_rx(struct sock *sk); void tls_sw_release_resources_rx(struct sock *sk); void tls_sw_free_ctx_rx(struct tls_context 
*tls_ctx); int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len); + int flags); bool tls_sw_sock_is_readable(struct sock *sk); ssize_t tls_sw_splice_read(struct socket *sock, loff_t *ppos, struct pipe_inode_info *pipe, diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index d3c72f509baa..3b7d0ab2bcf1 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -149,9 +149,6 @@ static int tls_enc_records(struct aead_request *aead_req, return rc; } -/* Can't use icsk->icsk_af_ops->send_check here because the ip addresses - * might have been changed by NAT. - */ static void update_chksum(struct sk_buff *skb, int headln) { struct tcphdr *th = tcp_hdr(skb); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 83e78a3d1e65..94d2ae0daa8c 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2042,8 +2042,7 @@ static void tls_rx_reader_unlock(struct sock *sk, struct tls_sw_context_rx *ctx) int tls_sw_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, - int *addr_len) + int flags) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 662ddb91f735..4c4a8d23ddd2 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2671,7 +2671,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si const struct proto *prot = READ_ONCE(sk->sk_prot); if (prot != &unix_dgram_proto) - return prot->recvmsg(sk, msg, size, flags, NULL); + return prot->recvmsg(sk, msg, size, flags); #endif return __unix_dgram_recvmsg(sk, msg, size, flags); } @@ -3145,7 +3145,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, const struct proto *prot = READ_ONCE(sk->sk_prot); if (prot != &unix_stream_proto) - return prot->recvmsg(sk, msg, size, flags, NULL); + return prot->recvmsg(sk, msg, size, flags); #endif return 
unix_stream_read_generic(&state, true); } diff --git a/net/unix/unix_bpf.c b/net/unix/unix_bpf.c index e0d30d6d22ac..d14cd5454a8d 100644 --- a/net/unix/unix_bpf.c +++ b/net/unix/unix_bpf.c @@ -49,7 +49,7 @@ static int __unix_recvmsg(struct sock *sk, struct msghdr *msg, } static int unix_bpf_recvmsg(struct sock *sk, struct msghdr *msg, - size_t len, int flags, int *addr_len) + size_t len, int flags) { struct unix_sock *u = unix_sk(sk); struct sk_psock *psock; diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index d912ed2f012a..44037b066a5f 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -545,9 +545,13 @@ static void vsock_deassign_transport(struct vsock_sock *vsk) * The vsk->remote_addr is used to decide which transport to use: * - remote CID == VMADDR_CID_LOCAL or g2h->local_cid or VMADDR_CID_HOST if * g2h is not loaded, will use local transport; - * - remote CID <= VMADDR_CID_HOST or h2g is not loaded or remote flags field - * includes VMADDR_FLAG_TO_HOST flag value, will use guest->host transport; - * - remote CID > VMADDR_CID_HOST will use host->guest transport; + * - remote CID <= VMADDR_CID_HOST or remote flags field includes + * VMADDR_FLAG_TO_HOST, will use guest->host transport; + * - remote CID > VMADDR_CID_HOST and h2g is loaded and h2g claims that CID, + * will use host->guest transport; + * - h2g not loaded or h2g does not claim that CID and g2h claims the CID via + * has_remote_cid, will use guest->host transport (when g2h_fallback=1) + * - anything else goes to h2g or returns -ENODEV if no h2g is available */ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) { @@ -581,11 +585,21 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) case SOCK_SEQPACKET: if (vsock_use_local_transport(remote_cid)) new_transport = transport_local; - else if (remote_cid <= VMADDR_CID_HOST || !transport_h2g || + else if (remote_cid <= VMADDR_CID_HOST || (remote_flags & 
VMADDR_FLAG_TO_HOST)) new_transport = transport_g2h; - else + else if (transport_h2g && + (!transport_h2g->has_remote_cid || + transport_h2g->has_remote_cid(vsk, remote_cid))) + new_transport = transport_h2g; + else if (sock_net(sk)->vsock.g2h_fallback && + transport_g2h && transport_g2h->has_remote_cid && + transport_g2h->has_remote_cid(vsk, remote_cid)) { + vsk->remote_addr.svm_flags |= VMADDR_FLAG_TO_HOST; + new_transport = transport_g2h; + } else { new_transport = transport_h2g; + } break; default: ret = -ESOCKTNOSUPPORT; @@ -1502,7 +1516,7 @@ int vsock_dgram_recvmsg(struct socket *sock, struct msghdr *msg, prot = READ_ONCE(sk->sk_prot); if (prot != &vsock_proto) - return prot->recvmsg(sk, msg, len, flags, NULL); + return prot->recvmsg(sk, msg, len, flags); #endif return __vsock_dgram_recvmsg(sock, msg, len, flags); @@ -1850,10 +1864,10 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, * created upon connection establishment. */ timeout = sock_rcvtimeo(listener, arg->flags & O_NONBLOCK); - prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); while ((connected = vsock_dequeue_accept(listener)) == NULL && - listener->sk_err == 0) { + listener->sk_err == 0 && timeout != 0) { + prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); release_sock(listener); timeout = schedule_timeout(timeout); finish_wait(sk_sleep(listener), &wait); @@ -1862,17 +1876,14 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, if (signal_pending(current)) { err = sock_intr_errno(timeout); goto out; - } else if (timeout == 0) { - err = -EAGAIN; - goto out; } - - prepare_to_wait(sk_sleep(listener), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(listener), &wait); - if (listener->sk_err) + if (listener->sk_err) { err = -listener->sk_err; + } else if (!connected) { + err = -EAGAIN; + } if (connected) { sk_acceptq_removed(listener); @@ -1951,12 +1962,12 @@ static void vsock_update_buffer_size(struct vsock_sock *vsk, const 
struct vsock_transport *transport, u64 val) { - if (val > vsk->buffer_max_size) - val = vsk->buffer_max_size; - if (val < vsk->buffer_min_size) val = vsk->buffer_min_size; + if (val > vsk->buffer_max_size) + val = vsk->buffer_max_size; + if (val != vsk->buffer_size && transport && transport->notify_buffer_size) transport->notify_buffer_size(vsk, &val); @@ -2575,7 +2586,7 @@ vsock_connectible_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, prot = READ_ONCE(sk->sk_prot); if (prot != &vsock_proto) - return prot->recvmsg(sk, msg, len, flags, NULL); + return prot->recvmsg(sk, msg, len, flags); #endif return __vsock_connectible_recvmsg(sock, msg, len, flags); @@ -2879,6 +2890,15 @@ static struct ctl_table vsock_table[] = { .mode = 0644, .proc_handler = vsock_net_child_mode_string }, + { + .procname = "g2h_fallback", + .data = &init_net.vsock.g2h_fallback, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, }; static int __net_init vsock_sysctl_register(struct net *net) @@ -2894,6 +2914,7 @@ static int __net_init vsock_sysctl_register(struct net *net) table[0].data = &net->vsock.mode; table[1].data = &net->vsock.child_ns_mode; + table[2].data = &net->vsock.g2h_fallback; } net->vsock.sysctl_hdr = register_net_sysctl_sz(net, "net/vsock", table, @@ -2929,6 +2950,7 @@ static void vsock_net_init(struct net *net) net->vsock.child_ns_mode = net->vsock.mode; net->vsock.child_ns_mode_locked = 0; + net->vsock.g2h_fallback = 1; } static __net_init int vsock_sysctl_init_net(struct net *net) diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c index 069386a74557..2b7c0b5896ed 100644 --- a/net/vmw_vsock/hyperv_transport.c +++ b/net/vmw_vsock/hyperv_transport.c @@ -196,7 +196,7 @@ static int hvs_channel_readable_payload(struct vmbus_channel *chan) if (readable > HVS_PKT_LEN(0)) { /* At least we have 1 byte to read. 
We don't need to return - * the exact readable bytes: see vsock_stream_recvmsg() -> + * the exact readable bytes: see vsock_connectible_recvmsg() -> * vsock_stream_has_data(). */ return 1; diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 77fe5b7b066c..57f2d6ec3ffc 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -547,11 +547,18 @@ bool virtio_transport_stream_allow(struct vsock_sock *vsk, u32 cid, u32 port) static bool virtio_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid); +static bool virtio_transport_has_remote_cid(struct vsock_sock *vsk, u32 cid) +{ + /* The CID could be implemented by the host. Always assume it is. */ + return true; +} + static struct virtio_transport virtio_transport = { .transport = { .module = THIS_MODULE, .get_local_cid = virtio_transport_get_local_cid, + .has_remote_cid = virtio_transport_has_remote_cid, .init = virtio_transport_do_socket_init, .destruct = virtio_transport_destruct, diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 8a9fb23c6e85..a152a9e208d0 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -60,8 +60,6 @@ static bool virtio_transport_can_zcopy(const struct virtio_transport *t_ops, return false; /* Check that transport can send data in zerocopy mode. 
*/ - t_ops = virtio_transport_get_ops(info->vsk); - if (t_ops->can_msgzerocopy) { int pages_to_send = iov_iter_npages(iov_iter, MAX_SKB_FRAGS); diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c index 07b96d56f3a5..9049d2648646 100644 --- a/net/vmw_vsock/vsock_bpf.c +++ b/net/vmw_vsock/vsock_bpf.c @@ -74,7 +74,7 @@ static int __vsock_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int } static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg, - size_t len, int flags, int *addr_len) + size_t len, int flags) { struct sk_psock *psock; struct vsock_sock *vsk; diff --git a/net/wireless/Makefile b/net/wireless/Makefile index 62a83faf0e07..a77fd5ba6368 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -8,7 +8,7 @@ obj-$(CONFIG_WEXT_PRIV) += wext-priv.o cfg80211-y += core.o sysfs.o radiotap.o util.o reg.o scan.o nl80211.o cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o ap.o trace.o ocb.o -cfg80211-y += pmsr.o +cfg80211-y += michael-mic.o pmsr.o cfg80211-$(CONFIG_OF) += of.o cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 68221b1ab45e..8b94c0de80ad 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -6,7 +6,7 @@ * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2018-2025 Intel Corporation + * Copyright 2018-2026 Intel Corporation */ #include <linux/export.h> @@ -29,9 +29,11 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef, *chandef = (struct cfg80211_chan_def) { .chan = chan, - .freq1_offset = chan->freq_offset, }; + WARN_ON(chan->band == NL80211_BAND_60GHZ || + chan->band == NL80211_BAND_S1GHZ); + switch (chan_type) { case NL80211_CHAN_NO_HT: chandef->width = NL80211_CHAN_WIDTH_20_NOHT; @@ -315,7 +317,7 @@ static bool cfg80211_valid_center_freq(u32 center, int step; /* We only do strict 
verification on 6 GHz */ - if (center < 5955 || center > 7115) + if (center < 5955 || center > 7215) return true; bw = nl80211_chan_width_to_mhz(width); @@ -323,7 +325,7 @@ static bool cfg80211_valid_center_freq(u32 center, return false; /* Validate that the channels bw is entirely within the 6 GHz band */ - if (center - bw / 2 < 5945 || center + bw / 2 > 7125) + if (center - bw / 2 < 5945 || center + bw / 2 > 7225) return false; /* With 320 MHz the permitted channels overlap */ @@ -339,6 +341,58 @@ static bool cfg80211_valid_center_freq(u32 center, return (center - bw / 2 - 5945) % step == 0; } +static bool +cfg80211_chandef_valid_control_freq(const struct cfg80211_chan_def *chandef, + u32 control_freq) +{ + switch (chandef->width) { + case NL80211_CHAN_WIDTH_5: + case NL80211_CHAN_WIDTH_10: + case NL80211_CHAN_WIDTH_20: + case NL80211_CHAN_WIDTH_20_NOHT: + case NL80211_CHAN_WIDTH_1: + case NL80211_CHAN_WIDTH_2: + case NL80211_CHAN_WIDTH_4: + case NL80211_CHAN_WIDTH_8: + case NL80211_CHAN_WIDTH_16: + /* checked separately */ + break; + case NL80211_CHAN_WIDTH_320: + if (chandef->center_freq1 == control_freq + 150 || + chandef->center_freq1 == control_freq + 130 || + chandef->center_freq1 == control_freq + 110 || + chandef->center_freq1 == control_freq + 90 || + chandef->center_freq1 == control_freq - 90 || + chandef->center_freq1 == control_freq - 110 || + chandef->center_freq1 == control_freq - 130 || + chandef->center_freq1 == control_freq - 150) + break; + fallthrough; + case NL80211_CHAN_WIDTH_160: + if (chandef->center_freq1 == control_freq + 70 || + chandef->center_freq1 == control_freq + 50 || + chandef->center_freq1 == control_freq - 50 || + chandef->center_freq1 == control_freq - 70) + break; + fallthrough; + case NL80211_CHAN_WIDTH_80P80: + case NL80211_CHAN_WIDTH_80: + if (chandef->center_freq1 == control_freq + 30 || + chandef->center_freq1 == control_freq - 30) + break; + fallthrough; + case NL80211_CHAN_WIDTH_40: + if (chandef->center_freq1 == 
control_freq + 10 || + chandef->center_freq1 == control_freq - 10) + break; + fallthrough; + default: + return false; + } + + return true; +} + bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) { u32 control_freq, control_freq_khz, start_khz, end_khz; @@ -351,6 +405,14 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) control_freq = chandef->chan->center_freq; + if (cfg80211_chandef_is_s1g(chandef) && + chandef->width != NL80211_CHAN_WIDTH_1 && + chandef->width != NL80211_CHAN_WIDTH_2 && + chandef->width != NL80211_CHAN_WIDTH_4 && + chandef->width != NL80211_CHAN_WIDTH_8 && + chandef->width != NL80211_CHAN_WIDTH_16) + return false; + switch (chandef->width) { case NL80211_CHAN_WIDTH_5: case NL80211_CHAN_WIDTH_10: @@ -393,50 +455,8 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef) break; } - switch (chandef->width) { - case NL80211_CHAN_WIDTH_5: - case NL80211_CHAN_WIDTH_10: - case NL80211_CHAN_WIDTH_20: - case NL80211_CHAN_WIDTH_20_NOHT: - case NL80211_CHAN_WIDTH_1: - case NL80211_CHAN_WIDTH_2: - case NL80211_CHAN_WIDTH_4: - case NL80211_CHAN_WIDTH_8: - case NL80211_CHAN_WIDTH_16: - /* all checked above */ - break; - case NL80211_CHAN_WIDTH_320: - if (chandef->center_freq1 == control_freq + 150 || - chandef->center_freq1 == control_freq + 130 || - chandef->center_freq1 == control_freq + 110 || - chandef->center_freq1 == control_freq + 90 || - chandef->center_freq1 == control_freq - 90 || - chandef->center_freq1 == control_freq - 110 || - chandef->center_freq1 == control_freq - 130 || - chandef->center_freq1 == control_freq - 150) - break; - fallthrough; - case NL80211_CHAN_WIDTH_160: - if (chandef->center_freq1 == control_freq + 70 || - chandef->center_freq1 == control_freq + 50 || - chandef->center_freq1 == control_freq - 50 || - chandef->center_freq1 == control_freq - 70) - break; - fallthrough; - case NL80211_CHAN_WIDTH_80P80: - case NL80211_CHAN_WIDTH_80: - if (chandef->center_freq1 == control_freq 
+ 30 || - chandef->center_freq1 == control_freq - 30) - break; - fallthrough; - case NL80211_CHAN_WIDTH_40: - if (chandef->center_freq1 == control_freq + 10 || - chandef->center_freq1 == control_freq - 10) - break; - fallthrough; - default: + if (!cfg80211_chandef_valid_control_freq(chandef, control_freq)) return false; - } if (!cfg80211_valid_center_freq(chandef->center_freq1, chandef->width)) return false; @@ -642,6 +662,33 @@ void cfg80211_set_dfs_state(struct wiphy *wiphy, } } +void cfg80211_set_cac_state(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + bool cac_ongoing) +{ + struct ieee80211_channel *c; + int width; + u64 cac_time; + + if (WARN_ON(!cfg80211_chandef_valid(chandef))) + return; + + width = cfg80211_chandef_get_width(chandef); + if (width < 0) + return; + + /* Get the same timestamp for all subchannels */ + cac_time = cac_ongoing ? ktime_get_boottime_ns() : 0; + + for_each_subchan(chandef, freq, cf) { + c = ieee80211_get_channel_khz(wiphy, freq); + if (!c) + continue; + + c->cac_start_time = cac_time; + } +} + static bool cfg80211_dfs_permissive_check_wdev(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, @@ -754,6 +801,7 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, case NL80211_IFTYPE_AP: case NL80211_IFTYPE_P2P_GO: case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_NAN: width = cfg80211_chandef_get_width(chandef); if (width < 0) return -EINVAL; @@ -768,7 +816,7 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy, case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_DEVICE: - case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: break; case NL80211_IFTYPE_WDS: case NL80211_IFTYPE_UNSPECIFIED: @@ -892,6 +940,7 @@ bool cfg80211_beaconing_iface_active(struct wireless_dev *wdev) case NL80211_IFTYPE_P2P_DEVICE: /* Can NAN type be considered as beaconing interface? 
*/ case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: break; case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_WDS: diff --git a/net/wireless/core.c b/net/wireless/core.c index 28ca4290ca99..6783e0672dcb 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -5,7 +5,7 @@ * Copyright 2006-2010 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2015-2017 Intel Deutschland GmbH - * Copyright (C) 2018-2025 Intel Corporation + * Copyright (C) 2018-2026 Intel Corporation */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -254,6 +254,8 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { + struct cfg80211_nan_local_sched empty_sched = {}; + lockdep_assert_held(&rdev->wiphy.mtx); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_NAN)) @@ -262,6 +264,15 @@ void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, if (!wdev_running(wdev)) return; + /* + * If there is a scheduled update pending, mark it as canceled, so the + * empty schedule will be accepted + */ + wdev->u.nan.sched_update_pending = false; + + /* Unschedule all */ + cfg80211_nan_set_local_schedule(rdev, wdev, &empty_sched); + rdev_stop_nan(rdev, wdev); wdev->is_running = false; @@ -270,6 +281,47 @@ void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, rdev->opencount--; } +int cfg80211_nan_set_local_schedule(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_local_sched *sched) +{ + int ret; + + lockdep_assert_held(&rdev->wiphy.mtx); + + if (wdev->iftype != NL80211_IFTYPE_NAN || !wdev_running(wdev)) + return -EINVAL; + + if (wdev->u.nan.sched_update_pending) + return -EBUSY; + + ret = rdev_nan_set_local_sched(rdev, wdev, sched); + if (ret) + return ret; + + wdev->u.nan.sched_update_pending = sched->deferred; + + kfree(wdev->u.nan.chandefs); + wdev->u.nan.chandefs = NULL; + 
wdev->u.nan.n_channels = 0; + + if (!sched->n_channels) + return 0; + + wdev->u.nan.chandefs = kcalloc(sched->n_channels, + sizeof(*wdev->u.nan.chandefs), + GFP_KERNEL); + if (!wdev->u.nan.chandefs) + return -ENOMEM; + + for (int i = 0; i < sched->n_channels; i++) + wdev->u.nan.chandefs[i] = sched->nan_channels[i].chandef; + + wdev->u.nan.n_channels = sched->n_channels; + + return 0; +} + void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) { struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); @@ -277,16 +329,21 @@ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy) ASSERT_RTNL(); + /* + * Some netdev interfaces need to be closed before some non-netdev + * ones, i.e. NAN_DATA interfaces need to be closed before the NAN + * interface + */ list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { if (wdev->netdev) { dev_close(wdev->netdev); continue; } + } - /* otherwise, check iftype */ - - guard(wiphy)(wiphy); + guard(wiphy)(wiphy); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { switch (wdev->iftype) { case NL80211_IFTYPE_P2P_DEVICE: cfg80211_stop_p2p_device(rdev, wdev); @@ -344,17 +401,33 @@ void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev) list_for_each_entry_safe(wdev, tmp, &rdev->wiphy.wdev_list, list) { if (wdev->nl_owner_dead) { + cfg80211_close_dependents(rdev, wdev); + if (wdev->netdev) dev_close(wdev->netdev); guard(wiphy)(&rdev->wiphy); - cfg80211_leave(rdev, wdev, -1); cfg80211_remove_virtual_intf(rdev, wdev); } } } +void cfg80211_close_dependents(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev) +{ + ASSERT_RTNL(); + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return; + + /* Close all NAN DATA interfaces */ + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { + if (wdev->iftype == NL80211_IFTYPE_NAN_DATA) + dev_close(wdev->netdev); + } +} + static void cfg80211_destroy_iface_wk(struct work_struct *work) { struct cfg80211_registered_device *rdev; @@ -762,6 +835,10 @@ int 
wiphy_register(struct wiphy *wiphy) !(wiphy->nan_supported_bands & BIT(NL80211_BAND_2GHZ))))) return -EINVAL; + if (WARN_ON((wiphy->interface_modes & BIT(NL80211_IFTYPE_NAN_DATA)) && + !wiphy->nan_capa.phy.ht.ht_supported)) + return -EINVAL; + if (WARN_ON(wiphy->interface_modes & BIT(NL80211_IFTYPE_WDS))) return -EINVAL; @@ -1368,9 +1445,8 @@ void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, rdev->num_running_monitor_ifaces += num; } -void cfg80211_leave(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, - int link_id) +void cfg80211_leave_locked(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, int link_id) { struct net_device *dev = wdev->netdev; struct cfg80211_sched_scan_request *pos, *tmp; @@ -1421,6 +1497,7 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MONITOR: + case NL80211_IFTYPE_NAN_DATA: /* nothing to do */ break; case NL80211_IFTYPE_UNSPECIFIED: @@ -1431,6 +1508,19 @@ void cfg80211_leave(struct cfg80211_registered_device *rdev, } } +void cfg80211_leave(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, int link_id) +{ + ASSERT_RTNL(); + + /* NAN_DATA interfaces must be closed before stopping NAN */ + cfg80211_close_dependents(rdev, wdev); + + guard(wiphy)(&rdev->wiphy); + + cfg80211_leave_locked(rdev, wdev, link_id); +} + void cfg80211_stop_link(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, gfp_t gfp) { @@ -1446,6 +1536,9 @@ void cfg80211_stop_link(struct wiphy *wiphy, struct wireless_dev *wdev, trace_cfg80211_stop_link(wiphy, wdev, link_id); + if (wdev->iftype == NL80211_IFTYPE_NAN) + return; + ev = kzalloc_obj(*ev, gfp); if (!ev) return; @@ -1596,10 +1689,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, } break; case NETDEV_GOING_DOWN: - scoped_guard(wiphy, &rdev->wiphy) { - cfg80211_leave(rdev, wdev, -1); + cfg80211_leave(rdev, wdev, -1); + scoped_guard(wiphy, 
&rdev->wiphy) cfg80211_remove_links(wdev); - } /* since we just did cfg80211_leave() nothing to do there */ cancel_work_sync(&wdev->disconnect_wk); cancel_work_sync(&wdev->pmsr_free_wk); @@ -1680,6 +1772,23 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, if (rfkill_blocked(rdev->wiphy.rfkill)) return notifier_from_errno(-ERFKILL); + + /* NAN_DATA interfaces require a running NAN interface */ + if (wdev->iftype == NL80211_IFTYPE_NAN_DATA) { + struct wireless_dev *iter; + bool nan_started = false; + + list_for_each_entry(iter, &rdev->wiphy.wdev_list, list) { + if (iter->iftype == NL80211_IFTYPE_NAN && + wdev_running(iter)) { + nan_started = true; + break; + } + } + + if (!nan_started) + return notifier_from_errno(-ENOLINK); + } break; default: return NOTIFY_DONE; diff --git a/net/wireless/core.h b/net/wireless/core.h index 6ac57b7b2615..ae2d56d3ad90 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -318,6 +318,9 @@ void cfg80211_cqm_rssi_notify_work(struct wiphy *wiphy, void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev); +void cfg80211_close_dependents(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev); + /* free object */ void cfg80211_dev_free(struct cfg80211_registered_device *rdev); @@ -481,6 +484,10 @@ void cfg80211_set_dfs_state(struct wiphy *wiphy, const struct cfg80211_chan_def *chandef, enum nl80211_dfs_state dfs_state); +void cfg80211_set_cac_state(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + bool cac_ongoing); + void cfg80211_dfs_channels_update_work(struct work_struct *work); void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev); @@ -537,6 +544,9 @@ int cfg80211_validate_beacon_int(struct cfg80211_registered_device *rdev, void cfg80211_update_iface_num(struct cfg80211_registered_device *rdev, enum nl80211_iftype iftype, int num); +void cfg80211_leave_locked(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, int link_id); 
+ void cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, int link_id); @@ -547,6 +557,10 @@ void cfg80211_stop_p2p_device(struct cfg80211_registered_device *rdev, void cfg80211_stop_nan(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); +int cfg80211_nan_set_local_schedule(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_local_sched *sched); + struct cfg80211_internal_bss * cfg80211_bss_update(struct cfg80211_registered_device *rdev, struct cfg80211_internal_bss *tmp, diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index a7024af39b40..b1d748bdb504 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -3,7 +3,7 @@ * Some IBSS support code for cfg80211. * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2020-2024 Intel Corporation + * Copyright (C) 2020-2026 Intel Corporation */ #include <linux/etherdevice.h> @@ -172,7 +172,7 @@ void cfg80211_clear_ibss(struct net_device *dev, bool nowext) */ if (rdev->ops->del_key) for (i = 0; i < 6; i++) - rdev_del_key(rdev, dev, -1, i, false, NULL); + rdev_del_key(rdev, wdev, -1, i, false, NULL); if (wdev->u.ibss.current_bss) { cfg80211_unhold_bss(wdev->u.ibss.current_bss); diff --git a/net/mac80211/michael.c b/net/wireless/michael-mic.c index 8a1afc93e749..ec5164756e0a 100644 --- a/net/mac80211/michael.c +++ b/net/wireless/michael-mic.c @@ -5,10 +5,13 @@ */ #include <linux/types.h> #include <linux/bitops.h> +#include <linux/export.h> #include <linux/ieee80211.h> #include <linux/unaligned.h> -#include "michael.h" +struct michael_mic_ctx { + u32 l, r; +}; static void michael_block(struct michael_mic_ctx *mctx, u32 val) { @@ -81,3 +84,4 @@ void michael_mic(const u8 *key, struct ieee80211_hdr *hdr, put_unaligned_le32(mctx.l, mic); put_unaligned_le32(mctx.r, mic + 4); } +EXPORT_SYMBOL_GPL(michael_mic); diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 3fc175f9f868..bd72317c4964 100644 --- 
a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -4,7 +4,7 @@ * * Copyright (c) 2009, Jouni Malinen <j@w1.fi> * Copyright (c) 2015 Intel Deutschland GmbH - * Copyright (C) 2019-2020, 2022-2025 Intel Corporation + * Copyright (C) 2019-2020, 2022-2026 Intel Corporation */ #include <linux/kernel.h> @@ -782,8 +782,8 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) rdev_crit_proto_stop(rdev, wdev); } - if (nlportid == wdev->ap_unexpected_nlportid) - wdev->ap_unexpected_nlportid = 0; + if (nlportid == wdev->unexpected_nlportid) + wdev->unexpected_nlportid = 0; } void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) @@ -933,12 +933,17 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, * cfg80211 doesn't track the stations */ break; + case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: + if (mgmt->u.action.category != + WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION) + err = -EOPNOTSUPP; + break; case NL80211_IFTYPE_P2P_DEVICE: /* * fall through, P2P device only supports * public action frames */ - case NL80211_IFTYPE_NAN: default: err = -EOPNOTSUPP; break; @@ -1115,8 +1120,10 @@ void __cfg80211_radar_event(struct wiphy *wiphy, */ cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_UNAVAILABLE); - if (offchan) + if (offchan) { + cancel_delayed_work(&rdev->background_cac_done_wk); queue_work(cfg80211_wq, &rdev->background_cac_abort_wk); + } cfg80211_sched_dfs_chan_update(rdev); @@ -1160,9 +1167,11 @@ void cfg80211_cac_event(struct net_device *netdev, fallthrough; case NL80211_RADAR_CAC_ABORTED: wdev->links[link_id].cac_started = false; + cfg80211_set_cac_state(wiphy, chandef, false); break; case NL80211_RADAR_CAC_STARTED: wdev->links[link_id].cac_started = true; + cfg80211_set_cac_state(wiphy, chandef, true); break; default: WARN_ON(1); @@ -1187,23 +1196,21 @@ __cfg80211_background_cac_event(struct cfg80211_registered_device *rdev, if (!cfg80211_chandef_valid(chandef)) return; - if (!rdev->background_radar_wdev) - 
return; - switch (event) { case NL80211_RADAR_CAC_FINISHED: cfg80211_set_dfs_state(wiphy, chandef, NL80211_DFS_AVAILABLE); + cfg80211_set_cac_state(wiphy, chandef, false); memcpy(&rdev->cac_done_chandef, chandef, sizeof(*chandef)); queue_work(cfg80211_wq, &rdev->propagate_cac_done_wk); cfg80211_sched_dfs_chan_update(rdev); - wdev = rdev->background_radar_wdev; break; case NL80211_RADAR_CAC_ABORTED: + cfg80211_set_cac_state(wiphy, chandef, false); if (!cancel_delayed_work(&rdev->background_cac_done_wk)) return; - wdev = rdev->background_radar_wdev; break; case NL80211_RADAR_CAC_STARTED: + cfg80211_set_cac_state(wiphy, chandef, true); break; default: return; @@ -1213,17 +1220,6 @@ __cfg80211_background_cac_event(struct cfg80211_registered_device *rdev, nl80211_radar_notify(rdev, chandef, event, netdev, GFP_KERNEL); } -static void -cfg80211_background_cac_event(struct cfg80211_registered_device *rdev, - const struct cfg80211_chan_def *chandef, - enum nl80211_radar_event event) -{ - guard(wiphy)(&rdev->wiphy); - - __cfg80211_background_cac_event(rdev, rdev->background_radar_wdev, - chandef, event); -} - void cfg80211_background_cac_done_wk(struct work_struct *work) { struct delayed_work *delayed_work = to_delayed_work(work); @@ -1231,18 +1227,31 @@ void cfg80211_background_cac_done_wk(struct work_struct *work) rdev = container_of(delayed_work, struct cfg80211_registered_device, background_cac_done_wk); - cfg80211_background_cac_event(rdev, &rdev->background_radar_chandef, - NL80211_RADAR_CAC_FINISHED); + + guard(wiphy)(&rdev->wiphy); + + rdev_set_radar_background(rdev, NULL); + + __cfg80211_background_cac_event(rdev, rdev->background_radar_wdev, + &rdev->background_radar_chandef, + NL80211_RADAR_CAC_FINISHED); + + rdev->background_radar_wdev = NULL; } void cfg80211_background_cac_abort_wk(struct work_struct *work) { struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; rdev = container_of(work, struct cfg80211_registered_device, 
background_cac_abort_wk); - cfg80211_background_cac_event(rdev, &rdev->background_radar_chandef, - NL80211_RADAR_CAC_ABORTED); + + guard(wiphy)(&rdev->wiphy); + + wdev = rdev->background_radar_wdev; + if (wdev) + cfg80211_stop_background_radar_detection(wdev); } void cfg80211_background_cac_abort(struct wiphy *wiphy) @@ -1309,6 +1318,8 @@ void cfg80211_stop_radar_detection(struct wireless_dev *wdev) chandef = *wdev_chandef(wdev, link_id); rdev_end_cac(rdev, wdev->netdev, link_id); + wdev->links[link_id].cac_started = false; + cfg80211_set_cac_state(wiphy, &chandef, false); nl80211_radar_notify(rdev, &chandef, NL80211_RADAR_CAC_ABORTED, wdev->netdev, GFP_KERNEL); } @@ -1325,11 +1336,12 @@ void cfg80211_stop_background_radar_detection(struct wireless_dev *wdev) return; rdev_set_radar_background(rdev, NULL); - rdev->background_radar_wdev = NULL; /* Release offchain ownership */ __cfg80211_background_cac_event(rdev, wdev, &rdev->background_radar_chandef, NL80211_RADAR_CAC_ABORTED); + + rdev->background_radar_wdev = NULL; } int cfg80211_assoc_ml_reconf(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b94231c8441c..f334cdef8958 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -21,6 +21,7 @@ #include <linux/nospec.h> #include <linux/etherdevice.h> #include <linux/if_vlan.h> +#include <linux/random.h> #include <net/net_namespace.h> #include <net/genetlink.h> #include <net/cfg80211.h> @@ -332,13 +333,117 @@ static int validate_nan_cluster_id(const struct nlattr *attr, return 0; } +static int validate_nan_avail_blob(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + const u8 *data = nla_data(attr); + unsigned int len = nla_len(attr); + u16 attr_len; + + /* Need at least: Attr ID (1) + Length (2) */ + if (len < 3) { + NL_SET_ERR_MSG_FMT(extack, + "NAN Availability: Too short (need at least 3 bytes, have %u)", + len); + return -EINVAL; + } + + if (data[0] != 0x12) { + 
NL_SET_ERR_MSG_FMT(extack, + "NAN Availability: Invalid Attribute ID 0x%02x (expected 0x12)", + data[0]); + return -EINVAL; + } + + attr_len = get_unaligned_le16(&data[1]); + + if (attr_len != len - 3) { + NL_SET_ERR_MSG_FMT(extack, + "NAN Availability: Length field (%u) doesn't match data length (%u)", + attr_len, len - 3); + return -EINVAL; + } + + return 0; +} + +static int validate_nan_ulw(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + const u8 *data = nla_data(attr); + unsigned int len = nla_len(attr); + unsigned int pos = 0; + + while (pos < len) { + u16 attr_len; + + /* Need at least: Attr ID (1) + Length (2) */ + if (pos + 3 > len) { + NL_SET_ERR_MSG_FMT(extack, + "ULW: Incomplete header (need 3 bytes, have %u)", + len - pos); + return -EINVAL; + } + + if (data[pos] != 0x17) { + NL_SET_ERR_MSG_FMT(extack, + "ULW: Invalid Attribute ID 0x%02x (expected 0x17)", + data[pos]); + return -EINVAL; + } + pos++; + + /* Length is in little-endian format */ + attr_len = get_unaligned_le16(&data[pos]); + pos += 2; + + /* + * Check if length is one of the valid values: 16 (no + * channel/band entry included), 18 (band entry included), + * 21 (channel entry included without Auxiliary channel bitmap), + * or 23 (channel entry included with Auxiliary channel bitmap). 
+ */ + if (attr_len != 16 && attr_len != 18 && attr_len != 21 && + attr_len != 23) { + NL_SET_ERR_MSG_FMT(extack, + "ULW: Invalid length %u (must be 16, 18, 21, or 23)", + attr_len); + return -EINVAL; + } + + if (pos + attr_len > len) { + NL_SET_ERR_MSG_FMT(extack, + "ULW: Length field (%u) exceeds remaining data (%u)", + attr_len, len - pos); + return -EINVAL; + } + + pos += attr_len; + } + + return 0; +} + static int validate_uhr_capa(const struct nlattr *attr, struct netlink_ext_ack *extack) { const u8 *data = nla_data(attr); unsigned int len = nla_len(attr); - return ieee80211_uhr_capa_size_ok(data, len, false); + if (!ieee80211_uhr_capa_size_ok(data, len, false)) + return -EINVAL; + return 0; +} + +static int validate_uhr_operation(const struct nlattr *attr, + struct netlink_ext_ack *extack) +{ + const u8 *data = nla_data(attr); + unsigned int len = nla_len(attr); + + if (!ieee80211_uhr_oper_size_ok(data, len, false)) + return -EINVAL; + return 0; } /* policy for the attributes */ @@ -542,6 +647,13 @@ nl80211_nan_band_conf_policy[NL80211_NAN_BAND_CONF_ATTR_MAX + 1] = { }; static const struct nla_policy +nl80211_nan_peer_map_policy[NL80211_NAN_PEER_MAP_ATTR_MAX + 1] = { + [NL80211_NAN_PEER_MAP_ATTR_MAP_ID] = NLA_POLICY_MAX(NLA_U8, 15), + [NL80211_NAN_PEER_MAP_ATTR_TIME_SLOTS] = + NLA_POLICY_EXACT_LEN(CFG80211_NAN_SCHED_NUM_TIME_SLOTS), +}; + +static const struct nla_policy nl80211_nan_conf_policy[NL80211_NAN_CONF_ATTR_MAX + 1] = { [NL80211_NAN_CONF_CLUSTER_ID] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_nan_cluster_id, @@ -946,6 +1058,24 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_UHR_CAPABILITY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_uhr_capa, 255), [NL80211_ATTR_DISABLE_UHR] = { .type = NLA_FLAG }, + [NL80211_ATTR_UHR_OPERATION] = + NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_uhr_operation), + [NL80211_ATTR_NAN_CHANNEL] = NLA_POLICY_NESTED(nl80211_policy), + [NL80211_ATTR_NAN_CHANNEL_ENTRY] = 
NLA_POLICY_EXACT_LEN(6), + [NL80211_ATTR_NAN_RX_NSS] = { .type = NLA_U8 }, + [NL80211_ATTR_NAN_TIME_SLOTS] = + NLA_POLICY_EXACT_LEN(CFG80211_NAN_SCHED_NUM_TIME_SLOTS), + [NL80211_ATTR_NAN_AVAIL_BLOB] = + NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_nan_avail_blob), + [NL80211_ATTR_NAN_SCHED_DEFERRED] = { .type = NLA_FLAG }, + [NL80211_ATTR_NAN_NMI_MAC] = NLA_POLICY_ETH_ADDR, + [NL80211_ATTR_NAN_ULW] = + NLA_POLICY_VALIDATE_FN(NLA_BINARY, validate_nan_ulw), + [NL80211_ATTR_NAN_COMMITTED_DW] = { .type = NLA_U16 }, + [NL80211_ATTR_NAN_SEQ_ID] = { .type = NLA_U8 }, + [NL80211_ATTR_NAN_MAX_CHAN_SWITCH_TIME] = { .type = NLA_U16 }, + [NL80211_ATTR_NAN_PEER_MAPS] = + NLA_POLICY_NESTED_ARRAY(nl80211_nan_peer_map_policy), }; /* policy for the key attributes */ @@ -1333,6 +1463,12 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy, if ((chan->flags & IEEE80211_CHAN_NO_UHR) && nla_put_flag(msg, NL80211_FREQUENCY_ATTR_NO_UHR)) goto nla_put_failure; + if (chan->cac_start_time && + nla_put_u64_64bit(msg, + NL80211_FREQUENCY_ATTR_CAC_START_TIME, + chan->cac_start_time, + NL80211_FREQUENCY_ATTR_PAD)) + goto nla_put_failure; } if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_MAX_TX_POWER, @@ -1700,6 +1836,7 @@ static int nl80211_key_allowed(struct wireless_dev *wdev) return 0; return -ENOLINK; case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: if (wiphy_ext_feature_isset(wdev->wiphy, NL80211_EXT_FEATURE_SECURE_NAN)) return 0; @@ -2656,6 +2793,68 @@ fail: return -ENOBUFS; } +static int nl80211_put_nan_phy_cap(struct wiphy *wiphy, struct sk_buff *msg) +{ + struct nlattr *nl_phy_cap; + const struct ieee80211_sta_ht_cap *ht_cap; + const struct ieee80211_sta_vht_cap *vht_cap; + const struct ieee80211_sta_he_cap *he_cap; + + if (!cfg80211_iftype_allowed(wiphy, NL80211_IFTYPE_NAN_DATA, false, 0)) + return 0; + + ht_cap = &wiphy->nan_capa.phy.ht; + vht_cap = &wiphy->nan_capa.phy.vht; + he_cap = &wiphy->nan_capa.phy.he; + + /* HT is mandatory */ + if 
(WARN_ON(!ht_cap->ht_supported)) + return 0; + + nl_phy_cap = nla_nest_start_noflag(msg, NL80211_NAN_CAPA_PHY); + if (!nl_phy_cap) + return -ENOBUFS; + + if (nla_put(msg, NL80211_NAN_PHY_CAP_ATTR_HT_MCS_SET, + sizeof(ht_cap->mcs), &ht_cap->mcs) || + nla_put_u16(msg, NL80211_NAN_PHY_CAP_ATTR_HT_CAPA, ht_cap->cap) || + nla_put_u8(msg, NL80211_NAN_PHY_CAP_ATTR_HT_AMPDU_FACTOR, + ht_cap->ampdu_factor) || + nla_put_u8(msg, NL80211_NAN_PHY_CAP_ATTR_HT_AMPDU_DENSITY, + ht_cap->ampdu_density)) + goto fail; + + if (vht_cap->vht_supported) { + if (nla_put(msg, NL80211_NAN_PHY_CAP_ATTR_VHT_MCS_SET, + sizeof(vht_cap->vht_mcs), &vht_cap->vht_mcs) || + nla_put_u32(msg, NL80211_NAN_PHY_CAP_ATTR_VHT_CAPA, + vht_cap->cap)) + goto fail; + } + + if (he_cap->has_he) { + if (nla_put(msg, NL80211_NAN_PHY_CAP_ATTR_HE_MAC, + sizeof(he_cap->he_cap_elem.mac_cap_info), + he_cap->he_cap_elem.mac_cap_info) || + nla_put(msg, NL80211_NAN_PHY_CAP_ATTR_HE_PHY, + sizeof(he_cap->he_cap_elem.phy_cap_info), + he_cap->he_cap_elem.phy_cap_info) || + nla_put(msg, NL80211_NAN_PHY_CAP_ATTR_HE_MCS_SET, + sizeof(he_cap->he_mcs_nss_supp), + &he_cap->he_mcs_nss_supp) || + nla_put(msg, NL80211_NAN_PHY_CAP_ATTR_HE_PPE, + sizeof(he_cap->ppe_thres), he_cap->ppe_thres)) + goto fail; + } + + nla_nest_end(msg, nl_phy_cap); + return 0; + +fail: + nla_nest_cancel(msg, nl_phy_cap); + return -ENOBUFS; +} + static int nl80211_put_nan_capa(struct wiphy *wiphy, struct sk_buff *msg) { struct nlattr *nan_caps; @@ -2682,6 +2881,9 @@ static int nl80211_put_nan_capa(struct wiphy *wiphy, struct sk_buff *msg) wiphy->nan_capa.dev_capabilities)) goto fail; + if (nl80211_put_nan_phy_cap(wiphy, msg)) + goto fail; + nla_nest_end(msg, nan_caps); return 0; @@ -3567,11 +3769,10 @@ static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev) } static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev, - struct genl_info *info, bool monitor, + struct netlink_ext_ack *extack, + struct nlattr **attrs, bool monitor, 
struct cfg80211_chan_def *chandef) { - struct netlink_ext_ack *extack = info->extack; - struct nlattr **attrs = info->attrs; u32 control_freq; if (!attrs[NL80211_ATTR_WIPHY_FREQ]) { @@ -3581,10 +3782,10 @@ static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev, } control_freq = MHZ_TO_KHZ( - nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ])); - if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]) + nla_get_u32(attrs[NL80211_ATTR_WIPHY_FREQ])); + if (attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]) control_freq += - nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]); + nla_get_u32(attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]); memset(chandef, 0, sizeof(*chandef)); chandef->chan = ieee80211_get_channel_khz(&rdev->wiphy, control_freq); @@ -3613,6 +3814,9 @@ static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev, case NL80211_CHAN_HT20: case NL80211_CHAN_HT40PLUS: case NL80211_CHAN_HT40MINUS: + if (chandef->chan->band == NL80211_BAND_60GHZ || + chandef->chan->band == NL80211_BAND_S1GHZ) + return -EINVAL; cfg80211_chandef_create(chandef, chandef->chan, chantype); /* user input for center_freq is incorrect */ @@ -3655,40 +3859,43 @@ static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev, attrs[NL80211_ATTR_S1G_PRIMARY_2MHZ]); } - if (info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) { + if (attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]) { chandef->edmg.channels = - nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]); + nla_get_u8(attrs[NL80211_ATTR_WIPHY_EDMG_CHANNELS]); - if (info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]) + if (attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]) chandef->edmg.bw_config = - nla_get_u8(info->attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]); + nla_get_u8(attrs[NL80211_ATTR_WIPHY_EDMG_BW_CONFIG]); } else { chandef->edmg.bw_config = 0; chandef->edmg.channels = 0; } - if (info->attrs[NL80211_ATTR_PUNCT_BITMAP]) { + if (attrs[NL80211_ATTR_PUNCT_BITMAP]) { chandef->punctured = - 
nla_get_u32(info->attrs[NL80211_ATTR_PUNCT_BITMAP]); + nla_get_u32(attrs[NL80211_ATTR_PUNCT_BITMAP]); if (chandef->punctured && !wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_PUNCT)) { - NL_SET_ERR_MSG(extack, - "driver doesn't support puncturing"); + NL_SET_ERR_MSG_ATTR(extack, + attrs[NL80211_ATTR_WIPHY_FREQ], + "driver doesn't support puncturing"); return -EINVAL; } } if (!cfg80211_chandef_valid(chandef)) { - NL_SET_ERR_MSG(extack, "invalid channel definition"); + NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_WIPHY_FREQ], + "invalid channel definition"); return -EINVAL; } if (!_cfg80211_chandef_usable(&rdev->wiphy, chandef, IEEE80211_CHAN_DISABLED, monitor ? IEEE80211_CHAN_CAN_MONITOR : 0)) { - NL_SET_ERR_MSG(extack, "(extension) channel is disabled"); + NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_WIPHY_FREQ], + "(extension) channel is disabled"); return -EINVAL; } @@ -3703,10 +3910,11 @@ static int _nl80211_parse_chandef(struct cfg80211_registered_device *rdev, } int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, - struct genl_info *info, + struct netlink_ext_ack *extack, + struct nlattr **attrs, struct cfg80211_chan_def *chandef) { - return _nl80211_parse_chandef(rdev, info, false, chandef); + return _nl80211_parse_chandef(rdev, extack, attrs, false, chandef); } static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, @@ -3733,7 +3941,7 @@ static int __nl80211_set_channel(struct cfg80211_registered_device *rdev, link_id = 0; } - result = _nl80211_parse_chandef(rdev, info, + result = _nl80211_parse_chandef(rdev, info->extack, info->attrs, iftype == NL80211_IFTYPE_MONITOR, &chandef); if (result) @@ -4851,6 +5059,8 @@ static int nl80211_del_interface(struct sk_buff *skb, struct genl_info *info) else dev_close(wdev->netdev); + cfg80211_close_dependents(rdev, wdev); + mutex_lock(&rdev->wiphy.mtx); return cfg80211_remove_virtual_intf(rdev, wdev); @@ -4950,7 +5160,7 @@ static int nl80211_get_key(struct sk_buff *skb, 
struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int err; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; u8 key_idx = 0; const u8 *mac_addr = NULL; bool pairwise; @@ -4961,7 +5171,6 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; bool bigtk_support = false; int link_id = nl80211_link_id_or_invalid(info->attrs); - struct wireless_dev *wdev = dev->ieee80211_ptr; if (wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_BEACON_PROTECTION)) @@ -5013,7 +5222,10 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) cookie.msg = msg; cookie.idx = key_idx; - if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || + if ((wdev->netdev && + nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD) || nla_put_u8(msg, NL80211_ATTR_KEY_IDX, key_idx)) goto nla_put_failure; if (mac_addr && @@ -5024,7 +5236,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (err) goto free_msg; - err = rdev_get_key(rdev, dev, link_id, key_idx, pairwise, mac_addr, + err = rdev_get_key(rdev, wdev, link_id, key_idx, pairwise, mac_addr, &cookie, get_key_callback); if (err) @@ -5048,9 +5260,8 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct key_parse key; int err; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; int link_id = nl80211_link_id_or_invalid(info->attrs); - struct wireless_dev *wdev = dev->ieee80211_ptr; err = nl80211_parse_key(info, &key); if (err) @@ -5070,6 +5281,9 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->set_default_key) return -EOPNOTSUPP; + if (!wdev->netdev) + return -EINVAL; + err = nl80211_key_allowed(wdev); if (err) return 
err; @@ -5078,7 +5292,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (err) return err; - err = rdev_set_default_key(rdev, dev, link_id, key.idx, + err = rdev_set_default_key(rdev, wdev->netdev, link_id, key.idx, key.def_uni, key.def_multi); if (err) @@ -5103,7 +5317,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (err) return err; - err = rdev_set_default_mgmt_key(rdev, dev, link_id, key.idx); + err = rdev_set_default_mgmt_key(rdev, wdev, link_id, key.idx); if (err) return err; @@ -5126,7 +5340,8 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (err) return err; - return rdev_set_default_beacon_key(rdev, dev, link_id, key.idx); + return rdev_set_default_beacon_key(rdev, wdev, link_id, + key.idx); } else if (key.p.mode == NL80211_KEY_SET_TX && wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_EXT_KEY_ID)) { @@ -5142,7 +5357,7 @@ static int nl80211_set_key(struct sk_buff *skb, struct genl_info *info) if (err) return err; - return rdev_add_key(rdev, dev, link_id, key.idx, + return rdev_add_key(rdev, wdev, link_id, key.idx, NL80211_KEYTYPE_PAIRWISE, mac_addr, &key.p); } @@ -5154,11 +5369,10 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int err; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; struct key_parse key; const u8 *mac_addr = NULL; int link_id = nl80211_link_id_or_invalid(info->attrs); - struct wireless_dev *wdev = dev->ieee80211_ptr; err = nl80211_parse_key(info, &key); if (err) @@ -5209,7 +5423,7 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) key.type == NL80211_KEYTYPE_PAIRWISE); if (!err) { - err = rdev_add_key(rdev, dev, link_id, key.idx, + err = rdev_add_key(rdev, wdev, link_id, key.idx, key.type == NL80211_KEYTYPE_PAIRWISE, mac_addr, &key.p); if (err) @@ -5223,11 +5437,10 @@ static int 
nl80211_del_key(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int err; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; u8 *mac_addr = NULL; struct key_parse key; int link_id = nl80211_link_id_or_invalid(info->attrs); - struct wireless_dev *wdev = dev->ieee80211_ptr; err = nl80211_parse_key(info, &key); if (err) @@ -5266,7 +5479,7 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) key.type == NL80211_KEYTYPE_PAIRWISE); if (!err) - err = rdev_del_key(rdev, dev, link_id, key.idx, + err = rdev_del_key(rdev, wdev, link_id, key.idx, key.type == NL80211_KEYTYPE_PAIRWISE, mac_addr); @@ -5812,7 +6025,7 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info, */ BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8); nla_for_each_nested(tx_rates, attrs[attr], rem) { - enum nl80211_band band = nla_type(tx_rates); + int band = nla_type(tx_rates); int err; if (band < 0 || band >= NUM_NL80211_BANDS) @@ -6486,16 +6699,6 @@ static int nl80211_calculate_ap_params(struct cfg80211_ap_settings *params) return -EINVAL; } - cap = cfg80211_find_ext_elem(WLAN_EID_EXT_UHR_OPER, ies, ies_len); - if (cap) { - if (!cap->datalen) - return -EINVAL; - params->uhr_oper = (void *)(cap->data + 1); - if (!ieee80211_uhr_oper_size_ok((const u8 *)params->uhr_oper, - cap->datalen - 1, true)) - return -EINVAL; - } - return 0; } @@ -6541,6 +6744,10 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev, NL80211_EXT_FEATURE_EPPKE) && auth_type == NL80211_AUTHTYPE_EPPKE) return false; + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_IEEE8021X_AUTH) && + auth_type == NL80211_AUTHTYPE_IEEE8021X) + return false; return true; case NL80211_CMD_CONNECT: if (!(rdev->wiphy.features & NL80211_FEATURE_SAE) && @@ -6562,6 +6769,10 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev, 
NL80211_EXT_FEATURE_EPPKE) && auth_type == NL80211_AUTHTYPE_EPPKE) return false; + if (!wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_IEEE8021X_AUTH) && + auth_type == NL80211_AUTHTYPE_IEEE8021X) + return false; return true; case NL80211_CMD_START_AP: if (!wiphy_ext_feature_isset(&rdev->wiphy, @@ -6811,7 +7022,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { - err = nl80211_parse_chandef(rdev, info, &params->chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, + &params->chandef); if (err) goto out; } else if (wdev->valid_links) { @@ -6928,6 +7140,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info) if (err) goto out; + if (info->attrs[NL80211_ATTR_UHR_OPERATION]) + params->uhr_oper = nla_data(info->attrs[NL80211_ATTR_UHR_OPERATION]); + err = nl80211_validate_ap_phy_operation(params); if (err) goto out; @@ -7090,6 +7305,26 @@ static int parse_station_flags(struct genl_info *info, if ((params->sta_flags_mask | params->sta_flags_set) & BIT(__NL80211_STA_FLAG_INVALID)) return -EINVAL; + + if ((iftype == NL80211_IFTYPE_NAN || + iftype == NL80211_IFTYPE_NAN_DATA) && + params->sta_flags_mask & + ~(BIT(NL80211_STA_FLAG_AUTHENTICATED) | + BIT(NL80211_STA_FLAG_ASSOCIATED) | + BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_MFP))) + return -EINVAL; + + /* WME is always used in NAN */ + if (iftype == NL80211_IFTYPE_NAN_DATA) { + /* but don't let userspace control it */ + if (params->sta_flags_mask & BIT(NL80211_STA_FLAG_WME)) + return -EINVAL; + + params->sta_flags_mask |= BIT(NL80211_STA_FLAG_WME); + params->sta_flags_set |= BIT(NL80211_STA_FLAG_WME); + } + return 0; } @@ -7497,7 +7732,7 @@ nla_put_failure: static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, - struct net_device *dev, + struct wireless_dev *wdev, const u8 *mac_addr, struct station_info *sinfo, 
bool link_stats) { @@ -7513,7 +7748,10 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, return -1; } - if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || + if ((wdev->netdev && + nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex)) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD) || nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr) || nla_put_u32(msg, NL80211_ATTR_GENERATION, sinfo->generation)) goto nla_put_failure; @@ -7969,7 +8207,7 @@ static int nl80211_dump_station(struct sk_buff *skb, /* nl80211_prepare_wdev_dump acquired it in the successful case */ __acquire(&rdev->wiphy.mtx); - if (!wdev->netdev) { + if (!wdev->netdev && wdev->iftype != NL80211_IFTYPE_NAN) { err = -EINVAL; goto out_err; } @@ -7992,7 +8230,7 @@ static int nl80211_dump_station(struct sk_buff *skb, sinfo_alloc = true; } - err = rdev_dump_station(rdev, wdev->netdev, sta_idx, + err = rdev_dump_station(rdev, wdev, sta_idx, mac_addr, &sinfo); if (err == -ENOENT) break; @@ -8010,7 +8248,7 @@ static int nl80211_dump_station(struct sk_buff *skb, if (nl80211_send_station(skb, NL80211_CMD_NEW_STATION, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - rdev, wdev->netdev, mac_addr, + rdev, wdev, mac_addr, &sinfo, false) < 0) goto out; @@ -8031,7 +8269,7 @@ static int nl80211_dump_station(struct sk_buff *skb, static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = info->user_ptr[1]; struct station_info sinfo; struct sk_buff *msg; u8 *mac_addr = NULL; @@ -8039,6 +8277,9 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) memset(&sinfo, 0, sizeof(sinfo)); + if (!wdev->netdev) + return -EINVAL; + if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; @@ -8055,7 +8296,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct 
genl_info *info) } } - err = rdev_get_station(rdev, dev, mac_addr, &sinfo); + err = rdev_get_station(rdev, wdev, mac_addr, &sinfo); if (err) { cfg80211_sinfo_release_content(&sinfo); return err; @@ -8072,7 +8313,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, info->snd_portid, info->snd_seq, 0, - rdev, dev, mac_addr, &sinfo, false) < 0) { + rdev, wdev, mac_addr, &sinfo, false) < 0) { nlmsg_free(msg); return -ENOBUFS; } @@ -8153,10 +8394,12 @@ int cfg80211_check_station_change(struct wiphy *wiphy, return -EINVAL; if (params->link_sta_params.supported_rates) return -EINVAL; - if (params->ext_capab || params->link_sta_params.ht_capa || - params->link_sta_params.vht_capa || - params->link_sta_params.he_capa || - params->link_sta_params.eht_capa || + if (statype != CFG80211_STA_NAN_MGMT && + (params->link_sta_params.ht_capa || + params->link_sta_params.vht_capa || + params->link_sta_params.he_capa)) + return -EINVAL; + if (params->ext_capab || params->link_sta_params.eht_capa || params->link_sta_params.uhr_capa) return -EINVAL; if (params->sta_flags_mask & BIT(NL80211_STA_FLAG_SPP_AMSDU)) @@ -8228,6 +8471,19 @@ int cfg80211_check_station_change(struct wiphy *wiphy, params->plink_action != NL80211_PLINK_ACTION_BLOCK) return -EINVAL; break; + case CFG80211_STA_NAN_MGMT: + if (params->sta_flags_mask & + ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_MFP))) + return -EINVAL; + break; + case CFG80211_STA_NAN_DATA: + if (params->sta_flags_mask & + ~(BIT(NL80211_STA_FLAG_AUTHORIZED) | + BIT(NL80211_STA_FLAG_MFP) | + BIT(NL80211_STA_FLAG_WME))) + return -EINVAL; + break; } /* @@ -8434,13 +8690,18 @@ static int nl80211_parse_sta_txpower_setting(struct genl_info *info, static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = 
info->user_ptr[1]; + struct net_device *dev = wdev->netdev; struct station_parameters params; u8 *mac_addr; int err; memset(&params, 0, sizeof(params)); + if (!dev && wdev->iftype != NL80211_IFTYPE_NAN && + wdev->iftype != NL80211_IFTYPE_NAN_DATA) + return -EINVAL; + if (!rdev->ops->change_station) return -EOPNOTSUPP; @@ -8513,7 +8774,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) nla_len(info->attrs[NL80211_ATTR_STA_EXT_CAPABILITY]); } - if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params)) + if (parse_station_flags(info, wdev->iftype, &params)) return -EINVAL; if (info->attrs[NL80211_ATTR_STA_PLINK_ACTION]) @@ -8573,7 +8834,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(params.vlan)) return PTR_ERR(params.vlan); - switch (dev->ieee80211_ptr->iftype) { + switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: @@ -8581,6 +8842,8 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_STATION: case NL80211_IFTYPE_ADHOC: case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: break; default: err = -EOPNOTSUPP; @@ -8588,7 +8851,7 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info) } /* driver will call cfg80211_check_station_change() */ - err = rdev_change_station(rdev, dev, mac_addr, &params); + err = rdev_change_station(rdev, wdev, mac_addr, &params); out_put_vlan: dev_put(params.vlan); @@ -8600,8 +8863,8 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; int err; - struct net_device *dev = info->user_ptr[1]; - struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wireless_dev *wdev = info->user_ptr[1]; + struct net_device *dev = wdev->netdev; struct station_parameters params; u8 *mac_addr = NULL; u32 auth_assoc = BIT(NL80211_STA_FLAG_AUTHENTICATED) | 
@@ -8609,21 +8872,40 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) memset(&params, 0, sizeof(params)); + if (!dev && wdev->iftype != NL80211_IFTYPE_NAN) + return -EINVAL; + if (!rdev->ops->add_station) return -EOPNOTSUPP; if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) - return -EINVAL; + if (wdev->iftype == NL80211_IFTYPE_NAN || + wdev->iftype == NL80211_IFTYPE_NAN_DATA) { + if (info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) + return -EINVAL; + if (wdev->iftype == NL80211_IFTYPE_NAN_DATA) { + if (!info->attrs[NL80211_ATTR_NAN_NMI_MAC]) + return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) - return -EINVAL; + /* Only NMI stations receive the HT/VHT/HE capabilities */ + if (info->attrs[NL80211_ATTR_HT_CAPABILITY] || + info->attrs[NL80211_ATTR_VHT_CAPABILITY] || + info->attrs[NL80211_ATTR_HE_CAPABILITY]) + return -EINVAL; + } + } else { + if (!info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) + return -EINVAL; - if (!info->attrs[NL80211_ATTR_STA_AID] && - !info->attrs[NL80211_ATTR_PEER_AID]) - return -EINVAL; + if (!info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) + return -EINVAL; + + if (!info->attrs[NL80211_ATTR_STA_AID] && + !info->attrs[NL80211_ATTR_PEER_AID]) + return -EINVAL; + } params.link_sta_params.link_id = nl80211_link_id_or_invalid(info->attrs); @@ -8639,12 +8921,16 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); } - params.link_sta_params.supported_rates = - nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); - params.link_sta_params.supported_rates_len = - nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); - params.listen_interval = - nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); + if (info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]) { + params.link_sta_params.supported_rates = + nla_data(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); + 
params.link_sta_params.supported_rates_len = + nla_len(info->attrs[NL80211_ATTR_STA_SUPPORTED_RATES]); + } + + if (info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]) + params.listen_interval = + nla_get_u16(info->attrs[NL80211_ATTR_STA_LISTEN_INTERVAL]); if (info->attrs[NL80211_ATTR_VLAN_ID]) params.vlan_id = nla_get_u16(info->attrs[NL80211_ATTR_VLAN_ID]); @@ -8658,12 +8944,12 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) * and is NOT supported for AP interface */ params.support_p2p_ps = - dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_GO; + wdev->iftype == NL80211_IFTYPE_P2P_GO; } if (info->attrs[NL80211_ATTR_PEER_AID]) params.aid = nla_get_u16(info->attrs[NL80211_ATTR_PEER_AID]); - else + else if (info->attrs[NL80211_ATTR_STA_AID]) params.aid = nla_get_u16(info->attrs[NL80211_ATTR_STA_AID]); if (info->attrs[NL80211_ATTR_STA_CAPABILITY]) { @@ -8764,7 +9050,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) if (err) return err; - if (parse_station_flags(info, dev->ieee80211_ptr->iftype, &params)) + if (parse_station_flags(info, wdev->iftype, &params)) return -EINVAL; /* HT/VHT requires QoS, but if we don't have that just ignore HT/VHT @@ -8784,6 +9070,16 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } + if (wdev->iftype == NL80211_IFTYPE_NAN || + wdev->iftype == NL80211_IFTYPE_NAN_DATA) { + if (params.sta_modify_mask & STATION_PARAM_APPLY_UAPSD) + return -EINVAL; + /* NAN NMI station must be added in associated or authorized state */ + if (!(params.sta_flags_set & (BIT(NL80211_STA_FLAG_ASSOCIATED) | + BIT(NL80211_STA_FLAG_AUTHENTICATED)))) + return -EINVAL; + } + /* Ensure that HT/VHT capabilities are not set for 6 GHz HE STA */ if (params.link_sta_params.he_6ghz_capa && (params.link_sta_params.ht_capa || params.link_sta_params.vht_capa)) @@ -8792,7 +9088,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) /* When you run into this, 
adjust the code below for the new flag */ BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 8); - switch (dev->ieee80211_ptr->iftype) { + switch (wdev->iftype) { case NL80211_IFTYPE_AP: case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_P2P_GO: @@ -8876,6 +9172,11 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) */ params.sta_flags_mask &= ~BIT(NL80211_STA_FLAG_AUTHORIZED); break; + case NL80211_IFTYPE_NAN: + break; + case NL80211_IFTYPE_NAN_DATA: + params.nmi_mac = nla_data(info->attrs[NL80211_ATTR_NAN_NMI_MAC]); + break; default: return -EOPNOTSUPP; } @@ -8901,7 +9202,7 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info) params.epp_peer = nla_get_flag(info->attrs[NL80211_ATTR_EPP_PEER]); - err = rdev_add_station(rdev, dev, mac_addr, &params); + err = rdev_add_station(rdev, wdev, mac_addr, &params); out: dev_put(params.vlan); return err; @@ -8910,13 +9211,16 @@ out: static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; - struct net_device *dev = info->user_ptr[1]; - struct wireless_dev *wdev = dev->ieee80211_ptr; + struct wireless_dev *wdev = info->user_ptr[1]; + struct net_device *dev = wdev->netdev; struct station_del_parameters params; int link_id = nl80211_link_id_or_invalid(info->attrs); memset(&params, 0, sizeof(params)); + if (!dev && wdev->iftype != NL80211_IFTYPE_NAN) + return -EINVAL; + if (info->attrs[NL80211_ATTR_MAC]) params.mac = nla_data(info->attrs[NL80211_ATTR_MAC]); @@ -8925,6 +9229,8 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_AP_VLAN: case NL80211_IFTYPE_MESH_POINT: case NL80211_IFTYPE_P2P_GO: + case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: /* always accept these */ break; case NL80211_IFTYPE_ADHOC: @@ -8972,7 +9278,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) params.link_id = link_id; - return rdev_del_station(rdev, dev, &params); + 
return rdev_del_station(rdev, wdev, &params); } static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, @@ -10656,7 +10962,7 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_SUPP_RATES], tmp) { - enum nl80211_band band = nla_type(attr); + int band = nla_type(attr); if (band < 0 || band >= NUM_NL80211_BANDS) { err = -EINVAL; @@ -11287,7 +11593,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, if (dfs_region == NL80211_DFS_UNSET) return -EINVAL; - err = nl80211_parse_chandef(rdev, info, &chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, &chandef); if (err) return err; @@ -11353,6 +11659,7 @@ static int nl80211_start_radar_detection(struct sk_buff *skb, wdev->links[link_id].cac_started = true; wdev->links[link_id].cac_start_time = jiffies; wdev->links[link_id].cac_time_ms = cac_time_ms; + cfg80211_set_cac_state(wiphy, &chandef, true); return 0; } @@ -11375,7 +11682,7 @@ static int nl80211_notify_radar_detection(struct sk_buff *skb, return -EINVAL; } - err = nl80211_parse_chandef(rdev, info, &chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, &chandef); if (err) { GENL_SET_ERR_MSG(info, "Unable to extract chandef info"); return err; @@ -11560,7 +11867,8 @@ static int nl80211_channel_switch(struct sk_buff *skb, struct genl_info *info) goto free; skip_beacons: - err = nl80211_parse_chandef(rdev, info, &params.chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, + &params.chandef); if (err) goto free; @@ -12075,7 +12383,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) auth_type == NL80211_AUTHTYPE_FILS_SK || auth_type == NL80211_AUTHTYPE_FILS_SK_PFS || auth_type == NL80211_AUTHTYPE_FILS_PK || - auth_type == NL80211_AUTHTYPE_EPPKE) && + auth_type == NL80211_AUTHTYPE_EPPKE || + auth_type == NL80211_AUTHTYPE_IEEE8021X) && !info->attrs[NL80211_ATTR_AUTH_DATA]) return 
-EINVAL; @@ -12084,7 +12393,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) auth_type != NL80211_AUTHTYPE_FILS_SK && auth_type != NL80211_AUTHTYPE_FILS_SK_PFS && auth_type != NL80211_AUTHTYPE_FILS_PK && - auth_type != NL80211_AUTHTYPE_EPPKE) + auth_type != NL80211_AUTHTYPE_EPPKE && + auth_type != NL80211_AUTHTYPE_IEEE8021X) return -EINVAL; req.auth_data = nla_data(info->attrs[NL80211_ATTR_AUTH_DATA]); req.auth_data_len = nla_len(info->attrs[NL80211_ATTR_AUTH_DATA]); @@ -12781,7 +13091,8 @@ static int nl80211_join_ibss(struct sk_buff *skb, struct genl_info *info) ibss.ie_len = nla_len(info->attrs[NL80211_ATTR_IE]); } - err = nl80211_parse_chandef(rdev, info, &ibss.chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, + &ibss.chandef); if (err) return err; @@ -13779,7 +14090,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, duration > rdev->wiphy.max_remain_on_channel_duration) return -EINVAL; - err = nl80211_parse_chandef(rdev, info, &chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, &chandef); if (err) return err; @@ -13896,6 +14207,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_P2P_DEVICE: break; case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: if (!wiphy_ext_feature_isset(wdev->wiphy, NL80211_EXT_FEATURE_SECURE_NAN) && !(wdev->wiphy->nan_capa.flags & @@ -13959,6 +14271,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) case NL80211_IFTYPE_P2P_GO: break; case NL80211_IFTYPE_NAN: + case NL80211_IFTYPE_NAN_DATA: if (!wiphy_ext_feature_isset(wdev->wiphy, NL80211_EXT_FEATURE_SECURE_NAN) && !(wdev->wiphy->nan_capa.flags & @@ -13995,7 +14308,8 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) */ chandef.chan = NULL; if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { - err = nl80211_parse_chandef(rdev, info, &chandef); + err = nl80211_parse_chandef(rdev, info->extack, 
info->attrs, + &chandef); if (err) return err; } @@ -14227,7 +14541,7 @@ static int cfg80211_cqm_rssi_update(struct cfg80211_registered_device *rdev, mac_addr = wdev->links[0].client.current_bss->pub.bssid; - err = rdev_get_station(rdev, dev, mac_addr, &sinfo); + err = rdev_get_station(rdev, wdev, mac_addr, &sinfo); if (err) return err; @@ -14397,7 +14711,8 @@ static int nl80211_join_ocb(struct sk_buff *skb, struct genl_info *info) struct ocb_setup setup = {}; int err; - err = nl80211_parse_chandef(rdev, info, &setup.chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, + &setup.chandef); if (err) return err; @@ -14472,7 +14787,8 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) cfg.auto_open_plinks = false; if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) { - err = nl80211_parse_chandef(rdev, info, &setup.chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, + &setup.chandef); if (err) return err; } else { @@ -15463,13 +15779,14 @@ static int nl80211_register_unexpected_frame(struct sk_buff *skb, struct wireless_dev *wdev = dev->ieee80211_ptr; if (wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO) + wdev->iftype != NL80211_IFTYPE_P2P_GO && + wdev->iftype != NL80211_IFTYPE_NAN_DATA) return -EINVAL; - if (wdev->ap_unexpected_nlportid) + if (wdev->unexpected_nlportid) return -EBUSY; - wdev->ap_unexpected_nlportid = info->snd_portid; + wdev->unexpected_nlportid = info->snd_portid; return 0; } @@ -15725,9 +16042,16 @@ static int nl80211_parse_nan_conf(struct wiphy *wiphy, return err; changed |= CFG80211_NAN_CONF_CHANGED_CONFIG; - if (attrs[NL80211_NAN_CONF_CLUSTER_ID] && start) - conf->cluster_id = - nla_data(attrs[NL80211_NAN_CONF_CLUSTER_ID]); + if (attrs[NL80211_NAN_CONF_CLUSTER_ID] && start) { + ether_addr_copy(conf->cluster_id, + nla_data(attrs[NL80211_NAN_CONF_CLUSTER_ID])); + } else if (start) { + conf->cluster_id[0] = 0x50; + conf->cluster_id[1] = 0x6f; + conf->cluster_id[2] = 
0x9a; + conf->cluster_id[3] = 0x01; + get_random_bytes(&conf->cluster_id[4], 2); + } if (attrs[NL80211_NAN_CONF_EXTRA_ATTRS]) { conf->extra_nan_attrs = @@ -15858,6 +16182,10 @@ static int nl80211_stop_nan(struct sk_buff *skb, struct genl_info *info) if (wdev->iftype != NL80211_IFTYPE_NAN) return -EOPNOTSUPP; + cfg80211_close_dependents(rdev, wdev); + + guard(wiphy)(&rdev->wiphy); + cfg80211_stop_nan(rdev, wdev); return 0; @@ -16357,6 +16685,482 @@ nla_put_failure: } EXPORT_SYMBOL(cfg80211_nan_func_terminated); +void cfg80211_nan_sched_update_done(struct wireless_dev *wdev, bool success, + gfp_t gfp) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + void *hdr; + + trace_cfg80211_nan_sched_update_done(wiphy, wdev, success); + + /* Can happen if we stopped NAN */ + if (!wdev->u.nan.sched_update_pending) + return; + + wdev->u.nan.sched_update_pending = false; + + if (!wdev->owner_nlportid) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NAN_SCHED_UPDATE_DONE); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD) || + (success && + nla_put_flag(msg, NL80211_ATTR_NAN_SCHED_UPDATE_SUCCESS))) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_unicast(wiphy_net(wiphy), msg, wdev->owner_nlportid); + + return; + +nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_nan_sched_update_done); + +static int nl80211_parse_nan_channel(struct cfg80211_registered_device *rdev, + struct nlattr *channel, + struct genl_info *info, + struct cfg80211_nan_channel *nan_channels, + u8 index, bool local) +{ + struct nlattr **channel_parsed __free(kfree) = NULL; + struct cfg80211_chan_def chandef; + u8 n_rx_nss; + int ret; + + channel_parsed = kcalloc(NL80211_ATTR_MAX + 1, 
sizeof(*channel_parsed), + GFP_KERNEL); + if (!channel_parsed) + return -ENOMEM; + + ret = nla_parse_nested(channel_parsed, NL80211_ATTR_MAX, channel, NULL, + info->extack); + if (ret) + return ret; + + ret = nl80211_parse_chandef(rdev, info->extack, channel_parsed, + &chandef); + if (ret) + return ret; + + if (chandef.chan->band == NL80211_BAND_6GHZ) { + NL_SET_ERR_MSG(info->extack, + "6 GHz band is not supported"); + return -EOPNOTSUPP; + } + + if (!cfg80211_reg_can_beacon(&rdev->wiphy, &chandef, + NL80211_IFTYPE_NAN)) { + NL_SET_ERR_MSG_ATTR(info->extack, channel, + "Channel in NAN schedule is not allowed for NAN operation"); + return -EINVAL; + } + + if (local) { + for (int i = 0; i < index; i++) { + if (cfg80211_chandef_compatible(&nan_channels[i].chandef, + &chandef)) { + NL_SET_ERR_MSG_ATTR(info->extack, channel, + "Channels in NAN schedule must be mutually incompatible"); + return -EINVAL; + } + } + } + + if (!channel_parsed[NL80211_ATTR_NAN_CHANNEL_ENTRY]) { + NL_SET_ERR_MSG(info->extack, + "Missing NAN channel entry attribute"); + return -EINVAL; + } + + nan_channels[index].channel_entry = + nla_data(channel_parsed[NL80211_ATTR_NAN_CHANNEL_ENTRY]); + + if (!channel_parsed[NL80211_ATTR_NAN_RX_NSS]) { + NL_SET_ERR_MSG(info->extack, + "Missing NAN RX NSS attribute"); + return -EINVAL; + } + + nan_channels[index].rx_nss = + nla_get_u8(channel_parsed[NL80211_ATTR_NAN_RX_NSS]); + + n_rx_nss = u8_get_bits(rdev->wiphy.nan_capa.n_antennas, 0x03); + if ((local && nan_channels[index].rx_nss > n_rx_nss) || + !nan_channels[index].rx_nss) { + NL_SET_ERR_MSG_ATTR(info->extack, channel, + "Invalid RX NSS in NAN channel definition"); + return -EINVAL; + } + + nan_channels[index].chandef = chandef; + + return 0; +} + +static int +nl80211_parse_nan_schedule(struct genl_info *info, struct nlattr *slots_attr, + u8 schedule[CFG80211_NAN_SCHED_NUM_TIME_SLOTS], + u8 n_channels) +{ + if (WARN_ON(nla_len(slots_attr) != CFG80211_NAN_SCHED_NUM_TIME_SLOTS)) + return -EINVAL; + + 
memcpy(schedule, nla_data(slots_attr), nla_len(slots_attr)); + + for (int slot = 0; slot < CFG80211_NAN_SCHED_NUM_TIME_SLOTS; slot++) { + if (schedule[slot] != NL80211_NAN_SCHED_NOT_AVAIL_SLOT && + schedule[slot] >= n_channels) { + NL_SET_ERR_MSG_FMT(info->extack, + "Invalid time slot: slot %d refers to channel index %d, n_channels=%d", + slot, schedule[slot], n_channels); + return -EINVAL; + } + } + + return 0; +} + +static int +nl80211_parse_nan_peer_map(struct genl_info *info, struct nlattr *map_attr, + struct cfg80211_nan_peer_map *map, u8 n_channels) +{ + struct nlattr *tb[NL80211_NAN_PEER_MAP_ATTR_MAX + 1]; + int ret; + + ret = nla_parse_nested(tb, NL80211_NAN_PEER_MAP_ATTR_MAX, map_attr, + nl80211_nan_peer_map_policy, info->extack); + if (ret) + return ret; + + if (!tb[NL80211_NAN_PEER_MAP_ATTR_MAP_ID] || + !tb[NL80211_NAN_PEER_MAP_ATTR_TIME_SLOTS]) { + NL_SET_ERR_MSG(info->extack, + "Missing required peer map attributes"); + return -EINVAL; + } + + map->map_id = nla_get_u8(tb[NL80211_NAN_PEER_MAP_ATTR_MAP_ID]); + + /* Parse schedule */ + return nl80211_parse_nan_schedule(info, + tb[NL80211_NAN_PEER_MAP_ATTR_TIME_SLOTS], + map->schedule, n_channels); +} + +static int nl80211_nan_validate_map_pair(struct wiphy *wiphy, + struct genl_info *info, + const struct cfg80211_nan_peer_map *map1, + const struct cfg80211_nan_peer_map *map2, + struct cfg80211_nan_channel *nan_channels) +{ + /* Check for duplicate map_id */ + if (map1->map_id == map2->map_id) { + NL_SET_ERR_MSG_FMT(info->extack, "Duplicate map_id %u", + map1->map_id); + return -EINVAL; + } + + /* Check for compatible channels between maps */ + for (int i = 0; i < ARRAY_SIZE(map1->schedule); i++) { + if (map1->schedule[i] == NL80211_NAN_SCHED_NOT_AVAIL_SLOT) + continue; + + for (int j = 0; j < ARRAY_SIZE(map2->schedule); j++) { + u8 ch1 = map1->schedule[i]; + u8 ch2 = map2->schedule[j]; + + if (ch2 == NL80211_NAN_SCHED_NOT_AVAIL_SLOT) + continue; + + if 
(cfg80211_chandef_compatible(&nan_channels[ch1].chandef, + &nan_channels[ch2].chandef)) { + NL_SET_ERR_MSG_FMT(info->extack, + "Maps %u and %u have compatible channels %d and %d", + map1->map_id, map2->map_id, + ch1, ch2); + return -EINVAL; + } + } + } + + /* + * Check for conflicting time slots between maps. + * Only check for single-radio devices (n_radio <= 1) which cannot + * operate on multiple channels simultaneously. + */ + if (wiphy->n_radio > 1) + return 0; + + for (int i = 0; i < ARRAY_SIZE(map1->schedule); i++) { + if (map1->schedule[i] != NL80211_NAN_SCHED_NOT_AVAIL_SLOT && + map2->schedule[i] != NL80211_NAN_SCHED_NOT_AVAIL_SLOT) { + NL_SET_ERR_MSG_FMT(info->extack, + "Maps %u and %u both schedule slot %d", + map1->map_id, map2->map_id, i); + return -EINVAL; + } + } + + return 0; +} + +static int nl80211_nan_set_peer_sched(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_nan_channel *nan_channels __free(kfree) = NULL; + struct cfg80211_nan_peer_sched sched = {}; + struct wireless_dev *wdev = info->user_ptr[1]; + struct nlattr *map_attr, *channel; + int ret, n_maps = 0, n_channels = 0, i = 0, rem; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_MAC] || + !info->attrs[NL80211_ATTR_NAN_COMMITTED_DW]) { + NL_SET_ERR_MSG(info->extack, + "Required NAN peer schedule attributes are missing"); + return -EINVAL; + } + + /* First count how many channel attributes we got */ + nlmsg_for_each_attr_type(channel, NL80211_ATTR_NAN_CHANNEL, + info->nlhdr, GENL_HDRLEN, rem) + n_channels++; + + if (!((info->attrs[NL80211_ATTR_NAN_SEQ_ID] && + info->attrs[NL80211_ATTR_NAN_PEER_MAPS] && n_channels) || + ((!info->attrs[NL80211_ATTR_NAN_SEQ_ID] && + !info->attrs[NL80211_ATTR_NAN_PEER_MAPS] && !n_channels)))) { + NL_SET_ERR_MSG(info->extack, + "Either provide all of: seq id, channels and maps, or none"); + return -EINVAL; + } + + /* + * Limit the 
number of peer channels to: + * local_channels * 4 (possible BWs) * 2 (possible NSS values) + */ + if (n_channels && n_channels > wdev->u.nan.n_channels * 4 * 2) { + NL_SET_ERR_MSG_FMT(info->extack, + "Too many peer channels: %d (max %d)", + n_channels, + wdev->u.nan.n_channels * 4 * 2); + return -EINVAL; + } + + if (n_channels) { + nan_channels = kcalloc(n_channels, sizeof(*nan_channels), + GFP_KERNEL); + if (!nan_channels) + return -ENOMEM; + } + + /* Parse peer channels */ + nlmsg_for_each_attr_type(channel, NL80211_ATTR_NAN_CHANNEL, + info->nlhdr, GENL_HDRLEN, rem) { + bool compatible = false; + + ret = nl80211_parse_nan_channel(rdev, channel, info, + nan_channels, i, false); + if (ret) + return ret; + + /* Verify channel is compatible with at least one local channel */ + for (int j = 0; j < wdev->u.nan.n_channels; j++) { + if (cfg80211_chandef_compatible(&nan_channels[i].chandef, + &wdev->u.nan.chandefs[j])) { + compatible = true; + break; + } + } + if (!compatible) { + NL_SET_ERR_MSG_FMT(info->extack, + "Channel %d not compatible with any local channel", + i); + return -EINVAL; + } + i++; + } + + sched.n_channels = n_channels; + sched.nan_channels = nan_channels; + sched.peer_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + sched.seq_id = nla_get_u8_default(info->attrs[NL80211_ATTR_NAN_SEQ_ID], 0); + sched.committed_dw = nla_get_u16(info->attrs[NL80211_ATTR_NAN_COMMITTED_DW]); + sched.max_chan_switch = + nla_get_u16_default(info->attrs[NL80211_ATTR_NAN_MAX_CHAN_SWITCH_TIME], 0); + + if (info->attrs[NL80211_ATTR_NAN_ULW]) { + sched.ulw_size = nla_len(info->attrs[NL80211_ATTR_NAN_ULW]); + sched.init_ulw = nla_data(info->attrs[NL80211_ATTR_NAN_ULW]); + } + + /* Initialize all maps as invalid */ + for (int j = 0; j < ARRAY_SIZE(sched.maps); j++) + sched.maps[j].map_id = CFG80211_NAN_INVALID_MAP_ID; + + if (info->attrs[NL80211_ATTR_NAN_PEER_MAPS]) { + /* Parse each map */ + nla_for_each_nested(map_attr, info->attrs[NL80211_ATTR_NAN_PEER_MAPS], + rem) { + if 
(n_maps >= ARRAY_SIZE(sched.maps)) { + NL_SET_ERR_MSG(info->extack, "Too many peer maps"); + return -EINVAL; + } + + ret = nl80211_parse_nan_peer_map(info, map_attr, + &sched.maps[n_maps], + n_channels); + if (ret) + return ret; + + /* Validate against previous maps */ + for (int j = 0; j < n_maps; j++) { + ret = nl80211_nan_validate_map_pair(&rdev->wiphy, info, + &sched.maps[j], + &sched.maps[n_maps], + nan_channels); + if (ret) + return ret; + } + + n_maps++; + } + } + + /* Verify each channel is scheduled at least once */ + for (int ch = 0; ch < n_channels; ch++) { + bool scheduled = false; + + for (int m = 0; m < n_maps && !scheduled; m++) { + for (int s = 0; s < ARRAY_SIZE(sched.maps[m].schedule); s++) { + if (sched.maps[m].schedule[s] == ch) { + scheduled = true; + break; + } + } + } + if (!scheduled) { + NL_SET_ERR_MSG_FMT(info->extack, + "Channel %d is not scheduled in any map", + ch); + return -EINVAL; + } + } + + return rdev_nan_set_peer_sched(rdev, wdev, &sched); +} + +static bool nl80211_nan_is_sched_empty(struct cfg80211_nan_local_sched *sched) +{ + if (!sched->n_channels) + return true; + + for (int i = 0; i < ARRAY_SIZE(sched->schedule); i++) { + if (sched->schedule[i] != NL80211_NAN_SCHED_NOT_AVAIL_SLOT) + return false; + } + + return true; +} + +static int nl80211_nan_set_local_sched(struct sk_buff *skb, + struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct cfg80211_nan_local_sched *sched __free(kfree) = NULL; + struct wireless_dev *wdev = info->user_ptr[1]; + int rem, i = 0, n_channels = 0, ret; + struct nlattr *channel; + bool sched_empty; + + if (wdev->iftype != NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + + if (!wdev_running(wdev)) + return -ENOTCONN; + + if (!info->attrs[NL80211_ATTR_NAN_TIME_SLOTS]) + return -EINVAL; + + /* First count how many channel attributes we got */ + nlmsg_for_each_attr_type(channel, NL80211_ATTR_NAN_CHANNEL, + info->nlhdr, GENL_HDRLEN, rem) + n_channels++; + + sched = 
kzalloc(struct_size(sched, nan_channels, n_channels), + GFP_KERNEL); + if (!sched) + return -ENOMEM; + + sched->n_channels = n_channels; + + nlmsg_for_each_attr_type(channel, NL80211_ATTR_NAN_CHANNEL, + info->nlhdr, GENL_HDRLEN, rem) { + ret = nl80211_parse_nan_channel(rdev, channel, info, + sched->nan_channels, i, true); + + if (ret) + return ret; + i++; + } + + /* Parse and validate schedule */ + ret = nl80211_parse_nan_schedule(info, + info->attrs[NL80211_ATTR_NAN_TIME_SLOTS], + sched->schedule, sched->n_channels); + if (ret) + return ret; + + sched_empty = nl80211_nan_is_sched_empty(sched); + + sched->deferred = + nla_get_flag(info->attrs[NL80211_ATTR_NAN_SCHED_DEFERRED]); + + if (sched_empty) { + if (sched->deferred) { + NL_SET_ERR_MSG(info->extack, + "Schedule cannot be deferred if all time slots are unavailable"); + return -EINVAL; + } + + if (info->attrs[NL80211_ATTR_NAN_AVAIL_BLOB]) { + NL_SET_ERR_MSG(info->extack, + "NAN Availability blob must be empty if all time slots are unavailable"); + return -EINVAL; + } + } else { + if (!info->attrs[NL80211_ATTR_NAN_AVAIL_BLOB]) { + NL_SET_ERR_MSG(info->extack, + "NAN Availability blob attribute is required"); + return -EINVAL; + } + + sched->nan_avail_blob = + nla_data(info->attrs[NL80211_ATTR_NAN_AVAIL_BLOB]); + sched->nan_avail_blob_len = + nla_len(info->attrs[NL80211_ATTR_NAN_AVAIL_BLOB]); + } + + return cfg80211_nan_set_local_schedule(rdev, wdev, sched); +} + static int nl80211_get_protocol_features(struct sk_buff *skb, struct genl_info *info) { @@ -16947,7 +17751,7 @@ static int nl80211_tdls_channel_switch(struct sk_buff *skb, !info->attrs[NL80211_ATTR_OPER_CLASS]) return -EINVAL; - err = nl80211_parse_chandef(rdev, info, &chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, &chandef); if (err) return err; @@ -17326,7 +18130,7 @@ static int nl80211_probe_mesh_link(struct sk_buff *skb, struct genl_info *info) !ether_addr_equal(buf + ETH_ALEN, dev->dev_addr)) return -EINVAL; - err = 
rdev_get_station(rdev, dev, dest, &sinfo); + err = rdev_get_station(rdev, wdev, dest, &sinfo); if (err) return err; @@ -18021,6 +18825,9 @@ nl80211_epcs_cfg(struct sk_buff *skb, struct genl_info *info) NL80211_FLAG_CLEAR_SKB) \ SELECTOR(__sel, WDEV_UP, \ NL80211_FLAG_NEED_WDEV_UP) \ + SELECTOR(__sel, WDEV_UP_CLEAR, \ + NL80211_FLAG_NEED_WDEV_UP | \ + NL80211_FLAG_CLEAR_SKB) \ SELECTOR(__sel, WDEV_UP_LINK, \ NL80211_FLAG_NEED_WDEV_UP | \ NL80211_FLAG_MLO_VALID_LINK_ID) \ @@ -18029,7 +18836,11 @@ nl80211_epcs_cfg(struct sk_buff *skb, struct genl_info *info) NL80211_FLAG_NEED_RTNL) \ SELECTOR(__sel, WIPHY_CLEAR, \ NL80211_FLAG_NEED_WIPHY | \ - NL80211_FLAG_CLEAR_SKB) + NL80211_FLAG_CLEAR_SKB) \ + SELECTOR(__sel, WDEV_UP_RTNL_NOMTX, \ + NL80211_FLAG_NEED_WDEV_UP | \ + NL80211_FLAG_NO_WIPHY_MTX | \ + NL80211_FLAG_NEED_RTNL) enum nl80211_internal_flags_selector { #define SELECTOR(_, name, value) NL80211_IFL_SEL_##name, @@ -18353,7 +19164,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_key, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_SET_KEY, @@ -18361,7 +19172,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_set_key, .flags = GENL_UNS_ADMIN_PERM, /* cannot use NL80211_FLAG_MLO_VALID_LINK_ID, depends on key */ - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_CLEAR_SKB), }, { @@ -18369,7 +19180,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_key, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP | + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP | NL80211_FLAG_CLEAR_SKB), }, { @@ -18377,7 +19188,7 @@ static 
const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_del_key, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_SET_BEACON, @@ -18408,21 +19219,21 @@ static const struct genl_small_ops nl80211_small_ops[] = { .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_get_station, .dumpit = nl80211_dump_station, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV), }, { .cmd = NL80211_CMD_SET_STATION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_set_station, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_NEW_STATION, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, .doit = nl80211_new_station, .flags = GENL_UNS_ADMIN_PERM, - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_DEL_STATION, @@ -18433,7 +19244,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { * whether MAC address is passed or not. If MAC address is * passed, then even during MLO, link ID is not required. 
*/ - .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), }, { .cmd = NL80211_CMD_GET_MPATH, @@ -18866,6 +19677,7 @@ static const struct genl_small_ops nl80211_small_ops[] = { .doit = nl80211_stop_nan, .flags = GENL_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP | + NL80211_FLAG_NO_WIPHY_MTX | NL80211_FLAG_NEED_RTNL), }, { @@ -19160,6 +19972,18 @@ static const struct genl_small_ops nl80211_small_ops[] = { .flags = GENL_UNS_ADMIN_PERM, .internal_flags = IFLAGS(NL80211_FLAG_NEED_NETDEV_UP), }, + { + .cmd = NL80211_CMD_NAN_SET_LOCAL_SCHED, + .doit = nl80211_nan_set_local_sched, + .flags = GENL_ADMIN_PERM, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), + }, + { + .cmd = NL80211_CMD_NAN_SET_PEER_SCHED, + .doit = nl80211_nan_set_peer_sched, + .flags = GENL_ADMIN_PERM, + .internal_flags = IFLAGS(NL80211_FLAG_NEED_WDEV_UP), + }, }; static struct genl_family nl80211_fam __ro_after_init = { @@ -20364,21 +21188,21 @@ void cfg80211_tx_mgmt_expired(struct wireless_dev *wdev, u64 cookie, } EXPORT_SYMBOL(cfg80211_tx_mgmt_expired); -void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, +void cfg80211_new_sta(struct wireless_dev *wdev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp) { - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; - trace_cfg80211_new_sta(dev, mac_addr, sinfo); + trace_cfg80211_new_sta(wdev, mac_addr, sinfo); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) return; if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, 0, 0, 0, - rdev, dev, mac_addr, sinfo, false) < 0) { + rdev, wdev, mac_addr, sinfo, false) < 0) { nlmsg_free(msg); return; } @@ -20388,10 +21212,10 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, } EXPORT_SYMBOL(cfg80211_new_sta); -void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 
*mac_addr, +void cfg80211_del_sta_sinfo(struct wireless_dev *wdev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp) { - struct wiphy *wiphy = dev->ieee80211_ptr->wiphy; + struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; struct station_info empty_sinfo = {}; @@ -20399,7 +21223,7 @@ void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, if (!sinfo) sinfo = &empty_sinfo; - trace_cfg80211_del_sta(dev, mac_addr); + trace_cfg80211_del_sta(wdev, mac_addr); msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); if (!msg) { @@ -20408,7 +21232,7 @@ void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, } if (nl80211_send_station(msg, NL80211_CMD_DEL_STATION, 0, 0, 0, - rdev, dev, mac_addr, sinfo, false) < 0) { + rdev, wdev, mac_addr, sinfo, false) < 0) { nlmsg_free(msg); return; } @@ -20460,7 +21284,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; - u32 nlportid = READ_ONCE(wdev->ap_unexpected_nlportid); + u32 nlportid = READ_ONCE(wdev->unexpected_nlportid); if (!nlportid) return false; @@ -20500,7 +21324,8 @@ bool cfg80211_rx_spurious_frame(struct net_device *dev, const u8 *addr, trace_cfg80211_rx_spurious_frame(dev, addr, link_id); if (WARN_ON(wdev->iftype != NL80211_IFTYPE_AP && - wdev->iftype != NL80211_IFTYPE_P2P_GO)) { + wdev->iftype != NL80211_IFTYPE_P2P_GO && + wdev->iftype != NL80211_IFTYPE_NAN_DATA)) { trace_cfg80211_return_bool(false); return false; } @@ -21120,6 +21945,46 @@ void cfg80211_ch_switch_notify(struct net_device *dev, } EXPORT_SYMBOL(cfg80211_ch_switch_notify); +void cfg80211_incumbent_signal_notify(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + u32 signal_interference_bitmap, + gfp_t gfp) +{ + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + void *hdr; + + 
trace_cfg80211_incumbent_signal_notify(wiphy, chandef, signal_interference_bitmap); + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_INCUMBENT_SIGNAL_DETECT); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx)) + goto nla_put_failure; + + if (nl80211_send_chandef(msg, chandef)) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_INCUMBENT_SIGNAL_INTERFERENCE_BITMAP, + signal_interference_bitmap)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, + NL80211_MCGRP_MLME, gfp); + return; + +nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_incumbent_signal_notify); + void cfg80211_ch_switch_started_notify(struct net_device *dev, struct cfg80211_chan_def *chandef, unsigned int link_id, u8 count, @@ -21222,6 +22087,13 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev, goto nla_put_failure; } + if (rdev->background_radar_wdev && + cfg80211_chandef_identical(&rdev->background_radar_chandef, + chandef)) { + if (nla_put_flag(msg, NL80211_ATTR_RADAR_BACKGROUND)) + goto nla_put_failure; + } + if (nla_put_u32(msg, NL80211_ATTR_RADAR_EVENT, event)) goto nla_put_failure; @@ -22023,6 +22895,97 @@ void cfg80211_nan_cluster_joined(struct wireless_dev *wdev, } EXPORT_SYMBOL(cfg80211_nan_cluster_joined); +void cfg80211_nan_ulw_update(struct wireless_dev *wdev, + const u8 *ulw, size_t ulw_len, gfp_t gfp) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + void *hdr; + + trace_cfg80211_nan_ulw_update(wiphy, wdev, ulw, ulw_len); + + if (!wdev->owner_nlportid) + return; + + /* 32 for the wiphy idx, 64 for the wdev id, 100 for padding */ + msg = nlmsg_new(nla_total_size(sizeof(u32)) + + nla_total_size(ulw_len) + + nla_total_size(sizeof(u64)) + 100, + gfp); + if (!msg) + return; + + hdr = 
nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NAN_ULW_UPDATE); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD) || + (ulw && ulw_len && + nla_put(msg, NL80211_ATTR_NAN_ULW, ulw_len, ulw))) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_unicast(wiphy_net(wiphy), msg, wdev->owner_nlportid); + + return; + + nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_nan_ulw_update); + +void cfg80211_nan_channel_evac(struct wireless_dev *wdev, + const struct cfg80211_chan_def *chandef, + gfp_t gfp) +{ + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); + struct sk_buff *msg; + struct nlattr *chan_attr; + void *hdr; + + trace_cfg80211_nan_channel_evac(wiphy, wdev, chandef); + + if (!wdev->owner_nlportid) + return; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NAN_CHANNEL_EVAC); + if (!hdr) + goto nla_put_failure; + + if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) || + nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev), + NL80211_ATTR_PAD)) + goto nla_put_failure; + + chan_attr = nla_nest_start(msg, NL80211_ATTR_NAN_CHANNEL); + if (!chan_attr) + goto nla_put_failure; + + if (nl80211_send_chandef(msg, chandef)) + goto nla_put_failure; + + nla_nest_end(msg, chan_attr); + + genlmsg_end(msg, hdr); + + genlmsg_unicast(wiphy_net(wiphy), msg, wdev->owner_nlportid); + + return; + + nla_put_failure: + nlmsg_free(msg); +} +EXPORT_SYMBOL(cfg80211_nan_channel_evac); + /* initialisation/exit functions */ int __init nl80211_init(void) diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 5e25782af1e0..048ba92c3e42 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * Portions of this file - * Copyright (C) 2018, 2020-2024 
Intel Corporation + * Copyright (C) 2018, 2020-2025 Intel Corporation */ #ifndef __NET_WIRELESS_NL80211_H #define __NET_WIRELESS_NL80211_H @@ -23,7 +23,8 @@ static inline u64 wdev_id(struct wireless_dev *wdev) } int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, - struct genl_info *info, + struct netlink_ext_ack *extack, + struct nlattr **attrs, struct cfg80211_chan_def *chandef); int nl80211_parse_random_mac(struct nlattr **attrs, u8 *mac_addr, u8 *mac_addr_mask); diff --git a/net/wireless/of.c b/net/wireless/of.c index 60a864465331..99acbea3beee 100644 --- a/net/wireless/of.c +++ b/net/wireless/of.c @@ -1,17 +1,6 @@ +// SPDX-License-Identifier: ISC /* * Copyright (C) 2017 Rafał Miłecki <rafal@milecki.pl> - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
*/ #include <linux/of.h> diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c index 50e8e19aa366..4c8ea0583f94 100644 --- a/net/wireless/pmsr.c +++ b/net/wireless/pmsr.c @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * Copyright (C) 2018 - 2021, 2023 - 2024 Intel Corporation + * Copyright (C) 2018 - 2021, 2023 - 2026 Intel Corporation */ #include <net/cfg80211.h> #include "core.h" @@ -237,7 +237,8 @@ static int pmsr_parse_peer(struct cfg80211_registered_device *rdev, if (err) return err; - err = nl80211_parse_chandef(rdev, info, &out->chandef); + err = nl80211_parse_chandef(rdev, info->extack, info->attrs, + &out->chandef); if (err) return err; diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c index c85eaa583a46..df29048a0449 100644 --- a/net/wireless/radiotap.c +++ b/net/wireless/radiotap.c @@ -1,17 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-2-Clause /* * Radiotap parser * * Copyright 2007 Andy Green <andy@warmcat.com> * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Alternatively, this software may be distributed under the terms of BSD - * license. - * - * See COPYING for more details. 
*/ #include <linux/kernel.h> diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index ac6884bacf3f..bba239a068f6 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018, 2021-2025 Intel Corporation + * Copyright (C) 2018, 2021-2026 Intel Corporation */ #ifndef __CFG80211_RDEV_OPS #define __CFG80211_RDEV_OPS @@ -77,42 +77,42 @@ rdev_change_virtual_intf(struct cfg80211_registered_device *rdev, } static inline int rdev_add_key(struct cfg80211_registered_device *rdev, - struct net_device *netdev, int link_id, + struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params) { int ret; - trace_rdev_add_key(&rdev->wiphy, netdev, link_id, key_index, pairwise, + trace_rdev_add_key(&rdev->wiphy, wdev, link_id, key_index, pairwise, mac_addr, params->mode); - ret = rdev->ops->add_key(&rdev->wiphy, netdev, link_id, key_index, + ret = rdev->ops->add_key(&rdev->wiphy, wdev, link_id, key_index, pairwise, mac_addr, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int -rdev_get_key(struct cfg80211_registered_device *rdev, struct net_device *netdev, +rdev_get_key(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, void *cookie, void (*callback)(void *cookie, struct key_params*)) { int ret; - trace_rdev_get_key(&rdev->wiphy, netdev, link_id, key_index, pairwise, + trace_rdev_get_key(&rdev->wiphy, wdev, link_id, key_index, pairwise, mac_addr); - ret = rdev->ops->get_key(&rdev->wiphy, netdev, link_id, key_index, + ret = rdev->ops->get_key(&rdev->wiphy, wdev, link_id, key_index, pairwise, mac_addr, cookie, callback); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_del_key(struct cfg80211_registered_device *rdev, - struct net_device *netdev, int link_id, + struct 
wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr) { int ret; - trace_rdev_del_key(&rdev->wiphy, netdev, link_id, key_index, pairwise, + trace_rdev_del_key(&rdev->wiphy, wdev, link_id, key_index, pairwise, mac_addr); - ret = rdev->ops->del_key(&rdev->wiphy, netdev, link_id, key_index, + ret = rdev->ops->del_key(&rdev->wiphy, wdev, link_id, key_index, pairwise, mac_addr); trace_rdev_return_int(&rdev->wiphy, ret); return ret; @@ -134,12 +134,12 @@ rdev_set_default_key(struct cfg80211_registered_device *rdev, static inline int rdev_set_default_mgmt_key(struct cfg80211_registered_device *rdev, - struct net_device *netdev, int link_id, u8 key_index) + struct wireless_dev *wdev, int link_id, u8 key_index) { int ret; - trace_rdev_set_default_mgmt_key(&rdev->wiphy, netdev, link_id, + trace_rdev_set_default_mgmt_key(&rdev->wiphy, wdev, link_id, key_index); - ret = rdev->ops->set_default_mgmt_key(&rdev->wiphy, netdev, link_id, + ret = rdev->ops->set_default_mgmt_key(&rdev->wiphy, wdev, link_id, key_index); trace_rdev_return_int(&rdev->wiphy, ret); return ret; @@ -147,14 +147,14 @@ rdev_set_default_mgmt_key(struct cfg80211_registered_device *rdev, static inline int rdev_set_default_beacon_key(struct cfg80211_registered_device *rdev, - struct net_device *netdev, int link_id, + struct wireless_dev *wdev, int link_id, u8 key_index) { int ret; - trace_rdev_set_default_beacon_key(&rdev->wiphy, netdev, link_id, + trace_rdev_set_default_beacon_key(&rdev->wiphy, wdev, link_id, key_index); - ret = rdev->ops->set_default_beacon_key(&rdev->wiphy, netdev, link_id, + ret = rdev->ops->set_default_beacon_key(&rdev->wiphy, wdev, link_id, key_index); trace_rdev_return_int(&rdev->wiphy, ret); return ret; @@ -193,56 +193,56 @@ static inline int rdev_stop_ap(struct cfg80211_registered_device *rdev, } static inline int rdev_add_station(struct cfg80211_registered_device *rdev, - struct net_device *dev, u8 *mac, + struct wireless_dev *wdev, u8 *mac, struct 
station_parameters *params) { int ret; - trace_rdev_add_station(&rdev->wiphy, dev, mac, params); - ret = rdev->ops->add_station(&rdev->wiphy, dev, mac, params); + trace_rdev_add_station(&rdev->wiphy, wdev, mac, params); + ret = rdev->ops->add_station(&rdev->wiphy, wdev, mac, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_del_station(struct cfg80211_registered_device *rdev, - struct net_device *dev, + struct wireless_dev *wdev, struct station_del_parameters *params) { int ret; - trace_rdev_del_station(&rdev->wiphy, dev, params); - ret = rdev->ops->del_station(&rdev->wiphy, dev, params); + trace_rdev_del_station(&rdev->wiphy, wdev, params); + ret = rdev->ops->del_station(&rdev->wiphy, wdev, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_change_station(struct cfg80211_registered_device *rdev, - struct net_device *dev, u8 *mac, + struct wireless_dev *wdev, u8 *mac, struct station_parameters *params) { int ret; - trace_rdev_change_station(&rdev->wiphy, dev, mac, params); - ret = rdev->ops->change_station(&rdev->wiphy, dev, mac, params); + trace_rdev_change_station(&rdev->wiphy, wdev, mac, params); + ret = rdev->ops->change_station(&rdev->wiphy, wdev, mac, params); trace_rdev_return_int(&rdev->wiphy, ret); return ret; } static inline int rdev_get_station(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *mac, + struct wireless_dev *wdev, const u8 *mac, struct station_info *sinfo) { int ret; - trace_rdev_get_station(&rdev->wiphy, dev, mac); - ret = rdev->ops->get_station(&rdev->wiphy, dev, mac, sinfo); + trace_rdev_get_station(&rdev->wiphy, wdev, mac); + ret = rdev->ops->get_station(&rdev->wiphy, wdev, mac, sinfo); trace_rdev_return_int_station_info(&rdev->wiphy, ret, sinfo); return ret; } static inline int rdev_dump_station(struct cfg80211_registered_device *rdev, - struct net_device *dev, int idx, u8 *mac, + struct wireless_dev *wdev, int idx, u8 *mac, struct 
station_info *sinfo) { int ret; - trace_rdev_dump_station(&rdev->wiphy, dev, idx, mac); - ret = rdev->ops->dump_station(&rdev->wiphy, dev, idx, mac, sinfo); + trace_rdev_dump_station(&rdev->wiphy, wdev, idx, mac); + ret = rdev->ops->dump_station(&rdev->wiphy, wdev, idx, mac, sinfo); trace_rdev_return_int_station_info(&rdev->wiphy, ret, sinfo); return ret; } @@ -1060,6 +1060,38 @@ rdev_nan_change_conf(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_nan_set_local_sched(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_local_sched *sched) +{ + int ret; + + trace_rdev_nan_set_local_sched(&rdev->wiphy, wdev, sched); + if (rdev->ops->nan_set_local_sched) + ret = rdev->ops->nan_set_local_sched(&rdev->wiphy, wdev, sched); + else + ret = -EOPNOTSUPP; + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + +static inline int +rdev_nan_set_peer_sched(struct cfg80211_registered_device *rdev, + struct wireless_dev *wdev, + struct cfg80211_nan_peer_sched *sched) +{ + int ret; + + trace_rdev_nan_set_peer_sched(&rdev->wiphy, wdev, sched); + if (rdev->ops->nan_set_peer_sched) + ret = rdev->ops->nan_set_peer_sched(&rdev->wiphy, wdev, sched); + else + ret = -EOPNOTSUPP; + trace_rdev_return_int(&rdev->wiphy, ret); + return ret; +} + static inline int rdev_set_mac_acl(struct cfg80211_registered_device *rdev, struct net_device *dev, struct cfg80211_acl_data *params) diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 1c5c38d18feb..5db2121c0b57 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: ISC /* * Copyright 2002-2005, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. 
@@ -6,18 +7,6 @@ * Copyright 2013-2014 Intel Mobile Communications GmbH * Copyright 2017 Intel Deutschland GmbH * Copyright (C) 2018 - 2026 Intel Corporation - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ @@ -2359,6 +2348,18 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) if (!wdev->netdev || !netif_running(wdev->netdev)) return true; + /* NAN doesn't have links, handle it separately */ + if (iftype == NL80211_IFTYPE_NAN) { + for (int i = 0; i < wdev->u.nan.n_channels; i++) { + ret = cfg80211_reg_can_beacon(wiphy, + &wdev->u.nan.chandefs[i], + NL80211_IFTYPE_NAN); + if (!ret) + return false; + } + return true; + } + for (link = 0; link < ARRAY_SIZE(wdev->links); link++) { struct ieee80211_channel *chan; @@ -2408,9 +2409,9 @@ static bool reg_wdev_chan_valid(struct wiphy *wiphy, struct wireless_dev *wdev) continue; chandef = wdev->u.ocb.chandef; break; - case NL80211_IFTYPE_NAN: - /* we have no info, but NAN is also pretty universal */ - continue; + case NL80211_IFTYPE_NAN_DATA: + /* NAN channels are checked in NL80211_IFTYPE_NAN interface */ + break; default: /* others not implemented for now */ WARN_ON_ONCE(1); @@ -2447,11 +2448,14 @@ static void reg_leave_invalid_chans(struct wiphy *wiphy) struct wireless_dev *wdev; struct 
cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); - guard(wiphy)(wiphy); + list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) { + bool valid; - list_for_each_entry(wdev, &rdev->wiphy.wdev_list, list) - if (!reg_wdev_chan_valid(wiphy, wdev)) + scoped_guard(wiphy, wiphy) + valid = reg_wdev_chan_valid(wiphy, wdev); + if (!valid) cfg80211_leave(rdev, wdev, -1); + } } static void reg_check_chans_work(struct work_struct *work) diff --git a/net/wireless/reg.h b/net/wireless/reg.h index e1b211c4f75c..fc31c5f9a61a 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: ISC */ #ifndef __NET_WIRELESS_REG_H #define __NET_WIRELESS_REG_H @@ -6,18 +7,6 @@ /* * Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com> * Copyright (C) 2019, 2023 Intel Corporation - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ enum ieee80211_regd_source { diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 5b21432450d5..86e2ccaa678c 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -5,7 +5,7 @@ * (for nl80211's connect() and wext) * * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2009, 2020, 2022-2025 Intel Corporation. All rights reserved. + * Copyright (C) 2009, 2020, 2022-2026 Intel Corporation. 
All rights reserved. * Copyright 2017 Intel Deutschland GmbH */ @@ -1386,7 +1386,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT)) max_key_idx = 7; for (i = 0; i <= max_key_idx; i++) - rdev_del_key(rdev, dev, -1, i, false, NULL); + rdev_del_key(rdev, wdev, -1, i, false, NULL); } rdev_set_qos_map(rdev, dev, NULL); diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index 0b9abe70d39d..3ec25d3f004e 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -99,26 +99,32 @@ static int wiphy_suspend(struct device *dev) rdev->suspend_at = ktime_get_boottime_seconds(); rtnl_lock(); - wiphy_lock(&rdev->wiphy); - if (rdev->wiphy.registered) { - if (!rdev->wiphy.wowlan_config) { - cfg80211_leave_all(rdev); - cfg80211_process_rdev_events(rdev); + if (!rdev->wiphy.registered) + goto out_unlock_rtnl; + + if (rdev->wiphy.wowlan_config) { + scoped_guard(wiphy, &rdev->wiphy) { + cfg80211_process_wiphy_works(rdev, NULL); + if (rdev->ops->suspend) + ret = rdev_suspend(rdev, + rdev->wiphy.wowlan_config); + if (ret <= 0) + goto out_unlock_rtnl; } + } + + /* Driver refused to configure wowlan (ret = 1) or no wowlan */ + + cfg80211_leave_all(rdev); + scoped_guard(wiphy, &rdev->wiphy) { + cfg80211_process_rdev_events(rdev); cfg80211_process_wiphy_works(rdev, NULL); if (rdev->ops->suspend) - ret = rdev_suspend(rdev, rdev->wiphy.wowlan_config); - if (ret == 1) { - /* Driver refuse to configure wowlan */ - cfg80211_leave_all(rdev); - cfg80211_process_rdev_events(rdev); - cfg80211_process_wiphy_works(rdev, NULL); ret = rdev_suspend(rdev, NULL); - } - if (ret == 0) - rdev->suspended = true; } - wiphy_unlock(&rdev->wiphy); +out_unlock_rtnl: + if (ret == 0) + rdev->suspended = true; rtnl_unlock(); return ret; diff --git a/net/wireless/trace.c b/net/wireless/trace.c index 95f997fad755..7cb93acf1a8f 100644 --- a/net/wireless/trace.c +++ b/net/wireless/trace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 
#include <linux/module.h> #ifndef __CHECKER__ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 643ccf4f0227..eb5bedf9c92a 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -2,7 +2,7 @@ /* * Portions of this file * Copyright(c) 2016-2017 Intel Deutschland GmbH - * Copyright (C) 2018, 2020-2025 Intel Corporation + * Copyright (C) 2018, 2020-2026 Intel Corporation */ #undef TRACE_SYSTEM #define TRACE_SYSTEM cfg80211 @@ -546,12 +546,12 @@ TRACE_EVENT(rdev_change_virtual_intf, ); DECLARE_EVENT_CLASS(key_handle, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr), - TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr), + TP_ARGS(wiphy, wdev, link_id, key_index, pairwise, mac_addr), TP_STRUCT__entry( WIPHY_ENTRY - NETDEV_ENTRY + WDEV_ENTRY MAC_ENTRY(mac_addr) __field(int, link_id) __field(u8, key_index) @@ -559,38 +559,38 @@ DECLARE_EVENT_CLASS(key_handle, ), TP_fast_assign( WIPHY_ASSIGN; - NETDEV_ASSIGN; + WDEV_ASSIGN; MAC_ASSIGN(mac_addr, mac_addr); __entry->link_id = link_id; __entry->key_index = key_index; __entry->pairwise = pairwise; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", link_id: %d, " "key_index: %u, pairwise: %s, mac addr: %pM", - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id, __entry->key_index, BOOL_TO_STR(__entry->pairwise), __entry->mac_addr) ); DEFINE_EVENT(key_handle, rdev_get_key, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr), - TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr) + TP_ARGS(wiphy, wdev, link_id, key_index, pairwise, mac_addr) ); DEFINE_EVENT(key_handle, rdev_del_key, - TP_PROTO(struct wiphy 
*wiphy, struct net_device *netdev, int link_id, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr), - TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr) + TP_ARGS(wiphy, wdev, link_id, key_index, pairwise, mac_addr) ); TRACE_EVENT(rdev_add_key, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index, bool pairwise, const u8 *mac_addr, u8 mode), - TP_ARGS(wiphy, netdev, link_id, key_index, pairwise, mac_addr, mode), + TP_ARGS(wiphy, wdev, link_id, key_index, pairwise, mac_addr, mode), TP_STRUCT__entry( WIPHY_ENTRY - NETDEV_ENTRY + WDEV_ENTRY MAC_ENTRY(mac_addr) __field(int, link_id) __field(u8, key_index) @@ -599,17 +599,17 @@ TRACE_EVENT(rdev_add_key, ), TP_fast_assign( WIPHY_ASSIGN; - NETDEV_ASSIGN; + WDEV_ASSIGN; MAC_ASSIGN(mac_addr, mac_addr); __entry->link_id = link_id; __entry->key_index = key_index; __entry->pairwise = pairwise; __entry->mode = mode; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", link_id: %d, " "key_index: %u, mode: %u, pairwise: %s, " "mac addr: %pM", - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id, + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id, __entry->key_index, __entry->mode, BOOL_TO_STR(__entry->pairwise), __entry->mac_addr) ); @@ -642,45 +642,45 @@ TRACE_EVENT(rdev_set_default_key, ); TRACE_EVENT(rdev_set_default_mgmt_key, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index), - TP_ARGS(wiphy, netdev, link_id, key_index), + TP_ARGS(wiphy, wdev, link_id, key_index), TP_STRUCT__entry( WIPHY_ENTRY - NETDEV_ENTRY + WDEV_ENTRY __field(int, link_id) __field(u8, key_index) ), TP_fast_assign( WIPHY_ASSIGN; - NETDEV_ASSIGN; + WDEV_ASSIGN; __entry->link_id = link_id; __entry->key_index = key_index; ), 
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " - "key index: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, - __entry->link_id, __entry->key_index) + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", link_id: %d, key index: %u", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id, + __entry->key_index) ); TRACE_EVENT(rdev_set_default_beacon_key, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int link_id, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, int link_id, u8 key_index), - TP_ARGS(wiphy, netdev, link_id, key_index), + TP_ARGS(wiphy, wdev, link_id, key_index), TP_STRUCT__entry( WIPHY_ENTRY - NETDEV_ENTRY + WDEV_ENTRY __field(int, link_id) __field(u8, key_index) ), TP_fast_assign( WIPHY_ASSIGN; - NETDEV_ASSIGN; + WDEV_ASSIGN; __entry->link_id = link_id; __entry->key_index = key_index; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, " - "key index: %u", WIPHY_PR_ARG, NETDEV_PR_ARG, - __entry->link_id, __entry->key_index) + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", link_id: %d, key index: %u", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->link_id, + __entry->key_index) ); TRACE_EVENT(rdev_start_ap, @@ -856,12 +856,12 @@ TRACE_EVENT(rdev_end_cac, ); DECLARE_EVENT_CLASS(station_add_change, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *mac, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u8 *mac, struct station_parameters *params), - TP_ARGS(wiphy, netdev, mac, params), + TP_ARGS(wiphy, wdev, mac, params), TP_STRUCT__entry( WIPHY_ENTRY - NETDEV_ENTRY + WDEV_ENTRY MAC_ENTRY(sta_mac) __field(u32, sta_flags_mask) __field(u32, sta_flags_set) @@ -888,7 +888,7 @@ DECLARE_EVENT_CLASS(station_add_change, ), TP_fast_assign( WIPHY_ASSIGN; - NETDEV_ASSIGN; + WDEV_ASSIGN; MAC_ASSIGN(sta_mac, mac); __entry->sta_flags_mask = params->sta_flags_mask; __entry->sta_flags_set = params->sta_flags_set; @@ -936,11 +936,11 @@ DECLARE_EVENT_CLASS(station_add_change, __entry->opmode_notif_used = params->link_sta_params.opmode_notif_used; ), 
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM" + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", station mac: %pM" ", station flags mask: 0x%x, station flags set: 0x%x, " "station modify mask: 0x%x, listen interval: %d, aid: %u, " "plink action: %u, plink state: %u, uapsd queues: %u, vlan:%s", - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac, + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->sta_mac, __entry->sta_flags_mask, __entry->sta_flags_set, __entry->sta_modify_mask, __entry->listen_interval, __entry->aid, __entry->plink_action, __entry->plink_state, @@ -948,15 +948,15 @@ DECLARE_EVENT_CLASS(station_add_change, ); DEFINE_EVENT(station_add_change, rdev_add_station, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *mac, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u8 *mac, struct station_parameters *params), - TP_ARGS(wiphy, netdev, mac, params) + TP_ARGS(wiphy, wdev, mac, params) ); DEFINE_EVENT(station_add_change, rdev_change_station, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, u8 *mac, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, u8 *mac, struct station_parameters *params), - TP_ARGS(wiphy, netdev, mac, params) + TP_ARGS(wiphy, wdev, mac, params) ); DECLARE_EVENT_CLASS(wiphy_netdev_mac_evt, @@ -977,12 +977,12 @@ DECLARE_EVENT_CLASS(wiphy_netdev_mac_evt, ); DECLARE_EVENT_CLASS(station_del, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct station_del_parameters *params), - TP_ARGS(wiphy, netdev, params), + TP_ARGS(wiphy, wdev, params), TP_STRUCT__entry( WIPHY_ENTRY - NETDEV_ENTRY + WDEV_ENTRY MAC_ENTRY(sta_mac) __field(u8, subtype) __field(u16, reason_code) @@ -990,28 +990,45 @@ DECLARE_EVENT_CLASS(station_del, ), TP_fast_assign( WIPHY_ASSIGN; - NETDEV_ASSIGN; + WDEV_ASSIGN; MAC_ASSIGN(sta_mac, params->mac); __entry->subtype = params->subtype; __entry->reason_code = params->reason_code; __entry->link_id = params->link_id; ), - 
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM" + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", station mac: %pM" ", subtype: %u, reason_code: %u, link_id: %d", - WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac, + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->sta_mac, __entry->subtype, __entry->reason_code, __entry->link_id) ); DEFINE_EVENT(station_del, rdev_del_station, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, struct station_del_parameters *params), - TP_ARGS(wiphy, netdev, params) + TP_ARGS(wiphy, wdev, params) ); -DEFINE_EVENT(wiphy_netdev_mac_evt, rdev_get_station, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, const u8 *mac), - TP_ARGS(wiphy, netdev, mac) +DECLARE_EVENT_CLASS(wiphy_wdev_mac_evt, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac), + TP_ARGS(wiphy, wdev, mac), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + MAC_ENTRY(sta_mac) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + MAC_ASSIGN(sta_mac, mac); + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", mac: %pM", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->sta_mac) +); + +DEFINE_EVENT(wiphy_wdev_mac_evt, rdev_get_station, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, const u8 *mac), + TP_ARGS(wiphy, wdev, mac) ); DEFINE_EVENT(wiphy_netdev_mac_evt, rdev_del_mpath, @@ -1020,23 +1037,23 @@ DEFINE_EVENT(wiphy_netdev_mac_evt, rdev_del_mpath, ); TRACE_EVENT(rdev_dump_station, - TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, int _idx, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, int _idx, u8 *mac), - TP_ARGS(wiphy, netdev, _idx, mac), + TP_ARGS(wiphy, wdev, _idx, mac), TP_STRUCT__entry( WIPHY_ENTRY - NETDEV_ENTRY + WDEV_ENTRY MAC_ENTRY(sta_mac) __field(int, idx) ), TP_fast_assign( WIPHY_ASSIGN; - NETDEV_ASSIGN; + WDEV_ASSIGN; MAC_ASSIGN(sta_mac, mac); __entry->idx = _idx; ), - TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM, idx: %d", - 
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac, + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", station mac: %pM, idx: %d", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->sta_mac, __entry->idx) ); @@ -2393,6 +2410,55 @@ TRACE_EVENT(rdev_del_nan_func, WIPHY_PR_ARG, WDEV_PR_ARG, __entry->cookie) ); +TRACE_EVENT(rdev_nan_set_local_sched, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_local_sched *sched), + TP_ARGS(wiphy, wdev, sched), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __array(u8, schedule, CFG80211_NAN_SCHED_NUM_TIME_SLOTS) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + memcpy(__entry->schedule, sched->schedule, + CFG80211_NAN_SCHED_NUM_TIME_SLOTS); + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", schedule: %s", + WIPHY_PR_ARG, WDEV_PR_ARG, + __print_array(__entry->schedule, + CFG80211_NAN_SCHED_NUM_TIME_SLOTS, 1)) +); + +TRACE_EVENT(rdev_nan_set_peer_sched, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + struct cfg80211_nan_peer_sched *sched), + TP_ARGS(wiphy, wdev, sched), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __array(u8, peer_addr, ETH_ALEN) + __field(u8, seq_id) + __field(u16, committed_dw) + __field(u16, max_chan_switch) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + memcpy(__entry->peer_addr, sched->peer_addr, ETH_ALEN); + __entry->seq_id = sched->seq_id; + __entry->committed_dw = sched->committed_dw; + __entry->max_chan_switch = sched->max_chan_switch; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT + ", peer: %pM, seq_id: %u, committed_dw: 0x%x, max_chan_switch: %u", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->peer_addr, + __entry->seq_id, __entry->committed_dw, + __entry->max_chan_switch + ) +); + TRACE_EVENT(rdev_set_mac_acl, TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, struct cfg80211_acl_data *params), @@ -3153,6 +3219,21 @@ DECLARE_EVENT_CLASS(cfg80211_netdev_mac_evt, NETDEV_PR_ARG, __entry->macaddr) ); +DECLARE_EVENT_CLASS(cfg80211_wdev_mac_evt, + TP_PROTO(struct 
wireless_dev *wdev, const u8 *macaddr), + TP_ARGS(wdev, macaddr), + TP_STRUCT__entry( + WDEV_ENTRY + MAC_ENTRY(macaddr) + ), + TP_fast_assign( + WDEV_ASSIGN; + MAC_ASSIGN(macaddr, macaddr); + ), + TP_printk(WDEV_PR_FMT ", mac: %pM", + WDEV_PR_ARG, __entry->macaddr) +); + DEFINE_EVENT(cfg80211_netdev_mac_evt, cfg80211_notify_new_peer_candidate, TP_PROTO(struct net_device *netdev, const u8 *macaddr), TP_ARGS(netdev, macaddr) @@ -3342,26 +3423,26 @@ TRACE_EVENT(cfg80211_tx_mgmt_expired, ); TRACE_EVENT(cfg80211_new_sta, - TP_PROTO(struct net_device *netdev, const u8 *mac_addr, + TP_PROTO(struct wireless_dev *wdev, const u8 *mac_addr, struct station_info *sinfo), - TP_ARGS(netdev, mac_addr, sinfo), + TP_ARGS(wdev, mac_addr, sinfo), TP_STRUCT__entry( - NETDEV_ENTRY + WDEV_ENTRY MAC_ENTRY(mac_addr) SINFO_ENTRY ), TP_fast_assign( - NETDEV_ASSIGN; + WDEV_ASSIGN; MAC_ASSIGN(mac_addr, mac_addr); SINFO_ASSIGN; ), - TP_printk(NETDEV_PR_FMT ", %pM", - NETDEV_PR_ARG, __entry->mac_addr) + TP_printk(WDEV_PR_FMT ", %pM", + WDEV_PR_ARG, __entry->mac_addr) ); -DEFINE_EVENT(cfg80211_netdev_mac_evt, cfg80211_del_sta, - TP_PROTO(struct net_device *netdev, const u8 *macaddr), - TP_ARGS(netdev, macaddr) +DEFINE_EVENT(cfg80211_wdev_mac_evt, cfg80211_del_sta, + TP_PROTO(struct wireless_dev *wdev, const u8 *macaddr), + TP_ARGS(wdev, macaddr) ); TRACE_EVENT(cfg80211_rx_mgmt, @@ -4225,6 +4306,81 @@ TRACE_EVENT(cfg80211_nan_cluster_joined, WDEV_PR_ARG, __entry->cluster_id, __entry->new_cluster ? 
" [new]" : "") ); + +TRACE_EVENT(cfg80211_incumbent_signal_notify, + TP_PROTO(struct wiphy *wiphy, + const struct cfg80211_chan_def *chandef, + u32 signal_interference_bitmap), + TP_ARGS(wiphy, chandef, signal_interference_bitmap), + TP_STRUCT__entry( + WIPHY_ENTRY + CHAN_DEF_ENTRY + __field(u32, signal_interference_bitmap) + ), + TP_fast_assign( + WIPHY_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + __entry->signal_interference_bitmap = signal_interference_bitmap; + ), + TP_printk(WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT ", signal_interference_bitmap=0x%x", + WIPHY_PR_ARG, CHAN_DEF_PR_ARG, __entry->signal_interference_bitmap) +); + +TRACE_EVENT(cfg80211_nan_sched_update_done, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, bool success), + TP_ARGS(wiphy, wdev, success), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __field(bool, success) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + __entry->success = success; + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT " success=%d", + WIPHY_PR_ARG, WDEV_PR_ARG, __entry->success) +); + +TRACE_EVENT(cfg80211_nan_ulw_update, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + const u8 *ulw, size_t ulw_len), + TP_ARGS(wiphy, wdev, ulw, ulw_len), + TP_STRUCT__entry( + WIPHY_ENTRY + WDEV_ENTRY + __dynamic_array(u8, ulw, ulw_len) + ), + TP_fast_assign( + WIPHY_ASSIGN; + WDEV_ASSIGN; + if (ulw && ulw_len) + memcpy(__get_dynamic_array(ulw), ulw, ulw_len); + ), + TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT " ulw: %s", + WIPHY_PR_ARG, WDEV_PR_ARG, + __print_array(__get_dynamic_array(ulw), + __get_dynamic_array_len(ulw), 1)) +); + +TRACE_EVENT(cfg80211_nan_channel_evac, + TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev, + const struct cfg80211_chan_def *chandef), + TP_ARGS(wiphy, wdev, chandef), + TP_STRUCT__entry( + WDEV_ENTRY + WIPHY_ENTRY + CHAN_DEF_ENTRY + ), + TP_fast_assign( + WDEV_ASSIGN; + WIPHY_ASSIGN; + CHAN_DEF_ASSIGN(chandef); + ), + TP_printk(WDEV_PR_FMT ", " WIPHY_PR_FMT ", " CHAN_DEF_PR_FMT, + 
WDEV_PR_ARG, WIPHY_PR_ARG, CHAN_DEF_PR_ARG) +); #endif /* !__RDEV_OPS_TRACE || TRACE_HEADER_MULTI_READ */ #undef TRACE_INCLUDE_PATH diff --git a/net/wireless/util.c b/net/wireless/util.c index b78530c3e3f8..cff5a1bd95cc 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -90,7 +90,7 @@ u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band) /* see 802.11ax D6.1 27.3.23.2 */ if (chan == 2) return MHZ_TO_KHZ(5935); - if (chan <= 233) + if (chan <= 253) return MHZ_TO_KHZ(5950 + chan * 5); break; case NL80211_BAND_60GHZ: @@ -625,8 +625,9 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, case cpu_to_le16(0): if (iftype != NL80211_IFTYPE_ADHOC && iftype != NL80211_IFTYPE_STATION && - iftype != NL80211_IFTYPE_OCB) - return -1; + iftype != NL80211_IFTYPE_OCB && + iftype != NL80211_IFTYPE_NAN_DATA) + return -1; break; } @@ -1095,7 +1096,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) for (i = 0; i < 4; i++) { if (!wdev->connect_keys->params[i].cipher) continue; - if (rdev_add_key(rdev, dev, -1, i, false, NULL, + if (rdev_add_key(rdev, wdev, -1, i, false, NULL, &wdev->connect_keys->params[i])) { netdev_err(dev, "failed to set key %d\n", i); continue; @@ -1144,8 +1145,15 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) ev->ij.channel); break; case EVENT_STOPPED: - cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev, - ev->link_id); + /* + * for NAN interfaces cfg80211_leave must be called but + * locking here doesn't allow this. 
+ */ + if (WARN_ON(wdev->iftype == NL80211_IFTYPE_NAN)) + break; + + cfg80211_leave_locked(wiphy_to_rdev(wdev->wiphy), wdev, + ev->link_id); break; case EVENT_PORT_AUTHORIZED: __cfg80211_port_authorized(wdev, ev->pa.peer_addr, @@ -1184,6 +1192,13 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, if (otype == NL80211_IFTYPE_AP_VLAN) return -EOPNOTSUPP; + /* + * for NAN interfaces cfg80211_leave must be called for leaving, + * but locking here doesn't allow this. + */ + if (otype == NL80211_IFTYPE_NAN) + return -EOPNOTSUPP; + /* cannot change into P2P device or NAN */ if (ntype == NL80211_IFTYPE_P2P_DEVICE || ntype == NL80211_IFTYPE_NAN) @@ -1204,7 +1219,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, dev->ieee80211_ptr->use_4addr = false; rdev_set_qos_map(rdev, dev, NULL); - cfg80211_leave(rdev, dev->ieee80211_ptr, -1); + cfg80211_leave_locked(rdev, dev->ieee80211_ptr, -1); cfg80211_process_rdev_events(rdev); cfg80211_mlme_purge_registrations(dev->ieee80211_ptr); @@ -1232,6 +1247,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, case NL80211_IFTYPE_OCB: case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_NAN_DATA: dev->priv_flags |= IFF_DONT_BRIDGE; break; case NL80211_IFTYPE_P2P_GO: @@ -2669,7 +2685,7 @@ int cfg80211_get_station(struct net_device *dev, const u8 *mac_addr, guard(wiphy)(&rdev->wiphy); - return rdev_get_station(rdev, dev, mac_addr, sinfo); + return rdev_get_station(rdev, wdev, mac_addr, sinfo); } EXPORT_SYMBOL(cfg80211_get_station); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 5a70a0120343..22d9d9bae8f5 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -7,7 +7,7 @@ * we directly assign the wireless handlers of wireless interfaces. 
* * Copyright 2008-2009 Johannes Berg <johannes@sipsolutions.net> - * Copyright (C) 2019-2023 Intel Corporation + * Copyright (C) 2019-2023, 2026 Intel Corporation */ #include <linux/export.h> @@ -457,7 +457,7 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) err = -ENOENT; else - err = rdev_del_key(rdev, dev, -1, idx, pairwise, + err = rdev_del_key(rdev, wdev, -1, idx, pairwise, addr); } wdev->wext.connect.privacy = false; @@ -496,7 +496,7 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) - err = rdev_add_key(rdev, dev, -1, idx, pairwise, addr, params); + err = rdev_add_key(rdev, wdev, -1, idx, pairwise, addr, params); else if (params->cipher != WLAN_CIPHER_SUITE_WEP40 && params->cipher != WLAN_CIPHER_SUITE_WEP104) return -EINVAL; @@ -549,7 +549,7 @@ static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (wdev->connected || (wdev->iftype == NL80211_IFTYPE_ADHOC && wdev->u.ibss.current_bss)) - err = rdev_set_default_mgmt_key(rdev, dev, -1, idx); + err = rdev_set_default_mgmt_key(rdev, wdev, -1, idx); if (!err) wdev->wext.default_mgmt_key = idx; return err; @@ -1261,7 +1261,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev, return err; scoped_guard(wiphy, &rdev->wiphy) { - err = rdev_get_station(rdev, dev, addr, &sinfo); + err = rdev_get_station(rdev, wdev, addr, &sinfo); } if (err) return err; @@ -1305,7 +1305,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) memset(&sinfo, 0, sizeof(sinfo)); - ret = rdev_get_station(rdev, dev, bssid, &sinfo); + ret = rdev_get_station(rdev, wdev, bssid, &sinfo); wiphy_unlock(&rdev->wiphy); if (ret) diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 7b8e94214b07..c19dece2bc6e 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -1,3 +1,4 
@@ +// SPDX-License-Identifier: GPL-2.0 /* * This file implement the Wireless Extensions core API. * @@ -5,8 +6,6 @@ * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> * Copyright (C) 2024 Intel Corporation - * - * (As all part of the Linux kernel, this file is GPL) */ #include <linux/kernel.h> #include <linux/netdevice.h> diff --git a/net/wireless/wext-priv.c b/net/wireless/wext-priv.c index 37d1147019c2..ce9022843dfd 100644 --- a/net/wireless/wext-priv.c +++ b/net/wireless/wext-priv.c @@ -1,11 +1,10 @@ +// SPDX-License-Identifier: GPL-2.0 /* * This file implement the Wireless Extensions priv API. * * Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com> * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. * Copyright 2009 Johannes Berg <johannes@sipsolutions.net> - * - * (As all part of the Linux kernel, this file is GPL) */ #include <linux/slab.h> #include <linux/wireless.h> diff --git a/net/wireless/wext-proc.c b/net/wireless/wext-proc.c index cadcf8613af2..be6b2b695bf9 100644 --- a/net/wireless/wext-proc.c +++ b/net/wireless/wext-proc.c @@ -1,10 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 /* * This file implement the Wireless Extensions proc API. * * Authors : Jean Tourrilhes - HPL - <jt@hpl.hp.com> * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved. 
- * - * (As all part of the Linux kernel, this file is GPL) */ /* diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 19fef1398f1c..887abed25466 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -23,12 +23,16 @@ #include <linux/netdevice.h> #include <linux/rculist.h> #include <linux/vmalloc.h> + +#include <net/netdev_queues.h> #include <net/xdp_sock_drv.h> #include <net/busy_poll.h> #include <net/netdev_lock.h> #include <net/netdev_rx_queue.h> #include <net/xdp.h> +#include "../core/dev.h" + #include "xsk_queue.h" #include "xdp_umem.h" #include "xsk.h" @@ -115,7 +119,7 @@ struct xsk_buff_pool *xsk_get_pool_from_qid(struct net_device *dev, } EXPORT_SYMBOL(xsk_get_pool_from_qid); -void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id) +static void __xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id) { if (queue_id < dev->num_rx_queues) dev->_rx[queue_id].pool = NULL; @@ -123,6 +127,36 @@ void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id) dev->_tx[queue_id].pool = NULL; } +void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id) +{ + struct netdev_rx_queue *hw_rxq; + + if (!netif_rxq_is_leased(dev, queue_id)) + return __xsk_clear_pool_at_qid(dev, queue_id); + WARN_ON_ONCE(!netif_is_queue_leasee(dev)); + + hw_rxq = __netif_get_rx_queue(dev, queue_id)->lease; + + netdev_lock(hw_rxq->dev); + queue_id = get_netdev_rx_queue_index(hw_rxq); + __xsk_clear_pool_at_qid(hw_rxq->dev, queue_id); + netdev_unlock(hw_rxq->dev); +} + +static int __xsk_reg_pool_at_qid(struct net_device *dev, + struct xsk_buff_pool *pool, u16 queue_id) +{ + if (xsk_get_pool_from_qid(dev, queue_id)) + return -EBUSY; + + if (queue_id < dev->real_num_rx_queues) + dev->_rx[queue_id].pool = pool; + if (queue_id < dev->real_num_tx_queues) + dev->_tx[queue_id].pool = pool; + + return 0; +} + /* The buffer pool is stored both in the _rx struct and the _tx struct as we do * not know if the device has more tx queues than rx, or the opposite. 
* This might also change during run time. @@ -130,17 +164,27 @@ void xsk_clear_pool_at_qid(struct net_device *dev, u16 queue_id) int xsk_reg_pool_at_qid(struct net_device *dev, struct xsk_buff_pool *pool, u16 queue_id) { - if (queue_id >= max_t(unsigned int, - dev->real_num_rx_queues, - dev->real_num_tx_queues)) + struct netdev_rx_queue *hw_rxq; + int ret; + + if (queue_id >= max(dev->real_num_rx_queues, + dev->real_num_tx_queues)) return -EINVAL; - if (queue_id < dev->real_num_rx_queues) - dev->_rx[queue_id].pool = pool; - if (queue_id < dev->real_num_tx_queues) - dev->_tx[queue_id].pool = pool; + if (queue_id >= dev->real_num_rx_queues || + !netif_rxq_is_leased(dev, queue_id)) + return __xsk_reg_pool_at_qid(dev, pool, queue_id); + if (!netif_is_queue_leasee(dev)) + return -EBUSY; - return 0; + hw_rxq = __netif_get_rx_queue(dev, queue_id)->lease; + + netdev_lock(hw_rxq->dev); + queue_id = get_netdev_rx_queue_index(hw_rxq); + ret = __xsk_reg_pool_at_qid(hw_rxq->dev, pool, queue_id); + netdev_unlock(hw_rxq->dev); + + return ret; } static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff_xsk *xskb, u32 len, @@ -342,12 +386,36 @@ static bool xsk_is_bound(struct xdp_sock *xs) return false; } +static bool xsk_dev_queue_valid(const struct xdp_sock *xs, + const struct xdp_rxq_info *info) +{ + struct net_device *dev = xs->dev; + u32 queue_index = xs->queue_id; + struct netdev_rx_queue *rxq; + + if (info->dev == dev && + info->queue_index == queue_index) + return true; + + if (queue_index < dev->real_num_rx_queues) { + rxq = READ_ONCE(__netif_get_rx_queue(dev, queue_index)->lease); + if (!rxq) + return false; + + dev = rxq->dev; + queue_index = get_netdev_rx_queue_index(rxq); + + return info->dev == dev && + info->queue_index == queue_index; + } + return false; +} + static int xsk_rcv_check(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { if (!xsk_is_bound(xs)) return -ENXIO; - - if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) + if 
(!xsk_dev_queue_valid(xs, xdp->rxq)) return -EINVAL; if (len > __xsk_pool_get_rx_frame_size(xs->pool) && !xs->sg) { diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h index a4bc4749faac..7c811b5cce76 100644 --- a/net/xdp/xsk.h +++ b/net/xdp/xsk.h @@ -4,13 +4,6 @@ #ifndef XSK_H_ #define XSK_H_ -/* Masks for xdp_umem_page flags. - * The low 12-bits of the addr will be 0 since this is the page address, so we - * can use them for flags. - */ -#define XSK_NEXT_PG_CONTIG_SHIFT 0 -#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT) - struct xdp_ring_offset_v1 { __u64 producer; __u64 consumer; diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index e1b11ab59f6e..a2756186e13a 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -7,9 +7,6 @@ #include <linux/skmsg.h> #include <net/inet_common.h> #include <trace/events/sock.h> -#if IS_ENABLED(CONFIG_IPV6) -#include <net/ipv6_stubs.h> -#endif #include <net/hotdata.h> static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb, @@ -43,7 +40,7 @@ static void handle_esp(struct sk_buff *skb, struct sock *sk) local_bh_disable(); #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) - ipv6_stub->xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); + xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); else #endif xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP); @@ -133,7 +130,7 @@ static int espintcp_parse(struct strparser *strp, struct sk_buff *skb) } static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, - int flags, int *addr_len) + int flags) { struct espintcp_ctx *ctx = espintcp_getctx(sk); struct sk_buff *skb; diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 749011e031c0..70434495f23f 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -291,26 +291,6 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "hmac(rmd160)", - .compat = "rmd160", - - .uinfo = { - .auth = { - .icv_truncbits = 96, - .icv_fullbits = 160, - 
} - }, - - .pfkey_supported = 1, - - .desc = { - .sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC, - .sadb_alg_ivlen = 0, - .sadb_alg_minbits = 160, - .sadb_alg_maxbits = 160 - } -}, -{ .name = "xcbc(aes)", .uinfo = { diff --git a/net/xfrm/xfrm_nat_keepalive.c b/net/xfrm/xfrm_nat_keepalive.c index 1856beee0149..458931062a04 100644 --- a/net/xfrm/xfrm_nat_keepalive.c +++ b/net/xfrm/xfrm_nat_keepalive.c @@ -98,14 +98,14 @@ static int nat_keepalive_send_ipv6(struct sk_buff *skb, local_lock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock); sk = this_cpu_read(nat_keepalive_sk_ipv6.sock); sock_net_set(sk, net); - dst = ipv6_stub->ipv6_dst_lookup_flow(net, sk, &fl6, NULL); + dst = ip6_dst_lookup_flow(net, sk, &fl6, NULL); if (IS_ERR(dst)) { local_unlock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock); return PTR_ERR(dst); } skb_dst_set(skb, dst); - err = ipv6_stub->ip6_xmit(sk, skb, &fl6, skb->mark, NULL, 0, 0); + err = ip6_xmit(sk, skb, &fl6, skb->mark, NULL, 0, 0); sock_net_set(sk, &init_net); local_unlock_nested_bh(&nat_keepalive_sk_ipv6.bh_lock); return err; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 54222fcbd7fd..a9652b422f51 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -20,7 +20,6 @@ #if IS_ENABLED(CONFIG_IPV6) #include <net/ip6_route.h> -#include <net/ipv6_stubs.h> #endif #include "xfrm_inout.h" @@ -900,7 +899,7 @@ int xfrm6_tunnel_check_size(struct sk_buff *skb) skb->protocol = htons(ETH_P_IPV6); if (xfrm6_local_dontfrag(sk)) - ipv6_stub->xfrm6_local_rxpmtu(skb, mtu); + xfrm6_local_rxpmtu(skb, mtu); else if (sk) xfrm_local_error(skb, mtu); else diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index a872af5610dc..c944327ce66c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3917,7 +3917,7 @@ EXPORT_SYMBOL(__xfrm_route_forward); static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) { - /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete + /* Code (such as xfrm_bundle_create()) 
sets dst->obsolete * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to * get validated by dst_ops->check on every use. We do this * because when a normal route referenced by an XFRM dst is |
