diff options
Diffstat (limited to 'net')
64 files changed, 550 insertions, 382 deletions
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index fd91cd34f25e..dec3f35467c9 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -1187,7 +1187,10 @@ static int __must_check ax25_connect(struct socket *sock, if (addr_len > sizeof(struct sockaddr_ax25) && fsa->fsa_ax25.sax25_ndigis != 0) { /* Valid number of digipeaters ? */ - if (fsa->fsa_ax25.sax25_ndigis < 1 || fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS) { + if (fsa->fsa_ax25.sax25_ndigis < 1 || + fsa->fsa_ax25.sax25_ndigis > AX25_MAX_DIGIS || + addr_len < sizeof(struct sockaddr_ax25) + + sizeof(ax25_address) * fsa->fsa_ax25.sax25_ndigis) { err = -EINVAL; goto out_release; } @@ -1507,7 +1510,10 @@ static int ax25_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) struct full_sockaddr_ax25 *fsa = (struct full_sockaddr_ax25 *)usax; /* Valid number of digipeaters ? */ - if (usax->sax25_ndigis < 1 || usax->sax25_ndigis > AX25_MAX_DIGIS) { + if (usax->sax25_ndigis < 1 || + usax->sax25_ndigis > AX25_MAX_DIGIS || + addr_len < sizeof(struct sockaddr_ax25) + + sizeof(ax25_address) * usax->sax25_ndigis) { err = -EINVAL; goto out; } diff --git a/net/bpfilter/bpfilter_kern.c b/net/bpfilter/bpfilter_kern.c index cfb27166bfd7..4494ea6056cd 100644 --- a/net/bpfilter/bpfilter_kern.c +++ b/net/bpfilter/bpfilter_kern.c @@ -50,7 +50,7 @@ static int __bpfilter_process_sockopt(struct sock *sk, int optname, req.len = optlen; if (!bpfilter_ops.info.pid) goto out; - n = __kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req), + n = kernel_write(bpfilter_ops.info.pipe_to_umh, &req, sizeof(req), &pos); if (n != sizeof(req)) { pr_err("write fail %zd\n", n); diff --git a/net/compat.c b/net/compat.c index 5e3041a2c37d..434838bef5f8 100644 --- a/net/compat.c +++ b/net/compat.c @@ -202,7 +202,7 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk, /* Advance. */ kcmsg = (struct cmsghdr *)((char *)kcmsg + tmp); - ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen); + ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, cmsg.cmsg_len); } /* diff --git a/net/core/dev.c b/net/core/dev.c index 90b59fc50dc9..7a774ebf64e2 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5601,7 +5601,7 @@ static void flush_backlog(struct work_struct *work) skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) { if (skb->dev->reg_state == NETREG_UNREGISTERING) { __skb_unlink(skb, &sd->input_pkt_queue); - kfree_skb(skb); + dev_kfree_skb_irq(skb); input_queue_head_incr(sd); } } diff --git a/net/core/devlink.c b/net/core/devlink.c index 2cafbc808b09..47f14a2f25fb 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -1065,7 +1065,9 @@ static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg, devlink_sb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq); - if (err && err != -EOPNOTSUPP) { + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { mutex_unlock(&devlink->lock); goto out; } @@ -1266,7 +1268,9 @@ static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg, devlink, devlink_sb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq); - if (err && err != -EOPNOTSUPP) { + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { mutex_unlock(&devlink->lock); goto out; } @@ -1498,7 +1502,9 @@ devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg, devlink_sb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq); - if (err && err != -EOPNOTSUPP) { + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { mutex_unlock(&devlink->lock); goto out; } @@ -3299,7 +3305,9 @@ static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); - if (err && err != -EOPNOTSUPP) { + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { mutex_unlock(&devlink->lock); goto out; } @@ -3569,7 +3577,9 @@ static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI); - if (err && err != -EOPNOTSUPP) { + if (err == -EOPNOTSUPP) { + err = 0; + } else if (err) { mutex_unlock(&devlink->lock); goto out; } @@ -4518,7 +4528,9 @@ static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->extack); mutex_unlock(&devlink->lock); - if (err && err != -EOPNOTSUPP) + if (err == -EOPNOTSUPP) + err = 0; + else if (err) break; idx++; } @@ -8567,6 +8579,7 @@ static const struct devlink_trap_group devlink_trap_group_generic[] = { DEVLINK_TRAP_GROUP(PIM), DEVLINK_TRAP_GROUP(UC_LB), DEVLINK_TRAP_GROUP(LOCAL_DELIVERY), + DEVLINK_TRAP_GROUP(EXTERNAL_DELIVERY), DEVLINK_TRAP_GROUP(IPV6), DEVLINK_TRAP_GROUP(PTP_EVENT), DEVLINK_TRAP_GROUP(PTP_GENERAL), diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index b739cfab796e..2076219b8ba5 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -4,6 +4,7 @@ #include <net/flow_offload.h> #include <linux/rtnetlink.h> #include <linux/mutex.h> +#include <linux/rhashtable.h> struct flow_rule *flow_rule_alloc(unsigned int num_actions) { diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index e353b822bb15..7bd6440c63bf 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -1108,7 +1108,7 @@ static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf) trans_timeout = queue->trans_timeout; spin_unlock_irq(&queue->_xmit_lock); - return sprintf(buf, "%lu", trans_timeout); + return sprintf(buf, fmt_ulong, trans_timeout); } static unsigned int get_netdev_queue_index(struct netdev_queue *queue) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9aedc15736ad..85a4b0101f76 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3343,7 +3343,8 @@ replay: */ if (err < 0) { /* If device is not registered at all, free it now */ - if (dev->reg_state == NETREG_UNINITIALIZED) + if (dev->reg_state == NETREG_UNINITIALIZED || + dev->reg_state == NETREG_UNREGISTERED) free_netdev(dev); goto out; } diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index adcb3aea576d..bbdd3c7b6cb5 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -101,6 +101,7 @@ static struct sock_reuseport *reuseport_grow(struct sock_reuseport *reuse) more_reuse->prog = reuse->prog; more_reuse->reuseport_id = reuse->reuseport_id; more_reuse->bind_inany = reuse->bind_inany; + more_reuse->has_conns = reuse->has_conns; memcpy(more_reuse->socks, reuse->socks, reuse->num_socks * sizeof(struct sock *)); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index f93f8ace6c56..6ada114bbcca 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -274,7 +274,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); if (write && !ret) { if (jit_enable < 2 || - (jit_enable == 2 && bpf_dump_raw_ok())) { + (jit_enable == 2 && bpf_dump_raw_ok(current_cred()))) { *(int *)table->data = jit_enable; if (jit_enable == 2) pr_warn("bpf_jit_enable = 2 was set! NEVER use this in production, only for JIT debugging!\n"); diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c index ed13760463de..1ea17752fffc 100644 --- a/net/hsr/hsr_forward.c +++ b/net/hsr/hsr_forward.c @@ -120,13 +120,18 @@ static struct sk_buff *frame_get_stripped_skb(struct hsr_frame_info *frame, return skb_clone(frame->skb_std, GFP_ATOMIC); } -static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, - struct hsr_port *port, u8 proto_version) +static struct sk_buff *hsr_fill_tag(struct sk_buff *skb, + struct hsr_frame_info *frame, + struct hsr_port *port, u8 proto_version) { struct hsr_ethhdr *hsr_ethhdr; int lane_id; int lsdu_size; + /* pad to minimum packet size which is 60 + 6 (HSR tag) */ + if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN)) + return NULL; + if (port->type == HSR_PT_SLAVE_A) lane_id = 0; else @@ -144,6 +149,8 @@ static void hsr_fill_tag(struct sk_buff *skb, struct hsr_frame_info *frame, hsr_ethhdr->hsr_tag.encap_proto = hsr_ethhdr->ethhdr.h_proto; hsr_ethhdr->ethhdr.h_proto = htons(proto_version ? ETH_P_HSR : ETH_P_PRP); + + return skb; } static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, @@ -172,9 +179,10 @@ static struct sk_buff *create_tagged_skb(struct sk_buff *skb_o, memmove(dst, src, movelen); skb_reset_mac_header(skb); - hsr_fill_tag(skb, frame, port, port->hsr->prot_version); - - return skb; + /* skb_put_padto free skb on error and hsr_fill_tag returns NULL in + * that case + */ + return hsr_fill_tag(skb, frame, port, port->hsr->prot_version); } /* If the original frame was an HSR tagged frame, just clone it to be sent diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 03b891904314..530de24b1fb5 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -325,7 +325,8 @@ void hsr_addr_subst_dest(struct hsr_node *node_src, struct sk_buff *skb, if (port->type != node_dst->addr_B_port) return; - ether_addr_copy(eth_hdr(skb)->h_dest, node_dst->macaddress_B); + if (is_valid_ether_addr(node_dst->macaddress_B)) + ether_addr_copy(eth_hdr(skb)->h_dest, node_dst->macaddress_B); } void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 248f1c1959a6..3c65f71d0e82 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1864,7 +1864,7 @@ struct fib_table *fib_trie_unmerge(struct fib_table *oldtb) while ((l = leaf_walk_rcu(&tp, key)) != NULL) { struct key_vector *local_l = NULL, *local_tp; - hlist_for_each_entry_rcu(fa, &l->leaf, fa_list) { + hlist_for_each_entry(fa, &l->leaf, fa_list) { struct fib_alias *new_fa; if (local_tb->tb_id != fa->tb_id) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9615e72656d1..518f04355fbf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3488,10 +3488,8 @@ static void tcp_replace_ts_recent(struct tcp_sock *tp, u32 seq) } } -/* This routine deals with acks during a TLP episode. - * We mark the end of a TLP episode on receiving TLP dupack or when - * ack is after tlp_high_seq. - * Ref: loss detection algorithm in draft-dukkipati-tcpm-tcp-loss-probe. +/* This routine deals with acks during a TLP episode and ends an episode by + * resetting tlp_high_seq. Ref: TLP algorithm in draft-ietf-tcpm-rack */ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) { @@ -3500,7 +3498,10 @@ static void tcp_process_tlp_ack(struct sock *sk, u32 ack, int flag) if (before(ack, tp->tlp_high_seq)) return; - if (flag & FLAG_DSACKING_ACK) { + if (!tp->tlp_retrans) { + /* TLP of new data has been acknowledged */ + tp->tlp_high_seq = 0; + } else if (flag & FLAG_DSACKING_ACK) { /* This DSACK means original and TLP probe arrived; no loss */ tp->tlp_high_seq = 0; } else if (after(ack, tp->tlp_high_seq)) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5f5b2f0b0e60..0bc05d68cd74 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2624,6 +2624,11 @@ void tcp_send_loss_probe(struct sock *sk) int pcount; int mss = tcp_current_mss(sk); + /* At most one outstanding TLP */ + if (tp->tlp_high_seq) + goto rearm_timer; + + tp->tlp_retrans = 0; skb = tcp_send_head(sk); if (skb && tcp_snd_wnd_test(tp, skb, mss)) { pcount = tp->packets_out; @@ -2641,10 +2646,6 @@ void tcp_send_loss_probe(struct sock *sk) return; } - /* At most one outstanding TLP retransmission. */ - if (tp->tlp_high_seq) - goto rearm_timer; - if (skb_still_in_host_queue(sk, skb)) goto rearm_timer; @@ -2666,10 +2667,12 @@ void tcp_send_loss_probe(struct sock *sk) if (__tcp_retransmit_skb(sk, skb, 1)) goto rearm_timer; + tp->tlp_retrans = 1; + +probe_sent: /* Record snd_nxt for loss detection. */ tp->tlp_high_seq = tp->snd_nxt; -probe_sent: NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPLOSSPROBES); /* Reset s.t. tcp_rearm_rto will restart timer from now */ inet_csk(sk)->icsk_pending = 0; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 1b7ebbcae497..4077d589b72e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -416,7 +416,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, struct udp_hslot *hslot2, struct sk_buff *skb) { - struct sock *sk, *result; + struct sock *sk, *result, *reuseport_result; int score, badness; u32 hash = 0; @@ -426,17 +426,20 @@ static struct sock *udp4_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { + reuseport_result = NULL; + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { hash = udp_ehashfn(net, daddr, hnum, saddr, sport); - result = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (result && !reuseport_has_conns(sk, false)) - return result; + reuseport_result = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); + if (reuseport_result && !reuseport_has_conns(sk, false)) + return reuseport_result; } + + result = reuseport_result ? : sk; badness = score; - result = sk; } } return result; @@ -2051,7 +2054,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets */ - if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { /* * MIB statistics other than incrementing the error count are diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 893261230ffc..dacdea7fcb62 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -183,7 +183,7 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) return 0; } -void ipv6_sock_ac_close(struct sock *sk) +void __ipv6_sock_ac_close(struct sock *sk) { struct ipv6_pinfo *np = inet6_sk(sk); struct net_device *dev = NULL; @@ -191,10 +191,7 @@ void ipv6_sock_ac_close(struct sock *sk) struct net *net = sock_net(sk); int prev_index; - if (!np->ipv6_ac_list) - return; - - rtnl_lock(); + ASSERT_RTNL(); pac = np->ipv6_ac_list; np->ipv6_ac_list = NULL; @@ -211,6 +208,16 @@ void ipv6_sock_ac_close(struct sock *sk) sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } +} + +void ipv6_sock_ac_close(struct sock *sk) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + + if (!np->ipv6_ac_list) + return; + rtnl_lock(); + __ipv6_sock_ac_close(sk); rtnl_unlock(); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index c43592771126..52c2f063529f 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -805,10 +805,17 @@ int esp6_input_done2(struct sk_buff *skb, int err) if (x->encap) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int offset = skb_network_offset(skb) + sizeof(*ip6h); struct xfrm_encap_tmpl *encap = x->encap; - struct udphdr *uh = (void *)(skb_network_header(skb) + hdr_len); - struct tcphdr *th = (void *)(skb_network_header(skb) + hdr_len); - __be16 source; + u8 nexthdr = ip6h->nexthdr; + __be16 frag_off, source; + struct udphdr *uh; + struct tcphdr *th; + + offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); + uh = (void *)(skb->data + offset); + th = (void *)(skb->data + offset); + hdr_len += offset; switch (x->encap->encap_type) { case TCP_ENCAP_ESPINTCP: diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 6532bde82b40..3a57fb9ce049 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1562,17 +1562,18 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) static int __net_init ip6gre_init_net(struct net *net) { struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct net_device *ndev; int err; if (!net_has_fallback_tunnels(net)) return 0; - ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", - NET_NAME_UNKNOWN, - ip6gre_tunnel_setup); - if (!ign->fb_tunnel_dev) { + ndev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", + NET_NAME_UNKNOWN, ip6gre_tunnel_setup); + if (!ndev) { err = -ENOMEM; goto err_alloc_dev; } + ign->fb_tunnel_dev = ndev; dev_net_set(ign->fb_tunnel_dev, net); /* FB netdevice is special: we have one, and only one per netns. * Allowing to move it to another netns is clearly unsafe. @@ -1592,7 +1593,7 @@ static int __net_init ip6gre_init_net(struct net *net) return 0; err_reg_dev: - free_netdev(ign->fb_tunnel_dev); + free_netdev(ndev); err_alloc_dev: return err; } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 20576e87a5f7..76f9e41859a2 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -240,6 +240,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, fl6_free_socklist(sk); __ipv6_sock_mc_close(sk); + __ipv6_sock_ac_close(sk); /* * Sock is moving from IPv6 to IPv4 (sk_prot), so diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f3279810d765..4c36bd0c7930 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -3685,14 +3685,14 @@ static struct fib6_info *ip6_route_info_create(struct fib6_config *cfg, rt->fib6_src.plen = cfg->fc_src_len; #endif if (nh) { - if (!nexthop_get(nh)) { - NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); - goto out; - } if (rt->fib6_src.plen) { NL_SET_ERR_MSG(extack, "Nexthops can not be used with source routing"); goto out; } + if (!nexthop_get(nh)) { + NL_SET_ERR_MSG(extack, "Nexthop has been deleted"); + goto out; + } rt->nh = nh; fib6_nh = nexthop_fib6_nh(rt->nh); } else { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 7d4151747340..a8d74f44056a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -148,7 +148,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, int dif, int sdif, struct udp_hslot *hslot2, struct sk_buff *skb) { - struct sock *sk, *result; + struct sock *sk, *result, *reuseport_result; int score, badness; u32 hash = 0; @@ -158,17 +158,20 @@ static struct sock *udp6_lib_lookup2(struct net *net, score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, sdif); if (score > badness) { + reuseport_result = NULL; + if (sk->sk_reuseport && sk->sk_state != TCP_ESTABLISHED) { hash = udp6_ehashfn(net, daddr, hnum, saddr, sport); - result = reuseport_select_sock(sk, hash, skb, - sizeof(struct udphdr)); - if (result && !reuseport_has_conns(sk, false)) - return result; + reuseport_result = reuseport_select_sock(sk, hash, skb, + sizeof(struct udphdr)); + if (reuseport_result && !reuseport_has_conns(sk, false)) + return reuseport_result; } - result = sk; + + result = reuseport_result ? : sk; badness = score; } } @@ -643,7 +646,7 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) /* * UDP-Lite specific tests, ignored on UDP sockets (see net/ipv4/udp.c). */ - if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + if ((up->pcflag & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { if (up->pcrlen == 0) { /* full coverage was set */ net_dbg_ratelimited("UDPLITE6: partial coverage %d while full coverage %d requested\n", diff --git a/net/key/af_key.c b/net/key/af_key.c index b67ed3a8486c..a915bc86620a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1849,6 +1849,13 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms if (ext_hdrs[SADB_X_EXT_FILTER - 1]) { struct sadb_x_filter *xfilter = ext_hdrs[SADB_X_EXT_FILTER - 1]; + if ((xfilter->sadb_x_filter_splen >= + (sizeof(xfrm_address_t) << 3)) || + (xfilter->sadb_x_filter_dplen >= + (sizeof(xfrm_address_t) << 3))) { + mutex_unlock(&pfk->dump_lock); + return -EINVAL; + } filter = kmalloc(sizeof(*filter), GFP_KERNEL); if (filter == NULL) { mutex_unlock(&pfk->dump_lock); @@ -2400,7 +2407,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa return err; } - xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN, + xp = xfrm_policy_bysel_ctx(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir - 1, &sel, pol_ctx, 1, &err); security_xfrm_policy_free(pol_ctx); @@ -2651,7 +2658,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ return -EINVAL; delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2); - xp = xfrm_policy_byid(net, DUMMY_MARK, 0, XFRM_POLICY_TYPE_MAIN, + xp = xfrm_policy_byid(net, &dummy_mark, 0, XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, delete, &err); if (xp == NULL) return -ENOENT; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 9b360544ad6f..1079a07e43e4 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2166,6 +2166,7 @@ static int ieee80211_leave_mesh(struct wiphy *wiphy, struct net_device *dev) ieee80211_stop_mesh(sdata); mutex_lock(&sdata->local->mtx); ieee80211_vif_release_channel(sdata); + kfree(sdata->u.mesh.ie); mutex_unlock(&sdata->local->mtx); return 0; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 5f1ca25b6c97..e88beb3ff6db 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -617,6 +617,19 @@ int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata, int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb) { + struct ieee80211_supported_band *sband; + const struct ieee80211_sband_iftype_data *iftd; + + sband = ieee80211_get_sband(sdata); + if (!sband) + return -EINVAL; + + iftd = ieee80211_get_sband_iftype_data(sband, + NL80211_IFTYPE_MESH_POINT); + /* The device doesn't support HE in mesh mode or at all */ + if (!iftd) + return 0; + ieee80211_ie_build_he_6ghz_cap(sdata, skb); return 0; } diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 117519bf33d6..aca608ae313f 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -521,6 +521,7 @@ static void mesh_path_free_rcu(struct mesh_table *tbl, del_timer_sync(&mpath->timer); atomic_dec(&sdata->u.mesh.mpaths); atomic_dec(&tbl->entries); + mesh_path_flush_pending(mpath); kfree_rcu(mpath, rcu); } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index cd8487bc6fc2..af4cc5fb678e 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1923,9 +1923,7 @@ void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, if (sta) { tx_pending = atomic_sub_return(tx_airtime, &sta->airtime[ac].aql_tx_pending); - if (WARN_ONCE(tx_pending < 0, - "STA %pM AC %d txq pending airtime underflow: %u, %u", - sta->addr, ac, tx_pending, tx_airtime)) + if (tx_pending < 0) atomic_cmpxchg(&sta->airtime[ac].aql_tx_pending, tx_pending, 0); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1a2941e5244f..3529d1368068 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -4230,11 +4230,12 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata, test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state)) goto out_free; + memset(info, 0, sizeof(*info)); + if (unlikely(!multicast && skb->sk && skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS)) - ieee80211_store_ack_skb(local, skb, &info->flags, NULL); - - memset(info, 0, sizeof(*info)); + info->ack_frame_id = ieee80211_store_ack_skb(local, skb, + &info->flags, NULL); if (unlikely(sdata->control_port_protocol == ehdr->h_proto)) { if (sdata->control_port_no_encrypt) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 21c94094a699..dd9f5c7a1ade 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2878,6 +2878,10 @@ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata, if (WARN_ON(!iftd)) return; + /* Check for device HE 6 GHz capability before adding element */ + if (!iftd->he_6ghz_capa.capa) + return; + cap = le16_to_cpu(iftd->he_6ghz_capa.capa); cap &= ~IEEE80211_HE_6GHZ_CAP_SM_PS; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 3980fbb6f31e..c0abe738e7d3 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1833,7 +1833,7 @@ do_connect: /* on successful connect, the msk state will be moved to established by * subflow_finish_connect() */ - if (!err || err == EINPROGRESS) + if (!err || err == -EINPROGRESS) mptcp_copy_inaddrs(sock->sk, ssock->sk); else inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk)); diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 605e0f68f8bd..2b8abbfe018c 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1717,6 +1717,8 @@ static int sync_thread_backup(void *data) { struct ip_vs_sync_thread_data *tinfo = data; struct netns_ipvs *ipvs = tinfo->ipvs; + struct sock *sk = tinfo->sock->sk; + struct udp_sock *up = udp_sk(sk); int len; pr_info("sync thread started: state = BACKUP, mcast_ifn = %s, " @@ -1724,12 +1726,14 @@ static int sync_thread_backup(void *data) ipvs->bcfg.mcast_ifn, ipvs->bcfg.syncid, tinfo->id); while (!kthread_should_stop()) { - wait_event_interruptible(*sk_sleep(tinfo->sock->sk), - !skb_queue_empty(&tinfo->sock->sk->sk_receive_queue) - || kthread_should_stop()); + wait_event_interruptible(*sk_sleep(sk), + !skb_queue_empty_lockless(&sk->sk_receive_queue) || + !skb_queue_empty_lockless(&up->reader_queue) || + kthread_should_stop()); /* do we have data now? */ - while (!skb_queue_empty(&(tinfo->sock->sk->sk_receive_queue))) { + while (!skb_queue_empty_lockless(&sk->sk_receive_queue) || + !skb_queue_empty_lockless(&up->reader_queue)) { len = ip_vs_receive(tinfo->sock, tinfo->buf, ipvs->bcfg.sync_maxlen); if (len <= 0) { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7647ecfa0d40..88325b264737 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -188,24 +188,6 @@ static void nft_netdev_unregister_hooks(struct net *net, nf_unregister_net_hook(net, &hook->ops); } -static int nft_register_basechain_hooks(struct net *net, int family, - struct nft_base_chain *basechain) -{ - if (family == NFPROTO_NETDEV) - return nft_netdev_register_hooks(net, &basechain->hook_list); - - return nf_register_net_hook(net, &basechain->ops); -} - -static void nft_unregister_basechain_hooks(struct net *net, int family, - struct nft_base_chain *basechain) -{ - if (family == NFPROTO_NETDEV) - nft_netdev_unregister_hooks(net, &basechain->hook_list); - else - nf_unregister_net_hook(net, &basechain->ops); -} - static int nf_tables_register_hook(struct net *net, const struct nft_table *table, struct nft_chain *chain) @@ -223,7 +205,10 @@ static int nf_tables_register_hook(struct net *net, if (basechain->type->ops_register) return basechain->type->ops_register(net, ops); - return nft_register_basechain_hooks(net, table->family, basechain); + if (table->family == NFPROTO_NETDEV) + return nft_netdev_register_hooks(net, &basechain->hook_list); + + return nf_register_net_hook(net, &basechain->ops); } static void nf_tables_unregister_hook(struct net *net, @@ -242,7 +227,10 @@ static void nf_tables_unregister_hook(struct net *net, if (basechain->type->ops_unregister) return basechain->type->ops_unregister(net, ops); - nft_unregister_basechain_hooks(net, table->family, basechain); + if (table->family == NFPROTO_NETDEV) + nft_netdev_unregister_hooks(net, &basechain->hook_list); + else + nf_unregister_net_hook(net, &basechain->ops); } static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) @@ -832,8 +820,7 @@ static void nft_table_disable(struct net *net, struct nft_table *table, u32 cnt) if (cnt && i++ == cnt) break; - nft_unregister_basechain_hooks(net, table->family, - nft_base_chain(chain)); + nf_tables_unregister_hook(net, table, chain); } } @@ -848,8 +835,7 @@ static int nf_tables_table_enable(struct net *net, struct nft_table *table) if (!nft_is_base_chain(chain)) continue; - err = nft_register_basechain_hooks(net, table->family, - nft_base_chain(chain)); + err = nf_tables_register_hook(net, table, chain); if (err < 0) goto err_register_hooks; @@ -894,11 +880,12 @@ static int nf_tables_updtable(struct nft_ctx *ctx) nft_trans_table_enable(trans) = false; } else if (!(flags & NFT_TABLE_F_DORMANT) && ctx->table->flags & NFT_TABLE_F_DORMANT) { + ctx->table->flags &= ~NFT_TABLE_F_DORMANT; ret = nf_tables_table_enable(ctx->net, ctx->table); - if (ret >= 0) { - ctx->table->flags &= ~NFT_TABLE_F_DORMANT; + if (ret >= 0) nft_trans_table_enable(trans) = true; - } + else + ctx->table->flags |= NFT_TABLE_F_DORMANT; } if (ret < 0) goto err; diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 7cd524884304..78ea8c94dcba 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1228,10 +1228,13 @@ int nci_register_device(struct nci_dev *ndev) rc = nfc_register_device(ndev->nfc_dev); if (rc) - goto destroy_rx_wq_exit; + goto destroy_tx_wq_exit; goto exit; +destroy_tx_wq_exit: + destroy_workqueue(ndev->tx_wq); + destroy_rx_wq_exit: destroy_workqueue(ndev->rx_wq); diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 24a8c3c6da0d..300a104b9a0f 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -1180,6 +1180,7 @@ static int qrtr_release(struct socket *sock) sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); + sock_orphan(sk); sock->sk = NULL; if (!sock_flag(sk, SOCK_ZAPPED)) diff --git a/net/rds/recv.c b/net/rds/recv.c index c8404971d5ab..aba4afe4dfed 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -450,12 +450,13 @@ static int rds_still_queued(struct rds_sock *rs, struct rds_incoming *inc, int rds_notify_queue_get(struct rds_sock *rs, struct msghdr *msghdr) { struct rds_notifier *notifier; - struct rds_rdma_notify cmsg = { 0 }; /* fill holes with zero */ + struct rds_rdma_notify cmsg; unsigned int count = 0, max_messages = ~0U; unsigned long flags; LIST_HEAD(copy); int err = 0; + memset(&cmsg, 0, sizeof(cmsg)); /* fill holes with zero */ /* put_cmsg copies to user space and thus may sleep. We can't do this * with rs_lock held, so first grab as many notifications as we can stuff diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f07970207b54..38a46167523f 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -288,7 +288,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, */ ret = rxrpc_connect_call(rx, call, cp, srx, gfp); if (ret < 0) - goto error; + goto error_attached_to_socket; trace_rxrpc_call(call->debug_id, rxrpc_call_connected, atomic_read(&call->usage), here, NULL); @@ -308,18 +308,29 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, error_dup_user_ID: write_unlock(&rx->call_lock); release_sock(&rx->sk); - ret = -EEXIST; - -error: __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, - RX_CALL_DEAD, ret); + RX_CALL_DEAD, -EEXIST); trace_rxrpc_call(call->debug_id, rxrpc_call_error, - atomic_read(&call->usage), here, ERR_PTR(ret)); + atomic_read(&call->usage), here, ERR_PTR(-EEXIST)); rxrpc_release_call(rx, call); mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put); - _leave(" = %d", ret); - return ERR_PTR(ret); + _leave(" = -EEXIST"); + return ERR_PTR(-EEXIST); + + /* We got an error, but the call is attached to the socket and is in + * need of release. However, we might now race with recvmsg() when + * completing the call queues it. Return 0 from sys_sendmsg() and + * leave the error to recvmsg() to deal with. + */ +error_attached_to_socket: + trace_rxrpc_call(call->debug_id, rxrpc_call_error, + atomic_read(&call->usage), here, ERR_PTR(ret)); + set_bit(RXRPC_CALL_DISCONNECTED, &call->flags); + __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_CALL_DEAD, ret); + _leave(" = c=%08x [err]", call->debug_id); + return call; } /* diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 19e141eeed17..8cbe0bf20ed5 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -212,9 +212,11 @@ void rxrpc_disconnect_call(struct rxrpc_call *call) call->peer->cong_cwnd = call->cong_cwnd; - spin_lock_bh(&conn->params.peer->lock); - hlist_del_rcu(&call->error_link); - spin_unlock_bh(&conn->params.peer->lock); + if (!hlist_unhashed(&call->error_link)) { + spin_lock_bh(&call->peer->lock); + hlist_del_rcu(&call->error_link); + spin_unlock_bh(&call->peer->lock); + } if (rxrpc_is_client_call(call)) return rxrpc_disconnect_client_call(call); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 2989742a4aa1..efecc5a8f67d 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -543,7 +543,7 @@ try_again: list_empty(&rx->recvmsg_q) && rx->sk.sk_state != RXRPC_SERVER_LISTENING) { release_sock(&rx->sk); - return -ENODATA; + return -EAGAIN; } if (list_empty(&rx->recvmsg_q)) { @@ -620,7 +620,7 @@ try_again: goto error_unlock_call; } - if (msg->msg_name) { + if (msg->msg_name && call->peer) { struct sockaddr_rxrpc *srx = msg->msg_name; size_t len = sizeof(call->peer->srx); diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 1304b8608f56..f3f6da6e4ad2 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -304,7 +304,7 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, /* this should be in poll */ sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) + if (sk->sk_shutdown & SEND_SHUTDOWN) return -EPIPE; more = msg->msg_flags & MSG_MORE; @@ -681,6 +681,9 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) if (IS_ERR(call)) return PTR_ERR(call); /* ... and we have the call lock. */ + ret = 0; + if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) + goto out_put_unlock; } else { switch (READ_ONCE(call->state)) { case RXRPC_CALL_UNINITIALISED: diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 67504aece9ae..6ed1652d1e26 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -673,9 +673,10 @@ static int tcf_ct_ipv6_is_fragment(struct sk_buff *skb, bool *frag) } static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, - u8 family, u16 zone) + u8 family, u16 zone, bool *defrag) { enum ip_conntrack_info ctinfo; + struct qdisc_skb_cb cb; struct nf_conn *ct; int err = 0; bool frag; @@ -693,6 +694,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, return err; skb_get(skb); + cb = *qdisc_skb_cb(skb); if (family == NFPROTO_IPV4) { enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone; @@ -703,6 +705,9 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, local_bh_enable(); if (err && err != -EINPROGRESS) goto out_free; + + if (!err) + *defrag = true; } else { /* NFPROTO_IPV6 */ #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone; @@ -711,12 +716,16 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb, err = nf_ct_frag6_gather(net, skb, user); if (err && err != -EINPROGRESS) goto out_free; + + if (!err) + *defrag = true; #else err = -EOPNOTSUPP; goto out_free; #endif } + *qdisc_skb_cb(skb) = cb; skb_clear_hash(skb); skb->ignore_df = 1; return err; @@ -914,6 +923,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, int nh_ofs, err, retval; struct tcf_ct_params *p; bool skip_add = false; + bool defrag = false; struct nf_conn *ct; u8 family; @@ -946,7 +956,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a, */ nh_ofs = skb_network_offset(skb); skb_pull_rcsum(skb, nh_ofs); - err = tcf_ct_handle_fragments(net, skb, family, p->zone); + err = tcf_ct_handle_fragments(net, skb, family, p->zone, &defrag); if (err == -EINPROGRESS) { retval = TC_ACT_STOLEN; goto out; @@ -1014,6 +1024,8 @@ out_push: out: tcf_action_update_bstats(&c->common, skb); + if (defrag) + qdisc_skb_cb(skb)->pkt_len = skb->len; return retval; drop: @@ -1531,10 +1543,10 @@ static int __init ct_init_module(void) return 0; -err_tbl_init: - destroy_workqueue(act_ct_wq); err_register: tcf_ct_flow_tables_uninit(); +err_tbl_init: + destroy_workqueue(act_ct_wq); return err; } diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index e62beec0d844..4619cb3cb0a8 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -20,7 +20,6 @@ #include <linux/kmod.h> #include <linux/slab.h> #include <linux/idr.h> -#include <linux/rhashtable.h> #include <linux/jhash.h> #include <linux/rculist.h> #include <net/net_namespace.h> diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 67f7e71f9129..bda2536dd740 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -22,17 +22,11 @@ #include <net/sctp/sm.h> #include <net/sctp/stream_sched.h> -/* Migrates chunks from stream queues to new stream queues if needed, - * but not across associations. Also, removes those chunks to streams - * higher than the new max. - */ -static void sctp_stream_outq_migrate(struct sctp_stream *stream, - struct sctp_stream *new, __u16 outcnt) +static void sctp_stream_shrink_out(struct sctp_stream *stream, __u16 outcnt) { struct sctp_association *asoc; struct sctp_chunk *ch, *temp; struct sctp_outq *outq; - int i; asoc = container_of(stream, struct sctp_association, stream); outq = &asoc->outqueue; @@ -56,6 +50,19 @@ static void sctp_stream_outq_migrate(struct sctp_stream *stream, sctp_chunk_free(ch); } +} + +/* Migrates chunks from stream queues to new stream queues if needed, + * but not across associations. Also, removes those chunks to streams + * higher than the new max. + */ +static void sctp_stream_outq_migrate(struct sctp_stream *stream, + struct sctp_stream *new, __u16 outcnt) +{ + int i; + + if (stream->outcnt > outcnt) + sctp_stream_shrink_out(stream, outcnt); if (new) { /* Here we actually move the old ext stuff into the new @@ -1037,11 +1044,13 @@ struct sctp_chunk *sctp_process_strreset_resp( nums = ntohs(addstrm->number_of_streams); number = stream->outcnt - nums; - if (result == SCTP_STRRESET_PERFORMED) + if (result == SCTP_STRRESET_PERFORMED) { for (i = number; i < stream->outcnt; i++) SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN; - else + } else { + sctp_stream_shrink_out(stream, number); stream->outcnt = number; + } *evp = sctp_ulpevent_make_stream_change_event(asoc, flags, 0, nums, GFP_ATOMIC); diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 903321543838..1163d51196da 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -126,8 +126,10 @@ EXPORT_SYMBOL_GPL(smc_proto6); static void smc_restore_fallback_changes(struct smc_sock *smc) { - smc->clcsock->file->private_data = smc->sk.sk_socket; - smc->clcsock->file = NULL; + if (smc->clcsock->file) { /* non-accepted sockets have no file yet */ + smc->clcsock->file->private_data = smc->sk.sk_socket; + smc->clcsock->file = NULL; + } } static int __smc_release(struct smc_sock *smc) @@ -352,7 +354,7 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link, */ mutex_lock(&lgr->llc_conf_mutex); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].state != SMC_LNK_ACTIVE) + if (!smc_link_active(&lgr->lnk[i])) continue; rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc); if (rc) @@ -632,7 +634,9 @@ static int smc_connect_rdma(struct smc_sock *smc, for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { struct smc_link *l = &smc->conn.lgr->lnk[i]; - if (l->peer_qpn == ntoh24(aclc->qpn)) { + if (l->peer_qpn == ntoh24(aclc->qpn) && + !memcmp(l->peer_gid, &aclc->lcl.gid, SMC_GID_SIZE) && + !memcmp(l->peer_mac, &aclc->lcl.mac, sizeof(l->peer_mac))) { link = l; break; } diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index a47e8855e045..ce468ff62a19 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -66,9 +66,13 @@ int smc_cdc_get_free_slot(struct smc_connection *conn, rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, wr_rdma_buf, (struct smc_wr_tx_pend_priv **)pend); - if (conn->killed) + if (conn->killed) { /* abnormal termination */ + if (!rc) + smc_wr_tx_put_slot(link, + (struct smc_wr_tx_pend_priv *)pend); rc = -EPIPE; + } return rc; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index f69d205b3e11..f82a2e599917 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -45,18 +45,10 @@ static struct smc_lgr_list smc_lgr_list = { /* established link groups */ static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */ static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted); -struct smc_ib_up_work { - struct work_struct work; - struct smc_link_group *lgr; - struct smc_ib_device *smcibdev; - u8 ibport; -}; - static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb, struct smc_buf_desc *buf_desc); static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft); -static void smc_link_up_work(struct work_struct *work); static void smc_link_down_work(struct work_struct *work); /* return head of link group list and its lock for a given link group */ @@ -326,7 +318,6 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, get_device(&ini->ib_dev->ibdev->dev); atomic_inc(&ini->ib_dev->lnk_cnt); - lnk->state = SMC_LNK_ACTIVATING; lnk->link_id = smcr_next_link_id(lgr); lnk->lgr = lgr; lnk->link_idx = link_idx; @@ -362,6 +353,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, rc = smc_wr_create_link(lnk); if (rc) goto destroy_qp; + lnk->state = SMC_LNK_ACTIVATING; return 0; destroy_qp: @@ -452,7 +444,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini) } smc->conn.lgr = lgr; spin_lock_bh(lgr_lock); - list_add(&lgr->list, lgr_list); + list_add_tail(&lgr->list, lgr_list); spin_unlock_bh(lgr_lock); return 0; @@ -550,8 +542,7 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr, smc_wr_wakeup_tx_wait(from_lnk); for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].state != SMC_LNK_ACTIVE || - i == from_lnk->link_idx) + if (!smc_link_active(&lgr->lnk[i]) || i == from_lnk->link_idx) continue; if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev && from_lnk->ibport == lgr->lnk[i].ibport) { @@ -1106,67 +1097,23 @@ static void smc_conn_abort_work(struct work_struct *work) sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */ } -/* link is up - establish alternate link if applicable */ -static void smcr_link_up(struct smc_link_group *lgr, - struct smc_ib_device *smcibdev, u8 ibport) -{ - struct smc_link *link = NULL; - - if (list_empty(&lgr->list) || - lgr->type == SMC_LGR_SYMMETRIC || - lgr->type == SMC_LGR_ASYMMETRIC_PEER) - return; - - if (lgr->role == SMC_SERV) { - /* trigger local add link processing */ - link = smc_llc_usable_link(lgr); - if (!link) - return; - smc_llc_srv_add_link_local(link); - } else { - /* invite server to start add link processing */ - u8 gid[SMC_GID_SIZE]; - - if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid, - NULL)) - return; - if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { - /* some other llc task is ongoing */ - wait_event_timeout(lgr->llc_flow_waiter, - (list_empty(&lgr->list) || - lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE), - SMC_LLC_WAIT_TIME); - } - /* lgr or device no longer active? */ - if (!list_empty(&lgr->list) && - smc_ib_port_active(smcibdev, ibport)) - link = smc_llc_usable_link(lgr); - if (link) - smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], - gid, NULL, SMC_LLC_REQ); - wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */ - } -} - void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport) { - struct smc_ib_up_work *ib_work; struct smc_link_group *lgr, *n; list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) { + struct smc_link *link; + if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id, SMC_MAX_PNETID_LEN) || lgr->type == SMC_LGR_SYMMETRIC || lgr->type == SMC_LGR_ASYMMETRIC_PEER) continue; - ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL); - if (!ib_work) - continue; - INIT_WORK(&ib_work->work, smc_link_up_work); - ib_work->lgr = lgr; - ib_work->smcibdev = smcibdev; - ib_work->ibport = ibport; - schedule_work(&ib_work->work); + + /* trigger local add link processing */ + link = smc_llc_usable_link(lgr); + if (link) + smc_llc_add_link_local(link); } } @@ -1204,10 +1151,12 @@ static void smcr_link_down(struct smc_link *lnk) SMC_LLC_WAIT_TIME); mutex_lock(&lgr->llc_conf_mutex); } - if (!list_empty(&lgr->list)) + if (!list_empty(&lgr->list)) { smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true, SMC_LLC_DEL_LOST_PATH); + smcr_link_clear(lnk, true); + } wake_up(&lgr->llc_flow_waiter); /* wake up next waiter */ } } @@ -1247,20 +1196,6 @@ void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport) } } -static void smc_link_up_work(struct work_struct *work) -{ - struct smc_ib_up_work *ib_work = container_of(work, - struct smc_ib_up_work, - work); - struct smc_link_group *lgr = ib_work->lgr; - - if (list_empty(&lgr->list)) - goto out; - smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport); -out: - kfree(ib_work); -} - static void smc_link_down_work(struct work_struct *work) { struct smc_link *link = container_of(work, struct smc_link, @@ -1333,7 +1268,7 @@ static bool smcr_lgr_match(struct smc_link_group *lgr, return false; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (lgr->lnk[i].state != SMC_LNK_ACTIVE) + if (!smc_link_active(&lgr->lnk[i])) continue; if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) && !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) && @@ -1376,7 +1311,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini) smcr_lgr_match(lgr, ini->ib_lcl, role, ini->ib_clcqpn)) && !lgr->sync_err && lgr->vlan_id == ini->vlan_id && - (role == SMC_CLNT || + (role == SMC_CLNT || ini->is_smcd || lgr->conns_num < SMC_RMBS_PER_LGR_MAX)) { /* link group found */ ini->cln_first_contact = SMC_REUSE_CONTACT; @@ -1781,14 +1716,14 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) { - if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk)) + if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk)) return; smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); } void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) { - if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk)) + if (!conn->lgr || conn->lgr->is_smcd || !smc_link_active(conn->lnk)) return; smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE); } @@ -1800,7 +1735,7 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) if (!conn->lgr || conn->lgr->is_smcd) return; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_usable(&conn->lgr->lnk[i])) + if (!smc_link_active(&conn->lgr->lnk[i])) continue; smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc, DMA_FROM_DEVICE); @@ -1814,7 +1749,7 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn) if (!conn->lgr || conn->lgr->is_smcd) return; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_usable(&conn->lgr->lnk[i])) + if (!smc_link_active(&conn->lgr->lnk[i])) continue; smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc, DMA_FROM_DEVICE); @@ -1837,8 +1772,12 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd) return rc; /* create rmb */ rc = __smc_buf_create(smc, is_smcd, true); - if (rc) + if (rc) { + mutex_lock(&smc->conn.lgr->sndbufs_lock); + list_del(&smc->conn.sndbuf_desc->list); + mutex_unlock(&smc->conn.lgr->sndbufs_lock); smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc); + } return rc; } diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index c3ff512fd891..1c4d5439d0ff 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -349,6 +349,11 @@ static inline bool smc_link_usable(struct smc_link *lnk) return true; } +static inline bool smc_link_active(struct smc_link *lnk) +{ + return lnk->state == SMC_LNK_ACTIVE; +} + struct smc_sock; struct smc_clc_msg_accept_confirm; struct smc_clc_msg_local; diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 7637fdebbb78..1c314dbdc7fa 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -506,6 +506,10 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) int cqe_size_order, smc_order; long rc; + mutex_lock(&smcibdev->mutex); + rc = 0; + if (smcibdev->initialized) + goto out; /* the calculated number of cq entries fits to mlx5 cq allocation */ cqe_size_order = cache_line_size() == 128 ? 7 : 6; smc_order = MAX_ORDER - cqe_size_order - 1; @@ -517,7 +521,7 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) rc = PTR_ERR_OR_ZERO(smcibdev->roce_cq_send); if (IS_ERR(smcibdev->roce_cq_send)) { smcibdev->roce_cq_send = NULL; - return rc; + goto out; } smcibdev->roce_cq_recv = ib_create_cq(smcibdev->ibdev, smc_wr_rx_cq_handler, NULL, @@ -529,21 +533,26 @@ long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev) } smc_wr_add_dev(smcibdev); smcibdev->initialized = 1; - return rc; + goto out; err: ib_destroy_cq(smcibdev->roce_cq_send); +out: + mutex_unlock(&smcibdev->mutex); return rc; } static void smc_ib_cleanup_per_ibdev(struct smc_ib_device *smcibdev) { + mutex_lock(&smcibdev->mutex); if (!smcibdev->initialized) - return; + goto out; smcibdev->initialized = 0; ib_destroy_cq(smcibdev->roce_cq_recv); ib_destroy_cq(smcibdev->roce_cq_send); smc_wr_remove_dev(smcibdev); +out: + mutex_unlock(&smcibdev->mutex); } static struct ib_client smc_ib_client; @@ -566,6 +575,7 @@ static int smc_ib_add_dev(struct ib_device *ibdev) INIT_WORK(&smcibdev->port_event_work, smc_ib_port_event_work); atomic_set(&smcibdev->lnk_cnt, 0); init_waitqueue_head(&smcibdev->lnks_deleted); + mutex_init(&smcibdev->mutex); mutex_lock(&smc_ib_devices.mutex); list_add_tail(&smcibdev->list, &smc_ib_devices.list); mutex_unlock(&smc_ib_devices.mutex); diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index ae6776e1e726..2ce481187dd0 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -52,6 +52,7 @@ struct smc_ib_device { /* ib-device infos for smc */ DECLARE_BITMAP(ports_going_away, SMC_MAX_PORTS); atomic_t lnk_cnt; /* number of links on ibdev */ wait_queue_head_t lnks_deleted; /* wait 4 removal of all links*/ + struct mutex mutex; /* protect dev setup+cleanup */ }; struct smc_buf_desc; diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index c1a038689c63..df5b0a6ea848 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -428,7 +428,7 @@ static int smc_llc_send_confirm_rkey(struct smc_link *send_link, rtok_ix = 1; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { link = &send_link->lgr->lnk[i]; - if (link->state == SMC_LNK_ACTIVE && link != send_link) { + if (smc_link_active(link) && link != send_link) { rkeyllc->rtoken[rtok_ix].link_id = link->link_id; rkeyllc->rtoken[rtok_ix].rmb_key = htonl(rmb_desc->mr_rx[link->link_idx]->rkey); @@ -895,6 +895,36 @@ out: return rc; } +/* as an SMC client, invite server to start the add_link processing */ +static void smc_llc_cli_add_link_invite(struct smc_link *link, + struct smc_llc_qentry *qentry) +{ + struct smc_link_group *lgr = smc_get_lgr(link); + struct smc_init_info ini; + + if (lgr->type == SMC_LGR_SYMMETRIC || + lgr->type == SMC_LGR_ASYMMETRIC_PEER) + goto out; + + ini.vlan_id = lgr->vlan_id; + smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev); + if (!ini.ib_dev) + goto out; + + smc_llc_send_add_link(link, ini.ib_dev->mac[ini.ib_port - 1], + ini.ib_gid, NULL, SMC_LLC_REQ); +out: + kfree(qentry); +} + +static bool smc_llc_is_local_add_link(union smc_llc_msg *llc) +{ + if (llc->raw.hdr.common.type == SMC_LLC_ADD_LINK && + !llc->add_link.qp_mtu && !llc->add_link.link_num) + return true; + return false; +} + static void smc_llc_process_cli_add_link(struct smc_link_group *lgr) { struct smc_llc_qentry *qentry; @@ -902,7 +932,10 @@ static void smc_llc_process_cli_add_link(struct smc_link_group *lgr) qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl); mutex_lock(&lgr->llc_conf_mutex); - smc_llc_cli_add_link(qentry->link, qentry); + if (smc_llc_is_local_add_link(&qentry->msg)) + smc_llc_cli_add_link_invite(qentry->link, qentry); + else + smc_llc_cli_add_link(qentry->link, qentry); mutex_unlock(&lgr->llc_conf_mutex); } @@ -911,7 +944,7 @@ static int smc_llc_active_link_count(struct smc_link_group *lgr) int i, link_count = 0; for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) { - if (!smc_link_usable(&lgr->lnk[i])) + if (!smc_link_active(&lgr->lnk[i])) continue; link_count++; } @@ -1051,12 +1084,14 @@ static int smc_llc_srv_conf_link(struct smc_link *link, if (rc) return -ENOLINK; /* receive CONFIRM LINK response over the RoCE fabric */ - qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME, - SMC_LLC_CONFIRM_LINK); - if (!qentry) { + qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME, 0); + if (!qentry || + qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) { /* send DELETE LINK */ smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ, false, SMC_LLC_DEL_LOST_PATH); + if (qentry) + smc_llc_flow_qentry_del(&lgr->llc_flow_lcl); return -ENOLINK; } smc_llc_save_peer_uid(qentry); @@ -1158,14 +1193,14 @@ static void smc_llc_process_srv_add_link(struct smc_link_group *lgr) mutex_unlock(&lgr->llc_conf_mutex); } -/* enqueue a local add_link req to trigger a new add_link flow, only as SERV */ -void smc_llc_srv_add_link_local(struct smc_link *link) +/* enqueue a local add_link req to trigger a new add_link flow */ +void smc_llc_add_link_local(struct smc_link *link) { struct smc_llc_msg_add_link add_llc = {0}; add_llc.hd.length = sizeof(add_llc); add_llc.hd.common.type = SMC_LLC_ADD_LINK; - /* no dev and port needed, we as server ignore client data anyway */ + /* no dev and port needed */ smc_llc_enqueue(link, (union smc_llc_msg *)&add_llc); } @@ -1345,7 +1380,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr) if (lgr->type == SMC_LGR_SINGLE && !list_empty(&lgr->list)) { /* trigger setup of asymm alt link */ - smc_llc_srv_add_link_local(lnk); + smc_llc_add_link_local(lnk); } out: mutex_unlock(&lgr->llc_conf_mutex); @@ -1474,7 +1509,18 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) if (list_empty(&lgr->list)) goto out; /* lgr is terminating */ if (lgr->role == SMC_CLNT) { - if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK) { + if (smc_llc_is_local_add_link(llc)) { + if (lgr->llc_flow_lcl.type == + SMC_LLC_FLOW_ADD_LINK) + break; /* add_link in progress */ + if (smc_llc_flow_start(&lgr->llc_flow_lcl, + qentry)) { + schedule_work(&lgr->llc_add_link_work); + } + return; + } + if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK && + !lgr->llc_flow_lcl.qentry) { /* a flow is waiting for this message */ smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); @@ -1498,28 +1544,13 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry) } break; case SMC_LLC_DELETE_LINK: - if (lgr->role == SMC_CLNT) { - /* server requests to delete this link, send response */ - if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) { - /* DEL LINK REQ during ADD LINK SEQ */ - smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, - qentry); - wake_up(&lgr->llc_msg_waiter); - } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, - qentry)) { - schedule_work(&lgr->llc_del_link_work); - } - } else { - if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK && - !lgr->llc_flow_lcl.qentry) { - /* DEL LINK REQ during ADD LINK SEQ */ - smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, - qentry); - wake_up(&lgr->llc_msg_waiter); - } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, - qentry)) { - schedule_work(&lgr->llc_del_link_work); - } + if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK && + !lgr->llc_flow_lcl.qentry) { + /* DEL LINK REQ during ADD LINK SEQ */ + smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry); + wake_up(&lgr->llc_msg_waiter); + } else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) { + schedule_work(&lgr->llc_del_link_work); } return; case SMC_LLC_CONFIRM_RKEY: @@ -1585,23 +1616,30 @@ again: static void smc_llc_rx_response(struct smc_link *link, struct smc_llc_qentry *qentry) { + enum smc_llc_flowtype flowtype = link->lgr->llc_flow_lcl.type; + struct smc_llc_flow *flow = &link->lgr->llc_flow_lcl; u8 llc_type = qentry->msg.raw.hdr.common.type; switch (llc_type) { case SMC_LLC_TEST_LINK: - if (link->state == SMC_LNK_ACTIVE) + if (smc_link_active(link)) complete(&link->llc_testlink_resp); break; case SMC_LLC_ADD_LINK: - case SMC_LLC_DELETE_LINK: - case SMC_LLC_CONFIRM_LINK: case SMC_LLC_ADD_LINK_CONT: + case SMC_LLC_CONFIRM_LINK: + if (flowtype != SMC_LLC_FLOW_ADD_LINK || flow->qentry) + break; /* drop out-of-flow response */ + goto assign; + case SMC_LLC_DELETE_LINK: + if (flowtype != SMC_LLC_FLOW_DEL_LINK || flow->qentry) + break; /* drop out-of-flow response */ + goto assign; case SMC_LLC_CONFIRM_RKEY: case SMC_LLC_DELETE_RKEY: - /* assign responses to the local flow, we requested them */ - smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry); - wake_up(&link->lgr->llc_msg_waiter); - return; + if (flowtype != SMC_LLC_FLOW_RKEY || flow->qentry) + break; /* drop out-of-flow response */ + goto assign; case SMC_LLC_CONFIRM_RKEY_CONT: /* not used because max links is 3 */ break; @@ -1610,6 +1648,11 @@ static void smc_llc_rx_response(struct smc_link *link, break; } kfree(qentry); + return; +assign: + /* assign responses to the local flow, we requested them */ + smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry); + wake_up(&link->lgr->llc_msg_waiter); } static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc) @@ -1663,7 +1706,7 @@ static void smc_llc_testlink_work(struct work_struct *work) u8 user_data[16] = { 0 }; int rc; - if (link->state != SMC_LNK_ACTIVE) + if (!smc_link_active(link)) return; /* don't reschedule worker */ expire_time = link->wr_rx_tstamp + link->llc_testlink_time; if (time_is_after_jiffies(expire_time)) { @@ -1675,7 +1718,7 @@ static void smc_llc_testlink_work(struct work_struct *work) /* receive TEST LINK response over RoCE fabric */ rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp, SMC_LLC_WAIT_TIME); - if (link->state != SMC_LNK_ACTIVE) + if (!smc_link_active(link)) return; /* link state changed */ if (rc <= 0) { smcr_link_down_cond_sched(link); diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index a5d2fe3eea61..cc00a2ec4e92 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -103,7 +103,7 @@ void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn); int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry); int smc_llc_srv_add_link(struct smc_link *link); -void smc_llc_srv_add_link_local(struct smc_link *link); +void smc_llc_add_link_local(struct smc_link *link); int smc_llc_init(void) __init; #endif /* SMC_LLC_H */ diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 39e14d5edaf1..e9d0953522f0 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1317,6 +1317,7 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data) q.len = strlen(gssd_dummy_clnt_dir[0].name); clnt_dentry = d_hash_and_lookup(gssd_dentry, &q); if (!clnt_dentry) { + __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1); pipe_dentry = ERR_PTR(-ENOENT); goto out; } diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5c4ec9386f81..c537272f9c7e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -44,6 +44,7 @@ #include <net/tcp.h> #include <net/tcp_states.h> #include <linux/uaccess.h> +#include <linux/highmem.h> #include <asm/ioctls.h> #include <linux/sunrpc/types.h> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 6f7d82fb1eb0..be11d672b5b9 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1118,6 +1118,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, base = 0; } else { base -= buf->head[0].iov_len; + subbuf->head[0].iov_base = buf->head[0].iov_base; subbuf->head[0].iov_len = 0; } @@ -1130,6 +1131,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, base = 0; } else { base -= buf->page_len; + subbuf->pages = buf->pages; + subbuf->page_base = 0; subbuf->page_len = 0; } @@ -1141,6 +1144,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, base = 0; } else { base -= buf->tail[0].iov_len; + subbuf->tail[0].iov_base = buf->tail[0].iov_base; subbuf->tail[0].iov_len = 0; } diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index ef997880e17a..b647562a26dd 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -367,7 +367,7 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) trace_xprtrdma_wc_fastreg(wc, frwr); /* The MR will get recycled when the associated req is retransmitted */ - rpcrdma_flush_disconnect(cq, wc); + rpcrdma_flush_disconnect(cq->cq_context, wc); } /** @@ -452,7 +452,7 @@ static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) trace_xprtrdma_wc_li(wc, frwr); __frwr_release_mr(wc, mr); - rpcrdma_flush_disconnect(cq, wc); + rpcrdma_flush_disconnect(cq->cq_context, wc); } /** @@ -474,7 +474,7 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) __frwr_release_mr(wc, mr); complete(&frwr->fr_linv_done); - rpcrdma_flush_disconnect(cq, wc); + rpcrdma_flush_disconnect(cq->cq_context, wc); } /** @@ -582,7 +582,7 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc) smp_rmb(); rpcrdma_complete_rqst(rep); - rpcrdma_flush_disconnect(cq, wc); + rpcrdma_flush_disconnect(cq->cq_context, wc); } /** diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 2081c8fbfa48..453bacc99907 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -71,7 +71,7 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs) size = RPCRDMA_HDRLEN_MIN; /* Maximum Read list size */ - size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32); + size += maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32); /* Minimal Read chunk size */ size += sizeof(__be32); /* segment count */ @@ -94,7 +94,7 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs) size = RPCRDMA_HDRLEN_MIN; /* Maximum Write list size */ - size = sizeof(__be32); /* segment count */ + size += sizeof(__be32); /* segment count */ size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32); size += sizeof(__be32); /* list discriminator */ @@ -1349,8 +1349,7 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep, be32_to_cpup(p), be32_to_cpu(rep->rr_xid)); } - r_xprt->rx_stats.bad_reply_count++; - return -EREMOTEIO; + return -EIO; } /* Perform XID lookup, reconstruction of the RPC reply, and @@ -1387,13 +1386,11 @@ out: spin_unlock(&xprt->queue_lock); return; -/* If the incoming reply terminated a pending RPC, the next - * RPC call will post a replacement receive buffer as it is - * being marshaled. - */ out_badheader: trace_xprtrdma_reply_hdr(rep); r_xprt->rx_stats.bad_reply_count++; + rqst->rq_task->tk_status = status; + status = 0; goto out; } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 0c4af7f5e241..053c8ab1265a 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -242,13 +242,18 @@ xprt_rdma_connect_worker(struct work_struct *work) rc = rpcrdma_xprt_connect(r_xprt); xprt_clear_connecting(xprt); - if (r_xprt->rx_ep && r_xprt->rx_ep->re_connect_status > 0) { + if (!rc) { xprt->connect_cookie++; xprt->stat.connect_count++; xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start; xprt_set_connected(xprt); rc = -EAGAIN; + } else { + /* Force a call to xprt_rdma_close to clean up */ + spin_lock(&xprt->transport_lock); + set_bit(XPRT_CLOSE_WAIT, &xprt->state); + spin_unlock(&xprt->transport_lock); } xprt_wake_pending_tasks(xprt, rc); } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 2ae348377806..75c646743df3 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -84,7 +84,8 @@ static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep); static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); -static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep); +static void rpcrdma_ep_get(struct rpcrdma_ep *ep); +static int rpcrdma_ep_put(struct rpcrdma_ep *ep); static struct rpcrdma_regbuf * rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, gfp_t flags); @@ -97,7 +98,8 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); */ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) { - struct rdma_cm_id *id = r_xprt->rx_ep->re_id; + struct rpcrdma_ep *ep = r_xprt->rx_ep; + struct rdma_cm_id *id = ep->re_id; /* Flush Receives, then wait for deferred Reply work * to complete. @@ -108,6 +110,8 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) * local invalidations. */ ib_drain_sq(id->qp); + + rpcrdma_ep_put(ep); } /** @@ -126,23 +130,27 @@ static void rpcrdma_qp_event_handler(struct ib_event *event, void *context) trace_xprtrdma_qp_event(ep, event); } +/* Ensure xprt_force_disconnect() is invoked exactly once when a + * connection is closed or lost. (The important thing is it needs + * to be invoked "at least" once). + */ +static void rpcrdma_force_disconnect(struct rpcrdma_ep *ep) +{ + if (atomic_add_unless(&ep->re_force_disconnect, 1, 1)) + xprt_force_disconnect(ep->re_xprt); +} + /** * rpcrdma_flush_disconnect - Disconnect on flushed completion - * @cq: completion queue + * @r_xprt: transport to disconnect * @wc: work completion entry * * Must be called in process context. */ -void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc) +void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc) { - struct rpcrdma_xprt *r_xprt = cq->cq_context; - struct rpc_xprt *xprt = &r_xprt->rx_xprt; - - if (wc->status != IB_WC_SUCCESS && - r_xprt->rx_ep->re_connect_status == 1) { - r_xprt->rx_ep->re_connect_status = -ECONNABORTED; - xprt_force_disconnect(xprt); - } + if (wc->status != IB_WC_SUCCESS) + rpcrdma_force_disconnect(r_xprt->rx_ep); } /** @@ -156,11 +164,12 @@ static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) struct ib_cqe *cqe = wc->wr_cqe; struct rpcrdma_sendctx *sc = container_of(cqe, struct rpcrdma_sendctx, sc_cqe); + struct rpcrdma_xprt *r_xprt = cq->cq_context; /* WARNING: Only wr_cqe and status are reliable at this point */ trace_xprtrdma_wc_send(sc, wc); - rpcrdma_sendctx_put_locked((struct rpcrdma_xprt *)cq->cq_context, sc); - rpcrdma_flush_disconnect(cq, wc); + rpcrdma_sendctx_put_locked(r_xprt, sc); + rpcrdma_flush_disconnect(r_xprt, wc); } /** @@ -195,7 +204,7 @@ static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) return; out_flushed: - rpcrdma_flush_disconnect(cq, wc); + rpcrdma_flush_disconnect(r_xprt, wc); rpcrdma_rep_destroy(rep); } @@ -239,7 +248,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr; struct rpcrdma_ep *ep = id->context; - struct rpc_xprt *xprt = ep->re_xprt; might_sleep(); @@ -263,10 +271,9 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) /* fall through */ case RDMA_CM_EVENT_ADDR_CHANGE: ep->re_connect_status = -ENODEV; - xprt_force_disconnect(xprt); goto disconnected; case RDMA_CM_EVENT_ESTABLISHED: - kref_get(&ep->re_kref); + rpcrdma_ep_get(ep); ep->re_connect_status = 1; rpcrdma_update_cm_private(ep, &event->param.conn); trace_xprtrdma_inline_thresh(ep); @@ -274,22 +281,24 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) break; case RDMA_CM_EVENT_CONNECT_ERROR: ep->re_connect_status = -ENOTCONN; - goto disconnected; + goto wake_connect_worker; case RDMA_CM_EVENT_UNREACHABLE: ep->re_connect_status = -ENETUNREACH; - goto disconnected; + goto wake_connect_worker; case RDMA_CM_EVENT_REJECTED: dprintk("rpcrdma: connection to %pISpc rejected: %s\n", sap, rdma_reject_msg(id, event->status)); ep->re_connect_status = -ECONNREFUSED; if (event->status == IB_CM_REJ_STALE_CONN) - ep->re_connect_status = -EAGAIN; - goto disconnected; + ep->re_connect_status = -ENOTCONN; +wake_connect_worker: + wake_up_all(&ep->re_connect_wait); + return 0; case RDMA_CM_EVENT_DISCONNECTED: ep->re_connect_status = -ECONNABORTED; disconnected: - xprt_force_disconnect(xprt); - return rpcrdma_ep_destroy(ep); + rpcrdma_force_disconnect(ep); + return rpcrdma_ep_put(ep); default: break; } @@ -345,7 +354,7 @@ out: return ERR_PTR(rc); } -static void rpcrdma_ep_put(struct kref *kref) +static void rpcrdma_ep_destroy(struct kref *kref) { struct rpcrdma_ep *ep = container_of(kref, struct rpcrdma_ep, re_kref); @@ -369,13 +378,18 @@ static void rpcrdma_ep_put(struct kref *kref) module_put(THIS_MODULE); } +static noinline void rpcrdma_ep_get(struct rpcrdma_ep *ep) +{ + kref_get(&ep->re_kref); +} + /* Returns: * %0 if @ep still has a positive kref count, or * %1 if @ep was destroyed successfully. */ -static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep) +static noinline int rpcrdma_ep_put(struct rpcrdma_ep *ep) { - return kref_put(&ep->re_kref, rpcrdma_ep_put); + return kref_put(&ep->re_kref, rpcrdma_ep_destroy); } static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) @@ -388,14 +402,14 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) ep = kzalloc(sizeof(*ep), GFP_NOFS); if (!ep) - return -EAGAIN; + return -ENOTCONN; ep->re_xprt = &r_xprt->rx_xprt; kref_init(&ep->re_kref); id = rpcrdma_create_id(r_xprt, ep); if (IS_ERR(id)) { - rc = PTR_ERR(id); - goto out_free; + kfree(ep); + return PTR_ERR(id); } __module_get(THIS_MODULE); device = id->device; @@ -492,11 +506,8 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) return 0; out_destroy: - rpcrdma_ep_destroy(ep); + rpcrdma_ep_put(ep); rdma_destroy_id(id); -out_free: - kfree(ep); - r_xprt->rx_ep = NULL; return rc; } @@ -512,22 +523,19 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) struct rpcrdma_ep *ep; int rc; -retry: - rpcrdma_xprt_disconnect(r_xprt); rc = rpcrdma_ep_create(r_xprt); if (rc) return rc; ep = r_xprt->rx_ep; - ep->re_connect_status = 0; xprt_clear_connected(xprt); - rpcrdma_reset_cwnd(r_xprt); - rpcrdma_post_recvs(r_xprt, true); - rc = rpcrdma_sendctxs_create(r_xprt); - if (rc) - goto out; + /* Bump the ep's reference count while there are + * outstanding Receives. + */ + rpcrdma_ep_get(ep); + rpcrdma_post_recvs(r_xprt, true); rc = rdma_connect(ep->re_id, &ep->re_remote_cma); if (rc) @@ -538,22 +546,24 @@ retry: wait_event_interruptible(ep->re_connect_wait, ep->re_connect_status != 0); if (ep->re_connect_status <= 0) { - if (ep->re_connect_status == -EAGAIN) - goto retry; rc = ep->re_connect_status; goto out; } + rc = rpcrdma_sendctxs_create(r_xprt); + if (rc) { + rc = -ENOTCONN; + goto out; + } + rc = rpcrdma_reqs_setup(r_xprt); if (rc) { - rpcrdma_xprt_disconnect(r_xprt); + rc = -ENOTCONN; goto out; } rpcrdma_mrs_create(r_xprt); out: - if (rc) - ep->re_connect_status = rc; trace_xprtrdma_connect(r_xprt, rc); return rc; } @@ -587,7 +597,7 @@ void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) rpcrdma_mrs_destroy(r_xprt); rpcrdma_sendctxs_destroy(r_xprt); - if (rpcrdma_ep_destroy(ep)) + if (rpcrdma_ep_put(ep)) rdma_destroy_id(id); r_xprt->rx_ep = NULL; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 0a16fdb09b2c..43974ef39a50 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -82,6 +82,7 @@ struct rpcrdma_ep { unsigned int re_max_inline_recv; int re_async_rc; int re_connect_status; + atomic_t re_force_disconnect; struct ib_qp_init_attr re_attr; wait_queue_head_t re_connect_wait; struct rpc_xprt *re_xprt; @@ -446,7 +447,7 @@ extern unsigned int xprt_rdma_memreg_strategy; /* * Endpoint calls - xprtrdma/verbs.c */ -void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc); +void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc); int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt); void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt); diff --git a/net/tipc/link.c b/net/tipc/link.c index 263d950e70e9..d40f8e5b7683 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -827,11 +827,11 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) state |= l->bc_rcvlink->rcv_unacked; state |= l->rcv_unacked; state |= !skb_queue_empty(&l->transmq); - state |= !skb_queue_empty(&l->deferdq); probe = mstate->probing; probe |= l->silent_intv_cnt; if (probe || mstate->monitoring) l->silent_intv_cnt++; + probe |= !skb_queue_empty(&l->deferdq); if (l->snd_nxt == l->checkpoint) { tipc_link_update_cwin(l, 0, 0); probe = true; diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index dfbaf6bd8b1c..2700a63ab095 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -22,7 +22,7 @@ #include <net/af_vsock.h> static struct workqueue_struct *virtio_vsock_workqueue; -static struct virtio_vsock *the_virtio_vsock; +static struct virtio_vsock __rcu *the_virtio_vsock; static DEFINE_MUTEX(the_virtio_vsock_mutex); /* protects the_virtio_vsock */ struct virtio_vsock { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0e07fb8585fb..7fbca0854265 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -13266,13 +13266,13 @@ static int nl80211_vendor_cmd(struct sk_buff *skb, struct genl_info *info) if (!wdev_running(wdev)) return -ENETDOWN; } - - if (!vcmd->doit) - return -EOPNOTSUPP; } else { wdev = NULL; } + if (!vcmd->doit) + return -EOPNOTSUPP; + if (info->attrs[NL80211_ATTR_VENDOR_DATA]) { data = nla_data(info->attrs[NL80211_ATTR_VENDOR_DATA]); len = nla_len(info->attrs[NL80211_ATTR_VENDOR_DATA]); diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 100e29682b48..827ccdf2db57 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -15,6 +15,7 @@ static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb, { if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf || !sk_rmem_schedule(sk, skb, skb->truesize)) { + XFRM_INC_STATS(sock_net(sk), LINUX_MIB_XFRMINERROR); kfree_skb(skb); return; } @@ -49,23 +50,51 @@ static void espintcp_rcv(struct strparser *strp, struct sk_buff *skb) struct espintcp_ctx *ctx = container_of(strp, struct espintcp_ctx, strp); struct strp_msg *rxm = strp_msg(skb); + int len = rxm->full_len - 2; u32 nonesp_marker; int err; + /* keepalive packet? */ + if (unlikely(len == 1)) { + u8 data; + + err = skb_copy_bits(skb, rxm->offset + 2, &data, 1); + if (err < 0) { + XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR); + kfree_skb(skb); + return; + } + + if (data == 0xff) { + kfree_skb(skb); + return; + } + } + + /* drop other short messages */ + if (unlikely(len <= sizeof(nonesp_marker))) { + XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR); + kfree_skb(skb); + return; + } + err = skb_copy_bits(skb, rxm->offset + 2, &nonesp_marker, sizeof(nonesp_marker)); if (err < 0) { + XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINHDRERROR); kfree_skb(skb); return; } /* remove header, leave non-ESP marker/SPI */ if (!__pskb_pull(skb, rxm->offset + 2)) { + XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR); kfree_skb(skb); return; } if (pskb_trim(skb, rxm->full_len - 2) != 0) { + XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR); kfree_skb(skb); return; } @@ -91,7 +120,7 @@ static int espintcp_parse(struct strparser *strp, struct sk_buff *skb) return err; len = be16_to_cpu(blen); - if (len < 6) + if (len < 2) return -EINVAL; return len; @@ -109,8 +138,11 @@ static int espintcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, flags |= nonblock ? MSG_DONTWAIT : 0; skb = __skb_recv_datagram(sk, &ctx->ike_queue, flags, &off, &err); - if (!skb) + if (!skb) { + if (err == -EAGAIN && sk->sk_shutdown & RCV_SHUTDOWN) + return 0; return err; + } copied = len; if (copied > skb->len) @@ -213,7 +245,7 @@ retry: return 0; } -static int espintcp_push_msgs(struct sock *sk) +static int espintcp_push_msgs(struct sock *sk, int flags) { struct espintcp_ctx *ctx = espintcp_getctx(sk); struct espintcp_msg *emsg = &ctx->partial; @@ -227,12 +259,12 @@ static int espintcp_push_msgs(struct sock *sk) ctx->tx_running = 1; if (emsg->skb) - err = espintcp_sendskb_locked(sk, emsg, 0); + err = espintcp_sendskb_locked(sk, emsg, flags); else - err = espintcp_sendskmsg_locked(sk, emsg, 0); + err = espintcp_sendskmsg_locked(sk, emsg, flags); if (err == -EAGAIN) { ctx->tx_running = 0; - return 0; + return flags & MSG_DONTWAIT ? -EAGAIN : 0; } if (!err) memset(emsg, 0, sizeof(*emsg)); @@ -257,7 +289,7 @@ int espintcp_push_skb(struct sock *sk, struct sk_buff *skb) offset = skb_transport_offset(skb); len = skb->len - offset; - espintcp_push_msgs(sk); + espintcp_push_msgs(sk, 0); if (emsg->len) { kfree_skb(skb); @@ -270,7 +302,7 @@ int espintcp_push_skb(struct sock *sk, struct sk_buff *skb) emsg->len = len; emsg->skb = skb; - espintcp_push_msgs(sk); + espintcp_push_msgs(sk, 0); return 0; } @@ -287,7 +319,7 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) char buf[2] = {0}; int err, end; - if (msg->msg_flags) + if (msg->msg_flags & ~MSG_DONTWAIT) return -EOPNOTSUPP; if (size > MAX_ESPINTCP_MSG) @@ -298,9 +330,10 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) lock_sock(sk); - err = espintcp_push_msgs(sk); + err = espintcp_push_msgs(sk, msg->msg_flags & MSG_DONTWAIT); if (err < 0) { - err = -ENOBUFS; + if (err != -EAGAIN || !(msg->msg_flags & MSG_DONTWAIT)) + err = -ENOBUFS; goto unlock; } @@ -337,10 +370,9 @@ static int espintcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) tcp_rate_check_app_limited(sk); - err = espintcp_push_msgs(sk); + err = espintcp_push_msgs(sk, msg->msg_flags & MSG_DONTWAIT); /* this message could be partially sent, keep it */ - if (err < 0) - goto unlock; + release_sock(sk); return size; @@ -374,7 +406,7 @@ static void espintcp_tx_work(struct work_struct *work) lock_sock(sk); if (!ctx->tx_running) - espintcp_push_msgs(sk); + espintcp_push_msgs(sk, 0); release_sock(sk); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 564aa6492e7c..19c5e0fa3f44 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -39,7 +39,7 @@ #ifdef CONFIG_XFRM_STATISTICS #include <net/snmp.h> #endif -#ifdef CONFIG_INET_ESPINTCP +#ifdef CONFIG_XFRM_ESPINTCP #include <net/espintcp.h> #endif @@ -1433,14 +1433,10 @@ static void xfrm_policy_requeue(struct xfrm_policy *old, spin_unlock_bh(&pq->hold_queue.lock); } -static bool xfrm_policy_mark_match(struct xfrm_policy *policy, - struct xfrm_policy *pol) +static inline bool xfrm_policy_mark_match(const struct xfrm_mark *mark, + struct xfrm_policy *pol) { - if (policy->mark.v == pol->mark.v && - policy->priority == pol->priority) - return true; - - return false; + return mark->v == pol->mark.v && mark->m == pol->mark.m; } static u32 xfrm_pol_bin_key(const void *data, u32 len, u32 seed) @@ -1503,7 +1499,7 @@ static void xfrm_policy_insert_inexact_list(struct hlist_head *chain, if (pol->type == policy->type && pol->if_id == policy->if_id && !selector_cmp(&pol->selector, &policy->selector) && - xfrm_policy_mark_match(policy, pol) && + xfrm_policy_mark_match(&policy->mark, pol) && xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { delpol = pol; @@ -1538,7 +1534,7 @@ static struct xfrm_policy *xfrm_policy_insert_list(struct hlist_head *chain, if (pol->type == policy->type && pol->if_id == policy->if_id && !selector_cmp(&pol->selector, &policy->selector) && - xfrm_policy_mark_match(policy, pol) && + xfrm_policy_mark_match(&policy->mark, pol) && xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) @@ -1610,9 +1606,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) EXPORT_SYMBOL(xfrm_policy_insert); static struct xfrm_policy * -__xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id, - u8 type, int dir, - struct xfrm_selector *sel, +__xfrm_policy_bysel_ctx(struct hlist_head *chain, const struct xfrm_mark *mark, + u32 if_id, u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx) { struct xfrm_policy *pol; @@ -1623,7 +1618,7 @@ __xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id, hlist_for_each_entry(pol, chain, bydst) { if (pol->type == type && pol->if_id == if_id && - (mark & pol->mark.m) == pol->mark.v && + xfrm_policy_mark_match(mark, pol) && !selector_cmp(sel, &pol->selector) && xfrm_sec_ctx_match(ctx, pol->security)) return pol; @@ -1632,11 +1627,10 @@ __xfrm_policy_bysel_ctx(struct hlist_head *chain, u32 mark, u32 if_id, return NULL; } -struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id, - u8 type, int dir, - struct xfrm_selector *sel, - struct xfrm_sec_ctx *ctx, int delete, - int *err) +struct xfrm_policy * +xfrm_policy_bysel_ctx(struct net *net, const struct xfrm_mark *mark, u32 if_id, + u8 type, int dir, struct xfrm_selector *sel, + struct xfrm_sec_ctx *ctx, int delete, int *err) { struct xfrm_pol_inexact_bin *bin = NULL; struct xfrm_policy *pol, *ret = NULL; @@ -1703,9 +1697,9 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u32 if_id, } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, - u8 type, int dir, u32 id, int delete, - int *err) +struct xfrm_policy * +xfrm_policy_byid(struct net *net, const struct xfrm_mark *mark, u32 if_id, + u8 type, int dir, u32 id, int delete, int *err) { struct xfrm_policy *pol, *ret; struct hlist_head *chain; @@ -1720,8 +1714,7 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u32 if_id, ret = NULL; hlist_for_each_entry(pol, chain, byidx) { if (pol->type == type && pol->index == id && - pol->if_id == if_id && - (mark & pol->mark.m) == pol->mark.v) { + pol->if_id == if_id && xfrm_policy_mark_match(mark, pol)) { xfrm_pol_hold(pol); if (delete) { *err = security_xfrm_policy_delete( @@ -4156,7 +4149,7 @@ void __init xfrm_init(void) seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); -#ifdef CONFIG_INET_ESPINTCP +#ifdef CONFIG_XFRM_ESPINTCP espintcp_init(); #endif diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index e6cfaa680ef3..fbb7d9d06478 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1863,7 +1863,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct km_event c; int delete; struct xfrm_mark m; - u32 mark = xfrm_mark_get(attrs, &m); u32 if_id = 0; p = nlmsg_data(nlh); @@ -1880,8 +1879,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_IF_ID]) if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + xfrm_mark_get(attrs, &m); + if (p->index) - xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, delete, &err); + xp = xfrm_policy_byid(net, &m, if_id, type, p->dir, + p->index, delete, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1898,8 +1900,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir, &p->sel, - ctx, delete, &err); + xp = xfrm_policy_bysel_ctx(net, &m, if_id, type, p->dir, + &p->sel, ctx, delete, &err); security_xfrm_policy_free(ctx); } if (xp == NULL) @@ -2166,7 +2168,6 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; struct xfrm_mark m; - u32 mark = xfrm_mark_get(attrs, &m); u32 if_id = 0; err = copy_from_user_policy_type(&type, attrs); @@ -2180,8 +2181,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (attrs[XFRMA_IF_ID]) if_id = nla_get_u32(attrs[XFRMA_IF_ID]); + xfrm_mark_get(attrs, &m); + if (p->index) - xp = xfrm_policy_byid(net, mark, if_id, type, p->dir, p->index, 0, &err); + xp = xfrm_policy_byid(net, &m, if_id, type, p->dir, p->index, + 0, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -2198,7 +2202,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, mark, if_id, type, p->dir, + xp = xfrm_policy_bysel_ctx(net, &m, if_id, type, p->dir, &p->sel, ctx, 0, &err); security_xfrm_policy_free(ctx); } |