diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/fib_frontend.c | 3 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 1 | ||||
-rw-r--r-- | net/ipv4/inet_diag.c | 18 | ||||
-rw-r--r-- | net/ipv4/ip_forward.c | 1 | ||||
-rw-r--r-- | net/ipv4/ip_fragment.c | 11 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 3 | ||||
-rw-r--r-- | net/ipv4/ip_sockglue.c | 33 | ||||
-rw-r--r-- | net/ipv4/ipmr.c | 7 | ||||
-rw-r--r-- | net/ipv4/netfilter/ip_tables.c | 6 | ||||
-rw-r--r-- | net/ipv4/ping.c | 12 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 10 | ||||
-rw-r--r-- | net/ipv4/tcp_cong.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_cubic.c | 6 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 9 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 6 | ||||
-rw-r--r-- | net/ipv4/xfrm4_output.c | 2 |
17 files changed, 90 insertions, 46 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 57be71dd6a9e..23b9b3e86f4c 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1111,11 +1111,10 @@ static void ip_fib_net_exit(struct net *net) { unsigned int i; + rtnl_lock(); #ifdef CONFIG_IP_MULTIPLE_TABLES fib4_rules_exit(net); #endif - - rtnl_lock(); for (i = 0; i < FIB_TABLE_HASHSZ; i++) { struct fib_table *tb; struct hlist_head *head; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 14d02ea905b6..3e44b9b0b78e 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -268,6 +268,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) release_sock(sk); if (reqsk_queue_empty(&icsk->icsk_accept_queue)) timeo = schedule_timeout(timeo); + sched_annotate_sleep(); lock_sock(sk); err = 0; if (!reqsk_queue_empty(&icsk->icsk_accept_queue)) diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 81751f12645f..592aff37366b 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -71,6 +71,20 @@ static inline void inet_diag_unlock_handler( mutex_unlock(&inet_diag_table_mutex); } +static size_t inet_sk_attr_size(void) +{ + return nla_total_size(sizeof(struct tcp_info)) + + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + + nla_total_size(1) /* INET_DIAG_TOS */ + + nla_total_size(1) /* INET_DIAG_TCLASS */ + + nla_total_size(sizeof(struct inet_diag_meminfo)) + + nla_total_size(sizeof(struct inet_diag_msg)) + + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) + + nla_total_size(TCP_CA_NAME_MAX) + + nla_total_size(sizeof(struct tcpvegas_info)) + + 64; +} + int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct inet_diag_req_v2 *req, struct user_namespace *user_ns, @@ -326,9 +340,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s if (err) goto out; - rep = nlmsg_new(sizeof(struct inet_diag_msg) + - sizeof(struct inet_diag_meminfo) + - sizeof(struct tcp_info) + 64, GFP_KERNEL); + rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL); if (!rep) { err = -ENOMEM; goto out; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 787b3c294ce6..d9bc28ac5d1b 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -67,6 +67,7 @@ static int ip_forward_finish(struct sk_buff *skb) if (unlikely(opt->optlen)) ip_forward_options(skb); + skb_sender_cpu_clear(skb); return dst_output(skb); } diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index e5b6d0ddcb58..145a50c4d566 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -659,27 +659,30 @@ EXPORT_SYMBOL(ip_defrag); struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user) { struct iphdr iph; + int netoff; u32 len; if (skb->protocol != htons(ETH_P_IP)) return skb; - if (!skb_copy_bits(skb, 0, &iph, sizeof(iph))) + netoff = skb_network_offset(skb); + + if (skb_copy_bits(skb, netoff, &iph, sizeof(iph)) < 0) return skb; if (iph.ihl < 5 || iph.version != 4) return skb; len = ntohs(iph.tot_len); - if (skb->len < len || len < (iph.ihl * 4)) + if (skb->len < netoff + len || len < (iph.ihl * 4)) return skb; if (ip_is_fragment(&iph)) { skb = skb_share_check(skb, GFP_ATOMIC); if (skb) { - if (!pskb_may_pull(skb, iph.ihl*4)) + if (!pskb_may_pull(skb, netoff + iph.ihl * 4)) return skb; - if (pskb_trim_rcsum(skb, len)) + if (pskb_trim_rcsum(skb, netoff + len)) return skb; memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); if (ip_defrag(skb, user)) diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index d68199d9b2b0..a7aea2048a0d 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -888,7 +888,8 @@ static int __ip_append_data(struct sock *sk, cork->length += length; if (((length > mtu) || (skb && skb_is_gso(skb))) && (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) { + (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len && + (sk->sk_type == SOCK_DGRAM)) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, maxfraglen, flags); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 31d8c71986b4..5cd99271d3a6 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -432,17 +432,32 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf kfree_skb(skb); } -static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk, - const struct sk_buff *skb, - int ee_origin) +/* IPv4 supports cmsg on all imcp errors and some timestamps + * + * Timestamp code paths do not initialize the fields expected by cmsg: + * the PKTINFO fields in skb->cb[]. Fill those in here. + */ +static bool ipv4_datagram_support_cmsg(const struct sock *sk, + struct sk_buff *skb, + int ee_origin) { - struct in_pktinfo *info = PKTINFO_SKB_CB(skb); + struct in_pktinfo *info; + + if (ee_origin == SO_EE_ORIGIN_ICMP) + return true; - if ((ee_origin != SO_EE_ORIGIN_TIMESTAMPING) || - (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) || + if (ee_origin == SO_EE_ORIGIN_LOCAL) + return false; + + /* Support IP_PKTINFO on tstamp packets if requested, to correlate + * timestamp with egress dev. Not possible for packets without dev + * or without payload (SOF_TIMESTAMPING_OPT_TSONLY). + */ + if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) || (!skb->dev)) return false; + info = PKTINFO_SKB_CB(skb); info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr; info->ipi_ifindex = skb->dev->ifindex; return true; @@ -483,7 +498,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin && skb->len) { + if (sin && serr->port) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + serr->addr_offset); @@ -496,9 +511,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) sin = &errhdr.offender; memset(sin, 0, sizeof(*sin)); - if (skb->len && - (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || - ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) { + if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; if (inet_sk(sk)->cmsg_flags) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9d78427652d2..fe54eba6d00d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -268,7 +268,7 @@ static int __net_init ipmr_rules_init(struct net *net) return 0; err2: - kfree(mrt); + ipmr_free_table(mrt); err1: fib_rules_unregister(ops); return err; @@ -278,11 +278,13 @@ static void __net_exit ipmr_rules_exit(struct net *net) { struct mr_table *mrt, *next; + rtnl_lock(); list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) { list_del(&mrt->list); ipmr_free_table(mrt); } fib_rules_unregister(net->ipv4.mr_rules_ops); + rtnl_unlock(); } #else #define ipmr_for_each_table(mrt, net) \ @@ -308,7 +310,10 @@ static int __net_init ipmr_rules_init(struct net *net) static void __net_exit ipmr_rules_exit(struct net *net) { + rtnl_lock(); ipmr_free_table(net->ipv4.mrt); + net->ipv4.mrt = NULL; + rtnl_unlock(); } #endif diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 99e810f84671..cf5e82f39d3b 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -272,9 +272,9 @@ static void trace_packet(const struct sk_buff *skb, &chainname, &comment, &rulenum) != 0) break; - nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo, - "TRACE: %s:%s:%s:%u ", - tablename, chainname, comment, rulenum); + nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + tablename, chainname, comment, rulenum); } #endif diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index e9f66e1cda50..208d5439e59b 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -259,6 +259,9 @@ int ping_init_sock(struct sock *sk) kgid_t low, high; int ret = 0; + if (sk->sk_family == AF_INET6) + sk->sk_ipv6only = 1; + inet_get_ping_group_range_net(net, &low, &high); if (gid_lte(low, group) && gid_lte(group, high)) return 0; @@ -305,6 +308,11 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, if (addr_len < sizeof(*addr)) return -EINVAL; + if (addr->sin_family != AF_INET && + !(addr->sin_family == AF_UNSPEC && + addr->sin_addr.s_addr == htonl(INADDR_ANY))) + return -EAFNOSUPPORT; + pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n", sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port)); @@ -330,7 +338,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk, return -EINVAL; if (addr->sin6_family != AF_INET6) - return -EINVAL; + return -EAFNOSUPPORT; pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n", sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port)); @@ -716,7 +724,7 @@ static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m if (msg->msg_namelen < sizeof(*usin)) return -EINVAL; if (usin->sin_family != AF_INET) - return -EINVAL; + return -EAFNOSUPPORT; daddr = usin->sin_addr.s_addr; /* no remote port */ } else { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 9d72a0fcd928..995a2259bcfc 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -835,17 +835,13 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now, int large_allowed) { struct tcp_sock *tp = tcp_sk(sk); - u32 new_size_goal, size_goal, hlen; + u32 new_size_goal, size_goal; if (!large_allowed || !sk_can_gso(sk)) return mss_now; - /* Maybe we should/could use sk->sk_prot->max_header here ? */ - hlen = inet_csk(sk)->icsk_af_ops->net_header_len + - inet_csk(sk)->icsk_ext_hdr_len + - tp->tcp_header_len; - - new_size_goal = sk->sk_gso_max_size - 1 - hlen; + /* Note : tcp_tso_autosize() will eventually split this later */ + new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER; new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal); /* We try hard to avoid divides here */ diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index d694088214cd..62856e185a93 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -378,6 +378,12 @@ EXPORT_SYMBOL_GPL(tcp_slow_start); */ void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked) { + /* If credits accumulated at a higher w, apply them gently now. */ + if (tp->snd_cwnd_cnt >= w) { + tp->snd_cwnd_cnt = 0; + tp->snd_cwnd++; + } + tp->snd_cwnd_cnt += acked; if (tp->snd_cwnd_cnt >= w) { u32 delta = tp->snd_cwnd_cnt / w; diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 4b276d1ed980..06d3d665a9fd 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -306,8 +306,10 @@ tcp_friendliness: } } - if (ca->cnt == 0) /* cannot be zero */ - ca->cnt = 1; + /* The maximum rate of cwnd increase CUBIC allows is 1 packet per + * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT. + */ + ca->cnt = max(ca->cnt, 2U); } static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 8fdd27b17306..f501ac048366 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3105,10 +3105,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (!first_ackt.v64) first_ackt = last_ackt; - if (!(sacked & TCPCB_SACKED_ACKED)) + if (!(sacked & TCPCB_SACKED_ACKED)) { reord = min(pkts_acked, reord); - if (!after(scb->end_seq, tp->high_seq)) - flag |= FLAG_ORIG_SACK_ACKED; + if (!after(scb->end_seq, tp->high_seq)) + flag |= FLAG_ORIG_SACK_ACKED; + } } if (sacked & TCPCB_SACKED_ACKED) @@ -4770,7 +4771,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk) return false; /* If we filled the congestion window, do not expand. */ - if (tp->packets_out >= tp->snd_cwnd) + if (tcp_packets_in_flight(tp) >= tp->snd_cwnd) return false; return true; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 5a2dfed4783b..f1756ee02207 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1518,7 +1518,7 @@ void tcp_v4_early_demux(struct sk_buff *skb) skb->sk = sk; skb->destructor = sock_edemux; if (sk->sk_state != TCP_TIME_WAIT) { - struct dst_entry *dst = sk->sk_rx_dst; + struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); if (dst) dst = dst_check(dst, 0); diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a2a796c5536b..1db253e36045 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2773,15 +2773,11 @@ void tcp_send_fin(struct sock *sk) } else { /* Socket is locked, keep trying until memory is available. */ for (;;) { - skb = alloc_skb_fclone(MAX_TCP_HEADER, - sk->sk_allocation); + skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation); if (skb) break; yield(); } - - /* Reserve space for headers and prepare control bits. */ - skb_reserve(skb, MAX_TCP_HEADER); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ tcp_init_nondata_skb(skb, tp->write_seq, TCPHDR_ACK | TCPHDR_FIN); diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index d5f6bd9a210a..dab73813cb92 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -63,6 +63,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb) return err; IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE; + skb->protocol = htons(ETH_P_IP); return x->outer_mode->output2(x, skb); } @@ -71,7 +72,6 @@ EXPORT_SYMBOL(xfrm4_prepare_output); int xfrm4_output_finish(struct sk_buff *skb) { memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - skb->protocol = htons(ETH_P_IP); #ifdef CONFIG_NETFILTER IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; |