summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/fib_frontend.c3
-rw-r--r--net/ipv4/inet_connection_sock.c1
-rw-r--r--net/ipv4/inet_diag.c18
-rw-r--r--net/ipv4/ip_forward.c1
-rw-r--r--net/ipv4/ip_fragment.c11
-rw-r--r--net/ipv4/ip_output.c3
-rw-r--r--net/ipv4/ip_sockglue.c33
-rw-r--r--net/ipv4/ipmr.c7
-rw-r--r--net/ipv4/netfilter/ip_tables.c6
-rw-r--r--net/ipv4/ping.c12
-rw-r--r--net/ipv4/tcp.c10
-rw-r--r--net/ipv4/tcp_cong.c6
-rw-r--r--net/ipv4/tcp_cubic.c6
-rw-r--r--net/ipv4/tcp_input.c9
-rw-r--r--net/ipv4/tcp_ipv4.c2
-rw-r--r--net/ipv4/tcp_output.c6
-rw-r--r--net/ipv4/xfrm4_output.c2
17 files changed, 90 insertions, 46 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 57be71dd6a9e..23b9b3e86f4c 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1111,11 +1111,10 @@ static void ip_fib_net_exit(struct net *net)
{
unsigned int i;
+ rtnl_lock();
#ifdef CONFIG_IP_MULTIPLE_TABLES
fib4_rules_exit(net);
#endif
-
- rtnl_lock();
for (i = 0; i < FIB_TABLE_HASHSZ; i++) {
struct fib_table *tb;
struct hlist_head *head;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 14d02ea905b6..3e44b9b0b78e 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -268,6 +268,7 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo)
release_sock(sk);
if (reqsk_queue_empty(&icsk->icsk_accept_queue))
timeo = schedule_timeout(timeo);
+ sched_annotate_sleep();
lock_sock(sk);
err = 0;
if (!reqsk_queue_empty(&icsk->icsk_accept_queue))
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 81751f12645f..592aff37366b 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -71,6 +71,20 @@ static inline void inet_diag_unlock_handler(
mutex_unlock(&inet_diag_table_mutex);
}
+static size_t inet_sk_attr_size(void)
+{
+ return nla_total_size(sizeof(struct tcp_info))
+ + nla_total_size(1) /* INET_DIAG_SHUTDOWN */
+ + nla_total_size(1) /* INET_DIAG_TOS */
+ + nla_total_size(1) /* INET_DIAG_TCLASS */
+ + nla_total_size(sizeof(struct inet_diag_meminfo))
+ + nla_total_size(sizeof(struct inet_diag_msg))
+ + nla_total_size(SK_MEMINFO_VARS * sizeof(u32))
+ + nla_total_size(TCP_CA_NAME_MAX)
+ + nla_total_size(sizeof(struct tcpvegas_info))
+ + 64;
+}
+
int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk,
struct sk_buff *skb, struct inet_diag_req_v2 *req,
struct user_namespace *user_ns,
@@ -326,9 +340,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s
if (err)
goto out;
- rep = nlmsg_new(sizeof(struct inet_diag_msg) +
- sizeof(struct inet_diag_meminfo) +
- sizeof(struct tcp_info) + 64, GFP_KERNEL);
+ rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL);
if (!rep) {
err = -ENOMEM;
goto out;
diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
index 787b3c294ce6..d9bc28ac5d1b 100644
--- a/net/ipv4/ip_forward.c
+++ b/net/ipv4/ip_forward.c
@@ -67,6 +67,7 @@ static int ip_forward_finish(struct sk_buff *skb)
if (unlikely(opt->optlen))
ip_forward_options(skb);
+ skb_sender_cpu_clear(skb);
return dst_output(skb);
}
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index e5b6d0ddcb58..145a50c4d566 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -659,27 +659,30 @@ EXPORT_SYMBOL(ip_defrag);
struct sk_buff *ip_check_defrag(struct sk_buff *skb, u32 user)
{
struct iphdr iph;
+ int netoff;
u32 len;
if (skb->protocol != htons(ETH_P_IP))
return skb;
- if (!skb_copy_bits(skb, 0, &iph, sizeof(iph)))
+ netoff = skb_network_offset(skb);
+
+ if (skb_copy_bits(skb, netoff, &iph, sizeof(iph)) < 0)
return skb;
if (iph.ihl < 5 || iph.version != 4)
return skb;
len = ntohs(iph.tot_len);
- if (skb->len < len || len < (iph.ihl * 4))
+ if (skb->len < netoff + len || len < (iph.ihl * 4))
return skb;
if (ip_is_fragment(&iph)) {
skb = skb_share_check(skb, GFP_ATOMIC);
if (skb) {
- if (!pskb_may_pull(skb, iph.ihl*4))
+ if (!pskb_may_pull(skb, netoff + iph.ihl * 4))
return skb;
- if (pskb_trim_rcsum(skb, len))
+ if (pskb_trim_rcsum(skb, netoff + len))
return skb;
memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
if (ip_defrag(skb, user))
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index d68199d9b2b0..a7aea2048a0d 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -888,7 +888,8 @@ static int __ip_append_data(struct sock *sk,
cork->length += length;
if (((length > mtu) || (skb && skb_is_gso(skb))) &&
(sk->sk_protocol == IPPROTO_UDP) &&
- (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len) {
+ (rt->dst.dev->features & NETIF_F_UFO) && !rt->dst.header_len &&
+ (sk->sk_type == SOCK_DGRAM)) {
err = ip_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, transhdrlen,
maxfraglen, flags);
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 31d8c71986b4..5cd99271d3a6 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -432,17 +432,32 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
kfree_skb(skb);
}
-static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk,
- const struct sk_buff *skb,
- int ee_origin)
+/* IPv4 supports cmsg on all imcp errors and some timestamps
+ *
+ * Timestamp code paths do not initialize the fields expected by cmsg:
+ * the PKTINFO fields in skb->cb[]. Fill those in here.
+ */
+static bool ipv4_datagram_support_cmsg(const struct sock *sk,
+ struct sk_buff *skb,
+ int ee_origin)
{
- struct in_pktinfo *info = PKTINFO_SKB_CB(skb);
+ struct in_pktinfo *info;
+
+ if (ee_origin == SO_EE_ORIGIN_ICMP)
+ return true;
- if ((ee_origin != SO_EE_ORIGIN_TIMESTAMPING) ||
- (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
+ if (ee_origin == SO_EE_ORIGIN_LOCAL)
+ return false;
+
+ /* Support IP_PKTINFO on tstamp packets if requested, to correlate
+ * timestamp with egress dev. Not possible for packets without dev
+ * or without payload (SOF_TIMESTAMPING_OPT_TSONLY).
+ */
+ if ((!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) ||
(!skb->dev))
return false;
+ info = PKTINFO_SKB_CB(skb);
info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
info->ipi_ifindex = skb->dev->ifindex;
return true;
@@ -483,7 +498,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
serr = SKB_EXT_ERR(skb);
- if (sin && skb->len) {
+ if (sin && serr->port) {
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) +
serr->addr_offset);
@@ -496,9 +511,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
sin = &errhdr.offender;
memset(sin, 0, sizeof(*sin));
- if (skb->len &&
- (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
- ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) {
+ if (ipv4_datagram_support_cmsg(sk, skb, serr->ee.ee_origin)) {
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
if (inet_sk(sk)->cmsg_flags)
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9d78427652d2..fe54eba6d00d 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -268,7 +268,7 @@ static int __net_init ipmr_rules_init(struct net *net)
return 0;
err2:
- kfree(mrt);
+ ipmr_free_table(mrt);
err1:
fib_rules_unregister(ops);
return err;
@@ -278,11 +278,13 @@ static void __net_exit ipmr_rules_exit(struct net *net)
{
struct mr_table *mrt, *next;
+ rtnl_lock();
list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
list_del(&mrt->list);
ipmr_free_table(mrt);
}
fib_rules_unregister(net->ipv4.mr_rules_ops);
+ rtnl_unlock();
}
#else
#define ipmr_for_each_table(mrt, net) \
@@ -308,7 +310,10 @@ static int __net_init ipmr_rules_init(struct net *net)
static void __net_exit ipmr_rules_exit(struct net *net)
{
+ rtnl_lock();
ipmr_free_table(net->ipv4.mrt);
+ net->ipv4.mrt = NULL;
+ rtnl_unlock();
}
#endif
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 99e810f84671..cf5e82f39d3b 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -272,9 +272,9 @@ static void trace_packet(const struct sk_buff *skb,
&chainname, &comment, &rulenum) != 0)
break;
- nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo,
- "TRACE: %s:%s:%s:%u ",
- tablename, chainname, comment, rulenum);
+ nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo,
+ "TRACE: %s:%s:%s:%u ",
+ tablename, chainname, comment, rulenum);
}
#endif
diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c
index e9f66e1cda50..208d5439e59b 100644
--- a/net/ipv4/ping.c
+++ b/net/ipv4/ping.c
@@ -259,6 +259,9 @@ int ping_init_sock(struct sock *sk)
kgid_t low, high;
int ret = 0;
+ if (sk->sk_family == AF_INET6)
+ sk->sk_ipv6only = 1;
+
inet_get_ping_group_range_net(net, &low, &high);
if (gid_lte(low, group) && gid_lte(group, high))
return 0;
@@ -305,6 +308,11 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
if (addr_len < sizeof(*addr))
return -EINVAL;
+ if (addr->sin_family != AF_INET &&
+ !(addr->sin_family == AF_UNSPEC &&
+ addr->sin_addr.s_addr == htonl(INADDR_ANY)))
+ return -EAFNOSUPPORT;
+
pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
@@ -330,7 +338,7 @@ static int ping_check_bind_addr(struct sock *sk, struct inet_sock *isk,
return -EINVAL;
if (addr->sin6_family != AF_INET6)
- return -EINVAL;
+ return -EAFNOSUPPORT;
pr_debug("ping_check_bind_addr(sk=%p,addr=%pI6c,port=%d)\n",
sk, addr->sin6_addr.s6_addr, ntohs(addr->sin6_port));
@@ -716,7 +724,7 @@ static int ping_v4_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *m
if (msg->msg_namelen < sizeof(*usin))
return -EINVAL;
if (usin->sin_family != AF_INET)
- return -EINVAL;
+ return -EAFNOSUPPORT;
daddr = usin->sin_addr.s_addr;
/* no remote port */
} else {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 9d72a0fcd928..995a2259bcfc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -835,17 +835,13 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
int large_allowed)
{
struct tcp_sock *tp = tcp_sk(sk);
- u32 new_size_goal, size_goal, hlen;
+ u32 new_size_goal, size_goal;
if (!large_allowed || !sk_can_gso(sk))
return mss_now;
- /* Maybe we should/could use sk->sk_prot->max_header here ? */
- hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
- inet_csk(sk)->icsk_ext_hdr_len +
- tp->tcp_header_len;
-
- new_size_goal = sk->sk_gso_max_size - 1 - hlen;
+ /* Note : tcp_tso_autosize() will eventually split this later */
+ new_size_goal = sk->sk_gso_max_size - 1 - MAX_TCP_HEADER;
new_size_goal = tcp_bound_to_half_wnd(tp, new_size_goal);
/* We try hard to avoid divides here */
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index d694088214cd..62856e185a93 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -378,6 +378,12 @@ EXPORT_SYMBOL_GPL(tcp_slow_start);
*/
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
{
+ /* If credits accumulated at a higher w, apply them gently now. */
+ if (tp->snd_cwnd_cnt >= w) {
+ tp->snd_cwnd_cnt = 0;
+ tp->snd_cwnd++;
+ }
+
tp->snd_cwnd_cnt += acked;
if (tp->snd_cwnd_cnt >= w) {
u32 delta = tp->snd_cwnd_cnt / w;
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 4b276d1ed980..06d3d665a9fd 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -306,8 +306,10 @@ tcp_friendliness:
}
}
- if (ca->cnt == 0) /* cannot be zero */
- ca->cnt = 1;
+ /* The maximum rate of cwnd increase CUBIC allows is 1 packet per
+ * 2 packets ACKed, meaning cwnd grows at 1.5x per RTT.
+ */
+ ca->cnt = max(ca->cnt, 2U);
}
static void bictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 8fdd27b17306..f501ac048366 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3105,10 +3105,11 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
if (!first_ackt.v64)
first_ackt = last_ackt;
- if (!(sacked & TCPCB_SACKED_ACKED))
+ if (!(sacked & TCPCB_SACKED_ACKED)) {
reord = min(pkts_acked, reord);
- if (!after(scb->end_seq, tp->high_seq))
- flag |= FLAG_ORIG_SACK_ACKED;
+ if (!after(scb->end_seq, tp->high_seq))
+ flag |= FLAG_ORIG_SACK_ACKED;
+ }
}
if (sacked & TCPCB_SACKED_ACKED)
@@ -4770,7 +4771,7 @@ static bool tcp_should_expand_sndbuf(const struct sock *sk)
return false;
/* If we filled the congestion window, do not expand. */
- if (tp->packets_out >= tp->snd_cwnd)
+ if (tcp_packets_in_flight(tp) >= tp->snd_cwnd)
return false;
return true;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5a2dfed4783b..f1756ee02207 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1518,7 +1518,7 @@ void tcp_v4_early_demux(struct sk_buff *skb)
skb->sk = sk;
skb->destructor = sock_edemux;
if (sk->sk_state != TCP_TIME_WAIT) {
- struct dst_entry *dst = sk->sk_rx_dst;
+ struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
if (dst)
dst = dst_check(dst, 0);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index a2a796c5536b..1db253e36045 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2773,15 +2773,11 @@ void tcp_send_fin(struct sock *sk)
} else {
/* Socket is locked, keep trying until memory is available. */
for (;;) {
- skb = alloc_skb_fclone(MAX_TCP_HEADER,
- sk->sk_allocation);
+ skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation);
if (skb)
break;
yield();
}
-
- /* Reserve space for headers and prepare control bits. */
- skb_reserve(skb, MAX_TCP_HEADER);
/* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */
tcp_init_nondata_skb(skb, tp->write_seq,
TCPHDR_ACK | TCPHDR_FIN);
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index d5f6bd9a210a..dab73813cb92 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -63,6 +63,7 @@ int xfrm4_prepare_output(struct xfrm_state *x, struct sk_buff *skb)
return err;
IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
+ skb->protocol = htons(ETH_P_IP);
return x->outer_mode->output2(x, skb);
}
@@ -71,7 +72,6 @@ EXPORT_SYMBOL(xfrm4_prepare_output);
int xfrm4_output_finish(struct sk_buff *skb)
{
memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- skb->protocol = htons(ETH_P_IP);
#ifdef CONFIG_NETFILTER
IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;