diff options
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/fib_frontend.c | 3 | ||||
-rw-r--r-- | net/ipv4/fib_rules.c | 2 | ||||
-rw-r--r-- | net/ipv4/fib_semantics.c | 4 | ||||
-rw-r--r-- | net/ipv4/inet_connection_sock.c | 4 | ||||
-rw-r--r-- | net/ipv4/ip_forward.c | 2 | ||||
-rw-r--r-- | net/ipv4/ip_output.c | 4 | ||||
-rw-r--r-- | net/ipv4/ip_vti.c | 4 | ||||
-rw-r--r-- | net/ipv4/ipmr.c | 2 | ||||
-rw-r--r-- | net/ipv4/route.c | 157 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 10 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 13 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 8 | ||||
-rw-r--r-- | net/ipv4/tcp_minisocks.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 4 | ||||
-rw-r--r-- | net/ipv4/xfrm4_policy.c | 1 |
16 files changed, 133 insertions, 88 deletions
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 68c93d1bb03a..825c608826de 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -322,7 +322,8 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, { int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); - if (!r && !fib_num_tclassid_users(dev_net(dev))) { + if (!r && !fib_num_tclassid_users(dev_net(dev)) && + (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) { *itag = 0; return 0; } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 274309d3aded..26aa65d1fce4 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -262,7 +262,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops) rt_cache_flush(ops->fro_net); } -static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = { +static const struct fib_rules_ops __net_initconst fib4_rules_ops_template = { .family = AF_INET, .rule_size = sizeof(struct fib4_rule), .addr_size = sizeof(u32), diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 3509065e409a..71b125cd5db1 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -314,6 +314,7 @@ static struct fib_info *fib_find_info(const struct fib_info *nfi) nfi->fib_scope == fi->fib_scope && nfi->fib_prefsrc == fi->fib_prefsrc && nfi->fib_priority == fi->fib_priority && + nfi->fib_type == fi->fib_type && memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(u32) * RTAX_MAX) == 0 && ((nfi->fib_flags ^ fi->fib_flags) & ~RTNH_F_DEAD) == 0 && @@ -833,11 +834,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi->fib_flags = cfg->fc_flags; fi->fib_priority = cfg->fc_priority; fi->fib_prefsrc = cfg->fc_prefsrc; + fi->fib_type = cfg->fc_type; fi->fib_nhs = nhs; change_nexthops(fi) { nexthop_nh->nh_parent = fi; nexthop_nh->nh_pcpu_rth_output = alloc_percpu(struct rtable __rcu *); + if (!nexthop_nh->nh_pcpu_rth_output) + goto failure; } endfor_nexthops(fi) if (cfg->fc_mx) { diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f0c5b9c1a957..d34ce2972c8f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -406,7 +406,7 @@ struct dst_entry *inet_csk_route_req(struct sock *sk, rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; - if (opt && opt->opt.is_strictroute && rt->rt_gateway) + if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; return &rt->dst; @@ -442,7 +442,7 @@ struct dst_entry *inet_csk_route_child_sock(struct sock *sk, rt = ip_route_output_flow(net, fl4, sk); if (IS_ERR(rt)) goto no_route; - if (opt && opt->opt.is_strictroute && rt->rt_gateway) + if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; rcu_read_unlock(); return &rt->dst; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index ab09b126423c..694de3b7aebf 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -85,7 +85,7 @@ int ip_forward(struct sk_buff *skb) rt = skb_rtable(skb); - if (opt->is_strictroute && opt->nexthop != rt->rt_gateway) + if (opt->is_strictroute && rt->rt_uses_gateway) goto sr_failed; if (unlikely(skb->len > dst_mtu(&rt->dst) && !skb_is_gso(skb) && diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 24a29a39e9a8..6537a408a4fb 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -193,7 +193,7 @@ static inline int ip_finish_output2(struct sk_buff *skb) } rcu_read_lock_bh(); - nexthop = rt->rt_gateway ? rt->rt_gateway : ip_hdr(skb)->daddr; + nexthop = (__force u32) rt_nexthop(rt, ip_hdr(skb)->daddr); neigh = __ipv4_neigh_lookup_noref(dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&arp_tbl, &nexthop, dev, false); @@ -371,7 +371,7 @@ int ip_queue_xmit(struct sk_buff *skb, struct flowi *fl) skb_dst_set_noref(skb, &rt->dst); packet_routed: - if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_gateway) + if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway) goto no_route; /* OK, we know where to send it, allocate and build IP header. */ diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 978bca4818ae..1831092f999f 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -374,7 +374,7 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) memset(&fl4, 0, sizeof(fl4)); flowi4_init_output(&fl4, tunnel->parms.link, - htonl(tunnel->parms.i_key), RT_TOS(tos), + be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos), RT_SCOPE_UNIVERSE, IPPROTO_IPIP, 0, dst, tiph->saddr, 0, 0); @@ -441,7 +441,7 @@ static int vti_tunnel_bind_dev(struct net_device *dev) struct flowi4 fl4; memset(&fl4, 0, sizeof(fl4)); flowi4_init_output(&fl4, tunnel->parms.link, - htonl(tunnel->parms.i_key), + be32_to_cpu(tunnel->parms.i_key), RT_TOS(iph->tos), RT_SCOPE_UNIVERSE, IPPROTO_IPIP, 0, iph->daddr, iph->saddr, 0, 0); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 1daa95c2a0ba..6168c4dc58b1 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -221,7 +221,7 @@ static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb, return 0; } -static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = { +static const struct fib_rules_ops __net_initconst ipmr_rules_ops_template = { .family = RTNL_FAMILY_IPMR, .rule_size = sizeof(struct ipmr_rule), .addr_size = sizeof(u32), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ff622069fcef..a8c651216fa6 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -802,7 +802,8 @@ void ip_rt_send_redirect(struct sk_buff *skb) net = dev_net(rt->dst.dev); peer = inet_getpeer_v4(net->ipv4.peers, ip_hdr(skb)->saddr, 1); if (!peer) { - icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); + icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, + rt_nexthop(rt, ip_hdr(skb)->daddr)); return; } @@ -827,7 +828,9 @@ void ip_rt_send_redirect(struct sk_buff *skb) time_after(jiffies, (peer->rate_last + (ip_rt_redirect_load << peer->rate_tokens)))) { - icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); + __be32 gw = rt_nexthop(rt, ip_hdr(skb)->daddr); + + icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, gw); peer->rate_last = jiffies; ++peer->rate_tokens; #ifdef CONFIG_IP_ROUTE_VERBOSE @@ -835,7 +838,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) peer->rate_tokens == ip_rt_redirect_number) net_warn_ratelimited("host %pI4/if%d ignores redirects for %pI4 to %pI4\n", &ip_hdr(skb)->saddr, inet_iif(skb), - &ip_hdr(skb)->daddr, &rt->rt_gateway); + &ip_hdr(skb)->daddr, &gw); #endif } out_put_peer: @@ -904,22 +907,32 @@ out: kfree_skb(skb); return 0; } -static u32 __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) +static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) { + struct dst_entry *dst = &rt->dst; struct fib_result res; + if (dst->dev->mtu < mtu) + return; + if (mtu < ip_rt_min_pmtu) mtu = ip_rt_min_pmtu; + if (!rt->rt_pmtu) { + dst->obsolete = DST_OBSOLETE_KILL; + } else { + rt->rt_pmtu = mtu; + dst->expires = max(1UL, jiffies + ip_rt_mtu_expires); + } + rcu_read_lock(); - if (fib_lookup(dev_net(rt->dst.dev), fl4, &res) == 0) { + if (fib_lookup(dev_net(dst->dev), fl4, &res) == 0) { struct fib_nh *nh = &FIB_RES_NH(res); update_or_create_fnhe(nh, fl4->daddr, 0, mtu, jiffies + ip_rt_mtu_expires); } rcu_read_unlock(); - return mtu; } static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, @@ -929,14 +942,7 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct flowi4 fl4; ip_rt_build_flow_key(&fl4, sk, skb); - mtu = __ip_rt_update_pmtu(rt, &fl4, mtu); - - if (!rt->rt_pmtu) { - dst->obsolete = DST_OBSOLETE_KILL; - } else { - rt->rt_pmtu = mtu; - rt->dst.expires = max(1UL, jiffies + ip_rt_mtu_expires); - } + __ip_rt_update_pmtu(rt, &fl4, mtu); } void ipv4_update_pmtu(struct sk_buff *skb, struct net *net, u32 mtu, @@ -1120,7 +1126,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) mtu = dst->dev->mtu; if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { - if (rt->rt_gateway && mtu > 576) + if (rt->rt_uses_gateway && mtu > 576) mtu = 576; } @@ -1157,8 +1163,12 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, spin_lock_bh(&fnhe_lock); if (daddr == fnhe->fnhe_daddr) { - struct rtable *orig; - + struct rtable *orig = rcu_dereference(fnhe->fnhe_rth); + if (orig && rt_is_expired(orig)) { + fnhe->fnhe_gw = 0; + fnhe->fnhe_pmtu = 0; + fnhe->fnhe_expires = 0; + } if (fnhe->fnhe_pmtu) { unsigned long expires = fnhe->fnhe_expires; unsigned long diff = expires - jiffies; @@ -1171,22 +1181,16 @@ static bool rt_bind_exception(struct rtable *rt, struct fib_nh_exception *fnhe, if (fnhe->fnhe_gw) { rt->rt_flags |= RTCF_REDIRECTED; rt->rt_gateway = fnhe->fnhe_gw; - } + rt->rt_uses_gateway = 1; + } else if (!rt->rt_gateway) + rt->rt_gateway = daddr; - orig = rcu_dereference(fnhe->fnhe_rth); rcu_assign_pointer(fnhe->fnhe_rth, rt); if (orig) rt_free(orig); fnhe->fnhe_stamp = jiffies; ret = true; - } else { - /* Routes we intend to cache in nexthop exception have - * the DST_NOCACHE bit clear. However, if we are - * unsuccessful at storing this route into the cache - * we really need to set it. - */ - rt->dst.flags |= DST_NOCACHE; } spin_unlock_bh(&fnhe_lock); @@ -1201,8 +1205,6 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) if (rt_is_input_route(rt)) { p = (struct rtable **)&nh->nh_rth_input; } else { - if (!nh->nh_pcpu_rth_output) - goto nocache; p = (struct rtable **)__this_cpu_ptr(nh->nh_pcpu_rth_output); } orig = *p; @@ -1211,16 +1213,8 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) if (prev == orig) { if (orig) rt_free(orig); - } else { - /* Routes we intend to cache in the FIB nexthop have - * the DST_NOCACHE bit clear. However, if we are - * unsuccessful at storing this route into the cache - * we really need to set it. - */ -nocache: - rt->dst.flags |= DST_NOCACHE; + } else ret = false; - } return ret; } @@ -1281,8 +1275,10 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, if (fi) { struct fib_nh *nh = &FIB_RES_NH(*res); - if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) + if (nh->nh_gw && nh->nh_scope == RT_SCOPE_LINK) { rt->rt_gateway = nh->nh_gw; + rt->rt_uses_gateway = 1; + } dst_init_metrics(&rt->dst, fi->fib_metrics, true); #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; @@ -1291,8 +1287,18 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, cached = rt_bind_exception(rt, fnhe, daddr); else if (!(rt->dst.flags & DST_NOCACHE)) cached = rt_cache_route(nh, rt); - } - if (unlikely(!cached)) + if (unlikely(!cached)) { + /* Routes we intend to cache in nexthop exception or + * FIB nexthop have the DST_NOCACHE bit clear. + * However, if we are unsuccessful at storing this + * route into the cache we really need to set it. + */ + rt->dst.flags |= DST_NOCACHE; + if (!rt->rt_gateway) + rt->rt_gateway = daddr; + rt_add_uncached_list(rt); + } + } else rt_add_uncached_list(rt); #ifdef CONFIG_IP_ROUTE_CLASSID @@ -1360,6 +1366,7 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); if (our) { rth->dst.input= ip_local_deliver; @@ -1429,7 +1436,6 @@ static int __mkroute_input(struct sk_buff *skb, return -EINVAL; } - err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res), in_dev->dev, in_dev, &itag); if (err < 0) { @@ -1439,10 +1445,13 @@ static int __mkroute_input(struct sk_buff *skb, goto cleanup; } - if (out_dev == in_dev && err && + do_cache = res->fi && !itag; + if (out_dev == in_dev && err && IN_DEV_TX_REDIRECTS(out_dev) && (IN_DEV_SHARED_MEDIA(out_dev) || - inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) + inet_addr_onlink(out_dev, saddr, FIB_RES_GW(*res)))) { flags |= RTCF_DOREDIRECT; + do_cache = false; + } if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). Do not create route, if it is @@ -1459,15 +1468,11 @@ static int __mkroute_input(struct sk_buff *skb, } } - do_cache = false; - if (res->fi) { - if (!itag) { - rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); - if (rt_cache_valid(rth)) { - skb_dst_set_noref(skb, &rth->dst); - goto out; - } - do_cache = true; + if (do_cache) { + rth = rcu_dereference(FIB_RES_NH(*res).nh_rth_input); + if (rt_cache_valid(rth)) { + skb_dst_set_noref(skb, &rth->dst); + goto out; } } @@ -1486,6 +1491,7 @@ static int __mkroute_input(struct sk_buff *skb, rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); rth->dst.input = ip_forward; @@ -1656,6 +1662,7 @@ local_input: rth->rt_iif = 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); if (res.type == RTN_UNREACHABLE) { rth->dst.input= ip_error; @@ -1758,6 +1765,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, struct in_device *in_dev; u16 type = res->type; struct rtable *rth; + bool do_cache; in_dev = __in_dev_get_rcu(dev_out); if (!in_dev) @@ -1794,24 +1802,36 @@ static struct rtable *__mkroute_output(const struct fib_result *res, } fnhe = NULL; + do_cache = fi != NULL; if (fi) { struct rtable __rcu **prth; + struct fib_nh *nh = &FIB_RES_NH(*res); - fnhe = find_exception(&FIB_RES_NH(*res), fl4->daddr); + fnhe = find_exception(nh, fl4->daddr); if (fnhe) prth = &fnhe->fnhe_rth; - else - prth = __this_cpu_ptr(FIB_RES_NH(*res).nh_pcpu_rth_output); + else { + if (unlikely(fl4->flowi4_flags & + FLOWI_FLAG_KNOWN_NH && + !(nh->nh_gw && + nh->nh_scope == RT_SCOPE_LINK))) { + do_cache = false; + goto add; + } + prth = __this_cpu_ptr(nh->nh_pcpu_rth_output); + } rth = rcu_dereference(*prth); if (rt_cache_valid(rth)) { dst_hold(&rth->dst); return rth; } } + +add: rth = rt_dst_alloc(dev_out, IN_DEV_CONF_GET(in_dev, NOPOLICY), IN_DEV_CONF_GET(in_dev, NOXFRM), - fi); + do_cache); if (!rth) return ERR_PTR(-ENOBUFS); @@ -1824,6 +1844,7 @@ static struct rtable *__mkroute_output(const struct fib_result *res, rth->rt_iif = orig_oif ? : 0; rth->rt_pmtu = 0; rth->rt_gateway = 0; + rth->rt_uses_gateway = 0; INIT_LIST_HEAD(&rth->rt_uncached); RT_CACHE_STAT_INC(out_slow_tot); @@ -2102,6 +2123,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or rt->rt_flags = ort->rt_flags; rt->rt_type = ort->rt_type; rt->rt_gateway = ort->rt_gateway; + rt->rt_uses_gateway = ort->rt_uses_gateway; INIT_LIST_HEAD(&rt->rt_uncached); @@ -2180,28 +2202,31 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (nla_put_be32(skb, RTA_PREFSRC, fl4->saddr)) goto nla_put_failure; } - if (rt->rt_gateway && + if (rt->rt_uses_gateway && nla_put_be32(skb, RTA_GATEWAY, rt->rt_gateway)) goto nla_put_failure; + expires = rt->dst.expires; + if (expires) { + unsigned long now = jiffies; + + if (time_before(now, expires)) + expires -= now; + else + expires = 0; + } + memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); - if (rt->rt_pmtu) + if (rt->rt_pmtu && expires) metrics[RTAX_MTU - 1] = rt->rt_pmtu; if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; if (fl4->flowi4_mark && - nla_put_be32(skb, RTA_MARK, fl4->flowi4_mark)) + nla_put_u32(skb, RTA_MARK, fl4->flowi4_mark)) goto nla_put_failure; error = rt->dst.error; - expires = rt->dst.expires; - if (expires) { - if (time_before(jiffies, expires)) - expires -= jiffies; - else - expires = 0; - } if (rt_is_input_route(rt)) { if (nla_put_u32(skb, RTA_IIF, rt->rt_iif)) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 9205e492dc9d..63d4eccc674d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -248,6 +248,8 @@ int proc_tcp_fastopen_key(ctl_table *ctl, int write, void __user *buffer, ctxt = rcu_dereference(tcp_fastopen_ctx); if (ctxt) memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); + else + memset(user_key, 0, sizeof(user_key)); rcu_read_unlock(); snprintf(tbl.data, tbl.maxlen, "%08x-%08x-%08x-%08x", diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index f32c02e2a543..197c0008503c 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -549,14 +549,12 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg) !tp->urg_data || before(tp->urg_seq, tp->copied_seq) || !before(tp->urg_seq, tp->rcv_nxt)) { - struct sk_buff *skb; answ = tp->rcv_nxt - tp->copied_seq; - /* Subtract 1, if FIN is in queue. */ - skb = skb_peek_tail(&sk->sk_receive_queue); - if (answ && skb) - answ -= tcp_hdr(skb)->fin; + /* Subtract 1, if FIN was received */ + if (answ && sock_flag(sk, SOCK_DONE)) + answ--; } else answ = tp->urg_seq - tp->copied_seq; release_sock(sk); @@ -2766,6 +2764,8 @@ void tcp_get_info(const struct sock *sk, struct tcp_info *info) info->tcpi_options |= TCPI_OPT_ECN; if (tp->ecn_flags & TCP_ECN_SEEN) info->tcpi_options |= TCPI_OPT_ECN_SEEN; + if (tp->syn_data_acked) + info->tcpi_options |= TCPI_OPT_SYN_DATA; info->tcpi_rto = jiffies_to_usecs(icsk->icsk_rto); info->tcpi_ato = jiffies_to_usecs(icsk->icsk_ack.ato); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 432c36649db3..1db663983587 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5646,6 +5646,7 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, tcp_rearm_rto(sk); return true; } + tp->syn_data_acked = tp->syn_data; return false; } @@ -5963,7 +5964,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, req = tp->fastopen_rsk; if (req != NULL) { - BUG_ON(sk->sk_state != TCP_SYN_RECV && + WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && sk->sk_state != TCP_FIN_WAIT1); if (tcp_check_req(sk, skb, req, NULL, true) == NULL) @@ -6052,7 +6053,15 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * ACK we have received, this would have acknowledged * our SYNACK so stop the SYNACK timer. */ - if (acceptable && req != NULL) { + if (req != NULL) { + /* Return RST if ack_seq is invalid. + * Note that RFC793 only says to generate a + * DUPACK for it but for TCP Fast Open it seems + * better to treat this case like TCP_SYN_RECV + * above. + */ + if (!acceptable) + return 1; /* We no longer need the request sock. */ reqsk_fastopen_remove(sk, req, false); tcp_rearm_rto(sk); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 75735c9a6a9d..0c4a64355603 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -708,10 +708,11 @@ static void tcp_v4_send_reset(struct sock *sk, struct sk_buff *skb) arg.csumoffset = offsetof(struct tcphdr, check) / 2; arg.flags = (sk && inet_sk(sk)->transparent) ? IP_REPLY_ARG_NOSRCCHECK : 0; /* When socket is gone, all binding information is lost. - * routing might fail in this case. using iif for oif to - * make sure we can deliver it + * routing might fail in this case. No choice here, if we choose to force + * input interface, we will misroute in case of asymmetric route. */ - arg.bound_dev_if = sk ? sk->sk_bound_dev_if : inet_iif(skb); + if (sk) + arg.bound_dev_if = sk->sk_bound_dev_if; net = dev_net(skb_dst(skb)->dev); arg.tos = ip_hdr(skb)->tos; @@ -1460,6 +1461,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, skb_set_owner_r(skb, child); __skb_queue_tail(&child->sk_receive_queue, skb); tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq; + tp->syn_data_acked = 1; } sk->sk_data_ready(sk, 0); bh_unlock_sock(child); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 27536ba16c9d..a7302d974f32 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -510,6 +510,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->rx_opt.mss_clamp = req->mss; TCP_ECN_openreq_child(newtp, req); newtp->fastopen_rsk = NULL; + newtp->syn_data_acked = 0; TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index fc04711e80c8..d47c1b4421a3 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -347,8 +347,8 @@ void tcp_retransmit_timer(struct sock *sk) return; } if (tp->fastopen_rsk) { - BUG_ON(sk->sk_state != TCP_SYN_RECV && - sk->sk_state != TCP_FIN_WAIT1); + WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && + sk->sk_state != TCP_FIN_WAIT1); tcp_fastopen_synack_timer(sk); /* Before we receive ACK to our SYN-ACK don't retransmit * anything else (e.g., data or FIN segments). diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 681ea2f413e2..05c5ab8d983c 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -91,6 +91,7 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, RTCF_LOCAL); xdst->u.rt.rt_type = rt->rt_type; xdst->u.rt.rt_gateway = rt->rt_gateway; + xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; xdst->u.rt.rt_pmtu = rt->rt_pmtu; INIT_LIST_HEAD(&xdst->u.rt.rt_uncached); |