diff options
Diffstat (limited to 'net/ipv4/tcp_ipv4.c')
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 114 |
1 files changed, 63 insertions, 51 deletions
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2632844d2c35..8cce0d5489da 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -66,6 +66,7 @@ #include <net/transp_v6.h> #include <net/ipv6.h> #include <net/inet_common.h> +#include <net/inet_ecn.h> #include <net/timewait_sock.h> #include <net/xfrm.h> #include <net/secure_seq.h> @@ -92,7 +93,6 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, #endif struct inet_hashinfo tcp_hashinfo; -EXPORT_SYMBOL(tcp_hashinfo); static DEFINE_PER_CPU(struct sock_bh_locked, ipv4_tcp_sk) = { .bh_lock = INIT_LOCAL_LOCK(bh_lock), @@ -199,7 +199,7 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) return 0; } -EXPORT_SYMBOL_GPL(tcp_twsk_unique); +EXPORT_IPV6_MOD_GPL(tcp_twsk_unique); static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) @@ -359,7 +359,7 @@ failure: inet->inet_dport = 0; return err; } -EXPORT_SYMBOL(tcp_v4_connect); +EXPORT_IPV6_MOD(tcp_v4_connect); /* * This routine reacts to ICMP_FRAG_NEEDED mtu indications as defined in RFC1191. @@ -400,7 +400,7 @@ void tcp_v4_mtu_reduced(struct sock *sk) tcp_simple_retransmit(sk); } /* else let the usual retransmit timer handle it */ } -EXPORT_SYMBOL(tcp_v4_mtu_reduced); +EXPORT_IPV6_MOD(tcp_v4_mtu_reduced); static void do_redirect(struct sk_buff *skb, struct sock *sk) { @@ -434,7 +434,7 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort) } reqsk_put(req); } -EXPORT_SYMBOL(tcp_req_err); +EXPORT_IPV6_MOD(tcp_req_err); /* TCP-LD (RFC 6069) logic */ void tcp_ld_RTO_revert(struct sock *sk, u32 seq) @@ -458,15 +458,14 @@ void tcp_ld_RTO_revert(struct sock *sk, u32 seq) icsk->icsk_backoff--; icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) : TCP_TIMEOUT_INIT; - icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); + icsk->icsk_rto = inet_csk_rto_backoff(icsk, tcp_rto_max(sk)); tcp_mstamp_refresh(tp); delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb)); remaining = icsk->icsk_rto - usecs_to_jiffies(delta_us); if (remaining > 0) { - inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, - remaining, TCP_RTO_MAX); + tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, remaining, false); } else { /* RTO revert clocked out retransmission. * Will retransmit now. @@ -474,7 +473,7 @@ void tcp_ld_RTO_revert(struct sock *sk, u32 seq) tcp_retransmit_timer(sk); } } -EXPORT_SYMBOL(tcp_ld_RTO_revert); +EXPORT_IPV6_MOD(tcp_ld_RTO_revert); /* * This routine is called by the ICMP module when it gets some @@ -496,14 +495,14 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (const struct iphdr *)skb->data; struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2)); - struct tcp_sock *tp; + struct net *net = dev_net_rcu(skb->dev); const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; - struct sock *sk; struct request_sock *fastopen; + struct tcp_sock *tp; u32 seq, snd_una; + struct sock *sk; int err; - struct net *net = dev_net(skb->dev); sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, iph->daddr, th->dest, iph->saddr, @@ -676,7 +675,7 @@ void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb) __tcp_v4_send_check(skb, inet->inet_saddr, inet->inet_daddr); } -EXPORT_SYMBOL(tcp_v4_send_check); +EXPORT_IPV6_MOD(tcp_v4_send_check); #define REPLY_OPTIONS_LEN (MAX_TCP_OPTION_SPACE / sizeof(__be32)) @@ -788,7 +787,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, arg.iov[0].iov_base = (unsigned char *)&rep; arg.iov[0].iov_len = sizeof(rep.th); - net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev); + net = sk ? sock_net(sk) : dev_net_rcu(skb_dst(skb)->dev); /* Invalid TCP option size or twice included auth */ if (tcp_parse_auth_options(tcp_hdr(skb), &md5_hash_location, &aoh)) @@ -889,7 +888,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb, BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != offsetof(struct inet_timewait_sock, tw_bound_dev_if)); - arg.tos = ip_hdr(skb)->tos; + /* ECN bits of TW reset are cleared */ + arg.tos = ip_hdr(skb)->tos & ~INET_ECN_MASK; arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); local_lock_nested_bh(&ipv4_tcp_sk.bh_lock); @@ -1035,11 +1035,21 @@ static void tcp_v4_send_ack(const struct sock *sk, local_bh_enable(); } -static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) +static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb, + enum tcp_tw_status tw_status) { struct inet_timewait_sock *tw = inet_twsk(sk); struct tcp_timewait_sock *tcptw = tcp_twsk(sk); struct tcp_key key = {}; + u8 tos = tw->tw_tos; + + /* Cleaning only ECN bits of TW ACKs of oow data or is paws_reject, + * while not cleaning ECN bits of other TW ACKs to avoid these ACKs + * being placed in a different service queues (Classic rather than L4S) + */ + if (tw_status == TCP_TW_ACK_OOW) + tos &= ~INET_ECN_MASK; + #ifdef CONFIG_TCP_AO struct tcp_ao_info *ao_info; @@ -1083,7 +1093,7 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb) READ_ONCE(tcptw->tw_ts_recent), tw->tw_bound_dev_if, &key, tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0, - tw->tw_tos, + tos, tw->tw_txhash); inet_twsk_put(tw); @@ -1153,14 +1163,15 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, key.type = TCP_KEY_MD5; } + /* Cleaning ECN bits of TW ACKs of oow data or is paws_reject */ tcp_v4_send_ack(sk, skb, seq, tcp_rsk(req)->rcv_nxt, tcp_synack_window(req) >> inet_rsk(req)->rcv_wscale, tcp_rsk_tsval(tcp_rsk(req)), - READ_ONCE(req->ts_recent), + req->ts_recent, 0, &key, inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0, - ip_hdr(skb)->tos, + ip_hdr(skb)->tos & ~INET_ECN_MASK, READ_ONCE(tcp_rsk(req)->txhash)); if (tcp_key_is_ao(&key)) kfree(key.traffic_key); @@ -1231,7 +1242,7 @@ static void tcp_v4_reqsk_destructor(struct request_sock *req) */ DEFINE_STATIC_KEY_DEFERRED_FALSE(tcp_md5_needed, HZ); -EXPORT_SYMBOL(tcp_md5_needed); +EXPORT_IPV6_MOD(tcp_md5_needed); static bool better_md5_match(struct tcp_md5sig_key *old, struct tcp_md5sig_key *new) { @@ -1290,7 +1301,7 @@ struct tcp_md5sig_key *__tcp_md5_do_lookup(const struct sock *sk, int l3index, } return best_match; } -EXPORT_SYMBOL(__tcp_md5_do_lookup); +EXPORT_IPV6_MOD(__tcp_md5_do_lookup); static struct tcp_md5sig_key *tcp_md5_do_lookup_exact(const struct sock *sk, const union tcp_md5_addr *addr, @@ -1337,7 +1348,7 @@ struct tcp_md5sig_key *tcp_v4_md5_lookup(const struct sock *sk, addr = (const union tcp_md5_addr *)&addr_sk->sk_daddr; return tcp_md5_do_lookup(sk, l3index, addr, AF_INET); } -EXPORT_SYMBOL(tcp_v4_md5_lookup); +EXPORT_IPV6_MOD(tcp_v4_md5_lookup); static int tcp_md5sig_info_add(struct sock *sk, gfp_t gfp) { @@ -1433,7 +1444,7 @@ int tcp_md5_do_add(struct sock *sk, const union tcp_md5_addr *addr, return __tcp_md5_do_add(sk, addr, family, prefixlen, l3index, flags, newkey, newkeylen, GFP_KERNEL); } -EXPORT_SYMBOL(tcp_md5_do_add); +EXPORT_IPV6_MOD(tcp_md5_do_add); int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, @@ -1465,7 +1476,7 @@ int tcp_md5_key_copy(struct sock *sk, const union tcp_md5_addr *addr, key->flags, key->key, key->keylen, sk_gfp_mask(sk, GFP_ATOMIC)); } -EXPORT_SYMBOL(tcp_md5_key_copy); +EXPORT_IPV6_MOD(tcp_md5_key_copy); int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, u8 prefixlen, int l3index, u8 flags) @@ -1480,7 +1491,7 @@ int tcp_md5_do_del(struct sock *sk, const union tcp_md5_addr *addr, int family, kfree_rcu(key, rcu); return 0; } -EXPORT_SYMBOL(tcp_md5_do_del); +EXPORT_IPV6_MOD(tcp_md5_do_del); void tcp_clear_md5_list(struct sock *sk) { @@ -1659,7 +1670,7 @@ clear_hash_nostart: memset(md5_hash, 0, 16); return 1; } -EXPORT_SYMBOL(tcp_v4_md5_hash_skb); +EXPORT_IPV6_MOD(tcp_v4_md5_hash_skb); #endif @@ -1732,7 +1743,7 @@ drop: tcp_listendrop(sk); return 0; } -EXPORT_SYMBOL(tcp_v4_conn_request); +EXPORT_IPV6_MOD(tcp_v4_conn_request); /* @@ -1770,10 +1781,6 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, newtp = tcp_sk(newsk); newinet = inet_sk(newsk); ireq = inet_rsk(req); - sk_daddr_set(newsk, ireq->ir_rmt_addr); - sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); - newsk->sk_bound_dev_if = ireq->ir_iif; - newinet->inet_saddr = ireq->ir_loc_addr; inet_opt = rcu_dereference(ireq->ireq_opt); RCU_INIT_POINTER(newinet->inet_opt, inet_opt); newinet->mc_index = inet_iif(skb); @@ -1856,7 +1863,7 @@ put_and_exit: tcp_done(newsk); goto exit; } -EXPORT_SYMBOL(tcp_v4_syn_recv_sock); +EXPORT_IPV6_MOD(tcp_v4_syn_recv_sock); static struct sock *tcp_v4_cookie_check(struct sock *sk, struct sk_buff *skb) { @@ -1967,7 +1974,7 @@ EXPORT_SYMBOL(tcp_v4_do_rcv); int tcp_v4_early_demux(struct sk_buff *skb) { - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); const struct iphdr *iph; const struct tcphdr *th; struct sock *sk; @@ -2057,7 +2064,8 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb, !((TCP_SKB_CB(tail)->tcp_flags & TCP_SKB_CB(skb)->tcp_flags) & TCPHDR_ACK) || ((TCP_SKB_CB(tail)->tcp_flags ^ - TCP_SKB_CB(skb)->tcp_flags) & (TCPHDR_ECE | TCPHDR_CWR)) || + TCP_SKB_CB(skb)->tcp_flags) & + (TCPHDR_ECE | TCPHDR_CWR | TCPHDR_AE)) || !tcp_skb_can_collapse_rx(tail, skb) || thtail->doff != th->doff || memcmp(thtail + 1, th + 1, hdrlen - sizeof(*th))) @@ -2135,7 +2143,7 @@ no_coalesce: } return false; } -EXPORT_SYMBOL(tcp_add_backlog); +EXPORT_IPV6_MOD(tcp_add_backlog); int tcp_filter(struct sock *sk, struct sk_buff *skb) { @@ -2143,7 +2151,7 @@ int tcp_filter(struct sock *sk, struct sk_buff *skb) return sk_filter_trim_cap(sk, skb, th->doff * 4); } -EXPORT_SYMBOL(tcp_filter); +EXPORT_IPV6_MOD(tcp_filter); static void tcp_v4_restore_cb(struct sk_buff *skb) { @@ -2165,7 +2173,7 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin + skb->len - th->doff * 4); TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq); - TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th); + TCP_SKB_CB(skb)->tcp_flags = tcp_flags_ntohs(th); TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); TCP_SKB_CB(skb)->sacked = 0; TCP_SKB_CB(skb)->has_rxtstamp = @@ -2178,8 +2186,9 @@ static void tcp_v4_fill_cb(struct sk_buff *skb, const struct iphdr *iph, int tcp_v4_rcv(struct sk_buff *skb) { - struct net *net = dev_net(skb->dev); + struct net *net = dev_net_rcu(skb->dev); enum skb_drop_reason drop_reason; + enum tcp_tw_status tw_status; int sdif = inet_sdif(skb); int dif = inet_iif(skb); const struct iphdr *iph; @@ -2271,7 +2280,8 @@ lookup: th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); tcp_v4_fill_cb(skb, iph, th); - nsk = tcp_check_req(sk, skb, req, false, &req_stolen); + nsk = tcp_check_req(sk, skb, req, false, &req_stolen, + &drop_reason); } else { drop_reason = SKB_DROP_REASON_SOCKET_FILTER; } @@ -2406,7 +2416,9 @@ do_time_wait: inet_twsk_put(inet_twsk(sk)); goto csum_error; } - switch (tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn)) { + + tw_status = tcp_timewait_state_process(inet_twsk(sk), skb, th, &isn); + switch (tw_status) { case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, @@ -2427,7 +2439,8 @@ do_time_wait: /* to ACK */ fallthrough; case TCP_TW_ACK: - tcp_v4_timewait_ack(sk, skb); + case TCP_TW_ACK_OOW: + tcp_v4_timewait_ack(sk, skb, tw_status); break; case TCP_TW_RST: tcp_v4_send_reset(sk, skb, SK_RST_REASON_TCP_TIMEWAIT_SOCKET); @@ -2452,7 +2465,7 @@ void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) sk->sk_rx_dst_ifindex = skb->skb_iif; } } -EXPORT_SYMBOL(inet_sk_rx_dst_set); +EXPORT_IPV6_MOD(inet_sk_rx_dst_set); const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, @@ -2464,11 +2477,9 @@ const struct inet_connection_sock_af_ops ipv4_specific = { .net_header_len = sizeof(struct iphdr), .setsockopt = ip_setsockopt, .getsockopt = ip_getsockopt, - .addr2sockaddr = inet_csk_addr2sockaddr, - .sockaddr_len = sizeof(struct sockaddr_in), .mtu_reduced = tcp_v4_mtu_reduced, }; -EXPORT_SYMBOL(ipv4_specific); +EXPORT_IPV6_MOD(ipv4_specific); #if defined(CONFIG_TCP_MD5SIG) || defined(CONFIG_TCP_AO) static const struct tcp_sock_af_ops tcp_sock_ipv4_specific = { @@ -2578,7 +2589,7 @@ void tcp_v4_destroy_sock(struct sock *sk) sk_sockets_allocated_dec(sk); } -EXPORT_SYMBOL(tcp_v4_destroy_sock); +EXPORT_IPV6_MOD(tcp_v4_destroy_sock); #ifdef CONFIG_PROC_FS /* Proc filesystem TCP sock list dumping. */ @@ -2814,7 +2825,7 @@ out: st->last_pos = *pos; return rc; } -EXPORT_SYMBOL(tcp_seq_start); +EXPORT_IPV6_MOD(tcp_seq_start); void *tcp_seq_next(struct seq_file *seq, void *v, loff_t *pos) { @@ -2845,7 +2856,7 @@ out: st->last_pos = *pos; return rc; } -EXPORT_SYMBOL(tcp_seq_next); +EXPORT_IPV6_MOD(tcp_seq_next); void tcp_seq_stop(struct seq_file *seq, void *v) { @@ -2863,7 +2874,7 @@ void tcp_seq_stop(struct seq_file *seq, void *v) break; } } -EXPORT_SYMBOL(tcp_seq_stop); +EXPORT_IPV6_MOD(tcp_seq_stop); static void get_openreq4(const struct request_sock *req, struct seq_file *f, int i) @@ -2912,10 +2923,10 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i) icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; - timer_expires = icsk->icsk_timeout; + timer_expires = icsk_timeout(icsk); } else if (icsk_pending == ICSK_TIME_PROBE0) { timer_active = 4; - timer_expires = icsk->icsk_timeout; + timer_expires = icsk_timeout(icsk); } else if (timer_pending(&sk->sk_timer)) { timer_active = 2; timer_expires = sk->sk_timer.expires; @@ -3533,6 +3544,7 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_pingpong_thresh = 1; net->ipv4.sysctl_tcp_rto_min_us = jiffies_to_usecs(TCP_RTO_MIN); + net->ipv4.sysctl_tcp_rto_max_ms = TCP_RTO_MAX_SEC * MSEC_PER_SEC; return 0; } |