diff options
author | Eric Dumazet <edumazet@google.com> | 2023-10-20 12:57:47 +0000 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2023-10-23 09:35:01 +0100 |
commit | 614e8316aa4cafba3e204cb8ee48bd12b92f3d93 (patch) | |
tree | bc6f18ff35ef1e418218dc944a48aa6d143ae45a /net/ipv4/tcp_output.c | |
parent | af7721448a609d1912b57c825194ef6e17fc71a4 (diff) | |
download | lwn-614e8316aa4cafba3e204cb8ee48bd12b92f3d93.tar.gz lwn-614e8316aa4cafba3e204cb8ee48bd12b92f3d93.zip |
tcp: add support for usec resolution in TCP TS values
Back in 2015, Van Jacobson suggested to use usec resolution in TCP TS values.
This has been implemented in our private kernels.
Goals were :
1) better observability of delays in networking stacks.
2) better disambiguation of events based on TSval/ecr values.
3) building block for congestion control modules needing usec resolution.
Back then we implemented a schem based on private SYN options
to negotiate the feature.
For upstream submission, we chose to use a route attribute,
because this feature is probably going to be used in private
networks [1] [2].
ip route add 10/8 ... features tcp_usec_ts
Note that RFC 7323 recommends a
"timestamp clock frequency in the range 1 ms to 1 sec per tick.",
but also mentions
"the maximum acceptable clock frequency is one tick every 59 ns."
[1] Unfortunately RFC 7323 5.5 (Outdated Timestamps) suggests
to invalidate TS.Recent values after a flow was idle for more
than 24 days. This is the part making usec_ts a problem
for peers following this recommendation for long living
idle flows.
[2] Attempts to standardize usec ts went nowhere:
https://www.ietf.org/proceedings/97/slides/slides-97-tcpm-tcp-options-for-low-latency-00.pdf
https://datatracker.ietf.org/doc/draft-wang-tcpm-low-latency-opt/
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_output.c')
-rw-r--r-- | net/ipv4/tcp_output.c | 12 |
1 files changed, 8 insertions, 4 deletions
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index a1fec8be9ac3..2866ccbccde0 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -799,7 +799,7 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) { opts->options |= OPTION_TS; - opts->tsval = tcp_skb_timestamp_ts(false, skb) + tp->tsoffset; + opts->tsval = tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb) + tp->tsoffset; opts->tsecr = tp->rx_opt.ts_recent; remaining -= TCPOLEN_TSTAMP_ALIGNED; } @@ -884,7 +884,8 @@ static unsigned int tcp_synack_options(const struct sock *sk, } if (likely(ireq->tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = tcp_skb_timestamp_ts(false, skb) + tcp_rsk(req)->ts_off; + opts->tsval = tcp_skb_timestamp_ts(tcp_rsk(req)->req_usec_ts, skb) + + tcp_rsk(req)->ts_off; opts->tsecr = READ_ONCE(req->ts_recent); remaining -= TCPOLEN_TSTAMP_ALIGNED; } @@ -943,7 +944,8 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb if (likely(tp->rx_opt.tstamp_ok)) { opts->options |= OPTION_TS; - opts->tsval = skb ? tcp_skb_timestamp_ts(false, skb) + tp->tsoffset : 0; + opts->tsval = skb ? tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb) + + tp->tsoffset : 0; opts->tsecr = tp->rx_opt.ts_recent; size += TCPOLEN_TSTAMP_ALIGNED; } @@ -3379,7 +3381,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) /* Save stamp of the first (attempted) retransmit. */ if (!tp->retrans_stamp) - tp->retrans_stamp = tcp_skb_timestamp_ts(false, skb); + tp->retrans_stamp = tcp_skb_timestamp_ts(tp->tcp_usec_ts, skb); if (tp->undo_retrans < 0) tp->undo_retrans = 0; @@ -3665,6 +3667,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, mss = tcp_mss_clamp(tp, dst_metric_advmss(dst)); memset(&opts, 0, sizeof(opts)); + if (tcp_rsk(req)->req_usec_ts < 0) + tcp_rsk(req)->req_usec_ts = dst_tcp_usec_ts(dst); now = tcp_clock_ns(); #ifdef CONFIG_SYN_COOKIES if (unlikely(synack_type == TCP_SYNACK_COOKIE && ireq->tstamp_ok)) |