summaryrefslogtreecommitdiff
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--net/ipv4/tcp.c199
1 files changed, 130 insertions, 69 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0d704bda6c41..6edc441b3702 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -300,10 +300,10 @@ DEFINE_PER_CPU(u32, tcp_tw_isn);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_tw_isn);
long sysctl_tcp_mem[3] __read_mostly;
-EXPORT_SYMBOL(sysctl_tcp_mem);
+EXPORT_IPV6_MOD(sysctl_tcp_mem);
atomic_long_t tcp_memory_allocated ____cacheline_aligned_in_smp; /* Current allocated memory. */
-EXPORT_SYMBOL(tcp_memory_allocated);
+EXPORT_IPV6_MOD(tcp_memory_allocated);
DEFINE_PER_CPU(int, tcp_memory_per_cpu_fw_alloc);
EXPORT_PER_CPU_SYMBOL_GPL(tcp_memory_per_cpu_fw_alloc);
@@ -316,7 +316,7 @@ EXPORT_SYMBOL(tcp_have_smc);
* Current number of TCP sockets.
*/
struct percpu_counter tcp_sockets_allocated ____cacheline_aligned_in_smp;
-EXPORT_SYMBOL(tcp_sockets_allocated);
+EXPORT_IPV6_MOD(tcp_sockets_allocated);
/*
* TCP splice context
@@ -349,7 +349,7 @@ void tcp_enter_memory_pressure(struct sock *sk)
if (!cmpxchg(&tcp_memory_pressure, 0, val))
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURES);
}
-EXPORT_SYMBOL_GPL(tcp_enter_memory_pressure);
+EXPORT_IPV6_MOD_GPL(tcp_enter_memory_pressure);
void tcp_leave_memory_pressure(struct sock *sk)
{
@@ -362,7 +362,7 @@ void tcp_leave_memory_pressure(struct sock *sk)
NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPMEMORYPRESSURESCHRONO,
jiffies_to_msecs(jiffies - val));
}
-EXPORT_SYMBOL_GPL(tcp_leave_memory_pressure);
+EXPORT_IPV6_MOD_GPL(tcp_leave_memory_pressure);
/* Convert seconds to retransmits based on initial and max timeout */
static u8 secs_to_retrans(int seconds, int timeout, int rto_max)
@@ -423,7 +423,7 @@ void tcp_init_sock(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- int rto_min_us;
+ int rto_min_us, rto_max_ms;
tp->out_of_order_queue = RB_ROOT;
sk->tcp_rtx_queue = RB_ROOT;
@@ -432,6 +432,10 @@ void tcp_init_sock(struct sock *sk)
INIT_LIST_HEAD(&tp->tsorted_sent_queue);
icsk->icsk_rto = TCP_TIMEOUT_INIT;
+
+ rto_max_ms = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_max_ms);
+ icsk->icsk_rto_max = msecs_to_jiffies(rto_max_ms);
+
rto_min_us = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rto_min_us);
icsk->icsk_rto_min = usecs_to_jiffies(rto_min_us);
icsk->icsk_delack_max = TCP_DELACK_MAX;
@@ -475,7 +479,7 @@ void tcp_init_sock(struct sock *sk)
sk_sockets_allocated_inc(sk);
xa_init_flags(&sk->sk_user_frags, XA_FLAGS_ALLOC1);
}
-EXPORT_SYMBOL(tcp_init_sock);
+EXPORT_IPV6_MOD(tcp_init_sock);
static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
{
@@ -488,10 +492,14 @@ static void tcp_tx_timestamp(struct sock *sk, struct sockcm_cookie *sockc)
sock_tx_timestamp(sk, sockc, &shinfo->tx_flags);
if (tsflags & SOF_TIMESTAMPING_TX_ACK)
- tcb->txstamp_ack = 1;
+ tcb->txstamp_ack |= TSTAMP_ACK_SK;
if (tsflags & SOF_TIMESTAMPING_TX_RECORD_MASK)
shinfo->tskey = TCP_SKB_CB(skb)->seq + skb->len - 1;
}
+
+ if (cgroup_bpf_enabled(CGROUP_SOCK_OPS) &&
+ SK_BPF_CB_FLAG_TEST(sk, SK_BPF_CB_TX_TIMESTAMPING) && skb)
+ bpf_skops_tx_timestamping(sk, skb, BPF_SOCK_OPS_TSTAMP_SENDMSG_CB);
}
static bool tcp_stream_is_readable(struct sock *sk, int target)
@@ -660,7 +668,7 @@ int tcp_ioctl(struct sock *sk, int cmd, int *karg)
*karg = answ;
return 0;
}
-EXPORT_SYMBOL(tcp_ioctl);
+EXPORT_IPV6_MOD(tcp_ioctl);
void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
{
@@ -876,7 +884,7 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos,
return ret;
}
-EXPORT_SYMBOL(tcp_splice_read);
+EXPORT_IPV6_MOD(tcp_splice_read);
struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, gfp_t gfp,
bool force_schedule)
@@ -1123,7 +1131,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size)
/* 'common' sending to sendq */
}
- sockcm_init(&sockc, sk);
+ sockc = (struct sockcm_cookie) { .tsflags = READ_ONCE(sk->sk_tsflags)};
if (msg->msg_controllen) {
err = sock_cmsg_send(sk, msg, &sockc);
if (unlikely(err)) {
@@ -1376,7 +1384,7 @@ void tcp_splice_eof(struct socket *sock)
tcp_push(sk, 0, mss_now, tp->nonagle, size_goal);
release_sock(sk);
}
-EXPORT_SYMBOL_GPL(tcp_splice_eof);
+EXPORT_IPV6_MOD_GPL(tcp_splice_eof);
/*
* Handle reading urgent data. BSD has very simple semantics for
@@ -1565,12 +1573,13 @@ EXPORT_SYMBOL(tcp_recv_skb);
* or for 'peeking' the socket using this routine
* (although both would be easy to implement).
*/
-int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
- sk_read_actor_t recv_actor)
+static int __tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor, bool noack,
+ u32 *copied_seq)
{
struct sk_buff *skb;
struct tcp_sock *tp = tcp_sk(sk);
- u32 seq = tp->copied_seq;
+ u32 seq = *copied_seq;
u32 offset;
int copied = 0;
@@ -1624,9 +1633,12 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
tcp_eat_recv_skb(sk, skb);
if (!desc->count)
break;
- WRITE_ONCE(tp->copied_seq, seq);
+ WRITE_ONCE(*copied_seq, seq);
}
- WRITE_ONCE(tp->copied_seq, seq);
+ WRITE_ONCE(*copied_seq, seq);
+
+ if (noack)
+ goto out;
tcp_rcv_space_adjust(sk);
@@ -1635,10 +1647,25 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
tcp_recv_skb(sk, seq, &offset);
tcp_cleanup_rbuf(sk, copied);
}
+out:
return copied;
}
+
+int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor)
+{
+ return __tcp_read_sock(sk, desc, recv_actor, false,
+ &tcp_sk(sk)->copied_seq);
+}
EXPORT_SYMBOL(tcp_read_sock);
+int tcp_read_sock_noack(struct sock *sk, read_descriptor_t *desc,
+ sk_read_actor_t recv_actor, bool noack,
+ u32 *copied_seq)
+{
+ return __tcp_read_sock(sk, desc, recv_actor, noack, copied_seq);
+}
+
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
struct sk_buff *skb;
@@ -1667,7 +1694,7 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
}
return copied;
}
-EXPORT_SYMBOL(tcp_read_skb);
+EXPORT_IPV6_MOD(tcp_read_skb);
void tcp_read_done(struct sock *sk, size_t len)
{
@@ -1712,7 +1739,7 @@ int tcp_peek_len(struct socket *sock)
{
return tcp_inq(sock->sk);
}
-EXPORT_SYMBOL(tcp_peek_len);
+EXPORT_IPV6_MOD(tcp_peek_len);
/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */
int tcp_set_rcvlowat(struct sock *sk, int val)
@@ -1739,7 +1766,7 @@ int tcp_set_rcvlowat(struct sock *sk, int val)
}
return 0;
}
-EXPORT_SYMBOL(tcp_set_rcvlowat);
+EXPORT_IPV6_MOD(tcp_set_rcvlowat);
void tcp_update_recv_tstamps(struct sk_buff *skb,
struct scm_timestamping_internal *tss)
@@ -1772,7 +1799,7 @@ int tcp_mmap(struct file *file, struct socket *sock,
vma->vm_ops = &tcp_vm_ops;
return 0;
}
-EXPORT_SYMBOL(tcp_mmap);
+EXPORT_IPV6_MOD(tcp_mmap);
static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb,
u32 *offset_frag)
@@ -2438,14 +2465,12 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb,
*/
memset(&dmabuf_cmsg, 0, sizeof(dmabuf_cmsg));
dmabuf_cmsg.frag_size = copy;
- err = put_cmsg(msg, SOL_SOCKET, SO_DEVMEM_LINEAR,
- sizeof(dmabuf_cmsg), &dmabuf_cmsg);
- if (err || msg->msg_flags & MSG_CTRUNC) {
- msg->msg_flags &= ~MSG_CTRUNC;
- if (!err)
- err = -ETOOSMALL;
+ err = put_cmsg_notrunc(msg, SOL_SOCKET,
+ SO_DEVMEM_LINEAR,
+ sizeof(dmabuf_cmsg),
+ &dmabuf_cmsg);
+ if (err)
goto out;
- }
sent += copy;
@@ -2476,6 +2501,11 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb,
}
niov = skb_frag_net_iov(frag);
+ if (!net_is_devmem_iov(niov)) {
+ err = -ENODEV;
+ goto out;
+ }
+
end = start + skb_frag_size(frag);
copy = end - offset;
@@ -2494,21 +2524,17 @@ static int tcp_recvmsg_dmabuf(struct sock *sk, const struct sk_buff *skb,
/* Will perform the exchange later */
dmabuf_cmsg.frag_token = tcp_xa_pool.tokens[tcp_xa_pool.idx];
- dmabuf_cmsg.dmabuf_id = net_iov_binding_id(niov);
+ dmabuf_cmsg.dmabuf_id = net_devmem_iov_binding_id(niov);
offset += copy;
remaining_len -= copy;
- err = put_cmsg(msg, SOL_SOCKET,
- SO_DEVMEM_DMABUF,
- sizeof(dmabuf_cmsg),
- &dmabuf_cmsg);
- if (err || msg->msg_flags & MSG_CTRUNC) {
- msg->msg_flags &= ~MSG_CTRUNC;
- if (!err)
- err = -ETOOSMALL;
+ err = put_cmsg_notrunc(msg, SOL_SOCKET,
+ SO_DEVMEM_DMABUF,
+ sizeof(dmabuf_cmsg),
+ &dmabuf_cmsg);
+ if (err)
goto out;
- }
atomic_long_inc(&niov->pp_ref_count);
tcp_xa_pool.netmems[tcp_xa_pool.idx++] = skb_frag_netmem(frag);
@@ -2864,7 +2890,7 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int flags,
}
return ret;
}
-EXPORT_SYMBOL(tcp_recvmsg);
+EXPORT_IPV6_MOD(tcp_recvmsg);
void tcp_set_state(struct sock *sk, int state)
{
@@ -2994,7 +3020,7 @@ void tcp_shutdown(struct sock *sk, int how)
tcp_send_fin(sk);
}
}
-EXPORT_SYMBOL(tcp_shutdown);
+EXPORT_IPV6_MOD(tcp_shutdown);
int tcp_orphan_count_sum(void)
{
@@ -3174,7 +3200,7 @@ adjudge_to_death:
const int tmo = tcp_fin_time(sk);
if (tmo > TCP_TIMEWAIT_LEN) {
- inet_csk_reset_keepalive_timer(sk,
+ tcp_reset_keepalive_timer(sk,
tmo - TCP_TIMEWAIT_LEN);
} else {
tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
@@ -3326,8 +3352,8 @@ int tcp_disconnect(struct sock *sk, int flags)
icsk->icsk_probes_out = 0;
icsk->icsk_probes_tstamp = 0;
icsk->icsk_rto = TCP_TIMEOUT_INIT;
- icsk->icsk_rto_min = TCP_RTO_MIN;
- icsk->icsk_delack_max = TCP_DELACK_MAX;
+ WRITE_ONCE(icsk->icsk_rto_min, TCP_RTO_MIN);
+ WRITE_ONCE(icsk->icsk_delack_max, TCP_DELACK_MAX);
tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
tcp_snd_cwnd_set(tp, TCP_INIT_CWND);
tp->snd_cwnd_cnt = 0;
@@ -3493,7 +3519,7 @@ static int tcp_repair_options_est(struct sock *sk, sockptr_t optbuf,
}
DEFINE_STATIC_KEY_FALSE(tcp_tx_delay_enabled);
-EXPORT_SYMBOL(tcp_tx_delay_enabled);
+EXPORT_IPV6_MOD(tcp_tx_delay_enabled);
static void tcp_enable_tx_delay(void)
{
@@ -3627,7 +3653,7 @@ int tcp_sock_set_keepidle_locked(struct sock *sk, int val)
elapsed = tp->keepalive_time - elapsed;
else
elapsed = 0;
- inet_csk_reset_keepalive_timer(sk, elapsed);
+ tcp_reset_keepalive_timer(sk, elapsed);
}
return 0;
@@ -3667,32 +3693,32 @@ EXPORT_SYMBOL(tcp_sock_set_keepcnt);
int tcp_set_window_clamp(struct sock *sk, int val)
{
+ u32 old_window_clamp, new_window_clamp, new_rcv_ssthresh;
struct tcp_sock *tp = tcp_sk(sk);
if (!val) {
if (sk->sk_state != TCP_CLOSE)
return -EINVAL;
WRITE_ONCE(tp->window_clamp, 0);
- } else {
- u32 new_rcv_ssthresh, old_window_clamp = tp->window_clamp;
- u32 new_window_clamp = val < SOCK_MIN_RCVBUF / 2 ?
- SOCK_MIN_RCVBUF / 2 : val;
+ return 0;
+ }
- if (new_window_clamp == old_window_clamp)
- return 0;
+ old_window_clamp = tp->window_clamp;
+ new_window_clamp = max_t(int, SOCK_MIN_RCVBUF / 2, val);
- WRITE_ONCE(tp->window_clamp, new_window_clamp);
- if (new_window_clamp < old_window_clamp) {
- /* need to apply the reserved mem provisioning only
- * when shrinking the window clamp
- */
- __tcp_adjust_rcv_ssthresh(sk, tp->window_clamp);
+ if (new_window_clamp == old_window_clamp)
+ return 0;
- } else {
- new_rcv_ssthresh = min(tp->rcv_wnd, tp->window_clamp);
- tp->rcv_ssthresh = max(new_rcv_ssthresh,
- tp->rcv_ssthresh);
- }
+ WRITE_ONCE(tp->window_clamp, new_window_clamp);
+
+ /* Need to apply the reserved mem provisioning only
+ * when shrinking the window clamp.
+ */
+ if (new_window_clamp < old_window_clamp) {
+ __tcp_adjust_rcv_ssthresh(sk, new_window_clamp);
+ } else {
+ new_rcv_ssthresh = min(tp->rcv_wnd, new_window_clamp);
+ tp->rcv_ssthresh = max(new_rcv_ssthresh, tp->rcv_ssthresh);
}
return 0;
}
@@ -3802,6 +3828,27 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
TCP_RTO_MAX / HZ));
return 0;
+ case TCP_RTO_MAX_MS:
+ if (val < MSEC_PER_SEC || val > TCP_RTO_MAX_SEC * MSEC_PER_SEC)
+ return -EINVAL;
+ WRITE_ONCE(inet_csk(sk)->icsk_rto_max, msecs_to_jiffies(val));
+ return 0;
+ case TCP_RTO_MIN_US: {
+ int rto_min = usecs_to_jiffies(val);
+
+ if (rto_min > TCP_RTO_MIN || rto_min < TCP_TIMEOUT_MIN)
+ return -EINVAL;
+ WRITE_ONCE(inet_csk(sk)->icsk_rto_min, rto_min);
+ return 0;
+ }
+ case TCP_DELACK_MAX_US: {
+ int delack_max = usecs_to_jiffies(val);
+
+ if (delack_max > TCP_DELACK_MAX || delack_max < TCP_TIMEOUT_MIN)
+ return -EINVAL;
+ WRITE_ONCE(inet_csk(sk)->icsk_delack_max, delack_max);
+ return 0;
+ }
}
sockopt_lock_sock(sk);
@@ -4031,7 +4078,7 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
optval, optlen);
return do_tcp_setsockopt(sk, level, optname, optval, optlen);
}
-EXPORT_SYMBOL(tcp_setsockopt);
+EXPORT_IPV6_MOD(tcp_setsockopt);
static void tcp_get_info_chrono_stats(const struct tcp_sock *tp,
struct tcp_info *info)
@@ -4107,7 +4154,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
info->tcpi_rcv_wscale = tp->rx_opt.rcv_wscale;
}
- if (tp->ecn_flags & TCP_ECN_OK)
+ if (tcp_ecn_mode_any(tp))
info->tcpi_options |= TCPI_OPT_ECN;
if (tp->ecn_flags & TCP_ECN_SEEN)
info->tcpi_options |= TCPI_OPT_ECN_SEEN;
@@ -4638,6 +4685,15 @@ zerocopy_rcv_out:
case TCP_IS_MPTCP:
val = 0;
break;
+ case TCP_RTO_MAX_MS:
+ val = jiffies_to_msecs(tcp_rto_max(sk));
+ break;
+ case TCP_RTO_MIN_US:
+ val = jiffies_to_usecs(READ_ONCE(inet_csk(sk)->icsk_rto_min));
+ break;
+ case TCP_DELACK_MAX_US:
+ val = jiffies_to_usecs(READ_ONCE(inet_csk(sk)->icsk_delack_max));
+ break;
default:
return -ENOPROTOOPT;
}
@@ -4659,7 +4715,7 @@ bool tcp_bpf_bypass_getsockopt(int level, int optname)
return false;
}
-EXPORT_SYMBOL(tcp_bpf_bypass_getsockopt);
+EXPORT_IPV6_MOD(tcp_bpf_bypass_getsockopt);
int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
int __user *optlen)
@@ -4673,11 +4729,11 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
return do_tcp_getsockopt(sk, level, optname, USER_SOCKPTR(optval),
USER_SOCKPTR(optlen));
}
-EXPORT_SYMBOL(tcp_getsockopt);
+EXPORT_IPV6_MOD(tcp_getsockopt);
#ifdef CONFIG_TCP_MD5SIG
int tcp_md5_sigpool_id = -1;
-EXPORT_SYMBOL_GPL(tcp_md5_sigpool_id);
+EXPORT_IPV6_MOD_GPL(tcp_md5_sigpool_id);
int tcp_md5_alloc_sigpool(void)
{
@@ -4723,7 +4779,7 @@ int tcp_md5_hash_key(struct tcp_sigpool *hp,
*/
return data_race(crypto_ahash_update(hp->req));
}
-EXPORT_SYMBOL(tcp_md5_hash_key);
+EXPORT_IPV6_MOD(tcp_md5_hash_key);
/* Called with rcu_read_lock() */
static enum skb_drop_reason
@@ -4843,7 +4899,7 @@ tcp_inbound_hash(struct sock *sk, const struct request_sock *req,
return tcp_inbound_md5_hash(sk, skb, saddr, daddr, family,
l3index, md5_location);
}
-EXPORT_SYMBOL_GPL(tcp_inbound_hash);
+EXPORT_IPV6_MOD_GPL(tcp_inbound_hash);
void tcp_done(struct sock *sk)
{
@@ -4992,7 +5048,12 @@ static void __init tcp_struct_check(void)
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, rtt_min);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, out_of_order_queue);
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, snd_ssthresh);
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+ CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_read_rx, tcp_clean_acked);
+ CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 77);
+#else
CACHELINE_ASSERT_GROUP_SIZE(struct tcp_sock, tcp_sock_read_rx, 69);
+#endif
/* TX read-write hotpath cache lines */
CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_tx, segs_out);