From 8395406b3495235d73c7aa86ef8df97830e036d6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 19 Dec 2022 14:22:12 +0000 Subject: rxrpc: Fix trace string Fix a trace string to indicate that it's discarding the local endpoint for a preallocated peer, not a preallocated connection. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- net/rxrpc/call_accept.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/rxrpc') diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 3e8689fdc437..0f5a1d77b890 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -195,7 +195,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) tail = b->peer_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_peer *peer = b->peer_backlog[tail]; - rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_conn); + rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_peer); kfree(peer); tail = (tail + 1) & (size - 1); } -- cgit v1.2.3 From 223f59016fa2b6d01814dc53ace1c146857ba236 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 12 Oct 2022 22:06:52 +0100 Subject: rxrpc: Convert call->recvmsg_lock to a spinlock Convert call->recvmsg_lock to a spinlock as it's only ever write-locked. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- net/rxrpc/af_rxrpc.c | 2 +- net/rxrpc/ar-internal.h | 2 +- net/rxrpc/call_object.c | 4 ++-- net/rxrpc/recvmsg.c | 12 ++++++------ 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'net/rxrpc') diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index ebbd4a1c3f86..102f5cbff91a 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -786,7 +786,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol, INIT_LIST_HEAD(&rx->sock_calls); INIT_LIST_HEAD(&rx->to_be_accepted); INIT_LIST_HEAD(&rx->recvmsg_q); - rwlock_init(&rx->recvmsg_lock); + spin_lock_init(&rx->recvmsg_lock); rwlock_init(&rx->call_lock); memset(&rx->srx, 0, sizeof(rx->srx)); diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 433060cade03..808c08cb2ae5 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -149,7 +149,7 @@ struct rxrpc_sock { struct list_head sock_calls; /* List of calls owned by this socket */ struct list_head to_be_accepted; /* calls awaiting acceptance */ struct list_head recvmsg_q; /* Calls awaiting recvmsg's attention */ - rwlock_t recvmsg_lock; /* Lock for recvmsg_q */ + spinlock_t recvmsg_lock; /* Lock for recvmsg_q */ struct key *key; /* security for this socket */ struct key *securities; /* list of server security descriptors */ struct rb_root calls; /* User ID -> call mapping */ diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index f3c9f0201c15..0012589f2aad 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -560,7 +560,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) rxrpc_put_call_slot(call); /* Make sure we don't get any more notifications */ - write_lock(&rx->recvmsg_lock); + spin_lock(&rx->recvmsg_lock); if (!list_empty(&call->recvmsg_link)) { _debug("unlinking once-pending call %p { e=%lx f=%lx }", @@ -573,7 +573,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) call->recvmsg_link.next = NULL; call->recvmsg_link.prev = NULL; - write_unlock(&rx->recvmsg_lock); + spin_unlock(&rx->recvmsg_lock); if (put) rxrpc_put_call(call, rxrpc_call_put_unnotify); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index dd54ceee7bcc..b7545fdc0401 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -40,12 +40,12 @@ void rxrpc_notify_socket(struct rxrpc_call *call) call->notify_rx(sk, call, call->user_call_ID); spin_unlock(&call->notify_lock); } else { - write_lock(&rx->recvmsg_lock); + spin_lock(&rx->recvmsg_lock); if (list_empty(&call->recvmsg_link)) { rxrpc_get_call(call, rxrpc_call_get_notify_socket); list_add_tail(&call->recvmsg_link, &rx->recvmsg_q); } - write_unlock(&rx->recvmsg_lock); + spin_unlock(&rx->recvmsg_lock); if (!sock_flag(sk, SOCK_DEAD)) { _debug("call %ps", sk->sk_data_ready); @@ -335,14 +335,14 @@ try_again: /* Find the next call and dequeue it if we're not just peeking. If we * do dequeue it, that comes with a ref that we will need to release. */ - write_lock(&rx->recvmsg_lock); + spin_lock(&rx->recvmsg_lock); l = rx->recvmsg_q.next; call = list_entry(l, struct rxrpc_call, recvmsg_link); if (!(flags & MSG_PEEK)) list_del_init(&call->recvmsg_link); else rxrpc_get_call(call, rxrpc_call_get_recvmsg); - write_unlock(&rx->recvmsg_lock); + spin_unlock(&rx->recvmsg_lock); call_debug_id = call->debug_id; trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_dequeue, 0); @@ -431,9 +431,9 @@ error_unlock_call: error_requeue_call: if (!(flags & MSG_PEEK)) { - write_lock(&rx->recvmsg_lock); + spin_lock(&rx->recvmsg_lock); list_add(&call->recvmsg_link, &rx->recvmsg_q); - write_unlock(&rx->recvmsg_lock); + spin_unlock(&rx->recvmsg_lock); trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_requeue, 0); } else { rxrpc_put_call(call, rxrpc_call_put_recvmsg); -- cgit v1.2.3 From af094824f20b454ee23b7b5a860b3ba58f4e6938 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 17 Oct 2022 08:54:57 +0100 Subject: rxrpc: Allow a delay to be injected into packet reception If CONFIG_AF_RXRPC_DEBUG_RX_DELAY=y, then a delay is injected between packets and errors being received and them being made available to the processing code, thereby allowing the RTT to be artificially increased. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- net/rxrpc/Kconfig | 9 +++++++++ net/rxrpc/ar-internal.h | 6 ++++++ net/rxrpc/io_thread.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++- net/rxrpc/local_object.c | 6 ++++++ net/rxrpc/misc.c | 7 +++++++ net/rxrpc/sysctl.c | 17 ++++++++++++++++- 6 files changed, 91 insertions(+), 2 deletions(-) (limited to 'net/rxrpc') diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig index 7ae023b37a83..a20986806fea 100644 --- a/net/rxrpc/Kconfig +++ b/net/rxrpc/Kconfig @@ -36,6 +36,15 @@ config AF_RXRPC_INJECT_LOSS Say Y here to inject packet loss by discarding some received and some transmitted packets. +config AF_RXRPC_INJECT_RX_DELAY + bool "Inject delay into packet reception" + depends on SYSCTL + help + Say Y here to inject a delay into packet reception, allowing an + extended RTT time to be modelled. The delay can be configured using + /proc/sys/net/rxrpc/rxrpc_inject_rx_delay, setting a number of + milliseconds up to 0.5s (note that the granularity is actually in + jiffies). config AF_RXRPC_DEBUG bool "RxRPC dynamic debugging" diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 808c08cb2ae5..bfae4a87626f 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -285,6 +285,9 @@ struct rxrpc_local { struct completion io_thread_ready; /* Indication that the I/O thread started */ struct rxrpc_sock *service; /* Service(s) listening on this endpoint */ struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ +#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY + struct sk_buff_head rx_delay_queue; /* Delay injection queue */ +#endif struct sk_buff_head rx_queue; /* Received packets */ struct list_head conn_attend_q; /* Conns requiring immediate attention */ struct list_head call_attend_q; /* Calls requiring immediate attention */ @@ -1109,6 +1112,9 @@ extern unsigned long rxrpc_idle_ack_delay; extern unsigned int rxrpc_rx_window_size; extern unsigned int rxrpc_rx_mtu; extern unsigned int rxrpc_rx_jumbo_max; +#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY +extern unsigned long rxrpc_inject_rx_delay; +#endif /* * net_ns.c diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c index 9e9dfb2fc559..4a3a08a0e2cd 100644 --- a/net/rxrpc/io_thread.c +++ b/net/rxrpc/io_thread.c @@ -25,6 +25,7 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn, */ int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb) { + struct sk_buff_head *rx_queue; struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk); if (unlikely(!local)) { @@ -36,7 +37,16 @@ int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb) skb->mark = RXRPC_SKB_MARK_PACKET; rxrpc_new_skb(skb, rxrpc_skb_new_encap_rcv); - skb_queue_tail(&local->rx_queue, skb); + rx_queue = &local->rx_queue; +#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY + if (rxrpc_inject_rx_delay || + !skb_queue_empty(&local->rx_delay_queue)) { + skb->tstamp = ktime_add_ms(skb->tstamp, rxrpc_inject_rx_delay); + rx_queue = &local->rx_delay_queue; + } +#endif + + skb_queue_tail(rx_queue, skb); rxrpc_wake_up_io_thread(local); return 0; } @@ -407,6 +417,9 @@ int rxrpc_io_thread(void *data) struct rxrpc_local *local = data; struct rxrpc_call *call; struct sk_buff *skb; +#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY + ktime_t now; +#endif bool should_stop; complete(&local->io_thread_ready); @@ -481,6 +494,17 @@ int rxrpc_io_thread(void *data) continue; } + /* Inject a delay into packets if requested. */ +#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY + now = ktime_get_real(); + while ((skb = skb_peek(&local->rx_delay_queue))) { + if (ktime_before(now, skb->tstamp)) + break; + skb = skb_dequeue(&local->rx_delay_queue); + skb_queue_tail(&local->rx_queue, skb); + } +#endif + if (!skb_queue_empty(&local->rx_queue)) { spin_lock_irq(&local->rx_queue.lock); skb_queue_splice_tail_init(&local->rx_queue, &rx_queue); @@ -502,6 +526,28 @@ int rxrpc_io_thread(void *data) if (should_stop) break; + +#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY + skb = skb_peek(&local->rx_delay_queue); + if (skb) { + unsigned long timeout; + ktime_t tstamp = skb->tstamp; + ktime_t now = ktime_get_real(); + s64 delay_ns = ktime_to_ns(ktime_sub(tstamp, now)); + + if (delay_ns <= 0) { + __set_current_state(TASK_RUNNING); + continue; + } + + timeout = nsecs_to_jiffies(delay_ns); + timeout = max(timeout, 1UL); + schedule_timeout(timeout); + __set_current_state(TASK_RUNNING); + continue; + } +#endif + schedule(); } diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index b8eaca5d9f22..07d83a4e5841 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -110,6 +110,9 @@ static struct rxrpc_local *rxrpc_alloc_local(struct net *net, INIT_HLIST_NODE(&local->link); init_rwsem(&local->defrag_sem); init_completion(&local->io_thread_ready); +#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY + skb_queue_head_init(&local->rx_delay_queue); +#endif skb_queue_head_init(&local->rx_queue); INIT_LIST_HEAD(&local->conn_attend_q); INIT_LIST_HEAD(&local->call_attend_q); @@ -434,6 +437,9 @@ void rxrpc_destroy_local(struct rxrpc_local *local) /* At this point, there should be no more packets coming in to the * local endpoint. */ +#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY + rxrpc_purge_queue(&local->rx_delay_queue); +#endif rxrpc_purge_queue(&local->rx_queue); rxrpc_purge_client_connections(local); } diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 056c428d8bf3..825b81183046 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -53,3 +53,10 @@ unsigned int rxrpc_rx_mtu = 5692; * sender that we're willing to handle. */ unsigned int rxrpc_rx_jumbo_max = 4; + +#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY +/* + * The delay to inject into packet reception. + */ +unsigned long rxrpc_inject_rx_delay; +#endif diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index cde3224a5cd2..ecaeb4ecfb58 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -17,6 +17,9 @@ static const unsigned int n_65535 = 65535; static const unsigned int n_max_acks = 255; static const unsigned long one_jiffy = 1; static const unsigned long max_jiffies = MAX_JIFFY_OFFSET; +#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY +static const unsigned long max_500 = 500; +#endif /* * RxRPC operating parameters. @@ -63,6 +66,19 @@ static struct ctl_table rxrpc_sysctl_table[] = { .extra2 = (void *)&max_jiffies, }, + /* Values used in milliseconds */ +#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY + { + .procname = "inject_rx_delay", + .data = &rxrpc_inject_rx_delay, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_doulongvec_minmax, + .extra1 = (void *)SYSCTL_LONG_ZERO, + .extra2 = (void *)&max_500, + }, +#endif + /* Non-time values */ { .procname = "reap_client_conns", @@ -109,7 +125,6 @@ static struct ctl_table rxrpc_sysctl_table[] = { .extra1 = (void *)SYSCTL_ONE, .extra2 = (void *)&four, }, - { } }; -- cgit v1.2.3 From 84e28aa513af814807a5e9a0e5f3cab773946f3c Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 17 Oct 2022 10:55:41 +0100 Subject: rxrpc: Generate extra pings for RTT during heavy-receive call When doing a call that has a single transmitted data packet and a massive amount of received data packets, we only ping for one RTT sample, which means we don't get a good reading on it. Fix this by converting occasional IDLE ACKs into PING ACKs to elicit a response. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- include/trace/events/rxrpc.h | 3 ++- net/rxrpc/call_event.c | 15 ++++++++++++--- net/rxrpc/output.c | 7 +++++-- 3 files changed, 19 insertions(+), 6 deletions(-) (limited to 'net/rxrpc') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index cdcadb1345dc..450b8f345814 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -360,11 +360,12 @@ EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \ EM(rxrpc_propose_ack_input_data, "DataIn ") \ EM(rxrpc_propose_ack_input_data_hole, "DataInH") \ - EM(rxrpc_propose_ack_ping_for_check_life, "ChkLife") \ EM(rxrpc_propose_ack_ping_for_keepalive, "KeepAlv") \ EM(rxrpc_propose_ack_ping_for_lost_ack, "LostAck") \ EM(rxrpc_propose_ack_ping_for_lost_reply, "LostRpl") \ + EM(rxrpc_propose_ack_ping_for_old_rtt, "OldRtt ") \ EM(rxrpc_propose_ack_ping_for_params, "Params ") \ + EM(rxrpc_propose_ack_ping_for_rtt, "Rtt ") \ EM(rxrpc_propose_ack_processing_op, "ProcOp ") \ EM(rxrpc_propose_ack_respond_to_ack, "Rsp2Ack") \ EM(rxrpc_propose_ack_respond_to_ping, "Rsp2Png") \ diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 1abdef15debc..cf9799be4286 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -498,9 +498,18 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0, rxrpc_propose_ack_rx_idle); - if (atomic_read(&call->ackr_nr_unacked) > 2) - rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0, - rxrpc_propose_ack_input_data); + if (atomic_read(&call->ackr_nr_unacked) > 2) { + if (call->peer->rtt_count < 3) + rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, + rxrpc_propose_ack_ping_for_rtt); + else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), + ktime_get_real())) + rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, + rxrpc_propose_ack_ping_for_old_rtt); + else + rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0, + rxrpc_propose_ack_input_data); + } /* Make sure the timer is restarted */ if (!__rxrpc_call_is_complete(call)) { diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index a9746be29634..98b5d0db7761 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -253,12 +253,15 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len); ret = do_udp_sendmsg(conn->local->socket, &msg, len); call->peer->last_tx_at = ktime_get_seconds(); - if (ret < 0) + if (ret < 0) { trace_rxrpc_tx_fail(call->debug_id, serial, ret, rxrpc_tx_point_call_ack); - else + } else { trace_rxrpc_tx_packet(call->debug_id, &txb->wire, rxrpc_tx_point_call_ack); + if (txb->wire.flags & RXRPC_REQUEST_ACK) + call->peer->rtt_last_req = ktime_get_real(); + } rxrpc_tx_backoff(call, ret); if (!__rxrpc_call_is_complete(call)) { -- cgit v1.2.3 From 5bbf953382bec6d3b7003e9389668c1d0863db31 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 17 Oct 2022 11:44:22 +0100 Subject: rxrpc: De-atomic call->ackr_window and call->ackr_nr_unacked call->ackr_window doesn't need to be atomic as ACK generation and ACK transmission are now done in the same thread, so drop the atomic64 handling and split it into two separate members. Similarly, call->ackr_nr_unacked doesn't need to be atomic now either. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- include/trace/events/rxrpc.h | 10 ++++++---- net/rxrpc/ar-internal.h | 5 +++-- net/rxrpc/call_event.c | 2 +- net/rxrpc/call_object.c | 3 ++- net/rxrpc/input.c | 14 +++++++------- net/rxrpc/output.c | 13 +++++-------- net/rxrpc/proc.c | 4 +--- net/rxrpc/recvmsg.c | 6 +++--- 8 files changed, 28 insertions(+), 29 deletions(-) (limited to 'net/rxrpc') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index 450b8f345814..e51a84f349d8 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1152,7 +1152,8 @@ TRACE_EVENT(rxrpc_receive, __field(enum rxrpc_receive_trace, why) __field(rxrpc_serial_t, serial) __field(rxrpc_seq_t, seq) - __field(u64, window) + __field(rxrpc_seq_t, window) + __field(rxrpc_seq_t, wtop) ), TP_fast_assign( @@ -1160,7 +1161,8 @@ TRACE_EVENT(rxrpc_receive, __entry->why = why; __entry->serial = serial; __entry->seq = seq; - __entry->window = atomic64_read(&call->ackr_window); + __entry->window = call->ackr_window; + __entry->wtop = call->ackr_wtop; ), TP_printk("c=%08x %s r=%08x q=%08x w=%08x-%08x", @@ -1168,8 +1170,8 @@ TRACE_EVENT(rxrpc_receive, __print_symbolic(__entry->why, rxrpc_receive_traces), __entry->serial, __entry->seq, - lower_32_bits(__entry->window), - upper_32_bits(__entry->window)) + __entry->window, + __entry->wtop) ); TRACE_EVENT(rxrpc_recvmsg, diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index bfae4a87626f..2ca99688f7f0 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -692,8 +692,9 @@ struct rxrpc_call { /* Receive-phase ACK management (ACKs we send). */ u8 ackr_reason; /* reason to ACK */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ - atomic64_t ackr_window; /* Base (in LSW) and top (in MSW) of SACK window */ - atomic_t ackr_nr_unacked; /* Number of unacked packets */ + rxrpc_seq_t ackr_window; /* Base of SACK window */ + rxrpc_seq_t ackr_wtop; /* Base of SACK window */ + unsigned int ackr_nr_unacked; /* Number of unacked packets */ atomic_t ackr_nr_consumed; /* Number of packets needing hard ACK */ struct { #define RXRPC_SACK_SIZE 256 diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index cf9799be4286..e363f21a2014 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -498,7 +498,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0, rxrpc_propose_ack_rx_idle); - if (atomic_read(&call->ackr_nr_unacked) > 2) { + if (call->ackr_nr_unacked > 2) { if (call->peer->rtt_count < 3) rxrpc_send_ACK(call, RXRPC_ACK_PING, 0, rxrpc_propose_ack_ping_for_rtt); diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 0012589f2aad..6eaffb0d8fdc 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -167,7 +167,8 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp, call->tx_total_len = -1; call->next_rx_timo = 20 * HZ; call->next_req_timo = 1 * HZ; - atomic64_set(&call->ackr_window, 0x100000001ULL); + call->ackr_window = 1; + call->ackr_wtop = 1; memset(&call->sock_node, 0xed, sizeof(call->sock_node)); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 367927a99881..7e65c7d5bff0 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -338,7 +338,8 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial) static void rxrpc_input_update_ack_window(struct rxrpc_call *call, rxrpc_seq_t window, rxrpc_seq_t wtop) { - atomic64_set_release(&call->ackr_window, ((u64)wtop) << 32 | window); + call->ackr_window = window; + call->ackr_wtop = wtop; } /* @@ -367,9 +368,8 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct sk_buff *oos; rxrpc_serial_t serial = sp->hdr.serial; - u64 win = atomic64_read(&call->ackr_window); - rxrpc_seq_t window = lower_32_bits(win); - rxrpc_seq_t wtop = upper_32_bits(win); + rxrpc_seq_t window = call->ackr_window; + rxrpc_seq_t wtop = call->ackr_wtop; rxrpc_seq_t wlimit = window + call->rx_winsize - 1; rxrpc_seq_t seq = sp->hdr.seq; bool last = sp->hdr.flags & RXRPC_LAST_PACKET; @@ -419,7 +419,7 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, else if (!skb_queue_empty(&call->rx_oos_queue)) ack_reason = RXRPC_ACK_DELAY; else - atomic_inc_return(&call->ackr_nr_unacked); + call->ackr_nr_unacked++; window++; if (after(window, wtop)) @@ -567,8 +567,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_serial_t serial = sp->hdr.serial; rxrpc_seq_t seq0 = sp->hdr.seq; - _enter("{%llx,%x},{%u,%x}", - atomic64_read(&call->ackr_window), call->rx_highest_seq, + _enter("{%x,%x,%x},{%u,%x}", + call->ackr_window, call->ackr_wtop, call->rx_highest_seq, skb->len, seq0); if (__rxrpc_call_is_complete(call)) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 98b5d0db7761..b6bd5e6ccb4c 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -86,20 +86,18 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, unsigned int qsize; rxrpc_seq_t window, wtop, wrap_point, ix, first; int rsize; - u64 wtmp; u32 mtu, jmax; u8 *ackp = txb->acks; u8 sack_buffer[sizeof(call->ackr_sack_table)] __aligned(8); - atomic_set(&call->ackr_nr_unacked, 0); + call->ackr_nr_unacked = 0; atomic_set(&call->ackr_nr_consumed, 0); rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill); /* Barrier against rxrpc_input_data(). */ retry: - wtmp = atomic64_read_acquire(&call->ackr_window); - window = lower_32_bits(wtmp); - wtop = upper_32_bits(wtmp); + window = call->ackr_window; + wtop = call->ackr_wtop; txb->ack.firstPacket = htonl(window); txb->ack.nAcks = 0; @@ -111,9 +109,8 @@ retry: */ memcpy(sack_buffer, call->ackr_sack_table, sizeof(sack_buffer)); wrap_point = window + RXRPC_SACK_SIZE - 1; - wtmp = atomic64_read_acquire(&call->ackr_window); - window = lower_32_bits(wtmp); - wtop = upper_32_bits(wtmp); + window = call->ackr_window; + wtop = call->ackr_wtop; if (after(wtop, wrap_point)) { cond_resched(); goto retry; diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index 750158a085cd..682636d3b060 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -55,7 +55,6 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) unsigned long timeout = 0; rxrpc_seq_t acks_hard_ack; char lbuff[50], rbuff[50]; - u64 wtmp; if (v == &rxnet->calls) { seq_puts(seq, @@ -83,7 +82,6 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) } acks_hard_ack = READ_ONCE(call->acks_hard_ack); - wtmp = atomic64_read_acquire(&call->ackr_window); seq_printf(seq, "UDP %-47.47s %-47.47s %4x %08x %08x %s %3u" " %-8.8s %08x %08x %08x %02x %08x %02x %08x %02x %06lx\n", @@ -98,7 +96,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v) call->abort_code, call->debug_id, acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack, - lower_32_bits(wtmp), upper_32_bits(wtmp) - lower_32_bits(wtmp), + call->ackr_window, call->ackr_wtop - call->ackr_window, call->rx_serial, call->cong_cwnd, timeout); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index b7545fdc0401..50d263a6359d 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -95,7 +95,7 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg) } trace_rxrpc_recvdata(call, rxrpc_recvmsg_terminal, - lower_32_bits(atomic64_read(&call->ackr_window)) - 1, + call->ackr_window - 1, call->rx_pkt_offset, call->rx_pkt_len, ret); return ret; } @@ -175,13 +175,13 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call, rx_pkt_len = call->rx_pkt_len; if (rxrpc_call_has_failed(call)) { - seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1; + seq = call->ackr_window - 1; ret = -EIO; goto done; } if (test_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags)) { - seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1; + seq = call->ackr_window - 1; ret = 1; goto done; } -- cgit v1.2.3 From f21e93485bcbfa2753d1447b6198604a2c3d57be Mon Sep 17 00:00:00 2001 From: David Howells Date: Sun, 16 Oct 2022 08:01:32 +0100 Subject: rxrpc: Simplify ACK handling Now that general ACK transmission is done from the same thread as incoming DATA packet wrangling, there's no possibility that the SACK table will be being updated by the latter whilst the former is trying to copy it to an ACK. This means that we can safely rotate the SACK table whilst updating it without having to take a lock, rather than keeping all the bits inside it in fixed place and copying and then rotating it in the transmitter. Therefore, simplify SACK handing by keeping track of starting point in the ring and rotate slots down as we consume them. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- include/trace/events/rxrpc.h | 36 ++++++++++++++++++++++++++++++++++ net/rxrpc/ar-internal.h | 1 + net/rxrpc/input.c | 46 ++++++++++++++++++++++---------------------- net/rxrpc/output.c | 46 +++++++++++++------------------------------- 4 files changed, 73 insertions(+), 56 deletions(-) (limited to 'net/rxrpc') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index e51a84f349d8..b6adec9111e1 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -422,6 +422,13 @@ EM(RXRPC_ACK_IDLE, "IDL") \ E_(RXRPC_ACK__INVALID, "-?-") +#define rxrpc_sack_traces \ + EM(rxrpc_sack_advance, "ADV") \ + EM(rxrpc_sack_fill, "FIL") \ + EM(rxrpc_sack_nack, "NAK") \ + EM(rxrpc_sack_none, "---") \ + E_(rxrpc_sack_oos, "OOS") + #define rxrpc_completions \ EM(RXRPC_CALL_SUCCEEDED, "Succeeded") \ EM(RXRPC_CALL_REMOTELY_ABORTED, "RemoteAbort") \ @@ -497,6 +504,7 @@ enum rxrpc_recvmsg_trace { rxrpc_recvmsg_traces } __mode(byte); enum rxrpc_req_ack_trace { rxrpc_req_ack_traces } __mode(byte); enum rxrpc_rtt_rx_trace { rxrpc_rtt_rx_traces } __mode(byte); enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_traces } __mode(byte); +enum rxrpc_sack_trace { rxrpc_sack_traces } __mode(byte); enum rxrpc_skb_trace { rxrpc_skb_traces } __mode(byte); enum rxrpc_timer_trace { rxrpc_timer_traces } __mode(byte); enum rxrpc_tx_point { rxrpc_tx_points } __mode(byte); @@ -531,6 +539,7 @@ rxrpc_recvmsg_traces; rxrpc_req_ack_traces; rxrpc_rtt_rx_traces; rxrpc_rtt_tx_traces; +rxrpc_sack_traces; rxrpc_skb_traces; rxrpc_timer_traces; rxrpc_tx_points; @@ -1929,6 +1938,33 @@ TRACE_EVENT(rxrpc_call_poked, __entry->call_debug_id) ); +TRACE_EVENT(rxrpc_sack, + TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq, + unsigned int sack, enum rxrpc_sack_trace what), + + TP_ARGS(call, seq, sack, what), + + TP_STRUCT__entry( + __field(unsigned int, call_debug_id) + __field(rxrpc_seq_t, seq) + __field(unsigned int, sack) + __field(enum rxrpc_sack_trace, what) + ), + + TP_fast_assign( + __entry->call_debug_id = call->debug_id; + __entry->seq = seq; + __entry->sack = sack; + __entry->what = what; + ), + + TP_printk("c=%08x q=%08x %s k=%x", + __entry->call_debug_id, + __entry->seq, + __print_symbolic(__entry->what, rxrpc_sack_traces), + __entry->sack) + ); + #undef EM #undef E_ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 2ca99688f7f0..2b1d0d3ca064 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -691,6 +691,7 @@ struct rxrpc_call { /* Receive-phase ACK management (ACKs we send). */ u8 ackr_reason; /* reason to ACK */ + u16 ackr_sack_base; /* Starting slot in SACK table ring */ rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */ rxrpc_seq_t ackr_window; /* Base of SACK window */ rxrpc_seq_t ackr_wtop; /* Base of SACK window */ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 7e65c7d5bff0..d68848fce51f 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -368,6 +368,7 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct sk_buff *oos; rxrpc_serial_t serial = sp->hdr.serial; + unsigned int sack = call->ackr_sack_base; rxrpc_seq_t window = call->ackr_window; rxrpc_seq_t wtop = call->ackr_wtop; rxrpc_seq_t wlimit = window + call->rx_winsize - 1; @@ -410,9 +411,6 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, /* Queue the packet. */ if (seq == window) { - rxrpc_seq_t reset_from; - bool reset_sack = false; - if (sp->hdr.flags & RXRPC_REQUEST_ACK) ack_reason = RXRPC_ACK_REQUESTED; /* Send an immediate ACK if we fill in a hole */ @@ -422,8 +420,14 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, call->ackr_nr_unacked++; window++; - if (after(window, wtop)) + if (after(window, wtop)) { + trace_rxrpc_sack(call, seq, sack, rxrpc_sack_none); wtop = window; + } else { + trace_rxrpc_sack(call, seq, sack, rxrpc_sack_advance); + sack = (sack + 1) % RXRPC_SACK_SIZE; + } + rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg); @@ -440,43 +444,39 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, __skb_unlink(oos, &call->rx_oos_queue); last = osp->hdr.flags & RXRPC_LAST_PACKET; seq = osp->hdr.seq; - if (!reset_sack) { - reset_from = seq; - reset_sack = true; - } + call->ackr_sack_table[sack] = 0; + trace_rxrpc_sack(call, seq, sack, rxrpc_sack_fill); + sack = (sack + 1) % RXRPC_SACK_SIZE; window++; rxrpc_input_queue_data(call, oos, window, wtop, - rxrpc_receive_queue_oos); + rxrpc_receive_queue_oos); } spin_unlock(&call->recvmsg_queue.lock); - if (reset_sack) { - do { - call->ackr_sack_table[reset_from % RXRPC_SACK_SIZE] = 0; - } while (reset_from++, before(reset_from, window)); - } + call->ackr_sack_base = sack; } else { - bool keep = false; + unsigned int slot; ack_reason = RXRPC_ACK_OUT_OF_SEQUENCE; - if (!call->ackr_sack_table[seq % RXRPC_SACK_SIZE]) { - call->ackr_sack_table[seq % RXRPC_SACK_SIZE] = 1; - keep = 1; + slot = seq - window; + sack = (sack + slot) % RXRPC_SACK_SIZE; + + if (call->ackr_sack_table[sack % RXRPC_SACK_SIZE]) { + ack_reason = RXRPC_ACK_DUPLICATE; + goto send_ack; } + call->ackr_sack_table[sack % RXRPC_SACK_SIZE] |= 1; + trace_rxrpc_sack(call, seq, sack, rxrpc_sack_oos); + if (after(seq + 1, wtop)) { wtop = seq + 1; rxrpc_input_update_ack_window(call, window, wtop); } - if (!keep) { - ack_reason = RXRPC_ACK_DUPLICATE; - goto send_ack; - } - skb_queue_walk(&call->rx_oos_queue, oos) { struct rxrpc_skb_priv *osp = rxrpc_skb(oos); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index b6bd5e6ccb4c..c69c31470fa8 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -83,56 +83,36 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, struct rxrpc_txbuf *txb) { struct rxrpc_ackinfo ackinfo; - unsigned int qsize; - rxrpc_seq_t window, wtop, wrap_point, ix, first; + unsigned int qsize, sack, wrap, to; + rxrpc_seq_t window, wtop; int rsize; u32 mtu, jmax; u8 *ackp = txb->acks; - u8 sack_buffer[sizeof(call->ackr_sack_table)] __aligned(8); call->ackr_nr_unacked = 0; atomic_set(&call->ackr_nr_consumed, 0); rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill); + clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags); - /* Barrier against rxrpc_input_data(). */ -retry: window = call->ackr_window; wtop = call->ackr_wtop; + sack = call->ackr_sack_base % RXRPC_SACK_SIZE; txb->ack.firstPacket = htonl(window); - txb->ack.nAcks = 0; + txb->ack.nAcks = wtop - window; if (after(wtop, window)) { - /* Try to copy the SACK ring locklessly. We can use the copy, - * only if the now-current top of the window didn't go past the - * previously read base - otherwise we can't know whether we - * have old data or new data. - */ - memcpy(sack_buffer, call->ackr_sack_table, sizeof(sack_buffer)); - wrap_point = window + RXRPC_SACK_SIZE - 1; - window = call->ackr_window; - wtop = call->ackr_wtop; - if (after(wtop, wrap_point)) { - cond_resched(); - goto retry; - } - - /* The buffer is maintained as a ring with an invariant mapping - * between bit position and sequence number, so we'll probably - * need to rotate it. - */ - txb->ack.nAcks = wtop - window; - ix = window % RXRPC_SACK_SIZE; - first = sizeof(sack_buffer) - ix; + wrap = RXRPC_SACK_SIZE - sack; + to = min_t(unsigned int, txb->ack.nAcks, RXRPC_SACK_SIZE); - if (ix + txb->ack.nAcks <= RXRPC_SACK_SIZE) { - memcpy(txb->acks, sack_buffer + ix, txb->ack.nAcks); + if (sack + txb->ack.nAcks <= RXRPC_SACK_SIZE) { + memcpy(txb->acks, call->ackr_sack_table + sack, txb->ack.nAcks); } else { - memcpy(txb->acks, sack_buffer + ix, first); - memcpy(txb->acks + first, sack_buffer, - txb->ack.nAcks - first); + memcpy(txb->acks, call->ackr_sack_table + sack, wrap); + memcpy(txb->acks + wrap, call->ackr_sack_table, + to - wrap); } - ackp += txb->ack.nAcks; + ackp += to; } else if (before(wtop, window)) { pr_warn("ack window backward %x %x", window, wtop); } else if (txb->ack.reason == RXRPC_ACK_DELAY) { -- cgit v1.2.3 From b30d61f4b12899101f754238f02069ff4d6161c2 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 17 Oct 2022 22:48:58 +0100 Subject: rxrpc: Don't lock call->tx_lock to access call->tx_buffer call->tx_buffer is now only accessed within the I/O thread (->tx_sendmsg is the way sendmsg passes packets to the I/O thread) so there's no need to lock around it. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- net/rxrpc/txbuf.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'net/rxrpc') diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c index d2cf2aac3adb..d43be8512386 100644 --- a/net/rxrpc/txbuf.c +++ b/net/rxrpc/txbuf.c @@ -110,12 +110,8 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call) _enter("%x/%x/%x", call->tx_bottom, call->acks_hard_ack, call->tx_top); - for (;;) { - spin_lock(&call->tx_lock); - txb = list_first_entry_or_null(&call->tx_buffer, - struct rxrpc_txbuf, call_link); - if (!txb) - break; + while ((txb = list_first_entry_or_null(&call->tx_buffer, + struct rxrpc_txbuf, call_link))) { hard_ack = smp_load_acquire(&call->acks_hard_ack); if (before(hard_ack, txb->seq)) break; @@ -128,15 +124,11 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call) trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue); - spin_unlock(&call->tx_lock); - rxrpc_put_txbuf(txb, rxrpc_txbuf_put_rotated); if (after(call->acks_hard_ack, call->tx_bottom + 128)) wake = true; } - spin_unlock(&call->tx_lock); - if (wake) wake_up(&call->waitq); } -- cgit v1.2.3 From e7f40f4a701b67ada4a843bcc253711d8a34b1f1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 17 Oct 2022 22:52:06 +0100 Subject: rxrpc: Remove local->defrag_sem We no longer need local->defrag_sem as all DATA packet transmission is now done from one thread, so remove it. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- net/rxrpc/ar-internal.h | 1 - net/rxrpc/local_object.c | 1 - net/rxrpc/output.c | 7 ------- 3 files changed, 9 deletions(-) (limited to 'net/rxrpc') diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 2b1d0d3ca064..9e19688b0e06 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -284,7 +284,6 @@ struct rxrpc_local { struct task_struct *io_thread; struct completion io_thread_ready; /* Indication that the I/O thread started */ struct rxrpc_sock *service; /* Service(s) listening on this endpoint */ - struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */ #ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY struct sk_buff_head rx_delay_queue; /* Delay injection queue */ #endif diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 07d83a4e5841..7d910aee4f8c 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -108,7 +108,6 @@ static struct rxrpc_local *rxrpc_alloc_local(struct net *net, local->net = net; local->rxnet = rxrpc_net(net); INIT_HLIST_NODE(&local->link); - init_rwsem(&local->defrag_sem); init_completion(&local->io_thread_ready); #ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY skb_queue_head_init(&local->rx_delay_queue); diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index c69c31470fa8..6b2022240076 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -409,8 +409,6 @@ dont_set_request_ack: if (txb->len >= call->peer->maxdata) goto send_fragmentable; - down_read(&conn->local->defrag_sem); - txb->last_sent = ktime_get_real(); if (txb->wire.flags & RXRPC_REQUEST_ACK) rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data); @@ -425,7 +423,6 @@ dont_set_request_ack: ret = do_udp_sendmsg(conn->local->socket, &msg, len); conn->peer->last_tx_at = ktime_get_seconds(); - up_read(&conn->local->defrag_sem); if (ret < 0) { rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail); rxrpc_cancel_rtt_probe(call, serial, rtt_slot); @@ -486,8 +483,6 @@ send_fragmentable: /* attempt to send this message with fragmentation enabled */ _debug("send fragment"); - down_write(&conn->local->defrag_sem); - txb->last_sent = ktime_get_real(); if (txb->wire.flags & RXRPC_REQUEST_ACK) rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data); @@ -519,8 +514,6 @@ send_fragmentable: rxrpc_tx_point_call_data_frag); } rxrpc_tx_backoff(call, ret); - - up_write(&conn->local->defrag_sem); goto done; } -- cgit v1.2.3 From f20fe3ff82b321287ba59cab12e4f34eec9a7e10 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 11 Nov 2022 22:01:04 +0000 Subject: rxrpc: Show consumed and freed packets as non-dropped in dropwatch Set a reason when freeing a packet that has been consumed such that dropwatch doesn't complain that it has been dropped. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- net/rxrpc/skbuff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/rxrpc') diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index ebe0c75e7b07..944320e65ea8 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -63,7 +63,7 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) if (skb) { int n = atomic_dec_return(select_skb_count(skb)); trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_CONSUMED); } } @@ -78,6 +78,6 @@ void rxrpc_purge_queue(struct sk_buff_head *list) int n = atomic_dec_return(select_skb_count(skb)); trace_rxrpc_skb(skb, refcount_read(&skb->users), n, rxrpc_skb_put_purge); - kfree_skb(skb); + kfree_skb_reason(skb, SKB_CONSUMED); } } -- cgit v1.2.3 From 550130a0ce303f7cd754c7067b0a971ca179db63 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 16 Nov 2022 16:42:02 +0000 Subject: rxrpc: Kill service bundle Now that the bundle->channel_lock has been eliminated, we don't need the dummy service bundle anymore. It's purpose was purely to provide the channel_lock for service connections. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- net/rxrpc/conn_service.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'net/rxrpc') diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index f30323de82bd..89ac05a711a4 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -8,11 +8,6 @@ #include #include "ar-internal.h" -static struct rxrpc_bundle rxrpc_service_dummy_bundle = { - .ref = REFCOUNT_INIT(1), - .debug_id = UINT_MAX, -}; - /* * Find a service connection under RCU conditions. * @@ -132,8 +127,6 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn */ conn->state = RXRPC_CONN_SERVICE_PREALLOC; refcount_set(&conn->ref, 2); - conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle, - rxrpc_bundle_get_service_conn); atomic_inc(&rxnet->nr_conns); write_lock(&rxnet->conn_lock); -- cgit v1.2.3 From 16d5677ef1041beee18b5709bf5759611ec82875 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 7 Feb 2023 22:11:30 +0000 Subject: rxrpc: Use consume_skb() rather than kfree_skb_reason() Use consume_skb() rather than kfree_skb_reason(). Reported-by: Paolo Abeni Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- net/rxrpc/skbuff.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/rxrpc') diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c index 944320e65ea8..3bcd6ee80396 100644 --- a/net/rxrpc/skbuff.c +++ b/net/rxrpc/skbuff.c @@ -63,7 +63,7 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace why) if (skb) { int n = atomic_dec_return(select_skb_count(skb)); trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why); - kfree_skb_reason(skb, SKB_CONSUMED); + consume_skb(skb); } } @@ -78,6 +78,6 @@ void rxrpc_purge_queue(struct sk_buff_head *list) int n = atomic_dec_return(select_skb_count(skb)); trace_rxrpc_skb(skb, refcount_read(&skb->users), n, rxrpc_skb_put_purge); - kfree_skb_reason(skb, SKB_CONSUMED); + consume_skb(skb); } } -- cgit v1.2.3 From a33395ab85b9b9cff83948a03a1d6d96347935d8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 29 Nov 2022 12:37:37 +0000 Subject: rxrpc: Fix overwaking on call poking If an rxrpc call is given a poke, it will get woken up unconditionally, even if there's already a poke pending (for which there will have been a wake) or if the call refcount has gone to 0. Fix this by only waking the call if it is still referenced and if it doesn't already have a poke pending. Fixes: 15f661dc95da ("rxrpc: Implement a mechanism to send an event notification to a call") Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- net/rxrpc/call_object.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/rxrpc') diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index 6eaffb0d8fdc..e9f1f49d18c2 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -54,12 +54,14 @@ void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what) spin_lock_bh(&local->lock); busy = !list_empty(&call->attend_link); trace_rxrpc_poke_call(call, busy, what); + if (!busy && !rxrpc_try_get_call(call, rxrpc_call_get_poke)) + busy = true; if (!busy) { - rxrpc_get_call(call, rxrpc_call_get_poke); list_add_tail(&call->attend_link, &local->call_attend_q); } spin_unlock_bh(&local->lock); - rxrpc_wake_up_io_thread(local); + if (!busy) + rxrpc_wake_up_io_thread(local); } } -- cgit v1.2.3 From f789bff2deb3ddae08950f8e4a1e6f41b916c520 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 31 Jan 2023 15:31:49 +0000 Subject: rxrpc: Trace ack.rwind Log ack.rwind in the rxrpc_tx_ack tracepoint. This value is useful to see as it represents flow-control information to the peer. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- include/trace/events/rxrpc.h | 11 +++++++---- net/rxrpc/conn_event.c | 2 +- net/rxrpc/output.c | 10 +++++++--- 3 files changed, 15 insertions(+), 8 deletions(-) (limited to 'net/rxrpc') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index d7bb4acf4580..c3c0b0aa8381 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -1118,9 +1118,9 @@ TRACE_EVENT(rxrpc_tx_data, TRACE_EVENT(rxrpc_tx_ack, TP_PROTO(unsigned int call, rxrpc_serial_t serial, rxrpc_seq_t ack_first, rxrpc_serial_t ack_serial, - u8 reason, u8 n_acks), + u8 reason, u8 n_acks, u16 rwind), - TP_ARGS(call, serial, ack_first, ack_serial, reason, n_acks), + TP_ARGS(call, serial, ack_first, ack_serial, reason, n_acks, rwind), TP_STRUCT__entry( __field(unsigned int, call) @@ -1129,6 +1129,7 @@ TRACE_EVENT(rxrpc_tx_ack, __field(rxrpc_serial_t, ack_serial) __field(u8, reason) __field(u8, n_acks) + __field(u16, rwind) ), TP_fast_assign( @@ -1138,15 +1139,17 @@ TRACE_EVENT(rxrpc_tx_ack, __entry->ack_serial = ack_serial; __entry->reason = reason; __entry->n_acks = n_acks; + __entry->rwind = rwind; ), - TP_printk(" c=%08x ACK %08x %s f=%08x r=%08x n=%u", + TP_printk(" c=%08x ACK %08x %s f=%08x r=%08x n=%u rw=%u", __entry->call, __entry->serial, __print_symbolic(__entry->reason, rxrpc_ack_names), __entry->ack_first, __entry->ack_serial, - __entry->n_acks) + __entry->n_acks, + __entry->rwind) ); TRACE_EVENT(rxrpc_receive, diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c index 44414e724415..95f4bc206b3d 100644 --- a/net/rxrpc/conn_event.c +++ b/net/rxrpc/conn_event.c @@ -163,7 +163,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, trace_rxrpc_tx_ack(chan->call_debug_id, serial, ntohl(pkt.ack.firstPacket), ntohl(pkt.ack.serial), - pkt.ack.reason, 0); + pkt.ack.reason, 0, rxrpc_rx_window_size); break; default: diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 6b2022240076..5e53429c6922 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -80,7 +80,8 @@ static void rxrpc_set_keepalive(struct rxrpc_call *call) */ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, struct rxrpc_call *call, - struct rxrpc_txbuf *txb) + struct rxrpc_txbuf *txb, + u16 *_rwind) { struct rxrpc_ackinfo ackinfo; unsigned int qsize, sack, wrap, to; @@ -124,6 +125,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, jmax = rxrpc_rx_jumbo_max; qsize = (window - 1) - call->rx_consumed; rsize = max_t(int, call->rx_winsize - qsize, 0); + *_rwind = rsize; ackinfo.rxMTU = htonl(rxrpc_rx_mtu); ackinfo.maxMTU = htonl(mtu); ackinfo.rwind = htonl(rsize); @@ -190,6 +192,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) rxrpc_serial_t serial; size_t len, n; int ret, rtt_slot = -1; + u16 rwind; if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) return -ECONNRESET; @@ -205,7 +208,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) if (txb->ack.reason == RXRPC_ACK_PING) txb->wire.flags |= RXRPC_REQUEST_ACK; - n = rxrpc_fill_out_ack(conn, call, txb); + n = rxrpc_fill_out_ack(conn, call, txb, &rwind); if (n == 0) return 0; @@ -217,7 +220,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb) txb->wire.serial = htonl(serial); trace_rxrpc_tx_ack(call->debug_id, serial, ntohl(txb->ack.firstPacket), - ntohl(txb->ack.serial), txb->ack.reason, txb->ack.nAcks); + ntohl(txb->ack.serial), txb->ack.reason, txb->ack.nAcks, + rwind); if (txb->ack.reason == RXRPC_ACK_PING) rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_ping); -- cgit v1.2.3 From 5a2c5a5b0829ef8bcb5d868145c1d8c1221c5637 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 31 Jan 2023 21:40:18 +0000 Subject: rxrpc: Reduce unnecessary ack transmission rxrpc_recvmsg_data() schedules an ACK to be transmitted every time at least two packets have been consumed and any time it runs out of data and would return -EAGAIN to the caller. Both events may occur within a single loop, however, and if the I/O thread is quick enough it may send duplicate ACKs. The ACKs are sent to inform the peer that more space has been made in the local Rx window, but the I/O thread is going to send an ACK every couple of DATA packets anyway, so we end up sending a lot more ACKs than we really need to. So reduce the rate at which recvmsg() schedules ACKs, such that if the I/O thread sends ACKs at its normal faster rate, recvmsg() won't actually schedule ACKs until the Rx flow stops (call->rx_consumed is cleared any time we transmit an ACK for that call, resetting the counter used by recvmsg). Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org --- net/rxrpc/recvmsg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/rxrpc') diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 50d263a6359d..76eb2b9cd936 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -137,7 +137,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call) /* Check to see if there's an ACK that needs sending. */ acked = atomic_add_return(call->rx_consumed - old_consumed, &call->ackr_nr_consumed); - if (acked > 2 && + if (acked > 8 && !test_and_set_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags)) rxrpc_poke_call(call, rxrpc_call_poke_idle); } -- cgit v1.2.3 From c078381856230f1e8e13738661d83c2b4b433819 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 15 Feb 2023 21:48:05 +0000 Subject: rxrpc: Fix overproduction of wakeups to recvmsg() Fix three cases of overproduction of wakeups: (1) rxrpc_input_split_jumbo() conditionally notifies the app that there's data for recvmsg() to collect if it queues some data - and then its only caller, rxrpc_input_data(), goes and wakes up recvmsg() anyway. Fix the rxrpc_input_data() to only do the wakeup in failure cases. (2) If a DATA packet is received for a call by the I/O thread whilst recvmsg() is busy draining the call's rx queue in the app thread, the call will left on the recvmsg() queue for recvmsg() to pick up, even though there isn't any data on it. This can cause an unexpected recvmsg() with a 0 return and no MSG_EOR set after the reply has been posted to a service call. Fix this by discarding pending calls from the recvmsg() queue that don't need servicing yet. (3) Not-yet-completed calls get requeued after having data read from them, even if they have no data to read. Fix this by only requeuing them if they have data waiting on them; if they don't, the I/O thread will requeue them when data arrives or they fail. Signed-off-by: David Howells cc: Marc Dionne cc: linux-afs@lists.infradead.org Link: https://lore.kernel.org/r/3386149.1676497685@warthog.procyon.org.uk Signed-off-by: Paolo Abeni --- include/trace/events/rxrpc.h | 1 + net/rxrpc/input.c | 2 +- net/rxrpc/recvmsg.c | 16 +++++++++++++++- 3 files changed, 17 insertions(+), 2 deletions(-) (limited to 'net/rxrpc') diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h index c3c0b0aa8381..4c53a5ef6257 100644 --- a/include/trace/events/rxrpc.h +++ b/include/trace/events/rxrpc.h @@ -318,6 +318,7 @@ EM(rxrpc_recvmsg_return, "RETN") \ EM(rxrpc_recvmsg_terminal, "TERM") \ EM(rxrpc_recvmsg_to_be_accepted, "TBAC") \ + EM(rxrpc_recvmsg_unqueue, "UNQU") \ E_(rxrpc_recvmsg_wait, "WAIT") #define rxrpc_rtt_tx_traces \ diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index d68848fce51f..030d64f282f3 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -606,7 +606,7 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb) rxrpc_proto_abort(call, sp->hdr.seq, rxrpc_badmsg_bad_jumbo); goto out_notify; } - skb = NULL; + return; out_notify: trace_rxrpc_notify_socket(call->debug_id, serial); diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 76eb2b9cd936..a482f88c5fc5 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -334,10 +334,23 @@ try_again: /* Find the next call and dequeue it if we're not just peeking. If we * do dequeue it, that comes with a ref that we will need to release. + * We also want to weed out calls that got requeued whilst we were + * shovelling data out. */ spin_lock(&rx->recvmsg_lock); l = rx->recvmsg_q.next; call = list_entry(l, struct rxrpc_call, recvmsg_link); + + if (!rxrpc_call_is_complete(call) && + skb_queue_empty(&call->recvmsg_queue)) { + list_del_init(&call->recvmsg_link); + spin_unlock(&rx->recvmsg_lock); + release_sock(&rx->sk); + trace_rxrpc_recvmsg(call->debug_id, rxrpc_recvmsg_unqueue, 0); + rxrpc_put_call(call, rxrpc_call_put_recvmsg); + goto try_again; + } + if (!(flags & MSG_PEEK)) list_del_init(&call->recvmsg_link); else @@ -402,7 +415,8 @@ try_again: if (rxrpc_call_has_failed(call)) goto call_failed; - rxrpc_notify_socket(call); + if (!skb_queue_empty(&call->recvmsg_queue)) + rxrpc_notify_socket(call); goto not_yet_complete; call_failed: -- cgit v1.2.3