diff options
Diffstat (limited to 'net/rxrpc/input.c')
-rw-r--r-- | net/rxrpc/input.c | 712 |
1 files changed, 448 insertions, 264 deletions
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 16d49a861dbb..24aceb183c2c 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -27,80 +27,68 @@ static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq, } /* - * Do TCP-style congestion management [RFC 5681]. + * Do TCP-style congestion management [RFC5681]. */ static void rxrpc_congestion_management(struct rxrpc_call *call, - struct sk_buff *skb, - struct rxrpc_ack_summary *summary, - rxrpc_serial_t acked_serial) + struct rxrpc_ack_summary *summary) { - enum rxrpc_congest_change change = rxrpc_cong_no_change; - unsigned int cumulative_acks = call->cong_cumul_acks; - unsigned int cwnd = call->cong_cwnd; - bool resend = false; - - summary->flight_size = - (call->tx_top - call->acks_hard_ack) - summary->nr_acks; + summary->change = rxrpc_cong_no_change; + summary->in_flight = rxrpc_tx_in_flight(call); if (test_and_clear_bit(RXRPC_CALL_RETRANS_TIMEOUT, &call->flags)) { summary->retrans_timeo = true; - call->cong_ssthresh = max_t(unsigned int, - summary->flight_size / 2, 2); - cwnd = 1; - if (cwnd >= call->cong_ssthresh && - call->cong_mode == RXRPC_CALL_SLOW_START) { - call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; - call->cong_tstamp = skb->tstamp; - cumulative_acks = 0; + call->cong_ssthresh = umax(summary->in_flight / 2, 2); + call->cong_cwnd = 1; + if (call->cong_cwnd >= call->cong_ssthresh && + call->cong_ca_state == RXRPC_CA_SLOW_START) { + call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; + call->cong_tstamp = call->acks_latest_ts; + call->cong_cumul_acks = 0; } } - cumulative_acks += summary->nr_new_acks; - if (cumulative_acks > 255) - cumulative_acks = 255; - - summary->cwnd = call->cong_cwnd; - summary->ssthresh = call->cong_ssthresh; - summary->cumulative_acks = cumulative_acks; - summary->dup_acks = call->cong_dup_acks; + call->cong_cumul_acks += summary->nr_new_sacks; + call->cong_cumul_acks += summary->nr_new_hacks; + if (call->cong_cumul_acks > 255) + call->cong_cumul_acks = 255; - switch (call->cong_mode) { - case RXRPC_CALL_SLOW_START: - if (summary->saw_nacks) + switch (call->cong_ca_state) { + case RXRPC_CA_SLOW_START: + if (call->acks_nr_snacks > 0) goto packet_loss_detected; - if (summary->cumulative_acks > 0) - cwnd += 1; - if (cwnd >= call->cong_ssthresh) { - call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; - call->cong_tstamp = skb->tstamp; + if (call->cong_cumul_acks > 0) + call->cong_cwnd += 1; + if (call->cong_cwnd >= call->cong_ssthresh) { + call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; + call->cong_tstamp = call->acks_latest_ts; } goto out; - case RXRPC_CALL_CONGEST_AVOIDANCE: - if (summary->saw_nacks) + case RXRPC_CA_CONGEST_AVOIDANCE: + if (call->acks_nr_snacks > 0) goto packet_loss_detected; /* We analyse the number of packets that get ACK'd per RTT * period and increase the window if we managed to fill it. */ - if (call->peer->rtt_count == 0) + if (call->rtt_count == 0) goto out; - if (ktime_before(skb->tstamp, + if (ktime_before(call->acks_latest_ts, ktime_add_us(call->cong_tstamp, - call->peer->srtt_us >> 3))) + call->srtt_us >> 3))) goto out_no_clear_ca; - change = rxrpc_cong_rtt_window_end; - call->cong_tstamp = skb->tstamp; - if (cumulative_acks >= cwnd) - cwnd++; + summary->change = rxrpc_cong_rtt_window_end; + call->cong_tstamp = call->acks_latest_ts; + if (call->cong_cumul_acks >= call->cong_cwnd) + call->cong_cwnd++; goto out; - case RXRPC_CALL_PACKET_LOSS: - if (!summary->saw_nacks) + case RXRPC_CA_PACKET_LOSS: + if (call->acks_nr_snacks == 0) goto resume_normality; - if (summary->new_low_nack) { - change = rxrpc_cong_new_low_nack; + if (summary->new_low_snack) { + summary->change = rxrpc_cong_new_low_nack; call->cong_dup_acks = 1; if (call->cong_extra > 1) call->cong_extra = 1; @@ -111,31 +99,35 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, if (call->cong_dup_acks < 3) goto send_extra_data; - change = rxrpc_cong_begin_retransmission; - call->cong_mode = RXRPC_CALL_FAST_RETRANSMIT; - call->cong_ssthresh = max_t(unsigned int, - summary->flight_size / 2, 2); - cwnd = call->cong_ssthresh + 3; + summary->change = rxrpc_cong_begin_retransmission; + call->cong_ca_state = RXRPC_CA_FAST_RETRANSMIT; + call->cong_ssthresh = umax(summary->in_flight / 2, 2); + call->cong_cwnd = call->cong_ssthresh + 3; call->cong_extra = 0; call->cong_dup_acks = 0; - resend = true; + summary->need_retransmit = true; + summary->in_fast_or_rto_recovery = true; goto out; - case RXRPC_CALL_FAST_RETRANSMIT: - if (!summary->new_low_nack) { - if (summary->nr_new_acks == 0) - cwnd += 1; + case RXRPC_CA_FAST_RETRANSMIT: + rxrpc_tlp_init(call); + summary->in_fast_or_rto_recovery = true; + if (!summary->new_low_snack) { + if (summary->nr_new_sacks == 0) + call->cong_cwnd += 1; call->cong_dup_acks++; if (call->cong_dup_acks == 2) { - change = rxrpc_cong_retransmit_again; + summary->change = rxrpc_cong_retransmit_again; call->cong_dup_acks = 0; - resend = true; + summary->need_retransmit = true; } } else { - change = rxrpc_cong_progress; - cwnd = call->cong_ssthresh; - if (!summary->saw_nacks) + summary->change = rxrpc_cong_progress; + call->cong_cwnd = call->cong_ssthresh; + if (call->acks_nr_snacks == 0) { + summary->exiting_fast_or_rto_recovery = true; goto resume_normality; + } } goto out; @@ -145,30 +137,25 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, } resume_normality: - change = rxrpc_cong_cleared_nacks; + summary->change = rxrpc_cong_cleared_nacks; call->cong_dup_acks = 0; call->cong_extra = 0; - call->cong_tstamp = skb->tstamp; - if (cwnd < call->cong_ssthresh) - call->cong_mode = RXRPC_CALL_SLOW_START; + call->cong_tstamp = call->acks_latest_ts; + if (call->cong_cwnd < call->cong_ssthresh) + call->cong_ca_state = RXRPC_CA_SLOW_START; else - call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE; + call->cong_ca_state = RXRPC_CA_CONGEST_AVOIDANCE; out: - cumulative_acks = 0; + call->cong_cumul_acks = 0; out_no_clear_ca: - if (cwnd >= RXRPC_TX_MAX_WINDOW) - cwnd = RXRPC_TX_MAX_WINDOW; - call->cong_cwnd = cwnd; - call->cong_cumul_acks = cumulative_acks; - summary->mode = call->cong_mode; - trace_rxrpc_congest(call, summary, acked_serial, change); - if (resend) - rxrpc_resend(call, skb); + if (call->cong_cwnd >= RXRPC_TX_MAX_WINDOW) + call->cong_cwnd = RXRPC_TX_MAX_WINDOW; + trace_rxrpc_congest(call, summary); return; packet_loss_detected: - change = rxrpc_cong_saw_nack; - call->cong_mode = RXRPC_CALL_PACKET_LOSS; + summary->change = rxrpc_cong_saw_nack; + call->cong_ca_state = RXRPC_CA_PACKET_LOSS; call->cong_dup_acks = 0; goto send_extra_data; @@ -177,7 +164,7 @@ send_extra_data: * state. */ if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) || - summary->nr_acks != call->tx_top - call->acks_hard_ack) { + call->acks_nr_sacks != call->tx_top - call->tx_bottom) { call->cong_extra++; wake_up(&call->waitq); } @@ -189,26 +176,42 @@ send_extra_data: */ void rxrpc_congestion_degrade(struct rxrpc_call *call) { - ktime_t rtt, now; + ktime_t rtt, now, time_since; - if (call->cong_mode != RXRPC_CALL_SLOW_START && - call->cong_mode != RXRPC_CALL_CONGEST_AVOIDANCE) + if (call->cong_ca_state != RXRPC_CA_SLOW_START && + call->cong_ca_state != RXRPC_CA_CONGEST_AVOIDANCE) return; if (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_AWAIT_REPLY) return; - rtt = ns_to_ktime(call->peer->srtt_us * (1000 / 8)); + rtt = ns_to_ktime(call->srtt_us * (NSEC_PER_USEC / 8)); now = ktime_get_real(); - if (!ktime_before(ktime_add(call->tx_last_sent, rtt), now)) + time_since = ktime_sub(now, call->tx_last_sent); + if (ktime_before(time_since, rtt)) return; - trace_rxrpc_reset_cwnd(call, now); + trace_rxrpc_reset_cwnd(call, time_since, rtt); rxrpc_inc_stat(call->rxnet, stat_tx_data_cwnd_reset); call->tx_last_sent = now; - call->cong_mode = RXRPC_CALL_SLOW_START; - call->cong_ssthresh = max_t(unsigned int, call->cong_ssthresh, - call->cong_cwnd * 3 / 4); - call->cong_cwnd = max_t(unsigned int, call->cong_cwnd / 2, RXRPC_MIN_CWND); + call->cong_ca_state = RXRPC_CA_SLOW_START; + call->cong_ssthresh = umax(call->cong_ssthresh, call->cong_cwnd * 3 / 4); + call->cong_cwnd = umax(call->cong_cwnd / 2, RXRPC_MIN_CWND); +} + +/* + * Add an RTT sample derived from an ACK'd DATA packet. + */ +static void rxrpc_add_data_rtt_sample(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + int ix) +{ + ktime_t xmit_ts = ktime_add_us(tq->xmit_ts_base, tq->segment_xmit_ts[ix]); + + rxrpc_call_add_rtt(call, rxrpc_rtt_rx_data_ack, -1, + summary->acked_serial, summary->ack_serial, + xmit_ts, call->acks_latest_ts); + __clear_bit(ix, &tq->rtt_samples); /* Prevent repeat RTT sample */ } /* @@ -217,37 +220,120 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call) static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to, struct rxrpc_ack_summary *summary) { - struct rxrpc_txbuf *txb; - bool rot_last = false; + struct rxrpc_txqueue *tq = call->tx_queue; + rxrpc_seq_t seq = call->tx_bottom + 1; + bool rot_last = false, trace = false; - list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) { - if (before_eq(txb->seq, call->acks_hard_ack)) - continue; - if (txb->flags & RXRPC_LAST_PACKET) { + _enter("%x,%x", call->tx_bottom, to); + + trace_rxrpc_tx_rotate(call, seq, to); + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate); + + if (call->acks_lowest_nak == call->tx_bottom) { + call->acks_lowest_nak = to; + } else if (after(to, call->acks_lowest_nak)) { + summary->new_low_snack = true; + call->acks_lowest_nak = to; + } + + /* We may have a left over fully-consumed buffer at the front that we + * couldn't drop before (rotate_and_keep below). + */ + if (seq == call->tx_qbase + RXRPC_NR_TXQUEUE) { + call->tx_qbase += RXRPC_NR_TXQUEUE; + call->tx_queue = tq->next; + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_free); + kfree(tq); + tq = call->tx_queue; + } + + do { + unsigned int ix = seq - call->tx_qbase; + + _debug("tq=%x seq=%x i=%d f=%x", tq->qbase, seq, ix, tq->bufs[ix]->flags); + if (tq->bufs[ix]->flags & RXRPC_LAST_PACKET) { set_bit(RXRPC_CALL_TX_LAST, &call->flags); rot_last = true; } - if (txb->seq == to) - break; - } - if (rot_last) - set_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags); + if (summary->acked_serial == tq->segment_serial[ix] && + test_bit(ix, &tq->rtt_samples)) + rxrpc_add_data_rtt_sample(call, summary, tq, ix); + + if (ix == tq->nr_reported_acks) { + /* Packet directly hard ACK'd. */ + tq->nr_reported_acks++; + rxrpc_input_rack_one(call, summary, tq, ix); + if (seq == call->tlp_seq) + summary->tlp_probe_acked = true; + summary->nr_new_hacks++; + __set_bit(ix, &tq->segment_acked); + trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_hack); + } else if (test_bit(ix, &tq->segment_acked)) { + /* Soft ACK -> hard ACK. */ + call->acks_nr_sacks--; + trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_sack); + } else { + /* Soft NAK -> hard ACK. */ + call->acks_nr_snacks--; + rxrpc_input_rack_one(call, summary, tq, ix); + if (seq == call->tlp_seq) + summary->tlp_probe_acked = true; + summary->nr_new_hacks++; + __set_bit(ix, &tq->segment_acked); + trace_rxrpc_rotate(call, tq, summary, seq, rxrpc_rotate_trace_snak); + } - _enter("%x,%x,%x,%d", to, call->acks_hard_ack, call->tx_top, rot_last); + call->tx_nr_sent--; + if (__test_and_clear_bit(ix, &tq->segment_lost)) + call->tx_nr_lost--; + if (__test_and_clear_bit(ix, &tq->segment_retransmitted)) + call->tx_nr_resent--; + __clear_bit(ix, &tq->ever_retransmitted); - if (call->acks_lowest_nak == call->acks_hard_ack) { - call->acks_lowest_nak = to; - } else if (after(to, call->acks_lowest_nak)) { - summary->new_low_nack = true; - call->acks_lowest_nak = to; + rxrpc_put_txbuf(tq->bufs[ix], rxrpc_txbuf_put_rotated); + tq->bufs[ix] = NULL; + + WRITE_ONCE(call->tx_bottom, seq); + trace_rxrpc_txqueue(call, (rot_last ? + rxrpc_txqueue_rotate_last : + rxrpc_txqueue_rotate)); + + seq++; + trace = true; + if (!(seq & RXRPC_TXQ_MASK)) { + trace_rxrpc_rack_update(call, summary); + trace = false; + prefetch(tq->next); + if (tq != call->tx_qtail) { + call->tx_qbase += RXRPC_NR_TXQUEUE; + call->tx_queue = tq->next; + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_free); + kfree(tq); + tq = call->tx_queue; + } else { + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_keep); + tq = NULL; + break; + } + } + + } while (before_eq(seq, to)); + + if (trace) + trace_rxrpc_rack_update(call, summary); + + if (rot_last) { + set_bit(RXRPC_CALL_TX_ALL_ACKED, &call->flags); + if (tq) { + trace_rxrpc_tq(call, tq, seq, rxrpc_tq_rotate_and_free); + kfree(tq); + call->tx_queue = NULL; + } } - smp_store_release(&call->acks_hard_ack, to); + _debug("%x,%x,%x,%d", to, call->tx_bottom, call->tx_top, rot_last); - trace_rxrpc_txqueue(call, (rot_last ? - rxrpc_txqueue_rotate_last : - rxrpc_txqueue_rotate)); wake_up(&call->waitq); return rot_last; } @@ -263,13 +349,10 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, { ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags)); - call->resend_at = KTIME_MAX; - trace_rxrpc_timer_can(call, rxrpc_timer_trace_resend); - - if (unlikely(call->cong_last_nack)) { - rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); - call->cong_last_nack = NULL; - } + call->rack_timer_mode = RXRPC_CALL_RACKTIMER_OFF; + call->rack_timo_at = KTIME_MAX; + trace_rxrpc_rack_timer(call, 0, false); + trace_rxrpc_timer_can(call, rxrpc_timer_trace_rack_off + call->rack_timer_mode); switch (__rxrpc_call_state(call)) { case RXRPC_CALL_CLIENT_SEND_REQUEST: @@ -365,11 +448,19 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); bool last = sp->hdr.flags & RXRPC_LAST_PACKET; + spin_lock_irq(&call->recvmsg_queue.lock); + __skb_queue_tail(&call->recvmsg_queue, skb); rxrpc_input_update_ack_window(call, window, wtop); trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq); if (last) + /* Change the state inside the lock so that recvmsg syncs + * correctly with it and using sendmsg() to send a reply + * doesn't race. + */ rxrpc_end_rx_phase(call, sp->hdr.serial); + + spin_unlock_irq(&call->recvmsg_queue.lock); } /* @@ -442,7 +533,6 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg); - spin_lock(&call->recvmsg_queue.lock); rxrpc_input_queue_data(call, skb, window, wtop, rxrpc_receive_queue); *_notify = true; @@ -464,8 +554,6 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb, rxrpc_receive_queue_oos); } - spin_unlock(&call->recvmsg_queue.lock); - call->ackr_sack_base = sack; } else { unsigned int slot; @@ -530,7 +618,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb unsigned int offset = sizeof(struct rxrpc_wire_header); unsigned int len = skb->len - offset; bool notify = false; - int ack_reason = 0; + int ack_reason = 0, count = 1, stat_ix; while (sp->hdr.flags & RXRPC_JUMBO_PACKET) { if (len < RXRPC_JUMBO_SUBPKTLEN) @@ -559,12 +647,16 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb sp->hdr.serial++; offset += RXRPC_JUMBO_SUBPKTLEN; len -= RXRPC_JUMBO_SUBPKTLEN; + count++; } sp->offset = offset; sp->len = len; rxrpc_input_data_one(call, skb, ¬ify, &ack_serial, &ack_reason); + stat_ix = umin(count, ARRAY_SIZE(call->rxnet->stat_rx_jumbo)) - 1; + atomic_inc(&call->rxnet->stat_rx_jumbo[stat_ix]); + if (ack_reason > 0) { rxrpc_send_ACK(call, ack_reason, ack_serial, rxrpc_propose_ack_input_data); @@ -573,7 +665,7 @@ static bool rxrpc_input_split_jumbo(struct rxrpc_call *call, struct sk_buff *skb rxrpc_propose_delay_ACK(call, sp->hdr.serial, rxrpc_propose_ack_input_data); } - if (notify) { + if (notify && !test_bit(RXRPC_CALL_CONN_CHALLENGING, &call->flags)) { trace_rxrpc_notify_socket(call->debug_id, sp->hdr.serial); rxrpc_notify_socket(call); } @@ -667,7 +759,7 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call, clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); smp_mb(); /* Read data before setting avail bit */ set_bit(i, &call->rtt_avail); - rxrpc_peer_add_rtt(call, type, i, acked_serial, ack_serial, + rxrpc_call_add_rtt(call, type, i, acked_serial, ack_serial, sent_at, resp_time); matched = true; } @@ -677,7 +769,7 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call, */ if (after(acked_serial, orig_serial)) { trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_obsolete, i, - orig_serial, acked_serial, 0, 0); + orig_serial, acked_serial, 0, 0, 0); clear_bit(i + RXRPC_CALL_RTT_PEND_SHIFT, &call->rtt_avail); smp_wmb(); set_bit(i, &call->rtt_avail); @@ -685,7 +777,7 @@ static void rxrpc_complete_rtt_probe(struct rxrpc_call *call, } if (!matched) - trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_lost, 9, 0, acked_serial, 0, 0); + trace_rxrpc_rtt_rx(call, rxrpc_rtt_rx_lost, 9, 0, acked_serial, 0, 0, 0); } /* @@ -695,10 +787,13 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb struct rxrpc_acktrailer *trailer) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - struct rxrpc_peer *peer; - unsigned int mtu; + struct rxrpc_peer *peer = call->peer; + unsigned int max_data, capacity; bool wake = false; - u32 rwind = ntohl(trailer->rwind); + u32 max_mtu = ntohl(trailer->maxMTU); + //u32 if_mtu = ntohl(trailer->ifMTU); + u32 rwind = ntohl(trailer->rwind); + u32 jumbo_max = ntohl(trailer->jumbo_max); if (rwind > RXRPC_TX_MAX_WINDOW) rwind = RXRPC_TX_MAX_WINDOW; @@ -709,54 +804,147 @@ static void rxrpc_input_ack_trailer(struct rxrpc_call *call, struct sk_buff *skb call->tx_winsize = rwind; } - mtu = min(ntohl(trailer->maxMTU), ntohl(trailer->ifMTU)); + max_mtu = clamp(max_mtu, 500, 65535); + peer->ackr_max_data = max_mtu; + + if (max_mtu < peer->max_data) { + trace_rxrpc_pmtud_reduce(peer, sp->hdr.serial, max_mtu, + rxrpc_pmtud_reduce_ack); + peer->max_data = max_mtu; + } + + max_data = umin(max_mtu, peer->max_data); + capacity = max_data; + capacity += sizeof(struct rxrpc_jumbo_header); /* First subpacket has main hdr, not jumbo */ + capacity /= sizeof(struct rxrpc_jumbo_header) + RXRPC_JUMBO_DATALEN; - peer = call->peer; - if (mtu < peer->maxdata) { - spin_lock(&peer->lock); - peer->maxdata = mtu; - peer->mtu = mtu + peer->hdrsize; - spin_unlock(&peer->lock); + if (jumbo_max == 0) { + /* The peer says it supports pmtu discovery */ + peer->ackr_adv_pmtud = true; + } else { + peer->ackr_adv_pmtud = false; + capacity = clamp(capacity, 1, jumbo_max); } + call->tx_jumbo_max = capacity; + if (wake) wake_up(&call->waitq); } +#if defined(CONFIG_X86) && __GNUC__ && !defined(__clang__) +/* Clang doesn't support the %z constraint modifier */ +#define shiftr_adv_rotr(shift_from, rotate_into) ({ \ + asm(" shr%z1 %1\n" \ + " inc %0\n" \ + " rcr%z2 %2\n" \ + : "+d"(shift_from), "+m"(*(shift_from)), "+rm"(rotate_into) \ + ); \ + }) +#else +#define shiftr_adv_rotr(shift_from, rotate_into) ({ \ + typeof(rotate_into) __bit0 = *(shift_from) & 1; \ + *(shift_from) >>= 1; \ + shift_from++; \ + rotate_into >>= 1; \ + rotate_into |= __bit0 << (sizeof(rotate_into) * 8 - 1); \ + }) +#endif + /* - * Determine how many nacks from the previous ACK have now been satisfied. + * Deal with RTT samples from soft ACKs. */ -static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, - struct rxrpc_ack_summary *summary, - rxrpc_seq_t seq) +static void rxrpc_input_soft_rtt(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq) { - struct sk_buff *skb = call->cong_last_nack; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - unsigned int i, new_acks = 0, retained_nacks = 0; - rxrpc_seq_t old_seq = sp->ack.first_ack; - u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); + for (int ix = 0; ix < RXRPC_NR_TXQUEUE; ix++) + if (summary->acked_serial == tq->segment_serial[ix]) + return rxrpc_add_data_rtt_sample(call, summary, tq, ix); +} - if (after_eq(seq, old_seq + sp->ack.nr_acks)) { - summary->nr_new_acks += sp->ack.nr_nacks; - summary->nr_new_acks += seq - (old_seq + sp->ack.nr_acks); - summary->nr_retained_nacks = 0; - } else if (seq == old_seq) { - summary->nr_retained_nacks = sp->ack.nr_nacks; - } else { - for (i = 0; i < sp->ack.nr_acks; i++) { - if (acks[i] == RXRPC_ACK_TYPE_NACK) { - if (before(old_seq + i, seq)) - new_acks++; - else - retained_nacks++; - } +/* + * Process a batch of soft ACKs specific to a transmission queue segment. + */ +static void rxrpc_input_soft_ack_tq(struct rxrpc_call *call, + struct rxrpc_ack_summary *summary, + struct rxrpc_txqueue *tq, + unsigned long extracted_acks, + int nr_reported, + rxrpc_seq_t seq, + rxrpc_seq_t *lowest_nak) +{ + unsigned long old_reported = 0, flipped, new_acks = 0; + unsigned long a_to_n, n_to_a = 0; + int new, a, n; + + if (tq->nr_reported_acks > 0) + old_reported = ~0UL >> (RXRPC_NR_TXQUEUE - tq->nr_reported_acks); + + _enter("{%x,%lx,%d},%lx,%d,%x", + tq->qbase, tq->segment_acked, tq->nr_reported_acks, + extracted_acks, nr_reported, seq); + + _debug("[%x]", tq->qbase); + _debug("tq %16lx %u", tq->segment_acked, tq->nr_reported_acks); + _debug("sack %16lx %u", extracted_acks, nr_reported); + + /* See how many previously logged ACKs/NAKs have flipped. */ + flipped = (tq->segment_acked ^ extracted_acks) & old_reported; + if (flipped) { + n_to_a = ~tq->segment_acked & flipped; /* Old NAK -> ACK */ + a_to_n = tq->segment_acked & flipped; /* Old ACK -> NAK */ + a = hweight_long(n_to_a); + n = hweight_long(a_to_n); + _debug("flip %16lx", flipped); + _debug("ntoa %16lx %d", n_to_a, a); + _debug("aton %16lx %d", a_to_n, n); + call->acks_nr_sacks += a - n; + call->acks_nr_snacks += n - a; + summary->nr_new_sacks += a; + summary->nr_new_snacks += n; + } + + /* See how many new ACKs/NAKs have been acquired. */ + new = nr_reported - tq->nr_reported_acks; + if (new > 0) { + new_acks = extracted_acks & ~old_reported; + if (new_acks) { + a = hweight_long(new_acks); + n = new - a; + _debug("new_a %16lx new=%d a=%d n=%d", new_acks, new, a, n); + call->acks_nr_sacks += a; + call->acks_nr_snacks += n; + summary->nr_new_sacks += a; + summary->nr_new_snacks += n; + } else { + call->acks_nr_snacks += new; + summary->nr_new_snacks += new; } + } + + tq->nr_reported_acks = nr_reported; + tq->segment_acked = extracted_acks; + trace_rxrpc_apply_acks(call, tq); - summary->nr_new_acks += new_acks; - summary->nr_retained_nacks = retained_nacks; + if (extracted_acks != ~0UL) { + rxrpc_seq_t lowest = seq + ffz(extracted_acks); + + if (before(lowest, *lowest_nak)) + *lowest_nak = lowest; } - return old_seq + sp->ack.nr_acks; + if (summary->acked_serial) + rxrpc_input_soft_rtt(call, summary, tq); + + new_acks |= n_to_a; + if (new_acks) + rxrpc_input_rack(call, summary, tq, new_acks); + + if (call->tlp_serial && + rxrpc_seq_in_txq(tq, call->tlp_seq) && + test_bit(call->tlp_seq - tq->qbase, &new_acks)) + summary->tlp_probe_acked = true; } /* @@ -770,39 +958,50 @@ static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call, */ static void rxrpc_input_soft_acks(struct rxrpc_call *call, struct rxrpc_ack_summary *summary, - struct sk_buff *skb, - rxrpc_seq_t seq, - rxrpc_seq_t since) + struct sk_buff *skb) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); - unsigned int i, old_nacks = 0; + struct rxrpc_txqueue *tq = call->tx_queue; + unsigned long extracted = ~0UL; + unsigned int nr = 0; + rxrpc_seq_t seq = call->acks_hard_ack + 1; rxrpc_seq_t lowest_nak = seq + sp->ack.nr_acks; u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); - for (i = 0; i < sp->ack.nr_acks; i++) { - if (acks[i] == RXRPC_ACK_TYPE_ACK) { - summary->nr_acks++; - if (after_eq(seq, since)) - summary->nr_new_acks++; - } else { - summary->saw_nacks = true; - if (before(seq, since)) { - /* Overlap with previous ACK */ - old_nacks++; - } else { - summary->nr_new_nacks++; - sp->ack.nr_nacks++; - } + _enter("%x,%x,%u", tq->qbase, seq, sp->ack.nr_acks); + + while (after(seq, tq->qbase + RXRPC_NR_TXQUEUE - 1)) + tq = tq->next; - if (before(seq, lowest_nak)) - lowest_nak = seq; + for (unsigned int i = 0; i < sp->ack.nr_acks; i++) { + /* Decant ACKs until we hit a txqueue boundary. */ + shiftr_adv_rotr(acks, extracted); + if (i == 256) { + acks -= i; + i = 0; } seq++; + nr++; + if ((seq & RXRPC_TXQ_MASK) != 0) + continue; + + _debug("bound %16lx %u", extracted, nr); + + rxrpc_input_soft_ack_tq(call, summary, tq, extracted, RXRPC_NR_TXQUEUE, + seq - RXRPC_NR_TXQUEUE, &lowest_nak); + extracted = ~0UL; + nr = 0; + tq = tq->next; + prefetch(tq); } - if (lowest_nak != call->acks_lowest_nak) { - call->acks_lowest_nak = lowest_nak; - summary->new_low_nack = true; + if (nr) { + unsigned int nr_reported = seq & RXRPC_TXQ_MASK; + + extracted >>= RXRPC_NR_TXQUEUE - nr_reported; + _debug("tail %16lx %u", extracted, nr_reported); + rxrpc_input_soft_ack_tq(call, summary, tq, extracted, nr_reported, + seq & ~RXRPC_TXQ_MASK, &lowest_nak); } /* We *can* have more nacks than we did - the peer is permitted to drop @@ -810,9 +1009,14 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, * possible for the nack distribution to change whilst the number of * nacks stays the same or goes down. */ - if (old_nacks < summary->nr_retained_nacks) - summary->nr_new_acks += summary->nr_retained_nacks - old_nacks; - summary->nr_retained_nacks = old_nacks; + if (lowest_nak != call->acks_lowest_nak) { + call->acks_lowest_nak = lowest_nak; + summary->new_low_snack = true; + } + + _debug("summary A=%d+%d N=%d+%d", + call->acks_nr_sacks, summary->nr_new_sacks, + call->acks_nr_snacks, summary->nr_new_snacks); } /* @@ -820,21 +1024,21 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, * with respect to the ack state conveyed by preceding ACKs. */ static bool rxrpc_is_ack_valid(struct rxrpc_call *call, - rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt) + rxrpc_seq_t hard_ack, rxrpc_seq_t prev_pkt) { - rxrpc_seq_t base = READ_ONCE(call->acks_first_seq); + rxrpc_seq_t base = READ_ONCE(call->acks_hard_ack); - if (after(first_pkt, base)) + if (after(hard_ack, base)) return true; /* The window advanced */ - if (before(first_pkt, base)) + if (before(hard_ack, base)) return false; /* firstPacket regressed */ if (after_eq(prev_pkt, call->acks_prev_seq)) return true; /* previousPacket hasn't regressed. */ /* Some rx implementations put a serial number in previousPacket. */ - if (after_eq(prev_pkt, base + call->tx_winsize)) + if (after(prev_pkt, base + call->tx_winsize)) return false; return true; } @@ -852,53 +1056,34 @@ static bool rxrpc_is_ack_valid(struct rxrpc_call *call, static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) { struct rxrpc_ack_summary summary = { 0 }; - struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_acktrailer trailer; - rxrpc_serial_t ack_serial, acked_serial; - rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since; + struct rxrpc_skb_priv *sp = rxrpc_skb(skb); + rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt; int nr_acks, offset, ioffset; _enter(""); offset = sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket); - ack_serial = sp->hdr.serial; - acked_serial = sp->ack.acked_serial; - first_soft_ack = sp->ack.first_ack; - prev_pkt = sp->ack.prev_ack; - nr_acks = sp->ack.nr_acks; - hard_ack = first_soft_ack - 1; - summary.ack_reason = (sp->ack.reason < RXRPC_ACK__INVALID ? - sp->ack.reason : RXRPC_ACK__INVALID); - - trace_rxrpc_rx_ack(call, ack_serial, acked_serial, - first_soft_ack, prev_pkt, - summary.ack_reason, nr_acks); - rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]); + summary.ack_serial = sp->hdr.serial; + first_soft_ack = sp->ack.first_ack; + prev_pkt = sp->ack.prev_ack; + nr_acks = sp->ack.nr_acks; + hard_ack = first_soft_ack - 1; + summary.acked_serial = sp->ack.acked_serial; + summary.ack_reason = (sp->ack.reason < RXRPC_ACK__INVALID ? + sp->ack.reason : RXRPC_ACK__INVALID); - if (acked_serial != 0) { - switch (summary.ack_reason) { - case RXRPC_ACK_PING_RESPONSE: - rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, - rxrpc_rtt_rx_ping_response); - break; - case RXRPC_ACK_REQUESTED: - rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, - rxrpc_rtt_rx_requested_ack); - break; - default: - rxrpc_complete_rtt_probe(call, skb->tstamp, acked_serial, ack_serial, - rxrpc_rtt_rx_other_ack); - break; - } - } + trace_rxrpc_rx_ack(call, sp); + rxrpc_inc_stat(call->rxnet, stat_rx_acks[summary.ack_reason]); + prefetch(call->tx_queue); /* If we get an EXCEEDS_WINDOW ACK from the server, it probably * indicates that the client address changed due to NAT. The server * lost the call because it switched to a different peer. */ if (unlikely(summary.ack_reason == RXRPC_ACK_EXCEEDS_WINDOW) && - first_soft_ack == 1 && + hard_ack == 0 && prev_pkt == 0 && rxrpc_is_client_call(call)) { rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, @@ -911,9 +1096,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) * if we still have it buffered to the beginning. */ if (unlikely(summary.ack_reason == RXRPC_ACK_OUT_OF_SEQUENCE) && - first_soft_ack == 1 && + hard_ack == 0 && prev_pkt == 0 && - call->acks_hard_ack == 0 && + call->tx_bottom == 0 && rxrpc_is_client_call(call)) { rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED, 0, -ENETRESET); @@ -921,11 +1106,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) } /* Discard any out-of-order or duplicate ACKs (outside lock). */ - if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { - trace_rxrpc_rx_discard_ack(call->debug_id, ack_serial, - first_soft_ack, call->acks_first_seq, - prev_pkt, call->acks_prev_seq); - goto send_response; + if (!rxrpc_is_ack_valid(call, hard_ack, prev_pkt)) { + trace_rxrpc_rx_discard_ack(call, summary.ack_serial, hard_ack, prev_pkt); + goto send_response; /* Still respond if requested. */ } trailer.maxMTU = 0; @@ -937,34 +1120,30 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (nr_acks > 0) skb_condense(skb); - if (call->cong_last_nack) { - since = rxrpc_input_check_prev_ack(call, &summary, first_soft_ack); - rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack); - call->cong_last_nack = NULL; - } else { - summary.nr_new_acks = first_soft_ack - call->acks_first_seq; - call->acks_lowest_nak = first_soft_ack + nr_acks; - since = first_soft_ack; - } - - call->acks_latest_ts = skb->tstamp; - call->acks_first_seq = first_soft_ack; + call->acks_latest_ts = ktime_get_real(); + call->acks_hard_ack = hard_ack; call->acks_prev_seq = prev_pkt; - switch (summary.ack_reason) { - case RXRPC_ACK_PING: - break; - default: - if (acked_serial && after(acked_serial, call->acks_highest_serial)) - call->acks_highest_serial = acked_serial; - break; + if (summary.acked_serial) { + switch (summary.ack_reason) { + case RXRPC_ACK_PING_RESPONSE: + rxrpc_complete_rtt_probe(call, call->acks_latest_ts, + summary.acked_serial, summary.ack_serial, + rxrpc_rtt_rx_ping_response); + break; + default: + if (after(summary.acked_serial, call->acks_highest_serial)) + call->acks_highest_serial = summary.acked_serial; + summary.rtt_sample_avail = true; + break; + } } /* Parse rwind and mtu sizes if provided. */ if (trailer.maxMTU) rxrpc_input_ack_trailer(call, skb, &trailer); - if (first_soft_ack == 0) + if (hard_ack + 1 == 0) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero); /* Ignore ACKs unless we are or have just been transmitting. */ @@ -978,13 +1157,13 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) goto send_response; } - if (before(hard_ack, call->acks_hard_ack) || + if (before(hard_ack, call->tx_bottom) || after(hard_ack, call->tx_top)) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_outside_window); if (nr_acks > call->tx_top - hard_ack) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow); - if (after(hard_ack, call->acks_hard_ack)) { + if (after(hard_ack, call->tx_bottom)) { if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) { rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack); goto send_response; @@ -994,25 +1173,30 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) if (nr_acks > 0) { if (offset > (int)skb->len - nr_acks) return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack); - rxrpc_input_soft_acks(call, &summary, skb, first_soft_ack, since); - rxrpc_get_skb(skb, rxrpc_skb_get_last_nack); - call->cong_last_nack = skb; + rxrpc_input_soft_acks(call, &summary, skb); } if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) && - summary.nr_acks == call->tx_top - hard_ack && + call->acks_nr_sacks == call->tx_top - hard_ack && rxrpc_is_client_call(call)) - rxrpc_propose_ping(call, ack_serial, + rxrpc_propose_ping(call, summary.ack_serial, rxrpc_propose_ack_ping_for_lost_reply); - rxrpc_congestion_management(call, skb, &summary, acked_serial); + /* Drive the congestion management algorithm first and then RACK-TLP as + * the latter depends on the state/change in state in the former. + */ + rxrpc_congestion_management(call, &summary); + rxrpc_rack_detect_loss_and_arm_timer(call, &summary); + rxrpc_tlp_process_ack(call, &summary); + if (call->tlp_serial && after_eq(summary.acked_serial, call->tlp_serial)) + call->tlp_serial = 0; send_response: if (summary.ack_reason == RXRPC_ACK_PING) - rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, ack_serial, + rxrpc_send_ACK(call, RXRPC_ACK_PING_RESPONSE, summary.ack_serial, rxrpc_propose_ack_respond_to_ping); else if (sp->hdr.flags & RXRPC_REQUEST_ACK) - rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, ack_serial, + rxrpc_send_ACK(call, RXRPC_ACK_REQUESTED, summary.ack_serial, rxrpc_propose_ack_respond_to_ack); } @@ -1111,5 +1295,5 @@ void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb) break; } - rxrpc_input_call_event(call, skb); + rxrpc_input_call_event(call); } |