diff options
author | Jakub Kicinski <kuba@kernel.org> | 2023-06-21 22:45:59 -0700 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2023-06-21 22:46:00 -0700 |
commit | 98e95872f2b818c74872d073eaa4c937579d41fc (patch) | |
tree | 3d00ae3cdbad3686021de3ee04a2ddd429e53979 /tools | |
parent | 5dfbbaa208f5429a02ccb410ae3515222bbe64ef (diff) | |
parent | 528cb5f2a1e859522f36f091f29f5c81ec6d4a4c (diff) | |
download | lwn-98e95872f2b818c74872d073eaa4c937579d41fc.tar.gz lwn-98e95872f2b818c74872d073eaa4c937579d41fc.zip |
Merge branch 'mptcp-expose-more-info-and-small-improvements'
Matthieu Baerts says:
====================
mptcp: expose more info and small improvements
Patch 1-3/9 track and expose some aggregated data counters at the MPTCP
level: the number of retransmissions and the bytes that have been
transferred. The first patch prepares the work by moving where snd_una
is updated for fallback sockets while the last patch adds some tests to
cover the new code.
Patch 4-6/9 introduce a new getsockopt for SOL_MPTCP: MPTCP_FULL_INFO.
This new socket option allows to combine info from MPTCP_INFO,
MPTCP_TCPINFO and MPTCP_SUBFLOW_ADDRS socket options into one. It can be
needed to have all info in one because the path-manager can close and
re-create subflows between getsockopt() and fooling the accounting. The
first patch introduces a unique subflow ID to easily detect when
subflows are being re-created with the same 5-tuple while the last patch
adds some tests to cover the new code.
Please note that patch 5/9 ("mptcp: introduce MPTCP_FULL_INFO getsockopt")
can reveal a bug that were there for a bit of time, see [1]. A fix has
recently been fixed to netdev for the -net tree: "mptcp: ensure listener
is unhashed before updating the sk status", see [2]. There is no
conflicts between the two patches but it might be better to apply this
series after the one for -net and after having merged "net" into
"net-next".
Patch 7/9 is similar to commit 47867f0a7e83 ("selftests: mptcp: join:
skip check if MIB counter not supported") recently applied in the -net
tree but here it adapts the new code that is only in net-next (and it
fixes a merge conflict resolution which didn't have any impact).
Patch 8 and 9/9 are two simple refactoring. One to consolidate the
transition to TCP_CLOSE in mptcp_do_fastclose() and avoid duplicated
code. The other one reduces the scope of an argument passed to
mptcp_pm_alloc_anno_list() function.
Link: https://github.com/multipath-tcp/mptcp_net-next/issues/407 [1]
Link: https://lore.kernel.org/netdev/20230620-upstream-net-20230620-misc-fixes-for-v6-4-v1-0-f36aa5eae8b9@tessares.net/ [2]
====================
Link: https://lore.kernel.org/r/20230620-upstream-net-next-20230620-mptcp-expose-more-info-and-misc-v1-0-62b9444bfd48@tessares.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'tools')
-rwxr-xr-x | tools/testing/selftests/net/mptcp/mptcp_join.sh | 33 | ||||
-rw-r--r-- | tools/testing/selftests/net/mptcp/mptcp_sockopt.c | 120 |
2 files changed, 134 insertions, 19 deletions
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 1b68fe1c0885..a7973d6a40a0 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -1683,12 +1683,12 @@ chk_add_tx_nr() timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) printf "%-${nr_blank}s %s" " " "add TX" - count=$(ip netns exec $ns1 nstat -as MPTcpExtAddAddrTx | grep MPTcpExtAddAddrTx | awk '{print $2}') - [ -z "$count" ] && count=0 - + count=$(get_counter ${ns1} "MPTcpExtAddAddrTx") + if [ -z "$count" ]; then + echo -n "[skip]" # if the test configured a short timeout tolerate greater then expected # add addrs options, due to retransmissions - if [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then + elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then echo "[fail] got $count ADD_ADDR[s] TX, expected $add_tx_nr" fail_test else @@ -1696,9 +1696,10 @@ chk_add_tx_nr() fi echo -n " - echo TX " - count=$(ip netns exec $ns2 nstat -as MPTcpExtEchoAddTx | grep MPTcpExtEchoAddTx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$echo_tx_nr" ]; then + count=$(get_counter ${ns2} "MPTcpExtEchoAddTx") + if [ -z "$count" ]; then + echo "[skip]" + elif [ "$count" != "$echo_tx_nr" ]; then echo "[fail] got $count ADD_ADDR echo[s] TX, expected $echo_tx_nr" fail_test else @@ -1734,9 +1735,10 @@ chk_rm_nr() fi printf "%-${nr_blank}s %s" " " "rm " - count=$(ip netns exec $addr_ns nstat -as MPTcpExtRmAddr | grep MPTcpExtRmAddr | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$rm_addr_nr" ]; then + count=$(get_counter ${addr_ns} "MPTcpExtRmAddr") + if [ -z "$count" ]; then + echo -n "[skip]" + elif [ "$count" != "$rm_addr_nr" ]; then echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr" fail_test else @@ -1778,16 +1780,15 @@ chk_rm_tx_nr() local rm_addr_tx_nr=$1 printf "%-${nr_blank}s %s" " " "rm TX " - count=$(ip netns exec $ns2 nstat -as MPTcpExtRmAddrTx | grep MPTcpExtRmAddrTx | awk '{print $2}') - [ -z "$count" ] && count=0 - if [ "$count" != "$rm_addr_tx_nr" ]; then + count=$(get_counter ${ns2} "MPTcpExtRmAddrTx") + if [ -z "$count" ]; then + echo "[skip]" + elif [ "$count" != "$rm_addr_tx_nr" ]; then echo "[fail] got $count RM_ADDR[s] expected $rm_addr_tx_nr" fail_test else - echo -n "[ ok ]" + echo "[ ok ]" fi - - echo "$extra_msg" } chk_prio_nr() diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index b35148edbf02..926b0be87c99 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -51,6 +51,11 @@ struct mptcp_info { __u8 mptcpi_local_addr_used; __u8 mptcpi_local_addr_max; __u8 mptcpi_csum_enabled; + __u32 mptcpi_retransmits; + __u64 mptcpi_bytes_retrans; + __u64 mptcpi_bytes_sent; + __u64 mptcpi_bytes_received; + __u64 mptcpi_bytes_acked; }; struct mptcp_subflow_data { @@ -81,10 +86,41 @@ struct mptcp_subflow_addrs { #define MPTCP_SUBFLOW_ADDRS 3 #endif +#ifndef MPTCP_FULL_INFO +struct mptcp_subflow_info { + __u32 id; + struct mptcp_subflow_addrs addrs; +}; + +struct mptcp_full_info { + __u32 size_tcpinfo_kernel; /* must be 0, set by kernel */ + __u32 size_tcpinfo_user; + __u32 size_sfinfo_kernel; /* must be 0, set by kernel */ + __u32 size_sfinfo_user; + __u32 num_subflows; /* must be 0, set by kernel (real subflow count) */ + __u32 size_arrays_user; /* max subflows that userspace is interested in; + * the buffers at subflow_info/tcp_info + * are respectively at least: + * size_arrays * size_sfinfo_user + * size_arrays * size_tcpinfo_user + * bytes wide + */ + __aligned_u64 subflow_info; + __aligned_u64 tcp_info; + struct mptcp_info mptcp_info; +}; + +#define MPTCP_FULL_INFO 4 +#endif + struct so_state { struct mptcp_info mi; + struct mptcp_info last_sample; + struct tcp_info tcp_info; + struct mptcp_subflow_addrs addrs; uint64_t mptcpi_rcv_delta; uint64_t tcpi_rcv_delta; + bool pkt_stats_avail; }; #ifndef MIN @@ -322,8 +358,9 @@ static void do_getsockopt_mptcp_info(struct so_state *s, int fd, size_t w) if (ret < 0) die_perror("getsockopt MPTCP_INFO"); - assert(olen == sizeof(i)); + s->pkt_stats_avail = olen >= sizeof(i); + s->last_sample = i; if (s->mi.mptcpi_write_seq == 0) s->mi = i; @@ -362,6 +399,8 @@ static void do_getsockopt_tcp_info(struct so_state *s, int fd, size_t r, size_t olen -= sizeof(struct mptcp_subflow_data); assert(olen == ti.d.size_user); + s->tcp_info = ti.ti[0]; + if (ti.ti[0].tcpi_bytes_sent == w && ti.ti[0].tcpi_bytes_received == r) goto done; @@ -383,7 +422,7 @@ done: do_getsockopt_bogus_sf_data(fd, MPTCP_TCPINFO); } -static void do_getsockopt_subflow_addrs(int fd) +static void do_getsockopt_subflow_addrs(struct so_state *s, int fd) { struct sockaddr_storage remote, local; socklen_t olen, rlen, llen; @@ -431,6 +470,7 @@ static void do_getsockopt_subflow_addrs(int fd) assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) == 0); assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) == 0); + s->addrs = addrs.addr[0]; memset(&addrs, 0, sizeof(addrs)); @@ -451,13 +491,70 @@ static void do_getsockopt_subflow_addrs(int fd) do_getsockopt_bogus_sf_data(fd, MPTCP_SUBFLOW_ADDRS); } +static void do_getsockopt_mptcp_full_info(struct so_state *s, int fd) +{ + size_t data_size = sizeof(struct mptcp_full_info); + struct mptcp_subflow_info sfinfo[2]; + struct tcp_info tcp_info[2]; + struct mptcp_full_info mfi; + socklen_t olen; + int ret; + + memset(&mfi, 0, data_size); + memset(tcp_info, 0, sizeof(tcp_info)); + memset(sfinfo, 0, sizeof(sfinfo)); + + mfi.size_tcpinfo_user = sizeof(struct tcp_info); + mfi.size_sfinfo_user = sizeof(struct mptcp_subflow_info); + mfi.size_arrays_user = 2; + mfi.subflow_info = (unsigned long)&sfinfo[0]; + mfi.tcp_info = (unsigned long)&tcp_info[0]; + olen = data_size; + + ret = getsockopt(fd, SOL_MPTCP, MPTCP_FULL_INFO, &mfi, &olen); + if (ret < 0) { + if (errno == EOPNOTSUPP) { + perror("MPTCP_FULL_INFO test skipped"); + return; + } + xerror("getsockopt MPTCP_FULL_INFO"); + } + + assert(olen <= data_size); + assert(mfi.size_tcpinfo_kernel > 0); + assert(mfi.size_tcpinfo_user == + MIN(mfi.size_tcpinfo_kernel, sizeof(struct tcp_info))); + assert(mfi.size_sfinfo_kernel > 0); + assert(mfi.size_sfinfo_user == + MIN(mfi.size_sfinfo_kernel, sizeof(struct mptcp_subflow_info))); + assert(mfi.num_subflows == 1); + + /* Tolerate future extension to mptcp_info struct and running newer + * test on top of older kernel. + * Anyway any kernel supporting MPTCP_FULL_INFO must at least include + * the following in mptcp_info. + */ + assert(olen > (socklen_t)__builtin_offsetof(struct mptcp_full_info, tcp_info)); + assert(mfi.mptcp_info.mptcpi_subflows == 0); + assert(mfi.mptcp_info.mptcpi_bytes_sent == s->last_sample.mptcpi_bytes_sent); + assert(mfi.mptcp_info.mptcpi_bytes_received == s->last_sample.mptcpi_bytes_received); + + assert(sfinfo[0].id == 1); + assert(tcp_info[0].tcpi_bytes_sent == s->tcp_info.tcpi_bytes_sent); + assert(tcp_info[0].tcpi_bytes_received == s->tcp_info.tcpi_bytes_received); + assert(!memcmp(&sfinfo->addrs, &s->addrs, sizeof(struct mptcp_subflow_addrs))); +} + static void do_getsockopts(struct so_state *s, int fd, size_t r, size_t w) { do_getsockopt_mptcp_info(s, fd, w); do_getsockopt_tcp_info(s, fd, r, w); - do_getsockopt_subflow_addrs(fd); + do_getsockopt_subflow_addrs(s, fd); + + if (r) + do_getsockopt_mptcp_full_info(s, fd); } static void connect_one_server(int fd, int pipefd) @@ -562,6 +659,23 @@ static void process_one_client(int fd, int pipefd) do_getsockopts(&s, fd, ret, ret2); if (s.mptcpi_rcv_delta != (uint64_t)ret + 1) xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret); + + /* be nice when running on top of older kernel */ + if (s.pkt_stats_avail) { + if (s.last_sample.mptcpi_bytes_sent != ret2) + xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64, + s.last_sample.mptcpi_bytes_sent, ret2, + s.last_sample.mptcpi_bytes_sent - ret2); + if (s.last_sample.mptcpi_bytes_received != ret) + xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64, + s.last_sample.mptcpi_bytes_received, ret, + s.last_sample.mptcpi_bytes_received - ret); + if (s.last_sample.mptcpi_bytes_acked != ret) + xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64, + s.last_sample.mptcpi_bytes_acked, ret2, + s.last_sample.mptcpi_bytes_acked - ret2); + } + close(fd); } |