summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
author Jakub Kicinski <kuba@kernel.org> 2023-06-21 22:45:59 -0700
committer Jakub Kicinski <kuba@kernel.org> 2023-06-21 22:46:00 -0700
commit 98e95872f2b818c74872d073eaa4c937579d41fc (patch)
tree 3d00ae3cdbad3686021de3ee04a2ddd429e53979 /tools
parent 5dfbbaa208f5429a02ccb410ae3515222bbe64ef (diff)
parent 528cb5f2a1e859522f36f091f29f5c81ec6d4a4c (diff)
downloadlwn-98e95872f2b818c74872d073eaa4c937579d41fc.tar.gz
lwn-98e95872f2b818c74872d073eaa4c937579d41fc.zip
Merge branch 'mptcp-expose-more-info-and-small-improvements'
Matthieu Baerts says: ==================== mptcp: expose more info and small improvements Patches 1-3/9 track and expose some aggregated data counters at the MPTCP level: the number of retransmissions and the bytes that have been transferred. The first patch prepares the work by moving where snd_una is updated for fallback sockets while the last patch adds some tests to cover the new code. Patches 4-6/9 introduce a new getsockopt for SOL_MPTCP: MPTCP_FULL_INFO. This new socket option allows combining the info from the MPTCP_INFO, MPTCP_TCPINFO and MPTCP_SUBFLOW_ADDRS socket options into one. Having all the info in a single call can be needed because the path-manager can close and re-create subflows between getsockopt() calls, fooling the accounting. The first patch introduces a unique subflow ID to easily detect when subflows are being re-created with the same 5-tuple while the last patch adds some tests to cover the new code. Please note that patch 5/9 ("mptcp: introduce MPTCP_FULL_INFO getsockopt") can reveal a bug that has been there for some time, see [1]. A fix has recently been sent to netdev for the -net tree: "mptcp: ensure listener is unhashed before updating the sk status", see [2]. There are no conflicts between the two patches but it might be better to apply this series after the one for -net and after having merged "net" into "net-next". Patch 7/9 is similar to commit 47867f0a7e83 ("selftests: mptcp: join: skip check if MIB counter not supported") recently applied in the -net tree but here it adapts the new code that is only in net-next (and it fixes a merge conflict resolution which didn't have any impact). Patches 8 and 9/9 are two simple refactorings. One consolidates the transition to TCP_CLOSE in mptcp_do_fastclose() and avoids duplicated code. The other one reduces the scope of an argument passed to the mptcp_pm_alloc_anno_list() function. 
Link: https://github.com/multipath-tcp/mptcp_net-next/issues/407 [1] Link: https://lore.kernel.org/netdev/20230620-upstream-net-20230620-misc-fixes-for-v6-4-v1-0-f36aa5eae8b9@tessares.net/ [2] ==================== Link: https://lore.kernel.org/r/20230620-upstream-net-next-20230620-mptcp-expose-more-info-and-misc-v1-0-62b9444bfd48@tessares.net Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'tools')
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh33
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_sockopt.c120
2 files changed, 134 insertions, 19 deletions
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 1b68fe1c0885..a7973d6a40a0 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1683,12 +1683,12 @@ chk_add_tx_nr()
timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
printf "%-${nr_blank}s %s" " " "add TX"
- count=$(ip netns exec $ns1 nstat -as MPTcpExtAddAddrTx | grep MPTcpExtAddAddrTx | awk '{print $2}')
- [ -z "$count" ] && count=0
-
+ count=$(get_counter ${ns1} "MPTcpExtAddAddrTx")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
# if the test configured a short timeout tolerate greater then expected
# add addrs options, due to retransmissions
- if [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
+ elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
echo "[fail] got $count ADD_ADDR[s] TX, expected $add_tx_nr"
fail_test
else
@@ -1696,9 +1696,10 @@ chk_add_tx_nr()
fi
echo -n " - echo TX "
- count=$(ip netns exec $ns2 nstat -as MPTcpExtEchoAddTx | grep MPTcpExtEchoAddTx | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$echo_tx_nr" ]; then
+ count=$(get_counter ${ns2} "MPTcpExtEchoAddTx")
+ if [ -z "$count" ]; then
+ echo "[skip]"
+ elif [ "$count" != "$echo_tx_nr" ]; then
echo "[fail] got $count ADD_ADDR echo[s] TX, expected $echo_tx_nr"
fail_test
else
@@ -1734,9 +1735,10 @@ chk_rm_nr()
fi
printf "%-${nr_blank}s %s" " " "rm "
- count=$(ip netns exec $addr_ns nstat -as MPTcpExtRmAddr | grep MPTcpExtRmAddr | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$rm_addr_nr" ]; then
+ count=$(get_counter ${addr_ns} "MPTcpExtRmAddr")
+ if [ -z "$count" ]; then
+ echo -n "[skip]"
+ elif [ "$count" != "$rm_addr_nr" ]; then
echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
fail_test
else
@@ -1778,16 +1780,15 @@ chk_rm_tx_nr()
local rm_addr_tx_nr=$1
printf "%-${nr_blank}s %s" " " "rm TX "
- count=$(ip netns exec $ns2 nstat -as MPTcpExtRmAddrTx | grep MPTcpExtRmAddrTx | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ "$count" != "$rm_addr_tx_nr" ]; then
+ count=$(get_counter ${ns2} "MPTcpExtRmAddrTx")
+ if [ -z "$count" ]; then
+ echo "[skip]"
+ elif [ "$count" != "$rm_addr_tx_nr" ]; then
echo "[fail] got $count RM_ADDR[s] expected $rm_addr_tx_nr"
fail_test
else
- echo -n "[ ok ]"
+ echo "[ ok ]"
fi
-
- echo "$extra_msg"
}
chk_prio_nr()
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
index b35148edbf02..926b0be87c99 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -51,6 +51,11 @@ struct mptcp_info {
__u8 mptcpi_local_addr_used;
__u8 mptcpi_local_addr_max;
__u8 mptcpi_csum_enabled;
+ __u32 mptcpi_retransmits;
+ __u64 mptcpi_bytes_retrans;
+ __u64 mptcpi_bytes_sent;
+ __u64 mptcpi_bytes_received;
+ __u64 mptcpi_bytes_acked;
};
struct mptcp_subflow_data {
@@ -81,10 +86,41 @@ struct mptcp_subflow_addrs {
#define MPTCP_SUBFLOW_ADDRS 3
#endif
+#ifndef MPTCP_FULL_INFO
+struct mptcp_subflow_info {
+ __u32 id;
+ struct mptcp_subflow_addrs addrs;
+};
+
+struct mptcp_full_info {
+ __u32 size_tcpinfo_kernel; /* must be 0, set by kernel */
+ __u32 size_tcpinfo_user;
+ __u32 size_sfinfo_kernel; /* must be 0, set by kernel */
+ __u32 size_sfinfo_user;
+ __u32 num_subflows; /* must be 0, set by kernel (real subflow count) */
+ __u32 size_arrays_user; /* max subflows that userspace is interested in;
+ * the buffers at subflow_info/tcp_info
+ * are respectively at least:
+ * size_arrays * size_sfinfo_user
+ * size_arrays * size_tcpinfo_user
+ * bytes wide
+ */
+ __aligned_u64 subflow_info;
+ __aligned_u64 tcp_info;
+ struct mptcp_info mptcp_info;
+};
+
+#define MPTCP_FULL_INFO 4
+#endif
+
struct so_state {
struct mptcp_info mi;
+ struct mptcp_info last_sample;
+ struct tcp_info tcp_info;
+ struct mptcp_subflow_addrs addrs;
uint64_t mptcpi_rcv_delta;
uint64_t tcpi_rcv_delta;
+ bool pkt_stats_avail;
};
#ifndef MIN
@@ -322,8 +358,9 @@ static void do_getsockopt_mptcp_info(struct so_state *s, int fd, size_t w)
if (ret < 0)
die_perror("getsockopt MPTCP_INFO");
- assert(olen == sizeof(i));
+ s->pkt_stats_avail = olen >= sizeof(i);
+ s->last_sample = i;
if (s->mi.mptcpi_write_seq == 0)
s->mi = i;
@@ -362,6 +399,8 @@ static void do_getsockopt_tcp_info(struct so_state *s, int fd, size_t r, size_t
olen -= sizeof(struct mptcp_subflow_data);
assert(olen == ti.d.size_user);
+ s->tcp_info = ti.ti[0];
+
if (ti.ti[0].tcpi_bytes_sent == w &&
ti.ti[0].tcpi_bytes_received == r)
goto done;
@@ -383,7 +422,7 @@ done:
do_getsockopt_bogus_sf_data(fd, MPTCP_TCPINFO);
}
-static void do_getsockopt_subflow_addrs(int fd)
+static void do_getsockopt_subflow_addrs(struct so_state *s, int fd)
{
struct sockaddr_storage remote, local;
socklen_t olen, rlen, llen;
@@ -431,6 +470,7 @@ static void do_getsockopt_subflow_addrs(int fd)
assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) == 0);
assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) == 0);
+ s->addrs = addrs.addr[0];
memset(&addrs, 0, sizeof(addrs));
@@ -451,13 +491,70 @@ static void do_getsockopt_subflow_addrs(int fd)
do_getsockopt_bogus_sf_data(fd, MPTCP_SUBFLOW_ADDRS);
}
+static void do_getsockopt_mptcp_full_info(struct so_state *s, int fd)
+{
+ size_t data_size = sizeof(struct mptcp_full_info);
+ struct mptcp_subflow_info sfinfo[2];
+ struct tcp_info tcp_info[2];
+ struct mptcp_full_info mfi;
+ socklen_t olen;
+ int ret;
+
+ memset(&mfi, 0, data_size);
+ memset(tcp_info, 0, sizeof(tcp_info));
+ memset(sfinfo, 0, sizeof(sfinfo));
+
+ mfi.size_tcpinfo_user = sizeof(struct tcp_info);
+ mfi.size_sfinfo_user = sizeof(struct mptcp_subflow_info);
+ mfi.size_arrays_user = 2;
+ mfi.subflow_info = (unsigned long)&sfinfo[0];
+ mfi.tcp_info = (unsigned long)&tcp_info[0];
+ olen = data_size;
+
+ ret = getsockopt(fd, SOL_MPTCP, MPTCP_FULL_INFO, &mfi, &olen);
+ if (ret < 0) {
+ if (errno == EOPNOTSUPP) {
+ perror("MPTCP_FULL_INFO test skipped");
+ return;
+ }
+ xerror("getsockopt MPTCP_FULL_INFO");
+ }
+
+ assert(olen <= data_size);
+ assert(mfi.size_tcpinfo_kernel > 0);
+ assert(mfi.size_tcpinfo_user ==
+ MIN(mfi.size_tcpinfo_kernel, sizeof(struct tcp_info)));
+ assert(mfi.size_sfinfo_kernel > 0);
+ assert(mfi.size_sfinfo_user ==
+ MIN(mfi.size_sfinfo_kernel, sizeof(struct mptcp_subflow_info)));
+ assert(mfi.num_subflows == 1);
+
+ /* Tolerate future extension to mptcp_info struct and running newer
+ * test on top of older kernel.
+ * Anyway any kernel supporting MPTCP_FULL_INFO must at least include
+ * the following in mptcp_info.
+ */
+ assert(olen > (socklen_t)__builtin_offsetof(struct mptcp_full_info, tcp_info));
+ assert(mfi.mptcp_info.mptcpi_subflows == 0);
+ assert(mfi.mptcp_info.mptcpi_bytes_sent == s->last_sample.mptcpi_bytes_sent);
+ assert(mfi.mptcp_info.mptcpi_bytes_received == s->last_sample.mptcpi_bytes_received);
+
+ assert(sfinfo[0].id == 1);
+ assert(tcp_info[0].tcpi_bytes_sent == s->tcp_info.tcpi_bytes_sent);
+ assert(tcp_info[0].tcpi_bytes_received == s->tcp_info.tcpi_bytes_received);
+ assert(!memcmp(&sfinfo->addrs, &s->addrs, sizeof(struct mptcp_subflow_addrs)));
+}
+
static void do_getsockopts(struct so_state *s, int fd, size_t r, size_t w)
{
do_getsockopt_mptcp_info(s, fd, w);
do_getsockopt_tcp_info(s, fd, r, w);
- do_getsockopt_subflow_addrs(fd);
+ do_getsockopt_subflow_addrs(s, fd);
+
+ if (r)
+ do_getsockopt_mptcp_full_info(s, fd);
}
static void connect_one_server(int fd, int pipefd)
@@ -562,6 +659,23 @@ static void process_one_client(int fd, int pipefd)
do_getsockopts(&s, fd, ret, ret2);
if (s.mptcpi_rcv_delta != (uint64_t)ret + 1)
xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret);
+
+ /* be nice when running on top of older kernel */
+ if (s.pkt_stats_avail) {
+ if (s.last_sample.mptcpi_bytes_sent != ret2)
+ xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64,
+ s.last_sample.mptcpi_bytes_sent, ret2,
+ s.last_sample.mptcpi_bytes_sent - ret2);
+ if (s.last_sample.mptcpi_bytes_received != ret)
+ xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64,
+ s.last_sample.mptcpi_bytes_received, ret,
+ s.last_sample.mptcpi_bytes_received - ret);
+ if (s.last_sample.mptcpi_bytes_acked != ret)
+ xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64,
+ s.last_sample.mptcpi_bytes_acked, ret2,
+ s.last_sample.mptcpi_bytes_acked - ret2);
+ }
+
close(fd);
}