diff options
| author | Alexander Graf <graf@amazon.com> | 2026-03-04 23:00:27 +0000 |
|---|---|---|
| committer | Paolo Abeni <pabeni@redhat.com> | 2026-03-12 10:59:36 +0100 |
| commit | 0de607dc4fd80ede3b2a35e8a72f99c7a0bbc321 (patch) | |
| tree | 72878c52d5623288bd4e0ff528e6d6f9c73d146f /net/vmw_vsock | |
| parent | 17edc4e820bf8b4c7737c1de86c267e6974d543a (diff) | |
| download | lwn-0de607dc4fd80ede3b2a35e8a72f99c7a0bbc321.tar.gz lwn-0de607dc4fd80ede3b2a35e8a72f99c7a0bbc321.zip | |
vsock: add G2H fallback for CIDs not owned by H2G transport
When no H2G transport is loaded, vsock currently routes all CIDs to the
G2H transport (commit 65b422d9b61b ("vsock: forward all packets to the
host when no H2G is registered"). Extend that existing behavior: when
an H2G transport is loaded but does not claim a given CID, the
connection falls back to G2H in the same way.
This matters in environments like Nitro Enclaves, where an instance may
run nested VMs via vhost-vsock (H2G) while also needing to reach sibling
enclaves at higher CIDs through virtio-vsock-pci (G2H). With the old
code, any CID > 2 was unconditionally routed to H2G when vhost was
loaded, making those enclaves unreachable without setting
VMADDR_FLAG_TO_HOST explicitly on every connect.
Requiring every application to set VMADDR_FLAG_TO_HOST creates friction:
tools like socat, iperf, and others would all need to learn about it.
The flag was introduced 6 years ago and I am still not aware of any tool
that supports it. Even if there was support, it would be cumbersome to
use. The most natural experience is a single CID address space where H2G
only wins for CIDs it actually owns, and everything else falls through to
G2H, extending the behavior that already exists when H2G is absent.
To give user space at least a hint that the kernel applied this logic,
automatically set the VMADDR_FLAG_TO_HOST on the remote address so it
can determine the path taken via getpeername().
Add a per-network namespace sysctl net.vsock.g2h_fallback (default 1).
At 0 it forces strict routing: H2G always wins for CID > VMADDR_CID_HOST,
or ENODEV if H2G is not loaded.
Signed-off-by: Alexander Graf <graf@amazon.com>
Tested-by: syzbot@syzkaller.appspotmail.com
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Link: https://patch.msgid.link/20260304230027.59857-1-graf@amazon.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Diffstat (limited to 'net/vmw_vsock')
| -rw-r--r-- | net/vmw_vsock/af_vsock.c | 35 | ||||
| -rw-r--r-- | net/vmw_vsock/virtio_transport.c | 7 |
2 files changed, 37 insertions, 5 deletions
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index f0ab2f13e9db..cc4b225250b9 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -545,9 +545,13 @@ static void vsock_deassign_transport(struct vsock_sock *vsk) * The vsk->remote_addr is used to decide which transport to use: * - remote CID == VMADDR_CID_LOCAL or g2h->local_cid or VMADDR_CID_HOST if * g2h is not loaded, will use local transport; - * - remote CID <= VMADDR_CID_HOST or h2g is not loaded or remote flags field - * includes VMADDR_FLAG_TO_HOST flag value, will use guest->host transport; - * - remote CID > VMADDR_CID_HOST will use host->guest transport; + * - remote CID <= VMADDR_CID_HOST or remote flags field includes + * VMADDR_FLAG_TO_HOST, will use guest->host transport; + * - remote CID > VMADDR_CID_HOST and h2g is loaded and h2g claims that CID, + * will use host->guest transport; + * - h2g not loaded or h2g does not claim that CID and g2h claims the CID via + * has_remote_cid, will use guest->host transport (when g2h_fallback=1) + * - anything else goes to h2g or returns -ENODEV if no h2g is available */ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) { @@ -581,11 +585,21 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk) case SOCK_SEQPACKET: if (vsock_use_local_transport(remote_cid)) new_transport = transport_local; - else if (remote_cid <= VMADDR_CID_HOST || !transport_h2g || + else if (remote_cid <= VMADDR_CID_HOST || (remote_flags & VMADDR_FLAG_TO_HOST)) new_transport = transport_g2h; - else + else if (transport_h2g && + (!transport_h2g->has_remote_cid || + transport_h2g->has_remote_cid(vsk, remote_cid))) + new_transport = transport_h2g; + else if (sock_net(sk)->vsock.g2h_fallback && + transport_g2h && transport_g2h->has_remote_cid && + transport_g2h->has_remote_cid(vsk, remote_cid)) { + vsk->remote_addr.svm_flags |= VMADDR_FLAG_TO_HOST; + new_transport = transport_g2h; + } else { new_transport = transport_h2g; + } break; default: ret = -ESOCKTNOSUPPORT; @@ -2879,6 +2893,15 @@ static struct ctl_table vsock_table[] = { .mode = 0644, .proc_handler = vsock_net_child_mode_string }, + { + .procname = "g2h_fallback", + .data = &init_net.vsock.g2h_fallback, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }, }; static int __net_init vsock_sysctl_register(struct net *net) @@ -2894,6 +2917,7 @@ static int __net_init vsock_sysctl_register(struct net *net) table[0].data = &net->vsock.mode; table[1].data = &net->vsock.child_ns_mode; + table[2].data = &net->vsock.g2h_fallback; } net->vsock.sysctl_hdr = register_net_sysctl_sz(net, "net/vsock", table, @@ -2928,6 +2952,7 @@ static void vsock_net_init(struct net *net) net->vsock.mode = vsock_net_child_mode(current->nsproxy->net_ns); net->vsock.child_ns_mode = net->vsock.mode; + net->vsock.g2h_fallback = 1; } static __net_init int vsock_sysctl_init_net(struct net *net) diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 77fe5b7b066c..57f2d6ec3ffc 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -547,11 +547,18 @@ bool virtio_transport_stream_allow(struct vsock_sock *vsk, u32 cid, u32 port) static bool virtio_transport_seqpacket_allow(struct vsock_sock *vsk, u32 remote_cid); +static bool virtio_transport_has_remote_cid(struct vsock_sock *vsk, u32 cid) +{ + /* The CID could be implemented by the host. Always assume it is. */ + return true; +} + static struct virtio_transport virtio_transport = { .transport = { .module = THIS_MODULE, .get_local_cid = virtio_transport_get_local_cid, + .has_remote_cid = virtio_transport_has_remote_cid, .init = virtio_transport_do_socket_init, .destruct = virtio_transport_destruct, |
