summaryrefslogtreecommitdiff
path: root/net/vmw_vsock
diff options
context:
space:
mode:
authorAlexander Graf <graf@amazon.com>2026-03-04 23:00:27 +0000
committerPaolo Abeni <pabeni@redhat.com>2026-03-12 10:59:36 +0100
commit0de607dc4fd80ede3b2a35e8a72f99c7a0bbc321 (patch)
tree72878c52d5623288bd4e0ff528e6d6f9c73d146f /net/vmw_vsock
parent17edc4e820bf8b4c7737c1de86c267e6974d543a (diff)
downloadlwn-0de607dc4fd80ede3b2a35e8a72f99c7a0bbc321.tar.gz
lwn-0de607dc4fd80ede3b2a35e8a72f99c7a0bbc321.zip
vsock: add G2H fallback for CIDs not owned by H2G transport
When no H2G transport is loaded, vsock currently routes all CIDs to the G2H transport (commit 65b422d9b61b ("vsock: forward all packets to the host when no H2G is registered")). Extend that existing behavior: when an H2G transport is loaded but does not claim a given CID, the connection falls back to G2H in the same way. This matters in environments like Nitro Enclaves, where an instance may run nested VMs via vhost-vsock (H2G) while also needing to reach sibling enclaves at higher CIDs through virtio-vsock-pci (G2H). With the old code, any CID > 2 was unconditionally routed to H2G when vhost was loaded, making those enclaves unreachable without setting VMADDR_FLAG_TO_HOST explicitly on every connect. Requiring every application to set VMADDR_FLAG_TO_HOST creates friction: tools like socat, iperf, and others would all need to learn about it. The flag was introduced 6 years ago and I am still not aware of any tool that supports it. Even if there were support, it would be cumbersome to use. The most natural experience is a single CID address space where H2G only wins for CIDs it actually owns, and everything else falls through to G2H, extending the behavior that already exists when H2G is absent. To give user space at least a hint that the kernel applied this logic, automatically set the VMADDR_FLAG_TO_HOST flag on the remote address so it can determine the path taken via getpeername(). Add a per-network namespace sysctl net.vsock.g2h_fallback (default 1). At 0 it forces strict routing: H2G always wins for CID > VMADDR_CID_HOST, or the connection fails with ENODEV if H2G is not loaded. Signed-off-by: Alexander Graf <graf@amazon.com> Tested-by: syzbot@syzkaller.appspotmail.com Reviewed-by: Stefano Garzarella <sgarzare@redhat.com> Link: https://patch.msgid.link/20260304230027.59857-1-graf@amazon.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Diffstat (limited to 'net/vmw_vsock')
-rw-r--r--net/vmw_vsock/af_vsock.c35
-rw-r--r--net/vmw_vsock/virtio_transport.c7
2 files changed, 37 insertions, 5 deletions
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index f0ab2f13e9db..cc4b225250b9 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -545,9 +545,13 @@ static void vsock_deassign_transport(struct vsock_sock *vsk)
* The vsk->remote_addr is used to decide which transport to use:
* - remote CID == VMADDR_CID_LOCAL or g2h->local_cid or VMADDR_CID_HOST if
* g2h is not loaded, will use local transport;
- * - remote CID <= VMADDR_CID_HOST or h2g is not loaded or remote flags field
- * includes VMADDR_FLAG_TO_HOST flag value, will use guest->host transport;
- * - remote CID > VMADDR_CID_HOST will use host->guest transport;
+ * - remote CID <= VMADDR_CID_HOST or remote flags field includes
+ * VMADDR_FLAG_TO_HOST, will use guest->host transport;
+ * - remote CID > VMADDR_CID_HOST and h2g is loaded and h2g claims that CID,
+ * will use host->guest transport;
+ * - h2g not loaded or h2g does not claim that CID, and g2h claims the CID via
+ * has_remote_cid, will use guest->host transport (when g2h_fallback=1);
+ * - anything else goes to h2g, or returns -ENODEV if no h2g is available.
*/
int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
{
@@ -581,11 +585,21 @@ int vsock_assign_transport(struct vsock_sock *vsk, struct vsock_sock *psk)
case SOCK_SEQPACKET:
if (vsock_use_local_transport(remote_cid))
new_transport = transport_local;
- else if (remote_cid <= VMADDR_CID_HOST || !transport_h2g ||
+ else if (remote_cid <= VMADDR_CID_HOST ||
(remote_flags & VMADDR_FLAG_TO_HOST))
new_transport = transport_g2h;
- else
+ else if (transport_h2g &&
+ (!transport_h2g->has_remote_cid ||
+ transport_h2g->has_remote_cid(vsk, remote_cid)))
+ new_transport = transport_h2g;
+ else if (sock_net(sk)->vsock.g2h_fallback &&
+ transport_g2h && transport_g2h->has_remote_cid &&
+ transport_g2h->has_remote_cid(vsk, remote_cid)) {
+ vsk->remote_addr.svm_flags |= VMADDR_FLAG_TO_HOST;
+ new_transport = transport_g2h;
+ } else {
new_transport = transport_h2g;
+ }
break;
default:
ret = -ESOCKTNOSUPPORT;
@@ -2879,6 +2893,15 @@ static struct ctl_table vsock_table[] = {
.mode = 0644,
.proc_handler = vsock_net_child_mode_string
},
+ {
+ .procname = "g2h_fallback",
+ .data = &init_net.vsock.g2h_fallback,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
};
static int __net_init vsock_sysctl_register(struct net *net)
@@ -2894,6 +2917,7 @@ static int __net_init vsock_sysctl_register(struct net *net)
table[0].data = &net->vsock.mode;
table[1].data = &net->vsock.child_ns_mode;
+ table[2].data = &net->vsock.g2h_fallback;
}
net->vsock.sysctl_hdr = register_net_sysctl_sz(net, "net/vsock", table,
@@ -2928,6 +2952,7 @@ static void vsock_net_init(struct net *net)
net->vsock.mode = vsock_net_child_mode(current->nsproxy->net_ns);
net->vsock.child_ns_mode = net->vsock.mode;
+ net->vsock.g2h_fallback = 1;
}
static __net_init int vsock_sysctl_init_net(struct net *net)
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index 77fe5b7b066c..57f2d6ec3ffc 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -547,11 +547,18 @@ bool virtio_transport_stream_allow(struct vsock_sock *vsk, u32 cid, u32 port)
static bool virtio_transport_seqpacket_allow(struct vsock_sock *vsk,
u32 remote_cid);
+static bool virtio_transport_has_remote_cid(struct vsock_sock *vsk, u32 cid)
+{
+ /* The host could implement any CID: claim all, ignoring vsk and cid. */
+ return true;
+}
+
static struct virtio_transport virtio_transport = {
.transport = {
.module = THIS_MODULE,
.get_local_cid = virtio_transport_get_local_cid,
+ .has_remote_cid = virtio_transport_has_remote_cid,
.init = virtio_transport_do_socket_init,
.destruct = virtio_transport_destruct,