summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author David Herrmann <dh.herrmann@gmail.com> 2019-01-15 14:42:14 +0100
committer David S. Miller <davem@davemloft.net> 2019-01-17 14:55:51 -0800
commit f5dd3d0c9638a9d9a02b5964c4ad636f06cf7e2c (patch)
tree 22b7cd708c478b43037c6d52feb80bafd3f6c6dc
parent 692d7b5d1f9125a1cf0595e979e3b5fb7210547e (diff)
downloadlwn-f5dd3d0c9638a9d9a02b5964c4ad636f06cf7e2c.tar.gz
lwn-f5dd3d0c9638a9d9a02b5964c4ad636f06cf7e2c.zip
net: introduce SO_BINDTOIFINDEX sockopt
This introduces a new generic SOL_SOCKET-level socket option called SO_BINDTOIFINDEX. It behaves similarly to SO_BINDTODEVICE, but takes a network interface index as argument, rather than the network interface name. User-space often refers to network-interfaces via their index, but has to temporarily resolve it to a name for a call into SO_BINDTODEVICE. This might pose problems when the network-device is renamed asynchronously by other parts of the system. When this happens, the SO_BINDTODEVICE might either fail, or worse, it might bind to the wrong device. In most cases user-space only ever operates on devices which they either manage themselves, or otherwise have a guarantee that the device name will not change (e.g., devices that are UP cannot be renamed). However, particularly in libraries this guarantee is non-obvious and it would be nice if that race-condition simply did not exist. It would make it easier for those libraries to operate even in situations where the device-name might change under the hood. A real use-case that we recently hit is trying to start the network stack early in the initrd but make it survive into the real system. Existing distributions rename network-interfaces during the transition from initrd into the real system. This, obviously, cannot affect devices that are up and running (unless you also consider moving them between network-namespaces). However, the network manager now has to make sure its management engine for dormant devices will not run in parallel to these renames. Particularly, when you offload operations like DHCP into separate processes, these might set up their sockets early, and thus have to resolve the device-name, possibly running into this race-condition. By avoiding a call to resolve the device-name, we no longer depend on the name and can run network setup of dormant devices in parallel to the transition off the initrd. The SO_BINDTOIFINDEX socket option plugs this race.
Reviewed-by: Tom Gundersen <teg@jklm.no> Signed-off-by: David Herrmann <dh.herrmann@gmail.com> Acked-by: Willem de Bruijn <willemb@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--arch/alpha/include/uapi/asm/socket.h2
-rw-r--r--arch/ia64/include/uapi/asm/socket.h2
-rw-r--r--arch/mips/include/uapi/asm/socket.h2
-rw-r--r--arch/parisc/include/uapi/asm/socket.h2
-rw-r--r--arch/s390/include/uapi/asm/socket.h2
-rw-r--r--arch/sparc/include/uapi/asm/socket.h2
-rw-r--r--arch/xtensa/include/uapi/asm/socket.h2
-rw-r--r--include/uapi/asm-generic/socket.h2
-rw-r--r--net/core/sock.c46
9 files changed, 52 insertions, 10 deletions
diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h
index 065fb372e355..b1c9b542c021 100644
--- a/arch/alpha/include/uapi/asm/socket.h
+++ b/arch/alpha/include/uapi/asm/socket.h
@@ -115,4 +115,6 @@
#define SO_TXTIME 61
#define SCM_TXTIME SO_TXTIME
+#define SO_BINDTOIFINDEX 62
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/ia64/include/uapi/asm/socket.h b/arch/ia64/include/uapi/asm/socket.h
index c872c4e6bafb..ba0d245f9576 100644
--- a/arch/ia64/include/uapi/asm/socket.h
+++ b/arch/ia64/include/uapi/asm/socket.h
@@ -117,4 +117,6 @@
#define SO_TXTIME 61
#define SCM_TXTIME SO_TXTIME
+#define SO_BINDTOIFINDEX 62
+
#endif /* _ASM_IA64_SOCKET_H */
diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h
index 71370fb3ceef..73e25e35d803 100644
--- a/arch/mips/include/uapi/asm/socket.h
+++ b/arch/mips/include/uapi/asm/socket.h
@@ -126,4 +126,6 @@
#define SO_TXTIME 61
#define SCM_TXTIME SO_TXTIME
+#define SO_BINDTOIFINDEX 62
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h
index 061b9cf2a779..52bed5976cbe 100644
--- a/arch/parisc/include/uapi/asm/socket.h
+++ b/arch/parisc/include/uapi/asm/socket.h
@@ -107,4 +107,6 @@
#define SO_TXTIME 0x4036
#define SCM_TXTIME SO_TXTIME
+#define SO_BINDTOIFINDEX 0x4037
+
#endif /* _UAPI_ASM_SOCKET_H */
diff --git a/arch/s390/include/uapi/asm/socket.h b/arch/s390/include/uapi/asm/socket.h
index 39d901476ee5..49c971587087 100644
--- a/arch/s390/include/uapi/asm/socket.h
+++ b/arch/s390/include/uapi/asm/socket.h
@@ -114,4 +114,6 @@
#define SO_TXTIME 61
#define SCM_TXTIME SO_TXTIME
+#define SO_BINDTOIFINDEX 62
+
#endif /* _ASM_SOCKET_H */
diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h
index 7ea35e5601b6..bbdb81594dd4 100644
--- a/arch/sparc/include/uapi/asm/socket.h
+++ b/arch/sparc/include/uapi/asm/socket.h
@@ -104,6 +104,8 @@
#define SO_TXTIME 0x003f
#define SCM_TXTIME SO_TXTIME
+#define SO_BINDTOIFINDEX 0x0041
+
/* Security levels - as per NRL IPv6 - don't actually do anything */
#define SO_SECURITY_AUTHENTICATION 0x5001
#define SO_SECURITY_ENCRYPTION_TRANSPORT 0x5002
diff --git a/arch/xtensa/include/uapi/asm/socket.h b/arch/xtensa/include/uapi/asm/socket.h
index 1de07a7f7680..b434217783d0 100644
--- a/arch/xtensa/include/uapi/asm/socket.h
+++ b/arch/xtensa/include/uapi/asm/socket.h
@@ -119,4 +119,6 @@
#define SO_TXTIME 61
#define SCM_TXTIME SO_TXTIME
+#define SO_BINDTOIFINDEX 62
+
#endif /* _XTENSA_SOCKET_H */
diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h
index a12692e5f7a8..3066ab3853a8 100644
--- a/include/uapi/asm-generic/socket.h
+++ b/include/uapi/asm-generic/socket.h
@@ -110,4 +110,6 @@
#define SO_TXTIME 61
#define SCM_TXTIME SO_TXTIME
+#define SO_BINDTOIFINDEX 62
+
#endif /* __ASM_GENERIC_SOCKET_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 6aa2e7e0b4fb..b53764ebb973 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -520,14 +520,11 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
}
EXPORT_SYMBOL(sk_dst_check);
-static int sock_setbindtodevice(struct sock *sk, char __user *optval,
- int optlen)
+static int sock_setbindtodevice_locked(struct sock *sk, int ifindex)
{
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
struct net *net = sock_net(sk);
- char devname[IFNAMSIZ];
- int index;
/* Sorry... */
ret = -EPERM;
@@ -535,6 +532,32 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
goto out;
ret = -EINVAL;
+ if (ifindex < 0)
+ goto out;
+
+ sk->sk_bound_dev_if = ifindex;
+ if (sk->sk_prot->rehash)
+ sk->sk_prot->rehash(sk);
+ sk_dst_reset(sk);
+
+ ret = 0;
+
+out:
+#endif
+
+ return ret;
+}
+
+static int sock_setbindtodevice(struct sock *sk, char __user *optval,
+ int optlen)
+{
+ int ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+ struct net *net = sock_net(sk);
+ char devname[IFNAMSIZ];
+ int index;
+
+ ret = -EINVAL;
if (optlen < 0)
goto out;
@@ -566,14 +589,9 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
}
lock_sock(sk);
- sk->sk_bound_dev_if = index;
- if (sk->sk_prot->rehash)
- sk->sk_prot->rehash(sk);
- sk_dst_reset(sk);
+ ret = sock_setbindtodevice_locked(sk, index);
release_sock(sk);
- ret = 0;
-
out:
#endif
@@ -1055,6 +1073,10 @@ set_rcvbuf:
}
break;
+ case SO_BINDTOIFINDEX:
+ ret = sock_setbindtodevice_locked(sk, val);
+ break;
+
default:
ret = -ENOPROTOOPT;
break;
@@ -1399,6 +1421,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
SOF_TXTIME_REPORT_ERRORS : 0;
break;
+ case SO_BINDTOIFINDEX:
+ v.val = sk->sk_bound_dev_if;
+ break;
+
default:
/* We implement the SO_SNDLOWAT etc to not be settable
* (1003.1g 7).