From 51f3d02b980a338cd291d2bc7629cdfb2568424b Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 5 Nov 2014 16:46:40 -0500 Subject: net: Add and use skb_copy_datagram_msg() helper. This encapsulates all of the skb_copy_datagram_iovec() callers with call argument signature "skb, offset, msghdr->msg_iov, length". When we move to iov_iters in the networking, the iov_iter object will sit in the msghdr. Having a helper like this means there will be less places to touch during that transformation. Based upon descriptions and patch from Al Viro. Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/ip_sockglue.c') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index c373a9ad4555..21894df66262 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -424,7 +424,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) msg->msg_flags |= MSG_TRUNC; copied = len; } - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied); + err = skb_copy_datagram_msg(skb, 0, msg, copied); if (err) goto out_free_skb; -- cgit v1.2.3 From 7ce875e5ecb8562fd44040f69bda96c999e38bbc Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sun, 30 Nov 2014 22:22:33 -0500 Subject: ipv4: warn once on passing AF_INET6 socket to ip_recv_error One line change, in response to catching an occurrence of this bug. See also fix f4713a3dfad0 ("net-timestamp: make tcp_recvmsg call ...") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- net/ipv4/ip_sockglue.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/ipv4/ip_sockglue.c') diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index b7826575d215..59eba6c7a512 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -414,6 +414,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) int err; int copied; + WARN_ON_ONCE(sk->sk_family == AF_INET6); + err = -EAGAIN; skb = sock_dequeue_err_skb(sk); if (skb == NULL) -- cgit v1.2.3 From 829ae9d611651467fe6cd7be834bd33ca6b28dfe Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Sun, 30 Nov 2014 22:22:34 -0500 Subject: net-timestamp: allow reading recv cmsg on errqueue with origin tstamp Allow reading of timestamps and cmsg at the same time on all relevant socket families. One use is to correlate timestamps with egress device, by asking for cmsg IP_PKTINFO. on AF_INET sockets, call the relevant function (ip_cmsg_recv). To avoid changing legacy expectations, only do so if the caller sets a new timestamping flag SOF_TIMESTAMPING_OPT_CMSG. on AF_INET6 sockets, IPV6_PKTINFO and all other recv cmsg are already returned for all origins. only change is to set ifindex, which is not initialized for all error origins. In both cases, only generate the pktinfo message if an ifindex is known. This is not the case for ACK timestamps. The difference between the protocol families is probably a historical accident as a result of the different conditions for generating cmsg in the relevant ip(v6)_recv_error function: ipv4: if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) { ipv6: if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { At one time, this was the same test bar for the ICMP/ICMP6 distinction. This is no longer true. Signed-off-by: Willem de Bruijn ---- Changes v1 -> v2 large rewrite - integrate with existing pktinfo cmsg generation code - on ipv4: only send with new flag, to maintain legacy behavior - on ipv6: send at most a single pktinfo cmsg - on ipv6: initialize fields if not yet initialized The recv cmsg interfaces are also relevant to the discussion of whether looping packet headers is problematic. For v6, cmsgs that identify many headers are already returned. This patch expands that to v4. If it sounds reasonable, I will follow with patches 1. request timestamps without payload with SOF_TIMESTAMPING_OPT_TSONLY (http://patchwork.ozlabs.org/patch/366967/) 2. sysctl to conditionally drop all timestamps that have payload or cmsg from users without CAP_NET_RAW. Signed-off-by: David S. Miller --- Documentation/networking/timestamping.txt | 12 +++++++++++- include/uapi/linux/net_tstamp.h | 3 ++- net/ipv4/ip_sockglue.c | 22 ++++++++++++++++++++-- net/ipv6/datagram.c | 21 +++++++++++++++++++-- 4 files changed, 52 insertions(+), 6 deletions(-) (limited to 'net/ipv4/ip_sockglue.c') diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index 1d6d02d6ba52..b08e27261ff9 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -122,7 +122,7 @@ SOF_TIMESTAMPING_RAW_HARDWARE: 1.3.3 Timestamp Options -The interface supports one option +The interface supports the options SOF_TIMESTAMPING_OPT_ID: @@ -145,6 +145,16 @@ SOF_TIMESTAMPING_OPT_ID: stream sockets, it increments with every byte. +SOF_TIMESTAMPING_OPT_CMSG: + + Support recv() cmsg for all timestamped packets. Control messages + are already supported unconditionally on all packets with receive + timestamps and on IPv6 packets with transmit timestamp. This option + extends them to IPv4 packets with transmit timestamp. One use case + is to correlate packets with their egress device, by enabling socket + option IP_PKTINFO simultaneously. + + 1.4 Bytestream Timestamps The SO_TIMESTAMPING interface supports timestamping of bytes in a diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index ff354021bb69..edbc888ceb51 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -23,8 +23,9 @@ enum { SOF_TIMESTAMPING_OPT_ID = (1<<7), SOF_TIMESTAMPING_TX_SCHED = (1<<8), SOF_TIMESTAMPING_TX_ACK = (1<<9), + SOF_TIMESTAMPING_OPT_CMSG = (1<<10), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_ACK, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_CMSG, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 59eba6c7a512..640f26c6a9fe 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -399,6 +399,22 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf kfree_skb(skb); } +static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk, + const struct sk_buff *skb, + int ee_origin) +{ + struct in_pktinfo *info = PKTINFO_SKB_CB(skb); + + if ((ee_origin != SO_EE_ORIGIN_TIMESTAMPING) || + (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG)) || + (!skb->dev)) + return false; + + info->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr; + info->ipi_ifindex = skb->dev->ifindex; + return true; +} + /* * Handle MSG_ERRQUEUE */ @@ -446,7 +462,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; sin->sin_family = AF_UNSPEC; - if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) { + + if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) { struct inet_sock *inet = inet_sk(sk); sin->sin_family = AF_INET; @@ -1051,7 +1069,7 @@ e_inval: } /** - * ipv4_pktinfo_prepare - transfert some info from rtable to skb + * ipv4_pktinfo_prepare - transfer some info from rtable to skb * @sk: socket * @skb: buffer * diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index cc1139687fd7..2464a00e36ab 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -325,6 +325,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) kfree_skb(skb); } +static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb) +{ + int ifindex = skb->dev ? skb->dev->ifindex : -1; + + if (skb->protocol == htons(ETH_P_IPV6)) + IP6CB(skb)->iif = ifindex; + else + PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex; +} + /* * Handle MSG_ERRQUEUE */ @@ -388,8 +398,12 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_port = 0; - if (np->rxopt.all) + if (np->rxopt.all) { + if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP && + serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6) + ip6_datagram_prepare_pktinfo_errqueue(skb); ip6_datagram_recv_common_ctl(sk, msg, skb); + } if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) @@ -491,7 +505,10 @@ void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg, ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, &src_info.ipi6_addr); } - put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); + + if (src_info.ipi6_ifindex >= 0) + put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, + sizeof(src_info), &src_info); } } -- cgit v1.2.3 From f95b414edb18de59940dcebbefb49cf25c6d505c Mon Sep 17 00:00:00 2001 From: Gu Zheng Date: Thu, 11 Dec 2014 11:22:04 +0800 Subject: net: introduce helper macro for_each_cmsghdr Introduce helper macro for_each_cmsghdr as a wrapper of the enumerating cmsghdr from msghdr, just cleanup. Signed-off-by: Gu Zheng Signed-off-by: David S. Miller --- crypto/af_alg.c | 2 +- include/linux/socket.h | 4 ++++ net/core/scm.c | 3 +-- net/dccp/proto.c | 5 ++--- net/ipv4/ip_sockglue.c | 2 +- net/ipv6/datagram.c | 2 +- net/iucv/af_iucv.c | 4 +--- net/rds/send.c | 4 ++-- net/rxrpc/ar-output.c | 2 +- net/sctp/socket.c | 3 +-- 10 files changed, 15 insertions(+), 16 deletions(-) (limited to 'net/ipv4/ip_sockglue.c') diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 6a3ad8011585..bc21f520d489 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -399,7 +399,7 @@ int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con) { struct cmsghdr *cmsg; - for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { + for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; if (cmsg->cmsg_level != SOL_ALG) diff --git a/include/linux/socket.h b/include/linux/socket.h index 048d6d6eed6d..6e49a14365dc 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -103,6 +103,10 @@ struct cmsghdr { (cmsg)->cmsg_len <= (unsigned long) \ ((mhdr)->msg_controllen - \ ((char *)(cmsg) - (char *)(mhdr)->msg_control))) +#define for_each_cmsghdr(cmsg, msg) \ + for (cmsg = CMSG_FIRSTHDR(msg); \ + cmsg; \ + cmsg = CMSG_NXTHDR(msg, cmsg)) /* * Get the next cmsg header diff --git a/net/core/scm.c b/net/core/scm.c index b442e7e25e60..3b6899b7d810 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -129,8 +129,7 @@ int __scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *p) struct cmsghdr *cmsg; int err; - for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) - { + for_each_cmsghdr(cmsg, msg) { err = -EINVAL; /* Verify that cmsg_len is at least sizeof(struct cmsghdr) */ diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 19f038739087..e171b780b499 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -703,7 +703,7 @@ EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) { - struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg); + struct cmsghdr *cmsg; /* * Assign an (opaque) qpolicy priority value to skb->priority. @@ -717,8 +717,7 @@ static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) */ skb->priority = 0; - for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) { - + for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 640f26c6a9fe..8a89c738b7a3 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -192,7 +192,7 @@ int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, int err, val; struct cmsghdr *cmsg; - for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { + for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; #if IS_ENABLED(CONFIG_IPV6) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 2464a00e36ab..100c589a2a6c 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -657,7 +657,7 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, int len; int err = 0; - for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { + for_each_cmsghdr(cmsg, msg) { int addr_type; if (!CMSG_OK(msg, cmsg)) { diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 1cd3f8107239..2e9953b2db84 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -1070,9 +1070,7 @@ static int iucv_sock_sendmsg(struct kiocb *iocb, struct socket *sock, txmsg.class = 0; /* iterate over control messages */ - for (cmsg = CMSG_FIRSTHDR(msg); cmsg; - cmsg = CMSG_NXTHDR(msg, cmsg)) { - + for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) { err = -EINVAL; goto out; diff --git a/net/rds/send.c b/net/rds/send.c index 40a5629a0a13..42f65d4305c8 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -826,7 +826,7 @@ static int rds_rm_size(struct msghdr *msg, int data_len) int cmsg_groups = 0; int retval; - for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { + for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; @@ -878,7 +878,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, struct cmsghdr *cmsg; int ret = 0; - for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { + for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index 86e0f10aa2c0..e1a9373e5979 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -45,7 +45,7 @@ static int rxrpc_sendmsg_cmsg(struct rxrpc_sock *rx, struct msghdr *msg, if (msg->msg_controllen == 0) return -EINVAL; - for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) { + for_each_cmsghdr(cmsg, msg) { if (!CMSG_OK(msg, cmsg)) return -EINVAL; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index c92f96cda699..2625eccb77d5 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6592,8 +6592,7 @@ static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) struct cmsghdr *cmsg; struct msghdr *my_msg = (struct msghdr *)msg; - for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; - cmsg = CMSG_NXTHDR(my_msg, cmsg)) { + for_each_cmsghdr(cmsg, my_msg) { if (!CMSG_OK(my_msg, cmsg)) return -EINVAL; -- cgit v1.2.3