summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2014-12-08 20:20:55 -0500
committer: David S. Miller <davem@davemloft.net> 2014-12-08 20:20:55 -0500
commit: aae68bc6f69a574beceeca6d323636782c437fa6 (patch)
tree: b0adfab040099b369f38ebe9a19df9632f85ded8
parent: 8d0c4697534a739725e429ff062dea393d8860d1 (diff)
parent: cbd3aad5ce66f5a266a185aa37e0eb9be9ba4154 (diff)
download: lwn-aae68bc6f69a574beceeca6d323636782c437fa6.tar.gz
download: lwn-aae68bc6f69a574beceeca6d323636782c437fa6.zip
Merge branch 'tstamp-next'
Willem de Bruijn says:

====================
timestamping updates

The main goal for this patchset is to allow correlating timestamps
with the egress interface. Also introduce a warning, as discussed
previously, and update the tests to verify the new feature.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- Documentation/networking/timestamping.txt 33
-rw-r--r-- Documentation/networking/timestamping/txtimestamp.c 90
-rw-r--r-- include/uapi/linux/net_tstamp.h 3
-rw-r--r-- net/ipv4/ip_sockglue.c 24
-rw-r--r-- net/ipv6/datagram.c 21
5 files changed, 146 insertions, 25 deletions
diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt
index 1d6d02d6ba52..a5c784c89312 100644
--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@@ -122,7 +122,7 @@ SOF_TIMESTAMPING_RAW_HARDWARE:
1.3.3 Timestamp Options
-The interface supports one option
+The interface supports the options
SOF_TIMESTAMPING_OPT_ID:
@@ -130,19 +130,36 @@ SOF_TIMESTAMPING_OPT_ID:
have multiple concurrent timestamping requests outstanding. Packets
can be reordered in the transmit path, for instance in the packet
scheduler. In that case timestamps will be queued onto the error
- queue out of order from the original send() calls. This option
- embeds a counter that is incremented at send() time, to order
- timestamps within a flow.
+ queue out of order from the original send() calls. Timestamp order
+ or payload inspection alone is then not always sufficient to
+ uniquely match timestamps to the original send() calls.
+
+ This option associates each packet at send() with a unique
+ identifier and returns that along with the timestamp. The identifier
+ is derived from a per-socket u32 counter (that wraps). For datagram
+ sockets, the counter increments with each sent packet. For stream
+ sockets, it increments with every byte.
+
+ The counter starts at zero. It is initialized the first time that
+ the socket option is enabled. It is reset each time the option is
+ enabled after having been disabled. Resetting the counter does not
+ change the identifiers of existing packets in the system.
This option is implemented only for transmit timestamps. There, the
timestamp is always looped along with a struct sock_extended_err.
The option modifies field ee_data to pass an id that is unique
among all possibly concurrently outstanding timestamp requests for
- that socket. In practice, it is a monotonically increasing u32
- (that wraps).
+ that socket.
+
+
+SOF_TIMESTAMPING_OPT_CMSG:
- In datagram sockets, the counter increments on each send call. In
- stream sockets, it increments with every byte.
+ Support recv() cmsg for all timestamped packets. Control messages
+ are already supported unconditionally on all packets with receive
+ timestamps and on IPv6 packets with transmit timestamp. This option
+ extends them to IPv4 packets with transmit timestamp. One use case
+ is to correlate packets with their egress device, by enabling socket
+ option IP_PKTINFO simultaneously.
1.4 Bytestream Timestamps
diff --git a/Documentation/networking/timestamping/txtimestamp.c b/Documentation/networking/timestamping/txtimestamp.c
index b32fc2a07734..876f71c5625a 100644
--- a/Documentation/networking/timestamping/txtimestamp.c
+++ b/Documentation/networking/timestamping/txtimestamp.c
@@ -46,6 +46,7 @@
#include <netpacket/packet.h>
#include <poll.h>
#include <stdarg.h>
+#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@@ -58,6 +59,14 @@
#include <time.h>
#include <unistd.h>
+/*
+ * ugly hack to work around netinet/in.h and linux/ipv6.h conflicts:
+ * declare struct in6_pktinfo locally so the IPV6_PKTINFO control
+ * message payload can be decoded in this file.
+ *
+ * NOTE(review): #ifndef tests preprocessor macros, not struct tags, so
+ * this guard only skips the definition when a macro named in6_pktinfo
+ * exists -- confirm that this is the intended condition.
+ */
+#ifndef in6_pktinfo
+struct in6_pktinfo {
+ struct in6_addr ipi6_addr; /* address carried in the IPV6_PKTINFO cmsg */
+ int ipi6_ifindex; /* interface index carried in the IPV6_PKTINFO cmsg */
+};
+#endif
+
/* command line parameters */
static int cfg_proto = SOCK_STREAM;
static int cfg_ipproto = IPPROTO_TCP;
@@ -65,6 +74,8 @@ static int cfg_num_pkts = 4;
static int do_ipv4 = 1;
static int do_ipv6 = 1;
static int cfg_payload_len = 10;
+static bool cfg_show_payload;
+static bool cfg_do_pktinfo;
static uint16_t dest_port = 9000;
static struct sockaddr_in daddr;
@@ -131,6 +142,30 @@ static void print_timestamp(struct scm_timestamping *tss, int tstype,
__print_timestamp(tsname, &tss->ts[0], tskey, payload_len);
}
+/*
+ * Hex-dump the leading bytes of @data to stderr on one "payload: " line.
+ * At most 70 bytes are shown, however large @len is.
+ *
+ * TODO: convert to check_and_print payload once API is stable
+ */
+static void print_payload(char *data, int len)
+{
+	const int shown = len > 70 ? 70 : len;
+	int pos = 0;
+
+	fprintf(stderr, "payload: ");
+	while (pos < shown) {
+		fprintf(stderr, "%02hhx ", data[pos]);
+		pos++;
+	}
+	fprintf(stderr, "\n");
+}
+
+/*
+ * Print one " pktinfo:" line to stderr with the interface index and the
+ * textual source and destination addresses of a PKTINFO control message.
+ *
+ * @family:  address family used to decode @saddr and @daddr
+ *           (the callers in this file pass AF_INET or AF_INET6)
+ * @ifindex: interface index taken from the control message
+ * @saddr:   source address in network format, or NULL to print "unknown"
+ * @daddr:   destination address in network format, or NULL to print
+ *           "unknown" (this parameter hides the file-scope daddr here)
+ */
+static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr)
+{
+	char sa[INET6_ADDRSTRLEN], da[INET6_ADDRSTRLEN];
+	const char *src = "unknown";
+	const char *dst = "unknown";
+
+	/* inet_ntop() returns NULL on failure; never hand NULL to %s */
+	if (saddr) {
+		src = inet_ntop(family, saddr, sa, sizeof(sa));
+		if (!src)
+			src = "invalid";
+	}
+	if (daddr) {
+		dst = inet_ntop(family, daddr, da, sizeof(da));
+		if (!dst)
+			dst = "invalid";
+	}
+
+	/* ifindex is a signed int, so print it with %d rather than %u */
+	fprintf(stderr, " pktinfo: ifindex=%d src=%s dst=%s\n",
+		ifindex, src, dst);
+}
+
static void __poll(int fd)
{
struct pollfd pollfd;
@@ -156,10 +191,9 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
cm->cmsg_type == SCM_TIMESTAMPING) {
tss = (void *) CMSG_DATA(cm);
} else if ((cm->cmsg_level == SOL_IP &&
- cm->cmsg_type == IP_RECVERR) ||
- (cm->cmsg_level == SOL_IPV6 &&
- cm->cmsg_type == IPV6_RECVERR)) {
-
+ cm->cmsg_type == IP_RECVERR) ||
+ (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type == IPV6_RECVERR)) {
serr = (void *) CMSG_DATA(cm);
if (serr->ee_errno != ENOMSG ||
serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) {
@@ -168,6 +202,16 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len)
serr->ee_origin);
serr = NULL;
}
+ } else if (cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type == IP_PKTINFO) {
+ struct in_pktinfo *info = (void *) CMSG_DATA(cm);
+ print_pktinfo(AF_INET, info->ipi_ifindex,
+ &info->ipi_spec_dst, &info->ipi_addr);
+ } else if (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type == IPV6_PKTINFO) {
+ struct in6_pktinfo *info6 = (void *) CMSG_DATA(cm);
+ print_pktinfo(AF_INET6, info6->ipi6_ifindex,
+ NULL, &info6->ipi6_addr);
} else
fprintf(stderr, "unknown cmsg %d,%d\n",
cm->cmsg_level, cm->cmsg_type);
@@ -206,7 +250,11 @@ static int recv_errmsg(int fd)
if (ret == -1 && errno != EAGAIN)
error(1, errno, "recvmsg");
- __recv_errmsg_cmsg(&msg, ret);
+ if (ret > 0) {
+ __recv_errmsg_cmsg(&msg, ret);
+ if (cfg_show_payload)
+ print_payload(data, cfg_payload_len);
+ }
free(data);
return ret == -1;
@@ -215,9 +263,9 @@ static int recv_errmsg(int fd)
static void do_test(int family, unsigned int opt)
{
char *buf;
- int fd, i, val, total_len;
+ int fd, i, val = 1, total_len;
- if (family == IPPROTO_IPV6 && cfg_proto != SOCK_STREAM) {
+ if (family == AF_INET6 && cfg_proto != SOCK_STREAM) {
/* due to lack of checksum generation code */
fprintf(stderr, "test: skipping datagram over IPv6\n");
return;
@@ -239,7 +287,6 @@ static void do_test(int family, unsigned int opt)
error(1, errno, "socket");
if (cfg_proto == SOCK_STREAM) {
- val = 1;
if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY,
(char*) &val, sizeof(val)))
error(1, 0, "setsockopt no nagle");
@@ -253,7 +300,20 @@ static void do_test(int family, unsigned int opt)
}
}
+ if (cfg_do_pktinfo) {
+ if (family == AF_INET6) {
+ if (setsockopt(fd, SOL_IPV6, IPV6_RECVPKTINFO,
+ &val, sizeof(val)))
+ error(1, errno, "setsockopt pktinfo ipv6");
+ } else {
+ if (setsockopt(fd, SOL_IP, IP_PKTINFO,
+ &val, sizeof(val)))
+ error(1, errno, "setsockopt pktinfo ipv4");
+ }
+ }
+
opt |= SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_CMSG |
SOF_TIMESTAMPING_OPT_ID;
if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
(char *) &opt, sizeof(opt)))
@@ -262,8 +322,6 @@ static void do_test(int family, unsigned int opt)
for (i = 0; i < cfg_num_pkts; i++) {
memset(&ts_prev, 0, sizeof(ts_prev));
memset(buf, 'a' + i, total_len);
- buf[total_len - 2] = '\n';
- buf[total_len - 1] = '\0';
if (cfg_proto == SOCK_RAW) {
struct udphdr *udph;
@@ -324,11 +382,13 @@ static void __attribute__((noreturn)) usage(const char *filepath)
" -4: only IPv4\n"
" -6: only IPv6\n"
" -h: show this message\n"
+ " -I: request PKTINFO\n"
" -l N: send N bytes at a time\n"
" -r: use raw\n"
" -R: use raw (IP_HDRINCL)\n"
" -p N: connect to port N\n"
- " -u: use udp\n",
+ " -u: use udp\n"
+ " -x: show payload (up to 70 bytes)\n",
filepath);
exit(1);
}
@@ -338,7 +398,7 @@ static void parse_opt(int argc, char **argv)
int proto_count = 0;
char c;
- while ((c = getopt(argc, argv, "46hl:p:rRu")) != -1) {
+ while ((c = getopt(argc, argv, "46hIl:p:rRux")) != -1) {
switch (c) {
case '4':
do_ipv6 = 0;
@@ -346,6 +406,9 @@ static void parse_opt(int argc, char **argv)
case '6':
do_ipv4 = 0;
break;
+ case 'I':
+ cfg_do_pktinfo = true;
+ break;
case 'r':
proto_count++;
cfg_proto = SOCK_RAW;
@@ -367,6 +430,9 @@ static void parse_opt(int argc, char **argv)
case 'p':
dest_port = strtoul(optarg, NULL, 10);
break;
+ case 'x':
+ cfg_show_payload = true;
+ break;
case 'h':
default:
usage(argv[0]);
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index ff354021bb69..edbc888ceb51 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -23,8 +23,9 @@ enum {
SOF_TIMESTAMPING_OPT_ID = (1<<7),
SOF_TIMESTAMPING_TX_SCHED = (1<<8),
SOF_TIMESTAMPING_TX_ACK = (1<<9),
+ SOF_TIMESTAMPING_OPT_CMSG = (1<<10),
- SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_TX_ACK,
+ SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_CMSG,
SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) |
SOF_TIMESTAMPING_LAST
};
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b7826575d215..640f26c6a9fe 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -399,6 +399,22 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 inf
kfree_skb(skb);
}
+/*
+ * Fill the skb's pktinfo control block for an error-queue timestamp.
+ *
+ * Returns false, leaving the control block untouched, unless all of the
+ * following hold: @ee_origin is SO_EE_ORIGIN_TIMESTAMPING, the socket has
+ * SOF_TIMESTAMPING_OPT_CMSG set in sk_tsflags, and the skb still has a
+ * device attached. Otherwise records the IPv4 header source address and
+ * the device ifindex in the control block and returns true.
+ */
+static bool ipv4_pktinfo_prepare_errqueue(const struct sock *sk,
+					  const struct sk_buff *skb,
+					  int ee_origin)
+{
+	struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
+
+	/* only timestamp completions qualify */
+	if (ee_origin != SO_EE_ORIGIN_TIMESTAMPING)
+		return false;
+
+	/* the socket must have opted in */
+	if (!(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_CMSG))
+		return false;
+
+	/* without a device there is no ifindex to report */
+	if (!skb->dev)
+		return false;
+
+	pktinfo->ipi_spec_dst.s_addr = ip_hdr(skb)->saddr;
+	pktinfo->ipi_ifindex = skb->dev->ifindex;
+	return true;
+}
+
/*
* Handle MSG_ERRQUEUE
*/
@@ -414,6 +430,8 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
int err;
int copied;
+ WARN_ON_ONCE(sk->sk_family == AF_INET6);
+
err = -EAGAIN;
skb = sock_dequeue_err_skb(sk);
if (skb == NULL)
@@ -444,7 +462,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err));
sin = &errhdr.offender;
sin->sin_family = AF_UNSPEC;
- if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP) {
+
+ if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP ||
+ ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) {
struct inet_sock *inet = inet_sk(sk);
sin->sin_family = AF_INET;
@@ -1049,7 +1069,7 @@ e_inval:
}
/**
- * ipv4_pktinfo_prepare - transfert some info from rtable to skb
+ * ipv4_pktinfo_prepare - transfer some info from rtable to skb
* @sk: socket
* @skb: buffer
*
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index cc1139687fd7..2464a00e36ab 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -325,6 +325,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)
kfree_skb(skb);
}
+/*
+ * Record the skb's device ifindex in its control block before the
+ * error-queue control messages are generated. The value is stored in
+ * IP6CB(skb)->iif for ETH_P_IPV6 packets and in the pktinfo control
+ * block otherwise. -1 is stored when the skb has no device.
+ */
+static void ip6_datagram_prepare_pktinfo_errqueue(struct sk_buff *skb)
+{
+	int ifindex = -1;
+
+	if (skb->dev)
+		ifindex = skb->dev->ifindex;
+
+	if (skb->protocol != htons(ETH_P_IPV6)) {
+		PKTINFO_SKB_CB(skb)->ipi_ifindex = ifindex;
+		return;
+	}
+
+	IP6CB(skb)->iif = ifindex;
+}
+
/*
* Handle MSG_ERRQUEUE
*/
@@ -388,8 +398,12 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
sin->sin6_family = AF_INET6;
sin->sin6_flowinfo = 0;
sin->sin6_port = 0;
- if (np->rxopt.all)
+ if (np->rxopt.all) {
+ if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP &&
+ serr->ee.ee_origin != SO_EE_ORIGIN_ICMP6)
+ ip6_datagram_prepare_pktinfo_errqueue(skb);
ip6_datagram_recv_common_ctl(sk, msg, skb);
+ }
if (skb->protocol == htons(ETH_P_IPV6)) {
sin->sin6_addr = ipv6_hdr(skb)->saddr;
if (np->rxopt.all)
@@ -491,7 +505,10 @@ void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg,
ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr,
&src_info.ipi6_addr);
}
- put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);
+
+ if (src_info.ipi6_ifindex >= 0)
+ put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO,
+ sizeof(src_info), &src_info);
}
}