Merge branch 'skb_sk-sk_fullsock-tcp_sock'

Martin KaFai Lau says: ==================== This series adds __sk_buff->sk, "struct bpf_tcp_sock", BPF_FUNC_sk_fullsock and BPF_FUNC_tcp_sock. Together, they provide a common way to expose the members of "struct tcp_sock" and "struct bpf_sock" for the bpf_prog to access. The patch series first adds a bpf_sock pointer to __sk_buff and a new helper BPF_FUNC_sk_fullsock. It then adds BPF_FUNC_tcp_sock to get a bpf_tcp_sock pointer from a bpf_sock pointer. The current use case is to allow a cg_skb_bpf_prog to provide per-cgroup traffic policing/shaping. Please see the individual patches for details. v2: - Patch 1 depends on commit d623876646be ("bpf: Fix narrow load on a bpf_sock returned from sk_lookup()") in the bpf branch. - Add sk_to_full_sk() to bpf_sk_fullsock() and bpf_tcp_sock() such that there is a way to access the listener's sk and tcp_sk when __sk_buff->sk is a request_sock. The comments in the uapi bpf.h are updated accordingly. - bpf_ctx_range_till() is used in bpf_sock_common_is_valid_access() in patch 1. Saved a few lines. - Patch 2 is new in v2 and it adds "state", "dst_ip4", "dst_ip6" and "dst_port" to the bpf_sock. Narrow load is allowed on them. The "state" (i.e. sk_state) has already been used in INET_DIAG (e.g. ss -t) and getsockopt(TCP_INFO). - While at it in the new patch 2, also allow narrow load on some existing fields of the bpf_sock, which are "family", "type", "protocol" and "src_port". Only loading from the first byte is allowed for now, i.e. a narrow load starting from the 2nd byte is not allowed. - Add some narrow load tests to the test_verifier's sock.c ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
author: Alexei Starovoitov <ast@kernel.org> 2019-02-10 19:46:17 -0800
committer: Alexei Starovoitov <ast@kernel.org> 2019-02-10 19:46:18 -0800
commit: d105fa983c582ab92923b160cc7e9d19e5d9ce3c (patch)
tree: acbb0d1786bcccde256ee57a9be1fd731885c1cf /include/uapi/linux
parent: 5f4566498dee5e38e36a015a968c22ed21568f0b (diff)
parent: e0b27b3f97b8fce620331baad563833617c1f303 (diff)
download: lwn-d105fa983c582ab92923b160cc7e9d19e5d9ce3c.tar.gz
lwn-d105fa983c582ab92923b160cc7e9d19e5d9ce3c.zip
1 files changed, 65 insertions, 7 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1777fa0c61e4..25c8c0e62ecf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2329,6 +2329,23 @@ union bpf_attr {
  *		"**y**".
  *	Return
  *		0
+ *
+ * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
+ *	Description
+ *		This helper gets a **struct bpf_sock** pointer such
+ *		that all the fields in bpf_sock can be accessed.
+ *	Return
+ *		A **struct bpf_sock** pointer on success, or NULL in
+ *		case of failure.
+ *
+ * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
+ *	Description
+ *		This helper gets a **struct bpf_tcp_sock** pointer from a
+ *		**struct bpf_sock** pointer.
+ *
+ *	Return
+ *		A **struct bpf_tcp_sock** pointer on success, or NULL in
+ *		case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2425,7 +2442,9 @@ union bpf_attr {
 	FN(msg_pop_data),		\
 	FN(rc_pointer_rel),		\
 	FN(spin_lock),			\
-	FN(spin_unlock),
+	FN(spin_unlock),		\
+	FN(sk_fullsock),		\
+	FN(tcp_sock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -2545,6 +2564,7 @@ struct __sk_buff {
 	__u64 tstamp;
 	__u32 wire_len;
 	__u32 gso_segs;
+	__bpf_md_ptr(struct bpf_sock *, sk);
 };
 
 struct bpf_tunnel_key {
@@ -2596,14 +2616,52 @@ struct bpf_sock {
 	__u32 protocol;
 	__u32 mark;
 	__u32 priority;
-	__u32 src_ip4;		/* Allows 1,2,4-byte read.
-				 * Stored in network byte order.
+	/* IP address also allows 1 and 2 bytes access */
+	__u32 src_ip4;
+	__u32 src_ip6[4];
+	__u32 src_port;		/* host byte order */
+	__u32 dst_port;		/* network byte order */
+	__u32 dst_ip4;
+	__u32 dst_ip6[4];
+	__u32 state;
+};
+
+struct bpf_tcp_sock {
+	__u32 snd_cwnd;		/* Sending congestion window		*/
+	__u32 srtt_us;		/* smoothed round trip time << 3 in usecs */
+	__u32 rtt_min;
+	__u32 snd_ssthresh;	/* Slow start size threshold		*/
+	__u32 rcv_nxt;		/* What we want to receive next		*/
+	__u32 snd_nxt;		/* Next sequence we send		*/
+	__u32 snd_una;		/* First byte we want an ack for	*/
+	__u32 mss_cache;	/* Cached effective mss, not including SACKS */
+	__u32 ecn_flags;	/* ECN status bits.			*/
+	__u32 rate_delivered;	/* saved rate sample: packets delivered */
+	__u32 rate_interval_us;	/* saved rate sample: time elapsed */
+	__u32 packets_out;	/* Packets which are "in flight"	*/
+	__u32 retrans_out;	/* Retransmitted packets out		*/
+	__u32 total_retrans;	/* Total retransmits for entire connection */
+	__u32 segs_in;		/* RFC4898 tcpEStatsPerfSegsIn
+				 * total number of segments in.
 				 */
-	__u32 src_ip6[4];	/* Allows 1,2,4-byte read.
-				 * Stored in network byte order.
+	__u32 data_segs_in;	/* RFC4898 tcpEStatsPerfDataSegsIn
+				 * total number of data segments in.
+				 */
+	__u32 segs_out;		/* RFC4898 tcpEStatsPerfSegsOut
+				 * The total number of segments sent.
+				 */
+	__u32 data_segs_out;	/* RFC4898 tcpEStatsPerfDataSegsOut
+				 * total number of data segments sent.
+				 */
+	__u32 lost_out;		/* Lost packets			*/
+	__u32 sacked_out;	/* SACK'd packets			*/
+	__u64 bytes_received;	/* RFC4898 tcpEStatsAppHCThruOctetsReceived
+				 * sum(delta(rcv_nxt)), or how many bytes
+				 * were acked.
 				 */
-	__u32 src_port;		/* Allows 4-byte read.
-				 * Stored in host byte order
+	__u64 bytes_acked;	/* RFC4898 tcpEStatsAppHCThruOctetsAcked
+				 * sum(delta(snd_una)), or how many bytes
+				 * were acked.
 				 */
 };
author	Alexei Starovoitov <ast@kernel.org>	2019-02-10 19:46:17 -0800
committer	Alexei Starovoitov <ast@kernel.org>	2019-02-10 19:46:18 -0800
commit	d105fa983c582ab92923b160cc7e9d19e5d9ce3c (patch)
tree	acbb0d1786bcccde256ee57a9be1fd731885c1cf /include/uapi/linux
parent	5f4566498dee5e38e36a015a968c22ed21568f0b (diff)
parent	e0b27b3f97b8fce620331baad563833617c1f303 (diff)
download	lwn-d105fa983c582ab92923b160cc7e9d19e5d9ce3c.tar.gz lwn-d105fa983c582ab92923b160cc7e9d19e5d9ce3c.zip