summaryrefslogtreecommitdiff
path: root/include/uapi/linux
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2019-02-10 19:46:17 -0800
committerAlexei Starovoitov <ast@kernel.org>2019-02-10 19:46:18 -0800
commitd105fa983c582ab92923b160cc7e9d19e5d9ce3c (patch)
treeacbb0d1786bcccde256ee57a9be1fd731885c1cf /include/uapi/linux
parent5f4566498dee5e38e36a015a968c22ed21568f0b (diff)
parente0b27b3f97b8fce620331baad563833617c1f303 (diff)
downloadlwn-d105fa983c582ab92923b160cc7e9d19e5d9ce3c.tar.gz
lwn-d105fa983c582ab92923b160cc7e9d19e5d9ce3c.zip
Merge branch 'skb_sk-sk_fullsock-tcp_sock'
Martin KaFai Lau says: ==================== This series adds __sk_buff->sk, "struct bpf_tcp_sock", BPF_FUNC_sk_fullsock and BPF_FUNC_tcp_sock. Together, they provide a common way to expose the members of "struct tcp_sock" and "struct bpf_sock" for the bpf_prog to access. The patch series first adds a bpf_sock pointer to __sk_buff and a new helper BPF_FUNC_sk_fullsock. It then adds BPF_FUNC_tcp_sock to get a bpf_tcp_sock pointer from a bpf_sock pointer. The current use case is to allow a cg_skb_bpf_prog to provide per cgroup traffic policing/shaping. Please see individual patch for details. v2: - Patch 1 depends on commit d623876646be ("bpf: Fix narrow load on a bpf_sock returned from sk_lookup()") in the bpf branch. - Add sk_to_full_sk() to bpf_sk_fullsock() and bpf_tcp_sock() such that there is a way to access the listener's sk and tcp_sk when __sk_buff->sk is a request_sock. The comments in the uapi bpf.h is updated accordingly. - bpf_ctx_range_till() is used in bpf_sock_common_is_valid_access() in patch 1. Saved a few lines. - Patch 2 is new in v2 and it adds "state", "dst_ip4", "dst_ip6" and "dst_port" to the bpf_sock. Narrow load is allowed on them. The "state" (i.e. sk_state) has already been used in INET_DIAG (e.g. ss -t) and getsockopt(TCP_INFO). - While at it in the new patch 2, also allow narrow load on some existing fields of the bpf_sock, which are "family", "type", "protocol" and "src_port". Only allow loading from first byte for now. i.e. does not allow narrow load starting from the 2nd byte. - Add some narrow load tests to the test_verifier's sock.c ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'include/uapi/linux')
-rw-r--r--include/uapi/linux/bpf.h72
1 files changed, 65 insertions, 7 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1777fa0c61e4..25c8c0e62ecf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2329,6 +2329,23 @@ union bpf_attr {
* "**y**".
* Return
* 0
+ *
+ * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
+ * Description
+ * This helper gets a **struct bpf_sock** pointer such
+ * that all the fields in bpf_sock can be accessed.
+ * Return
+ * A **struct bpf_sock** pointer on success, or NULL in
+ * case of failure.
+ *
+ * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
+ * Description
+ * This helper gets a **struct bpf_tcp_sock** pointer from a
+ * **struct bpf_sock** pointer.
+ *
+ * Return
+ * A **struct bpf_tcp_sock** pointer on success, or NULL in
+ * case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2425,7 +2442,9 @@ union bpf_attr {
FN(msg_pop_data), \
FN(rc_pointer_rel), \
FN(spin_lock), \
- FN(spin_unlock),
+ FN(spin_unlock), \
+ FN(sk_fullsock), \
+ FN(tcp_sock),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -2545,6 +2564,7 @@ struct __sk_buff {
__u64 tstamp;
__u32 wire_len;
__u32 gso_segs;
+ __bpf_md_ptr(struct bpf_sock *, sk);
};
struct bpf_tunnel_key {
@@ -2596,14 +2616,52 @@ struct bpf_sock {
__u32 protocol;
__u32 mark;
__u32 priority;
- __u32 src_ip4; /* Allows 1,2,4-byte read.
- * Stored in network byte order.
+ /* IP address also allows 1 and 2 bytes access */
+ __u32 src_ip4;
+ __u32 src_ip6[4];
+ __u32 src_port; /* host byte order */
+ __u32 dst_port; /* network byte order */
+ __u32 dst_ip4;
+ __u32 dst_ip6[4];
+ __u32 state;
+};
+
+struct bpf_tcp_sock {
+ __u32 snd_cwnd; /* Sending congestion window */
+ __u32 srtt_us; /* smoothed round trip time << 3 in usecs */
+ __u32 rtt_min;
+ __u32 snd_ssthresh; /* Slow start size threshold */
+ __u32 rcv_nxt; /* What we want to receive next */
+ __u32 snd_nxt; /* Next sequence we send */
+ __u32 snd_una; /* First byte we want an ack for */
+ __u32 mss_cache; /* Cached effective mss, not including SACKS */
+ __u32 ecn_flags; /* ECN status bits. */
+ __u32 rate_delivered; /* saved rate sample: packets delivered */
+ __u32 rate_interval_us; /* saved rate sample: time elapsed */
+ __u32 packets_out; /* Packets which are "in flight" */
+ __u32 retrans_out; /* Retransmitted packets out */
+ __u32 total_retrans; /* Total retransmits for entire connection */
+ __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn
+ * total number of segments in.
*/
- __u32 src_ip6[4]; /* Allows 1,2,4-byte read.
- * Stored in network byte order.
+ __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn
+ * total number of data segments in.
+ */
+ __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut
+ * The total number of segments sent.
+ */
+ __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut
+ * total number of data segments sent.
+ */
+ __u32 lost_out; /* Lost packets */
+ __u32 sacked_out; /* SACK'd packets */
+ __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived
+ * sum(delta(rcv_nxt)), or how many bytes
+ * were acked.
*/
- __u32 src_port; /* Allows 4-byte read.
- * Stored in host byte order
+ __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
+ * sum(delta(snd_una)), or how many bytes
+ * were acked.
*/
};