summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorAlexei Starovoitov <ast@kernel.org>2019-02-10 19:46:17 -0800
committerAlexei Starovoitov <ast@kernel.org>2019-02-10 19:46:18 -0800
commitd105fa983c582ab92923b160cc7e9d19e5d9ce3c (patch)
treeacbb0d1786bcccde256ee57a9be1fd731885c1cf /include
parent5f4566498dee5e38e36a015a968c22ed21568f0b (diff)
parente0b27b3f97b8fce620331baad563833617c1f303 (diff)
downloadlwn-d105fa983c582ab92923b160cc7e9d19e5d9ce3c.tar.gz
lwn-d105fa983c582ab92923b160cc7e9d19e5d9ce3c.zip
Merge branch 'skb_sk-sk_fullsock-tcp_sock'
Martin KaFai Lau says: ==================== This series adds __sk_buff->sk, "struct bpf_tcp_sock", BPF_FUNC_sk_fullsock and BPF_FUNC_tcp_sock. Together, they provide a common way to expose the members of "struct tcp_sock" and "struct bpf_sock" for the bpf_prog to access. The patch series first adds a bpf_sock pointer to __sk_buff and a new helper BPF_FUNC_sk_fullsock. It then adds BPF_FUNC_tcp_sock to get a bpf_tcp_sock pointer from a bpf_sock pointer. The current use case is to allow a cg_skb_bpf_prog to provide per cgroup traffic policing/shaping. Please see individual patch for details. v2: - Patch 1 depends on commit d623876646be ("bpf: Fix narrow load on a bpf_sock returned from sk_lookup()") in the bpf branch. - Add sk_to_full_sk() to bpf_sk_fullsock() and bpf_tcp_sock() such that there is a way to access the listener's sk and tcp_sk when __sk_buff->sk is a request_sock. The comments in the uapi bpf.h is updated accordingly. - bpf_ctx_range_till() is used in bpf_sock_common_is_valid_access() in patch 1. Saved a few lines. - Patch 2 is new in v2 and it adds "state", "dst_ip4", "dst_ip6" and "dst_port" to the bpf_sock. Narrow load is allowed on them. The "state" (i.e. sk_state) has already been used in INET_DIAG (e.g. ss -t) and getsockopt(TCP_INFO). - While at it in the new patch 2, also allow narrow load on some existing fields of the bpf_sock, which are "family", "type", "protocol" and "src_port". Only allow loading from first byte for now. i.e. does not allow narrow load starting from the 2nd byte. - Add some narrow load tests to the test_verifier's sock.c ==================== Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Diffstat (limited to 'include')
-rw-r--r--include/linux/bpf.h42
-rw-r--r--include/uapi/linux/bpf.h72
2 files changed, 107 insertions, 7 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index bd169a7bcc93..7f58828755fd 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -194,6 +194,7 @@ enum bpf_arg_type {
ARG_ANYTHING, /* any (initialized) argument is ok */
ARG_PTR_TO_SOCKET, /* pointer to bpf_sock */
ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */
+ ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */
};
/* type of values returned from helper functions */
@@ -203,6 +204,7 @@ enum bpf_return_type {
RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */
RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */
RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */
+ RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */
};
/* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -256,6 +258,10 @@ enum bpf_reg_type {
PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */
PTR_TO_SOCKET, /* reg points to struct bpf_sock */
PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */
+ PTR_TO_SOCK_COMMON, /* reg points to sock_common */
+ PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
+ PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
+ PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
};
/* The information passed from prog-specific *_is_valid_access
@@ -920,6 +926,9 @@ void bpf_user_rnd_init_once(void);
u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
#if defined(CONFIG_NET)
+bool bpf_sock_common_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info);
bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
struct bpf_insn_access_aux *info);
u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
@@ -928,6 +937,12 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
struct bpf_prog *prog,
u32 *target_size);
#else
+static inline bool bpf_sock_common_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
+{
+ return false;
+}
static inline bool bpf_sock_is_valid_access(int off, int size,
enum bpf_access_type type,
struct bpf_insn_access_aux *info)
@@ -944,4 +959,31 @@ static inline u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
}
#endif
+#ifdef CONFIG_INET
+bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
+ struct bpf_insn_access_aux *info);
+
+u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size);
+#else
+static inline bool bpf_tcp_sock_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ struct bpf_insn_access_aux *info)
+{
+ return false;
+}
+
+static inline u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
+ const struct bpf_insn *si,
+ struct bpf_insn *insn_buf,
+ struct bpf_prog *prog,
+ u32 *target_size)
+{
+ return 0;
+}
+#endif /* CONFIG_INET */
+
#endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1777fa0c61e4..25c8c0e62ecf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2329,6 +2329,23 @@ union bpf_attr {
* "**y**".
* Return
* 0
+ *
+ * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
+ * Description
+ * This helper gets a **struct bpf_sock** pointer such
+ * that all the fields in bpf_sock can be accessed.
+ * Return
+ * A **struct bpf_sock** pointer on success, or NULL in
+ * case of failure.
+ *
+ * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
+ * Description
+ * This helper gets a **struct bpf_tcp_sock** pointer from a
+ * **struct bpf_sock** pointer.
+ *
+ * Return
+ * A **struct bpf_tcp_sock** pointer on success, or NULL in
+ * case of failure.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2425,7 +2442,9 @@ union bpf_attr {
FN(msg_pop_data), \
FN(rc_pointer_rel), \
FN(spin_lock), \
- FN(spin_unlock),
+ FN(spin_unlock), \
+ FN(sk_fullsock), \
+ FN(tcp_sock),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
@@ -2545,6 +2564,7 @@ struct __sk_buff {
__u64 tstamp;
__u32 wire_len;
__u32 gso_segs;
+ __bpf_md_ptr(struct bpf_sock *, sk);
};
struct bpf_tunnel_key {
@@ -2596,14 +2616,52 @@ struct bpf_sock {
__u32 protocol;
__u32 mark;
__u32 priority;
- __u32 src_ip4; /* Allows 1,2,4-byte read.
- * Stored in network byte order.
+ /* IP address also allows 1 and 2 bytes access */
+ __u32 src_ip4;
+ __u32 src_ip6[4];
+ __u32 src_port; /* host byte order */
+ __u32 dst_port; /* network byte order */
+ __u32 dst_ip4;
+ __u32 dst_ip6[4];
+ __u32 state;
+};
+
+struct bpf_tcp_sock {
+ __u32 snd_cwnd; /* Sending congestion window */
+ __u32 srtt_us; /* smoothed round trip time << 3 in usecs */
+ __u32 rtt_min;
+ __u32 snd_ssthresh; /* Slow start size threshold */
+ __u32 rcv_nxt; /* What we want to receive next */
+ __u32 snd_nxt; /* Next sequence we send */
+ __u32 snd_una; /* First byte we want an ack for */
+ __u32 mss_cache; /* Cached effective mss, not including SACKS */
+ __u32 ecn_flags; /* ECN status bits. */
+ __u32 rate_delivered; /* saved rate sample: packets delivered */
+ __u32 rate_interval_us; /* saved rate sample: time elapsed */
+ __u32 packets_out; /* Packets which are "in flight" */
+ __u32 retrans_out; /* Retransmitted packets out */
+ __u32 total_retrans; /* Total retransmits for entire connection */
+ __u32 segs_in; /* RFC4898 tcpEStatsPerfSegsIn
+ * total number of segments in.
*/
- __u32 src_ip6[4]; /* Allows 1,2,4-byte read.
- * Stored in network byte order.
+ __u32 data_segs_in; /* RFC4898 tcpEStatsPerfDataSegsIn
+ * total number of data segments in.
+ */
+ __u32 segs_out; /* RFC4898 tcpEStatsPerfSegsOut
+ * The total number of segments sent.
+ */
+ __u32 data_segs_out; /* RFC4898 tcpEStatsPerfDataSegsOut
+ * total number of data segments sent.
+ */
+ __u32 lost_out; /* Lost packets */
+ __u32 sacked_out; /* SACK'd packets */
+ __u64 bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived
+ * sum(delta(rcv_nxt)), or how many bytes
+ * were acked.
*/
- __u32 src_port; /* Allows 4-byte read.
- * Stored in host byte order
+ __u64 bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked
+ * sum(delta(snd_una)), or how many bytes
+ * were acked.
*/
};