diff options
author | David S. Miller <davem@davemloft.net> | 2016-12-02 13:46:21 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-12-02 13:46:21 -0500 |
commit | b5b5eca9aa4166779e184685dcd838f4d0775e76 (patch) | |
tree | 546165a4619501f722cb551411164e1b88e693b3 | |
parent | 7f7bf1606fa8fa0e3aecdeac0ba8005f2a0fbdef (diff) | |
parent | 554ae6e792ef38020b80b4d5127c51d510c0918f (diff) | |
download | lwn-b5b5eca9aa4166779e184685dcd838f4d0775e76.tar.gz lwn-b5b5eca9aa4166779e184685dcd838f4d0775e76.zip |
Merge branch 'bpf-support-for-sockets'
David Ahern says:
====================
net: Add bpf support for sockets
The recently added VRF support in Linux leverages the bind-to-device
API for programs to specify an L3 domain for a socket. While
SO_BINDTODEVICE has been around for ages, not every ipv4/ipv6 capable
program has support for it. Even for those programs that do support it,
the API requires processes to be started as root (CAP_NET_RAW) which
is not desirable from a general security perspective.
This patch set leverages Daniel Mack's work to attach bpf programs to
a cgroup to provide a capability to set sk_bound_dev_if for all
AF_INET{6} sockets opened by a process in a cgroup when the sockets
are allocated.
For example:
1. configure vrf (e.g., using ifupdown2)
auto eth0
iface eth0 inet dhcp
vrf mgmt
auto mgmt
iface mgmt
vrf-table auto
2. configure cgroup
mount -t cgroup2 none /tmp/cgroupv2
mkdir /tmp/cgroupv2/mgmt
test_cgrp2_sock /tmp/cgroupv2/mgmt 15
3. set shell into cgroup (e.g., can be done at login using pam)
echo $$ >> /tmp/cgroupv2/mgmt/cgroup.procs
At this point all commands run in the shell (e.g, apt) have sockets
automatically bound to the VRF (see output of ss -ap 'dev == <vrf>'),
including processes not running as root.
This capability enables running any program in a VRF context and is key
to deploying Management VRF, a fundamental configuration for networking
gear, with any Linux OS installation.
This patchset also exports the socket family, type and protocol as
read-only allowing bpf filters to deny a process in a cgroup the ability
to open specific types of AF_INET or AF_INET6 sockets.
v7
- comments from Alexei
v6
- add export of socket family, type and protocol
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/bpf-cgroup.h | 60 | ||||
-rw-r--r-- | include/net/sock.h | 15 | ||||
-rw-r--r-- | include/uapi/linux/bpf.h | 9 | ||||
-rw-r--r-- | kernel/bpf/cgroup.c | 43 | ||||
-rw-r--r-- | kernel/bpf/syscall.c | 33 | ||||
-rw-r--r-- | net/core/filter.c | 83 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 12 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 8 | ||||
-rw-r--r-- | samples/bpf/Makefile | 6 | ||||
-rw-r--r-- | samples/bpf/bpf_load.c | 14 | ||||
-rw-r--r-- | samples/bpf/bpf_load.h | 1 | ||||
-rw-r--r-- | samples/bpf/sock_flags_kern.c | 44 | ||||
-rw-r--r-- | samples/bpf/test_cgrp2_sock.c | 83 | ||||
-rwxr-xr-x | samples/bpf/test_cgrp2_sock.sh | 47 | ||||
-rw-r--r-- | samples/bpf/test_cgrp2_sock2.c | 66 | ||||
-rwxr-xr-x | samples/bpf/test_cgrp2_sock2.sh | 81 |
16 files changed, 559 insertions, 46 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 0cf1adfadd2d..7b6e5d168c95 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -36,31 +36,44 @@ void cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type); -int __cgroup_bpf_run_filter(struct sock *sk, - struct sk_buff *skb, - enum bpf_attach_type type); - -/* Wrappers for __cgroup_bpf_run_filter() guarded by cgroup_bpf_enabled. */ -#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) \ -({ \ - int __ret = 0; \ - if (cgroup_bpf_enabled) \ - __ret = __cgroup_bpf_run_filter(sk, skb, \ - BPF_CGROUP_INET_INGRESS); \ - \ - __ret; \ +int __cgroup_bpf_run_filter_skb(struct sock *sk, + struct sk_buff *skb, + enum bpf_attach_type type); + +int __cgroup_bpf_run_filter_sk(struct sock *sk, + enum bpf_attach_type type); + +/* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ +#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_run_filter_skb(sk, skb, \ + BPF_CGROUP_INET_INGRESS); \ + \ + __ret; \ }) -#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) \ -({ \ - int __ret = 0; \ - if (cgroup_bpf_enabled && sk && sk == skb->sk) { \ - typeof(sk) __sk = sk_to_full_sk(sk); \ - if (sk_fullsock(__sk)) \ - __ret = __cgroup_bpf_run_filter(__sk, skb, \ - BPF_CGROUP_INET_EGRESS); \ - } \ - __ret; \ +#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled && sk && sk == skb->sk) { \ + typeof(sk) __sk = sk_to_full_sk(sk); \ + if (sk_fullsock(__sk)) \ + __ret = __cgroup_bpf_run_filter_skb(__sk, skb, \ + BPF_CGROUP_INET_EGRESS); \ + } \ + __ret; \ +}) + +#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled && sk) { \ + __ret = __cgroup_bpf_run_filter_sk(sk, \ + BPF_CGROUP_INET_SOCK_CREATE); \ + } \ + __ret; \ }) #else @@ -72,6 +85,7 @@ static inline void cgroup_bpf_inherit(struct cgroup *cgrp, #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #endif /* CONFIG_CGROUP_BPF */ diff --git a/include/net/sock.h b/include/net/sock.h index 442cbb118a07..69afda6bea15 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -389,6 +389,21 @@ struct sock { * Because of non atomicity rules, all * changes are protected by socket lock. */ + unsigned int __sk_flags_offset[0]; +#ifdef __BIG_ENDIAN_BITFIELD +#define SK_FL_PROTO_SHIFT 16 +#define SK_FL_PROTO_MASK 0x00ff0000 + +#define SK_FL_TYPE_SHIFT 0 +#define SK_FL_TYPE_MASK 0x0000ffff +#else +#define SK_FL_PROTO_SHIFT 8 +#define SK_FL_PROTO_MASK 0x0000ff00 + +#define SK_FL_TYPE_SHIFT 16 +#define SK_FL_TYPE_MASK 0xffff0000 +#endif + kmemcheck_bitfield_begin(flags); unsigned int sk_padding : 2, sk_no_check_tx : 1, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 22ac82792687..6123d9b8e828 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -101,6 +101,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_XDP, BPF_PROG_TYPE_PERF_EVENT, BPF_PROG_TYPE_CGROUP_SKB, + BPF_PROG_TYPE_CGROUP_SOCK, BPF_PROG_TYPE_LWT_IN, BPF_PROG_TYPE_LWT_OUT, BPF_PROG_TYPE_LWT_XMIT, @@ -109,6 +110,7 @@ enum bpf_prog_type { enum bpf_attach_type { BPF_CGROUP_INET_INGRESS, BPF_CGROUP_INET_EGRESS, + BPF_CGROUP_INET_SOCK_CREATE, __MAX_BPF_ATTACH_TYPE }; @@ -567,6 +569,13 @@ enum bpf_ret_code { /* >127 are reserved for prog type specific return codes */ }; +struct bpf_sock { + __u32 bound_dev_if; + __u32 family; + __u32 type; + __u32 protocol; +}; + /* User return codes for XDP prog type. * A valid XDP program must return one of these defined values. All other * return codes are reserved for future use. Unknown return codes will result diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 8c784f8c67cd..a515f7b007c6 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -118,7 +118,7 @@ void __cgroup_bpf_update(struct cgroup *cgrp, } /** - * __cgroup_bpf_run_filter() - Run a program for packet filtering + * __cgroup_bpf_run_filter_skb() - Run a program for packet filtering * @sk: The socken sending or receiving traffic * @skb: The skb that is being sent or received * @type: The type of program to be exectuted @@ -132,9 +132,9 @@ void __cgroup_bpf_update(struct cgroup *cgrp, * This function will return %-EPERM if any if an attached program was found * and if it returned != 1 during execution. In all other cases, 0 is returned. */ -int __cgroup_bpf_run_filter(struct sock *sk, - struct sk_buff *skb, - enum bpf_attach_type type) +int __cgroup_bpf_run_filter_skb(struct sock *sk, + struct sk_buff *skb, + enum bpf_attach_type type) { struct bpf_prog *prog; struct cgroup *cgrp; @@ -164,4 +164,37 @@ int __cgroup_bpf_run_filter(struct sock *sk, return ret; } -EXPORT_SYMBOL(__cgroup_bpf_run_filter); +EXPORT_SYMBOL(__cgroup_bpf_run_filter_skb); + +/** + * __cgroup_bpf_run_filter_sk() - Run a program on a sock + * @sk: sock structure to manipulate + * @type: The type of program to be exectuted + * + * socket is passed is expected to be of type INET or INET6. + * + * The program type passed in via @type must be suitable for sock + * filtering. No further check is performed to assert that. + * + * This function will return %-EPERM if any if an attached program was found + * and if it returned != 1 during execution. In all other cases, 0 is returned. + */ +int __cgroup_bpf_run_filter_sk(struct sock *sk, + enum bpf_attach_type type) +{ + struct cgroup *cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data); + struct bpf_prog *prog; + int ret = 0; + + + rcu_read_lock(); + + prog = rcu_dereference(cgrp->bpf.effective[type]); + if (prog) + ret = BPF_PROG_RUN(prog, sk) == 1 ? 0 : -EPERM; + + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(__cgroup_bpf_run_filter_sk); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 4caa18e6860a..85af86c496cd 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -856,6 +856,7 @@ static int bpf_prog_attach(const union bpf_attr *attr) { struct bpf_prog *prog; struct cgroup *cgrp; + enum bpf_prog_type ptype; if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -866,25 +867,28 @@ static int bpf_prog_attach(const union bpf_attr *attr) switch (attr->attach_type) { case BPF_CGROUP_INET_INGRESS: case BPF_CGROUP_INET_EGRESS: - prog = bpf_prog_get_type(attr->attach_bpf_fd, - BPF_PROG_TYPE_CGROUP_SKB); - if (IS_ERR(prog)) - return PTR_ERR(prog); - - cgrp = cgroup_get_from_fd(attr->target_fd); - if (IS_ERR(cgrp)) { - bpf_prog_put(prog); - return PTR_ERR(cgrp); - } - - cgroup_bpf_update(cgrp, prog, attr->attach_type); - cgroup_put(cgrp); + ptype = BPF_PROG_TYPE_CGROUP_SKB; + break; + case BPF_CGROUP_INET_SOCK_CREATE: + ptype = BPF_PROG_TYPE_CGROUP_SOCK; break; - default: return -EINVAL; } + prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); + if (IS_ERR(prog)) + return PTR_ERR(prog); + + cgrp = cgroup_get_from_fd(attr->target_fd); + if (IS_ERR(cgrp)) { + bpf_prog_put(prog); + return PTR_ERR(cgrp); + } + + cgroup_bpf_update(cgrp, prog, attr->attach_type); + cgroup_put(cgrp); + return 0; } @@ -903,6 +907,7 @@ static int bpf_prog_detach(const union bpf_attr *attr) switch (attr->attach_type) { case BPF_CGROUP_INET_INGRESS: case BPF_CGROUP_INET_EGRESS: + case BPF_CGROUP_INET_SOCK_CREATE: cgrp = cgroup_get_from_fd(attr->target_fd); if (IS_ERR(cgrp)) return PTR_ERR(cgrp); diff --git a/net/core/filter.c b/net/core/filter.c index 1c4d0faf22c8..56b43587d200 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2818,6 +2818,32 @@ static bool lwt_is_valid_access(int off, int size, return __is_valid_access(off, size, type); } +static bool sock_filter_is_valid_access(int off, int size, + enum bpf_access_type type, + enum bpf_reg_type *reg_type) +{ + if (type == BPF_WRITE) { + switch (off) { + case offsetof(struct bpf_sock, bound_dev_if): + break; + default: + return false; + } + } + + if (off < 0 || off + size > sizeof(struct bpf_sock)) + return false; + + /* The verifier guarantees that size > 0. */ + if (off % size != 0) + return false; + + if (size != sizeof(__u32)) + return false; + + return true; +} + static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, const struct bpf_prog *prog) { @@ -3076,6 +3102,51 @@ static u32 sk_filter_convert_ctx_access(enum bpf_access_type type, int dst_reg, return insn - insn_buf; } +static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, + int dst_reg, int src_reg, + int ctx_off, + struct bpf_insn *insn_buf, + struct bpf_prog *prog) +{ + struct bpf_insn *insn = insn_buf; + + switch (ctx_off) { + case offsetof(struct bpf_sock, bound_dev_if): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_bound_dev_if) != 4); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sock, sk_bound_dev_if)); + else + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sock, sk_bound_dev_if)); + break; + + case offsetof(struct bpf_sock, family): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2); + + *insn++ = BPF_LDX_MEM(BPF_H, dst_reg, src_reg, + offsetof(struct sock, sk_family)); + break; + + case offsetof(struct bpf_sock, type): + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sock, __sk_flags_offset)); + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_TYPE_MASK); + *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_TYPE_SHIFT); + break; + + case offsetof(struct bpf_sock, protocol): + *insn++ = BPF_LDX_MEM(BPF_W, dst_reg, src_reg, + offsetof(struct sock, __sk_flags_offset)); + *insn++ = BPF_ALU32_IMM(BPF_AND, dst_reg, SK_FL_PROTO_MASK); + *insn++ = BPF_ALU32_IMM(BPF_RSH, dst_reg, SK_FL_PROTO_SHIFT); + break; + } + + return insn - insn_buf; +} + static u32 tc_cls_act_convert_ctx_access(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, struct bpf_insn *insn_buf, @@ -3162,6 +3233,12 @@ static const struct bpf_verifier_ops lwt_xmit_ops = { .gen_prologue = tc_cls_act_prologue, }; +static const struct bpf_verifier_ops cg_sock_ops = { + .get_func_proto = sk_filter_func_proto, + .is_valid_access = sock_filter_is_valid_access, + .convert_ctx_access = sock_filter_convert_ctx_access, +}; + static struct bpf_prog_type_list sk_filter_type __read_mostly = { .ops = &sk_filter_ops, .type = BPF_PROG_TYPE_SOCKET_FILTER, @@ -3202,6 +3279,11 @@ static struct bpf_prog_type_list lwt_xmit_type __read_mostly = { .type = BPF_PROG_TYPE_LWT_XMIT, }; +static struct bpf_prog_type_list cg_sock_type __read_mostly = { + .ops = &cg_sock_ops, + .type = BPF_PROG_TYPE_CGROUP_SOCK +}; + static int __init register_sk_filter_ops(void) { bpf_register_prog_type(&sk_filter_type); @@ -3209,6 +3291,7 @@ static int __init register_sk_filter_ops(void) bpf_register_prog_type(&sched_act_type); bpf_register_prog_type(&xdp_type); bpf_register_prog_type(&cg_skb_type); + bpf_register_prog_type(&cg_sock_type); bpf_register_prog_type(&lwt_in_type); bpf_register_prog_type(&lwt_out_type); bpf_register_prog_type(&lwt_xmit_type); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 5ddf5cda07f4..24d2550492ee 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -374,8 +374,18 @@ lookup_protocol: if (sk->sk_prot->init) { err = sk->sk_prot->init(sk); - if (err) + if (err) { + sk_common_release(sk); + goto out; + } + } + + if (!kern) { + err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); + if (err) { sk_common_release(sk); + goto out; + } } out: return err; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d424f3a3737a..237e654ba717 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -258,6 +258,14 @@ lookup_protocol: goto out; } } + + if (!kern) { + err = BPF_CGROUP_RUN_PROG_INET_SOCK(sk); + if (err) { + sk_common_release(sk); + goto out; + } + } out: return err; out_rcu_unlock: diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 44532fbe4cae..bfc2cb88a1f7 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -23,6 +23,8 @@ hostprogs-y += map_perf_test hostprogs-y += test_overhead hostprogs-y += test_cgrp2_array_pin hostprogs-y += test_cgrp2_attach +hostprogs-y += test_cgrp2_sock +hostprogs-y += test_cgrp2_sock2 hostprogs-y += xdp1 hostprogs-y += xdp2 hostprogs-y += test_current_task_under_cgroup @@ -52,6 +54,8 @@ map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o test_cgrp2_attach-objs := libbpf.o test_cgrp2_attach.o +test_cgrp2_sock-objs := libbpf.o test_cgrp2_sock.o +test_cgrp2_sock2-objs := bpf_load.o libbpf.o test_cgrp2_sock2.o xdp1-objs := bpf_load.o libbpf.o xdp1_user.o # reuse xdp1 source intentionally xdp2-objs := bpf_load.o libbpf.o xdp1_user.o @@ -73,6 +77,7 @@ always += tracex3_kern.o always += tracex4_kern.o always += tracex5_kern.o always += tracex6_kern.o +always += sock_flags_kern.o always += test_probe_write_user_kern.o always += trace_output_kern.o always += tcbpf1_kern.o @@ -107,6 +112,7 @@ HOSTLOADLIBES_tracex3 += -lelf HOSTLOADLIBES_tracex4 += -lelf -lrt HOSTLOADLIBES_tracex5 += -lelf HOSTLOADLIBES_tracex6 += -lelf +HOSTLOADLIBES_test_cgrp2_sock2 += -lelf HOSTLOADLIBES_test_probe_write_user += -lelf HOSTLOADLIBES_trace_output += -lelf -lrt HOSTLOADLIBES_lathist += -lelf diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 62f54d6eb8bf..49b45ccbe153 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -52,6 +52,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; bool is_xdp = strncmp(event, "xdp", 3) == 0; bool is_perf_event = strncmp(event, "perf_event", 10) == 0; + bool is_cgroup_skb = strncmp(event, "cgroup/skb", 10) == 0; + bool is_cgroup_sk = strncmp(event, "cgroup/sock", 11) == 0; enum bpf_prog_type prog_type; char buf[256]; int fd, efd, err, id; @@ -72,6 +74,10 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_type = BPF_PROG_TYPE_XDP; } else if (is_perf_event) { prog_type = BPF_PROG_TYPE_PERF_EVENT; + } else if (is_cgroup_skb) { + prog_type = BPF_PROG_TYPE_CGROUP_SKB; + } else if (is_cgroup_sk) { + prog_type = BPF_PROG_TYPE_CGROUP_SOCK; } else { printf("Unknown event '%s'\n", event); return -1; @@ -85,7 +91,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_fd[prog_cnt++] = fd; - if (is_xdp || is_perf_event) + if (is_xdp || is_perf_event || is_cgroup_skb || is_cgroup_sk) return 0; if (is_socket) { @@ -334,7 +340,8 @@ int load_bpf_file(char *path) memcmp(shname_prog, "tracepoint/", 11) == 0 || memcmp(shname_prog, "xdp", 3) == 0 || memcmp(shname_prog, "perf_event", 10) == 0 || - memcmp(shname_prog, "socket", 6) == 0) + memcmp(shname_prog, "socket", 6) == 0 || + memcmp(shname_prog, "cgroup/", 7) == 0) load_and_attach(shname_prog, insns, data_prog->d_size); } } @@ -353,7 +360,8 @@ int load_bpf_file(char *path) memcmp(shname, "tracepoint/", 11) == 0 || memcmp(shname, "xdp", 3) == 0 || memcmp(shname, "perf_event", 10) == 0 || - memcmp(shname, "socket", 6) == 0) + memcmp(shname, "socket", 6) == 0 || + memcmp(shname, "cgroup/", 7) == 0) load_and_attach(shname, data->d_buf, data->d_size); } diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h index dfa57fe65c8e..4adeeef53ad6 100644 --- a/samples/bpf/bpf_load.h +++ b/samples/bpf/bpf_load.h @@ -7,6 +7,7 @@ extern int map_fd[MAX_MAPS]; extern int prog_fd[MAX_PROGS]; extern int event_fd[MAX_PROGS]; +extern int prog_cnt; /* parses elf file compiled by llvm .c->.o * . parses 'maps' section and creates maps via BPF syscall diff --git a/samples/bpf/sock_flags_kern.c b/samples/bpf/sock_flags_kern.c new file mode 100644 index 000000000000..533dd11a6baa --- /dev/null +++ b/samples/bpf/sock_flags_kern.c @@ -0,0 +1,44 @@ +#include <uapi/linux/bpf.h> +#include <linux/socket.h> +#include <linux/net.h> +#include <uapi/linux/in.h> +#include <uapi/linux/in6.h> +#include "bpf_helpers.h" + +SEC("cgroup/sock1") +int bpf_prog1(struct bpf_sock *sk) +{ + char fmt[] = "socket: family %d type %d protocol %d\n"; + + bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol); + + /* block PF_INET6, SOCK_RAW, IPPROTO_ICMPV6 sockets + * ie., make ping6 fail + */ + if (sk->family == PF_INET6 && + sk->type == SOCK_RAW && + sk->protocol == IPPROTO_ICMPV6) + return 0; + + return 1; +} + +SEC("cgroup/sock2") +int bpf_prog2(struct bpf_sock *sk) +{ + char fmt[] = "socket: family %d type %d protocol %d\n"; + + bpf_trace_printk(fmt, sizeof(fmt), sk->family, sk->type, sk->protocol); + + /* block PF_INET, SOCK_RAW, IPPROTO_ICMP sockets + * ie., make ping fail + */ + if (sk->family == PF_INET && + sk->type == SOCK_RAW && + sk->protocol == IPPROTO_ICMP) + return 0; + + return 1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/test_cgrp2_sock.c b/samples/bpf/test_cgrp2_sock.c new file mode 100644 index 000000000000..d467b3c1c55c --- /dev/null +++ b/samples/bpf/test_cgrp2_sock.c @@ -0,0 +1,83 @@ +/* eBPF example program: + * + * - Loads eBPF program + * + * The eBPF program sets the sk_bound_dev_if index in new AF_INET{6} + * sockets opened by processes in the cgroup. + * + * - Attaches the new program to a cgroup using BPF_PROG_ATTACH + */ + +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <net/if.h> +#include <linux/bpf.h> + +#include "libbpf.h" + +static int prog_load(int idx) +{ + struct bpf_insn prog[] = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_MOV64_IMM(BPF_REG_3, idx), + BPF_MOV64_IMM(BPF_REG_2, offsetof(struct bpf_sock, bound_dev_if)), + BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3, offsetof(struct bpf_sock, bound_dev_if)), + BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = verdict */ + BPF_EXIT_INSN(), + }; + + return bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, prog, sizeof(prog), + "GPL", 0); +} + +static int usage(const char *argv0) +{ + printf("Usage: %s cg-path device-index\n", argv0); + return EXIT_FAILURE; +} + +int main(int argc, char **argv) +{ + int cg_fd, prog_fd, ret; + unsigned int idx; + + if (argc < 2) + return usage(argv[0]); + + idx = if_nametoindex(argv[2]); + if (!idx) { + printf("Invalid device name\n"); + return EXIT_FAILURE; + } + + cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY); + if (cg_fd < 0) { + printf("Failed to open cgroup path: '%s'\n", strerror(errno)); + return EXIT_FAILURE; + } + + prog_fd = prog_load(idx); + printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf); + + if (prog_fd < 0) { + printf("Failed to load prog: '%s'\n", strerror(errno)); + return EXIT_FAILURE; + } + + ret = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_SOCK_CREATE); + if (ret < 0) { + printf("Failed to attach prog to cgroup: '%s'\n", + strerror(errno)); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/samples/bpf/test_cgrp2_sock.sh b/samples/bpf/test_cgrp2_sock.sh new file mode 100755 index 000000000000..925fd467c7cc --- /dev/null +++ b/samples/bpf/test_cgrp2_sock.sh @@ -0,0 +1,47 @@ +#!/bin/bash + +function config_device { + ip netns add at_ns0 + ip link add veth0 type veth peer name veth0b + ip link set veth0b up + ip link set veth0 netns at_ns0 + ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 + ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad + ip netns exec at_ns0 ip link set dev veth0 up + ip link add foo type vrf table 1234 + ip link set foo up + ip addr add 172.16.1.101/24 dev veth0b + ip addr add 2401:db00::2/64 dev veth0b nodad + ip link set veth0b master foo +} + +function attach_bpf { + rm -rf /tmp/cgroupv2 + mkdir -p /tmp/cgroupv2 + mount -t cgroup2 none /tmp/cgroupv2 + mkdir -p /tmp/cgroupv2/foo + test_cgrp2_sock /tmp/cgroupv2/foo foo + echo $$ >> /tmp/cgroupv2/foo/cgroup.procs +} + +function cleanup { + set +ex + ip netns delete at_ns0 + ip link del veth0 + ip link del foo + umount /tmp/cgroupv2 + rm -rf /tmp/cgroupv2 + set -ex +} + +function do_test { + ping -c1 -w1 172.16.1.100 + ping6 -c1 -w1 2401:db00::1 +} + +cleanup 2>/dev/null +config_device +attach_bpf +do_test +cleanup +echo "*** PASS ***" diff --git a/samples/bpf/test_cgrp2_sock2.c b/samples/bpf/test_cgrp2_sock2.c new file mode 100644 index 000000000000..455ef0d06e93 --- /dev/null +++ b/samples/bpf/test_cgrp2_sock2.c @@ -0,0 +1,66 @@ +/* eBPF example program: + * + * - Loads eBPF program + * + * The eBPF program loads a filter from file and attaches the + * program to a cgroup using BPF_PROG_ATTACH + */ + +#define _GNU_SOURCE + +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <net/if.h> +#include <linux/bpf.h> + +#include "libbpf.h" +#include "bpf_load.h" + +static int usage(const char *argv0) +{ + printf("Usage: %s cg-path filter-path [filter-id]\n", argv0); + return EXIT_FAILURE; +} + +int main(int argc, char **argv) +{ + int cg_fd, ret, filter_id = 0; + + if (argc < 3) + return usage(argv[0]); + + cg_fd = open(argv[1], O_DIRECTORY | O_RDONLY); + if (cg_fd < 0) { + printf("Failed to open cgroup path: '%s'\n", strerror(errno)); + return EXIT_FAILURE; + } + + if (load_bpf_file(argv[2])) + return EXIT_FAILURE; + + printf("Output from kernel verifier:\n%s\n-------\n", bpf_log_buf); + + if (argc > 3) + filter_id = atoi(argv[3]); + + if (filter_id > prog_cnt) { + printf("Invalid program id; program not found in file\n"); + return EXIT_FAILURE; + } + + ret = bpf_prog_attach(prog_fd[filter_id], cg_fd, + BPF_CGROUP_INET_SOCK_CREATE); + if (ret < 0) { + printf("Failed to attach prog to cgroup: '%s'\n", + strerror(errno)); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} diff --git a/samples/bpf/test_cgrp2_sock2.sh b/samples/bpf/test_cgrp2_sock2.sh new file mode 100755 index 000000000000..891f12a0e26f --- /dev/null +++ b/samples/bpf/test_cgrp2_sock2.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +function config_device { + ip netns add at_ns0 + ip link add veth0 type veth peer name veth0b + ip link set veth0b up + ip link set veth0 netns at_ns0 + ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0 + ip netns exec at_ns0 ip addr add 2401:db00::1/64 dev veth0 nodad + ip netns exec at_ns0 ip link set dev veth0 up + ip addr add 172.16.1.101/24 dev veth0b + ip addr add 2401:db00::2/64 dev veth0b nodad +} + +function config_cgroup { + rm -rf /tmp/cgroupv2 + mkdir -p /tmp/cgroupv2 + mount -t cgroup2 none /tmp/cgroupv2 + mkdir -p /tmp/cgroupv2/foo + echo $$ >> /tmp/cgroupv2/foo/cgroup.procs +} + + +function attach_bpf { + test_cgrp2_sock2 /tmp/cgroupv2/foo sock_flags_kern.o $1 + [ $? -ne 0 ] && exit 1 +} + +function cleanup { + ip link del veth0b + ip netns delete at_ns0 + umount /tmp/cgroupv2 + rm -rf /tmp/cgroupv2 +} + +cleanup 2>/dev/null + +set -e +config_device +config_cgroup +set +e + +# +# Test 1 - fail ping6 +# +attach_bpf 0 +ping -c1 -w1 172.16.1.100 +if [ $? -ne 0 ]; then + echo "ping failed when it should succeed" + cleanup + exit 1 +fi + +ping6 -c1 -w1 2401:db00::1 +if [ $? -eq 0 ]; then + echo "ping6 succeeded when it should not" + cleanup + exit 1 +fi + +# +# Test 2 - fail ping +# +attach_bpf 1 +ping6 -c1 -w1 2401:db00::1 +if [ $? -ne 0 ]; then + echo "ping6 failed when it should succeed" + cleanup + exit 1 +fi + +ping -c1 -w1 172.16.1.100 +if [ $? -eq 0 ]; then + echo "ping succeeded when it should not" + cleanup + exit 1 +fi + +cleanup +echo +echo "*** PASS ***" |