summaryrefslogtreecommitdiff
path: root/kernel/trace/bpf_trace.c
diff options
context:
space:
mode:
authorDaniel Borkmann <daniel@iogearbox.net>2019-11-02 00:17:59 +0100
committerAlexei Starovoitov <ast@kernel.org>2019-11-02 12:39:12 -0700
commit6ae08ae3dea2cfa03dd3665a3c8475c2d429ef47 (patch)
tree7d89f1cc7074ea3cec0e35ab80490f653894dc72 /kernel/trace/bpf_trace.c
parenteb1b66887472eaa7342305b7890ae510dd9d1a79 (diff)
downloadlwn-6ae08ae3dea2cfa03dd3665a3c8475c2d429ef47.tar.gz
lwn-6ae08ae3dea2cfa03dd3665a3c8475c2d429ef47.zip
bpf: Add probe_read_{user, kernel} and probe_read_{user, kernel}_str helpers
The current bpf_probe_read() and bpf_probe_read_str() helpers are broken in that they assume they can be used for probing memory access for kernel space addresses /as well as/ user space addresses. However, plain use of probe_kernel_read() for both cases will attempt to always access kernel space address space given access is performed under KERNEL_DS and some archs in-fact have overlapping address spaces where a kernel pointer and user pointer would have the /same/ address value and therefore accessing application memory via bpf_probe_read{,_str}() would read garbage values. Lets fix BPF side by making use of recently added 3d7081822f7f ("uaccess: Add non-pagefault user-space read functions"). Unfortunately, the only way to fix this status quo is to add dedicated bpf_probe_read_{user,kernel}() and bpf_probe_read_{user,kernel}_str() helpers. The bpf_probe_read{,_str}() helpers are kept as-is to retain their current behavior. The two *_user() variants attempt the access always under USER_DS set, the two *_kernel() variants will -EFAULT when accessing user memory if the underlying architecture has non-overlapping address ranges, also avoiding throwing the kernel warning via 00c42373d397 ("x86-64: add warning for non-canonical user access address dereferences"). Fixes: a5e8c07059d0 ("bpf: add bpf_probe_read_str helper") Fixes: 2541517c32be ("tracing, perf: Implement BPF programs attached to kprobes") Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: Alexei Starovoitov <ast@kernel.org> Acked-by: Andrii Nakryiko <andriin@fb.com> Link: https://lore.kernel.org/bpf/796ee46e948bc808d54891a1108435f8652c6ca4.1572649915.git.daniel@iogearbox.net
Diffstat (limited to 'kernel/trace/bpf_trace.c')
-rw-r--r--kernel/trace/bpf_trace.c181
1 files changed, 135 insertions, 46 deletions
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 2d87fcdcb19b..ffc91d4935ac 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -138,24 +138,140 @@ static const struct bpf_func_proto bpf_override_return_proto = {
};
#endif
-BPF_CALL_3(bpf_probe_read, void *, dst, u32, size, const void *, unsafe_ptr)
+BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
+ const void __user *, unsafe_ptr)
{
- int ret;
+ int ret = probe_user_read(dst, unsafe_ptr, size);
- ret = security_locked_down(LOCKDOWN_BPF_READ);
- if (ret < 0)
- goto out;
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_probe_read_user_proto = {
+ .func = bpf_probe_read_user,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
+ const void __user *, unsafe_ptr)
+{
+ int ret = strncpy_from_unsafe_user(dst, unsafe_ptr, size);
+
+ if (unlikely(ret < 0))
+ memset(dst, 0, size);
+
+ return ret;
+}
+
+static const struct bpf_func_proto bpf_probe_read_user_str_proto = {
+ .func = bpf_probe_read_user_str,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_ANYTHING,
+};
- ret = probe_kernel_read(dst, unsafe_ptr, size);
+static __always_inline int
+bpf_probe_read_kernel_common(void *dst, u32 size, const void *unsafe_ptr,
+ const bool compat)
+{
+ int ret = security_locked_down(LOCKDOWN_BPF_READ);
+
+ if (unlikely(ret < 0))
+ goto out;
+ ret = compat ? probe_kernel_read(dst, unsafe_ptr, size) :
+ probe_kernel_read_strict(dst, unsafe_ptr, size);
if (unlikely(ret < 0))
out:
memset(dst, 0, size);
+ return ret;
+}
+
+BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
+ const void *, unsafe_ptr)
+{
+ return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, false);
+}
+
+static const struct bpf_func_proto bpf_probe_read_kernel_proto = {
+ .func = bpf_probe_read_kernel,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_probe_read_compat, void *, dst, u32, size,
+ const void *, unsafe_ptr)
+{
+ return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, true);
+}
+static const struct bpf_func_proto bpf_probe_read_compat_proto = {
+ .func = bpf_probe_read_compat,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_ANYTHING,
+};
+
+static __always_inline int
+bpf_probe_read_kernel_str_common(void *dst, u32 size, const void *unsafe_ptr,
+ const bool compat)
+{
+ int ret = security_locked_down(LOCKDOWN_BPF_READ);
+
+ if (unlikely(ret < 0))
+ goto out;
+ /*
+ * The strncpy_from_unsafe_*() call will likely not fill the entire
+ * buffer, but that's okay in this circumstance as we're probing
+ * arbitrary memory anyway similar to bpf_probe_read_*() and might
+ * as well probe the stack. Thus, memory is explicitly cleared
+ * only in error case, so that improper users ignoring return
+ * code altogether don't copy garbage; otherwise length of string
+ * is returned that can be used for bpf_perf_event_output() et al.
+ */
+ ret = compat ? strncpy_from_unsafe(dst, unsafe_ptr, size) :
+ strncpy_from_unsafe_strict(dst, unsafe_ptr, size);
+ if (unlikely(ret < 0))
+out:
+ memset(dst, 0, size);
return ret;
}
-static const struct bpf_func_proto bpf_probe_read_proto = {
- .func = bpf_probe_read,
+BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
+ const void *, unsafe_ptr)
+{
+ return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, false);
+}
+
+static const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
+ .func = bpf_probe_read_kernel_str,
+ .gpl_only = true,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_UNINIT_MEM,
+ .arg2_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg3_type = ARG_ANYTHING,
+};
+
+BPF_CALL_3(bpf_probe_read_compat_str, void *, dst, u32, size,
+ const void *, unsafe_ptr)
+{
+ return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, true);
+}
+
+static const struct bpf_func_proto bpf_probe_read_compat_str_proto = {
+ .func = bpf_probe_read_compat_str,
.gpl_only = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_PTR_TO_UNINIT_MEM,
@@ -583,41 +699,6 @@ static const struct bpf_func_proto bpf_current_task_under_cgroup_proto = {
.arg2_type = ARG_ANYTHING,
};
-BPF_CALL_3(bpf_probe_read_str, void *, dst, u32, size,
- const void *, unsafe_ptr)
-{
- int ret;
-
- ret = security_locked_down(LOCKDOWN_BPF_READ);
- if (ret < 0)
- goto out;
-
- /*
- * The strncpy_from_unsafe() call will likely not fill the entire
- * buffer, but that's okay in this circumstance as we're probing
- * arbitrary memory anyway similar to bpf_probe_read() and might
- * as well probe the stack. Thus, memory is explicitly cleared
- * only in error case, so that improper users ignoring return
- * code altogether don't copy garbage; otherwise length of string
- * is returned that can be used for bpf_perf_event_output() et al.
- */
- ret = strncpy_from_unsafe(dst, unsafe_ptr, size);
- if (unlikely(ret < 0))
-out:
- memset(dst, 0, size);
-
- return ret;
-}
-
-static const struct bpf_func_proto bpf_probe_read_str_proto = {
- .func = bpf_probe_read_str,
- .gpl_only = true,
- .ret_type = RET_INTEGER,
- .arg1_type = ARG_PTR_TO_UNINIT_MEM,
- .arg2_type = ARG_CONST_SIZE_OR_ZERO,
- .arg3_type = ARG_ANYTHING,
-};
-
struct send_signal_irq_work {
struct irq_work irq_work;
struct task_struct *task;
@@ -697,8 +778,6 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_map_pop_elem_proto;
case BPF_FUNC_map_peek_elem:
return &bpf_map_peek_elem_proto;
- case BPF_FUNC_probe_read:
- return &bpf_probe_read_proto;
case BPF_FUNC_ktime_get_ns:
return &bpf_ktime_get_ns_proto;
case BPF_FUNC_tail_call:
@@ -725,8 +804,18 @@ tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_current_task_under_cgroup_proto;
case BPF_FUNC_get_prandom_u32:
return &bpf_get_prandom_u32_proto;
+ case BPF_FUNC_probe_read_user:
+ return &bpf_probe_read_user_proto;
+ case BPF_FUNC_probe_read_kernel:
+ return &bpf_probe_read_kernel_proto;
+ case BPF_FUNC_probe_read:
+ return &bpf_probe_read_compat_proto;
+ case BPF_FUNC_probe_read_user_str:
+ return &bpf_probe_read_user_str_proto;
+ case BPF_FUNC_probe_read_kernel_str:
+ return &bpf_probe_read_kernel_str_proto;
case BPF_FUNC_probe_read_str:
- return &bpf_probe_read_str_proto;
+ return &bpf_probe_read_compat_str_proto;
#ifdef CONFIG_CGROUPS
case BPF_FUNC_get_current_cgroup_id:
return &bpf_get_current_cgroup_id_proto;