summaryrefslogtreecommitdiff
path: root/arch/x86/net
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/net')
-rw-r--r--arch/x86/net/Makefile2
-rw-r--r--arch/x86/net/bpf_jit_comp.c610
-rw-r--r--arch/x86/net/bpf_jit_comp32.c37
-rw-r--r--arch/x86/net/bpf_timed_may_goto.S55
4 files changed, 512 insertions, 192 deletions
diff --git a/arch/x86/net/Makefile b/arch/x86/net/Makefile
index 383c87300b0d..dddbefc0f439 100644
--- a/arch/x86/net/Makefile
+++ b/arch/x86/net/Makefile
@@ -6,5 +6,5 @@
ifeq ($(CONFIG_X86_32),y)
obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o
else
- obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o
+ obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_timed_may_goto.o
endif
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index a43fc5af973d..ea9e707e8abf 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -8,6 +8,7 @@
#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
+#include <linux/bitfield.h>
#include <linux/bpf.h>
#include <linux/memory.h>
#include <linux/sort.h>
@@ -41,6 +42,8 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)
+#define EMIT5(b1, b2, b3, b4, b5) \
+ do { EMIT1(b1); EMIT4(b2, b3, b4, b5); } while (0)
#define EMIT1_off32(b1, off) \
do { EMIT1(b1); EMIT(off, 4); } while (0)
@@ -55,8 +58,8 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
#define EMIT_ENDBR() EMIT(gen_endbr(), 4)
#define EMIT_ENDBR_POISON() EMIT(gen_endbr_poison(), 4)
#else
-#define EMIT_ENDBR()
-#define EMIT_ENDBR_POISON()
+#define EMIT_ENDBR() do { } while (0)
+#define EMIT_ENDBR_POISON() do { } while (0)
#endif
static bool is_imm8(int value)
@@ -410,16 +413,20 @@ static void emit_nops(u8 **pprog, int len)
* Emit the various CFI preambles, see asm/cfi.h and the comments about FineIBT
* in arch/x86/kernel/alternative.c
*/
+static int emit_call(u8 **prog, void *func, void *ip);
-static void emit_fineibt(u8 **pprog, u32 hash)
+static void emit_fineibt(u8 **pprog, u8 *ip, u32 hash, int arity)
{
u8 *prog = *pprog;
EMIT_ENDBR();
- EMIT3_off32(0x41, 0x81, 0xea, hash); /* subl $hash, %r10d */
- EMIT2(0x74, 0x07); /* jz.d8 +7 */
- EMIT2(0x0f, 0x0b); /* ud2 */
- EMIT1(0x90); /* nop */
+ EMIT1_off32(0x2d, hash); /* subl $hash, %eax */
+ if (cfi_bhi) {
+ EMIT2(0x2e, 0x2e); /* cs cs */
+ emit_call(&prog, __bhi_args[arity], ip + 11);
+ } else {
+ EMIT3_off32(0x2e, 0x0f, 0x85, 3); /* jne.d32,pn 3 */
+ }
EMIT_ENDBR_POISON();
*pprog = prog;
@@ -431,30 +438,21 @@ static void emit_kcfi(u8 **pprog, u32 hash)
EMIT1_off32(0xb8, hash); /* movl $hash, %eax */
#ifdef CONFIG_CALL_PADDING
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
- EMIT1(0x90);
+ for (int i = 0; i < CONFIG_FUNCTION_PADDING_CFI; i++)
+ EMIT1(0x90);
#endif
EMIT_ENDBR();
*pprog = prog;
}
-static void emit_cfi(u8 **pprog, u32 hash)
+static void emit_cfi(u8 **pprog, u8 *ip, u32 hash, int arity)
{
u8 *prog = *pprog;
switch (cfi_mode) {
case CFI_FINEIBT:
- emit_fineibt(&prog, hash);
+ emit_fineibt(&prog, ip, hash, arity);
break;
case CFI_KCFI:
@@ -505,13 +503,17 @@ static void emit_prologue_tail_call(u8 **pprog, bool is_subprog)
* bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes
* while jumping to another program
*/
-static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
+static void emit_prologue(u8 **pprog, u8 *ip, u32 stack_depth, bool ebpf_from_cbpf,
bool tail_call_reachable, bool is_subprog,
bool is_exception_cb)
{
u8 *prog = *pprog;
- emit_cfi(&prog, is_subprog ? cfi_bpf_subprog_hash : cfi_bpf_hash);
+ if (is_subprog) {
+ emit_cfi(&prog, ip, cfi_bpf_subprog_hash, 5);
+ } else {
+ emit_cfi(&prog, ip, cfi_bpf_hash, 1);
+ }
/* BPF trampoline can be made to work without these nops,
* but let's waste 5 bytes for now and optimize later
*/
@@ -586,7 +588,8 @@ static int emit_jump(u8 **pprog, void *func, void *ip)
return emit_patch(pprog, func, ip, 0xE9);
}
-static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
+ enum bpf_text_poke_type new_t,
void *old_addr, void *new_addr)
{
const u8 *nop_insn = x86_nops[5];
@@ -596,9 +599,9 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
int ret;
memcpy(old_insn, nop_insn, X86_PATCH_SIZE);
- if (old_addr) {
+ if (old_t != BPF_MOD_NOP && old_addr) {
prog = old_insn;
- ret = t == BPF_MOD_CALL ?
+ ret = old_t == BPF_MOD_CALL ?
emit_call(&prog, old_addr, ip) :
emit_jump(&prog, old_addr, ip);
if (ret)
@@ -606,9 +609,9 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
}
memcpy(new_insn, nop_insn, X86_PATCH_SIZE);
- if (new_addr) {
+ if (new_t != BPF_MOD_NOP && new_addr) {
prog = new_insn;
- ret = t == BPF_MOD_CALL ?
+ ret = new_t == BPF_MOD_CALL ?
emit_call(&prog, new_addr, ip) :
emit_jump(&prog, new_addr, ip);
if (ret)
@@ -621,7 +624,7 @@ static int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
goto out;
ret = 1;
if (memcmp(ip, new_insn, X86_PATCH_SIZE)) {
- text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL);
+ smp_text_poke_single(ip, new_insn, X86_PATCH_SIZE, NULL);
ret = 0;
}
out:
@@ -629,8 +632,9 @@ out:
return ret;
}
-int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
- void *old_addr, void *new_addr)
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
+ enum bpf_text_poke_type new_t, void *old_addr,
+ void *new_addr)
{
if (!is_kernel_text((long)ip) &&
!is_bpf_text_address((long)ip))
@@ -641,29 +645,46 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
* See emit_prologue(), for IBT builds the trampoline hook is preceded
* with an ENDBR instruction.
*/
- if (is_endbr(*(u32 *)ip))
+ if (is_endbr(ip))
ip += ENDBR_INSN_SIZE;
- return __bpf_arch_text_poke(ip, t, old_addr, new_addr);
+ return __bpf_arch_text_poke(ip, old_t, new_t, old_addr, new_addr);
}
#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8)
-static void emit_indirect_jump(u8 **pprog, int reg, u8 *ip)
+static void __emit_indirect_jump(u8 **pprog, int reg, bool ereg)
+{
+ u8 *prog = *pprog;
+
+ if (ereg)
+ EMIT1(0x41);
+
+ EMIT2(0xFF, 0xE0 + reg);
+
+ *pprog = prog;
+}
+
+static void emit_indirect_jump(u8 **pprog, int bpf_reg, u8 *ip)
{
u8 *prog = *pprog;
+ int reg = reg2hex[bpf_reg];
+ bool ereg = is_ereg(bpf_reg);
- if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
+ if (cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) {
+ OPTIMIZER_HIDE_VAR(reg);
+ emit_jump(&prog, its_static_thunk(reg + 8*ereg), ip);
+ } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
EMIT_LFENCE();
- EMIT2(0xFF, 0xE0 + reg);
+ __emit_indirect_jump(&prog, reg, ereg);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
OPTIMIZER_HIDE_VAR(reg);
if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH))
- emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg], ip);
+ emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg + 8*ereg], ip);
else
- emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
+ emit_jump(&prog, &__x86_indirect_thunk_array[reg + 8*ereg], ip);
} else {
- EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */
+ __emit_indirect_jump(&prog, reg, ereg);
if (IS_ENABLED(CONFIG_MITIGATION_RETPOLINE) || IS_ENABLED(CONFIG_MITIGATION_SLS))
EMIT1(0xCC); /* int3 */
}
@@ -675,7 +696,7 @@ static void emit_return(u8 **pprog, u8 *ip)
{
u8 *prog = *pprog;
- if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
+ if (cpu_wants_rethunk()) {
emit_jump(&prog, x86_return_thunk, ip);
} else {
EMIT1(0xC3); /* ret */
@@ -783,7 +804,7 @@ static void emit_bpf_tail_call_indirect(struct bpf_prog *bpf_prog,
* rdi == ctx (1st arg)
* rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET
*/
- emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start));
+ emit_indirect_jump(&prog, BPF_REG_4 /* R4 -> rcx */, ip + (prog - start));
/* out: */
ctx->tail_call_indirect_label = prog - start;
@@ -869,12 +890,13 @@ static void bpf_tail_call_direct_fixup(struct bpf_prog *prog)
target = array->ptrs[poke->tail_call.key];
if (target) {
ret = __bpf_arch_text_poke(poke->tailcall_target,
- BPF_MOD_JUMP, NULL,
+ BPF_MOD_NOP, BPF_MOD_JUMP,
+ NULL,
(u8 *)target->bpf_func +
poke->adj_off);
BUG_ON(ret < 0);
ret = __bpf_arch_text_poke(poke->tailcall_bypass,
- BPF_MOD_JUMP,
+ BPF_MOD_JUMP, BPF_MOD_NOP,
(u8 *)poke->tailcall_target +
X86_PATCH_SIZE, NULL);
BUG_ON(ret < 0);
@@ -1138,11 +1160,38 @@ static void emit_ldx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 i
*pprog = prog;
}
+static void emit_ldsx_index(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+{
+ u8 *prog = *pprog;
+
+ switch (size) {
+ case BPF_B:
+ /* movsx rax, byte ptr [rax + r12 + off] */
+ EMIT3(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x0F, 0xBE);
+ break;
+ case BPF_H:
+ /* movsx rax, word ptr [rax + r12 + off] */
+ EMIT3(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x0F, 0xBF);
+ break;
+ case BPF_W:
+ /* movsx rax, dword ptr [rax + r12 + off] */
+ EMIT2(add_3mod(0x48, src_reg, dst_reg, index_reg), 0x63);
+ break;
+ }
+ emit_insn_suffix_SIB(&prog, src_reg, dst_reg, index_reg, off);
+ *pprog = prog;
+}
+
static void emit_ldx_r12(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
emit_ldx_index(pprog, size, dst_reg, src_reg, X86_REG_R12, off);
}
+static void emit_ldsx_r12(u8 **prog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+ emit_ldsx_index(prog, size, dst_reg, src_reg, X86_REG_R12, off);
+}
+
/* STX: *(u8*)(dst_reg + off) = src_reg */
static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
{
@@ -1242,12 +1291,23 @@ static void emit_st_r12(u8 **pprog, u32 size, u32 dst_reg, int off, int imm)
emit_st_index(pprog, size, dst_reg, X86_REG_R12, off, imm);
}
-static int emit_atomic(u8 **pprog, u8 atomic_op,
- u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
+static void emit_store_stack_imm64(u8 **pprog, int reg, int stack_off, u64 imm64)
+{
+ /*
+ * mov reg, imm64
+ * mov QWORD PTR [rbp + stack_off], reg
+ */
+ emit_mov_imm64(pprog, reg, imm64 >> 32, (u32) imm64);
+ emit_stx(pprog, BPF_DW, BPF_REG_FP, reg, stack_off);
+}
+
+static int emit_atomic_rmw(u8 **pprog, u32 atomic_op,
+ u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size)
{
u8 *prog = *pprog;
- EMIT1(0xF0); /* lock prefix */
+ if (atomic_op != BPF_XCHG)
+ EMIT1(0xF0); /* lock prefix */
maybe_emit_mod(&prog, dst_reg, src_reg, bpf_size == BPF_DW);
@@ -1283,12 +1343,15 @@ static int emit_atomic(u8 **pprog, u8 atomic_op,
return 0;
}
-static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size,
- u32 dst_reg, u32 src_reg, u32 index_reg, int off)
+static int emit_atomic_rmw_index(u8 **pprog, u32 atomic_op, u32 size,
+ u32 dst_reg, u32 src_reg, u32 index_reg,
+ int off)
{
u8 *prog = *pprog;
- EMIT1(0xF0); /* lock prefix */
+ if (atomic_op != BPF_XCHG)
+ EMIT1(0xF0); /* lock prefix */
+
switch (size) {
case BPF_W:
EMIT1(add_3mod(0x40, dst_reg, src_reg, index_reg));
@@ -1297,7 +1360,7 @@ static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size,
EMIT1(add_3mod(0x48, dst_reg, src_reg, index_reg));
break;
default:
- pr_err("bpf_jit: 1 and 2 byte atomics are not supported\n");
+ pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n");
return -EFAULT;
}
@@ -1331,16 +1394,110 @@ static int emit_atomic_index(u8 **pprog, u8 atomic_op, u32 size,
return 0;
}
+static int emit_atomic_ld_st(u8 **pprog, u32 atomic_op, u32 dst_reg,
+ u32 src_reg, s16 off, u8 bpf_size)
+{
+ switch (atomic_op) {
+ case BPF_LOAD_ACQ:
+ /* dst_reg = smp_load_acquire(src_reg + off16) */
+ emit_ldx(pprog, bpf_size, dst_reg, src_reg, off);
+ break;
+ case BPF_STORE_REL:
+ /* smp_store_release(dst_reg + off16, src_reg) */
+ emit_stx(pprog, bpf_size, dst_reg, src_reg, off);
+ break;
+ default:
+ pr_err("bpf_jit: unknown atomic load/store opcode %02x\n",
+ atomic_op);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+static int emit_atomic_ld_st_index(u8 **pprog, u32 atomic_op, u32 size,
+ u32 dst_reg, u32 src_reg, u32 index_reg,
+ int off)
+{
+ switch (atomic_op) {
+ case BPF_LOAD_ACQ:
+ /* dst_reg = smp_load_acquire(src_reg + idx_reg + off16) */
+ emit_ldx_index(pprog, size, dst_reg, src_reg, index_reg, off);
+ break;
+ case BPF_STORE_REL:
+ /* smp_store_release(dst_reg + idx_reg + off16, src_reg) */
+ emit_stx_index(pprog, size, dst_reg, src_reg, index_reg, off);
+ break;
+ default:
+ pr_err("bpf_jit: unknown atomic load/store opcode %02x\n",
+ atomic_op);
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/*
+ * Metadata encoding for exception handling in JITed code.
+ *
+ * Format of `fixup` and `data` fields in `struct exception_table_entry`:
+ *
+ * Bit layout of `fixup` (32-bit):
+ *
+ * +-----------+--------+-----------+---------+----------+
+ * | 31 | 30-24 | 23-16 | 15-8 | 7-0 |
+ * | | | | | |
+ * | ARENA_ACC | Unused | ARENA_REG | DST_REG | INSN_LEN |
+ * +-----------+--------+-----------+---------+----------+
+ *
+ * - INSN_LEN (8 bits): Length of faulting insn (max x86 insn = 15 bytes (fits in 8 bits)).
+ * - DST_REG (8 bits): Offset of dst_reg from reg2pt_regs[] (max offset = 112 (fits in 8 bits)).
+ * This is set to DONT_CLEAR if the insn is a store.
+ * - ARENA_REG (8 bits): Offset of the register that is used to calculate the
+ * address for load/store when accessing the arena region.
+ * - ARENA_ACCESS (1 bit): This bit is set when the faulting instruction accessed the arena region.
+ *
+ * Bit layout of `data` (32-bit):
+ *
+ * +--------------+--------+--------------+
+ * | 31-16 | 15-8 | 7-0 |
+ * | | | |
+ * | ARENA_OFFSET | Unused | EX_TYPE_BPF |
+ * +--------------+--------+--------------+
+ *
+ * - ARENA_OFFSET (16 bits): Offset used to calculate the address for load/store when
+ * accessing the arena region.
+ */
+
#define DONT_CLEAR 1
+#define FIXUP_INSN_LEN_MASK GENMASK(7, 0)
+#define FIXUP_REG_MASK GENMASK(15, 8)
+#define FIXUP_ARENA_REG_MASK GENMASK(23, 16)
+#define FIXUP_ARENA_ACCESS BIT(31)
+#define DATA_ARENA_OFFSET_MASK GENMASK(31, 16)
bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
- u32 reg = x->fixup >> 8;
+ u32 reg = FIELD_GET(FIXUP_REG_MASK, x->fixup);
+ u32 insn_len = FIELD_GET(FIXUP_INSN_LEN_MASK, x->fixup);
+ bool is_arena = !!(x->fixup & FIXUP_ARENA_ACCESS);
+ bool is_write = (reg == DONT_CLEAR);
+ unsigned long addr;
+ s16 off;
+ u32 arena_reg;
+
+ if (is_arena) {
+ arena_reg = FIELD_GET(FIXUP_ARENA_REG_MASK, x->fixup);
+ off = FIELD_GET(DATA_ARENA_OFFSET_MASK, x->data);
+ addr = *(unsigned long *)((void *)regs + arena_reg) + off;
+ bpf_prog_report_arena_violation(is_write, addr, regs->ip);
+ }
/* jump over faulting load and clear dest register */
if (reg != DONT_CLEAR)
*(unsigned long *)((void *)regs + reg) = 0;
- regs->ip += x->fixup & 0xff;
+ regs->ip += insn_len;
+
return true;
}
@@ -1450,8 +1607,50 @@ static void emit_priv_frame_ptr(u8 **pprog, void __percpu *priv_frame_ptr)
#define PRIV_STACK_GUARD_SZ 8
#define PRIV_STACK_GUARD_VAL 0xEB9F12345678eb9fULL
-static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
- int oldproglen, struct jit_context *ctx, bool jmp_padding)
+static int emit_spectre_bhb_barrier(u8 **pprog, u8 *ip,
+ struct bpf_prog *bpf_prog)
+{
+ u8 *prog = *pprog;
+ u8 *func;
+
+ if (cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP)) {
+ /* The clearing sequence clobbers eax and ecx. */
+ EMIT1(0x50); /* push rax */
+ EMIT1(0x51); /* push rcx */
+ ip += 2;
+
+ func = (u8 *)clear_bhb_loop;
+ ip += x86_call_depth_emit_accounting(&prog, func, ip);
+
+ if (emit_call(&prog, func, ip))
+ return -EINVAL;
+ EMIT1(0x59); /* pop rcx */
+ EMIT1(0x58); /* pop rax */
+ }
+ /* Insert IBHF instruction */
+ if ((cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_LOOP) &&
+ cpu_feature_enabled(X86_FEATURE_HYPERVISOR)) ||
+ cpu_feature_enabled(X86_FEATURE_CLEAR_BHB_HW)) {
+ /*
+ * Add an Indirect Branch History Fence (IBHF). IBHF acts as a
+ * fence preventing branch history from before the fence from
+ * affecting indirect branches after the fence. This is
+ * specifically used in cBPF jitted code to prevent Intra-mode
+ * BHI attacks. The IBHF instruction is designed to be a NOP on
+ * hardware that doesn't need or support it. The REP and REX.W
+ * prefixes are required by the microcode, and they also ensure
+ * that the NOP is unlikely to be used in existing code.
+ *
+ * IBHF is not a valid instruction in 32-bit mode.
+ */
+ EMIT5(0xF3, 0x48, 0x0F, 0x1E, 0xF8); /* ibhf */
+ }
+ *pprog = prog;
+ return 0;
+}
+
+static int do_jit(struct bpf_verifier_env *env, struct bpf_prog *bpf_prog, int *addrs, u8 *image,
+ u8 *rw_image, int oldproglen, struct jit_context *ctx, bool jmp_padding)
{
bool tail_call_reachable = bpf_prog->aux->tail_call_reachable;
struct bpf_insn *insn = bpf_prog->insnsi;
@@ -1464,7 +1663,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
void __percpu *priv_stack_ptr;
int i, excnt = 0;
int ilen, proglen = 0;
- u8 *prog = temp;
+ u8 *ip, *prog = temp;
u32 stack_depth;
int err;
@@ -1480,9 +1679,12 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
detect_reg_usage(insn, insn_cnt, callee_regs_used);
- emit_prologue(&prog, stack_depth,
+ emit_prologue(&prog, image, stack_depth,
bpf_prog_was_classic(bpf_prog), tail_call_reachable,
bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
+
+ bpf_prog->aux->ksym.fp_start = prog - temp;
+
/* Exception callback will clobber callee regs for its own use, and
* restore the original callee regs from main prog's stack frame.
*/
@@ -1532,6 +1734,11 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
dst_reg = X86_REG_R9;
}
+ if (bpf_insn_is_indirect_target(env, bpf_prog, i - 1))
+ EMIT_ENDBR();
+
+ ip = image + addrs[i - 1] + (prog - temp);
+
switch (insn->code) {
/* ALU */
case BPF_ALU | BPF_ADD | BPF_X:
@@ -1958,19 +2165,27 @@ st: if (is_imm8(insn->off))
case BPF_LDX | BPF_PROBE_MEM32 | BPF_H:
case BPF_LDX | BPF_PROBE_MEM32 | BPF_W:
case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW:
+ case BPF_LDX | BPF_PROBE_MEM32SX | BPF_B:
+ case BPF_LDX | BPF_PROBE_MEM32SX | BPF_H:
+ case BPF_LDX | BPF_PROBE_MEM32SX | BPF_W:
case BPF_STX | BPF_PROBE_MEM32 | BPF_B:
case BPF_STX | BPF_PROBE_MEM32 | BPF_H:
case BPF_STX | BPF_PROBE_MEM32 | BPF_W:
case BPF_STX | BPF_PROBE_MEM32 | BPF_DW:
start_of_ldx = prog;
- if (BPF_CLASS(insn->code) == BPF_LDX)
- emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
- else
+ if (BPF_CLASS(insn->code) == BPF_LDX) {
+ if (BPF_MODE(insn->code) == BPF_PROBE_MEM32SX)
+ emit_ldsx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ else
+ emit_ldx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ } else {
emit_stx_r12(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
+ }
populate_extable:
{
struct exception_table_entry *ex;
u8 *_insn = image + proglen + (start_of_ldx - temp);
+ u32 arena_reg, fixup_reg;
s64 delta;
if (!bpf_prog->aux->extable)
@@ -1990,8 +2205,29 @@ populate_extable:
ex->data = EX_TYPE_BPF;
- ex->fixup = (prog - start_of_ldx) |
- ((BPF_CLASS(insn->code) == BPF_LDX ? reg2pt_regs[dst_reg] : DONT_CLEAR) << 8);
+ /*
+ * src_reg/dst_reg holds the address in the arena region with upper
+ * 32-bits being zero because of a preceding addr_space_cast(r<n>,
+ * 0x0, 0x1) instruction. This address is adjusted with the addition
+ * of arena_vm_start (see the implementation of BPF_PROBE_MEM32 and
+ * BPF_PROBE_ATOMIC) before being used for the memory access. Pass
+ * the reg holding the unmodified 32-bit address to
+ * ex_handler_bpf().
+ */
+ if (BPF_CLASS(insn->code) == BPF_LDX) {
+ arena_reg = reg2pt_regs[src_reg];
+ fixup_reg = reg2pt_regs[dst_reg];
+ } else {
+ arena_reg = reg2pt_regs[dst_reg];
+ fixup_reg = DONT_CLEAR;
+ }
+
+ ex->fixup = FIELD_PREP(FIXUP_INSN_LEN_MASK, prog - start_of_ldx) |
+ FIELD_PREP(FIXUP_ARENA_REG_MASK, arena_reg) |
+ FIELD_PREP(FIXUP_REG_MASK, fixup_reg);
+ ex->fixup |= FIXUP_ARENA_ACCESS;
+
+ ex->data |= FIELD_PREP(DATA_ARENA_OFFSET_MASK, insn->off);
}
break;
@@ -2109,10 +2345,18 @@ populate_extable:
* End result: x86 insn "mov rbx, qword ptr [rax+0x14]"
* of 4 bytes will be ignored and rbx will be zero inited.
*/
- ex->fixup = (prog - start_of_ldx) | (reg2pt_regs[dst_reg] << 8);
+ ex->fixup = FIELD_PREP(FIXUP_INSN_LEN_MASK, prog - start_of_ldx) |
+ FIELD_PREP(FIXUP_REG_MASK, reg2pt_regs[dst_reg]);
}
break;
+ case BPF_STX | BPF_ATOMIC | BPF_B:
+ case BPF_STX | BPF_ATOMIC | BPF_H:
+ if (!bpf_atomic_is_load_store(insn)) {
+ pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n");
+ return -EFAULT;
+ }
+ fallthrough;
case BPF_STX | BPF_ATOMIC | BPF_W:
case BPF_STX | BPF_ATOMIC | BPF_DW:
if (insn->imm == (BPF_AND | BPF_FETCH) ||
@@ -2148,10 +2392,10 @@ populate_extable:
EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)],
add_2reg(0xC0, AUX_REG, real_src_reg));
/* Attempt to swap in new value */
- err = emit_atomic(&prog, BPF_CMPXCHG,
- real_dst_reg, AUX_REG,
- insn->off,
- BPF_SIZE(insn->code));
+ err = emit_atomic_rmw(&prog, BPF_CMPXCHG,
+ real_dst_reg, AUX_REG,
+ insn->off,
+ BPF_SIZE(insn->code));
if (WARN_ON(err))
return err;
/*
@@ -2166,25 +2410,41 @@ populate_extable:
break;
}
- err = emit_atomic(&prog, insn->imm, dst_reg, src_reg,
- insn->off, BPF_SIZE(insn->code));
+ if (bpf_atomic_is_load_store(insn))
+ err = emit_atomic_ld_st(&prog, insn->imm, dst_reg, src_reg,
+ insn->off, BPF_SIZE(insn->code));
+ else
+ err = emit_atomic_rmw(&prog, insn->imm, dst_reg, src_reg,
+ insn->off, BPF_SIZE(insn->code));
if (err)
return err;
break;
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_B:
+ case BPF_STX | BPF_PROBE_ATOMIC | BPF_H:
+ if (!bpf_atomic_is_load_store(insn)) {
+ pr_err("bpf_jit: 1- and 2-byte RMW atomics are not supported\n");
+ return -EFAULT;
+ }
+ fallthrough;
case BPF_STX | BPF_PROBE_ATOMIC | BPF_W:
case BPF_STX | BPF_PROBE_ATOMIC | BPF_DW:
start_of_ldx = prog;
- err = emit_atomic_index(&prog, insn->imm, BPF_SIZE(insn->code),
- dst_reg, src_reg, X86_REG_R12, insn->off);
+
+ if (bpf_atomic_is_load_store(insn))
+ err = emit_atomic_ld_st_index(&prog, insn->imm,
+ BPF_SIZE(insn->code), dst_reg,
+ src_reg, X86_REG_R12, insn->off);
+ else
+ err = emit_atomic_rmw_index(&prog, insn->imm, BPF_SIZE(insn->code),
+ dst_reg, src_reg, X86_REG_R12,
+ insn->off);
if (err)
return err;
goto populate_extable;
/* call */
case BPF_JMP | BPF_CALL: {
- u8 *ip = image + addrs[i - 1];
-
func = (u8 *) __bpf_call_base + imm32;
if (src_reg == BPF_PSEUDO_CALL && tail_call_reachable) {
LOAD_TAIL_CALL_CNT_PTR(stack_depth);
@@ -2208,7 +2468,8 @@ populate_extable:
if (imm32)
emit_bpf_tail_call_direct(bpf_prog,
&bpf_prog->aux->poke_tab[imm32 - 1],
- &prog, image + addrs[i - 1],
+ &prog,
+ ip,
callee_regs_used,
stack_depth,
ctx);
@@ -2217,7 +2478,7 @@ populate_extable:
&prog,
callee_regs_used,
stack_depth,
- image + addrs[i - 1],
+ ip,
ctx);
break;
@@ -2381,6 +2642,9 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */
break;
+ case BPF_JMP | BPF_JA | BPF_X:
+ emit_indirect_jump(&prog, insn->dst_reg, ip);
+ break;
case BPF_JMP | BPF_JA:
case BPF_JMP32 | BPF_JA:
if (BPF_CLASS(insn->code) == BPF_JMP) {
@@ -2467,6 +2731,11 @@ emit_jmp:
seen_exit = true;
/* Update cleanup_addr */
ctx->cleanup_addr = proglen;
+ if (bpf_prog_was_classic(bpf_prog) &&
+ !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN)) {
+ if (emit_spectre_bhb_barrier(&prog, ip, bpf_prog))
+ return -EINVAL;
+ }
if (bpf_prog->aux->exception_boundary) {
pop_callee_regs(&prog, all_callee_regs_used);
pop_r12(&prog);
@@ -2476,6 +2745,8 @@ emit_jmp:
pop_r12(&prog);
}
EMIT1(0xC9); /* leave */
+ bpf_prog->aux->ksym.fp_end = prog - temp;
+
emit_return(&prog, image + addrs[i - 1] + (prog - temp));
break;
@@ -2590,9 +2861,10 @@ static int get_nr_used_regs(const struct btf_func_model *m)
}
static void save_args(const struct btf_func_model *m, u8 **prog,
- int stack_size, bool for_call_origin)
+ int stack_size, bool for_call_origin, u32 flags)
{
int arg_regs, first_off = 0, nr_regs = 0, nr_stack_slots = 0;
+ bool use_jmp = bpf_trampoline_use_jmp(flags);
int i, j;
/* Store function arguments to stack.
@@ -2633,7 +2905,7 @@ static void save_args(const struct btf_func_model *m, u8 **prog,
*/
for (j = 0; j < arg_regs; j++) {
emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP,
- nr_stack_slots * 8 + 0x18);
+ nr_stack_slots * 8 + 16 + (!use_jmp) * 8);
emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0,
-stack_size);
@@ -2815,13 +3087,19 @@ static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
static int invoke_bpf(const struct btf_func_model *m, u8 **pprog,
struct bpf_tramp_links *tl, int stack_size,
- int run_ctx_off, bool save_ret,
- void *image, void *rw_image)
+ int run_ctx_off, int func_meta_off, bool save_ret,
+ void *image, void *rw_image, u64 func_meta,
+ int cookie_off)
{
- int i;
+ int i, cur_cookie = (cookie_off - stack_size) / 8;
u8 *prog = *pprog;
for (i = 0; i < tl->nr_links; i++) {
+ if (tl->links[i]->link.prog->call_session_cookie) {
+ emit_store_stack_imm64(&prog, BPF_REG_0, -func_meta_off,
+ func_meta | (cur_cookie << BPF_TRAMP_COOKIE_INDEX_SHIFT));
+ cur_cookie--;
+ }
if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size,
run_ctx_off, save_ret, image, rw_image))
return -EINVAL;
@@ -2939,12 +3217,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
void *func_addr)
{
int i, ret, nr_regs = m->nr_args, stack_size = 0;
- int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
+ int regs_off, func_meta_off, ip_off, run_ctx_off, arg_stack_off, rbx_off;
struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
void *orig_call = func_addr;
+ int cookie_off, cookie_cnt;
u8 **branches = NULL;
+ u64 func_meta;
u8 *prog;
bool save_ret;
@@ -2980,7 +3260,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
* [ ... ]
* RBP - regs_off [ reg_arg1 ] program's ctx pointer
*
- * RBP - nregs_off [ regs count ] always
+ * RBP - func_meta_off [ regs count, etc ] always
*
* RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag
*
@@ -3003,15 +3283,20 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
stack_size += nr_regs * 8;
regs_off = stack_size;
- /* regs count */
+ /* function matedata, such as regs count */
stack_size += 8;
- nregs_off = stack_size;
+ func_meta_off = stack_size;
if (flags & BPF_TRAMP_F_IP_ARG)
stack_size += 8; /* room for IP address argument */
ip_off = stack_size;
+ cookie_cnt = bpf_fsession_cookie_cnt(tlinks);
+ /* room for session cookies */
+ stack_size += cookie_cnt * 8;
+ cookie_off = stack_size;
+
stack_size += 8;
rbx_off = stack_size;
@@ -3027,16 +3312,21 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
* should be 16-byte aligned. Following code depend on
* that stack_size is already 8-byte aligned.
*/
- stack_size += (stack_size % 16) ? 0 : 8;
+ if (bpf_trampoline_use_jmp(flags)) {
+ /* no rip in the "jmp" case */
+ stack_size += (stack_size % 16) ? 8 : 0;
+ } else {
+ stack_size += (stack_size % 16) ? 0 : 8;
+ }
}
arg_stack_off = stack_size;
- if (flags & BPF_TRAMP_F_SKIP_FRAME) {
+ if (flags & BPF_TRAMP_F_CALL_ORIG) {
/* skip patched call instruction and point orig_call to actual
* body of the kernel function.
*/
- if (is_endbr(*(u32 *)orig_call))
+ if (is_endbr(orig_call))
orig_call += ENDBR_INSN_SIZE;
orig_call += X86_PATCH_SIZE;
}
@@ -3047,7 +3337,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
/*
* Indirect call for bpf_struct_ops
*/
- emit_cfi(&prog, cfi_get_func_hash(func_addr));
+ emit_cfi(&prog, image,
+ cfi_get_func_hash(func_addr),
+ cfi_get_func_arity(func_addr));
} else {
/*
* Direct-call fentry stub, as such it needs accounting for the
@@ -3057,6 +3349,9 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
EMIT1(0x55); /* push rbp */
EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+ if (im)
+ im->ksym.fp_start = prog - (u8 *)rw_image;
+
if (!is_imm8(stack_size)) {
/* sub rsp, stack_size */
EMIT3_off32(0x48, 0x81, 0xEC, stack_size);
@@ -3069,23 +3364,16 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
/* mov QWORD PTR [rbp - rbx_off], rbx */
emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_6, -rbx_off);
- /* Store number of argument registers of the traced function:
- * mov rax, nr_regs
- * mov QWORD PTR [rbp - nregs_off], rax
- */
- emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_regs);
- emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -nregs_off);
+ func_meta = nr_regs;
+ /* Store number of argument registers of the traced function */
+ emit_store_stack_imm64(&prog, BPF_REG_0, -func_meta_off, func_meta);
if (flags & BPF_TRAMP_F_IP_ARG) {
- /* Store IP address of the traced function:
- * movabsq rax, func_addr
- * mov QWORD PTR [rbp - ip_off], rax
- */
- emit_mov_imm64(&prog, BPF_REG_0, (long) func_addr >> 32, (u32) (long) func_addr);
- emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off);
+ /* Store IP address of the traced function */
+ emit_store_stack_imm64(&prog, BPF_REG_0, -ip_off, (long)func_addr);
}
- save_args(m, &prog, regs_off, false);
+ save_args(m, &prog, regs_off, false, flags);
if (flags & BPF_TRAMP_F_CALL_ORIG) {
/* arg1: mov rdi, im */
@@ -3097,9 +3385,18 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
}
+ if (bpf_fsession_cnt(tlinks)) {
+ /* clear all the session cookies' value */
+ for (int i = 0; i < cookie_cnt; i++)
+ emit_store_stack_imm64(&prog, BPF_REG_0, -cookie_off + 8 * i, 0);
+ /* clear the return value to make sure fentry always get 0 */
+ emit_store_stack_imm64(&prog, BPF_REG_0, -8, 0);
+ }
+
if (fentry->nr_links) {
- if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off,
- flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image))
+ if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off, func_meta_off,
+ flags & BPF_TRAMP_F_RET_FENTRY_RET, image, rw_image,
+ func_meta, cookie_off))
return -EINVAL;
}
@@ -3118,7 +3415,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
if (flags & BPF_TRAMP_F_CALL_ORIG) {
restore_regs(m, &prog, regs_off);
- save_args(m, &prog, arg_stack_off, true);
+ save_args(m, &prog, arg_stack_off, true, flags);
if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) {
/* Before calling the original function, load the
@@ -3159,9 +3456,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
}
}
+ /* set the "is_return" flag for fsession */
+ func_meta |= (1ULL << BPF_TRAMP_IS_RETURN_SHIFT);
+ if (bpf_fsession_cnt(tlinks))
+ emit_store_stack_imm64(&prog, BPF_REG_0, -func_meta_off, func_meta);
+
if (fexit->nr_links) {
- if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off,
- false, image, rw_image)) {
+ if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, func_meta_off,
+ false, image, rw_image, func_meta, cookie_off)) {
ret = -EINVAL;
goto cleanup;
}
@@ -3194,7 +3496,11 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_im
emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off);
+
EMIT1(0xC9); /* leave */
+ if (im)
+ im->ksym.fp_end = prog - (u8 *)rw_image;
+
if (flags & BPF_TRAMP_F_SKIP_FRAME) {
/* skip our return address and return to parent */
EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
@@ -3301,7 +3607,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs, u8 *image,
if (err)
return err;
- emit_indirect_jump(&prog, 2 /* rdx */, image + (prog - buf));
+ emit_indirect_jump(&prog, BPF_REG_3 /* R3 -> rdx */, image + (prog - buf));
*pprog = prog;
return 0;
@@ -3368,13 +3674,6 @@ int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_func
return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs, image, buf);
}
-static const char *bpf_get_prog_name(struct bpf_prog *prog)
-{
- if (prog->aux->ksym.prog)
- return prog->aux->ksym.name;
- return prog->aux->name;
-}
-
static void priv_stack_init_guard(void __percpu *priv_stack_ptr, int alloc_size)
{
int cpu, underflow_idx = (alloc_size - PRIV_STACK_GUARD_SZ) >> 3;
@@ -3398,7 +3697,7 @@ static void priv_stack_check_guard(void __percpu *priv_stack_ptr, int alloc_size
if (stack_ptr[0] != PRIV_STACK_GUARD_VAL ||
stack_ptr[underflow_idx] != PRIV_STACK_GUARD_VAL) {
pr_err("BPF private stack overflow/underflow detected for prog %sx\n",
- bpf_get_prog_name(prog));
+ bpf_jit_get_prog_name(prog));
break;
}
}
@@ -3416,17 +3715,15 @@ struct x64_jit_data {
#define MAX_PASSES 20
#define PADDING_PASSES (MAX_PASSES - 5)
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
{
struct bpf_binary_header *rw_header = NULL;
struct bpf_binary_header *header = NULL;
- struct bpf_prog *tmp, *orig_prog = prog;
void __percpu *priv_stack_ptr = NULL;
struct x64_jit_data *jit_data;
int priv_stack_alloc_sz;
int proglen, oldproglen = 0;
struct jit_context ctx = {};
- bool tmp_blinded = false;
bool extra_pass = false;
bool padding = false;
u8 *rw_image = NULL;
@@ -3436,27 +3733,13 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
int i;
if (!prog->jit_requested)
- return orig_prog;
-
- tmp = bpf_jit_blind_constants(prog);
- /*
- * If blinding was requested and we failed during blinding,
- * we must fall back to the interpreter.
- */
- if (IS_ERR(tmp))
- return orig_prog;
- if (tmp != prog) {
- tmp_blinded = true;
- prog = tmp;
- }
+ return prog;
jit_data = prog->aux->jit_data;
if (!jit_data) {
- jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
- if (!jit_data) {
- prog = orig_prog;
- goto out;
- }
+ jit_data = kzalloc_obj(*jit_data);
+ if (!jit_data)
+ return prog;
prog->aux->jit_data = jit_data;
}
priv_stack_ptr = prog->aux->priv_stack_ptr;
@@ -3468,10 +3751,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
priv_stack_alloc_sz = round_up(prog->aux->stack_depth, 8) +
2 * PRIV_STACK_GUARD_SZ;
priv_stack_ptr = __alloc_percpu_gfp(priv_stack_alloc_sz, 8, GFP_KERNEL);
- if (!priv_stack_ptr) {
- prog = orig_prog;
+ if (!priv_stack_ptr)
goto out_priv_stack;
- }
priv_stack_init_guard(priv_stack_ptr, priv_stack_alloc_sz);
prog->aux->priv_stack_ptr = priv_stack_ptr;
@@ -3488,11 +3769,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
padding = true;
goto skip_init_addrs;
}
- addrs = kvmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL);
- if (!addrs) {
- prog = orig_prog;
+ addrs = kvmalloc_objs(*addrs, prog->len + 1);
+ if (!addrs)
goto out_addrs;
- }
/*
* Before first pass, make a rough estimation of addrs[]
@@ -3514,7 +3793,7 @@ skip_init_addrs:
for (pass = 0; pass < MAX_PASSES || image; pass++) {
if (!padding && pass >= PADDING_PASSES)
padding = true;
- proglen = do_jit(prog, addrs, image, rw_image, oldproglen, &ctx, padding);
+ proglen = do_jit(env, prog, addrs, image, rw_image, oldproglen, &ctx, padding);
if (proglen <= 0) {
out_image:
image = NULL;
@@ -3523,8 +3802,6 @@ out_image:
sizeof(rw_header->size));
bpf_jit_binary_pack_free(header, rw_header);
}
- /* Fall back to interpreter mode */
- prog = orig_prog;
if (extra_pass) {
prog->bpf_func = NULL;
prog->jited = 0;
@@ -3555,10 +3832,8 @@ out_image:
header = bpf_jit_binary_pack_alloc(roundup(proglen, align) + extable_size,
&image, align, &rw_header, &rw_image,
jit_fill_hole);
- if (!header) {
- prog = orig_prog;
+ if (!header)
goto out_addrs;
- }
prog->aux->extable = (void *) image + roundup(proglen, align);
}
oldproglen = proglen;
@@ -3592,6 +3867,15 @@ out_image:
jit_data->header = header;
jit_data->rw_header = rw_header;
}
+
+ /*
+ * The bpf_prog_update_insn_ptrs function expects addrs to
+ * point to the first byte of the jitted instruction (unlike
+ * the bpf_prog_fill_jited_linfo below, which, for historical
+ * reasons, expects to point to the next instruction)
+ */
+ bpf_prog_update_insn_ptrs(prog, addrs, image);
+
/*
* ctx.prog_offset is used when CFI preambles put code *before*
* the function. See emit_cfi(). For FineIBT specifically this code
@@ -3602,8 +3886,6 @@ out_image:
prog->bpf_func = (void *)image + cfi_get_offset();
prog->jited = 1;
prog->jited_len = proglen - cfi_get_offset();
- } else {
- prog = orig_prog;
}
if (!image || !prog->is_func || extra_pass) {
@@ -3619,10 +3901,7 @@ out_priv_stack:
kfree(jit_data);
prog->aux->jit_data = NULL;
}
-out:
- if (tmp_blinded)
- bpf_jit_prog_release_other(prog, prog == orig_prog ?
- tmp : orig_prog);
+
return prog;
}
@@ -3712,13 +3991,13 @@ void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp
}
return;
#endif
- WARN(1, "verification of programs using bpf_throw should have failed\n");
}
void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
struct bpf_prog *new, struct bpf_prog *old)
{
u8 *old_addr, *new_addr, *old_bypass_addr;
+ enum bpf_text_poke_type t;
int ret;
old_bypass_addr = old ? NULL : poke->bypass_addr;
@@ -3731,21 +4010,22 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
* the kallsyms check.
*/
if (new) {
+ t = old_addr ? BPF_MOD_JUMP : BPF_MOD_NOP;
ret = __bpf_arch_text_poke(poke->tailcall_target,
- BPF_MOD_JUMP,
+ t, BPF_MOD_JUMP,
old_addr, new_addr);
BUG_ON(ret < 0);
if (!old) {
ret = __bpf_arch_text_poke(poke->tailcall_bypass,
- BPF_MOD_JUMP,
+ BPF_MOD_JUMP, BPF_MOD_NOP,
poke->bypass_addr,
NULL);
BUG_ON(ret < 0);
}
} else {
+ t = old_bypass_addr ? BPF_MOD_JUMP : BPF_MOD_NOP;
ret = __bpf_arch_text_poke(poke->tailcall_bypass,
- BPF_MOD_JUMP,
- old_bypass_addr,
+ t, BPF_MOD_JUMP, old_bypass_addr,
poke->bypass_addr);
BUG_ON(ret < 0);
/* let other CPUs finish the execution of program
@@ -3754,9 +4034,9 @@ void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
*/
if (!ret)
synchronize_rcu();
+ t = old_addr ? BPF_MOD_JUMP : BPF_MOD_NOP;
ret = __bpf_arch_text_poke(poke->tailcall_target,
- BPF_MOD_JUMP,
- old_addr, NULL);
+ t, BPF_MOD_NOP, old_addr, NULL);
BUG_ON(ret < 0);
}
}
@@ -3791,3 +4071,13 @@ u64 bpf_arch_uaddress_limit(void)
{
return 0;
}
+
+bool bpf_jit_supports_timed_may_goto(void)
+{
+ return true;
+}
+
+bool bpf_jit_supports_fsession(void)
+{
+ return true;
+}
diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index de0f9e5f9f73..852baf2e4db4 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -2518,38 +2518,22 @@ bool bpf_jit_needs_zext(void)
return true;
}
-struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
+struct bpf_prog *bpf_int_jit_compile(struct bpf_verifier_env *env, struct bpf_prog *prog)
{
struct bpf_binary_header *header = NULL;
- struct bpf_prog *tmp, *orig_prog = prog;
int proglen, oldproglen = 0;
struct jit_context ctx = {};
- bool tmp_blinded = false;
u8 *image = NULL;
int *addrs;
int pass;
int i;
if (!prog->jit_requested)
- return orig_prog;
+ return prog;
- tmp = bpf_jit_blind_constants(prog);
- /*
- * If blinding was requested and we failed during blinding,
- * we must fall back to the interpreter.
- */
- if (IS_ERR(tmp))
- return orig_prog;
- if (tmp != prog) {
- tmp_blinded = true;
- prog = tmp;
- }
-
- addrs = kmalloc_array(prog->len, sizeof(*addrs), GFP_KERNEL);
- if (!addrs) {
- prog = orig_prog;
- goto out;
- }
+ addrs = kmalloc_objs(*addrs, prog->len);
+ if (!addrs)
+ return prog;
/*
* Before first pass, make a rough estimation of addrs[]
@@ -2574,7 +2558,6 @@ out_image:
image = NULL;
if (header)
bpf_jit_binary_free(header);
- prog = orig_prog;
goto out_addrs;
}
if (image) {
@@ -2588,10 +2571,8 @@ out_image:
if (proglen == oldproglen) {
header = bpf_jit_binary_alloc(proglen, &image,
1, jit_fill_hole);
- if (!header) {
- prog = orig_prog;
+ if (!header)
goto out_addrs;
- }
}
oldproglen = proglen;
cond_resched();
@@ -2604,16 +2585,10 @@ out_image:
prog->bpf_func = (void *)image;
prog->jited = 1;
prog->jited_len = proglen;
- } else {
- prog = orig_prog;
}
out_addrs:
kfree(addrs);
-out:
- if (tmp_blinded)
- bpf_jit_prog_release_other(prog, prog == orig_prog ?
- tmp : orig_prog);
return prog;
}
diff --git a/arch/x86/net/bpf_timed_may_goto.S b/arch/x86/net/bpf_timed_may_goto.S
new file mode 100644
index 000000000000..54c690cae190
--- /dev/null
+++ b/arch/x86/net/bpf_timed_may_goto.S
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/export.h>
+#include <linux/linkage.h>
+#include <asm/nospec-branch.h>
+
+ .code64
+ .section .text, "ax"
+
+SYM_FUNC_START(arch_bpf_timed_may_goto)
+ ANNOTATE_NOENDBR
+
+ /*
+ * r10 passes us stack depth, load the pointer to count and timestamp
+ * into r10 by adding it to BPF frame pointer.
+ */
+ leaq (%rbp, %r10, 1), %r10
+
+ /* Setup frame. */
+ pushq %rbp
+ movq %rsp, %rbp
+
+ /* Save r0-r5. */
+ pushq %rax
+ pushq %rdi
+ pushq %rsi
+ pushq %rdx
+ pushq %rcx
+ pushq %r8
+
+ /*
+ * r10 has the pointer to count and timestamp, pass it as first
+ * argument.
+ */
+ movq %r10, %rdi
+
+ /* Emit call depth accounting for call below. */
+ CALL_DEPTH_ACCOUNT
+ call bpf_check_timed_may_goto
+
+ /* BPF_REG_AX=r10 will be stored into count, so move return value to it. */
+ movq %rax, %r10
+
+ /* Restore r5-r0. */
+ popq %r8
+ popq %rcx
+ popq %rdx
+ popq %rsi
+ popq %rdi
+ popq %rax
+
+ leave
+ RET
+SYM_FUNC_END(arch_bpf_timed_may_goto)