From 0a14842f5a3c0e88a1e59fac5c3025db39721f74 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Apr 2011 09:27:32 +0000 Subject: net: filter: Just In Time compiler for x86-64 In order to speedup packet filtering, here is an implementation of a JIT compiler for x86_64 It is disabled by default, and must be enabled by the admin. echo 1 >/proc/sys/net/core/bpf_jit_enable It uses module_alloc() and module_free() to get memory in the 2GB text kernel range since we call helpers functions from the generated code. EAX : BPF A accumulator EBX : BPF X accumulator RDI : pointer to skb (first argument given to JIT function) RBP : frame pointer (even if CONFIG_FRAME_POINTER=n) r9d : skb->len - skb->data_len (headlen) r8 : skb->data To get a trace of generated code, use : echo 2 >/proc/sys/net/core/bpf_jit_enable Example of generated code : # tcpdump -p -n -s 0 -i eth1 host 192.168.20.0/24 flen=18 proglen=147 pass=3 image=ffffffffa00b5000 JIT code: ffffffffa00b5000: 55 48 89 e5 48 83 ec 60 48 89 5d f8 44 8b 4f 60 JIT code: ffffffffa00b5010: 44 2b 4f 64 4c 8b 87 b8 00 00 00 be 0c 00 00 00 JIT code: ffffffffa00b5020: e8 24 7b f7 e0 3d 00 08 00 00 75 28 be 1a 00 00 JIT code: ffffffffa00b5030: 00 e8 fe 7a f7 e0 24 00 3d 00 14 a8 c0 74 49 be JIT code: ffffffffa00b5040: 1e 00 00 00 e8 eb 7a f7 e0 24 00 3d 00 14 a8 c0 JIT code: ffffffffa00b5050: 74 36 eb 3b 3d 06 08 00 00 74 07 3d 35 80 00 00 JIT code: ffffffffa00b5060: 75 2d be 1c 00 00 00 e8 c8 7a f7 e0 24 00 3d 00 JIT code: ffffffffa00b5070: 14 a8 c0 74 13 be 26 00 00 00 e8 b5 7a f7 e0 24 JIT code: ffffffffa00b5080: 00 3d 00 14 a8 c0 75 07 b8 ff ff 00 00 eb 02 31 JIT code: ffffffffa00b5090: c0 c9 c3 BPF program is 144 bytes long, so native program is almost same size ;) (000) ldh [12] (001) jeq #0x800 jt 2 jf 8 (002) ld [26] (003) and #0xffffff00 (004) jeq #0xc0a81400 jt 16 jf 5 (005) ld [30] (006) and #0xffffff00 (007) jeq #0xc0a81400 jt 16 jf 17 (008) jeq #0x806 jt 10 jf 9 (009) jeq #0x8035 jt 10 jf 17 (010) ld [28] (011) and #0xffffff00 (012) jeq #0xc0a81400 jt 16 jf 13 (013) ld [38] (014) and #0xffffff00 (015) jeq #0xc0a81400 jt 16 jf 17 (016) ret #65535 (017) ret #0 Signed-off-by: Eric Dumazet Cc: Arnaldo Carvalho de Melo Cc: Ben Hutchings Cc: Hagen Paul Pfeifer Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit.S | 140 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 arch/x86/net/bpf_jit.S (limited to 'arch/x86/net/bpf_jit.S') diff --git a/arch/x86/net/bpf_jit.S b/arch/x86/net/bpf_jit.S new file mode 100644 index 000000000000..66870223f8c5 --- /dev/null +++ b/arch/x86/net/bpf_jit.S @@ -0,0 +1,140 @@ +/* bpf_jit.S : BPF JIT helper functions + * + * Copyright (C) 2011 Eric Dumazet (eric.dumazet@gmail.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include +#include + +/* + * Calling convention : + * rdi : skb pointer + * esi : offset of byte(s) to fetch in skb (can be scratched) + * r8 : copy of skb->data + * r9d : hlen = skb->len - skb->data_len + */ +#define SKBDATA %r8 + +sk_load_word_ind: + .globl sk_load_word_ind + + add %ebx,%esi /* offset += X */ +# test %esi,%esi /* if (offset < 0) goto bpf_error; */ + js bpf_error + +sk_load_word: + .globl sk_load_word + + mov %r9d,%eax # hlen + sub %esi,%eax # hlen - offset + cmp $3,%eax + jle bpf_slow_path_word + mov (SKBDATA,%rsi),%eax + bswap %eax /* ntohl() */ + ret + + +sk_load_half_ind: + .globl sk_load_half_ind + + add %ebx,%esi /* offset += X */ + js bpf_error + +sk_load_half: + .globl sk_load_half + + mov %r9d,%eax + sub %esi,%eax # hlen - offset + cmp $1,%eax + jle bpf_slow_path_half + movzwl (SKBDATA,%rsi),%eax + rol $8,%ax # ntohs() + ret + +sk_load_byte_ind: + .globl sk_load_byte_ind + add %ebx,%esi /* offset += X */ + js bpf_error + +sk_load_byte: + .globl sk_load_byte + + cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte */ + jle bpf_slow_path_byte + movzbl (SKBDATA,%rsi),%eax + ret + +/** + * sk_load_byte_msh - BPF_S_LDX_B_MSH helper + * + * Implements BPF_S_LDX_B_MSH : ldxb 4*([offset]&0xf) + * Must preserve A accumulator (%eax) + * Inputs : %esi is the offset value, already known positive + */ +ENTRY(sk_load_byte_msh) + CFI_STARTPROC + cmp %esi,%r9d /* if (offset >= hlen) goto bpf_slow_path_byte_msh */ + jle bpf_slow_path_byte_msh + movzbl (SKBDATA,%rsi),%ebx + and $15,%bl + shl $2,%bl + ret + CFI_ENDPROC +ENDPROC(sk_load_byte_msh) + +bpf_error: +# force a return 0 from jit handler + xor %eax,%eax + mov -8(%rbp),%rbx + leaveq + ret + +/* rsi contains offset and can be scratched */ +#define bpf_slow_path_common(LEN) \ + push %rdi; /* save skb */ \ + push %r9; \ + push SKBDATA; \ +/* rsi already has offset */ \ + mov $LEN,%ecx; /* len */ \ + lea -12(%rbp),%rdx; \ + call skb_copy_bits; \ + test %eax,%eax; \ + pop SKBDATA; \ + pop %r9; \ + pop %rdi + + +bpf_slow_path_word: + bpf_slow_path_common(4) + js bpf_error + mov -12(%rbp),%eax + bswap %eax + ret + +bpf_slow_path_half: + bpf_slow_path_common(2) + js bpf_error + mov -12(%rbp),%ax + rol $8,%ax + movzwl %ax,%eax + ret + +bpf_slow_path_byte: + bpf_slow_path_common(1) + js bpf_error + movzbl -12(%rbp),%eax + ret + +bpf_slow_path_byte_msh: + xchg %eax,%ebx /* dont lose A , X is about to be scratched */ + bpf_slow_path_common(1) + js bpf_error + movzbl -12(%rbp),%eax + and $15,%al + shl $2,%al + xchg %eax,%ebx + ret -- cgit v1.2.3