summaryrefslogtreecommitdiff
path: root/arch/powerpc/net/bpf_jit.h
blob: 889fd199a821cbda22fe6e61ebed54cfa115d53f (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
/* bpf_jit.h: BPF JIT compiler for PPC64
 *
 * Copyright 2011 Matt Evans <matt@ozlabs.org>, IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 */
#ifndef _BPF_JIT_H
#define _BPF_JIT_H

#ifdef CONFIG_PPC64
#define BPF_PPC_STACK_R3_OFF	48
#define BPF_PPC_STACK_LOCALS	32
#define BPF_PPC_STACK_BASIC	(48+64)
#define BPF_PPC_STACK_SAVE	(18*8)
#define BPF_PPC_STACKFRAME	(BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \
				 BPF_PPC_STACK_SAVE)
#define BPF_PPC_SLOWPATH_FRAME	(48+64)
#else
#define BPF_PPC_STACK_R3_OFF	24
#define BPF_PPC_STACK_LOCALS	16
#define BPF_PPC_STACK_BASIC	(24+32)
#define BPF_PPC_STACK_SAVE	(18*4)
#define BPF_PPC_STACKFRAME	(BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \
				 BPF_PPC_STACK_SAVE)
#define BPF_PPC_SLOWPATH_FRAME	(24+32)
#endif

#define REG_SZ         (BITS_PER_LONG/8)

/*
 * Generated code register usage:
 *
 * As normal PPC C ABI (e.g. r1=sp, r2=TOC), with:
 *
 * skb		r3	(Entry parameter)
 * A register	r4
 * X register	r5
 * addr param	r6
 * r7-r10	scratch
 * skb->data	r14
 * skb headlen	r15	(skb->len - skb->data_len)
 * m[0]		r16
 * m[...]	...
 * m[15]	r31
 */
#define r_skb		3
#define r_ret		3
#define r_A		4
#define r_X		5
#define r_addr		6
#define r_scratch1	7
#define r_scratch2	8
#define r_D		14
#define r_HL		15
#define r_M		16

#ifndef __ASSEMBLY__

/*
 * Assembly helpers from arch/powerpc/net/bpf_jit.S:
 */
#define DECLARE_LOAD_FUNC(func)	\
	extern u8 func[], func##_negative_offset[], func##_positive_offset[]

DECLARE_LOAD_FUNC(sk_load_word);
DECLARE_LOAD_FUNC(sk_load_half);
DECLARE_LOAD_FUNC(sk_load_byte);
DECLARE_LOAD_FUNC(sk_load_byte_msh);

#ifdef CONFIG_PPC64
#define FUNCTION_DESCR_SIZE	24
#else
#define FUNCTION_DESCR_SIZE	0
#endif

/*
 * 16-bit immediate helper macros: HA() is for use with sign-extending instrs
 * (e.g. LD, ADDI).  If the bottom 16 bits is "-ve", add another bit into the
 * top half to negate the effect (i.e. 0xffff + 1 = 0x(1)0000).
 */
#define IMM_H(i)		((uintptr_t)(i)>>16)
#define IMM_HA(i)		(((uintptr_t)(i)>>16) +			      \
				 (((uintptr_t)(i) & 0x8000) >> 15))
#define IMM_L(i)		((uintptr_t)(i) & 0xffff)

#define PLANT_INSTR(d, idx, instr)					      \
	do { if (d) { (d)[idx] = instr; } idx++; } while (0)
#define EMIT(instr)		PLANT_INSTR(image, ctx->idx, instr)

#define PPC_NOP()		EMIT(PPC_INST_NOP)
#define PPC_BLR()		EMIT(PPC_INST_BLR)
#define PPC_BLRL()		EMIT(PPC_INST_BLRL)
#define PPC_MTLR(r)		EMIT(PPC_INST_MTLR | ___PPC_RT(r))
#define PPC_ADDI(d, a, i)	EMIT(PPC_INST_ADDI | ___PPC_RT(d) |	      \
				     ___PPC_RA(a) | IMM_L(i))
#define PPC_MR(d, a)		PPC_OR(d, a, a)
#define PPC_LI(r, i)		PPC_ADDI(r, 0, i)
#define PPC_ADDIS(d, a, i)	EMIT(PPC_INST_ADDIS |			      \
				     ___PPC_RS(d) | ___PPC_RA(a) | IMM_L(i))
#define PPC_LIS(r, i)		PPC_ADDIS(r, 0, i)
#define PPC_STD(r, base, i)	EMIT(PPC_INST_STD | ___PPC_RS(r) |	      \
				     ___PPC_RA(base) | ((i) & 0xfffc))
#define PPC_STDU(r, base, i)	EMIT(PPC_INST_STDU | ___PPC_RS(r) |	      \
				     ___PPC_RA(base) | ((i) & 0xfffc))
#define PPC_STW(r, base, i)	EMIT(PPC_INST_STW | ___PPC_RS(r) |	      \
				     ___PPC_RA(base) | ((i) & 0xfffc))
#define PPC_STWU(r, base, i)	EMIT(PPC_INST_STWU | ___PPC_RS(r) |	      \
				     ___PPC_RA(base) | ((i) & 0xfffc))

#define PPC_LBZ(r, base, i)	EMIT(PPC_INST_LBZ | ___PPC_RT(r) |	      \
				     ___PPC_RA(base) | IMM_L(i))
#define PPC_LD(r, base, i)	EMIT(PPC_INST_LD | ___PPC_RT(r) |	      \
				     ___PPC_RA(base) | IMM_L(i))
#define PPC_LWZ(r, base, i)	EMIT(PPC_INST_LWZ | ___PPC_RT(r) |	      \
				     ___PPC_RA(base) | IMM_L(i))
#define PPC_LHZ(r, base, i)	EMIT(PPC_INST_LHZ | ___PPC_RT(r) |	      \
				     ___PPC_RA(base) | IMM_L(i))
#define PPC_LHBRX(r, base, b)	EMIT(PPC_INST_LHBRX | ___PPC_RT(r) |	      \
				     ___PPC_RA(base) | ___PPC_RB(b))

#ifdef CONFIG_PPC64
#define PPC_BPF_LL(r, base, i) do { PPC_LD(r, base, i); } while(0)
#define PPC_BPF_STL(r, base, i) do { PPC_STD(r, base, i); } while(0)
#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0)
#else
#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0)
#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0)
#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0)
#endif

/* Convenience helpers for the above with 'far' offsets: */
#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LBZ(r, base, i);   \
		else {	PPC_ADDIS(r, base, IMM_HA(i));			      \
			PPC_LBZ(r, r, IMM_L(i)); } } while(0)

#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) PPC_LD(r, base, i);     \
		else {	PPC_ADDIS(r, base, IMM_HA(i));			      \
			PPC_LD(r, r, IMM_L(i)); } } while(0)

#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LWZ(r, base, i);   \
		else {	PPC_ADDIS(r, base, IMM_HA(i));			      \
			PPC_LWZ(r, r, IMM_L(i)); } } while(0)

#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LHZ(r, base, i);   \
		else {	PPC_ADDIS(r, base, IMM_HA(i));			      \
			PPC_LHZ(r, r, IMM_L(i)); } } while(0)

#ifdef CONFIG_PPC64
#define PPC_LL_OFFS(r, base, i) do { PPC_LD_OFFS(r, base, i); } while(0)
#else
#define PPC_LL_OFFS(r, base, i) do { PPC_LWZ_OFFS(r, base, i); } while(0)
#endif

#ifdef CONFIG_SMP
#ifdef CONFIG_PPC64
#define PPC_BPF_LOAD_CPU(r)		\
	do { BUILD_BUG_ON(FIELD_SIZEOF(struct paca_struct, paca_index) != 2);	\
		PPC_LHZ_OFFS(r, 13, offsetof(struct paca_struct, paca_index));		\
	} while (0)
#else
#define PPC_BPF_LOAD_CPU(r)     \
	do { BUILD_BUG_ON(FIELD_SIZEOF(struct thread_info, cpu) != 4);			\
		PPC_LHZ_OFFS(r, (1 & ~(THREAD_SIZE - 1)),							\
				offsetof(struct thread_info, cpu));							\
	} while(0)
#endif
#else
#define PPC_BPF_LOAD_CPU(r) do { PPC_LI(r, 0); } while(0)
#endif

#define PPC_CMPWI(a, i)		EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i))
#define PPC_CMPDI(a, i)		EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i))
#define PPC_CMPLWI(a, i)	EMIT(PPC_INST_CMPLWI | ___PPC_RA(a) | IMM_L(i))
#define PPC_CMPLW(a, b)		EMIT(PPC_INST_CMPLW | ___PPC_RA(a) | ___PPC_RB(b))

#define PPC_SUB(d, a, b)	EMIT(PPC_INST_SUB | ___PPC_RT(d) |	      \
				     ___PPC_RB(a) | ___PPC_RA(b))
#define PPC_ADD(d, a, b)	EMIT(PPC_INST_ADD | ___PPC_RT(d) |	      \
				     ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_MUL(d, a, b)	EMIT(PPC_INST_MULLW | ___PPC_RT(d) |	      \
				     ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_MULHWU(d, a, b)	EMIT(PPC_INST_MULHWU | ___PPC_RT(d) |	      \
				     ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_MULI(d, a, i)	EMIT(PPC_INST_MULLI | ___PPC_RT(d) |	      \
				     ___PPC_RA(a) | IMM_L(i))
#define PPC_DIVWU(d, a, b)	EMIT(PPC_INST_DIVWU | ___PPC_RT(d) |	      \
				     ___PPC_RA(a) | ___PPC_RB(b))
#define PPC_AND(d, a, b)	EMIT(PPC_INST_AND | ___PPC_RA(d) |	      \
				     ___PPC_RS(a) | ___PPC_RB(b))
#define PPC_ANDI(d, a, i)	EMIT(PPC_INST_ANDI | ___PPC_RA(d) |	      \
				     ___PPC_RS(a) | IMM_L(i))
#define PPC_AND_DOT(d, a, b)	EMIT(PPC_INST_ANDDOT | ___PPC_RA(d) |	      \
				     ___PPC_RS(a) | ___PPC_RB(b))
#define PPC_OR(d, a, b)		EMIT(PPC_INST_OR | ___PPC_RA(d) |	      \
				     ___PPC_RS(a) | ___PPC_RB(b))
#define PPC_ORI(d, a, i)	EMIT(PPC_INST_ORI | ___PPC_RA(d) |	      \
				     ___PPC_RS(a) | IMM_L(i))
#define PPC_ORIS(d, a, i)	EMIT(PPC_INST_ORIS | ___PPC_RA(d) |	      \
				     ___PPC_RS(a) | IMM_L(i))
#define PPC_XOR(d, a, b)	EMIT(PPC_INST_XOR | ___PPC_RA(d) |	      \
				     ___PPC_RS(a) | ___PPC_RB(b))
#define PPC_XORI(d, a, i)	EMIT(PPC_INST_XORI | ___PPC_RA(d) |	      \
				     ___PPC_RS(a) | IMM_L(i))
#define PPC_XORIS(d, a, i)	EMIT(PPC_INST_XORIS | ___PPC_RA(d) |	      \
				     ___PPC_RS(a) | IMM_L(i))
#define PPC_SLW(d, a, s)	EMIT(PPC_INST_SLW | ___PPC_RA(d) |	      \
				     ___PPC_RS(a) | ___PPC_RB(s))
#define PPC_SRW(d, a, s)	EMIT(PPC_INST_SRW | ___PPC_RA(d) |	      \
				     ___PPC_RS(a) | ___PPC_RB(s))
/* slwi = rlwinm Rx, Ry, n, 0, 31-n */
#define PPC_SLWI(d, a, i)	EMIT(PPC_INST_RLWINM | ___PPC_RA(d) |	      \
				     ___PPC_RS(a) | __PPC_SH(i) |	      \
				     __PPC_MB(0) | __PPC_ME(31-(i)))
/* srwi = rlwinm Rx, Ry, 32-n, n, 31 */
#define PPC_SRWI(d, a, i)	EMIT(PPC_INST_RLWINM | ___PPC_RA(d) |	      \
				     ___PPC_RS(a) | __PPC_SH(32-(i)) |	      \
				     __PPC_MB(i) | __PPC_ME(31))
/* sldi = rldicr Rx, Ry, n, 63-n */
#define PPC_SLDI(d, a, i)	EMIT(PPC_INST_RLDICR | ___PPC_RA(d) |	      \
				     ___PPC_RS(a) | __PPC_SH(i) |	      \
				     __PPC_MB(63-(i)) | (((i) & 0x20) >> 4))
#define PPC_NEG(d, a)		EMIT(PPC_INST_NEG | ___PPC_RT(d) | ___PPC_RA(a))

/* Long jump; (unconditional 'branch') */
#define PPC_JMP(dest)		EMIT(PPC_INST_BRANCH |			      \
				     (((dest) - (ctx->idx * 4)) & 0x03fffffc))
/* "cond" here covers BO:BI fields. */
#define PPC_BCC_SHORT(cond, dest)	EMIT(PPC_INST_BRANCH_COND |	      \
					     (((cond) & 0x3ff) << 16) |	      \
					     (((dest) - (ctx->idx * 4)) &     \
					      0xfffc))
#define PPC_LI32(d, i)		do { PPC_LI(d, IMM_L(i));		      \
		if ((u32)(uintptr_t)(i) >= 32768) {			      \
			PPC_ADDIS(d, d, IMM_HA(i));			      \
		} } while(0)
#define PPC_LI64(d, i)		do {					      \
		if (!((uintptr_t)(i) & 0xffffffff00000000ULL))		      \
			PPC_LI32(d, i);					      \
		else {							      \
			PPC_LIS(d, ((uintptr_t)(i) >> 48));		      \
			if ((uintptr_t)(i) & 0x0000ffff00000000ULL)	      \
				PPC_ORI(d, d,				      \
					((uintptr_t)(i) >> 32) & 0xffff);     \
			PPC_SLDI(d, d, 32);				      \
			if ((uintptr_t)(i) & 0x00000000ffff0000ULL)	      \
				PPC_ORIS(d, d,				      \
					 ((uintptr_t)(i) >> 16) & 0xffff);    \
			if ((uintptr_t)(i) & 0x000000000000ffffULL)	      \
				PPC_ORI(d, d, (uintptr_t)(i) & 0xffff);	      \
		} } while (0);

#ifdef CONFIG_PPC64
#define PPC_FUNC_ADDR(d,i) do { PPC_LI64(d, i); } while(0)
#else
#define PPC_FUNC_ADDR(d,i) do { PPC_LI32(d, i); } while(0)
#endif

#define PPC_LHBRX_OFFS(r, base, i) \
		do { PPC_LI32(r, i); PPC_LHBRX(r, r, base); } while(0)
#ifdef __LITTLE_ENDIAN__
#define PPC_NTOHS_OFFS(r, base, i)	PPC_LHBRX_OFFS(r, base, i)
#else
#define PPC_NTOHS_OFFS(r, base, i)	PPC_LHZ_OFFS(r, base, i)
#endif

static inline bool is_nearbranch(int offset)
{
	return (offset < 32768) && (offset >= -32768);
}

/*
 * The fly in the ointment of code size changing from pass to pass is
 * avoided by padding the short branch case with a NOP.	 If code size differs
 * with different branch reaches we will have the issue of code moving from
 * one pass to the next and will need a few passes to converge on a stable
 * state.
 */
#define PPC_BCC(cond, dest)	do {					      \
		if (is_nearbranch((dest) - (ctx->idx * 4))) {		      \
			PPC_BCC_SHORT(cond, dest);			      \
			PPC_NOP();					      \
		} else {						      \
			/* Flip the 'T or F' bit to invert comparison */      \
			PPC_BCC_SHORT(cond ^ COND_CMP_TRUE, (ctx->idx+2)*4);  \
			PPC_JMP(dest);					      \
		} } while(0)

/* To create a branch condition, select a bit of cr0... */
#define CR0_LT		0
#define CR0_GT		1
#define CR0_EQ		2
/* ...and modify BO[3] */
#define COND_CMP_TRUE	0x100
#define COND_CMP_FALSE	0x000
/* Together, they make all required comparisons: */
#define COND_GT		(CR0_GT | COND_CMP_TRUE)
#define COND_GE		(CR0_LT | COND_CMP_FALSE)
#define COND_EQ		(CR0_EQ | COND_CMP_TRUE)
#define COND_NE		(CR0_EQ | COND_CMP_FALSE)
#define COND_LT		(CR0_LT | COND_CMP_TRUE)

#define SEEN_DATAREF 0x10000 /* might call external helpers */
#define SEEN_XREG    0x20000 /* X reg is used */
#define SEEN_MEM     0x40000 /* SEEN_MEM+(1<<n) = use mem[n] for temporary
			      * storage */
#define SEEN_MEM_MSK 0x0ffff

struct codegen_context {
	unsigned int seen;
	unsigned int idx;
	int pc_ret0; /* bpf index of first RET #0 instruction (if any) */
};

#endif

#endif