/*
 * path: root/arch/ppc/boot/simple/relocate.S
 * blob: 1bbbcd2f2bcbd44c033329f479cc0def95ce7630
 */
/*
 * This is the common part of the loader relocation and initialization
 * process.  All of the board/processor specific initialization is
 * done before we get here.
 *
 * Author: Tom Rini
 *	   trini@mvista.com
 * Derived from arch/ppc/boot/prep/head.S (Cort Dougan, many others).
 *
 * 2001-2004 (c) MontaVista, Software, Inc.  This file is licensed under
 * the terms of the GNU General Public License version 2.  This program
 * is licensed "as is" without any warranty of any kind, whether express
 * or implied.
 */

#include <asm/cache.h>
#include <asm/ppc_asm.h>

/*
 * GETSYM(reg, sym): load the 32-bit value of symbol 'sym' into 'reg'.
 * Expands to a lis/ori pair on one line (joined by ';'): lis loads
 * sym@h, then ori merges in sym@l.  Only 'reg' is written.
 */
#define GETSYM(reg, sym)	\
	lis	reg, sym@h; ori	reg, reg, sym@l

	.text
	/*
	 * relocate: common entry point, called from the early
	 * initialization code.
	 *
	 * In:	r3 = address where we were loaded
	 *	r4 = any residual data passed from the boot rom
	 *
	 * Kept for the code that runs after us (do_relocate, start_ldr):
	 *	r8  = load address
	 *	r11 = residual data
	 *	r7  = size of the whole image in words (rounded up)
	 *	r6  = 0 on the "already at link address" path, so that
	 *	      start_ldr hands a defined checksum to load_kernel
	 *
	 * If the image is not at its link address, the .relocate_code
	 * section is copied to a scratch area above both the loaded image
	 * and the link-time image, and control is passed to that copy of
	 * do_relocate by way of flush_instruction_cache.
	 *
	 * All address comparisons are unsigned (cmplw): a signed compare
	 * gives the wrong answer when the operands lie on opposite sides
	 * of 0x80000000.
	 *
	 * NOTE(review): flush_instruction_cache is defined elsewhere; this
	 * code relies on it returning through LR and leaving r7, r8 and
	 * r11 intact -- confirm against its implementation.
	 */
	.globl	relocate
relocate:
	/* Save r3, r4 for later.
	 * The r8/r11 are legacy registers so the code below does not
	 * have to be rewritten.
	 */
	mr	r8, r3
	mr	r11, r4

	/* Compute the size of the whole image in words. */
	GETSYM(r4,start)
	GETSYM(r5,end)

	addi	r5,r5,3		/* round up */
	sub	r5,r5,r4	/* end - start */
	srwi	r5,r5,2
	mr	r7,r5		/* Save for later use. */

	/*
	 * start_ldr forwards r6 as the checksum.  It is only computed by
	 * do_relocate, so give it a defined value for the case where no
	 * copy is performed.  The relocation path below overwrites r6.
	 */
	li	r6,0

	/*
	 * Check if we need to relocate ourselves to the link addr or were
	 * we loaded there to begin with.
	 */
	cmplw	cr0,r3,r4
	beq	start_ldr	/* Equal: we don't need to relocate */

	/* Move the relocation stub somewhere safe.  This is
	 * max(load + size, end), with r8 == load address.
	 */
	GETSYM(r4, start)
	GETSYM(r5, end)

	sub	r6,r5,r4
	add	r6,r8,r6	/* r6 == phys(load + size) */

	cmplw	r5,r6		/* unsigned: these are addresses */
	ble	2f		/* load + size is already the larger */
	mr	r6, r5		/* otherwise use end */
2:
	/* dest is in r6 */
	/* Ensure alignment --- this code is precautionary.
	 * Moves r6 up to the next word boundary strictly above it.
	 */
	addi	r6,r6,4
	li	r5,0x0003
	andc	r6,r6,r5

	/* Find physical address and size of the relocation stub */
	GETSYM(r5, __relocate_start)
	GETSYM(r4, __relocate_end)
	GETSYM(r3, start)

	/* Size to copy, in words */
	sub	r4,r4,r5
	srwi	r4,r4,2

	/* Src addr to copy (= __relocate_start - start + where_loaded) */
	sub	r3,r5,r3
	add	r5,r8,r3

	/* Working copy of dest; r6 itself is needed again below */
	mr	r3, r6

	/* Do the copy, one word per iteration, CTR = word count */
	mtctr	r4
3:	lwz	r4,0(r5)
	stw	r4,0(r3)
	addi	r3,r3,4
	addi	r5,r5,4
	bdnz	3b

	GETSYM(r4, __relocate_start)
	GETSYM(r5, do_relocate)

	sub	r4,r5,r4	/* Offset of do_relocate within the stub */
	add	r6,r6,r4	/* r6 = do_relocate in the relocated copy */

	/* This will return to the relocated do_relocate */
	mtlr	r6
	b	flush_instruction_cache

	.section ".relocate_code","xa"

/*
 * do_relocate: copy the whole image from its load address to its link
 * address.  relocate copies this section to a scratch area and enters
 * that copy, so everything here has to work when executed from an
 * address other than the one it was linked at.
 *
 * In:	r7 = image size in words
 *	r8 = load address
 * Out:	r6 = XOR of every word copied (the checksum)
 *	LR = start_ldr; control goes to flush_instruction_cache, which
 *	     is expected to return there.
 *
 * We have 2 cases --- start < load, or start > load.
 * This determines whether we copy from the start, or from the end,
 * so that an overlapping source is never overwritten before it has
 * been read.  It's easier to have 2 loops than to have parameterised
 * loops.  Sigh.
 *
 * NOTE(review): flush_instruction_cache is defined elsewhere; whether it
 * preserves r6, r7, r8 and r11 for start_ldr cannot be checked here.
 */
do_relocate:
	li	r6,0		/* Clear checksum */
	mtctr	r7		/* Loop count = number of words */

	GETSYM(r4, start)	/* r4 = destination (link address) */
	mr	r3,r8		/* r3 = source (load address) */

	/* Addresses are unsigned, so compare them with cmplw. */
	cmplw	cr0,r4,r3	/* If we need to copy from the end, do so */
	bgt	do_relocate_from_end

do_relocate_from_start:
1:	lwz	r5,0(r3)	/* Load a word from the source */
	stw	r5,0(r4)	/* Store it to the destination */
	addi	r3,r3,4		/* Advance both pointers */
	addi	r4,r4,4
	xor	r6,r6,r5	/* Update checksum */
	bdnz	1b		/* Are we done? */
	b	do_relocate_out	/* Finished */

do_relocate_from_end:
	/*
	 * Both end pointers are derived from the same word-rounded size,
	 * so this loop moves exactly the words the forward loop would,
	 * even if (end - start) is not a multiple of 4.
	 */
	slwi	r5,r7,2		/* r5 = image size in bytes */
	add	r3,r4,r5	/* r3 = start + size: past last dest word */
	add	r4,r8,r5	/* r4 = load + size: past last source word */
1:	lwzu	r5,-4(r4)	/* Pre-decrement, then load */
	stwu	r5,-4(r3)	/* Pre-decrement, then store */
	xor	r6,r6,r5	/* Update checksum */
	bdnz	1b

do_relocate_out:
	GETSYM(r3,start_ldr)
	mtlr	r3		/* Easiest way to do an absolute jump */
/* Some boards don't boot up with the I-cache enabled.  Do that
 * now because the decompress runs much faster that way.
 * As a side effect, we have to ensure the data cache is not enabled
 * so we can access the serial I/O without trouble.
 *
 * This code is running from the relocated copy of the section, so a
 * PC-relative "b" would land at the wrong address.  Go through CTR
 * with the absolute (link) address instead; the image has just been
 * copied there.  r4 is free at this point.
 */
	GETSYM(r4,flush_instruction_cache)
	mtctr	r4
	bctr

	.previous

/*
 * start_ldr: runs at the link address, either directly from relocate or
 * after do_relocate has moved the image.
 *
 * In:	r6  = checksum
 *	r7  = image size in words
 *	r8  = load address
 *	r11 = residual data
 *	r25 = validated OFW interface
 *
 * Clears BSS, sets up a stack inside .stack, calls load_kernel with the
 * values above, then jumps to the kernel entry point (address 0, or 0xc
 * on PReP).  r3 is left as load_kernel returned it; r4-r6 are zeroed.
 */
start_ldr:
/* Clear all of BSS and set up stack for C calls */
	GETSYM(r3, __bss_start)
	GETSYM(r4, end)
	subi	r3,r3,4		/* Bias both by -4 for the stwu pre-increment */
	subi	r4,r4,4
	li	r0,0
	b	2f		/* Test first: store nothing if BSS is empty */
1:	stwu	r0,4(r3)
2:	cmplw	cr0,r3,r4	/* Unsigned: these are addresses */
	blt	1b

	mr	r9,r1		/* Save old stack pointer (in case it matters) */
	lis	r1,.stack@h
	ori	r1,r1,.stack@l
	addi	r1,r1,4096*2	/* Top of the .stack area ... */
	subi	r1,r1,256	/* ... minus 256 bytes of headroom */
	li	r2,0x000F	/* Mask pointer to 16-byte boundary */
	andc	r1,r1,r2

	/*
	 * Exec kernel loader
	 */
	mr	r3,r8		/* Load point */
	mr	r4,r7		/* Program length */
	mr	r5,r6		/* Checksum */
	mr	r6,r11		/* Residual data */
	mr	r7,r25		/* Validated OFW interface */
	bl	load_kernel

	/*
	 * Make sure the kernel knows we don't have things set in
	 * registers.  -- Tom
	 */
	li	r4,0
	li	r5,0
	li	r6,0

	/*
	 * Start at the beginning.
	 */
#ifdef CONFIG_PPC_PREP
	li	r9,0xc
	mtlr	r9
	/* tell kernel we're prep, by putting 0xdeadc0de at KERNELLOAD,
	 * and tell the kernel to start on the 4th instruction since we
	 * overwrite the first 3 sometimes (which are 'nop').
	 */
	lis	r10,0xdeadc0de@h
	ori	r10,r10,0xdeadc0de@l
	li	r9,0
	stw	r10,0(r9)
#else
	li	r9,0
	mtlr	r9
#endif
	blr

	/*
	 * Common symbol .stack, 4096*2 bytes; start_ldr points r1 just
	 * below the top of this area before calling load_kernel.
	 * The third operand (4) is presumably the alignment, as in the
	 * ELF form of .comm -- TODO confirm for this toolchain.
	 */
	.comm	.stack,4096*2,4