summaryrefslogtreecommitdiffstats
path: root/src/arch/x86/lib/c_start.S
blob: 582966bfdc20d59e772af091293f30f185691b90 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
#include <cpu/x86/post_code.h>

/*
 * Reserve the stacks in .bss, so the linker script does not have to define
 * them. The area between _stack and _estack has room for CONFIG_MAX_CPUS
 * stacks of CONFIG_STACK_SIZE bytes and starts on a CONFIG_STACK_SIZE
 * boundary. With CONFIG_COOP_MULTITASKING, room for CONFIG_NUM_THREADS
 * further stacks of the same size follows directly after _estack.
 */
	.section .bss, "aw", @nobits
	.globl	_stack, _estack

	.balign	CONFIG_STACK_SIZE
_stack:
	.skip	CONFIG_MAX_CPUS * CONFIG_STACK_SIZE
_estack:
#if CONFIG_COOP_MULTITASKING
	.globl	thread_stacks
thread_stacks:
	.skip	CONFIG_NUM_THREADS * CONFIG_STACK_SIZE
#endif

	/*
	 * Entry point (_start, also exported as __rmodule_entry).
	 *
	 * In order: disable interrupts, load the gdt defined in this file and
	 * reload the segment registers from it, poison the memory between
	 * _stack and _estack, point %esp at _estack, fill _idt with gates
	 * that point at the vecN stubs, and call main(). main() is not
	 * expected to return; if it does, the CPU is parked in the hlt loop
	 * at .Lhlt.
	 */
	.section ".text._start", "ax", @progbits
#ifdef __x86_64__
	.code64
#else
	.code32
#endif
	.globl _start
	.globl __rmodule_entry
__rmodule_entry:
_start:
	cli
	/* gdtaddr is emitted into this same section and is read through
	 * %cs - presumably because the data segment registers are only
	 * reloaded further down. */
	lgdt	%cs:gdtaddr
#ifndef __x86_64__
	/* Far jump to reload %cs with selgdt 0x10, the flat code segment. */
	ljmp	$0x10, $1f
#endif
	/* selgdt 0x18 is the flat data segment; load it into every data
	 * segment register. */
1:	movl	$0x18, %eax
	movl	%eax, %ds
	movl	%eax, %es
	movl	%eax, %ss
	movl	%eax, %fs
	movl	%eax, %gs
#ifdef __x86_64__
	/* Switch %cs to selgdt 0x48, the flat x64 code segment.
	 * SetCodeSelector takes the selector in %ecx. */
	mov     $0x48, %ecx
	call    SetCodeSelector
#endif

	post_code(POST_ENTRY_C_START)		/* post 13 */

	/* Clear the direction flag so that the rep stosl below counts up. */
	cld

	/** poison the stack. Code should not count on the
	 * stack being full of zeros. This stack poisoning
	 * recently uncovered a bug in the broadcast SIPI
	 * code.
	 */
	/* %edi = start of the area, %ecx = its size in 32-bit words,
	 * %eax = the pattern that rep stosl stores into every word. */
	leal	_stack, %edi
	movl	$_estack, %ecx
	subl	%edi, %ecx
	shrl	$2, %ecx   /* it is 32 bit aligned, right? */
	movl	$0xDEADBEEF, %eax
	rep
	stosl

	/* set new stack */
	movl	$_estack, %esp

#if CONFIG_COOP_MULTITASKING
	/* Push the thread pointer. */
	push	$0
#endif
	/* Push the cpu index and struct cpu */
	push	$0
	push	$0

	/* Initialize the Interrupt Descriptor table */
	/*
	 * %edi walks the gates in _idt, %ebx walks the handler stubs starting
	 * at vec0. Each 8-byte gate is written as two 32-bit words:
	 *   low  word: selector 0x10 in bits 31:16, handler offset 15:0
	 *   high word: handler offset 31:16 in bits 31:16, 0x8E00 in bits 15:0
	 *
	 * NOTE(review): on x86_64 this still builds 8-byte gates with selector
	 * 0x10 and the table is never loaded (see the FIXME below).
	 */
	leal	_idt, %edi
	leal	vec0, %ebx
	movl	$(0x10 << 16), %eax	/* cs selector */

1:	movw	%bx, %ax
	movl	%ebx, %edx
	movw	$0x8E00, %dx		/* Interrupt gate - dpl=0, present */
	movl	%eax, 0(%edi)
	movl	%edx, 4(%edi)
	/* Advance to the next stub: the loop assumes that consecutive vecN
	 * stubs are exactly 6 bytes apart. */
	addl	$6, %ebx
	addl	$8, %edi
	cmpl	$_idt_end, %edi
	jne	1b

	/* Load the Interrupt descriptor table */
#ifndef __x86_64__
	lidt	idtarg
#else
	// FIXME port table to x64 - lidt     idtarg
#endif

	/*
	 *	Now we are finished. Memory is up, data is copied and
	 *	bss is cleared.   Now we call the main routine and
	 *	let it do the rest.
	 */
	post_code(POST_PRE_HARDWAREMAIN)	/* post fe */

#if CONFIG_GDB_WAIT
	call gdb_hw_init
	call gdb_stub_breakpoint
#endif
	call	main
	/* NOTREACHED */
	/* Safety net in case main() returns: signal the post code and halt,
	 * again after every wake-up. */
.Lhlt:
	post_code(POST_DEAD_CODE)	/* post ee */
	hlt
	jmp	.Lhlt

/*
 * Exception entry stubs for vectors 0-19.
 *
 * Every stub jumps to int_hand with the vector number on top of the stack
 * and an error code right above it. For vectors 8, 10-14 and 17 the stub
 * does not push an error code of its own (the CPU has already supplied one)
 * and carries two nop bytes of padding instead.
 *
 * The stubs must stay in ascending order with nothing in between: the IDT
 * setup in _start reaches stub N by adding N * 6 bytes to the address of
 * vec0.
 */

/* Stub for a vector without a CPU-supplied error code: push a dummy 0. */
.macro	vec_stub_noerr num
vec\num:
	push	$0		/* error code */
	push	$\num		/* vector */
	jmp	int_hand
.endm

/* Stub for a vector whose error code is already on the stack. */
.macro	vec_stub_err num
vec\num:
	push	$\num		/* vector */
	jmp	int_hand
	.word	0x9090		/* two nops in place of the missing push */
.endm

	vec_stub_noerr	0
	vec_stub_noerr	1
	vec_stub_noerr	2
	vec_stub_noerr	3
	vec_stub_noerr	4
	vec_stub_noerr	5
	vec_stub_noerr	6
	vec_stub_noerr	7
	vec_stub_err	8
	vec_stub_noerr	9
	vec_stub_err	10
	vec_stub_err	11
	vec_stub_err	12
	vec_stub_err	13
	vec_stub_err	14
	vec_stub_noerr	15
	vec_stub_noerr	16
	vec_stub_err	17
	vec_stub_noerr	18
	vec_stub_noerr	19

int_hand:
	/*
	 * Common exception path: the vecN stubs and gdb_stub_breakpoint jump
	 * here with the vector number on top of the stack.
	 *
	 * At this point, on x86-32, on the stack there is:
	 *  0(%esp) vector
	 *  4(%esp) error code
	 *  8(%esp) eip
	 * 12(%esp) cs
	 * 16(%esp) eflags
	 *
	 * On x86-64 every slot is 8 bytes wide and the CPU always saves
	 * ss:rsp as well:
	 *  0(%rsp) vector
	 *  8(%rsp) error code
	 * 16(%rsp) rip
	 * 24(%rsp) cs
	 * 32(%rsp) rflags
	 * 40(%rsp) rsp
	 * 48(%rsp) ss
	 *
	 * The general purpose registers are pushed below that, so that from
	 * the lowest address upwards the stack reads: ax, cx, dx, bx,
	 * original sp, bp, si, di, vector, error code, CPU frame. A pointer
	 * to that block is handed to x86_exception(); afterwards the
	 * registers are reloaded from the block and the interrupted code is
	 * resumed.
	 */
#ifdef __x86_64__
	push	%rdi
	push	%rsi
	push	%rbp
	/* Original stack pointer: taken from the rsp slot of the CPU frame,
	 * which is now 24 (pushes above) + 40 bytes away. */
	mov	64(%rsp), %rbp
	push	%rbp
	push	%rbx
	push	%rdx
	push	%rcx
	push	%rax

	/* The SysV AMD64 ABI passes the first argument in %rdi. */
	mov	%rsp, %rdi	/* Pointer to structure on the stack */

	/* r8-r11 are caller-saved but not part of the structure. Keep them
	 * below it so that the layout seen by x86_exception is unchanged. */
	push	%r8
	push	%r9
	push	%r10
	push	%r11

	/* The ABI wants %rsp 16-byte aligned at a call. %rbp is callee-saved
	 * and its interrupted value is already in the structure, so use it to
	 * carry the unaligned stack pointer across the call. */
	mov	%rsp, %rbp
	and	$-16, %rsp
	call	x86_exception
	mov	%rbp, %rsp

	pop	%r11
	pop	%r10
	pop	%r9
	pop	%r8

	pop	%rax
	pop	%rcx
	pop	%rdx
	pop	%rbx
	pop	%rbp	/* Ignore saved %rsp value */
	pop	%rbp
	pop	%rsi
	pop	%rdi

	add	$16, %rsp /* pop of the vector and error code, 8 bytes each */

	/* A plain "iret" would be assembled with 32-bit operand size. */
	iretq
#else
	pushl	%edi
	pushl	%esi
	pushl	%ebp

	/* Original stack pointer: 12 bytes pushed above, 8 bytes of vector
	 * and error code, 12 bytes of CPU frame. */
	leal	32(%esp), %ebp
	pushl	%ebp
	pushl	%ebx
	pushl	%edx
	pushl	%ecx
	pushl	%eax

	pushl	%esp	/* Pointer to structure on the stack */
	call	x86_exception
	pop	%eax	/* Drop the pointer */

	popl	%eax
	popl	%ecx
	popl	%edx
	popl	%ebx
	popl	%ebp	/* Ignore saved %esp value */
	popl	%ebp
	popl	%esi
	popl	%edi

	addl	$8, %esp /* pop of the vector and error code */

	iret
#endif

#if CONFIG_GDB_WAIT

	/*
	 * gdb_stub_breakpoint: enter the common exception path (int_hand) as
	 * if vector 32 had been raised at the caller's return address.
	 *
	 * Called like an ordinary function without arguments. The return
	 * address is taken off the stack and replaced by a frame of the kind
	 * the CPU builds for an exception, followed by a dummy error code and
	 * the vector number.
	 *
	 * Clobbers: %eax on x86-32; %rax, %rcx and %rdx on x86-64.
	 */
	.globl gdb_stub_breakpoint
gdb_stub_breakpoint:
#ifdef __x86_64__
	/* A 64-bit interrupt return pops rip, cs, rflags, rsp and ss, so the
	 * frame needs all five slots. "pushfl" and "push %cs" do not exist in
	 * 64-bit mode; use pushfq and go through a general purpose register. */
	pop	%rax	/* Return address */
	mov	%rsp, %rdx	/* Stack pointer the caller resumes with */
	mov	%ss, %ecx
	push	%rcx	/* ss */
	push	%rdx	/* rsp */
	pushfq
	mov	%cs, %ecx
	push	%rcx	/* cs */
	push	%rax	/* Return address */
	push	$0	/* No error code */
	push	$32	/* vector 32 is user defined */
#else
	popl	%eax	/* Return address */
	pushfl
	pushl	%cs
	pushl	%eax	/* Return address */
	pushl	$0	/* No error code */
	pushl	$32	/* vector 32 is user defined */
#endif
	jmp	int_hand
#endif

	/* Descriptor table symbols exported from this file. */
	.globl	gdt
	.globl	gdt_end
	.globl	idtarg

	/*
	 * Operand of the lgdt in _start: the 16-bit table limit followed by
	 * the address of gdt (8 bytes wide on x86-64, 4 bytes otherwise).
	 */
gdtaddr:
	.word	gdt_end - gdt - 1	/* limit: table size minus one */
#ifdef __x86_64__
	.quad	gdt			/* base */
#else
	.long	gdt			/* base; we know the offset */
#endif

	 .data

	/*
	 * GDT for the GCC-compiled part of coreboot.
	 *
	 * It is not the gdt of the ROMCC/ASM part, which is defined in
	 * entry32.inc. Right after the machine starts only that very simple
	 * gdt in ROM is used; it holds just the entries needed for protected
	 * mode.
	 *
	 * Once code executes from RAM more is required, for example to
	 * initialize PCI option ROMs in real mode or to resume from suspend
	 * to RAM.
	 *
	 * Every descriptor is 8 bytes, written here as
	 *   .word  limit[15:0], base[15:0]
	 *   .byte  base[23:16], access, flags:limit[19:16], base[31:24]
	 */
gdt:
	/* selgdt 0x00, unused (null descriptor) */
	.quad	0

	/* selgdt 0x08, unused */
	.quad	0

	/* selgdt 0x10, flat code segment: G=1 and limit[19:16]=0xf give a
	 * 4 GiB limit */
	.word	0xffff, 0x0000
	.byte	0x00, 0x9b, 0xcf, 0x00

	/* selgdt 0x18, flat data segment; the two variants differ only in
	 * the accessed bit of the access byte */
	.word	0xffff, 0x0000
#ifdef __x86_64__
	.byte	0x00, 0x92, 0xcf, 0x00
#else
	.byte	0x00, 0x93, 0xcf, 0x00
#endif

	/* selgdt 0x20, unused */
	.quad	0

	/* selgdt 0x28 and 0x30 are used for executing VGA option ROMs. */

	/* selgdt 0x28, 16 bit 64k code at 0x00000000 */
	.word	0xffff, 0x0000
	.byte	0x00, 0x9a, 0x00, 0x00

	/* selgdt 0x30, 16 bit 64k data at 0x00000000 */
	.word	0xffff, 0x0000
	.byte	0x00, 0x92, 0x00, 0x00

	/* selgdt 0x38 and 0x40 are used for ACPI S3 RESUME. */

	/* selgdt 0x38, flat data segment 16 bit.
	 * NOTE(review): limit[15:0] is 0 here while the matching code segment
	 * at 0x40 has 0xffff, so with G=1 this limit is 0xf0000fff rather
	 * than 4 GiB - confirm whether that is intended. */
	.word	0x0000, 0x0000
	.byte	0x00, 0x93, 0x8f, 0x00

	/* selgdt 0x40, flat code segment 16 bit: G=1 and limit[19:16]=0xf
	 * give a 4 GiB limit */
	.word	0xffff, 0x0000
	.byte	0x00, 0x9b, 0x8f, 0x00

#ifdef __x86_64__
	/* selgdt 0x48, flat x64 code segment (flags 0xa: G=1, L=1) */
	.word	0xffff, 0x0000
	.byte	0x00, 0x9b, 0xaf, 0x00
#endif
gdt_end:

/*
 * Operand of the lidt in _start: 16-bit limit followed by the 32-bit address
 * of _idt. The trailing zero word rounds the operand up to 8 bytes.
 */
idtarg:
	.word	_idt_end - _idt - 1	/* limit */
	.long	_idt			/* base */
	.word	0			/* padding */

/* 20 gates of 8 bytes each, zeroed here and filled in at run time by _start. */
_idt:
	.rept	20
	.quad	0
	.endr
_idt_end:

#ifdef __x86_64__
	/*
	 * SetCodeSelector: reload %cs with a new selector in 64-bit mode.
	 *
	 * In:       %rcx = code segment selector (the low 16 bits are used)
	 * Out:      returns to the caller with %cs = selector
	 * Clobbers: %rax, %rdx
	 *
	 * A far return frame is built on the stack and "returned" to with
	 * iretq, which pops rip, cs, rflags, rsp and ss. The rip slot holds
	 * the address of the instruction right after the iretq, so execution
	 * simply continues there with the new %cs.
	 *
	 * The routine is assembled into .text._start rather than into the
	 * data section that is current at this point of the file;
	 * .pushsection/.popsection leave the section state as it was for the
	 * .previous below.
	 */
	.pushsection ".text._start", "ax", @progbits
SetCodeSelector:
	/* Remember the stack pointer on entry: iretq replaces %rsp with the
	 * value from the frame, which is not the entry value. */
	mov	%rsp, %rdx

	mov	%ss, %eax	/* there is no "push %ss" in 64-bit mode */
	push	%rax		/* ss */
	push	%rsp		/* rsp */
	pushfq			/* rflags */
	push	%rcx		/* cs: selector from the caller */
	movabs	$setCodeSelectorLongJump, %rax
	push	%rax		/* rip */

	/* Continues at the next instruction with the new cs value loaded. */
	iretq

setCodeSelectorLongJump:
	/* Back to the stack pointer we were called with, so that ret finds
	 * the return address. */
	mov	%rdx, %rsp
	ret
	.popsection

	.previous
.code64
#else
	.previous
.code32
#endif
OpenPOWER on IntegriCloud