Diffstat (limited to 'zpu/sw/startup/crt0.S')
-rw-r--r--  zpu/sw/startup/crt0.S  957
1 files changed, 957 insertions, 0 deletions
diff --git a/zpu/sw/startup/crt0.S b/zpu/sw/startup/crt0.S
new file mode 100644
index 0000000..00870c4
--- /dev/null
+++ b/zpu/sw/startup/crt0.S
@@ -0,0 +1,957 @@
+/* Startup code for ZPU
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file with other programs, and to distribute
+those programs without any restriction coming from the use of this
+file. (The General Public License restrictions do apply in other
+respects; for example, they cover modification of the file, and
+distribution when not linked into another program.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+ .file "crt0.S"
+
+
+
+
+; .section ".fixed_vectors","ax"
+; KLUDGE!!! we remove the executable bit to avoid relaxation
+ .section ".fixed_vectors","a"
+
+; DANGER!!!!
+; we need to align these code sections to 32 bytes, which
+; means we must not use any assembler instructions that are relaxed
+; at linker time
+; DANGER!!!!
+
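+; fixedim expands to a plain "im"; the separate name marks immediates
+; that must stay a single, non-relaxed instruction so the 32-byte
+; alignment of the vector blocks below is preserved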
+ .macro fixedim value
+ im \value
+ .endm
+
+ .macro jsr address
+
+ im 0 ; save R0
+ load
+ im 4 ; save R1
+ load
+ im 8 ; save R2
+ load
+
+ fixedim \address
+ call
+
+ im 8
+ store ; restore R2
+ im 4
+ store ; restore R1
+ im 0
+ store ; restore R0
+ .endm
+
+
+ .macro jmp address
+ fixedim \address
+ poppc
+ .endm
+
+
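+; fast_neg: two's complement negate of TOS (~x + 1)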
+ .macro fast_neg
+ not
+ im 1
+ add
+ .endm
+
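+; cimpl: emulate an instruction by calling the C helper \funcname on the
+; two operands (used by _div and _mod below); R0-R2 are preserved and the
+; result is read back from R0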
+ .macro cimpl funcname
+ ; save R0
+ im 0
+ load
+
+ ; save R1
+ im 4
+ load
+
+ ; save R2
+ im 8
+ load
+
+ loadsp 20
+ loadsp 20
+
+ fixedim \funcname
+ call
+
+ ; destroy arguments on stack
+ storesp 0
+ storesp 0
+
+ im 0
+ load
+
+ ; poke the result into the right slot
+ storesp 24
+
+ ; restore R2
+ im 8
+ store
+
+ ; restore R1
+ im 4
+ store
+
+ ; restore r0
+ im 0
+ store
+
+
+ storesp 4
+ poppc
+ .endm
+
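+; mult1bit: one step of a shift-and-add multiply; adds B to the
+; accumulator C when the lowest bit of A is set, then shifts B left and
+; A right one bit (the flip/addsp 0/flip sequence is a logical shift
+; right by one)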
+ .macro mult1bit
+ ; create mask of lowest bit in A
+ loadsp 8 ; A
+ im 1
+ and
+ im -1
+ add
+ not
+ loadsp 8 ; B
+ and
+ add ; accumulate in C
+
+ ; shift B left 1 bit
+ loadsp 4 ; B
+ addsp 0
+ storesp 8 ; B
+
+ ; shift A right 1 bit
+ loadsp 8 ; A
+ flip
+ addsp 0
+ flip
+ storesp 12 ; A
+ .endm
+
+
+
+/* vectors */
+ .balign 32,0
+# offset 0x0000 0000
+ .globl _start
+_start:
+ ; intSp must be 0 when we jump to _premain
+
+ im ZPU_ID
+ loadsp 0
+ im _cpu_config
+ store
+ config
+ jmp _premain
+
+
+
+ .balign 32,0
+# offset 0x0000 0020
+ .globl _zpu_interrupt_vector
+_zpu_interrupt_vector:
+ jsr _zpu_interrupt
+ poppc
+
+
+/* instruction emulation code */
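+
+; each routine below emulates one instruction that the hardware may omit;
+; the CPU traps to that instruction's 32-byte slot in this table (hence
+; the strict alignment), finds its operands on the memory stack beneath
+; the return address, and returns with poppc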
+
+# opcode 34
+# offset 0x0000 0040
+ .balign 32,0
+_loadh:
+ loadsp 4
+ ; by not masking out bit 0, we cause a memory access error
+ ; on unaligned access
+ im ~0x2
+ and
+ load
+
+ ; mult 8
+ loadsp 8
+ im 3
+ and
+ fast_neg
+ im 2
+ add
+ im 3
+ ashiftleft
+	; shift right by (2 - (addr&3)) * 8
+ lshiftright
+ im 0xffff
+ and
+ storesp 8
+
+ poppc
+
+# opcode 35
+# offset 0x0000 0060
+ .balign 32,0
+_storeh:
+ loadsp 4
+ ; by not masking out bit 0, we cause a memory access error
+ ; on unaligned access
+ im ~0x2
+ and
+ load
+
+ ; mask
+ im 0xffff
+ loadsp 12
+ im 3
+ and
+ fast_neg
+ im 2
+ add
+ im 3
+ ashiftleft
+ ashiftleft
+ not
+
+ and
+
+ loadsp 12
+ im 0xffff
+
+ nop
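+	; (the nop ends the im sequence, so the fixedim below pushes a new
+	; value instead of extending 0xffff)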
+
+ fixedim _storehtail
+ poppc
+
+
+# opcode 36
+# offset 0x0000 0080
+ .balign 32,0
+_lessthan:
+ loadsp 8
+ fast_neg
+ loadsp 8
+ add
+
+	; DANGER!!!!
+	; 0x80000000 will overflow when negated, so we need to mask out
+	; the result above in the positive-versus-negative compare case
+	; (that case is handled separately below)
+ loadsp 12
+ loadsp 12
+ not
+ and
+ not
+ and
+
+
+	; handle the case where we are comparing a negative number
+	; and a positive number. This can underflow. E.g. consider 0x80000000 < 0x1000
+ loadsp 12
+ not
+ loadsp 12
+ and
+
+ or
+
+
+
+ flip
+ im 1
+ and
+
+
+ storesp 12
+ storesp 4
+ poppc
+
+
+# opcode 37
+# offset 0x0000 00a0
+ .balign 32,0
+_lessthanorequal:
+ loadsp 8
+ loadsp 8
+ lessthan
+ loadsp 12
+ loadsp 12
+ eq
+ or
+
+ storesp 12
+ storesp 4
+ poppc
+
+
+# opcode 38
+# offset 0x0000 00c0
+ .balign 32,0
+_ulessthan:
+ ; fish up arguments
+ loadsp 4
+ loadsp 12
+
+	/* low: -1 if the low-bit difference is negative, 0 otherwise: neg (not x&1 and (y&1))
+ x&1 y&1 neg (not x&1 and (y&1))
+ 1 1 0
+ 1 0 0
+ 0 1 -1
+ 0 0 0
+
+ */
+ loadsp 4
+ not
+ loadsp 4
+ and
+ im 1
+ and
+ neg
+
+
+ /* high: upper 31-bit diff is only wrong when diff is 0 and low=-1
+ high=x>>1 - y>>1 + low
+
+ extremes
+
+ 0000 - 1111:
+ low= neg(not 0 and 1) = 1111 (-1)
+ high=000+ neg(111) +low = 000 + 1001 + low = 1000
+ OK
+
+ 1111 - 0000
+ low=neg(not 1 and 0) = 0
+ high=111+neg(000) + low = 0111
+ OK
+
+
+ */
+ loadsp 8
+
+ flip
+ addsp 0
+ flip
+
+ loadsp 8
+
+ flip
+ addsp 0
+ flip
+
+ sub
+
+ ; if they are equal, then the last bit decides...
+ add
+
+ /* test if negative: result = flip(diff) & 1 */
+ flip
+ im 1
+ and
+
+ ; destroy a&b which are on stack
+ storesp 4
+ storesp 4
+
+ storesp 12
+ storesp 4
+ poppc
+
+# opcode 39
+# offset 0x0000 00e0
+ .balign 32,0
+_ulessthanorequal:
+ loadsp 8
+ loadsp 8
+ ulessthan
+ loadsp 12
+ loadsp 12
+ eq
+ or
+
+ storesp 12
+ storesp 4
+ poppc
+
+
+# opcode 40
+# offset 0x0000 0100
+ .balign 32,0
+ .globl _swap
+_swap:
+ breakpoint ; tbd
+
+# opcode 41
+# offset 0x0000 0120
+ .balign 32,0
+_slowmult:
+ im _slowmultImpl
+ poppc
+
+# opcode 42
+# offset 0x0000 0140
+ .balign 32,0
+_lshiftright:
+ loadsp 8
+ flip
+
+ loadsp 8
+ ashiftleft
+ flip
+
+ storesp 12
+ storesp 4
+
+ poppc
+
+
+# opcode 43
+# offset 0x0000 0160
+ .balign 32,0
+_ashiftleft:
+ loadsp 8
+
+ loadsp 8
+ im 0x1f
+ and
+ fast_neg
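+	; jump (shift & 0x1f) single-byte "addsp 0" instructions before
+	; _ashiftleftEnd: each one doubles TOS, shifting the value left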
+ im _ashiftleftEnd
+ add
+ poppc
+
+
+
+# opcode 44
+# offset 0x0000 0180
+ .balign 32,0
+_ashiftright:
+ loadsp 8
+ loadsp 8
+ lshiftright
+
+ ; handle signed value
+ im -1
+ loadsp 12
+ im 0x1f
+ and
+ lshiftright
+	not ; now we have an integer on the stack with the sign
+	    ; bits in the right position
+
+	; mask these bits with the sign bit.
+ loadsp 16
+ not
+ flip
+ im 1
+ and
+ im -1
+ add
+
+ and
+
+	; stuff in the sign bits...
+ or
+
+ ; store result into correct stack slot
+ storesp 12
+
+ ; move up return value
+ storesp 4
+ poppc
+
+# opcode 45
+# offset 0x0000 01a0
+ .balign 32,0
+_call:
+ ; fn
+ loadsp 4
+
+ ; return address
+ loadsp 4
+
+ ; store return address
+ storesp 12
+
+ ; fn to call
+ storesp 4
+
+ pushsp ; flush internal stack
+ popsp
+
+ poppc
+
+_storehtail:
+
+ and
+ loadsp 12
+ im 3
+ and
+ fast_neg
+ im 2
+ add
+ im 3
+ ashiftleft
+ nop
+ ashiftleft
+
+ or
+
+ loadsp 8
+ im ~0x3
+ and
+
+ store
+
+ storesp 4
+ storesp 4
+ poppc
+
+
+# opcode 46
+# offset 0x0000 01c0
+ .balign 32,0
+_eq:
+ loadsp 8
+ fast_neg
+ loadsp 8
+ add
+
+ not
+ loadsp 0
+ im 1
+ add
+ not
+ and
+ flip
+ im 1
+ and
+
+ storesp 12
+ storesp 4
+ poppc
+
+# opcode 47
+# offset 0x0000 01e0
+ .balign 32,0
+_neq:
+ loadsp 8
+ fast_neg
+ loadsp 8
+ add
+
+ not
+ loadsp 0
+ im 1
+ add
+ not
+ and
+ flip
+
+ not
+
+ im 1
+ and
+
+ storesp 12
+ storesp 4
+ poppc
+
+
+# opcode 48
+# offset 0x0000 0200
+ .balign 32,0
+_neg:
+ loadsp 4
+ not
+ im 1
+ add
+ storesp 8
+
+ poppc
+
+
+# opcode 49
+# offset 0x0000 0220
+ .balign 32,0
+_sub:
+ loadsp 8
+ loadsp 8
+ fast_neg
+ add
+ storesp 12
+
+ storesp 4
+
+ poppc
+
+
+# opcode 50
+# offset 0x0000 0240
+ .balign 32,0
+_xor:
+ loadsp 8
+ not
+ loadsp 8
+ and
+
+ loadsp 12
+ loadsp 12
+ not
+ and
+
+ or
+
+ storesp 12
+ storesp 4
+ poppc
+
+# opcode 51
+# offset 0x0000 0260
+ .balign 32,0
+_loadb:
+ loadsp 4
+ im ~0x3
+ and
+ load
+
+ loadsp 8
+ im 3
+ and
+ fast_neg
+ im 3
+ add
+ ; x8
+ addsp 0
+ addsp 0
+ addsp 0
+
+ lshiftright
+
+ im 0xff
+ and
+ storesp 8
+
+ poppc
+
+
+# opcode 52
+# offset 0x0000 0280
+ .balign 32,0
+_storeb:
+ loadsp 4
+ im ~0x3
+ and
+ load
+
+ ; mask away destination
+ im _mask
+ loadsp 12
+ im 3
+ and
+ addsp 0
+ addsp 0
+ add
+ load
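+	; TOS is now _mask[addr & 3]: 0xff in every byte lane except the
+	; destination byte, which is cleared (see the _mask table in .data)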
+
+ and
+
+
+ im _storebtail
+ poppc
+
+# opcode 53
+# offset 0x0000 02a0
+ .balign 32,0
+_div:
+ cimpl __divsi3
+
+# opcode 54
+# offset 0x0000 02c0
+ .balign 32,0
+_mod:
+ cimpl __modsi3
+
+# opcode 55
+# offset 0x0000 02e0
+ .balign 32,0
+ .globl _eqbranch
+_eqbranch:
+ loadsp 8
+
+ ; eq
+
+ not
+ loadsp 0
+ im 1
+ add
+ not
+ and
+ flip
+ im 1
+ and
+
+ ; mask
+ im -1
+ add
+ loadsp 0
+ storesp 16
+
+	; address if the branch is not taken
+ loadsp 4
+
+ and
+
+ ; fetch boolean & neg mask
+ loadsp 12
+ not
+
+ ; calc address & mask for branch
+ loadsp 8
+ loadsp 16
+ add
+ ; subtract 1 to find PC of branch instruction
+ im -1
+ add
+
+ and
+
+ or
+
+ storesp 4
+ storesp 4
+ storesp 4
+ poppc
+
+
+# opcode 56
+# offset 0x0000 0300
+ .balign 32,0
+ .globl _neqbranch
+_neqbranch:
+ loadsp 8
+
+ ; neq
+
+ not
+ loadsp 0
+ im 1
+ add
+ not
+ and
+ flip
+
+ not
+
+ im 1
+ and
+
+ ; mask
+ im -1
+ add
+ loadsp 0
+ storesp 16
+
+	; address if the branch is not taken
+ loadsp 4
+
+ and
+
+ ; fetch boolean & neg mask
+ loadsp 12
+ not
+
+ ; calc address & mask for branch
+ loadsp 8
+ loadsp 16
+ add
+ ; find address of branch instruction
+ im -1
+ add
+
+ and
+
+ or
+
+ storesp 4
+ storesp 4
+ storesp 4
+ poppc
+
+# opcode 57
+# offset 0x0000 0320
+ .balign 32,0
+ .globl _poppcrel
+_poppcrel:
+ add
+ ; address of poppcrel
+ im -1
+ add
+ poppc
+
+# opcode 58
+# offset 0x0000 0340
+ .balign 32,0
+ .globl _config
+_config:
+ im 1
+ nop
+ im _hardware
+ store
+ storesp 4
+ poppc
+
+# opcode 59
+# offset 0x0000 0360
+ .balign 32,0
+_pushpc:
+ loadsp 4
+ im 1
+ add
+ storesp 8
+ poppc
+
+# opcode 60
+# offset 0x0000 0380
+ .balign 32,0
+_syscall_emulate:
+ .byte 0
+
+# opcode 61
+# offset 0x0000 03a0
+ .balign 32,0
+_pushspadd:
+ pushsp
+ im 4
+ add
+ loadsp 8
+ addsp 0
+ addsp 0
+ add
+ storesp 8
+
+ poppc
+
+# opcode 62
+# offset 0x0000 03c0
+ .balign 32,0
+_halfmult:
+ breakpoint
+
+# opcode 63
+# offset 0x0000 03e0
+ .balign 32,0
+_callpcrel:
+ loadsp 4
+ loadsp 4
+ add
+ im -1
+ add
+ loadsp 4
+
+ storesp 12 ; return address
+ storesp 4
+ pushsp ; this will flush the internal stack.
+ popsp
+ poppc
+
+ .text
+
+
+
+
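+; 0x1f single-byte "addsp 0" (double-TOS) instructions; _ashiftleft above
+; jumps a computed distance into this run so that exactly (shift & 0x1f)
+; of them execute before falling through to the common epilogue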
+_ashiftleftBegin:
+ .rept 0x1f
+ addsp 0
+ .endr
+_ashiftleftEnd:
+ storesp 12
+ storesp 4
+ poppc
+
+_storebtail:
+ loadsp 12
+ im 0xff
+ and
+ loadsp 12
+ im 3
+ and
+
+ fast_neg
+ im 3
+ add
+ ; x8
+ addsp 0
+ addsp 0
+ addsp 0
+
+ ashiftleft
+
+ or
+
+ loadsp 8
+ im ~0x3
+ and
+
+ store
+
+ storesp 4
+ storesp 4
+ poppc
+
+
+
+
+; NB! this is not an EMULATE instruction. It is a varargs fn.
+ .globl _syscall
+_syscall:
+ syscall
+ poppc
+
+_slowmultImpl:
+
+ loadsp 8 ; A
+ loadsp 8 ; B
+ im 0 ; C
+
+.LmoreMult:
+ mult1bit
+
+ ; cutoff
+ loadsp 8
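+	; hand-assembled single-byte "im" of the backward branch offset for
+	; the neqbranch below; a raw .byte keeps its size fixed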
+ .byte (.LmoreMult-.Lbranch)&0x7f+0x80
+.Lbranch:
+ neqbranch
+
+ storesp 4
+ storesp 4
+ storesp 12
+ storesp 4
+ poppc
+
+ .data
+ .balign 4,0
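+; byte-lane masks indexed by (address & 3); the _storeb emulation above
+; uses them to clear the destination byte before inserting the new value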
+_mask:
+ .long 0x00ffffff
+ .long 0xff00ffff
+ .long 0xffff00ff
+ .long 0xffffff00
+
+
+ .globl _hardware
+_hardware:
+ .long 0
+ .globl _cpu_config
+_cpu_config:
+ .long 0
+