Diffstat (limited to 'zpu/sw/startup/crt0.S')
-rw-r--r--  zpu/sw/startup/crt0.S  957
1 files changed, 957 insertions, 0 deletions
diff --git a/zpu/sw/startup/crt0.S b/zpu/sw/startup/crt0.S
new file mode 100644
index 0000000..00870c4
--- /dev/null
+++ b/zpu/sw/startup/crt0.S
@@ -0,0 +1,957 @@
+/* Startup code for ZPU
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file with other programs, and to distribute
+those programs without any restriction coming from the use of this
+file. (The General Public License restrictions do apply in other
+respects; for example, they cover modification of the file, and
+distribution when not linked into another program.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+ .file "crt0.S"
+
+
+
+
+; .section ".fixed_vectors","ax"
+; KLUDGE!!! we remove the executable bit to avoid relaxation
+ .section ".fixed_vectors","a"
+
+; DANGER!!!!
+; we need to align these code sections to 32 bytes, which
+; means we must not use any assembler instructions that are relaxed
+; at linker time
+; DANGER!!!!
+
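+; fixedim expands to a plain "im"; the separate name marks immediates
+; that must stay a single, non-relaxed instruction so the 32-byte
+; alignment of the vector blocks below is preserved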
+ .macro fixedim value
+ im \value
+ .endm
+
+ .macro jsr address
+
+ im 0 ; save R0
+ load
+ im 4 ; save R1
+ load
+ im 8 ; save R2
+ load
+
+ fixedim \address
+ call
+
+ im 8
+ store ; restore R2
+ im 4
+ store ; restore R1
+ im 0
+ store ; restore R0
+ .endm
+
+
+ .macro jmp address
+ fixedim \address
+ poppc
+ .endm
+
+
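+; fast_neg: two's complement negate of TOS (~x + 1)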
+ .macro fast_neg
+ not
+ im 1
+ add
+ .endm
+
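+; cimpl: emulate an instruction by calling the C helper \funcname on the
+; two operands (used by _div and _mod below); R0-R2 are preserved and the
+; result is read back from R0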
+ .macro cimpl funcname
+ ; save R0
+ im 0
+ load
+
+ ; save R1
+ im 4
+ load
+
+ ; save R2
+ im 8
+ load
+
+ loadsp 20
+ loadsp 20
+
+ fixedim \funcname
+ call
+
+ ; destroy arguments on stack
+ storesp 0
+ storesp 0
+
+ im 0
+ load
+
+ ; poke the result into the right slot
+ storesp 24
+
+ ; restore R2
+ im 8
+ store
+
+ ; restore R1
+ im 4
+ store
+
+ ; restore r0
+ im 0
+ store
+
+
+ storesp 4
+ poppc
+ .endm
+
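+; mult1bit: one step of a shift-and-add multiply; adds B to the
+; accumulator C when the lowest bit of A is set, then shifts B left and
+; A right one bit (the flip/addsp 0/flip sequence is a logical shift
+; right by one)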
+ .macro mult1bit
+ ; create mask of lowest bit in A
+ loadsp 8 ; A
+ im 1
+ and
+ im -1
+ add
+ not
+ loadsp 8 ; B
+ and
+ add ; accumulate in C
+
+ ; shift B left 1 bit
+ loadsp 4 ; B
+ addsp 0
+ storesp 8 ; B
+
+ ; shift A right 1 bit
+ loadsp 8 ; A
+ flip
+ addsp 0
+ flip
+ storesp 12 ; A
+ .endm
+
+
+
+/* vectors */
+ .balign 32,0
+# offset 0x0000 0000
+ .globl _start
+_start:
+ ; intSp must be 0 when we jump to _premain
+
+ im ZPU_ID
+ loadsp 0
+ im _cpu_config
+ store
+ config
+ jmp _premain
+
+
+
+ .balign 32,0
+# offset 0x0000 0020
+ .globl _zpu_interrupt_vector
+_zpu_interrupt_vector:
+ jsr _zpu_interrupt
+ poppc
+
+
+/* instruction emulation code */
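+
+; each routine below emulates one instruction that the hardware may omit;
+; the CPU traps to that instruction's 32-byte slot in this table (hence
+; the strict alignment), finds its operands on the memory stack beneath
+; the return address, and returns with poppc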
+
+# opcode 34
+# offset 0x0000 0040
+ .balign 32,0
+_loadh:
+ loadsp 4
+ ; by not masking out bit 0, we cause a memory access error
+ ; on unaligned access
+ im ~0x2
+ and
+ load
+
+ ; mult 8
+ loadsp 8
+ im 3
+ and
+ fast_neg
+ im 2
+ add
+ im 3
+ ashiftleft
+	; shift right by (2 - (addr&3)) * 8
+ lshiftright
+ im 0xffff
+ and
+ storesp 8
+
+ poppc
+
+# opcode 35
+# offset 0x0000 0060
+ .balign 32,0
+_storeh:
+ loadsp 4
+ ; by not masking out bit 0, we cause a memory access error
+ ; on unaligned access
+ im ~0x2
+ and
+ load
+
+ ; mask
+ im 0xffff
+ loadsp 12
+ im 3
+ and
+ fast_neg
+ im 2
+ add
+ im 3
+ ashiftleft
+ ashiftleft
+ not
+
+ and
+
+ loadsp 12
+ im 0xffff
+
+ nop
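+	; (the nop ends the im sequence, so the fixedim below pushes a new
+	; value instead of extending 0xffff)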
+
+ fixedim _storehtail
+ poppc
+
+
+# opcode 36
+# offset 0x0000 0080
+ .balign 32,0
+_lessthan:
+ loadsp 8
+ fast_neg
+ loadsp 8
+ add
+
+	; DANGER!!!!
+	; 0x80000000 will overflow when negated, so we need to mask out
+	; the result above in the positive-versus-negative compare case
+	; (that case is handled separately below)
+ loadsp 12
+ loadsp 12
+ not
+ and
+ not
+ and
+
+
+	; handle the case where we are comparing a negative number
+	; and a positive number. This can underflow. E.g. consider 0x80000000 < 0x1000
+ loadsp 12
+ not
+ loadsp 12
+ and
+
+ or
+
+
+
+ flip
+ im 1
+ and
+
+
+ storesp 12
+ storesp 4
+ poppc
+
+
+# opcode 37
+# offset 0x0000 00a0
+ .balign 32,0
+_lessthanorequal:
+ loadsp 8
+ loadsp 8
+ lessthan
+ loadsp 12
+ loadsp 12
+ eq
+ or
+
+ storesp 12
+ storesp 4
+ poppc
+
+
+# opcode 38
+# offset 0x0000 00c0
+ .balign 32,0
+_ulessthan:
+ ; fish up arguments
+ loadsp 4
+ loadsp 12
+
+	/* low: -1 if the low-bit difference is negative, 0 otherwise: neg (not x&1 and (y&1))
+ x&1 y&1 neg (not x&1 and (y&1))
+ 1 1 0
+ 1 0 0
+ 0 1 -1
+ 0 0 0
+
+ */
+ loadsp 4
+ not
+ loadsp 4
+ and
+ im 1
+ and
+ neg
+
+
+ /* high: upper 31-bit diff is only wrong when diff is 0 and low=-1
+ high=x>>1 - y>>1 + low
+
+ extremes
+
+ 0000 - 1111:
+ low= neg(not 0 and 1) = 1111 (-1)
+ high=000+ neg(111) +low = 000 + 1001 + low = 1000
+ OK
+
+ 1111 - 0000
+ low=neg(not 1 and 0) = 0
+ high=111+neg(000) + low = 0111
+ OK
+
+
+ */
+ loadsp 8
+
+ flip
+ addsp 0
+ flip
+
+ loadsp 8
+
+ flip
+ addsp 0
+ flip
+
+ sub
+
+ ; if they are equal, then the last bit decides...
+ add
+
+ /* test if negative: result = flip(diff) & 1 */
+ flip
+ im 1
+ and
+
+ ; destroy a&b which are on stack
+ storesp 4
+ storesp 4
+
+ storesp 12
+ storesp 4
+ poppc
+
+# opcode 39
+# offset 0x0000 00e0
+ .balign 32,0
+_ulessthanorequal:
+ loadsp 8
+ loadsp 8
+ ulessthan
+ loadsp 12
+ loadsp 12
+ eq
+ or
+
+ storesp 12
+ storesp 4
+ poppc
+
+
+# opcode 40
+# offset 0x0000 0100
+ .balign 32,0
+ .globl _swap
+_swap:
+ breakpoint ; tbd
+
+# opcode 41
+# offset 0x0000 0120
+ .balign 32,0
+_slowmult:
+ im _slowmultImpl
+ poppc
+
+# opcode 42
+# offset 0x0000 0140
+ .balign 32,0
+_lshiftright:
+ loadsp 8
+ flip
+
+ loadsp 8
+ ashiftleft
+ flip
+
+ storesp 12
+ storesp 4
+
+ poppc
+
+
+# opcode 43
+# offset 0x0000 0160
+ .balign 32,0
+_ashiftleft:
+ loadsp 8
+
+ loadsp 8
+ im 0x1f
+ and
+ fast_neg
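+	; jump (shift & 0x1f) single-byte "addsp 0" instructions before
+	; _ashiftleftEnd: each one doubles TOS, shifting the value left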
+ im _ashiftleftEnd
+ add
+ poppc
+
+
+
+# opcode 44
+# offset 0x0000 0180
+ .balign 32,0
+_ashiftright:
+ loadsp 8
+ loadsp 8
+ lshiftright
+
+ ; handle signed value
+ im -1
+ loadsp 12
+ im 0x1f
+ and
+ lshiftright
+	not ; now we have an integer on the stack with the sign
+	    ; bits in the right position
+
+	; mask these bits with the sign bit.
+ loadsp 16
+ not
+ flip
+ im 1
+ and
+ im -1
+ add
+
+ and
+
+	; stuff in the sign bits...
+ or
+
+ ; store result into correct stack slot
+ storesp 12
+
+ ; move up return value
+ storesp 4
+ poppc
+
+# opcode 45
+# offset 0x0000 01a0
+ .balign 32,0
+_call:
+ ; fn
+ loadsp 4
+
+ ; return address
+ loadsp 4
+
+ ; store return address
+ storesp 12
+
+ ; fn to call
+ storesp 4
+
+ pushsp ; flush internal stack
+ popsp
+
+ poppc
+
+_storehtail:
+
+ and
+ loadsp 12
+ im 3
+ and
+ fast_neg
+ im 2
+ add
+ im 3
+ ashiftleft
+ nop
+ ashiftleft
+
+ or
+
+ loadsp 8
+ im ~0x3
+ and
+
+ store
+
+ storesp 4
+ storesp 4
+ poppc
+
+
+# opcode 46
+# offset 0x0000 01c0
+ .balign 32,0
+_eq:
+ loadsp 8
+ fast_neg
+ loadsp 8
+ add
+
+ not
+ loadsp 0
+ im 1
+ add
+ not
+ and
+ flip
+ im 1
+ and
+
+ storesp 12
+ storesp 4
+ poppc
+
+# opcode 47
+# offset 0x0000 01e0
+ .balign 32,0
+_neq:
+ loadsp 8
+ fast_neg
+ loadsp 8
+ add
+
+ not
+ loadsp 0
+ im 1
+ add
+ not
+ and
+ flip
+
+ not
+
+ im 1
+ and
+
+ storesp 12
+ storesp 4
+ poppc
+
+
+# opcode 48
+# offset 0x0000 0200
+ .balign 32,0
+_neg:
+ loadsp 4
+ not
+ im 1
+ add
+ storesp 8
+
+ poppc
+
+
+# opcode 49
+# offset 0x0000 0220
+ .balign 32,0
+_sub:
+ loadsp 8
+ loadsp 8
+ fast_neg
+ add
+ storesp 12
+
+ storesp 4
+
+ poppc
+
+
+# opcode 50
+# offset 0x0000 0240
+ .balign 32,0
+_xor:
+ loadsp 8
+ not
+ loadsp 8
+ and
+
+ loadsp 12
+ loadsp 12
+ not
+ and
+
+ or
+
+ storesp 12
+ storesp 4
+ poppc
+
+# opcode 51
+# offset 0x0000 0260
+ .balign 32,0
+_loadb:
+ loadsp 4
+ im ~0x3
+ and
+ load
+
+ loadsp 8
+ im 3
+ and
+ fast_neg
+ im 3
+ add
+ ; x8
+ addsp 0
+ addsp 0
+ addsp 0
+
+ lshiftright
+
+ im 0xff
+ and
+ storesp 8
+
+ poppc
+
+
+# opcode 52
+# offset 0x0000 0280
+ .balign 32,0
+_storeb:
+ loadsp 4
+ im ~0x3
+ and
+ load
+
+ ; mask away destination
+ im _mask
+ loadsp 12
+ im 3
+ and
+ addsp 0
+ addsp 0
+ add
+ load
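+	; TOS is now _mask[addr & 3]: 0xff in every byte lane except the
+	; destination byte, which is cleared (see the _mask table in .data)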
+
+ and
+
+
+ im _storebtail
+ poppc
+
+# opcode 53
+# offset 0x0000 02a0
+ .balign 32,0
+_div:
+ cimpl __divsi3
+
+# opcode 54
+# offset 0x0000 02c0
+ .balign 32,0
+_mod:
+ cimpl __modsi3
+
+# opcode 55
+# offset 0x0000 02e0
+ .balign 32,0
+ .globl _eqbranch
+_eqbranch:
+ loadsp 8
+
+ ; eq
+
+ not
+ loadsp 0
+ im 1
+ add
+ not
+ and
+ flip
+ im 1
+ and
+
+ ; mask
+ im -1
+ add
+ loadsp 0
+ storesp 16
+
+	; address if the branch is not taken
+ loadsp 4
+
+ and
+
+ ; fetch boolean & neg mask
+ loadsp 12
+ not
+
+ ; calc address & mask for branch
+ loadsp 8
+ loadsp 16
+ add
+ ; subtract 1 to find PC of branch instruction
+ im -1
+ add
+
+ and
+
+ or
+
+ storesp 4
+ storesp 4
+ storesp 4
+ poppc
+
+
+# opcode 56
+# offset 0x0000 0300
+ .balign 32,0
+ .globl _neqbranch
+_neqbranch:
+ loadsp 8
+
+ ; neq
+
+ not
+ loadsp 0
+ im 1
+ add
+ not
+ and
+ flip
+
+ not
+
+ im 1
+ and
+
+ ; mask
+ im -1
+ add
+ loadsp 0
+ storesp 16
+
+	; address if the branch is not taken
+ loadsp 4
+
+ and
+
+ ; fetch boolean & neg mask
+ loadsp 12
+ not
+
+ ; calc address & mask for branch
+ loadsp 8
+ loadsp 16
+ add
+ ; find address of branch instruction
+ im -1
+ add
+
+ and
+
+ or
+
+ storesp 4
+ storesp 4
+ storesp 4
+ poppc
+
+# opcode 57
+# offset 0x0000 0320
+ .balign 32,0
+ .globl _poppcrel
+_poppcrel:
+ add
+ ; address of poppcrel
+ im -1
+ add
+ poppc
+
+# opcode 58
+# offset 0x0000 0340
+ .balign 32,0
+ .globl _config
+_config:
+ im 1
+ nop
+ im _hardware
+ store
+ storesp 4
+ poppc
+
+# opcode 59
+# offset 0x0000 0360
+ .balign 32,0
+_pushpc:
+ loadsp 4
+ im 1
+ add
+ storesp 8
+ poppc
+
+# opcode 60
+# offset 0x0000 0380
+ .balign 32,0
+_syscall_emulate:
+ .byte 0
+
+# opcode 61
+# offset 0x0000 03a0
+ .balign 32,0
+_pushspadd:
+ pushsp
+ im 4
+ add
+ loadsp 8
+ addsp 0
+ addsp 0
+ add
+ storesp 8
+
+ poppc
+
+# opcode 62
+# offset 0x0000 03c0
+ .balign 32,0
+_halfmult:
+ breakpoint
+
+# opcode 63
+# offset 0x0000 03e0
+ .balign 32,0
+_callpcrel:
+ loadsp 4
+ loadsp 4
+ add
+ im -1
+ add
+ loadsp 4
+
+ storesp 12 ; return address
+ storesp 4
+ pushsp ; this will flush the internal stack.
+ popsp
+ poppc
+
+ .text
+
+
+
+
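+; 0x1f single-byte "addsp 0" (double-TOS) instructions; _ashiftleft above
+; jumps a computed distance into this run so that exactly (shift & 0x1f)
+; of them execute before falling through to the common epilogue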
+_ashiftleftBegin:
+ .rept 0x1f
+ addsp 0
+ .endr
+_ashiftleftEnd:
+ storesp 12
+ storesp 4
+ poppc
+
+_storebtail:
+ loadsp 12
+ im 0xff
+ and
+ loadsp 12
+ im 3
+ and
+
+ fast_neg
+ im 3
+ add
+ ; x8
+ addsp 0
+ addsp 0
+ addsp 0
+
+ ashiftleft
+
+ or
+
+ loadsp 8
+ im ~0x3
+ and
+
+ store
+
+ storesp 4
+ storesp 4
+ poppc
+
+
+
+
+; NB! this is not an EMULATE instruction. It is a varargs fn.
+ .globl _syscall
+_syscall:
+ syscall
+ poppc
+
+_slowmultImpl:
+
+ loadsp 8 ; A
+ loadsp 8 ; B
+ im 0 ; C
+
+.LmoreMult:
+ mult1bit
+
+ ; cutoff
+ loadsp 8
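+	; hand-assembled single-byte "im" of the backward branch offset for
+	; the neqbranch below; a raw .byte keeps its size fixed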
+ .byte (.LmoreMult-.Lbranch)&0x7f+0x80
+.Lbranch:
+ neqbranch
+
+ storesp 4
+ storesp 4
+ storesp 12
+ storesp 4
+ poppc
+
+ .data
+ .balign 4,0
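+; byte-lane masks indexed by (address & 3); the _storeb emulation above
+; uses them to clear the destination byte before inserting the new value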
+_mask:
+ .long 0x00ffffff
+ .long 0xff00ffff
+ .long 0xffff00ff
+ .long 0xffffff00
+
+
+ .globl _hardware
+_hardware:
+ .long 0
+ .globl _cpu_config
+_cpu_config:
+ .long 0
+