summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--zpu/docs/zpu_arch.html6
-rw-r--r--zpu/sw/startup/nextgen_crt0.S894
2 files changed, 900 insertions, 0 deletions
diff --git a/zpu/docs/zpu_arch.html b/zpu/docs/zpu_arch.html
index d8d982d..21b7df5 100644
--- a/zpu/docs/zpu_arch.html
+++ b/zpu/docs/zpu_arch.html
@@ -1335,6 +1335,12 @@ pushed onto stack before jumping to unkonwn instruction vector. This makes it po
to write denser microcode for missing instructions. For emulated opcodes that are
not in use, the microcode can more easily be disabled. Determining
that e.g. MULT is not used, can be a bit tricky, but disabling it is easy.
+<p>
+The address of this entry will be 0x10. The reason 0x00 is not used is that
+GCC needs 0x00-0x0b inclusive to store R0-R2(memory mapped GCC registers).
+The reset vector remains 0x0 so the 0x00-0x0f addresses contains the
+first few instructions executed by the ZPU. Some very early work has been
+done in <a href="../sw/startup/nextgen_crt0.S"> nextgen_crt0.S</a>.
<li>Single entry for *all* unknown instructions does not limit emulation to the
EMULATE instructions today, but instructions such as OR, LOADSP, STORESP, ADDSP,
etc. can also be emulated. This opens up for further reduction in logic usage.
diff --git a/zpu/sw/startup/nextgen_crt0.S b/zpu/sw/startup/nextgen_crt0.S
new file mode 100644
index 0000000..3cf9112
--- /dev/null
+++ b/zpu/sw/startup/nextgen_crt0.S
@@ -0,0 +1,894 @@
+/* Startup code for ZPU
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file with other programs, and to distribute
+those programs without any restriction coming from the use of this
+file. (The General Public License restrictions do apply in other
+respects; for example, they cover modification of the file, and
+distribution when not linked into another program.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+ .file "crt0.S"
+
+ .section ".fixed_vectors","ax"
+
+
+
+
+ .macro fast_neg
+ not
+ im 1
+ add
+ .endm
+
+ .macro cimpl funcname
+ ; save R0
+ im 0
+ load
+
+ ; save R1
+ im 4
+ load
+
+ ; save R2
+ im 8
+ load
+
+ loadsp 20
+ loadsp 20
+
+ fixedim \funcname
+ call
+
+ ; destroy arguments on stack
+ storesp 0
+ storesp 0
+
+ im 0
+ load
+
+ ; poke the result into the right slot
+ storesp 24
+
+ ; restore R2
+ im 8
+ store
+
+ ; restore R1
+ im 4
+ store
+
+ ; restore r0
+ im 0
+ store
+
+ storesp 4
+ poppc
+ .endm
+
+
+
+
+/* vectors */
+ .balign 32,0
+# offset 0x0000 0000
+ .globl _start
+_start:
+ im _premain
+ poppc
+
+ .balign 16,0
+# offset 0x0000 0010
+ .globl _zpu_unknown_instruction_vector
+_zpu_unknown_instruction:
+ /* We have unsupported instruction * 4 on stack */
+ im _emulate_table
+ add
+ load
+ poppc
+
+ .rodata
+ .balign 4,0
+_emulate_table:
+ .long _storeh
+ .long ...
+
+/* instruction emulation code */
+
+# opcode 34
+# offset 0x0000 0040
+ .balign 32,0
+_loadh:
+ loadsp 4
+ ; by not masking out bit 0, we cause a memory access error
+ ; on unaligned access
+ im ~0x2
+ and
+ load
+
+ ; mult 8
+ loadsp 8
+ im 3
+ and
+ fast_neg
+ im 2
+ add
+ im 3
+ ashiftleft
+ ; shift right addr&3 * 8
+ lshiftright
+ im 0xffff
+ and
+ storesp 8
+
+ poppc
+
+# opcode 35
+# offset 0x0000 0060
+ .balign 32,0
+_storeh:
+ loadsp 4
+ ; by not masking out bit 0, we cause a memory access error
+ ; on unaligned access
+ im ~0x2
+ and
+ load
+
+ ; mask
+ im 0xffff
+ loadsp 12
+ im 3
+ and
+ fast_neg
+ im 2
+ add
+ im 3
+ ashiftleft
+ ashiftleft
+ not
+
+ and
+
+ loadsp 12
+ im 0xffff
+
+ nop
+
+ fixedim _storehtail
+ poppc
+
+
+# opcode 36
+# offset 0x0000 0080
+ .balign 32,0
+_lessthan:
+ loadsp 8
+ fast_neg
+ loadsp 8
+ add
+
+ ; DANGER!!!!
+ ; 0x80000000 will overflow when negated, so we need to mask
+ ; the result above with the compare positive to negative
+ ; number case
+ loadsp 12
+ loadsp 12
+ not
+ and
+ not
+ and
+
+
+ ; handle case where we are comparing a negative number
+ ; and positve number. This can underflow. E.g. consider 0x8000000 < 0x1000
+ loadsp 12
+ not
+ loadsp 12
+ and
+
+ or
+
+
+
+ flip
+ im 1
+ and
+
+
+ storesp 12
+ storesp 4
+ poppc
+
+
+# opcode 37
+# offset 0x0000 00a0
+ .balign 32,0
+_lessthanorequal:
+ loadsp 8
+ loadsp 8
+ lessthan
+ loadsp 12
+ loadsp 12
+ eq
+ or
+
+ storesp 12
+ storesp 4
+ poppc
+
+
+# opcode 38
+# offset 0x0000 00c0
+ .balign 32,0
+_ulessthan:
+ ; fish up arguments
+ loadsp 4
+ loadsp 12
+
+ /* low: -1 if low bit dif is negative 0 otherwise: neg (not x&1 and (y&1))
+ x&1 y&1 neg (not x&1 and (y&1))
+ 1 1 0
+ 1 0 0
+ 0 1 -1
+ 0 0 0
+
+ */
+ loadsp 4
+ not
+ loadsp 4
+ and
+ im 1
+ and
+ neg
+
+
+ /* high: upper 31-bit diff is only wrong when diff is 0 and low=-1
+ high=x>>1 - y>>1 + low
+
+ extremes
+
+ 0000 - 1111:
+ low= neg(not 0 and 1) = 1111 (-1)
+ high=000+ neg(111) +low = 000 + 1001 + low = 1000
+ OK
+
+ 1111 - 0000
+ low=neg(not 1 and 0) = 0
+ high=111+neg(000) + low = 0111
+ OK
+
+
+ */
+ loadsp 8
+
+ flip
+ addsp 0
+ flip
+
+ loadsp 8
+
+ flip
+ addsp 0
+ flip
+
+ sub
+
+ ; if they are equal, then the last bit decides...
+ add
+
+ /* test if negative: result = flip(diff) & 1 */
+ flip
+ im 1
+ and
+
+ ; destroy a&b which are on stack
+ storesp 4
+ storesp 4
+
+ storesp 12
+ storesp 4
+ poppc
+
+# opcode 39
+# offset 0x0000 00e0
+ .balign 32,0
+_ulessthanorequal:
+ loadsp 8
+ loadsp 8
+ ulessthan
+ loadsp 12
+ loadsp 12
+ eq
+ or
+
+ storesp 12
+ storesp 4
+ poppc
+
+
+# opcode 40
+# offset 0x0000 0100
+ .balign 32,0
+ .globl _swap
+_swap:
+ breakpoint ; tbd
+
+# opcode 41
+# offset 0x0000 0120
+ .balign 32,0
+_slowmult:
+ im _slowmultImpl
+ poppc
+
+# opcode 42
+# offset 0x0000 0140
+ .balign 32,0
+_lshiftright:
+ loadsp 8
+ flip
+
+ loadsp 8
+ ashiftleft
+ flip
+
+ storesp 12
+ storesp 4
+
+ poppc
+
+
+# opcode 43
+# offset 0x0000 0160
+ .balign 32,0
+_ashiftleft:
+ loadsp 8
+
+ loadsp 8
+ im 0x1f
+ and
+ fast_neg
+ im _ashiftleftEnd
+ add
+ poppc
+
+
+
+# opcode 44
+# offset 0x0000 0180
+ .balign 32,0
+_ashiftright:
+ loadsp 8
+ loadsp 8
+ lshiftright
+
+ ; handle signed value
+ im -1
+ loadsp 12
+ im 0x1f
+ and
+ lshiftright
+ not ; now we have an integer on the stack with the signed
+ ; bits in the right position
+
+ ; mask these bits with the signed bit.
+ loadsp 16
+ not
+ flip
+ im 1
+ and
+ im -1
+ add
+
+ and
+
+ ; stuff in the signed bits...
+ or
+
+ ; store result into correct stack slot
+ storesp 12
+
+ ; move up return value
+ storesp 4
+ poppc
+
+# opcode 45
+# offset 0x0000 01a0
+ .balign 32,0
+_call:
+ ; fn
+ loadsp 4
+
+ ; return address
+ loadsp 4
+
+ ; store return address
+ storesp 12
+
+ ; fn to call
+ storesp 4
+
+ pushsp ; flush internal stack
+ popsp
+
+ poppc
+
+_storehtail:
+
+ and
+ loadsp 12
+ im 3
+ and
+ fast_neg
+ im 2
+ add
+ im 3
+ ashiftleft
+ nop
+ ashiftleft
+
+ or
+
+ loadsp 8
+ im ~0x3
+ and
+
+ store
+
+ storesp 4
+ storesp 4
+ poppc
+
+
+# opcode 46
+# offset 0x0000 01c0
+ .balign 32,0
+_eq:
+ loadsp 8
+ fast_neg
+ loadsp 8
+ add
+
+ not
+ loadsp 0
+ im 1
+ add
+ not
+ and
+ flip
+ im 1
+ and
+
+ storesp 12
+ storesp 4
+ poppc
+
+# opcode 47
+# offset 0x0000 01e0
+ .balign 32,0
+_neq:
+ loadsp 8
+ fast_neg
+ loadsp 8
+ add
+
+ not
+ loadsp 0
+ im 1
+ add
+ not
+ and
+ flip
+
+ not
+
+ im 1
+ and
+
+ storesp 12
+ storesp 4
+ poppc
+
+
+# opcode 48
+# offset 0x0000 0200
+ .balign 32,0
+_neg:
+ loadsp 4
+ not
+ im 1
+ add
+ storesp 8
+
+ poppc
+
+
+# opcode 49
+# offset 0x0000 0220
+ .balign 32,0
+_sub:
+ loadsp 8
+ loadsp 8
+ fast_neg
+ add
+ storesp 12
+
+ storesp 4
+
+ poppc
+
+
+# opcode 50
+# offset 0x0000 0240
+ .balign 32,0
+_xor:
+ loadsp 8
+ not
+ loadsp 8
+ and
+
+ loadsp 12
+ loadsp 12
+ not
+ and
+
+ or
+
+ storesp 12
+ storesp 4
+ poppc
+
+# opcode 51
+# offset 0x0000 0260
+ .balign 32,0
+_loadb:
+ loadsp 4
+ im ~0x3
+ and
+ load
+
+ loadsp 8
+ im 3
+ and
+ fast_neg
+ im 3
+ add
+ ; x8
+ addsp 0
+ addsp 0
+ addsp 0
+
+ lshiftright
+
+ im 0xff
+ and
+ storesp 8
+
+ poppc
+
+
+# opcode 52
+# offset 0x0000 0280
+ .balign 32,0
+_storeb:
+ loadsp 4
+ im ~0x3
+ and
+ load
+
+ ; mask away destination
+ im _mask
+ loadsp 12
+ im 3
+ and
+ addsp 0
+ addsp 0
+ add
+ load
+
+ and
+
+
+ im _storebtail
+ poppc
+
+# opcode 53
+# offset 0x0000 02a0
+ .balign 32,0
+_div:
+ cimpl __divsi3
+
+# opcode 54
+# offset 0x0000 02c0
+ .balign 32,0
+_mod:
+ cimpl __modsi3
+
+# opcode 55
+# offset 0x0000 02e0
+ .balign 32,0
+ .globl _eqbranch
+_eqbranch:
+ loadsp 8
+
+ ; eq
+
+ not
+ loadsp 0
+ im 1
+ add
+ not
+ and
+ flip
+ im 1
+ and
+
+ ; mask
+ im -1
+ add
+ loadsp 0
+ storesp 16
+
+ ; no branch address
+ loadsp 4
+
+ and
+
+ ; fetch boolean & neg mask
+ loadsp 12
+ not
+
+ ; calc address & mask for branch
+ loadsp 8
+ loadsp 16
+ add
+ ; subtract 1 to find PC of branch instruction
+ im -1
+ add
+
+ and
+
+ or
+
+ storesp 4
+ storesp 4
+ storesp 4
+ poppc
+
+
+# opcode 56
+# offset 0x0000 0300
+ .balign 32,0
+ .globl _neqbranch
+_neqbranch:
+ loadsp 8
+
+ ; neq
+
+ not
+ loadsp 0
+ im 1
+ add
+ not
+ and
+ flip
+
+ not
+
+ im 1
+ and
+
+ ; mask
+ im -1
+ add
+ loadsp 0
+ storesp 16
+
+ ; no branch address
+ loadsp 4
+
+ and
+
+ ; fetch boolean & neg mask
+ loadsp 12
+ not
+
+ ; calc address & mask for branch
+ loadsp 8
+ loadsp 16
+ add
+ ; find address of branch instruction
+ im -1
+ add
+
+ and
+
+ or
+
+ storesp 4
+ storesp 4
+ storesp 4
+ poppc
+
+# opcode 57
+# offset 0x0000 0320
+ .balign 32,0
+ .globl _poppcrel
+_poppcrel:
+ add
+ ; address of poppcrel
+ im -1
+ add
+ poppc
+
+# opcode 58
+# offset 0x0000 0340
+ .balign 32,0
+ .globl _config
+_config:
+ im 1
+ nop
+ im _hardware
+ store
+ storesp 4
+ poppc
+
+# opcode 59
+# offset 0x0000 0360
+ .balign 32,0
+_pushpc:
+ loadsp 4
+ im 1
+ add
+ storesp 8
+ poppc
+
+# opcode 60
+# offset 0x0000 0380
+ .balign 32,0
+_syscall_emulate:
+ .byte 0
+
+# opcode 61
+# offset 0x0000 03a0
+ .balign 32,0
+_pushspadd:
+ pushsp
+ im 4
+ add
+ loadsp 8
+ addsp 0
+ addsp 0
+ add
+ storesp 8
+
+ poppc
+
+# opcode 62
+# offset 0x0000 03c0
+ .balign 32,0
+_halfmult:
+ breakpoint
+
+# opcode 63
+# offset 0x0000 03e0
+ .balign 32,0
+_callpcrel:
+ loadsp 4
+ loadsp 4
+ add
+ im -1
+ add
+ loadsp 4
+
+ storesp 12 ; return address
+ storesp 4
+ pushsp ; this will flush the internal stack.
+ popsp
+ poppc
+
+ .text
+
+
+
+
+_ashiftleftBegin:
+ .rept 0x1f
+ addsp 0
+ .endr
+_ashiftleftEnd:
+ storesp 12
+ storesp 4
+ poppc
+
+_storebtail:
+ loadsp 12
+ im 0xff
+ and
+ loadsp 12
+ im 3
+ and
+
+ fast_neg
+ im 3
+ add
+ ; x8
+ addsp 0
+ addsp 0
+ addsp 0
+
+ ashiftleft
+
+ or
+
+ loadsp 8
+ im ~0x3
+ and
+
+ store
+
+ storesp 4
+ storesp 4
+ poppc
+
+
+
+
+; NB! this is not an EMULATE instruction. It is a varargs fn.
+ .globl _syscall
+_syscall:
+ syscall
+ poppc
+
+_slowmultImpl:
+
+ loadsp 8 ; A
+ loadsp 8 ; B
+ im 0 ; C
+
+.LmoreMult:
+ mult1bit
+
+ ; cutoff
+ loadsp 8
+ .byte (.LmoreMult-.Lbranch)&0x7f+0x80
+.Lbranch:
+ neqbranch
+
+ storesp 4
+ storesp 4
+ storesp 12
+ storesp 4
+ poppc
+
+ .data
+ .balign 4,0
+_mask:
+ .long 0x00ffffff
+ .long 0xff00ffff
+ .long 0xffff00ff
+ .long 0xffffff00
+
+
+ .globl _hardware
+_hardware:
+ .long 0
+ .globl _cpu_config
+_cpu_config:
+ .long 0
+
+; Pointers to emulated instructions
OpenPOWER on IntegriCloud