author     oharboe <oharboe>  2008-08-18 12:00:34 +0000
committer  oharboe <oharboe>  2008-08-18 12:00:34 +0000
commit     431a1bf775d468bcd788c3dd716b97cc0fca1f34 (patch)
tree       d327dae59d556908d6b90f6de2d2cf199ca8c323
parent     3c919f795b78bacdb9b3b7396ac5761f7457224a (diff)
* duplicated crt0.S and supporting files from libgloss into
sw/startup. This makes it easier to tinker with the startup code.
-rw-r--r--  zpu/ChangeLog              3
-rw-r--r--  zpu/docs/zpu_arch.html    16
-rw-r--r--  zpu/sw/startup/crt0.S    957
-rw-r--r--  zpu/sw/startup/crt_io.c   91
-rw-r--r--  zpu/sw/startup/time.c     32
5 files changed, 1098 insertions, 1 deletion
diff --git a/zpu/ChangeLog b/zpu/ChangeLog
index 88bc650..02adb2c 100644
--- a/zpu/ChangeLog
+++ b/zpu/ChangeLog
@@ -1,3 +1,6 @@
+2008-08-18 Øyvind Harboe
+	* duplicated crt0.S and supporting files from libgloss into
+	sw/startup. This makes it easier to tinker with the startup code.
2008-08-08 Salvador E. Tropea
* zpu/hdl/zpu4/core/histogram.perl - generate opcode histogram from
HDL simulation output
diff --git a/zpu/docs/zpu_arch.html b/zpu/docs/zpu_arch.html
index ccbd0df..d8d982d 100644
--- a/zpu/docs/zpu_arch.html
+++ b/zpu/docs/zpu_arch.html
@@ -5,6 +5,7 @@
<li> <a href="#started">Getting started</a>
<li> <a href="#introduction">Introduction</a>
<li> <a href="#instructionset">Instruction set</a>
+<li> <a href="#startup">Custom startup code (aka crt0.s)</a>
<li> <a href="#implementing">Implementing your own ZPU</a>
<li> <a href="#vectors">Jump vectors</a>
<li> <a href="#memorymap">Memory map</a>
@@ -817,7 +818,20 @@ int address = pop();<br>
</table>
-
+<a name="startup"/>
+<h1>Custom startup code (aka crt0.s)</h1>
+To minimize the size of an application, one important trick is to
+strip down the startup code. The startup code contains emulation
+of instructions that may never be used by a particular application.
+<p>
+The startup code is found in the GCC source tree under gcc/libgloss/zpu,
+but to make it more accessible, it has been duplicated
+into <a href="../sw/startup">zpu/sw/startup</a>.
+<p>
+To minimize startup size, see the <a href="../roadshow/roadshow/codesize/index.html">codesize</a>
+demo. This is standard GCC practice and is simple enough once you have
+been through it a couple of times.
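+<p>
+As a sketch of the idea: an application that is known never to divide
+could replace the _div emulation vector (opcode 53 in crt0.S) with a
+breakpoint, so the linker no longer pulls in __divsi3:
+<pre>
+# opcode 53
+        .balign 32,0
+_div:
+        breakpoint      ; this application never divides
+</pre>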
+
<a name="implementing"/>
<h1>Implementing your own ZPU</h1>
One of the neat things about the ZPU is that the instruction set and architecture
diff --git a/zpu/sw/startup/crt0.S b/zpu/sw/startup/crt0.S
new file mode 100644
index 0000000..00870c4
--- /dev/null
+++ b/zpu/sw/startup/crt0.S
@@ -0,0 +1,957 @@
+/* Startup code for ZPU
+ Copyright (C) 2005 Free Software Foundation, Inc.
+
+This file is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 2, or (at your option) any
+later version.
+
+In addition to the permissions in the GNU General Public License, the
+Free Software Foundation gives you unlimited permission to link the
+compiled version of this file with other programs, and to distribute
+those programs without any restriction coming from the use of this
+file. (The General Public License restrictions do apply in other
+respects; for example, they cover modification of the file, and
+distribution when not linked into another program.)
+
+This file is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; see the file COPYING. If not, write to
+the Free Software Foundation, 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
+ .file "crt0.S"
+
+
+
+
+; .section ".fixed_vectors","ax"
+; KLUDGE!!! we remove the executable bit to avoid relaxation
+ .section ".fixed_vectors","a"
+
+; DANGER!!!!
+; we need to align these code sections to 32 bytes, which
+; means we must not use any assembler instructions that are relaxed
+; at linker time
+; DANGER!!!!
+
+ .macro fixedim value
+ im \value
+ .endm
+
+ .macro jsr address
+
+ im 0 ; save R0
+ load
+ im 4 ; save R1
+ load
+ im 8 ; save R2
+ load
+
+ fixedim \address
+ call
+
+ im 8
+ store ; restore R2
+ im 4
+ store ; restore R1
+ im 0
+ store ; restore R0
+ .endm
+
+
+ .macro jmp address
+ fixedim \address
+ poppc
+ .endm
+
+
+ .macro fast_neg
+ not
+ im 1
+ add
+ .endm
+
+ .macro cimpl funcname
+ ; save R0
+ im 0
+ load
+
+ ; save R1
+ im 4
+ load
+
+ ; save R2
+ im 8
+ load
+
+ loadsp 20
+ loadsp 20
+
+ fixedim \funcname
+ call
+
+ ; destroy arguments on stack
+ storesp 0
+ storesp 0
+
+ im 0
+ load
+
+ ; poke the result into the right slot
+ storesp 24
+
+ ; restore R2
+ im 8
+ store
+
+ ; restore R1
+ im 4
+ store
+
+ ; restore r0
+ im 0
+ store
+
+
+ storesp 4
+ poppc
+ .endm
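+
+	; In effect: result = funcname(arg1, arg2). The two argument words
+	; above the return address are passed to the C routine, the return
+	; value comes back in R0 (address 0), and the pseudo-registers at
+	; addresses 0/4/8 are saved and restored around the call.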
+
+ .macro mult1bit
+ ; create mask of lowest bit in A
+ loadsp 8 ; A
+ im 1
+ and
+ im -1
+ add
+ not
+ loadsp 8 ; B
+ and
+ add ; accumulate in C
+
+ ; shift B left 1 bit
+ loadsp 4 ; B
+ addsp 0
+ storesp 8 ; B
+
+ ; shift A right 1 bit
+ loadsp 8 ; A
+ flip
+ addsp 0
+ flip
+ storesp 12 ; A
+ .endm
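+
+	; Rough C equivalent of one mult1bit step (a sketch; A, B, C name
+	; the three stack slots above, not real symbols):
+	;   mask = -(A & 1);   /* all-ones if A is odd, else zero */
+	;   C += B & mask;     /* conditionally accumulate B      */
+	;   B <<= 1;           /* shift multiplicand left         */
+	;   A >>= 1;           /* consume lowest multiplier bit   */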
+
+
+
+/* vectors */
+ .balign 32,0
+# offset 0x0000 0000
+ .globl _start
+_start:
+ ; intSp must be 0 when we jump to _premain
+
+ im ZPU_ID
+ loadsp 0
+ im _cpu_config
+ store
+ config
+ jmp _premain
+
+
+
+ .balign 32,0
+# offset 0x0000 0020
+ .globl _zpu_interrupt_vector
+_zpu_interrupt_vector:
+ jsr _zpu_interrupt
+ poppc
+
+
+/* instruction emulation code */
+
+# opcode 34
+# offset 0x0000 0040
+ .balign 32,0
+_loadh:
+ loadsp 4
+ ; by not masking out bit 0, we cause a memory access error
+ ; on unaligned access
+ im ~0x2
+ and
+ load
+
+ ; mult 8
+ loadsp 8
+ im 3
+ and
+ fast_neg
+ im 2
+ add
+ im 3
+ ashiftleft
+ ; shift right addr&3 * 8
+ lshiftright
+ im 0xffff
+ and
+ storesp 8
+
+ poppc
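+
+; Rough C equivalent of _loadh (a sketch; the ZPU is big-endian):
+;   word   = *(int *)(addr & ~2);  /* bit 0 kept so odd addresses fault */
+;   result = (word >> ((2 - (addr & 3)) * 8)) & 0xffff;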
+
+# opcode 35
+# offset 0x0000 0060
+ .balign 32,0
+_storeh:
+ loadsp 4
+ ; by not masking out bit 0, we cause a memory access error
+ ; on unaligned access
+ im ~0x2
+ and
+ load
+
+ ; mask
+ im 0xffff
+ loadsp 12
+ im 3
+ and
+ fast_neg
+ im 2
+ add
+ im 3
+ ashiftleft
+ ashiftleft
+ not
+
+ and
+
+ loadsp 12
+ im 0xffff
+
+ nop
+
+ fixedim _storehtail
+ poppc
+
+
+# opcode 36
+# offset 0x0000 0080
+ .balign 32,0
+_lessthan:
+ loadsp 8
+ fast_neg
+ loadsp 8
+ add
+
+	; DANGER!!!!
+	; 0x80000000 overflows when negated, so the sign of the result
+	; above must be masked out for the case of comparing a positive
+	; number against a negative one
+ loadsp 12
+ loadsp 12
+ not
+ and
+ not
+ and
+
+
+	; handle the case where we are comparing a negative number
+	; and a positive number. This can underflow. E.g. consider 0x80000000 < 0x1000
+ loadsp 12
+ not
+ loadsp 12
+ and
+
+ or
+
+
+
+ flip
+ im 1
+ and
+
+
+ storesp 12
+ storesp 4
+ poppc
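+
+; Rough C equivalent of _lessthan (a sketch; a, b are the operands):
+;   d  = (a - b) & ~(~a & b); /* a>=0, b<0: subtraction may overflow, answer is 0 */
+;   d |= a & ~b;              /* a<0, b>=0: subtraction may underflow, answer is 1 */
+;   result = (unsigned)d >> 31;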
+
+
+# opcode 37
+# offset 0x0000 00a0
+ .balign 32,0
+_lessthanorequal:
+ loadsp 8
+ loadsp 8
+ lessthan
+ loadsp 12
+ loadsp 12
+ eq
+ or
+
+ storesp 12
+ storesp 4
+ poppc
+
+
+# opcode 38
+# offset 0x0000 00c0
+ .balign 32,0
+_ulessthan:
+ ; fish up arguments
+ loadsp 4
+ loadsp 12
+
+	/* low: -1 if the low-bit subtraction would borrow, 0 otherwise:
+	   low = neg((not x) & y & 1)
+	   x&1 y&1   low
+	    1   1     0
+	    1   0     0
+	    0   1    -1
+	    0   0     0
+	*/
+ loadsp 4
+ not
+ loadsp 4
+ and
+ im 1
+ and
+ neg
+
+
+ /* high: upper 31-bit diff is only wrong when diff is 0 and low=-1
+ high=x>>1 - y>>1 + low
+
+ extremes
+
+ 0000 - 1111:
+ low= neg(not 0 and 1) = 1111 (-1)
+ high=000+ neg(111) +low = 000 + 1001 + low = 1000
+ OK
+
+ 1111 - 0000
+ low=neg(not 1 and 0) = 0
+ high=111+neg(000) + low = 0111
+ OK
+
+
+ */
+ loadsp 8
+
+ flip
+ addsp 0
+ flip
+
+ loadsp 8
+
+ flip
+ addsp 0
+ flip
+
+ sub
+
+ ; if they are equal, then the last bit decides...
+ add
+
+ /* test if negative: result = flip(diff) & 1 */
+ flip
+ im 1
+ and
+
+ ; destroy a&b which are on stack
+ storesp 4
+ storesp 4
+
+ storesp 12
+ storesp 4
+ poppc
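+
+; Rough C equivalent of _ulessthan (a sketch; x, y are the operands,
+; shifts are logical):
+;   low  = -((~x & y) & 1);           /* borrow out of the lowest bit  */
+;   diff = (x >> 1) - (y >> 1) + low; /* 31-bit halves cannot overflow */
+;   result = ((unsigned)diff >> 31) & 1;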
+
+# opcode 39
+# offset 0x0000 00e0
+ .balign 32,0
+_ulessthanorequal:
+ loadsp 8
+ loadsp 8
+ ulessthan
+ loadsp 12
+ loadsp 12
+ eq
+ or
+
+ storesp 12
+ storesp 4
+ poppc
+
+
+# opcode 40
+# offset 0x0000 0100
+ .balign 32,0
+ .globl _swap
+_swap:
+ breakpoint ; tbd
+
+# opcode 41
+# offset 0x0000 0120
+ .balign 32,0
+_slowmult:
+ im _slowmultImpl
+ poppc
+
+# opcode 42
+# offset 0x0000 0140
+ .balign 32,0
+_lshiftright:
+ loadsp 8
+ flip
+
+ loadsp 8
+ ashiftleft
+ flip
+
+ storesp 12
+ storesp 4
+
+ poppc
+
+
+# opcode 43
+# offset 0x0000 0160
+ .balign 32,0
+_ashiftleft:
+ loadsp 8
+
+ loadsp 8
+ im 0x1f
+ and
+ fast_neg
+ im _ashiftleftEnd
+ add
+ poppc
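+
+	; The shift is performed by jumping (shift & 0x1f) instructions
+	; back from _ashiftleftEnd into the run of "addsp 0" (x += x)
+	; instructions at _ashiftleftBegin, doubling the value once per
+	; remaining instruction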
+
+
+
+# opcode 44
+# offset 0x0000 0180
+ .balign 32,0
+_ashiftright:
+ loadsp 8
+ loadsp 8
+ lshiftright
+
+ ; handle signed value
+ im -1
+ loadsp 12
+ im 0x1f
+ and
+ lshiftright
+	not ; now we have an integer on the stack with the sign
+	    ; bits in the right position
+
+	; mask these bits with the sign bit.
+ loadsp 16
+ not
+ flip
+ im 1
+ and
+ im -1
+ add
+
+ and
+
+	; OR in the sign bits...
+ or
+
+ ; store result into correct stack slot
+ storesp 12
+
+ ; move up return value
+ storesp 4
+ poppc
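+
+; Rough C equivalent of _ashiftright (a sketch; x is the value, n the
+; shift count):
+;   n = n & 0x1f;
+;   r = (unsigned)x >> n;          /* logical shift                  */
+;   if (x < 0)
+;       r |= ~((unsigned)-1 >> n); /* OR the sign into the high bits */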
+
+# opcode 45
+# offset 0x0000 01a0
+ .balign 32,0
+_call:
+ ; fn
+ loadsp 4
+
+ ; return address
+ loadsp 4
+
+ ; store return address
+ storesp 12
+
+ ; fn to call
+ storesp 4
+
+ pushsp ; flush internal stack
+ popsp
+
+ poppc
+
+_storehtail:
+
+ and
+ loadsp 12
+ im 3
+ and
+ fast_neg
+ im 2
+ add
+ im 3
+ ashiftleft
+ nop
+ ashiftleft
+
+ or
+
+ loadsp 8
+ im ~0x3
+ and
+
+ store
+
+ storesp 4
+ storesp 4
+ poppc
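+
+; With _storeh above, this is roughly (a sketch; big-endian byte order):
+;   shift = (2 - (addr & 3)) * 8;
+;   word  = *(int *)(addr & ~2);   /* bit 0 kept so odd addresses fault */
+;   word  = (word & ~(0xffff << shift)) | ((val & 0xffff) << shift);
+;   *(int *)(addr & ~3) = word;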
+
+
+# opcode 46
+# offset 0x0000 01c0
+ .balign 32,0
+_eq:
+ loadsp 8
+ fast_neg
+ loadsp 8
+ add
+
+ not
+ loadsp 0
+ im 1
+ add
+ not
+ and
+ flip
+ im 1
+ and
+
+ storesp 12
+ storesp 4
+ poppc
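+
+; Rough C equivalent of _eq (a sketch): bit 31 of ~d & (d - 1) is set
+; iff d == 0:
+;   d = a - b;
+;   result = ((unsigned)(~d & (d - 1))) >> 31;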
+
+# opcode 47
+# offset 0x0000 01e0
+ .balign 32,0
+_neq:
+ loadsp 8
+ fast_neg
+ loadsp 8
+ add
+
+ not
+ loadsp 0
+ im 1
+ add
+ not
+ and
+ flip
+
+ not
+
+ im 1
+ and
+
+ storesp 12
+ storesp 4
+ poppc
+
+
+# opcode 48
+# offset 0x0000 0200
+ .balign 32,0
+_neg:
+ loadsp 4
+ not
+ im 1
+ add
+ storesp 8
+
+ poppc
+
+
+# opcode 49
+# offset 0x0000 0220
+ .balign 32,0
+_sub:
+ loadsp 8
+ loadsp 8
+ fast_neg
+ add
+ storesp 12
+
+ storesp 4
+
+ poppc
+
+
+# opcode 50
+# offset 0x0000 0240
+ .balign 32,0
+_xor:
+ loadsp 8
+ not
+ loadsp 8
+ and
+
+ loadsp 12
+ loadsp 12
+ not
+ and
+
+ or
+
+ storesp 12
+ storesp 4
+ poppc
+
+# opcode 51
+# offset 0x0000 0260
+ .balign 32,0
+_loadb:
+ loadsp 4
+ im ~0x3
+ and
+ load
+
+ loadsp 8
+ im 3
+ and
+ fast_neg
+ im 3
+ add
+ ; x8
+ addsp 0
+ addsp 0
+ addsp 0
+
+ lshiftright
+
+ im 0xff
+ and
+ storesp 8
+
+ poppc
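+
+; Rough C equivalent of _loadb (a sketch; big-endian byte order):
+;   word   = *(int *)(addr & ~3);
+;   result = (word >> ((3 - (addr & 3)) * 8)) & 0xff;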
+
+
+# opcode 52
+# offset 0x0000 0280
+ .balign 32,0
+_storeb:
+ loadsp 4
+ im ~0x3
+ and
+ load
+
+ ; mask away destination
+ im _mask
+ loadsp 12
+ im 3
+ and
+ addsp 0
+ addsp 0
+ add
+ load
+
+ and
+
+
+ im _storebtail
+ poppc
+
+# opcode 53
+# offset 0x0000 02a0
+ .balign 32,0
+_div:
+ cimpl __divsi3
+
+# opcode 54
+# offset 0x0000 02c0
+ .balign 32,0
+_mod:
+ cimpl __modsi3
+
+# opcode 55
+# offset 0x0000 02e0
+ .balign 32,0
+ .globl _eqbranch
+_eqbranch:
+ loadsp 8
+
+ ; eq
+
+ not
+ loadsp 0
+ im 1
+ add
+ not
+ and
+ flip
+ im 1
+ and
+
+ ; mask
+ im -1
+ add
+ loadsp 0
+ storesp 16
+
+	; fall-through (no-branch) address
+ loadsp 4
+
+ and
+
+ ; fetch boolean & neg mask
+ loadsp 12
+ not
+
+ ; calc address & mask for branch
+ loadsp 8
+ loadsp 16
+ add
+ ; subtract 1 to find PC of branch instruction
+ im -1
+ add
+
+ and
+
+ or
+
+ storesp 4
+ storesp 4
+ storesp 4
+ poppc
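+
+; Rough C equivalent of the branchless select above (a sketch; ret is
+; the return address, off the branch offset):
+;   mask = (cond == 0) ? 0 : -1;  /* eqbranch branches when cond == 0 */
+;   pc   = (ret & mask) | ((ret - 1 + off) & ~mask);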
+
+
+# opcode 56
+# offset 0x0000 0300
+ .balign 32,0
+ .globl _neqbranch
+_neqbranch:
+ loadsp 8
+
+ ; neq
+
+ not
+ loadsp 0
+ im 1
+ add
+ not
+ and
+ flip
+
+ not
+
+ im 1
+ and
+
+ ; mask
+ im -1
+ add
+ loadsp 0
+ storesp 16
+
+	; fall-through (no-branch) address
+ loadsp 4
+
+ and
+
+ ; fetch boolean & neg mask
+ loadsp 12
+ not
+
+ ; calc address & mask for branch
+ loadsp 8
+ loadsp 16
+ add
+ ; find address of branch instruction
+ im -1
+ add
+
+ and
+
+ or
+
+ storesp 4
+ storesp 4
+ storesp 4
+ poppc
+
+# opcode 57
+# offset 0x0000 0320
+ .balign 32,0
+ .globl _poppcrel
+_poppcrel:
+ add
+ ; address of poppcrel
+ im -1
+ add
+ poppc
+
+# opcode 58
+# offset 0x0000 0340
+ .balign 32,0
+ .globl _config
+_config:
+ im 1
+ nop
+ im _hardware
+ store
+ storesp 4
+ poppc
+
+# opcode 59
+# offset 0x0000 0360
+ .balign 32,0
+_pushpc:
+ loadsp 4
+ im 1
+ add
+ storesp 8
+ poppc
+
+# opcode 60
+# offset 0x0000 0380
+ .balign 32,0
+_syscall_emulate:
+ .byte 0
+
+# opcode 61
+# offset 0x0000 03a0
+ .balign 32,0
+_pushspadd:
+ pushsp
+ im 4
+ add
+ loadsp 8
+ addsp 0
+ addsp 0
+ add
+ storesp 8
+
+ poppc
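+
+	; In effect the argument n on top of the stack is replaced by
+	; sp + 4 + 4*n, turning a stack-slot index into an address
+	; (sp here is the value pushed by pushsp above)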
+
+# opcode 62
+# offset 0x0000 03c0
+ .balign 32,0
+_halfmult:
+ breakpoint
+
+# opcode 63
+# offset 0x0000 03e0
+ .balign 32,0
+_callpcrel:
+ loadsp 4
+ loadsp 4
+ add
+ im -1
+ add
+ loadsp 4
+
+ storesp 12 ; return address
+ storesp 4
+ pushsp ; this will flush the internal stack.
+ popsp
+ poppc
+
+ .text
+
+
+
+
+_ashiftleftBegin:
+ .rept 0x1f
+ addsp 0
+ .endr
+_ashiftleftEnd:
+ storesp 12
+ storesp 4
+ poppc
+
+_storebtail:
+ loadsp 12
+ im 0xff
+ and
+ loadsp 12
+ im 3
+ and
+
+ fast_neg
+ im 3
+ add
+ ; x8
+ addsp 0
+ addsp 0
+ addsp 0
+
+ ashiftleft
+
+ or
+
+ loadsp 8
+ im ~0x3
+ and
+
+ store
+
+ storesp 4
+ storesp 4
+ poppc
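+
+; With _storeb above, this is roughly (a sketch; big-endian byte order,
+; the _mask table below supplies the hole for the target byte):
+;   word  = *(int *)(addr & ~3) & _mask[addr & 3];
+;   word |= (val & 0xff) << ((3 - (addr & 3)) * 8);
+;   *(int *)(addr & ~3) = word;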
+
+
+
+
+; NB! this is not an EMULATE instruction. It is a varargs function.
+ .globl _syscall
+_syscall:
+ syscall
+ poppc
+
+_slowmultImpl:
+
+ loadsp 8 ; A
+ loadsp 8 ; B
+ im 0 ; C
+
+.LmoreMult:
+ mult1bit
+
+ ; cutoff
+ loadsp 8
+ .byte (.LmoreMult-.Lbranch)&0x7f+0x80
+.Lbranch:
+ neqbranch
+
+ storesp 4
+ storesp 4
+ storesp 12
+ storesp 4
+ poppc
+
+ .data
+ .balign 4,0
+_mask:
+ .long 0x00ffffff
+ .long 0xff00ffff
+ .long 0xffff00ff
+ .long 0xffffff00
+
+
+ .globl _hardware
+_hardware:
+ .long 0
+ .globl _cpu_config
+_cpu_config:
+ .long 0
+
diff --git a/zpu/sw/startup/crt_io.c b/zpu/sw/startup/crt_io.c
new file mode 100644
index 0000000..966ae33
--- /dev/null
+++ b/zpu/sw/startup/crt_io.c
@@ -0,0 +1,91 @@
+#include <syscall.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/stat.h>
+
+extern int _hardware;
+/* _cpu_config==0 => Abel
+ * _cpu_config==1 => Zeta
+ * _cpu_config==2 => Phi
+ */
+extern int _cpu_config;
+static volatile int *UART;
+static volatile int *TIMER;
+volatile int *MHZ;
+
+
+
+/*
+ * Wait indefinitely for input byte
+ */
+
+
+int __attribute__ ((weak)) inbyte()
+{
+ int val;
+ for (;;)
+ {
+ val=UART[1];
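+      /* bit 8 (0x100) is assumed to flag "receive data valid";
+         the low 8 bits hold the received byte */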
+ if ((val&0x100)!=0)
+ {
+ return val&0xff;
+ }
+ }
+}
+
+
+
+/*
+ * Output one character to the serial port
+ *
+ *
+ */
+void __attribute__ ((weak)) outbyte(int c)
+{
+ /* Wait for space in FIFO */
+ while ((UART[0]&0x100)==0);
+ UART[0]=c;
+}
+
+static const int mhz=64;
+
+void __attribute__ ((weak)) _initIO(void)
+{
+ if (_hardware)
+ {
+ if (_cpu_config==2)
+ {
+ /* Phi board addresses */
+ UART=(volatile int *)0x080a000c;
+ TIMER=(volatile int *)0x080a0014;
+ MHZ=(volatile int *)&mhz;
+ } else
+ {
+ /* Abel board */
+ UART=(volatile int *)0xc000;
+ TIMER=(volatile int *)0x9000;
+ MHZ=(volatile int *)0x8800;
+ }
+ } else
+ {
+ UART=(volatile int *)0x80000024;
+ TIMER=(volatile int *)0x80000100;
+ MHZ=(volatile int *)0x80000200;
+ }
+}
+
+
+
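+/*
+ * Read the free-running cycle counter as a 64-bit value. Writing 2 to
+ * TIMER[0] is assumed to latch ("sample") the count so the two 32-bit
+ * halves read back from TIMER[0..1] are coherent.
+ */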
+long long __attribute__ ((weak)) _readCycles()
+{
+ long long clock;
+ unsigned int i;
+
+ TIMER[0]=0x2; /* sample timer */
+ clock=0;
+ for (i=0; i<2; i++)
+ {
+ clock|=((long long )(TIMER[i]))<<(i*32);
+ }
+ return clock;
+}
diff --git a/zpu/sw/startup/time.c b/zpu/sw/startup/time.c
new file mode 100644
index 0000000..767b62f
--- /dev/null
+++ b/zpu/sw/startup/time.c
@@ -0,0 +1,32 @@
+#include <_ansi.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+extern long long _readCycles();
+
+
+extern volatile int *MHZ;
+
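+/* With the clock rate in MHz, cycles divided by MHz gives microseconds. */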
+long long _readMicroseconds()
+{
+ int Hz;
+ long long clock;
+ Hz=(*MHZ&0xff);
+ clock=_readCycles();
+ return clock/(long long)Hz;
+}
+
+
+
+
+time_t
+time (time_t *tloc)
+{
+ time_t t;
+ t=(time_t)(_readMicroseconds()/(long long )1000000);
+ if (tloc!=NULL)
+ {
+ *tloc=t;
+ }
+ return t;
+}