diff options
author | oharboe <oharboe> | 2008-08-18 12:00:34 +0000 |
---|---|---|
committer | oharboe <oharboe> | 2008-08-18 12:00:34 +0000 |
commit | 431a1bf775d468bcd788c3dd716b97cc0fca1f34 (patch) | |
tree | d327dae59d556908d6b90f6de2d2cf199ca8c323 | |
parent | 3c919f795b78bacdb9b3b7396ac5761f7457224a (diff) | |
download | zpu-431a1bf775d468bcd788c3dd716b97cc0fca1f34.zip zpu-431a1bf775d468bcd788c3dd716b97cc0fca1f34.tar.gz |
* duplicated crt0.s and some other stuff from libgloss into
sw/startup. This makes it easier to tinker w/startup code.
-rw-r--r-- | zpu/ChangeLog | 3 | ||||
-rw-r--r-- | zpu/docs/zpu_arch.html | 16 | ||||
-rw-r--r-- | zpu/sw/startup/crt0.S | 957 | ||||
-rw-r--r-- | zpu/sw/startup/crt_io.c | 91 | ||||
-rw-r--r-- | zpu/sw/startup/time.c | 32 |
5 files changed, 1098 insertions, 1 deletion
diff --git a/zpu/ChangeLog b/zpu/ChangeLog index 88bc650..02adb2c 100644 --- a/zpu/ChangeLog +++ b/zpu/ChangeLog @@ -1,3 +1,6 @@ +2008-08-18 Øyvind Harboe
+ * duplicated crt0.s and some other stuff from libgloss into
+ sw/startup. This makes it easier to tinker w/startup code.
2008-08-08 Salvador E. Tropea
* zpu/hdl/zpu4/core/histogram.perl - generate opcode histogram from
HDL simulation output
diff --git a/zpu/docs/zpu_arch.html b/zpu/docs/zpu_arch.html index ccbd0df..d8d982d 100644 --- a/zpu/docs/zpu_arch.html +++ b/zpu/docs/zpu_arch.html @@ -5,6 +5,7 @@ <li> <a href="#started">Getting started</a> <li> <a href="#introduction">Introduction</a> <li> <a href="#instructionset">Instruction set</a> +<li> <a href="#startup">Custom startup code (aka crt0.s)</a> <li> <a href="#implementing">Implementing your own ZPU</a> <li> <a href="#vectors">Jump vectors</a> <li> <a href="#memorymap">Memory map</a> @@ -817,7 +818,20 @@ int address = pop();<br> </table> - +<a name="startup"/> +<h1>Custom startup code (aka crt0.s)</h1> +To minimize the size of an application, one important trick is to +strip down the startup code. The startup code contains emulation +of instructions that may never be used by a particular application. +<p> +The startup code is found in the GCC source code under gcc/libgloss/zpu, +but to make the startup code more available, it has been duplicated +into <a href="../sw/startup">zpu/sw/startup</a> +<p> +To minimize startup size, see <a href="../roadshow/roadshow/codesize/index.html">codesize</a> +demo. This is pretty standard GCC stuff and simple enough once you've +been over it a couple of times. + <a name="implementing"/> <h1>Implementing your own ZPU</h1> One of the neat things about the ZPU is that the instruction set and architecture diff --git a/zpu/sw/startup/crt0.S b/zpu/sw/startup/crt0.S new file mode 100644 index 0000000..00870c4 --- /dev/null +++ b/zpu/sw/startup/crt0.S @@ -0,0 +1,957 @@ +/* Startup code for ZPU + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. 
+ +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file with other programs, and to distribute +those programs without any restriction coming from the use of this +file. (The General Public License restrictions do apply in other +respects; for example, they cover modification of the file, and +distribution when not linked into another program.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + .file "crt0.S" + + + + +; .section ".fixed_vectors","ax" +; KLUDGE!!! we remove the executable bit to avoid relaxation + .section ".fixed_vectors","a" + +; DANGER!!!! +; we need to align these code sections to 32 bytes, which +; means we must not use any assembler instructions that are relaxed +; at linker time +; DANGER!!!! 
+ + .macro fixedim value + im \value + .endm + + .macro jsr address + + im 0 ; save R0 + load + im 4 ; save R1 + load + im 8 ; save R2 + load + + fixedim \address + call + + im 8 + store ; restore R2 + im 4 + store ; restore R1 + im 0 + store ; restore R0 + .endm + + + .macro jmp address + fixedim \address + poppc + .endm + + + .macro fast_neg + not + im 1 + add + .endm + + .macro cimpl funcname + ; save R0 + im 0 + load + + ; save R1 + im 4 + load + + ; save R2 + im 8 + load + + loadsp 20 + loadsp 20 + + fixedim \funcname + call + + ; destroy arguments on stack + storesp 0 + storesp 0 + + im 0 + load + + ; poke the result into the right slot + storesp 24 + + ; restore R2 + im 8 + store + + ; restore R1 + im 4 + store + + ; restore r0 + im 0 + store + + + storesp 4 + poppc + .endm + + .macro mult1bit + ; create mask of lowest bit in A + loadsp 8 ; A + im 1 + and + im -1 + add + not + loadsp 8 ; B + and + add ; accumulate in C + + ; shift B left 1 bit + loadsp 4 ; B + addsp 0 + storesp 8 ; B + + ; shift A right 1 bit + loadsp 8 ; A + flip + addsp 0 + flip + storesp 12 ; A + .endm + + + +/* vectors */ + .balign 32,0 +# offset 0x0000 0000 + .globl _start +_start: + ; intSp must be 0 when we jump to _premain + + im ZPU_ID + loadsp 0 + im _cpu_config + store + config + jmp _premain + + + + .balign 32,0 +# offset 0x0000 0020 + .globl _zpu_interrupt_vector +_zpu_interrupt_vector: + jsr _zpu_interrupt + poppc + + +/* instruction emulation code */ + +# opcode 34 +# offset 0x0000 0040 + .balign 32,0 +_loadh: + loadsp 4 + ; by not masking out bit 0, we cause a memory access error + ; on unaligned access + im ~0x2 + and + load + + ; mult 8 + loadsp 8 + im 3 + and + fast_neg + im 2 + add + im 3 + ashiftleft + ; shift right addr&3 * 8 + lshiftright + im 0xffff + and + storesp 8 + + poppc + +# opcode 35 +# offset 0x0000 0060 + .balign 32,0 +_storeh: + loadsp 4 + ; by not masking out bit 0, we cause a memory access error + ; on unaligned access + im ~0x2 + and + load + + ; mask + 
im 0xffff + loadsp 12 + im 3 + and + fast_neg + im 2 + add + im 3 + ashiftleft + ashiftleft + not + + and + + loadsp 12 + im 0xffff + + nop + + fixedim _storehtail + poppc + + +# opcode 36 +# offset 0x0000 0080 + .balign 32,0 +_lessthan: + loadsp 8 + fast_neg + loadsp 8 + add + + ; DANGER!!!! + ; 0x80000000 will overflow when negated, so we need to mask + ; the result above with the compare positive to negative + ; number case + loadsp 12 + loadsp 12 + not + and + not + and + + + ; handle case where we are comparing a negative number + ; and positve number. This can underflow. E.g. consider 0x8000000 < 0x1000 + loadsp 12 + not + loadsp 12 + and + + or + + + + flip + im 1 + and + + + storesp 12 + storesp 4 + poppc + + +# opcode 37 +# offset 0x0000 00a0 + .balign 32,0 +_lessthanorequal: + loadsp 8 + loadsp 8 + lessthan + loadsp 12 + loadsp 12 + eq + or + + storesp 12 + storesp 4 + poppc + + +# opcode 38 +# offset 0x0000 00c0 + .balign 32,0 +_ulessthan: + ; fish up arguments + loadsp 4 + loadsp 12 + + /* low: -1 if low bit dif is negative 0 otherwise: neg (not x&1 and (y&1)) + x&1 y&1 neg (not x&1 and (y&1)) + 1 1 0 + 1 0 0 + 0 1 -1 + 0 0 0 + + */ + loadsp 4 + not + loadsp 4 + and + im 1 + and + neg + + + /* high: upper 31-bit diff is only wrong when diff is 0 and low=-1 + high=x>>1 - y>>1 + low + + extremes + + 0000 - 1111: + low= neg(not 0 and 1) = 1111 (-1) + high=000+ neg(111) +low = 000 + 1001 + low = 1000 + OK + + 1111 - 0000 + low=neg(not 1 and 0) = 0 + high=111+neg(000) + low = 0111 + OK + + + */ + loadsp 8 + + flip + addsp 0 + flip + + loadsp 8 + + flip + addsp 0 + flip + + sub + + ; if they are equal, then the last bit decides... 
+ add + + /* test if negative: result = flip(diff) & 1 */ + flip + im 1 + and + + ; destroy a&b which are on stack + storesp 4 + storesp 4 + + storesp 12 + storesp 4 + poppc + +# opcode 39 +# offset 0x0000 00e0 + .balign 32,0 +_ulessthanorequal: + loadsp 8 + loadsp 8 + ulessthan + loadsp 12 + loadsp 12 + eq + or + + storesp 12 + storesp 4 + poppc + + +# opcode 40 +# offset 0x0000 0100 + .balign 32,0 + .globl _swap +_swap: + breakpoint ; tbd + +# opcode 41 +# offset 0x0000 0120 + .balign 32,0 +_slowmult: + im _slowmultImpl + poppc + +# opcode 42 +# offset 0x0000 0140 + .balign 32,0 +_lshiftright: + loadsp 8 + flip + + loadsp 8 + ashiftleft + flip + + storesp 12 + storesp 4 + + poppc + + +# opcode 43 +# offset 0x0000 0160 + .balign 32,0 +_ashiftleft: + loadsp 8 + + loadsp 8 + im 0x1f + and + fast_neg + im _ashiftleftEnd + add + poppc + + + +# opcode 44 +# offset 0x0000 0180 + .balign 32,0 +_ashiftright: + loadsp 8 + loadsp 8 + lshiftright + + ; handle signed value + im -1 + loadsp 12 + im 0x1f + and + lshiftright + not ; now we have an integer on the stack with the signed + ; bits in the right position + + ; mask these bits with the signed bit. + loadsp 16 + not + flip + im 1 + and + im -1 + add + + and + + ; stuff in the signed bits... 
+ or + + ; store result into correct stack slot + storesp 12 + + ; move up return value + storesp 4 + poppc + +# opcode 45 +# offset 0x0000 01a0 + .balign 32,0 +_call: + ; fn + loadsp 4 + + ; return address + loadsp 4 + + ; store return address + storesp 12 + + ; fn to call + storesp 4 + + pushsp ; flush internal stack + popsp + + poppc + +_storehtail: + + and + loadsp 12 + im 3 + and + fast_neg + im 2 + add + im 3 + ashiftleft + nop + ashiftleft + + or + + loadsp 8 + im ~0x3 + and + + store + + storesp 4 + storesp 4 + poppc + + +# opcode 46 +# offset 0x0000 01c0 + .balign 32,0 +_eq: + loadsp 8 + fast_neg + loadsp 8 + add + + not + loadsp 0 + im 1 + add + not + and + flip + im 1 + and + + storesp 12 + storesp 4 + poppc + +# opcode 47 +# offset 0x0000 01e0 + .balign 32,0 +_neq: + loadsp 8 + fast_neg + loadsp 8 + add + + not + loadsp 0 + im 1 + add + not + and + flip + + not + + im 1 + and + + storesp 12 + storesp 4 + poppc + + +# opcode 48 +# offset 0x0000 0200 + .balign 32,0 +_neg: + loadsp 4 + not + im 1 + add + storesp 8 + + poppc + + +# opcode 49 +# offset 0x0000 0220 + .balign 32,0 +_sub: + loadsp 8 + loadsp 8 + fast_neg + add + storesp 12 + + storesp 4 + + poppc + + +# opcode 50 +# offset 0x0000 0240 + .balign 32,0 +_xor: + loadsp 8 + not + loadsp 8 + and + + loadsp 12 + loadsp 12 + not + and + + or + + storesp 12 + storesp 4 + poppc + +# opcode 51 +# offset 0x0000 0260 + .balign 32,0 +_loadb: + loadsp 4 + im ~0x3 + and + load + + loadsp 8 + im 3 + and + fast_neg + im 3 + add + ; x8 + addsp 0 + addsp 0 + addsp 0 + + lshiftright + + im 0xff + and + storesp 8 + + poppc + + +# opcode 52 +# offset 0x0000 0280 + .balign 32,0 +_storeb: + loadsp 4 + im ~0x3 + and + load + + ; mask away destination + im _mask + loadsp 12 + im 3 + and + addsp 0 + addsp 0 + add + load + + and + + + im _storebtail + poppc + +# opcode 53 +# offset 0x0000 02a0 + .balign 32,0 +_div: + cimpl __divsi3 + +# opcode 54 +# offset 0x0000 02c0 + .balign 32,0 +_mod: + cimpl __modsi3 + +# opcode 55 
+# offset 0x0000 02e0 + .balign 32,0 + .globl _eqbranch +_eqbranch: + loadsp 8 + + ; eq + + not + loadsp 0 + im 1 + add + not + and + flip + im 1 + and + + ; mask + im -1 + add + loadsp 0 + storesp 16 + + ; no branch address + loadsp 4 + + and + + ; fetch boolean & neg mask + loadsp 12 + not + + ; calc address & mask for branch + loadsp 8 + loadsp 16 + add + ; subtract 1 to find PC of branch instruction + im -1 + add + + and + + or + + storesp 4 + storesp 4 + storesp 4 + poppc + + +# opcode 56 +# offset 0x0000 0300 + .balign 32,0 + .globl _neqbranch +_neqbranch: + loadsp 8 + + ; neq + + not + loadsp 0 + im 1 + add + not + and + flip + + not + + im 1 + and + + ; mask + im -1 + add + loadsp 0 + storesp 16 + + ; no branch address + loadsp 4 + + and + + ; fetch boolean & neg mask + loadsp 12 + not + + ; calc address & mask for branch + loadsp 8 + loadsp 16 + add + ; find address of branch instruction + im -1 + add + + and + + or + + storesp 4 + storesp 4 + storesp 4 + poppc + +# opcode 57 +# offset 0x0000 0320 + .balign 32,0 + .globl _poppcrel +_poppcrel: + add + ; address of poppcrel + im -1 + add + poppc + +# opcode 58 +# offset 0x0000 0340 + .balign 32,0 + .globl _config +_config: + im 1 + nop + im _hardware + store + storesp 4 + poppc + +# opcode 59 +# offset 0x0000 0360 + .balign 32,0 +_pushpc: + loadsp 4 + im 1 + add + storesp 8 + poppc + +# opcode 60 +# offset 0x0000 0380 + .balign 32,0 +_syscall_emulate: + .byte 0 + +# opcode 61 +# offset 0x0000 03a0 + .balign 32,0 +_pushspadd: + pushsp + im 4 + add + loadsp 8 + addsp 0 + addsp 0 + add + storesp 8 + + poppc + +# opcode 62 +# offset 0x0000 03c0 + .balign 32,0 +_halfmult: + breakpoint + +# opcode 63 +# offset 0x0000 03e0 + .balign 32,0 +_callpcrel: + loadsp 4 + loadsp 4 + add + im -1 + add + loadsp 4 + + storesp 12 ; return address + storesp 4 + pushsp ; this will flush the internal stack. 
+ popsp + poppc + + .text + + + + +_ashiftleftBegin: + .rept 0x1f + addsp 0 + .endr +_ashiftleftEnd: + storesp 12 + storesp 4 + poppc + +_storebtail: + loadsp 12 + im 0xff + and + loadsp 12 + im 3 + and + + fast_neg + im 3 + add + ; x8 + addsp 0 + addsp 0 + addsp 0 + + ashiftleft + + or + + loadsp 8 + im ~0x3 + and + + store + + storesp 4 + storesp 4 + poppc + + + + +; NB! this is not an EMULATE instruction. It is a varargs fn. + .globl _syscall +_syscall: + syscall + poppc + +_slowmultImpl: + + loadsp 8 ; A + loadsp 8 ; B + im 0 ; C + +.LmoreMult: + mult1bit + + ; cutoff + loadsp 8 + .byte (.LmoreMult-.Lbranch)&0x7f+0x80 +.Lbranch: + neqbranch + + storesp 4 + storesp 4 + storesp 12 + storesp 4 + poppc + + .data + .balign 4,0 +_mask: + .long 0x00ffffff + .long 0xff00ffff + .long 0xffff00ff + .long 0xffffff00 + + + .globl _hardware +_hardware: + .long 0 + .globl _cpu_config +_cpu_config: + .long 0 + diff --git a/zpu/sw/startup/crt_io.c b/zpu/sw/startup/crt_io.c new file mode 100644 index 0000000..966ae33 --- /dev/null +++ b/zpu/sw/startup/crt_io.c @@ -0,0 +1,91 @@ +#include <syscall.h>
+#include <stdio.h>
+#include <errno.h>
+#include <sys/stat.h>
+
+extern int _hardware;
+/* _cpu_config==0 => Abel
+ * _cpu_config==1 => Zeta
+ * _cpu_config==2 => Phi
+ */
+extern int _cpu_config;
+static volatile int *UART;
+static volatile int *TIMER;
+volatile int *MHZ;
+
+
+
+/*
+ * Wait indefinitely for input byte
+ */
+
+
+int __attribute__ ((weak)) inbyte()
+{
+ int val;
+ for (;;)
+ {
+ val=UART[1];
+ if ((val&0x100)!=0)
+ {
+ return val&0xff;
+ }
+ }
+}
+
+
+
+/*
+ * Output one character to the serial port
+ *
+ *
+ */
+void __attribute__ ((weak)) outbyte(int c)
+{
+ /* Wait for space in FIFO */
+ while ((UART[0]&0x100)==0);
+ UART[0]=c;
+}
+
+static const int mhz=64;
+
+void __attribute__ ((weak)) _initIO(void)
+{
+ if (_hardware)
+ {
+ if (_cpu_config==2)
+ {
+ /* Phi board addresses */
+ UART=(volatile int *)0x080a000c;
+ TIMER=(volatile int *)0x080a0014;
+ MHZ=(volatile int *)&mhz;
+ } else
+ {
+ /* Abel board */
+ UART=(volatile int *)0xc000;
+ TIMER=(volatile int *)0x9000;
+ MHZ=(volatile int *)0x8800;
+ }
+ } else
+ {
+ UART=(volatile int *)0x80000024;
+ TIMER=(volatile int *)0x80000100;
+ MHZ=(volatile int *)0x80000200;
+ }
+}
+
+
+
+long long __attribute__ ((weak)) _readCycles()
+{
+ long long clock;
+ unsigned int i;
+
+ TIMER[0]=0x2; /* sample timer */
+ clock=0;
+ for (i=0; i<2; i++)
+ {
+ clock|=((long long )(TIMER[i]))<<(i*32);
+ }
+ return clock;
+}
diff --git a/zpu/sw/startup/time.c b/zpu/sw/startup/time.c new file mode 100644 index 0000000..767b62f --- /dev/null +++ b/zpu/sw/startup/time.c @@ -0,0 +1,32 @@ +#include <_ansi.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+extern long long _readCycles();
+
+
+extern volatile int *MHZ;
+
+long long _readMicroseconds()
+{
+ int Hz;
+ long long clock;
+ Hz=(*MHZ&0xff);
+ clock=_readCycles();
+ return clock/(long long)Hz;
+}
+
+
+
+
/*
 * ANSI time(): report the current time in whole seconds, derived from
 * the cycle counter via _readMicroseconds() (i.e. seconds since the
 * counter started, not necessarily the Unix epoch).  If tloc is
 * non-NULL the result is also stored through it.
 */
time_t
time (time_t *tloc)
{
	time_t now;

	now = (time_t)(_readMicroseconds() / (long long)1000000);
	if (tloc != NULL)
		*tloc = now;
	return now;
}
|