From 431a1bf775d468bcd788c3dd716b97cc0fca1f34 Mon Sep 17 00:00:00 2001 From: oharboe Date: Mon, 18 Aug 2008 12:00:34 +0000 Subject: * duplicated crt0.s and some other stuff from libgloss into sw/startup. This makes it easier to tinker w/startup code. --- zpu/ChangeLog | 3 + zpu/docs/zpu_arch.html | 16 +- zpu/sw/startup/crt0.S | 957 ++++++++++++++++++++++++++++++++++++++++++++++++ zpu/sw/startup/crt_io.c | 91 +++++ zpu/sw/startup/time.c | 32 ++ 5 files changed, 1098 insertions(+), 1 deletion(-) create mode 100644 zpu/sw/startup/crt0.S create mode 100644 zpu/sw/startup/crt_io.c create mode 100644 zpu/sw/startup/time.c diff --git a/zpu/ChangeLog b/zpu/ChangeLog index 88bc650..02adb2c 100644 --- a/zpu/ChangeLog +++ b/zpu/ChangeLog @@ -1,3 +1,6 @@ +2008-08-18 Øyvind Harboe + * duplicated crt0.s and some other stuff from libgloss into + sw/startup. This makes it easier to tinker w/startup code. 2008-08-08 Salvador E. Tropea * zpu/hdl/zpu4/core/histogram.perl - generate opcode histogram from HDL simulation output diff --git a/zpu/docs/zpu_arch.html b/zpu/docs/zpu_arch.html index ccbd0df..d8d982d 100644 --- a/zpu/docs/zpu_arch.html +++ b/zpu/docs/zpu_arch.html @@ -5,6 +5,7 @@
  • Getting started
  • Introduction
  • Instruction set +
  • Custom startup code (aka crt0.s)
  • Implementing your own ZPU
  • Jump vectors
  • Memory map @@ -817,7 +818,20 @@ int address = pop();
    - + +

    Custom startup code (aka crt0.s)

    +To minimize the size of an application, one important trick is to +strip down the startup code. The startup code contains emulation +of instructions that may never be used by a particular application. +

    +The startup code is found in the GCC source code under gcc/libgloss/zpu, +but to make the startup code more available, it has been duplicated +into zpu/sw/startup. +

    +To minimize startup size, see codesize +demo. This is pretty standard GCC stuff and simple enough once you've +gone over it a couple of times. +

    Implementing your own ZPU

    One of the neat things about the ZPU is that the instruction set and architecture diff --git a/zpu/sw/startup/crt0.S b/zpu/sw/startup/crt0.S new file mode 100644 index 0000000..00870c4 --- /dev/null +++ b/zpu/sw/startup/crt0.S @@ -0,0 +1,957 @@ +/* Startup code for ZPU + Copyright (C) 2005 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the +Free Software Foundation; either version 2, or (at your option) any +later version. + +In addition to the permissions in the GNU General Public License, the +Free Software Foundation gives you unlimited permission to link the +compiled version of this file with other programs, and to distribute +those programs without any restriction coming from the use of this +file. (The General Public License restrictions do apply in other +respects; for example, they cover modification of the file, and +distribution when not linked into another program.) + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; see the file COPYING. If not, write to +the Free Software Foundation, 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ + .file "crt0.S" + + + + +; .section ".fixed_vectors","ax" +; KLUDGE!!! we remove the executable bit to avoid relaxation + .section ".fixed_vectors","a" + +; DANGER!!!! +; we need to align these code sections to 32 bytes, which +; means we must not use any assembler instructions that are relaxed +; at linker time +; DANGER!!!! 
+ + .macro fixedim value + im \value + .endm + + .macro jsr address + + im 0 ; save R0 + load + im 4 ; save R1 + load + im 8 ; save R2 + load + + fixedim \address + call + + im 8 + store ; restore R2 + im 4 + store ; restore R1 + im 0 + store ; restore R0 + .endm + + + .macro jmp address + fixedim \address + poppc + .endm + + + .macro fast_neg + not + im 1 + add + .endm + + .macro cimpl funcname + ; save R0 + im 0 + load + + ; save R1 + im 4 + load + + ; save R2 + im 8 + load + + loadsp 20 + loadsp 20 + + fixedim \funcname + call + + ; destroy arguments on stack + storesp 0 + storesp 0 + + im 0 + load + + ; poke the result into the right slot + storesp 24 + + ; restore R2 + im 8 + store + + ; restore R1 + im 4 + store + + ; restore r0 + im 0 + store + + + storesp 4 + poppc + .endm + + .macro mult1bit + ; create mask of lowest bit in A + loadsp 8 ; A + im 1 + and + im -1 + add + not + loadsp 8 ; B + and + add ; accumulate in C + + ; shift B left 1 bit + loadsp 4 ; B + addsp 0 + storesp 8 ; B + + ; shift A right 1 bit + loadsp 8 ; A + flip + addsp 0 + flip + storesp 12 ; A + .endm + + + +/* vectors */ + .balign 32,0 +# offset 0x0000 0000 + .globl _start +_start: + ; intSp must be 0 when we jump to _premain + + im ZPU_ID + loadsp 0 + im _cpu_config + store + config + jmp _premain + + + + .balign 32,0 +# offset 0x0000 0020 + .globl _zpu_interrupt_vector +_zpu_interrupt_vector: + jsr _zpu_interrupt + poppc + + +/* instruction emulation code */ + +# opcode 34 +# offset 0x0000 0040 + .balign 32,0 +_loadh: + loadsp 4 + ; by not masking out bit 0, we cause a memory access error + ; on unaligned access + im ~0x2 + and + load + + ; mult 8 + loadsp 8 + im 3 + and + fast_neg + im 2 + add + im 3 + ashiftleft + ; shift right addr&3 * 8 + lshiftright + im 0xffff + and + storesp 8 + + poppc + +# opcode 35 +# offset 0x0000 0060 + .balign 32,0 +_storeh: + loadsp 4 + ; by not masking out bit 0, we cause a memory access error + ; on unaligned access + im ~0x2 + and + load + + ; mask + 
im 0xffff + loadsp 12 + im 3 + and + fast_neg + im 2 + add + im 3 + ashiftleft + ashiftleft + not + + and + + loadsp 12 + im 0xffff + + nop + + fixedim _storehtail + poppc + + +# opcode 36 +# offset 0x0000 0080 + .balign 32,0 +_lessthan: + loadsp 8 + fast_neg + loadsp 8 + add + + ; DANGER!!!! + ; 0x80000000 will overflow when negated, so we need to mask + ; the result above with the compare positive to negative + ; number case + loadsp 12 + loadsp 12 + not + and + not + and + + + ; handle case where we are comparing a negative number + ; and positve number. This can underflow. E.g. consider 0x8000000 < 0x1000 + loadsp 12 + not + loadsp 12 + and + + or + + + + flip + im 1 + and + + + storesp 12 + storesp 4 + poppc + + +# opcode 37 +# offset 0x0000 00a0 + .balign 32,0 +_lessthanorequal: + loadsp 8 + loadsp 8 + lessthan + loadsp 12 + loadsp 12 + eq + or + + storesp 12 + storesp 4 + poppc + + +# opcode 38 +# offset 0x0000 00c0 + .balign 32,0 +_ulessthan: + ; fish up arguments + loadsp 4 + loadsp 12 + + /* low: -1 if low bit dif is negative 0 otherwise: neg (not x&1 and (y&1)) + x&1 y&1 neg (not x&1 and (y&1)) + 1 1 0 + 1 0 0 + 0 1 -1 + 0 0 0 + + */ + loadsp 4 + not + loadsp 4 + and + im 1 + and + neg + + + /* high: upper 31-bit diff is only wrong when diff is 0 and low=-1 + high=x>>1 - y>>1 + low + + extremes + + 0000 - 1111: + low= neg(not 0 and 1) = 1111 (-1) + high=000+ neg(111) +low = 000 + 1001 + low = 1000 + OK + + 1111 - 0000 + low=neg(not 1 and 0) = 0 + high=111+neg(000) + low = 0111 + OK + + + */ + loadsp 8 + + flip + addsp 0 + flip + + loadsp 8 + + flip + addsp 0 + flip + + sub + + ; if they are equal, then the last bit decides... 
+ add + + /* test if negative: result = flip(diff) & 1 */ + flip + im 1 + and + + ; destroy a&b which are on stack + storesp 4 + storesp 4 + + storesp 12 + storesp 4 + poppc + +# opcode 39 +# offset 0x0000 00e0 + .balign 32,0 +_ulessthanorequal: + loadsp 8 + loadsp 8 + ulessthan + loadsp 12 + loadsp 12 + eq + or + + storesp 12 + storesp 4 + poppc + + +# opcode 40 +# offset 0x0000 0100 + .balign 32,0 + .globl _swap +_swap: + breakpoint ; tbd + +# opcode 41 +# offset 0x0000 0120 + .balign 32,0 +_slowmult: + im _slowmultImpl + poppc + +# opcode 42 +# offset 0x0000 0140 + .balign 32,0 +_lshiftright: + loadsp 8 + flip + + loadsp 8 + ashiftleft + flip + + storesp 12 + storesp 4 + + poppc + + +# opcode 43 +# offset 0x0000 0160 + .balign 32,0 +_ashiftleft: + loadsp 8 + + loadsp 8 + im 0x1f + and + fast_neg + im _ashiftleftEnd + add + poppc + + + +# opcode 44 +# offset 0x0000 0180 + .balign 32,0 +_ashiftright: + loadsp 8 + loadsp 8 + lshiftright + + ; handle signed value + im -1 + loadsp 12 + im 0x1f + and + lshiftright + not ; now we have an integer on the stack with the signed + ; bits in the right position + + ; mask these bits with the signed bit. + loadsp 16 + not + flip + im 1 + and + im -1 + add + + and + + ; stuff in the signed bits... 
+ or + + ; store result into correct stack slot + storesp 12 + + ; move up return value + storesp 4 + poppc + +# opcode 45 +# offset 0x0000 01a0 + .balign 32,0 +_call: + ; fn + loadsp 4 + + ; return address + loadsp 4 + + ; store return address + storesp 12 + + ; fn to call + storesp 4 + + pushsp ; flush internal stack + popsp + + poppc + +_storehtail: + + and + loadsp 12 + im 3 + and + fast_neg + im 2 + add + im 3 + ashiftleft + nop + ashiftleft + + or + + loadsp 8 + im ~0x3 + and + + store + + storesp 4 + storesp 4 + poppc + + +# opcode 46 +# offset 0x0000 01c0 + .balign 32,0 +_eq: + loadsp 8 + fast_neg + loadsp 8 + add + + not + loadsp 0 + im 1 + add + not + and + flip + im 1 + and + + storesp 12 + storesp 4 + poppc + +# opcode 47 +# offset 0x0000 01e0 + .balign 32,0 +_neq: + loadsp 8 + fast_neg + loadsp 8 + add + + not + loadsp 0 + im 1 + add + not + and + flip + + not + + im 1 + and + + storesp 12 + storesp 4 + poppc + + +# opcode 48 +# offset 0x0000 0200 + .balign 32,0 +_neg: + loadsp 4 + not + im 1 + add + storesp 8 + + poppc + + +# opcode 49 +# offset 0x0000 0220 + .balign 32,0 +_sub: + loadsp 8 + loadsp 8 + fast_neg + add + storesp 12 + + storesp 4 + + poppc + + +# opcode 50 +# offset 0x0000 0240 + .balign 32,0 +_xor: + loadsp 8 + not + loadsp 8 + and + + loadsp 12 + loadsp 12 + not + and + + or + + storesp 12 + storesp 4 + poppc + +# opcode 51 +# offset 0x0000 0260 + .balign 32,0 +_loadb: + loadsp 4 + im ~0x3 + and + load + + loadsp 8 + im 3 + and + fast_neg + im 3 + add + ; x8 + addsp 0 + addsp 0 + addsp 0 + + lshiftright + + im 0xff + and + storesp 8 + + poppc + + +# opcode 52 +# offset 0x0000 0280 + .balign 32,0 +_storeb: + loadsp 4 + im ~0x3 + and + load + + ; mask away destination + im _mask + loadsp 12 + im 3 + and + addsp 0 + addsp 0 + add + load + + and + + + im _storebtail + poppc + +# opcode 53 +# offset 0x0000 02a0 + .balign 32,0 +_div: + cimpl __divsi3 + +# opcode 54 +# offset 0x0000 02c0 + .balign 32,0 +_mod: + cimpl __modsi3 + +# opcode 55 
+# offset 0x0000 02e0 + .balign 32,0 + .globl _eqbranch +_eqbranch: + loadsp 8 + + ; eq + + not + loadsp 0 + im 1 + add + not + and + flip + im 1 + and + + ; mask + im -1 + add + loadsp 0 + storesp 16 + + ; no branch address + loadsp 4 + + and + + ; fetch boolean & neg mask + loadsp 12 + not + + ; calc address & mask for branch + loadsp 8 + loadsp 16 + add + ; subtract 1 to find PC of branch instruction + im -1 + add + + and + + or + + storesp 4 + storesp 4 + storesp 4 + poppc + + +# opcode 56 +# offset 0x0000 0300 + .balign 32,0 + .globl _neqbranch +_neqbranch: + loadsp 8 + + ; neq + + not + loadsp 0 + im 1 + add + not + and + flip + + not + + im 1 + and + + ; mask + im -1 + add + loadsp 0 + storesp 16 + + ; no branch address + loadsp 4 + + and + + ; fetch boolean & neg mask + loadsp 12 + not + + ; calc address & mask for branch + loadsp 8 + loadsp 16 + add + ; find address of branch instruction + im -1 + add + + and + + or + + storesp 4 + storesp 4 + storesp 4 + poppc + +# opcode 57 +# offset 0x0000 0320 + .balign 32,0 + .globl _poppcrel +_poppcrel: + add + ; address of poppcrel + im -1 + add + poppc + +# opcode 58 +# offset 0x0000 0340 + .balign 32,0 + .globl _config +_config: + im 1 + nop + im _hardware + store + storesp 4 + poppc + +# opcode 59 +# offset 0x0000 0360 + .balign 32,0 +_pushpc: + loadsp 4 + im 1 + add + storesp 8 + poppc + +# opcode 60 +# offset 0x0000 0380 + .balign 32,0 +_syscall_emulate: + .byte 0 + +# opcode 61 +# offset 0x0000 03a0 + .balign 32,0 +_pushspadd: + pushsp + im 4 + add + loadsp 8 + addsp 0 + addsp 0 + add + storesp 8 + + poppc + +# opcode 62 +# offset 0x0000 03c0 + .balign 32,0 +_halfmult: + breakpoint + +# opcode 63 +# offset 0x0000 03e0 + .balign 32,0 +_callpcrel: + loadsp 4 + loadsp 4 + add + im -1 + add + loadsp 4 + + storesp 12 ; return address + storesp 4 + pushsp ; this will flush the internal stack. 
+ popsp + poppc + + .text + + + + +_ashiftleftBegin: + .rept 0x1f + addsp 0 + .endr +_ashiftleftEnd: + storesp 12 + storesp 4 + poppc + +_storebtail: + loadsp 12 + im 0xff + and + loadsp 12 + im 3 + and + + fast_neg + im 3 + add + ; x8 + addsp 0 + addsp 0 + addsp 0 + + ashiftleft + + or + + loadsp 8 + im ~0x3 + and + + store + + storesp 4 + storesp 4 + poppc + + + + +; NB! this is not an EMULATE instruction. It is a varargs fn. + .globl _syscall +_syscall: + syscall + poppc + +_slowmultImpl: + + loadsp 8 ; A + loadsp 8 ; B + im 0 ; C + +.LmoreMult: + mult1bit + + ; cutoff + loadsp 8 + .byte (.LmoreMult-.Lbranch)&0x7f+0x80 +.Lbranch: + neqbranch + + storesp 4 + storesp 4 + storesp 12 + storesp 4 + poppc + + .data + .balign 4,0 +_mask: + .long 0x00ffffff + .long 0xff00ffff + .long 0xffff00ff + .long 0xffffff00 + + + .globl _hardware +_hardware: + .long 0 + .globl _cpu_config +_cpu_config: + .long 0 + diff --git a/zpu/sw/startup/crt_io.c b/zpu/sw/startup/crt_io.c new file mode 100644 index 0000000..966ae33 --- /dev/null +++ b/zpu/sw/startup/crt_io.c @@ -0,0 +1,91 @@ +#include +#include +#include +#include + +extern int _hardware; +/* _cpu_config==0 => Abel + * _cpu_config==1 => Zeta + * _cpu_config==2 => Phi + */ +extern int _cpu_config; +static volatile int *UART; +static volatile int *TIMER; +volatile int *MHZ; + + + +/* + * Wait indefinitely for input byte + */ + + +int __attribute__ ((weak)) inbyte() +{ + int val; + for (;;) + { + val=UART[1]; + if ((val&0x100)!=0) + { + return val&0xff; + } + } +} + + + +/* + * Output one character to the serial port + * + * + */ +void __attribute__ ((weak)) outbyte(int c) +{ + /* Wait for space in FIFO */ + while ((UART[0]&0x100)==0); + UART[0]=c; +} + +static const int mhz=64; + +void __attribute__ ((weak)) _initIO(void) +{ + if (_hardware) + { + if (_cpu_config==2) + { + /* Phi board addresses */ + UART=(volatile int *)0x080a000c; + TIMER=(volatile int *)0x080a0014; + MHZ=(volatile int *)&mhz; + } else + { + /* Abel board */ + 
UART=(volatile int *)0xc000; + TIMER=(volatile int *)0x9000; + MHZ=(volatile int *)0x8800; + } + } else + { + UART=(volatile int *)0x80000024; + TIMER=(volatile int *)0x80000100; + MHZ=(volatile int *)0x80000200; + } +} + + + +long long __attribute__ ((weak)) _readCycles() +{ + long long clock; + unsigned int i; + + TIMER[0]=0x2; /* sample timer */ + clock=0; + for (i=0; i<2; i++) + { + clock|=((long long )(TIMER[i]))<<(i*32); + } + return clock; +} diff --git a/zpu/sw/startup/time.c b/zpu/sw/startup/time.c new file mode 100644 index 0000000..767b62f --- /dev/null +++ b/zpu/sw/startup/time.c @@ -0,0 +1,32 @@ +#include <_ansi.h> +#include +#include + +extern long long _readCycles(); + + +extern volatile int *MHZ; + +long long _readMicroseconds() +{ + int Hz; + long long clock; + Hz=(*MHZ&0xff); + clock=_readCycles(); + return clock/(long long)Hz; +} + + + + +time_t +time (time_t *tloc) +{ + time_t t; + t=(time_t)(_readMicroseconds()/(long long )1000000); + if (tloc!=NULL) + { + *tloc=t; + } + return t; +} -- cgit v1.1