From 536db927b7577ee895677b1fa1d3cd3ae4c85940 Mon Sep 17 00:00:00 2001 From: oharboe Date: Sun, 21 Sep 2008 08:17:35 +0000 Subject: * zpu/hdl/zealot: added small ZPU core, testbenches and FPGA implementation --- zpu/hdl/zealot/zpu_small.vhdl | 471 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 471 insertions(+) create mode 100644 zpu/hdl/zealot/zpu_small.vhdl (limited to 'zpu/hdl/zealot/zpu_small.vhdl') diff --git a/zpu/hdl/zealot/zpu_small.vhdl b/zpu/hdl/zealot/zpu_small.vhdl new file mode 100644 index 0000000..7e022d4 --- /dev/null +++ b/zpu/hdl/zealot/zpu_small.vhdl @@ -0,0 +1,471 @@ +------------------------------------------------------------------------------ +---- ---- +---- ZPU Small ---- +---- ---- +---- http://www.opencores.org/ ---- +---- ---- +---- Description: ---- +---- ZPU is a 32 bits small stack cpu. This is the small size version. ---- +---- It doesn't support external memories, needs a dual ported memory. ---- +---- ---- +---- To Do: ---- +---- - ---- +---- ---- +---- Author: ---- +---- - Øyvind Harboe, oyvind.harboe zylin.com ---- +---- - Salvador E. Tropea, salvador inti.gob.ar ---- +---- ---- +------------------------------------------------------------------------------ +---- ---- +---- Copyright (c) 2008 Øyvind Harboe ---- +---- Copyright (c) 2008 Salvador E. Tropea ---- +---- Copyright (c) 2008 Instituto Nacional de Tecnología Industrial ---- +---- ---- +---- Distributed under the BSD license ---- +---- ---- +------------------------------------------------------------------------------ +---- ---- +---- Design unit: ZPUSmallCore(Behave) (Entity and architecture) ---- +---- File name: zpu_small.vhdl ---- +---- Note: None ---- +---- Limitations: None known ---- +---- Errors: None known ---- +---- Library: zpu ---- +---- Dependencies: IEEE.std_logic_1164 ---- +---- IEEE.numeric_std ---- +---- zpu.zpupkg ---- +---- Target FPGA: Spartan 3 (XC3S1500-4-FG456) ---- +---- Language: VHDL ---- +---- Wishbone: No ---- +---- Synthesis tools: Xilinx Release 9.2.03i - xst J.39 ---- +---- Simulation tools: GHDL [Sokcho edition] (0.2x) ---- +---- Text editor: SETEdit 0.5.x ---- +---- ---- +------------------------------------------------------------------------------ + +library IEEE; +use IEEE.std_logic_1164.ALL; +use IEEE.numeric_std.all; + +library zpu; +use zpu.zpupkg.all; + +entity ZPUSmallCore is + generic( + WORD_SIZE : integer:=32; -- Data width 16/32 + ADDR_W : integer:=16; -- Total address space width (incl. I/O) + MEM_W : integer:=15; -- Memory (prog+data+stack) width + D_CARE_VAL : std_logic:='X'); -- Value used to fill the unsused bits + port( + clk_i : in std_logic; -- System Clock + reset_i : in std_logic; -- Synchronous Reset + interrupt_i : in std_logic; -- Interrupt + break_o : out std_logic; -- Breakpoint opcode executed + dbg_o : out zpu_dbgo_t; -- Debug outputs (i.e. trace log) + -- BRAM (text, data, bss and stack) + a_we_o : out std_logic; -- BRAM A port Write Enable + a_addr_o : out unsigned(MEM_W-1 downto WORD_SIZE/16):=(others => '0'); -- BRAM A Address + a_o : out unsigned(WORD_SIZE-1 downto 0):=(others => '0'); -- Data to BRAM A port + a_i : in unsigned(WORD_SIZE-1 downto 0); -- Data from BRAM A port + b_we_o : out std_logic; -- BRAM B port Write Enable + b_addr_o : out unsigned(MEM_W-1 downto WORD_SIZE/16):=(others => '0'); -- BRAM B Address + b_o : out unsigned(WORD_SIZE-1 downto 0):=(others => '0'); -- Data to BRAM B port + b_i : in unsigned(WORD_SIZE-1 downto 0); -- Data from BRAM B port + -- Memory mapped I/O + mem_busy_i : in std_logic; + data_i : in unsigned(WORD_SIZE-1 downto 0); + data_o : out unsigned(WORD_SIZE-1 downto 0); + addr_o : out unsigned(ADDR_W-1 downto 0); + write_en_o : out std_logic; + read_en_o : out std_logic); +end entity ZPUSmallCore; + +architecture Behave of ZPUSmallCore is + constant MAX_ADDR_BIT : integer:=ADDR_W-2; + constant BYTE_BITS : integer:=WORD_SIZE/16; -- # of bits in a word that addresses bytes + -- Stack Pointer initial value: BRAM size-8 + constant SP_START_1 : unsigned(ADDR_W-1 downto 0):=to_unsigned((2**MEM_W)-8,ADDR_W); + constant SP_START : unsigned(MAX_ADDR_BIT downto BYTE_BITS):= + SP_START_1(MAX_ADDR_BIT downto BYTE_BITS); + constant IO_BIT : integer:=ADDR_W-1; -- Address bit to determine this is an I/O + + -- Program counter + signal pc_r : unsigned(MAX_ADDR_BIT downto 0):=(others => '0'); + -- Stack pointer + signal sp_r : unsigned(MAX_ADDR_BIT downto BYTE_BITS):=SP_START; + signal idim_r : std_logic:='0'; + + -- BRAM (text, data, bss and stack) + -- a_r is a register for the top of the stack [SP] + -- Note: as this is a stack CPU this is a very important register. + signal a_we_r : std_logic:='0'; + signal a_addr_r : unsigned(MAX_ADDR_BIT downto BYTE_BITS):=(others => '0'); + signal a_r : unsigned(WORD_SIZE-1 downto 0):=(others => '0'); + -- b_r is a register for the next value in the stack [SP+1] + -- We also use the B port to fetch instructions. + signal b_we_r : std_logic:='0'; + signal b_addr_r : unsigned(MAX_ADDR_BIT downto BYTE_BITS):=(others => '0'); + signal b_r : unsigned(WORD_SIZE-1 downto 0):=(others => '0'); + + -- State machine. + type state_t is (st_fetch, st_write_io_done, st_execute, st_add, st_or, + st_and, st_store, st_read_io, st_write_io, st_fetch_next, + st_add_sp, st_decode, st_resync); + signal state : state_t:=st_resync; + + -- Decoded Opcode + type decode_t is (dec_nop, dec_im, dec_load_sp, dec_store_sp, dec_add_sp, + dec_emulate, dec_break, dec_push_sp, dec_pop_pc, dec_add, + dec_or, dec_and, dec_load, dec_not, dec_flip, dec_store, + dec_pop_sp, dec_interrupt); + signal d_opcode_r : decode_t; + signal d_opcode : decode_t; + + signal opcode : unsigned(OPCODE_W-1 downto 0); -- Decoded + signal opcode_r : unsigned(OPCODE_W-1 downto 0); -- Registered + + -- IRQ flag + signal in_irq_r : std_logic:='0'; + -- I/O space address + signal addr_r : unsigned(ADDR_W-1 downto 0):=(others => '0'); +begin + -- Dual ported memory interface + a_we_o <= a_we_r; + a_addr_o <= a_addr_r(MEM_W-1 downto BYTE_BITS); + a_o <= a_r; + b_we_o <= b_we_r; + b_addr_o <= b_addr_r(MEM_W-1 downto BYTE_BITS); + b_o <= b_r; + + ------------------------- + -- Instruction Decoder -- + ------------------------- + -- Note: We use Port B memory to fetch the opcodes. + decode_control: + process(b_i, pc_r) + variable topcode : unsigned(OPCODE_W-1 downto 0); + begin + -- Select the addressed byte inside the fetched word + case (to_integer(pc_r(BYTE_BITS-1 downto 0))) is + when 0 => + topcode:=b_i(31 downto 24); + when 1 => + topcode:=b_i(23 downto 16); + when 2 => + topcode:=b_i(15 downto 8); + when others => -- 3 + topcode:=b_i(7 downto 0); + end case; + opcode <= topcode; + + if (topcode(7 downto 7)=OPCODE_IM) then + d_opcode <= dec_im; + elsif (topcode(7 downto 5)=OPCODE_STORESP) then + d_opcode <= dec_store_sp; + elsif (topcode(7 downto 5)=OPCODE_LOADSP) then + d_opcode <= dec_load_sp; + elsif (topcode(7 downto 5)=OPCODE_EMULATE) then + d_opcode <= dec_emulate; + elsif (topcode(7 downto 4)=OPCODE_ADDSP) then + d_opcode <= dec_add_sp; + else -- OPCODE_SHORT + case topcode(3 downto 0) is + when OPCODE_BREAK => + d_opcode <= dec_break; + when OPCODE_PUSHSP => + d_opcode <= dec_push_sp; + when OPCODE_POPPC => + d_opcode <= dec_pop_pc; + when OPCODE_ADD => + d_opcode <= dec_add; + when OPCODE_OR => + d_opcode <= dec_or; + when OPCODE_AND => + d_opcode <= dec_and; + when OPCODE_LOAD => + d_opcode <= dec_load; + when OPCODE_NOT => + d_opcode <= dec_not; + when OPCODE_FLIP => + d_opcode <= dec_flip; + when OPCODE_STORE => + d_opcode <= dec_store; + when OPCODE_POPSP => + d_opcode <= dec_pop_sp; + when others => -- OPCODE_NOP and others + d_opcode <= dec_nop; + end case; + end if; + end process decode_control; + + data_o <= b_i; + opcode_control: + process (clk_i) + variable sp_offset : unsigned(4 downto 0); + begin + if rising_edge(clk_i) then + break_o <= '0'; + write_en_o <= '0'; + read_en_o <= '0'; + dbg_o.b_inst <= '0'; + if reset_i='1' then + state <= st_resync; + sp_r <= SP_START; + pc_r <= (others => '0'); + idim_r <= '0'; + a_addr_r <= (others => '0'); + b_addr_r <= (others => '0'); + a_we_r <= '0'; + b_we_r <= '0'; + a_r <= (others => '0'); + b_r <= (others => '0'); + in_irq_r <= '0'; + addr_r <= (others => '0'); + else -- reset_i/='1' + a_we_r <= '0'; + b_we_r <= '0'; + -- This saves LUTs, by explicitly declaring that the + -- a_o can be left at whatever value if a_we_r is + -- not set. + a_r <= (others => D_CARE_VAL); + b_r <= (others => D_CARE_VAL); + sp_offset:=(others => D_CARE_VAL); + a_addr_r <= (others => D_CARE_VAL); + b_addr_r <= (others => D_CARE_VAL); + addr_r <= a_i(ADDR_W-1 downto 0); + d_opcode_r <= d_opcode; + opcode_r <= opcode; + if interrupt_i='0' then + in_irq_r <= '0'; -- no longer in an interrupt + end if; + + case state is + when st_execute => + state <= st_fetch; + -- At this point: + -- b_i contains opcode word + -- a_i contains top of stack + pc_r <= pc_r+1; + + -- Debug info (Trace) + dbg_o.b_inst <= '1'; + dbg_o.pc <= (others => '0'); + dbg_o.pc(MAX_ADDR_BIT downto 0) <= pc_r; + dbg_o.opcode <= opcode_r; + dbg_o.sp <= (others => '0'); + dbg_o.sp(MAX_ADDR_BIT downto BYTE_BITS) <= sp_r; + dbg_o.stk_a <= a_i; + dbg_o.stk_b <= b_i; + + -- During the next cycle we'll be reading the next opcode + sp_offset(4):=not opcode_r(4); + sp_offset(3 downto 0):=opcode_r(3 downto 0); + + idim_r <= '0'; + + -------------------- + -- Execution Unit -- + -------------------- + case d_opcode_r is + when dec_interrupt => + -- Not a real instruction, but an interrupt + -- Push(PC); PC=32 + sp_r <= sp_r-1; + a_addr_r <= sp_r-1; + a_we_r <= '1'; + a_r <= (others => D_CARE_VAL); + a_r(MAX_ADDR_BIT downto 0) <= pc_r; + -- Jump to ISR + pc_r <= to_unsigned(32,MAX_ADDR_BIT+1); -- interrupt address + --report "ZPU jumped to interrupt!" severity note; + when dec_im => + idim_r <= '1'; + a_we_r <= '1'; + if idim_r='0' then + -- First IM + -- Push the 7 bits (extending the sign) + sp_r <= sp_r-1; + a_addr_r <= sp_r-1; + a_r <= unsigned(resize(signed(opcode_r(6 downto 0)),WORD_SIZE)); + else + -- Next IMs, shift the word and put the new value in the lower + -- bits + a_addr_r <= sp_r; + a_r(WORD_SIZE-1 downto 7) <= a_i(WORD_SIZE-8 downto 0); + a_r(6 downto 0) <= opcode_r(6 downto 0); + end if; + when dec_store_sp => + -- [SP+Offset]=Pop() + b_we_r <= '1'; + b_addr_r <= sp_r+sp_offset; + b_r <= a_i; + sp_r <= sp_r+1; + state <= st_resync; + when dec_load_sp => + -- Push([SP+Offset]) + sp_r <= sp_r-1; + a_addr_r <= sp_r+sp_offset; + when dec_emulate => + -- Push(PC+1), PC=Opcode[4:0]*32 + sp_r <= sp_r-1; + a_we_r <= '1'; + a_addr_r <= sp_r-1; + a_r <= (others => D_CARE_VAL); + a_r(MAX_ADDR_BIT downto 0) <= pc_r+1; + -- Jump to NUM*32 + -- The emulate address is: + -- 98 7654 3210 + -- 0000 00aa aaa0 0000 + pc_r <= (others => '0'); + pc_r(9 downto 5) <= opcode_r(4 downto 0); + when dec_add_sp => + -- Push(Pop()+[SP+Offset]) + a_addr_r <= sp_r; + b_addr_r <= sp_r+sp_offset; + state <= st_add_sp; + when dec_break => + --report "Break instruction encountered" severity failure; + break_o <= '1'; + when dec_push_sp => + -- Push(SP) + sp_r <= sp_r-1; + a_we_r <= '1'; + a_addr_r <= sp_r-1; + a_r <= (others => D_CARE_VAL); + a_r(MAX_ADDR_BIT downto BYTE_BITS) <= sp_r; + when dec_pop_pc => + -- Pop(PC) + pc_r <= a_i(MAX_ADDR_BIT downto 0); + sp_r <= sp_r+1; + state <= st_resync; + when dec_add => + -- Push(Pop()+Pop()) + sp_r <= sp_r+1; + state <= st_add; + when dec_or => + -- Push(Pop() or Pop()) + sp_r <= sp_r+1; + state <= st_or; + when dec_and => + -- Push(Pop() and Pop()) + sp_r <= sp_r+1; + state <= st_and; + when dec_load => + -- Push([Pop()]) + if a_i(IO_BIT)='1' then + addr_r <= a_i(ADDR_W-1 downto 0); + read_en_o <= '1'; + state <= st_read_io; + else + a_addr_r <= a_i(MAX_ADDR_BIT downto BYTE_BITS); + end if; + when dec_not => + -- Push(not(Pop())) + a_addr_r <= sp_r(MAX_ADDR_BIT downto BYTE_BITS); + a_we_r <= '1'; + a_r <= not a_i; + when dec_flip => + -- Push(flip(Pop())) + a_addr_r <= sp_r(MAX_ADDR_BIT downto BYTE_BITS); + a_we_r <= '1'; + for i in 0 to WORD_SIZE-1 loop + a_r(i) <= a_i(WORD_SIZE-1-i); + end loop; + when dec_store => + -- a=Pop(), b=Pop(), [a]=b + b_addr_r <= sp_r+1; + sp_r <= sp_r+1; + if a_i(IO_BIT)='1' then + state <= st_write_io; + else + state <= st_store; + end if; + when dec_pop_sp => + -- SP=Pop() + sp_r <= a_i(MAX_ADDR_BIT downto BYTE_BITS); + state <= st_resync; + when dec_nop => + -- Default, keep addressing to of the stack (A) + a_addr_r <= sp_r; + when others => + null; + end case; + when st_read_io => + -- Wait until memory I/O isn't busy + if mem_busy_i='0' then + state <= st_fetch; + a_we_r <= '1'; + a_r <= data_i; + end if; + when st_write_io => + -- [A]=B + sp_r <= sp_r+1; + write_en_o <= '1'; + addr_r <= a_i(ADDR_W-1 downto 0); + state <= st_write_io_done; + when st_write_io_done => + -- Wait until memory I/O isn't busy + if mem_busy_i='0' then + state <= st_resync; + end if; + when st_fetch => + -- We need to resync. During the *next* cycle + -- we'll fetch the opcode @ pc and thus it will + -- be available for st_execute the cycle after + -- next + b_addr_r <= pc_r(MAX_ADDR_BIT downto BYTE_BITS); + state <= st_fetch_next; + when st_fetch_next => + -- At this point a_i contains the value that is either + -- from the top of stack or should be copied to the top of the stack + a_we_r <= '1'; + a_r <= a_i; + a_addr_r <= sp_r; + b_addr_r <= sp_r+1; + state <= st_decode; + when st_decode => + if interrupt_i='1' and in_irq_r='0' and idim_r='0' then + -- We got an interrupt, execute interrupt instead of next instruction + in_irq_r <= '1'; + d_opcode_r <= dec_interrupt; + end if; + -- during the st_execute cycle we'll be fetching SP+1 + a_addr_r <= sp_r; + b_addr_r <= sp_r+1; + state <= st_execute; + when st_store => + sp_r <= sp_r+1; + a_we_r <= '1'; + a_addr_r <= a_i(MAX_ADDR_BIT downto BYTE_BITS); + a_r <= b_i; + state <= st_resync; + when st_add_sp => + state <= st_add; + when st_add => + a_addr_r <= sp_r; + a_we_r <= '1'; + a_r <= a_i+b_i; + state <= st_fetch; + when st_or => + a_addr_r <= sp_r; + a_we_r <= '1'; + a_r <= a_i or b_i; + state <= st_fetch; + when st_and => + a_addr_r <= sp_r; + a_we_r <= '1'; + a_r <= a_i and b_i; + state <= st_fetch; + when st_resync => + a_addr_r <= sp_r; + state <= st_fetch; + when others => + null; + end case; + end if; -- else reset_i/='1' + end if; -- rising_edge(clk_i) + end process opcode_control; + addr_o <= addr_r; + +end architecture Behave; -- Entity: ZPUSmallCore + -- cgit v1.1 From 105f8b40509ea2657e36e13af76b7580029fd2e5 Mon Sep 17 00:00:00 2001 From: Bert Lange Date: Tue, 25 Oct 2011 22:03:02 +0200 Subject: fix: zealot/zpu_small - load, see patch from Alvaro --- zpu/hdl/zealot/zpu_small.vhdl | 1 + 1 file changed, 1 insertion(+) (limited to 'zpu/hdl/zealot/zpu_small.vhdl') diff --git a/zpu/hdl/zealot/zpu_small.vhdl b/zpu/hdl/zealot/zpu_small.vhdl index 7e022d4..2e5f464 100644 --- a/zpu/hdl/zealot/zpu_small.vhdl +++ b/zpu/hdl/zealot/zpu_small.vhdl @@ -391,6 +391,7 @@ begin null; end case; when st_read_io => + a_addr_r <= sp_r; -- Wait until memory I/O isn't busy if mem_busy_i='0' then state <= st_fetch; -- cgit v1.1 From 221eb9f6525789d20f2895b2de775bf345cf97cb Mon Sep 17 00:00:00 2001 From: Bert Lange Date: Fri, 28 Oct 2011 11:25:31 +0200 Subject: minor fix: reduce simulation warnings at 0 ps --- zpu/hdl/zealot/zpu_small.vhdl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'zpu/hdl/zealot/zpu_small.vhdl') diff --git a/zpu/hdl/zealot/zpu_small.vhdl b/zpu/hdl/zealot/zpu_small.vhdl index 2e5f464..056b924 100644 --- a/zpu/hdl/zealot/zpu_small.vhdl +++ b/zpu/hdl/zealot/zpu_small.vhdl @@ -148,13 +148,13 @@ begin -- Select the addressed byte inside the fetched word case (to_integer(pc_r(BYTE_BITS-1 downto 0))) is when 0 => - topcode:=b_i(31 downto 24); + topcode := to_01( b_i(31 downto 24)); when 1 => - topcode:=b_i(23 downto 16); + topcode := to_01( b_i(23 downto 16)); when 2 => - topcode:=b_i(15 downto 8); + topcode := to_01( b_i(15 downto 8)); when others => -- 3 - topcode:=b_i(7 downto 0); + topcode := to_01( b_i(7 downto 0)); end case; opcode <= topcode; -- cgit v1.1