From 91e13ae045ee76c25b8883013d386beab3cb8086 Mon Sep 17 00:00:00 2001 From: Alvaro Date: Thu, 1 Sep 2011 12:13:53 +0100 Subject: jtag: Apply Martin Strubel JTAG implementation for ZPU The current JTAG debugging capable ZPU implementation (VHDL) consists of: - A generic, device independent JTAG module (tck, tms, tdi, tdo, trst) - A TAP module, defining JTAG instruction and data registers - A few control lines to/from the core (request, execute, acknowledge, ready) and: * An emulation instruction register * A data exchange register - An enhanced ZPU small core state machine --- zpu/hdl/example/zpuromgen.c | 1 + zpu/hdl/tap/README | 22 ++ zpu/hdl/tap/jtagx.vhd | 254 ++++++++++++ zpu/hdl/tap/tap.vhd | 217 +++++++++++ zpu/hdl/zealot/devices/trace.vhdl | 18 +- zpu/hdl/zealot/helpers/zpu_small1.vhdl | 14 + zpu/hdl/zealot/testbenches/hwdbg_small1_tb.vhdl | 371 ++++++++++++++++++ zpu/hdl/zealot/zpu_pkg.vhdl | 11 + zpu/hdl/zealot/zpu_small.vhdl | 120 +++++- zpu/sw/emulation/Makefile | 31 ++ zpu/sw/emulation/README | 12 + zpu/sw/emulation/tap.h | 41 ++ zpu/sw/emulation/zpu-opcodes.h | 26 ++ zpu/sw/emulation/zpu-tap.h | 30 ++ zpu/sw/emulation/zpuemu.c | 493 ++++++++++++++++++++++++ zpu/sw/emulation/zpuemu.h | 55 +++ 16 files changed, 1695 insertions(+), 21 deletions(-) create mode 100644 zpu/hdl/tap/README create mode 100644 zpu/hdl/tap/jtagx.vhd create mode 100644 zpu/hdl/tap/tap.vhd create mode 100644 zpu/hdl/zealot/testbenches/hwdbg_small1_tb.vhdl create mode 100644 zpu/sw/emulation/Makefile create mode 100644 zpu/sw/emulation/README create mode 100644 zpu/sw/emulation/tap.h create mode 100644 zpu/sw/emulation/zpu-opcodes.h create mode 100644 zpu/sw/emulation/zpu-tap.h create mode 100644 zpu/sw/emulation/zpuemu.c create mode 100644 zpu/sw/emulation/zpuemu.h diff --git a/zpu/hdl/example/zpuromgen.c b/zpu/hdl/example/zpuromgen.c index fb8c4ba..1c7f19c 100644 --- a/zpu/hdl/example/zpuromgen.c +++ b/zpu/hdl/example/zpuromgen.c @@ -7,6 +7,7 @@ // This software is free to use by 
anyone for any purpose. // +#include #include #include diff --git a/zpu/hdl/tap/README b/zpu/hdl/tap/README new file mode 100644 index 0000000..07a1234 --- /dev/null +++ b/zpu/hdl/tap/README @@ -0,0 +1,22 @@ +Test Access Port implementation for the ZPU (alpha) +---------------------------------------------------------------------------- +08/2011 Martin Strubel + +Brief: +- Implements JTAG interface (on FPGA custom pins) for the zealot small core. + Zealot medium (and others) under scrutiny. +- Important: Clock synchronization between TCK and core clock domain has + to be taken care of by the parenting module, in particular for the + emuexec_i pin. That means: From the emuexec rising edge signal from the TAP + you have to create a one core clock cycle wide emuexec_i pulse. +- If you want to swap the debug interface, just write a new tapxxx.vhd +- The software debug interface may change, and it may be different for various + implementations of the ZPU. + Possible solution: + * Create and register various ZPU core IDs (IDCODE instruction) + * Take care of the ZPU variant in the zpu emulation library: + $ZPU/zpu/sw/emulation/ + * Likewise, handle various debug interfaces (direct/indirect JTAG, etc.) + * Implement different debug targets in gdbproxy, callable like + 'gdbproxy zpu_', and just use different + libzpuemu configurations. diff --git a/zpu/hdl/tap/jtagx.vhd b/zpu/hdl/tap/jtagx.vhd new file mode 100644 index 0000000..89c2075 --- /dev/null +++ b/zpu/hdl/tap/jtagx.vhd @@ -0,0 +1,254 @@ +---------------------------------------------------------------------------- +-- Simple JTAG controller, enhanced version +-- +-- $Id: jtag.vhdl 35 2008-09-05 23:31:00Z strubi $ +-- +-- (c) 2005, 2006, 2011 +-- Martin Strubel // +---------------------------------------------------------------------------- + +-- Functionality: +-- +-- This module implements a JTAG controller with a instruction register (IR) +-- and a data register (DR). 
+-- Data is shifted into the IR register at its MSB end (LSB arrives first), +-- and into the DR register at its LSB end (MSB arrives first). +-- +-- The reason for this inconsistent behaviour is that this controller +-- allows variable sizes of data registers, depending on the IR value. +-- +-- (Actually, the Blackfin CPU JTAG controller does it the same odd way) +-- +-- The IR and DR register size is specified in the parameters: +-- +-- IRSIZE (default 4) +-- DRSIZE (default 8) +-- +-- All special functionality must be encoded outside this module, using +-- the IR values. +-- There is one exception: The Instruction "1111" is reserved for the +-- IR_BYPASS mode. In this mode, the TDI bit is passed onto TDO with a delay +-- of one bit, according to the JTAG standard. +-- +-- The design is tested using the JTAG library coming with the ICEbear +-- USB JTAG adapter. +-- + +library ieee; +use ieee.std_logic_1164.all; +use IEEE.std_logic_unsigned.all; +use IEEE.numeric_std.all; -- TO_INTEGER + +library work; +use work.jtag.all; + +entity JtagController is + generic (IRSIZE : natural := 4; + DRSIZE : natural := 8); + port ( + tck, -- Tap Clock + trst, -- Tap Reset + tms, -- Tap mode select + tdi : in std_logic; -- Tap data in + tdo : out std_logic; -- Tap data out + state : out jtag_state_type; -- JTAG machine state +-- Data register input: + dr_in : in std_logic_vector (DRSIZE-1 downto 0); +-- Configurable DR size: + msbpos : in bitpos_type; +-- Data register output: + dr_out : out std_logic_vector (DRSIZE-1 downto 0); +-- Instruction register: + ir_out : out std_logic_vector (IRSIZE-1 downto 0) + ); +end JtagController; + +architecture behaviour of JtagController is + +-- The only fixed instruction: All ones. Reserved for bypassing. 
+ constant IR_BYPASS : std_logic_vector (IRSIZE-1 downto 0) := + (others => '1'); + + signal mystate : jtag_state_type := TEST_LOGIC_RESET; + signal next_state : jtag_state_type; + + signal s_dr : std_logic_vector (DRSIZE-1 downto 0); + signal s_ir : std_logic_vector (IRSIZE-1 downto 0) := + (others => '1'); + + signal msb : bitpos_type; + + -- Disabled: Buffered register + -- signal ir : std_logic_vector (IRSIZE-1 downto 0); + +begin + +nextstate_decode: + process (mystate, tms) + begin + case mystate is + when CAPTURE_DR => + if (tms = '1') then + next_state <= EXIT1_DR; + else + next_state <= SHIFT_DR; + end if; + when CAPTURE_IR => + if (tms = '1') then + next_state <= EXIT1_IR; + else + next_state <= SHIFT_IR; + end if; + when EXIT1_DR => + if (tms = '1') then + next_state <= UPDATE_DR; + else + next_state <= PAUSE_DR; + end if; + when EXIT1_IR => + if (tms = '1') then + next_state <= UPDATE_IR; + else + next_state <= PAUSE_IR; + end if; + when EXIT2_DR => + if (tms = '1') then + next_state <= UPDATE_DR; + else + next_state <= SHIFT_DR; + end if; + when EXIT2_IR => + if (tms = '1') then + next_state <= UPDATE_IR; + else + next_state <= SHIFT_IR; + end if; + when PAUSE_DR => + if (tms = '1') then + next_state <= EXIT2_DR; + else + next_state <= PAUSE_DR; + end if; + when PAUSE_IR => + if (tms = '1') then + next_state <= EXIT2_IR; + else + next_state <= PAUSE_IR; + end if; + when RUN_TEST_IDLE => + if (tms = '1') then + next_state <= SELECT_DR; + else + next_state <= RUN_TEST_IDLE; + end if; + when SELECT_DR => + if (tms = '1') then + next_state <= SELECT_IR; + else + next_state <= CAPTURE_DR; + end if; + when SELECT_IR => + if (tms = '1') then + next_state <= TEST_LOGIC_RESET; + else + next_state <= CAPTURE_IR; + end if; + when SHIFT_DR => + if (tms = '1') then + next_state <= EXIT1_DR; + else + next_state <= SHIFT_DR; + end if; + when SHIFT_IR => + if (tms = '1') then + next_state <= EXIT1_IR; + else + next_state <= SHIFT_IR; + end if; + when TEST_LOGIC_RESET 
=> + if (tms = '1') then + next_state <= TEST_LOGIC_RESET; + else + next_state <= RUN_TEST_IDLE; + end if; + when UPDATE_DR => + if (tms = '1') then + next_state <= SELECT_DR; + else + next_state <= RUN_TEST_IDLE; + end if; + when UPDATE_IR => + if (tms = '1') then + next_state <= SELECT_DR; + else + next_state <= RUN_TEST_IDLE; + end if; + when others => + end case; + end process; + +-- When we're in BYPASS, use MSB 0 + msb <= 0 when s_ir = IR_BYPASS else msbpos; + +tdo_encode: + process (mystate, s_ir, s_dr) + begin + case mystate is + when SHIFT_IR => + tdo <= s_ir(0); -- Shift out LSB + when SHIFT_DR => + tdo <= s_dr(msb); -- Take MSB + when others => + tdo <= '1'; + end case; + end process; + +state_advance: + process (tck, trst) + begin + if (trst = '0') then + mystate <= TEST_LOGIC_RESET; + elsif rising_edge(tck) then + mystate <= next_state; -- Advance to next state + end if; + end process; + +process_ir_dr: + process (tck) + begin + if rising_edge(tck) then +-- takes effect when entering the concerning state + case next_state is + -- When resetting, go into BYPASS mode + when TEST_LOGIC_RESET => + s_ir <= (others => '1'); + s_dr <= (others => '0'); + when others => + end case; + +-- Mystate is the current state, process takes effect on rising TCK when IN +-- the concerning state. + case mystate is + when SHIFT_IR => + s_ir <= tdi & s_ir(IRSIZE-1 downto 1); -- Shift in from MSB + when SHIFT_DR => + s_dr <= s_dr(DRSIZE-2 downto 0) & tdi; -- likewise from LSB + when CAPTURE_DR => +-- We could move this BYPASS check to a higher level module. But since +-- it's a reserved command, we leave it in here. + if (s_ir /= IR_BYPASS) then + s_dr <= dr_in; -- Latch! 
+ end if; + when others => + end case; + end if; + end process; + + -- always assign state to output + -- We assign nextstate which is valid on the rising_edge of tck + state <= next_state; + + ir_out <= s_ir; + dr_out <= s_dr; + +end behaviour; diff --git a/zpu/hdl/tap/tap.vhd b/zpu/hdl/tap/tap.vhd new file mode 100644 index 0000000..41dc07b --- /dev/null +++ b/zpu/hdl/tap/tap.vhd @@ -0,0 +1,217 @@ +-- Example Test Access Port (TAP) controller implementation +-- (c) 2005-2011 Martin Strubel + +-- General behaviour summary: + +-- * JtagController entity decodes the TMS and TDI sequences +-- * IR and DR are decoded by this TAP controller +-- * Signals to a CPU core are generated by the TAP. See +-- "Core emulation signals" below. These are mapped into a control register: +-- emuctl [W] +-- * Signals from a CPU core are mapped into a status register: +-- emustat [R] + +-- Note: Fixed size IR register is clocked in LSB first, +-- Variable size DR register is clocked in MSB first. + +-- The default EMUIR value on a state machine reset is set to +-- INS_NOP, which should be defined properly by the parent module. +-- This is necessary for the core to not run any spurious command when +-- a breakpoint was hit and the JTAG state machine enters Run-Test-Idle. + +-- This TAP is supported by a generic software library as part of the +-- ICEbear software suite. 
+ +library ieee; +use ieee.std_logic_1164.all; +use ieee.numeric_std.all; + +library work; +use work.jtag.all; +-- TAP register definitions (generated): +use work.tap_registers.all; + +entity tap is + generic (EMUDAT_SIZE : natural := 32; + EMUIR_SIZE : natural := 8; + IDCODE : std_logic_vector(32-1 downto 0) := x"deadbeef"; + INS_NOP : std_logic_vector(8-1 downto 0) := x"00"); + port ( + -- JTAG signals: + tck, trst, tms, tdi : in std_logic; + tdo : out std_logic; + -- Core <-> TAP signals: + core_reset : out std_logic; -- Reset core logic + emuexec : out std_logic; -- Execute opcode on rising edge + emurequest : out std_logic; -- Emulation request to core + emuack : in std_logic; -- Core has acknowledged EMULATION request + emurdy : in std_logic; -- Core ready to execute next instruction + pulse : in std_logic; -- Pulse event counter + -- PC of possibly running core. Allows to access PC without + -- entering emulation. + dbgpc : in std_logic_vector(EMUDAT_SIZE-1 downto 0); -- PC + + -- Extra status bits, core dependent + exstat : in std_logic_vector(7 downto 0); + + emudata_i : in std_logic_vector(EMUDAT_SIZE-1 downto 0); + emudata_o : out std_logic_vector(EMUDAT_SIZE-1 downto 0); + -- Not implemented in this version + -- emudat_wr : in std_logic; + -- emudat_rd : in std_logic; + emuir : out std_logic_vector(EMUIR_SIZE-1 downto 0) + ); +end tap; + +architecture behaviour of tap is + + -- Use generated registers from tap_pkg.vhd + + -- Note: all ones is always reserved for BYPASS + -- all zeros is normally reserved for EXTEST + + signal jtag_state : jtag_state_type; + + signal exec : std_logic := '0'; + + -- Emulation control/status registers: + signal emustat : std_logic_vector(16-1 downto 0); + signal emuctl : std_logic_vector(16-1 downto 0) := x"0000"; + + -- Core emulation signals: + + signal dr_in : std_logic_vector(EMUDAT_SIZE-1 downto 0); + signal dr_out : std_logic_vector(EMUDAT_SIZE-1 downto 0); + + signal count1 : unsigned(32-1 downto 0); + signal 
count1_reset : std_logic; + signal count2 : unsigned(16-1 downto 0); + + signal ir : std_logic_vector(4-1 downto 0); + + -- Position of MSB of data register when not in BYPASS + -- (defines length of DR register). See also jtag_config.vhd + signal msbpos : bitpos_type := 31; + + -- Emulation auxiliaries: + + -- Emulation data register for exchange between core and JTAG: + signal emudat_i : std_logic_vector(EMUDAT_SIZE-1 downto 0); + signal emudat_o : std_logic_vector(EMUDAT_SIZE-1 downto 0); +-- These signals are just stubs. Not implemented in this version. + -- signal emudat_rxf : std_logic; -- Receive full + -- signal emudat_txe : std_logic := '0'; -- Transmit empty + -- signal emudat_ovr : std_logic := '0'; -- Overrun + -- signal emudat_unr : std_logic := '0'; -- Underrun + +begin +i_jtag : JtagController + port map ( + tck => tck, + tms => tms, + tdi => tdi, + tdo => tdo, + trst => trst, + state => jtag_state, + dr_out => dr_out, + msbpos => msbpos, + dr_in => dr_in, + ir_out => ir + ); + +-- Select DR register according to supported IRs +-- We sample this with tck to avoid gated clock issues + +select_dr: + process (tck) + begin + if rising_edge(tck) then + case ir is + when TAP_IDCODE => + dr_in <= IDCODE; + msbpos <= 31; + when TAP_EMUDATA => + dr_in <= emudata_i; + msbpos <= emudata_i'length - 1; + when TAP_EMUSTAT => + dr_in(emustat'length-1 downto 0) <= emustat; + msbpos <= emustat'length - 1; + when TAP_DBGPC => + dr_in <= dbgpc; + msbpos <= dbgpc'length - 1; + when TAP_COUNT1 => + dr_in(count1'length-1 downto 0) <= std_logic_vector(count1); + msbpos <= count1'length - 1; + when TAP_COUNT2 => + dr_in(count2'length-1 downto 0) <= std_logic_vector(count2); + msbpos <= count2'length - 1; + when others => + dr_in <= (others => 'X'); + msbpos <= 31; + end case; + end if; + end process; + + +-- NOTE: action is being taken when ENTERING the concerning state +-- on rising edge of tck. + +-- exec is the signal sent to the core. 
It is like an IRQ event, thus +-- it must be properly treated as an exception (edge sensitive) +-- inside the core logic. + +decode_scanchain_w: + process (tck) + begin + if rising_edge(tck) then + count1_reset <= '0'; + exec <= '0'; + case jtag_state is + when UPDATE_DR => + if ir = TAP_EMUCTRL then + emuctl <= dr_out(15 downto 0); + elsif ir = TAP_EMUDATA then + emudat_o <= dr_out; + elsif ir = TAP_EMUIR then + emuir <= dr_out(EMUIR_SIZE-1 downto 0); + end if; + when TEST_LOGIC_RESET => + count1_reset <= '1'; + count2 <= (others => '0'); + emuctl <= (others => '0'); + emudat_o <= (others => '0'); +-- Important to reset the EMUIR to NOP for sane JTAG operation: + emuir <= INS_NOP; + when RUN_TEST_IDLE => + count2 <= count2 + 1; + exec <= '1'; + when others => + end case; + end if; + end process; + +pulse_count: + process (count1_reset, pulse) + begin + if count1_reset = '1' then + count1 <= (others => '0'); + elsif rising_edge(pulse) then + count1 <= count1 + 1; + end if; + end process; + +emuexec <= exec; +emurequest <= emuctl(0); +core_reset <= emuctl(15); + +-- Registers: +emudata_o <= emudat_o; + +-- FIXME: +-- These mappings should probably move to glue between TAP and core +-- to be more generic. 
+ +emustat <= exstat & + "0000" & "00" & emurdy & emuack; + +end behaviour; diff --git a/zpu/hdl/zealot/devices/trace.vhdl b/zpu/hdl/zealot/devices/trace.vhdl index 83d3782..e067fe6 100644 --- a/zpu/hdl/zealot/devices/trace.vhdl +++ b/zpu/hdl/zealot/devices/trace.vhdl @@ -63,6 +63,7 @@ entity Trace is port( clk_i : in std_logic; dbg_i : in zpu_dbgo_t; + emu_i : in std_logic; -- Emulation trigger stop_i : in std_logic; busy_i : in std_logic ); @@ -76,6 +77,7 @@ begin receive_data: process variable l : line; + variable l0 : line; variable stk_min : unsigned(31 downto 0):=(others => '1'); variable stk_ini : unsigned(31 downto 0); variable first : boolean:=true; @@ -204,7 +206,7 @@ begin else -- OPCODE_SHORT case dbg_i.opcode(3 downto 0) is when OPCODE_BREAK => - write(l,string'("break")); + write(l,string'("break/rte")); when OPCODE_PUSHSP => write(l,string'("pushsp")); when OPCODE_POPPC => @@ -247,7 +249,19 @@ begin first:=false; end if; end if; - wait until clk_i='0' or stop_i='1'; + wait until clk_i='0' or stop_i='1' or emu_i'event; + if emu_i'event then + if emu_i = '1' then + write(l0, "-- Enable Emulation --"); + writeline(l_file,l0); + print(output,"Enter Emulation at PC 0x"&hstr(dbg_i.pc(ADDR_W-1 downto 0))); + else + write(l0, "-- Disable Emulation --"); + writeline(l_file,l0); + print(output,"Leave Emulation at PC 0x"&hstr(dbg_i.pc(ADDR_W-1 downto 0))); + end if; + wait until clk_i='0'; + end if; if stop_i='1' then print(output,"Minimum SP: 0x"&hstr(stk_min)&" Size: 0x"&hstr(stk_ini-stk_min)); wait; diff --git a/zpu/hdl/zealot/helpers/zpu_small1.vhdl b/zpu/hdl/zealot/helpers/zpu_small1.vhdl index 13dd485..91112d0 100644 --- a/zpu/hdl/zealot/helpers/zpu_small1.vhdl +++ b/zpu/hdl/zealot/helpers/zpu_small1.vhdl @@ -64,6 +64,14 @@ entity ZPU_Small1 is port( clk_i : in std_logic; -- CPU clock rst_i : in std_logic; -- Reset + + -- Emulation pins: + emureq_i : in std_logic; + emuexec_i : in std_logic; + emuack_o : out std_logic; + emurdy_o : out std_logic; + emuir 
: in std_logic_vector(OPCODE_W-1 downto 0); + break_o : out std_logic; -- Break executed dbg_o : out zpu_dbgo_t; -- Debug info rs232_tx_o : out std_logic; -- UART Tx @@ -125,7 +133,13 @@ begin D_CARE_VAL => D_CARE_VAL) port map( clk_i => clk_i, reset_i => rst_i, interrupt_i => '0', + emureq_i => emureq_i, + emuexec_i => emuexec_i, + emuack_o => emuack_o, + emurdy_o => emurdy_o, + emuir => emuir, break_o => break_o, dbg_o => dbg_o, + -- BRAM (text, data, bss and stack) a_we_o => a_we, a_addr_o => a_addr, a_o => a_write, a_i => a_read, b_we_o => b_we, b_addr_o => b_addr, b_o => b_write, b_i => b_read, diff --git a/zpu/hdl/zealot/testbenches/hwdbg_small1_tb.vhdl b/zpu/hdl/zealot/testbenches/hwdbg_small1_tb.vhdl new file mode 100644 index 0000000..1685634 --- /dev/null +++ b/zpu/hdl/zealot/testbenches/hwdbg_small1_tb.vhdl @@ -0,0 +1,371 @@ +------------------------------------------------------------------------------ +---- ---- +---- Testbench for the ZPU Small connection to the FPGA ---- +---- ---- +---- http://www.opencores.org/ ---- +---- ---- +---- Description: ---- +---- This is a testbench to simulate the ZPU_Small1 core as used in the ---- +---- *_small1.vhdl ---- +---- ---- +---- ...plus the JTAG debugger proof of concept for the Small core. +---- +---- To Do: ---- +---- - ---- +---- ---- +---- Author: ---- +---- - Salvador E. Tropea, salvador inti.gob.ar ---- +---- Modifications for core debug signal testing +---- - Martin Strubel +---- ---- +------------------------------------------------------------------------------ +---- ---- +---- Copyright (c) 2008 Salvador E. 
Tropea ---- +---- Copyright (c) 2008 Instituto Nacional de Tecnología Industrial ---- +---- ---- +---- Distributed under the BSD license ---- +---- ---- +------------------------------------------------------------------------------ +---- ---- +---- Design unit: HWDbg_Small1_TB(Behave) (Entity and architecture) ---- +---- File name: hwdbg_small1_tb.vhdl ---- +---- Note: None ---- +---- Limitations: None known ---- +---- Errors: None known ---- +---- Library: work ---- +---- Dependencies: IEEE.std_logic_1164 ---- +---- IEEE.numeric_std ---- +---- zpu.zpupkg ---- +---- zpu.txt_util ---- +---- work.zpu_memory ---- +---- Target FPGA: Spartan 3 (XC3S1500-4-FG456) ---- +---- Language: VHDL ---- +---- Wishbone: No ---- +---- Synthesis tools: N/A ---- +---- Simulation tools: Isim +---- Text editor: gvim +---- ---- +------------------------------------------------------------------------------ + +library IEEE; +use IEEE.std_logic_1164.all; +use IEEE.numeric_std.all; + +library zpu; +use zpu.zpupkg.all; +use zpu.txt_util.all; + +library work; +use work.zpu_memory.all; + +entity HWDbg_Small1_TB is +end entity HWDbg_Small1_TB; + +architecture Behave of HWDbg_Small1_TB is + constant WORD_SIZE : natural:=32; -- 32 bits data path + constant ADDR_W : natural:=18; -- 18 bits address space=256 kB, 128 kB I/O + constant BRAM_W : natural:=14; -- 14 bits RAM space=16 kB + constant D_CARE_VAL : std_logic:='0'; -- Fill value + constant CLK_FREQ : positive:=50; -- 50 MHz clock + constant CLK_S_PER : time:=1 us/(2.0*real(CLK_FREQ)); -- Clock semi period + constant BRATE : positive:=115200; + + -- Opcode to leave emulation: + constant OPCODE_LEAVE_EMULATION : unsigned(3 downto 0) := OPCODE_BREAK; + + component tap + generic (EMUDAT_SIZE : natural; + EMUIR_SIZE : natural); + port ( + emu : out std_logic; + tck : in std_logic; + trst : in std_logic; + tms : in std_logic; + tdi : in std_logic; + tdo : out std_logic; + emuexec : out std_logic; -- Execute opcode on rising edge + emurequest : out 
std_logic; -- Emulation request to core + emuack : in std_logic; -- Core has acknowledged EMULATION request + emurdy : in std_logic; -- Core ready to execute next instruction + -- Program Counter without going to emulation. + dbgpc : in std_logic_vector(32-1 downto 0); + + emudata_i : in std_logic_vector(32-1 downto 0); + emudata_o : out std_logic_vector(32-1 downto 0); + emudat_wr : in std_logic; + emudat_rd : in std_logic; + emuir : out std_logic_vector(OPCODE_W-1 downto 0) + ); + end component; + + component ZPU_Small1 is + generic( + WORD_SIZE : natural:=32; -- 32 bits data path + D_CARE_VAL : std_logic:='0'; -- Fill value + CLK_FREQ : positive:=50; -- 50 MHz clock + BRATE : positive:=115200; -- RS232 baudrate + ADDR_W : natural:=16; -- 16 bits address space=64 kB, 32 kB I/O + BRAM_W : natural:=15); -- 15 bits RAM space=32 kB + port( + clk_i : in std_logic; -- CPU clock + rst_i : in std_logic; -- Reset + + -- Emulation pins: + emureq_i : in std_logic; + emuexec_i : in std_logic; + emuack_o : out std_logic; + emurdy_o : out std_logic; + emuir : in std_logic_vector(OPCODE_W-1 downto 0); + + break_o : out std_logic; -- Break executed + dbg_o : out zpu_dbgo_t; -- Debug info + rs232_tx_o : out std_logic; -- UART Tx + rs232_rx_i : in std_logic); -- UART Rx + end component ZPU_Small1; + + signal clk : std_logic; + signal reset : std_logic:='1'; + + signal emureq : std_logic := '0'; + signal emuexec : std_logic := '0'; + signal emuack : std_logic; + signal emurdy : std_logic := '0'; + signal emuir : std_logic_vector(OPCODE_W-1 downto 0); + + signal break : std_logic := '0'; + signal dbg : zpu_dbgo_t; -- Debug info + signal rs232_tx : std_logic; + signal rs232_rx : std_logic; + + + -- Auxiliary signals + signal terminate : std_logic := '0'; + signal mismatch : std_logic := '0'; + signal finish : std_logic; + signal save_sp : unsigned(31 downto 0); + +begin + + zpu : ZPU_Small1 + generic map( + WORD_SIZE => WORD_SIZE, D_CARE_VAL => D_CARE_VAL, + CLK_FREQ => CLK_FREQ, 
BRATE => BRATE, ADDR_W => ADDR_W, + BRAM_W => BRAM_W) + port map( + clk_i => clk, rst_i => reset, rs232_tx_o => rs232_tx, + emureq_i => emureq, emuexec_i => emuexec, + emuack_o => emuack, emurdy_o => emurdy, + emuir => emuir, + rs232_rx_i => rs232_rx, break_o => break, dbg_o => dbg); + + trace_mod : Trace + generic map( + ADDR_W => ADDR_W, WORD_SIZE => WORD_SIZE, + LOG_FILE => "dbg_small1_trace.log") + port map( + clk_i => clk, dbg_i => dbg, emu_i => emuack, stop_i => '0', + busy_i => '0'); + + do_clock: + process + begin + clk <= '0'; + wait for CLK_S_PER; + clk <= '1'; + wait for CLK_S_PER; + if finish='1' then + print("* Finish asserted, end of test"); + if terminate = '1' then + print("* Reason: Terminate"); + end if; + if mismatch = '1' then + print("* Reason: Mismatch"); + end if; + if break = '1' then + print("* Reason: Breakpoint"); + end if; + wait; + end if; + end process do_clock; + + do_reset: + process + begin + wait until rising_edge(clk); + reset <= '0'; + end process do_reset; + +do_emulation: + process + procedure execute_opcode( + code: unsigned(OPCODE_W-1 downto 0) + ) + is begin + emuir <= std_logic_vector(code); + + wait until rising_edge(clk); + emuexec <= '1'; + wait until rising_edge(clk); + emuexec <= '0'; + wait for 200ns; + end execute_opcode; + + procedure push_imm32( + imm: unsigned(31 downto 0) + ) + is begin + execute_opcode(OPCODE_IM & "000" & imm(31 downto 28)); + execute_opcode(OPCODE_IM & imm(27 downto 21)); + execute_opcode(OPCODE_IM & imm(20 downto 14)); + execute_opcode(OPCODE_IM & imm(13 downto 7)); + execute_opcode(OPCODE_IM & imm(6 downto 0)); + end push_imm32; + + procedure getsp + is begin + execute_opcode(OPCODE_SHORT & OPCODE_PUSHSP); -- Restore context + execute_opcode(OPCODE_SHORT & OPCODE_NOP); + print("sp: " & hstr(dbg.stk_a)); + -- execute_opcode(OPCODE_LOADSP & '1' & x"1"); -- Restore context + execute_opcode(OPCODE_SHORT & OPCODE_POPSP); -- Restore context + end getsp; + + procedure mem_read( + addr: unsigned(31 
downto 0) + ) + is begin + execute_opcode(OPCODE_SHORT & OPCODE_PUSHSP); -- Save context (push SP) + -- Save current SP for reference. Note that dbg.sp has not yet + -- been updated by the above command. + save_sp <= dbg.sp; + push_imm32(addr); + execute_opcode(OPCODE_SHORT & OPCODE_LOAD); -- Load indirect + execute_opcode(OPCODE_SHORT & OPCODE_NOP); + print("value: " & hstr(dbg.stk_a)); + execute_opcode(OPCODE_LOADSP & '1' & x"1"); -- Restore context + execute_opcode(OPCODE_SHORT & OPCODE_POPSP); -- Restore context + execute_opcode(OPCODE_SHORT & OPCODE_NOP); + + if (dbg.sp /= save_sp) then + mismatch <= '1'; + print("* ERROR: Stack pointers don't match"); + print("sp: " & hstr(dbg.sp) & " SAVE_SP: " & hstr(save_sp)); + end if; + + + end mem_read; + + begin + wait for 400ns; + emuir <= "00001011"; -- NOP + -- emureq <= '1'; + + -- It is IMPORTANT to wait after an emu request + wait until emurdy = '1' and break ='1'; + wait for 100ns; + +---------------------------------------------------------------------------- + + -- Single stepping: + emureq <= '1'; + wait for 100ns; + + for i in 0 to 200 loop + getsp; + execute_opcode(OPCODE_SHORT & OPCODE_LEAVE_EMULATION); + end loop; + + +---------------------------------------------------------------------------- + -- Save context here: + + execute_opcode(OPCODE_SHORT & OPCODE_PUSHSP); -- Save context (push SP) + -- Save current SP for reference. Note that dbg.sp has not yet + -- been updated by the above command. 
+ save_sp <= dbg.sp; + + -- Now do your stuff and count the pushes, including the above + + -- MEMORY READ { + + push_imm32(x"000008d8"); + + -- execute_opcode(OPCODE_LOADSP & '1' & x"0"); + execute_opcode(OPCODE_SHORT & OPCODE_LOAD); -- Load indirect + + execute_opcode(OPCODE_SHORT & OPCODE_NOP); + print("value: " & hstr(dbg.stk_a)); + + -- Now we should see the data from the address above in + -- dbg.stk_a + -- } + + + -- Restore old stack: + + -- RESTORE + -- execute_opcode(OPCODE_SHORT & OPCODE_PUSHSP); + -- Fix up stack: + -- execute_opcode(OPCODE_IM & "000" & x"8"); + -- execute_opcode(OPCODE_SHORT & OPCODE_ADD); + execute_opcode(OPCODE_LOADSP & '1' & x"1"); -- Restore context + execute_opcode(OPCODE_SHORT & OPCODE_POPSP); -- Restore context + -- push_imm32(save_sp); + + -- Need one NOP to update dbg.sp (for sanity check): + execute_opcode(OPCODE_SHORT & OPCODE_NOP); + + if (dbg.sp /= save_sp) then + mismatch <= '1'; + print("* MEM_READ ERROR: Stack pointers don't match"); + print("sp: " & hstr(dbg.sp) & " SAVE_SP: " & hstr(save_sp)); + end if; + +---------------------------------------------------------------------------- +-- MEMORY WRITE + execute_opcode(OPCODE_SHORT & OPCODE_PUSHSP); -- Save context (push SP) + -- Save current SP for reference. Note that dbg.sp has not yet + -- been updated by the above command. 
+ save_sp <= dbg.sp; + + push_imm32(x"deadbeef"); + execute_opcode(OPCODE_SHORT & OPCODE_NOP); + push_imm32(x"000008d8"); + execute_opcode(OPCODE_SHORT & OPCODE_STORE); -- Store indirect + + execute_opcode(OPCODE_LOADSP & '1' & x"0"); -- Restore context + execute_opcode(OPCODE_SHORT & OPCODE_POPSP); -- Restore context + + -- Need one NOP to update dbg.sp (for sanity check): + execute_opcode(OPCODE_SHORT & OPCODE_NOP); + + if (dbg.sp /= save_sp) then + mismatch <= '1'; + print("* MEM_WRITE ERROR: Stack pointers don't match"); + print("sp: " & hstr(dbg.sp) & " SAVE_SP: " & hstr(save_sp)); + end if; + +---------------------------------------------------------------------------- +-- VERIFY: + mem_read(x"000008d8"); + +---------------------------------------------------------------------------- +-- SET PC + push_imm32(x"00000000"); + execute_opcode(OPCODE_SHORT & OPCODE_POPPC); -- Restore context +---------------------------------------------------------------------------- + + wait for 100ns; + execute_opcode(OPCODE_SHORT & OPCODE_NOP); + getsp; + + terminate <= '1'; + wait; + + + end process; + + +finish <= mismatch or terminate; -- or break; + +end architecture Behave; -- Entity: HWDbg_Small1_TB diff --git a/zpu/hdl/zealot/zpu_pkg.vhdl b/zpu/hdl/zealot/zpu_pkg.vhdl index 2a15880..751d825 100644 --- a/zpu/hdl/zealot/zpu_pkg.vhdl +++ b/zpu/hdl/zealot/zpu_pkg.vhdl @@ -56,6 +56,7 @@ package zpupkg is sp : unsigned(31 downto 0); stk_a : unsigned(31 downto 0); stk_b : unsigned(31 downto 0); + idim : std_logic; -- Debugging: idim flag end record; component Trace is @@ -66,6 +67,7 @@ package zpupkg is port( clk_i : in std_logic; dbg_i : in zpu_dbgo_t; + emu_i : in std_logic; stop_i : in std_logic; busy_i : in std_logic ); @@ -81,6 +83,15 @@ package zpupkg is clk_i : in std_logic; -- System Clock reset_i : in std_logic; -- Synchronous Reset interrupt_i : in std_logic; -- Interrupt + + -- Emulation pins: + emureq_i : in std_logic; + emuexec_i : in std_logic; -- exec pulse. 
1 clk cycle wide! + emuack_o : out std_logic; + emurdy_o : out std_logic; + pulse_o : out std_logic; -- Debug pulse for event counter + emuir : in std_logic_vector(OPCODE_W-1 downto 0); + break_o : out std_logic; -- Breakpoint opcode executed dbg_o : out zpu_dbgo_t; -- Debug outputs (i.e. trace log) -- BRAM (text, data, bss and stack) diff --git a/zpu/hdl/zealot/zpu_small.vhdl b/zpu/hdl/zealot/zpu_small.vhdl index 7e022d4..cf4e189 100644 --- a/zpu/hdl/zealot/zpu_small.vhdl +++ b/zpu/hdl/zealot/zpu_small.vhdl @@ -60,6 +60,14 @@ entity ZPUSmallCore is clk_i : in std_logic; -- System Clock reset_i : in std_logic; -- Synchronous Reset interrupt_i : in std_logic; -- Interrupt + -- Emulation pins: + emureq_i : in std_logic; -- Emulation request from TAP + emuexec_i : in std_logic; -- exec pulse. 1 clk cycle wide! + emuack_o : out std_logic; -- Emulation ACK to TAP + emurdy_o : out std_logic; -- Emulation ready + pulse_o : out std_logic; -- Debug pulse for event counter + emuir : in std_logic_vector(OPCODE_W-1 downto 0); + break_o : out std_logic; -- Breakpoint opcode executed dbg_o : out zpu_dbgo_t; -- Debug outputs (i.e. trace log) -- BRAM (text, data, bss and stack) @@ -95,6 +103,8 @@ architecture Behave of ZPUSmallCore is signal sp_r : unsigned(MAX_ADDR_BIT downto BYTE_BITS):=SP_START; signal idim_r : std_logic:='0'; + signal idim_save_r : std_logic; + -- BRAM (text, data, bss and stack) -- a_r is a register for the top of the stack [SP] -- Note: as this is a stack CPU this is a very important register. @@ -110,7 +120,7 @@ architecture Behave of ZPUSmallCore is -- State machine. 
type state_t is (st_fetch, st_write_io_done, st_execute, st_add, st_or, st_and, st_store, st_read_io, st_write_io, st_fetch_next, - st_add_sp, st_decode, st_resync); + st_add_sp, st_decode, st_resync, st_emulation); signal state : state_t:=st_resync; -- Decoded Opcode @@ -124,6 +134,13 @@ architecture Behave of ZPUSmallCore is signal opcode : unsigned(OPCODE_W-1 downto 0); -- Decoded signal opcode_r : unsigned(OPCODE_W-1 downto 0); -- Registered + -- '1' when we are in IC emulation + signal in_emu : std_logic := '0'; + signal break : std_logic := '0'; -- emulation cause: breakpoint + signal ready : std_logic := '0'; + signal exec : std_logic := '0'; -- Exec strobe + signal reset_exec : std_logic := '0'; -- exec pulse reset + -- IRQ flag signal in_irq_r : std_logic:='0'; -- I/O space address @@ -142,20 +159,26 @@ begin ------------------------- -- Note: We use Port B memory to fetch the opcodes. decode_control: - process(b_i, pc_r) + process(b_i, pc_r, in_emu, exec, emuir) variable topcode : unsigned(OPCODE_W-1 downto 0); begin - -- Select the addressed byte inside the fetched word - case (to_integer(pc_r(BYTE_BITS-1 downto 0))) is - when 0 => - topcode:=b_i(31 downto 24); - when 1 => - topcode:=b_i(23 downto 16); - when 2 => - topcode:=b_i(15 downto 8); - when others => -- 3 - topcode:=b_i(7 downto 0); - end case; + -- When in emulation, get opcode from emuir + if in_emu = '1' and exec = '1' then + topcode := unsigned(emuir); + else + -- Select the addressed byte inside the fetched word + case (to_integer(pc_r(BYTE_BITS-1 downto 0))) is + when 0 => + topcode:=b_i(31 downto 24); + when 1 => + topcode:=b_i(23 downto 16); + when 2 => + topcode:=b_i(15 downto 8); + when others => -- 3 + topcode:=b_i(7 downto 0); + end case; + end if; + opcode <= topcode; if (topcode(7 downto 7)=OPCODE_IM) then @@ -192,19 +215,32 @@ begin d_opcode <= dec_store; when OPCODE_POPSP => d_opcode <= dec_pop_sp; + -- when OPCODE_POPINT => -- Used to return from emulation + -- d_opcode <= 
dec_emuleave; when others => -- OPCODE_NOP and others d_opcode <= dec_nop; end case; end if; end process decode_control; +trigger_exec: + process (clk_i, reset_exec) + begin + if rising_edge(clk_i) then + if emuexec_i = '1' then + exec <= '1'; + elsif reset_exec = '1' then + exec <= '0'; + end if; + end if; + end process; + data_o <= b_i; opcode_control: process (clk_i) variable sp_offset : unsigned(4 downto 0); begin if rising_edge(clk_i) then - break_o <= '0'; write_en_o <= '0'; read_en_o <= '0'; dbg_o.b_inst <= '0'; @@ -238,6 +274,9 @@ begin if interrupt_i='0' then in_irq_r <= '0'; -- no longer in an interrupt end if; + + + reset_exec <= '0'; case state is when st_execute => @@ -245,7 +284,9 @@ begin -- At this point: -- b_i contains opcode word -- a_i contains top of stack - pc_r <= pc_r+1; + if in_emu ='0' then + pc_r <= pc_r+1; + end if; -- Debug info (Trace) dbg_o.b_inst <= '1'; @@ -256,6 +297,7 @@ begin dbg_o.sp(MAX_ADDR_BIT downto BYTE_BITS) <= sp_r; dbg_o.stk_a <= a_i; dbg_o.stk_b <= b_i; + dbg_o.idim <= idim_r; -- During the next cycle we'll be reading the next opcode sp_offset(4):=not opcode_r(4); @@ -324,8 +366,20 @@ begin b_addr_r <= sp_r+sp_offset; state <= st_add_sp; when dec_break => - --report "Break instruction encountered" severity failure; - break_o <= '1'; + -- Hit breakpoint, enter emulation + if in_emu = '0' then + in_emu <= '1'; + break <= '1'; + idim_save_r <= idim_r; -- save idim flag + state <= st_emulation; + else + -- Leave emulation: + idim_r <= idim_save_r; -- restore idim flag + break <= '0'; + in_emu <= '0'; + b_addr_r <= pc_r(MAX_ADDR_BIT downto BYTE_BITS); + state <= st_fetch_next; + end if; when dec_push_sp => -- Push(SP) sp_r <= sp_r-1; @@ -413,6 +467,13 @@ begin -- we'll fetch the opcode @ pc and thus it will -- be available for st_execute the cycle after -- next + + -- If we just entered emulation, save idim flag + -- and mark we're in emulation. 
+ if emureq_i = '1' and in_emu = '0' then + in_emu <= '1'; + idim_save_r <= idim_r; -- save idim flag + end if; b_addr_r <= pc_r(MAX_ADDR_BIT downto BYTE_BITS); state <= st_fetch_next; when st_fetch_next => @@ -423,8 +484,12 @@ begin a_addr_r <= sp_r; b_addr_r <= sp_r+1; state <= st_decode; + reset_exec <= '1'; when st_decode => - if interrupt_i='1' and in_irq_r='0' and idim_r='0' then + state <= st_execute; + if in_emu = '1' then + state <= st_emulation; + elsif interrupt_i='1' and in_irq_r='0' and idim_r='0' then -- We got an interrupt, execute interrupt instead of next instruction in_irq_r <= '1'; d_opcode_r <= dec_interrupt; @@ -432,7 +497,6 @@ begin -- during the st_execute cycle we'll be fetching SP+1 a_addr_r <= sp_r; b_addr_r <= sp_r+1; - state <= st_execute; when st_store => sp_r <= sp_r+1; a_we_r <= '1'; @@ -459,6 +523,15 @@ begin when st_resync => a_addr_r <= sp_r; state <= st_fetch; + when st_emulation => + a_addr_r <= sp_r; + b_addr_r <= sp_r+1; + + if exec = '1' then + state <= st_execute; + else + state <= st_emulation; + end if; when others => null; end case; @@ -467,5 +540,14 @@ begin end process opcode_control; addr_o <= addr_r; +-- Emulation flag export: + + ready <= '1' when state = st_emulation else '0'; + emuack_o <= in_emu; + emurdy_o <= ready and not exec; + break_o <= break; + pulse_o <= exec; + + end architecture Behave; -- Entity: ZPUSmallCore diff --git a/zpu/sw/emulation/Makefile b/zpu/sw/emulation/Makefile new file mode 100644 index 0000000..60e72d1 --- /dev/null +++ b/zpu/sw/emulation/Makefile @@ -0,0 +1,31 @@ +# JTAG / shifter real life test bench code +# (c) 2005-2011, Martin Strubel +# + +NETPP = $(HOME)/src/netpp +BFEMU = $(HOME)/src/blackfin/bfemu + +XSL = tapreg.xsl +# TAP register definitions: +TAPDEF = $(HOME)/src/vhdl/tap/tap.xml + +CFLAGS = -I$(BFEMU) -g -Wall + +DUTIES = libzpuemu.a + +all: $(DUTIES) + +tap.h: $(TAPDEF) $(XSL) + xsltproc -o $@ $(XSL) $< + +zpu-tap.h: $(TAPDEF) $(XSL) + xsltproc -o $@ --param selectDevice 2 
$(XSL) $< + +LIBOBJS = zpuemu.o + +libzpuemu.a: $(LIBOBJS) + $(AR) ruv $@ $(LIBOBJS) + + +clean: + rm -f *.o $(DUTIES) diff --git a/zpu/sw/emulation/README b/zpu/sw/emulation/README new file mode 100644 index 0000000..2e9e126 --- /dev/null +++ b/zpu/sw/emulation/README @@ -0,0 +1,12 @@ +Simple atomic I/O emulation library layer for ZPU [DRAFT] +---------------------------------------------------------------------------- +08/2011 Martin Strubel + +Brief: +- Supports basic I/O set for debugging +- Depends on a JTAG library defining a few simple commands +- Not yet generic enough +- Should in future take care of different ZPU implementations, + optimizations and debug interfaces. Currently, it supports the ICEbearPlus + JTAG library only. +- TBD: Indirect JTAG interface diff --git a/zpu/sw/emulation/tap.h b/zpu/sw/emulation/tap.h new file mode 100644 index 0000000..ae3ac27 --- /dev/null +++ b/zpu/sw/emulation/tap.h @@ -0,0 +1,41 @@ +/************************************************************************** + * + * + * This file was generated by dclib/netpp. Modifications to this file will + * be lost. 
+ * Stylesheet: genreg.xsl (c) 2010-2011 section5 + * + * Version: 0.0 + **************************************************************************/ + + + +#ifndef _BITMASK_ +#define _BITMASK_(msb, lsb) ( (-1 << (msb + 1)) ^ (-1 << lsb) ) +#endif +#ifndef _BIT_ +#define _BIT_(pos) (1 << pos) +#endif + + + +/********************************************************* + * Address segment 'TAP' + *********************************************************/ + +#define TAP_IDCODE 0x01 +#define TAP_EMUDATA 0x02 +#define TAP_EMUCTRL 0x03 +# define EMUREQ _BIT_(0) +# define CORE_RESET _BIT_(15) +#define TAP_EMUSTAT 0x04 +# define EMUACK _BIT_(0) +# define EMURDY _BIT_(1) +# define EMUDAT_UNR _BIT_(2) +# define EMUDAT_OVR _BIT_(3) +# define CORE_SPEC _BITMASK_(15, 8) +# define CORE_SPEC_SHFT 8 +#define TAP_EMUIR 0x05 +#define TAP_DBGPC 0x08 +#define TAP_COUNT1 0x0c +#define TAP_COUNT2 0x0d diff --git a/zpu/sw/emulation/zpu-opcodes.h b/zpu/sw/emulation/zpu-opcodes.h new file mode 100644 index 0000000..6004319 --- /dev/null +++ b/zpu/sw/emulation/zpu-opcodes.h @@ -0,0 +1,26 @@ +/** \file zpu-opcodes.h + * + * Basic ZPU opcode definitions + * + * 2011, + * + */ + +/** This is also the opcode for leaving emulation */ +#define OPCODE_BREAK 0x00 + +/* ZPU basic opcodes that are supported by emulation */ + +#define OPCODE_IM 0x80 +#define OPCODE_NOP 0x0b +#define OPCODE_LOAD 0x08 +#define OPCODE_STORE 0x0c +#define OPCODE_LOADSP 0x60 +// Dunno why, but this bit wants to be inverted in the offset field: +# define LOADSP_INV 0x10 +#define OPCODE_PUSHSP 0x02 +#define OPCODE_POPSP 0x0d +#define OPCODE_POPPC 0x04 + +/* Special opcode: Leave emulation */ +#define OPCODE_EMULEAVE OPCODE_BREAK diff --git a/zpu/sw/emulation/zpu-tap.h b/zpu/sw/emulation/zpu-tap.h new file mode 100644 index 0000000..176c4dc --- /dev/null +++ b/zpu/sw/emulation/zpu-tap.h @@ -0,0 +1,30 @@ +/************************************************************************** + * + * + * This file was generated by 
dclib/netpp. Modifications to this file will + * be lost. + * Stylesheet: genreg.xsl (c) 2010-2011 section5 + * + * Version: 0.0 + **************************************************************************/ + + + +#ifndef _BITMASK_ +#define _BITMASK_(msb, lsb) ( (-1 << (msb + 1)) ^ (-1 << lsb) ) +#endif +#ifndef _BIT_ +#define _BIT_(pos) (1 << pos) +#endif + + + +/********************************************************* + * Address segment 'ZPUsmall' + *********************************************************/ + +#define ZPUsmall_EMUSTAT_DUMMY 0x04 +# define ZPU_IDIM _BIT_(15) +# define ZPU_BREAK _BIT_(14) +# define ZPU_MEMBUSY _BIT_(9) +# define ZPU_INRESET _BIT_(8) diff --git a/zpu/sw/emulation/zpuemu.c b/zpu/sw/emulation/zpuemu.c new file mode 100644 index 0000000..bf2f50b --- /dev/null +++ b/zpu/sw/emulation/zpuemu.c @@ -0,0 +1,493 @@ +/* ZPU emulation library + * + * (c) 2011, Martin Strubel + * + * Limited functionality: Only one core in chain supported. + * + */ + +// These headers must be implemented by the JTAG interface to your +// HW debug adapter +#include "jtag.h" +#include "jtag_intern.h" + +#include "zpuemu.h" + +#include "zpu-opcodes.h" +#include <stdio.h> + + +#define IRSIZE 4 + +static unsigned char +s_reg8[2]; + +static JtagRegister +ir_r = { + .data = &s_reg8[1], + .nbits = IRSIZE, + .flags = JTAGREG_LSB +}; + +static JtagRegister +opcode_r = { + .data = &s_reg8[0], + .nbits = 8, + .flags = JTAGREG_MSB +}; + +static unsigned char +s_reg16[2]; + +static JtagRegister +ctrl_r = { + .data = &s_reg16[0], + .nbits = 16, + .flags = JTAGREG_MSB +}; + +static unsigned char +s_reg32[4]; + +static JtagRegister +data_r = { + .data = &s_reg32[0], + .nbits = 32, + .flags = JTAGREG_MSB +}; + +void select_dr(CpuContext *c, uint8_t dr) +{ + jtag_goto_state(c->jtag, s_jtag_shift_ir); + reg_set(&ir_r, 0, IRSIZE, dr); + shift_generic(c->jtag, &ir_r, NULL, ir_r.nbits, UPDATE); +} + +void shiftout32(CpuContext *c, REGISTER *r, int mode) +{ + jtag_flush(c->jtag); +
jtag_goto_state(c->jtag, s_jtag_shift_dr); + shift_generic(c->jtag, &data_r, &data_r, data_r.nbits, mode); + *r = reg_get(&data_r, 0, 32); +} + +void shiftin16(CpuContext *c, REGISTER r, int mode) +{ + jtag_goto_state(c->jtag, s_jtag_shift_dr); + reg_set(&ctrl_r, 0, 16, r); + shift_generic(c->jtag, &ctrl_r, NULL, ctrl_r.nbits, mode); +} + +void shiftout16(CpuContext *c, REGISTER *r, int mode) +{ + jtag_goto_state(c->jtag, s_jtag_shift_dr); + shift_generic(c->jtag, &ctrl_r, &ctrl_r, ctrl_r.nbits, mode); + *r = reg_get(&ctrl_r, 0, 16); +} + +// Auxiliaries + +static +void push_opcode(CpuContext *c, uint8_t opcode, int mode) +{ + opcode_r.data[0] = opcode; + jtag_goto_state(c->jtag, s_jtag_shift_dr); + shift_generic(c->jtag, &opcode_r, NULL, opcode_r.nbits, mode); +} + +#if 0 +static +void push_val16(CpuContext *c, uint16_t val) +{ + int i = 14; + push_opcode(c, OPCODE_IM | ((val >> i) & 0x7f), EXEC); i -= 7; + push_opcode(c, OPCODE_IM | ((val >> i) & 0x7f), EXEC); i -= 7; + push_opcode(c, OPCODE_IM | ((val >> i) & 0x7f), EXEC); + push_opcode(c, OPCODE_NOP, EXEC); +} +#endif + +static +void push_val32(CpuContext *c, uint32_t val) +{ + int i = 28; + push_opcode(c, OPCODE_IM | ((val >> i) & 0x7f), EXEC); i -= 7; + push_opcode(c, OPCODE_IM | ((val >> i) & 0x7f), EXEC); i -= 7; + push_opcode(c, OPCODE_IM | ((val >> i) & 0x7f), EXEC); i -= 7; + push_opcode(c, OPCODE_IM | ((val >> i) & 0x7f), EXEC); i -= 7; + push_opcode(c, OPCODE_IM | ((val >> i) & 0x7f), EXEC); + push_opcode(c, OPCODE_NOP, EXEC); +} + +static +uint32_t mem_read32(CpuContext *c, uint32_t addr) +{ + REGISTER r; + int q = jtag_queue(c->jtag, 1); + select_dr(c, TAP_EMUIR); + push_opcode(c, OPCODE_PUSHSP, EXEC); + push_val32(c, addr); + push_opcode(c, OPCODE_LOAD, EXEC); + push_opcode(c, OPCODE_NOP, EXEC); + select_dr(c, TAP_EMUDATA); + shiftout32(c, &r, UPDATE); // Execute Stack fixup + select_dr(c, TAP_EMUIR); + push_opcode(c, OPCODE_LOADSP | (LOADSP_INV ^ 0x01), EXEC); + push_opcode(c, OPCODE_POPSP, EXEC); 
+ push_opcode(c, OPCODE_NOP, EXEC); + jtag_queue(c->jtag, q); + return r; +} + +// Little read cache: +struct cache { + ADDR addr; + uint32_t val; +} g_cache = { 0xffffffff, 0 }; + +static +uint32_t mem_read32_cached(CpuContext *c, ADDR a) +{ + if (a != g_cache.addr) { + g_cache.val = mem_read32(c, a); + g_cache.addr = a; + // printf("Read %08x\n", g_cache.val); + } + return g_cache.val; +} + +uint8_t mem_read8(CpuContext *c, ADDR a) +{ + uint32_t v; + int shift = (3 - (a & 0x3)) << 3; + a &= ~0x3; + // printf("shift: %d\n", shift); + v = mem_read32_cached(c, a) >> shift; + return v; +} + +uint16_t mem_read16(CpuContext *c, ADDR a) +{ + uint32_t v; + int shift = (2 - (a & 0x2)) << 3; + a &= ~0x3; + // printf("shift: %d\n", shift); + v = mem_read32_cached(c, a) >> shift; + return v; +} + +void mem_write32(CpuContext*c, uint32_t addr, uint32_t val) +{ + // Invalidate cache, when we're writing to the same addr: + if (g_cache.addr == (addr)) { + g_cache.addr = 0xffffffff; + } + select_dr(c, TAP_EMUIR); + push_opcode(c, OPCODE_PUSHSP, EXEC); + push_val32(c, val); + push_val32(c, addr); // Address + push_opcode(c, OPCODE_STORE, EXEC); + push_opcode(c, OPCODE_LOADSP | (LOADSP_INV ^ 0x00), EXEC); + push_opcode(c, OPCODE_POPSP, EXEC); + push_opcode(c, OPCODE_NOP, UPDATE); +} + + +void mem_write16(CpuContext*c, uint32_t addr, uint16_t val) +{ + int shift = (2 - (addr & 0x2)) << 3; + uint32_t v; + + addr &= ~0x3; + v = mem_read32_cached(c, addr); + + v = (v & ~(0xffff << shift)) | (val << shift); + mem_write32(c, addr, v); +} + +void mem_write8(CpuContext *c, uint32_t addr, uint8_t val) +{ + int shift = (3 - (addr & 0x3)) << 3; + uint32_t v; + + addr &= ~0x3; + v = mem_read32_cached(c, addr); + + v = (v & ~(0xff << shift)) | (val << shift); + mem_write32(c, addr, v); +} + +int enter_emulation(CpuContext *c) +{ + REGISTER r; + + g_cache.addr = 0xffffffff; // Invalidate cache + + // puts(">>> Enter emulation"); + select_dr(c, TAP_EMUCTRL); + r = EMUREQ; + shiftin16(c, r, 
UPDATE); + + return 0; +} + +int leave_emulation(CpuContext *c) +{ + int error = 0; + + // Turn off emulation bit + select_dr(c, TAP_EMUCTRL); + shiftin16(c, 0, UPDATE); + + // Run some emulated opcodes: + // Return from emulation: + select_dr(c, TAP_EMUIR); + push_opcode(c, OPCODE_EMULEAVE, EXEC); + return error; +} + +//////////////////////////////////////////////////////////////////////////// +// API calls + +int zpu_emuinit(CpuContext *c, CONTROLLER jtag) +{ + c->jtag = jtag; + return 0; +} + +int zpu_getid(CpuContext *c, uint32_t *code) +{ + select_dr(c, TAP_IDCODE); + shiftout32(c, code, UPDATE); + return 0; +} + +int zpu_resume(CpuContext *c, int step) +{ + jtag_flush(c->jtag); + if (step) { + select_dr(c, TAP_EMUCTRL); + shiftin16(c, EMUREQ, UPDATE); + select_dr(c, TAP_EMUIR); + push_opcode(c, OPCODE_EMULEAVE, EXEC); + } else { + leave_emulation(c); + } + return 0; +} + +int zpu_emulation(CpuContext *c, int which) +{ + // FIXME: for multicore, this needs to change. + jtag_flush(c->jtag); + if (which) { + enter_emulation(c); + } else { + leave_emulation(c); + } + return 0; +} + +int zpu_state(CpuContext *c, uint16_t *state) +{ + REGISTER r; + int error = 0; + + select_dr(c, TAP_EMUSTAT); + shiftout16(c, &r, UPDATE); + *state = r; + return error; +} + +int zpu_reset(CpuContext *c, int mode) +{ + // TODO: Implement system control register on zealot + return 0; +} + +int zpu_setreg(CpuContext *c, int regno, REGISTER val) +{ + switch (regno) { + case REG_PC: + select_dr(c, TAP_EMUIR); + push_val32(c, val); + push_opcode(c, OPCODE_POPPC, EXEC); + break; + case REG_SP: + select_dr(c, TAP_EMUIR); + push_val32(c, val); + push_opcode(c, OPCODE_POPSP, EXEC); + break; + default: return -1; + } + return 0; +} + +int zpu_getreg(CpuContext *c, int regno, REGISTER *val) +{ + REGISTER r; + int q = jtag_queue(c->jtag, 1); + switch (regno) { + case REG_PC: + // XXX needed to update dbg_o.: + select_dr(c, TAP_EMUIR); // XXX + push_opcode(c, OPCODE_NOP, EXEC); // XXX + 
select_dr(c, TAP_DBGPC); + shiftout32(c, &r, UPDATE); + break; + case REG_SP: + select_dr(c, TAP_EMUIR); + push_opcode(c, OPCODE_PUSHSP, EXEC); + // XXX needed to update dbg_o.: + push_opcode(c, OPCODE_NOP, EXEC); // XXX + push_opcode(c, OPCODE_POPSP, UPDATE); // queue, exec later + select_dr(c, TAP_EMUDATA); + shiftout32(c, &r, EXEC); // (here) + break; + default: return -1; + } + *val = r; + jtag_queue(c->jtag, q); + return 0; +} + +void zpu_dumpstat(CpuContext *c) +{ + REGISTER r; + + select_dr(c, TAP_EMUSTAT); + shiftout16(c, &r, UPDATE); + printf("EMUSTAT: %04x -", r & 0xffff); + if (r) { + if (r & ZPU_IDIM) printf(" [IDIM]"); + if (r & ZPU_INRESET) printf(" [RESET]"); + if (r & ZPU_BREAK) printf(" [BREAK]"); + if (r & EMUACK) printf(" [EMUACK]"); + if (r & EMURDY) printf(" [EMURDY]"); + if (r & ZPU_MEMBUSY) printf(" [MEM_BUSY]"); + } + printf("\n"); + select_dr(c, TAP_COUNT1); + shiftout32(c, &r, UPDATE); + printf("COUNT1: %012d\n", r); + select_dr(c, TAP_COUNT2); + shiftout16(c, &r, UPDATE); + printf("COUNT2: %08d\n", r); +} + +int guess_access(ADDR addr, unsigned int *count) +{ + int sizecode; + // I/O space wants to be addressed long word wise: + if (addr >= 0x80080000 && *count == 4) { + *count = 1; + return LDST_32; + } + // if we have even addresses and even count, we can + // use word size transfers instead of byte wise. 
+ switch (addr % 4) { + case 0: + switch (*count % 4) { + case 0: + sizecode = LDST_32; + *count /= 4; + break; + case 2: + sizecode = LDST_16; + *count /= 2; + break; + default: + sizecode = LDST_8; + break; + } + break; + case 2: + if (*count % 2 == 0) { + sizecode = LDST_16; + *count /= 2; + } else { + sizecode = LDST_8; + } + break; + default: + sizecode = LDST_8; + } + return sizecode; +} + + +int zpu_mem_read(CpuContext *c, ADDR addr, unsigned int count, + unsigned char *buf) +{ + int sz; + uint32_t v; + int q = jtag_queue(c->jtag, 1); + + sz = guess_access(addr, &count); + + switch (sz) { + case LDST_8: + while (count--) { + *buf++ = mem_read8(c, addr++); + } + break; + case LDST_16: + while (count--) { + v = mem_read16(c, addr); addr += 2; + buf[1] = v; v >>= 8; + buf[0] = v; + buf += 2; + } + break; + case LDST_32: + while (count--) { + v = mem_read32(c, addr); addr += 4; + buf[3] = v; v >>= 8; + buf[2] = v; v >>= 8; + buf[1] = v; v >>= 8; + buf[0] = v; + buf += 4; + } + break; + } + jtag_flush(c->jtag); + jtag_queue(c->jtag, q); + return 0; +} + +int zpu_mem_write(CpuContext *c, ADDR addr, unsigned int count, + const unsigned char *buf) +{ + int sz; + uint32_t v; + int q = jtag_queue(c->jtag, 1); + + sz = guess_access(addr, &count); + + switch (sz) { + case LDST_8: + // XXX: Could be optimized further + while (count--) { + v = *buf++; + mem_write8(c, addr, v); addr++; + } + break; + case LDST_16: + while (count--) { + v = (buf[0] << 8) | buf[1]; + mem_write16(c, addr, v); + addr += 2; buf += 2; + } + break; + case LDST_32: + while (count--) { + v = (buf[0] << 24) | (buf[1] << 16) | (buf[2] << 8) | buf[3]; + mem_write32(c, addr, v); + addr += 4; buf += 4; + } + break; + } + jtag_flush(c->jtag); + jtag_queue(c->jtag, q); + return 0; +} diff --git a/zpu/sw/emulation/zpuemu.h b/zpu/sw/emulation/zpuemu.h new file mode 100644 index 0000000..5fa14dd --- /dev/null +++ b/zpu/sw/emulation/zpuemu.h @@ -0,0 +1,55 @@ +/* ZPU emulation library header + * + * (c) 2011, 
Martin Strubel + * + * + */ + + +#include <stdint.h> + +#include "zpu-tap.h" +#include "tap.h" + +#define REGISTER uint32_t +#define ADDR uint32_t + +// #define LDST_32 4 +// #define LDST_16 2 +// #define LDST_8 1 + +#define REG_PC 0 +#define REG_SP 1 + +struct _cpu; + +typedef struct _cpu_context { + struct _cpu *cpu; + JTAG_CTRL jtag; // Pointer to used JTAG controller + short id; // identification tag + short flags; // Interrupt flag - did we Ctrl-C ? + short prev_state; // CPU's previous' state (for change detect) + // Dual core stuff +} CpuContext; + +int zpu_emuinit(CpuContext *c, JTAG_CTRL jtag); +int zpu_getid(CpuContext *c, uint32_t *code); +int zpu_emulation(CpuContext *c, int which); +int zpu_resume(CpuContext *c, int step); +int zpu_setreg(CpuContext *c, int regno, REGISTER val); +int zpu_getreg(CpuContext *c, int regno, REGISTER *val); +int zpu_state(CpuContext *c, uint16_t *state); +int zpu_reset(CpuContext *c, int mode); +int zpu_getpc(CpuContext *c, REGISTER *pc); +void zpu_dumpstat(CpuContext *c); + +int zpu_mem_read(CpuContext *c, ADDR addr, unsigned int count, + unsigned char *b); +int zpu_mem_write(CpuContext *c, ADDR addr, unsigned int count, + const unsigned char *b); + +enum { + LDST_32, + LDST_16, + LDST_8, +}; -- cgit v1.1