jtag: Apply Martin Strubel JTAG implementation for ZPUjtagdbg
The current JTAG-debugging-capable ZPU implementation (VHDL) consists of:
- A generic, device-independent JTAG module (tck, tms, tdi, tdo, trst)
- A TAP module, defining JTAG instruction and data registers
- A few control lines to/from the core (request, execute, acknowledge, ready)
  and:
  * An emulation instruction register
  * A data exchange register
- An enhanced ZPU small core state machine
diff --git a/zpu/hdl/example/zpuromgen.c b/zpu/hdl/example/zpuromgen.c
index fb8c4ba..1c7f19c 100644
--- a/zpu/hdl/example/zpuromgen.c
+++ b/zpu/hdl/example/zpuromgen.c
@@ -7,6 +7,7 @@
// This software is free to use by anyone for any purpose.
+#include <stdint.h>
#include <unistd.h>
#include <stdio.h>
diff --git a/zpu/hdl/tap/README b/zpu/hdl/tap/README
new file mode 100644
index 0000000..07a1234
--- /dev/null
+++ b/zpu/hdl/tap/README
@@ -0,0 +1,22 @@
+Test Access Port implementation for the ZPU (alpha)
+08/2011 Martin Strubel <>
+- Implements JTAG interface (on FPGA custom pins) for the zealot small core.
+ Zealot medium (and others) under scrutiny.
+- Important: Clock synchronization between the TCK and core clock domains has
+ to be taken care of by the parent module, in particular for the
+ emuexec_i pin. That means: from the rising edge of the TAP's emuexec signal
+ you have to create an emuexec_i pulse that is exactly one core clock cycle wide.
+- If you want to swap the debug interface, just write a new tapxxx.vhd
+- The software debug interface may change, and it may be different for various
+ implementations of the ZPU.
+ Possible solution:
+ * Create and register various ZPU core IDs (IDCODE instruction)
+ * Take care of the ZPU variant in the zpu emulation library:
+ $ZPU/zpu/sw/emulation/
+ * Likewise, handle various debug interfaces (direct/indirect JTAG, etc.)
+ * Implement different debug targets in gdbproxy, callable like
+ 'gdbproxy zpu_<special_interface_name>', and just use different
+ libzpuemu configurations.
diff --git a/zpu/hdl/tap/jtagx.vhd b/zpu/hdl/tap/jtagx.vhd
new file mode 100644
index 0000000..89c2075
--- /dev/null
+++ b/zpu/hdl/tap/jtagx.vhd
@@ -0,0 +1,254 @@
+-- Simple JTAG controller, enhanced version
+-- $Id: jtag.vhdl 35 2008-09-05 23:31:00Z strubi $
+-- (c) 2005, 2006, 2011
+-- Martin Strubel // <>
+-- Functionality:
+-- This module implements a JTAG controller with an instruction register (IR)
+-- and a data register (DR).
+-- Data is shifted into the IR register at its MSB end (so the IR travels
+-- LSB first on TDI/TDO), and into the DR register at its LSB end (so the DR
+-- travels MSB first on TDI/TDO).
+-- The reason for this inconsistent behaviour is that this controller
+-- allows variable sizes of data registers, depending on the IR value.
+-- (Actually, the Blackfin CPU JTAG controller does it the same odd way.)
+-- The IR and DR register size is specified in the parameters:
+-- IRSIZE (default 4)
+-- DRSIZE (default 8)
+-- All special functionality must be encoded outside this module, using
+-- the IR values.
+-- There is one exception: The Instruction "1111" is reserved for the
+-- IR_BYPASS mode. In this mode, the TDI bit is passed onto TDO with a delay
+-- of one bit, according to the JTAG standard.
+-- The design is tested using the JTAG library coming with the ICEbear
+-- USB JTAG adapter.
+library ieee;
+use ieee.std_logic_1164.all;
+use IEEE.std_logic_unsigned.all;
+use IEEE.numeric_std.all; -- TO_INTEGER
+library work;
+use work.jtag.all;
+entity JtagController is -- JTAG state machine with one instruction register and one variable-length data register
+ generic (IRSIZE : natural := 4; -- Width of the instruction register in bits
+ DRSIZE : natural := 8); -- Width of the data register storage in bits
+ port (
+ tck, -- Tap Clock
+ trst, -- Tap Reset (active low, asynchronous)
+ tms, -- Tap mode select
+ tdi : in std_logic; -- Tap data in
+ tdo : out std_logic; -- Tap data out
+ state : out jtag_state_type; -- JTAG machine state (the state that is entered on the next rising tck edge)
+-- Data register input (latched into DR in CAPTURE_DR unless IR is BYPASS):
+ dr_in : in std_logic_vector (DRSIZE-1 downto 0);
+-- Configurable DR size: index of the DR bit that is shifted out on tdo
+ msbpos : in bitpos_type;
+-- Data register output (current contents of the DR shift register):
+ dr_out : out std_logic_vector (DRSIZE-1 downto 0);
+-- Instruction register (current contents of the IR shift register):
+ ir_out : out std_logic_vector (IRSIZE-1 downto 0)
+ );
+end JtagController;
+architecture behaviour of JtagController is
+-- The only fixed instruction: All ones. Reserved for bypassing.
+ constant IR_BYPASS : std_logic_vector (IRSIZE-1 downto 0) :=
+ (others => '1');
+ -- Current TAP state (registered on tck) and the combinationally
+ -- computed state that will be entered on the next rising tck edge.
+ signal mystate : jtag_state_type := TEST_LOGIC_RESET;
+ signal next_state : jtag_state_type;
+ -- DR and IR shift registers. IR powers up as all ones, i.e. BYPASS.
+ signal s_dr : std_logic_vector (DRSIZE-1 downto 0);
+ signal s_ir : std_logic_vector (IRSIZE-1 downto 0) :=
+ (others => '1');
+ -- Index of the DR bit currently routed to tdo (0 while in BYPASS).
+ signal msb : bitpos_type;
+ -- Disabled: Buffered register
+ -- signal ir : std_logic_vector (IRSIZE-1 downto 0);
+-- The declarative part ends here; the concurrent statements of the
+-- architecture follow.
+begin
+ -- Next-state logic of the TAP controller state machine.
+ -- Purely combinational: next_state is derived from the current state
+ -- (mystate) and tms, and is registered into mystate on the rising tck edge.
+ -- Every state has two successors, selected by tms.
+ process (mystate, tms)
+ begin
+ case mystate is
+ when CAPTURE_DR =>
+ if (tms = '1') then
+ next_state <= EXIT1_DR;
+ else
+ next_state <= SHIFT_DR;
+ end if;
+ when CAPTURE_IR =>
+ if (tms = '1') then
+ next_state <= EXIT1_IR;
+ else
+ next_state <= SHIFT_IR;
+ end if;
+ when EXIT1_DR =>
+ if (tms = '1') then
+ next_state <= UPDATE_DR;
+ else
+ next_state <= PAUSE_DR;
+ end if;
+ when EXIT1_IR =>
+ if (tms = '1') then
+ next_state <= UPDATE_IR;
+ else
+ next_state <= PAUSE_IR;
+ end if;
+ when EXIT2_DR =>
+ if (tms = '1') then
+ next_state <= UPDATE_DR;
+ else
+ next_state <= SHIFT_DR;
+ end if;
+ when EXIT2_IR =>
+ if (tms = '1') then
+ next_state <= UPDATE_IR;
+ else
+ next_state <= SHIFT_IR;
+ end if;
+ when PAUSE_DR =>
+ if (tms = '1') then
+ next_state <= EXIT2_DR;
+ else
+ next_state <= PAUSE_DR;
+ end if;
+ when PAUSE_IR =>
+ if (tms = '1') then
+ next_state <= EXIT2_IR;
+ else
+ next_state <= PAUSE_IR;
+ end if;
+ when RUN_TEST_IDLE =>
+ if (tms = '1') then
+ next_state <= SELECT_DR;
+ else
+ next_state <= RUN_TEST_IDLE;
+ end if;
+ when SELECT_DR =>
+ if (tms = '1') then
+ next_state <= SELECT_IR;
+ else
+ next_state <= CAPTURE_DR;
+ end if;
+ when SELECT_IR =>
+ if (tms = '1') then
+ next_state <= TEST_LOGIC_RESET;
+ else
+ next_state <= CAPTURE_IR;
+ end if;
+ when SHIFT_DR =>
+ if (tms = '1') then
+ next_state <= EXIT1_DR;
+ else
+ next_state <= SHIFT_DR;
+ end if;
+ when SHIFT_IR =>
+ if (tms = '1') then
+ next_state <= EXIT1_IR;
+ else
+ next_state <= SHIFT_IR;
+ end if;
+ -- Without this choice the transitions below would be part of the
+ -- SHIFT_IR branch and override it, and TEST_LOGIC_RESET itself would
+ -- have no transition at all.
+ when TEST_LOGIC_RESET =>
+ -- Stay in reset while tms is high, go to Run-Test/Idle otherwise
+ if (tms = '1') then
+ next_state <= TEST_LOGIC_RESET;
+ else
+ next_state <= RUN_TEST_IDLE;
+ end if;
+ when UPDATE_DR =>
+ if (tms = '1') then
+ next_state <= SELECT_DR;
+ else
+ next_state <= RUN_TEST_IDLE;
+ end if;
+ when UPDATE_IR =>
+ if (tms = '1') then
+ next_state <= SELECT_DR;
+ else
+ next_state <= RUN_TEST_IDLE;
+ end if;
+ when others =>
+ end case;
+ end process;
+-- When we're in BYPASS, use MSB 0
+ msb <= 0 when s_ir = IR_BYPASS else msbpos;
+ -- TDO output multiplexer (combinational).
+ -- While shifting, tdo carries the bit that leaves the selected register:
+ -- bit 0 of IR, or bit 'msb' of DR. In all other states tdo is driven '1'.
+ -- msb is read below, so it has to be part of the sensitivity list;
+ -- otherwise a change of msbpos/IR would not update tdo in simulation.
+ process (mystate, s_ir, s_dr, msb)
+ begin
+ case mystate is
+ when SHIFT_IR =>
+ tdo <= s_ir(0); -- Shift out LSB
+ when SHIFT_DR =>
+ tdo <= s_dr(msb); -- Take MSB
+ when others =>
+ tdo <= '1';
+ end case;
+ end process;
+ process (tck, trst) -- State register: holds the current TAP state in mystate
+ begin
+ if (trst = '0') then -- trst is active low and is tested before the clock edge, i.e. it resets asynchronously
+ mystate <= TEST_LOGIC_RESET;
+ elsif rising_edge(tck) then
+ mystate <= next_state; -- Advance to next state
+ end if;
+ end process;
+ -- IR/DR register process, clocked on the rising edge of tck.
+ -- The first case acts on next_state (effective when ENTERING a state),
+ -- the second on mystate (effective while IN a state).
+ process (tck)
+ begin
+ if rising_edge(tck) then
+-- takes effect when entering the concerning state
+ case next_state is
+ -- The choice below is required: a case statement may not contain
+ -- statements outside of a 'when' alternative.
+ when TEST_LOGIC_RESET =>
+ -- When resetting, go into BYPASS mode
+ s_ir <= (others => '1');
+ s_dr <= (others => '0');
+ when others =>
+ end case;
+-- Mystate is the current state, process takes effect on rising TCK when IN
+-- the concerning state.
+ case mystate is
+ when SHIFT_IR =>
+ s_ir <= tdi & s_ir(IRSIZE-1 downto 1); -- Shift in from MSB
+ when SHIFT_DR =>
+ s_dr <= s_dr(DRSIZE-2 downto 0) & tdi; -- likewise from LSB
+ when CAPTURE_DR =>
+-- We could move this BYPASS check to a higher level module. But since
+-- it's a reserved command, we leave it in here.
+ if (s_ir /= IR_BYPASS) then
+ s_dr <= dr_in; -- Latch!
+ end if;
+ when others =>
+ end case;
+ end if;
+ end process;
+ -- always assign state to output
+ -- We assign nextstate which is valid on the rising_edge of tck
+ state <= next_state;
+ ir_out <= s_ir;
+ dr_out <= s_dr;
+end behaviour;
diff --git a/zpu/hdl/tap/tap.vhd b/zpu/hdl/tap/tap.vhd
new file mode 100644
index 0000000..41dc07b
--- /dev/null
+++ b/zpu/hdl/tap/tap.vhd
@@ -0,0 +1,217 @@
+-- Example Test Access Port (TAP) controller implementation
+-- (c) 2005-2011 Martin Strubel <>
+-- General behaviour summary:
+-- * JtagController entity decodes the TMS and TDI sequences
+-- * IR and DR are decoded by this TAP controller
+-- * Signals to a CPU core are generated by the TAP. See
+-- "Core emulation signals" below. These are mapped into a control register:
+-- emuctl [W]
+-- * Signals from a CPU core are mapped into a status register:
+-- emustat [R]
+-- Note: Fixed size IR register is clocked in LSB first,
+-- Variable size DR register is clocked in MSB first.
+-- The default EMUIR value on a state machine reset is set to
+-- INS_NOP, which should be defined properly by the parent module.
+-- This is necessary for the core to not run any spurious command when
+-- a breakpoint was hit and the JTAG state machine enters Run-Test-Idle.
+-- This TAP is supported by a generic software library as part of the
+-- ICEbear software suite.
+library ieee;
+use ieee.std_logic_1164.all;
+use ieee.numeric_std.all;
+library work;
+use work.jtag.all;
+-- TAP register definitions (generated):
+use work.tap_registers.all;
+entity tap is -- Test Access Port: maps JTAG IR/DR accesses onto core emulation signals and registers
+ generic (EMUDAT_SIZE : natural := 32; -- Width of the emulation data ports and of dbgpc
+ EMUIR_SIZE : natural := 8; -- Width of the emulation instruction register output
+ IDCODE : std_logic_vector(32-1 downto 0) := x"deadbeef"; -- Value presented to DR for the TAP_IDCODE instruction
+ INS_NOP : std_logic_vector(8-1 downto 0) := x"00"); -- Default EMUIR value, must be a NOP of the attached core
+ port (
+ -- JTAG signals:
+ tck, trst, tms, tdi : in std_logic;
+ tdo : out std_logic;
+ -- Core <-> TAP signals:
+ core_reset : out std_logic; -- Reset core logic (emuctl bit 15)
+ emuexec : out std_logic; -- Execute opcode on rising edge
+ emurequest : out std_logic; -- Emulation request to core (emuctl bit 0)
+ emuack : in std_logic; -- Core has acknowledged EMULATION request (emustat bit 0)
+ emurdy : in std_logic; -- Core ready to execute next instruction (emustat bit 1)
+ pulse : in std_logic; -- Pulse event counter (count1 increments on each rising edge)
+ -- PC of possibly running core. Allows to access PC without
+ -- entering emulation.
+ dbgpc : in std_logic_vector(EMUDAT_SIZE-1 downto 0); -- PC
+ -- Extra status bits, core dependent (upper byte of emustat)
+ exstat : in std_logic_vector(7 downto 0);
+ emudata_i : in std_logic_vector(EMUDAT_SIZE-1 downto 0); -- Presented to DR for the TAP_EMUDATA instruction
+ emudata_o : out std_logic_vector(EMUDAT_SIZE-1 downto 0); -- Data register written through TAP_EMUDATA
+ -- Not implemented in this version
+ -- emudat_wr : in std_logic;
+ -- emudat_rd : in std_logic;
+ emuir : out std_logic_vector(EMUIR_SIZE-1 downto 0) -- Emulation instruction for the core
+ );
+end tap;
+architecture behaviour of tap is
+ -- Use generated registers from tap_pkg.vhd
+ -- Note: all ones is always reserved for BYPASS
+ -- all zeros is normally reserved for EXTEST
+ -- State reported by the JtagController (its 'state' output)
+ signal jtag_state : jtag_state_type;
+ -- Registered execute strobe, exported on emuexec
+ signal exec : std_logic := '0';
+ -- Emulation control/status registers:
+ signal emustat : std_logic_vector(16-1 downto 0);
+ signal emuctl : std_logic_vector(16-1 downto 0) := x"0000";
+ -- Core emulation signals:
+ signal dr_in : std_logic_vector(EMUDAT_SIZE-1 downto 0);
+ signal dr_out : std_logic_vector(EMUDAT_SIZE-1 downto 0);
+ -- count1 counts rising edges of 'pulse', count2 counts tck cycles for
+ -- which jtag_state is RUN_TEST_IDLE
+ signal count1 : unsigned(32-1 downto 0);
+ signal count1_reset : std_logic;
+ signal count2 : unsigned(16-1 downto 0);
+ -- Instruction register contents as reported by the JtagController
+ signal ir : std_logic_vector(4-1 downto 0);
+ -- Position of MSB of data register when not in BYPASS
+ -- (defines length of DR register). See also jtag_config.vhd
+ signal msbpos : bitpos_type := 31;
+ -- Emulation auxiliaries:
+ -- Emulation data register for exchange between core and JTAG:
+ -- NOTE(review): emudat_i is not referenced in the visible part of this
+ -- architecture (the port emudata_i is used directly) -- confirm.
+ signal emudat_i : std_logic_vector(EMUDAT_SIZE-1 downto 0);
+ signal emudat_o : std_logic_vector(EMUDAT_SIZE-1 downto 0);
+-- These signals are just stubs. Not implemented in this version.
+ -- signal emudat_rxf : std_logic; -- Receive full
+ -- signal emudat_txe : std_logic := '0'; -- Transmit empty
+ -- signal emudat_ovr : std_logic := '0'; -- Overrun
+ -- signal emudat_unr : std_logic := '0'; -- Underrun
+-- The declarative part ends here; the concurrent statements of the
+-- architecture follow.
+begin
+i_jtag : JtagController -- NOTE(review): no generic map, so IRSIZE/DRSIZE take the declared defaults; entity JtagController defaults DRSIZE to 8 while dr_in/dr_out here are EMUDAT_SIZE wide -- confirm against the declaration in work.jtag
+ port map (
+ tck => tck,
+ tms => tms,
+ tdi => tdi,
+ tdo => tdo,
+ trst => trst,
+ state => jtag_state, -- state entered on the next rising tck edge
+ dr_out => dr_out,
+ msbpos => msbpos,
+ dr_in => dr_in,
+ ir_out => ir
+ );
+-- Select DR register according to supported IRs: chooses the value offered
+-- to the JtagController on dr_in and the DR length (msbpos) for the current IR.
+-- We sample this with tck to avoid gated clock issues
+ process (tck)
+ begin
+ if rising_edge(tck) then -- dr_in/msbpos are registered, so they follow a change of ir one tck cycle later
+ case ir is
+ when TAP_IDCODE =>
+ dr_in <= IDCODE;
+ msbpos <= 31;
+ when TAP_EMUDATA =>
+ dr_in <= emudata_i;
+ msbpos <= emudata_i'length - 1;
+ when TAP_EMUSTAT =>
+ dr_in(emustat'length-1 downto 0) <= emustat; -- only the low bits are assigned; the upper dr_in bits keep their previous value
+ msbpos <= emustat'length - 1;
+ when TAP_DBGPC =>
+ dr_in <= dbgpc;
+ msbpos <= dbgpc'length - 1;
+ when TAP_COUNT1 =>
+ dr_in(count1'length-1 downto 0) <= std_logic_vector(count1);
+ msbpos <= count1'length - 1;
+ when TAP_COUNT2 =>
+ dr_in(count2'length-1 downto 0) <= std_logic_vector(count2); -- only the low bits are assigned, see TAP_EMUSTAT
+ msbpos <= count2'length - 1;
+ when others => -- unsupported IR: dr_in is deliberately driven to 'X'
+ dr_in <= (others => 'X');
+ msbpos <= 31;
+ end case;
+ end if;
+ end process;
+-- NOTE: action is being taken when ENTERING the concerning state
+-- on rising edge of tck.
+-- exec is the signal sent to the core. It is like an IRQ event, thus
+-- it must be properly treated as an exception (edge sensitive)
+-- inside the core logic.
+-- Register write-back and execute strobe:
+-- * UPDATE_DR: the shifted-in DR value is stored into the register
+-- selected by IR (emuctl, emudat_o or emuir).
+-- * TEST_LOGIC_RESET: all TAP registers return to their defaults.
+-- * RUN_TEST_IDLE: count2 is incremented and exec is raised.
+-- count1_reset and exec default to '0' in every other cycle.
+ process (tck)
+ begin
+ if rising_edge(tck) then
+ count1_reset <= '0';
+ exec <= '0';
+ case jtag_state is
+ when UPDATE_DR =>
+ if ir = TAP_EMUCTRL then
+ emuctl <= dr_out(15 downto 0);
+ elsif ir = TAP_EMUDATA then
+ emudat_o <= dr_out;
+ elsif ir = TAP_EMUIR then
+ emuir <= dr_out(EMUIR_SIZE-1 downto 0);
+ end if;
+ -- The reset assignments need their own choice: placed in the
+ -- UPDATE_DR branch they would override every register write above.
+ when TEST_LOGIC_RESET =>
+ count1_reset <= '1';
+ count2 <= (others => '0');
+ emuctl <= (others => '0');
+ emudat_o <= (others => '0');
+-- Important to reset the EMUIR to NOP for sane JTAG operation:
+ emuir <= INS_NOP;
+ when RUN_TEST_IDLE =>
+ count2 <= count2 + 1;
+ exec <= '1';
+ when others =>
+ end case;
+ end if;
+ end process;
+ process (count1_reset, pulse) -- Event counter: counts rising edges of 'pulse'
+ begin
+ if count1_reset = '1' then -- tested before the clock edge, i.e. count1_reset clears the counter asynchronously
+ count1 <= (others => '0');
+ elsif rising_edge(pulse) then -- 'pulse' itself is the clock of this counter
+ count1 <= count1 + 1;
+ end if;
+ end process;
+emuexec <= exec;
+emurequest <= emuctl(0);
+core_reset <= emuctl(15);
+-- Registers:
+emudata_o <= emudat_o;
+-- FIXME:
+-- These mappings should probably move to glue between TAP and core
+-- to be more generic.
+emustat <= exstat &
+ "0000" & "00" & emurdy & emuack;
+end behaviour;
diff --git a/zpu/hdl/zealot/devices/trace.vhdl b/zpu/hdl/zealot/devices/trace.vhdl
index 83d3782..e067fe6 100644
--- a/zpu/hdl/zealot/devices/trace.vhdl
+++ b/zpu/hdl/zealot/devices/trace.vhdl
@@ -63,6 +63,7 @@ entity Trace is
clk_i : in std_logic;
dbg_i : in zpu_dbgo_t;
+ emu_i : in std_logic; -- Emulation trigger
stop_i : in std_logic;
busy_i : in std_logic
@@ -76,6 +77,7 @@ begin
variable l : line;
+ variable l0 : line;
variable stk_min : unsigned(31 downto 0):=(others => '1');
variable stk_ini : unsigned(31 downto 0);
variable first : boolean:=true;
@@ -204,7 +206,7 @@ begin
case dbg_i.opcode(3 downto 0) is
- write(l,string'("break"));
+ write(l,string'("break/rte"));
@@ -247,7 +249,19 @@ begin
end if;
end if;
- wait until clk_i='0' or stop_i='1';
+ wait until clk_i='0' or stop_i='1' or emu_i'event;
+ if emu_i'event then
+ if emu_i = '1' then
+ write(l0, "-- Enable Emulation --");
+ writeline(l_file,l0);
+ print(output,"Enter Emulation at PC 0x"&hstr(dbg_i.pc(ADDR_W-1 downto 0)));
+ else
+ write(l0, "-- Disable Emulation --");
+ writeline(l_file,l0);
+ print(output,"Leave Emulation at PC 0x"&hstr(dbg_i.pc(ADDR_W-1 downto 0)));
+ end if;
+ wait until clk_i='0';
+ end if;
if stop_i='1' then
print(output,"Minimum SP: 0x"&hstr(stk_min)&" Size: 0x"&hstr(stk_ini-stk_min));
diff --git a/zpu/hdl/zealot/helpers/zpu_small1.vhdl b/zpu/hdl/zealot/helpers/zpu_small1.vhdl
index 13dd485..91112d0 100644
--- a/zpu/hdl/zealot/helpers/zpu_small1.vhdl
+++ b/zpu/hdl/zealot/helpers/zpu_small1.vhdl
@@ -64,6 +64,14 @@ entity ZPU_Small1 is
clk_i : in std_logic; -- CPU clock
rst_i : in std_logic; -- Reset
+ -- Emulation pins:
+ emureq_i : in std_logic;
+ emuexec_i : in std_logic;
+ emuack_o : out std_logic;
+ emurdy_o : out std_logic;
+ emuir : in std_logic_vector(OPCODE_W-1 downto 0);
break_o : out std_logic; -- Break executed
dbg_o : out zpu_dbgo_t; -- Debug info
rs232_tx_o : out std_logic; -- UART Tx
@@ -125,7 +133,13 @@ begin
port map(
clk_i => clk_i, reset_i => rst_i, interrupt_i => '0',
+ emureq_i => emureq_i,
+ emuexec_i => emuexec_i,
+ emuack_o => emuack_o,
+ emurdy_o => emurdy_o,
+ emuir => emuir,
break_o => break_o, dbg_o => dbg_o,
-- BRAM (text, data, bss and stack)
a_we_o => a_we, a_addr_o => a_addr, a_o => a_write, a_i => a_read,
b_we_o => b_we, b_addr_o => b_addr, b_o => b_write, b_i => b_read,
diff --git a/zpu/hdl/zealot/testbenches/hwdbg_small1_tb.vhdl b/zpu/hdl/zealot/testbenches/hwdbg_small1_tb.vhdl
new file mode 100644
index 0000000..1685634
--- /dev/null
+++ b/zpu/hdl/zealot/testbenches/hwdbg_small1_tb.vhdl
@@ -0,0 +1,371 @@
+---- ----
+---- Testbench for the ZPU Small connection to the FPGA ----
+---- ----
+---- ----
+---- ----
+---- Description: ----
+---- This is a testbench to simulate the ZPU_Small1 core as used in the ----
+---- *_small1.vhdl ----
+---- ----
+---- the JTAG debugger proof of concept for the Small core.
+---- To Do: ----
+---- - ----
+---- ----
+---- Author: ----
+---- - Salvador E. Tropea, salvador ----
+---- Modifications for core debug signal testing
+---- - Martin Strubel <>
+---- ----
+---- ----
+---- Copyright (c) 2008 Salvador E. Tropea <salvador> ----
+---- Copyright (c) 2008 Instituto Nacional de Tecnología Industrial ----
+---- ----
+---- Distributed under the BSD license ----
+---- ----
+---- ----
+---- Design unit: HWDbg_Small1_TB(Behave) (Entity and architecture) ----
+---- File name: hwdbg_small1_tb.vhdl ----
+---- Note: None ----
+---- Limitations: None known ----
+---- Errors: None known ----
+---- Library: work ----
+---- Dependencies: IEEE.std_logic_1164 ----
+---- IEEE.numeric_std ----
+---- zpu.zpupkg ----
+---- zpu.txt_util ----
+---- work.zpu_memory ----
+---- Target FPGA: Spartan 3 (XC3S1500-4-FG456) ----
+---- Language: VHDL ----
+---- Wishbone: No ----
+---- Synthesis tools: N/A ----
+---- Simulation tools: Isim
+---- Text editor: gvim
+---- ----
+library IEEE;
+use IEEE.std_logic_1164.all;
+use IEEE.numeric_std.all;
+library zpu;
+use zpu.zpupkg.all;
+use zpu.txt_util.all;
+library work;
+use work.zpu_memory.all;
+entity HWDbg_Small1_TB is
+end entity HWDbg_Small1_TB;
+architecture Behave of HWDbg_Small1_TB is
+ constant WORD_SIZE : natural:=32; -- 32 bits data path
+ constant ADDR_W : natural:=18; -- 18 bits address space=256 kB, 128 kB I/O
+ constant BRAM_W : natural:=14; -- 14 bits RAM space=16 kB
+ constant D_CARE_VAL : std_logic:='0'; -- Fill value
+ constant CLK_FREQ : positive:=50; -- 50 MHz clock
+ constant CLK_S_PER : time:=1 us/(2.0*real(CLK_FREQ)); -- Clock semi period
+ constant BRATE : positive:=115200; -- RS232 baudrate
+ -- Opcode to leave emulation:
+ constant OPCODE_LEAVE_EMULATION : unsigned(3 downto 0) := OPCODE_BREAK;
+ component tap -- NOTE(review): differs from entity tap in tap/tap.vhd (declares emu, emudat_wr and emudat_rd; lacks core_reset, pulse and exstat) and is not instantiated in the visible part of this testbench -- confirm before binding it
+ generic (EMUDAT_SIZE : natural;
+ EMUIR_SIZE : natural);
+ port (
+ emu : out std_logic;
+ tck : in std_logic;
+ trst : in std_logic;
+ tms : in std_logic;
+ tdi : in std_logic;
+ tdo : out std_logic;
+ emuexec : out std_logic; -- Execute opcode on rising edge
+ emurequest : out std_logic; -- Emulation request to core
+ emuack : in std_logic; -- Core has acknowledged EMULATION request
+ emurdy : in std_logic; -- Core ready to execute next instruction
+ -- Program Counter without going to emulation.
+ dbgpc : in std_logic_vector(32-1 downto 0);
+ emudata_i : in std_logic_vector(32-1 downto 0);
+ emudata_o : out std_logic_vector(32-1 downto 0);
+ emudat_wr : in std_logic;
+ emudat_rd : in std_logic;
+ emuir : out std_logic_vector(OPCODE_W-1 downto 0)
+ );
+ end component;
+ component ZPU_Small1 is
+ generic(
+ WORD_SIZE : natural:=32; -- 32 bits data path
+ D_CARE_VAL : std_logic:='0'; -- Fill value
+ CLK_FREQ : positive:=50; -- 50 MHz clock
+ BRATE : positive:=115200; -- RS232 baudrate
+ ADDR_W : natural:=16; -- 16 bits address space=64 kB, 32 kB I/O
+ BRAM_W : natural:=15); -- 15 bits RAM space=32 kB
+ port(
+ clk_i : in std_logic; -- CPU clock
+ rst_i : in std_logic; -- Reset
+ -- Emulation pins:
+ emureq_i : in std_logic;
+ emuexec_i : in std_logic;
+ emuack_o : out std_logic;
+ emurdy_o : out std_logic;
+ emuir : in std_logic_vector(OPCODE_W-1 downto 0);
+ break_o : out std_logic; -- Break executed
+ dbg_o : out zpu_dbgo_t; -- Debug info
+ rs232_tx_o : out std_logic; -- UART Tx
+ rs232_rx_i : in std_logic); -- UART Rx
+ end component ZPU_Small1;
+ -- Clock and reset for the core under test
+ signal clk : std_logic;
+ signal reset : std_logic:='1';
+ -- Emulation handshake between the stimulus process and the core
+ signal emureq : std_logic := '0';
+ signal emuexec : std_logic := '0';
+ signal emuack : std_logic;
+ signal emurdy : std_logic := '0';
+ signal emuir : std_logic_vector(OPCODE_W-1 downto 0);
+ signal break : std_logic := '0';
+ signal dbg : zpu_dbgo_t; -- Debug info
+ signal rs232_tx : std_logic;
+ signal rs232_rx : std_logic;
+ -- Auxiliary signals
+ signal terminate : std_logic := '0'; -- stimulus sequence completed
+ signal mismatch : std_logic := '0'; -- a stack pointer sanity check failed
+ signal finish : std_logic; -- mismatch or terminate: stops the clock
+ signal save_sp : unsigned(31 downto 0); -- dbg.sp sampled before a test sequence
+-- The declarative part ends here; the concurrent statements of the
+-- architecture follow.
+begin
+ -- Core under test. The generics are taken from the testbench constants
+ -- of the same name; the emulation pins are driven by the stimulus process.
+ zpu : ZPU_Small1
+ generic map(
+ WORD_SIZE => WORD_SIZE, D_CARE_VAL => D_CARE_VAL,
+ CLK_FREQ => CLK_FREQ, BRATE => BRATE, ADDR_W => ADDR_W,
+ BRAM_W => BRAM_W)
+ port map(
+ clk_i => clk, rst_i => reset, rs232_tx_o => rs232_tx,
+ emureq_i => emureq, emuexec_i => emuexec,
+ emuack_o => emuack, emurdy_o => emurdy,
+ emuir => emuir,
+ rs232_rx_i => rs232_rx, break_o => break, dbg_o => dbg);
+ trace_mod : Trace
+ generic map(
+ LOG_FILE => "dbg_small1_trace.log")
+ port map(
+ clk_i => clk, dbg_i => dbg, emu_i => emuack, stop_i => '0',
+ busy_i => '0');
+ do_clock: -- Clock generator: toggles clk with a half period of CLK_S_PER until finish is asserted
+ process
+ begin
+ clk <= '0';
+ wait for CLK_S_PER;
+ clk <= '1';
+ wait for CLK_S_PER;
+ if finish='1' then -- report why the test ended, then stop generating clock edges
+ print("* Finish asserted, end of test");
+ if terminate = '1' then
+ print("* Reason: Terminate");
+ end if;
+ if mismatch = '1' then
+ print("* Reason: Mismatch");
+ end if;
+ if break = '1' then
+ print("* Reason: Breakpoint");
+ end if;
+ wait; -- suspend forever: no further clock edges are produced
+ end if;
+ end process do_clock;
+ do_reset: -- Reset release: reset starts at '1' (signal default) and is driven to '0' at every rising clk edge
+ process
+ begin
+ wait until rising_edge(clk);
+ reset <= '0';
+ end process do_reset;
+ -- Stimulus process: waits for the core to hit a breakpoint, requests
+ -- emulation and then feeds opcodes through emuir/emuexec to read the
+ -- stack pointer, read and write memory and finally set the PC.
+ -- After each sequence the stack pointer is compared with the value
+ -- saved before it; a difference raises 'mismatch'.
+ process
+ -- Present 'code' on emuir, pulse emuexec for one clk cycle and give
+ -- the core 200 ns to execute the opcode.
+ procedure execute_opcode(
+ code: unsigned(OPCODE_W-1 downto 0)
+ )
+ is begin
+ emuir <= std_logic_vector(code);
+ wait until rising_edge(clk);
+ emuexec <= '1';
+ wait until rising_edge(clk);
+ emuexec <= '0';
+ wait for 200 ns;
+ end execute_opcode;
+ -- Push a 32 bit immediate as a sequence of five IM opcodes
+ -- (4 + 7 + 7 + 7 + 7 bits, most significant part first).
+ procedure push_imm32(
+ imm: unsigned(31 downto 0)
+ )
+ is begin
+ execute_opcode(OPCODE_IM & "000" & imm(31 downto 28));
+ execute_opcode(OPCODE_IM & imm(27 downto 21));
+ execute_opcode(OPCODE_IM & imm(20 downto 14));
+ execute_opcode(OPCODE_IM & imm(13 downto 7));
+ execute_opcode(OPCODE_IM & imm(6 downto 0));
+ end push_imm32;
+ -- Print the stack pointer: push SP, show the top of stack, pop it again.
+ procedure getsp
+ is begin
+ execute_opcode(OPCODE_SHORT & OPCODE_PUSHSP); -- Push SP onto the stack
+ execute_opcode(OPCODE_SHORT & OPCODE_NOP);
+ print("sp: " & hstr(dbg.stk_a));
+ -- execute_opcode(OPCODE_LOADSP & '1' & x"1"); -- Restore context
+ execute_opcode(OPCODE_SHORT & OPCODE_POPSP); -- Restore context
+ end getsp;
+ -- Print the word stored at 'addr' and check that SP is unchanged afterwards.
+ procedure mem_read(
+ addr: unsigned(31 downto 0)
+ )
+ is begin
+ execute_opcode(OPCODE_SHORT & OPCODE_PUSHSP); -- Save context
+ -- Save current SP for reference. Note the dbg.sp is not yet
+ -- updated to the above command.
+ save_sp <= dbg.sp;
+ push_imm32(addr);
+ execute_opcode(OPCODE_SHORT & OPCODE_LOAD); -- Load indirect
+ execute_opcode(OPCODE_SHORT & OPCODE_NOP);
+ print("value: " & hstr(dbg.stk_a));
+ execute_opcode(OPCODE_LOADSP & '1' & x"1"); -- Restore context
+ execute_opcode(OPCODE_SHORT & OPCODE_POPSP); -- Restore context
+ execute_opcode(OPCODE_SHORT & OPCODE_NOP);
+ if (dbg.sp /= save_sp) then
+ mismatch <= '1';
+ print("* ERROR: Stack pointers don't match");
+ print("sp: " & hstr(dbg.sp) & " SAVE_SP: " & hstr(save_sp));
+ end if;
+ end mem_read;
+ begin
+ wait for 400 ns;
+ emuir <= "00001011"; -- NOP
+ -- emureq <= '1';
+ -- It is IMPORTANT to wait after an emu request
+ wait until emurdy = '1' and break ='1';
+ wait for 100 ns;
+ -- Single stepping:
+ emureq <= '1';
+ wait for 100 ns;
+ for i in 0 to 200 loop
+ getsp;
+ end loop;
+ -- Save context here:
+ execute_opcode(OPCODE_SHORT & OPCODE_PUSHSP); -- Save context
+ -- Save current SP for reference. Note the dbg.sp is not yet
+ -- updated to the above command.
+ save_sp <= dbg.sp;
+ -- Now do your stuff and count the pushes, including the above
+ push_imm32(x"000008d8");
+ -- execute_opcode(OPCODE_LOADSP & '1' & x"0");
+ execute_opcode(OPCODE_SHORT & OPCODE_LOAD); -- Load indirect
+ execute_opcode(OPCODE_SHORT & OPCODE_NOP);
+ print("value: " & hstr(dbg.stk_a));
+ -- Now we should see the data from the address above in
+ -- dbg.stk_a
+ -- }
+ -- Restore old stack:
+ -- execute_opcode(OPCODE_SHORT & OPCODE_PUSHSP);
+ -- Fix up stack:
+ -- execute_opcode(OPCODE_IM & "000" & x"8");
+ -- execute_opcode(OPCODE_SHORT & OPCODE_ADD);
+ execute_opcode(OPCODE_LOADSP & '1' & x"1"); -- Restore context
+ execute_opcode(OPCODE_SHORT & OPCODE_POPSP); -- Restore context
+ -- push_imm32(save_sp);
+ -- Need one NOP to update dbg.sp (for sanity check):
+ execute_opcode(OPCODE_SHORT & OPCODE_NOP);
+ if (dbg.sp /= save_sp) then
+ mismatch <= '1';
+ print("* MEM_READ ERROR: Stack pointers don't match");
+ print("sp: " & hstr(dbg.sp) & " SAVE_SP: " & hstr(save_sp));
+ end if;
+ execute_opcode(OPCODE_SHORT & OPCODE_PUSHSP); -- Save context
+ -- Save current SP for reference. Note the dbg.sp is not yet
+ -- updated to the above command.
+ save_sp <= dbg.sp;
+ push_imm32(x"deadbeef");
+ execute_opcode(OPCODE_SHORT & OPCODE_NOP);
+ push_imm32(x"000008d8");
+ execute_opcode(OPCODE_SHORT & OPCODE_STORE); -- Store indirect
+ execute_opcode(OPCODE_LOADSP & '1' & x"0"); -- Restore context
+ execute_opcode(OPCODE_SHORT & OPCODE_POPSP); -- Restore context
+ -- Need one NOP to update dbg.sp (for sanity check):
+ execute_opcode(OPCODE_SHORT & OPCODE_NOP);
+ if (dbg.sp /= save_sp) then
+ mismatch <= '1';
+ print("* MEM_WRITE ERROR: Stack pointers don't match");
+ print("sp: " & hstr(dbg.sp) & " SAVE_SP: " & hstr(save_sp));
+ end if;
+ mem_read(x"000008d8");
+-- SET PC
+ push_imm32(x"00000000");
+ execute_opcode(OPCODE_SHORT & OPCODE_POPPC); -- Pop the pushed value into the PC
+ wait for 100 ns;
+ execute_opcode(OPCODE_SHORT & OPCODE_NOP);
+ getsp;
+ terminate <= '1';
+ wait;
+ end process;
+finish <= mismatch or terminate; -- or break;
+end architecture Behave; -- Entity: HWDbg_Small1_TB
diff --git a/zpu/hdl/zealot/zpu_pkg.vhdl b/zpu/hdl/zealot/zpu_pkg.vhdl
index 2a15880..751d825 100644
--- a/zpu/hdl/zealot/zpu_pkg.vhdl
+++ b/zpu/hdl/zealot/zpu_pkg.vhdl
@@ -56,6 +56,7 @@ package zpupkg is
sp : unsigned(31 downto 0);
stk_a : unsigned(31 downto 0);
stk_b : unsigned(31 downto 0);
+ idim : std_logic; -- Debugging: idim flag
end record;
component Trace is
@@ -66,6 +67,7 @@ package zpupkg is
clk_i : in std_logic;
dbg_i : in zpu_dbgo_t;
+ emu_i : in std_logic;
stop_i : in std_logic;
busy_i : in std_logic
@@ -81,6 +83,15 @@ package zpupkg is
clk_i : in std_logic; -- System Clock
reset_i : in std_logic; -- Synchronous Reset
interrupt_i : in std_logic; -- Interrupt
+ -- Emulation pins:
+ emureq_i : in std_logic;
+ emuexec_i : in std_logic; -- exec pulse. 1 clk cycle wide!
+ emuack_o : out std_logic;
+ emurdy_o : out std_logic;
+ pulse_o : out std_logic; -- Debug pulse for event counter
+ emuir : in std_logic_vector(OPCODE_W-1 downto 0);
break_o : out std_logic; -- Breakpoint opcode executed
dbg_o : out zpu_dbgo_t; -- Debug outputs (i.e. trace log)
-- BRAM (text, data, bss and stack)
diff --git a/zpu/hdl/zealot/zpu_small.vhdl b/zpu/hdl/zealot/zpu_small.vhdl
index 7e022d4..cf4e189 100644
--- a/zpu/hdl/zealot/zpu_small.vhdl
+++ b/zpu/hdl/zealot/zpu_small.vhdl
@@ -60,6 +60,14 @@ entity ZPUSmallCore is
clk_i : in std_logic; -- System Clock
reset_i : in std_logic; -- Synchronous Reset
interrupt_i : in std_logic; -- Interrupt
+ -- Emulation pins:
+ emureq_i : in std_logic; -- Emulation request from TAP
+ emuexec_i : in std_logic; -- exec pulse. 1 clk cycle wide!
+ emuack_o : out std_logic; -- Emulation ACK to TAP
+ emurdy_o : out std_logic; -- Emulation ready
+ pulse_o : out std_logic; -- Debug pulse for event counter
+ emuir : in std_logic_vector(OPCODE_W-1 downto 0);
break_o : out std_logic; -- Breakpoint opcode executed
dbg_o : out zpu_dbgo_t; -- Debug outputs (i.e. trace log)
-- BRAM (text, data, bss and stack)
@@ -95,6 +103,8 @@ architecture Behave of ZPUSmallCore is
signal sp_r : unsigned(MAX_ADDR_BIT downto BYTE_BITS):=SP_START;
signal idim_r : std_logic:='0';
+ signal idim_save_r : std_logic;
-- BRAM (text, data, bss and stack)
-- a_r is a register for the top of the stack [SP]
-- Note: as this is a stack CPU this is a very important register.
@@ -110,7 +120,7 @@ architecture Behave of ZPUSmallCore is
-- State machine.
type state_t is (st_fetch, st_write_io_done, st_execute, st_add, st_or,
st_and, st_store, st_read_io, st_write_io, st_fetch_next,
- st_add_sp, st_decode, st_resync);
+ st_add_sp, st_decode, st_resync, st_emulation);
signal state : state_t:=st_resync;
-- Decoded Opcode
@@ -124,6 +134,13 @@ architecture Behave of ZPUSmallCore is
signal opcode : unsigned(OPCODE_W-1 downto 0); -- Decoded
signal opcode_r : unsigned(OPCODE_W-1 downto 0); -- Registered
+ -- '1' when we are in IC emulation
+ signal in_emu : std_logic := '0';
+ signal break : std_logic := '0'; -- emulation cause: breakpoint
+ signal ready : std_logic := '0';
+ signal exec : std_logic := '0'; -- Exec strobe
+ signal reset_exec : std_logic := '0'; -- exec pulse reset
-- IRQ flag
signal in_irq_r : std_logic:='0';
-- I/O space address
@@ -142,20 +159,26 @@ begin
-- Note: We use Port B memory to fetch the opcodes.
- process(b_i, pc_r)
+ process(b_i, pc_r, in_emu, exec, emuir)
variable topcode : unsigned(OPCODE_W-1 downto 0);
- -- Select the addressed byte inside the fetched word
- case (to_integer(pc_r(BYTE_BITS-1 downto 0))) is
- when 0 =>
- topcode:=b_i(31 downto 24);
- when 1 =>
- topcode:=b_i(23 downto 16);
- when 2 =>
- topcode:=b_i(15 downto 8);
- when others => -- 3
- topcode:=b_i(7 downto 0);
- end case;
+ -- When in emulation, get opcode from emuir
+ if in_emu = '1' and exec = '1' then
+ topcode := unsigned(emuir);
+ else
+ -- Select the addressed byte inside the fetched word
+ case (to_integer(pc_r(BYTE_BITS-1 downto 0))) is
+ when 0 =>
+ topcode:=b_i(31 downto 24);
+ when 1 =>
+ topcode:=b_i(23 downto 16);
+ when 2 =>
+ topcode:=b_i(15 downto 8);
+ when others => -- 3
+ topcode:=b_i(7 downto 0);
+ end case;
+ end if;
opcode <= topcode;
if (topcode(7 downto 7)=OPCODE_IM) then
@@ -192,19 +215,32 @@ begin
d_opcode <= dec_store;
d_opcode <= dec_pop_sp;
+ -- when OPCODE_POPINT => -- Used to return from emulation
+ -- d_opcode <= dec_emuleave;
when others => -- OPCODE_NOP and others
d_opcode <= dec_nop;
end case;
end if;
end process decode_control;
+ process (clk_i, reset_exec) -- Exec latch: remembers an emuexec_i pulse until the state machine acknowledges it through reset_exec
+ begin
+ if rising_edge(clk_i) then -- fully synchronous: reset_exec is only evaluated on the clock edge
+ if emuexec_i = '1' then -- setting has priority over clearing
+ exec <= '1';
+ elsif reset_exec = '1' then
+ exec <= '0';
+ end if;
+ end if;
+ end process;
data_o <= b_i;
process (clk_i)
variable sp_offset : unsigned(4 downto 0);
if rising_edge(clk_i) then
- break_o <= '0';
write_en_o <= '0';
read_en_o <= '0';
dbg_o.b_inst <= '0';
@@ -238,6 +274,9 @@ begin
if interrupt_i='0' then
in_irq_r <= '0'; -- no longer in an interrupt
end if;
+ reset_exec <= '0';
case state is
when st_execute =>
@@ -245,7 +284,9 @@ begin
-- At this point:
-- b_i contains opcode word
-- a_i contains top of stack
- pc_r <= pc_r+1;
+ if in_emu ='0' then
+ pc_r <= pc_r+1;
+ end if;
-- Debug info (Trace)
dbg_o.b_inst <= '1';
@@ -256,6 +297,7 @@ begin
dbg_o.sp(MAX_ADDR_BIT downto BYTE_BITS) <= sp_r;
dbg_o.stk_a <= a_i;
dbg_o.stk_b <= b_i;
+ dbg_o.idim <= idim_r;
-- During the next cycle we'll be reading the next opcode
sp_offset(4):=not opcode_r(4);
@@ -324,8 +366,20 @@ begin
b_addr_r <= sp_r+sp_offset;
state <= st_add_sp;
when dec_break =>
- --report "Break instruction encountered" severity failure;
- break_o <= '1';
+ -- Hit breakpoint, enter emulation
+ if in_emu = '0' then
+ in_emu <= '1';
+ break <= '1';
+ idim_save_r <= idim_r; -- save idim flag
+ state <= st_emulation;
+ else
+ -- Leave emulation:
+ idim_r <= idim_save_r; -- restore idim flag
+ break <= '0';
+ in_emu <= '0';
+ b_addr_r <= pc_r(MAX_ADDR_BIT downto BYTE_BITS);
+ state <= st_fetch_next;
+ end if;
when dec_push_sp =>
-- Push(SP)
sp_r <= sp_r-1;
@@ -413,6 +467,13 @@ begin
-- we'll fetch the opcode @ pc and thus it will
-- be available for st_execute the cycle after
-- next
+ -- If we just entered emulation, save idim flag
+ -- and mark we're in emulation.
+ if emureq_i = '1' and in_emu = '0' then
+ in_emu <= '1';
+ idim_save_r <= idim_r; -- save idim flag
+ end if;
b_addr_r <= pc_r(MAX_ADDR_BIT downto BYTE_BITS);
state <= st_fetch_next;
when st_fetch_next =>
@@ -423,8 +484,12 @@ begin
a_addr_r <= sp_r;
b_addr_r <= sp_r+1;
state <= st_decode;
+ reset_exec <= '1';
when st_decode =>
- if interrupt_i='1' and in_irq_r='0' and idim_r='0' then
+ state <= st_execute;
+ if in_emu = '1' then
+ state <= st_emulation;
+ elsif interrupt_i='1' and in_irq_r='0' and idim_r='0' then
-- We got an interrupt, execute interrupt instead of next instruction
in_irq_r <= '1';
d_opcode_r <= dec_interrupt;
@@ -432,7 +497,6 @@ begin
-- during the st_execute cycle we'll be fetching SP+1
a_addr_r <= sp_r;
b_addr_r <= sp_r+1;
- state <= st_execute;
when st_store =>
sp_r <= sp_r+1;
a_we_r <= '1';
@@ -459,6 +523,15 @@ begin
when st_resync =>
a_addr_r <= sp_r;
state <= st_fetch;
+ when st_emulation =>
+ a_addr_r <= sp_r;
+ b_addr_r <= sp_r+1;
+ if exec = '1' then
+ state <= st_execute;
+ else
+ state <= st_emulation;
+ end if;
when others =>
end case;
@@ -467,5 +540,14 @@ begin
end process opcode_control;
addr_o <= addr_r;
+-- Emulation flag export:
+ -- ready: the state machine is waiting in st_emulation
+ ready <= '1' when state = st_emulation else '0';
+ -- emuack_o: mirrors the in_emu flag
+ emuack_o <= in_emu;
+ -- emurdy_o: waiting in st_emulation with no execute request latched
+ emurdy_o <= ready and not exec;
+ -- break_o: mirrors the break flag set when a break is hit outside emulation
+ break_o <= break;
+ -- pulse_o: mirrors the latched execute request
+ pulse_o <= exec;
end architecture Behave; -- Entity: ZPUSmallCore
diff --git a/zpu/sw/emulation/Makefile b/zpu/sw/emulation/Makefile
new file mode 100644
index 0000000..60e72d1
--- /dev/null
+++ b/zpu/sw/emulation/Makefile
@@ -0,0 +1,31 @@
+# JTAG / shifter real life test bench code
+# (c) 2005-2011, Martin Strubel <>
+NETPP = $(HOME)/src/netpp
+BFEMU = $(HOME)/src/blackfin/bfemu
+XSL = tapreg.xsl
+# TAP register definitions:
+TAPDEF = $(HOME)/src/vhdl/tap/tap.xml
+CFLAGS = -I$(BFEMU) -g -Wall
+DUTIES = libzpuemu.a
+all: $(DUTIES)
+tap.h: $(TAPDEF) $(XSL)
+ xsltproc -o $@ $(XSL) $<
+zpu-tap.h: $(TAPDEF) $(XSL)
+ xsltproc -o $@ --param selectDevice 2 $(XSL) $<
+LIBOBJS = zpuemu.o
+# Build the static emulation library from its objects.
+libzpuemu.a: $(LIBOBJS)
+	$(AR) ruv $@ $(LIBOBJS)
+# Cleaning is a separate target; as part of the archive recipe it would
+# delete the library immediately after building it.
+clean:
+	rm -f *.o $(DUTIES)
+.PHONY: all clean
diff --git a/zpu/sw/emulation/README b/zpu/sw/emulation/README
new file mode 100644
index 0000000..2e9e126
--- /dev/null
+++ b/zpu/sw/emulation/README
@@ -0,0 +1,12 @@
+Simple atomic I/O emulation library layer for ZPU [DRAFT]
+08/2011 Martin Strubel <>
+- Supports basic I/O set for debugging
+- Depends on a JTAG library defining a few simple commands
+- Not yet generic enough
+- In the future, it should handle different ZPU implementations,
+ optimizations and debug interfaces. Currently, it supports only the
+ ICEbearPlus JTAG library.
+- TBD: Indirect JTAG interface
diff --git a/zpu/sw/emulation/tap.h b/zpu/sw/emulation/tap.h
new file mode 100644
index 0000000..ae3ac27
--- /dev/null
+++ b/zpu/sw/emulation/tap.h
@@ -0,0 +1,41 @@
+ *
+ *
+ * This file was generated by dclib/netpp. Modifications to this file will
+ * be lost.
+ * Stylesheet: genreg.xsl (c) 2010-2011 section5
+ *
+ * Version: 0.0
+ **************************************************************************/
+#ifndef _BITMASK_
+/* Mask with bits msb..lsb (inclusive) set. Uses an unsigned all-ones
+ * value because left-shifting a negative int is undefined; msb must be
+ * below 31. Arguments are parenthesized so expressions can be passed.
+ * NOTE(review): this header is generated, so the same change is needed in
+ * the stylesheet, otherwise it is lost on regeneration. */
+#define _BITMASK_(msb, lsb) ( (~0U << ((msb) + 1)) ^ (~0U << (lsb)) )
+#endif
+#ifndef _BIT_
+/* Single-bit mask for bit position pos. */
+#define _BIT_(pos) (1U << (pos))
+#endif
+ * Address segment 'TAP'
+ *********************************************************/
+#define TAP_IDCODE 0x01
+#define TAP_EMUDATA 0x02
+#define TAP_EMUCTRL 0x03
+# define EMUREQ _BIT_(0)
+# define CORE_RESET _BIT_(15)
+#define TAP_EMUSTAT 0x04
+# define EMUACK _BIT_(0)
+# define EMURDY _BIT_(1)
+# define EMUDAT_UNR _BIT_(2)
+# define EMUDAT_OVR _BIT_(3)
+# define CORE_SPEC _BITMASK_(15, 8)
+# define CORE_SPEC_SHFT 8
+#define TAP_EMUIR 0x05
+#define TAP_DBGPC 0x08
+#define TAP_COUNT1 0x0c
+#define TAP_COUNT2 0x0d
diff --git a/zpu/sw/emulation/zpu-opcodes.h b/zpu/sw/emulation/zpu-opcodes.h
new file mode 100644
index 0000000..6004319
--- /dev/null
+++ b/zpu/sw/emulation/zpu-opcodes.h
@@ -0,0 +1,26 @@
+/** \file zpu-opcodes.h
+ *
+ * Basic ZPU opcode definitions
+ *
+ * 2011, <>
+ *
+ */
+/** This is also the opcode for leaving emulation */
+#define OPCODE_BREAK 0x00
+/* ZPU basic opcodes that are supported by emulation */
+#define OPCODE_IM 0x80
+#define OPCODE_NOP 0x0b
+#define OPCODE_LOAD 0x08
+#define OPCODE_STORE 0x0c
+#define OPCODE_LOADSP 0x60
+// Bit 4 of the LOADSP offset field is stored inverted in the opcode
+// (the core computes sp_offset(4) := not opcode(4)):
+# define LOADSP_INV 0x10
+#define OPCODE_PUSHSP 0x02
+#define OPCODE_POPSP 0x0d
+#define OPCODE_POPPC 0x04
+/* Special opcode: Leave emulation */
diff --git a/zpu/sw/emulation/zpu-tap.h b/zpu/sw/emulation/zpu-tap.h
new file mode 100644
index 0000000..176c4dc
--- /dev/null
+++ b/zpu/sw/emulation/zpu-tap.h
@@ -0,0 +1,30 @@
+ *
+ *
+ * This file was generated by dclib/netpp. Modifications to this file will
+ * be lost.
+ * Stylesheet: genreg.xsl (c) 2010-2011 section5
+ *
+ * Version: 0.0
+ **************************************************************************/
+#ifndef _BITMASK_
+/* Mask with bits msb..lsb (inclusive) set. Uses an unsigned all-ones
+ * value because left-shifting a negative int is undefined; msb must be
+ * below 31. Arguments are parenthesized so expressions can be passed.
+ * NOTE(review): this header is generated, so the same change is needed in
+ * the stylesheet, otherwise it is lost on regeneration. */
+#define _BITMASK_(msb, lsb) ( (~0U << ((msb) + 1)) ^ (~0U << (lsb)) )
+#endif
+#ifndef _BIT_
+/* Single-bit mask for bit position pos. */
+#define _BIT_(pos) (1U << (pos))
+#endif
+ * Address segment 'ZPUsmall'
+ *********************************************************/
+#define ZPUsmall_EMUSTAT_DUMMY 0x04
+# define ZPU_IDIM _BIT_(15)
+# define ZPU_BREAK _BIT_(14)
+# define ZPU_MEMBUSY _BIT_(9)
+# define ZPU_INRESET _BIT_(8)
diff --git a/zpu/sw/emulation/zpuemu.c b/zpu/sw/emulation/zpuemu.c
new file mode 100644
index 0000000..bf2f50b
--- /dev/null
+++ b/zpu/sw/emulation/zpuemu.c
@@ -0,0 +1,493 @@
+/* ZPU emulation library
+ *
+ * (c) 2011, Martin Strubel <>
+ *
+ * Limited functionality: Only one core in chain supported.
+ *
+ */
+// These headers must be implemented by the JTAG interface to your
+// HW debug adapter
+#include "jtag.h"
+#include "jtag_intern.h"
+#include "zpuemu.h"
+#include "zpu-opcodes.h"
+#include <stdio.h>
+#define IRSIZE 4
+/* JTAG shift register descriptors used by the helpers in this file:
+ *  ir_r     - IRSIZE bits, JTAGREG_LSB flag, shifted by select_dr()
+ *  opcode_r - 8 bits, JTAGREG_MSB flag, shifted by push_opcode()
+ *  ctrl_r   - 16 bits, JTAGREG_MSB flag, used by shiftin16()/shiftout16()
+ *  data_r   - 32 bits, JTAGREG_MSB flag, used by shiftout32()
+ * ir_r and opcode_r share the s_reg8 byte buffer (indices 1 and 0).
+ */
+static unsigned char
+static JtagRegister
+ir_r = {
+ .data = &s_reg8[1],
+ .nbits = IRSIZE,
+ .flags = JTAGREG_LSB
+static JtagRegister
+opcode_r = {
+ .data = &s_reg8[0],
+ .nbits = 8,
+ .flags = JTAGREG_MSB
+static unsigned char
+static JtagRegister
+ctrl_r = {
+ .data = &s_reg16[0],
+ .nbits = 16,
+ .flags = JTAGREG_MSB
+static unsigned char
+static JtagRegister
+data_r = {
+ .data = &s_reg32[0],
+ .nbits = 32,
+ .flags = JTAGREG_MSB
+/** Select the TAP data register 'dr': go to the s_jtag_shift_ir state,
+ * load 'dr' into the IRSIZE-bit instruction register image and shift it
+ * in UPDATE mode. Nothing is read back (output buffer is NULL).
+ */
+void select_dr(CpuContext *c, uint8_t dr)
+ jtag_goto_state(c->jtag, s_jtag_shift_ir);
+ reg_set(&ir_r, 0, IRSIZE, dr);
+ shift_generic(c->jtag, &ir_r, NULL, ir_r.nbits, UPDATE);
+/** Flush the JTAG controller, then shift the 32-bit data register
+ * (data_r is used as both input and output buffer) with the given mode
+ * and store the 32 bits read back in *r.
+ */
+void shiftout32(CpuContext *c, REGISTER *r, int mode)
+ jtag_flush(c->jtag);
+ jtag_goto_state(c->jtag, s_jtag_shift_dr);
+ shift_generic(c->jtag, &data_r, &data_r, data_r.nbits, mode);
+ *r = reg_get(&data_r, 0, 32);
+/** Shift the low 16 bits of 'r' into the data register via ctrl_r with
+ * the given mode. Nothing is read back (output buffer is NULL).
+ */
+void shiftin16(CpuContext *c, REGISTER r, int mode)
+ jtag_goto_state(c->jtag, s_jtag_shift_dr);
+ reg_set(&ctrl_r, 0, 16, r);
+ shift_generic(c->jtag, &ctrl_r, NULL, ctrl_r.nbits, mode);
+/** Shift the 16-bit data register (ctrl_r is used as both input and
+ * output buffer) with the given mode and store the 16 bits read back
+ * in *r. Unlike shiftout32(), this does not flush the controller first.
+ */
+void shiftout16(CpuContext *c, REGISTER *r, int mode)
+ jtag_goto_state(c->jtag, s_jtag_shift_dr);
+ shift_generic(c->jtag, &ctrl_r, &ctrl_r, ctrl_r.nbits, mode);
+ *r = reg_get(&ctrl_r, 0, 16);
+// Auxiliaries
+/** Shift one 8-bit opcode into the currently selected data register.
+ *
+ * \param c       CPU context holding the JTAG controller
+ * \param opcode  Opcode byte to shift in
+ * \param mode    Shift mode passed on to shift_generic() (e.g. EXEC, UPDATE)
+ */
+void push_opcode(CpuContext *c, uint8_t opcode, int mode)
+{
+	/* opcode_r.data points at s_reg8[0], so this fills the shift buffer */
+	s_reg8[0] = opcode;
+	jtag_goto_state(c->jtag, s_jtag_shift_dr);
+	shift_generic(c->jtag, &opcode_r, NULL, opcode_r.nbits, mode);
+}
+#if 0
+/** Push a 16-bit immediate as three IM opcodes carrying 7 bits each
+ * (most significant bits first), followed by a NOP. Currently disabled.
+ */
+void push_val16(CpuContext *c, uint16_t val)
+{
+	int i = 14;
+	push_opcode(c, OPCODE_IM | ((val >> i) & 0x7f), EXEC); i -= 7;
+	push_opcode(c, OPCODE_IM | ((val >> i) & 0x7f), EXEC); i -= 7;
+	push_opcode(c, OPCODE_IM | ((val >> i) & 0x7f), EXEC);
+	push_opcode(c, OPCODE_NOP, EXEC);
+}
+#endif
+/** Push a 32-bit immediate onto the target stack.
+ *
+ * The value is sent as five IM opcodes carrying 7 bits each, starting
+ * with the most significant bits (shifts 28, 21, 14, 7, 0), followed by
+ * a NOP (presumably to end the IM sequence - confirm against the core).
+ */
+void push_val32(CpuContext *c, uint32_t val)
+{
+	int shift;
+
+	for (shift = 28; shift >= 0; shift -= 7) {
+		push_opcode(c, OPCODE_IM | ((val >> shift) & 0x7f), EXEC);
+	}
+	push_opcode(c, OPCODE_NOP, EXEC);
+}
+/** Read one 32-bit word from target memory at 'addr'.
+ *
+ * Opcodes are fed to the core through TAP_EMUIR and the result is read
+ * from TAP_EMUDATA. jtag_queue() is called with 1 at entry and with the
+ * value it returned at exit (presumably restoring the previous queue mode).
+ */
+uint32_t mem_read32(CpuContext *c, uint32_t addr)
+ int q = jtag_queue(c->jtag, 1);
+ select_dr(c, TAP_EMUIR);
+ // Save SP on the stack, then push the address and load from it
+ push_opcode(c, OPCODE_PUSHSP, EXEC);
+ push_val32(c, addr);
+ push_opcode(c, OPCODE_LOAD, EXEC);
+ push_opcode(c, OPCODE_NOP, EXEC);
+ // Fetch the loaded word through the data exchange register
+ select_dr(c, TAP_EMUDATA);
+ shiftout32(c, &r, UPDATE); // Execute Stack fixup
+ select_dr(c, TAP_EMUIR);
+ // Reload the saved SP value (stack offset 1) and pop it back into SP
+ push_opcode(c, OPCODE_LOADSP | (LOADSP_INV ^ 0x01), EXEC);
+ push_opcode(c, OPCODE_POPSP, EXEC);
+ push_opcode(c, OPCODE_NOP, EXEC);
+ jtag_queue(c->jtag, q);
+ return r;
+// Little read cache:
+// Holds the last word fetched by mem_read32_cached(). The address
+// 0xffffffff is used as the "invalid" marker.
+struct cache {
+ ADDR addr;
+ uint32_t val;
+} g_cache = { 0xffffffff, 0 };
+/** Read a 32-bit word through the one-entry cache: the target is only
+ * accessed when 'a' differs from the cached address.
+ */
+uint32_t mem_read32_cached(CpuContext *c, ADDR a)
+ if (a != g_cache.addr) {
+ g_cache.val = mem_read32(c, a);
+ g_cache.addr = a;
+ // printf("Read %08x\n", g_cache.val);
+ }
+ return g_cache.val;
+/** Read one byte at address 'a' by fetching the containing aligned word
+ * (cached) and shifting the byte down. Byte offset 0 maps to bits 31..24,
+ * offset 3 to bits 7..0.
+ */
+uint8_t mem_read8(CpuContext *c, ADDR a)
+ uint32_t v;
+ int shift = (3 - (a & 0x3)) << 3;
+ a &= ~0x3;
+ // printf("shift: %d\n", shift);
+ v = mem_read32_cached(c, a) >> shift;
+ return v;
+/** Read one 16-bit value at address 'a' from the containing aligned word
+ * (cached). Only address bit 1 selects the half: 0 maps to bits 31..16,
+ * 1 to bits 15..0. Address bit 0 is ignored.
+ */
+uint16_t mem_read16(CpuContext *c, ADDR a)
+ uint32_t v;
+ int shift = (2 - (a & 0x2)) << 3;
+ a &= ~0x3;
+ // printf("shift: %d\n", shift);
+ v = mem_read32_cached(c, a) >> shift;
+ return v;
+/** Write the 32-bit word 'val' to target memory at 'addr' by feeding
+ * opcodes to the core through TAP_EMUIR. The final opcode is shifted in
+ * UPDATE mode, the others in EXEC mode.
+ */
+void mem_write32(CpuContext*c, uint32_t addr, uint32_t val)
+ // Invalidate cache, when we're writing to the same addr:
+ if (g_cache.addr == (addr)) {
+ g_cache.addr = 0xffffffff;
+ }
+ select_dr(c, TAP_EMUIR);
+ // Save SP on the stack, push value and address, then store
+ push_opcode(c, OPCODE_PUSHSP, EXEC);
+ push_val32(c, val);
+ push_val32(c, addr); // Address
+ push_opcode(c, OPCODE_STORE, EXEC);
+ // Reload the saved SP value (stack offset 0) and pop it back into SP
+ push_opcode(c, OPCODE_LOADSP | (LOADSP_INV ^ 0x00), EXEC);
+ push_opcode(c, OPCODE_POPSP, EXEC);
+ push_opcode(c, OPCODE_NOP, UPDATE);
+/** Write the 16-bit value 'val' at 'addr' with a read-modify-write of the
+ * containing aligned 32-bit word. Only address bit 1 selects the half:
+ * 0 maps to bits 31..16, 1 to bits 15..0.
+ */
+void mem_write16(CpuContext*c, uint32_t addr, uint16_t val)
+{
+	unsigned int shift = (2 - (addr & 0x2)) << 3;
+	uint32_t v;
+
+	addr &= ~(uint32_t) 0x3;
+	v = mem_read32_cached(c, addr);
+	/* Shift in uint32_t: shifting an int by 16 can overflow (undefined) */
+	v = (v & ~((uint32_t) 0xffff << shift)) | ((uint32_t) val << shift);
+	mem_write32(c, addr, v);
+}
+/** Write the byte 'val' at 'addr' with a read-modify-write of the
+ * containing aligned 32-bit word. Byte offset 0 maps to bits 31..24,
+ * offset 3 to bits 7..0.
+ */
+void mem_write8(CpuContext *c, uint32_t addr, uint8_t val)
+{
+	unsigned int shift = (3 - (addr & 0x3)) << 3;
+	uint32_t v;
+
+	addr &= ~(uint32_t) 0x3;
+	v = mem_read32_cached(c, addr);
+	/* Shift in uint32_t: shifting an int by 24 can overflow (undefined) */
+	v = (v & ~((uint32_t) 0xff << shift)) | ((uint32_t) val << shift);
+	mem_write32(c, addr, v);
+}
+/** Request emulation mode: invalidate the read cache and write EMUREQ
+ * to the TAP_EMUCTRL register. Always returns 0; it does not wait for
+ * or check any acknowledge from the core.
+ */
+int enter_emulation(CpuContext *c)
+ g_cache.addr = 0xffffffff; // Invalidate cache
+ // puts(">>> Enter emulation");
+ select_dr(c, TAP_EMUCTRL);
+ r = EMUREQ;
+ shiftin16(c, r, UPDATE);
+ return 0;
+/** Leave emulation mode: clear TAP_EMUCTRL (dropping EMUREQ), then feed
+ * OPCODE_EMULEAVE to the core through TAP_EMUIR. 'error' is never
+ * modified, so this always returns 0.
+ */
+int leave_emulation(CpuContext *c)
+ int error = 0;
+ // Turn off emulation bit
+ select_dr(c, TAP_EMUCTRL);
+ shiftin16(c, 0, UPDATE);
+ // Run some emulated opcodes:
+ // Return from emulation:
+ select_dr(c, TAP_EMUIR);
+ push_opcode(c, OPCODE_EMULEAVE, EXEC);
+ return error;
+// API calls
+/** Initialize the context with the JTAG controller to use.
+ *
+ * The parameter type matches the prototype in zpuemu.h and the type of
+ * the CpuContext 'jtag' field.
+ *
+ * \return always 0
+ */
+int zpu_emuinit(CpuContext *c, JTAG_CTRL jtag)
+{
+	c->jtag = jtag;
+	return 0;
+}
+/** Read the 32-bit TAP_IDCODE register into *code. Always returns 0. */
+int zpu_getid(CpuContext *c, uint32_t *code)
+ select_dr(c, TAP_IDCODE);
+ shiftout32(c, code, UPDATE);
+ return 0;
+/** Resume the core. With 'step' non-zero, EMUREQ is written to
+ * TAP_EMUCTRL before OPCODE_EMULEAVE is issued (presumably so the core
+ * re-enters emulation after one instruction - confirm against the core);
+ * otherwise leave_emulation() clears the request first. Always returns 0.
+ */
+int zpu_resume(CpuContext *c, int step)
+ jtag_flush(c->jtag);
+ if (step) {
+ select_dr(c, TAP_EMUCTRL);
+ shiftin16(c, EMUREQ, UPDATE);
+ select_dr(c, TAP_EMUIR);
+ push_opcode(c, OPCODE_EMULEAVE, EXEC);
+ } else {
+ leave_emulation(c);
+ }
+ return 0;
+/** Enter emulation when 'which' is non-zero, leave it otherwise. The
+ * JTAG controller is flushed first. Always returns 0; the results of
+ * enter_emulation()/leave_emulation() are not propagated.
+ */
+int zpu_emulation(CpuContext *c, int which)
+ // FIXME: for multicore, this needs to change.
+ jtag_flush(c->jtag);
+ if (which) {
+ enter_emulation(c);
+ } else {
+ leave_emulation(c);
+ }
+ return 0;
+/** Read the 16-bit TAP_EMUSTAT register into *state. 'error' is never
+ * modified, so this always returns 0.
+ */
+int zpu_state(CpuContext *c, uint16_t *state)
+ int error = 0;
+ select_dr(c, TAP_EMUSTAT);
+ shiftout16(c, &r, UPDATE);
+ *state = r;
+ return error;
+/** Placeholder: does nothing and returns 0; both arguments are unused. */
+int zpu_reset(CpuContext *c, int mode)
+ // TODO: Implement system control register on zealot
+ return 0;
+/** Set a core register by pushing 'val' as an immediate and popping it
+ * into the register (POPPC for REG_PC, POPSP for REG_SP).
+ * Returns 0 on success, -1 for an unknown 'regno'.
+ */
+int zpu_setreg(CpuContext *c, int regno, REGISTER val)
+ switch (regno) {
+ case REG_PC:
+ select_dr(c, TAP_EMUIR);
+ push_val32(c, val);
+ push_opcode(c, OPCODE_POPPC, EXEC);
+ break;
+ case REG_SP:
+ select_dr(c, TAP_EMUIR);
+ push_val32(c, val);
+ push_opcode(c, OPCODE_POPSP, EXEC);
+ break;
+ default: return -1;
+ }
+ return 0;
+/** Read a core register (REG_PC or REG_SP) into *val.
+ *
+ * jtag_queue() is called with 1 at entry and with the value it returned
+ * at exit on every path, including the unknown-register path, which
+ * previously returned early without that second call.
+ *
+ * \return 0 on success, -1 for an unknown 'regno' (*val is left untouched)
+ */
+int zpu_getreg(CpuContext *c, int regno, REGISTER *val)
+{
+	REGISTER r = 0;
+	int error = 0;
+	int q = jtag_queue(c->jtag, 1);
+
+	switch (regno) {
+	case REG_PC:
+		// XXX needed to update dbg_o.<signal>:
+		select_dr(c, TAP_EMUIR); // XXX
+		push_opcode(c, OPCODE_NOP, EXEC); // XXX
+		select_dr(c, TAP_DBGPC);
+		shiftout32(c, &r, UPDATE);
+		break;
+	case REG_SP:
+		select_dr(c, TAP_EMUIR);
+		push_opcode(c, OPCODE_PUSHSP, EXEC);
+		// XXX needed to update dbg_o.<signal>:
+		push_opcode(c, OPCODE_NOP, EXEC); // XXX
+		push_opcode(c, OPCODE_POPSP, UPDATE); // queue, exec later
+		select_dr(c, TAP_EMUDATA);
+		shiftout32(c, &r, EXEC); // (here)
+		break;
+	default:
+		error = -1;
+		break;
+	}
+	if (!error) {
+		*val = r;
+	}
+	jtag_queue(c->jtag, q);
+	return error;
+}
+/** Print the TAP_EMUSTAT flags and the TAP_COUNT1/TAP_COUNT2 registers
+ * to stdout. The counters are printed as unsigned values, since the
+ * register type is unsigned and "%d" does not match it.
+ */
+void zpu_dumpstat(CpuContext *c)
+{
+	REGISTER r;
+
+	select_dr(c, TAP_EMUSTAT);
+	shiftout16(c, &r, UPDATE);
+	printf("EMUSTAT: %04x -", (unsigned int) (r & 0xffff));
+	if (r & ZPU_IDIM) {
+		printf(" [IDIM]");
+	}
+	if (r & ZPU_INRESET) {
+		printf(" [RESET]");
+	}
+	if (r & ZPU_BREAK) {
+		printf(" [BREAK]");
+	}
+	if (r & EMUACK) {
+		printf(" [EMUACK]");
+	}
+	if (r & EMURDY) {
+		printf(" [EMURDY]");
+	}
+	if (r & ZPU_MEMBUSY) {
+		printf(" [MEM_BUSY]");
+	}
+	printf("\n");
+	select_dr(c, TAP_COUNT1);
+	shiftout32(c, &r, UPDATE);
+	printf("COUNT1: %012u\n", (unsigned int) r);
+	select_dr(c, TAP_COUNT2);
+	shiftout16(c, &r, UPDATE);
+	printf("COUNT2: %08u\n", (unsigned int) r);
+}
+/** Choose the widest access size that fits the address alignment and the
+ * byte count, and convert *count from bytes to transfer units.
+ *
+ * \param addr   Start address
+ * \param count  In: number of bytes. Out: number of transfers of the
+ *               returned size (unchanged for LDST_8)
+ * \return LDST_32, LDST_16 or LDST_8
+ */
+int guess_access(ADDR addr, unsigned int *count)
+{
+	unsigned int nbytes = *count;
+	unsigned int misalign = addr % 4;
+
+	// I/O space wants to be addressed long word wise:
+	if (addr >= 0x80080000 && nbytes == 4) {
+		*count = 1;
+		return LDST_32;
+	}
+	// Word aligned start and a whole number of words: 32 bit transfers
+	if (misalign == 0 && nbytes % 4 == 0) {
+		*count = nbytes / 4;
+		return LDST_32;
+	}
+	// Even start address and a whole number of half words: 16 bit transfers
+	if ((misalign == 0 && nbytes % 4 == 2) ||
+	    (misalign == 2 && nbytes % 2 == 0)) {
+		*count = nbytes / 2;
+		return LDST_16;
+	}
+	// Everything else goes byte wise
+	return LDST_8;
+}
+/** Read 'count' bytes of target memory starting at 'addr' into 'buf'.
+ *
+ * guess_access() picks the transfer size and converts 'count' to a
+ * number of transfers. 16 and 32 bit values are stored most significant
+ * byte first. Byte and half word reads go through the one-entry read
+ * cache; word reads call mem_read32() directly. Always returns 0.
+ */
+int zpu_mem_read(CpuContext *c, ADDR addr, unsigned int count,
+ unsigned char *buf)
+ int sz;
+ uint32_t v;
+ int q = jtag_queue(c->jtag, 1);
+ sz = guess_access(addr, &count);
+ switch (sz) {
+ case LDST_8:
+ while (count--) {
+ *buf++ = mem_read8(c, addr++);
+ }
+ break;
+ case LDST_16:
+ while (count--) {
+ v = mem_read16(c, addr); addr += 2;
+ buf[1] = v; v >>= 8;
+ buf[0] = v;
+ buf += 2;
+ }
+ break;
+ case LDST_32:
+ while (count--) {
+ v = mem_read32(c, addr); addr += 4;
+ buf[3] = v; v >>= 8;
+ buf[2] = v; v >>= 8;
+ buf[1] = v; v >>= 8;
+ buf[0] = v;
+ buf += 4;
+ }
+ break;
+ }
+ jtag_flush(c->jtag);
+ jtag_queue(c->jtag, q);
+ return 0;
+/** Write 'count' bytes from 'buf' to target memory starting at 'addr'.
+ *
+ * guess_access() picks the transfer size and converts 'count' to a
+ * number of transfers. 16 and 32 bit values are assembled from the
+ * buffer most significant byte first.
+ *
+ * \return always 0
+ */
+int zpu_mem_write(CpuContext *c, ADDR addr, unsigned int count,
+	const unsigned char *buf)
+{
+	int sz;
+	uint32_t v;
+	int q = jtag_queue(c->jtag, 1);
+
+	sz = guess_access(addr, &count);
+	switch (sz) {
+	case LDST_8:
+		// XXX: Could be optimized further
+		while (count--) {
+			v = *buf++;
+			mem_write8(c, addr, v); addr++;
+		}
+		break;
+	case LDST_16:
+		while (count--) {
+			v = ((uint32_t) buf[0] << 8) | buf[1];
+			mem_write16(c, addr, v);
+			addr += 2; buf += 2;
+		}
+		break;
+	case LDST_32:
+		while (count--) {
+			/* Widen before shifting: buf[0] << 24 as int overflows
+			 * (undefined) for bytes >= 0x80 */
+			v = ((uint32_t) buf[0] << 24) | ((uint32_t) buf[1] << 16) |
+			    ((uint32_t) buf[2] << 8) | buf[3];
+			mem_write32(c, addr, v);
+			addr += 4; buf += 4;
+		}
+		break;
+	}
+	jtag_flush(c->jtag);
+	jtag_queue(c->jtag, q);
+	return 0;
+}
diff --git a/zpu/sw/emulation/zpuemu.h b/zpu/sw/emulation/zpuemu.h
new file mode 100644
index 0000000..5fa14dd
--- /dev/null
+++ b/zpu/sw/emulation/zpuemu.h
@@ -0,0 +1,55 @@
+/* ZPU emulation library header
+ *
+ * (c) 2011, Martin Strubel <>
+ *
+ *
+ */
+#include <stdint.h>
+#include "zpu-tap.h"
+#include "tap.h"
+#define REGISTER uint32_t
+#define ADDR uint32_t
+// #define LDST_32 4
+// #define LDST_16 2
+// #define LDST_8 1
+#define REG_PC 0
+#define REG_SP 1
+struct _cpu;
+/* Per-core debug context passed to every zpu_* call. */
+typedef struct _cpu_context {
+ struct _cpu *cpu;
+ JTAG_CTRL jtag; // JTAG controller in use (set by zpu_emuinit())
+ short id; // identification tag
+ short flags; // Interrupt flag - did we Ctrl-C ?
+ short prev_state; // CPU's previous state (for change detection)
+ // Dual core stuff
+} CpuContext;
+/* Store the JTAG controller in the context */
+int zpu_emuinit(CpuContext *c, JTAG_CTRL jtag);
+/* Read the 32-bit TAP_IDCODE register */
+int zpu_getid(CpuContext *c, uint32_t *code);
+/* Enter (which != 0) or leave (which == 0) emulation */
+int zpu_emulation(CpuContext *c, int which);
+/* Resume the core; step != 0 keeps the emulation request set */
+int zpu_resume(CpuContext *c, int step);
+/* Write/read REG_PC or REG_SP; return -1 for any other regno */
+int zpu_setreg(CpuContext *c, int regno, REGISTER val);
+int zpu_getreg(CpuContext *c, int regno, REGISTER *val);
+/* Read the 16-bit TAP_EMUSTAT register */
+int zpu_state(CpuContext *c, uint16_t *state);
+/* Currently a stub that returns 0 */
+int zpu_reset(CpuContext *c, int mode);
+/* NOTE(review): no definition of zpu_getpc is visible in zpuemu.c - confirm */
+int zpu_getpc(CpuContext *c, REGISTER *pc);
+/* Print EMUSTAT flags and the COUNT1/COUNT2 registers to stdout */
+void zpu_dumpstat(CpuContext *c);
+/* Read/write 'count' bytes of target memory starting at 'addr' */
+int zpu_mem_read(CpuContext *c, ADDR addr, unsigned int count,
+ unsigned char *b);
+int zpu_mem_write(CpuContext *c, ADDR addr, unsigned int count,
+ const unsigned char *b);
+/* Access size codes returned by guess_access() and switched on by
+ * zpu_mem_read()/zpu_mem_write(): 32, 16 and 8 bit transfers. */
+enum {
+ LDST_32,
+ LDST_16,
+ LDST_8,
OpenPOWER on IntegriCloud