summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAntonio Anton <antonio.anton@anro-ingenieros.com>2009-09-15 10:58:55 +0200
committeroyvind <oyvind@titan.(none)>2009-09-15 10:58:55 +0200
commit685ce53dfba47bf06a25f2566a157ed5cda8ba1d (patch)
tree3845e6a0c4bb4de4fda0a4e60acc754d69e14ccf
parent879f385d7ae2bcef98f1970aba872235e940776b (diff)
downloadzpu-685ce53dfba47bf06a25f2566a157ed5cda8ba1d.zip
zpu-685ce53dfba47bf06a25f2566a157ed5cda8ba1d.tar.gz
Avalanche ZPU implementation
-rw-r--r--zpu/hdl/avalanche/core/zpu_core.v749
-rw-r--r--zpu/hdl/avalanche/core/zpu_core_defines.v322
-rw-r--r--zpu/hdl/avalanche/core/zpu_core_rom.v1017
-rw-r--r--zpu/hdl/avalanche/readme.txt91
4 files changed, 2179 insertions, 0 deletions
diff --git a/zpu/hdl/avalanche/core/zpu_core.v b/zpu/hdl/avalanche/core/zpu_core.v
new file mode 100644
index 0000000..e704fbc
--- /dev/null
+++ b/zpu/hdl/avalanche/core/zpu_core.v
@@ -0,0 +1,749 @@
+`timescale 1ns / 1ps
+`include "zpu_core_defines.v"
+
+/* MODULE: zpu_core
+ DESCRIPTION: Contains ZPU cpu
+ AUTHOR: Antonio J. Anton (aj <at> anro-ingenieros.com)
+
+REVISION HISTORY:
+Revision 1.0, 14/09/2009
+Initial public release
+
+COPYRIGHT:
+Copyright (c) 2009 Antonio J. Anton
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.*/
+
+// --------- MICROPROGRAMMED ZPU CORE ---------------
+// all signals are polled on clk rising edge
+// all signals positive
+
+module zpu_core (
+`ifdef ENABLE_CPU_INTERRUPTS
+ interrupt, // interrupt request
+`endif
+ clk, // clock on rising edge
+ reset, // active-high reset, forces the microcode sequencer to `MC_ADDR_RESET
+ mem_read, // request memory read
+ mem_write, // request memory write
+ mem_done, // memory operation completed
+ mem_addr, // memory address
+ mem_data_read, // data read from memory
+ mem_data_write, // data to be written to memory
+ byte_select // byte select on memory operation
+);
+
+// Overview:
+//  - datapath registers: pc, sp, a, b, idim plus a one-word opcode cache
+//  - every control signal of the datapath is a bit (or field) of the current
+//    microcode word mc_op, delivered by the zpu_core_rom instance
+//  - register writes are gated with ~busy, so a microcode step is repeated
+//    until the memory access and/or the ALU operation it requests has finished
+
+input clk;
+input reset;
+output mem_read;
+output mem_write;
+input mem_done;
+input [31:0] mem_data_read;
+output [31:0] mem_data_write;
+output [31:0] mem_addr;
+output [3:0] byte_select;
+`ifdef ENABLE_CPU_INTERRUPTS
+input interrupt;
+`endif
+
+wire clk;
+wire reset;
+wire mem_read;
+wire mem_write;
+wire mem_done;
+wire [31:0] mem_data_read;
+wire [31:0] mem_data_write;
+wire [31:0] mem_addr;
+`ifdef ENABLE_CPU_INTERRUPTS
+wire interrupt;
+`endif
+
+`ifdef ENABLE_BYTE_SELECT
+// ------ unaligned byte/halfword memory operations -----
+/// TODO: think rewriting into microcode or in a less resource wasting way
+
+reg [3:0] byte_select;
+wire byte_op;
+wire halfw_op;
+
+reg [31:0] mem_data_read_int; // aligned data from memory
+reg [31:0] mem_data_write_out; // write data already aligned
+wire [31:0] mem_data_write_int; // write data from cpu to be aligned
+
+// --- byte select logic ---
+// case key is { mem_addr[1:0], byte_op, halfw_op }; byte_op has priority over halfw_op
+always @(mem_addr[1:0] or byte_op or halfw_op)
+begin
+ casez( { mem_addr[1:0], byte_op, halfw_op } )
+ 4'b00_1_? : byte_select <= 4'b0001; // byte select
+ 4'b01_1_? : byte_select <= 4'b0010;
+ 4'b10_1_? : byte_select <= 4'b0100;
+ 4'b11_1_? : byte_select <= 4'b1000;
+ 4'b0?_0_1 : byte_select <= 4'b0011; // half word select
+ 4'b1?_0_1 : byte_select <= 4'b1100;
+ default : byte_select <= 4'b1111; // word select
+ endcase
+end
+
+// --- input data to cpu ---
+// the addressed byte lane(s) are moved to the low bits and zero extended;
+// halfword and word reads also swap the byte order
+always @(mem_data_read or mem_addr[1:0] or byte_op or halfw_op)
+begin
+ casez( { mem_addr[1:0], byte_op, halfw_op } )
+ 4'b00_1_? : mem_data_read_int <= { 24'b0, mem_data_read[7:0] }; // 8 bit read
+ 4'b01_1_? : mem_data_read_int <= { 24'b0, mem_data_read[15:8] };
+ 4'b10_1_? : mem_data_read_int <= { 24'b0, mem_data_read[23:16] };
+ 4'b11_1_? : mem_data_read_int <= { 24'b0, mem_data_read[31:24] };
+ 4'b0?_0_1 : mem_data_read_int <= { 16'b0, mem_data_read[7:0], mem_data_read[15:8] }; // 16 bit read
+ 4'b1?_0_1 : mem_data_read_int <= { 16'b0, mem_data_read[23:16], mem_data_read[31:24] };
+ default : mem_data_read_int <= { mem_data_read[7:0], mem_data_read[15:8], mem_data_read[23:16], mem_data_read[31:24] }; // 32 bit access (default)
+ endcase
+end
+
+// --- output data from cpu ---
+assign mem_data_write = mem_data_write_out;
+
+// mirror of the read path: the low byte/halfword of the cpu data is placed on the
+// addressed lane(s); lanes not enabled by byte_select are driven as don't care (X)
+always @(mem_data_write_int or mem_addr[1:0] or byte_op or halfw_op)
+begin
+ casez( {mem_addr[1:0], byte_op, halfw_op } )
+ 4'b00_1_? : mem_data_write_out <= { 24'bX, mem_data_write_int[7:0] }; // 8 bit write
+ 4'b01_1_? : mem_data_write_out <= { 16'bX, mem_data_write_int[7:0], 8'bX };
+ 4'b10_1_? : mem_data_write_out <= { 8'bX, mem_data_write_int[7:0], 16'bX };
+ 4'b11_1_? : mem_data_write_out <= { mem_data_write_int[7:0], 24'bX };
+ 4'b0?_0_1 : mem_data_write_out <= { 16'bX, mem_data_write_int[7:0], mem_data_write_int[15:8] }; // 16 bit write
+ 4'b1?_0_1 : mem_data_write_out <= { mem_data_write_int[7:0], mem_data_write_int[15:8], 16'bX };
+ default : mem_data_write_out <= { mem_data_write_int[7:0], mem_data_write_int[15:8], mem_data_write_int[23:16], mem_data_write_int[31:24] };
+ endcase
+end
+`else
+// -------- only 32 bit memory access --------
+wire [3:0] byte_select = 4'b1111; // all memory operations are 32 bit wide
+wire [31:0] mem_data_read_int; // no byte/halfword memory access by HW
+wire [31:0] mem_data_write_int; // byte and halfword memory access must be emulated
+
+// ----- reorder bytes due to MSB-LSB configuration -----
+assign mem_data_read_int = { mem_data_read[7:0], mem_data_read[15:8], mem_data_read[23:16], mem_data_read[31:24] };
+assign mem_data_write = { mem_data_write_int[7:0], mem_data_write_int[15:8], mem_data_write_int[23:16], mem_data_write_int[31:24] };
+`endif
+
+// ------ datapath registers and connections -----------
+reg [31:0] pc; // program counter (byte align)
+reg [31:0] sp; // stack counter (word align)
+reg [31:0] a; // operand (address_out, data_out, alu_in)
+reg [31:0] b; // operand (address_out)
+reg idim; // im opcode being processed
+reg [7:0] opcode; // opcode being processed (combinational view into opcode_cache)
+reg [31:2] pc_cached; // cached PC
+reg [31:0] opcode_cache; // cached opcodes (current word)
+`ifdef ENABLE_CPU_INTERRUPTS
+ reg int_requested; // interrupt has been requested
+ reg on_interrupt; // serving interrupt
+ wire exit_interrupt; // microcode says this is poppc_interrupt
+ wire enter_interrupt; // microcode says we are entering interrupt
+`endif
+wire [1:0] sel_opcode = pc[1:0]; // which opcode is selected
+wire sel_read; // mux for data-in
+wire [1:0] sel_alu; // mux for alu
+wire [1:0] sel_addr; // mux for addr
+wire w_pc; // write PC
+`ifdef ENABLE_PC_INCREMENT
+ wire w_pc_increment; // write PC+1
+`endif
+wire w_sp; // write SP
+wire w_a; // write A (from ALU result)
+wire w_a_mem; // write A (from MEM read)
+wire w_b; // write B
+wire w_op; // write OPCODE (opcode cache)
+wire set_idim; // set IDIM
+wire clear_idim; // clear IDIM
+wire is_op_cached = (pc[31:2] == pc_cached) ? 1'b1 : 1'b0; // is opcode available?
+wire a_is_zero; // A == 0
+wire a_is_neg; // A[31] == 1
+wire busy; // busy signal to microcode sequencer (stalls cpu)
+
+reg [`MC_MEM_BITS-1:0] mc_pc; // microcode PC
+initial mc_pc <= `MC_ADDR_RESET-1;
+wire [`MC_BITS-1:0] mc_op; // current microcode operation
+
+// memory addr / write ports
+// pc is the fall-through source when sel_addr matches none of SP / A / B
+assign mem_addr = (sel_addr == `SEL_ADDR_SP) ? sp :
+ (sel_addr == `SEL_ADDR_A) ? a :
+ (sel_addr == `SEL_ADDR_B) ? b : pc;
+assign mem_data_write_int = a; // only A can be written to memory
+
+// ------- alu instantiation -------
+wire [31:0] alu_a;
+wire [31:0] alu_b;
+wire [31:0] alu_r;
+wire [`ALU_OP_WIDTH-1:0] alu_op;
+wire alu_done;
+
+// alu inputs multiplexors
+// alu_a: memory read data, or the currently selected address register (mem_addr)
+// constant in microcode is sign extended (in order to implement substractions like adds)
+assign alu_a = (sel_read == `SEL_READ_DATA) ? mem_data_read_int : mem_addr;
+assign alu_b = (sel_alu == `SEL_ALU_MC_CONST) ? { {25{mc_op[`P_ADDR+6]}} , mc_op[`P_ADDR+6:`P_ADDR] } : // most priority
+ (sel_alu == `SEL_ALU_A) ? a :
+ (sel_alu == `SEL_ALU_B) ? b : { {24{1'b0}} , opcode }; // `SEL_ALU_OPCODE is less priority
+
+zpu_core_alu alu(
+ .alu_a(alu_a),
+ .alu_b(alu_b),
+ .alu_r(alu_r),
+ .alu_op(alu_op),
+ .flag_idim(idim),
+ .clk(clk),
+ .done(alu_done)
+);
+
+// -------- pc : program counter --------
+// an explicit write (w_pc) has priority over the increment shortcut
+always @(posedge clk)
+begin
+ if(w_pc) pc <= alu_r;
+`ifdef ENABLE_PC_INCREMENT // microcode optimization
+ else if(w_pc_increment) pc <= pc + 1; // usually pc=pc+1
+`endif
+end
+
+// -------- sp : stack pointer --------
+always @(posedge clk)
+begin
+ if(w_sp) sp <= alu_r;
+end
+
+// -------- a : acumulator register ---------
+// loaded from the ALU result or, with lower priority, straight from memory
+always @(posedge clk)
+begin
+ if(w_a) a <= alu_r;
+ else if(w_a_mem) a <= mem_data_read_int;
+end
+
+// alu results over a register instead of alu result
+// in order to improve speed
+assign a_is_zero = (a == 0);
+assign a_is_neg = a[31];
+
+// -------- b : auxiliary register ---------
+always @(posedge clk)
+begin
+ if(w_b) b <= alu_r;
+end
+
+// -------- opcode and opcode_cache --------
+always @(posedge clk)
+begin
+ if(w_op)
+ begin
+ opcode_cache <= alu_r; // store all opcodes in the word
+ pc_cached <= pc[31:2]; // store PC address of cached opcodes
+ end
+end
+
+// -------- opcode : based on pc[1:0] ---------
+// pc[1:0]==0 selects the most significant byte of the cached word
+always @(sel_opcode or opcode_cache) // select current opcode from
+begin // the cached opcode word
+ case(sel_opcode)
+ 0 : opcode <= opcode_cache[31:24];
+ 1 : opcode <= opcode_cache[23:16];
+ 2 : opcode <= opcode_cache[15:8];
+ 3 : opcode <= opcode_cache[7:0];
+ endcase
+end
+
+// ------- idim : immediate opcode handling ----------
+// set has priority over clear when microcode asserts both
+always @(posedge clk)
+begin
+ if(set_idim) idim <= 1'b1;
+ else if(clear_idim) idim <= 1'b0;
+end
+
+`ifdef ENABLE_CPU_INTERRUPTS
+// ------ on interrupt status bit -----
+always @(posedge clk)
+begin
+ if(reset | exit_interrupt) on_interrupt <= 1'b0;
+ else if(enter_interrupt) on_interrupt <= 1'b1;
+end
+`endif
+
+// ------ microcode execution unit --------
+// every write strobe is qualified with ~busy so that a stalled microcode
+// step does not update the datapath more than once
+assign sel_read = mc_op[`P_SEL_READ]; // map datapath signals with microcode program bits
+assign sel_alu = mc_op[`P_SEL_ALU+1:`P_SEL_ALU];
+assign sel_addr = mc_op[`P_SEL_ADDR+1:`P_SEL_ADDR];
+assign alu_op = mc_op[`P_ALU+3:`P_ALU];
+assign w_sp = mc_op[`P_W_SP] & ~busy;
+assign w_pc = mc_op[`P_W_PC] & ~busy;
+assign w_a = mc_op[`P_W_A] & ~busy;
+assign w_a_mem = mc_op[`P_W_A_MEM] & ~busy;
+assign w_b = mc_op[`P_W_B] & ~busy;
+assign w_op = mc_op[`P_W_OPCODE] & ~busy;
+assign mem_read = mc_op[`P_MEM_R];
+assign mem_write = mc_op[`P_MEM_W];
+assign set_idim = mc_op[`P_SET_IDIM] & ~busy;
+assign clear_idim= mc_op[`P_CLEAR_IDIM] & ~busy;
+`ifdef ENABLE_BYTE_SELECT
+assign byte_op = mc_op[`P_BYTE];
+assign halfw_op = mc_op[`P_HALFWORD];
+`endif
+`ifdef ENABLE_PC_INCREMENT
+ assign w_pc_increment = mc_op[`P_PC_INCREMENT] & ~busy;
+`endif
+`ifdef ENABLE_CPU_INTERRUPTS
+ assign exit_interrupt = mc_op[`P_EXIT_INT] & ~busy;
+ assign enter_interrupt = mc_op[`P_ENTER_INT] & ~busy;
+`endif
+
+wire cond_op_not_cached = mc_op[`P_OP_NOT_CACHED]; // conditional: true if opcode not cached
+wire cond_a_zero = mc_op[`P_A_ZERO]; // conditional: true if A is zero
+wire cond_a_neg = mc_op[`P_A_NEG]; // conditional: true if A is negative
+wire decode = mc_op[`P_DECODE]; // decode means jumps to apropiate microcode based on zpu opcode
+wire branch = mc_op[`P_BRANCH]; // unconditional jump inside microcode
+
+wire [`MC_MEM_BITS-1:0] mc_goto = { mc_op[`P_ADDR+6:`P_ADDR], 2'b00 }; // microcode goto (goto = high 7 bits)
+wire [`MC_MEM_BITS-1:0] mc_entry = { opcode[6:0], 2'b00 }; // microcode entry point for opcode
+reg [`MC_MEM_BITS-1:0] next_mc_pc; // next microcode operation to be executed
+initial next_mc_pc <= `MC_ADDR_RESET-1;
+
+wire cond_branch = (cond_op_not_cached & ~is_op_cached) | // sum of all conditionals
+ (cond_a_zero & a_is_zero) |
+ (cond_a_neg & a_is_neg);
+
+// stalled while a requested memory access is not acknowledged or the ALU is not done
+assign busy = ((mem_read | mem_write) & ~mem_done) | ~alu_done; // busy signal for microcode sequencer
+
+// ------- handle interrupts ---------
+`ifdef ENABLE_CPU_INTERRUPTS
+// latch the request until the interrupt is being served (on_interrupt)
+always @(posedge clk)
+begin
+ if(reset | on_interrupt) int_requested <= 0;
+ else if(interrupt & ~on_interrupt & ~int_requested) int_requested <= 1; // interrupt requested
+end
+`endif
+
+// ----- calculate next microcode address (next, decode, branch, specific opcode, etc.) -----
+// priority: reset > stall (hold mc_pc) > branch / conditional branch > decode > mc_pc + 1
+always @(reset or mc_pc or mc_goto or opcode[7:4] or idim or
+ decode or branch or cond_branch or mc_entry or busy
+`ifdef ENABLE_CPU_INTERRUPTS
+ or int_requested
+`endif
+)
+begin
+ // default, next microcode instruction
+ next_mc_pc <= mc_pc + 1;
+ if(reset) next_mc_pc <= `MC_ADDR_RESET;
+ else if(~busy)
+ begin
+ // get next microcode instruction
+ if(branch | cond_branch) next_mc_pc <= mc_goto;
+ else if(decode) // decode: entry point of a new zpu opcode
+ begin
+`ifdef ENABLE_CPU_INTERRUPTS
+ // a pending interrupt is only taken between opcodes and never in the middle of an im sequence
+ if(int_requested & ~idim) next_mc_pc <= `MC_ADDR_INTERRUPT; // microde to enter interrupt mode
+ else
+`endif
+ if(opcode[7] == `OP_IM) next_mc_pc <= (idim ? `MC_ADDR_IM_IDIM : `MC_ADDR_IM_NOIDIM);
+ else if(opcode[7:5] == `OP_STORESP) next_mc_pc <= `MC_ADDR_STORESP;
+ else if(opcode[7:5] == `OP_LOADSP) next_mc_pc <= `MC_ADDR_LOADSP;
+ else if(opcode[7:4] == `OP_ADDSP) next_mc_pc <= `MC_ADDR_ADDSP;
+ else next_mc_pc <= mc_entry; // includes EMULATE opcodes
+ end
+ end
+ else next_mc_pc <= mc_pc; // in case of cpu stalled (busy=1)
+end
+
+// set microcode program counter
+always @(posedge clk) mc_pc <= next_mc_pc;
+
+// ----- microcode program ------
+// the rom is addressed with next_mc_pc (not mc_pc) and is clocked
+zpu_core_rom microcode (
+ .addr(next_mc_pc),
+ .data(mc_op),
+ .clk(clk)
+);
+
+// -------------- ZPU debugger --------------------
+`ifdef ZPU_CORE_DEBUG
+//synthesis translate_off
+// ---- register operation dump ----
+// each message shows the value that the register will take on this clock edge
+always @(posedge clk)
+begin
+ if(~reset)
+ begin
+ if(w_pc) $display("zpu_core: set PC=0x%h", alu.alu_r);
+`ifdef ENABLE_PC_INCREMENT
+ // the increment only takes effect when w_pc is not asserted; show the incremented value
+ if(w_pc_increment & ~w_pc) $display("zpu_core: set PC=0x%h (PC+1)", pc + 1);
+`endif
+ if(w_sp) $display("zpu_core: set SP=0x%h", alu.alu_r);
+ if(w_a) $display("zpu_core: set A=0x%h", alu.alu_r);
+ if(w_a_mem) $display("zpu_core: set A=0x%h (from MEM)", mem_data_read_int);
+ if(w_b) $display("zpu_core: set B=0x%h", alu.alu_r);
+ if(w_op & ~is_op_cached) $display("zpu_core: set opcode_cache=0x%h, pc_cached=0x%h", alu.alu_r, {pc[31:2], 2'b0});
+`ifdef ENABLE_CPU_INTERRUPTS
+ if(~busy & mc_pc == `MC_ADDR_INTERRUPT) $display("zpu_core: ***** ENTERING INTERRUPT MICROCODE ******");
+ if(~busy & exit_interrupt) $display("zpu_core: ***** INTERRUPT FLAG CLEARED *****");
+ if(~busy & enter_interrupt) $display("zpu_core: ***** INTERRUPT FLAG SET *****");
+`endif
+ if(set_idim & ~idim) $display("zpu_core: IDIM=1");
+ if(clear_idim & idim) $display("zpu_core: IDIM=0");
+
+// ---- microcode debug ----
+`ifdef ZPU_CORE_DEBUG_MICROCODE
+ if(~busy)
+ begin
+ $display("zpu_core: mc_op[%d]=0b%b", mc_pc, mc_op);
+ if(branch) $display("zpu_core: microcode: branch=%d", mc_goto);
+ if(cond_branch) $display("zpu_core: microcode: CONDITION branch=%d", mc_goto);
+ if(decode) $display("zpu_core: decoding opcode=0x%h (0b%b) : branch to=%d ", opcode, opcode, mc_entry);
+ end
+ else $display("zpu_core: busy");
+`endif
+
+// ---- cpu abort in case of unaligned memory access ---
+`ifdef ASSERT_NON_ALIGNMENT
+ /* unaligned word access (except PC) */
+ // byte_op / halfw_op only exist when ENABLE_BYTE_SELECT is defined;
+ // without it every memory access is a word access
+`ifdef ENABLE_BYTE_SELECT
+ if(sel_addr != `SEL_ADDR_PC & mem_addr[1:0] != 2'b00 & (mem_read | mem_write) & !byte_op & !halfw_op)
+`else
+ if(sel_addr != `SEL_ADDR_PC & mem_addr[1:0] != 2'b00 & (mem_read | mem_write))
+`endif
+ begin
+ $display("zpu_core: unaligned word operation at addr=0x%x", mem_addr);
+ $finish;
+ end
+
+`ifdef ENABLE_BYTE_SELECT
+ /* unaligned halfword access */
+ if(mem_addr[0] & (mem_read | mem_write) & !byte_op & halfw_op)
+ begin
+ $display("zpu_core: unaligned halfword operation at addr=0x%x", mem_addr);
+ $finish;
+ end
+`endif
+`endif
+
+ end
+end
+
+// ----- opcode dissasembler ------
+// keyed on the microcode address being executed: { opcode[6:0], 2'b00 } is the
+// entry point of each opcode (see mc_entry), hence the step of 4 between labels
+always @(posedge clk)
+begin
+if(~busy)
+case(mc_pc)
+0 : begin
+ $display("zpu_core: ------ breakpoint ------");
+ $finish;
+ end
+4 : $display("zpu_core: ------ shiftleft ------");
+8 : $display("zpu_core: ------ pushsp ------");
+12 : $display("zpu_core: ------ popint ------");
+16 : $display("zpu_core: ------ poppc ------");
+20 : $display("zpu_core: ------ add ------");
+24 : $display("zpu_core: ------ and ------");
+28 : $display("zpu_core: ------ or ------");
+32 : $display("zpu_core: ------ load ------");
+36 : $display("zpu_core: ------ not ------");
+40 : $display("zpu_core: ------ flip ------");
+44 : $display("zpu_core: ------ nop ------");
+48 : $display("zpu_core: ------ store ------");
+52 : $display("zpu_core: ------ popsp ------");
+56 : $display("zpu_core: ------ ipsum ------");
+60 : $display("zpu_core: ------ sncpy ------");
+
+`MC_ADDR_IM_NOIDIM : $display("zpu_core: ------ im 0x%h (1st) ------", opcode[6:0] );
+`MC_ADDR_IM_IDIM : $display("zpu_core: ------ im 0x%h (cont) ------", opcode[6:0] );
+`MC_ADDR_STORESP : $display("zpu_core: ------ storesp 0x%h ------", { ~opcode[4], opcode[3:0], 2'b0 } );
+`MC_ADDR_LOADSP : $display("zpu_core: ------ loadsp 0x%h ------", { ~opcode[4], opcode[3:0], 2'b0 } );
+`MC_ADDR_ADDSP : $display("zpu_core: ------ addsp 0x%h ------", { ~opcode[4], opcode[3:0], 2'b0 } );
+`MC_ADDR_EMULATE : $display("zpu_core: ------ emulate 0x%h ------", b[2:0]); // opcode[5:0] );
+
+128 : $display("zpu_core: ------ mcpy ------");
+132 : $display("zpu_core: ------ mset ------");
+136 : $display("zpu_core: ------ loadh ------");
+140 : $display("zpu_core: ------ storeh ------");
+144 : $display("zpu_core: ------ lessthan ------");
+148 : $display("zpu_core: ------ lessthanorequal ------");
+152 : $display("zpu_core: ------ ulessthan ------");
+156 : $display("zpu_core: ------ ulessthanorequal ------");
+160 : $display("zpu_core: ------ swap ------");
+164 : $display("zpu_core: ------ mult ------");
+168 : $display("zpu_core: ------ lshiftright ------");
+172 : $display("zpu_core: ------ ashiftleft ------");
+176 : $display("zpu_core: ------ ashiftright ------");
+180 : $display("zpu_core: ------ call ------");
+184 : $display("zpu_core: ------ eq ------");
+188 : $display("zpu_core: ------ neq ------");
+192 : $display("zpu_core: ------ neg ------");
+196 : $display("zpu_core: ------ sub ------");
+200 : $display("zpu_core: ------ xor ------");
+204 : $display("zpu_core: ------ loadb ------");
+208 : $display("zpu_core: ------ storeb ------");
+212 : $display("zpu_core: ------ div ------");
+216 : $display("zpu_core: ------ mod ------");
+220 : $display("zpu_core: ------ eqbranch ------");
+224 : $display("zpu_core: ------ neqbranch ------");
+228 : $display("zpu_core: ------ poppcrel ------");
+232 : $display("zpu_core: ------ config ------");
+236 : $display("zpu_core: ------ pushpc ------");
+240 : $display("zpu_core: ------ syscall_emulate ------");
+244 : $display("zpu_core: ------ pushspadd ------");
+248 : $display("zpu_core: ------ halfmult ------");
+252 : $display("zpu_core: ------ callpcrel ------");
+//default : $display("zpu_core: mc_pc=0x%h", decode_mcpc);
+endcase
+end
+//synthesis translate_on
+`endif
+endmodule
+
+// --------- ZPU CORE ALU UNIT ---------------
+module zpu_core_alu(
+ alu_a, // parameter A
+ alu_b, // parameter B
+ alu_r, // computed result
+ flag_idim, // for IMM alu op
+ alu_op, // ALU operation
+ clk, // clock for syncronous multicycle operations
+ done // done signal for alu operation
+);
+
+// Overview:
+//  - alu_r and done are combinational functions of the inputs and of the
+//    state of the optional multi-cycle units (multiplier, divider, barrel shifter)
+//  - single cycle operations report done=1 immediately
+//  - a multi-cycle operation holds done=0 and its xx_running flag while alu_op
+//    selects it; the caller must keep alu_a / alu_b / alu_op stable until done=1
+
+input [31:0] alu_a;
+input [31:0] alu_b;
+input [`ALU_OP_WIDTH-1:0] alu_op;
+input flag_idim;
+output [31:0] alu_r;
+input clk;
+output done;
+
+wire [31:0] alu_a;
+wire [31:0] alu_b;
+wire [`ALU_OP_WIDTH-1:0] alu_op;
+wire flag_idim;
+reg [31:0] alu_r;
+wire clk;
+reg done;
+
+`ifdef ENABLE_MULT
+// implement 32 bit pipeline multiplier
+// three register stages (operands -> product -> result); mul_done is reached
+// on the third clock after mul_running is raised. All stages fall back to 0
+// on any clock where mul_running is low.
+reg mul_running;
+reg [2:0] mul_counter;
+wire mul_done = (mul_counter == 3);
+reg [31:0] mul_result, mul_tmp1;
+reg [31:0] a_in, b_in;
+
+always@(posedge clk)
+begin
+ a_in <= 0;
+ b_in <= 0;
+ mul_tmp1 <= 0;
+ mul_result <= 0;
+ mul_counter <= 0;
+ if(mul_running)
+ begin // infer pipeline multiplier
+ a_in <= alu_a;
+ b_in <= alu_b;
+ mul_tmp1 <= a_in * b_in;
+ mul_result <= mul_tmp1;
+ mul_counter <= mul_counter + 1;
+ end
+end
+`endif
+
+`ifdef ENABLE_DIV
+// implement 32 bit divider
+// Unsigned/Signed division based on Patterson and Hennessy's algorithm.
+// Description: Calculates quotient. The "sign" input determines whether
+// signs (two's complement) should be taken into consideration.
+// references: http://www.ece.lsu.edu/ee3755/2002/l07.html
+reg [63:0] qr;
+wire [33:0] diff;
+wire [31:0] quotient;
+wire [31:0] dividend;
+wire [31:0] divider;
+reg [6:0] bit;
+wire div_done;
+reg div_running;
+// NOTE(review): divide_sign is declared and read below but never assigned in this
+// module - confirm how signed/unsigned division is meant to be selected
+reg divide_sign;
+reg negative_output;
+
+assign div_done = !bit; // bit counts 32 -> 0; 7'b1_000000 means "stopped"
+assign diff = qr[63:31] - {1'b0, divider};
+assign quotient = (!negative_output) ? qr[31:0] : ~qr[31:0] + 1'b1;
+assign dividend = (!divide_sign || !alu_a[31]) ? alu_a : ~alu_a + 1'b1;
+assign divider = (!divide_sign || !alu_b[31]) ? alu_b : ~alu_b + 1'b1;
+
+always@(posedge clk)
+begin
+ bit <= 7'b1_000000; // divider stopped
+ if(div_running)
+ begin
+ if(bit[6]) // divider started: initialize registers
+ begin
+ bit <= 7'd32;
+ qr <= { 32'd0, dividend };
+ negative_output <= divide_sign && ((alu_b[31] && !alu_a[31]) || (!alu_b[31] && alu_a[31]));
+ end
+ else // step by step divide
+ begin
+ if( diff[32] ) qr <= { qr[62:0], 1'd0 }; // subtraction went negative: shift in a 0
+ else qr <= { diff[31:0], qr[30:0], 1'd1 }; // keep the difference and shift in a 1
+ bit <= bit - 1;
+ end
+ end
+end
+`endif
+
+`ifdef ENABLE_BARREL
+// implement 32 bit barrel shift
+// alu_b[6] == 1 ? left(only arithmetic) : right
+// alu_b[5] == 1 ? logical : arithmetic
+// shifts one bit per clock: bs_result is reloaded from alu_a while idle and
+// shifted while bs_running, until bs_counter reaches alu_b[4:0]
+reg bs_running;
+reg [31:0] bs_result;
+reg [4:0] bs_counter; // 5 bits
+wire bs_left = alu_b[6];
+wire bs_logical = alu_b[5];
+wire [4:0] bs_moves = alu_b[4:0];
+wire bs_done = (bs_counter == bs_moves);
+
+always @(posedge clk)
+begin
+ bs_counter <= 0;
+ bs_result <= alu_a;
+ if(bs_running)
+ begin
+ if(bs_left) bs_result <= { bs_result[30:0], 1'b0 }; // shift left
+ else
+ begin
+ if(bs_logical) bs_result <= { 1'b0, bs_result[31:1] }; // shift logical right
+ else bs_result <= { bs_result[31], bs_result[31], bs_result[30:1] };// shift arithmetic right
+ end
+ bs_counter <= bs_counter + 1;
+ end
+end
+`endif
+
+// ----- alu add/sub -----
+reg [31:0] alu_b_tmp;
+always @(alu_b or alu_op)
+begin
+ alu_b_tmp <= alu_b; // by default, ALU_B as is
+ // offset form: bit 4 inverted, bits [3:0] kept, result multiplied by 4 (word offset)
+ if(alu_op == `ALU_PLUS_OFFSET) alu_b_tmp <= { {25{1'b0}}, ~alu_b[4], alu_b[3:0], 2'b0 }; // ALU_B is an offset if ALU_PLUS_OFFSET operation
+end
+
+reg [31:0] alu_r_addsub; // compute R=A+B or A-B based on opcode (ALU_PLUSxx / ALU_SUB-CMP)
+always @(alu_a or alu_b_tmp or alu_op)
+begin
+`ifdef ENABLE_CMP
+ if(alu_op == `ALU_CMP_SIGNED || alu_op == `ALU_CMP_UNSIGNED) // in case of sub or cmp --> operation is '-'
+ begin
+ alu_r_addsub <= alu_a - alu_b_tmp;
+ end
+ else
+`endif
+ begin
+ alu_r_addsub <= alu_a + alu_b_tmp; // by default '+' operation
+ end
+end
+
+`ifdef ENABLE_CMP
+// handle overflow/underflow exceptions in ALU_CMP_SIGNED
+// raised when the operand signs differ and the sign of the result differs from alu_a
+reg cmp_exception;
+always @(alu_a[31] or alu_b[31] or alu_r_addsub[31])
+begin
+ cmp_exception <= 0;
+ if( (alu_a[31] == 0 && alu_b[31] == 1 && alu_r_addsub[31] == 1) ||
+ (alu_a[31] == 1 && alu_b[31] == 0 && alu_r_addsub[31] == 0) ) cmp_exception <= 1;
+end
+`endif
+
+// ----- alu operation selection -----
+// the sensitivity list must name every signal read below; the divider results
+// read here are quotient and qr
+always @(alu_a or alu_b or alu_op or flag_idim or alu_r_addsub
+`ifdef ENABLE_CMP
+ or cmp_exception
+`endif
+`ifdef ENABLE_MULT
+ or mul_done or mul_result
+`endif
+`ifdef ENABLE_BARREL
+ or bs_done or bs_result
+`endif
+`ifdef ENABLE_DIV
+ or div_done or quotient or qr
+`endif
+)
+begin
+ done <= 1; // default alu operations are 1 cycle
+`ifdef ENABLE_MULT
+ mul_running <= 0;
+`endif
+`ifdef ENABLE_BARREL
+ bs_running <= 0;
+`endif
+`ifdef ENABLE_DIV
+ div_running <= 0;
+`endif
+ alu_r <= alu_r_addsub; // ALU_PLUS, ALU_PLUS_OFFSET, ALU_SUB and part of ALU_CMP
+ case(alu_op)
+ `ALU_NOP : alu_r <= alu_a;
+ `ALU_NOP_B : alu_r <= alu_b;
+ `ALU_AND : alu_r <= alu_a & alu_b;
+ `ALU_OR : alu_r <= alu_a | alu_b;
+ `ALU_NOT : alu_r <= ~alu_a;
+ `ALU_FLIP : alu_r <= { alu_a[0], alu_a[1], alu_a[2], alu_a[3], alu_a[4], alu_a[5], alu_a[6], alu_a[7],
+ alu_a[8],alu_a[9],alu_a[10],alu_a[11],alu_a[12],alu_a[13],alu_a[14],alu_a[15],
+ alu_a[16],alu_a[17],alu_a[18],alu_a[19],alu_a[20],alu_a[21],alu_a[22],alu_a[23],
+ alu_a[24],alu_a[25],alu_a[26],alu_a[27],alu_a[28],alu_a[29],alu_a[30],alu_a[31] };
+ // im: first im sign extends the 7 bit constant, following ones shift it into A
+ `ALU_IM : if(flag_idim) alu_r <= { alu_a[24:0], alu_b[6:0] };
+ else alu_r <= { {25{alu_b[6]}}, alu_b[6:0] };
+`ifdef ENABLE_CMP
+ // compares only patch bit 31 of the subtraction result (the default alu_r above)
+ `ALU_CMP_UNSIGNED:if( (alu_a[31] == alu_b[31] && cmp_exception) ||
+ (alu_a[31] != alu_b[31] && ~cmp_exception) )
+ begin
+ alu_r[31] <= ~alu_r_addsub[31];
+ end
+ `ALU_CMP_SIGNED : if(cmp_exception)
+ begin
+ alu_r[31] <= ~alu_r_addsub[31];
+ end
+`endif
+`ifdef ENABLE_XOR
+ `ALU_XOR : alu_r <= alu_a ^ alu_b;
+`endif
+`ifdef ENABLE_A_SHIFT
+ `ALU_A_SHIFT_RIGHT: alu_r <= { alu_a[31], alu_a[31], alu_a[30:1] }; // arithmetic shift right by one bit
+`endif
+`ifdef ENABLE_MULT
+ `ALU_MULT : begin
+ mul_running <= ~mul_done;
+ done <= mul_done;
+ alu_r <= mul_result;
+ end
+`endif
+`ifdef ENABLE_BARREL
+ `ALU_BARREL : begin
+ bs_running <= ~bs_done;
+ done <= bs_done;
+ alu_r <= bs_result;
+ end
+`endif
+`ifdef ENABLE_DIV
+ `ALU_DIV : begin
+ div_running<= ~div_done;
+ done <= div_done;
+ alu_r <= quotient;
+ end
+ // NOTE(review): ALU_MOD returns qr[31:0], the same half of qr that quotient is
+ // taken from; with this shift/subtract scheme the remainder would be expected
+ // in qr[63:32] - confirm against the microcode before relying on it
+ `ALU_MOD : begin
+ div_running<= ~div_done;
+ done <= div_done;
+ alu_r <= qr[31:0];
+ end
+`endif
+ endcase
+end
+
+endmodule
diff --git a/zpu/hdl/avalanche/core/zpu_core_defines.v b/zpu/hdl/avalanche/core/zpu_core_defines.v
new file mode 100644
index 0000000..228f46b
--- /dev/null
+++ b/zpu/hdl/avalanche/core/zpu_core_defines.v
@@ -0,0 +1,322 @@
+/* MODULE: zpu_core_defines
+ DESCRIPTION: Contains ZPU parameters and other cpu related definitions
+ AUTHOR: Antonio J. Anton (aj <at> anro-ingenieros.com)
+
+REVISION HISTORY:
+Revision 1.0, 14/09/2009
+Initial public release
+
+COPYRIGHT:
+Copyright (c) 2009 Antonio J. Anton
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.*/
+
+/* --------------- ISA DOCUMENTATION ------------------
+ stack: top of stack = sp, mem[sp]=valid data
+ push: sp=sp-1, then mem[sp]=data
+ pop: data=mem[sp], then sp=sp+1
+
+ immediates: any opcode other than im clears the idim flag (idim=0)
+
+ MNEMONIC OPCODE HEX OPERATION
+- im x 1_xxxxxxx if(~idim) { idim=1; sp=sp-1; mem[sp]={{25{b[6]}},b[6:0]} }
+ else { idim=1; mem[sp]={mem[sp][24:0], b[6:0]} }
+- emulate x 001_xxxxx sp=sp-1; mem[sp]=pc+1; pc=mem[@VECTOR_EMULATE + <b>]; fetch (used only by microcode)
+- storesp x 010_xxxxx mem[sp+x<<2] = mem[sp]; sp=sp+1
+- loadsp x 011_xxxxx mem[sp-1] = mem [sp+x<<2]; sp=sp-1
+- addsp x 0001_xxxx (1x) mem[sp] = mem[sp]+mem[sp+x<<2]
+
+- breakpoint 0000_0000 (00) call exception vector
+ shiftleft 0000_0001 (01)
+- pushsp 0000_0010 (02) mem[sp-1] = sp; sp = sp - 1
+- popint 0000_0011 (03) pc=mem[sp]; sp = sp + 1 ; fetch ; decode ; clear_interrupt_flag
+- poppc 0000_0100 (04) pc=mem[sp]; sp = sp + 1
+- add 0000_0101 (05) mem[sp+1] = mem[sp+1] + mem[sp]; sp = sp + 1
+- and 0000_0110 (06) mem[sp+1] = mem[sp+1] & mem[sp]; sp = sp + 1
+- or 0000_0111 (07) mem[sp+1] = mem[sp+1] | mem[sp]; sp = sp + 1
+- load 0000_1000 (08) mem[sp] = mem[ mem[sp] ]
+- not 0000_1001 (09) mem[sp] = ~mem[sp]
+- flip 0000_1010 (0a) mem[sp] = flip(mem[sp])
+- nop 0000_1011 (0b) -
+- store 0000_1100 (0c) mem[mem[sp]] = mem[sp+1]; sp = sp + 2
+- popsp 0000_1101 (0d) sp = mem[sp]
+ compare 0000_1110 (0e) ???? --> opcode recycled (see below)
+ popint 0000_1111 (0f) duplicate of 0x03 ????? --> opcode recycled (see below)
+
+- ipsum 0000_1110 (0e) c=mem[sp],s=mem[sp+1]; sum=0; while(c-->0) {sum+=halfword(mem[s],s);s+=2}; sp=sp+1; mem[sp]=sum (overwrites mem[0] & mem[4] words)
+- sncpy 0000_1111 (0f) c=mem[sp],d=mem[sp+1],s=mem[sp+2]; while( *(char*)s != 0 && c>0 ) {*((char*)d++)=*((char*)s++));c--}; sp=sp+3 (overwrites mem[0] & mem[4] words)
+- wcpy 001_00000 (20) c=mem[sp],d=mem[sp+1],s=mem[sp+2]; while(c-->0) mem[d++]=mem[s++]; sp=sp+3 (overwrites mem[0] & mem[4] words)
+- wset 001_00001 (21) v=mem[sp],c=mem[sp+1],d=mem[sp+2]; while(c-->0) mem[d++]=v; sp=sp+3 (overwrites mem[0] & mem[4] words)
+
+- loadh 001_00010 (22) mem[sp] = halfword[ mem[sp] ]
+- storeh 001_00011 (23) halfword[mem[sp]] = (mem[sp+1] & 0xFFFF); sp = sp + 2
+- lessthan 001_00100 (24) (mem[sp]-mem[sp+1]) < 0 ? mem[sp+1]=1 : mem[sp+1]=0; sp = sp + 1
+- lessthanorequal 001_00101 (25) (mem[sp]-mem[sp+1]) <= 0 ? mem[sp+1]=1 : mem[sp+1]=0; sp = sp + 1
+- ulessthan 001_00110 (26) (unsigned(mem[sp])-unsigned(mem[sp+1])) < 0 ? mem[sp+1]=1 : mem[sp+1]=0; sp = sp + 1
+- ulessthanorequal 001_00111 (27) (unsigned(mem[sp])-unsigned(mem[sp+1])) <= 0 || == 0 ? mem[sp+1]=1 : mem[sp+1]=0; sp = sp + 1
+ swap 001_01000 (28)
+- mult 001_01001 (29) mem[sp+1] = mem[sp+1] * mem[sp]; sp = sp + 1
+- lshiftright 001_01010 (2a) mem[sp+1] = mem[sp+1] >> (mem[sp] & 0x1f); sp = sp + 1
+- ashiftleft 001_01011 (2b) mem[sp+1] = mem[sp+1] << (mem[sp] & 0x1f); sp = sp + 1
+- ashiftright 001_01100 (2c) mem[sp+1] = mem[sp+1] signed>> (mem[sp] & 0x1f); sp = sp + 1
+- call 001_01101 (2d) a = mem[sp]; mem[sp]=pc + 1; pc = a
+- eq 001_01110 (2e) mem[sp+1] = (mem[sp] == mem[sp+1]) ? 1 : 0; sp = sp + 1
+- neq 001_01111 (2f) mem[sp+1] = (mem[sp] != mem[sp+1]) ? 1 : 0; sp = sp + 1
+- neg 001_10000 (30) mem[sp] = NOT(mem[sp])+1
+- sub 001_10001 (31) mem[sp+1]=mem[sp+1]-mem[sp]; sp=sp+1
+- xor 001_10010 (32) mem[sp+1]=mem[sp] ^ mem[sp+1]; sp=sp+1
+- loadb 001_10011 (33) mem[sp] = byte[ mem[sp] ]
+- storeb 001_10100 (34) byte[mem[sp]] = (mem[sp+1] & 0xFF); sp = sp + 2
+ div 001_10101 (35)
+ mod 001_10110 (36)
+- eqbranch 001_10111 (37) mem[sp+1] == 0 ? pc = pc + mem[sp]; sp = sp + 2
+- neqbranch 001_11000 (38) mem[sp+1] != 0 ? pc = pc + mem[sp]; sp = sp + 2
+- poppcrel 001_11001 (39) pc = pc + mem[sp]; sp = sp + 1
+ config 001_11010 (3a)
+- pushpc 001_11011 (3b) sp=sp-1; mem[sp]=pc
+ syscall 001_11100 (3c)
+- pushspadd 001_11101 (3d) mem[sp] = sp + (mem[sp] << 2)
+- halfmult 001_11110 (3e) mem[sp+1] = 16bits(mem[sp]) * 16bits(mem[sp+1]); sp = sp + 1
+- callpcrel 001_11111 (3f) a = mem[sp]; mem[sp]=pc+1; pc = pc + a;
+
+ gcc seems to be using only:
+
+ add, addsp, and, ashiftleft, ashiftright, call, callpcrel, div, eq, flip, im, lessthan,
+ lessthanorequal, loadb, loadh, load, loadsp, lshiftright, mod, mult, neg, neqbranch,
+ not, or, poppc, poppcrel, popsp, pushpc, pushspadd, pushsp, storeb, storeh, store, storesp,
+ sub, ulessthan, ulessthanorequal, xor
+
+ --------- memory access ----------------------------
+
+ data is stored in big-endian format into memory:
+ 00 MSB .. .. LSB
+ 05 .. .. .. ..
+
+ ---------------------------------------------------- */
+`define SP_START 32'h10 // stack pointer value after reset (presumably loaded by the core at reset - confirm in zpu_core.v); the startup code is expected to change it
+`define EMULATION_VECTOR 32'h10 // base address of the table of emulated opcodes (interrupt & exception vectors plus up to 5 emulated opcodes)
+`define RESET_VECTOR 32'h20 // reset entry point (can be moved up to 0x3c as per emulation table needs)
+
+// ---- zpu core optimizations/features (comment a define out to disable that feature) ----
+`define ZPU_CORE_DEBUG
+//`define ZPU_CORE_DEBUG_MICROCODE
+`define ASSERT_NON_ALIGNMENT /* abort cpu on a non-aligned memory access (simulation only) */
+
+`define ENABLE_BYTE_SELECT /* allow byte / halfword memory accesses (also gates the P_BYTE / P_HALFWORD microcode bits) */
+`define ENABLE_CPU_INTERRUPTS /* enable interrupts to cpu (also gates the P_EXIT_INT / P_ENTER_INT microcode bits) */
+//`define ENABLE_PC_INCREMENT /* gain 1 clk per opcode but requires microcode changes ** not done at the moment ** */
+//`define ENABLE_A_SHIFT /* 1 bit arithmetic shift (right); mutually exclusive with ENABLE_BARREL (both use ALU code 11) */
+//`define ENABLE_XOR /* 1 cycle x-or */
+//`define ENABLE_MULT /* 32 bit pipelined (3 stages) multiplier */
+//`define ENABLE_DIV /* 32 bit, up to 32 cycles serial divider */
+`define ENABLE_BARREL /* n bit logical & arithmetic shift; mutually exclusive with ENABLE_A_SHIFT (both use ALU code 11) */
+`define ENABLE_CMP /* enable ALU_CMP_SIGNED and ALU_CMP_UNSIGNED */
+
+// ------- microcode zpu core datapath selectors (raw field values; the MC_SEL_* macros shift them into place) --------
+`define SEL_READ_DATA 0 // alu-A input = memory data-in
+`define SEL_READ_ADDR 1 // alu-A input = addr-out (the register chosen by SEL_ADDR_*)
+// alu-B input source (2 bits)
+`define SEL_ALU_A 0 // register a
+`define SEL_ALU_OPCODE 1 // current opcode
+`define SEL_ALU_MC_CONST 2 // constant carried in the microcode word (P_ADDR field)
+`define SEL_ALU_B 3 // register b
+// addr-out source (2 bits)
+`define SEL_ADDR_PC 0 // program counter
+`define SEL_ADDR_SP 1 // stack pointer
+`define SEL_ADDR_A 2 // register a
+`define SEL_ADDR_B 3 // register b
+
+`define ALU_OP_WIDTH 4 // alu operation is 4 bits (codes 0..15 below)
+
+`define ALU_NOP 0 // r = a
+`define ALU_NOP_B 1 // r = b
+`define ALU_PLUS 2 // r = a + b
+`define ALU_PLUS_OFFSET 3 // r = a + { 27'b0, ~b[4], b[3:0] }
+`define ALU_AND 4 // r = a AND b
+`define ALU_OR 5 // r = a OR b
+`define ALU_NOT 6 // r = NOT a
+`define ALU_FLIP 7 // r = FLIP a (bit order reversed)
+`define ALU_IM 8 // r = IDIM ? { a[24:0], b[6:0] } : { {25{b[6]}}, b[6:0] }
+`ifdef ENABLE_CMP
+ `define ALU_CMP_UNSIGNED 9 // r = (unsigned)a - (unsigned)b (r[31] is overflow/underflow adjusted)
+ `define ALU_CMP_SIGNED 10 // r = (signed)a - (signed)b (r[31] is overflow/underflow adjusted)
+`endif
+`ifdef ENABLE_BARREL
+ `define ALU_BARREL 11 // r = a <<|>> b (logical, arithmetical); shares code 11 with ALU_A_SHIFT_RIGHT
+`endif
+`ifdef ENABLE_A_SHIFT
+ `define ALU_A_SHIFT_RIGHT 11 // r = { a[31], a[31], a[30:1] } = (signed)a >> 1; shares code 11 with ALU_BARREL
+`endif
+`ifdef ENABLE_XOR
+ `define ALU_XOR 12 // r = a XOR b
+`endif
+`ifdef ENABLE_MULT
+ `define ALU_MULT 13 // r = a * b
+`endif
+`ifdef ENABLE_DIV
+ `define ALU_DIV 14 // r = a / b
+ `define ALU_MOD 15 // r = a mod b
+`endif
+
+// ------- special zpu opcodes (opcodes with an embedded operand are given as their leading bits) ------
+`define OP_NOP 8'b0000_1011 // default value for opcode cache on reset
+`define OP_IM 1'b1 // im x = 1_xxxxxxx
+`define OP_EMULATE 3'b001 // emulate x = 001_xxxxx
+`define OP_STORESP 3'b010 // storesp x = 010_xxxxx
+`define OP_LOADSP 3'b011 // loadsp x = 011_xxxxx
+`define OP_ADDSP 4'b0001 // addsp x = 0001_xxxx
+
+// ------- microcode memory settings ------
+`define MC_MEM_BITS 9 // microcode address width: 2^9 = 512 microcode operations
+`define MC_BITS 36 // width in bits of one microcode word
+
+// ------- microcode labels for opcode execution -------
+// based on microcode program: each value must match the address where that routine is placed in the microcode; labels encoded as (addr >> 2) must be multiples of 4
+`define MC_ADDR_IM_NOIDIM 488
+`define MC_ADDR_IM_IDIM 491
+`define MC_ADDR_STORESP 493
+`define MC_ADDR_LOADSP 496
+`define MC_ADDR_ADDSP 500
+`define MC_ADDR_EMULATE 504
+`define MC_ADDR_INTERRUPT 484
+`define MC_ADDR_FETCH_NEXT 480
+`define MC_ADDR_FETCH 476
+`define MC_ADDR_RESET 474
+
+// ---------- microcode settings: P_* is the bit position (LSB) of each field in the microcode word --------------------
+`define P_SEL_READ 0 // alu-A multiplexor between data-in and addr-out (1 bit)
+`define P_SEL_ALU 1 // alu-B multiplexor between a, b, mc_const or opcode (2 bits)
+`define P_SEL_ADDR 3 // addr-out multiplexor between sp, pc, a, b (2 bits)
+`define P_ALU 5 // alu operation (4 bits)
+`define P_W_SP 9 // write sp (from alu-out)
+`define P_W_PC 10 // write pc (from alu-out)
+`define P_W_A 11 // write a (from alu-out)
+`define P_W_B 12 // write b (from alu-out)
+`define P_SET_IDIM 13 // set idim flag
+`define P_CLEAR_IDIM 14 // clear idim flag
+`define P_W_OPCODE 15 // write opcode (from alu-out) : check if can be written directly from data-in
+`define P_DECODE 16 // jump to microcode entry point based on current opcode
+`define P_MEM_R 17 // request memory read
+`define P_MEM_W 18 // request memory write
+`define P_ADDR 19 // microcode address (7 bits, in units of 4 microcode words) or constant to be used at microcode level
+`define P_BRANCH 26 // microcode unconditional branch to address
+`define P_OP_NOT_CACHED 27 // microcode branch if byte[pc] is not cached at opcode
+`define P_A_ZERO 28 // microcode branch if a is zero
+`define P_A_NEG 29 // microcode branch if a is negative a[31]=1
+`define P_W_A_MEM 30 // write a directly from data-in (alu datapath is free to perform any other operation in parallel)
+`ifdef ENABLE_BYTE_SELECT
+ `define P_BYTE 31 // byte memory operation
+ `define P_HALFWORD 32 // half word memory operation
+`endif
+`ifdef ENABLE_PC_INCREMENT
+ `define P_PC_INCREMENT 33 // autoincrement PC bypassing ALU (1 clock gain per opcode) : not implemented at microcode level
+`endif
+`ifdef ENABLE_CPU_INTERRUPTS
+ `define P_EXIT_INT 34 // clear interrupt flag (exit from interrupt)
+ `define P_ENTER_INT 35 // set interrupt flag (enter interrupt); highest bit of the 36-bit microcode word
+`endif
+
+`define MC_SEL_READ_DATA (`SEL_READ_DATA << `P_SEL_READ) // 1 bit; evaluates to 0, so it is the default when no MC_SEL_READ_* is ORed in
+`define MC_SEL_READ_ADDR (`SEL_READ_ADDR << `P_SEL_READ) // alu-A input = addr-out
+
+`define MC_SEL_ALU_A (`SEL_ALU_A << `P_SEL_ALU) // 2 bit; evaluates to 0 (default alu-B input = a)
+`define MC_SEL_ALU_OPCODE (`SEL_ALU_OPCODE << `P_SEL_ALU) // alu-B input = opcode
+`define MC_SEL_ALU_MC_CONST (`SEL_ALU_MC_CONST << `P_SEL_ALU) // alu-B input = constant from the P_ADDR field
+`define MC_SEL_ALU_B (`SEL_ALU_B << `P_SEL_ALU) // alu-B input = b
+
+`define MC_SEL_ADDR_PC (`SEL_ADDR_PC << `P_SEL_ADDR) // 2 bits; evaluates to 0 (default addr-out = pc)
+`define MC_SEL_ADDR_SP (`SEL_ADDR_SP << `P_SEL_ADDR) // addr-out = sp
+`define MC_SEL_ADDR_A (`SEL_ADDR_A << `P_SEL_ADDR) // addr-out = a
+`define MC_SEL_ADDR_B (`SEL_ADDR_B << `P_SEL_ADDR) // addr-out = b
+
+`define MC_ALU_NOP (`ALU_NOP << `P_ALU) // 4 bits; evaluates to 0, so r = a is the default alu operation
+`define MC_ALU_NOP_B (`ALU_NOP_B << `P_ALU) // r = b
+`define MC_ALU_PLUS (`ALU_PLUS << `P_ALU) // r = a + b
+`define MC_ALU_AND (`ALU_AND << `P_ALU) // r = a AND b
+`define MC_ALU_OR (`ALU_OR << `P_ALU) // r = a OR b
+`define MC_ALU_NOT (`ALU_NOT << `P_ALU) // r = NOT a
+`define MC_ALU_FLIP (`ALU_FLIP << `P_ALU) // r = FLIP a
+`define MC_ALU_IM (`ALU_IM << `P_ALU) // r = immediate built from a and b (see ALU_IM)
+`define MC_ALU_PLUS_OFFSET (`ALU_PLUS_OFFSET << `P_ALU) // see ALU_PLUS_OFFSET
+`ifdef ENABLE_CMP
+ `define MC_ALU_CMP_SIGNED (`ALU_CMP_SIGNED << `P_ALU)
+ `define MC_ALU_CMP_UNSIGNED (`ALU_CMP_UNSIGNED << `P_ALU)
+`endif
+`ifdef ENABLE_XOR
+ `define MC_ALU_XOR (`ALU_XOR << `P_ALU)
+`endif
+`ifdef ENABLE_A_SHIFT
+ `define MC_ALU_A_SHIFT_RIGHT (`ALU_A_SHIFT_RIGHT << `P_ALU)
+`endif
+`ifdef ENABLE_MULT
+ `define MC_ALU_MULT (`ALU_MULT << `P_ALU)
+`endif
+`ifdef ENABLE_DIV
+ `define MC_ALU_DIV (`ALU_DIV << `P_ALU)
+ `define MC_ALU_MOD (`ALU_MOD << `P_ALU)
+`endif
+`ifdef ENABLE_BARREL
+ `define MC_ALU_BARREL (`ALU_BARREL << `P_ALU)
+`endif
+
+`define MC_W_SP (1 << `P_W_SP) // write sp (from alu-out)
+`define MC_W_PC (1 << `P_W_PC) // write pc (from alu-out)
+`define MC_W_A (1 << `P_W_A) // write a (from alu-out)
+`define MC_W_A_MEM (1 << `P_W_A_MEM) // write a directly from data-in, leaving the alu free for another operation
+`define MC_W_B (1 << `P_W_B) // write b (from alu-out)
+`define MC_W_OPCODE (1 << `P_W_OPCODE) // write opcode (from alu-out)
+`define MC_SET_IDIM (1 << `P_SET_IDIM) // set idim flag
+`define MC_CLEAR_IDIM (1 << `P_CLEAR_IDIM) // clear idim flag
+`ifdef ENABLE_BYTE_SELECT
+ `define MC_BYTE (1 << `P_BYTE) // byte memory operation
+ `define MC_HALFWORD (1 << `P_HALFWORD) // half word memory operation; bit 32, so the shift relies on being evaluated in a context wider than 32 bits
+`endif
+`ifdef ENABLE_PC_INCREMENT
+ `define MC_PC_INCREMENT (1 << `P_PC_INCREMENT)
+`endif
+`ifdef ENABLE_CPU_INTERRUPTS
+ `define MC_EXIT_INTERRUPT (1 << `P_EXIT_INT) // clear interrupt flag (exit from interrupt)
+ `define MC_ENTER_INTERRUPT (1 << `P_ENTER_INT) // set interrupt flag (enter interrupt)
+`endif
+
+`define MC_MEM_R (1 << `P_MEM_R) // request memory read
+`define MC_MEM_W (1 << `P_MEM_W) // request memory write
+
+`define MC_DECODE (1 << `P_DECODE) // jump to microcode entry point based on current opcode
+`define MC_BRANCH (1 << `P_BRANCH) // unconditional branch to the address in the P_ADDR field
+`define MC_BRANCHIF_OP_NOT_CACHED (1 << `P_OP_NOT_CACHED) // branch if byte[pc] is not cached at opcode
+`define MC_BRANCHIF_A_ZERO (1 << `P_A_ZERO) // branch if a is zero
+`define MC_BRANCHIF_A_NEG (1 << `P_A_NEG) // branch if a is negative (a[31]=1)
+
+// microcode common operations (ready-made microcode words / address fields built from the macros above)
+
+`define MC_ADDR_FETCH_OP ( (`MC_ADDR_FETCH >> 2) << `P_ADDR) // address field: fetch opcode from memory then decode
+`define MC_ADDR_NEXT_OP ( (`MC_ADDR_FETCH_NEXT >> 2) << `P_ADDR) // address field: go to next opcode
+`define MC_ADDR_EMULATE_OP ( (`MC_ADDR_EMULATE >> 2) << `P_ADDR) // address field: EMULATE opcode
+
+`define MC_PC_PLUS_1 (`MC_SEL_ADDR_PC | `MC_SEL_READ_ADDR | `MC_SEL_ALU_MC_CONST | `MC_ALU_PLUS | (1 << `P_ADDR) | `MC_W_PC) // pc = pc + 1 (constant carried in the P_ADDR field)
+`define MC_SP_MINUS_4 (`MC_SEL_ADDR_SP | `MC_SEL_READ_ADDR | `MC_SEL_ALU_MC_CONST | `MC_ALU_PLUS | ((-4 & 127) << `P_ADDR) | `MC_W_SP) // sp = sp - 4 (one stack word); -4 is masked to the 7-bit field, presumably sign-extended by the core - confirm in zpu_core.v
+`define MC_SP_PLUS_4 (`MC_SEL_ADDR_SP | `MC_SEL_READ_ADDR | `MC_SEL_ALU_MC_CONST | `MC_ALU_PLUS | (4 << `P_ADDR) | `MC_W_SP) // sp = sp + 4 (one stack word)
+`define MC_EMULATE (`MC_BRANCH | `MC_ADDR_EMULATE_OP) // unconditional branch to the EMULATE microcode routine
+
+`define MC_FETCH (`MC_BRANCHIF_OP_NOT_CACHED | `MC_ADDR_FETCH_OP | `MC_DECODE) // decode the opcode at PC if it is cached, otherwise branch to the fetch routine (fetch, then decode)
+`define MC_GO_NEXT (`MC_BRANCH | `MC_ADDR_NEXT_OP) // go to next opcode (PC=PC+1, fetch, decode)
+`define MC_GO_FETCH (`MC_BRANCH | `MC_ADDR_FETCH_OP) // go to fetch opcode at PC, then decode
+`define MC_GO_BREAKPOINT (`MC_BRANCH | ((0 >> 2) << `P_ADDR)) // go to breakpoint opcode (microcode address 0)
+
diff --git a/zpu/hdl/avalanche/core/zpu_core_rom.v b/zpu/hdl/avalanche/core/zpu_core_rom.v
new file mode 100644
index 0000000..62b7229
--- /dev/null
+++ b/zpu/hdl/avalanche/core/zpu_core_rom.v
@@ -0,0 +1,1017 @@
+`timescale 1ns / 1ps
+`include "zpu_core_defines.v"
+
+/* MODULE: zpu_core_rom
+ DESCRIPTION: Contains microcode program
+ AUTHOR: Antonio J. Anton (aj <at> anro-ingenieros.com)
+
+REVISION HISTORY:
+Revision 1.0, 14/09/2009
+Initial public release
+
+COPYRIGHT:
+Copyright (c) 2009 Antonio J. Anton
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.*/
+
+module zpu_core_rom (
+ clk,
+ addr,
+ data
+);
+
+input [`MC_MEM_BITS-1:0] addr;
+output [`MC_BITS-1:0] data;
+input clk;
+
+wire [`MC_MEM_BITS-1:0] addr;
+reg [`MC_BITS-1:0] data;
+reg [`MC_BITS-1:0] memory[(1<<`MC_MEM_BITS)-1:0];
+
+initial data <= 0;
+always @(posedge clk) data <= memory[addr];
+
+// --- clear all memory at startup; for some reason, Xilinx XST
+// will not synthesize it as block RAM unless all memory is initialized ---
+integer n;
+initial begin
+// initialize all memory array
+for(n = 0; n < (1<<`MC_MEM_BITS); n = n + 1) memory[n] = 0;
+
+// ------------------------- MICROCODE MEMORY START -----------------------------------
+
+// As per zpu_core.v, each opcode is executed by microcode. Each opcode microcode entry point
+// is at <opcode> << 2 (example: pushsp = 0x02 has its microcode entry point at 0x08); this leaves
+// room for 4 microcode operations per opcode; if the opcode microcode needs more space,
+// it can jump & link to another microcode address (with the two lower bits at 0). The lower 256 addresses
+// of microcode memory are entry points and code for opcodes 0..63; other specific opcodes like im, storesp, etc.
+// are directly hardwired to specific microcode addresses at the memory end. Upper 256 addresses are
+// used by microcode continuations (e.g. opcodes which need more microcode operations), entry points, initializations, etc.
+// The idea is to fit the microcode program in a Xilinx block RAM 512x36.
+
+// ----- OPCODES WITHOUT CONSTANT ------
+
+// 0000_0000 (00) breakpoint -------------------------------------
+memory[0] = `MC_ALU_NOP_B | `MC_SEL_ALU_MC_CONST | (4 << `P_ADDR) | // b = 4 (#1 in emulate table)
+ `MC_W_B;
+memory[1] = `MC_EMULATE; // emulate #1 (exception)
+
+// 0000_0001 (01) shiftleft -------------------------------------
+memory[4] = `MC_GO_BREAKPOINT;
+
+// 0000_0010 (02) pushsp -------------------------------------
+// mem[sp-1] = sp
+// sp = sp - 1
+memory[8] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_ADDR | // a = sp
+ `MC_ALU_NOP | `MC_W_A;
+memory[9] = `MC_SP_MINUS_4; // sp = sp - 1
+memory[10] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp]=a
+
+// 0000_0011 (03) popint -------------------------------------
+`ifdef ENABLE_CPU_INTERRUPTS
+// pc=mem[sp]-1 (emulate stores pc+1 but we must return to
+// sp=sp+1 pc because interrupt takes precedence to decode)
+// fetch & decode, then clear_interrupt_flag
+// this guarantees that a continous interrupt allows to execute at least one
+// opcode of mainstream program before reentry to interrupt handler
+memory[12] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // pc = mem[sp]-1
+ `MC_MEM_R | `MC_ALU_PLUS | `MC_SEL_ALU_MC_CONST |
+ ((-1 & 127) << `P_ADDR) | `MC_W_PC;
+memory[13] = `MC_SEL_ADDR_PC | `MC_SEL_READ_DATA | `MC_MEM_R | // opcode_cache = mem[pc]
+ `MC_W_OPCODE;
+memory[14] = `MC_SP_PLUS_4 | `MC_DECODE | `MC_EXIT_INTERRUPT; // sp=sp+1, decode opcode, exit_interrupt
+`else
+memory[12] = `MC_GO_BREAKPOINT;
+`endif
+
+// 0000_0100 (04) poppc -------------------------------------
+// pc=mem[sp]
+// sp = sp + 1
+memory[16] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // pc = mem[sp]
+ `MC_MEM_R | `MC_W_PC;
+memory[17] = `MC_SP_PLUS_4; // sp = sp + 1
+memory[18] = `MC_FETCH; // opcode cached ? decode : fetch,decode
+
+// 0000_0101 (05) add -------------------------------------
+// mem[sp+1] = mem[sp+1] + mem[sp]
+// sp = sp + 1
+memory[20] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_R | // a = mem[sp] || sp=sp+1
+ `MC_W_A_MEM | `MC_SP_PLUS_4;
+memory[21] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // a = a + mem[sp]
+ `MC_ALU_PLUS | `MC_SEL_ALU_A | `MC_W_A;
+memory[22] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+
+// 0000_0110 (06) and -------------------------------------
+// mem[sp+1] = mem[sp+1] & mem[sp]
+// sp = sp + 1
+memory[24] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_R | // a = mem[sp] || sp=sp+1
+ `MC_W_A_MEM | `MC_SP_PLUS_4;
+memory[25] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // a = a & mem[sp]
+ `MC_ALU_AND |`MC_SEL_ALU_A | `MC_W_A;
+memory[26] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+
+// 0000_0111 (07) or -------------------------------------
+// mem[sp+1] = mem[sp+1] | mem[sp]
+// sp = sp + 1
+memory[28] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_R | // a = mem[sp] || sp=sp+1
+ `MC_W_A_MEM | `MC_SP_PLUS_4;
+memory[29] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // a = a | mem[sp]
+ `MC_ALU_OR | `MC_SEL_ALU_A | `MC_W_A;
+memory[30] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+
+// 0000_1000 (08) load -------------------------------------
+// mem[sp] = mem[ mem[sp] ]
+memory[32] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // a = mem[sp]
+ `MC_MEM_R | `MC_W_A;
+memory[33] = `MC_SEL_ADDR_A | `MC_SEL_READ_DATA | `MC_MEM_R | `MC_W_A; // a = mem[a]
+memory[34] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+
+// 0000_1001 (09) not -------------------------------------
+// mem[sp] = ~mem[sp]
+memory[36] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // a = ~mem[sp]
+ `MC_MEM_R | `MC_ALU_NOT | `MC_W_A;
+memory[37] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+
+// 0000_1010 (0a) flip -------------------------------------
+// mem[sp] = flip(mem[sp])
+memory[40] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // a = FLIP(mem[sp])
+ `MC_MEM_R | `MC_ALU_FLIP | `MC_W_A;
+memory[41] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+
+// 0000_1011 (0b) nop -------------------------------------
+memory[44] = `MC_CLEAR_IDIM | `MC_PC_PLUS_1; // IDIM=0
+memory[45] = `MC_FETCH;
+
+// 0000_1100 (0c) store -------------------------------------
+// mem[mem[sp]] <= mem[sp+1]
+// sp = sp + 2
+memory[48] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // b = mem[sp]
+ `MC_MEM_R | `MC_W_B;
+memory[49] = `MC_SP_PLUS_4; // sp = sp + 1
+memory[50] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_W_A_MEM | `MC_SP_PLUS_4; // a = mem[sp] || sp = sp + 1
+memory[51] = `MC_SEL_ADDR_B | `MC_MEM_W | `MC_GO_NEXT; // mem[b] = a
+
+// 0000_1101 (0d) popsp -------------------------------------
+// sp = mem[sp]
+memory[52] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_R | // sp = mem[sp]
+ `MC_W_SP | `MC_GO_NEXT;
+
+// 0000_1110 (0e) ipsum ------------------------------------
+// compare: opcode recycled --> ipsum
+// c=mem[sp];s=mem[sp+1]; sum=0;
+// while(c-->0) {sum+=halfword(mem[s],s);s++};
+// sp=sp+1; mem[sp]=sum (overwrites mem[0] & mem[4] words)
+// requires HALFWORD memory access
+`ifdef ENABLE_BYTE_SELECT
+memory[56] = `MC_CLEAR_IDIM | `MC_ALU_NOP_B | `MC_SEL_ALU_MC_CONST | // b=0
+ (0 << `P_ADDR) | `MC_W_B;
+memory[57] = `MC_SEL_ADDR_PC | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // a=pc+1 save next pc on mem[0]
+ `MC_SEL_ALU_MC_CONST | (1 << `P_ADDR) | `MC_W_A;
+memory[58] = `MC_SEL_ADDR_B | `MC_MEM_W | `MC_ALU_NOP_B | `MC_W_B | // mem[b]=a || b=4
+ `MC_SEL_ALU_MC_CONST | (4 << `P_ADDR);
+memory[59] = `MC_SEL_ADDR_SP | `MC_SEL_READ_ADDR | `MC_W_A | // a=sp || goto @ipsum_continue1
+ `MC_BRANCH | ((116 >> 2) << `P_ADDR);
+`else
+memory[56] = `MC_GO_BREAKPOINT;
+`endif
+
+// 0000_1111 (0f) sncpy ---------------------------------------
+// c=mem[sp],d=mem[sp+1],s=mem[sp+2];
+// while( *(char*)s != 0 && c>0 ) { *((char*)d++)=*((char*)s++)); c-- };
+// sp=sp+1; mem[sp+1]=d; mem[sp]=c
+// (overwrites mem[0] & mem[4] words)
+// requires BYTE memory access
+`ifdef ENABLE_BYTE_SELECT
+memory[60] = `MC_CLEAR_IDIM | `MC_ALU_NOP_B | `MC_SEL_ALU_MC_CONST | // b=0
+ (0 << `P_ADDR) | `MC_W_B;
+memory[61] = `MC_SEL_ADDR_PC | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // a=pc+1 save next pc on mem[0]
+ `MC_SEL_ALU_MC_CONST | (1 << `P_ADDR) | `MC_W_A;
+memory[62] = `MC_SEL_ADDR_B | `MC_MEM_W | `MC_ALU_NOP_B | `MC_W_B | // mem[b]=a || b=4
+ `MC_SEL_ALU_MC_CONST | (4 << `P_ADDR);
+memory[63] = `MC_SEL_ADDR_SP | `MC_SEL_READ_ADDR | `MC_W_A | // a=sp || goto @sncpy_continue1
+ `MC_BRANCH | ((100 >> 2) << `P_ADDR);
+`else
+memory[60] = `MC_GO_BREAKPOINT;
+`endif
+
+// ------------- microcode opcode continuations ---------------
+// wset_continue1: ------------------------
+memory[64] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // a=a+12 save clear stack on mem[4]
+ `MC_SEL_ALU_MC_CONST | (12 << `P_ADDR) | `MC_W_A;
+memory[65] = `MC_SEL_ADDR_B | `MC_MEM_W; // mem[b]=a
+memory[66] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_SEL_READ_DATA | `MC_W_PC;// pc=mem[sp] (data)
+memory[67] = `MC_SP_PLUS_4; // sp=sp+4
+memory[68] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_SEL_READ_DATA | `MC_W_B; // b=mem[sp] (count)
+memory[69] = `MC_SP_PLUS_4; // sp=sp+4
+memory[70] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_SEL_READ_DATA | `MC_W_SP;// sp=mem[sp] (destination @)
+memory[71] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_W_A; // a=b (count)
+// wset_loop:
+memory[72] = `MC_BRANCHIF_A_ZERO | ( (80 >> 2) << `P_ADDR); // if(a==0) goto @wset_end
+memory[73] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // b=b-1 (count)
+ `MC_SEL_ALU_MC_CONST | ((-1 & 127) << `P_ADDR) | `MC_W_B;
+memory[74] = `MC_SEL_ADDR_PC | `MC_SEL_READ_ADDR | `MC_W_A; // a=pc (data)
+memory[75] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_SP_PLUS_4; // mem[sp]=a || sp=sp+4 (sp=destination@)
+memory[76] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_W_A | // a=b (count) || goto @wset_loop
+ `MC_BRANCH | ((72 >> 2) << `P_ADDR);
+// wset_end: wcpy_end: sncpy_end:
+memory[80] = `MC_SEL_ADDR_A | `MC_MEM_R | `MC_SEL_READ_DATA | `MC_W_PC; // pc=mem[a] (a is 0)
+memory[81] = `MC_ALU_NOP_B | `MC_SEL_ALU_MC_CONST | (4 << `P_ADDR) | // b=4
+ `MC_W_B;
+memory[82] = `MC_SEL_ADDR_B | `MC_MEM_R | `MC_SEL_READ_DATA | // sp=mem[b] || goto @fetch
+ `MC_W_SP | `MC_FETCH;
+
+// wcpy_continue1: ------------------------
+memory[84] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // a=a+12 save clear stack on mem[4]
+ `MC_SEL_ALU_MC_CONST | (12 << `P_ADDR) | `MC_W_A;
+memory[85] = `MC_SEL_ADDR_B | `MC_MEM_W; // mem[b]=a
+memory[86] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_SEL_READ_DATA | `MC_W_B; // b=mem[sp] (count)
+memory[87] = `MC_SP_PLUS_4; // sp=sp+4
+memory[88] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_SEL_READ_DATA | `MC_W_PC;// pc=mem[sp] (destination @)
+memory[89] = `MC_SP_PLUS_4; // sp=sp+4
+memory[90] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_SEL_READ_DATA | `MC_W_SP;// sp=mem[sp] (source @)
+memory[91] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_W_A; // a=b (count)
+// wcpy_loop:
+memory[92] = `MC_BRANCHIF_A_ZERO | ( (80 >> 2) << `P_ADDR); // if(a==0) goto @wcpy_end
+memory[93] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // b=b-1 (count)
+ `MC_SEL_ALU_MC_CONST | ((-1 & 127) << `P_ADDR) | `MC_W_B;
+memory[94] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_W_A_MEM | // a=mem[sp] || sp=sp+4 (sp=source@)
+ `MC_SP_PLUS_4;
+memory[95] = `MC_SEL_ADDR_PC | `MC_MEM_W | `MC_SEL_READ_ADDR | // mem[pc]=a || pc=pc+4 (pc=destination@)
+ `MC_ALU_PLUS | `MC_SEL_ALU_MC_CONST | (4 << `P_ADDR) | `MC_W_PC;
+memory[96] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_W_A | // a=b (count) || goto @wcpy_loop
+ `MC_BRANCH | ((92 >> 2) << `P_ADDR);
+
+`ifdef ENABLE_BYTE_SELECT
+// sncpy_continue1: ---------------------
+memory[100] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // a=a+12
+ `MC_SEL_ALU_MC_CONST | (12 << `P_ADDR) | `MC_W_A;
+memory[101] = `MC_SEL_ADDR_B | `MC_MEM_W; // mem[b]=a
+memory[102] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_SEL_READ_DATA | `MC_W_B;// b=mem[sp] (count)
+memory[103] = `MC_SP_PLUS_4; // sp=sp+4
+memory[104] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_SEL_READ_DATA | `MC_W_PC;// pc=mem[sp] (destination @)
+memory[105] = `MC_SP_PLUS_4; // sp=sp+4
+memory[106] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_SEL_READ_DATA | `MC_W_SP;// sp=mem[sp] (source @)
+memory[107] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_W_A; // a=b (count)
+// sncpy_loop:
+memory[108] = `MC_BRANCHIF_A_ZERO | ( (80 >> 2) << `P_ADDR); // if(a==0) goto @sncpy_end (count==0?)
+memory[109] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_BYTE | `MC_W_A_MEM | // a=BYTE(mem[sp],sp) || sp=sp+1 (sp=source@)
+ `MC_SEL_READ_ADDR | `MC_ALU_PLUS | `MC_SEL_ALU_MC_CONST |
+ (1 << `P_ADDR) | `MC_W_SP;
+memory[110] = `MC_SEL_ADDR_PC | `MC_MEM_W | `MC_SEL_READ_ADDR | // BYTE(mem[pc],pc)=a || pc=pc+1 (pc=destination@)
+ `MC_BYTE | `MC_ALU_PLUS | `MC_SEL_ALU_MC_CONST |
+ (1 << `P_ADDR) | `MC_W_PC;
+memory[111] = `MC_BRANCHIF_A_ZERO | ( (80 >> 2) << `P_ADDR); // if(a==0) goto @sncpy_end (mem[src]==0?)
+memory[112] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // b=b-1 (count)
+ `MC_SEL_ALU_MC_CONST | ((-1 & 127) << `P_ADDR) | `MC_W_B;
+memory[113] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_W_A | // a=b (count) || goto @sncpy_loop
+ `MC_BRANCH | ((108 >> 2) << `P_ADDR);
+
+// ipsum_continue1: -------------------
+memory[116] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // a=a+4
+ `MC_SEL_ALU_MC_CONST | (4 << `P_ADDR) | `MC_W_A;
+memory[117] = `MC_SEL_ADDR_B | `MC_MEM_W; // mem[b]=a save return sp on mem[4]
+memory[118] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_SEL_READ_DATA | // pc=mem[sp] (count)
+ `MC_W_PC;
+memory[119] = `MC_SP_PLUS_4; // sp=sp+4
+memory[120] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_SEL_READ_DATA | // sp=mem[sp] (start @)
+ `MC_W_SP;
+memory[121] = `MC_SEL_ALU_MC_CONST | (0 << `P_ADDR) | `MC_W_B | // b=0 (sum)
+ `MC_ALU_NOP_B;
+memory[122] = `MC_SEL_ADDR_PC | `MC_SEL_READ_DATA | `MC_W_A; // a=pc (count)
+// ipsum_loop:
+memory[124] = `MC_BRANCHIF_A_ZERO | ((392 >> 2) << `P_ADDR); // a == 0 ? goto @ipsum_end
+
+memory[125] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_HALFWORD | // b=mem[sp]+b
+ `MC_SEL_READ_DATA | `MC_ALU_PLUS | `MC_SEL_ALU_B | `MC_W_B;
+memory[126] = `MC_SEL_ADDR_SP | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // sp=sp+2
+ `MC_SEL_ALU_MC_CONST | (2 << `P_ADDR) | `MC_W_SP;
+memory[127] = `MC_BRANCH | ((408 >> 2) << `P_ADDR); // goto @ipsum_continue2
+`endif
+
+// -------------------------------------------------------------
+
+// 001_00000 (20) wcpy -----------------------------------------
+// before using this opcode you must save mem[0] & mem[4] words, then wcpy, then restore mems
+// c=mem[sp],d=mem[sp+1],s=mem[sp+2]; while(c-->0) mem[d++]=mem[s++]; sp=sp+3
+memory[128] = `MC_CLEAR_IDIM | `MC_ALU_NOP_B | `MC_SEL_ALU_MC_CONST | // b=0
+ (0 << `P_ADDR) | `MC_W_B;
+memory[129] = `MC_SEL_ADDR_PC | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // a=pc+1
+ `MC_SEL_ALU_MC_CONST | (1 << `P_ADDR) | `MC_W_A;
+memory[130] = `MC_SEL_ADDR_B | `MC_MEM_W | `MC_ALU_NOP_B | `MC_W_B | // mem[b]=a || b=4
+ `MC_SEL_ALU_MC_CONST | (4 << `P_ADDR);
+memory[131] = `MC_SEL_ADDR_SP | `MC_SEL_READ_ADDR | `MC_W_A | // a=sp || goto @wcpy_continue1
+ `MC_BRANCH | ((84 >> 2) << `P_ADDR);
+
+// 001_00001 (21) wset ----------------------------------------
+// before using this opcode you must save mem[0] & mem[4] words, then wset, then restore mems
+// v=mem[sp],c=mem[sp+1],d=mem[sp+2]; while(c-->0) mem[d++]=v; sp=sp+3
+memory[132] = `MC_CLEAR_IDIM | `MC_ALU_NOP_B | `MC_SEL_ALU_MC_CONST | // b=0
+ (0 << `P_ADDR) | `MC_W_B;
+memory[133] = `MC_SEL_ADDR_PC | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // a=pc+1
+ `MC_SEL_ALU_MC_CONST | (1 << `P_ADDR) | `MC_W_A;
+memory[134] = `MC_SEL_ADDR_B | `MC_MEM_W | `MC_ALU_NOP_B | `MC_W_B | // mem[b]=a || b=4
+ `MC_SEL_ALU_MC_CONST | (4 << `P_ADDR);
+memory[135] = `MC_SEL_ADDR_SP | `MC_SEL_READ_ADDR | `MC_W_A | // a=sp || goto @wset_continue1
+ `MC_BRANCH | ((64 >> 2) << `P_ADDR);
+
+// 001_00010 (22) loadh: load halfword ------------------------
+`ifdef ENABLE_BYTE_SELECT
+// mem[sp] = HALFWORD(mem[sp], mem[mem[sp]])
+memory[136] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // a = mem[sp] (address to load from)
+ `MC_MEM_R | `MC_W_A;
+memory[137] = `MC_SEL_ADDR_A | `MC_SEL_READ_DATA | `MC_MEM_R | // a = halfword(a, mem[a])
+ `MC_W_A | `MC_HALFWORD;
+memory[138] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a, then next opcode
+`else
+memory[136] = `MC_GO_BREAKPOINT; // no loadh microcode without ENABLE_BYTE_SELECT
+`endif
+
+// 001_00011 (23) storeh: store halfword ----------------------
+`ifdef ENABLE_BYTE_SELECT
+// HALFWORD( mem[mem[sp]] <= mem[sp+1] )
+// sp = sp + 2
+memory[140] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // b = mem[sp] (destination address)
+ `MC_MEM_R | `MC_W_B;
+memory[141] = `MC_SP_PLUS_4; // sp = sp + 1
+memory[142] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_W_A_MEM | // a = mem[sp] (value to store) || sp=sp+1
+ `MC_SP_PLUS_4;
+memory[143] = `MC_SEL_ADDR_B | `MC_MEM_W | `MC_HALFWORD | `MC_GO_NEXT; // HALFWORD(mem[b] = a)
+`else
+memory[140] = `MC_GO_BREAKPOINT; // no storeh microcode without ENABLE_BYTE_SELECT
+`endif
+
+// 001_00100 (24) lessthan (signed compare) --------------------
+// (mem[sp]-mem[sp+1]) < 0 ? mem[sp+1]=1 : mem[sp+1]=0
+// sp=sp+1
+`ifdef ENABLE_CMP
+memory[144] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_W_A_MEM | // a=mem[sp] || sp=sp+1
+ `MC_SP_PLUS_4;
+memory[145] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | `MC_W_B; // b=mem[sp]
+memory[146] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_SEL_ALU_B | // a = (a - b) with overflow/underflow correction || goto @lessthan_check (424)
+ `MC_ALU_CMP_SIGNED | `MC_W_A | ((424>>2) << `P_ADDR) | `MC_BRANCH;
+`else
+memory[144] = `MC_GO_BREAKPOINT; // no compare microcode without ENABLE_CMP
+`endif
+
+// 001_00101 (25) lessthanorequal (signed compare) ---------------------
+// (mem[sp]-mem[sp+1]) <= 0 ? mem[sp+1]=1 : mem[sp+1]=0
+// sp=sp+1
+`ifdef ENABLE_CMP
+memory[148] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_W_A_MEM | // a=mem[sp] || sp=sp+1
+ `MC_SP_PLUS_4;
+memory[149] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | `MC_W_B; // b=mem[sp] (read-data select stated explicitly, as in lessthan)
+memory[150] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_SEL_ALU_B | // a = (a - b) with overflow/underflow correction || goto @lessthanorequal_check (420)
+ `MC_ALU_CMP_SIGNED | `MC_W_A | ((420>>2) << `P_ADDR) | `MC_BRANCH;
+`else
+memory[148] = `MC_GO_BREAKPOINT; // no compare microcode without ENABLE_CMP
+`endif
+
+// 001_00110 (26) ulessthan (unsigned compare) -----------------
+// signA!=signB -> (unsigA < unsigB) == ~(sigA < sigB)
+// signA==signB -> (unsigA < unsigB) == (sigA < sigB)
+// (mem[sp]-mem[sp+1]) < 0 ? mem[sp+1]=1 : mem[sp+1]=0
+// sp=sp+1
+`ifdef ENABLE_CMP
+memory[152] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_W_A_MEM | // a=mem[sp] || sp=sp+1
+ `MC_SP_PLUS_4;
+memory[153] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | `MC_W_B; // b=mem[sp]
+memory[154] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_SEL_ALU_B | // a = (a - b) with overflow/underflow correction || goto @lessthan_check (424)
+ `MC_ALU_CMP_UNSIGNED | `MC_W_A | ((424>>2) << `P_ADDR) | `MC_BRANCH;
+`else
+memory[152] = `MC_GO_BREAKPOINT; // no compare microcode without ENABLE_CMP
+`endif
+
+// 001_00111 (27) ulessthanorequal (unsigned compare) -------------------
+// (mem[sp]-mem[sp+1]) <= 0 ? mem[sp+1]=1 : mem[sp+1]=0
+// sp=sp+1
+`ifdef ENABLE_CMP
+memory[156] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_W_A_MEM | // a=mem[sp] || sp=sp+1
+ `MC_SP_PLUS_4;
+memory[157] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | `MC_W_B; // b=mem[sp] (read-data select stated explicitly, as in ulessthan)
+memory[158] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_SEL_ALU_B | // a = (a - b) with overflow/underflow correction || goto @lessthanorequal_check (420)
+ `MC_ALU_CMP_UNSIGNED | `MC_W_A | ((420>>2) << `P_ADDR) | `MC_BRANCH;
+`else
+memory[156] = `MC_GO_BREAKPOINT; // no compare microcode without ENABLE_CMP
+`endif
+
+// 001_01000 (28) swap: no microcode provided, the entry point only holds MC_GO_BREAKPOINT
+memory[160] = `MC_GO_BREAKPOINT;
+
+// 001_01001 (29) mult: hardware multiply, or emulation when ENABLE_MULT is off
+`ifdef ENABLE_MULT
+// mem[sp+1] = mem[sp+1] * mem[sp]
+// sp = sp + 1
+memory[164] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_R | // a = mem[sp] || sp=sp+1
+ `MC_W_A_MEM | `MC_SP_PLUS_4;
+memory[165] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // b = mem[sp]
+ `MC_W_B;
+memory[166] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_SEL_ALU_B | // a = a * b DON'T COMBINE MULTICYCLE ALU
+ `MC_ALU_MULT | `MC_W_A; // OPERATIONS WITH MEMORY READ/WRITE
+memory[167] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+`else
+memory[164] = `MC_ALU_NOP_B | `MC_SEL_ALU_MC_CONST | (8 << `P_ADDR) | // b = 8 (byte offset of entry #2 in emulate table)
+ `MC_W_B;
+memory[165] = `MC_EMULATE; // emulate #2 (mult opcode); the emulate microcode expects the table offset in b
+`endif
+
+// 001_01010 (2a) lshiftright: logical shift right --------------
+`ifdef ENABLE_BARREL
+// b = mem[sp] & 5'b11111 : limit to 5 bits (max 31 shifts)
+// b = b | 7'b01_00000 : shift right, logical
+// sp=sp+1
+// a = mem[sp]
+// a = a >> b
+// mem[sp] = a
+memory[168] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // b = mem[sp] & 5'b11111
+ `MC_MEM_R | `MC_ALU_AND | `MC_SEL_ALU_MC_CONST | (31 << `P_ADDR) | `MC_W_B;
+memory[169] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_ALU_OR | // b = b | 7'b01_00000 (shift right, logical)
+ `MC_SEL_ALU_MC_CONST | (32 << `P_ADDR) | `MC_W_B;
+memory[170] = `MC_SP_PLUS_4; // sp = sp + 1
+memory[171] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // a = mem[sp] || goto @shift_cont (432)
+ `MC_W_A_MEM | `MC_BRANCH | ((432 >> 2) << `P_ADDR);
+`else
+ `ifdef ENABLE_A_SHIFT
+// a = mem[sp] & 5'b11111 (software shift: bit-reverse, shift left a times, bit-reverse back)
+// sp=sp+1
+// b = FLIP(mem[sp])
+// label: a <= 0 ? goto @fin
+// b = b << 1
+// a = a - 1 || goto @label
+// fin: a = FLIP(b)
+// mem[sp]=a
+memory[168] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // a = mem[sp] & 5'b11111
+ `MC_MEM_R | `MC_ALU_AND | `MC_SEL_ALU_MC_CONST |
+ (31 << `P_ADDR) | `MC_W_A;
+memory[169] = `MC_SP_PLUS_4; // sp = sp + 1
+memory[170] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // b = FLIP(mem[sp])
+ `MC_ALU_FLIP | `MC_W_B;
+memory[171] = `MC_BRANCH | ((448 >> 2) << `P_ADDR); // goto @lshiftright_loop (448)
+ `else
+ memory[168] = `MC_GO_BREAKPOINT;
+ `endif
+`endif
+
+// 001_01011 (2b) ashiftleft: shift left ------------------------
+`ifdef ENABLE_BARREL
+// b = mem[sp] & 5'b11111 : 5 bit shift
+// b = b | 7'b10_00000 : shift left, arithmetic
+// sp=sp+1
+// a = mem[sp]
+// a = a <<signed b
+// mem[sp] = a
+memory[172] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // b = mem[sp] & 5'b11111
+ `MC_MEM_R | `MC_ALU_AND | `MC_SEL_ALU_MC_CONST | (31 << `P_ADDR) | `MC_W_B;
+memory[173] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_ALU_OR | // b = b | 7'b10_00000 (shift left, arithmetic)
+ `MC_SEL_ALU_MC_CONST | (64 << `P_ADDR) | `MC_W_B;
+memory[174] = `MC_SP_PLUS_4; // sp = sp + 1
+memory[175] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // a = mem[sp] || goto @shift_cont (432)
+ `MC_W_A_MEM | `MC_BRANCH | ((432 >> 2) << `P_ADDR);
+`else
+// a = mem[sp] & 5'b11111 (software shift: b is doubled a times by the loop at 440)
+// sp = sp + 1
+// b = mem[sp]
+// label: a <= 0 ? goto @fin
+// b = b << 1
+// a = a - 1 || goto @label
+// fin: a = b
+// mem[sp] = a
+memory[172] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // a = mem[sp] & 5'b11111
+ `MC_MEM_R | `MC_ALU_AND | `MC_SEL_ALU_MC_CONST |
+ (31 << `P_ADDR) | `MC_W_A;
+memory[173] = `MC_SP_PLUS_4; // sp = sp + 1
+memory[174] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // b = mem[sp]
+ `MC_W_B;
+memory[175] = `MC_BRANCH | ((440 >> 2) << `P_ADDR); // goto @ashiftleft_loop (440)
+`endif
+
+// 001_01100 (2c) ashiftright: arithmetic shift right -----------
+`ifdef ENABLE_BARREL
+// b = mem[sp] & 5'b11111 : 5 bit shift
+// b = b | 7'b00_00000 : shift right, arithmetic (mode bits are zero, so no OR step is needed)
+// sp=sp+1
+// a = mem[sp]
+// a = a >>signed b
+// mem[sp] = a
+memory[176] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // b = mem[sp] & 5'b11111
+ `MC_MEM_R | `MC_ALU_AND | `MC_SEL_ALU_MC_CONST | (31 << `P_ADDR) | `MC_W_B;
+memory[177] = `MC_SP_PLUS_4; // sp = sp + 1
+memory[178] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // a = mem[sp] || goto @shift_cont (432)
+ `MC_W_A_MEM | `MC_BRANCH | ((432 >> 2) << `P_ADDR);
+`else
+ `ifdef ENABLE_A_SHIFT
+// a = mem[sp] & 5'b11111
+// sp = sp + 1
+// b = FLIP(mem[sp])
+// label: a <= 0 ? goto @fin
+// b = b signed_<< 1 NOTE(review): the loop at 432 does b = b + b, same as lshiftright_loop -- confirm the sign bit is propagated
+// a = a - 1 || goto @label
+// fin: a = FLIP(b)
+// mem[sp] = a
+memory[176] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // a = mem[sp] & 5'b11111
+ `MC_MEM_R | `MC_ALU_AND | `MC_SEL_ALU_MC_CONST |
+ (31 << `P_ADDR) | `MC_W_A;
+memory[177] = `MC_SP_PLUS_4; // sp = sp + 1
+memory[178] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // b = FLIP(mem[sp])
+ `MC_ALU_FLIP | `MC_W_B;
+memory[179] = `MC_BRANCH | ((432 >> 2) << `P_ADDR); // goto @ashiftright_loop (432)
+ `else
+memory[176] = `MC_GO_BREAKPOINT;
+ `endif
+`endif
+
+// 001_01101 (2d) call: absolute call, return address replaces the target on the stack
+// b = mem[sp]
+// a = pc + 1 ; mem[sp] = a
+// pc = b
+memory[180] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // b = mem[sp] (call target)
+ `MC_MEM_R | `MC_W_B;
+memory[181] = `MC_SEL_ADDR_PC | `MC_SEL_READ_ADDR | `MC_ALU_PLUS |
+ `MC_SEL_ALU_MC_CONST | (1 << `P_ADDR) | `MC_W_A; // a = pc + 1 (return address)
+memory[182] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_ALU_NOP_B | // mem[sp] = a || pc = b
+ `MC_SEL_ALU_B | `MC_W_PC;
+memory[183] = `MC_FETCH; // op_cached? decode : goto next
+
+// 001_01110 (2e) eq: equality test by subtraction -------------
+// a = -mem[sp] (NOT, then +1)
+// sp = sp + 1
+// (mem[sp] + a == 0) ? mem[sp] = 1 : mem[sp] = 0
+memory[184] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_R | // a = NOT(mem[sp])
+ `MC_SEL_READ_DATA | `MC_ALU_NOT | `MC_W_A;
+memory[185] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR |`MC_ALU_PLUS | // a = a + 1 (two's complement negate)
+ `MC_SEL_ALU_MC_CONST | (1 << `P_ADDR) | `MC_W_A;
+memory[186] = `MC_SP_PLUS_4; // sp = sp + 1
+memory[187] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // a = mem[sp] + a || goto @eq_check (416)
+ `MC_ALU_PLUS |`MC_SEL_ALU_A | `MC_W_A |
+ ( (416 >> 2) << `P_ADDR) | `MC_BRANCH;
+
+// 001_01111 (2f) neq: inequality test by subtraction ----------
+// a = -mem[sp] (NOT, then +1)
+// sp = sp + 1
+// (mem[sp] + a != 0) ? mem[sp] = 1 : mem[sp] = 0
+memory[188] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // a = NOT(mem[sp])
+ `MC_MEM_R | `MC_ALU_NOT | `MC_W_A;
+memory[189] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR |`MC_ALU_PLUS | // a = a + 1 (two's complement negate)
+ `MC_SEL_ALU_MC_CONST | (1 << `P_ADDR) | `MC_W_A;
+memory[190] = `MC_SP_PLUS_4; // sp = sp + 1
+memory[191] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // a = mem[sp] + a || goto @neq_check (412)
+ `MC_ALU_PLUS | `MC_SEL_ALU_A | `MC_W_A |
+ ( (412 >> 2) << `P_ADDR) | `MC_BRANCH;
+
+// 001_10000 (30) neg: two's complement negate of top of stack
+// a = NOT(mem[sp])
+// a = a + 1
+// mem[sp] = a
+memory[192] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // a = NOT(mem[sp])
+ `MC_MEM_R | `MC_ALU_NOT | `MC_W_A;
+memory[193] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // a = a + 1
+ (1 << `P_ADDR) | `MC_SEL_ALU_MC_CONST | `MC_W_A;
+memory[194] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a, then next opcode
+
+// 001_10001 (31) sub: implemented as mem[sp+1] + (-mem[sp]) ---
+// mem[sp+1] = mem[sp+1] - mem[sp]
+// sp = sp + 1
+memory[196] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // a = NOT(mem[sp])
+ `MC_MEM_R | `MC_ALU_NOT | `MC_W_A;
+memory[197] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // a = a + 1 (two's complement negate)
+ `MC_SEL_ALU_MC_CONST | (1 << `P_ADDR) | `MC_W_A;
+memory[198] = `MC_SP_PLUS_4; // sp = sp + 1
+memory[199] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // a = mem[sp] + a || goto @sub_cont (400: mem[sp]=a)
+ `MC_ALU_PLUS | `MC_SEL_ALU_A | `MC_W_A | ((400>>2) << `P_ADDR) |
+ `MC_BRANCH;
+
+// 001_10010 (32) xor -------------------------------------
+`ifdef ENABLE_XOR
+// mem[sp+1] = mem[sp+1] ^ mem[sp]
+// sp = sp + 1
+memory[200] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_R | // a = mem[sp] || sp=sp+1
+ `MC_W_A_MEM | `MC_SP_PLUS_4;
+memory[201] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // a = a ^ mem[sp]
+ `MC_ALU_XOR |`MC_SEL_ALU_A | `MC_W_A;
+memory[202] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+`else
+// ALU doesn't perform XOR operation: A^B = (A|B) & ~(A&B), with A=mem[sp] and B=mem[sp+1]
+// ((~A&B) | ~(~A&B) is all ones, so the two AND/OR terms must be built separately; IDIM is cleared like every other opcode)
+// a = A || sp = sp + 1 ; b = B & a ; a = B | a ; b = ~b ; a = a & b ; mem[sp] = a
+// the last two steps are placed at 404/405, inside the hole that follows sub_cont (400)
+memory[200] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_R | // a = mem[sp] || sp=sp+1 --> a = A
+ `MC_W_A_MEM | `MC_SP_PLUS_4;
+memory[201] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // b = mem[sp] & a --> b = A&B
+ `MC_ALU_AND | `MC_SEL_ALU_A | `MC_W_B;
+memory[202] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // a = mem[sp] | a --> a = A|B
+ `MC_ALU_OR | `MC_SEL_ALU_A | `MC_W_A;
+memory[203] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_ALU_NOT | // b = ~b || goto 404 --> b = ~(A&B)
+ `MC_W_B | `MC_BRANCH | ((404 >> 2) << `P_ADDR);
+memory[404] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_ALU_AND | // a = a & b --> a = (A|B) & ~(A&B)
+ `MC_SEL_ALU_B | `MC_W_A;
+memory[405] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+`endif
+
+// 001_10011 (33) loadb: load byte ------------------------------
+`ifdef ENABLE_BYTE_SELECT
+// mem[sp] = BYTE(mem[sp], mem[mem[sp]])
+memory[204] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // a = mem[sp] (address to load from)
+ `MC_MEM_R | `MC_W_A;
+memory[205] = `MC_SEL_ADDR_A | `MC_SEL_READ_DATA | `MC_MEM_R | // a = byte(a, mem[a])
+ `MC_W_A | `MC_BYTE;
+memory[206] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+`else
+// without byte select the byte is read through the opcode fetch path:
+// b=pc ; pc = mem[sp]
+// opcode_cache=mem[pc]
+// a = opcode
+// mem[sp]=a
+// pc=b
+// fetch
+memory[204] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_PC | `MC_SEL_READ_ADDR | // b = pc (saved, restored at 396)
+ `MC_W_B;
+memory[205] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // pc = mem[sp]
+ `MC_W_PC;
+memory[206] = `MC_SEL_ADDR_PC | `MC_SEL_READ_DATA | `MC_MEM_R | // opcode_cache = mem[pc]
+ `MC_W_OPCODE;
+memory[207] = `MC_SEL_ALU_OPCODE | `MC_ALU_NOP_B | `MC_W_A | // a = opcode -> byte(pc, mem[pc]) || goto @loadb_continued (396)
+ `MC_BRANCH | ( (396 >> 2) << `P_ADDR);
+`endif
+
+// 001_10100 (34) storeb: store byte ---------------------------
+`ifdef ENABLE_BYTE_SELECT
+// BYTE( mem[mem[sp]] <= mem[sp+1] )
+// sp = sp + 2
+memory[208] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // b = mem[sp] (destination address)
+ `MC_MEM_R | `MC_W_B;
+memory[209] = `MC_SP_PLUS_4; // sp = sp + 1
+memory[210] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_W_A_MEM | // a = mem[sp] (value to store) || sp=sp+1
+ `MC_SP_PLUS_4;
+memory[211] = `MC_SEL_ADDR_B | `MC_MEM_W | `MC_BYTE | `MC_GO_NEXT; // BYTE(mem[b] = a)
+`else
+memory[208] = `MC_GO_BREAKPOINT; // no storeb microcode without ENABLE_BYTE_SELECT
+`endif
+
+// 001_10101 (35) div -------------------------------------
+`ifdef ENABLE_DIV
+// *** TODO: CHECK IF DIVIDE BY ZERO AND RAISE EXCEPTION ***
+// mem[sp+1] = mem[sp+1] / mem[sp] NOTE(review): microcode computes a / b with a = old mem[sp], b = mem[sp+1] -- confirm operand order against MC_ALU_DIV
+// sp = sp + 1
+memory[212] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_R | // a = mem[sp] || sp=sp+1
+ `MC_W_A_MEM | `MC_SP_PLUS_4;
+memory[213] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // b = mem[sp]
+ `MC_W_B;
+memory[214] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_SEL_ALU_B | // a = a / b DON'T COMBINE MULTICYCLE ALU
+ `MC_ALU_DIV | `MC_W_A; // OPERATIONS WITH MEMORY READ/WRITE
+memory[215] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+`else
+memory[212] = `MC_GO_BREAKPOINT; // no div microcode without ENABLE_DIV
+`endif
+
+// 001_10110 (36) mod -------------------------------------
+`ifdef ENABLE_DIV
+// mem[sp+1] = mem[sp+1] % mem[sp] NOTE(review): microcode computes a % b with a = old mem[sp], b = mem[sp+1] -- confirm operand order against MC_ALU_MOD
+// sp = sp + 1
+memory[216] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_R | // a = mem[sp] || sp=sp+1
+ `MC_W_A_MEM | `MC_SP_PLUS_4;
+memory[217] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R | // b = mem[sp]
+ `MC_W_B;
+memory[218] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_SEL_ALU_B | // a = a % b DON'T COMBINE MULTICYCLE ALU
+ `MC_ALU_MOD | `MC_W_A; // OPERATIONS WITH MEMORY READ/WRITE
+memory[219] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+`else
+memory[216] = `MC_GO_BREAKPOINT; // no mod microcode without ENABLE_DIV
+`endif
+
+// 001_10111 (37) eqbranch: branch by mem[sp] when mem[sp+1] == 0
+// a = sp + 1
+// a = mem[a]
+// a = mem[sp] || (old) a == 0 ? { pc = pc + a; sp = sp + 2 }
+// else { sp = sp + 2, pc = pc + 1 }
+memory[220] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_ADDR | // a = sp + 1 (constant 4 = one stack word)
+ `MC_ALU_PLUS | `MC_SEL_ALU_MC_CONST | (4 << `P_ADDR) |
+ `MC_W_A;
+memory[221] = `MC_SEL_ADDR_A | `MC_MEM_R | `MC_W_A; // a = mem[a] (value under test)
+memory[222] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_W_A | // a = mem[sp] (offset) || a == 0 ? goto 456 (sp=sp+2, pc=pc+a)
+ `MC_BRANCHIF_A_ZERO | ((456>>2) << `P_ADDR);
+memory[223] = `MC_BRANCH | ((460>>2) << `P_ADDR); // else goto 460 (sp=sp+2, pc=pc+1)
+
+// 001_11000 (38) neqbranch: branch by mem[sp] when mem[sp+1] != 0
+// a = sp + 1
+// a = mem[a]
+// a = mem[sp] || (old) a == 0 ? { sp = sp + 2, pc = pc + 1 }
+// else { sp = sp + 2, pc = pc + a }
+memory[224] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_ADDR | // a = sp + 1 (constant 4 = one stack word)
+ `MC_ALU_PLUS | `MC_SEL_ALU_MC_CONST | (4 << `P_ADDR) |
+ `MC_W_A;
+memory[225] = `MC_SEL_ADDR_A | `MC_MEM_R | `MC_W_A; // a = mem[a] (value under test)
+memory[226] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_W_A | // a = mem[sp] (offset) || a == 0 ? goto 460 (sp=sp+2, pc=pc+1)
+ `MC_BRANCHIF_A_ZERO | ((460>>2) << `P_ADDR);
+memory[227] = `MC_BRANCH | ((456>>2) << `P_ADDR); // else goto 456 (sp=sp+2, pc=pc+a)
+
+// 001_11001 (39) poppcrel: pop an offset and add it to pc ------
+// a = mem[sp]
+// sp = sp + 1
+// pc = pc + a
+memory[228] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_R | // a=mem[sp] || sp=sp+1
+ `MC_W_A_MEM | `MC_SP_PLUS_4;
+memory[229] = `MC_SEL_ADDR_PC | `MC_SEL_READ_ADDR | `MC_SEL_ALU_A | // pc = pc + a
+ `MC_ALU_PLUS | `MC_W_PC;
+memory[230] = `MC_FETCH; // pc was rewritten: op_cached? decode : goto next
+
+// 001_11010 (3a) config: no microcode provided, the entry point only holds MC_GO_BREAKPOINT
+memory[232] = `MC_GO_BREAKPOINT;
+
+// 001_11011 (3b) pushpc -------------------------------------
+// a = pc ; sp = sp - 1 ; mem[sp] = a (pc is latched into a first: MC_SP_MINUS_4 | MC_W_A would push sp-1, not pc)
+memory[236] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_PC | `MC_SEL_READ_ADDR | `MC_W_A; // a = pc
+memory[237] = `MC_SP_MINUS_4; // sp = sp - 1
+memory[238] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+
+// 001_11100 (3c) syscall_emulate: no microcode provided, the entry point only holds MC_GO_BREAKPOINT
+memory[240] = `MC_GO_BREAKPOINT;
+
+// 001_11101 (3d) pushspadd: replace top of stack by sp + 4*top
+// a = mem[sp] << 2
+// mem[sp] = a + sp
+`ifdef ENABLE_BARREL
+memory[244] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_R | // a = mem[sp]
+ `MC_W_A_MEM;
+memory[245] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_ALU_BARREL | // a = a << 2 (left,arithmetic->10_00010)
+ `MC_SEL_ALU_MC_CONST | ( 66 << `P_ADDR) | `MC_W_A;
+memory[246] = `MC_SEL_ADDR_SP | `MC_SEL_READ_ADDR | `MC_SEL_ALU_A | // a = a + sp
+ `MC_ALU_PLUS | `MC_W_A;
+memory[247] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+`else
+memory[244] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_W_A_MEM; // a = mem[sp]
+memory[245] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_SEL_ALU_A | // a = a + a (x2; no barrel shifter)
+ `MC_ALU_PLUS | `MC_W_A;
+memory[246] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_SEL_ALU_A | // a = a + a (x4 in total)
+ `MC_ALU_PLUS | `MC_W_A;
+memory[247] = `MC_SEL_ADDR_SP | `MC_SEL_READ_ADDR | `MC_SEL_ALU_A | // a = a + sp || goto @cont (400: mem[sp] = a)
+ `MC_ALU_PLUS | `MC_W_A | ((400>>2) << `P_ADDR) | `MC_BRANCH;
+`endif
+
+// 001_11110 (3e) halfmult: no microcode provided, the entry point only holds MC_GO_BREAKPOINT
+memory[248] = `MC_GO_BREAKPOINT;
+
+// 001_11111 (3f) callpcrel: pc-relative call -------------------
+// b = mem[sp]
+// a = pc + 1 ; mem[sp] = a
+// pc = pc + b
+memory[252] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | // b = mem[sp] (relative call offset)
+ `MC_MEM_R | `MC_W_B;
+memory[253] = `MC_SEL_ADDR_PC | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // a = pc + 1 (return address)
+ `MC_SEL_ALU_MC_CONST | (1 << `P_ADDR) | `MC_W_A;
+memory[254] = `MC_SEL_ADDR_SP | `MC_MEM_W; // mem[sp] = a;
+memory[255] = `MC_SEL_ADDR_PC | `MC_SEL_READ_ADDR | `MC_SEL_ALU_B | // pc = pc + b, goto @fetch
+ `MC_ALU_PLUS | `MC_W_PC | `MC_GO_FETCH;
+
+// --------------------- MICROCODE HOLE -----------------------------------
+
+
+
+
+// --------------------- CONTINUATION OF COMPLEX OPCODES ------------------
+
+`ifdef ENABLE_BYTE_SELECT
+// ipsum_end: restore pc and sp from the scratch words mem[0] / mem[4], then return the sum
+memory[392] = `MC_ALU_NOP_B | `MC_SEL_ALU_MC_CONST | (0 << `P_ADDR) | // sp=0
+ `MC_W_SP;
+memory[393] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_SEL_READ_DATA | // pc=mem[sp] restore next pc (from mem[0])
+ `MC_W_PC;
+memory[394] = `MC_SP_PLUS_4; // sp=sp+4
+memory[395] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_SEL_READ_DATA | // sp=mem[sp] restore sp (from mem[4])
+ `MC_W_SP;
+memory[396] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_W_A; // a=b (sum)
+memory[397] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_FETCH; // mem[sp]=a || fetch (return sum)
+`endif
+
+`ifndef ENABLE_BYTE_SELECT
+// loadb continued microcode (entered from memory[207]; shares 396/397 with ipsum_end, which exists only with ENABLE_BYTE_SELECT)
+// mem[sp]=a || pc=b
+// opcode_cache=mem[pc] || go next
+memory[396] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_SEL_ALU_B | // mem[sp]=a || pc=b (restore the saved pc)
+ `MC_ALU_NOP_B | `MC_W_PC;
+memory[397] = `MC_SEL_ADDR_PC | `MC_MEM_R | `MC_W_OPCODE | `MC_GO_NEXT; // opcode_cache=mem[pc] (reload for the restored pc) || go next
+`endif
+
+// sub/pushspadd continued microcode: shared tail that stores a and moves to the next opcode
+memory[400] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+
+// ----- hole ------
+
+`ifdef ENABLE_BYTE_SELECT
+// ipsum_continue2: step pc back by one and re-enter the loop ---
+memory[408] = `MC_SEL_ADDR_PC | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // pc=pc-1; a=pc (-1 encoded in the 7-bit constant field)
+ `MC_SEL_ALU_MC_CONST | ((-1 & 127) << `P_ADDR) | `MC_W_PC |
+ `MC_W_A;
+memory[409] = `MC_BRANCH | ((124 >> 2) << `P_ADDR); // goto @ipsum_loop (124)
+`endif
+
+// neqcheck: a holds the difference of the two operands -------
+memory[412] = `MC_BRANCHIF_A_ZERO | ((468 >> 2) << `P_ADDR); // a == 0 ? goto @set_mem[sp]=0 (468)
+memory[413] = `MC_BRANCH | ((464 >> 2) << `P_ADDR); // else goto @set_mem[sp]=1 (464)
+
+// eqcheck: a holds the difference of the two operands --------
+memory[416] = `MC_BRANCHIF_A_ZERO | ((464 >> 2) << `P_ADDR); // a == 0 ? goto @set_mem[sp]=1 (464)
+memory[417] = `MC_BRANCH | ((468 >> 2) << `P_ADDR); // else goto @set_mem[sp]=0 (468)
+
+// lessthanorequal_check: a holds the compare result from MC_ALU_CMP_* ----
+memory[420] = `MC_BRANCHIF_A_ZERO | `MC_BRANCHIF_A_NEG | ((464 >> 2) << `P_ADDR); // a <= 0 ? goto @set_mem[sp]=1 (464)
+memory[421] = `MC_BRANCH | ((468 >> 2) << `P_ADDR); // else goto @set_mem[sp]=0 (468)
+
+// lessthan_check: a holds the compare result from MC_ALU_CMP_* ----
+memory[424] = `MC_BRANCHIF_A_NEG | ((464 >> 2) << `P_ADDR); // a < 0 ? goto @set_mem[sp]=1 (464)
+memory[425] = `MC_BRANCH | ((468 >> 2) << `P_ADDR); // else goto @set_mem[sp]=0 (468)
+
+// xor_cont continued microcode. NOTE(review): if entered with b = ~a, a | b is all ones rather than A^B -- check what the xor microcode leaves in b
+`ifndef ENABLE_XOR
+memory[428] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_ALU_OR | // a = a | b
+ `MC_SEL_ALU_B | `MC_W_A;
+memory[429] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+`endif
+
+// shift_cont (ENABLE_BARREL: shared by all three shifts) / ashiftright_loop (software shift) ----
+`ifdef ENABLE_BARREL
+memory[432] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_ALU_BARREL | // a = a {<<|>>} b (b = direction/mode bits + shift count)
+ `MC_SEL_ALU_B | `MC_W_A;
+memory[433] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+`else
+ `ifdef ENABLE_A_SHIFT
+memory[432] = `MC_BRANCHIF_A_ZERO | `MC_BRANCHIF_A_NEG | ((436 >> 2) << `P_ADDR); // (a <= 0) ? goto @ashiftright_exit (436)
+memory[433] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // a = a + (-1)
+ `MC_SEL_ALU_MC_CONST | ( (-1 & 127) << `P_ADDR) | `MC_W_A;
+memory[434] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // b = b + b (b signed_<< 1) || goto @ashiftright_loop
+ `MC_SEL_ALU_B | `MC_W_B | `MC_BRANCH | ((432 >>2) << `P_ADDR);
+// ashiftright_exit
+memory[436] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_ALU_FLIP | // a = FLIP(b)
+ `MC_W_A;
+memory[437] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+ `endif
+`endif
+
+// ashiftleft_loop continued microcode (software shift: a = remaining count, b = value) ----
+`ifndef ENABLE_BARREL
+memory[440] = `MC_BRANCHIF_A_ZERO | `MC_BRANCHIF_A_NEG | ((444 >> 2) << `P_ADDR);// (a <= 0) ? goto @ashiftleft_exit (444)
+memory[441] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // a = a + (-1)
+ `MC_SEL_ALU_MC_CONST | ( (-1 & 127) << `P_ADDR) | `MC_W_A;
+memory[442] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // b = b + b (b << 1) || goto @ashiftleft_loop
+ `MC_SEL_ALU_B | `MC_W_B | `MC_BRANCH | ((440 >>2) << `P_ADDR);
+// ashiftleft_exit
+memory[444] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_ALU_NOP | // a = b
+ `MC_W_A;
+memory[445] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+`endif
+
+// lshiftright_loop continued microcode (software shift on the bit-reversed value in b) ----
+`ifdef ENABLE_A_SHIFT
+memory[448] = `MC_BRANCHIF_A_ZERO | `MC_BRANCHIF_A_NEG | ((452 >> 2) << `P_ADDR);// (a <= 0) ? goto @lshiftright_exit (452)
+memory[449] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // a = a + (-1)
+ `MC_SEL_ALU_MC_CONST | ( (-1 & 127) << `P_ADDR) | `MC_W_A;
+memory[450] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // b = b + b (b << 1) || goto @lshiftright_loop
+ `MC_SEL_ALU_B | `MC_W_B | `MC_BRANCH | ((448 >>2) << `P_ADDR);
+// lshiftright_exit
+memory[452] = `MC_SEL_ADDR_B | `MC_SEL_READ_ADDR | `MC_ALU_FLIP | // a = FLIP(b)
+ `MC_W_A;
+memory[453] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+`endif
+
+// neqbranch / eqbranch --- continued microcode: branch taken ------------------------
+// sp = sp + 2
+// pc = pc + a
+memory[456] = `MC_SEL_ADDR_SP | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // sp = sp + 2 (constant 8 = two stack words)
+ `MC_SEL_ALU_MC_CONST | (8 << `P_ADDR) | `MC_W_SP;
+memory[457] = `MC_SEL_ADDR_PC | `MC_SEL_READ_ADDR | `MC_SEL_ALU_A | // pc = pc + a
+ `MC_ALU_PLUS | `MC_W_PC;
+memory[458] = `MC_FETCH; // op_cached? decode : goto fetch
+
+// neqbranch / eqbranch --- continued microcode: branch not taken --------------------
+// sp = sp + 2
+// pc = pc + 1
+memory[460] = `MC_SEL_ADDR_SP | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // sp = sp + 2 (constant 8 = two stack words)
+ `MC_SEL_ALU_MC_CONST | (8 << `P_ADDR) | `MC_W_SP;
+memory[461] = `MC_PC_PLUS_1; // pc = pc + 1
+memory[462] = `MC_FETCH; // op_cached? decode : goto fetch
+
+// neq / eq / lessthan* result = 1 --- continued microcode ----------
+// mem[sp] = 1
+memory[464] = `MC_SEL_ALU_MC_CONST | `MC_ALU_NOP_B | (1 << `P_ADDR) | // a = 1
+ `MC_W_A;
+memory[465] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+
+// neq / eq / lessthan* result = 0 --- continued microcode ----------
+// mem[sp] = 0
+memory[468] = `MC_SEL_ALU_MC_CONST | `MC_ALU_NOP_B | (0 << `P_ADDR) | // a = 0
+ `MC_W_A;
+memory[469] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+
+// MICROCODE ENTRY POINT AFTER RESET -------------------------------
+// initialize cpu registers
+// sp = @SP_START
+// pc = @RESET_VECTOR
+memory[473] = 0; // reserved and empty for correct cpu startup
+memory[474] = `MC_CLEAR_IDIM |`MC_SEL_ALU_MC_CONST | `MC_ALU_NOP_B | // sp = @SP_START
+ (`SP_START << `P_ADDR) | `MC_W_SP;
+memory[475] = `MC_SEL_ALU_MC_CONST | `MC_ALU_NOP_B | `MC_W_PC | // pc = @RESET_VECTOR
+ (`RESET_VECTOR << `P_ADDR) | `MC_EXIT_INTERRUPT; // enable interrupts on reset
+// falls through to fetch/decode at 476
+
+// FETCH / DECODE -------------------------------------
+// opcode=mem[pc]
+// decode (goto microcode entry point for opcode)
+memory[476] = `MC_SEL_ADDR_PC | `MC_SEL_READ_DATA | `MC_MEM_R | // opcode_cache = mem[pc]
+ `MC_W_OPCODE;
+memory[477] = `MC_DECODE; // decode: jump to the microcode entry point of the opcode
+
+// NEXT OPCODE -------------------------------------
+// pc = pc + 1
+// opcode cached ? decode : goto fetch
+memory[480] = `MC_PC_PLUS_1; // pc = pc + 1
+memory[481] = `MC_FETCH; // opcode cached ? decode : fetch, then decode
+
+// INTERRUPT REQUEST: handled as emulated opcode #0 --------------
+// sp = sp - 1
+// mem[sp] = pc + 1 (pushed by the emulate microcode at 504)
+// pc = mem[EMULATED_VECTORS + 0]
+memory[484] = `MC_ALU_NOP_B | `MC_SEL_ALU_MC_CONST | (0 << `P_ADDR) | // b = 0 (#0 in emulate table) || disable interrupts
+ `MC_W_B | `MC_ENTER_INTERRUPT;
+memory[485] = `MC_EMULATE; // emulate #0 (interrupt)
+
+// ---------------- OPCODES WITH PARAMETER IN OPCODE ----------------
+
+// im x (idim=0) 1_xxxxxxx: first im of a sequence pushes a new word ----
+// sp = sp - 1
+// mem[sp] = IMM(IDIM, opcode)
+// idim = 1
+memory[488] = `MC_SP_MINUS_4; // sp = sp - 1
+memory[489] = `MC_SEL_ALU_OPCODE | `MC_ALU_IM | `MC_W_A; // a = IMM(IDIM, opcode)
+memory[490] = `MC_SET_IDIM | `MC_SEL_ADDR_SP | `MC_MEM_W | // MEM[sp] = a; IDIM=1
+ `MC_GO_NEXT;
+
+// 1_xxxxxxx im x (idim=1): following im extends the word already on the stack ----
+// mem[sp] = IMM(IDIM, mem[sp], opcode)
+memory[491] = `MC_SET_IDIM | `MC_SEL_READ_DATA | `MC_SEL_ADDR_SP | // a = IMM(IDIM, MEM[sp], opcode)
+ `MC_MEM_R | `MC_SEL_ALU_OPCODE | `MC_ALU_IM | `MC_W_A;
+memory[492] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // MEM[sp] = a
+
+// 010_xxxxx storesp x: pop into the stack slot at offset x -----
+// mem[sp + x<<2] = mem[sp]
+// sp = sp + 1
+memory[493] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_ADDR | // b = sp + offset (offset taken from the opcode)
+ `MC_ALU_PLUS_OFFSET | `MC_SEL_ALU_OPCODE | `MC_W_B;
+memory[494] = `MC_SEL_ADDR_SP | `MC_MEM_R | `MC_W_A_MEM | // a=mem[sp] || sp=sp+1
+ `MC_SP_PLUS_4;
+memory[495] = `MC_SEL_ADDR_B | `MC_MEM_W | `MC_GO_NEXT; // mem[b] = a
+
+// 011_xxxxx loadsp x: push a copy of the stack slot at offset x
+// mem[sp-1] = mem [sp + x<<2]
+// sp = sp - 1
+memory[496] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_ADDR | // a = sp + offset (offset taken from the opcode)
+ `MC_ALU_PLUS_OFFSET | `MC_SEL_ALU_OPCODE | `MC_W_A;
+memory[497] = `MC_SEL_ADDR_A | `MC_SEL_READ_DATA | `MC_MEM_R | `MC_W_A; // a = mem[a]
+memory[498] = `MC_SP_MINUS_4; // sp = sp - 1
+memory[499] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+
+// 0001_xxxx addsp x: add the stack slot at offset x to the top of stack
+// mem[sp] = mem[sp] + mem[sp + x<<2]
+memory[500] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_SP | `MC_SEL_READ_ADDR | // a = sp + offset (offset taken from the opcode)
+ `MC_ALU_PLUS_OFFSET | `MC_SEL_ALU_OPCODE | `MC_W_A;
+memory[501] = `MC_SEL_ADDR_A | `MC_SEL_READ_DATA | `MC_MEM_R | `MC_W_A; // a = mem[a]
+memory[502] = `MC_SEL_ADDR_SP | `MC_SEL_READ_DATA | `MC_MEM_R |
+ `MC_ALU_PLUS | `MC_SEL_ALU_A | `MC_W_A; // a = a + mem[sp]
+memory[503] = `MC_SEL_ADDR_SP | `MC_MEM_W | `MC_GO_NEXT; // mem[sp] = a
+
+// 001_xxxxx emulate x -------------------------------------
+// <expects b = offset into the emulation table; the microcode of the emulated
+// opcode must load b before jumping here>
+// a = pc + 1 ; sp = sp - 1 ; mem[sp] = a (return address)
+// a = @EMULATION_VECTOR ; a = a + b
+// pc = mem[a] ; fetch
+memory[504] = `MC_CLEAR_IDIM | `MC_SEL_ADDR_PC | `MC_SEL_READ_ADDR | // a = pc + 1
+ `MC_ALU_PLUS | `MC_SEL_ALU_MC_CONST | (1 << `P_ADDR) | `MC_W_A;
+memory[505] = `MC_SP_MINUS_4; // sp = sp - 1
+memory[506] = `MC_SEL_ADDR_SP | `MC_MEM_W; // mem[sp] = a
+memory[507] = `MC_ALU_NOP_B | `MC_SEL_ALU_MC_CONST | `MC_W_A | // a = @vector_emulated
+ (`EMULATION_VECTOR << `P_ADDR);
+memory[508] = `MC_SEL_ADDR_A | `MC_SEL_READ_ADDR | `MC_ALU_PLUS | // a = a + b
+ `MC_SEL_ALU_B | `MC_W_A;
+memory[509] = `MC_SEL_ADDR_A | `MC_MEM_R | `MC_SEL_READ_DATA | // pc = mem[a]
+ `MC_ALU_NOP | `MC_W_PC;
+memory[510] = `MC_FETCH; // pc was rewritten: op_cached? decode : fetch
+
+// --------------------- END OF MICROCODE PROGRAM --------------------------
+end
+
+endmodule
diff --git a/zpu/hdl/avalanche/readme.txt b/zpu/hdl/avalanche/readme.txt
new file mode 100644
index 0000000..3eb1baf
--- /dev/null
+++ b/zpu/hdl/avalanche/readme.txt
@@ -0,0 +1,91 @@
+This ZPU implementation, codenamed "avalanche" was
+contributed by Antonio Anton <antonio.anton@anro-ingenieros.com>.
+
+Its most interesting aspects are its implementation using
+microcode, small size, reduced code size overhead and that
+it's implemented in Verilog.
+
+Please direct any questions to the zylin-zpu mailing list.
+
+The most urgently needed patches would be to provide working
+simulation examples and improved documentation.
+
+
+Øyvind Harboe
+
+
+Notes from Antonio:
+
+Hi,
+
+attached is my zpu implementation in Verilog, in case anybody is
+interested. The code is well commented. The microcode and opcodes are
+also exhaustively commented (and more accurate than the HTML
+documentation in some cases :-) ).
+
+At the moment I have no time to send a working environment, but I will
+find some time in the next few days to prepare a clean environment
+(software/hardware) and send it to the list. The target HW is the spartan3
+starter kit board (all peripherals working: vga, sram, uarts, etc.).
+
+Feel free to ask any questions on the list; I will do my best to answer
+quickly.
+
+Regards
+Antonio
+
+Hi,
+
+the zpu_core is complete and a lot of bugs have been fixed in the past, but
+extensive testing and a complete test program have not been
+defined/executed; anyway, I'm quite confident it works: this core
+executes eCos, FreeRTOS, Forth and other applications.
+
+Regarding FPGA resources for a "balanced" implementation (not the
+smallest, not the fastest):
+
+-cpu+alu+microcode rom: 671 LUT + 239 FF + 1 BRAM (50% of LUT is ALU)
+-complete soc (cpu, vga, uart, memory controller, interrupt controller,
+timers, gpio, spi, etc.): 1317 LUT + 716 FF + 1 BRAM
+
+Regarding the "modelsim hello world": I'm sorry, but I don't use modelsim;
+instead I use Icarus Verilog & gtkwave. The core has a "debug" facility
+which displays all opcodes and registers (memory changes, sp, pc, etc.)
+during simulation execution.
+
+Regards
+Antonio
+
+
+> > Regarding FPGA resources for a "balanced" implementation (not the
+> > smallest, not the fastest):
+> >
+> > -cpu+alu+microcode rom: 671 LUT + 239 FF + 1 BRAM (50% of LUT is ALU)
+>
+> Are there any emulated instructions not implemented in
+> microcode?
+>
+
+*All* zpu opcodes are microcoded. For some opcodes (like *shift*),
+there are two versions: a 32 bit barrel shift in HDL (up to 32 clocks) or
+a 1 bit shift in HDL, microcode driven (up to ~130 clocks). They are
+selectable via `DEFINES in zpu_core_defines.v.
+
+Other opcodes like mult and div are 32 bit HDL only at the moment (there
+is enough room in microcode memory to implement them as microcode) and
+can be emulated in software as well.
+
+For the above figures (671 LUT + 239 FF): *shift* is 32 bit HDL and
+mult/div are software implemented.
+
+There are new opcodes (as per my needs) like memory bulk copy (sncpy,
+wcpy, wset) and ip checksum calculation (ipsum). There is room in
+microcode memory to define new opcodes using the holes in the ISA (for
+a complete list of opcodes and their functions please see
+zpu_core_defines.v).
+
+Some future ideas (easy to implement in microcode)
+-on-chip debug
+-microcode update via software
+
+Regards
OpenPOWER on IntegriCloud