summaryrefslogtreecommitdiffstats
path: root/zpu/hdl/avalanche/core/zpu_core.v
diff options
context:
space:
mode:
Diffstat (limited to 'zpu/hdl/avalanche/core/zpu_core.v')
-rw-r--r--zpu/hdl/avalanche/core/zpu_core.v749
1 files changed, 749 insertions, 0 deletions
diff --git a/zpu/hdl/avalanche/core/zpu_core.v b/zpu/hdl/avalanche/core/zpu_core.v
new file mode 100644
index 0000000..e704fbc
--- /dev/null
+++ b/zpu/hdl/avalanche/core/zpu_core.v
@@ -0,0 +1,749 @@
+`timescale 1ns / 1ps
+`include "zpu_core_defines.v"
+
+/* MODULE: zpu_core
+ DESCRIPTION: Contains ZPU cpu
+ AUTHOR: Antonio J. Anton (aj <at> anro-ingenieros.com)
+
+REVISION HISTORY:
+Revision 1.0, 14/09/2009
+Initial public release
+
+COPYRIGHT:
+Copyright (c) 2009 Antonio J. Anton
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.*/
+
+// --------- MICROPROGRAMMED ZPU CORE ---------------
+// all signals are polled on clk rising edge
+// all signals positive
+
+module zpu_core (
+`ifdef ENABLE_CPU_INTERRUPTS
+ interrupt, // interrupt request (active high)
+`endif
+ clk, // clock; all registers update on the rising edge
+ reset, // reset (active high): forces the microcode sequencer to its reset address
+ mem_read, // request memory read
+ mem_write, // request memory write
+ mem_done, // memory operation completed (releases the stall while a request is pending)
+ mem_addr, // memory address
+ mem_data_read, // data read from memory
+ mem_data_write, // data to be written to memory
+ byte_select // byte select on memory operation (4 bits)
+);
+
+input clk;
+input reset;
+output mem_read;
+output mem_write;
+input mem_done;
+input [31:0] mem_data_read;
+output [31:0] mem_data_write;
+output [31:0] mem_addr;
+output [3:0] byte_select; // declared as reg or wire further down, depending on ENABLE_BYTE_SELECT
+`ifdef ENABLE_CPU_INTERRUPTS
+input interrupt;
+`endif
+
+wire clk;
+wire reset;
+wire mem_read;
+wire mem_write;
+wire mem_done;
+wire [31:0] mem_data_read;
+wire [31:0] mem_data_write;
+wire [31:0] mem_addr;
+`ifdef ENABLE_CPU_INTERRUPTS
+wire interrupt;
+`endif
+
+`ifdef ENABLE_BYTE_SELECT
+// ------ byte / halfword memory accesses handled in hardware -----
+/// TODO: consider moving this into microcode or a less resource hungry implementation
+
+reg [3:0] byte_select; // byte lanes enabled for the current access
+wire byte_op; // 8 bit access requested
+wire halfw_op; // 16 bit access requested
+
+reg [31:0] mem_data_read_int; // memory data after lane alignment
+reg [31:0] mem_data_write_out; // cpu data after lane alignment
+wire [31:0] mem_data_write_int; // cpu data before lane alignment
+
+// --- lane enables: { addr[1:0], byte_op, halfw_op } picks the pattern ---
+wire [3:0] lane_en_key = { mem_addr[1:0], byte_op, halfw_op };
+
+always @(lane_en_key)
+begin
+ casez( lane_en_key )
+ // byte access: exactly one lane, chosen by addr[1:0]
+ 4'b00_1_? : byte_select <= 4'b0001;
+ 4'b01_1_? : byte_select <= 4'b0010;
+ 4'b10_1_? : byte_select <= 4'b0100;
+ 4'b11_1_? : byte_select <= 4'b1000;
+ // halfword access: lower or upper pair, chosen by addr[1]
+ 4'b0?_0_1 : byte_select <= 4'b0011;
+ 4'b1?_0_1 : byte_select <= 4'b1100;
+ // anything else is a full word access
+ default : byte_select <= 4'b1111;
+ endcase
+end
+
+// --- memory -> cpu: pick the addressed lane(s), zero extend, swap byte order ---
+wire [7:0] rd_lane0 = mem_data_read[7:0];
+wire [7:0] rd_lane1 = mem_data_read[15:8];
+wire [7:0] rd_lane2 = mem_data_read[23:16];
+wire [7:0] rd_lane3 = mem_data_read[31:24];
+wire [3:0] rd_align_key = { mem_addr[1:0], byte_op, halfw_op };
+
+always @(rd_lane0 or rd_lane1 or rd_lane2 or rd_lane3 or rd_align_key)
+begin
+ casez( rd_align_key )
+ // 8 bit read: selected lane goes to bits [7:0]
+ 4'b00_1_? : mem_data_read_int <= { 24'b0, rd_lane0 };
+ 4'b01_1_? : mem_data_read_int <= { 24'b0, rd_lane1 };
+ 4'b10_1_? : mem_data_read_int <= { 24'b0, rd_lane2 };
+ 4'b11_1_? : mem_data_read_int <= { 24'b0, rd_lane3 };
+ // 16 bit read: selected lane pair, byte swapped
+ 4'b0?_0_1 : mem_data_read_int <= { 16'b0, rd_lane0, rd_lane1 };
+ 4'b1?_0_1 : mem_data_read_int <= { 16'b0, rd_lane2, rd_lane3 };
+ // 32 bit read (default): all four lanes, byte swapped
+ default : mem_data_read_int <= { rd_lane0, rd_lane1, rd_lane2, rd_lane3 };
+ endcase
+end
+
+// --- cpu -> memory: place the cpu bytes on the addressed lane(s) ---
+// lanes that are not part of the access are driven with X (don't care)
+wire [7:0] wr_byte0 = mem_data_write_int[7:0];
+wire [7:0] wr_byte1 = mem_data_write_int[15:8];
+wire [7:0] wr_byte2 = mem_data_write_int[23:16];
+wire [7:0] wr_byte3 = mem_data_write_int[31:24];
+wire [3:0] wr_align_key = { mem_addr[1:0], byte_op, halfw_op };
+
+always @(wr_byte0 or wr_byte1 or wr_byte2 or wr_byte3 or wr_align_key)
+begin
+ casez( wr_align_key )
+ // 8 bit write: low cpu byte goes to the addressed lane
+ 4'b00_1_? : mem_data_write_out <= { 24'bX, wr_byte0 };
+ 4'b01_1_? : mem_data_write_out <= { 16'bX, wr_byte0, 8'bX };
+ 4'b10_1_? : mem_data_write_out <= { 8'bX, wr_byte0, 16'bX };
+ 4'b11_1_? : mem_data_write_out <= { wr_byte0, 24'bX };
+ // 16 bit write: low cpu halfword, byte swapped, on the addressed lane pair
+ 4'b0?_0_1 : mem_data_write_out <= { 16'bX, wr_byte0, wr_byte1 };
+ 4'b1?_0_1 : mem_data_write_out <= { wr_byte0, wr_byte1, 16'bX };
+ // 32 bit write (default): whole word, byte swapped
+ default : mem_data_write_out <= { wr_byte0, wr_byte1, wr_byte2, wr_byte3 };
+ endcase
+end
+
+assign mem_data_write = mem_data_write_out;
+`else
+// -------- 32 bit accesses only: byte/halfword accesses must be emulated --------
+wire [3:0] byte_select;
+assign byte_select = 4'b1111; // every access enables all four byte lanes
+
+wire [31:0] mem_data_write_int; // cpu data before the byte swap
+
+// ----- byte order is swapped in both directions (MSB-LSB configuration) -----
+wire [31:0] mem_data_read_int = { mem_data_read[7:0],
+                                   mem_data_read[15:8],
+                                   mem_data_read[23:16],
+                                   mem_data_read[31:24] };
+
+assign mem_data_write = { mem_data_write_int[7:0],
+                          mem_data_write_int[15:8],
+                          mem_data_write_int[23:16],
+                          mem_data_write_int[31:24] };
+`endif
+
+// ------ datapath registers and connections (state plus the control wires decoded from the microcode word) -----------
+reg [31:0] pc; // program counter (byte aligned); pc[1:0] selects the opcode inside the cached word
+reg [31:0] sp; // stack pointer (word aligned)
+reg [31:0] a; // operand (address_out, data_out, alu_in); the only register that can be written to memory
+reg [31:0] b; // operand (address_out)
+reg idim; // im opcode being processed
+reg [7:0] opcode; // opcode being processed (combinational mux over opcode_cache, not a flip-flop)
+reg [31:2] pc_cached; // word address of the opcodes held in opcode_cache
+reg [31:0] opcode_cache; // cached opcodes (current word)
+`ifdef ENABLE_CPU_INTERRUPTS
+ reg int_requested; // interrupt has been requested
+ reg on_interrupt; // serving interrupt
+ wire exit_interrupt; // microcode says this is poppc_interrupt
+ wire enter_interrupt; // microcode says we are entering interrupt
+`endif
+wire [1:0] sel_opcode = pc[1:0]; // which opcode is selected
+wire sel_read; // mux for data-in
+wire [1:0] sel_alu; // mux for alu
+wire [1:0] sel_addr; // mux for addr
+wire w_pc; // write PC
+`ifdef ENABLE_PC_INCREMENT
+ wire w_pc_increment; // write PC+1
+`endif
+wire w_sp; // write SP
+wire w_a; // write A (from ALU result)
+wire w_a_mem; // write A (from MEM read)
+wire w_b; // write B
+wire w_op; // write OPCODE (opcode cache)
+wire set_idim; // set IDIM
+wire clear_idim; // clear IDIM
+wire is_op_cached = (pc[31:2] == pc_cached) ? 1'b1 : 1'b0; // is the word containing pc already in opcode_cache?
+wire a_is_zero; // A == 0
+wire a_is_neg; // A[31] == 1
+wire busy; // busy signal to microcode sequencer (stalls cpu)
+
+reg [`MC_MEM_BITS-1:0] mc_pc; // microcode PC
+initial mc_pc <= `MC_ADDR_RESET-1; // simulation start value: one below the reset entry
+wire [`MC_BITS-1:0] mc_op; // current microcode operation
+
+// memory addr / write ports: sel_addr picks SP, A or B; any other value selects PC
+assign mem_addr = (sel_addr == `SEL_ADDR_SP) ? sp :
+ (sel_addr == `SEL_ADDR_A) ? a :
+ (sel_addr == `SEL_ADDR_B) ? b : pc;
+assign mem_data_write_int = a; // only A can be written to memory
+
+// ------- alu instantiation -------
+wire [31:0] alu_a;
+wire [31:0] alu_b;
+wire [31:0] alu_r;
+wire [`ALU_OP_WIDTH-1:0] alu_op;
+wire alu_done;
+
+// alu input multiplexers
+// alu_a is either the (aligned) memory read data or the currently selected address register
+// the 7 bit microcode constant is sign extended (so that subtractions can be implemented as adds)
+assign alu_a = (sel_read == `SEL_READ_DATA) ? mem_data_read_int : mem_addr;
+assign alu_b = (sel_alu == `SEL_ALU_MC_CONST) ? { {25{mc_op[`P_ADDR+6]}} , mc_op[`P_ADDR+6:`P_ADDR] } : // highest priority
+ (sel_alu == `SEL_ALU_A) ? a :
+ (sel_alu == `SEL_ALU_B) ? b : { {24{1'b0}} , opcode }; // `SEL_ALU_OPCODE has the lowest priority (zero extended opcode)
+
+zpu_core_alu alu(
+ .alu_a(alu_a),
+ .alu_b(alu_b),
+ .alu_r(alu_r),
+ .alu_op(alu_op),
+ .flag_idim(idim),
+ .clk(clk),
+ .done(alu_done)
+);
+
+// -------- pc : program counter --------
+// priority: an explicit write from the ALU wins over the optional +1 shortcut;
+// with neither strobe active the register holds its value
+always @(posedge clk)
+begin
+ case(1'b1)
+ w_pc : pc <= alu_r;
+`ifdef ENABLE_PC_INCREMENT // microcode optimization
+ w_pc_increment : pc <= pc + 1; // usually pc=pc+1
+`endif
+ endcase
+end
+
+// -------- sp : stack pointer, loaded from the ALU result when w_sp is set --------
+always @(posedge clk)
+ if(w_sp) sp <= alu_r;
+
+// -------- a : accumulator register ---------
+// the ALU result has priority over the direct load from memory
+always @(posedge clk)
+begin
+ case(1'b1)
+ w_a : a <= alu_r;
+ w_a_mem : a <= mem_data_read_int;
+ endcase
+end
+
+// condition flags are taken from the registered value of A
+// (not from the ALU result) in order to improve speed
+assign a_is_neg = a[31];
+assign a_is_zero = (a == 0);
+
+// -------- b : auxiliary register, loaded from the ALU result when w_b is set ---------
+always @(posedge clk)
+ if(w_b) b <= alu_r;
+
+// -------- opcode_cache : one word of opcodes plus the word address it came from --------
+always @(posedge clk)
+begin
+ if(w_op)
+ begin
+ pc_cached <= pc[31:2]; // remember which word is cached
+ opcode_cache <= alu_r; // keep all four opcodes of that word
+ end
+end
+
+// -------- opcode : byte of the cached word addressed by pc[1:0] ---------
+// byte 0 is the most significant byte of opcode_cache
+always @(sel_opcode or opcode_cache)
+begin
+ case(sel_opcode)
+ 2'd3 : opcode <= opcode_cache[7:0];
+ 2'd2 : opcode <= opcode_cache[15:8];
+ 2'd1 : opcode <= opcode_cache[23:16];
+ 2'd0 : opcode <= opcode_cache[31:24];
+ endcase
+end
+
+// ------- idim : immediate opcode handling ----------
+// set has priority over clear; otherwise the flag keeps its value
+always @(posedge clk)
+begin
+ case(1'b1)
+ set_idim : idim <= 1'b1;
+ clear_idim : idim <= 1'b0;
+ endcase
+end
+
+`ifdef ENABLE_CPU_INTERRUPTS
+// ------ "serving an interrupt" status bit -----
+// cleared by reset or when microcode leaves the interrupt, set when microcode enters it
+always @(posedge clk)
+begin
+ if(reset) on_interrupt <= 1'b0;
+ else if(exit_interrupt) on_interrupt <= 1'b0;
+ else if(enter_interrupt) on_interrupt <= 1'b1;
+end
+`endif
+
+// ------ microcode execution unit --------
+// every register write strobe is gated with ~busy so that nothing is committed while the cpu is stalled;
+// mem_read / mem_write are NOT gated: the request stays asserted for the whole stall
+assign sel_read = mc_op[`P_SEL_READ]; // map datapath signals with microcode program bits
+assign sel_alu = mc_op[`P_SEL_ALU+1:`P_SEL_ALU];
+assign sel_addr = mc_op[`P_SEL_ADDR+1:`P_SEL_ADDR];
+assign alu_op = mc_op[`P_ALU+3:`P_ALU];
+assign w_sp = mc_op[`P_W_SP] & ~busy;
+assign w_pc = mc_op[`P_W_PC] & ~busy;
+assign w_a = mc_op[`P_W_A] & ~busy;
+assign w_a_mem = mc_op[`P_W_A_MEM] & ~busy;
+assign w_b = mc_op[`P_W_B] & ~busy;
+assign w_op = mc_op[`P_W_OPCODE] & ~busy;
+assign mem_read = mc_op[`P_MEM_R];
+assign mem_write = mc_op[`P_MEM_W];
+assign set_idim = mc_op[`P_SET_IDIM] & ~busy;
+assign clear_idim= mc_op[`P_CLEAR_IDIM] & ~busy;
+`ifdef ENABLE_BYTE_SELECT
+assign byte_op = mc_op[`P_BYTE];
+assign halfw_op = mc_op[`P_HALFWORD];
+`endif
+`ifdef ENABLE_PC_INCREMENT
+ assign w_pc_increment = mc_op[`P_PC_INCREMENT] & ~busy;
+`endif
+`ifdef ENABLE_CPU_INTERRUPTS
+ assign exit_interrupt = mc_op[`P_EXIT_INT] & ~busy;
+ assign enter_interrupt = mc_op[`P_ENTER_INT] & ~busy;
+`endif
+
+wire cond_op_not_cached = mc_op[`P_OP_NOT_CACHED]; // conditional: true if opcode not cached
+wire cond_a_zero = mc_op[`P_A_ZERO]; // conditional: true if A is zero
+wire cond_a_neg = mc_op[`P_A_NEG]; // conditional: true if A is negative
+wire decode = mc_op[`P_DECODE]; // decode means jump to the appropriate microcode based on the zpu opcode
+wire branch = mc_op[`P_BRANCH]; // unconditional jump inside microcode
+
+wire [`MC_MEM_BITS-1:0] mc_goto = { mc_op[`P_ADDR+6:`P_ADDR], 2'b00 }; // microcode goto (goto = high 7 bits, so targets are multiples of 4)
+wire [`MC_MEM_BITS-1:0] mc_entry = { opcode[6:0], 2'b00 }; // microcode entry point for opcode (opcode[6:0] * 4)
+reg [`MC_MEM_BITS-1:0] next_mc_pc; // next microcode operation to be executed
+initial next_mc_pc <= `MC_ADDR_RESET-1; // simulation start value, matches the initial mc_pc
+
+wire cond_branch = (cond_op_not_cached & ~is_op_cached) | // sum of all conditionals: any enabled condition that holds takes the branch
+ (cond_a_zero & a_is_zero) |
+ (cond_a_neg & a_is_neg);
+
+assign busy = ((mem_read | mem_write) & ~mem_done) | ~alu_done; // busy signal for microcode sequencer: pending memory access or unfinished multicycle alu op
+
+// ------- interrupt request latch ---------
+// dropped on reset and while an interrupt is being served; otherwise it
+// captures the interrupt input and stays set
+`ifdef ENABLE_CPU_INTERRUPTS
+always @(posedge clk)
+begin
+ if(reset) int_requested <= 0;
+ else if(on_interrupt) int_requested <= 0;
+ else if(interrupt & ~on_interrupt & ~int_requested) int_requested <= 1; // new request
+end
+`endif
+
+// ----- calculate next microcode address (next, decode, branch, specific opcode, etc.) -----
+// combinational; priority is: reset > stall (hold) > branch / taken conditional > decode > sequential
+always @(reset or mc_pc or mc_goto or opcode[7:4] or idim or
+ decode or branch or cond_branch or mc_entry or busy
+`ifdef ENABLE_CPU_INTERRUPTS
+ or int_requested
+`endif
+)
+begin
+ // default, next microcode instruction
+ next_mc_pc <= mc_pc + 1;
+ if(reset) next_mc_pc <= `MC_ADDR_RESET;
+ else if(~busy)
+ begin
+ // get next microcode instruction
+ if(branch | cond_branch) next_mc_pc <= mc_goto;
+ else if(decode) // decode: entry point of a new zpu opcode
+ begin
+`ifdef ENABLE_CPU_INTERRUPTS
+ if(int_requested & ~idim) next_mc_pc <= `MC_ADDR_INTERRUPT; // microcode to enter interrupt mode (never taken in the middle of an IM sequence)
+ else
+`endif
+ if(opcode[7] == `OP_IM) next_mc_pc <= (idim ? `MC_ADDR_IM_IDIM : `MC_ADDR_IM_NOIDIM); // first IM vs. continuation IM
+ else if(opcode[7:5] == `OP_STORESP) next_mc_pc <= `MC_ADDR_STORESP;
+ else if(opcode[7:5] == `OP_LOADSP) next_mc_pc <= `MC_ADDR_LOADSP;
+ else if(opcode[7:4] == `OP_ADDSP) next_mc_pc <= `MC_ADDR_ADDSP;
+ else next_mc_pc <= mc_entry; // includes EMULATE opcodes
+ end
+ end
+ else next_mc_pc <= mc_pc; // in case of cpu stalled (busy=1): repeat the current microinstruction
+end
+
+// microcode program counter follows the computed next address on every clock
+always @(posedge clk)
+begin
+ mc_pc <= next_mc_pc;
+end
+
+// ----- microcode program ------
+// the ROM is addressed with next_mc_pc (not mc_pc) and is given the clock
+zpu_core_rom microcode (
+ .clk(clk),
+ .addr(next_mc_pc),
+ .data(mc_op)
+);
+
+// -------------- ZPU debugger --------------------
+`ifdef ZPU_CORE_DEBUG
+//synthesis translate_off
+// ---- register operation dump ----
+// Simulation-only trace: on every clock outside reset, print each register
+// update that is being committed on this edge. The values printed are the
+// ones the registers are about to take.
+always @(posedge clk)
+begin
+ if(~reset)
+ begin
+ if(w_pc) $display("zpu_core: set PC=0x%h", alu.alu_r);
+`ifdef ENABLE_PC_INCREMENT
+ // w_pc has priority over w_pc_increment in the pc register, and pc still
+ // holds the pre-increment value here, so print pc+1 and only when it applies
+ if(~w_pc & w_pc_increment) $display("zpu_core: set PC=0x%h (PC+1)", pc + 32'd1);
+`endif
+ if(w_sp) $display("zpu_core: set SP=0x%h", alu.alu_r);
+ if(w_a) $display("zpu_core: set A=0x%h", alu.alu_r);
+ if(w_a_mem) $display("zpu_core: set A=0x%h (from MEM)", mem_data_read_int);
+ if(w_b) $display("zpu_core: set B=0x%h", alu.alu_r);
+ if(w_op & ~is_op_cached) $display("zpu_core: set opcode_cache=0x%h, pc_cached=0x%h", alu.alu_r, {pc[31:2], 2'b0});
+`ifdef ENABLE_CPU_INTERRUPTS
+ if(~busy & mc_pc == `MC_ADDR_INTERRUPT) $display("zpu_core: ***** ENTERING INTERRUPT MICROCODE ******");
+ if(~busy & exit_interrupt) $display("zpu_core: ***** INTERRUPT FLAG CLEARED *****");
+ if(~busy & enter_interrupt) $display("zpu_core: ***** INTERRUPT FLAG SET *****");
+`endif
+ if(set_idim & ~idim) $display("zpu_core: IDIM=1");
+ if(clear_idim & idim) $display("zpu_core: IDIM=0");
+
+// ---- microcode debug ----
+`ifdef ZPU_CORE_DEBUG_MICROCODE
+ if(~busy)
+ begin
+ $display("zpu_core: mc_op[%d]=0b%b", mc_pc, mc_op);
+ if(branch) $display("zpu_core: microcode: branch=%d", mc_goto);
+ if(cond_branch) $display("zpu_core: microcode: CONDITION branch=%d", mc_goto);
+ if(decode) $display("zpu_core: decoding opcode=0x%h (0b%b) : branch to=%d ", opcode, opcode, mc_entry);
+ end
+ else $display("zpu_core: busy");
+`endif
+
+// ---- cpu abort in case of unaligned memory access ---
+`ifdef ASSERT_NON_ALIGNMENT
+ /* unaligned word access (except PC) */
+ // byte_op / halfw_op only exist when ENABLE_BYTE_SELECT is defined; without
+ // it every access is a word access, so the word check applies unconditionally
+`ifdef ENABLE_BYTE_SELECT
+ if(sel_addr != `SEL_ADDR_PC & mem_addr[1:0] != 2'b00 & (mem_read | mem_write) & !byte_op & !halfw_op)
+`else
+ if(sel_addr != `SEL_ADDR_PC & mem_addr[1:0] != 2'b00 & (mem_read | mem_write))
+`endif
+ begin
+ $display("zpu_core: unaligned word operation at addr=0x%x", mem_addr);
+ $finish;
+ end
+
+`ifdef ENABLE_BYTE_SELECT
+ /* unaligned halfword access */
+ if(mem_addr[0] & (mem_read | mem_write) & !byte_op & halfw_op)
+ begin
+ $display("zpu_core: unaligned halfword operation at addr=0x%x", mem_addr);
+ $finish;
+ end
+`endif
+`endif
+
+ end
+end
+
+// ----- opcode disassembler (simulation only): prints a banner when the microcode PC sits on a known entry point -----
+always @(posedge clk)
+begin
+if(~busy) // only report microinstructions that actually complete
+case(mc_pc) // entry points are multiples of 4 (see mc_entry = opcode[6:0] * 4)
+0 : begin // breakpoint stops the simulation
+ $display("zpu_core: ------ breakpoint ------");
+ $finish;
+ end
+4 : $display("zpu_core: ------ shiftleft ------");
+8 : $display("zpu_core: ------ pushsp ------");
+12 : $display("zpu_core: ------ popint ------");
+16 : $display("zpu_core: ------ poppc ------");
+20 : $display("zpu_core: ------ add ------");
+24 : $display("zpu_core: ------ and ------");
+28 : $display("zpu_core: ------ or ------");
+32 : $display("zpu_core: ------ load ------");
+36 : $display("zpu_core: ------ not ------");
+40 : $display("zpu_core: ------ flip ------");
+44 : $display("zpu_core: ------ nop ------");
+48 : $display("zpu_core: ------ store ------");
+52 : $display("zpu_core: ------ popsp ------");
+56 : $display("zpu_core: ------ ipsum ------");
+60 : $display("zpu_core: ------ sncpy ------");
+
+`MC_ADDR_IM_NOIDIM : $display("zpu_core: ------ im 0x%h (1st) ------", opcode[6:0] );
+`MC_ADDR_IM_IDIM : $display("zpu_core: ------ im 0x%h (cont) ------", opcode[6:0] );
+`MC_ADDR_STORESP : $display("zpu_core: ------ storesp 0x%h ------", { ~opcode[4], opcode[3:0], 2'b0 } ); // same offset encoding as ALU_PLUS_OFFSET
+`MC_ADDR_LOADSP : $display("zpu_core: ------ loadsp 0x%h ------", { ~opcode[4], opcode[3:0], 2'b0 } );
+`MC_ADDR_ADDSP : $display("zpu_core: ------ addsp 0x%h ------", { ~opcode[4], opcode[3:0], 2'b0 } );
+`MC_ADDR_EMULATE : $display("zpu_core: ------ emulate 0x%h ------", b[2:0]); // opcode[5:0] ); NOTE(review): prints only b[2:0] - confirm this is the intended field
+
+128 : $display("zpu_core: ------ mcpy ------");
+132 : $display("zpu_core: ------ mset ------");
+136 : $display("zpu_core: ------ loadh ------");
+140 : $display("zpu_core: ------ storeh ------");
+144 : $display("zpu_core: ------ lessthan ------");
+148 : $display("zpu_core: ------ lessthanorequal ------");
+152 : $display("zpu_core: ------ ulessthan ------");
+156 : $display("zpu_core: ------ ulessthanorequal ------");
+160 : $display("zpu_core: ------ swap ------");
+164 : $display("zpu_core: ------ mult ------");
+168 : $display("zpu_core: ------ lshiftright ------");
+172 : $display("zpu_core: ------ ashiftleft ------");
+176 : $display("zpu_core: ------ ashiftright ------");
+180 : $display("zpu_core: ------ call ------");
+184 : $display("zpu_core: ------ eq ------");
+188 : $display("zpu_core: ------ neq ------");
+192 : $display("zpu_core: ------ neg ------");
+196 : $display("zpu_core: ------ sub ------");
+200 : $display("zpu_core: ------ xor ------");
+204 : $display("zpu_core: ------ loadb ------");
+208 : $display("zpu_core: ------ storeb ------");
+212 : $display("zpu_core: ------ div ------");
+216 : $display("zpu_core: ------ mod ------");
+220 : $display("zpu_core: ------ eqbranch ------");
+224 : $display("zpu_core: ------ neqbranch ------");
+228 : $display("zpu_core: ------ poppcrel ------");
+232 : $display("zpu_core: ------ config ------");
+236 : $display("zpu_core: ------ pushpc ------");
+240 : $display("zpu_core: ------ syscall_emulate ------");
+244 : $display("zpu_core: ------ pushspadd ------");
+248 : $display("zpu_core: ------ halfmult ------");
+252 : $display("zpu_core: ------ callpcrel ------");
+//default : $display("zpu_core: mc_pc=0x%h", decode_mcpc); // disabled: would print on every other microinstruction
+endcase
+end
+//synthesis translate_on
+`endif
+endmodule
+
+// --------- ZPU CORE ALU UNIT: combinational result mux plus optional multicycle units (mult, div, barrel shift) ---------------
+module zpu_core_alu(
+ alu_a, // parameter A
+ alu_b, // parameter B
+ alu_r, // computed result
+ flag_idim, // for IMM alu op: selects between first IM (sign extend) and continuation IM (shift in 7 bits)
+ alu_op, // ALU operation
+ clk, // clock for synchronous multicycle operations
+ done // done signal for alu operation (1 for single cycle operations)
+);
+
+input [31:0] alu_a;
+input [31:0] alu_b;
+input [`ALU_OP_WIDTH-1:0] alu_op;
+input flag_idim;
+output [31:0] alu_r;
+input clk;
+output done;
+
+wire [31:0] alu_a;
+wire [31:0] alu_b;
+wire [`ALU_OP_WIDTH-1:0] alu_op;
+wire flag_idim;
+reg [31:0] alu_r; // reg only because it is assigned in an always block; it is combinational
+wire clk;
+reg done; // combinational as well
+
+`ifdef ENABLE_MULT
+// 32 bit multiplier written as a register pipeline:
+// operands -> (a_in, b_in) -> product (mul_tmp1) -> mul_result
+// mul_counter counts the clocks spent running; the result is flagged
+// as valid when it reaches 3. All stages are flushed to zero whenever
+// the multiplier is not running.
+reg mul_running;
+reg [2:0] mul_counter;
+wire mul_done = (mul_counter == 3);
+reg [31:0] mul_result, mul_tmp1;
+reg [31:0] a_in, b_in;
+
+always@(posedge clk)
+begin
+ if(mul_running)
+ begin // advance every pipeline stage by one
+ mul_counter <= mul_counter + 1;
+ mul_result <= mul_tmp1;
+ mul_tmp1 <= a_in * b_in;
+ b_in <= alu_b;
+ a_in <= alu_a;
+ end
+ else
+ begin // idle: flush the pipeline and restart the count
+ mul_counter <= 0;
+ mul_result <= 0;
+ mul_tmp1 <= 0;
+ b_in <= 0;
+ a_in <= 0;
+ end
+end
+`endif
+
+`ifdef ENABLE_DIV
+// implement 32 bit divider
+// Unsigned/Signed division based on Patterson and Hennessy's algorithm.
+// Description: Calculates quotient. The "sign" input determines whether
+// signs (two's complement) should be taken into consideration.
+// references: http://www.ece.lsu.edu/ee3755/2002/l07.html
+// Operation: one init cycle loads qr = { 0, |dividend| }, then 32 shift/subtract steps;
+// afterwards qr[31:0] holds the quotient magnitude and qr[63:32] the remainder magnitude.
+reg [63:0] qr; // { partial remainder, dividend / quotient bits }
+wire [33:0] diff; // NOTE(review): the assigned expression below is 33 bits wide; only diff[32:0] is used
+wire [31:0] quotient; // quotient with the sign applied
+wire [31:0] dividend; // magnitude of alu_a when dividing signed
+wire [31:0] divider; // magnitude of alu_b when dividing signed
+reg [6:0] bit; // step counter; bit[6]=1 means "stopped", 0 means "finished"
+wire div_done;
+reg div_running;
+reg divide_sign; // NOTE(review): never assigned anywhere in this module - confirm how signed division is meant to be selected
+reg negative_output; // result must be negated (operands have different signs)
+
+assign div_done = !bit; // all 32 steps executed
+assign diff = qr[63:31] - {1'b0, divider}; // trial subtraction; diff[32]=1 means it went negative
+assign quotient = (!negative_output) ? qr[31:0] : ~qr[31:0] + 1'b1;
+assign dividend = (!divide_sign || !alu_a[31]) ? alu_a : ~alu_a + 1'b1;
+assign divider = (!divide_sign || !alu_b[31]) ? alu_b : ~alu_b + 1'b1;
+
+always@(posedge clk)
+begin
+ bit <= 7'b1_000000; // divider stopped
+ if(div_running)
+ begin
+ if(bit[6]) // divider started: initialize registers
+ begin
+ bit <= 7'd32;
+ qr <= { 32'd0, dividend };
+ negative_output <= divide_sign && ((alu_b[31] && !alu_a[31]) || (!alu_b[31] && alu_a[31]));
+ end
+ else // step by step divide
+ begin
+ if( diff[32] ) qr <= { qr[62:0], 1'd0 }; // subtraction failed: shift in a 0 quotient bit
+ else qr <= { diff[31:0], qr[30:0], 1'd1 }; // subtraction ok: keep the difference, shift in a 1
+ bit <= bit - 1;
+ end
+ end
+end
+`endif
+
+`ifdef ENABLE_BARREL
+// 32 bit shifter, one position per clock
+// alu_b[6] : 1 = shift left, 0 = shift right
+// alu_b[5] : (right shifts only) 1 = logical, 0 = arithmetic
+// alu_b[4:0] : number of positions
+reg bs_running;
+reg [31:0] bs_result;
+reg [4:0] bs_counter; // 5 bits
+wire bs_left = alu_b[6];
+wire bs_logical = alu_b[5];
+wire [4:0] bs_moves = alu_b[4:0];
+wire bs_done = (bs_counter == bs_moves);
+
+always @(posedge clk)
+begin
+ if(bs_running)
+ begin // one more position
+ bs_counter <= bs_counter + 1;
+ if(bs_left) bs_result <= { bs_result[30:0], 1'b0 }; // left
+ else if(bs_logical) bs_result <= { 1'b0, bs_result[31:1] }; // logical right
+ else bs_result <= { bs_result[31], bs_result[31:1] }; // arithmetic right (sign bit replicated)
+ end
+ else
+ begin // idle: reload the operand and restart the count
+ bs_counter <= 0;
+ bs_result <= alu_a;
+ end
+end
+`endif
+
+// ----- alu add/sub -----
+// second operand: for ALU_PLUS_OFFSET the low 5 bits of alu_b are turned into
+// a word offset (bit 4 inverted, two zero bits appended); otherwise alu_b as is
+reg [31:0] alu_b_tmp;
+always @(alu_b or alu_op)
+begin
+ if(alu_op == `ALU_PLUS_OFFSET) alu_b_tmp <= { 25'b0, ~alu_b[4], alu_b[3:0], 2'b00 };
+ else alu_b_tmp <= alu_b;
+end
+
+// the shared adder subtracts for the compare operations and adds for everything else
+`ifdef ENABLE_CMP
+wire addsub_subtract = (alu_op == `ALU_CMP_SIGNED) || (alu_op == `ALU_CMP_UNSIGNED);
+`else
+wire addsub_subtract = 1'b0;
+`endif
+
+reg [31:0] alu_r_addsub; // R = A + B or R = A - B
+always @(alu_a or alu_b_tmp or addsub_subtract)
+begin
+ if(addsub_subtract) alu_r_addsub <= alu_a - alu_b_tmp;
+ else alu_r_addsub <= alu_a + alu_b_tmp;
+end
+
+`ifdef ENABLE_CMP
+// signed overflow of A - B: the operands have different signs and the
+// result ended up with the sign of B (used by ALU_CMP_SIGNED / ALU_CMP_UNSIGNED)
+reg cmp_exception;
+always @(alu_a[31] or alu_b[31] or alu_r_addsub[31])
+begin
+ if( (alu_a[31] != alu_b[31]) && (alu_r_addsub[31] == alu_b[31]) ) cmp_exception <= 1;
+ else cmp_exception <= 0;
+end
+`endif
+
+// ----- alu operation selection -----
+// Combinational mux that produces alu_r and done for the requested alu_op.
+// - single cycle operations report done=1 immediately
+// - multicycle units (mult / barrel / div) are started by raising their
+//   *_running request while their *_done flag is low; done follows *_done
+// - any alu_op without an explicit arm returns the adder output
+//   (ALU_PLUS, ALU_PLUS_OFFSET, ...)
+// The sensitivity list must name every signal read below: the divider arms
+// read quotient and qr (there is no signal called div_result in this module).
+always @(alu_a or alu_b or alu_op or flag_idim or alu_r_addsub
+`ifdef ENABLE_CMP
+ or cmp_exception
+`endif
+`ifdef ENABLE_MULT
+ or mul_done or mul_result
+`endif
+`ifdef ENABLE_BARREL
+ or bs_done or bs_result
+`endif
+`ifdef ENABLE_DIV
+ or div_done or quotient or qr
+`endif
+)
+begin
+ done <= 1; // default alu operations are 1 cycle
+`ifdef ENABLE_MULT
+ mul_running <= 0;
+`endif
+`ifdef ENABLE_BARREL
+ bs_running <= 0;
+`endif
+`ifdef ENABLE_DIV
+ div_running <= 0;
+`endif
+ alu_r <= alu_r_addsub; // ALU_PLUS, ALU_PLUS_OFFSET, ALU_SUB and part of ALU_CMP
+ case(alu_op)
+ `ALU_NOP : alu_r <= alu_a;
+ `ALU_NOP_B : alu_r <= alu_b;
+ `ALU_AND : alu_r <= alu_a & alu_b;
+ `ALU_OR : alu_r <= alu_a | alu_b;
+ `ALU_NOT : alu_r <= ~alu_a;
+ // bit reversal: result[31] = a[0] ... result[0] = a[31]
+ `ALU_FLIP : alu_r <= { alu_a[0], alu_a[1], alu_a[2], alu_a[3], alu_a[4], alu_a[5], alu_a[6], alu_a[7],
+ alu_a[8],alu_a[9],alu_a[10],alu_a[11],alu_a[12],alu_a[13],alu_a[14],alu_a[15],
+ alu_a[16],alu_a[17],alu_a[18],alu_a[19],alu_a[20],alu_a[21],alu_a[22],alu_a[23],
+ alu_a[24],alu_a[25],alu_a[26],alu_a[27],alu_a[28],alu_a[29],alu_a[30],alu_a[31] };
+ // IM: continuation shifts 7 new bits into A, first IM sign extends the 7 bit immediate
+ `ALU_IM : if(flag_idim) alu_r <= { alu_a[24:0], alu_b[6:0] };
+ else alu_r <= { {25{alu_b[6]}}, alu_b[6:0] };
+`ifdef ENABLE_CMP
+ // compares reuse A - B from the adder and only correct the sign bit
+ `ALU_CMP_UNSIGNED:if( (alu_a[31] == alu_b[31] && cmp_exception) ||
+ (alu_a[31] != alu_b[31] && ~cmp_exception) )
+ begin
+ alu_r[31] <= ~alu_r_addsub[31];
+ end
+ `ALU_CMP_SIGNED : if(cmp_exception)
+ begin
+ alu_r[31] <= ~alu_r_addsub[31];
+ end
+`endif
+`ifdef ENABLE_XOR
+ `ALU_XOR : alu_r <= alu_a ^ alu_b;
+`endif
+`ifdef ENABLE_A_SHIFT
+ `ALU_A_SHIFT_RIGHT: alu_r <= { alu_a[31], alu_a[31], alu_a[30:1] }; // arithmetic shift right by one
+`endif
+`ifdef ENABLE_MULT
+ `ALU_MULT : begin
+ mul_running <= ~mul_done;
+ done <= mul_done;
+ alu_r <= mul_result;
+ end
+`endif
+`ifdef ENABLE_BARREL
+ `ALU_BARREL : begin
+ bs_running <= ~bs_done;
+ done <= bs_done;
+ alu_r <= bs_result;
+ end
+`endif
+`ifdef ENABLE_DIV
+ `ALU_DIV : begin
+ div_running<= ~div_done;
+ done <= div_done;
+ alu_r <= quotient;
+ end
+ // after the last divide step qr = { remainder, quotient }: the modulo is
+ // the upper half (qr[31:0] would return the quotient bits again)
+ // NOTE(review): this is the remainder magnitude; no sign correction is
+ // applied for negative dividends - confirm against the signed MOD semantics
+ `ALU_MOD : begin
+ div_running<= ~div_done;
+ done <= div_done;
+ alu_r <= qr[63:32];
+ end
+`endif
+ endcase
+end
+
+endmodule
OpenPOWER on IntegriCloud