diff options
author | Bert Lange <b.lange@hzdr.de> | 2015-04-15 13:36:55 +0200 |
---|---|---|
committer | Bert Lange <b.lange@hzdr.de> | 2015-04-15 13:36:55 +0200 |
commit | a1c964908b51599bf624bd2d253419c7e629f195 (patch) | |
tree | 06125d59e83b7dde82d1bb57bc0e09ca83451b98 /zpu/hdl/avalanche/core/zpu_core.v | |
parent | bbfe29a15f11548eb7c9fa71dcb4d2d18c164a53 (diff) | |
parent | 8679e4f91dcae05aef40f96629f33f0f4161f14a (diff) | |
download | zpu-a1c964908b51599bf624bd2d253419c7e629f195.zip zpu-a1c964908b51599bf624bd2d253419c7e629f195.tar.gz |
Merge branch 'master' of https://github.com/zylin/zpu
Diffstat (limited to 'zpu/hdl/avalanche/core/zpu_core.v')
-rw-r--r-- | zpu/hdl/avalanche/core/zpu_core.v | 749 |
1 files changed, 749 insertions, 0 deletions
// File: zpu/hdl/avalanche/core/zpu_core.v
`timescale 1ns / 1ps
`include "zpu_core_defines.v"

/* MODULE: zpu_core
   DESCRIPTION: Contains ZPU cpu
   AUTHOR: Antonio J. Anton (aj <at> anro-ingenieros.com)

REVISION HISTORY:
Revision 1.0, 14/09/2009
Initial public release

COPYRIGHT:
Copyright (c) 2009 Antonio J. Anton

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.*/

// --------- MICROPROGRAMMED ZPU CORE ---------------
// All signals are sampled on the rising edge of clk and are active high.
//
// The core is a small datapath (pc, sp, a, b, opcode cache, idim flag) whose
// control signals are individual bits of the current microcode word (mc_op)
// read from zpu_core_rom.  A combinational sequencer picks the next microcode
// address; every register write enable is gated with ~busy so the datapath
// holds its state while a memory access or a multicycle ALU operation is
// still pending.
//
// Optional features are selected with `defines (see zpu_core_defines.v):
//   ENABLE_CPU_INTERRUPTS, ENABLE_BYTE_SELECT, ENABLE_PC_INCREMENT,
//   ZPU_CORE_DEBUG, ZPU_CORE_DEBUG_MICROCODE, ASSERT_NON_ALIGNMENT.

module zpu_core (
`ifdef ENABLE_CPU_INTERRUPTS
    interrupt,          // interrupt request
`endif
    clk,                // clock, rising edge
    reset,              // reset, sampled by the microcode sequencer while high
    mem_read,           // request memory read
    mem_write,          // request memory write
    mem_done,           // memory operation completed
    mem_addr,           // memory address
    mem_data_read,      // data read from memory
    mem_data_write,     // data written to memory
    byte_select         // byte select on memory operation
);

input           clk;
input           reset;
output          mem_read;
output          mem_write;
input           mem_done;
input  [31:0]   mem_data_read;
output [31:0]   mem_data_write;
output [31:0]   mem_addr;
output [3:0]    byte_select;
`ifdef ENABLE_CPU_INTERRUPTS
input           interrupt;
`endif

wire            clk;
wire            reset;
wire            mem_read;
wire            mem_write;
wire            mem_done;
wire   [31:0]   mem_data_read;
wire   [31:0]   mem_data_write;
wire   [31:0]   mem_addr;
`ifdef ENABLE_CPU_INTERRUPTS
wire            interrupt;
`endif

`ifdef ENABLE_BYTE_SELECT
// ------ unaligned byte/halfword memory operations -----
/// TODO: think rewriting into microcode or in a less resource wasting way

reg  [3:0]      byte_select;
wire            byte_op;
wire            halfw_op;

reg  [31:0]     mem_data_read_int;      // aligned data from memory
reg  [31:0]     mem_data_write_out;     // write data already aligned
wire [31:0]     mem_data_write_int;     // write data from cpu to be aligned

// --- byte select logic ---
// One-hot (byte) or two-hot (halfword) lane enable derived from the two low
// address bits; any other access enables all four lanes.
always @(mem_addr[1:0] or byte_op or halfw_op)
begin
    casez( { mem_addr[1:0], byte_op, halfw_op } )
        4'b00_1_? : byte_select <= 4'b0001;     // byte select
        4'b01_1_? : byte_select <= 4'b0010;
        4'b10_1_? : byte_select <= 4'b0100;
        4'b11_1_? : byte_select <= 4'b1000;
        4'b0?_0_1 : byte_select <= 4'b0011;     // half word select
        4'b1?_0_1 : byte_select <= 4'b1100;
        default   : byte_select <= 4'b1111;     // word select
    endcase
end

// --- input data to cpu ---
// Selects the addressed lane(s), byte-swaps them and zero-extends to 32 bit.
always @(mem_data_read or mem_addr[1:0] or byte_op or halfw_op)
begin
    casez( { mem_addr[1:0], byte_op, halfw_op } )
        4'b00_1_? : mem_data_read_int <= { 24'b0, mem_data_read[7:0] };     // 8 bit read
        4'b01_1_? : mem_data_read_int <= { 24'b0, mem_data_read[15:8] };
        4'b10_1_? : mem_data_read_int <= { 24'b0, mem_data_read[23:16] };
        4'b11_1_? : mem_data_read_int <= { 24'b0, mem_data_read[31:24] };
        4'b0?_0_1 : mem_data_read_int <= { 16'b0, mem_data_read[7:0], mem_data_read[15:8] };        // 16 bit read
        4'b1?_0_1 : mem_data_read_int <= { 16'b0, mem_data_read[23:16], mem_data_read[31:24] };
        default   : mem_data_read_int <= { mem_data_read[7:0], mem_data_read[15:8], mem_data_read[23:16], mem_data_read[31:24] };  // 32 bit access (default)
    endcase
end

// --- output data from cpu ---
assign mem_data_write = mem_data_write_out;

// Places the low byte/halfword of the cpu data on the addressed lane(s);
// lanes that are not selected by byte_select are driven as don't-care (X).
always @(mem_data_write_int or mem_addr[1:0] or byte_op or halfw_op)
begin
    casez( { mem_addr[1:0], byte_op, halfw_op } )
        4'b00_1_? : mem_data_write_out <= { 24'bX, mem_data_write_int[7:0] };           // 8 bit write
        4'b01_1_? : mem_data_write_out <= { 16'bX, mem_data_write_int[7:0], 8'bX };
        4'b10_1_? : mem_data_write_out <= { 8'bX, mem_data_write_int[7:0], 16'bX };
        4'b11_1_? : mem_data_write_out <= { mem_data_write_int[7:0], 24'bX };
        4'b0?_0_1 : mem_data_write_out <= { 16'bX, mem_data_write_int[7:0], mem_data_write_int[15:8] };    // 16 bit write
        4'b1?_0_1 : mem_data_write_out <= { mem_data_write_int[7:0], mem_data_write_int[15:8], 16'bX };
        default   : mem_data_write_out <= { mem_data_write_int[7:0], mem_data_write_int[15:8], mem_data_write_int[23:16], mem_data_write_int[31:24] };
    endcase
end
`else
// -------- only 32 bit memory access --------
wire [3:0]      byte_select = 4'b1111;  // all memory operations are 32 bit wide
wire [31:0]     mem_data_read_int;      // no byte/halfword memory access by HW
wire [31:0]     mem_data_write_int;     // byte and halfword memory access must be emulated

// Tied low so that the ASSERT_NON_ALIGNMENT debug checks, which reference
// these two signals, still elaborate when ENABLE_BYTE_SELECT is not defined.
wire            byte_op  = 1'b0;
wire            halfw_op = 1'b0;

// ----- reorder bytes due to MSB-LSB configuration -----
assign mem_data_read_int = { mem_data_read[7:0], mem_data_read[15:8], mem_data_read[23:16], mem_data_read[31:24] };
assign mem_data_write    = { mem_data_write_int[7:0], mem_data_write_int[15:8], mem_data_write_int[23:16], mem_data_write_int[31:24] };
`endif

// ------ datapath registers and connections -----------
reg  [31:0]     pc;                     // program counter (byte align)
reg  [31:0]     sp;                     // stack counter (word align)
reg  [31:0]     a;                      // operand (address_out, data_out, alu_in)
reg  [31:0]     b;                      // operand (address_out)
reg             idim;                   // im opcode being processed
reg  [7:0]      opcode;                 // opcode being processed
reg  [31:2]     pc_cached;              // cached PC
reg  [31:0]     opcode_cache;           // cached opcodes (current word)
`ifdef ENABLE_CPU_INTERRUPTS
reg             int_requested;          // interrupt has been requested
reg             on_interrupt;           // serving interrupt
wire            exit_interrupt;         // microcode says this is poppc_interrupt
wire            enter_interrupt;        // microcode says we are entering interrupt
`endif
wire [1:0]      sel_opcode = pc[1:0];   // which opcode is selected
wire            sel_read;               // mux for data-in
wire [1:0]      sel_alu;                // mux for alu
wire [1:0]      sel_addr;               // mux for addr
wire            w_pc;                   // write PC
`ifdef ENABLE_PC_INCREMENT
wire            w_pc_increment;         // write PC+1
`endif
wire            w_sp;                   // write SP
wire            w_a;                    // write A (from ALU result)
wire            w_a_mem;                // write A (from MEM read)
wire            w_b;                    // write B
wire            w_op;                   // write OPCODE (opcode cache)
wire            set_idim;               // set IDIM
wire            clear_idim;             // clear IDIM
wire            is_op_cached = (pc[31:2] == pc_cached) ? 1'b1 : 1'b0;  // is opcode available?
wire            a_is_zero;              // A == 0
wire            a_is_neg;               // A[31] == 1
wire            busy;                   // busy signal to microcode sequencer (stalls cpu)

reg  [`MC_MEM_BITS-1:0] mc_pc;          // microcode PC
initial mc_pc <= `MC_ADDR_RESET-1;
wire [`MC_BITS-1:0]     mc_op;          // current microcode operation

// memory addr / write ports
assign mem_addr = (sel_addr == `SEL_ADDR_SP) ? sp :
                  (sel_addr == `SEL_ADDR_A)  ? a  :
                  (sel_addr == `SEL_ADDR_B)  ? b  : pc;
assign mem_data_write_int = a;          // only A can be written to memory

// ------- alu instantiation -------
wire [31:0]                 alu_a;
wire [31:0]                 alu_b;
wire [31:0]                 alu_r;
wire [`ALU_OP_WIDTH-1:0]    alu_op;
wire                        alu_done;

// alu input multiplexers
// constant in microcode is sign extended (in order to implement subtractions like adds)
assign alu_a = (sel_read == `SEL_READ_DATA) ? mem_data_read_int : mem_addr;
assign alu_b = (sel_alu == `SEL_ALU_MC_CONST) ? { {25{mc_op[`P_ADDR+6]}} , mc_op[`P_ADDR+6:`P_ADDR] } :    // most priority
               (sel_alu == `SEL_ALU_A)        ? a :
               (sel_alu == `SEL_ALU_B)        ? b : { {24{1'b0}} , opcode };   // `SEL_ALU_OPCODE is less priority

zpu_core_alu alu(
    .alu_a(alu_a),
    .alu_b(alu_b),
    .alu_r(alu_r),
    .alu_op(alu_op),
    .flag_idim(idim),
    .clk(clk),
    .done(alu_done)
);

// -------- pc : program counter --------
always @(posedge clk)
begin
    if(w_pc) pc <= alu_r;
`ifdef ENABLE_PC_INCREMENT              // microcode optimization
    else if(w_pc_increment) pc <= pc + 1;   // usually pc=pc+1
`endif
end

// -------- sp : stack pointer --------
always @(posedge clk)
begin
    if(w_sp) sp <= alu_r;
end

// -------- a : accumulator register ---------
// A write from the ALU result takes priority over a write from memory.
always @(posedge clk)
begin
    if(w_a) a <= alu_r;
    else if(w_a_mem) a <= mem_data_read_int;
end

// flags are taken from the A register instead of the alu result
// in order to improve speed
assign a_is_zero = (a == 0);
assign a_is_neg  = a[31];

// -------- b : auxiliary register ---------
always @(posedge clk)
begin
    if(w_b) b <= alu_r;
end

// -------- opcode and opcode_cache --------
always @(posedge clk)
begin
    if(w_op)
    begin
        opcode_cache <= alu_r;          // store all opcodes in the word
        pc_cached    <= pc[31:2];       // store PC address of cached opcodes
    end
end

// -------- opcode : based on pc[1:0] ---------
always @(sel_opcode or opcode_cache)    // select current opcode from
begin                                   // the cached opcode word
    case(sel_opcode)
        0 : opcode <= opcode_cache[31:24];
        1 : opcode <= opcode_cache[23:16];
        2 : opcode <= opcode_cache[15:8];
        3 : opcode <= opcode_cache[7:0];
    endcase
end

// ------- idim : immediate opcode handling ----------
// set has priority over clear
always @(posedge clk)
begin
    if(set_idim) idim <= 1'b1;
    else if(clear_idim) idim <= 1'b0;
end

`ifdef ENABLE_CPU_INTERRUPTS
// ------ on interrupt status bit -----
always @(posedge clk)
begin
    if(reset | exit_interrupt) on_interrupt <= 1'b0;
    else if(enter_interrupt) on_interrupt <= 1'b1;
end
`endif

// ------ microcode execution unit --------
// map datapath signals with microcode program bits; every state-changing
// enable is masked with ~busy so nothing is written while the cpu is stalled
assign sel_read   = mc_op[`P_SEL_READ];
assign sel_alu    = mc_op[`P_SEL_ALU+1:`P_SEL_ALU];
assign sel_addr   = mc_op[`P_SEL_ADDR+1:`P_SEL_ADDR];
assign alu_op     = mc_op[`P_ALU+3:`P_ALU];
assign w_sp       = mc_op[`P_W_SP] & ~busy;
assign w_pc       = mc_op[`P_W_PC] & ~busy;
assign w_a        = mc_op[`P_W_A] & ~busy;
assign w_a_mem    = mc_op[`P_W_A_MEM] & ~busy;
assign w_b        = mc_op[`P_W_B] & ~busy;
assign w_op       = mc_op[`P_W_OPCODE] & ~busy;
assign mem_read   = mc_op[`P_MEM_R];
assign mem_write  = mc_op[`P_MEM_W];
assign set_idim   = mc_op[`P_SET_IDIM] & ~busy;
assign clear_idim = mc_op[`P_CLEAR_IDIM] & ~busy;
`ifdef ENABLE_BYTE_SELECT
assign byte_op    = mc_op[`P_BYTE];
assign halfw_op   = mc_op[`P_HALFWORD];
`endif
`ifdef ENABLE_PC_INCREMENT
assign w_pc_increment = mc_op[`P_PC_INCREMENT] & ~busy;
`endif
`ifdef ENABLE_CPU_INTERRUPTS
assign exit_interrupt  = mc_op[`P_EXIT_INT] & ~busy;
assign enter_interrupt = mc_op[`P_ENTER_INT] & ~busy;
`endif

wire cond_op_not_cached = mc_op[`P_OP_NOT_CACHED];  // conditional: true if opcode not cached
wire cond_a_zero        = mc_op[`P_A_ZERO];         // conditional: true if A is zero
wire cond_a_neg         = mc_op[`P_A_NEG];          // conditional: true if A is negative
wire decode             = mc_op[`P_DECODE];         // decode means jump to the appropriate microcode based on zpu opcode
wire branch             = mc_op[`P_BRANCH];         // unconditional jump inside microcode

wire [`MC_MEM_BITS-1:0] mc_goto  = { mc_op[`P_ADDR+6:`P_ADDR], 2'b00 };    // microcode goto (goto = high 7 bits)
wire [`MC_MEM_BITS-1:0] mc_entry = { opcode[6:0], 2'b00 };                 // microcode entry point for opcode
reg  [`MC_MEM_BITS-1:0] next_mc_pc;                                        // next microcode operation to be executed
initial next_mc_pc <= `MC_ADDR_RESET-1;

wire cond_branch = (cond_op_not_cached & ~is_op_cached) |   // sum of all conditionals
                   (cond_a_zero & a_is_zero) |
                   (cond_a_neg & a_is_neg);

assign busy = ((mem_read | mem_write) & ~mem_done) | ~alu_done;    // busy signal for microcode sequencer

// ------- handle interrupts ---------
`ifdef ENABLE_CPU_INTERRUPTS
// Latches a request until the core has entered the interrupt (on_interrupt).
always @(posedge clk)
begin
    if(reset | on_interrupt) int_requested <= 0;
    else if(interrupt & ~on_interrupt & ~int_requested) int_requested <= 1;    // interrupt requested
end
`endif

// ----- calculate next microcode address (next, decode, branch, specific opcode, etc.) -----
// Priority: reset, then stall (hold mc_pc while busy), then branch /
// conditional branch, then opcode decode, otherwise mc_pc + 1.
always @(reset or mc_pc or mc_goto or opcode[7:4] or idim or
         decode or branch or cond_branch or mc_entry or busy
`ifdef ENABLE_CPU_INTERRUPTS
         or int_requested
`endif
)
begin
    // default, next microcode instruction
    next_mc_pc <= mc_pc + 1;
    if(reset) next_mc_pc <= `MC_ADDR_RESET;
    else if(~busy)
    begin
        // get next microcode instruction
        if(branch | cond_branch) next_mc_pc <= mc_goto;
        else if(decode)                 // decode: entry point of a new zpu opcode
        begin
`ifdef ENABLE_CPU_INTERRUPTS
            if(int_requested & ~idim) next_mc_pc <= `MC_ADDR_INTERRUPT;    // microcode to enter interrupt mode
            else
`endif
            if(opcode[7] == `OP_IM) next_mc_pc <= (idim ? `MC_ADDR_IM_IDIM : `MC_ADDR_IM_NOIDIM);
            else if(opcode[7:5] == `OP_STORESP) next_mc_pc <= `MC_ADDR_STORESP;
            else if(opcode[7:5] == `OP_LOADSP) next_mc_pc <= `MC_ADDR_LOADSP;
            else if(opcode[7:4] == `OP_ADDSP) next_mc_pc <= `MC_ADDR_ADDSP;
            else next_mc_pc <= mc_entry;    // includes EMULATE opcodes
        end
    end
    else next_mc_pc <= mc_pc;           // in case of cpu stalled (busy=1)
end

// set microcode program counter
always @(posedge clk) mc_pc <= next_mc_pc;

// ----- microcode program ------
// The ROM is addressed with next_mc_pc and clocked.
// NOTE(review): presumably the ROM registers its output so that mc_op lines
// up with mc_pc -- confirm against zpu_core_rom.
zpu_core_rom microcode (
    .addr(next_mc_pc),
    .data(mc_op),
    .clk(clk)
);

// -------------- ZPU debugger --------------------
`ifdef ZPU_CORE_DEBUG
//synthesis translate_off
// ---- register operation dump ----
always @(posedge clk)
begin
    if(~reset)
    begin
        if(w_pc) $display("zpu_core: set PC=0x%h", alu.alu_r);
`ifdef ENABLE_PC_INCREMENT
        // pc still holds the pre-increment value at this edge, so print pc+1
        if(w_pc_increment) $display("zpu_core: set PC=0x%h (PC+1)", pc + 32'd1);
`endif
        if(w_sp) $display("zpu_core: set SP=0x%h", alu.alu_r);
        if(w_a) $display("zpu_core: set A=0x%h", alu.alu_r);
        if(w_a_mem) $display("zpu_core: set A=0x%h (from MEM)", mem_data_read_int);
        if(w_b) $display("zpu_core: set B=0x%h", alu.alu_r);
        if(w_op & ~is_op_cached) $display("zpu_core: set opcode_cache=0x%h, pc_cached=0x%h", alu.alu_r, {pc[31:2], 2'b0});
`ifdef ENABLE_CPU_INTERRUPTS
        if(~busy & mc_pc == `MC_ADDR_INTERRUPT) $display("zpu_core: ***** ENTERING INTERRUPT MICROCODE ******");
        if(~busy & exit_interrupt) $display("zpu_core: ***** INTERRUPT FLAG CLEARED *****");
        if(~busy & enter_interrupt) $display("zpu_core: ***** INTERRUPT FLAG SET *****");
`endif
        if(set_idim & ~idim) $display("zpu_core: IDIM=1");
        if(clear_idim & idim) $display("zpu_core: IDIM=0");

// ---- microcode debug ----
`ifdef ZPU_CORE_DEBUG_MICROCODE
        if(~busy)
        begin
            $display("zpu_core: mc_op[%d]=0b%b", mc_pc, mc_op);
            if(branch) $display("zpu_core: microcode: branch=%d", mc_goto);
            if(cond_branch) $display("zpu_core: microcode: CONDITION branch=%d", mc_goto);
            if(decode) $display("zpu_core: decoding opcode=0x%h (0b%b) : branch to=%d ", opcode, opcode, mc_entry);
        end
        else $display("zpu_core: busy");
`endif

// ---- cpu abort in case of unaligned memory access ---
`ifdef ASSERT_NON_ALIGNMENT
        /* unaligned word access (except PC) */
        if(sel_addr != `SEL_ADDR_PC & mem_addr[1:0] != 2'b00 & (mem_read | mem_write) & !byte_op & !halfw_op)
        begin
            $display("zpu_core: unaligned word operation at addr=0x%x", mem_addr);
            $finish;
        end

        /* unaligned halfword access */
        if(mem_addr[0] & (mem_read | mem_write) & !byte_op & halfw_op)
        begin
            $display("zpu_core: unaligned halfword operation at addr=0x%x", mem_addr);
            $finish;
        end
`endif

    end
end

// ----- opcode disassembler ------
// Prints the opcode name when the microcode PC sits on an opcode entry point.
always @(posedge clk)
begin
    if(~busy)
    case(mc_pc)
        0   : begin
                  $display("zpu_core: ------  breakpoint ------");
                  $finish;
              end
        4   : $display("zpu_core: ------  shiftleft ------");
        8   : $display("zpu_core: ------  pushsp ------");
        12  : $display("zpu_core: ------  popint ------");
        16  : $display("zpu_core: ------  poppc ------");
        20  : $display("zpu_core: ------  add ------");
        24  : $display("zpu_core: ------  and ------");
        28  : $display("zpu_core: ------  or ------");
        32  : $display("zpu_core: ------  load ------");
        36  : $display("zpu_core: ------  not ------");
        40  : $display("zpu_core: ------  flip ------");
        44  : $display("zpu_core: ------  nop ------");
        48  : $display("zpu_core: ------  store ------");
        52  : $display("zpu_core: ------  popsp ------");
        56  : $display("zpu_core: ------  ipsum ------");
        60  : $display("zpu_core: ------  sncpy ------");

        `MC_ADDR_IM_NOIDIM : $display("zpu_core: ------  im 0x%h (1st) ------", opcode[6:0] );
        `MC_ADDR_IM_IDIM   : $display("zpu_core: ------  im 0x%h (cont) ------", opcode[6:0] );
        `MC_ADDR_STORESP   : $display("zpu_core: ------  storesp 0x%h ------", { ~opcode[4], opcode[3:0], 2'b0 } );
        `MC_ADDR_LOADSP    : $display("zpu_core: ------  loadsp 0x%h ------", { ~opcode[4], opcode[3:0], 2'b0 } );
        `MC_ADDR_ADDSP     : $display("zpu_core: ------  addsp 0x%h ------", { ~opcode[4], opcode[3:0], 2'b0 } );
        `MC_ADDR_EMULATE   : $display("zpu_core: ------  emulate 0x%h ------", b[2:0]); // opcode[5:0] );

        128 : $display("zpu_core: ------  mcpy ------");
        132 : $display("zpu_core: ------  mset ------");
        136 : $display("zpu_core: ------  loadh ------");
        140 : $display("zpu_core: ------  storeh ------");
        144 : $display("zpu_core: ------  lessthan ------");
        148 : $display("zpu_core: ------  lessthanorequal ------");
        152 : $display("zpu_core: ------  ulessthan ------");
        156 : $display("zpu_core: ------  ulessthanorequal ------");
        160 : $display("zpu_core: ------  swap ------");
        164 : $display("zpu_core: ------  mult ------");
        168 : $display("zpu_core: ------  lshiftright ------");
        172 : $display("zpu_core: ------  ashiftleft ------");
        176 : $display("zpu_core: ------  ashiftright ------");
        180 : $display("zpu_core: ------  call ------");
        184 : $display("zpu_core: ------  eq ------");
        188 : $display("zpu_core: ------  neq ------");
        192 : $display("zpu_core: ------  neg ------");
        196 : $display("zpu_core: ------  sub ------");
        200 : $display("zpu_core: ------  xor ------");
        204 : $display("zpu_core: ------  loadb ------");
        208 : $display("zpu_core: ------  storeb ------");
        212 : $display("zpu_core: ------  div ------");
        216 : $display("zpu_core: ------  mod ------");
        220 : $display("zpu_core: ------  eqbranch ------");
        224 : $display("zpu_core: ------  neqbranch ------");
        228 : $display("zpu_core: ------  poppcrel ------");
        232 : $display("zpu_core: ------  config ------");
        236 : $display("zpu_core: ------  pushpc ------");
        240 : $display("zpu_core: ------  syscall_emulate ------");
        244 : $display("zpu_core: ------  pushspadd ------");
        248 : $display("zpu_core: ------  halfmult ------");
        252 : $display("zpu_core: ------  callpcrel ------");
        //default : $display("zpu_core: mc_pc=0x%h", decode_mcpc);
    endcase
end
//synthesis translate_on
`endif
endmodule

// --------- ZPU CORE ALU UNIT ---------------
// Combinational ALU with optional multicycle units (multiplier, divider,
// barrel shifter).  `done` is 1 for single-cycle operations; for a multicycle
// operation it stays 0 until the corresponding unit reports completion, which
// the core uses to stall the microcode sequencer.
//
// Optional features (see zpu_core_defines.v): ENABLE_MULT, ENABLE_DIV,
// ENABLE_BARREL, ENABLE_CMP, ENABLE_XOR, ENABLE_A_SHIFT.
module zpu_core_alu(
    alu_a,              // parameter A
    alu_b,              // parameter B
    alu_r,              // computed result
    flag_idim,          // for IMM alu op
    alu_op,             // ALU operation
    clk,                // clock for synchronous multicycle operations
    done                // done signal for alu operation
);

input  [31:0]               alu_a;
input  [31:0]               alu_b;
input  [`ALU_OP_WIDTH-1:0]  alu_op;
input                       flag_idim;
output [31:0]               alu_r;
input                       clk;
output                      done;

wire   [31:0]               alu_a;
wire   [31:0]               alu_b;
wire   [`ALU_OP_WIDTH-1:0]  alu_op;
wire                        flag_idim;
reg    [31:0]               alu_r;
wire                        clk;
reg                         done;

`ifdef ENABLE_MULT
// implement 32 bit pipeline multiplier
// While mul_running is high the operands move through a_in/b_in -> mul_tmp1
// -> mul_result and mul_counter counts cycles; everything is cleared on any
// cycle where mul_running is low.
reg         mul_running;
reg  [2:0]  mul_counter;
wire        mul_done = (mul_counter == 3);
reg  [31:0] mul_result, mul_tmp1;
reg  [31:0] a_in, b_in;

always@(posedge clk)
begin
    a_in        <= 0;
    b_in        <= 0;
    mul_tmp1    <= 0;
    mul_result  <= 0;
    mul_counter <= 0;
    if(mul_running)
    begin                               // infer pipeline multiplier
        a_in        <= alu_a;
        b_in        <= alu_b;
        mul_tmp1    <= a_in * b_in;
        mul_result  <= mul_tmp1;
        mul_counter <= mul_counter + 1;
    end
end
`endif

`ifdef ENABLE_DIV
// implement 32 bit divider
// Unsigned/Signed division based on Patterson and Hennessy's algorithm.
// Description: Calculates quotient.  The "sign" input determines whether
//              signs (two's complement) should be taken into consideration.
// references: http://www.ece.lsu.edu/ee3755/2002/l07.html
reg  [63:0] qr;                 // low half is read as quotient, ALU_MOD also reads qr[31:0]
wire [33:0] diff;               // trial subtraction; bit 32 is tested as the borrow
wire [31:0] quotient;
wire [31:0] dividend;
wire [31:0] divider;
reg  [6:0]  div_bit;            // remaining steps; bit 6 set = divider stopped
                                // (not named "bit": reserved word in SystemVerilog)
wire        div_done;
reg         div_running;
// NOTE(review): divide_sign is never assigned anywhere in this module, so it
// is X in simulation and signed operands propagate X through dividend and
// divider.  Confirm where the sign-mode selection was meant to come from.
reg         divide_sign;
reg         negative_output;

assign div_done = !div_bit;
assign diff     = qr[63:31] - {1'b0, divider};
assign quotient = (!negative_output) ? qr[31:0] : ~qr[31:0] + 1'b1;
assign dividend = (!divide_sign || !alu_a[31]) ? alu_a : ~alu_a + 1'b1;
assign divider  = (!divide_sign || !alu_b[31]) ? alu_b : ~alu_b + 1'b1;

always@(posedge clk)
begin
    div_bit <= 7'b1_000000;             // divider stopped
    if(div_running)
    begin
        if(div_bit[6])                  // divider started: initialize registers
        begin
            div_bit         <= 7'd32;
            qr              <= { 32'd0, dividend };
            negative_output <= divide_sign && ((alu_b[31] && !alu_a[31]) || (!alu_b[31] && alu_a[31]));
        end
        else                            // step by step divide
        begin
            if( diff[32] ) qr <= { qr[62:0], 1'd0 };
            else qr <= { diff[31:0], qr[30:0], 1'd1 };
            div_bit <= div_bit - 1;
        end
    end
end
`endif

`ifdef ENABLE_BARREL
// implement 32 bit barrel shift
// alu_b[6] == 1 ? left(only arithmetic) : right
// alu_b[5] == 1 ? logical : arithmetic
// Shifts one position per clock while bs_running is high; bs_result is
// reloaded from alu_a and the counter cleared whenever bs_running is low.
reg         bs_running;
reg  [31:0] bs_result;
reg  [4:0]  bs_counter;                 // 5 bits
wire        bs_left    = alu_b[6];
wire        bs_logical = alu_b[5];
wire [4:0]  bs_moves   = alu_b[4:0];
wire        bs_done    = (bs_counter == bs_moves);

always @(posedge clk)
begin
    bs_counter <= 0;
    bs_result  <= alu_a;
    if(bs_running)
    begin
        if(bs_left) bs_result <= { bs_result[30:0], 1'b0 };                        // shift left
        else
        begin
            if(bs_logical) bs_result <= { 1'b0, bs_result[31:1] };                 // shift logical right
            else bs_result <= { bs_result[31], bs_result[31], bs_result[30:1] };   // shift arithmetic right
        end
        bs_counter <= bs_counter + 1;
    end
end
`endif

// ----- alu add/sub -----
reg [31:0] alu_b_tmp;
always @(alu_b or alu_op)
begin
    alu_b_tmp <= alu_b;                 // by default, ALU_B as is
    if(alu_op == `ALU_PLUS_OFFSET) alu_b_tmp <= { {25{1'b0}}, ~alu_b[4], alu_b[3:0], 2'b0 };   // ALU_B is an offset if ALU_PLUS_OFFSET operation
end

reg [31:0] alu_r_addsub;                // compute R=A+B or A-B based on opcode (ALU_PLUSxx / ALU_SUB-CMP)
always @(alu_a or alu_b_tmp or alu_op)
begin
`ifdef ENABLE_CMP
    if(alu_op == `ALU_CMP_SIGNED || alu_op == `ALU_CMP_UNSIGNED)   // in case of sub or cmp --> operation is '-'
    begin
        alu_r_addsub <= alu_a - alu_b_tmp;
    end
    else
`endif
    begin
        alu_r_addsub <= alu_a + alu_b_tmp;  // by default '+' operation
    end
end

`ifdef ENABLE_CMP
// handle overflow/underflow exceptions in ALU_CMP_SIGNED
// (operands of different sign whose difference has the sign of alu_b)
reg cmp_exception;
always @(alu_a[31] or alu_b[31] or alu_r_addsub[31])
begin
    cmp_exception <= 0;
    if( (alu_a[31] == 0 && alu_b[31] == 1 && alu_r_addsub[31] == 1) ||
        (alu_a[31] == 1 && alu_b[31] == 0 && alu_r_addsub[31] == 0) ) cmp_exception <= 1;
end
`endif

// ----- alu operation selection -----
// The sensitivity list names every signal read below; for the divider these
// are div_done, quotient (ALU_DIV) and qr (ALU_MOD).
always @(alu_a or alu_b or alu_op or flag_idim or alu_r_addsub
`ifdef ENABLE_CMP
         or cmp_exception
`endif
`ifdef ENABLE_MULT
         or mul_done or mul_result
`endif
`ifdef ENABLE_BARREL
         or bs_done or bs_result
`endif
`ifdef ENABLE_DIV
         or div_done or quotient or qr
`endif
)
begin
    done <= 1;                          // default alu operations are 1 cycle
`ifdef ENABLE_MULT
    mul_running <= 0;
`endif
`ifdef ENABLE_BARREL
    bs_running <= 0;
`endif
`ifdef ENABLE_DIV
    div_running <= 0;
`endif
    alu_r <= alu_r_addsub;              // ALU_PLUS, ALU_PLUS_OFFSET, ALU_SUB and part of ALU_CMP
    case(alu_op)
        `ALU_NOP    : alu_r <= alu_a;
        `ALU_NOP_B  : alu_r <= alu_b;
        `ALU_AND    : alu_r <= alu_a & alu_b;
        `ALU_OR     : alu_r <= alu_a | alu_b;
        `ALU_NOT    : alu_r <= ~alu_a;
        `ALU_FLIP   : alu_r <= { alu_a[0], alu_a[1], alu_a[2], alu_a[3], alu_a[4], alu_a[5], alu_a[6], alu_a[7],
                                 alu_a[8],alu_a[9],alu_a[10],alu_a[11],alu_a[12],alu_a[13],alu_a[14],alu_a[15],
                                 alu_a[16],alu_a[17],alu_a[18],alu_a[19],alu_a[20],alu_a[21],alu_a[22],alu_a[23],
                                 alu_a[24],alu_a[25],alu_a[26],alu_a[27],alu_a[28],alu_a[29],alu_a[30],alu_a[31] };
        // IM: with IDIM set, append 7 more bits to A; otherwise sign-extend the 7-bit immediate
        `ALU_IM     : if(flag_idim) alu_r <= { alu_a[24:0], alu_b[6:0] };
                      else alu_r <= { {25{alu_b[6]}}, alu_b[6:0] };
`ifdef ENABLE_CMP
        // compare results: only bit 31 of the subtraction is corrected
        `ALU_CMP_UNSIGNED : if( (alu_a[31] == alu_b[31] && cmp_exception) ||
                                (alu_a[31] != alu_b[31] && ~cmp_exception) )
                            begin
                                alu_r[31] <= ~alu_r_addsub[31];
                            end
        `ALU_CMP_SIGNED   : if(cmp_exception)
                            begin
                                alu_r[31] <= ~alu_r_addsub[31];
                            end
`endif
`ifdef ENABLE_XOR
        `ALU_XOR    : alu_r <= alu_a ^ alu_b;
`endif
`ifdef ENABLE_A_SHIFT
        `ALU_A_SHIFT_RIGHT : alu_r <= { alu_a[31], alu_a[31], alu_a[30:1] };   // arithmetic shift right by one
`endif
`ifdef ENABLE_MULT
        `ALU_MULT   : begin
                          mul_running <= ~mul_done;
                          done        <= mul_done;
                          alu_r       <= mul_result;
                      end
`endif
`ifdef ENABLE_BARREL
        `ALU_BARREL : begin
                          bs_running <= ~bs_done;
                          done       <= bs_done;
                          alu_r      <= bs_result;
                      end
`endif
`ifdef ENABLE_DIV
        `ALU_DIV    : begin
                          div_running <= ~div_done;
                          done        <= div_done;
                          alu_r       <= quotient;
                      end
        `ALU_MOD    : begin
                          div_running <= ~div_done;
                          done        <= div_done;
                          alu_r       <= qr[31:0];
                      end
`endif
    endcase
end

endmodule