summaryrefslogtreecommitdiffstats
path: root/zpu/hdl/avalanche/core/zpu_core.v
blob: e704fbc8a30ee92b6f75909ffacbaa4d0e75c562 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
`timescale 1ns / 1ps
`include "zpu_core_defines.v"

/*      MODULE: zpu_core
        DESCRIPTION: Contains ZPU cpu
		AUTHOR: Antonio J. Anton (aj <at> anro-ingenieros.com)

REVISION HISTORY:
Revision 1.0, 14/09/2009
Initial public release

COPYRIGHT:
Copyright (c) 2009 Antonio J. Anton

Permission is hereby granted, free of charge, to any person obtaining a copy of
this software and associated documentation files (the "Software"), to deal in
the Software without restriction, including without limitation the rights to
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
of the Software, and to permit persons to whom the Software is furnished to do
so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.*/

// --------- MICROPROGRAMMED ZPU CORE ---------------
// all signals are sampled on the rising edge of clk
// all signals are active high
//
// Structure of this module:
//  - memory data steering: every 32 bit transfer is byte swapped between the
//    bus and the core; with ENABLE_BYTE_SELECT byte and halfword transfers are
//    additionally moved to/from the lanes selected by mem_addr[1:0]
//  - datapath registers pc, sp, a, b, idim plus a one-word opcode cache
//  - one zpu_core_alu instance fed through the alu_a / alu_b multiplexers
//  - microcode sequencer: next_mc_pc addresses zpu_core_rom and the returned
//    word mc_op drives every datapath control signal
//  - optional simulation-only trace/disassembler (ZPU_CORE_DEBUG)

module zpu_core (
`ifdef ENABLE_CPU_INTERRUPTS
	interrupt,		// interrupt request
`endif	
	clk,			// clock on rising edge
	reset,			// reset on rising edge
	mem_read,		// request memory read
	mem_write,		// request memory write
	mem_done,		// memory operation completed
	mem_addr,		// memory address
	mem_data_read,	// data read from memory
	mem_data_write,	// data to be written to memory
	byte_select		// byte select on memory operation
);

input			clk;
input			reset;
output			mem_read;
output			mem_write;
input			mem_done;
input  [31:0]	mem_data_read;
output [31:0] 	mem_data_write;
output [31:0]	mem_addr;
output [3:0]	byte_select;
`ifdef ENABLE_CPU_INTERRUPTS
input			interrupt;
`endif

wire 		clk;
wire		reset;
wire		mem_read;
wire		mem_write;
wire		mem_done;
wire [31:0]	mem_data_read;
wire [31:0] mem_data_write;
wire [31:0]	mem_addr;
`ifdef ENABLE_CPU_INTERRUPTS
wire		interrupt;
`endif

`ifdef ENABLE_BYTE_SELECT
// ------ unaligned byte/halfword memory operations -----
/// TODO: think rewriting into microcode or in a less resource wasting way

reg  [3:0]	byte_select;
wire		byte_op;
wire		halfw_op;

reg  [31:0]	mem_data_read_int;	// aligned data from memory
reg  [31:0] mem_data_write_out;	// write data already aligned
wire [31:0] mem_data_write_int;	// write data from cpu to be aligned

// --- byte select logic ---
// key = { mem_addr[1:0], byte_op, halfw_op }; byte_op wins over halfw_op,
// and with neither set the access is a full word
always @(mem_addr[1:0] or byte_op or halfw_op)
begin
  casez( { mem_addr[1:0], byte_op, halfw_op } )
    4'b00_1_? : byte_select <= 4'b0001;		// byte select
    4'b01_1_? : byte_select <= 4'b0010;
    4'b10_1_? : byte_select <= 4'b0100;
    4'b11_1_? : byte_select <= 4'b1000;
    4'b0?_0_1 : byte_select <= 4'b0011;		// half word select
    4'b1?_0_1 : byte_select <= 4'b1100;
    default   : byte_select <= 4'b1111;		// word select
  endcase
end

// --- input data to cpu ---
// the selected lane(s) are moved to the low bits and zero extended;
// halfwords and words are byte swapped on the way in
always @(mem_data_read or mem_addr[1:0] or byte_op or halfw_op)
begin
  casez( { mem_addr[1:0], byte_op, halfw_op } )
   4'b00_1_? : mem_data_read_int <= { 24'b0, mem_data_read[7:0] };	// 8 bit read
   4'b01_1_? : mem_data_read_int <= { 24'b0, mem_data_read[15:8] };
   4'b10_1_? : mem_data_read_int <= { 24'b0, mem_data_read[23:16] };
   4'b11_1_? : mem_data_read_int <= { 24'b0, mem_data_read[31:24] };
   4'b0?_0_1 : mem_data_read_int <= { 16'b0, mem_data_read[7:0], mem_data_read[15:8] };	// 16 bit read
   4'b1?_0_1 : mem_data_read_int <= { 16'b0, mem_data_read[23:16], mem_data_read[31:24] };
   default   : mem_data_read_int <= { mem_data_read[7:0], mem_data_read[15:8], mem_data_read[23:16], mem_data_read[31:24] };	// 32 bit access (default)
  endcase
end

// --- output data from cpu ---
assign mem_data_write = mem_data_write_out;

// lanes not covered by byte_select are driven X (don't care)
always @(mem_data_write_int or mem_addr[1:0] or byte_op or halfw_op)
begin
  casez( {mem_addr[1:0], byte_op, halfw_op } )
    4'b00_1_? : mem_data_write_out <= { 24'bX, mem_data_write_int[7:0] };		// 8 bit write
    4'b01_1_? : mem_data_write_out <= { 16'bX, mem_data_write_int[7:0], 8'bX };
    4'b10_1_? : mem_data_write_out <= { 8'bX, mem_data_write_int[7:0], 16'bX };
    4'b11_1_? : mem_data_write_out <= { mem_data_write_int[7:0], 24'bX };
    4'b0?_0_1 : mem_data_write_out <= { 16'bX, mem_data_write_int[7:0], mem_data_write_int[15:8] };		// 16 bit write
    4'b1?_0_1 : mem_data_write_out <= { mem_data_write_int[7:0], mem_data_write_int[15:8], 16'bX };
    default   : mem_data_write_out <= { mem_data_write_int[7:0], mem_data_write_int[15:8], mem_data_write_int[23:16], mem_data_write_int[31:24] };
  endcase
end
`else
// -------- only 32 bit memory access --------
wire [3:0]	byte_select = 4'b1111;			// all memory operations are 32 bit wide
wire [31:0] mem_data_read_int;				// no byte/halfword memory access by HW
wire [31:0] mem_data_write_int;				// byte and halfword memory access must be emulated
// byte/halfword operations never happen in this configuration; the two flags
// are tied low so the ASSERT_NON_ALIGNMENT debug checks still compile
wire		byte_op  = 1'b0;
wire		halfw_op = 1'b0;

// ----- reorder bytes due to MSB-LSB configuration -----
assign mem_data_read_int = { mem_data_read[7:0], mem_data_read[15:8], mem_data_read[23:16], mem_data_read[31:24] };
assign mem_data_write = { mem_data_write_int[7:0], mem_data_write_int[15:8], mem_data_write_int[23:16], mem_data_write_int[31:24] };
`endif

// ------ datapath registers and connections -----------
reg  [31:0]	pc;				// program counter (byte align)
reg  [31:0] sp;				// stack counter (word align)
reg  [31:0]	a;				// operand (address_out, data_out, alu_in)
reg  [31:0]	b;				// operand (address_out)
reg			idim;			// im opcode being processed
reg   [7:0]	opcode;			// opcode being processed
reg  [31:2]	pc_cached;		// cached PC
reg  [31:0]	opcode_cache;	// cached opcodes (current word)
`ifdef ENABLE_CPU_INTERRUPTS
  reg		int_requested;	// interrupt has been requested
  reg		on_interrupt;	// serving interrupt
  wire		exit_interrupt;	// microcode says this is poppc_interrupt
  wire		enter_interrupt; // microcode says we are entering interrupt
`endif
wire  [1:0]	sel_opcode = pc[1:0]; 	// which opcode is selected
wire  		sel_read;		// mux for data-in
wire  [1:0]	sel_alu;		// mux for alu
wire  [1:0]	sel_addr;		// mux for addr
wire		w_pc;			// write PC
`ifdef ENABLE_PC_INCREMENT
  wire		w_pc_increment;	// write PC+1
`endif
wire		w_sp;			// write SP
wire		w_a;			// write A (from ALU result)
wire		w_a_mem;		// write A (from MEM read)
wire		w_b;			// write B
wire		w_op;			// write OPCODE (opcode cache)
wire		set_idim;		// set IDIM
wire		clear_idim;		// clear IDIM
wire		is_op_cached = (pc[31:2] == pc_cached) ? 1'b1 : 1'b0;	// is opcode available?
wire		a_is_zero;		// A == 0
wire		a_is_neg;		// A[31] == 1
wire		busy;			// busy signal to microcode sequencer (stalls cpu)

reg [`MC_MEM_BITS-1:0] 	mc_pc;		// microcode PC
initial mc_pc <= `MC_ADDR_RESET-1;
wire    [`MC_BITS-1:0]	mc_op;		// current microcode operation

// memory addr / write ports
// sel_addr picks SP, A or B; any other value selects PC
assign mem_addr = (sel_addr == `SEL_ADDR_SP) ? sp :
			      (sel_addr == `SEL_ADDR_A)  ? a  : 
			   	  (sel_addr == `SEL_ADDR_B)  ? b  : pc;
assign mem_data_write_int = a;			// only A can be written to memory

// ------- alu instantiation -------
wire [31:0]					alu_a;
wire [31:0]					alu_b;
wire [31:0]					alu_r;
wire [`ALU_OP_WIDTH-1:0]	alu_op;
wire						alu_done;

// alu inputs multiplexors
// alu_a is either the (aligned) memory read data or the currently selected address register
// constant in microcode is sign extended (in order to implement subtractions like adds)
assign alu_a = 	(sel_read == `SEL_READ_DATA)   ? mem_data_read_int : mem_addr;
assign alu_b = 	(sel_alu == `SEL_ALU_MC_CONST) ? { {25{mc_op[`P_ADDR+6]}} , mc_op[`P_ADDR+6:`P_ADDR] } :	// most priority
				(sel_alu == `SEL_ALU_A)		   ? a :
				(sel_alu == `SEL_ALU_B)	       ? b : { {24{1'b0}} , opcode };	// `SEL_ALU_OPCODE is less priority

zpu_core_alu alu(
	.alu_a(alu_a),
	.alu_b(alu_b),
	.alu_r(alu_r),
	.alu_op(alu_op),
	.flag_idim(idim),
	.clk(clk),
	.done(alu_done)
);

// -------- pc : program counter --------
// a write from the ALU has priority over the optional increment
always @(posedge clk)
begin
  if(w_pc)  pc <= alu_r;
`ifdef ENABLE_PC_INCREMENT		// microcode optimization
  else if(w_pc_increment) pc <= pc + 1;  // usually pc=pc+1
`endif
end

// -------- sp : stack pointer --------
always @(posedge clk)
begin
  if(w_sp) sp <= alu_r;
end

// -------- a : accumulator register ---------
// loaded from the ALU result (priority) or straight from memory read data
always @(posedge clk)
begin
  if(w_a) 		   a <= alu_r;
  else if(w_a_mem) a <= mem_data_read_int;
end

// flags are taken from register A instead of from the alu result
// in order to improve speed
assign a_is_zero = (a == 0);
assign a_is_neg  = a[31];

// -------- b : auxiliary register ---------
always @(posedge clk)
begin
  if(w_b)	b <= alu_r;
end

// -------- opcode and opcode_cache  --------
always @(posedge clk)
begin
  if(w_op)
  begin
    opcode_cache <= alu_r;		// store all opcodes in the word
    pc_cached <= pc[31:2];		// store PC address of cached opcodes
  end
end

// -------- opcode : based on pc[1:0] ---------
// pc[1:0]==0 selects the most significant byte of the cached word
always @(sel_opcode or opcode_cache)	// select current opcode from 
begin					// the cached opcode word
    case(sel_opcode)
	0 : opcode <= opcode_cache[31:24];
	1 : opcode <= opcode_cache[23:16];
	2 : opcode <= opcode_cache[15:8];
	3 : opcode <= opcode_cache[7:0];
    endcase
end

// ------- idim : immediate opcode handling  ----------
// set has priority over clear
always @(posedge clk)
begin
  if(set_idim)   	  idim <= 1'b1;
  else if(clear_idim) idim <= 1'b0;
end

`ifdef ENABLE_CPU_INTERRUPTS
// ------ on interrupt status bit -----
always @(posedge clk)
begin
  if(reset | exit_interrupt) on_interrupt <= 1'b0;
  else if(enter_interrupt)	 on_interrupt <= 1'b1;
end
`endif

// ------ microcode execution unit --------
// every register write enable is gated with ~busy so that a stalled
// micro-operation does not commit; mem_read / mem_write are not gated
assign sel_read  = mc_op[`P_SEL_READ];	// map datapath signals with microcode program bits
assign sel_alu   = mc_op[`P_SEL_ALU+1:`P_SEL_ALU];
assign sel_addr  = mc_op[`P_SEL_ADDR+1:`P_SEL_ADDR];
assign alu_op    = mc_op[`P_ALU+3:`P_ALU];
assign w_sp      = mc_op[`P_W_SP] & ~busy;
assign w_pc      = mc_op[`P_W_PC] & ~busy;
assign w_a       = mc_op[`P_W_A] & ~busy;
assign w_a_mem   = mc_op[`P_W_A_MEM] & ~busy;
assign w_b	 	 = mc_op[`P_W_B] & ~busy;
assign w_op      = mc_op[`P_W_OPCODE] & ~busy;
assign mem_read  = mc_op[`P_MEM_R];
assign mem_write = mc_op[`P_MEM_W];
assign set_idim  = mc_op[`P_SET_IDIM] & ~busy;
assign clear_idim= mc_op[`P_CLEAR_IDIM] & ~busy;
`ifdef ENABLE_BYTE_SELECT
assign byte_op	 = mc_op[`P_BYTE];
assign halfw_op  = mc_op[`P_HALFWORD];
`endif
`ifdef ENABLE_PC_INCREMENT
  assign w_pc_increment = mc_op[`P_PC_INCREMENT] & ~busy;
`endif
`ifdef ENABLE_CPU_INTERRUPTS
  assign exit_interrupt  = mc_op[`P_EXIT_INT]  & ~busy;
  assign enter_interrupt = mc_op[`P_ENTER_INT] & ~busy;
`endif

wire   cond_op_not_cached = mc_op[`P_OP_NOT_CACHED];	// conditional: true if opcode not cached
wire   cond_a_zero 	  	  = mc_op[`P_A_ZERO];			// conditional: true if A is zero
wire   cond_a_neg 	  	  = mc_op[`P_A_NEG];			// conditional: true if A is negative
wire   decode 		  	  = mc_op[`P_DECODE];			// decode means jump to the appropriate microcode based on zpu opcode
wire   branch 		  	  = mc_op[`P_BRANCH];			// unconditional jump inside microcode

wire [`MC_MEM_BITS-1:0]	mc_goto  = { mc_op[`P_ADDR+6:`P_ADDR], 2'b00 };	// microcode goto (goto = high 7 bits)
wire [`MC_MEM_BITS-1:0] mc_entry = { opcode[6:0], 2'b00 };				// microcode entry point for opcode
reg  [`MC_MEM_BITS-1:0] next_mc_pc;										// next microcode operation to be executed
initial next_mc_pc <= `MC_ADDR_RESET-1;

wire cond_branch = (cond_op_not_cached & ~is_op_cached) |		// sum of all conditionals
				   (cond_a_zero & a_is_zero) |
				   (cond_a_neg & a_is_neg);

// stalled while a requested memory access has not completed or the ALU is still working
assign busy = ((mem_read | mem_write) & ~mem_done) | ~alu_done;	// busy signal for microcode sequencer

// ------- handle interrupts ---------
`ifdef ENABLE_CPU_INTERRUPTS
// latch a request while not already serving one; cleared once on_interrupt is set
always @(posedge clk)
begin
  if(reset | on_interrupt) int_requested <= 0;
  else if(interrupt & ~on_interrupt & ~int_requested) int_requested <= 1;	// interrupt requested
end
`endif

// ----- calculate next microcode address (next, decode, branch, specific opcode, etc.) -----
// priority: reset, stall (hold mc_pc), branch / taken conditional, decode, mc_pc + 1
always @(reset or mc_pc or mc_goto or opcode[7:4] or idim or 
	     decode or branch or cond_branch or mc_entry or busy
`ifdef ENABLE_CPU_INTERRUPTS
	     or int_requested
`endif
)
begin
  // default, next microcode instruction
  next_mc_pc  <= mc_pc + 1;
  if(reset)								  next_mc_pc <= `MC_ADDR_RESET;
  else if(~busy)
  begin
    // get next microcode instruction
    if(branch | cond_branch) 			  next_mc_pc <= mc_goto;
    else if(decode)						  // decode: entry point of a new zpu opcode
    begin
`ifdef ENABLE_CPU_INTERRUPTS
      // a pending interrupt is only taken between opcodes and never in the middle of an IM sequence
      if(int_requested & ~idim)			  next_mc_pc <= `MC_ADDR_INTERRUPT;	// microcode to enter interrupt mode
      else
`endif
      if(opcode[7]        == `OP_IM) 	  next_mc_pc <= (idim ? `MC_ADDR_IM_IDIM : `MC_ADDR_IM_NOIDIM);
      else if(opcode[7:5] == `OP_STORESP) next_mc_pc <= `MC_ADDR_STORESP;
      else if(opcode[7:5] == `OP_LOADSP)  next_mc_pc <= `MC_ADDR_LOADSP;
      else if(opcode[7:4] == `OP_ADDSP)   next_mc_pc <= `MC_ADDR_ADDSP;
      else				  				  next_mc_pc <= mc_entry;	// includes EMULATE opcodes
    end
  end
  else next_mc_pc <= mc_pc;		// in case of cpu stalled (busy=1)
end

// set microcode program counter
always @(posedge clk) mc_pc <= next_mc_pc;

// ----- microcode program ------
// the rom is addressed with next_mc_pc (not mc_pc)
zpu_core_rom microcode (
	.addr(next_mc_pc),
	.data(mc_op),
	.clk(clk)
);

// -------------- ZPU debugger --------------------
`ifdef ZPU_CORE_DEBUG
//synthesis translate_off
// ---- register operation dump ----
always @(posedge clk)
begin
  if(~reset)
  begin
    if(w_pc) $display("zpu_core: set PC=0x%h", alu.alu_r);
`ifdef ENABLE_PC_INCREMENT
    // mirrors the pc register: increment only when w_pc is not set, and
    // report the value that is being written (pc + 1), not the old pc
    else if(w_pc_increment) $display("zpu_core: set PC=0x%h (PC+1)", pc + 1);
`endif
    if(w_sp) $display("zpu_core: set SP=0x%h", alu.alu_r);
    if(w_a) $display("zpu_core: set A=0x%h", alu.alu_r);
    if(w_a_mem) $display("zpu_core: set A=0x%h (from MEM)", mem_data_read_int);
    if(w_b)  $display("zpu_core: set B=0x%h", alu.alu_r);
    if(w_op & ~is_op_cached) $display("zpu_core: set opcode_cache=0x%h, pc_cached=0x%h", alu.alu_r, {pc[31:2], 2'b0});
`ifdef ENABLE_CPU_INTERRUPTS
    if(~busy & mc_pc == `MC_ADDR_INTERRUPT) $display("zpu_core: ***** ENTERING INTERRUPT MICROCODE ******"); 
    if(~busy & exit_interrupt)  $display("zpu_core: ***** INTERRUPT FLAG CLEARED *****");
    if(~busy & enter_interrupt) $display("zpu_core: ***** INTERRUPT FLAG SET *****");    
`endif
    if(set_idim & ~idim) $display("zpu_core: IDIM=1");
    if(clear_idim & idim) $display("zpu_core: IDIM=0");

// ---- microcode debug ----
`ifdef ZPU_CORE_DEBUG_MICROCODE
    if(~busy)
    begin
      $display("zpu_core: mc_op[%d]=0b%b", mc_pc, mc_op);
      if(branch)      $display("zpu_core: microcode: branch=%d", mc_goto);
      if(cond_branch) $display("zpu_core: microcode: CONDITION branch=%d", mc_goto);
      if(decode)      $display("zpu_core: decoding opcode=0x%h (0b%b) : branch to=%d ", opcode, opcode, mc_entry);
    end
    else $display("zpu_core: busy");
`endif

// ---- cpu abort in case of unaligned memory access ---
`ifdef ASSERT_NON_ALIGNMENT
  /* unaligned word access (except PC) */
  if(sel_addr != `SEL_ADDR_PC & mem_addr[1:0] != 2'b00 & (mem_read | mem_write) & !byte_op & !halfw_op)
  begin
    $display("zpu_core: unaligned word operation at addr=0x%x", mem_addr);
    $finish;
  end
  
  /* unaligned halfword access */
  if(mem_addr[0] & (mem_read | mem_write) & !byte_op & halfw_op)
  begin
    $display("zpu_core: unaligned halfword operation at addr=0x%x", mem_addr);
    $finish;
  end
`endif

  end
end

// ----- opcode disassembler ------
// prints the zpu opcode name whenever the microcode PC sits on a known entry point
always @(posedge clk)
begin
if(~busy)
case(mc_pc)
0 : begin
	 $display("zpu_core: ------  breakpoint ------");
	 $finish;
	end
4 : $display("zpu_core: ------  shiftleft ------");
8 : $display("zpu_core: ------  pushsp ------");
12 : $display("zpu_core: ------  popint ------");
16 : $display("zpu_core: ------  poppc ------");
20 : $display("zpu_core: ------  add ------");
24 : $display("zpu_core: ------  and ------");
28 : $display("zpu_core: ------  or ------");
32 : $display("zpu_core: ------  load ------");
36 : $display("zpu_core: ------  not ------");
40 : $display("zpu_core: ------  flip ------");
44 : $display("zpu_core: ------  nop ------");
48 : $display("zpu_core: ------  store ------");
52 : $display("zpu_core: ------  popsp ------");
56 : $display("zpu_core: ------  ipsum ------");
60 : $display("zpu_core: ------  sncpy ------");

`MC_ADDR_IM_NOIDIM : $display("zpu_core: ------  im 0x%h (1st) ------", opcode[6:0] );
`MC_ADDR_IM_IDIM   : $display("zpu_core: ------  im 0x%h (cont) ------", opcode[6:0] );
`MC_ADDR_STORESP   : $display("zpu_core: ------  storesp 0x%h ------", { ~opcode[4], opcode[3:0], 2'b0 } );
`MC_ADDR_LOADSP    : $display("zpu_core: ------  loadsp 0x%h ------", { ~opcode[4], opcode[3:0], 2'b0 } );
`MC_ADDR_ADDSP     : $display("zpu_core: ------  addsp 0x%h ------", { ~opcode[4], opcode[3:0], 2'b0 } );
`MC_ADDR_EMULATE   : $display("zpu_core: ------  emulate 0x%h ------", b[2:0]); // opcode[5:0] );

128 : $display("zpu_core: ------  mcpy ------");
132 : $display("zpu_core: ------  mset ------");
136 : $display("zpu_core: ------  loadh ------");
140 : $display("zpu_core: ------  storeh ------");
144 : $display("zpu_core: ------  lessthan ------");
148 : $display("zpu_core: ------  lessthanorequal ------");
152 : $display("zpu_core: ------  ulessthan ------");
156 : $display("zpu_core: ------  ulessthanorequal ------");
160 : $display("zpu_core: ------  swap ------");
164 : $display("zpu_core: ------  mult ------");
168 : $display("zpu_core: ------  lshiftright ------");
172 : $display("zpu_core: ------  ashiftleft ------");
176 : $display("zpu_core: ------  ashiftright ------");
180 : $display("zpu_core: ------  call ------");
184 : $display("zpu_core: ------  eq ------");
188 : $display("zpu_core: ------  neq ------");
192 : $display("zpu_core: ------  neg ------");
196 : $display("zpu_core: ------  sub ------");
200 : $display("zpu_core: ------  xor ------");
204 : $display("zpu_core: ------  loadb ------");
208 : $display("zpu_core: ------  storeb ------");
212 : $display("zpu_core: ------  div ------");
216 : $display("zpu_core: ------  mod ------");
220 : $display("zpu_core: ------  eqbranch ------");
224 : $display("zpu_core: ------  neqbranch ------");
228 : $display("zpu_core: ------  poppcrel ------");
232 : $display("zpu_core: ------  config ------");
236 : $display("zpu_core: ------  pushpc ------");
240 : $display("zpu_core: ------  syscall_emulate ------");
244 : $display("zpu_core: ------  pushspadd ------");
248 : $display("zpu_core: ------  halfmult ------");
252 : $display("zpu_core: ------  callpcrel ------");
//default : $display("zpu_core: mc_pc=0x%h", decode_mcpc);
endcase
end
//synthesis translate_on
`endif
endmodule

// --------- ZPU CORE ALU UNIT ---------------
// Combinational ALU with optional multi-cycle units (multiplier, divider,
// barrel shifter) selected at compile time.
//   alu_r : result for the operation selected by alu_op
//   done  : 1 for single-cycle operations; for the multi-cycle units it stays 0
//           until the unit reports completion
// Any alu_op without an explicit case entry yields the adder result
// (alu_a + alu_b, or alu_a - alu_b for the compare operations).
module zpu_core_alu(
	alu_a,			// parameter A
	alu_b,			// parameter B
	alu_r,			// computed result
	flag_idim,		// for IMM alu op
	alu_op,			// ALU operation
	clk,			// clock for synchronous multicycle operations
	done			// done signal for alu operation
);

input [31:0]				alu_a;
input [31:0]				alu_b;
input [`ALU_OP_WIDTH-1:0]	alu_op;
input						flag_idim;
output [31:0]				alu_r;
input						clk;
output						done;

wire [31:0]					alu_a;
wire [31:0]					alu_b;
wire [`ALU_OP_WIDTH-1:0]	alu_op;
wire						flag_idim;
reg  [31:0]					alu_r;
wire						clk;
reg							done;

`ifdef ENABLE_MULT
// implement 32 bit pipeline multiplier
// while mul_running is 0 every stage is held at 0; while it is 1 the operands
// move through a_in/b_in -> mul_tmp1 -> mul_result and mul_counter counts the
// clocks; mul_done is raised when the counter reaches 3
reg			mul_running;
reg	[2:0]	mul_counter;
wire		mul_done = (mul_counter == 3);
reg	[31:0]	mul_result, mul_tmp1;
reg	[31:0]	a_in, b_in;

always@(posedge clk)
begin
  a_in		  <= 0;
  b_in		  <= 0;
  mul_tmp1	  <= 0;
  mul_result  <= 0;
  mul_counter <= 0;
  if(mul_running)
  begin	// infer pipeline multiplier
    a_in		<= alu_a;
    b_in		<= alu_b;
    mul_tmp1	<= a_in * b_in;
    mul_result	<= mul_tmp1;
    mul_counter <= mul_counter + 1;
  end
end
`endif

`ifdef ENABLE_DIV
// implement 32 bit divider
// Unsigned/Signed division based on Patterson and Hennessy's algorithm.
// Description: Calculates quotient.  The "sign" input determines whether
// signs (two's complement) should be taken into consideration.
// references: http://www.ece.lsu.edu/ee3755/2002/l07.html
//
// qr starts as { 32'd0, dividend } and is shifted left once per step; the
// steps keep the running remainder in the upper half and shift quotient bits
// into bit 0.
reg  [63:0]	  qr;
wire [33:0]	  diff;
wire [31:0]   quotient;
wire [31:0]   dividend;
wire [31:0]   divider; 	
reg  [6:0]    bit;				// bit[6]=1: idle marker, otherwise remaining step count
wire          div_done;
reg			  div_running;
// NOTE(review): divide_sign is read below but never assigned anywhere in this
// module, so the signed handling depends on an undriven register - confirm
// the intended source of this flag.
reg			  divide_sign;
reg			  negative_output;

assign div_done = !bit;
assign diff = qr[63:31] - {1'b0, divider};
assign quotient  = (!negative_output) ? qr[31:0] : ~qr[31:0] + 1'b1;
assign dividend  = (!divide_sign || !alu_a[31]) ? alu_a : ~alu_a + 1'b1;
assign divider   = (!divide_sign || !alu_b[31]) ? alu_b : ~alu_b + 1'b1;
   
always@(posedge clk)
begin
	bit <= 7'b1_000000;				// divider stopped
	if(div_running)
	begin
	  if(bit[6])					// divider started: initialize registers
	  begin
		  bit             <= 7'd32;
		  qr              <= { 32'd0, dividend };
          negative_output <= divide_sign && ((alu_b[31] && !alu_a[31]) || (!alu_b[31] && alu_a[31]));
      end
      else							// step by step divide
	  begin
        if( diff[32] ) 	qr <= { qr[62:0], 1'd0 };				// subtraction went negative: quotient bit 0
        else 			qr <= { diff[31:0], qr[30:0], 1'd1 };	// keep the difference: quotient bit 1
        bit <= bit - 1;
      end
   end
end
`endif

`ifdef ENABLE_BARREL
// implement 32 bit barrel shift
// alu_b[6] == 1 ? left(only arithmetic) : right
// alu_b[5] == 1 ? logical : arithmetic
// alu_b[4:0] = number of positions; one position is shifted per clock while
// bs_running is 1, and bs_result is reloaded from alu_a while it is 0
reg			  bs_running;
reg [31:0]	  bs_result;
reg  [4:0]	  bs_counter;				// 5 bits
wire		  bs_left 	 = alu_b[6];
wire		  bs_logical = alu_b[5];
wire [4:0]	  bs_moves 	 = alu_b[4:0];
wire		  bs_done	 = (bs_counter == bs_moves);

always @(posedge clk)
begin
  bs_counter <= 0;
  bs_result  <= alu_a;
  if(bs_running)
  begin
	if(bs_left) 	 bs_result <= { bs_result[30:0], 1'b0 };						// shift left
	else
	begin
	  if(bs_logical) bs_result <= { 1'b0, bs_result[31:1] };						// shift logical right
	  else			 bs_result <= { bs_result[31], bs_result[31], bs_result[30:1] };// shift arithmetic right
	end	  
	bs_counter <= bs_counter + 1;
  end
end
`endif

// ----- alu add/sub  -----
// for ALU_PLUS_OFFSET the second operand is rebuilt from alu_b[4:0] as
// { ~alu_b[4], alu_b[3:0], 2'b00 }, zero extended to 32 bits
reg [31:0] alu_b_tmp;
always @(alu_b or alu_op)
begin
  alu_b_tmp <= alu_b;	// by default, ALU_B as is
  if(alu_op == `ALU_PLUS_OFFSET) alu_b_tmp <= { {25{1'b0}}, ~alu_b[4], alu_b[3:0], 2'b0 };	// ALU_B is an offset if ALU_PLUS_OFFSET operation
end

reg [31:0] alu_r_addsub;	// compute R=A+B or A-B based on opcode (ALU_PLUSxx / ALU_SUB-CMP)
always @(alu_a or alu_b_tmp or alu_op)
begin
`ifdef ENABLE_CMP  
  if(alu_op == `ALU_CMP_SIGNED || alu_op == `ALU_CMP_UNSIGNED)	// in case of sub or cmp --> operation is '-'
  begin
    alu_r_addsub <= alu_a - alu_b_tmp;
  end
  else
`endif
  begin
    alu_r_addsub <= alu_a + alu_b_tmp;	// by default '+' operation
  end
end

`ifdef ENABLE_CMP
// handle overflow/underflow exceptions in ALU_CMP_SIGNED
// set when the operands have different signs and the sign of the difference
// differs from the sign of alu_a
reg cmp_exception;
always @(alu_a[31] or alu_b[31] or alu_r_addsub[31])
begin
  cmp_exception <= 0;
  if( (alu_a[31] == 0 && alu_b[31] == 1 && alu_r_addsub[31] == 1) ||
	  (alu_a[31] == 1 && alu_b[31] == 0 && alu_r_addsub[31] == 0) ) cmp_exception <= 1;
end
`endif

// ----- alu operation selection -----
// The sensitivity list must name every signal read below; for the divider
// these are div_done, quotient (ALU_DIV result) and qr (ALU_MOD result).
always @(alu_a or alu_b or alu_op or flag_idim or alu_r_addsub
`ifdef ENABLE_CMP
		or cmp_exception
`endif
`ifdef ENABLE_MULT
		or mul_done or mul_result
`endif
`ifdef ENABLE_BARREL
		or bs_done or bs_result
`endif
`ifdef ENABLE_DIV
		or div_done or quotient or qr
`endif
)
begin
  done <= 1;		// default alu operations are 1 cycle
`ifdef ENABLE_MULT
  mul_running <= 0;
`endif
`ifdef ENABLE_BARREL
  bs_running <= 0;
`endif
`ifdef ENABLE_DIV
  div_running <= 0;
`endif  
  alu_r <= alu_r_addsub;	// ALU_PLUS, ALU_PLUS_OFFSET, ALU_SUB and part of ALU_CMP
  case(alu_op)
    `ALU_NOP		: alu_r <= alu_a;
    `ALU_NOP_B		: alu_r <= alu_b;   
    `ALU_AND		: alu_r <= alu_a & alu_b;
    `ALU_OR			: alu_r <= alu_a | alu_b;
    `ALU_NOT		: alu_r <= ~alu_a;
    // bit reversal: alu_a[0] becomes the result's bit 31
    `ALU_FLIP		: alu_r <= { alu_a[0], alu_a[1], alu_a[2], alu_a[3], alu_a[4], alu_a[5], alu_a[6], alu_a[7], 
				     	alu_a[8],alu_a[9],alu_a[10],alu_a[11],alu_a[12],alu_a[13],alu_a[14],alu_a[15],
				     	alu_a[16],alu_a[17],alu_a[18],alu_a[19],alu_a[20],alu_a[21],alu_a[22],alu_a[23],
				     	alu_a[24],alu_a[25],alu_a[26],alu_a[27],alu_a[28],alu_a[29],alu_a[30],alu_a[31] };
    // IM: with flag_idim set, shift alu_a left by 7 and append alu_b[6:0];
    // otherwise sign extend alu_b[6:0]
    `ALU_IM			: if(flag_idim) alu_r <= { alu_a[24:0], alu_b[6:0] };
			  		  else 		    alu_r <= { {25{alu_b[6]}}, alu_b[6:0] };
`ifdef ENABLE_CMP
	// compares keep alu_r = alu_a - alu_b and only invert its sign bit when needed
	`ALU_CMP_UNSIGNED:if( (alu_a[31] == alu_b[31] && cmp_exception) || 
						  (alu_a[31] != alu_b[31] && ~cmp_exception) )
				      begin
				        alu_r[31] <= ~alu_r_addsub[31];
				      end
	`ALU_CMP_SIGNED	: if(cmp_exception)
				      begin
				      	alu_r[31] <= ~alu_r_addsub[31];
				      end
`endif
`ifdef ENABLE_XOR
	`ALU_XOR		: alu_r <= alu_a ^ alu_b;
`endif
`ifdef ENABLE_A_SHIFT
	`ALU_A_SHIFT_RIGHT: alu_r <= { alu_a[31], alu_a[31], alu_a[30:1] };	// arithmetic shift right by one (sign bit replicated)
`endif
`ifdef ENABLE_MULT
	`ALU_MULT 		: begin
					    mul_running <= ~mul_done;
					    done 		<= mul_done;
					    alu_r 		<= mul_result;
					  end
`endif
`ifdef ENABLE_BARREL
	`ALU_BARREL		: begin
					    bs_running <= ~bs_done;
					    done 	   <= bs_done;
					    alu_r 	   <= bs_result;
					  end
`endif
`ifdef ENABLE_DIV
	`ALU_DIV		: begin
					    div_running<= ~div_done;
					    done 	   <= div_done;
					    alu_r 	   <= quotient;
					  end
	// NOTE(review): this returns qr[31:0], the half the divider fills with
	// quotient bits; the remainder appears to end up in qr[63:32] - confirm
	// against the microcode that uses ALU_MOD before changing.
	`ALU_MOD		: begin
					    div_running<= ~div_done;
					    done 	   <= div_done;
					    alu_r 	   <= qr[31:0];
					  end
`endif
  endcase
end

endmodule
OpenPOWER on IntegriCloud