This is the day we have been building towards. Every module from Days 7–14 — the PC, IMEM, control unit, register file, ImmGen, ALU, DMEM, and branch unit — comes together into one complete riscv_core.v. We load a real program, run it, and verify the result. This is a real, working CPU.
Fifteen days ago you started from Verilog flip-flops and gates; you now have a working single-cycle RV32I processor that can execute any program that fits in memory and uses only the instructions it implements (LUI, AUIPC, FENCE and ECALL are not yet supported). That is a remarkable achievement.
| Module | Day | Role in the CPU |
|---|---|---|
| pc.v | Day 8 | Program counter register — holds the current instruction address |
| imem.v | Day 8 | Instruction memory — reads 32-bit instruction at PC |
| regfile.v | Day 9 | 32 × 32-bit registers, two read ports, one synchronous write port |
| alu.v | Day 10 | Arithmetic/logic unit — add, sub, and, or, xor, slt, shifts |
| immgen.v | Day 11 | Extracts and sign-extends immediates from all 6 instruction formats |
| control.v | Day 11 | Decodes opcode/funct3/funct7 into control signals for all units |
| dmem.v | Day 13 | Data memory — byte/half/word load and store via funct3 |
| branch_unit.v | Day 14 | Evaluates branch condition, computes branch target |
| riscv_core.v | Day 15 | Top-level integration — wires all modules into the full datapath |
// riscv_core.v — Single-cycle RV32I CPU, top-level datapath integration
// Instruction coverage: R-type, I-type ALU, LW/LH/LB/LHU/LBU, SW/SH/SB,
// BEQ/BNE/BLT/BGE/BLTU/BGEU, JAL, JALR
//
// Ports:
//   clk — clock; the PC register updates on its rising edge
//   rst — asynchronous, active-high reset; forces the PC to 0
//
// Instance names (imem0, ctrl, rf, ig, alu0, dmem0, bu) are kept stable so
// that testbenches can reach into them hierarchically.
module riscv_core (
    input clk,
    input rst
);

    // ── 1. Program Counter ────────────────────────────────────────
    reg  [31:0] pc;
    wire [31:0] pc_next;
    wire [31:0] pc_plus4;

    assign pc_plus4 = pc + 32'd4;          // sequential successor / link value

    // ── 2. Instruction Fetch ──────────────────────────────────────
    wire [31:0] inst;

    imem imem0 (
        .addr  (pc),
        .rdata (inst)
    );

    // ── 3. Instruction Decode (fixed field positions) ─────────────
    wire [6:0] opcode;
    wire [4:0] rd;
    wire [2:0] funct3;
    wire [4:0] rs1;
    wire [4:0] rs2;
    wire [6:0] funct7;

    assign funct7 = inst[31:25];
    assign rs2    = inst[24:20];
    assign rs1    = inst[19:15];
    assign funct3 = inst[14:12];
    assign rd     = inst[11:7];
    assign opcode = inst[6:0];

    // ── 4. Control Unit ───────────────────────────────────────────
    wire       RegWrite;
    wire       ALUSrc;
    wire       MemRead;     // produced by the decoder; not consumed in this module
    wire       MemWrite;
    wire       WBSel;
    wire       Branch;
    wire       Jal;
    wire       Jalr;
    wire [3:0] ALUOp;

    control ctrl (
        .opcode   (opcode),
        .funct3   (funct3),
        .funct7   (funct7),
        .RegWrite (RegWrite),
        .ALUSrc   (ALUSrc),
        .MemRead  (MemRead),
        .MemWrite (MemWrite),
        .WBSel    (WBSel),
        .ALUOp    (ALUOp),
        .Branch   (Branch),
        .Jal      (Jal),
        .Jalr     (Jalr)
    );

    // ── 5. Register File ──────────────────────────────────────────
    wire [31:0] rdata1;
    wire [31:0] rdata2;
    wire [31:0] wr_data;    // driven by the write-back mux in section 11

    regfile rf (
        .clk    (clk),
        .we     (RegWrite),
        .rs1    (rs1),
        .rs2    (rs2),
        .rd     (rd),
        .wdata  (wr_data),
        .rdata1 (rdata1),
        .rdata2 (rdata2)
    );

    // ── 6. Immediate Generator ────────────────────────────────────
    wire [31:0] imm;

    immgen ig (
        .inst (inst),
        .imm  (imm)
    );

    // ── 7. ALU ────────────────────────────────────────────────────
    wire [31:0] alu_src_b;
    wire [31:0] alu_out;
    wire        alu_zero;
    wire        alu_lt;
    wire        alu_ltu;

    // Operand B: immediate when ALUSrc is set, otherwise the rs2 read port.
    assign alu_src_b = ALUSrc ? imm : rdata2;

    alu alu0 (
        .a      (rdata1),
        .b      (alu_src_b),
        .op     (ALUOp),
        .result (alu_out),
        .zero   (alu_zero),
        .lt     (alu_lt),
        .ltu    (alu_ltu)
    );

    // ── 8. Data Memory ────────────────────────────────────────────
    // Address comes from the ALU result, store data from the rs2 read port;
    // funct3 is passed through to dmem unchanged.
    wire [31:0] dmem_rdata;

    dmem dmem0 (
        .clk    (clk),
        .we     (MemWrite),
        .addr   (alu_out),
        .wdata  (rdata2),
        .funct3 (funct3),
        .rdata  (dmem_rdata)
    );

    // ── 9. Branch Unit ────────────────────────────────────────────
    wire        branch_taken;
    wire [31:0] branch_target;

    branch_unit bu (
        .funct3        (funct3),
        .zero          (alu_zero),
        .alu_lt        (alu_lt),
        .alu_ltu       (alu_ltu),
        .pc            (pc),
        .b_imm         (imm),
        .branch_taken  (branch_taken),
        .branch_target (branch_target)
    );

    // ── 10. PC Mux ────────────────────────────────────────────────
    // Priority: taken branch > JAL > JALR > PC+4.
    wire [31:0] jal_target;
    wire [31:0] jalr_target;
    wire [31:0] jalr_or_seq;
    wire [31:0] jump_or_seq;

    assign jal_target  = pc + imm;
    assign jalr_target = (rdata1 + imm) & 32'hFFFF_FFFE;   // clear bit 0 of the JALR target

    assign jalr_or_seq = Jalr ? jalr_target : pc_plus4;
    assign jump_or_seq = Jal  ? jal_target  : jalr_or_seq;
    assign pc_next     = (Branch & branch_taken) ? branch_target : jump_or_seq;

    always @(posedge clk or posedge rst) begin
        if (rst)
            pc <= 32'd0;
        else
            pc <= pc_next;
    end

    // ── 11. Write-Back Mux ────────────────────────────────────────
    // Jumps write the link value (PC+4); otherwise WBSel picks the data-memory
    // read value over the ALU result.
    wire [31:0] mem_or_alu;

    assign mem_or_alu = WBSel ? dmem_rdata : alu_out;
    assign wr_data    = (Jal | Jalr) ? pc_plus4 : mem_or_alu;

endmodule
We will run a RISC-V loop that computes 1+2+3+4+5 = 15 and stores the result to memory address 0. This exercises: addi (init and increment), add (accumulate), bge (loop exit condition), jal (loop back-edge and halt), sw (store result).
# sum1to5.s — Compute sum = 1+2+3+4+5 = 15 and store it at memory address 0
# Target: RV32I, bare metal (no ABI / calling convention involved).
# Register usage:
#   x0 = hard-wired zero; also serves as the base address (0) for the store
#   x1 = loop counter (i), starts at 1
#   x2 = accumulator (sum), starts at 0
#   x3 = limit (6) — loop runs while i < 6
        addi    x1, x0, 1               # i = 1
        addi    x2, x0, 0               # sum = 0
        addi    x3, x0, 6               # limit = 6
loop:
        bge     x1, x3, done            # if i >= 6 goto done
        add     x2, x2, x1              # sum += i
        addi    x1, x1, 1               # i++
        jal     x0, loop                # goto loop (x0 discards link)
done:
        sw      x2, 0(x0)               # mem[0] = sum (= 15)
halt:
        jal     x0, halt                # spin on this instruction (offset 0) so the store runs once
// program.hex — encoded sum 1..5 program ($readmemh format)
// One 32-bit instruction word per line; each word is preceded by a comment
// giving its byte address and the instruction it encodes.
// Address 00: addi x1,x0,1    = 00100093
00100093
// Address 04: addi x2,x0,0    = 00000113
00000113
// Address 08: addi x3,x0,6    = 00600193
00600193
// Address 0C: bge x1,x3,done  = 0030d863 (offset +16 = done at 1C)
0030d863
// Address 10: add x2,x2,x1    = 00110133
00110133
// Address 14: addi x1,x1,1    = 00108093
00108093
// Address 18: jal x0,loop     = ff5ff06f (offset -12 = loop at 0C)
ff5ff06f
// Address 1C: sw x2,0(x0)     = 00202023 (done:)
00202023
// Address 20: jal x0,0        = 0000006f (halt: jump to self)
0000006f
// tb_riscv_core.v — Testbench for the complete single-cycle RV32I core
// Loads program.hex, runs for 50 cycles, checks mem[0] == 15.
// Reaches into the DUT hierarchically (dut.imem0.mem, dut.dmem0.mem,
// dut.rf.regs), so those instance and array names must match the design.
`timescale 1ns/1ps
module tb_riscv_core;
    reg clk = 0, rst = 1;
    always #5 clk = ~clk; // 100 MHz

    riscv_core dut (.clk(clk), .rst(rst));

    integer i;
    // Declared at module scope: Verilog-2001/2005 only permits declarations
    // inside *named* begin/end blocks, so a local reg in an unnamed block is
    // rejected by non-SystemVerilog tools.
    reg [31:0] result;

    initial begin
        // Load the sum 1..5 program from hex file
        $readmemh("program.hex", dut.imem0.mem);
        $dumpfile("tb_riscv_core.vcd");
        $dumpvars(0, tb_riscv_core);

        // Hold reset across two rising edges, then release it on the
        // following falling edge so the deassertion cannot race the DUT's
        // posedge-clk sampling.
        repeat (2) @(posedge clk);
        @(negedge clk);
        rst = 0;

        // Run for enough cycles for the loop to complete:
        // 3 setup + 5 iterations × 4 instructions (bge, add, addi, jal)
        // + final bge + sw = 25 cycles; 50 leaves ample margin.
        repeat (50) @(posedge clk);
        #1;

        // Check: memory address 0 should contain 15 (sum 1..5)
        // DMEM is byte-addressed; word at addr 0 = bytes [3:0]
        result = {dut.dmem0.mem[3], dut.dmem0.mem[2],
                  dut.dmem0.mem[1], dut.dmem0.mem[0]};
        // === also fails on X/Z, i.e. when the store never happened
        if (result === 32'd15)
            $display("PASS: sum(1..5) = %0d stored at mem[0]", result);
        else
            $display("FAIL: mem[0] = %0d (expected 15)", result);

        // Bonus: dump register file
        $display("--- Register File Snapshot ---");
        for (i = 0; i < 8; i = i+1)
            $display(" x%0d = %0d", i, dut.rf.regs[i]);

        $finish;
    end
endmodule
PASS: sum(1..5) = 15 stored at mem[0]
--- Register File Snapshot ---
 x0 = 0
 x1 = 6
 x2 = 15
 x3 = 6
 x4 = 0
 x5 = 0
 x6 = 0
 x7 = 0
$readmemh is the standard way to test CPU RTL without firmware tooling.
The core supports all R-type and all I-type ALU instructions, LW/LH/LB/LHU/LBU, SW/SH/SB, all six conditional branches, JAL and JALR. LUI, AUIPC, FENCE and ECALL are not implemented in this minimal core but can be added.
program.hex is a text file in $readmemh hex format — each data line is one 32-bit instruction word, and // comments are allowed. The testbench loads it into the instruction-memory array using $readmemh("program.hex", dut.imem0.mem).
Yes — a functionally correct single-cycle RV32I processor. It runs real RISC-V code. The next step is pipelining (Day 17) to improve throughput.