Day 12 gave us a CPU that executes R-type and I-type ALU instructions. But every real program needs to read and write memory — arrays, stack variables, function arguments. Today we build dmem.v — a byte-addressable data memory — and upgrade the CPU to core_rim.v, which can execute the loads LW, LH, LB, LHU, and LBU and the stores SW, SH, and SB.
RISC-V defines two instruction classes for memory access. Stores (S-type) encode the address as rs1 + S-immediate and write rs2 to memory. Loads (I-type) compute the same address and write the value from memory into rd.
The funct3 field selects the width and sign behaviour:
| funct3 | Instruction | Width | Sign on load |
|---|---|---|---|
| 000 | LB / SB | 8-bit byte | Sign-extend |
| 001 | LH / SH | 16-bit half | Sign-extend |
| 010 | LW / SW | 32-bit word | — |
| 100 | LBU | 8-bit byte | Zero-extend |
| 101 | LHU | 16-bit half | Zero-extend |
The data memory is a simple RAM with synchronous writes and asynchronous reads. Writes happen on the rising clock edge when we=1. Reads are combinational (as a single-cycle CPU requires), so the result is available in the same cycle as the address.
| Port | Direction | Width | Description |
|---|---|---|---|
| clk | Input | 1 | Clock — writes are synchronous on rising edge |
| we | Input | 1 | Write enable — 1 for SW/SH/SB, 0 otherwise |
| addr | Input | 32 | Byte address (lower 2 bits used for sub-word alignment) |
| wdata | Input | 32 | Data to write (from rs2) |
| funct3 | Input | 3 | Access size/sign mode (000=byte, 001=half, 010=word, 100=LBU, 101=LHU) |
| rdata | Output | 32 | Data read from memory (sign/zero-extended per funct3) |
// dmem.v — Data Memory (byte-addressable, 1 KB with the default DEPTH)
// Synchronous write, asynchronous read, byte/half/word via funct3
//
// Parameter
//   DEPTH  : capacity in 32-bit words; the storage holds DEPTH*4 bytes.
// Ports
//   clk    : stores commit on the rising edge
//   we     : write enable (sampled on the rising edge of clk)
//   addr   : byte address; wrapped modulo DEPTH*4 so every access lands
//            inside the array
//   wdata  : store data; the low 8/16/32 bits are written for SB/SH/SW
//   funct3 : access mode — 000 byte, 001 half, 010 word,
//            100 byte zero-extended (loads), 101 half zero-extended (loads)
//   rdata  : load result, sign- or zero-extended to 32 bits per funct3
//
// Multi-byte values are laid out little-endian (lowest byte at the lowest
// address). Unaligned accesses are not trapped; they simply touch the
// consecutive bytes starting at addr.
module dmem #(parameter DEPTH = 256) (
    input             clk,
    input             we,
    input      [31:0] addr,
    input      [31:0] wdata,
    input      [ 2:0] funct3,
    output reg [31:0] rdata
);
    // Total capacity in bytes.
    localparam BYTES = DEPTH * 4;

    // Storage: one array element per byte, indexed by byte address.
    reg [7:0] mem [0:BYTES-1];

    // Wrap each byte lane's address into [0, BYTES-1]. Without this, an
    // address at or past the end of the array (or a half/word access that
    // straddles the last byte) reads X and its store is silently dropped.
    // For a power-of-two BYTES the modulo reduces to dropping upper bits.
    wire [31:0] a0 =  addr          % BYTES;
    wire [31:0] a1 = (addr + 32'd1) % BYTES;
    wire [31:0] a2 = (addr + 32'd2) % BYTES;
    wire [31:0] a3 = (addr + 32'd3) % BYTES;

    // --- Synchronous Write ---
    always @(posedge clk) begin
        if (we) begin
            case (funct3)
                3'b000: begin // SB — store byte
                    mem[a0] <= wdata[7:0];
                end
                3'b001: begin // SH — store half-word
                    mem[a0] <= wdata[7:0];
                    mem[a1] <= wdata[15:8];
                end
                3'b010: begin // SW — store word
                    mem[a0] <= wdata[7:0];
                    mem[a1] <= wdata[15:8];
                    mem[a2] <= wdata[23:16];
                    mem[a3] <= wdata[31:24];
                end
                default: ; // ignore illegal funct3
            endcase
        end
    end

    // --- Asynchronous Read ---
    always @(*) begin
        case (funct3)
            3'b000: // LB — sign-extend byte
                rdata = {{24{mem[a0][7]}}, mem[a0]};
            3'b001: // LH — sign-extend half-word (sign bit is in the upper byte)
                rdata = {{16{mem[a1][7]}}, mem[a1], mem[a0]};
            3'b010: // LW — full word
                rdata = {mem[a3], mem[a2], mem[a1], mem[a0]};
            3'b100: // LBU — zero-extend byte
                rdata = {24'b0, mem[a0]};
            3'b101: // LHU — zero-extend half-word
                rdata = {16'b0, mem[a1], mem[a0]};
            default: rdata = 32'b0; // undefined funct3 reads as zero
        endcase
    end
endmodule
Until Day 12, every instruction wrote the ALU result to the register file. Load instructions are different — they write data from memory. We add a write-back mux controlled by the new control signal WBSel:
The control unit sets MemRead=1 and WBSel=1 for any load opcode (0000011), and MemWrite=1 for store opcode (0100011).
┌──────┐ inst[31:0] ┌────────┐
PC──▶│ IMEM │─────────────▶│Control │──▶ RegWrite, ALUSrc,
└──────┘ │ Unit │ MemRead, MemWrite, WBSel
└────────┘
inst ┌──────────┐
rs1,rs2,rd ──────────────▶│ RegFile │◀── WB mux result
└──┬───┬──┘
rs1 │ │ rs2
▼ ▼
┌──────────────┐
ImmGen ─imm─────▶ ALU │──▶ alu_result
└──────┬───────┘
│ addr
▼
┌───────┐
│ DMEM │──▶ rdata
└───────┘
│
┌────────────┴─────────┐
│ WBSel mux │
│ 0:alu_result │
│ 1:rdata │
└──────────────────────┘
│
wr_data ──▶ RegFile rd
// core_rim.v — Single-cycle RISC-V core (R/I/M instructions)
// Executes R-type, I-type ALU, LW/LB/LH/LBU/LHU, SW/SB/SH
//
// Datapath per instruction: fetch at pc, decode fields, read registers,
// ALU computes either the arithmetic result or the memory address, the
// data memory is accessed at that address, and the write-back mux picks
// what goes to the destination register.
//
// Ports
//   clk : clock for the PC, register file and data memory
//   rst : asynchronous, active-high reset of the PC
module core_rim (
    input clk,
    input rst
);
    // ── Program Counter ──────────────────────────────────────────
    // Strictly sequential: the next PC is always the current PC plus 4.
    reg  [31:0] pc;
    wire [31:0] pc_next;
    assign pc_next = pc + 4;

    always @(posedge clk or posedge rst) begin
        if (rst)
            pc <= 0;
        else
            pc <= pc_next;
    end

    // ── Instruction Fetch ─────────────────────────────────────────
    wire [31:0] inst;
    imem imem0 (
        .addr  (pc),
        .rdata (inst)
    );

    // ── Decode ────────────────────────────────────────────────────
    // Fixed field positions of the 32-bit instruction word.
    wire [6:0] opcode;
    wire [4:0] rd;
    wire [2:0] funct3;
    wire [4:0] rs1;
    wire [4:0] rs2;
    wire [6:0] funct7;
    assign opcode = inst[6:0];
    assign rd     = inst[11:7];
    assign funct3 = inst[14:12];
    assign rs1    = inst[19:15];
    assign rs2    = inst[24:20];
    assign funct7 = inst[31:25];

    // ── Control Signals ──────────────────────────────────────────
    // MemRead is produced by the control unit but not consumed by any
    // instance in this module (the data memory has no read-enable port).
    wire       RegWrite;
    wire       ALUSrc;
    wire       MemRead;
    wire       MemWrite;
    wire       WBSel;
    wire [3:0] ALUOp;
    control ctrl (
        .opcode   (opcode),
        .funct3   (funct3),
        .funct7   (funct7),
        .RegWrite (RegWrite),
        .ALUSrc   (ALUSrc),
        .MemRead  (MemRead),
        .MemWrite (MemWrite),
        .WBSel    (WBSel),
        .ALUOp    (ALUOp)
    );

    // ── Register File ────────────────────────────────────────────
    wire [31:0] rdata1;
    wire [31:0] rdata2;
    wire [31:0] wr_data;   // driven by the write-back mux at the bottom
    regfile rf (
        .clk    (clk),
        .we     (RegWrite),
        .rs1    (rs1),
        .rs2    (rs2),
        .rd     (rd),
        .wdata  (wr_data),
        .rdata1 (rdata1),
        .rdata2 (rdata2)
    );

    // ── Immediate Generator ───────────────────────────────────────
    wire [31:0] imm;
    immgen ig (
        .inst (inst),
        .imm  (imm)
    );

    // ── ALU ───────────────────────────────────────────────────────
    // Operand B is the immediate when ALUSrc is set, otherwise rs2's value.
    // alu_zero is wired out of the ALU but unused in this module.
    wire [31:0] alu_b;
    wire [31:0] alu_out;
    wire        alu_zero;
    assign alu_b = ALUSrc ? imm : rdata2;
    alu alu0 (
        .a      (rdata1),
        .b      (alu_b),
        .op     (ALUOp),
        .result (alu_out),
        .zero   (alu_zero)
    );

    // ── Data Memory ───────────────────────────────────────────────
    // The ALU result is the byte address; rs2's value is the store data;
    // funct3 passes straight through to select access width and extension.
    wire [31:0] dmem_rdata;
    dmem dmem0 (
        .clk    (clk),
        .we     (MemWrite),
        .addr   (alu_out),
        .wdata  (rdata2),
        .funct3 (funct3),
        .rdata  (dmem_rdata)
    );

    // ── Write-Back Mux ────────────────────────────────────────────
    // WBSel=1 writes back the loaded data, WBSel=0 the ALU result.
    assign wr_data = WBSel ? dmem_rdata : alu_out;
endmodule
The testbench writes a known value into data memory using SW, then reads it back with LW into a different register and checks the value matches.
// tb_core_rim.v — Testbench for core_rim (loads + stores)
// Program: addi x1,x0,42 → sw x1,0(x0) → lw x2,0(x0)
// Expected: x2 == 42 after lw executes
`timescale 1ns/1ps
module tb_core_rim;
    reg clk = 0, rst = 1;
    always #5 clk = ~clk; // 100 MHz

    core_rim dut (.clk(clk), .rst(rst));

    integer i;

    // Preload instruction memory with a tiny store-load program
    // (assumes imem is a reg array accessible via hierarchical path)
    initial begin
        // Encode: addi x1,x0,42 = 0x02a00093
        //         sw   x1,0(x0) = 0x00102023
        //         lw   x2,0(x0) = 0x00002103
        //         nop (addi x0,x0,0) padding
        dut.imem0.mem[0] = 32'h02a00093; // addi x1,x0,42
        dut.imem0.mem[1] = 32'h00102023; // sw x1,0(x0)
        dut.imem0.mem[2] = 32'h00002103; // lw x2,0(x0)
        // The core has no branches, so it cannot loop on a single nop; the
        // PC keeps advancing. Pad with nops so every word fetched during
        // the run below is a defined instruction rather than X.
        for (i = 3; i < 8; i = i + 1)
            dut.imem0.mem[i] = 32'h00000013; // nop

        $dumpfile("tb_core_rim.vcd");
        $dumpvars(0, tb_core_rim);

        // Hold reset for two clocks, then release it on a falling edge so
        // the release cannot race the next rising edge. (Waiting on
        // @(negedge rst) here would hang forever: this block is the only
        // driver of rst, so the edge would never arrive.)
        repeat (2) @(posedge clk);
        @(negedge clk);
        rst = 0;

        repeat (6) @(posedge clk); // run 6 cycles
        #1;

        // Inspect x2 via hierarchical path to regfile
        if (dut.rf.regs[2] === 32'd42)
            $display("PASS: x2 = %0d (expected 42)", dut.rf.regs[2]);
        else
            $display("FAIL: x2 = %0d (expected 42)", dut.rf.regs[2]);
        $finish;
    end
endmodule
The control.v from Day 11 decoded R-type and I-type ALU instructions. We extend it to drive the three new signals MemRead, MemWrite, and WBSel:
| Opcode | Type | MemRead | MemWrite | WBSel |
|---|---|---|---|---|
| 0110011 | R-type | 0 | 0 | 0 (ALU) |
| 0010011 | I-type ALU | 0 | 0 | 0 (ALU) |
| 0000011 | Load (LW/LB/LH) | 1 | 0 | 1 (DMEM) |
| 0100011 | Store (SW/SB/SH) | 0 | 1 | X (no rd) |
Data memory is stored as a byte array mem[0..N]; a word at address 4 occupies mem[4..7]. Stores do not write a destination register, so RegWrite=0 for S-type. funct3 encodes the data width and sign-extension mode: 000=LB/SB (byte), 001=LH/SH (half-word), 010=LW/SW (word), 100=LBU (byte, zero-extend), 101=LHU (half-word, zero-extend).
WBSel selects what is written back to the register file. WBSel=0 writes the ALU result; WBSel=1 writes the DMEM read data (for load instructions). The control unit drives WBSel=1 for opcode 0000011.
Harvard architecture uses separate memories so both can be accessed in the same cycle — the CPU fetches an instruction from imem while simultaneously reading/writing dmem.