Our CPU can compute — but it executes every instruction in sequence. Without branches and jumps there are no loops, no conditionals, no function calls. Today we add a branch_unit.v that computes the branch target and the taken/not-taken decision, then extend the CPU to core_rimb.v with a PC mux that can jump anywhere in the program.
All six conditional branches use the B-type instruction format. The branch target is PC + sign_extend(B-imm) where the offset is in units of 2 bytes (allowing ±4 KB range). The condition depends on funct3:
| funct3 | Instruction | Condition |
|---|---|---|
| 000 | BEQ | rs1 == rs2 (ALU zero flag = 1) |
| 001 | BNE | rs1 != rs2 (zero = 0) |
| 100 | BLT | rs1 < rs2 (signed) |
| 101 | BGE | rs1 >= rs2 (signed) |
| 110 | BLTU | rs1 < rs2 (unsigned) |
| 111 | BGEU | rs1 >= rs2 (unsigned) |
JAL (Jump And Link) uses the J-type format. Its 20-bit immediate field encodes a sign-extended offset in multiples of 2 bytes (±1 MiB range), which is added to PC to form the target. It writes PC+4 to rd as the return address — this is how function calls work in RISC-V.
JALR (Jump And Link Register) uses the I-type format. The target is rs1 + I-immediate with the LSB cleared, and PC+4 is written to rd. It is used to return from functions: jalr x0, ra, 0 jumps back to the caller and discards the link value by writing it to x0.
| Port | Direction | Width | Description |
|---|---|---|---|
| funct3 | Input | 3 | Branch type (BEQ/BNE/BLT/BGE/BLTU/BGEU) |
| zero | Input | 1 | ALU zero flag (rs1 == rs2) |
| alu_lt | Input | 1 | ALU less-than flag (from SLT operation) |
| alu_ltu | Input | 1 | ALU unsigned less-than flag |
| pc | Input | 32 | Current program counter |
| b_imm | Input | 32 | Sign-extended B-immediate from ImmGen |
| branch_taken | Output | 1 | 1 if the branch condition is true |
| branch_target | Output | 32 | PC + b_imm (the branch destination) |
// branch_unit.v — Branch target adder and taken/not-taken decision
//
// Purely combinational. The target is always pc + b_imm; whether it is
// actually used is decided by the caller from branch_taken. The comparison
// flags are supplied by the ALU, so this module only selects (and optionally
// inverts) the flag that matches the B-type funct3 encoding.
module branch_unit (
    input      [ 2:0] funct3,        // B-type funct3: selects the condition
    input             zero,          // ALU flag: rs1 == rs2
    input             alu_lt,        // ALU flag: rs1 <  rs2 (signed)
    input             alu_ltu,       // ALU flag: rs1 <  rs2 (unsigned)
    input      [31:0] pc,            // address of the branch instruction
    input      [31:0] b_imm,         // sign-extended B-immediate
    output reg        branch_taken,  // 1 when the selected condition holds
    output     [31:0] branch_target  // pc + b_imm
);
    // funct3 encodings of the six conditional branches.
    localparam [2:0] F3_BEQ  = 3'b000,
                     F3_BNE  = 3'b001,
                     F3_BLT  = 3'b100,
                     F3_BGE  = 3'b101,
                     F3_BLTU = 3'b110,
                     F3_BGEU = 3'b111;

    // Target is computed unconditionally.
    assign branch_target = pc + b_imm;

    always @(*) begin
        // Not taken unless a valid encoding says otherwise; this also covers
        // the two unassigned funct3 values (010, 011).
        branch_taken = 1'b0;
        case (funct3)
            F3_BEQ : branch_taken =  zero;
            F3_BNE : branch_taken = ~zero;
            F3_BLT : branch_taken =  alu_lt;
            F3_BGE : branch_taken = ~alu_lt;   // >= is the complement of <
            F3_BLTU: branch_taken =  alu_ltu;
            F3_BGEU: branch_taken = ~alu_ltu;
        endcase
    end
endmodule
The program counter now has four possible sources, conceptually selected by a 2-bit PCSrc value:
- PCSrc = 00 → PC+4 (normal sequential execution)
- PCSrc = 01 → branch_target (branch taken)
- PCSrc = 10 → JAL target (PC + J-imm)
- PCSrc = 11 → JALR target (rs1 + I-imm, LSB cleared)

// core_rimb.v — Single-cycle RISC-V core (R/I/M/B-type + JAL/JALR)
// Single-cycle datapath: fetch -> decode -> regfile read -> ALU ->
// branch/jump resolution -> data memory -> write-back, with the PC updated
// once per clock. All sub-modules (imem, control, regfile, immgen, alu,
// branch_unit, dmem) are instantiated by port name; their internals are
// defined outside this module.
//
// Ports:
//   clk : clock; pc is updated on the rising edge.
//   rst : asynchronous, active-high reset; forces pc to 0.
module core_rimb (
    input clk,
    input rst
);
    // ── Program Counter ──────────────────────────────────────────
    reg  [31:0] pc;
    wire [31:0] pc4 = pc + 32'd4;   // sequential next PC, also the link value

    // ── Instruction Fetch ─────────────────────────────────────────
    wire [31:0] inst;
    imem imem0 (.addr(pc), .rdata(inst));

    // ── Decode fields ─────────────────────────────────────────────
    wire [6:0] opcode = inst[6:0];
    wire [4:0] rd     = inst[11:7];
    wire [2:0] funct3 = inst[14:12];
    wire [4:0] rs1    = inst[19:15];
    wire [4:0] rs2    = inst[24:20];
    wire [6:0] funct7 = inst[31:25];

    // ── Control ───────────────────────────────────────────────────
    // NOTE: MemRead is produced by the control unit but is not consumed by
    // the dmem instance below (it has no read-enable connection here).
    wire       RegWrite, ALUSrc, MemRead, MemWrite, WBSel;
    wire [3:0] ALUOp;
    wire       Branch, Jal, Jalr;
    control ctrl (
        .opcode(opcode), .funct3(funct3), .funct7(funct7),
        .RegWrite(RegWrite), .ALUSrc(ALUSrc),
        .MemRead(MemRead), .MemWrite(MemWrite),
        .WBSel(WBSel), .ALUOp(ALUOp),
        .Branch(Branch), .Jal(Jal), .Jalr(Jalr)
    );

    // ── Register File ─────────────────────────────────────────────
    wire [31:0] rdata1, rdata2, wr_data;
    regfile rf (
        .clk(clk), .we(RegWrite),
        .rs1(rs1), .rs2(rs2), .rd(rd),
        .wdata(wr_data), .rdata1(rdata1), .rdata2(rdata2)
    );

    // ── Immediate Generator ───────────────────────────────────────
    // A single imm output is shared by the ALU operand mux, the branch unit,
    // and both jump-target adders; immgen is expected to pick the format
    // from inst.
    wire [31:0] imm;
    immgen ig (.inst(inst), .imm(imm));

    // ── ALU ───────────────────────────────────────────────────────
    wire [31:0] alu_b = ALUSrc ? imm : rdata2;
    wire [31:0] alu_out;
    wire        alu_zero, alu_lt, alu_ltu;
    alu alu0 (
        .a(rdata1), .b(alu_b), .op(ALUOp),
        .result(alu_out), .zero(alu_zero),
        .lt(alu_lt), .ltu(alu_ltu)
    );

    // ── Branch Unit ───────────────────────────────────────────────
    wire        branch_taken;
    wire [31:0] branch_target;
    branch_unit bu (
        .funct3(funct3), .zero(alu_zero),
        .alu_lt(alu_lt), .alu_ltu(alu_ltu),
        .pc(pc), .b_imm(imm),
        .branch_taken(branch_taken),
        .branch_target(branch_target)
    );

    // ── PC Mux ────────────────────────────────────────────────────
    wire [31:0] jal_target  = pc + imm;                 // JAL:  PC-relative
    wire [31:0] jalr_target = (rdata1 + imm) & ~32'h1;  // JALR: rs1 + imm, LSB cleared

    // PCSrc encoding:
    //   00 = PC+4, 01 = branch target, 10 = JAL target, 11 = JALR target.
    // Derived locally from the control flags and the branch decision, with
    // a taken branch having the highest priority, then Jal, then Jalr.
    wire [1:0] PCSrc =
        (Branch & branch_taken) ? 2'b01 :
        Jal                     ? 2'b10 :
        Jalr                    ? 2'b11 : 2'b00;

    wire [31:0] pc_next =
        (PCSrc == 2'b01) ? branch_target :
        (PCSrc == 2'b10) ? jal_target    :
        (PCSrc == 2'b11) ? jalr_target   : pc4;

    always @(posedge clk or posedge rst)
        if (rst) pc <= 32'd0;
        else     pc <= pc_next;

    // ── Data Memory ───────────────────────────────────────────────
    wire [31:0] dmem_rdata;
    dmem dmem0 (
        .clk(clk), .we(MemWrite),
        .addr(alu_out), .wdata(rdata2),
        .funct3(funct3), .rdata(dmem_rdata)
    );

    // ── Write-Back ────────────────────────────────────────────────
    // WBSel (1 bit): 0 = ALU result, 1 = data-memory read data.
    // Jal/Jalr override that choice and write PC+4 (the link address).
    wire [31:0] wr_alu_or_mem = WBSel ? dmem_rdata : alu_out;
    assign wr_data = (Jal | Jalr) ? pc4 : wr_alu_or_mem;
endmodule
// tb_core_rimb.v — Testbench for core_rimb (branches + jumps)
// Program (byte address: instruction):
//   0x00: addi x1,x0,5     // x1 = 5
//   0x04: addi x2,x0,5     // x2 = 5
//   0x08: beq  x1,x2, +8   // taken → skip next instruction (target 0x10)
//   0x0C: addi x3,x0,99    // SKIPPED if beq taken
//   0x10: addi x4,x0,1     // x4 = 1 (reached after branch)
//   0x14: jal  x5, +8      // jump forward 8 bytes (target 0x1C), x5 = PC+4 = 0x18
//   0x18: addi x6,x0,77    // SKIPPED by jal
//   0x1C: addi x7,x0,2     // x7 = 2 (jal lands here)
//   0x20: nop
`timescale 1ns/1ps
module tb_core_rimb;
    reg clk = 0, rst = 1;
    always #5 clk = ~clk;   // 10 ns clock period

    core_rimb dut (.clk(clk), .rst(rst));

    initial begin
        // Load the program directly into instruction memory.
        dut.imem0.mem[0] = 32'h00500093; // addi x1,x0,5
        dut.imem0.mem[1] = 32'h00500113; // addi x2,x0,5
        dut.imem0.mem[2] = 32'h00208463; // beq x1,x2, +8
        dut.imem0.mem[3] = 32'h06300193; // addi x3,x0,99 (skip)
        dut.imem0.mem[4] = 32'h00100213; // addi x4,x0,1
        // An offset of +4 would land on the very next instruction and skip
        // nothing; +8 is needed to jump over the addi x6.
        dut.imem0.mem[5] = 32'h008002ef; // jal x5, +8
        dut.imem0.mem[6] = 32'h04d00313; // addi x6,x0,77 (skip)
        dut.imem0.mem[7] = 32'h00200393; // addi x7,x0,2
        dut.imem0.mem[8] = 32'h00000013; // nop

        // The "not written" checks below compare against 0. The regfile
        // instance in core_rimb has no reset connection, so clear the two
        // registers here rather than relying on their power-up value.
        dut.rf.regs[3] = 32'd0;
        dut.rf.regs[6] = 32'd0;

        $dumpfile("tb_core_rimb.vcd");
        $dumpvars(0, tb_core_rimb);

        // Hold reset across two rising edges, then release it away from the
        // clock edge. (Waiting on @(negedge rst) here would hang forever:
        // nothing else in the testbench ever drives rst low.)
        repeat (2) @(posedge clk);
        #1 rst = 0;

        // NOTE(review): the program needs 7 cycles; the remaining cycles
        // fetch beyond mem[8], which this testbench does not initialise.
        repeat (12) @(posedge clk);
        #1;

        // x3 should be 0 (BEQ was taken, addi x3 skipped)
        if (dut.rf.regs[3] === 32'd0)
            $display("PASS: BEQ taken — x3 not written");
        else
            $display("FAIL: BEQ not taken — x3=%0d", dut.rf.regs[3]);

        // x4 should be 1
        if (dut.rf.regs[4] === 32'd1)
            $display("PASS: x4 = 1");
        else
            $display("FAIL: x4 = %0d", dut.rf.regs[4]);

        // x5 should hold the JAL link address: 0x14 + 4 = 24
        if (dut.rf.regs[5] === 32'd24)
            $display("PASS: x5 = 24 (JAL link address)");
        else
            $display("FAIL: x5 = %0d (expected 24)", dut.rf.regs[5]);

        // x6 should be 0 (JAL skipped it)
        if (dut.rf.regs[6] === 32'd0)
            $display("PASS: JAL taken — x6 not written");
        else
            $display("FAIL: JAL not taken — x6=%0d", dut.rf.regs[6]);

        // x7 should be 2
        if (dut.rf.regs[7] === 32'd2)
            $display("PASS: x7 = 2");
        else
            $display("FAIL: x7 = %0d", dut.rf.regs[7]);

        $finish;
    end
endmodule
The branch unit computes the target PC + B-immediate regardless of whether the branch is taken; branch_taken decides whether that target is used. JAL and JALR write PC+4 to rd as the return address (used by function call conventions). BEQ subtracts rs2 from rs1 via the ALU. If the zero flag is 1 (rs1==rs2) the branch is taken and the PC is set to PC + B-immediate. Otherwise execution continues at PC+4.
JAL adds a J-immediate to the current PC. JALR adds an I-immediate to rs1 and clears the LSB. Both save PC+4 to rd. JAL is for static function calls; JALR is for returns and computed jumps.
The PC mux selects between PC+4, branch target, JAL target, and JALR target. It is driven by a combination of the Branch, Jal, Jalr control signals and the branch_taken condition.