Home › RISC-V from Scratch › Day 24
DAY 24 · PHASE 4 — ADVANCED & REAL HARDWARE

Run Your CPU on an FPGA

By EcrioniX · Updated 2026-06-11

Simulation is powerful, but there is nothing like watching your CPU actually run on silicon. Today we take riscv_core.v, wrap it in an FPGA top module, map instruction memory to block RAM, connect the UART TX pin and LEDs, and generate a bitstream for the Basys3 or Nexys4 board. Your CPU runs at 100 MHz on real hardware.

The FPGA Flow: Synthesis → Implementation → Bitstream

  1. Synthesis — Vivado translates RTL Verilog to a netlist of FPGA primitives (LUTs, flip-flops, BRAMs, DSPs).
  2. Implementation — Place-and-route: assigns each LUT/FF to a physical location and routes the wires between them.
  3. Bitstream generation — Packs the placed-and-routed design into the binary configuration file for the FPGA.
  4. Program device — Downloads the bitstream via USB-JTAG. Configuration takes a second or two, and the design starts running as soon as it completes.

fpga_top.v — Basys3/Nexys4 Wrapper

fpga_top.v
// fpga_top.v — RISC-V CPU top level for Basys3 / Nexys4
// Board clock: 100 MHz on pin W5 (Basys3)
// Reset:       BTNC (active high), passed through a 2-flop reset synchronizer
// UART TX:     pin A18 (Basys3 USB-RS232 TXD)
// LEDs:        LD0..LD15 — show low 16 bits of register x2 (accumulator)
//
// Ports:
//   clk_100mhz  board oscillator, used directly as the CPU clock
//   btnc        push-button reset, active high, asynchronous to clk_100mhz
//   uart_tx     serial output driven by the core's UART
//   led[15:0]   low half of register x2, tapped from the core's register file
module fpga_top (
    input         clk_100mhz,  // 100 MHz board clock
    input         btnc,        // reset button
    output        uart_tx,     // UART TX → PC terminal
    output [15:0] led          // 16 LEDs
);
    // ── Clock ─────────────────────────────────────────────────────
    wire clk = clk_100mhz;

    // ── Reset synchronizer ────────────────────────────────────────
    // The button is asynchronous to clk. The core resets its PC with an
    // asynchronous reset, so releasing the raw button close to a clock edge
    // could violate recovery/removal timing and let different flops leave
    // reset on different cycles. Assert reset asynchronously (immediately
    // when the button is pressed) but release it synchronously, two clock
    // edges after the button is let go.
    // The 2'b11 initial value also holds the core in reset for the first
    // clock edges after configuration.
    (* ASYNC_REG = "TRUE" *) reg [1:0] rst_sync = 2'b11;
    always @(posedge clk or posedge btnc)
        if (btnc) rst_sync <= 2'b11;
        else      rst_sync <= {rst_sync[0], 1'b0};

    wire rst = rst_sync[1];

    // ── RISC-V core ───────────────────────────────────────────────
    // riscv_core from Day 15, with MMIO UART from Day 22.
    // The core's UART output drives the top-level pin directly.
    riscv_core_fpga #(
        .IMEM_INIT_FILE("program.hex")
    ) cpu (
        .clk(clk),
        .rst(rst),
        .uart_tx(uart_tx)
    );

    // ── LED display — show x2 register ───────────────────────────
    // x2 is the accumulator in our sum program.
    // This is a hierarchical reference into the core: it depends on the
    // register-file instance being named `rf` and its storage array being
    // named `regs`. Renaming either one breaks this line.
    assign led = cpu.rf.regs[2][15:0];

endmodule

// riscv_core_fpga — version of riscv_core with an internal, file-initialized
// instruction memory and the MMIO UART attached to the data bus.
//
// Parameters:
//   IMEM_INIT_FILE  hex file loaded into the instruction memory by $readmemh
// Ports:
//   clk      clock for the PC register and the clocked submodules
//   rst      active-high reset; resets the PC asynchronously and is also
//            passed to the UART
//   uart_tx  serial output driven by the mmio_uart instance
//
// The submodules control, regfile, immgen, alu, dmem, mmio_uart and
// branch_unit are defined elsewhere; only their port connections are
// visible here.
module riscv_core_fpga #(
    parameter IMEM_INIT_FILE = "program.hex"
)(
    input  clk, rst,
    output uart_tx
);
    // ── Program Counter ───────────────────────────────────────────
    reg  [31:0] pc;
    wire [31:0] pc4 = pc + 4;   // sequential next PC, also the jump link value

    // ── Instruction Memory ────────────────────────────────────────
    // 256 words x 32 bits (1 KiB), contents loaded from IMEM_INIT_FILE.
    // NOTE(review): the read below is combinational (no clock edge between
    // address and data). Vivado's block-RAM inference templates require a
    // registered read, so this array is expected to be implemented as
    // LUT-based distributed ROM rather than block RAM — confirm in the
    // synthesis utilization report. Getting real BRAM would need a
    // synchronous read, which changes the single-cycle fetch timing.
    reg [31:0] imem [0:255];
    initial $readmemh(IMEM_INIT_FILE, imem);
    wire [31:0] inst = imem[pc[9:2]]; // word index = pc[9:2]; pc bits above 9 are ignored

    // ── Instruction field decode (same datapath as riscv_core, abbreviated) ──
    wire [6:0] opcode = inst[6:0];
    wire [4:0] rd     = inst[11:7];
    wire [2:0] funct3 = inst[14:12];
    wire [4:0] rs1    = inst[19:15];
    wire [4:0] rs2    = inst[24:20];
    wire [6:0] funct7 = inst[31:25];

    // Control signals. MemRead is produced by the control unit but is not
    // consumed anywhere else in this module.
    wire RegWrite, ALUSrc, MemRead, MemWrite, WBSel, Branch, Jal, Jalr;
    wire [3:0] ALUOp;
    control ctrl(.opcode(opcode),.funct3(funct3),.funct7(funct7),
                 .RegWrite(RegWrite),.ALUSrc(ALUSrc),
                 .MemRead(MemRead),.MemWrite(MemWrite),.WBSel(WBSel),
                 .ALUOp(ALUOp),.Branch(Branch),.Jal(Jal),.Jalr(Jalr));

    // Register file. The instance name `rf` is referenced hierarchically by
    // fpga_top (cpu.rf.regs[2]) to drive the LEDs, so it must not be renamed.
    // No reset is connected to the register file here.
    wire [31:0] rdata1, rdata2, wr_data;
    regfile rf(.clk(clk),.we(RegWrite),.rs1(rs1),.rs2(rs2),.rd(rd),
               .wdata(wr_data),.rdata1(rdata1),.rdata2(rdata2));

    // Immediate generator
    wire [31:0] imm;
    immgen ig(.inst(inst),.imm(imm));

    // ALU: second operand is the immediate when ALUSrc is set, else rs2 data
    wire [31:0] alu_b = ALUSrc ? imm : rdata2;
    wire [31:0] alu_out;
    wire        alu_zero, alu_lt, alu_ltu;
    alu alu0(.a(rdata1),.b(alu_b),.op(ALUOp),
             .result(alu_out),.zero(alu_zero),.lt(alu_lt),.ltu(alu_ltu));

    // MMIO / DMEM address decode
    // An address whose top nibble is 0x1 (0x1000_0000–0x1FFF_FFFF) selects
    // the MMIO UART; every other address goes to data memory.
    wire is_mmio = (alu_out[31:28] == 4'h1);
    wire [31:0] dmem_rdata;
    // Data-memory writes are suppressed for MMIO addresses
    dmem dmem0(.clk(clk),.we(MemWrite && !is_mmio),
               .addr(alu_out),.wdata(rdata2),.funct3(funct3),.rdata(dmem_rdata));

    // UART is written only for stores that target the MMIO range
    wire [31:0] mmio_rdata;
    mmio_uart #(.CLK_FREQ(100_000_000),.BAUD(115_200)) uart0(
        .clk(clk),.rst(rst),
        .addr(alu_out),.wdata(rdata2),.we(MemWrite && is_mmio),
        .rdata(mmio_rdata),.uart_tx(uart_tx));

    // Load data comes from whichever device the address selects
    wire [31:0] mem_rdata = is_mmio ? mmio_rdata : dmem_rdata;

    // Branch decision and target
    wire        branch_taken;
    wire [31:0] branch_target;
    branch_unit bu(.funct3(funct3),.zero(alu_zero),.alu_lt(alu_lt),.alu_ltu(alu_ltu),
                   .pc(pc),.b_imm(imm),.branch_taken(branch_taken),.branch_target(branch_target));

    // Next-PC selection, in priority order:
    //   taken branch → branch_target
    //   Jal          → pc + imm
    //   Jalr         → (rs1 + imm) with bit 0 cleared
    //   otherwise    → pc + 4
    wire [31:0] pc_next =
        (Branch & branch_taken) ? branch_target :
        Jal  ? (pc + imm) :
        Jalr ? ((rdata1 + imm) & ~32'h1) : pc4;

    // PC register: asynchronous active-high reset to address 0
    always @(posedge clk or posedge rst)
        if (rst) pc <= 0; else pc <= pc_next;

    // Write-back: jumps write the link address (pc + 4); otherwise WBSel
    // picks load data over the ALU result.
    wire [31:0] wb_mem = WBSel ? mem_rdata : alu_out;
    assign wr_data = (Jal | Jalr) ? pc4 : wb_mem;
endmodule

XDC Constraints (Basys3)

basys3.xdc
## Basys3 pin and timing constraints for the fpga_top wrapper
## Every port gets its package pin and 3.3 V LVCMOS I/O standard in one -dict call.

## System clock: 100 MHz oscillator on W5 (10 ns period, 50% duty cycle)
set_property -dict { PACKAGE_PIN W5  IOSTANDARD LVCMOS33 } [get_ports clk_100mhz]
create_clock -add -name sys_clk_pin -period 10.00 -waveform {0 5} [get_ports clk_100mhz]

## Reset push-button: BTNC
set_property -dict { PACKAGE_PIN U18 IOSTANDARD LVCMOS33 } [get_ports btnc]

## UART TX → USB serial bridge (PC terminal at 115200 baud)
set_property -dict { PACKAGE_PIN A18 IOSTANDARD LVCMOS33 } [get_ports uart_tx]

## User LEDs LD0..LD15
set_property -dict { PACKAGE_PIN U16 IOSTANDARD LVCMOS33 } [get_ports {led[0]}]
set_property -dict { PACKAGE_PIN E19 IOSTANDARD LVCMOS33 } [get_ports {led[1]}]
set_property -dict { PACKAGE_PIN U19 IOSTANDARD LVCMOS33 } [get_ports {led[2]}]
set_property -dict { PACKAGE_PIN V19 IOSTANDARD LVCMOS33 } [get_ports {led[3]}]
set_property -dict { PACKAGE_PIN W18 IOSTANDARD LVCMOS33 } [get_ports {led[4]}]
set_property -dict { PACKAGE_PIN U15 IOSTANDARD LVCMOS33 } [get_ports {led[5]}]
set_property -dict { PACKAGE_PIN U14 IOSTANDARD LVCMOS33 } [get_ports {led[6]}]
set_property -dict { PACKAGE_PIN V14 IOSTANDARD LVCMOS33 } [get_ports {led[7]}]
set_property -dict { PACKAGE_PIN V13 IOSTANDARD LVCMOS33 } [get_ports {led[8]}]
set_property -dict { PACKAGE_PIN V3  IOSTANDARD LVCMOS33 } [get_ports {led[9]}]
set_property -dict { PACKAGE_PIN W3  IOSTANDARD LVCMOS33 } [get_ports {led[10]}]
set_property -dict { PACKAGE_PIN U3  IOSTANDARD LVCMOS33 } [get_ports {led[11]}]
set_property -dict { PACKAGE_PIN P3  IOSTANDARD LVCMOS33 } [get_ports {led[12]}]
set_property -dict { PACKAGE_PIN N3  IOSTANDARD LVCMOS33 } [get_ports {led[13]}]
set_property -dict { PACKAGE_PIN P1  IOSTANDARD LVCMOS33 } [get_ports {led[14]}]
set_property -dict { PACKAGE_PIN L1  IOSTANDARD LVCMOS33 } [get_ports {led[15]}]

Vivado Flow Step by Step

  1. Open Vivado → Create Project → RTL Project → add all .v files + program.hex
  2. Set target device: xc7a35tcpg236-1 (Basys3) or xc7a100tcsg324-1 (Nexys4)
  3. Add the XDC constraints file
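  4. Run Synthesis (F11) → check the Timing Summary: the critical path should be <10 ns for 100 MHz. The post-synthesis figure is only an estimate, so re-check after implementation that the worst negative slack (WNS) is not negative.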
  5. Run Implementation — place and route
  6. Generate Bitstream → Open Hardware Manager → Program Device

Day 24 Takeaways

FAQ

How do you map instruction memory to FPGA block RAM?

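Declare the instruction memory as a reg array initialized with $readmemh. Vivado infers block RAM from such an array only when the read is synchronous, i.e. the read data is registered on a clock edge; a purely combinational read (for example, wire inst = imem[addr]) is implemented in LUT-based distributed memory instead. Alternatively use a BRAM IP core with a .coe initialization file.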

What XDC constraints are needed?

At minimum: clock period constraint (create_clock), reset button pin, UART TX pin, and LED pins. Each port must have a PACKAGE_PIN and IOSTANDARD property.

What is a bitstream?

The binary file that configures the FPGA's lookup tables, routing, and BRAM contents. Generated by Vivado after synthesis and implementation. Downloaded to the FPGA via USB-JTAG using Program Device.

Previous
← Day 23: M-Extension MUL & DIV

← Full roadmap