A CPU that can only access its own memories is useless. Real systems need to talk to the world — serial consoles, sensors, network interfaces. Memory-mapped I/O (MMIO) is the standard RISC-V approach: peripherals appear at special addresses, and ordinary SW/LW instructions communicate with them. Today we add a UART TX peripheral at address 0x10000000.
The address space is divided into two regions. The lower region (e.g., 0x00000000–0x0FFFFFFF) is RAM. A higher region (e.g., 0x10000000 and above) is MMIO — peripheral registers. The address decoder checks the high bits of the store/load address and routes writes to the appropriate peripheral instead of DMEM.
Address map:
0x00000000 – 0x0FFFFFFF → Data Memory (DMEM)
0x10000000 → UART TX data register
0x10000004 → UART status (bit[0] = TX busy)
0x10000008 → (reserved)
A standard UART frame (8N1): 1 start bit (low) + 8 data bits (LSB first) + 1 stop bit (high). At 115200 baud, each bit is 1/115200 ≈ 8.68 µs wide. With a 100 MHz clock, that is 868 clock cycles per bit.
// mmio_uart.v — Memory-mapped UART transmitter (8N1 framing)
//
// Register map (full 32-bit address compared against `addr`):
//   0x10000000  TX data (write): wdata[7:0] is loaded for transmission when
//                                the transmitter is idle. A write while
//                                busy is ignored, so software must poll the
//                                status bit first.
//               (read): returns the status word too; kept so existing code
//                       that polls this address keeps working.
//   0x10000004  STATUS  (read):  bit[0] = busy, bits[31:1] = 0.
//   any other address reads as 0.
//
// Timing: the line stays high for one bit period after an accepted write,
// then start bit, 8 data bits LSB first, stop bit. `busy` is high for
// 10 * BAUD_DIV clocks and drops when the stop bit is driven; the idle
// period in front of the next frame guarantees the stop bit its full width.
//
// Parameters: CLK_FREQ = clock frequency in Hz, BAUD = bits per second.
// BAUD_DIV = CLK_FREQ / BAUD (integer division) clocks per bit.
module mmio_uart #(
    parameter CLK_FREQ = 100_000_000,
    parameter BAUD     = 115_200
)(
    input             clk, rst,
    // MMIO bus interface (from CPU store stage)
    input      [31:0] addr,
    input      [31:0] wdata,
    input             we,      // 1 = write this cycle
    output     [31:0] rdata,   // status register read
    // UART TX pin
    output reg        uart_tx
);
    localparam BAUD_DIV = CLK_FREQ / BAUD;  // = 868 for 100 MHz / 115200

    // Size the bit-period counter from the divisor. A fixed 16-bit counter
    // wraps before reaching BAUD_DIV-1 whenever BAUD_DIV > 65536 (for
    // example 100 MHz at 1200 baud), which would leave `busy` stuck high.
    localparam CNT_W = (BAUD_DIV > 1) ? $clog2(BAUD_DIV) : 1;

    localparam [31:0] ADDR_TXDATA = 32'h10000000;
    localparam [31:0] ADDR_STATUS = 32'h10000004;

    // ── TX shift register and control ────────────────────────────
    reg [9:0]       shift_reg;  // {stop, data[7:0], start} = 10 bits
    reg [CNT_W-1:0] baud_cnt;   // counts 0 .. BAUD_DIV-1 within one bit
    reg [3:0]       bit_cnt;    // 0..9, index of the bit driven next
    reg             busy;

    // Status word: bit[0] = busy. Answered at the documented status
    // address and, for backward compatibility, at the TX data address.
    wire status_sel = (addr == ADDR_STATUS) || (addr == ADDR_TXDATA);
    assign rdata = status_sel ? {31'b0, busy} : 32'h0;

    // ── Frame load and serialisation ─────────────────────────────
    always @(posedge clk or posedge rst) begin
        if (rst) begin
            busy      <= 1'b0;
            uart_tx   <= 1'b1;      // idle = high
            baud_cnt  <= 0;
            bit_cnt   <= 0;
            shift_reg <= 10'h3FF;   // all ones
        end else if (we && (addr == ADDR_TXDATA) && !busy) begin
            // Load frame: {stop=1, data[7:0], start=0}
            shift_reg <= {1'b1, wdata[7:0], 1'b0};
            baud_cnt  <= 0;
            bit_cnt   <= 0;
            busy      <= 1'b1;
        end else if (busy) begin
            if (baud_cnt == BAUD_DIV - 1) begin
                // One bit period elapsed: drive the next frame bit.
                baud_cnt  <= 0;
                uart_tx   <= shift_reg[0];             // output LSB
                shift_reg <= {1'b1, shift_reg[9:1]};   // shift right, fill with idle level
                if (bit_cnt == 9) begin
                    // Tenth bit is the stop bit: line goes high, frame done.
                    busy    <= 1'b0;
                    uart_tx <= 1'b1;                   // return to idle
                end else
                    bit_cnt <= bit_cnt + 1'b1;
            end else
                baud_cnt <= baud_cnt + 1'b1;
        end
    end
endmodule
The CPU's memory stage now passes the store address and data to both the DMEM and the MMIO decoder. The MMIO module checks the address and accepts the write if it matches 0x10000000:
// CPU top module — MEM stage: steer each access to DMEM or to the MMIO UART.
// The MMIO window is every address whose top nibble is 0x1,
// i.e. 0x10000000–0x1FFFFFFF; everything else goes to data memory.
wire is_mmio = (alu_out[31:28] == 4'h1);

// Exactly one target sees the store strobe.
wire mmio_we = MemWrite && is_mmio;
wire dmem_we = MemWrite && !is_mmio;

// Data memory: ordinary loads and stores.
dmem dmem0 (
    .clk    (clk),
    .we     (dmem_we),
    .addr   (alu_out),
    .wdata  (rdata2),
    .funct3 (funct3),
    .rdata  (dmem_rdata)
);

// UART peripheral: sees the same address/data bus, gated by mmio_we.
wire [31:0] mmio_rdata;
mmio_uart uart0 (
    .clk     (clk),
    .rst     (rst),
    .addr    (alu_out),
    .wdata   (rdata2),
    .we      (mmio_we),
    .rdata   (mmio_rdata),
    .uart_tx (uart_tx)
);

// Load data comes from whichever side the address selected.
wire [31:0] mem_rdata = is_mmio ? mmio_rdata : dmem_rdata;
// tb_mmio_uart.v — Self-checking testbench for mmio_uart
//
// Writes 'A' (0x41) to the TX data register at 0x10000000, then:
//   1. checks that the busy bit is set,
//   2. decodes the serial frame on uart_tx (start bit, 8 data bits LSB
//      first, stop bit) by sampling in the middle of each bit,
//   3. checks that the busy bit is clear once the stop bit is on the line.
//
// Bus stimulus is changed on the falling clock edge so it never races with
// the DUT, which samples on the rising edge. A watchdog ends the run if no
// frame ever appears.
`timescale 1ns/1ps
module tb_mmio_uart;
    localparam       CLKS_PER_BIT = 10;      // CLK_FREQ / BAUD of the DUT below
    localparam [7:0] TX_BYTE      = 8'h41;   // 'A'

    reg clk = 0, rst = 1;
    always #5 clk = ~clk;                    // 100 MHz

    reg  [31:0] addr, wdata;
    reg         we;
    wire [31:0] rdata;
    wire        uart_tx;

    reg  [7:0]  rx_byte;                     // byte reassembled from uart_tx
    integer     i;

    // Use CLK_FREQ=100 but BAUD=10 for fast simulation
    mmio_uart #(.CLK_FREQ(100), .BAUD(10)) dut (
        .clk(clk), .rst(rst),
        .addr(addr), .wdata(wdata), .we(we),
        .rdata(rdata), .uart_tx(uart_tx)
    );

    // Watchdog: a healthy run finishes in well under 2 us of simulated time.
    initial begin
        #50000;
        $display("FAIL: timeout waiting for UART frame");
        $finish;
    end

    initial begin
        $dumpfile("tb_mmio_uart.vcd"); $dumpvars(0, tb_mmio_uart);
        we = 0; addr = 0; wdata = 0; rx_byte = 0;

        // Hold reset across two rising edges, release it away from the edge.
        repeat (2) @(posedge clk);
        @(negedge clk); rst = 0;

        @(negedge clk);
        if (uart_tx === 1'b1) $display("PASS: TX line idle high after reset");
        else                  $display("FAIL: TX line not idle after reset");

        // Write 'A' (0x41) to UART TX register; we is high for one rising edge.
        addr = 32'h10000000; wdata = {24'h0, TX_BYTE}; we = 1;
        @(negedge clk); we = 0;

        // Check busy goes high
        if (rdata[0] === 1'b1) $display("PASS: UART busy after write");
        else                   $display("FAIL: UART not busy");

        // Start bit: wait for the line to fall, then move to mid-bit.
        wait (uart_tx === 1'b0);
        repeat (CLKS_PER_BIT / 2) @(posedge clk);
        if (uart_tx === 1'b0) $display("PASS: start bit low");
        else                  $display("FAIL: start bit not low at mid-bit");

        // Data bits, LSB first, sampled one bit period apart.
        for (i = 0; i < 8; i = i + 1) begin
            repeat (CLKS_PER_BIT) @(posedge clk);
            rx_byte[i] = uart_tx;
        end
        if (rx_byte === TX_BYTE) $display("PASS: received byte 0x%h", rx_byte);
        else $display("FAIL: received byte 0x%h, expected 0x%h", rx_byte, TX_BYTE);

        // Stop bit.
        repeat (CLKS_PER_BIT) @(posedge clk);
        if (uart_tx === 1'b1) $display("PASS: stop bit high");
        else                  $display("FAIL: stop bit not high");

        // Frame is 10 bits * 10 clocks = 100 cycles; busy must be clear now.
        addr = 32'h10000000;
        @(negedge clk);
        if (rdata[0] === 1'b0) $display("PASS: UART idle after TX complete");
        else                   $display("FAIL: UART still busy");
        $finish;
    end
endmodule
0x10000000 is the base address of the UART on QEMU's RISC-V `virt` machine, so bare-metal code that prints by storing bytes to 0x10000000 runs unchanged under QEMU emulation. (QEMU models a 16550-style UART, so the status register layout there differs from this simple peripheral.)
MMIO maps peripheral registers into the CPU's address space. The CPU writes to a peripheral address using SW — no special I/O instructions. An address decoder detects the address range and routes the access to the peripheral instead of DMEM.
UART TX shifts 10 bits serially: 1 start bit (low), 8 data bits LSB-first, 1 stop bit (high). A baud-rate counter divides the clock to produce the correct bit timing. At 115200 baud with a 100 MHz clock that is 868 cycles per bit.
0x10000000 — the same as QEMU's RISC-V UART. The address decoder checks if bits[31:28] == 4'h1 and routes all writes in that region (0x10000000–0x1FFFFFFF) to the MMIO module.