When your FPGA design needs a processor — to run C code, handle an OS, or orchestrate complex control flows — you instantiate a soft-core: a CPU built from FPGA LUTs. Connecting that processor to your custom peripherals requires AXI — the ARM-originated bus standard that every modern FPGA SoC uses. This lesson builds a minimal AXI4-Lite slave with 4 control registers and a complete self-checking testbench.
AXI (Advanced eXtensible Interface) is part of the ARM AMBA bus family. AXI4-Lite is the simplified version for memory-mapped register control: single 32-bit reads and writes, no bursts. It uses 5 independent channels:
| Channel | Direction | Signals | Purpose |
|---|---|---|---|
| AW | M→S | AWADDR, AWVALID, AWREADY | Write address |
| W | M→S | WDATA, WSTRB, WVALID, WREADY | Write data |
| B | S→M | BRESP, BVALID, BREADY | Write response |
| AR | M→S | ARADDR, ARVALID, ARREADY | Read address |
| R | S→M | RDATA, RRESP, RVALID, RREADY | Read data |
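Every channel uses the same rule: the sender asserts VALID when it has data to send, and the receiver asserts READY when it can accept data. A transfer happens only when BOTH are high on a rising clock edge. Once asserted, VALID must stay high (with its payload held stable) until READY is seen, and the sender must never wait for READY before asserting VALID. The receiver, by contrast, may assert READY before VALID arrives or wait until it sees VALID.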
| Offset | Name | Access | Description |
|---|---|---|---|
| 0x00 | REG0 — Control | R/W | Control register: bit 0 = enable, bit 1 = direction |
| 0x04 | REG1 — Data | R/W | Data register: 32-bit general purpose |
| 0x08 | REG2 — Status | R only | Status (read-only, driven by hardware logic) |
| 0x0C | REG3 — IRQ | R/W | Interrupt enable and clear register |
// axilite_slave.v — AXI4-Lite slave with 4 x 32-bit registers
//
// Register map (byte offset, decoded from address bits [3:2]):
//   0x00  REG0  read/write storage
//   0x04  REG1  read/write storage
//   0x08  REG2  read-only: reads return hw_status, writes are ignored
//   0x0C  REG3  read/write storage
//
// Implements full AW/W/B/AR/R channel handshake.
//   * Writes honour WSTRB: only byte lanes whose strobe bit is set are updated.
//   * RVALID is raised only after the AR handshake (ARVALID && ARREADY) has
//     completed, as the AXI channel-dependency rules require.
//   * BRESP and RRESP are always OKAY (2'b00).
//   * Reset is synchronous and active-low (aresetn is sampled on posedge aclk).
//   * Only address bits [3:2] are decoded, so ADDR_W must be at least 4 and any
//     higher address bits are ignored (the four registers alias).
module axilite_slave #(
    parameter ADDR_W = 4,   // enough for 4 registers (0x00..0x0C)
    parameter DATA_W = 32
)(
    input  wire                  aclk,
    input  wire                  aresetn,   // active-low reset
    // ---- Write address channel ----
    input  wire [ADDR_W-1:0]     awaddr,
    input  wire                  awvalid,
    output reg                   awready,
    // ---- Write data channel ----
    input  wire [DATA_W-1:0]     wdata,
    input  wire [DATA_W/8-1:0]   wstrb,
    input  wire                  wvalid,
    output reg                   wready,
    // ---- Write response channel ----
    output reg  [1:0]            bresp,
    output reg                   bvalid,
    input  wire                  bready,
    // ---- Read address channel ----
    input  wire [ADDR_W-1:0]     araddr,
    input  wire                  arvalid,
    output reg                   arready,
    // ---- Read data channel ----
    output reg  [DATA_W-1:0]     rdata,
    output reg  [1:0]            rresp,
    output reg                   rvalid,
    input  wire                  rready,
    // ---- Hardware status input (wired to REG2) ----
    input  wire [DATA_W-1:0]     hw_status
);

    // Software-writable registers. REG2 has no storage: it reads hw_status.
    reg [DATA_W-1:0]   reg0, reg1, reg3;

    // Address, data and strobes captured from the AW and W channels. The two
    // channels may complete in either order; the *_done flags record which
    // half of the write has already been accepted.
    reg [ADDR_W-1:0]   wr_addr;
    reg [DATA_W-1:0]   wr_data;
    reg [DATA_W/8-1:0] wr_strb;
    reg                wr_addr_done, wr_data_done;

    // Return `cur` with every byte lane whose strobe bit is set replaced by
    // the corresponding byte of `nxt`.
    function [DATA_W-1:0] merge_bytes;
        input [DATA_W-1:0]   cur;
        input [DATA_W-1:0]   nxt;
        input [DATA_W/8-1:0] strb;
        integer b;
        begin
            for (b = 0; b < DATA_W/8; b = b + 1)
                merge_bytes[b*8 +: 8] = strb[b] ? nxt[b*8 +: 8] : cur[b*8 +: 8];
        end
    endfunction

    // ---- Write address channel ----
    // awready goes high the cycle after awvalid is seen, the address is
    // captured on the handshake, and no further address is accepted until the
    // write response has been delivered.
    always @(posedge aclk) begin
        if (!aresetn) begin
            awready      <= 1'b0;
            wr_addr_done <= 1'b0;
        end else begin
            if (awvalid && awready) begin
                wr_addr      <= awaddr;
                wr_addr_done <= 1'b1;
                awready      <= 1'b0;
            end else if (!wr_addr_done) begin
                awready <= awvalid;   // accept as soon as master presents address
            end
            if (bvalid && bready)
                wr_addr_done <= 1'b0; // clear after response
        end
    end

    // ---- Write data channel ----
    // Mirrors the address channel; the byte strobes are captured together
    // with the data so the register update can apply them.
    always @(posedge aclk) begin
        if (!aresetn) begin
            wready       <= 1'b0;
            wr_data_done <= 1'b0;
        end else begin
            if (wvalid && wready) begin
                wr_data      <= wdata;
                wr_strb      <= wstrb;
                wr_data_done <= 1'b1;
                wready       <= 1'b0;
            end else if (!wr_data_done) begin
                wready <= wvalid;
            end
            if (bvalid && bready)
                wr_data_done <= 1'b0;
        end
    end

    // ---- Write operation + response ----
    always @(posedge aclk) begin
        if (!aresetn) begin
            reg0   <= {DATA_W{1'b0}};
            reg1   <= {DATA_W{1'b0}};
            reg3   <= {DATA_W{1'b0}};
            bvalid <= 1'b0;
            bresp  <= 2'b00;
        end else begin
            if (bvalid && bready)
                bvalid <= 1'b0;
            // Perform the write once both address and data are latched.
            // !bvalid keeps the update from repeating while the master
            // stalls the response with bready low.
            if (wr_addr_done && wr_data_done && !bvalid) begin
                case (wr_addr[3:2])
                    2'd0: reg0 <= merge_bytes(reg0, wr_data, wr_strb);
                    2'd1: reg1 <= merge_bytes(reg1, wr_data, wr_strb);
                    2'd2: ;   // REG2 is read-only — ignore writes
                    2'd3: reg3 <= merge_bytes(reg3, wr_data, wr_strb);
                endcase
                bvalid <= 1'b1;
                bresp  <= 2'b00;   // OKAY
            end
        end
    end

    // ---- Read address + read data channels ----
    // arready is offered only while no read response is pending. The read
    // data is captured, and rvalid raised, on the AR handshake itself, so
    // rvalid can never be seen before the address has been accepted.
    always @(posedge aclk) begin
        if (!aresetn) begin
            arready <= 1'b0;
            rvalid  <= 1'b0;
            rdata   <= {DATA_W{1'b0}};
            rresp   <= 2'b00;
        end else begin
            if (rvalid && rready)
                rvalid <= 1'b0;

            if (arvalid && arready) begin
                // AR handshake completes on this edge.
                arready <= 1'b0;
                case (araddr[3:2])
                    2'd0: rdata <= reg0;
                    2'd1: rdata <= reg1;
                    2'd2: rdata <= hw_status;   // REG2 = hardware status
                    2'd3: rdata <= reg3;
                    default: rdata <= 32'hDEADBEEF; // only reachable with X/Z address bits
                endcase
                rvalid <= 1'b1;
                rresp  <= 2'b00;   // OKAY
            end else begin
                arready <= arvalid && !rvalid;
            end
        end
    end

endmodule
// tb_axilite_slave.v — self-checking testbench for axilite_slave
//
// Drives single AXI4-Lite writes and reads through two small bus-master
// tasks, compares every read-back value against its expected constant,
// and prints a PASS/FAIL line per check followed by a summary.
`timescale 1ns/1ps
module tb_axilite_slave;
    parameter DW = 32, AW = 4;

    reg aclk    = 0;
    reg aresetn = 0;            // held low for the first few clocks

    // Write address
    reg  [AW-1:0]   awaddr  = 0;
    reg             awvalid = 0;
    wire            awready;
    // Write data
    reg  [DW-1:0]   wdata   = 0;
    reg  [DW/8-1:0] wstrb   = 4'hF;
    reg             wvalid  = 0;
    wire            wready;
    // Write response (master is always ready to take it)
    wire [1:0]      bresp;
    wire            bvalid;
    reg             bready  = 1;
    // Read address
    reg  [AW-1:0]   araddr  = 0;
    reg             arvalid = 0;
    wire            arready;
    // Read data (master is always ready to take it)
    wire [DW-1:0]   rdata;
    wire [1:0]      rresp;
    wire            rvalid;
    reg             rready  = 1;
    // HW status: constant pattern presented on the DUT's status input
    reg  [DW-1:0]   hw_status = 32'hCAFEBABE;

    // Device under test
    axilite_slave #(
        .ADDR_W (AW),
        .DATA_W (DW)
    ) dut (
        .aclk      (aclk),
        .aresetn   (aresetn),
        .awaddr    (awaddr),
        .awvalid   (awvalid),
        .awready   (awready),
        .wdata     (wdata),
        .wstrb     (wstrb),
        .wvalid    (wvalid),
        .wready    (wready),
        .bresp     (bresp),
        .bvalid    (bvalid),
        .bready    (bready),
        .araddr    (araddr),
        .arvalid   (arvalid),
        .arready   (arready),
        .rdata     (rdata),
        .rresp     (rresp),
        .rvalid    (rvalid),
        .rready    (rready),
        .hw_status (hw_status)
    );

    // Free-running clock: toggles every 5 time units (10 ns period)
    initial forever #5 aclk = ~aclk;

    // Scoreboard
    integer pass_cnt = 0, fail_cnt = 0;

    // Issue one write: present address and data together, release each
    // VALID once its own READY has been seen, then wait for the response.
    task axi_write;
        input [AW-1:0] addr;
        input [DW-1:0] data;
        begin
            @(posedge aclk);
            awaddr  <= addr;
            awvalid <= 1;
            wdata   <= data;
            wvalid  <= 1;
            // The AW and W handshakes are tracked independently
            fork
                begin
                    @(posedge aclk);
                    while (awready == 1'b0) @(posedge aclk);
                    awvalid <= 0;
                end
                begin
                    @(posedge aclk);
                    while (wready == 1'b0) @(posedge aclk);
                    wvalid <= 0;
                end
            join
            // Write response
            @(posedge aclk);
            while (bvalid == 1'b0) @(posedge aclk);
            @(posedge aclk);
        end
    endtask

    // Issue one read and leave the returned word in read_result.
    reg [DW-1:0] read_result;
    task axi_read;
        input [AW-1:0] addr;
        begin
            @(posedge aclk);
            araddr  <= addr;
            arvalid <= 1;
            @(posedge aclk);
            while (arready == 1'b0) @(posedge aclk);
            arvalid <= 0;
            while (rvalid == 1'b0) @(posedge aclk);
            read_result = rdata;
            @(posedge aclk);
        end
    endtask

    // Stimulus and checks
    initial begin
        $dumpfile("tb_axilite_slave.vcd");
        $dumpvars(0, tb_axilite_slave);

        // Release reset after four clocks, then let two more pass
        repeat(4) @(posedge aclk);
        aresetn = 1;
        repeat(2) @(posedge aclk);

        // REG0 (addr=0x00): write 0xDEAD_BEEF, then read it back
        axi_write(4'h0, 32'hDEADBEEF);
        axi_read(4'h0);
        if (read_result !== 32'hDEADBEEF) begin
            $display("FAIL: REG0 = 0x%08X exp 0xDEADBEEF", read_result);
            fail_cnt = fail_cnt + 1;
        end else begin
            $display("PASS: REG0 = 0x%08X", read_result);
            pass_cnt = pass_cnt + 1;
        end

        // REG1 (addr=0x04): write, then read back
        axi_write(4'h4, 32'h12345678);
        axi_read(4'h4);
        if (read_result !== 32'h12345678) begin
            $display("FAIL: REG1 = 0x%08X exp 0x12345678", read_result);
            fail_cnt = fail_cnt + 1;
        end else begin
            $display("PASS: REG1 = 0x%08X", read_result);
            pass_cnt = pass_cnt + 1;
        end

        // REG2 (addr=0x08): must return the hw_status pattern
        axi_read(4'h8);
        if (read_result !== 32'hCAFEBABE) begin
            $display("FAIL: REG2 = 0x%08X exp 0xCAFEBABE", read_result);
            fail_cnt = fail_cnt + 1;
        end else begin
            $display("PASS: REG2(hw_status) = 0x%08X", read_result);
            pass_cnt = pass_cnt + 1;
        end

        // REG2 again: a write to it must have no visible effect
        axi_write(4'h8, 32'hFFFFFFFF);
        axi_read(4'h8);
        if (read_result !== 32'hCAFEBABE) begin
            $display("FAIL: REG2 changed to 0x%08X (should be read-only)", read_result);
            fail_cnt = fail_cnt + 1;
        end else begin
            $display("PASS: REG2 unchanged after write (read-only correct)");
            pass_cnt = pass_cnt + 1;
        end

        // Summary
        if (fail_cnt != 0)
            $display("\nFAILED: %0d passed, %0d failed", pass_cnt, fail_cnt);
        else
            $display("\nALL TESTS PASSED (%0d/%0d)", pass_cnt, pass_cnt+fail_cnt);
        $finish;
    end

    // Watchdog: stop the simulation if a handshake never completes
    initial begin
        #50000;
        $display("TIMEOUT");
        $finish;
    end
endmodule
PASS: REG0 = 0xDEADBEEF
PASS: REG1 = 0x12345678
PASS: REG2(hw_status) = 0xCAFEBABE
PASS: REG2 unchanged after write (read-only correct)

ALL TESTS PASSED (4/4)
AXI4-Lite is a simplified AXI bus for single 32-bit register reads and writes. It is the standard interface for memory-mapped peripheral control in FPGA SoC designs. Most Xilinx IP cores with CPU-accessible registers, and the custom peripherals created in Vivado, use it for register access.
A soft-core processor is a CPU implemented in FPGA LUTs rather than dedicated silicon. Xilinx MicroBlaze is a 32-bit RISC core using ~1000 LUTs. Intel Nios II is similar. Soft cores enable embedded software without external MCUs at the cost of FPGA resources.
The five AXI4-Lite channels are AW (write address), W (write data), B (write response), AR (read address), and R (read data). Each uses a VALID/READY handshake: a transfer occurs when both signals are high on a rising clock edge. The channels are independent and can operate concurrently.