SPI is everywhere: ADCs, DACs, flash memories, SD cards, accelerometers, and display controllers all speak it. Unlike UART, SPI is synchronous — the master drives the clock — making it simpler to implement and capable of much higher data rates. This lesson builds a complete SPI Mode 0 master that you can connect to virtually any SPI device.
SPI uses four signals: SCLK (clock), MOSI (Master Out Slave In), MISO (Master In Slave Out), and CS_N (Chip Select, active low). The master controls everything. A transaction is: assert CS_N low → clock 8 bits out on MOSI while reading 8 bits from MISO → deassert CS_N high.
The four SPI modes are defined by two bits: CPOL (idle clock level) and CPHA (sampling edge). Mode 0 (CPOL=0, CPHA=0) is by far the most common: clock idles low, data is shifted out on falling edges and sampled on rising edges.
| Port | Dir | Width | Description |
|---|---|---|---|
| clk | IN | 1 | System clock |
| rst | IN | 1 | Synchronous active-high reset |
| start | IN | 1 | Pulse high for 1 cycle to begin an 8-bit transfer |
| mosi_data | IN | 8 | Byte to transmit (MSB first). Captured when start is asserted. |
| miso | IN | 1 | Serial data from slave (received MSB first) |
| sclk | OUT | 1 | SPI clock output to slave |
| cs_n | OUT | 1 | Chip select, active low. Asserted for duration of transfer. |
| mosi | OUT | 1 | Serial data to slave (MSB first) |
| busy | OUT | 1 | High while a transfer is in progress |
| done | OUT | 1 | Pulses high for 1 cycle when transfer completes |
| miso_data | OUT | 8 | Received byte. Valid when done pulses. |
// spi_master.v — SPI Mode 0 Master (CPOL=0, CPHA=0)
//
// Sends one byte (MSB first) per `start` pulse while receiving one byte.
//
// CLK_DIV: system_clk / (2 * spi_clk)  — system clocks per SCLK half-period
// e.g. 100 MHz system, 1 MHz SPI → CLK_DIV = 50
// e.g. 100 MHz system, 25 MHz SPI → CLK_DIV = 2
// Supported range: 1 .. 65536 (16-bit divider counter).
//
// Frame timing produced by this module:
//   * cs_n falls and the first MOSI bit is driven one SCLK half-period
//     before the first rising edge.
//   * MOSI changes only on SCLK falling edges; MISO is sampled on the
//     system clock that launches each SCLK rising edge.
//   * Exactly 8 full SCLK pulses are generated; cs_n is released only after
//     the 8th falling edge, so SCLK is always low when cs_n changes.
module spi_master #(
    parameter CLK_DIV = 4 // half-period divider (12.5 MHz from 100 MHz)
)(
    input  wire       clk,        // system clock
    input  wire       rst,        // synchronous, active-high reset
    input  wire       start,      // 1-cycle pulse: begin an 8-bit transfer (ignored unless idle)
    input  wire [7:0] mosi_data,  // byte to send; latched on the cycle start is accepted
    input  wire       miso,       // serial data from slave
    output reg        sclk,       // SPI clock, idles low
    output reg        cs_n,       // chip select, active low
    output reg        mosi,       // serial data to slave
    output reg        busy,       // high while a transfer is in progress
    output reg        done,       // 1-cycle pulse when miso_data is updated
    output reg  [7:0] miso_data   // received byte, valid when done pulses
);
    // Internal registers
    reg [7:0]  tx_shift; // TX shift register (MSB first)
    reg [7:0]  rx_shift; // RX shift register
    reg [2:0]  bit_cnt;  // bits still to be shifted out after the current one, 7..0
    reg [15:0] clk_cnt;  // half-period divider (widened so CLK_DIV > 256 cannot hang)

    // State machine
    localparam IDLE   = 2'd0,
               ASSERT = 2'd1, // assert CS, drive first bit
               XFER   = 2'd2, // generate SCLK, shift bits out/in
               DONE   = 2'd3; // release CS, publish received byte
    reg [1:0] state;

    // High on the last system clock of each SCLK half-period.
    wire half_tick = (clk_cnt == CLK_DIV - 1);

    // SCLK generation and the data path share one process so that every
    // SCLK edge and the data movement tied to it happen on the same system
    // clock. This removes the one-cycle lag of a registered edge strobe,
    // which truncated the last clock pulse and could emit an extra edge at
    // small CLK_DIV values.
    always @(posedge clk) begin
        if (rst) begin
            state     <= IDLE;
            sclk      <= 1'b0;
            cs_n      <= 1'b1;
            mosi      <= 1'b0;
            busy      <= 1'b0;
            done      <= 1'b0;
            bit_cnt   <= 3'd7;
            clk_cnt   <= 16'd0;
            tx_shift  <= 8'd0;
            rx_shift  <= 8'd0;
            miso_data <= 8'd0;
        end else begin
            done <= 1'b0; // default: done is a single-cycle pulse

            case (state)
                // ---- Idle: bus released, wait for start ----
                IDLE: begin
                    cs_n    <= 1'b1;
                    sclk    <= 1'b0;
                    mosi    <= 1'b0;
                    busy    <= 1'b0;
                    bit_cnt <= 3'd7;
                    clk_cnt <= 16'd0;
                    if (start) begin
                        tx_shift <= mosi_data; // latch the byte now
                        state    <= ASSERT;
                    end
                end

                // ---- Assert CS and present the MSB before the first edge ----
                ASSERT: begin
                    cs_n     <= 1'b0;
                    busy     <= 1'b1;
                    // Use the latched copy: mosi_data may already have changed.
                    mosi     <= tx_shift[7];
                    tx_shift <= {tx_shift[6:0], 1'b0};
                    clk_cnt  <= 16'd0; // first half-period starts with CS low
                    state    <= XFER;
                end

                // ---- Transfer: sample on rising, shift on falling ----
                XFER: begin
                    if (half_tick) begin
                        clk_cnt <= 16'd0;
                        if (!sclk) begin
                            // Launch rising edge; MISO has been stable since
                            // the previous falling edge, so sample it here.
                            sclk     <= 1'b1;
                            rx_shift <= {rx_shift[6:0], miso};
                        end else begin
                            // Launch falling edge.
                            sclk <= 1'b0;
                            if (bit_cnt == 3'd0) begin
                                // 8th falling edge: frame complete, SCLK is low.
                                state <= DONE;
                            end else begin
                                bit_cnt  <= bit_cnt - 3'd1;
                                mosi     <= tx_shift[7];
                                tx_shift <= {tx_shift[6:0], 1'b0};
                            end
                        end
                    end else begin
                        clk_cnt <= clk_cnt + 16'd1;
                    end
                end

                // ---- Done: release CS, present received data ----
                DONE: begin
                    cs_n      <= 1'b1;
                    mosi      <= 1'b0;
                    busy      <= 1'b0;
                    done      <= 1'b1;
                    miso_data <= rx_shift;
                    state     <= IDLE;
                end

                default: state <= IDLE;
            endcase
        end
    end
endmodule
The testbench connects MOSI directly to MISO (loopback), so every byte transmitted should be received unchanged. This verifies the complete shift-in and shift-out path.
// tb_spi_master.v — self-checking loopback testbench (MISO is fed from MOSI)
`timescale 1ns/1ps
module tb_spi_master;
    parameter CLK_DIV = 4;

    // Stimulus driven by the testbench
    reg        clk       = 0;
    reg        rst       = 1;
    reg        start     = 0;
    reg  [7:0] mosi_data = 0;

    // Signals driven by the DUT
    wire       sclk;
    wire       cs_n;
    wire       mosi;
    wire       busy;
    wire       done;
    wire [7:0] miso_data;

    // Scoreboard
    integer pass_cnt = 0;
    integer fail_cnt = 0;

    // Device under test. Its serial output is wired straight back into its
    // serial input, so each received byte should equal the byte just sent.
    spi_master #(
        .CLK_DIV (CLK_DIV)
    ) dut (
        .clk       (clk),
        .rst       (rst),
        .start     (start),
        .mosi_data (mosi_data),
        .miso      (mosi),       // loopback
        .sclk      (sclk),
        .cs_n      (cs_n),
        .mosi      (mosi),
        .busy      (busy),
        .done      (done),
        .miso_data (miso_data)
    );

    // Free-running clock: 10 ns period
    initial forever #5 clk = ~clk;

    // Watchdog: abort if the run takes longer than 100 us of simulated time
    initial begin
        #100000;
        $display("TIMEOUT");
        $finish;
    end

    // Send one byte, wait for the done pulse, and compare the received byte
    // against what was sent. Updates pass_cnt / fail_cnt.
    task send_and_check;
        input [7:0] data;
        begin
            // Present the byte together with a one-cycle start pulse
            @(posedge clk);
            mosi_data <= data;
            start     <= 1;
            @(posedge clk);
            start     <= 0;

            // Block until the DUT reports completion, then let one more
            // clock edge pass before reading the result
            @(posedge done);
            @(posedge clk);

            if (miso_data !== data) begin
                $display("FAIL: sent 0x%02X, received 0x%02X", data, miso_data);
                fail_cnt = fail_cnt + 1;
            end else begin
                $display("PASS: sent 0x%02X, received 0x%02X", data, miso_data);
                pass_cnt = pass_cnt + 1;
            end

            // Idle gap before the next transfer
            repeat (4) @(posedge clk);
        end
    endtask

    // Test sequence
    initial begin
        $dumpfile("tb_spi_master.vcd");
        $dumpvars(0, tb_spi_master);

        // Hold reset for four clocks, then give the DUT two clocks to settle
        repeat (4) @(posedge clk);
        rst = 0;
        repeat (2) @(posedge clk);

        // Mixed, all-zeros, all-ones and alternating bit patterns
        send_and_check(8'hA5);
        send_and_check(8'h3C);
        send_and_check(8'h00);
        send_and_check(8'hFF);
        send_and_check(8'h55);

        // Summary
        if (fail_cnt != 0)
            $display("\nFAILED: %0d passed, %0d failed", pass_cnt, fail_cnt);
        else
            $display("\nALL TESTS PASSED (%0d/%0d)", pass_cnt, pass_cnt+fail_cnt);
        $finish;
    end
endmodule
PASS: sent 0xA5, received 0xA5
PASS: sent 0x3C, received 0x3C
PASS: sent 0x00, received 0x00
PASS: sent 0xFF, received 0xFF
PASS: sent 0x55, received 0x55

ALL TESTS PASSED (5/5)
SPI Mode 0 means CPOL=0 (the clock idles low) and CPHA=0 (data is captured on the rising edge and shifted out on the falling edge). It is the most common SPI configuration, used by ADCs, flash memories, and sensor chips.
CPOL (Clock Polarity) sets idle clock level: 0=idle low, 1=idle high. CPHA (Clock Phase) sets sampling edge: 0=first edge, 1=second edge. The four SPI modes are: 0(0,0), 1(0,1), 2(1,0), 3(1,1). Always check the device datasheet.
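To generate the SPI clock, use a clock-divider counter that toggles SCLK once every CLK_DIV system clocks, where CLK_DIV = system_clk / (2 × spi_clk). For 25 MHz SPI from a 100 MHz system clock, CLK_DIV = 2; for 1 MHz SPI, CLK_DIV = 50. Make CLK_DIV a parameter for easy reuse.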