Synchronous ROM, asynchronous ROM, $readmemh file init, case-based ROM, and LUT ROM for FPGA — with interactive address explorer.
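A ROM (Read-Only Memory) stores fixed data that is set at design time. The memory contents do not change during normal operation — only address-to-data lookup is performed. Common uses include lookup tables (such as sine tables), coefficient and decode tables, boot ROMs, and microcode storage.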
Output is purely combinational — data appears immediately when address changes, with no clock required. Synthesis tools map small async ROMs to LUT fabric. No clock edge means no pipeline register.
// Asynchronous ROM built from an initialised memory array.
//
// Parameters:
//   DATA_W - width in bits of each stored word
//   ADDR_W - address width; the array holds 2**ADDR_W words
// Ports:
//   addr - read address
//   dout - word stored at addr, driven combinationally (no clock involved)
//
// With the default parameters, location N holds the byte 0xNN (00, 11, ... FF).
module rom_async_array #(
    parameter DATA_W = 8,
    parameter ADDR_W = 4 // 16 locations
)(
    input  wire [ADDR_W-1:0] addr,
    output wire [DATA_W-1:0] dout
);
    // Unpacked array of DATA_W-bit words (a Verilog "memory").
    reg [DATA_W-1:0] mem [0:(2**ADDR_W)-1];

    // ROM contents, one location per line. Only indices 0..15 are written.
    initial begin
        mem[0]  = 8'h00;
        mem[1]  = 8'h11;
        mem[2]  = 8'h22;
        mem[3]  = 8'h33;
        mem[4]  = 8'h44;
        mem[5]  = 8'h55;
        mem[6]  = 8'h66;
        mem[7]  = 8'h77;
        mem[8]  = 8'h88;
        mem[9]  = 8'h99;
        mem[10] = 8'hAA;
        mem[11] = 8'hBB;
        mem[12] = 8'hCC;
        mem[13] = 8'hDD;
        mem[14] = 8'hEE;
        mem[15] = 8'hFF;
    end

    // Continuous assignment: dout tracks addr with no register in the path.
    assign dout = mem[addr];
endmodule
// Case-statement ROM: every address is mapped to a hard-coded byte inside a
// combinational always block.
//
// Ports:
//   addr - read address
//   dout - byte selected by addr; 8'h00 for any address not listed below
module rom_case #(
    parameter DATA_W = 8,
    parameter ADDR_W = 4
)(
    input  wire [ADDR_W-1:0] addr,
    output reg  [DATA_W-1:0] dout
);
    always @* begin
        // Fallback value first; a matching case item below overrides it.
        dout = 8'h00;
        case (addr)
            4'h0: dout = 8'h00;
            4'h1: dout = 8'h11;
            4'h2: dout = 8'h22;
            4'h3: dout = 8'h33;
            4'h4: dout = 8'h44;
            4'h5: dout = 8'h55;
            4'h6: dout = 8'h66;
            4'h7: dout = 8'h77;
            4'h8: dout = 8'h88;
            4'h9: dout = 8'h99;
            4'hA: dout = 8'hAA;
            4'hB: dout = 8'hBB;
            4'hC: dout = 8'hCC;
            4'hD: dout = 8'hDD;
            4'hE: dout = 8'hEE;
            4'hF: dout = 8'hFF;
        endcase
    end
endmodule
// Function-style ROM: the table lives in a function, so the lookup can be
// used inside any expression in this module.
//
// Ports:
//   addr - read address
//   dout - lookup(addr), driven combinationally
module rom_func #(parameter ADDR_W = 4)(
    input  wire [ADDR_W-1:0] addr,
    output wire [7:0]        dout
);
    // Returns the byte stored at address a; unlisted addresses return 8'hFF.
    function automatic [7:0] lookup(input [ADDR_W-1:0] a);
        begin
            case (a)
                4'h0:    lookup = 8'h00;
                4'h1:    lookup = 8'h11;
                // ... fill all addresses
                default: lookup = 8'hFF;
            endcase
        end
    endfunction

    assign dout = lookup(addr);
endmodule
Output is registered — data appears one clock cycle after the address is presented. Preferred in pipelined designs for timing closure. On Xilinx, adding the output register enables BRAM-style ROM inference.
// Synchronous ROM: the addressed word is captured in a register on the
// rising clock edge, so dout is valid one cycle after addr is presented.
//
// Parameters:
//   DATA_W - width in bits of each stored word
//   ADDR_W - address width; the array holds 2**ADDR_W words
// Ports:
//   clk  - read clock
//   addr - read address, sampled on posedge clk
//   dout - registered read data (1-cycle latency)
module rom_sync #(
    parameter DATA_W = 8,
    parameter ADDR_W = 4
)(
    input  wire              clk,
    input  wire [ADDR_W-1:0] addr,
    output reg  [DATA_W-1:0] dout
);
    reg [DATA_W-1:0] mem [0:(2**ADDR_W)-1];

    // All 16 entries are initialised explicitly. Leaving any out would make
    // reads of those addresses return X in simulation.
    initial begin
        mem[0]  = 8'h00; mem[1]  = 8'h11;
        mem[2]  = 8'h22; mem[3]  = 8'h33;
        mem[4]  = 8'h44; mem[5]  = 8'h55;
        mem[6]  = 8'h66; mem[7]  = 8'h77;
        mem[8]  = 8'h88; mem[9]  = 8'h99;
        mem[10] = 8'hAA; mem[11] = 8'hBB;
        mem[12] = 8'hCC; mem[13] = 8'hDD;
        mem[14] = 8'hEE; mem[15] = 8'hFF;
    end

    always @(posedge clk) begin
        dout <= mem[addr]; // registered — 1 cycle latency
    end
endmodule
// Synthesizes to Xilinx BRAM in ROM mode
// Requirements: (* rom_style = "block" *), sync read, no write port
//
// Parameters:
//   DATA_W   - width in bits of each stored word
//   ADDR_W   - address width; the array holds 2**ADDR_W words
//   HEX_FILE - hex file loaded with $readmemh; defaults to "rom_init.hex"
// Ports:
//   clk  - read clock
//   addr - read address, sampled on posedge clk
//   dout - registered read data (1-cycle latency)
module rom_bram #(
    parameter DATA_W   = 8,
    parameter ADDR_W   = 10,            // 1K entries
    parameter HEX_FILE = "rom_init.hex" // memory initialisation file
)(
    input  wire              clk,
    input  wire [ADDR_W-1:0] addr,
    output reg  [DATA_W-1:0] dout
);
    (* rom_style = "block" *)
    reg [DATA_W-1:0] mem [0:(2**ADDR_W)-1];

    initial $readmemh(HEX_FILE, mem); // load from hex file

    always @(posedge clk) begin
        dout <= mem[addr];
    end
endmodule
// 2-stage pipeline: register address then register data
// Use when ROM data path is on critical path
//
// Parameters:
//   DATA_W   - width in bits of each stored word
//   ADDR_W   - address width; the array holds 2**ADDR_W words
//   HEX_FILE - hex file loaded with $readmemh; defaults to "rom_init.hex"
// Ports:
//   clk  - read clock
//   addr - read address, sampled on posedge clk
//   dout - read data, valid 2 cycles after addr
module rom_pipelined #(
    parameter DATA_W   = 8,
    parameter ADDR_W   = 4,
    parameter HEX_FILE = "rom_init.hex" // memory initialisation file
)(
    input  wire              clk,
    input  wire [ADDR_W-1:0] addr,
    output reg  [DATA_W-1:0] dout // valid 2 cycles after addr
);
    reg [DATA_W-1:0] mem [0:(2**ADDR_W)-1];
    reg [ADDR_W-1:0] addr_r;

    initial $readmemh(HEX_FILE, mem);

    // Non-blocking assignments: dout reads the addr_r captured on the
    // previous edge, which gives the two-cycle latency.
    always @(posedge clk) begin
        addr_r <= addr;        // stage 1: register address
        dout   <= mem[addr_r]; // stage 2: register data
    end
endmodule
$readmemh loads a memory array from an external hex file at simulation time (and synthesis time for supported tools like Vivado). This separates data from RTL — changing the ROM contents requires only a new hex file, not an RTL change.
Vivado supports $readmemh inside initial blocks for BRAM initialization. The hex file must be available at synthesis time. Use $readmemb for binary files.
// File: rom_init.hex
// One value per line, no 0x prefix, MSB first
// @address jumps to a specific location
// This example supplies four values starting at address 0 and four more
// starting at address 8.
00
11
22
33
@08 // skip to address 8; addresses 4-7 are not loaded by this file
AA
BB
CC
DD
// Synchronous ROM whose contents come from an external hex file.
//
// Parameters:
//   DATA_W   - width in bits of each stored word
//   ADDR_W   - address width; the array holds 2**ADDR_W words
//   HEX_FILE - file passed to $readmemh
// Ports:
//   clk  - read clock
//   addr - read address, sampled on posedge clk
//   dout - registered read data (1-cycle latency)
module rom_readmem #(
    parameter DATA_W   = 8,
    parameter ADDR_W   = 8, // 256 entries
    parameter HEX_FILE = "rom_init.hex"
)(
    input  wire              clk,
    input  wire [ADDR_W-1:0] addr,
    output reg  [DATA_W-1:0] dout
);
    localparam DEPTH = (1 << ADDR_W);

    reg [DATA_W-1:0] mem [0:DEPTH-1];

    initial begin
        // Hex text file -> memory array
        $readmemh(HEX_FILE, mem);

        // Variant restricted to addresses 0 to 15:
        //   $readmemh(HEX_FILE, mem, 0, 15);
        // Variant for a binary file (0s and 1s per bit):
        //   $readmemb("rom_init.bin", mem);
    end

    always @(posedge clk)
        dout <= mem[addr];
endmodule
A 64-entry, 8-bit quantized quarter-wave sine table. The full 360° wave is reconstructed by mirroring the quarter wave using the phase MSBs.
// 10-bit phase input → 8-bit unsigned sine output
//   phase[9:8] = quadrant (0–3)
//   phase[7:2] = index into the 64-entry quarter-wave table
//   phase[1:0] = not used (finer than the table resolution)
// Latency: sine is valid 2 clock cycles after phase.
module sin_lut (
    input  wire       clk,
    input  wire [9:0] phase, // 0–1023 = 0–360°
    output reg  [7:0] sine   // unsigned, offset binary (128 = zero crossing)
);
    // Quarter-wave table: sin(0°)→sin(90°), 64 entries, 8-bit
    reg [7:0] qtable [0:63];

    initial begin
        // Values = round(127.5 * sin(i/64 * pi/2) + 128) for i=0..63
        qtable[0]  = 8'd128; qtable[1]  = 8'd131; qtable[2]  = 8'd134; qtable[3]  = 8'd137;
        qtable[4]  = 8'd140; qtable[5]  = 8'd144; qtable[6]  = 8'd147; qtable[7]  = 8'd150;
        qtable[8]  = 8'd153; qtable[9]  = 8'd156; qtable[10] = 8'd159; qtable[11] = 8'd162;
        qtable[12] = 8'd165; qtable[13] = 8'd168; qtable[14] = 8'd171; qtable[15] = 8'd174;
        qtable[16] = 8'd177; qtable[17] = 8'd180; qtable[18] = 8'd183; qtable[19] = 8'd185;
        qtable[20] = 8'd188; qtable[21] = 8'd191; qtable[22] = 8'd194; qtable[23] = 8'd196;
        qtable[24] = 8'd199; qtable[25] = 8'd201; qtable[26] = 8'd204; qtable[27] = 8'd206;
        qtable[28] = 8'd209; qtable[29] = 8'd211; qtable[30] = 8'd214; qtable[31] = 8'd216;
        qtable[32] = 8'd218; qtable[33] = 8'd220; qtable[34] = 8'd222; qtable[35] = 8'd225;
        qtable[36] = 8'd227; qtable[37] = 8'd229; qtable[38] = 8'd230; qtable[39] = 8'd232;
        qtable[40] = 8'd234; qtable[41] = 8'd236; qtable[42] = 8'd237; qtable[43] = 8'd239;
        qtable[44] = 8'd240; qtable[45] = 8'd242; qtable[46] = 8'd243; qtable[47] = 8'd245;
        qtable[48] = 8'd246; qtable[49] = 8'd247; qtable[50] = 8'd248; qtable[51] = 8'd249;
        qtable[52] = 8'd250; qtable[53] = 8'd251; qtable[54] = 8'd252; qtable[55] = 8'd252;
        qtable[56] = 8'd253; qtable[57] = 8'd254; qtable[58] = 8'd254; qtable[59] = 8'd255;
        qtable[60] = 8'd255; qtable[61] = 8'd255; qtable[62] = 8'd255; qtable[63] = 8'd255;
    end

    wire [1:0] quadrant = phase[9:8];
    wire [5:0] idx      = phase[7:2]; // 6-bit index into 64-entry table

    // Odd quadrants (90–180° and 270–360°) walk the quarter wave backwards,
    // so the index is mirrored: ~idx == 63 - idx for a 6-bit value.
    wire [5:0] idx_m = quadrant[0] ? ~idx : idx;

    reg [7:0] raw_r; // stage 1: table output
    reg       neg_r; // stage 1: lower half-wave flag, delayed to line up with raw_r

    // Stage 1: synchronous table read. The half-wave select is registered on
    // the same edge so that stage 2 combines values from the same phase sample.
    always @(posedge clk) begin
        raw_r <= qtable[idx_m];
        neg_r <= quadrant[1];
    end

    // Stage 2: reflect about the 128 midpoint for 180–360°.
    // ~raw_r + 1 equals 256 - raw_r modulo 256, i.e. 128 - (raw_r - 128).
    always @(posedge clk) begin
        if (neg_r)
            sine <= ~raw_r + 8'd1;
        else
            sine <= raw_r;
    end
endmodule
import math

# Write sin_lut.hex: 256 samples covering one full sine period, one value per
# line as two upper-case hex digits (the format read by $readmemh).
with open("sin_lut.hex", "w") as f:
    for i in range(256):
        angle = 2 * math.pi * i / 256
        # Scale sin() from [-1, 1] to [0, 255] and round to the nearest integer.
        val = int(round(127.5 * math.sin(angle) + 127.5))
        # Clamp so every value fits in 8 bits.
        val = max(0, min(255, val))
        f.write(f"{val:02X}\n")
On FPGAs, ROM can live in LUT fabric (distributed RAM) or in dedicated BRAM blocks. The choice depends on size, latency requirements, and available resources.
| Property | LUT ROM (distributed) | BRAM ROM |
|---|---|---|
| Capacity per resource | 64 bits per 6-LUT | 32 Kbits per RAMB36 |
| Suitable size | ≤256 entries (≤2 Kbits) | 256 – millions of entries |
| Read latency | 0 cycles (async) or 1 cycle (sync) | 1–2 cycles (requires output register) |
| Clock required | Optional (async possible) | Required (synchronous read) |
| Synthesis attribute | (* rom_style = "distributed" *) | (* rom_style = "block" *) |
| FPGA resource cost | LUTs (shared with logic) | Dedicated BRAMs (no LUT usage) |
| Typical use | Small coefficients, decode tables | Boot ROM, large LUT tables, microcode |
// Distributed (LUT) ROM — attribute written inline with the declaration
(* rom_style = "distributed" *) reg [7:0] lut_rom [0:15];

// Block RAM ROM
(* rom_style = "block" *) reg [7:0] bram_rom [0:1023];

// With no attribute the synthesizer picks the implementation itself,
// based on its size heuristic (this is the default).
A ROM stores fixed data set at design time — no write port exists in hardware. In RTL, ROMs are memory arrays initialized with $readmemh or initial blocks and accessed read-only. FPGAs implement them in LUTs (small) or BRAM (large).
Async ROM: combinational, output changes immediately with address — no clock, zero-cycle latency. Sync ROM: output registered through a flip-flop, available one cycle after address — required for BRAM synthesis and timing closure in pipelined designs.
$readmemh("file.hex", mem) reads a hex text file into a Verilog memory array. Each line is one hex value. @addr markers jump to a specific location. Vivado supports it for BRAM init at synthesis time. The hex file must exist in the synthesis run directory.
LUT ROM uses FPGA lookup table fabric — fast, zero/one-cycle, but consumes LUTs needed for logic. Best for ≤256 entries. BRAM ROM uses dedicated block RAM primitives (32–36 Kbits each), requires synchronous read, frees up LUTs. Best for larger tables.
A case-based ROM maps each address to hardcoded data in a case statement inside a combinational always @(*) block. Readable for small tables; synthesis tools optimize it to LUT logic. Equivalent to an array init but explicit per-address.