Architecture Overview
Our CPU is an accumulator-based 8-bit processor with a 4-bit address space (16 bytes of RAM). Instructions are 8 bits wide: the top 4 bits are the opcode, the bottom 4 bits are the operand (address or immediate value). Everything communicates over a shared 8-bit internal data bus.
The control unit is the brain — it reads the opcode from IR and asserts the right control signals (load/enable for each register, ALU op, RAM write) on each clock edge. The 8-bit data bus is the highway — every module connects to it, but only one drives it at a time.
Instruction Set Architecture (ISA)
Every instruction is 8 bits: [7:4] opcode, [3:0] operand. The 4-bit operand is either a memory address (for LDA/STA/ADD/SUB etc.) or an immediate value (for LDI).
Interactive CPU Simulator — Step Every T-State
Select a program, then click Step to advance one clock cycle at a time. Watch every register update live. The highlighted row in memory is the current PC.
Fetch-Decode-Execute: T-State Breakdown
The control unit is a Moore FSM that advances through one T-state per clock edge. T1–T3 are the same for every instruction (the fetch phase). The states from T4 onward depend on the decoded opcode.
| T-State | Phase | Operation | Control signals asserted |
|---|---|---|---|
| T1 | Fetch | MAR ← PC | PC_out, MAR_in |
| T2 | Fetch | MDR ← RAM[MAR]; PC++ | RAM_out, MDR_in, PC_inc |
| T3 | Fetch | IR ← MDR | MDR_out, IR_in |
| T4 | Execute | Decode: MAR ← IR[3:0] (for mem instrs) | IR_out, MAR_in |
| T5 | Execute | MDR ← RAM[MAR]; A ← ALU(A, MDR) | RAM_out, MDR_in, ALU_op, A_in, flags_in |
| T4 | Execute | A ← IR[3:0] (LDI) | IR_out, A_in, flags_in |
| T4 | Execute | PC ← IR[3:0] (JMP) | IR_out, PC_in |
| T4 | Execute | OUT ← A (OUT) | A_out, OUT_in |
| T4 | Execute | HALT (HLT) | HLT |
ALU — Arithmetic Logic Unit
The ALU is a purely combinational block. It takes two 8-bit inputs (A and B) and a 3-bit operation selector, and produces an 8-bit result plus flag bits. It never stores state — the result is captured into the A register on the clock edge that asserts A_in.
// alu8 — purely combinational 8-bit ALU.
//
//   a, b     : 8-bit operands
//   op       : operation select (see OP_* encodings below)
//   result   : 8-bit result of the selected operation
//   zero     : 1 when result is 8'h00
//   carry    : bit 8 of the 9-bit sum (ADD) or difference (SUB, i.e. borrow);
//              0 for every other operation
//   negative : copy of result[7]
module alu8 (
    input  wire [7:0] a, b,
    input  wire [2:0] op,        // operation select
    output reg  [7:0] result,
    output wire       zero,      // result == 0
    output wire       carry,     // unsigned overflow
    output wire       negative   // result[7]
);
    // Operation encodings. 3'd0 (pass A) and 3'd7 (unassigned) both fall
    // through to the defaults set at the top of the always_comb block.
    localparam OP_ADD = 3'd1,
               OP_SUB = 3'd2,
               OP_AND = 3'd3,
               OP_OR  = 3'd4,
               OP_XOR = 3'd5,
               OP_NOT = 3'd6;

    // Operands widened by one bit so the ninth bit exposes carry / borrow.
    wire [8:0] sum9  = {1'b0, a} + {1'b0, b};
    wire [8:0] diff9 = {1'b0, a} - {1'b0, b};

    reg carry_bit;

    always_comb begin
        // Defaults: pass A through with carry clear. Assigning both outputs
        // up front keeps every path fully specified (no inferred latches).
        result    = a;
        carry_bit = 1'b0;

        case (op)
            OP_ADD: begin
                result    = sum9[7:0];
                carry_bit = sum9[8];
            end
            OP_SUB: begin
                result    = diff9[7:0];
                carry_bit = diff9[8];   // set when b > a (borrow)
            end
            OP_AND: result = a & b;
            OP_OR:  result = a | b;
            OP_XOR: result = a ^ b;
            OP_NOT: result = ~a;
            default: ;                  // pass A (defaults above)
        endcase
    end

    assign negative = result[7];
    assign carry    = carry_bit;
    assign zero     = (result == 8'd0);
endmodule
Registers & Internal Bus
Each register is a bank of D flip-flops with a load enable. Only one source may drive the shared bus in any T-state: in RTL this is modeled with a chain of conditional assignments, and in real hardware it is implemented with tristate buffers or a MUX tree at the bus drivers.
// reg8 — general-purpose 8-bit register.
//
//   RESET_VAL : value taken while rst_n is low (default 8'h00)
//   clk       : rising-edge clock
//   rst_n     : active-low asynchronous reset
//   load      : synchronous load enable; q holds its value when load is 0
//   d         : data captured on a clock edge while load is 1
//   q         : current register contents
module reg8 #(
    parameter RESET_VAL = 8'h00
) (
    input  wire       clk,
    input  wire       rst_n,
    input  wire       load,
    input  wire [7:0] d,
    output reg  [7:0] q
);
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            q <= RESET_VAL;
        end else if (load) begin
            q <= d;
        end
        // load == 0: no assignment, so q keeps its previous value
    end
endmodule
// pc8 — 8-bit program counter with increment and jump load.
//
//   clk   : rising-edge clock
//   rst_n : active-low asynchronous reset; clears the counter to 8'h00
//   inc   : advance the counter by one (asserted during fetch)
//   load  : replace the counter with d (jump); takes priority over inc
//   d     : jump target
//   q     : current counter value
module pc8 (
    input  wire       clk,
    input  wire       rst_n,
    input  wire       inc,    // increment (fetch phase)
    input  wire       load,   // jump: load new address
    input  wire [7:0] d,
    output reg  [7:0] q
);
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n) begin
            q <= 8'h00;
        end else if (load) begin
            // A jump wins even if inc is asserted in the same cycle.
            q <= d;
        end else if (inc) begin
            q <= q + 8'd1;    // wraps from 8'hFF back to 8'h00
        end
    end
endmodule
Control Unit — The CPU's Brain
The control unit is a Moore FSM with a T-state counter (T1–T5) and an opcode decoder. On each clock edge it advances one T-state and asserts the appropriate control signals. After the last execute T-state it resets to T1 for the next instruction.
// control_unit — T-state sequencer and opcode decoder.
//
// Steps a T-state counter once per rising clock edge and decodes
// {T-state, opcode, flags} into register load strobes:
//
//   T1 (t=0)  mar_load                     MAR <- PC
//   T2 (t=1)  mdr_load, pc_inc             MDR <- mem, PC++
//   T3 (t=2)  ir_load                      IR  <- MDR
//   T4 (t=3)  first execute state   (every opcode)
//   T5 (t=4)  second execute state  (LDA/ADD/SUB/AND/OR/STA, and any
//             opcode without an explicit T4-only entry)
//
// After the last execute state of an instruction the counter returns to T1.
// While `halt` is asserted (HLT in T4) the counter is frozen, which keeps
// `halt` asserted until reset.
//
// Inputs
//   clk, rst_n     : clock and active-low asynchronous reset (t <- T1)
//   opcode         : IR[7:4]
//   flag_z, flag_c : condition flags that gate JZ / JC
// Outputs
//   *_load, pc_inc, ram_wr, flags_load, halt : one-cycle control strobes
//   alu_op                                   : ALU operation select
module control_unit (
    input  wire       clk, rst_n,
    input  wire [3:0] opcode,        // IR[7:4]
    input  wire       flag_z, flag_c,
    output reg        pc_inc, pc_load,
    output reg        mar_load, mdr_load,
    output reg        ir_load,
    output reg        a_load, b_load,
    output reg        out_load,
    output reg        ram_wr,
    output reg  [2:0] alu_op,
    output reg        flags_load,
    output reg        halt
);
    // Opcodes
    localparam NOP=4'h0, LDA=4'h1, ADD=4'h2, SUB=4'h3, STA=4'h4,
               LDI=4'h5, JMP=4'h6, JZ=4'h7,  JC=4'h8,
               AND=4'h9, OR=4'hA,  NOT=4'hB, OUT=4'hE, HLT=4'hF;
    // ALU ops
    localparam ALU_PASS=3'd0, ALU_ADD=3'd1, ALU_SUB=3'd2,
               ALU_AND=3'd3,  ALU_OR=3'd4,  ALU_NOT=3'd6;

    reg [2:0] t;        // T-state counter: T1=0 .. T5=4
    reg [2:0] t_last;   // final T-state of the instruction currently in IR

    // Instructions that finish in T4 end at t=3; everything else runs through
    // T5 (t=4). During fetch (t=0..2) `opcode` still belongs to the previous
    // instruction, which is harmless because t_last is never below 3.
    always_comb begin
        case (opcode)
            NOP, LDI, JMP, JZ, JC, OUT, HLT, NOT: t_last = 3'd3;
            default:                               t_last = 3'd4;
        endcase
    end

    // T-state sequencer. Exactly one assignment to `t` is reached per edge.
    // (Previously an unconditional `t <= t + 1` after the wrap-around `case`
    // overrode every `t <= 0`, so the counter never returned to T1 early and
    // free-ran through all eight values of the 3-bit counter.)
    always @(posedge clk or negedge rst_n) begin
        if (!rst_n)
            t <= 3'd0;
        else if (halt)
            t <= t;                 // HLT: freeze in T4
        else if (t >= t_last)
            t <= 3'd0;              // instruction complete: back to T1
        else
            t <= t + 3'd1;
    end

    // Output decode: purely a function of t, opcode and the flags.
    always_comb begin
        // default: all deasserted
        {pc_inc, pc_load, mar_load, mdr_load, ir_load,
         a_load, b_load, out_load, ram_wr, flags_load, halt} = 0;
        alu_op = ALU_PASS;

        case (t)
            0: begin mar_load = 1; end                 // T1: MAR<-PC
            1: begin mdr_load = 1; pc_inc = 1; end     // T2: MDR<-mem, PC++
            2: begin ir_load = 1; end                  // T3: IR<-MDR
            3: begin                                   // T4 execute
                case (opcode)
                    LDA, ADD, SUB, AND, OR, STA: mar_load = 1;  // MAR<-operand
                    LDI: begin a_load = 1; alu_op = ALU_PASS; flags_load = 1; end
                    JMP: pc_load = 1;
                    JZ:  if (flag_z) pc_load = 1;
                    JC:  if (flag_c) pc_load = 1;
                    NOT: begin a_load = 1; alu_op = ALU_NOT; flags_load = 1; end
                    OUT: out_load = 1;
                    HLT: halt = 1;
                    default: ;
                endcase
            end
            4: begin                                   // T5 execute (mem instrs)
                case (opcode)
                    LDA: begin mdr_load = 1; a_load = 1; alu_op = ALU_PASS; flags_load = 1; end
                    ADD: begin mdr_load = 1; b_load = 1; a_load = 1; alu_op = ALU_ADD; flags_load = 1; end
                    SUB: begin mdr_load = 1; b_load = 1; a_load = 1; alu_op = ALU_SUB; flags_load = 1; end
                    AND: begin mdr_load = 1; b_load = 1; a_load = 1; alu_op = ALU_AND; flags_load = 1; end
                    OR:  begin mdr_load = 1; b_load = 1; a_load = 1; alu_op = ALU_OR;  flags_load = 1; end
                    STA: ram_wr = 1;
                    default: ;
                endcase
            end
            default: ;                                 // t > 4 is never reached
        endcase
    end
endmodule
Top-Level CPU Module
// cpu8 — top level: registers, ALU, 16x8 RAM, internal bus and control unit.
//
//   clk      : rising-edge clock for every register and the RAM write port
//   rst_n    : active-low asynchronous reset for all registers
//   out_port : contents of the OUT register (latched copy of A)
//
// Datapath summary (control strobes come from control_unit):
//   T1  MAR <- PC
//   T2  MDR <- RAM[MAR], PC++
//   T3  IR  <- MDR
//   T4  MAR / PC / A <- IR[3:0]   (memory instrs / taken jumps / LDI)
//       A <- ~A (NOT), OUT <- A (OUT)
//   T5  A <- RAM[MAR]             (LDA)
//       A <- ALU(A, RAM[MAR])     (ADD/SUB/AND/OR; B keeps a copy)
//       RAM[MAR] <- A             (STA)
//
// NOTE: nothing in this module initialises `ram`; its contents have to be
// loaded from outside this block before the CPU is released from reset.
module cpu8 (
    input  wire       clk, rst_n,
    output wire [7:0] out_port   // wired to 7-seg or LED
);
    // Must match control_unit's ALU_PASS encoding.
    localparam ALU_PASS = 3'd0;

    // Control strobes
    wire       pc_inc, pc_load, mar_load, mdr_load, ir_load;
    wire       a_load, b_load, out_load, ram_wr, flags_load, halt_sig;
    wire [2:0] alu_op;

    // Register outputs and internal bus
    wire [7:0] bus;
    wire [7:0] pc_q, mar_q, mdr_q, ir_q, a_q, b_q;

    // ALU
    wire [7:0] alu_a, alu_b, alu_result;
    wire       alu_z, alu_c, alu_n;   // alu_n is not consumed: control_unit has no N input

    // Condition flags as seen by the control unit
    reg        flag_z, flag_c;

    // Instruction operand, zero-extended to bus width.
    wire [7:0] ir_operand = {4'h0, ir_q[3:0]};

    // control_unit raises mar_load both in T1 (MAR <- PC) and in T4
    // (MAR <- IR[3:0]) and exposes no bus-source select. T4 is always the
    // cycle immediately after ir_load, so remember that one cycle here.
    reg operand_phase;
    always @(posedge clk or negedge rst_n)
        if (!rst_n) operand_phase <= 1'b0;
        else        operand_phase <= ir_load;

    // RAM — 16x8, combinational (asynchronous) read, synchronous write
    reg  [7:0] ram [0:15];
    wire [7:0] ram_out = ram[mar_q[3:0]];
    always @(posedge clk) if (ram_wr) ram[mar_q[3:0]] <= a_q;

    // Bus source select (priority mux; at most one source matters per T-state)
    assign bus = mdr_load                        ? ram_out    :  // T2 fetch / T5 operand read
                 ir_load                         ? mdr_q      :  // T3: MDR -> IR
                 (mar_load && !operand_phase)    ? pc_q       :  // T1: PC -> MAR
                 (mar_load || pc_load || a_load) ? ir_operand :  // T4: IR[3:0] -> MAR / PC / A
                 8'h00;                                          // idle: defined value, no tristate

    // ALU operand selection.
    //  * PASS together with a_load is how control_unit encodes LDA/LDI: the
    //    bus value is routed through the ALU so that A and the flags both
    //    receive the incoming data instead of the old accumulator.
    //  * When B is being loaded, the ALU sees the bus value directly so the
    //    result captured into A on the same edge uses the fresh operand.
    assign alu_a = (a_load && (alu_op == ALU_PASS)) ? bus : a_q;
    assign alu_b = b_load ? bus : b_q;

    // Registers
    pc8  PC  (.clk(clk),.rst_n(rst_n),.inc(pc_inc),.load(pc_load),.d(bus),.q(pc_q));
    reg8 MAR (.clk(clk),.rst_n(rst_n),.load(mar_load),.d(bus),.q(mar_q));
    reg8 MDR (.clk(clk),.rst_n(rst_n),.load(mdr_load),.d(bus),.q(mdr_q));
    reg8 IR  (.clk(clk),.rst_n(rst_n),.load(ir_load),.d(bus),.q(ir_q));
    reg8 A   (.clk(clk),.rst_n(rst_n),.load(a_load),.d(alu_result),.q(a_q));
    reg8 B   (.clk(clk),.rst_n(rst_n),.load(b_load),.d(bus),.q(b_q));
    reg8 OUT (.clk(clk),.rst_n(rst_n),.load(out_load),.d(a_q),.q(out_port));

    // ALU
    alu8 ALU (.a(alu_a),.b(alu_b),.op(alu_op),.result(alu_result),
              .zero(alu_z),.carry(alu_c),.negative(alu_n));

    // Flags register: holds Z and C from the last flag-setting instruction so
    // that JZ / JC test that result rather than whatever the combinational
    // ALU happens to output during the jump's own T4.
    always @(posedge clk or negedge rst_n)
        if (!rst_n)          {flag_z, flag_c} <= 2'b00;
        else if (flags_load) {flag_z, flag_c} <= {alu_z, alu_c};

    // Control Unit
    control_unit CU (
        .clk(clk),.rst_n(rst_n),
        .opcode(ir_q[7:4]),
        .flag_z(flag_z),.flag_c(flag_c),
        .pc_inc(pc_inc),.pc_load(pc_load),
        .mar_load(mar_load),.mdr_load(mdr_load),
        .ir_load(ir_load),.a_load(a_load),.b_load(b_load),
        .out_load(out_load),.ram_wr(ram_wr),
        .alu_op(alu_op),.flags_load(flags_load),.halt(halt_sig)
    );
endmodule
$readmemh in simulation, block RAM init in synthesis). Drive out_port to the board's LEDs or 7-segment display. Clock the CPU at 1–10 MHz to see the output register update.