From 75644e4920240cf8f6d1fcf3c335a651f9d1313a Mon Sep 17 00:00:00 2001 From: UnbalancedCat Date: Thu, 22 Jun 2023 19:36:05 +0800 Subject: [PATCH] [Modified] Rewrite pipeline structure & finish exp11 test --- lacpu/rtl/cpu/alu.v | 38 +- lacpu/rtl/cpu/bru.v | 56 +++ lacpu/rtl/cpu/csr.v | 221 +++++++++ lacpu/rtl/cpu/div.v | 129 ++---- lacpu/rtl/cpu/exe_stage.v | 356 +++++++------- lacpu/rtl/cpu/forward.v | 63 --- lacpu/rtl/cpu/id_stage.v | 464 +++++++++---------- lacpu/rtl/cpu/if_stage.v | 101 ++-- lacpu/rtl/cpu/inst_decoder.v | 446 ++++++++++++++++++ lacpu/rtl/cpu/lsu.v | 56 +++ lacpu/rtl/cpu/mem_stage.v | 234 +++++++--- lacpu/rtl/cpu/mul.v | 47 ++ lacpu/rtl/cpu/mul_div_lock.v | 66 +++ lacpu/rtl/cpu/mul_div_top.v | 87 ++++ lacpu/rtl/cpu/mycpu.v | 148 ++++++ lacpu/rtl/cpu/mycpu.vh | 17 - lacpu/rtl/cpu/mycpu_top.v | 196 -------- lacpu/rtl/cpu/pip_ctrl.v | 44 ++ lacpu/rtl/cpu/regfile.v | 39 +- lacpu/rtl/cpu/tools.v | 56 ++- lacpu/rtl/cpu/wb_stage.v | 104 ++--- lacpu/rtl/soc_lite_top.v | 2 +- lacpu/run_vivado/la32r/la32r.xpr | 74 ++- lacpu/run_vivado/la32r/sim/cpu_tb_behav.wcfg | 109 ++--- 24 files changed, 2058 insertions(+), 1095 deletions(-) create mode 100644 lacpu/rtl/cpu/bru.v create mode 100644 lacpu/rtl/cpu/csr.v delete mode 100644 lacpu/rtl/cpu/forward.v create mode 100644 lacpu/rtl/cpu/inst_decoder.v create mode 100644 lacpu/rtl/cpu/lsu.v create mode 100644 lacpu/rtl/cpu/mul.v create mode 100644 lacpu/rtl/cpu/mul_div_lock.v create mode 100644 lacpu/rtl/cpu/mul_div_top.v create mode 100644 lacpu/rtl/cpu/mycpu.v delete mode 100644 lacpu/rtl/cpu/mycpu.vh delete mode 100644 lacpu/rtl/cpu/mycpu_top.v create mode 100644 lacpu/rtl/cpu/pip_ctrl.v diff --git a/lacpu/rtl/cpu/alu.v b/lacpu/rtl/cpu/alu.v index 1bac5ae..63724a3 100755 --- a/lacpu/rtl/cpu/alu.v +++ b/lacpu/rtl/cpu/alu.v @@ -1,5 +1,5 @@ module alu( - input [14:0] alu_op , + input [11:0] alu_op , input [31:0] alu_src1 , input [31:0] alu_src2 , output [31:0] alu_result @@ -17,9 +17,6 @@ module alu( wire op_sll; 
wire op_srl; wire op_sra; - wire op_mul; - wire op_mulh; - wire op_mulhu; assign op_add = alu_op[ 0]; @@ -34,9 +31,7 @@ module alu( assign op_srl = alu_op[ 9]; assign op_sra = alu_op[10]; assign op_lui = alu_op[11]; - assign op_mul = alu_op[12]; - assign op_mulh = alu_op[13]; - assign op_mulhu = alu_op[14]; + wire [31:0] add_sub_result; wire [31:0] slt_result; @@ -92,25 +87,16 @@ module alu( assign sr_result = sr64_result[31:0]; - // MUL MULH result - assign mul64_result = $signed(alu_src1) * $signed(alu_src2); - assign mulu64_result = alu_src1 * alu_src2; - - assign mul_result = op_mul ? mul64_result[31: 0] : - op_mulh ? mul64_result[63:32] : - /*op_mulhu*/ mulu64_result[63:32]; - // final result mux - assign alu_result = ({32{op_add|op_sub }} & add_sub_result) - | ({32{op_slt }} & slt_result) - | ({32{op_sltu }} & sltu_result) - | ({32{op_and }} & and_result) - | ({32{op_nor }} & nor_result) - | ({32{op_or }} & or_result) - | ({32{op_xor }} & xor_result) - | ({32{op_lui }} & lui_result) - | ({32{op_sll }} & sll_result) - | ({32{op_srl|op_sra }} & sr_result) - | ({32{op_mul|op_mulh|op_mulhu}} & mul_result); + assign alu_result = ({32{op_add|op_sub}} & add_sub_result) + | ({32{op_slt }} & slt_result) + | ({32{op_sltu }} & sltu_result) + | ({32{op_and }} & and_result) + | ({32{op_nor }} & nor_result) + | ({32{op_or }} & or_result) + | ({32{op_xor }} & xor_result) + | ({32{op_lui }} & lui_result) + | ({32{op_sll }} & sll_result) + | ({32{op_srl|op_sra}} & sr_result); endmodule \ No newline at end of file diff --git a/lacpu/rtl/cpu/bru.v b/lacpu/rtl/cpu/bru.v new file mode 100644 index 0000000..7733e5a --- /dev/null +++ b/lacpu/rtl/cpu/bru.v @@ -0,0 +1,56 @@ +module bru( + input [31:0] pc, + input [31:0] rj_value, + input [31:0] rkd_value, + input [31:0] imm, + + input [ 8:0] branch_op, + + output br_taken, + output [31:0] br_target +); + + wire inst_jirl; + wire inst_b; + wire inst_bl; + wire inst_beq; + wire inst_bne; + wire inst_blt; + wire inst_bge; + wire 
inst_bltu; + wire inst_bgeu; + + wire rj_eq_rd; + wire rj_lt_rd; + wire rj_ltu_rd; + + assign {inst_beq, + inst_bne, + inst_blt, + inst_bge, + inst_bltu, + inst_bgeu, + inst_jirl, + inst_bl, + inst_b + } = branch_op; + + assign rj_eq_rd = (rj_value == rkd_value); + assign rj_ltu_rd = (rj_value < rkd_value); // unsigned less-than, used by bltu/bgeu + assign rj_lt_rd = (rj_value[31] && ~rkd_value[31]) ? 1'b1 : // signed less-than, used by blt/bge: negative < non-negative + (~rj_value[31] && rkd_value[31]) ? 1'b0 : + rj_ltu_rd; // same sign bit: unsigned order equals signed order + assign br_taken = ( inst_beq && rj_eq_rd + || inst_bne && !rj_eq_rd + || inst_blt && rj_lt_rd + || inst_bge && !rj_lt_rd + || inst_bltu && rj_ltu_rd + || inst_bgeu && !rj_ltu_rd + || inst_jirl + || inst_bl + || inst_b + ); + + assign br_target = ({32{inst_beq|inst_bne|inst_bl|inst_b|inst_blt|inst_bge|inst_bltu|inst_bgeu}} & (pc + imm)) + | ({32{inst_jirl}} & (rj_value + imm)); +endmodule \ No newline at end of file diff --git a/lacpu/rtl/cpu/csr.v b/lacpu/rtl/cpu/csr.v new file mode 100644 index 0000000..3f1aeca --- /dev/null +++ b/lacpu/rtl/cpu/csr.v @@ -0,0 +1,221 @@ +`define CRMD_ADDR 14'h0 +`define PRMD_ADDR 14'h1 +`define EUEN_ADDR 14'h2 +`define ECFG_ADDR 14'h4 +`define ESTAT_ADDR 14'h5 +`define ERA_ADDR 14'h6 +`define BADV_ADDR 14'h7 +`define EENTRY_ADDR 14'hc +`define TLBIDX_ADDR 14'h10 +`define TLBEHI_ADDR 14'h11 +`define TLBELO0_ADDR 14'h12 +`define TLBELO1_ADDR 14'h13 +`define ASID_ADDR 14'h18 +`define PGDL_ADDR 14'h19 +`define PGDH_ADDR 14'h1a +`define PGD_ADDR 14'h1b +`define CPUID_ADDR 14'h20 +`define SAVE0_ADDR 14'h30 +`define SAVE1_ADDR 14'h31 +`define SAVE2_ADDR 14'h32 +`define SAVE3_ADDR 14'h33 +`define TID_ADDR 14'h40 +`define TCFG_ADDR 14'h41 +`define TVAL_ADDR 14'h42 +`define TICLR_ADDR 14'h44 +`define LLBCTL_ADDR 14'h60 +`define TLBRENTRY_ADDR 14'h88 +`define CTAG_ADDR 14'h98 +`define DMW0_ADDR 14'h180 +`define DMW1_ADDR 14'h181 + +module csr( + input clk, + input reset, + input stall, + + input [31:0] pc, + + input csr_we, + input [ 6:0] csr_op, // 7 bits wide: this module unpacks csr_op into seven inst_* flags + input [13:0] csr_addr, + input csr_wdata_sel, + input [31:0] csr_wdata, +
output [31:0] csr_rdata, + + output except_en, + output [31:0] new_pc +); + reg [31:0] crmd; // current mode information + reg [31:0] prmd; // pre-exception mode information + reg [31:0] euen; // extended component unit enable + reg [31:0] ecfg; // exception configuration + reg [31:0] estat; // exception status + reg [31:0] era; // exception return address + reg [31:0] badv; // bad virtual address + reg [31:0] eentry; // exception entry address + reg [31:0] tlbidx; // TLB index + reg [31:0] tlbehi; // TLB entry high-order bits + reg [31:0] tlbelo0; // TLB entry low-order bits 0 + reg [31:0] tlbelo1; // TLB entry low-order bits 1 + reg [31:0] asid; // address space identifier + reg [31:0] pgdl; // page global directory base, lower half address space + reg [31:0] pgdh; // page global directory base, higher half address space + reg [31:0] pgd; // page global directory base + reg [31:0] cpuid; // CPU identity + reg [31:0] save0; // data save 0 + reg [31:0] save1; // data save 1 + reg [31:0] save2; // data save 2 + reg [31:0] save3; // data save 3 + reg [31:0] tid; // timer ID + reg [31:0] tcfg; // timer configuration + reg [31:0] tval; // timer value + reg [31:0] ticlr; // timer interrupt clear + reg [31:0] llbctl; // LLbit control + reg [31:0] tlbrentry; // TLB refill exception entry address + reg [31:0] ctag; // cache tag + reg [31:0] dmw0; // direct mapping configuration window 0 + reg [31:0] dmw1; // direct mapping configuration window 1 + + reg [31:0] csr_rdata_r; + + wire inst_sc_w; + wire inst_csrrd; + wire inst_csrwr; + wire inst_csrxchg; + wire inst_rdcntid_w; + wire inst_rdcntvl_w; + wire inst_rdcntvh_w; + + wire [31:0] csr_wdata_temp; + + + assign csr_rdata = csr_rdata_r; // read port: value of the register selected by csr_addr + + always @(*) begin + if(|csr_addr) begin // NOTE(review): false when csr_addr is 14'h0, which is `CRMD_ADDR, so the CRMD arm is never selected and CRMD reads return 0 - confirm intended + case(csr_addr) + `CRMD_ADDR : csr_rdata_r <= crmd; + `PRMD_ADDR : csr_rdata_r <= prmd; + `EUEN_ADDR : csr_rdata_r <= euen; + `ECFG_ADDR : csr_rdata_r <= ecfg; + `ESTAT_ADDR : csr_rdata_r <= estat; + `ERA_ADDR : csr_rdata_r <= era; + `BADV_ADDR : csr_rdata_r <= badv; + `EENTRY_ADDR : csr_rdata_r <= eentry; + `TLBIDX_ADDR : csr_rdata_r <= tlbidx; + `TLBEHI_ADDR : csr_rdata_r <= tlbehi; + `TLBELO0_ADDR : csr_rdata_r <= tlbelo0; + `TLBELO1_ADDR : csr_rdata_r <= tlbelo1; + `ASID_ADDR : csr_rdata_r <= asid; + `PGDL_ADDR : csr_rdata_r <= pgdl; + `PGDH_ADDR : csr_rdata_r <= pgdh; + `PGD_ADDR : csr_rdata_r <= pgd; + `CPUID_ADDR : csr_rdata_r <= cpuid; + `SAVE0_ADDR : csr_rdata_r <= save0; + `SAVE1_ADDR :
csr_rdata_r <= save1; + `SAVE2_ADDR : csr_rdata_r <= save2; + `SAVE3_ADDR : csr_rdata_r <= save3; + `TID_ADDR : csr_rdata_r <= tid; + `TCFG_ADDR : csr_rdata_r <= tcfg; + `TVAL_ADDR : csr_rdata_r <= tval; + `TICLR_ADDR : csr_rdata_r <= ticlr; + `LLBCTL_ADDR : csr_rdata_r <= llbctl; + `TLBRENTRY_ADDR : csr_rdata_r <= tlbrentry; + `CTAG_ADDR : csr_rdata_r <= ctag; + `DMW0_ADDR : csr_rdata_r <= dmw0; + `DMW1_ADDR : csr_rdata_r <= dmw1; + default : csr_rdata_r <= 32'b0; + endcase + end + else begin + csr_rdata_r <= 32'b0; + end + end + + assign {inst_csrrd, + inst_csrwr, + inst_csrxchg, + inst_rdcntid_w, + inst_rdcntvh_w, + inst_rdcntvl_w, + inst_sc_w + } = csr_op; + + assign csr_wdata_temp = csr_wdata_sel ? csr_rdata_r : csr_wdata; + + always @(posedge clk) begin + if(reset) begin + crmd <= 0; + prmd <= 0; + euen <= 0; + ecfg <= 0; + estat <= 0; + era <= 0; + badv <= 0; + eentry <= 0; + tlbidx <= 0; + tlbehi <= 0; + tlbelo0 <= 0; + tlbelo1 <= 0; + asid <= 0; + pgdl <= 0; + pgdh <= 0; + pgd <= 0; + cpuid <= 0; + save0 <= 0; + save1 <= 0; + save2 <= 0; + save3 <= 0; + tid <= 0; + tcfg <= 0; + tval <= 0; + ticlr <= 0; + llbctl <= 0; + tlbrentry <= 0; + ctag <= 0; + dmw0 <= 0; + dmw1 <= 0; + end + else if (except_en) begin + // ? 
+ end + else if (csr_we) begin + case (csr_addr) + `CRMD_ADDR : crmd <= csr_wdata_temp; + `PRMD_ADDR : prmd <= csr_wdata_temp; + `EUEN_ADDR : euen <= csr_wdata_temp; + `ECFG_ADDR : ecfg <= csr_wdata_temp; + `ESTAT_ADDR : estat <= csr_wdata_temp; + `ERA_ADDR : era <= csr_wdata_temp; + `BADV_ADDR : badv <= csr_wdata_temp; + `EENTRY_ADDR : eentry <= csr_wdata_temp; + `TLBIDX_ADDR : tlbidx <= csr_wdata_temp; + `TLBEHI_ADDR : tlbehi <= csr_wdata_temp; + `TLBELO0_ADDR : tlbelo0 <= csr_wdata_temp; + `TLBELO1_ADDR : tlbelo1 <= csr_wdata_temp; + `ASID_ADDR : asid <= csr_wdata_temp; + `PGDL_ADDR : pgdl <= csr_wdata_temp; + `PGDH_ADDR : pgdh <= csr_wdata_temp; + `PGD_ADDR : pgd <= csr_wdata_temp; + `CPUID_ADDR : cpuid <= csr_wdata_temp; + `SAVE0_ADDR : save0 <= csr_wdata_temp; + `SAVE1_ADDR : save1 <= csr_wdata_temp; + `SAVE2_ADDR : save2 <= csr_wdata_temp; + `SAVE3_ADDR : save3 <= csr_wdata_temp; + `TID_ADDR : tid <= csr_wdata_temp; + `TCFG_ADDR : tcfg <= csr_wdata_temp; + `TVAL_ADDR : tval <= csr_wdata_temp; + `TICLR_ADDR : ticlr <= csr_wdata_temp; + `LLBCTL_ADDR : llbctl <= csr_wdata_temp; + `TLBRENTRY_ADDR : tlbrentry <= csr_wdata_temp; + `CTAG_ADDR : ctag <= csr_wdata_temp; + `DMW0_ADDR : dmw0 <= csr_wdata_temp; + `DMW1_ADDR : dmw1 <= csr_wdata_temp; + endcase + end + end + + assign except_en = 1'b0; // TODO! + assign new_pc = era; // TODO! 
+endmodule \ No newline at end of file diff --git a/lacpu/rtl/cpu/div.v b/lacpu/rtl/cpu/div.v index 8bd608c..b9024b9 100644 --- a/lacpu/rtl/cpu/div.v +++ b/lacpu/rtl/cpu/div.v @@ -1,93 +1,52 @@ -//x/y //执行需要34个周期 module div( - input div_clk, reset, - input div, - input div_signed, - input [31:0] x, y, - output [31:0] s, r, - output complete - ); + input clk, + input reset, + output stallreq, + input in_valid, + output out_valid, -reg [32:0] UnsignS; -reg [32:0] UnsignR; -reg [32:0] tmp_r; -reg [7:0] count; -wire [32:0] tmp_d; -wire [32:0] result_r; -wire [32:0] UnsignX, UnsignY; + input [31:0] a, + input [31:0] b, + + output reg [31:0] quotient, //商 + output reg [31:0] remainder //余数 +); -reg div_signed_buffer; -reg x_31_buffer; -reg y_31_buffer; -wire real_div_signed; -wire real_x_31; -wire real_y_31; -wire complete_delay; -wire real_complete; + reg [ 5:0] cnt; + wire [31:0] sub_result; + wire carry; + wire [31:0] temp; -assign complete_delay = (count == 8'hf0); -assign real_complete = complete_delay || complete; - -always @(posedge div_clk) begin - if (reset) begin - div_signed_buffer <= 1'b0; - x_31_buffer <= 1'b0; - y_31_buffer <= 1'b0; - end - else if (div) begin - div_signed_buffer <= div_signed; //when div inst go to ms, div_signed will be changed. so buffer it. - x_31_buffer <= x[31]; - y_31_buffer <= y[31]; - end -end - -assign real_div_signed = real_complete ? div_signed_buffer : div_signed; -assign real_x_31 = real_complete ? x_31_buffer : x[31]; -assign real_y_31 = real_complete ? y_31_buffer : y[31]; - -assign UnsignX = {1'b0, (real_div_signed ? (x[31] ? (~x + 1) : x) : x)}; //取绝对值并扩展至33位 -assign UnsignY = {1'b0, (real_div_signed ? (y[31] ? 
(~y + 1) : y) : y)}; - -always @(posedge div_clk) begin //33位除法计算 - if (reset || ~div || complete_delay) begin - count <= 8'd32; //计算33次 - tmp_r <= 33'b0; - end - else if (~(count[7])) begin - if (tmp_d[32]) begin //tmp_d为负数 - UnsignS <= {UnsignS[31:0], 1'b0}; - tmp_r <= result_r; - end - else begin - UnsignS <= {UnsignS[31:0], 1'b1}; - tmp_r <= tmp_d; + always @ (posedge clk) begin + if (reset) begin + cnt <= 0; + end + else if (cnt != 0) begin + cnt <= cnt -1; + end + else if (in_valid) begin + cnt <= 32; end - count <= count - 8'd1; - end - else begin - UnsignR <= tmp_r; - count <= 8'hf0; //complete signal only maintain one clock end -end + assign temp = {remainder[30:0],quotient[31]}; + assign carry = temp < b ? 0 : 1; + assign sub_result = carry ? temp - b : temp; -assign complete = (count == 8'hff);//chenji - -assign result_r = {tmp_r[31:0], UnsignX[count]}; -assign tmp_d = result_r - UnsignY; - -wire [32:0] TmpS, TmpR; -assign TmpS = (real_div_signed ? ((real_x_31 == real_y_31) ? UnsignS : ~(UnsignS - 1)) : UnsignS); //去绝对值并截位 -assign TmpR = (real_div_signed ? (real_x_31 ? 
~(UnsignR - 1) : UnsignR) : UnsignR); - -assign s = TmpS[31:0]; -assign r = TmpR[31:0]; - -endmodule - -//表达式的符号关系 -//x[31] y[31] s[31] r[31] -// 0 0 0 0 -// 0 1 1 0 -// 1 0 1 1 -// 1 1 0 1 + always @ (posedge clk) begin + if (reset) begin + quotient <= 0; + remainder <= 0; + end + else if (cnt != 0) begin + {remainder, quotient} <= {sub_result, quotient[30:0], carry}; + end + else if (in_valid) begin + quotient <= a; + remainder <= 0; + end + end + + assign out_valid = (cnt==0); + assign stallreq = in_valid | (~(cnt==0)); +endmodule \ No newline at end of file diff --git a/lacpu/rtl/cpu/exe_stage.v b/lacpu/rtl/cpu/exe_stage.v index f356ff9..c0a58eb 100755 --- a/lacpu/rtl/cpu/exe_stage.v +++ b/lacpu/rtl/cpu/exe_stage.v @@ -1,184 +1,226 @@ -`include "mycpu.vh" +module exe_stage +#( + parameter BR_BUS_WD = 33, + parameter DS_TO_ES_BUS_WD = 206, + parameter ES_TO_MS_BUS_WD = 175, + parameter MS_TO_ES_BUS_WD = 38, + parameter WS_TO_ES_BUS_WD = 38 +) +( + input clk, + input reset, + input flush, + input [ 5:0] stall, -module exe_stage( - input clk , - input reset , - //allowin - input ms_allowin , - output es_allowin , - //from ds - input ds_to_es_valid, - input [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus , - //to ms - output es_to_ms_valid, - output [`ES_TO_MS_BUS_WD -1:0] es_to_ms_bus , - // data sram interface - output data_sram_en , - output [ 3:0] data_sram_wen , - output [31:0] data_sram_addr , - output [31:0] data_sram_wdata, - //fw - output [`ES_TO_FW_BUS_WD -1:0] es_to_fw_bus , - input [`FW_TO_ES_BUS_WD -1:0] fw_to_es_bus , - //from ms - input [`MS_TO_ES_BUS_WD -1:0] ms_to_ds_bus , - //from ws - input [`WS_TO_ES_BUS_WD -1:0] ws_to_ds_bus , - //div_mul - output es_div_enable , - output es_div_sign , - output [31:0] es_rf_rdata1 , - output [31:0] es_rf_rdata2 , - input div_complete + output stallreq_es, + + input [DS_TO_ES_BUS_WD -1:0] ds_to_es_bus, + output [ES_TO_MS_BUS_WD -1:0] es_to_ms_bus, + input [MS_TO_ES_BUS_WD -1:0] ms_to_es_bus, + input [WS_TO_ES_BUS_WD -1:0] 
ws_to_es_bus, + + output [BR_BUS_WD -1:0] br_bus, + + output data_sram_en, + output [ 7:0] data_sram_we, + output [31:0] data_sram_addr, + output [31:0] data_sram_wdata ); - reg es_valid ; - wire es_ready_go ; + reg [DS_TO_ES_BUS_WD -1:0] ds_to_es_bus_r; - reg [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus_r; - wire [18:0] es_alu_op; - wire es_src1_is_pc; - wire es_src2_is_imm; - wire es_src2_is_4; - wire es_mem_to_reg; - wire es_reg_we; - wire es_mem_we; - wire [ 4:0] es_load_op; - wire [ 2:0] es_store_op; - wire [ 4:0] es_dest; - wire [31:0] es_imm; + wire [ 6:0] csr_op; + wire csr_wdata_sel; + wire [13:0] csr_addr; + wire csr_we; + wire [11:0] alu_op; + wire [ 3:0] mul_div_op; + wire mul_div_sign; + wire [ 8:0] branch_op; + wire [ 2:0] store_op; + wire [ 5:0] load_op; + wire reg_we; + wire src1_is_pc; + wire src2_is_imm; + wire src2_is_4; + wire [ 4:0] rj; + wire [ 4:0] rkd; + wire [31:0] rj_value; + wire [31:0] rkd_value; + wire [ 4:0] dest; + wire [31:0] imm; wire [31:0] es_pc; + wire [31:0] inst; - wire [31:0] ms_alu_result; - wire [31:0] ws_rf_wdata; + wire ms_reg_we; + wire [ 4:0] ms_dest; + wire [31:0] ms_result; + wire ws_reg_we; + wire [ 4:0] ws_dest; + wire [31:0] ws_result; - wire es_src1_is_es_dest; - wire es_src1_is_ms_dest; - wire es_src2_is_es_dest; - wire es_src2_is_ms_dest; - wire es_data_is_rf_wdata; + wire [31:0] src1; + wire [31:0] src2; + wire [31:0] alu_src1; + wire [31:0] alu_src2; + wire [31:0] alu_result; - assign {es_alu_op , //159:141 - es_src1_is_pc , //140:140 - es_src2_is_imm , //139:139 - es_src2_is_4 , //138:138 - es_mem_to_reg , //137:137 - es_reg_we , //136:136 - es_mem_we , //135:135 - es_load_op , //134:134 - es_store_op , //133:133 - es_dest , //132:128 - es_imm , //127:96 - es_rf_rdata1 , //95 :64 - es_rf_rdata2 , //63 :32 - es_pc //31 :0 - } = ds_to_es_bus_r; + wire br_taken; + wire [31:0] br_target; - assign {es_src1_is_es_dest , - es_src1_is_ms_dest , - es_src2_is_es_dest , - es_src2_is_ms_dest , - es_data_is_rf_wdata - } = 
fw_to_es_bus; + wire data_sram_en_temp; - assign ms_alu_result = ms_to_ds_bus; - assign ws_rf_wdata = ws_to_ds_bus; + wire stallreq_for_mul_div; + wire [31:0] mul_div_result; + wire [31:0] es_result; - wire [31:0] es_alu_src1 ; - wire [31:0] es_alu_src2 ; - wire [31:0] es_alu_result; - wire es_Carry ; - wire es_Sign ; - wire es_Overflow ; - wire es_Zero ; + wire [31:0] csr_wdata; + wire [63:0] csr_bus; + - wire es_inst_divw ; - wire es_inst_modw ; - wire es_inst_divwu; - wire es_inst_modwu; - wire [ 1:0] div_op; - wire div_stall; + assign {csr_op ,//236:230 + csr_wdata_sel ,//229:229 + csr_addr ,//228:215 + csr_we ,//214:214 + alu_op ,//213:202 + mul_div_op ,//201:198 + mul_div_sign ,//197:197 + branch_op ,//196:188 + store_op ,//187:185 + load_op ,//184:179 + reg_we ,//178:178 + src1_is_pc ,//177:177 + src2_is_imm ,//176:176 + src2_is_4 ,//175:175 + rj ,//174:170 + rkd ,//169:165 + rj_value ,//164:133 + rkd_value ,//132:101 + dest ,//100:96 + imm ,//95 :64 + es_pc ,//63 :32 + inst //31 :0 + } = ds_to_es_bus_r; // NOTE(review): the fields sum to 237 bits but DS_TO_ES_BUS_WD defaults to 206, so fields above bit 205 would read as 0 unless the parameter is overridden - confirm - assign es_to_ms_bus = {div_op , //77:76 - es_load_op , //75:71 - es_mem_to_reg , //70:70 - es_reg_we , //69:69 - es_dest , //68:64 - es_alu_result , //63:32 - es_pc //31:0 + assign {ms_reg_we, + ms_dest, + ms_result + } = ms_to_es_bus; + + assign {ws_reg_we, + ws_dest, + ws_result + } = ws_to_es_bus; + + assign es_to_ms_bus = {csr_bus ,//174:111 + load_op ,//110:105 + store_op ,//104:102 + reg_we ,//101:101 + dest ,//100:96 + es_result,//95 :64 + es_pc ,//63 :32 + inst //31 :0 }; - - assign es_to_fw_bus = {es_rf_rdata2 , - es_dest , - es_reg_we , - es_mem_we - }; - - assign es_ready_go = !(div_stall); - assign es_allowin = !es_valid || es_ready_go && ms_allowin; - assign es_to_ms_valid = es_valid && es_ready_go; - always @(posedge clk) begin - if (reset) begin - es_valid <= 1'b0; - end - else if (es_allowin) begin - es_valid <= ds_to_es_valid; - end - + + always @ (posedge clk) begin if (reset) begin ds_to_es_bus_r <= 0; end - if (ds_to_es_valid &&
es_allowin) begin + else if (flush) begin + ds_to_es_bus_r <= 0; + end + //nop, id stall and ex not stall + else if (stall[2]&(!stall[3])) begin + ds_to_es_bus_r <= 0; + end + //nop, id not stall and br_bus[32] + else if (!stall[2]&br_bus[32]) begin + ds_to_es_bus_r <= 0; + end + // id not stall so can go on + else if (!stall[2]) begin ds_to_es_bus_r <= ds_to_es_bus; end end - assign es_alu_src1 = es_src1_is_pc ? es_pc : - es_src1_is_es_dest ? ms_alu_result : - es_src1_is_ms_dest ? ws_rf_wdata : - es_rf_rdata1; - assign es_alu_src2 = es_src2_is_imm ? es_imm : - es_src2_is_4 ? 32'd4 : - es_src2_is_es_dest ? ms_alu_result : - es_src2_is_ms_dest ? ws_rf_wdata : - es_rf_rdata2; - - assign es_inst_divw = es_alu_op[15]; - assign es_inst_modw = es_alu_op[16]; - assign es_inst_divwu = es_alu_op[17]; - assign es_inst_modwu = es_alu_op[18]; + assign src1 = ms_reg_we & (ms_dest == rj ) & (rj != 1'b0) ? ms_result : + ws_reg_we & (ws_dest == rj ) & (rj != 1'b0) ? ws_result : + rj_value; + assign src2 = ms_reg_we & (ms_dest == rkd) & (rkd != 1'b0) ? ms_result : + ws_reg_we & (ws_dest == rkd) & (rkd != 1'b0) ? ws_result : + rkd_value; - assign div_op[0] = es_inst_divw | es_inst_divwu; - assign div_op[1] = es_inst_modw | es_inst_modwu; - - assign es_div_enable = (div_op[0] | div_op[1]) & es_valid; - - assign es_div_sign = es_inst_divw | es_inst_modw; - - assign div_stall = es_div_enable & ~div_complete; + assign alu_src1 = src1_is_pc ? es_pc : + src1; + assign alu_src2 = src2_is_4 ? 3'd4 : + src2_is_imm ? imm : + src2; alu u_alu( - .alu_op (es_alu_op[14:0]), - .alu_src1 (es_alu_src1 ), - .alu_src2 (es_alu_src2 ), - .alu_result (es_alu_result) - ); + .alu_op (alu_op ), + .alu_src1 (alu_src1 ), + .alu_src2 (alu_src2 ), + .alu_result(alu_result) + ); - assign data_sram_en = 1'b1; - assign data_sram_wen = (es_mem_we && es_valid) ? 
(({4{es_store_op[0]}} & ({4{es_alu_result[1:0] == 2'b00}} & 4'b0001) - | ({4{es_alu_result[1:0] == 2'b01}} & 4'b0010) - | ({4{es_alu_result[1:0] == 2'b10}} & 4'b0100) - | ({4{es_alu_result[1:0] == 2'b11}} & 4'b1000)) - | ({4{es_store_op[1]}} & ({4{es_alu_result[1:0] == 2'b01}} & 4'b0011) - | ({4{es_alu_result[1:0] == 2'b10}} & 4'b1100)) - | ({4{es_store_op[2]}} & 4'b1111 )) - : 4'b0000; - - assign data_sram_addr = es_alu_result; - assign data_sram_wdata = es_data_is_rf_wdata ? ws_rf_wdata : - es_store_op[0] ? {4{es_rf_rdata2[ 7:0]}} : - es_store_op[1] ? {2{es_rf_rdata2[15:0]}} : - es_store_op[2] ? es_rf_rdata2 : - 32'b0; + bru u_bru( + .pc (es_pc ), + .rj_value (src1 ), + .rkd_value(src2 ), + .imm (imm ), + .branch_op(branch_op), + .br_taken (br_taken ), + .br_target(br_target) + ); + + wire csr_cancel; + wire csr_cancel_reg; + + assign csr_cancel = 1'b0; // placeholder held inactive: when high it forces br_bus[32] and data_sram_en low + assign csr_cancel_reg = 1'b0; //TODO! + + assign br_bus = {br_taken & ~(csr_cancel|csr_cancel_reg), + br_target + }; + + lsu u_lsu( + .load_op (load_op ), + .store_op (store_op ), + .rj_value (rj_value ), // NOTE(review): raw ds_to_es_bus_r value, not the forwarded src1 that u_bru receives - confirm hazards are covered + .rkd_value (rkd_value ), // NOTE(review): raw value, not the forwarded src2 - confirm + .imm (imm ), + + .data_sram_en (data_sram_en_temp), + .data_sram_we (data_sram_we ), + .data_sram_addr (data_sram_addr ), + .data_sram_wdata(data_sram_wdata ) + ); + assign data_sram_en = (csr_cancel|csr_cancel_reg) ? 1'b0 : data_sram_en_temp; + + // mul_div + mul_div_top u_mul_div_top( + .clk (clk ), + .reset (reset ), + .stall (stall ), + .stallreq (stallreq_for_mul_div), + .mul_div_op (mul_div_op ), + .mul_div_sign (mul_div_sign ), + .a (rj_value ), // NOTE(review): unforwarded operand, unlike the ALU/BRU inputs - confirm + .b (rkd_value ), + .mul_div_result(mul_div_result ) + ); + + assign es_result = |mul_div_op ? mul_div_result : + alu_result; + + assign csr_wdata = csr_wdata_sel ?
imm : src1; + assign csr_bus = {csr_we, + csr_wdata_sel, + csr_op, + csr_addr, + csr_wdata + }; + + assign stallreq_es = stallreq_for_mul_div; endmodule \ No newline at end of file diff --git a/lacpu/rtl/cpu/forward.v b/lacpu/rtl/cpu/forward.v deleted file mode 100644 index afef503..0000000 --- a/lacpu/rtl/cpu/forward.v +++ /dev/null @@ -1,63 +0,0 @@ -`include "mycpu.vh" - -module forward( - input clk , - input reset , - input [`DS_TO_FW_BUS_WD -1:0] ds_to_fw_bus, - input [`ES_TO_FW_BUS_WD -1:0] es_to_fw_bus, - input [`MS_TO_FW_BUS_WD -1:0] ms_to_fw_bus, - - output [`FW_TO_ES_BUS_WD -1:0] fw_to_es_bus -); - - reg [`DS_TO_FW_BUS_WD -1:0] ds_to_fw_bus_r; - reg [`ES_TO_FW_BUS_WD -1:0] es_to_fw_bus_r; - reg [`MS_TO_FW_BUS_WD -1:0] ms_to_fw_bus_r; - - wire [4:0] ds_rf_raddr1; - wire [4:0] ds_rf_raddr2; - wire [4:0] es_rf_raddr2; - wire [4:0] es_dest; - wire [4:0] ms_dest; - - wire es_mem_we; - wire es_reg_we; - wire ms_reg_we; - - wire src1_is_es_dest; - wire src1_is_ms_dest; - wire src2_is_es_dest; - wire src2_is_ms_dest; - wire data_is_rf_wdata; - - assign {ds_rf_raddr1, ds_rf_raddr2 } = ds_to_fw_bus_r; - assign {es_rf_raddr2, es_dest, es_reg_we, es_mem_we} = es_to_fw_bus_r; - assign {ms_dest , ms_reg_we} = ms_to_fw_bus_r; - - assign fw_to_es_bus = {src1_is_es_dest , //4:4 - src1_is_ms_dest , //3:3 - src2_is_es_dest , //2:2 - src2_is_ms_dest , //1:1 - data_is_rf_wdata //0:0 - }; - always @(posedge clk) begin - if(reset) begin - ds_to_fw_bus_r <= 0; - es_to_fw_bus_r <= 0; - ms_to_fw_bus_r <= 0; - end - else begin - ds_to_fw_bus_r <= ds_to_fw_bus; - es_to_fw_bus_r <= es_to_fw_bus; - ms_to_fw_bus_r <= ms_to_fw_bus; - end - end - - assign src1_is_ms_dest = ms_reg_we && (ms_dest != 5'b0) && (es_dest != ds_rf_raddr1) && (ms_dest == ds_rf_raddr1); - assign src1_is_es_dest = es_reg_we && (es_dest != 5'b0) && (es_dest == ds_rf_raddr1); - assign src2_is_ms_dest = ms_reg_we && (ms_dest != 5'b0) && (es_dest != ds_rf_raddr2) && (ms_dest == ds_rf_raddr2); - assign src2_is_es_dest = 
es_reg_we && (es_dest != 5'b0) && (es_dest == ds_rf_raddr2); - assign data_is_rf_wdata = ms_reg_we && (ms_dest != 5'b0) && (ms_dest == es_rf_raddr2) && es_mem_we; - - -endmodule \ No newline at end of file diff --git a/lacpu/rtl/cpu/id_stage.v b/lacpu/rtl/cpu/id_stage.v index 944c810..e6ee87c 100755 --- a/lacpu/rtl/cpu/id_stage.v +++ b/lacpu/rtl/cpu/id_stage.v @@ -1,271 +1,211 @@ -`include "mycpu.vh" +module id_stage +#( + parameter FS_TO_DS_BUS_WD = 32, + parameter DS_TO_ES_BUS_WD = 206, + parameter WS_TO_RF_BUS_WD = 38 +) +( + input clk, + input reset, -// 译码阶段 -module id_stage( - input clk , - input reset , - //allowin - input es_allowin , - output ds_allowin , - //from fs - input fs_to_ds_valid, - input [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus , - //to es - output ds_to_es_valid, - output [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus , - //to rf - input [`WS_TO_RF_BUS_WD -1:0] ws_to_rf_bus , - //to fw - output [`DS_TO_FW_BUS_WD -1:0] ds_to_fw_bus , - //to fs - output [`BR_BUS_WD -1:0] br_bus + input flush, + input [ 5:0] stall, + input br_taken, + + output stallreq_id, + + input pc_valid, + input [31:0] inst_sram_rdata, + input [31:0] csr_vec_h, + + input [FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus, + input [WS_TO_RF_BUS_WD -1:0] ws_to_rf_bus, + output [DS_TO_ES_BUS_WD -1:0] ds_to_es_bus ); + reg pc_valid_r; + reg [31:0] fs_to_ds_bus_r; + reg [31:0] csr_vec_h_r; - reg ds_valid ; - wire ds_ready_go; + reg [31:0] inst_r; + reg stall_flag; - wire [31 :0] fs_pc; - reg [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus_r; - assign fs_pc = fs_to_ds_bus[31:0]; + reg [ 6:0] ex_load_buffer; + reg ex_csr_buffer; - wire [31:0] ds_inst; - wire [31:0] ds_pc ; - assign {ds_inst, - ds_pc } = fs_to_ds_bus_r; + wire br_flush; + wire [31:0] ds_pc; - wire rf_we ; - wire [ 4:0] rf_waddr; + wire src1_is_pc; + wire src2_is_imm; + wire src2_is_4; + wire src_reg_is_rd; + wire [ 4:0] rj; + wire [ 4:0] rk; + wire [ 4:0] rd; + wire [ 4:0] rkd; + wire [31:0] imm; + wire [ 4:0] dest; + wire [11:0] alu_op; + wire [ 3:0] 
mul_div_op; + wire mul_div_sign; + wire [ 8:0] branch_op; + wire [ 5:0] load_op; + wire [ 2:0] store_op; + wire reg_we; + + wire csr_we; + wire [ 6:0] csr_op; + wire [13:0] csr_addr; + wire csr_wdata_sel; + + + wire [31:0] inst; + wire [31:0] next_inst; + + wire rf_raddr1; + wire [31:0] rf_rdata1; + wire rf_raddr2; + wire [31:0] rf_rdata2; + wire rf_we; + wire rf_waddr; wire [31:0] rf_wdata; + + wire [31:0] rj_value; + wire [31:0] rkd_value; + + wire [ 4:0] ex_rf_waddr; + wire ex_is_load; + wire ex_is_csr; + wire ex_rf_we; + wire stallreq_load; + wire stallreq_csr; + + assign ds_pc = fs_to_ds_bus_r; + assign br_flush = br_taken; assign {rf_we , //37:37 rf_waddr, //36:32 rf_wdata //31:0 } = ws_to_rf_bus; - wire [18:0] alu_op; - wire src1_is_pc; - wire src2_is_imm; - wire src2_is_4; - wire mem_to_reg; - wire reg_we; - wire mem_we; - wire [ 4:0] load_op; - wire [ 2:0] store_op; - wire [ 4:0] dest; - wire [31:0] imm; + wire csr_vec_l; + assign csr_vec_l = 0; //TODO! - wire [21:0] op; - wire [ 4:0] ra; - wire [ 4:0] rk; - wire [ 4:0] rj; - wire [ 4:0] rd; - wire [ 7:0] op6_d; - wire [ 7:0] op7_d; - wire [ 7:0] op10_d; - wire [31:0] op17_d; - - wire inst_addw; - wire inst_subw; - wire inst_addiw; - wire inst_lu12iw; - wire inst_slt; - wire inst_sltu; - wire inst_slti; - wire inst_sltui; - wire inst_pcaddu12i; - wire inst_and; - wire inst_or; - wire inst_nor; - wire inst_xor; - wire inst_andi; - wire inst_ori; - wire inst_xori; - wire inst_sllw; - wire inst_srlw; - wire inst_sraw; - wire inst_slliw; - wire inst_srliw; - wire inst_sraiw; - wire inst_mulw; - wire inst_mulhw; - wire inst_mulhwu; - wire inst_beq; - wire inst_bne; - wire inst_blt; - wire inst_bge; - wire inst_bltu; - wire inst_bgeu; - wire inst_b; - wire inst_bl; - wire inst_jirl; - wire inst_ldb; - wire inst_ldh; - wire inst_ldw; - wire inst_ldbu; - wire inst_ldhu; - wire inst_stb; - wire inst_sth; - wire inst_stw; - wire inst_divw; - wire inst_modw; - wire inst_divwu; - wire inst_modwu; - - wire dst_is_r1; - 
- wire [ 4:0] rf_raddr1; - wire [31:0] rf_rdata1; - wire [ 4:0] rf_raddr2; - wire [31:0] rf_rdata2; - - wire rj_eq_rd; - wire rj_lt_rd_unsign; - wire rj_lt_rd_sign; - wire br_taken; - wire [31:0] br_target; - - assign ds_to_es_bus = {alu_op , //159:141 - src1_is_pc , //140:140 - src2_is_imm , //139:139 - src2_is_4 , //138:138 - mem_to_reg , //137:137 - reg_we , //136:136 - mem_we , //135:135 - load_op , //134:134 - store_op , //133:133 - dest , //132:128 - imm , //127:96 - rf_rdata1 , //95 :64 - rf_rdata2 , //63 :32 - ds_pc //31 :0 + assign csr_vec = {csr_vec_h_r, csr_vec_l}; + assign ds_to_es_bus = {csr_op ,//205:199 + csr_wdata_sel ,//198:198 + csr_addr ,//197:184 + csr_we ,//183:183 + alu_op ,//182:171 + mul_div_op & {4{pc_valid_r}} ,//170:167 + mul_div_sign & pc_valid_r ,//166:166 + branch_op & {9{pc_valid_r}} ,//165:157 + store_op & {3{pc_valid_r}} ,//156:154 + load_op & {6{pc_valid_r}} ,//153:148 + reg_we & pc_valid_r ,//147:147 + src1_is_pc ,//146:146 + src2_is_imm ,//145:145 + src2_is_4 ,//144:144 + rj ,//143:139 + rkd ,//138:134 + rj_value ,//133:102 + rkd_value ,//101:70 + dest ,//69 :65 + imm ,//95 :64 + ds_pc ,//63 :32 + inst & {32{pc_valid_r}} //31 :0 }; - assign ds_to_fw_bus = {rf_raddr1 , - rf_raddr2 - }; - - assign ds_ready_go = 1'b1; - assign ds_allowin = (!ds_valid || ds_ready_go && es_allowin); - assign ds_to_es_valid = ds_valid && ds_ready_go; - always @(posedge clk) begin - + always @ (posedge clk)begin if (reset) begin - ds_valid <= 1'b0; + pc_valid_r <= 1'b0; + fs_to_ds_bus_r <= 32'b0; + csr_vec_h_r <= 32'b0; end - else if (ds_allowin) begin - ds_valid <= fs_to_ds_valid; + else if (flush) begin + pc_valid_r <= 1'b0; + fs_to_ds_bus_r <= 32'b0; + csr_vec_h_r <= 32'b0; end - - if (reset) begin - fs_to_ds_bus_r <= 0; + //nop, ID stall and EX not stall + else if (stall[1] & (!stall[2]))begin + pc_valid_r <= 1'b0; + fs_to_ds_bus_r <= 32'b0; + csr_vec_h_r <= 32'b0; end - else if (fs_to_ds_valid && ds_allowin) begin + //nop, ID not stall but branch + 
else if (!stall[1] & br_flush) begin + pc_valid_r <= 1'b0; + fs_to_ds_bus_r <= 32'b0; + csr_vec_h_r <= 32'b0; + end + // ID not stall so go on + else if (!stall[1]) begin + pc_valid_r <= pc_valid; fs_to_ds_bus_r <= fs_to_ds_bus; + csr_vec_h_r <= csr_vec_h; end end - assign op = ds_inst[31:10]; - assign ra = ds_inst[19:15]; - assign rk = ds_inst[14:10]; - assign rj = ds_inst[ 9: 5]; - assign rd = ds_inst[ 4: 0]; + always @ (posedge clk) begin + if (reset) begin + inst_r <= 64'b0; + stall_flag <= 1'b0; + end + else if (flush) begin + inst_r <= 64'b0; + stall_flag <= 1'b0; + end + //if not stall, get inst from inst_sram + else if (!stall[1]) begin + inst_r <= inst_sram_rdata; + stall_flag <= 1'b0; + end + else if (stall_flag) begin - decoder_3_8 u_dec0(.in(op[18:16]), .out(op6_d )); - decoder_3_8 u_dec1(.in(op[17:15]), .out(op7_d )); - decoder_3_8 u_dec2(.in(op[14:12]), .out(op10_d)); - decoder_5_32 u_dec3(.in(ra ), .out(op17_d)); + end + //if stall and id stall, get inst from inst_ram ? + else if (stall[1]&stall[2]) begin + inst_r <= inst_sram_rdata; + stall_flag <= 1'b1; + end + end - assign inst_addw = (op[21:10] == 12'b0000_0000_0001) & op17_d[5'b00000]; - assign inst_subw = (op[21:10] == 12'b0000_0000_0001) & op17_d[5'b00001]; - assign inst_slt = (op[21:10] == 12'b0000_0000_0001) & op17_d[5'b00100]; - assign inst_sltu = (op[21:10] == 12'b0000_0000_0001) & op17_d[5'b00101]; - assign inst_nor = (op[21:10] == 12'b0000_0000_0001) & op17_d[5'b01000]; - assign inst_and = (op[21:10] == 12'b0000_0000_0001) & op17_d[5'b01001]; - assign inst_or = (op[21:10] == 12'b0000_0000_0001) & op17_d[5'b01010]; - assign inst_xor = (op[21:10] == 12'b0000_0000_0001) & op17_d[5'b01011]; - assign inst_sllw = (op[21:10] == 12'b0000_0000_0001) & op17_d[5'b01110]; - assign inst_srlw = (op[21:10] == 12'b0000_0000_0001) & op17_d[5'b01111]; - assign inst_sraw = (op[21:10] == 12'b0000_0000_0001) & op17_d[5'b10000]; - assign inst_slliw = (op[21:10] == 12'b0000_0000_0100) & op17_d[5'b00001]; - 
assign inst_srliw = (op[21:10] == 12'b0000_0000_0100) & op17_d[5'b01001]; - assign inst_sraiw = (op[21:10] == 12'b0000_0000_0100) & op17_d[5'b10001]; - assign inst_mulw = (op[21:10] == 12'b0000_0000_0001) & op17_d[5'b11000]; - assign inst_mulhw = (op[21:10] == 12'b0000_0000_0001) & op17_d[5'b11001]; - assign inst_mulhwu = (op[21:10] == 12'b0000_0000_0001) & op17_d[5'b11010]; - assign inst_divw = (op[21:10] == 12'b0000_0000_0010) & op17_d[5'b00000]; - assign inst_modw = (op[21:10] == 12'b0000_0000_0010) & op17_d[5'b00001]; - assign inst_divwu = (op[21:10] == 12'b0000_0000_0010) & op17_d[5'b00010]; - assign inst_modwu = (op[21:10] == 12'b0000_0000_0010) & op17_d[5'b00011]; - assign inst_slti = (op[21:15] == 7'b0000_001 ) & op10_d[3'b000]; - assign inst_sltui = (op[21:15] == 7'b0000_001 ) & op10_d[3'b001]; - assign inst_addiw = (op[21:15] == 7'b0000_001 ) & op10_d[3'b010]; - assign inst_andi = (op[21:15] == 7'b0000_001 ) & op10_d[3'b101]; - assign inst_ori = (op[21:15] == 7'b0000_001 ) & op10_d[3'b110]; - assign inst_xori = (op[21:15] == 7'b0000_001 ) & op10_d[3'b111]; - assign inst_ldb = (op[21:15] == 7'b0010_100 ) & op10_d[3'b000]; - assign inst_ldh = (op[21:15] == 7'b0010_100 ) & op10_d[3'b001]; - assign inst_ldw = (op[21:15] == 7'b0010_100 ) & op10_d[3'b010]; - assign inst_stb = (op[21:15] == 7'b0010_100 ) & op10_d[3'b100]; - assign inst_sth = (op[21:15] == 7'b0010_100 ) & op10_d[3'b101]; - assign inst_stw = (op[21:15] == 7'b0010_100 ) & op10_d[3'b110]; - assign inst_ldbu = (op[21:15] == 7'b0010_101 ) & op10_d[3'b000]; - assign inst_ldhu = (op[21:15] == 7'b0010_101 ) & op10_d[3'b001]; - assign inst_lu12iw = (op[21:18] == 4'b0001 ) & op7_d[3'b010]; - assign inst_pcaddu12i = (op[21:18] == 4'b0001 ) & op7_d[3'b110]; - assign inst_jirl = (op[21:19] == 3'b010 ) & op6_d[3'b011]; - assign inst_b = (op[21:19] == 3'b010 ) & op6_d[3'b100]; - assign inst_bl = (op[21:19] == 3'b010 ) & op6_d[3'b101]; - assign inst_beq = (op[21:19] == 3'b010 ) & op6_d[3'b110]; - assign inst_bne 
= (op[21:19] == 3'b010 ) & op6_d[3'b111]; - assign inst_blt = (op[21:19] == 3'b011 ) & op6_d[3'b000]; - assign inst_bge = (op[21:19] == 3'b011 ) & op6_d[3'b001]; - assign inst_bltu = (op[21:19] == 3'b011 ) & op6_d[3'b010]; - assign inst_bgeu = (op[21:19] == 3'b011 ) & op6_d[3'b011]; + assign next_inst = stall_flag ? inst_r : inst_sram_rdata; + assign inst = ~pc_valid_r ? 32'b0 : next_inst; - assign alu_op[ 0] = inst_addw | inst_addiw | inst_pcaddu12i | inst_ldb | inst_ldh | inst_ldbu | inst_ldhu | inst_ldw | inst_stb | inst_sth | inst_stw; - assign alu_op[ 1] = inst_subw; - assign alu_op[ 2] = inst_slt | inst_slti; - assign alu_op[ 3] = inst_sltu | inst_sltui; - assign alu_op[ 4] = inst_and | inst_andi; - assign alu_op[ 5] = inst_nor; - assign alu_op[ 6] = inst_or | inst_ori; - assign alu_op[ 7] = inst_xor | inst_xori; - assign alu_op[ 8] = inst_sllw | inst_slliw; - assign alu_op[ 9] = inst_srlw | inst_srliw; - assign alu_op[10] = inst_sraw | inst_sraiw; - assign alu_op[11] = inst_lu12iw; - assign alu_op[12] = inst_mulw; - assign alu_op[13] = inst_mulhw; - assign alu_op[14] = inst_mulhwu; - assign alu_op[15] = inst_divw; - assign alu_op[16] = inst_modw; - assign alu_op[17] = inst_divwu; - assign alu_op[18] = inst_modwu; + inst_decoder u_inst_decoder( + .inst (inst ), - assign imm = {32{inst_slti | inst_sltui | inst_addiw | inst_ldb | inst_ldh | inst_ldw | inst_stb | inst_sth | inst_stw | inst_ldbu | inst_ldhu}} & {{20{ds_inst[21]}}, ds_inst[21:10]} - | {32{inst_beq | inst_bne | inst_bge | inst_bgeu | inst_blt | inst_bltu | inst_jirl}} & {{14{ds_inst[25]}}, ds_inst[25:10], 2'b0} - | {32{inst_andi | inst_ori | inst_xori }} & { 20'b0 , ds_inst[21:10]} - | {32{inst_lu12iw | inst_pcaddu12i }} & { ds_inst[24: 5], 12'b0} - | {32{inst_slliw | inst_srliw | inst_sraiw}} & { 27'b0 , rk} - | {32{inst_b | inst_bl}} & {{4{ds_inst[9]}}, ds_inst[9:0], ds_inst[25:10], 2'b0}; - - assign src1_is_pc = inst_bl | inst_jirl | inst_pcaddu12i | inst_b; - assign src2_is_4 = inst_bl | 
inst_jirl; - assign src2_is_imm = inst_addiw | inst_lu12iw | inst_pcaddu12i | inst_andi | inst_ori | inst_xori | inst_slliw | inst_srliw | inst_sraiw | inst_ldb | inst_ldh | inst_ldw | inst_ldbu | inst_ldhu | inst_stb | inst_sth | inst_stw | inst_mulhwu | inst_divwu | inst_modwu | inst_b | inst_beq | inst_bne | inst_bge | inst_bgeu | inst_blt | inst_bltu; - assign dst_is_r1 = inst_bl; - - assign reg_we = ~(inst_b | inst_beq | inst_bne | inst_bge | inst_bgeu | inst_blt | inst_bltu | inst_stw | inst_sth | inst_stb); - assign mem_we = inst_stw | inst_sth | inst_stb; - assign mem_to_reg = inst_ldw | inst_ldh | inst_ldb | inst_ldhu | inst_ldbu; - assign load_op = {inst_ldhu, inst_ldbu, inst_ldw, inst_ldh, inst_ldb}; - assign store_op = {inst_stw , inst_sth , inst_stb}; - - assign dest = dst_is_r1 ? 5'd1 : - rd; + .src1_is_pc (src1_is_pc ), + .src2_is_imm (src2_is_imm ), + .src2_is_4 (src2_is_4 ), + .src_reg_is_rd (src_reg_is_rd ), + .rj (rj ), + .rk (rk ), + .rd (rd ), + .imm (imm ), + .dest (dest ), + .alu_op (alu_op ), + .mul_div_op (mul_div_op ), + .mul_div_sign (mul_div_sign ), + .branch_op (branch_op ), + .load_op (load_op ), + .store_op (store_op ), + .csr_we (csr_we ), + .csr_op (csr_op ), + .csr_addr (csr_addr ), + .csr_wdata_sel (csr_wdata_sel ), + .sel_rf_res (sel_rf_res ), + .reg_we (reg_we ) + ); assign rf_raddr1 = rj; - assign rf_raddr2 = (inst_beq | inst_bne | inst_bge | inst_bgeu | inst_blt | inst_bltu) ? rd : rk; + assign rf_raddr2 = src_reg_is_rd ? rd : rk; + assign rkd = src_reg_is_rd ? rd : rk; + regfile u_regfile( .clk (clk ), + .reset (reset ), .raddr1 (rf_raddr1), .rdata1 (rf_rdata1), .raddr2 (rf_raddr2), @@ -275,22 +215,38 @@ module id_stage( .wdata (rf_wdata ) ); - assign rj_eq_rd = (rf_rdata1 == rf_rdata2); - assign rj_lt_rd_unsign = (rf_rdata1 < rf_rdata2); - assign rj_lt_rd_sign = (rf_rdata1[31] && ~rf_rdata2[31]) ? 1'b1 : - (~rf_rdata1[31] && rf_rdata2[31]) ? 
1'b0 : rj_lt_rd_unsign; - assign br_taken = ( inst_beq && rj_eq_rd - || inst_bne && !rj_eq_rd - || inst_blt && rj_lt_rd_sign - || inst_bge && !rj_lt_rd_sign - || inst_bltu && rj_lt_rd_unsign - || inst_bgeu && !rj_lt_rd_unsign - || inst_jirl - || inst_bl - || inst_b - ); - assign br_target = ({32{inst_beq || inst_bne || inst_bl || inst_b || inst_blt || inst_bge || inst_bltu || inst_bgeu}} & (ds_pc + imm)) - | ({32{inst_jirl}} & (rf_rdata1 + imm)) ; + assign rj_value = rf_rdata1; + assign rkd_value = rf_rdata2; + + + always @ (posedge clk) begin + if (reset) begin + ex_load_buffer <= 7'b0; + ex_csr_buffer <= 1'b0; + end + else if (flush) begin + ex_load_buffer <= 7'b0; + ex_csr_buffer <= 1'b0; + end + else if (stall[2]&(!stall[3])) begin + ex_load_buffer <= 7'b0; + ex_csr_buffer <= 1'b0; + end + else if (!stall[2]) begin + ex_load_buffer <= {|load_op, rf_we, rf_waddr}; + ex_csr_buffer <= |csr_op; + end + end + + assign {ex_is_load, + ex_rf_we, + ex_rf_waddr + } = ex_load_buffer; + assign ex_is_csr = ex_csr_buffer; + //Stall ID when the instruction in EX is a load (or CSR read) whose destination register index matches a source register index (rj / rkd) read here; r0 never creates a dependency. NOTE(review): ex_load_buffer latches rf_we/rf_waddr, presumably the regfile write port rather than this stage's reg_we/dest - confirm. + assign stallreq_load = ex_is_load & ex_rf_we & ((ex_rf_waddr==rj & rj!=0)|(ex_rf_waddr==rkd & rkd!=0)); + assign stallreq_csr = ex_is_csr & ex_rf_we & ((ex_rf_waddr==rj & rj!=0)|(ex_rf_waddr==rkd & rkd!=0)); + assign stallreq_id = stallreq_load | stallreq_csr; + + - assign br_bus = {br_taken, br_target}; endmodule \ No newline at end of file diff --git a/lacpu/rtl/cpu/if_stage.v b/lacpu/rtl/cpu/if_stage.v index 62fea2b..f8f6a82 100755 --- a/lacpu/rtl/cpu/if_stage.v +++ b/lacpu/rtl/cpu/if_stage.v @@ -1,71 +1,64 @@ -`include "mycpu.vh" +module if_stage +#( + parameter BR_BUS_WD = 33, + parameter FS_TO_DS_BUS_WD = 32 +) +( + input clk , + input reset, + + input flush, + input [ 5:0] stall, + + input [31:0] new_pc, + + input timer_int, + output [31:0] csr_vec_h, -// 取指阶段 -module if_stage( - input clk , - input reset , - //allwoin - input ds_allowin , - //brbus - input [`BR_BUS_WD 
-1:0] br_bus , - //to ds - output fs_to_ds_valid , - output [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus , - // inst sram interface output inst_sram_en , - output [ 3:0] inst_sram_wen , + output [ 3:0] inst_sram_we , output [31:0] inst_sram_addr , output [31:0] inst_sram_wdata, - input [31:0] inst_sram_rdata + + input [BR_BUS_WD -1:0] br_bus, + output [FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus ); - - reg fs_valid; - wire fs_ready_go; - wire fs_allowin; - wire to_fs_valid; - - wire [31:0] seq_pc; - wire [31:0] nextpc; - - wire br_taken; - wire [ 31:0] br_target; - assign {br_taken, br_target} = br_bus; - - wire [31:0] fs_inst; + reg pc_valid; reg [31:0] fs_pc; - assign fs_to_ds_bus = {fs_inst , - fs_pc }; + wire [31:0] seq_pc; + wire [31:0] next_pc; - // pre-IF stage - assign to_fs_valid = ~reset; - assign seq_pc = fs_pc + 3'h4; - assign nextpc = br_taken ? br_target : seq_pc; + wire br_taken; + wire [31:0] br_target; - // IF stage - assign fs_ready_go = 1'b1; - assign fs_allowin = !fs_valid || fs_ready_go && ds_allowin; - assign fs_to_ds_valid = fs_valid && fs_ready_go; - always @(posedge clk) begin + assign fs_to_ds_bus = fs_pc; + + assign {br_taken, + br_target + } = br_bus; + + always @ (posedge clk) begin if (reset) begin - fs_valid <= 1'b0; + pc_valid <= 1'b0; + fs_pc <= 32'h1bff_fffc; end - else if (fs_allowin) begin - fs_valid <= to_fs_valid; + else if (flush) begin + pc_valid <= 1'b1; + fs_pc <= new_pc; end - - if (reset) begin - fs_pc <= 32'h1bff_fffc; //to make nextpc be 0x1c000000 during reset - end - else if (to_fs_valid && fs_allowin) begin - fs_pc <= nextpc; + else if (!stall[0]) begin + pc_valid <= 1'b1; + fs_pc <= next_pc; end end - assign inst_sram_en = to_fs_valid && fs_allowin; - assign inst_sram_wen = 4'h0; - assign inst_sram_addr = nextpc; - assign inst_sram_wdata = 32'b0; + assign seq_pc = fs_pc + 3'h4; + assign next_pc = br_taken ? 
br_target : seq_pc; - assign fs_inst = inst_sram_rdata; + assign csr_vec_h = timer_int; + assign inst_sram_en = flush | (br_taken ? 1'b0 : pc_valid); + assign inst_sram_we = 4'h0; + assign inst_sram_addr = fs_pc; + assign inst_sram_wdata = 32'b0; endmodule \ No newline at end of file diff --git a/lacpu/rtl/cpu/inst_decoder.v b/lacpu/rtl/cpu/inst_decoder.v new file mode 100644 index 0000000..e569ba0 --- /dev/null +++ b/lacpu/rtl/cpu/inst_decoder.v @@ -0,0 +1,446 @@ +`include "tools.v" +module inst_decoder( + input [31:0] inst, + + output src1_is_pc, + output src2_is_imm, + output src2_is_4, + output src_reg_is_rd, + output [ 4:0] rj, + output [ 4:0] rk, + output [ 4:0] rd, + output [31:0] imm, + output [ 4:0] dest, + + // alu + output [11:0] alu_op, + + // mul div + output [ 3:0] mul_div_op, + output mul_div_sign, + + // branch + output [ 8:0] branch_op, + output [ 5:0] load_op, + output [ 2:0] store_op, + + // csr + output csr_we, + output [ 6:0] csr_op, + output [13:0] csr_addr, + output csr_wdata_sel, + //output [31:0] csr_vec_l, + + output [ 3:0] sel_rf_res, + + output reg_we +); + wire dest_is_r1; + wire dest_is_rj; + + wire [ 5:0] op_31_26; + wire [ 3:0] op_25_22; + wire [ 1:0] op_21_20; + wire [ 4:0] op_19_15; + wire [63:0] op_31_26_d; + wire [15:0] op_25_22_d; + wire [ 3:0] op_21_20_d; + wire [31:0] op_19_15_d; + wire [31:0] rd_d; + wire [31:0] rj_d; + wire [31:0] rk_d; + wire [11:0] i12; + wire [13:0] i14; + wire [19:0] i20; + wire [15:0] i16; + wire [25:0] i26; + wire [13:0] csr_idx; + + wire inst_add_w; + wire inst_sub_w; + wire inst_slt; + wire inst_sltu; + wire inst_nor; + wire inst_and; + wire inst_or; + wire inst_xor; + wire inst_lu12i_w; + wire inst_addi_w; + wire inst_slti; + wire inst_sltui; + wire inst_pcaddi; + wire inst_pcaddu12i; + //wire inst_andn; + //wire inst_orn; + wire inst_andi; + wire inst_ori; + wire inst_xori; + wire inst_mul_w; + wire inst_mulh_w; + wire inst_mulh_wu; + wire inst_div_w; + wire inst_mod_w; + wire inst_div_wu; + wire 
inst_mod_wu; + + wire inst_slli_w; + wire inst_srli_w; + wire inst_srai_w; + wire inst_sll_w; + wire inst_srl_w; + wire inst_sra_w; + + wire inst_jirl; + wire inst_b; + wire inst_bl; + wire inst_beq; + wire inst_bne; + wire inst_blt; + wire inst_bge; + wire inst_bltu; + wire inst_bgeu; + + wire inst_ll_w; + wire inst_sc_w; + wire inst_ld_b; + wire inst_ld_bu; + wire inst_ld_h; + wire inst_ld_hu; + wire inst_ld_w; + wire inst_st_b; + wire inst_st_h; + wire inst_st_w; + + wire inst_syscall; + wire inst_break; + wire inst_csrrd; + wire inst_csrwr; + wire inst_csrxchg; + wire inst_ertn; + + wire inst_rdcntid_w; + wire inst_rdcntvl_w; + wire inst_rdcntvh_w; + //wire inst_idle; + + //wire inst_tlbsrch; + //wire inst_tlbrd; + //wire inst_tlbwr; + //wire inst_tlbfill; + //wire inst_invtlb; + + //wire inst_cacop; + //wire inst_preld; + wire inst_dbar; + wire inst_ibar; + + wire need_ui5; + wire need_si12; + wire need_ui12; + wire need_si14_pc; + wire need_si16_pc; + wire need_si20; + wire need_si20_pc; + wire need_si26_pc; + + + assign op_31_26 = inst[31:26]; + assign op_25_22 = inst[25:22]; + assign op_21_20 = inst[21:20]; + assign op_19_15 = inst[19:15]; + + assign rd = inst[ 4: 0]; + assign rj = inst[ 9: 5]; + assign rk = inst[14:10]; + + assign i12 = inst[21:10]; + assign i14 = inst[23:10]; + assign i20 = inst[24: 5]; + assign i16 = inst[25:10]; + assign i26 = {inst[ 9: 0], inst[25:10]}; + + assign csr_idx = inst[23:10]; + + decoder_6_64 u_dec0(.in(op_31_26 ), .out(op_31_26_d )); + decoder_4_16 u_dec1(.in(op_25_22 ), .out(op_25_22_d )); + decoder_2_4 u_dec2(.in(op_21_20 ), .out(op_21_20_d )); + decoder_5_32 u_dec3(.in(op_19_15 ), .out(op_19_15_d )); + + decoder_5_32 u_dec4(.in(rd ), .out(rd_d )); + decoder_5_32 u_dec5(.in(rj ), .out(rj_d )); + decoder_5_32 u_dec6(.in(rk ), .out(rk_d )); + + assign inst_add_w = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h00]; + assign inst_sub_w = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & 
op_19_15_d[5'h02]; + assign inst_slt = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h04]; + assign inst_sltu = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h05]; + assign inst_nor = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h08]; + assign inst_and = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h09]; + assign inst_or = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0a]; + assign inst_xor = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0b]; + //assign inst_orn = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0c]; + //assign inst_andn = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0d]; + assign inst_sll_w = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0e]; + assign inst_srl_w = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h0f]; + assign inst_sra_w = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h10]; + assign inst_mul_w = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h18]; + assign inst_mulh_w = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h19]; + assign inst_mulh_wu = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h1] & op_19_15_d[5'h1a]; + assign inst_div_w = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h2] & op_19_15_d[5'h00]; + assign inst_mod_w = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h2] & op_19_15_d[5'h01]; + assign inst_div_wu = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h2] & op_19_15_d[5'h02]; + assign inst_mod_wu = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h2] & op_19_15_d[5'h03]; + assign inst_break = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h2] & op_19_15_d[5'h14]; + assign inst_syscall = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h2] & op_19_15_d[5'h16]; + 
assign inst_slli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h01]; + assign inst_srli_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h09]; + assign inst_srai_w = op_31_26_d[6'h00] & op_25_22_d[4'h1] & op_21_20_d[2'h0] & op_19_15_d[5'h11]; + //assign inst_idle = op_31_26_d[6'h01] & op_25_22_d[4'h9] & op_21_20_d[2'h0] & op_19_15_d[5'h11]; + //assign inst_invtlb = op_31_26_d[6'h01] & op_25_22_d[4'h9] & op_21_20_d[2'h0] & op_19_15_d[5'h13]; + assign inst_dbar = op_31_26_d[6'h0e] & op_25_22_d[4'h1] & op_21_20_d[2'h3] & op_19_15_d[5'h04]; + assign inst_ibar = op_31_26_d[6'h0e] & op_25_22_d[4'h1] & op_21_20_d[2'h3] & op_19_15_d[5'h05]; + assign inst_slti = op_31_26_d[6'h00] & op_25_22_d[4'h8]; + assign inst_sltui = op_31_26_d[6'h00] & op_25_22_d[4'h9]; + assign inst_addi_w = op_31_26_d[6'h00] & op_25_22_d[4'ha]; + assign inst_andi = op_31_26_d[6'h00] & op_25_22_d[4'hd]; + assign inst_ori = op_31_26_d[6'h00] & op_25_22_d[4'he]; + assign inst_xori = op_31_26_d[6'h00] & op_25_22_d[4'hf]; + assign inst_ld_b = op_31_26_d[6'h0a] & op_25_22_d[4'h0]; + assign inst_ld_h = op_31_26_d[6'h0a] & op_25_22_d[4'h1]; + assign inst_ld_w = op_31_26_d[6'h0a] & op_25_22_d[4'h2]; + assign inst_st_b = op_31_26_d[6'h0a] & op_25_22_d[4'h4]; + assign inst_st_h = op_31_26_d[6'h0a] & op_25_22_d[4'h5]; + assign inst_st_w = op_31_26_d[6'h0a] & op_25_22_d[4'h6]; + assign inst_ld_bu = op_31_26_d[6'h0a] & op_25_22_d[4'h8]; + assign inst_ld_hu = op_31_26_d[6'h0a] & op_25_22_d[4'h9]; + //assign inst_cacop = op_31_26_d[6'h01] & op_25_22_d[4'h8]; + //assign inst_preld = op_31_26_d[6'h0a] & op_25_22_d[4'hb]; + assign inst_jirl = op_31_26_d[6'h13]; + assign inst_b = op_31_26_d[6'h14]; + assign inst_bl = op_31_26_d[6'h15]; + assign inst_beq = op_31_26_d[6'h16]; + assign inst_bne = op_31_26_d[6'h17]; + assign inst_blt = op_31_26_d[6'h18]; + assign inst_bge = op_31_26_d[6'h19]; + assign inst_bltu = op_31_26_d[6'h1a]; + assign inst_bgeu = 
op_31_26_d[6'h1b]; + assign inst_lu12i_w = op_31_26_d[6'h05] & ~inst[25]; + assign inst_pcaddi = op_31_26_d[6'h06] & ~inst[25]; + assign inst_pcaddu12i = op_31_26_d[6'h07] & ~inst[25]; + assign inst_csrxchg = op_31_26_d[6'h01] & ~inst[25] & ~inst[24] & (~rj_d[5'h00] & ~rj_d[5'h01]); //rj != 0,1 + assign inst_ll_w = op_31_26_d[6'h08] & ~inst[25] & ~inst[24]; + assign inst_sc_w = op_31_26_d[6'h08] & ~inst[25] & inst[24]; + assign inst_csrrd = op_31_26_d[6'h01] & ~inst[25] & ~inst[24] & rj_d[5'h00]; + assign inst_csrwr = op_31_26_d[6'h01] & ~inst[25] & ~inst[24] & rj_d[5'h01]; + assign inst_rdcntid_w = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h0] & op_19_15_d[5'h00] & rk_d[5'h18] & rd_d[5'h00]; + assign inst_rdcntvl_w = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h0] & op_19_15_d[5'h00] & rk_d[5'h18] & rj_d[5'h00] & !rd_d[5'h00]; + assign inst_rdcntvh_w = op_31_26_d[6'h00] & op_25_22_d[4'h0] & op_21_20_d[2'h0] & op_19_15_d[5'h00] & rk_d[5'h19] & rj_d[5'h00]; + assign inst_ertn = op_31_26_d[6'h01] & op_25_22_d[4'h9] & op_21_20_d[2'h0] & op_19_15_d[5'h10] & rk_d[5'h0e] & rj_d[5'h00] & rd_d[5'h00]; + //assign inst_tlbsrch = op_31_26_d[6'h01] & op_25_22_d[4'h9] & op_21_20_d[2'h0] & op_19_15_d[5'h10] & rk_d[5'h0a] & rj_d[5'h00] & rd_d[5'h00]; + //assign inst_tlbrd = op_31_26_d[6'h01] & op_25_22_d[4'h9] & op_21_20_d[2'h0] & op_19_15_d[5'h10] & rk_d[5'h0b] & rj_d[5'h00] & rd_d[5'h00]; + //assign inst_tlbwr = op_31_26_d[6'h01] & op_25_22_d[4'h9] & op_21_20_d[2'h0] & op_19_15_d[5'h10] & rk_d[5'h0c] & rj_d[5'h00] & rd_d[5'h00]; + //assign inst_tlbfill = op_31_26_d[6'h01] & op_25_22_d[4'h9] & op_21_20_d[2'h0] & op_19_15_d[5'h10] & rk_d[5'h0d] & rj_d[5'h00] & rd_d[5'h00]; + + + assign src_reg_is_rd = inst_beq | + inst_bne | + inst_blt | + inst_bltu | + inst_bge | + inst_bgeu | + inst_st_b | + inst_st_h | + inst_st_w | + inst_sc_w | + inst_csrwr | + inst_csrxchg; + + assign src1_is_pc = inst_jirl | + inst_bl | + inst_pcaddi | + inst_pcaddu12i; + + assign 
src2_is_imm = inst_slli_w | + inst_srli_w | + inst_srai_w | + inst_addi_w | + inst_slti | + inst_sltui | + inst_andi | + inst_ori | + inst_xori | + inst_pcaddi | + inst_pcaddu12i | + inst_ld_b | + inst_ld_h | + inst_ld_w | + inst_ld_bu | + inst_ld_hu | + inst_st_b | + inst_st_h | + inst_st_w | + inst_ll_w | + inst_sc_w | + inst_lu12i_w ; + //inst_cacop | + //inst_preld ; + + assign src2_is_4 = inst_jirl | + inst_bl; + + assign dest_is_r1 = inst_bl; + assign dest_is_rj = inst_rdcntid_w; + assign dest = (dest_is_r1) ? 5'd1 : + (dest_is_rj) ? rj : + rd; + + + // alu_op + assign alu_op[ 0] = inst_add_w | + inst_addi_w | + //inst_ld_b | + //inst_ld_h | + //inst_ld_w | + //inst_st_b | + //inst_st_h | + //inst_st_w | + //inst_ld_bu | + //inst_ld_hu | + //inst_ll_w | + //inst_sc_w | + inst_jirl | + inst_bl | + inst_pcaddi | + inst_pcaddu12i; + //inst_cacop | + //inst_preld ; + + assign alu_op[ 1] = inst_sub_w; + assign alu_op[ 2] = inst_slt | inst_slti; + assign alu_op[ 3] = inst_sltu | inst_sltui; + assign alu_op[ 4] = inst_and | inst_andi; + assign alu_op[ 5] = inst_nor; + assign alu_op[ 6] = inst_or | inst_ori; + assign alu_op[ 7] = inst_xor | inst_xori; + assign alu_op[ 8] = inst_sll_w | inst_slli_w; + assign alu_op[ 9] = inst_srl_w | inst_srli_w; + assign alu_op[10] = inst_sra_w | inst_srai_w; + assign alu_op[11] = inst_lu12i_w; + //assign alu_op[12] = inst_andn; + //assign alu_op[13] = inst_orn; + + // imm + assign need_ui5 = inst_slli_w | inst_srli_w | inst_srai_w; + assign need_si12 = inst_addi_w | + inst_ld_b | + inst_ld_h | + inst_ld_w | + inst_st_b | + inst_st_h | + inst_st_w | + inst_ld_bu | + inst_ld_hu | + inst_slti | + inst_sltui; + //inst_cacop | + //inst_preld ; + + assign need_ui12 = inst_andi | inst_ori | inst_xori; + assign need_si14_pc = inst_ll_w | inst_sc_w; + assign need_si16_pc = inst_jirl | + inst_beq | + inst_bne | + inst_blt | + inst_bge | + inst_bltu | + inst_bgeu; + + assign need_si20 = inst_lu12i_w | inst_pcaddu12i; + assign need_si20_pc = 
inst_pcaddi; + assign need_si26_pc = inst_b | inst_bl; + + assign imm = ({32{need_ui5 }} & {27'b0, rk} ) | + ({32{need_si12 }} & {{20{i12[11]}}, i12} ) | + ({32{need_ui12 }} & {20'b0, i12} ) | + ({32{need_si14_pc}} & {{16{i14[13]}}, i14, 2'b0}) | + ({32{need_si16_pc}} & {{14{i16[15]}}, i16, 2'b0}) | + ({32{need_si20 }} & {i20, 12'b0} ) | + ({32{need_si20_pc}} & {{10{i20[19]}}, i20, 2'b0}) | + ({32{need_si26_pc}} & {{ 4{i26[25]}}, i26, 2'b0}) ; + + // mul_div + assign mul_div_op[ 0] = inst_mul_w; + assign mul_div_op[ 1] = inst_mulh_w | inst_mulh_wu; + assign mul_div_op[ 2] = inst_div_w | inst_div_wu; + assign mul_div_op[ 3] = inst_mod_w | inst_mod_wu; + + assign mul_div_sign = inst_mul_w | inst_mulh_w | inst_div_w | inst_mod_w; + + // branch_op + assign branch_op = {inst_beq, + inst_bne, + inst_blt, + inst_bge, + inst_bltu, + inst_bgeu, + inst_jirl, + inst_bl, + inst_b + }; + + // load_op store_op + assign load_op = {inst_ld_b, + inst_ld_h, + inst_ld_w, + inst_ld_bu, + inst_ld_hu, + inst_ll_w + }; + assign store_op = {inst_st_b, + inst_st_h, + inst_st_w + }; + assign reg_we = ~inst_st_b & + ~inst_st_h & + ~inst_st_w & + ~inst_beq & + ~inst_bne & + ~inst_blt & + ~inst_bge & + ~inst_bltu & + ~inst_bgeu & + ~inst_b & + ~inst_syscall & + //~inst_tlbsrch & + //~inst_tlbrd & + //~inst_tlbwr & + //~inst_tlbfill & + //~inst_invtlb & + //~inst_cacop & + //~inst_preld & + ~inst_dbar & + ~inst_ibar ; + + + + // csr + assign csr_we = inst_csrwr | inst_csrxchg; + assign csr_op = {inst_csrrd, + inst_csrwr, + inst_csrxchg, + inst_rdcntid_w, + inst_rdcntvh_w, + inst_rdcntvl_w, + inst_sc_w + }; + assign csr_addr = inst[23:10]; + assign csr_wdata_sel = inst_csrxchg; + //assign csr_vec_l = ?; + + + // rf_res from + assign sel_rf_res[0] = inst_jirl | inst_bl; + assign sel_rf_res[1] = |load_op; + assign sel_rf_res[2] = |csr_op; + assign sel_rf_res[3] = |mul_div_op; +endmodule diff --git a/lacpu/rtl/cpu/lsu.v b/lacpu/rtl/cpu/lsu.v new file mode 100644 index 0000000..6d68569 --- /dev/null 
+++ b/lacpu/rtl/cpu/lsu.v @@ -0,0 +1,56 @@ +module lsu( + input [ 5:0] load_op, + input [ 2:0] store_op, + input [31:0] rj_value, + input [31:0] rkd_value, + input [31:0] imm, + // data SRAM request: address is rj_value + imm, stores replicate the byte/halfword across the word + output data_sram_en, + output [ 3:0] data_sram_we, // per-byte write strobes (driven with 4-bit values below) + output [31:0] data_sram_addr, // full 32-bit byte address + output [31:0] data_sram_wdata // 32-bit store data +); + wire inst_ll_w; + wire inst_ld_b; + wire inst_ld_bu; + wire inst_ld_h; + wire inst_ld_hu; + wire inst_ld_w; + wire inst_st_b; + wire inst_st_h; + wire inst_st_w; + + wire [31:0] addr; + wire [ 3:0] byte_sel; + + assign {inst_ld_b, + inst_ld_h, + inst_ld_w, + inst_ld_bu, + inst_ld_hu, + inst_ll_w + } = load_op; + + assign {inst_st_b, + inst_st_h, + inst_st_w + } = store_op; + + assign addr = rj_value + imm; + + decoder_2_4 u_decoder_2_4( + .in (addr[1:0]), + .out(byte_sel ) + ); + + assign data_sram_en = (|store_op) | (|load_op); + assign data_sram_we = inst_st_b ? byte_sel : + inst_st_h ? {{2{byte_sel[2]}}, {2{byte_sel[0]}}} : + inst_st_w ? { 4{byte_sel[0]}} : + 4'b0; + assign data_sram_addr = addr; + assign data_sram_wdata = inst_st_b ? {4{rkd_value[ 7:0]}} : + inst_st_h ? {2{rkd_value[15:0]}} : + inst_st_w ? 
rkd_value : + 32'b0; +endmodule \ No newline at end of file diff --git a/lacpu/rtl/cpu/mem_stage.v b/lacpu/rtl/cpu/mem_stage.v index 3027084..6ddcaca 100755 --- a/lacpu/rtl/cpu/mem_stage.v +++ b/lacpu/rtl/cpu/mem_stage.v @@ -1,95 +1,179 @@ -`include "mycpu.vh" +module mem_stage +#( + parameter ES_TO_MS_BUS_WD = 175, + parameter MS_TO_ES_BUS_WD = 38, + parameter MS_TO_WS_BUS_WD = 102 +) +( + input clk, + input reset, + input flush, + input [ 5:0] stall, -module mem_stage( - input clk , - input reset , - //allowin - input ws_allowin , - output ms_allowin , - //from es - input es_to_ms_valid , - input [`ES_TO_MS_BUS_WD -1:0] es_to_ms_bus , - //to ws - output ms_to_ws_valid , - output [`MS_TO_WS_BUS_WD -1:0] ms_to_ws_bus , - //from data-sram - input [31 :0] data_sram_rdata, - //to fw - output [`MS_TO_FW_BUS_WD -1:0] ms_to_fw_bus , - //to es - output [`MS_TO_ES_BUS_WD -1:0] ms_to_es_bus , - //div mul - input [31:0] div_result , - input [31:0] mod_result + output except_en, + output [31:0] new_pc, + + input [ES_TO_MS_BUS_WD -1:0] es_to_ms_bus, + output [MS_TO_ES_BUS_WD -1:0] ms_to_es_bus, + output [MS_TO_WS_BUS_WD -1:0] ms_to_ws_bus, + + input [31:0] data_sram_rdata ); - reg ms_valid; - wire ms_ready_go; + reg [ES_TO_MS_BUS_WD -1:0] es_to_ms_bus_r; + reg [31:0] data_sram_rdata_r; + reg [31:0] csr_rdata_r; + reg stall_flag; - reg [`ES_TO_MS_BUS_WD -1:0] es_to_ms_bus_r; - wire [ 4:0] ms_load_op; - wire [ 2:0] ms_store_op; - wire ms_mem_to_reg; - wire ms_reg_we; - wire [ 4:0] ms_dest; - wire [31:0] ms_alu_result; + wire [63:0] csr_bus; + wire [ 5:0] load_op; + wire [ 2:0] store_op; + wire reg_we; + wire [ 4:0] dest; + wire [31:0] es_result; wire [31:0] ms_pc; - wire [ 1:0] ms_div_op; + wire [31:0] inst; + + wire [31:0] data_temp; + wire [31:0] csr_result; + wire [31:0] csr_rdata; + + wire inst_ll_w; + wire inst_ld_b; + wire inst_ld_bu; + wire inst_ld_h; + wire inst_ld_hu; + wire inst_ld_w; + + wire [ 3:0] byte_sel; + wire [31:0] ms_result; + + wire csr_we; + wire 
csr_wdata_sel; + wire [ 6:0] csr_op; + wire [13:0] csr_addr; + wire [31:0] csr_wdata; - assign {ms_div_op , //77:76 - ms_load_op , //75:71 - ms_mem_to_reg , //70:70 - ms_reg_we , //69:69 - ms_dest , //68:64 - ms_alu_result , //63:32 - ms_pc //31:0 - } = es_to_ms_bus_r; - - wire [31:0] mem_result; wire [31:0] ms_final_result; + assign {csr_bus ,//174:111 + load_op ,//110:105 + store_op ,//102:102 + reg_we ,//101:101 + dest ,//100:96 + es_result,//95 :64 + ms_pc ,//63 :32 + inst //31 :0 + } = es_to_ms_bus_r; - assign ms_to_ws_bus = {ms_reg_we , //69:69 - ms_dest , //68:64 - ms_final_result, //63:32 - ms_pc //31:0 + assign ms_to_es_bus = {reg_we, + dest, + es_result }; - assign ms_to_fw_bus = {ms_dest, ms_reg_we}; - - assign ms_to_es_bus = {ms_alu_result}; - - assign ms_ready_go = 1'b1; - assign ms_allowin = !ms_valid || ms_ready_go && ws_allowin; - assign ms_to_ws_valid = ms_valid && ms_ready_go; - always @(posedge clk) begin - if (reset) begin - ms_valid <= 1'b0; - end - else if (ms_allowin) begin - ms_valid <= es_to_ms_valid; - end + assign ms_to_ws_bus = {reg_we ,//101:101 + dest ,//100:96 + ms_final_result ,//95 :64 + ms_pc ,//63 :32 + inst //31 :0 + }; + always @ (posedge clk) begin if (reset) begin es_to_ms_bus_r <= 0; end - if (es_to_ms_valid && ms_allowin) begin - es_to_ms_bus_r <= es_to_ms_bus; + else if (flush) begin + es_to_ms_bus_r <= 0; + end + else if (stall[3]&(!stall[4])) begin + es_to_ms_bus_r <= 0; + end + else if (!stall[3]) begin + es_to_ms_bus_r <= es_to_ms_bus; + end + end + + always @ (posedge clk) begin + if (reset) begin + data_sram_rdata_r <= 0; + csr_rdata_r <= 0; + stall_flag <= 1'b0; + end + else if (flush) begin + data_sram_rdata_r <= 0; + csr_rdata_r <= 0; + stall_flag <= 1'b0; + end + else if (!stall[3]) begin + data_sram_rdata_r <= data_sram_rdata; + csr_rdata_r <= csr_rdata; + stall_flag <= 1'b0; + end + else if (stall_flag) begin + + end + else if (stall[3]&stall[4])begin + data_sram_rdata_r <= data_sram_rdata; + csr_rdata_r <= 
csr_rdata; + stall_flag <= 1'b1; + end end - assign mem_result = (ms_load_op[0] || ms_load_op[3]) ? ((ms_alu_result[1:0] == 2'b00) ? {{24{ms_load_op[3] ? data_sram_rdata[ 7] : 1'b0 }}, data_sram_rdata[ 7:0] } : - (ms_alu_result[1:0] == 2'b01) ? {{16{ms_load_op[3] ? data_sram_rdata[ 7] : 1'b0 }}, data_sram_rdata[ 7:0], 8'b0} : - (ms_alu_result[1:0] == 2'b10) ? {{ 8{ms_load_op[3] ? data_sram_rdata[ 7] : 1'b0 }}, data_sram_rdata[ 7:0], 16'b0} : - { data_sram_rdata[ 7:0], 24'b0}) : - (ms_load_op[1] || ms_load_op[4]) ? ((ms_alu_result[1:0] == 2'b00) ? {{16{ms_load_op[4] ? data_sram_rdata[15] : 1'b0 }}, data_sram_rdata[15:0] } : - { data_sram_rdata[15:0], 16'b0}) : - ms_load_op[2] ? ( data_sram_rdata ) : - 32'b0; + assign data_temp = stall_flag ? data_sram_rdata_r : data_sram_rdata; + assign csr_result = stall_flag ? csr_rdata_r : csr_rdata; - assign ms_final_result = ms_mem_to_reg ? mem_result : - ms_div_op[0] ? div_result : - ms_div_op[1] ? mod_result : - ms_alu_result; + assign {inst_ld_b, + inst_ld_h, + inst_ld_w, + inst_ld_bu, + inst_ld_hu, + inst_ll_w + } = load_op; -endmodule + decoder_2_4 u_decoder_2_4( + .in (es_result[1:0]), + .out(byte_sel ) + ); + // Load result: pick the byte/halfword lane chosen by es_result[1:0], right-align it, then sign-extend (ld.b/ld.h) or zero-extend (ld.bu/ld.hu) to 32 bits + assign ms_result = (inst_ld_b & byte_sel[0]) ? {{24{data_temp[ 7]}}, data_temp[ 7: 0] } : + (inst_ld_b & byte_sel[1]) ? {{24{data_temp[15]}}, data_temp[15: 8] } : + (inst_ld_b & byte_sel[2]) ? {{24{data_temp[23]}}, data_temp[23:16] } : + (inst_ld_b & byte_sel[3]) ? {{24{data_temp[31]}}, data_temp[31:24] } : + (inst_ld_bu & byte_sel[0]) ? { 24'b0, data_temp[ 7: 0] } : + (inst_ld_bu & byte_sel[1]) ? { 24'b0, data_temp[15: 8] } : + (inst_ld_bu & byte_sel[2]) ? { 24'b0, data_temp[23:16] } : + (inst_ld_bu & byte_sel[3]) ? { 24'b0, data_temp[31:24] } : + (inst_ld_h & byte_sel[0]) ? {{16{data_temp[15]}}, data_temp[15: 0] } : + (inst_ld_h & byte_sel[2]) ? {{16{data_temp[31]}}, data_temp[31:16] } : + (inst_ld_hu & byte_sel[0]) ? { 16'b0, data_temp[15: 0] } : + (inst_ld_hu & byte_sel[2]) ? 
{ 16'b0, data_temp[31:16] } : + (inst_ld_w & byte_sel[0]) ? data_temp : + 32'b0; // NOTE(review): ll.w (inst_ll_w) currently falls through to 0 here - confirm intended + + assign {csr_we, + csr_wdata_sel, + csr_op, + csr_addr, + csr_wdata + } = csr_bus; + + csr u_csr( + .clk (clk ), + .reset (reset ), + .stall (stall[3]&stall[4]), + .pc (ms_pc ), + .csr_we (csr_we ), + .csr_op (csr_op ), + .csr_addr (csr_addr ), + .csr_wdata_sel (csr_wdata_sel ), + .csr_wdata (csr_wdata ), + .csr_rdata (csr_rdata ), + .except_en (except_en ), + .new_pc (new_pc ) + ); + + assign ms_final_result = (|load_op) ? ms_result : + (|csr_op) ? csr_result : + es_result; + +endmodule \ No newline at end of file diff --git a/lacpu/rtl/cpu/mul.v b/lacpu/rtl/cpu/mul.v new file mode 100644 index 0000000..984150f --- /dev/null +++ b/lacpu/rtl/cpu/mul.v @@ -0,0 +1,47 @@ +module mul( + input clk, + input reset, + output stallreq, + input in_valid, + output out_valid, + + input [31:0] a, + input [31:0] b, + + output reg [31:0] result_h, + output reg [31:0] result_l +); + reg [ 5:0] cnt; + wire [31:0] add_result; + wire carry; + always @ (posedge clk) begin + if (reset) begin + cnt <= 0; + end + else if (cnt != 0) begin + cnt <= cnt - 1; + end + else if (in_valid) begin + cnt <= 32; + end + end + + assign {carry, add_result} = result_h + (result_l[0] ? 
a : 0); + + always @ (posedge clk) begin + if (reset) begin + result_h <= 0; + result_l <= 0; + end + else if (cnt != 0) begin + {result_h, result_l} <= {carry, add_result, result_l[31:1]}; + end + else if (in_valid) begin + result_h <= 0; + result_l <= b; + end + end + + assign out_valid = (cnt==0); + assign stallreq = in_valid | (~(cnt==0)); +endmodule \ No newline at end of file diff --git a/lacpu/rtl/cpu/mul_div_lock.v b/lacpu/rtl/cpu/mul_div_lock.v new file mode 100644 index 0000000..6c392a0 --- /dev/null +++ b/lacpu/rtl/cpu/mul_div_lock.v @@ -0,0 +1,66 @@ +module mul_div_lock ( + input clk, + input reset, + input [ 5:0] stall, + input [31:0] a, + input [31:0] b, + input mul_en, + input div_en, + input stallreq_for_mul, + input stallreq_for_div, + + output [31:0] a_locked, + output [31:0] b_locked, + output mul_en_locked, + output div_en_locked +); + reg first_enable; + reg mul_en_musk; + reg div_en_musk; + reg [31:0] a_buffer; + reg [31:0] b_buffer; + + wire stallreq = stallreq_for_mul | stallreq_for_div; + + assign mul_en_locked = mul_en & mul_en_musk; + assign div_en_locked = div_en & div_en_musk; + + assign a_locked = first_enable ? a : a_buffer; + assign b_locked = first_enable ? 
b : b_buffer; + + always @ (posedge clk) begin + if (reset) begin + a_buffer <= 0; + b_buffer <= 0; + mul_en_musk <= 1; + div_en_musk <= 1; + + first_enable <= 1; + end + else if (mul_en & first_enable) begin + a_buffer <= a; + b_buffer <= b; + mul_en_musk <= 0; + div_en_musk <= 1; + + first_enable <= 0; + end + else if (div_en & first_enable) begin + a_buffer <= a; + b_buffer <= b; + mul_en_musk <= 1; + div_en_musk <= 0; + + first_enable <= 0; + end + else if (!stallreq & (mul_en|div_en) & !first_enable & !stall[2]) begin + a_buffer <= 0; + b_buffer <= 0; + mul_en_musk <= 1; + div_en_musk <= 1; + + first_enable <= 1; + end + + end +endmodule \ No newline at end of file diff --git a/lacpu/rtl/cpu/mul_div_top.v b/lacpu/rtl/cpu/mul_div_top.v new file mode 100644 index 0000000..3a7ce4c --- /dev/null +++ b/lacpu/rtl/cpu/mul_div_top.v @@ -0,0 +1,110 @@ +module mul_div_top( /* Multiply/divide wrapper. Signed operations are run on operand magnitudes and the result sign is restored afterwards; mul_div_op selects {remainder, quotient, product high, product low} on bits [3:0]. */ + input clk, + input reset, + input [ 5:0] stall, + output stallreq, /* high while either unit is busy */ + + input [ 3:0] mul_div_op, + input mul_div_sign, /* 1 = treat a and b as two's-complement signed */ + + input [31:0] a, + input [31:0] b, + + output [63:0] mul_div_result /* selected 32-bit word, zero-extended */ +); + wire stallreq_for_mul; + wire stallreq_for_div; + wire sign_flag; + wire [31:0] src_a; + wire [31:0] src_b; + wire [31:0] result_h; + wire [31:0] result_l; + wire [31:0] quotient; + wire [31:0] remainder; + wire [63:0] product_u; /* raw product of the operand magnitudes */ + wire [63:0] product; /* sign-corrected product */ + wire [31:0] quotient_u; /* raw quotient of the operand magnitudes */ + wire [31:0] remainder_u; /* raw remainder of the operand magnitudes */ + reg result_neg; /* latched: product/quotient must be negated */ + reg remainder_neg; /* latched: remainder must be negated */ + + wire mul_en; + wire div_en; + + wire [31:0] a_locked; + wire [31:0] b_locked; + wire mul_en_locked; + wire div_en_locked; + + assign mul_en = mul_div_op[0] | mul_div_op[1]; + assign div_en = mul_div_op[2] | mul_div_op[3]; + + assign sign_flag = a[31] ^ b[31]; /* operand sign bits differ */ + assign src_a = (mul_div_sign && a[31]) ? (~a + 1'b1) : a; /* magnitude only for signed negative inputs; unsigned operands pass through */ + assign src_b = (mul_div_sign && b[31]) ? 
(~b + 1'b1) : b; + + always @ (posedge clk) begin /* capture the result signs on the one-cycle start pulse, because a and b are only guaranteed valid on that cycle (mul_div_lock buffers them for the same reason) */ + if (reset) begin + result_neg <= 1'b0; + remainder_neg <= 1'b0; + end + else if (mul_en_locked | div_en_locked) begin + result_neg <= mul_div_sign & sign_flag; /* product and quotient are negative when the signs differ */ + remainder_neg <= mul_div_sign & a[31]; /* remainder takes the sign of the dividend */ + end + end + + mul_div_lock u_mul_div_lock( + .clk (clk ), + .reset (reset ), + .stall (stall ), + .a (src_a ), + .b (src_b ), + .mul_en (mul_en ), + .div_en (div_en ), + .stallreq_for_mul (stallreq_for_mul ), + .stallreq_for_div (stallreq_for_div ), + .a_locked (a_locked ), + .b_locked (b_locked ), + .mul_en_locked (mul_en_locked ), + .div_en_locked (div_en_locked ) + ); + + mul u_mul( + .clk (clk ), + .reset (reset ), + .stallreq (stallreq_for_mul), + .in_valid (mul_en_locked ), + .out_valid (), + .a (a_locked ), + .b (b_locked ), + .result_h (product_u[63:32]), + .result_l (product_u[31: 0]) + ); + + div u_div( /* NOTE(review): assumed to be an unsigned divider with at least one cycle of latency (it has no sign port) - confirm against div.v */ + .clk (clk ), + .reset (reset ), + .stallreq (stallreq_for_div), + .in_valid (div_en_locked ), + .out_valid (), + .a (a_locked ), + .b (b_locked ), + .quotient (quotient_u ), + .remainder (remainder_u ) + ); + + assign product = result_neg ? (~product_u + 1'b1) : product_u; /* 64-bit two's-complement negate */ + assign result_h = product[63:32]; + assign result_l = product[31: 0]; + assign quotient = result_neg ? (~quotient_u + 1'b1) : quotient_u; + assign remainder = remainder_neg ? (~remainder_u + 1'b1) : remainder_u; + + assign stallreq = stallreq_for_mul | stallreq_for_div; + assign mul_div_result = mul_div_op[0] ? result_l : + mul_div_op[1] ? result_h : + mul_div_op[2] ? quotient : + mul_div_op[3] ? 
remainder : + 32'b0; + +endmodule \ No newline at end of file diff --git a/lacpu/rtl/cpu/mycpu.v b/lacpu/rtl/cpu/mycpu.v new file mode 100644 index 0000000..f5c144e --- /dev/null +++ b/lacpu/rtl/cpu/mycpu.v @@ -0,0 +1,150 @@ +module mycpu /* CPU top level: wires the five pipeline stages and the pipeline controller together */ +#( + parameter FS_TO_DS_BUS_WD = 32, + parameter DS_TO_ES_BUS_WD = 206, + parameter ES_TO_MS_BUS_WD = 175, + parameter MS_TO_WS_BUS_WD = 102, + parameter WS_TO_RF_BUS_WD = 38, + + parameter MS_TO_ES_BUS_WD = 32, /* NOTE(review): confirm against the width mem_stage actually drives */ + parameter WS_TO_ES_BUS_WD = 38, /* wb_stage drives {reg_we, dest[4:0], result[31:0]} = 38 bits */ + parameter BR_BUS_WD = 33 + +) +( + input clk, + input resetn, + output timer_int, + + // inst sram interface + output inst_sram_en, + output [ 3:0] inst_sram_we, + output [31:0] inst_sram_addr, + output [31:0] inst_sram_wdata, + input [31:0] inst_sram_rdata, + // data sram interface + output data_sram_en, + output [ 3:0] data_sram_we, + output [31:0] data_sram_addr, + output [31:0] data_sram_wdata, + input [31:0] data_sram_rdata, + // trace debug interface + output [31:0] debug_wb_pc, + output [ 3:0] debug_wb_rf_we, + output [ 4:0] debug_wb_rf_wnum, + output [31:0] debug_wb_rf_wdata +); + + reg reset; + always @(posedge clk) reset <= ~resetn; + + wire [FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus; + wire [DS_TO_ES_BUS_WD -1:0] ds_to_es_bus; + wire [ES_TO_MS_BUS_WD -1:0] es_to_ms_bus; + wire [MS_TO_WS_BUS_WD -1:0] ms_to_ws_bus; + wire [WS_TO_RF_BUS_WD -1:0] ws_to_rf_bus; + wire [MS_TO_ES_BUS_WD -1:0] ms_to_es_bus; /* forwarding buses must be declared: an undeclared name becomes an implicit 1-bit net */ + wire [WS_TO_ES_BUS_WD -1:0] ws_to_es_bus; + + wire [BR_BUS_WD -1:0] br_bus; + + wire flush; + wire stallreq_es; + wire stallreq_id; + wire [ 5:0] stall; + wire except_en; + wire [31:0] new_pc; + wire [31:0] csr_vec_h; + + if_stage if_stage( + .clk (clk ), + .reset (reset ), + .flush (flush ), + .stall (stall ), + .new_pc (new_pc ), + .timer_int (timer_int ), + .csr_vec_h (csr_vec_h ), + .fs_to_ds_bus (fs_to_ds_bus ), + .br_bus (br_bus ), + .inst_sram_en (inst_sram_en ), + .inst_sram_we (inst_sram_we ), + .inst_sram_addr (inst_sram_addr ), + .inst_sram_wdata (inst_sram_wdata ) + ); + + id_stage id_stage( + .clk (clk ), + .reset (reset ), + .flush (flush ), + .stall (stall 
), + .br_taken (br_bus[32] ), + .stallreq_id (stallreq_id ), + .fs_to_ds_bus (fs_to_ds_bus ), + .pc_valid (inst_sram_en ), + .inst_sram_rdata (inst_sram_rdata ), + .csr_vec_h (csr_vec_h ), + .ws_to_rf_bus (ws_to_rf_bus ), + .ds_to_es_bus (ds_to_es_bus ) + ); + + exe_stage exe_stage( + .clk (clk ), + .reset (reset ), + .flush (flush ), + .stall (stall ), + .stallreq_es (stallreq_es ), + .ds_to_es_bus (ds_to_es_bus ), + .es_to_ms_bus (es_to_ms_bus ), + .ms_to_es_bus (ms_to_es_bus ), + .ws_to_es_bus (ws_to_es_bus ), + + .br_bus (br_bus ), + + .data_sram_en (data_sram_en ), + .data_sram_we (data_sram_we ), + .data_sram_addr (data_sram_addr ), + .data_sram_wdata (data_sram_wdata ) + ); + + mem_stage mem_stage( + .clk (clk ), + .reset (reset ), + .flush (flush ), + .stall (stall ), + .except_en (except_en ), + .new_pc (new_pc ), + + .es_to_ms_bus (es_to_ms_bus ), + .ms_to_es_bus (ms_to_es_bus ), + .ms_to_ws_bus (ms_to_ws_bus ), + + .data_sram_rdata (data_sram_rdata ) + ); + + wb_stage wb_stage( + .clk (clk ), + .reset (reset ), + .flush (flush ), + .stall (stall ), + + .ms_to_ws_bus (ms_to_ws_bus ), + .ws_to_rf_bus (ws_to_rf_bus ), + .ws_to_es_bus (ws_to_es_bus ), + + .debug_wb_pc (debug_wb_pc ), + .debug_wb_rf_we (debug_wb_rf_we ), + .debug_wb_rf_wnum (debug_wb_rf_wnum ), + .debug_wb_rf_wdata (debug_wb_rf_wdata) + ); + + pip_ctrl pip_ctrl( + .reset (reset ), + .except_en (except_en ), + .stallreq_ds (stallreq_id ), /* ID-stage stall request as driven by id_stage; the former name stallreq_ds was an undeclared, undriven implicit net */ + .stallreq_es (stallreq_es ), + .stallreq_axi (1'b0 ), // TODO! 
+ .flush (flush ), + .stall (stall ) + ); + +endmodule \ No newline at end of file diff --git a/lacpu/rtl/cpu/mycpu.vh b/lacpu/rtl/cpu/mycpu.vh deleted file mode 100644 index 4a9d553..0000000 --- a/lacpu/rtl/cpu/mycpu.vh +++ /dev/null @@ -1,17 +0,0 @@ -`ifndef MYCPU_VH - `define MYCPU_VH - - `define BR_BUS_WD 33 - `define FS_TO_DS_BUS_WD 64 - `define DS_TO_ES_BUS_WD 160 - `define ES_TO_MS_BUS_WD 78 - `define MS_TO_WS_BUS_WD 70 - `define WS_TO_RF_BUS_WD 38 - - `define DS_TO_FW_BUS_WD 10 - `define ES_TO_FW_BUS_WD 12 - `define MS_TO_FW_BUS_WD 6 - `define FW_TO_ES_BUS_WD 5 - `define MS_TO_ES_BUS_WD 32 - `define WS_TO_ES_BUS_WD 32 -`endif diff --git a/lacpu/rtl/cpu/mycpu_top.v b/lacpu/rtl/cpu/mycpu_top.v deleted file mode 100644 index 36ea11d..0000000 --- a/lacpu/rtl/cpu/mycpu_top.v +++ /dev/null @@ -1,196 +0,0 @@ -`include "mycpu.vh" - -module mycpu_top( - input clk, - input resetn, - // inst sram interface - output inst_sram_en, - output [ 3:0] inst_sram_we, - output [31:0] inst_sram_addr, - output [31:0] inst_sram_wdata, - input [31:0] inst_sram_rdata, - // data sram interface - output data_sram_en, - output [ 3:0] data_sram_we, - output [31:0] data_sram_addr, - output [31:0] data_sram_wdata, - input [31:0] data_sram_rdata, - // trace debug interface - output [31:0] debug_wb_pc, - output [ 3:0] debug_wb_rf_we, - output [ 4:0] debug_wb_rf_wnum, - output [31:0] debug_wb_rf_wdata -); - reg reset; - always @(posedge clk) reset <= ~resetn; - - wire ds_allowin; - wire es_allowin; - wire ms_allowin; - wire ws_allowin; - wire fs_to_ds_valid; - wire ds_to_es_valid; - wire es_to_ms_valid; - wire ms_to_ws_valid; - wire [`FS_TO_DS_BUS_WD -1:0] fs_to_ds_bus; - wire [`DS_TO_ES_BUS_WD -1:0] ds_to_es_bus; - wire [`ES_TO_MS_BUS_WD -1:0] es_to_ms_bus; - wire [`MS_TO_WS_BUS_WD -1:0] ms_to_ws_bus; - wire [`WS_TO_RF_BUS_WD -1:0] ws_to_rf_bus; - wire [`BR_BUS_WD -1:0] br_bus; - wire [`DS_TO_FW_BUS_WD -1:0] ds_to_fw_bus; - wire [`ES_TO_FW_BUS_WD -1:0] es_to_fw_bus; - wire [`MS_TO_FW_BUS_WD 
-1:0] ms_to_fw_bus; - wire [`FW_TO_ES_BUS_WD -1:0] fw_to_es_bus; - wire [`MS_TO_ES_BUS_WD -1:0] ms_to_es_bus; - wire [`WS_TO_ES_BUS_WD -1:0] ws_to_es_bus; - - wire es_div_enable; - wire es_div_sign; - wire [31:0] es_rf_rdata1; - wire [31:0] es_rf_rdata2; - wire div_complete; - wire [31:0] div_result; - wire [31:0] mod_result; - - - // IF stage - if_stage if_stage( - .clk (clk ), - .reset (reset ), - //allowin - .ds_allowin (ds_allowin ), - //brbus - .br_bus (br_bus ), - //outputs - .fs_to_ds_valid (fs_to_ds_valid ), - .fs_to_ds_bus (fs_to_ds_bus ), - // inst sram interface - .inst_sram_en (inst_sram_en ), - .inst_sram_wen (inst_sram_we ), - .inst_sram_addr (inst_sram_addr ), - .inst_sram_wdata(inst_sram_wdata), - .inst_sram_rdata(inst_sram_rdata) - ); - // ID stage - id_stage id_stage( - .clk (clk ), - .reset (reset ), - //allowin - .es_allowin (es_allowin ), - .ds_allowin (ds_allowin ), - //from fs - .fs_to_ds_valid (fs_to_ds_valid ), - .fs_to_ds_bus (fs_to_ds_bus ), - //to es - .ds_to_es_valid (ds_to_es_valid ), - .ds_to_es_bus (ds_to_es_bus ), - //to rf: for write back - .ws_to_rf_bus (ws_to_rf_bus ), - //to fw - .ds_to_fw_bus (ds_to_fw_bus ), - //to fs - .br_bus (br_bus ) - ); - // EXE stage - exe_stage exe_stage( - .clk (clk ), - .reset (reset ), - //allowin - .ms_allowin (ms_allowin ), - .es_allowin (es_allowin ), - //from ds - .ds_to_es_valid (ds_to_es_valid ), - .ds_to_es_bus (ds_to_es_bus ), - //to ms - .es_to_ms_valid (es_to_ms_valid ), - .es_to_ms_bus (es_to_ms_bus ), - //from fw - .fw_to_es_bus (fw_to_es_bus ), - //to fw - .es_to_fw_bus (es_to_fw_bus ), - //from ms - .ms_to_ds_bus (ms_to_es_bus ), - //from ws - .ws_to_ds_bus (ws_to_es_bus ), - // data sram interface - .data_sram_en (data_sram_en ), - .data_sram_wen (data_sram_we ), - .data_sram_addr (data_sram_addr ), - .data_sram_wdata(data_sram_wdata), - // div - .es_div_enable (es_div_enable) , - .es_div_sign (es_div_sign) , - .es_rf_rdata1 (es_rf_rdata1) , - .es_rf_rdata2 (es_rf_rdata2) , - 
.div_complete (div_complete) - ); - // div - div u_div( - .div_clk (clk ), - .reset (reset ), - .div (es_div_enable ), - .div_signed (es_div_sign ), - .x (es_rf_rdata1 ), - .y (es_rf_rdata2 ), - .s (div_result ), - .r (mod_result ), - .complete (div_complete ) - ); - - // MEM stage - mem_stage mem_stage( - .clk (clk ), - .reset (reset ), - //allowin - .ws_allowin (ws_allowin ), - .ms_allowin (ms_allowin ), - //from es - .es_to_ms_valid (es_to_ms_valid ), - .es_to_ms_bus (es_to_ms_bus ), - //to ws - .ms_to_ws_valid (ms_to_ws_valid ), - .ms_to_ws_bus (ms_to_ws_bus ), - //from data-sram - .data_sram_rdata(data_sram_rdata), - //to fw - .ms_to_fw_bus (ms_to_fw_bus ), - //to es - .ms_to_es_bus (ms_to_es_bus ), - //div - .div_result (div_result ), - .mod_result (mod_result ) - ); - - - // WB stage - wb_stage wb_stage( - .clk (clk ), - .reset (reset ), - //allowin - .ws_allowin (ws_allowin ), - //from ms - .ms_to_ws_valid (ms_to_ws_valid ), - .ms_to_ws_bus (ms_to_ws_bus ), - //to rf: for write back - .ws_to_rf_bus (ws_to_rf_bus ), - //to es - .ws_to_es_bus (ws_to_es_bus ), - //trace debug interface - .debug_wb_pc (debug_wb_pc ), - .debug_wb_rf_wen (debug_wb_rf_we ), - .debug_wb_rf_wnum (debug_wb_rf_wnum ), - .debug_wb_rf_wdata(debug_wb_rf_wdata) - ); - - // Forwarding - forward forward( - .clk (clk ), - .reset (reset ), - .ds_to_fw_bus (ds_to_fw_bus), - .es_to_fw_bus (es_to_fw_bus), - .ms_to_fw_bus (ms_to_fw_bus), - .fw_to_es_bus (fw_to_es_bus) - ); - -endmodule diff --git a/lacpu/rtl/cpu/pip_ctrl.v b/lacpu/rtl/cpu/pip_ctrl.v new file mode 100644 index 0000000..ece7921 --- /dev/null +++ b/lacpu/rtl/cpu/pip_ctrl.v @@ -0,0 +1,44 @@ +`define StallBus 6 +module pip_ctrl( + input reset, + input except_en, + input stallreq_ds, + input stallreq_es, + input stallreq_axi, + output reg flush, + output reg [`StallBus-1:0] stall +); + //stall[0] --? + //stall[1] --? 
+ //stall[2] --id (also read by mul_div_lock before it releases its operand lock) + //stall[3] -- read together with stall[4] as the stall input of the CSR block in mem_stage + //stall[4] -- when clear, wb_stage loads ms_to_ws_bus into its pipeline register + //stall[5] -- with stall[4] set and stall[5] clear, wb_stage clears its pipeline register + always @ (*) begin /* priority, highest first: reset, AXI stall, exception flush, ID stall, EXE stall */ + if (reset) begin + flush = 0; + stall = `StallBus'b000000; + end + else if (stallreq_axi) begin /* freeze every stage */ + flush = 0; + stall = `StallBus'b111111; + end + else if (except_en) begin /* exception: flush, nothing is stalled */ + flush = 1; + stall = `StallBus'b0; + end + // ID stage requests a stall: ID and the stages before it are held + else if (stallreq_ds) begin + flush = 0; + stall = `StallBus'b000111; + end + else if (stallreq_es) begin /* EXE stall request freezes every stage */ + flush = 0; + stall = `StallBus'b111111; + end + else begin + flush = 0; + stall = `StallBus'b000000; + end + end +endmodule diff --git a/lacpu/rtl/cpu/regfile.v b/lacpu/rtl/cpu/regfile.v index bcb3ae6..e9ad95b 100755 --- a/lacpu/rtl/cpu/regfile.v +++ b/lacpu/rtl/cpu/regfile.v @@ -1,5 +1,6 @@ module regfile( input clk, + input reset, // READ PORT 1 input [ 4:0] raddr1, output [31:0] rdata1, @@ -15,7 +16,43 @@ module regfile( //WRITE always @(posedge clk) begin - if (we) rf[waddr]<= wdata; + if (reset) begin /* synchronous reset: clear all 32 registers */ + rf[ 0] <= 32'b0; + rf[ 1] <= 32'b0; + rf[ 2] <= 32'b0; + rf[ 3] <= 32'b0; + rf[ 4] <= 32'b0; + rf[ 5] <= 32'b0; + rf[ 6] <= 32'b0; + rf[ 7] <= 32'b0; + rf[ 8] <= 32'b0; + rf[ 9] <= 32'b0; + rf[10] <= 32'b0; + rf[11] <= 32'b0; + rf[12] <= 32'b0; + rf[13] <= 32'b0; + rf[14] <= 32'b0; + rf[15] <= 32'b0; + rf[16] <= 32'b0; + rf[17] <= 32'b0; + rf[18] <= 32'b0; + rf[19] <= 32'b0; + rf[20] <= 32'b0; + rf[21] <= 32'b0; + rf[22] <= 32'b0; + rf[23] <= 32'b0; + rf[24] <= 32'b0; + rf[25] <= 32'b0; + rf[26] <= 32'b0; + rf[27] <= 32'b0; + rf[28] <= 32'b0; + rf[29] <= 32'b0; + rf[30] <= 32'b0; + rf[31] <= 32'b0; + end + else if (we) begin /* a write only happens when reset is low */ + rf[waddr]<= wdata; + end end //READ OUT 1 diff --git a/lacpu/rtl/cpu/tools.v b/lacpu/rtl/cpu/tools.v index eaea342..234bf1f 100755 --- a/lacpu/rtl/cpu/tools.v +++ b/lacpu/rtl/cpu/tools.v @@ -1,25 +1,53 @@ -`default_nettype wire +module decoder_2_4( /* one-hot decoder: out[i] is 1 exactly when in == i */ + input [ 1:0] in, + output [ 3:0] out +); + +genvar i; +generate for (i=0; i<4; i=i+1) begin : gen_for_dec_2_4 + assign out[i] = (in == i); +end endgenerate + +endmodule + +
+module decoder_4_16( /* one-hot decoder: out[i] is 1 exactly when in == i */ + input [ 3:0] in, + output [15:0] out +); + +genvar i; +generate for (i=0; i<16; i=i+1) begin : gen_for_dec_4_16 + assign out[i] = (in == i); +end endgenerate + +endmodule + + module decoder_5_32( input [ 4:0] in, output [31:0] out ); - genvar i; - generate for (i=0; i<32; i=i+1) begin : gen_for_dec_5_32 - assign out[i] = (in == i); - end endgenerate +genvar i; +generate for (i=0; i<32; i=i+1) begin : gen_for_dec_5_32 + assign out[i] = (in == i); +end endgenerate - endmodule +endmodule -module decoder_3_8( - input [2:0] in, - output [7:0] out +module decoder_6_64( /* one-hot decoder: out[i] is 1 exactly when in == i */ + input [ 5:0] in, + output [63:0] out ); - genvar i; - generate for (i=0; i<8; i=i+1) begin : gen_for_dec_3_8 - assign out[i] = (in == i); - end endgenerate +genvar i; +generate for (i=0; i<64; i=i+1) begin : gen_for_dec_6_64 //bug7 + assign out[i] = (in == i); +end endgenerate + +endmodule + + -endmodule \ No newline at end of file diff --git a/lacpu/rtl/cpu/wb_stage.v b/lacpu/rtl/cpu/wb_stage.v index 6c35192..3173862 100755 --- a/lacpu/rtl/cpu/wb_stage.v +++ b/lacpu/rtl/cpu/wb_stage.v @@ -1,74 +1,68 @@ -`include "mycpu.vh" +module wb_stage /* Write-back stage: registers ms_to_ws_bus and fans its fields out to the register file, the EXE stage and the trace debug ports */ +#( + parameter MS_TO_WS_BUS_WD = 102, /* {reg_we, dest[4:0], result[31:0], pc[31:0], inst[31:0]} */ + parameter WS_TO_RF_BUS_WD = 38, /* {reg_we, dest[4:0], result[31:0]} */ + parameter WS_TO_ES_BUS_WD = 38 /* same layout as the register-file bus */ +) +( + input clk, + input reset, + input flush, + input [5:0] stall, -module wb_stage( - input clk , - input reset , - //allowin - output ws_allowin , - //from ms - input ms_to_ws_valid, - input [`MS_TO_WS_BUS_WD -1:0] ms_to_ws_bus , - //to rf: for write back - output [`WS_TO_RF_BUS_WD -1:0] ws_to_rf_bus , - //to es - output [`WS_TO_ES_BUS_WD -1:0] ws_to_es_bus , - //trace debug interface - output [31:0] debug_wb_pc , - output [ 3:0] debug_wb_rf_wen , + input [MS_TO_WS_BUS_WD -1:0] ms_to_ws_bus, + output [WS_TO_RF_BUS_WD -1:0] ws_to_rf_bus, + output [WS_TO_ES_BUS_WD -1:0] ws_to_es_bus, + + output [31:0] debug_wb_pc, + output [ 3:0] debug_wb_rf_we, output [ 4:0] debug_wb_rf_wnum, output [31:0] debug_wb_rf_wdata ); + reg [MS_TO_WS_BUS_WD 
-1:0] ms_to_ws_bus_r; - reg ws_valid; - wire ws_ready_go; - - reg [`MS_TO_WS_BUS_WD -1:0] ms_to_ws_bus_r; - wire ws_gr_we; - wire [ 4:0] ws_dest; - wire [31:0] ws_final_result; + wire reg_we; + wire [ 4:0] dest; + wire [31:0] ms_final_result; wire [31:0] ws_pc; - assign {ws_reg_we , //69:69 - ws_dest , //68:64 - ws_final_result, //63:32 - ws_pc //31:0 - } = ms_to_ws_bus_r; + wire [31:0] inst; - wire rf_we; - wire [4 :0] rf_waddr; - wire [31:0] rf_wdata; - assign ws_to_rf_bus = {rf_we , //37:37 - rf_waddr, //36:32 - rf_wdata //31:0 - }; + assign {reg_we ,//101:101 + dest ,//100:96 + ms_final_result ,//95 :64 + ws_pc ,//63 :32 + inst //31 :0 + } = ms_to_ws_bus_r; - assign ws_to_es_bus = {rf_wdata}; + assign ws_to_rf_bus = {reg_we, + dest, + ms_final_result + }; - assign ws_ready_go = 1'b1; - assign ws_allowin = !ws_valid || ws_ready_go; - always @(posedge clk) begin - if (reset) begin - ws_valid <= 1'b0; - end - else if (ws_allowin) begin - ws_valid <= ms_to_ws_valid; - end - + assign ws_to_es_bus = {reg_we, + dest, + ms_final_result + }; + + always @ (posedge clk) begin if (reset) begin ms_to_ws_bus_r <= 0; end - if (ms_to_ws_valid && ws_allowin) begin + else if (flush) begin + ms_to_ws_bus_r <= 0; + end + else if (stall[4]&(!stall[5])) begin /* stall[4] set with stall[5] clear: clear the register (bubble) */ + ms_to_ws_bus_r <= 0; + end + else if (!stall[4]) begin /* stall[4] clear: take the next MEM-stage bus; otherwise hold */ ms_to_ws_bus_r <= ms_to_ws_bus; end end - assign rf_we = ws_reg_we && ws_valid; - assign rf_waddr = ws_dest; - assign rf_wdata = ws_final_result; - // debug info generate assign debug_wb_pc = ws_pc; - assign debug_wb_rf_wen = {4{rf_we}}; - assign debug_wb_rf_wnum = ws_dest; - assign debug_wb_rf_wdata = ws_final_result; + assign debug_wb_rf_we = {4{reg_we}}; + assign debug_wb_rf_wnum = dest; /* register number being written, not the write data */ + assign debug_wb_rf_wdata = ms_final_result; -endmodule +endmodule \ No newline at end of file diff --git a/lacpu/rtl/soc_lite_top.v b/lacpu/rtl/soc_lite_top.v index 81e072d..01ca86e 100755 --- a/lacpu/rtl/soc_lite_top.v +++ b/lacpu/rtl/soc_lite_top.v @@ -35,7 +35,7 @@ 
module soc_lite_top wire [31:0] cpu_data_rdata; //cpu - mycpu_top cpu( + mycpu mycpu( .clk (cpu_clk ), .resetn (cpu_resetn), //low active diff --git a/lacpu/run_vivado/la32r/la32r.xpr b/lacpu/run_vivado/la32r/la32r.xpr index 770218a..e1acad0 100644 --- a/lacpu/run_vivado/la32r/la32r.xpr +++ b/lacpu/run_vivado/la32r/la32r.xpr @@ -29,7 +29,7 @@