diff --git a/.gitignore b/.gitignore index 66790eb..44e1c46 100644 --- a/.gitignore +++ b/.gitignore @@ -7,12 +7,8 @@ ext/ /lacpu/run_vivado/la32r/* /lacpu/rtl/xilinx_ip/inst_ram/* /lacpu/rtl/xilinx_ip/data_ram/* -/lacpu/rtl/xilinx_ip/div/* -/lacpu/rtl/xilinx_ip/divu/* vivado.jou vivado.log !/lacpu/run_vivado/la32r/la32r.xpr !/lacpu/rtl/xilinx_ip/inst_ram/inst_ram.xci !/lacpu/rtl/xilinx_ip/data_ram/data_ram.xci -!/lacpu/rtl/xilinx_ip/div/div.xci -!/lacpu/rtl/xilinx_ip/divu/divu.xci diff --git a/lacpu/rtl/cpu/cpu_top.v b/lacpu/rtl/cpu/cpu_top.v index abb3db6..420ea16 100755 --- a/lacpu/rtl/cpu/cpu_top.v +++ b/lacpu/rtl/cpu/cpu_top.v @@ -48,6 +48,14 @@ module mycpu_top( wire [`ES_TO_LU_BUS_WD -1:0] es_to_lu_bus; wire lu_to_es_bus; + wire es_div_enable; + wire es_div_sign; + wire [31:0] es_rf_rdata1; + wire [31:0] es_rf_rdata2; + wire div_complete; + wire [31:0] div_result; + wire [31:0] mod_result; + // IF stage if_stage if_stage( @@ -116,8 +124,27 @@ module mycpu_top( .data_sram_en (data_sram_en ), .data_sram_wen (data_sram_wen ), .data_sram_addr (data_sram_addr ), - .data_sram_wdata(data_sram_wdata) + .data_sram_wdata(data_sram_wdata), + // div + .es_div_enable (es_div_enable) , + .es_div_sign (es_div_sign) , + .es_rf_rdata1 (es_rf_rdata1) , + .es_rf_rdata2 (es_rf_rdata2) , + .div_complete (div_complete) ); + // div + div u_div( + .div_clk (clk ), + .reset (reset ), + .div (es_div_enable ), + .div_signed (es_div_sign ), + .x (es_rf_rdata1 ), + .y (es_rf_rdata2 ), + .s (div_result ), + .r (mod_result ), + .complete (div_complete ) + ); + // MEM stage mem_stage mem_stage( .clk (clk ), @@ -138,8 +165,13 @@ module mycpu_top( //to fw .ms_to_fw_bus (ms_to_fw_bus ), //to es - .ms_to_es_bus (ms_to_es_bus ) + .ms_to_es_bus (ms_to_es_bus ), + //div + .div_result (div_result ), + .mod_result (mod_result ) ); + + // WB stage wb_stage wb_stage( .clk (clk ), diff --git a/lacpu/rtl/cpu/div.v b/lacpu/rtl/cpu/div.v index e69de29..8bd608c 100644 --- a/lacpu/rtl/cpu/div.v +++ b/lacpu/rtl/cpu/div.v @@ -0,0 +1,93 @@ +//x/y //执行需要34个周期 +module div( + input div_clk, reset, + input div, + input div_signed, + input [31:0] x, y, + output [31:0] s, r, + output complete + ); + +reg [32:0] UnsignS; +reg [32:0] UnsignR; +reg [32:0] tmp_r; +reg [7:0] count; +wire [32:0] tmp_d; +wire [32:0] result_r; +wire [32:0] UnsignX, UnsignY; + +reg div_signed_buffer; +reg x_31_buffer; +reg y_31_buffer; +wire real_div_signed; +wire real_x_31; +wire real_y_31; +wire complete_delay; +wire real_complete; + +assign complete_delay = (count == 8'hf0); +assign real_complete = complete_delay || complete; + +always @(posedge div_clk) begin + if (reset) begin + div_signed_buffer <= 1'b0; + x_31_buffer <= 1'b0; + y_31_buffer <= 1'b0; + end + else if (div) begin + div_signed_buffer <= div_signed; //when div inst go to ms, div_signed will be changed. so buffer it. + x_31_buffer <= x[31]; + y_31_buffer <= y[31]; + end +end + +assign real_div_signed = real_complete ? div_signed_buffer : div_signed; +assign real_x_31 = real_complete ? x_31_buffer : x[31]; +assign real_y_31 = real_complete ? y_31_buffer : y[31]; + +assign UnsignX = {1'b0, (real_div_signed ? (x[31] ? (~x + 1) : x) : x)}; //取绝对值并扩展至33位 +assign UnsignY = {1'b0, (real_div_signed ? (y[31] ? (~y + 1) : y) : y)}; + +always @(posedge div_clk) begin //33位除法计算 + if (reset || ~div || complete_delay) begin + count <= 8'd32; //计算33次 + tmp_r <= 33'b0; + end + else if (~(count[7])) begin + if (tmp_d[32]) begin //tmp_d为负数 + UnsignS <= {UnsignS[31:0], 1'b0}; + tmp_r <= result_r; + end + else begin + UnsignS <= {UnsignS[31:0], 1'b1}; + tmp_r <= tmp_d; + end + count <= count - 8'd1; + end + else begin + UnsignR <= tmp_r; + count <= 8'hf0; //complete signal only maintain one clock + end + +end + +assign complete = (count == 8'hff);//chenji + +assign result_r = {tmp_r[31:0], UnsignX[count]}; +assign tmp_d = result_r - UnsignY; + +wire [32:0] TmpS, TmpR; +assign TmpS = (real_div_signed ? ((real_x_31 == real_y_31) ? UnsignS : ~(UnsignS - 1)) : UnsignS); //去绝对值并截位 +assign TmpR = (real_div_signed ? (real_x_31 ? ~(UnsignR - 1) : UnsignR) : UnsignR); + +assign s = TmpS[31:0]; +assign r = TmpR[31:0]; + +endmodule + +//表达式的符号关系 +//x[31] y[31] s[31] r[31] +// 0 0 0 0 +// 0 1 1 0 +// 1 0 1 1 +// 1 1 0 1 diff --git a/lacpu/rtl/cpu/exe_stage.v b/lacpu/rtl/cpu/exe_stage.v index 48630df..be52128 100755 --- a/lacpu/rtl/cpu/exe_stage.v +++ b/lacpu/rtl/cpu/exe_stage.v @@ -17,18 +17,22 @@ module exe_stage( output [ 3:0] data_sram_wen , output [31:0] data_sram_addr , output [31:0] data_sram_wdata, - //to fw + //fw output [`ES_TO_FW_BUS_WD -1:0] es_to_fw_bus , - //from fw input [`FW_TO_ES_BUS_WD -1:0] fw_to_es_bus , //from ms input [`MS_TO_ES_BUS_WD -1:0] ms_to_ds_bus , //from ws input [`WS_TO_ES_BUS_WD -1:0] ws_to_ds_bus , - //to lu + //lu output [`ES_TO_LU_BUS_WD -1:0] es_to_lu_bus , - //from lu - input lu_to_es_bus + input lu_to_es_bus , + //div_mul + output es_div_enable , + output es_div_sign , + output [31:0] es_rf_rdata1 , + output [31:0] es_rf_rdata2 , + input div_complete ); reg es_valid ; @@ -47,8 +51,6 @@ module exe_stage( wire [ 8:0] es_branch_op; wire [ 4:0] es_dest; wire [31:0] es_imm; - wire [31:0] es_rf_rdata1; - wire [31:0] es_rf_rdata2; wire [31:0] es_pc; wire [31:0] ms_alu_result; @@ -99,47 +101,19 @@ module exe_stage( wire es_Sign ; wire es_Overflow ; wire es_Zero ; - wire [31:0] es_result ; - - reg div_divisor_valid_r; - reg div_divisor_ready_flag; - reg div_dividend_valid_r; - reg div_dividend_ready_flag; - reg divu_divisor_valid_r; - reg divu_divisor_ready_flag; - reg divu_dividend_valid_r; - reg divu_dividend_ready_flag; wire es_inst_divw ; wire es_inst_modw ; wire es_inst_divwu; wire es_inst_modwu; - wire is_div_mod; + wire es_inst_mulw; + wire es_inst_mulhw; + wire es_inst_mulhwu; + wire [ 1:0] div_op; + wire div_stall; - wire [31:0] div_mod_result; - - //div - wire [31:0] div_divisor_data; - wire div_divisor_valid; - wire div_divisor_ready; - wire [31:0] div_dividend_data; - wire div_dividend_valid; - wire div_dividend_ready; - wire div_dout_valid; - wire [63:0] div_dout_data; - //divu - wire [31:0] divu_divisor_data; - wire divu_divisor_valid; - wire divu_divisor_ready; - wire [31:0] divu_dividend_data; - wire divu_dividend_valid; - wire divu_dividend_ready; - wire divu_dout_valid; - wire [63:0] divu_dout_data; - - assign es_result = is_div_mod ? div_mod_result : es_alu_result; - - assign es_to_ms_bus = {br_target , //120:89 + assign es_to_ms_bus = {div_op , //122:121 + br_target , //120:89 es_branch_op , //88 :80 es_Carry , //79 :79 es_Sign , //78 :78 @@ -149,7 +123,7 @@ module exe_stage( es_mem_to_reg , //70 :70 es_reg_we , //69 :69 es_dest , //68 :64 - es_result , //63 :32 + es_alu_result , //63 :32 es_pc //31 :0 }; @@ -161,7 +135,7 @@ module exe_stage( assign es_to_lu_bus = {es_dest, es_load_op}; - assign es_ready_go = (is_div_mod && !(div_dout_valid || divu_dout_valid)) || loaduse_r ? 1'b1 : 1'b0; + assign es_ready_go = div_stall || loaduse_r; assign es_allowin = !es_valid || es_ready_go && ms_allowin; assign es_to_ms_valid = es_valid && es_ready_go; always @(posedge clk) begin @@ -199,118 +173,19 @@ module exe_stage( es_src2_is_ms_dest ? ws_rf_wdata : es_rf_rdata2; - assign is_div_mod = es_inst_divw | es_inst_modw | es_inst_divwu | es_inst_modwu; assign es_inst_divw = es_alu_op[15]; assign es_inst_modw = es_alu_op[16]; assign es_inst_divwu = es_alu_op[17]; assign es_inst_modwu = es_alu_op[18]; - - assign div_divisor_data = es_alu_src1; - assign div_dividend_data = es_alu_src2; - assign divu_divisor_data = es_alu_src1; - assign divu_dividend_data = es_alu_src2; - always @(posedge clk) begin - if(reset) begin - div_divisor_valid_r <= 1'b0; - div_divisor_ready_flag <= 1'b0; - end - else if(div_divisor_valid_r && div_divisor_ready) begin - div_divisor_valid_r <= 1'b0; - div_divisor_ready_flag <= 1'b1; - end - else if((es_inst_divw || es_inst_modw) && !div_divisor_ready_flag) begin - div_divisor_valid_r <= 1'b1; - end - else if(es_ready_go) begin - div_divisor_ready_flag <= 1'b0; - end - end - always @(posedge clk) begin - if(reset) begin - div_dividend_valid_r <= 1'b0; - div_dividend_ready_flag <= 1'b0; - end - else if(div_dividend_valid_r && div_dividend_ready) begin - div_dividend_valid_r <= 1'b0; - div_dividend_ready_flag <= 1'b1; - end - else if((es_inst_divw || es_inst_modw) && !div_dividend_ready_flag) begin - div_dividend_valid_r <= 1'b1; - end - else if(es_ready_go) begin - div_dividend_ready_flag <= 1'b0; - end - end - always @(posedge clk) begin - if(reset) begin - divu_divisor_valid_r <= 1'b0; - divu_divisor_ready_flag <= 1'b0; - end - else if(divu_divisor_valid_r && divu_divisor_ready) begin - divu_divisor_valid_r <= 1'b0; - divu_divisor_ready_flag <= 1'b1; - end - else if((es_inst_divw || es_inst_modw) && !divu_divisor_ready_flag) begin - divu_divisor_valid_r <= 1'b1; - end - else if(es_ready_go) begin - divu_divisor_ready_flag <= 1'b0; - end - end - always @(posedge clk) begin - if(reset) begin - divu_dividend_valid_r <= 1'b0; - divu_dividend_ready_flag <= 1'b0; - end - else if(divu_dividend_valid_r && divu_dividend_ready) begin - divu_dividend_valid_r <= 1'b0; - divu_dividend_ready_flag <= 1'b1; - end - else if((es_inst_divw || es_inst_modw) && !divu_dividend_ready_flag) begin - divu_dividend_valid_r <= 1'b1; - end - else if(es_ready_go) begin - divu_dividend_ready_flag <= 1'b0; - end - end + assign div_op[0] = es_inst_divw | es_inst_divwu; + assign div_op[1] = es_inst_modw | es_inst_modwu; - //div - div div( - .aclk (clk ), - .s_axis_divisor_tdata (div_divisor_data ), - .s_axis_divisor_tvalid (div_divisor_valid ), - .s_axis_divisor_tready (div_divisor_ready ), - .s_axis_dividend_tdata (div_dividend_data ), - .s_axis_dividend_tvalid (div_dividend_valid ), - .s_axis_dividend_tready (div_dividend_ready ), - .m_axis_dout_tvalid (div_dout_valid ), - .m_axis_dout_tdata (div_dout_data ) - ); + assign es_div_enable = (div_op[0] | div_op[1]) & es_valid; - //divu - divu divu( - .aclk (clk ), - .s_axis_divisor_tdata (divu_divisor_data ), - .s_axis_divisor_tvalid (divu_divisor_valid ), - .s_axis_divisor_tready (divu_divisor_ready ), - .s_axis_dividend_tdata (divu_dividend_data ), - .s_axis_dividend_tvalid (divu_dividend_valid), - .s_axis_dividend_tready (divu_dividend_ready), - .m_axis_dout_tvalid (divu_dout_valid ), - .m_axis_dout_tdata (divu_dout_data ) - ); + assign es_div_sign = es_inst_divw | es_inst_modw; - assign div_divisor_valid = div_divisor_valid_r; - assign div_dividend_valid = div_dividend_valid_r; - assign divu_divisor_valid = divu_divisor_valid_r; - assign divu_dividend_valid = divu_dividend_valid_r; - - assign div_mod_result = es_inst_divw ? div_dout_data[63:32] : - es_inst_modw ? div_dout_data[31: 0] : - es_inst_divwu ? divu_dout_data[63:32] : - es_inst_modwu ? divu_dout_data[31: 0] : - 32'b0; + assign div_stall = es_div_enable & ~div_complete; alu u_alu( .alu_op (es_alu_op[14:0]), diff --git a/lacpu/rtl/cpu/mem_stage.v b/lacpu/rtl/cpu/mem_stage.v index 479b872..f1d1e83 100755 --- a/lacpu/rtl/cpu/mem_stage.v +++ b/lacpu/rtl/cpu/mem_stage.v @@ -19,7 +19,10 @@ module mem_stage( //to fw output [`MS_TO_FW_BUS_WD -1:0] ms_to_fw_bus , //to es - output [`MS_TO_ES_BUS_WD -1:0] ms_to_es_bus + output [`MS_TO_ES_BUS_WD -1:0] ms_to_es_bus , + //div mul + input [31:0] div_result , + input [31:0] mod_result ); reg ms_valid; @@ -35,12 +38,14 @@ module mem_stage( wire [ 4:0] ms_dest; wire [31:0] ms_alu_result; wire [31:0] ms_pc; + wire [ 1:0] ms_div_op; wire ms_Carry ; wire ms_Sign ; wire ms_Overflow ; wire ms_Zero ; - assign {br_target , //120:89 + assign {ms_div_op , //122:121 + br_target , //120:89 ms_branch_op , //88 :80 ms_Carry , //79 :79 ms_Sign , //78 :78 @@ -96,8 +101,10 @@ module mem_stage( ms_load_op[2] ? ( data_sram_rdata ) : 32'b0; - assign ms_final_result = ms_mem_to_reg ? mem_result - : ms_alu_result; + assign ms_final_result = ms_mem_to_reg ? mem_result : + ms_div_op[0] ? div_result : + ms_div_op[1] ? mod_result : + ms_alu_result; assign br_taken = ( ms_branch_op[0] && ms_Zero || ms_branch_op[1] && !ms_Zero diff --git a/lacpu/rtl/cpu/mycpu.vh b/lacpu/rtl/cpu/mycpu.vh index 52703ea..05bd54c 100644 --- a/lacpu/rtl/cpu/mycpu.vh +++ b/lacpu/rtl/cpu/mycpu.vh @@ -4,7 +4,7 @@ `define BR_BUS_WD 33 `define FS_TO_DS_BUS_WD 64 `define DS_TO_ES_BUS_WD 174 - `define ES_TO_MS_BUS_WD 121 + `define ES_TO_MS_BUS_WD 123 `define MS_TO_WS_BUS_WD 70 `define WS_TO_RF_BUS_WD 38 diff --git a/lacpu/rtl/xilinx_ip/div/div.xci b/lacpu/rtl/xilinx_ip/div/div.xci deleted file mode 100644 index 56b58f2..0000000 --- a/lacpu/rtl/xilinx_ip/div/div.xci +++ /dev/null @@ -1,161 +0,0 @@ - - - xilinx.com - xci - unknown - 1.0 - - - div - - - - 1000000 - 0 - 0.000 - 0 - - 100000000 - 0 - 0 - 0 - 0 - 0 - undef - 0.000 - 8 - 0 - 0 - 0 - - 100000000 - 0 - 0 - 1 - 0 - 0 - undef - 0.000 - 4 - 0 - 0 - 0 - - 100000000 - 0 - 0 - 1 - 0 - 0 - undef - 0.000 - 4 - 0 - 0 - 0 - 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 37 - 64 - 1 - 32 - 1 - 32 - 1 - 3 - 0 - artix7 - 8 - 32 - 32 - 0 - 32 - 1 - false - false - div - NonBlocking - Performance - Null - false - Radix2 - 8 - false - 32 - false - false - 1 - false - false - 1 - 32 - 32 - 37 - Automatic - Signed - Remainder - artix7 - - - xc7a100t - csg324 - VERILOG - - MIXED - -1 - - - TRUE - TRUE - IP_Flow - 16 - TRUE - . - - . - 2019.2 - OUT_OF_CONTEXT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/lacpu/rtl/xilinx_ip/divu/divu.xci b/lacpu/rtl/xilinx_ip/divu/divu.xci deleted file mode 100644 index c39ebbc..0000000 --- a/lacpu/rtl/xilinx_ip/divu/divu.xci +++ /dev/null @@ -1,155 +0,0 @@ - - - xilinx.com - xci - unknown - 1.0 - - - divu - - - - 1000000 - 0 - 0.000 - 0 - - 100000000 - 0 - 0 - 0 - 0 - 0 - undef - 0.000 - 8 - 0 - 0 - 0 - - 100000000 - 0 - 0 - 1 - 0 - 0 - undef - 0.000 - 4 - 0 - 0 - 0 - - 100000000 - 0 - 0 - 1 - 0 - 0 - undef - 0.000 - 4 - 0 - 0 - 0 - 1 - 0 - 0 - 0 - 0 - 0 - 0 - 0 - 35 - 64 - 1 - 32 - 1 - 32 - 1 - 3 - 0 - artix7 - 8 - 32 - 32 - 0 - 32 - 0 - false - false - divu - NonBlocking - Performance - Null - false - Radix2 - 8 - false - 32 - false - false - 1 - false - false - 1 - 32 - 32 - 35 - Automatic - Unsigned - Remainder - artix7 - - - xc7a100t - csg324 - VERILOG - - MIXED - -1 - - - TRUE - TRUE - IP_Flow - 16 - TRUE - . - - . - 2019.2 - OUT_OF_CONTEXT - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/lacpu/run_vivado/la32r/la32r.xpr b/lacpu/run_vivado/la32r/la32r.xpr index 3afa387..794f7d0 100644 --- a/lacpu/run_vivado/la32r/la32r.xpr +++ b/lacpu/run_vivado/la32r/la32r.xpr @@ -54,20 +54,6 @@ - - - - - - - - - - - - - - @@ -75,14 +61,14 @@ - + - +