diff --git a/.gitignore b/.gitignore
index 66790eb..44e1c46 100644
--- a/.gitignore
+++ b/.gitignore
@@ -7,12 +7,8 @@ ext/
/lacpu/run_vivado/la32r/*
/lacpu/rtl/xilinx_ip/inst_ram/*
/lacpu/rtl/xilinx_ip/data_ram/*
-/lacpu/rtl/xilinx_ip/div/*
-/lacpu/rtl/xilinx_ip/divu/*
vivado.jou
vivado.log
!/lacpu/run_vivado/la32r/la32r.xpr
!/lacpu/rtl/xilinx_ip/inst_ram/inst_ram.xci
!/lacpu/rtl/xilinx_ip/data_ram/data_ram.xci
-!/lacpu/rtl/xilinx_ip/div/div.xci
-!/lacpu/rtl/xilinx_ip/divu/divu.xci
diff --git a/lacpu/rtl/cpu/cpu_top.v b/lacpu/rtl/cpu/cpu_top.v
index abb3db6..420ea16 100755
--- a/lacpu/rtl/cpu/cpu_top.v
+++ b/lacpu/rtl/cpu/cpu_top.v
@@ -48,6 +48,14 @@ module mycpu_top(
wire [`ES_TO_LU_BUS_WD -1:0] es_to_lu_bus;
wire lu_to_es_bus;
+ wire es_div_enable;
+ wire es_div_sign;
+ wire [31:0] es_rf_rdata1;
+ wire [31:0] es_rf_rdata2;
+ wire div_complete;
+ wire [31:0] div_result;
+ wire [31:0] mod_result;
+
// IF stage
if_stage if_stage(
@@ -116,8 +124,27 @@ module mycpu_top(
.data_sram_en (data_sram_en ),
.data_sram_wen (data_sram_wen ),
.data_sram_addr (data_sram_addr ),
- .data_sram_wdata(data_sram_wdata)
+ .data_sram_wdata(data_sram_wdata),
+ // div
+ .es_div_enable (es_div_enable) ,
+ .es_div_sign (es_div_sign) ,
+ .es_rf_rdata1 (es_rf_rdata1) ,
+ .es_rf_rdata2 (es_rf_rdata2) ,
+ .div_complete (div_complete)
);
+ // Iterative divider: control and operands come from exe_stage, quotient/remainder are consumed by mem_stage
+ div u_div(
+ .div_clk (clk ), // same clock as the pipeline stages
+ .reset (reset ),
+ .div (es_div_enable ), // request from EXE; see exe_stage for how it is derived
+ .div_signed (es_div_sign ), // 1 = signed operation
+ .x (es_rf_rdata1 ), // dividend
+ .y (es_rf_rdata2 ), // divisor
+ .s (div_result ), // quotient, routed to mem_stage
+ .r (mod_result ), // remainder, routed to mem_stage
+ .complete (div_complete ) // completion flag returned to exe_stage
+ );
+
// MEM stage
mem_stage mem_stage(
.clk (clk ),
@@ -138,8 +165,13 @@ module mycpu_top(
//to fw
.ms_to_fw_bus (ms_to_fw_bus ),
//to es
- .ms_to_es_bus (ms_to_es_bus )
+ .ms_to_es_bus (ms_to_es_bus ),
+ //div
+ .div_result (div_result ),
+ .mod_result (mod_result )
);
+
+
// WB stage
wb_stage wb_stage(
.clk (clk ),
diff --git a/lacpu/rtl/cpu/div.v b/lacpu/rtl/cpu/div.v
index e69de29..8bd608c 100644
--- a/lacpu/rtl/cpu/div.v
+++ b/lacpu/rtl/cpu/div.v
@@ -0,0 +1,93 @@
+// Computes x / y. Takes 34 cycles to execute.
+module div( // Iterative shift-and-subtract divider: s = quotient of x / y, r = remainder
+ input div_clk, reset, // clock; reset is sampled synchronously on posedge div_clk
+ input div, // request: while low, the iteration counter is reloaded and tmp_r is cleared
+ input div_signed, // 1 = treat x and y as signed two's-complement values
+ input [31:0] x, y, // dividend x and divisor y; read combinationally every iteration, so they must stay stable
+ output [31:0] s, r, // s = quotient, r = remainder
+ output complete // high while count == 8'hff, i.e. right after the last iteration
+ );
+
+reg [32:0] UnsignS; // unsigned quotient, built one bit per iteration (shifted in at the LSB)
+reg [32:0] UnsignR; // unsigned remainder, captured from tmp_r after the last iteration
+reg [32:0] tmp_r; // running partial remainder
+reg [7:0] count; // bit index into UnsignX (32 down to 0); 8'hff and 8'hf0 are used as state markers
+wire [32:0] tmp_d; // trial subtraction result; bit 32 set means it went negative
+wire [32:0] result_r; // partial remainder shifted left with the next dividend bit appended
+wire [32:0] UnsignX, UnsignY; // magnitudes of x and y, zero-extended to 33 bits
+
+reg div_signed_buffer; // copies of div_signed / sign bits sampled while div is high
+reg x_31_buffer;
+reg y_31_buffer;
+wire real_div_signed; // buffered value once the division has completed, live input otherwise
+wire real_x_31;
+wire real_y_31;
+wire complete_delay; // high while count == 8'hf0, the state entered one cycle after complete
+wire real_complete; // complete or complete_delay
+
+assign complete_delay = (count == 8'hf0);
+assign real_complete = complete_delay || complete;
+
+always @(posedge div_clk) begin
+ if (reset) begin
+ div_signed_buffer <= 1'b0;
+ x_31_buffer <= 1'b0;
+ y_31_buffer <= 1'b0;
+ end
+ else if (div) begin
+ div_signed_buffer <= div_signed; // div_signed changes once the divide instruction moves on to MS, so keep a copy
+ x_31_buffer <= x[31];
+ y_31_buffer <= y[31];
+ end
+end
+
+assign real_div_signed = real_complete ? div_signed_buffer : div_signed;
+assign real_x_31 = real_complete ? x_31_buffer : x[31];
+assign real_y_31 = real_complete ? y_31_buffer : y[31];
+
+assign UnsignX = {1'b0, (real_div_signed ? (x[31] ? (~x + 1) : x) : x)}; // take the absolute value and zero-extend to 33 bits; NOTE(review): uses live x[31]/y[31], not real_x_31/real_y_31 - confirm intended
+assign UnsignY = {1'b0, (real_div_signed ? (y[31] ? (~y + 1) : y) : y)};
+
+always @(posedge div_clk) begin // 33-bit division iteration
+ if (reset || ~div || complete_delay) begin
+ count <= 8'd32; // 33 iterations: count runs from 32 down to 0
+ tmp_r <= 33'b0;
+ end
+ else if (~(count[7])) begin
+ if (tmp_d[32]) begin // tmp_d is negative: quotient bit is 0, keep the shifted remainder
+ UnsignS <= {UnsignS[31:0], 1'b0};
+ tmp_r <= result_r;
+ end
+ else begin // subtraction fits: quotient bit is 1, keep the difference
+ UnsignS <= {UnsignS[31:0], 1'b1};
+ tmp_r <= tmp_d;
+ end
+ count <= count - 8'd1; // decrementing from 0 wraps to 8'hff, which raises complete
+ end
+ else begin // reached with count == 8'hff: latch the remainder one cycle after the last iteration
+ UnsignR <= tmp_r;
+ count <= 8'hf0; // keeps the complete signal high for only one clock
+ end
+
+end
+
+assign complete = (count == 8'hff);//chenji
+
+assign result_r = {tmp_r[31:0], UnsignX[count]};
+assign tmp_d = result_r - UnsignY;
+
+wire [32:0] TmpS, TmpR;
+assign TmpS = (real_div_signed ? ((real_x_31 == real_y_31) ? UnsignS : ~(UnsignS - 1)) : UnsignS); // restore the sign (negate when operand signs differ); truncated to 32 bits below
+assign TmpR = (real_div_signed ? (real_x_31 ? ~(UnsignR - 1) : UnsignR) : UnsignR); // remainder takes the sign of x
+
+assign s = TmpS[31:0];
+assign r = TmpR[31:0];
+
+endmodule
+
+// Sign relationship between the operands and the results
+//x[31] y[31] s[31] r[31]
+// 0 0 0 0
+// 0 1 1 0
+// 1 0 1 1
+// 1 1 0 1
diff --git a/lacpu/rtl/cpu/exe_stage.v b/lacpu/rtl/cpu/exe_stage.v
index 48630df..be52128 100755
--- a/lacpu/rtl/cpu/exe_stage.v
+++ b/lacpu/rtl/cpu/exe_stage.v
@@ -17,18 +17,22 @@ module exe_stage(
output [ 3:0] data_sram_wen ,
output [31:0] data_sram_addr ,
output [31:0] data_sram_wdata,
- //to fw
+ //fw
output [`ES_TO_FW_BUS_WD -1:0] es_to_fw_bus ,
- //from fw
input [`FW_TO_ES_BUS_WD -1:0] fw_to_es_bus ,
//from ms
input [`MS_TO_ES_BUS_WD -1:0] ms_to_ds_bus ,
//from ws
input [`WS_TO_ES_BUS_WD -1:0] ws_to_ds_bus ,
- //to lu
+ //lu
output [`ES_TO_LU_BUS_WD -1:0] es_to_lu_bus ,
- //from lu
- input lu_to_es_bus
+ input lu_to_es_bus ,
+ //div_mul
+ output es_div_enable ,
+ output es_div_sign ,
+ output [31:0] es_rf_rdata1 ,
+ output [31:0] es_rf_rdata2 ,
+ input div_complete
);
reg es_valid ;
@@ -47,8 +51,6 @@ module exe_stage(
wire [ 8:0] es_branch_op;
wire [ 4:0] es_dest;
wire [31:0] es_imm;
- wire [31:0] es_rf_rdata1;
- wire [31:0] es_rf_rdata2;
wire [31:0] es_pc;
wire [31:0] ms_alu_result;
@@ -99,47 +101,19 @@ module exe_stage(
wire es_Sign ;
wire es_Overflow ;
wire es_Zero ;
- wire [31:0] es_result ;
-
- reg div_divisor_valid_r;
- reg div_divisor_ready_flag;
- reg div_dividend_valid_r;
- reg div_dividend_ready_flag;
- reg divu_divisor_valid_r;
- reg divu_divisor_ready_flag;
- reg divu_dividend_valid_r;
- reg divu_dividend_ready_flag;
wire es_inst_divw ;
wire es_inst_modw ;
wire es_inst_divwu;
wire es_inst_modwu;
- wire is_div_mod;
+ wire es_inst_mulw;
+ wire es_inst_mulhw;
+ wire es_inst_mulhwu;
+ wire [ 1:0] div_op;
+ wire div_stall;
- wire [31:0] div_mod_result;
-
- //div
- wire [31:0] div_divisor_data;
- wire div_divisor_valid;
- wire div_divisor_ready;
- wire [31:0] div_dividend_data;
- wire div_dividend_valid;
- wire div_dividend_ready;
- wire div_dout_valid;
- wire [63:0] div_dout_data;
- //divu
- wire [31:0] divu_divisor_data;
- wire divu_divisor_valid;
- wire divu_divisor_ready;
- wire [31:0] divu_dividend_data;
- wire divu_dividend_valid;
- wire divu_dividend_ready;
- wire divu_dout_valid;
- wire [63:0] divu_dout_data;
-
- assign es_result = is_div_mod ? div_mod_result : es_alu_result;
-
- assign es_to_ms_bus = {br_target , //120:89
+ assign es_to_ms_bus = {div_op , //122:121
+ br_target , //120:89
es_branch_op , //88 :80
es_Carry , //79 :79
es_Sign , //78 :78
@@ -149,7 +123,7 @@ module exe_stage(
es_mem_to_reg , //70 :70
es_reg_we , //69 :69
es_dest , //68 :64
- es_result , //63 :32
+ es_alu_result , //63 :32
es_pc //31 :0
};
@@ -161,7 +135,7 @@ module exe_stage(
assign es_to_lu_bus = {es_dest, es_load_op};
- assign es_ready_go = (is_div_mod && !(div_dout_valid || divu_dout_valid)) || loaduse_r ? 1'b1 : 1'b0;
+ assign es_ready_go = div_stall || loaduse_r; // NOTE(review): div_stall is high while a divide is still running, yet it asserts ready_go here - polarity looks inverted; confirm against loaduse_r's meaning
assign es_allowin = !es_valid || es_ready_go && ms_allowin;
assign es_to_ms_valid = es_valid && es_ready_go;
always @(posedge clk) begin
@@ -199,118 +173,19 @@ module exe_stage(
es_src2_is_ms_dest ? ws_rf_wdata :
es_rf_rdata2;
- assign is_div_mod = es_inst_divw | es_inst_modw | es_inst_divwu | es_inst_modwu;
assign es_inst_divw = es_alu_op[15];
assign es_inst_modw = es_alu_op[16];
assign es_inst_divwu = es_alu_op[17];
assign es_inst_modwu = es_alu_op[18];
-
- assign div_divisor_data = es_alu_src1;
- assign div_dividend_data = es_alu_src2;
- assign divu_divisor_data = es_alu_src1;
- assign divu_dividend_data = es_alu_src2;
- always @(posedge clk) begin
- if(reset) begin
- div_divisor_valid_r <= 1'b0;
- div_divisor_ready_flag <= 1'b0;
- end
- else if(div_divisor_valid_r && div_divisor_ready) begin
- div_divisor_valid_r <= 1'b0;
- div_divisor_ready_flag <= 1'b1;
- end
- else if((es_inst_divw || es_inst_modw) && !div_divisor_ready_flag) begin
- div_divisor_valid_r <= 1'b1;
- end
- else if(es_ready_go) begin
- div_divisor_ready_flag <= 1'b0;
- end
- end
- always @(posedge clk) begin
- if(reset) begin
- div_dividend_valid_r <= 1'b0;
- div_dividend_ready_flag <= 1'b0;
- end
- else if(div_dividend_valid_r && div_dividend_ready) begin
- div_dividend_valid_r <= 1'b0;
- div_dividend_ready_flag <= 1'b1;
- end
- else if((es_inst_divw || es_inst_modw) && !div_dividend_ready_flag) begin
- div_dividend_valid_r <= 1'b1;
- end
- else if(es_ready_go) begin
- div_dividend_ready_flag <= 1'b0;
- end
- end
- always @(posedge clk) begin
- if(reset) begin
- divu_divisor_valid_r <= 1'b0;
- divu_divisor_ready_flag <= 1'b0;
- end
- else if(divu_divisor_valid_r && divu_divisor_ready) begin
- divu_divisor_valid_r <= 1'b0;
- divu_divisor_ready_flag <= 1'b1;
- end
- else if((es_inst_divw || es_inst_modw) && !divu_divisor_ready_flag) begin
- divu_divisor_valid_r <= 1'b1;
- end
- else if(es_ready_go) begin
- divu_divisor_ready_flag <= 1'b0;
- end
- end
- always @(posedge clk) begin
- if(reset) begin
- divu_dividend_valid_r <= 1'b0;
- divu_dividend_ready_flag <= 1'b0;
- end
- else if(divu_dividend_valid_r && divu_dividend_ready) begin
- divu_dividend_valid_r <= 1'b0;
- divu_dividend_ready_flag <= 1'b1;
- end
- else if((es_inst_divw || es_inst_modw) && !divu_dividend_ready_flag) begin
- divu_dividend_valid_r <= 1'b1;
- end
- else if(es_ready_go) begin
- divu_dividend_ready_flag <= 1'b0;
- end
- end
+ assign div_op[0] = es_inst_divw | es_inst_divwu;
+ assign div_op[1] = es_inst_modw | es_inst_modwu;
- //div
- div div(
- .aclk (clk ),
- .s_axis_divisor_tdata (div_divisor_data ),
- .s_axis_divisor_tvalid (div_divisor_valid ),
- .s_axis_divisor_tready (div_divisor_ready ),
- .s_axis_dividend_tdata (div_dividend_data ),
- .s_axis_dividend_tvalid (div_dividend_valid ),
- .s_axis_dividend_tready (div_dividend_ready ),
- .m_axis_dout_tvalid (div_dout_valid ),
- .m_axis_dout_tdata (div_dout_data )
- );
+ assign es_div_enable = (div_op[0] | div_op[1]) & es_valid;
- //divu
- divu divu(
- .aclk (clk ),
- .s_axis_divisor_tdata (divu_divisor_data ),
- .s_axis_divisor_tvalid (divu_divisor_valid ),
- .s_axis_divisor_tready (divu_divisor_ready ),
- .s_axis_dividend_tdata (divu_dividend_data ),
- .s_axis_dividend_tvalid (divu_dividend_valid),
- .s_axis_dividend_tready (divu_dividend_ready),
- .m_axis_dout_tvalid (divu_dout_valid ),
- .m_axis_dout_tdata (divu_dout_data )
- );
+ assign es_div_sign = es_inst_divw | es_inst_modw;
- assign div_divisor_valid = div_divisor_valid_r;
- assign div_dividend_valid = div_dividend_valid_r;
- assign divu_divisor_valid = divu_divisor_valid_r;
- assign divu_dividend_valid = divu_dividend_valid_r;
-
- assign div_mod_result = es_inst_divw ? div_dout_data[63:32] :
- es_inst_modw ? div_dout_data[31: 0] :
- es_inst_divwu ? divu_dout_data[63:32] :
- es_inst_modwu ? divu_dout_data[31: 0] :
- 32'b0;
+ assign div_stall = es_div_enable & ~div_complete;
alu u_alu(
.alu_op (es_alu_op[14:0]),
diff --git a/lacpu/rtl/cpu/mem_stage.v b/lacpu/rtl/cpu/mem_stage.v
index 479b872..f1d1e83 100755
--- a/lacpu/rtl/cpu/mem_stage.v
+++ b/lacpu/rtl/cpu/mem_stage.v
@@ -19,7 +19,10 @@ module mem_stage(
//to fw
output [`MS_TO_FW_BUS_WD -1:0] ms_to_fw_bus ,
//to es
- output [`MS_TO_ES_BUS_WD -1:0] ms_to_es_bus
+ output [`MS_TO_ES_BUS_WD -1:0] ms_to_es_bus ,
+ //div mul
+ input [31:0] div_result ,
+ input [31:0] mod_result
);
reg ms_valid;
@@ -35,12 +38,14 @@ module mem_stage(
wire [ 4:0] ms_dest;
wire [31:0] ms_alu_result;
wire [31:0] ms_pc;
+ wire [ 1:0] ms_div_op;
wire ms_Carry ;
wire ms_Sign ;
wire ms_Overflow ;
wire ms_Zero ;
- assign {br_target , //120:89
+ assign {ms_div_op , //122:121
+ br_target , //120:89
ms_branch_op , //88 :80
ms_Carry , //79 :79
ms_Sign , //78 :78
@@ -96,8 +101,10 @@ module mem_stage(
ms_load_op[2] ? ( data_sram_rdata ) :
32'b0;
- assign ms_final_result = ms_mem_to_reg ? mem_result
- : ms_alu_result;
+ assign ms_final_result = ms_mem_to_reg ? mem_result :
+ ms_div_op[0] ? div_result :
+ ms_div_op[1] ? mod_result :
+ ms_alu_result;
assign br_taken = ( ms_branch_op[0] && ms_Zero
|| ms_branch_op[1] && !ms_Zero
diff --git a/lacpu/rtl/cpu/mycpu.vh b/lacpu/rtl/cpu/mycpu.vh
index 52703ea..05bd54c 100644
--- a/lacpu/rtl/cpu/mycpu.vh
+++ b/lacpu/rtl/cpu/mycpu.vh
@@ -4,7 +4,7 @@
`define BR_BUS_WD 33
`define FS_TO_DS_BUS_WD 64
`define DS_TO_ES_BUS_WD 174
- `define ES_TO_MS_BUS_WD 121
+ `define ES_TO_MS_BUS_WD 123
`define MS_TO_WS_BUS_WD 70
`define WS_TO_RF_BUS_WD 38
diff --git a/lacpu/rtl/xilinx_ip/div/div.xci b/lacpu/rtl/xilinx_ip/div/div.xci
deleted file mode 100644
index 56b58f2..0000000
--- a/lacpu/rtl/xilinx_ip/div/div.xci
+++ /dev/null
@@ -1,161 +0,0 @@
-
-
- xilinx.com
- xci
- unknown
- 1.0
-
-
- div
-
-
-
- 1000000
- 0
- 0.000
- 0
-
- 100000000
- 0
- 0
- 0
- 0
- 0
- undef
- 0.000
- 8
- 0
- 0
- 0
-
- 100000000
- 0
- 0
- 1
- 0
- 0
- undef
- 0.000
- 4
- 0
- 0
- 0
-
- 100000000
- 0
- 0
- 1
- 0
- 0
- undef
- 0.000
- 4
- 0
- 0
- 0
- 1
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 37
- 64
- 1
- 32
- 1
- 32
- 1
- 3
- 0
- artix7
- 8
- 32
- 32
- 0
- 32
- 1
- false
- false
- div
- NonBlocking
- Performance
- Null
- false
- Radix2
- 8
- false
- 32
- false
- false
- 1
- false
- false
- 1
- 32
- 32
- 37
- Automatic
- Signed
- Remainder
- artix7
-
-
- xc7a100t
- csg324
- VERILOG
-
- MIXED
- -1
-
-
- TRUE
- TRUE
- IP_Flow
- 16
- TRUE
- .
-
- .
- 2019.2
- OUT_OF_CONTEXT
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/lacpu/rtl/xilinx_ip/divu/divu.xci b/lacpu/rtl/xilinx_ip/divu/divu.xci
deleted file mode 100644
index c39ebbc..0000000
--- a/lacpu/rtl/xilinx_ip/divu/divu.xci
+++ /dev/null
@@ -1,155 +0,0 @@
-
-
- xilinx.com
- xci
- unknown
- 1.0
-
-
- divu
-
-
-
- 1000000
- 0
- 0.000
- 0
-
- 100000000
- 0
- 0
- 0
- 0
- 0
- undef
- 0.000
- 8
- 0
- 0
- 0
-
- 100000000
- 0
- 0
- 1
- 0
- 0
- undef
- 0.000
- 4
- 0
- 0
- 0
-
- 100000000
- 0
- 0
- 1
- 0
- 0
- undef
- 0.000
- 4
- 0
- 0
- 0
- 1
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 35
- 64
- 1
- 32
- 1
- 32
- 1
- 3
- 0
- artix7
- 8
- 32
- 32
- 0
- 32
- 0
- false
- false
- divu
- NonBlocking
- Performance
- Null
- false
- Radix2
- 8
- false
- 32
- false
- false
- 1
- false
- false
- 1
- 32
- 32
- 35
- Automatic
- Unsigned
- Remainder
- artix7
-
-
- xc7a100t
- csg324
- VERILOG
-
- MIXED
- -1
-
-
- TRUE
- TRUE
- IP_Flow
- 16
- TRUE
- .
-
- .
- 2019.2
- OUT_OF_CONTEXT
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/lacpu/run_vivado/la32r/la32r.xpr b/lacpu/run_vivado/la32r/la32r.xpr
index 3afa387..794f7d0 100644
--- a/lacpu/run_vivado/la32r/la32r.xpr
+++ b/lacpu/run_vivado/la32r/la32r.xpr
@@ -54,20 +54,6 @@
-
-
-
-
-
-
-
-
-
-
-
-
-
-
@@ -75,14 +61,14 @@
-
+
-
+