ベクトル命令のデバッグで面倒くさいのは、複数のレジスタへの書き込みが発生したときの対処だ。
LMUL>1の時に、複数のベクトルレジスタの検証が行えるように環境を変更しよう。
void step_spike(long long rtl_time, long long rtl_pc, int rtl_priv, long long rtl_mstatus, int rtl_exception, int rtl_exception_cause, int rtl_cmt_id, int rtl_grp_id, int rtl_insn, int rtl_wr_valid, int rtl_wr_type, int rtl_wr_gpr_addr, int rtl_wr_gpr_rnid, long long rtl_wr_val, const uint8_t* rtl_wr_vec_val0, const uint8_t* rtl_wr_vec_val1, const uint8_t* rtl_wr_vec_val2, const uint8_t* rtl_wr_vec_val3, const uint8_t* rtl_wr_vec_val4, const uint8_t* rtl_wr_vec_val5, const uint8_t* rtl_wr_vec_val6, const uint8_t* rtl_wr_vec_val7)
LMULは最大8なので、1命令で書き込まれるベクトル・レジスタも最大8本になる。そこで、RTLから常に8本分のレジスタの値を取得し、そのうちLMULの数だけをISSの結果と比較する。
} else if (rtl_wr_valid && (iss_wr_type == 2 || rtl_wr_type == 2)) { // VPR write const uint8_t* rtl_wr_vec_val[8] = { rtl_wr_vec_val0, rtl_wr_vec_val1, rtl_wr_vec_val2, rtl_wr_vec_val3, rtl_wr_vec_val4, rtl_wr_vec_val5, rtl_wr_vec_val6, rtl_wr_vec_val7 }; for (size_t lmul = 0; lmul < p->VU.vflmul; lmul++) { bool diff_found = false; for (int b = 0; b < g_rv_vlen / 8; b++) { if (rtl_wr_vec_val[lmul][b] != static_cast<uint8_t *>(p->VU.reg_file)[(rtl_wr_gpr_addr + lmul) * (g_rv_vlen/8) + b]) { diff_found = true; } }
RTL側の実装はこう。8つのベクトル・レジスタをまとめて、比較環境に渡していく。
// Byte-wise view of eight consecutive entries of w_physical_vec_data per commit slot.
// Dimensions: [commit slot][register offset 0..7][byte index within one VLEN_W-bit entry].
// The eight per-slot sub-arrays are what the step_spike call passes as its vector arguments.
byte w_physical_vec_data_rnid[scariv_pkg::DISP_SIZE-1:0][8][riscv_vec_conf_pkg::VLEN_W/8-1:0];

generate
  // Nothing is generated when VLEN_W is 0.
  if (riscv_vec_conf_pkg::VLEN_W != 0) begin : vpu
    for (genvar slot = 0; slot < scariv_pkg::DISP_SIZE; slot++) begin
      for (genvar reg_ofs = 0; reg_ofs < 8; reg_ofs++) begin
        for (genvar byte_pos = 0; byte_pos < riscv_vec_conf_pkg::VLEN_W/8; byte_pos++) begin : array_loop
          // Byte `byte_pos` of the entry selected by (wr_reg.rnid + reg_ofs) of the
          // instruction committed in this slot.
          // NOTE(review): rnid + reg_ofs is not range-checked here, and this presumes the
          // registers of one group sit at consecutive indices - confirm against the renamer.
          assign w_physical_vec_data_rnid[slot][reg_ofs][byte_pos] =
            w_physical_vec_data[committed_rob_entry.inst[slot].wr_reg.rnid + reg_ofs][byte_pos*8 +: 8];
        end
      end
    end
  end
endgenerate
// Report the instruction committed in slot grp_idx to the ISS comparison routine.
// Arguments are positional; the names in the trailing comments are the parameter names
// of the C++ step_spike signature (presumably bound in the same order - confirm against
// the DPI import declaration).
step_spike ($time / 4,                                                                                  // rtl_time
            longint'(committed_rob_entry.inst[grp_idx].pc_addr),                                        // rtl_pc
            int'(u_scariv_subsystem_wrapper.u_scariv_subsystem.u_tile.u_csu.u_scariv_csr.r_priv),       // rtl_priv
            u_scariv_subsystem_wrapper.u_scariv_subsystem.u_tile.u_rob.w_sim_mstatus[u_scariv_subsystem_wrapper.u_scariv_subsystem.u_tile.u_rob.w_out_cmt_entry_id][grp_idx], // rtl_mstatus
            u_scariv_subsystem_wrapper.u_scariv_subsystem.u_tile.u_rob.w_valid_except_grp_id[grp_idx],  // rtl_exception
            u_scariv_subsystem_wrapper.u_scariv_subsystem.u_tile.u_rob.w_except_type_selected,          // rtl_exception_cause
            u_scariv_subsystem_wrapper.u_scariv_subsystem.u_tile.u_rob.w_out_cmt_id,                    // rtl_cmt_id
            1 << grp_idx,                                                                               // rtl_grp_id (one-hot encoding of the slot index)
            // rtl_insn: the rvc_inst field when rvc_inst_valid is set, otherwise the inst field
            committed_rob_entry.inst[grp_idx].rvc_inst_valid ? committed_rob_entry.inst[grp_idx].rvc_inst : committed_rob_entry.inst[grp_idx].inst,
            committed_rob_entry.inst[grp_idx].wr_reg.valid,                                             // rtl_wr_valid
            committed_rob_entry.inst[grp_idx].wr_reg.typ,                                               // rtl_wr_type
            committed_rob_entry.inst[grp_idx].wr_reg.regidx,                                            // rtl_wr_gpr_addr
            committed_rob_entry.inst[grp_idx].wr_reg.rnid,                                              // rtl_wr_gpr_rnid
            // rtl_wr_val: taken from the integer array for GPR destinations, from the FP array otherwise
            committed_rob_entry.inst[grp_idx].wr_reg.typ == scariv_pkg::GPR ? w_physical_int_data[committed_rob_entry.inst[grp_idx].wr_reg.rnid] : w_physical_fp_data [committed_rob_entry.inst[grp_idx].wr_reg.rnid],
            // rtl_wr_vec_val0 .. rtl_wr_vec_val7: the eight per-slot byte arrays of w_physical_vec_data_rnid
            w_physical_vec_data_rnid[grp_idx][0], w_physical_vec_data_rnid[grp_idx][1], w_physical_vec_data_rnid[grp_idx][2], w_physical_vec_data_rnid[grp_idx][3], w_physical_vec_data_rnid[grp_idx][4], w_physical_vec_data_rnid[grp_idx][5], w_physical_vec_data_rnid[grp_idx][6], w_physical_vec_data_rnid[grp_idx][7] );
こんな感じで、ログが出力されるようになる。
246541 : 218 : PC=[0000000080000170] (U,33,01) 02050107 vle8.v v2, (a0) VPR[02](16) <= f50471f1_efe95ed0_caced9af_287d3124_e3c0d409_47cd3b17_7d115150_b866a93d_8133a51e_228fc5c6_a340fa12_4cb8dc1d_f3dccbfa_ddff6231_3ad64ccb_ddb8bebc_ VPR[03](17) <= 2cb424c2_0332619c_e7dac12c_7f8e087c_d9605c57_e4400392_b0fbd100_4aea0272_b7fb9e95_d8f62420_ea875530_9ccef534_dc6de5fa_4dac412d_766c35e5_cc4055f3_
というわけで、問題はvadd.viの実装らしい。
246551 : 228 : PC=[0000000080000192] (U,39,01) 02403157 vadd.vi v2, v4, 0 VPR[02](20) <= f50471f1_efe95ed0_caced9af_287d3124_e3c0d409_47cd3b17_7d115150_b866a93d_8133a51e_228fc5c6_a340fa12_4cb8dc1d_f3dccbfa_ddff6231_3ad64ccb_ddb8bebc_ ========================================== Wrong VPR[03](20): ISS[03] = 2cb424c2_0332619c_e7dac12c_7f8e087c_d9605c57_e4400392_b0fbd100_4aea0272_b7fb9e95_d8f62420_ea875530_9ccef534_dc6de5fa_4dac412d_766c35e5_cc4055f3_ RTL[03] = 00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_00000000_ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ~~~~~~~~ ===============================