FPGA開発日記

カテゴリ別記事インデックス https://msyksphinz.github.io/github_pages , English Version https://fpgadevdiary.hatenadiary.com/

Binary Translation型エミュレータを作る(FCVT命令の実装)

Binary Translation型のエミュレータの開発続き。今回は久しぶりに浮動小数点に戻って、FCVT命令を実装する。

FCVT命令は、すべてSoftFloatでの実装となる。FCVT.xx命令をデコードすると、すべてFCVTのヘルパー関数にジャンプし、そこで処理を行うことになる。

という訳で、簡単にTCGを記述してヘルパー関数にジャンプする実装を追加した。

    /// Translates an FCVT.W.S instruction into TCG ops.
    ///
    /// No conversion code is emitted inline: the returned list holds a single
    /// helper-call op targeting `CALL_HELPER_IDX::CALL_FCVT_IDX`, with the
    /// `CallFcvtIdx::W_S` selector, the destination register and the source
    /// register passed as its three arguments (in that order).
    pub fn translate_fcvt_w_s(&mut self, inst: &InstrInfo) -> Vec<TCGOp> {
        // Register operands are decoded from the raw instruction word.
        let src_reg = TCGv::new_reg(get_rs1_addr!(inst.inst) as u64);
        let dst_reg = TCGv::new_reg(get_rd_addr!(inst.inst) as u64);
        // Immediate that tells the shared FCVT helper which conversion to run.
        let selector = TCGv::new_imm(CallFcvtIdx::W_S as u64);

        vec![TCGOp::new_helper_call_arg3(
            CALL_HELPER_IDX::CALL_FCVT_IDX as usize,
            selector,
            dst_reg,
            src_reg,
        )]
    }

このヘルパー関数は、Rustの実装で処理される。具体的には以下のようになる。

    /// Shared helper for every FCVT.xx variant.
    ///
    /// * `call_idx` - selects the conversion; decoded with `CallFcvtIdx::from_u64`.
    /// * `rd`       - index of the destination register (integer or FP file,
    ///                depending on the conversion).
    /// * `rs1`      - index of the source register (integer or FP file,
    ///                depending on the conversion).
    /// * the fifth argument is unused.
    ///
    /// After the conversion the exception flag bits are printed and written to
    /// the `FFlags` CSR through `csrrw`. Always returns 0.
    ///
    /// NOTE(review): the rounding mode is hard-coded per arm (mostly
    /// `TowardZero`, `TiesToEven` for S_WU / S_LU / D_LU) rather than taken from
    /// the instruction or a CSR - confirm this is intended.
    /// NOTE(review): `csrrw` presumably overwrites FFlags instead of OR-ing the
    /// new flags into it - verify against the CSR implementation.
    pub fn helper_func_fcvt(emu: &mut EmuEnv, call_idx: u64, rd: u64, rs1: u64, _: u64) -> usize {
        let src = rs1 as usize;
        let dst = rd as usize;

        // Presumably pushes an empty flag set into the softfloat state so that
        // only this conversion's exceptions are observed - TODO confirm.
        let mut flag = ExceptionFlags::default();
        flag.set();

        match CallFcvtIdx::from_u64(call_idx) {
            // ---- f32 -> 32-bit integer ----
            CallFcvtIdx::W_S => {
                let converted = F32::from_bits(emu.m_fregs[src] as u32)
                    .to_i32(RoundingMode::TowardZero, true);
                emu.m_iregs[dst] = converted as u64;
            }
            CallFcvtIdx::WU_S => {
                let converted = F32::from_bits(emu.m_fregs[src] as u32)
                    .to_u32(RoundingMode::TowardZero, true);
                // Goes through i32 so the 32-bit result is sign-extended to 64 bits.
                emu.m_iregs[dst] = converted as i32 as u64;
            }
            // ---- 32-bit integer -> f32 ----
            CallFcvtIdx::S_W => {
                let converted = F32::from_i32(emu.m_iregs[src] as i32, RoundingMode::TowardZero);
                emu.m_fregs[dst] = converted.bits() as u64;
            }
            CallFcvtIdx::S_WU => {
                let converted = F32::from_u32(emu.m_iregs[src] as u32, RoundingMode::TiesToEven);
                emu.m_fregs[dst] = converted.bits() as u64;
            }
            // ---- f64 <-> f32 ----
            CallFcvtIdx::S_D => {
                let converted = F64::from_bits(emu.m_fregs[src] as u64)
                    .to_f32(RoundingMode::TowardZero);
                emu.m_fregs[dst] = converted.bits() as u64;
            }
            CallFcvtIdx::D_S => {
                let converted = F32::from_bits(emu.m_fregs[src] as u32)
                    .to_f64(RoundingMode::TowardZero);
                emu.m_fregs[dst] = converted.bits() as u64;
            }
            // ---- f64 -> 32-bit integer ----
            CallFcvtIdx::W_D => {
                let converted = F64::from_bits(emu.m_fregs[src] as u64)
                    .to_i32(RoundingMode::TowardZero, true);
                emu.m_iregs[dst] = converted as u64;
            }
            CallFcvtIdx::WU_D => {
                let converted = F64::from_bits(emu.m_fregs[src] as u64)
                    .to_u32(RoundingMode::TowardZero, true);
                // Goes through i32 so the 32-bit result is sign-extended to 64 bits.
                emu.m_iregs[dst] = converted as i32 as u64;
            }
            // ---- 32-bit integer -> f64 ----
            CallFcvtIdx::D_W => {
                let converted = F64::from_i32(emu.m_iregs[src] as i32, RoundingMode::TowardZero);
                emu.m_fregs[dst] = converted.bits() as u64;
            }
            CallFcvtIdx::D_WU => {
                let converted = F64::from_u32(emu.m_iregs[src] as u32, RoundingMode::TowardZero);
                emu.m_fregs[dst] = converted.bits() as u64;
            }
            // ---- f32 -> 64-bit integer ----
            CallFcvtIdx::L_S => {
                let converted = F32::from_bits(emu.m_fregs[src] as u32)
                    .to_i64(RoundingMode::TowardZero, true);
                emu.m_iregs[dst] = converted as u64;
            }
            CallFcvtIdx::LU_S => {
                let converted = F32::from_bits(emu.m_fregs[src] as u32)
                    .to_u64(RoundingMode::TowardZero, true);
                emu.m_iregs[dst] = converted as u64;
            }
            // ---- 64-bit integer -> f32 ----
            CallFcvtIdx::S_L => {
                let converted = F32::from_i64(emu.m_iregs[src] as i64, RoundingMode::TowardZero);
                emu.m_fregs[dst] = converted.bits() as u64;
            }
            CallFcvtIdx::S_LU => {
                let converted = F32::from_u64(emu.m_iregs[src] as u64, RoundingMode::TiesToEven);
                emu.m_fregs[dst] = converted.bits() as u64;
            }
            // ---- f64 -> 64-bit integer ----
            CallFcvtIdx::L_D => {
                let converted = F64::from_bits(emu.m_fregs[src] as u64)
                    .to_i64(RoundingMode::TowardZero, true);
                emu.m_iregs[dst] = converted as u64;
            }
            CallFcvtIdx::LU_D => {
                let converted = F64::from_bits(emu.m_fregs[src] as u64)
                    .to_u64(RoundingMode::TowardZero, true);
                emu.m_iregs[dst] = converted as u64;
            }
            // ---- 64-bit integer -> f64 ----
            CallFcvtIdx::D_L => {
                let converted = F64::from_i64(emu.m_iregs[src] as i64, RoundingMode::TowardZero);
                emu.m_fregs[dst] = converted.bits() as u64;
            }
            CallFcvtIdx::D_LU => {
                let converted = F64::from_u64(emu.m_iregs[src] as u64, RoundingMode::TiesToEven);
                emu.m_fregs[dst] = converted.bits() as u64;
            }
        }

        // Presumably reads back the exception flags raised by the conversion
        // above - TODO confirm; the bits are then published through the FFlags CSR.
        flag.get();
        let raised = flag.bits();
        println!("ret_flags = {:x}", raised);
        emu.m_csr.csrrw(CsrAddr::FFlags, raised as i64);

        0
    }

途中でSoftFloatのラッパーにオプションが必要になったのでPRを投げたところ、すぐに受け入れてくれた。ありがたい。

これだけの実装で、FCVT命令は動くようになった。テストパタンの動作も問題無いようだ。

$ cargo run -- --step --dump-gpr --dump-fpr --dump-guest --dump-host --elf-file rv64uf-p-fcvt
========= BLOCK START =========
85: Guest PC Address = 800001ac
 00000000800001ac:00000000800001ac Hostcode d0257053 : fcvt.s.l ft0, a0
00007FD7A9770000 48BF28B449F9FF7F0000 movabs    $0x7FFF_F949_B428,%rdi
00007FD7A977000A 48BE0C00000000000000 movabs    $0xC,%rsi
00007FD7A9770014 48BA0000000000000000 movabs    $0,%rdx
00007FD7A977001E 48B90A00000000000000 movabs    $0xA,%rcx
00007FD7A9770028 FF95E0040000         callq     *0x4E0(%rbp)
00007FD7A977002E E9DCFF4E00           jmp       0x0000_7FD7_A9C6_000F
ret_flags = 0
x00(zero ) = 0000000000000000  x01(ra   ) = 0000000000000000  x02(sp   ) = 0000000000000000  x03(gp   ) = 0000000000000006
x04(tp   ) = 0000000000000000  x05(t0   ) = 0000000080000108  x06(t1   ) = 0000000000000000  x07(t2   ) = 0000000000000000
x08(s0/fp) = 0000000000000000  x09(s1   ) = 0000000000000000  x10(a0   ) = 0000000000000002  x11(a1   ) = 0000000000000000
x12(a2   ) = 0000000000000000  x13(a3   ) = 0000000040000000  x14(a4   ) = 0000000000000000  x15(a5   ) = 0000000000000000
x16(a6   ) = 0000000000000000  x17(a7   ) = 0000000000000000  x18(s2   ) = 0000000000000000  x19(s3   ) = 0000000000000000
x20(s4   ) = 0000000000000000  x21(s5   ) = 0000000000000000  x22(s6   ) = 0000000000000000  x23(s7   ) = 0000000000000000
x24(s8   ) = 0000000000000000  x25(s9   ) = 0000000000000000  x26(s10  ) = 0000000000000000  x27(s11  ) = 0000000000000000
x28(t3   ) = 0000000000000000  x29(t4   ) = 0000000000000000  x30(t5   ) = 0000000000000000  x31(t6   ) = 0000000000000000

f00(ft0  ) = 0000000040000000  f01(ft1  ) = 0000000000000000  f02(ft2  ) = 0000000000000000  f03(ft3  ) = 0000000000000000
f04(ft4  ) = 0000000000000000  f05(ft5  ) = 0000000000000000  f06(ft6  ) = 0000000000000000  f07(ft7  ) = 0000000000000000
f08(fs0  ) = 0000000000000000  f09(fs1  ) = 0000000000000000  f10(fa0  ) = 0000000000000000  f11(fa1  ) = 0000000000000000
f12(fa2  ) = 0000000000000000  f13(fa3  ) = 0000000000000000  f14(fa4  ) = 0000000000000000  f15(fa5  ) = 0000000000000000
f16(fa6  ) = 0000000000000000  f17(fa7  ) = 0000000000000000  f18(fs2  ) = 0000000000000000  f19(fs3  ) = 0000000000000000
f20(fs4  ) = 0000000000000000  f21(fs5  ) = 0000000000000000  f22(fs6  ) = 0000000000000000  f23(fs7  ) = 0000000000000000
f24(fs8  ) = 0000000000000000  f25(fs9  ) = 0000000000000000  f26(fs10 ) = 0000000000000000  f27(fs11 ) = 0000000000000000
f28(ft8  ) = 0000000000000000  f29(ft9  ) = 0000000000000000  f30(ft10 ) = 0000000000000000  f31(ft11 ) = 0000000000000000
========= BLOCK START =========

変換されたx86命令としては少なめ。全部ヘルパー関数による処理になってしまっている。