LLVMのバックエンドを作るための第一歩 (52. 浮動小数点のその他の命令)

比較命令以外にも、浮動小数点の様々な演算をサポートする。例えば、以下のようなものが挙げられる。

SQRT
浮動小数点符号反転
浮動小数点絶対値
符号付整数から浮動小数点への変換、符号なし整数から浮動小数点への変換
浮動小数点から符号付整数への変換、浮動小数点から符号なし整数への変換

これらの操作について、RISC-Vの命令を定義してサポートを行う。RISC-Vには、浮動小数点のこれらのサポート命令として以下が定義されている。

	funct7	rs2	rs1	funct3	rd	opcode
FSQRT.S	0101100	00000	rs1	rm	rd	1010011
FSGNJ.S	0010000	rs2	rs1	000	rd	1010011
FSGNJN.S	0010000	rs2	rs1	001	rd	1010011
FSGNJX.S	0010000	rs2	rs1	010	rd	1010011
FCVT.W.S	1100000	00000	rs1	rm	rd	1010011
FCVT.WU.S	1100000	00001	rs1	rm	rd	1010011
FMV.X.W	1110000	00000	rs1	000	rd	1010011
FCLASS.S	1110000	00000	rs1	001	rd	1010011
FCVT.S.W	1101000	00000	rs1	rm	rd	1010011
FCVT.S.WU	1101000	00001	rs1	rm	rd	1010011
FMV.W.X	1111000	00000	rs1	000	rd	1010011

	funct7	rs2	rs1	funct3	rd	opcode
FSQRT.D	0101101	00000	rs1	rm	rd	1010011
FSGNJ.D	0010001	rs2	rs1	000	rd	1010011
FSGNJN.D	0010001	rs2	rs1	001	rd	1010011
FSGNJX.D	0010001	rs2	rs1	010	rd	1010011
FCVT.W.D	1100001	00000	rs1	rm	rd	1010011
FCVT.WU.D	1100001	00001	rs1	rm	rd	1010011
FCLASS.D	1110001	00000	rs1	001	rd	1010011
FCVT.D.W	1101001	00000	rs1	rm	rd	1010011
FCVT.D.WU	1101001	00001	rs1	rm	rd	1010011

これらの命令を実装していくことにする。MYRISCVXInstrInfoFD.tdに以下を追加する。

llvm-myriscvx/lib/Target/MYRISCVX/MYRISCVXInstrInfoFD.td

// Floating-Point instructions with 2 register source operands.
//
// Template for an R-type instruction that reads two registers of class RC
// ($rs1, $rs2) and writes one register of the same class ($rd).
//   opcode    - forwarded as the first argument of MYRISCVX_R
//   rm        - forwarded as the second argument of MYRISCVX_R
//               (presumably the funct3 field -- confirm against MYRISCVX_R)
//   funct7    - forwarded as the third argument of MYRISCVX_R
//   instr_asm - mnemonic; printed as "<mnemonic>\t$rd, $rs1, $rs2"
//   RC        - register class shared by the destination and both sources
// The selection pattern list is empty; patterns are attached separately.
class FPTwoOp<bits<7> opcode, bits<3> rm, bits<7> funct7,
                string instr_asm,
                RegisterClass RC> :
  MYRISCVX_R<opcode, rm, funct7, (outs RC:$rd), (ins RC:$rs1, RC:$rs2),
             !strconcat(instr_asm, "\t$rd, $rs1, $rs2"),
             [], IIAlu> {
    let isReMaterializable = 1;
  }


// Floating-Point instructions with 1 register source operand.
//
// Template for an R-type instruction that reads one register ($rs1) and
// writes one register ($rd). Source and destination register classes are
// separate parameters so the same template can describe conversions and
// moves between register files.
//   opcode    - forwarded as the first argument of MYRISCVX_R
//   rm        - forwarded as the second argument of MYRISCVX_R
//               (presumably the funct3 field -- confirm against MYRISCVX_R)
//   funct7    - forwarded as the third argument of MYRISCVX_R
//   rs2_op    - constant assigned to the rs2 field, since there is no
//               second source register
//   instr_asm - mnemonic; printed as "<mnemonic>\t$rd, $rs1"
//   DstRC     - register class of $rd
//   SrcRC     - register class of $rs1
// The selection pattern list is empty; patterns are attached separately.
class FPSingleOp<bits<7> opcode, bits<3> rm, bits<7> funct7, bits<5> rs2_op,
                string instr_asm,
                RegisterClass DstRC, RegisterClass SrcRC> :
  MYRISCVX_R<opcode, rm, funct7, (outs DstRC:$rd), (ins SrcRC:$rs1),
             !strconcat(instr_asm, "\t$rd, $rs1"),
             [], IIAlu> {
    let isReMaterializable = 1;
    // The rs2 slot is a fixed selector rather than a register operand.
    let rs2 = rs2_op;
  }

まずは、1オペランド用と2オペランド用の命令テンプレートを用意した。それぞれFPSingleOp, FPTwoOpというテンプレートクラスを作成する。注意点として、FPSingleOpテンプレートでは型の変換命令も扱うため、入力オペランドのレジスタタイプ(RegisterClass SrcRC)と出力オペランドのレジスタタイプ(RegisterClass DstRC)を個別に指定できるようにしている。これに基づいて、まずは単精度浮動小数点命令の定義を行う。

// Single-precision floating-point instruction definitions.
// Argument order: opcode, funct3 (rounding mode or sub-operation selector),
// funct7, [fixed rs2 field for one-operand forms], mnemonic, register class(es).
def FSQRT_S   : FPSingleOp<0b1010011, 0b000, 0b0101100, 0b00000, "fsqrt.s",  FPR_S, FPR_S>;

// Sign-injection family; funct3 selects copy (000), negate (001) or xor (010).
def FSIGNJ_S  : FPTwoOp<0b1010011, 0b000, 0b0010000, "fsgnj.s" , FPR_S>;
def FSIGNJN_S : FPTwoOp<0b1010011, 0b001, 0b0010000, "fsgnjn.s", FPR_S>;
def FSIGNJX_S : FPTwoOp<0b1010011, 0b010, 0b0010000, "fsgnjx.s", FPR_S>;

// Float -> integer conversions. These are selected for fp_to_sint/fp_to_uint,
// which truncate toward zero, so the rounding-mode field is encoded as
// 0b001 (RTZ) rather than 0b000 (round-to-nearest-even).
// NOTE(review): the printed assembly does not show the rounding mode.
def FCVT_W_S  : FPSingleOp<0b1010011, 0b001, 0b1100000, 0b00000, "fcvt.w.s",  GPR, FPR_S>;
def FCVT_WU_S : FPSingleOp<0b1010011, 0b001, 0b1100000, 0b00001, "fcvt.wu.s", GPR, FPR_S>;

// Raw bit move from the FP register file to the integer register file.
def FMV_X_W   : FPSingleOp<0b1010011, 0b000, 0b1110000, 0b00000, "fmv.x.w",   GPR, FPR_S>;

// fclass.s writes its classification mask to an integer register, so the
// destination class is GPR.
def FCLASS_S  : FPSingleOp<0b1010011, 0b001, 0b1110000, 0b00000, "fclass.s",  GPR, FPR_S>;

// Integer -> float conversions and the raw bit move into the FP register file.
def FCVT_S_W  : FPSingleOp<0b1010011, 0b000, 0b1101000, 0b00000, "fcvt.s.w",  FPR_S, GPR>;
def FCVT_S_WU : FPSingleOp<0b1010011, 0b000, 0b1101000, 0b00001, "fcvt.s.wu", FPR_S, GPR>;
def FMV_W_X   : FPSingleOp<0b1010011, 0b000, 0b1111000, 0b00000, "fmv.w.x",   FPR_S, GPR>;

次に、これらの命令を生成するためのSelectionDAGのパタンを追加する。

llvm-myriscvx/lib/Target/MYRISCVX/MYRISCVXInstrInfoFD.td

// Selection patterns and assembler aliases for single-precision operations.

// Square root maps directly onto fsqrt.s.
def : Pat<(fsqrt FPR_S:$rs1), (FSQRT_S   $rs1)>;
// Negation and absolute value are built from sign injection by passing the
// same register as both sources.
def : Pat<(fneg  FPR_S:$rs1), (FSIGNJN_S $rs1, $rs1)>;
def : Pat<(fabs  FPR_S:$rs1), (FSIGNJX_S $rs1, $rs1)>;
// Two-operand spellings that expand to the sign-injection instructions with
// $rs1 duplicated.
def : InstAlias<"fmv.s  $rd, $rs1", (FSIGNJ_S  FPR_S:$rd, FPR_S:$rs1, FPR_S:$rs1), 0>;
def : InstAlias<"fneg.s $rd, $rs1", (FSIGNJN_S FPR_S:$rd, FPR_S:$rs1, FPR_S:$rs1), 0>;
def : InstAlias<"fabs.s $rd, $rs1", (FSIGNJX_S FPR_S:$rd, FPR_S:$rs1, FPR_S:$rs1), 0>;
// Float -> signed/unsigned integer.
def : Pat<(fp_to_sint FPR_S:$rs1), (FCVT_W_S  $rs1)>;
def : Pat<(fp_to_uint FPR_S:$rs1), (FCVT_WU_S $rs1)>;
// Signed/unsigned integer -> float.
def : Pat<(sint_to_fp GPR:$rs1), (FCVT_S_W  $rs1)>;
def : Pat<(uint_to_fp GPR:$rs1), (FCVT_S_WU $rs1)>;

エイリアスとしてfmv.s, fneg.s, fabs.sを追加した。アセンブリでは、これらの記法も有効になる。

同様に、倍精度浮動小数点についても命令の定義とパタンを追加する。

llvm-myriscvx/lib/Target/MYRISCVX/MYRISCVXInstrInfoFD.td

// Double-precision floating-point instruction definitions.
// Argument order: opcode, funct3 (rounding mode or sub-operation selector),
// funct7, [fixed rs2 field for one-operand forms], mnemonic, register class(es).
def FSQRT_D   : FPSingleOp<0b1010011, 0b000, 0b0101101, 0b00000, "fsqrt.d",  FPR_D, FPR_D>;

// Sign-injection family; funct3 selects copy (000), negate (001) or xor (010).
def FSIGNJ_D  : FPTwoOp<0b1010011, 0b000, 0b0010001, "fsgnj.d" , FPR_D>;
def FSIGNJN_D : FPTwoOp<0b1010011, 0b001, 0b0010001, "fsgnjn.d", FPR_D>;
def FSIGNJX_D : FPTwoOp<0b1010011, 0b010, 0b0010001, "fsgnjx.d", FPR_D>;

// Double -> integer conversions. These are selected for fp_to_sint/fp_to_uint,
// which truncate toward zero, so the rounding-mode field is encoded as
// 0b001 (RTZ) rather than 0b000 (round-to-nearest-even).
// NOTE(review): the printed assembly does not show the rounding mode.
def FCVT_W_D  : FPSingleOp<0b1010011, 0b001, 0b1100001, 0b00000, "fcvt.w.d",  GPR, FPR_D>;
def FCVT_WU_D : FPSingleOp<0b1010011, 0b001, 0b1100001, 0b00001, "fcvt.wu.d", GPR, FPR_D>;
// Left disabled in the original; no double-precision register move is defined.
// def FMV_X_W   : FPSingleOp<0b1010011, 0b000, 0b1110001, 0b00000, "fmv.x.w",   GPR, FPR_D>;

// fclass.d writes its classification mask to an integer register, so the
// destination class is GPR.
def FCLASS_D  : FPSingleOp<0b1010011, 0b001, 0b1110001, 0b00000, "fclass.d",  GPR, FPR_D>;

// Integer -> double conversions.
def FCVT_D_W  : FPSingleOp<0b1010011, 0b000, 0b1101001, 0b00000, "fcvt.d.w",  FPR_D, GPR>;
def FCVT_D_WU : FPSingleOp<0b1010011, 0b000, 0b1101001, 0b00001, "fcvt.d.wu", FPR_D, GPR>;
// Left disabled in the original; no double-precision register move is defined.
// def FMV_W_X   : FPSingleOp<0b1010011, 0b000, 0b1111001, 0b00000, "fmv.w.x",   FPR_D, GPR>;

// Selection patterns and assembler aliases for double-precision operations.

// Square root maps directly onto fsqrt.d.
def : Pat<(fsqrt FPR_D:$rs1), (FSQRT_D $rs1)>;
// Negation and absolute value are built from sign injection by passing the
// same register as both sources.
def : Pat<(fneg  FPR_D:$rs1), (FSIGNJN_D $rs1, $rs1)>;
def : Pat<(fabs  FPR_D:$rs1), (FSIGNJX_D $rs1, $rs1)>;
// Two-operand spellings that expand to the sign-injection instructions with
// $rs1 duplicated.
def : InstAlias<"fmv.d  $rd, $rs1", (FSIGNJ_D  FPR_D:$rd, FPR_D:$rs1, FPR_D:$rs1), 0>;
def : InstAlias<"fneg.d $rd, $rs1", (FSIGNJN_D FPR_D:$rd, FPR_D:$rs1, FPR_D:$rs1), 0>;
def : InstAlias<"fabs.d $rd, $rs1", (FSIGNJX_D FPR_D:$rd, FPR_D:$rs1, FPR_D:$rs1), 0>;
// Double -> signed/unsigned integer.
def : Pat<(fp_to_sint FPR_D:$rs1), (FCVT_W_D  $rs1)>;
def : Pat<(fp_to_uint FPR_D:$rs1), (FCVT_WU_D $rs1)>;
// Signed/unsigned integer -> double.
// NOTE(review): the source side is the same GPR pattern used elsewhere for the
// single-precision conversions; presumably the result type selects between
// them -- confirm.
def : Pat<(sint_to_fp GPR:$rs1), (FCVT_D_W  $rs1)>;
def : Pat<(uint_to_fp GPR:$rs1), (FCVT_D_WU $rs1)>;

それでは、テストパタンを作成してコンパイルしてみる。

fp_others.cpp

#include <math.h>
#include <stdint.h>

// Single-precision unary operations.
// The sqrt variant is left disabled in this test.
// float f_sqrt (float in) { return sqrtf(in); }
float f_abs(float in)
{
  return fabsf(in);
}

float f_neg(float in)
{
  return -in;
}

// Double-precision unary operations.
// The sqrt variant is left disabled in this test.
// double d_sqrt (double in) { return sqrt(in); }
double d_abs(double in)
{
  return fabs(in);
}

double d_neg(double in)
{
  return -in;
}

// Floating-point to 32-bit integer conversions (signed and unsigned).
int32_t cvt_fp_to_sint(float in)
{
  const int32_t converted = static_cast<int32_t>(in);
  return converted;
}

uint32_t cvt_fp_to_uint(float in)
{
  const uint32_t converted = static_cast<uint32_t>(in);
  return converted;
}

int32_t cvt_dp_to_sint(double in)
{
  const int32_t converted = static_cast<int32_t>(in);
  return converted;
}

uint32_t cvt_dp_to_uint(double in)
{
  const uint32_t converted = static_cast<uint32_t>(in);
  return converted;
}

// 32-bit integer (signed and unsigned) to floating-point conversions.
float cvt_sint_to_fp(int32_t in)
{
  const float converted = static_cast<float>(in);
  return converted;
}

float cvt_uint_to_fp(uint32_t in)
{
  const float converted = static_cast<float>(in);
  return converted;
}

double cvt_sint_to_dp(int32_t in)
{
  const double converted = static_cast<double>(in);
  return converted;
}

double cvt_uint_to_dp(uint32_t in)
{
  const double converted = static_cast<double>(in);
  return converted;
}

単純な関数群を並べた。型の変換や、1オペランドの演算を並べる。これらがどのようにコンパイルされるのかをテストする。

# Compile to LLVM bitcode only: -c is required because the clang driver
# rejects -emit-llvm when it would also run the link step.
./bin/clang -O3 -c fp_others.cpp -emit-llvm -o fp_others.bc
# Lower the bitcode with the MYRISCVX backend and print the assembly on stdout.
./bin/llc -filetype=asm fp_others.bc -mcpu=simple32 -march=myriscvx32 -target-abi=lp64 -o -

_Z5f_absf:
        fsgnjx.s        f10, f10, f10
        ret
_Z5f_negf:
        fsgnjn.s        f10, f10, f10
        ret
_Z5d_absd:
        fsgnjx.d        f10, f10, f10
        ret
_Z5d_negd:
        fsgnjn.d        f10, f10, f10
        ret
_Z14cvt_fp_to_sintf:
        fcvt.w.s        x10, f10
        ret
_Z14cvt_fp_to_uintf:
        fcvt.wu.s       x10, f10
        ret
_Z14cvt_dp_to_sintd:
        fcvt.w.d        x10, f10
        ret
_Z14cvt_dp_to_uintd:
        fcvt.wu.d       x10, f10
        ret
_Z14cvt_sint_to_fpi:
        fcvt.s.w        f10, x10
        ret
_Z14cvt_uint_to_fpj:
        fcvt.s.wu       f10, x10
        ret
_Z14cvt_sint_to_dpi:
        fcvt.d.w        f10, x10
        ret
_Z14cvt_uint_to_dpj:
        fcvt.d.wu       f10, x10
        ret

正しく命令が生成できていそうだ。

FPGA開発日記

カテゴリ別記事インデックス https://msyksphinz.github.io/github_pages , English Version https://fpgadevdiary.hatenadiary.com/

LLVMのバックエンドを作るための第一歩 (52. 浮動小数点のその他の命令)