FPGA開発日記

FPGAというより、コンピュータアーキテクチャかもね! カテゴリ別記事インデックス https://msyksphinz.github.io/github_pages

LLVMのバックエンドを作るための第一歩 (21. 演算命令の追加)

f:id:msyksphinz:20190425001356p:plain

定数が生成できるようになったので、次は演算命令を追加する。 こちらも同様にMYRISCVXInstrInfo.tdに命令パタンを追加していく。

  • llvm-myriscvx/lib/Target/MYRISCVX/MYRISCVXInstrInfo.td
// I-type ALU instructions (opcode OP-IMM = 0b0010011).
// RISC-V sign-extends the 12-bit immediate of *every* OP-IMM instruction,
// including the logical ones. XORI/ORI/ANDI therefore use the same signed
// operand (simm12) and pattern leaf (immSExt12) as ADDI; matching them with a
// zero-extended leaf would let constants 2048..4095 be selected even though
// the hardware would interpret the encoded field as a negative value.
def ADDI : ArithLogicI<0b0010011, 0b000, "addi", add, simm12, immSExt12, GPR>;
def XORI : ArithLogicI<0b0010011, 0b100, "xori", xor, simm12, immSExt12, GPR>;
def ORI  : ArithLogicI<0b0010011, 0b110, "ori",  or,  simm12, immSExt12, GPR>;
def ANDI : ArithLogicI<0b0010011, 0b111, "andi", and, simm12, immSExt12, GPR>;

// U-type and R-type ALU instructions.
// ADD and SUB share funct3 and are distinguished only by funct7.
def LUI  : ArithLogicU<0b0110111, "lui", GPR, simm20, immSExt20>;
def ADD  : ArithLogicR<0b0110011, 0b000, 0b0000000, "add", add, GPR>;
def SUB  : ArithLogicR<0b0110011, 0b000, 0b0100000, "sub", sub, GPR>;

// Register-amount shifts use ShiftR, whose argument order is
// (opcode, funct7, funct3, isRotate, mnemonic, node, regclass).
def SLL  : ShiftR<0b0110011, 0b0000000, 0b001, 0x0, "sll", shl, GPR>;
def AND  : ArithLogicR<0b0110011, 0b111, 0b0000000, "and", and, GPR>;
def SRL  : ShiftR<0b0110011, 0b0000000, 0b101, 0x0, "srl", srl, GPR>;
def SRA  : ShiftR<0b0110011, 0b0100000, 0b101, 0x0, "sra", sra, GPR>;
def OR   : ArithLogicR<0b0110011, 0b110, 0b0000000, "or",  or,  GPR>;
def XOR  : ArithLogicR<0b0110011, 0b100, 0b0000000, "xor", xor, GPR>;

ArithLogicI, ArithLogicRの定義を見直してみる。

// Register-register ALU instruction: $rd = OpNode($rs1, $rs2).
//   opcode, funct3, funct7 : forwarded unchanged to MYRISCVX_R
//   instr_asm              : mnemonic; printed as "<mnemonic>\t$rd, $rs1, $rs2"
//   OpNode                 : SelectionDAG node used in the selection pattern
//   RC                     : register class shared by $rd, $rs1 and $rs2
class ArithLogicR<bits<7> opcode, bits<3> funct3, bits<7> funct7,
                  string instr_asm, SDNode OpNode, RegisterClass RC>
  : MYRISCVX_R<opcode, funct3, funct7,
               (outs RC:$rd),
               (ins RC:$rs1, RC:$rs2),
               instr_asm # "\t$rd, $rs1, $rs2",
               [(set RC:$rd, (OpNode RC:$rs1, RC:$rs2))],
               IIAlu> {
  let isReMaterializable = 1;
}


// Register-immediate ALU instruction: $rd = OpNode($rs1, $imm12).
//   opcode, funct3 : forwarded unchanged to MYRISCVX_I
//   instr_asm      : mnemonic; printed as "<mnemonic>\t$rd, $rs1, $imm12"
//   OpNode         : SelectionDAG node used in the selection pattern
//   Od             : operand type of $imm12 in the instruction's input list
//   imm_type       : pattern leaf that the immediate must match during selection
//   RC             : register class shared by $rd and $rs1
class ArithLogicI<bits<7> opcode, bits<3> funct3,
                  string instr_asm, SDNode OpNode,
                  Operand Od, PatLeaf imm_type, RegisterClass RC>
  : MYRISCVX_I<opcode, funct3,
               (outs RC:$rd),
               (ins RC:$rs1, Od:$imm12),
               instr_asm # "\t$rd, $rs1, $imm12",
               [(set RC:$rd, (OpNode RC:$rs1, imm_type:$imm12))],
               IIAlu> {
  let isReMaterializable = 1;
}

例えば、

def ADD  : ArithLogicR<0b0110011, 0b000, 0b0000000, "add", add, GPR>;

の場合、

  • MYRISCVX_Rクラスをベースに扱う。
  • 命令ニーモニックは、!strconcat(instr_asm, "\t$rd, $rs1, $rs2") により add\t$rd, $rs1, $rs2 となる。
  • 命令の動作パタンは、(set RC:$rd, (add RC:$rs1, RC:$rs2))
f:id:msyksphinz:20190608175058p:plain
ADDI命令の定義方法
f:id:msyksphinz:20190608175215p:plain
ADD命令の定義方法

となる。これを、算術演算、論理演算について定義して行く。

シフト命令は特殊なフォーマットを定義して、即値、レジスタ指定両方のフォーマットを決める。

  • llvm-myriscvx/lib/Target/MYRISCVX/MYRISCVXInstrInfo.td
// Encoding format for shift-by-immediate instructions.
//   opcode     : placed in Inst{6-0}
//   funct3     : placed in Inst{14-12}
//   arithshift : single bit placed in Inst{30}
//   outs, ins, asmstr, pattern, itin : forwarded unchanged to MYRISCVXInst,
//                                      together with the format tag FrmI
class MYRISCVX_ISHIFT<bits<7> opcode, bits<3> funct3, bit arithshift,
                      dag outs, dag ins, string asmstr, list<dag> pattern,
                      InstrItinClass itin>:
  MYRISCVXInst<outs, ins, asmstr, pattern, itin, FrmI>
{
  // Operand fields that are placed into the instruction word below.
  bits<5> rs1;
  bits<5> rd;
  // NOTE(review): 6 bits wide, presumably so the same format also covers
  // 64-bit shift amounts — confirm against the intended targets.
  bits<6> shamt;

  // Bit 31 and bits 29-26 are fixed to zero; bit 30 carries arithshift.
  let Inst{31} = 0;
  let Inst{30} = arithshift;
  let Inst{29-26} = 0;
  let Inst{25-20} = shamt;
  let Inst{19-15} = rs1;
  let Inst{14-12} = funct3;
  let Inst{11-7}  = rd;
  let Inst{6-0}   = opcode;
}
  • llvm-myriscvx/lib/Target/MYRISCVX/MYRISCVXInstrInfo.td
// Shifts
// Shift-by-immediate instruction: $rd = OpNode($rs1, $shamt).
//   opcode, funct3, arithshift : forwarded unchanged to MYRISCVX_ISHIFT
//   instr_asm                  : mnemonic; printed as "<mnemonic>\t$rd, $rs1, $shamt"
//   OpNode                     : SelectionDAG shift node used in the pattern
//   PF                         : pattern fragment the shift amount must match
//   ImmOpnd                    : operand type of $shamt in the input list
//   RC                         : register class of $rd and $rs1
// The pattern result uses RC (not a hard-coded GPR) so that it always agrees
// with the (outs RC:$rd) declaration, whatever register class is passed in.
class ShiftImm<bits<7> opcode, bits<3> funct3, bit arithshift, string instr_asm,
               SDNode OpNode, PatFrag PF, Operand ImmOpnd,
               RegisterClass RC>:
  MYRISCVX_ISHIFT<opcode, funct3, arithshift, (outs RC:$rd), (ins RC:$rs1, ImmOpnd:$shamt),
                  !strconcat(instr_asm, "\t$rd, $rs1, $shamt"),
                  [(set RC:$rd, (OpNode RC:$rs1, PF:$shamt))], IIAlu> {
}


// Shift-by-immediate on the GPR register class.
// Fixes ShiftImm's pattern fragment to immZExt5 and its immediate operand to
// shamt; the encoding bits, mnemonic and DAG node stay configurable.
class ShiftImm32<bits<7> opcode, bits<3> funct3, bit arithshift,
                 string instr_asm, SDNode OpNode>
  : ShiftImm<opcode, funct3, arithshift, instr_asm, OpNode,
             immZExt5, shamt, GPR>;


// Shift-by-register instruction: $rd = OpNode($rs1, $rs2).
//   opcode, funct7, funct3 : encoding fields. Note that funct7 comes BEFORE
//                            funct3 in this parameter list, the reverse of
//                            the order MYRISCVX_R (and ArithLogicR) take.
//   isRotate               : accepted for interface compatibility; unused here
//   instr_asm              : mnemonic; printed as "<mnemonic>\t$rd, $rs1, $rs2"
//   OpNode                 : SelectionDAG shift node used in the pattern
//   RC                     : register class of all three operands
// Operands are named $rd/$rs1/$rs2, exactly as ArithLogicR names them on the
// same MYRISCVX_R base, so the two R-type classes bind operands to encoding
// fields the same way (NOTE(review): MYRISCVX_R's field names are not visible
// here — confirm they are rd/rs1/rs2). The pattern result uses RC rather than
// a hard-coded GPR so it agrees with the (outs RC:$rd) declaration.
class ShiftR<bits<7> opcode, bits<7> funct7, bits<3> funct3,
             bits<4> isRotate, string instr_asm,
             SDNode OpNode, RegisterClass RC>:
  MYRISCVX_R<opcode, funct3, funct7, (outs RC:$rd), (ins RC:$rs1, RC:$rs2),
          !strconcat(instr_asm, "\t$rd, $rs1, $rs2"),
          [(set RC:$rd, (OpNode RC:$rs1, RC:$rs2))], IIAlu> {
}

ShiftRとShiftImm32を使って、即値とレジスタ指定のシフト命令を定義する。

  • llvm-myriscvx/lib/Target/MYRISCVX/MYRISCVXInstrInfo.td
// Register-amount shifts. ShiftR arguments are
// (opcode, funct7, funct3, isRotate, mnemonic, DAG node, register class).
// SRL and SRA share funct3 0b101 and differ only in funct7.
def SLL  : ShiftR<0b0110011, 0b0000000, 0b001, 0x0, "sll", shl, GPR>;
def SRL  : ShiftR<0b0110011, 0b0000000, 0b101, 0x0, "srl", srl, GPR>;
def SRA  : ShiftR<0b0110011, 0b0100000, 0b101, 0x0, "sra", sra, GPR>;

// Immediate-amount shifts. ShiftImm32 arguments are
// (opcode, funct3, arithshift, mnemonic, DAG node).
// SRLI and SRAI share funct3 0b101 and differ only in the arithshift bit.
def SRLI : ShiftImm32<0b0010011, 0b101, 0, "srli", srl>;
def SLLI : ShiftImm32<0b0010011, 0b001, 0, "slli", shl>;
def SRAI : ShiftImm32<0b0010011, 0b101, 1, "srai", sra>;

まず、LLVM IRが正しく命令に変換されるかテストしてみる。以下のch4_math.llを使用する。

; Instruction-selection test: performs one of each arithmetic/shift operation
; on two i32 values and returns the sum of all results, so that none of the
; operations is dead.
; Function Attrs: nounwind
define i32 @_Z9test_mathv() #0 {
  ; Two stack slots; no store precedes the loads below, so the test only
  ; checks which instructions are selected, not the computed value.
  %a = alloca i32, align 4
  %b = alloca i32, align 4
  %1 = load i32, i32* %a, align 4
  %2 = load i32, i32* %b, align 4

  ; Register-register arithmetic.
  %3 = add nsw i32 %1, %2
  %4 = sub nsw i32 %1, %2
  %5 = mul nsw i32 %1, %2
  ; Shifts by a constant amount.
  %6 = shl i32 %1, 2
  %7 = ashr i32 %1, 2
  %8 = lshr i32 %1, 30
  ; Shifts by a variable amount (%9 and %10 shift a constant value).
  %9 = shl i32 1, %2
  %10 = ashr i32 128, %2
  %11 = ashr i32 %1, %2

  ; Accumulate every intermediate result into the return value.
  %12 = add nsw i32 %3, %4
  %13 = add nsw i32 %12, %5
  %14 = add nsw i32 %13, %6
  %15 = add nsw i32 %14, %7
  %16 = add nsw i32 %15, %8
  %17 = add nsw i32 %16, %9
  %18 = add nsw i32 %17, %10
  %19 = add nsw i32 %18, %11

  ret i32 %19
}
./bin/llc -debug -march=myriscvx32 -mcpu=simple32 -mattr=+64bit -relocation-model=pic -filetype=asm ../lbdex/input/ch4_math.ll -o -
        .globl  _Z9test_mathv           # -- Begin function _Z9test_mathv
        .type   _Z9test_mathv,@function
        .ent    _Z9test_mathv           # @_Z9test_mathv
_Z9test_mathv:
        .cfi_startproc
        .frame  $x8,8,$x1
        .mask   0x00000000,0
        .set    noreorder
        .set    nomacro
        discovered a new reachable node %bb.0
# %bb.0:
        addi    x2, x2, -8
        .cfi_def_cfa_offset 8
        lw      x10, 0(x2)
        lw      x11, 4(x2)
        sub     x12, x11, x10
        add     x13, x11, x10
        add     x12, x13, x12
        mul     x13, x11, x10
        add     x12, x12, x13
        slli    x13, x11, 2
        add     x12, x12, x13
        srai    x13, x11, 2
        add     x12, x12, x13
        srli    x13, x11, 30
        add     x12, x12, x13
        addi    x13, zero, 1
        sll     x13, x13, x10
        add     x12, x12, x13
        sra     x11, x11, x10
        addi    x13, zero, 128
        srl     x10, x13, x10
        add     x10, x12, x10
        add     x10, x10, x11
        addi    x2, x2, 8
        ret     x1
        .set    macro
        .set    reorder
        .end    _Z9test_mathv
$func_end0:
        .size   _Z9test_mathv, ($func_end0)-_Z9test_mathv
        .cfi_endproc
                                        # -- End function

        .section        ".note.GNU-stack","",@progbits

命令が生成されていることが確認できた。なかなか良さそうだ。

さらに、乗除算命令を追加する。

  • llvm-myriscvx/lib/Target/MYRISCVX/MYRISCVXInstrInfo.td
// Multiply / divide / remainder instructions (funct7 = 0b0000001).
def MUL   : ArithLogicR<0b0110011, 0b000, 0b0000001, "mul",    mul,   GPR>;
def MULH  : ArithLogicR<0b0110011, 0b001, 0b0000001, "mulh",   mulhs, GPR>;
// MULHSU multiplies a signed rs1 by an unsigned rs2; no generic SelectionDAG
// node has that meaning. ArithLogicR requires an SDNode argument, so mulhs is
// still passed, but the generated selection pattern is cleared: otherwise
// MULHSU would carry the same pattern as MULH and could be selected for a
// signed x signed high multiply. The instruction stays defined (encoding and
// assembly string) but is never chosen by pattern matching.
def MULHSU: ArithLogicR<0b0110011, 0b010, 0b0000001, "mulhsu", mulhs, GPR> {
  let Pattern = []<dag>;
}
def MULHU : ArithLogicR<0b0110011, 0b011, 0b0000001, "mulhu",  mulhu, GPR>;
def DIV   : ArithLogicR<0b0110011, 0b100, 0b0000001, "div",    sdiv,  GPR>;
def DIVU  : ArithLogicR<0b0110011, 0b101, 0b0000001, "divu",   udiv,  GPR>;
def REM   : ArithLogicR<0b0110011, 0b110, 0b0000001, "rem",    srem,  GPR>;
def REMU  : ArithLogicR<0b0110011, 0b111, 0b0000001, "remu",   urem,  GPR>;

これで乗除算命令が生成できるようになる。

// Minimal multiplication test: multiplies two local constants and returns the
// product (12 * 11).
int test_mult()
{
  int a = 12;
  int b = 11;

  // The product is written back into b before being returned.
  b = a * b;

  return b;
}
./bin/clang -target mips-unknown-linux-gnu -c ../lbdex/input/ch4_1_multtest.cpp -emit-llvm
./bin/llc -debug -march=myriscvx32 -mcpu=simple32 -mattr=+64bit -relocation-model=pic -filetype=asm ch4_1_multtest.bc -o -
# %bb.0:                                # %entry
        addi    x2, x2, -8
        addi    x10, zero, 12
        sw      x10, 4(x2)
        addi    x10, zero, 11
        sw      x10, 0(x2)
        lw      x10, 4(x2)
        lw      x11, 0(x2)
        mul     x10, x10, x11
        sw      x10, 0(x2)
        lw      x10, 0(x2)
        addi    x2, x2, 8
        ret     x1
        .set    macro
        .set    reorder
        .end    _Z9test_multv
$func_end0: