FPGA開発日記

FPGAというより、コンピュータアーキテクチャかもね! <a href="https://msyksphinz.github.io/github_pages/">カテゴリ別記事インデックス</a>

AWS EC2 F1インスタンスを使ったハードウェア開発の勉強 (9. 整数行列計算回路の実装)

AWS F1インスタンス HDK の勉強を続けている。 目標としては、以下の部分にAXIマスタを接続してDRAMにアクセスし、データをフェッチする。

  1. DMAでホストからデータをDDR4メモリに格納する。
  2. AXIマスタでデータをフェッチする。
  3. 演算し、結果を格納する。

として、例えば行列積のアクセラレータをF1インスタンス上で動作させてみたい。

f:id:msyksphinz:20180516232156p:plain

前回、データを自由にフェッチすることができるようになったので、次にフェッチしたデータを使って計算を行ってみたい。

f:id:msyksphinz:20180520003345p:plain

FIFOを挿入してAXIからのデータを受け取り、FIFOにデータが挿入されるたびに積和演算を実行する。

// Receive path: data returned on the AXI master read channel is pushed into
// a FIFO (u_fifo) so that downstream logic can consume it.
//   fifo_wr    : write strobe into the FIFO
//   fifo_empty : EMPTY flag driven by u_fifo
//   fifo_full  : FULL flag driven by u_fifo
logic fifo_wr, fifo_empty, fifo_full;
// Write only while rcv_state equals rcv_state_col and a read-data beat is
// actually transferred (rvalid and rready both high).
assign fifo_wr = (rcv_state == rcv_state_col) & 
                 cl_axi_mstr_bus.rvalid & cl_axi_mstr_bus.rready;
// Data read out of the FIFO.
// NOTE(review): declared 64 bits wide while only rdata[31:0] is fed to D;
// confirm the width of fifo's Q port.
logic [63: 0] fifo_rd_data;

// Back-pressure: read data is accepted only while the FIFO is not full.
assign cl_axi_mstr_bus.rready = !fifo_full;

fifo u_fifo
(
 .CLK   (clk),
 .nRST  (pipe_rst_n),                   // presumably active-low reset, judging by the names; confirm in fifo
 .D     (cl_axi_mstr_bus.rdata[31:0]),  // only the lowest 32 bits of the read data are stored
 .Q     (fifo_rd_data),
 .WR    (fifo_wr),
 .RD    (!fifo_empty),                  // read is asserted whenever the FIFO is not empty
 .FULL  (fifo_full),
 .EMPTY (fifo_empty)
 );

所定の計算結果である 0x00008a20 が出力されていることが確認できた。次は全要素分これを実行しなければ。

            34762000 : [axi_mstr_cfg_bus W] ADDR=00000500     
            34766000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=7 ADDR=0000000400000000
            34766000 : [axi_mstr_cfg_bus W] ADDR=00000500
            34782000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=2 ADDR=0000000400010004
            34798000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=2 ADDR=0000000400010044
            34814000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=2 ADDR=0000000400010084
            34830000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=2 ADDR=00000004000100c4
            34846000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=2 ADDR=0000000400010104
            34862000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=2 ADDR=0000000400010144
            34878000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=2 ADDR=0000000400010184
            34894000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=2 ADDR=00000004000101c4
            34910000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=2 ADDR=0000000400010204
            34926000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=2 ADDR=0000000400010244
            34942000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=2 ADDR=0000000400010284
            34958000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=2 ADDR=00000004000102c4
            34974000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=2 ADDR=0000000400010304
            34986000 : [Waiting] rvalid = 1, rready = 1
            34986000 : [cl_axi_mstr_bus  R] DATA=000000100000000f0000000e0000000d0000000c0000000b0000000a000000090000000800000007000000060000000500000004000000030000000200000001
            34990000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=2 ADDR=0000000400010344
            34994000 : [Waiting] rvalid = 1, rready = 1
            34994000 : [cl_axi_mstr_bus  R] DATA=000000730000007200000071000000700000006f0000006e0000006d0000006c0000006b0000006a000000690000006800000067000000660000006500000064
            35000000 : [matrix  0] mult = 00000001 x 00000064
            35006000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=2 ADDR=0000000400010384
            35014000 : [Waiting] rvalid = 1, rready = 1
            35014000 : [cl_axi_mstr_bus  R] DATA=000000830000008200000081000000800000007f0000007e0000007d0000007c0000007b0000007a000000790000007800000077000000760000007500000074
            35020000 : [matrix  1] mult = 00000002 x 00000074
            35022000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=2 ADDR=00000004000103c4
            35026000 : [Waiting] rvalid = 1, rready = 1
            35026000 : [cl_axi_mstr_bus  R] DATA=000000930000009200000091000000900000008f0000008e0000008d0000008c0000008b0000008a000000890000008800000087000000860000008500000084
            35032000 : [matrix  2] mult = 00000003 x 00000084
            35038000 : [cl_axi_mstr_bus AR] LEN=  0 SIZE=2 ADDR=0000000400010404
            35046000 : [Waiting] rvalid = 1, rready = 1
            35046000 : [cl_axi_mstr_bus  R] DATA=000000a3000000a2000000a1000000a00000009f0000009e0000009d0000009c0000009b0000009a000000990000009800000097000000960000009500000094
            35050000 : [Waiting] rvalid = 1, rready = 1
            35050000 : [cl_axi_mstr_bus  R] DATA=000000b3000000b2000000b1000000b0000000af000000ae000000ad000000ac000000ab000000aa000000a9000000a8000000a7000000a6000000a5000000a4
            35052000 : [matrix  3] mult = 00000004 x 00000094
            35056000 : [matrix  4] mult = 00000005 x 000000a4
            35058000 : [Waiting] rvalid = 1, rready = 1
            35058000 : [cl_axi_mstr_bus  R] DATA=000000c3000000c2000000c1000000c0000000bf000000be000000bd000000bc000000bb000000ba000000b9000000b8000000b7000000b6000000b5000000b4
            35064000 : [matrix  5] mult = 00000006 x 000000b4
            35074000 : [Waiting] rvalid = 1, rready = 1
            35074000 : [cl_axi_mstr_bus  R] DATA=000000d3000000d2000000d1000000d0000000cf000000ce000000cd000000cc000000cb000000ca000000c9000000c8000000c7000000c6000000c5000000c4
            35080000 : [matrix  6] mult = 00000007 x 000000c4
            35090000 : [Waiting] rvalid = 1, rready = 1
            35090000 : [cl_axi_mstr_bus  R] DATA=000000e3000000e2000000e1000000e0000000df000000de000000dd000000dc000000db000000da000000d9000000d8000000d7000000d6000000d5000000d4
            35096000 : [matrix  7] mult = 00000008 x 000000d4
            35106000 : [Waiting] rvalid = 1, rready = 1
            35106000 : [cl_axi_mstr_bus  R] DATA=000000f3000000f2000000f1000000f0000000ef000000ee000000ed000000ec000000eb000000ea000000e9000000e8000000e7000000e6000000e5000000e4
            35112000 : [matrix  8] mult = 00000009 x 000000e4
            35138000 : [Waiting] rvalid = 1, rready = 1
            35138000 : [cl_axi_mstr_bus  R] DATA=00000103000001020000010100000100000000ff000000fe000000fd000000fc000000fb000000fa000000f9000000f8000000f7000000f6000000f5000000f4
            35144000 : [matrix  9] mult = 0000000a x 000000f4
            35158000 : [Waiting] rvalid = 1, rready = 1
            35158000 : [cl_axi_mstr_bus  R] DATA=000001130000011200000111000001100000010f0000010e0000010d0000010c0000010b0000010a000001090000010800000107000001060000010500000104
            35164000 : [matrix 10] mult = 0000000b x 00000104
            35174000 : [Waiting] rvalid = 1, rready = 1
            35174000 : [cl_axi_mstr_bus  R] DATA=000001230000012200000121000001200000011f0000011e0000011d0000011c0000011b0000011a000001190000011800000117000001160000011500000114
            35178000 : [Waiting] rvalid = 1, rready = 1
            35178000 : [cl_axi_mstr_bus  R] DATA=000001330000013200000131000001300000012f0000012e0000012d0000012c0000012b0000012a000001290000012800000127000001260000012500000124
            35180000 : [matrix 11] mult = 0000000c x 00000114
            35184000 : [matrix 12] mult = 0000000d x 00000124
            35186000 : [Waiting] rvalid = 1, rready = 1
            35186000 : [Waiting] rvalid = 1, rready = 1
            35186000 : [cl_axi_mstr_bus  R] DATA=000001430000014200000141000001400000013f0000013e0000013d0000013c0000013b0000013a000001390000013800000137000001360000013500000134
            35192000 : [matrix 13] mult = 0000000e x 00000134
            35202000 : [Waiting] rvalid = 1, rready = 1
            35202000 : [cl_axi_mstr_bus  R] DATA=000001530000015200000151000001500000014f0000014e0000014d0000014c0000014b0000014a000001490000014800000147000001460000014500000144
            35208000 : [matrix 14] mult = 0000000f x 00000144
            35218000 : [Waiting] rvalid = 1, rready = 1
            35218000 : [cl_axi_mstr_bus  R] DATA=000001630000016200000161000001600000015f0000015e0000015d0000015c0000015b0000015a000001590000015800000157000001560000015500000154
            35224000 : [matrix 15] mult = 00000010 x 00000154
            35226000 : [matrix] result = 0000000000008a20
            35230000 : [matrix] result = 0000000000008a20
````