AWS F1インスタンス HDK の勉強を続けている。 目標としては、以下の部分にAXIマスタを接続してDRAMにアクセスし、データをフェッチする。
- DMAでホストからデータをDDR4メモリに格納する。
- AXIマスタでデータをフェッチする。
- 演算し、結果を格納する。
という流れで、例えば行列積のアクセラレータをF1インスタンス上で動作させてみたい。
前回、データを自由にフェッチすることができるようになったので、次にフェッチしたデータを使って計算を行ってみたい。
FIFOを挿入してAXIからのデータを受け取り、FIFOにデータが挿入されるたびに積和演算を実行する。
// Receive-side FIFO between the AXI master read-data channel and the
// downstream consumer of fifo_rd_data.
logic        fifo_wr;
logic        fifo_empty;
logic        fifo_full;
// NOTE(review): fifo_rd_data is 64 bits wide while only 32 bits are driven
// into D below -- confirm against the port widths of the fifo module.
logic [63:0] fifo_rd_data;

// Back-pressure: stop accepting read data while the FIFO is full.
assign cl_axi_mstr_bus.rready = ~fifo_full;

// Push one entry per accepted R beat (rvalid & rready), but only while the
// receive state machine is in rcv_state_col.
assign fifo_wr = cl_axi_mstr_bus.rvalid
               & cl_axi_mstr_bus.rready
               & (rcv_state == rcv_state_col);

fifo u_fifo (
    .CLK   (clk),
    .nRST  (pipe_rst_n),
    // Only the lowest 32 bits of the read data bus are stored.
    .D     (cl_axi_mstr_bus.rdata[31:0]),
    .Q     (fifo_rd_data),
    .WR    (fifo_wr),
    // Read is requested whenever the FIFO reports it is not empty.
    .RD    (~fifo_empty),
    .FULL  (fifo_full),
    .EMPTY (fifo_empty)
);
所定の計算結果である 0x00008a20 が出力されていることが確認できた。次は全要素分これを実行しなければ。
34762000 : [axi_mstr_cfg_bus W] ADDR=00000500 34766000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=7 ADDR=0000000400000000 34766000 : [axi_mstr_cfg_bus W] ADDR=00000500 34782000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=2 ADDR=0000000400010004 34798000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=2 ADDR=0000000400010044 34814000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=2 ADDR=0000000400010084 34830000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=2 ADDR=00000004000100c4 34846000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=2 ADDR=0000000400010104 34862000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=2 ADDR=0000000400010144 34878000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=2 ADDR=0000000400010184 34894000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=2 ADDR=00000004000101c4 34910000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=2 ADDR=0000000400010204 34926000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=2 ADDR=0000000400010244 34942000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=2 ADDR=0000000400010284 34958000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=2 ADDR=00000004000102c4 34974000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=2 ADDR=0000000400010304 34986000 : [Waiting] rvalid = 1, rready = 1 34986000 : [cl_axi_mstr_bus R] DATA=000000100000000f0000000e0000000d0000000c0000000b0000000a000000090000000800000007000000060000000500000004000000030000000200000001 34990000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=2 ADDR=0000000400010344 34994000 : [Waiting] rvalid = 1, rready = 1 34994000 : [cl_axi_mstr_bus R] DATA=000000730000007200000071000000700000006f0000006e0000006d0000006c0000006b0000006a000000690000006800000067000000660000006500000064 35000000 : [matrix 0] mult = 00000001 x 00000064 35006000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=2 ADDR=0000000400010384 35014000 : [Waiting] rvalid = 1, rready = 1 35014000 : [cl_axi_mstr_bus R] DATA=000000830000008200000081000000800000007f0000007e0000007d0000007c0000007b0000007a000000790000007800000077000000760000007500000074 35020000 : [matrix 1] mult = 00000002 x 00000074 35022000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=2 ADDR=00000004000103c4 35026000 : [Waiting] 
rvalid = 1, rready = 1 35026000 : [cl_axi_mstr_bus R] DATA=000000930000009200000091000000900000008f0000008e0000008d0000008c0000008b0000008a000000890000008800000087000000860000008500000084 35032000 : [matrix 2] mult = 00000003 x 00000084 35038000 : [cl_axi_mstr_bus AR] LEN= 0 SIZE=2 ADDR=0000000400010404 35046000 : [Waiting] rvalid = 1, rready = 1 35046000 : [cl_axi_mstr_bus R] DATA=000000a3000000a2000000a1000000a00000009f0000009e0000009d0000009c0000009b0000009a000000990000009800000097000000960000009500000094 35050000 : [Waiting] rvalid = 1, rready = 1 35050000 : [cl_axi_mstr_bus R] DATA=000000b3000000b2000000b1000000b0000000af000000ae000000ad000000ac000000ab000000aa000000a9000000a8000000a7000000a6000000a5000000a4 35052000 : [matrix 3] mult = 00000004 x 00000094 35056000 : [matrix 4] mult = 00000005 x 000000a4 35058000 : [Waiting] rvalid = 1, rready = 1 35058000 : [cl_axi_mstr_bus R] DATA=000000c3000000c2000000c1000000c0000000bf000000be000000bd000000bc000000bb000000ba000000b9000000b8000000b7000000b6000000b5000000b4 35064000 : [matrix 5] mult = 00000006 x 000000b4 35074000 : [Waiting] rvalid = 1, rready = 1 35074000 : [cl_axi_mstr_bus R] DATA=000000d3000000d2000000d1000000d0000000cf000000ce000000cd000000cc000000cb000000ca000000c9000000c8000000c7000000c6000000c5000000c4 35080000 : [matrix 6] mult = 00000007 x 000000c4 35090000 : [Waiting] rvalid = 1, rready = 1 35090000 : [cl_axi_mstr_bus R] DATA=000000e3000000e2000000e1000000e0000000df000000de000000dd000000dc000000db000000da000000d9000000d8000000d7000000d6000000d5000000d4 35096000 : [matrix 7] mult = 00000008 x 000000d4 35106000 : [Waiting] rvalid = 1, rready = 1 35106000 : [cl_axi_mstr_bus R] DATA=000000f3000000f2000000f1000000f0000000ef000000ee000000ed000000ec000000eb000000ea000000e9000000e8000000e7000000e6000000e5000000e4 35112000 : [matrix 8] mult = 00000009 x 000000e4 35138000 : [Waiting] rvalid = 1, rready = 1 35138000 : [cl_axi_mstr_bus R] 
DATA=00000103000001020000010100000100000000ff000000fe000000fd000000fc000000fb000000fa000000f9000000f8000000f7000000f6000000f5000000f4 35144000 : [matrix 9] mult = 0000000a x 000000f4 35158000 : [Waiting] rvalid = 1, rready = 1 35158000 : [cl_axi_mstr_bus R] DATA=000001130000011200000111000001100000010f0000010e0000010d0000010c0000010b0000010a000001090000010800000107000001060000010500000104 35164000 : [matrix 10] mult = 0000000b x 00000104 35174000 : [Waiting] rvalid = 1, rready = 1 35174000 : [cl_axi_mstr_bus R] DATA=000001230000012200000121000001200000011f0000011e0000011d0000011c0000011b0000011a000001190000011800000117000001160000011500000114 35178000 : [Waiting] rvalid = 1, rready = 1 35178000 : [cl_axi_mstr_bus R] DATA=000001330000013200000131000001300000012f0000012e0000012d0000012c0000012b0000012a000001290000012800000127000001260000012500000124 35180000 : [matrix 11] mult = 0000000c x 00000114 35184000 : [matrix 12] mult = 0000000d x 00000124 35186000 : [Waiting] rvalid = 1, rready = 1 35186000 : [Waiting] rvalid = 1, rready = 1 35186000 : [cl_axi_mstr_bus R] DATA=000001430000014200000141000001400000013f0000013e0000013d0000013c0000013b0000013a000001390000013800000137000001360000013500000134 35192000 : [matrix 13] mult = 0000000e x 00000134 35202000 : [Waiting] rvalid = 1, rready = 1 35202000 : [cl_axi_mstr_bus R] DATA=000001530000015200000151000001500000014f0000014e0000014d0000014c0000014b0000014a000001490000014800000147000001460000014500000144 35208000 : [matrix 14] mult = 0000000f x 00000144 35218000 : [Waiting] rvalid = 1, rready = 1 35218000 : [cl_axi_mstr_bus R] DATA=000001630000016200000161000001600000015f0000015e0000015d0000015c0000015b0000015a000001590000015800000157000001560000015500000154 35224000 : [matrix 15] mult = 00000010 x 00000154 35226000 : [matrix] result = 0000000000008a20 35230000 : [matrix] result = 0000000000008a20 ````