.macro vmul_iter reg0 reg1 reg2 idx
	.irp op, vmul.i16 vmul.f16 vmul.i32 vmul.f32
	\op d\reg0, d\reg1, d\reg2[\idx]
	\op q\reg0, q\reg1, d\reg2[\idx]
	.endr
.endm
# Like vmul_iter above, vmul_acc_iter emits a D-register and a Q-register form per listed op, each taking the scalar d<reg2>[<idx>] as last operand.
.macro vmul_acc_iter reg0 reg1 reg2 idx
	.irp op, vmla.i16 vmla.i32 vmla.f16 vmla.f32 vmls.i16 vmls.i32 vmls.f16 vmls.f32
	\op d\reg0, d\reg1, d\reg2[\idx]
	\op q\reg0, q\reg1, d\reg2[\idx]
	.endr
.endm
# Invocation arguments, in order: reg0 (destination), reg1 (first source), reg2 (scalar D register), idx (scalar index).
# There are two restrictions on the scalar operand:
#  * The scalar register is restricted to D0-D7 if the element size is 16 bits, or D0-D15 otherwise.
#  * The scalar index must be within range: 0-3 if the element size is 16 bits, 0-1 if it is 32 bits.
# NOTE(review): some invocations below fall outside these limits (scalar d8/d10/d13/d15 with 16-bit ops, index 2/3 with 32-bit ops);
# presumably an intentional negative test. Keep line positions stable in case expected diagnostics reference them - TODO confirm.
	vmul_iter 0 1 3 0
	vmul_iter 3 12 7 2
	vmul_iter 4 9 8 1
	vmul_iter 13 6 15 3
	vmul_acc_iter 2 7 1 0
	vmul_acc_iter 5 4 6 2
	vmul_acc_iter 4 13 10 1
	vmul_acc_iter 12 6 13 3