; (Source-browser navigation text, not part of the test: "Home | History | Annotate | Download | only in Hexagon")
      1 ; RUN: llc -march=hexagon < %s | FileCheck %s
      2 
      3 @c = external global <64 x i32>
      4 @d = external global <32 x i32>
      5 
      6 ; CHECK-LABEL: test1:
      7 ; CHECK: v{{[0-9]+}}:{{[0-9]+}}.h += vtmpy(v{{[0-9]+}}:{{[0-9]+}}.b,r{{[0-9]+}}.b)
      8 define void @test1(<64 x i32> %a, i32 %b) #0 {
      9 entry:
     10   %a.addr = alloca <64 x i32>, align 256
     11   %b.addr = alloca i32, align 4
     12   store <64 x i32> %a, <64 x i32>* %a.addr, align 256
     13   store i32 %b, i32* %b.addr, align 4
     14   %0 = load <64 x i32>, <64 x i32>* @c, align 256
     15   %1 = load <64 x i32>, <64 x i32>* %a.addr, align 256
     16   %2 = load i32, i32* %b.addr, align 4
     17   %3 = call <64 x i32> @llvm.hexagon.V6.vtmpyb.acc.128B(<64 x i32> %0, <64 x i32> %1, i32 %2)
     18   store <64 x i32> %3, <64 x i32>* @c, align 256
     19   ret void
     20 }
     21 
     22 ; CHECK-LABEL: test2:
     23 ; CHECK: v{{[0-9]+}}:{{[0-9]+}}.h += vtmpy(v{{[0-9]+}}:{{[0-9]+}}.ub,r{{[0-9]+}}.b)
     24 define void @test2(<64 x i32> %a, i32 %b) #0 {
     25 entry:
     26   %a.addr = alloca <64 x i32>, align 256
     27   %b.addr = alloca i32, align 4
     28   store <64 x i32> %a, <64 x i32>* %a.addr, align 256
     29   store i32 %b, i32* %b.addr, align 4
     30   %0 = load <64 x i32>, <64 x i32>* @c, align 256
     31   %1 = load <64 x i32>, <64 x i32>* %a.addr, align 256
     32   %2 = load i32, i32* %b.addr, align 4
     33   %3 = call <64 x i32> @llvm.hexagon.V6.vtmpybus.acc.128B(<64 x i32> %0, <64 x i32> %1, i32 %2)
     34   store <64 x i32> %3, <64 x i32>* @c, align 256
     35   ret void
     36 }
     37 
     38 ; CHECK-LABEL: test3:
     39 ; CHECK: v{{[0-9]+}}:{{[0-9]+}}.w += vtmpy(v{{[0-9]+}}:{{[0-9]+}}.h,r{{[0-9]+}}.b)
     40 define void @test3(<64 x i32> %a, i32 %b) #0 {
     41 entry:
     42   %a.addr = alloca <64 x i32>, align 256
     43   %b.addr = alloca i32, align 4
     44   store <64 x i32> %a, <64 x i32>* %a.addr, align 256
     45   store i32 %b, i32* %b.addr, align 4
     46   %0 = load <64 x i32>, <64 x i32>* @c, align 256
     47   %1 = load <64 x i32>, <64 x i32>* %a.addr, align 256
     48   %2 = load i32, i32* %b.addr, align 4
     49   %3 = call <64 x i32> @llvm.hexagon.V6.vtmpyhb.acc.128B(<64 x i32> %0, <64 x i32> %1, i32 %2)
     50   store <64 x i32> %3, <64 x i32>* @c, align 256
     51   ret void
     52 }
     53 
     54 ; CHECK-LABEL: test4:
     55 ; CHECK: v{{[0-9]+}}.w += vdmpy(v{{[0-9]+}}.h,r{{[0-9]+}}.b)
     56 define void @test4(<32 x i32> %a, i32 %b) #0 {
     57 entry:
     58   %a.addr = alloca <32 x i32>, align 128
     59   %b.addr = alloca i32, align 4
     60   store <32 x i32> %a, <32 x i32>* %a.addr, align 128
     61   store i32 %b, i32* %b.addr, align 4
     62   %0 = load <32 x i32>, <32 x i32>* @d, align 128
     63   %1 = load <32 x i32>, <32 x i32>* %a.addr, align 128
     64   %2 = load i32, i32* %b.addr, align 4
     65   %3 = call <32 x i32> @llvm.hexagon.V6.vdmpyhb.acc.128B(<32 x i32> %0, <32 x i32> %1, i32 %2)
     66   store <32 x i32> %3, <32 x i32>* @d, align 128
     67   ret void
     68 }
     69 
     70 ; CHECK-LABEL: test5:
     71 ; CHECK: v{{[0-9]+}}.uw += vrmpy(v{{[0-9]+}}.ub,r{{[0-9]+}}.ub)
     72 define void @test5(<32 x i32> %a, i32 %b) #0 {
     73 entry:
     74   %a.addr = alloca <32 x i32>, align 128
     75   %b.addr = alloca i32, align 4
     76   store <32 x i32> %a, <32 x i32>* %a.addr, align 128
     77   store i32 %b, i32* %b.addr, align 4
     78   %0 = load <32 x i32>, <32 x i32>* @d, align 128
     79   %1 = load <32 x i32>, <32 x i32>* %a.addr, align 128
     80   %2 = load i32, i32* %b.addr, align 4
     81   %3 = call <32 x i32> @llvm.hexagon.V6.vrmpyub.acc.128B(<32 x i32> %0, <32 x i32> %1, i32 %2)
     82   store <32 x i32> %3, <32 x i32>* @d, align 128
     83   ret void
     84 }
     85 
     86 ; CHECK-LABEL: test6:
     87 ; CHECK: v{{[0-9]+}}.w += vrmpy(v{{[0-9]+}}.ub,r{{[0-9]+}}.b)
     88 define void @test6(<32 x i32> %a, i32 %b) #0 {
     89 entry:
     90   %a.addr = alloca <32 x i32>, align 128
     91   %b.addr = alloca i32, align 4
     92   store <32 x i32> %a, <32 x i32>* %a.addr, align 128
     93   store i32 %b, i32* %b.addr, align 4
     94   %0 = load <32 x i32>, <32 x i32>* @d, align 128
     95   %1 = load <32 x i32>, <32 x i32>* %a.addr, align 128
     96   %2 = load i32, i32* %b.addr, align 4
     97   %3 = call <32 x i32> @llvm.hexagon.V6.vrmpybus.acc.128B(<32 x i32> %0, <32 x i32> %1, i32 %2)
     98   store <32 x i32> %3, <32 x i32>* @d, align 128
     99   ret void
    100 }
    101 
    102 ; CHECK-LABEL: test7:
    103 ; CHECK: v{{[0-9]+}}.h += vdmpy(v{{[0-9]+}}.ub,r{{[0-9]+}}.b)
    104 define void @test7(<32 x i32> %a, i32 %b) #0 {
    105 entry:
    106   %a.addr = alloca <32 x i32>, align 128
    107   %b.addr = alloca i32, align 4
    108   store <32 x i32> %a, <32 x i32>* %a.addr, align 128
    109   store i32 %b, i32* %b.addr, align 4
    110   %0 = load <32 x i32>, <32 x i32>* @d, align 128
    111   %1 = load <32 x i32>, <32 x i32>* %a.addr, align 128
    112   %2 = load i32, i32* %b.addr, align 4
    113   %3 = call <32 x i32> @llvm.hexagon.V6.vdmpybus.acc.128B(<32 x i32> %0, <32 x i32> %1, i32 %2)
    114   store <32 x i32> %3, <32 x i32>* @d, align 128
    115   ret void
    116 }
    117 
    118 ; CHECK-LABEL: test8:
    119 ; CHECK: v{{[0-9]+}}:{{[0-9]+}}.h += vdmpy(v{{[0-9]+}}:{{[0-9]+}}.ub,r{{[0-9]+}}.b)
    120 define void @test8(<64 x i32> %a, i32 %b) #0 {
    121 entry:
    122   %a.addr = alloca <64 x i32>, align 256
    123   %b.addr = alloca i32, align 4
    124   store <64 x i32> %a, <64 x i32>* %a.addr, align 256
    125   store i32 %b, i32* %b.addr, align 4
    126   %0 = load <64 x i32>, <64 x i32>* @c, align 256
    127   %1 = load <64 x i32>, <64 x i32>* %a.addr, align 256
    128   %2 = load i32, i32* %b.addr, align 4
    129   %3 = call <64 x i32> @llvm.hexagon.V6.vdmpybus.dv.acc.128B(<64 x i32> %0, <64 x i32> %1, i32 %2)
    130   store <64 x i32> %3, <64 x i32>* @c, align 256
    131   ret void
    132 }
    133 
    134 ; CHECK-LABEL: test9:
    135 ; CHECK: v{{[0-9]+}}.w += vdmpy(v{{[0-9]+}}.h,r{{[0-9]+}}.uh):sat
    136 define void @test9(<32 x i32> %a, i32 %b) #0 {
    137 entry:
    138   %a.addr = alloca <32 x i32>, align 128
    139   %b.addr = alloca i32, align 4
    140   store <32 x i32> %a, <32 x i32>* %a.addr, align 128
    141   store i32 %b, i32* %b.addr, align 4
    142   %0 = load <32 x i32>, <32 x i32>* @d, align 128
    143   %1 = load <32 x i32>, <32 x i32>* %a.addr, align 128
    144   %2 = load i32, i32* %b.addr, align 4
    145   %3 = call <32 x i32> @llvm.hexagon.V6.vdmpyhsusat.acc.128B(<32 x i32> %0, <32 x i32> %1, i32 %2)
    146   store <32 x i32> %3, <32 x i32>* @d, align 128
    147   ret void
    148 }
    149 
    150 ; CHECK-LABEL: test10:
    151 ; CHECK: v{{[0-9]+}}.w += vdmpy(v{{[0-9]+}}:{{[0-9]+}}.h,r{{[0-9]+}}.uh,#1):sat
    152 define void @test10(<64 x i32> %a, i32 %b) #0 {
    153 entry:
    154   %a.addr = alloca <64 x i32>, align 256
    155   %b.addr = alloca i32, align 4
    156   store <64 x i32> %a, <64 x i32>* %a.addr, align 256
    157   store i32 %b, i32* %b.addr, align 4
    158   %0 = load <32 x i32>, <32 x i32>* @d, align 128
    159   %1 = load <64 x i32>, <64 x i32>* %a.addr, align 256
    160   %2 = load i32, i32* %b.addr, align 4
    161   %3 = call <32 x i32> @llvm.hexagon.V6.vdmpyhsuisat.acc.128B(<32 x i32> %0, <64 x i32> %1, i32 %2)
    162   store <32 x i32> %3, <32 x i32>* @d, align 128
    163   ret void
    164 }
    165 
    166 ; CHECK-LABEL: test11:
    167 ; CHECK: v{{[0-9]+}}.w += vdmpy(v{{[0-9]+}}:{{[0-9]+}}.h,r{{[0-9]+}}.h):sat
    168 define void @test11(<64 x i32> %a, i32 %b) #0 {
    169 entry:
    170   %a.addr = alloca <64 x i32>, align 256
    171   %b.addr = alloca i32, align 4
    172   store <64 x i32> %a, <64 x i32>* %a.addr, align 256
    173   store i32 %b, i32* %b.addr, align 4
    174   %0 = load <32 x i32>, <32 x i32>* @d, align 128
    175   %1 = load <64 x i32>, <64 x i32>* %a.addr, align 256
    176   %2 = load i32, i32* %b.addr, align 4
    177   %3 = call <32 x i32> @llvm.hexagon.V6.vdmpyhisat.acc.128B(<32 x i32> %0, <64 x i32> %1, i32 %2)
    178   store <32 x i32> %3, <32 x i32>* @d, align 128
    179   ret void
    180 }
    181 
    182 ; CHECK-LABEL: test12:
    183 ; CHECK: v{{[0-9]+}}.w += vdmpy(v{{[0-9]+}}.h,r{{[0-9]+}}.h):sat
    184 define void @test12(<32 x i32> %a, i32 %b) #0 {
    185 entry:
    186   %a.addr = alloca <32 x i32>, align 128
    187   %b.addr = alloca i32, align 4
    188   store <32 x i32> %a, <32 x i32>* %a.addr, align 128
    189   store i32 %b, i32* %b.addr, align 4
    190   %0 = load <32 x i32>, <32 x i32>* @d, align 128
    191   %1 = load <32 x i32>, <32 x i32>* %a.addr, align 128
    192   %2 = load i32, i32* %b.addr, align 4
    193   %3 = call <32 x i32> @llvm.hexagon.V6.vdmpyhsat.acc.128B(<32 x i32> %0, <32 x i32> %1, i32 %2)
    194   store <32 x i32> %3, <32 x i32>* @d, align 128
    195   ret void
    196 }
    197 
    198 ; CHECK-LABEL: test13:
    199 ; CHECK: v{{[0-9]+}}:{{[0-9]+}}.w += vdmpy(v{{[0-9]+}}:{{[0-9]+}}.h,r{{[0-9]+}}.b)
    200 define void @test13(<64 x i32> %a, i32 %b) #0 {
    201 entry:
    202   %a.addr = alloca <64 x i32>, align 256
    203   %b.addr = alloca i32, align 4
    204   store <64 x i32> %a, <64 x i32>* %a.addr, align 256
    205   store i32 %b, i32* %b.addr, align 4
    206   %0 = load <64 x i32>, <64 x i32>* @c, align 256
    207   %1 = load <64 x i32>, <64 x i32>* %a.addr, align 256
    208   %2 = load i32, i32* %b.addr, align 4
    209   %3 = call <64 x i32> @llvm.hexagon.V6.vdmpyhb.dv.acc.128B(<64 x i32> %0, <64 x i32> %1, i32 %2)
    210   store <64 x i32> %3, <64 x i32>* @c, align 256
    211   ret void
    212 }
    213 
    214 ; CHECK-LABEL: test14:
    215 ; CHECK: v{{[0-9]+}}:{{[0-9]+}}.h += vmpy(v{{[0-9]+}}.ub,r{{[0-9]+}}.b)
    216 define void @test14(<32 x i32> %a, i32 %b) #0 {
    217 entry:
    218   %a.addr = alloca <32 x i32>, align 128
    219   %b.addr = alloca i32, align 4
    220   store <32 x i32> %a, <32 x i32>* %a.addr, align 128
    221   store i32 %b, i32* %b.addr, align 4
    222   %0 = load <64 x i32>, <64 x i32>* @c, align 256
    223   %1 = load <32 x i32>, <32 x i32>* %a.addr, align 128
    224   %2 = load i32, i32* %b.addr, align 4
    225   %3 = call <64 x i32> @llvm.hexagon.V6.vmpybus.acc.128B(<64 x i32> %0, <32 x i32> %1, i32 %2)
    226   store <64 x i32> %3, <64 x i32>* @c, align 256
    227   ret void
    228 }
    229 
    230 ; CHECK-LABEL: test15:
    231 ; CHECK: v{{[0-9]+}}:{{[0-9]+}}.h += vmpa(v{{[0-9]+}}:{{[0-9]+}}.ub,r{{[0-9]+}}.b)
    232 define void @test15(<64 x i32> %a, i32 %b) #0 {
    233 entry:
    234   %a.addr = alloca <64 x i32>, align 256
    235   %b.addr = alloca i32, align 4
    236   store <64 x i32> %a, <64 x i32>* %a.addr, align 256
    237   store i32 %b, i32* %b.addr, align 4
    238   %0 = load <64 x i32>, <64 x i32>* @c, align 256
    239   %1 = load <64 x i32>, <64 x i32>* %a.addr, align 256
    240   %2 = load i32, i32* %b.addr, align 4
    241   %3 = call <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32> %0, <64 x i32> %1, i32 %2)
    242   store <64 x i32> %3, <64 x i32>* @c, align 256
    243   ret void
    244 }
    245 
    246 ; CHECK-LABEL: test16:
    247 ; CHECK: v{{[0-9]+}}:{{[0-9]+}}.w += vmpa(v{{[0-9]+}}:{{[0-9]+}}.h,r{{[0-9]+}}.b)
    248 define void @test16(<64 x i32> %a, i32 %b) #0 {
    249 entry:
    250   %a.addr = alloca <64 x i32>, align 256
    251   %b.addr = alloca i32, align 4
    252   store <64 x i32> %a, <64 x i32>* %a.addr, align 256
    253   store i32 %b, i32* %b.addr, align 4
    254   %0 = load <64 x i32>, <64 x i32>* @c, align 256
    255   %1 = load <64 x i32>, <64 x i32>* %a.addr, align 256
    256   %2 = load i32, i32* %b.addr, align 4
    257   %3 = call <64 x i32> @llvm.hexagon.V6.vmpahb.acc.128B(<64 x i32> %0, <64 x i32> %1, i32 %2)
    258   store <64 x i32> %3, <64 x i32>* @c, align 256
    259   ret void
    260 }
    261 
    262 ; CHECK-LABEL: test17:
    263 ; CHECK: v{{[0-9]+}}:{{[0-9]+}}.w += vmpy(v{{[0-9]+}}.h,r{{[0-9]+}}.h):sat
    264 define void @test17(<32 x i32> %a, i32 %b) #0 {
    265 entry:
    266   %a.addr = alloca <32 x i32>, align 128
    267   %b.addr = alloca i32, align 4
    268   store <32 x i32> %a, <32 x i32>* %a.addr, align 128
    269   store i32 %b, i32* %b.addr, align 4
    270   %0 = load <64 x i32>, <64 x i32>* @c, align 256
    271   %1 = load <32 x i32>, <32 x i32>* %a.addr, align 128
    272   %2 = load i32, i32* %b.addr, align 4
    273   %3 = call <64 x i32> @llvm.hexagon.V6.vmpyhsat.acc.128B(<64 x i32> %0, <32 x i32> %1, i32 %2)
    274   store <64 x i32> %3, <64 x i32>* @c, align 256
    275   ret void
    276 }
    277 
    278 ; CHECK-LABEL: test18:
    279 ; CHECK: v{{[0-9]+}}:{{[0-9]+}}.uw += vmpy(v{{[0-9]+}}.uh,r{{[0-9]+}}.uh)
    280 define void @test18(<32 x i32> %a, i32 %b) #0 {
    281 entry:
    282   %a.addr = alloca <32 x i32>, align 128
    283   %b.addr = alloca i32, align 4
    284   store <32 x i32> %a, <32 x i32>* %a.addr, align 128
    285   store i32 %b, i32* %b.addr, align 4
    286   %0 = load <64 x i32>, <64 x i32>* @c, align 256
    287   %1 = load <32 x i32>, <32 x i32>* %a.addr, align 128
    288   %2 = load i32, i32* %b.addr, align 4
    289   %3 = call <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32> %0, <32 x i32> %1, i32 %2)
    290   store <64 x i32> %3, <64 x i32>* @c, align 256
    291   ret void
    292 }
    293 
    294 ; CHECK-LABEL: test19:
    295 ; CHECK: v{{[0-9]+}}.w += vmpyi(v{{[0-9]+}}.w,r{{[0-9]+}}.b)
    296 define void @test19(<32 x i32> %a, i32 %b) #0 {
    297 entry:
    298   %a.addr = alloca <32 x i32>, align 128
    299   %b.addr = alloca i32, align 4
    300   store <32 x i32> %a, <32 x i32>* %a.addr, align 128
    301   store i32 %b, i32* %b.addr, align 4
    302   %0 = load <32 x i32>, <32 x i32>* @d, align 128
    303   %1 = load <32 x i32>, <32 x i32>* %a.addr, align 128
    304   %2 = load i32, i32* %b.addr, align 4
    305   %3 = call <32 x i32> @llvm.hexagon.V6.vmpyiwb.acc.128B(<32 x i32> %0, <32 x i32> %1, i32 %2)
    306   store <32 x i32> %3, <32 x i32>* @d, align 128
    307   ret void
    308 }
    309 
    310 ; CHECK-LABEL: test20:
    311 ; CHECK: v{{[0-9]+}}.w += vmpyi(v{{[0-9]+}}.w,r{{[0-9]+}}.h)
    312 define void @test20(<32 x i32> %a, i32 %b) #0 {
    313 entry:
    314   %a.addr = alloca <32 x i32>, align 128
    315   %b.addr = alloca i32, align 4
    316   store <32 x i32> %a, <32 x i32>* %a.addr, align 128
    317   store i32 %b, i32* %b.addr, align 4
    318   %0 = load <32 x i32>, <32 x i32>* @d, align 128
    319   %1 = load <32 x i32>, <32 x i32>* %a.addr, align 128
    320   %2 = load i32, i32* %b.addr, align 4
    321   %3 = call <32 x i32> @llvm.hexagon.V6.vmpyiwh.acc.128B(<32 x i32> %0, <32 x i32> %1, i32 %2)
    322   store <32 x i32> %3, <32 x i32>* @d, align 128
    323   ret void
    324 }
    325 
    326 ; CHECK-LABEL: test21:
    327 ; CHECK: v{{[0-9]+}}:{{[0-9]+}}.uw += vdsad(v{{[0-9]+}}:{{[0-9]+}}.uh,r{{[0-9]+}}.uh)
    328 define void @test21(<64 x i32> %a, i32 %b) #0 {
    329 entry:
    330   %a.addr = alloca <64 x i32>, align 256
    331   %b.addr = alloca i32, align 4
    332   store <64 x i32> %a, <64 x i32>* %a.addr, align 256
    333   store i32 %b, i32* %b.addr, align 4
    334   %0 = load <64 x i32>, <64 x i32>* @c, align 256
    335   %1 = load <64 x i32>, <64 x i32>* %a.addr, align 256
    336   %2 = load i32, i32* %b.addr, align 4
    337   %3 = call <64 x i32> @llvm.hexagon.V6.vdsaduh.acc.128B(<64 x i32> %0, <64 x i32> %1, i32 %2)
    338   store <64 x i32> %3, <64 x i32>* @c, align 256
    339   ret void
    340 }
    341 
    342 ; CHECK-LABEL: test22:
    343 ; CHECK: v{{[0-9]+}}.h += vmpyi(v{{[0-9]+}}.h,r{{[0-9]+}}.b)
    344 define void @test22(<32 x i32> %a, i32 %b) #0 {
    345 entry:
    346   %a.addr = alloca <32 x i32>, align 128
    347   %b.addr = alloca i32, align 4
    348   store <32 x i32> %a, <32 x i32>* %a.addr, align 128
    349   store i32 %b, i32* %b.addr, align 4
    350   %0 = load <32 x i32>, <32 x i32>* @d, align 128
    351   %1 = load <32 x i32>, <32 x i32>* %a.addr, align 128
    352   %2 = load i32, i32* %b.addr, align 4
    353   %3 = call <32 x i32> @llvm.hexagon.V6.vmpyihb.acc.128B(<32 x i32> %0, <32 x i32> %1, i32 %2)
    354   store <32 x i32> %3, <32 x i32>* @d, align 128
    355   ret void
    356 }
    357 
    358 ; CHECK-LABEL: test23:
    359 ; CHECK: v{{[0-9]+}}.w += vasl(v{{[0-9]+}}.w,r{{[0-9]+}})
    360 define void @test23(<32 x i32> %a, i32 %b) #0 {
    361 entry:
    362   %a.addr = alloca <32 x i32>, align 128
    363   %b.addr = alloca i32, align 4
    364   store <32 x i32> %a, <32 x i32>* %a.addr, align 128
    365   store i32 %b, i32* %b.addr, align 4
    366   %0 = load <32 x i32>, <32 x i32>* @d, align 128
    367   %1 = load <32 x i32>, <32 x i32>* %a.addr, align 128
    368   %2 = load i32, i32* %b.addr, align 4
    369   %3 = call <32 x i32> @llvm.hexagon.V6.vaslw.acc.128B(<32 x i32> %0, <32 x i32> %1, i32 %2)
    370   store <32 x i32> %3, <32 x i32>* @d, align 128
    371   ret void
    372 }
    373 
    374 ; CHECK-LABEL: test24:
    375 ; CHECK: v{{[0-9]+}}.w += vasr(v{{[0-9]+}}.w,r{{[0-9]+}})
    376 define void @test24(<32 x i32> %a, i32 %b) #0 {
    377 entry:
    378   %a.addr = alloca <32 x i32>, align 128
    379   %b.addr = alloca i32, align 4
    380   store <32 x i32> %a, <32 x i32>* %a.addr, align 128
    381   store i32 %b, i32* %b.addr, align 4
    382   %0 = load <32 x i32>, <32 x i32>* @d, align 128
    383   %1 = load <32 x i32>, <32 x i32>* %a.addr, align 128
    384   %2 = load i32, i32* %b.addr, align 4
    385   %3 = call <32 x i32> @llvm.hexagon.V6.vasrw.acc.128B(<32 x i32> %0, <32 x i32> %1, i32 %2)
    386   store <32 x i32> %3, <32 x i32>* @d, align 128
    387   ret void
    388 }
    389 
    390 ; CHECK-LABEL: test25:
    391 ; CHECK: v{{[0-9]+}}:{{[0-9]+}}.uh += vmpy(v{{[0-9]+}}.ub,r{{[0-9]+}}.ub)
    392 define void @test25(<32 x i32> %a, i32 %b) #0 {
    393 entry:
    394   %a.addr = alloca <32 x i32>, align 128
    395   %b.addr = alloca i32, align 4
    396   store <32 x i32> %a, <32 x i32>* %a.addr, align 128
    397   store i32 %b, i32* %b.addr, align 4
    398   %0 = load <64 x i32>, <64 x i32>* @c, align 256
    399   %1 = load <32 x i32>, <32 x i32>* %a.addr, align 128
    400   %2 = load i32, i32* %b.addr, align 4
    401   %3 = call <64 x i32> @llvm.hexagon.V6.vmpyub.acc.128B(<64 x i32> %0, <32 x i32> %1, i32 %2)
    402   store <64 x i32> %3, <64 x i32>* @c, align 256
    403   ret void
    404 }
    405 
    406 declare <64 x i32> @llvm.hexagon.V6.vtmpyb.acc.128B(<64 x i32>, <64 x i32>, i32) #0
    407 declare <64 x i32> @llvm.hexagon.V6.vtmpybus.acc.128B(<64 x i32>, <64 x i32>, i32) #0
    408 declare <64 x i32> @llvm.hexagon.V6.vtmpyhb.acc.128B(<64 x i32>, <64 x i32>, i32) #0
    409 declare <32 x i32> @llvm.hexagon.V6.vdmpyhb.acc.128B(<32 x i32>, <32 x i32>, i32) #0
    410 declare <32 x i32> @llvm.hexagon.V6.vrmpyub.acc.128B(<32 x i32>, <32 x i32>, i32) #0
    411 declare <32 x i32> @llvm.hexagon.V6.vrmpybus.acc.128B(<32 x i32>, <32 x i32>, i32) #0
    412 declare <32 x i32> @llvm.hexagon.V6.vdmpybus.acc.128B(<32 x i32>, <32 x i32>, i32) #0
    413 declare <64 x i32> @llvm.hexagon.V6.vdmpybus.dv.acc.128B(<64 x i32>, <64 x i32>, i32) #0
    414 declare <32 x i32> @llvm.hexagon.V6.vdmpyhsusat.acc.128B(<32 x i32>, <32 x i32>, i32) #0
    415 declare <32 x i32> @llvm.hexagon.V6.vdmpyhsuisat.acc.128B(<32 x i32>, <64 x i32>, i32) #0
    416 declare <32 x i32> @llvm.hexagon.V6.vdmpyhisat.acc.128B(<32 x i32>, <64 x i32>, i32) #0
    417 declare <32 x i32> @llvm.hexagon.V6.vdmpyhsat.acc.128B(<32 x i32>, <32 x i32>, i32) #0
    418 declare <64 x i32> @llvm.hexagon.V6.vdmpyhb.dv.acc.128B(<64 x i32>, <64 x i32>, i32) #0
    419 declare <64 x i32> @llvm.hexagon.V6.vmpybus.acc.128B(<64 x i32>, <32 x i32>, i32) #0
    420 declare <64 x i32> @llvm.hexagon.V6.vmpabus.acc.128B(<64 x i32>, <64 x i32>, i32) #0
    421 declare <64 x i32> @llvm.hexagon.V6.vmpahb.acc.128B(<64 x i32>, <64 x i32>, i32) #0
    422 declare <64 x i32> @llvm.hexagon.V6.vmpyhsat.acc.128B(<64 x i32>, <32 x i32>, i32) #0
    423 declare <64 x i32> @llvm.hexagon.V6.vmpyuh.acc.128B(<64 x i32>, <32 x i32>, i32) #0
    424 declare <32 x i32> @llvm.hexagon.V6.vmpyiwb.acc.128B(<32 x i32>, <32 x i32>, i32) #0
    425 declare <32 x i32> @llvm.hexagon.V6.vmpyiwh.acc.128B(<32 x i32>, <32 x i32>, i32) #0
    426 declare <64 x i32> @llvm.hexagon.V6.vdsaduh.acc.128B(<64 x i32>, <64 x i32>, i32) #0
    427 declare <32 x i32> @llvm.hexagon.V6.vmpyihb.acc.128B(<32 x i32>, <32 x i32>, i32) #0
    428 declare <32 x i32> @llvm.hexagon.V6.vaslw.acc.128B(<32 x i32>, <32 x i32>, i32) #0
    429 declare <32 x i32> @llvm.hexagon.V6.vasrw.acc.128B(<32 x i32>, <32 x i32>, i32) #0
    430 declare <64 x i32> @llvm.hexagon.V6.vmpyub.acc.128B(<64 x i32>, <32 x i32>, i32) #0
    431 
    432 attributes #0 = { nounwind readnone "target-cpu"="hexagonv60" "target-features"="+hvxv60,+hvx-length128b" }
    433