; LLVM X86 CodeGen regression test: selection of pmulhuw/pmulhw for the
; "extend -> mul -> lshr 16 -> trunc" high-half-multiply IR pattern.
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
      7 
      8 define <4 x i16> @mulhuw_v4i16(<4 x i16> %a, <4 x i16> %b) {
      9 ; SSE2-LABEL: mulhuw_v4i16:
     10 ; SSE2:       # %bb.0:
     11 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
     12 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
     13 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
     14 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
     15 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
     16 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
     17 ; SSE2-NEXT:    pmulhuw %xmm1, %xmm0
     18 ; SSE2-NEXT:    pxor %xmm1, %xmm1
     19 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
     20 ; SSE2-NEXT:    retq
     21 ;
     22 ; SSE41-LABEL: mulhuw_v4i16:
     23 ; SSE41:       # %bb.0:
     24 ; SSE41-NEXT:    pxor %xmm2, %xmm2
     25 ; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
     26 ; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
     27 ; SSE41-NEXT:    pmulld %xmm1, %xmm0
     28 ; SSE41-NEXT:    psrld $16, %xmm0
     29 ; SSE41-NEXT:    retq
     30 ;
     31 ; AVX-LABEL: mulhuw_v4i16:
     32 ; AVX:       # %bb.0:
     33 ; AVX-NEXT:    vpxor %xmm2, %xmm2, %xmm2
     34 ; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
     35 ; AVX-NEXT:    vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
     36 ; AVX-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
     37 ; AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
     38 ; AVX-NEXT:    retq
     39   %a1 = zext <4 x i16> %a to <4 x i32>
     40   %b1 = zext <4 x i16> %b to <4 x i32>
     41   %c = mul <4 x i32> %a1, %b1
     42   %d = lshr <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
     43   %e = trunc <4 x i32> %d to <4 x i16>
     44   ret <4 x i16> %e
     45 }
     46 
     47 define <4 x i16> @mulhw_v4i16(<4 x i16> %a, <4 x i16> %b) {
     48 ; SSE2-LABEL: mulhw_v4i16:
     49 ; SSE2:       # %bb.0:
     50 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,2,2,3,4,5,6,7]
     51 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,6,6,7]
     52 ; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
     53 ; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
     54 ; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
     55 ; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
     56 ; SSE2-NEXT:    pmulhw %xmm1, %xmm0
     57 ; SSE2-NEXT:    pxor %xmm1, %xmm1
     58 ; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
     59 ; SSE2-NEXT:    retq
     60 ;
     61 ; SSE41-LABEL: mulhw_v4i16:
     62 ; SSE41:       # %bb.0:
     63 ; SSE41-NEXT:    pslld $16, %xmm0
     64 ; SSE41-NEXT:    psrad $16, %xmm0
     65 ; SSE41-NEXT:    pslld $16, %xmm1
     66 ; SSE41-NEXT:    psrad $16, %xmm1
     67 ; SSE41-NEXT:    pmulld %xmm1, %xmm0
     68 ; SSE41-NEXT:    psrld $16, %xmm0
     69 ; SSE41-NEXT:    retq
     70 ;
     71 ; AVX-LABEL: mulhw_v4i16:
     72 ; AVX:       # %bb.0:
     73 ; AVX-NEXT:    vpslld $16, %xmm0, %xmm0
     74 ; AVX-NEXT:    vpsrad $16, %xmm0, %xmm0
     75 ; AVX-NEXT:    vpslld $16, %xmm1, %xmm1
     76 ; AVX-NEXT:    vpsrad $16, %xmm1, %xmm1
     77 ; AVX-NEXT:    vpmulld %xmm1, %xmm0, %xmm0
     78 ; AVX-NEXT:    vpsrld $16, %xmm0, %xmm0
     79 ; AVX-NEXT:    retq
     80   %a1 = sext <4 x i16> %a to <4 x i32>
     81   %b1 = sext <4 x i16> %b to <4 x i32>
     82   %c = mul <4 x i32> %a1, %b1
     83   %d = lshr <4 x i32> %c, <i32 16, i32 16, i32 16, i32 16>
     84   %e = trunc <4 x i32> %d to <4 x i16>
     85   ret <4 x i16> %e
     86 }
     87 
     88 define <8 x i16> @mulhuw_v8i16(<8 x i16> %a, <8 x i16> %b) {
     89 ; SSE-LABEL: mulhuw_v8i16:
     90 ; SSE:       # %bb.0:
     91 ; SSE-NEXT:    pmulhuw %xmm1, %xmm0
     92 ; SSE-NEXT:    retq
     93 ;
     94 ; AVX-LABEL: mulhuw_v8i16:
     95 ; AVX:       # %bb.0:
     96 ; AVX-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0
     97 ; AVX-NEXT:    retq
     98   %a1 = zext <8 x i16> %a to <8 x i32>
     99   %b1 = zext <8 x i16> %b to <8 x i32>
    100   %c = mul <8 x i32> %a1, %b1
    101   %d = lshr <8 x i32> %c, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
    102   %e = trunc <8 x i32> %d to <8 x i16>
    103   ret <8 x i16> %e
    104 }
    105 
    106 define <8 x i16> @mulhw_v8i16(<8 x i16> %a, <8 x i16> %b) {
    107 ; SSE-LABEL: mulhw_v8i16:
    108 ; SSE:       # %bb.0:
    109 ; SSE-NEXT:    pmulhw %xmm1, %xmm0
    110 ; SSE-NEXT:    retq
    111 ;
    112 ; AVX-LABEL: mulhw_v8i16:
    113 ; AVX:       # %bb.0:
    114 ; AVX-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0
    115 ; AVX-NEXT:    retq
    116   %a1 = sext <8 x i16> %a to <8 x i32>
    117   %b1 = sext <8 x i16> %b to <8 x i32>
    118   %c = mul <8 x i32> %a1, %b1
    119   %d = lshr <8 x i32> %c, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
    120   %e = trunc <8 x i32> %d to <8 x i16>
    121   ret <8 x i16> %e
    122 }
    123 
    124 define <16 x i16> @mulhuw_v16i16(<16 x i16> %a, <16 x i16> %b) {
    125 ; SSE-LABEL: mulhuw_v16i16:
    126 ; SSE:       # %bb.0:
    127 ; SSE-NEXT:    pmulhuw %xmm2, %xmm0
    128 ; SSE-NEXT:    pmulhuw %xmm3, %xmm1
    129 ; SSE-NEXT:    retq
    130 ;
    131 ; AVX-LABEL: mulhuw_v16i16:
    132 ; AVX:       # %bb.0:
    133 ; AVX-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0
    134 ; AVX-NEXT:    retq
    135   %a1 = zext <16 x i16> %a to <16 x i32>
    136   %b1 = zext <16 x i16> %b to <16 x i32>
    137   %c = mul <16 x i32> %a1, %b1
    138   %d = lshr <16 x i32> %c, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
    139   %e = trunc <16 x i32> %d to <16 x i16>
    140   ret <16 x i16> %e
    141 }
    142 
    143 define <16 x i16> @mulhw_v16i16(<16 x i16> %a, <16 x i16> %b) {
    144 ; SSE-LABEL: mulhw_v16i16:
    145 ; SSE:       # %bb.0:
    146 ; SSE-NEXT:    pmulhw %xmm2, %xmm0
    147 ; SSE-NEXT:    pmulhw %xmm3, %xmm1
    148 ; SSE-NEXT:    retq
    149 ;
    150 ; AVX-LABEL: mulhw_v16i16:
    151 ; AVX:       # %bb.0:
    152 ; AVX-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0
    153 ; AVX-NEXT:    retq
    154   %a1 = sext <16 x i16> %a to <16 x i32>
    155   %b1 = sext <16 x i16> %b to <16 x i32>
    156   %c = mul <16 x i32> %a1, %b1
    157   %d = lshr <16 x i32> %c, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
    158   %e = trunc <16 x i32> %d to <16 x i16>
    159   ret <16 x i16> %e
    160 }
    161 
    162 define <32 x i16> @mulhuw_v32i16(<32 x i16> %a, <32 x i16> %b) {
    163 ; SSE-LABEL: mulhuw_v32i16:
    164 ; SSE:       # %bb.0:
    165 ; SSE-NEXT:    pmulhuw %xmm4, %xmm0
    166 ; SSE-NEXT:    pmulhuw %xmm5, %xmm1
    167 ; SSE-NEXT:    pmulhuw %xmm6, %xmm2
    168 ; SSE-NEXT:    pmulhuw %xmm7, %xmm3
    169 ; SSE-NEXT:    retq
    170 ;
    171 ; AVX2-LABEL: mulhuw_v32i16:
    172 ; AVX2:       # %bb.0:
    173 ; AVX2-NEXT:    vpmulhuw %ymm2, %ymm0, %ymm0
    174 ; AVX2-NEXT:    vpmulhuw %ymm3, %ymm1, %ymm1
    175 ; AVX2-NEXT:    retq
    176 ;
    177 ; AVX512F-LABEL: mulhuw_v32i16:
    178 ; AVX512F:       # %bb.0:
    179 ; AVX512F-NEXT:    vpmulhuw %ymm2, %ymm0, %ymm0
    180 ; AVX512F-NEXT:    vpmulhuw %ymm3, %ymm1, %ymm1
    181 ; AVX512F-NEXT:    retq
    182 ;
    183 ; AVX512BW-LABEL: mulhuw_v32i16:
    184 ; AVX512BW:       # %bb.0:
    185 ; AVX512BW-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm0
    186 ; AVX512BW-NEXT:    retq
    187   %a1 = zext <32 x i16> %a to <32 x i32>
    188   %b1 = zext <32 x i16> %b to <32 x i32>
    189   %c = mul <32 x i32> %a1, %b1
    190   %d = lshr <32 x i32> %c, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
    191   %e = trunc <32 x i32> %d to <32 x i16>
    192   ret <32 x i16> %e
    193 }
    194 
    195 define <32 x i16> @mulhw_v32i16(<32 x i16> %a, <32 x i16> %b) {
    196 ; SSE-LABEL: mulhw_v32i16:
    197 ; SSE:       # %bb.0:
    198 ; SSE-NEXT:    pmulhw %xmm4, %xmm0
    199 ; SSE-NEXT:    pmulhw %xmm5, %xmm1
    200 ; SSE-NEXT:    pmulhw %xmm6, %xmm2
    201 ; SSE-NEXT:    pmulhw %xmm7, %xmm3
    202 ; SSE-NEXT:    retq
    203 ;
    204 ; AVX2-LABEL: mulhw_v32i16:
    205 ; AVX2:       # %bb.0:
    206 ; AVX2-NEXT:    vpmulhw %ymm2, %ymm0, %ymm0
    207 ; AVX2-NEXT:    vpmulhw %ymm3, %ymm1, %ymm1
    208 ; AVX2-NEXT:    retq
    209 ;
    210 ; AVX512F-LABEL: mulhw_v32i16:
    211 ; AVX512F:       # %bb.0:
    212 ; AVX512F-NEXT:    vpmulhw %ymm2, %ymm0, %ymm0
    213 ; AVX512F-NEXT:    vpmulhw %ymm3, %ymm1, %ymm1
    214 ; AVX512F-NEXT:    retq
    215 ;
    216 ; AVX512BW-LABEL: mulhw_v32i16:
    217 ; AVX512BW:       # %bb.0:
    218 ; AVX512BW-NEXT:    vpmulhw %zmm1, %zmm0, %zmm0
    219 ; AVX512BW-NEXT:    retq
    220   %a1 = sext <32 x i16> %a to <32 x i32>
    221   %b1 = sext <32 x i16> %b to <32 x i32>
    222   %c = mul <32 x i32> %a1, %b1
    223   %d = lshr <32 x i32> %c, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
    224   %e = trunc <32 x i32> %d to <32 x i16>
    225   ret <32 x i16> %e
    226 }
    227 
    228 define <64 x i16> @mulhuw_v64i16(<64 x i16> %a, <64 x i16> %b) {
    229 ; SSE-LABEL: mulhuw_v64i16:
    230 ; SSE:       # %bb.0:
    231 ; SSE-NEXT:    pmulhuw {{[0-9]+}}(%rsp), %xmm0
    232 ; SSE-NEXT:    pmulhuw {{[0-9]+}}(%rsp), %xmm1
    233 ; SSE-NEXT:    pmulhuw {{[0-9]+}}(%rsp), %xmm2
    234 ; SSE-NEXT:    pmulhuw {{[0-9]+}}(%rsp), %xmm3
    235 ; SSE-NEXT:    pmulhuw {{[0-9]+}}(%rsp), %xmm4
    236 ; SSE-NEXT:    pmulhuw {{[0-9]+}}(%rsp), %xmm5
    237 ; SSE-NEXT:    pmulhuw {{[0-9]+}}(%rsp), %xmm6
    238 ; SSE-NEXT:    pmulhuw {{[0-9]+}}(%rsp), %xmm7
    239 ; SSE-NEXT:    movdqa %xmm7, 112(%rdi)
    240 ; SSE-NEXT:    movdqa %xmm6, 96(%rdi)
    241 ; SSE-NEXT:    movdqa %xmm5, 80(%rdi)
    242 ; SSE-NEXT:    movdqa %xmm4, 64(%rdi)
    243 ; SSE-NEXT:    movdqa %xmm3, 48(%rdi)
    244 ; SSE-NEXT:    movdqa %xmm2, 32(%rdi)
    245 ; SSE-NEXT:    movdqa %xmm1, 16(%rdi)
    246 ; SSE-NEXT:    movdqa %xmm0, (%rdi)
    247 ; SSE-NEXT:    movq %rdi, %rax
    248 ; SSE-NEXT:    retq
    249 ;
    250 ; AVX2-LABEL: mulhuw_v64i16:
    251 ; AVX2:       # %bb.0:
    252 ; AVX2-NEXT:    vpmulhuw %ymm4, %ymm0, %ymm0
    253 ; AVX2-NEXT:    vpmulhuw %ymm5, %ymm1, %ymm1
    254 ; AVX2-NEXT:    vpmulhuw %ymm6, %ymm2, %ymm2
    255 ; AVX2-NEXT:    vpmulhuw %ymm7, %ymm3, %ymm3
    256 ; AVX2-NEXT:    retq
    257 ;
    258 ; AVX512F-LABEL: mulhuw_v64i16:
    259 ; AVX512F:       # %bb.0:
    260 ; AVX512F-NEXT:    vpmulhuw %ymm4, %ymm0, %ymm0
    261 ; AVX512F-NEXT:    vpmulhuw %ymm5, %ymm1, %ymm1
    262 ; AVX512F-NEXT:    vpmulhuw %ymm6, %ymm2, %ymm2
    263 ; AVX512F-NEXT:    vpmulhuw %ymm7, %ymm3, %ymm3
    264 ; AVX512F-NEXT:    retq
    265 ;
    266 ; AVX512BW-LABEL: mulhuw_v64i16:
    267 ; AVX512BW:       # %bb.0:
    268 ; AVX512BW-NEXT:    vpmulhuw %zmm2, %zmm0, %zmm0
    269 ; AVX512BW-NEXT:    vpmulhuw %zmm3, %zmm1, %zmm1
    270 ; AVX512BW-NEXT:    retq
    271   %a1 = zext <64 x i16> %a to <64 x i32>
    272   %b1 = zext <64 x i16> %b to <64 x i32>
    273   %c = mul <64 x i32> %a1, %b1
    274   %d = lshr <64 x i32> %c, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
    275   %e = trunc <64 x i32> %d to <64 x i16>
    276   ret <64 x i16> %e
    277 }
    278 
    279 define <64 x i16> @mulhw_v64i16(<64 x i16> %a, <64 x i16> %b) {
    280 ; SSE-LABEL: mulhw_v64i16:
    281 ; SSE:       # %bb.0:
    282 ; SSE-NEXT:    pmulhw {{[0-9]+}}(%rsp), %xmm0
    283 ; SSE-NEXT:    pmulhw {{[0-9]+}}(%rsp), %xmm1
    284 ; SSE-NEXT:    pmulhw {{[0-9]+}}(%rsp), %xmm2
    285 ; SSE-NEXT:    pmulhw {{[0-9]+}}(%rsp), %xmm3
    286 ; SSE-NEXT:    pmulhw {{[0-9]+}}(%rsp), %xmm4
    287 ; SSE-NEXT:    pmulhw {{[0-9]+}}(%rsp), %xmm5
    288 ; SSE-NEXT:    pmulhw {{[0-9]+}}(%rsp), %xmm6
    289 ; SSE-NEXT:    pmulhw {{[0-9]+}}(%rsp), %xmm7
    290 ; SSE-NEXT:    movdqa %xmm7, 112(%rdi)
    291 ; SSE-NEXT:    movdqa %xmm6, 96(%rdi)
    292 ; SSE-NEXT:    movdqa %xmm5, 80(%rdi)
    293 ; SSE-NEXT:    movdqa %xmm4, 64(%rdi)
    294 ; SSE-NEXT:    movdqa %xmm3, 48(%rdi)
    295 ; SSE-NEXT:    movdqa %xmm2, 32(%rdi)
    296 ; SSE-NEXT:    movdqa %xmm1, 16(%rdi)
    297 ; SSE-NEXT:    movdqa %xmm0, (%rdi)
    298 ; SSE-NEXT:    movq %rdi, %rax
    299 ; SSE-NEXT:    retq
    300 ;
    301 ; AVX2-LABEL: mulhw_v64i16:
    302 ; AVX2:       # %bb.0:
    303 ; AVX2-NEXT:    vpmulhw %ymm4, %ymm0, %ymm0
    304 ; AVX2-NEXT:    vpmulhw %ymm5, %ymm1, %ymm1
    305 ; AVX2-NEXT:    vpmulhw %ymm6, %ymm2, %ymm2
    306 ; AVX2-NEXT:    vpmulhw %ymm7, %ymm3, %ymm3
    307 ; AVX2-NEXT:    retq
    308 ;
    309 ; AVX512F-LABEL: mulhw_v64i16:
    310 ; AVX512F:       # %bb.0:
    311 ; AVX512F-NEXT:    vpmulhw %ymm4, %ymm0, %ymm0
    312 ; AVX512F-NEXT:    vpmulhw %ymm5, %ymm1, %ymm1
    313 ; AVX512F-NEXT:    vpmulhw %ymm6, %ymm2, %ymm2
    314 ; AVX512F-NEXT:    vpmulhw %ymm7, %ymm3, %ymm3
    315 ; AVX512F-NEXT:    retq
    316 ;
    317 ; AVX512BW-LABEL: mulhw_v64i16:
    318 ; AVX512BW:       # %bb.0:
    319 ; AVX512BW-NEXT:    vpmulhw %zmm2, %zmm0, %zmm0
    320 ; AVX512BW-NEXT:    vpmulhw %zmm3, %zmm1, %zmm1
    321 ; AVX512BW-NEXT:    retq
    322   %a1 = sext <64 x i16> %a to <64 x i32>
    323   %b1 = sext <64 x i16> %b to <64 x i32>
    324   %c = mul <64 x i32> %a1, %b1
    325   %d = lshr <64 x i32> %c, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
    326   %e = trunc <64 x i32> %d to <64 x i16>
    327   ret <64 x i16> %e
    328 }
    329