; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=avx2 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X86 --check-prefix=X86-AVX
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL --check-prefix=X86 --check-prefix=X86-AVX512VL
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx2 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2 --check-prefix=X64 --check-prefix=X64-AVX
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL --check-prefix=X64 --check-prefix=X64-AVX512VL

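; VPACKSSDW narrows packed i32 elements to i16 with signed saturation,
; operating independently within each 128-bit lane.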
define <16 x i16> @test_x86_avx2_packssdw(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_packssdw:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packssdw:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packssdw:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x6b,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packssdw:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone


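; With all-constant operands the pack intrinsic should be constant-folded:
; no vpackssdw is emitted, only a single vmovaps load of the precomputed
; result from the constant pool.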
define <16 x i16> @test_x86_avx2_packssdw_fold() {
; X86-AVX-LABEL: test_x86_avx2_packssdw_fold:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X86-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI1_0, kind: FK_Data_4
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packssdw_fold:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vmovaps LCPI1_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X86-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI1_0, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packssdw_fold:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X64-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI1_0-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packssdw_fold:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vmovaps {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,32767,65535,0,0,0,0,32769,32768,0,65280]
; X64-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI1_0-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
  ret <16 x i16> %res
}


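; VPACKSSWB narrows packed i16 elements to i8 with signed saturation,
; again per 128-bit lane.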
define <32 x i8> @test_x86_avx2_packsswb(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_packsswb:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packsswb:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packsswb:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x63,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packsswb:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_packsswb_fold() {
; X86-AVX-LABEL: test_x86_avx2_packsswb_fold:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X86-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI3_0, kind: FK_Data_4
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packsswb_fold:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vmovaps LCPI3_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X86-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI3_0, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packsswb_fold:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X64-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI3_0-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packsswb_fold:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vmovaps {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0,0,127,127,255,255,128,128,128,0,0,0,0,0,0,0,0]
; X64-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI3_0-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
  ret <32 x i8> %res
}


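; VPACKUSWB narrows signed i16 inputs to i8 with *unsigned* saturation:
; negative values clamp to 0 and values above 255 clamp to 255.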
define <32 x i8> @test_x86_avx2_packuswb(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_packuswb:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x67,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packuswb:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packuswb:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x67,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packuswb:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %a0, <16 x i16> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_packuswb_fold() {
; X86-AVX-LABEL: test_x86_avx2_packuswb_fold:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X86-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI5_0, kind: FK_Data_4
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packuswb_fold:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vmovaps LCPI5_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X86-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI5_0, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packuswb_fold:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X64-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI5_0-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packuswb_fold:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vmovaps {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0,0,255,255,0,0,0,0,0,0,0,0,0,0,0,0,0]
; X64-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI5_0-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> <i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678, i16 0, i16 255, i16 256, i16 65535, i16 -1, i16 -255, i16 -256, i16 -32678>, <16 x i16> zeroinitializer)
  ret <32 x i8> %res
}


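; Saturating adds: vpaddsb/vpaddsw clamp results to the signed range and
; vpaddusb/vpaddusw to the unsigned range, rather than wrapping on overflow.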
define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_padds_b:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xec,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_padds_b:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_padds_b:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xec,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_padds_b:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_padds_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xed,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_padds_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_padds_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xed,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_padds_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_paddus_b:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xdc,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_paddus_b:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_paddus_b:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xdc,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_paddus_b:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdc,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.paddus.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_paddus_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xdd,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_paddus_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_paddus_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xdd,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_paddus_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xdd,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone


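; VPMADDWD multiplies adjacent pairs of signed i16 elements and adds each
; pair's products, producing packed i32 results.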
define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmadd_wd:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf5,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmadd_wd:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmadd_wd:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf5,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmadd_wd:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16>, <16 x i16>) nounwind readnone


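; Packed min/max: these intrinsics cover the SSE2-era combinations only
; (signed words and unsigned bytes); the other element-type/signedness
; combinations were added later with SSE4.1-style intrinsics.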
define <16 x i16> @test_x86_avx2_pmaxs_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmaxs_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xee,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmaxs_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xee,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmaxs_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xee,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmaxs_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xee,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxs.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pmaxu_b(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmaxu_b:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xde,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmaxu_b:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xde,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmaxu_b:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xde,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmaxu_b:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xde,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxu.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmins_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmins_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpminsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xea,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmins_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpminsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xea,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmins_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpminsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xea,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmins_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpminsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xea,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmins.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pminu_b(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pminu_b:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpminub %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xda,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pminu_b:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpminub %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xda,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pminu_b:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpminub %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xda,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pminu_b:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpminub %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xda,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pminu.b(<32 x i8>, <32 x i8>) nounwind readnone


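; VPMOVMSKB gathers the sign bit of each byte into a 32-bit GPR mask. Since
; no ymm value is live out of the function, the backend also emits vzeroupper
; before the return to avoid AVX-to-SSE transition penalties.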
define i32 @test_x86_avx2_pmovmskb(<32 x i8> %a0) {
; X86-LABEL: test_x86_avx2_pmovmskb:
; X86:       ## %bb.0:
; X86-NEXT:    vpmovmskb %ymm0, %eax ## encoding: [0xc5,0xfd,0xd7,0xc0]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_pmovmskb:
; X64:       ## %bb.0:
; X64-NEXT:    vpmovmskb %ymm0, %eax ## encoding: [0xc5,0xfd,0xd7,0xc0]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %a0) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>) nounwind readnone


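; VPMULHW/VPMULHUW return the high 16 bits of the signed/unsigned
; 16x16-bit multiply of each element pair.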
define <16 x i16> @test_x86_avx2_pmulh_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmulh_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe5,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmulh_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmulh_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe5,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmulh_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulh.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmulhu_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmulhu_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe4,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmulhu_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmulhu_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe4,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmulhu_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmulhu.w(<16 x i16>, <16 x i16>) nounwind readnone


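; VPSADBW sums absolute differences over each group of 8 bytes, producing
; one i64 per group (only the low 16 bits can be nonzero).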
define <4 x i64> @test_x86_avx2_psad_bw(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psad_bw:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf6,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psad_bw:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf6,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psad_bw:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf6,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psad_bw:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsadbw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf6,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8> %a0, <32 x i8> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psad.bw(<32 x i8>, <32 x i8>) nounwind readnone


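; Vector-count shifts: all elements are shifted by the single 64-bit count
; held in the low quadword of the xmm operand.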
define <8 x i32> @test_x86_avx2_psll_d(<8 x i32> %a0, <4 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psll_d:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpslld %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf2,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psll_d:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpslld %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psll_d:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpslld %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf2,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psll_d:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpslld %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psll_q(<4 x i64> %a0, <2 x i64> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psll_q:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf3,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psll_q:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psll_q:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf3,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psll_q:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) nounwind readnone


define <16 x i16> @test_x86_avx2_psll_w(<16 x i16> %a0, <8 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psll_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf1,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psll_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psll_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xf1,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psll_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) nounwind readnone


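; Immediate-count shifts: the count appears as the trailing imm8 (0x07)
; in each encoding below.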
define <8 x i32> @test_x86_avx2_pslli_d(<8 x i32> %a0) {
; X86-AVX-LABEL: test_x86_avx2_pslli_d:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpslld $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x72,0xf0,0x07]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pslli_d:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpslld $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xf0,0x07]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pslli_d:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpslld $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x72,0xf0,0x07]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pslli_d:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpslld $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xf0,0x07]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_pslli_q(<4 x i64> %a0) {
; X86-AVX-LABEL: test_x86_avx2_pslli_q:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsllq $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x73,0xf0,0x07]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pslli_q:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsllq $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xf0,0x07]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pslli_q:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsllq $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x73,0xf0,0x07]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pslli_q:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsllq $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xf0,0x07]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_pslli_w(<16 x i16> %a0) {
; X86-AVX-LABEL: test_x86_avx2_pslli_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsllw $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x71,0xf0,0x07]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pslli_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsllw $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x07]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pslli_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsllw $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x71,0xf0,0x07]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pslli_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsllw $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xf0,0x07]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) nounwind readnone


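; Arithmetic right shifts (sign-extending). Note there are only dword and
; word forms here: a packed vpsraq did not exist before AVX-512.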
define <8 x i32> @test_x86_avx2_psra_d(<8 x i32> %a0, <4 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psra_d:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe2,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psra_d:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psra_d:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe2,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psra_d:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_psra_w(<16 x i16> %a0, <8 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psra_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe1,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psra_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psra_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe1,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psra_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrai_d(<8 x i32> %a0) {
; X86-AVX-LABEL: test_x86_avx2_psrai_d:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrad $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x72,0xe0,0x07]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrai_d:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrad $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xe0,0x07]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrai_d:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrad $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x72,0xe0,0x07]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrai_d:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrad $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xe0,0x07]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_psrai_w(<16 x i16> %a0) {
; X86-AVX-LABEL: test_x86_avx2_psrai_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsraw $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x71,0xe0,0x07]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrai_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsraw $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x07]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrai_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsraw $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x71,0xe0,0x07]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrai_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsraw $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xe0,0x07]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) nounwind readnone


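; Logical right shifts, zero-filling from the top bit.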
define <8 x i32> @test_x86_avx2_psrl_d(<8 x i32> %a0, <4 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psrl_d:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd2,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrl_d:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrl_d:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd2,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrl_d:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %a0, <4 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) nounwind readnone


define <4 x i64> @test_x86_avx2_psrl_q(<4 x i64> %a0, <2 x i64> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psrl_q:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd3,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrl_q:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrl_q:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd3,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrl_q:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %a0, <2 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) nounwind readnone


define <16 x i16> @test_x86_avx2_psrl_w(<16 x i16> %a0, <8 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psrl_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd1,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrl_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrl_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd1,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrl_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrli_d(<8 x i32> %a0) {
; X86-AVX-LABEL: test_x86_avx2_psrli_d:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrld $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x72,0xd0,0x07]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrli_d:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrld $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xd0,0x07]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrli_d:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrld $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x72,0xd0,0x07]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrli_d:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrld $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xd0,0x07]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %a0, i32 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) nounwind readnone


define <4 x i64> @test_x86_avx2_psrli_q(<4 x i64> %a0) {
; X86-AVX-LABEL: test_x86_avx2_psrli_q:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrlq $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x73,0xd0,0x07]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrli_q:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrlq $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xd0,0x07]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrli_q:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrlq $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x73,0xd0,0x07]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrli_q:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrlq $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x73,0xd0,0x07]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %a0, i32 7) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) nounwind readnone


define <16 x i16> @test_x86_avx2_psrli_w(<16 x i16> %a0) {
; X86-AVX-LABEL: test_x86_avx2_psrli_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrlw $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x71,0xd0,0x07]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrli_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrlw $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xd0,0x07]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrli_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrlw $7, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0x71,0xd0,0x07]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrli_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrlw $7, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xd0,0x07]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %a0, i32 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone


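; Saturating subtracts mirror the saturating adds above: vpsubsb/vpsubsw
; clamp to the signed range, vpsubusb/vpsubusw never go below 0.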
define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psubs_b:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe8,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psubs_b:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psubs_b:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe8,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psubs_b:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psubs_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe9,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psubs_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psubs_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe9,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psubs_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psubus_b:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd8,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psubus_b:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psubus_b:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd8,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psubus_b:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd8,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psubus.b(<32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_psubus_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psubus_w:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd9,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psubus_w:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psubus_w:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xd9,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psubus_w:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd9,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psubus.w(<16 x i16>, <16 x i16>) nounwind readnone

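; The horizontal add/sub instructions have no EVEX (AVX-512) forms, so both
; run lines select the identical VEX encoding. That is why these tests use
; shared X86/X64 check prefixes instead of separate AVX and AVX512VL blocks.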
define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_phadd_d:
; X86:       ## %bb.0:
; X86-NEXT:    vphaddd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x02,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_phadd_d:
; X64:       ## %bb.0:
; X64-NEXT:    vphaddd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x02,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_phadd_sw(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: test_x86_avx2_phadd_sw:
; X86:       ## %bb.0:
; X86-NEXT:    vphaddsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x03,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_phadd_sw:
; X64:       ## %bb.0:
; X64-NEXT:    vphaddsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x03,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_phadd_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: test_x86_avx2_phadd_w:
; X86:       ## %bb.0:
; X86-NEXT:    vphaddw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x01,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_phadd_w:
; X64:       ## %bb.0:
; X64-NEXT:    vphaddw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x01,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16>, <16 x i16>) nounwind readnone


define <8 x i32> @test_x86_avx2_phsub_d(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_phsub_d:
; X86:       ## %bb.0:
; X86-NEXT:    vphsubd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x06,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_phsub_d:
; X64:       ## %bb.0:
; X64-NEXT:    vphsubd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x06,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_phsub_sw(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: test_x86_avx2_phsub_sw:
; X86:       ## %bb.0:
; X86-NEXT:    vphsubsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x07,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_phsub_sw:
; X64:       ## %bb.0:
; X64-NEXT:    vphsubsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x07,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_phsub_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: test_x86_avx2_phsub_w:
; X86:       ## %bb.0:
; X86-NEXT:    vphsubw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x05,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_phsub_w:
; X64:       ## %bb.0:
; X64-NEXT:    vphsubw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x05,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmadd_ub_sw(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmadd_ub_sw:
   1131 ; X86-AVX:       ## %bb.0:
   1132 ; X86-AVX-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
   1133 ; X86-AVX-NEXT:    retl ## encoding: [0xc3]
   1134 ;
   1135 ; X86-AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw:
   1136 ; X86-AVX512VL:       ## %bb.0:
   1137 ; X86-AVX512VL-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
   1138 ; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
   1139 ;
   1140 ; X64-AVX-LABEL: test_x86_avx2_pmadd_ub_sw:
   1141 ; X64-AVX:       ## %bb.0:
   1142 ; X64-AVX-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
   1143 ; X64-AVX-NEXT:    retq ## encoding: [0xc3]
   1144 ;
   1145 ; X64-AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw:
   1146 ; X64-AVX512VL:       ## %bb.0:
   1147 ; X64-AVX512VL-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xc1]
   1148 ; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
   1149   %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
   1150   ret <16 x i16> %res
   1151 }
   1152 declare <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8>, <32 x i8>) nounwind readnone
   1153 
; Make sure we don't commute this operation: vpmaddubsw is not commutative
; (see the note after this test).
define <16 x i16> @test_x86_avx2_pmadd_ub_sw_load_op0(<32 x i8>* %ptr, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX-NEXT:    vmovdqa (%eax), %ymm1 ## encoding: [0xc5,0xfd,0x6f,0x08]
; X86-AVX-NEXT:    vpmaddubsw %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512VL-NEXT:    vmovdqa (%eax), %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x08]
; X86-AVX512VL-NEXT:    vpmaddubsw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovdqa (%rdi), %ymm1 ## encoding: [0xc5,0xfd,0x6f,0x0f]
; X64-AVX-NEXT:    vpmaddubsw %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmadd_ub_sw_load_op0:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vmovdqa (%rdi), %ymm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x0f]
; X64-AVX512VL-NEXT:    vpmaddubsw %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x04,0xc0]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %a0 = load <32 x i8>, <32 x i8>* %ptr
  %res = call <16 x i16> @llvm.x86.avx2.pmadd.ub.sw(<32 x i8> %a0, <32 x i8> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
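
; vpmaddubsw treats its first source as unsigned bytes and its second as
; signed bytes, accumulating adjacent products with signed saturation;
; roughly, per result word:
;   res[i] = sat_i16(zext(a0[2i]) * sext(a1[2i]) + zext(a0[2i+1]) * sext(a1[2i+1]))
; Swapping the operands would flip which input is treated as unsigned, so
; the load above must stay in operand 0 (via the separate vmovdqa) instead
; of being commuted into the instruction's memory operand.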

define <16 x i16> @test_x86_avx2_pmul_hr_sw(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmul_hr_sw:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmul_hr_sw:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmul_hr_sw:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmul_hr_sw:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmul.hr.sw(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pshuf_b(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pshuf_b:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pshuf_b:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pshuf_b:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pshuf_b:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>) nounwind readnone


define <32 x i8> @test_x86_avx2_psign_b(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: test_x86_avx2_psign_b:
; X86:       ## %bb.0:
; X86-NEXT:    vpsignb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x08,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_psign_b:
; X64:       ## %bb.0:
; X64-NEXT:    vpsignb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x08,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_psign_d(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_psign_d:
; X86:       ## %bb.0:
; X86-NEXT:    vpsignd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x0a,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_psign_d:
; X64:       ## %bb.0:
; X64-NEXT:    vpsignd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x0a,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_psign_w(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: test_x86_avx2_psign_w:
; X86:       ## %bb.0:
; X86-NEXT:    vpsignw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x09,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_psign_w:
; X64:       ## %bb.0:
; X64-NEXT:    vpsignw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x09,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16>, <16 x i16>) nounwind readnone


define <16 x i16> @test_x86_avx2_mpsadbw(<32 x i8> %a0, <32 x i8> %a1) {
; X86-LABEL: test_x86_avx2_mpsadbw:
; X86:       ## %bb.0:
; X86-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x42,0xc1,0x07]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_mpsadbw:
; X64:       ## %bb.0:
; X64-NEXT:    vmpsadbw $7, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x42,0xc1,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8> %a0, <32 x i8> %a1, i8 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.mpsadbw(<32 x i8>, <32 x i8>, i8) nounwind readnone
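
; The $7 immediate selects the 32-bit-aligned block offsets within each
; source: roughly, imm[1:0] picks the 4-byte block in the second source and
; imm[2] the starting offset in the first, with imm[5:3] playing the same
; role for the upper 128-bit lane of the 256-bit form.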


define <16 x i16> @test_x86_avx2_packusdw(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_packusdw:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packusdw:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packusdw:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packusdw:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %a0, <8 x i32> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_packusdw_fold() {
; X86-AVX-LABEL: test_x86_avx2_packusdw_fold:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X86-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI54_0, kind: FK_Data_4
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vmovaps LCPI54_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X86-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI54_0, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_packusdw_fold:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X64-AVX-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI54_0-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vmovaps {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
; X64-AVX512VL-NEXT:    ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI54_0-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
  ret <16 x i16> %res
}
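
; The fold above follows from unsigned saturation to [0, 65535]: the
; zeroinitializer operand packs to the zero words, 255/32767/65535 pass
; through unchanged, and every negative input clamps to 0.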


define <32 x i8> @test_x86_avx2_pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
; X86-LABEL: test_x86_avx2_pblendvb:
; X86:       ## %bb.0:
; X86-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x4c,0xc1,0x20]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_pblendvb:
; X64:       ## %bb.0:
; X64-NEXT:    vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x4c,0xc1,0x20]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>) nounwind readnone


define <16 x i16> @test_x86_avx2_pblendw(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: test_x86_avx2_pblendw:
; X86:       ## %bb.0:
; X86-NEXT:    vpblendw $7, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x0e,0xc1,0x07]
; X86-NEXT:    ## ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_pblendw:
; X64:       ## %bb.0:
; X64-NEXT:    vpblendw $7, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x0e,0xc1,0x07]
; X64-NEXT:    ## ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7],ymm1[8,9,10],ymm0[11,12,13,14,15]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16> %a0, <16 x i16> %a1, i8 7) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pblendw(<16 x i16>, <16 x i16>, i8) nounwind readnone
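
; For the 256-bit vpblendw, the 8-bit mask is applied to each 128-bit lane
; separately: bit i set takes word i of the lane from the second source,
; which is why $7 selects ymm1[0,1,2] and ymm1[8,9,10] in the decode above.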


define <32 x i8> @test_x86_avx2_pmaxsb(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmaxsb:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3c,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmaxsb:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3c,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmaxsb:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3c,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmaxsb:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3c,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmaxs.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmaxsd(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmaxsd:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3d,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmaxsd:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3d,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmaxsd:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3d,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmaxsd:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3d,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pmaxud(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmaxud:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmaxud:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmaxud:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmaxud:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmaxud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_pmaxuw(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pmaxuw:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3e,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pmaxuw:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3e,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pmaxuw:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3e,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pmaxuw:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3e,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pmaxu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <32 x i8> @test_x86_avx2_pminsb(<32 x i8> %a0, <32 x i8> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pminsb:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpminsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x38,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pminsb:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpminsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x38,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pminsb:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpminsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x38,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pminsb:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpminsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x38,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
  ret <32 x i8> %res
}
declare <32 x i8> @llvm.x86.avx2.pmins.b(<32 x i8>, <32 x i8>) nounwind readnone


define <8 x i32> @test_x86_avx2_pminsd(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pminsd:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpminsd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x39,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pminsd:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpminsd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x39,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pminsd:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpminsd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x39,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pminsd:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpminsd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x39,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_pminud(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pminud:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpminud %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3b,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pminud:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpminud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3b,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pminud:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpminud %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3b,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pminud:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpminud %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3b,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readnone


define <16 x i16> @test_x86_avx2_pminuw(<16 x i16> %a0, <16 x i16> %a1) {
; X86-AVX-LABEL: test_x86_avx2_pminuw:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpminuw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3a,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_pminuw:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpminuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3a,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_pminuw:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpminuw %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x3a,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_pminuw:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpminuw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3a,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
  ret <16 x i16> %res
}
declare <16 x i16> @llvm.x86.avx2.pminu.w(<16 x i16>, <16 x i16>) nounwind readnone


define <4 x i32> @test_x86_avx2_pblendd_128(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pblendd_128:
; X86:       ## %bb.0:
; X86-NEXT:    vblendps $8, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x08]
; X86-NEXT:    ## xmm0 = xmm1[0,1,2],xmm0[3]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_pblendd_128:
; X64:       ## %bb.0:
; X64-NEXT:    vblendps $8, %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x08]
; X64-NEXT:    ## xmm0 = xmm1[0,1,2],xmm0[3]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32> %a0, <4 x i32> %a1, i8 7) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.pblendd.128(<4 x i32>, <4 x i32>, i8) nounwind readnone
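
; The blend above is commuted: taking elements 0-2 from %a1 under mask $7
; is equivalent to taking element 3 from %a0 under the complementary mask
; $8 with the sources swapped, which is the vblendps form emitted (the
; float-domain vblendps performs the same bitwise selection).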


define <8 x i32> @test_x86_avx2_pblendd_256(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_pblendd_256:
; X86:       ## %bb.0:
; X86-NEXT:    vblendps $7, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0x07]
; X86-NEXT:    ## ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_pblendd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vblendps $7, %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0x07]
; X64-NEXT:    ## ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32> %a0, <8 x i32> %a1, i8 7) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.pblendd.256(<8 x i32>, <8 x i32>, i8) nounwind readnone


; Check that the arguments are swapped between the intrinsic definition
; and its lowering: the intrinsic takes (data, indices), but the shuffle
; indices are the first source operand of the instruction.
define <8 x i32> @test_x86_avx2_permd(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_permd:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpermps %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_permd:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_permd:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpermps %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_permd:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>) nounwind readonly
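
; Note that the integer permd intrinsic is lowered to vpermps here: vpermd
; and vpermps perform the same 32-bit element permutation, so the float
; form is an equally correct lowering.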


; Check that the arguments are swapped between the intrinsic definition
; and its lowering: the intrinsic takes (data, indices), but the shuffle
; indices are the first source operand of the instruction.
define <8 x float> @test_x86_avx2_permps(<8 x float> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_permps:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpermps %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_permps:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_permps:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpermps %ymm0, %ymm1, %ymm0 ## encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_permps:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xc0]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1) ; <<8 x float>> [#uses=1]
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>) nounwind readonly


define <2 x i64> @test_x86_avx2_maskload_q(i8* %a0, <2 x i64> %a1) {
; X86-LABEL: test_x86_avx2_maskload_q:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovq (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0xf9,0x8c,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskload_q:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaskmovq (%rdi), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0xf9,0x8c,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>) nounwind readonly


define <4 x i64> @test_x86_avx2_maskload_q_256(i8* %a0, <4 x i64> %a1) {
; X86-LABEL: test_x86_avx2_maskload_q_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovq (%eax), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0xfd,0x8c,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskload_q_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaskmovq (%rdi), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0xfd,0x8c,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>) nounwind readonly


define <4 x i32> @test_x86_avx2_maskload_d(i8* %a0, <4 x i32> %a1) {
; X86-LABEL: test_x86_avx2_maskload_d:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovd (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x8c,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskload_d:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaskmovd (%rdi), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x8c,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>) nounwind readonly


define <8 x i32> @test_x86_avx2_maskload_d_256(i8* %a0, <8 x i32> %a1) {
; X86-LABEL: test_x86_avx2_maskload_d_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovd (%eax), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x8c,0x00]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskload_d_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaskmovd (%rdi), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x8c,0x07]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>) nounwind readonly


define void @test_x86_avx2_maskstore_q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2) {
; X86-LABEL: test_x86_avx2_maskstore_q:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovq %xmm1, %xmm0, (%eax) ## encoding: [0xc4,0xe2,0xf9,0x8e,0x08]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskstore_q:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaskmovq %xmm1, %xmm0, (%rdi) ## encoding: [0xc4,0xe2,0xf9,0x8e,0x0f]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx2.maskstore.q(i8* %a0, <2 x i64> %a1, <2 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>) nounwind


define void @test_x86_avx2_maskstore_q_256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2) {
; X86-LABEL: test_x86_avx2_maskstore_q_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovq %ymm1, %ymm0, (%eax) ## encoding: [0xc4,0xe2,0xfd,0x8e,0x08]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskstore_q_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaskmovq %ymm1, %ymm0, (%rdi) ## encoding: [0xc4,0xe2,0xfd,0x8e,0x0f]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx2.maskstore.q.256(i8* %a0, <4 x i64> %a1, <4 x i64> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>) nounwind


define void @test_x86_avx2_maskstore_d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2) {
; X86-LABEL: test_x86_avx2_maskstore_d:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovd %xmm1, %xmm0, (%eax) ## encoding: [0xc4,0xe2,0x79,0x8e,0x08]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskstore_d:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaskmovd %xmm1, %xmm0, (%rdi) ## encoding: [0xc4,0xe2,0x79,0x8e,0x0f]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx2.maskstore.d(i8* %a0, <4 x i32> %a1, <4 x i32> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>) nounwind


define void @test_x86_avx2_maskstore_d_256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2) {
; X86-LABEL: test_x86_avx2_maskstore_d_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmaskmovd %ymm1, %ymm0, (%eax) ## encoding: [0xc4,0xe2,0x7d,0x8e,0x08]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_maskstore_d_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpmaskmovd %ymm1, %ymm0, (%rdi) ## encoding: [0xc4,0xe2,0x7d,0x8e,0x0f]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx2.maskstore.d.256(i8* %a0, <8 x i32> %a1, <8 x i32> %a2)
  ret void
}
declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>) nounwind
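
; The 256-bit maskstore tests end in vzeroupper while the maskload tests do
; not: the stores leave no live ymm value behind, so the upper halves are
; cleared before returning to avoid AVX/SSE transition penalties, whereas
; the 256-bit loads return their result in ymm0.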


define <4 x i32> @test_x86_avx2_psllv_d(<4 x i32> %a0, <4 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psllv_d:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x47,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_d:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x47,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_psllv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psllv_d_256:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x47,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_256:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_d_256:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x47,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_256:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone


define <2 x i64> @test_x86_avx2_psllv_q(<2 x i64> %a0, <2 x i64> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psllv_q:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0xf9,0x47,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_q:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_q:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0xf9,0x47,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone


define <4 x i64> @test_x86_avx2_psllv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psllv_q_256:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0xfd,0x47,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psllv_q_256:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psllv_q_256:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0xfd,0x47,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_256:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone


define <4 x i32> @test_x86_avx2_psrlv_d(<4 x i32> %a0, <4 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psrlv_d:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x45,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_d:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x45,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i32> @test_x86_avx2_psrlv_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psrlv_d_256:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x45,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_d_256:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x45,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone


define <2 x i64> @test_x86_avx2_psrlv_q(<2 x i64> %a0, <2 x i64> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psrlv_q:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0xf9,0x45,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_q:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_q:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0xf9,0x45,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone


define <4 x i64> @test_x86_avx2_psrlv_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psrlv_q_256:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0xfd,0x45,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_q_256:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrlv_q_256:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0xfd,0x45,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_256:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1]
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone


define <4 x i32> @test_x86_avx2_psrav_d(<4 x i32> %a0, <4 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psrav_d:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrav_d:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}

define <4 x i32> @test_x86_avx2_psrav_d_const(<4 x i32> %a0, <4 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psrav_d_const:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23]
; X86-AVX-NEXT:    ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI86_0, kind: FK_Data_4
; X86-AVX-NEXT:    vpsravd LCPI86_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X86-AVX-NEXT:    ## fixup A - offset: 5, value: LCPI86_1, kind: FK_Data_4
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vmovdqa LCPI86_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]
; X86-AVX512VL-NEXT:    ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI86_0, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsravd LCPI86_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    ## fixup A - offset: 5, value: LCPI86_1, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrav_d_const:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23]
; X64-AVX-NEXT:    ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI86_0-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X64-AVX-NEXT:    ## fixup A - offset: 5, value: LCPI86_1-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]
; X64-AVX512VL-NEXT:    ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI86_0-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsravd {{.*}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    ## fixup A - offset: 5, value: LCPI86_1-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> <i32 2, i32 9, i32 -12, i32 23>, <4 x i32> <i32 1, i32 18, i32 35, i32 52>)
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone
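
; vpsravd applies a per-element arithmetic shift, and counts of 32 or more
; fill the element with its sign bit, so the constant test above would
; yield, roughly: 2 >> 1 = 1, 9 >> 18 = 0, -12 >> 35 = -1, 23 >> 52 = 0.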

define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psrav_d_256:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0xc1]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_256:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xc1]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrav_d_256:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0xc1]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_256:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xc1]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1]
  ret <8 x i32> %res
}

define <8 x i32> @test_x86_avx2_psrav_d_256_const(<8 x i32> %a0, <8 x i32> %a1) {
; X86-AVX-LABEL: test_x86_avx2_psrav_d_256_const:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X86-AVX-NEXT:    ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI88_0, kind: FK_Data_4
; X86-AVX-NEXT:    vpsravd LCPI88_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X86-AVX-NEXT:    ## fixup A - offset: 5, value: LCPI88_1, kind: FK_Data_4
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    vmovdqa LCPI88_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X86-AVX512VL-NEXT:    ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI88_0, kind: FK_Data_4
; X86-AVX512VL-NEXT:    vpsravd LCPI88_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X86-AVX512VL-NEXT:    ## fixup A - offset: 5, value: LCPI88_1, kind: FK_Data_4
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_x86_avx2_psrav_d_256_const:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X64-AVX-NEXT:    ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX-NEXT:    ## fixup A - offset: 4, value: LCPI88_0-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X64-AVX-NEXT:    ## fixup A - offset: 5, value: LCPI88_1-4, kind: reloc_riprel_4byte
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vmovdqa {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X64-AVX512VL-NEXT:    ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    ## fixup A - offset: 4, value: LCPI88_0-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X64-AVX512VL-NEXT:    ## fixup A - offset: 5, value: LCPI88_1-4, kind: reloc_riprel_4byte
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>)
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone
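
; The 256-bit constant case above follows the same per-lane rule, so
; [2,9,-12,23,-26,37,-40,51] shifted by [1,18,35,52,69,15,32,49] should give
; [1,0,-1,0,-1,0,-1,0]: every count of 32 or more collapses its lane to the
; sign bit.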
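
; The gather tests below all share one intrinsic shape. As a rough sketch of
; the per-element behaviour (an informal reading, not a normative definition):
;   result[i] = msb(mask[i]) ? load(base + sext(idx[i]) * scale) : src[i]
; where src is the first vector operand, base is the i8* operand, and the
; trailing i8 immediate (2 in these tests) is the scale; the instruction also
; clears the mask register when it completes.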
define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1, <4 x i32> %idx, <2 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_pd:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherdpd %xmm2, (%eax,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0xe9,0x92,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_pd:
; X64:       ## %bb.0:
; X64-NEXT:    vgatherdpd %xmm2, (%rdi,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0xe9,0x92,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double> %a0,
                            i8* %a1, <4 x i32> %idx, <2 x double> %mask, i8 2) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx2.gather.d.pd(<2 x double>, i8*,
                      <4 x i32>, <2 x double>, i8) nounwind readonly

define <4 x double> @test_x86_avx2_gather_d_pd_256(<4 x double> %a0, i8* %a1, <4 x i32> %idx, <4 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_pd_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherdpd %ymm2, (%eax,%xmm1,2), %ymm0 ## encoding: [0xc4,0xe2,0xed,0x92,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_pd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vgatherdpd %ymm2, (%rdi,%xmm1,2), %ymm0 ## encoding: [0xc4,0xe2,0xed,0x92,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double> %a0,
                            i8* %a1, <4 x i32> %idx, <4 x double> %mask, i8 2) ;
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.gather.d.pd.256(<4 x double>, i8*,
                      <4 x i32>, <4 x double>, i8) nounwind readonly

define <2 x double> @test_x86_avx2_gather_q_pd(<2 x double> %a0, i8* %a1, <2 x i64> %idx, <2 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_pd:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherqpd %xmm2, (%eax,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0xe9,0x93,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_pd:
; X64:       ## %bb.0:
; X64-NEXT:    vgatherqpd %xmm2, (%rdi,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0xe9,0x93,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double> %a0,
                            i8* %a1, <2 x i64> %idx, <2 x double> %mask, i8 2) ;
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.avx2.gather.q.pd(<2 x double>, i8*,
                      <2 x i64>, <2 x double>, i8) nounwind readonly

define <4 x double> @test_x86_avx2_gather_q_pd_256(<4 x double> %a0, i8* %a1, <4 x i64> %idx, <4 x double> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_pd_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherqpd %ymm2, (%eax,%ymm1,2), %ymm0 ## encoding: [0xc4,0xe2,0xed,0x93,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_pd_256:
; X64:       ## %bb.0:
; X64-NEXT:    vgatherqpd %ymm2, (%rdi,%ymm1,2), %ymm0 ## encoding: [0xc4,0xe2,0xed,0x93,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double> %a0,
                            i8* %a1, <4 x i64> %idx, <4 x double> %mask, i8 2) ;
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx2.gather.q.pd.256(<4 x double>, i8*,
                      <4 x i64>, <4 x double>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_d_ps(<4 x float> %a0, i8* %a1, <4 x i32> %idx, <4 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherdps %xmm2, (%eax,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x69,0x92,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_ps:
; X64:       ## %bb.0:
; X64-NEXT:    vgatherdps %xmm2, (%rdi,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x69,0x92,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float> %a0,
                            i8* %a1, <4 x i32> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.d.ps(<4 x float>, i8*,
                      <4 x i32>, <4 x float>, i8) nounwind readonly

define <8 x float> @test_x86_avx2_gather_d_ps_256(<8 x float> %a0, i8* %a1, <8 x i32> %idx, <8 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_ps_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherdps %ymm2, (%eax,%ymm1,2), %ymm0 ## encoding: [0xc4,0xe2,0x6d,0x92,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_ps_256:
; X64:       ## %bb.0:
; X64-NEXT:    vgatherdps %ymm2, (%rdi,%ymm1,2), %ymm0 ## encoding: [0xc4,0xe2,0x6d,0x92,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
                            i8* %a1, <8 x i32> %idx, <8 x float> %mask, i8 2) ;
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float>, i8*,
                      <8 x i32>, <8 x float>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_q_ps(<4 x float> %a0, i8* %a1, <2 x i64> %idx, <4 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherqps %xmm2, (%eax,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x69,0x93,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_ps:
; X64:       ## %bb.0:
; X64-NEXT:    vgatherqps %xmm2, (%rdi,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x69,0x93,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float> %a0,
                            i8* %a1, <2 x i64> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.q.ps(<4 x float>, i8*,
                      <2 x i64>, <4 x float>, i8) nounwind readonly

define <4 x float> @test_x86_avx2_gather_q_ps_256(<4 x float> %a0, i8* %a1, <4 x i64> %idx, <4 x float> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_ps_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vgatherqps %xmm2, (%eax,%ymm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x6d,0x93,0x04,0x48]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_ps_256:
; X64:       ## %bb.0:
; X64-NEXT:    vgatherqps %xmm2, (%rdi,%ymm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x6d,0x93,0x04,0x4f]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float> %a0,
                            i8* %a1, <4 x i64> %idx, <4 x float> %mask, i8 2) ;
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx2.gather.q.ps.256(<4 x float>, i8*,
                      <4 x i64>, <4 x float>, i8) nounwind readonly
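
; Note: this 256-bit q->ps variant (and q_d_256 below) reads a ymm index but
; returns only an xmm value, so no ymm register is live at the return and llc
; emits vzeroupper first; the tests that return a full ymm0 have no need for
; it.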

define <2 x i64> @test_x86_avx2_gather_d_q(<2 x i64> %a0, i8* %a1, <4 x i32> %idx, <2 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_q:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherdq %xmm2, (%eax,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0xe9,0x90,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_q:
; X64:       ## %bb.0:
; X64-NEXT:    vpgatherdq %xmm2, (%rdi,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0xe9,0x90,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64> %a0,
                            i8* %a1, <4 x i32> %idx, <2 x i64> %mask, i8 2) ;
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.gather.d.q(<2 x i64>, i8*,
                      <4 x i32>, <2 x i64>, i8) nounwind readonly

define <4 x i64> @test_x86_avx2_gather_d_q_256(<4 x i64> %a0, i8* %a1, <4 x i32> %idx, <4 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_q_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherdq %ymm2, (%eax,%xmm1,2), %ymm0 ## encoding: [0xc4,0xe2,0xed,0x90,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_q_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpgatherdq %ymm2, (%rdi,%xmm1,2), %ymm0 ## encoding: [0xc4,0xe2,0xed,0x90,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64> %a0,
                            i8* %a1, <4 x i32> %idx, <4 x i64> %mask, i8 2) ;
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.d.q.256(<4 x i64>, i8*,
                      <4 x i32>, <4 x i64>, i8) nounwind readonly

define <2 x i64> @test_x86_avx2_gather_q_q(<2 x i64> %a0, i8* %a1, <2 x i64> %idx, <2 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_q:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherqq %xmm2, (%eax,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_q:
; X64:       ## %bb.0:
; X64-NEXT:    vpgatherqq %xmm2, (%rdi,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0xe9,0x91,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64> %a0,
                            i8* %a1, <2 x i64> %idx, <2 x i64> %mask, i8 2) ;
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.avx2.gather.q.q(<2 x i64>, i8*,
                      <2 x i64>, <2 x i64>, i8) nounwind readonly

define <4 x i64> @test_x86_avx2_gather_q_q_256(<4 x i64> %a0, i8* %a1, <4 x i64> %idx, <4 x i64> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_q_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherqq %ymm2, (%eax,%ymm1,2), %ymm0 ## encoding: [0xc4,0xe2,0xed,0x91,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_q_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpgatherqq %ymm2, (%rdi,%ymm1,2), %ymm0 ## encoding: [0xc4,0xe2,0xed,0x91,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64> %a0,
                            i8* %a1, <4 x i64> %idx, <4 x i64> %mask, i8 2) ;
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.avx2.gather.q.q.256(<4 x i64>, i8*,
                      <4 x i64>, <4 x i64>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_d_d(<4 x i32> %a0, i8* %a1, <4 x i32> %idx, <4 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_d:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherdd %xmm2, (%eax,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x69,0x90,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_d:
; X64:       ## %bb.0:
; X64-NEXT:    vpgatherdd %xmm2, (%rdi,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x69,0x90,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32> %a0,
                            i8* %a1, <4 x i32> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.d.d(<4 x i32>, i8*,
                      <4 x i32>, <4 x i32>, i8) nounwind readonly

define <8 x i32> @test_x86_avx2_gather_d_d_256(<8 x i32> %a0, i8* %a1, <8 x i32> %idx, <8 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_d_d_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherdd %ymm2, (%eax,%ymm1,2), %ymm0 ## encoding: [0xc4,0xe2,0x6d,0x90,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_d_d_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpgatherdd %ymm2, (%rdi,%ymm1,2), %ymm0 ## encoding: [0xc4,0xe2,0x6d,0x90,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32> %a0,
                            i8* %a1, <8 x i32> %idx, <8 x i32> %mask, i8 2) ;
  ret <8 x i32> %res
}
declare <8 x i32> @llvm.x86.avx2.gather.d.d.256(<8 x i32>, i8*,
                      <8 x i32>, <8 x i32>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_q_d(<4 x i32> %a0, i8* %a1, <2 x i64> %idx, <4 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_d:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherqd %xmm2, (%eax,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x69,0x91,0x04,0x48]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_d:
; X64:       ## %bb.0:
; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%xmm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x69,0x91,0x04,0x4f]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32> %a0,
                            i8* %a1, <2 x i64> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d(<4 x i32>, i8*,
                      <2 x i64>, <4 x i32>, i8) nounwind readonly

define <4 x i32> @test_x86_avx2_gather_q_d_256(<4 x i32> %a0, i8* %a1, <4 x i64> %idx, <4 x i32> %mask) {
; X86-LABEL: test_x86_avx2_gather_q_d_256:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpgatherqd %xmm2, (%eax,%ymm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x6d,0x91,0x04,0x48]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_avx2_gather_q_d_256:
; X64:       ## %bb.0:
; X64-NEXT:    vpgatherqd %xmm2, (%rdi,%ymm1,2), %xmm0 ## encoding: [0xc4,0xe2,0x6d,0x91,0x04,0x4f]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32> %a0,
                            i8* %a1, <4 x i64> %idx, <4 x i32> %mask, i8 2) ;
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.avx2.gather.q.d.256(<4 x i32>, i8*,
                      <4 x i64>, <4 x i32>, i8) nounwind readonly

; PR13298
define <8 x float> @test_gather_mask(<8 x float> %a0, float* %a, <8 x i32> %idx, <8 x float> %mask, float* nocapture %out) {
;; gather with mask
; X86-AVX-LABEL: test_gather_mask:
; X86-AVX:       ## %bb.0:
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-AVX-NEXT:    vmovaps %ymm2, %ymm3 ## encoding: [0xc5,0xfc,0x28,0xda]
; X86-AVX-NEXT:    vgatherdps %ymm3, (%ecx,%ymm1,4), %ymm0 ## encoding: [0xc4,0xe2,0x65,0x92,0x04,0x89]
; X86-AVX-NEXT:    vmovups %ymm2, (%eax) ## encoding: [0xc5,0xfc,0x11,0x10]
; X86-AVX-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512VL-LABEL: test_gather_mask:
; X86-AVX512VL:       ## %bb.0:
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-AVX512VL-NEXT:    vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda]
; X86-AVX512VL-NEXT:    vgatherdps %ymm3, (%ecx,%ymm1,4), %ymm0 ## encoding: [0xc4,0xe2,0x65,0x92,0x04,0x89]
; X86-AVX512VL-NEXT:    vmovups %ymm2, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x10]
; X86-AVX512VL-NEXT:    retl ## encoding: [0xc3]
;
; X64-AVX-LABEL: test_gather_mask:
; X64-AVX:       ## %bb.0:
; X64-AVX-NEXT:    vmovaps %ymm2, %ymm3 ## encoding: [0xc5,0xfc,0x28,0xda]
; X64-AVX-NEXT:    vgatherdps %ymm3, (%rdi,%ymm1,4), %ymm0 ## encoding: [0xc4,0xe2,0x65,0x92,0x04,0x8f]
; X64-AVX-NEXT:    vmovups %ymm2, (%rsi) ## encoding: [0xc5,0xfc,0x11,0x16]
; X64-AVX-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512VL-LABEL: test_gather_mask:
; X64-AVX512VL:       ## %bb.0:
; X64-AVX512VL-NEXT:    vmovaps %ymm2, %ymm3 ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xda]
; X64-AVX512VL-NEXT:    vgatherdps %ymm3, (%rdi,%ymm1,4), %ymm0 ## encoding: [0xc4,0xe2,0x65,0x92,0x04,0x8f]
; X64-AVX512VL-NEXT:    vmovups %ymm2, (%rsi) ## EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x16]
; X64-AVX512VL-NEXT:    retq ## encoding: [0xc3]
  %a_i8 = bitcast float* %a to i8*
  %res = call <8 x float> @llvm.x86.avx2.gather.d.ps.256(<8 x float> %a0,
                           i8* %a_i8, <8 x i32> %idx, <8 x float> %mask, i8 4) ;

;; for debugging, we'll just dump out the mask
  %out_ptr = bitcast float* %out to <8 x float>*
  store <8 x float> %mask, <8 x float>* %out_ptr, align 4

  ret <8 x float> %res
}
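
; Note on the checks above: the gather instruction zeroes its mask register as
; it completes, and %mask is still needed for the store to %out, so the
; compiler copies the mask from ymm2 to ymm3 and hands the copy to vgatherdps,
; leaving the original value live in ymm2 for the vmovups.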