Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX
      3 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX512VL,X86-AVX512VL
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX512VL,X64-AVX512VL
      6 
      7 ; We don't check any vinsertf128 variant with immediate 0 because that's just a blend.
      8 
      9 define <4 x double> @test_x86_avx_sqrt_pd_256(<4 x double> %a0) {
        ; Checks the 256-bit sqrt intrinsic selects to a single vsqrtpd; the
        ; AVX512VL run must emit identical VEX bytes via EVEX-to-VEX compression.
     10 ; AVX-LABEL: test_x86_avx_sqrt_pd_256:
     11 ; AVX:       # %bb.0:
     12 ; AVX-NEXT:    vsqrtpd %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x51,0xc0]
     13 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
     14 ;
     15 ; AVX512VL-LABEL: test_x86_avx_sqrt_pd_256:
     16 ; AVX512VL:       # %bb.0:
     17 ; AVX512VL-NEXT:    vsqrtpd %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x51,0xc0]
     18 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
     19   %res = call <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double> %a0) ; <<4 x double>> [#uses=1]
     20   ret <4 x double> %res
     21 }
     22 declare <4 x double> @llvm.x86.avx.sqrt.pd.256(<4 x double>) nounwind readnone
     23 
        ; Same check for the single-precision variant: one vsqrtps on both targets.
     24 define <8 x float> @test_x86_avx_sqrt_ps_256(<8 x float> %a0) {
     25 ; AVX-LABEL: test_x86_avx_sqrt_ps_256:
     26 ; AVX:       # %bb.0:
     27 ; AVX-NEXT:    vsqrtps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x51,0xc0]
     28 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
     29 ;
     30 ; AVX512VL-LABEL: test_x86_avx_sqrt_ps_256:
     31 ; AVX512VL:       # %bb.0:
     32 ; AVX512VL-NEXT:    vsqrtps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x51,0xc0]
     33 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
     34   %res = call <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float> %a0) ; <<8 x float>> [#uses=1]
     35   ret <8 x float> %res
     36 }
     37 declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>) nounwind readnone
     38 
     39 define <4 x double> @test_x86_avx_vinsertf128_pd_256_1(<4 x double> %a0, <2 x double> %a1) {
        ; Upper-lane insert (imm = 1) must stay a real vinsertf128 $1; same VEX
        ; bytes are required on AVX512VL via EVEX-to-VEX compression.
     40 ; AVX-LABEL: test_x86_avx_vinsertf128_pd_256_1:
     41 ; AVX:       # %bb.0:
     42 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
     43 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
     44 ;
     45 ; AVX512VL-LABEL: test_x86_avx_vinsertf128_pd_256_1:
     46 ; AVX512VL:       # %bb.0:
     47 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
     48 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
     49   %res = call <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double> %a0, <2 x double> %a1, i8 1)
     50   ret <4 x double> %res
     51 }
     52 declare <4 x double> @llvm.x86.avx.vinsertf128.pd.256(<4 x double>, <2 x double>, i8) nounwind readnone
     53 
        ; Single-precision variant of the upper-lane insert check above.
     54 define <8 x float> @test_x86_avx_vinsertf128_ps_256_1(<8 x float> %a0, <4 x float> %a1) {
     55 ; AVX-LABEL: test_x86_avx_vinsertf128_ps_256_1:
     56 ; AVX:       # %bb.0:
     57 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
     58 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
     59 ;
     60 ; AVX512VL-LABEL: test_x86_avx_vinsertf128_ps_256_1:
     61 ; AVX512VL:       # %bb.0:
     62 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
     63 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
     64   %res = call <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float> %a0, <4 x float> %a1, i8 1)
     65   ret <8 x float> %res
     66 }
     67 declare <8 x float> @llvm.x86.avx.vinsertf128.ps.256(<8 x float>, <4 x float>, i8) nounwind readnone
     68 
        ; Integer variant of the upper-lane insert check above.
     69 define <8 x i32> @test_x86_avx_vinsertf128_si_256_1(<8 x i32> %a0, <4 x i32> %a1) {
     70 ; AVX-LABEL: test_x86_avx_vinsertf128_si_256_1:
     71 ; AVX:       # %bb.0:
     72 ; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
     73 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
     74 ;
     75 ; AVX512VL-LABEL: test_x86_avx_vinsertf128_si_256_1:
     76 ; AVX512VL:       # %bb.0:
     77 ; AVX512VL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
     78 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
     79   %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 1)
     80   ret <8 x i32> %res
     81 }
     82 
     83 ; Verify that high bits of the immediate are masked off. This should be the equivalent
     84 ; of a vinsertf128 $0 which should be optimized into a blend, so just check that it's
     85 ; not a vinsertf128 $1.
        ; (imm = 2 & 1 == 0, so both runs emit the vblendps checked below instead.)
     86 define <8 x i32> @test_x86_avx_vinsertf128_si_256_2(<8 x i32> %a0, <4 x i32> %a1) {
     87 ; CHECK-LABEL: test_x86_avx_vinsertf128_si_256_2:
     88 ; CHECK:       # %bb.0:
     89 ; CHECK-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
     90 ; CHECK-NEXT:    vblendps $240, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x0c,0xc0,0xf0]
     91 ; CHECK-NEXT:    # ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
     92 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
     93   %res = call <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32> %a0, <4 x i32> %a1, i8 2)
     94   ret <8 x i32> %res
     95 }
     96 declare <8 x i32> @llvm.x86.avx.vinsertf128.si.256(<8 x i32>, <4 x i32>, i8) nounwind readnone
     97 
     98 ; We don't check any vextractf128 variant with immediate 0 because that's just a move.
     99 
        ; Upper-lane extract (imm = 1) must stay a vextractf128 $1; vzeroupper is
        ; expected before returning since the result is only 128 bits wide.
    100 define <2 x double> @test_x86_avx_vextractf128_pd_256_1(<4 x double> %a0) {
    101 ; AVX-LABEL: test_x86_avx_vextractf128_pd_256_1:
    102 ; AVX:       # %bb.0:
    103 ; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
    104 ; AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    105 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    106 ;
    107 ; AVX512VL-LABEL: test_x86_avx_vextractf128_pd_256_1:
    108 ; AVX512VL:       # %bb.0:
    109 ; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
    110 ; AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    111 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    112   %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 1)
    113   ret <2 x double> %res
    114 }
    115 declare <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double>, i8) nounwind readnone
    116 
        ; Single-precision variant of the upper-lane extract check above.
    117 define <4 x float> @test_x86_avx_vextractf128_ps_256_1(<8 x float> %a0) {
    118 ; AVX-LABEL: test_x86_avx_vextractf128_ps_256_1:
    119 ; AVX:       # %bb.0:
    120 ; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
    121 ; AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    122 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    123 ;
    124 ; AVX512VL-LABEL: test_x86_avx_vextractf128_ps_256_1:
    125 ; AVX512VL:       # %bb.0:
    126 ; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
    127 ; AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    128 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    129   %res = call <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float> %a0, i8 1)
    130   ret <4 x float> %res
    131 }
    132 declare <4 x float> @llvm.x86.avx.vextractf128.ps.256(<8 x float>, i8) nounwind readnone
    133 
        ; Integer variant of the upper-lane extract check above.
    134 define <4 x i32> @test_x86_avx_vextractf128_si_256_1(<8 x i32> %a0) {
    135 ; AVX-LABEL: test_x86_avx_vextractf128_si_256_1:
    136 ; AVX:       # %bb.0:
    137 ; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0 # encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
    138 ; AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    139 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    140 ;
    141 ; AVX512VL-LABEL: test_x86_avx_vextractf128_si_256_1:
    142 ; AVX512VL:       # %bb.0:
    143 ; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc0,0x01]
    144 ; AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    145 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    146   %res = call <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32> %a0, i8 1)
    147   ret <4 x i32> %res
    148 }
    149 declare <4 x i32> @llvm.x86.avx.vextractf128.si.256(<8 x i32>, i8) nounwind readnone
    150 
    151 ; Verify that high bits of the immediate are masked off. This should be the equivalent
    152 ; of a vextractf128 $0 which should be optimized away, so just check that it's
    153 ; not a vextractf128 of any kind.
        ; (imm = 2 & 1 == 0: only the register-kill annotation and vzeroupper remain.)
    154 define <2 x double> @test_x86_avx_extractf128_pd_256_2(<4 x double> %a0) {
    155 ; CHECK-LABEL: test_x86_avx_extractf128_pd_256_2:
    156 ; CHECK:       # %bb.0:
    157 ; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
    158 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    159 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    160   %res = call <2 x double> @llvm.x86.avx.vextractf128.pd.256(<4 x double> %a0, i8 2)
    161   ret <2 x double> %res
    162 }
    163 
    164 
    165 define <4 x double> @test_x86_avx_vbroadcastf128_pd_256(i8* %a0) {
        ; 128-bit memory broadcast to both ymm lanes; separate X86/X64 prefixes
        ; cover the 32-bit (stack-argument load into %eax) vs 64-bit (%rdi)
        ; addressing of the pointer argument.
    166 ; X86-AVX-LABEL: test_x86_avx_vbroadcastf128_pd_256:
    167 ; X86-AVX:       # %bb.0:
    168 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    169 ; X86-AVX-NEXT:    vbroadcastf128 (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
    170 ; X86-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
    171 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
    172 ;
    173 ; X86-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_pd_256:
    174 ; X86-AVX512VL:       # %bb.0:
    175 ; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    176 ; X86-AVX512VL-NEXT:    vbroadcastf128 (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
    177 ; X86-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
    178 ; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
    179 ;
    180 ; X64-AVX-LABEL: test_x86_avx_vbroadcastf128_pd_256:
    181 ; X64-AVX:       # %bb.0:
    182 ; X64-AVX-NEXT:    vbroadcastf128 (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
    183 ; X64-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
    184 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
    185 ;
    186 ; X64-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_pd_256:
    187 ; X64-AVX512VL:       # %bb.0:
    188 ; X64-AVX512VL-NEXT:    vbroadcastf128 (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
    189 ; X64-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
    190 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
    191   %res = call <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8* %a0) ; <<4 x double>> [#uses=1]
    192   ret <4 x double> %res
    193 }
    194 declare <4 x double> @llvm.x86.avx.vbroadcastf128.pd.256(i8*) nounwind readonly
    195 
    196 
        ; Single-precision variant of the 128-bit broadcast check above.
    197 define <8 x float> @test_x86_avx_vbroadcastf128_ps_256(i8* %a0) {
    198 ; X86-AVX-LABEL: test_x86_avx_vbroadcastf128_ps_256:
    199 ; X86-AVX:       # %bb.0:
    200 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    201 ; X86-AVX-NEXT:    vbroadcastf128 (%eax), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
    202 ; X86-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
    203 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
    204 ;
    205 ; X86-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_ps_256:
    206 ; X86-AVX512VL:       # %bb.0:
    207 ; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    208 ; X86-AVX512VL-NEXT:    vbroadcastf128 (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x00]
    209 ; X86-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
    210 ; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
    211 ;
    212 ; X64-AVX-LABEL: test_x86_avx_vbroadcastf128_ps_256:
    213 ; X64-AVX:       # %bb.0:
    214 ; X64-AVX-NEXT:    vbroadcastf128 (%rdi), %ymm0 # encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
    215 ; X64-AVX-NEXT:    # ymm0 = mem[0,1,0,1]
    216 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
    217 ;
    218 ; X64-AVX512VL-LABEL: test_x86_avx_vbroadcastf128_ps_256:
    219 ; X64-AVX512VL:       # %bb.0:
    220 ; X64-AVX512VL-NEXT:    vbroadcastf128 (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1a,0x07]
    221 ; X64-AVX512VL-NEXT:    # ymm0 = mem[0,1,0,1]
    222 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
    223   %res = call <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8* %a0) ; <<8 x float>> [#uses=1]
    224   ret <8 x float> %res
    225 }
    226 declare <8 x float> @llvm.x86.avx.vbroadcastf128.ps.256(i8*) nounwind readonly
    227 
    228 
    229 define <4 x double> @test_x86_avx_blend_pd_256(<4 x double> %a0, <4 x double> %a1) {
        ; The pd blend is expected to lower to an equivalent vblendps $192 with the
        ; operands commuted — see the shuffle decode on the CHECK line below.
    230 ; CHECK-LABEL: test_x86_avx_blend_pd_256:
    231 ; CHECK:       # %bb.0:
    232 ; CHECK-NEXT:    vblendps $192, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x0c,0xc0,0xc0]
    233 ; CHECK-NEXT:    # ymm0 = ymm1[0,1,2,3,4,5],ymm0[6,7]
    234 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    235   %res = call <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double> %a0, <4 x double> %a1, i32 7) ; <<4 x double>> [#uses=1]
    236   ret <4 x double> %res
    237 }
    238 declare <4 x double> @llvm.x86.avx.blend.pd.256(<4 x double>, <4 x double>, i32) nounwind readnone
    239 
    240 
        ; ps blend with imm 7: elements 0-2 come from %a1 per the shuffle decode.
    241 define <8 x float> @test_x86_avx_blend_ps_256(<8 x float> %a0, <8 x float> %a1) {
    242 ; CHECK-LABEL: test_x86_avx_blend_ps_256:
    243 ; CHECK:       # %bb.0:
    244 ; CHECK-NEXT:    vblendps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0x07]
    245 ; CHECK-NEXT:    # ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
    246 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    247   %res = call <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
    248   ret <8 x float> %res
    249 }
    250 declare <8 x float> @llvm.x86.avx.blend.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
    251 
    252 
        ; Dot-product intrinsic must remain a single vdpps $7 (same bytes on both runs).
    253 define <8 x float> @test_x86_avx_dp_ps_256(<8 x float> %a0, <8 x float> %a1) {
    254 ; CHECK-LABEL: test_x86_avx_dp_ps_256:
    255 ; CHECK:       # %bb.0:
    256 ; CHECK-NEXT:    vdpps $7, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x40,0xc1,0x07]
    257 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    258   %res = call <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float> %a0, <8 x float> %a1, i32 7) ; <<8 x float>> [#uses=1]
    259   ret <8 x float> %res
    260 }
    261 declare <8 x float> @llvm.x86.avx.dp.ps.256(<8 x float>, <8 x float>, i32) nounwind readnone
    262 
    263 
    264 define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
        ; The i32 8 argument lowers to vpslldq $1 (a one-byte left shift), matching
        ; the zero-fill pattern in the shuffle decode below; identical bytes on
        ; AVX512VL via EVEX-to-VEX compression.
    265 ; AVX-LABEL: test_x86_sse2_psll_dq:
    266 ; AVX:       # %bb.0:
    267 ; AVX-NEXT:    vpslldq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x01]
    268 ; AVX-NEXT:    # xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    269 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    270 ;
    271 ; AVX512VL-LABEL: test_x86_sse2_psll_dq:
    272 ; AVX512VL:       # %bb.0:
    273 ; AVX512VL-NEXT:    vpslldq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x01]
    274 ; AVX512VL-NEXT:    # xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
    275 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    276   %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
    277   ret <2 x i64> %res
    278 }
    279 declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
    280 
    281 
        ; Right-shift counterpart: i32 8 lowers to vpsrldq $1 (one-byte right shift).
    282 define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
    283 ; AVX-LABEL: test_x86_sse2_psrl_dq:
    284 ; AVX:       # %bb.0:
    285 ; AVX-NEXT:    vpsrldq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x01]
    286 ; AVX-NEXT:    # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
    287 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    288 ;
    289 ; AVX512VL-LABEL: test_x86_sse2_psrl_dq:
    290 ; AVX512VL:       # %bb.0:
    291 ; AVX512VL-NEXT:    vpsrldq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x01]
    292 ; AVX512VL-NEXT:    # xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
    293 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    294   %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
    295   ret <2 x i64> %res
    296 }
    297 declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone
    298 
    299 
    300 define <2 x double> @test_x86_sse41_blendpd(<2 x double> %a0, <2 x double> %a1) {
        ; SSE4.1 pd blend (imm 2) is expected to lower to an equivalent vblendps $3
        ; with commuted operands — see the shuffle decode on the CHECK line.
    301 ; CHECK-LABEL: test_x86_sse41_blendpd:
    302 ; CHECK:       # %bb.0:
    303 ; CHECK-NEXT:    vblendps $3, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x03]
    304 ; CHECK-NEXT:    # xmm0 = xmm0[0,1],xmm1[2,3]
    305 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    306   %res = call <2 x double> @llvm.x86.sse41.blendpd(<2 x double> %a0, <2 x double> %a1, i8 2) ; <<2 x double>> [#uses=1]
    307   ret <2 x double> %res
    308 }
    309 declare <2 x double> @llvm.x86.sse41.blendpd(<2 x double>, <2 x double>, i8) nounwind readnone
    310 
    311 
        ; ps blend with imm 7, lowered as the commuted vblendps $8 per the decode below.
    312 define <4 x float> @test_x86_sse41_blendps(<4 x float> %a0, <4 x float> %a1) {
    313 ; CHECK-LABEL: test_x86_sse41_blendps:
    314 ; CHECK:       # %bb.0:
    315 ; CHECK-NEXT:    vblendps $8, %xmm0, %xmm1, %xmm0 # encoding: [0xc4,0xe3,0x71,0x0c,0xc0,0x08]
    316 ; CHECK-NEXT:    # xmm0 = xmm1[0,1,2],xmm0[3]
    317 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    318   %res = call <4 x float> @llvm.x86.sse41.blendps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
    319   ret <4 x float> %res
    320 }
    321 declare <4 x float> @llvm.x86.sse41.blendps(<4 x float>, <4 x float>, i8) nounwind readnone
    322 
    323 
        ; Word blend must stay a vpblendw $7; words 0-2 taken from %a1 per the decode.
    324 define <8 x i16> @test_x86_sse41_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
    325 ; CHECK-LABEL: test_x86_sse41_pblendw:
    326 ; CHECK:       # %bb.0:
    327 ; CHECK-NEXT:    vpblendw $7, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x07]
    328 ; CHECK-NEXT:    # xmm0 = xmm1[0,1,2],xmm0[3,4,5,6,7]
    329 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    330   %res = call <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16> %a0, <8 x i16> %a1, i8 7) ; <<8 x i16>> [#uses=1]
    331   ret <8 x i16> %res
    332 }
    333 declare <8 x i16> @llvm.x86.sse41.pblendw(<8 x i16>, <8 x i16>, i8) nounwind readnone
    334 
    335 
    336 define <4 x i32> @test_x86_sse41_pmovsxbd(<16 x i8> %a0) {
        ; Each pmovsx* test below checks the sign-extension intrinsic selects to the
        ; matching single vpmovsx?? instruction, with identical VEX bytes on the
        ; AVX512VL run via EVEX-to-VEX compression.
    337 ; AVX-LABEL: test_x86_sse41_pmovsxbd:
    338 ; AVX:       # %bb.0:
    339 ; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x21,0xc0]
    340 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    341 ;
    342 ; AVX512VL-LABEL: test_x86_sse41_pmovsxbd:
    343 ; AVX512VL:       # %bb.0:
    344 ; AVX512VL-NEXT:    vpmovsxbd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xc0]
    345 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    346   %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
    347   ret <4 x i32> %res
    348 }
    349 declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
    350 
    351 
    352 define <2 x i64> @test_x86_sse41_pmovsxbq(<16 x i8> %a0) {
    353 ; AVX-LABEL: test_x86_sse41_pmovsxbq:
    354 ; AVX:       # %bb.0:
    355 ; AVX-NEXT:    vpmovsxbq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x22,0xc0]
    356 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    357 ;
    358 ; AVX512VL-LABEL: test_x86_sse41_pmovsxbq:
    359 ; AVX512VL:       # %bb.0:
    360 ; AVX512VL-NEXT:    vpmovsxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xc0]
    361 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    362   %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
    363   ret <2 x i64> %res
    364 }
    365 declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
    366 
    367 
    368 define <8 x i16> @test_x86_sse41_pmovsxbw(<16 x i8> %a0) {
    369 ; AVX-LABEL: test_x86_sse41_pmovsxbw:
    370 ; AVX:       # %bb.0:
    371 ; AVX-NEXT:    vpmovsxbw %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x20,0xc0]
    372 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    373 ;
    374 ; AVX512VL-LABEL: test_x86_sse41_pmovsxbw:
    375 ; AVX512VL:       # %bb.0:
    376 ; AVX512VL-NEXT:    vpmovsxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xc0]
    377 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    378   %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
    379   ret <8 x i16> %res
    380 }
    381 declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
    382 
    383 
    384 define <2 x i64> @test_x86_sse41_pmovsxdq(<4 x i32> %a0) {
    385 ; AVX-LABEL: test_x86_sse41_pmovsxdq:
    386 ; AVX:       # %bb.0:
    387 ; AVX-NEXT:    vpmovsxdq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x25,0xc0]
    388 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    389 ;
    390 ; AVX512VL-LABEL: test_x86_sse41_pmovsxdq:
    391 ; AVX512VL:       # %bb.0:
    392 ; AVX512VL-NEXT:    vpmovsxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xc0]
    393 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    394   %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
    395   ret <2 x i64> %res
    396 }
    397 declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
    398 
    399 
    400 define <4 x i32> @test_x86_sse41_pmovsxwd(<8 x i16> %a0) {
    401 ; AVX-LABEL: test_x86_sse41_pmovsxwd:
    402 ; AVX:       # %bb.0:
    403 ; AVX-NEXT:    vpmovsxwd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x23,0xc0]
    404 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    405 ;
    406 ; AVX512VL-LABEL: test_x86_sse41_pmovsxwd:
    407 ; AVX512VL:       # %bb.0:
    408 ; AVX512VL-NEXT:    vpmovsxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xc0]
    409 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    410   %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
    411   ret <4 x i32> %res
    412 }
    413 declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
    414 
    415 
    416 define <2 x i64> @test_x86_sse41_pmovsxwq(<8 x i16> %a0) {
    417 ; AVX-LABEL: test_x86_sse41_pmovsxwq:
    418 ; AVX:       # %bb.0:
    419 ; AVX-NEXT:    vpmovsxwq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x24,0xc0]
    420 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    421 ;
    422 ; AVX512VL-LABEL: test_x86_sse41_pmovsxwq:
    423 ; AVX512VL:       # %bb.0:
    424 ; AVX512VL-NEXT:    vpmovsxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xc0]
    425 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    426   %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
    427   ret <2 x i64> %res
    428 }
    429 declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
    430 
    431 
    432 define <4 x i32> @test_x86_sse41_pmovzxbd(<16 x i8> %a0) {
        ; Each pmovzx* test below checks the zero-extension intrinsic selects to the
        ; matching single vpmovzx?? instruction (the shuffle decode shows the
        ; zero-fill pattern), with identical VEX bytes on the AVX512VL run.
    433 ; AVX-LABEL: test_x86_sse41_pmovzxbd:
    434 ; AVX:       # %bb.0:
    435 ; AVX-NEXT:    vpmovzxbd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x31,0xc0]
    436 ; AVX-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    437 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    438 ;
    439 ; AVX512VL-LABEL: test_x86_sse41_pmovzxbd:
    440 ; AVX512VL:       # %bb.0:
    441 ; AVX512VL-NEXT:    vpmovzxbd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xc0]
    442 ; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
    443 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    444   %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %a0) ; <<4 x i32>> [#uses=1]
    445   ret <4 x i32> %res
    446 }
    447 declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
    448 
    449 
    450 define <2 x i64> @test_x86_sse41_pmovzxbq(<16 x i8> %a0) {
    451 ; AVX-LABEL: test_x86_sse41_pmovzxbq:
    452 ; AVX:       # %bb.0:
    453 ; AVX-NEXT:    vpmovzxbq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x32,0xc0]
    454 ; AVX-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    455 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    456 ;
    457 ; AVX512VL-LABEL: test_x86_sse41_pmovzxbq:
    458 ; AVX512VL:       # %bb.0:
    459 ; AVX512VL-NEXT:    vpmovzxbq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xc0]
    460 ; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
    461 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    462   %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %a0) ; <<2 x i64>> [#uses=1]
    463   ret <2 x i64> %res
    464 }
    465 declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
    466 
    467 
    468 define <8 x i16> @test_x86_sse41_pmovzxbw(<16 x i8> %a0) {
    469 ; AVX-LABEL: test_x86_sse41_pmovzxbw:
    470 ; AVX:       # %bb.0:
    471 ; AVX-NEXT:    vpmovzxbw %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x30,0xc0]
    472 ; AVX-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    473 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    474 ;
    475 ; AVX512VL-LABEL: test_x86_sse41_pmovzxbw:
    476 ; AVX512VL:       # %bb.0:
    477 ; AVX512VL-NEXT:    vpmovzxbw %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xc0]
    478 ; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
    479 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    480   %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %a0) ; <<8 x i16>> [#uses=1]
    481   ret <8 x i16> %res
    482 }
    483 declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
    484 
    485 
    486 define <2 x i64> @test_x86_sse41_pmovzxdq(<4 x i32> %a0) {
    487 ; AVX-LABEL: test_x86_sse41_pmovzxdq:
    488 ; AVX:       # %bb.0:
    489 ; AVX-NEXT:    vpmovzxdq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x35,0xc0]
    490 ; AVX-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero
    491 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    492 ;
    493 ; AVX512VL-LABEL: test_x86_sse41_pmovzxdq:
    494 ; AVX512VL:       # %bb.0:
    495 ; AVX512VL-NEXT:    vpmovzxdq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xc0]
    496 ; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero
    497 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    498   %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %a0) ; <<2 x i64>> [#uses=1]
    499   ret <2 x i64> %res
    500 }
    501 declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
    502 
    503 
    504 define <4 x i32> @test_x86_sse41_pmovzxwd(<8 x i16> %a0) {
    505 ; AVX-LABEL: test_x86_sse41_pmovzxwd:
    506 ; AVX:       # %bb.0:
    507 ; AVX-NEXT:    vpmovzxwd %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x33,0xc0]
    508 ; AVX-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    509 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    510 ;
    511 ; AVX512VL-LABEL: test_x86_sse41_pmovzxwd:
    512 ; AVX512VL:       # %bb.0:
    513 ; AVX512VL-NEXT:    vpmovzxwd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xc0]
    514 ; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
    515 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    516   %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %a0) ; <<4 x i32>> [#uses=1]
    517   ret <4 x i32> %res
    518 }
    519 declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
    520 
    521 
    522 define <2 x i64> @test_x86_sse41_pmovzxwq(<8 x i16> %a0) {
    523 ; AVX-LABEL: test_x86_sse41_pmovzxwq:
    524 ; AVX:       # %bb.0:
    525 ; AVX-NEXT:    vpmovzxwq %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x34,0xc0]
    526 ; AVX-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    527 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    528 ;
    529 ; AVX512VL-LABEL: test_x86_sse41_pmovzxwq:
    530 ; AVX512VL:       # %bb.0:
    531 ; AVX512VL-NEXT:    vpmovzxwq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xc0]
    532 ; AVX512VL-NEXT:    # xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
    533 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    534   %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %a0) ; <<2 x i64>> [#uses=1]
    535   ret <2 x i64> %res
    536 }
    537 declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
    538 
    539 
    540 define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
    541 ; AVX-LABEL: test_x86_sse2_cvtdq2pd:
    542 ; AVX:       # %bb.0:
    543 ; AVX-NEXT:    vcvtdq2pd %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xe6,0xc0]
    544 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    545 ;
    546 ; AVX512VL-LABEL: test_x86_sse2_cvtdq2pd:
    547 ; AVX512VL:       # %bb.0:
    548 ; AVX512VL-NEXT:    vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
    549 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    550   %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
    551   ret <2 x double> %res
    552 }
    553 declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
    554 
    555 
    556 define <4 x double> @test_x86_avx_cvtdq2_pd_256(<4 x i32> %a0) {
    557 ; AVX-LABEL: test_x86_avx_cvtdq2_pd_256:
    558 ; AVX:       # %bb.0:
    559 ; AVX-NEXT:    vcvtdq2pd %xmm0, %ymm0 # encoding: [0xc5,0xfe,0xe6,0xc0]
    560 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    561 ;
    562 ; AVX512VL-LABEL: test_x86_avx_cvtdq2_pd_256:
    563 ; AVX512VL:       # %bb.0:
    564 ; AVX512VL-NEXT:    vcvtdq2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xc0]
    565 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    566   %res = call <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32> %a0) ; <<4 x double>> [#uses=1]
    567   ret <4 x double> %res
    568 }
    569 declare <4 x double> @llvm.x86.avx.cvtdq2.pd.256(<4 x i32>) nounwind readnone
    570 
    571 
        ; Verify that llvm.x86.sse2.cvtps2pd lowers to a single xmm-to-xmm vcvtps2pd
        ; with identical VEX bytes on both AVX and AVX512VL (EVEX-to-VEX compressed).
    572 define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
    573 ; AVX-LABEL: test_x86_sse2_cvtps2pd:
    574 ; AVX:       # %bb.0:
    575 ; AVX-NEXT:    vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0]
    576 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    577 ;
    578 ; AVX512VL-LABEL: test_x86_sse2_cvtps2pd:
    579 ; AVX512VL:       # %bb.0:
    580 ; AVX512VL-NEXT:    vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
    581 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    582   %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
    583   ret <2 x double> %res
    584 }
    585 declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone
    586 
    587 
        ; Verify that llvm.x86.avx.cvt.ps2.pd.256 lowers to a single widening vcvtps2pd
        ; (xmm source, ymm destination) on both AVX and AVX512VL.
    588 define <4 x double> @test_x86_avx_cvt_ps2_pd_256(<4 x float> %a0) {
    589 ; AVX-LABEL: test_x86_avx_cvt_ps2_pd_256:
    590 ; AVX:       # %bb.0:
    591 ; AVX-NEXT:    vcvtps2pd %xmm0, %ymm0 # encoding: [0xc5,0xfc,0x5a,0xc0]
    592 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    593 ;
    594 ; AVX512VL-LABEL: test_x86_avx_cvt_ps2_pd_256:
    595 ; AVX512VL:       # %bb.0:
    596 ; AVX512VL-NEXT:    vcvtps2pd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xc0]
    597 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    598   %res = call <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float> %a0) ; <<4 x double>> [#uses=1]
    599   ret <4 x double> %res
    600 }
    601 declare <4 x double> @llvm.x86.avx.cvt.ps2.pd.256(<4 x float>) nounwind readnone
    602 
    603 
        ; Verify that llvm.x86.sse2.storeu.dq stays in the integer domain (vmovdqu, not
        ; vmovups): the add-of-ones is materialized as vpcmpeqd (all-ones) + vpsubb -1,
        ; and the unaligned store uses the integer vmovdqu form on all four targets.
    604 define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
    605   ; add operation forces the execution domain.
    606 ; X86-AVX-LABEL: test_x86_sse2_storeu_dq:
    607 ; X86-AVX:       # %bb.0:
    608 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    609 ; X86-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
    610 ; X86-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
    611 ; X86-AVX-NEXT:    vmovdqu %xmm0, (%eax) # encoding: [0xc5,0xfa,0x7f,0x00]
    612 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
    613 ;
    614 ; X86-AVX512VL-LABEL: test_x86_sse2_storeu_dq:
    615 ; X86-AVX512VL:       # %bb.0:
    616 ; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    617 ; X86-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
    618 ; X86-AVX512VL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
    619 ; X86-AVX512VL-NEXT:    vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
    620 ; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
    621 ;
    622 ; X64-AVX-LABEL: test_x86_sse2_storeu_dq:
    623 ; X64-AVX:       # %bb.0:
    624 ; X64-AVX-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
    625 ; X64-AVX-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
    626 ; X64-AVX-NEXT:    vmovdqu %xmm0, (%rdi) # encoding: [0xc5,0xfa,0x7f,0x07]
    627 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
    628 ;
    629 ; X64-AVX512VL-LABEL: test_x86_sse2_storeu_dq:
    630 ; X64-AVX512VL:       # %bb.0:
    631 ; X64-AVX512VL-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
    632 ; X64-AVX512VL-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
    633 ; X64-AVX512VL-NEXT:    vmovdqu %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x07]
    634 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
    635   %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
    636   call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
    637   ret void
    638 }
    639 declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
    640 
    641 
        ; Verify that llvm.x86.sse2.storeu.pd stays in the FP domain (vaddpd + vmovupd).
        ; The <0.0, 0x4200000000000000> constant is built differently per target: plain
        ; AVX zeroes xmm1 and vmovhpd-loads the high lane, while AVX512VL vmovsd-loads
        ; the scalar and shifts it into the high lane with vpslldq $8.
    642 define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
    643   ; fadd operation forces the execution domain.
    644 ; X86-AVX-LABEL: test_x86_sse2_storeu_pd:
    645 ; X86-AVX:       # %bb.0:
    646 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    647 ; X86-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
    648 ; X86-AVX-NEXT:    vmovhpd {{\.LCPI.*}}, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
    649 ; X86-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
    650 ; X86-AVX-NEXT:    # xmm1 = xmm1[0],mem[0]
    651 ; X86-AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
    652 ; X86-AVX-NEXT:    vmovupd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x11,0x00]
    653 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
    654 ;
    655 ; X86-AVX512VL-LABEL: test_x86_sse2_storeu_pd:
    656 ; X86-AVX512VL:       # %bb.0:
    657 ; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    658 ; X86-AVX512VL-NEXT:    vmovsd {{\.LCPI.*}}, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A]
    659 ; X86-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
    660 ; X86-AVX512VL-NEXT:    # xmm1 = mem[0],zero
    661 ; X86-AVX512VL-NEXT:    vpslldq $8, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08]
    662 ; X86-AVX512VL-NEXT:    # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
    663 ; X86-AVX512VL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
    664 ; X86-AVX512VL-NEXT:    vmovupd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00]
    665 ; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
    666 ;
    667 ; X64-AVX-LABEL: test_x86_sse2_storeu_pd:
    668 ; X64-AVX:       # %bb.0:
    669 ; X64-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
    670 ; X64-AVX-NEXT:    vmovhpd {{.*}}(%rip), %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
    671 ; X64-AVX-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
    672 ; X64-AVX-NEXT:    # xmm1 = xmm1[0],mem[0]
    673 ; X64-AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
    674 ; X64-AVX-NEXT:    vmovupd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x11,0x07]
    675 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
    676 ;
    677 ; X64-AVX512VL-LABEL: test_x86_sse2_storeu_pd:
    678 ; X64-AVX512VL:       # %bb.0:
    679 ; X64-AVX512VL-NEXT:    vmovsd {{.*}}(%rip), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A]
    680 ; X64-AVX512VL-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
    681 ; X64-AVX512VL-NEXT:    # xmm1 = mem[0],zero
    682 ; X64-AVX512VL-NEXT:    vpslldq $8, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08]
    683 ; X64-AVX512VL-NEXT:    # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
    684 ; X64-AVX512VL-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
    685 ; X64-AVX512VL-NEXT:    vmovupd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x07]
    686 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
    687   %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
    688   call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
    689   ret void
    690 }
    691 declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
    692 
    693 
        ; Verify that llvm.x86.sse.storeu.ps lowers to a single unaligned vmovups store
        ; (no domain-forcing arithmetic needed for the float intrinsic).
    694 define void @test_x86_sse_storeu_ps(i8* %a0, <4 x float> %a1) {
    695 ; X86-AVX-LABEL: test_x86_sse_storeu_ps:
    696 ; X86-AVX:       # %bb.0:
    697 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    698 ; X86-AVX-NEXT:    vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00]
    699 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
    700 ;
    701 ; X86-AVX512VL-LABEL: test_x86_sse_storeu_ps:
    702 ; X86-AVX512VL:       # %bb.0:
    703 ; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    704 ; X86-AVX512VL-NEXT:    vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
    705 ; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
    706 ;
    707 ; X64-AVX-LABEL: test_x86_sse_storeu_ps:
    708 ; X64-AVX:       # %bb.0:
    709 ; X64-AVX-NEXT:    vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07]
    710 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
    711 ;
    712 ; X64-AVX512VL-LABEL: test_x86_sse_storeu_ps:
    713 ; X64-AVX512VL:       # %bb.0:
    714 ; X64-AVX512VL-NEXT:    vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07]
    715 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
    716   call void @llvm.x86.sse.storeu.ps(i8* %a0, <4 x float> %a1)
    717   ret void
    718 }
    719 declare void @llvm.x86.sse.storeu.ps(i8*, <4 x float>) nounwind
    720 
    721 
        ; Verify llvm.x86.avx.storeu.dq.256: plain AVX has no 256-bit integer add, so the
        ; add-of-ones is split into two 128-bit vpsubb halves (vextractf128/vinsertf128)
        ; and the store becomes vmovups; AVX512VL keeps a single 256-bit vpsubb + vmovdqu.
    722 define void @test_x86_avx_storeu_dq_256(i8* %a0, <32 x i8> %a1) {
    723   ; FIXME: unfortunately the execution domain fix pass changes this to vmovups and it's hard to force with no 256-bit integer instructions
    724   ; add operation forces the execution domain.
    725 ; X86-AVX-LABEL: test_x86_avx_storeu_dq_256:
    726 ; X86-AVX:       # %bb.0:
    727 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    728 ; X86-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1 # encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01]
    729 ; X86-AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
    730 ; X86-AVX-NEXT:    vpsubb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xf8,0xca]
    731 ; X86-AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc2]
    732 ; X86-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
    733 ; X86-AVX-NEXT:    vmovups %ymm0, (%eax) # encoding: [0xc5,0xfc,0x11,0x00]
    734 ; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    735 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
    736 ;
    737 ; X86-AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
    738 ; X86-AVX512VL:       # %bb.0:
    739 ; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    740 ; X86-AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
    741 ; X86-AVX512VL-NEXT:    vpsubb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf8,0xc1]
    742 ; X86-AVX512VL-NEXT:    vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
    743 ; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    744 ; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
    745 ;
    746 ; X64-AVX-LABEL: test_x86_avx_storeu_dq_256:
    747 ; X64-AVX:       # %bb.0:
    748 ; X64-AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1 # encoding: [0xc4,0xe3,0x7d,0x19,0xc1,0x01]
    749 ; X64-AVX-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
    750 ; X64-AVX-NEXT:    vpsubb %xmm2, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xf8,0xca]
    751 ; X64-AVX-NEXT:    vpsubb %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc2]
    752 ; X64-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x18,0xc1,0x01]
    753 ; X64-AVX-NEXT:    vmovups %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x11,0x07]
    754 ; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    755 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
    756 ;
    757 ; X64-AVX512VL-LABEL: test_x86_avx_storeu_dq_256:
    758 ; X64-AVX512VL:       # %bb.0:
    759 ; X64-AVX512VL-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
    760 ; X64-AVX512VL-NEXT:    vpsubb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf8,0xc1]
    761 ; X64-AVX512VL-NEXT:    vmovdqu %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x07]
    762 ; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    763 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
    764   %a2 = add <32 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
    765   call void @llvm.x86.avx.storeu.dq.256(i8* %a0, <32 x i8> %a2)
    766   ret void
    767 }
    768 declare void @llvm.x86.avx.storeu.dq.256(i8*, <32 x i8>) nounwind
    769 
    770 
        ; Verify llvm.x86.avx.storeu.pd.256: the fadd-with-zero keeps the value in the
        ; FP domain, so all targets emit vxorpd + 256-bit vaddpd followed by an
        ; unaligned vmovupd store and vzeroupper.
    771 define void @test_x86_avx_storeu_pd_256(i8* %a0, <4 x double> %a1) {
    772   ; add operation forces the execution domain.
    773 ; X86-AVX-LABEL: test_x86_avx_storeu_pd_256:
    774 ; X86-AVX:       # %bb.0:
    775 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    776 ; X86-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
    777 ; X86-AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
    778 ; X86-AVX-NEXT:    vmovupd %ymm0, (%eax) # encoding: [0xc5,0xfd,0x11,0x00]
    779 ; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    780 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
    781 ;
    782 ; X86-AVX512VL-LABEL: test_x86_avx_storeu_pd_256:
    783 ; X86-AVX512VL:       # %bb.0:
    784 ; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    785 ; X86-AVX512VL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
    786 ; X86-AVX512VL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
    787 ; X86-AVX512VL-NEXT:    vmovupd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x00]
    788 ; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    789 ; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
    790 ;
    791 ; X64-AVX-LABEL: test_x86_avx_storeu_pd_256:
    792 ; X64-AVX:       # %bb.0:
    793 ; X64-AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x57,0xc9]
    794 ; X64-AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0x58,0xc1]
    795 ; X64-AVX-NEXT:    vmovupd %ymm0, (%rdi) # encoding: [0xc5,0xfd,0x11,0x07]
    796 ; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    797 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
    798 ;
    799 ; X64-AVX512VL-LABEL: test_x86_avx_storeu_pd_256:
    800 ; X64-AVX512VL:       # %bb.0:
    801 ; X64-AVX512VL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x57,0xc9]
    802 ; X64-AVX512VL-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
    803 ; X64-AVX512VL-NEXT:    vmovupd %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x07]
    804 ; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    805 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
    806   %a2 = fadd <4 x double> %a1, <double 0x0, double 0x0, double 0x0, double 0x0>
    807   call void @llvm.x86.avx.storeu.pd.256(i8* %a0, <4 x double> %a2)
    808   ret void
    809 }
    810 declare void @llvm.x86.avx.storeu.pd.256(i8*, <4 x double>) nounwind
    811 
    812 
        ; Verify that llvm.x86.avx.storeu.ps.256 lowers to a single unaligned 256-bit
        ; vmovups store followed by vzeroupper on all four targets.
    813 define void @test_x86_avx_storeu_ps_256(i8* %a0, <8 x float> %a1) {
    814 ; X86-AVX-LABEL: test_x86_avx_storeu_ps_256:
    815 ; X86-AVX:       # %bb.0:
    816 ; X86-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    817 ; X86-AVX-NEXT:    vmovups %ymm0, (%eax) # encoding: [0xc5,0xfc,0x11,0x00]
    818 ; X86-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    819 ; X86-AVX-NEXT:    retl # encoding: [0xc3]
    820 ;
    821 ; X86-AVX512VL-LABEL: test_x86_avx_storeu_ps_256:
    822 ; X86-AVX512VL:       # %bb.0:
    823 ; X86-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    824 ; X86-AVX512VL-NEXT:    vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
    825 ; X86-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    826 ; X86-AVX512VL-NEXT:    retl # encoding: [0xc3]
    827 ;
    828 ; X64-AVX-LABEL: test_x86_avx_storeu_ps_256:
    829 ; X64-AVX:       # %bb.0:
    830 ; X64-AVX-NEXT:    vmovups %ymm0, (%rdi) # encoding: [0xc5,0xfc,0x11,0x07]
    831 ; X64-AVX-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    832 ; X64-AVX-NEXT:    retq # encoding: [0xc3]
    833 ;
    834 ; X64-AVX512VL-LABEL: test_x86_avx_storeu_ps_256:
    835 ; X64-AVX512VL:       # %bb.0:
    836 ; X64-AVX512VL-NEXT:    vmovups %ymm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x07]
    837 ; X64-AVX512VL-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    838 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
    839   call void @llvm.x86.avx.storeu.ps.256(i8* %a0, <8 x float> %a1)
    840   ret void
    841 }
    842 declare void @llvm.x86.avx.storeu.ps.256(i8*, <8 x float>) nounwind
    843 
    844 
        ; Verify that llvm.x86.avx.vpermil.pd with imm 1 lowers to vpermilpd $1
        ; (lane swap xmm0 = xmm0[1,0]) on both AVX and AVX512VL.
    845 define <2 x double> @test_x86_avx_vpermil_pd(<2 x double> %a0) {
    846 ; AVX-LABEL: test_x86_avx_vpermil_pd:
    847 ; AVX:       # %bb.0:
    848 ; AVX-NEXT:    vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
    849 ; AVX-NEXT:    # xmm0 = xmm0[1,0]
    850 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    851 ;
    852 ; AVX512VL-LABEL: test_x86_avx_vpermil_pd:
    853 ; AVX512VL:       # %bb.0:
    854 ; AVX512VL-NEXT:    vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01]
    855 ; AVX512VL-NEXT:    # xmm0 = xmm0[1,0]
    856 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    857   %res = call <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double> %a0, i8 1) ; <<2 x double>> [#uses=1]
    858   ret <2 x double> %res
    859 }
    860 declare <2 x double> @llvm.x86.avx.vpermil.pd(<2 x double>, i8) nounwind readnone
    861 
    862 
        ; Verify that llvm.x86.avx.vpermil.pd.256 with imm 7 lowers to vpermilpd $7
        ; (ymm0 = ymm0[1,1,3,2]) on both AVX and AVX512VL.
    863 define <4 x double> @test_x86_avx_vpermil_pd_256(<4 x double> %a0) {
    864 ; AVX-LABEL: test_x86_avx_vpermil_pd_256:
    865 ; AVX:       # %bb.0:
    866 ; AVX-NEXT:    vpermilpd $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x07]
    867 ; AVX-NEXT:    # ymm0 = ymm0[1,1,3,2]
    868 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    869 ;
    870 ; AVX512VL-LABEL: test_x86_avx_vpermil_pd_256:
    871 ; AVX512VL:       # %bb.0:
    872 ; AVX512VL-NEXT:    vpermilpd $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xc0,0x07]
    873 ; AVX512VL-NEXT:    # ymm0 = ymm0[1,1,3,2]
    874 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    875   %res = call <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double> %a0, i8 7) ; <<4 x double>> [#uses=1]
    876   ret <4 x double> %res
    877 }
    878 declare <4 x double> @llvm.x86.avx.vpermil.pd.256(<4 x double>, i8) nounwind readnone
    879 
    880 
        ; Verify that llvm.x86.avx.vpermil.ps with imm 7 lowers to vpermilps $7
        ; (xmm0 = xmm0[3,1,0,0]) on both AVX and AVX512VL.
    881 define <4 x float> @test_x86_avx_vpermil_ps(<4 x float> %a0) {
    882 ; AVX-LABEL: test_x86_avx_vpermil_ps:
    883 ; AVX:       # %bb.0:
    884 ; AVX-NEXT:    vpermilps $7, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x07]
    885 ; AVX-NEXT:    # xmm0 = xmm0[3,1,0,0]
    886 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    887 ;
    888 ; AVX512VL-LABEL: test_x86_avx_vpermil_ps:
    889 ; AVX512VL:       # %bb.0:
    890 ; AVX512VL-NEXT:    vpermilps $7, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x07]
    891 ; AVX512VL-NEXT:    # xmm0 = xmm0[3,1,0,0]
    892 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    893   %res = call <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float> %a0, i8 7) ; <<4 x float>> [#uses=1]
    894   ret <4 x float> %res
    895 }
    896 declare <4 x float> @llvm.x86.avx.vpermil.ps(<4 x float>, i8) nounwind readnone
    897 
    898 
        ; Verify that llvm.x86.avx.vpermil.ps.256 with imm 7 lowers to vpermilps $7,
        ; applying the same 4-element pattern to both 128-bit lanes (ymm0[3,1,0,0,7,5,4,4]).
    899 define <8 x float> @test_x86_avx_vpermil_ps_256(<8 x float> %a0) {
    900 ; AVX-LABEL: test_x86_avx_vpermil_ps_256:
    901 ; AVX:       # %bb.0:
    902 ; AVX-NEXT:    vpermilps $7, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x07]
    903 ; AVX-NEXT:    # ymm0 = ymm0[3,1,0,0,7,5,4,4]
    904 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    905 ;
    906 ; AVX512VL-LABEL: test_x86_avx_vpermil_ps_256:
    907 ; AVX512VL:       # %bb.0:
    908 ; AVX512VL-NEXT:    vpermilps $7, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xc0,0x07]
    909 ; AVX512VL-NEXT:    # ymm0 = ymm0[3,1,0,0,7,5,4,4]
    910 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    911   %res = call <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float> %a0, i8 7) ; <<8 x float>> [#uses=1]
    912   ret <8 x float> %res
    913 }
    914 declare <8 x float> @llvm.x86.avx.vpermil.ps.256(<8 x float>, i8) nounwind readnone
    915 
    916 
        ; Verify llvm.x86.avx.vperm2f128.pd.256 with imm 3: the selector is commuted to
        ; $33 with the source operands swapped (ymm0 = ymm1[2,3],ymm0[0,1]).
    917 define <4 x double> @test_x86_avx_vperm2f128_pd_256(<4 x double> %a0, <4 x double> %a1) {
    918 ; AVX-LABEL: test_x86_avx_vperm2f128_pd_256:
    919 ; AVX:       # %bb.0:
    920 ; AVX-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
    921 ; AVX-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
    922 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    923 ;
    924 ; AVX512VL-LABEL: test_x86_avx_vperm2f128_pd_256:
    925 ; AVX512VL:       # %bb.0:
    926 ; AVX512VL-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
    927 ; AVX512VL-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
    928 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    929   %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3) ; <<4 x double>> [#uses=1]
    930   ret <4 x double> %res
    931 }
    932 declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
    933 
    934 
        ; Verify llvm.x86.avx.vperm2f128.ps.256 with imm 3: same commuted lowering as the
        ; pd variant — vperm2f128 $33 with swapped sources (ymm0 = ymm1[2,3],ymm0[0,1]).
    935 define <8 x float> @test_x86_avx_vperm2f128_ps_256(<8 x float> %a0, <8 x float> %a1) {
    936 ; AVX-LABEL: test_x86_avx_vperm2f128_ps_256:
    937 ; AVX:       # %bb.0:
    938 ; AVX-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
    939 ; AVX-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
    940 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    941 ;
    942 ; AVX512VL-LABEL: test_x86_avx_vperm2f128_ps_256:
    943 ; AVX512VL:       # %bb.0:
    944 ; AVX512VL-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
    945 ; AVX512VL-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
    946 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    947   %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 3) ; <<8 x float>> [#uses=1]
    948   ret <8 x float> %res
    949 }
    950 declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
    951 
    952 
        ; Verify llvm.x86.avx.vperm2f128.si.256 with imm 3: AVX emits the FP-domain
        ; vperm2f128 $33, while AVX512VL selects the integer form vperm2i128 $33
        ; (opcode 0x46 instead of 0x06), both with swapped sources.
    953 define <8 x i32> @test_x86_avx_vperm2f128_si_256(<8 x i32> %a0, <8 x i32> %a1) {
    954 ; AVX-LABEL: test_x86_avx_vperm2f128_si_256:
    955 ; AVX:       # %bb.0:
    956 ; AVX-NEXT:    vperm2f128 $33, %ymm0, %ymm1, %ymm0 # encoding: [0xc4,0xe3,0x75,0x06,0xc0,0x21]
    957 ; AVX-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
    958 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    959 ;
    960 ; AVX512VL-LABEL: test_x86_avx_vperm2f128_si_256:
    961 ; AVX512VL:       # %bb.0:
    962 ; AVX512VL-NEXT:    vperm2i128 $33, %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x46,0xc0,0x21]
    963 ; AVX512VL-NEXT:    # ymm0 = ymm1[2,3],ymm0[0,1]
    964 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    965   %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 3) ; <<8 x i32>> [#uses=1]
    966   ret <8 x i32> %res
    967 }
    968 declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
    969 
    970 
        ; Verify that llvm.x86.avx.cvtdq2.ps.256 lowers to a single 256-bit vcvtdq2ps
        ; with identical VEX bytes on both AVX and AVX512VL.
    971 define <8 x float> @test_x86_avx_cvtdq2_ps_256(<8 x i32> %a0) {
    972 ; AVX-LABEL: test_x86_avx_cvtdq2_ps_256:
    973 ; AVX:       # %bb.0:
    974 ; AVX-NEXT:    vcvtdq2ps %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x5b,0xc0]
    975 ; AVX-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    976 ;
    977 ; AVX512VL-LABEL: test_x86_avx_cvtdq2_ps_256:
    978 ; AVX512VL:       # %bb.0:
    979 ; AVX512VL-NEXT:    vcvtdq2ps %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xc0]
    980 ; AVX512VL-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    981   %res = call <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32> %a0) ; <<8 x float>> [#uses=1]
    982   ret <8 x float> %res
    983 }
    984 declare <8 x float> @llvm.x86.avx.cvtdq2.ps.256(<8 x i32>) nounwind readnone
    985