; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse2 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512


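; Each test below calls one of the legacy SSE2 LLVM intrinsics and checks the
; instruction selected for SSE, AVX and AVX512 targets as well as its exact
; MC encoding (hence -show-mc-encoding in the RUN lines above).
; -disable-peephole keeps the peephole optimizer from folding the loads that
; some of the tests deliberately keep separate.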
define <2 x double> @test_x86_sse2_sqrt_pd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_sqrt_pd:
; SSE:       ## %bb.0:
; SSE-NEXT:    sqrtpd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x51,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_sqrt_pd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vsqrtpd %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x51,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_sqrt_pd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vsqrtpd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
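; "EVEX TO VEX Compression" in the AVX512 check lines means llc re-encoded an
; AVX512 instruction with the shorter VEX prefix, which is legal whenever no
; EVEX-only feature (zmm registers, masking, broadcast, xmm16-xmm31) is in
; use; the resulting bytes match the plain AVX encoding above.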


define <2 x double> @test_x86_sse2_sqrt_sd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse2_sqrt_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_sqrt_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_sqrt_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
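; sqrt.sd operates on the low double only and passes the upper element of %a0
; through unchanged, which is why the VEX form uses %xmm0 as both the merge
; source and the destination.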


define <2 x double> @test_x86_sse2_sqrt_sd_vec_load(<2 x double>* %a0) {
; X86-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movapd (%eax), %xmm0 ## encoding: [0x66,0x0f,0x28,0x00]
; X86-SSE-NEXT:    sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovapd (%eax), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x00]
; X86-AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovapd (%eax), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00]
; X86-AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movapd (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x28,0x07]
; X64-SSE-NEXT:    sqrtsd %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x51,0xc0]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovapd (%rdi), %xmm0 ## encoding: [0xc5,0xf9,0x28,0x07]
; X64-AVX1-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x51,0xc0]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_sqrt_sd_vec_load:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovapd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x07]
; X64-AVX512-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a1 = load <2 x double>, <2 x double>* %a0, align 16
  %res = call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}


define <2 x i64> @test_x86_sse2_psll_dq_bs(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psll_dq_bs:
; SSE:       ## %bb.0:
; SSE-NEXT:    pslldq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xf8,0x07]
; SSE-NEXT:    ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psll_dq_bs:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpslldq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xf8,0x07]
; AVX1-NEXT:    ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psll_dq_bs:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpslldq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x07]
; AVX512-NEXT:    ## xmm0 = zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq.bs(<2 x i64>, i32) nounwind readnone
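; The .dq.bs intrinsics take their shift amount in bytes, so the i32 7 above
; lowers directly to pslldq $7. Contrast the plain .dq variants further down,
; which take the amount in bits.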


define <2 x i64> @test_x86_sse2_psrl_dq_bs(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psrl_dq_bs:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrldq $7, %xmm0 ## encoding: [0x66,0x0f,0x73,0xd8,0x07]
; SSE-NEXT:    ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrl_dq_bs:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrldq $7, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xd8,0x07]
; AVX1-NEXT:    ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrl_dq_bs:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrldq $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x07]
; AVX512-NEXT:    ## xmm0 = xmm0[7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64> %a0, i32 7) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq.bs(<2 x i64>, i32) nounwind readnone

define <2 x i64> @test_x86_sse2_psll_dq(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psll_dq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pslldq $1, %xmm0 ## encoding: [0x66,0x0f,0x73,0xf8,0x01]
; SSE-NEXT:    ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psll_dq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpslldq $1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xf8,0x01]
; AVX1-NEXT:    ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psll_dq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpslldq $1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x01]
; AVX512-NEXT:    ## xmm0 = zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13,14]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psll.dq(<2 x i64>, i32) nounwind readnone
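; Here the shift amount is in bits: i32 8 is one byte, hence pslldq $1 in the
; checks above (and psrldq $1 below).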


define <2 x i64> @test_x86_sse2_psrl_dq(<2 x i64> %a0) {
; SSE-LABEL: test_x86_sse2_psrl_dq:
; SSE:       ## %bb.0:
; SSE-NEXT:    psrldq $1, %xmm0 ## encoding: [0x66,0x0f,0x73,0xd8,0x01]
; SSE-NEXT:    ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_psrl_dq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpsrldq $1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x73,0xd8,0x01]
; AVX1-NEXT:    ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_psrl_dq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpsrldq $1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x01]
; AVX512-NEXT:    ## xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64> %a0, i32 8) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.psrl.dq(<2 x i64>, i32) nounwind readnone


define <2 x double> @test_x86_sse2_cvtdq2pd(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_cvtdq2pd:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtdq2pd %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0xe6,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtdq2pd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtdq2pd %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtdq2pd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtdq2pd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtdq2pd(<4 x i32>) nounwind readnone
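; cvtdq2pd widens the two low i32 elements of %a0 to doubles; the upper two
; elements are ignored.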


define <2 x double> @test_x86_sse2_cvtps2pd(<4 x float> %a0) {
; SSE-LABEL: test_x86_sse2_cvtps2pd:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtps2pd %xmm0, %xmm0 ## encoding: [0x0f,0x5a,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtps2pd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtps2pd %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtps2pd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtps2pd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float> %a0) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtps2pd(<4 x float>) nounwind readnone


define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
; X86-SSE-LABEL: test_x86_sse2_storel_dq:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movlps %xmm0, (%eax) ## encoding: [0x0f,0x13,0x00]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_storel_dq:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovlps %xmm0, (%eax) ## encoding: [0xc5,0xf8,0x13,0x00]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_storel_dq:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovlps %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_storel_dq:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movlps %xmm0, (%rdi) ## encoding: [0x0f,0x13,0x07]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_storel_dq:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovlps %xmm0, (%rdi) ## encoding: [0xc5,0xf8,0x13,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_storel_dq:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovlps %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
  ret void
}
declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
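; storel.dq stores only the low 64 bits of the vector. A 64-bit FP store
; (movlps) is selected even for this integer intrinsic, presumably because
; stores carry no execution-domain penalty and movlps has the shorter
; encoding (an assumption; the test only pins down the bytes).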


define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
  ; add operation forces the execution domain.
; X86-SSE-LABEL: test_x86_sse2_storeu_dq:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    pcmpeqd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x76,0xc9]
; X86-SSE-NEXT:    psubb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf8,0xc1]
; X86-SSE-NEXT:    movdqu %xmm0, (%eax) ## encoding: [0xf3,0x0f,0x7f,0x00]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_storeu_dq:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf8,0xc1]
; X86-AVX1-NEXT:    vmovdqu %xmm0, (%eax) ## encoding: [0xc5,0xfa,0x7f,0x00]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_storeu_dq:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; X86-AVX512-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
; X86-AVX512-NEXT:    vmovdqu %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_storeu_dq:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    pcmpeqd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x76,0xc9]
; X64-SSE-NEXT:    psubb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf8,0xc1]
; X64-SSE-NEXT:    movdqu %xmm0, (%rdi) ## encoding: [0xf3,0x0f,0x7f,0x07]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_storeu_dq:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX1-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf8,0xc1]
; X64-AVX1-NEXT:    vmovdqu %xmm0, (%rdi) ## encoding: [0xc5,0xfa,0x7f,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_storeu_dq:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
; X64-AVX512-NEXT:    vpsubb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
; X64-AVX512-NEXT:    vmovdqu %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a2 = add <16 x i8> %a1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
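; The add of <1,1,...> is materialized as pcmpeqd (all-ones, i.e. -1 in every
; byte) followed by psubb: subtracting -1 adds 1. Keeping the value in the
; integer domain is what selects the movdqu store rather than movups.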


define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
  ; fadd operation forces the execution domain.
; X86-SSE-LABEL: test_x86_sse2_storeu_pd:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    xorpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x57,0xc9]
; X86-SSE-NEXT:    movhpd LCPI11_0, %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A]
; X86-SSE-NEXT:    ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
; X86-SSE-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X86-SSE-NEXT:    addpd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x58,0xc8]
; X86-SSE-NEXT:    movupd %xmm1, (%eax) ## encoding: [0x66,0x0f,0x11,0x08]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_storeu_pd:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
; X86-AVX1-NEXT:    vmovhpd LCPI11_0, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X86-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
; X86-AVX1-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X86-AVX1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc1]
; X86-AVX1-NEXT:    vmovupd %xmm0, (%eax) ## encoding: [0xc5,0xf9,0x11,0x00]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_storeu_pd:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovsd LCPI11_0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A]
; X86-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI11_0, kind: FK_Data_4
; X86-AVX512-NEXT:    ## xmm1 = mem[0],zero
; X86-AVX512-NEXT:    vpslldq $8, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08]
; X86-AVX512-NEXT:    ## xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X86-AVX512-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X86-AVX512-NEXT:    vmovupd %xmm0, (%eax) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_storeu_pd:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    xorpd %xmm1, %xmm1 ## encoding: [0x66,0x0f,0x57,0xc9]
; X64-SSE-NEXT:    movhpd {{.*}}(%rip), %xmm1 ## encoding: [0x66,0x0f,0x16,0x0d,A,A,A,A]
; X64-SSE-NEXT:    ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
; X64-SSE-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X64-SSE-NEXT:    addpd %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x58,0xc8]
; X64-SSE-NEXT:    movupd %xmm1, (%rdi) ## encoding: [0x66,0x0f,0x11,0x0f]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_storeu_pd:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vxorpd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x57,0xc9]
; X64-AVX1-NEXT:    vmovhpd {{.*}}(%rip), %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x16,0x0d,A,A,A,A]
; X64-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
; X64-AVX1-NEXT:    ## xmm1 = xmm1[0],mem[0]
; X64-AVX1-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc1]
; X64-AVX1-NEXT:    vmovupd %xmm0, (%rdi) ## encoding: [0xc5,0xf9,0x11,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_storeu_pd:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovsd {{.*}}(%rip), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0d,A,A,A,A]
; X64-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI11_0-4, kind: reloc_riprel_4byte
; X64-AVX512-NEXT:    ## xmm1 = mem[0],zero
; X64-AVX512-NEXT:    vpslldq $8, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x73,0xf9,0x08]
; X64-AVX512-NEXT:    ## xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4,5,6,7]
; X64-AVX512-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X64-AVX512-NEXT:    vmovupd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
  ret void
}
declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind

define <4 x i32> @test_x86_sse2_pshuf_d(<4 x i32> %a) {
; SSE-LABEL: test_x86_sse2_pshuf_d:
; SSE:       ## %bb.0: ## %entry
; SSE-NEXT:    pshufd $27, %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x70,0xc0,0x1b]
; SSE-NEXT:    ## xmm0 = xmm0[3,2,1,0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pshuf_d:
; AVX1:       ## %bb.0: ## %entry
; AVX1-NEXT:    vpermilps $27, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; AVX1-NEXT:    ## xmm0 = xmm0[3,2,1,0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pshuf_d:
; AVX512:       ## %bb.0: ## %entry
; AVX512-NEXT:    vpermilps $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x1b]
; AVX512-NEXT:    ## xmm0 = xmm0[3,2,1,0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %res = call <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32> %a, i8 27) nounwind readnone
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse2.pshuf.d(<4 x i32>, i8) nounwind readnone
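; Immediate 27 is 0b00011011, selecting elements [3,2,1,0]: a full dword
; reversal. With AVX the same shuffle is emitted as vpermilps, which is
; interchangeable with pshufd for register-to-register dword shuffles.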

define <8 x i16> @test_x86_sse2_pshufl_w(<8 x i16> %a) {
; SSE-LABEL: test_x86_sse2_pshufl_w:
; SSE:       ## %bb.0: ## %entry
; SSE-NEXT:    pshuflw $27, %xmm0, %xmm0 ## encoding: [0xf2,0x0f,0x70,0xc0,0x1b]
; SSE-NEXT:    ## xmm0 = xmm0[3,2,1,0,4,5,6,7]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pshufl_w:
; AVX1:       ## %bb.0: ## %entry
; AVX1-NEXT:    vpshuflw $27, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x70,0xc0,0x1b]
; AVX1-NEXT:    ## xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pshufl_w:
; AVX512:       ## %bb.0: ## %entry
; AVX512-NEXT:    vpshuflw $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x1b]
; AVX512-NEXT:    ## xmm0 = xmm0[3,2,1,0,4,5,6,7]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %res = call <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16> %a, i8 27) nounwind readnone
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufl.w(<8 x i16>, i8) nounwind readnone

define <8 x i16> @test_x86_sse2_pshufh_w(<8 x i16> %a) {
; SSE-LABEL: test_x86_sse2_pshufh_w:
; SSE:       ## %bb.0: ## %entry
; SSE-NEXT:    pshufhw $27, %xmm0, %xmm0 ## encoding: [0xf3,0x0f,0x70,0xc0,0x1b]
; SSE-NEXT:    ## xmm0 = xmm0[0,1,2,3,7,6,5,4]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pshufh_w:
; AVX1:       ## %bb.0: ## %entry
; AVX1-NEXT:    vpshufhw $27, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x70,0xc0,0x1b]
; AVX1-NEXT:    ## xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pshufh_w:
; AVX512:       ## %bb.0: ## %entry
; AVX512-NEXT:    vpshufhw $27, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x1b]
; AVX512-NEXT:    ## xmm0 = xmm0[0,1,2,3,7,6,5,4]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
entry:
  %res = call <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16> %a, i8 27) nounwind readnone
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pshufh.w(<8 x i16>, i8) nounwind readnone

define <16 x i8> @max_epu8(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: max_epu8:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxub %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xde,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: max_epu8:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xde,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: max_epu8:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pmaxu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <16 x i8> @min_epu8(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: min_epu8:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminub %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xda,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: min_epu8:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminub %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xda,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: min_epu8:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminub %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pminu.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @max_epi16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: max_epi16:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xee,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: max_epi16:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xee,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: max_epi16:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmaxs.w(<8 x i16>, <8 x i16>) nounwind readnone

define <8 x i16> @min_epi16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: min_epi16:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xea,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: min_epi16:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xea,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: min_epi16:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pmins.w(<8 x i16>, <8 x i16>) nounwind readnone
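; SSE2 only provides unsigned min/max on bytes (pminub/pmaxub) and signed
; min/max on words (pminsw/pmaxsw); the remaining signedness/width
; combinations did not appear until SSE4.1.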

define <2 x double> @test_x86_sse2_add_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_add_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    addsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x58,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_add_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x58,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_add_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vaddsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x58,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_sub_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    subsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5c,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_sub_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5c,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_sub_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vsubsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5c,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_mul_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    mulsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x59,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_mul_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x59,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_mul_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vmulsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x59,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>) nounwind readnone


define <2 x double> @test_x86_sse2_div_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse2_div_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    divsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x5e,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_div_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_div_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vdivsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5e,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>) nounwind readnone
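; Like sqrt.sd above, these scalar add/sub/mul/div intrinsics combine only
; the low doubles and pass the upper element of %a0 through, matching the
; addsd/subsd/mulsd/divsd semantics.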

define <16 x i8> @mm_avg_epu8(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: mm_avg_epu8:
; SSE:       ## %bb.0:
; SSE-NEXT:    pavgb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe0,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: mm_avg_epu8:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe0,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: mm_avg_epu8:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone

define <8 x i16> @mm_avg_epu16(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: mm_avg_epu16:
; SSE:       ## %bb.0:
; SSE-NEXT:    pavgw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe3,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: mm_avg_epu16:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe3,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: mm_avg_epu16:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
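; pavgb/pavgw compute the unsigned rounding average (a + b + 1) >> 1, with
; the intermediate sum held in a wider type so the +1 cannot overflow.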


define <2 x i64> @test_x86_sse2_pmulu_dq(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse2_pmulu_dq:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmuludq %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xf4,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_pmulu_dq:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xf4,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_pmulu_dq:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> %a1) ; <<2 x i64>> [#uses=1]
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
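; pmuludq multiplies the unsigned i32 elements 0 and 2 of each source and
; yields the two full 64-bit products.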


define <2 x double> @test_x86_sse2_cvtsi2sd(<2 x double> %a0, i32 %a1) {
; X86-SSE-LABEL: test_x86_sse2_cvtsi2sd:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xf2,0x0f,0x2a,0x44,0x24,0x04]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtsi2sd:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtsi2sd:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtsi2sd:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    cvtsi2sdl %edi, %xmm0 ## encoding: [0xf2,0x0f,0x2a,0xc7]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtsi2sd:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtsi2sd:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vcvtsi2sdl %edi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double> %a0, i32 %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtsi2sd(<2 x double>, i32) nounwind readnone


define <2 x double> @test_x86_sse2_cvtss2sd(<2 x double> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse2_cvtss2sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtss2sd %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x5a,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtss2sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtss2sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtss2sd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double>, <4 x float>) nounwind readnone


define <2 x double> @test_x86_sse2_cvtss2sd_load(<2 x double> %a0, <4 x float>* %p1) {
; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    movss (%eax), %xmm1 ## encoding: [0xf3,0x0f,0x10,0x08]
; X86-SSE-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
; X86-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X86-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vmovss (%eax), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x08]
; X86-AVX1-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X86-AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
; X86-AVX1-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X86-AVX1-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vmovss (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x08]
; X86-AVX512-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X86-AVX512-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
; X86-AVX512-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X86-AVX512-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movss (%rdi), %xmm1 ## encoding: [0xf3,0x0f,0x10,0x0f]
; X64-SSE-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X64-SSE-NEXT:    cvtss2sd %xmm1, %xmm1 ## encoding: [0xf3,0x0f,0x5a,0xc9]
; X64-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X64-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovss (%rdi), %xmm1 ## encoding: [0xc5,0xfa,0x10,0x0f]
; X64-AVX1-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X64-AVX1-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0xc9]
; X64-AVX1-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X64-AVX1-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovss (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0f]
; X64-AVX512-NEXT:    ## xmm1 = mem[0],zero,zero,zero
; X64-AVX512-NEXT:    vcvtss2sd %xmm1, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0xc9]
; X64-AVX512-NEXT:    vblendps $3, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03]
; X64-AVX512-NEXT:    ## xmm0 = xmm1[0,1],xmm0[2,3]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a1 = load <4 x float>, <4 x float>* %p1
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}


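; In the optsize variant below the load is folded into cvtss2sd as a memory
; operand, giving the shorter encoding; in the test above the load stays a
; separate movss, presumably to avoid the memory form's partial update of
; (and hence false dependency on) the destination register.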
define <2 x double> @test_x86_sse2_cvtss2sd_load_optsize(<2 x double> %a0, <4 x float>* %p1) optsize {
; X86-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    cvtss2sd (%eax), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x08]
; X86-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X86-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vcvtss2sd (%eax), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x08]
; X86-AVX1-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
; X86-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vcvtss2sd (%eax), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x08]
; X86-AVX512-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
; X86-AVX512-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    cvtss2sd (%rdi), %xmm1 ## encoding: [0xf3,0x0f,0x5a,0x0f]
; X64-SSE-NEXT:    movsd %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x10,0xc1]
; X64-SSE-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vcvtss2sd (%rdi), %xmm1, %xmm1 ## encoding: [0xc5,0xf2,0x5a,0x0f]
; X64-AVX1-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x10,0xc1]
; X64-AVX1-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse2_cvtss2sd_load_optsize:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vcvtss2sd (%rdi), %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf2,0x5a,0x0f]
; X64-AVX512-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
; X64-AVX512-NEXT:    ## xmm0 = xmm1[0],xmm0[1]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a1 = load <4 x float>, <4 x float>* %p1
  %res = call <2 x double> @llvm.x86.sse2.cvtss2sd(<2 x double> %a0, <4 x float> %a1) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}


define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) {
; SSE-LABEL: test_x86_sse2_cvtdq2ps:
; SSE:       ## %bb.0:
; SSE-NEXT:    cvtdq2ps %xmm0, %xmm0 ## encoding: [0x0f,0x5b,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse2_cvtdq2ps:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vcvtdq2ps %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse2_cvtdq2ps:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vcvtdq2ps %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32> %a0) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone
    885