; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+sse4.1 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
; RUN: llc < %s -disable-peephole -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+sse4.1 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512

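; SSE4.1's blendvpd/blendvps/pblendvb read their mask from the implicit %xmm0
; operand, so the SSE lowering has to shuffle the mask into %xmm0 first; the
; AVX forms encode the mask register explicitly and need no extra moves.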
define <2 x double> @test_x86_sse41_blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; SSE-LABEL: test_x86_sse41_blendvpd:
; SSE:       ## %bb.0:
; SSE-NEXT:    movapd %xmm0, %xmm3 ## encoding: [0x66,0x0f,0x28,0xd8]
; SSE-NEXT:    movaps %xmm2, %xmm0 ## encoding: [0x0f,0x28,0xc2]
; SSE-NEXT:    blendvpd %xmm0, %xmm1, %xmm3 ## encoding: [0x66,0x0f,0x38,0x15,0xd9]
; SSE-NEXT:    movapd %xmm3, %xmm0 ## encoding: [0x66,0x0f,0x28,0xc3]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_blendvpd:
; AVX:       ## %bb.0:
; AVX-NEXT:    vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x4b,0xc1,0x20]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>) nounwind readnone


define <4 x float> @test_x86_sse41_blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; SSE-LABEL: test_x86_sse41_blendvps:
; SSE:       ## %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm3 ## encoding: [0x0f,0x28,0xd8]
; SSE-NEXT:    movaps %xmm2, %xmm0 ## encoding: [0x0f,0x28,0xc2]
; SSE-NEXT:    blendvps %xmm0, %xmm1, %xmm3 ## encoding: [0x66,0x0f,0x38,0x14,0xd9]
; SSE-NEXT:    movaps %xmm3, %xmm0 ## encoding: [0x0f,0x28,0xc3]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_blendvps:
; AVX:       ## %bb.0:
; AVX-NEXT:    vblendvps %xmm2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x4a,0xc1,0x20]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>) nounwind readnone


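; dppd/dpps compute a dot product under immediate control: the upper nibble
; selects which input lanes are multiplied and summed, the lower nibble selects
; which result lanes receive the sum (the rest are zeroed).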
define <2 x double> @test_x86_sse41_dppd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse41_dppd:
; SSE:       ## %bb.0:
; SSE-NEXT:    dppd $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x41,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_dppd:
; AVX:       ## %bb.0:
; AVX-NEXT:    vdppd $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x41,0xc1,0x07]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse41.dppd(<2 x double> %a0, <2 x double> %a1, i8 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.dppd(<2 x double>, <2 x double>, i8) nounwind readnone


define <4 x float> @test_x86_sse41_dpps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse41_dpps:
; SSE:       ## %bb.0:
; SSE-NEXT:    dpps $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x40,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_dpps:
; AVX:       ## %bb.0:
; AVX-NEXT:    vdpps $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x40,0xc1,0x07]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.dpps(<4 x float> %a0, <4 x float> %a1, i8 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.dpps(<4 x float>, <4 x float>, i8) nounwind readnone


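; The insertps immediate $17 (0x11) encodes source element 0 of %xmm1,
; destination lane 1, and a zero-mask of lane 0, matching the shuffle decoded
; in the CHECK comments below.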
define <4 x float> @test_x86_sse41_insertps(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse41_insertps:
; SSE:       ## %bb.0:
; SSE-NEXT:    insertps $17, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x21,0xc1,0x11]
; SSE-NEXT:    ## xmm0 = zero,xmm1[0],xmm0[2,3]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_insertps:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vinsertps $17, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x11]
; AVX1-NEXT:    ## xmm0 = zero,xmm1[0],xmm0[2,3]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_insertps:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vinsertps $17, %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x21,0xc1,0x11]
; AVX512-NEXT:    ## xmm0 = zero,xmm1[0],xmm0[2,3]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 17) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone



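; mpsadbw computes eight 16-bit sums of absolute differences; the immediate
; selects the 32-bit block of the second operand and the starting byte offset
; in the first operand that the comparison windows use.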
define <8 x i16> @test_x86_sse41_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse41_mpsadbw:
; SSE:       ## %bb.0:
; SSE-NEXT:    mpsadbw $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x42,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_mpsadbw:
; AVX:       ## %bb.0:
; AVX-NEXT:    vmpsadbw $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x42,0xc1,0x07]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone


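; packusdw narrows signed 32-bit lanes to 16 bits with unsigned saturation:
; negative inputs clamp to 0 and values above 65535 clamp to 65535.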
define <8 x i16> @test_x86_sse41_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse41_packusdw:
; SSE:       ## %bb.0:
; SSE-NEXT:    packusdw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x2b,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_packusdw:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x2b,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_packusdw:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone


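; With constant operands the packusdw call constant-folds to a load of the
; pre-saturated vector [0,0,0,0,65535,65535,0,0]: 65535 and 65536 both clamp
; to 65535, while -1 and -131072 clamp to 0.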
define <8 x i16> @test_x86_sse41_packusdw_fold() {
; X86-SSE-LABEL: test_x86_sse41_packusdw_fold:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,65535,65535,0,0]
; X86-SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
; X86-SSE-NEXT:    ## fixup A - offset: 3, value: LCPI7_0, kind: FK_Data_4
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse41_packusdw_fold:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,65535,65535,0,0]
; X86-AVX1-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X86-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI7_0, kind: FK_Data_4
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse41_packusdw_fold:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    vmovaps LCPI7_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,65535,65535,0,0]
; X86-AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X86-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI7_0, kind: FK_Data_4
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse41_packusdw_fold:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm0 = [0,0,0,0,65535,65535,0,0]
; X64-SSE-NEXT:    ## encoding: [0x0f,0x28,0x05,A,A,A,A]
; X64-SSE-NEXT:    ## fixup A - offset: 3, value: LCPI7_0-4, kind: reloc_riprel_4byte
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse41_packusdw_fold:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vmovaps {{.*#+}} xmm0 = [0,0,0,0,65535,65535,0,0]
; X64-AVX1-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X64-AVX1-NEXT:    ## fixup A - offset: 4, value: LCPI7_0-4, kind: reloc_riprel_4byte
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse41_packusdw_fold:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vmovaps {{.*}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [0,0,0,0,65535,65535,0,0]
; X64-AVX512-NEXT:    ## encoding: [0xc5,0xf8,0x28,0x05,A,A,A,A]
; X64-AVX512-NEXT:    ## fixup A - offset: 4, value: LCPI7_0-4, kind: reloc_riprel_4byte
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> zeroinitializer, <4 x i32> <i32 65535, i32 65536, i32 -1, i32 -131072>)
  ret <8 x i16> %res
}


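; pblendvb is the byte-granularity variable blend; like blendvpd/blendvps above,
; its SSE encoding reads the mask from the implicit %xmm0 operand.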
define <16 x i8> @test_x86_sse41_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
; SSE-LABEL: test_x86_sse41_pblendvb:
; SSE:       ## %bb.0:
; SSE-NEXT:    movdqa %xmm0, %xmm3 ## encoding: [0x66,0x0f,0x6f,0xd8]
; SSE-NEXT:    movaps %xmm2, %xmm0 ## encoding: [0x0f,0x28,0xc2]
; SSE-NEXT:    pblendvb %xmm0, %xmm1, %xmm3 ## encoding: [0x66,0x0f,0x38,0x10,0xd9]
; SSE-NEXT:    movdqa %xmm3, %xmm0 ## encoding: [0x66,0x0f,0x6f,0xc3]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_pblendvb:
; AVX:       ## %bb.0:
; AVX-NEXT:    vpblendvb %xmm2, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x4c,0xc1,0x20]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone


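; phminposuw computes the horizontal minimum of the eight unsigned words,
; returning the value in element 0, its index in element 1, and zeros elsewhere.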
define <8 x i16> @test_x86_sse41_phminposuw(<8 x i16> %a0) {
; SSE-LABEL: test_x86_sse41_phminposuw:
; SSE:       ## %bb.0:
; SSE-NEXT:    phminposuw %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x41,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_phminposuw:
; AVX:       ## %bb.0:
; AVX-NEXT:    vphminposuw %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x41,0xc0]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone


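; The pmax*/pmin* tests below cover the SSE4.1 element-wise signed and unsigned
; min/max variants; each should lower to a single instruction, with AVX512
; targets compressing the EVEX form back to VEX.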
define <16 x i8> @test_x86_sse41_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse41_pmaxsb:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3c,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmaxsb:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3c,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmaxsb:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3c,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pmaxsb(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse41_pmaxsd:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxsd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3d,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmaxsd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3d,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmaxsd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3d,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse41_pmaxud:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxud %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3f,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmaxud:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3f,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmaxud:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3f,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse41_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse41_pmaxuw:
; SSE:       ## %bb.0:
; SSE-NEXT:    pmaxuw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3e,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pmaxuw:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3e,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pmaxuw:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pmaxuw(<8 x i16>, <8 x i16>) nounwind readnone


define <16 x i8> @test_x86_sse41_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
; SSE-LABEL: test_x86_sse41_pminsb:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x38,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pminsb:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x38,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pminsb:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x38,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1]
  ret <16 x i8> %res
}
declare <16 x i8> @llvm.x86.sse41.pminsb(<16 x i8>, <16 x i8>) nounwind readnone


define <4 x i32> @test_x86_sse41_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse41_pminsd:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminsd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x39,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pminsd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x39,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pminsd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x39,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone


define <4 x i32> @test_x86_sse41_pminud(<4 x i32> %a0, <4 x i32> %a1) {
; SSE-LABEL: test_x86_sse41_pminud:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminud %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3b,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pminud:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminud %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3b,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pminud:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminud %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3b,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1]
  ret <4 x i32> %res
}
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone


define <8 x i16> @test_x86_sse41_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
; SSE-LABEL: test_x86_sse41_pminuw:
; SSE:       ## %bb.0:
; SSE-NEXT:    pminuw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x3a,0xc1]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_pminuw:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x3a,0xc1]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_pminuw:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xc1]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1]
  ret <8 x i16> %res
}
declare <8 x i16> @llvm.x86.sse41.pminuw(<8 x i16>, <8 x i16>) nounwind readnone


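; ptest ANDs/ANDNs its operands without writing a register and sets flags:
; ZF if (a0 AND a1) == 0, CF if (NOT a0 AND a1) == 0. ptestc reads CF (setb),
; ptestz reads ZF (sete), and ptestnzc requires both clear (seta).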
define i32 @test_x86_sse41_ptestc(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_x86_sse41_ptestc:
; SSE:       ## %bb.0:
; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT:    ptest %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x17,0xc1]
; SSE-NEXT:    setb %al ## encoding: [0x0f,0x92,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_ptestc:
; AVX:       ## %bb.0:
; AVX-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX-NEXT:    vptest %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x17,0xc1]
; AVX-NEXT:    setb %al ## encoding: [0x0f,0x92,0xc0]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone


define i32 @test_x86_sse41_ptestnzc(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_x86_sse41_ptestnzc:
; SSE:       ## %bb.0:
; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT:    ptest %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x17,0xc1]
; SSE-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_ptestnzc:
; AVX:       ## %bb.0:
; AVX-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX-NEXT:    vptest %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x17,0xc1]
; AVX-NEXT:    seta %al ## encoding: [0x0f,0x97,0xc0]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestnzc(<2 x i64>, <2 x i64>) nounwind readnone


define i32 @test_x86_sse41_ptestz(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_x86_sse41_ptestz:
; SSE:       ## %bb.0:
; SSE-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; SSE-NEXT:    ptest %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x17,0xc1]
; SSE-NEXT:    sete %al ## encoding: [0x0f,0x94,0xc0]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX-LABEL: test_x86_sse41_ptestz:
; AVX:       ## %bb.0:
; AVX-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
; AVX-NEXT:    vptest %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x17,0xc1]
; AVX-NEXT:    sete %al ## encoding: [0x0f,0x94,0xc0]
; AVX-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call i32 @llvm.x86.sse41.ptestz(<2 x i64> %a0, <2 x i64> %a1) ; <i32> [#uses=1]
  ret i32 %res
}
declare i32 @llvm.x86.sse41.ptestz(<2 x i64>, <2 x i64>) nounwind readnone


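; The round.* intrinsics map directly onto (v)roundpd/ps/sd/ss; the immediate
; $7 is the rounding-control byte passed through unchanged into the encoding.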
define <2 x double> @test_x86_sse41_round_pd(<2 x double> %a0) {
; SSE-LABEL: test_x86_sse41_round_pd:
; SSE:       ## %bb.0:
; SSE-NEXT:    roundpd $7, %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x09,0xc0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_round_pd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vroundpd $7, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x09,0xc0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_round_pd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundpd $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x09,0xc0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse41.round.pd(<2 x double> %a0, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.pd(<2 x double>, i32) nounwind readnone


define <4 x float> @test_x86_sse41_round_ps(<4 x float> %a0) {
; SSE-LABEL: test_x86_sse41_round_ps:
; SSE:       ## %bb.0:
; SSE-NEXT:    roundps $7, %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x08,0xc0,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_round_ps:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vroundps $7, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x08,0xc0,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_round_ps:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundps $7, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x08,0xc0,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %a0, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone


define <2 x double> @test_x86_sse41_round_sd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_x86_sse41_round_sd:
; SSE:       ## %bb.0:
; SSE-NEXT:    roundsd $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0b,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_round_sd:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0b,0xc1,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_round_sd:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundsd $7, %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0b,0xc1,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone


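; Checks that the load of the second operand folds into roundsd's memory
; operand instead of needing a separate load instruction.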
define <2 x double> @test_x86_sse41_round_sd_load(<2 x double> %a0, <2 x double>* %a1) {
; X86-SSE-LABEL: test_x86_sse41_round_sd_load:
; X86-SSE:       ## %bb.0:
; X86-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-SSE-NEXT:    roundsd $7, (%eax), %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0b,0x00,0x07]
; X86-SSE-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX1-LABEL: test_x86_sse41_round_sd_load:
; X86-AVX1:       ## %bb.0:
; X86-AVX1-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX1-NEXT:    vroundsd $7, (%eax), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0b,0x00,0x07]
; X86-AVX1-NEXT:    retl ## encoding: [0xc3]
;
; X86-AVX512-LABEL: test_x86_sse41_round_sd_load:
; X86-AVX512:       ## %bb.0:
; X86-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-AVX512-NEXT:    vroundsd $7, (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0b,0x00,0x07]
; X86-AVX512-NEXT:    retl ## encoding: [0xc3]
;
; X64-SSE-LABEL: test_x86_sse41_round_sd_load:
; X64-SSE:       ## %bb.0:
; X64-SSE-NEXT:    roundsd $7, (%rdi), %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0b,0x07,0x07]
; X64-SSE-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX1-LABEL: test_x86_sse41_round_sd_load:
; X64-AVX1:       ## %bb.0:
; X64-AVX1-NEXT:    vroundsd $7, (%rdi), %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0b,0x07,0x07]
; X64-AVX1-NEXT:    retq ## encoding: [0xc3]
;
; X64-AVX512-LABEL: test_x86_sse41_round_sd_load:
; X64-AVX512:       ## %bb.0:
; X64-AVX512-NEXT:    vroundsd $7, (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0b,0x07,0x07]
; X64-AVX512-NEXT:    retq ## encoding: [0xc3]
  %a1b = load <2 x double>, <2 x double>* %a1
  %res = call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a0, <2 x double> %a1b, i32 7) ; <<2 x double>> [#uses=1]
  ret <2 x double> %res
}


define <4 x float> @test_x86_sse41_round_ss(<4 x float> %a0, <4 x float> %a1) {
; SSE-LABEL: test_x86_sse41_round_ss:
; SSE:       ## %bb.0:
; SSE-NEXT:    roundss $7, %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x3a,0x0a,0xc1,0x07]
; SSE-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX1-LABEL: test_x86_sse41_round_ss:
; AVX1:       ## %bb.0:
; AVX1-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0a,0xc1,0x07]
; AVX1-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
;
; AVX512-LABEL: test_x86_sse41_round_ss:
; AVX512:       ## %bb.0:
; AVX512-NEXT:    vroundss $7, %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0a,0xc1,0x07]
; AVX512-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %a0, <4 x float> %a1, i32 7) ; <<4 x float>> [#uses=1]
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
    572