Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mcpu=corei7   | FileCheck %s --check-prefix=X32
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 | FileCheck %s --check-prefix=X64
      4 
      5 ; Make sure that we don't crash when legalizing vselect and vsetcc and that
      6 ; we are able to generate vector blend instructions.
      7 
      8 define void @simple_widen(<2 x float> %a, <2 x float> %b) {
        ; Stores the result of a <2 x float> select between %a and %b whose
        ; <2 x i1> condition is undef. The store address is undef as well, so the
        ; assertions below only pin the emitted instruction sequence: extractps +
        ; movss for the i686 run, and a single movlps for the x86_64 run.
        ; NOTE(review): the function name suggests this targets widening of the
        ; 2-element vector during legalization -- inferred from the name, confirm.
      9 ; X32-LABEL: simple_widen:
     10 ; X32:       # %bb.0: # %entry
     11 ; X32-NEXT:    extractps $1, %xmm1, (%eax)
     12 ; X32-NEXT:    movss %xmm1, (%eax)
     13 ; X32-NEXT:    retl
     14 ;
     15 ; X64-LABEL: simple_widen:
     16 ; X64:       # %bb.0: # %entry
     17 ; X64-NEXT:    movlps %xmm1, (%rax)
     18 ; X64-NEXT:    retq
     19 entry:
          ; Vector select with an undef mask; no compare feeds it.
     20   %0 = select <2 x i1> undef, <2 x float> %a, <2 x float> %b
          ; The destination pointer is undef; the store exists to keep %0 live.
     21   store <2 x float> %0, <2 x float>* undef
     22   ret void
     23 }
     24 
     25 define void @complex_inreg_work(<2 x float> %a, <2 x float> %b) {
        ; Builds a <2 x i1> mask with an fcmp oeq of two undef <2 x float> vectors,
        ; uses it to select between %a and %b, and stores the result to an undef
        ; address. Both runs are expected to emit cmpordps followed by blendvps
        ; ahead of the store (extractps + movss on i686, movlps on x86_64).
     26 ; X32-LABEL: complex_inreg_work:
     27 ; X32:       # %bb.0: # %entry
     28 ; X32-NEXT:    movaps %xmm0, %xmm2
     29 ; X32-NEXT:    cmpordps %xmm0, %xmm0
     30 ; X32-NEXT:    blendvps %xmm0, %xmm2, %xmm1
     31 ; X32-NEXT:    extractps $1, %xmm1, (%eax)
     32 ; X32-NEXT:    movss %xmm1, (%eax)
     33 ; X32-NEXT:    retl
     34 ;
     35 ; X64-LABEL: complex_inreg_work:
     36 ; X64:       # %bb.0: # %entry
     37 ; X64-NEXT:    movaps %xmm0, %xmm2
     38 ; X64-NEXT:    cmpordps %xmm0, %xmm0
     39 ; X64-NEXT:    blendvps %xmm0, %xmm2, %xmm1
     40 ; X64-NEXT:    movlps %xmm1, (%rax)
     41 ; X64-NEXT:    retq
     42 entry:
          ; Vector compare (both operands undef) that produces the select mask.
     43   %0 = fcmp oeq <2 x float> undef, undef
          ; Vector select driven by the compare result above.
     44   %1 = select <2 x i1> %0, <2 x float> %a, <2 x float> %b
          ; The destination pointer is undef; the store exists to keep %1 live.
     45   store <2 x float> %1, <2 x float>* undef
     46   ret void
     47 }
     48 
     49 define void @zero_test() {
        ; Stores a <2 x float> select whose condition and true operand are both
        ; undef and whose false operand is zeroinitializer. The assertions expect
        ; the value to be materialized with xorps and then stored (extractps +
        ; movss on i686, movlps on x86_64), with no blend instruction.
     50 ; X32-LABEL: zero_test:
     51 ; X32:       # %bb.0: # %entry
     52 ; X32-NEXT:    xorps %xmm0, %xmm0
     53 ; X32-NEXT:    extractps $1, %xmm0, (%eax)
     54 ; X32-NEXT:    movss %xmm0, (%eax)
     55 ; X32-NEXT:    retl
     56 ;
     57 ; X64-LABEL: zero_test:
     58 ; X64:       # %bb.0: # %entry
     59 ; X64-NEXT:    xorps %xmm0, %xmm0
     60 ; X64-NEXT:    movlps %xmm0, (%rax)
     61 ; X64-NEXT:    retq
     62 entry:
          ; Only the false operand (all zeros) is a defined value here.
     63   %0 = select <2 x i1> undef, <2 x float> undef, <2 x float> zeroinitializer
          ; The destination pointer is undef; the store exists to keep %0 live.
     64   store <2 x float> %0, <2 x float>* undef
     65   ret void
     66 }
     67 
     68 define void @full_test() {
        ; Larger case that chains two vector compares and two vector selects on
        ; <2 x float> values held in stack slots. Both runs are expected to emit
        ; two blendvps instructions, one per select, fed by cmpltps and cmpeqps.
        ; NOTE(review): the arithmetic in B1 has the shape of a ceil() expansion
        ; (truncate, add 1.0 when the input is positive, keep the input when it
        ; is already integral) -- inferred from the IR, not stated in the file.
     69 ; X32-LABEL: full_test:
     70 ; X32:       # %bb.0: # %entry
     71 ; X32-NEXT:    subl $60, %esp
     72 ; X32-NEXT:    .cfi_def_cfa_offset 64
     73 ; X32-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
     74 ; X32-NEXT:    cvttps2dq %xmm2, %xmm0
     75 ; X32-NEXT:    cvtdq2ps %xmm0, %xmm1
     76 ; X32-NEXT:    xorps %xmm0, %xmm0
     77 ; X32-NEXT:    cmpltps %xmm2, %xmm0
     78 ; X32-NEXT:    movaps {{.*#+}} xmm3 = <1,1,u,u>
     79 ; X32-NEXT:    addps %xmm1, %xmm3
     80 ; X32-NEXT:    movaps %xmm1, %xmm4
     81 ; X32-NEXT:    blendvps %xmm0, %xmm3, %xmm4
     82 ; X32-NEXT:    cmpeqps %xmm2, %xmm1
     83 ; X32-NEXT:    movaps %xmm1, %xmm0
     84 ; X32-NEXT:    blendvps %xmm0, %xmm2, %xmm4
     85 ; X32-NEXT:    movss %xmm4, {{[0-9]+}}(%esp)
     86 ; X32-NEXT:    movshdup {{.*#+}} xmm0 = xmm4[1,1,3,3]
     87 ; X32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
     88 ; X32-NEXT:    movss %xmm4, {{[0-9]+}}(%esp)
     89 ; X32-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
     90 ; X32-NEXT:    addl $60, %esp
     91 ; X32-NEXT:    .cfi_def_cfa_offset 4
     92 ; X32-NEXT:    retl
     93 ;
     94 ; X64-LABEL: full_test:
     95 ; X64:       # %bb.0: # %entry
     96 ; X64-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
     97 ; X64-NEXT:    cvttps2dq %xmm2, %xmm0
     98 ; X64-NEXT:    cvtdq2ps %xmm0, %xmm1
     99 ; X64-NEXT:    xorps %xmm0, %xmm0
    100 ; X64-NEXT:    cmpltps %xmm2, %xmm0
    101 ; X64-NEXT:    movaps {{.*#+}} xmm3 = <1,1,u,u>
    102 ; X64-NEXT:    addps %xmm1, %xmm3
    103 ; X64-NEXT:    movaps %xmm1, %xmm4
    104 ; X64-NEXT:    blendvps %xmm0, %xmm3, %xmm4
    105 ; X64-NEXT:    cmpeqps %xmm2, %xmm1
    106 ; X64-NEXT:    movaps %xmm1, %xmm0
    107 ; X64-NEXT:    blendvps %xmm0, %xmm2, %xmm4
    108 ; X64-NEXT:    movlps %xmm4, -{{[0-9]+}}(%rsp)
    109 ; X64-NEXT:    movlps %xmm4, -{{[0-9]+}}(%rsp)
    110 ; X64-NEXT:    retq
    111  entry:
           ; Stack slots. %Cy300 is never referenced again in this function; it is
           ; kept because the checked i686 frame size may depend on it.
    112    %Cy300 = alloca <4 x float>
    113    %Cy11a = alloca <2 x float>
    114    %Cy118 = alloca <2 x float>
    115    %Cy119 = alloca <2 x float>
    116    br label %B1
    117 
    118  B1:                                               ; preds = %entry
           ; %Cy119 is never stored to before this load.
    119    %0 = load <2 x float>, <2 x float>* %Cy119
           ; Round trip through i32: float -> signed int -> float.
    120    %1 = fptosi <2 x float> %0 to <2 x i32>
    121    %2 = sitofp <2 x i32> %1 to <2 x float>
           ; First mask: lanes where the loaded value is greater than zero.
    122    %3 = fcmp ogt <2 x float> %0, zeroinitializer
    123    %4 = fadd <2 x float> %2, <float 1.000000e+00, float 1.000000e+00>
           ; First select: round-tripped value plus 1.0 where %3 is set, else the
           ; round-tripped value itself.
    124    %5 = select <2 x i1> %3, <2 x float> %4, <2 x float> %2
           ; Second mask: lanes where the round trip reproduced the loaded value.
    125    %6 = fcmp oeq <2 x float> %2, %0
           ; Second select: the loaded value where %6 is set, else the result of
           ; the first select.
    126    %7 = select <2 x i1> %6, <2 x float> %0, <2 x float> %5
           ; Store to %Cy118, reload it, and copy it to %Cy11a; the assertions
           ; expect two stores of the blended register per run.
    127    store <2 x float> %7, <2 x float>* %Cy118
    128    %8 = load <2 x float>, <2 x float>* %Cy118
    129    store <2 x float> %8, <2 x float>* %Cy11a
    130    ret void
    131 }
    132