; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX

; Verify that loads are folded into unary SSE intrinsics only when optimizing
; for size.
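; Keeping the load as a separate (v)movss/(v)movsd in the default case
; presumably avoids a partial-register update: the memory form of
; rcpss/rsqrtss/sqrtss/sqrtsd writes only the low element, so the upper lanes
; carry a false dependency on the previous contents of the destination,
; whereas a movss/movsd load from memory zeroes the upper lanes and breaks
; that dependency.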

define float @rcpss(float* %a) {
; SSE-LABEL: rcpss:
; SSE:       # BB#0:
; SSE-NEXT:    movss (%rdi), %xmm0
; SSE-NEXT:    rcpss %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: rcpss:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss (%rdi), %xmm0
; AVX-NEXT:    vrcpss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
    %ld = load float, float* %a
    %ins = insertelement <4 x float> undef, float %ld, i32 0
    %res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ins)
    %ext = extractelement <4 x float> %res, i32 0
    ret float %ext
}

define float @rsqrtss(float* %a) {
; SSE-LABEL: rsqrtss:
; SSE:       # BB#0:
; SSE-NEXT:    movss (%rdi), %xmm0
; SSE-NEXT:    rsqrtss %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: rsqrtss:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss (%rdi), %xmm0
; AVX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
    %ld = load float, float* %a
    %ins = insertelement <4 x float> undef, float %ld, i32 0
    %res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ins)
    %ext = extractelement <4 x float> %res, i32 0
    ret float %ext
}

define float @sqrtss(float* %a) {
; SSE-LABEL: sqrtss:
; SSE:       # BB#0:
; SSE-NEXT:    movss (%rdi), %xmm0
; SSE-NEXT:    sqrtss %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sqrtss:
; AVX:       # BB#0:
; AVX-NEXT:    vmovss (%rdi), %xmm0
; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
    %ld = load float, float* %a
    %ins = insertelement <4 x float> undef, float %ld, i32 0
    %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ins)
    %ext = extractelement <4 x float> %res, i32 0
    ret float %ext
}

define double @sqrtsd(double* %a) {
; SSE-LABEL: sqrtsd:
; SSE:       # BB#0:
; SSE-NEXT:    movsd (%rdi), %xmm0
; SSE-NEXT:    sqrtsd %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sqrtsd:
; AVX:       # BB#0:
; AVX-NEXT:    vmovsd (%rdi), %xmm0
; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
    %ld = load double, double* %a
    %ins = insertelement <2 x double> undef, double %ld, i32 0
    %res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ins)
    %ext = extractelement <2 x double> %res, i32 0
    ret double %ext
}

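; Under optsize, the same loads should instead be folded into the memory
; operand of the unary instruction, saving the separate (v)movss/(v)movsd.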
define float @rcpss_size(float* %a) optsize {
; SSE-LABEL: rcpss_size:
; SSE:       # BB#0:
; SSE-NEXT:    rcpss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: rcpss_size:
; AVX:       # BB#0:
; AVX-NEXT:    vrcpss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
    %ld = load float, float* %a
    %ins = insertelement <4 x float> undef, float %ld, i32 0
    %res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ins)
    %ext = extractelement <4 x float> %res, i32 0
    ret float %ext
}

define float @rsqrtss_size(float* %a) optsize {
; SSE-LABEL: rsqrtss_size:
; SSE:       # BB#0:
; SSE-NEXT:    rsqrtss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: rsqrtss_size:
; AVX:       # BB#0:
; AVX-NEXT:    vrsqrtss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
    %ld = load float, float* %a
    %ins = insertelement <4 x float> undef, float %ld, i32 0
    %res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ins)
    %ext = extractelement <4 x float> %res, i32 0
    ret float %ext
}

define float @sqrtss_size(float* %a) optsize {
; SSE-LABEL: sqrtss_size:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtss (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sqrtss_size:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtss (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
    %ld = load float, float* %a
    %ins = insertelement <4 x float> undef, float %ld, i32 0
    %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ins)
    %ext = extractelement <4 x float> %res, i32 0
    ret float %ext
}

define double @sqrtsd_size(double* %a) optsize {
; SSE-LABEL: sqrtsd_size:
; SSE:       # BB#0:
; SSE-NEXT:    sqrtsd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: sqrtsd_size:
; AVX:       # BB#0:
; AVX-NEXT:    vsqrtsd (%rdi), %xmm0, %xmm0
; AVX-NEXT:    retq
    %ld = load double, double* %a
    %ins = insertelement <2 x double> undef, double %ld, i32 0
    %res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ins)
    %ext = extractelement <2 x double> %res, i32 0
    ret double %ext
}

declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone