Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
      3 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX
      4 ; RUN: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s | FileCheck %s --check-prefix=AVX
      5 
      6 ; Verify that loads are folded into unary scalar SSE intrinsics (rcpss, rsqrtss, sqrtss, sqrtsd) only when optimizing for size.
      7 
      8 define float @rcpss(float* %a) {
        ; Baseline without optsize: the checks below expect the scalar load to
        ; stay a separate (v)movss that feeds a register-register (v)rcpss.
      9 ; SSE-LABEL: rcpss:
     10 ; SSE:       # %bb.0:
     11 ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     12 ; SSE-NEXT:    rcpss %xmm0, %xmm0
     13 ; SSE-NEXT:    retq
     14 ;
     15 ; AVX-LABEL: rcpss:
     16 ; AVX:       # %bb.0:
     17 ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     18 ; AVX-NEXT:    vrcpss %xmm0, %xmm0, %xmm0
     19 ; AVX-NEXT:    retq
        ; Load one float, put it in lane 0 of an otherwise-undef vector, call the
        ; rcp.ss intrinsic, and return lane 0 of the result.
     20     %ld = load float, float* %a
     21     %ins = insertelement <4 x float> undef, float %ld, i32 0
     22     %res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ins)
     23     %ext = extractelement <4 x float> %res, i32 0
     24     ret float %ext
     25 }
     26 
     27 define float @rsqrtss(float* %a) {
        ; Baseline without optsize: the checks below expect a separate (v)movss
        ; load followed by a register-register (v)rsqrtss.
     28 ; SSE-LABEL: rsqrtss:
     29 ; SSE:       # %bb.0:
     30 ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     31 ; SSE-NEXT:    rsqrtss %xmm0, %xmm0
     32 ; SSE-NEXT:    retq
     33 ;
     34 ; AVX-LABEL: rsqrtss:
     35 ; AVX:       # %bb.0:
     36 ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     37 ; AVX-NEXT:    vrsqrtss %xmm0, %xmm0, %xmm0
     38 ; AVX-NEXT:    retq
        ; Scalar load -> lane 0 of an undef vector -> rsqrt.ss intrinsic -> lane 0
        ; of the result is returned.
     39     %ld = load float, float* %a
     40     %ins = insertelement <4 x float> undef, float %ld, i32 0
     41     %res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ins)
     42     %ext = extractelement <4 x float> %res, i32 0
     43     ret float %ext
     44 }
     45 
     46 define float @sqrtss(float* %a) {
        ; Baseline without optsize: the checks below expect a separate (v)movss
        ; load followed by a register-register (v)sqrtss.
     47 ; SSE-LABEL: sqrtss:
     48 ; SSE:       # %bb.0:
     49 ; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     50 ; SSE-NEXT:    sqrtss %xmm0, %xmm0
     51 ; SSE-NEXT:    retq
     52 ;
     53 ; AVX-LABEL: sqrtss:
     54 ; AVX:       # %bb.0:
     55 ; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
     56 ; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
     57 ; AVX-NEXT:    retq
        ; Scalar load -> lane 0 of an undef vector -> sqrt.ss intrinsic -> lane 0
        ; of the result is returned.
     58     %ld = load float, float* %a
     59     %ins = insertelement <4 x float> undef, float %ld, i32 0
     60     %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ins)
     61     %ext = extractelement <4 x float> %res, i32 0
     62     ret float %ext
     63 }
     64 
     65 define double @sqrtsd(double* %a) {
        ; Baseline without optsize, double-precision variant: the checks below
        ; expect a separate (v)movsd load followed by a register-register (v)sqrtsd.
     66 ; SSE-LABEL: sqrtsd:
     67 ; SSE:       # %bb.0:
     68 ; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
     69 ; SSE-NEXT:    sqrtsd %xmm0, %xmm0
     70 ; SSE-NEXT:    retq
     71 ;
     72 ; AVX-LABEL: sqrtsd:
     73 ; AVX:       # %bb.0:
     74 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
     75 ; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
     76 ; AVX-NEXT:    retq
        ; Scalar double load -> lane 0 of an undef <2 x double> -> sqrt.sd
        ; intrinsic -> lane 0 of the result is returned.
     77     %ld = load double, double* %a
     78     %ins = insertelement <2 x double> undef, double %ld, i32 0
     79     %res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ins)
     80     %ext = extractelement <2 x double> %res, i32 0
     81     ret double %ext
     82 }
     83 
     84 define float @rcpss_size(float* %a) optsize {
        ; Same IR as @rcpss but marked optsize: the checks below expect the load
        ; to be folded into the memory operand of (v)rcpss.
     85 ; SSE-LABEL: rcpss_size:
     86 ; SSE:       # %bb.0:
     87 ; SSE-NEXT:    rcpss (%rdi), %xmm0
     88 ; SSE-NEXT:    retq
     89 ;
     90 ; AVX-LABEL: rcpss_size:
     91 ; AVX:       # %bb.0:
     92 ; AVX-NEXT:    vrcpss (%rdi), %xmm0, %xmm0
     93 ; AVX-NEXT:    retq
        ; Only lane 0 of the intrinsic result is used; the other input lanes are
        ; undef.
     94     %ld = load float, float* %a
     95     %ins = insertelement <4 x float> undef, float %ld, i32 0
     96     %res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ins)
     97     %ext = extractelement <4 x float> %res, i32 0
     98     ret float %ext
     99 }
    100 
    101 define <4 x float> @rcpss_full_size(<4 x float>* %a) optsize {
        ; optsize variant where a full <4 x float> is loaded and passed straight to
        ; the intrinsic, and the whole result vector is returned. The checks below
        ; expect the load to be folded into (v)rcpss.
        ; NOTE(review): the folded form reads only the low float from memory, so
        ; lanes 1-3 of the returned vector would come from the prior contents of
        ; xmm0 rather than from %ld -- confirm this matches the intrinsic's
        ; semantics (compare @sqrtss_full_size, where the load is kept separate).
    102 ; SSE-LABEL: rcpss_full_size:
    103 ; SSE:       # %bb.0:
    104 ; SSE-NEXT:    rcpss (%rdi), %xmm0
    105 ; SSE-NEXT:    retq
    106 ;
    107 ; AVX-LABEL: rcpss_full_size:
    108 ; AVX:       # %bb.0:
    109 ; AVX-NEXT:    vrcpss (%rdi), %xmm0, %xmm0
    110 ; AVX-NEXT:    retq
    111     %ld = load <4 x float>, <4 x float>* %a
    112     %res = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %ld)
    113     ret <4 x float> %res
    114 }
    115 
    116 define float @rsqrtss_size(float* %a) optsize {
        ; Same IR as @rsqrtss but marked optsize: the checks below expect the load
        ; to be folded into the memory operand of (v)rsqrtss.
    117 ; SSE-LABEL: rsqrtss_size:
    118 ; SSE:       # %bb.0:
    119 ; SSE-NEXT:    rsqrtss (%rdi), %xmm0
    120 ; SSE-NEXT:    retq
    121 ;
    122 ; AVX-LABEL: rsqrtss_size:
    123 ; AVX:       # %bb.0:
    124 ; AVX-NEXT:    vrsqrtss (%rdi), %xmm0, %xmm0
    125 ; AVX-NEXT:    retq
        ; Only lane 0 of the intrinsic result is used; the other input lanes are
        ; undef.
    126     %ld = load float, float* %a
    127     %ins = insertelement <4 x float> undef, float %ld, i32 0
    128     %res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ins)
    129     %ext = extractelement <4 x float> %res, i32 0
    130     ret float %ext
    131 }
    132 
    133 define <4 x float> @rsqrtss_full_size(<4 x float>* %a) optsize {
        ; optsize variant where a full <4 x float> is loaded and passed straight to
        ; the intrinsic, and the whole result vector is returned. The checks below
        ; expect the load to be folded into (v)rsqrtss.
        ; NOTE(review): the folded form reads only the low float from memory, so
        ; lanes 1-3 of the returned vector would come from the prior contents of
        ; xmm0 rather than from %ld -- confirm this matches the intrinsic's
        ; semantics (compare @sqrtss_full_size, where the load is kept separate).
    134 ; SSE-LABEL: rsqrtss_full_size:
    135 ; SSE:       # %bb.0:
    136 ; SSE-NEXT:    rsqrtss (%rdi), %xmm0
    137 ; SSE-NEXT:    retq
    138 ;
    139 ; AVX-LABEL: rsqrtss_full_size:
    140 ; AVX:       # %bb.0:
    141 ; AVX-NEXT:    vrsqrtss (%rdi), %xmm0, %xmm0
    142 ; AVX-NEXT:    retq
    143     %ld = load <4 x float>, <4 x float>* %a
    144     %res = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %ld)
    145     ret <4 x float> %res
    146 }
    147 
    148 define float @sqrtss_size(float* %a) optsize{
        ; Same IR as @sqrtss but marked optsize: the checks below expect the load
        ; to be folded into the memory operand of (v)sqrtss.
    149 ; SSE-LABEL: sqrtss_size:
    150 ; SSE:       # %bb.0:
    151 ; SSE-NEXT:    sqrtss (%rdi), %xmm0
    152 ; SSE-NEXT:    retq
    153 ;
    154 ; AVX-LABEL: sqrtss_size:
    155 ; AVX:       # %bb.0:
    156 ; AVX-NEXT:    vsqrtss (%rdi), %xmm0, %xmm0
    157 ; AVX-NEXT:    retq
        ; Only lane 0 of the intrinsic result is used; the other input lanes are
        ; undef.
    158     %ld = load float, float* %a
    159     %ins = insertelement <4 x float> undef, float %ld, i32 0
    160     %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ins)
    161     %ext = extractelement <4 x float> %res, i32 0
    162     ret float %ext
    163 }
    164 
    165 define <4 x float> @sqrtss_full_size(<4 x float>* %a) optsize{
        ; optsize variant where a full <4 x float> is loaded and passed straight to
        ; the intrinsic, and the whole result vector is returned. The checks below
        ; expect the load to stay a separate (v)movaps even though the function is
        ; optsize (contrast with @rcpss_full_size, where the load is folded).
    166 ; SSE-LABEL: sqrtss_full_size:
    167 ; SSE:       # %bb.0:
    168 ; SSE-NEXT:    movaps (%rdi), %xmm0
    169 ; SSE-NEXT:    sqrtss %xmm0, %xmm0
    170 ; SSE-NEXT:    retq
    171 ;
    172 ; AVX-LABEL: sqrtss_full_size:
    173 ; AVX:       # %bb.0:
    174 ; AVX-NEXT:    vmovaps (%rdi), %xmm0
    175 ; AVX-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
    176 ; AVX-NEXT:    retq
    177     %ld = load <4 x float>, <4 x float>* %a
    178     %res = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %ld)
    179     ret <4 x float> %res
    180 }
    181 
    182 define double @sqrtsd_size(double* %a) optsize {
        ; Same IR as @sqrtsd but marked optsize: the checks below expect the load
        ; to be folded into the memory operand of (v)sqrtsd.
    183 ; SSE-LABEL: sqrtsd_size:
    184 ; SSE:       # %bb.0:
    185 ; SSE-NEXT:    sqrtsd (%rdi), %xmm0
    186 ; SSE-NEXT:    retq
    187 ;
    188 ; AVX-LABEL: sqrtsd_size:
    189 ; AVX:       # %bb.0:
    190 ; AVX-NEXT:    vsqrtsd (%rdi), %xmm0, %xmm0
    191 ; AVX-NEXT:    retq
        ; Only lane 0 of the intrinsic result is used; the other input lane is
        ; undef.
    192     %ld = load double, double* %a
    193     %ins = insertelement <2 x double> undef, double %ld, i32 0
    194     %res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ins)
    195     %ext = extractelement <2 x double> %res, i32 0
    196     ret double %ext
    197 }
    198 
    199 define <2 x double> @sqrtsd_full_size(<2 x double>* %a) optsize {
        ; optsize variant where a full <2 x double> is loaded and passed straight
        ; to the intrinsic, and the whole result vector is returned. The checks
        ; below expect the load to stay a separate (v)movapd even though the
        ; function is optsize (contrast with @sqrtsd_size, where it is folded).
    200 ; SSE-LABEL: sqrtsd_full_size:
    201 ; SSE:       # %bb.0:
    202 ; SSE-NEXT:    movapd (%rdi), %xmm0
    203 ; SSE-NEXT:    sqrtsd %xmm0, %xmm0
    204 ; SSE-NEXT:    retq
    205 ;
    206 ; AVX-LABEL: sqrtsd_full_size:
    207 ; AVX:       # %bb.0:
    208 ; AVX-NEXT:    vmovapd (%rdi), %xmm0
    209 ; AVX-NEXT:    vsqrtsd %xmm0, %xmm0, %xmm0
    210 ; AVX-NEXT:    retq
    211     %ld = load <2 x double>, <2 x double>* %a
    212     %res = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %ld)
    213     ret <2 x double> %res
    214 }
    215 
    216 declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone ; used by @rcpss, @rcpss_size, @rcpss_full_size
    217 declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone ; used by @rsqrtss, @rsqrtss_size, @rsqrtss_full_size
    218 declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone ; used by @sqrtss, @sqrtss_size, @sqrtss_full_size
    219 declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone ; used by @sqrtsd, @sqrtsd_size, @sqrtsd_full_size
    220