; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE
; RUN: llc < %s -mtriple=i686-unknown -mattr=+avx | FileCheck %s --check-prefix=X32-AVX
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X64-SSE
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64-AVX

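; Tests lowering of vector fptrunc (double to float) on x86, comparing
; SSE4.1 and AVX codegen on 32-bit and 64-bit targets.

; Truncate a <2 x double> loaded from memory and store the <2 x float>
; result. Only 64 bits are stored: x86-64 uses a single (v)movlpd of the low
; quadword, while i686 splits the store into (v)extractps + (v)movss.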
define void @fptrunc_frommem2(<2 x double>* %in, <2 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem2:
; X32-SSE:       # %bb.0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm0
; X32-SSE-NEXT:    extractps $1, %xmm0, 4(%eax)
; X32-SSE-NEXT:    movss %xmm0, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem2:
; X32-AVX:       # %bb.0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psx (%ecx), %xmm0
; X32-AVX-NEXT:    vextractps $1, %xmm0, 4(%eax)
; X32-AVX-NEXT:    vmovss %xmm0, (%eax)
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem2:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0
; X64-SSE-NEXT:    movlpd %xmm0, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem2:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vcvtpd2psx (%rdi), %xmm0
; X64-AVX-NEXT:    vmovlpd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %in
  %1 = fptrunc <2 x double> %0 to <2 x float>
  store <2 x float> %1, <2 x float>* %out, align 1
  ret void
}

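; Truncate a <4 x double> loaded from memory. SSE converts each 128-bit half
; with cvtpd2ps and recombines the halves with unpcklpd; AVX does it in a
; single vcvtpd2psy directly from memory.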
define void @fptrunc_frommem4(<4 x double>* %in, <4 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem4:
; X32-SSE:       # %bb.0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps 16(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm1
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X32-SSE-NEXT:    movupd %xmm1, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem4:
; X32-AVX:       # %bb.0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
; X32-AVX-NEXT:    vmovupd %xmm0, (%eax)
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem4:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    cvtpd2ps 16(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    movupd %xmm1, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem4:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
; X64-AVX-NEXT:    vmovupd %xmm0, (%rsi)
; X64-AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %in
  %1 = fptrunc <4 x double> %0 to <4 x float>
  store <4 x float> %1, <4 x float>* %out, align 1
  ret void
}

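; Truncate an <8 x double> loaded from memory. SSE needs four cvtpd2ps
; conversions and two unpcklpd merges; AVX uses two vcvtpd2psy conversions
; plus a vinsertf128 to assemble the <8 x float> result in a ymm register.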
define void @fptrunc_frommem8(<8 x double>* %in, <8 x float>* %out) {
; X32-SSE-LABEL: fptrunc_frommem8:
; X32-SSE:       # %bb.0: # %entry
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-SSE-NEXT:    cvtpd2ps 16(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps (%ecx), %xmm1
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X32-SSE-NEXT:    cvtpd2ps 48(%ecx), %xmm0
; X32-SSE-NEXT:    cvtpd2ps 32(%ecx), %xmm2
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X32-SSE-NEXT:    movupd %xmm2, 16(%eax)
; X32-SSE-NEXT:    movupd %xmm1, (%eax)
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem8:
; X32-AVX:       # %bb.0: # %entry
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-AVX-NEXT:    vcvtpd2psy (%ecx), %xmm0
; X32-AVX-NEXT:    vcvtpd2psy 32(%ecx), %xmm1
; X32-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X32-AVX-NEXT:    vmovups %ymm0, (%eax)
; X32-AVX-NEXT:    vzeroupper
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem8:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    cvtpd2ps 16(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm1
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    cvtpd2ps 48(%rdi), %xmm0
; X64-SSE-NEXT:    cvtpd2ps 32(%rdi), %xmm2
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X64-SSE-NEXT:    movupd %xmm2, 16(%rsi)
; X64-SSE-NEXT:    movupd %xmm1, (%rsi)
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem8:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy (%rdi), %xmm0
; X64-AVX-NEXT:    vcvtpd2psy 32(%rdi), %xmm1
; X64-AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; X64-AVX-NEXT:    vmovups %ymm0, (%rsi)
; X64-AVX-NEXT:    vzeroupper
; X64-AVX-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %in
  %1 = fptrunc <8 x double> %0 to <8 x float>
  store <8 x float> %1, <8 x float>* %out, align 1
  ret void
}

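; Truncating a <2 x double> load and zero-extending the result to
; <4 x float> should fold to a single conversion: (v)cvtpd2ps already zeroes
; the upper 64 bits of the destination, so no extra shuffle is needed.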
define <4 x float> @fptrunc_frommem2_zext(<2 x double>* %ld) {
; X32-SSE-LABEL: fptrunc_frommem2_zext:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-SSE-NEXT:    cvtpd2ps (%eax), %xmm0
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_frommem2_zext:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-AVX-NEXT:    vcvtpd2psx (%eax), %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_frommem2_zext:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    cvtpd2ps (%rdi), %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_frommem2_zext:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vcvtpd2psx (%rdi), %xmm0
; X64-AVX-NEXT:    retq
  %arg = load <2 x double>, <2 x double>* %ld, align 16
  %cvt = fptrunc <2 x double> %arg to <2 x float>
  %ret = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  ret <4 x float> %ret
}

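; Same pattern as above, but with the source already in a register: the
; zeroing of the upper elements still comes for free from (v)cvtpd2ps.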
define <4 x float> @fptrunc_fromreg2_zext(<2 x double> %arg) {
; X32-SSE-LABEL: fptrunc_fromreg2_zext:
; X32-SSE:       # %bb.0:
; X32-SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_fromreg2_zext:
; X32-AVX:       # %bb.0:
; X32-AVX-NEXT:    vcvtpd2ps %xmm0, %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_fromreg2_zext:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    cvtpd2ps %xmm0, %xmm0
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_fromreg2_zext:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vcvtpd2ps %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %cvt = fptrunc <2 x double> %arg to <2 x float>
  %ret = shufflevector <2 x float> %cvt, <2 x float> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
  ret <4 x float> %ret
}

; FIXME: For exact truncations we should be able to fold this.
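; All four constants (1.0, -2.0, 4.0, -0.0) are exactly representable as
; floats, so the whole sequence could fold to a single constant-pool load of
; <float 1.0, float -2.0, float 4.0, float -0.0>.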
define <4 x float> @fptrunc_fromconst() {
; X32-SSE-LABEL: fptrunc_fromconst:
; X32-SSE:       # %bb.0: # %entry
; X32-SSE-NEXT:    cvtpd2ps {{\.LCPI.*}}, %xmm1
; X32-SSE-NEXT:    cvtpd2ps {{\.LCPI.*}}, %xmm0
; X32-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-SSE-NEXT:    retl
;
; X32-AVX-LABEL: fptrunc_fromconst:
; X32-AVX:       # %bb.0: # %entry
; X32-AVX-NEXT:    vcvtpd2psy {{\.LCPI.*}}, %xmm0
; X32-AVX-NEXT:    retl
;
; X64-SSE-LABEL: fptrunc_fromconst:
; X64-SSE:       # %bb.0: # %entry
; X64-SSE-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm1
; X64-SSE-NEXT:    cvtpd2ps {{.*}}(%rip), %xmm0
; X64-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-SSE-NEXT:    retq
;
; X64-AVX-LABEL: fptrunc_fromconst:
; X64-AVX:       # %bb.0: # %entry
; X64-AVX-NEXT:    vcvtpd2psy {{.*}}(%rip), %xmm0
; X64-AVX-NEXT:    retq
entry:
  %0  = insertelement <4 x double> undef, double 1.0, i32 0
  %1  = insertelement <4 x double> %0, double -2.0, i32 1
  %2  = insertelement <4 x double> %1, double +4.0, i32 2
  %3  = insertelement <4 x double> %2, double -0.0, i32 3
  %4  = fptrunc <4 x double> %3 to <4 x float>
  ret <4 x float> %4
}