Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+f16c | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=IVY
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
      7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
      8 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1
      9 
     10 define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) {
        ; Exercises @llvm.x86.vcvtph2ps.128 (<8 x i16> in, <4 x float> out) with both
        ; a memory operand (loaded from %a1) and a register operand (%a0), and pins
        ; the `# sched:` annotation printed for each emitted instruction per CPU.
        ; The per-CPU expectations that follow are autogenerated (see the NOTE at
        ; the top of the file); regenerate them instead of editing them by hand.
     11 ; GENERIC-LABEL: test_vcvtph2ps_128:
     12 ; GENERIC:       # %bb.0:
     13 ; GENERIC-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
     14 ; GENERIC-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
     15 ; GENERIC-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
     16 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     17 ;
     18 ; IVY-LABEL: test_vcvtph2ps_128:
     19 ; IVY:       # %bb.0:
     20 ; IVY-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
     21 ; IVY-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
     22 ; IVY-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
     23 ; IVY-NEXT:    retq # sched: [1:1.00]
     24 ;
     25 ; HASWELL-LABEL: test_vcvtph2ps_128:
     26 ; HASWELL:       # %bb.0:
     27 ; HASWELL-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [6:1.00]
     28 ; HASWELL-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [2:1.00]
     29 ; HASWELL-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
     30 ; HASWELL-NEXT:    retq # sched: [7:1.00]
     31 ;
     32 ; BROADWELL-LABEL: test_vcvtph2ps_128:
     33 ; BROADWELL:       # %bb.0:
     34 ; BROADWELL-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [6:1.00]
     35 ; BROADWELL-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [2:1.00]
     36 ; BROADWELL-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
     37 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
     38 ;
     39 ; SKYLAKE-LABEL: test_vcvtph2ps_128:
     40 ; SKYLAKE:       # %bb.0:
     41 ; SKYLAKE-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [9:0.50]
     42 ; SKYLAKE-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [5:1.00]
     43 ; SKYLAKE-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
     44 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
     45 ;
     46 ; BTVER2-LABEL: test_vcvtph2ps_128:
     47 ; BTVER2:       # %bb.0:
     48 ; BTVER2-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
     49 ; BTVER2-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
     50 ; BTVER2-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
     51 ; BTVER2-NEXT:    retq # sched: [4:1.00]
     52 ;
     53 ; ZNVER1-LABEL: test_vcvtph2ps_128:
     54 ; ZNVER1:       # %bb.0:
     55 ; ZNVER1-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [100:0.25]
     56 ; ZNVER1-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [100:0.25]
     57 ; ZNVER1-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
     58 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; The load of %a1 is folded into the first conversion (the `(%rdi)` operand
        ; in the expected assembly); the second conversion takes %a0 in a register.
        ; The fadd combines both results so each call contributes to the return value.
     59   %1 = load <8 x i16>, <8 x i16> *%a1
     60   %2 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1)
     61   %3 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0)
     62   %4 = fadd <4 x float> %2, %3
     63   ret <4 x float> %4
     64 }
        ; Declaration of the intrinsic called by @test_vcvtph2ps_128 above.
     65 declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>)
     66 
     67 define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) {
        ; Exercises @llvm.x86.vcvtph2ps.256 (<8 x i16> in, <8 x float> out) with both
        ; a memory operand (loaded from %a1) and a register operand (%a0). In the
        ; expected assembly the destination is a %ymm register while the register
        ; source stays %xmm. Each instruction's `# sched:` annotation is pinned per CPU.
        ; The per-CPU expectations that follow are autogenerated (see the NOTE at
        ; the top of the file); regenerate them instead of editing them by hand.
     68 ; GENERIC-LABEL: test_vcvtph2ps_256:
     69 ; GENERIC:       # %bb.0:
     70 ; GENERIC-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00]
     71 ; GENERIC-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
     72 ; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
     73 ; GENERIC-NEXT:    retq # sched: [1:1.00]
     74 ;
     75 ; IVY-LABEL: test_vcvtph2ps_256:
     76 ; IVY:       # %bb.0:
     77 ; IVY-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00]
     78 ; IVY-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
     79 ; IVY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
     80 ; IVY-NEXT:    retq # sched: [1:1.00]
     81 ;
     82 ; HASWELL-LABEL: test_vcvtph2ps_256:
     83 ; HASWELL:       # %bb.0:
     84 ; HASWELL-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00]
     85 ; HASWELL-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [2:1.00]
     86 ; HASWELL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
     87 ; HASWELL-NEXT:    retq # sched: [7:1.00]
     88 ;
     89 ; BROADWELL-LABEL: test_vcvtph2ps_256:
     90 ; BROADWELL:       # %bb.0:
     91 ; BROADWELL-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [6:1.00]
     92 ; BROADWELL-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [2:1.00]
     93 ; BROADWELL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
     94 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
     95 ;
     96 ; SKYLAKE-LABEL: test_vcvtph2ps_256:
     97 ; SKYLAKE:       # %bb.0:
     98 ; SKYLAKE-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [10:0.50]
     99 ; SKYLAKE-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [7:1.00]
    100 ; SKYLAKE-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
    101 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    102 ;
    103 ; BTVER2-LABEL: test_vcvtph2ps_256:
    104 ; BTVER2:       # %bb.0:
    105 ; BTVER2-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [8:2.00]
    106 ; BTVER2-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [3:2.00]
    107 ; BTVER2-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
    108 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    109 ;
    110 ; ZNVER1-LABEL: test_vcvtph2ps_256:
    111 ; ZNVER1:       # %bb.0:
    112 ; ZNVER1-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [100:0.25]
    113 ; ZNVER1-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [100:0.25]
    114 ; ZNVER1-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
    115 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; The load of %a1 is folded into the first conversion (the `(%rdi)` operand
        ; in the expected assembly); the second conversion takes %a0 in a register.
        ; The fadd combines both results so each call contributes to the return value.
    116   %1 = load <8 x i16>, <8 x i16> *%a1
    117   %2 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
    118   %3 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0)
    119   %4 = fadd <8 x float> %2, %3
    120   ret <8 x float> %4
    121 }
        ; Declaration of the intrinsic called by @test_vcvtph2ps_256 above.
    122 declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>)
    123 
    124 define <8 x i16> @test_vcvtps2ph_128(<4 x float> %a0, <4 x float> %a1, <4 x i16> *%a2) {
        ; Exercises @llvm.x86.vcvtps2ph.128 (<4 x float> plus an i32 immediate in,
        ; <8 x i16> out) in two forms: a register destination (result of %a0, returned)
        ; and a memory destination (result of %a1, stored through %a2). The `# sched:`
        ; annotation printed for each emitted instruction is pinned per CPU.
        ; The per-CPU expectations that follow are autogenerated (see the NOTE at
        ; the top of the file); regenerate them instead of editing them by hand.
    125 ; GENERIC-LABEL: test_vcvtps2ph_128:
    126 ; GENERIC:       # %bb.0:
    127 ; GENERIC-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
    128 ; GENERIC-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
    129 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    130 ;
    131 ; IVY-LABEL: test_vcvtps2ph_128:
    132 ; IVY:       # %bb.0:
    133 ; IVY-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
    134 ; IVY-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
    135 ; IVY-NEXT:    retq # sched: [1:1.00]
    136 ;
    137 ; HASWELL-LABEL: test_vcvtps2ph_128:
    138 ; HASWELL:       # %bb.0:
    139 ; HASWELL-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00]
    140 ; HASWELL-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [5:1.00]
    141 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    142 ;
    143 ; BROADWELL-LABEL: test_vcvtps2ph_128:
    144 ; BROADWELL:       # %bb.0:
    145 ; BROADWELL-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00]
    146 ; BROADWELL-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [5:1.00]
    147 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    148 ;
    149 ; SKYLAKE-LABEL: test_vcvtps2ph_128:
    150 ; SKYLAKE:       # %bb.0:
    151 ; SKYLAKE-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [5:1.00]
    152 ; SKYLAKE-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [6:1.00]
    153 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    154 ;
    155 ; BTVER2-LABEL: test_vcvtps2ph_128:
    156 ; BTVER2:       # %bb.0:
    157 ; BTVER2-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
    158 ; BTVER2-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
    159 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    160 ;
    161 ; ZNVER1-LABEL: test_vcvtps2ph_128:
    162 ; ZNVER1:       # %bb.0:
    163 ; ZNVER1-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [100:0.25]
    164 ; ZNVER1-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [100:0.25]
    165 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 is returned in a register. For %2, the shufflevector keeps elements 0-3
        ; of the <8 x i16> result and the store writes that <4 x i16> through %a2;
        ; the expected assembly shows this pair as the single `(%rdi)` store form.
        ; The `i32 0` argument appears as the `$0` immediate in the assembly.
    166   %1 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
    167   %2 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a1, i32 0)
    168   %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    169   store <4 x i16> %3, <4 x i16> *%a2
    170   ret <8 x i16> %1
    171 }
        ; Declaration of the intrinsic called by @test_vcvtps2ph_128 above.
    172 declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32)
    173 
    174 define <8 x i16> @test_vcvtps2ph_256(<8 x float> %a0, <8 x float> %a1, <8 x i16> *%a2) {
        ; Exercises @llvm.x86.vcvtps2ph.256 (<8 x float> plus an i32 immediate in,
        ; <8 x i16> out) in two forms: a register destination (result of %a0, returned)
        ; and a memory destination (result of %a1, stored through %a2). The `# sched:`
        ; annotation printed for each emitted instruction is pinned per CPU.
        ; Because the inputs are %ymm registers, the expected assembly also contains a
        ; vzeroupper before retq for every CPU except BTVER2, whose expectations
        ; below have none.
        ; The per-CPU expectations that follow are autogenerated (see the NOTE at
        ; the top of the file); regenerate them instead of editing them by hand.
    175 ; GENERIC-LABEL: test_vcvtps2ph_256:
    176 ; GENERIC:       # %bb.0:
    177 ; GENERIC-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
    178 ; GENERIC-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
    179 ; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
    180 ; GENERIC-NEXT:    retq # sched: [1:1.00]
    181 ;
    182 ; IVY-LABEL: test_vcvtps2ph_256:
    183 ; IVY:       # %bb.0:
    184 ; IVY-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
    185 ; IVY-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
    186 ; IVY-NEXT:    vzeroupper # sched: [100:0.33]
    187 ; IVY-NEXT:    retq # sched: [1:1.00]
    188 ;
    189 ; HASWELL-LABEL: test_vcvtps2ph_256:
    190 ; HASWELL:       # %bb.0:
    191 ; HASWELL-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00]
    192 ; HASWELL-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
    193 ; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
    194 ; HASWELL-NEXT:    retq # sched: [7:1.00]
    195 ;
    196 ; BROADWELL-LABEL: test_vcvtps2ph_256:
    197 ; BROADWELL:       # %bb.0:
    198 ; BROADWELL-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00]
    199 ; BROADWELL-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
    200 ; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
    201 ; BROADWELL-NEXT:    retq # sched: [7:1.00]
    202 ;
    203 ; SKYLAKE-LABEL: test_vcvtps2ph_256:
    204 ; SKYLAKE:       # %bb.0:
    205 ; SKYLAKE-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [7:1.00]
    206 ; SKYLAKE-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
    207 ; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
    208 ; SKYLAKE-NEXT:    retq # sched: [7:1.00]
    209 ;
    210 ; BTVER2-LABEL: test_vcvtps2ph_256:
    211 ; BTVER2:       # %bb.0:
    212 ; BTVER2-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:2.00]
    213 ; BTVER2-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:2.00]
    214 ; BTVER2-NEXT:    retq # sched: [4:1.00]
    215 ;
    216 ; ZNVER1-LABEL: test_vcvtps2ph_256:
    217 ; ZNVER1:       # %bb.0:
    218 ; ZNVER1-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [100:0.25]
    219 ; ZNVER1-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [100:0.25]
    220 ; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
    221 ; ZNVER1-NEXT:    retq # sched: [1:0.50]
        ; %1 is returned in a register. %2 is stored whole through %a2, which the
        ; expected assembly shows as the `(%rdi)` store form of the instruction.
        ; The `i32 0` argument appears as the `$0` immediate in the assembly.
    222   %1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
    223   %2 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a1, i32 0)
    224   store <8 x i16> %2, <8 x i16> *%a2
    225   ret <8 x i16> %1
    226 }
        ; Declaration of the intrinsic called by @test_vcvtps2ph_256 above.
    227 declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32)
    228