; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=x86-64 -mattr=+f16c | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=ivybridge | FileCheck %s --check-prefix=CHECK --check-prefix=IVY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=haswell | FileCheck %s --check-prefix=CHECK --check-prefix=HASWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=broadwell | FileCheck %s --check-prefix=CHECK --check-prefix=BROADWELL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=skylake | FileCheck %s --check-prefix=CHECK --check-prefix=SKYLAKE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=btver2 | FileCheck %s --check-prefix=CHECK --check-prefix=BTVER2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -print-schedule -mcpu=znver1 | FileCheck %s --check-prefix=CHECK --check-prefix=ZNVER1

; Scheduling-annotation test for the half-precision conversion intrinsics
; (vcvtph2ps / vcvtps2ph). The file is compiled once per llc invocation listed
; above; each invocation selects a different -mcpu and pairs the shared prefix
; with one CPU-specific prefix, so every function below carries one block of
; expectations per CPU.
;
; Each expected instruction ends with the "# sched: [x:y]" annotation that llc
; prints under -print-schedule (presumably latency and reciprocal throughput;
; confirm against the scheduling-model printer). The expectation blocks are
; generated by the script named on the first line: regenerate them with that
; script instead of editing the numbers by hand.

; 128-bit half -> single conversion. One operand is loaded from %a1 and one is
; passed in %a0, so the expectations cover both the memory-source and the
; register-source form of vcvtph2ps; the two results are added to keep both
; conversions live.
define <4 x float> @test_vcvtph2ps_128(<8 x i16> %a0, <8 x i16> *%a1) {
; GENERIC-LABEL: test_vcvtph2ps_128:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
; GENERIC-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; IVY-LABEL: test_vcvtph2ps_128:
; IVY:       # %bb.0:
; IVY-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
; IVY-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
; IVY-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; IVY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vcvtph2ps_128:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [6:1.00]
; HASWELL-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [2:1.00]
; HASWELL-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vcvtph2ps_128:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [6:1.00]
; BROADWELL-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [2:1.00]
; BROADWELL-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vcvtph2ps_128:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [9:0.50]
; SKYLAKE-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_vcvtph2ps_128:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [8:1.00]
; BTVER2-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_vcvtph2ps_128:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vcvtph2ps (%rdi), %xmm1 # sched: [100:0.25]
; ZNVER1-NEXT:    vcvtph2ps %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = load <8 x i16>, <8 x i16> *%a1
  %2 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1)
  %3 = call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %a0)
  %4 = fadd <4 x float> %2, %3
  ret <4 x float> %4
}
declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>)

; 256-bit half -> single conversion. Same shape as the 128-bit test: one
; operand loaded from %a1, one taken from %a0, results added. The expectations
; show an xmm (or memory) source widening into a ymm destination.
define <8 x float> @test_vcvtph2ps_256(<8 x i16> %a0, <8 x i16> *%a1) {
; GENERIC-LABEL: test_vcvtph2ps_256:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00]
; GENERIC-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; IVY-LABEL: test_vcvtph2ps_256:
; IVY:       # %bb.0:
; IVY-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [8:1.00]
; IVY-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [3:1.00]
; IVY-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; IVY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vcvtph2ps_256:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [7:1.00]
; HASWELL-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [2:1.00]
; HASWELL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vcvtph2ps_256:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [6:1.00]
; BROADWELL-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [2:1.00]
; BROADWELL-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vcvtph2ps_256:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [10:0.50]
; SKYLAKE-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [7:1.00]
; SKYLAKE-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [4:0.50]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_vcvtph2ps_256:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [8:2.00]
; BTVER2-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [3:2.00]
; BTVER2-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:2.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_vcvtph2ps_256:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vcvtph2ps (%rdi), %ymm1 # sched: [100:0.25]
; ZNVER1-NEXT:    vcvtph2ps %xmm0, %ymm0 # sched: [100:0.25]
; ZNVER1-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # sched: [3:1.00]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = load <8 x i16>, <8 x i16> *%a1
  %2 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
  %3 = call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %a0)
  %4 = fadd <8 x float> %2, %3
  ret <8 x float> %4
}
declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>)

; 128-bit single -> half conversion with immediate 0. The conversion of %a0 is
; returned in a register; the low four i16 lanes of the conversion of %a1 are
; stored through %a2, which the expectations show as the memory-destination
; form of vcvtps2ph.
define <8 x i16> @test_vcvtps2ph_128(<4 x float> %a0, <4 x float> %a1, <4 x i16> *%a2) {
; GENERIC-LABEL: test_vcvtps2ph_128:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; IVY-LABEL: test_vcvtps2ph_128:
; IVY:       # %bb.0:
; IVY-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
; IVY-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
; IVY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vcvtps2ph_128:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00]
; HASWELL-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [5:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vcvtps2ph_128:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [4:1.00]
; BROADWELL-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [5:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vcvtps2ph_128:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [5:1.00]
; SKYLAKE-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [6:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_vcvtps2ph_128:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [4:1.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_vcvtps2ph_128:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vcvtps2ph $0, %xmm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    vcvtps2ph $0, %xmm1, (%rdi) # sched: [100:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a0, i32 0)
  %2 = call <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float> %a1, i32 0)
  %3 = shufflevector <8 x i16> %2, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  store <4 x i16> %3, <4 x i16> *%a2
  ret <8 x i16> %1
}
declare <8 x i16> @llvm.x86.vcvtps2ph.128(<4 x float>, i32)

; 256-bit single -> half conversion with immediate 0. The conversion of %a0 is
; returned in a register and the conversion of %a1 is stored through %a2.
; Every expectation block except the one for btver2 also expects a vzeroupper
; before the return.
define <8 x i16> @test_vcvtps2ph_256(<8 x float> %a0, <8 x float> %a1, <8 x i16> *%a2) {
; GENERIC-LABEL: test_vcvtps2ph_256:
; GENERIC:       # %bb.0:
; GENERIC-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
; GENERIC-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
; GENERIC-NEXT:    vzeroupper # sched: [100:0.33]
; GENERIC-NEXT:    retq # sched: [1:1.00]
;
; IVY-LABEL: test_vcvtps2ph_256:
; IVY:       # %bb.0:
; IVY-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [3:1.00]
; IVY-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [4:1.00]
; IVY-NEXT:    vzeroupper # sched: [100:0.33]
; IVY-NEXT:    retq # sched: [1:1.00]
;
; HASWELL-LABEL: test_vcvtps2ph_256:
; HASWELL:       # %bb.0:
; HASWELL-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00]
; HASWELL-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
; HASWELL-NEXT:    vzeroupper # sched: [4:1.00]
; HASWELL-NEXT:    retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_vcvtps2ph_256:
; BROADWELL:       # %bb.0:
; BROADWELL-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:1.00]
; BROADWELL-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:1.00]
; BROADWELL-NEXT:    vzeroupper # sched: [4:1.00]
; BROADWELL-NEXT:    retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_vcvtps2ph_256:
; SKYLAKE:       # %bb.0:
; SKYLAKE-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [7:1.00]
; SKYLAKE-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [8:1.00]
; SKYLAKE-NEXT:    vzeroupper # sched: [4:1.00]
; SKYLAKE-NEXT:    retq # sched: [7:1.00]
;
; BTVER2-LABEL: test_vcvtps2ph_256:
; BTVER2:       # %bb.0:
; BTVER2-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [6:2.00]
; BTVER2-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [7:2.00]
; BTVER2-NEXT:    retq # sched: [4:1.00]
;
; ZNVER1-LABEL: test_vcvtps2ph_256:
; ZNVER1:       # %bb.0:
; ZNVER1-NEXT:    vcvtps2ph $0, %ymm0, %xmm0 # sched: [100:0.25]
; ZNVER1-NEXT:    vcvtps2ph $0, %ymm1, (%rdi) # sched: [100:0.25]
; ZNVER1-NEXT:    vzeroupper # sched: [100:0.25]
; ZNVER1-NEXT:    retq # sched: [1:0.50]
  %1 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a0, i32 0)
  %2 = call <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float> %a1, i32 0)
  store <8 x i16> %2, <8 x i16> *%a2
  ret <8 x i16> %1
}
declare <8 x i16> @llvm.x86.vcvtps2ph.256(<8 x float>, i32)