; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL

;
; vXf32
;

define float @test_v2f32(<2 x float> %a0) {
; SSE2-LABEL: test_v2f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v2f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v2f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v2f32(<2 x float> %a0)
  ret float %1
}

define float @test_v4f32(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %a0)
  ret float %1
}

define float @test_v8f32(<8 x float> %a0) {
; SSE2-LABEL: test_v8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v8f32(<8 x float> %a0)
  ret float %1
}

define float @test_v16f32(<16 x float> %a0) {
; SSE2-LABEL: test_v16f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    minps %xmm3, %xmm1
; SSE2-NEXT:    minps %xmm2, %xmm0
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    minps %xmm3, %xmm1
; SSE41-NEXT:    minps %xmm2, %xmm0
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v16f32(<16 x float> %a0)
  ret float %1
}

;
; vXf64
;

define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %a0)
  ret double %1
}

define double @test_v4f64(<4 x double> %a0) {
; SSE-LABEL: test_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v4f64(<4 x double> %a0)
  ret double %1
}

define double @test_v8f64(<8 x double> %a0) {
; SSE-LABEL: test_v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm3, %xmm1
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v8f64(<8 x double> %a0)
  ret double %1
}

define double @test_v16f64(<16 x double> %a0) {
; SSE-LABEL: test_v16f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm6, %xmm2
; SSE-NEXT:    minpd %xmm4, %xmm0
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    minpd %xmm7, %xmm3
; SSE-NEXT:    minpd %xmm5, %xmm1
; SSE-NEXT:    minpd %xmm3, %xmm1
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %ymm3, %ymm1, %ymm1
; AVX-NEXT:    vminpd %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v16f64(<16 x double> %a0)
  ret double %1
}

declare float @llvm.experimental.vector.reduce.fmin.f32.v2f32(<2 x float>)
declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float>)
declare float @llvm.experimental.vector.reduce.fmin.f32.v8f32(<8 x float>)
declare float @llvm.experimental.vector.reduce.fmin.f32.v16f32(<16 x float>)

declare double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double>)
declare double @llvm.experimental.vector.reduce.fmin.f64.v4f64(<4 x double>)
declare double @llvm.experimental.vector.reduce.fmin.f64.v8f64(<8 x double>)
declare double @llvm.experimental.vector.reduce.fmin.f64.v16f64(<16 x double>)