; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=ALL --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=AVX512VL
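;
; Each RUN line compiles the same IR for one x86 feature level and pipes the
; asm into FileCheck; the shared ALL/SSE/AVX/AVX512 prefixes match output
; common to several levels, while SSE2/SSE41/AVX1/AVX2/AVX512BW/AVX512VL
; match level-specific output. A minimal sketch of reproducing one
; configuration by hand (the file name here is hypothetical):
;
;   llc < vector-reduce-fmin.ll -mtriple=x86_64-unknown-unknown -mattr=+avx \
;     | FileCheck vector-reduce-fmin.ll --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1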

;
; vXf32
;

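; Each test below reduces a vector to a scalar with a shuffle tree: fold the
; upper half onto the lower half with a min, halving the live width each
; step, so the final 128-bit stage of a vNf32 reduction costs log2 shuffle
; and min pairs. A hand-worked sketch for v4f32 <a,b,c,d>:
;
;   step 1: min(<a,b,_,_>, <c,d,_,_>) -> <min(a,c), min(b,d), _, _>
;   step 2: min(lane 0, lane 1)       -> min(min(a,c), min(b,d))
;
; Note that minps/minpd are not commutative for NaN or signed-zero inputs,
; so this lowering assumes the reduction does not require strict IEEE
; ordering of those cases.
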
define float @test_v2f32(<2 x float> %a0) {
; SSE2-LABEL: test_v2f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v2f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v2f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v2f32(<2 x float> %a0)
  ret float %1
}
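
; Note on the v2f32 checks above: the SSE2 run needs a movaps copy because
; two-operand shufps overwrites its destination register, while the later
; runs use movshdup (an SSE3 instruction, implied here by +sse4.1 and +avx)
; to splat the odd lanes without clobbering %xmm0.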

define float @test_v4f32(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v4f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v4f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float> %a0)
  ret float %1
}

define float @test_v8f32(<8 x float> %a0) {
; SSE2-LABEL: test_v8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v8f32(<8 x float> %a0)
  ret float %1
}
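
; In the 256-bit AVX/AVX512 checks above, the min ops stay at ymm width even
; after the upper half has been folded in; the trailing "# kill" comment is a
; register-liveness note marking that only the xmm sub-register of the result
; is live, and vzeroupper is emitted before returning to avoid AVX-to-SSE
; transition penalties in the caller.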

define float @test_v16f32(<16 x float> %a0) {
; SSE2-LABEL: test_v16f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    minps %xmm3, %xmm1
; SSE2-NEXT:    minps %xmm2, %xmm0
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT:    minps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: test_v16f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    minps %xmm3, %xmm1
; SSE41-NEXT:    minps %xmm2, %xmm0
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movaps %xmm0, %xmm1
; SSE41-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT:    minps %xmm1, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: test_v16f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512-NEXT:    vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call float @llvm.experimental.vector.reduce.fmin.f32.v16f32(<16 x float> %a0)
  ret float %1
}

;
; vXf64
;

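; The f64 reductions below use the same halving strategy; because a 128-bit
; register holds only two doubles, each vNf64 test needs one fewer shuffle
; stage than the vNf32 test of the same bit width, bottoming out in a single
; movhlps (SSE) or vpermilpd (AVX) plus minpd.
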
define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v2f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double> %a0)
  ret double %1
}

define double @test_v4f64(<4 x double> %a0) {
; SSE-LABEL: test_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v4f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v4f64(<4 x double> %a0)
  ret double %1
}

define double @test_v8f64(<8 x double> %a0) {
; SSE-LABEL: test_v8f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm3, %xmm1
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v8f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v8f64(<8 x double> %a0)
  ret double %1
}

define double @test_v16f64(<16 x double> %a0) {
; SSE-LABEL: test_v16f64:
; SSE:       # %bb.0:
; SSE-NEXT:    minpd %xmm6, %xmm2
; SSE-NEXT:    minpd %xmm4, %xmm0
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    minpd %xmm7, %xmm3
; SSE-NEXT:    minpd %xmm5, %xmm1
; SSE-NEXT:    minpd %xmm3, %xmm1
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    movapd %xmm0, %xmm1
; SSE-NEXT:    movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; SSE-NEXT:    minpd %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16f64:
; AVX:       # %bb.0:
; AVX-NEXT:    vminpd %ymm3, %ymm1, %ymm1
; AVX-NEXT:    vminpd %ymm2, %ymm0, %ymm0
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_v16f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf64x4 $1, %zmm0, %ymm1
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX512-NEXT:    vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = call double @llvm.experimental.vector.reduce.fmin.f64.v16f64(<16 x double> %a0)
  ret double %1
}

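; A minimal usage sketch for the intrinsics declared below, kept inside a
; comment so the autogenerated checks above stay untouched (the function
; name is hypothetical):
;
;   define float @min_of_array(<8 x float>* %p) {
;     %v = load <8 x float>, <8 x float>* %p, align 32
;     %m = call float @llvm.experimental.vector.reduce.fmin.f32.v8f32(<8 x float> %v)
;     ret float %m
;   }
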
declare float @llvm.experimental.vector.reduce.fmin.f32.v2f32(<2 x float>)
declare float @llvm.experimental.vector.reduce.fmin.f32.v4f32(<4 x float>)
declare float @llvm.experimental.vector.reduce.fmin.f32.v8f32(<8 x float>)
declare float @llvm.experimental.vector.reduce.fmin.f32.v16f32(<16 x float>)

declare double @llvm.experimental.vector.reduce.fmin.f64.v2f64(<2 x double>)
declare double @llvm.experimental.vector.reduce.fmin.f64.v4f64(<4 x double>)
declare double @llvm.experimental.vector.reduce.fmin.f64.v8f64(<8 x double>)
declare double @llvm.experimental.vector.reduce.fmin.f64.v16f64(<16 x double>)