; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=AVX
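
; Note: these tests check that the backend (the machine combiner) reassociates
; chains of dependent floating-point ops into independent ops that can execute
; in parallel. Reassociating FP math changes rounding, so the transform is only
; attempted because -enable-unsafe-fp-math is set on the RUN lines above.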

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
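; That is, a serial chain ((x0 + x1) + x2) + x3 becomes the shallower
; (x0 + x1) + (x2 + x3), no matter how the operands of each add are ordered
; in the IR below.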

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds1:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds1:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds2:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds2:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds3:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds3:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds4:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds4:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
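; For this 8-operand chain, a fully balanced tree would have depth 3; the
; checks below accept a partially balanced shape (depth 4), which is still far
; shorter than the original depth-7 serial chain.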

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; SSE-LABEL: reassociate_adds5:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    addss %xmm5, %xmm4
; SSE-NEXT:    addss %xmm6, %xmm4
; SSE-NEXT:    addss %xmm4, %xmm0
; SSE-NEXT:    addss %xmm7, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds5:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm5, %xmm4, %xmm1
; AVX-NEXT:    vaddss %xmm6, %xmm1, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm7, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.
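; In other words, the expected shape is (x0 / x1) + (x2 + x3): the add of %x2
; and %x3 can execute in parallel with the division, and the division's result
; feeds only the final add.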

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds6:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds6:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar single-precision multiplies are reassociated.

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_muls1:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    mulss %xmm3, %xmm2
; SSE-NEXT:    mulss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls1:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar double-precision adds are reassociated.

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_adds_double:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    addsd %xmm3, %xmm2
; SSE-NEXT:    addsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_double:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX scalar double-precision multiplies are reassociated.

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_muls_double:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    mulsd %xmm3, %xmm2
; SSE-NEXT:    mulsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_double:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.

define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_adds_v4f32:
; SSE:       # BB#0:
; SSE-NEXT:    mulps %xmm1, %xmm0
; SSE-NEXT:    addps %xmm3, %xmm2
; SSE-NEXT:    addps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_v4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fmul <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.

define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_adds_v2f64:
; SSE:       # BB#0:
; SSE-NEXT:    mulpd %xmm1, %xmm0
; SSE-NEXT:    addpd %xmm3, %xmm2
; SSE-NEXT:    addpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_v2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fmul <2 x double> %x0, %x1
  %t1 = fadd <2 x double> %x2, %t0
  %t2 = fadd <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that SSE and AVX 128-bit vector single-precision multiplies are reassociated.

define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_muls_v4f32:
; SSE:       # BB#0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    mulps %xmm3, %xmm2
; SSE-NEXT:    mulps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_v4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fmul <4 x float> %x2, %t0
  %t2 = fmul <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision multiplies are reassociated.

define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_muls_v2f64:
; SSE:       # BB#0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    mulpd %xmm3, %xmm2
; SSE-NEXT:    mulpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_v2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %t1 = fmul <2 x double> %x2, %t0
  %t2 = fmul <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that AVX 256-bit vector single-precision adds are reassociated.

define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_adds_v8f32:
; AVX:       # BB#0:
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vaddps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fmul <8 x float> %x0, %x1
  %t1 = fadd <8 x float> %x2, %t0
  %t2 = fadd <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision adds are reassociated.

define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_adds_v4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vaddpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fmul <4 x double> %x0, %x1
  %t1 = fadd <4 x double> %x2, %t0
  %t2 = fadd <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that AVX 256-bit vector single-precision multiplies are reassociated.

define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_muls_v8f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmulps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %t1 = fmul <8 x float> %x2, %t0
  %t2 = fmul <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision multiplies are reassociated.

define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_muls_v4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmulpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %t1 = fmul <4 x double> %x2, %t0
  %t2 = fmul <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that SSE and AVX scalar single-precision minimum ops are reassociated.
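; In IR, the min is written as a compare+select; the backend matches that
; pattern to minss/minsd (and the packed forms below), and since min is
; associative it can be reassociated just like the adds and muls above.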

define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_mins_single:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    minss %xmm3, %xmm2
; SSE-NEXT:    minss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_single:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp olt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp olt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar single-precision maximum ops are reassociated.

define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_maxs_single:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    maxss %xmm3, %xmm2
; SSE-NEXT:    maxss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_single:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp ogt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp ogt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar double-precision minimum ops are reassociated.

define double @reassociate_mins_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_mins_double:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    minsd %xmm3, %xmm2
; SSE-NEXT:    minsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_double:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp olt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp olt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX scalar double-precision maximum ops are reassociated.

define double @reassociate_maxs_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_maxs_double:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    maxsd %xmm3, %xmm2
; SSE-NEXT:    maxsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_double:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp ogt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp ogt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision minimum ops are reassociated.

define <4 x float> @reassociate_mins_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_mins_v4f32:
; SSE:       # BB#0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    minps %xmm3, %xmm2
; SSE-NEXT:    minps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_v4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp olt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp olt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision maximum ops are reassociated.

define <4 x float> @reassociate_maxs_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_maxs_v4f32:
; SSE:       # BB#0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    maxps %xmm3, %xmm2
; SSE-NEXT:    maxps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_v4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp ogt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp ogt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision minimum ops are reassociated.

define <2 x double> @reassociate_mins_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_mins_v2f64:
; SSE:       # BB#0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    minpd %xmm3, %xmm2
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_v2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp olt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp olt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision maximum ops are reassociated.

define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_maxs_v2f64:
; SSE:       # BB#0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    maxpd %xmm3, %xmm2
; SSE-NEXT:    maxpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_v2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp ogt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp ogt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that AVX 256-bit vector single-precision minimum ops are reassociated.

define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_mins_v8f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vminps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp olt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp olt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector single-precision maximum ops are reassociated.

define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_maxs_v8f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmaxps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp ogt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp ogt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector double-precision minimum ops are reassociated.

define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_mins_v4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vminpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp olt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp olt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; Verify that AVX 256-bit vector double-precision maximum ops are reassociated.

define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_maxs_v4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmaxpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp ogt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp ogt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
; Verify that reassociation is not happening needlessly or wrongly.
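; The operands below are produced by calls, so each add depends on a value that
; is only available after the corresponding call returns; reassociation must
; respect those dependencies. The second test is already in balanced form and
; should be left unchanged.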

declare double @bar()

define double @reassociate_adds_from_calls() {
; AVX-LABEL: reassociate_adds_from_calls:
; AVX:       callq   bar
; AVX-NEXT:  vmovsd  %xmm0, 16(%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  %xmm0, 8(%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  %xmm0, (%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  8(%rsp), %xmm1
; AVX:       vaddsd  16(%rsp), %xmm1, %xmm1
; AVX-NEXT:  vaddsd  (%rsp), %xmm0, %xmm0
; AVX-NEXT:  vaddsd  %xmm0, %xmm1, %xmm0

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %t0, %x2
  %t2 = fadd double %t1, %x3
  ret double %t2
}

define double @already_reassociated() {
; AVX-LABEL: already_reassociated:
; AVX:       callq   bar
; AVX-NEXT:  vmovsd  %xmm0, 16(%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  %xmm0, 8(%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  %xmm0, (%rsp)
; AVX-NEXT:  callq   bar
; AVX-NEXT:  vmovsd  8(%rsp), %xmm1
; AVX:       vaddsd  16(%rsp), %xmm1, %xmm1
; AVX-NEXT:  vaddsd  (%rsp), %xmm0, %xmm0
; AVX-NEXT:  vaddsd  %xmm0, %xmm1, %xmm0

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %x2, %x3
  %t2 = fadd double %t0, %t1
  ret double %t2
}