Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+avx2 -enable-unsafe-fp-math | FileCheck %s
      3 
      4 ; Check that the ExeDepsFix pass correctly fixes the domain for broadcast instructions.
      5 ; <rdar://problem/16354675>
      6 
      7 define <4 x float> @ExeDepsFix_broadcastss(<4 x float> %arg, <4 x float> %arg2) {
        ; 128-bit single-precision case with a constant splat mask. The integer
        ; 'and' sits between two float bitcasts and feeds a float max; the
        ; expected assembly below uses only vbroadcastss / vandps / vmaxps.
      8 ; CHECK-LABEL: ExeDepsFix_broadcastss:
      9 ; CHECK:       ## BB#0:
     10 ; CHECK-NEXT:    vbroadcastss {{.*}}(%rip), %xmm2
     11 ; CHECK-NEXT:    vandps %xmm2, %xmm0, %xmm0
     12 ; CHECK-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
     13 ; CHECK-NEXT:    retq
          ; View the float lanes as i32 so the mask can be applied bitwise.
     14   %bitcast = bitcast <4 x float> %arg to <4 x i32>
          ; 2147483647 = 0x7FFFFFFF: clears bit 31 (the sign bit of a 32-bit float) in every lane.
     15   %and = and <4 x i32> %bitcast, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
     16   %floatcast = bitcast <4 x i32> %and to <4 x float>
          ; Lane-wise "a >= b ? a : b"; the expected assembly shows this pair as a single vmaxps.
     17   %max_is_x = fcmp oge <4 x float> %floatcast, %arg2
     18   %max = select <4 x i1> %max_is_x, <4 x float> %floatcast, <4 x float> %arg2
     19   ret <4 x float> %max
     20 }
     21 
     22 define <8 x float> @ExeDepsFix_broadcastss256(<8 x float> %arg, <8 x float> %arg2) {
        ; 256-bit single-precision case with a constant splat mask: same
        ; bitcast / and / bitcast / max shape on <8 x float>. The expected assembly
        ; below uses vbroadcastss / vandps / vmaxps on ymm registers.
     23 ; CHECK-LABEL: ExeDepsFix_broadcastss256:
     24 ; CHECK:       ## BB#0:
     25 ; CHECK-NEXT:    vbroadcastss {{.*}}(%rip), %ymm2
     26 ; CHECK-NEXT:    vandps %ymm2, %ymm0, %ymm0
     27 ; CHECK-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
     28 ; CHECK-NEXT:    retq
          ; View the float lanes as i32 so the mask can be applied bitwise.
     29   %bitcast = bitcast <8 x float> %arg to <8 x i32>
          ; 2147483647 = 0x7FFFFFFF: clears bit 31 (the sign bit of a 32-bit float) in every lane.
     30   %and = and <8 x i32> %bitcast, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
     31   %floatcast = bitcast <8 x i32> %and to <8 x float>
          ; Lane-wise "a >= b ? a : b"; the expected assembly shows this pair as a single vmaxps.
     32   %max_is_x = fcmp oge <8 x float> %floatcast, %arg2
     33   %max = select <8 x i1> %max_is_x, <8 x float> %floatcast, <8 x float> %arg2
     34   ret <8 x float> %max
     35 }
     36 
     37 define <4 x float> @ExeDepsFix_broadcastss_inreg(<4 x float> %arg, <4 x float> %arg2, i32 %broadcastvalue) {
        ; 128-bit single-precision case where the mask is a run-time i32 argument
        ; splatted to all lanes instead of a constant. The expected assembly below
        ; moves the scalar into xmm2 (vmovd), broadcasts it with vbroadcastss, and
        ; then uses vandps / vmaxps.
     38 ; CHECK-LABEL: ExeDepsFix_broadcastss_inreg:
     39 ; CHECK:       ## BB#0:
     40 ; CHECK-NEXT:    vmovd %edi, %xmm2
     41 ; CHECK-NEXT:    vbroadcastss %xmm2, %xmm2
     42 ; CHECK-NEXT:    vandps %xmm2, %xmm0, %xmm0
     43 ; CHECK-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
     44 ; CHECK-NEXT:    retq
          ; View the float lanes as i32 so the mask can be applied bitwise.
     45   %bitcast = bitcast <4 x float> %arg to <4 x i32>
          ; Build the splat: put the scalar in lane 0, then shuffle with an all-zero
          ; index vector so every result lane reads lane 0.
     46   %in = insertelement <4 x i32> undef, i32 %broadcastvalue, i32 0
     47   %mask = shufflevector <4 x i32> %in, <4 x i32> undef, <4 x i32> zeroinitializer
     48   %and = and <4 x i32> %bitcast, %mask
     49   %floatcast = bitcast <4 x i32> %and to <4 x float>
          ; Lane-wise "a >= b ? a : b"; the expected assembly shows this pair as a single vmaxps.
     50   %max_is_x = fcmp oge <4 x float> %floatcast, %arg2
     51   %max = select <4 x i1> %max_is_x, <4 x float> %floatcast, <4 x float> %arg2
     52   ret <4 x float> %max
     53 }
     54 
     55 define <8 x float> @ExeDepsFix_broadcastss256_inreg(<8 x float> %arg, <8 x float> %arg2, i32 %broadcastvalue) {
        ; 256-bit single-precision case where the mask is a run-time i32 argument
        ; splatted to all lanes. The expected assembly below moves the scalar into
        ; xmm2 (vmovd), broadcasts it from xmm2 into ymm2 with vbroadcastss, and
        ; then uses vandps / vmaxps on ymm registers.
     56 ; CHECK-LABEL: ExeDepsFix_broadcastss256_inreg:
     57 ; CHECK:       ## BB#0:
     58 ; CHECK-NEXT:    vmovd %edi, %xmm2
     59 ; CHECK-NEXT:    vbroadcastss %xmm2, %ymm2
     60 ; CHECK-NEXT:    vandps %ymm2, %ymm0, %ymm0
     61 ; CHECK-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
     62 ; CHECK-NEXT:    retq
          ; View the float lanes as i32 so the mask can be applied bitwise.
     63   %bitcast = bitcast <8 x float> %arg to <8 x i32>
          ; Build the splat: put the scalar in lane 0, then shuffle with an all-zero
          ; index vector so every result lane reads lane 0.
     64   %in = insertelement <8 x i32> undef, i32 %broadcastvalue, i32 0
     65   %mask = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
     66   %and = and <8 x i32> %bitcast, %mask
     67   %floatcast = bitcast <8 x i32> %and to <8 x float>
          ; Lane-wise "a >= b ? a : b"; the expected assembly shows this pair as a single vmaxps.
     68   %max_is_x = fcmp oge <8 x float> %floatcast, %arg2
     69   %max = select <8 x i1> %max_is_x, <8 x float> %floatcast, <8 x float> %arg2
     70   ret <8 x float> %max
     71 }
     72 
     73 ; In the following case the broadcast is folded directly into vandpd as a memory operand.
     74 define <2 x double> @ExeDepsFix_broadcastsd(<2 x double> %arg, <2 x double> %arg2) {
        ; 128-bit double-precision case with a constant splat mask. The expected
        ; assembly below has no separate broadcast instruction: vandpd reads the
        ; mask through a RIP-relative memory operand, followed by vmaxpd.
     75 ; CHECK-LABEL: ExeDepsFix_broadcastsd:
     76 ; CHECK:       ## BB#0:
     77 ; CHECK-NEXT:    vandpd {{.*}}(%rip), %xmm0, %xmm0
     78 ; CHECK-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
     79 ; CHECK-NEXT:    retq
          ; View the double lanes as i64 so the mask can be applied bitwise.
     80   %bitcast = bitcast <2 x double> %arg to <2 x i64>
          ; i64 2147483647 = 0x000000007FFFFFFF: keeps only the low 31 bits of each
          ; 64-bit lane, so it is not a sign-bit mask for double.
          ; NOTE(review): presumably any constant splat serves this instruction-selection
          ; test; confirm before changing, since the expected assembly depends on it.
     81   %and = and <2 x i64> %bitcast, <i64 2147483647, i64 2147483647>
     82   %floatcast = bitcast <2 x i64> %and to <2 x double>
          ; Lane-wise "a >= b ? a : b"; the expected assembly shows this pair as a single vmaxpd.
     83   %max_is_x = fcmp oge <2 x double> %floatcast, %arg2
     84   %max = select <2 x i1> %max_is_x, <2 x double> %floatcast, <2 x double> %arg2
     85   ret <2 x double> %max
     86 }
     87 
     88 define <4 x double> @ExeDepsFix_broadcastsd256(<4 x double> %arg, <4 x double> %arg2) {
        ; 256-bit double-precision case with a constant splat mask. The expected
        ; assembly below loads the mask with vbroadcastsd from a RIP-relative
        ; address and then uses vandpd / vmaxpd on ymm registers.
     89 ; CHECK-LABEL: ExeDepsFix_broadcastsd256:
     90 ; CHECK:       ## BB#0:
     91 ; CHECK-NEXT:    vbroadcastsd {{.*}}(%rip), %ymm2
     92 ; CHECK-NEXT:    vandpd %ymm2, %ymm0, %ymm0
     93 ; CHECK-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
     94 ; CHECK-NEXT:    retq
          ; View the double lanes as i64 so the mask can be applied bitwise.
     95   %bitcast = bitcast <4 x double> %arg to <4 x i64>
          ; i64 2147483647 = 0x000000007FFFFFFF: keeps only the low 31 bits of each
          ; 64-bit lane, so it is not a sign-bit mask for double.
          ; NOTE(review): presumably any constant splat serves this instruction-selection
          ; test; confirm before changing, since the expected assembly depends on it.
     96   %and = and <4 x i64> %bitcast, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
     97   %floatcast = bitcast <4 x i64> %and to <4 x double>
          ; Lane-wise "a >= b ? a : b"; the expected assembly shows this pair as a single vmaxpd.
     98   %max_is_x = fcmp oge <4 x double> %floatcast, %arg2
     99   %max = select <4 x i1> %max_is_x, <4 x double> %floatcast, <4 x double> %arg2
    100   ret <4 x double> %max
    101 }
    102 
    103 ; ExeDepsFix works top down, so it coalesces the vpbroadcastq domain with
    104 ; vpand, and there is nothing more it can do to match vmaxpd.
    105 define <2 x double> @ExeDepsFix_broadcastsd_inreg(<2 x double> %arg, <2 x double> %arg2, i64 %broadcastvalue) {
        ; 128-bit double-precision case where the mask is a run-time i64 argument
        ; splatted to both lanes. Unlike the other functions in this file, the
        ; expected assembly below uses the integer forms vpbroadcastq and vpand
        ; for the broadcast and the mask; only the final vmaxpd is a
        ; floating-point instruction.
    106 ; CHECK-LABEL: ExeDepsFix_broadcastsd_inreg:
    107 ; CHECK:       ## BB#0:
    108 ; CHECK-NEXT:    vmovq %rdi, %xmm2
    109 ; CHECK-NEXT:    vpbroadcastq %xmm2, %xmm2
    110 ; CHECK-NEXT:    vpand %xmm2, %xmm0, %xmm0
    111 ; CHECK-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
    112 ; CHECK-NEXT:    retq
          ; View the double lanes as i64 so the mask can be applied bitwise.
    113   %bitcast = bitcast <2 x double> %arg to <2 x i64>
          ; Build the splat: put the scalar in lane 0, then shuffle with an all-zero
          ; index vector so every result lane reads lane 0.
    114   %in = insertelement <2 x i64> undef, i64 %broadcastvalue, i32 0
    115   %mask = shufflevector <2 x i64> %in, <2 x i64> undef, <2 x i32> zeroinitializer
    116   %and = and <2 x i64> %bitcast, %mask
    117   %floatcast = bitcast <2 x i64> %and to <2 x double>
          ; Lane-wise "a >= b ? a : b"; the expected assembly shows this pair as a single vmaxpd.
    118   %max_is_x = fcmp oge <2 x double> %floatcast, %arg2
    119   %max = select <2 x i1> %max_is_x, <2 x double> %floatcast, <2 x double> %arg2
    120   ret <2 x double> %max
    121 }
    122 
    123 define <4 x double> @ExeDepsFix_broadcastsd256_inreg(<4 x double> %arg, <4 x double> %arg2, i64 %broadcastvalue) {
        ; 256-bit double-precision case where the mask is a run-time i64 argument
        ; splatted to all lanes. The expected assembly below moves the scalar into
        ; xmm2 (vmovq), broadcasts it from xmm2 into ymm2 with vbroadcastsd, and
        ; then uses vandpd / vmaxpd on ymm registers.
    124 ; CHECK-LABEL: ExeDepsFix_broadcastsd256_inreg:
    125 ; CHECK:       ## BB#0:
    126 ; CHECK-NEXT:    vmovq %rdi, %xmm2
    127 ; CHECK-NEXT:    vbroadcastsd %xmm2, %ymm2
    128 ; CHECK-NEXT:    vandpd %ymm2, %ymm0, %ymm0
    129 ; CHECK-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
    130 ; CHECK-NEXT:    retq
          ; View the double lanes as i64 so the mask can be applied bitwise.
    131   %bitcast = bitcast <4 x double> %arg to <4 x i64>
          ; Build the splat: put the scalar in lane 0, then shuffle with an all-zero
          ; index vector so every result lane reads lane 0.
    132   %in = insertelement <4 x i64> undef, i64 %broadcastvalue, i32 0
    133   %mask = shufflevector <4 x i64> %in, <4 x i64> undef, <4 x i32> zeroinitializer
    134   %and = and <4 x i64> %bitcast, %mask
    135   %floatcast = bitcast <4 x i64> %and to <4 x double>
          ; Lane-wise "a >= b ? a : b"; the expected assembly shows this pair as a single vmaxpd.
    136   %max_is_x = fcmp oge <4 x double> %floatcast, %arg2
    137   %max = select <4 x i1> %max_is_x, <4 x double> %floatcast, <4 x double> %arg2
    138   ret <4 x double> %max
    139 }
    140