Home | History | Annotate | Download | only in X86
      1 ; RUN: llc -O3 -mtriple=x86_64-apple-macosx -o - < %s -mattr=+avx2 -enable-unsafe-fp-math -mcpu=core2 | FileCheck %s
      2 ; Check that the ExeDepsFix pass correctly fixes the domain for broadcast instructions.
      3 ; <rdar://problem/16354675>
      4 
      5 ; CHECK-LABEL: ExeDepsFix_broadcastss
      6 ; CHECK: broadcastss
      7 ; CHECK: vandps
      8 ; CHECK: vmaxps
      9 ; CHECK: ret
     10 define <4 x float> @ExeDepsFix_broadcastss(<4 x float> %arg, <4 x float> %arg2) {
          ; 128-bit single-precision case with a constant mask: each lane of %arg is
          ; masked with a splat integer constant, and the result is the lane-wise
          ; larger of the masked value and %arg2. The FileCheck directives that
          ; precede this function expect broadcastss, vandps, vmaxps, ret in order.
          ;
          ; View the float lanes as i32 so that an integer 'and' can be applied.
     11   %bitcast = bitcast <4 x float> %arg to <4 x i32>
          ; 2147483647 == 0x7FFFFFFF, i.e. every bit set except bit 31, so the 'and'
          ; clears the top (sign) bit of each 32-bit lane.
     12   %and = and <4 x i32> %bitcast, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
          ; Back to float for the floating-point compare below.
     13   %floatcast = bitcast <4 x i32> %and to <4 x float>
          ; 'oge' is an ordered compare: a lane is true only when neither operand is
          ; NaN and the masked value is >= %arg2.
     14   %max_is_x = fcmp oge <4 x float> %floatcast, %arg2
          ; Keep the masked value where the compare held, otherwise take %arg2.
     15   %max = select <4 x i1> %max_is_x, <4 x float> %floatcast, <4 x float> %arg2
     16   ret <4 x float> %max
     17 }
     18 
     19 ; CHECK-LABEL: ExeDepsFix_broadcastss256
     20 ; CHECK: broadcastss
     21 ; CHECK: vandps
     22 ; CHECK: vmaxps
     23 ; CHECK: ret
     24 define <8 x float> @ExeDepsFix_broadcastss256(<8 x float> %arg, <8 x float> %arg2) {
          ; 256-bit (8 x float) variant of the constant-mask test: mask every lane of
          ; %arg with the same integer constant, then return the lane-wise larger of
          ; the masked value and %arg2. The preceding FileCheck directives expect
          ; broadcastss, vandps, vmaxps, ret in order.
          ;
          ; View the float lanes as i32 so that an integer 'and' can be applied.
     25   %bitcast = bitcast <8 x float> %arg to <8 x i32>
          ; All eight mask lanes are 2147483647 (0x7FFFFFFF): bit 31 of each lane is
          ; cleared, all other bits are kept.
     26   %and = and <8 x i32> %bitcast, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
          ; Back to float for the floating-point compare below.
     27   %floatcast = bitcast <8 x i32> %and to <8 x float>
          ; Ordered >=: false in any lane where either operand is NaN.
     28   %max_is_x = fcmp oge <8 x float> %floatcast, %arg2
          ; Keep the masked value where the compare held, otherwise take %arg2.
     29   %max = select <8 x i1> %max_is_x, <8 x float> %floatcast, <8 x float> %arg2
     30   ret <8 x float> %max
     31 }
     32 
     33 
     34 ; CHECK-LABEL: ExeDepsFix_broadcastss_inreg
     35 ; CHECK: broadcastss
     36 ; CHECK: vandps
     37 ; CHECK: vmaxps
     38 ; CHECK: ret
     39 define <4 x float> @ExeDepsFix_broadcastss_inreg(<4 x float> %arg, <4 x float> %arg2, i32 %broadcastvalue) {
          ; 128-bit single-precision case where the mask is not a constant: the
          ; scalar argument %broadcastvalue is splatted to all four lanes and used
          ; as the 'and' mask. The preceding FileCheck directives expect
          ; broadcastss, vandps, vmaxps, ret in order.
          ;
          ; View the float lanes as i32 so that an integer 'and' can be applied.
     40   %bitcast = bitcast <4 x float> %arg to <4 x i32>
          ; Place the scalar in lane 0; the remaining lanes are left undef.
     41   %in = insertelement <4 x i32> undef, i32 %broadcastvalue, i32 0
          ; An all-zero shuffle mask selects lane 0 of %in for every result lane,
          ; producing a splat of %broadcastvalue.
     42   %mask = shufflevector <4 x i32> %in, <4 x i32> undef, <4 x i32> zeroinitializer
     43   %and = and <4 x i32> %bitcast, %mask
          ; Back to float for the floating-point compare below.
     44   %floatcast = bitcast <4 x i32> %and to <4 x float>
          ; Ordered >=: false in any lane where either operand is NaN.
     45   %max_is_x = fcmp oge <4 x float> %floatcast, %arg2
          ; Keep the masked value where the compare held, otherwise take %arg2.
     46   %max = select <4 x i1> %max_is_x, <4 x float> %floatcast, <4 x float> %arg2
     47   ret <4 x float> %max
     48 }
     49 
     50 ; CHECK-LABEL: ExeDepsFix_broadcastss256_inreg
     51 ; CHECK: broadcastss
     52 ; CHECK: vandps
     53 ; CHECK: vmaxps
     54 ; CHECK: ret
     55 define <8 x float> @ExeDepsFix_broadcastss256_inreg(<8 x float> %arg, <8 x float> %arg2, i32 %broadcastvalue) {
          ; 256-bit (8 x float) variant of the non-constant mask test: the scalar
          ; %broadcastvalue is splatted to all eight lanes and used as the 'and'
          ; mask. The preceding FileCheck directives expect broadcastss, vandps,
          ; vmaxps, ret in order.
          ;
          ; View the float lanes as i32 so that an integer 'and' can be applied.
     56   %bitcast = bitcast <8 x float> %arg to <8 x i32>
          ; Place the scalar in lane 0; the remaining lanes are left undef.
     57   %in = insertelement <8 x i32> undef, i32 %broadcastvalue, i32 0
          ; An all-zero shuffle mask selects lane 0 of %in for every result lane,
          ; producing a splat of %broadcastvalue.
     58   %mask = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
     59   %and = and <8 x i32> %bitcast, %mask
          ; Back to float for the floating-point compare below.
     60   %floatcast = bitcast <8 x i32> %and to <8 x float>
          ; Ordered >=: false in any lane where either operand is NaN.
     61   %max_is_x = fcmp oge <8 x float> %floatcast, %arg2
          ; Keep the masked value where the compare held, otherwise take %arg2.
     62   %max = select <8 x i1> %max_is_x, <8 x float> %floatcast, <8 x float> %arg2
     63   ret <8 x float> %max
     64 }
     65 
     66 ; CHECK-LABEL: ExeDepsFix_broadcastsd
     67 ; In this case the broadcast is folded directly into vandpd.
     68 ; CHECK: vandpd
     69 ; CHECK: vmaxpd
     70 ; CHECK: ret
     71 define <2 x double> @ExeDepsFix_broadcastsd(<2 x double> %arg, <2 x double> %arg2) {
          ; 128-bit double-precision case with a constant mask: each 64-bit lane of
          ; %arg is masked with the same integer constant, and the result is the
          ; lane-wise larger of the masked value and %arg2. The preceding FileCheck
          ; directives expect only vandpd, vmaxpd, ret (no separate broadcast).
          ;
          ; View the double lanes as i64 so that an integer 'and' can be applied.
     72   %bitcast = bitcast <2 x double> %arg to <2 x i64>
          ; NOTE(review): 2147483647 is 0x7FFFFFFF, so on i64 lanes this keeps only
          ; the low 31 bits rather than clearing just the sign bit (bit 63).
          ; Presumably the test only needs some splat constant here -- confirm.
     73   %and = and <2 x i64> %bitcast, <i64 2147483647, i64 2147483647>
          ; Back to double for the floating-point compare below.
     74   %floatcast = bitcast <2 x i64> %and to <2 x double>
          ; Ordered >=: false in any lane where either operand is NaN.
     75   %max_is_x = fcmp oge <2 x double> %floatcast, %arg2
          ; Keep the masked value where the compare held, otherwise take %arg2.
     76   %max = select <2 x i1> %max_is_x, <2 x double> %floatcast, <2 x double> %arg2
     77   ret <2 x double> %max
     78 }
     79 
     80 ; CHECK-LABEL: ExeDepsFix_broadcastsd256
     81 ; CHECK: broadcastsd
     82 ; CHECK: vandpd
     83 ; CHECK: vmaxpd
     84 ; CHECK: ret
     85 define <4 x double> @ExeDepsFix_broadcastsd256(<4 x double> %arg, <4 x double> %arg2) {
          ; 256-bit (4 x double) variant of the constant-mask test: mask every
          ; 64-bit lane of %arg with the same integer constant, then return the
          ; lane-wise larger of the masked value and %arg2. The preceding FileCheck
          ; directives expect broadcastsd, vandpd, vmaxpd, ret in order.
          ;
          ; View the double lanes as i64 so that an integer 'and' can be applied.
     86   %bitcast = bitcast <4 x double> %arg to <4 x i64>
          ; NOTE(review): 2147483647 is 0x7FFFFFFF, so on i64 lanes this keeps only
          ; the low 31 bits rather than clearing just the sign bit (bit 63).
          ; Presumably the test only needs some splat constant here -- confirm.
     87   %and = and <4 x i64> %bitcast, <i64 2147483647, i64 2147483647, i64 2147483647, i64 2147483647>
          ; Back to double for the floating-point compare below.
     88   %floatcast = bitcast <4 x i64> %and to <4 x double>
          ; Ordered >=: false in any lane where either operand is NaN.
     89   %max_is_x = fcmp oge <4 x double> %floatcast, %arg2
          ; Keep the masked value where the compare held, otherwise take %arg2.
     90   %max = select <4 x i1> %max_is_x, <4 x double> %floatcast, <4 x double> %arg2
     91   ret <4 x double> %max
     92 }
     93 
     94 
     95 ; CHECK-LABEL: ExeDepsFix_broadcastsd_inreg
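     96 ; ExeDepsFix works top down, so it coalesces the vpunpcklqdq domain with
     97 ; vpand, and nothing more can be done to match the domain of vmaxpd.
        ; NOTE(review): the directives below expect vpbroadcastq rather than
        ; vpunpcklqdq, so the instruction named here may be stale -- confirm.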
     98 ; CHECK: vmovq
     99 ; CHECK: vpbroadcastq
    100 ; CHECK: vpand
    101 ; CHECK: vmaxpd
    102 ; CHECK: ret
    103 define <2 x double> @ExeDepsFix_broadcastsd_inreg(<2 x double> %arg, <2 x double> %arg2, i64 %broadcastvalue) {
          ; 128-bit double-precision case where the mask is not a constant: the
          ; scalar %broadcastvalue is splatted to both 64-bit lanes and used as the
          ; 'and' mask. The preceding FileCheck directives expect vmovq,
          ; vpbroadcastq, vpand, vmaxpd, ret in order.
          ;
          ; View the double lanes as i64 so that an integer 'and' can be applied.
    104   %bitcast = bitcast <2 x double> %arg to <2 x i64>
          ; Place the scalar in lane 0; the other lane is left undef.
    105   %in = insertelement <2 x i64> undef, i64 %broadcastvalue, i32 0
          ; An all-zero shuffle mask selects lane 0 of %in for every result lane,
          ; producing a splat of %broadcastvalue.
    106   %mask = shufflevector <2 x i64> %in, <2 x i64> undef, <2 x i32> zeroinitializer
    107   %and = and <2 x i64> %bitcast, %mask
          ; Back to double for the floating-point compare below.
    108   %floatcast = bitcast <2 x i64> %and to <2 x double>
          ; Ordered >=: false in any lane where either operand is NaN.
    109   %max_is_x = fcmp oge <2 x double> %floatcast, %arg2
          ; Keep the masked value where the compare held, otherwise take %arg2.
    110   %max = select <2 x i1> %max_is_x, <2 x double> %floatcast, <2 x double> %arg2
    111   ret <2 x double> %max
    112 }
    113 
    114 ; CHECK-LABEL: ExeDepsFix_broadcastsd256_inreg
    115 ; CHECK: broadcastsd
    116 ; CHECK: vandpd
    117 ; CHECK: vmaxpd
    118 ; CHECK: ret
    119 define <4 x double> @ExeDepsFix_broadcastsd256_inreg(<4 x double> %arg, <4 x double> %arg2, i64 %broadcastvalue) {
          ; 256-bit (4 x double) variant of the non-constant mask test: the scalar
          ; %broadcastvalue is splatted to all four 64-bit lanes and used as the
          ; 'and' mask. The preceding FileCheck directives expect broadcastsd,
          ; vandpd, vmaxpd, ret in order.
          ;
          ; View the double lanes as i64 so that an integer 'and' can be applied.
    120   %bitcast = bitcast <4 x double> %arg to <4 x i64>
          ; Place the scalar in lane 0; the remaining lanes are left undef.
    121   %in = insertelement <4 x i64> undef, i64 %broadcastvalue, i32 0
          ; An all-zero shuffle mask selects lane 0 of %in for every result lane,
          ; producing a splat of %broadcastvalue.
    122   %mask = shufflevector <4 x i64> %in, <4 x i64> undef, <4 x i32> zeroinitializer
    123   %and = and <4 x i64> %bitcast, %mask
          ; Back to double for the floating-point compare below.
    124   %floatcast = bitcast <4 x i64> %and to <4 x double>
          ; Ordered >=: false in any lane where either operand is NaN.
    125   %max_is_x = fcmp oge <4 x double> %floatcast, %arg2
          ; Keep the masked value where the compare held, otherwise take %arg2.
    126   %max = select <4 x i1> %max_is_x, <4 x double> %floatcast, <4 x double> %arg2
    127   ret <4 x double> %max
    128 }
    129 
    130