; RUN: opt -slp-vectorizer < %s -S | FileCheck %s

; Verify that the SLP vectorizer recognizes that commutativity allows %c to
; be splatted/broadcast, which makes this case profitable to vectorize.


; ModuleID = 'bugpoint-reduced-simplified.bc'
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"

@cle = external unnamed_addr global [32 x i8], align 16
@cle32 = external unnamed_addr global [32 x i32], align 16


; Check that we correctly detect a splat/broadcast by leveraging the
; commutativity property of `xor`.

; CHECK-LABEL:  @splat
; CHECK:  store <16 x i8>
define void @splat(i8 %a, i8 %b, i8 %c) {
  %1 = xor i8 %c, %a
  store i8 %1, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 0), align 16
  %2 = xor i8 %a, %c
  store i8 %2, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 1)
  %3 = xor i8 %a, %c
  store i8 %3, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 2)
  %4 = xor i8 %a, %c
  store i8 %4, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 3)
  %5 = xor i8 %c, %a
  store i8 %5, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 4)
  %6 = xor i8 %c, %b
  store i8 %6, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 5)
  %7 = xor i8 %c, %a
  store i8 %7, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 6)
  %8 = xor i8 %c, %b
  store i8 %8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 7)
  %9 = xor i8 %a, %c
  store i8 %9, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 8)
  %10 = xor i8 %a, %c
  store i8 %10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 9)
  %11 = xor i8 %a, %c
  store i8 %11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 10)
  %12 = xor i8 %a, %c
  store i8 %12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 11)
  %13 = xor i8 %a, %c
  store i8 %13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 12)
  %14 = xor i8 %a, %c
  store i8 %14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 13)
  %15 = xor i8 %a, %c
  store i8 %15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 14)
  %16 = xor i8 %a, %c
  store i8 %16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @cle, i64 0, i64 15)
  ret void
}
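
; For illustration, a comment-only sketch (kept as comments so the FileCheck
; patterns above are unaffected) of the rough shape the vectorizer is expected
; to produce for @splat. Value names, shuffle masks, and instruction order are
; illustrative assumptions, not the vectorizer's exact output. Commutativity
; lets every lane put %c on the same side of the xor, so one operand becomes a
; single broadcast of %c; the other operand is %a in every lane except lanes 5
; and 7, which hold %b:
;
;   %c.ins   = insertelement <16 x i8> undef, i8 %c, i32 0
;   %c.splat = shufflevector <16 x i8> %c.ins, <16 x i8> undef, <16 x i32> zeroinitializer
;   %a.ins   = insertelement <16 x i8> undef, i8 %a, i32 0
;   %a.splat = shufflevector <16 x i8> %a.ins, <16 x i8> undef, <16 x i32> zeroinitializer
;   %ab.tmp  = insertelement <16 x i8> %a.splat, i8 %b, i32 5
;   %ab      = insertelement <16 x i8> %ab.tmp, i8 %b, i32 7
;   %xor     = xor <16 x i8> %c.splat, %ab
;   store <16 x i8> %xor, <16 x i8>* bitcast ([32 x i8]* @cle to <16 x i8>*), align 16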


; Check that we correctly detect, by leveraging the commutativity of `xor`,
; that every lane can have the same opcode (the `add`) on one side.

; CHECK-LABEL:  @same_opcode_on_one_side
; CHECK:  store <4 x i32>
define void @same_opcode_on_one_side(i32 %a, i32 %b, i32 %c) {
  %add1 = add i32 %c, %a
  %add2 = add i32 %c, %a
  %add3 = add i32 %a, %c
  %add4 = add i32 %c, %a
  %1 = xor i32 %add1, %a
  store i32 %1, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 0), align 16
  %2 = xor i32 %b, %add2
  store i32 %2, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 1)
  %3 = xor i32 %c, %add3
  store i32 %3, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 2)
  %4 = xor i32 %a, %add4
  store i32 %4, i32* getelementptr inbounds ([32 x i32], [32 x i32]* @cle32, i64 0, i64 3)
  ret void
}
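
; Again a comment-only sketch (illustrative names and ordering, not the
; vectorizer's exact output): commutativity lets every lane keep its `add`
; result on the same side of the `xor`, and since all four adds compute
; %a + %c in some order, they collapse into one vector add of two splats.
; The other xor operand is the lane vector <%a, %b, %c, %a>:
;
;   %a.ins   = insertelement <4 x i32> undef, i32 %a, i32 0
;   %a.splat = shufflevector <4 x i32> %a.ins, <4 x i32> undef, <4 x i32> zeroinitializer
;   %c.ins   = insertelement <4 x i32> undef, i32 %c, i32 0
;   %c.splat = shufflevector <4 x i32> %c.ins, <4 x i32> undef, <4 x i32> zeroinitializer
;   %adds    = add <4 x i32> %c.splat, %a.splat
;   %abca.1  = insertelement <4 x i32> %a.splat, i32 %b, i32 1
;   %abca    = insertelement <4 x i32> %abca.1, i32 %c, i32 2
;   %xor     = xor <4 x i32> %adds, %abca
;   store <4 x i32> %xor, <4 x i32>* bitcast ([32 x i32]* @cle32 to <4 x i32>*), align 16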