Home | History | Annotate | Download | only in X86
      1 ; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=nocona -verify-machineinstrs < %s | FileCheck %s
      2 
      3 ; After tail duplication, two copies in an early exit BB can be cancelled out.
      4 ; rdar://10640363
        ; @t1 returns %a unchanged when %b is zero (the early exit). Otherwise it
        ; loops, replacing (a, b) with (b, a srem b) until the remainder is zero, and
        ; returns the divisor of that final iteration.
        ; The directives below capture the label targeted by the first `je` and
        ; require that no `mov` appears between that label and the following `ret`.
      5 define i32 @t1(i32 %a, i32 %b) nounwind  {
      6 entry:
      7 ; CHECK-LABEL: t1:
      8 ; CHECK: je [[LABEL:.*BB.*]]
      9   %cmp1 = icmp eq i32 %b, 0                       ; true -> skip the loop entirely
     10   br i1 %cmp1, label %while.end, label %while.body
     11 
     12 ; CHECK: [[LABEL]]:
     13 ; CHECK-NOT: mov
     14 ; CHECK: ret
     15 
     16 while.body:                                       ; preds = %entry, %while.body
     17   %a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]  ; dividend: previous divisor, or %a on first entry
     18   %b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]  ; divisor: previous remainder, or %b on first entry
     19   %rem = srem i32 %a.addr.03, %b.addr.02
     20   %cmp = icmp eq i32 %rem, 0                      ; loop ends once the remainder is zero
     21   br i1 %cmp, label %while.end, label %while.body
     22 
     23 while.end:                                        ; preds = %while.body, %entry
     24   %a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]  ; %a from the early exit, last divisor from the loop
     25   ret i32 %a.addr.0.lcssa
     26 }
     27 
     28 ; Two movdqa (from phi-elimination) in the entry BB cancel out.
     29 ; rdar://10428165
        ; @t2 is a single shufflevector of %T0 and %T1 whose result is returned
        ; directly. The directive after the t2 label requires that no movdqa shows
        ; up in the code emitted for this function.
     30 define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
     31 entry:
     32 ; CHECK-LABEL: t2:
     33 ; CHECK-NOT: movdqa
        ; Only result lanes 2, 3 and 4 are defined: mask indices 7 and 2 select from
        ; %T0, and index 8 selects element 0 of %T1. All other lanes are undef.
     34   %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
     35   ret <8 x i16> %tmp8
     36 }
     37 
     38 define i32 @t3(i64 %a, i64 %b) nounwind  {
        ; Same early-exit / remainder-loop shape as @t1, but operating on i64 values;
        ; the selected i64 result is truncated to i32 just before the return.
        ; As in @t1, the directives capture the label targeted by the first `je` and
        ; require that no `mov` appears between that label and the following `ret`.
     39 entry:
     40 ; CHECK-LABEL: t3:
     41 ; CHECK: je [[LABEL:.*BB.*]]
     42   %cmp1 = icmp eq i64 %b, 0                       ; true -> skip the loop entirely
     43   br i1 %cmp1, label %while.end, label %while.body
     44 
     45 ; CHECK: [[LABEL]]:
     46 ; CHECK-NOT: mov
     47 ; CHECK: ret
     48 
     49 while.body:                                       ; preds = %entry, %while.body
     50   %a.addr.03 = phi i64 [ %b.addr.02, %while.body ], [ %a, %entry ]  ; dividend: previous divisor, or %a on first entry
     51   %b.addr.02 = phi i64 [ %rem, %while.body ], [ %b, %entry ]  ; divisor: previous remainder, or %b on first entry
     52   %rem = srem i64 %a.addr.03, %b.addr.02
     53   %cmp = icmp eq i64 %rem, 0                      ; loop ends once the remainder is zero
     54   br i1 %cmp, label %while.end, label %while.body
     55 
     56 while.end:                                        ; preds = %while.body, %entry
     57   %a.addr.0.lcssa = phi i64 [ %a, %entry ], [ %b.addr.02, %while.body ]  ; %a from the early exit, last divisor from the loop
     58   %t = trunc i64 %a.addr.0.lcssa to i32           ; only the low 32 bits are returned
     59   ret i32 %t
     60 }
     61 
     62 ; Check that copy propagation does not kill things like:
     63 ; dst = copy src <-- do not kill that.
     64 ; ... = op1 dst<undef>
     65 ; ... = op2 dst <-- this is used here.
        ; In the expected output below, CPY1 is the register captured as the
        ; destination of the movdqa copy, and CPY2 is the one captured as the last
        ; operand of punpckhbw. Each "not redefined" pattern rejects any text of the
        ; form ", <that register>" between the two surrounding matched lines, i.e.
        ; anything that names the register as the last (destination) operand.
     66 ;
     67 ; CHECK-LABEL: foo:
     68 ; CHECK: psllw $7,
     69 ; CHECK: psllw $7, [[SRC1:%xmm[0-9]+]]
     70 ; CHECK-NEXT: pand {{.*}}(%rip), [[SRC1]]
     71 ; CHECK-NEXT: pcmpgtb [[SRC1]], [[SRC2:%xmm[0-9]+]]
     72 ; CHECK-NEXT: pand %xmm{{[0-9]+}}, [[SRC2]]
     73 ; CHECK-NEXT: movdqa [[SRC2]], [[CPY1:%xmm[0-9]+]]
     74 ; CHECK-NEXT: punpcklbw %xmm{{[0-9]+}}, [[CPY1]]
     75 ; Check that CPY1 is not redefined.
     76 ; CHECK-NOT:  , [[CPY1]]
     77 ; CHECK: punpckhwd %xmm{{[0-9]+}}, [[CPY1]]
     78 ; CHECK-NEXT: pslld $31, [[CPY1]]
     79 ; CHECK-NEXT: psrad $31, [[CPY1]]
     80 ; CHECK: punpckhbw %xmm{{[0-9]+}}, [[CPY2:%xmm[0-9]+]]
     81 ; Check that CPY2 is not redefined.
     82 ; CHECK-NOT:  , [[CPY2]]
     83 ; CHECK: punpckhwd %xmm{{[0-9]+}}, [[CPY2]]
     84 ; CHECK-NEXT: pslld $31, [[CPY2]]
     85 ; CHECK-NEXT: psrad $31, [[CPY2]]
     86 define <16 x float> @foo(<16 x float> %x) {
        ; NOTE(review): several operands below are undef, so this looks like a
        ; machine-reduced reproducer rather than meaningful arithmetic - confirm
        ; before simplifying. The consecutive-line checks above depend on the exact
        ; instruction sequence llc produces from this IR, so do not reorder it.
     87 bb:
     88   %v3 = icmp slt <16 x i32> undef, zeroinitializer  ; mask computed from an undef vector
     89   %v14 = zext <16 x i1> %v3 to <16 x i32>
     90   %v16 = fcmp olt <16 x float> %x, zeroinitializer  ; per-lane %x < 0.0
     91   %v17 = sext <16 x i1> %v16 to <16 x i32>          ; same mask widened as all-ones / zero
     92   %v18 = zext <16 x i1> %v16 to <16 x i32>          ; same mask widened as 1 / 0
     93   %v19 = xor <16 x i32> %v14, %v18
     94   %v20 = or <16 x i32> %v17, undef
     95   %v21 = fptosi <16 x float> %x to <16 x i32>
     96   %v22 = sitofp <16 x i32> %v21 to <16 x float>     ; %x converted to integer and back
     97   %v69 = fcmp ogt <16 x float> %v22, zeroinitializer
     98   %v75 = and <16 x i1> %v69, %v3
     99   %v77 = bitcast <16 x float> %v22 to <16 x i32>
    100   %v79 = sext <16 x i1> %v75 to <16 x i32>
    101   %v80 = and <16 x i32> undef, %v79
    102   %v81 = xor <16 x i32> %v77, %v80
    103   %v82 = and <16 x i32> undef, %v81
    104   %v83 = xor <16 x i32> %v19, %v82
    105   %v84 = and <16 x i32> %v83, %v20
    106   %v85 = xor <16 x i32> %v19, %v84
    107   %v86 = bitcast <16 x i32> %v85 to <16 x float>    ; reinterpret the integer result as the float return value
    108   ret <16 x float> %v86
    109 }
    110