Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse2 -verify-machineinstrs | FileCheck %s
      3 
      4 ; After tail duplication, two copies in an early exit BB can be cancelled out.
      5 ; rdar://10640363
      6 define i32 @t1(i32 %a, i32 %b) nounwind  { ; i32 remainder loop: returns %a when %b is 0, otherwise the divisor that produced a zero remainder
      7 ; CHECK-LABEL: t1:
      8 ; CHECK:       ## %bb.0: ## %entry
      9 ; CHECK-NEXT:    movl %edi, %eax
     10 ; CHECK-NEXT:    testl %esi, %esi
     11 ; CHECK-NEXT:    je LBB0_1
     12 ; CHECK-NEXT:  ## %bb.2: ## %while.body.preheader
     13 ; CHECK-NEXT:    movl %esi, %edx
     14 ; CHECK-NEXT:    .p2align 4, 0x90
     15 ; CHECK-NEXT:  LBB0_3: ## %while.body
     16 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
     17 ; CHECK-NEXT:    movl %edx, %ecx
     18 ; CHECK-NEXT:    cltd
     19 ; CHECK-NEXT:    idivl %ecx
     20 ; CHECK-NEXT:    testl %edx, %edx
     21 ; CHECK-NEXT:    movl %ecx, %eax
     22 ; CHECK-NEXT:    jne LBB0_3
     23 ; CHECK-NEXT:  ## %bb.4: ## %while.end
     24 ; CHECK-NEXT:    movl %ecx, %eax
     25 ; CHECK-NEXT:    retq
     26 ; CHECK-NEXT:  LBB0_1:
     27 ; CHECK-NEXT:    retq
     28 entry:
     29   %cmp1 = icmp eq i32 %b, 0 ; a zero %b skips the loop, so %a is returned unchanged
     30   br i1 %cmp1, label %while.end, label %while.body
     31 
     32 while.body:                                       ; preds = %entry, %while.body
     33   %a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ] ; dividend: the previous divisor, or %a on entry
     34   %b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ] ; divisor: the previous remainder, or %b on entry
     35   %rem = srem i32 %a.addr.03, %b.addr.02 ; signed remainder of dividend by divisor
     36   %cmp = icmp eq i32 %rem, 0 ; leave the loop once the remainder reaches zero
     37   br i1 %cmp, label %while.end, label %while.body
     38 
     39 while.end:                                        ; preds = %while.body, %entry
     40   %a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ] ; result: %a on the early exit, else the final divisor
     41   ret i32 %a.addr.0.lcssa
     42 }
     43 
     44 ; Two movdqa (from phi-elimination) in the entry BB cancel out.
     45 ; rdar://10428165
     46 define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { ; single two-input shuffle of 8 x i16 vectors; the expected code below has no register-to-register copy
     47 ; CHECK-LABEL: t2:
     48 ; CHECK:       ## %bb.0: ## %entry
     49 ; CHECK-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
     50 ; CHECK-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
     51 ; CHECK-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
     52 ; CHECK-NEXT:    retq
     53 entry:
     54   %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef > ; lane 2 = %T0[7], lane 3 = %T0[2], lane 4 = %T1[0] (mask index 8); all other lanes are undef
     55   ret <8 x i16> %tmp8
     56 }
     57 
     58 define i32 @t3(i64 %a, i64 %b) nounwind  { ; i64 remainder loop whose result (%a when %b is 0, otherwise the divisor that produced a zero remainder) is truncated to i32
     59 ; CHECK-LABEL: t3:
     60 ; CHECK:       ## %bb.0: ## %entry
     61 ; CHECK-NEXT:    movq %rdi, %rax
     62 ; CHECK-NEXT:    testq %rsi, %rsi
     63 ; CHECK-NEXT:    je LBB2_1
     64 ; CHECK-NEXT:  ## %bb.2: ## %while.body.preheader
     65 ; CHECK-NEXT:    movq %rsi, %rdx
     66 ; CHECK-NEXT:    .p2align 4, 0x90
     67 ; CHECK-NEXT:  LBB2_3: ## %while.body
     68 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
     69 ; CHECK-NEXT:    movq %rdx, %rcx
     70 ; CHECK-NEXT:    cqto
     71 ; CHECK-NEXT:    idivq %rcx
     72 ; CHECK-NEXT:    testq %rdx, %rdx
     73 ; CHECK-NEXT:    movq %rcx, %rax
     74 ; CHECK-NEXT:    jne LBB2_3
     75 ; CHECK-NEXT:  ## %bb.4: ## %while.end
     76 ; CHECK-NEXT:    movl %ecx, %eax
     77 ; CHECK-NEXT:    retq
     78 ; CHECK-NEXT:  LBB2_1:
     79 ; CHECK-NEXT:    retq
     80 entry:
     81   %cmp1 = icmp eq i64 %b, 0 ; a zero %b skips the loop, so the low 32 bits of %a are returned
     82   br i1 %cmp1, label %while.end, label %while.body
     83 
     84 while.body:                                       ; preds = %entry, %while.body
     85   %a.addr.03 = phi i64 [ %b.addr.02, %while.body ], [ %a, %entry ] ; dividend: the previous divisor, or %a on entry
     86   %b.addr.02 = phi i64 [ %rem, %while.body ], [ %b, %entry ] ; divisor: the previous remainder, or %b on entry
     87   %rem = srem i64 %a.addr.03, %b.addr.02 ; signed remainder of dividend by divisor
     88   %cmp = icmp eq i64 %rem, 0 ; leave the loop once the remainder reaches zero
     89   br i1 %cmp, label %while.end, label %while.body
     90 
     91 while.end:                                        ; preds = %while.body, %entry
     92   %a.addr.0.lcssa = phi i64 [ %a, %entry ], [ %b.addr.02, %while.body ] ; 64-bit result: %a on the early exit, else the final divisor
     93   %t = trunc i64 %a.addr.0.lcssa to i32 ; only the low 32 bits are returned
     94   ret i32 %t
     95 }
     96 
     97 ; Check that copy propagation does not kill things like:
     98 ; dst = copy src <-- do not kill that.
     99 ; ... = op1 undef dst
    100 ; ... = op2 dst <-- this is used here.
    101 define <16 x float> @foo(<16 x float> %x) { ; <16 x float> in and out; the expected SSE2 code below handles the value as four xmm registers
    102 ; CHECK-LABEL: foo:
    103 ; CHECK:       ## %bb.0: ## %bb
    104 ; CHECK-NEXT:    movaps %xmm3, %xmm9
    105 ; CHECK-NEXT:    movaps %xmm2, %xmm8
    106 ; CHECK-NEXT:    movaps %xmm1, %xmm6
    107 ; CHECK-NEXT:    movaps %xmm0, %xmm7
    108 ; CHECK-NEXT:    xorps %xmm0, %xmm0
    109 ; CHECK-NEXT:    movaps %xmm3, %xmm1
    110 ; CHECK-NEXT:    cmpltps %xmm0, %xmm1
    111 ; CHECK-NEXT:    movaps %xmm1, %xmm4
    112 ; CHECK-NEXT:    orps {{.*}}(%rip), %xmm4
    113 ; CHECK-NEXT:    movaps %xmm4, %xmm10
    114 ; CHECK-NEXT:    andnps %xmm1, %xmm10
    115 ; CHECK-NEXT:    movaps %xmm2, %xmm1
    116 ; CHECK-NEXT:    cmpltps %xmm0, %xmm1
    117 ; CHECK-NEXT:    movaps {{.*#+}} xmm11 = [9,10,11,12]
    118 ; CHECK-NEXT:    movaps %xmm1, %xmm3
    119 ; CHECK-NEXT:    orps %xmm11, %xmm3
    120 ; CHECK-NEXT:    movaps %xmm3, %xmm14
    121 ; CHECK-NEXT:    andnps %xmm1, %xmm14
    122 ; CHECK-NEXT:    cvttps2dq %xmm6, %xmm12
    123 ; CHECK-NEXT:    cmpltps %xmm0, %xmm6
    124 ; CHECK-NEXT:    movaps {{.*#+}} xmm13 = [5,6,7,8]
    125 ; CHECK-NEXT:    movaps %xmm6, %xmm2
    126 ; CHECK-NEXT:    orps %xmm13, %xmm2
    127 ; CHECK-NEXT:    movaps %xmm2, %xmm5
    128 ; CHECK-NEXT:    andnps %xmm6, %xmm5
    129 ; CHECK-NEXT:    cvttps2dq %xmm7, %xmm6
    130 ; CHECK-NEXT:    cmpltps %xmm0, %xmm7
    131 ; CHECK-NEXT:    movaps {{.*#+}} xmm15 = [1,2,3,4]
    132 ; CHECK-NEXT:    movaps %xmm7, %xmm0
    133 ; CHECK-NEXT:    orps %xmm15, %xmm0
    134 ; CHECK-NEXT:    movaps %xmm0, %xmm1
    135 ; CHECK-NEXT:    andnps %xmm7, %xmm1
    136 ; CHECK-NEXT:    andps %xmm15, %xmm0
    137 ; CHECK-NEXT:    cvtdq2ps %xmm6, %xmm6
    138 ; CHECK-NEXT:    andps %xmm6, %xmm0
    139 ; CHECK-NEXT:    movaps {{.*#+}} xmm6 = [1,1,1,1]
    140 ; CHECK-NEXT:    andps %xmm6, %xmm1
    141 ; CHECK-NEXT:    orps %xmm1, %xmm0
    142 ; CHECK-NEXT:    andps %xmm13, %xmm2
    143 ; CHECK-NEXT:    cvtdq2ps %xmm12, %xmm1
    144 ; CHECK-NEXT:    andps %xmm1, %xmm2
    145 ; CHECK-NEXT:    andps %xmm6, %xmm5
    146 ; CHECK-NEXT:    orps %xmm5, %xmm2
    147 ; CHECK-NEXT:    andps %xmm11, %xmm3
    148 ; CHECK-NEXT:    cvttps2dq %xmm8, %xmm1
    149 ; CHECK-NEXT:    cvtdq2ps %xmm1, %xmm1
    150 ; CHECK-NEXT:    andps %xmm1, %xmm3
    151 ; CHECK-NEXT:    andps %xmm6, %xmm14
    152 ; CHECK-NEXT:    orps %xmm14, %xmm3
    153 ; CHECK-NEXT:    andps %xmm6, %xmm10
    154 ; CHECK-NEXT:    andps {{.*}}(%rip), %xmm4
    155 ; CHECK-NEXT:    cvttps2dq %xmm9, %xmm1
    156 ; CHECK-NEXT:    cvtdq2ps %xmm1, %xmm1
    157 ; CHECK-NEXT:    andps %xmm1, %xmm4
    158 ; CHECK-NEXT:    orps %xmm10, %xmm4
    159 ; CHECK-NEXT:    movaps %xmm2, %xmm1
    160 ; CHECK-NEXT:    movaps %xmm3, %xmm2
    161 ; CHECK-NEXT:    movaps %xmm4, %xmm3
    162 ; CHECK-NEXT:    retq
    163 bb:
    164   %v3 = icmp slt <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, zeroinitializer ; constant compare: none of 0..15 is negative, so every lane is false
    165   %v14 = zext <16 x i1> %v3 to <16 x i32> ; zero-extension of the all-false mask above
    166   %v16 = fcmp olt <16 x float> %x, zeroinitializer ; per-lane mask: %x < 0.0 (ordered compare)
    167   %v17 = sext <16 x i1> %v16 to <16 x i32> ; all-ones in lanes where %x < 0.0, zero elsewhere
    168   %v18 = zext <16 x i1> %v16 to <16 x i32> ; 1 in lanes where %x < 0.0, zero elsewhere
    169   %v19 = xor <16 x i32> %v14, %v18
    170   %v20 = or <16 x i32> %v17, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16> ; sign mask OR'ed with the per-lane constants 1..16; used as the select mask at the end
    171   %v21 = fptosi <16 x float> %x to <16 x i32> ; float -> signed i32 ...
    172   %v22 = sitofp <16 x i32> %v21 to <16 x float> ; ... and back to float
    173   %v69 = fcmp ogt <16 x float> %v22, zeroinitializer
    174   %v75 = and <16 x i1> %v69, %v3 ; combined with the constant compare result %v3
    175   %v77 = bitcast <16 x float> %v22 to <16 x i32> ; reinterpret the converted floats as raw bits
    176   %v79 = sext <16 x i1> %v75 to <16 x i32>
    177   %v80 = and <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, %v79
    178   %v81 = xor <16 x i32> %v77, %v80
    179   %v82 = and <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, %v81
    180   %v83 = xor <16 x i32> %v19, %v82
    181   %v84 = and <16 x i32> %v83, %v20
    182   %v85 = xor <16 x i32> %v19, %v84 ; %v19 ^ ((%v19 ^ %v82) & %v20): takes %v82 bits where %v20 is set and %v19 bits elsewhere
    183   %v86 = bitcast <16 x i32> %v85 to <16 x float> ; reinterpret the selected bits as the float result
    184   ret <16 x float> %v86
    185 }
    186