Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell | FileCheck %s --check-prefix=HSW
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake | FileCheck %s --check-prefix=SKL
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx | FileCheck %s --check-prefix=SKL
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=silvermont -mattr=+lzcnt,+bmi | FileCheck %s --check-prefix=SKL
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=goldmont -mattr=+lzcnt,+bmi | FileCheck %s --check-prefix=SKL
      6 
      7 ; This tests the fix for LLVM bug 33869 (false dependency on the destination register of popcnt/lzcnt/tzcnt): https://bugs.llvm.org/show_bug.cgi?id=33869
      8 
      9 declare i32 @llvm.ctpop.i32(i32)       ; called by @loopdep_popcnt32
     10 declare i64 @llvm.ctpop.i64(i64)       ; called by @loopdep_popcnt64
     11 declare i64 @llvm.ctlz.i64(i64, i1)    ; called by @loopdep_lzct64
     12 declare i32 @llvm.cttz.i32(i32, i1)    ; called by @loopdep_tzct32
     13 declare i64 @llvm.cttz.i64(i64, i1)    ; called by @loopdep_tzct64
     14 declare i32 @llvm.ctlz.i32(i32, i1)    ; called by @loopdep_lzct32
     15 
     16 define i32 @loopdep_popcnt32(i32* nocapture %x, double* nocapture %y) nounwind {
        ; Codegen test for 32-bit popcnt: a counted loop adds ctpop(%i) to a
        ; running sum seeded with the i32 loaded from %x, and returns that sum.
        ; %y is not referenced anywhere in the body.
     17 entry:
     18   %vx = load i32, i32* %x
     19   br label %loop
     20 loop:
          ; %i is the induction variable (starts at 1); %s1 is the running sum.
     21   %i = phi i32 [ 1, %entry ], [ %inc, %loop ]
     22   %s1 = phi i32 [ %vx, %entry ], [ %s2, %loop ]
          ; Empty sideeffect inline asm: the constraint string only lists clobbers
          ; (eax, ebx, ecx, edx, esi, edi, ebp, dirflag, fpsr, flags).
          ; NOTE(review): presumably makes these registers look freshly written
          ; right before the popcnt so the dependency-breaking logic triggers --
          ; confirm against bug 33869.
     23   tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
     24   %j = tail call i32 @llvm.ctpop.i32(i32 %i)
     25   %s2 = add i32 %s1, %j
     26   %inc = add nsw i32 %i, 1
          ; Same clobber-only asm again after the computation.
     27   tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
          ; Leave the loop once %inc reaches 156250000.
     28   %exitcond = icmp eq i32 %inc, 156250000
     29   br i1 %exitcond, label %ret, label %loop
     30 ret:
     31   ret i32 %s2
     32 
        ; HSW prefix: expect an xorl that zeroes one of %eax/%ebx/%ecx/%edx,
        ; immediately followed by a popcntl whose destination is that register.
     33 ;HSW-LABEL:@loopdep_popcnt32
     34 ;HSW: xorl [[GPR0:%e[a-d]x]], [[GPR0]]
     35 ;HSW-NEXT: popcntl {{.*}}, [[GPR0]]
     36 
        ; SKL prefix: the same xorl + popcntl pair is expected here as well.
     37 ;SKL-LABEL:@loopdep_popcnt32
     38 ;SKL: xorl [[GPR0:%e[a-d]x]], [[GPR0]]
     39 ;SKL-NEXT: popcntl {{.*}}, [[GPR0]]
     40 }
     41 
     42 define i64 @loopdep_popcnt64(i64* nocapture %x, double* nocapture %y) nounwind {
        ; Codegen test for 64-bit popcnt: a counted loop adds ctpop(%i) to a
        ; running sum seeded with the i64 loaded from %x, and returns that sum.
        ; %y is not referenced anywhere in the body.
     43 entry:
     44   %vx = load i64, i64* %x
     45   br label %loop
     46 loop:
          ; %i is the induction variable (starts at 1); %s1 is the running sum.
     47   %i = phi i64 [ 1, %entry ], [ %inc, %loop ]
     48   %s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ]
          ; Empty sideeffect inline asm: the constraint string only lists clobbers
          ; (eax, ebx, ecx, edx, esi, edi, ebp, dirflag, fpsr, flags).
          ; NOTE(review): presumably makes these registers look freshly written
          ; right before the popcnt so the dependency-breaking logic triggers --
          ; confirm against bug 33869.
     49   tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
     50   %j = tail call i64 @llvm.ctpop.i64(i64 %i)
     51   %s2 = add i64 %s1, %j
     52   %inc = add nsw i64 %i, 1
          ; Same clobber-only asm again after the computation.
     53   tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
          ; Leave the loop once %inc reaches 156250000.
     54   %exitcond = icmp eq i64 %inc, 156250000
     55   br i1 %exitcond, label %ret, label %loop
     56 ret:
     57   ret i64 %s2
     58 
        ; HSW prefix: expect a 32-bit xorl on %eax/%ebx/%ecx/%edx immediately
        ; followed by a popcntq writing the matching 64-bit register (%rax etc.).
        ; Writing the 32-bit register also zeroes the upper half of the 64-bit one.
     59 ;HSW-LABEL:@loopdep_popcnt64
     60 ;HSW: xorl %e[[GPR0:[a-d]x]], %e[[GPR0]]
     61 ;HSW-NEXT: popcntq {{.*}}, %r[[GPR0]]
     62 
        ; SKL prefix: the same xorl + popcntq pair is expected here as well.
     63 ;SKL-LABEL:@loopdep_popcnt64
     64 ;SKL: xorl %e[[GPR0:[a-d]x]], %e[[GPR0]]
     65 ;SKL-NEXT: popcntq {{.*}}, %r[[GPR0]]
     66 }
     67 
     68 define i32 @loopdep_tzct32(i32* nocapture %x, double* nocapture %y) nounwind {
        ; Codegen test for 32-bit tzcnt: a counted loop adds cttz(%i) to a
        ; running sum seeded with the i32 loaded from %x, and returns that sum.
        ; %y is not referenced anywhere in the body.
     69 entry:
     70   %vx = load i32, i32* %x
     71   br label %loop
     72 loop:
          ; %i is the induction variable (starts at 1); %s1 is the running sum.
     73   %i = phi i32 [ 1, %entry ], [ %inc, %loop ]
     74   %s1 = phi i32 [ %vx, %entry ], [ %s2, %loop ]
          ; Empty sideeffect inline asm: the constraint string only lists clobbers
          ; (eax, ebx, ecx, edx, esi, edi, ebp, dirflag, fpsr, flags).
          ; NOTE(review): presumably makes these registers look freshly written
          ; right before the tzcnt so the dependency-breaking logic triggers --
          ; confirm against bug 33869.
     75   tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
          ; The i1 true operand is the intrinsic's zero-input flag (see the LLVM
          ; LangRef entry for llvm.cttz).
     76   %j = call i32 @llvm.cttz.i32(i32 %i, i1 true)
     77   %s2 = add i32 %s1, %j
     78   %inc = add nsw i32 %i, 1
          ; Same clobber-only asm again after the computation.
     79   tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
          ; Leave the loop once %inc reaches 156250000.
     80   %exitcond = icmp eq i32 %inc, 156250000
     81   br i1 %exitcond, label %ret, label %loop
     82 ret:
     83   ret i32 %s2
     84 
        ; HSW prefix: expect an xorl that zeroes one of %eax/%ebx/%ecx/%edx,
        ; immediately followed by a tzcntl whose destination is that register.
     85 ;HSW-LABEL:@loopdep_tzct32
     86 ;HSW: xorl [[GPR0:%e[a-d]x]], [[GPR0]]
     87 ;HSW-NEXT: tzcntl {{.*}}, [[GPR0]]
     88 
     89 ; This false dependency issue was fixed in Skylake
        ; SKL prefix: no "xor" may appear between the function label and the tzcntl.
     90 ;SKL-LABEL:@loopdep_tzct32
     91 ;SKL-NOT: xor
     92 ;SKL: tzcntl
     93 }
     94 
     95 define i64 @loopdep_tzct64(i64* nocapture %x, double* nocapture %y) nounwind {
        ; Codegen test for 64-bit tzcnt: a counted loop adds cttz(%i) to a
        ; running sum seeded with the i64 loaded from %x, and returns that sum.
        ; %y is not referenced anywhere in the body.
     96 entry:
     97   %vx = load i64, i64* %x
     98   br label %loop
     99 loop:
          ; %i is the induction variable (starts at 1); %s1 is the running sum.
    100   %i = phi i64 [ 1, %entry ], [ %inc, %loop ]
    101   %s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ]
          ; Empty sideeffect inline asm: the constraint string only lists clobbers
          ; (eax, ebx, ecx, edx, esi, edi, ebp, dirflag, fpsr, flags).
          ; NOTE(review): presumably makes these registers look freshly written
          ; right before the tzcnt so the dependency-breaking logic triggers --
          ; confirm against bug 33869.
    102   tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
          ; The i1 true operand is the intrinsic's zero-input flag (see the LLVM
          ; LangRef entry for llvm.cttz).
    103   %j = tail call i64 @llvm.cttz.i64(i64 %i, i1 true)
    104   %s2 = add i64 %s1, %j
    105   %inc = add nsw i64 %i, 1
          ; Same clobber-only asm again after the computation.
    106   tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
          ; Leave the loop once %inc reaches 156250000.
    107   %exitcond = icmp eq i64 %inc, 156250000
    108   br i1 %exitcond, label %ret, label %loop
    109 ret:
    110   ret i64 %s2
    111 
        ; HSW prefix: expect a 32-bit xorl on %eax/%ebx/%ecx/%edx immediately
        ; followed by a tzcntq writing the matching 64-bit register (%rax etc.).
    112 ;HSW-LABEL:@loopdep_tzct64
    113 ;HSW: xorl %e[[GPR0:[a-d]x]], %e[[GPR0]]
    114 ;HSW-NEXT: tzcntq {{.*}}, %r[[GPR0]]
    115 
    116 ; This false dependency issue was fixed in Skylake
        ; SKL prefix: no "xor" may appear between the function label and the tzcntq.
    117 ;SKL-LABEL:@loopdep_tzct64
    118 ;SKL-NOT: xor
    119 ;SKL: tzcntq
    120 }
    121 
    122 define i32 @loopdep_lzct32(i32* nocapture %x, double* nocapture %y) nounwind {
        ; Codegen test for 32-bit lzcnt: a counted loop adds ctlz(%i) to a
        ; running sum seeded with the i32 loaded from %x, and returns that sum.
        ; %y is not referenced anywhere in the body.
    123 entry:
    124   %vx = load i32, i32* %x
    125   br label %loop
    126 loop:
          ; %i is the induction variable (starts at 1); %s1 is the running sum.
    127   %i = phi i32 [ 1, %entry ], [ %inc, %loop ]
    128   %s1 = phi i32 [ %vx, %entry ], [ %s2, %loop ]
          ; Empty sideeffect inline asm: the constraint string only lists clobbers
          ; (eax, ebx, ecx, edx, esi, edi, ebp, dirflag, fpsr, flags).
          ; NOTE(review): presumably makes these registers look freshly written
          ; right before the lzcnt so the dependency-breaking logic triggers --
          ; confirm against bug 33869.
    129   tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
          ; The i1 true operand is the intrinsic's zero-input flag (see the LLVM
          ; LangRef entry for llvm.ctlz).
    130   %j = call i32 @llvm.ctlz.i32(i32 %i, i1 true)
    131   %s2 = add i32 %s1, %j
    132   %inc = add nsw i32 %i, 1
          ; Same clobber-only asm again after the computation.
    133   tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
          ; Leave the loop once %inc reaches 156250000.
    134   %exitcond = icmp eq i32 %inc, 156250000
    135   br i1 %exitcond, label %ret, label %loop
    136 ret:
    137   ret i32 %s2
    138 
        ; HSW prefix: expect an xorl that zeroes one of %eax/%ebx/%ecx/%edx,
        ; immediately followed by an lzcntl whose destination is that register.
    139 ;HSW-LABEL:@loopdep_lzct32
    140 ;HSW: xorl [[GPR0:%e[a-d]x]], [[GPR0]]
    141 ;HSW-NEXT: lzcntl {{.*}}, [[GPR0]]
    142 
    143 ; This false dependency issue was fixed in Skylake
        ; SKL prefix: no "xor" may appear between the function label and the lzcntl.
    144 ;SKL-LABEL:@loopdep_lzct32
    145 ;SKL-NOT: xor
    146 ;SKL: lzcntl
    147 }
    148 
    149 define i64 @loopdep_lzct64(i64* nocapture %x, double* nocapture %y) nounwind {
        ; Codegen test for 64-bit lzcnt: a counted loop adds ctlz(%i) to a
        ; running sum seeded with the i64 loaded from %x, and returns that sum.
        ; %y is not referenced anywhere in the body.
    150 entry:
    151   %vx = load i64, i64* %x
    152   br label %loop
    153 loop:
          ; %i is the induction variable (starts at 1); %s1 is the running sum.
    154   %i = phi i64 [ 1, %entry ], [ %inc, %loop ]
    155   %s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ]
          ; Empty sideeffect inline asm: the constraint string only lists clobbers
          ; (eax, ebx, ecx, edx, esi, edi, ebp, dirflag, fpsr, flags).
          ; NOTE(review): presumably makes these registers look freshly written
          ; right before the lzcnt so the dependency-breaking logic triggers --
          ; confirm against bug 33869.
    156   tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
          ; The i1 true operand is the intrinsic's zero-input flag (see the LLVM
          ; LangRef entry for llvm.ctlz).
    157   %j = tail call i64 @llvm.ctlz.i64(i64 %i, i1 true)
    158   %s2 = add i64 %s1, %j
    159   %inc = add nsw i64 %i, 1
          ; Same clobber-only asm again after the computation.
    160   tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
          ; Leave the loop once %inc reaches 156250000.
    161   %exitcond = icmp eq i64 %inc, 156250000
    162   br i1 %exitcond, label %ret, label %loop
    163 ret:
    164   ret i64 %s2
    165 
        ; HSW prefix: expect a 32-bit xorl on %eax/%ebx/%ecx/%edx immediately
        ; followed by an lzcntq writing the matching 64-bit register (%rax etc.).
    166 ;HSW-LABEL:@loopdep_lzct64
    167 ;HSW: xorl %e[[GPR0:[a-d]x]], %e[[GPR0]]
    168 ;HSW-NEXT: lzcntq {{.*}}, %r[[GPR0]]
    169 
    170 ; This false dependency issue was fixed in Skylake
        ; SKL prefix: no "xor" may appear between the function label and the lzcntq.
    171 ;SKL-LABEL:@loopdep_lzct64
    172 ;SKL-NOT: xor
    173 ;SKL: lzcntq
    174 }
    175