; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=haswell | FileCheck %s --check-prefix=HSW
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skylake | FileCheck %s --check-prefix=SKL
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx | FileCheck %s --check-prefix=SKL
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=silvermont -mattr=+lzcnt,+bmi | FileCheck %s --check-prefix=SKL
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=goldmont -mattr=+lzcnt,+bmi | FileCheck %s --check-prefix=SKL

; This tests a fix for bugzilla 33869 https://bugs.llvm.org/show_bug.cgi?id=33869
;
; Every function below has the same shape: a loop whose counter starts at 1
; and exits once the incremented counter equals 156250000, adding the result
; of one bit-count intrinsic (ctpop, cttz or ctlz) of the counter to a running
; sum. The empty inline-asm statements placed before and after the intrinsic
; clobber eax, ebx, ecx, edx, esi, edi, ebp and the flags.
;
; What the checks look for:
;   * Under the HSW prefix, every popcnt/tzcnt/lzcnt must be immediately
;     preceded by an xorl that zeroes the register the count instruction
;     writes (for the 64-bit variants, the 32-bit half of that register).
;   * Under the SKL prefix, the same xorl is required for popcnt only; for
;     tzcnt and lzcnt no xor may appear between the function label and the
;     count instruction.

declare i32 @llvm.ctpop.i32(i32)
declare i64 @llvm.ctpop.i64(i64)
declare i64 @llvm.ctlz.i64(i64, i1)
declare i32 @llvm.cttz.i32(i32, i1)
declare i64 @llvm.cttz.i64(i64, i1)
declare i32 @llvm.ctlz.i32(i32, i1)

; 32-bit population count; the xor is expected under both prefixes.
define i32 @loopdep_popcnt32(i32* nocapture %x, double* nocapture %y) nounwind {
entry:
  %vx = load i32, i32* %x
  br label %loop
loop:
  %i = phi i32 [ 1, %entry ], [ %inc, %loop ]
  %s1 = phi i32 [ %vx, %entry ], [ %s2, %loop ]
  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
  %j = tail call i32 @llvm.ctpop.i32(i32 %i)
  %s2 = add i32 %s1, %j
  %inc = add nsw i32 %i, 1
  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
  %exitcond = icmp eq i32 %inc, 156250000
  br i1 %exitcond, label %ret, label %loop
ret:
  ret i32 %s2

;HSW-LABEL:@loopdep_popcnt32
;HSW: xorl [[GPR0:%e[a-d]x]], [[GPR0]]
;HSW-NEXT: popcntl {{.*}}, [[GPR0]]

;SKL-LABEL:@loopdep_popcnt32
;SKL: xorl [[GPR0:%e[a-d]x]], [[GPR0]]
;SKL-NEXT: popcntl {{.*}}, [[GPR0]]
}

; 64-bit population count; the xor is expected under both prefixes.
define i64 @loopdep_popcnt64(i64* nocapture %x, double* nocapture %y) nounwind {
entry:
  %vx = load i64, i64* %x
  br label %loop
loop:
  %i = phi i64 [ 1, %entry ], [ %inc, %loop ]
  %s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ]
  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
  %j = tail call i64 @llvm.ctpop.i64(i64 %i)
  %s2 = add i64 %s1, %j
  %inc = add nsw i64 %i, 1
  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
  %exitcond = icmp eq i64 %inc, 156250000
  br i1 %exitcond, label %ret, label %loop
ret:
  ret i64 %s2

;HSW-LABEL:@loopdep_popcnt64
;HSW: xorl %e[[GPR0:[a-d]x]], %e[[GPR0]]
;HSW-NEXT: popcntq {{.*}}, %r[[GPR0]]

;SKL-LABEL:@loopdep_popcnt64
;SKL: xorl %e[[GPR0:[a-d]x]], %e[[GPR0]]
;SKL-NEXT: popcntq {{.*}}, %r[[GPR0]]
}

; 32-bit count-trailing-zeros (zero input flagged as undefined via i1 true).
define i32 @loopdep_tzct32(i32* nocapture %x, double* nocapture %y) nounwind {
entry:
  %vx = load i32, i32* %x
  br label %loop
loop:
  %i = phi i32 [ 1, %entry ], [ %inc, %loop ]
  %s1 = phi i32 [ %vx, %entry ], [ %s2, %loop ]
  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
  %j = call i32 @llvm.cttz.i32(i32 %i, i1 true)
  %s2 = add i32 %s1, %j
  %inc = add nsw i32 %i, 1
  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
  %exitcond = icmp eq i32 %inc, 156250000
  br i1 %exitcond, label %ret, label %loop
ret:
  ret i32 %s2

;HSW-LABEL:@loopdep_tzct32
;HSW: xorl [[GPR0:%e[a-d]x]], [[GPR0]]
;HSW-NEXT: tzcntl {{.*}}, [[GPR0]]

; This false dependency issue was fixed in Skylake
;SKL-LABEL:@loopdep_tzct32
;SKL-NOT: xor
;SKL: tzcntl
}

; 64-bit count-trailing-zeros (zero input flagged as undefined via i1 true).
define i64 @loopdep_tzct64(i64* nocapture %x, double* nocapture %y) nounwind {
entry:
  %vx = load i64, i64* %x
  br label %loop
loop:
  %i = phi i64 [ 1, %entry ], [ %inc, %loop ]
  %s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ]
  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
  %j = tail call i64 @llvm.cttz.i64(i64 %i, i1 true)
  %s2 = add i64 %s1, %j
  %inc = add nsw i64 %i, 1
  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
  %exitcond = icmp eq i64 %inc, 156250000
  br i1 %exitcond, label %ret, label %loop
ret:
  ret i64 %s2

;HSW-LABEL:@loopdep_tzct64
;HSW: xorl %e[[GPR0:[a-d]x]], %e[[GPR0]]
;HSW-NEXT: tzcntq {{.*}}, %r[[GPR0]]

; This false dependency issue was fixed in Skylake
;SKL-LABEL:@loopdep_tzct64
;SKL-NOT: xor
;SKL: tzcntq
}

; 32-bit count-leading-zeros (zero input flagged as undefined via i1 true).
define i32 @loopdep_lzct32(i32* nocapture %x, double* nocapture %y) nounwind {
entry:
  %vx = load i32, i32* %x
  br label %loop
loop:
  %i = phi i32 [ 1, %entry ], [ %inc, %loop ]
  %s1 = phi i32 [ %vx, %entry ], [ %s2, %loop ]
  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
  %j = call i32 @llvm.ctlz.i32(i32 %i, i1 true)
  %s2 = add i32 %s1, %j
  %inc = add nsw i32 %i, 1
  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
  %exitcond = icmp eq i32 %inc, 156250000
  br i1 %exitcond, label %ret, label %loop
ret:
  ret i32 %s2

;HSW-LABEL:@loopdep_lzct32
;HSW: xorl [[GPR0:%e[a-d]x]], [[GPR0]]
;HSW-NEXT: lzcntl {{.*}}, [[GPR0]]

; This false dependency issue was fixed in Skylake
;SKL-LABEL:@loopdep_lzct32
;SKL-NOT: xor
;SKL: lzcntl
}

; 64-bit count-leading-zeros (zero input flagged as undefined via i1 true).
define i64 @loopdep_lzct64(i64* nocapture %x, double* nocapture %y) nounwind {
entry:
  %vx = load i64, i64* %x
  br label %loop
loop:
  %i = phi i64 [ 1, %entry ], [ %inc, %loop ]
  %s1 = phi i64 [ %vx, %entry ], [ %s2, %loop ]
  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
  %j = tail call i64 @llvm.ctlz.i64(i64 %i, i1 true)
  %s2 = add i64 %s1, %j
  %inc = add nsw i64 %i, 1
  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{esi},~{edi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
  %exitcond = icmp eq i64 %inc, 156250000
  br i1 %exitcond, label %ret, label %loop
ret:
  ret i64 %s2

;HSW-LABEL:@loopdep_lzct64
;HSW: xorl %e[[GPR0:[a-d]x]], %e[[GPR0]]
;HSW-NEXT: lzcntq {{.*}}, %r[[GPR0]]

; This false dependency issue was fixed in Skylake
;SKL-LABEL:@loopdep_lzct64
;SKL-NOT: xor
;SKL: lzcntq
}