; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=X86,X86-NOBMI
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi2 | FileCheck %s --check-prefixes=X86,X86-BMI
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=X64,X64-NOBMI
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi2 | FileCheck %s --check-prefixes=X64,X64-BMI

; The check blocks below are tool-generated. Do not edit them by hand;
; rerun utils/update_llc_test_checks.py after changing the IR or llc.

; PR1198

; foo returns the upper 64 bits of the 128-bit product of the two
; zero-extended i64 arguments, i.e. the high half of a 64x64->128 multiply.
; On x86-64 this should lower to a single mulq/mulxq.
define i64 @foo(i64 %x, i64 %y) nounwind {
; X86-NOBMI-LABEL: foo:
; X86-NOBMI:       # %bb.0:
; X86-NOBMI-NEXT:    pushl %ebp
; X86-NOBMI-NEXT:    pushl %ebx
; X86-NOBMI-NEXT:    pushl %edi
; X86-NOBMI-NEXT:    pushl %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NOBMI-NEXT:    movl %ecx, %eax
; X86-NOBMI-NEXT:    mull %ebp
; X86-NOBMI-NEXT:    movl %edx, %ebx
; X86-NOBMI-NEXT:    movl %esi, %eax
; X86-NOBMI-NEXT:    mull %ebp
; X86-NOBMI-NEXT:    movl %edx, %ebp
; X86-NOBMI-NEXT:    movl %eax, %esi
; X86-NOBMI-NEXT:    addl %ebx, %esi
; X86-NOBMI-NEXT:    adcl $0, %ebp
; X86-NOBMI-NEXT:    movl %ecx, %eax
; X86-NOBMI-NEXT:    mull %edi
; X86-NOBMI-NEXT:    movl %edx, %ebx
; X86-NOBMI-NEXT:    addl %esi, %eax
; X86-NOBMI-NEXT:    adcl %ebp, %ebx
; X86-NOBMI-NEXT:    setb %al
; X86-NOBMI-NEXT:    movzbl %al, %ecx
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    mull %edi
; X86-NOBMI-NEXT:    movl %edx, %esi
; X86-NOBMI-NEXT:    movl %eax, %ebp
; X86-NOBMI-NEXT:    addl %ebx, %ebp
; X86-NOBMI-NEXT:    adcl %ecx, %esi
; X86-NOBMI-NEXT:    xorl %ecx, %ecx
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    mull %ecx
; X86-NOBMI-NEXT:    movl %edx, %edi
; X86-NOBMI-NEXT:    movl %eax, %ebx
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    mull %ecx
; X86-NOBMI-NEXT:    addl %ebx, %eax
; X86-NOBMI-NEXT:    adcl %edi, %edx
; X86-NOBMI-NEXT:    addl %ebp, %eax
; X86-NOBMI-NEXT:    adcl %esi, %edx
; X86-NOBMI-NEXT:    popl %esi
; X86-NOBMI-NEXT:    popl %edi
; X86-NOBMI-NEXT:    popl %ebx
; X86-NOBMI-NEXT:    popl %ebp
; X86-NOBMI-NEXT:    retl
;
; X86-BMI-LABEL: foo:
; X86-BMI:       # %bb.0:
; X86-BMI-NEXT:    pushl %ebp
; X86-BMI-NEXT:    pushl %ebx
; X86-BMI-NEXT:    pushl %edi
; X86-BMI-NEXT:    pushl %esi
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-BMI-NEXT:    movl %ecx, %edx
; X86-BMI-NEXT:    mulxl %esi, %edx, %ebx
; X86-BMI-NEXT:    movl %eax, %edx
; X86-BMI-NEXT:    mulxl %esi, %ebp, %eax
; X86-BMI-NEXT:    addl %ebx, %ebp
; X86-BMI-NEXT:    adcl $0, %eax
; X86-BMI-NEXT:    movl %ecx, %edx
; X86-BMI-NEXT:    mulxl %edi, %edx, %ebx
; X86-BMI-NEXT:    addl %ebp, %edx
; X86-BMI-NEXT:    adcl %eax, %ebx
; X86-BMI-NEXT:    setb %al
; X86-BMI-NEXT:    movzbl %al, %eax
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-BMI-NEXT:    mulxl %edi, %edi, %ebp
; X86-BMI-NEXT:    addl %ebx, %edi
; X86-BMI-NEXT:    adcl %eax, %ebp
; X86-BMI-NEXT:    xorl %eax, %eax
; X86-BMI-NEXT:    movl %esi, %edx
; X86-BMI-NEXT:    mulxl %eax, %ebx, %esi
; X86-BMI-NEXT:    movl %ecx, %edx
; X86-BMI-NEXT:    mulxl %eax, %eax, %edx
; X86-BMI-NEXT:    addl %ebx, %eax
; X86-BMI-NEXT:    adcl %esi, %edx
; X86-BMI-NEXT:    addl %edi, %eax
; X86-BMI-NEXT:    adcl %ebp, %edx
; X86-BMI-NEXT:    popl %esi
; X86-BMI-NEXT:    popl %edi
; X86-BMI-NEXT:    popl %ebx
; X86-BMI-NEXT:    popl %ebp
; X86-BMI-NEXT:    retl
;
; X64-NOBMI-LABEL: foo:
; X64-NOBMI:       # %bb.0:
; X64-NOBMI-NEXT:    movq %rdi, %rax
; X64-NOBMI-NEXT:    mulq %rsi
; X64-NOBMI-NEXT:    movq %rdx, %rax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI-LABEL: foo:
; X64-BMI:       # %bb.0:
; X64-BMI-NEXT:    movq %rdi, %rdx
; X64-BMI-NEXT:    mulxq %rsi, %rcx, %rax
; X64-BMI-NEXT:    retq
  %tmp0 = zext i64 %x to i128
  %tmp1 = zext i64 %y to i128
  %tmp2 = mul i128 %tmp0, %tmp1
  %tmp7 = zext i32 64 to i128
  %tmp3 = lshr i128 %tmp2, %tmp7
  %tmp4 = trunc i128 %tmp3 to i64
  ret i64 %tmp4
}

; <rdar://problem/14096009> superfluous multiply by high part of
; zero-extended value.

; mul1 is a multiply-with-carry loop: for each i in [0, n) it computes the
; 128-bit value x[i] * y + carry, stores the low 64 bits to z[i], and feeds
; the high 64 bits back in as the next iteration's carry; it returns 0.
; Since %y's zext has a known-zero high half, the generated code should not
; multiply by the high part (that is the rdar issue referenced above).
define i64 @mul1(i64 %n, i64* nocapture %z, i64* nocapture %x, i64 %y) nounwind {
; X86-NOBMI-LABEL: mul1:
; X86-NOBMI:       # %bb.0: # %entry
; X86-NOBMI-NEXT:    pushl %ebp
; X86-NOBMI-NEXT:    pushl %ebx
; X86-NOBMI-NEXT:    pushl %edi
; X86-NOBMI-NEXT:    pushl %esi
; X86-NOBMI-NEXT:    subl $28, %esp
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    orl %ecx, %eax
; X86-NOBMI-NEXT:    je .LBB1_3
; X86-NOBMI-NEXT:  # %bb.1: # %for.body.preheader
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:    xorl %ebx, %ebx
; X86-NOBMI-NEXT:    movl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
; X86-NOBMI-NEXT:    .p2align 4, 0x90
; X86-NOBMI-NEXT:  .LBB1_2: # %for.body
; X86-NOBMI-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-NOBMI-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
; X86-NOBMI-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    movl %eax, %ecx
; X86-NOBMI-NEXT:    movl (%eax,%ebx,8), %ebp
; X86-NOBMI-NEXT:    movl 4(%eax,%ebx,8), %esi
; X86-NOBMI-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
; X86-NOBMI-NEXT:    movl %ebp, %eax
; X86-NOBMI-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT:    mull %ecx
; X86-NOBMI-NEXT:    movl %edx, %edi
; X86-NOBMI-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
; X86-NOBMI-NEXT:    movl %esi, %eax
; X86-NOBMI-NEXT:    mull %ecx
; X86-NOBMI-NEXT:    movl %edx, %ecx
; X86-NOBMI-NEXT:    movl %eax, %esi
; X86-NOBMI-NEXT:    addl %edi, %esi
; X86-NOBMI-NEXT:    adcl $0, %ecx
; X86-NOBMI-NEXT:    movl %ebp, %eax
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NOBMI-NEXT:    mull %edx
; X86-NOBMI-NEXT:    movl %edx, %ebp
; X86-NOBMI-NEXT:    movl %eax, %edi
; X86-NOBMI-NEXT:    addl %esi, %edi
; X86-NOBMI-NEXT:    adcl %ecx, %ebp
; X86-NOBMI-NEXT:    setb {{[0-9]+}}(%esp) # 1-byte Folded Spill
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
; X86-NOBMI-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NOBMI-NEXT:    movl %edx, %ecx
; X86-NOBMI-NEXT:    movl %eax, %esi
; X86-NOBMI-NEXT:    addl %ebp, %esi
; X86-NOBMI-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # 1-byte Folded Reload
; X86-NOBMI-NEXT:    adcl %eax, %ecx
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:    mull %edx
; X86-NOBMI-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
; X86-NOBMI-NEXT:    movl %eax, %ebp
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax # 4-byte Reload
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:    mull %edx
; X86-NOBMI-NEXT:    addl %ebp, %eax
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X86-NOBMI-NEXT:    adcl {{[0-9]+}}(%esp), %edx # 4-byte Folded Reload
; X86-NOBMI-NEXT:    addl %esi, %eax
; X86-NOBMI-NEXT:    adcl %ecx, %edx
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
; X86-NOBMI-NEXT:    addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
; X86-NOBMI-NEXT:    adcl {{[0-9]+}}(%esp), %edi # 4-byte Folded Reload
; X86-NOBMI-NEXT:    adcl $0, %eax
; X86-NOBMI-NEXT:    adcl $0, %edx
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT:    movl %esi, (%ecx,%ebx,8)
; X86-NOBMI-NEXT:    movl %edi, 4(%ecx,%ebx,8)
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NOBMI-NEXT:    movl %ecx, %edi
; X86-NOBMI-NEXT:    addl $1, %ebx
; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
; X86-NOBMI-NEXT:    adcl $0, %esi
; X86-NOBMI-NEXT:    movl %ebx, %ecx
; X86-NOBMI-NEXT:    xorl %ebp, %ecx
; X86-NOBMI-NEXT:    movl %esi, {{[0-9]+}}(%esp) # 4-byte Spill
; X86-NOBMI-NEXT:    xorl %edi, %esi
; X86-NOBMI-NEXT:    orl %ecx, %esi
; X86-NOBMI-NEXT:    jne .LBB1_2
; X86-NOBMI-NEXT:  .LBB1_3: # %for.end
; X86-NOBMI-NEXT:    xorl %eax, %eax
; X86-NOBMI-NEXT:    xorl %edx, %edx
; X86-NOBMI-NEXT:    addl $28, %esp
; X86-NOBMI-NEXT:    popl %esi
; X86-NOBMI-NEXT:    popl %edi
; X86-NOBMI-NEXT:    popl %ebx
; X86-NOBMI-NEXT:    popl %ebp
; X86-NOBMI-NEXT:    retl
;
; X86-BMI-LABEL: mul1:
; X86-BMI:       # %bb.0: # %entry
; X86-BMI-NEXT:    pushl %ebp
; X86-BMI-NEXT:    pushl %ebx
; X86-BMI-NEXT:    pushl %edi
; X86-BMI-NEXT:    pushl %esi
; X86-BMI-NEXT:    subl $20, %esp
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT:    orl %ecx, %eax
; X86-BMI-NEXT:    je .LBB1_3
; X86-BMI-NEXT:  # %bb.1: # %for.body.preheader
; X86-BMI-NEXT:    xorl %ecx, %ecx
; X86-BMI-NEXT:    xorl %edx, %edx
; X86-BMI-NEXT:    xorl %edi, %edi
; X86-BMI-NEXT:    movl $0, {{[0-9]+}}(%esp) # 4-byte Folded Spill
; X86-BMI-NEXT:    .p2align 4, 0x90
; X86-BMI-NEXT:  .LBB1_2: # %for.body
; X86-BMI-NEXT:    # =>This Inner Loop Header: Depth=1
; X86-BMI-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
; X86-BMI-NEXT:    movl %edx, {{[0-9]+}}(%esp) # 4-byte Spill
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT:    movl (%eax,%edi,8), %ecx
; X86-BMI-NEXT:    movl 4(%eax,%edi,8), %ebx
; X86-BMI-NEXT:    movl %ebx, (%esp) # 4-byte Spill
; X86-BMI-NEXT:    movl %ecx, %edx
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT:    movl %eax, %esi
; X86-BMI-NEXT:    mulxl %eax, %eax, %ebp
; X86-BMI-NEXT:    movl %eax, {{[0-9]+}}(%esp) # 4-byte Spill
; X86-BMI-NEXT:    movl %ebx, %edx
; X86-BMI-NEXT:    mulxl %esi, %eax, %esi
; X86-BMI-NEXT:    addl %ebp, %eax
; X86-BMI-NEXT:    adcl $0, %esi
; X86-BMI-NEXT:    movl %ecx, %edx
; X86-BMI-NEXT:    mulxl {{[0-9]+}}(%esp), %ebp, %ebx
; X86-BMI-NEXT:    addl %eax, %ebp
; X86-BMI-NEXT:    adcl %esi, %ebx
; X86-BMI-NEXT:    movl (%esp), %edx # 4-byte Reload
; X86-BMI-NEXT:    mulxl {{[0-9]+}}(%esp), %eax, %esi
; X86-BMI-NEXT:    setb %dl
; X86-BMI-NEXT:    addl %ebx, %eax
; X86-BMI-NEXT:    movzbl %dl, %edx
; X86-BMI-NEXT:    adcl %edx, %esi
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-BMI-NEXT:    xorl %ebx, %ebx
; X86-BMI-NEXT:    mulxl %ebx, %ebx, %edx
; X86-BMI-NEXT:    movl %edx, (%esp) # 4-byte Spill
; X86-BMI-NEXT:    movl %ecx, %edx
; X86-BMI-NEXT:    xorl %ecx, %ecx
; X86-BMI-NEXT:    mulxl %ecx, %ecx, %edx
; X86-BMI-NEXT:    addl %ebx, %ecx
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X86-BMI-NEXT:    adcl (%esp), %edx # 4-byte Folded Reload
; X86-BMI-NEXT:    addl %eax, %ecx
; X86-BMI-NEXT:    adcl %esi, %edx
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %esi # 4-byte Reload
; X86-BMI-NEXT:    addl {{[0-9]+}}(%esp), %esi # 4-byte Folded Reload
; X86-BMI-NEXT:    adcl {{[0-9]+}}(%esp), %ebp # 4-byte Folded Reload
; X86-BMI-NEXT:    adcl $0, %ecx
; X86-BMI-NEXT:    adcl $0, %edx
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT:    movl %esi, (%eax,%edi,8)
; X86-BMI-NEXT:    movl %ebp, 4(%eax,%edi,8)
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-BMI-NEXT:    movl %eax, %esi
; X86-BMI-NEXT:    addl $1, %edi
; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ebp # 4-byte Reload
; X86-BMI-NEXT:    adcl $0, %ebp
; X86-BMI-NEXT:    movl %edi, %eax
; X86-BMI-NEXT:    xorl %esi, %eax
; X86-BMI-NEXT:    movl %ebp, {{[0-9]+}}(%esp) # 4-byte Spill
; X86-BMI-NEXT:    movl %ebp, %esi
; X86-BMI-NEXT:    xorl %ebx, %esi
; X86-BMI-NEXT:    orl %eax, %esi
; X86-BMI-NEXT:    jne .LBB1_2
; X86-BMI-NEXT:  .LBB1_3: # %for.end
; X86-BMI-NEXT:    xorl %eax, %eax
; X86-BMI-NEXT:    xorl %edx, %edx
; X86-BMI-NEXT:    addl $20, %esp
; X86-BMI-NEXT:    popl %esi
; X86-BMI-NEXT:    popl %edi
; X86-BMI-NEXT:    popl %ebx
; X86-BMI-NEXT:    popl %ebp
; X86-BMI-NEXT:    retl
;
; X64-NOBMI-LABEL: mul1:
; X64-NOBMI:       # %bb.0: # %entry
; X64-NOBMI-NEXT:    testq %rdi, %rdi
; X64-NOBMI-NEXT:    je .LBB1_3
; X64-NOBMI-NEXT:  # %bb.1: # %for.body.preheader
; X64-NOBMI-NEXT:    movq %rcx, %r8
; X64-NOBMI-NEXT:    movq %rdx, %r9
; X64-NOBMI-NEXT:    xorl %r10d, %r10d
; X64-NOBMI-NEXT:    xorl %ecx, %ecx
; X64-NOBMI-NEXT:    .p2align 4, 0x90
; X64-NOBMI-NEXT:  .LBB1_2: # %for.body
; X64-NOBMI-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-NOBMI-NEXT:    movq %r8, %rax
; X64-NOBMI-NEXT:    mulq (%r9,%rcx,8)
; X64-NOBMI-NEXT:    addq %r10, %rax
; X64-NOBMI-NEXT:    adcq $0, %rdx
; X64-NOBMI-NEXT:    movq %rax, (%rsi,%rcx,8)
; X64-NOBMI-NEXT:    incq %rcx
; X64-NOBMI-NEXT:    cmpq %rcx, %rdi
; X64-NOBMI-NEXT:    movq %rdx, %r10
; X64-NOBMI-NEXT:    jne .LBB1_2
; X64-NOBMI-NEXT:  .LBB1_3: # %for.end
; X64-NOBMI-NEXT:    xorl %eax, %eax
; X64-NOBMI-NEXT:    retq
;
; X64-BMI-LABEL: mul1:
; X64-BMI:       # %bb.0: # %entry
; X64-BMI-NEXT:    testq %rdi, %rdi
; X64-BMI-NEXT:    je .LBB1_3
; X64-BMI-NEXT:  # %bb.1: # %for.body.preheader
; X64-BMI-NEXT:    movq %rcx, %r8
; X64-BMI-NEXT:    movq %rdx, %r9
; X64-BMI-NEXT:    xorl %r10d, %r10d
; X64-BMI-NEXT:    xorl %eax, %eax
; X64-BMI-NEXT:    .p2align 4, 0x90
; X64-BMI-NEXT:  .LBB1_2: # %for.body
; X64-BMI-NEXT:    # =>This Inner Loop Header: Depth=1
; X64-BMI-NEXT:    movq %r8, %rdx
; X64-BMI-NEXT:    mulxq (%r9,%rax,8), %rcx, %rdx
; X64-BMI-NEXT:    addq %r10, %rcx
; X64-BMI-NEXT:    adcq $0, %rdx
; X64-BMI-NEXT:    movq %rcx, (%rsi,%rax,8)
; X64-BMI-NEXT:    incq %rax
; X64-BMI-NEXT:    cmpq %rax, %rdi
; X64-BMI-NEXT:    movq %rdx, %r10
; X64-BMI-NEXT:    jne .LBB1_2
; X64-BMI-NEXT:  .LBB1_3: # %for.end
; X64-BMI-NEXT:    xorl %eax, %eax
; X64-BMI-NEXT:    retq
entry:
  %conv = zext i64 %y to i128
  %cmp11 = icmp eq i64 %n, 0
  br i1 %cmp11, label %for.end, label %for.body

for.body:                                         ; preds = %entry, %for.body
  %carry.013 = phi i64 [ %conv6, %for.body ], [ 0, %entry ]
  %i.012 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i64, i64* %x, i64 %i.012
  %0 = load i64, i64* %arrayidx, align 8
  %conv2 = zext i64 %0 to i128
  %mul = mul i128 %conv2, %conv
  %conv3 = zext i64 %carry.013 to i128
  %add = add i128 %mul, %conv3
  %conv4 = trunc i128 %add to i64
  %arrayidx5 = getelementptr inbounds i64, i64* %z, i64 %i.012
  store i64 %conv4, i64* %arrayidx5, align 8
  %shr = lshr i128 %add, 64
  %conv6 = trunc i128 %shr to i64
  %inc = add i64 %i.012, 1
  %exitcond = icmp eq i64 %inc, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  ret i64 0
}