; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=sse2 | FileCheck %s --check-prefixes=ANY,X32-SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=ANY,X64-AVX2

declare i8 @llvm.fshl.i8(i8, i8, i8)
declare i16 @llvm.fshl.i16(i16, i16, i16)
declare i32 @llvm.fshl.i32(i32, i32, i32)
declare i64 @llvm.fshl.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

declare i8 @llvm.fshr.i8(i8, i8, i8)
declare i16 @llvm.fshr.i16(i16, i16, i16)
declare i32 @llvm.fshr.i32(i32, i32, i32)
declare i64 @llvm.fshr.i64(i64, i64, i64)
declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)

; General case - all operands can be variables - x86 has shld, but that's not matched.

define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X32-SSE2-LABEL: fshl_i32:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: pushl %edi
; X32-SSE2-NEXT: pushl %esi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT: movl $32, %ecx
; X32-SSE2-NEXT: subl %edx, %ecx
; X32-SSE2-NEXT: # kill: def $cl killed $cl killed $ecx
; X32-SSE2-NEXT: shrl %cl, %edi
; X32-SSE2-NEXT: andl $31, %edx
; X32-SSE2-NEXT: movl %esi, %eax
; X32-SSE2-NEXT: movl %edx, %ecx
; X32-SSE2-NEXT: shll %cl, %eax
; X32-SSE2-NEXT: orl %edi, %eax
; X32-SSE2-NEXT: testl %edx, %edx
; X32-SSE2-NEXT: cmovel %esi, %eax
; X32-SSE2-NEXT: popl %esi
; X32-SSE2-NEXT: popl %edi
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl $32, %ecx
; X64-AVX2-NEXT: subl %edx, %ecx
; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx
; X64-AVX2-NEXT: shrl %cl, %esi
; X64-AVX2-NEXT: andl $31, %edx
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: movl %edx, %ecx
; X64-AVX2-NEXT: shll %cl, %eax
; X64-AVX2-NEXT: orl %esi, %eax
; X64-AVX2-NEXT: testl %edx, %edx
; X64-AVX2-NEXT: cmovel %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Verify that weird types are minimally supported.
; i37 is not legal, so the shift amounts are reduced modulo 37 via __umoddi3
; (32-bit) or a multiply-by-reciprocal sequence (64-bit) before shifting.
declare i37 @llvm.fshl.i37(i37, i37, i37)
define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X32-SSE2-LABEL: fshl_i37:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: pushl %ebp
; X32-SSE2-NEXT: pushl %ebx
; X32-SSE2-NEXT: pushl %edi
; X32-SSE2-NEXT: pushl %esi
; X32-SSE2-NEXT: subl $8, %esp
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT: andl $31, %esi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: movl %eax, %ebp
; X32-SSE2-NEXT: andl $31, %ebp
; X32-SSE2-NEXT: movl $37, %ecx
; X32-SSE2-NEXT: subl %ebx, %ecx
; X32-SSE2-NEXT: movl $0, %edx
; X32-SSE2-NEXT: sbbl %eax, %edx
; X32-SSE2-NEXT: andl $31, %edx
; X32-SSE2-NEXT: pushl $0
; X32-SSE2-NEXT: pushl $37
; X32-SSE2-NEXT: pushl %edx
; X32-SSE2-NEXT: pushl %ecx
; X32-SSE2-NEXT: calll __umoddi3
; X32-SSE2-NEXT: addl $16, %esp
; X32-SSE2-NEXT: movl %eax, (%esp) # 4-byte Spill
; X32-SSE2-NEXT: movl %eax, %ecx
; X32-SSE2-NEXT: shrdl %cl, %esi, %edi
; X32-SSE2-NEXT: pushl $0
; X32-SSE2-NEXT: pushl $37
; X32-SSE2-NEXT: pushl %ebp
; X32-SSE2-NEXT: pushl %ebx
; X32-SSE2-NEXT: calll __umoddi3
; X32-SSE2-NEXT: addl $16, %esp
; X32-SSE2-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT: movl %edx, %ebp
; X32-SSE2-NEXT: movl %eax, %ecx
; X32-SSE2-NEXT: shll %cl, %ebp
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebx
; X32-SSE2-NEXT: shldl %cl, %edx, %ebx
; X32-SSE2-NEXT: testb $32, %al
; X32-SSE2-NEXT: cmovnel %ebp, %ebx
; X32-SSE2-NEXT: movl $0, %edx
; X32-SSE2-NEXT: cmovnel %edx, %ebp
; X32-SSE2-NEXT: movl (%esp), %ecx # 4-byte Reload
; X32-SSE2-NEXT: shrl %cl, %esi
; X32-SSE2-NEXT: testb $32, %cl
; X32-SSE2-NEXT: cmovnel %esi, %edi
; X32-SSE2-NEXT: cmovnel %edx, %esi
; X32-SSE2-NEXT: orl %ebx, %esi
; X32-SSE2-NEXT: orl %ebp, %edi
; X32-SSE2-NEXT: orl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %edi
; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT: movl %edi, %eax
; X32-SSE2-NEXT: movl %esi, %edx
; X32-SSE2-NEXT: addl $8, %esp
; X32-SSE2-NEXT: popl %esi
; X32-SSE2-NEXT: popl %edi
; X32-SSE2-NEXT: popl %ebx
; X32-SSE2-NEXT: popl %ebp
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i37:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: pushq %rbx
; X64-AVX2-NEXT: movq %rdx, %r10
; X64-AVX2-NEXT: movabsq $137438953471, %r8 # imm = 0x1FFFFFFFFF
; X64-AVX2-NEXT: andq %r8, %rsi
; X64-AVX2-NEXT: movl $37, %r9d
; X64-AVX2-NEXT: subq %rdx, %r9
; X64-AVX2-NEXT: andq %r8, %r10
; X64-AVX2-NEXT: movabsq $-2492803253203993461, %r11 # imm = 0xDD67C8A60DD67C8B
; X64-AVX2-NEXT: movq %r10, %rax
; X64-AVX2-NEXT: mulq %r11
; X64-AVX2-NEXT: shrq $5, %rdx
; X64-AVX2-NEXT: leaq (%rdx,%rdx,8), %rax
; X64-AVX2-NEXT: leaq (%rdx,%rax,4), %rax
; X64-AVX2-NEXT: subq %rax, %r10
; X64-AVX2-NEXT: movq %rdi, %rbx
; X64-AVX2-NEXT: movl %r10d, %ecx
; X64-AVX2-NEXT: shlq %cl, %rbx
; X64-AVX2-NEXT: andq %r9, %r8
; X64-AVX2-NEXT: movq %r8, %rax
; X64-AVX2-NEXT: mulq %r11
; X64-AVX2-NEXT: shrq $5, %rdx
; X64-AVX2-NEXT: leaq (%rdx,%rdx,8), %rax
; X64-AVX2-NEXT: leal (%rdx,%rax,4), %eax
; X64-AVX2-NEXT: subl %eax, %r9d
; X64-AVX2-NEXT: movl %r9d, %ecx
; X64-AVX2-NEXT: shrq %cl, %rsi
; X64-AVX2-NEXT: orq %rbx, %rsi
; X64-AVX2-NEXT: testq %r10, %r10
; X64-AVX2-NEXT: cmoveq %rdi, %rsi
; X64-AVX2-NEXT: movq %rsi, %rax
; X64-AVX2-NEXT: popq %rbx
; X64-AVX2-NEXT: retq
  %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) << 2) = 0b1000011

declare i7 @llvm.fshl.i7(i7, i7, i7)
define i7 @fshl_i7_const_fold() {
; ANY-LABEL: fshl_i7_const_fold:
; ANY: # %bb.0:
; ANY-NEXT: movb $67, %al
; ANY-NEXT: ret{{[l|q]}}
  %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is 'shld' with constant operand.

define i32 @fshl_i32_const_shift(i32 %x, i32 %y) nounwind {
; X32-SSE2-LABEL: fshl_i32_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: shldl $9, %ecx, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i32_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: shldl $9, %esi, %edi
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41 % 32 == 9, so the lowering is the
; same 'shldl $9' as the const_shift case.

define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) nounwind {
; X32-SSE2-LABEL: fshl_i32_const_overshift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: shldl $9, %ecx, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i32_const_overshift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: shldl $9, %esi, %edi
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work.

; 105 % 64 == 41, matching the 'shldq $41' below.
define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) nounwind {
; X32-SSE2-LABEL: fshl_i64_const_overshift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT: shldl $9, %ecx, %edx
; X32-SSE2-NEXT: shrdl $23, %ecx, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i64_const_overshift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: shldq $41, %rsi, %rdi
; X64-AVX2-NEXT: movq %rdi, %rax
; X64-AVX2-NEXT: retq
  %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.
; fshl(0xFF, 0x00, 7): low 8 bits of (0xFF00 << 7 >> 8) == 0x80 == -128.

define i8 @fshl_i8_const_fold() nounwind {
; ANY-LABEL: fshl_i8_const_fold:
; ANY: # %bb.0:
; ANY-NEXT: movb $-128, %al
; ANY-NEXT: ret{{[l|q]}}
  %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Repeat everything for funnel shift right.

; General case - all operands can be variables - x86 has 'shrd', but this doesn't match.

define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) nounwind {
; X32-SSE2-LABEL: fshr_i32:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: pushl %ebx
; X32-SSE2-NEXT: pushl %edi
; X32-SSE2-NEXT: pushl %esi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT: movl $32, %ebx
; X32-SSE2-NEXT: subl %edx, %ebx
; X32-SSE2-NEXT: andl $31, %edx
; X32-SSE2-NEXT: movl %esi, %edi
; X32-SSE2-NEXT: movl %edx, %ecx
; X32-SSE2-NEXT: shrl %cl, %edi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: movl %ebx, %ecx
; X32-SSE2-NEXT: shll %cl, %eax
; X32-SSE2-NEXT: orl %edi, %eax
; X32-SSE2-NEXT: testl %edx, %edx
; X32-SSE2-NEXT: cmovel %esi, %eax
; X32-SSE2-NEXT: popl %esi
; X32-SSE2-NEXT: popl %edi
; X32-SSE2-NEXT: popl %ebx
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_i32:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl $32, %r8d
; X64-AVX2-NEXT: subl %edx, %r8d
; X64-AVX2-NEXT: andl $31, %edx
; X64-AVX2-NEXT: movl %esi, %eax
; X64-AVX2-NEXT: movl %edx, %ecx
; X64-AVX2-NEXT: shrl %cl, %eax
; X64-AVX2-NEXT: movl %r8d, %ecx
; X64-AVX2-NEXT: shll %cl, %edi
; X64-AVX2-NEXT: orl %eax, %edi
; X64-AVX2-NEXT: testl %edx, %edx
; X64-AVX2-NEXT: cmovel %esi, %edi
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
  ret i32 %f
}

; Verify that weird types are minimally supported.
; i37 is not legal, so the shift amounts are reduced modulo 37 via __umoddi3
; (32-bit) or a multiply-by-reciprocal sequence (64-bit) before shifting.
declare i37 @llvm.fshr.i37(i37, i37, i37)
define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
; X32-SSE2-LABEL: fshr_i37:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: pushl %ebp
; X32-SSE2-NEXT: pushl %ebx
; X32-SSE2-NEXT: pushl %edi
; X32-SSE2-NEXT: pushl %esi
; X32-SSE2-NEXT: pushl %eax
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT: andl $31, %esi
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: andl $31, %eax
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edi
; X32-SSE2-NEXT: pushl $0
; X32-SSE2-NEXT: pushl $37
; X32-SSE2-NEXT: pushl %eax
; X32-SSE2-NEXT: pushl %ebp
; X32-SSE2-NEXT: calll __umoddi3
; X32-SSE2-NEXT: addl $16, %esp
; X32-SSE2-NEXT: movl %eax, %ebx
; X32-SSE2-NEXT: movl %edx, (%esp) # 4-byte Spill
; X32-SSE2-NEXT: movl $37, %eax
; X32-SSE2-NEXT: subl %ebp, %eax
; X32-SSE2-NEXT: movl $0, %edx
; X32-SSE2-NEXT: sbbl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT: andl $31, %edx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ebp
; X32-SSE2-NEXT: movl %ebx, %ecx
; X32-SSE2-NEXT: shrdl %cl, %esi, %ebp
; X32-SSE2-NEXT: pushl $0
; X32-SSE2-NEXT: pushl $37
; X32-SSE2-NEXT: pushl %edx
; X32-SSE2-NEXT: pushl %eax
; X32-SSE2-NEXT: calll __umoddi3
; X32-SSE2-NEXT: addl $16, %esp
; X32-SSE2-NEXT: movl %eax, %ecx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT: movl %edx, %eax
; X32-SSE2-NEXT: shll %cl, %eax
; X32-SSE2-NEXT: shldl %cl, %edx, %edi
; X32-SSE2-NEXT: testb $32, %cl
; X32-SSE2-NEXT: cmovnel %eax, %edi
; X32-SSE2-NEXT: movl $0, %edx
; X32-SSE2-NEXT: cmovnel %edx, %eax
; X32-SSE2-NEXT: movl %ebx, %ecx
; X32-SSE2-NEXT: shrl %cl, %esi
; X32-SSE2-NEXT: testb $32, %bl
; X32-SSE2-NEXT: cmovnel %esi, %ebp
; X32-SSE2-NEXT: cmovnel %edx, %esi
; X32-SSE2-NEXT: orl %edi, %esi
; X32-SSE2-NEXT: orl %eax, %ebp
; X32-SSE2-NEXT: orl %ebx, (%esp) # 4-byte Folded Spill
; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %ebp
; X32-SSE2-NEXT: cmovel {{[0-9]+}}(%esp), %esi
; X32-SSE2-NEXT: movl %ebp, %eax
; X32-SSE2-NEXT: movl %esi, %edx
; X32-SSE2-NEXT: addl $4, %esp
; X32-SSE2-NEXT: popl %esi
; X32-SSE2-NEXT: popl %edi
; X32-SSE2-NEXT: popl %ebx
; X32-SSE2-NEXT: popl %ebp
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_i37:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: pushq %rbx
; X64-AVX2-NEXT: movq %rdx, %r10
; X64-AVX2-NEXT: movabsq $137438953471, %r8 # imm = 0x1FFFFFFFFF
; X64-AVX2-NEXT: movq %rsi, %r11
; X64-AVX2-NEXT: andq %r8, %r11
; X64-AVX2-NEXT: movl $37, %r9d
; X64-AVX2-NEXT: subq %rdx, %r9
; X64-AVX2-NEXT: andq %r8, %r10
; X64-AVX2-NEXT: movabsq $-2492803253203993461, %rbx # imm = 0xDD67C8A60DD67C8B
; X64-AVX2-NEXT: movq %r10, %rax
; X64-AVX2-NEXT: mulq %rbx
; X64-AVX2-NEXT: shrq $5, %rdx
; X64-AVX2-NEXT: leaq (%rdx,%rdx,8), %rax
; X64-AVX2-NEXT: leaq (%rdx,%rax,4), %rax
; X64-AVX2-NEXT: subq %rax, %r10
; X64-AVX2-NEXT: movl %r10d, %ecx
; X64-AVX2-NEXT: shrq %cl, %r11
; X64-AVX2-NEXT: andq %r9, %r8
; X64-AVX2-NEXT: movq %r8, %rax
; X64-AVX2-NEXT: mulq %rbx
; X64-AVX2-NEXT: shrq $5, %rdx
; X64-AVX2-NEXT: leaq (%rdx,%rdx,8), %rax
; X64-AVX2-NEXT: leal (%rdx,%rax,4), %eax
; X64-AVX2-NEXT: subl %eax, %r9d
; X64-AVX2-NEXT: movl %r9d, %ecx
; X64-AVX2-NEXT: shlq %cl, %rdi
; X64-AVX2-NEXT: orq %r11, %rdi
; X64-AVX2-NEXT: testq %r10, %r10
; X64-AVX2-NEXT: cmoveq %rsi, %rdi
; X64-AVX2-NEXT: movq %rdi, %rax
; X64-AVX2-NEXT: popq %rbx
; X64-AVX2-NEXT: retq
  %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
  ret i37 %f
}

; extract(concat(0b1110000, 0b1111111) >> 2) = 0b0011111

declare i7 @llvm.fshr.i7(i7, i7, i7)
define i7 @fshr_i7_const_fold() nounwind {
; ANY-LABEL: fshr_i7_const_fold:
; ANY: # %bb.0:
; ANY-NEXT: movb $31, %al
; ANY-NEXT: ret{{[l|q]}}
  %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
  ret i7 %f
}

; With constant shift amount, this is 'shrd' or 'shld'.

define i32 @fshr_i32_const_shift(i32 %x, i32 %y) nounwind {
; X32-SSE2-LABEL: fshr_i32_const_shift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: shldl $23, %ecx, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_i32_const_shift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: shldl $23, %esi, %edi
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
  ret i32 %f
}

; Check modulo math on shift amount. 41-32=9, but right-shift became left, so 32-9=23.

define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) nounwind {
; X32-SSE2-LABEL: fshr_i32_const_overshift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: shldl $23, %ecx, %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_i32_const_overshift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: shldl $23, %esi, %edi
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
  ret i32 %f
}

; 64-bit should also work. 105-64 = 41, but right-shift became left, so 64-41=23.

define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) nounwind {
; X32-SSE2-LABEL: fshr_i64_const_overshift:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-SSE2-NEXT: shrdl $9, %ecx, %eax
; X32-SSE2-NEXT: shldl $23, %ecx, %edx
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_i64_const_overshift:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: shldq $23, %rsi, %rdi
; X64-AVX2-NEXT: movq %rdi, %rax
; X64-AVX2-NEXT: retq
  %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
  ret i64 %f
}

; This should work without any node-specific logic.
; fshr(0xFF, 0x00, 7): low 8 bits of (0xFF00 >> 7) == 0xFE == -2.

define i8 @fshr_i8_const_fold() nounwind {
; ANY-LABEL: fshr_i8_const_fold:
; ANY: # %bb.0:
; ANY-NEXT: movb $-2, %al
; ANY-NEXT: ret{{[l|q]}}
  %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
  ret i8 %f
}

; Shift amount equal to the bitwidth is 0 modulo the bitwidth, so fshl
; reduces to just returning the first operand.

define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
; X32-SSE2-LABEL: fshl_i32_shift_by_bitwidth:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshl_i32_shift_by_bitwidth:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %edi, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

; Likewise, fshr by the bitwidth reduces to returning the second operand.

define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
; X32-SSE2-LABEL: fshr_i32_shift_by_bitwidth:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_i32_shift_by_bitwidth:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: movl %esi, %eax
; X64-AVX2-NEXT: retq
  %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
  ret i32 %f
}

; Vector fshl by the element bitwidth: %x is already in xmm0, so this is a no-op.

define <4 x i32> @fshl_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounwind {
; ANY-LABEL: fshl_v4i32_shift_by_bitwidth:
; ANY: # %bb.0:
; ANY-NEXT: ret{{[l|q]}}
  %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}

; Vector fshr by the element bitwidth: returns %y, a single register move.

define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounwind {
; X32-SSE2-LABEL: fshr_v4i32_shift_by_bitwidth:
; X32-SSE2: # %bb.0:
; X32-SSE2-NEXT: movaps %xmm1, %xmm0
; X32-SSE2-NEXT: retl
;
; X64-AVX2-LABEL: fshr_v4i32_shift_by_bitwidth:
; X64-AVX2: # %bb.0:
; X64-AVX2-NEXT: vmovaps %xmm1, %xmm0
; X64-AVX2-NEXT: retq
  %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
  ret <4 x i32> %f
}