; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+bmi < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with side effects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.
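;
; The nop is an inline asm call that returns its dummy value through the "=x" (SSE
; register) constraint and clobbers every general-purpose register except %rsp, so any
; argument that is live across it has to be spilled to a stack slot; the CHECK lines
; then verify that the reload is folded into the BMI instruction as a memory operand
; rather than being reloaded into a register first.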
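; ANDN (dst = ~src1 & src2): the (~a0) & a1 pattern below should select ANDN with the
; non-inverted operand folded from its stack slot.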
define i32 @stack_fold_andn_u32(i32 %a0, i32 %a1) {
  ;CHECK-LABEL: stack_fold_andn_u32
  ;CHECK:       andnl {{-?[0-9]*}}(%rsp), %eax, %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = xor i32 %a0, -1
  %3 = and i32 %a1, %2
  ret i32 %3
}

define i64 @stack_fold_andn_u64(i64 %a0, i64 %a1) {
  ;CHECK-LABEL: stack_fold_andn_u64
  ;CHECK:       andnq {{-?[0-9]*}}(%rsp), %rax, %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = xor i64 %a0, -1
  %3 = and i64 %a1, %2
  ret i64 %3
}

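; BEXTR (bit-field extract): exercised via the llvm.x86.bmi.bextr intrinsics; %a1 is the
; control word (start bit in bits 7:0, length in bits 15:8) and the source value %a0
; should be folded from its stack slot.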
define i32 @stack_fold_bextr_u32(i32 %a0, i32 %a1) {
  ;CHECK-LABEL: stack_fold_bextr_u32
  ;CHECK:       # %bb.0:
  ;CHECK:       bextrl %eax, {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %a0, i32 %a1)
  ret i32 %2
}
declare i32 @llvm.x86.bmi.bextr.32(i32, i32)

define i64 @stack_fold_bextr_u64(i64 %a0, i64 %a1) {
  ;CHECK-LABEL: stack_fold_bextr_u64
  ;CHECK:       # %bb.0:
  ;CHECK:       bextrq %rax, {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i64 @llvm.x86.bmi.bextr.64(i64 %a0, i64 %a1)
  ret i64 %2
}
declare i64 @llvm.x86.bmi.bextr.64(i64, i64)

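; BLSI (isolate the lowest set bit): the a0 & -a0 pattern should select BLSI with the
; operand folded from its stack slot.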
define i32 @stack_fold_blsi_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blsi_u32
  ;CHECK:       blsil {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 0, %a0
  %3 = and i32 %2, %a0
  ret i32 %3
}

define i64 @stack_fold_blsi_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blsi_u64
  ;CHECK:       blsiq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 0, %a0
  %3 = and i64 %2, %a0
  ret i64 %3
}

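; BLSMSK (mask up to and including the lowest set bit): the a0 ^ (a0 - 1) pattern
; should select BLSMSK.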
define i32 @stack_fold_blsmsk_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blsmsk_u32
  ;CHECK:       blsmskl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 %a0, 1
  %3 = xor i32 %2, %a0
  ret i32 %3
}

define i64 @stack_fold_blsmsk_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blsmsk_u64
  ;CHECK:       blsmskq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 %a0, 1
  %3 = xor i64 %2, %a0
  ret i64 %3
}

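; BLSR (clear the lowest set bit): the a0 & (a0 - 1) pattern should select BLSR.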
define i32 @stack_fold_blsr_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blsr_u32
  ;CHECK:       blsrl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 %a0, 1
  %3 = and i32 %2, %a0
  ret i32 %3
}

define i64 @stack_fold_blsr_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blsr_u64
  ;CHECK:       blsrq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 %a0, 1
  %3 = and i64 %2, %a0
  ret i64 %3
}

;TODO stack_fold_tzcnt_u16

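; TZCNT (trailing-zero count): llvm.cttz is called with the zero-is-undef flag set to
; false, which TZCNT can implement directly since it returns the operand width for a
; zero input.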
define i32 @stack_fold_tzcnt_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_tzcnt_u32
  ;CHECK:       tzcntl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i32 @llvm.cttz.i32(i32 %a0, i1 0)
  ret i32 %2
}
declare i32 @llvm.cttz.i32(i32, i1)

define i64 @stack_fold_tzcnt_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_tzcnt_u64
  ;CHECK:       tzcntq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i64 @llvm.cttz.i64(i64 %a0, i1 0)
  ret i64 %2
}
declare i64 @llvm.cttz.i64(i64, i1)