; Stack-folding tests for the X86 TBM instruction set.
      1 ; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+bmi,+tbm < %s | FileCheck %s
      2 
      3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
      4 target triple = "x86_64-unknown-unknown"
      5 
      6 ; Stack reload folding tests.
      7 ;
      8 ; By including a nop call with sideeffects we can force a partial register spill of the
      9 ; relevant registers and check that the reload is correctly folded into the instruction.
     10 
     11 define i32 @stack_fold_bextri_u32(i32 %a0) {
     12   ;CHECK-LABEL: stack_fold_bextri_u32
     13   ;CHECK:       # %bb.0:
     14   ;CHECK:       bextrl $3841, {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
     15   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
     16   %2 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a0, i32 3841)
     17   ret i32 %2
     18 }
     19 declare i32 @llvm.x86.tbm.bextri.u32(i32, i32)
     20 
     21 define i64 @stack_fold_bextri_u64(i64 %a0) {
     22   ;CHECK-LABEL: stack_fold_bextri_u64
     23   ;CHECK:       # %bb.0:
     24   ;CHECK:       bextrq $3841, {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
     25   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
     26   %2 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a0, i64 3841)
     27   ret i64 %2
     28 }
     29 declare i64 @llvm.x86.tbm.bextri.u64(i64, i64)
     30 
     31 define i32 @stack_fold_blcfill_u32(i32 %a0) {
     32   ;CHECK-LABEL: stack_fold_blcfill_u32
     33   ;CHECK:       blcfilll {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
     34   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
     35   %2 = add i32 %a0, 1
     36   %3 = and i32 %a0, %2
     37   ret i32 %3
     38 }
     39 
     40 define i64 @stack_fold_blcfill_u64(i64 %a0) {
     41   ;CHECK-LABEL: stack_fold_blcfill_u64
     42   ;CHECK:       blcfillq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
     43   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
     44   %2 = add i64 %a0, 1
     45   %3 = and i64 %a0, %2
     46   ret i64 %3
     47 }
     48 
     49 define i32 @stack_fold_blci_u32(i32 %a0) {
     50   ;CHECK-LABEL: stack_fold_blci_u32
     51   ;CHECK:       blcil {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
     52   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
     53   %2 = add i32 %a0, 1
     54   %3 = xor i32 %2, -1
     55   %4 = or i32 %a0, %3
     56   ret i32 %4
     57 }
     58 
     59 define i64 @stack_fold_blci_u64(i64 %a0) {
     60   ;CHECK-LABEL: stack_fold_blci_u64
     61   ;CHECK:       blciq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
     62   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
     63   %2 = add i64 %a0, 1
     64   %3 = xor i64 %2, -1
     65   %4 = or i64 %a0, %3
     66   ret i64 %4
     67 }
     68 
     69 define i32 @stack_fold_blcic_u32(i32 %a0) {
     70   ;CHECK-LABEL: stack_fold_blcic_u32
     71   ;CHECK:       blcicl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
     72   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
     73   %2 = add i32 %a0, 1
     74   %3 = xor i32 %a0, -1
     75   %4 = and i32 %2, %3
     76   ret i32 %4
     77 }
     78 
     79 define i64 @stack_fold_blcic_u64(i64 %a0) {
     80   ;CHECK-LABEL: stack_fold_blcic_u64
     81   ;CHECK:       blcicq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
     82   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
     83   %2 = add i64 %a0, 1
     84   %3 = xor i64 %a0, -1
     85   %4 = and i64 %2, %3
     86   ret i64 %4
     87 }
     88 
     89 define i32 @stack_fold_blcmsk_u32(i32 %a0) {
     90   ;CHECK-LABEL: stack_fold_blcmsk_u32
     91   ;CHECK:       blcmskl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
     92   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
     93   %2 = add i32 %a0, 1
     94   %3 = xor i32 %a0, %2
     95   ret i32 %3
     96 }
     97 
     98 define i64 @stack_fold_blcmsk_u64(i64 %a0) {
     99   ;CHECK-LABEL: stack_fold_blcmsk_u64
    100   ;CHECK:       blcmskq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
    101   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
    102   %2 = add i64 %a0, 1
    103   %3 = xor i64 %a0, %2
    104   ret i64 %3
    105 }
    106 
    107 define i32 @stack_fold_blcs_u32(i32 %a0) {
    108   ;CHECK-LABEL: stack_fold_blcs_u32
    109   ;CHECK:       blcsl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
    110   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
    111   %2 = add i32 %a0, 1
    112   %3 = or i32 %a0, %2
    113   ret i32 %3
    114 }
    115 
    116 define i64 @stack_fold_blcs_u64(i64 %a0) {
    117   ;CHECK-LABEL: stack_fold_blcs_u64
    118   ;CHECK:       blcsq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
    119   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
    120   %2 = add i64 %a0, 1
    121   %3 = or i64 %a0, %2
    122   ret i64 %3
    123 }
    124 
    125 define i32 @stack_fold_blsfill_u32(i32 %a0) {
    126   ;CHECK-LABEL: stack_fold_blsfill_u32
    127   ;CHECK:       blsfilll {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
    128   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
    129   %2 = sub i32 %a0, 1
    130   %3 = or i32 %a0, %2
    131   ret i32 %3
    132 }
    133 
    134 define i64 @stack_fold_blsfill_u64(i64 %a0) {
    135   ;CHECK-LABEL: stack_fold_blsfill_u64
    136   ;CHECK:       blsfillq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
    137   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
    138   %2 = sub i64 %a0, 1
    139   %3 = or i64 %a0, %2
    140   ret i64 %3
    141 }
    142 
    143 define i32 @stack_fold_blsic_u32(i32 %a0) {
    144   ;CHECK-LABEL: stack_fold_blsic_u32
    145   ;CHECK:       blsicl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
    146   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
    147   %2 = sub i32 %a0, 1
    148   %3 = xor i32 %a0, -1
    149   %4 = or i32 %2, %3
    150   ret i32 %4
    151 }
    152 
    153 define i64 @stack_fold_blsic_u64(i64 %a0) {
    154   ;CHECK-LABEL: stack_fold_blsic_u64
    155   ;CHECK:       blsicq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
    156   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
    157   %2 = sub i64 %a0, 1
    158   %3 = xor i64 %a0, -1
    159   %4 = or i64 %2, %3
    160   ret i64 %4
    161 }
    162 
    163 define i32 @stack_fold_t1mskc_u32(i32 %a0) {
    164   ;CHECK-LABEL: stack_fold_t1mskc_u32
    165   ;CHECK:       t1mskcl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
    166   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
    167   %2 = add i32 %a0, 1
    168   %3 = xor i32 %a0, -1
    169   %4 = or i32 %2, %3
    170   ret i32 %4
    171 }
    172 
    173 define i64 @stack_fold_t1mskc_u64(i64 %a0) {
    174   ;CHECK-LABEL: stack_fold_t1mskc_u64
    175   ;CHECK:       t1mskcq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
    176   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
    177   %2 = add i64 %a0, 1
    178   %3 = xor i64 %a0, -1
    179   %4 = or i64 %2, %3
    180   ret i64 %4
    181 }
    182 
    183 define i32 @stack_fold_tzmsk_u32(i32 %a0) {
    184   ;CHECK-LABEL: stack_fold_tzmsk_u32
    185   ;CHECK:       tzmskl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
    186   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
    187   %2 = sub i32 %a0, 1
    188   %3 = xor i32 %a0, -1
    189   %4 = and i32 %2, %3
    190   ret i32 %4
    191 }
    192 
    193 define i64 @stack_fold_tzmsk_u64(i64 %a0) {
    194   ;CHECK-LABEL: stack_fold_tzmsk_u64
    195   ;CHECK:       tzmskq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
    196   %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
    197   %2 = sub i64 %a0, 1
    198   %3 = xor i64 %a0, -1
    199   %4 = and i64 %2, %3
    200   ret i64 %4
    201 }
    202