; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+bmi,+tbm < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.

define i32 @stack_fold_bextri_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_bextri_u32
  ;CHECK: # %bb.0:
  ;CHECK: bextrl $3841, {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i32 @llvm.x86.tbm.bextri.u32(i32 %a0, i32 3841)
  ret i32 %2
}
declare i32 @llvm.x86.tbm.bextri.u32(i32, i32)

define i64 @stack_fold_bextri_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_bextri_u64
  ;CHECK: # %bb.0:
  ;CHECK: bextrq $3841, {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = tail call i64 @llvm.x86.tbm.bextri.u64(i64 %a0, i64 3841)
  ret i64 %2
}
declare i64 @llvm.x86.tbm.bextri.u64(i64, i64)

define i32 @stack_fold_blcfill_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blcfill_u32
  ;CHECK: blcfilll {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i32 %a0, 1
  %3 = and i32 %a0, %2
  ret i32 %3
}

define i64 @stack_fold_blcfill_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blcfill_u64
  ;CHECK: blcfillq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i64 %a0, 1
  %3 = and i64 %a0, %2
  ret i64 %3
}

define i32 @stack_fold_blci_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blci_u32
  ;CHECK: blcil {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i32 %a0, 1
  %3 = xor i32 %2, -1
  %4 = or i32 %a0, %3
  ret i32 %4
}

define i64 @stack_fold_blci_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blci_u64
  ;CHECK: blciq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i64 %a0, 1
  %3 = xor i64 %2, -1
  %4 = or i64 %a0, %3
  ret i64 %4
}

define i32 @stack_fold_blcic_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blcic_u32
  ;CHECK: blcicl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i32 %a0, 1
  %3 = xor i32 %a0, -1
  %4 = and i32 %2, %3
  ret i32 %4
}

define i64 @stack_fold_blcic_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blcic_u64
  ;CHECK: blcicq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i64 %a0, 1
  %3 = xor i64 %a0, -1
  %4 = and i64 %2, %3
  ret i64 %4
}

define i32 @stack_fold_blcmsk_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blcmsk_u32
  ;CHECK: blcmskl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i32 %a0, 1
  %3 = xor i32 %a0, %2
  ret i32 %3
}

define i64 @stack_fold_blcmsk_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blcmsk_u64
  ;CHECK: blcmskq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i64 %a0, 1
  %3 = xor i64 %a0, %2
  ret i64 %3
}

define i32 @stack_fold_blcs_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blcs_u32
  ;CHECK: blcsl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i32 %a0, 1
  %3 = or i32 %a0, %2
  ret i32 %3
}

define i64 @stack_fold_blcs_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blcs_u64
  ;CHECK: blcsq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i64 %a0, 1
  %3 = or i64 %a0, %2
  ret i64 %3
}

define i32 @stack_fold_blsfill_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blsfill_u32
  ;CHECK: blsfilll {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 %a0, 1
  %3 = or i32 %a0, %2
  ret i32 %3
}

define i64 @stack_fold_blsfill_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blsfill_u64
  ;CHECK: blsfillq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 %a0, 1
  %3 = or i64 %a0, %2
  ret i64 %3
}

define i32 @stack_fold_blsic_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_blsic_u32
  ;CHECK: blsicl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 %a0, 1
  %3 = xor i32 %a0, -1
  %4 = or i32 %2, %3
  ret i32 %4
}

define i64 @stack_fold_blsic_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_blsic_u64
  ;CHECK: blsicq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 %a0, 1
  %3 = xor i64 %a0, -1
  %4 = or i64 %2, %3
  ret i64 %4
}

define i32 @stack_fold_t1mskc_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_t1mskc_u32
  ;CHECK: t1mskcl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i32 %a0, 1
  %3 = xor i32 %a0, -1
  %4 = or i32 %2, %3
  ret i32 %4
}

define i64 @stack_fold_t1mskc_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_t1mskc_u64
  ;CHECK: t1mskcq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = add i64 %a0, 1
  %3 = xor i64 %a0, -1
  %4 = or i64 %2, %3
  ret i64 %4
}

define i32 @stack_fold_tzmsk_u32(i32 %a0) {
  ;CHECK-LABEL: stack_fold_tzmsk_u32
  ;CHECK: tzmskl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i32 %a0, 1
  %3 = xor i32 %a0, -1
  %4 = and i32 %2, %3
  ret i32 %4
}

define i64 @stack_fold_tzmsk_u64(i64 %a0) {
  ;CHECK-LABEL: stack_fold_tzmsk_u64
  ;CHECK: tzmskq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload
  %1 = tail call i64 asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = sub i64 %a0, 1
  %3 = xor i64 %a0, -1
  %4 = and i64 %2, %3
  ret i64 %4
}