; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=CHECK --check-prefix=X64

; Each shift amount vector has lane 0 replaced with -1, which is greater than
; the bit width and therefore poison, so only lane 1's shift has to produce a
; defined result.

; Lane 1 is shifted left by 2; the poison lane 0 is stored as zero.
define <2 x i256> @test_shl(<2 x i256> %In) {
; X32-LABEL: test_shl:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    shldl $2, %edx, %ecx
; X32-NEXT:    movl %ecx, 60(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shldl $2, %ecx, %edx
; X32-NEXT:    movl %edx, 56(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    shldl $2, %edx, %ecx
; X32-NEXT:    movl %ecx, 52(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shldl $2, %ecx, %edx
; X32-NEXT:    movl %edx, 48(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    shldl $2, %edx, %ecx
; X32-NEXT:    movl %ecx, 44(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shldl $2, %ecx, %edx
; X32-NEXT:    movl %edx, 40(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    shldl $2, %edx, %ecx
; X32-NEXT:    movl %ecx, 36(%eax)
; X32-NEXT:    shll $2, %edx
; X32-NEXT:    movl %edx, 32(%eax)
; X32-NEXT:    movl $0, 28(%eax)
; X32-NEXT:    movl $0, 24(%eax)
; X32-NEXT:    movl $0, 20(%eax)
; X32-NEXT:    movl $0, 16(%eax)
; X32-NEXT:    movl $0, 12(%eax)
; X32-NEXT:    movl $0, 8(%eax)
; X32-NEXT:    movl $0, 4(%eax)
; X32-NEXT:    movl $0, (%eax)
; X32-NEXT:    retl $4
;
; X64-LABEL: test_shl:
; X64:       # %bb.0:
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; X64-NEXT:    shldq $2, %rax, %rcx
; X64-NEXT:    shldq $2, %rdx, %rax
; X64-NEXT:    shldq $2, %r9, %rdx
; X64-NEXT:    shlq $2, %r9
; X64-NEXT:    movq %rcx, 56(%rdi)
; X64-NEXT:    movq %rax, 48(%rdi)
; X64-NEXT:    movq %rdx, 40(%rdi)
; X64-NEXT:    movq %r9, 32(%rdi)
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    movaps %xmm0, 16(%rdi)
; X64-NEXT:    movaps %xmm0, (%rdi)
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
  %Amt = insertelement <2 x i256> <i256 1, i256 2>, i256 -1, i32 0
  %Out = shl <2 x i256> %In, %Amt
  ret <2 x i256> %Out
}

; Lane 1 is shifted right by 4; the poison lane 0 is stored as zero.
define <2 x i256> @test_srl(<2 x i256> %In) {
; X32-LABEL: test_srl:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    pushl %ebx
; X32-NEXT:    .cfi_def_cfa_offset 12
; X32-NEXT:    pushl %edi
; X32-NEXT:    .cfi_def_cfa_offset 16
; X32-NEXT:    pushl %esi
; X32-NEXT:    .cfi_def_cfa_offset 20
; X32-NEXT:    subl $8, %esp
; X32-NEXT:    .cfi_def_cfa_offset 28
; X32-NEXT:    .cfi_offset %esi, -20
; X32-NEXT:    .cfi_offset %edi, -16
; X32-NEXT:    .cfi_offset %ebx, -12
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X32-NEXT:    movl %edx, %ecx
; X32-NEXT:    shldl $28, %eax, %ecx
; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
; X32-NEXT:    shldl $28, %esi, %eax
; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill
; X32-NEXT:    shldl $28, %edi, %esi
; X32-NEXT:    shldl $28, %ebx, %edi
; X32-NEXT:    shldl $28, %ebp, %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    shldl $28, %eax, %ebp
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shrdl $4, %eax, %ecx
; X32-NEXT:    shrl $4, %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl %edx, 60(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
; X32-NEXT:    movl %edx, 56(%eax)
; X32-NEXT:    movl (%esp), %edx # 4-byte Reload
; X32-NEXT:    movl %edx, 52(%eax)
; X32-NEXT:    movl %esi, 48(%eax)
; X32-NEXT:    movl %edi, 44(%eax)
; X32-NEXT:    movl %ebx, 40(%eax)
; X32-NEXT:    movl %ebp, 36(%eax)
; X32-NEXT:    movl %ecx, 32(%eax)
; X32-NEXT:    movl $0, 28(%eax)
; X32-NEXT:    movl $0, 24(%eax)
; X32-NEXT:    movl $0, 20(%eax)
; X32-NEXT:    movl $0, 16(%eax)
; X32-NEXT:    movl $0, 12(%eax)
; X32-NEXT:    movl $0, 8(%eax)
; X32-NEXT:    movl $0, 4(%eax)
; X32-NEXT:    movl $0, (%eax)
; X32-NEXT:    addl $8, %esp
; X32-NEXT:    .cfi_def_cfa_offset 20
; X32-NEXT:    popl %esi
; X32-NEXT:    .cfi_def_cfa_offset 16
; X32-NEXT:    popl %edi
; X32-NEXT:    .cfi_def_cfa_offset 12
; X32-NEXT:    popl %ebx
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 4
; X32-NEXT:    retl $4
;
; X64-LABEL: test_srl:
; X64:       # %bb.0:
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; X64-NEXT:    shrdq $4, %rdx, %r9
; X64-NEXT:    shrdq $4, %rax, %rdx
; X64-NEXT:    shrdq $4, %rcx, %rax
; X64-NEXT:    shrq $4, %rcx
; X64-NEXT:    movq %rcx, 56(%rdi)
; X64-NEXT:    movq %rax, 48(%rdi)
; X64-NEXT:    movq %rdx, 40(%rdi)
; X64-NEXT:    movq %r9, 32(%rdi)
; X64-NEXT:    xorps %xmm0, %xmm0
; X64-NEXT:    movaps %xmm0, 16(%rdi)
; X64-NEXT:    movaps %xmm0, (%rdi)
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
  %Amt = insertelement <2 x i256> <i256 3, i256 4>, i256 -1, i32 0
  %Out = lshr <2 x i256> %In, %Amt
  ret <2 x i256> %Out
}

; Lane 1 is shifted right by 6; the poison lane 0 is filled with the sign bit.
define <2 x i256> @test_sra(<2 x i256> %In) {
; X32-LABEL: test_sra:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    pushl %ebx
; X32-NEXT:    .cfi_def_cfa_offset 12
; X32-NEXT:    pushl %edi
; X32-NEXT:    .cfi_def_cfa_offset 16
; X32-NEXT:    pushl %esi
; X32-NEXT:    .cfi_def_cfa_offset 20
; X32-NEXT:    subl $8, %esp
; X32-NEXT:    .cfi_def_cfa_offset 28
; X32-NEXT:    .cfi_offset %esi, -20
; X32-NEXT:    .cfi_offset %edi, -16
; X32-NEXT:    .cfi_offset %ebx, -12
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ebp
; X32-NEXT:    movl %edx, %ecx
; X32-NEXT:    shldl $26, %eax, %ecx
; X32-NEXT:    movl %ecx, {{[0-9]+}}(%esp) # 4-byte Spill
; X32-NEXT:    shldl $26, %esi, %eax
; X32-NEXT:    movl %eax, (%esp) # 4-byte Spill
; X32-NEXT:    shldl $26, %edi, %esi
; X32-NEXT:    shldl $26, %ebx, %edi
; X32-NEXT:    shldl $26, %ebp, %ebx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    shldl $26, %eax, %ebp
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    shrdl $6, %eax, %ecx
; X32-NEXT:    sarl $6, %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    movl %edx, 60(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx # 4-byte Reload
; X32-NEXT:    movl %edx, 56(%eax)
; X32-NEXT:    movl (%esp), %edx # 4-byte Reload
; X32-NEXT:    movl %edx, 52(%eax)
; X32-NEXT:    movl %esi, 48(%eax)
; X32-NEXT:    movl %edi, 44(%eax)
; X32-NEXT:    movl %ebx, 40(%eax)
; X32-NEXT:    movl %ebp, 36(%eax)
; X32-NEXT:    movl %ecx, 32(%eax)
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    sarl $31, %ecx
; X32-NEXT:    movl %ecx, 28(%eax)
; X32-NEXT:    movl %ecx, 24(%eax)
; X32-NEXT:    movl %ecx, 20(%eax)
; X32-NEXT:    movl %ecx, 16(%eax)
; X32-NEXT:    movl %ecx, 12(%eax)
; X32-NEXT:    movl %ecx, 8(%eax)
; X32-NEXT:    movl %ecx, 4(%eax)
; X32-NEXT:    movl %ecx, (%eax)
; X32-NEXT:    addl $8, %esp
; X32-NEXT:    .cfi_def_cfa_offset 20
; X32-NEXT:    popl %esi
; X32-NEXT:    .cfi_def_cfa_offset 16
; X32-NEXT:    popl %edi
; X32-NEXT:    .cfi_def_cfa_offset 12
; X32-NEXT:    popl %ebx
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 4
; X32-NEXT:    retl $4
;
; X64-LABEL: test_sra:
; X64:       # %bb.0:
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rax
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
; X64-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
; X64-NEXT:    shrdq $6, %rdx, %r9
; X64-NEXT:    shrdq $6, %rax, %rdx
; X64-NEXT:    sarq $63, %r8
; X64-NEXT:    shrdq $6, %rcx, %rax
; X64-NEXT:    sarq $6, %rcx
; X64-NEXT:    movq %rcx, 56(%rdi)
; X64-NEXT:    movq %rax, 48(%rdi)
; X64-NEXT:    movq %rdx, 40(%rdi)
; X64-NEXT:    movq %r9, 32(%rdi)
; X64-NEXT:    movq %r8, 24(%rdi)
; X64-NEXT:    movq %r8, 16(%rdi)
; X64-NEXT:    movq %r8, 8(%rdi)
; X64-NEXT:    movq %r8, (%rdi)
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    retq
  %Amt = insertelement <2 x i256> <i256 5, i256 6>, i256 -1, i32 0
  %Out = ashr <2 x i256> %In, %Amt
  ret <2 x i256> %Out
}