1 ; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,+aes,+pclmul < %s | FileCheck %s 2 3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 4 target triple = "x86_64-unknown-unknown" 5 6 ; Stack reload folding tests. 7 ; 8 ; By including a nop call with sideeffects we can force a partial register spill of the 9 ; relevant registers and check that the reload is correctly folded into the instruction. 10 11 define <2 x i64> @stack_fold_aesdec(<2 x i64> %a0, <2 x i64> %a1) { 12 ;CHECK-LABEL: stack_fold_aesdec 13 ;CHECK: aesdec {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 14 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 15 %2 = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1) 16 ret <2 x i64> %2 17 } 18 declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone 19 20 define <2 x i64> @stack_fold_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) { 21 ;CHECK-LABEL: stack_fold_aesdeclast 22 ;CHECK: aesdeclast {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 23 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 24 %2 = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1) 25 ret <2 x i64> %2 26 } 27 declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone 28 29 define <2 x i64> @stack_fold_aesenc(<2 x i64> %a0, <2 x i64> %a1) { 30 ;CHECK-LABEL: stack_fold_aesenc 31 ;CHECK: aesenc {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 32 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 33 %2 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1) 34 ret <2 x i64> %2 35 } 36 declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone 37 38 define <2 x i64> @stack_fold_aesenclast(<2 x i64> %a0, <2 x i64> %a1) { 39 ;CHECK-LABEL: stack_fold_aesenclast 40 ;CHECK: aesenclast {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 41 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 42 %2 = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1) 43 ret <2 x i64> %2 44 } 45 declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone 46 47 define <2 x i64> @stack_fold_aesimc(<2 x i64> %a0) { 48 ;CHECK-LABEL: stack_fold_aesimc 49 ;CHECK: aesimc {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 50 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 51 %2 = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0) 52 ret <2 x i64> %2 53 } 54 declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone 55 56 define <2 x i64> @stack_fold_aeskeygenassist(<2 x i64> %a0) { 57 ;CHECK-LABEL: stack_fold_aeskeygenassist 58 ;CHECK: aeskeygenassist $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 59 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 60 %2 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7) 61 ret <2 x i64> %2 62 } 63 declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone 64 65 define i32 @stack_fold_crc32_32_8(i32 %a0, i8 %a1) { 66 ;CHECK-LABEL: stack_fold_crc32_32_8 67 ;CHECK: crc32b {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 1-byte Folded Reload 68 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 69 %2 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1) 70 ret i32 %2 71 } 72 declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind 73 74 define i32 @stack_fold_crc32_32_16(i32 %a0, i16 %a1) { 75 ;CHECK-LABEL: stack_fold_crc32_32_16 76 ;CHECK: crc32w {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 2-byte Folded Reload 77 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 78 %2 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1) 79 ret i32 %2 80 } 81 declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind 82 83 define i32 @stack_fold_crc32_32_32(i32 %a0, i32 %a1) { 84 ;CHECK-LABEL: stack_fold_crc32_32_32 85 ;CHECK: crc32l {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Folded Reload 86 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 87 %2 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1) 88 ret i32 %2 89 } 90 declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind 91 92 define i64 @stack_fold_crc32_64_64(i64 %a0, i64 %a1) { 93 ;CHECK-LABEL: stack_fold_crc32_64_64 94 ;CHECK: crc32q {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Folded Reload 95 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 96 %2 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1) 97 ret i64 %2 98 } 99 declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind 100 101 define <4 x i32> @stack_fold_movd_load(i32 %a0) { 102 ;CHECK-LABEL: stack_fold_movd_load 103 ;CHECK: movd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 104 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 105 %2 = insertelement <4 x i32> zeroinitializer, i32 %a0, i32 0 106 ; add forces execution domain 107 %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1> 108 ret <4 x i32> %3 109 } 110 111 define i32 @stack_fold_movd_store(<4 x i32> %a0) { 112 ;CHECK-LABEL: stack_fold_movd_store 113 ;CHECK: movd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill 114 ; add forces execution domain 115 %1 = add <4 x i32> %a0, <i32 1, i32 1, i32 1, i32 1> 116 %2 = extractelement <4 x i32> %1, i32 0 117 %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 118 ret i32 %2 119 } 120 121 define <2 x i64> @stack_fold_movq_load(<2 x i64> %a0) { 122 ;CHECK-LABEL: stack_fold_movq_load 123 ;CHECK: movq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 124 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 125 %2 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2> 126 ; add forces execution domain 127 %3 = add <2 x i64> %2, <i64 1, i64 1> 128 ret <2 x i64> %3 129 } 130 131 define i64 @stack_fold_movq_store(<2 x i64> %a0) { 132 ;CHECK-LABEL: stack_fold_movq_store 133 ;CHECK: movq {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 8-byte Folded Spill 134 ; add forces execution domain 135 %1 = add <2 x i64> %a0, <i64 1, i64 1> 136 %2 = extractelement <2 x i64> %1, i32 0 137 %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 138 ret i64 %2 139 } 140 141 define <8 x i16> @stack_fold_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) { 142 ;CHECK-LABEL: stack_fold_mpsadbw 143 ;CHECK: mpsadbw $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 144 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 145 %2 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7) 146 ret <8 x i16> %2 147 } 148 declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone 149 150 define <16 x i8> @stack_fold_pabsb(<16 x i8> %a0) { 151 ;CHECK-LABEL: stack_fold_pabsb 152 ;CHECK: pabsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 153 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 154 %2 = icmp sgt <16 x i8> %a0, zeroinitializer 155 %3 = sub <16 x i8> zeroinitializer, %a0 156 %4 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %3 157 ret <16 x i8> %4 158 } 159 160 define <4 x i32> @stack_fold_pabsd(<4 x i32> %a0) { 161 ;CHECK-LABEL: stack_fold_pabsd 162 ;CHECK: pabsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 163 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 164 %2 = icmp sgt <4 x i32> %a0, zeroinitializer 165 %3 = sub <4 x i32> zeroinitializer, %a0 166 %4 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %3 167 ret <4 x i32> %4 168 } 169 170 define <8 x i16> @stack_fold_pabsw(<8 x i16> %a0) { 171 ;CHECK-LABEL: stack_fold_pabsw 172 ;CHECK: pabsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 173 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 174 %2 = icmp sgt <8 x i16> %a0, zeroinitializer 175 %3 = sub <8 x i16> zeroinitializer, %a0 176 %4 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %3 177 ret <8 x i16> %4 178 } 179 180 define <8 x i16> @stack_fold_packssdw(<4 x i32> %a0, <4 x i32> %a1) { 181 ;CHECK-LABEL: stack_fold_packssdw 182 ;CHECK: packssdw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 183 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 184 %2 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> 
%a1) 185 ret <8 x i16> %2 186 } 187 declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone 188 189 define <16 x i8> @stack_fold_packsswb(<8 x i16> %a0, <8 x i16> %a1) { 190 ;CHECK-LABEL: stack_fold_packsswb 191 ;CHECK: packsswb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 192 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 193 %2 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1) 194 ret <16 x i8> %2 195 } 196 declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone 197 198 define <8 x i16> @stack_fold_packusdw(<4 x i32> %a0, <4 x i32> %a1) { 199 ;CHECK-LABEL: stack_fold_packusdw 200 ;CHECK: packusdw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 201 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 202 %2 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1) 203 ret <8 x i16> %2 204 } 205 declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone 206 207 define <16 x i8> @stack_fold_packuswb(<8 x i16> %a0, <8 x i16> %a1) { 208 ;CHECK-LABEL: stack_fold_packuswb 209 ;CHECK: packuswb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 210 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 211 %2 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1) 212 ret <16 x i8> %2 213 } 214 declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone 215 216 define <16 x i8> @stack_fold_paddb(<16 x i8> %a0, <16 x i8> %a1) { 217 ;CHECK-LABEL: stack_fold_paddb 218 ;CHECK: paddb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 219 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 220 %2 = add <16 x i8> %a0, %a1 221 ret <16 x i8> %2 222 } 223 224 define <4 x i32> @stack_fold_paddd(<4 x i32> %a0, <4 x i32> %a1) { 225 ;CHECK-LABEL: stack_fold_paddd 226 ;CHECK: paddd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 227 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 228 %2 = add <4 x i32> %a0, %a1 229 ret <4 x i32> %2 230 } 231 232 define <2 x i64> @stack_fold_paddq(<2 x i64> %a0, <2 x i64> %a1) { 233 ;CHECK-LABEL: stack_fold_paddq 234 ;CHECK: paddq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 235 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 236 %2 = add <2 x i64> %a0, %a1 237 ret <2 x i64> %2 238 } 239 240 define <16 x i8> @stack_fold_paddsb(<16 x i8> %a0, <16 x i8> %a1) { 241 ;CHECK-LABEL: stack_fold_paddsb 242 ;CHECK: paddsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 243 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 244 %2 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) 245 ret <16 x i8> %2 246 } 247 declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone 248 249 define <8 x i16> @stack_fold_paddsw(<8 x i16> %a0, <8 x i16> %a1) { 250 ;CHECK-LABEL: stack_fold_paddsw 251 ;CHECK: paddsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 252 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 253 %2 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) 254 ret <8 x i16> %2 255 } 256 declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone 257 258 define <16 x i8> @stack_fold_paddusb(<16 x i8> %a0, <16 x i8> %a1) { 259 ;CHECK-LABEL: stack_fold_paddusb 260 ;CHECK: paddusb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 261 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 262 %2 = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %a0, <16 x i8> %a1) 263 ret <16 x i8> %2 264 } 265 declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnone 266 267 define <8 x i16> @stack_fold_paddusw(<8 x i16> %a0, <8 x i16> %a1) { 268 ;CHECK-LABEL: stack_fold_paddusw 269 ;CHECK: paddusw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 270 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 271 %2 = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %a0, <8 x i16> %a1) 272 ret <8 x i16> %2 273 } 274 declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone 275 276 define <8 x i16> @stack_fold_paddw(<8 x i16> %a0, <8 x i16> %a1) { 277 ;CHECK-LABEL: stack_fold_paddw 278 ;CHECK: paddw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 279 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 280 %2 = add <8 x i16> %a0, %a1 281 ret <8 x i16> %2 282 } 283 284 define <16 x i8> @stack_fold_palignr(<16 x i8> %a0, <16 x i8> %a1) { 285 ;CHECK-LABEL: stack_fold_palignr 286 ;CHECK: palignr $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 287 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 288 %2 = shufflevector <16 x i8> %a1, <16 x i8> %a0, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16> 289 ret <16 x i8> %2 290 } 291 292 define <16 x i8> @stack_fold_pand(<16 x i8> %a0, <16 x i8> %a1) { 293 ;CHECK-LABEL: stack_fold_pand 294 ;CHECK: pand {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 295 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 296 %2 = and <16 x i8> %a0, %a1 297 ; add forces execution 
domain 298 %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 299 ret <16 x i8> %3 300 } 301 302 define <16 x i8> @stack_fold_pandn(<16 x i8> %a0, <16 x i8> %a1) { 303 ;CHECK-LABEL: stack_fold_pandn 304 ;CHECK: pandn {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 305 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 306 %2 = xor <16 x i8> %a0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 307 %3 = and <16 x i8> %2, %a1 308 ; add forces execution domain 309 %4 = add <16 x i8> %3, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 310 ret <16 x i8> %4 311 } 312 313 define <16 x i8> @stack_fold_pavgb(<16 x i8> %a0, <16 x i8> %a1) { 314 ;CHECK-LABEL: stack_fold_pavgb 315 ;CHECK: pavgb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 316 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 317 %2 = zext <16 x i8> %a0 to <16 x i16> 318 %3 = zext <16 x i8> %a1 to <16 x i16> 319 %4 = add <16 x i16> %2, %3 320 %5 = add <16 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 321 %6 = lshr <16 x i16> %5, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> 322 %7 = trunc <16 x i16> %6 to <16 x i8> 323 ret <16 x i8> %7 324 } 325 326 define <8 x i16> @stack_fold_pavgw(<8 x i16> %a0, <8 x i16> %a1) { 327 ;CHECK-LABEL: stack_fold_pavgw 328 ;CHECK: pavgw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 329 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 330 %2 = zext <8 x i16> %a0 to <8 x i32> 331 %3 = zext <8 x i16> %a1 to <8 x i32> 332 %4 = add <8 x i32> %2, %3 333 %5 = add <8 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 334 %6 = lshr <8 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1> 335 %7 = trunc <8 x i32> %6 to <8 x i16> 336 ret <8 x i16> %7 337 } 338 339 define <16 x i8> @stack_fold_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %c) { 340 ;CHECK-LABEL: stack_fold_pblendvb 341 ;CHECK: pblendvb %xmm0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 342 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 343 %2 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a1, <16 x i8> %c, <16 x i8> %a0) 344 ret <16 x i8> %2 345 } 346 declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone 347 348 define <8 x i16> @stack_fold_pblendw(<8 x i16> %a0, <8 x i16> %a1) { 349 ;CHECK-LABEL: stack_fold_pblendw 350 ;CHECK: pblendw $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 351 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 352 %2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, 
<8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7> 353 ret <8 x i16> %2 354 } 355 356 define <2 x i64> @stack_fold_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) { 357 ;CHECK-LABEL: stack_fold_pclmulqdq 358 ;CHECK: pclmulqdq $0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 359 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 360 %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0) 361 ret <2 x i64> %2 362 } 363 declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone 364 365 define <16 x i8> @stack_fold_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1) { 366 ;CHECK-LABEL: stack_fold_pcmpeqb 367 ;CHECK: pcmpeqb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 368 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 369 %2 = icmp eq <16 x i8> %a0, %a1 370 %3 = sext <16 x i1> %2 to <16 x i8> 371 ret <16 x i8> %3 372 } 373 374 define <4 x i32> @stack_fold_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1) { 375 ;CHECK-LABEL: stack_fold_pcmpeqd 376 ;CHECK: pcmpeqd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 377 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 378 %2 = icmp eq <4 x i32> %a0, %a1 379 %3 = sext <4 x i1> %2 to <4 x i32> 380 ret <4 x i32> %3 381 } 382 383 define <2 x i64> @stack_fold_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) { 384 ;CHECK-LABEL: stack_fold_pcmpeqq 385 ;CHECK: pcmpeqq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 386 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 387 %2 = icmp eq <2 x i64> %a0, %a1 388 %3 = sext <2 x i1> %2 to <2 x i64> 389 ret <2 x i64> %3 390 } 391 392 define <8 x i16> @stack_fold_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1) { 393 ;CHECK-LABEL: stack_fold_pcmpeqw 394 ;CHECK: pcmpeqw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 395 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 396 %2 = icmp eq <8 x i16> %a0, %a1 397 %3 = sext <8 x i1> %2 to <8 x i16> 398 ret <8 x i16> %3 399 } 400 401 define i32 @stack_fold_pcmpestri(<16 x i8> %a0, <16 x i8> %a1) { 402 ;CHECK-LABEL: stack_fold_pcmpestri 403 ;CHECK: pcmpestri $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 404 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"() 405 %2 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7) 406 ret i32 %2 407 } 408 declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 409 410 define <16 x i8> @stack_fold_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1) { 411 ;CHECK-LABEL: stack_fold_pcmpestrm 412 ;CHECK: pcmpestrm $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 413 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"() 414 %2 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7) 415 ret <16 x i8> %2 416 } 417 declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone 418 419 define <16 x i8> @stack_fold_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1) { 420 ;CHECK-LABEL: stack_fold_pcmpgtb 421 ;CHECK: pcmpgtb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 422 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 423 %2 = icmp sgt <16 x i8> %a0, %a1 424 %3 = sext <16 x i1> %2 to <16 x i8> 425 ret <16 x i8> %3 426 } 427 428 define <4 x i32> @stack_fold_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1) { 429 ;CHECK-LABEL: stack_fold_pcmpgtd 430 ;CHECK: pcmpgtd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 431 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 432 %2 = icmp sgt <4 x i32> %a0, %a1 433 %3 = sext <4 x i1> %2 to <4 x i32> 434 ret <4 x i32> %3 435 } 436 437 define <2 x i64> @stack_fold_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) { 438 ;CHECK-LABEL: stack_fold_pcmpgtq 439 ;CHECK: pcmpgtq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 440 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 441 %2 = icmp sgt <2 x i64> %a0, %a1 442 %3 = sext <2 x i1> %2 to <2 x i64> 443 ret <2 x i64> %3 444 } 445 446 define <8 x i16> @stack_fold_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1) { 447 ;CHECK-LABEL: stack_fold_pcmpgtw 448 ;CHECK: pcmpgtw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 449 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 450 %2 = icmp sgt <8 x i16> %a0, %a1 451 %3 = sext <8 x i1> %2 to <8 x i16> 452 ret <8 x i16> %3 453 } 454 455 define i32 @stack_fold_pcmpistri(<16 x i8> %a0, <16 x i8> %a1) { 456 ;CHECK-LABEL: stack_fold_pcmpistri 457 ;CHECK: pcmpistri $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 458 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 459 %2 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7) 460 ret i32 %2 461 } 462 declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone 463 464 define <16 x i8> @stack_fold_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1) { 465 ;CHECK-LABEL: stack_fold_pcmpistrm 466 ;CHECK: pcmpistrm $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 467 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 468 %2 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7) 469 ret <16 x i8> %2 470 } 471 declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone 472 473 
; TODO stack_fold_pextrb 474 475 ; We can't naively fold pextrw as it only writes to a 16-bit memory location 476 ; even though it can store to a 32-bit register. 477 define i16 @stack_fold_pextrw(<8 x i16> %a0) { 478 ; CHECK-LABEL: stack_fold_pextrw 479 ; CHECK: pextrw $1, {{%xmm[0-9][0-9]*}}, %[[GPR32:(e[a-z]+|r[0-9]+d)]] 480 ; CHECK: movl %[[GPR32]], {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Spill 481 ; CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload 482 entry: 483 ; add forces execution domain 484 %add = add <8 x i16> %a0, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8> 485 %extract = extractelement <8 x i16> %add, i32 1 486 %asm = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 487 ret i16 %extract 488 } 489 490 define i32 @stack_fold_pextrd(<4 x i32> %a0) { 491 ;CHECK-LABEL: stack_fold_pextrd 492 ;CHECK: pextrd $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 4-byte Folded Spill 493 ;CHECK: movl {{-?[0-9]*}}(%rsp), %eax {{.*#+}} 4-byte Reload 494 ; add forces execution domain 495 %1 = add <4 x i32> %a0, <i32 1, i32 2, i32 3, i32 4> 496 %2 = extractelement <4 x i32> %1, i32 1 497 %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 498 ret i32 %2 499 } 500 501 define i64 @stack_fold_pextrq(<2 x i64> %a0) { 502 ;CHECK-LABEL: stack_fold_pextrq 503 ;CHECK: pextrq $1, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp) {{.*#+}} 8-byte Folded Spill 504 ;CHECK: movq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 8-byte Reload 505 %1 = extractelement <2 x i64> %a0, i32 1 506 %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 507 ret i64 %1 508 } 509 510 define <4 x i32> @stack_fold_phaddd(<4 x i32> %a0, <4 x i32> %a1) { 511 ;CHECK-LABEL: stack_fold_phaddd 512 ;CHECK: phaddd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 513 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 514 %2 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) 515 ret <4 x i32> %2 516 } 517 declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone 518 519 define <8 x i16> @stack_fold_phaddsw(<8 x i16> %a0, <8 x i16> %a1) { 520 ;CHECK-LABEL: stack_fold_phaddsw 521 ;CHECK: phaddsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 522 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 523 %2 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) 524 ret <8 x i16> %2 525 } 526 declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone 527 528 define <8 x i16> @stack_fold_phaddw(<8 x i16> %a0, <8 x i16> %a1) { 529 ;CHECK-LABEL: stack_fold_phaddw 530 ;CHECK: phaddw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 531 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 532 %2 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) 533 ret <8 x i16> %2 534 } 535 
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone 536 537 define <8 x i16> @stack_fold_phminposuw(<8 x i16> %a0) { 538 ;CHECK-LABEL: stack_fold_phminposuw 539 ;CHECK: phminposuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 540 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 541 %2 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0) 542 ret <8 x i16> %2 543 } 544 declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone 545 546 define <4 x i32> @stack_fold_phsubd(<4 x i32> %a0, <4 x i32> %a1) { 547 ;CHECK-LABEL: stack_fold_phsubd 548 ;CHECK: phsubd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 549 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 550 %2 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) 551 ret <4 x i32> %2 552 } 553 declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone 554 555 define <8 x i16> @stack_fold_phsubsw(<8 x i16> %a0, <8 x i16> %a1) { 556 ;CHECK-LABEL: stack_fold_phsubsw 557 ;CHECK: phsubsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 558 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 559 %2 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) 560 ret <8 x i16> %2 561 } 562 declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone 563 564 define <8 x i16> @stack_fold_phsubw(<8 x i16> %a0, <8 x i16> %a1) { 565 ;CHECK-LABEL: stack_fold_phsubw 566 ;CHECK: phsubw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 567 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 568 %2 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) 569 ret <8 x i16> %2 570 } 571 declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone 572 573 define <16 x i8> @stack_fold_pinsrb(<16 x i8> %a0, i8 %a1) { 574 ;CHECK-LABEL: stack_fold_pinsrb 575 ;CHECK: pinsrb $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 576 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 577 %2 = insertelement <16 x i8> %a0, i8 %a1, i32 1 578 ret <16 x i8> %2 579 } 580 581 define <4 x i32> @stack_fold_pinsrd(<4 x i32> %a0, i32 %a1) { 582 ;CHECK-LABEL: stack_fold_pinsrd 583 ;CHECK: pinsrd $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 584 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 585 %2 = insertelement <4 x i32> %a0, i32 %a1, i32 1 586 ret <4 x i32> %2 587 } 588 589 define <2 x i64> @stack_fold_pinsrq(<2 x i64> %a0, i64 %a1) { 590 ;CHECK-LABEL: stack_fold_pinsrq 591 ;CHECK: pinsrq $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 8-byte Folded Reload 592 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 593 %2 = insertelement <2 x i64> %a0, i64 %a1, i32 1 594 ret <2 x i64> %2 595 } 596 597 define <8 x i16> @stack_fold_pinsrw(<8 x i16> %a0, i16 %a1) { 598 ;CHECK-LABEL: stack_fold_pinsrw 599 ;CHECK: pinsrw $1, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 4-byte Folded Reload 600 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"() 601 %2 = insertelement <8 x i16> %a0, i16 %a1, i32 1 602 ret <8 x i16> %2 603 } 604 605 define <8 x i16> @stack_fold_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1) { 606 ;CHECK-LABEL: stack_fold_pmaddubsw 607 ;CHECK: pmaddubsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 608 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 609 %2 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) 610 ret <8 x i16> %2 611 } 612 declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone 613 614 define <4 x i32> @stack_fold_pmaddwd(<8 x i16> %a0, <8 x i16> %a1) { 615 ;CHECK-LABEL: stack_fold_pmaddwd 616 ;CHECK: pmaddwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 617 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 618 %2 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1) 619 ret <4 x i32> %2 620 } 621 declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone 622 623 define <16 x i8> @stack_fold_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) { 624 ;CHECK-LABEL: stack_fold_pmaxsb 625 ;CHECK: pmaxsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 626 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 627 %2 = icmp sgt <16 x i8> %a0, %a1 628 %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1 629 ret <16 x i8> %3 630 } 631 632 define <4 x i32> @stack_fold_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) { 633 ;CHECK-LABEL: stack_fold_pmaxsd 634 ;CHECK: pmaxsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 635 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 636 %2 = icmp sgt <4 x i32> %a0, %a1 637 %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1 638 ret <4 x i32> %3 639 } 640 641 define <8 x i16> @stack_fold_pmaxsw(<8 x i16> %a0, <8 x i16> %a1) { 642 ;CHECK-LABEL: stack_fold_pmaxsw 643 ;CHECK: pmaxsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 644 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 645 %2 = icmp sgt <8 x i16> %a0, %a1 646 %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1 647 ret <8 x i16> %3 648 } 649 650 define <16 x i8> @stack_fold_pmaxub(<16 x i8> %a0, <16 x i8> %a1) { 651 ;CHECK-LABEL: stack_fold_pmaxub 652 ;CHECK: pmaxub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 653 %1 = 
tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 654 %2 = icmp ugt <16 x i8> %a0, %a1 655 %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1 656 ret <16 x i8> %3 657 } 658 659 define <4 x i32> @stack_fold_pmaxud(<4 x i32> %a0, <4 x i32> %a1) { 660 ;CHECK-LABEL: stack_fold_pmaxud 661 ;CHECK: pmaxud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 662 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 663 %2 = icmp ugt <4 x i32> %a0, %a1 664 %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1 665 ret <4 x i32> %3 666 } 667 668 define <8 x i16> @stack_fold_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) { 669 ;CHECK-LABEL: stack_fold_pmaxuw 670 ;CHECK: pmaxuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 671 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 672 %2 = icmp ugt <8 x i16> %a0, %a1 673 %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1 674 ret <8 x i16> %3 675 } 676 677 define <16 x i8> @stack_fold_pminsb(<16 x i8> %a0, <16 x i8> %a1) { 678 ;CHECK-LABEL: stack_fold_pminsb 679 ;CHECK: pminsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 680 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 681 %2 = icmp slt <16 x i8> %a0, %a1 682 %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1 683 ret <16 x i8> %3 684 } 685 686 define <4 x i32> @stack_fold_pminsd(<4 x i32> %a0, <4 x i32> %a1) { 687 ;CHECK-LABEL: stack_fold_pminsd 688 ;CHECK: pminsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 689 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 690 %2 = icmp slt <4 x i32> %a0, %a1 691 %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1 692 ret <4 x i32> %3 693 } 694 695 define <8 x i16> @stack_fold_pminsw(<8 x i16> %a0, <8 x i16> %a1) { 696 ;CHECK-LABEL: stack_fold_pminsw 697 ;CHECK: pminsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 698 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 699 %2 = icmp slt <8 x i16> %a0, %a1 700 %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1 701 ret <8 x i16> %3 702 } 703 704 define <16 x i8> @stack_fold_pminub(<16 x i8> %a0, <16 x i8> %a1) { 705 ;CHECK-LABEL: stack_fold_pminub 706 ;CHECK: pminub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 707 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 708 %2 = icmp ult <16 x i8> %a0, %a1 709 %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1 710 ret <16 x i8> %3 711 } 712 713 define <4 x i32> @stack_fold_pminud(<4 x i32> %a0, <4 x i32> %a1) { 714 ;CHECK-LABEL: stack_fold_pminud 715 ;CHECK: pminud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded 
Reload 716 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 717 %2 = icmp ult <4 x i32> %a0, %a1 718 %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1 719 ret <4 x i32> %3 720 } 721 722 define <8 x i16> @stack_fold_pminuw(<8 x i16> %a0, <8 x i16> %a1) { 723 ;CHECK-LABEL: stack_fold_pminuw 724 ;CHECK: pminuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 725 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 726 %2 = icmp ult <8 x i16> %a0, %a1 727 %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1 728 ret <8 x i16> %3 729 } 730 731 define <4 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) { 732 ;CHECK-LABEL: stack_fold_pmovsxbd 733 ;CHECK: pmovsxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 734 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 735 %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 736 %3 = sext <4 x i8> %2 to <4 x i32> 737 ret <4 x i32> %3 738 } 739 740 define <2 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) { 741 ;CHECK-LABEL: stack_fold_pmovsxbq 742 ;CHECK: pmovsxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 743 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 744 %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 745 %3 = sext <2 x i8> %2 to <2 x i64> 746 ret <2 x i64> %3 747 } 748 749 define <8 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) { 750 ;CHECK-LABEL: stack_fold_pmovsxbw 751 ;CHECK: pmovsxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 752 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 753 %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 754 %3 = sext <8 x i8> %2 to <8 x i16> 755 ret <8 x i16> %3 756 } 757 758 define <2 x i64> @stack_fold_pmovsxdq(<4 x i32> %a0) { 759 ;CHECK-LABEL: stack_fold_pmovsxdq 760 ;CHECK: pmovsxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 761 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 762 %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 763 %3 = sext <2 x i32> %2 to <2 x i64> 764 ret <2 x i64> %3 765 } 766 767 define <4 x i32> @stack_fold_pmovsxwd(<8 x i16> %a0) { 768 ;CHECK-LABEL: stack_fold_pmovsxwd 769 ;CHECK: pmovsxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 770 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 771 %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 772 %3 = sext <4 x i16> %2 to <4 x i32> 773 ret <4 x i32> %3 774 } 
775 776 define <2 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) { 777 ;CHECK-LABEL: stack_fold_pmovsxwq 778 ;CHECK: pmovsxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 779 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 780 %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 781 %3 = sext <2 x i16> %2 to <2 x i64> 782 ret <2 x i64> %3 783 } 784 785 define <4 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) { 786 ;CHECK-LABEL: stack_fold_pmovzxbd 787 ;CHECK: pmovzxbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 788 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 789 %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 1, i32 19, i32 20, i32 21, i32 2, i32 22, i32 23, i32 24, i32 3, i32 25, i32 26, i32 27> 790 %3 = bitcast <16 x i8> %2 to <4 x i32> 791 ret <4 x i32> %3 792 } 793 794 define <2 x i64> @stack_fold_pmovzxbq(<16 x i8> %a0) { 795 ;CHECK-LABEL: stack_fold_pmovzxbq 796 ;CHECK: pmovzxbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 797 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 798 %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 1, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28> 799 %3 = bitcast <16 x i8> %2 to <2 x i64> 800 ret <2 x i64> %3 801 } 802 803 define <8 x i16> @stack_fold_pmovzxbw(<16 x i8> %a0) { 804 ;CHECK-LABEL: stack_fold_pmovzxbw 805 ;CHECK: pmovzxbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 806 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 807 %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 808 %3 = bitcast <16 x i8> %2 to <8 x i16> 809 ret <8 x i16> %3 810 } 811 812 define <2 x i64> @stack_fold_pmovzxdq(<4 x i32> %a0) { 813 ;CHECK-LABEL: stack_fold_pmovzxdq 814 ;CHECK: pmovzxdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 815 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 816 %2 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 817 %3 = bitcast <4 x i32> %2 to <2 x i64> 818 ret <2 x i64> %3 819 } 820 821 define <4 x i32> @stack_fold_pmovzxwd(<8 x i16> %a0) { 822 ;CHECK-LABEL: stack_fold_pmovzxwd 823 ;CHECK: pmovzxwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 824 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 825 %2 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 9, 
i32 2, i32 10, i32 3, i32 11> 826 %3 = bitcast <8 x i16> %2 to <4 x i32> 827 ret <4 x i32> %3 828 } 829 830 define <2 x i64> @stack_fold_pmovzxwq(<8 x i16> %a0) { 831 ;CHECK-LABEL: stack_fold_pmovzxwq 832 ;CHECK: pmovzxwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 833 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 834 %2 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 1, i32 11, i32 12, i32 13> 835 %3 = bitcast <8 x i16> %2 to <2 x i64> 836 ret <2 x i64> %3 837 } 838 839 define <2 x i64> @stack_fold_pmuldq(<4 x i32> %a0, <4 x i32> %a1) { 840 ;CHECK-LABEL: stack_fold_pmuldq 841 ;CHECK: pmuldq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 842 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 843 %2 = bitcast <4 x i32> %a0 to <2 x i64> 844 %3 = bitcast <4 x i32> %a1 to <2 x i64> 845 %4 = shl <2 x i64> %2, <i64 32, i64 32> 846 %5 = ashr <2 x i64> %4, <i64 32, i64 32> 847 %6 = shl <2 x i64> %3, <i64 32, i64 32> 848 %7 = ashr <2 x i64> %6, <i64 32, i64 32> 849 %8 = mul <2 x i64> %5, %7 850 ret <2 x i64> %8 851 } 852 853 define <8 x i16> @stack_fold_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1) { 854 ;CHECK-LABEL: stack_fold_pmulhrsw 855 ;CHECK: pmulhrsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 856 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 857 %2 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) 858 ret <8 x i16> %2 859 } 860 declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone 861 862 define <8 x i16> @stack_fold_pmulhuw(<8 x i16> %a0, <8 x i16> %a1) { 863 ;CHECK-LABEL: stack_fold_pmulhuw 864 ;CHECK: pmulhuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 865 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 866 %2 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1) 867 ret <8 x i16> %2 868 } 869 declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone 870 871 define <8 x i16> @stack_fold_pmulhw(<8 x i16> %a0, <8 x i16> %a1) { 872 ;CHECK-LABEL: stack_fold_pmulhw 873 ;CHECK: pmulhw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 874 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 875 %2 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1) 876 ret <8 x i16> %2 877 } 878 declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone 879 880 define <4 x i32> @stack_fold_pmulld(<4 x i32> %a0, <4 x i32> %a1) { 881 ;CHECK-LABEL: stack_fold_pmulld 882 ;CHECK: pmulld {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 883 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 884 %2 = 
mul <4 x i32> %a0, %a1 885 ret <4 x i32> %2 886 } 887 888 define <8 x i16> @stack_fold_pmullw(<8 x i16> %a0, <8 x i16> %a1) { 889 ;CHECK-LABEL: stack_fold_pmullw 890 ;CHECK: pmullw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 891 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 892 %2 = mul <8 x i16> %a0, %a1 893 ret <8 x i16> %2 894 } 895 896 define <2 x i64> @stack_fold_pmuludq(<4 x i32> %a0, <4 x i32> %a1) { 897 ;CHECK-LABEL: stack_fold_pmuludq 898 ;CHECK: pmuludq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 899 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 900 %2 = bitcast <4 x i32> %a0 to <2 x i64> 901 %3 = bitcast <4 x i32> %a1 to <2 x i64> 902 %4 = and <2 x i64> %2, <i64 4294967295, i64 4294967295> 903 %5 = and <2 x i64> %3, <i64 4294967295, i64 4294967295> 904 %6 = mul <2 x i64> %4, %5 905 ret <2 x i64> %6 906 } 907 908 define <16 x i8> @stack_fold_por(<16 x i8> %a0, <16 x i8> %a1) { 909 ;CHECK-LABEL: stack_fold_por 910 ;CHECK: por {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 911 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 912 %2 = or <16 x i8> %a0, %a1 913 ; add forces execution domain 914 %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 915 ret <16 x i8> %3 916 } 917 918 define <2 x i64> @stack_fold_psadbw(<16 x i8> %a0, <16 x i8> %a1) { 919 ;CHECK-LABEL: stack_fold_psadbw 920 ;CHECK: psadbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 921 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 922 %2 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1) 923 ret <2 x i64> %2 924 } 925 declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone 926 927 define <16 x i8> @stack_fold_pshufb(<16 x i8> %a0, <16 x i8> %a1) { 928 ;CHECK-LABEL: stack_fold_pshufb 929 ;CHECK: pshufb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 930 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 931 %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) 932 ret <16 x i8> %2 933 } 934 declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone 935 936 define <4 x i32> @stack_fold_pshufd(<4 x i32> %a0) { 937 ;CHECK-LABEL: stack_fold_pshufd 938 ;CHECK: pshufd $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 939 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 940 %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 941 ret <4 x i32> %2 942 } 943 944 define <8 x i16> @stack_fold_pshufhw(<8 x i16> %a0) { 945 ;CHECK-LABEL: stack_fold_pshufhw 946 ;CHECK: pshufhw $11, 
{{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 947 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 948 %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 4, i32 4> 949 ret <8 x i16> %2 950 } 951 952 define <8 x i16> @stack_fold_pshuflw(<8 x i16> %a0) { 953 ;CHECK-LABEL: stack_fold_pshuflw 954 ;CHECK: pshuflw $27, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 955 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 956 %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7> 957 ret <8 x i16> %2 958 } 959 960 define <16 x i8> @stack_fold_psignb(<16 x i8> %a0, <16 x i8> %a1) { 961 ;CHECK-LABEL: stack_fold_psignb 962 ;CHECK: psignb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 963 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 964 %2 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) 965 ret <16 x i8> %2 966 } 967 declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone 968 969 define <4 x i32> @stack_fold_psignd(<4 x i32> %a0, <4 x i32> %a1) { 970 ;CHECK-LABEL: stack_fold_psignd 971 ;CHECK: psignd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 972 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 973 %2 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) 974 ret <4 x i32> %2 975 } 976 declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone 977 978 define <8 x i16> @stack_fold_psignw(<8 x i16> %a0, <8 x i16> %a1) { 979 ;CHECK-LABEL: stack_fold_psignw 980 ;CHECK: psignw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 981 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 982 %2 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) 983 ret <8 x i16> %2 984 } 985 declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone 986 987 define <4 x i32> @stack_fold_pslld(<4 x i32> %a0, <4 x i32> %a1) { 988 ;CHECK-LABEL: stack_fold_pslld 989 ;CHECK: pslld {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 990 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 991 %2 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1) 992 ret <4 x i32> %2 993 } 994 declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone 995 996 define <2 x i64> @stack_fold_psllq(<2 x i64> %a0, <2 x i64> %a1) { 997 ;CHECK-LABEL: stack_fold_psllq 998 ;CHECK: psllq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 999 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1000 %2 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) 1001 ret <2 x i64> %2 1002 } 1003 declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone 1004 1005 define <8 x i16> @stack_fold_psllw(<8 x i16> %a0, <8 x i16> %a1) { 1006 ;CHECK-LABEL: stack_fold_psllw 1007 ;CHECK: psllw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1008 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1009 %2 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1) 1010 ret <8 x i16> %2 1011 } 1012 declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone 1013 1014 define <4 x i32> @stack_fold_psrad(<4 x i32> %a0, <4 x i32> %a1) { 1015 ;CHECK-LABEL: stack_fold_psrad 1016 ;CHECK: psrad {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1017 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1018 %2 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1) 1019 ret <4 x i32> %2 1020 } 1021 declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone 1022 1023 define <8 x i16> @stack_fold_psraw(<8 x i16> %a0, <8 x i16> %a1) { 1024 ;CHECK-LABEL: stack_fold_psraw 1025 ;CHECK: psraw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1026 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1027 %2 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1) 1028 ret <8 x i16> %2 1029 } 1030 declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone 1031 1032 define <4 x i32> @stack_fold_psrld(<4 x i32> %a0, <4 x i32> %a1) { 1033 ;CHECK-LABEL: stack_fold_psrld 1034 ;CHECK: psrld {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1035 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1036 %2 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1) 1037 ret <4 x i32> %2 1038 } 1039 declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone 1040 1041 define <2 x i64> @stack_fold_psrlq(<2 x i64> %a0, <2 x i64> %a1) { 1042 ;CHECK-LABEL: stack_fold_psrlq 1043 ;CHECK: psrlq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1044 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1045 %2 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) 1046 ret <2 x i64> %2 1047 } 1048 declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone 1049 1050 define <8 x i16> @stack_fold_psrlw(<8 x i16> %a0, <8 x i16> %a1) { 1051 ;CHECK-LABEL: stack_fold_psrlw 1052 ;CHECK: psrlw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1053 %1 = tail call <2 x i64> asm sideeffect "nop", 
"=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1054 %2 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) 1055 ret <8 x i16> %2 1056 } 1057 declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone 1058 1059 define <16 x i8> @stack_fold_psubb(<16 x i8> %a0, <16 x i8> %a1) { 1060 ;CHECK-LABEL: stack_fold_psubb 1061 ;CHECK: psubb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1062 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1063 %2 = sub <16 x i8> %a0, %a1 1064 ret <16 x i8> %2 1065 } 1066 1067 define <4 x i32> @stack_fold_psubd(<4 x i32> %a0, <4 x i32> %a1) { 1068 ;CHECK-LABEL: stack_fold_psubd 1069 ;CHECK: psubd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1070 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1071 %2 = sub <4 x i32> %a0, %a1 1072 ret <4 x i32> %2 1073 } 1074 1075 define <2 x i64> @stack_fold_psubq(<2 x i64> %a0, <2 x i64> %a1) { 1076 ;CHECK-LABEL: stack_fold_psubq 1077 ;CHECK: psubq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1078 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1079 %2 = sub <2 x i64> %a0, %a1 1080 ret <2 x i64> %2 1081 } 1082 1083 define <16 x i8> @stack_fold_psubsb(<16 x i8> %a0, <16 x i8> %a1) { 1084 ;CHECK-LABEL: stack_fold_psubsb 1085 ;CHECK: psubsb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1086 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1087 %2 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) 1088 ret <16 x i8> %2 1089 } 1090 declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone 1091 1092 define <8 x i16> @stack_fold_psubsw(<8 x i16> %a0, <8 x i16> %a1) { 1093 ;CHECK-LABEL: stack_fold_psubsw 1094 ;CHECK: psubsw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1095 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1096 %2 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) 1097 ret <8 x i16> %2 1098 } 1099 declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone 1100 1101 define <16 x i8> @stack_fold_psubusb(<16 x i8> %a0, <16 x i8> %a1) { 1102 ;CHECK-LABEL: stack_fold_psubusb 1103 ;CHECK: psubusb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1104 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1105 %2 = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %a0, <16 x i8> %a1) 1106 ret <16 x i8> %2 1107 } 1108 declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnone 1109 1110 define <8 x i16> @stack_fold_psubusw(<8 x i16> %a0, <8 x i16> %a1) { 1111 ;CHECK-LABEL: stack_fold_psubusw 
define i32 @stack_fold_ptest(<2 x i64> %a0, <2 x i64> %a1) {
  ;CHECK-LABEL: stack_fold_ptest
  ;CHECK: ptest {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone

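; In the unpack tests below (as in stack_fold_por/stack_fold_pxor above) the
; trailing integer add pins the result to the integer execution domain: the
; dword and qword unpacks have FP-domain equivalents (unpckhps, unpcklpd, ...),
; so without the add the compiler would be free to emit those instead of the
; integer mnemonics being CHECKed. The byte and word unpacks have no FP
; counterpart and need no such guard.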
asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1168 %2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15> 1169 ret <8 x i16> %2 1170 } 1171 1172 define <16 x i8> @stack_fold_punpcklbw(<16 x i8> %a0, <16 x i8> %a1) { 1173 ;CHECK-LABEL: stack_fold_punpcklbw 1174 ;CHECK: punpcklbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1175 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1176 %2 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23> 1177 ret <16 x i8> %2 1178 } 1179 1180 define <4 x i32> @stack_fold_punpckldq(<4 x i32> %a0, <4 x i32> %a1) { 1181 ;CHECK-LABEL: stack_fold_punpckldq 1182 ;CHECK: punpckldq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1183 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1184 %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 1185 ; add forces execution domain 1186 %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1> 1187 ret <4 x i32> %3 1188 } 1189 1190 define <2 x i64> @stack_fold_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1) { 1191 ;CHECK-LABEL: stack_fold_punpcklqdq 1192 ;CHECK: punpcklqdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1193 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1194 %2 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2> 1195 ; add forces execution domain 1196 %3 = add <2 x i64> %2, <i64 1, i64 1> 1197 ret <2 x i64> %3 1198 } 1199 1200 define <8 x i16> @stack_fold_punpcklwd(<8 x i16> %a0, <8 x i16> %a1) { 1201 ;CHECK-LABEL: stack_fold_punpcklwd 1202 ;CHECK: punpcklwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1203 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1204 %2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11> 1205 ret <8 x i16> %2 1206 } 1207 1208 define <16 x i8> @stack_fold_pxor(<16 x i8> %a0, <16 x i8> %a1) { 1209 ;CHECK-LABEL: stack_fold_pxor 1210 ;CHECK: pxor {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 1211 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 1212 %2 = xor <16 x i8> %a0, %a1 1213 ; add forces execution domain 1214 %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> 1215 ret <16 x i8> %3 1216 } 1217