1 ; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop < %s | FileCheck %s 2 3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" 4 target triple = "x86_64-unknown-unknown" 5 6 ; Stack reload folding tests. 7 ; 8 ; By including a nop call with sideeffects we can force a partial register spill of the 9 ; relevant registers and check that the reload is correctly folded into the instruction. 10 11 define <2 x double> @stack_fold_vfrczpd(<2 x double> %a0) { 12 ;CHECK-LABEL: stack_fold_vfrczpd 13 ;CHECK: vfrczpd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 14 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 15 %2 = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0) 16 ret <2 x double> %2 17 } 18 declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone 19 20 define <4 x double> @stack_fold_vfrczpd_ymm(<4 x double> %a0) { 21 ;CHECK-LABEL: stack_fold_vfrczpd_ymm 22 ;CHECK: vfrczpd {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 23 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 24 %2 = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0) 25 ret <4 x double> %2 26 } 27 declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone 28 29 define <4 x float> @stack_fold_vfrczps(<4 x float> %a0) { 30 ;CHECK-LABEL: stack_fold_vfrczps 31 ;CHECK: vfrczps {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 32 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 33 %2 = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0) 34 ret <4 x float> %2 35 } 36 declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone 37 38 define <8 x float> @stack_fold_vfrczps_ymm(<8 x float> %a0) { 39 ;CHECK-LABEL: stack_fold_vfrczps_ymm 40 ;CHECK: vfrczps {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 41 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 42 %2 = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0) 43 ret <8 x float> %2 44 } 45 declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone 46 47 define <2 x double> @stack_fold_vfrczsd(<2 x double> %a0) { 48 ;CHECK-LABEL: stack_fold_vfrczsd 49 ;CHECK: vfrczsd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 50 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 51 %2 = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0) 52 ret <2 x double> %2 53 } 54 declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone 55 56 define <4 x float> @stack_fold_vfrczss(<4 x float> %a0) { 57 ;CHECK-LABEL: stack_fold_vfrczss 58 ;CHECK: vfrczss {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 59 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 60 %2 = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0) 61 ret <4 x float> %2 62 } 63 declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone 64 65 define <2 x i64> @stack_fold_vpcmov_rm(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) { 66 ;CHECK-LABEL: stack_fold_vpcmov_rm 67 ;CHECK: vpcmov {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 68 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 69 %2 = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) 70 ret <2 x i64> %2 71 } 72 define <2 x i64> @stack_fold_vpcmov_mr(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) { 73 ;CHECK-LABEL: stack_fold_vpcmov_mr 74 ;CHECK: vpcmov {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 75 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 76 %2 = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a2, <2 x i64> %a1) 77 ret <2 x i64> %2 78 } 79 declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone 80 81 define <4 x i64> @stack_fold_vpcmov_rm_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) { 82 ;CHECK-LABEL: stack_fold_vpcmov_rm_ymm 83 ;CHECK: vpcmov {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 84 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 85 %2 = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) 86 ret <4 x i64> %2 87 } 88 define <4 x i64> @stack_fold_vpcmov_mr_ymm(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) { 89 ;CHECK-LABEL: stack_fold_vpcmov_mr_ymm 90 ;CHECK: vpcmov {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 91 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 92 %2 = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a2, <4 x i64> %a1) 93 ret <4 x i64> %2 94 } 95 declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone 96 97 define <16 x i8> @stack_fold_vpcomb(<16 x i8> %a0, <16 x i8> %a1) { 98 ;CHECK-LABEL: stack_fold_vpcomb 99 ;CHECK: vpcomltb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 100 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 101 %2 = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %a0, <16 x i8> %a1, i8 0) 102 ret <16 x i8> %2 103 } 104 declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone 105 106 define <4 x i32> @stack_fold_vpcomd(<4 x i32> %a0, <4 x i32> %a1) { 107 ;CHECK-LABEL: stack_fold_vpcomd 108 ;CHECK: vpcomltd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 109 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 110 %2 = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %a0, <4 x i32> %a1, i8 0) 111 ret <4 x i32> %2 112 } 113 declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone 114 115 define <2 x i64> @stack_fold_vpcomq(<2 x i64> %a0, <2 x i64> %a1) { 116 ;CHECK-LABEL: stack_fold_vpcomq 117 ;CHECK: vpcomltq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 118 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 119 %2 = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0) 120 ret <2 x i64> %2 121 } 122 declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone 123 124 define <16 x i8> @stack_fold_vpcomub(<16 x i8> %a0, <16 x i8> %a1) { 125 ;CHECK-LABEL: stack_fold_vpcomub 126 ;CHECK: vpcomltub {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 127 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 128 %2 = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %a0, <16 x i8> %a1, i8 0) 129 ret <16 x i8> %2 130 } 131 declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone 132 133 define <4 x i32> @stack_fold_vpcomud(<4 x i32> %a0, <4 x i32> %a1) { 134 ;CHECK-LABEL: stack_fold_vpcomud 135 ;CHECK: vpcomltud {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 136 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 137 %2 = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %a0, <4 x i32> %a1, i8 0) 138 ret <4 x i32> %2 139 } 140 declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone 141 142 define <2 x i64> @stack_fold_vpcomuq(<2 x i64> %a0, <2 x i64> %a1) { 143 ;CHECK-LABEL: stack_fold_vpcomuq 144 ;CHECK: vpcomltuq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 145 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 146 %2 = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0) 147 ret <2 x i64> %2 148 } 149 declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone 150 151 define <8 x i16> @stack_fold_vpcomuw(<8 x i16> %a0, <8 x i16> %a1) { 152 ;CHECK-LABEL: stack_fold_vpcomuw 153 ;CHECK: vpcomltuw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 154 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 155 %2 = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %a0, <8 x i16> %a1, i8 0) 156 ret <8 x i16> %2 157 } 158 declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone 159 160 define <8 x i16> @stack_fold_vpcomw(<8 x i16> %a0, <8 x i16> %a1) { 161 ;CHECK-LABEL: stack_fold_vpcomw 162 ;CHECK: vpcomltw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 163 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 164 %2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %a0, <8 x i16> %a1, i8 0) 165 ret <8 x i16> %2 166 } 167 declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone 168 169 define <2 x double> @stack_fold_vpermil2pd_rm(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2) { 170 ;CHECK-LABEL: stack_fold_vpermil2pd_rm 171 ;CHECK: vpermil2pd $0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 172 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 173 %2 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2, i8 0) 174 ret <2 x double> %2 175 } 176 define <2 x double> @stack_fold_vpermil2pd_mr(<2 x double> %a0, <2 x i64> %a1, <2 x double> %a2) { 177 ;CHECK-LABEL: stack_fold_vpermil2pd_mr 178 ;CHECK: vpermil2pd $0, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 179 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 180 %2 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a2, <2 x i64> %a1, i8 0) 181 ret <2 x double> %2 182 } 183 declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone 184 185 define <4 x double> @stack_fold_vpermil2pd_rm_ymm(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2) { 186 ;CHECK-LABEL: stack_fold_vpermil2pd_rm 187 ;CHECK: vpermil2pd $0, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 188 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 189 %2 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2, i8 0) 190 ret <4 x double> %2 191 } 192 define <4 x double> @stack_fold_vpermil2pd_mr_ymm(<4 x double> %a0, <4 x i64> %a1, <4 x double> %a2) { 193 ;CHECK-LABEL: stack_fold_vpermil2pd_mr 194 ;CHECK: vpermil2pd $0, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 195 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 196 %2 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a2, <4 x i64> %a1, i8 0) 197 ret <4 x double> %2 198 } 199 declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone 200 201 define <4 x float> @stack_fold_vpermil2ps_rm(<4 x float> %a0, <4 x float> %a1, <4 x i32> %a2) { 202 ;CHECK-LABEL: stack_fold_vpermil2ps_rm 203 ;CHECK: vpermil2ps $0, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 204 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 205 %2 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %a2, i8 0) 206 ret <4 x float> %2 207 } 208 define <4 x float> @stack_fold_vpermil2ps_mr(<4 x float> %a0, <4 x i32> %a1, <4 x float> %a2) { 209 ;CHECK-LABEL: stack_fold_vpermil2ps_mr 210 ;CHECK: vpermil2ps $0, {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 211 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 212 %2 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a2, <4 x i32> %a1, i8 0) 213 ret <4 x float> %2 214 } 215 declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone 216 217 define <8 x float> @stack_fold_vpermil2ps_rm_ymm(<8 x float> %a0, <8 x float> %a1, <8 x i32> %a2) { 218 ;CHECK-LABEL: stack_fold_vpermil2ps_rm 219 ;CHECK: vpermil2ps $0, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 220 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 221 %2 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %a2, i8 0) 222 ret <8 x float> %2 223 } 224 define <8 x float> @stack_fold_vpermil2ps_mr_ymm(<8 x float> %a0, <8 x i32> %a1, <8 x float> %a2) { 225 ;CHECK-LABEL: stack_fold_vpermil2ps_mr 226 ;CHECK: vpermil2ps $0, {{%ymm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%ymm[0-9][0-9]*}}, {{%ymm[0-9][0-9]*}} {{.*#+}} 32-byte Folded Reload 227 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 228 %2 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a2, <8 x i32> %a1, i8 0) 229 ret <8 x float> %2 230 } 231 declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone 232 233 define <4 x i32> @stack_fold_vphaddbd(<16 x i8> %a0) { 234 ;CHECK-LABEL: stack_fold_vphaddbd 235 ;CHECK: vphaddbd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 236 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 237 %2 = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %a0) 238 ret <4 x i32> %2 239 } 240 declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone 241 242 define <2 x i64> @stack_fold_vphaddbq(<16 x i8> %a0) { 243 ;CHECK-LABEL: stack_fold_vphaddbq 244 ;CHECK: vphaddbq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 245 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 246 %2 = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %a0) 247 ret <2 x i64> %2 248 } 249 declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone 250 251 define <8 x i16> @stack_fold_vphaddbw(<16 x i8> %a0) { 252 ;CHECK-LABEL: stack_fold_vphaddbw 253 ;CHECK: vphaddbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 254 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 255 %2 = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %a0) 256 ret <8 x i16> %2 257 } 258 declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone 259 260 define <2 x i64> @stack_fold_vphadddq(<4 x i32> %a0) { 261 ;CHECK-LABEL: stack_fold_vphadddq 262 ;CHECK: vphadddq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 263 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 264 %2 = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %a0) 265 ret <2 x i64> %2 266 } 267 declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone 268 269 define <4 x i32> @stack_fold_vphaddubd(<16 x i8> %a0) { 270 ;CHECK-LABEL: stack_fold_vphaddubd 271 ;CHECK: vphaddubd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 272 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 273 %2 = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %a0) 274 ret <4 x i32> %2 275 } 276 declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone 277 278 define <2 x i64> @stack_fold_vphaddubq(<16 x i8> %a0) { 279 ;CHECK-LABEL: stack_fold_vphaddubq 280 ;CHECK: vphaddubq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 281 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 282 %2 = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %a0) 283 ret <2 x i64> %2 284 } 285 declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone 286 287 define <8 x i16> @stack_fold_vphaddubw(<16 x i8> %a0) { 288 ;CHECK-LABEL: stack_fold_vphaddubw 289 ;CHECK: vphaddubw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 290 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 291 %2 = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %a0) 292 ret <8 x i16> %2 293 } 294 declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone 295 296 define <2 x i64> @stack_fold_vphaddudq(<4 x i32> %a0) { 297 ;CHECK-LABEL: stack_fold_vphaddudq 298 ;CHECK: vphaddudq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 299 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 300 %2 = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %a0) 301 ret <2 x i64> %2 302 } 303 declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone 304 305 define <4 x i32> @stack_fold_vphadduwd(<8 x i16> %a0) { 306 ;CHECK-LABEL: stack_fold_vphadduwd 307 ;CHECK: vphadduwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 308 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 309 %2 = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %a0) 310 ret <4 x i32> %2 311 } 312 declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone 313 314 define <2 x i64> @stack_fold_vphadduwq(<8 x i16> %a0) { 315 ;CHECK-LABEL: stack_fold_vphadduwq 316 ;CHECK: vphadduwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 317 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 318 %2 = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %a0) 319 ret <2 x i64> %2 320 } 321 declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone 322 323 define <4 x i32> @stack_fold_vphaddwd(<8 x i16> %a0) { 324 ;CHECK-LABEL: stack_fold_vphaddwd 325 ;CHECK: vphaddwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 326 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 327 %2 = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %a0) 328 ret <4 x i32> %2 329 } 330 declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone 331 332 define <2 x i64> @stack_fold_vphaddwq(<8 x i16> %a0) { 333 ;CHECK-LABEL: stack_fold_vphaddwq 334 ;CHECK: vphaddwq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 335 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 336 %2 = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %a0) 337 ret <2 x i64> %2 338 } 339 declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone 340 341 define <8 x i16> @stack_fold_vphsubbw(<16 x i8> %a0) { 342 ;CHECK-LABEL: stack_fold_vphsubbw 343 ;CHECK: vphsubbw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 344 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 345 %2 = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %a0) 346 ret <8 x i16> %2 347 } 348 declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone 349 350 define <2 x i64> @stack_fold_vphsubdq(<4 x i32> %a0) { 351 ;CHECK-LABEL: stack_fold_vphsubdq 352 ;CHECK: vphsubdq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 353 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 354 %2 = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %a0) 355 ret <2 x i64> %2 356 } 357 declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone 358 359 define <4 x i32> @stack_fold_vphsubwd(<8 x i16> %a0) { 360 ;CHECK-LABEL: stack_fold_vphsubwd 361 ;CHECK: vphsubwd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 362 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 363 %2 = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %a0) 364 ret <4 x i32> %2 365 } 366 declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone 367 368 define <4 x i32> @stack_fold_vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { 369 ;CHECK-LABEL: stack_fold_vpmacsdd 370 ;CHECK: vpmacsdd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 371 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 372 %2 = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) 373 ret <4 x i32> %2 374 } 375 declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone 376 377 define <2 x i64> @stack_fold_vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) { 378 ;CHECK-LABEL: stack_fold_vpmacsdqh 379 ;CHECK: vpmacsdqh {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 380 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 381 %2 = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) 382 ret <2 x i64> %2 383 } 384 declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone 385 386 define <2 x i64> @stack_fold_vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) { 387 ;CHECK-LABEL: stack_fold_vpmacsdql 388 ;CHECK: vpmacsdql {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 389 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 390 %2 = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) 391 ret <2 x i64> %2 392 } 393 declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone 394 395 define <4 x i32> @stack_fold_vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) { 396 ;CHECK-LABEL: stack_fold_vpmacssdd 397 ;CHECK: vpmacssdd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 398 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 399 %2 = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %a0, <4 x i32> %a1, <4 x i32> %a2) 400 ret <4 x i32> %2 401 } 402 declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone 403 404 define <2 x i64> @stack_fold_vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) { 405 ;CHECK-LABEL: stack_fold_vpmacssdqh 406 ;CHECK: vpmacssdqh {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 407 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 408 %2 = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) 409 ret <2 x i64> %2 410 } 411 declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone 412 413 define <2 x i64> @stack_fold_vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) { 414 ;CHECK-LABEL: stack_fold_vpmacssdql 415 ;CHECK: vpmacssdql {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 416 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 417 %2 = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %a0, <4 x i32> %a1, <2 x i64> %a2) 418 ret <2 x i64> %2 419 } 420 declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone 421 422 define <4 x i32> @stack_fold_vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) { 423 ;CHECK-LABEL: stack_fold_vpmacsswd 424 ;CHECK: vpmacsswd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 425 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 426 %2 = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) 427 ret <4 x i32> %2 428 } 429 declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone 430 431 define <8 x i16> @stack_fold_vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) { 432 ;CHECK-LABEL: stack_fold_vpmacssww 433 ;CHECK: vpmacssww {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 434 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 435 %2 = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) 436 ret <8 x i16> %2 437 } 438 declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone 439 440 define <4 x i32> @stack_fold_vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) { 441 ;CHECK-LABEL: stack_fold_vpmacswd 442 ;CHECK: vpmacswd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 443 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 444 %2 = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) 445 ret <4 x i32> %2 446 } 447 declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone 448 449 define <8 x i16> @stack_fold_vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) { 450 ;CHECK-LABEL: stack_fold_vpmacsww 451 ;CHECK: vpmacsww {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 452 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 453 %2 = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %a0, <8 x i16> %a1, <8 x i16> %a2) 454 ret <8 x i16> %2 455 } 456 declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone 457 458 define <4 x i32> @stack_fold_vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) { 459 ;CHECK-LABEL: stack_fold_vpmadcsswd 460 ;CHECK: vpmadcsswd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 461 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 462 %2 = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) 463 ret <4 x i32> %2 464 } 465 declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone 466 467 define <4 x i32> @stack_fold_vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) { 468 ;CHECK-LABEL: stack_fold_vpmadcswd 469 ;CHECK: vpmadcswd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 470 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 471 %2 = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %a0, <8 x i16> %a1, <4 x i32> %a2) 472 ret <4 x i32> %2 473 } 474 declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone 475 476 define <16 x i8> @stack_fold_vpperm_rm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { 477 ;CHECK-LABEL: stack_fold_vpperm_rm 478 ;CHECK: vpperm {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 479 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 480 %2 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) 481 ret <16 x i8> %2 482 } 483 define <16 x i8> @stack_fold_vpperm_mr(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) { 484 ;CHECK-LABEL: stack_fold_vpperm_mr 485 ;CHECK: vpperm {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 486 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 487 %2 = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %a0, <16 x i8> %a2, <16 x i8> %a1) 488 ret <16 x i8> %2 489 } 490 declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone 491 492 define <16 x i8> @stack_fold_vprotb(<16 x i8> %a0) { 493 ;CHECK-LABEL: stack_fold_vprotb 494 ;CHECK: vprotb $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 495 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 496 %2 = call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %a0, i8 7) 497 ret <16 x i8> %2 498 } 499 declare <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8>, i8) nounwind readnone 500 501 define <16 x i8> @stack_fold_vprotb_rm(<16 x i8> %a0, <16 x i8> %a1) { 502 ;CHECK-LABEL: stack_fold_vprotb_rm 503 ;CHECK: vprotb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 504 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 505 %2 = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %a0, <16 x i8> %a1) 506 ret <16 x i8> %2 507 } 508 define <16 x i8> @stack_fold_vprotb_mr(<16 x i8> %a0, <16 x i8> %a1) { 509 ;CHECK-LABEL: stack_fold_vprotb_mr 510 ;CHECK: vprotb {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 511 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 512 %2 = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %a1, <16 x i8> %a0) 513 ret <16 x i8> %2 514 } 515 declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone 516 517 define <4 x i32> @stack_fold_vprotd(<4 x i32> %a0) { 518 ;CHECK-LABEL: stack_fold_vprotd 519 ;CHECK: vprotd $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 520 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 521 %2 = call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %a0, i8 7) 522 ret <4 x i32> %2 523 } 524 declare <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32>, i8) nounwind readnone 525 526 define <4 x i32> @stack_fold_vprotd_rm(<4 x i32> %a0, <4 x i32> %a1) { 527 ;CHECK-LABEL: stack_fold_vprotd_rm 528 ;CHECK: vprotd {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 529 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 530 %2 = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %a0, <4 x i32> %a1) 531 ret <4 x i32> %2 532 } 533 define <4 x i32> @stack_fold_vprotd_mr(<4 x i32> %a0, <4 x i32> %a1) { 534 ;CHECK-LABEL: stack_fold_vprotd_mr 535 ;CHECK: vprotd {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 536 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 537 %2 = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %a1, <4 x i32> %a0) 538 ret <4 x i32> %2 539 } 540 declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone 541 542 define <2 x i64> @stack_fold_vprotq(<2 x i64> %a0) { 543 ;CHECK-LABEL: stack_fold_vprotq 544 ;CHECK: vprotq $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 545 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 546 %2 = call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %a0, i8 7) 547 ret <2 x i64> %2 548 } 549 declare <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64>, i8) nounwind readnone 550 551 define <2 x i64> @stack_fold_vprotq_rm(<2 x i64> %a0, <2 x i64> %a1) { 552 ;CHECK-LABEL: stack_fold_vprotq_rm 553 ;CHECK: vprotq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 554 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 555 %2 = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1) 556 ret <2 x i64> %2 557 } 558 define <2 x i64> @stack_fold_vprotq_mr(<2 x i64> %a0, <2 x i64> %a1) { 559 ;CHECK-LABEL: stack_fold_vprotq_mr 560 ;CHECK: vprotq {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 561 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 562 %2 = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a1, <2 x i64> %a0) 563 ret <2 x i64> %2 564 } 565 declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone 566 567 define <8 x i16> @stack_fold_vprotw(<8 x i16> %a0) { 568 ;CHECK-LABEL: stack_fold_vprotw 569 ;CHECK: vprotw $7, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 570 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 571 %2 = call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %a0, i8 7) 572 ret <8 x i16> %2 573 } 574 declare <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16>, i8) nounwind readnone 575 576 define <8 x i16> @stack_fold_vprotw_rm(<8 x i16> %a0, <8 x i16> %a1) { 577 ;CHECK-LABEL: stack_fold_vprotw_rm 578 ;CHECK: vprotw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 579 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 580 %2 = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %a0, <8 x i16> %a1) 581 ret <8 x i16> %2 582 } 583 define <8 x i16> @stack_fold_vprotw_mr(<8 x i16> %a0, <8 x i16> %a1) { 584 ;CHECK-LABEL: stack_fold_vprotw_mr 585 ;CHECK: vprotw {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 586 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 587 %2 = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %a1, <8 x i16> %a0) 588 ret <8 x i16> %2 589 } 590 declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone 591 592 define <16 x i8> @stack_fold_vpshab_rm(<16 x i8> %a0, <16 x i8> %a1) { 593 ;CHECK-LABEL: stack_fold_vpshab_rm 594 ;CHECK: vpshab {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 595 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 596 %2 = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a0, <16 x i8> %a1) 597 ret <16 x i8> %2 598 } 599 define <16 x i8> @stack_fold_vpshab_mr(<16 x i8> %a0, <16 x i8> %a1) { 600 ;CHECK-LABEL: stack_fold_vpshab_mr 601 ;CHECK: vpshab {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 602 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 603 %2 = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %a1, <16 x i8> %a0) 604 ret <16 x i8> %2 605 } 606 declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone 607 608 define <4 x i32> @stack_fold_vpshad_rm(<4 x i32> %a0, <4 x i32> %a1) { 609 ;CHECK-LABEL: stack_fold_vpshad_rm 610 ;CHECK: vpshad {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 611 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 612 %2 = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %a0, <4 x i32> %a1) 613 ret <4 x i32> %2 614 } 615 define <4 x i32> @stack_fold_vpshad_mr(<4 x i32> %a0, <4 x i32> %a1) { 616 ;CHECK-LABEL: stack_fold_vpshad_mr 617 ;CHECK: vpshad {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 618 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 619 %2 = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %a1, <4 x i32> %a0) 620 ret <4 x i32> %2 621 } 622 declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone 623 624 define <2 x i64> @stack_fold_vpshaq_rm(<2 x i64> %a0, <2 x i64> %a1) { 625 ;CHECK-LABEL: stack_fold_vpshaq_rm 626 ;CHECK: vpshaq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 627 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 628 %2 = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1) 629 ret <2 x i64> %2 630 } 631 define <2 x i64> @stack_fold_vpshaq_mr(<2 x i64> %a0, <2 x i64> %a1) { 632 ;CHECK-LABEL: stack_fold_vpshaq_mr 633 ;CHECK: vpshaq {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 634 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 635 %2 = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a1, <2 x i64> %a0) 636 ret <2 x i64> %2 637 } 638 declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone 639 640 define <8 x i16> @stack_fold_vpshaw_rm(<8 x i16> %a0, <8 x i16> %a1) { 641 ;CHECK-LABEL: stack_fold_vpshaw_rm 642 ;CHECK: vpshaw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 643 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 644 %2 = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %a0, <8 x i16> %a1) 645 ret <8 x i16> %2 646 } 647 define <8 x i16> @stack_fold_vpshaw_mr(<8 x i16> %a0, <8 x i16> %a1) { 648 ;CHECK-LABEL: stack_fold_vpshaw_mr 649 ;CHECK: vpshaw {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 650 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 651 %2 = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %a1, <8 x i16> %a0) 652 ret <8 x i16> %2 653 } 654 declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone 655 656 define <16 x i8> @stack_fold_vpshlb_rm(<16 x i8> %a0, <16 x i8> %a1) { 657 ;CHECK-LABEL: stack_fold_vpshlb_rm 658 ;CHECK: vpshlb {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 659 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 660 %2 = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %a0, <16 x i8> %a1) 661 ret <16 x i8> %2 662 } 663 define <16 x i8> @stack_fold_vpshlb_mr(<16 x i8> %a0, <16 x i8> %a1) { 664 ;CHECK-LABEL: stack_fold_vpshlb_mr 665 ;CHECK: vpshlb {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 666 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 667 %2 = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %a1, <16 x i8> %a0) 668 ret <16 x i8> %2 669 } 670 declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone 671 672 define <4 x i32> @stack_fold_vpshld_rm(<4 x i32> %a0, <4 x i32> %a1) { 673 ;CHECK-LABEL: stack_fold_vpshld_rm 674 ;CHECK: vpshld {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 675 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 676 %2 = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a0, <4 x i32> %a1) 677 ret <4 x i32> %2 678 } 679 define <4 x i32> @stack_fold_vpshld_mr(<4 x i32> %a0, <4 x i32> %a1) { 680 ;CHECK-LABEL: stack_fold_vpshld_mr 681 ;CHECK: vpshld {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 682 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 683 %2 = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %a1, <4 x i32> %a0) 684 ret <4 x i32> %2 685 } 686 declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone 687 688 define <2 x i64> @stack_fold_vpshlq_rm(<2 x i64> %a0, <2 x i64> %a1) { 689 ;CHECK-LABEL: stack_fold_vpshlq_rm 690 ;CHECK: vpshlq {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 691 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 692 %2 = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1) 693 ret <2 x i64> %2 694 } 695 define <2 x i64> @stack_fold_vpshlq_mr(<2 x i64> %a0, <2 x i64> %a1) { 696 ;CHECK-LABEL: stack_fold_vpshlq_mr 697 ;CHECK: vpshlq {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 698 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 699 %2 = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a1, <2 x i64> %a0) 700 ret <2 x i64> %2 701 } 702 declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone 703 704 define <8 x i16> @stack_fold_vpshlw_rm(<8 x i16> %a0, <8 x i16> %a1) { 705 ;CHECK-LABEL: stack_fold_vpshlw_rm 706 ;CHECK: vpshlw {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}}, {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 707 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 708 %2 = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a0, <8 x i16> %a1) 709 ret <8 x i16> %2 710 } 711 define <8 x i16> @stack_fold_vpshlw_mr(<8 x i16> %a0, <8 x i16> %a1) { 712 ;CHECK-LABEL: stack_fold_vpshlw_mr 713 ;CHECK: vpshlw {{%xmm[0-9][0-9]*}}, {{-?[0-9]*}}(%rsp), {{%xmm[0-9][0-9]*}} {{.*#+}} 16-byte Folded Reload 714 %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"() 715 %2 = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %a1, <8 x i16> %a0) 716 ret <8 x i16> %2 717 } 718 declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone 719