; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=X32
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx,+xop | FileCheck %s --check-prefix=X64

; These tests check that when the first (memory-foldable) operand of an XOP
; intrinsic comes from a load, the backend commutes the operands so the load
; can be folded into the instruction. For vpcom* the immediate predicate is
; swapped accordingly (e.g. lt <-> gt); the trailing comment on each call
; names the instruction that would be used WITHOUT commutation.

define <16 x i8> @commute_fold_vpcomb(<16 x i8>* %a0, <16 x i8> %a1) {
; X32-LABEL: commute_fold_vpcomb:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpcomgtb (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomb:
; X64:       # %bb.0:
; X64-NEXT:    vpcomgtb (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a0
  %2 = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %1, <16 x i8> %a1, i8 0) ; vpcomltb
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <4 x i32> @commute_fold_vpcomd(<4 x i32>* %a0, <4 x i32> %a1) {
; X32-LABEL: commute_fold_vpcomd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpcomged (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomd:
; X64:       # %bb.0:
; X64-NEXT:    vpcomged (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %1, <4 x i32> %a1, i8 1) ; vpcomled
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone

define <2 x i64> @commute_fold_vpcomq(<2 x i64>* %a0, <2 x i64> %a1) {
; X32-LABEL: commute_fold_vpcomq:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpcomltq (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomq:
; X64:       # %bb.0:
; X64-NEXT:    vpcomltq (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <2 x i64>, <2 x i64>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %1, <2 x i64> %a1, i8 2) ; vpcomgtq
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone

define <16 x i8> @commute_fold_vpcomub(<16 x i8>* %a0, <16 x i8> %a1) {
; X32-LABEL: commute_fold_vpcomub:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpcomleub (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomub:
; X64:       # %bb.0:
; X64-NEXT:    vpcomleub (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <16 x i8>, <16 x i8>* %a0
  %2 = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %1, <16 x i8> %a1, i8 3) ; vpcomgeub
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <4 x i32> @commute_fold_vpcomud(<4 x i32>* %a0, <4 x i32> %a1) {
; X32-LABEL: commute_fold_vpcomud:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpcomequd (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomud:
; X64:       # %bb.0:
; X64-NEXT:    vpcomequd (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %1, <4 x i32> %a1, i8 4) ; vpcomequd
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone

define <2 x i64> @commute_fold_vpcomuq(<2 x i64>* %a0, <2 x i64> %a1) {
; X32-LABEL: commute_fold_vpcomuq:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpcomnequq (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomuq:
; X64:       # %bb.0:
; X64-NEXT:    vpcomnequq (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <2 x i64>, <2 x i64>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %1, <2 x i64> %a1, i8 5) ; vpcomnequq
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone

define <8 x i16> @commute_fold_vpcomuw(<8 x i16>* %a0, <8 x i16> %a1) {
; X32-LABEL: commute_fold_vpcomuw:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpcomfalseuw (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomuw:
; X64:       # %bb.0:
; X64-NEXT:    vpcomfalseuw (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %1, <8 x i16> %a1, i8 6) ; vpcomfalseuw
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone

define <8 x i16> @commute_fold_vpcomw(<8 x i16>* %a0, <8 x i16> %a1) {
; X32-LABEL: commute_fold_vpcomw:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpcomtruew (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpcomw:
; X64:       # %bb.0:
; X64-NEXT:    vpcomtruew (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %1, <8 x i16> %a1, i8 7) ; vpcomtruew
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone

define <4 x i32> @commute_fold_vpmacsdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32> %a2) {
; X32-LABEL: commute_fold_vpmacsdd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacsdd %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacsdd:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsdd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %1, <4 x i32> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @commute_fold_vpmacsdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
; X32-LABEL: commute_fold_vpmacsdqh:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacsdqh %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacsdqh:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsdqh %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @commute_fold_vpmacsdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
; X32-LABEL: commute_fold_vpmacsdql:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacsdql %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacsdql:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsdql %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <4 x i32> @commute_fold_vpmacssdd(<4 x i32>* %a0, <4 x i32> %a1, <4 x i32> %a2) {
; X32-LABEL: commute_fold_vpmacssdd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacssdd %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacssdd:
; X64:       # %bb.0:
; X64-NEXT:    vpmacssdd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %1, <4 x i32> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @commute_fold_vpmacssdqh(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
; X32-LABEL: commute_fold_vpmacssdqh:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacssdqh %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacssdqh:
; X64:       # %bb.0:
; X64-NEXT:    vpmacssdqh %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @commute_fold_vpmacssdql(<4 x i32>* %a0, <4 x i32> %a1, <2 x i64> %a2) {
; X32-LABEL: commute_fold_vpmacssdql:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacssdql %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacssdql:
; X64:       # %bb.0:
; X64-NEXT:    vpmacssdql %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <4 x i32>, <4 x i32>* %a0
  %2 = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %1, <4 x i32> %a1, <2 x i64> %a2)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <4 x i32> @commute_fold_vpmacsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
; X32-LABEL: commute_fold_vpmacsswd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacsswd %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacsswd:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsswd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <8 x i16> @commute_fold_vpmacssww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16> %a2) {
; X32-LABEL: commute_fold_vpmacssww:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacssww %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacssww:
; X64:       # %bb.0:
; X64-NEXT:    vpmacssww %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %1, <8 x i16> %a1, <8 x i16> %a2)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @commute_fold_vpmacswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
; X32-LABEL: commute_fold_vpmacswd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacswd %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacswd:
; X64:       # %bb.0:
; X64-NEXT:    vpmacswd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <8 x i16> @commute_fold_vpmacsww(<8 x i16>* %a0, <8 x i16> %a1, <8 x i16> %a2) {
; X32-LABEL: commute_fold_vpmacsww:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmacsww %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmacsww:
; X64:       # %bb.0:
; X64-NEXT:    vpmacsww %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %1, <8 x i16> %a1, <8 x i16> %a2)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone

define <4 x i32> @commute_fold_vpmadcsswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
; X32-LABEL: commute_fold_vpmadcsswd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmadcsswd %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmadcsswd:
; X64:       # %bb.0:
; X64-NEXT:    vpmadcsswd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <4 x i32> @commute_fold_vpmadcswd(<8 x i16>* %a0, <8 x i16> %a1, <4 x i32> %a2) {
; X32-LABEL: commute_fold_vpmadcswd:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    vpmadcswd %xmm1, (%eax), %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: commute_fold_vpmadcswd:
; X64:       # %bb.0:
; X64-NEXT:    vpmadcswd %xmm1, (%rdi), %xmm0, %xmm0
; X64-NEXT:    retq
  %1 = load <8 x i16>, <8 x i16>* %a0
  %2 = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %1, <8 x i16> %a1, <4 x i32> %a2)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone