; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i686-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+xop | FileCheck %s --check-prefix=ALL --check-prefix=X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/xop-builtins.c

define <2 x i64> @test_mm_maccs_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccs_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccs_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpmacssww %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %arg0, <8 x i16> %arg1, <8 x i16> %arg2)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_macc_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_macc_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_macc_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpmacsww %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %arg0, <8 x i16> %arg1, <8 x i16> %arg2)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16>, <8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_maccsd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccsd_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccsd_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpmacsswd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_maccd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccd_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccd_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpmacswd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_maccs_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccs_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccs_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpmacssdd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_macc_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_macc_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_macc_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpmacsdd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %arg0, <4 x i32> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32>, <4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_maccslo_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccslo_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccslo_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpmacssdql %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_macclo_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_macclo_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_macclo_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpmacsdql %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_maccshi_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maccshi_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maccshi_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpmacssdqh %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_macchi_epi32(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_macchi_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_macchi_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpmacsdqh %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %arg0, <4 x i32> %arg1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32>, <4 x i32>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_maddsd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maddsd_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maddsd_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpmadcsswd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_maddd_epi16(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) nounwind {
; X32-LABEL: test_mm_maddd_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_maddd_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpmadcswd %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %arg2 = bitcast <2 x i64> %a2 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %arg0, <8 x i16> %arg1, <4 x i32> %arg2)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16>, <8 x i16>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_haddw_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddw_epi8:
; X32:       # BB#0:
; X32-NEXT:    vphaddbw %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddw_epi8:
; X64:       # BB#0:
; X64-NEXT:    vphaddbw %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %arg0)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddd_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddd_epi8:
; X32:       # BB#0:
; X32-NEXT:    vphaddbd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddd_epi8:
; X64:       # BB#0:
; X64-NEXT:    vphaddbd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %arg0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddq_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epi8:
; X32:       # BB#0:
; X32-NEXT:    vphaddbq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epi8:
; X64:       # BB#0:
; X64-NEXT:    vphaddbq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddd_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddd_epi16:
; X32:       # BB#0:
; X32-NEXT:    vphaddwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddd_epi16:
; X64:       # BB#0:
; X64-NEXT:    vphaddwd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %arg0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_haddq_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epi16:
; X32:       # BB#0:
; X32-NEXT:    vphaddwq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epi16:
; X64:       # BB#0:
; X64-NEXT:    vphaddwq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_haddq_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epi32:
; X32:       # BB#0:
; X32-NEXT:    vphadddq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epi32:
; X64:       # BB#0:
; X64-NEXT:    vphadddq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32>) nounwind readnone

define <2 x i64> @test_mm_haddw_epu8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddw_epu8:
; X32:       # BB#0:
; X32-NEXT:    vphaddubw %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddw_epu8:
; X64:       # BB#0:
; X64-NEXT:    vphaddubw %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %arg0)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddd_epu8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddd_epu8:
; X32:       # BB#0:
; X32-NEXT:    vphaddubd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddd_epu8:
; X64:       # BB#0:
; X64-NEXT:    vphaddubd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %arg0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddq_epu8(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epu8:
; X32:       # BB#0:
; X32-NEXT:    vphaddubq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epu8:
; X64:       # BB#0:
; X64-NEXT:    vphaddubq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_haddd_epu16(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddd_epu16:
; X32:       # BB#0:
; X32-NEXT:    vphadduwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddd_epu16:
; X64:       # BB#0:
; X64-NEXT:    vphadduwd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %arg0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16>) nounwind readnone


define <2 x i64> @test_mm_haddq_epu16(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epu16:
; X32:       # BB#0:
; X32-NEXT:    vphadduwq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epu16:
; X64:       # BB#0:
; X64-NEXT:    vphadduwq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_haddq_epu32(<2 x i64> %a0) {
; X32-LABEL: test_mm_haddq_epu32:
; X32:       # BB#0:
; X32-NEXT:    vphaddudq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_haddq_epu32:
; X64:       # BB#0:
; X64-NEXT:    vphaddudq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32>) nounwind readnone

define <2 x i64> @test_mm_hsubw_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_hsubw_epi8:
; X32:       # BB#0:
; X32-NEXT:    vphsubbw %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsubw_epi8:
; X64:       # BB#0:
; X64-NEXT:    vphsubbw %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %arg0)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8>) nounwind readnone

define <2 x i64> @test_mm_hsubd_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_hsubd_epi16:
; X32:       # BB#0:
; X32-NEXT:    vphsubwd %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsubd_epi16:
; X64:       # BB#0:
; X64-NEXT:    vphsubwd %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %arg0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16>) nounwind readnone

define <2 x i64> @test_mm_hsubq_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_hsubq_epi32:
; X32:       # BB#0:
; X32-NEXT:    vphsubdq %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_hsubq_epi32:
; X64:       # BB#0:
; X64-NEXT:    vphsubdq %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %arg0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32>) nounwind readnone

define <2 x i64> @test_mm_cmov_si128(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_cmov_si128:
; X32:       # BB#0:
; X32-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; X32-NEXT:    vpxor %xmm3, %xmm2, %xmm3
; X32-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X32-NEXT:    vpand %xmm3, %xmm1, %xmm1
; X32-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_cmov_si128:
; X64:       # BB#0:
; X64-NEXT:    vpcmpeqd %xmm3, %xmm3, %xmm3
; X64-NEXT:    vpxor %xmm3, %xmm2, %xmm3
; X64-NEXT:    vpand %xmm2, %xmm0, %xmm0
; X64-NEXT:    vpand %xmm3, %xmm1, %xmm1
; X64-NEXT:    vpor %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64>, <2 x i64>, <2 x i64>) nounwind readnone

define <4 x i64> @test_mm256_cmov_si256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2) {
; X32-LABEL: test_mm256_cmov_si256:
; X32:       # BB#0:
; X32-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_cmov_si256:
; X64:       # BB#0:
; X64-NEXT:    vpcmov %ymm2, %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %a0, <4 x i64> %a1, <4 x i64> %a2)
  ret <4 x i64> %res
}
declare <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64>, <4 x i64>, <4 x i64>) nounwind readnone

define <2 x i64> @test_mm_perm_epi8(<2 x i64> %a0, <2 x i64> %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_perm_epi8:
; X32:       # BB#0:
; X32-NEXT:    vpperm %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_perm_epi8:
; X64:       # BB#0:
; X64-NEXT:    vpperm %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %arg0, <16 x i8> %arg1, <16 x i8> %arg2)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_rot_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_rot_epi8:
; X32:       # BB#0:
; X32-NEXT:    vprotb %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_rot_epi8:
; X64:       # BB#0:
; X64-NEXT:    vprotb %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vprotb(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_rot_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_rot_epi16:
; X32:       # BB#0:
; X32-NEXT:    vprotw %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_rot_epi16:
; X64:       # BB#0:
; X64-NEXT:    vprotw %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vprotw(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_rot_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_rot_epi32:
; X32:       # BB#0:
; X32-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_rot_epi32:
; X64:       # BB#0:
; X64-NEXT:    vprotd %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vprotd(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_rot_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_rot_epi64:
; X32:       # BB#0:
; X32-NEXT:    vprotq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_rot_epi64:
; X64:       # BB#0:
; X64-NEXT:    vprotq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vprotq(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_roti_epi8(<2 x i64> %a0) {
; X32-LABEL: test_mm_roti_epi8:
; X32:       # BB#0:
; X32-NEXT:    vprotb $1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_roti_epi8:
; X64:       # BB#0:
; X64-NEXT:    vprotb $1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %arg0, i8 1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8>, i8) nounwind readnone

define <2 x i64> @test_mm_roti_epi16(<2 x i64> %a0) {
; X32-LABEL: test_mm_roti_epi16:
; X32:       # BB#0:
; X32-NEXT:    vprotw $50, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_roti_epi16:
; X64:       # BB#0:
; X64-NEXT:    vprotw $50, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %arg0, i8 50)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16>, i8) nounwind readnone

define <2 x i64> @test_mm_roti_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm_roti_epi32:
; X32:       # BB#0:
; X32-NEXT:    vprotd $226, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_roti_epi32:
; X64:       # BB#0:
; X64-NEXT:    vprotd $226, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %arg0, i8 -30)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32>, i8) nounwind readnone

define <2 x i64> @test_mm_roti_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm_roti_epi64:
; X32:       # BB#0:
; X32-NEXT:    vprotq $100, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_roti_epi64:
; X64:       # BB#0:
; X64-NEXT:    vprotq $100, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %a0, i8 100)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64>, i8) nounwind readnone

define <2 x i64> @test_mm_shl_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_shl_epi8:
; X32:       # BB#0:
; X32-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_shl_epi8:
; X64:       # BB#0:
; X64-NEXT:    vpshlb %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_shl_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_shl_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_shl_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpshlw %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_shl_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_shl_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpshld %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_shl_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpshld %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpshld(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_shl_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_shl_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_shl_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpshlq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_sha_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sha_epi8:
; X32:       # BB#0:
; X32-NEXT:    vpshab %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sha_epi8:
; X64:       # BB#0:
; X64-NEXT:    vpshab %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %arg0, <16 x i8> %arg1)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vpshab(<16 x i8>, <16 x i8>) nounwind readnone

define <2 x i64> @test_mm_sha_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sha_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpshaw %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sha_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpshaw %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %arg0, <8 x i16> %arg1)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16>, <8 x i16>) nounwind readnone

define <2 x i64> @test_mm_sha_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sha_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpshad %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sha_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpshad %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %arg0, <4 x i32> %arg1)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpshad(<4 x i32>, <4 x i32>) nounwind readnone

define <2 x i64> @test_mm_sha_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_sha_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpshaq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_sha_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpshaq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64>, <2 x i64>) nounwind readnone

define <2 x i64> @test_mm_com_epu8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epu8:
; X32:       # BB#0:
; X32-NEXT:    vpcomltub %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epu8:
; X64:       # BB#0:
; X64-NEXT:    vpcomltub %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %arg0, <16 x i8> %arg1, i8 0)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epu16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epu16:
; X32:       # BB#0:
; X32-NEXT:    vpcomltuw %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epu16:
; X64:       # BB#0:
; X64-NEXT:    vpcomltuw %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %arg0, <8 x i16> %arg1, i8 0)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16>, <8 x i16>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epu32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epu32:
; X32:       # BB#0:
; X32-NEXT:    vpcomltud %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epu32:
; X64:       # BB#0:
; X64-NEXT:    vpcomltud %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %arg0, <4 x i32> %arg1, i8 0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32>, <4 x i32>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epu64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epu64:
; X32:       # BB#0:
; X32-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epu64:
; X64:       # BB#0:
; X64-NEXT:    vpcomltuq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64>, <2 x i64>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epi8:
; X32:       # BB#0:
; X32-NEXT:    vpcomltb %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epi8:
; X64:       # BB#0:
; X64-NEXT:    vpcomltb %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res = call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %arg0, <16 x i8> %arg1, i8 0)
  %bc = bitcast <16 x i8> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8>, <16 x i8>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epi16:
; X32:       # BB#0:
; X32-NEXT:    vpcomltw %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epi16:
; X64:       # BB#0:
; X64-NEXT:    vpcomltw %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res = call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %arg0, <8 x i16> %arg1, i8 0)
  %bc = bitcast <8 x i16> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16>, <8 x i16>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epi32:
; X32:       # BB#0:
; X32-NEXT:    vpcomltd %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epi32:
; X64:       # BB#0:
; X64-NEXT:    vpcomltd %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
  %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
  %res = call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %arg0, <4 x i32> %arg1, i8 0)
  %bc = bitcast <4 x i32> %res to <2 x i64>
  ret <2 x i64> %bc
}
declare <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32>, <4 x i32>, i8) nounwind readnone

define <2 x i64> @test_mm_com_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_com_epi64:
; X32:       # BB#0:
; X32-NEXT:    vpcomltq %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_com_epi64:
; X64:       # BB#0:
; X64-NEXT:    vpcomltq %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
  ret <2 x i64> %res
}
declare <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64>, <2 x i64>, i8) nounwind readnone

define <2 x double> @test_mm_permute2_pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_permute2_pd:
; X32:       # BB#0:
; X32-NEXT:    vpermil2pd $0, %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_permute2_pd:
; X64:       # BB#0:
; X64-NEXT:    vpermil2pd $0, %xmm2, %xmm1, %xmm0, %xmm0
; X64-NEXT:    retq
  %res = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> %a2, i8 0)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double>, <2 x double>, <2 x i64>, i8) nounwind readnone

define <4 x double> @test_mm256_permute2_pd(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2) {
; X32-LABEL: test_mm256_permute2_pd:
; X32:       # BB#0:
; X32-NEXT:    vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm256_permute2_pd:
; X64:       # BB#0:
; X64-NEXT:    vpermil2pd $0, %ymm2, %ymm1, %ymm0, %ymm0
; X64-NEXT:    retq
  %res = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> %a2, i8 0)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double>, <4 x double>, <4 x i64>, i8) nounwind readnone

define <4 x float> @test_mm_permute2_ps(<4 x float> %a0, <4 x float> %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_permute2_ps:
; X32:       # BB#0:
; X32-NEXT:    vpermil2ps $0, %xmm2, %xmm1, %xmm0, %xmm0
; X32-NEXT:    retl
;
; X64-LABEL: test_mm_permute2_ps: 981 ; X64: # BB#0: 982 ; X64-NEXT: vpermil2ps $0, %xmm2, %xmm1, %xmm0, %xmm0 983 ; X64-NEXT: retq 984 %arg2 = bitcast <2 x i64> %a2 to <4 x i32> 985 %res = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> %arg2, i8 0) 986 ret <4 x float> %res 987 } 988 declare <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float>, <4 x float>, <4 x i32>, i8) nounwind readnone 989 990 define <8 x float> @test_mm256_permute2_ps(<8 x float> %a0, <8 x float> %a1, <4 x i64> %a2) { 991 ; X32-LABEL: test_mm256_permute2_ps: 992 ; X32: # BB#0: 993 ; X32-NEXT: vpermil2ps $0, %ymm2, %ymm1, %ymm0, %ymm0 994 ; X32-NEXT: retl 995 ; 996 ; X64-LABEL: test_mm256_permute2_ps: 997 ; X64: # BB#0: 998 ; X64-NEXT: vpermil2ps $0, %ymm2, %ymm1, %ymm0, %ymm0 999 ; X64-NEXT: retq 1000 %arg2 = bitcast <4 x i64> %a2 to <8 x i32> 1001 %res = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a0, <8 x float> %a1, <8 x i32> %arg2, i8 0) 1002 ret <8 x float> %res 1003 } 1004 declare <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float>, <8 x float>, <8 x i32>, i8) nounwind readnone 1005 1006 define <4 x float> @test_mm_frcz_ss(<4 x float> %a0) { 1007 ; X32-LABEL: test_mm_frcz_ss: 1008 ; X32: # BB#0: 1009 ; X32-NEXT: vfrczss %xmm0, %xmm0 1010 ; X32-NEXT: retl 1011 ; 1012 ; X64-LABEL: test_mm_frcz_ss: 1013 ; X64: # BB#0: 1014 ; X64-NEXT: vfrczss %xmm0, %xmm0 1015 ; X64-NEXT: retq 1016 %res = call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a0) 1017 ret <4 x float> %res 1018 } 1019 declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone 1020 1021 define <2 x double> @test_mm_frcz_sd(<2 x double> %a0) { 1022 ; X32-LABEL: test_mm_frcz_sd: 1023 ; X32: # BB#0: 1024 ; X32-NEXT: vfrczsd %xmm0, %xmm0 1025 ; X32-NEXT: retl 1026 ; 1027 ; X64-LABEL: test_mm_frcz_sd: 1028 ; X64: # BB#0: 1029 ; X64-NEXT: vfrczsd %xmm0, %xmm0 1030 ; X64-NEXT: retq 1031 %res = call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a0) 1032 ret <2 x 
double> %res 1033 } 1034 declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone 1035 1036 define <4 x float> @test_mm_frcz_ps(<4 x float> %a0) { 1037 ; X32-LABEL: test_mm_frcz_ps: 1038 ; X32: # BB#0: 1039 ; X32-NEXT: vfrczps %xmm0, %xmm0 1040 ; X32-NEXT: retl 1041 ; 1042 ; X64-LABEL: test_mm_frcz_ps: 1043 ; X64: # BB#0: 1044 ; X64-NEXT: vfrczps %xmm0, %xmm0 1045 ; X64-NEXT: retq 1046 %res = call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %a0) 1047 ret <4 x float> %res 1048 } 1049 declare <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float>) nounwind readnone 1050 1051 define <2 x double> @test_mm_frcz_pd(<2 x double> %a0) { 1052 ; X32-LABEL: test_mm_frcz_pd: 1053 ; X32: # BB#0: 1054 ; X32-NEXT: vfrczpd %xmm0, %xmm0 1055 ; X32-NEXT: retl 1056 ; 1057 ; X64-LABEL: test_mm_frcz_pd: 1058 ; X64: # BB#0: 1059 ; X64-NEXT: vfrczpd %xmm0, %xmm0 1060 ; X64-NEXT: retq 1061 %res = call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %a0) 1062 ret <2 x double> %res 1063 } 1064 declare <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double>) nounwind readnone 1065 1066 define <8 x float> @test_mm256_frcz_ps(<8 x float> %a0) { 1067 ; X32-LABEL: test_mm256_frcz_ps: 1068 ; X32: # BB#0: 1069 ; X32-NEXT: vfrczps %ymm0, %ymm0 1070 ; X32-NEXT: retl 1071 ; 1072 ; X64-LABEL: test_mm256_frcz_ps: 1073 ; X64: # BB#0: 1074 ; X64-NEXT: vfrczps %ymm0, %ymm0 1075 ; X64-NEXT: retq 1076 %res = call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %a0) 1077 ret <8 x float> %res 1078 } 1079 declare <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float>) nounwind readnone 1080 1081 define <4 x double> @test_mm256_frcz_pd(<4 x double> %a0) { 1082 ; X32-LABEL: test_mm256_frcz_pd: 1083 ; X32: # BB#0: 1084 ; X32-NEXT: vfrczpd %ymm0, %ymm0 1085 ; X32-NEXT: retl 1086 ; 1087 ; X64-LABEL: test_mm256_frcz_pd: 1088 ; X64: # BB#0: 1089 ; X64-NEXT: vfrczpd %ymm0, %ymm0 1090 ; X64-NEXT: retq 1091 %res = call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %a0) 1092 ret <4 x double> %res 1093 } 
; Declaration for the 256-bit vfrczpd intrinsic exercised by
; test_mm256_frcz_pd above.
declare <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double>) nounwind readnone