1 // RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Werror | FileCheck %s 2 // RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Werror | FileCheck %s 3 4 // Don't include mm_malloc.h, it's system specific. 5 #define __MM_MALLOC_H 6 7 #include <x86intrin.h> 8 9 // NOTE: This should match the tests in llvm/test/CodeGen/X86/xop-intrinsics-fast-isel.ll 10 11 __m128i test_mm_maccs_epi16(__m128i a, __m128i b, __m128i c) { 12 // CHECK-LABEL: test_mm_maccs_epi16 13 // CHECK: call <8 x i16> @llvm.x86.xop.vpmacssww(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 14 return _mm_maccs_epi16(a, b, c); 15 } 16 17 __m128i test_mm_macc_epi16(__m128i a, __m128i b, __m128i c) { 18 // CHECK-LABEL: test_mm_macc_epi16 19 // CHECK: call <8 x i16> @llvm.x86.xop.vpmacsww(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 20 return _mm_macc_epi16(a, b, c); 21 } 22 23 __m128i test_mm_maccsd_epi16(__m128i a, __m128i b, __m128i c) { 24 // CHECK-LABEL: test_mm_maccsd_epi16 25 // CHECK: call <4 x i32> @llvm.x86.xop.vpmacsswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}}) 26 return _mm_maccsd_epi16(a, b, c); 27 } 28 29 __m128i test_mm_maccd_epi16(__m128i a, __m128i b, __m128i c) { 30 // CHECK-LABEL: test_mm_maccd_epi16 31 // CHECK: call <4 x i32> @llvm.x86.xop.vpmacswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}}) 32 return _mm_maccd_epi16(a, b, c); 33 } 34 35 __m128i test_mm_maccs_epi32(__m128i a, __m128i b, __m128i c) { 36 // CHECK-LABEL: test_mm_maccs_epi32 37 // CHECK: call <4 x i32> @llvm.x86.xop.vpmacssdd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) 38 return _mm_maccs_epi32(a, b, c); 39 } 40 41 __m128i test_mm_macc_epi32(__m128i a, __m128i b, __m128i c) { 42 // CHECK-LABEL: test_mm_macc_epi32 43 // CHECK: call <4 x i32> @llvm.x86.xop.vpmacsdd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}}) 44 return _mm_macc_epi32(a, b, c); 45 } 46 47 __m128i test_mm_maccslo_epi32(__m128i a, __m128i b, __m128i c) { 48 // CHECK-LABEL: test_mm_maccslo_epi32 49 // CHECK: call <2 x i64> @llvm.x86.xop.vpmacssdql(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}) 50 return _mm_maccslo_epi32(a, b, c); 51 } 52 53 __m128i test_mm_macclo_epi32(__m128i a, __m128i b, __m128i c) { 54 // CHECK-LABEL: test_mm_macclo_epi32 55 // CHECK: call <2 x i64> @llvm.x86.xop.vpmacsdql(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}) 56 return _mm_macclo_epi32(a, b, c); 57 } 58 59 __m128i test_mm_maccshi_epi32(__m128i a, __m128i b, __m128i c) { 60 // CHECK-LABEL: test_mm_maccshi_epi32 61 // CHECK: call <2 x i64> @llvm.x86.xop.vpmacssdqh(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}) 62 return _mm_maccshi_epi32(a, b, c); 63 } 64 65 __m128i test_mm_macchi_epi32(__m128i a, __m128i b, __m128i c) { 66 // CHECK-LABEL: test_mm_macchi_epi32 67 // CHECK: call <2 x i64> @llvm.x86.xop.vpmacsdqh(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <2 x i64> %{{.*}}) 68 return _mm_macchi_epi32(a, b, c); 69 } 70 71 __m128i test_mm_maddsd_epi16(__m128i a, __m128i b, __m128i c) { 72 // CHECK-LABEL: test_mm_maddsd_epi16 73 // CHECK: call <4 x i32> @llvm.x86.xop.vpmadcsswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}}) 74 return _mm_maddsd_epi16(a, b, c); 75 } 76 77 __m128i test_mm_maddd_epi16(__m128i a, __m128i b, __m128i c) { 78 // CHECK-LABEL: test_mm_maddd_epi16 79 // CHECK: call <4 x i32> @llvm.x86.xop.vpmadcswd(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <4 x i32> %{{.*}}) 80 return _mm_maddd_epi16(a, b, c); 81 } 82 83 __m128i test_mm_haddw_epi8(__m128i a) { 84 // CHECK-LABEL: test_mm_haddw_epi8 85 // CHECK: call <8 x i16> @llvm.x86.xop.vphaddbw(<16 x i8> %{{.*}}) 86 return _mm_haddw_epi8(a); 87 } 88 89 __m128i test_mm_haddd_epi8(__m128i a) { 90 // CHECK-LABEL: test_mm_haddd_epi8 91 // CHECK: call <4 x i32> @llvm.x86.xop.vphaddbd(<16 x i8> %{{.*}}) 92 return _mm_haddd_epi8(a); 93 } 94 95 __m128i test_mm_haddq_epi8(__m128i a) { 96 // CHECK-LABEL: test_mm_haddq_epi8 97 // CHECK: call <2 x i64> @llvm.x86.xop.vphaddbq(<16 x i8> %{{.*}}) 98 return _mm_haddq_epi8(a); 99 } 100 101 __m128i test_mm_haddd_epi16(__m128i a) { 102 // CHECK-LABEL: test_mm_haddd_epi16 103 // CHECK: call <4 x i32> @llvm.x86.xop.vphaddwd(<8 x i16> %{{.*}}) 104 return _mm_haddd_epi16(a); 105 } 106 107 __m128i test_mm_haddq_epi16(__m128i a) { 108 // CHECK-LABEL: test_mm_haddq_epi16 109 // CHECK: call <2 x i64> @llvm.x86.xop.vphaddwq(<8 x i16> %{{.*}}) 110 return _mm_haddq_epi16(a); 111 } 112 113 __m128i test_mm_haddq_epi32(__m128i a) { 114 // CHECK-LABEL: test_mm_haddq_epi32 115 // CHECK: call <2 x i64> @llvm.x86.xop.vphadddq(<4 x i32> %{{.*}}) 116 return _mm_haddq_epi32(a); 117 } 118 119 __m128i test_mm_haddw_epu8(__m128i a) { 120 // CHECK-LABEL: test_mm_haddw_epu8 121 // CHECK: call <8 x i16> @llvm.x86.xop.vphaddubw(<16 x i8> %{{.*}}) 122 return _mm_haddw_epu8(a); 123 } 124 125 __m128i test_mm_haddd_epu8(__m128i a) { 126 // CHECK-LABEL: test_mm_haddd_epu8 127 // CHECK: call <4 x i32> @llvm.x86.xop.vphaddubd(<16 x i8> %{{.*}}) 128 return _mm_haddd_epu8(a); 129 } 130 131 __m128i test_mm_haddq_epu8(__m128i a) { 132 // CHECK-LABEL: test_mm_haddq_epu8 133 // CHECK: call <2 x i64> @llvm.x86.xop.vphaddubq(<16 x i8> %{{.*}}) 134 return _mm_haddq_epu8(a); 135 } 136 137 __m128i test_mm_haddd_epu16(__m128i a) { 138 // CHECK-LABEL: test_mm_haddd_epu16 139 // CHECK: call <4 x i32> @llvm.x86.xop.vphadduwd(<8 x i16> %{{.*}}) 140 return _mm_haddd_epu16(a); 141 } 142 143 __m128i test_mm_haddq_epu16(__m128i a) { 144 // CHECK-LABEL: test_mm_haddq_epu16 145 // CHECK: call <2 x i64> @llvm.x86.xop.vphadduwq(<8 x i16> %{{.*}}) 146 return _mm_haddq_epu16(a); 147 } 148 149 __m128i test_mm_haddq_epu32(__m128i a) { 150 // CHECK-LABEL: test_mm_haddq_epu32 151 // CHECK: call <2 x i64> @llvm.x86.xop.vphaddudq(<4 x i32> %{{.*}}) 152 return _mm_haddq_epu32(a); 153 } 154 155 __m128i test_mm_hsubw_epi8(__m128i a) { 156 // CHECK-LABEL: test_mm_hsubw_epi8 157 // CHECK: call <8 x i16> @llvm.x86.xop.vphsubbw(<16 x i8> %{{.*}}) 158 return _mm_hsubw_epi8(a); 159 } 160 161 __m128i test_mm_hsubd_epi16(__m128i a) { 162 // CHECK-LABEL: test_mm_hsubd_epi16 163 // CHECK: call <4 x i32> @llvm.x86.xop.vphsubwd(<8 x i16> %{{.*}}) 164 return _mm_hsubd_epi16(a); 165 } 166 167 __m128i test_mm_hsubq_epi32(__m128i a) { 168 // CHECK-LABEL: test_mm_hsubq_epi32 169 // CHECK: call <2 x i64> @llvm.x86.xop.vphsubdq(<4 x i32> %{{.*}}) 170 return _mm_hsubq_epi32(a); 171 } 172 173 __m128i test_mm_cmov_si128(__m128i a, __m128i b, __m128i c) { 174 // CHECK-LABEL: test_mm_cmov_si128 175 // CHECK: call <2 x i64> @llvm.x86.xop.vpcmov(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}}) 176 return _mm_cmov_si128(a, b, c); 177 } 178 179 __m256i test_mm256_cmov_si256(__m256i a, __m256i b, __m256i c) { 180 // CHECK-LABEL: test_mm256_cmov_si256 181 // CHECK: call <4 x i64> @llvm.x86.xop.vpcmov.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}}) 182 return _mm256_cmov_si256(a, b, c); 183 } 184 185 __m128i test_mm_perm_epi8(__m128i a, __m128i b, __m128i c) { 186 // CHECK-LABEL: test_mm_perm_epi8 187 // CHECK: call <16 x i8> @llvm.x86.xop.vpperm(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 188 return _mm_perm_epi8(a, b, c); 189 } 190 191 __m128i test_mm_rot_epi8(__m128i a, __m128i b) { 192 // CHECK-LABEL: test_mm_rot_epi8 193 // CHECK: call <16 x i8> @llvm.x86.xop.vprotb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 194 return _mm_rot_epi8(a, b); 195 } 196 197 __m128i test_mm_rot_epi16(__m128i a, __m128i b) { 198 // CHECK-LABEL: test_mm_rot_epi16 199 // CHECK: call <8 x i16> @llvm.x86.xop.vprotw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 200 return _mm_rot_epi16(a, b); 201 } 202 203 __m128i test_mm_rot_epi32(__m128i a, __m128i b) { 204 // CHECK-LABEL: test_mm_rot_epi32 205 // CHECK: call <4 x i32> @llvm.x86.xop.vprotd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) 206 return _mm_rot_epi32(a, b); 207 } 208 209 __m128i test_mm_rot_epi64(__m128i a, __m128i b) { 210 // CHECK-LABEL: test_mm_rot_epi64 211 // CHECK: call <2 x i64> @llvm.x86.xop.vprotq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) 212 return _mm_rot_epi64(a, b); 213 } 214 215 __m128i test_mm_roti_epi8(__m128i a) { 216 // CHECK-LABEL: test_mm_roti_epi8 217 // CHECK: call <16 x i8> @llvm.x86.xop.vprotbi(<16 x i8> %{{.*}}, i8 1) 218 return _mm_roti_epi8(a, 1); 219 } 220 221 __m128i test_mm_roti_epi16(__m128i a) { 222 // CHECK-LABEL: test_mm_roti_epi16 223 // CHECK: call <8 x i16> @llvm.x86.xop.vprotwi(<8 x i16> %{{.*}}, i8 50) 224 return _mm_roti_epi16(a, 50); 225 } 226 227 __m128i test_mm_roti_epi32(__m128i a) { 228 // CHECK-LABEL: test_mm_roti_epi32 229 // CHECK: call <4 x i32> @llvm.x86.xop.vprotdi(<4 x i32> %{{.*}}, i8 -30) 230 return _mm_roti_epi32(a, -30); 231 } 232 233 __m128i test_mm_roti_epi64(__m128i a) { 234 // CHECK-LABEL: test_mm_roti_epi64 235 // CHECK: call <2 x i64> @llvm.x86.xop.vprotqi(<2 x i64> %{{.*}}, i8 100) 236 return _mm_roti_epi64(a, 100); 237 } 238 239 __m128i test_mm_shl_epi8(__m128i a, __m128i b) { 240 // CHECK-LABEL: test_mm_shl_epi8 241 // CHECK: call <16 x i8> @llvm.x86.xop.vpshlb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 242 return _mm_shl_epi8(a, b); 243 } 244 245 __m128i test_mm_shl_epi16(__m128i a, __m128i b) { 246 // CHECK-LABEL: test_mm_shl_epi16 247 // CHECK: call <8 x i16> @llvm.x86.xop.vpshlw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 248 return _mm_shl_epi16(a, b); 249 } 250 251 __m128i test_mm_shl_epi32(__m128i a, __m128i b) { 252 // CHECK-LABEL: test_mm_shl_epi32 253 // CHECK: call <4 x i32> @llvm.x86.xop.vpshld(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) 254 return _mm_shl_epi32(a, b); 255 } 256 257 __m128i test_mm_shl_epi64(__m128i a, __m128i b) { 258 // CHECK-LABEL: test_mm_shl_epi64 259 // CHECK: call <2 x i64> @llvm.x86.xop.vpshlq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) 260 return _mm_shl_epi64(a, b); 261 } 262 263 __m128i test_mm_sha_epi8(__m128i a, __m128i b) { 264 // CHECK-LABEL: test_mm_sha_epi8 265 // CHECK: call <16 x i8> @llvm.x86.xop.vpshab(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) 266 return _mm_sha_epi8(a, b); 267 } 268 269 __m128i test_mm_sha_epi16(__m128i a, __m128i b) { 270 // CHECK-LABEL: test_mm_sha_epi16 271 // CHECK: call <8 x i16> @llvm.x86.xop.vpshaw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) 272 return _mm_sha_epi16(a, b); 273 } 274 275 __m128i test_mm_sha_epi32(__m128i a, __m128i b) { 276 // CHECK-LABEL: test_mm_sha_epi32 277 // CHECK: call <4 x i32> @llvm.x86.xop.vpshad(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) 278 return _mm_sha_epi32(a, b); 279 } 280 281 __m128i test_mm_sha_epi64(__m128i a, __m128i b) { 282 // CHECK-LABEL: test_mm_sha_epi64 283 // CHECK: call <2 x i64> @llvm.x86.xop.vpshaq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) 284 return _mm_sha_epi64(a, b); 285 } 286 287 __m128i test_mm_com_epu8(__m128i a, __m128i b) { 288 // CHECK-LABEL: test_mm_com_epu8 289 // CHECK: call <16 x i8> @llvm.x86.xop.vpcomub(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 0) 290 return _mm_com_epu8(a, b, 0); 291 } 292 293 __m128i test_mm_com_epu16(__m128i a, __m128i b) { 294 // CHECK-LABEL: test_mm_com_epu16 295 // CHECK: call <8 x i16> @llvm.x86.xop.vpcomuw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 0) 296 return _mm_com_epu16(a, b, 0); 297 } 298 299 __m128i test_mm_com_epu32(__m128i a, __m128i b) { 300 // CHECK-LABEL: test_mm_com_epu32 301 // CHECK: call <4 x i32> @llvm.x86.xop.vpcomud(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 0) 302 return _mm_com_epu32(a, b, 0); 303 } 304 305 __m128i test_mm_com_epu64(__m128i a, __m128i b) { 306 // CHECK-LABEL: test_mm_com_epu64 307 // CHECK: call <2 x i64> @llvm.x86.xop.vpcomuq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 0) 308 return _mm_com_epu64(a, b, 0); 309 } 310 311 __m128i test_mm_com_epi8(__m128i a, __m128i b) { 312 // CHECK-LABEL: test_mm_com_epi8 313 // CHECK: call <16 x i8> @llvm.x86.xop.vpcomb(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, i8 0) 314 return _mm_com_epi8(a, b, 0); 315 } 316 317 __m128i test_mm_com_epi16(__m128i a, __m128i b) { 318 // CHECK-LABEL: test_mm_com_epi16 319 // CHECK: call <8 x i16> @llvm.x86.xop.vpcomw(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, i8 0) 320 return _mm_com_epi16(a, b, 0); 321 } 322 323 __m128i test_mm_com_epi32(__m128i a, __m128i b) { 324 // CHECK-LABEL: test_mm_com_epi32 325 // CHECK: call <4 x i32> @llvm.x86.xop.vpcomd(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, i8 0) 326 return _mm_com_epi32(a, b, 0); 327 } 328 329 __m128i test_mm_com_epi64(__m128i a, __m128i b) { 330 // CHECK-LABEL: test_mm_com_epi64 331 // CHECK: call <2 x i64> @llvm.x86.xop.vpcomq(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, i8 0) 332 return _mm_com_epi64(a, b, 0); 333 } 334 335 __m128d test_mm_permute2_pd(__m128d a, __m128d b, __m128i c) { 336 // CHECK-LABEL: test_mm_permute2_pd 337 // CHECK: call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i64> %{{.*}}, i8 0) 338 return _mm_permute2_pd(a, b, c, 0); 339 } 340 341 __m256d test_mm256_permute2_pd(__m256d a, __m256d b, __m256i c) { 342 // CHECK-LABEL: test_mm256_permute2_pd 343 // CHECK: call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i64> %{{.*}}, i8 0) 344 return _mm256_permute2_pd(a, b, c, 0); 345 } 346 347 __m128 test_mm_permute2_ps(__m128 a, __m128 b, __m128i c) { 348 // CHECK-LABEL: test_mm_permute2_ps 349 // CHECK: call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> %{{.*}}, i8 0) 350 return _mm_permute2_ps(a, b, c, 0); 351 } 352 353 __m256 test_mm256_permute2_ps(__m256 a, __m256 b, __m256i c) { 354 // CHECK-LABEL: test_mm256_permute2_ps 355 // CHECK: call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> %{{.*}}, i8 0) 356 return _mm256_permute2_ps(a, b, c, 0); 357 } 358 359 __m128 test_mm_frcz_ss(__m128 a) { 360 // CHECK-LABEL: test_mm_frcz_ss 361 // CHECK: call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %{{.*}}) 362 return _mm_frcz_ss(a); 363 } 364 365 __m128d test_mm_frcz_sd(__m128d a) { 366 // CHECK-LABEL: test_mm_frcz_sd 367 // CHECK: call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %{{.*}}) 368 return _mm_frcz_sd(a); 369 } 370 371 __m128 test_mm_frcz_ps(__m128 a) { 372 // CHECK-LABEL: test_mm_frcz_ps 373 // CHECK: call <4 x float> @llvm.x86.xop.vfrcz.ps(<4 x float> %{{.*}}) 374 return _mm_frcz_ps(a); 375 } 376 377 __m128d test_mm_frcz_pd(__m128d a) { 378 // CHECK-LABEL: test_mm_frcz_pd 379 // CHECK: call <2 x double> @llvm.x86.xop.vfrcz.pd(<2 x double> %{{.*}}) 380 return _mm_frcz_pd(a); 381 } 382 383 __m256 test_mm256_frcz_ps(__m256 a) { 384 // CHECK-LABEL: test_mm256_frcz_ps 385 // CHECK: call <8 x float> @llvm.x86.xop.vfrcz.ps.256(<8 x float> %{{.*}}) 386 return _mm256_frcz_ps(a); 387 } 388 389 __m256d test_mm256_frcz_pd(__m256d a) { 390 // CHECK-LABEL: test_mm256_frcz_pd 391 // CHECK: call <4 x double> @llvm.x86.xop.vfrcz.pd.256(<4 x double> %{{.*}}) 392 return _mm256_frcz_pd(a); 393 } 394