// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +sse -emit-llvm -o - -Werror | FileCheck %s

// Don't include mm_malloc.h, it's system specific.
#define __MM_MALLOC_H

#include <x86intrin.h>

// NOTE: This should match the tests in llvm/test/CodeGen/X86/sse-intrinsics-fast-isel.ll

__m128 test_mm_add_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_add_ps
  // CHECK: fadd <4 x float>
  return _mm_add_ps(A, B);
}

__m128 test_mm_add_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_add_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fadd float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_add_ss(A, B);
}

__m128 test_mm_and_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_and_ps
  // CHECK: and <4 x i32>
  return _mm_and_ps(A, B);
}

__m128 test_mm_andnot_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_andnot_ps
  // CHECK: xor <4 x i32> %{{.*}}, <i32 -1, i32 -1, i32 -1, i32 -1>
  // CHECK: and <4 x i32>
  return _mm_andnot_ps(A, B);
}

__m128 test_mm_cmpeq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpeq_ps
  // CHECK: [[CMP:%.*]] = fcmp oeq <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpeq_ps(__a, __b);
}

__m128 test_mm_cmpeq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpeq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 0)
  return _mm_cmpeq_ss(__a, __b);
}

__m128 test_mm_cmpge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpge_ps
  // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpge_ps(__a, __b);
}

__m128 test_mm_cmpge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpge_ss(__a, __b);
}

__m128 test_mm_cmpgt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpgt_ps
  // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpgt_ps(__a, __b);
}

__m128 test_mm_cmpgt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpgt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpgt_ss(__a, __b);
}

__m128 test_mm_cmple_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmple_ps
  // CHECK: [[CMP:%.*]] = fcmp ole <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmple_ps(__a, __b);
}

__m128 test_mm_cmple_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmple_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 2)
  return _mm_cmple_ss(__a, __b);
}

__m128 test_mm_cmplt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmplt_ps
  // CHECK: [[CMP:%.*]] = fcmp olt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmplt_ps(__a, __b);
}

__m128 test_mm_cmplt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmplt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 1)
  return _mm_cmplt_ss(__a, __b);
}

__m128 test_mm_cmpneq_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpneq_ps
  // CHECK: [[CMP:%.*]] = fcmp une <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpneq_ps(__a, __b);
}

__m128 test_mm_cmpneq_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpneq_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 4)
  return _mm_cmpneq_ss(__a, __b);
}

__m128 test_mm_cmpnge_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnge_ps
  // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnge_ps(__a, __b);
}

__m128 test_mm_cmpnge_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnge_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpnge_ss(__a, __b);
}

__m128 test_mm_cmpngt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpngt_ps
  // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpngt_ps(__a, __b);
}

__m128 test_mm_cmpngt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpngt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_cmpngt_ss(__a, __b);
}

__m128 test_mm_cmpnle_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnle_ps
  // CHECK: [[CMP:%.*]] = fcmp ugt <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnle_ps(__a, __b);
}

__m128 test_mm_cmpnle_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnle_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 6)
  return _mm_cmpnle_ss(__a, __b);
}

__m128 test_mm_cmpnlt_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnlt_ps
  // CHECK: [[CMP:%.*]] = fcmp uge <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpnlt_ps(__a, __b);
}

__m128 test_mm_cmpnlt_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpnlt_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 5)
  return _mm_cmpnlt_ss(__a, __b);
}

__m128 test_mm_cmpord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpord_ps
  // CHECK: [[CMP:%.*]] = fcmp ord <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpord_ps(__a, __b);
}

__m128 test_mm_cmpord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 7)
  return _mm_cmpord_ss(__a, __b);
}

__m128 test_mm_cmpunord_ps(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpunord_ps
  // CHECK: [[CMP:%.*]] = fcmp uno <4 x float>
  // CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> [[CMP]] to <4 x i32>
  // CHECK-NEXT: [[BC:%.*]] = bitcast <4 x i32> [[SEXT]] to <4 x float>
  // CHECK-NEXT: ret <4 x float> [[BC]]
  return _mm_cmpunord_ps(__a, __b);
}

__m128 test_mm_cmpunord_ss(__m128 __a, __m128 __b) {
  // CHECK-LABEL: @test_mm_cmpunord_ss
  // CHECK: @llvm.x86.sse.cmp.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}}, i8 3)
  return _mm_cmpunord_ss(__a, __b);
}

int test_mm_comieq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comieq_ss
  // CHECK: call i32 @llvm.x86.sse.comieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comieq_ss(A, B);
}

int test_mm_comige_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comige_ss
  // CHECK: call i32 @llvm.x86.sse.comige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comige_ss(A, B);
}

int test_mm_comigt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comigt_ss
  // CHECK: call i32 @llvm.x86.sse.comigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comigt_ss(A, B);
}

int test_mm_comile_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comile_ss
  // CHECK: call i32 @llvm.x86.sse.comile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comile_ss(A, B);
}

int test_mm_comilt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comilt_ss
  // CHECK: call i32 @llvm.x86.sse.comilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comilt_ss(A, B);
}

int test_mm_comineq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_comineq_ss
  // CHECK: call i32 @llvm.x86.sse.comineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_comineq_ss(A, B);
}

int test_mm_cvt_ss2si(__m128 A) {
  // CHECK-LABEL: test_mm_cvt_ss2si
  // CHECK: call i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}})
  return _mm_cvt_ss2si(A);
}

__m128 test_mm_cvtsi32_ss(__m128 A, int B) {
  // CHECK-LABEL: test_mm_cvtsi32_ss
  // CHECK: sitofp i32 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvtsi32_ss(A, B);
}

__m128 test_mm_cvtsi64_ss(__m128 A, long long B) {
  // CHECK-LABEL: test_mm_cvtsi64_ss
  // CHECK: sitofp i64 %{{.*}} to float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_cvtsi64_ss(A, B);
}

float test_mm_cvtss_f32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_f32
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  return _mm_cvtss_f32(A);
}

int test_mm_cvtss_si32(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_si32
  // CHECK: call i32 @llvm.x86.sse.cvtss2si(<4 x float> %{{.*}})
  return _mm_cvtss_si32(A);
}

long long test_mm_cvtss_si64(__m128 A) {
  // CHECK-LABEL: test_mm_cvtss_si64
  // CHECK: call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %{{.*}})
  return _mm_cvtss_si64(A);
}

int test_mm_cvtt_ss2si(__m128 A) {
  // CHECK-LABEL: test_mm_cvtt_ss2si
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fptosi float %{{.*}} to i32
  return _mm_cvtt_ss2si(A);
}

int test_mm_cvttss_si32(__m128 A) {
  // CHECK-LABEL: test_mm_cvttss_si32
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fptosi float %{{.*}} to i32
  return _mm_cvttss_si32(A);
}

long long test_mm_cvttss_si64(__m128 A) {
  // CHECK-LABEL: test_mm_cvttss_si64
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fptosi float %{{.*}} to i64
  return _mm_cvttss_si64(A);
}

__m128 test_mm_div_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_div_ps
  // CHECK: fdiv <4 x float>
  return _mm_div_ps(A, B);
}

__m128 test_mm_div_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_div_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fdiv float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_div_ss(A, B);
}

unsigned int test_MM_GET_EXCEPTION_MASK() {
  // CHECK-LABEL: test_MM_GET_EXCEPTION_MASK
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 8064
  return _MM_GET_EXCEPTION_MASK();
}

unsigned int test_MM_GET_EXCEPTION_STATE() {
  // CHECK-LABEL: test_MM_GET_EXCEPTION_STATE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 63
  return _MM_GET_EXCEPTION_STATE();
}

unsigned int test_MM_GET_FLUSH_ZERO_MODE() {
  // CHECK-LABEL: test_MM_GET_FLUSH_ZERO_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 32768
  return _MM_GET_FLUSH_ZERO_MODE();
}

unsigned int test_MM_GET_ROUNDING_MODE() {
  // CHECK-LABEL: test_MM_GET_ROUNDING_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: and i32 %{{.*}}, 24576
  return _MM_GET_ROUNDING_MODE();
}

unsigned int test_mm_getcsr() {
  // CHECK-LABEL: test_mm_getcsr
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* %{{.*}})
  // CHECK: load i32
  return _mm_getcsr();
}

__m128 test_mm_load_ps(float* y) {
  // CHECK-LABEL: test_mm_load_ps
  // CHECK: load <4 x float>, <4 x float>* {{.*}}, align 16
  return _mm_load_ps(y);
}

__m128 test_mm_load_ps1(float* y) {
  // CHECK-LABEL: test_mm_load_ps1
  // CHECK: load float, float* %{{.*}}, align 4
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3
  return _mm_load_ps1(y);
}

__m128 test_mm_load_ss(float* y) {
  // CHECK-LABEL: test_mm_load_ss
  // CHECK: load float, float* {{.*}}, align 1{{$}}
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float 0.000000e+00, i32 3
  return _mm_load_ss(y);
}

__m128 test_mm_load1_ps(float* y) {
  // CHECK-LABEL: test_mm_load1_ps
  // CHECK: load float, float* %{{.*}}, align 4
  // CHECK: insertelement <4 x float> undef, float %{{.*}}, i32 0
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 1
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 2
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 3
  return _mm_load1_ps(y);
}

__m128 test_mm_loadh_pi(__m128 x, __m64* y) {
  // CHECK-LABEL: test_mm_loadh_pi
  // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_loadh_pi(x,y);
}

__m128 test_mm_loadl_pi(__m128 x, __m64* y) {
  // CHECK-LABEL: test_mm_loadl_pi
  // CHECK: load <2 x float>, <2 x float>* {{.*}}, align 1{{$}}
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 0, i32 1
  // CHECK: shufflevector {{.*}} <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  return _mm_loadl_pi(x,y);
}

__m128 test_mm_loadr_ps(float* A) {
  // CHECK-LABEL: test_mm_loadr_ps
  // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 16
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  return _mm_loadr_ps(A);
}

__m128 test_mm_loadu_ps(float* A) {
  // CHECK-LABEL: test_mm_loadu_ps
  // CHECK: load <4 x float>, <4 x float>* %{{.*}}, align 1{{$}}
  return _mm_loadu_ps(A);
}

__m128 test_mm_max_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_max_ps
  // CHECK: @llvm.x86.sse.max.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_max_ps(A, B);
}

__m128 test_mm_max_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_max_ss
  // CHECK: @llvm.x86.sse.max.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_max_ss(A, B);
}

__m128 test_mm_min_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_min_ps
  // CHECK: @llvm.x86.sse.min.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_min_ps(A, B);
}

__m128 test_mm_min_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_min_ss
  // CHECK: @llvm.x86.sse.min.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_min_ss(A, B);
}

__m128 test_mm_move_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_move_ss
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  return _mm_move_ss(A, B);
}

__m128 test_mm_movehl_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_movehl_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  return _mm_movehl_ps(A, B);
}

__m128 test_mm_movelh_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_movelh_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  return _mm_movelh_ps(A, B);
}

int test_mm_movemask_ps(__m128 A) {
  // CHECK-LABEL: test_mm_movemask_ps
  // CHECK: call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %{{.*}})
  return _mm_movemask_ps(A);
}

__m128 test_mm_mul_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_mul_ps
  // CHECK: fmul <4 x float>
  return _mm_mul_ps(A, B);
}

__m128 test_mm_mul_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_mul_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fmul float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_mul_ss(A, B);
}

__m128 test_mm_or_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_or_ps
  // CHECK: or <4 x i32>
  return _mm_or_ps(A, B);
}

void test_mm_prefetch(char const* p) {
  // CHECK-LABEL: test_mm_prefetch
  // CHECK: call void @llvm.prefetch(i8* {{.*}}, i32 0, i32 0, i32 1)
  _mm_prefetch(p, 0);
}

__m128 test_mm_rcp_ps(__m128 x) {
  // CHECK-LABEL: test_mm_rcp_ps
  // CHECK: call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> {{.*}})
  return _mm_rcp_ps(x);
}

__m128 test_mm_rcp_ss(__m128 x) {
  // CHECK-LABEL: test_mm_rcp_ss
  // CHECK: call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> {{.*}})
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_rcp_ss(x);
}

__m128 test_mm_rsqrt_ps(__m128 x) {
  // CHECK-LABEL: test_mm_rsqrt_ps
  // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> {{.*}})
  return _mm_rsqrt_ps(x);
}

__m128 test_mm_rsqrt_ss(__m128 x) {
  // CHECK-LABEL: test_mm_rsqrt_ss
  // CHECK: call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> {{.*}})
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_rsqrt_ss(x);
}

void test_MM_SET_EXCEPTION_MASK(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_EXCEPTION_MASK
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -8065
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _MM_SET_EXCEPTION_MASK(A);
}

void test_MM_SET_EXCEPTION_STATE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_EXCEPTION_STATE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -64
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _MM_SET_EXCEPTION_STATE(A);
}

void test_MM_SET_FLUSH_ZERO_MODE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_FLUSH_ZERO_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -32769
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _MM_SET_FLUSH_ZERO_MODE(A);
}

__m128 test_mm_set_ps(float A, float B, float C, float D) {
  // CHECK-LABEL: test_mm_set_ps
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set_ps(A, B, C, D);
}

__m128 test_mm_set_ps1(float A) {
  // CHECK-LABEL: test_mm_set_ps1
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set_ps1(A);
}

void test_MM_SET_ROUNDING_MODE(unsigned int A) {
  // CHECK-LABEL: test_MM_SET_ROUNDING_MODE
  // CHECK: call void @llvm.x86.sse.stmxcsr(i8* {{.*}})
  // CHECK: load i32
  // CHECK: and i32 {{.*}}, -24577
  // CHECK: or i32
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _MM_SET_ROUNDING_MODE(A);
}

__m128 test_mm_set_ss(float A) {
  // CHECK-LABEL: test_mm_set_ss
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float 0.000000e+00, i32 3
  return _mm_set_ss(A);
}

__m128 test_mm_set1_ps(float A) {
  // CHECK-LABEL: test_mm_set1_ps
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_set1_ps(A);
}

void test_mm_setcsr(unsigned int A) {
  // CHECK-LABEL: test_mm_setcsr
  // CHECK: store i32
  // CHECK: call void @llvm.x86.sse.ldmxcsr(i8* {{.*}})
  _mm_setcsr(A);
}

__m128 test_mm_setr_ps(float A, float B, float C, float D) {
  // CHECK-LABEL: test_mm_setr_ps
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_setr_ps(A, B, C, D);
}

__m128 test_mm_setzero_ps() {
  // CHECK-LABEL: test_mm_setzero_ps
  // CHECK: store <4 x float> zeroinitializer
  return _mm_setzero_ps();
}

void test_mm_sfence() {
  // CHECK-LABEL: test_mm_sfence
  // CHECK: call void @llvm.x86.sse.sfence()
  _mm_sfence();
}

__m128 test_mm_shuffle_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_shuffle_ps
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
  return _mm_shuffle_ps(A, B, 0);
}

__m128 test_mm_sqrt_ps(__m128 x) {
  // CHECK-LABEL: test_mm_sqrt_ps
  // CHECK: call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> {{.*}})
  return _mm_sqrt_ps(x);
}

__m128 test_sqrt_ss(__m128 x) {
  // CHECK: define {{.*}} @test_sqrt_ss
  // CHECK: call <4 x float> @llvm.x86.sse.sqrt.ss
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: insertelement <4 x float> undef, float {{.*}}, i32 0
  // CHECK: extractelement <4 x float> {{.*}}, i32 1
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 1
  // CHECK: extractelement <4 x float> {{.*}}, i32 2
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 2
  // CHECK: extractelement <4 x float> {{.*}}, i32 3
  // CHECK: insertelement <4 x float> {{.*}}, float {{.*}}, i32 3
  return _mm_sqrt_ss(x);
}

void test_mm_store_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ps
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16
  _mm_store_ps(x, y);
}

void test_mm_store_ps1(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ps1
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16
  _mm_store_ps1(x, y);
}

void test_mm_store_ss(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store_ss
  // CHECK: extractelement <4 x float> {{.*}}, i32 0
  // CHECK: store float %{{.*}}, float* {{.*}}, align 1{{$}}
  _mm_store_ss(x, y);
}

void test_mm_store1_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_store1_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> zeroinitializer
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16
  _mm_store1_ps(x, y);
}

void test_mm_storeh_pi(__m64* x, __m128 y) {
  // CHECK-LABEL: test_mm_storeh_pi
  // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
  // CHECK: extractelement <2 x i64> %{{.*}}, i64 1
  // CHECK: store i64 %{{.*}}, i64* {{.*}}
  _mm_storeh_pi(x, y);
}

void test_mm_storel_pi(__m64* x, __m128 y) {
  // CHECK-LABEL: test_mm_storel_pi
  // CHECK: bitcast <4 x float> %{{.*}} to <2 x i64>
  // CHECK: extractelement <2 x i64> %{{.*}}, i64 0
  // CHECK: store i64 %{{.*}}, i64* {{.*}}
  _mm_storel_pi(x, y);
}

void test_mm_storer_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_storer_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* {{.*}}, align 16
  _mm_storer_ps(x, y);
}

void test_mm_storeu_ps(float* x, __m128 y) {
  // CHECK-LABEL: test_mm_storeu_ps
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 1{{$}}
  // CHECK-NEXT: ret void
  _mm_storeu_ps(x, y);
}

void test_mm_stream_ps(float* A, __m128 B) {
  // CHECK-LABEL: test_mm_stream_ps
  // CHECK: store <4 x float> %{{.*}}, <4 x float>* %{{.*}}, align 16, !nontemporal
  _mm_stream_ps(A, B);
}

__m128 test_mm_sub_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_sub_ps
  // CHECK: fsub <4 x float>
  return _mm_sub_ps(A, B);
}

__m128 test_mm_sub_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_sub_ss
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: extractelement <4 x float> %{{.*}}, i32 0
  // CHECK: fsub float
  // CHECK: insertelement <4 x float> %{{.*}}, float %{{.*}}, i32 0
  return _mm_sub_ss(A, B);
}

void test_MM_TRANSPOSE4_PS(__m128 *A, __m128 *B, __m128 *C, __m128 *D) {
  // CHECK-LABEL: test_MM_TRANSPOSE4_PS
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  // CHECK: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  _MM_TRANSPOSE4_PS(*A, *B, *C, *D);
}

int test_mm_ucomieq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomieq_ss
  // CHECK: call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomieq_ss(A, B);
}

int test_mm_ucomige_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomige_ss
  // CHECK: call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomige_ss(A, B);
}

int test_mm_ucomigt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomigt_ss
  // CHECK: call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomigt_ss(A, B);
}

int test_mm_ucomile_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomile_ss
  // CHECK: call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomile_ss(A, B);
}

int test_mm_ucomilt_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomilt_ss
  // CHECK: call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomilt_ss(A, B);
}

int test_mm_ucomineq_ss(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_ucomineq_ss
  // CHECK: call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %{{.*}}, <4 x float> %{{.*}})
  return _mm_ucomineq_ss(A, B);
}

__m128 test_mm_undefined_ps() {
  // CHECK-LABEL: @test_mm_undefined_ps
  // CHECK: ret <4 x float> undef
  return _mm_undefined_ps();
}

__m128 test_mm_unpackhi_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_unpackhi_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  return _mm_unpackhi_ps(A, B);
}

__m128 test_mm_unpacklo_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_unpacklo_ps
  // CHECK: shufflevector <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  return _mm_unpacklo_ps(A, B);
}

__m128 test_mm_xor_ps(__m128 A, __m128 B) {
  // CHECK-LABEL: test_mm_xor_ps
  // CHECK: xor <4 x i32>
  return _mm_xor_ps(A, B);
}