1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse | FileCheck %s --check-prefix=ALL --check-prefix=X32 3 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse,-sse2 | FileCheck %s --check-prefix=ALL --check-prefix=X64 4 5 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse-builtins.c 6 7 define <4 x float> @test_mm_add_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 8 ; X32-LABEL: test_mm_add_ps: 9 ; X32: # BB#0: 10 ; X32-NEXT: addps %xmm1, %xmm0 11 ; X32-NEXT: retl 12 ; 13 ; X64-LABEL: test_mm_add_ps: 14 ; X64: # BB#0: 15 ; X64-NEXT: addps %xmm1, %xmm0 16 ; X64-NEXT: retq 17 %res = fadd <4 x float> %a0, %a1 18 ret <4 x float> %res 19 } 20 21 define <4 x float> @test_mm_add_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 22 ; X32-LABEL: test_mm_add_ss: 23 ; X32: # BB#0: 24 ; X32-NEXT: addss %xmm1, %xmm0 25 ; X32-NEXT: retl 26 ; 27 ; X64-LABEL: test_mm_add_ss: 28 ; X64: # BB#0: 29 ; X64-NEXT: addss %xmm1, %xmm0 30 ; X64-NEXT: retq 31 %ext0 = extractelement <4 x float> %a0, i32 0 32 %ext1 = extractelement <4 x float> %a1, i32 0 33 %fadd = fadd float %ext0, %ext1 34 %res = insertelement <4 x float> %a0, float %fadd, i32 0 35 ret <4 x float> %res 36 } 37 38 define <4 x float> @test_mm_and_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 39 ; X32-LABEL: test_mm_and_ps: 40 ; X32: # BB#0: 41 ; X32-NEXT: pushl %ebp 42 ; X32-NEXT: movl %esp, %ebp 43 ; X32-NEXT: pushl %esi 44 ; X32-NEXT: andl $-16, %esp 45 ; X32-NEXT: subl $64, %esp 46 ; X32-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) 47 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 48 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 49 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx 50 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi 51 ; X32-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) 52 ; X32-NEXT: andl {{[0-9]+}}(%esp), %esi 53 ; X32-NEXT: movl %esi, (%esp) 54 ; X32-NEXT: andl {{[0-9]+}}(%esp), %edx 55 ; X32-NEXT: movl 
%edx, {{[0-9]+}}(%esp) 56 ; X32-NEXT: andl {{[0-9]+}}(%esp), %eax 57 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) 58 ; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx 59 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) 60 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 61 ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 62 ; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 63 ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 64 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 65 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 66 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 67 ; X32-NEXT: leal -4(%ebp), %esp 68 ; X32-NEXT: popl %esi 69 ; X32-NEXT: popl %ebp 70 ; X32-NEXT: retl 71 ; 72 ; X64-LABEL: test_mm_and_ps: 73 ; X64: # BB#0: 74 ; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 75 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax 76 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 77 ; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 78 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx 79 ; X64-NEXT: movq %rdx, %rsi 80 ; X64-NEXT: andl %eax, %edx 81 ; X64-NEXT: shrq $32, %rax 82 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx 83 ; X64-NEXT: movq %rcx, %rdi 84 ; X64-NEXT: andl %r8d, %ecx 85 ; X64-NEXT: shrq $32, %r8 86 ; X64-NEXT: shrq $32, %rsi 87 ; X64-NEXT: shrq $32, %rdi 88 ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) 89 ; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp) 90 ; X64-NEXT: andl %r8d, %edi 91 ; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) 92 ; X64-NEXT: andl %eax, %esi 93 ; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp) 94 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 95 ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 96 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 97 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 98 ; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 99 ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 100 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1] 101 ; X64-NEXT: retq 102 %arg0 = bitcast <4 x float> %a0 to <4 x i32> 103 %arg1 = bitcast <4 x float> %a1 to <4 x i32> 104 %res = and <4 x i32> %arg0, %arg1 105 %bc = bitcast <4 x i32> %res to <4 x float> 106 ret <4 x float> %bc 107 } 108 109 define <4 x float> @test_mm_andnot_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 110 ; X32-LABEL: test_mm_andnot_ps: 111 ; X32: # BB#0: 112 ; X32-NEXT: pushl %ebp 113 ; X32-NEXT: movl %esp, %ebp 114 ; X32-NEXT: pushl %esi 115 ; X32-NEXT: andl $-16, %esp 116 ; X32-NEXT: subl $64, %esp 117 ; X32-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) 118 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 119 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 120 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx 121 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi 122 ; X32-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) 123 ; X32-NEXT: notl %edx 124 ; X32-NEXT: notl %ecx 125 ; X32-NEXT: notl %esi 126 ; X32-NEXT: notl %eax 127 ; X32-NEXT: andl {{[0-9]+}}(%esp), %eax 128 ; X32-NEXT: movl %eax, (%esp) 129 ; X32-NEXT: andl {{[0-9]+}}(%esp), %esi 130 ; X32-NEXT: movl %esi, {{[0-9]+}}(%esp) 131 ; X32-NEXT: andl {{[0-9]+}}(%esp), %ecx 132 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) 133 ; X32-NEXT: andl {{[0-9]+}}(%esp), %edx 134 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) 135 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 136 ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 137 ; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 138 ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 139 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 140 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 141 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 142 ; X32-NEXT: leal -4(%ebp), %esp 143 ; X32-NEXT: popl %esi 144 ; X32-NEXT: popl %ebp 145 ; X32-NEXT: retl 146 ; 147 ; X64-LABEL: test_mm_andnot_ps: 148 ; X64: # BB#0: 149 ; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 150 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax 
151 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx 152 ; X64-NEXT: movq %rcx, %rdx 153 ; X64-NEXT: shrq $32, %rdx 154 ; X64-NEXT: movq %rax, %rsi 155 ; X64-NEXT: shrq $32, %rsi 156 ; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 157 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdi 158 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 159 ; X64-NEXT: notl %eax 160 ; X64-NEXT: andl %edi, %eax 161 ; X64-NEXT: shrq $32, %rdi 162 ; X64-NEXT: notl %ecx 163 ; X64-NEXT: andl %r8d, %ecx 164 ; X64-NEXT: shrq $32, %r8 165 ; X64-NEXT: notl %esi 166 ; X64-NEXT: notl %edx 167 ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) 168 ; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp) 169 ; X64-NEXT: andl %r8d, %edx 170 ; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp) 171 ; X64-NEXT: andl %edi, %esi 172 ; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp) 173 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 174 ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 175 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 176 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 177 ; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 178 ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 179 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 180 ; X64-NEXT: retq 181 %arg0 = bitcast <4 x float> %a0 to <4 x i32> 182 %arg1 = bitcast <4 x float> %a1 to <4 x i32> 183 %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1> 184 %res = and <4 x i32> %not, %arg1 185 %bc = bitcast <4 x i32> %res to <4 x float> 186 ret <4 x float> %bc 187 } 188 189 define <4 x float> @test_mm_cmpeq_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 190 ; X32-LABEL: test_mm_cmpeq_ps: 191 ; X32: # BB#0: 192 ; X32-NEXT: cmpeqps %xmm1, %xmm0 193 ; X32-NEXT: retl 194 ; 195 ; X64-LABEL: test_mm_cmpeq_ps: 196 ; X64: # BB#0: 197 ; X64-NEXT: cmpeqps %xmm1, %xmm0 198 ; X64-NEXT: retq 199 %cmp = fcmp oeq <4 x float> %a0, %a1 200 %sext = sext <4 x i1> %cmp to <4 x i32> 201 %res = bitcast <4 x i32> %sext to <4 x float> 202 
ret <4 x float> %res 203 } 204 205 define <4 x float> @test_mm_cmpeq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 206 ; X32-LABEL: test_mm_cmpeq_ss: 207 ; X32: # BB#0: 208 ; X32-NEXT: cmpeqss %xmm1, %xmm0 209 ; X32-NEXT: retl 210 ; 211 ; X64-LABEL: test_mm_cmpeq_ss: 212 ; X64: # BB#0: 213 ; X64-NEXT: cmpeqss %xmm1, %xmm0 214 ; X64-NEXT: retq 215 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 0) 216 ret <4 x float> %res 217 } 218 declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8) nounwind readnone 219 220 define <4 x float> @test_mm_cmpge_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 221 ; X32-LABEL: test_mm_cmpge_ps: 222 ; X32: # BB#0: 223 ; X32-NEXT: cmpleps %xmm0, %xmm1 224 ; X32-NEXT: movaps %xmm1, %xmm0 225 ; X32-NEXT: retl 226 ; 227 ; X64-LABEL: test_mm_cmpge_ps: 228 ; X64: # BB#0: 229 ; X64-NEXT: cmpleps %xmm0, %xmm1 230 ; X64-NEXT: movaps %xmm1, %xmm0 231 ; X64-NEXT: retq 232 %cmp = fcmp ole <4 x float> %a1, %a0 233 %sext = sext <4 x i1> %cmp to <4 x i32> 234 %res = bitcast <4 x i32> %sext to <4 x float> 235 ret <4 x float> %res 236 } 237 238 define <4 x float> @test_mm_cmpge_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 239 ; X32-LABEL: test_mm_cmpge_ss: 240 ; X32: # BB#0: 241 ; X32-NEXT: cmpless %xmm0, %xmm1 242 ; X32-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 243 ; X32-NEXT: retl 244 ; 245 ; X64-LABEL: test_mm_cmpge_ss: 246 ; X64: # BB#0: 247 ; X64-NEXT: cmpless %xmm0, %xmm1 248 ; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 249 ; X64-NEXT: retq 250 %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 2) 251 %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 252 ret <4 x float> %res 253 } 254 255 define <4 x float> @test_mm_cmpgt_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 256 ; X32-LABEL: test_mm_cmpgt_ps: 257 ; X32: # BB#0: 258 ; X32-NEXT: cmpltps %xmm0, %xmm1 259 ; X32-NEXT: movaps %xmm1, %xmm0 260 ; 
X32-NEXT: retl 261 ; 262 ; X64-LABEL: test_mm_cmpgt_ps: 263 ; X64: # BB#0: 264 ; X64-NEXT: cmpltps %xmm0, %xmm1 265 ; X64-NEXT: movaps %xmm1, %xmm0 266 ; X64-NEXT: retq 267 %cmp = fcmp olt <4 x float> %a1, %a0 268 %sext = sext <4 x i1> %cmp to <4 x i32> 269 %res = bitcast <4 x i32> %sext to <4 x float> 270 ret <4 x float> %res 271 } 272 273 define <4 x float> @test_mm_cmpgt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 274 ; X32-LABEL: test_mm_cmpgt_ss: 275 ; X32: # BB#0: 276 ; X32-NEXT: cmpltss %xmm0, %xmm1 277 ; X32-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 278 ; X32-NEXT: retl 279 ; 280 ; X64-LABEL: test_mm_cmpgt_ss: 281 ; X64: # BB#0: 282 ; X64-NEXT: cmpltss %xmm0, %xmm1 283 ; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 284 ; X64-NEXT: retq 285 %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 1) 286 %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 287 ret <4 x float> %res 288 } 289 290 define <4 x float> @test_mm_cmple_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 291 ; X32-LABEL: test_mm_cmple_ps: 292 ; X32: # BB#0: 293 ; X32-NEXT: cmpleps %xmm1, %xmm0 294 ; X32-NEXT: retl 295 ; 296 ; X64-LABEL: test_mm_cmple_ps: 297 ; X64: # BB#0: 298 ; X64-NEXT: cmpleps %xmm1, %xmm0 299 ; X64-NEXT: retq 300 %cmp = fcmp ole <4 x float> %a0, %a1 301 %sext = sext <4 x i1> %cmp to <4 x i32> 302 %res = bitcast <4 x i32> %sext to <4 x float> 303 ret <4 x float> %res 304 } 305 306 define <4 x float> @test_mm_cmple_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 307 ; X32-LABEL: test_mm_cmple_ss: 308 ; X32: # BB#0: 309 ; X32-NEXT: cmpless %xmm1, %xmm0 310 ; X32-NEXT: retl 311 ; 312 ; X64-LABEL: test_mm_cmple_ss: 313 ; X64: # BB#0: 314 ; X64-NEXT: cmpless %xmm1, %xmm0 315 ; X64-NEXT: retq 316 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 2) 317 ret <4 x float> %res 318 } 319 320 define <4 x float> @test_mm_cmplt_ps(<4 x float> %a0, <4 x float> %a1) 
nounwind { 321 ; X32-LABEL: test_mm_cmplt_ps: 322 ; X32: # BB#0: 323 ; X32-NEXT: cmpltps %xmm1, %xmm0 324 ; X32-NEXT: retl 325 ; 326 ; X64-LABEL: test_mm_cmplt_ps: 327 ; X64: # BB#0: 328 ; X64-NEXT: cmpltps %xmm1, %xmm0 329 ; X64-NEXT: retq 330 %cmp = fcmp olt <4 x float> %a0, %a1 331 %sext = sext <4 x i1> %cmp to <4 x i32> 332 %res = bitcast <4 x i32> %sext to <4 x float> 333 ret <4 x float> %res 334 } 335 336 define <4 x float> @test_mm_cmplt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 337 ; X32-LABEL: test_mm_cmplt_ss: 338 ; X32: # BB#0: 339 ; X32-NEXT: cmpltss %xmm1, %xmm0 340 ; X32-NEXT: retl 341 ; 342 ; X64-LABEL: test_mm_cmplt_ss: 343 ; X64: # BB#0: 344 ; X64-NEXT: cmpltss %xmm1, %xmm0 345 ; X64-NEXT: retq 346 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 1) 347 ret <4 x float> %res 348 } 349 350 define <4 x float> @test_mm_cmpneq_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 351 ; X32-LABEL: test_mm_cmpneq_ps: 352 ; X32: # BB#0: 353 ; X32-NEXT: cmpneqps %xmm1, %xmm0 354 ; X32-NEXT: retl 355 ; 356 ; X64-LABEL: test_mm_cmpneq_ps: 357 ; X64: # BB#0: 358 ; X64-NEXT: cmpneqps %xmm1, %xmm0 359 ; X64-NEXT: retq 360 %cmp = fcmp une <4 x float> %a0, %a1 361 %sext = sext <4 x i1> %cmp to <4 x i32> 362 %res = bitcast <4 x i32> %sext to <4 x float> 363 ret <4 x float> %res 364 } 365 366 define <4 x float> @test_mm_cmpneq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 367 ; X32-LABEL: test_mm_cmpneq_ss: 368 ; X32: # BB#0: 369 ; X32-NEXT: cmpneqss %xmm1, %xmm0 370 ; X32-NEXT: retl 371 ; 372 ; X64-LABEL: test_mm_cmpneq_ss: 373 ; X64: # BB#0: 374 ; X64-NEXT: cmpneqss %xmm1, %xmm0 375 ; X64-NEXT: retq 376 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 4) 377 ret <4 x float> %res 378 } 379 380 define <4 x float> @test_mm_cmpnge_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 381 ; X32-LABEL: test_mm_cmpnge_ps: 382 ; X32: # BB#0: 383 ; X32-NEXT: cmpnleps %xmm0, %xmm1 384 ; X32-NEXT: movaps %xmm1, 
%xmm0 385 ; X32-NEXT: retl 386 ; 387 ; X64-LABEL: test_mm_cmpnge_ps: 388 ; X64: # BB#0: 389 ; X64-NEXT: cmpnleps %xmm0, %xmm1 390 ; X64-NEXT: movaps %xmm1, %xmm0 391 ; X64-NEXT: retq 392 %cmp = fcmp ugt <4 x float> %a1, %a0 393 %sext = sext <4 x i1> %cmp to <4 x i32> 394 %res = bitcast <4 x i32> %sext to <4 x float> 395 ret <4 x float> %res 396 } 397 398 define <4 x float> @test_mm_cmpnge_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 399 ; X32-LABEL: test_mm_cmpnge_ss: 400 ; X32: # BB#0: 401 ; X32-NEXT: cmpnless %xmm0, %xmm1 402 ; X32-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 403 ; X32-NEXT: retl 404 ; 405 ; X64-LABEL: test_mm_cmpnge_ss: 406 ; X64: # BB#0: 407 ; X64-NEXT: cmpnless %xmm0, %xmm1 408 ; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 409 ; X64-NEXT: retq 410 %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 6) 411 %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 412 ret <4 x float> %res 413 } 414 415 define <4 x float> @test_mm_cmpngt_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 416 ; X32-LABEL: test_mm_cmpngt_ps: 417 ; X32: # BB#0: 418 ; X32-NEXT: cmpnltps %xmm0, %xmm1 419 ; X32-NEXT: movaps %xmm1, %xmm0 420 ; X32-NEXT: retl 421 ; 422 ; X64-LABEL: test_mm_cmpngt_ps: 423 ; X64: # BB#0: 424 ; X64-NEXT: cmpnltps %xmm0, %xmm1 425 ; X64-NEXT: movaps %xmm1, %xmm0 426 ; X64-NEXT: retq 427 %cmp = fcmp uge <4 x float> %a1, %a0 428 %sext = sext <4 x i1> %cmp to <4 x i32> 429 %res = bitcast <4 x i32> %sext to <4 x float> 430 ret <4 x float> %res 431 } 432 433 define <4 x float> @test_mm_cmpngt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 434 ; X32-LABEL: test_mm_cmpngt_ss: 435 ; X32: # BB#0: 436 ; X32-NEXT: cmpnltss %xmm0, %xmm1 437 ; X32-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 438 ; X32-NEXT: retl 439 ; 440 ; X64-LABEL: test_mm_cmpngt_ss: 441 ; X64: # BB#0: 442 ; X64-NEXT: cmpnltss %xmm0, %xmm1 443 ; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 444 ; 
X64-NEXT: retq 445 %cmp = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a1, <4 x float> %a0, i8 5) 446 %res = shufflevector <4 x float> %a0, <4 x float> %cmp, <4 x i32> <i32 4, i32 1, i32 2, i32 3> 447 ret <4 x float> %res 448 } 449 450 define <4 x float> @test_mm_cmpnle_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 451 ; X32-LABEL: test_mm_cmpnle_ps: 452 ; X32: # BB#0: 453 ; X32-NEXT: cmpnleps %xmm1, %xmm0 454 ; X32-NEXT: retl 455 ; 456 ; X64-LABEL: test_mm_cmpnle_ps: 457 ; X64: # BB#0: 458 ; X64-NEXT: cmpnleps %xmm1, %xmm0 459 ; X64-NEXT: retq 460 %cmp = fcmp ugt <4 x float> %a0, %a1 461 %sext = sext <4 x i1> %cmp to <4 x i32> 462 %res = bitcast <4 x i32> %sext to <4 x float> 463 ret <4 x float> %res 464 } 465 466 define <4 x float> @test_mm_cmpnle_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 467 ; X32-LABEL: test_mm_cmpnle_ss: 468 ; X32: # BB#0: 469 ; X32-NEXT: cmpnless %xmm1, %xmm0 470 ; X32-NEXT: retl 471 ; 472 ; X64-LABEL: test_mm_cmpnle_ss: 473 ; X64: # BB#0: 474 ; X64-NEXT: cmpnless %xmm1, %xmm0 475 ; X64-NEXT: retq 476 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 6) 477 ret <4 x float> %res 478 } 479 480 define <4 x float> @test_mm_cmpnlt_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 481 ; X32-LABEL: test_mm_cmpnlt_ps: 482 ; X32: # BB#0: 483 ; X32-NEXT: cmpnltps %xmm1, %xmm0 484 ; X32-NEXT: retl 485 ; 486 ; X64-LABEL: test_mm_cmpnlt_ps: 487 ; X64: # BB#0: 488 ; X64-NEXT: cmpnltps %xmm1, %xmm0 489 ; X64-NEXT: retq 490 %cmp = fcmp uge <4 x float> %a0, %a1 491 %sext = sext <4 x i1> %cmp to <4 x i32> 492 %res = bitcast <4 x i32> %sext to <4 x float> 493 ret <4 x float> %res 494 } 495 496 define <4 x float> @test_mm_cmpnlt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 497 ; X32-LABEL: test_mm_cmpnlt_ss: 498 ; X32: # BB#0: 499 ; X32-NEXT: cmpnltss %xmm1, %xmm0 500 ; X32-NEXT: retl 501 ; 502 ; X64-LABEL: test_mm_cmpnlt_ss: 503 ; X64: # BB#0: 504 ; X64-NEXT: cmpnltss %xmm1, %xmm0 505 ; X64-NEXT: retq 506 %res = 
call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 5) 507 ret <4 x float> %res 508 } 509 510 define <4 x float> @test_mm_cmpord_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 511 ; X32-LABEL: test_mm_cmpord_ps: 512 ; X32: # BB#0: 513 ; X32-NEXT: cmpordps %xmm1, %xmm0 514 ; X32-NEXT: retl 515 ; 516 ; X64-LABEL: test_mm_cmpord_ps: 517 ; X64: # BB#0: 518 ; X64-NEXT: cmpordps %xmm1, %xmm0 519 ; X64-NEXT: retq 520 %cmp = fcmp ord <4 x float> %a0, %a1 521 %sext = sext <4 x i1> %cmp to <4 x i32> 522 %res = bitcast <4 x i32> %sext to <4 x float> 523 ret <4 x float> %res 524 } 525 526 define <4 x float> @test_mm_cmpord_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 527 ; X32-LABEL: test_mm_cmpord_ss: 528 ; X32: # BB#0: 529 ; X32-NEXT: cmpordss %xmm1, %xmm0 530 ; X32-NEXT: retl 531 ; 532 ; X64-LABEL: test_mm_cmpord_ss: 533 ; X64: # BB#0: 534 ; X64-NEXT: cmpordss %xmm1, %xmm0 535 ; X64-NEXT: retq 536 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 7) 537 ret <4 x float> %res 538 } 539 540 define <4 x float> @test_mm_cmpunord_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 541 ; X32-LABEL: test_mm_cmpunord_ps: 542 ; X32: # BB#0: 543 ; X32-NEXT: cmpunordps %xmm1, %xmm0 544 ; X32-NEXT: retl 545 ; 546 ; X64-LABEL: test_mm_cmpunord_ps: 547 ; X64: # BB#0: 548 ; X64-NEXT: cmpunordps %xmm1, %xmm0 549 ; X64-NEXT: retq 550 %cmp = fcmp uno <4 x float> %a0, %a1 551 %sext = sext <4 x i1> %cmp to <4 x i32> 552 %res = bitcast <4 x i32> %sext to <4 x float> 553 ret <4 x float> %res 554 } 555 556 define <4 x float> @test_mm_cmpunord_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 557 ; X32-LABEL: test_mm_cmpunord_ss: 558 ; X32: # BB#0: 559 ; X32-NEXT: cmpunordss %xmm1, %xmm0 560 ; X32-NEXT: retl 561 ; 562 ; X64-LABEL: test_mm_cmpunord_ss: 563 ; X64: # BB#0: 564 ; X64-NEXT: cmpunordss %xmm1, %xmm0 565 ; X64-NEXT: retq 566 %res = call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a0, <4 x float> %a1, i8 3) 567 ret <4 x float> %res 568 
} 569 570 define i32 @test_mm_comieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 571 ; X32-LABEL: test_mm_comieq_ss: 572 ; X32: # BB#0: 573 ; X32-NEXT: comiss %xmm1, %xmm0 574 ; X32-NEXT: setnp %al 575 ; X32-NEXT: sete %cl 576 ; X32-NEXT: andb %al, %cl 577 ; X32-NEXT: movzbl %cl, %eax 578 ; X32-NEXT: retl 579 ; 580 ; X64-LABEL: test_mm_comieq_ss: 581 ; X64: # BB#0: 582 ; X64-NEXT: comiss %xmm1, %xmm0 583 ; X64-NEXT: setnp %al 584 ; X64-NEXT: sete %cl 585 ; X64-NEXT: andb %al, %cl 586 ; X64-NEXT: movzbl %cl, %eax 587 ; X64-NEXT: retq 588 %res = call i32 @llvm.x86.sse.comieq.ss(<4 x float> %a0, <4 x float> %a1) 589 ret i32 %res 590 } 591 declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>) nounwind readnone 592 593 define i32 @test_mm_comige_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 594 ; X32-LABEL: test_mm_comige_ss: 595 ; X32: # BB#0: 596 ; X32-NEXT: xorl %eax, %eax 597 ; X32-NEXT: comiss %xmm1, %xmm0 598 ; X32-NEXT: setae %al 599 ; X32-NEXT: retl 600 ; 601 ; X64-LABEL: test_mm_comige_ss: 602 ; X64: # BB#0: 603 ; X64-NEXT: xorl %eax, %eax 604 ; X64-NEXT: comiss %xmm1, %xmm0 605 ; X64-NEXT: setae %al 606 ; X64-NEXT: retq 607 %res = call i32 @llvm.x86.sse.comige.ss(<4 x float> %a0, <4 x float> %a1) 608 ret i32 %res 609 } 610 declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>) nounwind readnone 611 612 define i32 @test_mm_comigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 613 ; X32-LABEL: test_mm_comigt_ss: 614 ; X32: # BB#0: 615 ; X32-NEXT: xorl %eax, %eax 616 ; X32-NEXT: comiss %xmm1, %xmm0 617 ; X32-NEXT: seta %al 618 ; X32-NEXT: retl 619 ; 620 ; X64-LABEL: test_mm_comigt_ss: 621 ; X64: # BB#0: 622 ; X64-NEXT: xorl %eax, %eax 623 ; X64-NEXT: comiss %xmm1, %xmm0 624 ; X64-NEXT: seta %al 625 ; X64-NEXT: retq 626 %res = call i32 @llvm.x86.sse.comigt.ss(<4 x float> %a0, <4 x float> %a1) 627 ret i32 %res 628 } 629 declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>) nounwind readnone 630 631 define i32 @test_mm_comile_ss(<4 x 
float> %a0, <4 x float> %a1) nounwind { 632 ; X32-LABEL: test_mm_comile_ss: 633 ; X32: # BB#0: 634 ; X32-NEXT: xorl %eax, %eax 635 ; X32-NEXT: comiss %xmm0, %xmm1 636 ; X32-NEXT: setae %al 637 ; X32-NEXT: retl 638 ; 639 ; X64-LABEL: test_mm_comile_ss: 640 ; X64: # BB#0: 641 ; X64-NEXT: xorl %eax, %eax 642 ; X64-NEXT: comiss %xmm0, %xmm1 643 ; X64-NEXT: setae %al 644 ; X64-NEXT: retq 645 %res = call i32 @llvm.x86.sse.comile.ss(<4 x float> %a0, <4 x float> %a1) 646 ret i32 %res 647 } 648 declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>) nounwind readnone 649 650 define i32 @test_mm_comilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 651 ; X32-LABEL: test_mm_comilt_ss: 652 ; X32: # BB#0: 653 ; X32-NEXT: xorl %eax, %eax 654 ; X32-NEXT: comiss %xmm0, %xmm1 655 ; X32-NEXT: seta %al 656 ; X32-NEXT: retl 657 ; 658 ; X64-LABEL: test_mm_comilt_ss: 659 ; X64: # BB#0: 660 ; X64-NEXT: xorl %eax, %eax 661 ; X64-NEXT: comiss %xmm0, %xmm1 662 ; X64-NEXT: seta %al 663 ; X64-NEXT: retq 664 %res = call i32 @llvm.x86.sse.comilt.ss(<4 x float> %a0, <4 x float> %a1) 665 ret i32 %res 666 } 667 declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>) nounwind readnone 668 669 define i32 @test_mm_comineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 670 ; X32-LABEL: test_mm_comineq_ss: 671 ; X32: # BB#0: 672 ; X32-NEXT: comiss %xmm1, %xmm0 673 ; X32-NEXT: setp %al 674 ; X32-NEXT: setne %cl 675 ; X32-NEXT: orb %al, %cl 676 ; X32-NEXT: movzbl %cl, %eax 677 ; X32-NEXT: retl 678 ; 679 ; X64-LABEL: test_mm_comineq_ss: 680 ; X64: # BB#0: 681 ; X64-NEXT: comiss %xmm1, %xmm0 682 ; X64-NEXT: setp %al 683 ; X64-NEXT: setne %cl 684 ; X64-NEXT: orb %al, %cl 685 ; X64-NEXT: movzbl %cl, %eax 686 ; X64-NEXT: retq 687 %res = call i32 @llvm.x86.sse.comineq.ss(<4 x float> %a0, <4 x float> %a1) 688 ret i32 %res 689 } 690 declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>) nounwind readnone 691 692 define i32 @test_mm_cvt_ss2si(<4 x float> %a0) nounwind { 693 ; X32-LABEL: 
test_mm_cvt_ss2si: 694 ; X32: # BB#0: 695 ; X32-NEXT: cvtss2si %xmm0, %eax 696 ; X32-NEXT: retl 697 ; 698 ; X64-LABEL: test_mm_cvt_ss2si: 699 ; X64: # BB#0: 700 ; X64-NEXT: cvtss2si %xmm0, %eax 701 ; X64-NEXT: retq 702 %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) 703 ret i32 %res 704 } 705 declare i32 @llvm.x86.sse.cvtss2si(<4 x float>) nounwind readnone 706 707 define <4 x float> @test_mm_cvtsi32_ss(<4 x float> %a0, i32 %a1) nounwind { 708 ; X32-LABEL: test_mm_cvtsi32_ss: 709 ; X32: # BB#0: 710 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 711 ; X32-NEXT: cvtsi2ssl %eax, %xmm1 712 ; X32-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 713 ; X32-NEXT: retl 714 ; 715 ; X64-LABEL: test_mm_cvtsi32_ss: 716 ; X64: # BB#0: 717 ; X64-NEXT: cvtsi2ssl %edi, %xmm1 718 ; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3] 719 ; X64-NEXT: retq 720 %cvt = sitofp i32 %a1 to float 721 %res = insertelement <4 x float> %a0, float %cvt, i32 0 722 ret <4 x float> %res 723 } 724 725 define float @test_mm_cvtss_f32(<4 x float> %a0) nounwind { 726 ; X32-LABEL: test_mm_cvtss_f32: 727 ; X32: # BB#0: 728 ; X32-NEXT: pushl %eax 729 ; X32-NEXT: movss %xmm0, (%esp) 730 ; X32-NEXT: flds (%esp) 731 ; X32-NEXT: popl %eax 732 ; X32-NEXT: retl 733 ; 734 ; X64-LABEL: test_mm_cvtss_f32: 735 ; X64: # BB#0: 736 ; X64-NEXT: retq 737 %res = extractelement <4 x float> %a0, i32 0 738 ret float %res 739 } 740 741 define i32 @test_mm_cvtss_si32(<4 x float> %a0) nounwind { 742 ; X32-LABEL: test_mm_cvtss_si32: 743 ; X32: # BB#0: 744 ; X32-NEXT: cvtss2si %xmm0, %eax 745 ; X32-NEXT: retl 746 ; 747 ; X64-LABEL: test_mm_cvtss_si32: 748 ; X64: # BB#0: 749 ; X64-NEXT: cvtss2si %xmm0, %eax 750 ; X64-NEXT: retq 751 %res = call i32 @llvm.x86.sse.cvtss2si(<4 x float> %a0) 752 ret i32 %res 753 } 754 755 define i32 @test_mm_cvttss_si(<4 x float> %a0) nounwind { 756 ; X32-LABEL: test_mm_cvttss_si: 757 ; X32: # BB#0: 758 ; X32-NEXT: cvttss2si %xmm0, %eax 759 ; X32-NEXT: retl 760 ; 761 ; X64-LABEL: test_mm_cvttss_si: 
762 ; X64: # BB#0: 763 ; X64-NEXT: cvttss2si %xmm0, %eax 764 ; X64-NEXT: retq 765 %cvt = extractelement <4 x float> %a0, i32 0 766 %res = fptosi float %cvt to i32 767 ret i32 %res 768 } 769 770 define i32 @test_mm_cvttss_si32(<4 x float> %a0) nounwind { 771 ; X32-LABEL: test_mm_cvttss_si32: 772 ; X32: # BB#0: 773 ; X32-NEXT: cvttss2si %xmm0, %eax 774 ; X32-NEXT: retl 775 ; 776 ; X64-LABEL: test_mm_cvttss_si32: 777 ; X64: # BB#0: 778 ; X64-NEXT: cvttss2si %xmm0, %eax 779 ; X64-NEXT: retq 780 %cvt = extractelement <4 x float> %a0, i32 0 781 %res = fptosi float %cvt to i32 782 ret i32 %res 783 } 784 785 define <4 x float> @test_mm_div_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 786 ; X32-LABEL: test_mm_div_ps: 787 ; X32: # BB#0: 788 ; X32-NEXT: divps %xmm1, %xmm0 789 ; X32-NEXT: retl 790 ; 791 ; X64-LABEL: test_mm_div_ps: 792 ; X64: # BB#0: 793 ; X64-NEXT: divps %xmm1, %xmm0 794 ; X64-NEXT: retq 795 %res = fdiv <4 x float> %a0, %a1 796 ret <4 x float> %res 797 } 798 799 define <4 x float> @test_mm_div_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 800 ; X32-LABEL: test_mm_div_ss: 801 ; X32: # BB#0: 802 ; X32-NEXT: divss %xmm1, %xmm0 803 ; X32-NEXT: retl 804 ; 805 ; X64-LABEL: test_mm_div_ss: 806 ; X64: # BB#0: 807 ; X64-NEXT: divss %xmm1, %xmm0 808 ; X64-NEXT: retq 809 %ext0 = extractelement <4 x float> %a0, i32 0 810 %ext1 = extractelement <4 x float> %a1, i32 0 811 %fdiv = fdiv float %ext0, %ext1 812 %res = insertelement <4 x float> %a0, float %fdiv, i32 0 813 ret <4 x float> %res 814 } 815 816 define i32 @test_MM_GET_EXCEPTION_MASK() nounwind { 817 ; X32-LABEL: test_MM_GET_EXCEPTION_MASK: 818 ; X32: # BB#0: 819 ; X32-NEXT: pushl %eax 820 ; X32-NEXT: leal (%esp), %eax 821 ; X32-NEXT: stmxcsr (%eax) 822 ; X32-NEXT: movl (%esp), %eax 823 ; X32-NEXT: andl $8064, %eax # imm = 0x1F80 824 ; X32-NEXT: popl %ecx 825 ; X32-NEXT: retl 826 ; 827 ; X64-LABEL: test_MM_GET_EXCEPTION_MASK: 828 ; X64: # BB#0: 829 ; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax 830 ; X64-NEXT: stmxcsr 
(%rax) 831 ; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax 832 ; X64-NEXT: andl $8064, %eax # imm = 0x1F80 833 ; X64-NEXT: retq 834 %1 = alloca i32, align 4 835 %2 = bitcast i32* %1 to i8* 836 call void @llvm.x86.sse.stmxcsr(i8* %2) 837 %3 = load i32, i32* %1, align 4 838 %4 = and i32 %3, 8064 839 ret i32 %4 840 } 841 declare void @llvm.x86.sse.stmxcsr(i8*) nounwind readnone 842 843 define i32 @test_MM_GET_EXCEPTION_STATE() nounwind { 844 ; X32-LABEL: test_MM_GET_EXCEPTION_STATE: 845 ; X32: # BB#0: 846 ; X32-NEXT: pushl %eax 847 ; X32-NEXT: leal (%esp), %eax 848 ; X32-NEXT: stmxcsr (%eax) 849 ; X32-NEXT: movl (%esp), %eax 850 ; X32-NEXT: andl $63, %eax 851 ; X32-NEXT: popl %ecx 852 ; X32-NEXT: retl 853 ; 854 ; X64-LABEL: test_MM_GET_EXCEPTION_STATE: 855 ; X64: # BB#0: 856 ; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax 857 ; X64-NEXT: stmxcsr (%rax) 858 ; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax 859 ; X64-NEXT: andl $63, %eax 860 ; X64-NEXT: retq 861 %1 = alloca i32, align 4 862 %2 = bitcast i32* %1 to i8* 863 call void @llvm.x86.sse.stmxcsr(i8* %2) 864 %3 = load i32, i32* %1, align 4 865 %4 = and i32 %3, 63 866 ret i32 %4 867 } 868 869 define i32 @test_MM_GET_FLUSH_ZERO_MODE() nounwind { 870 ; X32-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 871 ; X32: # BB#0: 872 ; X32-NEXT: pushl %eax 873 ; X32-NEXT: leal (%esp), %eax 874 ; X32-NEXT: stmxcsr (%eax) 875 ; X32-NEXT: movl (%esp), %eax 876 ; X32-NEXT: andl $32768, %eax # imm = 0x8000 877 ; X32-NEXT: popl %ecx 878 ; X32-NEXT: retl 879 ; 880 ; X64-LABEL: test_MM_GET_FLUSH_ZERO_MODE: 881 ; X64: # BB#0: 882 ; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax 883 ; X64-NEXT: stmxcsr (%rax) 884 ; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax 885 ; X64-NEXT: andl $32768, %eax # imm = 0x8000 886 ; X64-NEXT: retq 887 %1 = alloca i32, align 4 888 %2 = bitcast i32* %1 to i8* 889 call void @llvm.x86.sse.stmxcsr(i8* %2) 890 %3 = load i32, i32* %1, align 4 891 %4 = and i32 %3, 32768 892 ret i32 %4 893 } 894 895 define i32 @test_MM_GET_ROUNDING_MODE() nounwind { 896 ; 
; X32-LABEL: test_MM_GET_ROUNDING_MODE:
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: leal (%esp), %eax
; X32-NEXT: stmxcsr (%eax)
; X32-NEXT: movl (%esp), %eax
; X32-NEXT: andl $24576, %eax # imm = 0x6000
; X32-NEXT: popl %ecx
; X32-NEXT: retl
;
; X64-LABEL: test_MM_GET_ROUNDING_MODE:
; X64: # BB#0:
; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-NEXT: stmxcsr (%rax)
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: andl $24576, %eax # imm = 0x6000
; X64-NEXT: retq
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  %4 = and i32 %3, 24576
  ret i32 %4
}

define i32 @test_mm_getcsr() nounwind {
; X32-LABEL: test_mm_getcsr:
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: leal (%esp), %eax
; X32-NEXT: stmxcsr (%eax)
; X32-NEXT: movl (%esp), %eax
; X32-NEXT: popl %ecx
; X32-NEXT: retl
;
; X64-LABEL: test_mm_getcsr:
; X64: # BB#0:
; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-NEXT: stmxcsr (%rax)
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: retq
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1, align 4
  ret i32 %3
}

define <4 x float> @test_mm_load_ps(float* %a0) nounwind {
; X32-LABEL: test_mm_load_ps:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movaps (%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_load_ps:
; X64: # BB#0:
; X64-NEXT: movaps (%rdi), %xmm0
; X64-NEXT: retq
  %arg0 = bitcast float* %a0 to <4 x float>*
  %res = load <4 x float>, <4 x float>* %arg0, align 16
  ret <4 x float> %res
}

define <4 x float> @test_mm_load_ps1(float* %a0) nounwind {
; X32-LABEL: test_mm_load_ps1:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_load_ps1:
; X64: # BB#0:
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: retq
  %ld = load float, float* %a0, align 4
  %res0 = insertelement <4 x float> undef, float %ld, i32 0
  %res1 = insertelement <4 x float> %res0, float %ld, i32 1
  %res2 = insertelement <4 x float> %res1, float %ld, i32 2
  %res3 = insertelement <4 x float> %res2, float %ld, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_load_ss(float* %a0) nounwind {
; X32-LABEL: test_mm_load_ss:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: retl
;
; X64-LABEL: test_mm_load_ss:
; X64: # BB#0:
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: retq
  %ld = load float, float* %a0, align 1
  %res0 = insertelement <4 x float> undef, float %ld, i32 0
  %res1 = insertelement <4 x float> %res0, float 0.0, i32 1
  %res2 = insertelement <4 x float> %res1, float 0.0, i32 2
  %res3 = insertelement <4 x float> %res2, float 0.0, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_load1_ps(float* %a0) nounwind {
; X32-LABEL: test_mm_load1_ps:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_load1_ps:
; X64: # BB#0:
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: retq
  %ld = load float, float* %a0, align 4
  %res0 = insertelement <4 x float> undef, float %ld, i32 0
  %res1 = insertelement <4 x float> %res0, float %ld, i32 1
  %res2 = insertelement <4 x float> %res1, float %ld, i32 2
  %res3 = insertelement <4 x float> %res2, float %ld, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_loadh_pi(<4 x float> %a0, x86_mmx* %a1) {
; X32-LABEL: test_mm_loadh_pi:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_loadh_pi:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-NEXT: shrq $32, %rax
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X64-NEXT: xorps %xmm2, %xmm2
; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
  %ptr = bitcast x86_mmx* %a1 to <2 x float>*
  %ld = load <2 x float>, <2 x float>* %ptr
  %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %res
}

define <4 x float> @test_mm_loadl_pi(<4 x float> %a0, x86_mmx* %a1) {
; X32-LABEL: test_mm_loadl_pi:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X32-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X32-NEXT: movaps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_loadl_pi:
; X64: # BB#0:
; X64-NEXT: movq (%rdi), %rax
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-NEXT: shrq $32, %rax
; X64-NEXT: movl %eax, -{{[0-9]+}}(%rsp)
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X64-NEXT: xorps %xmm2, %xmm2
; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; X64-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
  %ptr = bitcast x86_mmx* %a1 to <2 x float>*
  %ld = load <2 x float>, <2 x float>* %ptr
  %ext = shufflevector <2 x float> %ld, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %res = shufflevector <4 x float> %a0, <4 x float> %ext, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_loadr_ps(float* %a0) nounwind {
; X32-LABEL: test_mm_loadr_ps:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movaps (%eax), %xmm0
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_loadr_ps:
; X64: # BB#0:
; X64-NEXT: movaps (%rdi), %xmm0
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
; X64-NEXT: retq
  %arg0 = bitcast float* %a0 to <4 x float>*
  %ld = load <4 x float>, <4 x float>* %arg0, align 16
  %res = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x float> %res
}

define <4 x float> @test_mm_loadu_ps(float* %a0) nounwind {
; X32-LABEL: test_mm_loadu_ps:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movups (%eax), %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_loadu_ps:
; X64: # BB#0:
; X64-NEXT: movups (%rdi), %xmm0
; X64-NEXT: retq
  %arg0 = bitcast float* %a0 to <4 x float>*
  %res = load <4 x float>, <4 x float>* %arg0, align 1
  ret <4 x float> %res
}

define <4 x float> @test_mm_max_ps(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_max_ps:
; X32: # BB#0:
; X32-NEXT: maxps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_max_ps:
; X64: # BB#0:
; X64-NEXT: maxps %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_mm_max_ss(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_max_ss:
; X32: # BB#0:
; X32-NEXT: maxss %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_max_ss:
; X64: # BB#0:
; X64-NEXT: maxss %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_mm_min_ps(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_min_ps:
; X32: # BB#0:
; X32-NEXT: minps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_min_ps:
; X64: # BB#0:
; X64-NEXT: minps %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_mm_min_ss(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_min_ss:
; X32: # BB#0:
; X32-NEXT: minss %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_min_ss:
; X64: # BB#0:
; X64-NEXT: minss %xmm1, %xmm0
; X64-NEXT: retq
  %res = call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a0, <4 x float> %a1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone

define <4 x float> @test_mm_move_ss(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_move_ss:
; X32: # BB#0:
; X32-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_move_ss:
; X64: # BB#0:
; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-NEXT: retq
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_movehl_ps(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_movehl_ps:
; X32: # BB#0:
; X32-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_movehl_ps:
; X64: # BB#0:
; X64-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
; X64-NEXT: retq
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 6, i32 7, i32 2, i32 3>
  ret <4 x float> %res
}

define <4 x float> @test_mm_movelh_ps(<4 x float> %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_movelh_ps:
; X32: # BB#0:
; X32-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_movelh_ps:
; X64: # BB#0:
; X64-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %res
}

define i32 @test_mm_movemask_ps(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm_movemask_ps:
; X32: # BB#0:
; X32-NEXT: movmskps %xmm0, %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_movemask_ps:
; X64: # BB#0:
; X64-NEXT: movmskps %xmm0, %eax
; X64-NEXT: retq
  %res = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
  ret i32 %res
}
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone

define <4 x float> @test_mm_mul_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_mul_ps:
; X32: # BB#0:
; X32-NEXT: mulps %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mul_ps:
; X64: # BB#0:
; X64-NEXT: mulps %xmm1, %xmm0
; X64-NEXT: retq
  %res = fmul <4 x float> %a0, %a1
  ret <4 x float> %res
}

define <4 x float> @test_mm_mul_ss(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_mul_ss:
; X32: # BB#0:
; X32-NEXT: mulss %xmm1, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mul_ss:
; X64: # BB#0:
; X64-NEXT: mulss %xmm1, %xmm0
; X64-NEXT: retq
  %ext0 = extractelement <4 x float> %a0, i32 0
  %ext1 = extractelement <4 x float> %a1, i32 0
  %fmul = fmul float %ext0, %ext1
  %res = insertelement <4 x float> %a0, float %fmul, i32 0
  ret <4 x float> %res
}

define <4 x float> @test_mm_or_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_or_ps:
; X32: # BB#0:
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: pushl %esi
; X32-NEXT: andl $-16, %esp
; X32-NEXT: subl $64, %esp
; X32-NEXT: movaps %xmm0, {{[0-9]+}}(%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movaps %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT: orl {{[0-9]+}}(%esp), %esi
; X32-NEXT: movl %esi, (%esp)
; X32-NEXT: orl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %edx, {{[0-9]+}}(%esp)
; X32-NEXT: orl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; X32-NEXT: orl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-NEXT: leal -4(%ebp), %esp
; X32-NEXT: popl %esi
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; X64-LABEL: test_mm_or_ps:
; X64: # BB#0:
; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8
; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
; X64-NEXT: movq %rdx, %rsi
; X64-NEXT: orl %eax, %edx
; X64-NEXT: shrq $32, %rax
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; X64-NEXT: movq %rcx, %rdi
; X64-NEXT: orl %r8d, %ecx
; X64-NEXT: shrq $32, %r8
; X64-NEXT: shrq $32, %rsi
; X64-NEXT: shrq $32, %rdi
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp)
; X64-NEXT: orl %r8d, %edi
; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
; X64-NEXT: orl %eax, %esi
; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp)
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X64-NEXT: retq
  %arg0 = bitcast <4 x float> %a0 to <4 x i32>
  %arg1 = bitcast <4 x float> %a1 to <4 x i32>
  %res = or <4 x i32> %arg0, %arg1
  %bc = bitcast <4 x i32> %res to <4 x float>
  ret <4 x float> %bc
}

define void @test_mm_prefetch(i8* %a0) {
; X32-LABEL: test_mm_prefetch:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: prefetchnta (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_prefetch:
; X64: # BB#0:
; X64-NEXT: prefetchnta (%rdi)
; X64-NEXT: retq
  call void @llvm.prefetch(i8* %a0, i32 0, i32 0, i32 1)
  ret void
}
declare void @llvm.prefetch(i8* nocapture, i32, i32, i32) nounwind readnone

define <4 x float> @test_mm_rcp_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_rcp_ps:
; X32: # BB#0:
; X32-NEXT: rcpps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_rcp_ps:
; X64: # BB#0:
; X64-NEXT: rcpps %xmm0, %xmm0
; X64-NEXT: retq
  %res = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone

define <4 x float> @test_mm_rcp_ss(<4 x float> %a0) {
; X32-LABEL: test_mm_rcp_ss:
; X32: # BB#0:
; X32-NEXT: rcpss %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_rcp_ss:
; X64: # BB#0:
; X64-NEXT: rcpss %xmm0, %xmm0
; X64-NEXT: retq
  %rcp = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %a0)
  %ext0 = extractelement <4 x float> %rcp, i32 0
  %ins0 = insertelement <4 x float> undef, float %ext0, i32 0
  %ext1 = extractelement <4 x float> %a0, i32 1
  %ins1 = insertelement <4 x float> %ins0, float %ext1, i32 1
  %ext2 = extractelement <4 x float> %a0, i32 2
  %ins2 = insertelement <4 x float> %ins1, float %ext2, i32 2
  %ext3 = extractelement <4 x float> %a0, i32 3
  %ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
  ret <4 x float> %ins3
}
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone

define <4 x float> @test_mm_rsqrt_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_rsqrt_ps:
; X32: # BB#0:
; X32-NEXT: rsqrtps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_rsqrt_ps:
; X64: # BB#0:
; X64-NEXT: rsqrtps %xmm0, %xmm0
; X64-NEXT: retq
  %res = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone

define <4 x float> @test_mm_rsqrt_ss(<4 x float> %a0) {
; X32-LABEL: test_mm_rsqrt_ss:
; X32: # BB#0:
; X32-NEXT: rsqrtss %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_rsqrt_ss:
; X64: # BB#0:
; X64-NEXT: rsqrtss %xmm0, %xmm0
; X64-NEXT: retq
  %rsqrt = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %a0)
  %ext0 = extractelement <4 x float> %rsqrt, i32 0
  %ins0 = insertelement <4 x float> undef, float %ext0, i32 0
  %ext1 = extractelement <4 x float> %a0, i32 1
  %ins1 = insertelement <4 x float> %ins0, float %ext1, i32 1
  %ext2 = extractelement <4 x float> %a0, i32 2
  %ins2 = insertelement <4 x float> %ins1, float %ext2, i32 2
  %ext3 = extractelement <4 x float> %a0, i32 3
  %ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
  ret <4 x float> %ins3
}
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone

define void @test_MM_SET_EXCEPTION_MASK(i32 %a0) nounwind {
; X32-LABEL: test_MM_SET_EXCEPTION_MASK:
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: leal (%esp), %ecx
; X32-NEXT: stmxcsr (%ecx)
; X32-NEXT: movl (%esp), %edx
; X32-NEXT: andl $-8065, %edx # imm = 0xE07F
; X32-NEXT: orl %eax, %edx
; X32-NEXT: movl %edx, (%esp)
; X32-NEXT: ldmxcsr (%ecx)
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_MM_SET_EXCEPTION_MASK:
; X64: # BB#0:
; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-NEXT: stmxcsr (%rax)
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; X64-NEXT: andl $-8065, %ecx # imm = 0xE07F
; X64-NEXT: orl %edi, %ecx
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; X64-NEXT: ldmxcsr (%rax)
; X64-NEXT: retq
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1
  %4 = and i32 %3, -8065
  %5 = or i32 %4, %a0
  store i32 %5, i32* %1
  call void @llvm.x86.sse.ldmxcsr(i8* %2)
  ret void
}
declare void @llvm.x86.sse.ldmxcsr(i8*) nounwind readnone

define void @test_MM_SET_EXCEPTION_STATE(i32 %a0) nounwind {
; X32-LABEL: test_MM_SET_EXCEPTION_STATE:
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: leal (%esp), %ecx
; X32-NEXT: stmxcsr (%ecx)
; X32-NEXT: movl (%esp), %edx
; X32-NEXT: andl $-64, %edx
; X32-NEXT: orl %eax, %edx
; X32-NEXT: movl %edx, (%esp)
; X32-NEXT: ldmxcsr (%ecx)
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_MM_SET_EXCEPTION_STATE:
; X64: # BB#0:
; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-NEXT: stmxcsr (%rax)
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; X64-NEXT: andl $-64, %ecx
; X64-NEXT: orl %edi, %ecx
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; X64-NEXT: ldmxcsr (%rax)
; X64-NEXT: retq
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1
  %4 = and i32 %3, -64
  %5 = or i32 %4, %a0
  store i32 %5, i32* %1
  call void @llvm.x86.sse.ldmxcsr(i8* %2)
  ret void
}

define void @test_MM_SET_FLUSH_ZERO_MODE(i32 %a0) nounwind {
; X32-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: leal (%esp), %ecx
; X32-NEXT: stmxcsr (%ecx)
; X32-NEXT: movl (%esp), %edx
; X32-NEXT: andl $-32769, %edx # imm = 0xFFFF7FFF
; X32-NEXT: orl %eax, %edx
; X32-NEXT: movl %edx, (%esp)
; X32-NEXT: ldmxcsr (%ecx)
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_MM_SET_FLUSH_ZERO_MODE:
; X64: # BB#0:
; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-NEXT: stmxcsr (%rax)
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; X64-NEXT: andl $-32769, %ecx # imm = 0xFFFF7FFF
; X64-NEXT: orl %edi, %ecx
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; X64-NEXT: ldmxcsr (%rax)
; X64-NEXT: retq
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1
  %4 = and i32 %3, -32769
  %5 = or i32 %4, %a0
  store i32 %5, i32* %1
  call void @llvm.x86.sse.ldmxcsr(i8* %2)
  ret void
}

define <4 x float> @test_mm_set_ps(float %a0, float %a1, float %a2, float %a3) nounwind {
; X32-LABEL: test_mm_set_ps:
; X32: # BB#0:
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_ps:
; X64: # BB#0:
; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
; X64-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; X64-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; X64-NEXT: movaps %xmm3, %xmm0
; X64-NEXT: retq
  %res0 = insertelement <4 x float> undef, float %a3, i32 0
  %res1 = insertelement <4 x float> %res0, float %a2, i32 1
  %res2 = insertelement <4 x float> %res1, float %a1, i32 2
  %res3 = insertelement <4 x float> %res2, float %a0, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_set_ps1(float %a0) nounwind {
; X32-LABEL: test_mm_set_ps1:
; X32: # BB#0:
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_ps1:
; X64: # BB#0:
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: retq
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float %a0, i32 1
  %res2 = insertelement <4 x float> %res1, float %a0, i32 2
  %res3 = insertelement <4 x float> %res2, float %a0, i32 3
  ret <4 x float> %res3
}

define void @test_MM_SET_ROUNDING_MODE(i32 %a0) nounwind {
; X32-LABEL: test_MM_SET_ROUNDING_MODE:
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: leal (%esp), %ecx
; X32-NEXT: stmxcsr (%ecx)
; X32-NEXT: movl (%esp), %edx
; X32-NEXT: andl $-24577, %edx # imm = 0x9FFF
; X32-NEXT: orl %eax, %edx
; X32-NEXT: movl %edx, (%esp)
; X32-NEXT: ldmxcsr (%ecx)
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_MM_SET_ROUNDING_MODE:
; X64: # BB#0:
; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-NEXT: stmxcsr (%rax)
; X64-NEXT: movl -{{[0-9]+}}(%rsp), %ecx
; X64-NEXT: andl $-24577, %ecx # imm = 0x9FFF
; X64-NEXT: orl %edi, %ecx
; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp)
; X64-NEXT: ldmxcsr (%rax)
; X64-NEXT: retq
  %1 = alloca i32, align 4
  %2 = bitcast i32* %1 to i8*
  call void @llvm.x86.sse.stmxcsr(i8* %2)
  %3 = load i32, i32* %1
  %4 = and i32 %3, -24577
  %5 = or i32 %4, %a0
  store i32 %5, i32* %1
  call void @llvm.x86.sse.ldmxcsr(i8* %2)
  ret void
}

define <4 x float> @test_mm_set_ss(float %a0) nounwind {
; X32-LABEL: test_mm_set_ss:
; X32: # BB#0:
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: xorps %xmm0, %xmm0
; X32-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set_ss:
; X64: # BB#0:
; X64-NEXT: xorps %xmm1, %xmm1
; X64-NEXT: movss {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
; X64-NEXT: movaps %xmm1, %xmm0
; X64-NEXT: retq
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float 0.0, i32 1
  %res2 = insertelement <4 x float> %res1, float 0.0, i32 2
  %res3 = insertelement <4 x float> %res2, float 0.0, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_set1_ps(float %a0) nounwind {
; X32-LABEL: test_mm_set1_ps:
; X32: # BB#0:
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_set1_ps:
; X64: # BB#0:
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: retq
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float %a0, i32 1
  %res2 = insertelement <4 x float> %res1, float %a0, i32 2
  %res3 = insertelement <4 x float> %res2, float %a0, i32 3
  ret <4 x float> %res3
}

define void @test_mm_setcsr(i32 %a0) nounwind {
; X32-LABEL: test_mm_setcsr:
; X32: # BB#0:
; X32-NEXT: pushl %eax
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: leal (%esp), %ecx
; X32-NEXT: movl %eax, (%esp)
; X32-NEXT: ldmxcsr (%ecx)
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setcsr:
; X64: # BB#0:
; X64-NEXT: leaq -{{[0-9]+}}(%rsp), %rax
; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp)
; X64-NEXT: ldmxcsr (%rax)
; X64-NEXT: retq
  %st = alloca i32, align 4
  store i32 %a0, i32* %st, align 4
  %bc = bitcast i32* %st to i8*
  call void @llvm.x86.sse.ldmxcsr(i8* %bc)
  ret void
}

define <4 x float> @test_mm_setr_ps(float %a0, float %a1, float %a2, float %a3) nounwind {
; X32-LABEL: test_mm_setr_ps:
; X32: # BB#0:
; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: unpcklps {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1]
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setr_ps:
; X64: # BB#0:
; X64-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: retq
  %res0 = insertelement <4 x float> undef, float %a0, i32 0
  %res1 = insertelement <4 x float> %res0, float %a1, i32 1
  %res2 = insertelement <4 x float> %res1, float %a2, i32 2
  %res3 = insertelement <4 x float> %res2, float %a3, i32 3
  ret <4 x float> %res3
}

define <4 x float> @test_mm_setzero_ps() {
; X32-LABEL: test_mm_setzero_ps:
; X32: # BB#0:
; X32-NEXT: xorps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_setzero_ps:
; X64: # BB#0:
; X64-NEXT: xorps %xmm0, %xmm0
; X64-NEXT: retq
  ret <4 x float> zeroinitializer
}

define void @test_mm_sfence() nounwind {
; X32-LABEL: test_mm_sfence:
; X32: # BB#0:
; X32-NEXT: sfence
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sfence:
; X64: # BB#0:
; X64-NEXT: sfence
; X64-NEXT: retq
  call void @llvm.x86.sse.sfence()
  ret void
}
declare void @llvm.x86.sse.sfence() nounwind readnone

define <4 x float> @test_mm_shuffle_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_shuffle_ps:
; X32: # BB#0:
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_shuffle_ps:
; X64: # BB#0:
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
; X64-NEXT: retq
  %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 0, i32 4, i32 4>
  ret <4 x float> %res
}

define <4 x float> @test_mm_sqrt_ps(<4 x float> %a0) {
; X32-LABEL: test_mm_sqrt_ps:
; X32: # BB#0:
; X32-NEXT: sqrtps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sqrt_ps:
; X64: # BB#0:
; X64-NEXT: sqrtps %xmm0, %xmm0
; X64-NEXT: retq
  %res = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %a0)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone

define <4 x float> @test_mm_sqrt_ss(<4 x float> %a0) {
; X32-LABEL: test_mm_sqrt_ss:
; X32: # BB#0:
; X32-NEXT: sqrtss %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_sqrt_ss:
; X64: # BB#0:
; X64-NEXT: sqrtss %xmm0, %xmm0
; X64-NEXT: retq
  %sqrt = call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %a0)
  %ext0 = extractelement <4 x float> %sqrt, i32 0
  %ins0 = insertelement <4 x float> undef, float %ext0, i32 0
  %ext1 = extractelement <4 x float> %a0, i32 1
  %ins1 = insertelement <4 x float> %ins0, float %ext1, i32 1
  %ext2 = extractelement <4 x float> %a0, i32 2
  %ins2 = insertelement <4 x float> %ins1, float %ext2, i32 2
  %ext3 = extractelement <4 x float> %a0, i32 3
  %ins3 = insertelement <4 x float> %ins2, float %ext3, i32 3
  ret <4 x float> %ins3
}
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone

define void @test_mm_store_ps(float *%a0, <4 x float> %a1) {
; X32-LABEL: test_mm_store_ps:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movaps %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_store_ps:
; X64: # BB#0:
; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: retq
  %arg0 = bitcast float* %a0 to <4 x float>*
  store <4 x float> %a1, <4 x float>* %arg0, align 16
  ret void
}

define void @test_mm_store_ps1(float *%a0, <4 x float> %a1) {
; X32-LABEL: test_mm_store_ps1:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: movaps %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_store_ps1:
; X64: # BB#0:
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: retq
  %arg0 = bitcast float* %a0 to <4 x float>*
  %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer
  store <4 x float> %shuf, <4 x float>* %arg0, align 16
  ret void
}

define void @test_mm_store_ss(float *%a0, <4 x float> %a1) {
; X32-LABEL: test_mm_store_ss:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movss %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_store_ss:
; X64: # BB#0:
; X64-NEXT: movss %xmm0, (%rdi)
; X64-NEXT: retq
  %ext = extractelement <4 x float> %a1, i32 0
  store float %ext, float* %a0, align 1
  ret void
}

define void @test_mm_store1_ps(float *%a0, <4 x float> %a1) {
; X32-LABEL: test_mm_store1_ps:
; X32: # BB#0:
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X32-NEXT: movaps %xmm0, (%eax)
; X32-NEXT: retl
;
; X64-LABEL: test_mm_store1_ps:
; X64: # BB#0:
; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT: movaps %xmm0, (%rdi)
; X64-NEXT: retq
  %arg0 = bitcast float* %a0 to <4 x float>*
  %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> zeroinitializer
  store <4 x float> %shuf, <4 x float>* %arg0, align 16
  ret void
}

define void @test_mm_storeh_ps(x86_mmx *%a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_storeh_ps:
; X32: # BB#0:
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: andl $-16, %esp
; X32-NEXT: subl $32, %esp
; X32-NEXT: movl 8(%ebp), %eax
; X32-NEXT: movaps %xmm0, (%esp)
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
; X32-NEXT: movl %edx, 4(%eax)
; X32-NEXT: movl %ecx, (%eax)
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: retl
;
; X64-LABEL: test_mm_storeh_ps:
; X64: # BB#0:
; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; X64-NEXT: movq %rax, (%rdi)
; X64-NEXT: retq
  %ptr = bitcast x86_mmx* %a0 to i64*
  %bc = bitcast <4 x float> %a1 to <2 x i64>
  %ext = extractelement <2 x i64> %bc, i32 1
  store i64 %ext, i64* %ptr
  ret void
}

define void @test_mm_storel_ps(x86_mmx *%a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_storel_ps:
; X32: # BB#0:
; X32-NEXT: pushl %ebp
; X32-NEXT: movl %esp, %ebp
;
X32-NEXT: andl $-16, %esp 1892 ; X32-NEXT: subl $32, %esp 1893 ; X32-NEXT: movl 8(%ebp), %eax 1894 ; X32-NEXT: movaps %xmm0, (%esp) 1895 ; X32-NEXT: movl (%esp), %ecx 1896 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx 1897 ; X32-NEXT: movl %edx, 4(%eax) 1898 ; X32-NEXT: movl %ecx, (%eax) 1899 ; X32-NEXT: movl %ebp, %esp 1900 ; X32-NEXT: popl %ebp 1901 ; X32-NEXT: retl 1902 ; 1903 ; X64-LABEL: test_mm_storel_ps: 1904 ; X64: # BB#0: 1905 ; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1906 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax 1907 ; X64-NEXT: movq %rax, (%rdi) 1908 ; X64-NEXT: retq 1909 %ptr = bitcast x86_mmx* %a0 to i64* 1910 %bc = bitcast <4 x float> %a1 to <2 x i64> 1911 %ext = extractelement <2 x i64> %bc, i32 0 1912 store i64 %ext, i64* %ptr 1913 ret void 1914 } 1915 1916 define void @test_mm_storer_ps(float *%a0, <4 x float> %a1) { 1917 ; X32-LABEL: test_mm_storer_ps: 1918 ; X32: # BB#0: 1919 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1920 ; X32-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0] 1921 ; X32-NEXT: movaps %xmm0, (%eax) 1922 ; X32-NEXT: retl 1923 ; 1924 ; X64-LABEL: test_mm_storer_ps: 1925 ; X64: # BB#0: 1926 ; X64-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,2,1,0] 1927 ; X64-NEXT: movaps %xmm0, (%rdi) 1928 ; X64-NEXT: retq 1929 %arg0 = bitcast float* %a0 to <4 x float>* 1930 %shuf = shufflevector <4 x float> %a1, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0> 1931 store <4 x float> %shuf, <4 x float>* %arg0, align 16 1932 ret void 1933 } 1934 1935 define void @test_mm_storeu_ps(float *%a0, <4 x float> %a1) { 1936 ; X32-LABEL: test_mm_storeu_ps: 1937 ; X32: # BB#0: 1938 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1939 ; X32-NEXT: movups %xmm0, (%eax) 1940 ; X32-NEXT: retl 1941 ; 1942 ; X64-LABEL: test_mm_storeu_ps: 1943 ; X64: # BB#0: 1944 ; X64-NEXT: movups %xmm0, (%rdi) 1945 ; X64-NEXT: retq 1946 %arg0 = bitcast float* %a0 to <4 x float>* 1947 store <4 x float> %a1, <4 x float>* %arg0, align 1 1948 ret void 1949 } 1950 1951 define void @test_mm_stream_ps(float 
*%a0, <4 x float> %a1) { 1952 ; X32-LABEL: test_mm_stream_ps: 1953 ; X32: # BB#0: 1954 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 1955 ; X32-NEXT: movntps %xmm0, (%eax) 1956 ; X32-NEXT: retl 1957 ; 1958 ; X64-LABEL: test_mm_stream_ps: 1959 ; X64: # BB#0: 1960 ; X64-NEXT: movntps %xmm0, (%rdi) 1961 ; X64-NEXT: retq 1962 %arg0 = bitcast float* %a0 to <4 x float>* 1963 store <4 x float> %a1, <4 x float>* %arg0, align 16, !nontemporal !0 1964 ret void 1965 } 1966 1967 define <4 x float> @test_mm_sub_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 1968 ; X32-LABEL: test_mm_sub_ps: 1969 ; X32: # BB#0: 1970 ; X32-NEXT: subps %xmm1, %xmm0 1971 ; X32-NEXT: retl 1972 ; 1973 ; X64-LABEL: test_mm_sub_ps: 1974 ; X64: # BB#0: 1975 ; X64-NEXT: subps %xmm1, %xmm0 1976 ; X64-NEXT: retq 1977 %res = fsub <4 x float> %a0, %a1 1978 ret <4 x float> %res 1979 } 1980 1981 define <4 x float> @test_mm_sub_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 1982 ; X32-LABEL: test_mm_sub_ss: 1983 ; X32: # BB#0: 1984 ; X32-NEXT: subss %xmm1, %xmm0 1985 ; X32-NEXT: retl 1986 ; 1987 ; X64-LABEL: test_mm_sub_ss: 1988 ; X64: # BB#0: 1989 ; X64-NEXT: subss %xmm1, %xmm0 1990 ; X64-NEXT: retq 1991 %ext0 = extractelement <4 x float> %a0, i32 0 1992 %ext1 = extractelement <4 x float> %a1, i32 0 1993 %fsub = fsub float %ext0, %ext1 1994 %res = insertelement <4 x float> %a0, float %fsub, i32 0 1995 ret <4 x float> %res 1996 } 1997 1998 define void @test_MM_TRANSPOSE4_PS(<4 x float>* %a0, <4 x float>* %a1, <4 x float>* %a2, <4 x float>* %a3) nounwind { 1999 ; X32-LABEL: test_MM_TRANSPOSE4_PS: 2000 ; X32: # BB#0: 2001 ; X32-NEXT: pushl %esi 2002 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 2003 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 2004 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx 2005 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi 2006 ; X32-NEXT: movaps (%esi), %xmm0 2007 ; X32-NEXT: movaps (%edx), %xmm1 2008 ; X32-NEXT: movaps (%ecx), %xmm2 2009 ; X32-NEXT: movaps (%eax), %xmm3 2010 ; X32-NEXT: movaps %xmm0, %xmm4 2011 ; 
X32-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] 2012 ; X32-NEXT: movaps %xmm2, %xmm5 2013 ; X32-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] 2014 ; X32-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2015 ; X32-NEXT: unpckhps {{.*#+}} xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 2016 ; X32-NEXT: movaps %xmm4, %xmm1 2017 ; X32-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm5[0] 2018 ; X32-NEXT: movhlps {{.*#+}} xmm5 = xmm4[1],xmm5[1] 2019 ; X32-NEXT: movaps %xmm0, %xmm3 2020 ; X32-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm2[0] 2021 ; X32-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] 2022 ; X32-NEXT: movaps %xmm1, (%esi) 2023 ; X32-NEXT: movaps %xmm5, (%edx) 2024 ; X32-NEXT: movaps %xmm3, (%ecx) 2025 ; X32-NEXT: movaps %xmm2, (%eax) 2026 ; X32-NEXT: popl %esi 2027 ; X32-NEXT: retl 2028 ; 2029 ; X64-LABEL: test_MM_TRANSPOSE4_PS: 2030 ; X64: # BB#0: 2031 ; X64-NEXT: movaps (%rdi), %xmm0 2032 ; X64-NEXT: movaps (%rsi), %xmm1 2033 ; X64-NEXT: movaps (%rdx), %xmm2 2034 ; X64-NEXT: movaps (%rcx), %xmm3 2035 ; X64-NEXT: movaps %xmm0, %xmm4 2036 ; X64-NEXT: unpcklps {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] 2037 ; X64-NEXT: movaps %xmm2, %xmm5 2038 ; X64-NEXT: unpcklps {{.*#+}} xmm5 = xmm5[0],xmm3[0],xmm5[1],xmm3[1] 2039 ; X64-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2040 ; X64-NEXT: unpckhps {{.*#+}} xmm2 = xmm2[2],xmm3[2],xmm2[3],xmm3[3] 2041 ; X64-NEXT: movaps %xmm4, %xmm1 2042 ; X64-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm5[0] 2043 ; X64-NEXT: movhlps {{.*#+}} xmm5 = xmm4[1],xmm5[1] 2044 ; X64-NEXT: movaps %xmm0, %xmm3 2045 ; X64-NEXT: movlhps {{.*#+}} xmm3 = xmm3[0],xmm2[0] 2046 ; X64-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] 2047 ; X64-NEXT: movaps %xmm1, (%rdi) 2048 ; X64-NEXT: movaps %xmm5, (%rsi) 2049 ; X64-NEXT: movaps %xmm3, (%rdx) 2050 ; X64-NEXT: movaps %xmm2, (%rcx) 2051 ; X64-NEXT: retq 2052 %row0 = load <4 x float>, <4 x float>* %a0, align 16 2053 %row1 = load <4 x float>, <4 x float>* 
%a1, align 16 2054 %row2 = load <4 x float>, <4 x float>* %a2, align 16 2055 %row3 = load <4 x float>, <4 x float>* %a3, align 16 2056 %tmp0 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 2057 %tmp2 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 2058 %tmp1 = shufflevector <4 x float> %row0, <4 x float> %row1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 2059 %tmp3 = shufflevector <4 x float> %row2, <4 x float> %row3, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 2060 %res0 = shufflevector <4 x float> %tmp0, <4 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 2061 %res1 = shufflevector <4 x float> %tmp2, <4 x float> %tmp0, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 2062 %res2 = shufflevector <4 x float> %tmp1, <4 x float> %tmp3, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 2063 %res3 = shufflevector <4 x float> %tmp3, <4 x float> %tmp1, <4 x i32> <i32 6, i32 7, i32 2, i32 3> 2064 store <4 x float> %res0, <4 x float>* %a0, align 16 2065 store <4 x float> %res1, <4 x float>* %a1, align 16 2066 store <4 x float> %res2, <4 x float>* %a2, align 16 2067 store <4 x float> %res3, <4 x float>* %a3, align 16 2068 ret void 2069 } 2070 2071 define i32 @test_mm_ucomieq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 2072 ; X32-LABEL: test_mm_ucomieq_ss: 2073 ; X32: # BB#0: 2074 ; X32-NEXT: ucomiss %xmm1, %xmm0 2075 ; X32-NEXT: setnp %al 2076 ; X32-NEXT: sete %cl 2077 ; X32-NEXT: andb %al, %cl 2078 ; X32-NEXT: movzbl %cl, %eax 2079 ; X32-NEXT: retl 2080 ; 2081 ; X64-LABEL: test_mm_ucomieq_ss: 2082 ; X64: # BB#0: 2083 ; X64-NEXT: ucomiss %xmm1, %xmm0 2084 ; X64-NEXT: setnp %al 2085 ; X64-NEXT: sete %cl 2086 ; X64-NEXT: andb %al, %cl 2087 ; X64-NEXT: movzbl %cl, %eax 2088 ; X64-NEXT: retq 2089 %res = call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %a0, <4 x float> %a1) 2090 ret i32 %res 2091 } 2092 declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>) nounwind readnone 2093 2094 define i32 
@test_mm_ucomige_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 2095 ; X32-LABEL: test_mm_ucomige_ss: 2096 ; X32: # BB#0: 2097 ; X32-NEXT: xorl %eax, %eax 2098 ; X32-NEXT: ucomiss %xmm1, %xmm0 2099 ; X32-NEXT: setae %al 2100 ; X32-NEXT: retl 2101 ; 2102 ; X64-LABEL: test_mm_ucomige_ss: 2103 ; X64: # BB#0: 2104 ; X64-NEXT: xorl %eax, %eax 2105 ; X64-NEXT: ucomiss %xmm1, %xmm0 2106 ; X64-NEXT: setae %al 2107 ; X64-NEXT: retq 2108 %res = call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %a0, <4 x float> %a1) 2109 ret i32 %res 2110 } 2111 declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>) nounwind readnone 2112 2113 define i32 @test_mm_ucomigt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 2114 ; X32-LABEL: test_mm_ucomigt_ss: 2115 ; X32: # BB#0: 2116 ; X32-NEXT: xorl %eax, %eax 2117 ; X32-NEXT: ucomiss %xmm1, %xmm0 2118 ; X32-NEXT: seta %al 2119 ; X32-NEXT: retl 2120 ; 2121 ; X64-LABEL: test_mm_ucomigt_ss: 2122 ; X64: # BB#0: 2123 ; X64-NEXT: xorl %eax, %eax 2124 ; X64-NEXT: ucomiss %xmm1, %xmm0 2125 ; X64-NEXT: seta %al 2126 ; X64-NEXT: retq 2127 %res = call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %a0, <4 x float> %a1) 2128 ret i32 %res 2129 } 2130 declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>) nounwind readnone 2131 2132 define i32 @test_mm_ucomile_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 2133 ; X32-LABEL: test_mm_ucomile_ss: 2134 ; X32: # BB#0: 2135 ; X32-NEXT: xorl %eax, %eax 2136 ; X32-NEXT: ucomiss %xmm0, %xmm1 2137 ; X32-NEXT: setae %al 2138 ; X32-NEXT: retl 2139 ; 2140 ; X64-LABEL: test_mm_ucomile_ss: 2141 ; X64: # BB#0: 2142 ; X64-NEXT: xorl %eax, %eax 2143 ; X64-NEXT: ucomiss %xmm0, %xmm1 2144 ; X64-NEXT: setae %al 2145 ; X64-NEXT: retq 2146 %res = call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %a0, <4 x float> %a1) 2147 ret i32 %res 2148 } 2149 declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>) nounwind readnone 2150 2151 define i32 @test_mm_ucomilt_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 2152 ; 
X32-LABEL: test_mm_ucomilt_ss: 2153 ; X32: # BB#0: 2154 ; X32-NEXT: xorl %eax, %eax 2155 ; X32-NEXT: ucomiss %xmm0, %xmm1 2156 ; X32-NEXT: seta %al 2157 ; X32-NEXT: retl 2158 ; 2159 ; X64-LABEL: test_mm_ucomilt_ss: 2160 ; X64: # BB#0: 2161 ; X64-NEXT: xorl %eax, %eax 2162 ; X64-NEXT: ucomiss %xmm0, %xmm1 2163 ; X64-NEXT: seta %al 2164 ; X64-NEXT: retq 2165 %res = call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %a0, <4 x float> %a1) 2166 ret i32 %res 2167 } 2168 declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>) nounwind readnone 2169 2170 define i32 @test_mm_ucomineq_ss(<4 x float> %a0, <4 x float> %a1) nounwind { 2171 ; X32-LABEL: test_mm_ucomineq_ss: 2172 ; X32: # BB#0: 2173 ; X32-NEXT: ucomiss %xmm1, %xmm0 2174 ; X32-NEXT: setp %al 2175 ; X32-NEXT: setne %cl 2176 ; X32-NEXT: orb %al, %cl 2177 ; X32-NEXT: movzbl %cl, %eax 2178 ; X32-NEXT: retl 2179 ; 2180 ; X64-LABEL: test_mm_ucomineq_ss: 2181 ; X64: # BB#0: 2182 ; X64-NEXT: ucomiss %xmm1, %xmm0 2183 ; X64-NEXT: setp %al 2184 ; X64-NEXT: setne %cl 2185 ; X64-NEXT: orb %al, %cl 2186 ; X64-NEXT: movzbl %cl, %eax 2187 ; X64-NEXT: retq 2188 %res = call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %a0, <4 x float> %a1) 2189 ret i32 %res 2190 } 2191 declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>) nounwind readnone 2192 2193 define <4 x float> @test_mm_undefined_ps() { 2194 ; X32-LABEL: test_mm_undefined_ps: 2195 ; X32: # BB#0: 2196 ; X32-NEXT: retl 2197 ; 2198 ; X64-LABEL: test_mm_undefined_ps: 2199 ; X64: # BB#0: 2200 ; X64-NEXT: retq 2201 ret <4 x float> undef 2202 } 2203 2204 define <4 x float> @test_mm_unpackhi_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 2205 ; X32-LABEL: test_mm_unpackhi_ps: 2206 ; X32: # BB#0: 2207 ; X32-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2208 ; X32-NEXT: retl 2209 ; 2210 ; X64-LABEL: test_mm_unpackhi_ps: 2211 ; X64: # BB#0: 2212 ; X64-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2213 ; X64-NEXT: retq 2214 %res = 
shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7> 2215 ret <4 x float> %res 2216 } 2217 2218 define <4 x float> @test_mm_unpacklo_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 2219 ; X32-LABEL: test_mm_unpacklo_ps: 2220 ; X32: # BB#0: 2221 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2222 ; X32-NEXT: retl 2223 ; 2224 ; X64-LABEL: test_mm_unpacklo_ps: 2225 ; X64: # BB#0: 2226 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2227 ; X64-NEXT: retq 2228 %res = shufflevector <4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> 2229 ret <4 x float> %res 2230 } 2231 2232 define <4 x float> @test_mm_xor_ps(<4 x float> %a0, <4 x float> %a1) nounwind { 2233 ; X32-LABEL: test_mm_xor_ps: 2234 ; X32: # BB#0: 2235 ; X32-NEXT: pushl %ebp 2236 ; X32-NEXT: movl %esp, %ebp 2237 ; X32-NEXT: pushl %esi 2238 ; X32-NEXT: andl $-16, %esp 2239 ; X32-NEXT: subl $64, %esp 2240 ; X32-NEXT: movaps %xmm0, {{[0-9]+}}(%esp) 2241 ; X32-NEXT: movl {{[0-9]+}}(%esp), %eax 2242 ; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx 2243 ; X32-NEXT: movl {{[0-9]+}}(%esp), %edx 2244 ; X32-NEXT: movl {{[0-9]+}}(%esp), %esi 2245 ; X32-NEXT: movaps %xmm1, {{[0-9]+}}(%esp) 2246 ; X32-NEXT: xorl {{[0-9]+}}(%esp), %esi 2247 ; X32-NEXT: movl %esi, (%esp) 2248 ; X32-NEXT: xorl {{[0-9]+}}(%esp), %edx 2249 ; X32-NEXT: movl %edx, {{[0-9]+}}(%esp) 2250 ; X32-NEXT: xorl {{[0-9]+}}(%esp), %eax 2251 ; X32-NEXT: movl %eax, {{[0-9]+}}(%esp) 2252 ; X32-NEXT: xorl {{[0-9]+}}(%esp), %ecx 2253 ; X32-NEXT: movl %ecx, {{[0-9]+}}(%esp) 2254 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2255 ; X32-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 2256 ; X32-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 2257 ; X32-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 2258 ; X32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2259 ; X32-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2260 ; X32-NEXT: 
unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2261 ; X32-NEXT: leal -4(%ebp), %esp 2262 ; X32-NEXT: popl %esi 2263 ; X32-NEXT: popl %ebp 2264 ; X32-NEXT: retl 2265 ; 2266 ; X64-LABEL: test_mm_xor_ps: 2267 ; X64: # BB#0: 2268 ; X64-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 2269 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rax 2270 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %r8 2271 ; X64-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 2272 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rdx 2273 ; X64-NEXT: movq %rdx, %rsi 2274 ; X64-NEXT: xorl %eax, %edx 2275 ; X64-NEXT: shrq $32, %rax 2276 ; X64-NEXT: movq -{{[0-9]+}}(%rsp), %rcx 2277 ; X64-NEXT: movq %rcx, %rdi 2278 ; X64-NEXT: xorl %r8d, %ecx 2279 ; X64-NEXT: shrq $32, %r8 2280 ; X64-NEXT: shrq $32, %rsi 2281 ; X64-NEXT: shrq $32, %rdi 2282 ; X64-NEXT: movl %ecx, -{{[0-9]+}}(%rsp) 2283 ; X64-NEXT: movl %edx, -{{[0-9]+}}(%rsp) 2284 ; X64-NEXT: xorl %r8d, %edi 2285 ; X64-NEXT: movl %edi, -{{[0-9]+}}(%rsp) 2286 ; X64-NEXT: xorl %eax, %esi 2287 ; X64-NEXT: movl %esi, -{{[0-9]+}}(%rsp) 2288 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 2289 ; X64-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 2290 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2291 ; X64-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 2292 ; X64-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 2293 ; X64-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 2294 ; X64-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2295 ; X64-NEXT: retq 2296 %arg0 = bitcast <4 x float> %a0 to <4 x i32> 2297 %arg1 = bitcast <4 x float> %a1 to <4 x i32> 2298 %res = xor <4 x i32> %arg0, %arg1 2299 %bc = bitcast <4 x i32> %res to <4 x float> 2300 ret <4 x float> %bc 2301 } 2302 2303 !0 = !{i32 1} 2304