Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefixes=CHECK,X86
      3 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4a,+avx | FileCheck %s --check-prefixes=CHECK,X86
      4 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefixes=CHECK,X64
      5 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4a,+avx | FileCheck %s --check-prefixes=CHECK,X64
      6 
      7 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse4a-builtins.c
      8 
      9 define <2 x i64> @test_mm_extracti_si64(<2 x i64> %x) {
     10 ; CHECK-LABEL: test_mm_extracti_si64:
     11 ; CHECK:       # %bb.0:
     12 ; CHECK-NEXT:    extrq $2, $3, %xmm0
     13 ; CHECK-NEXT:    ret{{[l|q]}}
          ; Immediate form of EXTRQ: calls the extrqi intrinsic with the two i8
          ; constants 3 and 2. Note that the expected assembly prints them in the
          ; opposite order ($2, $3) from the IR call operands.
          ; The shared CHECK prefix is used because the expected output is the same
          ; for the i386 and x86_64 RUN lines apart from the ret suffix.
          ; Presumably mirrors clang's lowering of _mm_extracti_si64 -- confirm
          ; against clang/test/CodeGen/sse4a-builtins.c.
     14   %res = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
     15   ret <2 x i64> %res
     16 }
        ; extrqi takes the source vector plus two i8 operands (constants in the test above).
     17 declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind readnone
     18 
     19 define <2 x i64> @test_mm_extract_si64(<2 x i64> %x, <2 x i64> %y) {
     20 ; CHECK-LABEL: test_mm_extract_si64:
     21 ; CHECK:       # %bb.0:
     22 ; CHECK-NEXT:    extrq %xmm1, %xmm0
     23 ; CHECK-NEXT:    ret{{[l|q]}}
          ; Register form of EXTRQ: the control operand comes from %y instead of
          ; immediates. The intrinsic declares its second operand as <16 x i8>, so
          ; %y is bitcast first; the expected output contains no instruction for
          ; the bitcast, only the extrq itself.
          ; Presumably mirrors clang's lowering of _mm_extract_si64 -- confirm
          ; against clang/test/CodeGen/sse4a-builtins.c.
     24   %bc = bitcast <2 x i64> %y to <16 x i8>
     25   %res = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %bc)
     26   ret <2 x i64> %res
     27 }
        ; extrq takes the source vector and a <16 x i8> second operand.
     28 declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind readnone
     29 
     30 define <2 x i64> @test_mm_inserti_si64(<2 x i64> %x, <2 x i64> %y) {
     31 ; CHECK-LABEL: test_mm_inserti_si64:
     32 ; CHECK:       # %bb.0:
     33 ; CHECK-NEXT:    insertq $6, $5, %xmm1, %xmm0
     34 ; CHECK-NEXT:    ret{{[l|q]}}
          ; Immediate form of INSERTQ: calls the insertqi intrinsic with %x, %y and
          ; the two i8 constants 5 and 6. As with extrqi, the expected assembly
          ; prints the immediates in the opposite order ($6, $5) from the IR call.
          ; Presumably mirrors clang's lowering of _mm_inserti_si64 -- confirm
          ; against clang/test/CodeGen/sse4a-builtins.c.
     35   %res = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 5, i8 6)
     36   ret <2 x i64> %res
     37 }
        ; insertqi takes two <2 x i64> vectors plus two i8 operands (constants in the test above).
     38 declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind readnone
     39 
     40 define <2 x i64> @test_mm_insert_si64(<2 x i64> %x, <2 x i64> %y) {
     41 ; CHECK-LABEL: test_mm_insert_si64:
     42 ; CHECK:       # %bb.0:
     43 ; CHECK-NEXT:    insertq %xmm1, %xmm0
     44 ; CHECK-NEXT:    ret{{[l|q]}}
          ; Register form of INSERTQ: both operands are passed straight through as
          ; <2 x i64> (no bitcast is needed, unlike the extrq register form), and
          ; a single insertq %xmm1, %xmm0 is expected.
          ; Presumably mirrors clang's lowering of _mm_insert_si64 -- confirm
          ; against clang/test/CodeGen/sse4a-builtins.c.
     45   %res = call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
     46   ret <2 x i64> %res
     47 }
        ; insertq takes two <2 x i64> vectors and no immediate operands.
     48 declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind readnone
     49 
     50 define void @test_stream_sd(double* %p, <2 x double> %a) {
     51 ; X86-LABEL: test_stream_sd:
     52 ; X86:       # %bb.0:
     53 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
     54 ; X86-NEXT:    movntsd %xmm0, (%eax)
     55 ; X86-NEXT:    retl
     56 ;
     57 ; X64-LABEL: test_stream_sd:
     58 ; X64:       # %bb.0:
     59 ; X64-NEXT:    movntsd %xmm0, (%rdi)
     60 ; X64-NEXT:    retq
          ; Scalar non-temporal store of a double: element 0 of %a is extracted and
          ; stored to %p with !nontemporal metadata and align 1. The expected code
          ; is a single movntsd from %xmm0.
          ; Separate X86/X64 prefixes are used here because the pointer argument is
          ; first loaded from the stack into %eax on i386, but is used directly
          ; from %rdi on x86_64.
          ; NOTE(review): the sibling float test is named test_mm_stream_ss; this
          ; one lacks the "mm_" prefix -- confirm whether that is intentional
          ; before renaming, since the CHECK-LABEL lines depend on the symbol name.
     61   %1 = extractelement <2 x double> %a, i64 0
     62   store double %1, double* %p, align 1, !nontemporal !1
     63   ret void
     64 }
     65 
     66 define void @test_mm_stream_ss(float* %p, <4 x float> %a) {
     67 ; X86-LABEL: test_mm_stream_ss:
     68 ; X86:       # %bb.0:
     69 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
     70 ; X86-NEXT:    movntss %xmm0, (%eax)
     71 ; X86-NEXT:    retl
     72 ;
     73 ; X64-LABEL: test_mm_stream_ss:
     74 ; X64:       # %bb.0:
     75 ; X64-NEXT:    movntss %xmm0, (%rdi)
     76 ; X64-NEXT:    retq
          ; Scalar non-temporal store of a float: element 0 of %a is extracted and
          ; stored to %p with !nontemporal metadata and align 1. The expected code
          ; is a single movntss from %xmm0.
          ; Separate X86/X64 prefixes are used because the pointer argument is
          ; first loaded from the stack into %eax on i386, but is used directly
          ; from %rdi on x86_64.
     77   %1 = extractelement <4 x float> %a, i64 0
     78   store float %1, float* %p, align 1, !nontemporal !1
     79   ret void
     80 }
     81 
     82 !1 = !{i32 1}
        ; Metadata node referenced as "!nontemporal !1" by the two scalar stores in
        ; test_stream_sd and test_mm_stream_ss; it holds a single i32 entry of value 1.
     83