Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefix=X32
      3 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse4a,+avx | FileCheck %s --check-prefix=X32
      4 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefix=X64
      5 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse4a,+avx | FileCheck %s --check-prefix=X64
      6 
      7 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse4a-builtins.c
      8 
      9 define <2 x i64> @test_mm_extracti_si64(<2 x i64> %x) { ; fast-isel check for llvm.x86.sse4a.extrqi with constant i8 operands; both triples expect one extrq carrying two immediates
     10 ; X32-LABEL: test_mm_extracti_si64:
     11 ; X32:       # BB#0:
     12 ; X32-NEXT:    extrq $2, $3, %xmm0
     13 ; X32-NEXT:    retl
     14 ;
     15 ; X64-LABEL: test_mm_extracti_si64:
     16 ; X64:       # BB#0:
     17 ; X64-NEXT:    extrq $2, $3, %xmm0
     18 ; X64-NEXT:    retq
     19   %res = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) ; the i8 operands (3, 2) appear in reversed order ($2, $3) in the expected assembly
     20   ret <2 x i64> %res ; the intrinsic result is returned unchanged
     21 }
     22 declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind readnone ; intrinsic under test: (<2 x i64>, i8, i8) -> <2 x i64>
     23 
     24 define <2 x i64> @test_mm_extract_si64(<2 x i64> %x, <2 x i64> %y) { ; fast-isel check for llvm.x86.sse4a.extrq (two-register form); both triples expect a single extrq of xmm1 into xmm0
     25 ; X32-LABEL: test_mm_extract_si64:
     26 ; X32:       # BB#0:
     27 ; X32-NEXT:    extrq %xmm1, %xmm0
     28 ; X32-NEXT:    retl
     29 ;
     30 ; X64-LABEL: test_mm_extract_si64:
     31 ; X64:       # BB#0:
     32 ; X64-NEXT:    extrq %xmm1, %xmm0
     33 ; X64-NEXT:    retq
     34   %bc = bitcast <2 x i64> %y to <16 x i8> ; the intrinsic takes its second operand as <16 x i8>; the expected output contains no instruction for this cast
     35   %res = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %bc) ; %x is the first operand, the reinterpreted %y the second
     36   ret <2 x i64> %res ; the intrinsic result is returned unchanged
     37 }
     38 declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind readnone ; intrinsic under test: (<2 x i64>, <16 x i8>) -> <2 x i64>
     39 
     40 define <2 x i64> @test_mm_inserti_si64(<2 x i64> %x, <2 x i64> %y) { ; fast-isel check for llvm.x86.sse4a.insertqi with constant i8 operands; both triples expect one insertq carrying two immediates
     41 ; X32-LABEL: test_mm_inserti_si64:
     42 ; X32:       # BB#0:
     43 ; X32-NEXT:    insertq $6, $5, %xmm1, %xmm0
     44 ; X32-NEXT:    retl
     45 ;
     46 ; X64-LABEL: test_mm_inserti_si64:
     47 ; X64:       # BB#0:
     48 ; X64-NEXT:    insertq $6, $5, %xmm1, %xmm0
     49 ; X64-NEXT:    retq
     50   %res = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 5, i8 6) ; the i8 operands (5, 6) appear in reversed order ($6, $5) in the expected assembly
     51   ret <2 x i64> %res ; the intrinsic result is returned unchanged
     52 }
     53 declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind readnone ; intrinsic under test: (<2 x i64>, <2 x i64>, i8, i8) -> <2 x i64>
     54 
     55 define <2 x i64> @test_mm_insert_si64(<2 x i64> %x, <2 x i64> %y) { ; fast-isel check for llvm.x86.sse4a.insertq (two-register form); both triples expect a single insertq of xmm1 into xmm0
     56 ; X32-LABEL: test_mm_insert_si64:
     57 ; X32:       # BB#0:
     58 ; X32-NEXT:    insertq %xmm1, %xmm0
     59 ; X32-NEXT:    retl
     60 ;
     61 ; X64-LABEL: test_mm_insert_si64:
     62 ; X64:       # BB#0:
     63 ; X64-NEXT:    insertq %xmm1, %xmm0
     64 ; X64-NEXT:    retq
     65   %res = call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) ; both vector arguments are passed straight through, no cast needed
     66   ret <2 x i64> %res ; the intrinsic result is returned unchanged
     67 }
     68 declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind readnone ; intrinsic under test: (<2 x i64>, <2 x i64>) -> <2 x i64>
     69 
     70 define void @test_stream_sd(double* %p, <2 x double> %a) { ; fast-isel check that a nontemporal store of element 0 of %a to %p is selected as movntsd. REVIEW - name lacks the "mm_" segment used by the sibling tests (cf. test_mm_stream_ss); confirm whether intentional
     71 ; X32-LABEL: test_stream_sd:
     72 ; X32:       # BB#0:
     73 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     74 ; X32-NEXT:    movntsd %xmm0, (%eax)
     75 ; X32-NEXT:    retl
     76 ;
     77 ; X64-LABEL: test_stream_sd:
     78 ; X64:       # BB#0:
     79 ; X64-NEXT:    movntsd %xmm0, (%rdi)
     80 ; X64-NEXT:    retq
     81   %1 = extractelement <2 x double> %a, i64 0 ; take element 0 of the vector argument; the expected output has no separate instruction for it
     82   store double %1, double* %p, align 1, !nontemporal !1 ; align-1 store tagged !nontemporal; the 32-bit expectation first loads %p from the stack, the 64-bit one stores through %rdi directly
     83   ret void
     84 }
     85 
     86 define void @test_mm_stream_ss(float* %p, <4 x float> %a) { ; fast-isel check that a nontemporal store of element 0 of %a to %p is selected as movntss
     87 ; X32-LABEL: test_mm_stream_ss:
     88 ; X32:       # BB#0:
     89 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
     90 ; X32-NEXT:    movntss %xmm0, (%eax)
     91 ; X32-NEXT:    retl
     92 ;
     93 ; X64-LABEL: test_mm_stream_ss:
     94 ; X64:       # BB#0:
     95 ; X64-NEXT:    movntss %xmm0, (%rdi)
     96 ; X64-NEXT:    retq
     97   %1 = extractelement <4 x float> %a, i64 0 ; take element 0 of the vector argument; the expected output has no separate instruction for it
     98   store float %1, float* %p, align 1, !nontemporal !1 ; align-1 store tagged !nontemporal; the 32-bit expectation first loads %p from the stack, the 64-bit one stores through %rdi directly
     99   ret void
    100 }
    101 
    102 !1 = !{i32 1} ; metadata node referenced by the !nontemporal attachments on the stores in test_stream_sd and test_mm_stream_ss
    103