Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
      2 ; RUN: llc -mtriple=armeb-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
      3 
      4 define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
      5 ; CHECK-LABEL: vcombine8:
      6 ; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
      7 ; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]
      8 ; Little-endian target: each loaded D register is moved to a GPR pair (r0/r1, r2/r3).
      9 ; CHECK-LE-DAG: vmov r0, r1, [[LD0]]
     10 ; CHECK-LE-DAG: vmov r2, r3, [[LD1]]
     11 ; Big-endian target: the two GPRs of each pair are listed in swapped order.
     12 ; CHECK-BE-DAG: vmov r1, r0, d16
     13 ; CHECK-BE-DAG: vmov r3, r2, d17
     14 	%tmp1 = load <8 x i8>, <8 x i8>* %A  ; supplies result lanes 0-7
     15 	%tmp2 = load <8 x i8>, <8 x i8>* %B  ; supplies result lanes 8-15
     16 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>  ; identity mask: concatenate %tmp1 and %tmp2
     17 	ret <16 x i8> %tmp3
     18 }
     19 
     20 define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
     21 ; CHECK-LABEL: vcombine16:
     22 ; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
     23 ; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]
     24 ; Little-endian target: each loaded D register is moved to a GPR pair (r0/r1, r2/r3).
     25 ; CHECK-LE-DAG: vmov r0, r1, [[LD0]]
     26 ; CHECK-LE-DAG: vmov r2, r3, [[LD1]]
     27 ; Big-endian target: the two GPRs of each pair are listed in swapped order.
     28 ; CHECK-BE-DAG: vmov r1, r0, d16
     29 ; CHECK-BE-DAG: vmov r3, r2, d17
     30 	%tmp1 = load <4 x i16>, <4 x i16>* %A  ; supplies result lanes 0-3
     31 	%tmp2 = load <4 x i16>, <4 x i16>* %B  ; supplies result lanes 4-7
     32 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>  ; identity mask: concatenate %tmp1 and %tmp2
     33 	ret <8 x i16> %tmp3
     34 }
     35 
     36 define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
     37 ; CHECK-LABEL: vcombine32:
     38 ; The two 64-bit halves are loaded with vldr; the loads may appear in either order.
     39 ; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
     40 ; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]
     41 ; Little-endian target: the D registers are moved to r0/r1 and then r2/r3, in this order.
     42 ; CHECK-LE: vmov r0, r1, [[LD0]]
     43 ; CHECK-LE: vmov r2, r3, [[LD1]]
     44 ; Big-endian target: the two GPRs of each pair are listed in swapped order.
     45 ; CHECK-BE: vmov r1, r0, d16
     46 ; CHECK-BE: vmov r3, r2, d17
     47 	%tmp1 = load <2 x i32>, <2 x i32>* %A  ; supplies result lanes 0-1
     48 	%tmp2 = load <2 x i32>, <2 x i32>* %B  ; supplies result lanes 2-3
     49 	%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>  ; identity mask: concatenate %tmp1 and %tmp2
     50 	ret <4 x i32> %tmp3
     51 }
     52 
     53 define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
     54 ; CHECK-LABEL: vcombinefloat:
     55 ; The two 64-bit halves are loaded with vldr; the loads may appear in either order.
     56 ; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
     57 ; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]
     58 ; Little-endian target: the D registers are moved to r0/r1 and then r2/r3, in this order.
     59 ; CHECK-LE: vmov r0, r1, [[LD0]]
     60 ; CHECK-LE: vmov r2, r3, [[LD1]]
     61 ; Big-endian target: the two GPRs of each pair are listed in swapped order.
     62 ; CHECK-BE: vmov r1, r0, d16
     63 ; CHECK-BE: vmov r3, r2, d17
     64 	%tmp1 = load <2 x float>, <2 x float>* %A  ; supplies result lanes 0-1
     65 	%tmp2 = load <2 x float>, <2 x float>* %B  ; supplies result lanes 2-3
     66 	%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>  ; identity mask: concatenate %tmp1 and %tmp2
     67 	ret <4 x float> %tmp3
     68 }
     69 
     70 define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
     71 ; CHECK-LABEL: vcombine64:
     72 ; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
     73 ; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]
     74 ; Little-endian target: the loaded D registers are moved to r0/r1 and then r2/r3.
     75 ; CHECK-LE: vmov r0, r1, [[LD0]]
     76 ; CHECK-LE: vmov r2, r3, [[LD1]]
     77 ; Big-endian target: same loaded D registers, with the GPRs of each pair swapped.
     78 ; CHECK-BE: vmov r1, r0, [[LD0]]
     79 ; CHECK-BE: vmov r3, r2, [[LD1]]
     80 	%tmp1 = load <1 x i64>, <1 x i64>* %A  ; supplies result lane 0
     81 	%tmp2 = load <1 x i64>, <1 x i64>* %B  ; supplies result lane 1
     82 	%tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>  ; identity mask: concatenate %tmp1 and %tmp2
     83 	ret <2 x i64> %tmp3
     84 }
     85 
     86 ; Check for vget_low and vget_high implemented with shufflevector.  PR8411.
     87 ; They should not require storing to the stack.
     88 
     89 define <4 x i16> @vget_low16(<8 x i16>* %A) nounwind {
     90 ; CHECK-LABEL: vget_low16:
     91 ; CHECK-NOT: vst
     92 ; CHECK-LE: vmov r0, r1, d16
     93 ; CHECK-BE: vmov r1, r0, d16
     94 	%tmp1 = load <8 x i16>, <8 x i16>* %A
     95 	%tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>  ; keep lanes 0-3: the low half of %tmp1
     96 	ret <4 x i16> %tmp2
     97 }
     98 
     99 define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind {
    100 ; CHECK-LABEL: vget_high8:
    101 ; CHECK-NOT: vst
    102 ; CHECK-LE-NOT: vld1.64 {d16, d17}, [r0]
    103 ; CHECK-LE: vldr  d16, [r0, #8]
    104 ; CHECK-LE: vmov  r0, r1, d16
    105 ; CHECK-BE: vmov r1, r0, d16
    106 	%tmp1 = load <16 x i8>, <16 x i8>* %A
    107 	%tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>  ; keep lanes 8-15: the high half of %tmp1
    108 	ret <8 x i8> %tmp2
    109 }
    110 
    111 ; Combine of two lane-0 splats: vcombine(vld1_dup(p), vld1_dup(p + 1)). %src is not used by the body.
    112 define <8 x i16> @vcombine_vdup(<8 x i16> %src, i16* nocapture readonly %p) {
    113 ; CHECK-LABEL: vcombine_vdup:
    114 ; CHECK: vld1.16 {d16[]},
    115 ; CHECK: vld1.16 {d17[]},
    116 ; CHECK-LE: vmov    r0, r1, d16
    117 ; CHECK-LE: vmov    r2, r3, d17
    118 	%elt0 = load i16, i16* %p, align 2
    119 	%ins0 = insertelement <4 x i16> undef, i16 %elt0, i32 0
    120 	%dup0 = shufflevector <4 x i16> %ins0, <4 x i16> undef, <4 x i32> zeroinitializer  ; splat lane 0 of %ins0
    121 	%p.next = getelementptr inbounds i16, i16* %p, i32 1
    122 	%elt1 = load i16, i16* %p.next, align 2
    123 	%ins1 = insertelement <4 x i16> undef, i16 %elt1, i32 0
    124 	%dup1 = shufflevector <4 x i16> %ins1, <4 x i16> undef, <4 x i32> zeroinitializer  ; splat lane 0 of %ins1
    125 	%combined = shufflevector <4 x i16> %dup0, <4 x i16> %dup1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>  ; identity mask: concatenate the two splats
    126 	ret <8 x i16> %combined
    127 }
    128