; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
; RUN: llc -mtriple=armeb-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE
      3 
      4 define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
      5 ; CHECK: vcombine8
      6 ; CHECK-LE: vmov r0, r1, d16
      7 ; CHECK-LE: vmov r2, r3, d17
      8 ; CHECK-BE: vmov r1, r0, d16
      9 ; CHECK-BE: vmov r3, r2, d17
     10 	%tmp1 = load <8 x i8>* %A
     11 	%tmp2 = load <8 x i8>* %B
     12 	%tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
     13 	ret <16 x i8> %tmp3
     14 }
     15 
     16 define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
     17 ; CHECK: vcombine16
     18 ; CHECK-LE: vmov r0, r1, d16
     19 ; CHECK-LE: vmov r2, r3, d17
     20 ; CHECK-BE: vmov r1, r0, d16
     21 ; CHECK-BE: vmov r3, r2, d17
     22 	%tmp1 = load <4 x i16>* %A
     23 	%tmp2 = load <4 x i16>* %B
     24 	%tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
     25 	ret <8 x i16> %tmp3
     26 }
     27 
     28 define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
     29 ; CHECK: vcombine32
     30 ; CHECK-LE: vmov r0, r1, d16
     31 ; CHECK-LE: vmov r2, r3, d17
     32 ; CHECK-BE: vmov r1, r0, d16
     33 ; CHECK-BE: vmov r3, r2, d17
     34 	%tmp1 = load <2 x i32>* %A
     35 	%tmp2 = load <2 x i32>* %B
     36 	%tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     37 	ret <4 x i32> %tmp3
     38 }
     39 
     40 define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
     41 ; CHECK: vcombinefloat
     42 ; CHECK-LE: vmov r0, r1, d16
     43 ; CHECK-LE: vmov r2, r3, d17
     44 ; CHECK-BE: vmov r1, r0, d16
     45 ; CHECK-BE: vmov r3, r2, d17
     46 	%tmp1 = load <2 x float>* %A
     47 	%tmp2 = load <2 x float>* %B
     48 	%tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     49 	ret <4 x float> %tmp3
     50 }
     51 
     52 define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
     53 ; CHECK: vcombine64
     54 ; CHECK-LE: vmov r0, r1, d16
     55 ; CHECK-LE: vmov r2, r3, d17
     56 ; CHECK-BE: vmov r1, r0, d16
     57 ; CHECK-BE: vmov r3, r2, d17
     58 	%tmp1 = load <1 x i64>* %A
     59 	%tmp2 = load <1 x i64>* %B
     60 	%tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>
     61 	ret <2 x i64> %tmp3
     62 }
     63 
; Check vget_low and vget_high when implemented with shufflevector (PR8411).
; They should not require storing to the stack.
     66 
     67 define <4 x i16> @vget_low16(<8 x i16>* %A) nounwind {
     68 ; CHECK: vget_low16
     69 ; CHECK-NOT: vst
     70 ; CHECK-LE: vmov r0, r1, d16
     71 ; CHECK-BE: vmov r1, r0, d16
     72 	%tmp1 = load <8 x i16>* %A
     73         %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     74         ret <4 x i16> %tmp2
     75 }
     76 
     77 define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind {
     78 ; CHECK: vget_high8
     79 ; CHECK-NOT: vst
     80 ; CHECK-LE: vmov r0, r1, d17
     81 ; CHECK-BE: vmov r1, r0, d16
     82 	%tmp1 = load <16 x i8>* %A
     83         %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
     84         ret <8 x i8> %tmp2
     85 }
     86