; RUN: llc -mtriple=arm-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-LE
; RUN: llc -mtriple=armeb-eabi -float-abi=soft -mattr=+neon %s -o - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK-BE

; Codegen tests for NEON vector combine / split patterns written as
; shufflevector.  The same IR is compiled twice: once for arm-eabi (checked
; with the LE prefix) and once for armeb-eabi (checked with the BE prefix).
; Both runs use the soft-float ABI, so every vector result is expected to be
; moved out of d-registers into core registers with vmov.  The two runs differ
; only in the order of the core registers inside each vmov pair.
;
; Function names are matched with the label directive on "name:" so that each
; group of checks is confined to the body of its own function.

; Concatenate two <8 x i8> values into one <16 x i8> (identity mask 0..15).
define <16 x i8> @vcombine8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vcombine8:
; CHECK-LE: vmov r0, r1, d16
; CHECK-LE: vmov r2, r3, d17
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %tmp3
}

; Concatenate two <4 x i16> values into one <8 x i16> (identity mask 0..7).
define <8 x i16> @vcombine16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vcombine16:
; CHECK-LE: vmov r0, r1, d16
; CHECK-LE: vmov r2, r3, d17
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %tmp3
}

; Concatenate two <2 x i32> values into one <4 x i32> (identity mask 0..3).
define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-LABEL: vcombine32:
; CHECK-LE: vmov r0, r1, d16
; CHECK-LE: vmov r2, r3, d17
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %tmp2 = load <2 x i32>, <2 x i32>* %B
  %tmp3 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i32> %tmp3
}

; Concatenate two <2 x float> values into one <4 x float> (identity mask 0..3).
define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-LABEL: vcombinefloat:
; CHECK-LE: vmov r0, r1, d16
; CHECK-LE: vmov r2, r3, d17
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
  %tmp1 = load <2 x float>, <2 x float>* %A
  %tmp2 = load <2 x float>, <2 x float>* %B
  %tmp3 = shufflevector <2 x float> %tmp1, <2 x float> %tmp2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x float> %tmp3
}

; Concatenate two <1 x i64> values into one <2 x i64> (identity mask 0..1).
define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
; CHECK-LABEL: vcombine64:
; CHECK-LE: vmov r0, r1, d16
; CHECK-LE: vmov r2, r3, d17
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
  %tmp1 = load <1 x i64>, <1 x i64>* %A
  %tmp2 = load <1 x i64>, <1 x i64>* %B
  %tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>
  ret <2 x i64> %tmp3
}

; Check for vget_low and vget_high implemented with shufflevector.  PR8411.
; They should not require storing to the stack.

; Extract the low four lanes (mask 0..3) of an <8 x i16>.
define <4 x i16> @vget_low16(<8 x i16>* %A) nounwind {
; CHECK-LABEL: vget_low16:
; CHECK-NOT: vst
; CHECK-LE: vmov r0, r1, d16
; CHECK-BE: vmov r1, r0, d16
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i16> %tmp2
}

; Extract the high eight lanes (mask 8..15) of a <16 x i8>.
; NOTE(review): the little-endian run expects the result in d17 while the
; big-endian run expects d16; presumably the big-endian lowering first moves
; the high half into d16 -- confirm against actual llc output.
define <8 x i8> @vget_high8(<16 x i8>* %A) nounwind {
; CHECK-LABEL: vget_high8:
; CHECK-NOT: vst
; CHECK-LE: vmov r0, r1, d17
; CHECK-BE: vmov r1, r0, d16
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = shufflevector <16 x i8> %tmp1, <16 x i8> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %tmp2
}