1 // RUN: %clang_cc1 -triple thumbv7-apple-darwin \ 2 // RUN: -target-abi apcs-gnu \ 3 // RUN: -target-cpu cortex-a8 \ 4 // RUN: -mfloat-abi soft \ 5 // RUN: -target-feature +soft-float-abi \ 6 // RUN: -ffreestanding \ 7 // RUN: -emit-llvm -w -o - %s | opt -S -mem2reg | FileCheck %s 8 9 #include <arm_neon.h> 10 11 // Check that the vget_low/vget_high intrinsics generate a single shuffle 12 // without any bitcasting. 13 int8x8_t low_s8(int8x16_t a) { 14 // CHECK: shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 15 return vget_low_s8(a); 16 } 17 18 uint8x8_t low_u8 (uint8x16_t a) { 19 // CHECK: shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 20 return vget_low_u8(a); 21 } 22 23 int16x4_t low_s16( int16x8_t a) { 24 // CHECK: shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 25 return vget_low_s16(a); 26 } 27 28 uint16x4_t low_u16(uint16x8_t a) { 29 // CHECK: shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 30 return vget_low_u16(a); 31 } 32 33 int32x2_t low_s32( int32x4_t a) { 34 // CHECK: shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1> 35 return vget_low_s32(a); 36 } 37 38 uint32x2_t low_u32(uint32x4_t a) { 39 // CHECK: shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 0, i32 1> 40 return vget_low_u32(a); 41 } 42 43 int64x1_t low_s64( int64x2_t a) { 44 // CHECK: shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer 45 return vget_low_s64(a); 46 } 47 48 uint64x1_t low_u64(uint64x2_t a) { 49 // CHECK: shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> zeroinitializer 50 return vget_low_u64(a); 51 } 52 53 poly8x8_t low_p8 (poly8x16_t a) { 54 // CHECK: shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 55 return vget_low_p8(a); 56 } 57 58 poly16x4_t low_p16(poly16x8_t a) { 59 // CHECK: shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 60 return vget_low_p16(a); 61 } 62 63 float32x2_t low_f32(float32x4_t a) { 64 // CHECK: shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1> 65 return vget_low_f32(a); 66 } 67 68 69 int8x8_t high_s8(int8x16_t a) { 70 // CHECK: shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 71 return vget_high_s8(a); 72 } 73 74 uint8x8_t high_u8 (uint8x16_t a) { 75 // CHECK: shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 76 return vget_high_u8(a); 77 } 78 79 int16x4_t high_s16( int16x8_t a) { 80 // CHECK: shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 81 return vget_high_s16(a); 82 } 83 84 uint16x4_t high_u16(uint16x8_t a) { 85 // CHECK: shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 86 return vget_high_u16(a); 87 } 88 89 int32x2_t high_s32( int32x4_t a) { 90 // CHECK: shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 91 return vget_high_s32(a); 92 } 93 94 uint32x2_t high_u32(uint32x4_t a) { 95 // CHECK: shufflevector <4 x i32> %a, <4 x i32> %a, <2 x i32> <i32 2, i32 3> 96 return vget_high_u32(a); 97 } 98 99 int64x1_t high_s64( int64x2_t a) { 100 // CHECK: shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1> 101 return vget_high_s64(a); 102 } 103 104 uint64x1_t high_u64(uint64x2_t a) { 105 // CHECK: shufflevector <2 x i64> %a, <2 x i64> %a, <1 x i32> <i32 1> 106 return vget_high_u64(a); 107 } 108 109 poly8x8_t high_p8 (poly8x16_t a) { 110 // CHECK: shufflevector <16 x i8> %a, <16 x i8> %a, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> 111 return vget_high_p8(a); 112 } 113 114 poly16x4_t high_p16(poly16x8_t a) { 115 // CHECK: shufflevector <8 x i16> %a, <8 x i16> %a, <4 x i32> <i32 4, i32 5, i32 6, i32 7> 116 return vget_high_p16(a); 117 } 118 119 float32x2_t high_f32(float32x4_t a) { 120 // CHECK: shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 2, i32 3> 121 return vget_high_f32(a); 122 } 123 124