Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
      2 
      3 %struct.__neon_int8x8x2_t = type { <8 x i8>,  <8 x i8> }
      4 %struct.__neon_int16x4x2_t = type { <4 x i16>, <4 x i16> }
      5 %struct.__neon_int32x2x2_t = type { <2 x i32>, <2 x i32> }
      6 %struct.__neon_float32x2x2_t = type { <2 x float>, <2 x float> }
      7 %struct.__neon_int64x1x2_t = type { <1 x i64>, <1 x i64> }
      8 
      9 %struct.__neon_int8x16x2_t = type { <16 x i8>,  <16 x i8> }
     10 %struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
     11 %struct.__neon_int32x4x2_t = type { <4 x i32>, <4 x i32> }
     12 %struct.__neon_float32x4x2_t = type { <4 x float>, <4 x float> }
     13 
     14 define <8 x i8> @vld2i8(i8* %A) nounwind {
     15 ;CHECK: vld2i8:
     16 ;Check the alignment value.  Max for this instruction is 128 bits:
     17 ;CHECK: vld2.8 {d16, d17}, [r0:64]
     18 	%tmp1 = call %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8* %A, i32 8)
     19         %tmp2 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 0
     20         %tmp3 = extractvalue %struct.__neon_int8x8x2_t %tmp1, 1
     21         %tmp4 = add <8 x i8> %tmp2, %tmp3
     22 	ret <8 x i8> %tmp4
     23 }
     24 
     25 define <4 x i16> @vld2i16(i16* %A) nounwind {
     26 ;CHECK: vld2i16:
     27 ;Check the alignment value.  Max for this instruction is 128 bits:
     28 ;CHECK: vld2.16 {d16, d17}, [r0:128]
     29 	%tmp0 = bitcast i16* %A to i8*
     30 	%tmp1 = call %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8* %tmp0, i32 32)
     31         %tmp2 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 0
     32         %tmp3 = extractvalue %struct.__neon_int16x4x2_t %tmp1, 1
     33         %tmp4 = add <4 x i16> %tmp2, %tmp3
     34 	ret <4 x i16> %tmp4
     35 }
     36 
     37 define <2 x i32> @vld2i32(i32* %A) nounwind {
     38 ;CHECK: vld2i32:
     39 ;CHECK: vld2.32
     40 	%tmp0 = bitcast i32* %A to i8*
     41 	%tmp1 = call %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8* %tmp0, i32 1)
     42         %tmp2 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 0
     43         %tmp3 = extractvalue %struct.__neon_int32x2x2_t %tmp1, 1
     44         %tmp4 = add <2 x i32> %tmp2, %tmp3
     45 	ret <2 x i32> %tmp4
     46 }
     47 
     48 define <2 x float> @vld2f(float* %A) nounwind {
     49 ;CHECK: vld2f:
     50 ;CHECK: vld2.32
     51 	%tmp0 = bitcast float* %A to i8*
     52 	%tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1)
     53         %tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
     54         %tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1
     55         %tmp4 = fadd <2 x float> %tmp2, %tmp3
     56 	ret <2 x float> %tmp4
     57 }
     58 
     59 ;Check for a post-increment updating load. 
     60 define <2 x float> @vld2f_update(float** %ptr) nounwind {
     61 ;CHECK: vld2f_update:
     62 ;CHECK: vld2.32 {d16, d17}, [r1]!
     63 	%A = load float** %ptr
     64 	%tmp0 = bitcast float* %A to i8*
     65 	%tmp1 = call %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8* %tmp0, i32 1)
     66 	%tmp2 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 0
     67 	%tmp3 = extractvalue %struct.__neon_float32x2x2_t %tmp1, 1
     68 	%tmp4 = fadd <2 x float> %tmp2, %tmp3
     69 	%tmp5 = getelementptr float* %A, i32 4
     70 	store float* %tmp5, float** %ptr
     71 	ret <2 x float> %tmp4
     72 }
     73 
     74 define <1 x i64> @vld2i64(i64* %A) nounwind {
     75 ;CHECK: vld2i64:
     76 ;Check the alignment value.  Max for this instruction is 128 bits:
     77 ;CHECK: vld1.64 {d16, d17}, [r0:128]
     78 	%tmp0 = bitcast i64* %A to i8*
     79 	%tmp1 = call %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8* %tmp0, i32 32)
     80         %tmp2 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 0
     81         %tmp3 = extractvalue %struct.__neon_int64x1x2_t %tmp1, 1
     82         %tmp4 = add <1 x i64> %tmp2, %tmp3
     83 	ret <1 x i64> %tmp4
     84 }
     85 
     86 define <16 x i8> @vld2Qi8(i8* %A) nounwind {
     87 ;CHECK: vld2Qi8:
     88 ;Check the alignment value.  Max for this instruction is 256 bits:
     89 ;CHECK: vld2.8 {d16, d17, d18, d19}, [r0:64]
     90 	%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 8)
     91         %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
     92         %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
     93         %tmp4 = add <16 x i8> %tmp2, %tmp3
     94 	ret <16 x i8> %tmp4
     95 }
     96 
     97 ;Check for a post-increment updating load with register increment.
     98 define <16 x i8> @vld2Qi8_update(i8** %ptr, i32 %inc) nounwind {
     99 ;CHECK: vld2Qi8_update:
    100 ;CHECK: vld2.8 {d16, d17, d18, d19}, [r2:128], r1
    101 	%A = load i8** %ptr
    102 	%tmp1 = call %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8* %A, i32 16)
    103         %tmp2 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 0
    104         %tmp3 = extractvalue %struct.__neon_int8x16x2_t %tmp1, 1
    105         %tmp4 = add <16 x i8> %tmp2, %tmp3
    106 	%tmp5 = getelementptr i8* %A, i32 %inc
    107 	store i8* %tmp5, i8** %ptr
    108 	ret <16 x i8> %tmp4
    109 }
    110 
    111 define <8 x i16> @vld2Qi16(i16* %A) nounwind {
    112 ;CHECK: vld2Qi16:
    113 ;Check the alignment value.  Max for this instruction is 256 bits:
    114 ;CHECK: vld2.16 {d16, d17, d18, d19}, [r0:128]
    115 	%tmp0 = bitcast i16* %A to i8*
    116 	%tmp1 = call %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8* %tmp0, i32 16)
    117         %tmp2 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 0
    118         %tmp3 = extractvalue %struct.__neon_int16x8x2_t %tmp1, 1
    119         %tmp4 = add <8 x i16> %tmp2, %tmp3
    120 	ret <8 x i16> %tmp4
    121 }
    122 
    123 define <4 x i32> @vld2Qi32(i32* %A) nounwind {
    124 ;CHECK: vld2Qi32:
    125 ;Check the alignment value.  Max for this instruction is 256 bits:
    126 ;CHECK: vld2.32 {d16, d17, d18, d19}, [r0:256]
    127 	%tmp0 = bitcast i32* %A to i8*
    128 	%tmp1 = call %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8* %tmp0, i32 64)
    129         %tmp2 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 0
    130         %tmp3 = extractvalue %struct.__neon_int32x4x2_t %tmp1, 1
    131         %tmp4 = add <4 x i32> %tmp2, %tmp3
    132 	ret <4 x i32> %tmp4
    133 }
    134 
    135 define <4 x float> @vld2Qf(float* %A) nounwind {
    136 ;CHECK: vld2Qf:
    137 ;CHECK: vld2.32
    138 	%tmp0 = bitcast float* %A to i8*
    139 	%tmp1 = call %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8* %tmp0, i32 1)
    140         %tmp2 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 0
    141         %tmp3 = extractvalue %struct.__neon_float32x4x2_t %tmp1, 1
    142         %tmp4 = fadd <4 x float> %tmp2, %tmp3
    143 	ret <4 x float> %tmp4
    144 }
    145 
    146 declare %struct.__neon_int8x8x2_t @llvm.arm.neon.vld2.v8i8(i8*, i32) nounwind readonly
    147 declare %struct.__neon_int16x4x2_t @llvm.arm.neon.vld2.v4i16(i8*, i32) nounwind readonly
    148 declare %struct.__neon_int32x2x2_t @llvm.arm.neon.vld2.v2i32(i8*, i32) nounwind readonly
    149 declare %struct.__neon_float32x2x2_t @llvm.arm.neon.vld2.v2f32(i8*, i32) nounwind readonly
    150 declare %struct.__neon_int64x1x2_t @llvm.arm.neon.vld2.v1i64(i8*, i32) nounwind readonly
    151 
    152 declare %struct.__neon_int8x16x2_t @llvm.arm.neon.vld2.v16i8(i8*, i32) nounwind readonly
    153 declare %struct.__neon_int16x8x2_t @llvm.arm.neon.vld2.v8i16(i8*, i32) nounwind readonly
    154 declare %struct.__neon_int32x4x2_t @llvm.arm.neon.vld2.v4i32(i8*, i32) nounwind readonly
    155 declare %struct.__neon_float32x4x2_t @llvm.arm.neon.vld2.v4f32(i8*, i32) nounwind readonly
    156