Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
      2 
      3 define void @vst1i8(i8* %A, <8 x i8>* %B) nounwind {
      4 ;CHECK: vst1i8:
      5 ;Check the alignment value.  Max for this instruction is 64 bits:
      6 ;CHECK: vst1.8 {d16}, [r0:64]
      7 	%tmp1 = load <8 x i8>* %B
      8 	call void @llvm.arm.neon.vst1.v8i8(i8* %A, <8 x i8> %tmp1, i32 16)
      9 	ret void
     10 }
     11 
     12 define void @vst1i16(i16* %A, <4 x i16>* %B) nounwind {
     13 ;CHECK: vst1i16:
     14 ;CHECK: vst1.16
     15 	%tmp0 = bitcast i16* %A to i8*
     16 	%tmp1 = load <4 x i16>* %B
     17 	call void @llvm.arm.neon.vst1.v4i16(i8* %tmp0, <4 x i16> %tmp1, i32 1)
     18 	ret void
     19 }
     20 
     21 define void @vst1i32(i32* %A, <2 x i32>* %B) nounwind {
     22 ;CHECK: vst1i32:
     23 ;CHECK: vst1.32
     24 	%tmp0 = bitcast i32* %A to i8*
     25 	%tmp1 = load <2 x i32>* %B
     26 	call void @llvm.arm.neon.vst1.v2i32(i8* %tmp0, <2 x i32> %tmp1, i32 1)
     27 	ret void
     28 }
     29 
     30 define void @vst1f(float* %A, <2 x float>* %B) nounwind {
     31 ;CHECK: vst1f:
     32 ;CHECK: vst1.32
     33 	%tmp0 = bitcast float* %A to i8*
     34 	%tmp1 = load <2 x float>* %B
     35 	call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1)
     36 	ret void
     37 }
     38 
     39 ;Check for a post-increment updating store.
     40 define void @vst1f_update(float** %ptr, <2 x float>* %B) nounwind {
     41 ;CHECK: vst1f_update:
     42 ;CHECK: vst1.32 {d16}, [r1]!
     43 	%A = load float** %ptr
     44 	%tmp0 = bitcast float* %A to i8*
     45 	%tmp1 = load <2 x float>* %B
     46 	call void @llvm.arm.neon.vst1.v2f32(i8* %tmp0, <2 x float> %tmp1, i32 1)
     47 	%tmp2 = getelementptr float* %A, i32 2
     48 	store float* %tmp2, float** %ptr
     49 	ret void
     50 }
     51 
     52 define void @vst1i64(i64* %A, <1 x i64>* %B) nounwind {
     53 ;CHECK: vst1i64:
     54 ;CHECK: vst1.64
     55 	%tmp0 = bitcast i64* %A to i8*
     56 	%tmp1 = load <1 x i64>* %B
     57 	call void @llvm.arm.neon.vst1.v1i64(i8* %tmp0, <1 x i64> %tmp1, i32 1)
     58 	ret void
     59 }
     60 
     61 define void @vst1Qi8(i8* %A, <16 x i8>* %B) nounwind {
     62 ;CHECK: vst1Qi8:
     63 ;Check the alignment value.  Max for this instruction is 128 bits:
     64 ;CHECK: vst1.8 {d16, d17}, [r0:64]
     65 	%tmp1 = load <16 x i8>* %B
     66 	call void @llvm.arm.neon.vst1.v16i8(i8* %A, <16 x i8> %tmp1, i32 8)
     67 	ret void
     68 }
     69 
     70 define void @vst1Qi16(i16* %A, <8 x i16>* %B) nounwind {
     71 ;CHECK: vst1Qi16:
     72 ;Check the alignment value.  Max for this instruction is 128 bits:
     73 ;CHECK: vst1.16 {d16, d17}, [r0:128]
     74 	%tmp0 = bitcast i16* %A to i8*
     75 	%tmp1 = load <8 x i16>* %B
     76 	call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 32)
     77 	ret void
     78 }
     79 
     80 ;Check for a post-increment updating store with register increment.
     81 define void @vst1Qi16_update(i16** %ptr, <8 x i16>* %B, i32 %inc) nounwind {
     82 ;CHECK: vst1Qi16_update:
     83 ;CHECK: vst1.16 {d16, d17}, [r1:64], r2
     84 	%A = load i16** %ptr
     85 	%tmp0 = bitcast i16* %A to i8*
     86 	%tmp1 = load <8 x i16>* %B
     87 	call void @llvm.arm.neon.vst1.v8i16(i8* %tmp0, <8 x i16> %tmp1, i32 8)
     88 	%tmp2 = getelementptr i16* %A, i32 %inc
     89 	store i16* %tmp2, i16** %ptr
     90 	ret void
     91 }
     92 
     93 define void @vst1Qi32(i32* %A, <4 x i32>* %B) nounwind {
     94 ;CHECK: vst1Qi32:
     95 ;CHECK: vst1.32
     96 	%tmp0 = bitcast i32* %A to i8*
     97 	%tmp1 = load <4 x i32>* %B
     98 	call void @llvm.arm.neon.vst1.v4i32(i8* %tmp0, <4 x i32> %tmp1, i32 1)
     99 	ret void
    100 }
    101 
    102 define void @vst1Qf(float* %A, <4 x float>* %B) nounwind {
    103 ;CHECK: vst1Qf:
    104 ;CHECK: vst1.32
    105 	%tmp0 = bitcast float* %A to i8*
    106 	%tmp1 = load <4 x float>* %B
    107 	call void @llvm.arm.neon.vst1.v4f32(i8* %tmp0, <4 x float> %tmp1, i32 1)
    108 	ret void
    109 }
    110 
    111 define void @vst1Qi64(i64* %A, <2 x i64>* %B) nounwind {
    112 ;CHECK: vst1Qi64:
    113 ;CHECK: vst1.64
    114 	%tmp0 = bitcast i64* %A to i8*
    115 	%tmp1 = load <2 x i64>* %B
    116 	call void @llvm.arm.neon.vst1.v2i64(i8* %tmp0, <2 x i64> %tmp1, i32 1)
    117 	ret void
    118 }
    119 
    120 declare void @llvm.arm.neon.vst1.v8i8(i8*, <8 x i8>, i32) nounwind
    121 declare void @llvm.arm.neon.vst1.v4i16(i8*, <4 x i16>, i32) nounwind
    122 declare void @llvm.arm.neon.vst1.v2i32(i8*, <2 x i32>, i32) nounwind
    123 declare void @llvm.arm.neon.vst1.v2f32(i8*, <2 x float>, i32) nounwind
    124 declare void @llvm.arm.neon.vst1.v1i64(i8*, <1 x i64>, i32) nounwind
    125 
    126 declare void @llvm.arm.neon.vst1.v16i8(i8*, <16 x i8>, i32) nounwind
    127 declare void @llvm.arm.neon.vst1.v8i16(i8*, <8 x i16>, i32) nounwind
    128 declare void @llvm.arm.neon.vst1.v4i32(i8*, <4 x i32>, i32) nounwind
    129 declare void @llvm.arm.neon.vst1.v4f32(i8*, <4 x float>, i32) nounwind
    130 declare void @llvm.arm.neon.vst1.v2i64(i8*, <2 x i64>, i32) nounwind
    131