Home | History | Annotate | Download | only in ARM
      1 ; RUN: llc < %s -march=arm -mattr=+neon -disable-arm-fast-isel -O0 | FileCheck %s
      2 
      3 define void @vst3i8(i8* %A, <8 x i8>* %B) nounwind {
        ; Loads one <8 x i8> vector from %B and passes it as all three source
        ; operands of the vst3 intrinsic, storing through %A. The trailing i32
        ; operand of the call is 32, while the expected instruction below carries
        ; a ":64" qualifier on its address operand.
      4 ;CHECK: vst3i8:
      5 ;Check the alignment value.  Max for this instruction is 64 bits:
      6 ;This test runs at -O0 so do not check for specific register numbers.
      7 ;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]
      8 	%tmp1 = load <8 x i8>* %B
      9 	call void @llvm.arm.neon.vst3.v8i8(i8* %A, <8 x i8> %tmp1, <8 x i8> %tmp1, <8 x i8> %tmp1, i32 32)
     10 	ret void
     11 }
     12 
     13 define void @vst3i16(i16* %A, <4 x i16>* %B) nounwind {
        ; Loads one <4 x i16> vector from %B and stores it three times through the
        ; vst3 intrinsic. %A is bitcast to i8* because the intrinsic takes an i8*
        ; address. The trailing i32 operand is 1, and only the vst3.16 mnemonic is
        ; matched (no operand pattern).
     14 ;CHECK: vst3i16:
     15 ;CHECK: vst3.16
     16 	%tmp0 = bitcast i16* %A to i8*
     17 	%tmp1 = load <4 x i16>* %B
     18 	call void @llvm.arm.neon.vst3.v4i16(i8* %tmp0, <4 x i16> %tmp1, <4 x i16> %tmp1, <4 x i16> %tmp1, i32 1)
     19 	ret void
     20 }
     21 
     22 define void @vst3i32(i32* %A, <2 x i32>* %B) nounwind {
        ; Loads one <2 x i32> vector from %B and stores it three times through the
        ; vst3 intrinsic, after bitcasting %A to the i8* address type the
        ; intrinsic takes. The trailing i32 operand is 1; only the vst3.32
        ; mnemonic is matched.
     23 ;CHECK: vst3i32:
     24 ;CHECK: vst3.32
     25 	%tmp0 = bitcast i32* %A to i8*
     26 	%tmp1 = load <2 x i32>* %B
     27 	call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
     28 	ret void
     29 }
     30 
     31 ;Check for a post-increment updating store.
     32 define void @vst3i32_update(i32** %ptr, <2 x i32>* %B) nounwind {
        ; Reads the destination pointer %A out of %ptr, stores a <2 x i32> vector
        ; three times through the vst3 intrinsic, then advances %A and writes the
        ; advanced pointer back to %ptr. The expected instruction has '!' after
        ; its address operand.
     33 ;CHECK: vst3i32_update:
     34 ;CHECK: vst3.32 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
     35 	%A = load i32** %ptr
     36 	%tmp0 = bitcast i32* %A to i8*
     37 	%tmp1 = load <2 x i32>* %B
     38 	call void @llvm.arm.neon.vst3.v2i32(i8* %tmp0, <2 x i32> %tmp1, <2 x i32> %tmp1, <2 x i32> %tmp1, i32 1)
        ; Advance by 6 i32 elements: 3 vectors x 2 elements each, i.e. exactly
        ; the amount of data passed to the call above.
     39 	%tmp2 = getelementptr i32* %A, i32 6
     40 	store i32* %tmp2, i32** %ptr
     41 	ret void
     42 }
     43 
     44 define void @vst3f(float* %A, <2 x float>* %B) nounwind {
        ; Floating-point counterpart of the <2 x i32> case: loads one <2 x float>
        ; vector from %B and stores it three times through the vst3 intrinsic at
        ; %A (bitcast to i8*). The trailing i32 operand is 1; only the vst3.32
        ; mnemonic is matched.
     45 ;CHECK: vst3f:
     46 ;CHECK: vst3.32
     47 	%tmp0 = bitcast float* %A to i8*
     48 	%tmp1 = load <2 x float>* %B
     49 	call void @llvm.arm.neon.vst3.v2f32(i8* %tmp0, <2 x float> %tmp1, <2 x float> %tmp1, <2 x float> %tmp1, i32 1)
     50 	ret void
     51 }
     52 
     53 define void @vst3i64(i64* %A, <1 x i64>* %B) nounwind {
        ; Loads one <1 x i64> vector from %B and passes it three times to the vst3
        ; intrinsic with a trailing i32 operand of 16. The expected instruction is
        ; vst1.64 (not a vst3 mnemonic) listing three d registers, with a ":64"
        ; qualifier on the address.
        ; NOTE(review): presumably vst1 is selected because one-element vectors
        ; leave nothing to interleave -- confirm against the ARM backend.
     54 ;CHECK: vst3i64:
     55 ;Check the alignment value.  Max for this instruction is 64 bits:
     56 ;This test runs at -O0 so do not check for specific register numbers.
     57 ;CHECK: vst1.64 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]
     58 	%tmp0 = bitcast i64* %A to i8*
     59 	%tmp1 = load <1 x i64>* %B
     60 	call void @llvm.arm.neon.vst3.v1i64(i8* %tmp0, <1 x i64> %tmp1, <1 x i64> %tmp1, <1 x i64> %tmp1, i32 16)
     61 	ret void
     62 }
     63 
     64 define void @vst3Qi8(i8* %A, <16 x i8>* %B) nounwind {
        ; 128-bit case: loads one <16 x i8> vector from %B and passes it three
        ; times to the vst3 intrinsic with a trailing i32 operand of 32. Two
        ; vst3.8 instructions of three d registers each are expected, both with a
        ; ":64" qualifier; the first has '!' after its address operand, the second
        ; does not.
     65 ;CHECK: vst3Qi8:
     66 ;Check the alignment value.  Max for this instruction is 64 bits:
     67 ;This test runs at -O0 so do not check for specific register numbers.
     68 ;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]!
     69 ;CHECK: vst3.8 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}, :64]
     70 	%tmp1 = load <16 x i8>* %B
     71 	call void @llvm.arm.neon.vst3.v16i8(i8* %A, <16 x i8> %tmp1, <16 x i8> %tmp1, <16 x i8> %tmp1, i32 32)
     72 	ret void
     73 }
     74 
     75 define void @vst3Qi16(i16* %A, <8 x i16>* %B) nounwind {
        ; 128-bit case: loads one <8 x i16> vector from %B and stores it three
        ; times through the vst3 intrinsic at %A (bitcast to i8*). The trailing
        ; i32 operand is 1. Two occurrences of the vst3.16 mnemonic are expected,
        ; with no operand patterns.
     76 ;CHECK: vst3Qi16:
     77 ;CHECK: vst3.16
     78 ;CHECK: vst3.16
     79 	%tmp0 = bitcast i16* %A to i8*
     80 	%tmp1 = load <8 x i16>* %B
     81 	call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
     82 	ret void
     83 }
     84 
     85 ;Check for a post-increment updating store.
     86 define void @vst3Qi16_update(i16** %ptr, <8 x i16>* %B) nounwind {
        ; Reads the destination pointer %A out of %ptr, stores a <8 x i16> vector
        ; three times through the vst3 intrinsic, then advances %A and writes the
        ; advanced pointer back to %ptr. Two vst3.16 instructions are expected,
        ; and both have '!' after the address operand.
     87 ;CHECK: vst3Qi16_update:
     88 ;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
     89 ;CHECK: vst3.16 {d{{.*}}, d{{.*}}, d{{.*}}}, [r{{.*}}]!
     90 	%A = load i16** %ptr
     91 	%tmp0 = bitcast i16* %A to i8*
     92 	%tmp1 = load <8 x i16>* %B
     93 	call void @llvm.arm.neon.vst3.v8i16(i8* %tmp0, <8 x i16> %tmp1, <8 x i16> %tmp1, <8 x i16> %tmp1, i32 1)
        ; Advance by 24 i16 elements: 3 vectors x 8 elements each, i.e. exactly
        ; the amount of data passed to the call above.
     94 	%tmp2 = getelementptr i16* %A, i32 24
     95 	store i16* %tmp2, i16** %ptr
     96 	ret void
     97 }
     98 
     99 define void @vst3Qi32(i32* %A, <4 x i32>* %B) nounwind {
        ; 128-bit case: loads one <4 x i32> vector from %B and stores it three
        ; times through the vst3 intrinsic at %A (bitcast to i8*). The trailing
        ; i32 operand is 1. Two occurrences of the vst3.32 mnemonic are expected,
        ; with no operand patterns.
    100 ;CHECK: vst3Qi32:
    101 ;CHECK: vst3.32
    102 ;CHECK: vst3.32
    103 	%tmp0 = bitcast i32* %A to i8*
    104 	%tmp1 = load <4 x i32>* %B
    105 	call void @llvm.arm.neon.vst3.v4i32(i8* %tmp0, <4 x i32> %tmp1, <4 x i32> %tmp1, <4 x i32> %tmp1, i32 1)
    106 	ret void
    107 }
    108 
    109 define void @vst3Qf(float* %A, <4 x float>* %B) nounwind {
        ; Floating-point counterpart of the <4 x i32> case: loads one
        ; <4 x float> vector from %B and stores it three times through the vst3
        ; intrinsic at %A (bitcast to i8*). The trailing i32 operand is 1. Two
        ; occurrences of the vst3.32 mnemonic are expected.
    110 ;CHECK: vst3Qf:
    111 ;CHECK: vst3.32
    112 ;CHECK: vst3.32
    113 	%tmp0 = bitcast float* %A to i8*
    114 	%tmp1 = load <4 x float>* %B
    115 	call void @llvm.arm.neon.vst3.v4f32(i8* %tmp0, <4 x float> %tmp1, <4 x float> %tmp1, <4 x float> %tmp1, i32 1)
    116 	ret void
    117 }
    118 
    ; Declarations of the vst3 intrinsic overloads called by the functions above.
    ; Every overload takes an i8* address, three vectors of one type, and a
    ; trailing i32 (the operand the alignment tests in this file set to 32 or 16).
    ; This first group covers the 64-bit vector types.
    119 declare void @llvm.arm.neon.vst3.v8i8(i8*, <8 x i8>, <8 x i8>, <8 x i8>, i32) nounwind
    120 declare void @llvm.arm.neon.vst3.v4i16(i8*, <4 x i16>, <4 x i16>, <4 x i16>, i32) nounwind
    121 declare void @llvm.arm.neon.vst3.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind
    122 declare void @llvm.arm.neon.vst3.v2f32(i8*, <2 x float>, <2 x float>, <2 x float>, i32) nounwind
    123 declare void @llvm.arm.neon.vst3.v1i64(i8*, <1 x i64>, <1 x i64>, <1 x i64>, i32) nounwind
    124 
    ; Second group: the same signature shape for the 128-bit vector types.
    125 declare void @llvm.arm.neon.vst3.v16i8(i8*, <16 x i8>, <16 x i8>, <16 x i8>, i32) nounwind
    126 declare void @llvm.arm.neon.vst3.v8i16(i8*, <8 x i16>, <8 x i16>, <8 x i16>, i32) nounwind
    127 declare void @llvm.arm.neon.vst3.v4i32(i8*, <4 x i32>, <4 x i32>, <4 x i32>, i32) nounwind
    128 declare void @llvm.arm.neon.vst3.v4f32(i8*, <4 x float>, <4 x float>, <4 x float>, i32) nounwind
    129