arm_neon.h | 324 //Vector add: vadd -> Vr[i]:=Va[i]+Vb[i], Vr, Va, Vb have equal lane sizes. 335 //Vector long add: vaddl -> Vr[i]:=Va[i]+Vb[i], Va, Vb have equal lane sizes, result is a 128 bit vector of lanes that are twice the width. 756 uint8x16_t vld1q_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x16_t vec, __constrange(0,15) int lane); \/\/VLD1.8 {d0[0]}, [r0] variable 757 uint16x8_t vld1q_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 758 uint32x4_t vld1q_lane_u32(__transfersize(1) uint32_t const * ptr, uint32x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 759 uint64x2_t vld1q_lane_u64(__transfersize(1) uint64_t const * ptr, uint64x2_t vec, __constrange(0,1) int lane); \/\/ VLD1.64 {d0}, [r0] variable 760 int8x16_t vld1q_lane_s8(__transfersize(1) int8_t const * ptr, int8x16_t vec, __constrange(0,15) int lane); \/\/VLD1.8 {d0[0]}, [r0] variable 761 int16x8_t vld1q_lane_s16(__transfersize(1) int16_t const * ptr, int16x8_t vec, __constrange(0,7) int lane); \/\/VLD1.16 {d0[0]}, [r0] variable 762 int32x4_t vld1q_lane_s32(__transfersize(1) int32_t const * ptr, int32x4_t vec, __constrange(0,3) int lane); \/\/VLD1.32 {d0[0]}, [r0] variable 763 float32x4_t vld1q_lane_f32(__transfersize(1) float32_t const * ptr, float32x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 764 int64x2_t vld1q_lane_s64(__transfersize(1) int64_t const * ptr, int64x2_t vec, __constrange(0,1) int lane); \/\/VLD1.64 {d0}, [r0] variable 765 poly8x16_t vld1q_lane_p8(__transfersize(1) poly8_t const * ptr, poly8x16_t vec, __constrange(0,15) int lane); \/\/VLD1.8 {d0[0]}, [r0] variable 766 poly16x8_t vld1q_lane_p16(__transfersize(1) poly16_t const * ptr, poly16x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 905 uint16x8x2_t vld2q_lane_u16_ptr(__transfersize(2) uint16_t const * ptr, uint16x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.16 {d0[0], d2[0]}, [r0] variable 906 uint32x4x2_t vld2q_lane_u32_ptr(__transfersize(2) uint32_t const * ptr, uint32x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.32 {d0[0], d2[0]}, [r0] variable 907 int16x8x2_t vld2q_lane_s16_ptr(__transfersize(2) int16_t const * ptr, int16x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.16 {d0[0], d2[0]}, [r0] variable 908 int32x4x2_t vld2q_lane_s32_ptr(__transfersize(2) int32_t const * ptr, int32x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.32 {d0[0], d2[0]}, [r0] variable 909 float16x8x2_t vld2q_lane_f16_ptr(__transfersize(2) __fp16 const * ptr, float16x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.16 {d0[0], d2[0]}, [r0] variable 910 float32x4x2_t vld2q_lane_f32_ptr(__transfersize(2) float32_t const * ptr, float32x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.32 {d0[0], d2[0]}, [r0] variable 911 poly16x8x2_t vld2q_lane_p16_ptr(__transfersize(2) poly16_t const * ptr, poly16x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.16 {d0[0], d2[0]}, [r0] variable 912 uint8x8x2_t vld2_lane_u8_ptr(__transfersize(2) uint8_t const * ptr, uint8x8x2_t * src, __constrange(0,7) int lane); \/\/VLD2.8 {d0[0], d1[0]}, [r0] variable 913 uint16x4x2_t vld2_lane_u16_ptr(__transfersize(2) uint16_t const * ptr, uint16x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.16 {d0[0], d1[0]}, [r0] variable 914 uint32x2x2_t vld2_lane_u32_ptr(__transfersize(2) uint32_t const * ptr, uint32x2x2_t * src, __constrange(0,1) int lane); \/\/ VLD2.32 {d0[0], d1[0]}, [r0] variable 915 int8x8x2_t vld2_lane_s8_ptr(__transfersize(2) int8_t const * ptr, int8x8x2_t * src, __constrange(0,7) int lane); \/\/VLD2.8 {d0[0], d1[0]}, [r0] variable 916 int16x4x2_t vld2_lane_s16_ptr(__transfersize(2) int16_t const * ptr, int16x4x2_t * src, __constrange(0,3) int lane); \/\/VLD2.16 {d0[0], d1[0]}, [r0] variable 917 int32x2x2_t vld2_lane_s32_ptr(__transfersize(2) int32_t const * ptr, int32x2x2_t * src, __constrange(0,1) int lane); \/\/VLD2.32 {d0[0], d1[0]}, [r0] variable 919 float32x2x2_t vld2_lane_f32_ptr(__transfersize(2) float32_t const * ptr, float32x2x2_t * src, __constrange(0,1) int lane); \/\/ VLD2.32 {d0[0], d1[0]}, [r0] variable 920 poly8x8x2_t vld2_lane_p8_ptr(__transfersize(2) poly8_t const * ptr, poly8x8x2_t * src, __constrange(0,7) int lane); \/\/VLD2.8 {d0[0], d1[0]}, [r0] variable 921 poly16x4x2_t vld2_lane_p16_ptr(__transfersize(2) poly16_t const * ptr, poly16x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.16 {d0[0], d1[0]}, [r0] variable 922 uint16x8x3_t vld3q_lane_u16_ptr(__transfersize(3) uint16_t const * ptr, uint16x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 923 uint32x4x3_t vld3q_lane_u32_ptr(__transfersize(3) uint32_t const * ptr, uint32x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.32 {d0[0], d2[0], d4[0]}, [r0] variable 924 int16x8x3_t vld3q_lane_s16_ptr(__transfersize(3) int16_t const * ptr, int16x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 925 int32x4x3_t vld3q_lane_s32_ptr(__transfersize(3) int32_t const * ptr, int32x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.32 {d0[0], d2[0], d4[0]}, [r0] variable 926 float16x8x3_t vld3q_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 927 float32x4x3_t vld3q_lane_f32_ptr(__transfersize(3) float32_t const * ptr, float32x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.32 {d0[0], d2[0], d4[0]}, [r0] variable 928 poly16x8x3_t vld3q_lane_p16_ptr(__transfersize(3) poly16_t const * ptr, poly16x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 929 uint8x8x3_t vld3_lane_u8_ptr(__transfersize(3) uint8_t const * ptr, uint8x8x3_t * src, __constrange(0,7) int lane); \/\/VLD3.8 {d0[0], d1[0], d2[0]}, [r0] variable 930 uint16x4x3_t vld3_lane_u16_ptr(__transfersize(3) uint16_t const * ptr, uint16x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 931 uint32x2x3_t vld3_lane_u32_ptr(__transfersize(3) uint32_t const * ptr, uint32x2x3_t * src, __constrange(0,1) int lane); \/\/ VLD3.32 {d0[0], d1[0], d2[0]}, [r0] variable 932 int8x8x3_t vld3_lane_s8_ptr(__transfersize(3) int8_t const * ptr, int8x8x3_t * src, __constrange(0,7) int lane); \/\/VLD3.8 {d0[0], d1[0], d2[0]}, [r0] variable 933 int16x4x3_t vld3_lane_s16_ptr(__transfersize(3) int16_t const * ptr, int16x4x3_t * src, __constrange(0,3) int lane); \/\/VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 934 int32x2x3_t vld3_lane_s32_ptr(__transfersize(3) int32_t const * ptr, int32x2x3_t * src, __constrange(0,1) int lane); \/\/VLD3.32 {d0[0], d1[0], d2[0]}, [r0] variable 935 float16x4x3_t vld3_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 936 float32x2x3_t vld3_lane_f32_ptr(__transfersize(3) float32_t const * ptr, float32x2x3_t * src, __constrange(0,1) int lane); \/\/ VLD3.32 {d0[0], d1[0], d2[0]}, [r0] variable 937 poly8x8x3_t vld3_lane_p8_ptr(__transfersize(3) poly8_t const * ptr, poly8x8x3_t * src, __constrange(0,7) int lane); \/\/VLD3.8 {d0[0], d1[0], d2[0]}, [r0] variable 938 poly16x4x3_t vld3_lane_p16_ptr(__transfersize(3) poly16_t const * ptr, poly16x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 939 uint16x8x4_t vld4q_lane_u16_ptr(__transfersize(4) uint16_t const * ptr, uint16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 940 uint32x4x4_t vld4q_lane_u32_ptr(__transfersize(4) uint32_t const * ptr, uint32x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 941 int16x8x4_t vld4q_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 942 int32x4x4_t vld4q_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 943 float16x8x4_t vld4q_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 944 float32x4x4_t vld4q_lane_f32_ptr(__transfersize(4) float32_t const * ptr, float32x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 945 poly16x8x4_t vld4q_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 946 uint8x8x4_t vld4_lane_u8_ptr(__transfersize(4) uint8_t const * ptr, uint8x8x4_t * src, __constrange(0,7) int lane); \/\/VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 947 uint16x4x4_t vld4_lane_u16_ptr(__transfersize(4) uint16_t const * ptr, uint16x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 948 uint32x2x4_t vld4_lane_u32_ptr(__transfersize(4) uint32_t const * ptr, uint32x2x4_t * src, __constrange(0,1) int lane); \/\/ VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 949 int8x8x4_t vld4_lane_s8_ptr(__transfersize(4) int8_t const * ptr, int8x8x4_t * src, __constrange(0,7) int lane); \/\/VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 950 int16x4x4_t vld4_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x4x4_t * src, __constrange(0,3) int lane); \/\/VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 951 int32x2x4_t vld4_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x2x4_t * src, __constrange(0,1) int lane); \/\/VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 952 float16x4x4_t vld4_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 953 float32x2x4_t vld4_lane_f32_ptr(__transfersize(4) float32_t const * ptr, float32x2x4_t * src, __constrange(0,1) int lane); \/\/ VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 954 poly8x8x4_t vld4_lane_p8_ptr(__transfersize(4) poly8_t const * ptr, poly8x8x4_t * src, __constrange(0,7) int lane); \/\/VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 955 poly16x4x4_t vld4_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1024 void vst2q_lane_u16_ptr(__transfersize(2) uint16_t * ptr, uint16x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.16{d0[0], d2[0]}, [r0] variable 1025 void vst2q_lane_u32_ptr(__transfersize(2) uint32_t * ptr, uint32x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.32{d0[0], d2[0]}, [r0] variable 1026 void vst2q_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.16{d0[0], d2[0]}, [r0] variable 1027 void vst2q_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.32{d0[0], d2[0]}, [r0] variable 1028 void vst2q_lane_f16_ptr(__transfersize(2) __fp16 * ptr, float16x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.16{d0[0], d2[0]}, [r0] variable 1029 void vst2q_lane_f32_ptr(__transfersize(2) float32_t * ptr, float32x4x2_t * val, __constrange(0,3) int lane); \/\/VST2.32 {d0[0], d2[0]}, [r0] variable 1030 void vst2q_lane_p16_ptr(__transfersize(2) poly16_t * ptr, poly16x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.16{d0[0], d2[0]}, [r0] variable 1031 void vst2_lane_u8_ptr(__transfersize(2) uint8_t * ptr, uint8x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.8{d0[0], d1[0]}, [r0] variable 1032 void vst2_lane_u16_ptr(__transfersize(2) uint16_t * ptr, uint16x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.16{d0[0], d1[0]}, [r0] variable 1033 void vst2_lane_u32_ptr(__transfersize(2) uint32_t * ptr, uint32x2x2_t * val, __constrange(0,1) int lane); \/\/ VST2.32{d0[0], d1[0]}, [r0] variable 1034 void vst2_lane_s8_ptr(__transfersize(2) int8_t * ptr, int8x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.8 {d0[0],d1[0]}, [r0] variable 1035 void vst2_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.16{d0[0], d1[0]}, [r0] variable 1036 void vst2_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x2x2_t * val, __constrange(0,1) int lane); \/\/ VST2.32{d0[0], d1[0]}, [r0] variable 1037 void vst2_lane_f16_ptr(__transfersize(2) __fp16 * ptr, float16x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.16{d0[0], d1[0]}, [r0] variable 1038 void vst2_lane_f32_ptr(__transfersize(2) float32_t * ptr, float32x2x2_t * val, __constrange(0,1) int lane); \/\/ VST2.32{d0[0], d1[0]}, [r0] variable 1039 void vst2_lane_p8_ptr(__transfersize(2) poly8_t * ptr, poly8x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.8{d0[0], d1[0]}, [r0] variable 1040 void vst2_lane_p16_ptr(__transfersize(2) poly16_t * ptr, poly16x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.16{d0[0], d1[0]}, [r0] variable 1041 void vst3q_lane_u16_ptr(__transfersize(3) uint16_t * ptr, uint16x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.16{d0[0], d2[0], d4[0]}, [r0] variable 1042 void vst3q_lane_u32_ptr(__transfersize(3) uint32_t * ptr, uint32x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.32{d0[0], d2[0], d4[0]}, [r0] variable 1043 void vst3q_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.16{d0[0], d2[0], d4[0]}, [r0] variable 1044 void vst3q_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.32{d0[0], d2[0], d4[0]}, [r0] variable 1045 void vst3q_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.16{d0[0], d2[0], d4[0]}, [r0] variable 1046 void vst3q_lane_f32_ptr(__transfersize(3) float32_t * ptr, float32x4x3_t * val, __constrange(0,3) int lane); \/\/VST3.32 {d0[0], d2[0], d4[0]}, [r0] variable 1047 void vst3q_lane_p16_ptr(__transfersize(3) poly16_t * ptr, poly16x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.16{d0[0], d2[0], d4[0]}, [r0] variable 1048 void vst3_lane_u8_ptr(__transfersize(3) uint8_t * ptr, uint8x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.8{d0[0], d1[0], d2[0]}, [r0] variable 1049 void vst3_lane_u16_ptr(__transfersize(3) uint16_t * ptr, uint16x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.16{d0[0], d1[0], d2[0]}, [r0] variable 1050 void vst3_lane_u32_ptr(__transfersize(3) uint32_t * ptr, uint32x2x3_t * val, __constrange(0,1) int lane); \/\/ VST3.32{d0[0], d1[0], d2[0]}, [r0] variable 1051 void vst3_lane_s8_ptr(__transfersize(3) int8_t * ptr, int8x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.8 {d0[0],d1[0], d2[0]}, [r0] variable 1052 void vst3_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.16{d0[0], d1[0], d2[0]}, [r0] variable 1053 void vst3_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x2x3_t * val, __constrange(0,1) int lane); \/\/ VST3.32{d0[0], d1[0], d2[0]}, [r0] variable 1054 void vst3_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.16{d0[0], d1[0], d2[0]}, [r0] variable 1055 void vst3_lane_f32_ptr(__transfersize(3) float32_t * ptr, float32x2x3_t * val, __constrange(0,1) int lane); \/\/ VST3.32{d0[0], d1[0], d2[0]}, [r0] variable 1056 void vst3_lane_p8_ptr(__transfersize(3) poly8_t * ptr, poly8x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.8{d0[0], d1[0], d2[0]}, [r0] variable 1057 void vst3_lane_p16_ptr(__transfersize(3) poly16_t * ptr, poly16x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.16{d0[0], d1[0], d2[0]}, [r0] variable 1058 void vst4q_lane_u16_ptr(__transfersize(4) uint16_t * ptr, uint16x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1059 void vst4q_lane_u32_ptr(__transfersize(4) uint32_t * ptr, uint32x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.32{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1060 void vst4q_lane_s16_ptr(__transfersize(4) int16_t * ptr, int16x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1061 void vst4q_lane_s32_ptr(__transfersize(4) int32_t * ptr, int32x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.32{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1062 void vst4q_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1063 void vst4q_lane_f32_ptr(__transfersize(4) float32_t * ptr, float32x4x4_t * val, __constrange(0,3) int lane); \/\/VST4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1064 void vst4q_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1065 void vst4_lane_u8_ptr(__transfersize(4) uint8_t * ptr, uint8x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.8{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1066 void vst4_lane_u16_ptr(__transfersize(4) uint16_t * ptr, uint16x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1067 void vst4_lane_u32_ptr(__transfersize(4) uint32_t * ptr, uint32x2x4_t * val, __constrange(0,1) int lane); \/\/ VST4.32{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1068 void vst4_lane_s8_ptr(__transfersize(4) int8_t * ptr, int8x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.8 {d0[0],d1[0], d2[0], d3[0]}, [r0] variable 1069 void vst4_lane_s16_ptr(__transfersize(4) int16_t * ptr, int16x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1070 void vst4_lane_s32_ptr(__transfersize(4) int32_t * ptr, int32x2x4_t * val, __constrange(0,1) int lane); \/\/ VST4.32{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1071 void vst4_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1072 void vst4_lane_f32_ptr(__transfersize(4) float32_t * ptr, float32x2x4_t * val, __constrange(0,1) int lane); \/\/ VST4.32{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1073 void vst4_lane_p8_ptr(__transfersize(4) poly8_t * ptr, poly8x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.8{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1074 void vst4_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 4553 uint8x16_t vld1q_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x16_t vec, __constrange(0,15) int lane); \/\/ VLD1.8 {d0[0]}, [r0] variable 4556 uint16x8_t vld1q_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 4559 uint32x4_t vld1q_lane_u32(__transfersize(1) uint32_t const * ptr, uint32x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 4562 uint64x2_t vld1q_lane_u64(__transfersize(1) uint64_t const * ptr, uint64x2_t vec, __constrange(0,1) int lane); \/\/ VLD1.64 {d0}, [r0] variable 4565 int8x16_t vld1q_lane_s8(__transfersize(1) int8_t const * ptr, int8x16_t vec, __constrange(0,15) int lane); \/\/ VLD1.8 {d0[0]}, [r0] variable 4568 int16x8_t vld1q_lane_s16(__transfersize(1) int16_t const * ptr, int16x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 4571 int32x4_t vld1q_lane_s32(__transfersize(1) int32_t const * ptr, int32x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 4576 float32x4_t vld1q_lane_f32(__transfersize(1) float32_t const * ptr, float32x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 4584 int64x2_t vld1q_lane_s64(__transfersize(1) int64_t const * ptr, int64x2_t vec, __constrange(0,1) int lane); \/\/ VLD1.64 {d0}, [r0] variable 4587 poly8x16_t vld1q_lane_p8(__transfersize(1) poly8_t const * ptr, poly8x16_t vec, __constrange(0,15) int lane); \/\/ VLD1.8 {d0[0]}, [r0] variable 4590 poly16x8_t vld1q_lane_p16(__transfersize(1) poly16_t const * ptr, poly16x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 4695 void vst1q_lane_u8(__transfersize(1) uint8_t * ptr, uint8x16_t val, __constrange(0,15) int lane); \/\/ VST1.8 {d0[0]}, [r0] variable 4698 void vst1q_lane_u16(__transfersize(1) uint16_t * ptr, uint16x8_t val, __constrange(0,7) int lane); \/\/ VST1.16 {d0[0]}, [r0] variable 4701 void vst1q_lane_u32(__transfersize(1) uint32_t * ptr, uint32x4_t val, __constrange(0,3) int lane); \/\/ VST1.32 {d0[0]}, [r0] variable 4704 void vst1q_lane_u64(__transfersize(1) uint64_t * ptr, uint64x2_t val, __constrange(0,1) int lane); \/\/ VST1.64 {d0}, [r0] variable 4707 void vst1q_lane_s8(__transfersize(1) int8_t * ptr, int8x16_t val, __constrange(0,15) int lane); \/\/ VST1.8 {d0[0]}, [r0] variable 4710 void vst1q_lane_s16(__transfersize(1) int16_t * ptr, int16x8_t val, __constrange(0,7) int lane); \/\/ VST1.16 {d0[0]}, [r0] variable 4713 void vst1q_lane_s32(__transfersize(1) int32_t * ptr, int32x4_t val, __constrange(0,3) int lane); \/\/ VST1.32 {d0[0]}, [r0] variable 4716 void vst1q_lane_s64(__transfersize(1) int64_t * ptr, int64x2_t val, __constrange(0,1) int lane); \/\/ VST1.64 {d0}, [r0] variable 4719 void vst1q_lane_f16(__transfersize(1) __fp16 * ptr, float16x8_t val, __constrange(0,7) int lane); \/\/ VST1.16 {d0[0]}, [r0] variable 4722 void vst1q_lane_f32(__transfersize(1) float32_t * ptr, float32x4_t val, __constrange(0,3) int lane); \/\/ VST1.32 {d0[0]}, [r0] variable 4730 void vst1q_lane_p8(__transfersize(1) poly8_t * ptr, poly8x16_t val, __constrange(0,15) int lane); \/\/ VST1.8 {d0[0]}, [r0] variable 4733 void vst1q_lane_p16(__transfersize(1) poly16_t * ptr, poly16x8_t val, __constrange(0,7) int lane); \/\/ VST1.16 {d0[0]}, [r0] variable 5711 int8x8x2_t vld2_lane_s8_ptr(__transfersize(2) int8_t const * ptr, int8x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.8 {d0[0], d1[0]}, [r0] variable 5715 int16x4x2_t vld2_lane_s16_ptr(__transfersize(2) int16_t const * ptr, int16x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.16 {d0[0], d1[0]}, [r0] variable 5719 int32x2x2_t vld2_lane_s32_ptr(__transfersize(2) int32_t const * ptr, int32x2x2_t * src, __constrange(0,1) int lane); \/\/ VLD2.32 {d0[0], d1[0]}, [r0] variable 5725 float32x2x2_t vld2_lane_f32_ptr(__transfersize(2) float32_t const * ptr, float32x2x2_t * src,__constrange(0,1) int lane); \/\/ VLD2.32 {d0[0], d1[0]}, [r0] variable 5729 poly8x8x2_t vld2_lane_p8_ptr(__transfersize(2) poly8_t const * ptr, poly8x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.8 {d0[0], d1[0]}, [r0] variable 5733 poly16x4x2_t vld2_lane_p16_ptr(__transfersize(2) poly16_t const * ptr, poly16x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.16 {d0[0], d1[0]}, [r0] variable 5785 float16x8x3_t vld3q_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 5800 poly16x8x3_t vld3q_lane_p16_ptr(__transfersize(3) poly16_t const * ptr, poly16x8x3_t * src,__constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 5836 int8x8x3_t vld3_lane_s8_ptr(__transfersize(3) int8_t const * ptr, int8x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.8 {d0[0], d1[0], d2[0]}, [r0] variable 5839 int16x4x3_t vld3_lane_s16_ptr(__transfersize(3) int16_t const * ptr, int16x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 5842 int32x2x3_t vld3_lane_s32_ptr(__transfersize(3) int32_t const * ptr, int32x2x3_t * src, __constrange(0,1) int lane); \/\/ VLD3.32 {d0[0], d1[0], d2[0]}, [r0] variable 5845 float16x4x3_t vld3_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 5893 int16x8x4_t vld4q_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 5897 int32x4x4_t vld4q_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 5901 float16x8x4_t vld4q_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 5917 poly16x8x4_t vld4q_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x8x4_t * src,__constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 5957 int8x8x4_t vld4_lane_s8_ptr(__transfersize(4) int8_t const * ptr, int8x8x4_t * src, __constrange(0,7) int lane); variable 5961 int16x4x4_t vld4_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x4x4_t * src, __constrange(0,3) int lane); variable 5965 int32x2x4_t vld4_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x2x4_t * src, __constrange(0,1) int lane); variable 5969 float16x4x4_t vld4_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x4x4_t * src, __constrange(0,3) int lane); variable 5981 poly8x8x4_t vld4_lane_p8_ptr(__transfersize(4) poly8_t const * ptr, poly8x8x4_t * src, __constrange(0,7) int lane); variable 5985 poly16x4x4_t vld4_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x4x4_t * src, __constrange(0,3) int lane); variable 6588 void vst2q_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x8x2_t * val, __constrange(0,7) int lane); variable 6592 void vst2q_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x4x2_t * val, __constrange(0,3) int lane); variable 6596 void vst2q_lane_f16_ptr(__transfersize(2) __fp16 * ptr, float16x8x2_t * val, __constrange(0,7) int lane); variable 6608 void vst2q_lane_p16_ptr(__transfersize(2) poly16_t * ptr, poly16x8x2_t * val, __constrange(0,7) int lane); variable 6612 void vst2_lane_u16_ptr(__transfersize(2) uint16_t * ptr, uint16x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.16 {d0[0], d1[0]}, [r0] variable 6616 void vst2_lane_u32_ptr(__transfersize(2) uint32_t * ptr, uint32x2x2_t * val, __constrange(0,1) int lane); \/\/ VST2.32 {d0[0], d1[0]}, [r0] variable 6620 void vst2_lane_s8_ptr(__transfersize(2) int8_t * ptr, int8x8x2_t * val, __constrange(0,7) int lane); variable 6624 void vst2_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x4x2_t * val, __constrange(0,3) int lane); variable 6628 void vst2_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x2x2_t * val, __constrange(0,1) int lane); variable 6634 void vst2_lane_f32_ptr(__transfersize(2) float32_t * ptr, float32x2x2_t * val, __constrange(0,1) int lane); \/\/ VST2.32 {d0[0], d1[0]}, [r0] variable 6662 void vst3q_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x8x3_t * val, __constrange(0,7) int lane); variable 6666 void vst3q_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x4x3_t * val, __constrange(0,3) int lane); variable 6670 void vst3q_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x8x3_t * val, __constrange(0,7) int lane); variable 6683 void vst3_lane_s8_ptr(__transfersize(3) int8_t * ptr, int8x8x3_t * val, __constrange(0,7) int lane); variable 6687 void vst3_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x4x3_t * val, __constrange(0,3) int lane); variable 6691 void vst3_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x2x3_t * val, __constrange(0,1) int lane); variable 6695 void vst3_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x4x3_t * val, __constrange(0,3) int lane); variable 6699 void vst3_lane_f32_ptr(__transfersize(3) float32_t * ptr, float32x2x3_t * val, __constrange(0,1) int lane); variable 6703 void vst3_lane_p8_ptr(__transfersize(3) poly8_t * ptr, poly8x8x3_t * val, __constrange(0,7) int lane); variable 6707 void vst3_lane_p16_ptr(__transfersize(3) poly16_t * ptr, poly16x4x3_t * val, __constrange(0,3) int lane); variable 6729 void vst4q_lane_s16_ptr(__transfersize(4) int16_t * ptr, int16x8x4_t * val, __constrange(0,7) int lane); variable 6733 void vst4q_lane_s32_ptr(__transfersize(4) int32_t * ptr, int32x4x4_t * val, __constrange(0,3) int lane); variable 6737 void vst4q_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x8x4_t * val, __constrange(0,7) int lane); variable 6751 void vst4q_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x8x4_t * val, __constrange(0,7) int lane); variable 6795 void vst4_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x4x4_t * val, __constrange(0,3) int lane); variable 6802 void vst4_lane_p8_ptr(__transfersize(4) poly8_t * ptr, poly8x8x4_t * val, __constrange(0,7) int lane); variable 6806 void vst4_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x4x4_t * val, __constrange(0,3) int lane); variable [all...] |