arm_neon.h | 324 //Vector add: vadd -> Vr[i]:=Va[i]+Vb[i], Vr, Va, Vb have equal lane sizes. 335 //Vector long add: vaddl -> Vr[i]:=Va[i]+Vb[i], Va, Vb have equal lane sizes, result is a 128 bit vector of lanes that are twice the width. 756 uint8x16_t vld1q_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x16_t vec, __constrange(0,15) int lane); \/\/VLD1.8 {d0[0]}, [r0] variable 757 uint16x8_t vld1q_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 758 uint32x4_t vld1q_lane_u32(__transfersize(1) uint32_t const * ptr, uint32x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 759 uint64x2_t vld1q_lane_u64(__transfersize(1) uint64_t const * ptr, uint64x2_t vec, __constrange(0,1) int lane); \/\/ VLD1.64 {d0}, [r0] variable 760 int8x16_t vld1q_lane_s8(__transfersize(1) int8_t const * ptr, int8x16_t vec, __constrange(0,15) int lane); \/\/VLD1.8 {d0[0]}, [r0] variable 761 int16x8_t vld1q_lane_s16(__transfersize(1) int16_t const * ptr, int16x8_t vec, __constrange(0,7) int lane); \/\/VLD1.16 {d0[0]}, [r0] variable 762 int32x4_t vld1q_lane_s32(__transfersize(1) int32_t const * ptr, int32x4_t vec, __constrange(0,3) int lane); \/\/VLD1.32 {d0[0]}, [r0] variable 763 float32x4_t vld1q_lane_f32(__transfersize(1) float32_t const * ptr, float32x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 764 int64x2_t vld1q_lane_s64(__transfersize(1) int64_t const * ptr, int64x2_t vec, __constrange(0,1) int lane); \/\/VLD1.64 {d0}, [r0] variable 765 poly8x16_t vld1q_lane_p8(__transfersize(1) poly8_t const * ptr, poly8x16_t vec, __constrange(0,15) int lane); \/\/VLD1.8 {d0[0]}, [r0] variable 766 poly16x8_t vld1q_lane_p16(__transfersize(1) poly16_t const * ptr, poly16x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 905 uint16x8x2_t vld2q_lane_u16_ptr(__transfersize(2) uint16_t const * ptr, uint16x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.16 {d0[0], d2[0]}, [r0] variable 906 uint32x4x2_t vld2q_lane_u32_ptr(__transfersize(2) uint32_t const * ptr, uint32x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.32 {d0[0], d2[0]}, [r0] variable 907 int16x8x2_t vld2q_lane_s16_ptr(__transfersize(2) int16_t const * ptr, int16x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.16 {d0[0], d2[0]}, [r0] variable 908 int32x4x2_t vld2q_lane_s32_ptr(__transfersize(2) int32_t const * ptr, int32x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.32 {d0[0], d2[0]}, [r0] variable 909 float16x8x2_t vld2q_lane_f16_ptr(__transfersize(2) __fp16 const * ptr, float16x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.16 {d0[0], d2[0]}, [r0] variable 910 float32x4x2_t vld2q_lane_f32_ptr(__transfersize(2) float32_t const * ptr, float32x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.32 {d0[0], d2[0]}, [r0] variable 911 poly16x8x2_t vld2q_lane_p16_ptr(__transfersize(2) poly16_t const * ptr, poly16x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.16 {d0[0], d2[0]}, [r0] variable 912 uint8x8x2_t vld2_lane_u8_ptr(__transfersize(2) uint8_t const * ptr, uint8x8x2_t * src, __constrange(0,7) int lane); \/\/VLD2.8 {d0[0], d1[0]}, [r0] variable 913 uint16x4x2_t vld2_lane_u16_ptr(__transfersize(2) uint16_t const * ptr, uint16x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.16 {d0[0], d1[0]}, [r0] variable 914 uint32x2x2_t vld2_lane_u32_ptr(__transfersize(2) uint32_t const * ptr, uint32x2x2_t * src, __constrange(0,1) int lane); \/\/ VLD2.32 {d0[0], d1[0]}, [r0] variable 915 int8x8x2_t vld2_lane_s8_ptr(__transfersize(2) int8_t const * ptr, int8x8x2_t * src, __constrange(0,7) int lane); \/\/VLD2.8 {d0[0], d1[0]}, [r0] variable 916 int16x4x2_t vld2_lane_s16_ptr(__transfersize(2) int16_t const * ptr, int16x4x2_t * src, __constrange(0,3) int lane); \/\/VLD2.16 {d0[0], d1[0]}, [r0] variable 917 int32x2x2_t vld2_lane_s32_ptr(__transfersize(2) int32_t const * ptr, int32x2x2_t * src, __constrange(0,1) int lane); \/\/VLD2.32 {d0[0], d1[0]}, [r0] variable 919 float32x2x2_t vld2_lane_f32_ptr(__transfersize(2) float32_t const * ptr, float32x2x2_t * src, __constrange(0,1) int lane); \/\/ VLD2.32 {d0[0], d1[0]}, [r0] variable 920 poly8x8x2_t vld2_lane_p8_ptr(__transfersize(2) poly8_t const * ptr, poly8x8x2_t * src, __constrange(0,7) int lane); \/\/VLD2.8 {d0[0], d1[0]}, [r0] variable 921 poly16x4x2_t vld2_lane_p16_ptr(__transfersize(2) poly16_t const * ptr, poly16x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.16 {d0[0], d1[0]}, [r0] variable 922 uint16x8x3_t vld3q_lane_u16_ptr(__transfersize(3) uint16_t const * ptr, uint16x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 923 uint32x4x3_t vld3q_lane_u32_ptr(__transfersize(3) uint32_t const * ptr, uint32x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.32 {d0[0], d2[0], d4[0]}, [r0] variable 924 int16x8x3_t vld3q_lane_s16_ptr(__transfersize(3) int16_t const * ptr, int16x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 925 int32x4x3_t vld3q_lane_s32_ptr(__transfersize(3) int32_t const * ptr, int32x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.32 {d0[0], d2[0], d4[0]}, [r0] variable 926 float16x8x3_t vld3q_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 927 float32x4x3_t vld3q_lane_f32_ptr(__transfersize(3) float32_t const * ptr, float32x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.32 {d0[0], d2[0], d4[0]}, [r0] variable 928 poly16x8x3_t vld3q_lane_p16_ptr(__transfersize(3) poly16_t const * ptr, poly16x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 929 uint8x8x3_t vld3_lane_u8_ptr(__transfersize(3) uint8_t const * ptr, uint8x8x3_t * src, __constrange(0,7) int lane); \/\/VLD3.8 {d0[0], d1[0], d2[0]}, [r0] variable 930 uint16x4x3_t vld3_lane_u16_ptr(__transfersize(3) uint16_t const * ptr, uint16x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 931 uint32x2x3_t vld3_lane_u32_ptr(__transfersize(3) uint32_t const * ptr, uint32x2x3_t * src, __constrange(0,1) int lane); \/\/ VLD3.32 {d0[0], d1[0], d2[0]}, [r0] variable 932 int8x8x3_t vld3_lane_s8_ptr(__transfersize(3) int8_t const * ptr, int8x8x3_t * src, __constrange(0,7) int lane); \/\/VLD3.8 {d0[0], d1[0], d2[0]}, [r0] variable 933 int16x4x3_t vld3_lane_s16_ptr(__transfersize(3) int16_t const * ptr, int16x4x3_t * src, __constrange(0,3) int lane); \/\/VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 934 int32x2x3_t vld3_lane_s32_ptr(__transfersize(3) int32_t const * ptr, int32x2x3_t * src, __constrange(0,1) int lane); \/\/VLD3.32 {d0[0], d1[0], d2[0]}, [r0] variable 935 float16x4x3_t vld3_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 936 float32x2x3_t vld3_lane_f32_ptr(__transfersize(3) float32_t const * ptr, float32x2x3_t * src, __constrange(0,1) int lane); \/\/ VLD3.32 {d0[0], d1[0], d2[0]}, [r0] variable 937 poly8x8x3_t vld3_lane_p8_ptr(__transfersize(3) poly8_t const * ptr, poly8x8x3_t * src, __constrange(0,7) int lane); \/\/VLD3.8 {d0[0], d1[0], d2[0]}, [r0] variable 938 poly16x4x3_t vld3_lane_p16_ptr(__transfersize(3) poly16_t const * ptr, poly16x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 939 uint16x8x4_t vld4q_lane_u16_ptr(__transfersize(4) uint16_t const * ptr, uint16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 940 uint32x4x4_t vld4q_lane_u32_ptr(__transfersize(4) uint32_t const * ptr, uint32x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 941 int16x8x4_t vld4q_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 942 int32x4x4_t vld4q_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 943 float16x8x4_t vld4q_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 944 float32x4x4_t vld4q_lane_f32_ptr(__transfersize(4) float32_t const * ptr, float32x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 945 poly16x8x4_t vld4q_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 946 uint8x8x4_t vld4_lane_u8_ptr(__transfersize(4) uint8_t const * ptr, uint8x8x4_t * src, __constrange(0,7) int lane); \/\/VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 947 uint16x4x4_t vld4_lane_u16_ptr(__transfersize(4) uint16_t const * ptr, uint16x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 948 uint32x2x4_t vld4_lane_u32_ptr(__transfersize(4) uint32_t const * ptr, uint32x2x4_t * src, __constrange(0,1) int lane); \/\/ VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 949 int8x8x4_t vld4_lane_s8_ptr(__transfersize(4) int8_t const * ptr, int8x8x4_t * src, __constrange(0,7) int lane); \/\/VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 950 int16x4x4_t vld4_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x4x4_t * src, __constrange(0,3) int lane); \/\/VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 951 int32x2x4_t vld4_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x2x4_t * src, __constrange(0,1) int lane); \/\/VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 952 float16x4x4_t vld4_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 953 float32x2x4_t vld4_lane_f32_ptr(__transfersize(4) float32_t const * ptr, float32x2x4_t * src, __constrange(0,1) int lane); \/\/ VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 954 poly8x8x4_t vld4_lane_p8_ptr(__transfersize(4) poly8_t const * ptr, poly8x8x4_t * src, __constrange(0,7) int lane); \/\/VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 955 poly16x4x4_t vld4_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1024 void vst2q_lane_u16_ptr(__transfersize(2) uint16_t * ptr, uint16x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.16{d0[0], d2[0]}, [r0] variable 1025 void vst2q_lane_u32_ptr(__transfersize(2) uint32_t * ptr, uint32x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.32{d0[0], d2[0]}, [r0] variable 1026 void vst2q_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.16{d0[0], d2[0]}, [r0] variable 1027 void vst2q_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.32{d0[0], d2[0]}, [r0] variable 1028 void vst2q_lane_f16_ptr(__transfersize(2) __fp16 * ptr, float16x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.16{d0[0], d2[0]}, [r0] variable 1029 void vst2q_lane_f32_ptr(__transfersize(2) float32_t * ptr, float32x4x2_t * val, __constrange(0,3) int lane); \/\/VST2.32 {d0[0], d2[0]}, [r0] variable 1030 void vst2q_lane_p16_ptr(__transfersize(2) poly16_t * ptr, poly16x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.16{d0[0], d2[0]}, [r0] variable 1031 void vst2_lane_u8_ptr(__transfersize(2) uint8_t * ptr, uint8x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.8{d0[0], d1[0]}, [r0] variable 1032 void vst2_lane_u16_ptr(__transfersize(2) uint16_t * ptr, uint16x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.16{d0[0], d1[0]}, [r0] variable 1033 void vst2_lane_u32_ptr(__transfersize(2) uint32_t * ptr, uint32x2x2_t * val, __constrange(0,1) int lane); \/\/ VST2.32{d0[0], d1[0]}, [r0] variable 1034 void vst2_lane_s8_ptr(__transfersize(2) int8_t * ptr, int8x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.8 {d0[0],d1[0]}, [r0] variable 1035 void vst2_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.16{d0[0], d1[0]}, [r0] variable 1036 void vst2_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x2x2_t * val, __constrange(0,1) int lane); \/\/ VST2.32{d0[0], d1[0]}, [r0] variable 1037 void vst2_lane_f16_ptr(__transfersize(2) __fp16 * ptr, float16x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.16{d0[0], d1[0]}, [r0] variable 1038 void vst2_lane_f32_ptr(__transfersize(2) float32_t * ptr, float32x2x2_t * val, __constrange(0,1) int lane); \/\/ VST2.32{d0[0], d1[0]}, [r0] variable 1039 void vst2_lane_p8_ptr(__transfersize(2) poly8_t * ptr, poly8x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.8{d0[0], d1[0]}, [r0] variable 1040 void vst2_lane_p16_ptr(__transfersize(2) poly16_t * ptr, poly16x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.16{d0[0], d1[0]}, [r0] variable 1041 void vst3q_lane_u16_ptr(__transfersize(3) uint16_t * ptr, uint16x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.16{d0[0], d2[0], d4[0]}, [r0] variable 1042 void vst3q_lane_u32_ptr(__transfersize(3) uint32_t * ptr, uint32x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.32{d0[0], d2[0], d4[0]}, [r0] variable 1043 void vst3q_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.16{d0[0], d2[0], d4[0]}, [r0] variable 1044 void vst3q_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.32{d0[0], d2[0], d4[0]}, [r0] variable 1045 void vst3q_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.16{d0[0], d2[0], d4[0]}, [r0] variable 1046 void vst3q_lane_f32_ptr(__transfersize(3) float32_t * ptr, float32x4x3_t * val, __constrange(0,3) int lane); \/\/VST3.32 {d0[0], d2[0], d4[0]}, [r0] variable 1047 void vst3q_lane_p16_ptr(__transfersize(3) poly16_t * ptr, poly16x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.16{d0[0], d2[0], d4[0]}, [r0] variable 1048 void vst3_lane_u8_ptr(__transfersize(3) uint8_t * ptr, uint8x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.8{d0[0], d1[0], d2[0]}, [r0] variable 1049 void vst3_lane_u16_ptr(__transfersize(3) uint16_t * ptr, uint16x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.16{d0[0], d1[0], d2[0]}, [r0] variable 1050 void vst3_lane_u32_ptr(__transfersize(3) uint32_t * ptr, uint32x2x3_t * val, __constrange(0,1) int lane); \/\/ VST3.32{d0[0], d1[0], d2[0]}, [r0] variable 1051 void vst3_lane_s8_ptr(__transfersize(3) int8_t * ptr, int8x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.8 {d0[0],d1[0], d2[0]}, [r0] variable 1052 void vst3_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.16{d0[0], d1[0], d2[0]}, [r0] variable 1053 void vst3_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x2x3_t * val, __constrange(0,1) int lane); \/\/ VST3.32{d0[0], d1[0], d2[0]}, [r0] variable 1054 void vst3_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.16{d0[0], d1[0], d2[0]}, [r0] variable 1055 void vst3_lane_f32_ptr(__transfersize(3) float32_t * ptr, float32x2x3_t * val, __constrange(0,1) int lane); \/\/ VST3.32{d0[0], d1[0], d2[0]}, [r0] variable 1056 void vst3_lane_p8_ptr(__transfersize(3) poly8_t * ptr, poly8x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.8{d0[0], d1[0], d2[0]}, [r0] variable 1057 void vst3_lane_p16_ptr(__transfersize(3) poly16_t * ptr, poly16x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.16{d0[0], d1[0], d2[0]}, [r0] variable 1058 void vst4q_lane_u16_ptr(__transfersize(4) uint16_t * ptr, uint16x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1059 void vst4q_lane_u32_ptr(__transfersize(4) uint32_t * ptr, uint32x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.32{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1060 void vst4q_lane_s16_ptr(__transfersize(4) int16_t * ptr, int16x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1061 void vst4q_lane_s32_ptr(__transfersize(4) int32_t * ptr, int32x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.32{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1062 void vst4q_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1063 void vst4q_lane_f32_ptr(__transfersize(4) float32_t * ptr, float32x4x4_t * val, __constrange(0,3) int lane); \/\/VST4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1064 void vst4q_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1065 void vst4_lane_u8_ptr(__transfersize(4) uint8_t * ptr, uint8x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.8{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1066 void vst4_lane_u16_ptr(__transfersize(4) uint16_t * ptr, uint16x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1067 void vst4_lane_u32_ptr(__transfersize(4) uint32_t * ptr, uint32x2x4_t * val, __constrange(0,1) int lane); \/\/ VST4.32{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1068 void vst4_lane_s8_ptr(__transfersize(4) int8_t * ptr, int8x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.8 {d0[0],d1[0], d2[0], d3[0]}, [r0] variable 1069 void vst4_lane_s16_ptr(__transfersize(4) int16_t * ptr, int16x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1070 void vst4_lane_s32_ptr(__transfersize(4) int32_t * ptr, int32x2x4_t * val, __constrange(0,1) int lane); \/\/ VST4.32{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1071 void vst4_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1072 void vst4_lane_f32_ptr(__transfersize(4) float32_t * ptr, float32x2x4_t * val, __constrange(0,1) int lane); \/\/ VST4.32{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1073 void vst4_lane_p8_ptr(__transfersize(4) poly8_t * ptr, poly8x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.8{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1074 void vst4_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 4553 uint8x16_t vld1q_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x16_t vec, __constrange(0,15) int lane); \/\/ VLD1.8 {d0[0]}, [r0] variable 4556 uint16x8_t vld1q_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 4559 uint32x4_t vld1q_lane_u32(__transfersize(1) uint32_t const * ptr, uint32x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 4562 uint64x2_t vld1q_lane_u64(__transfersize(1) uint64_t const * ptr, uint64x2_t vec, __constrange(0,1) int lane); \/\/ VLD1.64 {d0}, [r0] variable 4565 int8x16_t vld1q_lane_s8(__transfersize(1) int8_t const * ptr, int8x16_t vec, __constrange(0,15) int lane); \/\/ VLD1.8 {d0[0]}, [r0] variable 4568 int16x8_t vld1q_lane_s16(__transfersize(1) int16_t const * ptr, int16x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 4571 int32x4_t vld1q_lane_s32(__transfersize(1) int32_t const * ptr, int32x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 4576 float32x4_t vld1q_lane_f32(__transfersize(1) float32_t const * ptr, float32x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 4584 int64x2_t vld1q_lane_s64(__transfersize(1) int64_t const * ptr, int64x2_t vec, __constrange(0,1) int lane); \/\/ VLD1.64 {d0}, [r0] variable 4587 poly8x16_t vld1q_lane_p8(__transfersize(1) poly8_t const * ptr, poly8x16_t vec, __constrange(0,15) int lane); \/\/ VLD1.8 {d0[0]}, [r0] variable 4590 poly16x8_t vld1q_lane_p16(__transfersize(1) poly16_t const * ptr, poly16x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 4695 void vst1q_lane_u8(__transfersize(1) uint8_t * ptr, uint8x16_t val, __constrange(0,15) int lane); \/\/ VST1.8 {d0[0]}, [r0] variable 4698 void vst1q_lane_u16(__transfersize(1) uint16_t * ptr, uint16x8_t val, __constrange(0,7) int lane); \/\/ VST1.16 {d0[0]}, [r0] variable 4701 void vst1q_lane_u32(__transfersize(1) uint32_t * ptr, uint32x4_t val, __constrange(0,3) int lane); \/\/ VST1.32 {d0[0]}, [r0] variable 4704 void vst1q_lane_u64(__transfersize(1) uint64_t * ptr, uint64x2_t val, __constrange(0,1) int lane); \/\/ VST1.64 {d0}, [r0] variable 4707 void vst1q_lane_s8(__transfersize(1) int8_t * ptr, int8x16_t val, __constrange(0,15) int lane); \/\/ VST1.8 {d0[0]}, [r0] variable 4710 void vst1q_lane_s16(__transfersize(1) int16_t * ptr, int16x8_t val, __constrange(0,7) int lane); \/\/ VST1.16 {d0[0]}, [r0] variable 4713 void vst1q_lane_s32(__transfersize(1) int32_t * ptr, int32x4_t val, __constrange(0,3) int lane); \/\/ VST1.32 {d0[0]}, [r0] variable 4716 void vst1q_lane_s64(__transfersize(1) int64_t * ptr, int64x2_t val, __constrange(0,1) int lane); \/\/ VST1.64 {d0}, [r0] variable 4719 void vst1q_lane_f16(__transfersize(1) __fp16 * ptr, float16x8_t val, __constrange(0,7) int lane); \/\/ VST1.16 {d0[0]}, [r0] variable 4722 void vst1q_lane_f32(__transfersize(1) float32_t * ptr, float32x4_t val, __constrange(0,3) int lane); \/\/ VST1.32 {d0[0]}, [r0] variable 4730 void vst1q_lane_p8(__transfersize(1) poly8_t * ptr, poly8x16_t val, __constrange(0,15) int lane); \/\/ VST1.8 {d0[0]}, [r0] variable 4733 void vst1q_lane_p16(__transfersize(1) poly16_t * ptr, poly16x8_t val, __constrange(0,7) int lane); \/\/ VST1.16 {d0[0]}, [r0] variable 5711 int8x8x2_t vld2_lane_s8_ptr(__transfersize(2) int8_t const * ptr, int8x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.8 {d0[0], d1[0]}, [r0] variable 5715 int16x4x2_t vld2_lane_s16_ptr(__transfersize(2) int16_t const * ptr, int16x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.16 {d0[0], d1[0]}, [r0] variable 5719 int32x2x2_t vld2_lane_s32_ptr(__transfersize(2) int32_t const * ptr, int32x2x2_t * src, __constrange(0,1) int lane); \/\/ VLD2.32 {d0[0], d1[0]}, [r0] variable 5725 float32x2x2_t vld2_lane_f32_ptr(__transfersize(2) float32_t const * ptr, float32x2x2_t * src,__constrange(0,1) int lane); \/\/ VLD2.32 {d0[0], d1[0]}, [r0] variable 5729 poly8x8x2_t vld2_lane_p8_ptr(__transfersize(2) poly8_t const * ptr, poly8x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.8 {d0[0], d1[0]}, [r0] variable 5733 poly16x4x2_t vld2_lane_p16_ptr(__transfersize(2) poly16_t const * ptr, poly16x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.16 {d0[0], d1[0]}, [r0] variable 5785 float16x8x3_t vld3q_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 5800 poly16x8x3_t vld3q_lane_p16_ptr(__transfersize(3) poly16_t const * ptr, poly16x8x3_t * src,__constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 5836 int8x8x3_t vld3_lane_s8_ptr(__transfersize(3) int8_t const * ptr, int8x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.8 {d0[0], d1[0], d2[0]}, [r0] variable 5839 int16x4x3_t vld3_lane_s16_ptr(__transfersize(3) int16_t const * ptr, int16x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 5842 int32x2x3_t vld3_lane_s32_ptr(__transfersize(3) int32_t const * ptr, int32x2x3_t * src, __constrange(0,1) int lane); \/\/ VLD3.32 {d0[0], d1[0], d2[0]}, [r0] variable 5845 float16x4x3_t vld3_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 5893 int16x8x4_t vld4q_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 5897 int32x4x4_t vld4q_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 5901 float16x8x4_t vld4q_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 5917 poly16x8x4_t vld4q_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x8x4_t * src,__constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 5957 int8x8x4_t vld4_lane_s8_ptr(__transfersize(4) int8_t const * ptr, int8x8x4_t * src, __constrange(0,7) int lane); variable 5961 int16x4x4_t vld4_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x4x4_t * src, __constrange(0,3) int lane); variable 5965 int32x2x4_t vld4_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x2x4_t * src, __constrange(0,1) int lane); variable 5969 float16x4x4_t vld4_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x4x4_t * src, __constrange(0,3) int lane); variable 5981 poly8x8x4_t vld4_lane_p8_ptr(__transfersize(4) poly8_t const * ptr, poly8x8x4_t * src, __constrange(0,7) int lane); variable 5985 poly16x4x4_t vld4_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x4x4_t * src, __constrange(0,3) int lane); variable 6588 void vst2q_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x8x2_t * val, __constrange(0,7) int lane); variable 6592 void vst2q_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x4x2_t * val, __constrange(0,3) int lane); variable 6596 void vst2q_lane_f16_ptr(__transfersize(2) __fp16 * ptr, float16x8x2_t * val, __constrange(0,7) int lane); variable 6608 void vst2q_lane_p16_ptr(__transfersize(2) poly16_t * ptr, poly16x8x2_t * val, __constrange(0,7) int lane); variable 6612 void vst2_lane_u16_ptr(__transfersize(2) uint16_t * ptr, uint16x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.16 {d0[0], d1[0]}, [r0] variable 6616 void vst2_lane_u32_ptr(__transfersize(2) uint32_t * ptr, uint32x2x2_t * val, __constrange(0,1) int lane); \/\/ VST2.32 {d0[0], d1[0]}, [r0] variable 6620 void vst2_lane_s8_ptr(__transfersize(2) int8_t * ptr, int8x8x2_t * val, __constrange(0,7) int lane); variable 6624 void vst2_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x4x2_t * val, __constrange(0,3) int lane); variable 6628 void vst2_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x2x2_t * val, __constrange(0,1) int lane); variable 6634 void vst2_lane_f32_ptr(__transfersize(2) float32_t * ptr, float32x2x2_t * val, __constrange(0,1) int lane); \/\/ VST2.32 {d0[0], d1[0]}, [r0] variable 6662 void vst3q_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x8x3_t * val, __constrange(0,7) int lane); variable 6666 void vst3q_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x4x3_t * val, __constrange(0,3) int lane); variable 6670 void vst3q_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x8x3_t * val, __constrange(0,7) int lane); variable 6683 void vst3_lane_s8_ptr(__transfersize(3) int8_t * ptr, int8x8x3_t * val, __constrange(0,7) int lane); variable 6687 void vst3_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x4x3_t * val, __constrange(0,3) int lane); variable 6691 void vst3_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x2x3_t * val, __constrange(0,1) int lane); variable 6695 void vst3_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x4x3_t * val, __constrange(0,3) int lane); variable 6699 void vst3_lane_f32_ptr(__transfersize(3) float32_t * ptr, float32x2x3_t * val, __constrange(0,1) int lane); variable 6703 void vst3_lane_p8_ptr(__transfersize(3) poly8_t * ptr, poly8x8x3_t * val, __constrange(0,7) int lane); variable 6707 void vst3_lane_p16_ptr(__transfersize(3) poly16_t * ptr, poly16x4x3_t * val, __constrange(0,3) int lane); variable 6729 void vst4q_lane_s16_ptr(__transfersize(4) int16_t * ptr, int16x8x4_t * val, __constrange(0,7) int lane); variable 6733 void vst4q_lane_s32_ptr(__transfersize(4) int32_t * ptr, int32x4x4_t * val, __constrange(0,3) int lane); variable 6737 void vst4q_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x8x4_t * val, __constrange(0,7) int lane); variable 6751 void vst4q_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x8x4_t * val, __constrange(0,7) int lane); variable 6795 void vst4_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x4x4_t * val, __constrange(0,3) int lane); variable 6802 void vst4_lane_p8_ptr(__transfersize(4) poly8_t * ptr, poly8x8x4_t * val, __constrange(0,7) int lane); variable 6806 void vst4_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x4x4_t * val, __constrange(0,3) int lane); variable [all...] |
arm_neon.h | 417 //Vector add: vadd -> Vr[i]:=Va[i]+Vb[i], Vr, Va, Vb have equal lane sizes. 436 //Vector long add: vaddl -> Vr[i]:=Va[i]+Vb[i], Va, Vb have equal lane sizes, result is a 128 bit vector of lanes that are twice the width. 525 //multiply lane 1214 uint8x16_t vld1q_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x16_t vec, __constrange(0,15) int lane); \/\/VLD1.8 {d0[0]}, [r0] variable 1215 uint16x8_t vld1q_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 1216 uint32x4_t vld1q_lane_u32(__transfersize(1) uint32_t const * ptr, uint32x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 1217 uint64x2_t vld1q_lane_u64(__transfersize(1) uint64_t const * ptr, uint64x2_t vec, __constrange(0,1) int lane); \/\/ VLD1.64 {d0}, [r0] variable 1218 int8x16_t vld1q_lane_s8(__transfersize(1) int8_t const * ptr, int8x16_t vec, __constrange(0,15) int lane); \/\/VLD1.8 {d0[0]}, [r0] variable 1219 int16x8_t vld1q_lane_s16(__transfersize(1) int16_t const * ptr, int16x8_t vec, __constrange(0,7) int lane); \/\/VLD1.16 {d0[0]}, [r0] variable 1220 int32x4_t vld1q_lane_s32(__transfersize(1) int32_t const * ptr, int32x4_t vec, __constrange(0,3) int lane); \/\/VLD1.32 {d0[0]}, [r0] variable 1221 float16x8_t vld1q_lane_f16(__transfersize(1) __fp16 const * ptr, float16x8_t vec, __constrange(0,7) int lane); \/\/VLD1.16 {d0[0]}, [r0] variable 1222 float32x4_t vld1q_lane_f32(__transfersize(1) float32_t const * ptr, float32x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 1223 int64x2_t vld1q_lane_s64(__transfersize(1) int64_t const * ptr, int64x2_t vec, __constrange(0,1) int lane); \/\/VLD1.64 {d0}, [r0] variable 1224 poly8x16_t vld1q_lane_p8(__transfersize(1) poly8_t const * ptr, poly8x16_t vec, __constrange(0,15) int lane); \/\/VLD1.8 {d0[0]}, [r0] variable 1225 poly16x8_t vld1q_lane_p16(__transfersize(1) poly16_t const * ptr, poly16x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 1226 uint8x8_t vld1_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x8_t vec, __constrange(0,7) int lane); \/\/VLD1.8 {d0[0]}, [r0] variable 1227 uint16x4_t vld1_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x4_t vec, __constrange(0,3) int lane); \/\/VLD1.16 {d0[0]}, [r0] variable 1228 uint32x2_t vld1_lane_u32(__transfersize(1) uint32_t const * ptr, uint32x2_t vec, __constrange(0,1) int lane); \/\/VLD1.32 {d0[0]}, [r0] variable 1229 uint64x1_t vld1_lane_u64(__transfersize(1) uint64_t const * ptr, uint64x1_t vec, __constrange(0,0) int lane); \/\/VLD1.64 {d0}, [r0] variable 1230 int8x8_t vld1_lane_s8(__transfersize(1) int8_t const * ptr, int8x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.8{d0[0]}, [r0] variable 1231 int16x4_t vld1_lane_s16(__transfersize(1) int16_t const * ptr, int16x4_t vec, __constrange(0,3) int lane); \/\/VLD1.16 {d0[0]}, [r0] variable 1232 int32x2_t vld1_lane_s32(__transfersize(1) int32_t const * ptr, int32x2_t vec, __constrange(0,1) int lane); \/\/VLD1.32 {d0[0]}, [r0] variable 1233 float16x4_t vld1q_lane_f16(__transfersize(1) __fp16 const * ptr, float16x4_t vec, __constrange(0,3) int lane); \/\/VLD1.16 {d0[0]}, [r0] variable 1234 float32x2_t vld1_lane_f32(__transfersize(1) float32_t const * ptr, float32x2_t vec, __constrange(0,1) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 1235 int64x1_t vld1_lane_s64(__transfersize(1) int64_t const * ptr, int64x1_t vec, __constrange(0,0) int lane); \/\/VLD1.64 {d0}, [r0] variable 1236 poly8x8_t vld1_lane_p8(__transfersize(1) poly8_t const * ptr, poly8x8_t vec, __constrange(0,7) int lane); \/\/VLD1.8 {d0[0]}, [r0] variable 1237 poly16x4_t vld1_lane_p16(__transfersize(1) poly16_t const * ptr, poly16x4_t vec, __constrange(0,3) int lane); \/\/VLD1.16 {d0[0]}, [r0] variable 1397 uint16x8x2_t vld2q_lane_u16_ptr(__transfersize(2) uint16_t const * ptr, uint16x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.16 {d0[0], d2[0]}, [r0] variable 1398 uint32x4x2_t vld2q_lane_u32_ptr(__transfersize(2) uint32_t const * ptr, uint32x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.32 {d0[0], d2[0]}, [r0] variable 1399 int16x8x2_t vld2q_lane_s16_ptr(__transfersize(2) int16_t const * ptr, int16x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.16 {d0[0], d2[0]}, [r0] variable 1400 int32x4x2_t vld2q_lane_s32_ptr(__transfersize(2) int32_t const * ptr, int32x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.32 {d0[0], d2[0]}, [r0] variable 1401 float16x8x2_t vld2q_lane_f16_ptr(__transfersize(2) __fp16 const * ptr, float16x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.16 {d0[0], d2[0]}, [r0] variable 1402 float32x4x2_t vld2q_lane_f32_ptr(__transfersize(2) float32_t const * ptr, float32x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.32 {d0[0], d2[0]}, [r0] variable 1403 poly16x8x2_t vld2q_lane_p16_ptr(__transfersize(2) poly16_t const * ptr, poly16x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.16 {d0[0], d2[0]}, [r0] variable 1404 uint8x8x2_t vld2_lane_u8_ptr(__transfersize(2) uint8_t const * ptr, uint8x8x2_t * src, __constrange(0,7) int lane); \/\/VLD2.8 {d0[0], d1[0]}, [r0] variable 1405 uint16x4x2_t vld2_lane_u16_ptr(__transfersize(2) uint16_t const * ptr, uint16x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.16 {d0[0], d1[0]}, [r0] variable 1406 uint32x2x2_t vld2_lane_u32_ptr(__transfersize(2) uint32_t const * ptr, uint32x2x2_t * src, __constrange(0,1) int lane); \/\/ VLD2.32 {d0[0], d1[0]}, [r0] variable 1407 int8x8x2_t vld2_lane_s8_ptr(__transfersize(2) int8_t const * ptr, int8x8x2_t * src, __constrange(0,7) int lane); \/\/VLD2.8 {d0[0], d1[0]}, [r0] variable 1408 int16x4x2_t vld2_lane_s16_ptr(__transfersize(2) int16_t const * ptr, int16x4x2_t * src, __constrange(0,3) int lane); \/\/VLD2.16 {d0[0], d1[0]}, [r0] variable 1409 int32x2x2_t vld2_lane_s32_ptr(__transfersize(2) int32_t const * ptr, int32x2x2_t * src, __constrange(0,1) int lane); \/\/VLD2.32 {d0[0], d1[0]}, [r0] variable 1411 float32x2x2_t vld2_lane_f32_ptr(__transfersize(2) float32_t const * ptr, float32x2x2_t * src, __constrange(0,1) int lane); \/\/ VLD2.32 {d0[0], d1[0]}, [r0] variable 1412 poly8x8x2_t vld2_lane_p8_ptr(__transfersize(2) poly8_t const * ptr, poly8x8x2_t * src, __constrange(0,7) int lane); \/\/VLD2.8 {d0[0], d1[0]}, [r0] variable 1413 poly16x4x2_t vld2_lane_p16_ptr(__transfersize(2) poly16_t const * ptr, poly16x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.16 {d0[0], d1[0]}, [r0] variable 1414 uint16x8x3_t vld3q_lane_u16_ptr(__transfersize(3) uint16_t const * ptr, uint16x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 1415 uint32x4x3_t vld3q_lane_u32_ptr(__transfersize(3) uint32_t const * ptr, uint32x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.32 {d0[0], d2[0], d4[0]}, [r0] variable 1416 int16x8x3_t vld3q_lane_s16_ptr(__transfersize(3) int16_t const * ptr, int16x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 1417 int32x4x3_t vld3q_lane_s32_ptr(__transfersize(3) int32_t const * ptr, int32x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.32 {d0[0], d2[0], d4[0]}, [r0] variable 1418 float16x8x3_t vld3q_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 1419 float32x4x3_t vld3q_lane_f32_ptr(__transfersize(3) float32_t const * ptr, float32x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.32 {d0[0], d2[0], d4[0]}, [r0] variable 1420 poly16x8x3_t vld3q_lane_p16_ptr(__transfersize(3) poly16_t const * ptr, poly16x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 1421 uint8x8x3_t vld3_lane_u8_ptr(__transfersize(3) uint8_t const * ptr, uint8x8x3_t * src, __constrange(0,7) int lane); \/\/VLD3.8 {d0[0], d1[0], d2[0]}, [r0] variable 1422 uint16x4x3_t vld3_lane_u16_ptr(__transfersize(3) uint16_t const * ptr, uint16x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 1423 uint32x2x3_t vld3_lane_u32_ptr(__transfersize(3) uint32_t const * ptr, uint32x2x3_t * src, __constrange(0,1) int lane); \/\/ VLD3.32 {d0[0], d1[0], d2[0]}, [r0] variable 1424 int8x8x3_t vld3_lane_s8_ptr(__transfersize(3) int8_t const * ptr, int8x8x3_t * src, __constrange(0,7) int lane); \/\/VLD3.8 {d0[0], d1[0], d2[0]}, [r0] variable 1425 int16x4x3_t vld3_lane_s16_ptr(__transfersize(3) int16_t const * ptr, int16x4x3_t * src, __constrange(0,3) int lane); \/\/VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 1426 int32x2x3_t vld3_lane_s32_ptr(__transfersize(3) int32_t const * ptr, int32x2x3_t * src, __constrange(0,1) int lane); \/\/VLD3.32 {d0[0], d1[0], d2[0]}, [r0] variable 1427 float16x4x3_t vld3_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 1428 float32x2x3_t vld3_lane_f32_ptr(__transfersize(3) float32_t const * ptr, float32x2x3_t * src, __constrange(0,1) int lane); \/\/ VLD3.32 {d0[0], d1[0], d2[0]}, [r0] variable 1429 poly8x8x3_t vld3_lane_p8_ptr(__transfersize(3) poly8_t const * ptr, poly8x8x3_t * src, __constrange(0,7) int lane); \/\/VLD3.8 {d0[0], d1[0], d2[0]}, [r0] variable 1430 poly16x4x3_t vld3_lane_p16_ptr(__transfersize(3) poly16_t const * ptr, poly16x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 1431 uint16x8x4_t vld4q_lane_u16_ptr(__transfersize(4) uint16_t const * ptr, uint16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1432 uint32x4x4_t vld4q_lane_u32_ptr(__transfersize(4) uint32_t const * ptr, uint32x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1433 int16x8x4_t vld4q_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1434 int32x4x4_t vld4q_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1435 float16x8x4_t vld4q_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1436 float32x4x4_t vld4q_lane_f32_ptr(__transfersize(4) float32_t const * ptr, float32x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1437 poly16x8x4_t vld4q_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1438 uint8x8x4_t vld4_lane_u8_ptr(__transfersize(4) uint8_t const * ptr, uint8x8x4_t * src, __constrange(0,7) int lane); \/\/VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1439 uint16x4x4_t vld4_lane_u16_ptr(__transfersize(4) uint16_t const * ptr, uint16x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1440 uint32x2x4_t vld4_lane_u32_ptr(__transfersize(4) uint32_t const * ptr, uint32x2x4_t * src, __constrange(0,1) int lane); \/\/ VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1441 int8x8x4_t vld4_lane_s8_ptr(__transfersize(4) int8_t const * ptr, int8x8x4_t * src, __constrange(0,7) int lane); \/\/VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1442 int16x4x4_t vld4_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x4x4_t * src, __constrange(0,3) int lane); \/\/VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1443 int32x2x4_t vld4_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x2x4_t * src, __constrange(0,1) int lane); \/\/VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1444 float16x4x4_t vld4_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1445 float32x2x4_t vld4_lane_f32_ptr(__transfersize(4) float32_t const * ptr, float32x2x4_t * src, __constrange(0,1) int lane); \/\/ VLD4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1446 poly8x8x4_t vld4_lane_p8_ptr(__transfersize(4) poly8_t const * ptr, poly8x8x4_t * src, __constrange(0,7) int lane); \/\/VLD4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1447 poly16x4x4_t vld4_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.16 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1516 void vst2q_lane_u16_ptr(__transfersize(2) uint16_t * ptr, uint16x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.16{d0[0], d2[0]}, [r0] variable 1517 void vst2q_lane_u32_ptr(__transfersize(2) uint32_t * ptr, uint32x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.32{d0[0], d2[0]}, [r0] variable 1518 void vst2q_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.16{d0[0], d2[0]}, [r0] variable 1519 void vst2q_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.32{d0[0], d2[0]}, [r0] variable 1520 void vst2q_lane_f16_ptr(__transfersize(2) __fp16 * ptr, float16x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.16{d0[0], d2[0]}, [r0] variable 1521 void vst2q_lane_f32_ptr(__transfersize(2) float32_t * ptr, float32x4x2_t * val, __constrange(0,3) int lane); \/\/VST2.32 {d0[0], d2[0]}, [r0] variable 1522 void vst2q_lane_p16_ptr(__transfersize(2) poly16_t * ptr, poly16x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.16{d0[0], d2[0]}, [r0] variable 1523 void vst2_lane_u8_ptr(__transfersize(2) uint8_t * ptr, uint8x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.8{d0[0], d1[0]}, [r0] variable 1524 void vst2_lane_u16_ptr(__transfersize(2) uint16_t * ptr, uint16x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.16{d0[0], d1[0]}, [r0] variable 1525 void vst2_lane_u32_ptr(__transfersize(2) uint32_t * ptr, uint32x2x2_t * val, __constrange(0,1) int lane); \/\/ VST2.32{d0[0], d1[0]}, [r0] variable 1526 void vst2_lane_s8_ptr(__transfersize(2) int8_t * ptr, int8x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.8 {d0[0],d1[0]}, [r0] variable 1527 void vst2_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.16{d0[0], d1[0]}, [r0] variable 1528 void vst2_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x2x2_t * val, __constrange(0,1) int lane); \/\/ VST2.32{d0[0], d1[0]}, [r0] variable 1529 void vst2_lane_f16_ptr(__transfersize(2) __fp16 * ptr, float16x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.16{d0[0], d1[0]}, [r0] variable 1530 void vst2_lane_f32_ptr(__transfersize(2) float32_t * ptr, float32x2x2_t * val, __constrange(0,1) int lane); \/\/ VST2.32{d0[0], d1[0]}, [r0] variable 1531 void vst2_lane_p8_ptr(__transfersize(2) poly8_t * ptr, poly8x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.8{d0[0], d1[0]}, [r0] variable 1532 void vst2_lane_p16_ptr(__transfersize(2) poly16_t * ptr, poly16x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.16{d0[0], d1[0]}, [r0] variable 1533 void vst3q_lane_u16_ptr(__transfersize(3) uint16_t * ptr, uint16x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.16{d0[0], d2[0], d4[0]}, [r0] variable 1534 void vst3q_lane_u32_ptr(__transfersize(3) uint32_t * ptr, uint32x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.32{d0[0], d2[0], d4[0]}, [r0] variable 1535 void vst3q_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.16{d0[0], d2[0], d4[0]}, [r0] variable 1536 void vst3q_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.32{d0[0], d2[0], d4[0]}, [r0] variable 1537 void vst3q_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.16{d0[0], d2[0], d4[0]}, [r0] variable 1538 void vst3q_lane_f32_ptr(__transfersize(3) float32_t * ptr, float32x4x3_t * val, __constrange(0,3) int lane); \/\/VST3.32 {d0[0], d2[0], d4[0]}, [r0] variable 1539 void vst3q_lane_p16_ptr(__transfersize(3) poly16_t * ptr, poly16x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.16{d0[0], d2[0], d4[0]}, [r0] variable 1540 void vst3_lane_u8_ptr(__transfersize(3) uint8_t * ptr, uint8x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.8{d0[0], d1[0], d2[0]}, [r0] variable 1541 void vst3_lane_u16_ptr(__transfersize(3) uint16_t * ptr, uint16x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.16{d0[0], d1[0], d2[0]}, [r0] variable 1542 void vst3_lane_u32_ptr(__transfersize(3) uint32_t * ptr, uint32x2x3_t * val, __constrange(0,1) int lane); \/\/ VST3.32{d0[0], d1[0], d2[0]}, [r0] variable 1543 void vst3_lane_s8_ptr(__transfersize(3) int8_t * ptr, int8x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.8 {d0[0],d1[0], d2[0]}, [r0] variable 1544 void vst3_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.16{d0[0], d1[0], d2[0]}, [r0] variable 1545 void vst3_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x2x3_t * val, __constrange(0,1) int lane); \/\/ VST3.32{d0[0], d1[0], d2[0]}, [r0] variable 1546 void vst3_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.16{d0[0], d1[0], d2[0]}, [r0] variable 1547 void vst3_lane_f32_ptr(__transfersize(3) float32_t * ptr, float32x2x3_t * val, __constrange(0,1) int lane); \/\/ VST3.32{d0[0], d1[0], d2[0]}, [r0] variable 1548 void vst3_lane_p8_ptr(__transfersize(3) poly8_t * ptr, poly8x8x3_t * val, __constrange(0,7) int lane); \/\/ VST3.8{d0[0], d1[0], d2[0]}, [r0] variable 1549 void vst3_lane_p16_ptr(__transfersize(3) poly16_t * ptr, poly16x4x3_t * val, __constrange(0,3) int lane); \/\/ VST3.16{d0[0], d1[0], d2[0]}, [r0] variable 1550 void vst4q_lane_u16_ptr(__transfersize(4) uint16_t * ptr, uint16x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1551 void vst4q_lane_u32_ptr(__transfersize(4) uint32_t * ptr, uint32x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.32{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1552 void vst4q_lane_s16_ptr(__transfersize(4) int16_t * ptr, int16x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1553 void vst4q_lane_s32_ptr(__transfersize(4) int32_t * ptr, int32x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.32{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1554 void vst4q_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1555 void vst4q_lane_f32_ptr(__transfersize(4) float32_t * ptr, float32x4x4_t * val, __constrange(0,3) int lane); \/\/VST4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1556 void vst4q_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.16{d0[0], d2[0], d4[0], d6[0]}, [r0] variable 1557 void vst4_lane_u8_ptr(__transfersize(4) uint8_t * ptr, uint8x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.8{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1558 void vst4_lane_u16_ptr(__transfersize(4) uint16_t * ptr, uint16x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1559 void vst4_lane_u32_ptr(__transfersize(4) uint32_t * ptr, uint32x2x4_t * val, __constrange(0,1) int lane); \/\/ VST4.32{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1560 void vst4_lane_s8_ptr(__transfersize(4) int8_t * ptr, int8x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.8 {d0[0],d1[0], d2[0], d3[0]}, [r0] variable 1561 void vst4_lane_s16_ptr(__transfersize(4) int16_t * ptr, int16x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1562 void vst4_lane_s32_ptr(__transfersize(4) int32_t * ptr, int32x2x4_t * val, __constrange(0,1) int lane); \/\/ VST4.32{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1563 void vst4_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1564 void vst4_lane_f32_ptr(__transfersize(4) float32_t * ptr, float32x2x4_t * val, __constrange(0,1) int lane); \/\/ VST4.32{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1565 void vst4_lane_p8_ptr(__transfersize(4) poly8_t * ptr, poly8x8x4_t * val, __constrange(0,7) int lane); \/\/ VST4.8{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 1566 void vst4_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x4x4_t * val, __constrange(0,3) int lane); \/\/ VST4.16{d0[0], d1[0], d2[0], d3[0]}, [r0] variable 9283 uint8x16_t vld1q_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x16_t vec, __constrange(0,15) int lane); \/\/ VLD1.8 {d0[0]}, [r0] variable 9286 uint16x8_t vld1q_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 9289 uint32x4_t vld1q_lane_u32(__transfersize(1) uint32_t const * ptr, uint32x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 9292 uint64x2_t vld1q_lane_u64(__transfersize(1) uint64_t const * ptr, uint64x2_t vec, __constrange(0,1) int lane); \/\/ VLD1.64 {d0}, [r0] variable 9296 int8x16_t vld1q_lane_s8(__transfersize(1) int8_t const * ptr, int8x16_t vec, __constrange(0,15) int lane); \/\/ VLD1.8 {d0[0]}, [r0] variable 9299 int16x8_t vld1q_lane_s16(__transfersize(1) int16_t const * ptr, int16x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 9302 int32x4_t vld1q_lane_s32(__transfersize(1) int32_t const * ptr, int32x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 9305 float16x8_t vld1q_lane_f16(__transfersize(1) __fp16 const * ptr, float16x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 9308 float32x4_t vld1q_lane_f32(__transfersize(1) float32_t const * ptr, float32x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 9317 int64x2_t vld1q_lane_s64(__transfersize(1) int64_t const * ptr, int64x2_t vec, __constrange(0,1) int lane); \/\/ VLD1.64 {d0}, [r0] variable 9320 poly8x16_t vld1q_lane_p8(__transfersize(1) poly8_t const * ptr, poly8x16_t vec, __constrange(0,15) int lane); \/\/ VLD1.8 {d0[0]}, [r0] variable 9323 poly16x8_t vld1q_lane_p16(__transfersize(1) poly16_t const * ptr, poly16x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 9326 uint8x8_t vld1_lane_u8(__transfersize(1) uint8_t const * ptr, uint8x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.8 {d0[0]}, [r0] variable 9335 uint16x4_t vld1_lane_u16(__transfersize(1) uint16_t const * ptr, uint16x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 9344 uint32x2_t vld1_lane_u32(__transfersize(1) uint32_t const * ptr, uint32x2_t vec, __constrange(0,1) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 9353 uint64x1_t vld1_lane_u64(__transfersize(1) uint64_t const * ptr, uint64x1_t vec, __constrange(0,0) int lane); \/\/ VLD1.64 {d0}, [r0] variable 9362 int8x8_t vld1_lane_s8(__transfersize(1) int8_t const * ptr, int8x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.8 {d0[0]}, [r0] variable 9365 int16x4_t vld1_lane_s16(__transfersize(1) int16_t const * ptr, int16x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 9368 int32x2_t vld1_lane_s32(__transfersize(1) int32_t const * ptr, int32x2_t vec, __constrange(0,1) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 9371 float16x4_t vld1_lane_f16(__transfersize(1) __fp16 const * ptr, float16x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 9374 float32x2_t vld1_lane_f32(__transfersize(1) float32_t const * ptr, float32x2_t vec, __constrange(0,1) int lane); \/\/ VLD1.32 {d0[0]}, [r0] variable 9383 int64x1_t vld1_lane_s64(__transfersize(1) int64_t const * ptr, int64x1_t vec, __constrange(0,0) int lane); \/\/ VLD1.64 {d0}, [r0] variable 9386 poly8x8_t vld1_lane_p8(__transfersize(1) poly8_t const * ptr, poly8x8_t vec, __constrange(0,7) int lane); \/\/ VLD1.8 {d0[0]}, [r0] variable 9389 poly16x4_t vld1_lane_p16(__transfersize(1) poly16_t const * ptr, poly16x4_t vec, __constrange(0,3) int lane); \/\/ VLD1.16 {d0[0]}, [r0] variable 9630 void vst1q_lane_u8(__transfersize(1) uint8_t * ptr, uint8x16_t val, __constrange(0,15) int lane); \/\/ VST1.8 {d0[0]}, [r0] variable 9633 void vst1q_lane_u16(__transfersize(1) uint16_t * ptr, uint16x8_t val, __constrange(0,7) int lane); \/\/ VST1.16 {d0[0]}, [r0] variable 9636 void vst1q_lane_u32(__transfersize(1) uint32_t * ptr, uint32x4_t val, __constrange(0,3) int lane); \/\/ VST1.32 {d0[0]}, [r0] variable 9639 void vst1q_lane_u64(__transfersize(1) uint64_t * ptr, uint64x2_t val, __constrange(0,1) int lane); \/\/ VST1.64 {d0}, [r0] variable 9642 void vst1q_lane_s8(__transfersize(1) int8_t * ptr, int8x16_t val, __constrange(0,15) int lane); \/\/ VST1.8 {d0[0]}, [r0] variable 9645 void vst1q_lane_s16(__transfersize(1) int16_t * ptr, int16x8_t val, __constrange(0,7) int lane); \/\/ VST1.16 {d0[0]}, [r0] variable 9648 void vst1q_lane_s32(__transfersize(1) int32_t * ptr, int32x4_t val, __constrange(0,3) int lane); \/\/ VST1.32 {d0[0]}, [r0] variable 9651 void vst1q_lane_s64(__transfersize(1) int64_t * ptr, int64x2_t val, __constrange(0,1) int lane); \/\/ VST1.64 {d0}, [r0] variable 9654 void vst1q_lane_f16(__transfersize(1) __fp16 * ptr, float16x8_t val, __constrange(0,7) int lane); \/\/ VST1.16 {d0[0]}, [r0] variable 9657 void vst1q_lane_f32(__transfersize(1) float32_t * ptr, float32x4_t val, __constrange(0,3) int lane); \/\/ VST1.32 {d0[0]}, [r0] variable 9665 void vst1q_lane_p8(__transfersize(1) poly8_t * ptr, poly8x16_t val, __constrange(0,15) int lane); \/\/ VST1.8 {d0[0]}, [r0] variable 9668 void vst1q_lane_p16(__transfersize(1) poly16_t * ptr, poly16x8_t val, __constrange(0,7) int lane); \/\/ VST1.16 {d0[0]}, [r0] variable 9671 void vst1_lane_u8(__transfersize(1) uint8_t * ptr, uint8x8_t val, __constrange(0,7) int lane); \/\/ VST1.8 {d0[0]}, [r0] variable 9677 void vst1_lane_u16(__transfersize(1) uint16_t * ptr, uint16x4_t val, __constrange(0,3) int lane); \/\/ VST1.16 {d0[0]}, [r0] variable 9683 void vst1_lane_u32(__transfersize(1) uint32_t * ptr, uint32x2_t val, __constrange(0,1) int lane); \/\/ VST1.32 {d0[0]}, [r0] variable 9689 void vst1_lane_u64(__transfersize(1) uint64_t * ptr, uint64x1_t val, __constrange(0,0) int lane); \/\/ VST1.64 {d0}, [r0] variable 9695 void vst1_lane_s8(__transfersize(1) int8_t * ptr, int8x8_t val, __constrange(0,7) int lane); \/\/ VST1.8 {d0[0]}, [r0] variable 9698 void vst1_lane_s16(__transfersize(1) int16_t * ptr, int16x4_t val, __constrange(0,3) int lane); \/\/ VST1.16 {d0[0]}, [r0] variable 9701 void vst1_lane_s32(__transfersize(1) int32_t * ptr, int32x2_t val, __constrange(0,1) int lane); \/\/ VST1.32 {d0[0]}, [r0] variable 9705 void vst1_lane_s64(__transfersize(1) int64_t * ptr, int64x1_t val, __constrange(0,0) int lane); \/\/ VST1.64 {d0}, [r0] variable 9709 void vst1_lane_f16(__transfersize(1) __fp16 * ptr, float16x4_t val, __constrange(0,3) int lane); \/\/ VST1.16 {d0[0]}, [r0] variable 9712 void vst1_lane_f32(__transfersize(1) float32_t * ptr, float32x2_t val, __constrange(0,1) int lane); \/\/ VST1.32 {d0[0]}, [r0] variable 9718 void vst1_lane_p8(__transfersize(1) poly8_t * ptr, poly8x8_t val, __constrange(0,7) int lane); \/\/ VST1.8 {d0[0]}, [r0] variable 9721 void vst1_lane_p16(__transfersize(1) poly16_t * ptr, poly16x4_t val, __constrange(0,3) int lane); \/\/ VST1.16 {d0[0]}, [r0] variable 10738 int8x8x2_t vld2_lane_s8_ptr(__transfersize(2) int8_t const * ptr, int8x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.8 {d0[0], d1[0]}, [r0] variable 10742 int16x4x2_t vld2_lane_s16_ptr(__transfersize(2) int16_t const * ptr, int16x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.16 {d0[0], d1[0]}, [r0] variable 10746 int32x2x2_t vld2_lane_s32_ptr(__transfersize(2) int32_t const * ptr, int32x2x2_t * src, __constrange(0,1) int lane); \/\/ VLD2.32 {d0[0], d1[0]}, [r0] variable 10752 float32x2x2_t vld2_lane_f32_ptr(__transfersize(2) float32_t const * ptr, float32x2x2_t * src,__constrange(0,1) int lane); \/\/ VLD2.32 {d0[0], d1[0]}, [r0] variable 10763 poly8x8x2_t vld2_lane_p8_ptr(__transfersize(2) poly8_t const * ptr, poly8x8x2_t * src, __constrange(0,7) int lane); \/\/ VLD2.8 {d0[0], d1[0]}, [r0] variable 10767 poly16x4x2_t vld2_lane_p16_ptr(__transfersize(2) poly16_t const * ptr, poly16x4x2_t * src, __constrange(0,3) int lane); \/\/ VLD2.16 {d0[0], d1[0]}, [r0] variable 10819 float16x8x3_t vld3q_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 10835 poly16x8x3_t vld3q_lane_p16_ptr(__transfersize(3) poly16_t const * ptr, poly16x8x3_t * src,__constrange(0,7) int lane); \/\/ VLD3.16 {d0[0], d2[0], d4[0]}, [r0] variable 10872 int8x8x3_t vld3_lane_s8_ptr(__transfersize(3) int8_t const * ptr, int8x8x3_t * src, __constrange(0,7) int lane); \/\/ VLD3.8 {d0[0], d1[0], d2[0]}, [r0] variable 10875 int16x4x3_t vld3_lane_s16_ptr(__transfersize(3) int16_t const * ptr, int16x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 10878 int32x2x3_t vld3_lane_s32_ptr(__transfersize(3) int32_t const * ptr, int32x2x3_t * src, __constrange(0,1) int lane); \/\/ VLD3.32 {d0[0], d1[0], d2[0]}, [r0] variable 10881 float16x4x3_t vld3_lane_f16_ptr(__transfersize(3) __fp16 const * ptr, float16x4x3_t * src, __constrange(0,3) int lane); \/\/ VLD3.16 {d0[0], d1[0], d2[0]}, [r0] variable 10931 int16x8x4_t vld4q_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 10935 int32x4x4_t vld4q_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x4x4_t * src, __constrange(0,3) int lane); \/\/ VLD4.32 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 10939 float16x8x4_t vld4q_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x8x4_t * src, __constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 10955 poly16x8x4_t vld4q_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x8x4_t * src,__constrange(0,7) int lane); \/\/ VLD4.16 {d0[0], d2[0], d4[0], d6[0]}, [r0] variable 10995 int8x8x4_t vld4_lane_s8_ptr(__transfersize(4) int8_t const * ptr, int8x8x4_t * src, __constrange(0,7) int lane); variable 10999 int16x4x4_t vld4_lane_s16_ptr(__transfersize(4) int16_t const * ptr, int16x4x4_t * src, __constrange(0,3) int lane); variable 11003 int32x2x4_t vld4_lane_s32_ptr(__transfersize(4) int32_t const * ptr, int32x2x4_t * src, __constrange(0,1) int lane); variable 11007 float16x4x4_t vld4_lane_f16_ptr(__transfersize(4) __fp16 const * ptr, float16x4x4_t * src, __constrange(0,3) int lane); variable 11024 poly8x8x4_t vld4_lane_p8_ptr(__transfersize(4) poly8_t const * ptr, poly8x8x4_t * src, __constrange(0,7) int lane); variable 11028 poly16x4x4_t vld4_lane_p16_ptr(__transfersize(4) poly16_t const * ptr, poly16x4x4_t * src, __constrange(0,3) int lane); variable 11623 void vst2q_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x8x2_t * val, __constrange(0,7) int lane); variable 11627 void vst2q_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x4x2_t * val, __constrange(0,3) int lane); variable 11631 void vst2q_lane_f16_ptr(__transfersize(2) __fp16 * ptr, float16x8x2_t * val, __constrange(0,7) int lane); variable 11643 void vst2q_lane_p16_ptr(__transfersize(2) poly16_t * ptr, poly16x8x2_t * val, __constrange(0,7) int lane); variable 11647 void vst2_lane_u8_ptr(__transfersize(2) uint8_t * ptr, uint8x8x2_t * val, __constrange(0,7) int lane); \/\/ VST2.8 {d0[0], d1[0]}, [r0] variable 11656 void vst2_lane_u16_ptr(__transfersize(2) uint16_t * ptr, uint16x4x2_t * val, __constrange(0,3) int lane); \/\/ VST2.16 {d0[0], d1[0]}, [r0] variable 11665 void vst2_lane_u32_ptr(__transfersize(2) uint32_t * ptr, uint32x2x2_t * val, __constrange(0,1) int lane); \/\/ VST2.32 {d0[0], d1[0]}, [r0] variable 11674 void vst2_lane_s8_ptr(__transfersize(2) int8_t * ptr, int8x8x2_t * val, __constrange(0,7) int lane); variable 11678 void vst2_lane_s16_ptr(__transfersize(2) int16_t * ptr, int16x4x2_t * val, __constrange(0,3) int lane); variable 11682 void vst2_lane_s32_ptr(__transfersize(2) int32_t * ptr, int32x2x2_t * val, __constrange(0,1) int lane); variable 11688 void vst2_lane_f32_ptr(__transfersize(2) float32_t * ptr, float32x2x2_t * val, __constrange(0,1) int lane); \/\/ VST2.32 {d0[0], d1[0]}, [r0] variable 11721 void vst3q_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x8x3_t * val, __constrange(0,7) int lane); variable 11725 void vst3q_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x4x3_t * val, __constrange(0,3) int lane); variable 11729 void vst3q_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x8x3_t * val, __constrange(0,7) int lane); variable 11742 void vst3q_lane_p16_ptr(__transfersize(3) poly16_t * ptr, poly16x8x3_t * val, __constrange(0,7) int lane); variable 11773 void vst3_lane_s8_ptr(__transfersize(3) int8_t * ptr, int8x8x3_t * val, __constrange(0,7) int lane); variable 11777 void vst3_lane_s16_ptr(__transfersize(3) int16_t * ptr, int16x4x3_t * val, __constrange(0,3) int lane); variable 11781 void vst3_lane_s32_ptr(__transfersize(3) int32_t * ptr, int32x2x3_t * val, __constrange(0,1) int lane); variable 11785 void vst3_lane_f16_ptr(__transfersize(3) __fp16 * ptr, float16x4x3_t * val, __constrange(0,3) int lane); variable 11789 void vst3_lane_f32_ptr(__transfersize(3) float32_t * ptr, float32x2x3_t * val, __constrange(0,1) int lane); variable 11799 void vst3_lane_p8_ptr(__transfersize(3) poly8_t * ptr, poly8x8x3_t * val, __constrange(0,7) int lane); variable 11803 void vst3_lane_p16_ptr(__transfersize(3) poly16_t * ptr, poly16x4x3_t * val, __constrange(0,3) int lane); variable 11825 void vst4q_lane_s16_ptr(__transfersize(4) int16_t * ptr, int16x8x4_t * val, __constrange(0,7) int lane); variable 11829 void vst4q_lane_s32_ptr(__transfersize(4) int32_t * ptr, int32x4x4_t * val, __constrange(0,3) int lane); variable 11833 void vst4q_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x8x4_t * val, __constrange(0,7) int lane); variable 11847 void vst4q_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x8x4_t * val, __constrange(0,7) int lane); variable 11890 void vst4_lane_f16_ptr(__transfersize(4) __fp16 * ptr, float16x4x4_t * val, __constrange(0,3) int lane); variable 11893 void vst4_lane_f32_ptr(__transfersize(4) float32_t * ptr, float32x2x4_t * val, __constrange(0,1) int lane); \/\/ VST4.32 {d0[0], d1[0], d2[0], d3[0]}, [r0] variable 11904 void vst4_lane_p8_ptr(__transfersize(4) poly8_t * ptr, poly8x8x4_t * val, __constrange(0,7) int lane); variable 11908 void vst4_lane_p16_ptr(__transfersize(4) poly16_t * ptr, poly16x4x4_t * val, __constrange(0,3) int lane); variable [all...] |