Cross Reference: /external/webp/src/dsp/dec

Lines Matching defs:in
4 // that can be found in the COPYING file in the root of the source
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
90   uint32x4x4_t in;
91   INIT_VECTOR4(in, zero, zero, zero, zero);
93   LOADQ_LANE_32b(in.val[0], 0);
94   LOADQ_LANE_32b(in.val[1], 0);
95   LOADQ_LANE_32b(in.val[2], 0);
96   LOADQ_LANE_32b(in.val[3], 0);
97   LOADQ_LANE_32b(in.val[0], 1);
98   LOADQ_LANE_32b(in.val[1], 1);
99   LOADQ_LANE_32b(in.val[2], 1);
100   LOADQ_LANE_32b(in.val[3], 1);
101   LOADQ_LANE_32b(in.val[0], 2);
102   LOADQ_LANE_32b(in.val[1], 2);
103   LOADQ_LANE_32b(in.val[2], 2);
104   LOADQ_LANE_32b(in.val[3], 2);
105   LOADQ_LANE_32b(in.val[0], 3);
106   LOADQ_LANE_32b(in.val[1], 3);
107   LOADQ_LANE_32b(in.val[2], 3);
108   LOADQ_LANE_32b(in.val[3], 3);
111     const uint8x16x2_t row01 = vtrnq_u8(vreinterpretq_u8_u32(in.val[0]),
112                                         vreinterpretq_u8_u32(in.val[1]));
113     const uint8x16x2_t row23 = vtrnq_u8(vreinterpretq_u8_u32(in.val[2]),
114                                         vreinterpretq_u8_u32(in.val[3]));
163   // We pack the 8x8 u-samples in the lower half of the uint8x16_t destination
187   // We pack the 8x8 u-samples in the lower half of the uint8x16_t destination
308   // p0 and q0 contain the u+v samples packed in low/high halves.
319   // The p1...q1 registers contain the u+v samples packed in low/high halves.
438 // Simple In-loop filtering (Paragraph 15.2)
587   NEEDS_FILTER(p1, p0, q0, q1, thresh, q9)     /* filter mask in q9 */         \
661 // Complex In-loop filtering (Paragraph 15.3)
975 // Technically these are unsigned but vqdmulh is only available in signed.
1030 static void TransformOne(const int16_t* in, uint8_t* dst) {
1032   INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
1040 static void TransformOne(const int16_t* in, uint8_t* dst) {
1046     "vld1.16         {q1, q2}, [%[in]]           \n"
1049     /* d2: in[0]
1050      * d3: in[8]
1051      * d4: in[4]
1052      * d5: in[12]
1056     /* q8 = {in[4], in[12]} * kC1 * 2 >> 16
1057      * q9 = {in[4], in[12]} * kC2 >> 16
1062     /* d22 = a = in[0] + in[8]
1063      * d23 = b = in[0] - in[8]
1071      * We avoided this in kC2 by pre-shifting the constant.
1072      * q8 = in[4]/[12] * kC1 >> 16
1076     /* Add {in[4], in[12]} back after the multiplication. This is handled by
1077      * adding 1 << 16 to kC1 in the libwebp C code.
1081     /* d20 = c = in[4]*kC2 - in[12]*kC1
1082      * d21 = d = in[4]*kC1 + in[12]*kC2
1118     /* d20 = c = in[4]*kC2 - in[12]*kC1
1119      * d21 = d = in[4]*kC1 + in[12]*kC2
1165     : [in] "+r"(in), [dst] "+r"(dst)  /* modified registers */
1173 static void TransformTwo(const int16_t* in, uint8_t* dst, int do_two) {
1174   TransformOne(in, dst);
1176     TransformOne(in + 16, dst + 4);
1180 static void TransformDC(const int16_t* in, uint8_t* dst) {
1181   const int16x8_t DC = vdupq_n_s16(in[0]);
1194 static void TransformWHT(const int16_t* in, int16_t* out) {
1199     const int16x4_t in00_03 = vld1_s16(in + 0);
1200     const int16x4_t in04_07 = vld1_s16(in + 4);
1201     const int16x4_t in08_11 = vld1_s16(in + 8);
1202     const int16x4_t in12_15 = vld1_s16(in + 12);
1203     const int32x4_t a0 = vaddl_s16(in00_03, in12_15);  // in[0..3] + in[12..15]
1204     const int32x4_t a1 = vaddl_s16(in04_07, in08_11);  // in[4..7] + in[8..11]
1205     const int32x4_t a2 = vsubl_s16(in04_07, in08_11);  // in[4..7] - in[8..11]
1206     const int32x4_t a3 = vsubl_s16(in00_03, in12_15);  // in[0..3] - in[12..15]
1246 static void TransformAC3(const int16_t* in, uint8_t* dst) {
1249   const int16x4_t A = vld1_dup_s16(in);
1250   const int16x4_t c4 = vdup_n_s16(MUL(in[4], kC2_full));
1251   const int16x4_t d4 = vdup_n_s16(MUL(in[4], kC1_full));
1252   const int c1 = MUL(in[1], kC2_full);
1253   const int d1 = MUL(in[1], kC1_full);
OpenGrok