Lines Matching defs:in
4 // that can be found in the COPYING file in the root of the source
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
27 // This code is pretty much the same as TransformOne in the dec_neon.c, except
120 const int16_t* in, uint8_t* dst) {
122 INIT_VECTOR2(rows, vld1q_s16(in + 0), vld1q_s16(in + 8));
131 const int16_t* in, uint8_t* dst) {
136 "vld1.16 {q1, q2}, [%[in]] \n"
139 // d2: in[0]
140 // d3: in[8]
141 // d4: in[4]
142 // d5: in[12]
145 // q8 = {in[4], in[12]} * kC1 * 2 >> 16
146 // q9 = {in[4], in[12]} * kC2 >> 16
150 // d22 = a = in[0] + in[8]
151 // d23 = b = in[0] - in[8]
155 // q8 = in[4]/[12] * kC1 >> 16
158 // Add {in[4], in[12]} back after the multiplication.
161 // d20 = c = in[4]*kC2 - in[12]*kC1
162 // d21 = d = in[4]*kC1 + in[12]*kC2
193 // d20 = c = in[4]*kC2 - in[12]*kC1
194 // d21 = d = in[4]*kC1 + in[12]*kC2
238 : [in] "+r"(in), [dst] "+r"(dst) // modified registers
247 const int16_t* in, uint8_t* dst, int do_two) {
248 ITransformOne(ref, in, dst);
250 ITransformOne(ref + 4, in + 16, dst + 4);
370 // load src into q4, q5 in high half
376 // load ref into q6, q7 in high half
382 // Pack the high values in to q4 and q6
400 // Transpose. Register dN is the same as dN in C
485 int16x4x4_t in;
486 INIT_VECTOR4(in, zero, zero, zero, zero);
487 LOAD_LANE_16b(in.val[0], 0);
488 LOAD_LANE_16b(in.val[1], 0);
489 LOAD_LANE_16b(in.val[2], 0);
490 LOAD_LANE_16b(in.val[3], 0);
491 LOAD_LANE_16b(in.val[0], 1);
492 LOAD_LANE_16b(in.val[1], 1);
493 LOAD_LANE_16b(in.val[2], 1);
494 LOAD_LANE_16b(in.val[3], 1);
495 LOAD_LANE_16b(in.val[0], 2);
496 LOAD_LANE_16b(in.val[1], 2);
497 LOAD_LANE_16b(in.val[2], 2);
498 LOAD_LANE_16b(in.val[3], 2);
499 LOAD_LANE_16b(in.val[0], 3);
500 LOAD_LANE_16b(in.val[1], 3);
501 LOAD_LANE_16b(in.val[2], 3);
502 LOAD_LANE_16b(in.val[3], 3);
505 // a0 = in[0 * 16] + in[2 * 16]
506 // a1 = in[1 * 16] + in[3 * 16]
507 // a2 = in[1 * 16] - in[3 * 16]
508 // a3 = in[0 * 16] - in[2 * 16]
509 const int32x4_t a0 = vaddl_s16(in.val[0], in.val[2]);
510 const int32x4_t a1 = vaddl_s16(in.val[1], in.val[3]);
511 const int32x4_t a2 = vsubl_s16(in.val[1], in.val[3]);
512 const int32x4_t a3 = vsubl_s16(in.val[0], in.val[2]);
563 // in the inner rows to restore the source order of differences,
567 // restore source order in the columns containing differences.
577 // a0 = in[0] + in[2] | a1 = in[1] + in[3]
579 // a3 = in[0] - in[2] | a2 = in[1] - in[3]
616 // Calculate the weighted sum of the rows in 'b'.
706 // {a0, a1} = {in[0] + in[2], in[1] + in[3]}
709 // {a3, a2} = {in[0] - in[2], in[1] - in[3]}
757 // These are still in 01 45 23 67 order. We fix it easily in the addition
839 // Hadamard transform needs 4 bits of extra precision (2 bits in each
924 // Horizontal sum of all four uint32_t values in 'sum'.
974 // in iOS/arm64 builds. Disable this function in those cases.
977 static int16x8_t Quantize(int16_t* const in,
985 const int16x8_t a = vld1q_s16(in + offset); // in
986 const uint16x8_t b = vreinterpretq_u16_s16(vabsq_s16(a)); // coeff = abs(in)
999 vst1q_s16(in + offset, c4);
1011 static int QuantizeBlock(int16_t in[16], int16_t out[16],
1013 const int16x8_t out0 = Quantize(in, mtx, 0);
1014 const int16x8_t out1 = Quantize(in, mtx, 8);