1 /****************************************************************************** 2 * 3 * Copyright (C) 2014 The Android Open Source Project 4 * Copyright 2003 - 2004 Open Interface North America, Inc. All rights 5 * reserved. 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at: 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 * 19 ******************************************************************************/ 20 21 /******************************************************************************* 22 $Revision: #1 $ 23 ******************************************************************************/ 24 25 /** @file 26 @ingroup codec_internal 27 */ 28 29 /**@addgroup codec_internal*/ 30 /**@{*/ 31 32 /* 33 * Performs an 8-point Type-II scaled DCT using the Arai-Agui-Nakajima 34 * factorization. The scaling factors are folded into the windowing 35 * constants. 29 adds and 5 16x32 multiplies per 8 samples. 36 */ 37 38 #include "oi_codec_sbc_private.h" 39 40 #define AAN_C4_FIX (759250125) /* S1.30 759250125 0.707107*/ 41 42 #define AAN_C6_FIX (410903207) /* S1.30 410903207 0.382683*/ 43 44 #define AAN_Q0_FIX (581104888) /* S1.30 581104888 0.541196*/ 45 46 #define AAN_Q1_FIX (1402911301) /* S1.30 1402911301 1.306563*/ 47 48 /** Scales x by y bits to the right, adding a rounding factor. 49 */ 50 #ifndef SCALE 51 #define SCALE(x, y) (((x) + (1 << ((y)-1))) >> (y)) 52 #endif 53 54 /** 55 * Default C language implementation of a 32x32->32 multiply. This function may 56 * be replaced by a platform-specific version for speed. 57 * 58 * @param u A signed 32-bit multiplicand 59 * @param v A signed 32-bit multiplier 60 61 * @return A signed 32-bit value corresponding to the 32 most significant bits 62 * of the 64-bit product of u and v. 63 */ 64 INLINE int32_t default_mul_32s_32s_hi(int32_t u, int32_t v) { 65 uint32_t u0, v0; 66 int32_t u1, v1, w1, w2, t; 67 68 u0 = u & 0xFFFF; 69 u1 = u >> 16; 70 v0 = v & 0xFFFF; 71 v1 = v >> 16; 72 t = u0 * v0; 73 t = u1 * v0 + ((uint32_t)t >> 16); 74 w1 = t & 0xFFFF; 75 w2 = t >> 16; 76 w1 = u0 * v1 + w1; 77 return u1 * v1 + w2 + (w1 >> 16); 78 } 79 80 #define MUL_32S_32S_HI(_x, _y) default_mul_32s_32s_hi(_x, _y) 81 82 #ifdef DEBUG_DCT 83 PRIVATE void float_dct2_8(float* RESTRICT out, int32_t const* RESTRICT in) { 84 #define FIX(x, bits) \ 85 (((int)floor(0.5f + ((x) * ((float)(1 << bits))))) / ((float)(1 << bits))) 86 #define FLOAT_BUTTERFLY(x, y) \ 87 x += y; \ 88 y = x - (y * 2); \ 89 OI_ASSERT(VALID_INT32(x)); \ 90 OI_ASSERT(VALID_INT32(y)); 91 #define FLOAT_MULT_DCT(K, sample) (FIX(K, 20) * sample) 92 #define FLOAT_SCALE(x, y) (((x) / (double)(1 << (y)))) 93 94 double L00, L01, L02, L03, L04, L05, L06, L07; 95 double L25; 96 97 double in0, in1, in2, in3; 98 double in4, in5, in6, in7; 99 100 in0 = FLOAT_SCALE(in[0], DCTII_8_SHIFT_IN); 101 OI_ASSERT(VALID_INT32(in0)); 102 in1 = FLOAT_SCALE(in[1], DCTII_8_SHIFT_IN); 103 OI_ASSERT(VALID_INT32(in1)); 104 in2 = FLOAT_SCALE(in[2], DCTII_8_SHIFT_IN); 105 OI_ASSERT(VALID_INT32(in2)); 106 in3 = FLOAT_SCALE(in[3], DCTII_8_SHIFT_IN); 107 OI_ASSERT(VALID_INT32(in3)); 108 in4 = FLOAT_SCALE(in[4], DCTII_8_SHIFT_IN); 109 OI_ASSERT(VALID_INT32(in4)); 110 in5 = FLOAT_SCALE(in[5], DCTII_8_SHIFT_IN); 111 OI_ASSERT(VALID_INT32(in5)); 112 in6 = FLOAT_SCALE(in[6], DCTII_8_SHIFT_IN); 113 OI_ASSERT(VALID_INT32(in6)); 114 in7 = FLOAT_SCALE(in[7], DCTII_8_SHIFT_IN); 115 OI_ASSERT(VALID_INT32(in7)); 116 117 L00 = (in0 + in7); 118 OI_ASSERT(VALID_INT32(L00)); 119 L01 = (in1 + in6); 120 OI_ASSERT(VALID_INT32(L01)); 121 L02 = (in2 + in5); 122 OI_ASSERT(VALID_INT32(L02)); 123 L03 = (in3 + in4); 124 OI_ASSERT(VALID_INT32(L03)); 125 126 L04 = (in3 - in4); 127 OI_ASSERT(VALID_INT32(L04)); 128 L05 = (in2 - in5); 129 OI_ASSERT(VALID_INT32(L05)); 130 L06 = (in1 - in6); 131 OI_ASSERT(VALID_INT32(L06)); 132 L07 = (in0 - in7); 133 OI_ASSERT(VALID_INT32(L07)); 134 135 FLOAT_BUTTERFLY(L00, L03); 136 FLOAT_BUTTERFLY(L01, L02); 137 138 L02 += L03; 139 OI_ASSERT(VALID_INT32(L02)); 140 141 L02 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L02); 142 OI_ASSERT(VALID_INT32(L02)); 143 144 FLOAT_BUTTERFLY(L00, L01); 145 146 out[0] = (float)FLOAT_SCALE(L00, DCTII_8_SHIFT_0); 147 OI_ASSERT(VALID_INT16(out[0])); 148 out[4] = (float)FLOAT_SCALE(L01, DCTII_8_SHIFT_4); 149 OI_ASSERT(VALID_INT16(out[4])); 150 151 FLOAT_BUTTERFLY(L03, L02); 152 out[6] = (float)FLOAT_SCALE(L02, DCTII_8_SHIFT_6); 153 OI_ASSERT(VALID_INT16(out[6])); 154 out[2] = (float)FLOAT_SCALE(L03, DCTII_8_SHIFT_2); 155 OI_ASSERT(VALID_INT16(out[2])); 156 157 L04 += L05; 158 OI_ASSERT(VALID_INT32(L04)); 159 L05 += L06; 160 OI_ASSERT(VALID_INT32(L05)); 161 L06 += L07; 162 OI_ASSERT(VALID_INT32(L06)); 163 164 L04 /= 2; 165 L05 /= 2; 166 L06 /= 2; 167 L07 /= 2; 168 169 L05 = FLOAT_MULT_DCT(AAN_C4_FLOAT, L05); 170 OI_ASSERT(VALID_INT32(L05)); 171 172 L25 = L06 - L04; 173 OI_ASSERT(VALID_INT32(L25)); 174 L25 = FLOAT_MULT_DCT(AAN_C6_FLOAT, L25); 175 OI_ASSERT(VALID_INT32(L25)); 176 177 L04 = FLOAT_MULT_DCT(AAN_Q0_FLOAT, L04); 178 OI_ASSERT(VALID_INT32(L04)); 179 L04 -= L25; 180 OI_ASSERT(VALID_INT32(L04)); 181 182 L06 = FLOAT_MULT_DCT(AAN_Q1_FLOAT, L06); 183 OI_ASSERT(VALID_INT32(L06)); 184 L06 -= L25; 185 OI_ASSERT(VALID_INT32(L25)); 186 187 FLOAT_BUTTERFLY(L07, L05); 188 189 FLOAT_BUTTERFLY(L05, L04); 190 out[3] = (float)(FLOAT_SCALE(L04, DCTII_8_SHIFT_3 - 1)); 191 OI_ASSERT(VALID_INT16(out[3])); 192 out[5] = (float)(FLOAT_SCALE(L05, DCTII_8_SHIFT_5 - 1)); 193 OI_ASSERT(VALID_INT16(out[5])); 194 195 FLOAT_BUTTERFLY(L07, L06); 196 out[7] = (float)(FLOAT_SCALE(L06, DCTII_8_SHIFT_7 - 1)); 197 OI_ASSERT(VALID_INT16(out[7])); 198 out[1] = (float)(FLOAT_SCALE(L07, DCTII_8_SHIFT_1 - 1)); 199 OI_ASSERT(VALID_INT16(out[1])); 200 } 201 #undef BUTTERFLY 202 #endif 203 204 /* 205 * This function calculates the AAN DCT. Its inputs are in S16.15 format, as 206 * returned by OI_SBC_Dequant. In practice, abs(in[x]) < 52429.0 / 1.38 207 * (1244918057 integer). The function it computes is an approximation to the 208 * array defined by: 209 * 210 * diag(aan_s) * AAN= C2 211 * 212 * or 213 * 214 * AAN = diag(1/aan_s) * C2 215 * 216 * where C2 is as it is defined in the comment at the head of this file, and 217 * 218 * aan_s[i] = aan_s = 1/(2*cos(i*pi/16)) with i = 1..7, aan_s[0] = 1; 219 * 220 * aan_s[i] = [ 1.000 0.510 0.541 0.601 0.707 0.900 1.307 2.563 ] 221 * 222 * The output ranges are shown as follows: 223 * 224 * Let Y[0..7] = AAN * X[0..7] 225 * 226 * Without loss of generality, assume the input vector X consists of elements 227 * between -1 and 1. The maximum possible value of a given output element occurs 228 * with some particular combination of input vector elements each of which is -1 229 * or 1. Consider the computation of Y[i]. Y[i] = sum t=0..7 of AAN[t,i]*X[i]. Y 230 * is maximized if the sign of X[i] matches the sign of AAN[t,i], ensuring a 231 * positive contribution to the sum. Equivalently, one may simply sum 232 * abs(AAN)[t,i] over t to get the maximum possible value of Y[i]. 233 * 234 * This yields approximately: 235 * [8.00 10.05 9.66 8.52 8.00 5.70 4.00 2.00] 236 * 237 * Given the maximum magnitude sensible input value of +/-37992, this yields the 238 * following vector of maximum output magnitudes: 239 * 240 * [ 303936 381820 367003 323692 303936 216555 151968 75984 ] 241 * 242 * Ultimately, these values must fit into 16 bit signed integers, so they must 243 * be scaled. A non-uniform scaling helps maximize the kept precision. The 244 * relative number of extra bits of precision maintainable with respect to the 245 * largest value is given here: 246 * 247 * [ 0 0 0 0 0 0 1 2 ] 248 * 249 */ 250 PRIVATE void dct2_8(SBC_BUFFER_T* RESTRICT out, int32_t const* RESTRICT in) { 251 #define BUTTERFLY(x, y) \ 252 x += (y); \ 253 (y) = (x) - ((y) << 1); 254 #define FIX_MULT_DCT(K, x) (MUL_32S_32S_HI(K, x) << 2) 255 256 int32_t L00, L01, L02, L03, L04, L05, L06, L07; 257 int32_t L25; 258 259 int32_t in0, in1, in2, in3; 260 int32_t in4, in5, in6, in7; 261 262 #if DCTII_8_SHIFT_IN != 0 263 in0 = SCALE(in[0], DCTII_8_SHIFT_IN); 264 in1 = SCALE(in[1], DCTII_8_SHIFT_IN); 265 in2 = SCALE(in[2], DCTII_8_SHIFT_IN); 266 in3 = SCALE(in[3], DCTII_8_SHIFT_IN); 267 in4 = SCALE(in[4], DCTII_8_SHIFT_IN); 268 in5 = SCALE(in[5], DCTII_8_SHIFT_IN); 269 in6 = SCALE(in[6], DCTII_8_SHIFT_IN); 270 in7 = SCALE(in[7], DCTII_8_SHIFT_IN); 271 #else 272 in0 = in[0]; 273 in1 = in[1]; 274 in2 = in[2]; 275 in3 = in[3]; 276 in4 = in[4]; 277 in5 = in[5]; 278 in6 = in[6]; 279 in7 = in[7]; 280 #endif 281 282 L00 = in0 + in7; 283 L01 = in1 + in6; 284 L02 = in2 + in5; 285 L03 = in3 + in4; 286 287 L04 = in3 - in4; 288 L05 = in2 - in5; 289 L06 = in1 - in6; 290 L07 = in0 - in7; 291 292 BUTTERFLY(L00, L03); 293 BUTTERFLY(L01, L02); 294 295 L02 += L03; 296 297 L02 = FIX_MULT_DCT(AAN_C4_FIX, L02); 298 299 BUTTERFLY(L00, L01); 300 301 out[0] = (int16_t)SCALE(L00, DCTII_8_SHIFT_0); 302 out[4] = (int16_t)SCALE(L01, DCTII_8_SHIFT_4); 303 304 BUTTERFLY(L03, L02); 305 out[6] = (int16_t)SCALE(L02, DCTII_8_SHIFT_6); 306 out[2] = (int16_t)SCALE(L03, DCTII_8_SHIFT_2); 307 308 L04 += L05; 309 L05 += L06; 310 L06 += L07; 311 312 L04 /= 2; 313 L05 /= 2; 314 L06 /= 2; 315 L07 /= 2; 316 317 L05 = FIX_MULT_DCT(AAN_C4_FIX, L05); 318 319 L25 = L06 - L04; 320 L25 = FIX_MULT_DCT(AAN_C6_FIX, L25); 321 322 L04 = FIX_MULT_DCT(AAN_Q0_FIX, L04); 323 L04 -= L25; 324 325 L06 = FIX_MULT_DCT(AAN_Q1_FIX, L06); 326 L06 -= L25; 327 328 BUTTERFLY(L07, L05); 329 330 BUTTERFLY(L05, L04); 331 out[3] = (int16_t)SCALE(L04, DCTII_8_SHIFT_3 - 1); 332 out[5] = (int16_t)SCALE(L05, DCTII_8_SHIFT_5 - 1); 333 334 BUTTERFLY(L07, L06); 335 out[7] = (int16_t)SCALE(L06, DCTII_8_SHIFT_7 - 1); 336 out[1] = (int16_t)SCALE(L07, DCTII_8_SHIFT_1 - 1); 337 #undef BUTTERFLY 338 339 #ifdef DEBUG_DCT 340 { 341 float float_out[8]; 342 float_dct2_8(float_out, in); 343 } 344 #endif 345 } 346 347 /**@}*/ 348