1 /* 2 * 3 * Bluetooth low-complexity, subband codec (SBC) library 4 * 5 * Copyright (C) 2008-2010 Nokia Corporation 6 * Copyright (C) 2004-2010 Marcel Holtmann <marcel (at) holtmann.org> 7 * Copyright (C) 2004-2005 Henryk Ploetz <henryk (at) ploetzli.ch> 8 * Copyright (C) 2005-2006 Brad Midgley <bmidgley (at) xmission.com> 9 * 10 * 11 * This library is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU Lesser General Public 13 * License as published by the Free Software Foundation; either 14 * version 2.1 of the License, or (at your option) any later version. 15 * 16 * This library is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * Lesser General Public License for more details. 20 * 21 * You should have received a copy of the GNU Lesser General Public 22 * License along with this library; if not, write to the Free Software 23 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 24 * 25 */ 26 27 #include <stdint.h> 28 #include <limits.h> 29 #include <string.h> 30 #include "sbc.h" 31 #include "sbc_math.h" 32 #include "sbc_tables.h" 33 34 #include "sbc_primitives.h" 35 #include "sbc_primitives_mmx.h" 36 #include "sbc_primitives_iwmmxt.h" 37 #include "sbc_primitives_neon.h" 38 #include "sbc_primitives_armv6.h" 39 40 /* 41 * A reference C code of analysis filter with SIMD-friendly tables 42 * reordering and code layout. This code can be used to develop platform 43 * specific SIMD optimizations. Also it may be used as some kind of test 44 * for compiler autovectorization capabilities (who knows, if the compiler 45 * is very good at this stuff, hand optimized assembly may be not strictly 46 * needed for some platform). 47 * 48 * Note: It is also possible to make a simple variant of analysis filter, 49 * which needs only a single constants table without taking care about 50 * even/odd cases. This simple variant of filter can be implemented without 51 * input data permutation. The only thing that would be lost is the 52 * possibility to use pairwise SIMD multiplications. But for some simple 53 * CPU cores without SIMD extensions it can be useful. If anybody is 54 * interested in implementing such variant of a filter, sourcecode from 55 * bluez versions 4.26/4.27 can be used as a reference and the history of 56 * the changes in git repository done around that time may be worth checking. 57 */ 58 59 static inline void sbc_analyze_four_simd(const int16_t *in, int32_t *out, 60 const FIXED_T *consts) 61 { 62 FIXED_A t1[4]; 63 FIXED_T t2[4]; 64 int hop = 0; 65 66 /* rounding coefficient */ 67 t1[0] = t1[1] = t1[2] = t1[3] = 68 (FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1); 69 70 /* low pass polyphase filter */ 71 for (hop = 0; hop < 40; hop += 8) { 72 t1[0] += (FIXED_A) in[hop] * consts[hop]; 73 t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; 74 t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; 75 t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; 76 t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; 77 t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; 78 t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; 79 t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; 80 } 81 82 /* scaling */ 83 t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE; 84 t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE; 85 t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE; 86 t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE; 87 88 /* do the cos transform */ 89 t1[0] = (FIXED_A) t2[0] * consts[40 + 0]; 90 t1[0] += (FIXED_A) t2[1] * consts[40 + 1]; 91 t1[1] = (FIXED_A) t2[0] * consts[40 + 2]; 92 t1[1] += (FIXED_A) t2[1] * consts[40 + 3]; 93 t1[2] = (FIXED_A) t2[0] * consts[40 + 4]; 94 t1[2] += (FIXED_A) t2[1] * consts[40 + 5]; 95 t1[3] = (FIXED_A) t2[0] * consts[40 + 6]; 96 t1[3] += (FIXED_A) t2[1] * consts[40 + 7]; 97 98 t1[0] += (FIXED_A) t2[2] * consts[40 + 8]; 99 t1[0] += (FIXED_A) t2[3] * consts[40 + 9]; 100 t1[1] += (FIXED_A) t2[2] * consts[40 + 10]; 101 t1[1] += (FIXED_A) t2[3] * consts[40 + 11]; 102 t1[2] += (FIXED_A) t2[2] * consts[40 + 12]; 103 t1[2] += (FIXED_A) t2[3] * consts[40 + 13]; 104 t1[3] += (FIXED_A) t2[2] * consts[40 + 14]; 105 t1[3] += (FIXED_A) t2[3] * consts[40 + 15]; 106 107 out[0] = t1[0] >> 108 (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); 109 out[1] = t1[1] >> 110 (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); 111 out[2] = t1[2] >> 112 (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); 113 out[3] = t1[3] >> 114 (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); 115 } 116 117 static inline void sbc_analyze_eight_simd(const int16_t *in, int32_t *out, 118 const FIXED_T *consts) 119 { 120 FIXED_A t1[8]; 121 FIXED_T t2[8]; 122 int i, hop; 123 124 /* rounding coefficient */ 125 t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 126 (FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1); 127 128 /* low pass polyphase filter */ 129 for (hop = 0; hop < 80; hop += 16) { 130 t1[0] += (FIXED_A) in[hop] * consts[hop]; 131 t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; 132 t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; 133 t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; 134 t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; 135 t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; 136 t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; 137 t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; 138 t1[4] += (FIXED_A) in[hop + 8] * consts[hop + 8]; 139 t1[4] += (FIXED_A) in[hop + 9] * consts[hop + 9]; 140 t1[5] += (FIXED_A) in[hop + 10] * consts[hop + 10]; 141 t1[5] += (FIXED_A) in[hop + 11] * consts[hop + 11]; 142 t1[6] += (FIXED_A) in[hop + 12] * consts[hop + 12]; 143 t1[6] += (FIXED_A) in[hop + 13] * consts[hop + 13]; 144 t1[7] += (FIXED_A) in[hop + 14] * consts[hop + 14]; 145 t1[7] += (FIXED_A) in[hop + 15] * consts[hop + 15]; 146 } 147 148 /* scaling */ 149 t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE; 150 t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE; 151 t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE; 152 t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE; 153 t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE; 154 t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE; 155 t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE; 156 t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE; 157 158 159 /* do the cos transform */ 160 t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 0; 161 162 for (i = 0; i < 4; i++) { 163 t1[0] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 0]; 164 t1[0] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 1]; 165 t1[1] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 2]; 166 t1[1] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 3]; 167 t1[2] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 4]; 168 t1[2] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 5]; 169 t1[3] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 6]; 170 t1[3] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 7]; 171 t1[4] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 8]; 172 t1[4] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 9]; 173 t1[5] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 10]; 174 t1[5] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 11]; 175 t1[6] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 12]; 176 t1[6] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 13]; 177 t1[7] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 14]; 178 t1[7] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 15]; 179 } 180 181 for (i = 0; i < 8; i++) 182 out[i] = t1[i] >> 183 (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS); 184 } 185 186 static inline void sbc_analyze_4b_4s_simd(int16_t *x, 187 int32_t *out, int out_stride) 188 { 189 /* Analyze blocks */ 190 sbc_analyze_four_simd(x + 12, out, analysis_consts_fixed4_simd_odd); 191 out += out_stride; 192 sbc_analyze_four_simd(x + 8, out, analysis_consts_fixed4_simd_even); 193 out += out_stride; 194 sbc_analyze_four_simd(x + 4, out, analysis_consts_fixed4_simd_odd); 195 out += out_stride; 196 sbc_analyze_four_simd(x + 0, out, analysis_consts_fixed4_simd_even); 197 } 198 199 static inline void sbc_analyze_4b_8s_simd(int16_t *x, 200 int32_t *out, int out_stride) 201 { 202 /* Analyze blocks */ 203 sbc_analyze_eight_simd(x + 24, out, analysis_consts_fixed8_simd_odd); 204 out += out_stride; 205 sbc_analyze_eight_simd(x + 16, out, analysis_consts_fixed8_simd_even); 206 out += out_stride; 207 sbc_analyze_eight_simd(x + 8, out, analysis_consts_fixed8_simd_odd); 208 out += out_stride; 209 sbc_analyze_eight_simd(x + 0, out, analysis_consts_fixed8_simd_even); 210 } 211 212 static inline int16_t unaligned16_be(const uint8_t *ptr) 213 { 214 return (int16_t) ((ptr[0] << 8) | ptr[1]); 215 } 216 217 static inline int16_t unaligned16_le(const uint8_t *ptr) 218 { 219 return (int16_t) (ptr[0] | (ptr[1] << 8)); 220 } 221 222 /* 223 * Internal helper functions for input data processing. In order to get 224 * optimal performance, it is important to have "nsamples", "nchannels" 225 * and "big_endian" arguments used with this inline function as compile 226 * time constants. 227 */ 228 229 static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s4_internal( 230 int position, 231 const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], 232 int nsamples, int nchannels, int big_endian) 233 { 234 /* handle X buffer wraparound */ 235 if (position < nsamples) { 236 if (nchannels > 0) 237 memcpy(&X[0][SBC_X_BUFFER_SIZE - 40], &X[0][position], 238 36 * sizeof(int16_t)); 239 if (nchannels > 1) 240 memcpy(&X[1][SBC_X_BUFFER_SIZE - 40], &X[1][position], 241 36 * sizeof(int16_t)); 242 position = SBC_X_BUFFER_SIZE - 40; 243 } 244 245 #define PCM(i) (big_endian ? \ 246 unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2)) 247 248 /* copy/permutate audio samples */ 249 while ((nsamples -= 8) >= 0) { 250 position -= 8; 251 if (nchannels > 0) { 252 int16_t *x = &X[0][position]; 253 x[0] = PCM(0 + 7 * nchannels); 254 x[1] = PCM(0 + 3 * nchannels); 255 x[2] = PCM(0 + 6 * nchannels); 256 x[3] = PCM(0 + 4 * nchannels); 257 x[4] = PCM(0 + 0 * nchannels); 258 x[5] = PCM(0 + 2 * nchannels); 259 x[6] = PCM(0 + 1 * nchannels); 260 x[7] = PCM(0 + 5 * nchannels); 261 } 262 if (nchannels > 1) { 263 int16_t *x = &X[1][position]; 264 x[0] = PCM(1 + 7 * nchannels); 265 x[1] = PCM(1 + 3 * nchannels); 266 x[2] = PCM(1 + 6 * nchannels); 267 x[3] = PCM(1 + 4 * nchannels); 268 x[4] = PCM(1 + 0 * nchannels); 269 x[5] = PCM(1 + 2 * nchannels); 270 x[6] = PCM(1 + 1 * nchannels); 271 x[7] = PCM(1 + 5 * nchannels); 272 } 273 pcm += 16 * nchannels; 274 } 275 #undef PCM 276 277 return position; 278 } 279 280 static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s8_internal( 281 int position, 282 const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], 283 int nsamples, int nchannels, int big_endian) 284 { 285 /* handle X buffer wraparound */ 286 if (position < nsamples) { 287 if (nchannels > 0) 288 memcpy(&X[0][SBC_X_BUFFER_SIZE - 72], &X[0][position], 289 72 * sizeof(int16_t)); 290 if (nchannels > 1) 291 memcpy(&X[1][SBC_X_BUFFER_SIZE - 72], &X[1][position], 292 72 * sizeof(int16_t)); 293 position = SBC_X_BUFFER_SIZE - 72; 294 } 295 296 #define PCM(i) (big_endian ? \ 297 unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2)) 298 299 /* copy/permutate audio samples */ 300 while ((nsamples -= 16) >= 0) { 301 position -= 16; 302 if (nchannels > 0) { 303 int16_t *x = &X[0][position]; 304 x[0] = PCM(0 + 15 * nchannels); 305 x[1] = PCM(0 + 7 * nchannels); 306 x[2] = PCM(0 + 14 * nchannels); 307 x[3] = PCM(0 + 8 * nchannels); 308 x[4] = PCM(0 + 13 * nchannels); 309 x[5] = PCM(0 + 9 * nchannels); 310 x[6] = PCM(0 + 12 * nchannels); 311 x[7] = PCM(0 + 10 * nchannels); 312 x[8] = PCM(0 + 11 * nchannels); 313 x[9] = PCM(0 + 3 * nchannels); 314 x[10] = PCM(0 + 6 * nchannels); 315 x[11] = PCM(0 + 0 * nchannels); 316 x[12] = PCM(0 + 5 * nchannels); 317 x[13] = PCM(0 + 1 * nchannels); 318 x[14] = PCM(0 + 4 * nchannels); 319 x[15] = PCM(0 + 2 * nchannels); 320 } 321 if (nchannels > 1) { 322 int16_t *x = &X[1][position]; 323 x[0] = PCM(1 + 15 * nchannels); 324 x[1] = PCM(1 + 7 * nchannels); 325 x[2] = PCM(1 + 14 * nchannels); 326 x[3] = PCM(1 + 8 * nchannels); 327 x[4] = PCM(1 + 13 * nchannels); 328 x[5] = PCM(1 + 9 * nchannels); 329 x[6] = PCM(1 + 12 * nchannels); 330 x[7] = PCM(1 + 10 * nchannels); 331 x[8] = PCM(1 + 11 * nchannels); 332 x[9] = PCM(1 + 3 * nchannels); 333 x[10] = PCM(1 + 6 * nchannels); 334 x[11] = PCM(1 + 0 * nchannels); 335 x[12] = PCM(1 + 5 * nchannels); 336 x[13] = PCM(1 + 1 * nchannels); 337 x[14] = PCM(1 + 4 * nchannels); 338 x[15] = PCM(1 + 2 * nchannels); 339 } 340 pcm += 32 * nchannels; 341 } 342 #undef PCM 343 344 return position; 345 } 346 347 /* 348 * Input data processing functions. The data is endian converted if needed, 349 * channels are deintrleaved and audio samples are reordered for use in 350 * SIMD-friendly analysis filter function. The results are put into "X" 351 * array, getting appended to the previous data (or it is better to say 352 * prepended, as the buffer is filled from top to bottom). Old data is 353 * discarded when neededed, but availability of (10 * nrof_subbands) 354 * contiguous samples is always guaranteed for the input to the analysis 355 * filter. This is achieved by copying a sufficient part of old data 356 * to the top of the buffer on buffer wraparound. 357 */ 358 359 static int sbc_enc_process_input_4s_le(int position, 360 const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], 361 int nsamples, int nchannels) 362 { 363 if (nchannels > 1) 364 return sbc_encoder_process_input_s4_internal( 365 position, pcm, X, nsamples, 2, 0); 366 else 367 return sbc_encoder_process_input_s4_internal( 368 position, pcm, X, nsamples, 1, 0); 369 } 370 371 static int sbc_enc_process_input_4s_be(int position, 372 const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], 373 int nsamples, int nchannels) 374 { 375 if (nchannels > 1) 376 return sbc_encoder_process_input_s4_internal( 377 position, pcm, X, nsamples, 2, 1); 378 else 379 return sbc_encoder_process_input_s4_internal( 380 position, pcm, X, nsamples, 1, 1); 381 } 382 383 static int sbc_enc_process_input_8s_le(int position, 384 const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], 385 int nsamples, int nchannels) 386 { 387 if (nchannels > 1) 388 return sbc_encoder_process_input_s8_internal( 389 position, pcm, X, nsamples, 2, 0); 390 else 391 return sbc_encoder_process_input_s8_internal( 392 position, pcm, X, nsamples, 1, 0); 393 } 394 395 static int sbc_enc_process_input_8s_be(int position, 396 const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], 397 int nsamples, int nchannels) 398 { 399 if (nchannels > 1) 400 return sbc_encoder_process_input_s8_internal( 401 position, pcm, X, nsamples, 2, 1); 402 else 403 return sbc_encoder_process_input_s8_internal( 404 position, pcm, X, nsamples, 1, 1); 405 } 406 407 /* Supplementary function to count the number of leading zeros */ 408 409 static inline int sbc_clz(uint32_t x) 410 { 411 #ifdef __GNUC__ 412 return __builtin_clz(x); 413 #else 414 /* TODO: this should be replaced with something better if good 415 * performance is wanted when using compilers other than gcc */ 416 int cnt = 0; 417 while (x) { 418 cnt++; 419 x >>= 1; 420 } 421 return 32 - cnt; 422 #endif 423 } 424 425 static void sbc_calc_scalefactors( 426 int32_t sb_sample_f[16][2][8], 427 uint32_t scale_factor[2][8], 428 int blocks, int channels, int subbands) 429 { 430 int ch, sb, blk; 431 for (ch = 0; ch < channels; ch++) { 432 for (sb = 0; sb < subbands; sb++) { 433 uint32_t x = 1 << SCALE_OUT_BITS; 434 for (blk = 0; blk < blocks; blk++) { 435 int32_t tmp = fabs(sb_sample_f[blk][ch][sb]); 436 if (tmp != 0) 437 x |= tmp - 1; 438 } 439 scale_factor[ch][sb] = (31 - SCALE_OUT_BITS) - 440 sbc_clz(x); 441 } 442 } 443 } 444 445 static int sbc_calc_scalefactors_j( 446 int32_t sb_sample_f[16][2][8], 447 uint32_t scale_factor[2][8], 448 int blocks, int subbands) 449 { 450 int blk, joint = 0; 451 int32_t tmp0, tmp1; 452 uint32_t x, y; 453 454 /* last subband does not use joint stereo */ 455 int sb = subbands - 1; 456 x = 1 << SCALE_OUT_BITS; 457 y = 1 << SCALE_OUT_BITS; 458 for (blk = 0; blk < blocks; blk++) { 459 tmp0 = fabs(sb_sample_f[blk][0][sb]); 460 tmp1 = fabs(sb_sample_f[blk][1][sb]); 461 if (tmp0 != 0) 462 x |= tmp0 - 1; 463 if (tmp1 != 0) 464 y |= tmp1 - 1; 465 } 466 scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - sbc_clz(x); 467 scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - sbc_clz(y); 468 469 /* the rest of subbands can use joint stereo */ 470 while (--sb >= 0) { 471 int32_t sb_sample_j[16][2]; 472 x = 1 << SCALE_OUT_BITS; 473 y = 1 << SCALE_OUT_BITS; 474 for (blk = 0; blk < blocks; blk++) { 475 tmp0 = sb_sample_f[blk][0][sb]; 476 tmp1 = sb_sample_f[blk][1][sb]; 477 sb_sample_j[blk][0] = ASR(tmp0, 1) + ASR(tmp1, 1); 478 sb_sample_j[blk][1] = ASR(tmp0, 1) - ASR(tmp1, 1); 479 tmp0 = fabs(tmp0); 480 tmp1 = fabs(tmp1); 481 if (tmp0 != 0) 482 x |= tmp0 - 1; 483 if (tmp1 != 0) 484 y |= tmp1 - 1; 485 } 486 scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - 487 sbc_clz(x); 488 scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - 489 sbc_clz(y); 490 x = 1 << SCALE_OUT_BITS; 491 y = 1 << SCALE_OUT_BITS; 492 for (blk = 0; blk < blocks; blk++) { 493 tmp0 = fabs(sb_sample_j[blk][0]); 494 tmp1 = fabs(sb_sample_j[blk][1]); 495 if (tmp0 != 0) 496 x |= tmp0 - 1; 497 if (tmp1 != 0) 498 y |= tmp1 - 1; 499 } 500 x = (31 - SCALE_OUT_BITS) - sbc_clz(x); 501 y = (31 - SCALE_OUT_BITS) - sbc_clz(y); 502 503 /* decide whether to use joint stereo for this subband */ 504 if ((scale_factor[0][sb] + scale_factor[1][sb]) > x + y) { 505 joint |= 1 << (subbands - 1 - sb); 506 scale_factor[0][sb] = x; 507 scale_factor[1][sb] = y; 508 for (blk = 0; blk < blocks; blk++) { 509 sb_sample_f[blk][0][sb] = sb_sample_j[blk][0]; 510 sb_sample_f[blk][1][sb] = sb_sample_j[blk][1]; 511 } 512 } 513 } 514 515 /* bitmask with the information about subbands using joint stereo */ 516 return joint; 517 } 518 519 /* 520 * Detect CPU features and setup function pointers 521 */ 522 void sbc_init_primitives(struct sbc_encoder_state *state) 523 { 524 /* Default implementation for analyze functions */ 525 state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_simd; 526 state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_simd; 527 528 /* Default implementation for input reordering / deinterleaving */ 529 state->sbc_enc_process_input_4s_le = sbc_enc_process_input_4s_le; 530 state->sbc_enc_process_input_4s_be = sbc_enc_process_input_4s_be; 531 state->sbc_enc_process_input_8s_le = sbc_enc_process_input_8s_le; 532 state->sbc_enc_process_input_8s_be = sbc_enc_process_input_8s_be; 533 534 /* Default implementation for scale factors calculation */ 535 state->sbc_calc_scalefactors = sbc_calc_scalefactors; 536 state->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j; 537 state->implementation_info = "Generic C"; 538 539 /* X86/AMD64 optimizations */ 540 #ifdef SBC_BUILD_WITH_MMX_SUPPORT 541 sbc_init_primitives_mmx(state); 542 #endif 543 544 /* ARM optimizations */ 545 #ifdef SBC_BUILD_WITH_ARMV6_SUPPORT 546 sbc_init_primitives_armv6(state); 547 #endif 548 #ifdef SBC_BUILD_WITH_IWMMXT_SUPPORT 549 sbc_init_primitives_iwmmxt(state); 550 #endif 551 #ifdef SBC_BUILD_WITH_NEON_SUPPORT 552 sbc_init_primitives_neon(state); 553 #endif 554 } 555