1 /* 2 * 3 * Bluetooth low-complexity, subband codec (SBC) library 4 * 5 * Copyright (C) 2008-2010 Nokia Corporation 6 * Copyright (C) 2004-2010 Marcel Holtmann <marcel (at) holtmann.org> 7 * Copyright (C) 2004-2005 Henryk Ploetz <henryk (at) ploetzli.ch> 8 * Copyright (C) 2005-2006 Brad Midgley <bmidgley (at) xmission.com> 9 * 10 * 11 * This library is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU Lesser General Public 13 * License as published by the Free Software Foundation; either 14 * version 2.1 of the License, or (at your option) any later version. 15 * 16 * This library is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * Lesser General Public License for more details. 20 * 21 * You should have received a copy of the GNU Lesser General Public 22 * License along with this library; if not, write to the Free Software 23 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 24 * 25 */ 26 27 #include <stdint.h> 28 #include <limits.h> 29 #include <string.h> 30 #include "sbc.h" 31 #include "sbc_math.h" 32 #include "sbc_tables.h" 33 34 #include "sbc_primitives.h" 35 #include "sbc_primitives_mmx.h" 36 #include "sbc_primitives_neon.h" 37 #include "sbc_primitives_armv6.h" 38 39 /* 40 * A reference C code of analysis filter with SIMD-friendly tables 41 * reordering and code layout. This code can be used to develop platform 42 * specific SIMD optimizations. Also it may be used as some kind of test 43 * for compiler autovectorization capabilities (who knows, if the compiler 44 * is very good at this stuff, hand optimized assembly may be not strictly 45 * needed for some platform). 46 * 47 * Note: It is also possible to make a simple variant of analysis filter, 48 * which needs only a single constants table without taking care about 49 * even/odd cases. This simple variant of filter can be implemented without 50 * input data permutation. The only thing that would be lost is the 51 * possibility to use pairwise SIMD multiplications. But for some simple 52 * CPU cores without SIMD extensions it can be useful. If anybody is 53 * interested in implementing such variant of a filter, sourcecode from 54 * bluez versions 4.26/4.27 can be used as a reference and the history of 55 * the changes in git repository done around that time may be worth checking. 56 */ 57 58 static inline void sbc_analyze_four_simd(const int16_t *in, int32_t *out, 59 const FIXED_T *consts) 60 { 61 FIXED_A t1[4]; 62 FIXED_T t2[4]; 63 int hop = 0; 64 65 /* rounding coefficient */ 66 t1[0] = t1[1] = t1[2] = t1[3] = 67 (FIXED_A) 1 << (SBC_PROTO_FIXED4_SCALE - 1); 68 69 /* low pass polyphase filter */ 70 for (hop = 0; hop < 40; hop += 8) { 71 t1[0] += (FIXED_A) in[hop] * consts[hop]; 72 t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; 73 t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; 74 t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; 75 t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; 76 t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; 77 t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; 78 t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; 79 } 80 81 /* scaling */ 82 t2[0] = t1[0] >> SBC_PROTO_FIXED4_SCALE; 83 t2[1] = t1[1] >> SBC_PROTO_FIXED4_SCALE; 84 t2[2] = t1[2] >> SBC_PROTO_FIXED4_SCALE; 85 t2[3] = t1[3] >> SBC_PROTO_FIXED4_SCALE; 86 87 /* do the cos transform */ 88 t1[0] = (FIXED_A) t2[0] * consts[40 + 0]; 89 t1[0] += (FIXED_A) t2[1] * consts[40 + 1]; 90 t1[1] = (FIXED_A) t2[0] * consts[40 + 2]; 91 t1[1] += (FIXED_A) t2[1] * consts[40 + 3]; 92 t1[2] = (FIXED_A) t2[0] * consts[40 + 4]; 93 t1[2] += (FIXED_A) t2[1] * consts[40 + 5]; 94 t1[3] = (FIXED_A) t2[0] * consts[40 + 6]; 95 t1[3] += (FIXED_A) t2[1] * consts[40 + 7]; 96 97 t1[0] += (FIXED_A) t2[2] * consts[40 + 8]; 98 t1[0] += (FIXED_A) t2[3] * consts[40 + 9]; 99 t1[1] += (FIXED_A) t2[2] * consts[40 + 10]; 100 t1[1] += (FIXED_A) t2[3] * consts[40 + 11]; 101 t1[2] += (FIXED_A) t2[2] * consts[40 + 12]; 102 t1[2] += (FIXED_A) t2[3] * consts[40 + 13]; 103 t1[3] += (FIXED_A) t2[2] * consts[40 + 14]; 104 t1[3] += (FIXED_A) t2[3] * consts[40 + 15]; 105 106 out[0] = t1[0] >> 107 (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); 108 out[1] = t1[1] >> 109 (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); 110 out[2] = t1[2] >> 111 (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); 112 out[3] = t1[3] >> 113 (SBC_COS_TABLE_FIXED4_SCALE - SCALE_OUT_BITS); 114 } 115 116 static inline void sbc_analyze_eight_simd(const int16_t *in, int32_t *out, 117 const FIXED_T *consts) 118 { 119 FIXED_A t1[8]; 120 FIXED_T t2[8]; 121 int i, hop; 122 123 /* rounding coefficient */ 124 t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 125 (FIXED_A) 1 << (SBC_PROTO_FIXED8_SCALE-1); 126 127 /* low pass polyphase filter */ 128 for (hop = 0; hop < 80; hop += 16) { 129 t1[0] += (FIXED_A) in[hop] * consts[hop]; 130 t1[0] += (FIXED_A) in[hop + 1] * consts[hop + 1]; 131 t1[1] += (FIXED_A) in[hop + 2] * consts[hop + 2]; 132 t1[1] += (FIXED_A) in[hop + 3] * consts[hop + 3]; 133 t1[2] += (FIXED_A) in[hop + 4] * consts[hop + 4]; 134 t1[2] += (FIXED_A) in[hop + 5] * consts[hop + 5]; 135 t1[3] += (FIXED_A) in[hop + 6] * consts[hop + 6]; 136 t1[3] += (FIXED_A) in[hop + 7] * consts[hop + 7]; 137 t1[4] += (FIXED_A) in[hop + 8] * consts[hop + 8]; 138 t1[4] += (FIXED_A) in[hop + 9] * consts[hop + 9]; 139 t1[5] += (FIXED_A) in[hop + 10] * consts[hop + 10]; 140 t1[5] += (FIXED_A) in[hop + 11] * consts[hop + 11]; 141 t1[6] += (FIXED_A) in[hop + 12] * consts[hop + 12]; 142 t1[6] += (FIXED_A) in[hop + 13] * consts[hop + 13]; 143 t1[7] += (FIXED_A) in[hop + 14] * consts[hop + 14]; 144 t1[7] += (FIXED_A) in[hop + 15] * consts[hop + 15]; 145 } 146 147 /* scaling */ 148 t2[0] = t1[0] >> SBC_PROTO_FIXED8_SCALE; 149 t2[1] = t1[1] >> SBC_PROTO_FIXED8_SCALE; 150 t2[2] = t1[2] >> SBC_PROTO_FIXED8_SCALE; 151 t2[3] = t1[3] >> SBC_PROTO_FIXED8_SCALE; 152 t2[4] = t1[4] >> SBC_PROTO_FIXED8_SCALE; 153 t2[5] = t1[5] >> SBC_PROTO_FIXED8_SCALE; 154 t2[6] = t1[6] >> SBC_PROTO_FIXED8_SCALE; 155 t2[7] = t1[7] >> SBC_PROTO_FIXED8_SCALE; 156 157 158 /* do the cos transform */ 159 t1[0] = t1[1] = t1[2] = t1[3] = t1[4] = t1[5] = t1[6] = t1[7] = 0; 160 161 for (i = 0; i < 4; i++) { 162 t1[0] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 0]; 163 t1[0] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 1]; 164 t1[1] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 2]; 165 t1[1] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 3]; 166 t1[2] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 4]; 167 t1[2] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 5]; 168 t1[3] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 6]; 169 t1[3] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 7]; 170 t1[4] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 8]; 171 t1[4] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 9]; 172 t1[5] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 10]; 173 t1[5] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 11]; 174 t1[6] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 12]; 175 t1[6] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 13]; 176 t1[7] += (FIXED_A) t2[i * 2 + 0] * consts[80 + i * 16 + 14]; 177 t1[7] += (FIXED_A) t2[i * 2 + 1] * consts[80 + i * 16 + 15]; 178 } 179 180 for (i = 0; i < 8; i++) 181 out[i] = t1[i] >> 182 (SBC_COS_TABLE_FIXED8_SCALE - SCALE_OUT_BITS); 183 } 184 185 static inline void sbc_analyze_4b_4s_simd(int16_t *x, 186 int32_t *out, int out_stride) 187 { 188 /* Analyze blocks */ 189 sbc_analyze_four_simd(x + 12, out, analysis_consts_fixed4_simd_odd); 190 out += out_stride; 191 sbc_analyze_four_simd(x + 8, out, analysis_consts_fixed4_simd_even); 192 out += out_stride; 193 sbc_analyze_four_simd(x + 4, out, analysis_consts_fixed4_simd_odd); 194 out += out_stride; 195 sbc_analyze_four_simd(x + 0, out, analysis_consts_fixed4_simd_even); 196 } 197 198 static inline void sbc_analyze_4b_8s_simd(int16_t *x, 199 int32_t *out, int out_stride) 200 { 201 /* Analyze blocks */ 202 sbc_analyze_eight_simd(x + 24, out, analysis_consts_fixed8_simd_odd); 203 out += out_stride; 204 sbc_analyze_eight_simd(x + 16, out, analysis_consts_fixed8_simd_even); 205 out += out_stride; 206 sbc_analyze_eight_simd(x + 8, out, analysis_consts_fixed8_simd_odd); 207 out += out_stride; 208 sbc_analyze_eight_simd(x + 0, out, analysis_consts_fixed8_simd_even); 209 } 210 211 static inline int16_t unaligned16_be(const uint8_t *ptr) 212 { 213 return (int16_t) ((ptr[0] << 8) | ptr[1]); 214 } 215 216 static inline int16_t unaligned16_le(const uint8_t *ptr) 217 { 218 return (int16_t) (ptr[0] | (ptr[1] << 8)); 219 } 220 221 /* 222 * Internal helper functions for input data processing. In order to get 223 * optimal performance, it is important to have "nsamples", "nchannels" 224 * and "big_endian" arguments used with this inline function as compile 225 * time constants. 226 */ 227 228 static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s4_internal( 229 int position, 230 const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], 231 int nsamples, int nchannels, int big_endian) 232 { 233 /* handle X buffer wraparound */ 234 if (position < nsamples) { 235 if (nchannels > 0) 236 memcpy(&X[0][SBC_X_BUFFER_SIZE - 40], &X[0][position], 237 36 * sizeof(int16_t)); 238 if (nchannels > 1) 239 memcpy(&X[1][SBC_X_BUFFER_SIZE - 40], &X[1][position], 240 36 * sizeof(int16_t)); 241 position = SBC_X_BUFFER_SIZE - 40; 242 } 243 244 #define PCM(i) (big_endian ? \ 245 unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2)) 246 247 /* copy/permutate audio samples */ 248 while ((nsamples -= 8) >= 0) { 249 position -= 8; 250 if (nchannels > 0) { 251 int16_t *x = &X[0][position]; 252 x[0] = PCM(0 + 7 * nchannels); 253 x[1] = PCM(0 + 3 * nchannels); 254 x[2] = PCM(0 + 6 * nchannels); 255 x[3] = PCM(0 + 4 * nchannels); 256 x[4] = PCM(0 + 0 * nchannels); 257 x[5] = PCM(0 + 2 * nchannels); 258 x[6] = PCM(0 + 1 * nchannels); 259 x[7] = PCM(0 + 5 * nchannels); 260 } 261 if (nchannels > 1) { 262 int16_t *x = &X[1][position]; 263 x[0] = PCM(1 + 7 * nchannels); 264 x[1] = PCM(1 + 3 * nchannels); 265 x[2] = PCM(1 + 6 * nchannels); 266 x[3] = PCM(1 + 4 * nchannels); 267 x[4] = PCM(1 + 0 * nchannels); 268 x[5] = PCM(1 + 2 * nchannels); 269 x[6] = PCM(1 + 1 * nchannels); 270 x[7] = PCM(1 + 5 * nchannels); 271 } 272 pcm += 16 * nchannels; 273 } 274 #undef PCM 275 276 return position; 277 } 278 279 static SBC_ALWAYS_INLINE int sbc_encoder_process_input_s8_internal( 280 int position, 281 const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], 282 int nsamples, int nchannels, int big_endian) 283 { 284 /* handle X buffer wraparound */ 285 if (position < nsamples) { 286 if (nchannels > 0) 287 memcpy(&X[0][SBC_X_BUFFER_SIZE - 72], &X[0][position], 288 72 * sizeof(int16_t)); 289 if (nchannels > 1) 290 memcpy(&X[1][SBC_X_BUFFER_SIZE - 72], &X[1][position], 291 72 * sizeof(int16_t)); 292 position = SBC_X_BUFFER_SIZE - 72; 293 } 294 295 #define PCM(i) (big_endian ? \ 296 unaligned16_be(pcm + (i) * 2) : unaligned16_le(pcm + (i) * 2)) 297 298 /* copy/permutate audio samples */ 299 while ((nsamples -= 16) >= 0) { 300 position -= 16; 301 if (nchannels > 0) { 302 int16_t *x = &X[0][position]; 303 x[0] = PCM(0 + 15 * nchannels); 304 x[1] = PCM(0 + 7 * nchannels); 305 x[2] = PCM(0 + 14 * nchannels); 306 x[3] = PCM(0 + 8 * nchannels); 307 x[4] = PCM(0 + 13 * nchannels); 308 x[5] = PCM(0 + 9 * nchannels); 309 x[6] = PCM(0 + 12 * nchannels); 310 x[7] = PCM(0 + 10 * nchannels); 311 x[8] = PCM(0 + 11 * nchannels); 312 x[9] = PCM(0 + 3 * nchannels); 313 x[10] = PCM(0 + 6 * nchannels); 314 x[11] = PCM(0 + 0 * nchannels); 315 x[12] = PCM(0 + 5 * nchannels); 316 x[13] = PCM(0 + 1 * nchannels); 317 x[14] = PCM(0 + 4 * nchannels); 318 x[15] = PCM(0 + 2 * nchannels); 319 } 320 if (nchannels > 1) { 321 int16_t *x = &X[1][position]; 322 x[0] = PCM(1 + 15 * nchannels); 323 x[1] = PCM(1 + 7 * nchannels); 324 x[2] = PCM(1 + 14 * nchannels); 325 x[3] = PCM(1 + 8 * nchannels); 326 x[4] = PCM(1 + 13 * nchannels); 327 x[5] = PCM(1 + 9 * nchannels); 328 x[6] = PCM(1 + 12 * nchannels); 329 x[7] = PCM(1 + 10 * nchannels); 330 x[8] = PCM(1 + 11 * nchannels); 331 x[9] = PCM(1 + 3 * nchannels); 332 x[10] = PCM(1 + 6 * nchannels); 333 x[11] = PCM(1 + 0 * nchannels); 334 x[12] = PCM(1 + 5 * nchannels); 335 x[13] = PCM(1 + 1 * nchannels); 336 x[14] = PCM(1 + 4 * nchannels); 337 x[15] = PCM(1 + 2 * nchannels); 338 } 339 pcm += 32 * nchannels; 340 } 341 #undef PCM 342 343 return position; 344 } 345 346 /* 347 * Input data processing functions. The data is endian converted if needed, 348 * channels are deintrleaved and audio samples are reordered for use in 349 * SIMD-friendly analysis filter function. The results are put into "X" 350 * array, getting appended to the previous data (or it is better to say 351 * prepended, as the buffer is filled from top to bottom). Old data is 352 * discarded when neededed, but availability of (10 * nrof_subbands) 353 * contiguous samples is always guaranteed for the input to the analysis 354 * filter. This is achieved by copying a sufficient part of old data 355 * to the top of the buffer on buffer wraparound. 356 */ 357 358 static int sbc_enc_process_input_4s_le(int position, 359 const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], 360 int nsamples, int nchannels) 361 { 362 if (nchannels > 1) 363 return sbc_encoder_process_input_s4_internal( 364 position, pcm, X, nsamples, 2, 0); 365 else 366 return sbc_encoder_process_input_s4_internal( 367 position, pcm, X, nsamples, 1, 0); 368 } 369 370 static int sbc_enc_process_input_4s_be(int position, 371 const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], 372 int nsamples, int nchannels) 373 { 374 if (nchannels > 1) 375 return sbc_encoder_process_input_s4_internal( 376 position, pcm, X, nsamples, 2, 1); 377 else 378 return sbc_encoder_process_input_s4_internal( 379 position, pcm, X, nsamples, 1, 1); 380 } 381 382 static int sbc_enc_process_input_8s_le(int position, 383 const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], 384 int nsamples, int nchannels) 385 { 386 if (nchannels > 1) 387 return sbc_encoder_process_input_s8_internal( 388 position, pcm, X, nsamples, 2, 0); 389 else 390 return sbc_encoder_process_input_s8_internal( 391 position, pcm, X, nsamples, 1, 0); 392 } 393 394 static int sbc_enc_process_input_8s_be(int position, 395 const uint8_t *pcm, int16_t X[2][SBC_X_BUFFER_SIZE], 396 int nsamples, int nchannels) 397 { 398 if (nchannels > 1) 399 return sbc_encoder_process_input_s8_internal( 400 position, pcm, X, nsamples, 2, 1); 401 else 402 return sbc_encoder_process_input_s8_internal( 403 position, pcm, X, nsamples, 1, 1); 404 } 405 406 /* Supplementary function to count the number of leading zeros */ 407 408 static inline int sbc_clz(uint32_t x) 409 { 410 #ifdef __GNUC__ 411 return __builtin_clz(x); 412 #else 413 /* TODO: this should be replaced with something better if good 414 * performance is wanted when using compilers other than gcc */ 415 int cnt = 0; 416 while (x) { 417 cnt++; 418 x >>= 1; 419 } 420 return 32 - cnt; 421 #endif 422 } 423 424 static void sbc_calc_scalefactors( 425 int32_t sb_sample_f[16][2][8], 426 uint32_t scale_factor[2][8], 427 int blocks, int channels, int subbands) 428 { 429 int ch, sb, blk; 430 for (ch = 0; ch < channels; ch++) { 431 for (sb = 0; sb < subbands; sb++) { 432 uint32_t x = 1 << SCALE_OUT_BITS; 433 for (blk = 0; blk < blocks; blk++) { 434 int32_t tmp = fabs(sb_sample_f[blk][ch][sb]); 435 if (tmp != 0) 436 x |= tmp - 1; 437 } 438 scale_factor[ch][sb] = (31 - SCALE_OUT_BITS) - 439 sbc_clz(x); 440 } 441 } 442 } 443 444 static int sbc_calc_scalefactors_j( 445 int32_t sb_sample_f[16][2][8], 446 uint32_t scale_factor[2][8], 447 int blocks, int subbands) 448 { 449 int blk, joint = 0; 450 int32_t tmp0, tmp1; 451 uint32_t x, y; 452 453 /* last subband does not use joint stereo */ 454 int sb = subbands - 1; 455 x = 1 << SCALE_OUT_BITS; 456 y = 1 << SCALE_OUT_BITS; 457 for (blk = 0; blk < blocks; blk++) { 458 tmp0 = fabs(sb_sample_f[blk][0][sb]); 459 tmp1 = fabs(sb_sample_f[blk][1][sb]); 460 if (tmp0 != 0) 461 x |= tmp0 - 1; 462 if (tmp1 != 0) 463 y |= tmp1 - 1; 464 } 465 scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - sbc_clz(x); 466 scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - sbc_clz(y); 467 468 /* the rest of subbands can use joint stereo */ 469 while (--sb >= 0) { 470 int32_t sb_sample_j[16][2]; 471 x = 1 << SCALE_OUT_BITS; 472 y = 1 << SCALE_OUT_BITS; 473 for (blk = 0; blk < blocks; blk++) { 474 tmp0 = sb_sample_f[blk][0][sb]; 475 tmp1 = sb_sample_f[blk][1][sb]; 476 sb_sample_j[blk][0] = ASR(tmp0, 1) + ASR(tmp1, 1); 477 sb_sample_j[blk][1] = ASR(tmp0, 1) - ASR(tmp1, 1); 478 tmp0 = fabs(tmp0); 479 tmp1 = fabs(tmp1); 480 if (tmp0 != 0) 481 x |= tmp0 - 1; 482 if (tmp1 != 0) 483 y |= tmp1 - 1; 484 } 485 scale_factor[0][sb] = (31 - SCALE_OUT_BITS) - 486 sbc_clz(x); 487 scale_factor[1][sb] = (31 - SCALE_OUT_BITS) - 488 sbc_clz(y); 489 x = 1 << SCALE_OUT_BITS; 490 y = 1 << SCALE_OUT_BITS; 491 for (blk = 0; blk < blocks; blk++) { 492 tmp0 = fabs(sb_sample_j[blk][0]); 493 tmp1 = fabs(sb_sample_j[blk][1]); 494 if (tmp0 != 0) 495 x |= tmp0 - 1; 496 if (tmp1 != 0) 497 y |= tmp1 - 1; 498 } 499 x = (31 - SCALE_OUT_BITS) - sbc_clz(x); 500 y = (31 - SCALE_OUT_BITS) - sbc_clz(y); 501 502 /* decide whether to use joint stereo for this subband */ 503 if ((scale_factor[0][sb] + scale_factor[1][sb]) > x + y) { 504 joint |= 1 << (subbands - 1 - sb); 505 scale_factor[0][sb] = x; 506 scale_factor[1][sb] = y; 507 for (blk = 0; blk < blocks; blk++) { 508 sb_sample_f[blk][0][sb] = sb_sample_j[blk][0]; 509 sb_sample_f[blk][1][sb] = sb_sample_j[blk][1]; 510 } 511 } 512 } 513 514 /* bitmask with the information about subbands using joint stereo */ 515 return joint; 516 } 517 518 /* 519 * Detect CPU features and setup function pointers 520 */ 521 void sbc_init_primitives(struct sbc_encoder_state *state) 522 { 523 /* Default implementation for analyze functions */ 524 state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_simd; 525 state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_simd; 526 527 /* Default implementation for input reordering / deinterleaving */ 528 state->sbc_enc_process_input_4s_le = sbc_enc_process_input_4s_le; 529 state->sbc_enc_process_input_4s_be = sbc_enc_process_input_4s_be; 530 state->sbc_enc_process_input_8s_le = sbc_enc_process_input_8s_le; 531 state->sbc_enc_process_input_8s_be = sbc_enc_process_input_8s_be; 532 533 /* Default implementation for scale factors calculation */ 534 state->sbc_calc_scalefactors = sbc_calc_scalefactors; 535 state->sbc_calc_scalefactors_j = sbc_calc_scalefactors_j; 536 state->implementation_info = "Generic C"; 537 538 /* X86/AMD64 optimizations */ 539 #ifdef SBC_BUILD_WITH_MMX_SUPPORT 540 sbc_init_primitives_mmx(state); 541 #endif 542 543 /* ARM optimizations */ 544 #ifdef SBC_BUILD_WITH_ARMV6_SUPPORT 545 sbc_init_primitives_armv6(state); 546 #endif 547 #ifdef SBC_BUILD_WITH_NEON_SUPPORT 548 sbc_init_primitives_neon(state); 549 #endif 550 } 551