Home | History | Annotate | Download | only in sbc
      1 /*
      2  *
      3  *  Bluetooth low-complexity, subband codec (SBC) library
      4  *
      5  *  Copyright (C) 2008-2010  Nokia Corporation
      6  *  Copyright (C) 2004-2010  Marcel Holtmann <marcel (at) holtmann.org>
      7  *  Copyright (C) 2004-2005  Henryk Ploetz <henryk (at) ploetzli.ch>
      8  *  Copyright (C) 2005-2006  Brad Midgley <bmidgley (at) xmission.com>
      9  *
     10  *
     11  *  This library is free software; you can redistribute it and/or
     12  *  modify it under the terms of the GNU Lesser General Public
     13  *  License as published by the Free Software Foundation; either
     14  *  version 2.1 of the License, or (at your option) any later version.
     15  *
     16  *  This library is distributed in the hope that it will be useful,
     17  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     18  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     19  *  Lesser General Public License for more details.
     20  *
     21  *  You should have received a copy of the GNU Lesser General Public
     22  *  License along with this library; if not, write to the Free Software
     23  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
     24  *
     25  */
     26 
     27 #include <stdint.h>
     28 #include <limits.h>
     29 #include "sbc.h"
     30 #include "sbc_math.h"
     31 #include "sbc_tables.h"
     32 
     33 #include "sbc_primitives_mmx.h"
     34 
     35 /*
     36  * MMX optimizations
     37  */
     38 
     39 #ifdef SBC_BUILD_WITH_MMX_SUPPORT
     40 
     41 static inline void sbc_analyze_four_mmx(const int16_t *in, int32_t *out,
     42 					const FIXED_T *consts)
     43 {
     44 	static const SBC_ALIGNED int32_t round_c[2] = {
     45 		1 << (SBC_PROTO_FIXED4_SCALE - 1),
     46 		1 << (SBC_PROTO_FIXED4_SCALE - 1),
     47 	};
     48 	asm volatile (
     49 		"movq        (%0), %%mm0\n"
     50 		"movq       8(%0), %%mm1\n"
     51 		"pmaddwd     (%1), %%mm0\n"
     52 		"pmaddwd    8(%1), %%mm1\n"
     53 		"paddd       (%2), %%mm0\n"
     54 		"paddd       (%2), %%mm1\n"
     55 		"\n"
     56 		"movq      16(%0), %%mm2\n"
     57 		"movq      24(%0), %%mm3\n"
     58 		"pmaddwd   16(%1), %%mm2\n"
     59 		"pmaddwd   24(%1), %%mm3\n"
     60 		"paddd      %%mm2, %%mm0\n"
     61 		"paddd      %%mm3, %%mm1\n"
     62 		"\n"
     63 		"movq      32(%0), %%mm2\n"
     64 		"movq      40(%0), %%mm3\n"
     65 		"pmaddwd   32(%1), %%mm2\n"
     66 		"pmaddwd   40(%1), %%mm3\n"
     67 		"paddd      %%mm2, %%mm0\n"
     68 		"paddd      %%mm3, %%mm1\n"
     69 		"\n"
     70 		"movq      48(%0), %%mm2\n"
     71 		"movq      56(%0), %%mm3\n"
     72 		"pmaddwd   48(%1), %%mm2\n"
     73 		"pmaddwd   56(%1), %%mm3\n"
     74 		"paddd      %%mm2, %%mm0\n"
     75 		"paddd      %%mm3, %%mm1\n"
     76 		"\n"
     77 		"movq      64(%0), %%mm2\n"
     78 		"movq      72(%0), %%mm3\n"
     79 		"pmaddwd   64(%1), %%mm2\n"
     80 		"pmaddwd   72(%1), %%mm3\n"
     81 		"paddd      %%mm2, %%mm0\n"
     82 		"paddd      %%mm3, %%mm1\n"
     83 		"\n"
     84 		"psrad         %4, %%mm0\n"
     85 		"psrad         %4, %%mm1\n"
     86 		"packssdw   %%mm0, %%mm0\n"
     87 		"packssdw   %%mm1, %%mm1\n"
     88 		"\n"
     89 		"movq       %%mm0, %%mm2\n"
     90 		"pmaddwd   80(%1), %%mm0\n"
     91 		"pmaddwd   88(%1), %%mm2\n"
     92 		"\n"
     93 		"movq       %%mm1, %%mm3\n"
     94 		"pmaddwd   96(%1), %%mm1\n"
     95 		"pmaddwd  104(%1), %%mm3\n"
     96 		"paddd      %%mm1, %%mm0\n"
     97 		"paddd      %%mm3, %%mm2\n"
     98 		"\n"
     99 		"movq       %%mm0, (%3)\n"
    100 		"movq       %%mm2, 8(%3)\n"
    101 		:
    102 		: "r" (in), "r" (consts), "r" (&round_c), "r" (out),
    103 			"i" (SBC_PROTO_FIXED4_SCALE)
    104 		: "cc", "memory");
    105 }
    106 
    107 static inline void sbc_analyze_eight_mmx(const int16_t *in, int32_t *out,
    108 							const FIXED_T *consts)
    109 {
    110 	static const SBC_ALIGNED int32_t round_c[2] = {
    111 		1 << (SBC_PROTO_FIXED8_SCALE - 1),
    112 		1 << (SBC_PROTO_FIXED8_SCALE - 1),
    113 	};
    114 	asm volatile (
    115 		"movq        (%0), %%mm0\n"
    116 		"movq       8(%0), %%mm1\n"
    117 		"movq      16(%0), %%mm2\n"
    118 		"movq      24(%0), %%mm3\n"
    119 		"pmaddwd     (%1), %%mm0\n"
    120 		"pmaddwd    8(%1), %%mm1\n"
    121 		"pmaddwd   16(%1), %%mm2\n"
    122 		"pmaddwd   24(%1), %%mm3\n"
    123 		"paddd       (%2), %%mm0\n"
    124 		"paddd       (%2), %%mm1\n"
    125 		"paddd       (%2), %%mm2\n"
    126 		"paddd       (%2), %%mm3\n"
    127 		"\n"
    128 		"movq      32(%0), %%mm4\n"
    129 		"movq      40(%0), %%mm5\n"
    130 		"movq      48(%0), %%mm6\n"
    131 		"movq      56(%0), %%mm7\n"
    132 		"pmaddwd   32(%1), %%mm4\n"
    133 		"pmaddwd   40(%1), %%mm5\n"
    134 		"pmaddwd   48(%1), %%mm6\n"
    135 		"pmaddwd   56(%1), %%mm7\n"
    136 		"paddd      %%mm4, %%mm0\n"
    137 		"paddd      %%mm5, %%mm1\n"
    138 		"paddd      %%mm6, %%mm2\n"
    139 		"paddd      %%mm7, %%mm3\n"
    140 		"\n"
    141 		"movq      64(%0), %%mm4\n"
    142 		"movq      72(%0), %%mm5\n"
    143 		"movq      80(%0), %%mm6\n"
    144 		"movq      88(%0), %%mm7\n"
    145 		"pmaddwd   64(%1), %%mm4\n"
    146 		"pmaddwd   72(%1), %%mm5\n"
    147 		"pmaddwd   80(%1), %%mm6\n"
    148 		"pmaddwd   88(%1), %%mm7\n"
    149 		"paddd      %%mm4, %%mm0\n"
    150 		"paddd      %%mm5, %%mm1\n"
    151 		"paddd      %%mm6, %%mm2\n"
    152 		"paddd      %%mm7, %%mm3\n"
    153 		"\n"
    154 		"movq      96(%0), %%mm4\n"
    155 		"movq     104(%0), %%mm5\n"
    156 		"movq     112(%0), %%mm6\n"
    157 		"movq     120(%0), %%mm7\n"
    158 		"pmaddwd   96(%1), %%mm4\n"
    159 		"pmaddwd  104(%1), %%mm5\n"
    160 		"pmaddwd  112(%1), %%mm6\n"
    161 		"pmaddwd  120(%1), %%mm7\n"
    162 		"paddd      %%mm4, %%mm0\n"
    163 		"paddd      %%mm5, %%mm1\n"
    164 		"paddd      %%mm6, %%mm2\n"
    165 		"paddd      %%mm7, %%mm3\n"
    166 		"\n"
    167 		"movq     128(%0), %%mm4\n"
    168 		"movq     136(%0), %%mm5\n"
    169 		"movq     144(%0), %%mm6\n"
    170 		"movq     152(%0), %%mm7\n"
    171 		"pmaddwd  128(%1), %%mm4\n"
    172 		"pmaddwd  136(%1), %%mm5\n"
    173 		"pmaddwd  144(%1), %%mm6\n"
    174 		"pmaddwd  152(%1), %%mm7\n"
    175 		"paddd      %%mm4, %%mm0\n"
    176 		"paddd      %%mm5, %%mm1\n"
    177 		"paddd      %%mm6, %%mm2\n"
    178 		"paddd      %%mm7, %%mm3\n"
    179 		"\n"
    180 		"psrad         %4, %%mm0\n"
    181 		"psrad         %4, %%mm1\n"
    182 		"psrad         %4, %%mm2\n"
    183 		"psrad         %4, %%mm3\n"
    184 		"\n"
    185 		"packssdw   %%mm0, %%mm0\n"
    186 		"packssdw   %%mm1, %%mm1\n"
    187 		"packssdw   %%mm2, %%mm2\n"
    188 		"packssdw   %%mm3, %%mm3\n"
    189 		"\n"
    190 		"movq       %%mm0, %%mm4\n"
    191 		"movq       %%mm0, %%mm5\n"
    192 		"pmaddwd  160(%1), %%mm4\n"
    193 		"pmaddwd  168(%1), %%mm5\n"
    194 		"\n"
    195 		"movq       %%mm1, %%mm6\n"
    196 		"movq       %%mm1, %%mm7\n"
    197 		"pmaddwd  192(%1), %%mm6\n"
    198 		"pmaddwd  200(%1), %%mm7\n"
    199 		"paddd      %%mm6, %%mm4\n"
    200 		"paddd      %%mm7, %%mm5\n"
    201 		"\n"
    202 		"movq       %%mm2, %%mm6\n"
    203 		"movq       %%mm2, %%mm7\n"
    204 		"pmaddwd  224(%1), %%mm6\n"
    205 		"pmaddwd  232(%1), %%mm7\n"
    206 		"paddd      %%mm6, %%mm4\n"
    207 		"paddd      %%mm7, %%mm5\n"
    208 		"\n"
    209 		"movq       %%mm3, %%mm6\n"
    210 		"movq       %%mm3, %%mm7\n"
    211 		"pmaddwd  256(%1), %%mm6\n"
    212 		"pmaddwd  264(%1), %%mm7\n"
    213 		"paddd      %%mm6, %%mm4\n"
    214 		"paddd      %%mm7, %%mm5\n"
    215 		"\n"
    216 		"movq       %%mm4, (%3)\n"
    217 		"movq       %%mm5, 8(%3)\n"
    218 		"\n"
    219 		"movq       %%mm0, %%mm5\n"
    220 		"pmaddwd  176(%1), %%mm0\n"
    221 		"pmaddwd  184(%1), %%mm5\n"
    222 		"\n"
    223 		"movq       %%mm1, %%mm7\n"
    224 		"pmaddwd  208(%1), %%mm1\n"
    225 		"pmaddwd  216(%1), %%mm7\n"
    226 		"paddd      %%mm1, %%mm0\n"
    227 		"paddd      %%mm7, %%mm5\n"
    228 		"\n"
    229 		"movq       %%mm2, %%mm7\n"
    230 		"pmaddwd  240(%1), %%mm2\n"
    231 		"pmaddwd  248(%1), %%mm7\n"
    232 		"paddd      %%mm2, %%mm0\n"
    233 		"paddd      %%mm7, %%mm5\n"
    234 		"\n"
    235 		"movq       %%mm3, %%mm7\n"
    236 		"pmaddwd  272(%1), %%mm3\n"
    237 		"pmaddwd  280(%1), %%mm7\n"
    238 		"paddd      %%mm3, %%mm0\n"
    239 		"paddd      %%mm7, %%mm5\n"
    240 		"\n"
    241 		"movq       %%mm0, 16(%3)\n"
    242 		"movq       %%mm5, 24(%3)\n"
    243 		:
    244 		: "r" (in), "r" (consts), "r" (&round_c), "r" (out),
    245 			"i" (SBC_PROTO_FIXED8_SCALE)
    246 		: "cc", "memory");
    247 }
    248 
    249 static inline void sbc_analyze_4b_4s_mmx(int16_t *x, int32_t *out,
    250 						int out_stride)
    251 {
    252 	/* Analyze blocks */
    253 	sbc_analyze_four_mmx(x + 12, out, analysis_consts_fixed4_simd_odd);
    254 	out += out_stride;
    255 	sbc_analyze_four_mmx(x + 8, out, analysis_consts_fixed4_simd_even);
    256 	out += out_stride;
    257 	sbc_analyze_four_mmx(x + 4, out, analysis_consts_fixed4_simd_odd);
    258 	out += out_stride;
    259 	sbc_analyze_four_mmx(x + 0, out, analysis_consts_fixed4_simd_even);
    260 
    261 	asm volatile ("emms\n");
    262 }
    263 
    264 static inline void sbc_analyze_4b_8s_mmx(int16_t *x, int32_t *out,
    265 						int out_stride)
    266 {
    267 	/* Analyze blocks */
    268 	sbc_analyze_eight_mmx(x + 24, out, analysis_consts_fixed8_simd_odd);
    269 	out += out_stride;
    270 	sbc_analyze_eight_mmx(x + 16, out, analysis_consts_fixed8_simd_even);
    271 	out += out_stride;
    272 	sbc_analyze_eight_mmx(x + 8, out, analysis_consts_fixed8_simd_odd);
    273 	out += out_stride;
    274 	sbc_analyze_eight_mmx(x + 0, out, analysis_consts_fixed8_simd_even);
    275 
    276 	asm volatile ("emms\n");
    277 }
    278 
    279 static void sbc_calc_scalefactors_mmx(
    280 	int32_t sb_sample_f[16][2][8],
    281 	uint32_t scale_factor[2][8],
    282 	int blocks, int channels, int subbands)
    283 {
    284 	static const SBC_ALIGNED int32_t consts[2] = {
    285 		1 << SCALE_OUT_BITS,
    286 		1 << SCALE_OUT_BITS,
    287 	};
    288 	int ch, sb;
    289 	intptr_t blk;
    290 	for (ch = 0; ch < channels; ch++) {
    291 		for (sb = 0; sb < subbands; sb += 2) {
    292 			blk = (blocks - 1) * (((char *) &sb_sample_f[1][0][0] -
    293 				(char *) &sb_sample_f[0][0][0]));
    294 			asm volatile (
    295 				"movq         (%4), %%mm0\n"
    296 			"1:\n"
    297 				"movq     (%1, %0), %%mm1\n"
    298 				"pxor        %%mm2, %%mm2\n"
    299 				"pcmpgtd     %%mm2, %%mm1\n"
    300 				"paddd    (%1, %0), %%mm1\n"
    301 				"pcmpgtd     %%mm1, %%mm2\n"
    302 				"pxor        %%mm2, %%mm1\n"
    303 
    304 				"por         %%mm1, %%mm0\n"
    305 
    306 				"sub            %2, %0\n"
    307 				"jns            1b\n"
    308 
    309 				"movd        %%mm0, %k0\n"
    310 				"psrlq         $32, %%mm0\n"
    311 				"bsrl          %k0, %k0\n"
    312 				"subl           %5, %k0\n"
    313 				"movl          %k0, (%3)\n"
    314 
    315 				"movd        %%mm0, %k0\n"
    316 				"bsrl          %k0, %k0\n"
    317 				"subl           %5, %k0\n"
    318 				"movl          %k0, 4(%3)\n"
    319 			: "+r" (blk)
    320 			: "r" (&sb_sample_f[0][ch][sb]),
    321 				"i" ((char *) &sb_sample_f[1][0][0] -
    322 					(char *) &sb_sample_f[0][0][0]),
    323 				"r" (&scale_factor[ch][sb]),
    324 				"r" (&consts),
    325 				"i" (SCALE_OUT_BITS)
    326 			: "cc", "memory");
    327 		}
    328 	}
    329 	asm volatile ("emms\n");
    330 }
    331 
    332 static int check_mmx_support(void)
    333 {
    334 #ifdef __amd64__
    335 	return 1; /* We assume that all 64-bit processors have MMX support */
    336 #else
    337 	int cpuid_feature_information;
    338 	asm volatile (
    339 		/* According to Intel manual, CPUID instruction is supported
    340 		 * if the value of ID bit (bit 21) in EFLAGS can be modified */
    341 		"pushf\n"
    342 		"movl     (%%esp),   %0\n"
    343 		"xorl     $0x200000, (%%esp)\n" /* try to modify ID bit */
    344 		"popf\n"
    345 		"pushf\n"
    346 		"xorl     (%%esp),   %0\n"      /* check if ID bit changed */
    347 		"jz       1f\n"
    348 		"push     %%eax\n"
    349 		"push     %%ebx\n"
    350 		"push     %%ecx\n"
    351 		"mov      $1,        %%eax\n"
    352 		"cpuid\n"
    353 		"pop      %%ecx\n"
    354 		"pop      %%ebx\n"
    355 		"pop      %%eax\n"
    356 		"1:\n"
    357 		"popf\n"
    358 		: "=d" (cpuid_feature_information)
    359 		:
    360 		: "cc");
    361     return cpuid_feature_information & (1 << 23);
    362 #endif
    363 }
    364 
    365 void sbc_init_primitives_mmx(struct sbc_encoder_state *state)
    366 {
    367 	if (check_mmx_support()) {
    368 		state->sbc_analyze_4b_4s = sbc_analyze_4b_4s_mmx;
    369 		state->sbc_analyze_4b_8s = sbc_analyze_4b_8s_mmx;
    370 		state->sbc_calc_scalefactors = sbc_calc_scalefactors_mmx;
    371 		state->implementation_info = "MMX";
    372 	}
    373 }
    374 
    375 #endif
    376