Home | History | Annotate | Download | only in audio
      1 /*
      2     SDL - Simple DirectMedia Layer
      3     Copyright (C) 1997-2006 Sam Lantinga
      4 
      5     This library is free software; you can redistribute it and/or
      6     modify it under the terms of the GNU Lesser General Public
      7     License as published by the Free Software Foundation; either
      8     version 2.1 of the License, or (at your option) any later version.
      9 
     10     This library is distributed in the hope that it will be useful,
     11     but WITHOUT ANY WARRANTY; without even the implied warranty of
     12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13     Lesser General Public License for more details.
     14 
     15     You should have received a copy of the GNU Lesser General Public
     16     License along with this library; if not, write to the Free Software
     17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
     18 
     19     Sam Lantinga
     20     slouken (at) libsdl.org
     21 */
     22 #include "SDL_config.h"
     23 
     24 /*
     25     MMX assembler version of SDL_MixAudio for signed little endian 16 bit samples and signed 8 bit samples
     26     Copyright 2002 Stephane Marchesin (stephane.marchesin (at) wanadoo.fr)
     27     This code is licensed under the LGPL (see COPYING for details)
     28 
     29     Assumes buffer size in bytes is a multiple of 16
     30     Assumes SDL_MIX_MAXVOLUME = 128
     31 */
     32 
     33 
     34 /***********************************************
     35 *   Mixing for 16 bit signed buffers
     36 ***********************************************/
     37 
     38 #if defined(__GNUC__) && defined(__i386__) && defined(SDL_ASSEMBLY_ROUTINES)
     39 void SDL_MixAudio_MMX_S16(char* dst,char* src,unsigned int size,int volume)
     40 {
     41     __asm__ __volatile__ (
     42 
     43 "	movl %3,%%eax\n"	/* eax = volume */
     44 
     45 "	movl %2,%%edx\n"	/* edx = size */
     46 
     47 "	shrl $4,%%edx\n"	/* process 16 bytes per iteration = 8 samples */
     48 
     49 "	jz .endS16\n"
     50 
     51 "	pxor %%mm0,%%mm0\n"
     52 
     53 "	movd %%eax,%%mm0\n"
     54 "	movq %%mm0,%%mm1\n"
     55 "	psllq $16,%%mm0\n"
     56 "	por %%mm1,%%mm0\n"
     57 "	psllq $16,%%mm0\n"
     58 "	por %%mm1,%%mm0\n"
     59 "	psllq $16,%%mm0\n"
     60 "	por %%mm1,%%mm0\n"		/* mm0 = vol|vol|vol|vol */
     61 
     62 ".align 8\n"
     63 "	.mixloopS16:\n"
     64 
     65 "	movq (%1),%%mm1\n" /* mm1 = a|b|c|d */
     66 
     67 "	movq %%mm1,%%mm2\n" /* mm2 = a|b|c|d */
     68 
     69 "	movq 8(%1),%%mm4\n" /* mm4 = e|f|g|h */
     70 
     71 	/* pr charger le buffer dst dans mm7 */
     72 "	movq (%0),%%mm7\n" /* mm7 = dst[0] */
     73 
     74 	/* multiplier par le volume */
     75 "	pmullw %%mm0,%%mm1\n" /* mm1 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */
     76 
     77 "	pmulhw %%mm0,%%mm2\n" /* mm2 = h(a*v)|h(b*v)|h(c*v)|h(d*v) */
     78 "	movq %%mm4,%%mm5\n" /* mm5 = e|f|g|h */
     79 
     80 "	pmullw %%mm0,%%mm4\n" /* mm4 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */
     81 
     82 "	pmulhw %%mm0,%%mm5\n" /* mm5 = h(e*v)|h(f*v)|h(g*v)|h(h*v) */
     83 "	movq %%mm1,%%mm3\n" /* mm3 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */
     84 
     85 "	punpckhwd %%mm2,%%mm1\n" /* mm1 = a*v|b*v */
     86 
     87 "	movq %%mm4,%%mm6\n" /* mm6 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */
     88 "	punpcklwd %%mm2,%%mm3\n" /* mm3 = c*v|d*v */
     89 
     90 "	punpckhwd %%mm5,%%mm4\n" /* mm4 = e*f|f*v */
     91 
     92 "	punpcklwd %%mm5,%%mm6\n" /* mm6 = g*v|h*v */
     93 
     94 	/* pr charger le buffer dst dans mm5 */
     95 "	movq 8(%0),%%mm5\n" /* mm5 = dst[1] */
     96 
     97 	/* diviser par 128 */
     98 "	psrad $7,%%mm1\n" /* mm1 = a*v/128|b*v/128 , 128 = SDL_MIX_MAXVOLUME */
     99 "	add $16,%1\n"
    100 
    101 "	psrad $7,%%mm3\n" /* mm3 = c*v/128|d*v/128 */
    102 
    103 "	psrad $7,%%mm4\n" /* mm4 = e*v/128|f*v/128 */
    104 
    105 	/* mm1 = le sample avec le volume modifi */
    106 "	packssdw %%mm1,%%mm3\n" /* mm3 = s(a*v|b*v|c*v|d*v) */
    107 
    108 "	psrad $7,%%mm6\n" /* mm6= g*v/128|h*v/128 */
    109 "	paddsw %%mm7,%%mm3\n" /* mm3 = adjust_volume(src)+dst */
    110 
    111 	/* mm4 = le sample avec le volume modifi */
    112 "	packssdw %%mm4,%%mm6\n" /* mm6 = s(e*v|f*v|g*v|h*v) */
    113 "	movq %%mm3,(%0)\n"
    114 
    115 "	paddsw %%mm5,%%mm6\n" /* mm6 = adjust_volume(src)+dst */
    116 
    117 "	movq %%mm6,8(%0)\n"
    118 
    119 "	add $16,%0\n"
    120 
    121 "	dec %%edx\n"
    122 
    123 "	jnz .mixloopS16\n"
    124 
    125 "	emms\n"
    126 
    127 ".endS16:\n"
    128 	 :
    129 	 : "r" (dst), "r"(src),"m"(size),
    130 	 "m"(volume)
    131 	 : "eax","edx","memory"
    132 	 );
    133 }
    134 
    135 
    136 
    137 /*////////////////////////////////////////////// */
    138 /* Mixing for 8 bit signed buffers */
    139 /*////////////////////////////////////////////// */
    140 
    141 void SDL_MixAudio_MMX_S8(char* dst,char* src,unsigned int size,int volume)
    142 {
    143     __asm__ __volatile__ (
    144 
    145 "	movl %3,%%eax\n"	/* eax = volume */
    146 
    147 "	movd %%eax,%%mm0\n"
    148 "	movq %%mm0,%%mm1\n"
    149 "	psllq $16,%%mm0\n"
    150 "	por %%mm1,%%mm0\n"
    151 "	psllq $16,%%mm0\n"
    152 "	por %%mm1,%%mm0\n"
    153 "	psllq $16,%%mm0\n"
    154 "	por %%mm1,%%mm0\n"
    155 
    156 "	movl %2,%%edx\n"	/* edx = size */
    157 "	shr $3,%%edx\n"	/* process 8 bytes per iteration = 8 samples */
    158 
    159 "	cmp $0,%%edx\n"
    160 "	je .endS8\n"
    161 
    162 ".align 8\n"
    163 "	.mixloopS8:\n"
    164 
    165 "	pxor %%mm2,%%mm2\n"		/* mm2 = 0 */
    166 "	movq (%1),%%mm1\n"	/* mm1 = a|b|c|d|e|f|g|h */
    167 
    168 "	movq %%mm1,%%mm3\n" 	/* mm3 = a|b|c|d|e|f|g|h */
    169 
    170 	/* on va faire le "sign extension" en faisant un cmp avec 0 qui retourne 1 si <0, 0 si >0 */
    171 "	pcmpgtb %%mm1,%%mm2\n"	/* mm2 = 11111111|00000000|00000000.... */
    172 
    173 "	punpckhbw %%mm2,%%mm1\n"	/* mm1 = 0|a|0|b|0|c|0|d */
    174 
    175 "	punpcklbw %%mm2,%%mm3\n"	/* mm3 = 0|e|0|f|0|g|0|h */
    176 "	movq (%0),%%mm2\n"	/* mm2 = destination */
    177 
    178 "	pmullw %%mm0,%%mm1\n"	/* mm1 = v*a|v*b|v*c|v*d */
    179 "	add $8,%1\n"
    180 
    181 "	pmullw %%mm0,%%mm3\n"	/* mm3 = v*e|v*f|v*g|v*h */
    182 "	psraw $7,%%mm1\n"		/* mm1 = v*a/128|v*b/128|v*c/128|v*d/128  */
    183 
    184 "	psraw $7,%%mm3\n"		/* mm3 = v*e/128|v*f/128|v*g/128|v*h/128 */
    185 
    186 "	packsswb %%mm1,%%mm3\n"	/* mm1 = v*a/128|v*b/128|v*c/128|v*d/128|v*e/128|v*f/128|v*g/128|v*h/128 */
    187 
    188 "	paddsb %%mm2,%%mm3\n"	/* add to destination buffer */
    189 
    190 "	movq %%mm3,(%0)\n"	/* store back to ram */
    191 "	add $8,%0\n"
    192 
    193 "	dec %%edx\n"
    194 
    195 "	jnz .mixloopS8\n"
    196 
    197 ".endS8:\n"
    198 "	emms\n"
    199 	 :
    200 	 : "r" (dst), "r"(src),"m"(size),
    201 	 "m"(volume)
    202 	 : "eax","edx","memory"
    203 	 );
    204 }
    205 #endif
    206 
    207