1 /* 2 SDL - Simple DirectMedia Layer 3 Copyright (C) 1997-2006 Sam Lantinga 4 5 This library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License as published by the Free Software Foundation; either 8 version 2.1 of the License, or (at your option) any later version. 9 10 This library is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public 16 License along with this library; if not, write to the Free Software 17 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 19 Sam Lantinga 20 slouken (at) libsdl.org 21 */ 22 #include "SDL_config.h" 23 24 /* 25 MMX assembler version of SDL_MixAudio for signed little endian 16 bit samples and signed 8 bit samples 26 Copyright 2002 Stephane Marchesin (stephane.marchesin (at) wanadoo.fr) 27 This code is licensed under the LGPL (see COPYING for details) 28 29 Assumes buffer size in bytes is a multiple of 16 30 Assumes SDL_MIX_MAXVOLUME = 128 31 */ 32 33 34 /*********************************************** 35 * Mixing for 16 bit signed buffers 36 ***********************************************/ 37 38 #if defined(__GNUC__) && defined(__i386__) && defined(SDL_ASSEMBLY_ROUTINES) 39 void SDL_MixAudio_MMX_S16(char* dst,char* src,unsigned int size,int volume) 40 { 41 __asm__ __volatile__ ( 42 43 " movl %3,%%eax\n" /* eax = volume */ 44 45 " movl %2,%%edx\n" /* edx = size */ 46 47 " shrl $4,%%edx\n" /* process 16 bytes per iteration = 8 samples */ 48 49 " jz .endS16\n" 50 51 " pxor %%mm0,%%mm0\n" 52 53 " movd %%eax,%%mm0\n" 54 " movq %%mm0,%%mm1\n" 55 " psllq $16,%%mm0\n" 56 " por %%mm1,%%mm0\n" 57 " psllq $16,%%mm0\n" 58 " por %%mm1,%%mm0\n" 59 " psllq $16,%%mm0\n" 60 " por %%mm1,%%mm0\n" /* mm0 = vol|vol|vol|vol */ 61 62 ".align 8\n" 63 " .mixloopS16:\n" 64 65 " movq (%1),%%mm1\n" /* mm1 = a|b|c|d */ 66 67 " movq %%mm1,%%mm2\n" /* mm2 = a|b|c|d */ 68 69 " movq 8(%1),%%mm4\n" /* mm4 = e|f|g|h */ 70 71 /* pr charger le buffer dst dans mm7 */ 72 " movq (%0),%%mm7\n" /* mm7 = dst[0] */ 73 74 /* multiplier par le volume */ 75 " pmullw %%mm0,%%mm1\n" /* mm1 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */ 76 77 " pmulhw %%mm0,%%mm2\n" /* mm2 = h(a*v)|h(b*v)|h(c*v)|h(d*v) */ 78 " movq %%mm4,%%mm5\n" /* mm5 = e|f|g|h */ 79 80 " pmullw %%mm0,%%mm4\n" /* mm4 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */ 81 82 " pmulhw %%mm0,%%mm5\n" /* mm5 = h(e*v)|h(f*v)|h(g*v)|h(h*v) */ 83 " movq %%mm1,%%mm3\n" /* mm3 = l(a*v)|l(b*v)|l(c*v)|l(d*v) */ 84 85 " punpckhwd %%mm2,%%mm1\n" /* mm1 = a*v|b*v */ 86 87 " movq %%mm4,%%mm6\n" /* mm6 = l(e*v)|l(f*v)|l(g*v)|l(h*v) */ 88 " punpcklwd %%mm2,%%mm3\n" /* mm3 = c*v|d*v */ 89 90 " punpckhwd %%mm5,%%mm4\n" /* mm4 = e*f|f*v */ 91 92 " punpcklwd %%mm5,%%mm6\n" /* mm6 = g*v|h*v */ 93 94 /* pr charger le buffer dst dans mm5 */ 95 " movq 8(%0),%%mm5\n" /* mm5 = dst[1] */ 96 97 /* diviser par 128 */ 98 " psrad $7,%%mm1\n" /* mm1 = a*v/128|b*v/128 , 128 = SDL_MIX_MAXVOLUME */ 99 " add $16,%1\n" 100 101 " psrad $7,%%mm3\n" /* mm3 = c*v/128|d*v/128 */ 102 103 " psrad $7,%%mm4\n" /* mm4 = e*v/128|f*v/128 */ 104 105 /* mm1 = le sample avec le volume modifi */ 106 " packssdw %%mm1,%%mm3\n" /* mm3 = s(a*v|b*v|c*v|d*v) */ 107 108 " psrad $7,%%mm6\n" /* mm6= g*v/128|h*v/128 */ 109 " paddsw %%mm7,%%mm3\n" /* mm3 = adjust_volume(src)+dst */ 110 111 /* mm4 = le sample avec le volume modifi */ 112 " packssdw %%mm4,%%mm6\n" /* mm6 = s(e*v|f*v|g*v|h*v) */ 113 " movq %%mm3,(%0)\n" 114 115 " paddsw %%mm5,%%mm6\n" /* mm6 = adjust_volume(src)+dst */ 116 117 " movq %%mm6,8(%0)\n" 118 119 " add $16,%0\n" 120 121 " dec %%edx\n" 122 123 " jnz .mixloopS16\n" 124 125 " emms\n" 126 127 ".endS16:\n" 128 : 129 : "r" (dst), "r"(src),"m"(size), 130 "m"(volume) 131 : "eax","edx","memory" 132 ); 133 } 134 135 136 137 /*////////////////////////////////////////////// */ 138 /* Mixing for 8 bit signed buffers */ 139 /*////////////////////////////////////////////// */ 140 141 void SDL_MixAudio_MMX_S8(char* dst,char* src,unsigned int size,int volume) 142 { 143 __asm__ __volatile__ ( 144 145 " movl %3,%%eax\n" /* eax = volume */ 146 147 " movd %%eax,%%mm0\n" 148 " movq %%mm0,%%mm1\n" 149 " psllq $16,%%mm0\n" 150 " por %%mm1,%%mm0\n" 151 " psllq $16,%%mm0\n" 152 " por %%mm1,%%mm0\n" 153 " psllq $16,%%mm0\n" 154 " por %%mm1,%%mm0\n" 155 156 " movl %2,%%edx\n" /* edx = size */ 157 " shr $3,%%edx\n" /* process 8 bytes per iteration = 8 samples */ 158 159 " cmp $0,%%edx\n" 160 " je .endS8\n" 161 162 ".align 8\n" 163 " .mixloopS8:\n" 164 165 " pxor %%mm2,%%mm2\n" /* mm2 = 0 */ 166 " movq (%1),%%mm1\n" /* mm1 = a|b|c|d|e|f|g|h */ 167 168 " movq %%mm1,%%mm3\n" /* mm3 = a|b|c|d|e|f|g|h */ 169 170 /* on va faire le "sign extension" en faisant un cmp avec 0 qui retourne 1 si <0, 0 si >0 */ 171 " pcmpgtb %%mm1,%%mm2\n" /* mm2 = 11111111|00000000|00000000.... */ 172 173 " punpckhbw %%mm2,%%mm1\n" /* mm1 = 0|a|0|b|0|c|0|d */ 174 175 " punpcklbw %%mm2,%%mm3\n" /* mm3 = 0|e|0|f|0|g|0|h */ 176 " movq (%0),%%mm2\n" /* mm2 = destination */ 177 178 " pmullw %%mm0,%%mm1\n" /* mm1 = v*a|v*b|v*c|v*d */ 179 " add $8,%1\n" 180 181 " pmullw %%mm0,%%mm3\n" /* mm3 = v*e|v*f|v*g|v*h */ 182 " psraw $7,%%mm1\n" /* mm1 = v*a/128|v*b/128|v*c/128|v*d/128 */ 183 184 " psraw $7,%%mm3\n" /* mm3 = v*e/128|v*f/128|v*g/128|v*h/128 */ 185 186 " packsswb %%mm1,%%mm3\n" /* mm1 = v*a/128|v*b/128|v*c/128|v*d/128|v*e/128|v*f/128|v*g/128|v*h/128 */ 187 188 " paddsb %%mm2,%%mm3\n" /* add to destination buffer */ 189 190 " movq %%mm3,(%0)\n" /* store back to ram */ 191 " add $8,%0\n" 192 193 " dec %%edx\n" 194 195 " jnz .mixloopS8\n" 196 197 ".endS8:\n" 198 " emms\n" 199 : 200 : "r" (dst), "r"(src),"m"(size), 201 "m"(volume) 202 : "eax","edx","memory" 203 ); 204 } 205 #endif 206 207