1 /************************************************************************ 2 * Copyright (C) 2002-2009, Xiph.org Foundation 3 * Copyright (C) 2010, Robin Watts for Pinknoise Productions Ltd 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * * Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * * Redistributions in binary form must reproduce the above 13 * copyright notice, this list of conditions and the following disclaimer 14 * in the documentation and/or other materials provided with the 15 * distribution. 16 * * Neither the names of the Xiph.org Foundation nor Pinknoise 17 * Productions Ltd nor the names of its contributors may be used to 18 * endorse or promote products derived from this software without 19 * specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 ************************************************************************ 33 34 function: arm7 and later wide math functions 35 36 ************************************************************************/ 37 38 #ifdef _ARM_ASSEM_ 39 40 #if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_) 41 #define _V_WIDE_MATH 42 43 static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) { 44 int lo,hi; 45 asm volatile("smull\t%0, %1, %2, %3" 46 : "=&r"(lo),"=&r"(hi) 47 : "%r"(x),"r"(y) 48 : "cc"); 49 return(hi); 50 } 51 52 static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) { 53 return MULT32(x,y)<<1; 54 } 55 56 static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) { 57 int lo,hi; 58 asm volatile("smull %0, %1, %2, %3\n\t" 59 "movs %0, %0, lsr #15\n\t" 60 "adc %1, %0, %1, lsl #17\n\t" 61 : "=&r"(lo),"=&r"(hi) 62 : "%r"(x),"r"(y) 63 : "cc"); 64 return(hi); 65 } 66 67 #define MB() asm volatile ("" : : : "memory") 68 69 static inline void XPROD32(ogg_int32_t a, ogg_int32_t b, 70 ogg_int32_t t, ogg_int32_t v, 71 ogg_int32_t *x, ogg_int32_t *y) 72 { 73 int x1, y1, l; 74 asm( "smull %0, %1, %4, %6\n\t" 75 "smlal %0, %1, %5, %7\n\t" 76 "rsb %3, %4, #0\n\t" 77 "smull %0, %2, %5, %6\n\t" 78 "smlal %0, %2, %3, %7" 79 : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a) 80 : "3" (a), "r" (b), "r" (t), "r" (v) 81 : "cc" ); 82 *x = x1; 83 MB(); 84 *y = y1; 85 } 86 87 /* x = (a*t + b*v)>>31, y = (b*t - a*v)>>31 */ 88 static inline void XPROD31(ogg_int32_t a, ogg_int32_t b, 89 ogg_int32_t t, ogg_int32_t v, 90 ogg_int32_t *x, ogg_int32_t *y) 91 { 92 int x1, y1, l; 93 asm( "smull %0, %1, %4, %6\n\t" 94 "smlal %0, %1, %5, %7\n\t" 95 "rsb %3, %4, #0\n\t" 96 "smull %0, %2, %5, %6\n\t" 97 "smlal %0, %2, %3, %7" 98 : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a) 99 : "3" (a), "r" (b), "r" (t), "r" (v) 100 : "cc" ); 101 *x = x1 << 1; 102 MB(); 103 *y = y1 << 1; 104 } 105 106 /* x = (a*t - b*v)>>31, y = (b*t + a*v)>>31 */ 107 static inline void XNPROD31(ogg_int32_t a, ogg_int32_t b, 108 ogg_int32_t t, ogg_int32_t v, 109 ogg_int32_t *x, ogg_int32_t *y) 110 { 111 int x1, y1, l; 112 asm( "rsb %2, %4, #0\n\t" 113 "smull %0, %1, %3, %5\n\t" 114 "smlal %0, %1, %2, %6\n\t" 115 "smull %0, %2, %4, %5\n\t" 116 "smlal %0, %2, %3, %6" 117 : "=&r" (l), "=&r" (x1), "=&r" (y1) 118 : "r" (a), "r" (b), "r" (t), "r" (v) 119 : "cc" ); 120 *x = x1 << 1; 121 MB(); 122 *y = y1 << 1; 123 } 124 125 #endif 126 127 #ifndef _V_CLIP_MATH 128 #define _V_CLIP_MATH 129 130 static inline ogg_int32_t CLIP_TO_15(ogg_int32_t x) { 131 int tmp; 132 asm volatile("subs %1, %0, #32768\n\t" 133 "movpl %0, #0x7f00\n\t" 134 "orrpl %0, %0, #0xff\n" 135 "adds %1, %0, #32768\n\t" 136 "movmi %0, #0x8000" 137 : "+r"(x),"=r"(tmp) 138 : 139 : "cc"); 140 return(x); 141 } 142 143 #endif 144 145 #ifndef _V_LSP_MATH_ASM 146 #define _V_LSP_MATH_ASM 147 148 static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip, 149 ogg_int32_t *qexpp, 150 ogg_int32_t *ilsp,ogg_int32_t wi, 151 ogg_int32_t m){ 152 153 ogg_uint32_t qi=*qip,pi=*pip; 154 ogg_int32_t qexp=*qexpp; 155 156 asm("mov r0,%3;" 157 "mov r1,%5,asr#1;" 158 "add r0,r0,r1,lsl#3;" 159 "1:" 160 161 "ldmdb r0!,{r1,r3};" 162 "subs r1,r1,%4;" //ilsp[j]-wi 163 "rsbmi r1,r1,#0;" //labs(ilsp[j]-wi) 164 "umull %0,r2,r1,%0;" //qi*=labs(ilsp[j]-wi) 165 166 "subs r1,r3,%4;" //ilsp[j+1]-wi 167 "rsbmi r1,r1,#0;" //labs(ilsp[j+1]-wi) 168 "umull %1,r3,r1,%1;" //pi*=labs(ilsp[j+1]-wi) 169 170 "cmn r2,r3;" // shift down 16? 171 "beq 0f;" 172 "add %2,%2,#16;" 173 "mov %0,%0,lsr #16;" 174 "orr %0,%0,r2,lsl #16;" 175 "mov %1,%1,lsr #16;" 176 "orr %1,%1,r3,lsl #16;" 177 "0:" 178 "cmp r0,%3;\n" 179 "bhi 1b;\n" 180 181 // odd filter assymetry 182 "ands r0,%5,#1;\n" 183 "beq 2f;\n" 184 "add r0,%3,%5,lsl#2;\n" 185 186 "ldr r1,[r0,#-4];\n" 187 "mov r0,#0x4000;\n" 188 189 "subs r1,r1,%4;\n" //ilsp[j]-wi 190 "rsbmi r1,r1,#0;\n" //labs(ilsp[j]-wi) 191 "umull %0,r2,r1,%0;\n" //qi*=labs(ilsp[j]-wi) 192 "umull %1,r3,r0,%1;\n" //pi*=labs(ilsp[j+1]-wi) 193 194 "cmn r2,r3;\n" // shift down 16? 195 "beq 2f;\n" 196 "add %2,%2,#16;\n" 197 "mov %0,%0,lsr #16;\n" 198 "orr %0,%0,r2,lsl #16;\n" 199 "mov %1,%1,lsr #16;\n" 200 "orr %1,%1,r3,lsl #16;\n" 201 202 //qi=(pi>>shift)*labs(ilsp[j]-wi); 203 //pi=(qi>>shift)*labs(ilsp[j+1]-wi); 204 //qexp+=shift; 205 206 //} 207 208 /* normalize to max 16 sig figs */ 209 "2:" 210 "mov r2,#0;" 211 "orr r1,%0,%1;" 212 "tst r1,#0xff000000;" 213 "addne r2,r2,#8;" 214 "movne r1,r1,lsr #8;" 215 "tst r1,#0x00f00000;" 216 "addne r2,r2,#4;" 217 "movne r1,r1,lsr #4;" 218 "tst r1,#0x000c0000;" 219 "addne r2,r2,#2;" 220 "movne r1,r1,lsr #2;" 221 "tst r1,#0x00020000;" 222 "addne r2,r2,#1;" 223 "movne r1,r1,lsr #1;" 224 "tst r1,#0x00010000;" 225 "addne r2,r2,#1;" 226 "mov %0,%0,lsr r2;" 227 "mov %1,%1,lsr r2;" 228 "add %2,%2,r2;" 229 230 : "+r"(qi),"+r"(pi),"+r"(qexp) 231 : "r"(ilsp),"r"(wi),"r"(m) 232 : "r0","r1","r2","r3","cc"); 233 234 *qip=qi; 235 *pip=pi; 236 *qexpp=qexp; 237 } 238 239 static inline void lsp_norm_asm(ogg_uint32_t *qip,ogg_int32_t *qexpp){ 240 241 ogg_uint32_t qi=*qip; 242 ogg_int32_t qexp=*qexpp; 243 244 asm("tst %0,#0x0000ff00;" 245 "moveq %0,%0,lsl #8;" 246 "subeq %1,%1,#8;" 247 "tst %0,#0x0000f000;" 248 "moveq %0,%0,lsl #4;" 249 "subeq %1,%1,#4;" 250 "tst %0,#0x0000c000;" 251 "moveq %0,%0,lsl #2;" 252 "subeq %1,%1,#2;" 253 "tst %0,#0x00008000;" 254 "moveq %0,%0,lsl #1;" 255 "subeq %1,%1,#1;" 256 : "+r"(qi),"+r"(qexp) 257 : 258 : "cc"); 259 *qip=qi; 260 *qexpp=qexp; 261 } 262 263 #endif 264 #endif 265 266