/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
#include "webrtc/typedefs.h"

// Exchanges, in place, 28 hard-coded pairs of two-float elements of |a|
// (a[i], a[i + 1]) <-> (a[j], a[j + 1]). The index list is fixed for the
// 128-float buffer this file works on; all indices are within a[0..125].
// NOTE(review): presumably this is the bit-reversal permutation of the
// 128-point transform - confirm against the generic bitrv2 implementation.
static void bitrv2_128_mips(float* a) {
  // n is 128
  float xr, xi, yr, yi;

  xr = a[8];
  xi = a[9];
  yr = a[16];
  yi = a[17];
  a[8] = yr;
  a[9] = yi;
  a[16] = xr;
  a[17] = xi;

  xr = a[64];
  xi = a[65];
  yr = a[2];
  yi = a[3];
  a[64] = yr;
  a[65] = yi;
  a[2] = xr;
  a[3] = xi;

  xr = a[72];
  xi = a[73];
  yr = a[18];
  yi = a[19];
  a[72] = yr;
  a[73] = yi;
  a[18] = xr;
  a[19] = xi;

  xr = a[80];
  xi = a[81];
  yr = a[10];
  yi = a[11];
  a[80] = yr;
  a[81] = yi;
  a[10] = xr;
  a[11] = xi;

  xr = a[88];
  xi = a[89];
  yr = a[26];
  yi = a[27];
  a[88] = yr;
  a[89] = yi;
  a[26] = xr;
  a[27] = xi;

  xr = a[74];
  xi = a[75];
  yr = a[82];
  yi = a[83];
  a[74] = yr;
  a[75] = yi;
  a[82] = xr;
  a[83] = xi;

  xr = a[32];
  xi = a[33];
  yr = a[4];
  yi = a[5];
  a[32] = yr;
  a[33] = yi;
  a[4] = xr;
  a[5] = xi;

  xr = a[40];
  xi = a[41];
  yr = a[20];
  yi = a[21];
  a[40] = yr;
  a[41] = yi;
  a[20] = xr;
  a[21] = xi;

  xr = a[48];
  xi = a[49];
  yr = a[12];
  yi = a[13];
  a[48] = yr;
  a[49] = yi;
  a[12] = xr;
  a[13] = xi;

  xr = a[56];
  xi = a[57];
  yr = a[28];
  yi = a[29];
  a[56] = yr;
  a[57] = yi;
  a[28] = xr;
  a[29] = xi;

  xr = a[34];
  xi = a[35];
  yr = a[68];
  yi = a[69];
  a[34] = yr;
  a[35] = yi;
  a[68] = xr;
  a[69] = xi;

  xr = a[42];
  xi = a[43];
  yr = a[84];
  yi = a[85];
  a[42] = yr;
  a[43] = yi;
  a[84] = xr;
  a[85] = xi;

  xr = a[50];
  xi = a[51];
  yr = a[76];
  yi = a[77];
  a[50] = yr;
  a[51] = yi;
  a[76] = xr;
  a[77] = xi;

  xr = a[58];
  xi = a[59];
  yr = a[92];
  yi = a[93];
  a[58] = yr;
  a[59] = yi;
  a[92] = xr;
  a[93] = xi;

  xr = a[44];
  xi = a[45];
  yr = a[52];
  yi = a[53];
  a[44] = yr;
  a[45] = yi;
  a[52] = xr;
  a[53] = xi;

  xr = a[96];
  xi = a[97];
  yr = a[6];
  yi = a[7];
  a[96] = yr;
  a[97] = yi;
  a[6] = xr;
  a[7] = xi;

  xr = a[104];
  xi = a[105];
  yr = a[22];
  yi = a[23];
  a[104] = yr;
  a[105] = yi;
  a[22] = xr;
  a[23] = xi;

  xr = a[112];
  xi = a[113];
  yr = a[14];
  yi = a[15];
  a[112] = yr;
  a[113] = yi;
  a[14] = xr;
  a[15] = xi;

  xr = a[120];
  xi = a[121];
  yr = a[30];
  yi = a[31];
  a[120] = yr;
  a[121] = yi;
  a[30] = xr;
  a[31] = xi;

  xr = a[98];
  xi = a[99];
  yr = a[70];
  yi = a[71];
  a[98] = yr;
  a[99] = yi;
  a[70] = xr;
  a[71] = xi;

  xr = a[106];
  xi = a[107];
  yr = a[86];
  yi = a[87];
  a[106] = yr;
  a[107] = yi;
  a[86] = xr;
  a[87] = xi;

  xr = a[114];
  xi = a[115];
  yr = a[78];
  yi = a[79];
  a[114] = yr;
  a[115] = yi;
  a[78] = xr;
  a[79] = xi;

  xr = a[122];
  xi = a[123];
  yr = a[94];
  yi = a[95];
  a[122] = yr;
  a[123] = yi;
  a[94] = xr;
  a[95] = xi;

  xr = a[100];
  xi = a[101];
  yr = a[38];
  yi = a[39];
  a[100] = yr;
  a[101] = yi;
  a[38] = xr;
  a[39] = xi;

  xr = a[108];
  xi = a[109];
  yr = a[54];
  yi = a[55];
  a[108] = yr;
  a[109] = yi;
  a[54] = xr;
  a[55] = xi;

  xr = a[116];
  xi = a[117];
  yr = a[46];
  yi = a[47];
  a[116] = yr;
  a[117] = yi;
  a[46] = xr;
  a[47] = xi;

  xr = a[124];
  xi = a[125];
  yr = a[62];
  yi = a[63];
  a[124] = yr;
  a[125] = yi;
  a[62] = xr;
  a[63] = xi;

  xr = a[110];
  xi = a[111];
  yr = a[118];
  yi = a[119];
  a[110] = yr;
  a[111] = yi;
  a[118] = xr;
  a[119] = xi;
}

// MIPS inline-assembly routine installed as cft1st_128 by
// aec_rdft_init_mips(). Rewrites a[0..127] in place:
//  - a[0..7]  ("first 8")  : add/subtract stage only.
//  - a[8..15] ("second 8") : add/subtract stage, then four of the results are
//                            multiplied by rdft_w[2].
//  - a[16..127]            : a 7-iteration loop handling 16 floats per pass
//                            (two groups of 8), multiplying by coefficients
//                            read from rdft_w, rdft_wk3ri_first and
//                            rdft_wk3ri_second.
// rdft_w, rdft_wk3ri_first and rdft_wk3ri_second are defined outside this
// file. When MIPS32_R2_LE is defined the multiply stages use
// madd.s/nmsub.s instead of separate mul.s + add.s/sub.s.
// NOTE(review): addresses are kept in |int| variables (a_ptr, p1_rdft,
// p2_rdft), so this assumes pointers fit in an int.
static void cft1st_128_mips(float* a) {
  float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14;
  int a_ptr, p1_rdft, p2_rdft, count;
  // |first| and |second| are read-write asm operands: the loop advances each
  // of them by 8 bytes per iteration.
  const float* first = rdft_wk3ri_first;
  const float* second = rdft_wk3ri_second;

  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    // first 8
    "lwc1 %[f0], 0(%[a]) \n\t"
    "lwc1 %[f1], 4(%[a]) \n\t"
    "lwc1 %[f2], 8(%[a]) \n\t"
    "lwc1 %[f3], 12(%[a]) \n\t"
    "lwc1 %[f4], 16(%[a]) \n\t"
    "lwc1 %[f5], 20(%[a]) \n\t"
    "lwc1 %[f6], 24(%[a]) \n\t"
    "lwc1 %[f7], 28(%[a]) \n\t"
    "add.s %[f8], %[f0], %[f2] \n\t"
    "sub.s %[f0], %[f0], %[f2] \n\t"
    "add.s %[f2], %[f4], %[f6] \n\t"
    "sub.s %[f4], %[f4], %[f6] \n\t"
    "add.s %[f6], %[f1], %[f3] \n\t"
    "sub.s %[f1], %[f1], %[f3] \n\t"
    "add.s %[f3], %[f5], %[f7] \n\t"
    "sub.s %[f5], %[f5], %[f7] \n\t"
    "add.s %[f7], %[f8], %[f2] \n\t"
    "sub.s %[f8], %[f8], %[f2] \n\t"
    "sub.s %[f2], %[f1], %[f4] \n\t"
    "add.s %[f1], %[f1], %[f4] \n\t"
    "add.s %[f4], %[f6], %[f3] \n\t"
    "sub.s %[f6], %[f6], %[f3] \n\t"
    "sub.s %[f3], %[f0], %[f5] \n\t"
    "add.s %[f0], %[f0], %[f5] \n\t"
    "swc1 %[f7], 0(%[a]) \n\t"
    "swc1 %[f8], 16(%[a]) \n\t"
    "swc1 %[f2], 28(%[a]) \n\t"
    "swc1 %[f1], 12(%[a]) \n\t"
    "swc1 %[f4], 4(%[a]) \n\t"
    "swc1 %[f6], 20(%[a]) \n\t"
    "swc1 %[f3], 8(%[a]) \n\t"
    "swc1 %[f0], 24(%[a]) \n\t"
    // second 8
    "lwc1 %[f0], 32(%[a]) \n\t"
    "lwc1 %[f1], 36(%[a]) \n\t"
    "lwc1 %[f2], 40(%[a]) \n\t"
    "lwc1 %[f3], 44(%[a]) \n\t"
    "lwc1 %[f4], 48(%[a]) \n\t"
    "lwc1 %[f5], 52(%[a]) \n\t"
    "lwc1 %[f6], 56(%[a]) \n\t"
    "lwc1 %[f7], 60(%[a]) \n\t"
    "add.s %[f8], %[f4], %[f6] \n\t"
    "sub.s %[f4], %[f4], %[f6] \n\t"
    "add.s %[f6], %[f1], %[f3] \n\t"
    "sub.s %[f1], %[f1], %[f3] \n\t"
    "add.s %[f3], %[f0], %[f2] \n\t"
    "sub.s %[f0], %[f0], %[f2] \n\t"
    "add.s %[f2], %[f5], %[f7] \n\t"
    "sub.s %[f5], %[f5], %[f7] \n\t"
    "add.s %[f7], %[f4], %[f1] \n\t"
    "sub.s %[f4], %[f4], %[f1] \n\t"
    "add.s %[f1], %[f3], %[f8] \n\t"
    "sub.s %[f3], %[f3], %[f8] \n\t"
    "sub.s %[f8], %[f0], %[f5] \n\t"
    "add.s %[f0], %[f0], %[f5] \n\t"
    "add.s %[f5], %[f6], %[f2] \n\t"
    "sub.s %[f6], %[f2], %[f6] \n\t"
    // f9 = rdft_w[2]; it is also the first loop iteration's f9 coefficient.
    "lwc1 %[f9], 8(%[rdft_w]) \n\t"
    "sub.s %[f2], %[f8], %[f7] \n\t"
    "add.s %[f8], %[f8], %[f7] \n\t"
    "sub.s %[f7], %[f4], %[f0] \n\t"
    "add.s %[f4], %[f4], %[f0] \n\t"
    // prepare for loop
    // a_ptr = &a[16], p1_rdft = &rdft_w[2], p2_rdft = &rdft_w[4], count = 7.
    "addiu %[a_ptr], %[a], 64 \n\t"
    "addiu %[p1_rdft], %[rdft_w], 8 \n\t"
    "addiu %[p2_rdft], %[rdft_w], 16 \n\t"
    "addiu %[count], $zero, 7 \n\t"
    // finish second 8
    "mul.s %[f2], %[f9], %[f2] \n\t"
    "mul.s %[f8], %[f9], %[f8] \n\t"
    "mul.s %[f7], %[f9], %[f7] \n\t"
    "mul.s %[f4], %[f9], %[f4] \n\t"
    "swc1 %[f1], 32(%[a]) \n\t"
    "swc1 %[f3], 52(%[a]) \n\t"
    "swc1 %[f5], 36(%[a]) \n\t"
    "swc1 %[f6], 48(%[a]) \n\t"
    "swc1 %[f2], 40(%[a]) \n\t"
    "swc1 %[f8], 44(%[a]) \n\t"
    "swc1 %[f7], 56(%[a]) \n\t"
    "swc1 %[f4], 60(%[a]) \n\t"
    // loop
    "1: \n\t"
    // First group of 8 floats of this pass: a_ptr[0..7].
    "lwc1 %[f0], 0(%[a_ptr]) \n\t"
    "lwc1 %[f1], 4(%[a_ptr]) \n\t"
    "lwc1 %[f2], 8(%[a_ptr]) \n\t"
    "lwc1 %[f3], 12(%[a_ptr]) \n\t"
    "lwc1 %[f4], 16(%[a_ptr]) \n\t"
    "lwc1 %[f5], 20(%[a_ptr]) \n\t"
    "lwc1 %[f6], 24(%[a_ptr]) \n\t"
    "lwc1 %[f7], 28(%[a_ptr]) \n\t"
    "add.s %[f8], %[f0], %[f2] \n\t"
    "sub.s %[f0], %[f0], %[f2] \n\t"
    "add.s %[f2], %[f4], %[f6] \n\t"
    "sub.s %[f4], %[f4], %[f6] \n\t"
    "add.s %[f6], %[f1], %[f3] \n\t"
    "sub.s %[f1], %[f1], %[f3] \n\t"
    "add.s %[f3], %[f5], %[f7] \n\t"
    "sub.s %[f5], %[f5], %[f7] \n\t"
    // Coefficients for the first group: p1_rdft[1], p2_rdft[0..1], first[2..3]
    // (f9 already holds p1_rdft[0]).
    "lwc1 %[f10], 4(%[p1_rdft]) \n\t"
    "lwc1 %[f11], 0(%[p2_rdft]) \n\t"
    "lwc1 %[f12], 4(%[p2_rdft]) \n\t"
    "lwc1 %[f13], 8(%[first]) \n\t"
    "lwc1 %[f14], 12(%[first]) \n\t"
    "add.s %[f7], %[f8], %[f2] \n\t"
    "sub.s %[f8], %[f8], %[f2] \n\t"
    "add.s %[f2], %[f6], %[f3] \n\t"
    "sub.s %[f6], %[f6], %[f3] \n\t"
    "add.s %[f3], %[f0], %[f5] \n\t"
    "sub.s %[f0], %[f0], %[f5] \n\t"
    "add.s %[f5], %[f1], %[f4] \n\t"
    "sub.s %[f1], %[f1], %[f4] \n\t"
    "swc1 %[f7], 0(%[a_ptr]) \n\t"
    "swc1 %[f2], 4(%[a_ptr]) \n\t"
    "mul.s %[f4], %[f9], %[f8] \n\t"
#if defined(MIPS32_R2_LE)
    "mul.s %[f8], %[f10], %[f8] \n\t"
    "mul.s %[f7], %[f11], %[f0] \n\t"
    "mul.s %[f0], %[f12], %[f0] \n\t"
    "mul.s %[f2], %[f13], %[f3] \n\t"
    "mul.s %[f3], %[f14], %[f3] \n\t"
    "nmsub.s %[f4], %[f4], %[f10], %[f6] \n\t"
    "madd.s %[f8], %[f8], %[f9], %[f6] \n\t"
    "nmsub.s %[f7], %[f7], %[f12], %[f5] \n\t"
    "madd.s %[f0], %[f0], %[f11], %[f5] \n\t"
    "nmsub.s %[f2], %[f2], %[f14], %[f1] \n\t"
    "madd.s %[f3], %[f3], %[f13], %[f1] \n\t"
#else
    "mul.s %[f7], %[f10], %[f6] \n\t"
    "mul.s %[f6], %[f9], %[f6] \n\t"
    "mul.s %[f8], %[f10], %[f8] \n\t"
    "mul.s %[f2], %[f11], %[f0] \n\t"
    "mul.s %[f11], %[f11], %[f5] \n\t"
    "mul.s %[f5], %[f12], %[f5] \n\t"
    "mul.s %[f0], %[f12], %[f0] \n\t"
    "mul.s %[f12], %[f13], %[f3] \n\t"
    "mul.s %[f13], %[f13], %[f1] \n\t"
    "mul.s %[f1], %[f14], %[f1] \n\t"
    "mul.s %[f3], %[f14], %[f3] \n\t"
    "sub.s %[f4], %[f4], %[f7] \n\t"
    "add.s %[f8], %[f6], %[f8] \n\t"
    "sub.s %[f7], %[f2], %[f5] \n\t"
    "add.s %[f0], %[f11], %[f0] \n\t"
    "sub.s %[f2], %[f12], %[f1] \n\t"
    "add.s %[f3], %[f13], %[f3] \n\t"
#endif
    "swc1 %[f4], 16(%[a_ptr]) \n\t"
    "swc1 %[f8], 20(%[a_ptr]) \n\t"
    "swc1 %[f7], 8(%[a_ptr]) \n\t"
    "swc1 %[f0], 12(%[a_ptr]) \n\t"
    "swc1 %[f2], 24(%[a_ptr]) \n\t"
    "swc1 %[f3], 28(%[a_ptr]) \n\t"
    // Second group of 8 floats of this pass: a_ptr[8..15].
    "lwc1 %[f0], 32(%[a_ptr]) \n\t"
    "lwc1 %[f1], 36(%[a_ptr]) \n\t"
    "lwc1 %[f2], 40(%[a_ptr]) \n\t"
    "lwc1 %[f3], 44(%[a_ptr]) \n\t"
    "lwc1 %[f4], 48(%[a_ptr]) \n\t"
    "lwc1 %[f5], 52(%[a_ptr]) \n\t"
    "lwc1 %[f6], 56(%[a_ptr]) \n\t"
    "lwc1 %[f7], 60(%[a_ptr]) \n\t"
    "add.s %[f8], %[f0], %[f2] \n\t"
    "sub.s %[f0], %[f0], %[f2] \n\t"
    "add.s %[f2], %[f4], %[f6] \n\t"
    "sub.s %[f4], %[f4], %[f6] \n\t"
    "add.s %[f6], %[f1], %[f3] \n\t"
    "sub.s %[f1], %[f1], %[f3] \n\t"
    "add.s %[f3], %[f5], %[f7] \n\t"
    "sub.s %[f5], %[f5], %[f7] \n\t"
    // Coefficients for the second group: p2_rdft[2..3], second[2..3]
    // (f9 and f10 are reused from the first group).
    "lwc1 %[f11], 8(%[p2_rdft]) \n\t"
    "lwc1 %[f12], 12(%[p2_rdft]) \n\t"
    "lwc1 %[f13], 8(%[second]) \n\t"
    "lwc1 %[f14], 12(%[second]) \n\t"
    "add.s %[f7], %[f8], %[f2] \n\t"
    "sub.s %[f8], %[f2], %[f8] \n\t"
    "add.s %[f2], %[f6], %[f3] \n\t"
    "sub.s %[f6], %[f3], %[f6] \n\t"
    "add.s %[f3], %[f0], %[f5] \n\t"
    "sub.s %[f0], %[f0], %[f5] \n\t"
    "add.s %[f5], %[f1], %[f4] \n\t"
    "sub.s %[f1], %[f1], %[f4] \n\t"
    "swc1 %[f7], 32(%[a_ptr]) \n\t"
    "swc1 %[f2], 36(%[a_ptr]) \n\t"
    "mul.s %[f4], %[f10], %[f8] \n\t"
#if defined(MIPS32_R2_LE)
    "mul.s %[f10], %[f10], %[f6] \n\t"
    "mul.s %[f7], %[f11], %[f0] \n\t"
    "mul.s %[f11], %[f11], %[f5] \n\t"
    "mul.s %[f2], %[f13], %[f3] \n\t"
    "mul.s %[f13], %[f13], %[f1] \n\t"
    "madd.s %[f4], %[f4], %[f9], %[f6] \n\t"
    "nmsub.s %[f10], %[f10], %[f9], %[f8] \n\t"
    "nmsub.s %[f7], %[f7], %[f12], %[f5] \n\t"
    "madd.s %[f11], %[f11], %[f12], %[f0] \n\t"
    "nmsub.s %[f2], %[f2], %[f14], %[f1] \n\t"
    "madd.s %[f13], %[f13], %[f14], %[f3] \n\t"
#else
    "mul.s %[f2], %[f9], %[f6] \n\t"
    "mul.s %[f10], %[f10], %[f6] \n\t"
    "mul.s %[f9], %[f9], %[f8] \n\t"
    "mul.s %[f7], %[f11], %[f0] \n\t"
    "mul.s %[f8], %[f12], %[f5] \n\t"
    "mul.s %[f11], %[f11], %[f5] \n\t"
    "mul.s %[f12], %[f12], %[f0] \n\t"
    "mul.s %[f5], %[f13], %[f3] \n\t"
    "mul.s %[f0], %[f14], %[f1] \n\t"
    "mul.s %[f13], %[f13], %[f1] \n\t"
    "mul.s %[f14], %[f14], %[f3] \n\t"
    "add.s %[f4], %[f4], %[f2] \n\t"
    "sub.s %[f10], %[f10], %[f9] \n\t"
    "sub.s %[f7], %[f7], %[f8] \n\t"
    "add.s %[f11], %[f11], %[f12] \n\t"
    "sub.s %[f2], %[f5], %[f0] \n\t"
    "add.s %[f13], %[f13], %[f14] \n\t"
#endif
    "swc1 %[f4], 48(%[a_ptr]) \n\t"
    "swc1 %[f10], 52(%[a_ptr]) \n\t"
    "swc1 %[f7], 40(%[a_ptr]) \n\t"
    "swc1 %[f11], 44(%[a_ptr]) \n\t"
    "swc1 %[f2], 56(%[a_ptr]) \n\t"
    "swc1 %[f13], 60(%[a_ptr]) \n\t"
    "addiu %[count], %[count], -1 \n\t"
    // Preload the next iteration's f9 before p1_rdft is advanced.
    "lwc1 %[f9], 8(%[p1_rdft]) \n\t"
    "addiu %[a_ptr], %[a_ptr], 64 \n\t"
    "addiu %[p1_rdft], %[p1_rdft], 8 \n\t"
    "addiu %[p2_rdft], %[p2_rdft], 16 \n\t"
    "addiu %[first], %[first], 8 \n\t"
    "bgtz %[count], 1b \n\t"
    // Branch delay slot (.set noreorder): executed on every iteration.
    " addiu %[second], %[second], 8 \n\t"
    ".set pop \n\t"
    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
      [f8] "=&f" (f8), [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
      [f12] "=&f" (f12), [f13] "=&f" (f13), [f14] "=&f" (f14),
      [a_ptr] "=&r" (a_ptr), [p1_rdft] "=&r" (p1_rdft), [first] "+r" (first),
      [p2_rdft] "=&r" (p2_rdft), [count] "=&r" (count), [second] "+r" (second)
    : [a] "r" (a), [rdft_w] "r" (rdft_w)
    : "memory"
  );
}

// MIPS inline-assembly routine installed as cftmdl_128 by
// aec_rdft_init_mips(). Rewrites |a| in place using four asm loops that
// start at byte offsets 0, 128, 256 and 384 from |a| (a[0], a[32], a[64],
// a[96]). Each loop runs 4 iterations, advancing tmp_a by 8 bytes, and on
// each pass reads and rewrites the float pairs at byte offsets 0, 32, 64 and
// 96 from tmp_a.
//  - loop 1: add/subtract stage only.
//  - loop 2: add/subtract stage, four results multiplied by rdft_w[2].
//  - loop 3: uses rdft_w[2..5] and rdft_wk3ri_first[2..3].
//  - loop 4: uses rdft_w[2..3], rdft_w[6..7] and rdft_wk3ri_second[2..3].
// When MIPS32_R2_LE is defined, loops 3 and 4 use madd.s/nmsub.s/msub.s
// instead of separate mul.s + add.s/sub.s.
// NOTE(review): tmp_a holds an address in an |int|, so this assumes pointers
// fit in an int.
static void cftmdl_128_mips(float* a) {
  float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14;
  int tmp_a, count;
  // Loop 1: starts at a[0].
  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "addiu %[tmp_a], %[a], 0 \n\t"
    "addiu %[count], $zero, 4 \n\t"
    "1: \n\t"
    "addiu %[count], %[count], -1 \n\t"
    "lwc1 %[f0], 0(%[tmp_a]) \n\t"
    "lwc1 %[f2], 32(%[tmp_a]) \n\t"
    "lwc1 %[f4], 64(%[tmp_a]) \n\t"
    "lwc1 %[f6], 96(%[tmp_a]) \n\t"
    "lwc1 %[f1], 4(%[tmp_a]) \n\t"
    "lwc1 %[f3], 36(%[tmp_a]) \n\t"
    "lwc1 %[f5], 68(%[tmp_a]) \n\t"
    "lwc1 %[f7], 100(%[tmp_a]) \n\t"
    "add.s %[f8], %[f0], %[f2] \n\t"
    "sub.s %[f0], %[f0], %[f2] \n\t"
    "add.s %[f2], %[f4], %[f6] \n\t"
    "sub.s %[f4], %[f4], %[f6] \n\t"
    "add.s %[f6], %[f1], %[f3] \n\t"
    "sub.s %[f1], %[f1], %[f3] \n\t"
    "add.s %[f3], %[f5], %[f7] \n\t"
    "sub.s %[f5], %[f5], %[f7] \n\t"
    "add.s %[f7], %[f8], %[f2] \n\t"
    "sub.s %[f8], %[f8], %[f2] \n\t"
    "add.s %[f2], %[f1], %[f4] \n\t"
    "sub.s %[f1], %[f1], %[f4] \n\t"
    "add.s %[f4], %[f6], %[f3] \n\t"
    "sub.s %[f6], %[f6], %[f3] \n\t"
    "sub.s %[f3], %[f0], %[f5] \n\t"
    "add.s %[f0], %[f0], %[f5] \n\t"
    "swc1 %[f7], 0(%[tmp_a]) \n\t"
    "swc1 %[f8], 64(%[tmp_a]) \n\t"
    "swc1 %[f2], 36(%[tmp_a]) \n\t"
    "swc1 %[f1], 100(%[tmp_a]) \n\t"
    "swc1 %[f4], 4(%[tmp_a]) \n\t"
    "swc1 %[f6], 68(%[tmp_a]) \n\t"
    "swc1 %[f3], 32(%[tmp_a]) \n\t"
    "swc1 %[f0], 96(%[tmp_a]) \n\t"
    "bgtz %[count], 1b \n\t"
    // Branch delay slot (.set noreorder): executed on every iteration.
    " addiu %[tmp_a], %[tmp_a], 8 \n\t"
    ".set pop \n\t"
    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
    : [a] "r" (a)
    : "memory"
  );
  // Loop 2: starts at a[32]; f9 is the only coefficient.
  f9 = rdft_w[2];
  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "addiu %[tmp_a], %[a], 128 \n\t"
    "addiu %[count], $zero, 4 \n\t"
    "1: \n\t"
    "addiu %[count], %[count], -1 \n\t"
    "lwc1 %[f0], 0(%[tmp_a]) \n\t"
    "lwc1 %[f2], 32(%[tmp_a]) \n\t"
    "lwc1 %[f5], 68(%[tmp_a]) \n\t"
    "lwc1 %[f7], 100(%[tmp_a]) \n\t"
    "lwc1 %[f1], 4(%[tmp_a]) \n\t"
    "lwc1 %[f3], 36(%[tmp_a]) \n\t"
    "lwc1 %[f4], 64(%[tmp_a]) \n\t"
    "lwc1 %[f6], 96(%[tmp_a]) \n\t"
    "sub.s %[f8], %[f0], %[f2] \n\t"
    "add.s %[f0], %[f0], %[f2] \n\t"
    "sub.s %[f2], %[f5], %[f7] \n\t"
    "add.s %[f5], %[f5], %[f7] \n\t"
    "sub.s %[f7], %[f1], %[f3] \n\t"
    "add.s %[f1], %[f1], %[f3] \n\t"
    "sub.s %[f3], %[f4], %[f6] \n\t"
    "add.s %[f4], %[f4], %[f6] \n\t"
    "sub.s %[f6], %[f8], %[f2] \n\t"
    "add.s %[f8], %[f8], %[f2] \n\t"
    "add.s %[f2], %[f5], %[f1] \n\t"
    "sub.s %[f5], %[f5], %[f1] \n\t"
    "add.s %[f1], %[f3], %[f7] \n\t"
    "sub.s %[f3], %[f3], %[f7] \n\t"
    "add.s %[f7], %[f0], %[f4] \n\t"
    "sub.s %[f0], %[f0], %[f4] \n\t"
    "sub.s %[f4], %[f6], %[f1] \n\t"
    "add.s %[f6], %[f6], %[f1] \n\t"
    "sub.s %[f1], %[f3], %[f8] \n\t"
    "add.s %[f3], %[f3], %[f8] \n\t"
    "mul.s %[f4], %[f4], %[f9] \n\t"
    "mul.s %[f6], %[f6], %[f9] \n\t"
    "mul.s %[f1], %[f1], %[f9] \n\t"
    "mul.s %[f3], %[f3], %[f9] \n\t"
    "swc1 %[f7], 0(%[tmp_a]) \n\t"
    "swc1 %[f2], 4(%[tmp_a]) \n\t"
    "swc1 %[f5], 64(%[tmp_a]) \n\t"
    "swc1 %[f0], 68(%[tmp_a]) \n\t"
    "swc1 %[f4], 32(%[tmp_a]) \n\t"
    "swc1 %[f6], 36(%[tmp_a]) \n\t"
    "swc1 %[f1], 96(%[tmp_a]) \n\t"
    "swc1 %[f3], 100(%[tmp_a]) \n\t"
    "bgtz %[count], 1b \n\t"
    // Branch delay slot (.set noreorder): executed on every iteration.
    " addiu %[tmp_a], %[tmp_a], 8 \n\t"
    ".set pop \n\t"
    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
    : [a] "r" (a), [f9] "f" (f9)
    : "memory"
  );
  // Loop 3: starts at a[64]; f9 still holds rdft_w[2].
  f10 = rdft_w[3];
  f11 = rdft_w[4];
  f12 = rdft_w[5];
  f13 = rdft_wk3ri_first[2];
  f14 = rdft_wk3ri_first[3];

  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "addiu %[tmp_a], %[a], 256 \n\t"
    "addiu %[count], $zero, 4 \n\t"
    "1: \n\t"
    "addiu %[count], %[count], -1 \n\t"
    "lwc1 %[f0], 0(%[tmp_a]) \n\t"
    "lwc1 %[f2], 32(%[tmp_a]) \n\t"
    "lwc1 %[f4], 64(%[tmp_a]) \n\t"
    "lwc1 %[f6], 96(%[tmp_a]) \n\t"
    "lwc1 %[f1], 4(%[tmp_a]) \n\t"
    "lwc1 %[f3], 36(%[tmp_a]) \n\t"
    "lwc1 %[f5], 68(%[tmp_a]) \n\t"
    "lwc1 %[f7], 100(%[tmp_a]) \n\t"
    "add.s %[f8], %[f0], %[f2] \n\t"
    "sub.s %[f0], %[f0], %[f2] \n\t"
    "add.s %[f2], %[f4], %[f6] \n\t"
    "sub.s %[f4], %[f4], %[f6] \n\t"
    "add.s %[f6], %[f1], %[f3] \n\t"
    "sub.s %[f1], %[f1], %[f3] \n\t"
    "add.s %[f3], %[f5], %[f7] \n\t"
    "sub.s %[f5], %[f5], %[f7] \n\t"
    "sub.s %[f7], %[f8], %[f2] \n\t"
    "add.s %[f8], %[f8], %[f2] \n\t"
    "add.s %[f2], %[f1], %[f4] \n\t"
    "sub.s %[f1], %[f1], %[f4] \n\t"
    "sub.s %[f4], %[f6], %[f3] \n\t"
    "add.s %[f6], %[f6], %[f3] \n\t"
    "sub.s %[f3], %[f0], %[f5] \n\t"
    "add.s %[f0], %[f0], %[f5] \n\t"
    "swc1 %[f8], 0(%[tmp_a]) \n\t"
    "swc1 %[f6], 4(%[tmp_a]) \n\t"
    "mul.s %[f5], %[f9], %[f7] \n\t"
#if defined(MIPS32_R2_LE)
    "mul.s %[f7], %[f10], %[f7] \n\t"
    "mul.s %[f8], %[f11], %[f3] \n\t"
    "mul.s %[f3], %[f12], %[f3] \n\t"
    "mul.s %[f6], %[f13], %[f0] \n\t"
    "mul.s %[f0], %[f14], %[f0] \n\t"
    "nmsub.s %[f5], %[f5], %[f10], %[f4] \n\t"
    "madd.s %[f7], %[f7], %[f9], %[f4] \n\t"
    "nmsub.s %[f8], %[f8], %[f12], %[f2] \n\t"
    "madd.s %[f3], %[f3], %[f11], %[f2] \n\t"
    "nmsub.s %[f6], %[f6], %[f14], %[f1] \n\t"
    "madd.s %[f0], %[f0], %[f13], %[f1] \n\t"
    "swc1 %[f5], 64(%[tmp_a]) \n\t"
    "swc1 %[f7], 68(%[tmp_a]) \n\t"
#else
    "mul.s %[f8], %[f10], %[f4] \n\t"
    "mul.s %[f4], %[f9], %[f4] \n\t"
    "mul.s %[f7], %[f10], %[f7] \n\t"
    "mul.s %[f6], %[f11], %[f3] \n\t"
    "mul.s %[f3], %[f12], %[f3] \n\t"
    "sub.s %[f5], %[f5], %[f8] \n\t"
    "mul.s %[f8], %[f12], %[f2] \n\t"
    "mul.s %[f2], %[f11], %[f2] \n\t"
    "add.s %[f7], %[f4], %[f7] \n\t"
    "mul.s %[f4], %[f13], %[f0] \n\t"
    "mul.s %[f0], %[f14], %[f0] \n\t"
    "sub.s %[f8], %[f6], %[f8] \n\t"
    "mul.s %[f6], %[f14], %[f1] \n\t"
    "mul.s %[f1], %[f13], %[f1] \n\t"
    "add.s %[f3], %[f2], %[f3] \n\t"
    "swc1 %[f5], 64(%[tmp_a]) \n\t"
    "swc1 %[f7], 68(%[tmp_a]) \n\t"
    "sub.s %[f6], %[f4], %[f6] \n\t"
    "add.s %[f0], %[f1], %[f0] \n\t"
#endif
    "swc1 %[f8], 32(%[tmp_a]) \n\t"
    "swc1 %[f3], 36(%[tmp_a]) \n\t"
    "swc1 %[f6], 96(%[tmp_a]) \n\t"
    "swc1 %[f0], 100(%[tmp_a]) \n\t"
    "bgtz %[count], 1b \n\t"
    // Branch delay slot (.set noreorder): executed on every iteration.
    " addiu %[tmp_a], %[tmp_a], 8 \n\t"
    ".set pop \n\t"
    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
    : [a] "r" (a), [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11),
      [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14)
    : "memory"
  );
  // Loop 4: starts at a[96]; f9 and f10 still hold rdft_w[2] and rdft_w[3].
  f11 = rdft_w[6];
  f12 = rdft_w[7];
  f13 = rdft_wk3ri_second[2];
  f14 = rdft_wk3ri_second[3];
  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "addiu %[tmp_a], %[a], 384 \n\t"
    "addiu %[count], $zero, 4 \n\t"
    "1: \n\t"
    "addiu %[count], %[count], -1 \n\t"
    "lwc1 %[f0], 0(%[tmp_a]) \n\t"
    "lwc1 %[f1], 4(%[tmp_a]) \n\t"
    "lwc1 %[f2], 32(%[tmp_a]) \n\t"
    "lwc1 %[f3], 36(%[tmp_a]) \n\t"
    "lwc1 %[f4], 64(%[tmp_a]) \n\t"
    "lwc1 %[f5], 68(%[tmp_a]) \n\t"
    "lwc1 %[f6], 96(%[tmp_a]) \n\t"
    "lwc1 %[f7], 100(%[tmp_a]) \n\t"
    "add.s %[f8], %[f0], %[f2] \n\t"
    "sub.s %[f0], %[f0], %[f2] \n\t"
    "add.s %[f2], %[f4], %[f6] \n\t"
    "sub.s %[f4], %[f4], %[f6] \n\t"
    "add.s %[f6], %[f1], %[f3] \n\t"
    "sub.s %[f1], %[f1], %[f3] \n\t"
    "add.s %[f3], %[f5], %[f7] \n\t"
    "sub.s %[f5], %[f5], %[f7] \n\t"
    "sub.s %[f7], %[f2], %[f8] \n\t"
    "add.s %[f2], %[f2], %[f8] \n\t"
    "add.s %[f8], %[f1], %[f4] \n\t"
    "sub.s %[f1], %[f1], %[f4] \n\t"
    "sub.s %[f4], %[f3], %[f6] \n\t"
    "add.s %[f3], %[f3], %[f6] \n\t"
    "sub.s %[f6], %[f0], %[f5] \n\t"
    "add.s %[f0], %[f0], %[f5] \n\t"
    "swc1 %[f2], 0(%[tmp_a]) \n\t"
    "swc1 %[f3], 4(%[tmp_a]) \n\t"
    "mul.s %[f5], %[f10], %[f7] \n\t"
#if defined(MIPS32_R2_LE)
    "mul.s %[f7], %[f9], %[f7] \n\t"
    "mul.s %[f2], %[f12], %[f8] \n\t"
    "mul.s %[f8], %[f11], %[f8] \n\t"
    "mul.s %[f3], %[f14], %[f1] \n\t"
    "mul.s %[f1], %[f13], %[f1] \n\t"
    "madd.s %[f5], %[f5], %[f9], %[f4] \n\t"
    "msub.s %[f7], %[f7], %[f10], %[f4] \n\t"
    "msub.s %[f2], %[f2], %[f11], %[f6] \n\t"
    "madd.s %[f8], %[f8], %[f12], %[f6] \n\t"
    "msub.s %[f3], %[f3], %[f13], %[f0] \n\t"
    "madd.s %[f1], %[f1], %[f14], %[f0] \n\t"
    "swc1 %[f5], 64(%[tmp_a]) \n\t"
    "swc1 %[f7], 68(%[tmp_a]) \n\t"
#else
    "mul.s %[f2], %[f9], %[f4] \n\t"
    "mul.s %[f4], %[f10], %[f4] \n\t"
    "mul.s %[f7], %[f9], %[f7] \n\t"
    "mul.s %[f3], %[f11], %[f6] \n\t"
    "mul.s %[f6], %[f12], %[f6] \n\t"
    "add.s %[f5], %[f5], %[f2] \n\t"
    "sub.s %[f7], %[f4], %[f7] \n\t"
    "mul.s %[f2], %[f12], %[f8] \n\t"
    "mul.s %[f8], %[f11], %[f8] \n\t"
    "mul.s %[f4], %[f14], %[f1] \n\t"
    "mul.s %[f1], %[f13], %[f1] \n\t"
    "sub.s %[f2], %[f3], %[f2] \n\t"
    "mul.s %[f3], %[f13], %[f0] \n\t"
    "mul.s %[f0], %[f14], %[f0] \n\t"
    "add.s %[f8], %[f8], %[f6] \n\t"
    "swc1 %[f5], 64(%[tmp_a]) \n\t"
    "swc1 %[f7], 68(%[tmp_a]) \n\t"
    "sub.s %[f3], %[f3], %[f4] \n\t"
    "add.s %[f1], %[f1], %[f0] \n\t"
#endif
    "swc1 %[f2], 32(%[tmp_a]) \n\t"
    "swc1 %[f8], 36(%[tmp_a]) \n\t"
    "swc1 %[f3], 96(%[tmp_a]) \n\t"
    "swc1 %[f1], 100(%[tmp_a]) \n\t"
    "bgtz %[count], 1b \n\t"
    // Branch delay slot (.set noreorder): executed on every iteration.
    " addiu %[tmp_a], %[tmp_a], 8 \n\t"
    ".set pop \n\t"
    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
    : [a] "r" (a), [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11),
      [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14)
    : "memory"
  );
}

// MIPS routine installed as cftfsub_128 by aec_rdft_init_mips(). Calls
// cft1st_128(a) and cftmdl_128(a) through the function pointers of those
// names, then runs a 16-iteration asm loop over |a|. Each pass reads the
// float pairs at byte offsets 0, 128, 256 and 384 from tmp_a (i.e. a[k],
// a[k + 32], a[k + 64], a[k + 96] and their +1 neighbours), combines them
// with additions and subtractions only, stores the results back to the same
// eight locations and advances tmp_a by 8 bytes.
// NOTE(review): tmp_a holds an address in an |int|, so this assumes pointers
// fit in an int.
static void cftfsub_128_mips(float* a) {
  float f0, f1, f2, f3, f4, f5, f6, f7, f8;
  int tmp_a, count;

  cft1st_128(a);
  cftmdl_128(a);

  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "addiu %[tmp_a], %[a], 0 \n\t"
    "addiu %[count], $zero, 16 \n\t"
    "1: \n\t"
    "addiu %[count], %[count], -1 \n\t"
    "lwc1 %[f0], 0(%[tmp_a]) \n\t"
    "lwc1 %[f2], 128(%[tmp_a]) \n\t"
    "lwc1 %[f4], 256(%[tmp_a]) \n\t"
    "lwc1 %[f6], 384(%[tmp_a]) \n\t"
    "lwc1 %[f1], 4(%[tmp_a]) \n\t"
    "lwc1 %[f3], 132(%[tmp_a]) \n\t"
    "lwc1 %[f5], 260(%[tmp_a]) \n\t"
    "lwc1 %[f7], 388(%[tmp_a]) \n\t"
    "add.s %[f8], %[f0], %[f2] \n\t"
    "sub.s %[f0], %[f0], %[f2] \n\t"
    "add.s %[f2], %[f4], %[f6] \n\t"
    "sub.s %[f4], %[f4], %[f6] \n\t"
    "add.s %[f6], %[f1], %[f3] \n\t"
    "sub.s %[f1], %[f1], %[f3] \n\t"
    "add.s %[f3], %[f5], %[f7] \n\t"
    "sub.s %[f5], %[f5], %[f7] \n\t"
    "add.s %[f7], %[f8], %[f2] \n\t"
    "sub.s %[f8], %[f8], %[f2] \n\t"
    "add.s %[f2], %[f1], %[f4] \n\t"
    "sub.s %[f1], %[f1], %[f4] \n\t"
    "add.s %[f4], %[f6], %[f3] \n\t"
    "sub.s %[f6], %[f6], %[f3] \n\t"
    "sub.s %[f3], %[f0], %[f5] \n\t"
    "add.s %[f0], %[f0], %[f5] \n\t"
    "swc1 %[f7], 0(%[tmp_a]) \n\t"
    "swc1 %[f8], 256(%[tmp_a]) \n\t"
    "swc1 %[f2], 132(%[tmp_a]) \n\t"
    "swc1 %[f1], 388(%[tmp_a]) \n\t"
    "swc1 %[f4], 4(%[tmp_a]) \n\t"
    "swc1 %[f6], 260(%[tmp_a]) \n\t"
    "swc1 %[f3], 128(%[tmp_a]) \n\t"
    "swc1 %[f0], 384(%[tmp_a]) \n\t"
    "bgtz %[count], 1b \n\t"
    // Branch delay slot (.set noreorder): executed on every iteration.
    " addiu %[tmp_a], %[tmp_a], 8 \n\t"
    ".set pop \n\t"
    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a),
      [count] "=&r" (count)
    : [a] "r" (a)
    : "memory"
  );
}

// MIPS routine installed as cftbsub_128 by aec_rdft_init_mips(). Same
// structure as cftfsub_128_mips (cft1st_128, cftmdl_128, then a 16-iteration
// add/subtract loop over the float pairs at byte offsets 0, 128, 256 and 384
// from tmp_a), but with different signs on the odd-offset (4, 132, 260, 388)
// values: the difference at offsets 4/132 is taken as 132 - 4, the value
// stored at offset 260 is (f3 - f6) rather than (f6 - f3), and the value
// stored at offset 4 is negated with neg.s.
// NOTE(review): presumably the backward-transform counterpart of
// cftfsub_128_mips - confirm against the generic cftbsub implementation.
static void cftbsub_128_mips(float* a) {
  float f0, f1, f2, f3, f4, f5, f6, f7, f8;
  int tmp_a, count;

  cft1st_128(a);
  cftmdl_128(a);

  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "addiu %[tmp_a], %[a], 0 \n\t"
    "addiu %[count], $zero, 16 \n\t"
    "1: \n\t"
    "addiu %[count], %[count], -1 \n\t"
    "lwc1 %[f0], 0(%[tmp_a]) \n\t"
    "lwc1 %[f2], 128(%[tmp_a]) \n\t"
    "lwc1 %[f4], 256(%[tmp_a]) \n\t"
    "lwc1 %[f6], 384(%[tmp_a]) \n\t"
    "lwc1 %[f1], 4(%[tmp_a]) \n\t"
    "lwc1 %[f3], 132(%[tmp_a]) \n\t"
    "lwc1 %[f5], 260(%[tmp_a]) \n\t"
    "lwc1 %[f7], 388(%[tmp_a]) \n\t"
    "add.s %[f8], %[f0], %[f2] \n\t"
    "sub.s %[f0], %[f0], %[f2] \n\t"
    "add.s %[f2], %[f4], %[f6] \n\t"
    "sub.s %[f4], %[f4], %[f6] \n\t"
    "add.s %[f6], %[f1], %[f3] \n\t"
    "sub.s %[f1], %[f3], %[f1] \n\t"
    "add.s %[f3], %[f5], %[f7] \n\t"
    "sub.s %[f5], %[f5], %[f7] \n\t"
    "add.s %[f7], %[f8], %[f2] \n\t"
    "sub.s %[f8], %[f8], %[f2] \n\t"
    "sub.s %[f2], %[f1], %[f4] \n\t"
    "add.s %[f1], %[f1], %[f4] \n\t"
    "add.s %[f4], %[f3], %[f6] \n\t"
    "sub.s %[f6], %[f3], %[f6] \n\t"
    "sub.s %[f3], %[f0], %[f5] \n\t"
    "add.s %[f0], %[f0], %[f5] \n\t"
    "neg.s %[f4], %[f4] \n\t"
    "swc1 %[f7], 0(%[tmp_a]) \n\t"
    "swc1 %[f8], 256(%[tmp_a]) \n\t"
    "swc1 %[f2], 132(%[tmp_a]) \n\t"
    "swc1 %[f1], 388(%[tmp_a]) \n\t"
    "swc1 %[f6], 260(%[tmp_a]) \n\t"
    "swc1 %[f3], 128(%[tmp_a]) \n\t"
    "swc1 %[f0], 384(%[tmp_a]) \n\t"
    "swc1 %[f4], 4(%[tmp_a]) \n\t"
    "bgtz %[count], 1b \n\t"
    // Branch delay slot (.set noreorder): executed on every iteration.
    " addiu %[tmp_a], %[tmp_a], 8 \n\t"
    ".set pop \n\t"
    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3),
      [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7),
      [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count)
    : [a] "r" (a)
    : "memory"
  );
}

// MIPS routine installed as rftfsub_128 by aec_rdft_init_mips(). Updates |a|
// in place, pairing an ascending pointer a1 (starting at &a[2]) with a
// descending pointer a2 (starting at &a[126]). With c = rdft_w + 32, c1
// ascending from &c[1] and c2 descending from &c[31], each pair computes
//   wkr = 0.5 - *c2;         wki = *c1;
//   xr  = a1[0] - a2[0];     xi  = a1[1] + a2[1];
//   yr  = wkr * xr - wki * xi;
//   yi  = wkr * xi + wki * xr;
//   a1[0] -= yr;  a1[1] -= yi;  a2[0] += yr;  a2[1] -= yi;
// One pair is handled before the loop and the 15-iteration loop handles two
// pairs per pass (31 pairs in total). When MIPS32_R2_LE is defined the
// products are formed with nmsub.s/madd.s instead of mul.s + sub.s/add.s.
static void rftfsub_128_mips(float* a) {
  const float* c = rdft_w + 32;
  const float f0 = 0.5f;
  float* a1 = &a[2];
  float* a2 = &a[126];
  const float* c1 = &c[1];
  const float* c2 = &c[31];
  float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15;
  int count;

  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    // First pair, handled outside the loop.
    "lwc1 %[f6], 0(%[c2]) \n\t"
    "lwc1 %[f1], 0(%[a1]) \n\t"
    "lwc1 %[f2], 0(%[a2]) \n\t"
    "lwc1 %[f3], 4(%[a1]) \n\t"
    "lwc1 %[f4], 4(%[a2]) \n\t"
    "lwc1 %[f5], 0(%[c1]) \n\t"
    "sub.s %[f6], %[f0], %[f6] \n\t"
    "sub.s %[f7], %[f1], %[f2] \n\t"
    "add.s %[f8], %[f3], %[f4] \n\t"
    "addiu %[count], $zero, 15 \n\t"
    "mul.s %[f9], %[f6], %[f7] \n\t"
    "mul.s %[f6], %[f6], %[f8] \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s %[f8], %[f5], %[f8] \n\t"
    "mul.s %[f5], %[f5], %[f7] \n\t"
    "sub.s %[f9], %[f9], %[f8] \n\t"
    "add.s %[f6], %[f6], %[f5] \n\t"
#else
    "nmsub.s %[f9], %[f9], %[f5], %[f8] \n\t"
    "madd.s %[f6], %[f6], %[f5], %[f7] \n\t"
#endif
    "sub.s %[f1], %[f1], %[f9] \n\t"
    "add.s %[f2], %[f2], %[f9] \n\t"
    "sub.s %[f3], %[f3], %[f6] \n\t"
    "sub.s %[f4], %[f4], %[f6] \n\t"
    "swc1 %[f1], 0(%[a1]) \n\t"
    "swc1 %[f2], 0(%[a2]) \n\t"
    "swc1 %[f3], 4(%[a1]) \n\t"
    "swc1 %[f4], 4(%[a2]) \n\t"
    "addiu %[a1], %[a1], 8 \n\t"
    "addiu %[a2], %[a2], -8 \n\t"
    "addiu %[c1], %[c1], 4 \n\t"
    "addiu %[c2], %[c2], -4 \n\t"
    // Loop: two pairs per pass; loads for the second pair are interleaved
    // with the arithmetic of the first.
    "1: \n\t"
    "lwc1 %[f6], 0(%[c2]) \n\t"
    "lwc1 %[f1], 0(%[a1]) \n\t"
    "lwc1 %[f2], 0(%[a2]) \n\t"
    "lwc1 %[f3], 4(%[a1]) \n\t"
    "lwc1 %[f4], 4(%[a2]) \n\t"
    "lwc1 %[f5], 0(%[c1]) \n\t"
    "sub.s %[f6], %[f0], %[f6] \n\t"
    "sub.s %[f7], %[f1], %[f2] \n\t"
    "add.s %[f8], %[f3], %[f4] \n\t"
    "lwc1 %[f10], -4(%[c2]) \n\t"
    "lwc1 %[f11], 8(%[a1]) \n\t"
    "lwc1 %[f12], -8(%[a2]) \n\t"
    "mul.s %[f9], %[f6], %[f7] \n\t"
    "mul.s %[f6], %[f6], %[f8] \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s %[f8], %[f5], %[f8] \n\t"
    "mul.s %[f5], %[f5], %[f7] \n\t"
    "lwc1 %[f13], 12(%[a1]) \n\t"
    "lwc1 %[f14], -4(%[a2]) \n\t"
    "lwc1 %[f15], 4(%[c1]) \n\t"
    "sub.s %[f9], %[f9], %[f8] \n\t"
    "add.s %[f6], %[f6], %[f5] \n\t"
#else
    "lwc1 %[f13], 12(%[a1]) \n\t"
    "lwc1 %[f14], -4(%[a2]) \n\t"
    "lwc1 %[f15], 4(%[c1]) \n\t"
    "nmsub.s %[f9], %[f9], %[f5], %[f8] \n\t"
    "madd.s %[f6], %[f6], %[f5], %[f7] \n\t"
#endif
    "sub.s %[f10], %[f0], %[f10] \n\t"
    "sub.s %[f5], %[f11], %[f12] \n\t"
    "add.s %[f7], %[f13], %[f14] \n\t"
    "sub.s %[f1], %[f1], %[f9] \n\t"
    "add.s %[f2], %[f2], %[f9] \n\t"
    "sub.s %[f3], %[f3], %[f6] \n\t"
    "mul.s %[f8], %[f10], %[f5] \n\t"
    "mul.s %[f10], %[f10], %[f7] \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s %[f9], %[f15], %[f7] \n\t"
    "mul.s %[f15], %[f15], %[f5] \n\t"
    "sub.s %[f4], %[f4], %[f6] \n\t"
    "swc1 %[f1], 0(%[a1]) \n\t"
    "swc1 %[f2], 0(%[a2]) \n\t"
    "sub.s %[f8], %[f8], %[f9] \n\t"
    "add.s %[f10], %[f10], %[f15] \n\t"
#else
    "swc1 %[f1], 0(%[a1]) \n\t"
    "swc1 %[f2], 0(%[a2]) \n\t"
    "sub.s %[f4], %[f4], %[f6] \n\t"
    "nmsub.s %[f8], %[f8], %[f15], %[f7] \n\t"
    "madd.s %[f10], %[f10], %[f15], %[f5] \n\t"
#endif
    "swc1 %[f3], 4(%[a1]) \n\t"
    "swc1 %[f4], 4(%[a2]) \n\t"
    "sub.s %[f11], %[f11], %[f8] \n\t"
    "add.s %[f12], %[f12], %[f8] \n\t"
    "sub.s %[f13], %[f13], %[f10] \n\t"
    "sub.s %[f14], %[f14], %[f10] \n\t"
    "addiu %[c2], %[c2], -8 \n\t"
    "addiu %[c1], %[c1], 8 \n\t"
    "swc1 %[f11], 8(%[a1]) \n\t"
    "swc1 %[f12], -8(%[a2]) \n\t"
    "swc1 %[f13], 12(%[a1]) \n\t"
    "swc1 %[f14], -4(%[a2]) \n\t"
    "addiu %[a1], %[a1], 16 \n\t"
    "addiu %[count], %[count], -1 \n\t"
    "bgtz %[count], 1b \n\t"
    // Branch delay slot (.set noreorder): executed on every iteration.
    " addiu %[a2], %[a2], -16 \n\t"
    ".set pop \n\t"
    : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2),
      [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4),
      [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
      [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12),
      [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15),
      [count] "=&r" (count)
    : [f0] "f" (f0)
    : "memory"
  );
}

// MIPS routine installed as rftbsub_128 by aec_rdft_init_mips(). First
// negates a[1] and a[65], then walks the same a1/a2/c1/c2 pointers as
// rftfsub_128_mips (a1 up from &a[2], a2 down from &a[126], c = rdft_w + 32,
// c1 up from &c[1], c2 down from &c[31]). Each pair computes
//   wkr = 0.5 - *c2;         wki = *c1;
//   xr  = a1[0] - a2[0];     xi  = a1[1] + a2[1];
//   yr  = wkr * xr + wki * xi;
//   yi  = wkr * xi - wki * xr;
//   a1[0] -= yr;  a1[1] = yi - a1[1];  a2[0] += yr;  a2[1] = yi - a2[1];
// One pair is handled before the loop and the 15-iteration loop handles two
// pairs per pass (31 pairs in total). When MIPS32_R2_LE is defined the
// products are formed with madd.s/nmsub.s instead of mul.s + add.s/sub.s.
static void rftbsub_128_mips(float* a) {
  const float *c = rdft_w + 32;
  const float f0 = 0.5f;
  float* a1 = &a[2];
  float* a2 = &a[126];
  const float* c1 = &c[1];
  const float* c2 = &c[31];
  float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15;
  int count;

  a[1] = -a[1];
  a[65] = -a[65];

  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    // First pair, handled outside the loop.
    "lwc1 %[f6], 0(%[c2]) \n\t"
    "lwc1 %[f1], 0(%[a1]) \n\t"
    "lwc1 %[f2], 0(%[a2]) \n\t"
    "lwc1 %[f3], 4(%[a1]) \n\t"
    "lwc1 %[f4], 4(%[a2]) \n\t"
    "lwc1 %[f5], 0(%[c1]) \n\t"
    "sub.s %[f6], %[f0], %[f6] \n\t"
    "sub.s %[f7], %[f1], %[f2] \n\t"
    "add.s %[f8], %[f3], %[f4] \n\t"
    "addiu %[count], $zero, 15 \n\t"
    "mul.s %[f9], %[f6], %[f7] \n\t"
    "mul.s %[f6], %[f6], %[f8] \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s %[f8], %[f5], %[f8] \n\t"
    "mul.s %[f5], %[f5], %[f7] \n\t"
    "add.s %[f9], %[f9], %[f8] \n\t"
    "sub.s %[f6], %[f6], %[f5] \n\t"
#else
    "madd.s %[f9], %[f9], %[f5], %[f8] \n\t"
    "nmsub.s %[f6], %[f6], %[f5], %[f7] \n\t"
#endif
    "sub.s %[f1], %[f1], %[f9] \n\t"
    "add.s %[f2], %[f2], %[f9] \n\t"
    "sub.s %[f3], %[f6], %[f3] \n\t"
    "sub.s %[f4], %[f6], %[f4] \n\t"
    "swc1 %[f1], 0(%[a1]) \n\t"
    "swc1 %[f2], 0(%[a2]) \n\t"
    "swc1 %[f3], 4(%[a1]) \n\t"
    "swc1 %[f4], 4(%[a2]) \n\t"
    "addiu %[a1], %[a1], 8 \n\t"
    "addiu %[a2], %[a2], -8 \n\t"
    "addiu %[c1], %[c1], 4 \n\t"
    "addiu %[c2], %[c2], -4 \n\t"
    // Loop: two pairs per pass; loads for the second pair are interleaved
    // with the arithmetic of the first.
    "1: \n\t"
    "lwc1 %[f6], 0(%[c2]) \n\t"
    "lwc1 %[f1], 0(%[a1]) \n\t"
    "lwc1 %[f2], 0(%[a2]) \n\t"
    "lwc1 %[f3], 4(%[a1]) \n\t"
    "lwc1 %[f4], 4(%[a2]) \n\t"
    "lwc1 %[f5], 0(%[c1]) \n\t"
    "sub.s %[f6], %[f0], %[f6] \n\t"
    "sub.s %[f7], %[f1], %[f2] \n\t"
    "add.s %[f8], %[f3], %[f4] \n\t"
    "lwc1 %[f10], -4(%[c2]) \n\t"
    "lwc1 %[f11], 8(%[a1]) \n\t"
    "lwc1 %[f12], -8(%[a2]) \n\t"
    "mul.s %[f9], %[f6], %[f7] \n\t"
    "mul.s %[f6], %[f6], %[f8] \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s %[f8], %[f5], %[f8] \n\t"
    "mul.s %[f5], %[f5], %[f7] \n\t"
    "lwc1 %[f13], 12(%[a1]) \n\t"
    "lwc1 %[f14], -4(%[a2]) \n\t"
    "lwc1 %[f15], 4(%[c1]) \n\t"
    "add.s %[f9], %[f9], %[f8] \n\t"
    "sub.s %[f6], %[f6], %[f5] \n\t"
#else
    "lwc1 %[f13], 12(%[a1]) \n\t"
    "lwc1 %[f14], -4(%[a2]) \n\t"
    "lwc1 %[f15], 4(%[c1]) \n\t"
    "madd.s %[f9], %[f9], %[f5], %[f8] \n\t"
    "nmsub.s %[f6], %[f6], %[f5], %[f7] \n\t"
#endif
    "sub.s %[f10], %[f0], %[f10] \n\t"
    "sub.s %[f5], %[f11], %[f12] \n\t"
    "add.s %[f7], %[f13], %[f14] \n\t"
    "sub.s %[f1], %[f1], %[f9] \n\t"
    "add.s %[f2], %[f2], %[f9] \n\t"
    "sub.s %[f3], %[f6], %[f3] \n\t"
    "mul.s %[f8], %[f10], %[f5] \n\t"
    "mul.s %[f10], %[f10], %[f7] \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s %[f9], %[f15], %[f7] \n\t"
    "mul.s %[f15], %[f15], %[f5] \n\t"
    "sub.s %[f4], %[f6], %[f4] \n\t"
    "swc1 %[f1], 0(%[a1]) \n\t"
    "swc1 %[f2], 0(%[a2]) \n\t"
    "add.s %[f8], %[f8], %[f9] \n\t"
    "sub.s %[f10], %[f10], %[f15] \n\t"
#else
    "swc1 %[f1], 0(%[a1]) \n\t"
    "swc1 %[f2], 0(%[a2]) \n\t"
    "sub.s %[f4], %[f6], %[f4] \n\t"
    "madd.s %[f8], %[f8], %[f15], %[f7] \n\t"
    "nmsub.s %[f10], %[f10], %[f15], %[f5] \n\t"
#endif
    "swc1 %[f3], 4(%[a1]) \n\t"
    "swc1 %[f4], 4(%[a2]) \n\t"
    "sub.s %[f11], %[f11], %[f8] \n\t"
    "add.s %[f12], %[f12], %[f8] \n\t"
    "sub.s %[f13], %[f10], %[f13] \n\t"
    "sub.s %[f14], %[f10], %[f14] \n\t"
    "addiu %[c2], %[c2], -8 \n\t"
    "addiu %[c1], %[c1], 8 \n\t"
    "swc1 %[f11], 8(%[a1]) \n\t"
    "swc1 %[f12], -8(%[a2]) \n\t"
    "swc1 %[f13], 12(%[a1]) \n\t"
    "swc1 %[f14], -4(%[a2]) \n\t"
    "addiu %[a1], %[a1], 16 \n\t"
    "addiu %[count], %[count], -1 \n\t"
    "bgtz %[count], 1b \n\t"
    // Branch delay slot (.set noreorder): executed on every iteration.
    " addiu %[a2], %[a2], -16 \n\t"
    ".set pop \n\t"
    : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2),
      [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4),
      [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
      [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12),
      [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15),
      [count] "=&r" (count)
    : [f0] "f" (f0)
    : "memory"
  );
}

// Points the seven function pointers cft1st_128, cftmdl_128, rftfsub_128,
// rftbsub_128, cftfsub_128, cftbsub_128 and bitrv2_128 (declared outside this
// file) at the MIPS implementations defined above. cftfsub_128_mips and
// cftbsub_128_mips call cft1st_128 and cftmdl_128 through these pointers.
void aec_rdft_init_mips(void) {
  cft1st_128 = cft1st_128_mips;
  cftmdl_128 = cftmdl_128_mips;
  rftfsub_128 = rftfsub_128_mips;
  rftbsub_128 = rftbsub_128_mips;
  cftfsub_128 = cftfsub_128_mips;
  cftbsub_128 = cftbsub_128_mips;
  bitrv2_128 = bitrv2_128_mips;
}