1 /* ------------------------------------------------------------------ 2 * Copyright (C) 1998-2009 PacketVideo 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 * express or implied. 14 * See the License for the specific language governing permissions 15 * and limitations under the License. 16 * ------------------------------------------------------------------- 17 */ 18 /*********************************************************************************/ 19 /* Filename: fastquant_inline.h */ 20 /* Description: Implementation for in-line functions used in dct.cpp */ 21 /* Modified: */ 22 /*********************************************************************************/ 23 #ifndef _FASTQUANT_INLINE_H_ 24 #define _FASTQUANT_INLINE_H_ 25 26 #include "mp4def.h" 27 #include "oscl_base_macros.h" 28 29 #if !defined(PV_ARM_GCC_V5) && !defined(PV_ARM_GCC_V4) /* ARM GNU COMPILER */ 30 31 __inline int32 aan_scale(int32 q_value, int32 coeff, int32 round, int32 QPdiv2) 32 { 33 q_value = coeff * q_value + round; 34 coeff = q_value >> 16; 35 if (coeff < 0) coeff += QPdiv2; 36 else coeff -= QPdiv2; 37 38 return coeff; 39 } 40 41 42 __inline int32 coeff_quant(int32 coeff, int32 q_scale, int32 shift) 43 { 44 int32 q_value; 45 46 q_value = coeff * q_scale; //q_value = -((-(coeff + QPdiv2)*q_scale)>>LSL); 47 q_value >>= shift; //q_value = (((coeff - QPdiv2)*q_scale)>>LSL ); 48 q_value += ((UInt)q_value >> 31); /* add one if negative */ 49 50 return q_value; 51 } 52 53 __inline int32 coeff_clip(int32 q_value, int32 ac_clip) 54 { 55 int32 coeff = q_value + ac_clip; 56 57 if ((UInt)coeff > (UInt)(ac_clip << 1)) 58 q_value = ac_clip ^(q_value >> 31); 59 60 return q_value; 61 } 62 63 __inline int32 coeff_dequant(int32 q_value, int32 QPx2, int32 Addition, int32 tmp) 64 { 65 int32 coeff; 66 67 OSCL_UNUSED_ARG(tmp); 68 69 if (q_value < 0) 70 { 71 coeff = q_value * QPx2 - Addition; 72 if (coeff < -2048) 73 coeff = -2048; 74 } 75 else 76 { 77 coeff = q_value * QPx2 + Addition; 78 if (coeff > 2047) 79 coeff = 2047; 80 } 81 return coeff; 82 } 83 84 __inline int32 smlabb(int32 q_value, int32 coeff, int32 round) 85 { 86 q_value = coeff * q_value + round; 87 88 return q_value; 89 } 90 91 __inline int32 smulbb(int32 q_scale, int32 coeff) 92 { 93 int32 q_value; 94 95 q_value = coeff * q_scale; 96 97 return q_value; 98 } 99 100 __inline int32 aan_dc_scale(int32 coeff, int32 QP) 101 { 102 103 if (coeff < 0) coeff += (QP >> 1); 104 else coeff -= (QP >> 1); 105 106 return coeff; 107 } 108 109 __inline int32 clip_2047(int32 q_value, int32 tmp) 110 { 111 OSCL_UNUSED_ARG(tmp); 112 113 if (q_value < -2048) 114 { 115 q_value = -2048; 116 } 117 else if (q_value > 2047) 118 { 119 q_value = 2047; 120 } 121 122 return q_value; 123 } 124 125 __inline int32 coeff_dequant_mpeg(int32 q_value, int32 stepsize, int32 QP, int32 tmp) 126 { 127 int32 coeff; 128 129 OSCL_UNUSED_ARG(tmp); 130 131 coeff = q_value << 1; 132 stepsize *= QP; 133 if (coeff > 0) 134 { 135 q_value = (coeff + 1) * stepsize; 136 q_value >>= 4; 137 if (q_value > 2047) q_value = 2047; 138 } 139 else 140 { 141 q_value = (coeff - 1) * stepsize; 142 q_value += 15; 143 q_value >>= 4; 144 if (q_value < -2048) q_value = -2048; 145 } 146 147 return q_value; 148 } 149 150 __inline int32 coeff_dequant_mpeg_intra(int32 q_value, int32 tmp) 151 { 152 OSCL_UNUSED_ARG(tmp); 153 154 q_value <<= 1; 155 if (q_value > 0) 156 { 157 q_value >>= 4; 158 if (q_value > 2047) q_value = 2047; 159 } 160 else 161 { 162 q_value += 15; 163 q_value >>= 4; 164 if (q_value < -2048) q_value = -2048; 165 } 166 167 return q_value; 168 } 169 170 #elif defined(__CC_ARM) /* only work with arm v5 */ 171 172 #if defined(__TARGET_ARCH_5TE) 173 174 __inline int32 aan_scale(int32 q_value, int32 coeff, 175 int32 round, int32 QPdiv2) 176 { 177 __asm 178 { 179 smlabb q_value, coeff, q_value, round 180 movs coeff, q_value, asr #16 181 addle coeff, coeff, QPdiv2 182 subgt coeff, coeff, QPdiv2 183 } 184 185 return coeff; 186 } 187 188 __inline int32 coeff_quant(int32 coeff, int32 q_scale, int32 shift) 189 { 190 int32 q_value; 191 192 __asm 193 { 194 smulbb q_value, q_scale, coeff /*mov coeff, coeff, lsl #14*/ 195 mov coeff, q_value, asr shift /*smull tmp, coeff, q_scale, coeff*/ 196 add q_value, coeff, coeff, lsr #31 197 } 198 199 200 return q_value; 201 } 202 203 __inline int32 coeff_dequant(int32 q_value, int32 QPx2, int32 Addition, int32 tmp) 204 { 205 int32 coeff; 206 207 __asm 208 { 209 cmp q_value, #0 210 smulbb coeff, q_value, QPx2 211 sublt coeff, coeff, Addition 212 addge coeff, coeff, Addition 213 add q_value, coeff, tmp 214 subs q_value, q_value, #3840 215 subcss q_value, q_value, #254 216 eorhi coeff, tmp, coeff, asr #31 217 } 218 219 return coeff; 220 } 221 222 __inline int32 smlabb(int32 q_value, int32 coeff, int32 round) 223 { 224 __asm 225 { 226 smlabb q_value, coeff, q_value, round 227 } 228 229 return q_value; 230 } 231 232 __inline int32 smulbb(int32 q_scale, int32 coeff) 233 { 234 int32 q_value; 235 236 __asm 237 { 238 smulbb q_value, q_scale, coeff 239 } 240 241 return q_value; 242 } 243 244 __inline int32 coeff_dequant_mpeg(int32 q_value, int32 stepsize, int32 QP, int32 tmp) 245 { 246 /* tmp must have value of 2047 */ 247 int32 coeff; 248 __asm 249 { 250 movs coeff, q_value, lsl #1 251 smulbb stepsize, stepsize, QP 252 addgt coeff, coeff, #1 253 sublt coeff, coeff, #1 254 smulbb q_value, coeff, stepsize 255 addlt q_value, q_value, #15 256 mov q_value, q_value, asr #4 257 add coeff, q_value, tmp 258 subs coeff, coeff, #0xf00 259 subcss coeff, coeff, #0xfe 260 eorhi q_value, tmp, q_value, asr #31 261 } 262 263 return q_value; 264 } 265 266 267 #else // not ARMV5TE 268 269 __inline int32 aan_scale(int32 q_value, int32 coeff, 270 int32 round, int32 QPdiv2) 271 { 272 __asm 273 { 274 mla q_value, coeff, q_value, round 275 movs coeff, q_value, asr #16 276 addle coeff, coeff, QPdiv2 277 subgt coeff, coeff, QPdiv2 278 } 279 280 return coeff; 281 } 282 283 __inline int32 coeff_quant(int32 coeff, int32 q_scale, int32 shift) 284 { 285 int32 q_value; 286 287 __asm 288 { 289 mul q_value, q_scale, coeff /*mov coeff, coeff, lsl #14*/ 290 mov coeff, q_value, asr shift /*smull tmp, coeff, q_scale, coeff*/ 291 add q_value, coeff, coeff, lsr #31 292 } 293 294 295 return q_value; 296 } 297 298 299 __inline int32 coeff_dequant(int32 q_value, int32 QPx2, int32 Addition, int32 tmp) 300 { 301 int32 coeff; 302 303 __asm 304 { 305 cmp q_value, #0 306 mul coeff, q_value, QPx2 307 sublt coeff, coeff, Addition 308 addge coeff, coeff, Addition 309 add q_value, coeff, tmp 310 subs q_value, q_value, #3840 311 subcss q_value, q_value, #254 312 eorhi coeff, tmp, coeff, asr #31 313 } 314 315 return coeff; 316 } 317 318 __inline int32 smlabb(int32 q_value, int32 coeff, int32 round) 319 { 320 __asm 321 { 322 mla q_value, coeff, q_value, round 323 } 324 325 return q_value; 326 } 327 328 __inline int32 smulbb(int32 q_scale, int32 coeff) 329 { 330 int32 q_value; 331 332 __asm 333 { 334 mul q_value, q_scale, coeff 335 } 336 337 return q_value; 338 } 339 340 341 __inline int32 coeff_dequant_mpeg(int32 q_value, int32 stepsize, int32 QP, int32 tmp) 342 { 343 /* tmp must have value of 2047 */ 344 int32 coeff; 345 __asm 346 { 347 movs coeff, q_value, lsl #1 348 mul stepsize, stepsize, QP 349 addgt coeff, coeff, #1 350 sublt coeff, coeff, #1 351 mul q_value, coeff, stepsize 352 addlt q_value, q_value, #15 353 mov q_value, q_value, asr #4 354 add coeff, q_value, tmp 355 subs coeff, coeff, #0xf00 356 subcss coeff, coeff, #0xfe 357 eorhi q_value, tmp, q_value, asr #31 358 } 359 360 return q_value; 361 } 362 363 364 #endif 365 366 __inline int32 coeff_clip(int32 q_value, int32 ac_clip) 367 { 368 int32 coeff; 369 370 __asm 371 { 372 add coeff, q_value, ac_clip 373 subs coeff, coeff, ac_clip, lsl #1 374 eorhi q_value, ac_clip, q_value, asr #31 375 } 376 377 return q_value; 378 } 379 380 __inline int32 aan_dc_scale(int32 coeff, int32 QP) 381 { 382 383 __asm 384 { 385 cmp coeff, #0 386 addle coeff, coeff, QP, asr #1 387 subgt coeff, coeff, QP, asr #1 388 } 389 390 return coeff; 391 } 392 393 __inline int32 clip_2047(int32 q_value, int32 tmp) 394 { 395 /* tmp must have value of 2047 */ 396 int32 coeff; 397 398 __asm 399 { 400 add coeff, q_value, tmp 401 subs coeff, coeff, #0xf00 402 subcss coeff, coeff, #0xfe 403 eorhi q_value, tmp, q_value, asr #31 404 } 405 406 return q_value; 407 } 408 409 __inline int32 coeff_dequant_mpeg_intra(int32 q_value, int32 tmp) 410 { 411 int32 coeff; 412 413 __asm 414 { 415 movs q_value, q_value, lsl #1 416 addlt q_value, q_value, #15 417 mov q_value, q_value, asr #4 418 add coeff, q_value, tmp 419 subs coeff, coeff, #0xf00 420 subcss coeff, coeff, #0xfe 421 eorhi q_value, tmp, q_value, asr #31 422 } 423 424 return q_value; 425 } 426 427 #elif ( defined(PV_ARM_GCC_V4) || defined(PV_ARM_GCC_V5) ) /* ARM GNU COMPILER */ 428 429 __inline int32 aan_scale(int32 q_value, int32 coeff, 430 int32 round, int32 QPdiv2) 431 { 432 register int32 out; 433 register int32 qv = q_value; 434 register int32 cf = coeff; 435 register int32 rr = round; 436 register int32 qp = QPdiv2; 437 438 asm volatile("smlabb %0, %2, %1, %3\n\t" 439 "movs %0, %0, asr #16\n\t" 440 "addle %0, %0, %4\n\t" 441 "subgt %0, %0, %4" 442 : "=&r"(out) 443 : "r"(qv), 444 "r"(cf), 445 "r"(rr), 446 "r"(qp)); 447 return out; 448 } 449 450 __inline int32 coeff_quant(int32 coeff, int32 q_scale, int32 shift) 451 { 452 register int32 out; 453 register int32 temp1; 454 register int32 cc = coeff; 455 register int32 qs = q_scale; 456 register int32 ss = shift; 457 458 asm volatile("smulbb %0, %3, %2\n\t" 459 "mov %1, %0, asr %4\n\t" 460 "add %0, %1, %1, lsr #31" 461 : "=&r"(out), 462 "=&r"(temp1) 463 : "r"(cc), 464 "r"(qs), 465 "r"(ss)); 466 467 return out; 468 } 469 470 __inline int32 coeff_clip(int32 q_value, int32 ac_clip) 471 { 472 register int32 coeff; 473 474 asm volatile("add %1, %0, %2\n\t" 475 "subs %1, %1, %2, lsl #1\n\t" 476 "eorhi %0, %2, %0, asr #31" 477 : "+r"(q_value), 478 "=&r"(coeff) 479 : "r"(ac_clip)); 480 481 return q_value; 482 } 483 484 __inline int32 coeff_dequant(int32 q_value, int32 QPx2, int32 Addition, int32 tmp) 485 { 486 register int32 out; 487 register int32 temp1; 488 register int32 qv = q_value; 489 register int32 qp = QPx2; 490 register int32 aa = Addition; 491 register int32 tt = tmp; 492 493 asm volatile("cmp %2, #0\n\t" 494 "mul %0, %2, %3\n\t" 495 "sublt %0, %0, %4\n\t" 496 "addge %0, %0, %4\n\t" 497 "add %1, %0, %5\n\t" 498 "subs %1, %1, #3840\n\t" 499 "subcss %1, %1, #254\n\t" 500 "eorhi %0, %5, %0, asr #31" 501 : "=&r"(out), 502 "=&r"(temp1) 503 : "r"(qv), 504 "r"(qp), 505 "r"(aa), 506 "r"(tt)); 507 508 return out; 509 } 510 511 __inline int32 smlabb(int32 q_value, int32 coeff, int32 round) 512 { 513 register int32 out; 514 register int32 aa = (int32)q_value; 515 register int32 bb = (int32)coeff; 516 register int32 cc = (int32)round; 517 518 asm volatile("smlabb %0, %1, %2, %3" 519 : "=&r"(out) 520 : "r"(aa), 521 "r"(bb), 522 "r"(cc)); 523 return out; 524 } 525 526 __inline int32 smulbb(int32 q_scale, int32 coeff) 527 { 528 register int32 out; 529 register int32 aa = (int32)q_scale; 530 register int32 bb = (int32)coeff; 531 532 asm volatile("smulbb %0, %1, %2" 533 : "=&r"(out) 534 : "r"(aa), 535 "r"(bb)); 536 return out; 537 } 538 539 __inline int32 aan_dc_scale(int32 coeff, int32 QP) 540 { 541 register int32 out; 542 register int32 cc = coeff; 543 register int32 qp = QP; 544 545 asm volatile("cmp %1, #0\n\t" 546 "addle %0, %1, %2, asr #1\n\t" 547 "subgt %0, %1, %2, asr #1" 548 : "=&r"(out) 549 : "r"(cc), 550 "r"(qp)); 551 return out; 552 } 553 554 __inline int32 clip_2047(int32 q_value, int32 tmp) 555 { 556 register int32 coeff; 557 asm volatile("add %1, %0, %2\n\t" 558 "subs %1, %1, #0xF00\n\t" 559 "subcss %1, %1, #0xFE\n\t" 560 "eorhi %0, %2, %0, asr #31" 561 : "+r"(q_value), 562 "=&r"(coeff) 563 : "r"(tmp)); 564 565 return q_value; 566 } 567 568 __inline int32 coeff_dequant_mpeg(int32 q_value, int32 stepsize, int32 QP, int32 tmp) 569 { 570 register int32 out; 571 register int32 temp1; 572 register int32 qv = q_value; 573 register int32 ss = stepsize; 574 register int32 qp = QP; 575 register int32 tt = tmp; 576 577 asm volatile("movs %1, %2, lsl #1\n\t" 578 "mul %0, %3, %4\n\t" 579 "addgt %1, %1, #1\n\t" 580 "sublt %1, %1, #1\n\t" 581 "mul %0, %1, %0\n\t" 582 "addlt %0, %0, #15\n\t" 583 "mov %0, %0, asr #4\n\t" 584 "add %1, %0, %5\n\t" 585 "subs %1, %1, #0xF00\n\t" 586 "subcss %1, %1, #0xFE\n\t" 587 "eorhi %0, %5, %0, asr #31" 588 : "=&r"(out), 589 "=&r"(temp1) 590 : "r"(qv), 591 "r"(ss), 592 "r"(qp), 593 "r"(tt)); 594 595 return out; 596 597 } 598 599 __inline int32 coeff_dequant_mpeg_intra(int32 q_value, int32 tmp) 600 { 601 register int32 out; 602 register int32 temp1; 603 register int32 qv = q_value; 604 register int32 tt = tmp; 605 606 asm volatile("movs %1, %2, lsl #1\n\t" 607 "addlt %1, %1, #15\n\t" 608 "mov %0, %1, asr #4\n\t" 609 "add %1, %0, %3\n\t" 610 "subs %1, %1, #0xF00\n\t" 611 "subcss %1, %1, #0xFE\n\t" 612 "eorhi %0, %3, %0, asr #31" 613 : "=&r"(out), 614 "=&r"(temp1) 615 : "r"(qv), 616 "r"(tt)); 617 return out; 618 } 619 620 621 #endif // Platform 622 623 624 #endif //_FASTQUANT_INLINE_H_ 625 626 627