1 /* ------------------------------------------------------------------ 2 * Copyright (C) 1998-2009 PacketVideo 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 * express or implied. 14 * See the License for the specific language governing permissions 15 * and limitations under the License. 16 * ------------------------------------------------------------------- 17 */ 18 #include "mp4enc_lib.h" 19 #include "mp4lib_int.h" 20 #include "dct_inline.h" 21 22 #define FDCT_SHIFT 10 23 24 #ifdef __cplusplus 25 extern "C" 26 { 27 #endif 28 29 /**************************************************************************/ 30 /* Function: BlockDCT_AANwSub 31 Date: 7/31/01 32 Input: 33 Output: out[64] ==> next block 34 Purpose: Do subtraction for zero MV first 35 Modified: 36 **************************************************************************/ 37 38 Void BlockDCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width) 39 { 40 Short *dst; 41 Int k0, k1, k2, k3, k4, k5, k6, k7; 42 Int round; 43 Int k12 = 0x022A02D4; 44 Int k14 = 0x0188053A; 45 Int abs_sum; 46 Int mask; 47 Int tmp, tmp2; 48 Int ColTh; 49 50 dst = out + 64 ; 51 ColTh = *dst; 52 out += 128; 53 round = 1 << (FDCT_SHIFT - 1); 54 55 do /* fdct_nextrow */ 56 { 57 /* assuming the block is word-aligned */ 58 mask = 0x1FE; 59 tmp = *((Int*) cur); /* contains 4 pixels */ 60 tmp2 = *((Int*) pred); /* prediction 4 pixels */ 61 k0 = tmp2 & 0xFF; 62 k1 = mask & (tmp << 1); 63 k0 = k1 - (k0 << 1); 64 k1 = (tmp2 >> 8) & 0xFF; 65 k2 = mask & (tmp >> 7); 66 k1 = k2 - (k1 << 1); 67 k2 = (tmp2 >> 16) & 0xFF; 68 k3 = mask & (tmp >> 15); 69 k2 = k3 - (k2 << 1); 70 k3 = (tmp2 >> 24) & 0xFF; 71 k4 = mask & (tmp >> 23); 72 k3 = k4 - (k3 << 1); 73 tmp = *((Int*)(cur + 4)); /* another 4 pixels */ 74 tmp2 = *((Int*)(pred + 4)); 75 k4 = tmp2 & 0xFF; 76 k5 = mask & (tmp << 1); 77 k4 = k5 - (k4 << 1); 78 k5 = (tmp2 >> 8) & 0xFF; 79 k6 = mask & (tmp >> 7); 80 k5 = k6 - (k5 << 1); 81 k6 = (tmp2 >> 16) & 0xFF; 82 k7 = mask & (tmp >> 15); 83 k6 = k7 - (k6 << 1); 84 k7 = (tmp2 >> 24) & 0xFF; 85 tmp = mask & (tmp >> 23); 86 k7 = tmp - (k7 << 1); 87 cur += width; 88 pred += 16; 89 90 /* fdct_1 */ 91 k0 = k0 + k7; 92 k7 = k0 - (k7 << 1); 93 k1 = k1 + k6; 94 k6 = k1 - (k6 << 1); 95 k2 = k2 + k5; 96 k5 = k2 - (k5 << 1); 97 k3 = k3 + k4; 98 k4 = k3 - (k4 << 1); 99 100 k0 = k0 + k3; 101 k3 = k0 - (k3 << 1); 102 k1 = k1 + k2; 103 k2 = k1 - (k2 << 1); 104 105 k0 = k0 + k1; 106 k1 = k0 - (k1 << 1); 107 /**********/ 108 dst[0] = k0; 109 dst[4] = k1; /* col. 4 */ 110 /* fdct_2 */ 111 k4 = k4 + k5; 112 k5 = k5 + k6; 113 k6 = k6 + k7; 114 k2 = k2 + k3; 115 /* MUL2C k2,k5,724,FDCT_SHIFT */ 116 /* k0, k1 become scratch */ 117 /* assume FAST MULTIPLY */ 118 k1 = mla724(k12, k5, round); 119 k0 = mla724(k12, k2, round); 120 121 k5 = k1 >> FDCT_SHIFT; 122 k2 = k0 >> FDCT_SHIFT; 123 /*****************/ 124 k2 = k2 + k3; 125 k3 = (k3 << 1) - k2; 126 /********/ 127 dst[2] = k2; /* col. 2 */ 128 k3 <<= 1; /* scale up col. 6 */ 129 dst[6] = k3; /* col. 6 */ 130 /* fdct_3 */ 131 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 132 /* assume FAST MULTIPLY */ 133 /* k0, k1 are output */ 134 k0 = k4 - k6; 135 136 k1 = mla392(k0, k14, round); 137 k0 = mla554(k4, k12, k1); 138 k1 = mla1338(k6, k14, k1); 139 140 k4 = k0 >> FDCT_SHIFT; 141 k6 = k1 >> FDCT_SHIFT; 142 /***********************/ 143 k5 = k5 + k7; 144 k7 = (k7 << 1) - k5; 145 k4 = k4 + k7; 146 k7 = (k7 << 1) - k4; 147 k5 = k5 + k6; 148 k4 <<= 1; /* scale up col.5 */ 149 k6 = k5 - (k6 << 1); 150 /********/ 151 dst[5] = k4; /* col. 5 */ 152 k6 <<= 2; /* scale up col. 7 */ 153 dst[1] = k5; /* col. 1 */ 154 dst[7] = k6; /* col. 7 */ 155 dst[3] = k7; /* col. 3 */ 156 dst += 8; 157 } 158 while (dst < out); 159 160 out -= 64; 161 dst = out + 8; 162 163 /* Vertical Block Loop */ 164 do /* Vertical 8xDCT loop */ 165 { 166 k0 = out[0]; 167 k1 = out[8]; 168 k2 = out[16]; 169 k3 = out[24]; 170 k4 = out[32]; 171 k5 = out[40]; 172 k6 = out[48]; 173 k7 = out[56]; 174 /* deadzone thresholding for column */ 175 176 abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); 177 178 if (abs_sum < ColTh) 179 { 180 out[0] = 0x7fff; 181 out++; 182 continue; 183 } 184 185 /* fdct_1 */ 186 k0 = k0 + k7; 187 k7 = k0 - (k7 << 1); 188 k1 = k1 + k6; 189 k6 = k1 - (k6 << 1); 190 k2 = k2 + k5; 191 k5 = k2 - (k5 << 1); 192 k3 = k3 + k4; 193 k4 = k3 - (k4 << 1); 194 195 k0 = k0 + k3; 196 k3 = k0 - (k3 << 1); 197 k1 = k1 + k2; 198 k2 = k1 - (k2 << 1); 199 200 k0 = k0 + k1; 201 k1 = k0 - (k1 << 1); 202 /**********/ 203 out[32] = k1; /* row 4 */ 204 out[0] = k0; /* row 0 */ 205 /* fdct_2 */ 206 k4 = k4 + k5; 207 k5 = k5 + k6; 208 k6 = k6 + k7; 209 k2 = k2 + k3; 210 /* MUL2C k2,k5,724,FDCT_SHIFT */ 211 /* k0, k1 become scratch */ 212 /* assume FAST MULTIPLY */ 213 k1 = mla724(k12, k5, round); 214 k0 = mla724(k12, k2, round); 215 216 k5 = k1 >> FDCT_SHIFT; 217 k2 = k0 >> FDCT_SHIFT; 218 /*****************/ 219 k2 = k2 + k3; 220 k3 = (k3 << 1) - k2; 221 k3 <<= 1; /* scale up col. 6 */ 222 /********/ 223 out[48] = k3; /* row 6 */ 224 out[16] = k2; /* row 2 */ 225 /* fdct_3 */ 226 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 227 /* assume FAST MULTIPLY */ 228 /* k0, k1 are output */ 229 k0 = k4 - k6; 230 231 k1 = mla392(k0, k14, round); 232 k0 = mla554(k4, k12, k1); 233 k1 = mla1338(k6, k14, k1); 234 235 k4 = k0 >> FDCT_SHIFT; 236 k6 = k1 >> FDCT_SHIFT; 237 /***********************/ 238 k5 = k5 + k7; 239 k7 = (k7 << 1) - k5; 240 k4 = k4 + k7; 241 k7 = (k7 << 1) - k4; 242 k5 = k5 + k6; 243 k4 <<= 1; /* scale up col. 5 */ 244 k6 = k5 - (k6 << 1); 245 /********/ 246 out[24] = k7 ; /* row 3 */ 247 k6 <<= 2; /* scale up col. 7 */ 248 out[56] = k6 ; /* row 7 */ 249 out[8] = k5 ; /* row 1 */ 250 out[40] = k4 ; /* row 5 */ 251 out++; 252 } 253 while ((uintptr_t)out < (uintptr_t)dst) ; 254 255 return ; 256 } 257 258 /**************************************************************************/ 259 /* Function: Block4x4DCT_AANwSub 260 Date: 7/31/01 261 Input: 262 Output: out[64] ==> next block 263 Purpose: Do subtraction for zero MV first before 4x4 DCT 264 Modified: 265 **************************************************************************/ 266 267 Void Block4x4DCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width) 268 { 269 Short *dst; 270 Int k0, k1, k2, k3, k4, k5, k6, k7; 271 Int round; 272 Int k12 = 0x022A02D4; 273 Int k14 = 0x0188053A; 274 Int mask; 275 Int tmp, tmp2; 276 Int abs_sum; 277 Int ColTh; 278 279 dst = out + 64 ; 280 ColTh = *dst; 281 out += 128; 282 round = 1 << (FDCT_SHIFT - 1); 283 284 do /* fdct_nextrow */ 285 { 286 /* assuming the block is word-aligned */ 287 mask = 0x1FE; 288 tmp = *((Int*) cur); /* contains 4 pixels */ 289 tmp2 = *((Int*) pred); /* prediction 4 pixels */ 290 k0 = tmp2 & 0xFF; 291 k1 = mask & (tmp << 1); 292 k0 = k1 - (k0 << 1); 293 k1 = (tmp2 >> 8) & 0xFF; 294 k2 = mask & (tmp >> 7); 295 k1 = k2 - (k1 << 1); 296 k2 = (tmp2 >> 16) & 0xFF; 297 k3 = mask & (tmp >> 15); 298 k2 = k3 - (k2 << 1); 299 k3 = (tmp2 >> 24) & 0xFF; 300 k4 = mask & (tmp >> 23); 301 k3 = k4 - (k3 << 1); 302 tmp = *((Int*)(cur + 4)); /* another 4 pixels */ 303 tmp2 = *((Int*)(pred + 4)); 304 k4 = tmp2 & 0xFF; 305 k5 = mask & (tmp << 1); 306 k4 = k5 - (k4 << 1); 307 k5 = (tmp2 >> 8) & 0xFF; 308 k6 = mask & (tmp >> 7); 309 k5 = k6 - (k5 << 1); 310 k6 = (tmp2 >> 16) & 0xFF; 311 k7 = mask & (tmp >> 15); 312 k6 = k7 - (k6 << 1); 313 k7 = (tmp2 >> 24) & 0xFF; 314 tmp = mask & (tmp >> 23); 315 k7 = tmp - (k7 << 1); 316 cur += width; 317 pred += 16; 318 319 /* fdct_1 */ 320 k0 = k0 + k7; 321 k7 = k0 - (k7 << 1); 322 k1 = k1 + k6; 323 k6 = k1 - (k6 << 1); 324 k2 = k2 + k5; 325 k5 = k2 - (k5 << 1); 326 k3 = k3 + k4; 327 k4 = k3 - (k4 << 1); 328 329 k0 = k0 + k3; 330 k3 = k0 - (k3 << 1); 331 k1 = k1 + k2; 332 k2 = k1 - (k2 << 1); 333 334 k0 = k0 + k1; 335 /**********/ 336 dst[0] = k0; 337 /* fdct_2 */ 338 k4 = k4 + k5; 339 k5 = k5 + k6; 340 k6 = k6 + k7; 341 k2 = k2 + k3; 342 /* MUL2C k2,k5,724,FDCT_SHIFT */ 343 /* k0, k1 become scratch */ 344 /* assume FAST MULTIPLY */ 345 k1 = mla724(k12, k5, round); 346 k0 = mla724(k12, k2, round); 347 348 k5 = k1 >> FDCT_SHIFT; 349 k2 = k0 >> FDCT_SHIFT; 350 /*****************/ 351 k2 = k2 + k3; 352 /********/ 353 dst[2] = k2; /* col. 2 */ 354 /* fdct_3 */ 355 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 356 /* assume FAST MULTIPLY */ 357 /* k0, k1 are output */ 358 k0 = k4 - k6; 359 360 k1 = mla392(k0, k14, round); 361 k0 = mla554(k4, k12, k1); 362 k1 = mla1338(k6, k14, k1); 363 364 k4 = k0 >> FDCT_SHIFT; 365 k6 = k1 >> FDCT_SHIFT; 366 /***********************/ 367 k5 = k5 + k7; 368 k7 = (k7 << 1) - k5; 369 k7 = k7 - k4; 370 k5 = k5 + k6; 371 /********/ 372 dst[1] = k5; /* col. 1 */ 373 dst[3] = k7; /* col. 3 */ 374 dst += 8; 375 } 376 while (dst < out); 377 378 out -= 64; 379 dst = out + 4; 380 381 /* Vertical Block Loop */ 382 do /* Vertical 8xDCT loop */ 383 { 384 k0 = out[0]; 385 k1 = out[8]; 386 k2 = out[16]; 387 k3 = out[24]; 388 k4 = out[32]; 389 k5 = out[40]; 390 k6 = out[48]; 391 k7 = out[56]; 392 393 abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); 394 395 if (abs_sum < ColTh) 396 { 397 out[0] = 0x7fff; 398 out++; 399 continue; 400 } 401 /* fdct_1 */ 402 k0 = k0 + k7; 403 k7 = k0 - (k7 << 1); 404 k1 = k1 + k6; 405 k6 = k1 - (k6 << 1); 406 k2 = k2 + k5; 407 k5 = k2 - (k5 << 1); 408 k3 = k3 + k4; 409 k4 = k3 - (k4 << 1); 410 411 k0 = k0 + k3; 412 k3 = k0 - (k3 << 1); 413 k1 = k1 + k2; 414 k2 = k1 - (k2 << 1); 415 416 k0 = k0 + k1; 417 /**********/ 418 out[0] = k0; /* row 0 */ 419 /* fdct_2 */ 420 k4 = k4 + k5; 421 k5 = k5 + k6; 422 k6 = k6 + k7; 423 k2 = k2 + k3; 424 /* MUL2C k2,k5,724,FDCT_SHIFT */ 425 /* k0, k1 become scratch */ 426 /* assume FAST MULTIPLY */ 427 k1 = mla724(k12, k5, round); 428 k0 = mla724(k12, k2, round); 429 430 k5 = k1 >> FDCT_SHIFT; 431 k2 = k0 >> FDCT_SHIFT; 432 /*****************/ 433 k2 = k2 + k3; 434 /********/ 435 out[16] = k2; /* row 2 */ 436 /* fdct_3 */ 437 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 438 /* assume FAST MULTIPLY */ 439 /* k0, k1 are output */ 440 k0 = k4 - k6; 441 442 k1 = mla392(k0, k14, round); 443 k0 = mla554(k4, k12, k1); 444 k1 = mla1338(k6, k14, k1); 445 446 k4 = k0 >> FDCT_SHIFT; 447 k6 = k1 >> FDCT_SHIFT; 448 /***********************/ 449 k5 = k5 + k7; 450 k7 = (k7 << 1) - k5; 451 k7 = k7 - k4 ; 452 k5 = k5 + k6; 453 /********/ 454 out[24] = k7 ; /* row 3 */ 455 out[8] = k5 ; /* row 1 */ 456 out++; 457 } 458 while ((uintptr_t)out < (uintptr_t)dst) ; 459 460 return ; 461 } 462 463 /**************************************************************************/ 464 /* Function: Block2x2DCT_AANwSub 465 Date: 7/31/01 466 Input: 467 Output: out[64] ==> next block 468 Purpose: Do subtraction for zero MV first before 2x2 DCT 469 Modified: 470 **************************************************************************/ 471 472 473 Void Block2x2DCT_AANwSub(Short *out, UChar *cur, UChar *pred, Int width) 474 { 475 Short *dst; 476 Int k0, k1, k2, k3, k4, k5, k6, k7; 477 Int round; 478 Int k12 = 0x022A02D4; 479 Int k14 = 0x018803B2; 480 Int mask; 481 Int tmp, tmp2; 482 Int abs_sum; 483 Int ColTh; 484 485 dst = out + 64 ; 486 ColTh = *dst; 487 out += 128; 488 round = 1 << (FDCT_SHIFT - 1); 489 490 do /* fdct_nextrow */ 491 { 492 /* assuming the block is word-aligned */ 493 mask = 0x1FE; 494 tmp = *((Int*) cur); /* contains 4 pixels */ 495 tmp2 = *((Int*) pred); /* prediction 4 pixels */ 496 k0 = tmp2 & 0xFF; 497 k1 = mask & (tmp << 1); 498 k0 = k1 - (k0 << 1); 499 k1 = (tmp2 >> 8) & 0xFF; 500 k2 = mask & (tmp >> 7); 501 k1 = k2 - (k1 << 1); 502 k2 = (tmp2 >> 16) & 0xFF; 503 k3 = mask & (tmp >> 15); 504 k2 = k3 - (k2 << 1); 505 k3 = (tmp2 >> 24) & 0xFF; 506 k4 = mask & (tmp >> 23); 507 k3 = k4 - (k3 << 1); 508 tmp = *((Int*)(cur + 4)); /* another 4 pixels */ 509 tmp2 = *((Int*)(pred + 4)); 510 k4 = tmp2 & 0xFF; 511 k5 = mask & (tmp << 1); 512 k4 = k5 - (k4 << 1); 513 k5 = (tmp2 >> 8) & 0xFF; 514 k6 = mask & (tmp >> 7); 515 k5 = k6 - (k5 << 1); 516 k6 = (tmp2 >> 16) & 0xFF; 517 k7 = mask & (tmp >> 15); 518 k6 = k7 - (k6 << 1); 519 k7 = (tmp2 >> 24) & 0xFF; 520 tmp = mask & (tmp >> 23); 521 k7 = tmp - (k7 << 1); 522 cur += width; 523 pred += 16; 524 525 /* fdct_1 */ 526 k0 = k0 + k7; 527 k7 = k0 - (k7 << 1); 528 k1 = k1 + k6; 529 k6 = k1 - (k6 << 1); 530 k2 = k2 + k5; 531 k5 = k2 - (k5 << 1); 532 k3 = k3 + k4; 533 k4 = k3 - (k4 << 1); 534 535 k0 = k0 + k3; 536 k3 = k0 - (k3 << 1); 537 k1 = k1 + k2; 538 k2 = k1 - (k2 << 1); 539 540 k0 = k0 + k1; 541 /**********/ 542 dst[0] = k0; 543 /* fdct_2 */ 544 k4 = k4 + k5; 545 k5 = k5 + k6; 546 k6 = k6 + k7; 547 /* MUL2C k2,k5,724,FDCT_SHIFT */ 548 /* k0, k1 become scratch */ 549 /* assume FAST MULTIPLY */ 550 k1 = mla724(k12, k5, round); 551 552 k5 = k1 >> FDCT_SHIFT; 553 /*****************/ 554 /********/ 555 /* fdct_3 */ 556 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 557 /* assume FAST MULTIPLY */ 558 /* k0, k1 are output */ 559 k1 = mla392(k4, k14, round); 560 k1 = mla946(k6, k14, k1); 561 562 k6 = k1 >> FDCT_SHIFT; 563 /***********************/ 564 k5 = k5 + k7; 565 k5 = k5 + k6; 566 /********/ 567 dst[1] = k5; 568 dst += 8; 569 } 570 while (dst < out); 571 out -= 64; 572 dst = out + 2; 573 /* Vertical Block Loop */ 574 do /* Vertical 8xDCT loop */ 575 { 576 k0 = out[0]; 577 k1 = out[8]; 578 k2 = out[16]; 579 k3 = out[24]; 580 k4 = out[32]; 581 k5 = out[40]; 582 k6 = out[48]; 583 k7 = out[56]; 584 585 abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); 586 587 if (abs_sum < ColTh) 588 { 589 out[0] = 0x7fff; 590 out++; 591 continue; 592 } 593 /* fdct_1 */ 594 k0 = k0 + k7; 595 k7 = k0 - (k7 << 1); 596 k1 = k1 + k6; 597 k6 = k1 - (k6 << 1); 598 k2 = k2 + k5; 599 k5 = k2 - (k5 << 1); 600 k3 = k3 + k4; 601 k4 = k3 - (k4 << 1); 602 603 k0 = k0 + k3; 604 k3 = k0 - (k3 << 1); 605 k1 = k1 + k2; 606 k2 = k1 - (k2 << 1); 607 608 k0 = k0 + k1; 609 /**********/ 610 out[0] = k0; /* row 0 */ 611 /* fdct_2 */ 612 k4 = k4 + k5; 613 k5 = k5 + k6; 614 k6 = k6 + k7; 615 /* MUL2C k2,k5,724,FDCT_SHIFT */ 616 /* k0, k1 become scratch */ 617 /* assume FAST MULTIPLY */ 618 k1 = mla724(k12, k5, round); 619 620 k5 = k1 >> FDCT_SHIFT; 621 /*****************/ 622 /********/ 623 /* fdct_3 */ 624 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 625 /* assume FAST MULTIPLY */ 626 /* k0, k1 are output */ 627 k1 = mla392(k4, k14, round); 628 k1 = mla946(k6, k14, k1); 629 630 k6 = k1 >> FDCT_SHIFT; 631 /***********************/ 632 k5 = k5 + k7; 633 k5 = k5 + k6; 634 /********/ 635 out[8] = k5 ; /* row 1 */ 636 out++; 637 } 638 while ((uintptr_t)out < (uintptr_t)dst) ; 639 640 return ; 641 } 642 643 /**************************************************************************/ 644 /* Function: BlockDCT_AANIntra 645 Date: 8/9/01 646 Input: rec 647 Output: out[64] ==> next block 648 Purpose: Input directly from rec frame. 649 Modified: 650 **************************************************************************/ 651 652 Void BlockDCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width) 653 { 654 Short *dst; 655 Int k0, k1, k2, k3, k4, k5, k6, k7; 656 Int round; 657 Int k12 = 0x022A02D4; 658 Int k14 = 0x0188053A; 659 Int abs_sum; 660 Int mask; 661 Int *curInt, tmp; 662 Int ColTh; 663 664 OSCL_UNUSED_ARG(dummy2); 665 666 dst = out + 64 ; 667 ColTh = *dst; 668 out += 128; 669 round = 1 << (FDCT_SHIFT - 1); 670 671 do /* fdct_nextrow */ 672 { 673 mask = 0x1FE; 674 curInt = (Int*) cur; 675 tmp = curInt[0]; /* contains 4 pixels */ 676 k0 = mask & (tmp << 1); 677 k1 = mask & (tmp >> 7); 678 k2 = mask & (tmp >> 15); 679 k3 = mask & (tmp >> 23); 680 tmp = curInt[1]; /* another 4 pixels */ 681 k4 = mask & (tmp << 1); 682 k5 = mask & (tmp >> 7); 683 k6 = mask & (tmp >> 15); 684 k7 = mask & (tmp >> 23); 685 cur += width; 686 /* fdct_1 */ 687 k0 = k0 + k7; 688 k7 = k0 - (k7 << 1); 689 k1 = k1 + k6; 690 k6 = k1 - (k6 << 1); 691 k2 = k2 + k5; 692 k5 = k2 - (k5 << 1); 693 k3 = k3 + k4; 694 k4 = k3 - (k4 << 1); 695 696 k0 = k0 + k3; 697 k3 = k0 - (k3 << 1); 698 k1 = k1 + k2; 699 k2 = k1 - (k2 << 1); 700 701 k0 = k0 + k1; 702 k1 = k0 - (k1 << 1); 703 /**********/ 704 dst[0] = k0; 705 dst[4] = k1; /* col. 4 */ 706 /* fdct_2 */ 707 k4 = k4 + k5; 708 k5 = k5 + k6; 709 k6 = k6 + k7; 710 k2 = k2 + k3; 711 /* MUL2C k2,k5,724,FDCT_SHIFT */ 712 /* k0, k1 become scratch */ 713 /* assume FAST MULTIPLY */ 714 k1 = mla724(k12, k5, round); 715 k0 = mla724(k12, k2, round); 716 717 k5 = k1 >> FDCT_SHIFT; 718 k2 = k0 >> FDCT_SHIFT; 719 /*****************/ 720 k2 = k2 + k3; 721 k3 = (k3 << 1) - k2; 722 /********/ 723 dst[2] = k2; /* col. 2 */ 724 k3 <<= 1; /* scale up col. 6 */ 725 dst[6] = k3; /* col. 6 */ 726 /* fdct_3 */ 727 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 728 /* assume FAST MULTIPLY */ 729 /* k0, k1 are output */ 730 k0 = k4 - k6; 731 732 k1 = mla392(k0, k14, round); 733 k0 = mla554(k4, k12, k1); 734 k1 = mla1338(k6, k14, k1); 735 736 k4 = k0 >> FDCT_SHIFT; 737 k6 = k1 >> FDCT_SHIFT; 738 /***********************/ 739 k5 = k5 + k7; 740 k7 = (k7 << 1) - k5; 741 k4 = k4 + k7; 742 k7 = (k7 << 1) - k4; 743 k5 = k5 + k6; 744 k4 <<= 1; /* scale up col.5 */ 745 k6 = k5 - (k6 << 1); 746 /********/ 747 dst[5] = k4; /* col. 5 */ 748 k6 <<= 2; /* scale up col. 7 */ 749 dst[1] = k5; /* col. 1 */ 750 dst[7] = k6; /* col. 7 */ 751 dst[3] = k7; /* col. 3 */ 752 dst += 8; 753 } 754 while (dst < out); 755 756 out -= 64; 757 dst = out + 8; 758 759 /* Vertical Block Loop */ 760 do /* Vertical 8xDCT loop */ 761 { 762 k0 = out[0]; 763 k1 = out[8]; 764 k2 = out[16]; 765 k3 = out[24]; 766 k4 = out[32]; 767 k5 = out[40]; 768 k6 = out[48]; 769 k7 = out[56]; 770 /* deadzone thresholding for column */ 771 772 abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); 773 774 if (abs_sum < ColTh) 775 { 776 out[0] = 0x7fff; 777 out++; 778 continue; 779 } 780 781 /* fdct_1 */ 782 k0 = k0 + k7; 783 k7 = k0 - (k7 << 1); 784 k1 = k1 + k6; 785 k6 = k1 - (k6 << 1); 786 k2 = k2 + k5; 787 k5 = k2 - (k5 << 1); 788 k3 = k3 + k4; 789 k4 = k3 - (k4 << 1); 790 791 k0 = k0 + k3; 792 k3 = k0 - (k3 << 1); 793 k1 = k1 + k2; 794 k2 = k1 - (k2 << 1); 795 796 k0 = k0 + k1; 797 k1 = k0 - (k1 << 1); 798 /**********/ 799 out[32] = k1; /* row 4 */ 800 out[0] = k0; /* row 0 */ 801 /* fdct_2 */ 802 k4 = k4 + k5; 803 k5 = k5 + k6; 804 k6 = k6 + k7; 805 k2 = k2 + k3; 806 /* MUL2C k2,k5,724,FDCT_SHIFT */ 807 /* k0, k1 become scratch */ 808 /* assume FAST MULTIPLY */ 809 k1 = mla724(k12, k5, round); 810 k0 = mla724(k12, k2, round); 811 812 k5 = k1 >> FDCT_SHIFT; 813 k2 = k0 >> FDCT_SHIFT; 814 /*****************/ 815 k2 = k2 + k3; 816 k3 = (k3 << 1) - k2; 817 k3 <<= 1; /* scale up col. 6 */ 818 /********/ 819 out[48] = k3; /* row 6 */ 820 out[16] = k2; /* row 2 */ 821 /* fdct_3 */ 822 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 823 /* assume FAST MULTIPLY */ 824 /* k0, k1 are output */ 825 k0 = k4 - k6; 826 827 k1 = mla392(k0, k14, round); 828 k0 = mla554(k4, k12, k1); 829 k1 = mla1338(k6, k14, k1); 830 831 k4 = k0 >> FDCT_SHIFT; 832 k6 = k1 >> FDCT_SHIFT; 833 /***********************/ 834 k5 = k5 + k7; 835 k7 = (k7 << 1) - k5; 836 k4 = k4 + k7; 837 k7 = (k7 << 1) - k4; 838 k5 = k5 + k6; 839 k4 <<= 1; /* scale up col. 5 */ 840 k6 = k5 - (k6 << 1); 841 /********/ 842 out[24] = k7 ; /* row 3 */ 843 k6 <<= 2; /* scale up col. 7 */ 844 out[56] = k6 ; /* row 7 */ 845 out[8] = k5 ; /* row 1 */ 846 out[40] = k4 ; /* row 5 */ 847 out++; 848 } 849 while ((uintptr_t)out < (uintptr_t)dst) ; 850 851 return ; 852 } 853 854 /**************************************************************************/ 855 /* Function: Block4x4DCT_AANIntra 856 Date: 8/9/01 857 Input: prev 858 Output: out[64] ==> next block 859 Purpose: Input directly from prev frame. output 2x2 DCT 860 Modified: 861 **************************************************************************/ 862 863 Void Block4x4DCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width) 864 { 865 Short *dst; 866 Int k0, k1, k2, k3, k4, k5, k6, k7; 867 Int round; 868 Int k12 = 0x022A02D4; 869 Int k14 = 0x0188053A; 870 Int mask; 871 Int *curInt, tmp; 872 Int abs_sum; 873 Int ColTh; 874 875 OSCL_UNUSED_ARG(dummy2); 876 877 dst = out + 64 ; 878 ColTh = *dst; 879 out += 128; 880 round = 1 << (FDCT_SHIFT - 1); 881 882 do /* fdct_nextrow */ 883 { 884 mask = 0x1FE; 885 curInt = (Int*) cur; 886 tmp = curInt[0]; /* contains 4 pixels */ 887 k0 = mask & (tmp << 1); 888 k1 = mask & (tmp >> 7); 889 k2 = mask & (tmp >> 15); 890 k3 = mask & (tmp >> 23); 891 tmp = curInt[1]; /* another 4 pixels */ 892 k4 = mask & (tmp << 1); 893 k5 = mask & (tmp >> 7); 894 k6 = mask & (tmp >> 15); 895 k7 = mask & (tmp >> 23); 896 cur += width; 897 /* fdct_1 */ 898 k0 = k0 + k7; 899 k7 = k0 - (k7 << 1); 900 k1 = k1 + k6; 901 k6 = k1 - (k6 << 1); 902 k2 = k2 + k5; 903 k5 = k2 - (k5 << 1); 904 k3 = k3 + k4; 905 k4 = k3 - (k4 << 1); 906 907 k0 = k0 + k3; 908 k3 = k0 - (k3 << 1); 909 k1 = k1 + k2; 910 k2 = k1 - (k2 << 1); 911 912 k0 = k0 + k1; 913 /**********/ 914 dst[0] = k0; 915 /* fdct_2 */ 916 k4 = k4 + k5; 917 k5 = k5 + k6; 918 k6 = k6 + k7; 919 k2 = k2 + k3; 920 /* MUL2C k2,k5,724,FDCT_SHIFT */ 921 /* k0, k1 become scratch */ 922 /* assume FAST MULTIPLY */ 923 k1 = mla724(k12, k5, round); 924 k0 = mla724(k12, k2, round); 925 926 k5 = k1 >> FDCT_SHIFT; 927 k2 = k0 >> FDCT_SHIFT; 928 /*****************/ 929 k2 = k2 + k3; 930 /********/ 931 dst[2] = k2; /* col. 2 */ 932 /* fdct_3 */ 933 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 934 /* assume FAST MULTIPLY */ 935 /* k0, k1 are output */ 936 k0 = k4 - k6; 937 938 k1 = mla392(k0, k14, round); 939 k0 = mla554(k4, k12, k1); 940 k1 = mla1338(k6, k14, k1); 941 942 k4 = k0 >> FDCT_SHIFT; 943 k6 = k1 >> FDCT_SHIFT; 944 /***********************/ 945 k5 = k5 + k7; 946 k7 = (k7 << 1) - k5; 947 k7 = k7 - k4; 948 k5 = k5 + k6; 949 /********/ 950 dst[1] = k5; /* col. 1 */ 951 dst[3] = k7; /* col. 3 */ 952 dst += 8; 953 } 954 while (dst < out); 955 956 out -= 64; 957 dst = out + 4; 958 959 /* Vertical Block Loop */ 960 do /* Vertical 8xDCT loop */ 961 { 962 k0 = out[0]; 963 k1 = out[8]; 964 k2 = out[16]; 965 k3 = out[24]; 966 k4 = out[32]; 967 k5 = out[40]; 968 k6 = out[48]; 969 k7 = out[56]; 970 971 abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); 972 973 if (abs_sum < ColTh) 974 { 975 out[0] = 0x7fff; 976 out++; 977 continue; 978 } 979 /* fdct_1 */ 980 k0 = k0 + k7; 981 k7 = k0 - (k7 << 1); 982 k1 = k1 + k6; 983 k6 = k1 - (k6 << 1); 984 k2 = k2 + k5; 985 k5 = k2 - (k5 << 1); 986 k3 = k3 + k4; 987 k4 = k3 - (k4 << 1); 988 989 k0 = k0 + k3; 990 k3 = k0 - (k3 << 1); 991 k1 = k1 + k2; 992 k2 = k1 - (k2 << 1); 993 994 k0 = k0 + k1; 995 /**********/ 996 out[0] = k0; /* row 0 */ 997 /* fdct_2 */ 998 k4 = k4 + k5; 999 k5 = k5 + k6; 1000 k6 = k6 + k7; 1001 k2 = k2 + k3; 1002 /* MUL2C k2,k5,724,FDCT_SHIFT */ 1003 /* k0, k1 become scratch */ 1004 /* assume FAST MULTIPLY */ 1005 k1 = mla724(k12, k5, round); 1006 k0 = mla724(k12, k2, round); 1007 1008 k5 = k1 >> FDCT_SHIFT; 1009 k2 = k0 >> FDCT_SHIFT; 1010 /*****************/ 1011 k2 = k2 + k3; 1012 /********/ 1013 out[16] = k2; /* row 2 */ 1014 /* fdct_3 */ 1015 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 1016 /* assume FAST MULTIPLY */ 1017 /* k0, k1 are output */ 1018 k0 = k4 - k6; 1019 1020 k1 = mla392(k0, k14, round); 1021 k0 = mla554(k4, k12, k1); 1022 k1 = mla1338(k6, k14, k1); 1023 1024 k4 = k0 >> FDCT_SHIFT; 1025 k6 = k1 >> FDCT_SHIFT; 1026 /***********************/ 1027 k5 = k5 + k7; 1028 k7 = (k7 << 1) - k5; 1029 k7 = k7 - k4 ; 1030 k5 = k5 + k6; 1031 /********/ 1032 out[24] = k7 ; /* row 3 */ 1033 out[8] = k5 ; /* row 1 */ 1034 out++; 1035 } 1036 while ((uintptr_t)out < (uintptr_t)dst) ; 1037 1038 return ; 1039 } 1040 1041 /**************************************************************************/ 1042 /* Function: Block2x2DCT_AANIntra 1043 Date: 8/9/01 1044 Input: prev 1045 Output: out[64] ==> next block 1046 Purpose: Input directly from prev frame. output 2x2 DCT 1047 Modified: 1048 **************************************************************************/ 1049 1050 Void Block2x2DCT_AANIntra(Short *out, UChar *cur, UChar *dummy2, Int width) 1051 { 1052 Short *dst; 1053 Int k0, k1, k2, k3, k4, k5, k6, k7; 1054 Int round; 1055 Int k12 = 0x022A02D4; 1056 Int k14 = 0x018803B2; 1057 Int mask; 1058 Int *curInt, tmp; 1059 Int abs_sum; 1060 Int ColTh; 1061 1062 OSCL_UNUSED_ARG(dummy2); 1063 1064 dst = out + 64 ; 1065 ColTh = *dst; 1066 out += 128; 1067 round = 1 << (FDCT_SHIFT - 1); 1068 1069 do /* fdct_nextrow */ 1070 { 1071 mask = 0x1FE; 1072 curInt = (Int*) cur; 1073 tmp = curInt[0]; /* contains 4 pixels */ 1074 k0 = mask & (tmp << 1); 1075 k1 = mask & (tmp >> 7); 1076 k2 = mask & (tmp >> 15); 1077 k3 = mask & (tmp >> 23); 1078 tmp = curInt[1]; /* another 4 pixels */ 1079 k4 = mask & (tmp << 1); 1080 k5 = mask & (tmp >> 7); 1081 k6 = mask & (tmp >> 15); 1082 k7 = mask & (tmp >> 23); 1083 cur += width; 1084 1085 /* fdct_1 */ 1086 k0 = k0 + k7; 1087 k7 = k0 - (k7 << 1); 1088 k1 = k1 + k6; 1089 k6 = k1 - (k6 << 1); 1090 k2 = k2 + k5; 1091 k5 = k2 - (k5 << 1); 1092 k3 = k3 + k4; 1093 k4 = k3 - (k4 << 1); 1094 1095 k0 = k0 + k3; 1096 k3 = k0 - (k3 << 1); 1097 k1 = k1 + k2; 1098 k2 = k1 - (k2 << 1); 1099 1100 k0 = k0 + k1; 1101 /**********/ 1102 dst[0] = k0; 1103 /* fdct_2 */ 1104 k4 = k4 + k5; 1105 k5 = k5 + k6; 1106 k6 = k6 + k7; 1107 /* MUL2C k2,k5,724,FDCT_SHIFT */ 1108 /* k0, k1 become scratch */ 1109 /* assume FAST MULTIPLY */ 1110 k1 = mla724(k12, k5, round); 1111 1112 k5 = k1 >> FDCT_SHIFT; 1113 /*****************/ 1114 /********/ 1115 /* fdct_3 */ 1116 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 1117 /* assume FAST MULTIPLY */ 1118 /* k0, k1 are output */ 1119 k1 = mla392(k4, k14, round); 1120 k1 = mla946(k6, k14, k1); 1121 1122 k6 = k1 >> FDCT_SHIFT; 1123 /***********************/ 1124 k5 = k5 + k7; 1125 k5 = k5 + k6; 1126 /********/ 1127 dst[1] = k5; 1128 dst += 8; 1129 } 1130 while (dst < out); 1131 out -= 64; 1132 dst = out + 2; 1133 /* Vertical Block Loop */ 1134 do /* Vertical 8xDCT loop */ 1135 { 1136 k0 = out[0]; 1137 k1 = out[8]; 1138 k2 = out[16]; 1139 k3 = out[24]; 1140 k4 = out[32]; 1141 k5 = out[40]; 1142 k6 = out[48]; 1143 k7 = out[56]; 1144 1145 abs_sum = sum_abs(k0, k1, k2, k3, k4, k5, k6, k7); 1146 1147 if (abs_sum < ColTh) 1148 { 1149 out[0] = 0x7fff; 1150 out++; 1151 continue; 1152 } 1153 /* fdct_1 */ 1154 k0 = k0 + k7; 1155 k7 = k0 - (k7 << 1); 1156 k1 = k1 + k6; 1157 k6 = k1 - (k6 << 1); 1158 k2 = k2 + k5; 1159 k5 = k2 - (k5 << 1); 1160 k3 = k3 + k4; 1161 k4 = k3 - (k4 << 1); 1162 1163 k0 = k0 + k3; 1164 k3 = k0 - (k3 << 1); 1165 k1 = k1 + k2; 1166 k2 = k1 - (k2 << 1); 1167 1168 k0 = k0 + k1; 1169 /**********/ 1170 out[0] = k0; /* row 0 */ 1171 /* fdct_2 */ 1172 k4 = k4 + k5; 1173 k5 = k5 + k6; 1174 k6 = k6 + k7; 1175 /* MUL2C k2,k5,724,FDCT_SHIFT */ 1176 /* k0, k1 become scratch */ 1177 /* assume FAST MULTIPLY */ 1178 k1 = mla724(k12, k5, round); 1179 1180 k5 = k1 >> FDCT_SHIFT; 1181 /*****************/ 1182 /********/ 1183 /* fdct_3 */ 1184 /* ROTATE k4,k6,392,946, FDCT_SHIFT */ 1185 /* assume FAST MULTIPLY */ 1186 /* k0, k1 are output */ 1187 k1 = mla392(k4, k14, round); 1188 k1 = mla946(k6, k14, k1); 1189 1190 k6 = k1 >> FDCT_SHIFT; 1191 /***********************/ 1192 k5 = k5 + k7; 1193 k5 = k5 + k6; 1194 /********/ 1195 out[8] = k5 ; /* row 1 */ 1196 out++; 1197 } 1198 while ((uintptr_t)out < (uintptr_t)dst) ; 1199 1200 return ; 1201 } 1202 /**************************************************************************/ 1203 /* Function: Block1x1DCTwSub 1204 Date: 8/9/01 1205 Input: block 1206 Output: y 1207 Purpose: Compute DC value only 1208 Modified: 1209 **************************************************************************/ 1210 void Block1x1DCTwSub(Short *out, UChar *cur, UChar *pred, Int width) 1211 { 1212 UChar *end; 1213 Int temp = 0; 1214 Int offset2; 1215 1216 offset2 = width - 8; 1217 end = pred + (16 << 3); 1218 do 1219 { 1220 temp += (*cur++ - *pred++); 1221 temp += (*cur++ - *pred++); 1222 temp += (*cur++ - *pred++); 1223 temp += (*cur++ - *pred++); 1224 temp += (*cur++ - *pred++); 1225 temp += (*cur++ - *pred++); 1226 temp += (*cur++ - *pred++); 1227 temp += (*cur++ - *pred++); 1228 cur += offset2; 1229 pred += 8; 1230 } 1231 while (pred < end) ; 1232 1233 out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = 0; 1234 out[0] = temp >> 3; 1235 1236 return ; 1237 } 1238 1239 /**************************************************************************/ 1240 /* Function: Block1x1DCTIntra 1241 Date: 8/9/01 1242 Input: prev 1243 Output: out 1244 Purpose: Compute DC value only 1245 Modified: 1246 **************************************************************************/ 1247 void Block1x1DCTIntra(Short *out, UChar *cur, UChar *dummy2, Int width) 1248 { 1249 UChar *end; 1250 Int temp = 0; 1251 ULong word; 1252 1253 OSCL_UNUSED_ARG(dummy2); 1254 1255 end = cur + (width << 3); 1256 do 1257 { 1258 word = *((ULong*)cur); 1259 temp += (word >> 24); 1260 temp += ((word >> 16) & 0xFF); 1261 temp += ((word >> 8) & 0xFF); 1262 temp += (word & 0xFF); 1263 1264 word = *((ULong*)(cur + 4)); 1265 temp += (word >> 24); 1266 temp += ((word >> 16) & 0xFF); 1267 temp += ((word >> 8) & 0xFF); 1268 temp += (word & 0xFF); 1269 1270 cur += width; 1271 } 1272 while (cur < end) ; 1273 1274 out[1] = out[2] = out[3] = out[4] = out[5] = out[6] = out[7] = 0; 1275 out[0] = temp >> 3; 1276 1277 return ; 1278 } 1279 1280 #ifdef __cplusplus 1281 } 1282 #endif 1283 1284