1 /* ------------------------------------------------------------------ 2 * Copyright (C) 1998-2009 PacketVideo 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 * express or implied. 14 * See the License for the specific language governing permissions 15 * and limitations under the License. 16 * ------------------------------------------------------------------- 17 */ 18 /* 19 20 ------------------------------------------------------------------------------ 21 REVISION HISTORY 22 Who: Date: July/2001 23 Description: 1. Optimized BlockIDCT bitmap checking. 24 2. Rearranged functions. 25 3. Do column IDCT first, then row IDCT. 26 4. Combine motion comp and IDCT, require 27 two sets of row IDCTs one for INTRA 28 and one for INTER. 29 5. Add AAN IDCT 30 31 Who: Date: 8/16/01 32 1. Increase the input precision to 8 bits, i.e. change RDCTBITS 33 to 11, have to comment out all in-line assembly since 16 bit 34 multiplication doesn't work. Try to use diffent precision with 35 32 bit mult. but hasn't finished. Turns out that without in-line 36 assembly the performance doesn't change much (only 1%). 37 Who: Date: 9/04/05 38 1. Replace AAN IDCT with Chen's IDCT to accommodate 16 bit data type. 39 40 */ 41 #include "oscl_base_macros.h" // for OSCL_UNUSED_ARG 42 #include "mp4def.h" 43 #include "mp4enc_lib.h" 44 #include "mp4lib_int.h" 45 #include "dct.h" 46 47 #define ADD_CLIP { \ 48 tmp = *rec + tmp; \ 49 if((UInt)tmp > mask) tmp = mask&(~(tmp>>31)); \ 50 *rec++ = tmp; \ 51 } 52 53 #define INTRA_CLIP { \ 54 if((UInt)tmp > mask) tmp = mask&(~(tmp>>31)); \ 55 *rec++ = tmp; \ 56 } 57 58 59 #define CLIP_RESULT(x) if((UInt)x > 0xFF){x = 0xFF & (~(x>>31));} 60 #define ADD_AND_CLIP1(x) x += (pred_word&0xFF); CLIP_RESULT(x); 61 #define ADD_AND_CLIP2(x) x += ((pred_word>>8)&0xFF); CLIP_RESULT(x); 62 #define ADD_AND_CLIP3(x) x += ((pred_word>>16)&0xFF); CLIP_RESULT(x); 63 #define ADD_AND_CLIP4(x) x += ((pred_word>>24)&0xFF); CLIP_RESULT(x); 64 65 66 void idct_col0(Short *blk) 67 { 68 OSCL_UNUSED_ARG(blk); 69 70 return; 71 } 72 73 void idct_col1(Short *blk) 74 { 75 blk[0] = blk[8] = blk[16] = blk[24] = blk[32] = blk[40] = blk[48] = blk[56] = 76 blk[0] << 3; 77 return ; 78 } 79 80 void idct_col2(Short *blk) 81 { 82 int32 x0, x1, x3, x5, x7;//, x8; 83 84 x1 = blk[8]; 85 x0 = ((int32)blk[0] << 11) + 128; 86 /* both upper and lower*/ 87 88 x7 = W7 * x1; 89 x1 = W1 * x1; 90 91 x3 = x7; 92 x5 = (181 * (x1 - x7) + 128) >> 8; 93 x7 = (181 * (x1 + x7) + 128) >> 8; 94 95 blk[0] = (x0 + x1) >> 8; 96 blk[8] = (x0 + x7) >> 8; 97 blk[16] = (x0 + x5) >> 8; 98 blk[24] = (x0 + x3) >> 8; 99 blk[56] = (x0 - x1) >> 8; 100 blk[48] = (x0 - x7) >> 8; 101 blk[40] = (x0 - x5) >> 8; 102 blk[32] = (x0 - x3) >> 8; 103 return ; 104 } 105 106 void idct_col3(Short *blk) 107 { 108 int32 x0, x1, x2, x3, x4, x5, x6, x7, x8; 109 110 x2 = blk[16]; 111 x1 = blk[8]; 112 x0 = ((int32)blk[0] << 11) + 128; 113 114 x4 = x0; 115 x6 = W6 * x2; 116 x2 = W2 * x2; 117 x8 = x0 - x2; 118 x0 += x2; 119 x2 = x8; 120 x8 = x4 - x6; 121 x4 += x6; 122 x6 = x8; 123 124 x7 = W7 * x1; 125 x1 = W1 * x1; 126 x3 = x7; 127 x5 = (181 * (x1 - x7) + 128) >> 8; 128 x7 = (181 * (x1 + x7) + 128) >> 8; 129 130 blk[0] = (x0 + x1) >> 8; 131 blk[8] = (x4 + x7) >> 8; 132 blk[16] = (x6 + x5) >> 8; 133 blk[24] = (x2 + x3) >> 8; 134 blk[56] = (x0 - x1) >> 8; 135 blk[48] = (x4 - x7) >> 8; 136 blk[40] = (x6 - x5) >> 8; 137 blk[32] = (x2 - x3) >> 8; 138 return ; 139 } 140 141 void idct_col4(Short *blk) 142 { 143 int32 x0, x1, x2, x3, x4, x5, x6, x7, x8; 144 x2 = blk[16]; 145 x1 = blk[8]; 146 x3 = blk[24]; 147 x0 = ((int32)blk[0] << 11) + 128; 148 149 x4 = x0; 150 x6 = W6 * x2; 151 x2 = W2 * x2; 152 x8 = x0 - x2; 153 x0 += x2; 154 x2 = x8; 155 x8 = x4 - x6; 156 x4 += x6; 157 x6 = x8; 158 159 x7 = W7 * x1; 160 x1 = W1 * x1; 161 x5 = W3 * x3; 162 x3 = -W5 * x3; 163 x8 = x1 - x5; 164 x1 += x5; 165 x5 = x8; 166 x8 = x7 - x3; 167 x3 += x7; 168 x7 = (181 * (x5 + x8) + 128) >> 8; 169 x5 = (181 * (x5 - x8) + 128) >> 8; 170 171 172 blk[0] = (x0 + x1) >> 8; 173 blk[8] = (x4 + x7) >> 8; 174 blk[16] = (x6 + x5) >> 8; 175 blk[24] = (x2 + x3) >> 8; 176 blk[56] = (x0 - x1) >> 8; 177 blk[48] = (x4 - x7) >> 8; 178 blk[40] = (x6 - x5) >> 8; 179 blk[32] = (x2 - x3) >> 8; 180 return ; 181 } 182 183 #ifndef SMALL_DCT 184 void idct_col0x40(Short *blk) 185 { 186 int32 x1, x3, x5, x7;//, x8; 187 188 x1 = blk[8]; 189 /* both upper and lower*/ 190 191 x7 = W7 * x1; 192 x1 = W1 * x1; 193 194 x3 = x7; 195 x5 = (181 * (x1 - x7) + 128) >> 8; 196 x7 = (181 * (x1 + x7) + 128) >> 8; 197 198 blk[0] = (128 + x1) >> 8; 199 blk[8] = (128 + x7) >> 8; 200 blk[16] = (128 + x5) >> 8; 201 blk[24] = (128 + x3) >> 8; 202 blk[56] = (128 - x1) >> 8; 203 blk[48] = (128 - x7) >> 8; 204 blk[40] = (128 - x5) >> 8; 205 blk[32] = (128 - x3) >> 8; 206 207 return ; 208 } 209 210 void idct_col0x20(Short *blk) 211 { 212 int32 x0, x2, x4, x6; 213 214 x2 = blk[16]; 215 x6 = W6 * x2; 216 x2 = W2 * x2; 217 x0 = 128 + x2; 218 x2 = 128 - x2; 219 x4 = 128 + x6; 220 x6 = 128 - x6; 221 222 blk[0] = (x0) >> 8; 223 blk[56] = (x0) >> 8; 224 blk[8] = (x4) >> 8; 225 blk[48] = (x4) >> 8; 226 blk[16] = (x6) >> 8; 227 blk[40] = (x6) >> 8; 228 blk[24] = (x2) >> 8; 229 blk[32] = (x2) >> 8; 230 231 return ; 232 } 233 234 void idct_col0x10(Short *blk) 235 { 236 int32 x1, x3, x5, x7; 237 238 x3 = blk[24]; 239 x1 = W3 * x3; 240 x3 = W5 * x3; 241 242 x7 = (181 * (x3 - x1) + 128) >> 8; 243 x5 = (-181 * (x1 + x3) + 128) >> 8; 244 245 246 blk[0] = (128 + x1) >> 8; 247 blk[8] = (128 + x7) >> 8; 248 blk[16] = (128 + x5) >> 8; 249 blk[24] = (128 - x3) >> 8; 250 blk[56] = (128 - x1) >> 8; 251 blk[48] = (128 - x7) >> 8; 252 blk[40] = (128 - x5) >> 8; 253 blk[32] = (128 + x3) >> 8; 254 255 return ; 256 } 257 258 #endif /* SMALL_DCT */ 259 260 void idct_col(Short *blk) 261 { 262 int32 x0, x1, x2, x3, x4, x5, x6, x7, x8; 263 264 x1 = (int32)blk[32] << 11; 265 x2 = blk[48]; 266 x3 = blk[16]; 267 x4 = blk[8]; 268 x5 = blk[56]; 269 x6 = blk[40]; 270 x7 = blk[24]; 271 x0 = ((int32)blk[0] << 11) + 128; 272 273 /* first stage */ 274 x8 = W7 * (x4 + x5); 275 x4 = x8 + (W1 - W7) * x4; 276 x5 = x8 - (W1 + W7) * x5; 277 x8 = W3 * (x6 + x7); 278 x6 = x8 - (W3 - W5) * x6; 279 x7 = x8 - (W3 + W5) * x7; 280 281 /* second stage */ 282 x8 = x0 + x1; 283 x0 -= x1; 284 x1 = W6 * (x3 + x2); 285 x2 = x1 - (W2 + W6) * x2; 286 x3 = x1 + (W2 - W6) * x3; 287 x1 = x4 + x6; 288 x4 -= x6; 289 x6 = x5 + x7; 290 x5 -= x7; 291 292 /* third stage */ 293 x7 = x8 + x3; 294 x8 -= x3; 295 x3 = x0 + x2; 296 x0 -= x2; 297 x2 = (181 * (x4 + x5) + 128) >> 8; 298 x4 = (181 * (x4 - x5) + 128) >> 8; 299 300 /* fourth stage */ 301 blk[0] = (x7 + x1) >> 8; 302 blk[8] = (x3 + x2) >> 8; 303 blk[16] = (x0 + x4) >> 8; 304 blk[24] = (x8 + x6) >> 8; 305 blk[32] = (x8 - x6) >> 8; 306 blk[40] = (x0 - x4) >> 8; 307 blk[48] = (x3 - x2) >> 8; 308 blk[56] = (x7 - x1) >> 8; 309 310 return ; 311 } 312 313 /* This function should not be called at all ****/ 314 void idct_row0Inter(Short *srce, UChar *rec, Int lx) 315 { 316 OSCL_UNUSED_ARG(srce); 317 318 OSCL_UNUSED_ARG(rec); 319 320 OSCL_UNUSED_ARG(lx); 321 322 return; 323 } 324 325 void idct_row1Inter(Short *blk, UChar *rec, Int lx) 326 { 327 int tmp; 328 int i = 8; 329 uint32 pred_word, dst_word; 330 int res, res2; 331 332 /* preset the offset, such that we can take advantage pre-offset addressing mode */ 333 rec -= lx; 334 blk -= 8; 335 336 while (i--) 337 { 338 tmp = (*(blk += 8) + 32) >> 6; 339 *blk = 0; 340 341 pred_word = *((uint32*)(rec += lx)); /* read 4 bytes from pred */ 342 res = tmp + (pred_word & 0xFF); 343 CLIP_RESULT(res); 344 res2 = tmp + ((pred_word >> 8) & 0xFF); 345 CLIP_RESULT(res2); 346 dst_word = (res2 << 8) | res; 347 res = tmp + ((pred_word >> 16) & 0xFF); 348 CLIP_RESULT(res); 349 dst_word |= (res << 16); 350 res = tmp + ((pred_word >> 24) & 0xFF); 351 CLIP_RESULT(res); 352 dst_word |= (res << 24); 353 *((uint32*)rec) = dst_word; /* save 4 bytes to dst */ 354 355 pred_word = *((uint32*)(rec + 4)); /* read 4 bytes from pred */ 356 res = tmp + (pred_word & 0xFF); 357 CLIP_RESULT(res); 358 res2 = tmp + ((pred_word >> 8) & 0xFF); 359 CLIP_RESULT(res2); 360 dst_word = (res2 << 8) | res; 361 res = tmp + ((pred_word >> 16) & 0xFF); 362 CLIP_RESULT(res); 363 dst_word |= (res << 16); 364 res = tmp + ((pred_word >> 24) & 0xFF); 365 CLIP_RESULT(res); 366 dst_word |= (res << 24); 367 *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */ 368 } 369 return; 370 } 371 372 void idct_row2Inter(Short *blk, UChar *rec, Int lx) 373 { 374 int32 x0, x1, x2, x4, x5; 375 int i = 8; 376 uint32 pred_word, dst_word; 377 int res, res2; 378 379 /* preset the offset, such that we can take advantage pre-offset addressing mode */ 380 rec -= lx; 381 blk -= 8; 382 383 while (i--) 384 { 385 /* shortcut */ 386 x4 = blk[9]; 387 blk[9] = 0; 388 x0 = ((*(blk += 8)) << 8) + 8192; 389 *blk = 0; /* for proper rounding in the fourth stage */ 390 391 /* first stage */ 392 x5 = (W7 * x4 + 4) >> 3; 393 x4 = (W1 * x4 + 4) >> 3; 394 395 /* third stage */ 396 x2 = (181 * (x4 + x5) + 128) >> 8; 397 x1 = (181 * (x4 - x5) + 128) >> 8; 398 399 /* fourth stage */ 400 pred_word = *((uint32*)(rec += lx)); /* read 4 bytes from pred */ 401 res = (x0 + x4) >> 14; 402 ADD_AND_CLIP1(res); 403 res2 = (x0 + x2) >> 14; 404 ADD_AND_CLIP2(res2); 405 dst_word = (res2 << 8) | res; 406 res = (x0 + x1) >> 14; 407 ADD_AND_CLIP3(res); 408 dst_word |= (res << 16); 409 res = (x0 + x5) >> 14; 410 ADD_AND_CLIP4(res); 411 dst_word |= (res << 24); 412 *((uint32*)rec) = dst_word; /* save 4 bytes to dst */ 413 414 pred_word = *((uint32*)(rec + 4)); /* read 4 bytes from pred */ 415 res = (x0 - x5) >> 14; 416 ADD_AND_CLIP1(res); 417 res2 = (x0 - x1) >> 14; 418 ADD_AND_CLIP2(res2); 419 dst_word = (res2 << 8) | res; 420 res = (x0 - x2) >> 14; 421 ADD_AND_CLIP3(res); 422 dst_word |= (res << 16); 423 res = (x0 - x4) >> 14; 424 ADD_AND_CLIP4(res); 425 dst_word |= (res << 24); 426 *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */ 427 } 428 return ; 429 } 430 431 void idct_row3Inter(Short *blk, UChar *rec, Int lx) 432 { 433 int32 x0, x1, x2, x3, x4, x5, x6, x7, x8; 434 int i = 8; 435 uint32 pred_word, dst_word; 436 int res, res2; 437 438 /* preset the offset, such that we can take advantage pre-offset addressing mode */ 439 rec -= lx; 440 blk -= 8; 441 442 while (i--) 443 { 444 x2 = blk[10]; 445 blk[10] = 0; 446 x1 = blk[9]; 447 blk[9] = 0; 448 x0 = ((*(blk += 8)) << 8) + 8192; 449 *blk = 0; /* for proper rounding in the fourth stage */ 450 /* both upper and lower*/ 451 /* both x2orx6 and x0orx4 */ 452 453 x4 = x0; 454 x6 = (W6 * x2 + 4) >> 3; 455 x2 = (W2 * x2 + 4) >> 3; 456 x8 = x0 - x2; 457 x0 += x2; 458 x2 = x8; 459 x8 = x4 - x6; 460 x4 += x6; 461 x6 = x8; 462 463 x7 = (W7 * x1 + 4) >> 3; 464 x1 = (W1 * x1 + 4) >> 3; 465 x3 = x7; 466 x5 = (181 * (x1 - x7) + 128) >> 8; 467 x7 = (181 * (x1 + x7) + 128) >> 8; 468 469 pred_word = *((uint32*)(rec += lx)); /* read 4 bytes from pred */ 470 res = (x0 + x1) >> 14; 471 ADD_AND_CLIP1(res); 472 res2 = (x4 + x7) >> 14; 473 ADD_AND_CLIP2(res2); 474 dst_word = (res2 << 8) | res; 475 res = (x6 + x5) >> 14; 476 ADD_AND_CLIP3(res); 477 dst_word |= (res << 16); 478 res = (x2 + x3) >> 14; 479 ADD_AND_CLIP4(res); 480 dst_word |= (res << 24); 481 *((uint32*)rec) = dst_word; /* save 4 bytes to dst */ 482 483 pred_word = *((uint32*)(rec + 4)); /* read 4 bytes from pred */ 484 res = (x2 - x3) >> 14; 485 ADD_AND_CLIP1(res); 486 res2 = (x6 - x5) >> 14; 487 ADD_AND_CLIP2(res2); 488 dst_word = (res2 << 8) | res; 489 res = (x4 - x7) >> 14; 490 ADD_AND_CLIP3(res); 491 dst_word |= (res << 16); 492 res = (x0 - x1) >> 14; 493 ADD_AND_CLIP4(res); 494 dst_word |= (res << 24); 495 *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */ 496 } 497 498 return ; 499 } 500 501 void idct_row4Inter(Short *blk, UChar *rec, Int lx) 502 { 503 int32 x0, x1, x2, x3, x4, x5, x6, x7, x8; 504 int i = 8; 505 uint32 pred_word, dst_word; 506 int res, res2; 507 508 /* preset the offset, such that we can take advantage pre-offset addressing mode */ 509 rec -= lx; 510 blk -= 8; 511 512 while (i--) 513 { 514 x2 = blk[10]; 515 blk[10] = 0; 516 x1 = blk[9]; 517 blk[9] = 0; 518 x3 = blk[11]; 519 blk[11] = 0; 520 x0 = ((*(blk += 8)) << 8) + 8192; 521 *blk = 0; /* for proper rounding in the fourth stage */ 522 523 x4 = x0; 524 x6 = (W6 * x2 + 4) >> 3; 525 x2 = (W2 * x2 + 4) >> 3; 526 x8 = x0 - x2; 527 x0 += x2; 528 x2 = x8; 529 x8 = x4 - x6; 530 x4 += x6; 531 x6 = x8; 532 533 x7 = (W7 * x1 + 4) >> 3; 534 x1 = (W1 * x1 + 4) >> 3; 535 x5 = (W3 * x3 + 4) >> 3; 536 x3 = (- W5 * x3 + 4) >> 3; 537 x8 = x1 - x5; 538 x1 += x5; 539 x5 = x8; 540 x8 = x7 - x3; 541 x3 += x7; 542 x7 = (181 * (x5 + x8) + 128) >> 8; 543 x5 = (181 * (x5 - x8) + 128) >> 8; 544 545 pred_word = *((uint32*)(rec += lx)); /* read 4 bytes from pred */ 546 res = (x0 + x1) >> 14; 547 ADD_AND_CLIP1(res); 548 res2 = (x4 + x7) >> 14; 549 ADD_AND_CLIP2(res2); 550 dst_word = (res2 << 8) | res; 551 res = (x6 + x5) >> 14; 552 ADD_AND_CLIP3(res); 553 dst_word |= (res << 16); 554 res = (x2 + x3) >> 14; 555 ADD_AND_CLIP4(res); 556 dst_word |= (res << 24); 557 *((uint32*)rec) = dst_word; /* save 4 bytes to dst */ 558 559 pred_word = *((uint32*)(rec + 4)); /* read 4 bytes from pred */ 560 res = (x2 - x3) >> 14; 561 ADD_AND_CLIP1(res); 562 res2 = (x6 - x5) >> 14; 563 ADD_AND_CLIP2(res2); 564 dst_word = (res2 << 8) | res; 565 res = (x4 - x7) >> 14; 566 ADD_AND_CLIP3(res); 567 dst_word |= (res << 16); 568 res = (x0 - x1) >> 14; 569 ADD_AND_CLIP4(res); 570 dst_word |= (res << 24); 571 *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */ 572 } 573 return ; 574 } 575 576 #ifndef SMALL_DCT 577 void idct_row0x40Inter(Short *blk, UChar *rec, Int lx) 578 { 579 int32 x1, x2, x4, x5; 580 int i = 8; 581 uint32 pred_word, dst_word; 582 int res, res2; 583 584 /* preset the offset, such that we can take advantage pre-offset addressing mode */ 585 rec -= lx; 586 587 while (i--) 588 { 589 /* shortcut */ 590 x4 = blk[1]; 591 blk[1] = 0; 592 blk += 8; /* for proper rounding in the fourth stage */ 593 594 /* first stage */ 595 x5 = (W7 * x4 + 4) >> 3; 596 x4 = (W1 * x4 + 4) >> 3; 597 598 /* third stage */ 599 x2 = (181 * (x4 + x5) + 128) >> 8; 600 x1 = (181 * (x4 - x5) + 128) >> 8; 601 602 /* fourth stage */ 603 pred_word = *((uint32*)(rec += lx)); /* read 4 bytes from pred */ 604 res = (8192 + x4) >> 14; 605 ADD_AND_CLIP1(res); 606 res2 = (8192 + x2) >> 14; 607 ADD_AND_CLIP2(res2); 608 dst_word = (res2 << 8) | res; 609 res = (8192 + x1) >> 14; 610 ADD_AND_CLIP3(res); 611 dst_word |= (res << 16); 612 res = (8192 + x5) >> 14; 613 ADD_AND_CLIP4(res); 614 dst_word |= (res << 24); 615 *((uint32*)rec) = dst_word; /* save 4 bytes to dst */ 616 617 pred_word = *((uint32*)(rec + 4)); /* read 4 bytes from pred */ 618 res = (8192 - x5) >> 14; 619 ADD_AND_CLIP1(res); 620 res2 = (8192 - x1) >> 14; 621 ADD_AND_CLIP2(res2); 622 dst_word = (res2 << 8) | res; 623 res = (8192 - x2) >> 14; 624 ADD_AND_CLIP3(res); 625 dst_word |= (res << 16); 626 res = (8192 - x4) >> 14; 627 ADD_AND_CLIP4(res); 628 dst_word |= (res << 24); 629 *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */ 630 } 631 return ; 632 } 633 634 void idct_row0x20Inter(Short *blk, UChar *rec, Int lx) 635 { 636 int32 x0, x2, x4, x6; 637 int i = 8; 638 uint32 pred_word, dst_word; 639 int res, res2; 640 641 /* preset the offset, such that we can take advantage pre-offset addressing mode */ 642 rec -= lx; 643 644 while (i--) 645 { 646 x2 = blk[2]; 647 blk[2] = 0; 648 blk += 8; /* for proper rounding in the fourth stage */ 649 /* both upper and lower*/ 650 /* both x2orx6 and x0orx4 */ 651 x6 = (W6 * x2 + 4) >> 3; 652 x2 = (W2 * x2 + 4) >> 3; 653 x0 = 8192 + x2; 654 x2 = 8192 - x2; 655 x4 = 8192 + x6; 656 x6 = 8192 - x6; 657 658 pred_word = *((uint32*)(rec += lx)); /* read 4 bytes from pred */ 659 res = (x0) >> 14; 660 ADD_AND_CLIP1(res); 661 res2 = (x4) >> 14; 662 ADD_AND_CLIP2(res2); 663 dst_word = (res2 << 8) | res; 664 res = (x6) >> 14; 665 ADD_AND_CLIP3(res); 666 dst_word |= (res << 16); 667 res = (x2) >> 14; 668 ADD_AND_CLIP4(res); 669 dst_word |= (res << 24); 670 *((uint32*)rec) = dst_word; /* save 4 bytes to dst */ 671 672 pred_word = *((uint32*)(rec + 4)); /* read 4 bytes from pred */ 673 res = (x2) >> 14; 674 ADD_AND_CLIP1(res); 675 res2 = (x6) >> 14; 676 ADD_AND_CLIP2(res2); 677 dst_word = (res2 << 8) | res; 678 res = (x4) >> 14; 679 ADD_AND_CLIP3(res); 680 dst_word |= (res << 16); 681 res = (x0) >> 14; 682 ADD_AND_CLIP4(res); 683 dst_word |= (res << 24); 684 *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */ 685 } 686 687 return ; 688 } 689 690 void idct_row0x10Inter(Short *blk, UChar *rec, Int lx) 691 { 692 int32 x1, x3, x5, x7; 693 int i = 8; 694 uint32 pred_word, dst_word; 695 int res, res2; 696 697 /* preset the offset, such that we can take advantage pre-offset addressing mode */ 698 rec -= lx; 699 700 while (i--) 701 { 702 x3 = blk[3]; 703 blk[3] = 0; 704 blk += 8; 705 706 x1 = (W3 * x3 + 4) >> 3; 707 x3 = (-W5 * x3 + 4) >> 3; 708 709 x7 = (-181 * (x3 + x1) + 128) >> 8; 710 x5 = (181 * (x3 - x1) + 128) >> 8; 711 712 pred_word = *((uint32*)(rec += lx)); /* read 4 bytes from pred */ 713 res = (8192 + x1) >> 14; 714 ADD_AND_CLIP1(res); 715 res2 = (8192 + x7) >> 14; 716 ADD_AND_CLIP2(res2); 717 dst_word = (res2 << 8) | res; 718 res = (8192 + x5) >> 14; 719 ADD_AND_CLIP3(res); 720 dst_word |= (res << 16); 721 res = (8192 + x3) >> 14; 722 ADD_AND_CLIP4(res); 723 dst_word |= (res << 24); 724 *((uint32*)rec) = dst_word; /* save 4 bytes to dst */ 725 726 pred_word = *((uint32*)(rec + 4)); /* read 4 bytes from pred */ 727 res = (8192 - x3) >> 14; 728 ADD_AND_CLIP1(res); 729 res2 = (8192 - x5) >> 14; 730 ADD_AND_CLIP2(res2); 731 dst_word = (res2 << 8) | res; 732 res = (8192 - x7) >> 14; 733 ADD_AND_CLIP3(res); 734 dst_word |= (res << 16); 735 res = (8192 - x1) >> 14; 736 ADD_AND_CLIP4(res); 737 dst_word |= (res << 24); 738 *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */ 739 } 740 return ; 741 } 742 743 #endif /* SMALL_DCT */ 744 745 void idct_rowInter(Short *blk, UChar *rec, Int lx) 746 { 747 int32 x0, x1, x2, x3, x4, x5, x6, x7, x8; 748 int i = 8; 749 uint32 pred_word, dst_word; 750 int res, res2; 751 752 /* preset the offset, such that we can take advantage pre-offset addressing mode */ 753 rec -= lx; 754 blk -= 8; 755 756 while (i--) 757 { 758 x1 = (int32)blk[12] << 8; 759 blk[12] = 0; 760 x2 = blk[14]; 761 blk[14] = 0; 762 x3 = blk[10]; 763 blk[10] = 0; 764 x4 = blk[9]; 765 blk[9] = 0; 766 x5 = blk[15]; 767 blk[15] = 0; 768 x6 = blk[13]; 769 blk[13] = 0; 770 x7 = blk[11]; 771 blk[11] = 0; 772 x0 = ((*(blk += 8)) << 8) + 8192; 773 *blk = 0; /* for proper rounding in the fourth stage */ 774 775 /* first stage */ 776 x8 = W7 * (x4 + x5) + 4; 777 x4 = (x8 + (W1 - W7) * x4) >> 3; 778 x5 = (x8 - (W1 + W7) * x5) >> 3; 779 x8 = W3 * (x6 + x7) + 4; 780 x6 = (x8 - (W3 - W5) * x6) >> 3; 781 x7 = (x8 - (W3 + W5) * x7) >> 3; 782 783 /* second stage */ 784 x8 = x0 + x1; 785 x0 -= x1; 786 x1 = W6 * (x3 + x2) + 4; 787 x2 = (x1 - (W2 + W6) * x2) >> 3; 788 x3 = (x1 + (W2 - W6) * x3) >> 3; 789 x1 = x4 + x6; 790 x4 -= x6; 791 x6 = x5 + x7; 792 x5 -= x7; 793 794 /* third stage */ 795 x7 = x8 + x3; 796 x8 -= x3; 797 x3 = x0 + x2; 798 x0 -= x2; 799 x2 = (181 * (x4 + x5) + 128) >> 8; 800 x4 = (181 * (x4 - x5) + 128) >> 8; 801 802 /* fourth stage */ 803 pred_word = *((uint32*)(rec += lx)); /* read 4 bytes from pred */ 804 805 res = (x7 + x1) >> 14; 806 ADD_AND_CLIP1(res); 807 res2 = (x3 + x2) >> 14; 808 ADD_AND_CLIP2(res2); 809 dst_word = (res2 << 8) | res; 810 res = (x0 + x4) >> 14; 811 ADD_AND_CLIP3(res); 812 dst_word |= (res << 16); 813 res = (x8 + x6) >> 14; 814 ADD_AND_CLIP4(res); 815 dst_word |= (res << 24); 816 *((uint32*)rec) = dst_word; /* save 4 bytes to dst */ 817 818 pred_word = *((uint32*)(rec + 4)); /* read 4 bytes from pred */ 819 820 res = (x8 - x6) >> 14; 821 ADD_AND_CLIP1(res); 822 res2 = (x0 - x4) >> 14; 823 ADD_AND_CLIP2(res2); 824 dst_word = (res2 << 8) | res; 825 res = (x3 - x2) >> 14; 826 ADD_AND_CLIP3(res); 827 dst_word |= (res << 16); 828 res = (x7 - x1) >> 14; 829 ADD_AND_CLIP4(res); 830 dst_word |= (res << 24); 831 *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */ 832 } 833 return; 834 } 835 836 void idct_row0Intra(Short *srce, UChar *rec, Int lx) 837 { 838 OSCL_UNUSED_ARG(srce); 839 840 OSCL_UNUSED_ARG(rec); 841 842 OSCL_UNUSED_ARG(lx); 843 844 return; 845 } 846 847 void idct_row1Intra(Short *blk, UChar *rec, Int lx) 848 { 849 int32 tmp; 850 int i = 8; 851 852 rec -= lx; 853 blk -= 8; 854 while (i--) 855 { 856 tmp = ((*(blk += 8) + 32) >> 6); 857 *blk = 0; 858 CLIP_RESULT(tmp) 859 860 tmp |= (tmp << 8); 861 tmp |= (tmp << 16); 862 *((uint32*)(rec += lx)) = tmp; 863 *((uint32*)(rec + 4)) = tmp; 864 } 865 return; 866 } 867 868 void idct_row2Intra(Short *blk, UChar *rec, Int lx) 869 { 870 int32 x0, x1, x2, x4, x5; 871 int res, res2; 872 uint32 dst_word; 873 int i = 8; 874 875 rec -= lx; 876 blk -= 8; 877 while (i--) 878 { 879 /* shortcut */ 880 x4 = blk[9]; 881 blk[9] = 0; 882 x0 = ((*(blk += 8)) << 8) + 8192; 883 *blk = 0; /* for proper rounding in the fourth stage */ 884 885 /* first stage */ 886 x5 = (W7 * x4 + 4) >> 3; 887 x4 = (W1 * x4 + 4) >> 3; 888 889 /* third stage */ 890 x2 = (181 * (x4 + x5) + 128) >> 8; 891 x1 = (181 * (x4 - x5) + 128) >> 8; 892 893 /* fourth stage */ 894 res = ((x0 + x4) >> 14); 895 CLIP_RESULT(res) 896 res2 = ((x0 + x2) >> 14); 897 CLIP_RESULT(res2) 898 dst_word = (res2 << 8) | res; 899 res = ((x0 + x1) >> 14); 900 CLIP_RESULT(res) 901 dst_word |= (res << 16); 902 res = ((x0 + x5) >> 14); 903 CLIP_RESULT(res) 904 dst_word |= (res << 24); 905 *((uint32*)(rec += lx)) = dst_word; 906 907 res = ((x0 - x5) >> 14); 908 CLIP_RESULT(res) 909 res2 = ((x0 - x1) >> 14); 910 CLIP_RESULT(res2) 911 dst_word = (res2 << 8) | res; 912 res = ((x0 - x2) >> 14); 913 CLIP_RESULT(res) 914 dst_word |= (res << 16); 915 res = ((x0 - x4) >> 14); 916 CLIP_RESULT(res) 917 dst_word |= (res << 24); 918 *((uint32*)(rec + 4)) = dst_word; 919 } 920 return ; 921 } 922 923 void idct_row3Intra(Short *blk, UChar *rec, Int lx) 924 { 925 int32 x0, x1, x2, x3, x4, x5, x6, x7, x8; 926 int res, res2; 927 uint32 dst_word; 928 int i = 8; 929 930 rec -= lx; 931 blk -= 8; 932 while (i--) 933 { 934 x2 = blk[10]; 935 blk[10] = 0; 936 x1 = blk[9]; 937 blk[9] = 0; 938 x0 = ((*(blk += 8)) << 8) + 8192; 939 *blk = 0;/* for proper rounding in the fourth stage */ 940 /* both upper and lower*/ 941 /* both x2orx6 and x0orx4 */ 942 943 x4 = x0; 944 x6 = (W6 * x2 + 4) >> 3; 945 x2 = (W2 * x2 + 4) >> 3; 946 x8 = x0 - x2; 947 x0 += x2; 948 x2 = x8; 949 x8 = x4 - x6; 950 x4 += x6; 951 x6 = x8; 952 953 x7 = (W7 * x1 + 4) >> 3; 954 x1 = (W1 * x1 + 4) >> 3; 955 x3 = x7; 956 x5 = (181 * (x1 - x7) + 128) >> 8; 957 x7 = (181 * (x1 + x7) + 128) >> 8; 958 959 res = ((x0 + x1) >> 14); 960 CLIP_RESULT(res) 961 res2 = ((x4 + x7) >> 14); 962 CLIP_RESULT(res2) 963 dst_word = (res2 << 8) | res; 964 res = ((x6 + x5) >> 14); 965 CLIP_RESULT(res) 966 dst_word |= (res << 16); 967 res = ((x2 + x3) >> 14); 968 CLIP_RESULT(res) 969 dst_word |= (res << 24); 970 *((uint32*)(rec += lx)) = dst_word; 971 972 res = ((x2 - x3) >> 14); 973 CLIP_RESULT(res) 974 res2 = ((x6 - x5) >> 14); 975 CLIP_RESULT(res2) 976 dst_word = (res2 << 8) | res; 977 res = ((x4 - x7) >> 14); 978 CLIP_RESULT(res) 979 dst_word |= (res << 16); 980 res = ((x0 - x1) >> 14); 981 CLIP_RESULT(res) 982 dst_word |= (res << 24); 983 *((uint32*)(rec + 4)) = dst_word; 984 985 } 986 return ; 987 } 988 989 void idct_row4Intra(Short *blk, UChar *rec, Int lx) 990 { 991 int32 x0, x1, x2, x3, x4, x5, x6, x7, x8; 992 int res, res2; 993 uint32 dst_word; 994 int i = 8; 995 996 rec -= lx; 997 blk -= 8; 998 while (i--) 999 { 1000 x2 = blk[10]; 1001 blk[10] = 0; 1002 x1 = blk[9]; 1003 blk[9] = 0; 1004 x3 = blk[11]; 1005 blk[11] = 0; 1006 x0 = ((*(blk += 8)) << 8) + 8192; 1007 *blk = 0; /* for proper rounding in the fourth stage */ 1008 1009 x4 = x0; 1010 x6 = (W6 * x2 + 4) >> 3; 1011 x2 = (W2 * x2 + 4) >> 3; 1012 x8 = x0 - x2; 1013 x0 += x2; 1014 x2 = x8; 1015 x8 = x4 - x6; 1016 x4 += x6; 1017 x6 = x8; 1018 1019 x7 = (W7 * x1 + 4) >> 3; 1020 x1 = (W1 * x1 + 4) >> 3; 1021 x5 = (W3 * x3 + 4) >> 3; 1022 x3 = (- W5 * x3 + 4) >> 3; 1023 x8 = x1 - x5; 1024 x1 += x5; 1025 x5 = x8; 1026 x8 = x7 - x3; 1027 x3 += x7; 1028 x7 = (181 * (x5 + x8) + 128) >> 8; 1029 x5 = (181 * (x5 - x8) + 128) >> 8; 1030 1031 res = ((x0 + x1) >> 14); 1032 CLIP_RESULT(res) 1033 res2 = ((x4 + x7) >> 14); 1034 CLIP_RESULT(res2) 1035 dst_word = (res2 << 8) | res; 1036 res = ((x6 + x5) >> 14); 1037 CLIP_RESULT(res) 1038 dst_word |= (res << 16); 1039 res = ((x2 + x3) >> 14); 1040 CLIP_RESULT(res) 1041 dst_word |= (res << 24); 1042 *((uint32*)(rec += lx)) = dst_word; 1043 1044 res = ((x2 - x3) >> 14); 1045 CLIP_RESULT(res) 1046 res2 = ((x6 - x5) >> 14); 1047 CLIP_RESULT(res2) 1048 dst_word = (res2 << 8) | res; 1049 res = ((x4 - x7) >> 14); 1050 CLIP_RESULT(res) 1051 dst_word |= (res << 16); 1052 res = ((x0 - x1) >> 14); 1053 CLIP_RESULT(res) 1054 dst_word |= (res << 24); 1055 *((uint32*)(rec + 4)) = dst_word; 1056 } 1057 1058 return ; 1059 } 1060 1061 #ifndef SMALL_DCT 1062 void idct_row0x40Intra(Short *blk, UChar *rec, Int lx) 1063 { 1064 int32 x1, x2, x4, x5; 1065 int res, res2; 1066 uint32 dst_word; 1067 int i = 8; 1068 1069 rec -= lx; 1070 1071 while (i--) 1072 { 1073 /* shortcut */ 1074 x4 = blk[1]; 1075 blk[1] = 0; 1076 blk += 8; 1077 1078 /* first stage */ 1079 x5 = (W7 * x4 + 4) >> 3; 1080 x4 = (W1 * x4 + 4) >> 3; 1081 1082 /* third stage */ 1083 x2 = (181 * (x4 + x5) + 128) >> 8; 1084 x1 = (181 * (x4 - x5) + 128) >> 8; 1085 1086 /* fourth stage */ 1087 res = ((8192 + x4) >> 14); 1088 CLIP_RESULT(res) 1089 res2 = ((8192 + x2) >> 14); 1090 CLIP_RESULT(res2) 1091 dst_word = (res2 << 8) | res; 1092 res = ((8192 + x1) >> 14); 1093 CLIP_RESULT(res) 1094 dst_word |= (res << 16); 1095 res = ((8192 + x5) >> 14); 1096 CLIP_RESULT(res) 1097 dst_word |= (res << 24); 1098 *((uint32*)(rec += lx)) = dst_word; 1099 1100 res = ((8192 - x5) >> 14); 1101 CLIP_RESULT(res) 1102 res2 = ((8192 - x1) >> 14); 1103 CLIP_RESULT(res2) 1104 dst_word = (res2 << 8) | res; 1105 res = ((8192 - x2) >> 14); 1106 CLIP_RESULT(res) 1107 dst_word |= (res << 16); 1108 res = ((8192 - x4) >> 14); 1109 CLIP_RESULT(res) 1110 dst_word |= (res << 24); 1111 *((uint32*)(rec + 4)) = dst_word; 1112 1113 } 1114 return ; 1115 } 1116 1117 void idct_row0x20Intra(Short *blk, UChar *rec, Int lx) 1118 { 1119 int32 x0, x2, x4, x6; 1120 int res, res2; 1121 uint32 dst_word; 1122 int i = 8; 1123 1124 rec -= lx; 1125 while (i--) 1126 { 1127 x2 = blk[2]; 1128 blk[2] = 0; 1129 blk += 8; 1130 1131 /* both upper and lower*/ 1132 /* both x2orx6 and x0orx4 */ 1133 x6 = (W6 * x2 + 4) >> 3; 1134 x2 = (W2 * x2 + 4) >> 3; 1135 x0 = 8192 + x2; 1136 x2 = 8192 - x2; 1137 x4 = 8192 + x6; 1138 x6 = 8192 - x6; 1139 1140 res = ((x0) >> 14); 1141 CLIP_RESULT(res) 1142 res2 = ((x4) >> 14); 1143 CLIP_RESULT(res2) 1144 dst_word = (res2 << 8) | res; 1145 res = ((x6) >> 14); 1146 CLIP_RESULT(res) 1147 dst_word |= (res << 16); 1148 res = ((x2) >> 14); 1149 CLIP_RESULT(res) 1150 dst_word |= (res << 24); 1151 *((uint32*)(rec += lx)) = dst_word; 1152 1153 res = ((x2) >> 14); 1154 CLIP_RESULT(res) 1155 res2 = ((x6) >> 14); 1156 CLIP_RESULT(res2) 1157 dst_word = (res2 << 8) | res; 1158 res = ((x4) >> 14); 1159 CLIP_RESULT(res) 1160 dst_word |= (res << 16); 1161 res = ((x0) >> 14); 1162 CLIP_RESULT(res) 1163 dst_word |= (res << 24); 1164 *((uint32*)(rec + 4)) = dst_word; 1165 1166 } 1167 return ; 1168 } 1169 1170 void idct_row0x10Intra(Short *blk, UChar *rec, Int lx) 1171 { 1172 int32 x1, x3, x5, x7; 1173 int res, res2; 1174 uint32 dst_word; 1175 int i = 8; 1176 1177 rec -= lx; 1178 while (i--) 1179 { 1180 x3 = blk[3]; 1181 blk[3] = 0 ; 1182 blk += 8; 1183 1184 x1 = (W3 * x3 + 4) >> 3; 1185 x3 = (W5 * x3 + 4) >> 3; 1186 1187 x7 = (181 * (x3 - x1) + 128) >> 8; 1188 x5 = (-181 * (x1 + x3) + 128) >> 8; 1189 1190 res = ((8192 + x1) >> 14); 1191 CLIP_RESULT(res) 1192 res2 = ((8192 + x7) >> 14); 1193 CLIP_RESULT(res2) 1194 dst_word = (res2 << 8) | res; 1195 res = ((8192 + x5) >> 14); 1196 CLIP_RESULT(res) 1197 dst_word |= (res << 16); 1198 res = ((8192 - x3) >> 14); 1199 CLIP_RESULT(res) 1200 dst_word |= (res << 24); 1201 *((uint32*)(rec += lx)) = dst_word; 1202 1203 res = ((8192 + x3) >> 14); 1204 CLIP_RESULT(res) 1205 res2 = ((8192 - x5) >> 14); 1206 CLIP_RESULT(res2) 1207 dst_word = (res2 << 8) | res; 1208 res = ((8192 - x7) >> 14); 1209 CLIP_RESULT(res) 1210 dst_word |= (res << 16); 1211 res = ((8192 - x1) >> 14); 1212 CLIP_RESULT(res) 1213 dst_word |= (res << 24); 1214 *((uint32*)(rec + 4)) = dst_word; 1215 1216 } 1217 1218 return ; 1219 } 1220 1221 #endif /* SMALL_DCT */ 1222 void idct_rowIntra(Short *blk, UChar *rec, Int lx) 1223 { 1224 int32 x0, x1, x2, x3, x4, x5, x6, x7, x8; 1225 int i = 8; 1226 int res, res2; 1227 uint32 dst_word; 1228 1229 blk -= 8; 1230 rec -= lx; 1231 1232 while (i--) 1233 { 1234 x1 = (int32)blk[12] << 8; 1235 blk[12] = 0; 1236 x2 = blk[14]; 1237 blk[14] = 0; 1238 x3 = blk[10]; 1239 blk[10] = 0; 1240 x4 = blk[9]; 1241 blk[9] = 0; 1242 x5 = blk[15]; 1243 blk[15] = 0; 1244 x6 = blk[13]; 1245 blk[13] = 0; 1246 x7 = blk[11]; 1247 blk[11] = 0; 1248 x0 = ((*(blk += 8)) << 8) + 8192; 1249 *blk = 0; /* for proper rounding in the fourth stage */ 1250 1251 /* first stage */ 1252 x8 = W7 * (x4 + x5) + 4; 1253 x4 = (x8 + (W1 - W7) * x4) >> 3; 1254 x5 = (x8 - (W1 + W7) * x5) >> 3; 1255 x8 = W3 * (x6 + x7) + 4; 1256 x6 = (x8 - (W3 - W5) * x6) >> 3; 1257 x7 = (x8 - (W3 + W5) * x7) >> 3; 1258 1259 /* second stage */ 1260 x8 = x0 + x1; 1261 x0 -= x1; 1262 x1 = W6 * (x3 + x2) + 4; 1263 x2 = (x1 - (W2 + W6) * x2) >> 3; 1264 x3 = (x1 + (W2 - W6) * x3) >> 3; 1265 x1 = x4 + x6; 1266 x4 -= x6; 1267 x6 = x5 + x7; 1268 x5 -= x7; 1269 1270 /* third stage */ 1271 x7 = x8 + x3; 1272 x8 -= x3; 1273 x3 = x0 + x2; 1274 x0 -= x2; 1275 x2 = (181 * (x4 + x5) + 128) >> 8; 1276 x4 = (181 * (x4 - x5) + 128) >> 8; 1277 1278 /* fourth stage */ 1279 res = ((x7 + x1) >> 14); 1280 CLIP_RESULT(res) 1281 res2 = ((x3 + x2) >> 14); 1282 CLIP_RESULT(res2) 1283 dst_word = res | (res2 << 8); 1284 res = ((x0 + x4) >> 14); 1285 CLIP_RESULT(res) 1286 dst_word |= (res << 16); 1287 res = ((x8 + x6) >> 14); 1288 CLIP_RESULT(res) 1289 dst_word |= (res << 24); 1290 *((uint32*)(rec += lx)) = dst_word; 1291 1292 res = ((x8 - x6) >> 14); 1293 CLIP_RESULT(res) 1294 res2 = ((x0 - x4) >> 14); 1295 CLIP_RESULT(res2) 1296 dst_word = res | (res2 << 8); 1297 res = ((x3 - x2) >> 14); 1298 CLIP_RESULT(res) 1299 dst_word |= (res << 16); 1300 res = ((x7 - x1) >> 14); 1301 CLIP_RESULT(res) 1302 dst_word |= (res << 24); 1303 *((uint32*)(rec + 4)) = dst_word; 1304 } 1305 return; 1306 } 1307 1308 1309 /* This function should not be called at all ****/ 1310 void idct_row0zmv(Short *srce, UChar *rec, UChar *pred, Int lx) 1311 { 1312 OSCL_UNUSED_ARG(srce); 1313 OSCL_UNUSED_ARG(rec); 1314 OSCL_UNUSED_ARG(pred); 1315 OSCL_UNUSED_ARG(lx); 1316 1317 return; 1318 } 1319 1320 void idct_row1zmv(Short *blk, UChar *rec, UChar *pred, Int lx) 1321 { 1322 int tmp; 1323 int i = 8; 1324 uint32 pred_word, dst_word; 1325 int res, res2; 1326 1327 /* preset the offset, such that we can take advantage pre-offset addressing mode */ 1328 pred -= 16; 1329 rec -= lx; 1330 blk -= 8; 1331 1332 while (i--) 1333 { 1334 tmp = (*(blk += 8) + 32) >> 6; 1335 *blk = 0; 1336 1337 pred_word = *((uint32*)(pred += 16)); /* read 4 bytes from pred */ 1338 res = tmp + (pred_word & 0xFF); 1339 CLIP_RESULT(res); 1340 res2 = tmp + ((pred_word >> 8) & 0xFF); 1341 CLIP_RESULT(res2); 1342 dst_word = (res2 << 8) | res; 1343 res = tmp + ((pred_word >> 16) & 0xFF); 1344 CLIP_RESULT(res); 1345 dst_word |= (res << 16); 1346 res = tmp + ((pred_word >> 24) & 0xFF); 1347 CLIP_RESULT(res); 1348 dst_word |= (res << 24); 1349 *((uint32*)(rec += lx)) = dst_word; /* save 4 bytes to dst */ 1350 1351 pred_word = *((uint32*)(pred + 4)); /* read 4 bytes from pred */ 1352 res = tmp + (pred_word & 0xFF); 1353 CLIP_RESULT(res); 1354 res2 = tmp + ((pred_word >> 8) & 0xFF); 1355 CLIP_RESULT(res2); 1356 dst_word = (res2 << 8) | res; 1357 res = tmp + ((pred_word >> 16) & 0xFF); 1358 CLIP_RESULT(res); 1359 dst_word |= (res << 16); 1360 res = tmp + ((pred_word >> 24) & 0xFF); 1361 CLIP_RESULT(res); 1362 dst_word |= (res << 24); 1363 *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */ 1364 } 1365 return; 1366 } 1367 1368 void idct_row2zmv(Short *blk, UChar *rec, UChar *pred, Int lx) 1369 { 1370 int32 x0, x1, x2, x4, x5; 1371 int i = 8; 1372 uint32 pred_word, dst_word; 1373 int res, res2; 1374 1375 /* preset the offset, such that we can take advantage pre-offset addressing mode */ 1376 rec -= lx; 1377 pred -= 16; 1378 blk -= 8; 1379 1380 while (i--) 1381 { 1382 /* shortcut */ 1383 x4 = blk[9]; 1384 blk[9] = 0; 1385 x0 = ((*(blk += 8)) << 8) + 8192; 1386 *blk = 0; /* for proper rounding in the fourth stage */ 1387 1388 /* first stage */ 1389 x5 = (W7 * x4 + 4) >> 3; 1390 x4 = (W1 * x4 + 4) >> 3; 1391 1392 /* third stage */ 1393 x2 = (181 * (x4 + x5) + 128) >> 8; 1394 x1 = (181 * (x4 - x5) + 128) >> 8; 1395 1396 /* fourth stage */ 1397 pred_word = *((uint32*)(pred += 16)); /* read 4 bytes from pred */ 1398 res = (x0 + x4) >> 14; 1399 ADD_AND_CLIP1(res); 1400 res2 = (x0 + x2) >> 14; 1401 ADD_AND_CLIP2(res2); 1402 dst_word = (res2 << 8) | res; 1403 res = (x0 + x1) >> 14; 1404 ADD_AND_CLIP3(res); 1405 dst_word |= (res << 16); 1406 res = (x0 + x5) >> 14; 1407 ADD_AND_CLIP4(res); 1408 dst_word |= (res << 24); 1409 *((uint32*)(rec += lx)) = dst_word; /* save 4 bytes to dst */ 1410 1411 pred_word = *((uint32*)(pred + 4)); /* read 4 bytes from pred */ 1412 res = (x0 - x5) >> 14; 1413 ADD_AND_CLIP1(res); 1414 res2 = (x0 - x1) >> 14; 1415 ADD_AND_CLIP2(res2); 1416 dst_word = (res2 << 8) | res; 1417 res = (x0 - x2) >> 14; 1418 ADD_AND_CLIP3(res); 1419 dst_word |= (res << 16); 1420 res = (x0 - x4) >> 14; 1421 ADD_AND_CLIP4(res); 1422 dst_word |= (res << 24); 1423 *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */ 1424 } 1425 return ; 1426 } 1427 1428 void idct_row3zmv(Short *blk, UChar *rec, UChar *pred, Int lx) 1429 { 1430 int32 x0, x1, x2, x3, x4, x5, x6, x7, x8; 1431 int i = 8; 1432 uint32 pred_word, dst_word; 1433 int res, res2; 1434 1435 /* preset the offset, such that we can take advantage pre-offset addressing mode */ 1436 rec -= lx; 1437 pred -= 16; 1438 blk -= 8; 1439 1440 while (i--) 1441 { 1442 x2 = blk[10]; 1443 blk[10] = 0; 1444 x1 = blk[9]; 1445 blk[9] = 0; 1446 x0 = ((*(blk += 8)) << 8) + 8192; 1447 *blk = 0; /* for proper rounding in the fourth stage */ 1448 /* both upper and lower*/ 1449 /* both x2orx6 and x0orx4 */ 1450 1451 x4 = x0; 1452 x6 = (W6 * x2 + 4) >> 3; 1453 x2 = (W2 * x2 + 4) >> 3; 1454 x8 = x0 - x2; 1455 x0 += x2; 1456 x2 = x8; 1457 x8 = x4 - x6; 1458 x4 += x6; 1459 x6 = x8; 1460 1461 x7 = (W7 * x1 + 4) >> 3; 1462 x1 = (W1 * x1 + 4) >> 3; 1463 x3 = x7; 1464 x5 = (181 * (x1 - x7) + 128) >> 8; 1465 x7 = (181 * (x1 + x7) + 128) >> 8; 1466 1467 pred_word = *((uint32*)(pred += 16)); /* read 4 bytes from pred */ 1468 res = (x0 + x1) >> 14; 1469 ADD_AND_CLIP1(res); 1470 res2 = (x4 + x7) >> 14; 1471 ADD_AND_CLIP2(res2); 1472 dst_word = (res2 << 8) | res; 1473 res = (x6 + x5) >> 14; 1474 ADD_AND_CLIP3(res); 1475 dst_word |= (res << 16); 1476 res = (x2 + x3) >> 14; 1477 ADD_AND_CLIP4(res); 1478 dst_word |= (res << 24); 1479 *((uint32*)(rec += lx)) = dst_word; /* save 4 bytes to dst */ 1480 1481 pred_word = *((uint32*)(pred + 4)); /* read 4 bytes from pred */ 1482 res = (x2 - x3) >> 14; 1483 ADD_AND_CLIP1(res); 1484 res2 = (x6 - x5) >> 14; 1485 ADD_AND_CLIP2(res2); 1486 dst_word = (res2 << 8) | res; 1487 res = (x4 - x7) >> 14; 1488 ADD_AND_CLIP3(res); 1489 dst_word |= (res << 16); 1490 res = (x0 - x1) >> 14; 1491 ADD_AND_CLIP4(res); 1492 dst_word |= (res << 24); 1493 *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */ 1494 } 1495 1496 return ; 1497 } 1498 1499 void idct_row4zmv(Short *blk, UChar *rec, UChar *pred, Int lx) 1500 { 1501 int32 x0, x1, x2, x3, x4, x5, x6, x7, x8; 1502 int i = 8; 1503 uint32 pred_word, dst_word; 1504 int res, res2; 1505 1506 /* preset the offset, such that we can take advantage pre-offset addressing mode */ 1507 rec -= lx; 1508 pred -= 16; 1509 blk -= 8; 1510 1511 while (i--) 1512 { 1513 x2 = blk[10]; 1514 blk[10] = 0; 1515 x1 = blk[9]; 1516 blk[9] = 0; 1517 x3 = blk[11]; 1518 blk[11] = 0; 1519 x0 = ((*(blk += 8)) << 8) + 8192; 1520 *blk = 0; /* for proper rounding in the fourth stage */ 1521 1522 x4 = x0; 1523 x6 = (W6 * x2 + 4) >> 3; 1524 x2 = (W2 * x2 + 4) >> 3; 1525 x8 = x0 - x2; 1526 x0 += x2; 1527 x2 = x8; 1528 x8 = x4 - x6; 1529 x4 += x6; 1530 x6 = x8; 1531 1532 x7 = (W7 * x1 + 4) >> 3; 1533 x1 = (W1 * x1 + 4) >> 3; 1534 x5 = (W3 * x3 + 4) >> 3; 1535 x3 = (- W5 * x3 + 4) >> 3; 1536 x8 = x1 - x5; 1537 x1 += x5; 1538 x5 = x8; 1539 x8 = x7 - x3; 1540 x3 += x7; 1541 x7 = (181 * (x5 + x8) + 128) >> 8; 1542 x5 = (181 * (x5 - x8) + 128) >> 8; 1543 1544 pred_word = *((uint32*)(pred += 16)); /* read 4 bytes from pred */ 1545 res = (x0 + x1) >> 14; 1546 ADD_AND_CLIP1(res); 1547 res2 = (x4 + x7) >> 14; 1548 ADD_AND_CLIP2(res2); 1549 dst_word = (res2 << 8) | res; 1550 res = (x6 + x5) >> 14; 1551 ADD_AND_CLIP3(res); 1552 dst_word |= (res << 16); 1553 res = (x2 + x3) >> 14; 1554 ADD_AND_CLIP4(res); 1555 dst_word |= (res << 24); 1556 *((uint32*)(rec += lx)) = dst_word; /* save 4 bytes to dst */ 1557 1558 pred_word = *((uint32*)(pred + 4)); /* read 4 bytes from pred */ 1559 res = (x2 - x3) >> 14; 1560 ADD_AND_CLIP1(res); 1561 res2 = (x6 - x5) >> 14; 1562 ADD_AND_CLIP2(res2); 1563 dst_word = (res2 << 8) | res; 1564 res = (x4 - x7) >> 14; 1565 ADD_AND_CLIP3(res); 1566 dst_word |= (res << 16); 1567 res = (x0 - x1) >> 14; 1568 ADD_AND_CLIP4(res); 1569 dst_word |= (res << 24); 1570 *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */ 1571 } 1572 return ; 1573 } 1574 1575 #ifndef SMALL_DCT 1576 void idct_row0x40zmv(Short *blk, UChar *rec, UChar *pred, Int lx) 1577 { 1578 int32 x1, x2, x4, x5; 1579 int i = 8; 1580 uint32 pred_word, dst_word; 1581 int res, res2; 1582 1583 /* preset the offset, such that we can take advantage pre-offset addressing mode */ 1584 rec -= lx; 1585 pred -= 16; 1586 1587 while (i--) 1588 { 1589 /* shortcut */ 1590 x4 = blk[1]; 1591 blk[1] = 0; 1592 blk += 8; /* for proper rounding in the fourth stage */ 1593 1594 /* first stage */ 1595 x5 = (W7 * x4 + 4) >> 3; 1596 x4 = (W1 * x4 + 4) >> 3; 1597 1598 /* third stage */ 1599 x2 = (181 * (x4 + x5) + 128) >> 8; 1600 x1 = (181 * (x4 - x5) + 128) >> 8; 1601 1602 /* fourth stage */ 1603 pred_word = *((uint32*)(pred += 16)); /* read 4 bytes from pred */ 1604 res = (8192 + x4) >> 14; 1605 ADD_AND_CLIP1(res); 1606 res2 = (8192 + x2) >> 14; 1607 ADD_AND_CLIP2(res2); 1608 dst_word = (res2 << 8) | res; 1609 res = (8192 + x1) >> 14; 1610 ADD_AND_CLIP3(res); 1611 dst_word |= (res << 16); 1612 res = (8192 + x5) >> 14; 1613 ADD_AND_CLIP4(res); 1614 dst_word |= (res << 24); 1615 *((uint32*)(rec += lx)) = dst_word; /* save 4 bytes to dst */ 1616 1617 pred_word = *((uint32*)(pred + 4)); /* read 4 bytes from pred */ 1618 res = (8192 - x5) >> 14; 1619 ADD_AND_CLIP1(res); 1620 res2 = (8192 - x1) >> 14; 1621 ADD_AND_CLIP2(res2); 1622 dst_word = (res2 << 8) | res; 1623 res = (8192 - x2) >> 14; 1624 ADD_AND_CLIP3(res); 1625 dst_word |= (res << 16); 1626 res = (8192 - x4) >> 14; 1627 ADD_AND_CLIP4(res); 1628 dst_word |= (res << 24); 1629 *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */ 1630 } 1631 return ; 1632 } 1633 1634 void idct_row0x20zmv(Short *blk, UChar *rec, UChar *pred, Int lx) 1635 { 1636 int32 x0, x2, x4, x6; 1637 int i = 8; 1638 uint32 pred_word, dst_word; 1639 int res, res2; 1640 1641 /* preset the offset, such that we can take advantage pre-offset addressing mode */ 1642 rec -= lx; 1643 pred -= 16; 1644 1645 while (i--) 1646 { 1647 x2 = blk[2]; 1648 blk[2] = 0; 1649 blk += 8; /* for proper rounding in the fourth stage */ 1650 /* both upper and lower*/ 1651 /* both x2orx6 and x0orx4 */ 1652 x6 = (W6 * x2 + 4) >> 3; 1653 x2 = (W2 * x2 + 4) >> 3; 1654 x0 = 8192 + x2; 1655 x2 = 8192 - x2; 1656 x4 = 8192 + x6; 1657 x6 = 8192 - x6; 1658 1659 pred_word = *((uint32*)(pred += 16)); /* read 4 bytes from pred */ 1660 res = (x0) >> 14; 1661 ADD_AND_CLIP1(res); 1662 res2 = (x4) >> 14; 1663 ADD_AND_CLIP2(res2); 1664 dst_word = (res2 << 8) | res; 1665 res = (x6) >> 14; 1666 ADD_AND_CLIP3(res); 1667 dst_word |= (res << 16); 1668 res = (x2) >> 14; 1669 ADD_AND_CLIP4(res); 1670 dst_word |= (res << 24); 1671 *((uint32*)(rec += lx)) = dst_word; /* save 4 bytes to dst */ 1672 1673 pred_word = *((uint32*)(pred + 4)); /* read 4 bytes from pred */ 1674 res = (x2) >> 14; 1675 ADD_AND_CLIP1(res); 1676 res2 = (x6) >> 14; 1677 ADD_AND_CLIP2(res2); 1678 dst_word = (res2 << 8) | res; 1679 res = (x4) >> 14; 1680 ADD_AND_CLIP3(res); 1681 dst_word |= (res << 16); 1682 res = (x0) >> 14; 1683 ADD_AND_CLIP4(res); 1684 dst_word |= (res << 24); 1685 *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */ 1686 } 1687 1688 return ; 1689 } 1690 1691 void idct_row0x10zmv(Short *blk, UChar *rec, UChar *pred, Int lx) 1692 { 1693 int32 x1, x3, x5, x7; 1694 int i = 8; 1695 uint32 pred_word, dst_word; 1696 int res, res2; 1697 1698 /* preset the offset, such that we can take advantage pre-offset addressing mode */ 1699 rec -= lx; 1700 pred -= 16; 1701 1702 while (i--) 1703 { 1704 x3 = blk[3]; 1705 blk[3] = 0; 1706 blk += 8; 1707 1708 x1 = (W3 * x3 + 4) >> 3; 1709 x3 = (-W5 * x3 + 4) >> 3; 1710 1711 x7 = (-181 * (x3 + x1) + 128) >> 8; 1712 x5 = (181 * (x3 - x1) + 128) >> 8; 1713 1714 pred_word = *((uint32*)(pred += 16)); /* read 4 bytes from pred */ 1715 res = (8192 + x1) >> 14; 1716 ADD_AND_CLIP1(res); 1717 res2 = (8192 + x7) >> 14; 1718 ADD_AND_CLIP2(res2); 1719 dst_word = (res2 << 8) | res; 1720 res = (8192 + x5) >> 14; 1721 ADD_AND_CLIP3(res); 1722 dst_word |= (res << 16); 1723 res = (8192 + x3) >> 14; 1724 ADD_AND_CLIP4(res); 1725 dst_word |= (res << 24); 1726 *((uint32*)(rec += lx)) = dst_word; /* save 4 bytes to dst */ 1727 1728 pred_word = *((uint32*)(pred + 4)); /* read 4 bytes from pred */ 1729 res = (8192 - x3) >> 14; 1730 ADD_AND_CLIP1(res); 1731 res2 = (8192 - x5) >> 14; 1732 ADD_AND_CLIP2(res2); 1733 dst_word = (res2 << 8) | res; 1734 res = (8192 - x7) >> 14; 1735 ADD_AND_CLIP3(res); 1736 dst_word |= (res << 16); 1737 res = (8192 - x1) >> 14; 1738 ADD_AND_CLIP4(res); 1739 dst_word |= (res << 24); 1740 *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */ 1741 } 1742 return ; 1743 } 1744 1745 #endif /* SMALL_DCT */ 1746 1747 void idct_rowzmv(Short *blk, UChar *rec, UChar *pred, Int lx) 1748 { 1749 int32 x0, x1, x2, x3, x4, x5, x6, x7, x8; 1750 int i = 8; 1751 uint32 pred_word, dst_word; 1752 int res, res2; 1753 1754 /* preset the offset, such that we can take advantage pre-offset addressing mode */ 1755 rec -= lx; 1756 pred -= 16; 1757 blk -= 8; 1758 1759 while (i--) 1760 { 1761 x1 = (int32)blk[12] << 8; 1762 blk[12] = 0; 1763 x2 = blk[14]; 1764 blk[14] = 0; 1765 x3 = blk[10]; 1766 blk[10] = 0; 1767 x4 = blk[9]; 1768 blk[9] = 0; 1769 x5 = blk[15]; 1770 blk[15] = 0; 1771 x6 = blk[13]; 1772 blk[13] = 0; 1773 x7 = blk[11]; 1774 blk[11] = 0; 1775 x0 = ((*(blk += 8)) << 8) + 8192; 1776 *blk = 0; /* for proper rounding in the fourth stage */ 1777 1778 /* first stage */ 1779 x8 = W7 * (x4 + x5) + 4; 1780 x4 = (x8 + (W1 - W7) * x4) >> 3; 1781 x5 = (x8 - (W1 + W7) * x5) >> 3; 1782 x8 = W3 * (x6 + x7) + 4; 1783 x6 = (x8 - (W3 - W5) * x6) >> 3; 1784 x7 = (x8 - (W3 + W5) * x7) >> 3; 1785 1786 /* second stage */ 1787 x8 = x0 + x1; 1788 x0 -= x1; 1789 x1 = W6 * (x3 + x2) + 4; 1790 x2 = (x1 - (W2 + W6) * x2) >> 3; 1791 x3 = (x1 + (W2 - W6) * x3) >> 3; 1792 x1 = x4 + x6; 1793 x4 -= x6; 1794 x6 = x5 + x7; 1795 x5 -= x7; 1796 1797 /* third stage */ 1798 x7 = x8 + x3; 1799 x8 -= x3; 1800 x3 = x0 + x2; 1801 x0 -= x2; 1802 x2 = (181 * (x4 + x5) + 128) >> 8; 1803 x4 = (181 * (x4 - x5) + 128) >> 8; 1804 1805 /* fourth stage */ 1806 pred_word = *((uint32*)(pred += 16)); /* read 4 bytes from pred */ 1807 1808 res = (x7 + x1) >> 14; 1809 ADD_AND_CLIP1(res); 1810 res2 = (x3 + x2) >> 14; 1811 ADD_AND_CLIP2(res2); 1812 dst_word = (res2 << 8) | res; 1813 res = (x0 + x4) >> 14; 1814 ADD_AND_CLIP3(res); 1815 dst_word |= (res << 16); 1816 res = (x8 + x6) >> 14; 1817 ADD_AND_CLIP4(res); 1818 dst_word |= (res << 24); 1819 *((uint32*)(rec += lx)) = dst_word; /* save 4 bytes to dst */ 1820 1821 pred_word = *((uint32*)(pred + 4)); /* read 4 bytes from pred */ 1822 1823 res = (x8 - x6) >> 14; 1824 ADD_AND_CLIP1(res); 1825 res2 = (x0 - x4) >> 14; 1826 ADD_AND_CLIP2(res2); 1827 dst_word = (res2 << 8) | res; 1828 res = (x3 - x2) >> 14; 1829 ADD_AND_CLIP3(res); 1830 dst_word |= (res << 16); 1831 res = (x7 - x1) >> 14; 1832 ADD_AND_CLIP4(res); 1833 dst_word |= (res << 24); 1834 *((uint32*)(rec + 4)) = dst_word; /* save 4 bytes to dst */ 1835 } 1836 return; 1837 } 1838 1839 /*---------------------------------------------------------------------------- 1840 ; End Function: idctcol 1841 ----------------------------------------------------------------------------*/ 1842 /* ======================================================================== */ 1843 /* Function : BlockIDCTMotionComp */ 1844 /* Date : 10/16/2000 */ 1845 /* Purpose : fast IDCT routine */ 1846 /* In/out : */ 1847 /* Int* coeff_in Dequantized coefficient 1848 Int block_out output IDCT coefficient 1849 Int maxval clip value */ 1850 /* Modified : 7/31/01, add checking for all-zero and DC-only block. */ 1851 /* do 8 columns at a time */ 1852 /* 8/2/01, do column first then row-IDCT. */ 1853 /* 8/2/01, remove clipping (included in motion comp). */ 1854 /* 8/7/01, combine with motion comp. */ 1855 /* 8/8/01, use AAN IDCT */ 1856 /* 9/4/05, use Chen's IDCT and 16 bit block */ 1857 /* ======================================================================== */ 1858 void BlockIDCTMotionComp(Short *block, UChar *bitmapcol, UChar bitmaprow, 1859 Int dctMode, UChar *rec, UChar *pred, Int lx_intra) 1860 { 1861 Int i; 1862 Int tmp, tmp2; 1863 ULong tmp4; 1864 Int bmap; 1865 Short *ptr = block; 1866 UChar *endcol; 1867 UInt mask = 0xFF; 1868 Int lx = lx_intra >> 1; 1869 Int intra = (lx_intra & 1); 1870 1871 /* all-zero block */ 1872 if (dctMode == 0 || bitmaprow == 0) 1873 { 1874 if (intra) 1875 { 1876 *((ULong*)rec) = *((ULong*)(rec + 4)) = 0; 1877 *((ULong*)(rec += lx)) = 0; 1878 *((ULong*)(rec + 4)) = 0; 1879 *((ULong*)(rec += lx)) = 0; 1880 *((ULong*)(rec + 4)) = 0; 1881 *((ULong*)(rec += lx)) = 0; 1882 *((ULong*)(rec + 4)) = 0; 1883 *((ULong*)(rec += lx)) = 0; 1884 *((ULong*)(rec + 4)) = 0; 1885 *((ULong*)(rec += lx)) = 0; 1886 *((ULong*)(rec + 4)) = 0; 1887 *((ULong*)(rec += lx)) = 0; 1888 *((ULong*)(rec + 4)) = 0; 1889 *((ULong*)(rec += lx)) = 0; 1890 *((ULong*)(rec + 4)) = 0; 1891 return ; 1892 } 1893 else /* copy from previous frame */ 1894 { 1895 *((ULong*)rec) = *((ULong*)pred); 1896 *((ULong*)(rec + 4)) = *((ULong*)(pred + 4)); 1897 *((ULong*)(rec += lx)) = *((ULong*)(pred += 16)); 1898 *((ULong*)(rec + 4)) = *((ULong*)(pred + 4)); 1899 *((ULong*)(rec += lx)) = *((ULong*)(pred += 16)); 1900 *((ULong*)(rec + 4)) = *((ULong*)(pred + 4)); 1901 *((ULong*)(rec += lx)) = *((ULong*)(pred += 16)); 1902 *((ULong*)(rec + 4)) = *((ULong*)(pred + 4)); 1903 *((ULong*)(rec += lx)) = *((ULong*)(pred += 16)); 1904 *((ULong*)(rec + 4)) = *((ULong*)(pred + 4)); 1905 *((ULong*)(rec += lx)) = *((ULong*)(pred += 16)); 1906 *((ULong*)(rec + 4)) = *((ULong*)(pred + 4)); 1907 *((ULong*)(rec += lx)) = *((ULong*)(pred += 16)); 1908 *((ULong*)(rec + 4)) = *((ULong*)(pred + 4)); 1909 *((ULong*)(rec += lx)) = *((ULong*)(pred += 16)); 1910 *((ULong*)(rec + 4)) = *((ULong*)(pred + 4)); 1911 return ; 1912 } 1913 } 1914 1915 /* Test for DC only block */ 1916 if (dctMode == 1 || (bitmaprow == 0x80 && bitmapcol[0] == 0x80)) 1917 { 1918 i = ((block[0] << 3) + 32) >> 6; 1919 block[0] = 0; 1920 if (intra) 1921 { 1922 if ((UInt)i > mask) i = mask & (~(i >> 31)); 1923 1924 tmp = i | (i << 8); 1925 tmp |= (tmp << 16); 1926 1927 *((ULong*)rec) = *((ULong*)(rec + 4)) = tmp; 1928 *((ULong*)(rec += lx)) = tmp; 1929 *((ULong*)(rec + 4)) = tmp; 1930 *((ULong*)(rec += lx)) = tmp; 1931 *((ULong*)(rec + 4)) = tmp; 1932 *((ULong*)(rec += lx)) = tmp; 1933 *((ULong*)(rec + 4)) = tmp; 1934 *((ULong*)(rec += lx)) = tmp; 1935 *((ULong*)(rec + 4)) = tmp; 1936 *((ULong*)(rec += lx)) = tmp; 1937 *((ULong*)(rec + 4)) = tmp; 1938 *((ULong*)(rec += lx)) = tmp; 1939 *((ULong*)(rec + 4)) = tmp; 1940 *((ULong*)(rec += lx)) = tmp; 1941 *((ULong*)(rec + 4)) = tmp; 1942 1943 return ; 1944 } 1945 else 1946 { 1947 endcol = rec + (lx << 3); 1948 do 1949 { 1950 tmp4 = *((ULong*)pred); 1951 tmp2 = tmp4 & 0xFF; 1952 tmp2 += i; 1953 if ((UInt)tmp2 > mask) tmp2 = mask & (~(tmp2 >> 31)); 1954 tmp = (tmp4 >> 8) & 0xFF; 1955 tmp += i; 1956 if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31)); 1957 tmp2 |= (tmp << 8); 1958 tmp = (tmp4 >> 16) & 0xFF; 1959 tmp += i; 1960 if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31)); 1961 tmp2 |= (tmp << 16); 1962 tmp = (tmp4 >> 24) & 0xFF; 1963 tmp += i; 1964 if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31)); 1965 tmp2 |= (tmp << 24); 1966 *((ULong*)rec) = tmp2; 1967 1968 tmp4 = *((ULong*)(pred + 4)); 1969 tmp2 = tmp4 & 0xFF; 1970 tmp2 += i; 1971 if ((UInt)tmp2 > mask) tmp2 = mask & (~(tmp2 >> 31)); 1972 tmp = (tmp4 >> 8) & 0xFF; 1973 tmp += i; 1974 if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31)); 1975 tmp2 |= (tmp << 8); 1976 tmp = (tmp4 >> 16) & 0xFF; 1977 tmp += i; 1978 if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31)); 1979 tmp2 |= (tmp << 16); 1980 tmp = (tmp4 >> 24) & 0xFF; 1981 tmp += i; 1982 if ((UInt)tmp > mask) tmp = mask & (~(tmp >> 31)); 1983 tmp2 |= (tmp << 24); 1984 *((ULong*)(rec + 4)) = tmp2; 1985 1986 rec += lx; 1987 pred += 16; 1988 } 1989 while (rec < endcol); 1990 return ; 1991 } 1992 } 1993 1994 for (i = 0; i < dctMode; i++) 1995 { 1996 bmap = (Int)bitmapcol[i]; 1997 if (bmap) 1998 { 1999 if ((bmap&0xf) == 0) 2000 (*(idctcolVCA[bmap>>4]))(ptr); 2001 else 2002 idct_col(ptr); 2003 } 2004 ptr++; 2005 } 2006 2007 if ((bitmaprow&0xf) == 0) 2008 { 2009 if (intra) 2010 (*(idctrowVCAIntra[(Int)(bitmaprow>>4)]))(block, rec, lx); 2011 else 2012 (*(idctrowVCAzmv[(Int)(bitmaprow>>4)]))(block, rec, pred, lx); 2013 } 2014 else 2015 { 2016 if (intra) 2017 idct_rowIntra(block, rec, lx); 2018 else 2019 idct_rowzmv(block, rec, pred, lx); 2020 } 2021 } 2022