/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */


/****************************************************************************
 *
 *   Module Title :     gen_scalers.c
 *
 *   Description  :     Generic image scaling functions.
 *
 ***************************************************************************/

/****************************************************************************
 *  Header Files
 ****************************************************************************/
#include "vpx_scale/vpxscale.h"

/****************************************************************************
 *  Imports
 ****************************************************************************/

/****************************************************************************
 *
 *  ROUTINE       : horizontal_line_4_5_scale_c64
 *
 *  INPUTS        : const unsigned char *source : Pointer to source data.
 *                  unsigned int source_width   : Stride of source.
 *                  unsigned char *dest         : Pointer to destination data.
 *                  unsigned int dest_width     : Stride of destination (NOT USED).
 *
 *  OUTPUTS       : None.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Copies horizontal line of pixels from source to
 *                  destination scaling up by 4 to 5.
 *
 *  SPECIAL NOTES : None.
46 * 47 ****************************************************************************/ 48 static 49 void horizontal_line_4_5_scale_c64 50 ( 51 const unsigned char *source, 52 unsigned int source_width, 53 unsigned char *dest, 54 unsigned int dest_width 55 ) 56 { 57 unsigned i; 58 unsigned int ba, cb, dc, ed; 59 unsigned char *restrict des = dest; 60 unsigned int *restrict src = (unsigned int *)source; 61 unsigned int const_51_205, const_102_154, 62 const_205_51, const_154_102; 63 64 unsigned int src_current, src_next; 65 66 (void) dest_width; 67 68 // Constants that are to be used for the filtering. For 69 // best speed we are going to want to right shift by 16. 70 // In the generic version they were shift by 8, so put 71 // an extra 8 in now so that 16 will come out later. 72 const_51_205 = 0x3300CD00; //_pack2 (51 << 8, 205 << 8); 73 const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8); 74 const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8); 75 const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8); 76 77 // 5 points are needed to filter to give 5 output points. 78 // A load can pull up 4 at a time, and one needs to be 79 // "borrowed" from the next set of data. So instead of 80 // loading those 5 points each time, "steal" a point from 81 // the next set and only load up 4 each time through. 82 src_current = _mem4(src); 83 84 for (i = 0; i < source_width - 4; i += 4) 85 { 86 src_next = _mem4(src++); 87 88 // Reorder the data so that it is ready for the 89 // dot product. 90 ba = _unpklu4(src_current); 91 cb = _unpkhu4(_rotl(src_current, 8)); 92 dc = _unpkhu4(src_current); 93 ed = _unpkhu4(_shrmb(src_next, src_current)); 94 95 // Use the dot product with round and shift. 96 des [0] = src_current & 0xff; 97 des [1] = _dotprsu2(ba, const_205_51); 98 des [2] = _dotprsu2(cb, const_154_102); 99 des [3] = _dotprsu2(dc, const_102_154); 100 des [4] = _dotprsu2(ed, const_51_205); 101 102 des += 5; 103 104 // reuse loaded vales next time around. 
105 src_current = src_next; 106 } 107 108 // vp8_filter the last set of points. Normally a point from the next set 109 // would be used, but there is no next set, so just fill. 110 ba = _unpklu4(src_current); 111 cb = _unpkhu4(_rotl(src_current, 8)); 112 dc = _unpkhu4(src_current); 113 114 des [0] = src_current & 0xff; 115 des [1] = _dotprsu2(ba, const_205_51); 116 des [2] = _dotprsu2(cb, const_154_102); 117 des [3] = _dotprsu2(dc, const_102_154); 118 des [4] = src_current & 0xff; 119 120 } 121 /**************************************************************************** 122 * 123 * ROUTINE : vertical_band_4_5_scale_c64 124 * 125 * INPUTS : unsigned char *dest : Pointer to destination data. 126 * unsigned int dest_pitch : Stride of destination data. 127 * unsigned int dest_width : Width of destination data. 128 * 129 * OUTPUTS : None. 130 * 131 * RETURNS : void 132 * 133 * FUNCTION : Scales vertical band of pixels by scale 4 to 5. The 134 * height of the band scaled is 4-pixels. 135 * 136 * SPECIAL NOTES : The routine uses the first line of the band below 137 * the current band. 138 * 139 ****************************************************************************/ 140 static 141 void vertical_band_4_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) 142 { 143 unsigned int i; 144 unsigned int a, b, c, d, e; 145 unsigned int ba, cb, dc, ed; 146 unsigned char *restrict src = dest; 147 unsigned char *restrict des = dest; 148 unsigned int const_51_205, const_102_154, 149 const_205_51, const_154_102; 150 151 const_51_205 = 0x3300CD00; //_pack2 (51 << 8, 205 << 8); 152 const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8); 153 const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8); 154 const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8); 155 156 // Force a loop unroll here so that there is not such a 157 // dependancy. 
158 a = src [0]; 159 b = src [dest_pitch]; 160 c = src [dest_pitch*2]; 161 d = src [dest_pitch*3]; 162 e = src [dest_pitch*5]; 163 src ++; 164 165 for (i = 0; i < dest_width; i++) 166 { 167 ba = _pack2(b, a); 168 cb = _pack2(c, b); 169 dc = _pack2(d, c); 170 ed = _pack2(e, d); 171 172 a = src [0]; 173 b = src [dest_pitch]; 174 c = src [dest_pitch*2]; 175 d = src [dest_pitch*3]; 176 e = src [dest_pitch*5]; 177 src ++; 178 179 des [dest_pitch] = _dotprsu2(ba, const_205_51); 180 des [dest_pitch*2] = _dotprsu2(cb, const_154_102); 181 des [dest_pitch*3] = _dotprsu2(dc, const_102_154); 182 des [dest_pitch*4] = _dotprsu2(ed, const_51_205); 183 184 des ++; 185 } 186 } 187 188 /**************************************************************************** 189 * 190 * ROUTINE : last_vertical_band_4_5_scale_c64 191 * 192 * INPUTS : unsigned char *dest : Pointer to destination data. 193 * unsigned int dest_pitch : Stride of destination data. 194 * unsigned int dest_width : Width of destination data. 195 * 196 * OUTPUTS : None. 197 * 198 * RETURNS : void 199 * 200 * FUNCTION : Scales last vertical band of pixels by scale 4 to 5. The 201 * height of the band scaled is 4-pixels. 202 * 203 * SPECIAL NOTES : The routine does not have available the first line of 204 * the band below the current band, since this is the 205 * last band. 
206 * 207 ****************************************************************************/ 208 static 209 void last_vertical_band_4_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) 210 { 211 unsigned int i; 212 unsigned int a, b, c, d; 213 unsigned int ba, cb, dc; 214 unsigned char *restrict src = dest; 215 unsigned char *restrict des = dest; 216 unsigned int const_102_154, const_205_51, const_154_102; 217 218 const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8); 219 const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8); 220 const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8); 221 222 a = src [0]; 223 b = src [dest_pitch]; 224 c = src [dest_pitch*2]; 225 d = src [dest_pitch*3]; 226 src ++; 227 228 for (i = 0; i < dest_width; ++i) 229 { 230 ba = _pack2(b, a); 231 cb = _pack2(c, b); 232 dc = _pack2(d, c); 233 234 a = src [0]; 235 b = src [dest_pitch]; 236 c = src [dest_pitch*2]; 237 d = src [dest_pitch*3]; 238 src ++; 239 240 des [dest_pitch] = _dotprsu2(ba, const_205_51); 241 des [dest_pitch*2] = _dotprsu2(cb, const_154_102); 242 des [dest_pitch*3] = _dotprsu2(dc, const_102_154); 243 des [dest_pitch*4] = (unsigned char) d; 244 245 des++; 246 } 247 } 248 249 /**************************************************************************** 250 * 251 * ROUTINE : horizontal_line_3_5_scale_c64 252 * 253 * INPUTS : const unsigned char *source : Pointer to source data. 254 * unsigned int source_width : Stride of source. 255 * unsigned char *dest : Pointer to destination data. 256 * unsigned int dest_width : Stride of destination (NOT USED). 257 * 258 * OUTPUTS : None. 259 * 260 * RETURNS : void 261 * 262 * FUNCTION : Copies horizontal line of pixels from source to 263 * destination scaling up by 3 to 5. 264 * 265 * SPECIAL NOTES : None. 
266 * 267 * 268 ****************************************************************************/ 269 static 270 void horizontal_line_3_5_scale_c64 271 ( 272 const unsigned char *source, 273 unsigned int source_width, 274 unsigned char *dest, 275 unsigned int dest_width 276 ) 277 { 278 unsigned int i; 279 unsigned int ba, cb, dc; 280 unsigned int src_current; 281 unsigned char *restrict des = dest; 282 unsigned char *restrict src = (unsigned char *)source; 283 unsigned int const_51_205, const_102_154, 284 const_205_51, const_154_102; 285 286 (void) dest_width; 287 288 const_51_205 = 0x3300CD00; //_pack2 (51 << 8, 205 << 8); 289 const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8); 290 const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8); 291 const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8); 292 293 for (i = 0; i < source_width - 3; i += 3) 294 { 295 src_current = _mem4(src); 296 297 // Reorder the data so that it is ready for the 298 // dot product. 299 ba = _unpklu4(src_current); 300 cb = _unpkhu4(_rotl(src_current, 8)); 301 dc = _unpkhu4(src_current); 302 303 des [0] = src_current & 0xff; 304 des [1] = _dotprsu2(ba, const_154_102); 305 des [2] = _dotprsu2(cb, const_51_205); 306 des [3] = _dotprsu2(cb, const_205_51); 307 des [4] = _dotprsu2(dc, const_102_154); 308 309 src += 3; 310 des += 5; 311 } 312 313 src_current = _mem4(src); 314 315 ba = _unpklu4(src_current); 316 cb = _unpkhu4(_rotl(src_current, 8)); 317 dc = _unpkhu4(src_current); 318 319 320 des [0] = src_current & 0xff; 321 des [1] = _dotprsu2(ba, const_154_102); 322 des [2] = _dotprsu2(cb, const_51_205); 323 des [3] = _dotprsu2(cb, const_205_51); 324 des [4] = dc & 0xff; 325 326 } 327 328 /**************************************************************************** 329 * 330 * ROUTINE : vertical_band_3_5_scale_c64 331 * 332 * INPUTS : unsigned char *dest : Pointer to destination data. 333 * unsigned int dest_pitch : Stride of destination data. 
334 * unsigned int dest_width : Width of destination data. 335 * 336 * OUTPUTS : None. 337 * 338 * RETURNS : void 339 * 340 * FUNCTION : Scales vertical band of pixels by scale 3 to 5. The 341 * height of the band scaled is 3-pixels. 342 * 343 * SPECIAL NOTES : The routine uses the first line of the band below 344 * the current band. 345 * 346 ****************************************************************************/ 347 static 348 void vertical_band_3_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) 349 { 350 unsigned int i; 351 unsigned int a, b, c, d; 352 unsigned int ba, cb, dc; 353 unsigned char *restrict src = dest; 354 unsigned char *restrict des = dest; 355 unsigned int const_51_205, const_102_154, 356 const_205_51, const_154_102; 357 358 const_51_205 = 0x3300CD00; //_pack2 (51 << 8, 205 << 8); 359 const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8); 360 const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8); 361 const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8); 362 363 a = src [0]; 364 b = src [dest_pitch]; 365 c = src [dest_pitch*2]; 366 d = src [dest_pitch*5]; 367 src ++; 368 369 for (i = 0; i < dest_width; i++) 370 { 371 ba = _pack2(b, a); 372 cb = _pack2(c, b); 373 dc = _pack2(d, c); 374 375 a = src [0]; 376 b = src [dest_pitch]; 377 c = src [dest_pitch*2]; 378 d = src [dest_pitch*5]; 379 src ++; 380 381 des [dest_pitch] = _dotprsu2(ba, const_154_102); 382 des [dest_pitch*2] = _dotprsu2(cb, const_51_205); 383 des [dest_pitch*3] = _dotprsu2(cb, const_205_51); 384 des [dest_pitch*4] = _dotprsu2(dc, const_102_154); 385 386 des++; 387 } 388 } 389 390 /**************************************************************************** 391 * 392 * ROUTINE : last_vertical_band_3_5_scale_c64 393 * 394 * INPUTS : unsigned char *dest : Pointer to destination data. 395 * unsigned int dest_pitch : Stride of destination data. 396 * unsigned int dest_width : Width of destination data. 397 * 398 * OUTPUTS : None. 
399 * 400 * RETURNS : void 401 * 402 * FUNCTION : Scales last vertical band of pixels by scale 3 to 5. The 403 * height of the band scaled is 3-pixels. 404 * 405 * SPECIAL NOTES : The routine does not have available the first line of 406 * the band below the current band, since this is the 407 * last band. 408 * 409 ****************************************************************************/ 410 static 411 void last_vertical_band_3_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) 412 { 413 unsigned int i; 414 unsigned int a, b, c; 415 unsigned int ba, cb; 416 unsigned char *restrict src = dest; 417 unsigned char *restrict des = dest; 418 unsigned int const_51_205, const_205_51, const_154_102; 419 420 const_51_205 = 0x3300CD00; //_pack2 (51 << 8, 205 << 8); 421 const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8); 422 const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8); 423 424 a = src [0]; 425 b = src [dest_pitch]; 426 c = src [dest_pitch*2]; 427 src ++; 428 429 for (i = 0; i < dest_width; ++i) 430 { 431 ba = _pack2(b, a); 432 cb = _pack2(c, b); 433 434 a = src [0]; 435 b = src [dest_pitch]; 436 c = src [dest_pitch*2]; 437 src ++; 438 439 des [dest_pitch] = _dotprsu2(ba, const_154_102); 440 des [dest_pitch*2] = _dotprsu2(cb, const_51_205); 441 des [dest_pitch*3] = _dotprsu2(cb, const_205_51); 442 des [dest_pitch*4] = (unsigned char)(c) ; 443 444 des++; 445 } 446 } 447 448 /**************************************************************************** 449 * 450 * ROUTINE : horizontal_line_1_2_scale_c64 451 * 452 * INPUTS : const unsigned char *source : Pointer to source data. 453 * unsigned int source_width : Stride of source. 454 * unsigned char *dest : Pointer to destination data. 455 * unsigned int dest_width : Stride of destination (NOT USED). 456 * 457 * OUTPUTS : None. 458 * 459 * RETURNS : void 460 * 461 * FUNCTION : Copies horizontal line of pixels from source to 462 * destination scaling up by 1 to 2. 
463 * 464 * SPECIAL NOTES : source width must be a multiple of 4. 465 * 466 ****************************************************************************/ 467 void horizontal_line_1_2_scale_c64 468 ( 469 const unsigned char *source, 470 unsigned int source_width, 471 unsigned char *dest, 472 unsigned int dest_width 473 ) 474 { 475 unsigned int i; 476 unsigned char *restrict des = dest; 477 unsigned char *restrict src = (unsigned char *)source; 478 unsigned int src7_4i, src4_1i, src3_0i; 479 unsigned int a4_0i, ahi, alo; 480 double src7_0d, src3_0d; 481 const unsigned int k01 = 0x01010101; 482 483 for (i = 0; i < source_width / 4; i += 1) 484 { 485 // Load up the data from src. Here a wide load is 486 // used to get 8 bytes at once, only 5 will be used 487 // for the actual computation. 488 src7_0d = _memd8(src); 489 src3_0i = _lo(src7_0d); 490 src7_4i = _hi(src7_0d); 491 492 // Need to average between points. Shift byte 5 into 493 // the lower word. This will result in bytes 5-1 494 // averaged with 4-0. 495 src4_1i = _shrmb(src7_4i, src3_0i); 496 a4_0i = _avgu4(src4_1i, src3_0i); 497 498 // Expand the data out. Could do an unpack, however 499 // all but the multiply units are getting pretty hard 500 // here the multiply unit can take some of the computations. 501 src3_0d = _mpyu4(src3_0i, k01); 502 503 // The averages need to be unpacked so that they are in 16 504 // bit form and will be able to be interleaved with the 505 // original data 506 ahi = _unpkhu4(a4_0i); 507 alo = _unpklu4(a4_0i); 508 509 ahi = _swap4(ahi); 510 alo = _swap4(alo); 511 512 // Mix the average result in with the orginal data. 513 ahi = _hi(src3_0d) | ahi; 514 alo = _lo(src3_0d) | alo; 515 516 _memd8(des) = _itod(ahi, alo); 517 518 des += 8; 519 src += 4; 520 } 521 } 522 523 524 /**************************************************************************** 525 * 526 * ROUTINE : vertical_band_1_2_scale_c64 527 * 528 * INPUTS : unsigned char *dest : Pointer to destination data. 
529 * unsigned int dest_pitch : Stride of destination data. 530 * unsigned int dest_width : Width of destination data. 531 * 532 * OUTPUTS : None. 533 * 534 * RETURNS : void 535 * 536 * FUNCTION : Scales vertical band of pixels by scale 1 to 2. The 537 * height of the band scaled is 1-pixel. 538 * 539 * SPECIAL NOTES : The routine uses the first line of the band below 540 * the current band. 541 * Destination width must be a multiple of 4. Because the 542 * intput must be, therefore the output must be. 543 * 544 ****************************************************************************/ 545 static 546 void vertical_band_1_2_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) 547 { 548 unsigned int i; 549 unsigned int a, b; 550 unsigned int *restrict line_a = (unsigned int *)dest; 551 unsigned int *restrict line_b = (unsigned int *)(dest + (dest_pitch * 2)); 552 unsigned int *restrict des = (unsigned int *)(dest + dest_pitch); 553 554 for (i = 0; i < dest_width / 4; i++) 555 { 556 a = _mem4(line_a++); 557 b = _mem4(line_b++); 558 559 _mem4(des++) = _avgu4(a, b); 560 } 561 } 562 563 /**************************************************************************** 564 * 565 * ROUTINE : last_vertical_band_1_2_scale_c64 566 * 567 * INPUTS : unsigned char *dest : Pointer to destination data. 568 * unsigned int dest_pitch : Stride of destination data. 569 * unsigned int dest_width : Width of destination data. 570 * 571 * OUTPUTS : None. 572 * 573 * RETURNS : void 574 * 575 * FUNCTION : Scales last vertical band of pixels by scale 1 to 2. The 576 * height of the band scaled is 1-pixel. 577 * 578 * SPECIAL NOTES : The routine does not have available the first line of 579 * the band below the current band, since this is the 580 * last band. Again, width must be a multiple of 4. 
581 * 582 ****************************************************************************/ 583 static 584 void last_vertical_band_1_2_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) 585 { 586 unsigned int i; 587 unsigned int *restrict src = (unsigned int *)dest; 588 unsigned int *restrict des = (unsigned int *)(dest + dest_pitch); 589 590 for (i = 0; i < dest_width / 4; ++i) 591 { 592 _mem4(des++) = _mem4(src++); 593 } 594 } 595 596 void 597 register_generic_scalers(void) 598 { 599 vp8_horizontal_line_1_2_scale = horizontal_line_1_2_scale_c64; 600 vp8_vertical_band_1_2_scale = vertical_band_1_2_scale_c64; 601 vp8_last_vertical_band_1_2_scale = last_vertical_band_1_2_scale_c64; 602 vp8_horizontal_line_3_5_scale = horizontal_line_3_5_scale_c64; 603 vp8_vertical_band_3_5_scale = vertical_band_3_5_scale_c64; 604 vp8_last_vertical_band_3_5_scale = last_vertical_band_3_5_scale_c64; 605 vp8_horizontal_line_4_5_scale = horizontal_line_4_5_scale_c64; 606 vp8_vertical_band_4_5_scale = vertical_band_4_5_scale_c64; 607 vp8_last_vertical_band_4_5_scale = last_vertical_band_4_5_scale_c64; 608 } 609