/* Copyright (C) 2007-2008 The Android Open Source Project
**
** This software is licensed under the terms of the GNU General Public
** License version 2, as published by the Free Software Foundation, and
** may be copied, distributed, and modified under those terms.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
*/
/* this file contains template code and may be included multiple times */

/* How this template is used:
 *
 *  - The first inclusion defines the 'argb_t' pixel accumulator type and the
 *    ARGB_xxx() macro API below (guarded by ARGB_T_DEFINED).  Two back-ends
 *    exist: an MMX one, selected when USE_MMX is non-zero, and a portable
 *    one that works on plain 32-bit integers.
 *
 *  - Each scaler further down is only compiled when its selector macro is
 *    defined by the including file.  ARGB_SCALE_GENERIC,
 *    ARGB_SCALE_UP_QUICK_4x4 and ARGB_SCALE_NEAREST expand to the name of the
 *    generated function; ARGB_SCALE_05_TO_10 and ARGB_SCALE_UP_BILINEAR only
 *    enable functions with fixed names.  Every selector is #undef'd after
 *    its section.
 *
 *  - The 'ScaleOp' type is not declared here; it is expected to be provided
 *    by the including file before any scaler is instantiated.
 *
 * The multi-statement macros rely on GNU statement expressions "({ ... })".
 */

#ifndef ARGB_T_DEFINED
#define ARGB_T_DEFINED

#include <stdint.h>   /* uint8_t, uint32_t */

#if USE_MMX
#include <mmintrin.h>

typedef __m64   mmx_t;
typedef mmx_t   argb_t;

/* Widen the four bytes of a packed 32-bit pixel into four 16-bit lanes.
 * 'zero' must be an all-zero register (see ARGB_DECL_ZERO).
 */
static inline mmx_t
mmx_load8888( unsigned  value, mmx_t  zero )
{
    return _mm_unpacklo_pi8( _mm_cvtsi32_si64 (value), zero);
}

/* Narrow four 16-bit lanes back to a packed 32-bit pixel, saturating each
 * lane to the 0..255 range.
 */
static inline unsigned
mmx_save8888( mmx_t  argb, mmx_t  zero )
{
    return (unsigned) _mm_cvtsi64_si32( _mm_packs_pu16( argb, zero ) );
}

/* Build the 16-bit lane pattern [ 0 | value | 0 | value ], which is the
 * multiplier layout mmx_mulshift() feeds to _mm_madd_pi16().  The pack
 * saturates, so 'value' must fit in a signed 16-bit integer.
 */
static inline mmx_t
mmx_expand16( int  value )
{
    mmx_t  t1 = _mm_cvtsi32_si64( value );
    return _mm_packs_pi32( t1, t1 );
}

/* Compute (lane * multiplier) >> rshift for each of the four 16-bit lanes
 * of 'argb'.  The products are formed in 32 bits and packed back to 16-bit
 * lanes with signed saturation.
 */
static inline mmx_t
mmx_mulshift( mmx_t  argb, int  multiplier, int  rshift, mmx_t  zero )
{
    mmx_t   ar   = _mm_unpackhi_pi16(argb, zero );
    mmx_t   gb   = _mm_unpacklo_pi16(argb, zero );
    mmx_t   mult = mmx_expand16(multiplier);

    ar = _mm_srli_pi32( _mm_madd_pi16( ar, mult ), rshift );
    gb = _mm_srli_pi32( _mm_madd_pi16( gb, mult ), rshift );

    return _mm_packs_pi32( gb, ar );
}

/* Per-lane linear interpolation: (m1*(alpha ^ 255) + m2*alpha) >> 8.
 * For alpha in 0..255 the two weights add up to 255.  The 'zero' parameter
 * is unused; it is kept so that the call signature matches the other
 * helpers.
 */
static inline mmx_t
mmx_interp255( mmx_t  m1, mmx_t  m2, mmx_t  zero, int  alpha )
{
    mmx_t  mult, mult2, t1, t2, r1, r2;

    // m1 = [ a1 | r1 | g1 | b1 ]
    // m2 = [ a2 | r2 | g2 | b2 ]
    // ("1-a" below stands for the complementary weight, alpha ^ 255)
    alpha = (alpha << 16) | (alpha ^ 255);
    mult  = _mm_cvtsi32_si64( alpha );     // mult  = [  0  |  0  |  a  | 1-a ]
    mult2 = _mm_slli_si64( mult, 32 );     // mult2 = [  a  | 1-a |  0  |  0  ]
    mult  = _mm_or_si64( mult, mult2 );    // mult  = [  a  | 1-a |  a  | 1-a ]

    t1 = _mm_unpackhi_pi16( m1, m2 );      // t1 = [ a2 | a1 | r2 | r1 ]
    r1 = _mm_madd_pi16( t1, mult );        // r1 = [   ra    |    rr   ]

    t2 = _mm_unpacklo_pi16( m1, m2 );      // t2 = [ g2 | g1 | b2 | b1 ]
    r2 = _mm_madd_pi16( t2, mult );        // r2 = [   rg    |    rb   ]

    r1 = _mm_srli_pi32( r1, 8 );
    r2 = _mm_srli_pi32( r2, 8 );

    return  _mm_packs_pi32( r2, r1 );
}

/* MMX back-end of the ARGB_xxx() API: one accumulator is a single mmx_t
 * holding four 16-bit channel lanes.  Most macros need the '_zero' register
 * declared by ARGB_DECL_ZERO() to be in scope.  See the portable back-end
 * below for the meaning of each macro.
 */
#define  ARGB_DECL_ZERO()   mmx_t    _zero = _mm_setzero_si64()
#define  ARGB_DECL(x)       mmx_t    x
#define  ARGB_DECL2(x1,x2)  mmx_t    x1, x2
#define  ARGB_ZERO(x)       x = _zero
#define  ARGB_UNPACK(x,v)   x =  mmx_load8888((v), _zero)
#define  ARGB_PACK(x)       mmx_save8888(x, _zero)
#define  ARGB_COPY(x,y)     x = y
#define  ARGB_SUM(x1,x2,x3)   x1 = _mm_add_pi32(x2, x3)
#define  ARGB_REDUCE(x,red)   \
    ({ \
        int  _red = (red) >> 8;  \
        if (_red < 256) \
            x = mmx_mulshift( x, _red, 8, _zero ); \
    })

#define  ARGB_INTERP255(x1,x2,x3,alpha)  \
    x1 = mmx_interp255( x2, x3, _zero, (alpha))

#define  ARGB_ADDW_11(x1,x2,x3)  \
    ARGB_SUM(x1,x2,x3)

#define  ARGB_ADDW_31(x1,x2,x3)  \
    ({ \
        mmx_t  _t1 = _mm_add_pi16(x2, x3);  \
        mmx_t  _t2 = _mm_slli_pi16(x2, 1);  \
        x1 = _mm_add_pi16(_t1, _t2);  \
    })

#define  ARGB_ADDW_13(x1,x2,x3)  \
    ({ \
        mmx_t  _t1 = _mm_add_pi16(x2, x3);  \
        mmx_t  _t2 = _mm_slli_pi16(x3, 1);  \
        x1 = _mm_add_pi16(_t1, _t2);  \
    })

#define  ARGB_SHR(x1,x2,s)  \
    x1 = _mm_srli_pi16(x2, s)


#define  ARGB_MULSHIFT(x1,x2,v,s)  \
    x1 = mmx_mulshift(x2, v, s, _zero)

#define  ARGB_BEGIN   _mm_empty()
#define  ARGB_DONE    _mm_empty()

#define  ARGB_RESCALE_SHIFT      10
#define  ARGB_DECL_SCALE(s2,s)   int   s2 = (int)((s)*(s)*(1 << ARGB_RESCALE_SHIFT))
#define  ARGB_RESCALE(x,s2)      x = mmx_mulshift( x, s2, ARGB_RESCALE_SHIFT, _zero )

#else /* !USE_MMX */

/* Portable back-end.  An accumulator named 'x' is really two 32-bit
 * variables, x_ag and x_rb.  Each holds two channels of the packed pixel in
 * bits 16..23 and 0..7, which leaves 8 bits of headroom above every channel
 * for sums and weighted sums:
 *
 *     x_ag = (v >> 8) & 0xff00ff     (bytes 3 and 1 of the packed pixel)
 *     x_rb =  v       & 0xff00ff     (bytes 2 and 0 of the packed pixel)
 *
 *  ARGB_DECL_ZERO()          nothing to declare in this back-end
 *  ARGB_DECL(x)/DECL2(x,y)   declare one / two accumulators
 *  ARGB_ZERO(x)              x = 0
 *  ARGB_COPY(x,y)            x = y
 *  ARGB_UNPACK(x,v)          split packed 32-bit value v into x
 *  ARGB_PACK(x)              packed 32-bit value of x (channels must already
 *                            be within 0..255)
 *  ARGB_SUM(x1,x2,x3)        x1 = x2 + x3, not masked
 *  ARGB_ADDW_11/31/13        x1 = x2 + x3, 3*x2 + x3, x2 + 3*x3, not masked
 *  ARGB_SHR(x1,x2,s)         x1 = x2 >> s, masked back to 8 bits per channel
 *  ARGB_MULSHIFT(x1,x2,v,s)  x1 = (x2 * v) >> s, masked
 *  ARGB_REDUCE(x,red)        scale x by a 16.16 coverage 'red': uses the
 *                            8-bit factor red >> 8, and leaves x untouched
 *                            when that factor is >= 256 (full coverage)
 *  ARGB_INTERP255(x1,x2,x3,a)  x1 = (x2*(256-a) + x3*a) >> 8, masked
 *  ARGB_BEGIN / ARGB_DONE    bracket a scaler body; no-ops here
 *  ARGB_DECL_SCALE(s2,s)     int s2 = s*s in ARGB_RESCALE_SHIFT fixed point
 *  ARGB_RESCALE(x,s2)        x = (x * s2) >> ARGB_RESCALE_SHIFT, masked
 */
typedef uint32_t    argb_t;

#define  ARGB_DECL_ZERO()   /* nothing */
#define  ARGB_DECL(x)       argb_t    x##_ag, x##_rb
#define  ARGB_DECL2(x1,x2)  argb_t    x1##_ag, x1##_rb, x2##_ag, x2##_rb
#define  ARGB_ZERO(x)       (x##_ag = x##_rb = 0)
#define  ARGB_COPY(x,y)     (x##_ag = y##_ag, x##_rb = y##_rb)

#define  ARGB_UNPACK(x,v)   \
    ({ \
        argb_t  _v = (argb_t)(v); \
        x##_ag = (_v >> 8) & 0xff00ff; \
        x##_rb = (_v)      & 0xff00ff; \
    })

#define  ARGB_PACK(x)      (uint32_t)(((x##_ag) << 8) | x##_rb)

#define  ARGB_SUM(x1,x2,x3)  \
    ({ \
        x1##_ag = x2##_ag + x3##_ag; \
        x1##_rb = x2##_rb + x3##_rb; \
    })

#define  ARGB_REDUCE(x,red)   \
    ({ \
        int  _red = (red) >> 8;  \
        if (_red < 256) { \
            x##_ag = ((x##_ag*_red) >> 8) & 0xff00ff; \
            x##_rb = ((x##_rb*_red) >> 8) & 0xff00ff; \
        } \
    })

/* note: for alpha in 0..255 the "_alpha >> 8" adjustment below is always 0,
 * so the weights are exactly (256 - alpha) and alpha.
 */
#define  ARGB_INTERP255(x1,x2,x3,alpha)  \
    ({ \
        int  _alpha  = (alpha); \
        int  _ialpha; \
        _alpha += _alpha >> 8; \
        _ialpha = 256 - _alpha; \
        x1##_ag = ((x2##_ag*_ialpha + x3##_ag*_alpha) >> 8) & 0xff00ff; \
        x1##_rb = ((x2##_rb*_ialpha + x3##_rb*_alpha) >> 8) & 0xff00ff; \
    })

#define  ARGB_ADDW_11(x1,x2,x3)  \
    ({ \
        x1##_ag = (x2##_ag + x3##_ag);  \
        x1##_rb = (x2##_rb + x3##_rb);  \
    })

#define  ARGB_ADDW_31(x1,x2,x3)  \
    ({ \
        x1##_ag = (3*x2##_ag + x3##_ag);  \
        x1##_rb = (3*x2##_rb + x3##_rb);  \
    })

#define  ARGB_ADDW_13(x1,x2,x3)  \
    ({ \
        x1##_ag = (x2##_ag + 3*x3##_ag);  \
        x1##_rb = (x2##_rb + 3*x3##_rb);  \
    })

#define  ARGB_MULSHIFT(x1,x2,v,s)   \
    ({ \
        unsigned  _vv = (v);  \
        x1##_ag = ((x2##_ag * _vv) >> (s)) & 0xff00ff;  \
        x1##_rb = ((x2##_rb * _vv) >> (s)) & 0xff00ff;  \
    })

#define  ARGB_SHR(x1,x2,s)  \
    ({ \
        int  _s = (s); \
        x1##_ag = (x2##_ag >> _s) & 0xff00ff; \
        x1##_rb = (x2##_rb >> _s) & 0xff00ff; \
    })

#define  ARGB_BEGIN   ((void)0)
#define  ARGB_DONE    ((void)0)

#define  ARGB_RESCALE_SHIFT      8
#define  ARGB_DECL_SCALE(s2,s)   int   s2 = (int)((s)*(s)*(1 << ARGB_RESCALE_SHIFT))
#define  ARGB_RESCALE(x,scale2)  ARGB_MULSHIFT(x,x,scale2,ARGB_RESCALE_SHIFT)

#endif /* !USE_MMX */

/* helpers shared by both back-ends: accumulate, and load/store a pixel
 * through a pointer to its packed 32-bit value
 */
#define  ARGB_ADD(x1,x2)     ARGB_SUM(x1,x1,x2)
#define  ARGB_READ(x,p)      ARGB_UNPACK(x,*(uint32_t*)(p))
#define  ARGB_WRITE(x,p)     *(uint32_t*)(p) = ARGB_PACK(x)

#endif /* !ARGB_T_DEFINED */



#ifdef ARGB_SCALE_GENERIC
/* Box-filter scaler.
 *
 * op->sx, op->sy, op->ix and op->iy are handled as 16.16 fixed-point source
 * coordinates and per-destination-pixel increments.  Each destination pixel
 * covers the source rectangle (sx1,sy1)-(sx2,sy2).  Every source pixel that
 * intersects it is weighted by its covered area, the weighted pixels are
 * summed, and the sum is rescaled by scale^2.
 *
 * Writes op->rd.w x op->rd.h pixels (4 bytes each) starting at op->dst_line.
 */
static void
ARGB_SCALE_GENERIC( ScaleOp*   op )
{
    int        dst_pitch = op->dst_pitch;
    int        src_pitch = op->src_pitch;
    uint8_t*   dst_line  = op->dst_line;
    uint8_t*   src_line  = op->src_line;
    ARGB_DECL_SCALE(scale2, op->scale);
    int        h;
    int        sx        = op->sx;
    int        sy        = op->sy;
    int        ix        = op->ix;
    int        iy        = op->iy;

    ARGB_BEGIN;

    /* move to the first source pixel, only keep the fractional offsets */
    src_line += (sx >> 16)*4 + (sy >> 16)*src_pitch;
    sx       &= 0xffff;
    sy       &= 0xffff;

    for ( h = op->rd.h; h > 0; h-- ) {
        uint8_t*  dst     = dst_line;
        uint8_t*  src     = src_line;
        uint8_t*  dst_end = dst + 4*op->rd.w;
        int       sx1     = sx;
        int       sy1     = sy;

        for ( ; dst < dst_end; ) {
            int  sx2 = sx1 + ix;
            int  sy2 = sy1 + iy;

            ARGB_DECL_ZERO();
            ARGB_DECL(spix);
            ARGB_DECL(pix);
            ARGB_ZERO(pix);

            /* the current destination pixel maps to the (sx1,sy1)-(sx2,sy2)
             * source square, we're going to compute the sum of its pixels'
             * colors...  simple box filtering
             */
            {
                int  gsy, gsx;
                for ( gsy = 0; gsy < sy2; gsy += 65536 ) {
                    for ( gsx = 0; gsx < sx2; gsx += 65536 ) {
                        uint8_t*  s    = src + (gsx >> 16)*4 + (gsy >> 16)*src_pitch;
                        int       xmin = gsx, xmax = gsx + 65536, ymin = gsy, ymax = gsy + 65536;
                        unsigned  ww, hh;
                        unsigned  red;

                        /* clip this source pixel to the box */
                        if (xmin < sx1) xmin = sx1;
                        if (xmax > sx2) xmax = sx2;
                        if (ymin < sy1) ymin = sy1;
                        if (ymax > sy2) ymax = sy2;

                        ww  = (unsigned)(xmax-xmin);
                        red = ww;

                        /* coverage = ww*hh in 16.16; a full-height pixel
                         * (hh == 65536) skips the multiplication
                         */
                        hh  = (unsigned)(ymax-ymin);
                        red = (hh < 65536) ? (red*hh >> 16U) : red;

                        ARGB_READ(spix,s);
                        ARGB_REDUCE(spix,red);
                        ARGB_ADD(pix,spix);
                    }
                }
            }

            ARGB_RESCALE(pix,scale2);
            ARGB_WRITE(pix,dst);

            sx1  = sx2;
            src += (sx1 >> 16)*4;
            sx1 &= 0xffff;
            dst += 4;
        }

        sy       += iy;
        src_line += (sy >> 16)*src_pitch;
        sy       &= 0xffff;

        dst_line += dst_pitch;
    }
    ARGB_DONE;
}
#endif
#undef  ARGB_SCALE_GENERIC


#ifdef ARGB_SCALE_05_TO_10
/* Product of two 16.16 coverage values in 0..65536.  The 65536*65536 case is
 * special-cased because it would overflow the 32-bit multiplication.
 */
static inline int cross( int  x, int  y ) {
    if (x == 65536 && y == 65536)
        return 65536;

    return (int)((unsigned)x * (unsigned)y >> 16U);
}

/* Box-filter scaler with the loops unrolled for boxes spanning at most 3x3
 * source pixels: a top and a bottom band plus an optional middle band, each
 * made of a left and a right pixel plus an optional center pixel.
 *
 * NOTE(review): the second pixel of each band is always read, so this
 * presumably requires increments above 1.0 (scales in the 0.5..1.0 range, as
 * the name suggests) - confirm against the code that selects this scaler.
 */
static void
scale_05_to_10( ScaleOp*   op )
{
    int        dst_pitch = op->dst_pitch;
    int        src_pitch = op->src_pitch;
    uint8_t*   dst_line  = op->dst_line;
    uint8_t*   src_line  = op->src_line;
    ARGB_DECL_SCALE(scale2, op->scale);
    int        h;
    int        sx        = op->sx;
    int        sy        = op->sy;
    int        ix        = op->ix;
    int        iy        = op->iy;

    ARGB_BEGIN;

    /* move to the first source pixel, only keep the fractional offsets */
    src_line += (sx >> 16)*4 + (sy >> 16)*src_pitch;
    sx       &= 0xffff;
    sy       &= 0xffff;

    for ( h = op->rd.h; h > 0; h-- ) {
        uint8_t*  dst     = dst_line;
        uint8_t*  src     = src_line;
        uint8_t*  dst_end = dst + 4*op->rd.w;
        int       sx1     = sx;
        int       sy1     = sy;

        for ( ; dst < dst_end; ) {
            int  sx2 = sx1 + ix;
            int  sy2 = sy1 + iy;

            ARGB_DECL_ZERO();
            ARGB_DECL2(spix, pix);

            int  off = src_pitch;
            int  fx1 = sx1 & 0xffff;
            int  fx2 = sx2 & 0xffff;
            int  fy1 = sy1 & 0xffff;
            int  fy2 = sy2 & 0xffff;

            /* true when a fully covered column / row lies between the
             * first and the last one
             */
            int  center_x = ((sx1 >> 16) + 1) < ((sx2-1) >> 16);
            int  center_y = ((sy1 >> 16) + 1) < ((sy2-1) >> 16);

            ARGB_ZERO(pix);

            /* a box ending exactly on a pixel boundary fully covers its
             * last pixel
             */
            if (fx2 == 0) {
                fx2 = 65536;
            }
            if (fy2 == 0) {
                fy2 = 65536;
            }
            /* coverage of the first column / row */
            fx1 = 65536 - fx1;
            fy1 = 65536 - fy1;

            /** TOP BAND
             **/

            /* top-left pixel */
            ARGB_READ(spix,src);
            ARGB_REDUCE(spix,cross(fx1,fy1));
            ARGB_ADD(pix,spix);

            /* top-center pixel, if any */
            ARGB_READ(spix,src + 4);
            if (center_x) {
                ARGB_REDUCE(spix,fy1);
                ARGB_ADD(pix,spix);
                ARGB_READ(spix,src + 8);
            }

            /* top-right pixel */
            ARGB_REDUCE(spix,cross(fx2,fy1));
            ARGB_ADD(pix,spix);

            /** MIDDLE BAND, IF ANY
             **/
            if (center_y) {
                /* left-middle pixel */
                ARGB_READ(spix,src + off);
                ARGB_REDUCE(spix,fx1);
                ARGB_ADD(pix,spix);

                /* center pixel, if any */
                ARGB_READ(spix,src + off + 4);
                if (center_x) {
                    ARGB_ADD(pix,spix);
                    ARGB_READ(spix,src + off + 8);
                }

                /* right-middle pixel */
                ARGB_REDUCE(spix,fx2);
                ARGB_ADD(pix,spix);

                off += src_pitch;
            }

            /** BOTTOM BAND
             **/
            /* left-bottom pixel */
            ARGB_READ(spix,src + off);
            ARGB_REDUCE(spix,cross(fx1,fy2));
            ARGB_ADD(pix,spix);

            /* center-bottom, if any */
            ARGB_READ(spix,src + off + 4);
            if (center_x) {
                ARGB_REDUCE(spix,fy2);
                ARGB_ADD(pix,spix);
                ARGB_READ(spix,src + off + 8);
            }

            /* right-bottom pixel */
            ARGB_REDUCE(spix,cross(fx2,fy2));
            ARGB_ADD(pix,spix);

            /** WRITE IT
             **/
            ARGB_RESCALE(pix,scale2);
            ARGB_WRITE(pix,dst);

            sx1  = sx2;
            src += (sx1 >> 16)*4;
            sx1 &= 0xffff;
            dst += 4;
        }

        sy       += iy;
        src_line += (sy >> 16)*src_pitch;
        sy       &= 0xffff;

        dst_line += dst_pitch;
    }
    ARGB_DONE;
}
#endif
#undef  ARGB_SCALE_05_TO_10


#ifdef ARGB_SCALE_UP_BILINEAR
/* Bilinear scaler.  Each destination pixel is interpolated between its four
 * nearest source pixels, using the top 8 bits of the fractional source
 * coordinates as weights.  Neighbour coordinates are clamped to
 * [0, op->src_w-1] x [0, op->src_h-1].
 */
static void
scale_up_bilinear( ScaleOp*  op )
{
    int        dst_pitch = op->dst_pitch;
    int        src_pitch = op->src_pitch;
    uint8_t*   dst_line  = op->dst_line;
    uint8_t*   src_line  = op->src_line;
    int        sx        = op->sx;
    int        sy        = op->sy;
    int        ix        = op->ix;
    int        iy        = op->iy;
    int        xlimit, ylimit;
    int        h, sx0;

    ARGB_BEGIN;

    /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
    /* the four nearest source pixels, which are at (0.5,0.5) offsets */

    sx = sx + ix/2 - 32768;
    sy = sy + iy/2 - 32768;

    xlimit = (op->src_w-1);
    ylimit = (op->src_h-1);

    sx0 = sx;

    for ( h = op->rd.h; h > 0; h-- ) {
        uint8_t*  dst     = dst_line;
        uint8_t*  dst_end = dst + 4*op->rd.w;

        sx = sx0;
        for ( ; dst < dst_end; ) {
            int        ex1, ex2, ey1, ey2, alpha;
            uint8_t*   s;

            ARGB_DECL_ZERO();
            ARGB_DECL2(spix1,spix2);
            ARGB_DECL2(pix3,pix4);
            ARGB_DECL(pix);

            /* find the four neighbours */
            ex1 = (sx >> 16);
            ey1 = (sy >> 16);
            ex2 = (sx+65535) >> 16;
            ey2 = (sy+65535) >> 16;

            if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
            if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
            if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
            if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;

            /* from now on, ex2/ey2 are byte offsets relative to (ex1,ey1) */
            ex2 = (ex2-ex1)*4;
            ey2 = (ey2-ey1)*src_pitch;

            /* interpolate */
            s   = src_line + ex1*4 + ey1*src_pitch;
            ARGB_READ(spix1, s);
            ARGB_READ(spix2, s+ex2);

            alpha  = (sx >> 8) & 0xff;
            ARGB_INTERP255(pix3,spix1,spix2,alpha);

            s  += ey2;
            ARGB_READ(spix1, s);
            ARGB_READ(spix2, s+ex2);

            ARGB_INTERP255(pix4,spix1,spix2,alpha);

            alpha = (sy >> 8) & 0xff;
            ARGB_INTERP255(pix,pix3,pix4,alpha);

            ARGB_WRITE(pix,dst);

            sx  += ix;
            dst += 4;
        }

        sy       += iy;
        dst_line += dst_pitch;
    }
    ARGB_DONE;
}
#endif
#undef  ARGB_SCALE_UP_BILINEAR

#ifdef ARGB_SCALE_UP_QUICK_4x4

/* Local helpers for the scaler below.  They use its locals (p, dst, ex2,
 * ey2, pix, pix3, pix4, spix1, spix2) and are #undef'd right after it.
 *
 * ARGB_QUICK_PAIR blends the pixels at 'q' and 'q + step' into 'out' with
 * the ARGB_ADDW_xx macro 'addw', then shifts 'out' and writes it at 'dst'.
 *
 * ARGB_QUICK_QUAD blends the pixel pairs of two rows with 'addw_x', blends
 * the two results with 'addw_y', then shifts and writes.
 */
#define  ARGB_QUICK_BLEND(addw,out,q,step)  \
    do { \
        ARGB_READ(spix1, (q)); \
        ARGB_READ(spix2, (q) + (step)); \
        addw(out,spix1,spix2); \
    } while (0)

#define  ARGB_QUICK_PAIR(addw,step,shift)  \
    do { \
        ARGB_QUICK_BLEND(addw,pix,p,step); \
        ARGB_SHR(pix,pix,shift); \
        ARGB_WRITE(pix,dst); \
    } while (0)

#define  ARGB_QUICK_QUAD(addw_x,addw_y,shift)  \
    do { \
        ARGB_QUICK_BLEND(addw_x,pix3,p,ex2); \
        ARGB_QUICK_BLEND(addw_x,pix4,p + ey2,ex2); \
        addw_y(pix,pix3,pix4); \
        ARGB_SHR(pix,pix,shift); \
        ARGB_WRITE(pix,dst); \
    } while (0)

/* Quick approximation of bilinear scaling: the fractional source position is
 * quantized to quarters on each axis, which gives 16 fixed blends made only
 * of additions and shifts.  For a given axis, quarter 0 keeps the first
 * pixel, 1 blends 3:1, 2 blends 1:1 and 3 blends 1:3.
 */
static void
ARGB_SCALE_UP_QUICK_4x4( ScaleOp*  op )
{
    int        dst_pitch = op->dst_pitch;
    int        src_pitch = op->src_pitch;
    uint8_t*   dst_line  = op->dst_line;
    uint8_t*   src_line  = op->src_line;
    int        sx        = op->sx;
    int        sy        = op->sy;
    int        ix        = op->ix;
    int        iy        = op->iy;
    int        xlimit, ylimit;
    int        h, sx0;

    ARGB_BEGIN;

    /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
    /* the four nearest source pixels, which are at (0.5,0.5) offsets */

    sx = sx + ix/2 - 32768;
    sy = sy + iy/2 - 32768;

    xlimit = (op->src_w-1);
    ylimit = (op->src_h-1);

    sx0 = sx;

    for ( h = op->rd.h; h > 0; h-- ) {
        uint8_t*  dst     = dst_line;
        uint8_t*  dst_end = dst + 4*op->rd.w;

        sx = sx0;
        for ( ; dst < dst_end; ) {
            int        ex1, ex2, ey1, ey2;
            uint8_t*   p;
            ARGB_DECL_ZERO();
            ARGB_DECL(pix);
            ARGB_DECL2(spix1, spix2);
            ARGB_DECL2(pix3, pix4);

            /* find the four neighbours */
            ex1 = (sx >> 16);
            ey1 = (sy >> 16);
            ex2 = (sx+65535) >> 16;
            ey2 = (sy+65535) >> 16;

            if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
            if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
            if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
            if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;

            /* interpolate */
            p   = (src_line + ex1*4 + ey1*src_pitch);

            /* from now on, ex2/ey2 are byte offsets relative to (ex1,ey1) */
            ex2 = (ex2-ex1)*4;
            ey2 = (ey2-ey1)*src_pitch;

            /* bits 0-1: x quarter, bits 2-3: y quarter */
            switch (((sx >> 14) & 3) | ((sy >> 12) & 12)) {
                case 0:
                    *(uint32_t*)dst = *(uint32_t*)p;
                    break;

                /* top-line is easy */
                case 1:
                    ARGB_QUICK_PAIR(ARGB_ADDW_31, ex2, 2);
                    break;

                case 2:
                    ARGB_QUICK_PAIR(ARGB_ADDW_11, ex2, 1);
                    break;

                case 3:
                    ARGB_QUICK_PAIR(ARGB_ADDW_13, ex2, 2);
                    break;

                /* second line is harder */
                case 4:
                    ARGB_QUICK_PAIR(ARGB_ADDW_31, ey2, 2);
                    break;

                case 5:
                    ARGB_QUICK_QUAD(ARGB_ADDW_31, ARGB_ADDW_31, 4);
                    break;

                case 6:
                    ARGB_QUICK_QUAD(ARGB_ADDW_11, ARGB_ADDW_31, 3);
                    break;

                case 7:
                    ARGB_QUICK_QUAD(ARGB_ADDW_13, ARGB_ADDW_31, 4);
                    break;

                /* third line */
                case 8:
                    ARGB_QUICK_PAIR(ARGB_ADDW_11, ey2, 1);
                    break;

                case 9:
                    ARGB_QUICK_QUAD(ARGB_ADDW_31, ARGB_ADDW_11, 3);
                    break;

                case 10:
                    ARGB_QUICK_QUAD(ARGB_ADDW_11, ARGB_ADDW_11, 2);
                    break;

                case 11:
                    ARGB_QUICK_QUAD(ARGB_ADDW_13, ARGB_ADDW_11, 3);
                    break;

                /* last line */
                case 12:
                    ARGB_QUICK_PAIR(ARGB_ADDW_13, ey2, 2);
                    break;

                case 13:
                    ARGB_QUICK_QUAD(ARGB_ADDW_31, ARGB_ADDW_13, 4);
                    break;

                case 14:
                    ARGB_QUICK_QUAD(ARGB_ADDW_11, ARGB_ADDW_13, 3);
                    break;

                default:
                    ARGB_QUICK_QUAD(ARGB_ADDW_13, ARGB_ADDW_13, 4);
            }
            sx  += ix;
            dst += 4;
        }

        sy       += iy;
        dst_line += dst_pitch;
    }
    ARGB_DONE;
}

#undef  ARGB_QUICK_QUAD
#undef  ARGB_QUICK_PAIR
#undef  ARGB_QUICK_BLEND
#endif
#undef  ARGB_SCALE_UP_QUICK_4x4


#ifdef ARGB_SCALE_NEAREST
/* this version scales up with nearest neighbours - looks crap */
/* Each destination pixel is a straight copy of one source pixel, no blending
 * is performed.  Coordinates are clamped to [0, op->src_w-1] x
 * [0, op->src_h-1].
 */
static void
ARGB_SCALE_NEAREST( ScaleOp*  op )
{
    int        dst_pitch = op->dst_pitch;
    int        src_pitch = op->src_pitch;
    uint8_t*   dst_line  = op->dst_line;
    uint8_t*   src_line  = op->src_line;
    int        sx        = op->sx;
    int        sy        = op->sy;
    int        ix        = op->ix;
    int        iy        = op->iy;
    int        xlimit, ylimit;
    int        h, sx0;

    ARGB_BEGIN;

    /* the center pixel is at (sx+ix/2, sy+iy/2); shift by (0.5,0.5) so  */
    /* that the fractional parts tell which neighbour is the nearest one */

    sx = sx + ix/2 - 32768;
    sy = sy + iy/2 - 32768;

    xlimit = (op->src_w-1);
    ylimit = (op->src_h-1);

    sx0 = sx;

    for ( h = op->rd.h; h > 0; h-- ) {
        uint8_t*  dst     = dst_line;
        uint8_t*  dst_end = dst + 4*op->rd.w;

        sx = sx0;
        for ( ; dst < dst_end; ) {
            int        ex1, ex2, ey1, ey2;
            unsigned*  p;

            /* find the top-left neighbour */
            ex1 = (sx >> 16);
            ey1 = (sy >> 16);
            ex2 = ex1+1;
            ey2 = ey1+1;

            if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
            if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
            if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
            if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;

            /* move to the right / bottom neighbour when it is closer */
            p = (unsigned*)(src_line + ex1*4 + ey1*src_pitch);
            if ((sx & 0xffff) >= 32768)
                p += (ex2-ex1);
            if ((sy & 0xffff) >= 32768)
                p = (unsigned*)((char*)p + (ey2-ey1)*src_pitch);

            *(unsigned*)dst = p[0];

            sx  += ix;
            dst += 4;
        }

        sy       += iy;
        dst_line += dst_pitch;
    }
    /* pairs with ARGB_BEGIN above, like every other scaler in this file */
    ARGB_DONE;
}
#endif
#undef  ARGB_SCALE_NEAREST