/* Copyright (C) 2007-2008 The Android Open Source Project
**
** This software is licensed under the terms of the GNU General Public
** License version 2, as published by the Free Software Foundation, and
** may be copied, distributed, and modified under those terms.
**
** This program is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
** GNU General Public License for more details.
*/
/* this file contains template code and may be included multiple times */

/* How this file is organised:
 *
 *  - The first part (guarded by ARGB_T_DEFINED, so it is only seen once)
 *    defines a small set of ARGB_xxx macros that manipulate one 32-bit
 *    pixel split into four channels.  Two interchangeable back-ends are
 *    provided: an MMX one (when USE_MMX is non-zero) and a plain integer
 *    one.
 *
 *  - The rest of the file is a series of function templates.  Each one is
 *    only compiled when the corresponding ARGB_SCALE_xxx macro is defined
 *    by the including file, and that macro is #undef'ed afterwards.
 *
 * The including file must provide <stdint.h> types and a 'ScaleOp' type
 * with at least the fields read below: dst_pitch, src_pitch, dst_line,
 * src_line, scale, sx, sy, ix, iy, rd.w, rd.h, src_w and src_h.
 *
 * Source coordinates (sx, sy) and increments (ix, iy) are used as 16.16
 * fixed-point values: '>> 16' yields a pixel index and '& 0xffff' the
 * fractional part.
 *
 * Several macros rely on GNU statement expressions '({ ... })'.
 */

#ifndef ARGB_T_DEFINED
#define ARGB_T_DEFINED

#if USE_MMX
#include <mmintrin.h>

typedef __m64   mmx_t;
typedef mmx_t   argb_t;

/* expand a packed 32-bit pixel into four 16-bit lanes (one per channel) */
static inline mmx_t
mmx_load8888( unsigned  value, mmx_t  zero )
{
    return _mm_unpacklo_pi8( _mm_cvtsi32_si64 (value), zero);
}

/* pack four 16-bit lanes back into a 32-bit pixel, with unsigned
 * saturation of each lane to 8 bits */
static inline unsigned
mmx_save8888( mmx_t  argb, mmx_t  zero )
{
    return (unsigned) _mm_cvtsi64_si32( _mm_packs_pu16( argb, zero ) );
}

/* build the 16-bit lane vector [ 0 | value | 0 | value ], which is the
 * multiplier layout expected by _mm_madd_pi16() in mmx_mulshift() */
static inline mmx_t
mmx_expand16( int  value )
{
    mmx_t  t1 = _mm_cvtsi32_si64( value );
    return _mm_packs_pi32( t1, t1 );
}

/* compute (channel * multiplier) >> rshift for each of the four channels */
static inline mmx_t
mmx_mulshift( mmx_t  argb, int  multiplier, int  rshift, mmx_t  zero )
{
    mmx_t  ar   = _mm_unpackhi_pi16(argb, zero );
    mmx_t  gb   = _mm_unpacklo_pi16(argb, zero );
    mmx_t  mult = mmx_expand16(multiplier);

    ar = _mm_srli_pi32( _mm_madd_pi16( ar, mult ), rshift );
    gb = _mm_srli_pi32( _mm_madd_pi16( gb, mult ), rshift );

    return _mm_packs_pi32( gb, ar );
}

/* per-channel blend of m1 and m2: (m1*(255-alpha) + m2*alpha) >> 8,
 * with alpha expected in 0..255 */
static inline mmx_t
mmx_interp255( mmx_t  m1, mmx_t  m2, mmx_t  zero, int  alpha )
{
    mmx_t  mult, mult2, t1, t2, r1, r2;

    // m1 = [ a1 | r1 | g1 | b1 ]
    // m2 = [ a2 | r2 | g2 | b2 ]
    alpha = (alpha << 16) | (alpha ^ 255);
    mult  = _mm_cvtsi32_si64( alpha );     // mult  = [  0  |  0  |  a  | 1-a ]
    mult2 = _mm_slli_si64( mult, 32 );     // mult2 = [  a  | 1-a |  0  |  0  ]
    mult  = _mm_or_si64( mult, mult2 );    // mult  = [  a  | 1-a |  a  | 1-a ]

    t1 = _mm_unpackhi_pi16( m1, m2 );      // t1 = [ a2 | a1 | r2 | r1 ]
    r1 = _mm_madd_pi16( t1, mult );        // r1 = [   ra    |    rr   ]

    t2 = _mm_unpacklo_pi16( m1, m2 );      // t2 = [ g2 | g1 | b2 | b1 ]
    r2 = _mm_madd_pi16( t2, mult );        // r2 = [   rg    |    rb   ]

    r1 = _mm_srli_pi32( r1, 8 );
    r2 = _mm_srli_pi32( r2, 8 );

    return _mm_packs_pi32( r2, r1 );
}

/* NOTE: ARGB_SUM uses a 32-bit add on 16-bit lanes; this gives the same
 * result as a per-lane add as long as no lane overflows 16 bits. */
#define ARGB_DECL_ZERO()        mmx_t  _zero = _mm_setzero_si64()
#define ARGB_DECL(x)            mmx_t  x
#define ARGB_DECL2(x1,x2)       mmx_t  x1, x2
#define ARGB_ZERO(x)            x = _zero
#define ARGB_UNPACK(x,v)        x = mmx_load8888((v), _zero)
#define ARGB_PACK(x)            mmx_save8888(x, _zero)
#define ARGB_COPY(x,y)          x = y
#define ARGB_SUM(x1,x2,x3)      x1 = _mm_add_pi32(x2, x3)

/* scale pixel 'x' by the 16.16 factor 'red'; only the top 8 fractional
 * bits are used and factors >= 1.0 leave the pixel untouched */
#define ARGB_REDUCE(x,red) \
    ({ \
        int  _red = (red) >> 8; \
        if (_red < 256) \
            x = mmx_mulshift( x, _red, 8, _zero ); \
    })

#define ARGB_INTERP255(x1,x2,x3,alpha) \
    x1 = mmx_interp255( x2, x3, _zero, (alpha))

/* weighted sums: x1 = 1*x2 + 1*x3, 3*x2 + 1*x3, 1*x2 + 3*x3 */
#define ARGB_ADDW_11(x1,x2,x3) \
    ARGB_SUM(x1,x2,x3)

#define ARGB_ADDW_31(x1,x2,x3) \
    ({ \
        mmx_t  _t1 = _mm_add_pi16(x2, x3); \
        mmx_t  _t2 = _mm_slli_pi16(x2, 1); \
        x1 = _mm_add_pi16(_t1, _t2); \
    })

#define ARGB_ADDW_13(x1,x2,x3) \
    ({ \
        mmx_t  _t1 = _mm_add_pi16(x2, x3); \
        mmx_t  _t2 = _mm_slli_pi16(x3, 1); \
        x1 = _mm_add_pi16(_t1, _t2); \
    })

#define ARGB_SHR(x1,x2,s) \
    x1 = _mm_srli_pi16(x2, s)


#define ARGB_MULSHIFT(x1,x2,v,s) \
    x1 = mmx_mulshift(x2, v, s, _zero)

/* leave MMX mode so that x87 floating point can be used again */
#define ARGB_DONE               _mm_empty()

#define ARGB_RESCALE_SHIFT      10
#define ARGB_DECL_SCALE(s2,s)   int  s2 = (int)((s)*(s)*(1 << ARGB_RESCALE_SHIFT))
#define ARGB_RESCALE(x,s2)      x = mmx_mulshift( x, s2, ARGB_RESCALE_SHIFT, _zero )

#else /* !USE_MMX */

/* Scalar back-end: a pixel 'x' is held in two 32-bit words, x_ag and x_rb,
 * each carrying two channels in bits 0-7 and 16-23 (mask 0xff00ff).  The
 * gap between the channels gives 8 bits of headroom for intermediate sums.
 */
typedef uint32_t   argb_t;

#define ARGB_DECL_ZERO()        /* nothing */
#define ARGB_DECL(x)            argb_t  x##_ag, x##_rb
#define ARGB_DECL2(x1,x2)       argb_t  x1##_ag, x1##_rb, x2##_ag, x2##_rb
#define ARGB_ZERO(x)            (x##_ag = x##_rb = 0)
#define ARGB_COPY(x,y)          (x##_ag = y##_ag, x##_rb = y##_rb)

#define ARGB_UNPACK(x,v) \
    ({ \
        argb_t  _v = (argb_t)(v); \
        x##_ag = (_v >> 8) & 0xff00ff; \
        x##_rb = (_v)      & 0xff00ff; \
    })

#define ARGB_PACK(x)            (uint32_t)(((x##_ag) << 8) | x##_rb)

#define ARGB_SUM(x1,x2,x3) \
    ({ \
        x1##_ag = x2##_ag + x3##_ag; \
        x1##_rb = x2##_rb + x3##_rb; \
    })

/* scale pixel 'x' by the 16.16 factor 'red'; only the top 8 fractional
 * bits are used and factors >= 1.0 leave the pixel untouched */
#define ARGB_REDUCE(x,red) \
    ({ \
        int  _red = (red) >> 8; \
        if (_red < 256) { \
            x##_ag = ((x##_ag*_red) >> 8) & 0xff00ff; \
            x##_rb = ((x##_rb*_red) >> 8) & 0xff00ff; \
        } \
    })

/* blend: x1 = (x2*(256-alpha) + x3*alpha) >> 8.
 * NOTE(review): for alpha in 0..255 the '_alpha >> 8' adjustment is always
 * zero; kept as-is so that the output stays unchanged. */
#define ARGB_INTERP255(x1,x2,x3,alpha) \
    ({ \
        int  _alpha = (alpha); \
        int  _ialpha; \
        _alpha += _alpha >> 8; \
        _ialpha = 256 - _alpha; \
        x1##_ag = ((x2##_ag*_ialpha + x3##_ag*_alpha) >> 8) & 0xff00ff; \
        x1##_rb = ((x2##_rb*_ialpha + x3##_rb*_alpha) >> 8) & 0xff00ff; \
    })

/* weighted sums: x1 = 1*x2 + 1*x3, 3*x2 + 1*x3, 1*x2 + 3*x3 */
#define ARGB_ADDW_11(x1,x2,x3) \
    ({ \
        x1##_ag = (x2##_ag + x3##_ag); \
        x1##_rb = (x2##_rb + x3##_rb); \
    })

#define ARGB_ADDW_31(x1,x2,x3) \
    ({ \
        x1##_ag = (3*x2##_ag + x3##_ag); \
        x1##_rb = (3*x2##_rb + x3##_rb); \
    })

#define ARGB_ADDW_13(x1,x2,x3) \
    ({ \
        x1##_ag = (x2##_ag + 3*x3##_ag); \
        x1##_rb = (x2##_rb + 3*x3##_rb); \
    })

#define ARGB_MULSHIFT(x1,x2,v,s) \
    ({ \
        unsigned  _vv = (v); \
        x1##_ag = ((x2##_ag * _vv) >> (s)) & 0xff00ff; \
        x1##_rb = ((x2##_rb * _vv) >> (s)) & 0xff00ff; \
    })

#define ARGB_SHR(x1,x2,s) \
    ({ \
        int  _s = (s); \
        x1##_ag = (x2##_ag >> _s) & 0xff00ff; \
        x1##_rb = (x2##_rb >> _s) & 0xff00ff; \
    })

/* nothing to clean up in the scalar back-end */
#define ARGB_DONE               ((void)0)

#define ARGB_RESCALE_SHIFT      8
#define ARGB_DECL_SCALE(s2,s)   int  s2 = (int)((s)*(s)*(1 << ARGB_RESCALE_SHIFT))
#define ARGB_RESCALE(x,scale2)  ARGB_MULSHIFT(x,x,scale2,ARGB_RESCALE_SHIFT)

#endif /* !USE_MMX */

/* back-end independent helpers */
#define ARGB_ADD(x1,x2)         ARGB_SUM(x1,x1,x2)
#define ARGB_READ(x,p)          ARGB_UNPACK(x,*(uint32_t*)(p))
#define ARGB_WRITE(x,p)         *(uint32_t*)(p) = ARGB_PACK(x)

#endif /* !ARGB_T_DEFINED */



#ifdef ARGB_SCALE_GENERIC
/* Generic down-scaler using a box filter: every destination pixel is the
 * weighted sum of the source pixels overlapped by its source square, which
 * is then rescaled by op->scale squared.
 */
static void
ARGB_SCALE_GENERIC( ScaleOp*  op )
{
    int       dst_pitch = op->dst_pitch;
    int       src_pitch = op->src_pitch;
    uint8_t*  dst_line  = op->dst_line;
    uint8_t*  src_line  = op->src_line;
    ARGB_DECL_SCALE(scale2, op->scale);
    int       h;
    int       sx = op->sx;
    int       sy = op->sy;
    int       ix = op->ix;
    int       iy = op->iy;

    /* resets the MMX state in the MMX build; expands to a no-op in the
     * scalar build, where <mmintrin.h> is not included and _mm_empty()
     * would be undeclared */
    ARGB_DONE;

    src_line += (sx >> 16)*4 + (sy >> 16)*src_pitch;
    sx &= 0xffff;
    sy &= 0xffff;

    for ( h = op->rd.h; h > 0; h-- ) {
        uint8_t*  dst     = dst_line;
        uint8_t*  src     = src_line;
        uint8_t*  dst_end = dst + 4*op->rd.w;
        int       sx1     = sx;
        int       sy1     = sy;

        for ( ; dst < dst_end; ) {
            int  sx2 = sx1 + ix;
            int  sy2 = sy1 + iy;

            ARGB_DECL_ZERO();
            ARGB_DECL(spix);
            ARGB_DECL(pix);
            ARGB_ZERO(pix);

            /* the current destination pixel maps to the (sx1,sy1)-(sx2,sy2)
             * source square, we're going to compute the sum of its pixels'
             * colors...  simple box filtering
             */
            {
                int  gsy, gsx;
                for ( gsy = 0; gsy < sy2; gsy += 65536 ) {
                    for ( gsx = 0; gsx < sx2; gsx += 65536 ) {
                        uint8_t*  s    = src + (gsx >> 16)*4 + (gsy >> 16)*src_pitch;
                        int       xmin = gsx, xmax = gsx + 65536, ymin = gsy, ymax = gsy + 65536;
                        unsigned  ww, hh;
                        unsigned  red;

                        /* clip the source pixel's cell to the square */
                        if (xmin < sx1) xmin = sx1;
                        if (xmax > sx2) xmax = sx2;
                        if (ymin < sy1) ymin = sy1;
                        if (ymax > sy2) ymax = sy2;

                        ww  = (unsigned)(xmax-xmin);
                        red = ww;

                        /* weight = ww*hh in 16.16; skip the multiply when
                         * the full height is covered */
                        hh  = (unsigned)(ymax-ymin);
                        red = (hh < 65536) ? (red*hh >> 16U) : red;

                        ARGB_READ(spix,s);
                        ARGB_REDUCE(spix,red);
                        ARGB_ADD(pix,spix);
                    }
                }
            }

            ARGB_RESCALE(pix,scale2);
            ARGB_WRITE(pix,dst);

            sx1  = sx2;
            src += (sx1 >> 16)*4;
            sx1 &= 0xffff;
            dst += 4;
        }

        sy       += iy;
        src_line += (sy >> 16)*src_pitch;
        sy       &= 0xffff;

        dst_line += dst_pitch;
    }
    ARGB_DONE;
}
#endif
#undef ARGB_SCALE_GENERIC


#ifdef ARGB_SCALE_05_TO_10
/* product of two 16.16 weights, special-casing 1.0 * 1.0 so that the
 * 32-bit unsigned multiplication cannot overflow */
static inline int cross( int  x, int  y ) {
    if (x == 65536 && y == 65536)
        return 65536;

    return (int)((unsigned)x * (unsigned)y >> 16U);
}

/* Box-filter scaler that reads at most a 3x3 block of source pixels per
 * destination pixel: a top, an optional middle and a bottom band, each
 * made of a left, an optional center and a right pixel.
 */
static void
scale_05_to_10( ScaleOp*  op )
{
    int       dst_pitch = op->dst_pitch;
    int       src_pitch = op->src_pitch;
    uint8_t*  dst_line  = op->dst_line;
    uint8_t*  src_line  = op->src_line;
    ARGB_DECL_SCALE(scale2, op->scale);
    int       h;
    int       sx = op->sx;
    int       sy = op->sy;
    int       ix = op->ix;
    int       iy = op->iy;

    /* resets the MMX state in the MMX build; expands to a no-op in the
     * scalar build, where <mmintrin.h> is not included and _mm_empty()
     * would be undeclared */
    ARGB_DONE;

    src_line += (sx >> 16)*4 + (sy >> 16)*src_pitch;
    sx &= 0xffff;
    sy &= 0xffff;

    for ( h = op->rd.h; h > 0; h-- ) {
        uint8_t*  dst     = dst_line;
        uint8_t*  src     = src_line;
        uint8_t*  dst_end = dst + 4*op->rd.w;
        int       sx1     = sx;
        int       sy1     = sy;

        for ( ; dst < dst_end; ) {
            int  sx2 = sx1 + ix;
            int  sy2 = sy1 + iy;

            ARGB_DECL_ZERO();
            ARGB_DECL2(spix, pix);

            int  off = src_pitch;
            int  fx1 = sx1 & 0xffff;
            int  fx2 = sx2 & 0xffff;
            int  fy1 = sy1 & 0xffff;
            int  fy2 = sy2 & 0xffff;

            /* non-zero when a full pixel column / row lies strictly
             * between the first and the last one */
            int  center_x = ((sx1 >> 16) + 1) < ((sx2-1) >> 16);
            int  center_y = ((sy1 >> 16) + 1) < ((sy2-1) >> 16);

            ARGB_ZERO(pix);

            /* an end coordinate on a pixel boundary covers the whole
             * previous pixel */
            if (fx2 == 0) {
                fx2 = 65536;
            }
            if (fy2 == 0) {
                fy2 = 65536;
            }
            /* coverage of the first column / row */
            fx1 = 65536 - fx1;
            fy1 = 65536 - fy1;

            /** TOP BAND
             **/

            /* top-left pixel */
            ARGB_READ(spix,src);
            ARGB_REDUCE(spix,cross(fx1,fy1));
            ARGB_ADD(pix,spix);

            /* top-center pixel, if any */
            ARGB_READ(spix,src + 4);
            if (center_x) {
                ARGB_REDUCE(spix,fy1);
                ARGB_ADD(pix,spix);
                ARGB_READ(spix,src + 8);
            }

            /* top-right pixel */
            ARGB_REDUCE(spix,cross(fx2,fy1));
            ARGB_ADD(pix,spix);

            /** MIDDLE BAND, IF ANY
             **/
            if (center_y) {
                /* left-middle pixel */
                ARGB_READ(spix,src + off);
                ARGB_REDUCE(spix,fx1);
                ARGB_ADD(pix,spix);

                /* center pixel, if any */
                ARGB_READ(spix,src + off + 4);
                if (center_x) {
                    ARGB_ADD(pix,spix);
                    ARGB_READ(spix,src + off + 8);
                }

                /* right-middle pixel */
                ARGB_REDUCE(spix,fx2);
                ARGB_ADD(pix,spix);

                off += src_pitch;
            }

            /** BOTTOM BAND
             **/
            /* left-bottom pixel */
            ARGB_READ(spix,src + off);
            ARGB_REDUCE(spix,cross(fx1,fy2));
            ARGB_ADD(pix,spix);

            /* center-bottom, if any */
            ARGB_READ(spix,src + off + 4);
            if (center_x) {
                ARGB_REDUCE(spix,fy2);
                ARGB_ADD(pix,spix);
                ARGB_READ(spix,src + off + 8);
            }

            /* right-bottom pixel */
            ARGB_REDUCE(spix,cross(fx2,fy2));
            ARGB_ADD(pix,spix);

            /** WRITE IT
             **/
            ARGB_RESCALE(pix,scale2);
            ARGB_WRITE(pix,dst);

            sx1  = sx2;
            src += (sx1 >> 16)*4;
            sx1 &= 0xffff;
            dst += 4;
        }

        sy       += iy;
        src_line += (sy >> 16)*src_pitch;
        sy       &= 0xffff;

        dst_line += dst_pitch;
    }
    ARGB_DONE;
}
#endif
#undef ARGB_SCALE_05_TO_10


#ifdef ARGB_SCALE_UP_BILINEAR
/* Bilinear up-scaler: each destination pixel is interpolated from its
 * four nearest source pixels, with coordinates clamped to the source
 * image (0..src_w-1, 0..src_h-1).
 */
static void
scale_up_bilinear( ScaleOp*  op )
{
    int       dst_pitch = op->dst_pitch;
    int       src_pitch = op->src_pitch;
    uint8_t*  dst_line  = op->dst_line;
    uint8_t*  src_line  = op->src_line;
    int       sx = op->sx;
    int       sy = op->sy;
    int       ix = op->ix;
    int       iy = op->iy;
    int       xlimit, ylimit;
    int       h, sx0;

    /* resets the MMX state in the MMX build; expands to a no-op in the
     * scalar build, where <mmintrin.h> is not included and _mm_empty()
     * would be undeclared */
    ARGB_DONE;

    /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
    /* the four nearest source pixels, which are at (0.5,0.5) offsets */

    sx = sx + ix/2 - 32768;
    sy = sy + iy/2 - 32768;

    xlimit = (op->src_w-1);
    ylimit = (op->src_h-1);

    sx0 = sx;

    for ( h = op->rd.h; h > 0; h-- ) {
        uint8_t*  dst     = dst_line;
        uint8_t*  dst_end = dst + 4*op->rd.w;

        sx = sx0;
        for ( ; dst < dst_end; ) {
            int       ex1, ex2, ey1, ey2, alpha;
            uint8_t*  s;

            ARGB_DECL_ZERO();
            ARGB_DECL2(spix1,spix2);
            ARGB_DECL2(pix3,pix4);
            ARGB_DECL(pix);

            /* find the four neighbours */
            ex1 = (sx >> 16);
            ey1 = (sy >> 16);
            ex2 = (sx+65535) >> 16;
            ey2 = (sy+65535) >> 16;

            if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
            if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
            if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
            if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;

            /* turn the second coordinates into byte offsets relative
             * to the first pixel */
            ex2 = (ex2-ex1)*4;
            ey2 = (ey2-ey1)*src_pitch;

            /* interpolate */
            s = src_line + ex1*4 + ey1*src_pitch;
            ARGB_READ(spix1, s);
            ARGB_READ(spix2, s+ex2);

            alpha = (sx >> 8) & 0xff;
            ARGB_INTERP255(pix3,spix1,spix2,alpha);

            s += ey2;
            ARGB_READ(spix1, s);
            ARGB_READ(spix2, s+ex2);

            ARGB_INTERP255(pix4,spix1,spix2,alpha);

            alpha = (sy >> 8) & 0xff;
            ARGB_INTERP255(pix,pix3,pix4,alpha);

            ARGB_WRITE(pix,dst);

            sx  += ix;
            dst += 4;
        }

        sy       += iy;
        dst_line += dst_pitch;
    }
    ARGB_DONE;
}
#endif
#undef ARGB_SCALE_UP_BILINEAR

#ifdef ARGB_SCALE_UP_QUICK_4x4
/* Fast approximation of the bilinear up-scaler: the fractional position is
 * quantized to a 4x4 grid (two bits per axis) and each of the 16 cases
 * blends the neighbours with weights 4:0, 3:1, 2:2 or 1:3 per axis.
 */
static void
ARGB_SCALE_UP_QUICK_4x4( ScaleOp*  op )
{
    int       dst_pitch = op->dst_pitch;
    int       src_pitch = op->src_pitch;
    uint8_t*  dst_line  = op->dst_line;
    uint8_t*  src_line  = op->src_line;
    int       sx = op->sx;
    int       sy = op->sy;
    int       ix = op->ix;
    int       iy = op->iy;
    int       xlimit, ylimit;
    int       h, sx0;

    /* resets the MMX state in the MMX build; expands to a no-op in the
     * scalar build, where <mmintrin.h> is not included and _mm_empty()
     * would be undeclared */
    ARGB_DONE;

    /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
    /* the four nearest source pixels, which are at (0.5,0.5) offsets */

    sx = sx + ix/2 - 32768;
    sy = sy + iy/2 - 32768;

    xlimit = (op->src_w-1);
    ylimit = (op->src_h-1);

    sx0 = sx;

    for ( h = op->rd.h; h > 0; h-- ) {
        uint8_t*  dst     = dst_line;
        uint8_t*  dst_end = dst + 4*op->rd.w;

        sx = sx0;
        for ( ; dst < dst_end; ) {
            int       ex1, ex2, ey1, ey2;
            uint8_t*  p;
            ARGB_DECL_ZERO();
            ARGB_DECL(pix);
            ARGB_DECL2(spix1, spix2);
            ARGB_DECL2(pix3, pix4);

            /* find the four neighbours */
            ex1 = (sx >> 16);
            ey1 = (sy >> 16);
            ex2 = (sx+65535) >> 16;
            ey2 = (sy+65535) >> 16;

            if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
            if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
            if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
            if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;

            /* interpolate */
            p = (src_line + ex1*4 + ey1*src_pitch);

            /* byte offsets of the right and bottom neighbours */
            ex2 = (ex2-ex1)*4;
            ey2 = (ey2-ey1)*src_pitch;

            /* bits 0-1: top two fractional bits of sx,
             * bits 2-3: top two fractional bits of sy */
            switch (((sx >> 14) & 3) | ((sy >> 12) & 12)) {
            case 0:
                *(uint32_t*)dst = *(uint32_t*)p;
                break;

            /* top-line is easy */
            case 1:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_31(pix,spix1,spix2);
                ARGB_SHR(pix,pix,2);
                ARGB_WRITE(pix, dst);
                break;

            case 2:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_11(pix, spix1, spix2);
                ARGB_SHR(pix,pix,1);
                ARGB_WRITE(pix, dst);
                break;

            case 3:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_13(pix,spix1,spix2);
                ARGB_SHR(pix,pix,2);
                ARGB_WRITE(pix, dst);
                break;

            /* second line is harder */
            case 4:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ey2);
                ARGB_ADDW_31(pix,spix1,spix2);
                ARGB_SHR(pix,pix,2);
                ARGB_WRITE(pix, dst);
                break;

            case 5:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_31(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_31(pix4,spix1,spix2);

                ARGB_ADDW_31(pix,pix3,pix4);
                ARGB_SHR(pix,pix,4);
                ARGB_WRITE(pix,dst);
                break;

            case 6:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_11(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_11(pix4,spix1,spix2);

                ARGB_ADDW_31(pix,pix3,pix4);
                ARGB_SHR(pix,pix,3);
                ARGB_WRITE(pix,dst);
                break;

            case 7:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_13(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_13(pix4,spix1,spix2);

                ARGB_ADDW_31(pix,pix3,pix4);
                ARGB_SHR(pix,pix,4);
                ARGB_WRITE(pix,dst);
                break;

            /* third line */
            case 8:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ey2);
                ARGB_ADDW_11(pix,spix1,spix2);
                ARGB_SHR(pix,pix,1);
                ARGB_WRITE(pix, dst);
                break;

            case 9:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_31(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_31(pix4,spix1,spix2);

                ARGB_ADDW_11(pix,pix3,pix4);
                ARGB_SHR(pix,pix,3);
                ARGB_WRITE(pix,dst);
                break;

            case 10:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_11(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_11(pix4,spix1,spix2);

                ARGB_ADDW_11(pix,pix3,pix4);
                ARGB_SHR(pix,pix,2);
                ARGB_WRITE(pix,dst);
                break;

            case 11:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_13(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_13(pix4,spix1,spix2);

                ARGB_ADDW_11(pix,pix3,pix4);
                ARGB_SHR(pix,pix,3);
                ARGB_WRITE(pix,dst);
                break;

            /* last line */
            case 12:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ey2);
                ARGB_ADDW_13(pix,spix1,spix2);
                ARGB_SHR(pix,pix,2);
                ARGB_WRITE(pix, dst);
                break;

            case 13:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_31(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_31(pix4,spix1,spix2);

                ARGB_ADDW_13(pix,pix3,pix4);
                ARGB_SHR(pix,pix,4);
                ARGB_WRITE(pix,dst);
                break;

            case 14:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_11(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_11(pix4,spix1,spix2);

                ARGB_ADDW_13(pix,pix3,pix4);
                ARGB_SHR(pix,pix,3);
                ARGB_WRITE(pix,dst);
                break;

            default:
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_13(pix3,spix1,spix2);
                p += ey2;
                ARGB_READ(spix1, p);
                ARGB_READ(spix2, p+ex2);
                ARGB_ADDW_13(pix4,spix1,spix2);

                ARGB_ADDW_13(pix,pix3,pix4);
                ARGB_SHR(pix,pix,4);
                ARGB_WRITE(pix,dst);
            }
            sx  += ix;
            dst += 4;
        }

        sy       += iy;
        dst_line += dst_pitch;
    }
    ARGB_DONE;
}
#endif
#undef ARGB_SCALE_UP_QUICK_4x4


#ifdef ARGB_SCALE_NEAREST
/* this version scales up with nearest neighbours - looks crap */
/* Each destination pixel is a plain copy of one source pixel; no ARGB_xxx
 * pixel arithmetic is involved.
 */
static void
ARGB_SCALE_NEAREST( ScaleOp*  op )
{
    int       dst_pitch = op->dst_pitch;
    int       src_pitch = op->src_pitch;
    uint8_t*  dst_line  = op->dst_line;
    uint8_t*  src_line  = op->src_line;
    int       sx = op->sx;
    int       sy = op->sy;
    int       ix = op->ix;
    int       iy = op->iy;
    int       xlimit, ylimit;
    int       h, sx0;

    /* resets the MMX state in the MMX build; expands to a no-op in the
     * scalar build, where <mmintrin.h> is not included and _mm_empty()
     * would be undeclared */
    ARGB_DONE;

    /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
    /* the four nearest source pixels, which are at (0.5,0.5) offsets */

    sx = sx + ix/2 - 32768;
    sy = sy + iy/2 - 32768;

    xlimit = (op->src_w-1);
    ylimit = (op->src_h-1);

    sx0 = sx;

    for ( h = op->rd.h; h > 0; h-- ) {
        uint8_t*  dst     = dst_line;
        uint8_t*  dst_end = dst + 4*op->rd.w;

        sx = sx0;
        for ( ; dst < dst_end; ) {
            int        ex1, ex2, ey1, ey2;
            unsigned*  p;

            /* find the top-left neighbour */
            ex1 = (sx >> 16);
            ey1 = (sy >> 16);
            ex2 = ex1+1;
            ey2 = ey1+1;

            if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
            if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
            if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
            if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;

            /* start from the top-left pixel, then move right and/or down
             * when the fractional part is at least one half */
            p = (unsigned*)(src_line + ex1*4 + ey1*src_pitch);
            if ((sx & 0xffff) >= 32768)
                p += (ex2-ex1);
            if ((sy & 0xffff) >= 32768)
                p = (unsigned*)((char*)p + (ey2-ey1)*src_pitch);

            *(unsigned*)dst = p[0];

            sx  += ix;
            dst += 4;
        }

        sy       += iy;
        dst_line += dst_pitch;
    }
}
#endif
#undef ARGB_SCALE_NEAREST