Home | History | Annotate | Download | only in skin
      1 /* Copyright (C) 2007-2008 The Android Open Source Project
      2 **
      3 ** This software is licensed under the terms of the GNU General Public
      4 ** License version 2, as published by the Free Software Foundation, and
      5 ** may be copied, distributed, and modified under those terms.
      6 **
      7 ** This program is distributed in the hope that it will be useful,
      8 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
      9 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     10 ** GNU General Public License for more details.
     11 */
     12 /* this file contains template code and may be included multiple times */
     13 
     14 #ifndef ARGB_T_DEFINED
     15 #define ARGB_T_DEFINED
     16 
     17 #if USE_MMX
     18 #include <mmintrin.h>
     19 
     20 typedef __m64   mmx_t;
     21 typedef  mmx_t  argb_t;
     22 
     23 static inline mmx_t
     24 mmx_load8888( unsigned  value, mmx_t  zero )
     25 {
     26     return _mm_unpacklo_pi8( _mm_cvtsi32_si64 (value), zero);
     27 }
     28 
     29 static inline unsigned
     30 mmx_save8888( mmx_t   argb, mmx_t  zero )
     31 {
     32     return (unsigned) _mm_cvtsi64_si32( _mm_packs_pu16( argb, zero ) );
     33 }
     34 
     35 static inline mmx_t
     36 mmx_expand16( int  value )
     37 {
     38     mmx_t  t1 = _mm_cvtsi32_si64( value );
     39     return _mm_packs_pi32( t1, t1 );
     40 }
     41 
     42 static inline mmx_t
     43 mmx_mulshift( mmx_t   argb, int  multiplier, int  rshift, mmx_t  zero )
     44 {
     45     mmx_t   ar   = _mm_unpackhi_pi16(argb, zero );
     46     mmx_t   gb   = _mm_unpacklo_pi16(argb, zero );
     47     mmx_t   mult = mmx_expand16(multiplier);
     48 
     49     ar = _mm_srli_pi32( _mm_madd_pi16( ar, mult ), rshift );
     50     gb = _mm_srli_pi32( _mm_madd_pi16( gb, mult ), rshift );
     51 
     52     return _mm_packs_pi32( gb, ar );
     53 }
     54 
     55 static inline mmx_t
     56 mmx_interp255( mmx_t  m1, mmx_t  m2, mmx_t  zero, int  alpha )
     57 {
     58     mmx_t  mult, mult2, t1, t2, r1, r2;
     59 
     60     // m1 = [ a1 | r1 | g1 | b1 ]
     61     // m2 = [ a2 | r2 | g2 | b2 ]
     62     alpha = (alpha << 16) | (alpha ^ 255);
     63     mult  = _mm_cvtsi32_si64( alpha );                   // mult  = [  0  |  0  |  a  | 1-a ]
     64     mult2 = _mm_slli_si64( mult, 32 );                   // mult2 = [  a  | 1-a |  0  |  0  ]
     65     mult  = _mm_or_si64( mult, mult2 );                  // mults = [  a  | 1-a |  a  | 1-a ]
     66 
     67     t1 = _mm_unpackhi_pi16( m1, m2 );    // t1 = [ a2 | a1 | r2 | r1 ]
     68     r1 = _mm_madd_pi16( t1, mult );      // r1 = [   ra    |    rr   ]
     69 
     70     t2 = _mm_unpacklo_pi16( m1, m2 );    // t1 = [ g2 | g1 | b2 | b1 ]
     71     r2 = _mm_madd_pi16( t2, mult );      // r2 = [   rg    |    rb   ]
     72 
     73     r1 = _mm_srli_pi32( r1, 8 );
     74     r2 = _mm_srli_pi32( r2, 8 );
     75 
     76     return  _mm_packs_pi32( r2, r1 );
     77 }
     78 
     79 #define   ARGB_DECL_ZERO()      mmx_t    _zero = _mm_setzero_si64()
     80 #define   ARGB_DECL(x)          mmx_t    x
     81 #define   ARGB_DECL2(x1,x2)     mmx_t    x1, x2
     82 #define   ARGB_ZERO(x)          x = _zero
     83 #define   ARGB_UNPACK(x,v)      x =  mmx_load8888((v), _zero)
     84 #define   ARGB_PACK(x)          mmx_save8888(x, _zero)
     85 #define   ARGB_COPY(x,y)        x = y
     86 #define   ARGB_SUM(x1,x2,x3)    x1 = _mm_add_pi32(x2, x3)
     87 #define   ARGB_REDUCE(x,red)   \
     88     ({ \
     89         int  _red = (red) >> 8;  \
     90         if (_red < 256) \
     91             x = mmx_mulshift( x, _red, 8, _zero ); \
     92     })
     93 
     94 #define  ARGB_INTERP255(x1,x2,x3,alpha)  \
     95     x1 = mmx_interp255( x2, x3, _zero, (alpha))
     96 
     97 #define    ARGB_ADDW_11(x1,x2,x3)  \
     98     ARGB_SUM(x1,x2,x3)
     99 
    100 #define    ARGB_ADDW_31(x1,x2,x3)  \
    101     ({ \
    102         mmx_t   _t1 = _mm_add_pi16(x2, x3);  \
    103         mmx_t   _t2 = _mm_slli_pi16(x2, 1);  \
    104         x1 = _mm_add_pi16(_t1, _t2);  \
    105     })
    106 
    107 #define    ARGB_ADDW_13(x1,x2,x3)  \
    108     ({ \
    109         mmx_t   _t1 = _mm_add_pi16(x2, x3);  \
    110         mmx_t   _t2 = _mm_slli_pi16(x3, 1);  \
    111         x1 = _mm_add_pi16(_t1, _t2);  \
    112     })
    113 
    114 #define    ARGB_SHR(x1,x2,s)   \
    115     x1 = _mm_srli_pi16(x2, s)
    116 
    117 
    118 #define    ARGB_MULSHIFT(x1,x2,v,s)   \
    119     x1 = mmx_mulshift(x2, v, s, _zero)
    120 
    121 #define   ARGB_BEGIN _mm_empty()
    122 #define   ARGB_DONE  _mm_empty()
    123 
    124 #define   ARGB_RESCALE_SHIFT      10
    125 #define   ARGB_DECL_SCALE(s2,s)   int   s2 = (int)((s)*(s)*(1 << ARGB_RESCALE_SHIFT))
    126 #define   ARGB_RESCALE(x,s2)      x = mmx_mulshift( x, s2, ARGB_RESCALE_SHIFT, _zero )
    127 
    128 #else /* !USE_MMX */
    129 
    130 typedef uint32_t    argb_t;
    131 
    132 #define  ARGB_DECL_ZERO()   /* nothing */
    133 #define  ARGB_DECL(x)       argb_t    x##_ag, x##_rb
    134 #define  ARGB_DECL2(x1,x2)  argb_t    x1##_ag, x1##_rb, x2##_ag, x2##_rb
    135 #define  ARGB_ZERO(x)       (x##_ag = x##_rb = 0)
    136 #define  ARGB_COPY(x,y)     (x##_ag = y##_ag, x##_rb = y##_rb)
    137 
    138 #define  ARGB_UNPACK(x,v)  \
    139     ({ \
    140         argb_t  _v = (argb_t)(v); \
    141         x##_ag = (_v >> 8) & 0xff00ff; \
    142         x##_rb = (_v)      & 0xff00ff; \
    143     })
    144 
    145 #define  ARGB_PACK(x)      (uint32_t)(((x##_ag) << 8) | x##_rb)
    146 
    147 #define   ARGB_SUM(x1,x2,x3)  \
    148     ({ \
    149         x1##_ag = x2##_ag + x3##_ag; \
    150         x1##_rb = x2##_rb + x3##_rb; \
    151     })
    152 
    153 #define   ARGB_REDUCE(x,red)   \
    154     ({ \
    155         int  _red = (red) >> 8; \
    156         if (_red < 256) { \
    157             x##_ag = ((x##_ag*_red) >> 8) & 0xff00ff; \
    158             x##_rb = ((x##_rb*_red) >> 8) & 0xff00ff; \
    159         } \
    160     })
    161 
    162 #define    ARGB_INTERP255(x1,x2,x3,alpha)  \
    163     ({ \
    164         int  _alpha = (alpha); \
    165         int  _ialpha; \
    166         _alpha += _alpha >> 8; \
    167         _ialpha = 256 - _alpha; \
    168         x1##_ag = ((x2##_ag*_ialpha + x3##_ag*_alpha) >> 8) & 0xff00ff;  \
    169         x1##_rb = ((x2##_rb*_ialpha + x3##_rb*_alpha) >> 8) & 0xff00ff;  \
    170     })
    171 
    172 #define    ARGB_ADDW_11(x1,x2,x3)  \
    173     ({ \
    174         x1##_ag = (x2##_ag + x3##_ag);  \
    175         x1##_rb = (x2##_rb + x3##_rb);  \
    176     })
    177 
    178 #define    ARGB_ADDW_31(x1,x2,x3)  \
    179     ({ \
    180         x1##_ag = (3*x2##_ag + x3##_ag);  \
    181         x1##_rb = (3*x2##_rb + x3##_rb);  \
    182     })
    183 
    184 #define    ARGB_ADDW_13(x1,x2,x3)  \
    185     ({ \
    186         x1##_ag = (x2##_ag + 3*x3##_ag);  \
    187         x1##_rb = (x2##_rb + 3*x3##_rb);  \
    188     })
    189 
    190 #define    ARGB_MULSHIFT(x1,x2,v,s)   \
    191     ({ \
    192         unsigned  _vv = (v);  \
    193         x1##_ag = ((x2##_ag * _vv) >> (s)) & 0xff00ff;  \
    194         x1##_rb = ((x2##_rb * _vv) >> (s)) & 0xff00ff;  \
    195     })
    196 
    197 #define   ARGB_SHR(x1,x2,s)  \
    198     ({  \
    199         int  _s = (s);  \
    200         x1##_ag = (x2##_ag >> _s) & 0xff00ff; \
    201         x1##_rb = (x2##_rb >> _s) & 0xff00ff; \
    202     })
    203 
    204 #define   ARGB_BEGIN ((void)0)
    205 #define   ARGB_DONE  ((void)0)
    206 
    207 #define   ARGB_RESCALE_SHIFT      8
    208 #define   ARGB_DECL_SCALE(s2,s)   int   s2 = (int)((s)*(s)*(1 << ARGB_RESCALE_SHIFT))
    209 #define   ARGB_RESCALE(x,scale2)  ARGB_MULSHIFT(x,x,scale2,ARGB_RESCALE_SHIFT)
    210 
    211 #endif /* !USE_MMX */
    212 
    213 #define   ARGB_ADD(x1,x2)     ARGB_SUM(x1,x1,x2)
    214 #define   ARGB_READ(x,p)      ARGB_UNPACK(x,*(uint32_t*)(p))
    215 #define   ARGB_WRITE(x,p)     *(uint32_t*)(p) = ARGB_PACK(x)
    216 
    217 #endif /* !ARGB_T_DEFINED */
    218 
    219 
    220 
    221 #ifdef ARGB_SCALE_GENERIC
    222 static void
    223 ARGB_SCALE_GENERIC( ScaleOp*   op )
    224 {
    225     int        dst_pitch = op->dst_pitch;
    226     int        src_pitch = op->src_pitch;
    227     uint8_t*   dst_line  = op->dst_line;
    228     uint8_t*   src_line  = op->src_line;
    229     ARGB_DECL_SCALE(scale2, op->scale);
    230     int        h;
    231     int        sx = op->sx;
    232     int        sy = op->sy;
    233     int        ix = op->ix;
    234     int        iy = op->iy;
    235 
    236     ARGB_BEGIN;
    237 
    238     src_line += (sx >> 16)*4 + (sy >> 16)*src_pitch;
    239     sx       &= 0xffff;
    240     sy       &= 0xffff;
    241 
    242     for ( h = op->rd.h; h > 0; h-- ) {
    243         uint8_t*  dst = dst_line;
    244         uint8_t*  src = src_line;
    245         uint8_t*  dst_end = dst + 4*op->rd.w;
    246         int       sx1 = sx;
    247         int       sy1 = sy;
    248 
    249         for ( ; dst < dst_end; ) {
    250             int  sx2 = sx1 + ix;
    251             int  sy2 = sy1 + iy;
    252 
    253             ARGB_DECL_ZERO();
    254             ARGB_DECL(spix);
    255             ARGB_DECL(pix);
    256             ARGB_ZERO(pix);
    257 
    258             /* the current destination pixel maps to the (sx1,sy1)-(sx2,sy2)
    259             * source square, we're going to compute the sum of its pixels'
    260             * colors...  simple box filtering
    261             */
    262             {
    263                 int  gsy, gsx;
    264                 for ( gsy = 0; gsy < sy2; gsy += 65536 ) {
    265                     for ( gsx = 0; gsx < sx2; gsx += 65536 ) {
    266                         uint8_t*  s    = src + (gsx >> 16)*4 + (gsy >> 16)*src_pitch;
    267                         int       xmin = gsx, xmax = gsx + 65536, ymin = gsy, ymax = gsy + 65536;
    268                         unsigned  ww, hh;
    269                         unsigned  red;
    270 
    271                         if (xmin < sx1) xmin = sx1;
    272                         if (xmax > sx2) xmax = sx2;
    273                         if (ymin < sy1) ymin = sy1;
    274                         if (ymax > sy2) ymax = sy2;
    275 
    276                         ww = (unsigned)(xmax-xmin);
    277                         red = ww;
    278 
    279                         hh = (unsigned)(ymax-ymin);
    280                         red = (hh < 65536) ? (red*hh >> 16U) : red;
    281 
    282                         ARGB_READ(spix,s);
    283                         ARGB_REDUCE(spix,red);
    284                         ARGB_ADD(pix,spix);
    285                     }
    286                 }
    287             }
    288 
    289             ARGB_RESCALE(pix,scale2);
    290             ARGB_WRITE(pix,dst);
    291 
    292             sx1  = sx2;
    293             src += (sx1 >> 16)*4;
    294             sx1 &= 0xffff;
    295             dst += 4;
    296         }
    297 
    298         sy       += iy;
    299         src_line += (sy >> 16)*src_pitch;
    300         sy       &= 0xffff;
    301 
    302         dst_line += dst_pitch;
    303     }
    304     ARGB_DONE;
    305 }
    306 #endif
    307 #undef  ARGB_SCALE_GENERIC
    308 
    309 
    310 #ifdef ARGB_SCALE_05_TO_10
    311 static inline int cross( int  x, int  y ) {
    312     if (x == 65536 && y == 65536)
    313         return 65536;
    314 
    315     return (int)((unsigned)x * (unsigned)y >> 16U);
    316 }
    317 
    318 static void
    319 scale_05_to_10( ScaleOp*   op )
    320 {
    321     int        dst_pitch = op->dst_pitch;
    322     int        src_pitch = op->src_pitch;
    323     uint8_t*   dst_line  = op->dst_line;
    324     uint8_t*   src_line  = op->src_line;
    325     ARGB_DECL_SCALE(scale2, op->scale);
    326     int        h;
    327     int        sx = op->sx;
    328     int        sy = op->sy;
    329     int        ix = op->ix;
    330     int        iy = op->iy;
    331 
    332     ARGB_BEGIN;
    333 
    334     src_line += (sx >> 16)*4 + (sy >> 16)*src_pitch;
    335     sx       &= 0xffff;
    336     sy       &= 0xffff;
    337 
    338     for ( h = op->rd.h; h > 0; h-- ) {
    339         uint8_t*  dst = dst_line;
    340         uint8_t*  src = src_line;
    341         uint8_t*  dst_end = dst + 4*op->rd.w;
    342         int       sx1 = sx;
    343         int       sy1 = sy;
    344 
    345         for ( ; dst < dst_end; ) {
    346             int  sx2 = sx1 + ix;
    347             int  sy2 = sy1 + iy;
    348 
    349             ARGB_DECL_ZERO();
    350             ARGB_DECL2(spix, pix);
    351 
    352             int      off = src_pitch;
    353             int      fx1 = sx1 & 0xffff;
    354             int      fx2 = sx2 & 0xffff;
    355             int      fy1 = sy1 & 0xffff;
    356             int      fy2 = sy2 & 0xffff;
    357 
    358             int      center_x = ((sx1 >> 16) + 1) < ((sx2-1) >> 16);
    359             int      center_y = ((sy1 >> 16) + 1) < ((sy2-1) >> 16);
    360 
    361             ARGB_ZERO(pix);
    362 
    363             if (fx2 == 0) {
    364                 fx2  = 65536;
    365             }
    366             if (fy2 == 0) {
    367                 fy2  = 65536;
    368             }
    369             fx1 = 65536 - fx1;
    370             fy1 = 65536 - fy1;
    371 
    372             /** TOP BAND
    373              **/
    374 
    375             /* top-left pixel */
    376             ARGB_READ(spix,src);
    377             ARGB_REDUCE(spix,cross(fx1,fy1));
    378             ARGB_ADD(pix,spix);
    379 
    380             /* top-center pixel, if any */
    381             ARGB_READ(spix,src + 4);
    382             if (center_x) {
    383                 ARGB_REDUCE(spix,fy1);
    384                 ARGB_ADD(pix,spix);
    385                 ARGB_READ(spix,src + 8);
    386             }
    387 
    388             /* top-right pixel */
    389             ARGB_REDUCE(spix,cross(fx2,fy1));
    390             ARGB_ADD(pix,spix);
    391 
    392             /** MIDDLE BAND, IF ANY
    393              **/
    394             if (center_y) {
    395                 /* left-middle pixel */
    396                 ARGB_READ(spix,src + off);
    397                 ARGB_REDUCE(spix,fx1);
    398                 ARGB_ADD(pix,spix);
    399 
    400                 /* center pixel, if any */
    401                 ARGB_READ(spix,src + off + 4);
    402                 if (center_x) {
    403                     ARGB_ADD(pix,spix);
    404                     ARGB_READ(spix,src + off + 8);
    405                 }
    406 
    407                 /* right-middle pixel */
    408                 ARGB_REDUCE(spix,fx2);
    409                 ARGB_ADD(pix,spix);
    410 
    411                 off += src_pitch;
    412             }
    413 
    414             /** BOTTOM BAND
    415              **/
    416             /* left-bottom pixel */
    417             ARGB_READ(spix,src + off);
    418             ARGB_REDUCE(spix,cross(fx1,fy2));
    419             ARGB_ADD(pix,spix);
    420 
    421             /* center-bottom, if any */
    422             ARGB_READ(spix,src + off + 4);
    423             if (center_x) {
    424                 ARGB_REDUCE(spix,fy2);
    425                 ARGB_ADD(pix,spix);
    426                 ARGB_READ(spix,src + off + 8);
    427             }
    428 
    429             /* right-bottom pixel */
    430             ARGB_REDUCE(spix,cross(fx2,fy2));
    431             ARGB_ADD(pix,spix);
    432 
    433             /** WRITE IT
    434              **/
    435             ARGB_RESCALE(pix,scale2);
    436             ARGB_WRITE(pix,dst);
    437 
    438             sx1  = sx2;
    439             src += (sx1 >> 16)*4;
    440             sx1 &= 0xffff;
    441             dst += 4;
    442         }
    443 
    444         sy       += iy;
    445         src_line += (sy >> 16)*src_pitch;
    446         sy       &= 0xffff;
    447 
    448         dst_line += dst_pitch;
    449     }
    450     ARGB_DONE;
    451 }
    452 #endif
    453 #undef ARGB_SCALE_05_TO_10
    454 
    455 
    456 #ifdef ARGB_SCALE_UP_BILINEAR
    457 static void
    458 scale_up_bilinear( ScaleOp*  op )
    459 {
    460     int        dst_pitch = op->dst_pitch;
    461     int        src_pitch = op->src_pitch;
    462     uint8_t*   dst_line  = op->dst_line;
    463     uint8_t*   src_line  = op->src_line;
    464     int        sx = op->sx;
    465     int        sy = op->sy;
    466     int        ix = op->ix;
    467     int        iy = op->iy;
    468     int        xlimit, ylimit;
    469     int        h, sx0;
    470 
    471     ARGB_BEGIN;
    472 
    473     /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
    474     /* the four nearest source pixels, which are at (0.5,0.5) offsets */
    475 
    476     sx = sx + ix/2 - 32768;
    477     sy = sy + iy/2 - 32768;
    478 
    479     xlimit = (op->src_w-1);
    480     ylimit = (op->src_h-1);
    481 
    482     sx0 = sx;
    483 
    484     for ( h = op->rd.h; h > 0; h-- ) {
    485         uint8_t*  dst = dst_line;
    486         uint8_t*  dst_end = dst + 4*op->rd.w;
    487 
    488         sx = sx0;
    489         for ( ; dst < dst_end; ) {
    490             int        ex1, ex2, ey1, ey2, alpha;
    491             uint8_t*   s;
    492 
    493             ARGB_DECL_ZERO();
    494             ARGB_DECL2(spix1,spix2);
    495             ARGB_DECL2(pix3,pix4);
    496             ARGB_DECL(pix);
    497 
    498             /* find the four neighbours */
    499             ex1 = (sx >> 16);
    500             ey1 = (sy >> 16);
    501             ex2 = (sx+65535) >> 16;
    502             ey2 = (sy+65535) >> 16;
    503 
    504             if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
    505             if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
    506             if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
    507             if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;
    508 
    509             ex2 = (ex2-ex1)*4;
    510             ey2 = (ey2-ey1)*src_pitch;
    511 
    512             /* interpolate */
    513             s   = src_line + ex1*4 + ey1*src_pitch;
    514             ARGB_READ(spix1, s);
    515             ARGB_READ(spix2, s+ex2);
    516 
    517             alpha  = (sx >> 8) & 0xff;
    518             ARGB_INTERP255(pix3,spix1,spix2,alpha);
    519 
    520             s  += ey2;
    521             ARGB_READ(spix1, s);
    522             ARGB_READ(spix2, s+ex2);
    523 
    524             ARGB_INTERP255(pix4,spix1,spix2,alpha);
    525 
    526             alpha = (sy >> 8) & 0xff;
    527             ARGB_INTERP255(pix,pix3,pix4,alpha);
    528 
    529             ARGB_WRITE(pix,dst);
    530 
    531             sx  += ix;
    532             dst += 4;
    533         }
    534 
    535         sy       += iy;
    536         dst_line += dst_pitch;
    537     }
    538     ARGB_DONE;
    539 }
    540 #endif
    541 #undef ARGB_SCALE_UP_BILINEAR
    542 
    543 #ifdef ARGB_SCALE_UP_QUICK_4x4
    544 static void
    545 ARGB_SCALE_UP_QUICK_4x4( ScaleOp*  op )
    546 {
    547     int        dst_pitch = op->dst_pitch;
    548     int        src_pitch = op->src_pitch;
    549     uint8_t*   dst_line  = op->dst_line;
    550     uint8_t*   src_line  = op->src_line;
    551     int        sx = op->sx;
    552     int        sy = op->sy;
    553     int        ix = op->ix;
    554     int        iy = op->iy;
    555     int        xlimit, ylimit;
    556     int        h, sx0;
    557 
    558     ARGB_BEGIN;
    559 
    560     /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
    561     /* the four nearest source pixels, which are at (0.5,0.5) offsets */
    562 
    563     sx = sx + ix/2 - 32768;
    564     sy = sy + iy/2 - 32768;
    565 
    566     xlimit = (op->src_w-1);
    567     ylimit = (op->src_h-1);
    568 
    569     sx0 = sx;
    570 
    571     for ( h = op->rd.h; h > 0; h-- ) {
    572         uint8_t*  dst = dst_line;
    573         uint8_t*  dst_end = dst + 4*op->rd.w;
    574 
    575         sx = sx0;
    576         for ( ; dst < dst_end; ) {
    577             int        ex1, ex2, ey1, ey2;
    578             uint8_t*   p;
    579             ARGB_DECL_ZERO();
    580             ARGB_DECL(pix);
    581             ARGB_DECL2(spix1, spix2);
    582             ARGB_DECL2(pix3, pix4);
    583 
    584             /* find the four neighbours */
    585             ex1 = (sx >> 16);
    586             ey1 = (sy >> 16);
    587             ex2 = (sx+65535) >> 16;
    588             ey2 = (sy+65535) >> 16;
    589 
    590             if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
    591             if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
    592             if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
    593             if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;
    594 
    595             /* interpolate */
    596             p   = (src_line + ex1*4 + ey1*src_pitch);
    597 
    598             ex2 = (ex2-ex1)*4;
    599             ey2 = (ey2-ey1)*src_pitch;
    600 
    601             switch (((sx >> 14) & 3) | ((sy >> 12) & 12)) {
    602                 case 0:
    603                     *(uint32_t*)dst = *(uint32_t*)p;
    604                     break;
    605 
    606                 /* top-line is easy */
    607                 case 1:
    608                     ARGB_READ(spix1, p);
    609                     ARGB_READ(spix2, p+ex2);
    610                     ARGB_ADDW_31(pix,spix1,spix2);
    611                     ARGB_SHR(pix,pix,2);
    612                     ARGB_WRITE(pix, dst);
    613                     break;
    614 
    615                 case 2:
    616                     ARGB_READ(spix1, p);
    617                     ARGB_READ(spix2, p+ex2);
    618                     ARGB_ADDW_11(pix, spix1, spix2);
    619                     ARGB_SHR(pix,pix,1);
    620                     ARGB_WRITE(pix, dst);
    621                     break;
    622 
    623                 case 3:
    624                     ARGB_READ(spix1, p);
    625                     ARGB_READ(spix2, p+ex2);
    626                     ARGB_ADDW_13(pix,spix1,spix2);
    627                     ARGB_SHR(pix,pix,2);
    628                     ARGB_WRITE(pix, dst);
    629                     break;
    630 
    631                 /* second line is harder */
    632                 case 4:
    633                     ARGB_READ(spix1, p);
    634                     ARGB_READ(spix2, p+ey2);
    635                     ARGB_ADDW_31(pix,spix1,spix2);
    636                     ARGB_SHR(pix,pix,2);
    637                     ARGB_WRITE(pix, dst);
    638                     break;
    639 
    640                 case 5:
    641                     ARGB_READ(spix1, p);
    642                     ARGB_READ(spix2, p+ex2);
    643                     ARGB_ADDW_31(pix3,spix1,spix2);
    644                     p += ey2;
    645                     ARGB_READ(spix1, p);
    646                     ARGB_READ(spix2, p+ex2);
    647                     ARGB_ADDW_31(pix4,spix1,spix2);
    648 
    649                     ARGB_ADDW_31(pix,pix3,pix4);
    650                     ARGB_SHR(pix,pix,4);
    651                     ARGB_WRITE(pix,dst);
    652                     break;
    653 
    654                 case 6:
    655                     ARGB_READ(spix1, p);
    656                     ARGB_READ(spix2, p+ex2);
    657                     ARGB_ADDW_11(pix3,spix1,spix2);
    658                     p += ey2;
    659                     ARGB_READ(spix1, p);
    660                     ARGB_READ(spix2, p+ex2);
    661                     ARGB_ADDW_11(pix4,spix1,spix2);
    662 
    663                     ARGB_ADDW_31(pix,pix3,pix4);
    664                     ARGB_SHR(pix,pix,3);
    665                     ARGB_WRITE(pix,dst);
    666                     break;
    667 
    668                 case 7:
    669                     ARGB_READ(spix1, p);
    670                     ARGB_READ(spix2, p+ex2);
    671                     ARGB_ADDW_13(pix3,spix1,spix2);
    672                     p += ey2;
    673                     ARGB_READ(spix1, p);
    674                     ARGB_READ(spix2, p+ex2);
    675                     ARGB_ADDW_13(pix4,spix1,spix2);
    676 
    677                     ARGB_ADDW_31(pix,pix3,pix4);
    678                     ARGB_SHR(pix,pix,4);
    679                     ARGB_WRITE(pix,dst);
    680                     break;
    681 
    682                  /* third line */
    683                 case 8:
    684                     ARGB_READ(spix1, p);
    685                     ARGB_READ(spix2, p+ey2);
    686                     ARGB_ADDW_11(pix,spix1,spix2);
    687                     ARGB_SHR(pix,pix,1);
    688                     ARGB_WRITE(pix, dst);
    689                     break;
    690 
    691                 case 9:
    692                     ARGB_READ(spix1, p);
    693                     ARGB_READ(spix2, p+ex2);
    694                     ARGB_ADDW_31(pix3,spix1,spix2);
    695                     p += ey2;
    696                     ARGB_READ(spix1, p);
    697                     ARGB_READ(spix2, p+ex2);
    698                     ARGB_ADDW_31(pix4,spix1,spix2);
    699 
    700                     ARGB_ADDW_11(pix,pix3,pix4);
    701                     ARGB_SHR(pix,pix,3);
    702                     ARGB_WRITE(pix,dst);
    703                     break;
    704 
    705                 case 10:
    706                     ARGB_READ(spix1, p);
    707                     ARGB_READ(spix2, p+ex2);
    708                     ARGB_ADDW_11(pix3,spix1,spix2);
    709                     p += ey2;
    710                     ARGB_READ(spix1, p);
    711                     ARGB_READ(spix2, p+ex2);
    712                     ARGB_ADDW_11(pix4,spix1,spix2);
    713 
    714                     ARGB_ADDW_11(pix,pix3,pix4);
    715                     ARGB_SHR(pix,pix,2);
    716                     ARGB_WRITE(pix,dst);
    717                     break;
    718 
    719                 case 11:
    720                     ARGB_READ(spix1, p);
    721                     ARGB_READ(spix2, p+ex2);
    722                     ARGB_ADDW_13(pix3,spix1,spix2);
    723                     p += ey2;
    724                     ARGB_READ(spix1, p);
    725                     ARGB_READ(spix2, p+ex2);
    726                     ARGB_ADDW_13(pix4,spix1,spix2);
    727 
    728                     ARGB_ADDW_11(pix,pix3,pix4);
    729                     ARGB_SHR(pix,pix,3);
    730                     ARGB_WRITE(pix,dst);
    731                     break;
    732 
    733                  /* last line */
    734                 case 12:
    735                     ARGB_READ(spix1, p);
    736                     ARGB_READ(spix2, p+ey2);
    737                     ARGB_ADDW_13(pix,spix1,spix2);
    738                     ARGB_SHR(pix,pix,2);
    739                     ARGB_WRITE(pix, dst);
    740                     break;
    741 
    742                 case 13:
    743                     ARGB_READ(spix1, p);
    744                     ARGB_READ(spix2, p+ex2);
    745                     ARGB_ADDW_31(pix3,spix1,spix2);
    746                     p += ey2;
    747                     ARGB_READ(spix1, p);
    748                     ARGB_READ(spix2, p+ex2);
    749                     ARGB_ADDW_31(pix4,spix1,spix2);
    750 
    751                     ARGB_ADDW_13(pix,pix3,pix4);
    752                     ARGB_SHR(pix,pix,4);
    753                     ARGB_WRITE(pix,dst);
    754                     break;
    755 
    756                 case 14:
    757                     ARGB_READ(spix1, p);
    758                     ARGB_READ(spix2, p+ex2);
    759                     ARGB_ADDW_11(pix3,spix1,spix2);
    760                     p += ey2;
    761                     ARGB_READ(spix1, p);
    762                     ARGB_READ(spix2, p+ex2);
    763                     ARGB_ADDW_11(pix4,spix1,spix2);
    764 
    765                     ARGB_ADDW_13(pix,pix3,pix4);
    766                     ARGB_SHR(pix,pix,3);
    767                     ARGB_WRITE(pix,dst);
    768                     break;
    769 
    770                 default:
    771                     ARGB_READ(spix1, p);
    772                     ARGB_READ(spix2, p+ex2);
    773                     ARGB_ADDW_13(pix3,spix1,spix2);
    774                     p += ey2;
    775                     ARGB_READ(spix1, p);
    776                     ARGB_READ(spix2, p+ex2);
    777                     ARGB_ADDW_13(pix4,spix1,spix2);
    778 
    779                     ARGB_ADDW_13(pix,pix3,pix4);
    780                     ARGB_SHR(pix,pix,4);
    781                     ARGB_WRITE(pix,dst);
    782             }
    783             sx  += ix;
    784             dst += 4;
    785         }
    786 
    787         sy       += iy;
    788         dst_line += dst_pitch;
    789     }
    790     ARGB_DONE;
    791 }
    792 #endif
    793 #undef  ARGB_SCALE_UP_QUICK_4x4
    794 
    795 
    796 #ifdef ARGB_SCALE_NEAREST
    797 /* this version scales up with nearest neighbours - looks crap */
    798 static void
    799 ARGB_SCALE_NEAREST( ScaleOp*  op )
    800 {
    801     int        dst_pitch = op->dst_pitch;
    802     int        src_pitch = op->src_pitch;
    803     uint8_t*   dst_line  = op->dst_line;
    804     uint8_t*   src_line  = op->src_line;
    805     int        sx = op->sx;
    806     int        sy = op->sy;
    807     int        ix = op->ix;
    808     int        iy = op->iy;
    809     int        xlimit, ylimit;
    810     int        h, sx0;
    811 
    812     ARGB_BEGIN;
    813 
    814     /* the center pixel is at (sx+ix/2, sy+iy/2), we then want to get */
    815     /* the four nearest source pixels, which are at (0.5,0.5) offsets */
    816 
    817     sx = sx + ix/2 - 32768;
    818     sy = sy + iy/2 - 32768;
    819 
    820     xlimit = (op->src_w-1);
    821     ylimit = (op->src_h-1);
    822 
    823     sx0 = sx;
    824 
    825     for ( h = op->rd.h; h > 0; h-- ) {
    826         uint8_t*  dst = dst_line;
    827         uint8_t*  dst_end = dst + 4*op->rd.w;
    828 
    829         sx = sx0;
    830         for ( ; dst < dst_end; ) {
    831             int        ex1, ex2, ey1, ey2;
    832             unsigned*  p;
    833 
    834             /* find the top-left neighbour */
    835             ex1 = (sx >> 16);
    836             ey1 = (sy >> 16);
    837             ex2 = ex1+1;
    838             ey2 = ey1+1;
    839 
    840             if (ex1 < 0) ex1 = 0; else if (ex1 > xlimit) ex1 = xlimit;
    841             if (ey1 < 0) ey1 = 0; else if (ey1 > ylimit) ey1 = ylimit;
    842             if (ex2 < 0) ex2 = 0; else if (ex2 > xlimit) ex2 = xlimit;
    843             if (ey2 < 0) ey2 = 0; else if (ey2 > ylimit) ey2 = ylimit;
    844 
    845             p   = (unsigned*)(src_line + ex1*4 + ey1*src_pitch);
    846             if ((sx & 0xffff) >= 32768)
    847                 p += (ex2-ex1);
    848             if ((sy & 0xffff) >= 32768)
    849                 p = (unsigned*)((char*)p + (ey2-ey1)*src_pitch);
    850 
    851             *(unsigned*)dst = p[0];
    852 
    853             sx  += ix;
    854             dst += 4;
    855         }
    856 
    857         sy       += iy;
    858         dst_line += dst_pitch;
    859     }
    860 }
    861 #endif
    862 #undef  ARGB_SCALE_NEAREST
    863