Home | History | Annotate | Download | only in codeflinger
      1 /* libs/pixelflinger/codeflinger/blending.cpp
      2 **
      3 ** Copyright 2006, The Android Open Source Project
      4 **
      5 ** Licensed under the Apache License, Version 2.0 (the "License");
      6 ** you may not use this file except in compliance with the License.
      7 ** You may obtain a copy of the License at
      8 **
      9 **     http://www.apache.org/licenses/LICENSE-2.0
     10 **
     11 ** Unless required by applicable law or agreed to in writing, software
     12 ** distributed under the License is distributed on an "AS IS" BASIS,
     13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 ** See the License for the specific language governing permissions and
     15 ** limitations under the License.
     16 */
     17 
     18 #include <assert.h>
     19 #include <stdint.h>
     20 #include <stdlib.h>
     21 #include <stdio.h>
     22 #include <sys/types.h>
     23 
     24 #include <cutils/log.h>
     25 
     26 #include "GGLAssembler.h"
     27 
     28 
     29 namespace android {
     30 
     31 void GGLAssembler::build_fog(
     32                         component_t& temp,      // incomming fragment / output
     33                         int component,
     34                         Scratch& regs)
     35 {
     36    if (mInfo[component].fog) {
     37         Scratch scratches(registerFile());
     38         comment("fog");
     39 
     40         integer_t fragment(temp.reg, temp.h, temp.flags);
     41         if (!(temp.flags & CORRUPTIBLE)) {
     42             temp.reg = regs.obtain();
     43             temp.flags |= CORRUPTIBLE;
     44         }
     45 
     46         integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE);
     47         LDRB(AL, fogColor.reg, mBuilderContext.Rctx,
     48                 immed12_pre(GGL_OFFSETOF(state.fog.color[component])));
     49 
     50         integer_t factor(scratches.obtain(), 16, CORRUPTIBLE);
     51         CONTEXT_LOAD(factor.reg, generated_vars.f);
     52 
     53         // clamp fog factor (TODO: see if there is a way to guarantee
     54         // we won't overflow, when setting the iterators)
     55         BIC(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, ASR, 31));
     56         CMP(AL, factor.reg, imm( 0x10000 ));
     57         MOV(HS, 0, factor.reg, imm( 0x10000 ));
     58 
     59         build_blendFOneMinusF(temp, factor, fragment, fogColor);
     60     }
     61 }
     62 
     63 void GGLAssembler::build_blending(
     64                         component_t& temp,      // incomming fragment / output
     65                         const pixel_t& pixel,   // framebuffer
     66                         int component,
     67                         Scratch& regs)
     68 {
     69    if (!mInfo[component].blend)
     70         return;
     71 
     72     int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
     73     int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
     74     if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
     75         fs = GGL_ONE;
     76     const int blending = blending_codes(fs, fd);
     77     if (!temp.size()) {
     78         // here, blending will produce something which doesn't depend on
     79         // that component (eg: GL_ZERO:GL_*), so the register has not been
     80         // allocated yet. Will never be used as a source.
     81         temp = component_t(regs.obtain(), CORRUPTIBLE);
     82     }
     83 
     84     // we are doing real blending...
     85     // fb:          extracted dst
     86     // fragment:    extracted src
     87     // temp:        component_t(fragment) and result
     88 
     89     // scoped register allocator
     90     Scratch scratches(registerFile());
     91     comment("blending");
     92 
     93     // we can optimize these cases a bit...
     94     // (1) saturation is not needed
     95     // (2) we can use only one multiply instead of 2
     96     // (3) we can reduce the register pressure
     97     //      R = S*f + D*(1-f) = (S-D)*f + D
     98     //      R = S*(1-f) + D*f = (D-S)*f + S
     99 
    100     const bool same_factor_opt1 =
    101         (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) ||
    102         (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) ||
    103         (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) ||
    104         (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);
    105 
    106     const bool same_factor_opt2 =
    107         (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) ||
    108         (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) ||
    109         (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) ||
    110         (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);
    111 
    112 
    113     // XXX: we could also optimize these cases:
    114     // R = S*f + D*f = (S+D)*f
    115     // R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
    116     // R = S*D + D*S = 2*S*D
    117 
    118 
    119     // see if we need to extract 'component' from the destination (fb)
    120     integer_t fb;
    121     if (blending & (BLEND_DST|FACTOR_DST)) {
    122         fb.setTo(scratches.obtain(), 32);
    123         extract(fb, pixel, component);
    124         if (mDithering) {
    125             // XXX: maybe what we should do instead, is simply
    126             // expand fb -or- fragment to the larger of the two
    127             if (fb.size() < temp.size()) {
    128                 // for now we expand 'fb' to min(fragment, 8)
    129                 int new_size = temp.size() < 8 ? temp.size() : 8;
    130                 expand(fb, fb, new_size);
    131             }
    132         }
    133     }
    134 
    135 
    136     // convert input fragment to integer_t
    137     if (temp.l && (temp.flags & CORRUPTIBLE)) {
    138         MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l));
    139         temp.h -= temp.l;
    140         temp.l = 0;
    141     }
    142     integer_t fragment(temp.reg, temp.size(), temp.flags);
    143 
    144     // if not done yet, convert input fragment to integer_t
    145     if (temp.l) {
    146         // here we know temp is not CORRUPTIBLE
    147         fragment.reg = scratches.obtain();
    148         MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l));
    149         fragment.flags |= CORRUPTIBLE;
    150     }
    151 
    152     if (!(temp.flags & CORRUPTIBLE)) {
    153         // temp is not corruptible, but since it's the destination it
    154         // will be modified, so we need to allocate a new register.
    155         temp.reg = regs.obtain();
    156         temp.flags &= ~CORRUPTIBLE;
    157         fragment.flags &= ~CORRUPTIBLE;
    158     }
    159 
    160     if ((blending & BLEND_SRC) && !same_factor_opt1) {
    161         // source (fragment) is needed for the blending stage
    162         // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
    163         fragment.flags &= ~CORRUPTIBLE;
    164     }
    165 
    166 
    167     if (same_factor_opt1) {
    168         //  R = S*f + D*(1-f) = (S-D)*f + D
    169         integer_t factor;
    170         build_blend_factor(factor, fs,
    171                 component, pixel, fragment, fb, scratches);
    172         // fb is always corruptible from this point
    173         fb.flags |= CORRUPTIBLE;
    174         build_blendFOneMinusF(temp, factor, fragment, fb);
    175     } else if (same_factor_opt2) {
    176         //  R = S*(1-f) + D*f = (D-S)*f + S
    177         integer_t factor;
    178         // fb is always corrruptible here
    179         fb.flags |= CORRUPTIBLE;
    180         build_blend_factor(factor, fd,
    181                 component, pixel, fragment, fb, scratches);
    182         build_blendOneMinusFF(temp, factor, fragment, fb);
    183     } else {
    184         integer_t src_factor;
    185         integer_t dst_factor;
    186 
    187         // if destination (fb) is not needed for the blending stage,
    188         // then it can be marked as CORRUPTIBLE
    189         if (!(blending & BLEND_DST)) {
    190             fb.flags |= CORRUPTIBLE;
    191         }
    192 
    193         // XXX: try to mark some registers as CORRUPTIBLE
    194         // in most case we could make those corruptible
    195         // when we're processing the last component
    196         // but not always, for instance
    197         //    when fragment is constant and not reloaded
    198         //    when fb is needed for logic-ops or masking
    199         //    when a register is aliased (for instance with mAlphaSource)
    200 
    201         // blend away...
    202         if (fs==GGL_ZERO) {
    203             if (fd==GGL_ZERO) {         // R = 0
    204                 // already taken care of
    205             } else if (fd==GGL_ONE) {   // R = D
    206                 // already taken care of
    207             } else {                    // R = D*fd
    208                 // compute fd
    209                 build_blend_factor(dst_factor, fd,
    210                         component, pixel, fragment, fb, scratches);
    211                 mul_factor(temp, fb, dst_factor);
    212             }
    213         } else if (fs==GGL_ONE) {
    214             if (fd==GGL_ZERO) {         // R = S
    215                 // NOP, taken care of
    216             } else if (fd==GGL_ONE) {   // R = S + D
    217                 component_add(temp, fb, fragment); // args order matters
    218                 component_sat(temp);
    219             } else {                    // R = S + D*fd
    220                 // compute fd
    221                 build_blend_factor(dst_factor, fd,
    222                         component, pixel, fragment, fb, scratches);
    223                 mul_factor_add(temp, fb, dst_factor, component_t(fragment));
    224                 component_sat(temp);
    225             }
    226         } else {
    227             // compute fs
    228             build_blend_factor(src_factor, fs,
    229                     component, pixel, fragment, fb, scratches);
    230             if (fd==GGL_ZERO) {         // R = S*fs
    231                 mul_factor(temp, fragment, src_factor);
    232             } else if (fd==GGL_ONE) {   // R = S*fs + D
    233                 mul_factor_add(temp, fragment, src_factor, component_t(fb));
    234                 component_sat(temp);
    235             } else {                    // R = S*fs + D*fd
    236                 mul_factor(temp, fragment, src_factor);
    237                 if (scratches.isUsed(src_factor.reg))
    238                     scratches.recycle(src_factor.reg);
    239                 // compute fd
    240                 build_blend_factor(dst_factor, fd,
    241                         component, pixel, fragment, fb, scratches);
    242                 mul_factor_add(temp, fb, dst_factor, temp);
    243                 if (!same_factor_opt1 && !same_factor_opt2) {
    244                     component_sat(temp);
    245                 }
    246             }
    247         }
    248     }
    249 
    250     // now we can be corrupted (it's the dest)
    251     temp.flags |= CORRUPTIBLE;
    252 }
    253 
    254 void GGLAssembler::build_blend_factor(
    255         integer_t& factor, int f, int component,
    256         const pixel_t& dst_pixel,
    257         integer_t& fragment,
    258         integer_t& fb,
    259         Scratch& scratches)
    260 {
    261     integer_t src_alpha(fragment);
    262 
    263     // src_factor/dst_factor won't be used after blending,
    264     // so it's fine to mark them as CORRUPTIBLE (if not aliased)
    265     factor.flags |= CORRUPTIBLE;
    266 
    267     switch(f) {
    268     case GGL_ONE_MINUS_SRC_ALPHA:
    269     case GGL_SRC_ALPHA:
    270         if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
    271             // we're processing alpha, so we already have
    272             // src-alpha in fragment, and we need src-alpha just this time.
    273         } else {
    274            // alpha-src will be needed for other components
    275             if (!mBlendFactorCached || mBlendFactorCached==f) {
    276                 src_alpha = mAlphaSource;
    277                 factor = mAlphaSource;
    278                 factor.flags &= ~CORRUPTIBLE;
    279                 // we already computed the blend factor before, nothing to do.
    280                 if (mBlendFactorCached)
    281                     return;
    282                 // this is the first time, make sure to compute the blend
    283                 // factor properly.
    284                 mBlendFactorCached = f;
    285                 break;
    286             } else {
    287                 // we have a cached alpha blend factor, but we want another one,
    288                 // this should really not happen because by construction,
    289                 // we cannot have BOTH source and destination
    290                 // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because
    291                 // the blending stage uses the f/(1-f) optimization
    292 
    293                 // for completeness, we handle this case though. Since there
    294                 // are only 2 choices, this meens we want "the other one"
    295                 // (1-factor)
    296                 factor = mAlphaSource;
    297                 factor.flags &= ~CORRUPTIBLE;
    298                 RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
    299                 mBlendFactorCached = f;
    300                 return;
    301             }
    302         }
    303         // fall-through...
    304     case GGL_ONE_MINUS_DST_COLOR:
    305     case GGL_DST_COLOR:
    306     case GGL_ONE_MINUS_SRC_COLOR:
    307     case GGL_SRC_COLOR:
    308     case GGL_ONE_MINUS_DST_ALPHA:
    309     case GGL_DST_ALPHA:
    310     case GGL_SRC_ALPHA_SATURATE:
    311         // help us find out what register we can use for the blend-factor
    312         // CORRUPTIBLE registers are chosen first, or a new one is allocated.
    313         if (fragment.flags & CORRUPTIBLE) {
    314             factor.setTo(fragment.reg, 32, CORRUPTIBLE);
    315             fragment.flags &= ~CORRUPTIBLE;
    316         } else if (fb.flags & CORRUPTIBLE) {
    317             factor.setTo(fb.reg, 32, CORRUPTIBLE);
    318             fb.flags &= ~CORRUPTIBLE;
    319         } else {
    320             factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
    321         }
    322         break;
    323     }
    324 
    325     // XXX: doesn't work if size==1
    326 
    327     switch(f) {
    328     case GGL_ONE_MINUS_DST_COLOR:
    329     case GGL_DST_COLOR:
    330         factor.s = fb.s;
    331         ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1));
    332         break;
    333     case GGL_ONE_MINUS_SRC_COLOR:
    334     case GGL_SRC_COLOR:
    335         factor.s = fragment.s;
    336         ADD(AL, 0, factor.reg, fragment.reg,
    337             reg_imm(fragment.reg, LSR, fragment.s-1));
    338         break;
    339     case GGL_ONE_MINUS_SRC_ALPHA:
    340     case GGL_SRC_ALPHA:
    341         factor.s = src_alpha.s;
    342         ADD(AL, 0, factor.reg, src_alpha.reg,
    343                 reg_imm(src_alpha.reg, LSR, src_alpha.s-1));
    344         break;
    345     case GGL_ONE_MINUS_DST_ALPHA:
    346     case GGL_DST_ALPHA:
    347         // XXX: should be precomputed
    348         extract(factor, dst_pixel, GGLFormat::ALPHA);
    349         ADD(AL, 0, factor.reg, factor.reg,
    350                 reg_imm(factor.reg, LSR, factor.s-1));
    351         break;
    352     case GGL_SRC_ALPHA_SATURATE:
    353         // XXX: should be precomputed
    354         // XXX: f = min(As, 1-Ad)
    355         // btw, we're guaranteed that Ad's size is <= 8, because
    356         // it's extracted from the framebuffer
    357         break;
    358     }
    359 
    360     switch(f) {
    361     case GGL_ONE_MINUS_DST_COLOR:
    362     case GGL_ONE_MINUS_SRC_COLOR:
    363     case GGL_ONE_MINUS_DST_ALPHA:
    364     case GGL_ONE_MINUS_SRC_ALPHA:
    365         RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
    366     }
    367 
    368     // don't need more than 8-bits for the blend factor
    369     // and this will prevent overflows in the multiplies later
    370     if (factor.s > 8) {
    371         MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8));
    372         factor.s = 8;
    373     }
    374 }
    375 
    376 int GGLAssembler::blending_codes(int fs, int fd)
    377 {
    378     int blending = 0;
    379     switch(fs) {
    380     case GGL_ONE:
    381         blending |= BLEND_SRC;
    382         break;
    383 
    384     case GGL_ONE_MINUS_DST_COLOR:
    385     case GGL_DST_COLOR:
    386         blending |= FACTOR_DST|BLEND_SRC;
    387         break;
    388     case GGL_ONE_MINUS_DST_ALPHA:
    389     case GGL_DST_ALPHA:
    390         // no need to extract 'component' from the destination
    391         // for the blend factor, because we need ALPHA only.
    392         blending |= BLEND_SRC;
    393         break;
    394 
    395     case GGL_ONE_MINUS_SRC_COLOR:
    396     case GGL_SRC_COLOR:
    397         blending |= FACTOR_SRC|BLEND_SRC;
    398         break;
    399     case GGL_ONE_MINUS_SRC_ALPHA:
    400     case GGL_SRC_ALPHA:
    401     case GGL_SRC_ALPHA_SATURATE:
    402         blending |= FACTOR_SRC|BLEND_SRC;
    403         break;
    404     }
    405     switch(fd) {
    406     case GGL_ONE:
    407         blending |= BLEND_DST;
    408         break;
    409 
    410     case GGL_ONE_MINUS_DST_COLOR:
    411     case GGL_DST_COLOR:
    412         blending |= FACTOR_DST|BLEND_DST;
    413         break;
    414     case GGL_ONE_MINUS_DST_ALPHA:
    415     case GGL_DST_ALPHA:
    416         blending |= FACTOR_DST|BLEND_DST;
    417         break;
    418 
    419     case GGL_ONE_MINUS_SRC_COLOR:
    420     case GGL_SRC_COLOR:
    421         blending |= FACTOR_SRC|BLEND_DST;
    422         break;
    423     case GGL_ONE_MINUS_SRC_ALPHA:
    424     case GGL_SRC_ALPHA:
    425         // no need to extract 'component' from the source
    426         // for the blend factor, because we need ALPHA only.
    427         blending |= BLEND_DST;
    428         break;
    429     }
    430     return blending;
    431 }
    432 
    433 // ---------------------------------------------------------------------------
    434 
    435 void GGLAssembler::build_blendFOneMinusF(
    436         component_t& temp,
    437         const integer_t& factor,
    438         const integer_t& fragment,
    439         const integer_t& fb)
    440 {
    441     //  R = S*f + D*(1-f) = (S-D)*f + D
    442     Scratch scratches(registerFile());
    443     // compute S-D
    444     integer_t diff(fragment.flags & CORRUPTIBLE ?
    445             fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
    446     const int shift = fragment.size() - fb.size();
    447     if (shift>0)        RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
    448     else if (shift<0)   RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
    449     else                RSB(AL, 0, diff.reg, fb.reg, fragment.reg);
    450     mul_factor_add(temp, diff, factor, component_t(fb));
    451 }
    452 
    453 void GGLAssembler::build_blendOneMinusFF(
    454         component_t& temp,
    455         const integer_t& factor,
    456         const integer_t& fragment,
    457         const integer_t& fb)
    458 {
    459     //  R = S*f + D*(1-f) = (S-D)*f + D
    460     Scratch scratches(registerFile());
    461     // compute D-S
    462     integer_t diff(fb.flags & CORRUPTIBLE ?
    463             fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
    464     const int shift = fragment.size() - fb.size();
    465     if (shift>0)        SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
    466     else if (shift<0)   SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
    467     else                SUB(AL, 0, diff.reg, fb.reg, fragment.reg);
    468     mul_factor_add(temp, diff, factor, component_t(fragment));
    469 }
    470 
    471 // ---------------------------------------------------------------------------
    472 
    473 void GGLAssembler::mul_factor(  component_t& d,
    474                                 const integer_t& v,
    475                                 const integer_t& f)
    476 {
    477     int vs = v.size();
    478     int fs = f.size();
    479     int ms = vs+fs;
    480 
    481     // XXX: we could have special cases for 1 bit mul
    482 
    483     // all this code below to use the best multiply instruction
    484     // wrt the parameters size. We take advantage of the fact
    485     // that the 16-bits multiplies allow a 16-bit shift
    486     // The trick is that we just make sure that we have at least 8-bits
    487     // per component (which is enough for a 8 bits display).
    488 
    489     int xy;
    490     int vshift = 0;
    491     int fshift = 0;
    492     int smulw = 0;
    493 
    494     if (vs<16) {
    495         if (fs<16) {
    496             xy = xyBB;
    497         } else if (GGL_BETWEEN(fs, 24, 31)) {
    498             ms -= 16;
    499             xy = xyTB;
    500         } else {
    501             // eg: 15 * 18  ->  15 * 15
    502             fshift = fs - 15;
    503             ms -= fshift;
    504             xy = xyBB;
    505         }
    506     } else if (GGL_BETWEEN(vs, 24, 31)) {
    507         if (fs<16) {
    508             ms -= 16;
    509             xy = xyTB;
    510         } else if (GGL_BETWEEN(fs, 24, 31)) {
    511             ms -= 32;
    512             xy = xyTT;
    513         } else {
    514             // eg: 24 * 18  ->  8 * 18
    515             fshift = fs - 15;
    516             ms -= 16 + fshift;
    517             xy = xyTB;
    518         }
    519     } else {
    520         if (fs<16) {
    521             // eg: 18 * 15  ->  15 * 15
    522             vshift = vs - 15;
    523             ms -= vshift;
    524             xy = xyBB;
    525         } else if (GGL_BETWEEN(fs, 24, 31)) {
    526             // eg: 18 * 24  ->  15 * 8
    527             vshift = vs - 15;
    528             ms -= 16 + vshift;
    529             xy = xyBT;
    530         } else {
    531             // eg: 18 * 18  ->  (15 * 18)>>16
    532             fshift = fs - 15;
    533             ms -= 16 + fshift;
    534             xy = yB;    //XXX SMULWB
    535             smulw = 1;
    536         }
    537     }
    538 
    539     ALOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);
    540 
    541     int vreg = v.reg;
    542     int freg = f.reg;
    543     if (vshift) {
    544         MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift));
    545         vreg = d.reg;
    546     }
    547     if (fshift) {
    548         MOV(AL, 0, d.reg, reg_imm(vreg, LSR, fshift));
    549         freg = d.reg;
    550     }
    551     if (smulw)  SMULW(AL, xy, d.reg, vreg, freg);
    552     else        SMUL(AL, xy, d.reg, vreg, freg);
    553 
    554 
    555     d.h = ms;
    556     if (mDithering) {
    557         d.l = 0;
    558     } else {
    559         d.l = fs;
    560         d.flags |= CLEAR_LO;
    561     }
    562 }
    563 
    564 void GGLAssembler::mul_factor_add(  component_t& d,
    565                                     const integer_t& v,
    566                                     const integer_t& f,
    567                                     const component_t& a)
    568 {
    569     // XXX: we could have special cases for 1 bit mul
    570     Scratch scratches(registerFile());
    571 
    572     int vs = v.size();
    573     int fs = f.size();
    574     int as = a.h;
    575     int ms = vs+fs;
    576 
    577     ALOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);
    578 
    579     integer_t add(a.reg, a.h, a.flags);
    580 
    581     // 'a' is a component_t but it is guaranteed to have
    582     // its high bits set to 0. However in the dithering case,
    583     // we can't get away with truncating the potentially bad bits
    584     // so extraction is needed.
    585 
    586    if ((mDithering) && (a.size() < ms)) {
    587         // we need to expand a
    588         if (!(a.flags & CORRUPTIBLE)) {
    589             // ... but it's not corruptible, so we need to pick a
    590             // temporary register.
    591             // Try to uses the destination register first (it's likely
    592             // to be usable, unless it aliases an input).
    593             if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
    594                 add.reg = d.reg;
    595             } else {
    596                 add.reg = scratches.obtain();
    597             }
    598         }
    599         expand(add, a, ms); // extracts and expands
    600         as = ms;
    601     }
    602 
    603     if (ms == as) {
    604         if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg);
    605         else                MLA(AL, 0, d.reg, v.reg, f.reg, add.reg);
    606     } else {
    607         int temp = d.reg;
    608         if (temp == add.reg) {
    609             // the mul will modify add.reg, we need an intermediary reg
    610             if (v.flags & CORRUPTIBLE)      temp = v.reg;
    611             else if (f.flags & CORRUPTIBLE) temp = f.reg;
    612             else                            temp = scratches.obtain();
    613         }
    614 
    615         if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg);
    616         else                MUL(AL, 0, temp, v.reg, f.reg);
    617 
    618         if (ms>as) {
    619             ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as));
    620         } else if (ms<as) {
    621             // not sure if we should expand the mul instead?
    622             ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms));
    623         }
    624     }
    625 
    626     d.h = ms;
    627     if (mDithering) {
    628         d.l = a.l;
    629     } else {
    630         d.l = fs>a.l ? fs : a.l;
    631         d.flags |= CLEAR_LO;
    632     }
    633 }
    634 
    635 void GGLAssembler::component_add(component_t& d,
    636         const integer_t& dst, const integer_t& src)
    637 {
    638     // here we're guaranteed that fragment.size() >= fb.size()
    639     const int shift = src.size() - dst.size();
    640     if (!shift) {
    641         ADD(AL, 0, d.reg, src.reg, dst.reg);
    642     } else {
    643         ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift));
    644     }
    645 
    646     d.h = src.size();
    647     if (mDithering) {
    648         d.l = 0;
    649     } else {
    650         d.l = shift;
    651         d.flags |= CLEAR_LO;
    652     }
    653 }
    654 
    655 void GGLAssembler::component_sat(const component_t& v)
    656 {
    657     const int one = ((1<<v.size())-1)<<v.l;
    658     CMP(AL, v.reg, imm( 1<<v.h ));
    659     if (isValidImmediate(one)) {
    660         MOV(HS, 0, v.reg, imm( one ));
    661     } else if (isValidImmediate(~one)) {
    662         MVN(HS, 0, v.reg, imm( ~one ));
    663     } else {
    664         MOV(HS, 0, v.reg, imm( 1<<v.h ));
    665         SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l ));
    666     }
    667 }
    668 
    669 // ----------------------------------------------------------------------------
    670 
    671 }; // namespace android
    672 
    673