Home | History | Annotate | Download | only in codeflinger
      1 /* libs/pixelflinger/codeflinger/blending.cpp
      2 **
      3 ** Copyright 2006, The Android Open Source Project
      4 **
      5 ** Licensed under the Apache License, Version 2.0 (the "License");
      6 ** you may not use this file except in compliance with the License.
      7 ** You may obtain a copy of the License at
      8 **
      9 **     http://www.apache.org/licenses/LICENSE-2.0
     10 **
     11 ** Unless required by applicable law or agreed to in writing, software
     12 ** distributed under the License is distributed on an "AS IS" BASIS,
     13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 ** See the License for the specific language governing permissions and
     15 ** limitations under the License.
     16 */
     17 
     18 #define LOG_TAG "pixelflinger-code"
     19 
     20 #include <assert.h>
     21 #include <stdint.h>
     22 #include <stdio.h>
     23 #include <stdlib.h>
     24 #include <sys/types.h>
     25 
     26 #include <log/log.h>
     27 
     28 #include "GGLAssembler.h"
     29 
     30 namespace android {
     31 
     32 void GGLAssembler::build_fog(
     33                         component_t& temp,      // incomming fragment / output
     34                         int component,
     35                         Scratch& regs)
     36 {
     37    if (mInfo[component].fog) {
     38         Scratch scratches(registerFile());
     39         comment("fog");
     40 
     41         integer_t fragment(temp.reg, temp.h, temp.flags);
     42         if (!(temp.flags & CORRUPTIBLE)) {
     43             temp.reg = regs.obtain();
     44             temp.flags |= CORRUPTIBLE;
     45         }
     46 
     47         integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE);
     48         LDRB(AL, fogColor.reg, mBuilderContext.Rctx,
     49                 immed12_pre(GGL_OFFSETOF(state.fog.color[component])));
     50 
     51         integer_t factor(scratches.obtain(), 16, CORRUPTIBLE);
     52         CONTEXT_LOAD(factor.reg, generated_vars.f);
     53 
     54         // clamp fog factor (TODO: see if there is a way to guarantee
     55         // we won't overflow, when setting the iterators)
     56         BIC(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, ASR, 31));
     57         CMP(AL, factor.reg, imm( 0x10000 ));
     58         MOV(HS, 0, factor.reg, imm( 0x10000 ));
     59 
     60         build_blendFOneMinusF(temp, factor, fragment, fogColor);
     61     }
     62 }
     63 
     64 void GGLAssembler::build_blending(
     65                         component_t& temp,      // incomming fragment / output
     66                         const pixel_t& pixel,   // framebuffer
     67                         int component,
     68                         Scratch& regs)
     69 {
     70    if (!mInfo[component].blend)
     71         return;
     72 
     73     int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
     74     int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
     75     if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA)
     76         fs = GGL_ONE;
     77     const int blending = blending_codes(fs, fd);
     78     if (!temp.size()) {
     79         // here, blending will produce something which doesn't depend on
     80         // that component (eg: GL_ZERO:GL_*), so the register has not been
     81         // allocated yet. Will never be used as a source.
     82         temp = component_t(regs.obtain(), CORRUPTIBLE);
     83     }
     84 
     85     // we are doing real blending...
     86     // fb:          extracted dst
     87     // fragment:    extracted src
     88     // temp:        component_t(fragment) and result
     89 
     90     // scoped register allocator
     91     Scratch scratches(registerFile());
     92     comment("blending");
     93 
     94     // we can optimize these cases a bit...
     95     // (1) saturation is not needed
     96     // (2) we can use only one multiply instead of 2
     97     // (3) we can reduce the register pressure
     98     //      R = S*f + D*(1-f) = (S-D)*f + D
     99     //      R = S*(1-f) + D*f = (D-S)*f + S
    100 
    101     const bool same_factor_opt1 =
    102         (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) ||
    103         (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) ||
    104         (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) ||
    105         (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA);
    106 
    107     const bool same_factor_opt2 =
    108         (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) ||
    109         (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) ||
    110         (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) ||
    111         (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA);
    112 
    113 
    114     // XXX: we could also optimize these cases:
    115     // R = S*f + D*f = (S+D)*f
    116     // R = S*(1-f) + D*(1-f) = (S+D)*(1-f)
    117     // R = S*D + D*S = 2*S*D
    118 
    119 
    120     // see if we need to extract 'component' from the destination (fb)
    121     integer_t fb;
    122     if (blending & (BLEND_DST|FACTOR_DST)) {
    123         fb.setTo(scratches.obtain(), 32);
    124         extract(fb, pixel, component);
    125         if (mDithering) {
    126             // XXX: maybe what we should do instead, is simply
    127             // expand fb -or- fragment to the larger of the two
    128             if (fb.size() < temp.size()) {
    129                 // for now we expand 'fb' to min(fragment, 8)
    130                 int new_size = temp.size() < 8 ? temp.size() : 8;
    131                 expand(fb, fb, new_size);
    132             }
    133         }
    134     }
    135 
    136 
    137     // convert input fragment to integer_t
    138     if (temp.l && (temp.flags & CORRUPTIBLE)) {
    139         MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l));
    140         temp.h -= temp.l;
    141         temp.l = 0;
    142     }
    143     integer_t fragment(temp.reg, temp.size(), temp.flags);
    144 
    145     // if not done yet, convert input fragment to integer_t
    146     if (temp.l) {
    147         // here we know temp is not CORRUPTIBLE
    148         fragment.reg = scratches.obtain();
    149         MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l));
    150         fragment.flags |= CORRUPTIBLE;
    151     }
    152 
    153     if (!(temp.flags & CORRUPTIBLE)) {
    154         // temp is not corruptible, but since it's the destination it
    155         // will be modified, so we need to allocate a new register.
    156         temp.reg = regs.obtain();
    157         temp.flags &= ~CORRUPTIBLE;
    158         fragment.flags &= ~CORRUPTIBLE;
    159     }
    160 
    161     if ((blending & BLEND_SRC) && !same_factor_opt1) {
    162         // source (fragment) is needed for the blending stage
    163         // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1)
    164         fragment.flags &= ~CORRUPTIBLE;
    165     }
    166 
    167 
    168     if (same_factor_opt1) {
    169         //  R = S*f + D*(1-f) = (S-D)*f + D
    170         integer_t factor;
    171         build_blend_factor(factor, fs,
    172                 component, pixel, fragment, fb, scratches);
    173         // fb is always corruptible from this point
    174         fb.flags |= CORRUPTIBLE;
    175         build_blendFOneMinusF(temp, factor, fragment, fb);
    176     } else if (same_factor_opt2) {
    177         //  R = S*(1-f) + D*f = (D-S)*f + S
    178         integer_t factor;
    179         // fb is always corrruptible here
    180         fb.flags |= CORRUPTIBLE;
    181         build_blend_factor(factor, fd,
    182                 component, pixel, fragment, fb, scratches);
    183         build_blendOneMinusFF(temp, factor, fragment, fb);
    184     } else {
    185         integer_t src_factor;
    186         integer_t dst_factor;
    187 
    188         // if destination (fb) is not needed for the blending stage,
    189         // then it can be marked as CORRUPTIBLE
    190         if (!(blending & BLEND_DST)) {
    191             fb.flags |= CORRUPTIBLE;
    192         }
    193 
    194         // XXX: try to mark some registers as CORRUPTIBLE
    195         // in most case we could make those corruptible
    196         // when we're processing the last component
    197         // but not always, for instance
    198         //    when fragment is constant and not reloaded
    199         //    when fb is needed for logic-ops or masking
    200         //    when a register is aliased (for instance with mAlphaSource)
    201 
    202         // blend away...
    203         if (fs==GGL_ZERO) {
    204             if (fd==GGL_ZERO) {         // R = 0
    205                 // already taken care of
    206             } else if (fd==GGL_ONE) {   // R = D
    207                 // already taken care of
    208             } else {                    // R = D*fd
    209                 // compute fd
    210                 build_blend_factor(dst_factor, fd,
    211                         component, pixel, fragment, fb, scratches);
    212                 mul_factor(temp, fb, dst_factor);
    213             }
    214         } else if (fs==GGL_ONE) {
    215             if (fd==GGL_ZERO) {         // R = S
    216                 // NOP, taken care of
    217             } else if (fd==GGL_ONE) {   // R = S + D
    218                 component_add(temp, fb, fragment); // args order matters
    219                 component_sat(temp);
    220             } else {                    // R = S + D*fd
    221                 // compute fd
    222                 build_blend_factor(dst_factor, fd,
    223                         component, pixel, fragment, fb, scratches);
    224                 mul_factor_add(temp, fb, dst_factor, component_t(fragment));
    225                 component_sat(temp);
    226             }
    227         } else {
    228             // compute fs
    229             build_blend_factor(src_factor, fs,
    230                     component, pixel, fragment, fb, scratches);
    231             if (fd==GGL_ZERO) {         // R = S*fs
    232                 mul_factor(temp, fragment, src_factor);
    233             } else if (fd==GGL_ONE) {   // R = S*fs + D
    234                 mul_factor_add(temp, fragment, src_factor, component_t(fb));
    235                 component_sat(temp);
    236             } else {                    // R = S*fs + D*fd
    237                 mul_factor(temp, fragment, src_factor);
    238                 if (scratches.isUsed(src_factor.reg))
    239                     scratches.recycle(src_factor.reg);
    240                 // compute fd
    241                 build_blend_factor(dst_factor, fd,
    242                         component, pixel, fragment, fb, scratches);
    243                 mul_factor_add(temp, fb, dst_factor, temp);
    244                 if (!same_factor_opt1 && !same_factor_opt2) {
    245                     component_sat(temp);
    246                 }
    247             }
    248         }
    249     }
    250 
    251     // now we can be corrupted (it's the dest)
    252     temp.flags |= CORRUPTIBLE;
    253 }
    254 
    255 void GGLAssembler::build_blend_factor(
    256         integer_t& factor, int f, int component,
    257         const pixel_t& dst_pixel,
    258         integer_t& fragment,
    259         integer_t& fb,
    260         Scratch& scratches)
    261 {
    262     integer_t src_alpha(fragment);
    263 
    264     // src_factor/dst_factor won't be used after blending,
    265     // so it's fine to mark them as CORRUPTIBLE (if not aliased)
    266     factor.flags |= CORRUPTIBLE;
    267 
    268     switch(f) {
    269     case GGL_ONE_MINUS_SRC_ALPHA:
    270     case GGL_SRC_ALPHA:
    271         if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) {
    272             // we're processing alpha, so we already have
    273             // src-alpha in fragment, and we need src-alpha just this time.
    274         } else {
    275            // alpha-src will be needed for other components
    276             if (!mBlendFactorCached || mBlendFactorCached==f) {
    277                 src_alpha = mAlphaSource;
    278                 factor = mAlphaSource;
    279                 factor.flags &= ~CORRUPTIBLE;
    280                 // we already computed the blend factor before, nothing to do.
    281                 if (mBlendFactorCached)
    282                     return;
    283                 // this is the first time, make sure to compute the blend
    284                 // factor properly.
    285                 mBlendFactorCached = f;
    286                 break;
    287             } else {
    288                 // we have a cached alpha blend factor, but we want another one,
    289                 // this should really not happen because by construction,
    290                 // we cannot have BOTH source and destination
    291                 // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because
    292                 // the blending stage uses the f/(1-f) optimization
    293 
    294                 // for completeness, we handle this case though. Since there
    295                 // are only 2 choices, this meens we want "the other one"
    296                 // (1-factor)
    297                 factor = mAlphaSource;
    298                 factor.flags &= ~CORRUPTIBLE;
    299                 RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
    300                 mBlendFactorCached = f;
    301                 return;
    302             }
    303         }
    304         // fall-through...
    305     case GGL_ONE_MINUS_DST_COLOR:
    306     case GGL_DST_COLOR:
    307     case GGL_ONE_MINUS_SRC_COLOR:
    308     case GGL_SRC_COLOR:
    309     case GGL_ONE_MINUS_DST_ALPHA:
    310     case GGL_DST_ALPHA:
    311     case GGL_SRC_ALPHA_SATURATE:
    312         // help us find out what register we can use for the blend-factor
    313         // CORRUPTIBLE registers are chosen first, or a new one is allocated.
    314         if (fragment.flags & CORRUPTIBLE) {
    315             factor.setTo(fragment.reg, 32, CORRUPTIBLE);
    316             fragment.flags &= ~CORRUPTIBLE;
    317         } else if (fb.flags & CORRUPTIBLE) {
    318             factor.setTo(fb.reg, 32, CORRUPTIBLE);
    319             fb.flags &= ~CORRUPTIBLE;
    320         } else {
    321             factor.setTo(scratches.obtain(), 32, CORRUPTIBLE);
    322         }
    323         break;
    324     }
    325 
    326     // XXX: doesn't work if size==1
    327 
    328     switch(f) {
    329     case GGL_ONE_MINUS_DST_COLOR:
    330     case GGL_DST_COLOR:
    331         factor.s = fb.s;
    332         ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1));
    333         break;
    334     case GGL_ONE_MINUS_SRC_COLOR:
    335     case GGL_SRC_COLOR:
    336         factor.s = fragment.s;
    337         ADD(AL, 0, factor.reg, fragment.reg,
    338             reg_imm(fragment.reg, LSR, fragment.s-1));
    339         break;
    340     case GGL_ONE_MINUS_SRC_ALPHA:
    341     case GGL_SRC_ALPHA:
    342         factor.s = src_alpha.s;
    343         ADD(AL, 0, factor.reg, src_alpha.reg,
    344                 reg_imm(src_alpha.reg, LSR, src_alpha.s-1));
    345         break;
    346     case GGL_ONE_MINUS_DST_ALPHA:
    347     case GGL_DST_ALPHA:
    348         // XXX: should be precomputed
    349         extract(factor, dst_pixel, GGLFormat::ALPHA);
    350         ADD(AL, 0, factor.reg, factor.reg,
    351                 reg_imm(factor.reg, LSR, factor.s-1));
    352         break;
    353     case GGL_SRC_ALPHA_SATURATE:
    354         // XXX: should be precomputed
    355         // XXX: f = min(As, 1-Ad)
    356         // btw, we're guaranteed that Ad's size is <= 8, because
    357         // it's extracted from the framebuffer
    358         break;
    359     }
    360 
    361     switch(f) {
    362     case GGL_ONE_MINUS_DST_COLOR:
    363     case GGL_ONE_MINUS_SRC_COLOR:
    364     case GGL_ONE_MINUS_DST_ALPHA:
    365     case GGL_ONE_MINUS_SRC_ALPHA:
    366         RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s)));
    367     }
    368 
    369     // don't need more than 8-bits for the blend factor
    370     // and this will prevent overflows in the multiplies later
    371     if (factor.s > 8) {
    372         MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8));
    373         factor.s = 8;
    374     }
    375 }
    376 
    377 int GGLAssembler::blending_codes(int fs, int fd)
    378 {
    379     int blending = 0;
    380     switch(fs) {
    381     case GGL_ONE:
    382         blending |= BLEND_SRC;
    383         break;
    384 
    385     case GGL_ONE_MINUS_DST_COLOR:
    386     case GGL_DST_COLOR:
    387         blending |= FACTOR_DST|BLEND_SRC;
    388         break;
    389     case GGL_ONE_MINUS_DST_ALPHA:
    390     case GGL_DST_ALPHA:
    391         // no need to extract 'component' from the destination
    392         // for the blend factor, because we need ALPHA only.
    393         blending |= BLEND_SRC;
    394         break;
    395 
    396     case GGL_ONE_MINUS_SRC_COLOR:
    397     case GGL_SRC_COLOR:
    398         blending |= FACTOR_SRC|BLEND_SRC;
    399         break;
    400     case GGL_ONE_MINUS_SRC_ALPHA:
    401     case GGL_SRC_ALPHA:
    402     case GGL_SRC_ALPHA_SATURATE:
    403         blending |= FACTOR_SRC|BLEND_SRC;
    404         break;
    405     }
    406     switch(fd) {
    407     case GGL_ONE:
    408         blending |= BLEND_DST;
    409         break;
    410 
    411     case GGL_ONE_MINUS_DST_COLOR:
    412     case GGL_DST_COLOR:
    413         blending |= FACTOR_DST|BLEND_DST;
    414         break;
    415     case GGL_ONE_MINUS_DST_ALPHA:
    416     case GGL_DST_ALPHA:
    417         blending |= FACTOR_DST|BLEND_DST;
    418         break;
    419 
    420     case GGL_ONE_MINUS_SRC_COLOR:
    421     case GGL_SRC_COLOR:
    422         blending |= FACTOR_SRC|BLEND_DST;
    423         break;
    424     case GGL_ONE_MINUS_SRC_ALPHA:
    425     case GGL_SRC_ALPHA:
    426         // no need to extract 'component' from the source
    427         // for the blend factor, because we need ALPHA only.
    428         blending |= BLEND_DST;
    429         break;
    430     }
    431     return blending;
    432 }
    433 
    434 // ---------------------------------------------------------------------------
    435 
    436 void GGLAssembler::build_blendFOneMinusF(
    437         component_t& temp,
    438         const integer_t& factor,
    439         const integer_t& fragment,
    440         const integer_t& fb)
    441 {
    442     //  R = S*f + D*(1-f) = (S-D)*f + D
    443     Scratch scratches(registerFile());
    444     // compute S-D
    445     integer_t diff(fragment.flags & CORRUPTIBLE ?
    446             fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
    447     const int shift = fragment.size() - fb.size();
    448     if (shift>0)        RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
    449     else if (shift<0)   RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
    450     else                RSB(AL, 0, diff.reg, fb.reg, fragment.reg);
    451     mul_factor_add(temp, diff, factor, component_t(fb));
    452 }
    453 
    454 void GGLAssembler::build_blendOneMinusFF(
    455         component_t& temp,
    456         const integer_t& factor,
    457         const integer_t& fragment,
    458         const integer_t& fb)
    459 {
    460     //  R = S*f + D*(1-f) = (S-D)*f + D
    461     Scratch scratches(registerFile());
    462     // compute D-S
    463     integer_t diff(fb.flags & CORRUPTIBLE ?
    464             fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE);
    465     const int shift = fragment.size() - fb.size();
    466     if (shift>0)        SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift));
    467     else if (shift<0)   SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift));
    468     else                SUB(AL, 0, diff.reg, fb.reg, fragment.reg);
    469     mul_factor_add(temp, diff, factor, component_t(fragment));
    470 }
    471 
    472 // ---------------------------------------------------------------------------
    473 
    474 void GGLAssembler::mul_factor(  component_t& d,
    475                                 const integer_t& v,
    476                                 const integer_t& f)
    477 {
    478     int vs = v.size();
    479     int fs = f.size();
    480     int ms = vs+fs;
    481 
    482     // XXX: we could have special cases for 1 bit mul
    483 
    484     // all this code below to use the best multiply instruction
    485     // wrt the parameters size. We take advantage of the fact
    486     // that the 16-bits multiplies allow a 16-bit shift
    487     // The trick is that we just make sure that we have at least 8-bits
    488     // per component (which is enough for a 8 bits display).
    489 
    490     int xy;
    491     int vshift = 0;
    492     int fshift = 0;
    493     int smulw = 0;
    494 
    495     if (vs<16) {
    496         if (fs<16) {
    497             xy = xyBB;
    498         } else if (GGL_BETWEEN(fs, 24, 31)) {
    499             ms -= 16;
    500             xy = xyTB;
    501         } else {
    502             // eg: 15 * 18  ->  15 * 15
    503             fshift = fs - 15;
    504             ms -= fshift;
    505             xy = xyBB;
    506         }
    507     } else if (GGL_BETWEEN(vs, 24, 31)) {
    508         if (fs<16) {
    509             ms -= 16;
    510             xy = xyTB;
    511         } else if (GGL_BETWEEN(fs, 24, 31)) {
    512             ms -= 32;
    513             xy = xyTT;
    514         } else {
    515             // eg: 24 * 18  ->  8 * 18
    516             fshift = fs - 15;
    517             ms -= 16 + fshift;
    518             xy = xyTB;
    519         }
    520     } else {
    521         if (fs<16) {
    522             // eg: 18 * 15  ->  15 * 15
    523             vshift = vs - 15;
    524             ms -= vshift;
    525             xy = xyBB;
    526         } else if (GGL_BETWEEN(fs, 24, 31)) {
    527             // eg: 18 * 24  ->  15 * 8
    528             vshift = vs - 15;
    529             ms -= 16 + vshift;
    530             xy = xyBT;
    531         } else {
    532             // eg: 18 * 18  ->  (15 * 18)>>16
    533             fshift = fs - 15;
    534             ms -= 16 + fshift;
    535             xy = yB;    //XXX SMULWB
    536             smulw = 1;
    537         }
    538     }
    539 
    540     ALOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs);
    541 
    542     int vreg = v.reg;
    543     int freg = f.reg;
    544     if (vshift) {
    545         MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift));
    546         vreg = d.reg;
    547     }
    548     if (fshift) {
    549         MOV(AL, 0, d.reg, reg_imm(vreg, LSR, fshift));
    550         freg = d.reg;
    551     }
    552     if (smulw)  SMULW(AL, xy, d.reg, vreg, freg);
    553     else        SMUL(AL, xy, d.reg, vreg, freg);
    554 
    555 
    556     d.h = ms;
    557     if (mDithering) {
    558         d.l = 0;
    559     } else {
    560         d.l = fs;
    561         d.flags |= CLEAR_LO;
    562     }
    563 }
    564 
    565 void GGLAssembler::mul_factor_add(  component_t& d,
    566                                     const integer_t& v,
    567                                     const integer_t& f,
    568                                     const component_t& a)
    569 {
    570     // XXX: we could have special cases for 1 bit mul
    571     Scratch scratches(registerFile());
    572 
    573     int vs = v.size();
    574     int fs = f.size();
    575     int as = a.h;
    576     int ms = vs+fs;
    577 
    578     ALOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as);
    579 
    580     integer_t add(a.reg, a.h, a.flags);
    581 
    582     // 'a' is a component_t but it is guaranteed to have
    583     // its high bits set to 0. However in the dithering case,
    584     // we can't get away with truncating the potentially bad bits
    585     // so extraction is needed.
    586 
    587    if ((mDithering) && (a.size() < ms)) {
    588         // we need to expand a
    589         if (!(a.flags & CORRUPTIBLE)) {
    590             // ... but it's not corruptible, so we need to pick a
    591             // temporary register.
    592             // Try to uses the destination register first (it's likely
    593             // to be usable, unless it aliases an input).
    594             if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) {
    595                 add.reg = d.reg;
    596             } else {
    597                 add.reg = scratches.obtain();
    598             }
    599         }
    600         expand(add, a, ms); // extracts and expands
    601         as = ms;
    602     }
    603 
    604     if (ms == as) {
    605         if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg);
    606         else                MLA(AL, 0, d.reg, v.reg, f.reg, add.reg);
    607     } else {
    608         int temp = d.reg;
    609         if (temp == add.reg) {
    610             // the mul will modify add.reg, we need an intermediary reg
    611             if (v.flags & CORRUPTIBLE)      temp = v.reg;
    612             else if (f.flags & CORRUPTIBLE) temp = f.reg;
    613             else                            temp = scratches.obtain();
    614         }
    615 
    616         if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg);
    617         else                MUL(AL, 0, temp, v.reg, f.reg);
    618 
    619         if (ms>as) {
    620             ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as));
    621         } else if (ms<as) {
    622             // not sure if we should expand the mul instead?
    623             ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms));
    624         }
    625     }
    626 
    627     d.h = ms;
    628     if (mDithering) {
    629         d.l = a.l;
    630     } else {
    631         d.l = fs>a.l ? fs : a.l;
    632         d.flags |= CLEAR_LO;
    633     }
    634 }
    635 
    636 void GGLAssembler::component_add(component_t& d,
    637         const integer_t& dst, const integer_t& src)
    638 {
    639     // here we're guaranteed that fragment.size() >= fb.size()
    640     const int shift = src.size() - dst.size();
    641     if (!shift) {
    642         ADD(AL, 0, d.reg, src.reg, dst.reg);
    643     } else {
    644         ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift));
    645     }
    646 
    647     d.h = src.size();
    648     if (mDithering) {
    649         d.l = 0;
    650     } else {
    651         d.l = shift;
    652         d.flags |= CLEAR_LO;
    653     }
    654 }
    655 
    656 void GGLAssembler::component_sat(const component_t& v)
    657 {
    658     const int one = ((1<<v.size())-1)<<v.l;
    659     CMP(AL, v.reg, imm( 1<<v.h ));
    660     if (isValidImmediate(one)) {
    661         MOV(HS, 0, v.reg, imm( one ));
    662     } else if (isValidImmediate(~one)) {
    663         MVN(HS, 0, v.reg, imm( ~one ));
    664     } else {
    665         MOV(HS, 0, v.reg, imm( 1<<v.h ));
    666         SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l ));
    667     }
    668 }
    669 
    670 // ----------------------------------------------------------------------------
    671 
    672 }; // namespace android
    673 
    674