Home | History | Annotate | Download | only in codeflinger
      1 /* libs/pixelflinger/codeflinger/GGLAssembler.cpp
      2 **
      3 ** Copyright 2006, The Android Open Source Project
      4 **
      5 ** Licensed under the Apache License, Version 2.0 (the "License");
      6 ** you may not use this file except in compliance with the License.
      7 ** You may obtain a copy of the License at
      8 **
      9 **     http://www.apache.org/licenses/LICENSE-2.0
     10 **
     11 ** Unless required by applicable law or agreed to in writing, software
     12 ** distributed under the License is distributed on an "AS IS" BASIS,
     13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 ** See the License for the specific language governing permissions and
     15 ** limitations under the License.
     16 */
     17 
     18 #define LOG_TAG "GGLAssembler"
     19 
     20 #include <assert.h>
     21 #include <stdint.h>
     22 #include <stdlib.h>
     23 #include <stdio.h>
     24 #include <sys/types.h>
     25 #include <cutils/log.h>
     26 
     27 #include "codeflinger/GGLAssembler.h"
     28 
     29 namespace android {
     30 
     31 // ----------------------------------------------------------------------------
     32 
     33 GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
     34     : ARMAssemblerProxy(target), RegisterAllocator(), mOptLevel(7)
     35 {
     36 }
     37 
     38 GGLAssembler::~GGLAssembler()
     39 {
     40 }
     41 
     42 void GGLAssembler::prolog()
     43 {
     44     ARMAssemblerProxy::prolog();
     45 }
     46 
     47 void GGLAssembler::epilog(uint32_t touched)
     48 {
     49     ARMAssemblerProxy::epilog(touched);
     50 }
     51 
     52 void GGLAssembler::reset(int opt_level)
     53 {
     54     ARMAssemblerProxy::reset();
     55     RegisterAllocator::reset();
     56     mOptLevel = opt_level;
     57 }
     58 
     59 // ---------------------------------------------------------------------------
     60 
     61 int GGLAssembler::scanline(const needs_t& needs, context_t const* c)
     62 {
     63     int err = 0;
     64     int opt_level = mOptLevel;
     65     while (opt_level >= 0) {
     66         reset(opt_level);
     67         err = scanline_core(needs, c);
     68         if (err == 0)
     69             break;
     70         opt_level--;
     71     }
     72 
     73     // XXX: in theory, pcForLabel is not valid before generate()
     74     uint32_t* fragment_start_pc = pcForLabel("fragment_loop");
     75     uint32_t* fragment_end_pc = pcForLabel("epilog");
     76     const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc);
     77 
     78     // build a name for our pipeline
     79     char name[64];
     80     sprintf(name,
     81             "scanline__%08X:%08X_%08X_%08X [%3d ipp]",
     82             needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops);
     83 
     84     if (err) {
     85         ALOGE("Error while generating ""%s""\n", name);
     86         disassemble(name);
     87         return -1;
     88     }
     89 
     90     return generate(name);
     91 }
     92 
// Emits the complete scanline routine for one set of `needs`.
//
// Steps, in order:
//   1. decode `needs` into member flags (AA, dithering, alpha/depth test,
//      fog, smooth shading, blend factors, per-component info in mInfo[]);
//   2. emit the prolog and the per-scanline setup (build_scanline_prolog);
//   3. emit the "fragment_loop" body: dither index update, early depth test,
//      texturing, optional framebuffer fetch, per-component building,
//      logic-op / masking / store;
//   4. emit the iterator updates, the loop branch and the "epilog";
//   5. when the alpha or depth test is enabled, emit the discard paths
//      ("discard_before_textures" / "discard_after_textures").
//
// Returns registerFile().status(); scanline() treats 0 as success. The
// function returns early with that status whenever it is non-zero after a
// stage that allocates registers.
int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
{
    // NOTE(review): `duration` is never read in this function.
    int64_t duration = ggl_system_time();

    mBlendFactorCached = 0;
    mBlending = 0;
    mMasking = 0;
    mAA        = GGL_READ_NEEDS(P_AA, needs.p);
    mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
    // the needs store the test functions as offsets from GGL_NEVER
    mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
    mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
    mFog       = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
    mSmooth    = GGL_READ_NEEDS(SHADE, needs.n) != 0;
    mBuilderContext.needs = needs;
    mBuilderContext.c = c;
    mBuilderContext.Rctx = reserveReg(R0); // context always in R0
    mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];

    // ------------------------------------------------------------------------

    decodeLogicOpNeeds(needs);

    decodeTMUNeeds(needs, c);

    mBlendSrc  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
    mBlendDst  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
    mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
    mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));

    // The color buffer has no alpha component (its high bit index is 0):
    // every blend factor that refers to destination alpha is replaced
    // with GGL_ONE.
    if (!mCbFormat.c[GGLFormat::ALPHA].h) {
        if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrc == GGL_DST_ALPHA)) {
            mBlendSrc = GGL_ONE;
        }
        if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrcA == GGL_DST_ALPHA)) {
            mBlendSrcA = GGL_ONE;
        }
        if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDst == GGL_DST_ALPHA)) {
            mBlendDst = GGL_ONE;
        }
        if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDstA == GGL_DST_ALPHA)) {
            mBlendDstA = GGL_ONE;
        }
    }

    // if we need the framebuffer, read it now
    // (combined blending codes for the color and the alpha factors; tested
    // further down to decide whether the destination pixel must be fetched)
    const int blending =    blending_codes(mBlendSrc, mBlendDst) |
                            blending_codes(mBlendSrcA, mBlendDstA);

    // XXX: handle special cases, destination not modified...
    // NOTE(review): both branches below are empty placeholders; they emit
    // nothing and change no state.
    if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
        // Destination unmodified (beware of logic ops)
    } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
        // Destination is zero (beware of logic ops)
    }

    // Fill in the per-component info (mInfo[]) and accumulate the
    // mBlending / mMasking bitmasks, one bit per component.
    int fbComponents = 0;
    const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
    for (int i=0 ; i<4 ; i++) {
        const int mask = 1<<i;
        component_info_t& info = mInfo[i];
        int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
        int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
        // for the alpha channel, SRC_ALPHA_SATURATE is treated as ONE
        if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
            fs = GGL_ONE;
        info.masked =   !!(masking & mask);
        // the component reaches the destination if it isn't masked, exists
        // in the color buffer format, and the logic op (if any) uses the
        // source
        info.inDest =   !info.masked && mCbFormat.c[i].h &&
                        ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
        // for formats whose `components` value is >= GGL_LUMINANCE, green
        // and blue are never written
        if (mCbFormat.components >= GGL_LUMINANCE &&
                (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
            info.inDest = false;
        }
        // only alpha can be "needed" without being in the destination:
        // as a blend factor source or for the alpha test
        info.needed =   (i==GGLFormat::ALPHA) &&
                        (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
        info.replaced = !!(mTextureMachine.replaced & mask);
        info.iterated = (!info.replaced && (info.inDest || info.needed));
        info.smooth =   mSmooth && info.iterated;
        info.fog =      mFog && info.inDest && (i != GGLFormat::ALPHA);
        info.blend =    (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

        mBlending |= (info.blend ? mask : 0);
        mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
        fbComponents |= mCbFormat.c[i].h ? mask : 0;
    }

    // every component present in the framebuffer is masked: nothing will
    // be stored, so dithering is pointless
    mAllMasked = (mMasking == fbComponents);
    if (mAllMasked) {
        mDithering = 0;
    }

    fragment_parts_t parts;

    // ------------------------------------------------------------------------
    prolog();
    // ------------------------------------------------------------------------

    build_scanline_prolog(parts, needs);

    if (registerFile().status())
        return registerFile().status();

    // ------------------------------------------------------------------------
    label("fragment_loop");
    // ------------------------------------------------------------------------
    {
        Scratch regs(registerFile());

        if (mDithering) {
            // update the dither index.
            // (rotate right by GGL_DITHER_ORDER_SHIFT, add one at bit
            // 32-GGL_DITHER_ORDER_SHIFT, then rotate right by the
            // complementary amount to restore the original bit positions)
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT));
            ADD(AL, 0, parts.count.reg, parts.count.reg,
                    imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT)));
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT));
        }

        // XXX: could we do an early alpha-test here in some cases?
        // It would probably be used only with smooth-alpha and no texture
        // (or no alpha component in the texture).

        // Early z-test
        if (mAlphaTest==GGL_ALWAYS) {
            build_depth_test(parts, Z_TEST|Z_WRITE);
        } else {
            // we cannot do the z-write here, because
            // it might be killed by the alpha-test later
            build_depth_test(parts, Z_TEST);
        }

        { // texture coordinates
            Scratch scratches(registerFile());

            // texel generation
            build_textures(parts, regs);
        }

        if ((blending & (FACTOR_DST|BLEND_DST)) ||
                (mMasking && !mAllMasked) ||
                (mLogicOp & LOGIC_OP_DST))
        {
            // blending / logic_op / masking need the framebuffer
            mDstPixel.setTo(regs.obtain(), &mCbFormat);

            // load the framebuffer pixel
            comment("fetch color-buffer");
            load(parts.cbPtr, mDstPixel);
        }

        if (registerFile().status())
            return registerFile().status();

        pixel_t pixel;
        int directTex = mTextureMachine.directTexture;
        if (directTex | parts.packed) {
            // note: we can't have both here
            // iterated color or direct texture
            // (directTex is a 1-based texture unit index here)
            pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
            pixel.flags &= ~CORRUPTIBLE;
        } else {
            if (mDithering) {
                // load the dither value: ditherMatrix[count & (SIZE-1)],
                // addressed relative to the context register
                const int ctxtReg = mBuilderContext.Rctx;
                const int mask = GGL_DITHER_SIZE-1;
                parts.dither = reg_t(regs.obtain());
                AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
                ADD(AL, 0, parts.dither.reg, parts.dither.reg, ctxtReg);
                LDRB(AL, parts.dither.reg, parts.dither.reg,
                        immed12_pre(GGL_OFFSETOF(ditherMatrix)));
            }

            // allocate a register for the resulting pixel
            pixel.setTo(regs.obtain(), &mCbFormat, FIRST);

            // alpha is built first: the alpha test is emitted as part of
            // building the alpha component (see build_incoming_component)
            build_component(pixel, parts, GGLFormat::ALPHA,    regs);

            if (mAlphaTest!=GGL_ALWAYS) {
                // only handle the z-write part here. We know z-test
                // was successful, as well as alpha-test.
                build_depth_test(parts, Z_WRITE);
            }

            build_component(pixel, parts, GGLFormat::RED,      regs);
            build_component(pixel, parts, GGLFormat::GREEN,    regs);
            build_component(pixel, parts, GGLFormat::BLUE,     regs);

            pixel.flags |= CORRUPTIBLE;
        }

        if (registerFile().status())
            return registerFile().status();

        if (pixel.reg == -1) {
            // be defensive here. if we're here it's probably
            // that this whole fragment is a no-op.
            pixel = mDstPixel;
        }

        if (!mAllMasked) {
            // logic operation
            build_logic_op(pixel, regs);

            // masking
            build_masking(pixel, regs);

            comment("store");
            store(parts.cbPtr, pixel, WRITE_BACK);
        }
    }

    if (registerFile().status())
        return registerFile().status();

    // update the iterated color...
    if (parts.reload != 3) {
        build_smooth_shade(parts);
    }

    // update iterated z
    build_iterate_z(parts);

    // update iterated fog
    build_iterate_f(parts);

    // the pixel count lives in the upper 16 bits of count.reg; decrement it
    // (setting flags) and loop while the result is non-negative
    SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
    B(PL, "fragment_loop");
    label("epilog");
    epilog(registerFile().touched());

    // Discard paths: targets of the branches emitted by the depth test
    // ("discard_before_textures") and the alpha test
    // ("discard_after_textures"). They advance the iterators and the
    // color-buffer pointer without storing a pixel, then rejoin the loop.
    if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
        if (mDepthTest!=GGL_ALWAYS) {
            label("discard_before_textures");
            build_iterate_texture_coordinates(parts);
        }
        label("discard_after_textures");
        build_smooth_shade(parts);
        build_iterate_z(parts);
        build_iterate_f(parts);
        if (!mAllMasked) {
            // skip one pixel (cbPtr.size is in bits)
            ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
        }
        SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
        B(PL, "fragment_loop");
        epilog(registerFile().touched());
    }

    return registerFile().status();
}
    345 
    346 // ---------------------------------------------------------------------------
    347 
// Emits the per-scanline setup code that runs once before "fragment_loop".
//
// It fills in `parts` with the registers that stay live across the loop:
//   - parts.count  : pixel count (minus one) in the upper 16 bits, plus the
//                    dither offset in the low bits when dithering;
//   - parts.cbPtr  : pointer to the first color-buffer pixel (unless all
//                    components are masked);
//   - parts.z      : initial Z value (when depth test or Z mask is enabled);
//   - parts.covPtr : pointer into the coverage buffer (when AA is enabled).
// It also stores the initial fog value and the depth-buffer base into the
// context's generated_vars, and initializes texture coordinates and the
// iterated color through init_textures() / init_iterated_color().
void GGLAssembler::build_scanline_prolog(
    fragment_parts_t& parts, const needs_t& needs)
{
    Scratch scratches(registerFile());
    // NOTE(review): Rctx is not referenced by name below; check whether the
    // CONTEXT_LOAD / CONTEXT_STORE macros rely on it before removing it.
    int Rctx = mBuilderContext.Rctx;

    // compute count
    comment("compute ct (# of pixels to process)");
    parts.count.setTo(obtainReg());
    int Rx = scratches.obtain();
    int Ry = scratches.obtain();
    CONTEXT_LOAD(Rx, iterators.xl);
    CONTEXT_LOAD(parts.count.reg, iterators.xr);
    CONTEXT_LOAD(Ry, iterators.y);

    // parts.count = iterators.xr - Rx
    SUB(AL, 0, parts.count.reg, parts.count.reg, Rx);
    // ...minus one, so the loop can run while the counter is non-negative
    SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1));

    if (mDithering) {
        // parts.count.reg = 0xNNNNXXDD
        // NNNN = count-1
        // DD   = dither offset
        // XX   = 0xxxxxxx (x = garbage)
        Scratch scratches(registerFile());
        int tx = scratches.obtain();
        int ty = scratches.obtain();
        AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK));
        AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK));
        // dither offset = (x & mask) + ((y & mask) << ORDER_SHIFT)
        ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT));
        ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16));
    } else {
        // parts.count.reg = 0xNNNN0000
        // NNNN = count-1
        MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16));
    }

    if (!mAllMasked) {
        // compute dst ptr
        comment("compute color-buffer pointer");
        const int cb_bits = mCbFormat.size*8;
        int Rs = scratches.obtain();
        parts.cbPtr.setTo(obtainReg(), cb_bits);
        CONTEXT_LOAD(Rs, state.buffers.color.stride);
        CONTEXT_LOAD(parts.cbPtr.reg, state.buffers.color.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);  // Rs = Rx + Ry*Rs
        base_offset(parts.cbPtr, parts.cbPtr, Rs);
        scratches.recycle(Rs);
    }

    // init fog
    const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
    if (need_fog) {
        comment("compute initial fog coordinate");
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int ydfdy = scratches.obtain();
        int f = ydfdy;  // the result reuses the ydfdy register
        CONTEXT_LOAD(dfdx,  generated_vars.dfdx);
        CONTEXT_LOAD(ydfdy, iterators.ydfdy);
        // f = Rx*dfdx + ydfdy
        MLA(AL, 0, f, Rx, dfdx, ydfdy);
        CONTEXT_STORE(f, generated_vars.f);
    }

    // init Z coordinate
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        parts.z = reg_t(obtainReg());
        comment("compute initial Z coordinate");
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        int ydzdy = parts.z.reg;  // loaded directly into the Z register
        CONTEXT_LOAD(dzdx,  generated_vars.dzdx);   // 1.31 fixed-point
        CONTEXT_LOAD(ydzdy, iterators.ydzdy);       // 1.31 fixed-point
        // z = Rx*dzdx + ydzdy
        MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy);

        // we're going to index zbase of parts.count
        // zbase = base + (xl + (count>>16) + stride*y)*2
        // (build_depth_test later subtracts the remaining count from it)
        int Rs = dzdx;  // dzdx is no longer needed; reuse its register
        int zbase = scratches.obtain();
        CONTEXT_LOAD(Rs, state.buffers.depth.stride);
        CONTEXT_LOAD(zbase, state.buffers.depth.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);
        ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
        ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
        CONTEXT_STORE(zbase, generated_vars.zbase);
    }

    // init texture coordinates
    init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
    scratches.recycle(Ry);

    // iterated color
    init_iterated_color(parts, reg_t(Rx));

    // init coverage factor application (anti-aliasing)
    if (mAA) {
        parts.covPtr.setTo(obtainReg(), 16);
        CONTEXT_LOAD(parts.covPtr.reg, state.buffers.coverage);
        // covPtr += Rx * 2 (pointer is declared with 16-bit elements)
        ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
    }
}
    449 
    450 // ---------------------------------------------------------------------------
    451 
    452 void GGLAssembler::build_component( pixel_t& pixel,
    453                                     const fragment_parts_t& parts,
    454                                     int component,
    455                                     Scratch& regs)
    456 {
    457     static char const * comments[] = {"alpha", "red", "green", "blue"};
    458     comment(comments[component]);
    459 
    460     // local register file
    461     Scratch scratches(registerFile());
    462     const int dst_component_size = pixel.component_size(component);
    463 
    464     component_t temp(-1);
    465     build_incoming_component( temp, dst_component_size,
    466             parts, component, scratches, regs);
    467 
    468     if (mInfo[component].inDest) {
    469 
    470         // blending...
    471         build_blending( temp, mDstPixel, component, scratches );
    472 
    473         // downshift component and rebuild pixel...
    474         downshift(pixel, component, temp, parts.dither);
    475     }
    476 }
    477 
// Emits the code that computes the incoming (source) value of `component`
// and returns a description of where it lives in `temp`.
//
// Parameters:
//   temp        - output; left untouched if nothing is selected.
//   dst_size    - number of bits of this component in the destination pixel.
//   parts       - fragment registers set up by build_scanline_prolog().
//   component   - GGLFormat component index.
//   scratches   - scratch registers local to this component.
//   global_regs - caller's scratch set; used instead of `scratches` when the
//                 alpha value must outlive this component as a blend-factor
//                 source (mAlphaSource).
//
// When extraction is needed, the value goes through: iterated color, texture
// environment, optional expansion to dst_size, and for alpha: conversion to
// an integer, coverage application, alpha test and capture into
// mAlphaSource; finally fog. Otherwise the component is simply selected from
// the iterated color or from a texel.
void GGLAssembler::build_incoming_component(
                                    component_t& temp,
                                    int dst_size,
                                    const fragment_parts_t& parts,
                                    int component,
                                    Scratch& scratches,
                                    Scratch& global_regs)
{
    const uint32_t component_mask = 1<<component;

    // Figure out what we need for the blending stage...
    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
    // for the alpha channel, SRC_ALPHA_SATURATE is treated as ONE
    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
        fs = GGL_ONE;
    }

    // Figure out what we need to extract and for what reason
    const int blending = blending_codes(fs, fd);

    // Are we actually going to blend?
    const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

    // expand the source if the destination has more bits
    // NOTE(review): this loop stops at GGL_TEXTURE_UNIT_COUNT-1 while the
    // selection loop at the end of this function covers every unit --
    // confirm that skipping the last unit is intended.
    int need_expander = false;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if ((tmu.format_idx) &&
            (parts.texel[i].component_size(component) < dst_size)) {
            need_expander = true;
        }
    }

    // do we need to extract this component?
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
                                        (isAlphaSourceNeeded());
    int need_extract = mInfo[component].needed;
    if (mInfo[component].inDest)
    {
        // with blending: extract if the source value or a source factor is
        // used; without blending: extract only if expansion is required
        need_extract |= ((need_blending ?
                (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
        need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
        need_extract |= mInfo[component].smooth;
        need_extract |= mInfo[component].fog;
        need_extract |= mDithering;
        need_extract |= multiTexture;
    }

    if (need_extract) {
        // registers holding the alpha source must survive this component,
        // so they come from the caller's scratch set
        Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
        component_t fragment;

        // iterated color
        build_iterated_color(fragment, parts, component, regs);

        // texture environment (decal, modulate, replace)
        build_texture_environment(fragment, parts, component, regs);

        // expand the source if the destination has more bits
        if (need_expander && (fragment.size() < dst_size)) {
            // we're here only if we fetched a texel
            // (so we know for sure fragment is CORRUPTIBLE)
            expand(fragment, fragment, dst_size);
        }

        // We have a few specific things to do for the alpha-channel
        if ((component==GGLFormat::ALPHA) &&
            (mInfo[component].needed || fragment.size()<dst_size))
        {
            // convert to integer_t first and make sure
            // we don't corrupt a needed register
            if (fragment.l) {
                // shift the value down so that its low bit is bit 0
                component_t incoming(fragment);
                modify(fragment, regs);
                MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l));
                fragment.h -= fragment.l;
                fragment.l = 0;
            }

            // coverage factor application
            build_coverage_application(fragment, parts, regs);

            // alpha-test
            build_alpha_test(fragment, parts);

            if (blend_needs_alpha_source) {
                // We keep only 8 bits for the blending stage
                const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
                if (fragment.flags & CORRUPTIBLE) {
                    // take over the fragment's register; clearing
                    // CORRUPTIBLE on the fragment keeps later stages from
                    // overwriting it
                    fragment.flags &= ~CORRUPTIBLE;
                    mAlphaSource.setTo(fragment.reg,
                            fragment.size(), fragment.flags);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(mAlphaSource.reg, LSR, shift));
                    }
                } else {
                    // XXX: it would better to do this in build_blend_factor()
                    // so we can avoid the extra MOV below.
                    mAlphaSource.setTo(regs.obtain(),
                            fragment.size(), CORRUPTIBLE);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(fragment.reg, LSR, shift));
                    } else {
                        MOV(AL, 0, mAlphaSource.reg, fragment.reg);
                    }
                }
                // account for the bits dropped by the shift
                mAlphaSource.s -= shift;
            }
        }

        // fog...
        build_fog( fragment, component, regs );

        temp = fragment;
    } else {
        if (mInfo[component].inDest) {
            // extraction not needed and replace
            // we just select the right component
            if ((mTextureMachine.replaced & component_mask) == 0) {
                // component wasn't replaced, so use it!
                temp = component_t(parts.iterated, component);
            }
            // a texture unit that provides this component without
            // "replacing" it overrides the selection; the last matching
            // unit wins
            for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
                const texture_unit_t& tmu = mTextureMachine.tmu[i];
                if ((tmu.mask & component_mask) &&
                    ((tmu.replaced & component_mask) == 0)) {
                    temp = component_t(parts.texel[i], component);
                }
            }
        }
    }
}
    613 
    614 bool GGLAssembler::isAlphaSourceNeeded() const
    615 {
    616     // XXX: also needed for alpha-test
    617     const int bs = mBlendSrc;
    618     const int bd = mBlendDst;
    619     return  bs==GGL_SRC_ALPHA_SATURATE ||
    620             bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA ||
    621             bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ;
    622 }
    623 
    624 // ---------------------------------------------------------------------------
    625 
    626 void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts)
    627 {
    628     if (mSmooth && !parts.iterated_packed) {
    629         // update the iterated color in a pipelined way...
    630         comment("update iterated color");
    631         Scratch scratches(registerFile());
    632 
    633         const int reload = parts.reload;
    634         for (int i=0 ; i<4 ; i++) {
    635             if (!mInfo[i].iterated)
    636                 continue;
    637 
    638             int c = parts.argb[i].reg;
    639             int dx = parts.argb_dx[i].reg;
    640 
    641             if (reload & 1) {
    642                 c = scratches.obtain();
    643                 CONTEXT_LOAD(c, generated_vars.argb[i].c);
    644             }
    645             if (reload & 2) {
    646                 dx = scratches.obtain();
    647                 CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
    648             }
    649 
    650             if (mSmooth) {
    651                 ADD(AL, 0, c, c, dx);
    652             }
    653 
    654             if (reload & 1) {
    655                 CONTEXT_STORE(c, generated_vars.argb[i].c);
    656                 scratches.recycle(c);
    657             }
    658             if (reload & 2) {
    659                 scratches.recycle(dx);
    660             }
    661         }
    662     }
    663 }
    664 
    665 // ---------------------------------------------------------------------------
    666 
    667 void GGLAssembler::build_coverage_application(component_t& fragment,
    668         const fragment_parts_t& parts, Scratch& regs)
    669 {
    670     // here fragment.l is guarenteed to be 0
    671     if (mAA) {
    672         // coverages are 1.15 fixed-point numbers
    673         comment("coverage application");
    674 
    675         component_t incoming(fragment);
    676         modify(fragment, regs);
    677 
    678         Scratch scratches(registerFile());
    679         int cf = scratches.obtain();
    680         LDRH(AL, cf, parts.covPtr.reg, immed8_post(2));
    681         if (fragment.h > 31) {
    682             fragment.h--;
    683             SMULWB(AL, fragment.reg, incoming.reg, cf);
    684         } else {
    685             MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1));
    686             SMULWB(AL, fragment.reg, fragment.reg, cf);
    687         }
    688     }
    689 }
    690 
    691 // ---------------------------------------------------------------------------
    692 
    693 void GGLAssembler::build_alpha_test(component_t& fragment,
    694                                     const fragment_parts_t& parts)
    695 {
    696     if (mAlphaTest != GGL_ALWAYS) {
    697         comment("Alpha Test");
    698         Scratch scratches(registerFile());
    699         int ref = scratches.obtain();
    700         const int shift = GGL_COLOR_BITS-fragment.size();
    701         CONTEXT_LOAD(ref, state.alpha_test.ref);
    702         if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift));
    703         else       CMP(AL, fragment.reg, ref);
    704         int cc = NV;
    705         switch (mAlphaTest) {
    706         case GGL_NEVER:     cc = NV;    break;
    707         case GGL_LESS:      cc = LT;    break;
    708         case GGL_EQUAL:     cc = EQ;    break;
    709         case GGL_LEQUAL:    cc = LS;    break;
    710         case GGL_GREATER:   cc = HI;    break;
    711         case GGL_NOTEQUAL:  cc = NE;    break;
    712         case GGL_GEQUAL:    cc = HS;    break;
    713         }
    714         B(cc^1, "discard_after_textures");
    715     }
    716 }
    717 
    718 // ---------------------------------------------------------------------------
    719 
// Emits the depth test and/or the depth write for the current fragment.
//
// `mask` selects what to emit: Z_TEST (compare against the depth buffer and
// branch to "discard_before_textures" on failure) and/or Z_WRITE (store the
// fragment's depth). Other bits are ignored. Nothing is emitted when the
// depth function is GGL_ALWAYS and the Z mask need is not set, or when
// nothing remains to be done after the adjustments below.
void GGLAssembler::build_depth_test(
        const fragment_parts_t& parts, uint32_t mask)
{
    mask &= Z_TEST|Z_WRITE;
    const needs_t& needs = mBuilderContext.needs;
    const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
    Scratch scratches(registerFile());

    if (mDepthTest != GGL_ALWAYS || zmask) {
        // ic is the condition used for the conditional depth store below;
        // cc is its opposite and is used for the discard branch
        int cc=AL, ic=AL;
        switch (mDepthTest) {
        case GGL_LESS:      ic = HI;    break;
        case GGL_EQUAL:     ic = EQ;    break;
        case GGL_LEQUAL:    ic = HS;    break;
        case GGL_GREATER:   ic = LT;    break;
        case GGL_NOTEQUAL:  ic = NE;    break;
        case GGL_GEQUAL:    ic = LS;    break;
        case GGL_NEVER:
            // this never happens, because it's taken care of when
            // computing the needs. but we keep it for completeness.
            comment("Depth Test (NEVER)");
            B(AL, "discard_before_textures");
            return;
        case GGL_ALWAYS:
            // we're here because zmask is enabled
            mask &= ~Z_TEST;    // test always passes.
            break;
        }

        // inverse the condition
        cc = ic^1;

        // depth writes are only emitted when the Z mask need is set
        if ((mask & Z_WRITE) && !zmask) {
            mask &= ~Z_WRITE;
        }

        if (!mask)
            return;

        comment("Depth Test");

        int zbase = scratches.obtain();
        int depth = scratches.obtain();
        int z = parts.z.reg;

        CONTEXT_LOAD(zbase, generated_vars.zbase);  // stall
        SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
            // above does zbase = zbase - (count.reg >> 15), which is
            // zbase - ((count >> 16) << 1) when bit 15 of count.reg is clear

        if (mask & Z_TEST) {
            // compare the stored depth with the upper 16 bits of z
            LDRH(AL, depth, zbase);  // stall
            CMP(AL, depth, reg_imm(z, LSR, 16));
            B(cc, "discard_before_textures");
        }
        if (mask & Z_WRITE) {
            if (mask == Z_WRITE) {
                // only z-write asked, cc is meaningless
                ic = AL;
            }
            MOV(AL, 0, depth, reg_imm(z, LSR, 16));
            STRH(ic, depth, zbase);
        }
    }
}
    784 
    785 void GGLAssembler::build_iterate_z(const fragment_parts_t& parts)
    786 {
    787     const needs_t& needs = mBuilderContext.needs;
    788     if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
    789         Scratch scratches(registerFile());
    790         int dzdx = scratches.obtain();
    791         CONTEXT_LOAD(dzdx, generated_vars.dzdx);    // stall
    792         ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx);
    793     }
    794 }
    795 
    796 void GGLAssembler::build_iterate_f(const fragment_parts_t& parts)
    797 {
    798     const needs_t& needs = mBuilderContext.needs;
    799     if (GGL_READ_NEEDS(P_FOG, needs.p)) {
    800         Scratch scratches(registerFile());
    801         int dfdx = scratches.obtain();
    802         int f = scratches.obtain();
    803         CONTEXT_LOAD(f,     generated_vars.f);
    804         CONTEXT_LOAD(dfdx,  generated_vars.dfdx);   // stall
    805         ADD(AL, 0, f, f, dfdx);
    806         CONTEXT_STORE(f,    generated_vars.f);
    807     }
    808 }
    809 
    810 // ---------------------------------------------------------------------------
    811 
    812 void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs)
    813 {
    814     const needs_t& needs = mBuilderContext.needs;
    815     const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
    816     if (opcode == GGL_COPY)
    817         return;
    818 
    819     comment("logic operation");
    820 
    821     pixel_t s(pixel);
    822     if (!(pixel.flags & CORRUPTIBLE)) {
    823         pixel.reg = regs.obtain();
    824         pixel.flags |= CORRUPTIBLE;
    825     }
    826 
    827     pixel_t d(mDstPixel);
    828     switch(opcode) {
    829     case GGL_CLEAR:         MOV(AL, 0, pixel.reg, imm(0));          break;
    830     case GGL_AND:           AND(AL, 0, pixel.reg, s.reg, d.reg);    break;
    831     case GGL_AND_REVERSE:   BIC(AL, 0, pixel.reg, s.reg, d.reg);    break;
    832     case GGL_COPY:                                                  break;
    833     case GGL_AND_INVERTED:  BIC(AL, 0, pixel.reg, d.reg, s.reg);    break;
    834     case GGL_NOOP:          MOV(AL, 0, pixel.reg, d.reg);           break;
    835     case GGL_XOR:           EOR(AL, 0, pixel.reg, s.reg, d.reg);    break;
    836     case GGL_OR:            ORR(AL, 0, pixel.reg, s.reg, d.reg);    break;
    837     case GGL_NOR:           ORR(AL, 0, pixel.reg, s.reg, d.reg);
    838                             MVN(AL, 0, pixel.reg, pixel.reg);       break;
    839     case GGL_EQUIV:         EOR(AL, 0, pixel.reg, s.reg, d.reg);
    840                             MVN(AL, 0, pixel.reg, pixel.reg);       break;
    841     case GGL_INVERT:        MVN(AL, 0, pixel.reg, d.reg);           break;
    842     case GGL_OR_REVERSE:    // s | ~d == ~(~s & d)
    843                             BIC(AL, 0, pixel.reg, d.reg, s.reg);
    844                             MVN(AL, 0, pixel.reg, pixel.reg);       break;
    845     case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg);           break;
    846     case GGL_OR_INVERTED:   // ~s | d == ~(s & ~d)
    847                             BIC(AL, 0, pixel.reg, s.reg, d.reg);
    848                             MVN(AL, 0, pixel.reg, pixel.reg);       break;
    849     case GGL_NAND:          AND(AL, 0, pixel.reg, s.reg, d.reg);
    850                             MVN(AL, 0, pixel.reg, pixel.reg);       break;
    851     case GGL_SET:           MVN(AL, 0, pixel.reg, imm(0));          break;
    852     };
    853 }
    854 
    855 // ---------------------------------------------------------------------------
    856 
    857 static uint32_t find_bottom(uint32_t val)
    858 {
    859     uint32_t i = 0;
    860     while (!(val & (3<<i)))
    861         i+= 2;
    862     return i;
    863 }
    864 
    865 static void normalize(uint32_t& val, uint32_t& rot)
    866 {
    867     rot = 0;
    868     while (!(val&3)  || (val & 0xFC000000)) {
    869         uint32_t newval;
    870         newval = val >> 2;
    871         newval |= (val&3) << 30;
    872         val = newval;
    873         rot += 2;
    874         if (rot == 32) {
    875             rot = 0;
    876             break;
    877         }
    878     }
    879 }
    880 
    881 void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
    882 {
    883     uint32_t rot;
    884     uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
    885     mask &= size;
    886 
    887     if (mask == size) {
    888         if (d != s)
    889             MOV( AL, 0, d, s);
    890         return;
    891     }
    892 
    893     int negative_logic = !isValidImmediate(mask);
    894     if (negative_logic) {
    895         mask = ~mask & size;
    896     }
    897     normalize(mask, rot);
    898 
    899     if (mask) {
    900         while (mask) {
    901             uint32_t bitpos = find_bottom(mask);
    902             int shift = rot + bitpos;
    903             uint32_t m = mask & (0xff << bitpos);
    904             mask &= ~m;
    905             m >>= bitpos;
    906             int32_t newMask =  (m<<shift) | (m>>(32-shift));
    907             if (!negative_logic) {
    908                 AND( AL, 0, d, s, imm(newMask) );
    909             } else {
    910                 BIC( AL, 0, d, s, imm(newMask) );
    911             }
    912             s = d;
    913         }
    914     } else {
    915         MOV( AL, 0, d, imm(0));
    916     }
    917 }
    918 
    919 void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs)
    920 {
    921     if (!mMasking || mAllMasked) {
    922         return;
    923     }
    924 
    925     comment("color mask");
    926 
    927     pixel_t fb(mDstPixel);
    928     pixel_t s(pixel);
    929     if (!(pixel.flags & CORRUPTIBLE)) {
    930         pixel.reg = regs.obtain();
    931         pixel.flags |= CORRUPTIBLE;
    932     }
    933 
    934     int mask = 0;
    935     for (int i=0 ; i<4 ; i++) {
    936         const int component_mask = 1<<i;
    937         const int h = fb.format.c[i].h;
    938         const int l = fb.format.c[i].l;
    939         if (h && (!(mMasking & component_mask))) {
    940             mask |= ((1<<(h-l))-1) << l;
    941         }
    942     }
    943 
    944     // There is no need to clear the masked components of the source
    945     // (unless we applied a logic op), because they're already zeroed
    946     // by construction (masked components are not computed)
    947 
    948     if (mLogicOp) {
    949         const needs_t& needs = mBuilderContext.needs;
    950         const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
    951         if (opcode != GGL_CLEAR) {
    952             // clear masked component of source
    953             build_and_immediate(pixel.reg, s.reg, mask, fb.size());
    954             s = pixel;
    955         }
    956     }
    957 
    958     // clear non masked components of destination
    959     build_and_immediate(fb.reg, fb.reg, ~mask, fb.size());
    960 
    961     // or back the channels that were masked
    962     if (s.reg == fb.reg) {
    963          // this is in fact a MOV
    964         if (s.reg == pixel.reg) {
    965             // ugh. this in in fact a nop
    966         } else {
    967             MOV(AL, 0, pixel.reg, fb.reg);
    968         }
    969     } else {
    970         ORR(AL, 0, pixel.reg, s.reg, fb.reg);
    971     }
    972 }
    973 
    974 // ---------------------------------------------------------------------------
    975 
    976 void GGLAssembler::base_offset(
    977         const pointer_t& d, const pointer_t& b, const reg_t& o)
    978 {
    979     switch (b.size) {
    980     case 32:
    981         ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
    982         break;
    983     case 24:
    984         if (d.reg == b.reg) {
    985             ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
    986             ADD(AL, 0, d.reg, d.reg, o.reg);
    987         } else {
    988             ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
    989             ADD(AL, 0, d.reg, d.reg, b.reg);
    990         }
    991         break;
    992     case 16:
    993         ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
    994         break;
    995     case 8:
    996         ADD(AL, 0, d.reg, b.reg, o.reg);
    997         break;
    998     }
    999 }
   1000 
   1001 // ----------------------------------------------------------------------------
   1002 // cheezy register allocator...
   1003 // ----------------------------------------------------------------------------
   1004 
   1005 void RegisterAllocator::reset()
   1006 {
   1007     mRegs.reset();
   1008 }
   1009 
   1010 int RegisterAllocator::reserveReg(int reg)
   1011 {
   1012     return mRegs.reserve(reg);
   1013 }
   1014 
   1015 int RegisterAllocator::obtainReg()
   1016 {
   1017     return mRegs.obtain();
   1018 }
   1019 
   1020 void RegisterAllocator::recycleReg(int reg)
   1021 {
   1022     mRegs.recycle(reg);
   1023 }
   1024 
   1025 RegisterAllocator::RegisterFile& RegisterAllocator::registerFile()
   1026 {
   1027     return mRegs;
   1028 }
   1029 
   1030 // ----------------------------------------------------------------------------
   1031 
   1032 RegisterAllocator::RegisterFile::RegisterFile()
   1033     : mRegs(0), mTouched(0), mStatus(0)
   1034 {
   1035     reserve(ARMAssemblerInterface::SP);
   1036     reserve(ARMAssemblerInterface::PC);
   1037 }
   1038 
   1039 RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs)
   1040     : mRegs(rhs.mRegs), mTouched(rhs.mTouched)
   1041 {
   1042 }
   1043 
   1044 RegisterAllocator::RegisterFile::~RegisterFile()
   1045 {
   1046 }
   1047 
   1048 bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const
   1049 {
   1050     return (mRegs == rhs.mRegs);
   1051 }
   1052 
   1053 void RegisterAllocator::RegisterFile::reset()
   1054 {
   1055     mRegs = mTouched = mStatus = 0;
   1056     reserve(ARMAssemblerInterface::SP);
   1057     reserve(ARMAssemblerInterface::PC);
   1058 }
   1059 
   1060 int RegisterAllocator::RegisterFile::reserve(int reg)
   1061 {
   1062     LOG_ALWAYS_FATAL_IF(isUsed(reg),
   1063                         "reserving register %d, but already in use",
   1064                         reg);
   1065     mRegs |= (1<<reg);
   1066     mTouched |= mRegs;
   1067     return reg;
   1068 }
   1069 
   1070 void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
   1071 {
   1072     mRegs |= regMask;
   1073     mTouched |= regMask;
   1074 }
   1075 
   1076 int RegisterAllocator::RegisterFile::isUsed(int reg) const
   1077 {
   1078     LOG_ALWAYS_FATAL_IF(reg>=16, "invalid register %d", reg);
   1079     return mRegs & (1<<reg);
   1080 }
   1081 
   1082 int RegisterAllocator::RegisterFile::obtain()
   1083 {
   1084     const char priorityList[14] = {  0,  1, 2, 3,
   1085                                     12, 14, 4, 5,
   1086                                      6,  7, 8, 9,
   1087                                     10, 11 };
   1088     const int nbreg = sizeof(priorityList);
   1089     int i, r;
   1090     for (i=0 ; i<nbreg ; i++) {
   1091         r = priorityList[i];
   1092         if (!isUsed(r)) {
   1093             break;
   1094         }
   1095     }
   1096     // this is not an error anymore because, we'll try again with
   1097     // a lower optimization level.
   1098     //ALOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n");
   1099     if (i >= nbreg) {
   1100         mStatus |= OUT_OF_REGISTERS;
   1101         // we return SP so we can more easily debug things
   1102         // the code will never be run anyway.
   1103         return ARMAssemblerInterface::SP;
   1104     }
   1105     reserve(r);
   1106     return r;
   1107 }
   1108 
   1109 bool RegisterAllocator::RegisterFile::hasFreeRegs() const
   1110 {
   1111     return ((mRegs & 0xFFFF) == 0xFFFF) ? false : true;
   1112 }
   1113 
   1114 int RegisterAllocator::RegisterFile::countFreeRegs() const
   1115 {
   1116     int f = ~mRegs & 0xFFFF;
   1117     // now count number of 1
   1118    f = (f & 0x5555) + ((f>>1) & 0x5555);
   1119    f = (f & 0x3333) + ((f>>2) & 0x3333);
   1120    f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);
   1121    f = (f & 0x00FF) + ((f>>8) & 0x00FF);
   1122    return f;
   1123 }
   1124 
   1125 void RegisterAllocator::RegisterFile::recycle(int reg)
   1126 {
   1127     LOG_FATAL_IF(!isUsed(reg),
   1128             "recycling unallocated register %d",
   1129             reg);
   1130     mRegs &= ~(1<<reg);
   1131 }
   1132 
   1133 void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
   1134 {
   1135     LOG_FATAL_IF((mRegs & regMask)!=regMask,
   1136             "recycling unallocated registers "
   1137             "(recycle=%08x, allocated=%08x, unallocated=%08x)",
   1138             regMask, mRegs, mRegs&regMask);
   1139     mRegs &= ~regMask;
   1140 }
   1141 
   1142 uint32_t RegisterAllocator::RegisterFile::touched() const
   1143 {
   1144     return mTouched;
   1145 }
   1146 
   1147 // ----------------------------------------------------------------------------
   1148 
   1149 }; // namespace android
   1150 
   1151