// codeflinger/load_store.cpp — listing-mirror header converted to a comment
      1 /* libs/pixelflinger/codeflinger/load_store.cpp
      2 **
      3 ** Copyright 2006, The Android Open Source Project
      4 **
      5 ** Licensed under the Apache License, Version 2.0 (the "License");
      6 ** you may not use this file except in compliance with the License.
      7 ** You may obtain a copy of the License at
      8 **
      9 **     http://www.apache.org/licenses/LICENSE-2.0
     10 **
     11 ** Unless required by applicable law or agreed to in writing, software
     12 ** distributed under the License is distributed on an "AS IS" BASIS,
     13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 ** See the License for the specific language governing permissions and
     15 ** limitations under the License.
     16 */
     17 
     18 #define LOG_TAG "pixelflinger-code"
     19 
     20 #include <assert.h>
     21 #include <stdio.h>
     22 
     23 #include <log/log.h>
     24 
     25 #include "GGLAssembler.h"
     26 
     27 namespace android {
     28 
     29 // ----------------------------------------------------------------------------
     30 
     31 void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
     32 {
     33     const int bits = addr.size;
     34     const int inc = (flags & WRITE_BACK)?1:0;
     35     switch (bits) {
     36     case 32:
     37         if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
     38         else        STR(AL, s.reg, addr.reg);
     39         break;
     40     case 24:
     41         // 24 bits formats are a little special and used only for RGB
     42         // 0x00BBGGRR is unpacked as R,G,B
     43         STRB(AL, s.reg, addr.reg, immed12_pre(0));
     44         MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
     45         STRB(AL, s.reg, addr.reg, immed12_pre(1));
     46         MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
     47         STRB(AL, s.reg, addr.reg, immed12_pre(2));
     48         if (!(s.flags & CORRUPTIBLE)) {
     49             MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
     50         }
     51         if (inc)
     52             ADD(AL, 0, addr.reg, addr.reg, imm(3));
     53         break;
     54     case 16:
     55         if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
     56         else        STRH(AL, s.reg, addr.reg);
     57         break;
     58     case  8:
     59         if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
     60         else        STRB(AL, s.reg, addr.reg);
     61         break;
     62     }
     63 }
     64 
     65 void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
     66 {
     67     Scratch scratches(registerFile());
     68     int s0;
     69 
     70     const int bits = addr.size;
     71     const int inc = (flags & WRITE_BACK)?1:0;
     72     switch (bits) {
     73     case 32:
     74         if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
     75         else        LDR(AL, s.reg, addr.reg);
     76         break;
     77     case 24:
     78         // 24 bits formats are a little special and used only for RGB
     79         // R,G,B is packed as 0x00BBGGRR
     80         s0 = scratches.obtain();
     81         if (s.reg != addr.reg) {
     82             LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
     83             LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
     84             ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
     85             LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
     86             ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
     87         } else {
     88             int s1 = scratches.obtain();
     89             LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
     90             LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
     91             ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
     92             LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
     93             ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
     94         }
     95         if (inc)
     96             ADD(AL, 0, addr.reg, addr.reg, imm(3));
     97         break;
     98     case 16:
     99         if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
    100         else        LDRH(AL, s.reg, addr.reg);
    101         break;
    102     case  8:
    103         if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
    104         else        LDRB(AL, s.reg, addr.reg);
    105         break;
    106     }
    107 }
    108 
// Isolate the bit-field [l, h) of source register 's' into d.reg,
// right-justified (shifted down to bit 0).  'bits' is the total width of
// the packed pixel containing the field: when h == bits the field already
// occupies the top of the value, so no high-bit masking is needed.
// On return, d.s holds the field width (h - l) in bits.
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

    // NOTE(review): the MIPS build permits wider fields (<=11 vs <=8);
    // the reason is not visible from this file — presumably a backend
    // immediate-encoding difference.  TODO confirm.
#ifdef __mips__
    assert(maskLen<=11);
#else
    assert(maskLen<=8);
#endif
    assert(h);

    if (h != bits) {
        // There are bits above the field: mask them off, preferring a
        // single AND/BIC when the mask is encodable as an immediate.
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            // Mask not encodable: shift the field up to the top of the
            // word instead, discarding the high bits, and adjust l/h.
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        // From here on the partially-extracted value lives in d.reg.
        s = d.reg;
    }

    if (l) {
        // Right-justify the field.
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        // Field was already masked and right-justified; plain copy.
        MOV(AL, 0, d.reg, s);
    }

    d.s = maskLen;
}
    145 
    146 void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
    147 {
    148     extract(d,  s.reg,
    149                 s.format.c[component].h,
    150                 s.format.c[component].l,
    151                 s.size());
    152 }
    153 
    154 void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
    155 {
    156     integer_t r(d.reg, 32, d.flags);
    157     extract(r,  s.reg,
    158                 s.format.c[component].h,
    159                 s.format.c[component].l,
    160                 s.size());
    161     d = component_t(r);
    162 }
    163 
    164 
    165 void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
    166 {
    167     if (s.l || (s.flags & CLEAR_HI)) {
    168         extract(d, s.reg, s.h, s.l, 32);
    169         expand(d, d, dbits);
    170     } else {
    171         expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
    172     }
    173 }
    174 
    175 void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
    176 {
    177     integer_t r(d.reg, 32, d.flags);
    178     expand(r, s, dbits);
    179     d = component_t(r);
    180 }
    181 
// Widen 'src' (src.size() significant bits) to 'dbits' bits in 'dst' by
// bit replication, so that full-scale input maps to full-scale output
// (e.g. 5-bit 0x1F expands to 8-bit 0xFF).  src and dst may alias.
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        // Already wide enough: a plain register copy if needed.
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        // Single-bit source: 0 -> 0, 1 -> (1<<dbits)-1 (all ones).
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
            // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        // dbits is not a multiple of sbits: shift the source up to the
        // top of the destination field, then fold copies downward,
        // doubling the replicated width each pass.
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
            // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
                // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

    // dbits is a multiple of sbits: build upward by OR-ing shifted
    // copies; after the first pass the accumulated value in d is used
    // as the new source.
    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
            // d |= d<<sbits;
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}
    231 
    232 void GGLAssembler::downshift(
    233         pixel_t& d, int component, component_t s, const reg_t& dither)
    234 {
    235     const needs_t& needs = mBuilderContext.needs;
    236     Scratch scratches(registerFile());
    237 
    238     int sh = s.h;
    239     int sl = s.l;
    240     int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
    241     int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
    242     int sbits = sh - sl;
    243 
    244     int dh = d.format.c[component].h;
    245     int dl = d.format.c[component].l;
    246     int dbits = dh - dl;
    247     int dithering = 0;
    248 
    249     ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
    250 
    251     if (sbits>dbits) {
    252         // see if we need to dither
    253         dithering = mDithering;
    254     }
    255 
    256     int ireg = d.reg;
    257     if (!(d.flags & FIRST)) {
    258         if (s.flags & CORRUPTIBLE)  {
    259             ireg = s.reg;
    260         } else {
    261             ireg = scratches.obtain();
    262         }
    263     }
    264     d.flags &= ~FIRST;
    265 
    266     if (maskHiBits) {
    267         // we need to mask the high bits (and possibly the lowbits too)
    268         // and we might be able to use immediate mask.
    269         if (!dithering) {
    270             // we don't do this if we only have maskLoBits because we can
    271             // do it more efficiently below (in the case where dl=0)
    272             const int offset = sh - dbits;
    273             if (dbits<=8 && offset >= 0) {
    274                 const uint32_t mask = ((1<<dbits)-1) << offset;
    275                 if (isValidImmediate(mask) || isValidImmediate(~mask)) {
    276                     build_and_immediate(ireg, s.reg, mask, 32);
    277                     sl = offset;
    278                     s.reg = ireg;
    279                     sbits = dbits;
    280                     maskLoBits = maskHiBits = 0;
    281                 }
    282             }
    283         } else {
    284             // in the dithering case though, we need to preserve the lower bits
    285             const uint32_t mask = ((1<<sbits)-1) << sl;
    286             if (isValidImmediate(mask) || isValidImmediate(~mask)) {
    287                 build_and_immediate(ireg, s.reg, mask, 32);
    288                 s.reg = ireg;
    289                 maskLoBits = maskHiBits = 0;
    290             }
    291         }
    292     }
    293 
    294     // XXX: we could special case (maskHiBits & !maskLoBits)
    295     // like we do for maskLoBits below, but it happens very rarely
    296     // that we have maskHiBits only and the conditions necessary to lead
    297     // to better code (like doing d |= s << 24)
    298 
    299     if (maskHiBits) {
    300         MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
    301         sl += 32-sh;
    302         sh = 32;
    303         s.reg = ireg;
    304         maskHiBits = 0;
    305     }
    306 
    307     //	Downsampling should be performed as follows:
    308     //  V * ((1<<dbits)-1) / ((1<<sbits)-1)
    309     //	V * [(1<<dbits)/((1<<sbits)-1)	-	1/((1<<sbits)-1)]
    310     //	V * [1/((1<<sbits)-1)>>dbits	-	1/((1<<sbits)-1)]
    311     //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/((1<<sbits)-1)>>sbits
    312     //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/(1-(1>>sbits))
    313     //
    314     //	By approximating (1>>dbits) and (1>>sbits) to 0:
    315     //
    316     //		V>>(sbits-dbits)	-	V>>sbits
    317     //
    318 	//  A good approximation is V>>(sbits-dbits),
    319     //  but better one (needed for dithering) is:
    320     //
    321     //		(V>>(sbits-dbits)<<sbits	-	V)>>sbits
    322     //		(V<<dbits	-	V)>>sbits
    323     //		(V	-	V>>dbits)>>(sbits-dbits)
    324 
    325     // Dithering is done here
    326     if (dithering) {
    327         comment("dithering");
    328         if (sl) {
    329             MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
    330             sh -= sl;
    331             sl = 0;
    332             s.reg = ireg;
    333         }
    334         // scaling (V-V>>dbits)
    335         SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
    336         const int shift = (GGL_DITHER_BITS - (sbits-dbits));
    337         if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
    338         else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
    339         else                ADD(AL, 0, ireg, ireg, dither.reg);
    340         s.reg = ireg;
    341     }
    342 
    343     if ((maskLoBits|dithering) && (sh > dbits)) {
    344         int shift = sh-dbits;
    345         if (dl) {
    346             MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
    347             if (ireg == d.reg) {
    348                 MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
    349             } else {
    350                 ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
    351             }
    352         } else {
    353             if (ireg == d.reg) {
    354                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
    355             } else {
    356                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
    357             }
    358         }
    359     } else {
    360         int shift = sh-dh;
    361         if (shift>0) {
    362             if (ireg == d.reg) {
    363                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
    364             } else {
    365                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
    366             }
    367         } else if (shift<0) {
    368             if (ireg == d.reg) {
    369                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
    370             } else {
    371                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
    372             }
    373         } else {
    374             if (ireg == d.reg) {
    375                 if (s.reg != d.reg) {
    376                     MOV(AL, 0, d.reg, s.reg);
    377                 }
    378             } else {
    379                 ORR(AL, 0, d.reg, d.reg, s.reg);
    380             }
    381         }
    382     }
    383 }
    384 
    385 }; // namespace android
    386