// (code-viewer navigation header removed: "Home | History | Annotate | Download | only in codeflinger")
      1 /* libs/pixelflinger/codeflinger/load_store.cpp
      2 **
      3 ** Copyright 2006, The Android Open Source Project
      4 **
      5 ** Licensed under the Apache License, Version 2.0 (the "License");
      6 ** you may not use this file except in compliance with the License.
      7 ** You may obtain a copy of the License at
      8 **
      9 **     http://www.apache.org/licenses/LICENSE-2.0
     10 **
     11 ** Unless required by applicable law or agreed to in writing, software
     12 ** distributed under the License is distributed on an "AS IS" BASIS,
     13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 ** See the License for the specific language governing permissions and
     15 ** limitations under the License.
     16 */
     17 
     18 #define LOG_TAG "pixelflinger-code"
     19 
     20 #include <assert.h>
     21 #include <stdio.h>
     22 
     23 #include <log/log.h>
     24 
     25 #include "GGLAssembler.h"
     26 
     27 namespace android {
     28 
     29 // ----------------------------------------------------------------------------
     30 
     31 void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
     32 {
     33     const int bits = addr.size;
     34     const int inc = (flags & WRITE_BACK)?1:0;
     35     switch (bits) {
     36     case 32:
     37         if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
     38         else        STR(AL, s.reg, addr.reg);
     39         break;
     40     case 24:
     41         // 24 bits formats are a little special and used only for RGB
     42         // 0x00BBGGRR is unpacked as R,G,B
     43         STRB(AL, s.reg, addr.reg, immed12_pre(0));
     44         MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
     45         STRB(AL, s.reg, addr.reg, immed12_pre(1));
     46         MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
     47         STRB(AL, s.reg, addr.reg, immed12_pre(2));
     48         if (!(s.flags & CORRUPTIBLE)) {
     49             MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
     50         }
     51         if (inc)
     52             ADD(AL, 0, addr.reg, addr.reg, imm(3));
     53         break;
     54     case 16:
     55         if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
     56         else        STRH(AL, s.reg, addr.reg);
     57         break;
     58     case  8:
     59         if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
     60         else        STRB(AL, s.reg, addr.reg);
     61         break;
     62     }
     63 }
     64 
     65 void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
     66 {
     67     Scratch scratches(registerFile());
     68     int s0;
     69 
     70     const int bits = addr.size;
     71     const int inc = (flags & WRITE_BACK)?1:0;
     72     switch (bits) {
     73     case 32:
     74         if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
     75         else        LDR(AL, s.reg, addr.reg);
     76         break;
     77     case 24:
     78         // 24 bits formats are a little special and used only for RGB
     79         // R,G,B is packed as 0x00BBGGRR
     80         s0 = scratches.obtain();
     81         if (s.reg != addr.reg) {
     82             LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
     83             LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
     84             ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
     85             LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
     86             ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
     87         } else {
     88             int s1 = scratches.obtain();
     89             LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
     90             LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
     91             ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
     92             LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
     93             ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
     94         }
     95         if (inc)
     96             ADD(AL, 0, addr.reg, addr.reg, imm(3));
     97         break;
     98     case 16:
     99         if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
    100         else        LDRH(AL, s.reg, addr.reg);
    101         break;
    102     case  8:
    103         if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
    104         else        LDRB(AL, s.reg, addr.reg);
    105         break;
    106     }
    107 }
    108 
    109 void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
    110 {
    111     const int maskLen = h-l;
    112 
    113 #ifdef __mips__
    114     assert(maskLen<=11);
    115 #else
    116     assert(maskLen<=8);
    117 #endif
    118     assert(h);
    119 
    120     if (h != bits) {
    121         const int mask = ((1<<maskLen)-1) << l;
    122         if (isValidImmediate(mask)) {
    123             AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
    124         } else if (isValidImmediate(~mask)) {
    125             BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
    126         } else {
    127             MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
    128             l += 32-h;
    129             h = 32;
    130         }
    131         s = d.reg;
    132     }
    133 
    134     if (l) {
    135         MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
    136         s = d.reg;
    137     }
    138 
    139     if (s != d.reg) {
    140         MOV(AL, 0, d.reg, s);
    141     }
    142 
    143     d.s = maskLen;
    144 }
    145 
    146 void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
    147 {
    148     extract(d,  s.reg,
    149                 s.format.c[component].h,
    150                 s.format.c[component].l,
    151                 s.size());
    152 }
    153 
    154 void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
    155 {
    156     integer_t r(d.reg, 32, d.flags);
    157     extract(r,  s.reg,
    158                 s.format.c[component].h,
    159                 s.format.c[component].l,
    160                 s.size());
    161     d = component_t(r);
    162 }
    163 
    164 
    165 void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
    166 {
    167     if (s.l || (s.flags & CLEAR_HI)) {
    168         extract(d, s.reg, s.h, s.l, 32);
    169         expand(d, d, dbits);
    170     } else {
    171         expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
    172     }
    173 }
    174 
    175 void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
    176 {
    177     integer_t r(d.reg, 32, d.flags);
    178     expand(r, s, dbits);
    179     d = component_t(r);
    180 }
    181 
    182 void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
    183 {
    184     assert(src.size());
    185 
    186     int sbits = src.size();
    187     int s = src.reg;
    188     int d = dst.reg;
    189 
    190     // be sure to set 'dst' after we read 'src' as they may be identical
    191     dst.s = dbits;
    192     dst.flags = 0;
    193 
    194     if (dbits<=sbits) {
    195         if (s != d) {
    196             MOV(AL, 0, d, s);
    197         }
    198         return;
    199     }
    200 
    201     if (sbits == 1) {
    202         RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
    203             // d = (s<<dbits) - s;
    204         return;
    205     }
    206 
    207     if (dbits % sbits) {
    208         MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
    209             // d = s << (dbits-sbits);
    210         dbits -= sbits;
    211         do {
    212             ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
    213                 // d |= d >> sbits;
    214             dbits -= sbits;
    215             sbits *= 2;
    216         } while(dbits>0);
    217         return;
    218     }
    219 
    220     dbits -= sbits;
    221     do {
    222         ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
    223             // d |= d<<sbits;
    224         s = d;
    225         dbits -= sbits;
    226         if (sbits*2 < dbits) {
    227             sbits *= 2;
    228         }
    229     } while(dbits>0);
    230 }
    231 
// Reduce component 's' (occupying bits [s.l, s.h) of s.reg) to the
// destination format's precision for 'component' and merge it into the
// pixel being assembled in d.reg, at the component's bit position.
// When precision is lost and dithering is enabled, the value in
// dither.reg is added in before truncation. If d.flags has FIRST set,
// d.reg holds no other components yet and is overwritten (MOV) rather
// than merged (ORR); FIRST is cleared on return.
void GGLAssembler::downshift(
        pixel_t& d, int component, component_t s, const reg_t& dither)
{
    Scratch scratches(registerFile());

    int sh = s.h;
    int sl = s.l;
    // whether garbage above/below the field must be masked off before use
    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
    int sbits = sh - sl;                    // source component width

    int dh = d.format.c[component].h;
    int dl = d.format.c[component].l;
    int dbits = dh - dl;                    // destination component width
    int dithering = 0;

    ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);

    if (sbits>dbits) {
        // see if we need to dither
        dithering = mDithering;
    }

    // pick a register for intermediate results: d.reg directly when this
    // is the first component written, otherwise s.reg if we may clobber
    // it, otherwise a fresh scratch
    int ireg = d.reg;
    if (!(d.flags & FIRST)) {
        if (s.flags & CORRUPTIBLE)  {
            ireg = s.reg;
        } else {
            ireg = scratches.obtain();
        }
    }
    d.flags &= ~FIRST;

    if (maskHiBits) {
        // we need to mask the high bits (and possibly the lowbits too)
        // and we might be able to use immediate mask.
        if (!dithering) {
            // we don't do this if we only have maskLoBits because we can
            // do it more efficiently below (in the case where dl=0)
            const int offset = sh - dbits;
            if (dbits<=8 && offset >= 0) {
                const uint32_t mask = ((1<<dbits)-1) << offset;
                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                    // mask and truncate to dbits in one step
                    build_and_immediate(ireg, s.reg, mask, 32);
                    sl = offset;
                    s.reg = ireg;
                    sbits = dbits;
                    maskLoBits = maskHiBits = 0;
                }
            }
        } else {
            // in the dithering case though, we need to preserve the lower bits
            const uint32_t mask = ((1<<sbits)-1) << sl;
            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                build_and_immediate(ireg, s.reg, mask, 32);
                s.reg = ireg;
                maskLoBits = maskHiBits = 0;
            }
        }
    }

    // XXX: we could special case (maskHiBits & !maskLoBits)
    // like we do for maskLoBits below, but it happens very rarely
    // that we have maskHiBits only and the conditions necessary to lead
    // to better code (like doing d |= s << 24)

    if (maskHiBits) {
        // no immediate mask worked: clear the high bits by shifting the
        // field up against bit 31, and fix up sl/sh accordingly
        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
        sl += 32-sh;
        sh = 32;
        s.reg = ireg;
        maskHiBits = 0;
    }

    // Downsampling should be performed as follows:
    //   V * ((1<<dbits)-1) / ((1<<sbits)-1)
    //   V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)]
    //   V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)]
    //   V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
    //   V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
    //
    // By approximating (1>>dbits) and (1>>sbits) to 0:
    //
    //   V>>(sbits-dbits) - V>>sbits
    //
    // A good approximation is V>>(sbits-dbits),
    // but a better one (needed for dithering) is:
    //
    //   (V>>(sbits-dbits)<<sbits - V)>>sbits
    //   (V<<dbits - V)>>sbits
    //   (V - V>>dbits)>>(sbits-dbits)

    // Dithering is done here
    if (dithering) {
        comment("dithering");
        if (sl) {
            // right-align the field first so the dither add lines up
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
            sh -= sl;
            sl = 0;
            s.reg = ireg;
        }
        // scaling (V-V>>dbits)
        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
        // align the dither value with the bits about to be discarded
        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
        else                ADD(AL, 0, ireg, ireg, dither.reg);
        s.reg = ireg;
    }

    if ((maskLoBits|dithering) && (sh > dbits)) {
        // low garbage bits (or dither carry) present: shift them out to
        // the right, then place the component at its destination offset
        int shift = sh-dbits;
        if (dl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
            }
        } else {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        }
    } else {
        // general case: a single shift moves the field from bit sh down
        // (or up) to the destination's high bit dh, merging as we go
        int shift = sh-dh;
        if (shift>0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        } else if (shift<0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
            }
        } else {
            if (ireg == d.reg) {
                // already in place; avoid a redundant MOV
                if (s.reg != d.reg) {
                    MOV(AL, 0, d.reg, s.reg);
                }
            } else {
                ORR(AL, 0, d.reg, d.reg, s.reg);
            }
        }
    }
}
    383 
    384 }; // namespace android
    385