// Home | History | Annotate | Download | only in codeflinger
      1 /* libs/pixelflinger/codeflinger/load_store.cpp
      2 **
      3 ** Copyright 2006, The Android Open Source Project
      4 **
      5 ** Licensed under the Apache License, Version 2.0 (the "License");
      6 ** you may not use this file except in compliance with the License.
      7 ** You may obtain a copy of the License at
      8 **
      9 **     http://www.apache.org/licenses/LICENSE-2.0
     10 **
     11 ** Unless required by applicable law or agreed to in writing, software
     12 ** distributed under the License is distributed on an "AS IS" BASIS,
     13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 ** See the License for the specific language governing permissions and
     15 ** limitations under the License.
     16 */
     17 
     18 #include <assert.h>
     19 #include <stdio.h>
     20 #include <cutils/log.h>
     21 #include "GGLAssembler.h"
     22 
     23 namespace android {
     24 
     25 // ----------------------------------------------------------------------------
     26 
// Emit ARM code that stores pixel 's' to memory at '*addr.reg'.
// addr.size selects the pixel width (32/24/16/8 bits). When WRITE_BACK is
// set in 'flags', addr.reg is advanced past the stored pixel.
// For the 24-bit case, s.reg is rotated during the store; it is restored
// afterwards unless the caller marked it CORRUPTIBLE.
void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        // single word store, optionally with post-increment write-back
        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
        else        STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // 0x00BBGGRR is unpacked as R,G,B
        // store byte 0, then rotate the next component into the low byte
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
        if (!(s.flags & CORRUPTIBLE)) {
            // caller still needs s.reg: undo the two ROR-8 rotations
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
        else        STRH(AL, s.reg, addr.reg);
        break;
    case  8:
        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
        else        STRB(AL, s.reg, addr.reg);
        break;
    }
}
     60 
// Emit ARM code that loads a pixel from '*addr.reg' into s.reg.
// addr.size selects the pixel width (32/24/16/8 bits). When WRITE_BACK is
// set in 'flags', addr.reg is advanced past the loaded pixel.
// The 24-bit case assembles three byte loads into 0x00BBGGRR; it needs an
// extra scratch path when s.reg aliases addr.reg, since the first byte load
// would otherwise clobber the address before bytes 1 and 2 are read.
void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    Scratch scratches(registerFile());
    int s0;

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
        else        LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // R,G,B is packed as 0x00BBGGRR
        s0 = scratches.obtain();
        if (s.reg != addr.reg) {
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
            // s.reg aliases addr.reg: accumulate in scratch s1 and only
            // write s.reg (== addr.reg) with the very last instruction
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else        LDRH(AL, s.reg, addr.reg);
        break;
    case  8:
        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else        LDRB(AL, s.reg, addr.reg);
        break;
    }
}
    104 
// Emit code that extracts the bit-field [l, h) of register 's' into d.reg,
// right-aligned. 'bits' is the total width of the packed value the field
// lives in (so h == bits means the field is already left-aligned).
// On exit d.s records the field width (h - l).
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

#ifdef __mips__
    // the MIPS backend tolerates wider fields than ARM's (<=8) —
    // presumably due to its immediate encoding; see assembler backend
    assert(maskLen<=11);
#else
    assert(maskLen<=8);
#endif
    assert(h);

    if (h != bits) {
        // field is not left-aligned: mask off the bits above it
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            // mask is not encodable as an immediate: left-align the field
            // instead, so the LSR below both clears and right-aligns it
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        // nothing was emitted above: just move the value into place
        MOV(AL, 0, d.reg, s);
    }

    d.s = maskLen;
}
    141 
    142 void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
    143 {
    144     extract(d,  s.reg,
    145                 s.format.c[component].h,
    146                 s.format.c[component].l,
    147                 s.size());
    148 }
    149 
    150 void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
    151 {
    152     integer_t r(d.reg, 32, d.flags);
    153     extract(r,  s.reg,
    154                 s.format.c[component].h,
    155                 s.format.c[component].l,
    156                 s.size());
    157     d = component_t(r);
    158 }
    159 
    160 
    161 void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
    162 {
    163     if (s.l || (s.flags & CLEAR_HI)) {
    164         extract(d, s.reg, s.h, s.l, 32);
    165         expand(d, d, dbits);
    166     } else {
    167         expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
    168     }
    169 }
    170 
    171 void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
    172 {
    173     integer_t r(d.reg, 32, d.flags);
    174     expand(r, s, dbits);
    175     d = component_t(r);
    176 }
    177 
// Emit code that expands 'src' (a right-aligned value of src.size() bits)
// to 'dbits' bits of precision in 'dst', by replicating the source bits
// downward (the standard way to widen a color component so that the
// maximum value maps to the maximum value, e.g. 0x1F -> 0xFF for 5->8).
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        // already wide enough: at most a register move is needed
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        // replicating a single bit: 0 -> 0, 1 -> (1<<dbits)-1
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
            // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        // dbits is not a multiple of sbits: left-align first, then OR in
        // right-shifted copies until all dbits are populated
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
            // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
                // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;     // each pass doubles the populated width
        } while(dbits>0);
        return;
    }

    // dbits is a multiple of sbits: duplicate upward by left-shifted ORs
    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
            // d = s | (s << sbits); after the first pass s aliases d,
            // so this effectively becomes d |= d << sbits
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}
    227 
// Emit code that narrows component 's' (occupying bits [s.l, s.h) of
// s.reg, possibly with dirty bits outside that range) down to the
// destination pixel format's field for 'component', ORing (or MOVing, for
// the first component) the result into place in d.reg. When the precision
// loss is real (sbits > dbits) and dithering is enabled, the dither value
// in 'dither.reg' is folded in before the final shift.
void GGLAssembler::downshift(
        pixel_t& d, int component, component_t s, const reg_t& dither)
{
    // NOTE(review): 'needs' is not referenced in this function
    const needs_t& needs = mBuilderContext.needs;
    Scratch scratches(registerFile());

    int sh = s.h;
    int sl = s.l;
    // do the hi/lo bits outside [sl, sh) actually need clearing?
    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
    int sbits = sh - sl;

    int dh = d.format.c[component].h;
    int dl = d.format.c[component].l;
    int dbits = dh - dl;
    int dithering = 0;

    ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);

    if (sbits>dbits) {
        // see if we need to dither
        dithering = mDithering;
    }

    // choose the intermediate register: d.reg itself for the first
    // component, s.reg if we may clobber it, else a fresh scratch
    int ireg = d.reg;
    if (!(d.flags & FIRST)) {
        if (s.flags & CORRUPTIBLE)  {
            ireg = s.reg;
        } else {
            ireg = scratches.obtain();
        }
    }
    d.flags &= ~FIRST;

    if (maskHiBits) {
        // we need to mask the high bits (and possibly the lowbits too)
        // and we might be able to use immediate mask.
        if (!dithering) {
            // we don't do this if we only have maskLoBits because we can
            // do it more efficiently below (in the case where dl=0)
            const int offset = sh - dbits;
            if (dbits<=8 && offset >= 0) {
                const uint32_t mask = ((1<<dbits)-1) << offset;
                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                    build_and_immediate(ireg, s.reg, mask, 32);
                    sl = offset;
                    s.reg = ireg;
                    sbits = dbits;
                    maskLoBits = maskHiBits = 0;
                }
            }
        } else {
            // in the dithering case though, we need to preserve the lower bits
            const uint32_t mask = ((1<<sbits)-1) << sl;
            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                build_and_immediate(ireg, s.reg, mask, 32);
                s.reg = ireg;
                maskLoBits = maskHiBits = 0;
            }
        }
    }

    // XXX: we could special case (maskHiBits & !maskLoBits)
    // like we do for maskLoBits below, but it happens very rarely
    // that we have maskHiBits only and the conditions necessary to lead
    // to better code (like doing d |= s << 24)

    if (maskHiBits) {
        // immediate mask didn't work out: clear the high bits by
        // left-aligning the component instead
        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
        sl += 32-sh;
        sh = 32;
        s.reg = ireg;
        maskHiBits = 0;
    }

    //  Downsampling should be performed as follows:
    //  V * ((1<<dbits)-1) / ((1<<sbits)-1)
    //  V * [(1<<dbits)/((1<<sbits)-1)  -  1/((1<<sbits)-1)]
    //  V * [1/((1<<sbits)-1)>>dbits    -  1/((1<<sbits)-1)]
    //  V/((1<<(sbits-dbits))-(1>>dbits))  -  (V>>sbits)/((1<<sbits)-1)>>sbits
    //  V/((1<<(sbits-dbits))-(1>>dbits))  -  (V>>sbits)/(1-(1>>sbits))
    //
    //  By approximating (1>>dbits) and (1>>sbits) to 0:
    //
    //      V>>(sbits-dbits)    -   V>>sbits
    //
    //  A good approximation is V>>(sbits-dbits),
    //  but better one (needed for dithering) is:
    //
    //      (V>>(sbits-dbits)<<sbits    -   V)>>sbits
    //      (V<<dbits   -   V)>>sbits
    //      (V  -   V>>dbits)>>(sbits-dbits)

    // Dithering is done here
    if (dithering) {
        comment("dithering");
        if (sl) {
            // right-align the component before the dither arithmetic
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
            sh -= sl;
            sl = 0;
            s.reg = ireg;
        }
        // scaling (V-V>>dbits)
        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
        // align the dither value to the discarded precision and add it
        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
        else                ADD(AL, 0, ireg, ireg, dither.reg);
        s.reg = ireg;
    }

    if ((maskLoBits|dithering) && (sh > dbits)) {
        // low bits are dirty (or dithering happened): shift them out
        // first, then shift the clean value into its destination slot
        int shift = sh-dbits;
        if (dl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
            }
        } else {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        }
    } else {
        // single shift moves the component straight into [dl, dh)
        int shift = sh-dh;
        if (shift>0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        } else if (shift<0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
            }
        } else {
            if (ireg == d.reg) {
                if (s.reg != d.reg) {
                    MOV(AL, 0, d.reg, s.reg);
                }
            } else {
                ORR(AL, 0, d.reg, d.reg, s.reg);
            }
        }
    }
}
    380 
    381 }; // namespace android
    382