Home | History | Annotate | Download | only in codeflinger
      1 /* libs/pixelflinger/codeflinger/load_store.cpp
      2 **
      3 ** Copyright 2006, The Android Open Source Project
      4 **
      5 ** Licensed under the Apache License, Version 2.0 (the "License");
      6 ** you may not use this file except in compliance with the License.
      7 ** You may obtain a copy of the License at
      8 **
      9 **     http://www.apache.org/licenses/LICENSE-2.0
     10 **
     11 ** Unless required by applicable law or agreed to in writing, software
     12 ** distributed under the License is distributed on an "AS IS" BASIS,
     13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 ** See the License for the specific language governing permissions and
     15 ** limitations under the License.
     16 */
     17 
     18 #include <assert.h>
     19 #include <stdio.h>
     20 #include <cutils/log.h>
     21 #include "codeflinger/GGLAssembler.h"
     22 
     23 #ifdef __ARM_ARCH__
     24 #include <machine/cpu-features.h>
     25 #endif
     26 
     27 namespace android {
     28 
     29 // ----------------------------------------------------------------------------
     30 
     31 void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
     32 {
     33     const int bits = addr.size;
     34     const int inc = (flags & WRITE_BACK)?1:0;
     35     switch (bits) {
     36     case 32:
     37         if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
     38         else        STR(AL, s.reg, addr.reg);
     39         break;
     40     case 24:
     41         // 24 bits formats are a little special and used only for RGB
     42         // 0x00BBGGRR is unpacked as R,G,B
     43         STRB(AL, s.reg, addr.reg, immed12_pre(0));
     44         MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
     45         STRB(AL, s.reg, addr.reg, immed12_pre(1));
     46         MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
     47         STRB(AL, s.reg, addr.reg, immed12_pre(2));
     48         if (!(s.flags & CORRUPTIBLE)) {
     49             MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
     50         }
     51         if (inc)
     52             ADD(AL, 0, addr.reg, addr.reg, imm(3));
     53         break;
     54     case 16:
     55         if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
     56         else        STRH(AL, s.reg, addr.reg);
     57         break;
     58     case  8:
     59         if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
     60         else        STRB(AL, s.reg, addr.reg);
     61         break;
     62     }
     63 }
     64 
     65 void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
     66 {
     67     Scratch scratches(registerFile());
     68     int s0;
     69 
     70     const int bits = addr.size;
     71     const int inc = (flags & WRITE_BACK)?1:0;
     72     switch (bits) {
     73     case 32:
     74         if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
     75         else        LDR(AL, s.reg, addr.reg);
     76         break;
     77     case 24:
     78         // 24 bits formats are a little special and used only for RGB
     79         // R,G,B is packed as 0x00BBGGRR
     80         s0 = scratches.obtain();
     81         if (s.reg != addr.reg) {
     82             LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
     83             LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
     84             ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
     85             LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
     86             ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
     87         } else {
     88             int s1 = scratches.obtain();
     89             LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
     90             LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
     91             ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
     92             LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
     93             ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
     94         }
     95         if (inc)
     96             ADD(AL, 0, addr.reg, addr.reg, imm(3));
     97         break;
     98     case 16:
     99         if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
    100         else        LDRH(AL, s.reg, addr.reg);
    101         break;
    102     case  8:
    103         if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
    104         else        LDRB(AL, s.reg, addr.reg);
    105         break;
    106     }
    107 }
    108 
// Emit code extracting the bitfield [h-1:l] of register 's' into d.reg,
// right-justified (i.e. component = (packed & mask) >> l), where 'bits'
// is the total width of the packed value held in 's'. On exit d.s records
// the extracted field's width (h-l). The field must be at most 8 bits
// wide and h must be non-zero.
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

    assert(maskLen<=8);
    assert(h);

#if __ARM_ARCH__ >= 7
    // ARMv7: prefer single-instruction forms; UBFX covers the general case.
    const int mask = (1<<maskLen)-1;
    if ((h == bits) && !l && (s != d.reg)) {
        MOV(AL, 0, d.reg, s);                   // component = packed;
    } else if ((h == bits) && l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
    } else if (!l && isValidImmediate(mask)) {
        AND(AL, 0, d.reg, s, imm(mask));        // component = packed & mask;
    } else if (!l && isValidImmediate(~mask)) {
        BIC(AL, 0, d.reg, s, imm(~mask));       // component = packed & mask;
    } else {
        UBFX(AL, d.reg, s, l, maskLen);         // component = (packed & mask) >> l;
    }
#else
    // Pre-ARMv7: clear the bits above the field first (by AND/BIC when the
    // mask is encodable as an immediate, otherwise by shifting the field up
    // to the top of the register), then shift it down into place.
    if (h != bits) {
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            // Mask not encodable: left-justify the field and adjust h/l so
            // the LSR below right-justifies it.
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    // If nothing above emitted an instruction, still deliver the value in d.reg.
    if (s != d.reg) {
        MOV(AL, 0, d.reg, s);
    }
#endif

    d.s = maskLen;
}
    156 
    157 void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
    158 {
    159     extract(d,  s.reg,
    160                 s.format.c[component].h,
    161                 s.format.c[component].l,
    162                 s.size());
    163 }
    164 
    165 void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
    166 {
    167     integer_t r(d.reg, 32, d.flags);
    168     extract(r,  s.reg,
    169                 s.format.c[component].h,
    170                 s.format.c[component].l,
    171                 s.size());
    172     d = component_t(r);
    173 }
    174 
    175 
    176 void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
    177 {
    178     if (s.l || (s.flags & CLEAR_HI)) {
    179         extract(d, s.reg, s.h, s.l, 32);
    180         expand(d, d, dbits);
    181     } else {
    182         expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
    183     }
    184 }
    185 
    186 void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
    187 {
    188     integer_t r(d.reg, 32, d.flags);
    189     expand(r, s, dbits);
    190     d = component_t(r);
    191 }
    192 
// Emit code expanding 'src' (src.size() bits) to 'dbits' bits of precision
// in 'dst', by replicating the source bits downward so that full-scale
// input maps to full-scale output (e.g. 5-bit 0x1F -> 8-bit 0xFF).
// On exit dst.s == dbits and dst.flags is cleared.
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    // No expansion needed: just deliver the value in the destination register.
    if (dbits<=sbits) {
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    // A 1-bit source expands to all-zeros or all-ones in one instruction.
    if (sbits == 1) {
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
            // d = (s<<dbits) - s;
        return;
    }

    // dbits not a multiple of sbits: left-justify into the destination
    // width, then repeatedly OR in right-shifted copies (doubling the
    // filled span each pass) until all dbits are populated.
    if (dbits % sbits) {
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
            // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
                // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

    // dbits is a multiple of sbits: build up by ORing left-shifted copies,
    // doubling the copied span while it still fits in the bits remaining.
    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
            // d = s | (s << sbits); after the first pass s aliases d
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}
    242 
    243 void GGLAssembler::downshift(
    244         pixel_t& d, int component, component_t s, const reg_t& dither)
    245 {
    246     const needs_t& needs = mBuilderContext.needs;
    247     Scratch scratches(registerFile());
    248 
    249     int sh = s.h;
    250     int sl = s.l;
    251     int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
    252     int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
    253     int sbits = sh - sl;
    254 
    255     int dh = d.format.c[component].h;
    256     int dl = d.format.c[component].l;
    257     int dbits = dh - dl;
    258     int dithering = 0;
    259 
    260     LOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
    261 
    262     if (sbits>dbits) {
    263         // see if we need to dither
    264         dithering = mDithering;
    265     }
    266 
    267     int ireg = d.reg;
    268     if (!(d.flags & FIRST)) {
    269         if (s.flags & CORRUPTIBLE)  {
    270             ireg = s.reg;
    271         } else {
    272             ireg = scratches.obtain();
    273         }
    274     }
    275     d.flags &= ~FIRST;
    276 
    277     if (maskHiBits) {
    278         // we need to mask the high bits (and possibly the lowbits too)
    279         // and we might be able to use immediate mask.
    280         if (!dithering) {
    281             // we don't do this if we only have maskLoBits because we can
    282             // do it more efficiently below (in the case where dl=0)
    283             const int offset = sh - dbits;
    284             if (dbits<=8 && offset >= 0) {
    285                 const uint32_t mask = ((1<<dbits)-1) << offset;
    286                 if (isValidImmediate(mask) || isValidImmediate(~mask)) {
    287                     build_and_immediate(ireg, s.reg, mask, 32);
    288                     sl = offset;
    289                     s.reg = ireg;
    290                     sbits = dbits;
    291                     maskLoBits = maskHiBits = 0;
    292                 }
    293             }
    294         } else {
    295             // in the dithering case though, we need to preserve the lower bits
    296             const uint32_t mask = ((1<<sbits)-1) << sl;
    297             if (isValidImmediate(mask) || isValidImmediate(~mask)) {
    298                 build_and_immediate(ireg, s.reg, mask, 32);
    299                 s.reg = ireg;
    300                 maskLoBits = maskHiBits = 0;
    301             }
    302         }
    303     }
    304 
    305     // XXX: we could special case (maskHiBits & !maskLoBits)
    306     // like we do for maskLoBits below, but it happens very rarely
    307     // that we have maskHiBits only and the conditions necessary to lead
    308     // to better code (like doing d |= s << 24)
    309 
    310     if (maskHiBits) {
    311         MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
    312         sl += 32-sh;
    313         sh = 32;
    314         s.reg = ireg;
    315         maskHiBits = 0;
    316     }
    317 
    318     //	Downsampling should be performed as follows:
    319     //  V * ((1<<dbits)-1) / ((1<<sbits)-1)
    320     //	V * [(1<<dbits)/((1<<sbits)-1)	-	1/((1<<sbits)-1)]
    321     //	V * [1/((1<<sbits)-1)>>dbits	-	1/((1<<sbits)-1)]
    322     //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/((1<<sbits)-1)>>sbits
    323     //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/(1-(1>>sbits))
    324     //
    325     //	By approximating (1>>dbits) and (1>>sbits) to 0:
    326     //
    327     //		V>>(sbits-dbits)	-	V>>sbits
    328     //
    329 	//  A good approximation is V>>(sbits-dbits),
    330     //  but better one (needed for dithering) is:
    331     //
    332     //		(V>>(sbits-dbits)<<sbits	-	V)>>sbits
    333     //		(V<<dbits	-	V)>>sbits
    334     //		(V	-	V>>dbits)>>(sbits-dbits)
    335 
    336     // Dithering is done here
    337     if (dithering) {
    338         comment("dithering");
    339         if (sl) {
    340             MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
    341             sh -= sl;
    342             sl = 0;
    343             s.reg = ireg;
    344         }
    345         // scaling (V-V>>dbits)
    346         SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
    347         const int shift = (GGL_DITHER_BITS - (sbits-dbits));
    348         if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
    349         else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
    350         else                ADD(AL, 0, ireg, ireg, dither.reg);
    351         s.reg = ireg;
    352     }
    353 
    354     if ((maskLoBits|dithering) && (sh > dbits)) {
    355         int shift = sh-dbits;
    356         if (dl) {
    357             MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
    358             if (ireg == d.reg) {
    359                 MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
    360             } else {
    361                 ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
    362             }
    363         } else {
    364             if (ireg == d.reg) {
    365                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
    366             } else {
    367                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
    368             }
    369         }
    370     } else {
    371         int shift = sh-dh;
    372         if (shift>0) {
    373             if (ireg == d.reg) {
    374                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
    375             } else {
    376                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
    377             }
    378         } else if (shift<0) {
    379             if (ireg == d.reg) {
    380                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
    381             } else {
    382                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
    383             }
    384         } else {
    385             if (ireg == d.reg) {
    386                 if (s.reg != d.reg) {
    387                     MOV(AL, 0, d.reg, s.reg);
    388                 }
    389             } else {
    390                 ORR(AL, 0, d.reg, d.reg, s.reg);
    391             }
    392         }
    393     }
    394 }
    395 
    396 }; // namespace android
    397