// codeflinger/load_store.cpp — pixelflinger JIT load/store and component pack/unpack helpers
      1 /* libs/pixelflinger/codeflinger/load_store.cpp
      2 **
      3 ** Copyright 2006, The Android Open Source Project
      4 **
      5 ** Licensed under the Apache License, Version 2.0 (the "License");
      6 ** you may not use this file except in compliance with the License.
      7 ** You may obtain a copy of the License at
      8 **
      9 **     http://www.apache.org/licenses/LICENSE-2.0
     10 **
     11 ** Unless required by applicable law or agreed to in writing, software
     12 ** distributed under the License is distributed on an "AS IS" BASIS,
     13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 ** See the License for the specific language governing permissions and
     15 ** limitations under the License.
     16 */
     17 
     18 #include <assert.h>
     19 #include <stdio.h>
     20 #include <cutils/log.h>
     21 #include "GGLAssembler.h"
     22 
     23 #ifdef __ARM_ARCH__
     24 #include <machine/cpu-features.h>
     25 #endif
     26 
     27 namespace android {
     28 
     29 // ----------------------------------------------------------------------------
     30 
void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    // Emit ARM instructions that store pixel 's' to memory at 'addr'.
    // addr.size selects the pixel width in bits (32/24/16/8); if WRITE_BACK
    // is set in 'flags', addr.reg is post-incremented by the pixel size in
    // bytes so successive calls walk the scanline.
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        // single word store, optionally with post-indexed write-back
        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
        else        STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // 0x00BBGGRR is unpacked as R,G,B
        // Store byte-by-byte, rotating the source register 8 bits between
        // stores to bring the next component into the low byte.
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
        if (!(s.flags & CORRUPTIBLE)) {
            // undo the two ROR #8 above (total 16) so the caller's
            // register still holds the original pixel value
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            // no post-indexed 3-byte store exists; bump the pointer manually
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        // halfword stores use the 8-bit immediate addressing form
        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
        else        STRH(AL, s.reg, addr.reg);
        break;
    case  8:
        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
        else        STRB(AL, s.reg, addr.reg);
        break;
    }
}
     64 
void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    // Emit ARM instructions that load a pixel from memory at 'addr' into
    // s.reg.  addr.size selects the pixel width in bits (32/24/16/8); if
    // WRITE_BACK is set in 'flags', addr.reg is post-incremented by the
    // pixel size in bytes.
    Scratch scratches(registerFile());
    int s0;

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
        else        LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // R,G,B is packed as 0x00BBGGRR
        s0 = scratches.obtain();
        if (s.reg != addr.reg) {
            // load each byte and OR it into place
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
            // destination aliases the address register: accumulate in a
            // second scratch so addr.reg stays intact until the last byte
            // has been read, then combine into s.reg in one final ORR
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            // no post-indexed 3-byte load exists; bump the pointer manually
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        // halfword loads use the 8-bit immediate addressing form
        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else        LDRH(AL, s.reg, addr.reg);
        break;
    case  8:
        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else        LDRB(AL, s.reg, addr.reg);
        break;
    }
}
    108 
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    // Extract the bit-field [l, h) of register 's' into d.reg,
    // right-aligned.  'bits' is the total width of the packed value in
    // 's' (so h == bits means the field already touches the top).
    // On exit, d.s records the extracted field's width.
    const int maskLen = h-l;

#ifdef __mips__
    // MIPS path tolerates wider fields than the ARM immediate forms below
    assert(maskLen<=11);
#else
    assert(maskLen<=8);
#endif
    assert(h);

#if __ARM_ARCH__ >= 7
    const int mask = (1<<maskLen)-1;
    // ARMv7: each case below is a single instruction; UBFX covers the
    // general mask-and-shift extract.
    if ((h == bits) && !l && (s != d.reg)) {
        MOV(AL, 0, d.reg, s);                   // component = packed;
    } else if ((h == bits) && l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
    } else if (!l && isValidImmediate(mask)) {
        AND(AL, 0, d.reg, s, imm(mask));        // component = packed & mask;
    } else if (!l && isValidImmediate(~mask)) {
        BIC(AL, 0, d.reg, s, imm(~mask));       // component = packed & mask;
    } else {
        UBFX(AL, d.reg, s, l, maskLen);         // component = (packed & mask) >> l;
    }
#else
    // Pre-ARMv7: clear the bits above the field first (AND/BIC with an
    // encodable immediate, otherwise shift the field up against bit 31),
    // then shift it down into place.
    if (h != bits) {
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            // immediate not encodable: left-justify the field instead and
            // fix up l/h so the shift below still lands it at bit 0
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        // field was already right-aligned and unmasked; just move it
        MOV(AL, 0, d.reg, s);
    }
#endif

    d.s = maskLen;
}
    160 
    161 void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
    162 {
    163     extract(d,  s.reg,
    164                 s.format.c[component].h,
    165                 s.format.c[component].l,
    166                 s.size());
    167 }
    168 
    169 void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
    170 {
    171     integer_t r(d.reg, 32, d.flags);
    172     extract(r,  s.reg,
    173                 s.format.c[component].h,
    174                 s.format.c[component].l,
    175                 s.size());
    176     d = component_t(r);
    177 }
    178 
    179 
    180 void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
    181 {
    182     if (s.l || (s.flags & CLEAR_HI)) {
    183         extract(d, s.reg, s.h, s.l, 32);
    184         expand(d, d, dbits);
    185     } else {
    186         expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
    187     }
    188 }
    189 
    190 void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
    191 {
    192     integer_t r(d.reg, 32, d.flags);
    193     expand(r, s, dbits);
    194     d = component_t(r);
    195 }
    196 
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    // Widen 'src' (src.size() bits, right-aligned) to 'dbits' bits by bit
    // replication, so full-scale input maps to full-scale output
    // (e.g. 5-bit 0x1F expands to 8-bit 0xFF via d = (s<<3)|(s>>2)).
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        // already wide enough -- just move into place if needed
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        // a single bit expands to all-ones or all-zeros in one RSB
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
            // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        // dbits is not a multiple of sbits: left-justify the source at the
        // destination width, then replicate downward, doubling the
        // replicated width each iteration
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
            // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
                // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

    // dbits is a multiple of sbits: replicate upward instead, keeping the
    // first ORR reading from 's' so dst may alias src
    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
            // d |= d<<sbits;
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}
    246 
    247 void GGLAssembler::downshift(
    248         pixel_t& d, int component, component_t s, const reg_t& dither)
    249 {
    250     const needs_t& needs = mBuilderContext.needs;
    251     Scratch scratches(registerFile());
    252 
    253     int sh = s.h;
    254     int sl = s.l;
    255     int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
    256     int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
    257     int sbits = sh - sl;
    258 
    259     int dh = d.format.c[component].h;
    260     int dl = d.format.c[component].l;
    261     int dbits = dh - dl;
    262     int dithering = 0;
    263 
    264     ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);
    265 
    266     if (sbits>dbits) {
    267         // see if we need to dither
    268         dithering = mDithering;
    269     }
    270 
    271     int ireg = d.reg;
    272     if (!(d.flags & FIRST)) {
    273         if (s.flags & CORRUPTIBLE)  {
    274             ireg = s.reg;
    275         } else {
    276             ireg = scratches.obtain();
    277         }
    278     }
    279     d.flags &= ~FIRST;
    280 
    281     if (maskHiBits) {
    282         // we need to mask the high bits (and possibly the lowbits too)
    283         // and we might be able to use immediate mask.
    284         if (!dithering) {
    285             // we don't do this if we only have maskLoBits because we can
    286             // do it more efficiently below (in the case where dl=0)
    287             const int offset = sh - dbits;
    288             if (dbits<=8 && offset >= 0) {
    289                 const uint32_t mask = ((1<<dbits)-1) << offset;
    290                 if (isValidImmediate(mask) || isValidImmediate(~mask)) {
    291                     build_and_immediate(ireg, s.reg, mask, 32);
    292                     sl = offset;
    293                     s.reg = ireg;
    294                     sbits = dbits;
    295                     maskLoBits = maskHiBits = 0;
    296                 }
    297             }
    298         } else {
    299             // in the dithering case though, we need to preserve the lower bits
    300             const uint32_t mask = ((1<<sbits)-1) << sl;
    301             if (isValidImmediate(mask) || isValidImmediate(~mask)) {
    302                 build_and_immediate(ireg, s.reg, mask, 32);
    303                 s.reg = ireg;
    304                 maskLoBits = maskHiBits = 0;
    305             }
    306         }
    307     }
    308 
    309     // XXX: we could special case (maskHiBits & !maskLoBits)
    310     // like we do for maskLoBits below, but it happens very rarely
    311     // that we have maskHiBits only and the conditions necessary to lead
    312     // to better code (like doing d |= s << 24)
    313 
    314     if (maskHiBits) {
    315         MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
    316         sl += 32-sh;
    317         sh = 32;
    318         s.reg = ireg;
    319         maskHiBits = 0;
    320     }
    321 
    322     //	Downsampling should be performed as follows:
    323     //  V * ((1<<dbits)-1) / ((1<<sbits)-1)
    324     //	V * [(1<<dbits)/((1<<sbits)-1)	-	1/((1<<sbits)-1)]
    325     //	V * [1/((1<<sbits)-1)>>dbits	-	1/((1<<sbits)-1)]
    326     //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/((1<<sbits)-1)>>sbits
    327     //	V/((1<<(sbits-dbits))-(1>>dbits))	-	(V>>sbits)/(1-(1>>sbits))
    328     //
    329     //	By approximating (1>>dbits) and (1>>sbits) to 0:
    330     //
    331     //		V>>(sbits-dbits)	-	V>>sbits
    332     //
    333 	//  A good approximation is V>>(sbits-dbits),
    334     //  but better one (needed for dithering) is:
    335     //
    336     //		(V>>(sbits-dbits)<<sbits	-	V)>>sbits
    337     //		(V<<dbits	-	V)>>sbits
    338     //		(V	-	V>>dbits)>>(sbits-dbits)
    339 
    340     // Dithering is done here
    341     if (dithering) {
    342         comment("dithering");
    343         if (sl) {
    344             MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
    345             sh -= sl;
    346             sl = 0;
    347             s.reg = ireg;
    348         }
    349         // scaling (V-V>>dbits)
    350         SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
    351         const int shift = (GGL_DITHER_BITS - (sbits-dbits));
    352         if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
    353         else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
    354         else                ADD(AL, 0, ireg, ireg, dither.reg);
    355         s.reg = ireg;
    356     }
    357 
    358     if ((maskLoBits|dithering) && (sh > dbits)) {
    359         int shift = sh-dbits;
    360         if (dl) {
    361             MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
    362             if (ireg == d.reg) {
    363                 MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
    364             } else {
    365                 ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
    366             }
    367         } else {
    368             if (ireg == d.reg) {
    369                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
    370             } else {
    371                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
    372             }
    373         }
    374     } else {
    375         int shift = sh-dh;
    376         if (shift>0) {
    377             if (ireg == d.reg) {
    378                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
    379             } else {
    380                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
    381             }
    382         } else if (shift<0) {
    383             if (ireg == d.reg) {
    384                 MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
    385             } else {
    386                 ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
    387             }
    388         } else {
    389             if (ireg == d.reg) {
    390                 if (s.reg != d.reg) {
    391                     MOV(AL, 0, d.reg, s.reg);
    392                 }
    393             } else {
    394                 ORR(AL, 0, d.reg, d.reg, s.reg);
    395             }
    396         }
    397     }
    398 }
    399 
    400 }; // namespace android
    401