/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#include <assert.h>
#include <stdio.h>
#include <cutils/log.h>
#include "GGLAssembler.h"

namespace android {

// ----------------------------------------------------------------------------
// Code-generation helpers: emit assembler instructions (via the GGLAssembler
// instruction-emitter methods STR/LDR/MOV/ORR/...) that load and store pixels
// of various bit depths, and that extract, expand and down-convert individual
// color components between bit widths.
// ----------------------------------------------------------------------------

// Emit instructions that store the pixel held in s.reg to the address in
// addr.reg. addr.size selects the pixel width (8/16/24/32 bits).
// If flags has WRITE_BACK set, addr.reg is advanced past the pixel
// (post-indexed addressing, or an explicit ADD for the 24-bit case).
// NOTE: the 24-bit case stores the three bytes by rotating s.reg in place;
// the value is rotated back to its original layout only when s does NOT
// have the CORRUPTIBLE flag (otherwise the caller has declared it doesn't
// need s.reg preserved, so the final restoring ROR is skipped).
void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc) STR(AL, s.reg, addr.reg, immed12_post(4));
        else     STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // 0x00BBGGRR is unpacked as R,G,B
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
        if (!(s.flags & CORRUPTIBLE)) {
            // undo the two ROR-8 rotations above so s.reg is unchanged
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc) STRH(AL, s.reg, addr.reg, immed8_post(2));
        else     STRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc) STRB(AL, s.reg, addr.reg, immed12_post(1));
        else     STRB(AL, s.reg, addr.reg);
        break;
    }
}

// Emit instructions that load a pixel from the address in addr.reg into
// s.reg. addr.size selects the pixel width (8/16/24/32 bits). If flags has
// WRITE_BACK set, addr.reg is advanced past the pixel.
// The 24-bit case assembles three byte loads into 0x00BBGGRR; it needs a
// scratch register, and a second one when s.reg aliases addr.reg (the
// partial result must not clobber the address before the last byte load).
void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    Scratch scratches(registerFile());
    int s0;

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc) LDR(AL, s.reg, addr.reg, immed12_post(4));
        else     LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // R,G,B is packed as 0x00BBGGRR
        s0 = scratches.obtain();
        if (s.reg != addr.reg) {
            // destination does not alias the address: accumulate directly
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
            // s.reg == addr.reg: build the pixel in a scratch (s1) and only
            // write s.reg with the final ORR, after the last byte load
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc) LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else     LDRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc) LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else     LDRB(AL, s.reg, addr.reg);
        break;
    }
}

// Emit instructions that isolate the bit-field [l, h) of register s into
// d.reg, right-justified (d.reg = (s >> l) & ((1<<(h-l))-1)).
// 'bits' is the total significant width of s: when h == bits there are no
// high bits to mask, so only the shift is needed.
// Strategy: try an AND with the field mask, then a BIC with its complement
// (one of the two is usually encodable as an immediate); otherwise fall back
// to shifting the field up to bit 31 first (LSL 32-h) so the subsequent LSR
// clears the high bits for free.
// On exit d.s records the component's size in bits (h-l).
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

#ifdef __mips__
    // MIPS variant of the assembler tolerates wider fields than ARM here
    assert(maskLen<=11);
#else
    assert(maskLen<=8);
#endif
    assert(h);

    if (h != bits) {
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            // mask not encodable: left-justify the field instead, the LSR
            // below then discards everything above it
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        // neither mask nor shift was emitted; plain register copy
        MOV(AL, 0, d.reg, s);
    }

    d.s = maskLen;
}

// Convenience overload: extract the given component of pixel s (using the
// component's h/l bit positions from the pixel format) into d.
void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
{
    extract(d, s.reg,
            s.format.c[component].h,
            s.format.c[component].l,
            s.size());
}

// Same as above but producing a component_t destination; goes through a
// temporary 32-bit integer_t view of d's register.
void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
{
    integer_t r(d.reg, 32, d.flags);
    extract(r, s.reg,
            s.format.c[component].h,
            s.format.c[component].l,
            s.size());
    d = component_t(r);
}


// Expand component s to dbits of precision into d. If s is not already
// right-justified with clean high bits (s.l != 0 or CLEAR_HI set), it is
// first isolated with extract(); otherwise it is expanded directly.
void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
{
    if (s.l || (s.flags & CLEAR_HI)) {
        extract(d, s.reg, s.h, s.l, 32);
        expand(d, d, dbits);
    } else {
        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
    }
}

// component_t-destination overload; same expansion via a 32-bit integer_t
// view of d's register.
void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
{
    integer_t r(d.reg, 32, d.flags);
    expand(r, s, dbits);
    d = component_t(r);
}

// Emit instructions that widen src (src.size() significant bits,
// right-justified) to dbits of precision in dst by replicating the source
// bits downward (e.g. 5-bit 0b10110 -> 10-bit 0b1011010110). This is the
// standard way to scale an n-bit component to a wider range so that the
// maximum input maps to the maximum output.
// src and dst may be the same register.
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;
    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        // already wide enough; at most a register move is needed
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        // single-bit source: 0 -> 0, 1 -> all dbits set
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
        // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        // dbits is not a multiple of sbits: left-justify then fold the top
        // bits downward, doubling the replicated width each iteration
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
        // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
            // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

    // dbits is a multiple of sbits: replicate upward instead
    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
        // d |= d<<sbits;
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}

// Emit instructions that reduce component s (occupying bits [s.l, s.h) of
// s.reg) to the width of 'component' in destination pixel format d, then
// merge it into d.reg at the component's position (MOV for the first
// component written, ORR afterwards — tracked via d.flags & FIRST).
// When precision is lost (sbits > dbits) and dithering is enabled, the
// dither value in dither.reg is added before truncation.
// NOTE: s is taken by value: sh/sl/s.reg are freely rewritten below as the
// value is progressively masked, shifted and moved between registers.
void GGLAssembler::downshift(
        pixel_t& d, int component, component_t s, const reg_t& dither)
{
    // NOTE(review): 'needs' is not referenced in this function — looks like
    // a leftover; candidate for removal (kept here to preserve the code).
    const needs_t& needs = mBuilderContext.needs;
    Scratch scratches(registerFile());

    int sh = s.h;
    int sl = s.l;
    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
    int sbits = sh - sl;

    int dh = d.format.c[component].h;
    int dl = d.format.c[component].l;
    int dbits = dh - dl;
    int dithering = 0;

    // the source is expected to have at least as much precision as the
    // destination; log (but proceed) if that invariant is violated
    ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);

    if (sbits>dbits) {
        // see if we need to dither
        dithering = mDithering;
    }

    // choose an intermediate register: the destination itself for the first
    // component, otherwise the (corruptible) source or a fresh scratch, so
    // the partial pixel already in d.reg isn't clobbered
    int ireg = d.reg;
    if (!(d.flags & FIRST)) {
        if (s.flags & CORRUPTIBLE)  {
            ireg = s.reg;
        } else {
            ireg = scratches.obtain();
        }
    }
    d.flags &= ~FIRST;

    if (maskHiBits) {
        // we need to mask the high bits (and possibly the lowbits too)
        // and we might be able to use immediate mask.
        if (!dithering) {
            // we don't do this if we only have maskLoBits because we can
            // do it more efficiently below (in the case where dl=0)
            const int offset = sh - dbits;
            if (dbits<=8 && offset >= 0) {
                const uint32_t mask = ((1<<dbits)-1) << offset;
                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                    // mask down to exactly the dbits we will keep
                    build_and_immediate(ireg, s.reg, mask, 32);
                    sl = offset;
                    s.reg = ireg;
                    sbits = dbits;
                    maskLoBits = maskHiBits = 0;
                }
            }
        } else {
            // in the dithering case though, we need to preserve the lower bits
            const uint32_t mask = ((1<<sbits)-1) << sl;
            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                build_and_immediate(ireg, s.reg, mask, 32);
                s.reg = ireg;
                maskLoBits = maskHiBits = 0;
            }
        }
    }

    // XXX: we could special case (maskHiBits & !maskLoBits)
    // like we do for maskLoBits below, but it happens very rarely
    // that we have maskHiBits only and the conditions necessary to lead
    // to better code (like doing d |= s << 24)

    if (maskHiBits) {
        // immediate mask wasn't encodable: left-justify the field so the
        // high garbage bits fall off the top
        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
        sl += 32-sh;
        sh = 32;
        s.reg = ireg;
        maskHiBits = 0;
    }

    //  Downsampling should be performed as follows:
    //      V * ((1<<dbits)-1) / ((1<<sbits)-1)
    //      V * [(1<<dbits)/((1<<sbits)-1)  -  1/((1<<sbits)-1)]
    //      V * [1/((1<<sbits)-1)>>dbits  -  1/((1<<sbits)-1)]
    //      V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
    //      V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
    //
    //  By approximating (1>>dbits) and (1>>sbits) to 0:
    //
    //      V>>(sbits-dbits)    -   V>>sbits
    //
    //  A good approximation is V>>(sbits-dbits),
    //  but better one (needed for dithering) is:
    //
    //      (V>>(sbits-dbits)<<sbits - V)>>sbits
    //      (V<<dbits - V)>>sbits
    //      (V - V>>dbits)>>(sbits-dbits)

    // Dithering is done here
    if (dithering) {
        comment("dithering");
        if (sl) {
            // right-justify first so the arithmetic below works on the value
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
            sh -= sl;
            sl = 0;
            s.reg = ireg;
        }
        // scaling (V-V>>dbits)
        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
        // align the GGL_DITHER_BITS-wide dither value to the bits being
        // discarded, then add it in
        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
        else                ADD(AL, 0, ireg, ireg, dither.reg);
        s.reg = ireg;
    }

    // final truncation and merge into the destination pixel
    if ((maskLoBits|dithering) && (sh > dbits)) {
        // low bits are dirty (or dithering added carries): shift them out
        // explicitly, then reposition at dl
        int shift = sh-dbits;
        if (dl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
            }
        } else {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        }
    } else {
        // clean low bits: a single shift by (sh-dh) places the component at
        // its destination position directly
        int shift = sh-dh;
        if (shift>0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        } else if (shift<0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
            }
        } else {
            if (ireg == d.reg) {
                if (s.reg != d.reg) {
                    MOV(AL, 0, d.reg, s.reg);
                }
            } else {
                ORR(AL, 0, d.reg, d.reg, s.reg);
            }
        }
    }
}

}; // namespace android