/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#define LOG_TAG "pixelflinger-code"

#include <assert.h>
#include <stdio.h>

#include <log/log.h>

#include "GGLAssembler.h"

namespace android {

// ----------------------------------------------------------------------------
//
// NOTE(review): every STR/LDR/MOV/ORR/... call below does not execute an
// instruction — it EMITS one into the code stream being assembled (the
// emitters come from GGLAssembler.h). Statement order therefore defines
// the order of the generated instructions and must be preserved exactly.
// 'AL' is the always-execute condition code on each emitted instruction.

// Emit code that stores pixel 's' to memory at 'addr'.
// addr.size selects the pixel width: 32, 24 (packed RGB), 16 or 8 bits.
// If WRITE_BACK is set in 'flags', the generated code also advances
// addr.reg past the pixel (via post-indexed addressing, or an explicit
// ADD #3 in the unaligned 24-bit case, which has no post-index form).
void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
        else        STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // 0x00BBGGRR is unpacked as R,G,B
        // One STRB per component: rotate the source right by 8 after each
        // store so the next component sits in the low byte.
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
        if (!(s.flags & CORRUPTIBLE)) {
            // Restore s.reg: the two ROR #8 above plus this ROR #16 add
            // up to a full 32-bit rotation, i.e. the original value.
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
        else        STRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
        else        STRB(AL, s.reg, addr.reg);
        break;
    }
}

// Emit code that loads a pixel from 'addr' into s.reg.
// Widths and the WRITE_BACK flag behave as in store() above.
// The 24-bit case needs scratch registers to assemble the three bytes;
// they are released automatically when 'scratches' goes out of scope.
void
GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    Scratch scratches(registerFile());
    int s0;

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
        else        LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24 bits formats are a little special and used only for RGB
        // R,G,B is packed as 0x00BBGGRR
        s0 = scratches.obtain();
        if (s.reg != addr.reg) {
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
            // Destination aliases the address register: accumulate into a
            // second scratch so addr.reg is still intact for the last LDRB,
            // and only clobber s.reg (== addr.reg) with the final ORR.
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else        LDRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else        LDRB(AL, s.reg, addr.reg);
        break;
    }
}

// Emit code that extracts the bit-field [l..h) of register 's' into d.reg,
// right-aligned (i.e. component = (s >> l) & ((1<<(h-l))-1)).
// 'bits' is the total width of the packed value in 's'; when h == bits the
// high-bit masking step can be skipped entirely.
// On return, d.s records the component's size in bits (h-l).
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

#ifdef __mips__
    assert(maskLen<=11);
#else
    assert(maskLen<=8);
#endif
    assert(h);

    if (h != bits) {
        // There are bits above the field: clear them first. Prefer a
        // single AND/BIC with an immediate mask when the mask encodes as
        // an ARM immediate; otherwise shift the field up against bit 31
        // (the subsequent LSR then clears the high bits for free).
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        MOV(AL, 0, d.reg, s);
    }

    d.s = maskLen;
}

// Convenience overload: extract color component 'component' of pixel 's'
// using the field boundaries from its pixel format.
void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
{
    extract(d,  s.reg,
                s.format.c[component].h,
                s.format.c[component].l,
                s.size());
}

// Same as above, but the destination is a component_t; the extracted value
// is wrapped back into component form (full 32-bit position, size = field
// width).
void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
{
    integer_t r(d.reg, 32, d.flags);
    extract(r,  s.reg,
                s.format.c[component].h,
                s.format.c[component].l,
                s.size());
    d = component_t(r);
}


// Emit code expanding component 's' to 'dbits' bits into 'd'.
// If the component is not already right-aligned with clean high bits,
// extract it first; otherwise expand it in place.
void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
{
    if (s.l || (s.flags & CLEAR_HI)) {
        extract(d, s.reg, s.h, s.l, 32);
        expand(d, d, dbits);
    } else {
        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
    }
}

// component_t-to-component_t wrapper around the expansion above.
void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
{
    integer_t r(d.reg, 32, d.flags);
    expand(r, s, dbits);
    d = component_t(r);
}

// Emit code that widens the src.size()-bit value in src.reg to 'dbits'
// bits by bit replication (e.g. 5-bit color 0bABCDE -> 8-bit 0bABCDEABC),
// which maps the source range onto the destination range including the
// endpoints (all-ones stays all-ones). Result lands in dst.reg.
void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        // Already wide enough: at most a register move is needed.
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        // A 1-bit source expands to all-zeros or all-ones:
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
        // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        // Destination width is not a multiple of the source width:
        // left-justify once, then fold the value onto itself by
        // doubling runs until all dbits are filled.
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
        // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
            // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

    // Destination width is an exact multiple of the source width:
    // replicate upward, doubling the replicated run when possible.
    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
        // d |= d<<sbits;
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}

// Emit code converting component 's' (occupying bits [s.l..s.h) of s.reg,
// sbits wide) into component 'component' of destination pixel 'd' (bits
// [dl..dh) of d.reg), narrowing it and optionally applying ordered
// dithering from dither.reg when narrowing and mDithering is enabled.
// If FIRST is set in d.flags the result is MOVed into d.reg, otherwise it
// is ORRed into the partially-assembled pixel already in d.reg; the FIRST
// flag is consumed here.
// The intermediate register is d.reg itself for the first component, the
// (corruptible) source register, or a scratch register.
void GGLAssembler::downshift(
        pixel_t& d, int component, component_t s, const reg_t& dither)
{
    Scratch scratches(registerFile());

    int sh = s.h;
    int sl = s.l;
    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
    int sbits = sh - sl;

    int dh = d.format.c[component].h;
    int dl = d.format.c[component].l;
    int dbits = dh - dl;
    int dithering = 0;

    ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);

    if (sbits>dbits) {
        // see if we need to dither
        dithering = mDithering;
    }

    int ireg = d.reg;
    if (!(d.flags & FIRST)) {
        if (s.flags & CORRUPTIBLE) {
            ireg = s.reg;
        } else {
            ireg = scratches.obtain();
        }
    }
    d.flags &= ~FIRST;

    if (maskHiBits) {
        // we need to mask the high bits (and possibly the lowbits too)
        // and we might be able to use immediate mask.
        if (!dithering) {
            // we don't do this if we only have maskLoBits because we can
            // do it more efficiently below (in the case where dl=0)
            // Here only the dbits we will actually keep need to survive,
            // so mask down to those directly.
            const int offset = sh - dbits;
            if (dbits<=8 && offset >= 0) {
                const uint32_t mask = ((1<<dbits)-1) << offset;
                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                    build_and_immediate(ireg, s.reg, mask, 32);
                    sl = offset;
                    s.reg = ireg;
                    sbits = dbits;
                    maskLoBits = maskHiBits = 0;
                }
            }
        } else {
            // in the dithering case though, we need to preserve the lower bits
            const uint32_t mask = ((1<<sbits)-1) << sl;
            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                build_and_immediate(ireg, s.reg, mask, 32);
                s.reg = ireg;
                maskLoBits = maskHiBits = 0;
            }
        }
    }

    // XXX: we could special case (maskHiBits & !maskLoBits)
    // like we do for maskLoBits below, but it happens very rarely
    // that we have maskHiBits only and the conditions necessary to lead
    // to better code (like doing d |= s << 24)

    if (maskHiBits) {
        // No immediate mask worked: shift the field up against bit 31 so
        // the high garbage falls off; later right-shifts clear it.
        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
        sl += 32-sh;
        sh = 32;
        s.reg = ireg;
        maskHiBits = 0;
    }

    //	Downsampling should be performed as follows:
    //  V * ((1<<dbits)-1) / ((1<<sbits)-1)
    //	V * [(1<<dbits)/((1<<sbits)-1)	-	1/((1<<sbits)-1)]
    //	V * [1/((1<<sbits)-1)>>dbits	-	1/((1<<sbits)-1)]
    //	V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
    //	V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
    //
    //	By approximating (1>>dbits) and (1>>sbits) to 0:
    //
    //		V>>(sbits-dbits)	-	V>>sbits
    //
    //	A good approximation is V>>(sbits-dbits),
    //	but better one (needed for dithering) is:
    //
    //		(V>>(sbits-dbits)<<sbits	-	V)>>sbits
    //		(V<<dbits	-	V)>>sbits
    //		(V	-	V>>dbits)>>(sbits-dbits)

    // Dithering is done here
    if (dithering) {
        comment("dithering");
        if (sl) {
            // right-align the component first
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
            sh -= sl;
            sl = 0;
            s.reg = ireg;
        }
        // scaling (V-V>>dbits)
        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
        // add the dither value, aligned to the bits being discarded
        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
        else                ADD(AL, 0, ireg, ireg, dither.reg);
        s.reg = ireg;
    }

    // Final narrowing shift and merge into d.reg. When ireg == d.reg this
    // is the first component and a plain MOV is emitted; otherwise the
    // value is ORRed into the pixel being assembled.
    if ((maskLoBits|dithering) && (sh > dbits)) {
        // Low bits must not leak into the field below: shift the value
        // fully down first, then (if dl) shift it back up into place.
        int shift = sh-dbits;
        if (dl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
            }
        } else {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        }
    } else {
        // Single shift places the field at [dl..dh) directly.
        int shift = sh-dh;
        if (shift>0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        } else if (shift<0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
            }
        } else {
            if (ireg == d.reg) {
                if (s.reg != d.reg) {
                    MOV(AL, 0, d.reg, s.reg);
                }
            } else {
                ORR(AL, 0, d.reg, d.reg, s.reg);
            }
        }
    }
}

}; // namespace android