/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#include <assert.h>
#include <stdio.h>
#include <cutils/log.h>
#include "codeflinger/GGLAssembler.h"

#ifdef __ARM_ARCH__
#include <machine/cpu-features.h>
#endif

namespace android {

// ----------------------------------------------------------------------------

void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
        else        STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24-bit formats are a little special and used only for RGB
        // 0x00BBGGRR is unpacked as R,G,B
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
        if (!(s.flags & CORRUPTIBLE)) {
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
        else        STRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
        else        STRB(AL, s.reg, addr.reg);
        break;
    }
}

void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    Scratch scratches(registerFile());
    int s0;

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK)?1:0;
    switch (bits) {
    case 32:
        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
        else        LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24-bit formats are a little special and used only for RGB
        // R,G,B is packed as 0x00BBGGRR
        s0 = scratches.obtain();
        if (s.reg != addr.reg) {
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else        LDRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else        LDRB(AL, s.reg, addr.reg);
        break;
    }
}

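// Summary of extract() below: isolate the component stored in bits [l, h) of
// the packed pixel in register s and leave it right-aligned in d.reg. The
// scalar equivalent (a sketch for reference, not generated code) is
//     component = (packed >> l) & ((1 << (h - l)) - 1);
// and each branch below simply picks the cheapest ARM sequence for that
// expression, as the per-branch comments also note.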
void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

    assert(maskLen<=8);
    assert(h);

#if __ARM_ARCH__ >= 7
    const int mask = (1<<maskLen)-1;
    if ((h == bits) && !l && (s != d.reg)) {
        MOV(AL, 0, d.reg, s);                       // component = packed;
    } else if ((h == bits) && l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));      // component = packed >> l;
    } else if (!l && isValidImmediate(mask)) {
        AND(AL, 0, d.reg, s, imm(mask));            // component = packed & mask;
    } else if (!l && isValidImmediate(~mask)) {
        BIC(AL, 0, d.reg, s, imm(~mask));           // component = packed & mask;
    } else {
        UBFX(AL, d.reg, s, l, maskLen);             // component = (packed & mask) >> l;
    }
#else
    if (h != bits) {
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));        // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));       // component = packed & mask;
        } else {
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));      // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        MOV(AL, 0, d.reg, s);
    }
#endif

    d.s = maskLen;
}

void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
{
    extract(d, s.reg,
            s.format.c[component].h,
            s.format.c[component].l,
            s.size());
}

void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
{
    integer_t r(d.reg, 32, d.flags);
    extract(r, s.reg,
            s.format.c[component].h,
            s.format.c[component].l,
            s.size());
    d = component_t(r);
}

void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
{
    if (s.l || (s.flags & CLEAR_HI)) {
        extract(d, s.reg, s.h, s.l, 32);
        expand(d, d, dbits);
    } else {
        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
    }
}

void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
{
    integer_t r(d.reg, 32, d.flags);
    expand(r, s, dbits);
    d = component_t(r);
}

void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
        // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
        // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
            // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
        // d |= d<<sbits;
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}

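// Summary of downshift() below: repack the component held in bits [s.l, s.h)
// of s.reg into the destination field d.format.c[component] of the pixel in
// d.reg, reducing its precision when the destination field is narrower and,
// when precision is actually lost, optionally dithering with the value held
// in dither.reg.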
void GGLAssembler::downshift(
        pixel_t& d, int component, component_t s, const reg_t& dither)
{
    const needs_t& needs = mBuilderContext.needs;
    Scratch scratches(registerFile());

    int sh = s.h;
    int sl = s.l;
    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
    int sbits = sh - sl;

    int dh = d.format.c[component].h;
    int dl = d.format.c[component].l;
    int dbits = dh - dl;
    int dithering = 0;

    ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);

    if (sbits>dbits) {
        // see if we need to dither
        dithering = mDithering;
    }

    int ireg = d.reg;
    if (!(d.flags & FIRST)) {
        if (s.flags & CORRUPTIBLE) {
            ireg = s.reg;
        } else {
            ireg = scratches.obtain();
        }
    }
    d.flags &= ~FIRST;

    if (maskHiBits) {
        // we need to mask the high bits (and possibly the low bits too),
        // and we may be able to use an immediate mask.
        if (!dithering) {
            // we don't do this if we only have maskLoBits because we can
            // do it more efficiently below (in the case where dl=0)
            const int offset = sh - dbits;
            if (dbits<=8 && offset >= 0) {
                const uint32_t mask = ((1<<dbits)-1) << offset;
                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                    build_and_immediate(ireg, s.reg, mask, 32);
                    sl = offset;
                    s.reg = ireg;
                    sbits = dbits;
                    maskLoBits = maskHiBits = 0;
                }
            }
        } else {
            // in the dithering case though, we need to preserve the lower bits
            const uint32_t mask = ((1<<sbits)-1) << sl;
            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                build_and_immediate(ireg, s.reg, mask, 32);
                s.reg = ireg;
                maskLoBits = maskHiBits = 0;
            }
        }
    }

    // XXX: we could special-case (maskHiBits & !maskLoBits) like we do for
    // maskLoBits below, but it is very rare to have maskHiBits only together
    // with the conditions needed to produce better code (like doing
    // d |= s << 24)

    if (maskHiBits) {
        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
        sl += 32-sh;
        sh = 32;
        s.reg = ireg;
        maskHiBits = 0;
    }

    // Downsampling should be performed as follows:
    //   V * ((1<<dbits)-1) / ((1<<sbits)-1)
    //   V * [(1<<dbits)/((1<<sbits)-1)  -  1/((1<<sbits)-1)]
    //   V * [1/((1<<sbits)-1)>>dbits  -  1/((1<<sbits)-1)]
    //   V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
    //   V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
    //
    // By approximating (1>>dbits) and (1>>sbits) to 0:
    //
    //   V>>(sbits-dbits) - V>>sbits
    //
    // A good approximation is V>>(sbits-dbits),
    // but a better one (needed for dithering) is:
    //
    //   (V>>(sbits-dbits)<<sbits - V)>>sbits
    //   (V<<dbits - V)>>sbits
    //   (V - V>>dbits)>>(sbits-dbits)

    // Dithering is done here
    if (dithering) {
        comment("dithering");
        if (sl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
            sh -= sl;
            sl = 0;
            s.reg = ireg;
        }
        // scaling (V-V>>dbits)
        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL,-shift));
        else                ADD(AL, 0, ireg, ireg, dither.reg);
        s.reg = ireg;
    }

    if ((maskLoBits|dithering) && (sh > dbits)) {
        int shift = sh-dbits;
        if (dl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
            }
        } else {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        }
    } else {
        int shift = sh-dh;
        if (shift>0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        } else if (shift<0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
            }
        } else {
            if (ireg == d.reg) {
                if (s.reg != d.reg) {
                    MOV(AL, 0, d.reg, s.reg);
                }
            } else {
                ORR(AL, 0, d.reg, d.reg, s.reg);
            }
        }
    }
}

}; // namespace android
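// ----------------------------------------------------------------------------
// Reference sketch (illustrative C-like scalar code, not part of the generated
// assembly): the arithmetic that expand() and downshift() above approximate,
// for an sbits-wide source value v and a dbits-wide destination.
//
//   expand, e.g. 5 -> 8 bits (bit replication):
//       d = (v << 3) | (v >> 2);                     // 31 -> 255
//
//   downshift without dithering, e.g. 8 -> 5 bits:
//       d = v >> 3;
//
//   downshift with dithering (keeps the fractional part, per the derivation
//   in downshift() above), with 'dither' scaled to (sbits - dbits) bits:
//       d = ((v - (v >> dbits)) + dither) >> (sbits - dbits);
//       // e.g. sbits=8, dbits=5, v=255, dither=0: (255 - 7) >> 3 == 31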