/* libs/pixelflinger/codeflinger/load_store.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#include <assert.h>
#include <stdio.h>
#include <cutils/log.h>
#include "GGLAssembler.h"

#ifdef __ARM_ARCH__
#include <machine/cpu-features.h>
#endif

namespace android {

// ----------------------------------------------------------------------------

void GGLAssembler::store(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK) ? 1 : 0;
    switch (bits) {
    case 32:
        if (inc)    STR(AL, s.reg, addr.reg, immed12_post(4));
        else        STR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24-bit formats are a little special and used only for RGB
        // 0x00BBGGRR is unpacked as R,G,B
        STRB(AL, s.reg, addr.reg, immed12_pre(0));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(1));
        MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 8));
        STRB(AL, s.reg, addr.reg, immed12_pre(2));
        if (!(s.flags & CORRUPTIBLE)) {
            // we've rotated by 16 bits total; rotate by the remaining
            // 16 to restore the source register to its original value
            MOV(AL, 0, s.reg, reg_imm(s.reg, ROR, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    STRH(AL, s.reg, addr.reg, immed8_post(2));
        else        STRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    STRB(AL, s.reg, addr.reg, immed12_post(1));
        else        STRB(AL, s.reg, addr.reg);
        break;
    }
}

void GGLAssembler::load(const pointer_t& addr, const pixel_t& s, uint32_t flags)
{
    Scratch scratches(registerFile());
    int s0;

    const int bits = addr.size;
    const int inc = (flags & WRITE_BACK) ? 1 : 0;
    switch (bits) {
    case 32:
        if (inc)    LDR(AL, s.reg, addr.reg, immed12_post(4));
        else        LDR(AL, s.reg, addr.reg);
        break;
    case 24:
        // 24-bit formats are a little special and used only for RGB
        // R,G,B is packed as 0x00BBGGRR
        s0 = scratches.obtain();
        if (s.reg != addr.reg) {
            LDRB(AL, s.reg, addr.reg, immed12_pre(0));      // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s.reg, reg_imm(s0, LSL, 16));
        } else {
            // the destination aliases the address register; assemble the
            // pixel in scratch registers and write s.reg only at the end
            int s1 = scratches.obtain();
            LDRB(AL, s1, addr.reg, immed12_pre(0));         // R
            LDRB(AL, s0, addr.reg, immed12_pre(1));         // G
            ORR(AL, 0, s1, s1, reg_imm(s0, LSL, 8));
            LDRB(AL, s0, addr.reg, immed12_pre(2));         // B
            ORR(AL, 0, s.reg, s1, reg_imm(s0, LSL, 16));
        }
        if (inc)
            ADD(AL, 0, addr.reg, addr.reg, imm(3));
        break;
    case 16:
        if (inc)    LDRH(AL, s.reg, addr.reg, immed8_post(2));
        else        LDRH(AL, s.reg, addr.reg);
        break;
    case 8:
        if (inc)    LDRB(AL, s.reg, addr.reg, immed12_post(1));
        else        LDRB(AL, s.reg, addr.reg);
        break;
    }
}
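
// For reference, the 24-bit paths above generate code equivalent to the
// following C sketch (illustrative only, not part of the assembler; it
// assumes the 0x00BBGGRR packing described in the comments, with R in the
// least-significant byte, and 'store24'/'load24'/'p'/'v' are hypothetical
// names):
//
//     void store24(uint8_t* p, uint32_t v) {  // v = 0x00BBGGRR
//         p[0] = v;           // R
//         p[1] = v >> 8;      // G
//         p[2] = v >> 16;     // B
//     }
//     uint32_t load24(const uint8_t* p) {
//         return p[0] | (p[1] << 8) | (p[2] << 16);
//     }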

void GGLAssembler::extract(integer_t& d, int s, int h, int l, int bits)
{
    const int maskLen = h-l;

#ifdef __mips__
    assert(maskLen<=11);
#else
    assert(maskLen<=8);
#endif
    assert(h);

#if __ARM_ARCH__ >= 7
    const int mask = (1<<maskLen)-1;
    if ((h == bits) && !l && (s != d.reg)) {
        MOV(AL, 0, d.reg, s);                   // component = packed;
    } else if ((h == bits) && l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
    } else if (!l && isValidImmediate(mask)) {
        AND(AL, 0, d.reg, s, imm(mask));        // component = packed & mask;
    } else if (!l && isValidImmediate(~mask)) {
        BIC(AL, 0, d.reg, s, imm(~mask));       // component = packed & mask;
    } else {
        UBFX(AL, d.reg, s, l, maskLen);         // component = (packed & mask) >> l;
    }
#else
    if (h != bits) {
        const int mask = ((1<<maskLen)-1) << l;
        if (isValidImmediate(mask)) {
            AND(AL, 0, d.reg, s, imm(mask));    // component = packed & mask;
        } else if (isValidImmediate(~mask)) {
            BIC(AL, 0, d.reg, s, imm(~mask));   // component = packed & mask;
        } else {
            MOV(AL, 0, d.reg, reg_imm(s, LSL, 32-h));
            l += 32-h;
            h = 32;
        }
        s = d.reg;
    }

    if (l) {
        MOV(AL, 0, d.reg, reg_imm(s, LSR, l));  // component = packed >> l;
        s = d.reg;
    }

    if (s != d.reg) {
        MOV(AL, 0, d.reg, s);
    }
#endif

    d.s = maskLen;
}

void GGLAssembler::extract(integer_t& d, const pixel_t& s, int component)
{
    extract(d, s.reg,
            s.format.c[component].h,
            s.format.c[component].l,
            s.size());
}

void GGLAssembler::extract(component_t& d, const pixel_t& s, int component)
{
    integer_t r(d.reg, 32, d.flags);
    extract(r, s.reg,
            s.format.c[component].h,
            s.format.c[component].l,
            s.size());
    d = component_t(r);
}
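
// Worked example (illustrative, not generated code): extracting the green
// component of an RGB565 pixel. Green occupies bits [10..5], so h=11, l=5,
// maskLen=6 and bits=16, and extract() above reduces to:
//
//     green = (packed & 0x07E0) >> 5;   // pre-ARMv7: AND (or BIC) then LSR
//     green = (packed >> 5) & 0x3F;     // ARMv7: a single UBFX
//
// Either way a 6-bit value is left in d, and d.s is set to 6.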

void GGLAssembler::expand(integer_t& d, const component_t& s, int dbits)
{
    if (s.l || (s.flags & CLEAR_HI)) {
        extract(d, s.reg, s.h, s.l, 32);
        expand(d, d, dbits);
    } else {
        expand(d, integer_t(s.reg, s.size(), s.flags), dbits);
    }
}

void GGLAssembler::expand(component_t& d, const component_t& s, int dbits)
{
    integer_t r(d.reg, 32, d.flags);
    expand(r, s, dbits);
    d = component_t(r);
}

void GGLAssembler::expand(integer_t& dst, const integer_t& src, int dbits)
{
    assert(src.size());

    int sbits = src.size();
    int s = src.reg;
    int d = dst.reg;

    // be sure to set 'dst' after we read 'src' as they may be identical
    dst.s = dbits;
    dst.flags = 0;

    if (dbits<=sbits) {
        if (s != d) {
            MOV(AL, 0, d, s);
        }
        return;
    }

    if (sbits == 1) {
        RSB(AL, 0, d, s, reg_imm(s, LSL, dbits));
        // d = (s<<dbits) - s;
        return;
    }

    if (dbits % sbits) {
        MOV(AL, 0, d, reg_imm(s, LSL, dbits-sbits));
        // d = s << (dbits-sbits);
        dbits -= sbits;
        do {
            ORR(AL, 0, d, d, reg_imm(d, LSR, sbits));
            // d |= d >> sbits;
            dbits -= sbits;
            sbits *= 2;
        } while(dbits>0);
        return;
    }

    dbits -= sbits;
    do {
        ORR(AL, 0, d, s, reg_imm(s, LSL, sbits));
        // d = s | (s << sbits);  (s aliases d after the first iteration)
        s = d;
        dbits -= sbits;
        if (sbits*2 < dbits) {
            sbits *= 2;
        }
    } while(dbits>0);
}
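
// Worked example (illustrative, not generated code): expanding a 5-bit
// component to 8 bits, the common RGB565 -> 8888 case. With dbits=8 and
// sbits=5, 8 % 5 != 0, so the third branch above emits:
//
//     d = v << 3;      // MOV d, v, LSL #3
//     d |= d >> 5;     // ORR d, d, d, LSR #5
//
// i.e. d = (v<<3) | (v>>2): the top source bits are replicated into the
// low destination bits, so 0b00000 maps to 0x00 and 0b11111 to 0xFF exactly.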

void GGLAssembler::downshift(
        pixel_t& d, int component, component_t s, const reg_t& dither)
{
    const needs_t& needs = mBuilderContext.needs;
    Scratch scratches(registerFile());

    int sh = s.h;
    int sl = s.l;
    int maskHiBits = (sh!=32) ? ((s.flags & CLEAR_HI)?1:0) : 0;
    int maskLoBits = (sl!=0)  ? ((s.flags & CLEAR_LO)?1:0) : 0;
    int sbits = sh - sl;

    int dh = d.format.c[component].h;
    int dl = d.format.c[component].l;
    int dbits = dh - dl;
    int dithering = 0;

    ALOGE_IF(sbits<dbits, "sbits (%d) < dbits (%d) in downshift", sbits, dbits);

    if (sbits>dbits) {
        // see if we need to dither
        dithering = mDithering;
    }

    int ireg = d.reg;
    if (!(d.flags & FIRST)) {
        if (s.flags & CORRUPTIBLE) {
            ireg = s.reg;
        } else {
            ireg = scratches.obtain();
        }
    }
    d.flags &= ~FIRST;

    if (maskHiBits) {
        // we need to mask the high bits (and possibly the low bits too),
        // and we might be able to use an immediate mask.
        if (!dithering) {
            // we don't do this if we only have maskLoBits because we can
            // do it more efficiently below (in the case where dl=0)
            const int offset = sh - dbits;
            if (dbits<=8 && offset >= 0) {
                const uint32_t mask = ((1<<dbits)-1) << offset;
                if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                    build_and_immediate(ireg, s.reg, mask, 32);
                    sl = offset;
                    s.reg = ireg;
                    sbits = dbits;
                    maskLoBits = maskHiBits = 0;
                }
            }
        } else {
            // in the dithering case though, we need to preserve the lower bits
            const uint32_t mask = ((1<<sbits)-1) << sl;
            if (isValidImmediate(mask) || isValidImmediate(~mask)) {
                build_and_immediate(ireg, s.reg, mask, 32);
                s.reg = ireg;
                maskLoBits = maskHiBits = 0;
            }
        }
    }

    // XXX: we could special-case (maskHiBits && !maskLoBits) the way we do
    // for maskLoBits below, but it is very rare to have maskHiBits only
    // together with the conditions needed to produce better code
    // (like doing d |= s << 24)

    if (maskHiBits) {
        MOV(AL, 0, ireg, reg_imm(s.reg, LSL, 32-sh));
        sl += 32-sh;
        sh = 32;
        s.reg = ireg;
        maskHiBits = 0;
    }

    //  Downsampling should be performed as follows:
    //      V * ((1<<dbits)-1) / ((1<<sbits)-1)
    //      V * [(1<<dbits)/((1<<sbits)-1) - 1/((1<<sbits)-1)]
    //      V * [1/((1<<sbits)-1)>>dbits - 1/((1<<sbits)-1)]
    //      V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/((1<<sbits)-1)>>sbits
    //      V/((1<<(sbits-dbits))-(1>>dbits)) - (V>>sbits)/(1-(1>>sbits))
    //
    //  By approximating (1>>dbits) and (1>>sbits) to 0:
    //
    //      V>>(sbits-dbits) - V>>sbits
    //
    //  A good approximation is V>>(sbits-dbits),
    //  but a better one (needed for dithering) is:
    //
    //      (V>>(sbits-dbits)<<sbits - V)>>sbits
    //      (V<<dbits - V)>>sbits
    //      (V - V>>dbits)>>(sbits-dbits)

    // Dithering is done here
    if (dithering) {
        comment("dithering");
        if (sl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, sl));
            sh -= sl;
            sl = 0;
            s.reg = ireg;
        }
        // scaling (V-V>>dbits)
        SUB(AL, 0, ireg, s.reg, reg_imm(s.reg, LSR, dbits));
        // align the GGL_DITHER_BITS dither value with the (sbits-dbits)
        // fraction bits, then add it
        const int shift = (GGL_DITHER_BITS - (sbits-dbits));
        if (shift>0)        ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSR, shift));
        else if (shift<0)   ADD(AL, 0, ireg, ireg, reg_imm(dither.reg, LSL, -shift));
        else                ADD(AL, 0, ireg, ireg, dither.reg);
        s.reg = ireg;
    }

    if ((maskLoBits|dithering) && (sh > dbits)) {
        int shift = sh-dbits;
        if (dl) {
            MOV(AL, 0, ireg, reg_imm(s.reg, LSR, shift));
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(ireg, LSL, dl));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(ireg, LSL, dl));
            }
        } else {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        }
    } else {
        int shift = sh-dh;
        if (shift>0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSR, shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSR, shift));
            }
        } else if (shift<0) {
            if (ireg == d.reg) {
                MOV(AL, 0, d.reg, reg_imm(s.reg, LSL, -shift));
            } else {
                ORR(AL, 0, d.reg, d.reg, reg_imm(s.reg, LSL, -shift));
            }
        } else {
            if (ireg == d.reg) {
                if (s.reg != d.reg) {
                    MOV(AL, 0, d.reg, s.reg);
                }
            } else {
                ORR(AL, 0, d.reg, d.reg, s.reg);
            }
        }
    }
}

} // namespace android