/* libs/pixelflinger/codeflinger/texturing.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>

#include <cutils/log.h>

#include "GGLAssembler.h"

namespace android {

// ---------------------------------------------------------------------------

// iterators are initialized like this:
//   (intToFixedCenter(x) * dx)>>16 + x0
//   ((x<<16 + 0x8000) * dx)>>16 + x0
//   ((x<<16)*dx + (0x8000*dx))>>16 + x0
//   ( (x*dx) + dx>>1 ) + x0
//   (x*dx) + (dx>>1 + x0)

void GGLAssembler::init_iterated_color(fragment_parts_t& parts, const reg_t& x)
{
    context_t const* c = mBuilderContext.c;
    const needs_t& needs = mBuilderContext.needs;

    if (mSmooth) {
        // NOTE: we could also take this path in the mDithering + !mSmooth
        // case, but that would use up to 4 more registers for the color
        // components for only a little added quality.
        // Currently, this causes the system to run out of registers in
        // some cases (see issue #719496)

        comment("compute initial iterated color (smooth and/or dither case)");

        parts.iterated_packed = 0;
        parts.packed = 0;

        // 0x1: color component
        // 0x2: iterators
        const int optReload = mOptLevel >> 1;
        if (optReload >= 3)         parts.reload = 0; // reload nothing
        else if (optReload == 2)    parts.reload = 2; // reload iterators
        else if (optReload == 1)    parts.reload = 1; // reload colors
        else if (optReload <= 0)    parts.reload = 3; // reload both

        if (!mSmooth) {
            // we're not smoothing (just dithering), we never have to
            // reload the iterators
            parts.reload &= ~2;
        }

        Scratch scratches(registerFile());
        const int t0 = (parts.reload & 1) ? scratches.obtain() : 0;
        const int t1 = (parts.reload & 2) ? scratches.obtain() : 0;
        for (int i=0 ; i<4 ; i++) {
            if (!mInfo[i].iterated)
                continue;

            // this component exists in the destination and is not replaced
            // by a texture unit.
            const int c = (parts.reload & 1) ? t0 : obtainReg();
            if (i==0) CONTEXT_LOAD(c, iterators.ydady);
            if (i==1) CONTEXT_LOAD(c, iterators.ydrdy);
            if (i==2) CONTEXT_LOAD(c, iterators.ydgdy);
            if (i==3) CONTEXT_LOAD(c, iterators.ydbdy);
            parts.argb[i].reg = c;

            if (mInfo[i].smooth) {
                parts.argb_dx[i].reg = (parts.reload & 2) ? t1 : obtainReg();
                const int dvdx = parts.argb_dx[i].reg;
                CONTEXT_LOAD(dvdx, generated_vars.argb[i].dx);
                MLA(AL, 0, c, x.reg, dvdx, c);

                // adjust the color iterator to make sure it won't overflow
                if (!mAA) {
                    // this is not needed when we're using anti-aliasing
                    // because we will (have to) clamp the components
                    // anyway.
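                    // Roughly, the clamp below computes
                    //   end = c + dvdx*(count>>16)   (value at the last pixel)
                    // and, if that is negative, slides the ramp up by -end so
                    // that the last pixel lands exactly on 0; the final BIC
                    // then clamps a negative start value to 0 as well.
                    // (e.g. c=0x10000, dvdx=-0x300, count>>16=0x60 gives
                    // end=-0x2000, so c becomes 0x12000.)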
                    int end = scratches.obtain();
                    MOV(AL, 0, end, reg_imm(parts.count.reg, LSR, 16));
                    MLA(AL, 1, end, dvdx, end, c);
                    SUB(MI, 0, c, c, end);
                    BIC(AL, 0, c, c, reg_imm(c, ASR, 31));
                    scratches.recycle(end);
                }
            }

            if (parts.reload & 1) {
                CONTEXT_STORE(c, generated_vars.argb[i].c);
            }
        }
    } else {
        // We're not smoothing, so we can
        // just use a packed version of the color and extract the
        // components as needed (or not at all if we don't blend)

        // figure out if we need the iterated color
        int load = 0;
        for (int i=0 ; i<4 ; i++) {
            component_info_t& info = mInfo[i];
            if ((info.inDest || info.needed) && !info.replaced)
                load |= 1;
        }

        parts.iterated_packed = 1;
        parts.packed = (!mTextureMachine.mask && !mBlending
                && !mFog && !mDithering);
        parts.reload = 0;
        if (load || parts.packed) {
            if (mBlending || mDithering || mInfo[GGLFormat::ALPHA].needed) {
                comment("load initial iterated color (8888 packed)");
                parts.iterated.setTo(obtainReg(),
                        &(c->formats[GGL_PIXEL_FORMAT_RGBA_8888]));
                CONTEXT_LOAD(parts.iterated.reg, packed8888);
            } else {
                comment("load initial iterated color (dest format packed)");

                parts.iterated.setTo(obtainReg(), &mCbFormat);

                // pre-mask the iterated color
                const int bits = parts.iterated.size();
                const uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
                uint32_t mask = 0;
                if (mMasking) {
                    for (int i=0 ; i<4 ; i++) {
                        const int component_mask = 1<<i;
                        const int h = parts.iterated.format.c[i].h;
                        const int l = parts.iterated.format.c[i].l;
                        if (h && (!(mMasking & component_mask))) {
                            mask |= ((1<<(h-l))-1) << l;
                        }
                    }
                }

                if (mMasking && ((mask & size)==0)) {
                    // none of the components are present in the mask
                } else {
                    CONTEXT_LOAD(parts.iterated.reg, packed);
                    if (mCbFormat.size == 1) {
                        AND(AL, 0, parts.iterated.reg,
                                parts.iterated.reg, imm(0xFF));
                    } else if (mCbFormat.size == 2) {
                        MOV(AL, 0, parts.iterated.reg,
                                reg_imm(parts.iterated.reg, LSR, 16));
                    }
                }

                // pre-mask the iterated color
                if (mMasking) {
                    build_and_immediate(parts.iterated.reg, parts.iterated.reg,
                            mask, bits);
                }
            }
        }
    }
}

void GGLAssembler::build_iterated_color(
    component_t& fragment,
    const fragment_parts_t& parts,
    int component,
    Scratch& regs)
{
    fragment.setTo( regs.obtain(), 0, 32, CORRUPTIBLE);

    if (!mInfo[component].iterated)
        return;

    if (parts.iterated_packed) {
        // iterated colors are packed, extract the one we need
        extract(fragment, parts.iterated, component);
    } else {
        fragment.h = GGL_COLOR_BITS;
        fragment.l = GGL_COLOR_BITS - 8;
        fragment.flags |= CLEAR_LO;
        // iterated colors are held in their own register,
        // (smooth and/or dithering case)
        if (parts.reload==3) {
            // this implies mSmooth
            Scratch scratches(registerFile());
            int dx = scratches.obtain();
            CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
            CONTEXT_LOAD(dx, generated_vars.argb[component].dx);
            ADD(AL, 0, dx, fragment.reg, dx);
            CONTEXT_STORE(dx, generated_vars.argb[component].c);
        } else if (parts.reload & 1) {
            CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
        } else {
            // we don't reload, so simply rename the register and mark as
            // non CORRUPTIBLE so that the texture env or blending code
            // won't modify this (renamed) register
            regs.recycle(fragment.reg);
            fragment.reg = parts.argb[component].reg;
            fragment.flags &= ~CORRUPTIBLE;
        }
        if (mInfo[component].smooth && mAA) {
            // when using smooth shading AND anti-aliasing, we need to clamp
            // the iterators because there is always an extra pixel on the
            // edges, which most of the time will cause an overflow
            // (since technically it's outside of the domain).
            BIC(AL, 0, fragment.reg, fragment.reg,
                    reg_imm(fragment.reg, ASR, 31));
            component_sat(fragment);
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::decodeLogicOpNeeds(const needs_t& needs)
{
    // gather some information about the components we need to process...
    const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
    switch(opcode) {
    case GGL_COPY:
        mLogicOp = 0;
        break;
    case GGL_CLEAR:
    case GGL_SET:
        mLogicOp = LOGIC_OP;
        break;
    case GGL_AND:
    case GGL_AND_REVERSE:
    case GGL_AND_INVERTED:
    case GGL_XOR:
    case GGL_OR:
    case GGL_NOR:
    case GGL_EQUIV:
    case GGL_OR_REVERSE:
    case GGL_OR_INVERTED:
    case GGL_NAND:
        mLogicOp = LOGIC_OP|LOGIC_OP_SRC|LOGIC_OP_DST;
        break;
    case GGL_NOOP:
    case GGL_INVERT:
        mLogicOp = LOGIC_OP|LOGIC_OP_DST;
        break;
    case GGL_COPY_INVERTED:
        mLogicOp = LOGIC_OP|LOGIC_OP_SRC;
        break;
    };
}

void GGLAssembler::decodeTMUNeeds(const needs_t& needs, context_t const* c)
{
    uint8_t replaced=0;
    mTextureMachine.mask = 0;
    mTextureMachine.activeUnits = 0;
    for (int i=GGL_TEXTURE_UNIT_COUNT-1 ; i>=0 ; i--) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (replaced == 0xF) {
            // all components are replaced, skip this TMU.
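            // In short: texture environments are applied in unit order, and
            // this loop walks the units backwards, so at this point
            // 'replaced' holds the components that some later GGL_REPLACE
            // unit will overwrite anyway; 0xF means all four components, so
            // this unit's fetch can be skipped entirely.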
            tmu.format_idx = 0;
            tmu.mask = 0;
            tmu.replaced = replaced;
            continue;
        }
        tmu.format_idx = GGL_READ_NEEDS(T_FORMAT, needs.t[i]);
        tmu.format = c->formats[tmu.format_idx];
        tmu.bits = tmu.format.size*8;
        tmu.swrap = GGL_READ_NEEDS(T_S_WRAP, needs.t[i]);
        tmu.twrap = GGL_READ_NEEDS(T_T_WRAP, needs.t[i]);
        tmu.env = ggl_needs_to_env(GGL_READ_NEEDS(T_ENV, needs.t[i]));
        tmu.pot = GGL_READ_NEEDS(T_POT, needs.t[i]);
        tmu.linear = GGL_READ_NEEDS(T_LINEAR, needs.t[i])
                && tmu.format.size!=3; // XXX: only 8, 16 and 32 modes for now

        // 5551 linear filtering is not supported
        if (tmu.format_idx == GGL_PIXEL_FORMAT_RGBA_5551)
            tmu.linear = 0;

        tmu.mask = 0;
        tmu.replaced = replaced;

        if (tmu.format_idx) {
            mTextureMachine.activeUnits++;
            if (tmu.format.c[0].h)    tmu.mask |= 0x1;
            if (tmu.format.c[1].h)    tmu.mask |= 0x2;
            if (tmu.format.c[2].h)    tmu.mask |= 0x4;
            if (tmu.format.c[3].h)    tmu.mask |= 0x8;
            if (tmu.env == GGL_REPLACE) {
                replaced |= tmu.mask;
            } else if (tmu.env == GGL_DECAL) {
                if (!tmu.format.c[GGLFormat::ALPHA].h) {
                    // if we don't have alpha, decal does nothing
                    tmu.mask = 0;
                } else {
                    // decal always ignores At
                    tmu.mask &= ~(1<<GGLFormat::ALPHA);
                }
            }
        }
        mTextureMachine.mask |= tmu.mask;
        //printf("%d: mask=%08lx, replaced=%08lx\n",
        //    i, int(tmu.mask), int(tmu.replaced));
    }
    mTextureMachine.replaced = replaced;
    mTextureMachine.directTexture = 0;
    //printf("replaced=%08lx\n", mTextureMachine.replaced);
}


void GGLAssembler::init_textures(
        tex_coord_t* coords,
        const reg_t& x, const reg_t& y)
{
    context_t const* c = mBuilderContext.c;
    const needs_t& needs = mBuilderContext.needs;
    int Rctx = mBuilderContext.Rctx;
    int Rx = x.reg;
    int Ry = y.reg;

    if (mTextureMachine.mask) {
        comment("compute texture coordinates");
    }

    // init texture coordinates for each tmu
    const int cb_format_idx = GGL_READ_NEEDS(CB_FORMAT, needs.n);
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;
        if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
            (tmu.twrap == GGL_NEEDS_WRAP_11))
        {
            // 1:1 texture
            pointer_t& txPtr = coords[i].ptr;
            txPtr.setTo(obtainReg(), tmu.bits);
            CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydsdy);
            ADD(AL, 0, Rx, Rx, reg_imm(txPtr.reg, ASR, 16));    // x += (s>>16)
            CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydtdy);
            ADD(AL, 0, Ry, Ry, reg_imm(txPtr.reg, ASR, 16));    // y += (t>>16)
            // merge base & offset
            CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].stride);
            SMLABB(AL, Rx, Ry, txPtr.reg, Rx);                  // x+y*stride
            CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data);
            base_offset(txPtr, txPtr, Rx);
        } else {
            Scratch scratches(registerFile());
            reg_t& s = coords[i].s;
            reg_t& t = coords[i].t;
            // s = (x * dsdx)>>16 + ydsdy
            // s = (x * dsdx)>>16 + (y*dsdy)>>16 + s0
            // t = (x * dtdx)>>16 + ydtdy
            // t = (x * dtdx)>>16 + (y*dtdy)>>16 + t0
            s.setTo(obtainReg());
            t.setTo(obtainReg());
            const int need_w = GGL_READ_NEEDS(W, needs.n);
            if (need_w) {
                CONTEXT_LOAD(s.reg, state.texture[i].iterators.ydsdy);
                CONTEXT_LOAD(t.reg, state.texture[i].iterators.ydtdy);
            } else {
                int ydsdy = scratches.obtain();
                int ydtdy = scratches.obtain();
                CONTEXT_LOAD(s.reg, generated_vars.texture[i].dsdx);
                CONTEXT_LOAD(ydsdy, state.texture[i].iterators.ydsdy);
                CONTEXT_LOAD(t.reg, generated_vars.texture[i].dtdx);
                CONTEXT_LOAD(ydtdy, state.texture[i].iterators.ydtdy);
                MLA(AL, 0, s.reg, Rx, s.reg, ydsdy);
                MLA(AL, 0, t.reg, Rx, t.reg, ydtdy);
            }

            if ((mOptLevel&1)==0) {
                CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
                CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
                recycleReg(s.reg);
                recycleReg(t.reg);
            }
        }
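        // At low optimization levels s/t are not kept live in registers
        // across the scanline loop: they are parked in the
        // generated_vars.texture[i].spill[] slots here, then reloaded and
        // stepped through memory by build_textures() and
        // build_iterate_texture_coordinates() below.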

        // direct texture?
        if (!multiTexture && !mBlending && !mDithering && !mFog &&
            cb_format_idx == tmu.format_idx && !tmu.linear &&
            mTextureMachine.replaced == tmu.mask)
        {
            mTextureMachine.directTexture = i + 1;
        }
    }
}

void GGLAssembler::build_textures( fragment_parts_t& parts,
        Scratch& regs)
{
    context_t const* c = mBuilderContext.c;
    const needs_t& needs = mBuilderContext.needs;
    int Rctx = mBuilderContext.Rctx;

    // We don't have a way to spill registers automatically, so spill the
    // depth and AA registers now, when we know we may have to.
    // Build the spill list...
    uint32_t spill_list = 0;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;
        if (tmu.linear) {
            // we may run out of registers if we have linear filtering
            // at 1 or 4 bytes / pixel on any texture unit.
            if (tmu.format.size == 1) {
                // if depth and AA are enabled, we'll be short one register
                if (parts.z.reg > 0 && parts.covPtr.reg > 0)
                    spill_list |= 1<<parts.covPtr.reg;
            }
            if (tmu.format.size == 4) {
                // if depth or AA is enabled, we'll be short one or two registers
                if (parts.z.reg > 0)
                    spill_list |= 1<<parts.z.reg;
                if (parts.covPtr.reg > 0)
                    spill_list |= 1<<parts.covPtr.reg;
            }
        }
    }

    Spill spill(registerFile(), *this, spill_list);

    const bool multiTexture = mTextureMachine.activeUnits > 1;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;

        pointer_t& txPtr = parts.coords[i].ptr;
        pixel_t& texel = parts.texel[i];

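        // Two cases follow: a unit whose s and t wrap modes are both
        // GGL_NEEDS_WRAP_11 is sampled 1:1 through the pointer computed in
        // init_textures(); every other unit goes through the repeat/clamp
        // computation and, optionally, bilinear filtering.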
        // repeat...
        if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
            (tmu.twrap == GGL_NEEDS_WRAP_11))
        { // 1:1 textures
            comment("fetch texel");
            texel.setTo(regs.obtain(), &tmu.format);
            load(txPtr, texel, WRITE_BACK);
        } else {
            Scratch scratches(registerFile());
            reg_t& s = parts.coords[i].s;
            reg_t& t = parts.coords[i].t;
            if ((mOptLevel&1)==0) {
                comment("reload s/t (multitexture or linear filtering)");
                s.reg = scratches.obtain();
                t.reg = scratches.obtain();
                CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]);
                CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]);
            }

            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                return;

            comment("compute repeat/clamp");
            int u      = scratches.obtain();
            int v      = scratches.obtain();
            int width  = scratches.obtain();
            int height = scratches.obtain();
            int U = 0;
            int V = 0;

            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                return;

            CONTEXT_LOAD(width,  generated_vars.texture[i].width);
            CONTEXT_LOAD(height, generated_vars.texture[i].height);

            int FRAC_BITS = 0;
            if (tmu.linear) {
                // linear interpolation
                if (tmu.format.size == 1) {
                    // for 8-bit textures, we can afford
                    // 7 bits of fractional precision at no
                    // additional cost (we can't do 8 bits
                    // because filter8 uses signed 16-bit muls)
                    FRAC_BITS = 7;
                } else if (tmu.format.size == 2) {
                    // filter16() is internally limited to 4 bits, so:
                    // FRAC_BITS=2 generates fewer instructions,
                    // FRAC_BITS=3,4,5 create unpleasant artifacts,
                    // FRAC_BITS=6+ looks good
                    FRAC_BITS = 6;
                } else if (tmu.format.size == 4) {
                    // filter32() is internally limited to 8 bits, so:
                    // FRAC_BITS=4 looks good
                    // FRAC_BITS=5+ looks better, but generates 3 extra ipp
                    FRAC_BITS = 6;
                } else {
                    // for all other cases we use 4 bits.
                    FRAC_BITS = 4;
                }
            }
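            // wrapping() below returns u,v with FRAC_BITS fractional bits;
            // the bilinear filters use those fractions (U,V) as weights, so
            // each of the four texel weights carries 2*FRAC_BITS bits and
            // the weights sum to 1<<(2*FRAC_BITS) (4096 for FRAC_BITS=6).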
            wrapping(u, s.reg, width,  tmu.swrap, FRAC_BITS);
            wrapping(v, t.reg, height, tmu.twrap, FRAC_BITS);

            if (tmu.linear) {
                comment("compute linear filtering offsets");
                // pixel size scale
                const int shift = 31 - gglClz(tmu.format.size);
                U = scratches.obtain();
                V = scratches.obtain();

                if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                    return;

                // sample the texel center
                SUB(AL, 0, u, u, imm(1<<(FRAC_BITS-1)));
                SUB(AL, 0, v, v, imm(1<<(FRAC_BITS-1)));

                // get the fractional part of U,V
                AND(AL, 0, U, u, imm((1<<FRAC_BITS)-1));
                AND(AL, 0, V, v, imm((1<<FRAC_BITS)-1));

                // compute width-1 and height-1
                SUB(AL, 0, width,  width,  imm(1));
                SUB(AL, 0, height, height, imm(1));

                // get the integer part of U,V and clamp/wrap
                // and compute offset to the next texel
                if (tmu.swrap == GGL_NEEDS_WRAP_REPEAT) {
                    // u has already been REPEATed
                    MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));
                    MOV(MI, 0, u, width);
                    CMP(AL, u, width);
                    MOV(LT, 0, width, imm(1 << shift));
                    if (shift)
                        MOV(GE, 0, width, reg_imm(width, LSL, shift));
                    RSB(GE, 0, width, width, imm(0));
                } else {
                    // u has not been CLAMPed yet
                    // algorithm:
                    //  if ((u>>4) >= width)
                    //      u = width<<4
                    //      width = 0
                    //  else
                    //      width = 1<<shift
                    //  u = u>>4; // get integer part
                    //  if (u<0)
                    //      u = 0
                    //      width = 0
                    //  generated_vars.rt = width

                    CMP(AL, width, reg_imm(u, ASR, FRAC_BITS));
                    MOV(LE, 0, u, reg_imm(width, LSL, FRAC_BITS));
                    MOV(LE, 0, width, imm(0));
                    MOV(GT, 0, width, imm(1 << shift));
                    MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));
                    MOV(MI, 0, u, imm(0));
                    MOV(MI, 0, width, imm(0));
                }
                CONTEXT_STORE(width, generated_vars.rt);

                const int stride = width;
                CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
                if (tmu.twrap == GGL_NEEDS_WRAP_REPEAT) {
                    // v has already been REPEATed
                    MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));
                    MOV(MI, 0, v, height);
                    CMP(AL, v, height);
                    MOV(LT, 0, height, imm(1 << shift));
                    if (shift)
                        MOV(GE, 0, height, reg_imm(height, LSL, shift));
                    RSB(GE, 0, height, height, imm(0));
                    MUL(AL, 0, height, stride, height);
                } else {
                    // v has not been CLAMPed yet
                    CMP(AL, height, reg_imm(v, ASR, FRAC_BITS));
                    MOV(LE, 0, v, reg_imm(height, LSL, FRAC_BITS));
                    MOV(LE, 0, height, imm(0));
                    if (shift) {
                        MOV(GT, 0, height, reg_imm(stride, LSL, shift));
                    } else {
                        MOV(GT, 0, height, stride);
                    }
                    MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));
                    MOV(MI, 0, v, imm(0));
                    MOV(MI, 0, height, imm(0));
                }
                CONTEXT_STORE(height, generated_vars.lb);
            }

            scratches.recycle(width);
            scratches.recycle(height);

            // iterate texture coordinates...
            comment("iterate s,t");
            int dsdx = scratches.obtain();
            int dtdx = scratches.obtain();

            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                return;

            CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
            CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
            ADD(AL, 0, s.reg, s.reg, dsdx);
            ADD(AL, 0, t.reg, t.reg, dtdx);
            if ((mOptLevel&1)==0) {
                CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
                CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
                scratches.recycle(s.reg);
                scratches.recycle(t.reg);
            }
            scratches.recycle(dsdx);
            scratches.recycle(dtdx);

            // merge base & offset...
            comment("merge base & offset");
            texel.setTo(regs.obtain(), &tmu.format);
            txPtr.setTo(texel.reg, tmu.bits);
            int stride = scratches.obtain();

            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                return;

            CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
            CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data);
            SMLABB(AL, u, v, stride, u);    // u+v*stride
            base_offset(txPtr, txPtr, u);

            // load texel
            if (!tmu.linear) {
                comment("fetch texel");
                load(txPtr, texel, 0);
            } else {
                // recycle registers we don't need anymore
                scratches.recycle(u);
                scratches.recycle(v);
                scratches.recycle(stride);

                comment("fetch texel, bilinear");
                switch (tmu.format.size) {
                case 1: filter8(parts, texel, tmu, U, V, txPtr, FRAC_BITS);  break;
                case 2: filter16(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
                case 3: filter24(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
                case 4: filter32(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
                }
            }
        }
    }
}

void GGLAssembler::build_iterate_texture_coordinates(
    const fragment_parts_t& parts)
{
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;

        if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
            (tmu.twrap == GGL_NEEDS_WRAP_11))
        { // 1:1 textures
            const pointer_t& txPtr = parts.coords[i].ptr;
            ADD(AL, 0, txPtr.reg, txPtr.reg, imm(txPtr.size>>3));
        } else {
            Scratch scratches(registerFile());
            int s = parts.coords[i].s.reg;
            int t = parts.coords[i].t.reg;
            if ((mOptLevel&1)==0) {
                s = scratches.obtain();
                t = scratches.obtain();
                CONTEXT_LOAD(s, generated_vars.texture[i].spill[0]);
                CONTEXT_LOAD(t, generated_vars.texture[i].spill[1]);
            }
            int dsdx = scratches.obtain();
            int dtdx = scratches.obtain();
            CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
            CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
            ADD(AL, 0, s, s, dsdx);
            ADD(AL, 0, t, t, dtdx);
            if ((mOptLevel&1)==0) {
                CONTEXT_STORE(s, generated_vars.texture[i].spill[0]);
                CONTEXT_STORE(t, generated_vars.texture[i].spill[1]);
            }
        }
    }
}

void GGLAssembler::filter8(
        const fragment_parts_t& /*parts*/,
        pixel_t& texel, const texture_unit_t& tmu,
        int U, int V, pointer_t& txPtr,
        int FRAC_BITS)
{
    if (tmu.format.components != GGL_ALPHA &&
        tmu.format.components != GGL_LUMINANCE)
    {
        // this is a packed format, and we don't support
        // linear filtering (it's probably RGB 332)
        // Should not happen with OpenGL|ES
        LDRB(AL, texel.reg, txPtr.reg);
        return;
    }

    // ------------------------
    // about ~22 cycles / pixel
    Scratch scratches(registerFile());

    int pixel = scratches.obtain();
    int d     = scratches.obtain();
    int u     = scratches.obtain();
    int k     = scratches.obtain();
    int rt    = scratches.obtain();
    int lb    = scratches.obtain();

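    // At this point txPtr points at the first (top-left) texel of the 2x2
    // footprint.  generated_vars.rt and generated_vars.lb were computed in
    // build_textures() and appear to hold the byte offsets to the texel on
    // the right and the texel below (0 at a clamped edge, negative when
    // wrapping), so the four taps are loaded from offsets 0, rt, lb, rt+lb.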
    // RB -> U * V

    CONTEXT_LOAD(rt, generated_vars.rt);
    CONTEXT_LOAD(lb, generated_vars.lb);

    int offset = pixel;
    ADD(AL, 0, offset, lb, rt);
    LDRB(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SMULBB(AL, u, U, V);
    SMULBB(AL, d, pixel, u);
    RSB(AL, 0, k, u, imm(1<<(FRAC_BITS*2)));

    // LB -> (1-U) * V
    RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
    LDRB(AL, pixel, txPtr.reg, reg_scale_pre(lb));
    SMULBB(AL, u, U, V);
    SMLABB(AL, d, pixel, u, d);
    SUB(AL, 0, k, k, u);

    // LT -> (1-U)*(1-V)
    RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
    LDRB(AL, pixel, txPtr.reg);
    SMULBB(AL, u, U, V);
    SMLABB(AL, d, pixel, u, d);

    // RT -> U*(1-V)
    LDRB(AL, pixel, txPtr.reg, reg_scale_pre(rt));
    SUB(AL, 0, u, k, u);
    SMLABB(AL, texel.reg, pixel, u, d);

    for (int i=0 ; i<4 ; i++) {
        if (!texel.format.c[i].h) continue;
        texel.format.c[i].h = FRAC_BITS*2+8;
        texel.format.c[i].l = FRAC_BITS*2; // keeping 8 bits is enough
    }
    texel.format.size = 4;
    texel.format.bitsPerPixel = 32;
    texel.flags |= CLEAR_LO;
}

void GGLAssembler::filter16(
        const fragment_parts_t& /*parts*/,
        pixel_t& texel, const texture_unit_t& tmu,
        int U, int V, pointer_t& txPtr,
        int FRAC_BITS)
{
    // compute the mask
    // XXX: it would be nice if the mask below could be computed
    // automatically.
    uint32_t mask = 0;
    int shift = 0;
    int prec = 0;
    switch (tmu.format_idx) {
        case GGL_PIXEL_FORMAT_RGB_565:
            // source: 00000ggg.ggg00000 | rrrrr000.000bbbbb
            // result: gggggggg.gggrrrrr | rrrrr0bb.bbbbbbbb
            mask = 0x07E0F81F;
            shift = 16;
            prec = 5;
            break;
        case GGL_PIXEL_FORMAT_RGBA_4444:
            // 0000,1111,0000,1111 | 0000,1111,0000,1111
            mask = 0x0F0F0F0F;
            shift = 12;
            prec = 4;
            break;
        case GGL_PIXEL_FORMAT_LA_88:
            // 0000,0000,1111,1111 | 0000,0000,1111,1111
            // AALL -> 00AA | 00LL
            mask = 0x00FF00FF;
            shift = 8;
            prec = 8;
            break;
        default:
            // unsupported format, do something sensible...
            ALOGE("Unsupported 16-bits texture format (%d)", tmu.format_idx);
            LDRH(AL, texel.reg, txPtr.reg);
            return;
    }

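    // Worked example for RGB_565: OR'ing pixel with pixel<<16 and masking
    // with 0x07E0F81F leaves G alone in the high halfword and R,B in the low
    // halfword, with at least 'prec' clear bits above each field.  A single
    // MUL by a weight of at most 1<<prec then scales all three channels at
    // once without any field overflowing into its neighbour, the four taps
    // are accumulated with MLA, and texel.format is adjusted below so later
    // extract() calls know where each widened channel now sits.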
    const int adjust = FRAC_BITS*2 - prec;
    const int round  = 0;

    // update the texel format
    texel.format.size = 4;
    texel.format.bitsPerPixel = 32;
    texel.flags |= CLEAR_HI|CLEAR_LO;
    for (int i=0 ; i<4 ; i++) {
        if (!texel.format.c[i].h) continue;
        const uint32_t offset = (mask & tmu.format.mask(i)) ? 0 : shift;
        texel.format.c[i].h = tmu.format.c[i].h + offset + prec;
        texel.format.c[i].l = texel.format.c[i].h - (tmu.format.bits(i) + prec);
    }

    // ------------------------
    // about ~40 cycles / pixel
    Scratch scratches(registerFile());

    int pixel = scratches.obtain();
    int d     = scratches.obtain();
    int u     = scratches.obtain();
    int k     = scratches.obtain();

    // RB -> U * V
    int offset = pixel;
    CONTEXT_LOAD(offset, generated_vars.rt);
    CONTEXT_LOAD(u, generated_vars.lb);
    ADD(AL, 0, offset, offset, u);

    LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
    SMULBB(AL, u, U, V);
    ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
    build_and_immediate(pixel, pixel, mask, 32);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MUL(AL, 0, d, pixel, u);
    RSB(AL, 0, k, u, imm(1<<prec));

    // LB -> (1-U) * V
    CONTEXT_LOAD(offset, generated_vars.lb);
    RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
    LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
    SMULBB(AL, u, U, V);
    ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
    build_and_immediate(pixel, pixel, mask, 32);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, d, pixel, u, d);
    SUB(AL, 0, k, k, u);

    // LT -> (1-U)*(1-V)
    RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
    LDRH(AL, pixel, txPtr.reg);
    SMULBB(AL, u, U, V);
    ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
    build_and_immediate(pixel, pixel, mask, 32);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, d, pixel, u, d);

    // RT -> U*(1-V)
    CONTEXT_LOAD(offset, generated_vars.rt);
    LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
    SUB(AL, 0, u, k, u);
    ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
    build_and_immediate(pixel, pixel, mask, 32);
    MLA(AL, 0, texel.reg, pixel, u, d);
}

void GGLAssembler::filter24(
        const fragment_parts_t& /*parts*/,
        pixel_t& texel, const texture_unit_t& /*tmu*/,
        int /*U*/, int /*V*/, pointer_t& txPtr,
        int /*FRAC_BITS*/)
{
    // not supported yet (currently disabled)
    load(txPtr, texel, 0);
}

void GGLAssembler::filter32(
        const fragment_parts_t& /*parts*/,
        pixel_t& texel, const texture_unit_t& /*tmu*/,
        int U, int V, pointer_t& txPtr,
        int FRAC_BITS)
{
    const int adjust = FRAC_BITS*2 - 8;
    const int round  = 0;

    // ------------------------
    // about ~38 cycles / pixel
    Scratch scratches(registerFile());

    int pixel = scratches.obtain();
    int dh    = scratches.obtain();
    int u     = scratches.obtain();
    int k     = scratches.obtain();

    int temp  = scratches.obtain();
    int dl    = scratches.obtain();
    int mask  = scratches.obtain();

    MOV(AL, 0, mask, imm(0xFF));
    ORR(AL, 0, mask, mask, imm(0xFF0000));

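    // mask = 0x00FF00FF splits the 8888 texel into two half-pixels:
    // 'pixel & mask' keeps bytes 0 and 2, '(pixel>>8) & mask' keeps bytes 1
    // and 3.  Each kept byte has 8 clear bits above it, so multiplying by an
    // 8-bit weight and accumulating the four taps in dh/dl cannot spill into
    // the neighbouring byte; the last three instructions of this function
    // shift, mask and OR the two halves back into a filtered 8888 texel.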
    // RB -> U * V
    int offset = pixel;
    CONTEXT_LOAD(offset, generated_vars.rt);
    CONTEXT_LOAD(u, generated_vars.lb);
    ADD(AL, 0, offset, offset, u);

    LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SMULBB(AL, u, U, V);
    AND(AL, 0, temp, mask, pixel);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MUL(AL, 0, dh, temp, u);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MUL(AL, 0, dl, temp, u);
    RSB(AL, 0, k, u, imm(0x100));

    // LB -> (1-U) * V
    CONTEXT_LOAD(offset, generated_vars.lb);
    RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
    LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SMULBB(AL, u, U, V);
    AND(AL, 0, temp, mask, pixel);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, dh, temp, u, dh);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MLA(AL, 0, dl, temp, u, dl);
    SUB(AL, 0, k, k, u);

    // LT -> (1-U)*(1-V)
    RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
    LDR(AL, pixel, txPtr.reg);
    SMULBB(AL, u, U, V);
    AND(AL, 0, temp, mask, pixel);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, dh, temp, u, dh);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MLA(AL, 0, dl, temp, u, dl);

    // RT -> U*(1-V)
    CONTEXT_LOAD(offset, generated_vars.rt);
    LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SUB(AL, 0, u, k, u);
    AND(AL, 0, temp, mask, pixel);
    MLA(AL, 0, dh, temp, u, dh);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MLA(AL, 0, dl, temp, u, dl);

    AND(AL, 0, dh, mask, reg_imm(dh, LSR, 8));
    AND(AL, 0, dl, dl, reg_imm(mask, LSL, 8));
    ORR(AL, 0, texel.reg, dh, dl);
}

void GGLAssembler::build_texture_environment(
        component_t& fragment,
        const fragment_parts_t& parts,
        int component,
        Scratch& regs)
{
    const uint32_t component_mask = 1<<component;
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];

        if (tmu.mask & component_mask) {
            // replace or modulate with this texture
            if ((tmu.replaced & component_mask) == 0) {
                // not replaced by a later tmu...

                Scratch scratches(registerFile());
                pixel_t texel(parts.texel[i]);

                if (multiTexture &&
                    tmu.swrap == GGL_NEEDS_WRAP_11 &&
                    tmu.twrap == GGL_NEEDS_WRAP_11)
                {
                    texel.reg = scratches.obtain();
                    texel.flags |= CORRUPTIBLE;
                    comment("fetch texel (multitexture 1:1)");
                    load(parts.coords[i].ptr, texel, WRITE_BACK);
                }

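                // 'incoming' below keeps a descriptor of the previous
                // stage's value while modify() (defined elsewhere) is
                // expected to give 'fragment' a register that is safe to
                // overwrite, so each texture environment reads the prior
                // color and writes the combined result without clobbering
                // its own input.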
                component_t incoming(fragment);
                modify(fragment, regs);

                switch (tmu.env) {
                case GGL_REPLACE:
                    extract(fragment, texel, component);
                    break;
                case GGL_MODULATE:
                    modulate(fragment, incoming, texel, component);
                    break;
                case GGL_DECAL:
                    decal(fragment, incoming, texel, component);
                    break;
                case GGL_BLEND:
                    blend(fragment, incoming, texel, component, i);
                    break;
                case GGL_ADD:
                    add(fragment, incoming, texel, component);
                    break;
                }
            }
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::wrapping(
        int d,
        int coord, int size,
        int tx_wrap, int tx_linear)
{
    // notes:
    // if tx_linear is set, we need 4 extra bits of precision on the result
    // SMULL/UMULL is 3 cycles
    Scratch scratches(registerFile());
    int c = coord;
    if (tx_wrap == GGL_NEEDS_WRAP_REPEAT) {
        // UMULL takes 4 cycles (interlocked), and we can get away with
        // 2 cycles using SMULWB, but we're losing 16 bits of precision
        // out of 32 (this is not a problem because the iterator keeps
        // its full precision)
        // UMULL(AL, 0, size, d, c, size);
        // note: we can't use SMULTB because it's signed.
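        // The two instructions below compute
        //   d = ((c >> (16 - tx_linear)) * size) >> 16,
        // essentially the (c * size) >> 32 of the commented-out UMULL but
        // with tx_linear extra fraction bits, using only the top 16 bits of
        // the coordinate.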
        MOV(AL, 0, d, reg_imm(c, LSR, 16-tx_linear));
        SMULWB(AL, d, d, size);
    } else if (tx_wrap == GGL_NEEDS_WRAP_CLAMP_TO_EDGE) {
        if (tx_linear) {
            // 1 cycle
            MOV(AL, 0, d, reg_imm(coord, ASR, 16-tx_linear));
        } else {
            // 4 cycles (common case)
            MOV(AL, 0, d, reg_imm(coord, ASR, 16));
            BIC(AL, 0, d, d, reg_imm(d, ASR, 31));
            CMP(AL, d, size);
            SUB(GE, 0, d, size, imm(1));
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::modulate(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component)
{
    Scratch locals(registerFile());
    integer_t texel(locals.obtain(), 32, CORRUPTIBLE);
    extract(texel, incomingTexel, component);

    const int Nt = texel.size();
        // Nt should always be less than 10 bits because it comes
        // from the TMU.

    int Ni = incoming.size();
        // Ni could be big because it comes from previous MODULATEs

    if (Nt == 1) {
        // texel acts as a bit-mask
        // dest = incoming & ((texel << incoming.h)-texel)
        RSB(AL, 0, dest.reg, texel.reg, reg_imm(texel.reg, LSL, incoming.h));
        AND(AL, 0, dest.reg, dest.reg, incoming.reg);
        dest.l = incoming.l;
        dest.h = incoming.h;
        dest.flags |= (incoming.flags & CLEAR_LO);
    } else if (Ni == 1) {
        MOV(AL, 0, dest.reg, reg_imm(incoming.reg, LSL, 31-incoming.h));
        AND(AL, 0, dest.reg, texel.reg, reg_imm(dest.reg, ASR, 31));
        dest.l = 0;
        dest.h = Nt;
    } else {
        int inReg = incoming.reg;
        int shift = incoming.l;
        if ((Nt + Ni) > 32) {
            // we will overflow, reduce the precision of Ni to 8 bits
            // (Note Nt cannot be more than 10 bits which happens with
            // 565 textures and GGL_LINEAR)
            shift += Ni-8;
            Ni = 8;
        }

        // modulate by the component with the lowest precision
        if (Nt >= Ni) {
            if (shift) {
                // XXX: we should be able to avoid this shift
                // when shift==16 && Nt<16 && Ni<16, in which
                // case we could use SMULBT below.
                MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift));
                inReg = dest.reg;
                shift = 0;
            }
            // operation:            (Cf*Ct)/((1<<Ni)-1)
            // approximated with:    Cf*(Ct + Ct>>(Ni-1))>>Ni
            // this operation doesn't change texel's size
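            // Adding x>>(n-1) to an n-bit value x before the multiply and
            // shifting right by n afterwards approximates division by
            // (1<<n)-1: full scale maps to exactly 1.0 (e.g. n=8: 255 +
            // (255>>7) = 256, and Cf*256>>8 == Cf) while 0 stays 0.  The
            // same trick is used in the other branch and in decal()/blend()
            // below.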
            ADD(AL, 0, dest.reg, inReg, reg_imm(inReg, LSR, Ni-1));
            if (Nt<16 && Ni<16) SMULBB(AL, dest.reg, texel.reg, dest.reg);
            else                MUL(AL, 0, dest.reg, texel.reg, dest.reg);
            dest.l = Ni;
            dest.h = Nt + Ni;
        } else {
            if (shift && (shift != 16)) {
                // if shift==16, we can use 16-bit mul instructions later
                MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift));
                inReg = dest.reg;
                shift = 0;
            }
            // operation:            (Cf*Ct)/((1<<Nt)-1)
            // approximated with:    Ct*(Cf + Cf>>(Nt-1))>>Nt
            // this operation doesn't change incoming's size
            Scratch scratches(registerFile());
            int t = (texel.flags & CORRUPTIBLE) ? texel.reg : dest.reg;
            if (t == inReg)
                t = scratches.obtain();
            ADD(AL, 0, t, texel.reg, reg_imm(texel.reg, LSR, Nt-1));
            if (Nt<16 && Ni<16) {
                if (shift==16)  SMULBT(AL, dest.reg, t, inReg);
                else            SMULBB(AL, dest.reg, t, inReg);
            } else              MUL(AL, 0, dest.reg, t, inReg);
            dest.l = Nt;
            dest.h = Nt + Ni;
        }

        // low bits are not valid
        dest.flags |= CLEAR_LO;

        // no need to keep more than 8 bits/component
        if (dest.size() > 8)
            dest.l = dest.h-8;
    }
}

void GGLAssembler::decal(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component)
{
    // RGBA:
    // Cv = Cf*(1 - At) + Ct*At = Cf + (Ct - Cf)*At
    // Av = Af
    Scratch locals(registerFile());
    integer_t texel(locals.obtain(), 32, CORRUPTIBLE);
    integer_t factor(locals.obtain(), 32, CORRUPTIBLE);
    extract(texel, incomingTexel, component);
    extract(factor, incomingTexel, GGLFormat::ALPHA);

    // no need to keep more than 8 bits for decal
    int Ni = incoming.size();
    int shift = incoming.l;
    if (Ni > 8) {
        shift += Ni-8;
        Ni = 8;
    }
    integer_t incomingNorm(incoming.reg, Ni, incoming.flags);
    if (shift) {
        MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift));
        incomingNorm.reg = dest.reg;
        incomingNorm.flags |= CORRUPTIBLE;
    }
    ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1));
    build_blendOneMinusFF(dest, factor, incomingNorm, texel);
}

void GGLAssembler::blend(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component, int tmu)
{
    // RGBA:
    // Cv = (1 - Ct)*Cf + Ct*Cc = Cf + (Cc - Cf)*Ct
    // Av = At*Af

    if (component == GGLFormat::ALPHA) {
        modulate(dest, incoming, incomingTexel, component);
        return;
    }

    Scratch locals(registerFile());
    integer_t color(locals.obtain(), 8, CORRUPTIBLE);
    integer_t factor(locals.obtain(), 32, CORRUPTIBLE);
    LDRB(AL, color.reg, mBuilderContext.Rctx,
            immed12_pre(GGL_OFFSETOF(state.texture[tmu].env_color[component])));
    extract(factor, incomingTexel, component);

    // no need to keep more than 8 bits for blend
    int Ni = incoming.size();
    int shift = incoming.l;
    if (Ni > 8) {
        shift += Ni-8;
        Ni = 8;
    }
    integer_t incomingNorm(incoming.reg, Ni, incoming.flags);
    if (shift) {
        MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift));
        incomingNorm.reg = dest.reg;
        incomingNorm.flags |= CORRUPTIBLE;
    }
    ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1));
    build_blendOneMinusFF(dest, factor, incomingNorm, color);
}

void GGLAssembler::add(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component)
{
    // RGBA:
    // Cv = Cf + Ct;
    Scratch locals(registerFile());

    component_t incomingTemp(incoming);

    // use "dest" as a temporary for extracting the texel, unless "dest"
    // overlaps "incoming".
    integer_t texel(dest.reg, 32, CORRUPTIBLE);
    if (dest.reg == incomingTemp.reg)
        texel.reg = locals.obtain();
    extract(texel, incomingTexel, component);

    if (texel.s < incomingTemp.size()) {
        expand(texel, texel, incomingTemp.size());
    } else if (texel.s > incomingTemp.size()) {
        if (incomingTemp.flags & CORRUPTIBLE) {
            expand(incomingTemp, incomingTemp, texel.s);
        } else {
            incomingTemp.reg = locals.obtain();
            expand(incomingTemp, incoming, texel.s);
        }
    }

    if (incomingTemp.l) {
        ADD(AL, 0, dest.reg, texel.reg,
                reg_imm(incomingTemp.reg, LSR, incomingTemp.l));
    } else {
        ADD(AL, 0, dest.reg, texel.reg, incomingTemp.reg);
    }
    dest.l = 0;
    dest.h = texel.size();
    component_sat(dest);
}

// ----------------------------------------------------------------------------

}; // namespace android