/* libs/pixelflinger/codeflinger/texturing.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#define LOG_TAG "pixelflinger-code"

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

#include <log/log.h>

#include "GGLAssembler.h"

namespace android {

// ---------------------------------------------------------------------------

// iterators are initialized like this:
// (intToFixedCenter(x) * dx)>>16 + x0
// ((x<<16 + 0x8000) * dx)>>16 + x0
// ((x<<16)*dx + (0x8000*dx))>>16 + x0
// ( (x*dx) + dx>>1 ) + x0
// (x*dx) + (dx>>1 + x0)

void GGLAssembler::init_iterated_color(fragment_parts_t& parts, const reg_t& x)
{
    context_t const* c = mBuilderContext.c;
    const needs_t& needs = mBuilderContext.needs;

    if (mSmooth) {
        // NOTE: we could take this case in the mDithering + !mSmooth case,
        // but this would use up to 4 more registers for the color components
        // for only a little added quality.
        // Currently, this causes the system to run out of registers in
        // some cases (see issue #719496)

        comment("compute initial iterated color (smooth and/or dither case)");

        parts.iterated_packed = 0;
        parts.packed = 0;

        // 0x1: color component
        // 0x2: iterators
        const int optReload = mOptLevel >> 1;
        if (optReload >= 3)         parts.reload = 0; // reload nothing
        else if (optReload == 2)    parts.reload = 2; // reload iterators
        else if (optReload == 1)    parts.reload = 1; // reload colors
        else if (optReload <= 0)    parts.reload = 3; // reload both

        if (!mSmooth) {
            // we're not smoothing (just dithering), so we never have to
            // reload the iterators
            parts.reload &= ~2;
        }

        Scratch scratches(registerFile());
        const int t0 = (parts.reload & 1) ? scratches.obtain() : 0;
        const int t1 = (parts.reload & 2) ? scratches.obtain() : 0;
        for (int i=0 ; i<4 ; i++) {
            if (!mInfo[i].iterated)
                continue;

            // this component exists in the destination and is not replaced
            // by a texture unit.
            const int c = (parts.reload & 1) ? t0 : obtainReg();
            if (i==0) CONTEXT_LOAD(c, iterators.ydady);
            if (i==1) CONTEXT_LOAD(c, iterators.ydrdy);
            if (i==2) CONTEXT_LOAD(c, iterators.ydgdy);
            if (i==3) CONTEXT_LOAD(c, iterators.ydbdy);
            parts.argb[i].reg = c;

            if (mInfo[i].smooth) {
                parts.argb_dx[i].reg = (parts.reload & 2) ? t1 : obtainReg();
                const int dvdx = parts.argb_dx[i].reg;
                CONTEXT_LOAD(dvdx, generated_vars.argb[i].dx);
                MLA(AL, 0, c, x.reg, dvdx, c);

                // adjust the color iterator to make sure it won't overflow
                if (!mAA) {
                    // this is not needed when we're using anti-aliasing
                    // because we will (have to) clamp the components
                    // anyway.
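                    // What the next few instructions compute, sketched in C
                    // (illustrative only, this is not the emitted code):
                    //     int32_t end = c + dvdx*(count>>16);  // iterator value at span end
                    //     if (end < 0) c -= end;               // lift the whole ramp if it would underflow
                    //     if (c < 0)   c = 0;                  // and clamp the start at zero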
                    int end = scratches.obtain();
                    MOV(AL, 0, end, reg_imm(parts.count.reg, LSR, 16));
                    MLA(AL, 1, end, dvdx, end, c);
                    SUB(MI, 0, c, c, end);
                    BIC(AL, 0, c, c, reg_imm(c, ASR, 31));
                    scratches.recycle(end);
                }
            }

            if (parts.reload & 1) {
                CONTEXT_STORE(c, generated_vars.argb[i].c);
            }
        }
    } else {
        // We're not smoothing, so we can
        // just use a packed version of the color and extract the
        // components as needed (or not at all if we don't blend)

        // figure out if we need the iterated color
        int load = 0;
        for (int i=0 ; i<4 ; i++) {
            component_info_t& info = mInfo[i];
            if ((info.inDest || info.needed) && !info.replaced)
                load |= 1;
        }

        parts.iterated_packed = 1;
        parts.packed = (!mTextureMachine.mask && !mBlending
                && !mFog && !mDithering);
        parts.reload = 0;
        if (load || parts.packed) {
            if (mBlending || mDithering || mInfo[GGLFormat::ALPHA].needed) {
                comment("load initial iterated color (8888 packed)");
                parts.iterated.setTo(obtainReg(),
                        &(c->formats[GGL_PIXEL_FORMAT_RGBA_8888]));
                CONTEXT_LOAD(parts.iterated.reg, packed8888);
            } else {
                comment("load initial iterated color (dest format packed)");

                parts.iterated.setTo(obtainReg(), &mCbFormat);

                // pre-mask the iterated color
                const int bits = parts.iterated.size();
                const uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
                uint32_t mask = 0;
                if (mMasking) {
                    for (int i=0 ; i<4 ; i++) {
                        const int component_mask = 1<<i;
                        const int h = parts.iterated.format.c[i].h;
                        const int l = parts.iterated.format.c[i].l;
                        if (h && (!(mMasking & component_mask))) {
                            mask |= ((1<<(h-l))-1) << l;
                        }
                    }
                }

                if (mMasking && ((mask & size)==0)) {
                    // none of the components are present in the mask
                } else {
                    CONTEXT_LOAD(parts.iterated.reg, packed);
                    if (mCbFormat.size == 1) {
                        AND(AL, 0, parts.iterated.reg,
                                parts.iterated.reg, imm(0xFF));
                    } else if (mCbFormat.size == 2) {
                        MOV(AL, 0, parts.iterated.reg,
                                reg_imm(parts.iterated.reg, LSR, 16));
                    }
                }

                // pre-mask the iterated color
                if (mMasking) {
                    build_and_immediate(parts.iterated.reg, parts.iterated.reg,
                            mask, bits);
                }
            }
        }
    }
}

void GGLAssembler::build_iterated_color(
        component_t& fragment,
        const fragment_parts_t& parts,
        int component,
        Scratch& regs)
{
    fragment.setTo(regs.obtain(), 0, 32, CORRUPTIBLE);

    if (!mInfo[component].iterated)
        return;

    if (parts.iterated_packed) {
        // iterated colors are packed, extract the one we need
        extract(fragment, parts.iterated, component);
    } else {
        fragment.h = GGL_COLOR_BITS;
        fragment.l = GGL_COLOR_BITS - 8;
        fragment.flags |= CLEAR_LO;
        // iterated colors are held in their own register
        // (smooth and/or dithering case)
        if (parts.reload==3) {
            // this implies mSmooth
            Scratch scratches(registerFile());
            int dx = scratches.obtain();
            CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
            CONTEXT_LOAD(dx, generated_vars.argb[component].dx);
            ADD(AL, 0, dx, fragment.reg, dx);
            CONTEXT_STORE(dx, generated_vars.argb[component].c);
        } else if (parts.reload & 1) {
            CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
        } else {
            // we don't reload, so simply rename the register and mark as
            // non CORRUPTIBLE so that the texture env or blending code
            // won't modify this (renamed) register
            regs.recycle(fragment.reg);
            fragment.reg = parts.argb[component].reg;
            fragment.flags &= ~CORRUPTIBLE;
        }
        if (mInfo[component].smooth && mAA) {
            // when using smooth shading AND anti-aliasing, we need to clamp
            // the iterators because there is always an extra pixel on the
            // edges, which most of the time will cause an overflow
            // (since technically it's outside of the domain).
            BIC(AL, 0, fragment.reg, fragment.reg,
                    reg_imm(fragment.reg, ASR, 31));
            component_sat(fragment);
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::decodeLogicOpNeeds(const needs_t& needs)
{
    // gather some information about the components we need to process...
    const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
    switch(opcode) {
    case GGL_COPY:
        mLogicOp = 0;
        break;
    case GGL_CLEAR:
    case GGL_SET:
        mLogicOp = LOGIC_OP;
        break;
    case GGL_AND:
    case GGL_AND_REVERSE:
    case GGL_AND_INVERTED:
    case GGL_XOR:
    case GGL_OR:
    case GGL_NOR:
    case GGL_EQUIV:
    case GGL_OR_REVERSE:
    case GGL_OR_INVERTED:
    case GGL_NAND:
        mLogicOp = LOGIC_OP|LOGIC_OP_SRC|LOGIC_OP_DST;
        break;
    case GGL_NOOP:
    case GGL_INVERT:
        mLogicOp = LOGIC_OP|LOGIC_OP_DST;
        break;
    case GGL_COPY_INVERTED:
        mLogicOp = LOGIC_OP|LOGIC_OP_SRC;
        break;
    };
}

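// Decode the per-TMU needs.  Units are walked back to front so that
// 'replaced' accumulates the components fully overwritten by later
// GGL_REPLACE units; an earlier unit whose components are all replaced
// can then be skipped entirely.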
void GGLAssembler::decodeTMUNeeds(const needs_t& needs, context_t const* c)
{
    uint8_t replaced=0;
    mTextureMachine.mask = 0;
    mTextureMachine.activeUnits = 0;
    for (int i=GGL_TEXTURE_UNIT_COUNT-1 ; i>=0 ; i--) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (replaced == 0xF) {
            // all components are replaced, skip this TMU.
            tmu.format_idx = 0;
            tmu.mask = 0;
            tmu.replaced = replaced;
            continue;
        }
        tmu.format_idx = GGL_READ_NEEDS(T_FORMAT, needs.t[i]);
        tmu.format = c->formats[tmu.format_idx];
        tmu.bits = tmu.format.size*8;
        tmu.swrap = GGL_READ_NEEDS(T_S_WRAP, needs.t[i]);
        tmu.twrap = GGL_READ_NEEDS(T_T_WRAP, needs.t[i]);
        tmu.env = ggl_needs_to_env(GGL_READ_NEEDS(T_ENV, needs.t[i]));
        tmu.pot = GGL_READ_NEEDS(T_POT, needs.t[i]);
        tmu.linear = GGL_READ_NEEDS(T_LINEAR, needs.t[i])
                && tmu.format.size!=3; // XXX: only 8, 16 and 32 modes for now

        // 5551 linear filtering is not supported
        if (tmu.format_idx == GGL_PIXEL_FORMAT_RGBA_5551)
            tmu.linear = 0;

        tmu.mask = 0;
        tmu.replaced = replaced;

        if (tmu.format_idx) {
            mTextureMachine.activeUnits++;
            if (tmu.format.c[0].h) tmu.mask |= 0x1;
            if (tmu.format.c[1].h) tmu.mask |= 0x2;
            if (tmu.format.c[2].h) tmu.mask |= 0x4;
            if (tmu.format.c[3].h) tmu.mask |= 0x8;
            if (tmu.env == GGL_REPLACE) {
                replaced |= tmu.mask;
            } else if (tmu.env == GGL_DECAL) {
                if (!tmu.format.c[GGLFormat::ALPHA].h) {
                    // if we don't have alpha, decal does nothing
                    tmu.mask = 0;
                } else {
                    // decal always ignores At
                    tmu.mask &= ~(1<<GGLFormat::ALPHA);
                }
            }
        }
        mTextureMachine.mask |= tmu.mask;
        //printf("%d: mask=%08lx, replaced=%08lx\n",
        //    i, int(tmu.mask), int(tmu.replaced));
    }
    mTextureMachine.replaced = replaced;
    mTextureMachine.directTexture = 0;
    //printf("replaced=%08lx\n", mTextureMachine.replaced);
}

void GGLAssembler::init_textures(
        tex_coord_t* coords,
        const reg_t& x, const reg_t& y)
{
    context_t const* c = mBuilderContext.c;
    const needs_t& needs = mBuilderContext.needs;
    int Rctx = mBuilderContext.Rctx;
    int Rx = x.reg;
    int Ry = y.reg;

    if (mTextureMachine.mask) {
        comment("compute texture coordinates");
    }

    // init texture coordinates for each tmu
    const int cb_format_idx = GGL_READ_NEEDS(CB_FORMAT, needs.n);
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;
        if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
            (tmu.twrap == GGL_NEEDS_WRAP_11))
        {
            // 1:1 texture
            pointer_t& txPtr = coords[i].ptr;
            txPtr.setTo(obtainReg(), tmu.bits);
            CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydsdy);
            ADD(AL, 0, Rx, Rx, reg_imm(txPtr.reg, ASR, 16));    // x += (s>>16)
            CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydtdy);
            ADD(AL, 0, Ry, Ry, reg_imm(txPtr.reg, ASR, 16));    // y += (t>>16)
            // merge base & offset
            CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].stride);
            SMLABB(AL, Rx, Ry, txPtr.reg, Rx);                  // x + y*stride
            CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data);
            base_offset(txPtr, txPtr, Rx);
        } else {
            Scratch scratches(registerFile());
            reg_t& s = coords[i].s;
            reg_t& t = coords[i].t;
            // s = (x * dsdx)>>16 + ydsdy
            //   = (x * dsdx)>>16 + (y*dsdy)>>16 + s0
            // t = (x * dtdx)>>16 + ydtdy
            //   = (x * dtdx)>>16 + (y*dtdy)>>16 + t0
            s.setTo(obtainReg());
            t.setTo(obtainReg());
            const int need_w = GGL_READ_NEEDS(W, needs.n);
            if (need_w) {
                CONTEXT_LOAD(s.reg, state.texture[i].iterators.ydsdy);
                CONTEXT_LOAD(t.reg, state.texture[i].iterators.ydtdy);
            } else {
                int ydsdy = scratches.obtain();
                int ydtdy = scratches.obtain();
                CONTEXT_LOAD(s.reg, generated_vars.texture[i].dsdx);
                CONTEXT_LOAD(ydsdy, state.texture[i].iterators.ydsdy);
                CONTEXT_LOAD(t.reg, generated_vars.texture[i].dtdx);
                CONTEXT_LOAD(ydtdy, state.texture[i].iterators.ydtdy);
                MLA(AL, 0, s.reg, Rx, s.reg, ydsdy);
                MLA(AL, 0, t.reg, Rx, t.reg, ydtdy);
            }

            if ((mOptLevel&1)==0) {
                CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
                CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
                recycleReg(s.reg);
                recycleReg(t.reg);
            }
        }

        // direct texture?
        if (!multiTexture && !mBlending && !mDithering && !mFog &&
            cb_format_idx == tmu.format_idx && !tmu.linear &&
            mTextureMachine.replaced == tmu.mask)
        {
            mTextureMachine.directTexture = i + 1;
        }
    }
}

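// Per-pixel texture fetch.  For every active unit the generated code below
// reloads s/t from their spill slots when needed, wraps or clamps them via
// wrapping(), computes the bilinear sub-texel offsets when filtering is on,
// steps s/t by dsdx/dtdx for the next pixel, and finally computes the texel
// address from the integer coordinates (u + v*stride) and the texture's base
// pointer.  1:1-mapped textures skip all of this and are fetched directly
// through the pointer set up in init_textures().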
void GGLAssembler::build_textures( fragment_parts_t& parts,
                                   Scratch& regs)
{
    context_t const* c = mBuilderContext.c;
    const needs_t& needs = mBuilderContext.needs;
    int Rctx = mBuilderContext.Rctx;

    // We don't have a way to spill registers automatically, so spill the
    // depth and AA registers ahead of time when we know we may have to.
    // build the spill list...
    uint32_t spill_list = 0;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;
        if (tmu.linear) {
            // we may run out of registers if we have linear filtering
            // at 1 or 4 bytes / pixel on any texture unit.
            if (tmu.format.size == 1) {
                // if depth and AA are enabled, we'll be one register short
                if (parts.z.reg > 0 && parts.covPtr.reg > 0)
                    spill_list |= 1<<parts.covPtr.reg;
            }
            if (tmu.format.size == 4) {
                // if depth or AA is enabled, we'll be one or two registers short
                if (parts.z.reg > 0)
                    spill_list |= 1<<parts.z.reg;
                if (parts.covPtr.reg > 0)
                    spill_list |= 1<<parts.covPtr.reg;
            }
        }
    }

    Spill spill(registerFile(), *this, spill_list);

    const bool multiTexture = mTextureMachine.activeUnits > 1;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;

        pointer_t& txPtr = parts.coords[i].ptr;
        pixel_t& texel = parts.texel[i];

        // repeat...
        if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
            (tmu.twrap == GGL_NEEDS_WRAP_11))
        { // 1:1 textures
            comment("fetch texel");
            texel.setTo(regs.obtain(), &tmu.format);
            load(txPtr, texel, WRITE_BACK);
        } else {
            Scratch scratches(registerFile());
            reg_t& s = parts.coords[i].s;
            reg_t& t = parts.coords[i].t;
            if ((mOptLevel&1)==0) {
                comment("reload s/t (multitexture or linear filtering)");
                s.reg = scratches.obtain();
                t.reg = scratches.obtain();
                CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]);
                CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]);
            }

            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                return;

            comment("compute repeat/clamp");
            int u = scratches.obtain();
            int v = scratches.obtain();
            int width = scratches.obtain();
            int height = scratches.obtain();
            int U = 0;
            int V = 0;

            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                return;

            CONTEXT_LOAD(width, generated_vars.texture[i].width);
            CONTEXT_LOAD(height, generated_vars.texture[i].height);

            int FRAC_BITS = 0;
            if (tmu.linear) {
                // linear interpolation
                if (tmu.format.size == 1) {
                    // for 8-bit textures, we can afford
                    // 7 bits of fractional precision at no
                    // additional cost (we can't do 8 bits
                    // because filter8 uses signed 16-bit muls)
                    FRAC_BITS = 7;
                } else if (tmu.format.size == 2) {
                    // filter16() is internally limited to 4 bits, so:
                    // FRAC_BITS=2 generates fewer instructions,
                    // FRAC_BITS=3,4,5 create unpleasant artifacts,
                    // FRAC_BITS=6+ looks good
                    FRAC_BITS = 6;
                } else if (tmu.format.size == 4) {
                    // filter32() is internally limited to 8 bits, so:
                    // FRAC_BITS=4 looks good
                    // FRAC_BITS=5+ looks better, but generates 3 extra ipp
                    FRAC_BITS = 6;
                } else {
                    // for all other cases we use 4 bits.
                    FRAC_BITS = 4;
                }
            }
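            // wrapping() (defined further down) maps the iterated 16.16
            // coordinate onto the texture: REPEAT multiplies the wrapped
            // fraction by the texture size, CLAMP_TO_EDGE shifts the
            // coordinate (and clamps it when not filtering).  The result
            // keeps FRAC_BITS fractional bits for the bilinear weights.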
            wrapping(u, s.reg, width, tmu.swrap, FRAC_BITS);
            wrapping(v, t.reg, height, tmu.twrap, FRAC_BITS);

            if (tmu.linear) {
                comment("compute linear filtering offsets");
                // pixel size scale
                const int shift = 31 - gglClz(tmu.format.size);
                U = scratches.obtain();
                V = scratches.obtain();

                if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                    return;

                // sample the texel center
                SUB(AL, 0, u, u, imm(1<<(FRAC_BITS-1)));
                SUB(AL, 0, v, v, imm(1<<(FRAC_BITS-1)));

                // get the fractional part of U,V
                AND(AL, 0, U, u, imm((1<<FRAC_BITS)-1));
                AND(AL, 0, V, v, imm((1<<FRAC_BITS)-1));

                // compute width-1 and height-1
                SUB(AL, 0, width, width, imm(1));
                SUB(AL, 0, height, height, imm(1));

                // get the integer part of U,V and clamp/wrap
                // and compute offset to the next texel
                if (tmu.swrap == GGL_NEEDS_WRAP_REPEAT) {
                    // u has already been REPEATed
                    MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));
                    MOV(MI, 0, u, width);
                    CMP(AL, u, width);
                    MOV(LT, 0, width, imm(1 << shift));
                    if (shift)
                        MOV(GE, 0, width, reg_imm(width, LSL, shift));
                    RSB(GE, 0, width, width, imm(0));
                } else {
                    // u has not been CLAMPed yet
                    // algorithm:
                    //  if ((u>>4) >= width)
                    //      u = width<<4
                    //      width = 0
                    //  else
                    //      width = 1<<shift
                    //  u = u>>4; // get integer part
                    //  if (u<0)
                    //      u = 0
                    //      width = 0
                    //  generated_vars.rt = width

                    CMP(AL, width, reg_imm(u, ASR, FRAC_BITS));
                    MOV(LE, 0, u, reg_imm(width, LSL, FRAC_BITS));
                    MOV(LE, 0, width, imm(0));
                    MOV(GT, 0, width, imm(1 << shift));
                    MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));
                    MOV(MI, 0, u, imm(0));
                    MOV(MI, 0, width, imm(0));
                }
                CONTEXT_STORE(width, generated_vars.rt);

                const int stride = width;
                CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
                if (tmu.twrap == GGL_NEEDS_WRAP_REPEAT) {
                    // v has already been REPEATed
                    MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));
                    MOV(MI, 0, v, height);
                    CMP(AL, v, height);
                    MOV(LT, 0, height, imm(1 << shift));
                    if (shift)
                        MOV(GE, 0, height, reg_imm(height, LSL, shift));
                    RSB(GE, 0, height, height, imm(0));
                    MUL(AL, 0, height, stride, height);
                } else {
                    // v has not been CLAMPed yet
                    CMP(AL, height, reg_imm(v, ASR, FRAC_BITS));
                    MOV(LE, 0, v, reg_imm(height, LSL, FRAC_BITS));
                    MOV(LE, 0, height, imm(0));
                    if (shift) {
                        MOV(GT, 0, height, reg_imm(stride, LSL, shift));
                    } else {
                        MOV(GT, 0, height, stride);
                    }
                    MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));
                    MOV(MI, 0, v, imm(0));
                    MOV(MI, 0, height, imm(0));
                }
                CONTEXT_STORE(height, generated_vars.lb);
            }

            scratches.recycle(width);
            scratches.recycle(height);

            // iterate texture coordinates...
            comment("iterate s,t");
            int dsdx = scratches.obtain();
            int dtdx = scratches.obtain();

            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                return;

            CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
            CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
            ADD(AL, 0, s.reg, s.reg, dsdx);
            ADD(AL, 0, t.reg, t.reg, dtdx);
            if ((mOptLevel&1)==0) {
                CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
                CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
                scratches.recycle(s.reg);
                scratches.recycle(t.reg);
            }
            scratches.recycle(dsdx);
            scratches.recycle(dtdx);

            // merge base & offset...
            comment("merge base & offset");
            texel.setTo(regs.obtain(), &tmu.format);
            txPtr.setTo(texel.reg, tmu.bits);
            int stride = scratches.obtain();

            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                return;

            CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
            CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data);
            SMLABB(AL, u, v, stride, u);    // u + v*stride
            base_offset(txPtr, txPtr, u);

            // load texel
            if (!tmu.linear) {
                comment("fetch texel");
                load(txPtr, texel, 0);
            } else {
                // recycle registers we don't need anymore
                scratches.recycle(u);
                scratches.recycle(v);
                scratches.recycle(stride);

                comment("fetch texel, bilinear");
                switch (tmu.format.size) {
                case 1: filter8(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
                case 2: filter16(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
                case 3: filter24(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
                case 4: filter32(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break;
                }
            }
        }
    }
}

void GGLAssembler::build_iterate_texture_coordinates(
    const fragment_parts_t& parts)
{
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;

        if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
            (tmu.twrap == GGL_NEEDS_WRAP_11))
        { // 1:1 textures
            const pointer_t& txPtr = parts.coords[i].ptr;
            ADD(AL, 0, txPtr.reg, txPtr.reg, imm(txPtr.size>>3));
        } else {
            Scratch scratches(registerFile());
            int s = parts.coords[i].s.reg;
            int t = parts.coords[i].t.reg;
            if ((mOptLevel&1)==0) {
                s = scratches.obtain();
                t = scratches.obtain();
                CONTEXT_LOAD(s, generated_vars.texture[i].spill[0]);
                CONTEXT_LOAD(t, generated_vars.texture[i].spill[1]);
            }
            int dsdx = scratches.obtain();
            int dtdx = scratches.obtain();
            CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
            CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
            ADD(AL, 0, s, s, dsdx);
            ADD(AL, 0, t, t, dtdx);
            if ((mOptLevel&1)==0) {
                CONTEXT_STORE(s, generated_vars.texture[i].spill[0]);
                CONTEXT_STORE(t, generated_vars.texture[i].spill[1]);
            }
        }
    }
}

void GGLAssembler::filter8(
        const fragment_parts_t& /*parts*/,
        pixel_t& texel, const texture_unit_t& tmu,
        int U, int V, pointer_t& txPtr,
        int FRAC_BITS)
{
    if (tmu.format.components != GGL_ALPHA &&
        tmu.format.components != GGL_LUMINANCE)
    {
        // this is a packed format, and we don't support
        // linear filtering (it's probably RGB 332)
        // Should not happen with OpenGL|ES
        LDRB(AL, texel.reg, txPtr.reg);
        return;
    }

    // ------------------------
    // about ~22 cycles / pixel
    Scratch scratches(registerFile());

    int pixel = scratches.obtain();
    int d     = scratches.obtain();
    int u     = scratches.obtain();
    int k     = scratches.obtain();
    int rt    = scratches.obtain();
    int lb    = scratches.obtain();

    // RB -> U * V

    CONTEXT_LOAD(rt, generated_vars.rt);
    CONTEXT_LOAD(lb, generated_vars.lb);

    int offset = pixel;
    ADD(AL, 0, offset, lb, rt);
    LDRB(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SMULBB(AL, u, U, V);
    SMULBB(AL, d, pixel, u);
    RSB(AL, 0, k, u, imm(1<<(FRAC_BITS*2)));

    // LB -> (1-U) * V
    RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
    LDRB(AL, pixel, txPtr.reg, reg_scale_pre(lb));
    SMULBB(AL, u, U, V);
    SMLABB(AL, d, pixel, u, d);
    SUB(AL, 0, k, k, u);

    // LT -> (1-U)*(1-V)
    RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
    LDRB(AL, pixel, txPtr.reg);
    SMULBB(AL, u, U, V);
    SMLABB(AL, d, pixel, u, d);

    // RT -> U*(1-V)
    LDRB(AL, pixel, txPtr.reg, reg_scale_pre(rt));
    SUB(AL, 0, u, k, u);
    SMLABB(AL, texel.reg, pixel, u, d);

    for (int i=0 ; i<4 ; i++) {
        if (!texel.format.c[i].h) continue;
        texel.format.c[i].h = FRAC_BITS*2+8;
        texel.format.c[i].l = FRAC_BITS*2; // keeping 8 bits is enough
    }
    texel.format.size = 4;
    texel.format.bitsPerPixel = 32;
    texel.flags |= CLEAR_LO;
}

void GGLAssembler::filter16(
        const fragment_parts_t& /*parts*/,
        pixel_t& texel, const texture_unit_t& tmu,
        int U, int V, pointer_t& txPtr,
        int FRAC_BITS)
{
    // compute the mask
    // XXX: it would be nice if the mask below could be computed
    // automatically.
    uint32_t mask = 0;
    int shift = 0;
    int prec = 0;
    switch (tmu.format_idx) {
    case GGL_PIXEL_FORMAT_RGB_565:
        // source: 00000ggg.ggg00000 | rrrrr000.000bbbbb
        // result: gggggggg.gggrrrrr | rrrrr0bb.bbbbbbbb
        mask = 0x07E0F81F;
        shift = 16;
        prec = 5;
        break;
    case GGL_PIXEL_FORMAT_RGBA_4444:
        // 0000,1111,0000,1111 | 0000,1111,0000,1111
        mask = 0x0F0F0F0F;
        shift = 12;
        prec = 4;
        break;
    case GGL_PIXEL_FORMAT_LA_88:
        // 0000,0000,1111,1111 | 0000,0000,1111,1111
        // AALL -> 00AA | 00LL
        mask = 0x00FF00FF;
        shift = 8;
        prec = 8;
        break;
    default:
        // unsupported format, do something sensible...
        ALOGE("Unsupported 16-bit texture format (%d)", tmu.format_idx);
        LDRH(AL, texel.reg, txPtr.reg);
        return;
    }

    const int adjust = FRAC_BITS*2 - prec;
    const int round = 0;

    // update the texel format
    texel.format.size = 4;
    texel.format.bitsPerPixel = 32;
    texel.flags |= CLEAR_HI|CLEAR_LO;
    for (int i=0 ; i<4 ; i++) {
        if (!texel.format.c[i].h) continue;
        const uint32_t offset = (mask & tmu.format.mask(i)) ? 0 : shift;
        texel.format.c[i].h = tmu.format.c[i].h + offset + prec;
        texel.format.c[i].l = texel.format.c[i].h - (tmu.format.bits(i) + prec);
    }

    // ------------------------
    // about ~40 cycles / pixel
    Scratch scratches(registerFile());

    int pixel = scratches.obtain();
    int d     = scratches.obtain();
    int u     = scratches.obtain();
    int k     = scratches.obtain();

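    // The ORR/AND pairs below spread each 16-bit texel across two disjoint
    // fields of a 32-bit word (see the masks above), so a single multiply by
    // the 'prec'-bit weight scales every component at once.  The adjusted
    // texel format set up just above lets the caller extract each component
    // back out of the 32-bit accumulator afterwards.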
    // RB -> U * V
    int offset = pixel;
    CONTEXT_LOAD(offset, generated_vars.rt);
    CONTEXT_LOAD(u, generated_vars.lb);
    ADD(AL, 0, offset, offset, u);

    LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
    SMULBB(AL, u, U, V);
    ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
    build_and_immediate(pixel, pixel, mask, 32);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MUL(AL, 0, d, pixel, u);
    RSB(AL, 0, k, u, imm(1<<prec));

    // LB -> (1-U) * V
    CONTEXT_LOAD(offset, generated_vars.lb);
    RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
    LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
    SMULBB(AL, u, U, V);
    ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
    build_and_immediate(pixel, pixel, mask, 32);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, d, pixel, u, d);
    SUB(AL, 0, k, k, u);

    // LT -> (1-U)*(1-V)
    RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
    LDRH(AL, pixel, txPtr.reg);
    SMULBB(AL, u, U, V);
    ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
    build_and_immediate(pixel, pixel, mask, 32);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, d, pixel, u, d);

    // RT -> U*(1-V)
    CONTEXT_LOAD(offset, generated_vars.rt);
    LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
    SUB(AL, 0, u, k, u);
    ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
    build_and_immediate(pixel, pixel, mask, 32);
    MLA(AL, 0, texel.reg, pixel, u, d);
}

void GGLAssembler::filter24(
        const fragment_parts_t& /*parts*/,
        pixel_t& texel, const texture_unit_t& /*tmu*/,
        int /*U*/, int /*V*/, pointer_t& txPtr,
        int /*FRAC_BITS*/)
{
    // not supported yet (currently disabled)
    load(txPtr, texel, 0);
}

void GGLAssembler::filter32(
        const fragment_parts_t& /*parts*/,
        pixel_t& texel, const texture_unit_t& /*tmu*/,
        int U, int V, pointer_t& txPtr,
        int FRAC_BITS)
{
    const int adjust = FRAC_BITS*2 - 8;
    const int round = 0;

    // ------------------------
    // about ~38 cycles / pixel
    Scratch scratches(registerFile());

    int pixel = scratches.obtain();
    int dh    = scratches.obtain();
    int u     = scratches.obtain();
    int k     = scratches.obtain();

    int temp  = scratches.obtain();
    int dl    = scratches.obtain();
    int mask  = scratches.obtain();

    MOV(AL, 0, mask, imm(0xFF));
    ORR(AL, 0, mask, mask, imm(0xFF0000));

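    // Same packed-multiply idea as filter16: with mask = 0x00FF00FF, two of
    // the four 8-bit components are scaled per multiply.  dh accumulates the
    // components found at bits 0 and 16 of each texel, dl those at bits 8
    // and 24; they are recombined into a single 8888 word at the end.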
    // RB -> U * V
    int offset = pixel;
    CONTEXT_LOAD(offset, generated_vars.rt);
    CONTEXT_LOAD(u, generated_vars.lb);
    ADD(AL, 0, offset, offset, u);

    LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SMULBB(AL, u, U, V);
    AND(AL, 0, temp, mask, pixel);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MUL(AL, 0, dh, temp, u);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MUL(AL, 0, dl, temp, u);
    RSB(AL, 0, k, u, imm(0x100));

    // LB -> (1-U) * V
    CONTEXT_LOAD(offset, generated_vars.lb);
    RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
    LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SMULBB(AL, u, U, V);
    AND(AL, 0, temp, mask, pixel);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, dh, temp, u, dh);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MLA(AL, 0, dl, temp, u, dl);
    SUB(AL, 0, k, k, u);

    // LT -> (1-U)*(1-V)
    RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
    LDR(AL, pixel, txPtr.reg);
    SMULBB(AL, u, U, V);
    AND(AL, 0, temp, mask, pixel);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, dh, temp, u, dh);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MLA(AL, 0, dl, temp, u, dl);

    // RT -> U*(1-V)
    CONTEXT_LOAD(offset, generated_vars.rt);
    LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SUB(AL, 0, u, k, u);
    AND(AL, 0, temp, mask, pixel);
    MLA(AL, 0, dh, temp, u, dh);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MLA(AL, 0, dl, temp, u, dl);

    AND(AL, 0, dh, mask, reg_imm(dh, LSR, 8));
    AND(AL, 0, dl, dl, reg_imm(mask, LSL, 8));
    ORR(AL, 0, texel.reg, dh, dl);
}

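// Apply the texture environment (REPLACE, MODULATE, DECAL, BLEND or ADD) of
// every active unit to one color component.  A unit is skipped when a later
// unit replaces this component outright (tmu.replaced); for 1:1-mapped
// multitexture the texel is fetched here into a scratch register first.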
void GGLAssembler::build_texture_environment(
        component_t& fragment,
        const fragment_parts_t& parts,
        int component,
        Scratch& regs)
{
    const uint32_t component_mask = 1<<component;
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];

        if (tmu.mask & component_mask) {
            // replace or modulate with this texture
            if ((tmu.replaced & component_mask) == 0) {
                // not replaced by a later tmu...

                Scratch scratches(registerFile());
                pixel_t texel(parts.texel[i]);

                if (multiTexture &&
                    tmu.swrap == GGL_NEEDS_WRAP_11 &&
                    tmu.twrap == GGL_NEEDS_WRAP_11)
                {
                    texel.reg = scratches.obtain();
                    texel.flags |= CORRUPTIBLE;
                    comment("fetch texel (multitexture 1:1)");
                    load(parts.coords[i].ptr, texel, WRITE_BACK);
                }

                component_t incoming(fragment);
                modify(fragment, regs);

                switch (tmu.env) {
                case GGL_REPLACE:
                    extract(fragment, texel, component);
                    break;
                case GGL_MODULATE:
                    modulate(fragment, incoming, texel, component);
                    break;
                case GGL_DECAL:
                    decal(fragment, incoming, texel, component);
                    break;
                case GGL_BLEND:
                    blend(fragment, incoming, texel, component, i);
                    break;
                case GGL_ADD:
                    add(fragment, incoming, texel, component);
                    break;
                }
            }
        }
    }
}

// ---------------------------------------------------------------------------

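// Rough C equivalent of what wrapping() emits (illustrative only; coord is
// the 16.16 iterated coordinate and tx_linear is the caller's FRAC_BITS):
//   REPEAT:                 d = (int32_t)(((int64_t)(coord >> (16 - tx_linear)) * size) >> 16);
//   CLAMP_TO_EDGE, linear:  d = coord >> (16 - tx_linear);
//   CLAMP_TO_EDGE, nearest: d = coord >> 16;
//                           if (d < 0)     d = 0;
//                           if (d >= size) d = size - 1;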
void GGLAssembler::wrapping(
        int d,
        int coord, int size,
        int tx_wrap, int tx_linear)
{
    // notes:
    // if tx_linear is set, we need 4 extra bits of precision on the result
    // SMULL/UMULL is 3 cycles
    Scratch scratches(registerFile());
    int c = coord;
    if (tx_wrap == GGL_NEEDS_WRAP_REPEAT) {
        // UMULL takes 4 cycles (interlocked), and we can get away with
        // 2 cycles using SMULWB, but we're losing 16 bits of precision
        // out of 32 (this is not a problem because the iterator keeps
        // its full precision)
        // UMULL(AL, 0, size, d, c, size);
        // note: we can't use SMULTB because it's signed.
        MOV(AL, 0, d, reg_imm(c, LSR, 16-tx_linear));
        SMULWB(AL, d, d, size);
    } else if (tx_wrap == GGL_NEEDS_WRAP_CLAMP_TO_EDGE) {
        if (tx_linear) {
            // 1 cycle
            MOV(AL, 0, d, reg_imm(coord, ASR, 16-tx_linear));
        } else {
            // 4 cycles (common case)
            MOV(AL, 0, d, reg_imm(coord, ASR, 16));
            BIC(AL, 0, d, d, reg_imm(d, ASR, 31));
            CMP(AL, d, size);
            SUB(GE, 0, d, size, imm(1));
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::modulate(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component)
{
    Scratch locals(registerFile());
    integer_t texel(locals.obtain(), 32, CORRUPTIBLE);
    extract(texel, incomingTexel, component);

    const int Nt = texel.size();
        // Nt should always be less than 10 bits because it comes
        // from the TMU.

    int Ni = incoming.size();
        // Ni could be big because it comes from previous MODULATEs

    if (Nt == 1) {
        // texel acts as a bit-mask
        // dest = incoming & ((texel << incoming.h)-texel)
        RSB(AL, 0, dest.reg, texel.reg, reg_imm(texel.reg, LSL, incoming.h));
        AND(AL, 0, dest.reg, dest.reg, incoming.reg);
        dest.l = incoming.l;
        dest.h = incoming.h;
        dest.flags |= (incoming.flags & CLEAR_LO);
    } else if (Ni == 1) {
        MOV(AL, 0, dest.reg, reg_imm(incoming.reg, LSL, 31-incoming.h));
        AND(AL, 0, dest.reg, texel.reg, reg_imm(dest.reg, ASR, 31));
        dest.l = 0;
        dest.h = Nt;
    } else {
        int inReg = incoming.reg;
        int shift = incoming.l;
        if ((Nt + Ni) > 32) {
            // we will overflow, reduce the precision of Ni to 8 bits
            // (Note Nt cannot be more than 10 bits, which happens with
            // 565 textures and GGL_LINEAR)
            shift += Ni-8;
            Ni = 8;
        }

        // modulate by the component with the lowest precision
        if (Nt >= Ni) {
            if (shift) {
                // XXX: we should be able to avoid this shift
                // when shift==16 && Nt<16 && Ni<16, in which
                // case we could use SMULBT below.
                MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift));
                inReg = dest.reg;
                shift = 0;
            }
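            // The normalization trick used below (and in the other branch):
            // the factor holding N significant bits is boosted to
            // x + (x>>(N-1)), which turns the exact division by (1<<N)-1
            // into a plain >>N.  At x = 0 and at x = (1<<N)-1 this is exact,
            // so modulating by full intensity is the identity.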
            // operation:   (Cf*Ct)/((1<<Ni)-1)
            // approximated with: Cf*(Ct + Ct>>(Ni-1))>>Ni
            // this operation doesn't change texel's size
            ADD(AL, 0, dest.reg, inReg, reg_imm(inReg, LSR, Ni-1));
            if (Nt<16 && Ni<16) SMULBB(AL, dest.reg, texel.reg, dest.reg);
            else                MUL(AL, 0, dest.reg, texel.reg, dest.reg);
            dest.l = Ni;
            dest.h = Nt + Ni;
        } else {
            if (shift && (shift != 16)) {
                // if shift==16, we can use 16-bit mul instructions later
                MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift));
                inReg = dest.reg;
                shift = 0;
            }
            // operation:   (Cf*Ct)/((1<<Nt)-1)
            // approximated with: Ct*(Cf + Cf>>(Nt-1))>>Nt
            // this operation doesn't change incoming's size
            Scratch scratches(registerFile());
            int t = (texel.flags & CORRUPTIBLE) ? texel.reg : dest.reg;
            if (t == inReg)
                t = scratches.obtain();
            ADD(AL, 0, t, texel.reg, reg_imm(texel.reg, LSR, Nt-1));
            if (Nt<16 && Ni<16) {
                if (shift==16) SMULBT(AL, dest.reg, t, inReg);
                else           SMULBB(AL, dest.reg, t, inReg);
            } else MUL(AL, 0, dest.reg, t, inReg);
            dest.l = Nt;
            dest.h = Nt + Ni;
        }

        // low bits are not valid
        dest.flags |= CLEAR_LO;

        // no need to keep more than 8 bits/component
        if (dest.size() > 8)
            dest.l = dest.h-8;
    }
}

void GGLAssembler::decal(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component)
{
    // RGBA:
    // Cv = Cf*(1 - At) + Ct*At = Cf + (Ct - Cf)*At
    // Av = Af
    Scratch locals(registerFile());
    integer_t texel(locals.obtain(), 32, CORRUPTIBLE);
    integer_t factor(locals.obtain(), 32, CORRUPTIBLE);
    extract(texel, incomingTexel, component);
    extract(factor, incomingTexel, GGLFormat::ALPHA);

    // no need to keep more than 8 bits for decal
    int Ni = incoming.size();
    int shift = incoming.l;
    if (Ni > 8) {
        shift += Ni-8;
        Ni = 8;
    }
    integer_t incomingNorm(incoming.reg, Ni, incoming.flags);
    if (shift) {
        MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift));
        incomingNorm.reg = dest.reg;
        incomingNorm.flags |= CORRUPTIBLE;
    }
    ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1));
    build_blendOneMinusFF(dest, factor, incomingNorm, texel);
}

void GGLAssembler::blend(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component, int tmu)
{
    // RGBA:
    // Cv = (1 - Ct)*Cf + Ct*Cc = Cf + (Cc - Cf)*Ct
    // Av = At*Af

    if (component == GGLFormat::ALPHA) {
        modulate(dest, incoming, incomingTexel, component);
        return;
    }

    Scratch locals(registerFile());
    integer_t color(locals.obtain(), 8, CORRUPTIBLE);
    integer_t factor(locals.obtain(), 32, CORRUPTIBLE);
    LDRB(AL, color.reg, mBuilderContext.Rctx,
            immed12_pre(GGL_OFFSETOF(state.texture[tmu].env_color[component])));
    extract(factor, incomingTexel, component);

    // no need to keep more than 8 bits for blend
    int Ni = incoming.size();
    int shift = incoming.l;
    if (Ni > 8) {
        shift += Ni-8;
        Ni = 8;
    }
    integer_t incomingNorm(incoming.reg, Ni, incoming.flags);
    if (shift) {
        MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift));
        incomingNorm.reg = dest.reg;
        incomingNorm.flags |= CORRUPTIBLE;
    }
    ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1));
    build_blendOneMinusFF(dest, factor, incomingNorm, color);
}

void GGLAssembler::add(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component)
{
    // RGBA:
    // Cv = Cf + Ct
    Scratch locals(registerFile());

    component_t incomingTemp(incoming);

    // use "dest" as a temporary for extracting the texel, unless "dest"
    // overlaps "incoming".
    integer_t texel(dest.reg, 32, CORRUPTIBLE);
    if (dest.reg == incomingTemp.reg)
        texel.reg = locals.obtain();
    extract(texel, incomingTexel, component);

    if (texel.s < incomingTemp.size()) {
        expand(texel, texel, incomingTemp.size());
    } else if (texel.s > incomingTemp.size()) {
        if (incomingTemp.flags & CORRUPTIBLE) {
            expand(incomingTemp, incomingTemp, texel.s);
        } else {
            incomingTemp.reg = locals.obtain();
            expand(incomingTemp, incoming, texel.s);
        }
    }

    if (incomingTemp.l) {
        ADD(AL, 0, dest.reg, texel.reg,
                reg_imm(incomingTemp.reg, LSR, incomingTemp.l));
    } else {
        ADD(AL, 0, dest.reg, texel.reg, incomingTemp.reg);
    }
    dest.l = 0;
    dest.h = texel.size();
    component_sat(dest);
}

// ----------------------------------------------------------------------------

}; // namespace android