/* libs/pixelflinger/codeflinger/texturing.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#define LOG_TAG "pixelflinger-code"

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

#include <log/log.h>

#include "GGLAssembler.h"

namespace android {

// ---------------------------------------------------------------------------

// iterators are initialized like this:
// (intToFixedCenter(x) * dx)>>16 + x0
// ((x<<16 + 0x8000) * dx)>>16 + x0
// ((x<<16)*dx + (0x8000*dx))>>16 + x0
// ( (x*dx) + dx>>1 ) + x0
// (x*dx) + (dx>>1 + x0)

void GGLAssembler::init_iterated_color(fragment_parts_t& parts, const reg_t& x)
{
    context_t const* c = mBuilderContext.c;

    if (mSmooth) {
        // NOTE: we could take this case in the mDithering + !mSmooth case,
        // but this would use up to 4 more registers for the color components
        // for only a little added quality.
        // Currently, this causes the system to run out of registers in
        // some cases (see issue #719496)

        comment("compute initial iterated color (smooth and/or dither case)");

        parts.iterated_packed = 0;
        parts.packed = 0;

        // 0x1: color component
        // 0x2: iterators
        const int optReload = mOptLevel >> 1;
        if (optReload >= 3)         parts.reload = 0; // reload nothing
        else if (optReload == 2)    parts.reload = 2; // reload iterators
        else if (optReload == 1)    parts.reload = 1; // reload colors
        else if (optReload <= 0)    parts.reload = 3; // reload both

        if (!mSmooth) {
            // we're not smoothing (just dithering), we never have to
            // reload the iterators
            parts.reload &= ~2;
        }

        Scratch scratches(registerFile());
        const int t0 = (parts.reload & 1) ? scratches.obtain() : 0;
        const int t1 = (parts.reload & 2) ? scratches.obtain() : 0;
        for (int i=0 ; i<4 ; i++) {
            if (!mInfo[i].iterated)
                continue;

            // this component exists in the destination and is not replaced
            // by a texture unit.
            const int c = (parts.reload & 1) ? t0 : obtainReg();
            if (i==0) CONTEXT_LOAD(c, iterators.ydady);
            if (i==1) CONTEXT_LOAD(c, iterators.ydrdy);
            if (i==2) CONTEXT_LOAD(c, iterators.ydgdy);
            if (i==3) CONTEXT_LOAD(c, iterators.ydbdy);
            parts.argb[i].reg = c;

            if (mInfo[i].smooth) {
                parts.argb_dx[i].reg = (parts.reload & 2) ? t1 : obtainReg();
                const int dvdx = parts.argb_dx[i].reg;
                CONTEXT_LOAD(dvdx, generated_vars.argb[i].dx);
                MLA(AL, 0, c, x.reg, dvdx, c);

                // adjust the color iterator to make sure it won't overflow
                if (!mAA) {
                    // this is not needed when we're using anti-aliasing
                    // because we will (have to) clamp the components
                    // anyway.
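                    // A sketch of what the next four instructions do (as I
                    // read the generated code): 'end' is set to the iterated
                    // value at the last pixel of the span,
                    //     end = c + dvdx*(count>>16)
                    // with the MLA setting the flags. If that value wrapped
                    // around into the sign bit, the conditional SUB pulls c
                    // down by 'end'; the final BIC (c & ~(c>>31)) then clamps
                    // a negative c to zero.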
                    int end = scratches.obtain();
                    MOV(AL, 0, end, reg_imm(parts.count.reg, LSR, 16));
                    MLA(AL, 1, end, dvdx, end, c);
                    SUB(MI, 0, c, c, end);
                    BIC(AL, 0, c, c, reg_imm(c, ASR, 31));
                    scratches.recycle(end);
                }
            }

            if (parts.reload & 1) {
                CONTEXT_STORE(c, generated_vars.argb[i].c);
            }
        }
    } else {
        // We're not smoothing, so we can
        // just use a packed version of the color and extract the
        // components as needed (or not at all if we don't blend)

        // figure out if we need the iterated color
        int load = 0;
        for (int i=0 ; i<4 ; i++) {
            component_info_t& info = mInfo[i];
            if ((info.inDest || info.needed) && !info.replaced)
                load |= 1;
        }

        parts.iterated_packed = 1;
        parts.packed = (!mTextureMachine.mask && !mBlending
                && !mFog && !mDithering);
        parts.reload = 0;
        if (load || parts.packed) {
            if (mBlending || mDithering || mInfo[GGLFormat::ALPHA].needed) {
                comment("load initial iterated color (8888 packed)");
                parts.iterated.setTo(obtainReg(),
                        &(c->formats[GGL_PIXEL_FORMAT_RGBA_8888]));
                CONTEXT_LOAD(parts.iterated.reg, packed8888);
            } else {
                comment("load initial iterated color (dest format packed)");

                parts.iterated.setTo(obtainReg(), &mCbFormat);

                // pre-mask the iterated color
                const int bits = parts.iterated.size();
                const uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
                uint32_t mask = 0;
                if (mMasking) {
                    for (int i=0 ; i<4 ; i++) {
                        const int component_mask = 1<<i;
                        const int h = parts.iterated.format.c[i].h;
                        const int l = parts.iterated.format.c[i].l;
                        if (h && (!(mMasking & component_mask))) {
                            mask |= ((1<<(h-l))-1) << l;
                        }
                    }
                }

                if (mMasking && ((mask & size)==0)) {
                    // none of the components are present in the mask
                } else {
                    CONTEXT_LOAD(parts.iterated.reg, packed);
                    if (mCbFormat.size == 1) {
                        AND(AL, 0, parts.iterated.reg,
                                parts.iterated.reg, imm(0xFF));
                    } else if (mCbFormat.size == 2) {
                        MOV(AL, 0, parts.iterated.reg,
                                reg_imm(parts.iterated.reg, LSR, 16));
                    }
                }

                // pre-mask the iterated color
                if (mMasking) {
                    build_and_immediate(parts.iterated.reg, parts.iterated.reg,
                            mask, bits);
                }
            }
        }
    }
}

void GGLAssembler::build_iterated_color(
        component_t& fragment,
        const fragment_parts_t& parts,
        int component,
        Scratch& regs)
{
    fragment.setTo( regs.obtain(), 0, 32, CORRUPTIBLE);

    if (!mInfo[component].iterated)
        return;

    if (parts.iterated_packed) {
        // iterated colors are packed, extract the one we need
        extract(fragment, parts.iterated, component);
    } else {
        fragment.h = GGL_COLOR_BITS;
        fragment.l = GGL_COLOR_BITS - 8;
        fragment.flags |= CLEAR_LO;
        // iterated colors are held in their own register,
        // (smooth and/or dithering case)
        if (parts.reload==3) {
            // this implies mSmooth
            Scratch scratches(registerFile());
            int dx = scratches.obtain();
            CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
            CONTEXT_LOAD(dx, generated_vars.argb[component].dx);
            ADD(AL, 0, dx, fragment.reg, dx);
            CONTEXT_STORE(dx, generated_vars.argb[component].c);
        } else if (parts.reload & 1) {
            CONTEXT_LOAD(fragment.reg, generated_vars.argb[component].c);
        } else {
            // we don't reload, so simply rename the register and mark as
            // non-CORRUPTIBLE so that the texture env or blending code
            // won't modify this (renamed) register
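            // (What the rename amounts to, as I read it: the scratch
            // obtained at the top of this function goes back to the pool
            // and 'fragment' aliases the register already holding the
            // iterated component; clearing CORRUPTIBLE below keeps later
            // stages from clobbering it.)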
            regs.recycle(fragment.reg);
            fragment.reg = parts.argb[component].reg;
            fragment.flags &= ~CORRUPTIBLE;
        }
        if (mInfo[component].smooth && mAA) {
            // when using smooth shading AND anti-aliasing, we need to clamp
            // the iterators because there is always an extra pixel on the
            // edges, which most of the time will cause an overflow
            // (since technically it's outside of the domain).
            BIC(AL, 0, fragment.reg, fragment.reg,
                    reg_imm(fragment.reg, ASR, 31));
            component_sat(fragment);
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::decodeLogicOpNeeds(const needs_t& needs)
{
    // gather some information about the components we need to process...
    const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
    switch(opcode) {
    case GGL_COPY:
        mLogicOp = 0;
        break;
    case GGL_CLEAR:
    case GGL_SET:
        mLogicOp = LOGIC_OP;
        break;
    case GGL_AND:
    case GGL_AND_REVERSE:
    case GGL_AND_INVERTED:
    case GGL_XOR:
    case GGL_OR:
    case GGL_NOR:
    case GGL_EQUIV:
    case GGL_OR_REVERSE:
    case GGL_OR_INVERTED:
    case GGL_NAND:
        mLogicOp = LOGIC_OP|LOGIC_OP_SRC|LOGIC_OP_DST;
        break;
    case GGL_NOOP:
    case GGL_INVERT:
        mLogicOp = LOGIC_OP|LOGIC_OP_DST;
        break;
    case GGL_COPY_INVERTED:
        mLogicOp = LOGIC_OP|LOGIC_OP_SRC;
        break;
    }
}

void GGLAssembler::decodeTMUNeeds(const needs_t& needs, context_t const* c)
{
    uint8_t replaced=0;
    mTextureMachine.mask = 0;
    mTextureMachine.activeUnits = 0;
    for (int i=GGL_TEXTURE_UNIT_COUNT-1 ; i>=0 ; i--) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (replaced == 0xF) {
            // all components are replaced, skip this TMU.
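            // (The loop walks the TMUs from last to first, so 'replaced'
            // accumulates the components that a later unit overwrites with
            // GGL_REPLACE; once all four components are covered, earlier
            // units can't contribute anything visible.)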
            tmu.format_idx = 0;
            tmu.mask = 0;
            tmu.replaced = replaced;
            continue;
        }
        tmu.format_idx = GGL_READ_NEEDS(T_FORMAT, needs.t[i]);
        tmu.format = c->formats[tmu.format_idx];
        tmu.bits = tmu.format.size*8;
        tmu.swrap = GGL_READ_NEEDS(T_S_WRAP, needs.t[i]);
        tmu.twrap = GGL_READ_NEEDS(T_T_WRAP, needs.t[i]);
        tmu.env = ggl_needs_to_env(GGL_READ_NEEDS(T_ENV, needs.t[i]));
        tmu.pot = GGL_READ_NEEDS(T_POT, needs.t[i]);
        tmu.linear = GGL_READ_NEEDS(T_LINEAR, needs.t[i])
                && tmu.format.size!=3; // XXX: only 8, 16 and 32 modes for now

        // 5551 linear filtering is not supported
        if (tmu.format_idx == GGL_PIXEL_FORMAT_RGBA_5551)
            tmu.linear = 0;

        tmu.mask = 0;
        tmu.replaced = replaced;

        if (tmu.format_idx) {
            mTextureMachine.activeUnits++;
            if (tmu.format.c[0].h)    tmu.mask |= 0x1;
            if (tmu.format.c[1].h)    tmu.mask |= 0x2;
            if (tmu.format.c[2].h)    tmu.mask |= 0x4;
            if (tmu.format.c[3].h)    tmu.mask |= 0x8;
            if (tmu.env == GGL_REPLACE) {
                replaced |= tmu.mask;
            } else if (tmu.env == GGL_DECAL) {
                if (!tmu.format.c[GGLFormat::ALPHA].h) {
                    // if we don't have alpha, decal does nothing
                    tmu.mask = 0;
                } else {
                    // decal always ignores At
                    tmu.mask &= ~(1<<GGLFormat::ALPHA);
                }
            }
        }
        mTextureMachine.mask |= tmu.mask;
        //printf("%d: mask=%08lx, replaced=%08lx\n",
        //    i, int(tmu.mask), int(tmu.replaced));
    }
    mTextureMachine.replaced = replaced;
    mTextureMachine.directTexture = 0;
    //printf("replaced=%08lx\n", mTextureMachine.replaced);
}


void GGLAssembler::init_textures(
        tex_coord_t* coords,
        const reg_t& x, const reg_t& y)
{
    const needs_t& needs = mBuilderContext.needs;
    int Rx = x.reg;
    int Ry = y.reg;

    if (mTextureMachine.mask) {
        comment("compute texture coordinates");
    }

    // init texture coordinates for each tmu
    const int cb_format_idx = GGL_READ_NEEDS(CB_FORMAT, needs.n);
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;
        if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
            (tmu.twrap == GGL_NEEDS_WRAP_11))
        {
            // 1:1 texture
            pointer_t& txPtr = coords[i].ptr;
            txPtr.setTo(obtainReg(), tmu.bits);
            CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydsdy);
            ADD(AL, 0, Rx, Rx, reg_imm(txPtr.reg, ASR, 16));    // x += (s>>16)
            CONTEXT_LOAD(txPtr.reg, state.texture[i].iterators.ydtdy);
            ADD(AL, 0, Ry, Ry, reg_imm(txPtr.reg, ASR, 16));    // y += (t>>16)
            // merge base & offset
            CONTEXT_LOAD(txPtr.reg, generated_vars.texture[i].stride);
            SMLABB(AL, Rx, Ry, txPtr.reg, Rx);                  // x+y*stride
            CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data);
            base_offset(txPtr, txPtr, Rx);
        } else {
            Scratch scratches(registerFile());
            reg_t& s = coords[i].s;
            reg_t& t = coords[i].t;
            // s = (x * dsdx)>>16 + ydsdy
            // s = (x * dsdx)>>16 + (y*dsdy)>>16 + s0
            // t = (x * dtdx)>>16 + ydtdy
            // t = (x * dtdx)>>16 + (y*dtdy)>>16 + t0
            s.setTo(obtainReg());
            t.setTo(obtainReg());
            const int need_w = GGL_READ_NEEDS(W, needs.n);
            if (need_w) {
                CONTEXT_LOAD(s.reg, state.texture[i].iterators.ydsdy);
                CONTEXT_LOAD(t.reg, state.texture[i].iterators.ydtdy);
            } else {
                int ydsdy = scratches.obtain();
                int ydtdy = scratches.obtain();
                CONTEXT_LOAD(s.reg, generated_vars.texture[i].dsdx);
                CONTEXT_LOAD(ydsdy, state.texture[i].iterators.ydsdy);
                CONTEXT_LOAD(t.reg, generated_vars.texture[i].dtdx);
                CONTEXT_LOAD(ydtdy, state.texture[i].iterators.ydtdy);
                MLA(AL, 0, s.reg, Rx, s.reg, ydsdy);
                MLA(AL, 0, t.reg, Rx, t.reg, ydtdy);
            }
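            // (A note on the spill below: when the low bit of mOptLevel is
            // clear, the s/t iterators are not kept live in registers
            // across code blocks; they are written to the context here and
            // reloaded by build_textures() and
            // build_iterate_texture_coordinates().)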

            if ((mOptLevel&1)==0) {
                CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
                CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
                recycleReg(s.reg);
                recycleReg(t.reg);
            }
        }

        // direct texture?
        if (!multiTexture && !mBlending && !mDithering && !mFog &&
            cb_format_idx == tmu.format_idx && !tmu.linear &&
            mTextureMachine.replaced == tmu.mask)
        {
            mTextureMachine.directTexture = i + 1;
        }
    }
}

void GGLAssembler::build_textures(  fragment_parts_t& parts,
                                    Scratch& regs)
{
    // We don't have a way to spill registers automatically, so we
    // spill the depth and AA registers ourselves when we know we
    // may have to. Build the spill list...
    uint32_t spill_list = 0;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;
        if (tmu.linear) {
            // we may run out of registers if we have linear filtering
            // at 1 or 4 bytes / pixel on any texture unit.
            if (tmu.format.size == 1) {
                // if depth and AA are enabled, we'll be short one register
                if (parts.z.reg > 0 && parts.covPtr.reg > 0)
                    spill_list |= 1<<parts.covPtr.reg;
            }
            if (tmu.format.size == 4) {
                // if depth or AA is enabled, we'll be short one or two
                // registers
                if (parts.z.reg > 0)
                    spill_list |= 1<<parts.z.reg;
                if (parts.covPtr.reg > 0)
                    spill_list |= 1<<parts.covPtr.reg;
            }
        }
    }

    Spill spill(registerFile(), *this, spill_list);

    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) {
        const texture_unit_t& tmu = mTextureMachine.tmu[i];
        if (tmu.format_idx == 0)
            continue;

        pointer_t& txPtr = parts.coords[i].ptr;
        pixel_t& texel = parts.texel[i];

        // repeat...
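        // Two fetch paths follow: with 1:1 wrapping on both axes the texel
        // pointer was fully computed in init_textures() and we simply load
        // through it; otherwise u/v are derived from the s/t iterators
        // with repeat/clamp handling, plus bilinear offsets when filtering
        // is enabled.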
        if ((tmu.swrap == GGL_NEEDS_WRAP_11) &&
            (tmu.twrap == GGL_NEEDS_WRAP_11))
        { // 1:1 textures
            comment("fetch texel");
            texel.setTo(regs.obtain(), &tmu.format);
            load(txPtr, texel, WRITE_BACK);
        } else {
            Scratch scratches(registerFile());
            reg_t& s = parts.coords[i].s;
            reg_t& t = parts.coords[i].t;
            if ((mOptLevel&1)==0) {
                comment("reload s/t (multitexture or linear filtering)");
                s.reg = scratches.obtain();
                t.reg = scratches.obtain();
                CONTEXT_LOAD(s.reg, generated_vars.texture[i].spill[0]);
                CONTEXT_LOAD(t.reg, generated_vars.texture[i].spill[1]);
            }

            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                return;

            comment("compute repeat/clamp");
            int u       = scratches.obtain();
            int v       = scratches.obtain();
            int width   = scratches.obtain();
            int height  = scratches.obtain();
            int U = 0;
            int V = 0;

            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                return;

            CONTEXT_LOAD(width,  generated_vars.texture[i].width);
            CONTEXT_LOAD(height, generated_vars.texture[i].height);

            int FRAC_BITS = 0;
            if (tmu.linear) {
                // linear interpolation
                if (tmu.format.size == 1) {
                    // for 8-bit textures, we can afford
                    // 7 bits of fractional precision at no
                    // additional cost (we can't do 8 bits
                    // because filter8 uses signed 16 bits muls)
                    FRAC_BITS = 7;
                } else if (tmu.format.size == 2) {
                    // filter16() is internally limited to 4 bits, so:
                    // FRAC_BITS=2 generates fewer instructions,
                    // FRAC_BITS=3,4,5 creates unpleasant artifacts,
                    // FRAC_BITS=6+ looks good
                    FRAC_BITS = 6;
                } else if (tmu.format.size == 4) {
                    // filter32() is internally limited to 8 bits, so:
                    // FRAC_BITS=4 looks good
                    // FRAC_BITS=5+ looks better, but generates 3 extra ipp
                    FRAC_BITS = 6;
                } else {
                    // for all other cases we use 4 bits.
                    FRAC_BITS = 4;
                }
            }
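            // A quick sketch of wrapping(), defined further down (my
            // reading): for REPEAT the 32-bit s/t value acts as a pure
            // fraction of the texture, so with width=256 and FRAC_BITS=4,
            // s = 0.5 (0x80000000) becomes
            //     ((0x80000000 >> 12) * 256) >> 16 = 128<<4
            // i.e. texel 128 with 4 fractional bits for the filter. For
            // CLAMP_TO_EDGE the 16.16 integer part is clamped to
            // [0, size-1] instead.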
            wrapping(u, s.reg, width,  tmu.swrap, FRAC_BITS);
            wrapping(v, t.reg, height, tmu.twrap, FRAC_BITS);

            if (tmu.linear) {
                comment("compute linear filtering offsets");
                // pixel size scale
                const int shift = 31 - gglClz(tmu.format.size);
                U = scratches.obtain();
                V = scratches.obtain();

                if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                    return;

                // sample the texel center
                SUB(AL, 0, u, u, imm(1<<(FRAC_BITS-1)));
                SUB(AL, 0, v, v, imm(1<<(FRAC_BITS-1)));

                // get the fractional part of U,V
                AND(AL, 0, U, u, imm((1<<FRAC_BITS)-1));
                AND(AL, 0, V, v, imm((1<<FRAC_BITS)-1));

                // compute width-1 and height-1
                SUB(AL, 0, width,  width,  imm(1));
                SUB(AL, 0, height, height, imm(1));

                // get the integer part of U,V and clamp/wrap
                // and compute offset to the next texel
                if (tmu.swrap == GGL_NEEDS_WRAP_REPEAT) {
                    // u has already been REPEATed
                    MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));
                    MOV(MI, 0, u, width);
                    CMP(AL, u, width);
                    MOV(LT, 0, width, imm(1 << shift));
                    if (shift)
                        MOV(GE, 0, width, reg_imm(width, LSL, shift));
                    RSB(GE, 0, width, width, imm(0));
                } else {
                    // u has not been CLAMPed yet
                    //
                    // algorithm:
                    //      if ((u>>4) >= width)
                    //          u = width<<4
                    //          width = 0
                    //      else
                    //          width = 1<<shift
                    //      u = u>>4; // get integer part
                    //      if (u<0)
                    //          u = 0
                    //          width = 0
                    //      generated_vars.rt = width
                    //
                    CMP(AL, width, reg_imm(u, ASR, FRAC_BITS));
                    MOV(LE, 0, u, reg_imm(width, LSL, FRAC_BITS));
                    MOV(LE, 0, width, imm(0));
                    MOV(GT, 0, width, imm(1 << shift));
                    MOV(AL, 1, u, reg_imm(u, ASR, FRAC_BITS));
                    MOV(MI, 0, u, imm(0));
                    MOV(MI, 0, width, imm(0));
                }
                CONTEXT_STORE(width, generated_vars.rt);

                const int stride = width;
                CONTEXT_LOAD(stride, generated_vars.texture[i].stride);
                if (tmu.twrap == GGL_NEEDS_WRAP_REPEAT) {
                    // v has already been REPEATed
                    MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));
                    MOV(MI, 0, v, height);
                    CMP(AL, v, height);
                    MOV(LT, 0, height, imm(1 << shift));
                    if (shift)
                        MOV(GE, 0, height, reg_imm(height, LSL, shift));
                    RSB(GE, 0, height, height, imm(0));
                    MUL(AL, 0, height, stride, height);
                } else {
                    // v has not been CLAMPed yet
                    CMP(AL, height, reg_imm(v, ASR, FRAC_BITS));
                    MOV(LE, 0, v, reg_imm(height, LSL, FRAC_BITS));
                    MOV(LE, 0, height, imm(0));
                    if (shift) {
                        MOV(GT, 0, height, reg_imm(stride, LSL, shift));
                    } else {
                        MOV(GT, 0, height, stride);
                    }
                    MOV(AL, 1, v, reg_imm(v, ASR, FRAC_BITS));
                    MOV(MI, 0, v, imm(0));
                    MOV(MI, 0, height, imm(0));
                }
                CONTEXT_STORE(height, generated_vars.lb);
            }

            scratches.recycle(width);
            scratches.recycle(height);

            // iterate texture coordinates...
            comment("iterate s,t");
            int dsdx = scratches.obtain();
            int dtdx = scratches.obtain();

            if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS)
                return;

            CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx);
            CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx);
            ADD(AL, 0, s.reg, s.reg, dsdx);
            ADD(AL, 0, t.reg, t.reg, dtdx);
            if ((mOptLevel&1)==0) {
                CONTEXT_STORE(s.reg, generated_vars.texture[i].spill[0]);
                CONTEXT_STORE(t.reg, generated_vars.texture[i].spill[1]);
                scratches.recycle(s.reg);
                scratches.recycle(t.reg);
            }
            scratches.recycle(dsdx);
            scratches.recycle(dtdx);

            // merge base & offset...
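            // At this point u and v hold integer texel coordinates, and
            // (as I read the clamp/repeat code above) generated_vars.rt /
            // generated_vars.lb hold the byte offsets to the right and
            // bottom neighbor texels -- zero at a clamped edge, negative
            // for a wrap-around -- which filter8/16/32 reload to address
            // the other three samples.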
613 comment("merge base & offset"); 614 texel.setTo(regs.obtain(), &tmu.format); 615 txPtr.setTo(texel.reg, tmu.bits); 616 int stride = scratches.obtain(); 617 618 if (registerFile().status() & RegisterFile::OUT_OF_REGISTERS) 619 return; 620 621 CONTEXT_LOAD(stride, generated_vars.texture[i].stride); 622 CONTEXT_ADDR_LOAD(txPtr.reg, generated_vars.texture[i].data); 623 SMLABB(AL, u, v, stride, u); // u+v*stride 624 base_offset(txPtr, txPtr, u); 625 626 // load texel 627 if (!tmu.linear) { 628 comment("fetch texel"); 629 load(txPtr, texel, 0); 630 } else { 631 // recycle registers we don't need anymore 632 scratches.recycle(u); 633 scratches.recycle(v); 634 scratches.recycle(stride); 635 636 comment("fetch texel, bilinear"); 637 switch (tmu.format.size) { 638 case 1: filter8(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break; 639 case 2: filter16(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break; 640 case 3: filter24(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break; 641 case 4: filter32(parts, texel, tmu, U, V, txPtr, FRAC_BITS); break; 642 } 643 } 644 } 645 } 646 } 647 648 void GGLAssembler::build_iterate_texture_coordinates( 649 const fragment_parts_t& parts) 650 { 651 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT; i++) { 652 const texture_unit_t& tmu = mTextureMachine.tmu[i]; 653 if (tmu.format_idx == 0) 654 continue; 655 656 if ((tmu.swrap == GGL_NEEDS_WRAP_11) && 657 (tmu.twrap == GGL_NEEDS_WRAP_11)) 658 { // 1:1 textures 659 const pointer_t& txPtr = parts.coords[i].ptr; 660 ADD(AL, 0, txPtr.reg, txPtr.reg, imm(txPtr.size>>3)); 661 } else { 662 Scratch scratches(registerFile()); 663 int s = parts.coords[i].s.reg; 664 int t = parts.coords[i].t.reg; 665 if ((mOptLevel&1)==0) { 666 s = scratches.obtain(); 667 t = scratches.obtain(); 668 CONTEXT_LOAD(s, generated_vars.texture[i].spill[0]); 669 CONTEXT_LOAD(t, generated_vars.texture[i].spill[1]); 670 } 671 int dsdx = scratches.obtain(); 672 int dtdx = scratches.obtain(); 673 CONTEXT_LOAD(dsdx, generated_vars.texture[i].dsdx); 674 CONTEXT_LOAD(dtdx, generated_vars.texture[i].dtdx); 675 ADD(AL, 0, s, s, dsdx); 676 ADD(AL, 0, t, t, dtdx); 677 if ((mOptLevel&1)==0) { 678 CONTEXT_STORE(s, generated_vars.texture[i].spill[0]); 679 CONTEXT_STORE(t, generated_vars.texture[i].spill[1]); 680 } 681 } 682 } 683 } 684 685 void GGLAssembler::filter8( 686 const fragment_parts_t& /*parts*/, 687 pixel_t& texel, const texture_unit_t& tmu, 688 int U, int V, pointer_t& txPtr, 689 int FRAC_BITS) 690 { 691 if (tmu.format.components != GGL_ALPHA && 692 tmu.format.components != GGL_LUMINANCE) 693 { 694 // this is a packed format, and we don't support 695 // linear filtering (it's probably RGB 332) 696 // Should not happen with OpenGL|ES 697 LDRB(AL, texel.reg, txPtr.reg); 698 return; 699 } 700 701 // ------------------------ 702 // about ~22 cycles / pixel 703 Scratch scratches(registerFile()); 704 705 int pixel= scratches.obtain(); 706 int d = scratches.obtain(); 707 int u = scratches.obtain(); 708 int k = scratches.obtain(); 709 int rt = scratches.obtain(); 710 int lb = scratches.obtain(); 711 712 // RB -> U * V 713 714 CONTEXT_LOAD(rt, generated_vars.rt); 715 CONTEXT_LOAD(lb, generated_vars.lb); 716 717 int offset = pixel; 718 ADD(AL, 0, offset, lb, rt); 719 LDRB(AL, pixel, txPtr.reg, reg_scale_pre(offset)); 720 SMULBB(AL, u, U, V); 721 SMULBB(AL, d, pixel, u); 722 RSB(AL, 0, k, u, imm(1<<(FRAC_BITS*2))); 723 724 // LB -> (1-U) * V 725 RSB(AL, 0, U, U, imm(1<<FRAC_BITS)); 726 LDRB(AL, pixel, txPtr.reg, reg_scale_pre(lb)); 727 SMULBB(AL, u, U, V); 728 SMLABB(AL, d, 
    SUB(AL, 0, k, k, u);

    // LT -> (1-U)*(1-V)
    RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
    LDRB(AL, pixel, txPtr.reg);
    SMULBB(AL, u, U, V);
    SMLABB(AL, d, pixel, u, d);

    // RT -> U*(1-V)
    LDRB(AL, pixel, txPtr.reg, reg_scale_pre(rt));
    SUB(AL, 0, u, k, u);
    SMLABB(AL, texel.reg, pixel, u, d);

    for (int i=0 ; i<4 ; i++) {
        if (!texel.format.c[i].h) continue;
        texel.format.c[i].h = FRAC_BITS*2+8;
        texel.format.c[i].l = FRAC_BITS*2; // keeping 8 bits is enough
    }
    texel.format.size = 4;
    texel.format.bitsPerPixel = 32;
    texel.flags |= CLEAR_LO;
}

void GGLAssembler::filter16(
        const fragment_parts_t& /*parts*/,
        pixel_t& texel, const texture_unit_t& tmu,
        int U, int V, pointer_t& txPtr,
        int FRAC_BITS)
{
    // compute the mask
    // XXX: it would be nice if the mask below could be computed
    // automatically.
    uint32_t mask = 0;
    int shift = 0;
    int prec = 0;
    switch (tmu.format_idx) {
    case GGL_PIXEL_FORMAT_RGB_565:
        // source: 00000ggg.ggg00000 | rrrrr000.000bbbbb
        // result: gggggggg.gggrrrrr | rrrrr0bb.bbbbbbbb
        mask = 0x07E0F81F;
        shift = 16;
        prec = 5;
        break;
    case GGL_PIXEL_FORMAT_RGBA_4444:
        // 0000,1111,0000,1111 | 0000,1111,0000,1111
        mask = 0x0F0F0F0F;
        shift = 12;
        prec = 4;
        break;
    case GGL_PIXEL_FORMAT_LA_88:
        // 0000,0000,1111,1111 | 0000,0000,1111,1111
        // AALL -> 00AA | 00LL
        mask = 0x00FF00FF;
        shift = 8;
        prec = 8;
        break;
    default:
        // unsupported format, do something sensible...
        ALOGE("Unsupported 16-bit texture format (%d)", tmu.format_idx);
        LDRH(AL, texel.reg, txPtr.reg);
        return;
    }

    const int adjust = FRAC_BITS*2 - prec;
    const int round  = 0;

    // update the texel format
    texel.format.size = 4;
    texel.format.bitsPerPixel = 32;
    texel.flags |= CLEAR_HI|CLEAR_LO;
    for (int i=0 ; i<4 ; i++) {
        if (!texel.format.c[i].h) continue;
        const uint32_t offset = (mask & tmu.format.mask(i)) ? 0 : shift;
        texel.format.c[i].h = tmu.format.c[i].h + offset + prec;
        texel.format.c[i].l = texel.format.c[i].h - (tmu.format.bits(i) + prec);
    }
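    // The trick used below, in short: ORR'ing the 16-bit pixel with itself
    // shifted left by 'shift' and masking leaves every component in its
    // own field of one 32-bit word, each with enough zero headroom above
    // it that a single MUL by the weight (at most 'prec' bits after the
    // >>adjust) scales all components at once without the fields
    // overflowing into each other. The per-format comments above show the
    // source and result layouts.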

    // ------------------------
    // about ~40 cycles / pixel
    Scratch scratches(registerFile());

    int pixel= scratches.obtain();
    int d    = scratches.obtain();
    int u    = scratches.obtain();
    int k    = scratches.obtain();

    // RB -> U * V
    int offset = pixel;
    CONTEXT_LOAD(offset, generated_vars.rt);
    CONTEXT_LOAD(u, generated_vars.lb);
    ADD(AL, 0, offset, offset, u);

    LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
    SMULBB(AL, u, U, V);
    ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
    build_and_immediate(pixel, pixel, mask, 32);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MUL(AL, 0, d, pixel, u);
    RSB(AL, 0, k, u, imm(1<<prec));

    // LB -> (1-U) * V
    CONTEXT_LOAD(offset, generated_vars.lb);
    RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
    LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
    SMULBB(AL, u, U, V);
    ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
    build_and_immediate(pixel, pixel, mask, 32);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, d, pixel, u, d);
    SUB(AL, 0, k, k, u);

    // LT -> (1-U)*(1-V)
    RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
    LDRH(AL, pixel, txPtr.reg);
    SMULBB(AL, u, U, V);
    ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
    build_and_immediate(pixel, pixel, mask, 32);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, d, pixel, u, d);

    // RT -> U*(1-V)
    CONTEXT_LOAD(offset, generated_vars.rt);
    LDRH(AL, pixel, txPtr.reg, reg_pre(offset));
    SUB(AL, 0, u, k, u);
    ORR(AL, 0, pixel, pixel, reg_imm(pixel, LSL, shift));
    build_and_immediate(pixel, pixel, mask, 32);
    MLA(AL, 0, texel.reg, pixel, u, d);
}

void GGLAssembler::filter24(
        const fragment_parts_t& /*parts*/,
        pixel_t& texel, const texture_unit_t& /*tmu*/,
        int /*U*/, int /*V*/, pointer_t& txPtr,
        int /*FRAC_BITS*/)
{
    // not supported yet (currently disabled)
    load(txPtr, texel, 0);
}

void GGLAssembler::filter32(
        const fragment_parts_t& /*parts*/,
        pixel_t& texel, const texture_unit_t& /*tmu*/,
        int U, int V, pointer_t& txPtr,
        int FRAC_BITS)
{
    const int adjust = FRAC_BITS*2 - 8;
    const int round  = 0;

    // ------------------------
    // about ~38 cycles / pixel
    Scratch scratches(registerFile());

    int pixel= scratches.obtain();
    int dh   = scratches.obtain();
    int u    = scratches.obtain();
    int k    = scratches.obtain();

    int temp = scratches.obtain();
    int dl   = scratches.obtain();
    int mask = scratches.obtain();

    MOV(AL, 0, mask, imm(0xFF));
    ORR(AL, 0, mask, mask, imm(0xFF0000));

    // RB -> U * V
    int offset = pixel;
    CONTEXT_LOAD(offset, generated_vars.rt);
    CONTEXT_LOAD(u, generated_vars.lb);
    ADD(AL, 0, offset, offset, u);

    LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SMULBB(AL, u, U, V);
    AND(AL, 0, temp, mask, pixel);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MUL(AL, 0, dh, temp, u);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MUL(AL, 0, dl, temp, u);
    RSB(AL, 0, k, u, imm(0x100));
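    // Same field-splitting idea for 8888 (my reading): mask = 0x00FF00FF
    // extracts the R and B bytes into 'temp' (accumulated in dh), while
    // (pixel>>8) & mask extracts G and A (accumulated in dl). Each 16-bit
    // lane has 8 bits of headroom, so one MUL per half weights two
    // components at a time; dh and dl are re-interleaved at the end of the
    // function.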

    // LB -> (1-U) * V
    CONTEXT_LOAD(offset, generated_vars.lb);
    RSB(AL, 0, U, U, imm(1<<FRAC_BITS));
    LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SMULBB(AL, u, U, V);
    AND(AL, 0, temp, mask, pixel);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, dh, temp, u, dh);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MLA(AL, 0, dl, temp, u, dl);
    SUB(AL, 0, k, k, u);

    // LT -> (1-U)*(1-V)
    RSB(AL, 0, V, V, imm(1<<FRAC_BITS));
    LDR(AL, pixel, txPtr.reg);
    SMULBB(AL, u, U, V);
    AND(AL, 0, temp, mask, pixel);
    if (adjust) {
        if (round)
            ADD(AL, 0, u, u, imm(1<<(adjust-1)));
        MOV(AL, 0, u, reg_imm(u, LSR, adjust));
    }
    MLA(AL, 0, dh, temp, u, dh);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MLA(AL, 0, dl, temp, u, dl);

    // RT -> U*(1-V)
    CONTEXT_LOAD(offset, generated_vars.rt);
    LDR(AL, pixel, txPtr.reg, reg_scale_pre(offset));
    SUB(AL, 0, u, k, u);
    AND(AL, 0, temp, mask, pixel);
    MLA(AL, 0, dh, temp, u, dh);
    AND(AL, 0, temp, mask, reg_imm(pixel, LSR, 8));
    MLA(AL, 0, dl, temp, u, dl);

    AND(AL, 0, dh, mask, reg_imm(dh, LSR, 8));
    AND(AL, 0, dl, dl, reg_imm(mask, LSL, 8));
    ORR(AL, 0, texel.reg, dh, dl);
}

void GGLAssembler::build_texture_environment(
        component_t& fragment,
        const fragment_parts_t& parts,
        int component,
        Scratch& regs)
{
    const uint32_t component_mask = 1<<component;
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];

        if (tmu.mask & component_mask) {
            // replace or modulate with this texture
            if ((tmu.replaced & component_mask) == 0) {
                // not replaced by a later tmu...

                Scratch scratches(registerFile());
                pixel_t texel(parts.texel[i]);

                if (multiTexture &&
                    tmu.swrap == GGL_NEEDS_WRAP_11 &&
                    tmu.twrap == GGL_NEEDS_WRAP_11)
                {
                    texel.reg = scratches.obtain();
                    texel.flags |= CORRUPTIBLE;
                    comment("fetch texel (multitexture 1:1)");
                    load(parts.coords[i].ptr, texel, WRITE_BACK);
                }

                component_t incoming(fragment);
                modify(fragment, regs);

                switch (tmu.env) {
                case GGL_REPLACE:
                    extract(fragment, texel, component);
                    break;
                case GGL_MODULATE:
                    modulate(fragment, incoming, texel, component);
                    break;
                case GGL_DECAL:
                    decal(fragment, incoming, texel, component);
                    break;
                case GGL_BLEND:
                    blend(fragment, incoming, texel, component, i);
                    break;
                case GGL_ADD:
                    add(fragment, incoming, texel, component);
                    break;
                }
            }
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::wrapping(
        int d,
        int coord, int size,
        int tx_wrap, int tx_linear)
{
    // notes:
    // if tx_linear is set, we need 4 extra bits of precision on the result
    // SMULL/UMULL is 3 cycles
    Scratch scratches(registerFile());
    int c = coord;
    if (tx_wrap == GGL_NEEDS_WRAP_REPEAT) {
        // UMULL takes 4 cycles (interlocked), and we can get away with
        // 2 cycles using SMULWB, but we're losing 16 bits of precision
        // out of 32 (this is not a problem because the iterator keeps
        // its full precision)
        // UMULL(AL, 0, size, d, c, size);
        // note: we can't use SMULTB because it's signed.
        MOV(AL, 0, d, reg_imm(c, LSR, 16-tx_linear));
        SMULWB(AL, d, d, size);
    } else if (tx_wrap == GGL_NEEDS_WRAP_CLAMP_TO_EDGE) {
        if (tx_linear) {
            // 1 cycle
            MOV(AL, 0, d, reg_imm(coord, ASR, 16-tx_linear));
        } else {
            // 4 cycles (common case)
            MOV(AL, 0, d, reg_imm(coord, ASR, 16));
            BIC(AL, 0, d, d, reg_imm(d, ASR, 31));
            CMP(AL, d, size);
            SUB(GE, 0, d, size, imm(1));
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::modulate(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component)
{
    Scratch locals(registerFile());
    integer_t texel(locals.obtain(), 32, CORRUPTIBLE);
    extract(texel, incomingTexel, component);

    const int Nt = texel.size();
    // Nt should always be less than 10 bits because it comes
    // from the TMU.
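    // The multiplies below rely on the usual normalization trick: dividing
    // by a component maximum (1<<n)-1 is approximated by
    //     x/((1<<n)-1)  ~=  (x + (x >> (n-1))) >> n
    // which is exact at the endpoints; e.g. for n=8, modulating 255 by 255
    // gives (255*(255 + (255>>7)))>>8 = (255*256)>>8 = 255, as expected.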

    int Ni = incoming.size();
    // Ni could be big because it comes from previous MODULATEs

    if (Nt == 1) {
        // texel acts as a bit-mask
        // dest = incoming & ((texel << incoming.h)-texel)
        RSB(AL, 0, dest.reg, texel.reg, reg_imm(texel.reg, LSL, incoming.h));
        AND(AL, 0, dest.reg, dest.reg, incoming.reg);
        dest.l = incoming.l;
        dest.h = incoming.h;
        dest.flags |= (incoming.flags & CLEAR_LO);
    } else if (Ni == 1) {
        MOV(AL, 0, dest.reg, reg_imm(incoming.reg, LSL, 31-incoming.h));
        AND(AL, 0, dest.reg, texel.reg, reg_imm(dest.reg, ASR, 31));
        dest.l = 0;
        dest.h = Nt;
    } else {
        int inReg = incoming.reg;
        int shift = incoming.l;
        if ((Nt + Ni) > 32) {
            // we will overflow, reduce the precision of Ni to 8 bits
            // (note: Nt cannot be more than 10 bits, which happens with
            // 565 textures and GGL_LINEAR)
            shift += Ni-8;
            Ni = 8;
        }

        // modulate by the component with the lowest precision
        if (Nt >= Ni) {
            if (shift) {
                // XXX: we should be able to avoid this shift
                // when shift==16 && Nt<16 && Ni<16, in which
                // case we could use SMULBT below.
                MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift));
                inReg = dest.reg;
                shift = 0;
            }
            // operation:               (Cf*Ct)/((1<<Ni)-1)
            // approximated with:       Ct*(Cf + Cf>>(Ni-1))>>Ni
            // this operation doesn't change texel's size
            ADD(AL, 0, dest.reg, inReg, reg_imm(inReg, LSR, Ni-1));
            if (Nt<16 && Ni<16) SMULBB(AL, dest.reg, texel.reg, dest.reg);
            else                MUL(AL, 0, dest.reg, texel.reg, dest.reg);
            dest.l = Ni;
            dest.h = Nt + Ni;
        } else {
            if (shift && (shift != 16)) {
                // if shift==16, we can use 16-bit mul instructions later
                MOV(AL, 0, dest.reg, reg_imm(inReg, LSR, shift));
                inReg = dest.reg;
                shift = 0;
            }
            // operation:               (Cf*Ct)/((1<<Nt)-1)
            // approximated with:       Cf*(Ct + Ct>>(Nt-1))>>Nt
            // this operation doesn't change incoming's size
            Scratch scratches(registerFile());
            int t = (texel.flags & CORRUPTIBLE) ? texel.reg : dest.reg;
            if (t == inReg)
                t = scratches.obtain();
            ADD(AL, 0, t, texel.reg, reg_imm(texel.reg, LSR, Nt-1));
            if (Nt<16 && Ni<16) {
                if (shift==16) SMULBT(AL, dest.reg, t, inReg);
                else           SMULBB(AL, dest.reg, t, inReg);
            } else            MUL(AL, 0, dest.reg, t, inReg);
            dest.l = Nt;
            dest.h = Nt + Ni;
        }

        // low bits are not valid
        dest.flags |= CLEAR_LO;

        // no need to keep more than 8 bits/component
        if (dest.size() > 8)
            dest.l = dest.h-8;
    }
}

void GGLAssembler::decal(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component)
{
    // RGBA:
    // Cv = Cf*(1 - At) + Ct*At = Cf + (Ct - Cf)*At
    // Av = Af
    Scratch locals(registerFile());
    integer_t texel(locals.obtain(), 32, CORRUPTIBLE);
    integer_t factor(locals.obtain(), 32, CORRUPTIBLE);
    extract(texel, incomingTexel, component);
    extract(factor, incomingTexel, GGLFormat::ALPHA);

    // no need to keep more than 8 bits for decal
    int Ni = incoming.size();
    int shift = incoming.l;
    if (Ni > 8) {
        shift += Ni-8;
        Ni = 8;
    }
    integer_t incomingNorm(incoming.reg, Ni, incoming.flags);
    if (shift) {
        MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift));
        incomingNorm.reg = dest.reg;
        incomingNorm.flags |= CORRUPTIBLE;
    }
    ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1));
    build_blendOneMinusFF(dest, factor, incomingNorm, texel);
}

void GGLAssembler::blend(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component, int tmu)
{
    // RGBA:
    // Cv = (1 - Ct)*Cf + Ct*Cc = Cf + (Cc - Cf)*Ct
    // Av = At*Af
    if (component == GGLFormat::ALPHA) {
        modulate(dest, incoming, incomingTexel, component);
        return;
    }

    Scratch locals(registerFile());
    integer_t color(locals.obtain(), 8, CORRUPTIBLE);
    integer_t factor(locals.obtain(), 32, CORRUPTIBLE);
    LDRB(AL, color.reg, mBuilderContext.Rctx,
            immed12_pre(GGL_OFFSETOF(state.texture[tmu].env_color[component])));
    extract(factor, incomingTexel, component);

    // no need to keep more than 8 bits for blend
    int Ni = incoming.size();
    int shift = incoming.l;
    if (Ni > 8) {
        shift += Ni-8;
        Ni = 8;
    }
    integer_t incomingNorm(incoming.reg, Ni, incoming.flags);
    if (shift) {
        MOV(AL, 0, dest.reg, reg_imm(incomingNorm.reg, LSR, shift));
        incomingNorm.reg = dest.reg;
        incomingNorm.flags |= CORRUPTIBLE;
    }
    ADD(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, LSR, factor.s-1));
    build_blendOneMinusFF(dest, factor, incomingNorm, color);
}

void GGLAssembler::add(
        component_t& dest,
        const component_t& incoming,
        const pixel_t& incomingTexel, int component)
{
    // RGBA:
    // Cv = Cf + Ct
    Scratch locals(registerFile());

    component_t incomingTemp(incoming);

    // use "dest" as a temporary for extracting the texel, unless "dest"
    // overlaps "incoming".
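    // (As I read the helpers: extract() leaves the texel as a plain
    // integer with texel.s significant bits, the expand() calls below
    // bring texel and incoming to a common size before the ADD, and
    // component_sat() clamps the possible carry out of the top bit.)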
    integer_t texel(dest.reg, 32, CORRUPTIBLE);
    if (dest.reg == incomingTemp.reg)
        texel.reg = locals.obtain();
    extract(texel, incomingTexel, component);

    if (texel.s < incomingTemp.size()) {
        expand(texel, texel, incomingTemp.size());
    } else if (texel.s > incomingTemp.size()) {
        if (incomingTemp.flags & CORRUPTIBLE) {
            expand(incomingTemp, incomingTemp, texel.s);
        } else {
            incomingTemp.reg = locals.obtain();
            expand(incomingTemp, incoming, texel.s);
        }
    }

    if (incomingTemp.l) {
        ADD(AL, 0, dest.reg, texel.reg,
                reg_imm(incomingTemp.reg, LSR, incomingTemp.l));
    } else {
        ADD(AL, 0, dest.reg, texel.reg, incomingTemp.reg);
    }
    dest.l = 0;
    dest.h = texel.size();
    component_sat(dest);
}

// ----------------------------------------------------------------------------

}; // namespace android