1 /* libs/pixelflinger/codeflinger/GGLAssembler.cpp 2 ** 3 ** Copyright 2006, The Android Open Source Project 4 ** 5 ** Licensed under the Apache License, Version 2.0 (the "License"); 6 ** you may not use this file except in compliance with the License. 7 ** You may obtain a copy of the License at 8 ** 9 ** http://www.apache.org/licenses/LICENSE-2.0 10 ** 11 ** Unless required by applicable law or agreed to in writing, software 12 ** distributed under the License is distributed on an "AS IS" BASIS, 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 ** See the License for the specific language governing permissions and 15 ** limitations under the License. 16 */ 17 18 #define LOG_TAG "GGLAssembler" 19 20 #include <assert.h> 21 #include <stdint.h> 22 #include <stdio.h> 23 #include <stdlib.h> 24 #include <sys/types.h> 25 26 #include <log/log.h> 27 28 #include "GGLAssembler.h" 29 30 namespace android { 31 32 // ---------------------------------------------------------------------------- 33 34 GGLAssembler::GGLAssembler(ARMAssemblerInterface* target) 35 : ARMAssemblerProxy(target), 36 RegisterAllocator(ARMAssemblerProxy::getCodegenArch()), mOptLevel(7) 37 { 38 } 39 40 GGLAssembler::~GGLAssembler() 41 { 42 } 43 44 void GGLAssembler::prolog() 45 { 46 ARMAssemblerProxy::prolog(); 47 } 48 49 void GGLAssembler::epilog(uint32_t touched) 50 { 51 ARMAssemblerProxy::epilog(touched); 52 } 53 54 void GGLAssembler::reset(int opt_level) 55 { 56 ARMAssemblerProxy::reset(); 57 RegisterAllocator::reset(); 58 mOptLevel = opt_level; 59 } 60 61 // --------------------------------------------------------------------------- 62 63 int GGLAssembler::scanline(const needs_t& needs, context_t const* c) 64 { 65 int err = 0; 66 int opt_level = mOptLevel; 67 while (opt_level >= 0) { 68 reset(opt_level); 69 err = scanline_core(needs, c); 70 if (err == 0) 71 break; 72 opt_level--; 73 } 74 75 // XXX: in theory, pcForLabel is not valid before generate() 76 uint32_t* fragment_start_pc = pcForLabel("fragment_loop"); 77 uint32_t* fragment_end_pc = pcForLabel("epilog"); 78 const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc); 79 80 // build a name for our pipeline 81 char name[64]; 82 sprintf(name, 83 "scanline__%08X:%08X_%08X_%08X [%3d ipp]", 84 needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops); 85 86 if (err) { 87 ALOGE("Error while generating ""%s""\n", name); 88 disassemble(name); 89 return -1; 90 } 91 92 return generate(name); 93 } 94 95 int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c) 96 { 97 mBlendFactorCached = 0; 98 mBlending = 0; 99 mMasking = 0; 100 mAA = GGL_READ_NEEDS(P_AA, needs.p); 101 mDithering = GGL_READ_NEEDS(P_DITHER, needs.p); 102 mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER; 103 mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER; 104 mFog = GGL_READ_NEEDS(P_FOG, needs.p) != 0; 105 mSmooth = GGL_READ_NEEDS(SHADE, needs.n) != 0; 106 mBuilderContext.needs = needs; 107 mBuilderContext.c = c; 108 mBuilderContext.Rctx = reserveReg(R0); // context always in R0 109 mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ]; 110 111 // ------------------------------------------------------------------------ 112 113 decodeLogicOpNeeds(needs); 114 115 decodeTMUNeeds(needs, c); 116 117 mBlendSrc = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n)); 118 mBlendDst = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n)); 119 mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n)); 120 mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n)); 121 122 if (!mCbFormat.c[GGLFormat::ALPHA].h) { 123 if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) || 124 (mBlendSrc == GGL_DST_ALPHA)) { 125 mBlendSrc = GGL_ONE; 126 } 127 if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) || 128 (mBlendSrcA == GGL_DST_ALPHA)) { 129 mBlendSrcA = GGL_ONE; 130 } 131 if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) || 132 (mBlendDst == GGL_DST_ALPHA)) { 133 mBlendDst = GGL_ONE; 134 } 135 if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) || 136 (mBlendDstA == GGL_DST_ALPHA)) { 137 mBlendDstA = GGL_ONE; 138 } 139 } 140 141 // if we need the framebuffer, read it now 142 const int blending = blending_codes(mBlendSrc, mBlendDst) | 143 blending_codes(mBlendSrcA, mBlendDstA); 144 145 // XXX: handle special cases, destination not modified... 146 if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) && 147 (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) { 148 // Destination unmodified (beware of logic ops) 149 } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) && 150 (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) { 151 // Destination is zero (beware of logic ops) 152 } 153 154 int fbComponents = 0; 155 const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n); 156 for (int i=0 ; i<4 ; i++) { 157 const int mask = 1<<i; 158 component_info_t& info = mInfo[i]; 159 int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc; 160 int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst; 161 if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA) 162 fs = GGL_ONE; 163 info.masked = !!(masking & mask); 164 info.inDest = !info.masked && mCbFormat.c[i].h && 165 ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp)); 166 if (mCbFormat.components >= GGL_LUMINANCE && 167 (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) { 168 info.inDest = false; 169 } 170 info.needed = (i==GGLFormat::ALPHA) && 171 (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS); 172 info.replaced = !!(mTextureMachine.replaced & mask); 173 info.iterated = (!info.replaced && (info.inDest || info.needed)); 174 info.smooth = mSmooth && info.iterated; 175 info.fog = mFog && info.inDest && (i != GGLFormat::ALPHA); 176 info.blend = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO)); 177 178 mBlending |= (info.blend ? mask : 0); 179 mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0; 180 fbComponents |= mCbFormat.c[i].h ? mask : 0; 181 } 182 183 mAllMasked = (mMasking == fbComponents); 184 if (mAllMasked) { 185 mDithering = 0; 186 } 187 188 fragment_parts_t parts; 189 190 // ------------------------------------------------------------------------ 191 prolog(); 192 // ------------------------------------------------------------------------ 193 194 build_scanline_prolog(parts, needs); 195 196 if (registerFile().status()) 197 return registerFile().status(); 198 199 // ------------------------------------------------------------------------ 200 label("fragment_loop"); 201 // ------------------------------------------------------------------------ 202 { 203 Scratch regs(registerFile()); 204 205 if (mDithering) { 206 // update the dither index. 207 MOV(AL, 0, parts.count.reg, 208 reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT)); 209 ADD(AL, 0, parts.count.reg, parts.count.reg, 210 imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT))); 211 MOV(AL, 0, parts.count.reg, 212 reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT)); 213 } 214 215 // XXX: could we do an early alpha-test here in some cases? 216 // It would probaly be used only with smooth-alpha and no texture 217 // (or no alpha component in the texture). 218 219 // Early z-test 220 if (mAlphaTest==GGL_ALWAYS) { 221 build_depth_test(parts, Z_TEST|Z_WRITE); 222 } else { 223 // we cannot do the z-write here, because 224 // it might be killed by the alpha-test later 225 build_depth_test(parts, Z_TEST); 226 } 227 228 { // texture coordinates 229 Scratch scratches(registerFile()); 230 231 // texel generation 232 build_textures(parts, regs); 233 if (registerFile().status()) 234 return registerFile().status(); 235 } 236 237 if ((blending & (FACTOR_DST|BLEND_DST)) || 238 (mMasking && !mAllMasked) || 239 (mLogicOp & LOGIC_OP_DST)) 240 { 241 // blending / logic_op / masking need the framebuffer 242 mDstPixel.setTo(regs.obtain(), &mCbFormat); 243 244 // load the framebuffer pixel 245 comment("fetch color-buffer"); 246 load(parts.cbPtr, mDstPixel); 247 } 248 249 if (registerFile().status()) 250 return registerFile().status(); 251 252 pixel_t pixel; 253 int directTex = mTextureMachine.directTexture; 254 if (directTex | parts.packed) { 255 // note: we can't have both here 256 // iterated color or direct texture 257 pixel = directTex ? parts.texel[directTex-1] : parts.iterated; 258 pixel.flags &= ~CORRUPTIBLE; 259 } else { 260 if (mDithering) { 261 const int ctxtReg = mBuilderContext.Rctx; 262 const int mask = GGL_DITHER_SIZE-1; 263 parts.dither = reg_t(regs.obtain()); 264 AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask)); 265 ADDR_ADD(AL, 0, parts.dither.reg, ctxtReg, parts.dither.reg); 266 LDRB(AL, parts.dither.reg, parts.dither.reg, 267 immed12_pre(GGL_OFFSETOF(ditherMatrix))); 268 } 269 270 // allocate a register for the resulting pixel 271 pixel.setTo(regs.obtain(), &mCbFormat, FIRST); 272 273 build_component(pixel, parts, GGLFormat::ALPHA, regs); 274 275 if (mAlphaTest!=GGL_ALWAYS) { 276 // only handle the z-write part here. We know z-test 277 // was successful, as well as alpha-test. 278 build_depth_test(parts, Z_WRITE); 279 } 280 281 build_component(pixel, parts, GGLFormat::RED, regs); 282 build_component(pixel, parts, GGLFormat::GREEN, regs); 283 build_component(pixel, parts, GGLFormat::BLUE, regs); 284 285 pixel.flags |= CORRUPTIBLE; 286 } 287 288 if (registerFile().status()) 289 return registerFile().status(); 290 291 if (pixel.reg == -1) { 292 // be defensive here. if we're here it's probably 293 // that this whole fragment is a no-op. 294 pixel = mDstPixel; 295 } 296 297 if (!mAllMasked) { 298 // logic operation 299 build_logic_op(pixel, regs); 300 301 // masking 302 build_masking(pixel, regs); 303 304 comment("store"); 305 store(parts.cbPtr, pixel, WRITE_BACK); 306 } 307 } 308 309 if (registerFile().status()) 310 return registerFile().status(); 311 312 // update the iterated color... 313 if (parts.reload != 3) { 314 build_smooth_shade(parts); 315 } 316 317 // update iterated z 318 build_iterate_z(parts); 319 320 // update iterated fog 321 build_iterate_f(parts); 322 323 SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16)); 324 B(PL, "fragment_loop"); 325 label("epilog"); 326 epilog(registerFile().touched()); 327 328 if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) { 329 if (mDepthTest!=GGL_ALWAYS) { 330 label("discard_before_textures"); 331 build_iterate_texture_coordinates(parts); 332 } 333 label("discard_after_textures"); 334 build_smooth_shade(parts); 335 build_iterate_z(parts); 336 build_iterate_f(parts); 337 if (!mAllMasked) { 338 ADDR_ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3)); 339 } 340 SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16)); 341 B(PL, "fragment_loop"); 342 epilog(registerFile().touched()); 343 } 344 345 return registerFile().status(); 346 } 347 348 // --------------------------------------------------------------------------- 349 350 void GGLAssembler::build_scanline_prolog( 351 fragment_parts_t& parts, const needs_t& needs) 352 { 353 Scratch scratches(registerFile()); 354 355 // compute count 356 comment("compute ct (# of pixels to process)"); 357 parts.count.setTo(obtainReg()); 358 int Rx = scratches.obtain(); 359 int Ry = scratches.obtain(); 360 CONTEXT_LOAD(Rx, iterators.xl); 361 CONTEXT_LOAD(parts.count.reg, iterators.xr); 362 CONTEXT_LOAD(Ry, iterators.y); 363 364 // parts.count = iterators.xr - Rx 365 SUB(AL, 0, parts.count.reg, parts.count.reg, Rx); 366 SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1)); 367 368 if (mDithering) { 369 // parts.count.reg = 0xNNNNXXDD 370 // NNNN = count-1 371 // DD = dither offset 372 // XX = 0xxxxxxx (x = garbage) 373 Scratch scratches(registerFile()); 374 int tx = scratches.obtain(); 375 int ty = scratches.obtain(); 376 AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK)); 377 AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK)); 378 ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT)); 379 ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16)); 380 } else { 381 // parts.count.reg = 0xNNNN0000 382 // NNNN = count-1 383 MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16)); 384 } 385 386 if (!mAllMasked) { 387 // compute dst ptr 388 comment("compute color-buffer pointer"); 389 const int cb_bits = mCbFormat.size*8; 390 int Rs = scratches.obtain(); 391 parts.cbPtr.setTo(obtainReg(), cb_bits); 392 CONTEXT_LOAD(Rs, state.buffers.color.stride); 393 CONTEXT_ADDR_LOAD(parts.cbPtr.reg, state.buffers.color.data); 394 SMLABB(AL, Rs, Ry, Rs, Rx); // Rs = Rx + Ry*Rs 395 base_offset(parts.cbPtr, parts.cbPtr, Rs); 396 scratches.recycle(Rs); 397 } 398 399 // init fog 400 const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p); 401 if (need_fog) { 402 comment("compute initial fog coordinate"); 403 Scratch scratches(registerFile()); 404 int dfdx = scratches.obtain(); 405 int ydfdy = scratches.obtain(); 406 int f = ydfdy; 407 CONTEXT_LOAD(dfdx, generated_vars.dfdx); 408 CONTEXT_LOAD(ydfdy, iterators.ydfdy); 409 MLA(AL, 0, f, Rx, dfdx, ydfdy); 410 CONTEXT_STORE(f, generated_vars.f); 411 } 412 413 // init Z coordinate 414 if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) { 415 parts.z = reg_t(obtainReg()); 416 comment("compute initial Z coordinate"); 417 Scratch scratches(registerFile()); 418 int dzdx = scratches.obtain(); 419 int ydzdy = parts.z.reg; 420 CONTEXT_LOAD(dzdx, generated_vars.dzdx); // 1.31 fixed-point 421 CONTEXT_LOAD(ydzdy, iterators.ydzdy); // 1.31 fixed-point 422 MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy); 423 424 // we're going to index zbase of parts.count 425 // zbase = base + (xl-count + stride*y)*2 426 int Rs = dzdx; 427 int zbase = scratches.obtain(); 428 CONTEXT_LOAD(Rs, state.buffers.depth.stride); 429 CONTEXT_ADDR_LOAD(zbase, state.buffers.depth.data); 430 SMLABB(AL, Rs, Ry, Rs, Rx); 431 ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16)); 432 ADDR_ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1)); 433 CONTEXT_ADDR_STORE(zbase, generated_vars.zbase); 434 } 435 436 // init texture coordinates 437 init_textures(parts.coords, reg_t(Rx), reg_t(Ry)); 438 scratches.recycle(Ry); 439 440 // iterated color 441 init_iterated_color(parts, reg_t(Rx)); 442 443 // init coverage factor application (anti-aliasing) 444 if (mAA) { 445 parts.covPtr.setTo(obtainReg(), 16); 446 CONTEXT_ADDR_LOAD(parts.covPtr.reg, state.buffers.coverage); 447 ADDR_ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1)); 448 } 449 } 450 451 // --------------------------------------------------------------------------- 452 453 void GGLAssembler::build_component( pixel_t& pixel, 454 const fragment_parts_t& parts, 455 int component, 456 Scratch& regs) 457 { 458 static char const * comments[] = {"alpha", "red", "green", "blue"}; 459 comment(comments[component]); 460 461 // local register file 462 Scratch scratches(registerFile()); 463 const int dst_component_size = pixel.component_size(component); 464 465 component_t temp(-1); 466 build_incoming_component( temp, dst_component_size, 467 parts, component, scratches, regs); 468 469 if (mInfo[component].inDest) { 470 471 // blending... 472 build_blending( temp, mDstPixel, component, scratches ); 473 474 // downshift component and rebuild pixel... 475 downshift(pixel, component, temp, parts.dither); 476 } 477 } 478 479 void GGLAssembler::build_incoming_component( 480 component_t& temp, 481 int dst_size, 482 const fragment_parts_t& parts, 483 int component, 484 Scratch& scratches, 485 Scratch& global_regs) 486 { 487 const uint32_t component_mask = 1<<component; 488 489 // Figure out what we need for the blending stage... 490 int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc; 491 int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst; 492 if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) { 493 fs = GGL_ONE; 494 } 495 496 // Figure out what we need to extract and for what reason 497 const int blending = blending_codes(fs, fd); 498 499 // Are we actually going to blend? 500 const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO)); 501 502 // expand the source if the destination has more bits 503 int need_expander = false; 504 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) { 505 texture_unit_t& tmu = mTextureMachine.tmu[i]; 506 if ((tmu.format_idx) && 507 (parts.texel[i].component_size(component) < dst_size)) { 508 need_expander = true; 509 } 510 } 511 512 // do we need to extract this component? 513 const bool multiTexture = mTextureMachine.activeUnits > 1; 514 const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) && 515 (isAlphaSourceNeeded()); 516 int need_extract = mInfo[component].needed; 517 if (mInfo[component].inDest) 518 { 519 need_extract |= ((need_blending ? 520 (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander)); 521 need_extract |= (mTextureMachine.mask != mTextureMachine.replaced); 522 need_extract |= mInfo[component].smooth; 523 need_extract |= mInfo[component].fog; 524 need_extract |= mDithering; 525 need_extract |= multiTexture; 526 } 527 528 if (need_extract) { 529 Scratch& regs = blend_needs_alpha_source ? global_regs : scratches; 530 component_t fragment; 531 532 // iterated color 533 build_iterated_color(fragment, parts, component, regs); 534 535 // texture environement (decal, modulate, replace) 536 build_texture_environment(fragment, parts, component, regs); 537 538 // expand the source if the destination has more bits 539 if (need_expander && (fragment.size() < dst_size)) { 540 // we're here only if we fetched a texel 541 // (so we know for sure fragment is CORRUPTIBLE) 542 expand(fragment, fragment, dst_size); 543 } 544 545 // We have a few specific things to do for the alpha-channel 546 if ((component==GGLFormat::ALPHA) && 547 (mInfo[component].needed || fragment.size()<dst_size)) 548 { 549 // convert to integer_t first and make sure 550 // we don't corrupt a needed register 551 if (fragment.l) { 552 component_t incoming(fragment); 553 modify(fragment, regs); 554 MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l)); 555 fragment.h -= fragment.l; 556 fragment.l = 0; 557 } 558 559 // coverage factor application 560 build_coverage_application(fragment, parts, regs); 561 562 // alpha-test 563 build_alpha_test(fragment, parts); 564 565 if (blend_needs_alpha_source) { 566 // We keep only 8 bits for the blending stage 567 const int shift = fragment.h <= 8 ? 0 : fragment.h-8; 568 if (fragment.flags & CORRUPTIBLE) { 569 fragment.flags &= ~CORRUPTIBLE; 570 mAlphaSource.setTo(fragment.reg, 571 fragment.size(), fragment.flags); 572 if (shift) { 573 MOV(AL, 0, mAlphaSource.reg, 574 reg_imm(mAlphaSource.reg, LSR, shift)); 575 } 576 } else { 577 // XXX: it would better to do this in build_blend_factor() 578 // so we can avoid the extra MOV below. 579 mAlphaSource.setTo(regs.obtain(), 580 fragment.size(), CORRUPTIBLE); 581 if (shift) { 582 MOV(AL, 0, mAlphaSource.reg, 583 reg_imm(fragment.reg, LSR, shift)); 584 } else { 585 MOV(AL, 0, mAlphaSource.reg, fragment.reg); 586 } 587 } 588 mAlphaSource.s -= shift; 589 } 590 } 591 592 // fog... 593 build_fog( fragment, component, regs ); 594 595 temp = fragment; 596 } else { 597 if (mInfo[component].inDest) { 598 // extraction not needed and replace 599 // we just select the right component 600 if ((mTextureMachine.replaced & component_mask) == 0) { 601 // component wasn't replaced, so use it! 602 temp = component_t(parts.iterated, component); 603 } 604 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) { 605 const texture_unit_t& tmu = mTextureMachine.tmu[i]; 606 if ((tmu.mask & component_mask) && 607 ((tmu.replaced & component_mask) == 0)) { 608 temp = component_t(parts.texel[i], component); 609 } 610 } 611 } 612 } 613 } 614 615 bool GGLAssembler::isAlphaSourceNeeded() const 616 { 617 // XXX: also needed for alpha-test 618 const int bs = mBlendSrc; 619 const int bd = mBlendDst; 620 return bs==GGL_SRC_ALPHA_SATURATE || 621 bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA || 622 bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ; 623 } 624 625 // --------------------------------------------------------------------------- 626 627 void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts) 628 { 629 if (mSmooth && !parts.iterated_packed) { 630 // update the iterated color in a pipelined way... 631 comment("update iterated color"); 632 Scratch scratches(registerFile()); 633 634 const int reload = parts.reload; 635 for (int i=0 ; i<4 ; i++) { 636 if (!mInfo[i].iterated) 637 continue; 638 639 int c = parts.argb[i].reg; 640 int dx = parts.argb_dx[i].reg; 641 642 if (reload & 1) { 643 c = scratches.obtain(); 644 CONTEXT_LOAD(c, generated_vars.argb[i].c); 645 } 646 if (reload & 2) { 647 dx = scratches.obtain(); 648 CONTEXT_LOAD(dx, generated_vars.argb[i].dx); 649 } 650 651 if (mSmooth) { 652 ADD(AL, 0, c, c, dx); 653 } 654 655 if (reload & 1) { 656 CONTEXT_STORE(c, generated_vars.argb[i].c); 657 scratches.recycle(c); 658 } 659 if (reload & 2) { 660 scratches.recycle(dx); 661 } 662 } 663 } 664 } 665 666 // --------------------------------------------------------------------------- 667 668 void GGLAssembler::build_coverage_application(component_t& fragment, 669 const fragment_parts_t& parts, Scratch& regs) 670 { 671 // here fragment.l is guarenteed to be 0 672 if (mAA) { 673 // coverages are 1.15 fixed-point numbers 674 comment("coverage application"); 675 676 component_t incoming(fragment); 677 modify(fragment, regs); 678 679 Scratch scratches(registerFile()); 680 int cf = scratches.obtain(); 681 LDRH(AL, cf, parts.covPtr.reg, immed8_post(2)); 682 if (fragment.h > 31) { 683 fragment.h--; 684 SMULWB(AL, fragment.reg, incoming.reg, cf); 685 } else { 686 MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1)); 687 SMULWB(AL, fragment.reg, fragment.reg, cf); 688 } 689 } 690 } 691 692 // --------------------------------------------------------------------------- 693 694 void GGLAssembler::build_alpha_test(component_t& fragment, 695 const fragment_parts_t& /*parts*/) 696 { 697 if (mAlphaTest != GGL_ALWAYS) { 698 comment("Alpha Test"); 699 Scratch scratches(registerFile()); 700 int ref = scratches.obtain(); 701 const int shift = GGL_COLOR_BITS-fragment.size(); 702 CONTEXT_LOAD(ref, state.alpha_test.ref); 703 if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift)); 704 else CMP(AL, fragment.reg, ref); 705 int cc = NV; 706 switch (mAlphaTest) { 707 case GGL_NEVER: cc = NV; break; 708 case GGL_LESS: cc = LT; break; 709 case GGL_EQUAL: cc = EQ; break; 710 case GGL_LEQUAL: cc = LS; break; 711 case GGL_GREATER: cc = HI; break; 712 case GGL_NOTEQUAL: cc = NE; break; 713 case GGL_GEQUAL: cc = HS; break; 714 } 715 B(cc^1, "discard_after_textures"); 716 } 717 } 718 719 // --------------------------------------------------------------------------- 720 721 void GGLAssembler::build_depth_test( 722 const fragment_parts_t& parts, uint32_t mask) 723 { 724 mask &= Z_TEST|Z_WRITE; 725 const needs_t& needs = mBuilderContext.needs; 726 const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p); 727 Scratch scratches(registerFile()); 728 729 if (mDepthTest != GGL_ALWAYS || zmask) { 730 int cc=AL, ic=AL; 731 switch (mDepthTest) { 732 case GGL_LESS: ic = HI; break; 733 case GGL_EQUAL: ic = EQ; break; 734 case GGL_LEQUAL: ic = HS; break; 735 case GGL_GREATER: ic = LT; break; 736 case GGL_NOTEQUAL: ic = NE; break; 737 case GGL_GEQUAL: ic = LS; break; 738 case GGL_NEVER: 739 // this never happens, because it's taken care of when 740 // computing the needs. but we keep it for completness. 741 comment("Depth Test (NEVER)"); 742 B(AL, "discard_before_textures"); 743 return; 744 case GGL_ALWAYS: 745 // we're here because zmask is enabled 746 mask &= ~Z_TEST; // test always passes. 747 break; 748 } 749 750 // inverse the condition 751 cc = ic^1; 752 753 if ((mask & Z_WRITE) && !zmask) { 754 mask &= ~Z_WRITE; 755 } 756 757 if (!mask) 758 return; 759 760 comment("Depth Test"); 761 762 int zbase = scratches.obtain(); 763 int depth = scratches.obtain(); 764 int z = parts.z.reg; 765 766 CONTEXT_ADDR_LOAD(zbase, generated_vars.zbase); // stall 767 ADDR_SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15)); 768 // above does zbase = zbase + ((count >> 16) << 1) 769 770 if (mask & Z_TEST) { 771 LDRH(AL, depth, zbase); // stall 772 CMP(AL, depth, reg_imm(z, LSR, 16)); 773 B(cc, "discard_before_textures"); 774 } 775 if (mask & Z_WRITE) { 776 if (mask == Z_WRITE) { 777 // only z-write asked, cc is meaningless 778 ic = AL; 779 } 780 MOV(AL, 0, depth, reg_imm(z, LSR, 16)); 781 STRH(ic, depth, zbase); 782 } 783 } 784 } 785 786 void GGLAssembler::build_iterate_z(const fragment_parts_t& parts) 787 { 788 const needs_t& needs = mBuilderContext.needs; 789 if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) { 790 Scratch scratches(registerFile()); 791 int dzdx = scratches.obtain(); 792 CONTEXT_LOAD(dzdx, generated_vars.dzdx); // stall 793 ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx); 794 } 795 } 796 797 void GGLAssembler::build_iterate_f(const fragment_parts_t& /*parts*/) 798 { 799 const needs_t& needs = mBuilderContext.needs; 800 if (GGL_READ_NEEDS(P_FOG, needs.p)) { 801 Scratch scratches(registerFile()); 802 int dfdx = scratches.obtain(); 803 int f = scratches.obtain(); 804 CONTEXT_LOAD(f, generated_vars.f); 805 CONTEXT_LOAD(dfdx, generated_vars.dfdx); // stall 806 ADD(AL, 0, f, f, dfdx); 807 CONTEXT_STORE(f, generated_vars.f); 808 } 809 } 810 811 // --------------------------------------------------------------------------- 812 813 void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs) 814 { 815 const needs_t& needs = mBuilderContext.needs; 816 const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR; 817 if (opcode == GGL_COPY) 818 return; 819 820 comment("logic operation"); 821 822 pixel_t s(pixel); 823 if (!(pixel.flags & CORRUPTIBLE)) { 824 pixel.reg = regs.obtain(); 825 pixel.flags |= CORRUPTIBLE; 826 } 827 828 pixel_t d(mDstPixel); 829 switch(opcode) { 830 case GGL_CLEAR: MOV(AL, 0, pixel.reg, imm(0)); break; 831 case GGL_AND: AND(AL, 0, pixel.reg, s.reg, d.reg); break; 832 case GGL_AND_REVERSE: BIC(AL, 0, pixel.reg, s.reg, d.reg); break; 833 case GGL_COPY: break; 834 case GGL_AND_INVERTED: BIC(AL, 0, pixel.reg, d.reg, s.reg); break; 835 case GGL_NOOP: MOV(AL, 0, pixel.reg, d.reg); break; 836 case GGL_XOR: EOR(AL, 0, pixel.reg, s.reg, d.reg); break; 837 case GGL_OR: ORR(AL, 0, pixel.reg, s.reg, d.reg); break; 838 case GGL_NOR: ORR(AL, 0, pixel.reg, s.reg, d.reg); 839 MVN(AL, 0, pixel.reg, pixel.reg); break; 840 case GGL_EQUIV: EOR(AL, 0, pixel.reg, s.reg, d.reg); 841 MVN(AL, 0, pixel.reg, pixel.reg); break; 842 case GGL_INVERT: MVN(AL, 0, pixel.reg, d.reg); break; 843 case GGL_OR_REVERSE: // s | ~d == ~(~s & d) 844 BIC(AL, 0, pixel.reg, d.reg, s.reg); 845 MVN(AL, 0, pixel.reg, pixel.reg); break; 846 case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg); break; 847 case GGL_OR_INVERTED: // ~s | d == ~(s & ~d) 848 BIC(AL, 0, pixel.reg, s.reg, d.reg); 849 MVN(AL, 0, pixel.reg, pixel.reg); break; 850 case GGL_NAND: AND(AL, 0, pixel.reg, s.reg, d.reg); 851 MVN(AL, 0, pixel.reg, pixel.reg); break; 852 case GGL_SET: MVN(AL, 0, pixel.reg, imm(0)); break; 853 }; 854 } 855 856 // --------------------------------------------------------------------------- 857 858 static uint32_t find_bottom(uint32_t val) 859 { 860 uint32_t i = 0; 861 while (!(val & (3<<i))) 862 i+= 2; 863 return i; 864 } 865 866 static void normalize(uint32_t& val, uint32_t& rot) 867 { 868 rot = 0; 869 while (!(val&3) || (val & 0xFC000000)) { 870 uint32_t newval; 871 newval = val >> 2; 872 newval |= (val&3) << 30; 873 val = newval; 874 rot += 2; 875 if (rot == 32) { 876 rot = 0; 877 break; 878 } 879 } 880 } 881 882 void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits) 883 { 884 uint32_t rot; 885 uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1; 886 mask &= size; 887 888 if (mask == size) { 889 if (d != s) 890 MOV( AL, 0, d, s); 891 return; 892 } 893 894 if ((getCodegenArch() == CODEGEN_ARCH_MIPS) || 895 (getCodegenArch() == CODEGEN_ARCH_MIPS64)) { 896 // MIPS can do 16-bit imm in 1 instr, 32-bit in 3 instr 897 // the below ' while (mask)' code is buggy on mips 898 // since mips returns true on isValidImmediate() 899 // then we get multiple AND instr (positive logic) 900 AND( AL, 0, d, s, imm(mask) ); 901 return; 902 } 903 else if (getCodegenArch() == CODEGEN_ARCH_ARM64) { 904 AND( AL, 0, d, s, imm(mask) ); 905 return; 906 } 907 908 int negative_logic = !isValidImmediate(mask); 909 if (negative_logic) { 910 mask = ~mask & size; 911 } 912 normalize(mask, rot); 913 914 if (mask) { 915 while (mask) { 916 uint32_t bitpos = find_bottom(mask); 917 int shift = rot + bitpos; 918 uint32_t m = mask & (0xff << bitpos); 919 mask &= ~m; 920 m >>= bitpos; 921 int32_t newMask = (m<<shift) | (m>>(32-shift)); 922 if (!negative_logic) { 923 AND( AL, 0, d, s, imm(newMask) ); 924 } else { 925 BIC( AL, 0, d, s, imm(newMask) ); 926 } 927 s = d; 928 } 929 } else { 930 MOV( AL, 0, d, imm(0)); 931 } 932 } 933 934 void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs) 935 { 936 if (!mMasking || mAllMasked) { 937 return; 938 } 939 940 comment("color mask"); 941 942 pixel_t fb(mDstPixel); 943 pixel_t s(pixel); 944 if (!(pixel.flags & CORRUPTIBLE)) { 945 pixel.reg = regs.obtain(); 946 pixel.flags |= CORRUPTIBLE; 947 } 948 949 int mask = 0; 950 for (int i=0 ; i<4 ; i++) { 951 const int component_mask = 1<<i; 952 const int h = fb.format.c[i].h; 953 const int l = fb.format.c[i].l; 954 if (h && (!(mMasking & component_mask))) { 955 mask |= ((1<<(h-l))-1) << l; 956 } 957 } 958 959 // There is no need to clear the masked components of the source 960 // (unless we applied a logic op), because they're already zeroed 961 // by construction (masked components are not computed) 962 963 if (mLogicOp) { 964 const needs_t& needs = mBuilderContext.needs; 965 const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR; 966 if (opcode != GGL_CLEAR) { 967 // clear masked component of source 968 build_and_immediate(pixel.reg, s.reg, mask, fb.size()); 969 s = pixel; 970 } 971 } 972 973 // clear non masked components of destination 974 build_and_immediate(fb.reg, fb.reg, ~mask, fb.size()); 975 976 // or back the channels that were masked 977 if (s.reg == fb.reg) { 978 // this is in fact a MOV 979 if (s.reg == pixel.reg) { 980 // ugh. this in in fact a nop 981 } else { 982 MOV(AL, 0, pixel.reg, fb.reg); 983 } 984 } else { 985 ORR(AL, 0, pixel.reg, s.reg, fb.reg); 986 } 987 } 988 989 // --------------------------------------------------------------------------- 990 991 void GGLAssembler::base_offset( 992 const pointer_t& d, const pointer_t& b, const reg_t& o) 993 { 994 switch (b.size) { 995 case 32: 996 ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2)); 997 break; 998 case 24: 999 if (d.reg == b.reg) { 1000 ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1)); 1001 ADDR_ADD(AL, 0, d.reg, d.reg, o.reg); 1002 } else { 1003 ADDR_ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1)); 1004 ADDR_ADD(AL, 0, d.reg, d.reg, b.reg); 1005 } 1006 break; 1007 case 16: 1008 ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1)); 1009 break; 1010 case 8: 1011 ADDR_ADD(AL, 0, d.reg, b.reg, o.reg); 1012 break; 1013 } 1014 } 1015 1016 // ---------------------------------------------------------------------------- 1017 // cheezy register allocator... 1018 // ---------------------------------------------------------------------------- 1019 1020 // Modified to support MIPS processors, in a very simple way. We retain the 1021 // (Arm) limit of 16 total registers, but shift the mapping of those registers 1022 // from 0-15, to 2-17. Register 0 on Mips cannot be used as GP registers, and 1023 // register 1 has a traditional use as a temp). 1024 1025 RegisterAllocator::RegisterAllocator(int arch) : mRegs(arch) 1026 { 1027 } 1028 1029 void RegisterAllocator::reset() 1030 { 1031 mRegs.reset(); 1032 } 1033 1034 int RegisterAllocator::reserveReg(int reg) 1035 { 1036 return mRegs.reserve(reg); 1037 } 1038 1039 int RegisterAllocator::obtainReg() 1040 { 1041 return mRegs.obtain(); 1042 } 1043 1044 void RegisterAllocator::recycleReg(int reg) 1045 { 1046 mRegs.recycle(reg); 1047 } 1048 1049 RegisterAllocator::RegisterFile& RegisterAllocator::registerFile() 1050 { 1051 return mRegs; 1052 } 1053 1054 // ---------------------------------------------------------------------------- 1055 1056 RegisterAllocator::RegisterFile::RegisterFile(int codegen_arch) 1057 : mRegs(0), mTouched(0), mStatus(0), mArch(codegen_arch), mRegisterOffset(0) 1058 { 1059 if ((mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) || 1060 (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS64)) { 1061 mRegisterOffset = 2; // ARM has regs 0..15, MIPS offset to 2..17 1062 } 1063 reserve(ARMAssemblerInterface::SP); 1064 reserve(ARMAssemblerInterface::PC); 1065 } 1066 1067 RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs, int codegen_arch) 1068 : mRegs(rhs.mRegs), mTouched(rhs.mTouched), mArch(codegen_arch), mRegisterOffset(0) 1069 { 1070 if ((mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) || 1071 (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS64)) { 1072 mRegisterOffset = 2; // ARM has regs 0..15, MIPS offset to 2..17 1073 } 1074 } 1075 1076 RegisterAllocator::RegisterFile::~RegisterFile() 1077 { 1078 } 1079 1080 bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const 1081 { 1082 return (mRegs == rhs.mRegs); 1083 } 1084 1085 void RegisterAllocator::RegisterFile::reset() 1086 { 1087 mRegs = mTouched = mStatus = 0; 1088 reserve(ARMAssemblerInterface::SP); 1089 reserve(ARMAssemblerInterface::PC); 1090 } 1091 1092 // RegisterFile::reserve() take a register parameter in the 1093 // range 0-15 (Arm compatible), but on a Mips processor, will 1094 // return the actual allocated register in the range 2-17. 1095 int RegisterAllocator::RegisterFile::reserve(int reg) 1096 { 1097 reg += mRegisterOffset; 1098 LOG_ALWAYS_FATAL_IF(isUsed(reg), 1099 "reserving register %d, but already in use", 1100 reg); 1101 mRegs |= (1<<reg); 1102 mTouched |= mRegs; 1103 return reg; 1104 } 1105 1106 // This interface uses regMask in range 2-17 on MIPS, no translation. 1107 void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask) 1108 { 1109 mRegs |= regMask; 1110 mTouched |= regMask; 1111 } 1112 1113 int RegisterAllocator::RegisterFile::isUsed(int reg) const 1114 { 1115 LOG_ALWAYS_FATAL_IF(reg>=16+(int)mRegisterOffset, "invalid register %d", reg); 1116 return mRegs & (1<<reg); 1117 } 1118 1119 int RegisterAllocator::RegisterFile::obtain() 1120 { 1121 const char priorityList[14] = { 0, 1, 2, 3, 1122 12, 14, 4, 5, 1123 6, 7, 8, 9, 1124 10, 11 }; 1125 const int nbreg = sizeof(priorityList); 1126 int i, r, reg; 1127 for (i=0 ; i<nbreg ; i++) { 1128 r = priorityList[i]; 1129 if (!isUsed(r + mRegisterOffset)) { 1130 break; 1131 } 1132 } 1133 // this is not an error anymore because, we'll try again with 1134 // a lower optimization level. 1135 //ALOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n"); 1136 if (i >= nbreg) { 1137 mStatus |= OUT_OF_REGISTERS; 1138 // we return SP so we can more easily debug things 1139 // the code will never be run anyway. 1140 return ARMAssemblerInterface::SP; 1141 } 1142 reg = reserve(r); // Param in Arm range 0-15, returns range 2-17 on Mips. 1143 return reg; 1144 } 1145 1146 bool RegisterAllocator::RegisterFile::hasFreeRegs() const 1147 { 1148 uint32_t regs = mRegs >> mRegisterOffset; // MIPS fix. 1149 return ((regs & 0xFFFF) == 0xFFFF) ? false : true; 1150 } 1151 1152 int RegisterAllocator::RegisterFile::countFreeRegs() const 1153 { 1154 uint32_t regs = mRegs >> mRegisterOffset; // MIPS fix. 1155 int f = ~regs & 0xFFFF; 1156 // now count number of 1 1157 f = (f & 0x5555) + ((f>>1) & 0x5555); 1158 f = (f & 0x3333) + ((f>>2) & 0x3333); 1159 f = (f & 0x0F0F) + ((f>>4) & 0x0F0F); 1160 f = (f & 0x00FF) + ((f>>8) & 0x00FF); 1161 return f; 1162 } 1163 1164 void RegisterAllocator::RegisterFile::recycle(int reg) 1165 { 1166 // commented out, since common failure of running out of regs 1167 // triggers this assertion. Since the code is not execectued 1168 // in that case, it does not matter. No reason to FATAL err. 1169 // LOG_FATAL_IF(!isUsed(reg), 1170 // "recycling unallocated register %d", 1171 // reg); 1172 mRegs &= ~(1<<reg); 1173 } 1174 1175 void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask) 1176 { 1177 // commented out, since common failure of running out of regs 1178 // triggers this assertion. Since the code is not execectued 1179 // in that case, it does not matter. No reason to FATAL err. 1180 // LOG_FATAL_IF((mRegs & regMask)!=regMask, 1181 // "recycling unallocated registers " 1182 // "(recycle=%08x, allocated=%08x, unallocated=%08x)", 1183 // regMask, mRegs, mRegs®Mask); 1184 mRegs &= ~regMask; 1185 } 1186 1187 uint32_t RegisterAllocator::RegisterFile::touched() const 1188 { 1189 return mTouched; 1190 } 1191 1192 // ---------------------------------------------------------------------------- 1193 1194 }; // namespace android 1195 1196