/* libs/pixelflinger/codeflinger/GGLAssembler.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#define LOG_TAG "GGLAssembler"

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/types.h>
#include <cutils/log.h>

#include "GGLAssembler.h"

namespace android {

// ----------------------------------------------------------------------------

GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
    : ARMAssemblerProxy(target),
      RegisterAllocator(ARMAssemblerProxy::getCodegenArch()), mOptLevel(7)
{
}

GGLAssembler::~GGLAssembler()
{
}

void GGLAssembler::prolog()
{
    ARMAssemblerProxy::prolog();
}

void GGLAssembler::epilog(uint32_t touched)
{
    ARMAssemblerProxy::epilog(touched);
}

void GGLAssembler::reset(int opt_level)
{
    ARMAssemblerProxy::reset();
    RegisterAllocator::reset();
    mOptLevel = opt_level;
}

// ---------------------------------------------------------------------------

int GGLAssembler::scanline(const needs_t& needs, context_t const* c)
{
    int err = 0;
    int opt_level = mOptLevel;
    while (opt_level >= 0) {
        reset(opt_level);
        err = scanline_core(needs, c);
        if (err == 0)
            break;
        opt_level--;
    }

    // XXX: in theory, pcForLabel is not valid before generate()
    uint32_t* fragment_start_pc = pcForLabel("fragment_loop");
    uint32_t* fragment_end_pc = pcForLabel("epilog");
    const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc);

    // build a name for our pipeline
    char name[64];
    sprintf(name,
        "scanline__%08X:%08X_%08X_%08X [%3d ipp]",
        needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops);

    if (err) {
        ALOGE("Error while generating ""%s""\n", name);
        disassemble(name);
        return -1;
    }

    return generate(name);
}

int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
{
    int64_t duration = ggl_system_time();

    mBlendFactorCached = 0;
    mBlending = 0;
    mMasking = 0;
    mAA        = GGL_READ_NEEDS(P_AA, needs.p);
    mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
    mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
    mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
    mFog       = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
    mSmooth    = GGL_READ_NEEDS(SHADE, needs.n) != 0;
    mBuilderContext.needs = needs;
    mBuilderContext.c = c;
    mBuilderContext.Rctx = reserveReg(R0); // context always in R0
    mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];

    // ------------------------------------------------------------------------

    decodeLogicOpNeeds(needs);

    decodeTMUNeeds(needs, c);

    mBlendSrc  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
    mBlendDst  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
    mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
    mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));
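
    // No alpha channel in the color-buffer format: destination-alpha blend
    // factors cannot be sourced from the framebuffer, so they are forced to
    // GGL_ONE here.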
    if (!mCbFormat.c[GGLFormat::ALPHA].h) {
        if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrc == GGL_DST_ALPHA)) {
            mBlendSrc = GGL_ONE;
        }
        if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrcA == GGL_DST_ALPHA)) {
            mBlendSrcA = GGL_ONE;
        }
        if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDst == GGL_DST_ALPHA)) {
            mBlendDst = GGL_ONE;
        }
        if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDstA == GGL_DST_ALPHA)) {
            mBlendDstA = GGL_ONE;
        }
    }

    // if we need the framebuffer, read it now
    const int blending = blending_codes(mBlendSrc, mBlendDst) |
                         blending_codes(mBlendSrcA, mBlendDstA);

    // XXX: handle special cases, destination not modified...
    if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
        // Destination unmodified (beware of logic ops)
    } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
        // Destination is zero (beware of logic ops)
    }

    int fbComponents = 0;
    const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
    for (int i=0 ; i<4 ; i++) {
        const int mask = 1<<i;
        component_info_t& info = mInfo[i];
        int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
        int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
        if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
            fs = GGL_ONE;
        info.masked   = !!(masking & mask);
        info.inDest   = !info.masked && mCbFormat.c[i].h &&
                        ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
        if (mCbFormat.components >= GGL_LUMINANCE &&
                (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
            info.inDest = false;
        }
        info.needed   = (i==GGLFormat::ALPHA) &&
                        (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
        info.replaced = !!(mTextureMachine.replaced & mask);
        info.iterated = (!info.replaced && (info.inDest || info.needed));
        info.smooth   = mSmooth && info.iterated;
        info.fog      = mFog && info.inDest && (i != GGLFormat::ALPHA);
        info.blend    = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

        mBlending |= (info.blend ? mask : 0);
        mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
        fbComponents |= mCbFormat.c[i].h ? mask : 0;
    }

    mAllMasked = (mMasking == fbComponents);
    if (mAllMasked) {
        mDithering = 0;
    }

    fragment_parts_t parts;

    // ------------------------------------------------------------------------
    prolog();
    // ------------------------------------------------------------------------

    build_scanline_prolog(parts, needs);

    if (registerFile().status())
        return registerFile().status();

    // ------------------------------------------------------------------------
    label("fragment_loop");
    // ------------------------------------------------------------------------
    {
        Scratch regs(registerFile());
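
        // Per pixel only the x part of the dither index (the low
        // GGL_DITHER_ORDER_SHIFT bits of parts.count) has to advance. Rotate
        // those bits to the top of the word, increment them there so the
        // carry cannot spill into the rest of the register, then rotate back.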
        if (mDithering) {
            // update the dither index.
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT));
            ADD(AL, 0, parts.count.reg, parts.count.reg,
                    imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT)));
            MOV(AL, 0, parts.count.reg,
                    reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT));
        }

        // XXX: could we do an early alpha-test here in some cases?
        // It would probably be used only with smooth-alpha and no texture
        // (or no alpha component in the texture).

        // Early z-test
        if (mAlphaTest==GGL_ALWAYS) {
            build_depth_test(parts, Z_TEST|Z_WRITE);
        } else {
            // we cannot do the z-write here, because
            // it might be killed by the alpha-test later
            build_depth_test(parts, Z_TEST);
        }

        { // texture coordinates
            Scratch scratches(registerFile());

            // texel generation
            build_textures(parts, regs);
            if (registerFile().status())
                return registerFile().status();
        }

        if ((blending & (FACTOR_DST|BLEND_DST)) ||
                (mMasking && !mAllMasked) ||
                (mLogicOp & LOGIC_OP_DST))
        {
            // blending / logic_op / masking need the framebuffer
            mDstPixel.setTo(regs.obtain(), &mCbFormat);

            // load the framebuffer pixel
            comment("fetch color-buffer");
            load(parts.cbPtr, mDstPixel);
        }

        if (registerFile().status())
            return registerFile().status();

        pixel_t pixel;
        int directTex = mTextureMachine.directTexture;
        if (directTex | parts.packed) {
            // note: we can't have both here
            // iterated color or direct texture
            pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
            pixel.flags &= ~CORRUPTIBLE;
        } else {
            if (mDithering) {
                const int ctxtReg = mBuilderContext.Rctx;
                const int mask = GGL_DITHER_SIZE-1;
                parts.dither = reg_t(regs.obtain());
                AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
                ADDR_ADD(AL, 0, parts.dither.reg, ctxtReg, parts.dither.reg);
                LDRB(AL, parts.dither.reg, parts.dither.reg,
                        immed12_pre(GGL_OFFSETOF(ditherMatrix)));
            }

            // allocate a register for the resulting pixel
            pixel.setTo(regs.obtain(), &mCbFormat, FIRST);

            build_component(pixel, parts, GGLFormat::ALPHA, regs);

            if (mAlphaTest!=GGL_ALWAYS) {
                // only handle the z-write part here. We know z-test
                // was successful, as well as alpha-test.
                build_depth_test(parts, Z_WRITE);
            }

            build_component(pixel, parts, GGLFormat::RED, regs);
            build_component(pixel, parts, GGLFormat::GREEN, regs);
            build_component(pixel, parts, GGLFormat::BLUE, regs);

            pixel.flags |= CORRUPTIBLE;
        }

        if (registerFile().status())
            return registerFile().status();

        if (pixel.reg == -1) {
            // be defensive here. if we're here it's probably
            // that this whole fragment is a no-op.
            pixel = mDstPixel;
        }

        if (!mAllMasked) {
            // logic operation
            build_logic_op(pixel, regs);

            // masking
            build_masking(pixel, regs);

            comment("store");
            store(parts.cbPtr, pixel, WRITE_BACK);
        }
    }

    if (registerFile().status())
        return registerFile().status();
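
    // parts.count holds the remaining pixel count in its high half-word, so
    // one pixel is consumed by subtracting 1<<16 below; the loop repeats
    // while the result is still non-negative.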
    // update the iterated color...
    if (parts.reload != 3) {
        build_smooth_shade(parts);
    }

    // update iterated z
    build_iterate_z(parts);

    // update iterated fog
    build_iterate_f(parts);

    SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
    B(PL, "fragment_loop");
    label("epilog");
    epilog(registerFile().touched());

    if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
        if (mDepthTest!=GGL_ALWAYS) {
            label("discard_before_textures");
            build_iterate_texture_coordinates(parts);
        }
        label("discard_after_textures");
        build_smooth_shade(parts);
        build_iterate_z(parts);
        build_iterate_f(parts);
        if (!mAllMasked) {
            ADDR_ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
        }
        SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
        B(PL, "fragment_loop");
        epilog(registerFile().touched());
    }

    return registerFile().status();
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_scanline_prolog(
    fragment_parts_t& parts, const needs_t& needs)
{
    Scratch scratches(registerFile());
    int Rctx = mBuilderContext.Rctx;

    // compute count
    comment("compute ct (# of pixels to process)");
    parts.count.setTo(obtainReg());
    int Rx = scratches.obtain();
    int Ry = scratches.obtain();
    CONTEXT_LOAD(Rx, iterators.xl);
    CONTEXT_LOAD(parts.count.reg, iterators.xr);
    CONTEXT_LOAD(Ry, iterators.y);

    // parts.count = iterators.xr - Rx
    SUB(AL, 0, parts.count.reg, parts.count.reg, Rx);
    SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1));

    if (mDithering) {
        // parts.count.reg = 0xNNNNXXDD
        // NNNN = count-1
        // DD   = dither offset
        // XX   = 0xxxxxxx (x = garbage)
        Scratch scratches(registerFile());
        int tx = scratches.obtain();
        int ty = scratches.obtain();
        AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK));
        AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK));
        ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT));
        ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16));
    } else {
        // parts.count.reg = 0xNNNN0000
        // NNNN = count-1
        MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16));
    }

    if (!mAllMasked) {
        // compute dst ptr
        comment("compute color-buffer pointer");
        const int cb_bits = mCbFormat.size*8;
        int Rs = scratches.obtain();
        parts.cbPtr.setTo(obtainReg(), cb_bits);
        CONTEXT_LOAD(Rs, state.buffers.color.stride);
        CONTEXT_ADDR_LOAD(parts.cbPtr.reg, state.buffers.color.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);  // Rs = Rx + Ry*Rs
        base_offset(parts.cbPtr, parts.cbPtr, Rs);
        scratches.recycle(Rs);
    }

    // init fog
    const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
    if (need_fog) {
        comment("compute initial fog coordinate");
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int ydfdy = scratches.obtain();
        int f = ydfdy;
        CONTEXT_LOAD(dfdx, generated_vars.dfdx);
        CONTEXT_LOAD(ydfdy, iterators.ydfdy);
        MLA(AL, 0, f, Rx, dfdx, ydfdy);
        CONTEXT_STORE(f, generated_vars.f);
    }

    // init Z coordinate
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        parts.z = reg_t(obtainReg());
        comment("compute initial Z coordinate");
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        int ydzdy = parts.z.reg;
        CONTEXT_LOAD(dzdx, generated_vars.dzdx);    // 1.31 fixed-point
        CONTEXT_LOAD(ydzdy, iterators.ydzdy);       // 1.31 fixed-point
        MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy);

        // we're going to index zbase of parts.count
        // zbase = base + (xl-count + stride*y)*2
        int Rs = dzdx;
        int zbase = scratches.obtain();
        CONTEXT_LOAD(Rs, state.buffers.depth.stride);
        CONTEXT_ADDR_LOAD(zbase, state.buffers.depth.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);
        ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
        ADDR_ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
        CONTEXT_ADDR_STORE(zbase, generated_vars.zbase);
    }

    // init texture coordinates
    init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
    scratches.recycle(Ry);

    // iterated color
    init_iterated_color(parts, reg_t(Rx));

    // init coverage factor application (anti-aliasing)
    if (mAA) {
        parts.covPtr.setTo(obtainReg(), 16);
        CONTEXT_ADDR_LOAD(parts.covPtr.reg, state.buffers.coverage);
        ADDR_ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_component( pixel_t& pixel,
                                    const fragment_parts_t& parts,
                                    int component,
                                    Scratch& regs)
{
    static char const * comments[] = {"alpha", "red", "green", "blue"};
    comment(comments[component]);

    // local register file
    Scratch scratches(registerFile());
    const int dst_component_size = pixel.component_size(component);

    component_t temp(-1);
    build_incoming_component( temp, dst_component_size,
            parts, component, scratches, regs);

    if (mInfo[component].inDest) {

        // blending...
        build_blending( temp, mDstPixel, component, scratches );

        // downshift component and rebuild pixel...
        downshift(pixel, component, temp, parts.dither);
    }
}

void GGLAssembler::build_incoming_component(
                                    component_t& temp,
                                    int dst_size,
                                    const fragment_parts_t& parts,
                                    int component,
                                    Scratch& scratches,
                                    Scratch& global_regs)
{
    const uint32_t component_mask = 1<<component;

    // Figure out what we need for the blending stage...
    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
        fs = GGL_ONE;
    }

    // Figure out what we need to extract and for what reason
    const int blending = blending_codes(fs, fd);

    // Are we actually going to blend?
    const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

    // expand the source if the destination has more bits
    int need_expander = false;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if ((tmu.format_idx) &&
            (parts.texel[i].component_size(component) < dst_size)) {
            need_expander = true;
        }
    }
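
    // Decide whether this component has to be extracted into its own
    // register: extraction is needed whenever the component will be combined
    // arithmetically (blending, smooth shading, fog, dithering,
    // multi-texturing) or expanded; otherwise a component of a packed pixel
    // can be selected directly below.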
    // do we need to extract this component?
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
                                         (isAlphaSourceNeeded());
    int need_extract = mInfo[component].needed;
    if (mInfo[component].inDest)
    {
        need_extract |= ((need_blending ?
                        (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
        need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
        need_extract |= mInfo[component].smooth;
        need_extract |= mInfo[component].fog;
        need_extract |= mDithering;
        need_extract |= multiTexture;
    }

    if (need_extract) {
        Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
        component_t fragment;

        // iterated color
        build_iterated_color(fragment, parts, component, regs);

        // texture environment (decal, modulate, replace)
        build_texture_environment(fragment, parts, component, regs);

        // expand the source if the destination has more bits
        if (need_expander && (fragment.size() < dst_size)) {
            // we're here only if we fetched a texel
            // (so we know for sure fragment is CORRUPTIBLE)
            expand(fragment, fragment, dst_size);
        }

        // We have a few specific things to do for the alpha-channel
        if ((component==GGLFormat::ALPHA) &&
            (mInfo[component].needed || fragment.size()<dst_size))
        {
            // convert to integer_t first and make sure
            // we don't corrupt a needed register
            if (fragment.l) {
                component_t incoming(fragment);
                modify(fragment, regs);
                MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l));
                fragment.h -= fragment.l;
                fragment.l = 0;
            }

            // coverage factor application
            build_coverage_application(fragment, parts, regs);

            // alpha-test
            build_alpha_test(fragment, parts);

            if (blend_needs_alpha_source) {
                // We keep only 8 bits for the blending stage
                const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
                if (fragment.flags & CORRUPTIBLE) {
                    fragment.flags &= ~CORRUPTIBLE;
                    mAlphaSource.setTo(fragment.reg,
                            fragment.size(), fragment.flags);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                                reg_imm(mAlphaSource.reg, LSR, shift));
                    }
                } else {
                    // XXX: it would be better to do this in build_blend_factor()
                    // so we can avoid the extra MOV below.
                    mAlphaSource.setTo(regs.obtain(),
                            fragment.size(), CORRUPTIBLE);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                                reg_imm(fragment.reg, LSR, shift));
                    } else {
                        MOV(AL, 0, mAlphaSource.reg, fragment.reg);
                    }
                }
                mAlphaSource.s -= shift;
            }
        }

        // fog...
        build_fog( fragment, component, regs );

        temp = fragment;
    } else {
        if (mInfo[component].inDest) {
            // extraction not needed and replace
            // we just select the right component
            if ((mTextureMachine.replaced & component_mask) == 0) {
                // component wasn't replaced, so use it!
                temp = component_t(parts.iterated, component);
            }
            for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
                const texture_unit_t& tmu = mTextureMachine.tmu[i];
                if ((tmu.mask & component_mask) &&
                    ((tmu.replaced & component_mask) == 0)) {
                    temp = component_t(parts.texel[i], component);
                }
            }
        }
    }
}
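
// isAlphaSourceNeeded() returns true when one of the color blend factors
// consumes the fragment's alpha; in that case an 8-bit copy of it is kept in
// mAlphaSource for the blending stage.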
bool GGLAssembler::isAlphaSourceNeeded() const
{
    // XXX: also needed for alpha-test
    const int bs = mBlendSrc;
    const int bd = mBlendDst;
    return  bs==GGL_SRC_ALPHA_SATURATE ||
            bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA ||
            bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ;
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts)
{
    if (mSmooth && !parts.iterated_packed) {
        // update the iterated color in a pipelined way...
        comment("update iterated color");
        Scratch scratches(registerFile());

        const int reload = parts.reload;
        for (int i=0 ; i<4 ; i++) {
            if (!mInfo[i].iterated)
                continue;

            int c = parts.argb[i].reg;
            int dx = parts.argb_dx[i].reg;

            if (reload & 1) {
                c = scratches.obtain();
                CONTEXT_LOAD(c, generated_vars.argb[i].c);
            }
            if (reload & 2) {
                dx = scratches.obtain();
                CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
            }

            if (mSmooth) {
                ADD(AL, 0, c, c, dx);
            }

            if (reload & 1) {
                CONTEXT_STORE(c, generated_vars.argb[i].c);
                scratches.recycle(c);
            }
            if (reload & 2) {
                scratches.recycle(dx);
            }
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_coverage_application(component_t& fragment,
        const fragment_parts_t& parts, Scratch& regs)
{
    // here fragment.l is guaranteed to be 0
    if (mAA) {
        // coverages are 1.15 fixed-point numbers
        comment("coverage application");

        component_t incoming(fragment);
        modify(fragment, regs);

        Scratch scratches(registerFile());
        int cf = scratches.obtain();
        LDRH(AL, cf, parts.covPtr.reg, immed8_post(2));
        if (fragment.h > 31) {
            fragment.h--;
            SMULWB(AL, fragment.reg, incoming.reg, cf);
        } else {
            MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1));
            SMULWB(AL, fragment.reg, fragment.reg, cf);
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_alpha_test(component_t& fragment,
                                    const fragment_parts_t& /*parts*/)
{
    if (mAlphaTest != GGL_ALWAYS) {
        comment("Alpha Test");
        Scratch scratches(registerFile());
        int ref = scratches.obtain();
        const int shift = GGL_COLOR_BITS-fragment.size();
        CONTEXT_LOAD(ref, state.alpha_test.ref);
        if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift));
        else       CMP(AL, fragment.reg, ref);
        int cc = NV;
        switch (mAlphaTest) {
        case GGL_NEVER:     cc = NV;    break;
        case GGL_LESS:      cc = LT;    break;
        case GGL_EQUAL:     cc = EQ;    break;
        case GGL_LEQUAL:    cc = LS;    break;
        case GGL_GREATER:   cc = HI;    break;
        case GGL_NOTEQUAL:  cc = NE;    break;
        case GGL_GEQUAL:    cc = HS;    break;
        }
        B(cc^1, "discard_after_textures");
    }
}

// ---------------------------------------------------------------------------
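
// build_depth_test() emits the per-fragment depth compare and/or the 16-bit
// depth-buffer write, as selected by the Z_TEST / Z_WRITE bits in 'mask'.
// Fragments that fail the test branch to "discard_before_textures".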
void GGLAssembler::build_depth_test(
        const fragment_parts_t& parts, uint32_t mask)
{
    mask &= Z_TEST|Z_WRITE;
    const needs_t& needs = mBuilderContext.needs;
    const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
    Scratch scratches(registerFile());

    if (mDepthTest != GGL_ALWAYS || zmask) {
        int cc=AL, ic=AL;
        switch (mDepthTest) {
        case GGL_LESS:      ic = HI;    break;
        case GGL_EQUAL:     ic = EQ;    break;
        case GGL_LEQUAL:    ic = HS;    break;
        case GGL_GREATER:   ic = LT;    break;
        case GGL_NOTEQUAL:  ic = NE;    break;
        case GGL_GEQUAL:    ic = LS;    break;
        case GGL_NEVER:
            // this never happens, because it's taken care of when
            // computing the needs. but we keep it for completeness.
            comment("Depth Test (NEVER)");
            B(AL, "discard_before_textures");
            return;
        case GGL_ALWAYS:
            // we're here because zmask is enabled
            mask &= ~Z_TEST;    // test always passes.
            break;
        }

        // invert the condition
        cc = ic^1;

        if ((mask & Z_WRITE) && !zmask) {
            mask &= ~Z_WRITE;
        }

        if (!mask)
            return;

        comment("Depth Test");

        int zbase = scratches.obtain();
        int depth = scratches.obtain();
        int z = parts.z.reg;

        CONTEXT_ADDR_LOAD(zbase, generated_vars.zbase);  // stall
        ADDR_SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
            // above does zbase = zbase - ((count >> 16) << 1)

        if (mask & Z_TEST) {
            LDRH(AL, depth, zbase);  // stall
            CMP(AL, depth, reg_imm(z, LSR, 16));
            B(cc, "discard_before_textures");
        }
        if (mask & Z_WRITE) {
            if (mask == Z_WRITE) {
                // only z-write asked, cc is meaningless
                ic = AL;
            }
            MOV(AL, 0, depth, reg_imm(z, LSR, 16));
            STRH(ic, depth, zbase);
        }
    }
}

void GGLAssembler::build_iterate_z(const fragment_parts_t& parts)
{
    const needs_t& needs = mBuilderContext.needs;
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        CONTEXT_LOAD(dzdx, generated_vars.dzdx);    // stall
        ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx);
    }
}

void GGLAssembler::build_iterate_f(const fragment_parts_t& /*parts*/)
{
    const needs_t& needs = mBuilderContext.needs;
    if (GGL_READ_NEEDS(P_FOG, needs.p)) {
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int f = scratches.obtain();
        CONTEXT_LOAD(f, generated_vars.f);
        CONTEXT_LOAD(dfdx, generated_vars.dfdx);    // stall
        ADD(AL, 0, f, f, dfdx);
        CONTEXT_STORE(f, generated_vars.f);
    }
}

// ---------------------------------------------------------------------------
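
// build_logic_op() combines the incoming pixel with the destination pixel
// (mDstPixel) according to the raster logic op encoded in the needs;
// GGL_COPY is the common case and generates no code.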
void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs)
{
    const needs_t& needs = mBuilderContext.needs;
    const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
    if (opcode == GGL_COPY)
        return;

    comment("logic operation");

    pixel_t s(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    pixel_t d(mDstPixel);
    switch(opcode) {
    case GGL_CLEAR:         MOV(AL, 0, pixel.reg, imm(0));              break;
    case GGL_AND:           AND(AL, 0, pixel.reg, s.reg, d.reg);        break;
    case GGL_AND_REVERSE:   BIC(AL, 0, pixel.reg, s.reg, d.reg);        break;
    case GGL_COPY:                                                      break;
    case GGL_AND_INVERTED:  BIC(AL, 0, pixel.reg, d.reg, s.reg);        break;
    case GGL_NOOP:          MOV(AL, 0, pixel.reg, d.reg);               break;
    case GGL_XOR:           EOR(AL, 0, pixel.reg, s.reg, d.reg);        break;
    case GGL_OR:            ORR(AL, 0, pixel.reg, s.reg, d.reg);        break;
    case GGL_NOR:           ORR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);           break;
    case GGL_EQUIV:         EOR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);           break;
    case GGL_INVERT:        MVN(AL, 0, pixel.reg, d.reg);               break;
    case GGL_OR_REVERSE:    // s | ~d == ~(~s & d)
                            BIC(AL, 0, pixel.reg, d.reg, s.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);           break;
    case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg);               break;
    case GGL_OR_INVERTED:   // ~s | d == ~(s & ~d)
                            BIC(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);           break;
    case GGL_NAND:          AND(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);           break;
    case GGL_SET:           MVN(AL, 0, pixel.reg, imm(0));              break;
    };
}

// ---------------------------------------------------------------------------

static uint32_t find_bottom(uint32_t val)
{
    uint32_t i = 0;
    while (!(val & (3<<i)))
        i+= 2;
    return i;
}

static void normalize(uint32_t& val, uint32_t& rot)
{
    rot = 0;
    while (!(val&3) || (val & 0xFC000000)) {
        uint32_t newval;
        newval = val >> 2;
        newval |= (val&3) << 30;
        val = newval;
        rot += 2;
        if (rot == 32) {
            rot = 0;
            break;
        }
    }
}

void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
{
    uint32_t rot;
    uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
    mask &= size;

    if (mask == size) {
        if (d != s)
            MOV( AL, 0, d, s);
        return;
    }

    if (getCodegenArch() == CODEGEN_ARCH_MIPS) {
        // MIPS can do a 16-bit immediate in 1 instruction and a 32-bit
        // immediate in 3. The 'while (mask)' loop below is buggy on MIPS,
        // because isValidImmediate() always returns true there and we would
        // end up emitting multiple AND instructions (positive logic).
        AND( AL, 0, d, s, imm(mask) );
        return;
    }
    else if (getCodegenArch() == CODEGEN_ARCH_ARM64) {
        AND( AL, 0, d, s, imm(mask) );
        return;
    }

    int negative_logic = !isValidImmediate(mask);
    if (negative_logic) {
        mask = ~mask & size;
    }
    normalize(mask, rot);

    if (mask) {
        while (mask) {
            uint32_t bitpos = find_bottom(mask);
            int shift = rot + bitpos;
            uint32_t m = mask & (0xff << bitpos);
            mask &= ~m;
            m >>= bitpos;
            int32_t newMask = (m<<shift) | (m>>(32-shift));
            if (!negative_logic) {
                AND( AL, 0, d, s, imm(newMask) );
            } else {
                BIC( AL, 0, d, s, imm(newMask) );
            }
            s = d;
        }
    } else {
        MOV( AL, 0, d, imm(0));
    }
}
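
// build_masking() applies the color write-mask: unmasked components are taken
// from the source pixel while masked components are preserved from the
// destination, using AND/BIC immediates built with build_and_immediate().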
void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs)
{
    if (!mMasking || mAllMasked) {
        return;
    }

    comment("color mask");

    pixel_t fb(mDstPixel);
    pixel_t s(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    int mask = 0;
    for (int i=0 ; i<4 ; i++) {
        const int component_mask = 1<<i;
        const int h = fb.format.c[i].h;
        const int l = fb.format.c[i].l;
        if (h && (!(mMasking & component_mask))) {
            mask |= ((1<<(h-l))-1) << l;
        }
    }

    // There is no need to clear the masked components of the source
    // (unless we applied a logic op), because they're already zeroed
    // by construction (masked components are not computed)

    if (mLogicOp) {
        const needs_t& needs = mBuilderContext.needs;
        const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
        if (opcode != GGL_CLEAR) {
            // clear masked component of source
            build_and_immediate(pixel.reg, s.reg, mask, fb.size());
            s = pixel;
        }
    }

    // clear non masked components of destination
    build_and_immediate(fb.reg, fb.reg, ~mask, fb.size());

    // or back the channels that were masked
    if (s.reg == fb.reg) {
        // this is in fact a MOV
        if (s.reg == pixel.reg) {
            // ugh. this is in fact a nop
        } else {
            MOV(AL, 0, pixel.reg, fb.reg);
        }
    } else {
        ORR(AL, 0, pixel.reg, s.reg, fb.reg);
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::base_offset(
        const pointer_t& d, const pointer_t& b, const reg_t& o)
{
    switch (b.size) {
    case 32:
        ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
        break;
    case 24:
        if (d.reg == b.reg) {
            ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
            ADDR_ADD(AL, 0, d.reg, d.reg, o.reg);
        } else {
            ADDR_ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
            ADDR_ADD(AL, 0, d.reg, d.reg, b.reg);
        }
        break;
    case 16:
        ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
        break;
    case 8:
        ADDR_ADD(AL, 0, d.reg, b.reg, o.reg);
        break;
    }
}

// ----------------------------------------------------------------------------
// cheezy register allocator...
// ----------------------------------------------------------------------------

// Modified to support MIPS processors, in a very simple way. We retain the
// (ARM) limit of 16 total registers, but shift the mapping of those registers
// from 0-15 to 2-17. (Register 0 on MIPS cannot be used as a GP register, and
// register 1 has a traditional use as a temp.)

RegisterAllocator::RegisterAllocator(int arch) : mRegs(arch)
{
}

void RegisterAllocator::reset()
{
    mRegs.reset();
}

int RegisterAllocator::reserveReg(int reg)
{
    return mRegs.reserve(reg);
}

int RegisterAllocator::obtainReg()
{
    return mRegs.obtain();
}

void RegisterAllocator::recycleReg(int reg)
{
    mRegs.recycle(reg);
}

RegisterAllocator::RegisterFile& RegisterAllocator::registerFile()
{
    return mRegs;
}

// ----------------------------------------------------------------------------

RegisterAllocator::RegisterFile::RegisterFile(int codegen_arch)
    : mRegs(0), mTouched(0), mStatus(0), mArch(codegen_arch), mRegisterOffset(0)
{
    if (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) {
        mRegisterOffset = 2;    // ARM has regs 0..15, MIPS offset to 2..17
    }
    reserve(ARMAssemblerInterface::SP);
    reserve(ARMAssemblerInterface::PC);
}

RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs, int codegen_arch)
    : mRegs(rhs.mRegs), mTouched(rhs.mTouched), mArch(codegen_arch), mRegisterOffset(0)
{
    if (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) {
        mRegisterOffset = 2;    // ARM has regs 0..15, MIPS offset to 2..17
    }
}

RegisterAllocator::RegisterFile::~RegisterFile()
{
}

bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const
{
    return (mRegs == rhs.mRegs);
}

void RegisterAllocator::RegisterFile::reset()
{
    mRegs = mTouched = mStatus = 0;
    reserve(ARMAssemblerInterface::SP);
    reserve(ARMAssemblerInterface::PC);
}

// RegisterFile::reserve() takes a register parameter in the range 0-15
// (ARM compatible), but on a MIPS processor it will return the actual
// allocated register in the range 2-17.
int RegisterAllocator::RegisterFile::reserve(int reg)
{
    reg += mRegisterOffset;
    LOG_ALWAYS_FATAL_IF(isUsed(reg),
                        "reserving register %d, but already in use",
                        reg);
    mRegs |= (1<<reg);
    mTouched |= mRegs;
    return reg;
}

// This interface uses regMask in range 2-17 on MIPS, no translation.
void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
{
    mRegs |= regMask;
    mTouched |= regMask;
}

int RegisterAllocator::RegisterFile::isUsed(int reg) const
{
    LOG_ALWAYS_FATAL_IF(reg>=16+(int)mRegisterOffset, "invalid register %d", reg);
    return mRegs & (1<<reg);
}

int RegisterAllocator::RegisterFile::obtain()
{
    const char priorityList[14] = {  0,  1,  2,  3,
                                    12, 14,  4,  5,
                                     6,  7,  8,  9,
                                    10, 11 };
    const int nbreg = sizeof(priorityList);
    int i, r, reg;
    for (i=0 ; i<nbreg ; i++) {
        r = priorityList[i];
        if (!isUsed(r + mRegisterOffset)) {
            break;
        }
    }
    // this is not an error anymore, because we'll try again with
    // a lower optimization level.
    //ALOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n");
    if (i >= nbreg) {
        mStatus |= OUT_OF_REGISTERS;
        // we return SP so we can more easily debug things
        // the code will never be run anyway.
        return ARMAssemblerInterface::SP;
    }
    reg = reserve(r);  // Param in ARM range 0-15, returns range 2-17 on MIPS.
    return reg;
}

bool RegisterAllocator::RegisterFile::hasFreeRegs() const
{
    uint32_t regs = mRegs >> mRegisterOffset;   // MIPS fix.
    return ((regs & 0xFFFF) == 0xFFFF) ? false : true;
}

int RegisterAllocator::RegisterFile::countFreeRegs() const
{
    uint32_t regs = mRegs >> mRegisterOffset;   // MIPS fix.
    int f = ~regs & 0xFFFF;
    // now count number of 1
    f = (f & 0x5555) + ((f>>1) & 0x5555);
    f = (f & 0x3333) + ((f>>2) & 0x3333);
    f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);
    f = (f & 0x00FF) + ((f>>8) & 0x00FF);
    return f;
}

void RegisterAllocator::RegisterFile::recycle(int reg)
{
    // commented out, since the common failure of running out of registers
    // triggers this assertion. Since the code is not executed in that case,
    // it does not matter. No reason to FATAL err.
    // LOG_FATAL_IF(!isUsed(reg),
    //         "recycling unallocated register %d",
    //         reg);
    mRegs &= ~(1<<reg);
}

void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
{
    // commented out, since the common failure of running out of registers
    // triggers this assertion. Since the code is not executed in that case,
    // it does not matter. No reason to FATAL err.
    // LOG_FATAL_IF((mRegs & regMask)!=regMask,
    //         "recycling unallocated registers "
    //         "(recycle=%08x, allocated=%08x, unallocated=%08x)",
    //         regMask, mRegs, mRegs&regMask);
    mRegs &= ~regMask;
}

uint32_t RegisterAllocator::RegisterFile::touched() const
{
    return mTouched;
}

// ----------------------------------------------------------------------------

}; // namespace android