/* libs/pixelflinger/codeflinger/GGLAssembler.cpp
**
** Copyright 2006, The Android Open Source Project
**
** Licensed under the Apache License, Version 2.0 (the "License");
** you may not use this file except in compliance with the License.
** You may obtain a copy of the License at
**
**     http://www.apache.org/licenses/LICENSE-2.0
**
** Unless required by applicable law or agreed to in writing, software
** distributed under the License is distributed on an "AS IS" BASIS,
** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
** See the License for the specific language governing permissions and
** limitations under the License.
*/

#define LOG_TAG "GGLAssembler"

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>

#include <log/log.h>

#include "GGLAssembler.h"

namespace android {

// ----------------------------------------------------------------------------

GGLAssembler::GGLAssembler(ARMAssemblerInterface* target)
    : ARMAssemblerProxy(target),
      RegisterAllocator(ARMAssemblerProxy::getCodegenArch()), mOptLevel(7)
{
}

GGLAssembler::~GGLAssembler()
{
}

void GGLAssembler::prolog()
{
    ARMAssemblerProxy::prolog();
}

void GGLAssembler::epilog(uint32_t touched)
{
    ARMAssemblerProxy::epilog(touched);
}

void GGLAssembler::reset(int opt_level)
{
    ARMAssemblerProxy::reset();
    RegisterAllocator::reset();
    mOptLevel = opt_level;
}

// ---------------------------------------------------------------------------

int GGLAssembler::scanline(const needs_t& needs, context_t const* c)
{
    int err = 0;
    int opt_level = mOptLevel;
    while (opt_level >= 0) {
        reset(opt_level);
        err = scanline_core(needs, c);
        if (err == 0)
            break;
        opt_level--;
    }

    // XXX: in theory, pcForLabel is not valid before generate()
    uint32_t* fragment_start_pc = pcForLabel("fragment_loop");
    uint32_t* fragment_end_pc = pcForLabel("epilog");
    const int per_fragment_ops = int(fragment_end_pc - fragment_start_pc);

    // build a name for our pipeline
    char name[64];
    snprintf(name, sizeof(name),
            "scanline__%08X:%08X_%08X_%08X [%3d ipp]",
            needs.p, needs.n, needs.t[0], needs.t[1], per_fragment_ops);

    if (err) {
        ALOGE("Error while generating \"%s\"\n", name);
        disassemble(name);
        return -1;
    }

    return generate(name);
}

int GGLAssembler::scanline_core(const needs_t& needs, context_t const* c)
{
    int64_t duration = ggl_system_time();

    mBlendFactorCached = 0;
    mBlending = 0;
    mMasking = 0;
    mAA = GGL_READ_NEEDS(P_AA, needs.p);
    mDithering = GGL_READ_NEEDS(P_DITHER, needs.p);
    mAlphaTest = GGL_READ_NEEDS(P_ALPHA_TEST, needs.p) + GGL_NEVER;
    mDepthTest = GGL_READ_NEEDS(P_DEPTH_TEST, needs.p) + GGL_NEVER;
    mFog = GGL_READ_NEEDS(P_FOG, needs.p) != 0;
    mSmooth = GGL_READ_NEEDS(SHADE, needs.n) != 0;
    mBuilderContext.needs = needs;
    mBuilderContext.c = c;
    mBuilderContext.Rctx = reserveReg(R0); // context always in R0
    mCbFormat = c->formats[ GGL_READ_NEEDS(CB_FORMAT, needs.n) ];

    // ------------------------------------------------------------------------

    decodeLogicOpNeeds(needs);

    decodeTMUNeeds(needs, c);

    mBlendSrc  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRC, needs.n));
    mBlendDst  = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DST, needs.n));
    mBlendSrcA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_SRCA, needs.n));
    mBlendDstA = ggl_needs_to_blendfactor(GGL_READ_NEEDS(BLEND_DSTA, needs.n));
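
    // Note: when the color-buffer format has no alpha bits, destination
    // alpha reads back as 1.0 (standard GL behavior for missing bitplanes),
    // so the blend factors that sample it are rewritten below before any
    // code is emitted; GGL_DST_ALPHA, for instance, behaves like GGL_ONE.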
    if (!mCbFormat.c[GGLFormat::ALPHA].h) {
        if ((mBlendSrc == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrc == GGL_DST_ALPHA)) {
            mBlendSrc = GGL_ONE;
        }
        if ((mBlendSrcA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendSrcA == GGL_DST_ALPHA)) {
            mBlendSrcA = GGL_ONE;
        }
        if ((mBlendDst == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDst == GGL_DST_ALPHA)) {
            mBlendDst = GGL_ONE;
        }
        if ((mBlendDstA == GGL_ONE_MINUS_DST_ALPHA) ||
            (mBlendDstA == GGL_DST_ALPHA)) {
            mBlendDstA = GGL_ONE;
        }
    }

    // if we need the framebuffer, read it now
    const int blending = blending_codes(mBlendSrc, mBlendDst) |
                         blending_codes(mBlendSrcA, mBlendDstA);

    // XXX: handle special cases, destination not modified...
    if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ONE) && (mBlendDstA==GGL_ONE)) {
        // Destination unmodified (beware of logic ops)
    } else if ((mBlendSrc==GGL_ZERO) && (mBlendSrcA==GGL_ZERO) &&
        (mBlendDst==GGL_ZERO) && (mBlendDstA==GGL_ZERO)) {
        // Destination is zero (beware of logic ops)
    }

    int fbComponents = 0;
    const int masking = GGL_READ_NEEDS(MASK_ARGB, needs.n);
    for (int i=0 ; i<4 ; i++) {
        const int mask = 1<<i;
        component_info_t& info = mInfo[i];
        int fs = i==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
        int fd = i==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
        if (fs==GGL_SRC_ALPHA_SATURATE && i==GGLFormat::ALPHA)
            fs = GGL_ONE;
        info.masked = !!(masking & mask);
        info.inDest = !info.masked && mCbFormat.c[i].h &&
                      ((mLogicOp & LOGIC_OP_SRC) || (!mLogicOp));
        if (mCbFormat.components >= GGL_LUMINANCE &&
                (i==GGLFormat::GREEN || i==GGLFormat::BLUE)) {
            info.inDest = false;
        }
        info.needed = (i==GGLFormat::ALPHA) &&
                      (isAlphaSourceNeeded() || mAlphaTest != GGL_ALWAYS);
        info.replaced = !!(mTextureMachine.replaced & mask);
        info.iterated = (!info.replaced && (info.inDest || info.needed));
        info.smooth = mSmooth && info.iterated;
        info.fog = mFog && info.inDest && (i != GGLFormat::ALPHA);
        info.blend = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

        mBlending |= (info.blend ? mask : 0);
        mMasking |= (mCbFormat.c[i].h && info.masked) ? mask : 0;
        fbComponents |= mCbFormat.c[i].h ? mask : 0;
    }

    mAllMasked = (mMasking == fbComponents);
    if (mAllMasked) {
        mDithering = 0;
    }

    fragment_parts_t parts;

    // ------------------------------------------------------------------------
    prolog();
    // ------------------------------------------------------------------------

    build_scanline_prolog(parts, needs);

    if (registerFile().status())
        return registerFile().status();

    // ------------------------------------------------------------------------
    label("fragment_loop");
    // ------------------------------------------------------------------------
    {
        Scratch regs(registerFile());

        if (mDithering) {
            // update the dither index.
            MOV(AL, 0, parts.count.reg,
                reg_imm(parts.count.reg, ROR, GGL_DITHER_ORDER_SHIFT));
            ADD(AL, 0, parts.count.reg, parts.count.reg,
                imm( 1 << (32 - GGL_DITHER_ORDER_SHIFT)));
            MOV(AL, 0, parts.count.reg,
                reg_imm(parts.count.reg, ROR, 32 - GGL_DITHER_ORDER_SHIFT));
        }
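
        // The sequence above advances the x component of the dither-matrix
        // index (the low GGL_DITHER_ORDER_SHIFT bits of parts.count.reg)
        // without disturbing the pixel count in the high half:
        //   1. ROR by the shift moves the low bits to the top of the word,
        //   2. ADD 1<<(32-shift) increments them there (any carry simply
        //      falls off bit 31 instead of corrupting the count),
        //   3. ROR by (32-shift) rotates everything back into place.
        // E.g. with a shift of 2 and count.reg == 0x00630003, the result
        // is 0x00630000: the 2-bit field wraps from 3 back to 0.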

        // XXX: could we do an early alpha-test here in some cases?
        // It would probably be used only with smooth-alpha and no texture
        // (or no alpha component in the texture).

        // Early z-test
        if (mAlphaTest==GGL_ALWAYS) {
            build_depth_test(parts, Z_TEST|Z_WRITE);
        } else {
            // we cannot do the z-write here, because
            // it might be killed by the alpha-test later
            build_depth_test(parts, Z_TEST);
        }

        { // texture coordinates
            Scratch scratches(registerFile());

            // texel generation
            build_textures(parts, regs);
            if (registerFile().status())
                return registerFile().status();
        }

        if ((blending & (FACTOR_DST|BLEND_DST)) ||
                (mMasking && !mAllMasked) ||
                (mLogicOp & LOGIC_OP_DST))
        {
            // blending / logic_op / masking need the framebuffer
            mDstPixel.setTo(regs.obtain(), &mCbFormat);

            // load the framebuffer pixel
            comment("fetch color-buffer");
            load(parts.cbPtr, mDstPixel);
        }

        if (registerFile().status())
            return registerFile().status();

        pixel_t pixel;
        int directTex = mTextureMachine.directTexture;
        if (directTex | parts.packed) {
            // note: we can't have both here
            // iterated color or direct texture
            pixel = directTex ? parts.texel[directTex-1] : parts.iterated;
            pixel.flags &= ~CORRUPTIBLE;
        } else {
            if (mDithering) {
                const int ctxtReg = mBuilderContext.Rctx;
                const int mask = GGL_DITHER_SIZE-1;
                parts.dither = reg_t(regs.obtain());
                AND(AL, 0, parts.dither.reg, parts.count.reg, imm(mask));
                ADDR_ADD(AL, 0, parts.dither.reg, ctxtReg, parts.dither.reg);
                LDRB(AL, parts.dither.reg, parts.dither.reg,
                        immed12_pre(GGL_OFFSETOF(ditherMatrix)));
            }

            // allocate a register for the resulting pixel
            pixel.setTo(regs.obtain(), &mCbFormat, FIRST);

            build_component(pixel, parts, GGLFormat::ALPHA, regs);

            if (mAlphaTest!=GGL_ALWAYS) {
                // only handle the z-write part here. We know z-test
                // was successful, as well as alpha-test.
                build_depth_test(parts, Z_WRITE);
            }

            build_component(pixel, parts, GGLFormat::RED, regs);
            build_component(pixel, parts, GGLFormat::GREEN, regs);
            build_component(pixel, parts, GGLFormat::BLUE, regs);

            pixel.flags |= CORRUPTIBLE;
        }

        if (registerFile().status())
            return registerFile().status();

        if (pixel.reg == -1) {
            // be defensive here. if we're here it's probably
            // that this whole fragment is a no-op.
            pixel = mDstPixel;
        }

        if (!mAllMasked) {
            // logic operation
            build_logic_op(pixel, regs);

            // masking
            build_masking(pixel, regs);

            comment("store");
            store(parts.cbPtr, pixel, WRITE_BACK);
        }
    }

    if (registerFile().status())
        return registerFile().status();

    // update the iterated color...
    if (parts.reload != 3) {
        build_smooth_shade(parts);
    }

    // update iterated z
    build_iterate_z(parts);

    // update iterated fog
    build_iterate_f(parts);
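
    // Loop control: the pixel count lives in the high 16 bits of
    // parts.count.reg as count-1. Subtracting 1<<16 with the S flag set
    // decrements it, and B(PL) loops while the result is still positive
    // or zero; the low half (dither index) never borrows into the high
    // half because the subtrahend's low 16 bits are zero. A single-pixel
    // span (high half 0x0000) goes negative on the first SUB and falls
    // straight through to the epilog.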
    SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
    B(PL, "fragment_loop");
    label("epilog");
    epilog(registerFile().touched());

    if ((mAlphaTest!=GGL_ALWAYS) || (mDepthTest!=GGL_ALWAYS)) {
        if (mDepthTest!=GGL_ALWAYS) {
            label("discard_before_textures");
            build_iterate_texture_coordinates(parts);
        }
        label("discard_after_textures");
        build_smooth_shade(parts);
        build_iterate_z(parts);
        build_iterate_f(parts);
        if (!mAllMasked) {
            ADDR_ADD(AL, 0, parts.cbPtr.reg, parts.cbPtr.reg, imm(parts.cbPtr.size>>3));
        }
        SUB(AL, S, parts.count.reg, parts.count.reg, imm(1<<16));
        B(PL, "fragment_loop");
        epilog(registerFile().touched());
    }

    return registerFile().status();
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_scanline_prolog(
    fragment_parts_t& parts, const needs_t& needs)
{
    Scratch scratches(registerFile());
    int Rctx = mBuilderContext.Rctx;

    // compute count
    comment("compute ct (# of pixels to process)");
    parts.count.setTo(obtainReg());
    int Rx = scratches.obtain();
    int Ry = scratches.obtain();
    CONTEXT_LOAD(Rx, iterators.xl);
    CONTEXT_LOAD(parts.count.reg, iterators.xr);
    CONTEXT_LOAD(Ry, iterators.y);

    // parts.count = iterators.xr - Rx
    SUB(AL, 0, parts.count.reg, parts.count.reg, Rx);
    SUB(AL, 0, parts.count.reg, parts.count.reg, imm(1));

    if (mDithering) {
        // parts.count.reg = 0xNNNNXXDD
        // NNNN = count-1
        // DD   = dither offset
        // XX   = 0xxxxxxx (x = garbage)
        Scratch scratches(registerFile());
        int tx = scratches.obtain();
        int ty = scratches.obtain();
        AND(AL, 0, tx, Rx, imm(GGL_DITHER_MASK));
        AND(AL, 0, ty, Ry, imm(GGL_DITHER_MASK));
        ADD(AL, 0, tx, tx, reg_imm(ty, LSL, GGL_DITHER_ORDER_SHIFT));
        ORR(AL, 0, parts.count.reg, tx, reg_imm(parts.count.reg, LSL, 16));
    } else {
        // parts.count.reg = 0xNNNN0000
        // NNNN = count-1
        MOV(AL, 0, parts.count.reg, reg_imm(parts.count.reg, LSL, 16));
    }
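
    // Worked example, assuming pixelflinger's 4x4 dither matrix
    // (i.e. GGL_DITHER_MASK == 3 and GGL_DITHER_ORDER_SHIFT == 2):
    // a span xl=16, xr=116 on scanline y=7 gives count-1 = 99 = 0x63,
    // tx = 16 & 3 = 0, ty = 7 & 3 = 3, so the dither byte is
    // (3<<2)|0 = 0x0C and parts.count.reg = 0x0063000C.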

    if (!mAllMasked) {
        // compute dst ptr
        comment("compute color-buffer pointer");
        const int cb_bits = mCbFormat.size*8;
        int Rs = scratches.obtain();
        parts.cbPtr.setTo(obtainReg(), cb_bits);
        CONTEXT_LOAD(Rs, state.buffers.color.stride);
        CONTEXT_ADDR_LOAD(parts.cbPtr.reg, state.buffers.color.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);  // Rs = Rx + Ry*Rs
        base_offset(parts.cbPtr, parts.cbPtr, Rs);
        scratches.recycle(Rs);
    }

    // init fog
    const int need_fog = GGL_READ_NEEDS(P_FOG, needs.p);
    if (need_fog) {
        comment("compute initial fog coordinate");
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int ydfdy = scratches.obtain();
        int f = ydfdy;
        CONTEXT_LOAD(dfdx, generated_vars.dfdx);
        CONTEXT_LOAD(ydfdy, iterators.ydfdy);
        MLA(AL, 0, f, Rx, dfdx, ydfdy);
        CONTEXT_STORE(f, generated_vars.f);
    }

    // init Z coordinate
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        parts.z = reg_t(obtainReg());
        comment("compute initial Z coordinate");
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        int ydzdy = parts.z.reg;
        CONTEXT_LOAD(dzdx, generated_vars.dzdx);   // 1.31 fixed-point
        CONTEXT_LOAD(ydzdy, iterators.ydzdy);      // 1.31 fixed-point
        MLA(AL, 0, parts.z.reg, Rx, dzdx, ydzdy);

        // we're going to index zbase by parts.count
        // zbase = base + (xl - count + stride*y)*2
        int Rs = dzdx;
        int zbase = scratches.obtain();
        CONTEXT_LOAD(Rs, state.buffers.depth.stride);
        CONTEXT_ADDR_LOAD(zbase, state.buffers.depth.data);
        SMLABB(AL, Rs, Ry, Rs, Rx);
        ADD(AL, 0, Rs, Rs, reg_imm(parts.count.reg, LSR, 16));
        ADDR_ADD(AL, 0, zbase, zbase, reg_imm(Rs, LSL, 1));
        CONTEXT_ADDR_STORE(zbase, generated_vars.zbase);
    }
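
    // Note: zbase is biased by the initial count so the depth test can
    // recover the current pixel's address from the live counter alone:
    // zbase = base + (x + y*stride + (count-1))*2 points at the last
    // pixel of the span, and build_depth_test() computes
    // zbase - 2*(remaining count) to address the current pixel.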

    // init texture coordinates
    init_textures(parts.coords, reg_t(Rx), reg_t(Ry));
    scratches.recycle(Ry);

    // iterated color
    init_iterated_color(parts, reg_t(Rx));

    // init coverage factor application (anti-aliasing)
    if (mAA) {
        parts.covPtr.setTo(obtainReg(), 16);
        CONTEXT_ADDR_LOAD(parts.covPtr.reg, state.buffers.coverage);
        ADDR_ADD(AL, 0, parts.covPtr.reg, parts.covPtr.reg, reg_imm(Rx, LSL, 1));
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_component( pixel_t& pixel,
                                    const fragment_parts_t& parts,
                                    int component,
                                    Scratch& regs)
{
    static char const * comments[] = {"alpha", "red", "green", "blue"};
    comment(comments[component]);

    // local register file
    Scratch scratches(registerFile());
    const int dst_component_size = pixel.component_size(component);

    component_t temp(-1);
    build_incoming_component( temp, dst_component_size,
                              parts, component, scratches, regs);

    if (mInfo[component].inDest) {

        // blending...
        build_blending( temp, mDstPixel, component, scratches );

        // downshift component and rebuild pixel...
        downshift(pixel, component, temp, parts.dither);
    }
}

void GGLAssembler::build_incoming_component(
    component_t& temp,
    int dst_size,
    const fragment_parts_t& parts,
    int component,
    Scratch& scratches,
    Scratch& global_regs)
{
    const uint32_t component_mask = 1<<component;

    // Figure out what we need for the blending stage...
    int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc;
    int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst;
    if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) {
        fs = GGL_ONE;
    }

    // Figure out what we need to extract and for what reason
    const int blending = blending_codes(fs, fd);

    // Are we actually going to blend?
    const int need_blending = (fs != int(GGL_ONE)) || (fd > int(GGL_ZERO));

    // expand the source if the destination has more bits
    int need_expander = false;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT-1 ; i++) {
        texture_unit_t& tmu = mTextureMachine.tmu[i];
        if ((tmu.format_idx) &&
            (parts.texel[i].component_size(component) < dst_size)) {
            need_expander = true;
        }
    }

    // do we need to extract this component?
    const bool multiTexture = mTextureMachine.activeUnits > 1;
    const int blend_needs_alpha_source = (component==GGLFormat::ALPHA) &&
                                         (isAlphaSourceNeeded());
    int need_extract = mInfo[component].needed;
    if (mInfo[component].inDest)
    {
        need_extract |= ((need_blending ?
                        (blending & (BLEND_SRC|FACTOR_SRC)) : need_expander));
        need_extract |= (mTextureMachine.mask != mTextureMachine.replaced);
        need_extract |= mInfo[component].smooth;
        need_extract |= mInfo[component].fog;
        need_extract |= mDithering;
        need_extract |= multiTexture;
    }

    if (need_extract) {
        Scratch& regs = blend_needs_alpha_source ? global_regs : scratches;
        component_t fragment;

        // iterated color
        build_iterated_color(fragment, parts, component, regs);

        // texture environment (decal, modulate, replace)
        build_texture_environment(fragment, parts, component, regs);

        // expand the source if the destination has more bits
        if (need_expander && (fragment.size() < dst_size)) {
            // we're here only if we fetched a texel
            // (so we know for sure fragment is CORRUPTIBLE)
            expand(fragment, fragment, dst_size);
        }
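
        // A note on the representation: a component_t is a value held in
        // some register, occupying bits [h-1 .. l] (size() is h-l). The
        // alpha handling below first shifts the value down so that l == 0,
        // because coverage application, the alpha test and the blend
        // factor all expect a plain unscaled integer.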

        // We have a few specific things to do for the alpha-channel
        if ((component==GGLFormat::ALPHA) &&
            (mInfo[component].needed || fragment.size()<dst_size))
        {
            // convert to integer_t first and make sure
            // we don't corrupt a needed register
            if (fragment.l) {
                component_t incoming(fragment);
                modify(fragment, regs);
                MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSR, incoming.l));
                fragment.h -= fragment.l;
                fragment.l = 0;
            }

            // coverage factor application
            build_coverage_application(fragment, parts, regs);

            // alpha-test
            build_alpha_test(fragment, parts);
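
            // Note: the blending stage works with at most 8 bits of
            // alpha, so a wider fragment is downshifted first; the shift
            // is folded into mAlphaSource.s below so later stages still
            // know the component's effective size. E.g. a 12-bit alpha
            // (h == 12) is shifted right by 4 before being cached.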

            if (blend_needs_alpha_source) {
                // We keep only 8 bits for the blending stage
                const int shift = fragment.h <= 8 ? 0 : fragment.h-8;
                if (fragment.flags & CORRUPTIBLE) {
                    fragment.flags &= ~CORRUPTIBLE;
                    mAlphaSource.setTo(fragment.reg,
                            fragment.size(), fragment.flags);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(mAlphaSource.reg, LSR, shift));
                    }
                } else {
                    // XXX: it would be better to do this in build_blend_factor()
                    // so we can avoid the extra MOV below.
                    mAlphaSource.setTo(regs.obtain(),
                            fragment.size(), CORRUPTIBLE);
                    if (shift) {
                        MOV(AL, 0, mAlphaSource.reg,
                            reg_imm(fragment.reg, LSR, shift));
                    } else {
                        MOV(AL, 0, mAlphaSource.reg, fragment.reg);
                    }
                }
                mAlphaSource.s -= shift;
            }
        }

        // fog...
        build_fog( fragment, component, regs );

        temp = fragment;
    } else {
        if (mInfo[component].inDest) {
            // extraction not needed and replace
            // we just select the right component
            if ((mTextureMachine.replaced & component_mask) == 0) {
                // component wasn't replaced, so use it!
                temp = component_t(parts.iterated, component);
            }
            for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; i++) {
                const texture_unit_t& tmu = mTextureMachine.tmu[i];
                if ((tmu.mask & component_mask) &&
                    ((tmu.replaced & component_mask) == 0)) {
                    temp = component_t(parts.texel[i], component);
                }
            }
        }
    }
}

bool GGLAssembler::isAlphaSourceNeeded() const
{
    // XXX: also needed for alpha-test
    const int bs = mBlendSrc;
    const int bd = mBlendDst;
    return  bs==GGL_SRC_ALPHA_SATURATE ||
            bs==GGL_SRC_ALPHA || bs==GGL_ONE_MINUS_SRC_ALPHA ||
            bd==GGL_SRC_ALPHA || bd==GGL_ONE_MINUS_SRC_ALPHA ;
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_smooth_shade(const fragment_parts_t& parts)
{
    if (mSmooth && !parts.iterated_packed) {
        // update the iterated color in a pipelined way...
        comment("update iterated color");
        Scratch scratches(registerFile());

        const int reload = parts.reload;
        for (int i=0 ; i<4 ; i++) {
            if (!mInfo[i].iterated)
                continue;

            int c = parts.argb[i].reg;
            int dx = parts.argb_dx[i].reg;

            if (reload & 1) {
                c = scratches.obtain();
                CONTEXT_LOAD(c, generated_vars.argb[i].c);
            }
            if (reload & 2) {
                dx = scratches.obtain();
                CONTEXT_LOAD(dx, generated_vars.argb[i].dx);
            }

            if (mSmooth) {
                ADD(AL, 0, c, c, dx);
            }

            if (reload & 1) {
                CONTEXT_STORE(c, generated_vars.argb[i].c);
                scratches.recycle(c);
            }
            if (reload & 2) {
                scratches.recycle(dx);
            }
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_coverage_application(component_t& fragment,
        const fragment_parts_t& parts, Scratch& regs)
{
    // here fragment.l is guaranteed to be 0
    if (mAA) {
        // coverages are 1.15 fixed-point numbers
        comment("coverage application");

        component_t incoming(fragment);
        modify(fragment, regs);

        Scratch scratches(registerFile());
        int cf = scratches.obtain();
        LDRH(AL, cf, parts.covPtr.reg, immed8_post(2));
        if (fragment.h > 31) {
            fragment.h--;
            SMULWB(AL, fragment.reg, incoming.reg, cf);
        } else {
            MOV(AL, 0, fragment.reg, reg_imm(incoming.reg, LSL, 1));
            SMULWB(AL, fragment.reg, fragment.reg, cf);
        }
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_alpha_test(component_t& fragment,
                                    const fragment_parts_t& /*parts*/)
{
    if (mAlphaTest != GGL_ALWAYS) {
        comment("Alpha Test");
        Scratch scratches(registerFile());
        int ref = scratches.obtain();
        const int shift = GGL_COLOR_BITS-fragment.size();
        CONTEXT_LOAD(ref, state.alpha_test.ref);
        if (shift) CMP(AL, fragment.reg, reg_imm(ref, LSR, shift));
        else       CMP(AL, fragment.reg, ref);
        int cc = NV;
        switch (mAlphaTest) {
        case GGL_NEVER:     cc = NV;    break;
        case GGL_LESS:      cc = LT;    break;
        case GGL_EQUAL:     cc = EQ;    break;
        case GGL_LEQUAL:    cc = LS;    break;
        case GGL_GREATER:   cc = HI;    break;
        case GGL_NOTEQUAL:  cc = NE;    break;
        case GGL_GEQUAL:    cc = HS;    break;
        }
        B(cc^1, "discard_after_textures");
    }
}
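
// A note on B(cc^1, ...): ARM condition codes come in even/odd pairs
// whose meanings are complements (EQ/NE, HS/LO, MI/PL, HI/LS, GE/LT,
// GT/LE, AL/NV), so XOR-ing a code with 1 negates it. The alpha test
// above computes the *passing* condition and branches to the discard
// label on its inverse; e.g. GGL_LESS yields LT, and B(LT^1, ...) is
// B(GE, ...). build_depth_test() below does the same with cc = ic^1,
// keeping ic for the conditional depth write.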

// ---------------------------------------------------------------------------

void GGLAssembler::build_depth_test(
        const fragment_parts_t& parts, uint32_t mask)
{
    mask &= Z_TEST|Z_WRITE;
    const needs_t& needs = mBuilderContext.needs;
    const int zmask = GGL_READ_NEEDS(P_MASK_Z, needs.p);
    Scratch scratches(registerFile());

    if (mDepthTest != GGL_ALWAYS || zmask) {
        int cc=AL, ic=AL;
        switch (mDepthTest) {
        case GGL_LESS:      ic = HI;    break;
        case GGL_EQUAL:     ic = EQ;    break;
        case GGL_LEQUAL:    ic = HS;    break;
        case GGL_GREATER:   ic = LT;    break;
        case GGL_NOTEQUAL:  ic = NE;    break;
        case GGL_GEQUAL:    ic = LS;    break;
        case GGL_NEVER:
            // this never happens, because it's taken care of when
            // computing the needs. but we keep it for completeness.
            comment("Depth Test (NEVER)");
            B(AL, "discard_before_textures");
            return;
        case GGL_ALWAYS:
            // we're here because zmask is enabled
            mask &= ~Z_TEST;    // test always passes.
            break;
        }

        // invert the condition
        cc = ic^1;

        if ((mask & Z_WRITE) && !zmask) {
            mask &= ~Z_WRITE;
        }

        if (!mask)
            return;

        comment("Depth Test");

        int zbase = scratches.obtain();
        int depth = scratches.obtain();
        int z = parts.z.reg;

        CONTEXT_ADDR_LOAD(zbase, generated_vars.zbase);  // stall
        ADDR_SUB(AL, 0, zbase, zbase, reg_imm(parts.count.reg, LSR, 15));
        // above does zbase = zbase - ((count >> 16) << 1)

        if (mask & Z_TEST) {
            LDRH(AL, depth, zbase);  // stall
            CMP(AL, depth, reg_imm(z, LSR, 16));
            B(cc, "discard_before_textures");
        }
        if (mask & Z_WRITE) {
            if (mask == Z_WRITE) {
                // only z-write asked, cc is meaningless
                ic = AL;
            }
            MOV(AL, 0, depth, reg_imm(z, LSR, 16));
            STRH(ic, depth, zbase);
        }
    }
}

void GGLAssembler::build_iterate_z(const fragment_parts_t& parts)
{
    const needs_t& needs = mBuilderContext.needs;
    if ((mDepthTest != GGL_ALWAYS) || GGL_READ_NEEDS(P_MASK_Z, needs.p)) {
        Scratch scratches(registerFile());
        int dzdx = scratches.obtain();
        CONTEXT_LOAD(dzdx, generated_vars.dzdx);  // stall
        ADD(AL, 0, parts.z.reg, parts.z.reg, dzdx);
    }
}

void GGLAssembler::build_iterate_f(const fragment_parts_t& /*parts*/)
{
    const needs_t& needs = mBuilderContext.needs;
    if (GGL_READ_NEEDS(P_FOG, needs.p)) {
        Scratch scratches(registerFile());
        int dfdx = scratches.obtain();
        int f = scratches.obtain();
        CONTEXT_LOAD(f, generated_vars.f);
        CONTEXT_LOAD(dfdx, generated_vars.dfdx);  // stall
        ADD(AL, 0, f, f, dfdx);
        CONTEXT_STORE(f, generated_vars.f);
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::build_logic_op(pixel_t& pixel, Scratch& regs)
{
    const needs_t& needs = mBuilderContext.needs;
    const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
    if (opcode == GGL_COPY)
        return;

    comment("logic operation");

    pixel_t s(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    pixel_t d(mDstPixel);
    switch (opcode) {
    case GGL_CLEAR:         MOV(AL, 0, pixel.reg, imm(0));          break;
    case GGL_AND:           AND(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_AND_REVERSE:   BIC(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_COPY:          break;
    case GGL_AND_INVERTED:  BIC(AL, 0, pixel.reg, d.reg, s.reg);    break;
    case GGL_NOOP:          MOV(AL, 0, pixel.reg, d.reg);           break;
    case GGL_XOR:           EOR(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_OR:            ORR(AL, 0, pixel.reg, s.reg, d.reg);    break;
    case GGL_NOR:           ORR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_EQUIV:         EOR(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_INVERT:        MVN(AL, 0, pixel.reg, d.reg);           break;
    case GGL_OR_REVERSE:    // s | ~d == ~(~s & d)
                            BIC(AL, 0, pixel.reg, d.reg, s.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_COPY_INVERTED: MVN(AL, 0, pixel.reg, s.reg);           break;
    case GGL_OR_INVERTED:   // ~s | d == ~(s & ~d)
                            BIC(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_NAND:          AND(AL, 0, pixel.reg, s.reg, d.reg);
                            MVN(AL, 0, pixel.reg, pixel.reg);       break;
    case GGL_SET:           MVN(AL, 0, pixel.reg, imm(0));          break;
    }
}

// ---------------------------------------------------------------------------
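
// Background for build_and_immediate() below: an ARM data-processing
// immediate is an 8-bit value rotated right by an even amount, so a mask
// such as 0x00FF00FF cannot be encoded in a single instruction.
// normalize() rotates the mask until its low bit-pair is non-zero,
// find_bottom() locates each 8-bit chunk, and the loop emits one AND per
// chunk -- or, when the complement encodes more cheaply (negative logic),
// one BIC per chunk of the complement. For d = s & 0x00FF00FF the
// complement 0xFF00FF00 is used and this emits:
//   BIC d, s, #0x0000FF00
//   BIC d, d, #0xFF000000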
static uint32_t find_bottom(uint32_t val)
{
    uint32_t i = 0;
    while (!(val & (3<<i)))
        i += 2;
    return i;
}

static void normalize(uint32_t& val, uint32_t& rot)
{
    rot = 0;
    while (!(val&3) || (val & 0xFC000000)) {
        uint32_t newval;
        newval = val >> 2;
        newval |= (val&3) << 30;
        val = newval;
        rot += 2;
        if (rot == 32) {
            rot = 0;
            break;
        }
    }
}

void GGLAssembler::build_and_immediate(int d, int s, uint32_t mask, int bits)
{
    uint32_t rot;
    uint32_t size = ((bits>=32) ? 0 : (1LU << bits)) - 1;
    mask &= size;

    if (mask == size) {
        if (d != s)
            MOV(AL, 0, d, s);
        return;
    }

    if ((getCodegenArch() == CODEGEN_ARCH_MIPS) ||
        (getCodegenArch() == CODEGEN_ARCH_MIPS64)) {
        // MIPS can do 16-bit imm in 1 instr, 32-bit in 3 instr
        // the below ' while (mask)' code is buggy on mips
        // since mips returns true on isValidImmediate()
        // then we get multiple AND instr (positive logic)
        AND( AL, 0, d, s, imm(mask) );
        return;
    }
    else if (getCodegenArch() == CODEGEN_ARCH_ARM64) {
        AND( AL, 0, d, s, imm(mask) );
        return;
    }

    int negative_logic = !isValidImmediate(mask);
    if (negative_logic) {
        mask = ~mask & size;
    }
    normalize(mask, rot);

    if (mask) {
        while (mask) {
            uint32_t bitpos = find_bottom(mask);
            int shift = rot + bitpos;
            uint32_t m = mask & (0xff << bitpos);
            mask &= ~m;
            m >>= bitpos;
            int32_t newMask = (m<<shift) | (m>>(32-shift));
            if (!negative_logic) {
                AND( AL, 0, d, s, imm(newMask) );
            } else {
                BIC( AL, 0, d, s, imm(newMask) );
            }
            s = d;
        }
    } else {
        MOV( AL, 0, d, imm(0));
    }
}

void GGLAssembler::build_masking(pixel_t& pixel, Scratch& regs)
{
    if (!mMasking || mAllMasked) {
        return;
    }

    comment("color mask");

    pixel_t fb(mDstPixel);
    pixel_t s(pixel);
    if (!(pixel.flags & CORRUPTIBLE)) {
        pixel.reg = regs.obtain();
        pixel.flags |= CORRUPTIBLE;
    }

    int mask = 0;
    for (int i=0 ; i<4 ; i++) {
        const int component_mask = 1<<i;
        const int h = fb.format.c[i].h;
        const int l = fb.format.c[i].l;
        if (h && (!(mMasking & component_mask))) {
            mask |= ((1<<(h-l))-1) << l;
        }
    }

    // There is no need to clear the masked components of the source
    // (unless we applied a logic op), because they're already zeroed
    // by construction (masked components are not computed)

    if (mLogicOp) {
        const needs_t& needs = mBuilderContext.needs;
        const int opcode = GGL_READ_NEEDS(LOGIC_OP, needs.n) | GGL_CLEAR;
        if (opcode != GGL_CLEAR) {
            // clear masked components of the source
            build_and_immediate(pixel.reg, s.reg, mask, fb.size());
            s = pixel;
        }
    }

    // clear non-masked components of the destination
    build_and_immediate(fb.reg, fb.reg, ~mask, fb.size());

    // or back the channels that were masked
    if (s.reg == fb.reg) {
        // this is in fact a MOV
        if (s.reg == pixel.reg) {
            // ugh. this is in fact a nop
        } else {
            MOV(AL, 0, pixel.reg, fb.reg);
        }
    } else {
        ORR(AL, 0, pixel.reg, s.reg, fb.reg);
    }
}

// ---------------------------------------------------------------------------

void GGLAssembler::base_offset(
        const pointer_t& d, const pointer_t& b, const reg_t& o)
{
    switch (b.size) {
    case 32:
        ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 2));
        break;
    case 24:
        if (d.reg == b.reg) {
            ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
            ADDR_ADD(AL, 0, d.reg, d.reg, o.reg);
        } else {
            ADDR_ADD(AL, 0, d.reg, o.reg, reg_imm(o.reg, LSL, 1));
            ADDR_ADD(AL, 0, d.reg, d.reg, b.reg);
        }
        break;
    case 16:
        ADDR_ADD(AL, 0, d.reg, b.reg, reg_imm(o.reg, LSL, 1));
        break;
    case 8:
        ADDR_ADD(AL, 0, d.reg, b.reg, o.reg);
        break;
    }
}

// ----------------------------------------------------------------------------
// cheezy register allocator...
// ----------------------------------------------------------------------------

// Modified to support MIPS processors, in a very simple way. We retain the
// (Arm) limit of 16 total registers, but shift the mapping of those registers
// from 0-15 to 2-17 (register 0 on MIPS cannot be used as a GP register, and
// register 1 has a traditional use as a temp).

RegisterAllocator::RegisterAllocator(int arch) : mRegs(arch)
{
}

void RegisterAllocator::reset()
{
    mRegs.reset();
}

int RegisterAllocator::reserveReg(int reg)
{
    return mRegs.reserve(reg);
}

int RegisterAllocator::obtainReg()
{
    return mRegs.obtain();
}

void RegisterAllocator::recycleReg(int reg)
{
    mRegs.recycle(reg);
}

RegisterAllocator::RegisterFile& RegisterAllocator::registerFile()
{
    return mRegs;
}

// ----------------------------------------------------------------------------

RegisterAllocator::RegisterFile::RegisterFile(int codegen_arch)
    : mRegs(0), mTouched(0), mStatus(0), mArch(codegen_arch), mRegisterOffset(0)
{
    if ((mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) ||
        (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS64)) {
        mRegisterOffset = 2;    // ARM has regs 0..15, MIPS offset to 2..17
    }
    reserve(ARMAssemblerInterface::SP);
    reserve(ARMAssemblerInterface::PC);
}

RegisterAllocator::RegisterFile::RegisterFile(const RegisterFile& rhs, int codegen_arch)
    : mRegs(rhs.mRegs), mTouched(rhs.mTouched), mArch(codegen_arch), mRegisterOffset(0)
{
    if ((mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS) ||
        (mArch == ARMAssemblerInterface::CODEGEN_ARCH_MIPS64)) {
        mRegisterOffset = 2;    // ARM has regs 0..15, MIPS offset to 2..17
    }
}

RegisterAllocator::RegisterFile::~RegisterFile()
{
}

bool RegisterAllocator::RegisterFile::operator == (const RegisterFile& rhs) const
{
    return (mRegs == rhs.mRegs);
}

void RegisterAllocator::RegisterFile::reset()
{
    mRegs = mTouched = mStatus = 0;
    reserve(ARMAssemblerInterface::SP);
    reserve(ARMAssemblerInterface::PC);
}

// RegisterFile::reserve() takes a register parameter in the
// range 0-15 (Arm compatible), but on a MIPS processor it will
// return the actual allocated register in the range 2-17.
int RegisterAllocator::RegisterFile::reserve(int reg)
{
    reg += mRegisterOffset;
    LOG_ALWAYS_FATAL_IF(isUsed(reg),
                        "reserving register %d, but already in use",
                        reg);
    mRegs |= (1<<reg);
    mTouched |= mRegs;
    return reg;
}

// This interface uses regMask in range 2-17 on MIPS, no translation.
void RegisterAllocator::RegisterFile::reserveSeveral(uint32_t regMask)
{
    mRegs |= regMask;
    mTouched |= regMask;
}

int RegisterAllocator::RegisterFile::isUsed(int reg) const
{
    LOG_ALWAYS_FATAL_IF(reg>=16+(int)mRegisterOffset, "invalid register %d", reg);
    return mRegs & (1<<reg);
}

int RegisterAllocator::RegisterFile::obtain()
{
    const char priorityList[14] = {  0,  1,  2,  3,
                                    12, 14,  4,  5,
                                     6,  7,  8,  9,
                                    10, 11 };
    const int nbreg = sizeof(priorityList);
    int i, r, reg;
    for (i=0 ; i<nbreg ; i++) {
        r = priorityList[i];
        if (!isUsed(r + mRegisterOffset)) {
            break;
        }
    }
    // this is not an error anymore because we'll try again with
    // a lower optimization level.
    //ALOGE_IF(i >= nbreg, "pixelflinger ran out of registers\n");
    if (i >= nbreg) {
        mStatus |= OUT_OF_REGISTERS;
        // we return SP so we can more easily debug things
        // the code will never be run anyway.
        return ARMAssemblerInterface::SP;
    }
    reg = reserve(r);  // Param in Arm range 0-15, returns range 2-17 on Mips.
    return reg;
}

bool RegisterAllocator::RegisterFile::hasFreeRegs() const
{
    uint32_t regs = mRegs >> mRegisterOffset;   // MIPS fix.
    return ((regs & 0xFFFF) == 0xFFFF) ? false : true;
}
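
// Note: countFreeRegs() below counts the zero bits among the low 16
// register slots with a branch-free parallel reduction: each step sums
// adjacent groups (pairs, nibbles, bytes, halves) in place. E.g. for
// f = 0b0110, the first step turns the pairs (01,10) into their counts
// (01,01), giving 0b0101, and the second step adds 01+01 = 2 free regs.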

int RegisterAllocator::RegisterFile::countFreeRegs() const
{
    uint32_t regs = mRegs >> mRegisterOffset;   // MIPS fix.
    int f = ~regs & 0xFFFF;
    // now count number of 1
    f = (f & 0x5555) + ((f>>1) & 0x5555);
    f = (f & 0x3333) + ((f>>2) & 0x3333);
    f = (f & 0x0F0F) + ((f>>4) & 0x0F0F);
    f = (f & 0x00FF) + ((f>>8) & 0x00FF);
    return f;
}

void RegisterAllocator::RegisterFile::recycle(int reg)
{
    // commented out, since the common failure of running out of regs
    // triggers this assertion. Since the code is not executed
    // in that case, it does not matter. No reason to FATAL err.
    // LOG_FATAL_IF(!isUsed(reg),
    //         "recycling unallocated register %d",
    //         reg);
    mRegs &= ~(1<<reg);
}

void RegisterAllocator::RegisterFile::recycleSeveral(uint32_t regMask)
{
    // commented out, since the common failure of running out of regs
    // triggers this assertion. Since the code is not executed
    // in that case, it does not matter. No reason to FATAL err.
    // LOG_FATAL_IF((mRegs & regMask)!=regMask,
    //         "recycling unallocated registers "
    //         "(recycle=%08x, allocated=%08x, unallocated=%08x)",
    //         regMask, mRegs, mRegs&regMask);
    mRegs &= ~regMask;
}

uint32_t RegisterAllocator::RegisterFile::touched() const
{
    return mTouched;
}

// ----------------------------------------------------------------------------

}; // namespace android