1 /* libs/pixelflinger/codeflinger/blending.cpp 2 ** 3 ** Copyright 2006, The Android Open Source Project 4 ** 5 ** Licensed under the Apache License, Version 2.0 (the "License"); 6 ** you may not use this file except in compliance with the License. 7 ** You may obtain a copy of the License at 8 ** 9 ** http://www.apache.org/licenses/LICENSE-2.0 10 ** 11 ** Unless required by applicable law or agreed to in writing, software 12 ** distributed under the License is distributed on an "AS IS" BASIS, 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 ** See the License for the specific language governing permissions and 15 ** limitations under the License. 16 */ 17 18 #include <assert.h> 19 #include <stdint.h> 20 #include <stdlib.h> 21 #include <stdio.h> 22 #include <sys/types.h> 23 24 #include <cutils/log.h> 25 26 #include "GGLAssembler.h" 27 28 29 namespace android { 30 31 void GGLAssembler::build_fog( 32 component_t& temp, // incomming fragment / output 33 int component, 34 Scratch& regs) 35 { 36 if (mInfo[component].fog) { 37 Scratch scratches(registerFile()); 38 comment("fog"); 39 40 integer_t fragment(temp.reg, temp.h, temp.flags); 41 if (!(temp.flags & CORRUPTIBLE)) { 42 temp.reg = regs.obtain(); 43 temp.flags |= CORRUPTIBLE; 44 } 45 46 integer_t fogColor(scratches.obtain(), 8, CORRUPTIBLE); 47 LDRB(AL, fogColor.reg, mBuilderContext.Rctx, 48 immed12_pre(GGL_OFFSETOF(state.fog.color[component]))); 49 50 integer_t factor(scratches.obtain(), 16, CORRUPTIBLE); 51 CONTEXT_LOAD(factor.reg, generated_vars.f); 52 53 // clamp fog factor (TODO: see if there is a way to guarantee 54 // we won't overflow, when setting the iterators) 55 BIC(AL, 0, factor.reg, factor.reg, reg_imm(factor.reg, ASR, 31)); 56 CMP(AL, factor.reg, imm( 0x10000 )); 57 MOV(HS, 0, factor.reg, imm( 0x10000 )); 58 59 build_blendFOneMinusF(temp, factor, fragment, fogColor); 60 } 61 } 62 63 void GGLAssembler::build_blending( 64 component_t& temp, // incomming fragment / output 65 const pixel_t& pixel, // framebuffer 66 int component, 67 Scratch& regs) 68 { 69 if (!mInfo[component].blend) 70 return; 71 72 int fs = component==GGLFormat::ALPHA ? mBlendSrcA : mBlendSrc; 73 int fd = component==GGLFormat::ALPHA ? mBlendDstA : mBlendDst; 74 if (fs==GGL_SRC_ALPHA_SATURATE && component==GGLFormat::ALPHA) 75 fs = GGL_ONE; 76 const int blending = blending_codes(fs, fd); 77 if (!temp.size()) { 78 // here, blending will produce something which doesn't depend on 79 // that component (eg: GL_ZERO:GL_*), so the register has not been 80 // allocated yet. Will never be used as a source. 81 temp = component_t(regs.obtain(), CORRUPTIBLE); 82 } 83 84 // we are doing real blending... 85 // fb: extracted dst 86 // fragment: extracted src 87 // temp: component_t(fragment) and result 88 89 // scoped register allocator 90 Scratch scratches(registerFile()); 91 comment("blending"); 92 93 // we can optimize these cases a bit... 94 // (1) saturation is not needed 95 // (2) we can use only one multiply instead of 2 96 // (3) we can reduce the register pressure 97 // R = S*f + D*(1-f) = (S-D)*f + D 98 // R = S*(1-f) + D*f = (D-S)*f + S 99 100 const bool same_factor_opt1 = 101 (fs==GGL_DST_COLOR && fd==GGL_ONE_MINUS_DST_COLOR) || 102 (fs==GGL_SRC_COLOR && fd==GGL_ONE_MINUS_SRC_COLOR) || 103 (fs==GGL_DST_ALPHA && fd==GGL_ONE_MINUS_DST_ALPHA) || 104 (fs==GGL_SRC_ALPHA && fd==GGL_ONE_MINUS_SRC_ALPHA); 105 106 const bool same_factor_opt2 = 107 (fs==GGL_ONE_MINUS_DST_COLOR && fd==GGL_DST_COLOR) || 108 (fs==GGL_ONE_MINUS_SRC_COLOR && fd==GGL_SRC_COLOR) || 109 (fs==GGL_ONE_MINUS_DST_ALPHA && fd==GGL_DST_ALPHA) || 110 (fs==GGL_ONE_MINUS_SRC_ALPHA && fd==GGL_SRC_ALPHA); 111 112 113 // XXX: we could also optimize these cases: 114 // R = S*f + D*f = (S+D)*f 115 // R = S*(1-f) + D*(1-f) = (S+D)*(1-f) 116 // R = S*D + D*S = 2*S*D 117 118 119 // see if we need to extract 'component' from the destination (fb) 120 integer_t fb; 121 if (blending & (BLEND_DST|FACTOR_DST)) { 122 fb.setTo(scratches.obtain(), 32); 123 extract(fb, pixel, component); 124 if (mDithering) { 125 // XXX: maybe what we should do instead, is simply 126 // expand fb -or- fragment to the larger of the two 127 if (fb.size() < temp.size()) { 128 // for now we expand 'fb' to min(fragment, 8) 129 int new_size = temp.size() < 8 ? temp.size() : 8; 130 expand(fb, fb, new_size); 131 } 132 } 133 } 134 135 136 // convert input fragment to integer_t 137 if (temp.l && (temp.flags & CORRUPTIBLE)) { 138 MOV(AL, 0, temp.reg, reg_imm(temp.reg, LSR, temp.l)); 139 temp.h -= temp.l; 140 temp.l = 0; 141 } 142 integer_t fragment(temp.reg, temp.size(), temp.flags); 143 144 // if not done yet, convert input fragment to integer_t 145 if (temp.l) { 146 // here we know temp is not CORRUPTIBLE 147 fragment.reg = scratches.obtain(); 148 MOV(AL, 0, fragment.reg, reg_imm(temp.reg, LSR, temp.l)); 149 fragment.flags |= CORRUPTIBLE; 150 } 151 152 if (!(temp.flags & CORRUPTIBLE)) { 153 // temp is not corruptible, but since it's the destination it 154 // will be modified, so we need to allocate a new register. 155 temp.reg = regs.obtain(); 156 temp.flags &= ~CORRUPTIBLE; 157 fragment.flags &= ~CORRUPTIBLE; 158 } 159 160 if ((blending & BLEND_SRC) && !same_factor_opt1) { 161 // source (fragment) is needed for the blending stage 162 // so it's not CORRUPTIBLE (unless we're doing same_factor_opt1) 163 fragment.flags &= ~CORRUPTIBLE; 164 } 165 166 167 if (same_factor_opt1) { 168 // R = S*f + D*(1-f) = (S-D)*f + D 169 integer_t factor; 170 build_blend_factor(factor, fs, 171 component, pixel, fragment, fb, scratches); 172 // fb is always corruptible from this point 173 fb.flags |= CORRUPTIBLE; 174 build_blendFOneMinusF(temp, factor, fragment, fb); 175 } else if (same_factor_opt2) { 176 // R = S*(1-f) + D*f = (D-S)*f + S 177 integer_t factor; 178 // fb is always corrruptible here 179 fb.flags |= CORRUPTIBLE; 180 build_blend_factor(factor, fd, 181 component, pixel, fragment, fb, scratches); 182 build_blendOneMinusFF(temp, factor, fragment, fb); 183 } else { 184 integer_t src_factor; 185 integer_t dst_factor; 186 187 // if destination (fb) is not needed for the blending stage, 188 // then it can be marked as CORRUPTIBLE 189 if (!(blending & BLEND_DST)) { 190 fb.flags |= CORRUPTIBLE; 191 } 192 193 // XXX: try to mark some registers as CORRUPTIBLE 194 // in most case we could make those corruptible 195 // when we're processing the last component 196 // but not always, for instance 197 // when fragment is constant and not reloaded 198 // when fb is needed for logic-ops or masking 199 // when a register is aliased (for instance with mAlphaSource) 200 201 // blend away... 202 if (fs==GGL_ZERO) { 203 if (fd==GGL_ZERO) { // R = 0 204 // already taken care of 205 } else if (fd==GGL_ONE) { // R = D 206 // already taken care of 207 } else { // R = D*fd 208 // compute fd 209 build_blend_factor(dst_factor, fd, 210 component, pixel, fragment, fb, scratches); 211 mul_factor(temp, fb, dst_factor); 212 } 213 } else if (fs==GGL_ONE) { 214 if (fd==GGL_ZERO) { // R = S 215 // NOP, taken care of 216 } else if (fd==GGL_ONE) { // R = S + D 217 component_add(temp, fb, fragment); // args order matters 218 component_sat(temp); 219 } else { // R = S + D*fd 220 // compute fd 221 build_blend_factor(dst_factor, fd, 222 component, pixel, fragment, fb, scratches); 223 mul_factor_add(temp, fb, dst_factor, component_t(fragment)); 224 component_sat(temp); 225 } 226 } else { 227 // compute fs 228 build_blend_factor(src_factor, fs, 229 component, pixel, fragment, fb, scratches); 230 if (fd==GGL_ZERO) { // R = S*fs 231 mul_factor(temp, fragment, src_factor); 232 } else if (fd==GGL_ONE) { // R = S*fs + D 233 mul_factor_add(temp, fragment, src_factor, component_t(fb)); 234 component_sat(temp); 235 } else { // R = S*fs + D*fd 236 mul_factor(temp, fragment, src_factor); 237 if (scratches.isUsed(src_factor.reg)) 238 scratches.recycle(src_factor.reg); 239 // compute fd 240 build_blend_factor(dst_factor, fd, 241 component, pixel, fragment, fb, scratches); 242 mul_factor_add(temp, fb, dst_factor, temp); 243 if (!same_factor_opt1 && !same_factor_opt2) { 244 component_sat(temp); 245 } 246 } 247 } 248 } 249 250 // now we can be corrupted (it's the dest) 251 temp.flags |= CORRUPTIBLE; 252 } 253 254 void GGLAssembler::build_blend_factor( 255 integer_t& factor, int f, int component, 256 const pixel_t& dst_pixel, 257 integer_t& fragment, 258 integer_t& fb, 259 Scratch& scratches) 260 { 261 integer_t src_alpha(fragment); 262 263 // src_factor/dst_factor won't be used after blending, 264 // so it's fine to mark them as CORRUPTIBLE (if not aliased) 265 factor.flags |= CORRUPTIBLE; 266 267 switch(f) { 268 case GGL_ONE_MINUS_SRC_ALPHA: 269 case GGL_SRC_ALPHA: 270 if (component==GGLFormat::ALPHA && !isAlphaSourceNeeded()) { 271 // we're processing alpha, so we already have 272 // src-alpha in fragment, and we need src-alpha just this time. 273 } else { 274 // alpha-src will be needed for other components 275 if (!mBlendFactorCached || mBlendFactorCached==f) { 276 src_alpha = mAlphaSource; 277 factor = mAlphaSource; 278 factor.flags &= ~CORRUPTIBLE; 279 // we already computed the blend factor before, nothing to do. 280 if (mBlendFactorCached) 281 return; 282 // this is the first time, make sure to compute the blend 283 // factor properly. 284 mBlendFactorCached = f; 285 break; 286 } else { 287 // we have a cached alpha blend factor, but we want another one, 288 // this should really not happen because by construction, 289 // we cannot have BOTH source and destination 290 // blend factors use ALPHA *and* ONE_MINUS_ALPHA (because 291 // the blending stage uses the f/(1-f) optimization 292 293 // for completeness, we handle this case though. Since there 294 // are only 2 choices, this meens we want "the other one" 295 // (1-factor) 296 factor = mAlphaSource; 297 factor.flags &= ~CORRUPTIBLE; 298 RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s))); 299 mBlendFactorCached = f; 300 return; 301 } 302 } 303 // fall-through... 304 case GGL_ONE_MINUS_DST_COLOR: 305 case GGL_DST_COLOR: 306 case GGL_ONE_MINUS_SRC_COLOR: 307 case GGL_SRC_COLOR: 308 case GGL_ONE_MINUS_DST_ALPHA: 309 case GGL_DST_ALPHA: 310 case GGL_SRC_ALPHA_SATURATE: 311 // help us find out what register we can use for the blend-factor 312 // CORRUPTIBLE registers are chosen first, or a new one is allocated. 313 if (fragment.flags & CORRUPTIBLE) { 314 factor.setTo(fragment.reg, 32, CORRUPTIBLE); 315 fragment.flags &= ~CORRUPTIBLE; 316 } else if (fb.flags & CORRUPTIBLE) { 317 factor.setTo(fb.reg, 32, CORRUPTIBLE); 318 fb.flags &= ~CORRUPTIBLE; 319 } else { 320 factor.setTo(scratches.obtain(), 32, CORRUPTIBLE); 321 } 322 break; 323 } 324 325 // XXX: doesn't work if size==1 326 327 switch(f) { 328 case GGL_ONE_MINUS_DST_COLOR: 329 case GGL_DST_COLOR: 330 factor.s = fb.s; 331 ADD(AL, 0, factor.reg, fb.reg, reg_imm(fb.reg, LSR, fb.s-1)); 332 break; 333 case GGL_ONE_MINUS_SRC_COLOR: 334 case GGL_SRC_COLOR: 335 factor.s = fragment.s; 336 ADD(AL, 0, factor.reg, fragment.reg, 337 reg_imm(fragment.reg, LSR, fragment.s-1)); 338 break; 339 case GGL_ONE_MINUS_SRC_ALPHA: 340 case GGL_SRC_ALPHA: 341 factor.s = src_alpha.s; 342 ADD(AL, 0, factor.reg, src_alpha.reg, 343 reg_imm(src_alpha.reg, LSR, src_alpha.s-1)); 344 break; 345 case GGL_ONE_MINUS_DST_ALPHA: 346 case GGL_DST_ALPHA: 347 // XXX: should be precomputed 348 extract(factor, dst_pixel, GGLFormat::ALPHA); 349 ADD(AL, 0, factor.reg, factor.reg, 350 reg_imm(factor.reg, LSR, factor.s-1)); 351 break; 352 case GGL_SRC_ALPHA_SATURATE: 353 // XXX: should be precomputed 354 // XXX: f = min(As, 1-Ad) 355 // btw, we're guaranteed that Ad's size is <= 8, because 356 // it's extracted from the framebuffer 357 break; 358 } 359 360 switch(f) { 361 case GGL_ONE_MINUS_DST_COLOR: 362 case GGL_ONE_MINUS_SRC_COLOR: 363 case GGL_ONE_MINUS_DST_ALPHA: 364 case GGL_ONE_MINUS_SRC_ALPHA: 365 RSB(AL, 0, factor.reg, factor.reg, imm((1<<factor.s))); 366 } 367 368 // don't need more than 8-bits for the blend factor 369 // and this will prevent overflows in the multiplies later 370 if (factor.s > 8) { 371 MOV(AL, 0, factor.reg, reg_imm(factor.reg, LSR, factor.s-8)); 372 factor.s = 8; 373 } 374 } 375 376 int GGLAssembler::blending_codes(int fs, int fd) 377 { 378 int blending = 0; 379 switch(fs) { 380 case GGL_ONE: 381 blending |= BLEND_SRC; 382 break; 383 384 case GGL_ONE_MINUS_DST_COLOR: 385 case GGL_DST_COLOR: 386 blending |= FACTOR_DST|BLEND_SRC; 387 break; 388 case GGL_ONE_MINUS_DST_ALPHA: 389 case GGL_DST_ALPHA: 390 // no need to extract 'component' from the destination 391 // for the blend factor, because we need ALPHA only. 392 blending |= BLEND_SRC; 393 break; 394 395 case GGL_ONE_MINUS_SRC_COLOR: 396 case GGL_SRC_COLOR: 397 blending |= FACTOR_SRC|BLEND_SRC; 398 break; 399 case GGL_ONE_MINUS_SRC_ALPHA: 400 case GGL_SRC_ALPHA: 401 case GGL_SRC_ALPHA_SATURATE: 402 blending |= FACTOR_SRC|BLEND_SRC; 403 break; 404 } 405 switch(fd) { 406 case GGL_ONE: 407 blending |= BLEND_DST; 408 break; 409 410 case GGL_ONE_MINUS_DST_COLOR: 411 case GGL_DST_COLOR: 412 blending |= FACTOR_DST|BLEND_DST; 413 break; 414 case GGL_ONE_MINUS_DST_ALPHA: 415 case GGL_DST_ALPHA: 416 blending |= FACTOR_DST|BLEND_DST; 417 break; 418 419 case GGL_ONE_MINUS_SRC_COLOR: 420 case GGL_SRC_COLOR: 421 blending |= FACTOR_SRC|BLEND_DST; 422 break; 423 case GGL_ONE_MINUS_SRC_ALPHA: 424 case GGL_SRC_ALPHA: 425 // no need to extract 'component' from the source 426 // for the blend factor, because we need ALPHA only. 427 blending |= BLEND_DST; 428 break; 429 } 430 return blending; 431 } 432 433 // --------------------------------------------------------------------------- 434 435 void GGLAssembler::build_blendFOneMinusF( 436 component_t& temp, 437 const integer_t& factor, 438 const integer_t& fragment, 439 const integer_t& fb) 440 { 441 // R = S*f + D*(1-f) = (S-D)*f + D 442 Scratch scratches(registerFile()); 443 // compute S-D 444 integer_t diff(fragment.flags & CORRUPTIBLE ? 445 fragment.reg : scratches.obtain(), fb.size(), CORRUPTIBLE); 446 const int shift = fragment.size() - fb.size(); 447 if (shift>0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift)); 448 else if (shift<0) RSB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift)); 449 else RSB(AL, 0, diff.reg, fb.reg, fragment.reg); 450 mul_factor_add(temp, diff, factor, component_t(fb)); 451 } 452 453 void GGLAssembler::build_blendOneMinusFF( 454 component_t& temp, 455 const integer_t& factor, 456 const integer_t& fragment, 457 const integer_t& fb) 458 { 459 // R = S*f + D*(1-f) = (S-D)*f + D 460 Scratch scratches(registerFile()); 461 // compute D-S 462 integer_t diff(fb.flags & CORRUPTIBLE ? 463 fb.reg : scratches.obtain(), fb.size(), CORRUPTIBLE); 464 const int shift = fragment.size() - fb.size(); 465 if (shift>0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSR, shift)); 466 else if (shift<0) SUB(AL, 0, diff.reg, fb.reg, reg_imm(fragment.reg, LSL,-shift)); 467 else SUB(AL, 0, diff.reg, fb.reg, fragment.reg); 468 mul_factor_add(temp, diff, factor, component_t(fragment)); 469 } 470 471 // --------------------------------------------------------------------------- 472 473 void GGLAssembler::mul_factor( component_t& d, 474 const integer_t& v, 475 const integer_t& f) 476 { 477 int vs = v.size(); 478 int fs = f.size(); 479 int ms = vs+fs; 480 481 // XXX: we could have special cases for 1 bit mul 482 483 // all this code below to use the best multiply instruction 484 // wrt the parameters size. We take advantage of the fact 485 // that the 16-bits multiplies allow a 16-bit shift 486 // The trick is that we just make sure that we have at least 8-bits 487 // per component (which is enough for a 8 bits display). 488 489 int xy; 490 int vshift = 0; 491 int fshift = 0; 492 int smulw = 0; 493 494 if (vs<16) { 495 if (fs<16) { 496 xy = xyBB; 497 } else if (GGL_BETWEEN(fs, 24, 31)) { 498 ms -= 16; 499 xy = xyTB; 500 } else { 501 // eg: 15 * 18 -> 15 * 15 502 fshift = fs - 15; 503 ms -= fshift; 504 xy = xyBB; 505 } 506 } else if (GGL_BETWEEN(vs, 24, 31)) { 507 if (fs<16) { 508 ms -= 16; 509 xy = xyTB; 510 } else if (GGL_BETWEEN(fs, 24, 31)) { 511 ms -= 32; 512 xy = xyTT; 513 } else { 514 // eg: 24 * 18 -> 8 * 18 515 fshift = fs - 15; 516 ms -= 16 + fshift; 517 xy = xyTB; 518 } 519 } else { 520 if (fs<16) { 521 // eg: 18 * 15 -> 15 * 15 522 vshift = vs - 15; 523 ms -= vshift; 524 xy = xyBB; 525 } else if (GGL_BETWEEN(fs, 24, 31)) { 526 // eg: 18 * 24 -> 15 * 8 527 vshift = vs - 15; 528 ms -= 16 + vshift; 529 xy = xyBT; 530 } else { 531 // eg: 18 * 18 -> (15 * 18)>>16 532 fshift = fs - 15; 533 ms -= 16 + fshift; 534 xy = yB; //XXX SMULWB 535 smulw = 1; 536 } 537 } 538 539 ALOGE_IF(ms>=32, "mul_factor overflow vs=%d, fs=%d", vs, fs); 540 541 int vreg = v.reg; 542 int freg = f.reg; 543 if (vshift) { 544 MOV(AL, 0, d.reg, reg_imm(vreg, LSR, vshift)); 545 vreg = d.reg; 546 } 547 if (fshift) { 548 MOV(AL, 0, d.reg, reg_imm(vreg, LSR, fshift)); 549 freg = d.reg; 550 } 551 if (smulw) SMULW(AL, xy, d.reg, vreg, freg); 552 else SMUL(AL, xy, d.reg, vreg, freg); 553 554 555 d.h = ms; 556 if (mDithering) { 557 d.l = 0; 558 } else { 559 d.l = fs; 560 d.flags |= CLEAR_LO; 561 } 562 } 563 564 void GGLAssembler::mul_factor_add( component_t& d, 565 const integer_t& v, 566 const integer_t& f, 567 const component_t& a) 568 { 569 // XXX: we could have special cases for 1 bit mul 570 Scratch scratches(registerFile()); 571 572 int vs = v.size(); 573 int fs = f.size(); 574 int as = a.h; 575 int ms = vs+fs; 576 577 ALOGE_IF(ms>=32, "mul_factor_add overflow vs=%d, fs=%d, as=%d", vs, fs, as); 578 579 integer_t add(a.reg, a.h, a.flags); 580 581 // 'a' is a component_t but it is guaranteed to have 582 // its high bits set to 0. However in the dithering case, 583 // we can't get away with truncating the potentially bad bits 584 // so extraction is needed. 585 586 if ((mDithering) && (a.size() < ms)) { 587 // we need to expand a 588 if (!(a.flags & CORRUPTIBLE)) { 589 // ... but it's not corruptible, so we need to pick a 590 // temporary register. 591 // Try to uses the destination register first (it's likely 592 // to be usable, unless it aliases an input). 593 if (d.reg!=a.reg && d.reg!=v.reg && d.reg!=f.reg) { 594 add.reg = d.reg; 595 } else { 596 add.reg = scratches.obtain(); 597 } 598 } 599 expand(add, a, ms); // extracts and expands 600 as = ms; 601 } 602 603 if (ms == as) { 604 if (vs<16 && fs<16) SMLABB(AL, d.reg, v.reg, f.reg, add.reg); 605 else MLA(AL, 0, d.reg, v.reg, f.reg, add.reg); 606 } else { 607 int temp = d.reg; 608 if (temp == add.reg) { 609 // the mul will modify add.reg, we need an intermediary reg 610 if (v.flags & CORRUPTIBLE) temp = v.reg; 611 else if (f.flags & CORRUPTIBLE) temp = f.reg; 612 else temp = scratches.obtain(); 613 } 614 615 if (vs<16 && fs<16) SMULBB(AL, temp, v.reg, f.reg); 616 else MUL(AL, 0, temp, v.reg, f.reg); 617 618 if (ms>as) { 619 ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSL, ms-as)); 620 } else if (ms<as) { 621 // not sure if we should expand the mul instead? 622 ADD(AL, 0, d.reg, temp, reg_imm(add.reg, LSR, as-ms)); 623 } 624 } 625 626 d.h = ms; 627 if (mDithering) { 628 d.l = a.l; 629 } else { 630 d.l = fs>a.l ? fs : a.l; 631 d.flags |= CLEAR_LO; 632 } 633 } 634 635 void GGLAssembler::component_add(component_t& d, 636 const integer_t& dst, const integer_t& src) 637 { 638 // here we're guaranteed that fragment.size() >= fb.size() 639 const int shift = src.size() - dst.size(); 640 if (!shift) { 641 ADD(AL, 0, d.reg, src.reg, dst.reg); 642 } else { 643 ADD(AL, 0, d.reg, src.reg, reg_imm(dst.reg, LSL, shift)); 644 } 645 646 d.h = src.size(); 647 if (mDithering) { 648 d.l = 0; 649 } else { 650 d.l = shift; 651 d.flags |= CLEAR_LO; 652 } 653 } 654 655 void GGLAssembler::component_sat(const component_t& v) 656 { 657 const int one = ((1<<v.size())-1)<<v.l; 658 CMP(AL, v.reg, imm( 1<<v.h )); 659 if (isValidImmediate(one)) { 660 MOV(HS, 0, v.reg, imm( one )); 661 } else if (isValidImmediate(~one)) { 662 MVN(HS, 0, v.reg, imm( ~one )); 663 } else { 664 MOV(HS, 0, v.reg, imm( 1<<v.h )); 665 SUB(HS, 0, v.reg, v.reg, imm( 1<<v.l )); 666 } 667 } 668 669 // ---------------------------------------------------------------------------- 670 671 }; // namespace android 672 673