1 /* libs/pixelflinger/scanline.cpp 2 ** 3 ** Copyright 2006, The Android Open Source Project 4 ** 5 ** Licensed under the Apache License, Version 2.0 (the "License"); 6 ** you may not use this file except in compliance with the License. 7 ** You may obtain a copy of the License at 8 ** 9 ** http://www.apache.org/licenses/LICENSE-2.0 10 ** 11 ** Unless required by applicable law or agreed to in writing, software 12 ** distributed under the License is distributed on an "AS IS" BASIS, 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 ** See the License for the specific language governing permissions and 15 ** limitations under the License. 16 */ 17 18 19 #define LOG_TAG "pixelflinger" 20 21 #include <assert.h> 22 #include <stdlib.h> 23 #include <stdio.h> 24 #include <string.h> 25 26 #include <cutils/memory.h> 27 #include <cutils/log.h> 28 29 #include "buffer.h" 30 #include "scanline.h" 31 32 #include "codeflinger/CodeCache.h" 33 #include "codeflinger/GGLAssembler.h" 34 #include "codeflinger/ARMAssembler.h" 35 //#include "codeflinger/ARMAssemblerOptimizer.h" 36 37 // ---------------------------------------------------------------------------- 38 39 #define ANDROID_CODEGEN_GENERIC 0 // force generic pixel pipeline 40 #define ANDROID_CODEGEN_C 1 // hand-written C, fallback generic 41 #define ANDROID_CODEGEN_ASM 2 // hand-written asm, fallback generic 42 #define ANDROID_CODEGEN_GENERATED 3 // hand-written asm, fallback codegen 43 44 #ifdef NDEBUG 45 # define ANDROID_RELEASE 46 # define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED 47 #else 48 # define ANDROID_DEBUG 49 # define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED 50 #endif 51 52 #if defined(__arm__) 53 # define ANDROID_ARM_CODEGEN 1 54 #else 55 # define ANDROID_ARM_CODEGEN 0 56 #endif 57 58 #define DEBUG__CODEGEN_ONLY 0 59 60 61 #define ASSEMBLY_SCRATCH_SIZE 2048 62 63 // ---------------------------------------------------------------------------- 64 namespace android { 65 // ---------------------------------------------------------------------------- 66 67 static void init_y(context_t*, int32_t); 68 static void init_y_noop(context_t*, int32_t); 69 static void init_y_packed(context_t*, int32_t); 70 static void init_y_error(context_t*, int32_t); 71 72 static void step_y__generic(context_t* c); 73 static void step_y__nop(context_t*); 74 static void step_y__smooth(context_t* c); 75 static void step_y__tmu(context_t* c); 76 static void step_y__w(context_t* c); 77 78 static void scanline(context_t* c); 79 static void scanline_perspective(context_t* c); 80 static void scanline_perspective_single(context_t* c); 81 static void scanline_t32cb16blend(context_t* c); 82 static void scanline_t32cb16(context_t* c); 83 static void scanline_memcpy(context_t* c); 84 static void scanline_memset8(context_t* c); 85 static void scanline_memset16(context_t* c); 86 static void scanline_memset32(context_t* c); 87 static void scanline_noop(context_t* c); 88 static void scanline_set(context_t* c); 89 static void scanline_clear(context_t* c); 90 91 static void rect_generic(context_t* c, size_t yc); 92 static void rect_memcpy(context_t* c, size_t yc); 93 94 extern "C" void scanline_t32cb16blend_arm(uint16_t*, uint32_t*, size_t); 95 extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct); 96 97 // ---------------------------------------------------------------------------- 98 99 struct shortcut_t { 100 needs_filter_t filter; 101 const char* desc; 102 void (*scanline)(context_t*); 103 void (*init_y)(context_t*, int32_t); 104 }; 105 106 // Keep in sync with needs 107 static shortcut_t shortcuts[] = { 108 { { { 0x03515104, 0x00000077, { 0x00000A01, 0x00000000 } }, 109 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 110 "565 fb, 8888 tx, blend", scanline_t32cb16blend, init_y_noop }, 111 { { { 0x03010104, 0x00000077, { 0x00000A01, 0x00000000 } }, 112 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 113 "565 fb, 8888 tx", scanline_t32cb16, init_y_noop }, 114 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } }, 115 { 0x00000000, 0x00000007, { 0x00000000, 0x00000000 } } }, 116 "(nop) alpha test", scanline_noop, init_y_noop }, 117 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } }, 118 { 0x00000000, 0x00000070, { 0x00000000, 0x00000000 } } }, 119 "(nop) depth test", scanline_noop, init_y_noop }, 120 { { { 0x05000000, 0x00000000, { 0x00000000, 0x00000000 } }, 121 { 0x0F000000, 0x00000080, { 0x00000000, 0x00000000 } } }, 122 "(nop) logic_op", scanline_noop, init_y_noop }, 123 { { { 0xF0000000, 0x00000000, { 0x00000000, 0x00000000 } }, 124 { 0xF0000000, 0x00000080, { 0x00000000, 0x00000000 } } }, 125 "(nop) color mask", scanline_noop, init_y_noop }, 126 { { { 0x0F000000, 0x00000077, { 0x00000000, 0x00000000 } }, 127 { 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } }, 128 "(set) logic_op", scanline_set, init_y_noop }, 129 { { { 0x00000000, 0x00000077, { 0x00000000, 0x00000000 } }, 130 { 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } }, 131 "(clear) logic_op", scanline_clear, init_y_noop }, 132 { { { 0x03000000, 0x00000077, { 0x00000000, 0x00000000 } }, 133 { 0xFFFFFF00, 0x000000F7, { 0x00000000, 0x00000000 } } }, 134 "(clear) blending 0/0", scanline_clear, init_y_noop }, 135 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } }, 136 { 0x0000003F, 0x00000000, { 0x00000000, 0x00000000 } } }, 137 "(error) invalid color-buffer format", scanline_noop, init_y_error }, 138 }; 139 static const needs_filter_t noblend1to1 = { 140 // (disregard dithering, see below) 141 { 0x03010100, 0x00000077, { 0x00000A00, 0x00000000 } }, 142 { 0xFFFFFFC0, 0xFFFFFEFF, { 0xFFFFFFC0, 0x0000003F } } 143 }; 144 static const needs_filter_t fill16noblend = { 145 { 0x03010100, 0x00000077, { 0x00000000, 0x00000000 } }, 146 { 0xFFFFFFC0, 0xFFFFFFFF, { 0x0000003F, 0x0000003F } } 147 }; 148 149 // ---------------------------------------------------------------------------- 150 151 #if ANDROID_ARM_CODEGEN 152 static CodeCache gCodeCache(12 * 1024); 153 154 class ScanlineAssembly : public Assembly { 155 AssemblyKey<needs_t> mKey; 156 public: 157 ScanlineAssembly(needs_t needs, size_t size) 158 : Assembly(size), mKey(needs) { } 159 const AssemblyKey<needs_t>& key() const { return mKey; } 160 }; 161 #endif 162 163 // ---------------------------------------------------------------------------- 164 165 void ggl_init_scanline(context_t* c) 166 { 167 c->init_y = init_y; 168 c->step_y = step_y__generic; 169 c->scanline = scanline; 170 } 171 172 void ggl_uninit_scanline(context_t* c) 173 { 174 if (c->state.buffers.coverage) 175 free(c->state.buffers.coverage); 176 #if ANDROID_ARM_CODEGEN 177 if (c->scanline_as) 178 c->scanline_as->decStrong(c); 179 #endif 180 } 181 182 // ---------------------------------------------------------------------------- 183 184 static void pick_scanline(context_t* c) 185 { 186 #if (!defined(DEBUG__CODEGEN_ONLY) || (DEBUG__CODEGEN_ONLY == 0)) 187 188 #if ANDROID_CODEGEN == ANDROID_CODEGEN_GENERIC 189 c->init_y = init_y; 190 c->step_y = step_y__generic; 191 c->scanline = scanline; 192 return; 193 #endif 194 195 //printf("*** needs [%08lx:%08lx:%08lx:%08lx]\n", 196 // c->state.needs.n, c->state.needs.p, 197 // c->state.needs.t[0], c->state.needs.t[1]); 198 199 // first handle the special case that we cannot test with a filter 200 const uint32_t cb_format = GGL_READ_NEEDS(CB_FORMAT, c->state.needs.n); 201 if (GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0]) == cb_format) { 202 if (c->state.needs.match(noblend1to1)) { 203 // this will match regardless of dithering state, since both 204 // src and dest have the same format anyway, there is no dithering 205 // to be done. 206 const GGLFormat* f = 207 &(c->formats[GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0])]); 208 if ((f->components == GGL_RGB) || 209 (f->components == GGL_RGBA) || 210 (f->components == GGL_LUMINANCE) || 211 (f->components == GGL_LUMINANCE_ALPHA)) 212 { 213 // format must have all of RGB components 214 // (so the current color doesn't show through) 215 c->scanline = scanline_memcpy; 216 c->init_y = init_y_noop; 217 return; 218 } 219 } 220 } 221 222 if (c->state.needs.match(fill16noblend)) { 223 c->init_y = init_y_packed; 224 switch (c->formats[cb_format].size) { 225 case 1: c->scanline = scanline_memset8; return; 226 case 2: c->scanline = scanline_memset16; return; 227 case 4: c->scanline = scanline_memset32; return; 228 } 229 } 230 231 const int numFilters = sizeof(shortcuts)/sizeof(shortcut_t); 232 for (int i=0 ; i<numFilters ; i++) { 233 if (c->state.needs.match(shortcuts[i].filter)) { 234 c->scanline = shortcuts[i].scanline; 235 c->init_y = shortcuts[i].init_y; 236 return; 237 } 238 } 239 240 #endif // DEBUG__CODEGEN_ONLY 241 242 c->init_y = init_y; 243 c->step_y = step_y__generic; 244 245 #if ANDROID_ARM_CODEGEN 246 // we're going to have to generate some code... 247 // here, generate code for our pixel pipeline 248 const AssemblyKey<needs_t> key(c->state.needs); 249 sp<Assembly> assembly = gCodeCache.lookup(key); 250 if (assembly == 0) { 251 // create a new assembly region 252 sp<ScanlineAssembly> a = new ScanlineAssembly(c->state.needs, 253 ASSEMBLY_SCRATCH_SIZE); 254 // initialize our assembler 255 GGLAssembler assembler( new ARMAssembler(a) ); 256 //GGLAssembler assembler( 257 // new ARMAssemblerOptimizer(new ARMAssembler(a)) ); 258 // generate the scanline code for the given needs 259 int err = assembler.scanline(c->state.needs, c); 260 if (ggl_likely(!err)) { 261 // finally, cache this assembly 262 err = gCodeCache.cache(a->key(), a); 263 } 264 if (ggl_unlikely(err)) { 265 LOGE("error generating or caching assembly. Reverting to NOP."); 266 c->scanline = scanline_noop; 267 c->init_y = init_y_noop; 268 c->step_y = step_y__nop; 269 return; 270 } 271 assembly = a; 272 } 273 274 // release the previous assembly 275 if (c->scanline_as) { 276 c->scanline_as->decStrong(c); 277 } 278 279 //LOGI("using generated pixel-pipeline"); 280 c->scanline_as = assembly.get(); 281 c->scanline_as->incStrong(c); // hold on to assembly 282 c->scanline = (void(*)(context_t* c))assembly->base(); 283 #else 284 // LOGW("using generic (slow) pixel-pipeline"); 285 c->scanline = scanline; 286 #endif 287 } 288 289 void ggl_pick_scanline(context_t* c) 290 { 291 pick_scanline(c); 292 if ((c->state.enables & GGL_ENABLE_W) && 293 (c->state.enables & GGL_ENABLE_TMUS)) 294 { 295 c->span = c->scanline; 296 c->scanline = scanline_perspective; 297 if (!(c->state.enabled_tmu & (c->state.enabled_tmu - 1))) { 298 // only one TMU enabled 299 c->scanline = scanline_perspective_single; 300 } 301 } 302 } 303 304 // ---------------------------------------------------------------------------- 305 306 static void blending(context_t* c, pixel_t* fragment, pixel_t* fb); 307 static void blend_factor(context_t* c, pixel_t* r, uint32_t factor, 308 const pixel_t* src, const pixel_t* dst); 309 static void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv); 310 311 #if ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED) 312 313 // no need to compile the generic-pipeline, it can't be reached 314 void scanline(context_t*) 315 { 316 } 317 318 #else 319 320 void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv) 321 { 322 if (su && sv) { 323 if (su > sv) { 324 v = ggl_expand(v, sv, su); 325 sv = su; 326 } else if (su < sv) { 327 u = ggl_expand(u, su, sv); 328 su = sv; 329 } 330 } 331 } 332 333 void blending(context_t* c, pixel_t* fragment, pixel_t* fb) 334 { 335 rescale(fragment->c[0], fragment->s[0], fb->c[0], fb->s[0]); 336 rescale(fragment->c[1], fragment->s[1], fb->c[1], fb->s[1]); 337 rescale(fragment->c[2], fragment->s[2], fb->c[2], fb->s[2]); 338 rescale(fragment->c[3], fragment->s[3], fb->c[3], fb->s[3]); 339 340 pixel_t sf, df; 341 blend_factor(c, &sf, c->state.blend.src, fragment, fb); 342 blend_factor(c, &df, c->state.blend.dst, fragment, fb); 343 344 fragment->c[1] = 345 gglMulAddx(fragment->c[1], sf.c[1], gglMulx(fb->c[1], df.c[1])); 346 fragment->c[2] = 347 gglMulAddx(fragment->c[2], sf.c[2], gglMulx(fb->c[2], df.c[2])); 348 fragment->c[3] = 349 gglMulAddx(fragment->c[3], sf.c[3], gglMulx(fb->c[3], df.c[3])); 350 351 if (c->state.blend.alpha_separate) { 352 blend_factor(c, &sf, c->state.blend.src_alpha, fragment, fb); 353 blend_factor(c, &df, c->state.blend.dst_alpha, fragment, fb); 354 } 355 356 fragment->c[0] = 357 gglMulAddx(fragment->c[0], sf.c[0], gglMulx(fb->c[0], df.c[0])); 358 359 // clamp to 1.0 360 if (fragment->c[0] >= (1LU<<fragment->s[0])) 361 fragment->c[0] = (1<<fragment->s[0])-1; 362 if (fragment->c[1] >= (1LU<<fragment->s[1])) 363 fragment->c[1] = (1<<fragment->s[1])-1; 364 if (fragment->c[2] >= (1LU<<fragment->s[2])) 365 fragment->c[2] = (1<<fragment->s[2])-1; 366 if (fragment->c[3] >= (1LU<<fragment->s[3])) 367 fragment->c[3] = (1<<fragment->s[3])-1; 368 } 369 370 static inline int blendfactor(uint32_t x, uint32_t size, uint32_t def = 0) 371 { 372 if (!size) 373 return def; 374 375 // scale to 16 bits 376 if (size > 16) { 377 x >>= (size - 16); 378 } else if (size < 16) { 379 x = ggl_expand(x, size, 16); 380 } 381 x += x >> 15; 382 return x; 383 } 384 385 void blend_factor(context_t* c, pixel_t* r, 386 uint32_t factor, const pixel_t* src, const pixel_t* dst) 387 { 388 switch (factor) { 389 case GGL_ZERO: 390 r->c[1] = 391 r->c[2] = 392 r->c[3] = 393 r->c[0] = 0; 394 break; 395 case GGL_ONE: 396 r->c[1] = 397 r->c[2] = 398 r->c[3] = 399 r->c[0] = FIXED_ONE; 400 break; 401 case GGL_DST_COLOR: 402 r->c[1] = blendfactor(dst->c[1], dst->s[1]); 403 r->c[2] = blendfactor(dst->c[2], dst->s[2]); 404 r->c[3] = blendfactor(dst->c[3], dst->s[3]); 405 r->c[0] = blendfactor(dst->c[0], dst->s[0]); 406 break; 407 case GGL_SRC_COLOR: 408 r->c[1] = blendfactor(src->c[1], src->s[1]); 409 r->c[2] = blendfactor(src->c[2], src->s[2]); 410 r->c[3] = blendfactor(src->c[3], src->s[3]); 411 r->c[0] = blendfactor(src->c[0], src->s[0]); 412 break; 413 case GGL_ONE_MINUS_DST_COLOR: 414 r->c[1] = FIXED_ONE - blendfactor(dst->c[1], dst->s[1]); 415 r->c[2] = FIXED_ONE - blendfactor(dst->c[2], dst->s[2]); 416 r->c[3] = FIXED_ONE - blendfactor(dst->c[3], dst->s[3]); 417 r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0]); 418 break; 419 case GGL_ONE_MINUS_SRC_COLOR: 420 r->c[1] = FIXED_ONE - blendfactor(src->c[1], src->s[1]); 421 r->c[2] = FIXED_ONE - blendfactor(src->c[2], src->s[2]); 422 r->c[3] = FIXED_ONE - blendfactor(src->c[3], src->s[3]); 423 r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0]); 424 break; 425 case GGL_SRC_ALPHA: 426 r->c[1] = 427 r->c[2] = 428 r->c[3] = 429 r->c[0] = blendfactor(src->c[0], src->s[0], FIXED_ONE); 430 break; 431 case GGL_ONE_MINUS_SRC_ALPHA: 432 r->c[1] = 433 r->c[2] = 434 r->c[3] = 435 r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0], FIXED_ONE); 436 break; 437 case GGL_DST_ALPHA: 438 r->c[1] = 439 r->c[2] = 440 r->c[3] = 441 r->c[0] = blendfactor(dst->c[0], dst->s[0], FIXED_ONE); 442 break; 443 case GGL_ONE_MINUS_DST_ALPHA: 444 r->c[1] = 445 r->c[2] = 446 r->c[3] = 447 r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0], FIXED_ONE); 448 break; 449 case GGL_SRC_ALPHA_SATURATE: 450 // XXX: GGL_SRC_ALPHA_SATURATE 451 break; 452 } 453 } 454 455 static GGLfixed wrapping(int32_t coord, uint32_t size, int tx_wrap) 456 { 457 GGLfixed d; 458 if (tx_wrap == GGL_REPEAT) { 459 d = (uint32_t(coord)>>16) * size; 460 } else if (tx_wrap == GGL_CLAMP) { // CLAMP_TO_EDGE semantics 461 const GGLfixed clamp_min = FIXED_HALF; 462 const GGLfixed clamp_max = (size << 16) - FIXED_HALF; 463 if (coord < clamp_min) coord = clamp_min; 464 if (coord > clamp_max) coord = clamp_max; 465 d = coord; 466 } else { // 1:1 467 const GGLfixed clamp_min = 0; 468 const GGLfixed clamp_max = (size << 16); 469 if (coord < clamp_min) coord = clamp_min; 470 if (coord > clamp_max) coord = clamp_max; 471 d = coord; 472 } 473 return d; 474 } 475 476 static inline 477 GGLcolor ADJUST_COLOR_ITERATOR(GGLcolor v, GGLcolor dvdx, int len) 478 { 479 const int32_t end = dvdx * (len-1) + v; 480 if (end < 0) 481 v -= end; 482 v &= ~(v>>31); 483 return v; 484 } 485 486 void scanline(context_t* c) 487 { 488 const uint32_t enables = c->state.enables; 489 const int xs = c->iterators.xl; 490 const int x1 = c->iterators.xr; 491 int xc = x1 - xs; 492 const int16_t* covPtr = c->state.buffers.coverage + xs; 493 494 // All iterated values are sampled at the pixel center 495 496 // reset iterators for that scanline... 497 GGLcolor r, g, b, a; 498 iterators_t& ci = c->iterators; 499 if (enables & GGL_ENABLE_SMOOTH) { 500 r = (xs * c->shade.drdx) + ci.ydrdy; 501 g = (xs * c->shade.dgdx) + ci.ydgdy; 502 b = (xs * c->shade.dbdx) + ci.ydbdy; 503 a = (xs * c->shade.dadx) + ci.ydady; 504 r = ADJUST_COLOR_ITERATOR(r, c->shade.drdx, xc); 505 g = ADJUST_COLOR_ITERATOR(g, c->shade.dgdx, xc); 506 b = ADJUST_COLOR_ITERATOR(b, c->shade.dbdx, xc); 507 a = ADJUST_COLOR_ITERATOR(a, c->shade.dadx, xc); 508 } else { 509 r = ci.ydrdy; 510 g = ci.ydgdy; 511 b = ci.ydbdy; 512 a = ci.ydady; 513 } 514 515 // z iterators are 1.31 516 GGLfixed z = (xs * c->shade.dzdx) + ci.ydzdy; 517 GGLfixed f = (xs * c->shade.dfdx) + ci.ydfdy; 518 519 struct { 520 GGLfixed s, t; 521 } tc[GGL_TEXTURE_UNIT_COUNT]; 522 if (enables & GGL_ENABLE_TMUS) { 523 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 524 if (c->state.texture[i].enable) { 525 texture_iterators_t& ti = c->state.texture[i].iterators; 526 if (enables & GGL_ENABLE_W) { 527 tc[i].s = ti.ydsdy; 528 tc[i].t = ti.ydtdy; 529 } else { 530 tc[i].s = (xs * ti.dsdx) + ti.ydsdy; 531 tc[i].t = (xs * ti.dtdx) + ti.ydtdy; 532 } 533 } 534 } 535 } 536 537 pixel_t fragment; 538 pixel_t texel; 539 pixel_t fb; 540 541 uint32_t x = xs; 542 uint32_t y = c->iterators.y; 543 544 while (xc--) { 545 546 { // just a scope 547 548 // read color (convert to 8 bits by keeping only the integer part) 549 fragment.s[1] = fragment.s[2] = 550 fragment.s[3] = fragment.s[0] = 8; 551 fragment.c[1] = r >> (GGL_COLOR_BITS-8); 552 fragment.c[2] = g >> (GGL_COLOR_BITS-8); 553 fragment.c[3] = b >> (GGL_COLOR_BITS-8); 554 fragment.c[0] = a >> (GGL_COLOR_BITS-8); 555 556 // texturing 557 if (enables & GGL_ENABLE_TMUS) { 558 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 559 texture_t& tx = c->state.texture[i]; 560 if (!tx.enable) 561 continue; 562 texture_iterators_t& ti = tx.iterators; 563 int32_t u, v; 564 565 // s-coordinate 566 if (tx.s_coord != GGL_ONE_TO_ONE) { 567 const int w = tx.surface.width; 568 u = wrapping(tc[i].s, w, tx.s_wrap); 569 tc[i].s += ti.dsdx; 570 } else { 571 u = (((tx.shade.is0>>16) + x)<<16) + FIXED_HALF; 572 } 573 574 // t-coordinate 575 if (tx.t_coord != GGL_ONE_TO_ONE) { 576 const int h = tx.surface.height; 577 v = wrapping(tc[i].t, h, tx.t_wrap); 578 tc[i].t += ti.dtdx; 579 } else { 580 v = (((tx.shade.it0>>16) + y)<<16) + FIXED_HALF; 581 } 582 583 // read texture 584 if (tx.mag_filter == GGL_NEAREST && 585 tx.min_filter == GGL_NEAREST) 586 { 587 u >>= 16; 588 v >>= 16; 589 tx.surface.read(&tx.surface, c, u, v, &texel); 590 } else { 591 const int w = tx.surface.width; 592 const int h = tx.surface.height; 593 u -= FIXED_HALF; 594 v -= FIXED_HALF; 595 int u0 = u >> 16; 596 int v0 = v >> 16; 597 int u1 = u0 + 1; 598 int v1 = v0 + 1; 599 if (tx.s_wrap == GGL_REPEAT) { 600 if (u0<0) u0 += w; 601 if (u1<0) u1 += w; 602 if (u0>=w) u0 -= w; 603 if (u1>=w) u1 -= w; 604 } else { 605 if (u0<0) u0 = 0; 606 if (u1<0) u1 = 0; 607 if (u0>=w) u0 = w-1; 608 if (u1>=w) u1 = w-1; 609 } 610 if (tx.t_wrap == GGL_REPEAT) { 611 if (v0<0) v0 += h; 612 if (v1<0) v1 += h; 613 if (v0>=h) v0 -= h; 614 if (v1>=h) v1 -= h; 615 } else { 616 if (v0<0) v0 = 0; 617 if (v1<0) v1 = 0; 618 if (v0>=h) v0 = h-1; 619 if (v1>=h) v1 = h-1; 620 } 621 pixel_t texels[4]; 622 uint32_t mm[4]; 623 tx.surface.read(&tx.surface, c, u0, v0, &texels[0]); 624 tx.surface.read(&tx.surface, c, u0, v1, &texels[1]); 625 tx.surface.read(&tx.surface, c, u1, v0, &texels[2]); 626 tx.surface.read(&tx.surface, c, u1, v1, &texels[3]); 627 u = (u >> 12) & 0xF; 628 v = (v >> 12) & 0xF; 629 u += u>>3; 630 v += v>>3; 631 mm[0] = (0x10 - u) * (0x10 - v); 632 mm[1] = (0x10 - u) * v; 633 mm[2] = u * (0x10 - v); 634 mm[3] = 0x100 - (mm[0] + mm[1] + mm[2]); 635 for (int j=0 ; j<4 ; j++) { 636 texel.s[j] = texels[0].s[j]; 637 if (!texel.s[j]) continue; 638 texel.s[j] += 8; 639 texel.c[j] = texels[0].c[j]*mm[0] + 640 texels[1].c[j]*mm[1] + 641 texels[2].c[j]*mm[2] + 642 texels[3].c[j]*mm[3] ; 643 } 644 } 645 646 // Texture environnement... 647 for (int j=0 ; j<4 ; j++) { 648 uint32_t& Cf = fragment.c[j]; 649 uint32_t& Ct = texel.c[j]; 650 uint8_t& sf = fragment.s[j]; 651 uint8_t& st = texel.s[j]; 652 uint32_t At = texel.c[0]; 653 uint8_t sat = texel.s[0]; 654 switch (tx.env) { 655 case GGL_REPLACE: 656 if (st) { 657 Cf = Ct; 658 sf = st; 659 } 660 break; 661 case GGL_MODULATE: 662 if (st) { 663 uint32_t factor = Ct + (Ct>>(st-1)); 664 Cf = (Cf * factor) >> st; 665 } 666 break; 667 case GGL_DECAL: 668 if (sat) { 669 rescale(Cf, sf, Ct, st); 670 Cf += ((Ct - Cf) * (At + (At>>(sat-1)))) >> sat; 671 } 672 break; 673 case GGL_BLEND: 674 if (st) { 675 uint32_t Cc = tx.env_color[i]; 676 if (sf>8) Cc = (Cc * ((1<<sf)-1))>>8; 677 else if (sf<8) Cc = (Cc - (Cc>>(8-sf)))>>(8-sf); 678 uint32_t factor = Ct + (Ct>>(st-1)); 679 Cf = ((((1<<st) - factor) * Cf) + Ct*Cc)>>st; 680 } 681 break; 682 case GGL_ADD: 683 if (st) { 684 rescale(Cf, sf, Ct, st); 685 Cf += Ct; 686 } 687 break; 688 } 689 } 690 } 691 } 692 693 // coverage application 694 if (enables & GGL_ENABLE_AA) { 695 int16_t cf = *covPtr++; 696 fragment.c[0] = (int64_t(fragment.c[0]) * cf) >> 15; 697 } 698 699 // alpha-test 700 if (enables & GGL_ENABLE_ALPHA_TEST) { 701 GGLcolor ref = c->state.alpha_test.ref; 702 GGLcolor alpha = (uint64_t(fragment.c[0]) * 703 ((1<<GGL_COLOR_BITS)-1)) / ((1<<fragment.s[0])-1); 704 switch (c->state.alpha_test.func) { 705 case GGL_NEVER: goto discard; 706 case GGL_LESS: if (alpha<ref) break; goto discard; 707 case GGL_EQUAL: if (alpha==ref) break; goto discard; 708 case GGL_LEQUAL: if (alpha<=ref) break; goto discard; 709 case GGL_GREATER: if (alpha>ref) break; goto discard; 710 case GGL_NOTEQUAL: if (alpha!=ref) break; goto discard; 711 case GGL_GEQUAL: if (alpha>=ref) break; goto discard; 712 } 713 } 714 715 // depth test 716 if (c->state.buffers.depth.format) { 717 if (enables & GGL_ENABLE_DEPTH_TEST) { 718 surface_t* cb = &(c->state.buffers.depth); 719 uint16_t* p = (uint16_t*)(cb->data)+(x+(cb->stride*y)); 720 uint16_t zz = uint32_t(z)>>(16); 721 uint16_t depth = *p; 722 switch (c->state.depth_test.func) { 723 case GGL_NEVER: goto discard; 724 case GGL_LESS: if (zz<depth) break; goto discard; 725 case GGL_EQUAL: if (zz==depth) break; goto discard; 726 case GGL_LEQUAL: if (zz<=depth) break; goto discard; 727 case GGL_GREATER: if (zz>depth) break; goto discard; 728 case GGL_NOTEQUAL: if (zz!=depth) break; goto discard; 729 case GGL_GEQUAL: if (zz>=depth) break; goto discard; 730 } 731 // depth buffer is not enabled, if depth-test is not enabled 732 /* 733 fragment.s[1] = fragment.s[2] = 734 fragment.s[3] = fragment.s[0] = 8; 735 fragment.c[1] = 736 fragment.c[2] = 737 fragment.c[3] = 738 fragment.c[0] = 255 - (zz>>8); 739 */ 740 if (c->state.mask.depth) { 741 *p = zz; 742 } 743 } 744 } 745 746 // fog 747 if (enables & GGL_ENABLE_FOG) { 748 for (int i=1 ; i<=3 ; i++) { 749 GGLfixed fc = (c->state.fog.color[i] * 0x10000) / 0xFF; 750 uint32_t& c = fragment.c[i]; 751 uint8_t& s = fragment.s[i]; 752 c = (c * 0x10000) / ((1<<s)-1); 753 c = gglMulAddx(c, f, gglMulx(fc, 0x10000 - f)); 754 s = 16; 755 } 756 } 757 758 // blending 759 if (enables & GGL_ENABLE_BLENDING) { 760 fb.c[1] = fb.c[2] = fb.c[3] = fb.c[0] = 0; // placate valgrind 761 fb.s[1] = fb.s[2] = fb.s[3] = fb.s[0] = 0; 762 c->state.buffers.color.read( 763 &(c->state.buffers.color), c, x, y, &fb); 764 blending( c, &fragment, &fb ); 765 } 766 767 // write 768 c->state.buffers.color.write( 769 &(c->state.buffers.color), c, x, y, &fragment); 770 } 771 772 discard: 773 // iterate... 774 x += 1; 775 if (enables & GGL_ENABLE_SMOOTH) { 776 r += c->shade.drdx; 777 g += c->shade.dgdx; 778 b += c->shade.dbdx; 779 a += c->shade.dadx; 780 } 781 z += c->shade.dzdx; 782 f += c->shade.dfdx; 783 } 784 } 785 786 #endif // ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED) 787 788 // ---------------------------------------------------------------------------- 789 #if 0 790 #pragma mark - 791 #pragma mark Scanline 792 #endif 793 794 template <typename T, typename U> 795 static inline __attribute__((const)) 796 T interpolate(int y, T v0, U dvdx, U dvdy) { 797 // interpolates in pixel's centers 798 // v = v0 + (y + 0.5) * dvdy + (0.5 * dvdx) 799 return (y * dvdy) + (v0 + ((dvdy + dvdx) >> 1)); 800 } 801 802 // ---------------------------------------------------------------------------- 803 #if 0 804 #pragma mark - 805 #endif 806 807 void init_y(context_t* c, int32_t ys) 808 { 809 const uint32_t enables = c->state.enables; 810 811 // compute iterators... 812 iterators_t& ci = c->iterators; 813 814 // sample in the center 815 ci.y = ys; 816 817 if (enables & (GGL_ENABLE_DEPTH_TEST|GGL_ENABLE_W|GGL_ENABLE_FOG)) { 818 ci.ydzdy = interpolate(ys, c->shade.z0, c->shade.dzdx, c->shade.dzdy); 819 ci.ydwdy = interpolate(ys, c->shade.w0, c->shade.dwdx, c->shade.dwdy); 820 ci.ydfdy = interpolate(ys, c->shade.f0, c->shade.dfdx, c->shade.dfdy); 821 } 822 823 if (ggl_unlikely(enables & GGL_ENABLE_SMOOTH)) { 824 ci.ydrdy = interpolate(ys, c->shade.r0, c->shade.drdx, c->shade.drdy); 825 ci.ydgdy = interpolate(ys, c->shade.g0, c->shade.dgdx, c->shade.dgdy); 826 ci.ydbdy = interpolate(ys, c->shade.b0, c->shade.dbdx, c->shade.dbdy); 827 ci.ydady = interpolate(ys, c->shade.a0, c->shade.dadx, c->shade.dady); 828 c->step_y = step_y__smooth; 829 } else { 830 ci.ydrdy = c->shade.r0; 831 ci.ydgdy = c->shade.g0; 832 ci.ydbdy = c->shade.b0; 833 ci.ydady = c->shade.a0; 834 // XXX: do only if needed, or make sure this is fast 835 c->packed = ggl_pack_color(c, c->state.buffers.color.format, 836 ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady); 837 c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888, 838 ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady); 839 } 840 841 // initialize the variables we need in the shader 842 generated_vars_t& gen = c->generated_vars; 843 gen.argb[GGLFormat::ALPHA].c = ci.ydady; 844 gen.argb[GGLFormat::ALPHA].dx = c->shade.dadx; 845 gen.argb[GGLFormat::RED ].c = ci.ydrdy; 846 gen.argb[GGLFormat::RED ].dx = c->shade.drdx; 847 gen.argb[GGLFormat::GREEN].c = ci.ydgdy; 848 gen.argb[GGLFormat::GREEN].dx = c->shade.dgdx; 849 gen.argb[GGLFormat::BLUE ].c = ci.ydbdy; 850 gen.argb[GGLFormat::BLUE ].dx = c->shade.dbdx; 851 gen.dzdx = c->shade.dzdx; 852 gen.f = ci.ydfdy; 853 gen.dfdx = c->shade.dfdx; 854 855 if (enables & GGL_ENABLE_TMUS) { 856 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 857 texture_t& t = c->state.texture[i]; 858 if (!t.enable) continue; 859 860 texture_iterators_t& ti = t.iterators; 861 if (t.s_coord == GGL_ONE_TO_ONE && t.t_coord == GGL_ONE_TO_ONE) { 862 // we need to set all of these to 0 because in some cases 863 // step_y__generic() or step_y__tmu() will be used and 864 // therefore will update dtdy, however, in 1:1 mode 865 // this is always done by the scanline rasterizer. 866 ti.dsdx = ti.dsdy = ti.dtdx = ti.dtdy = 0; 867 ti.ydsdy = t.shade.is0; 868 ti.ydtdy = t.shade.it0; 869 } else { 870 const int adjustSWrap = ((t.s_wrap==GGL_CLAMP)?0:16); 871 const int adjustTWrap = ((t.t_wrap==GGL_CLAMP)?0:16); 872 ti.sscale = t.shade.sscale + adjustSWrap; 873 ti.tscale = t.shade.tscale + adjustTWrap; 874 if (!(enables & GGL_ENABLE_W)) { 875 // S coordinate 876 const int32_t sscale = ti.sscale; 877 const int32_t sy = interpolate(ys, 878 t.shade.is0, t.shade.idsdx, t.shade.idsdy); 879 if (sscale>=0) { 880 ti.ydsdy= sy << sscale; 881 ti.dsdx = t.shade.idsdx << sscale; 882 ti.dsdy = t.shade.idsdy << sscale; 883 } else { 884 ti.ydsdy= sy >> -sscale; 885 ti.dsdx = t.shade.idsdx >> -sscale; 886 ti.dsdy = t.shade.idsdy >> -sscale; 887 } 888 // T coordinate 889 const int32_t tscale = ti.tscale; 890 const int32_t ty = interpolate(ys, 891 t.shade.it0, t.shade.idtdx, t.shade.idtdy); 892 if (tscale>=0) { 893 ti.ydtdy= ty << tscale; 894 ti.dtdx = t.shade.idtdx << tscale; 895 ti.dtdy = t.shade.idtdy << tscale; 896 } else { 897 ti.ydtdy= ty >> -tscale; 898 ti.dtdx = t.shade.idtdx >> -tscale; 899 ti.dtdy = t.shade.idtdy >> -tscale; 900 } 901 } 902 } 903 // mirror for generated code... 904 generated_tex_vars_t& gen = c->generated_vars.texture[i]; 905 gen.width = t.surface.width; 906 gen.height = t.surface.height; 907 gen.stride = t.surface.stride; 908 gen.data = int32_t(t.surface.data); 909 gen.dsdx = ti.dsdx; 910 gen.dtdx = ti.dtdx; 911 } 912 } 913 914 // choose the y-stepper 915 c->step_y = step_y__nop; 916 if (enables & GGL_ENABLE_FOG) { 917 c->step_y = step_y__generic; 918 } else if (enables & GGL_ENABLE_TMUS) { 919 if (enables & GGL_ENABLE_SMOOTH) { 920 c->step_y = step_y__generic; 921 } else if (enables & GGL_ENABLE_W) { 922 c->step_y = step_y__w; 923 } else { 924 c->step_y = step_y__tmu; 925 } 926 } else { 927 if (enables & GGL_ENABLE_SMOOTH) { 928 c->step_y = step_y__smooth; 929 } 930 } 931 932 // choose the rectangle blitter 933 c->rect = rect_generic; 934 if ((c->step_y == step_y__nop) && 935 (c->scanline == scanline_memcpy)) 936 { 937 c->rect = rect_memcpy; 938 } 939 } 940 941 void init_y_packed(context_t* c, int32_t y0) 942 { 943 uint8_t f = c->state.buffers.color.format; 944 c->packed = ggl_pack_color(c, f, 945 c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0); 946 c->iterators.y = y0; 947 c->step_y = step_y__nop; 948 // choose the rectangle blitter 949 c->rect = rect_generic; 950 if (c->scanline == scanline_memcpy) { 951 c->rect = rect_memcpy; 952 } 953 } 954 955 void init_y_noop(context_t* c, int32_t y0) 956 { 957 c->iterators.y = y0; 958 c->step_y = step_y__nop; 959 // choose the rectangle blitter 960 c->rect = rect_generic; 961 if (c->scanline == scanline_memcpy) { 962 c->rect = rect_memcpy; 963 } 964 } 965 966 void init_y_error(context_t* c, int32_t y0) 967 { 968 // woooops, shoud never happen, 969 // fail gracefully (don't display anything) 970 init_y_noop(c, y0); 971 LOGE("color-buffer has an invalid format!"); 972 } 973 974 // ---------------------------------------------------------------------------- 975 #if 0 976 #pragma mark - 977 #endif 978 979 void step_y__generic(context_t* c) 980 { 981 const uint32_t enables = c->state.enables; 982 983 // iterate... 984 iterators_t& ci = c->iterators; 985 ci.y += 1; 986 987 if (enables & GGL_ENABLE_SMOOTH) { 988 ci.ydrdy += c->shade.drdy; 989 ci.ydgdy += c->shade.dgdy; 990 ci.ydbdy += c->shade.dbdy; 991 ci.ydady += c->shade.dady; 992 } 993 994 const uint32_t mask = 995 GGL_ENABLE_DEPTH_TEST | 996 GGL_ENABLE_W | 997 GGL_ENABLE_FOG; 998 if (enables & mask) { 999 ci.ydzdy += c->shade.dzdy; 1000 ci.ydwdy += c->shade.dwdy; 1001 ci.ydfdy += c->shade.dfdy; 1002 } 1003 1004 if ((enables & GGL_ENABLE_TMUS) && (!(enables & GGL_ENABLE_W))) { 1005 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 1006 if (c->state.texture[i].enable) { 1007 texture_iterators_t& ti = c->state.texture[i].iterators; 1008 ti.ydsdy += ti.dsdy; 1009 ti.ydtdy += ti.dtdy; 1010 } 1011 } 1012 } 1013 } 1014 1015 void step_y__nop(context_t* c) 1016 { 1017 c->iterators.y += 1; 1018 c->iterators.ydzdy += c->shade.dzdy; 1019 } 1020 1021 void step_y__smooth(context_t* c) 1022 { 1023 iterators_t& ci = c->iterators; 1024 ci.y += 1; 1025 ci.ydrdy += c->shade.drdy; 1026 ci.ydgdy += c->shade.dgdy; 1027 ci.ydbdy += c->shade.dbdy; 1028 ci.ydady += c->shade.dady; 1029 ci.ydzdy += c->shade.dzdy; 1030 } 1031 1032 void step_y__w(context_t* c) 1033 { 1034 iterators_t& ci = c->iterators; 1035 ci.y += 1; 1036 ci.ydzdy += c->shade.dzdy; 1037 ci.ydwdy += c->shade.dwdy; 1038 } 1039 1040 void step_y__tmu(context_t* c) 1041 { 1042 iterators_t& ci = c->iterators; 1043 ci.y += 1; 1044 ci.ydzdy += c->shade.dzdy; 1045 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 1046 if (c->state.texture[i].enable) { 1047 texture_iterators_t& ti = c->state.texture[i].iterators; 1048 ti.ydsdy += ti.dsdy; 1049 ti.ydtdy += ti.dtdy; 1050 } 1051 } 1052 } 1053 1054 // ---------------------------------------------------------------------------- 1055 #if 0 1056 #pragma mark - 1057 #endif 1058 1059 void scanline_perspective(context_t* c) 1060 { 1061 struct { 1062 union { 1063 struct { 1064 int32_t s, sq; 1065 int32_t t, tq; 1066 }; 1067 struct { 1068 int32_t v, q; 1069 } st[2]; 1070 }; 1071 } tc[GGL_TEXTURE_UNIT_COUNT] __attribute__((aligned(16))); 1072 1073 // XXX: we should have a special case when dwdx = 0 1074 1075 // 32 pixels spans works okay. 16 is a lot better, 1076 // but hey, it's a software renderer... 1077 const uint32_t SPAN_BITS = 5; 1078 const uint32_t ys = c->iterators.y; 1079 const uint32_t xs = c->iterators.xl; 1080 const uint32_t x1 = c->iterators.xr; 1081 const uint32_t xc = x1 - xs; 1082 uint32_t remainder = xc & ((1<<SPAN_BITS)-1); 1083 uint32_t numSpans = xc >> SPAN_BITS; 1084 1085 const iterators_t& ci = c->iterators; 1086 int32_t w0 = (xs * c->shade.dwdx) + ci.ydwdy; 1087 int32_t q0 = gglRecipQ(w0, 30); 1088 const int iwscale = 32 - gglClz(q0); 1089 1090 const int32_t dwdx = c->shade.dwdx << SPAN_BITS; 1091 int32_t xl = c->iterators.xl; 1092 1093 // We process s & t with a loop to reduce the code size 1094 // (and i-cache pressure). 1095 1096 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 1097 const texture_t& tmu = c->state.texture[i]; 1098 if (!tmu.enable) continue; 1099 int32_t s = tmu.shade.is0 + 1100 (tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) + 1101 ((tmu.shade.idsdx + tmu.shade.idsdy)>>1); 1102 int32_t t = tmu.shade.it0 + 1103 (tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) + 1104 ((tmu.shade.idtdx + tmu.shade.idtdy)>>1); 1105 tc[i].s = s; 1106 tc[i].t = t; 1107 tc[i].sq = gglMulx(s, q0, iwscale); 1108 tc[i].tq = gglMulx(t, q0, iwscale); 1109 } 1110 1111 int32_t span = 0; 1112 do { 1113 int32_t w1; 1114 if (ggl_likely(numSpans)) { 1115 w1 = w0 + dwdx; 1116 } else { 1117 if (remainder) { 1118 // finish off the scanline... 1119 span = remainder; 1120 w1 = (c->shade.dwdx * span) + w0; 1121 } else { 1122 break; 1123 } 1124 } 1125 int32_t q1 = gglRecipQ(w1, 30); 1126 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 1127 texture_t& tmu = c->state.texture[i]; 1128 if (!tmu.enable) continue; 1129 texture_iterators_t& ti = tmu.iterators; 1130 1131 for (int j=0 ; j<2 ; j++) { 1132 int32_t v = tc[i].st[j].v; 1133 if (span) v += (tmu.shade.st[j].dx)*span; 1134 else v += (tmu.shade.st[j].dx)<<SPAN_BITS; 1135 const int32_t v0 = tc[i].st[j].q; 1136 const int32_t v1 = gglMulx(v, q1, iwscale); 1137 int32_t dvdx = v1 - v0; 1138 if (span) dvdx /= span; 1139 else dvdx >>= SPAN_BITS; 1140 tc[i].st[j].v = v; 1141 tc[i].st[j].q = v1; 1142 1143 const int scale = ti.st[j].scale + (iwscale - 30); 1144 if (scale >= 0) { 1145 ti.st[j].ydvdy = v0 << scale; 1146 ti.st[j].dvdx = dvdx << scale; 1147 } else { 1148 ti.st[j].ydvdy = v0 >> -scale; 1149 ti.st[j].dvdx = dvdx >> -scale; 1150 } 1151 } 1152 generated_tex_vars_t& gen = c->generated_vars.texture[i]; 1153 gen.dsdx = ti.st[0].dvdx; 1154 gen.dtdx = ti.st[1].dvdx; 1155 } 1156 c->iterators.xl = xl; 1157 c->iterators.xr = xl = xl + (span ? span : (1<<SPAN_BITS)); 1158 w0 = w1; 1159 q0 = q1; 1160 c->span(c); 1161 } while(numSpans--); 1162 } 1163 1164 void scanline_perspective_single(context_t* c) 1165 { 1166 // 32 pixels spans works okay. 16 is a lot better, 1167 // but hey, it's a software renderer... 1168 const uint32_t SPAN_BITS = 5; 1169 const uint32_t ys = c->iterators.y; 1170 const uint32_t xs = c->iterators.xl; 1171 const uint32_t x1 = c->iterators.xr; 1172 const uint32_t xc = x1 - xs; 1173 1174 const iterators_t& ci = c->iterators; 1175 int32_t w = (xs * c->shade.dwdx) + ci.ydwdy; 1176 int32_t iw = gglRecipQ(w, 30); 1177 const int iwscale = 32 - gglClz(iw); 1178 1179 const int i = 31 - gglClz(c->state.enabled_tmu); 1180 generated_tex_vars_t& gen = c->generated_vars.texture[i]; 1181 texture_t& tmu = c->state.texture[i]; 1182 texture_iterators_t& ti = tmu.iterators; 1183 const int sscale = ti.sscale + (iwscale - 30); 1184 const int tscale = ti.tscale + (iwscale - 30); 1185 int32_t s = tmu.shade.is0 + 1186 (tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) + 1187 ((tmu.shade.idsdx + tmu.shade.idsdy)>>1); 1188 int32_t t = tmu.shade.it0 + 1189 (tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) + 1190 ((tmu.shade.idtdx + tmu.shade.idtdy)>>1); 1191 int32_t s0 = gglMulx(s, iw, iwscale); 1192 int32_t t0 = gglMulx(t, iw, iwscale); 1193 int32_t xl = c->iterators.xl; 1194 1195 int32_t sq, tq, dsdx, dtdx; 1196 int32_t premainder = xc & ((1<<SPAN_BITS)-1); 1197 uint32_t numSpans = xc >> SPAN_BITS; 1198 if (c->shade.dwdx == 0) { 1199 // XXX: we could choose to do this if the error is small enough 1200 numSpans = 0; 1201 premainder = xc; 1202 goto no_perspective; 1203 } 1204 1205 if (premainder) { 1206 w += c->shade.dwdx * premainder; 1207 iw = gglRecipQ(w, 30); 1208 no_perspective: 1209 s += tmu.shade.idsdx * premainder; 1210 t += tmu.shade.idtdx * premainder; 1211 sq = gglMulx(s, iw, iwscale); 1212 tq = gglMulx(t, iw, iwscale); 1213 dsdx = (sq - s0) / premainder; 1214 dtdx = (tq - t0) / premainder; 1215 c->iterators.xl = xl; 1216 c->iterators.xr = xl = xl + premainder; 1217 goto finish; 1218 } 1219 1220 while (numSpans--) { 1221 w += c->shade.dwdx << SPAN_BITS; 1222 s += tmu.shade.idsdx << SPAN_BITS; 1223 t += tmu.shade.idtdx << SPAN_BITS; 1224 iw = gglRecipQ(w, 30); 1225 sq = gglMulx(s, iw, iwscale); 1226 tq = gglMulx(t, iw, iwscale); 1227 dsdx = (sq - s0) >> SPAN_BITS; 1228 dtdx = (tq - t0) >> SPAN_BITS; 1229 c->iterators.xl = xl; 1230 c->iterators.xr = xl = xl + (1<<SPAN_BITS); 1231 finish: 1232 if (sscale >= 0) { 1233 ti.ydsdy = s0 << sscale; 1234 ti.dsdx = dsdx << sscale; 1235 } else { 1236 ti.ydsdy = s0 >>-sscale; 1237 ti.dsdx = dsdx >>-sscale; 1238 } 1239 if (tscale >= 0) { 1240 ti.ydtdy = t0 << tscale; 1241 ti.dtdx = dtdx << tscale; 1242 } else { 1243 ti.ydtdy = t0 >>-tscale; 1244 ti.dtdx = dtdx >>-tscale; 1245 } 1246 s0 = sq; 1247 t0 = tq; 1248 gen.dsdx = ti.dsdx; 1249 gen.dtdx = ti.dtdx; 1250 c->span(c); 1251 } 1252 } 1253 1254 // ---------------------------------------------------------------------------- 1255 1256 void scanline_t32cb16(context_t* c) 1257 { 1258 int32_t x = c->iterators.xl; 1259 size_t ct = c->iterators.xr - x; 1260 int32_t y = c->iterators.y; 1261 surface_t* cb = &(c->state.buffers.color); 1262 union { 1263 uint16_t* dst; 1264 uint32_t* dst32; 1265 }; 1266 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 1267 1268 surface_t* tex = &(c->state.texture[0].surface); 1269 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 1270 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 1271 uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v)); 1272 int sR, sG, sB; 1273 uint32_t s, d; 1274 1275 if (ct==1 || uint32_t(dst)&2) { 1276 last_one: 1277 s = GGL_RGBA_TO_HOST( *src++ ); 1278 sR = (s >> ( 3))&0x1F; 1279 sG = (s >> ( 8+2))&0x3F; 1280 sB = (s >> (16+3))&0x1F; 1281 *dst++ = uint16_t((sR<<11)|(sG<<5)|sB); 1282 ct--; 1283 } 1284 1285 while (ct >= 2) { 1286 s = GGL_RGBA_TO_HOST( *src++ ); 1287 sR = (s >> ( 3))&0x1F; 1288 sG = (s >> ( 8+2))&0x3F; 1289 sB = (s >> (16+3))&0x1F; 1290 d = (sR<<11)|(sG<<5)|sB; 1291 1292 s = GGL_RGBA_TO_HOST( *src++ ); 1293 sR = (s >> ( 3))&0x1F; 1294 sG = (s >> ( 8+2))&0x3F; 1295 sB = (s >> (16+3))&0x1F; 1296 d |= ((sR<<11)|(sG<<5)|sB)<<16; 1297 1298 #if BYTE_ORDER == BIG_ENDIAN 1299 d = (d>>16) | (d<<16); 1300 #endif 1301 1302 *dst32++ = d; 1303 ct -= 2; 1304 } 1305 1306 if (ct > 0) { 1307 goto last_one; 1308 } 1309 } 1310 1311 void scanline_t32cb16blend(context_t* c) 1312 { 1313 int32_t x = c->iterators.xl; 1314 size_t ct = c->iterators.xr - x; 1315 int32_t y = c->iterators.y; 1316 surface_t* cb = &(c->state.buffers.color); 1317 uint16_t* dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 1318 1319 surface_t* tex = &(c->state.texture[0].surface); 1320 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 1321 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 1322 uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v)); 1323 1324 #if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__)) 1325 scanline_t32cb16blend_arm(dst, src, ct); 1326 #else 1327 while (ct--) { 1328 uint32_t s = *src++; 1329 if (!s) { 1330 dst++; 1331 continue; 1332 } 1333 uint16_t d = *dst; 1334 s = GGL_RGBA_TO_HOST(s); 1335 int sR = (s >> ( 3))&0x1F; 1336 int sG = (s >> ( 8+2))&0x3F; 1337 int sB = (s >> (16+3))&0x1F; 1338 int sA = (s>>24); 1339 int f = 0x100 - (sA + (sA>>7)); 1340 int dR = (d>>11)&0x1f; 1341 int dG = (d>>5)&0x3f; 1342 int dB = (d)&0x1f; 1343 sR += (f*dR)>>8; 1344 sG += (f*dG)>>8; 1345 sB += (f*dB)>>8; 1346 *dst++ = uint16_t((sR<<11)|(sG<<5)|sB); 1347 } 1348 #endif 1349 } 1350 1351 void scanline_memcpy(context_t* c) 1352 { 1353 int32_t x = c->iterators.xl; 1354 size_t ct = c->iterators.xr - x; 1355 int32_t y = c->iterators.y; 1356 surface_t* cb = &(c->state.buffers.color); 1357 const GGLFormat* fp = &(c->formats[cb->format]); 1358 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 1359 (x + (cb->stride * y)) * fp->size; 1360 1361 surface_t* tex = &(c->state.texture[0].surface); 1362 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 1363 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 1364 uint8_t *src = reinterpret_cast<uint8_t*>(tex->data) + 1365 (u + (tex->stride * v)) * fp->size; 1366 1367 const size_t size = ct * fp->size; 1368 memcpy(dst, src, size); 1369 } 1370 1371 void scanline_memset8(context_t* c) 1372 { 1373 int32_t x = c->iterators.xl; 1374 size_t ct = c->iterators.xr - x; 1375 int32_t y = c->iterators.y; 1376 surface_t* cb = &(c->state.buffers.color); 1377 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + (x+(cb->stride*y)); 1378 uint32_t packed = c->packed; 1379 memset(dst, packed, ct); 1380 } 1381 1382 void scanline_memset16(context_t* c) 1383 { 1384 int32_t x = c->iterators.xl; 1385 size_t ct = c->iterators.xr - x; 1386 int32_t y = c->iterators.y; 1387 surface_t* cb = &(c->state.buffers.color); 1388 uint16_t* dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 1389 uint32_t packed = c->packed; 1390 android_memset16(dst, packed, ct*2); 1391 } 1392 1393 void scanline_memset32(context_t* c) 1394 { 1395 int32_t x = c->iterators.xl; 1396 size_t ct = c->iterators.xr - x; 1397 int32_t y = c->iterators.y; 1398 surface_t* cb = &(c->state.buffers.color); 1399 uint32_t* dst = reinterpret_cast<uint32_t*>(cb->data) + (x+(cb->stride*y)); 1400 uint32_t packed = GGL_HOST_TO_RGBA(c->packed); 1401 android_memset32(dst, packed, ct*4); 1402 } 1403 1404 void scanline_clear(context_t* c) 1405 { 1406 int32_t x = c->iterators.xl; 1407 size_t ct = c->iterators.xr - x; 1408 int32_t y = c->iterators.y; 1409 surface_t* cb = &(c->state.buffers.color); 1410 const GGLFormat* fp = &(c->formats[cb->format]); 1411 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 1412 (x + (cb->stride * y)) * fp->size; 1413 const size_t size = ct * fp->size; 1414 memset(dst, 0, size); 1415 } 1416 1417 void scanline_set(context_t* c) 1418 { 1419 int32_t x = c->iterators.xl; 1420 size_t ct = c->iterators.xr - x; 1421 int32_t y = c->iterators.y; 1422 surface_t* cb = &(c->state.buffers.color); 1423 const GGLFormat* fp = &(c->formats[cb->format]); 1424 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 1425 (x + (cb->stride * y)) * fp->size; 1426 const size_t size = ct * fp->size; 1427 memset(dst, 0xFF, size); 1428 } 1429 1430 void scanline_noop(context_t* c) 1431 { 1432 } 1433 1434 void rect_generic(context_t* c, size_t yc) 1435 { 1436 do { 1437 c->scanline(c); 1438 c->step_y(c); 1439 } while (--yc); 1440 } 1441 1442 void rect_memcpy(context_t* c, size_t yc) 1443 { 1444 int32_t x = c->iterators.xl; 1445 size_t ct = c->iterators.xr - x; 1446 int32_t y = c->iterators.y; 1447 surface_t* cb = &(c->state.buffers.color); 1448 const GGLFormat* fp = &(c->formats[cb->format]); 1449 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 1450 (x + (cb->stride * y)) * fp->size; 1451 1452 surface_t* tex = &(c->state.texture[0].surface); 1453 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 1454 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 1455 uint8_t *src = reinterpret_cast<uint8_t*>(tex->data) + 1456 (u + (tex->stride * v)) * fp->size; 1457 1458 if (cb->stride == tex->stride && ct == size_t(cb->stride)) { 1459 memcpy(dst, src, ct * fp->size * yc); 1460 } else { 1461 const size_t size = ct * fp->size; 1462 const size_t dbpr = cb->stride * fp->size; 1463 const size_t sbpr = tex->stride * fp->size; 1464 do { 1465 memcpy(dst, src, size); 1466 dst += dbpr; 1467 src += sbpr; 1468 } while (--yc); 1469 } 1470 } 1471 // ---------------------------------------------------------------------------- 1472 }; // namespace android 1473 1474 using namespace android; 1475 extern "C" void ggl_test_codegen(uint32_t n, uint32_t p, uint32_t t0, uint32_t t1) 1476 { 1477 #if ANDROID_ARM_CODEGEN 1478 GGLContext* c; 1479 gglInit(&c); 1480 needs_t needs; 1481 needs.n = n; 1482 needs.p = p; 1483 needs.t[0] = t0; 1484 needs.t[1] = t1; 1485 sp<ScanlineAssembly> a(new ScanlineAssembly(needs, ASSEMBLY_SCRATCH_SIZE)); 1486 GGLAssembler assembler( new ARMAssembler(a) ); 1487 int err = assembler.scanline(needs, (context_t*)c); 1488 if (err != 0) { 1489 printf("error %08x (%s)\n", err, strerror(-err)); 1490 } 1491 gglUninit(c); 1492 #else 1493 printf("This test runs only on ARM\n"); 1494 #endif 1495 } 1496 1497