1 /* libs/pixelflinger/scanline.cpp 2 ** 3 ** Copyright 2006, The Android Open Source Project 4 ** 5 ** Licensed under the Apache License, Version 2.0 (the "License"); 6 ** you may not use this file except in compliance with the License. 7 ** You may obtain a copy of the License at 8 ** 9 ** http://www.apache.org/licenses/LICENSE-2.0 10 ** 11 ** Unless required by applicable law or agreed to in writing, software 12 ** distributed under the License is distributed on an "AS IS" BASIS, 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 ** See the License for the specific language governing permissions and 15 ** limitations under the License. 16 */ 17 18 19 #define LOG_TAG "pixelflinger" 20 21 #include <assert.h> 22 #include <stdlib.h> 23 #include <stdio.h> 24 #include <string.h> 25 26 #include <cutils/memory.h> 27 #include <cutils/log.h> 28 29 #include "buffer.h" 30 #include "scanline.h" 31 32 #include "codeflinger/CodeCache.h" 33 #include "codeflinger/GGLAssembler.h" 34 #include "codeflinger/ARMAssembler.h" 35 //#include "codeflinger/ARMAssemblerOptimizer.h" 36 37 // ---------------------------------------------------------------------------- 38 39 #define ANDROID_CODEGEN_GENERIC 0 // force generic pixel pipeline 40 #define ANDROID_CODEGEN_C 1 // hand-written C, fallback generic 41 #define ANDROID_CODEGEN_ASM 2 // hand-written asm, fallback generic 42 #define ANDROID_CODEGEN_GENERATED 3 // hand-written asm, fallback codegen 43 44 #ifdef NDEBUG 45 # define ANDROID_RELEASE 46 # define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED 47 #else 48 # define ANDROID_DEBUG 49 # define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED 50 #endif 51 52 #if defined(__arm__) 53 # define ANDROID_ARM_CODEGEN 1 54 #else 55 # define ANDROID_ARM_CODEGEN 0 56 #endif 57 58 #define DEBUG__CODEGEN_ONLY 0 59 60 61 #define ASSEMBLY_SCRATCH_SIZE 2048 62 63 // ---------------------------------------------------------------------------- 64 namespace android { 65 // 
---------------------------------------------------------------------------- 66 67 static void init_y(context_t*, int32_t); 68 static void init_y_noop(context_t*, int32_t); 69 static void init_y_packed(context_t*, int32_t); 70 static void init_y_error(context_t*, int32_t); 71 72 static void step_y__generic(context_t* c); 73 static void step_y__nop(context_t*); 74 static void step_y__smooth(context_t* c); 75 static void step_y__tmu(context_t* c); 76 static void step_y__w(context_t* c); 77 78 static void scanline(context_t* c); 79 static void scanline_perspective(context_t* c); 80 static void scanline_perspective_single(context_t* c); 81 static void scanline_t32cb16blend(context_t* c); 82 static void scanline_t32cb16(context_t* c); 83 static void scanline_col32cb16blend(context_t* c); 84 static void scanline_memcpy(context_t* c); 85 static void scanline_memset8(context_t* c); 86 static void scanline_memset16(context_t* c); 87 static void scanline_memset32(context_t* c); 88 static void scanline_noop(context_t* c); 89 static void scanline_set(context_t* c); 90 static void scanline_clear(context_t* c); 91 92 static void rect_generic(context_t* c, size_t yc); 93 static void rect_memcpy(context_t* c, size_t yc); 94 95 extern "C" void scanline_t32cb16blend_arm(uint16_t*, uint32_t*, size_t); 96 extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct); 97 extern "C" void scanline_col32cb16blend_neon(uint16_t *dst, uint32_t *col, size_t ct); 98 extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct); 99 100 // ---------------------------------------------------------------------------- 101 102 struct shortcut_t { 103 needs_filter_t filter; 104 const char* desc; 105 void (*scanline)(context_t*); 106 void (*init_y)(context_t*, int32_t); 107 }; 108 109 // Keep in sync with needs 110 static shortcut_t shortcuts[] = { 111 { { { 0x03515104, 0x00000077, { 0x00000A01, 0x00000000 } }, 112 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 
0x0000003F } } }, 113 "565 fb, 8888 tx, blend", scanline_t32cb16blend, init_y_noop }, 114 { { { 0x03010104, 0x00000077, { 0x00000A01, 0x00000000 } }, 115 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 116 "565 fb, 8888 tx", scanline_t32cb16, init_y_noop }, 117 { { { 0x03515104, 0x00000077, { 0x00000000, 0x00000000 } }, 118 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0xFFFFFFFF } } }, 119 "565 fb, 8888 fixed color", scanline_col32cb16blend, init_y_packed }, 120 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } }, 121 { 0x00000000, 0x00000007, { 0x00000000, 0x00000000 } } }, 122 "(nop) alpha test", scanline_noop, init_y_noop }, 123 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } }, 124 { 0x00000000, 0x00000070, { 0x00000000, 0x00000000 } } }, 125 "(nop) depth test", scanline_noop, init_y_noop }, 126 { { { 0x05000000, 0x00000000, { 0x00000000, 0x00000000 } }, 127 { 0x0F000000, 0x00000080, { 0x00000000, 0x00000000 } } }, 128 "(nop) logic_op", scanline_noop, init_y_noop }, 129 { { { 0xF0000000, 0x00000000, { 0x00000000, 0x00000000 } }, 130 { 0xF0000000, 0x00000080, { 0x00000000, 0x00000000 } } }, 131 "(nop) color mask", scanline_noop, init_y_noop }, 132 { { { 0x0F000000, 0x00000077, { 0x00000000, 0x00000000 } }, 133 { 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } }, 134 "(set) logic_op", scanline_set, init_y_noop }, 135 { { { 0x00000000, 0x00000077, { 0x00000000, 0x00000000 } }, 136 { 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } }, 137 "(clear) logic_op", scanline_clear, init_y_noop }, 138 { { { 0x03000000, 0x00000077, { 0x00000000, 0x00000000 } }, 139 { 0xFFFFFF00, 0x000000F7, { 0x00000000, 0x00000000 } } }, 140 "(clear) blending 0/0", scanline_clear, init_y_noop }, 141 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } }, 142 { 0x0000003F, 0x00000000, { 0x00000000, 0x00000000 } } }, 143 "(error) invalid color-buffer format", scanline_noop, init_y_error }, 144 }; 145 static const needs_filter_t noblend1to1 = { 146 // 
(disregard dithering, see below) 147 { 0x03010100, 0x00000077, { 0x00000A00, 0x00000000 } }, 148 { 0xFFFFFFC0, 0xFFFFFEFF, { 0xFFFFFFC0, 0x0000003F } } 149 }; 150 static const needs_filter_t fill16noblend = { 151 { 0x03010100, 0x00000077, { 0x00000000, 0x00000000 } }, 152 { 0xFFFFFFC0, 0xFFFFFFFF, { 0x0000003F, 0x0000003F } } 153 }; 154 155 // ---------------------------------------------------------------------------- 156 157 #if ANDROID_ARM_CODEGEN 158 static CodeCache gCodeCache(12 * 1024); 159 160 class ScanlineAssembly : public Assembly { 161 AssemblyKey<needs_t> mKey; 162 public: 163 ScanlineAssembly(needs_t needs, size_t size) 164 : Assembly(size), mKey(needs) { } 165 const AssemblyKey<needs_t>& key() const { return mKey; } 166 }; 167 #endif 168 169 // ---------------------------------------------------------------------------- 170 171 void ggl_init_scanline(context_t* c) 172 { 173 c->init_y = init_y; 174 c->step_y = step_y__generic; 175 c->scanline = scanline; 176 } 177 178 void ggl_uninit_scanline(context_t* c) 179 { 180 if (c->state.buffers.coverage) 181 free(c->state.buffers.coverage); 182 #if ANDROID_ARM_CODEGEN 183 if (c->scanline_as) 184 c->scanline_as->decStrong(c); 185 #endif 186 } 187 188 // ---------------------------------------------------------------------------- 189 190 static void pick_scanline(context_t* c) 191 { 192 #if (!defined(DEBUG__CODEGEN_ONLY) || (DEBUG__CODEGEN_ONLY == 0)) 193 194 #if ANDROID_CODEGEN == ANDROID_CODEGEN_GENERIC 195 c->init_y = init_y; 196 c->step_y = step_y__generic; 197 c->scanline = scanline; 198 return; 199 #endif 200 201 //printf("*** needs [%08lx:%08lx:%08lx:%08lx]\n", 202 // c->state.needs.n, c->state.needs.p, 203 // c->state.needs.t[0], c->state.needs.t[1]); 204 205 // first handle the special case that we cannot test with a filter 206 const uint32_t cb_format = GGL_READ_NEEDS(CB_FORMAT, c->state.needs.n); 207 if (GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0]) == cb_format) { 208 if 
(c->state.needs.match(noblend1to1)) { 209 // this will match regardless of dithering state, since both 210 // src and dest have the same format anyway, there is no dithering 211 // to be done. 212 const GGLFormat* f = 213 &(c->formats[GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0])]); 214 if ((f->components == GGL_RGB) || 215 (f->components == GGL_RGBA) || 216 (f->components == GGL_LUMINANCE) || 217 (f->components == GGL_LUMINANCE_ALPHA)) 218 { 219 // format must have all of RGB components 220 // (so the current color doesn't show through) 221 c->scanline = scanline_memcpy; 222 c->init_y = init_y_noop; 223 return; 224 } 225 } 226 } 227 228 if (c->state.needs.match(fill16noblend)) { 229 c->init_y = init_y_packed; 230 switch (c->formats[cb_format].size) { 231 case 1: c->scanline = scanline_memset8; return; 232 case 2: c->scanline = scanline_memset16; return; 233 case 4: c->scanline = scanline_memset32; return; 234 } 235 } 236 237 const int numFilters = sizeof(shortcuts)/sizeof(shortcut_t); 238 for (int i=0 ; i<numFilters ; i++) { 239 if (c->state.needs.match(shortcuts[i].filter)) { 240 c->scanline = shortcuts[i].scanline; 241 c->init_y = shortcuts[i].init_y; 242 return; 243 } 244 } 245 246 #endif // DEBUG__CODEGEN_ONLY 247 248 c->init_y = init_y; 249 c->step_y = step_y__generic; 250 251 #if ANDROID_ARM_CODEGEN 252 // we're going to have to generate some code... 
253 // here, generate code for our pixel pipeline 254 const AssemblyKey<needs_t> key(c->state.needs); 255 sp<Assembly> assembly = gCodeCache.lookup(key); 256 if (assembly == 0) { 257 // create a new assembly region 258 sp<ScanlineAssembly> a = new ScanlineAssembly(c->state.needs, 259 ASSEMBLY_SCRATCH_SIZE); 260 // initialize our assembler 261 GGLAssembler assembler( new ARMAssembler(a) ); 262 //GGLAssembler assembler( 263 // new ARMAssemblerOptimizer(new ARMAssembler(a)) ); 264 // generate the scanline code for the given needs 265 int err = assembler.scanline(c->state.needs, c); 266 if (ggl_likely(!err)) { 267 // finally, cache this assembly 268 err = gCodeCache.cache(a->key(), a); 269 } 270 if (ggl_unlikely(err)) { 271 LOGE("error generating or caching assembly. Reverting to NOP."); 272 c->scanline = scanline_noop; 273 c->init_y = init_y_noop; 274 c->step_y = step_y__nop; 275 return; 276 } 277 assembly = a; 278 } 279 280 // release the previous assembly 281 if (c->scanline_as) { 282 c->scanline_as->decStrong(c); 283 } 284 285 //LOGI("using generated pixel-pipeline"); 286 c->scanline_as = assembly.get(); 287 c->scanline_as->incStrong(c); // hold on to assembly 288 c->scanline = (void(*)(context_t* c))assembly->base(); 289 #else 290 // LOGW("using generic (slow) pixel-pipeline"); 291 c->scanline = scanline; 292 #endif 293 } 294 295 void ggl_pick_scanline(context_t* c) 296 { 297 pick_scanline(c); 298 if ((c->state.enables & GGL_ENABLE_W) && 299 (c->state.enables & GGL_ENABLE_TMUS)) 300 { 301 c->span = c->scanline; 302 c->scanline = scanline_perspective; 303 if (!(c->state.enabled_tmu & (c->state.enabled_tmu - 1))) { 304 // only one TMU enabled 305 c->scanline = scanline_perspective_single; 306 } 307 } 308 } 309 310 // ---------------------------------------------------------------------------- 311 312 static void blending(context_t* c, pixel_t* fragment, pixel_t* fb); 313 static void blend_factor(context_t* c, pixel_t* r, uint32_t factor, 314 const pixel_t* src, 
const pixel_t* dst); 315 static void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv); 316 317 #if ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED) 318 319 // no need to compile the generic-pipeline, it can't be reached 320 void scanline(context_t*) 321 { 322 } 323 324 #else 325 326 void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv) 327 { 328 if (su && sv) { 329 if (su > sv) { 330 v = ggl_expand(v, sv, su); 331 sv = su; 332 } else if (su < sv) { 333 u = ggl_expand(u, su, sv); 334 su = sv; 335 } 336 } 337 } 338 339 void blending(context_t* c, pixel_t* fragment, pixel_t* fb) 340 { 341 rescale(fragment->c[0], fragment->s[0], fb->c[0], fb->s[0]); 342 rescale(fragment->c[1], fragment->s[1], fb->c[1], fb->s[1]); 343 rescale(fragment->c[2], fragment->s[2], fb->c[2], fb->s[2]); 344 rescale(fragment->c[3], fragment->s[3], fb->c[3], fb->s[3]); 345 346 pixel_t sf, df; 347 blend_factor(c, &sf, c->state.blend.src, fragment, fb); 348 blend_factor(c, &df, c->state.blend.dst, fragment, fb); 349 350 fragment->c[1] = 351 gglMulAddx(fragment->c[1], sf.c[1], gglMulx(fb->c[1], df.c[1])); 352 fragment->c[2] = 353 gglMulAddx(fragment->c[2], sf.c[2], gglMulx(fb->c[2], df.c[2])); 354 fragment->c[3] = 355 gglMulAddx(fragment->c[3], sf.c[3], gglMulx(fb->c[3], df.c[3])); 356 357 if (c->state.blend.alpha_separate) { 358 blend_factor(c, &sf, c->state.blend.src_alpha, fragment, fb); 359 blend_factor(c, &df, c->state.blend.dst_alpha, fragment, fb); 360 } 361 362 fragment->c[0] = 363 gglMulAddx(fragment->c[0], sf.c[0], gglMulx(fb->c[0], df.c[0])); 364 365 // clamp to 1.0 366 if (fragment->c[0] >= (1LU<<fragment->s[0])) 367 fragment->c[0] = (1<<fragment->s[0])-1; 368 if (fragment->c[1] >= (1LU<<fragment->s[1])) 369 fragment->c[1] = (1<<fragment->s[1])-1; 370 if (fragment->c[2] >= (1LU<<fragment->s[2])) 371 fragment->c[2] = (1<<fragment->s[2])-1; 372 if (fragment->c[3] >= (1LU<<fragment->s[3])) 373 fragment->c[3] = (1<<fragment->s[3])-1; 374 } 375 376 
static inline int blendfactor(uint32_t x, uint32_t size, uint32_t def = 0) 377 { 378 if (!size) 379 return def; 380 381 // scale to 16 bits 382 if (size > 16) { 383 x >>= (size - 16); 384 } else if (size < 16) { 385 x = ggl_expand(x, size, 16); 386 } 387 x += x >> 15; 388 return x; 389 } 390 391 void blend_factor(context_t* c, pixel_t* r, 392 uint32_t factor, const pixel_t* src, const pixel_t* dst) 393 { 394 switch (factor) { 395 case GGL_ZERO: 396 r->c[1] = 397 r->c[2] = 398 r->c[3] = 399 r->c[0] = 0; 400 break; 401 case GGL_ONE: 402 r->c[1] = 403 r->c[2] = 404 r->c[3] = 405 r->c[0] = FIXED_ONE; 406 break; 407 case GGL_DST_COLOR: 408 r->c[1] = blendfactor(dst->c[1], dst->s[1]); 409 r->c[2] = blendfactor(dst->c[2], dst->s[2]); 410 r->c[3] = blendfactor(dst->c[3], dst->s[3]); 411 r->c[0] = blendfactor(dst->c[0], dst->s[0]); 412 break; 413 case GGL_SRC_COLOR: 414 r->c[1] = blendfactor(src->c[1], src->s[1]); 415 r->c[2] = blendfactor(src->c[2], src->s[2]); 416 r->c[3] = blendfactor(src->c[3], src->s[3]); 417 r->c[0] = blendfactor(src->c[0], src->s[0]); 418 break; 419 case GGL_ONE_MINUS_DST_COLOR: 420 r->c[1] = FIXED_ONE - blendfactor(dst->c[1], dst->s[1]); 421 r->c[2] = FIXED_ONE - blendfactor(dst->c[2], dst->s[2]); 422 r->c[3] = FIXED_ONE - blendfactor(dst->c[3], dst->s[3]); 423 r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0]); 424 break; 425 case GGL_ONE_MINUS_SRC_COLOR: 426 r->c[1] = FIXED_ONE - blendfactor(src->c[1], src->s[1]); 427 r->c[2] = FIXED_ONE - blendfactor(src->c[2], src->s[2]); 428 r->c[3] = FIXED_ONE - blendfactor(src->c[3], src->s[3]); 429 r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0]); 430 break; 431 case GGL_SRC_ALPHA: 432 r->c[1] = 433 r->c[2] = 434 r->c[3] = 435 r->c[0] = blendfactor(src->c[0], src->s[0], FIXED_ONE); 436 break; 437 case GGL_ONE_MINUS_SRC_ALPHA: 438 r->c[1] = 439 r->c[2] = 440 r->c[3] = 441 r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0], FIXED_ONE); 442 break; 443 case GGL_DST_ALPHA: 444 r->c[1] = 445 
r->c[2] = 446 r->c[3] = 447 r->c[0] = blendfactor(dst->c[0], dst->s[0], FIXED_ONE); 448 break; 449 case GGL_ONE_MINUS_DST_ALPHA: 450 r->c[1] = 451 r->c[2] = 452 r->c[3] = 453 r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0], FIXED_ONE); 454 break; 455 case GGL_SRC_ALPHA_SATURATE: 456 // XXX: GGL_SRC_ALPHA_SATURATE 457 break; 458 } 459 } 460 461 static GGLfixed wrapping(int32_t coord, uint32_t size, int tx_wrap) 462 { 463 GGLfixed d; 464 if (tx_wrap == GGL_REPEAT) { 465 d = (uint32_t(coord)>>16) * size; 466 } else if (tx_wrap == GGL_CLAMP) { // CLAMP_TO_EDGE semantics 467 const GGLfixed clamp_min = FIXED_HALF; 468 const GGLfixed clamp_max = (size << 16) - FIXED_HALF; 469 if (coord < clamp_min) coord = clamp_min; 470 if (coord > clamp_max) coord = clamp_max; 471 d = coord; 472 } else { // 1:1 473 const GGLfixed clamp_min = 0; 474 const GGLfixed clamp_max = (size << 16); 475 if (coord < clamp_min) coord = clamp_min; 476 if (coord > clamp_max) coord = clamp_max; 477 d = coord; 478 } 479 return d; 480 } 481 482 static inline 483 GGLcolor ADJUST_COLOR_ITERATOR(GGLcolor v, GGLcolor dvdx, int len) 484 { 485 const int32_t end = dvdx * (len-1) + v; 486 if (end < 0) 487 v -= end; 488 v &= ~(v>>31); 489 return v; 490 } 491 492 void scanline(context_t* c) 493 { 494 const uint32_t enables = c->state.enables; 495 const int xs = c->iterators.xl; 496 const int x1 = c->iterators.xr; 497 int xc = x1 - xs; 498 const int16_t* covPtr = c->state.buffers.coverage + xs; 499 500 // All iterated values are sampled at the pixel center 501 502 // reset iterators for that scanline... 
503 GGLcolor r, g, b, a; 504 iterators_t& ci = c->iterators; 505 if (enables & GGL_ENABLE_SMOOTH) { 506 r = (xs * c->shade.drdx) + ci.ydrdy; 507 g = (xs * c->shade.dgdx) + ci.ydgdy; 508 b = (xs * c->shade.dbdx) + ci.ydbdy; 509 a = (xs * c->shade.dadx) + ci.ydady; 510 r = ADJUST_COLOR_ITERATOR(r, c->shade.drdx, xc); 511 g = ADJUST_COLOR_ITERATOR(g, c->shade.dgdx, xc); 512 b = ADJUST_COLOR_ITERATOR(b, c->shade.dbdx, xc); 513 a = ADJUST_COLOR_ITERATOR(a, c->shade.dadx, xc); 514 } else { 515 r = ci.ydrdy; 516 g = ci.ydgdy; 517 b = ci.ydbdy; 518 a = ci.ydady; 519 } 520 521 // z iterators are 1.31 522 GGLfixed z = (xs * c->shade.dzdx) + ci.ydzdy; 523 GGLfixed f = (xs * c->shade.dfdx) + ci.ydfdy; 524 525 struct { 526 GGLfixed s, t; 527 } tc[GGL_TEXTURE_UNIT_COUNT]; 528 if (enables & GGL_ENABLE_TMUS) { 529 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 530 if (c->state.texture[i].enable) { 531 texture_iterators_t& ti = c->state.texture[i].iterators; 532 if (enables & GGL_ENABLE_W) { 533 tc[i].s = ti.ydsdy; 534 tc[i].t = ti.ydtdy; 535 } else { 536 tc[i].s = (xs * ti.dsdx) + ti.ydsdy; 537 tc[i].t = (xs * ti.dtdx) + ti.ydtdy; 538 } 539 } 540 } 541 } 542 543 pixel_t fragment; 544 pixel_t texel; 545 pixel_t fb; 546 547 uint32_t x = xs; 548 uint32_t y = c->iterators.y; 549 550 while (xc--) { 551 552 { // just a scope 553 554 // read color (convert to 8 bits by keeping only the integer part) 555 fragment.s[1] = fragment.s[2] = 556 fragment.s[3] = fragment.s[0] = 8; 557 fragment.c[1] = r >> (GGL_COLOR_BITS-8); 558 fragment.c[2] = g >> (GGL_COLOR_BITS-8); 559 fragment.c[3] = b >> (GGL_COLOR_BITS-8); 560 fragment.c[0] = a >> (GGL_COLOR_BITS-8); 561 562 // texturing 563 if (enables & GGL_ENABLE_TMUS) { 564 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 565 texture_t& tx = c->state.texture[i]; 566 if (!tx.enable) 567 continue; 568 texture_iterators_t& ti = tx.iterators; 569 int32_t u, v; 570 571 // s-coordinate 572 if (tx.s_coord != GGL_ONE_TO_ONE) { 573 const int w = 
tx.surface.width; 574 u = wrapping(tc[i].s, w, tx.s_wrap); 575 tc[i].s += ti.dsdx; 576 } else { 577 u = (((tx.shade.is0>>16) + x)<<16) + FIXED_HALF; 578 } 579 580 // t-coordinate 581 if (tx.t_coord != GGL_ONE_TO_ONE) { 582 const int h = tx.surface.height; 583 v = wrapping(tc[i].t, h, tx.t_wrap); 584 tc[i].t += ti.dtdx; 585 } else { 586 v = (((tx.shade.it0>>16) + y)<<16) + FIXED_HALF; 587 } 588 589 // read texture 590 if (tx.mag_filter == GGL_NEAREST && 591 tx.min_filter == GGL_NEAREST) 592 { 593 u >>= 16; 594 v >>= 16; 595 tx.surface.read(&tx.surface, c, u, v, &texel); 596 } else { 597 const int w = tx.surface.width; 598 const int h = tx.surface.height; 599 u -= FIXED_HALF; 600 v -= FIXED_HALF; 601 int u0 = u >> 16; 602 int v0 = v >> 16; 603 int u1 = u0 + 1; 604 int v1 = v0 + 1; 605 if (tx.s_wrap == GGL_REPEAT) { 606 if (u0<0) u0 += w; 607 if (u1<0) u1 += w; 608 if (u0>=w) u0 -= w; 609 if (u1>=w) u1 -= w; 610 } else { 611 if (u0<0) u0 = 0; 612 if (u1<0) u1 = 0; 613 if (u0>=w) u0 = w-1; 614 if (u1>=w) u1 = w-1; 615 } 616 if (tx.t_wrap == GGL_REPEAT) { 617 if (v0<0) v0 += h; 618 if (v1<0) v1 += h; 619 if (v0>=h) v0 -= h; 620 if (v1>=h) v1 -= h; 621 } else { 622 if (v0<0) v0 = 0; 623 if (v1<0) v1 = 0; 624 if (v0>=h) v0 = h-1; 625 if (v1>=h) v1 = h-1; 626 } 627 pixel_t texels[4]; 628 uint32_t mm[4]; 629 tx.surface.read(&tx.surface, c, u0, v0, &texels[0]); 630 tx.surface.read(&tx.surface, c, u0, v1, &texels[1]); 631 tx.surface.read(&tx.surface, c, u1, v0, &texels[2]); 632 tx.surface.read(&tx.surface, c, u1, v1, &texels[3]); 633 u = (u >> 12) & 0xF; 634 v = (v >> 12) & 0xF; 635 u += u>>3; 636 v += v>>3; 637 mm[0] = (0x10 - u) * (0x10 - v); 638 mm[1] = (0x10 - u) * v; 639 mm[2] = u * (0x10 - v); 640 mm[3] = 0x100 - (mm[0] + mm[1] + mm[2]); 641 for (int j=0 ; j<4 ; j++) { 642 texel.s[j] = texels[0].s[j]; 643 if (!texel.s[j]) continue; 644 texel.s[j] += 8; 645 texel.c[j] = texels[0].c[j]*mm[0] + 646 texels[1].c[j]*mm[1] + 647 texels[2].c[j]*mm[2] + 648 texels[3].c[j]*mm[3] 
; 649 } 650 } 651 652 // Texture environnement... 653 for (int j=0 ; j<4 ; j++) { 654 uint32_t& Cf = fragment.c[j]; 655 uint32_t& Ct = texel.c[j]; 656 uint8_t& sf = fragment.s[j]; 657 uint8_t& st = texel.s[j]; 658 uint32_t At = texel.c[0]; 659 uint8_t sat = texel.s[0]; 660 switch (tx.env) { 661 case GGL_REPLACE: 662 if (st) { 663 Cf = Ct; 664 sf = st; 665 } 666 break; 667 case GGL_MODULATE: 668 if (st) { 669 uint32_t factor = Ct + (Ct>>(st-1)); 670 Cf = (Cf * factor) >> st; 671 } 672 break; 673 case GGL_DECAL: 674 if (sat) { 675 rescale(Cf, sf, Ct, st); 676 Cf += ((Ct - Cf) * (At + (At>>(sat-1)))) >> sat; 677 } 678 break; 679 case GGL_BLEND: 680 if (st) { 681 uint32_t Cc = tx.env_color[i]; 682 if (sf>8) Cc = (Cc * ((1<<sf)-1))>>8; 683 else if (sf<8) Cc = (Cc - (Cc>>(8-sf)))>>(8-sf); 684 uint32_t factor = Ct + (Ct>>(st-1)); 685 Cf = ((((1<<st) - factor) * Cf) + Ct*Cc)>>st; 686 } 687 break; 688 case GGL_ADD: 689 if (st) { 690 rescale(Cf, sf, Ct, st); 691 Cf += Ct; 692 } 693 break; 694 } 695 } 696 } 697 } 698 699 // coverage application 700 if (enables & GGL_ENABLE_AA) { 701 int16_t cf = *covPtr++; 702 fragment.c[0] = (int64_t(fragment.c[0]) * cf) >> 15; 703 } 704 705 // alpha-test 706 if (enables & GGL_ENABLE_ALPHA_TEST) { 707 GGLcolor ref = c->state.alpha_test.ref; 708 GGLcolor alpha = (uint64_t(fragment.c[0]) * 709 ((1<<GGL_COLOR_BITS)-1)) / ((1<<fragment.s[0])-1); 710 switch (c->state.alpha_test.func) { 711 case GGL_NEVER: goto discard; 712 case GGL_LESS: if (alpha<ref) break; goto discard; 713 case GGL_EQUAL: if (alpha==ref) break; goto discard; 714 case GGL_LEQUAL: if (alpha<=ref) break; goto discard; 715 case GGL_GREATER: if (alpha>ref) break; goto discard; 716 case GGL_NOTEQUAL: if (alpha!=ref) break; goto discard; 717 case GGL_GEQUAL: if (alpha>=ref) break; goto discard; 718 } 719 } 720 721 // depth test 722 if (c->state.buffers.depth.format) { 723 if (enables & GGL_ENABLE_DEPTH_TEST) { 724 surface_t* cb = &(c->state.buffers.depth); 725 uint16_t* p = 
(uint16_t*)(cb->data)+(x+(cb->stride*y)); 726 uint16_t zz = uint32_t(z)>>(16); 727 uint16_t depth = *p; 728 switch (c->state.depth_test.func) { 729 case GGL_NEVER: goto discard; 730 case GGL_LESS: if (zz<depth) break; goto discard; 731 case GGL_EQUAL: if (zz==depth) break; goto discard; 732 case GGL_LEQUAL: if (zz<=depth) break; goto discard; 733 case GGL_GREATER: if (zz>depth) break; goto discard; 734 case GGL_NOTEQUAL: if (zz!=depth) break; goto discard; 735 case GGL_GEQUAL: if (zz>=depth) break; goto discard; 736 } 737 // depth buffer is not enabled, if depth-test is not enabled 738 /* 739 fragment.s[1] = fragment.s[2] = 740 fragment.s[3] = fragment.s[0] = 8; 741 fragment.c[1] = 742 fragment.c[2] = 743 fragment.c[3] = 744 fragment.c[0] = 255 - (zz>>8); 745 */ 746 if (c->state.mask.depth) { 747 *p = zz; 748 } 749 } 750 } 751 752 // fog 753 if (enables & GGL_ENABLE_FOG) { 754 for (int i=1 ; i<=3 ; i++) { 755 GGLfixed fc = (c->state.fog.color[i] * 0x10000) / 0xFF; 756 uint32_t& c = fragment.c[i]; 757 uint8_t& s = fragment.s[i]; 758 c = (c * 0x10000) / ((1<<s)-1); 759 c = gglMulAddx(c, f, gglMulx(fc, 0x10000 - f)); 760 s = 16; 761 } 762 } 763 764 // blending 765 if (enables & GGL_ENABLE_BLENDING) { 766 fb.c[1] = fb.c[2] = fb.c[3] = fb.c[0] = 0; // placate valgrind 767 fb.s[1] = fb.s[2] = fb.s[3] = fb.s[0] = 0; 768 c->state.buffers.color.read( 769 &(c->state.buffers.color), c, x, y, &fb); 770 blending( c, &fragment, &fb ); 771 } 772 773 // write 774 c->state.buffers.color.write( 775 &(c->state.buffers.color), c, x, y, &fragment); 776 } 777 778 discard: 779 // iterate... 
780 x += 1; 781 if (enables & GGL_ENABLE_SMOOTH) { 782 r += c->shade.drdx; 783 g += c->shade.dgdx; 784 b += c->shade.dbdx; 785 a += c->shade.dadx; 786 } 787 z += c->shade.dzdx; 788 f += c->shade.dfdx; 789 } 790 } 791 792 #endif // ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED) 793 794 // ---------------------------------------------------------------------------- 795 #if 0 796 #pragma mark - 797 #pragma mark Scanline 798 #endif 799 800 template <typename T, typename U> 801 static inline __attribute__((const)) 802 T interpolate(int y, T v0, U dvdx, U dvdy) { 803 // interpolates in pixel's centers 804 // v = v0 + (y + 0.5) * dvdy + (0.5 * dvdx) 805 return (y * dvdy) + (v0 + ((dvdy + dvdx) >> 1)); 806 } 807 808 // ---------------------------------------------------------------------------- 809 #if 0 810 #pragma mark - 811 #endif 812 813 void init_y(context_t* c, int32_t ys) 814 { 815 const uint32_t enables = c->state.enables; 816 817 // compute iterators... 818 iterators_t& ci = c->iterators; 819 820 // sample in the center 821 ci.y = ys; 822 823 if (enables & (GGL_ENABLE_DEPTH_TEST|GGL_ENABLE_W|GGL_ENABLE_FOG)) { 824 ci.ydzdy = interpolate(ys, c->shade.z0, c->shade.dzdx, c->shade.dzdy); 825 ci.ydwdy = interpolate(ys, c->shade.w0, c->shade.dwdx, c->shade.dwdy); 826 ci.ydfdy = interpolate(ys, c->shade.f0, c->shade.dfdx, c->shade.dfdy); 827 } 828 829 if (ggl_unlikely(enables & GGL_ENABLE_SMOOTH)) { 830 ci.ydrdy = interpolate(ys, c->shade.r0, c->shade.drdx, c->shade.drdy); 831 ci.ydgdy = interpolate(ys, c->shade.g0, c->shade.dgdx, c->shade.dgdy); 832 ci.ydbdy = interpolate(ys, c->shade.b0, c->shade.dbdx, c->shade.dbdy); 833 ci.ydady = interpolate(ys, c->shade.a0, c->shade.dadx, c->shade.dady); 834 c->step_y = step_y__smooth; 835 } else { 836 ci.ydrdy = c->shade.r0; 837 ci.ydgdy = c->shade.g0; 838 ci.ydbdy = c->shade.b0; 839 ci.ydady = c->shade.a0; 840 // XXX: do only if needed, or make sure this is fast 841 c->packed = ggl_pack_color(c, 
c->state.buffers.color.format, 842 ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady); 843 c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888, 844 ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady); 845 } 846 847 // initialize the variables we need in the shader 848 generated_vars_t& gen = c->generated_vars; 849 gen.argb[GGLFormat::ALPHA].c = ci.ydady; 850 gen.argb[GGLFormat::ALPHA].dx = c->shade.dadx; 851 gen.argb[GGLFormat::RED ].c = ci.ydrdy; 852 gen.argb[GGLFormat::RED ].dx = c->shade.drdx; 853 gen.argb[GGLFormat::GREEN].c = ci.ydgdy; 854 gen.argb[GGLFormat::GREEN].dx = c->shade.dgdx; 855 gen.argb[GGLFormat::BLUE ].c = ci.ydbdy; 856 gen.argb[GGLFormat::BLUE ].dx = c->shade.dbdx; 857 gen.dzdx = c->shade.dzdx; 858 gen.f = ci.ydfdy; 859 gen.dfdx = c->shade.dfdx; 860 861 if (enables & GGL_ENABLE_TMUS) { 862 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 863 texture_t& t = c->state.texture[i]; 864 if (!t.enable) continue; 865 866 texture_iterators_t& ti = t.iterators; 867 if (t.s_coord == GGL_ONE_TO_ONE && t.t_coord == GGL_ONE_TO_ONE) { 868 // we need to set all of these to 0 because in some cases 869 // step_y__generic() or step_y__tmu() will be used and 870 // therefore will update dtdy, however, in 1:1 mode 871 // this is always done by the scanline rasterizer. 
872 ti.dsdx = ti.dsdy = ti.dtdx = ti.dtdy = 0; 873 ti.ydsdy = t.shade.is0; 874 ti.ydtdy = t.shade.it0; 875 } else { 876 const int adjustSWrap = ((t.s_wrap==GGL_CLAMP)?0:16); 877 const int adjustTWrap = ((t.t_wrap==GGL_CLAMP)?0:16); 878 ti.sscale = t.shade.sscale + adjustSWrap; 879 ti.tscale = t.shade.tscale + adjustTWrap; 880 if (!(enables & GGL_ENABLE_W)) { 881 // S coordinate 882 const int32_t sscale = ti.sscale; 883 const int32_t sy = interpolate(ys, 884 t.shade.is0, t.shade.idsdx, t.shade.idsdy); 885 if (sscale>=0) { 886 ti.ydsdy= sy << sscale; 887 ti.dsdx = t.shade.idsdx << sscale; 888 ti.dsdy = t.shade.idsdy << sscale; 889 } else { 890 ti.ydsdy= sy >> -sscale; 891 ti.dsdx = t.shade.idsdx >> -sscale; 892 ti.dsdy = t.shade.idsdy >> -sscale; 893 } 894 // T coordinate 895 const int32_t tscale = ti.tscale; 896 const int32_t ty = interpolate(ys, 897 t.shade.it0, t.shade.idtdx, t.shade.idtdy); 898 if (tscale>=0) { 899 ti.ydtdy= ty << tscale; 900 ti.dtdx = t.shade.idtdx << tscale; 901 ti.dtdy = t.shade.idtdy << tscale; 902 } else { 903 ti.ydtdy= ty >> -tscale; 904 ti.dtdx = t.shade.idtdx >> -tscale; 905 ti.dtdy = t.shade.idtdy >> -tscale; 906 } 907 } 908 } 909 // mirror for generated code... 
910 generated_tex_vars_t& gen = c->generated_vars.texture[i]; 911 gen.width = t.surface.width; 912 gen.height = t.surface.height; 913 gen.stride = t.surface.stride; 914 gen.data = int32_t(t.surface.data); 915 gen.dsdx = ti.dsdx; 916 gen.dtdx = ti.dtdx; 917 } 918 } 919 920 // choose the y-stepper 921 c->step_y = step_y__nop; 922 if (enables & GGL_ENABLE_FOG) { 923 c->step_y = step_y__generic; 924 } else if (enables & GGL_ENABLE_TMUS) { 925 if (enables & GGL_ENABLE_SMOOTH) { 926 c->step_y = step_y__generic; 927 } else if (enables & GGL_ENABLE_W) { 928 c->step_y = step_y__w; 929 } else { 930 c->step_y = step_y__tmu; 931 } 932 } else { 933 if (enables & GGL_ENABLE_SMOOTH) { 934 c->step_y = step_y__smooth; 935 } 936 } 937 938 // choose the rectangle blitter 939 c->rect = rect_generic; 940 if ((c->step_y == step_y__nop) && 941 (c->scanline == scanline_memcpy)) 942 { 943 c->rect = rect_memcpy; 944 } 945 } 946 947 void init_y_packed(context_t* c, int32_t y0) 948 { 949 uint8_t f = c->state.buffers.color.format; 950 c->packed = ggl_pack_color(c, f, 951 c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0); 952 c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888, 953 c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0); 954 c->iterators.y = y0; 955 c->step_y = step_y__nop; 956 // choose the rectangle blitter 957 c->rect = rect_generic; 958 if (c->scanline == scanline_memcpy) { 959 c->rect = rect_memcpy; 960 } 961 } 962 963 void init_y_noop(context_t* c, int32_t y0) 964 { 965 c->iterators.y = y0; 966 c->step_y = step_y__nop; 967 // choose the rectangle blitter 968 c->rect = rect_generic; 969 if (c->scanline == scanline_memcpy) { 970 c->rect = rect_memcpy; 971 } 972 } 973 974 void init_y_error(context_t* c, int32_t y0) 975 { 976 // woooops, shoud never happen, 977 // fail gracefully (don't display anything) 978 init_y_noop(c, y0); 979 LOGE("color-buffer has an invalid format!"); 980 } 981 982 // ---------------------------------------------------------------------------- 
#if 0
#pragma mark -
#endif

/*
 * Generic y-stepper: advances the line counter and, depending on the enabled
 * features, the color, z/w/fog and (when W is not enabled) texture
 * coordinate iterators by their per-line deltas.
 */
void step_y__generic(context_t* c)
{
    const uint32_t enables = c->state.enables;

    // iterate...
    iterators_t& ci = c->iterators;
    ci.y += 1;

    if (enables & GGL_ENABLE_SMOOTH) {
        ci.ydrdy += c->shade.drdy;
        ci.ydgdy += c->shade.dgdy;
        ci.ydbdy += c->shade.dbdy;
        ci.ydady += c->shade.dady;
    }

    const uint32_t mask =
            GGL_ENABLE_DEPTH_TEST |
            GGL_ENABLE_W |
            GGL_ENABLE_FOG;
    if (enables & mask) {
        ci.ydzdy += c->shade.dzdy;
        ci.ydwdy += c->shade.dwdy;
        ci.ydfdy += c->shade.dfdy;
    }

    if ((enables & GGL_ENABLE_TMUS) && (!(enables & GGL_ENABLE_W))) {
        for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
            if (c->state.texture[i].enable) {
                texture_iterators_t& ti = c->state.texture[i].iterators;
                ti.ydsdy += ti.dsdy;
                ti.ydtdy += ti.dtdy;
            }
        }
    }
}

/*
 * "No-op" y-stepper: despite its name it still advances the line counter
 * and the z iterator.
 */
void step_y__nop(context_t* c)
{
    c->iterators.y += 1;
    c->iterators.ydzdy += c->shade.dzdy;
}

/*
 * y-stepper for smooth shading without texturing: advances the line counter,
 * the four color iterators and the z iterator.
 */
void step_y__smooth(context_t* c)
{
    iterators_t& ci = c->iterators;
    ci.y += 1;
    ci.ydrdy += c->shade.drdy;
    ci.ydgdy += c->shade.dgdy;
    ci.ydbdy += c->shade.dbdy;
    ci.ydady += c->shade.dady;
    ci.ydzdy += c->shade.dzdy;
}

/*
 * y-stepper used when W is enabled: advances the line counter and the z and
 * w iterators. Texture coordinates are not stepped here.
 */
void step_y__w(context_t* c)
{
    iterators_t& ci = c->iterators;
    ci.y += 1;
    ci.ydzdy += c->shade.dzdy;
    ci.ydwdy += c->shade.dwdy;
}

/*
 * y-stepper for flat-shaded texturing without W: advances the line counter,
 * the z iterator and the s/t iterators of every enabled texture unit.
 */
void step_y__tmu(context_t* c)
{
    iterators_t& ci = c->iterators;
    ci.y += 1;
    ci.ydzdy += c->shade.dzdy;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
        if (c->state.texture[i].enable) {
            texture_iterators_t& ti = c->state.texture[i].iterators;
            ti.ydsdy += ti.dsdy;
            ti.ydtdy += ti.dtdy;
        }
    }
}

// ----------------------------------------------------------------------------
#if 0
#pragma mark -
#endif

/*
 * Perspective-correcting scanline wrapper. The scanline is split into
 * sub-spans of (1<<SPAN_BITS) pixels plus a final remainder. For each
 * sub-span, q = gglRecipQ(w, 30) is evaluated at both ends, the s and t
 * values of every enabled texture unit are multiplied by it, and the start
 * value / per-pixel delta obtained by linear interpolation between the two
 * ends are stored in the texture iterators (and mirrored into
 * c->generated_vars). c->iterators.xl/xr are then narrowed to the sub-span
 * and the underlying scanline, c->span, is called.
 * Note: c->iterators.xl/xr are left set to the last sub-span on return.
 */
void scanline_perspective(context_t* c)
{
    // per-TMU state: s, t and their q-multiplied counterparts (sq, tq).
    // The union allows processing s and t with the same loop, as st[0]/st[1].
    struct {
        union {
            struct {
                int32_t s, sq;
                int32_t t, tq;
            };
            struct {
                int32_t v, q;
            } st[2];
        };
    } tc[GGL_TEXTURE_UNIT_COUNT] __attribute__((aligned(16)));

    // XXX: we should have a special case when dwdx = 0

    // 32 pixels spans works okay. 16 is a lot better,
    // but hey, it's a software renderer...
    const uint32_t SPAN_BITS = 5;
    const uint32_t ys = c->iterators.y;
    const uint32_t xs = c->iterators.xl;
    const uint32_t x1 = c->iterators.xr;
    const uint32_t xc = x1 - xs;
    uint32_t remainder = xc & ((1<<SPAN_BITS)-1);
    uint32_t numSpans = xc >> SPAN_BITS;

    const iterators_t& ci = c->iterators;
    int32_t w0 = (xs * c->shade.dwdx) + ci.ydwdy;
    int32_t q0 = gglRecipQ(w0, 30);
    // number of significant bits of q0; used as the shift for all the
    // gglMulx() below and to adjust the final scale
    const int iwscale = 32 - gglClz(q0);

    // w delta for a full span
    const int32_t dwdx = c->shade.dwdx << SPAN_BITS;
    int32_t xl = c->iterators.xl;

    // We process s & t with a loop to reduce the code size
    // (and i-cache pressure).

    // initial s/t at the center of the first pixel, and their q-multiplied
    // values
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
        const texture_t& tmu = c->state.texture[i];
        if (!tmu.enable) continue;
        int32_t s =   tmu.shade.is0 +
                     (tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) +
                     ((tmu.shade.idsdx + tmu.shade.idsdy)>>1);
        int32_t t =   tmu.shade.it0 +
                     (tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) +
                     ((tmu.shade.idtdx + tmu.shade.idtdy)>>1);
        tc[i].s  = s;
        tc[i].t  = t;
        tc[i].sq = gglMulx(s, q0, iwscale);
        tc[i].tq = gglMulx(t, q0, iwscale);
    }

    // 'span' stays 0 for full spans and holds the pixel count of the final,
    // partial span
    int32_t span = 0;
    do {
        int32_t w1;
        if (ggl_likely(numSpans)) {
            w1 = w0 + dwdx;
        } else {
            if (remainder) {
                // finish off the scanline...
                span = remainder;
                w1 = (c->shade.dwdx * span) + w0;
            } else {
                break;
            }
        }
        int32_t q1 = gglRecipQ(w1, 30);
        for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
            texture_t& tmu = c->state.texture[i];
            if (!tmu.enable) continue;
            texture_iterators_t& ti = tmu.iterators;

            // j == 0 handles s, j == 1 handles t
            for (int j=0 ; j<2 ; j++) {
                // value at the end of this span
                int32_t v = tc[i].st[j].v;
                if (span)   v += (tmu.shade.st[j].dx)*span;
                else        v += (tmu.shade.st[j].dx)<<SPAN_BITS;
                // q-multiplied values at the start (v0) and end (v1)
                const int32_t v0 = tc[i].st[j].q;
                const int32_t v1 = gglMulx(v, q1, iwscale);
                // per-pixel delta over the span
                int32_t dvdx = v1 - v0;
                if (span)   dvdx /= span;
                else        dvdx >>= SPAN_BITS;
                tc[i].st[j].v = v;
                tc[i].st[j].q = v1;

                const int scale = ti.st[j].scale + (iwscale - 30);
                if (scale >= 0) {
                    ti.st[j].ydvdy = v0   << scale;
                    ti.st[j].dvdx  = dvdx << scale;
                } else {
                    ti.st[j].ydvdy = v0   >> -scale;
                    ti.st[j].dvdx  = dvdx >> -scale;
                }
            }
            generated_tex_vars_t& gen = c->generated_vars.texture[i];
            gen.dsdx = ti.st[0].dvdx;
            gen.dtdx = ti.st[1].dvdx;
        }
        // restrict the underlying scanline to this span
        c->iterators.xl = xl;
        c->iterators.xr = xl = xl + (span ? span : (1<<SPAN_BITS));
        w0 = w1;
        q0 = q1;
        c->span(c);
    } while(numSpans--);
}

void scanline_perspective_single(context_t* c)
{
    // 32 pixels spans works okay. 16 is a lot better,
    // but hey, it's a software renderer...
1176 const uint32_t SPAN_BITS = 5; 1177 const uint32_t ys = c->iterators.y; 1178 const uint32_t xs = c->iterators.xl; 1179 const uint32_t x1 = c->iterators.xr; 1180 const uint32_t xc = x1 - xs; 1181 1182 const iterators_t& ci = c->iterators; 1183 int32_t w = (xs * c->shade.dwdx) + ci.ydwdy; 1184 int32_t iw = gglRecipQ(w, 30); 1185 const int iwscale = 32 - gglClz(iw); 1186 1187 const int i = 31 - gglClz(c->state.enabled_tmu); 1188 generated_tex_vars_t& gen = c->generated_vars.texture[i]; 1189 texture_t& tmu = c->state.texture[i]; 1190 texture_iterators_t& ti = tmu.iterators; 1191 const int sscale = ti.sscale + (iwscale - 30); 1192 const int tscale = ti.tscale + (iwscale - 30); 1193 int32_t s = tmu.shade.is0 + 1194 (tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) + 1195 ((tmu.shade.idsdx + tmu.shade.idsdy)>>1); 1196 int32_t t = tmu.shade.it0 + 1197 (tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) + 1198 ((tmu.shade.idtdx + tmu.shade.idtdy)>>1); 1199 int32_t s0 = gglMulx(s, iw, iwscale); 1200 int32_t t0 = gglMulx(t, iw, iwscale); 1201 int32_t xl = c->iterators.xl; 1202 1203 int32_t sq, tq, dsdx, dtdx; 1204 int32_t premainder = xc & ((1<<SPAN_BITS)-1); 1205 uint32_t numSpans = xc >> SPAN_BITS; 1206 if (c->shade.dwdx == 0) { 1207 // XXX: we could choose to do this if the error is small enough 1208 numSpans = 0; 1209 premainder = xc; 1210 goto no_perspective; 1211 } 1212 1213 if (premainder) { 1214 w += c->shade.dwdx * premainder; 1215 iw = gglRecipQ(w, 30); 1216 no_perspective: 1217 s += tmu.shade.idsdx * premainder; 1218 t += tmu.shade.idtdx * premainder; 1219 sq = gglMulx(s, iw, iwscale); 1220 tq = gglMulx(t, iw, iwscale); 1221 dsdx = (sq - s0) / premainder; 1222 dtdx = (tq - t0) / premainder; 1223 c->iterators.xl = xl; 1224 c->iterators.xr = xl = xl + premainder; 1225 goto finish; 1226 } 1227 1228 while (numSpans--) { 1229 w += c->shade.dwdx << SPAN_BITS; 1230 s += tmu.shade.idsdx << SPAN_BITS; 1231 t += tmu.shade.idtdx << SPAN_BITS; 1232 iw = gglRecipQ(w, 30); 1233 sq 
= gglMulx(s, iw, iwscale); 1234 tq = gglMulx(t, iw, iwscale); 1235 dsdx = (sq - s0) >> SPAN_BITS; 1236 dtdx = (tq - t0) >> SPAN_BITS; 1237 c->iterators.xl = xl; 1238 c->iterators.xr = xl = xl + (1<<SPAN_BITS); 1239 finish: 1240 if (sscale >= 0) { 1241 ti.ydsdy = s0 << sscale; 1242 ti.dsdx = dsdx << sscale; 1243 } else { 1244 ti.ydsdy = s0 >>-sscale; 1245 ti.dsdx = dsdx >>-sscale; 1246 } 1247 if (tscale >= 0) { 1248 ti.ydtdy = t0 << tscale; 1249 ti.dtdx = dtdx << tscale; 1250 } else { 1251 ti.ydtdy = t0 >>-tscale; 1252 ti.dtdx = dtdx >>-tscale; 1253 } 1254 s0 = sq; 1255 t0 = tq; 1256 gen.dsdx = ti.dsdx; 1257 gen.dtdx = ti.dtdx; 1258 c->span(c); 1259 } 1260 } 1261 1262 // ---------------------------------------------------------------------------- 1263 1264 void scanline_col32cb16blend(context_t* c) 1265 { 1266 int32_t x = c->iterators.xl; 1267 size_t ct = c->iterators.xr - x; 1268 int32_t y = c->iterators.y; 1269 surface_t* cb = &(c->state.buffers.color); 1270 union { 1271 uint16_t* dst; 1272 uint32_t* dst32; 1273 }; 1274 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 1275 1276 #if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__)) 1277 #if defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN 1278 scanline_col32cb16blend_neon(dst, &(c->packed8888), ct); 1279 #else // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN 1280 scanline_col32cb16blend_arm(dst, GGL_RGBA_TO_HOST(c->packed8888), ct); 1281 #endif // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN 1282 #else 1283 uint32_t s = GGL_RGBA_TO_HOST(c->packed8888); 1284 int sA = (s>>24); 1285 int f = 0x100 - (sA + (sA>>7)); 1286 while (ct--) { 1287 uint16_t d = *dst; 1288 int dR = (d>>11)&0x1f; 1289 int dG = (d>>5)&0x3f; 1290 int dB = (d)&0x1f; 1291 int sR = (s >> ( 3))&0x1F; 1292 int sG = (s >> ( 8+2))&0x3F; 1293 int sB = (s >> (16+3))&0x1F; 1294 sR += (f*dR)>>8; 1295 sG += (f*dG)>>8; 1296 sB += (f*dB)>>8; 1297 *dst++ = uint16_t((sR<<11)|(sG<<5)|sB); 1298 } 1299 #endif 
1300 1301 } 1302 1303 void scanline_t32cb16(context_t* c) 1304 { 1305 int32_t x = c->iterators.xl; 1306 size_t ct = c->iterators.xr - x; 1307 int32_t y = c->iterators.y; 1308 surface_t* cb = &(c->state.buffers.color); 1309 union { 1310 uint16_t* dst; 1311 uint32_t* dst32; 1312 }; 1313 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 1314 1315 surface_t* tex = &(c->state.texture[0].surface); 1316 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 1317 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 1318 uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v)); 1319 int sR, sG, sB; 1320 uint32_t s, d; 1321 1322 if (ct==1 || uint32_t(dst)&2) { 1323 last_one: 1324 s = GGL_RGBA_TO_HOST( *src++ ); 1325 sR = (s >> ( 3))&0x1F; 1326 sG = (s >> ( 8+2))&0x3F; 1327 sB = (s >> (16+3))&0x1F; 1328 *dst++ = uint16_t((sR<<11)|(sG<<5)|sB); 1329 ct--; 1330 } 1331 1332 while (ct >= 2) { 1333 s = GGL_RGBA_TO_HOST( *src++ ); 1334 sR = (s >> ( 3))&0x1F; 1335 sG = (s >> ( 8+2))&0x3F; 1336 sB = (s >> (16+3))&0x1F; 1337 d = (sR<<11)|(sG<<5)|sB; 1338 1339 s = GGL_RGBA_TO_HOST( *src++ ); 1340 sR = (s >> ( 3))&0x1F; 1341 sG = (s >> ( 8+2))&0x3F; 1342 sB = (s >> (16+3))&0x1F; 1343 d |= ((sR<<11)|(sG<<5)|sB)<<16; 1344 1345 #if BYTE_ORDER == BIG_ENDIAN 1346 d = (d>>16) | (d<<16); 1347 #endif 1348 1349 *dst32++ = d; 1350 ct -= 2; 1351 } 1352 1353 if (ct > 0) { 1354 goto last_one; 1355 } 1356 } 1357 1358 void scanline_t32cb16blend(context_t* c) 1359 { 1360 int32_t x = c->iterators.xl; 1361 size_t ct = c->iterators.xr - x; 1362 int32_t y = c->iterators.y; 1363 surface_t* cb = &(c->state.buffers.color); 1364 uint16_t* dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 1365 1366 surface_t* tex = &(c->state.texture[0].surface); 1367 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 1368 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 1369 uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v)); 
1370 1371 #if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__)) 1372 scanline_t32cb16blend_arm(dst, src, ct); 1373 #else 1374 while (ct--) { 1375 uint32_t s = *src++; 1376 if (!s) { 1377 dst++; 1378 continue; 1379 } 1380 uint16_t d = *dst; 1381 s = GGL_RGBA_TO_HOST(s); 1382 int sR = (s >> ( 3))&0x1F; 1383 int sG = (s >> ( 8+2))&0x3F; 1384 int sB = (s >> (16+3))&0x1F; 1385 int sA = (s>>24); 1386 int f = 0x100 - (sA + (sA>>7)); 1387 int dR = (d>>11)&0x1f; 1388 int dG = (d>>5)&0x3f; 1389 int dB = (d)&0x1f; 1390 sR += (f*dR)>>8; 1391 sG += (f*dG)>>8; 1392 sB += (f*dB)>>8; 1393 *dst++ = uint16_t((sR<<11)|(sG<<5)|sB); 1394 } 1395 #endif 1396 } 1397 1398 void scanline_memcpy(context_t* c) 1399 { 1400 int32_t x = c->iterators.xl; 1401 size_t ct = c->iterators.xr - x; 1402 int32_t y = c->iterators.y; 1403 surface_t* cb = &(c->state.buffers.color); 1404 const GGLFormat* fp = &(c->formats[cb->format]); 1405 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 1406 (x + (cb->stride * y)) * fp->size; 1407 1408 surface_t* tex = &(c->state.texture[0].surface); 1409 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 1410 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 1411 uint8_t *src = reinterpret_cast<uint8_t*>(tex->data) + 1412 (u + (tex->stride * v)) * fp->size; 1413 1414 const size_t size = ct * fp->size; 1415 memcpy(dst, src, size); 1416 } 1417 1418 void scanline_memset8(context_t* c) 1419 { 1420 int32_t x = c->iterators.xl; 1421 size_t ct = c->iterators.xr - x; 1422 int32_t y = c->iterators.y; 1423 surface_t* cb = &(c->state.buffers.color); 1424 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + (x+(cb->stride*y)); 1425 uint32_t packed = c->packed; 1426 memset(dst, packed, ct); 1427 } 1428 1429 void scanline_memset16(context_t* c) 1430 { 1431 int32_t x = c->iterators.xl; 1432 size_t ct = c->iterators.xr - x; 1433 int32_t y = c->iterators.y; 1434 surface_t* cb = &(c->state.buffers.color); 1435 uint16_t* dst = 
reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 1436 uint32_t packed = c->packed; 1437 android_memset16(dst, packed, ct*2); 1438 } 1439 1440 void scanline_memset32(context_t* c) 1441 { 1442 int32_t x = c->iterators.xl; 1443 size_t ct = c->iterators.xr - x; 1444 int32_t y = c->iterators.y; 1445 surface_t* cb = &(c->state.buffers.color); 1446 uint32_t* dst = reinterpret_cast<uint32_t*>(cb->data) + (x+(cb->stride*y)); 1447 uint32_t packed = GGL_HOST_TO_RGBA(c->packed); 1448 android_memset32(dst, packed, ct*4); 1449 } 1450 1451 void scanline_clear(context_t* c) 1452 { 1453 int32_t x = c->iterators.xl; 1454 size_t ct = c->iterators.xr - x; 1455 int32_t y = c->iterators.y; 1456 surface_t* cb = &(c->state.buffers.color); 1457 const GGLFormat* fp = &(c->formats[cb->format]); 1458 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 1459 (x + (cb->stride * y)) * fp->size; 1460 const size_t size = ct * fp->size; 1461 memset(dst, 0, size); 1462 } 1463 1464 void scanline_set(context_t* c) 1465 { 1466 int32_t x = c->iterators.xl; 1467 size_t ct = c->iterators.xr - x; 1468 int32_t y = c->iterators.y; 1469 surface_t* cb = &(c->state.buffers.color); 1470 const GGLFormat* fp = &(c->formats[cb->format]); 1471 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 1472 (x + (cb->stride * y)) * fp->size; 1473 const size_t size = ct * fp->size; 1474 memset(dst, 0xFF, size); 1475 } 1476 1477 void scanline_noop(context_t* c) 1478 { 1479 } 1480 1481 void rect_generic(context_t* c, size_t yc) 1482 { 1483 do { 1484 c->scanline(c); 1485 c->step_y(c); 1486 } while (--yc); 1487 } 1488 1489 void rect_memcpy(context_t* c, size_t yc) 1490 { 1491 int32_t x = c->iterators.xl; 1492 size_t ct = c->iterators.xr - x; 1493 int32_t y = c->iterators.y; 1494 surface_t* cb = &(c->state.buffers.color); 1495 const GGLFormat* fp = &(c->formats[cb->format]); 1496 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 1497 (x + (cb->stride * y)) * fp->size; 1498 1499 surface_t* tex = 
&(c->state.texture[0].surface); 1500 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 1501 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 1502 uint8_t *src = reinterpret_cast<uint8_t*>(tex->data) + 1503 (u + (tex->stride * v)) * fp->size; 1504 1505 if (cb->stride == tex->stride && ct == size_t(cb->stride)) { 1506 memcpy(dst, src, ct * fp->size * yc); 1507 } else { 1508 const size_t size = ct * fp->size; 1509 const size_t dbpr = cb->stride * fp->size; 1510 const size_t sbpr = tex->stride * fp->size; 1511 do { 1512 memcpy(dst, src, size); 1513 dst += dbpr; 1514 src += sbpr; 1515 } while (--yc); 1516 } 1517 } 1518 // ---------------------------------------------------------------------------- 1519 }; // namespace android 1520 1521 using namespace android; 1522 extern "C" void ggl_test_codegen(uint32_t n, uint32_t p, uint32_t t0, uint32_t t1) 1523 { 1524 #if ANDROID_ARM_CODEGEN 1525 GGLContext* c; 1526 gglInit(&c); 1527 needs_t needs; 1528 needs.n = n; 1529 needs.p = p; 1530 needs.t[0] = t0; 1531 needs.t[1] = t1; 1532 sp<ScanlineAssembly> a(new ScanlineAssembly(needs, ASSEMBLY_SCRATCH_SIZE)); 1533 GGLAssembler assembler( new ARMAssembler(a) ); 1534 int err = assembler.scanline(needs, (context_t*)c); 1535 if (err != 0) { 1536 printf("error %08x (%s)\n", err, strerror(-err)); 1537 } 1538 gglUninit(c); 1539 #else 1540 printf("This test runs only on ARM\n"); 1541 #endif 1542 } 1543 1544