1 /* libs/pixelflinger/scanline.cpp 2 ** 3 ** Copyright 2006-2011, The Android Open Source Project 4 ** 5 ** Licensed under the Apache License, Version 2.0 (the "License"); 6 ** you may not use this file except in compliance with the License. 7 ** You may obtain a copy of the License at 8 ** 9 ** http://www.apache.org/licenses/LICENSE-2.0 10 ** 11 ** Unless required by applicable law or agreed to in writing, software 12 ** distributed under the License is distributed on an "AS IS" BASIS, 13 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 ** See the License for the specific language governing permissions and 15 ** limitations under the License. 16 */ 17 18 19 #define LOG_TAG "pixelflinger" 20 21 #include <assert.h> 22 #include <stdlib.h> 23 #include <stdio.h> 24 #include <string.h> 25 26 #include <cutils/memory.h> 27 #include <cutils/log.h> 28 29 #ifdef __arm__ 30 #include <machine/cpu-features.h> 31 #endif 32 33 #include "buffer.h" 34 #include "scanline.h" 35 36 #include "codeflinger/CodeCache.h" 37 #include "codeflinger/GGLAssembler.h" 38 #if defined(__arm__) 39 #include "codeflinger/ARMAssembler.h" 40 #elif defined(__aarch64__) 41 #include "codeflinger/Arm64Assembler.h" 42 #elif defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6 43 #include "codeflinger/MIPSAssembler.h" 44 #elif defined(__mips__) && defined(__LP64__) 45 #include "codeflinger/MIPS64Assembler.h" 46 #endif 47 //#include "codeflinger/ARMAssemblerOptimizer.h" 48 49 // ---------------------------------------------------------------------------- 50 51 #define ANDROID_CODEGEN_GENERIC 0 // force generic pixel pipeline 52 #define ANDROID_CODEGEN_C 1 // hand-written C, fallback generic 53 #define ANDROID_CODEGEN_ASM 2 // hand-written asm, fallback generic 54 #define ANDROID_CODEGEN_GENERATED 3 // hand-written asm, fallback codegen 55 56 #ifdef NDEBUG 57 # define ANDROID_RELEASE 58 # define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED 59 #else 60 # define ANDROID_DEBUG 61 # define ANDROID_CODEGEN ANDROID_CODEGEN_GENERATED 62 #endif 63 64 #if defined(__arm__) || (defined(__mips__) && ((!defined(__LP64__) && __mips_isa_rev < 6) || defined(__LP64__))) || defined(__aarch64__) 65 # define ANDROID_ARM_CODEGEN 1 66 #else 67 # define ANDROID_ARM_CODEGEN 0 68 #endif 69 70 #define DEBUG__CODEGEN_ONLY 0 71 72 /* Set to 1 to dump to the log the states that need a new 73 * code-generated scanline callback, i.e. those that don't 74 * have a corresponding shortcut function. 
75 */ 76 #define DEBUG_NEEDS 0 77 78 #if defined( __mips__) && ((!defined(__LP64__) && __mips_isa_rev < 6) || defined(__LP64__)) 79 #define ASSEMBLY_SCRATCH_SIZE 4096 80 #elif defined(__aarch64__) 81 #define ASSEMBLY_SCRATCH_SIZE 8192 82 #else 83 #define ASSEMBLY_SCRATCH_SIZE 2048 84 #endif 85 86 // ---------------------------------------------------------------------------- 87 namespace android { 88 // ---------------------------------------------------------------------------- 89 90 static void init_y(context_t*, int32_t); 91 static void init_y_noop(context_t*, int32_t); 92 static void init_y_packed(context_t*, int32_t); 93 static void init_y_error(context_t*, int32_t); 94 95 static void step_y__generic(context_t* c); 96 static void step_y__nop(context_t*); 97 static void step_y__smooth(context_t* c); 98 static void step_y__tmu(context_t* c); 99 static void step_y__w(context_t* c); 100 101 static void scanline(context_t* c); 102 static void scanline_perspective(context_t* c); 103 static void scanline_perspective_single(context_t* c); 104 static void scanline_t32cb16blend(context_t* c); 105 static void scanline_t32cb16blend_dither(context_t* c); 106 static void scanline_t32cb16blend_srca(context_t* c); 107 static void scanline_t32cb16blend_clamp(context_t* c); 108 static void scanline_t32cb16blend_clamp_dither(context_t* c); 109 static void scanline_t32cb16blend_clamp_mod(context_t* c); 110 static void scanline_x32cb16blend_clamp_mod(context_t* c); 111 static void scanline_t32cb16blend_clamp_mod_dither(context_t* c); 112 static void scanline_x32cb16blend_clamp_mod_dither(context_t* c); 113 static void scanline_t32cb16(context_t* c); 114 static void scanline_t32cb16_dither(context_t* c); 115 static void scanline_t32cb16_clamp(context_t* c); 116 static void scanline_t32cb16_clamp_dither(context_t* c); 117 static void scanline_col32cb16blend(context_t* c); 118 static void scanline_t16cb16_clamp(context_t* c); 119 static void scanline_t16cb16blend_clamp_mod(context_t* c); 120 static void scanline_memcpy(context_t* c); 121 static void scanline_memset8(context_t* c); 122 static void scanline_memset16(context_t* c); 123 static void scanline_memset32(context_t* c); 124 static void scanline_noop(context_t* c); 125 static void scanline_set(context_t* c); 126 static void scanline_clear(context_t* c); 127 128 static void rect_generic(context_t* c, size_t yc); 129 static void rect_memcpy(context_t* c, size_t yc); 130 131 #if defined( __arm__) 132 extern "C" void scanline_t32cb16blend_arm(uint16_t*, uint32_t*, size_t); 133 extern "C" void scanline_t32cb16_arm(uint16_t *dst, uint32_t *src, size_t ct); 134 extern "C" void scanline_col32cb16blend_neon(uint16_t *dst, uint32_t *col, size_t ct); 135 extern "C" void scanline_col32cb16blend_arm(uint16_t *dst, uint32_t col, size_t ct); 136 #elif defined(__aarch64__) 137 extern "C" void scanline_t32cb16blend_arm64(uint16_t*, uint32_t*, size_t); 138 extern "C" void scanline_col32cb16blend_arm64(uint16_t *dst, uint32_t col, size_t ct); 139 #elif defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6 140 extern "C" void scanline_t32cb16blend_mips(uint16_t*, uint32_t*, size_t); 141 #elif defined(__mips__) && defined(__LP64__) 142 extern "C" void scanline_t32cb16blend_mips64(uint16_t*, uint32_t*, size_t); 143 extern "C" void scanline_col32cb16blend_mips64(uint16_t *dst, uint32_t col, size_t ct); 144 #endif 145 146 // ---------------------------------------------------------------------------- 147 148 static inline uint16_t convertAbgr8888ToRgb565(uint32_t pix) 
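/* For reference: the input is a host-order ABGR8888 value (R in bits 7..0,
 * G in bits 15..8, B in bits 23..16, A in bits 31..24); the top 5/6/5 bits
 * of R, G and B are repacked into 565 order. For example, an arbitrary
 * test value 0xFF2040FF (A=0xFF, B=0x20, G=0x40, R=0xFF) maps to 0xFA04.
 */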
149 { 150 return uint16_t( ((pix << 8) & 0xf800) | 151 ((pix >> 5) & 0x07e0) | 152 ((pix >> 19) & 0x001f) ); 153 } 154 155 struct shortcut_t { 156 needs_filter_t filter; 157 const char* desc; 158 void (*scanline)(context_t*); 159 void (*init_y)(context_t*, int32_t); 160 }; 161 162 // Keep in sync with needs 163 164 /* To understand the values here, have a look at: 165 * system/core/include/private/pixelflinger/ggl_context.h 166 * 167 * Especially the lines defining and using GGL_RESERVE_NEEDS 168 * 169 * Quick reminders: 170 * - the last nibble of the first value is the destination buffer format. 171 * - the last nibble of the third value is the source texture format 172 * - formats: 4=rgb565 1=abgr8888 2=xbgr8888 173 * 174 * In the descriptions below: 175 * 176 * SRC means we copy the source pixels to the destination 177 * 178 * SRC_OVER means we blend the source pixels to the destination 179 * with dstFactor = 1-srcA, srcFactor=1 (premultiplied source). 180 * This mode is otherwise called 'blend'. 181 * 182 * SRCA_OVER means we blend the source pixels to the destination 183 * with dstFactor=srcA*(1-srcA) srcFactor=srcA (non-premul source). 184 * This mode is otherwise called 'blend_srca' 185 * 186 * clamp means we fetch source pixels from a texture with u/v clamping 187 * 188 * mod means the source pixels are modulated (multiplied) by the 189 * a/r/g/b of the current context's color. Typically used for 190 * fade-in / fade-out. 191 * 192 * dither means we dither 32 bit values to 16 bits 193 */ 194 static shortcut_t shortcuts[] = { 195 { { { 0x03515104, 0x00000077, { 0x00000A01, 0x00000000 } }, 196 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 197 "565 fb, 8888 tx, blend SRC_OVER", scanline_t32cb16blend, init_y_noop }, 198 { { { 0x03010104, 0x00000077, { 0x00000A01, 0x00000000 } }, 199 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 200 "565 fb, 8888 tx, SRC", scanline_t32cb16, init_y_noop }, 201 /* same as first entry, but with dithering */ 202 { { { 0x03515104, 0x00000177, { 0x00000A01, 0x00000000 } }, 203 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 204 "565 fb, 8888 tx, blend SRC_OVER dither", scanline_t32cb16blend_dither, init_y_noop }, 205 /* same as second entry, but with dithering */ 206 { { { 0x03010104, 0x00000177, { 0x00000A01, 0x00000000 } }, 207 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 208 "565 fb, 8888 tx, SRC dither", scanline_t32cb16_dither, init_y_noop }, 209 /* this is used during the boot animation - CHEAT: ignore dithering */ 210 { { { 0x03545404, 0x00000077, { 0x00000A01, 0x00000000 } }, 211 { 0xFFFFFFFF, 0xFFFFFEFF, { 0xFFFFFFFF, 0x0000003F } } }, 212 "565 fb, 8888 tx, blend dst:ONE_MINUS_SRCA src:SRCA", scanline_t32cb16blend_srca, init_y_noop }, 213 /* special case for arbitrary texture coordinates (think scaling) */ 214 { { { 0x03515104, 0x00000077, { 0x00000001, 0x00000000 } }, 215 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 216 "565 fb, 8888 tx, SRC_OVER clamp", scanline_t32cb16blend_clamp, init_y }, 217 { { { 0x03515104, 0x00000177, { 0x00000001, 0x00000000 } }, 218 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 219 "565 fb, 8888 tx, SRC_OVER clamp dither", scanline_t32cb16blend_clamp_dither, init_y }, 220 /* another case used during emulation */ 221 { { { 0x03515104, 0x00000077, { 0x00001001, 0x00000000 } }, 222 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 223 "565 fb, 8888 tx, SRC_OVER clamp modulate", scanline_t32cb16blend_clamp_mod, init_y }, 224 /* and this */ 225 { { 
{ 0x03515104, 0x00000077, { 0x00001002, 0x00000000 } }, 226 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 227 "565 fb, x888 tx, SRC_OVER clamp modulate", scanline_x32cb16blend_clamp_mod, init_y }, 228 { { { 0x03515104, 0x00000177, { 0x00001001, 0x00000000 } }, 229 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 230 "565 fb, 8888 tx, SRC_OVER clamp modulate dither", scanline_t32cb16blend_clamp_mod_dither, init_y }, 231 { { { 0x03515104, 0x00000177, { 0x00001002, 0x00000000 } }, 232 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 233 "565 fb, x888 tx, SRC_OVER clamp modulate dither", scanline_x32cb16blend_clamp_mod_dither, init_y }, 234 { { { 0x03010104, 0x00000077, { 0x00000001, 0x00000000 } }, 235 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 236 "565 fb, 8888 tx, SRC clamp", scanline_t32cb16_clamp, init_y }, 237 { { { 0x03010104, 0x00000077, { 0x00000002, 0x00000000 } }, 238 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 239 "565 fb, x888 tx, SRC clamp", scanline_t32cb16_clamp, init_y }, 240 { { { 0x03010104, 0x00000177, { 0x00000001, 0x00000000 } }, 241 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 242 "565 fb, 8888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y }, 243 { { { 0x03010104, 0x00000177, { 0x00000002, 0x00000000 } }, 244 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 245 "565 fb, x888 tx, SRC clamp dither", scanline_t32cb16_clamp_dither, init_y }, 246 { { { 0x03010104, 0x00000077, { 0x00000004, 0x00000000 } }, 247 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 248 "565 fb, 565 tx, SRC clamp", scanline_t16cb16_clamp, init_y }, 249 { { { 0x03515104, 0x00000077, { 0x00001004, 0x00000000 } }, 250 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0x0000003F } } }, 251 "565 fb, 565 tx, SRC_OVER clamp", scanline_t16cb16blend_clamp_mod, init_y }, 252 { { { 0x03515104, 0x00000077, { 0x00000000, 0x00000000 } }, 253 { 0xFFFFFFFF, 0xFFFFFFFF, { 0xFFFFFFFF, 0xFFFFFFFF } } }, 254 "565 fb, 8888 fixed color", scanline_col32cb16blend, init_y_packed }, 255 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } }, 256 { 0x00000000, 0x00000007, { 0x00000000, 0x00000000 } } }, 257 "(nop) alpha test", scanline_noop, init_y_noop }, 258 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } }, 259 { 0x00000000, 0x00000070, { 0x00000000, 0x00000000 } } }, 260 "(nop) depth test", scanline_noop, init_y_noop }, 261 { { { 0x05000000, 0x00000000, { 0x00000000, 0x00000000 } }, 262 { 0x0F000000, 0x00000080, { 0x00000000, 0x00000000 } } }, 263 "(nop) logic_op", scanline_noop, init_y_noop }, 264 { { { 0xF0000000, 0x00000000, { 0x00000000, 0x00000000 } }, 265 { 0xF0000000, 0x00000080, { 0x00000000, 0x00000000 } } }, 266 "(nop) color mask", scanline_noop, init_y_noop }, 267 { { { 0x0F000000, 0x00000077, { 0x00000000, 0x00000000 } }, 268 { 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } }, 269 "(set) logic_op", scanline_set, init_y_noop }, 270 { { { 0x00000000, 0x00000077, { 0x00000000, 0x00000000 } }, 271 { 0xFF000000, 0x000000F7, { 0x00000000, 0x00000000 } } }, 272 "(clear) logic_op", scanline_clear, init_y_noop }, 273 { { { 0x03000000, 0x00000077, { 0x00000000, 0x00000000 } }, 274 { 0xFFFFFF00, 0x000000F7, { 0x00000000, 0x00000000 } } }, 275 "(clear) blending 0/0", scanline_clear, init_y_noop }, 276 { { { 0x00000000, 0x00000000, { 0x00000000, 0x00000000 } }, 277 { 0x0000003F, 0x00000000, { 0x00000000, 0x00000000 } } }, 278 "(error) invalid color-buffer format", scanline_noop, init_y_error }, 279 }; 280 static 
const needs_filter_t noblend1to1 = { 281 // (disregard dithering, see below) 282 { 0x03010100, 0x00000077, { 0x00000A00, 0x00000000 } }, 283 { 0xFFFFFFC0, 0xFFFFFEFF, { 0xFFFFFFC0, 0x0000003F } } 284 }; 285 static const needs_filter_t fill16noblend = { 286 { 0x03010100, 0x00000077, { 0x00000000, 0x00000000 } }, 287 { 0xFFFFFFC0, 0xFFFFFFFF, { 0x0000003F, 0x0000003F } } 288 }; 289 290 // ---------------------------------------------------------------------------- 291 292 #if ANDROID_ARM_CODEGEN 293 294 #if defined(__mips__) && ((!defined(__LP64__) && __mips_isa_rev < 6) || defined(__LP64__)) 295 static CodeCache gCodeCache(32 * 1024); 296 #elif defined(__aarch64__) 297 static CodeCache gCodeCache(48 * 1024); 298 #else 299 static CodeCache gCodeCache(12 * 1024); 300 #endif 301 302 class ScanlineAssembly : public Assembly { 303 AssemblyKey<needs_t> mKey; 304 public: 305 ScanlineAssembly(needs_t needs, size_t size) 306 : Assembly(size), mKey(needs) { } 307 const AssemblyKey<needs_t>& key() const { return mKey; } 308 }; 309 #endif 310 311 // ---------------------------------------------------------------------------- 312 313 void ggl_init_scanline(context_t* c) 314 { 315 c->init_y = init_y; 316 c->step_y = step_y__generic; 317 c->scanline = scanline; 318 } 319 320 void ggl_uninit_scanline(context_t* c) 321 { 322 if (c->state.buffers.coverage) 323 free(c->state.buffers.coverage); 324 #if ANDROID_ARM_CODEGEN 325 if (c->scanline_as) 326 c->scanline_as->decStrong(c); 327 #endif 328 } 329 330 // ---------------------------------------------------------------------------- 331 332 static void pick_scanline(context_t* c) 333 { 334 #if (!defined(DEBUG__CODEGEN_ONLY) || (DEBUG__CODEGEN_ONLY == 0)) 335 336 #if ANDROID_CODEGEN == ANDROID_CODEGEN_GENERIC 337 c->init_y = init_y; 338 c->step_y = step_y__generic; 339 c->scanline = scanline; 340 return; 341 #endif 342 343 //printf("*** needs [%08lx:%08lx:%08lx:%08lx]\n", 344 // c->state.needs.n, c->state.needs.p, 345 // c->state.needs.t[0], c->state.needs.t[1]); 346 347 // first handle the special case that we cannot test with a filter 348 const uint32_t cb_format = GGL_READ_NEEDS(CB_FORMAT, c->state.needs.n); 349 if (GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0]) == cb_format) { 350 if (c->state.needs.match(noblend1to1)) { 351 // this will match regardless of dithering state, since both 352 // src and dest have the same format anyway, there is no dithering 353 // to be done. 
354 const GGLFormat* f = 355 &(c->formats[GGL_READ_NEEDS(T_FORMAT, c->state.needs.t[0])]); 356 if ((f->components == GGL_RGB) || 357 (f->components == GGL_RGBA) || 358 (f->components == GGL_LUMINANCE) || 359 (f->components == GGL_LUMINANCE_ALPHA)) 360 { 361 // format must have all of RGB components 362 // (so the current color doesn't show through) 363 c->scanline = scanline_memcpy; 364 c->init_y = init_y_noop; 365 return; 366 } 367 } 368 } 369 370 if (c->state.needs.match(fill16noblend)) { 371 c->init_y = init_y_packed; 372 switch (c->formats[cb_format].size) { 373 case 1: c->scanline = scanline_memset8; return; 374 case 2: c->scanline = scanline_memset16; return; 375 case 4: c->scanline = scanline_memset32; return; 376 } 377 } 378 379 const int numFilters = sizeof(shortcuts)/sizeof(shortcut_t); 380 for (int i=0 ; i<numFilters ; i++) { 381 if (c->state.needs.match(shortcuts[i].filter)) { 382 c->scanline = shortcuts[i].scanline; 383 c->init_y = shortcuts[i].init_y; 384 return; 385 } 386 } 387 388 #if DEBUG_NEEDS 389 ALOGI("Needs: n=0x%08x p=0x%08x t0=0x%08x t1=0x%08x", 390 c->state.needs.n, c->state.needs.p, 391 c->state.needs.t[0], c->state.needs.t[1]); 392 #endif 393 394 #endif // DEBUG__CODEGEN_ONLY 395 396 c->init_y = init_y; 397 c->step_y = step_y__generic; 398 399 #if ANDROID_ARM_CODEGEN 400 // we're going to have to generate some code... 401 // here, generate code for our pixel pipeline 402 const AssemblyKey<needs_t> key(c->state.needs); 403 sp<Assembly> assembly = gCodeCache.lookup(key); 404 if (assembly == 0) { 405 // create a new assembly region 406 sp<ScanlineAssembly> a = new ScanlineAssembly(c->state.needs, 407 ASSEMBLY_SCRATCH_SIZE); 408 // initialize our assembler 409 #if defined(__arm__) 410 GGLAssembler assembler( new ARMAssembler(a) ); 411 //GGLAssembler assembler( 412 // new ARMAssemblerOptimizer(new ARMAssembler(a)) ); 413 #endif 414 #if defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6 415 GGLAssembler assembler( new ArmToMipsAssembler(a) ); 416 #elif defined(__mips__) && defined(__LP64__) 417 GGLAssembler assembler( new ArmToMips64Assembler(a) ); 418 #elif defined(__aarch64__) 419 GGLAssembler assembler( new ArmToArm64Assembler(a) ); 420 #endif 421 // generate the scanline code for the given needs 422 bool err = assembler.scanline(c->state.needs, c) != 0; 423 if (ggl_likely(!err)) { 424 // finally, cache this assembly 425 err = gCodeCache.cache(a->key(), a) < 0; 426 } 427 if (ggl_unlikely(err)) { 428 ALOGE("error generating or caching assembly. 
Reverting to NOP."); 429 c->scanline = scanline_noop; 430 c->init_y = init_y_noop; 431 c->step_y = step_y__nop; 432 return; 433 } 434 assembly = a; 435 } 436 437 // release the previous assembly 438 if (c->scanline_as) { 439 c->scanline_as->decStrong(c); 440 } 441 442 //ALOGI("using generated pixel-pipeline"); 443 c->scanline_as = assembly.get(); 444 c->scanline_as->incStrong(c); // hold on to assembly 445 c->scanline = (void(*)(context_t* c))assembly->base(); 446 #else 447 // ALOGW("using generic (slow) pixel-pipeline"); 448 c->scanline = scanline; 449 #endif 450 } 451 452 void ggl_pick_scanline(context_t* c) 453 { 454 pick_scanline(c); 455 if ((c->state.enables & GGL_ENABLE_W) && 456 (c->state.enables & GGL_ENABLE_TMUS)) 457 { 458 c->span = c->scanline; 459 c->scanline = scanline_perspective; 460 if (!(c->state.enabled_tmu & (c->state.enabled_tmu - 1))) { 461 // only one TMU enabled 462 c->scanline = scanline_perspective_single; 463 } 464 } 465 } 466 467 // ---------------------------------------------------------------------------- 468 469 static void blending(context_t* c, pixel_t* fragment, pixel_t* fb); 470 static void blend_factor(context_t* c, pixel_t* r, uint32_t factor, 471 const pixel_t* src, const pixel_t* dst); 472 static void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv); 473 474 #if ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED) 475 476 // no need to compile the generic-pipeline, it can't be reached 477 void scanline(context_t*) 478 { 479 } 480 481 #else 482 483 void rescale(uint32_t& u, uint8_t& su, uint32_t& v, uint8_t& sv) 484 { 485 if (su && sv) { 486 if (su > sv) { 487 v = ggl_expand(v, sv, su); 488 sv = su; 489 } else if (su < sv) { 490 u = ggl_expand(u, su, sv); 491 su = sv; 492 } 493 } 494 } 495 496 void blending(context_t* c, pixel_t* fragment, pixel_t* fb) 497 { 498 rescale(fragment->c[0], fragment->s[0], fb->c[0], fb->s[0]); 499 rescale(fragment->c[1], fragment->s[1], fb->c[1], fb->s[1]); 500 rescale(fragment->c[2], fragment->s[2], fb->c[2], fb->s[2]); 501 rescale(fragment->c[3], fragment->s[3], fb->c[3], fb->s[3]); 502 503 pixel_t sf, df; 504 blend_factor(c, &sf, c->state.blend.src, fragment, fb); 505 blend_factor(c, &df, c->state.blend.dst, fragment, fb); 506 507 fragment->c[1] = 508 gglMulAddx(fragment->c[1], sf.c[1], gglMulx(fb->c[1], df.c[1])); 509 fragment->c[2] = 510 gglMulAddx(fragment->c[2], sf.c[2], gglMulx(fb->c[2], df.c[2])); 511 fragment->c[3] = 512 gglMulAddx(fragment->c[3], sf.c[3], gglMulx(fb->c[3], df.c[3])); 513 514 if (c->state.blend.alpha_separate) { 515 blend_factor(c, &sf, c->state.blend.src_alpha, fragment, fb); 516 blend_factor(c, &df, c->state.blend.dst_alpha, fragment, fb); 517 } 518 519 fragment->c[0] = 520 gglMulAddx(fragment->c[0], sf.c[0], gglMulx(fb->c[0], df.c[0])); 521 522 // clamp to 1.0 523 if (fragment->c[0] >= (1LU<<fragment->s[0])) 524 fragment->c[0] = (1<<fragment->s[0])-1; 525 if (fragment->c[1] >= (1LU<<fragment->s[1])) 526 fragment->c[1] = (1<<fragment->s[1])-1; 527 if (fragment->c[2] >= (1LU<<fragment->s[2])) 528 fragment->c[2] = (1<<fragment->s[2])-1; 529 if (fragment->c[3] >= (1LU<<fragment->s[3])) 530 fragment->c[3] = (1<<fragment->s[3])-1; 531 } 532 533 static inline int blendfactor(uint32_t x, uint32_t size, uint32_t def = 0) 534 { 535 if (!size) 536 return def; 537 538 // scale to 16 bits 539 if (size > 16) { 540 x >>= (size - 16); 541 } else if (size < 16) { 542 x = ggl_expand(x, size, 16); 543 } 544 x += x >> 15; 545 return x; 546 } 547 548 void blend_factor(context_t* /*c*/, 
pixel_t* r, 549 uint32_t factor, const pixel_t* src, const pixel_t* dst) 550 { 551 switch (factor) { 552 case GGL_ZERO: 553 r->c[1] = 554 r->c[2] = 555 r->c[3] = 556 r->c[0] = 0; 557 break; 558 case GGL_ONE: 559 r->c[1] = 560 r->c[2] = 561 r->c[3] = 562 r->c[0] = FIXED_ONE; 563 break; 564 case GGL_DST_COLOR: 565 r->c[1] = blendfactor(dst->c[1], dst->s[1]); 566 r->c[2] = blendfactor(dst->c[2], dst->s[2]); 567 r->c[3] = blendfactor(dst->c[3], dst->s[3]); 568 r->c[0] = blendfactor(dst->c[0], dst->s[0]); 569 break; 570 case GGL_SRC_COLOR: 571 r->c[1] = blendfactor(src->c[1], src->s[1]); 572 r->c[2] = blendfactor(src->c[2], src->s[2]); 573 r->c[3] = blendfactor(src->c[3], src->s[3]); 574 r->c[0] = blendfactor(src->c[0], src->s[0]); 575 break; 576 case GGL_ONE_MINUS_DST_COLOR: 577 r->c[1] = FIXED_ONE - blendfactor(dst->c[1], dst->s[1]); 578 r->c[2] = FIXED_ONE - blendfactor(dst->c[2], dst->s[2]); 579 r->c[3] = FIXED_ONE - blendfactor(dst->c[3], dst->s[3]); 580 r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0]); 581 break; 582 case GGL_ONE_MINUS_SRC_COLOR: 583 r->c[1] = FIXED_ONE - blendfactor(src->c[1], src->s[1]); 584 r->c[2] = FIXED_ONE - blendfactor(src->c[2], src->s[2]); 585 r->c[3] = FIXED_ONE - blendfactor(src->c[3], src->s[3]); 586 r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0]); 587 break; 588 case GGL_SRC_ALPHA: 589 r->c[1] = 590 r->c[2] = 591 r->c[3] = 592 r->c[0] = blendfactor(src->c[0], src->s[0], FIXED_ONE); 593 break; 594 case GGL_ONE_MINUS_SRC_ALPHA: 595 r->c[1] = 596 r->c[2] = 597 r->c[3] = 598 r->c[0] = FIXED_ONE - blendfactor(src->c[0], src->s[0], FIXED_ONE); 599 break; 600 case GGL_DST_ALPHA: 601 r->c[1] = 602 r->c[2] = 603 r->c[3] = 604 r->c[0] = blendfactor(dst->c[0], dst->s[0], FIXED_ONE); 605 break; 606 case GGL_ONE_MINUS_DST_ALPHA: 607 r->c[1] = 608 r->c[2] = 609 r->c[3] = 610 r->c[0] = FIXED_ONE - blendfactor(dst->c[0], dst->s[0], FIXED_ONE); 611 break; 612 case GGL_SRC_ALPHA_SATURATE: 613 // XXX: GGL_SRC_ALPHA_SATURATE 614 break; 615 } 616 } 617 618 static GGLfixed wrapping(int32_t coord, uint32_t size, int tx_wrap) 619 { 620 GGLfixed d; 621 if (tx_wrap == GGL_REPEAT) { 622 d = (uint32_t(coord)>>16) * size; 623 } else if (tx_wrap == GGL_CLAMP) { // CLAMP_TO_EDGE semantics 624 const GGLfixed clamp_min = FIXED_HALF; 625 const GGLfixed clamp_max = (size << 16) - FIXED_HALF; 626 if (coord < clamp_min) coord = clamp_min; 627 if (coord > clamp_max) coord = clamp_max; 628 d = coord; 629 } else { // 1:1 630 const GGLfixed clamp_min = 0; 631 const GGLfixed clamp_max = (size << 16); 632 if (coord < clamp_min) coord = clamp_min; 633 if (coord > clamp_max) coord = clamp_max; 634 d = coord; 635 } 636 return d; 637 } 638 639 static inline 640 GGLcolor ADJUST_COLOR_ITERATOR(GGLcolor v, GGLcolor dvdx, int len) 641 { 642 const int32_t end = dvdx * (len-1) + v; 643 if (end < 0) 644 v -= end; 645 v &= ~(v>>31); 646 return v; 647 } 648 649 void scanline(context_t* c) 650 { 651 const uint32_t enables = c->state.enables; 652 const int xs = c->iterators.xl; 653 const int x1 = c->iterators.xr; 654 int xc = x1 - xs; 655 const int16_t* covPtr = c->state.buffers.coverage + xs; 656 657 // All iterated values are sampled at the pixel center 658 659 // reset iterators for that scanline... 
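    /* Overview of the generic pipeline below, per fragment: the iterated
     * r/g/b/a are truncated to 8 bits, each enabled TMU is sampled (nearest
     * or bilinear) and combined through the texture environment, then
     * coverage, alpha test, depth test, fog and blending are applied before
     * the final write to the color buffer; a failed test jumps to 'discard'.
     */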
660 GGLcolor r, g, b, a; 661 iterators_t& ci = c->iterators; 662 if (enables & GGL_ENABLE_SMOOTH) { 663 r = (xs * c->shade.drdx) + ci.ydrdy; 664 g = (xs * c->shade.dgdx) + ci.ydgdy; 665 b = (xs * c->shade.dbdx) + ci.ydbdy; 666 a = (xs * c->shade.dadx) + ci.ydady; 667 r = ADJUST_COLOR_ITERATOR(r, c->shade.drdx, xc); 668 g = ADJUST_COLOR_ITERATOR(g, c->shade.dgdx, xc); 669 b = ADJUST_COLOR_ITERATOR(b, c->shade.dbdx, xc); 670 a = ADJUST_COLOR_ITERATOR(a, c->shade.dadx, xc); 671 } else { 672 r = ci.ydrdy; 673 g = ci.ydgdy; 674 b = ci.ydbdy; 675 a = ci.ydady; 676 } 677 678 // z iterators are 1.31 679 GGLfixed z = (xs * c->shade.dzdx) + ci.ydzdy; 680 GGLfixed f = (xs * c->shade.dfdx) + ci.ydfdy; 681 682 struct { 683 GGLfixed s, t; 684 } tc[GGL_TEXTURE_UNIT_COUNT]; 685 if (enables & GGL_ENABLE_TMUS) { 686 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 687 if (c->state.texture[i].enable) { 688 texture_iterators_t& ti = c->state.texture[i].iterators; 689 if (enables & GGL_ENABLE_W) { 690 tc[i].s = ti.ydsdy; 691 tc[i].t = ti.ydtdy; 692 } else { 693 tc[i].s = (xs * ti.dsdx) + ti.ydsdy; 694 tc[i].t = (xs * ti.dtdx) + ti.ydtdy; 695 } 696 } 697 } 698 } 699 700 pixel_t fragment; 701 pixel_t texel; 702 pixel_t fb; 703 704 uint32_t x = xs; 705 uint32_t y = c->iterators.y; 706 707 while (xc--) { 708 709 { // just a scope 710 711 // read color (convert to 8 bits by keeping only the integer part) 712 fragment.s[1] = fragment.s[2] = 713 fragment.s[3] = fragment.s[0] = 8; 714 fragment.c[1] = r >> (GGL_COLOR_BITS-8); 715 fragment.c[2] = g >> (GGL_COLOR_BITS-8); 716 fragment.c[3] = b >> (GGL_COLOR_BITS-8); 717 fragment.c[0] = a >> (GGL_COLOR_BITS-8); 718 719 // texturing 720 if (enables & GGL_ENABLE_TMUS) { 721 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 722 texture_t& tx = c->state.texture[i]; 723 if (!tx.enable) 724 continue; 725 texture_iterators_t& ti = tx.iterators; 726 int32_t u, v; 727 728 // s-coordinate 729 if (tx.s_coord != GGL_ONE_TO_ONE) { 730 const int w = tx.surface.width; 731 u = wrapping(tc[i].s, w, tx.s_wrap); 732 tc[i].s += ti.dsdx; 733 } else { 734 u = (((tx.shade.is0>>16) + x)<<16) + FIXED_HALF; 735 } 736 737 // t-coordinate 738 if (tx.t_coord != GGL_ONE_TO_ONE) { 739 const int h = tx.surface.height; 740 v = wrapping(tc[i].t, h, tx.t_wrap); 741 tc[i].t += ti.dtdx; 742 } else { 743 v = (((tx.shade.it0>>16) + y)<<16) + FIXED_HALF; 744 } 745 746 // read texture 747 if (tx.mag_filter == GGL_NEAREST && 748 tx.min_filter == GGL_NEAREST) 749 { 750 u >>= 16; 751 v >>= 16; 752 tx.surface.read(&tx.surface, c, u, v, &texel); 753 } else { 754 const int w = tx.surface.width; 755 const int h = tx.surface.height; 756 u -= FIXED_HALF; 757 v -= FIXED_HALF; 758 int u0 = u >> 16; 759 int v0 = v >> 16; 760 int u1 = u0 + 1; 761 int v1 = v0 + 1; 762 if (tx.s_wrap == GGL_REPEAT) { 763 if (u0<0) u0 += w; 764 if (u1<0) u1 += w; 765 if (u0>=w) u0 -= w; 766 if (u1>=w) u1 -= w; 767 } else { 768 if (u0<0) u0 = 0; 769 if (u1<0) u1 = 0; 770 if (u0>=w) u0 = w-1; 771 if (u1>=w) u1 = w-1; 772 } 773 if (tx.t_wrap == GGL_REPEAT) { 774 if (v0<0) v0 += h; 775 if (v1<0) v1 += h; 776 if (v0>=h) v0 -= h; 777 if (v1>=h) v1 -= h; 778 } else { 779 if (v0<0) v0 = 0; 780 if (v1<0) v1 = 0; 781 if (v0>=h) v0 = h-1; 782 if (v1>=h) v1 = h-1; 783 } 784 pixel_t texels[4]; 785 uint32_t mm[4]; 786 tx.surface.read(&tx.surface, c, u0, v0, &texels[0]); 787 tx.surface.read(&tx.surface, c, u0, v1, &texels[1]); 788 tx.surface.read(&tx.surface, c, u1, v0, &texels[2]); 789 tx.surface.read(&tx.surface, c, u1, v1, &texels[3]); 790 u = (u >> 12) & 
0xF; 791 v = (v >> 12) & 0xF; 792 u += u>>3; 793 v += v>>3; 794 mm[0] = (0x10 - u) * (0x10 - v); 795 mm[1] = (0x10 - u) * v; 796 mm[2] = u * (0x10 - v); 797 mm[3] = 0x100 - (mm[0] + mm[1] + mm[2]); 798 for (int j=0 ; j<4 ; j++) { 799 texel.s[j] = texels[0].s[j]; 800 if (!texel.s[j]) continue; 801 texel.s[j] += 8; 802 texel.c[j] = texels[0].c[j]*mm[0] + 803 texels[1].c[j]*mm[1] + 804 texels[2].c[j]*mm[2] + 805 texels[3].c[j]*mm[3] ; 806 } 807 } 808 809 // Texture environnement... 810 for (int j=0 ; j<4 ; j++) { 811 uint32_t& Cf = fragment.c[j]; 812 uint32_t& Ct = texel.c[j]; 813 uint8_t& sf = fragment.s[j]; 814 uint8_t& st = texel.s[j]; 815 uint32_t At = texel.c[0]; 816 uint8_t sat = texel.s[0]; 817 switch (tx.env) { 818 case GGL_REPLACE: 819 if (st) { 820 Cf = Ct; 821 sf = st; 822 } 823 break; 824 case GGL_MODULATE: 825 if (st) { 826 uint32_t factor = Ct + (Ct>>(st-1)); 827 Cf = (Cf * factor) >> st; 828 } 829 break; 830 case GGL_DECAL: 831 if (sat) { 832 rescale(Cf, sf, Ct, st); 833 Cf += ((Ct - Cf) * (At + (At>>(sat-1)))) >> sat; 834 } 835 break; 836 case GGL_BLEND: 837 if (st) { 838 uint32_t Cc = tx.env_color[i]; 839 if (sf>8) Cc = (Cc * ((1<<sf)-1))>>8; 840 else if (sf<8) Cc = (Cc - (Cc>>(8-sf)))>>(8-sf); 841 uint32_t factor = Ct + (Ct>>(st-1)); 842 Cf = ((((1<<st) - factor) * Cf) + Ct*Cc)>>st; 843 } 844 break; 845 case GGL_ADD: 846 if (st) { 847 rescale(Cf, sf, Ct, st); 848 Cf += Ct; 849 } 850 break; 851 } 852 } 853 } 854 } 855 856 // coverage application 857 if (enables & GGL_ENABLE_AA) { 858 int16_t cf = *covPtr++; 859 fragment.c[0] = (int64_t(fragment.c[0]) * cf) >> 15; 860 } 861 862 // alpha-test 863 if (enables & GGL_ENABLE_ALPHA_TEST) { 864 GGLcolor ref = c->state.alpha_test.ref; 865 GGLcolor alpha = (uint64_t(fragment.c[0]) * 866 ((1<<GGL_COLOR_BITS)-1)) / ((1<<fragment.s[0])-1); 867 switch (c->state.alpha_test.func) { 868 case GGL_NEVER: goto discard; 869 case GGL_LESS: if (alpha<ref) break; goto discard; 870 case GGL_EQUAL: if (alpha==ref) break; goto discard; 871 case GGL_LEQUAL: if (alpha<=ref) break; goto discard; 872 case GGL_GREATER: if (alpha>ref) break; goto discard; 873 case GGL_NOTEQUAL: if (alpha!=ref) break; goto discard; 874 case GGL_GEQUAL: if (alpha>=ref) break; goto discard; 875 } 876 } 877 878 // depth test 879 if (c->state.buffers.depth.format) { 880 if (enables & GGL_ENABLE_DEPTH_TEST) { 881 surface_t* cb = &(c->state.buffers.depth); 882 uint16_t* p = (uint16_t*)(cb->data)+(x+(cb->stride*y)); 883 uint16_t zz = uint32_t(z)>>(16); 884 uint16_t depth = *p; 885 switch (c->state.depth_test.func) { 886 case GGL_NEVER: goto discard; 887 case GGL_LESS: if (zz<depth) break; goto discard; 888 case GGL_EQUAL: if (zz==depth) break; goto discard; 889 case GGL_LEQUAL: if (zz<=depth) break; goto discard; 890 case GGL_GREATER: if (zz>depth) break; goto discard; 891 case GGL_NOTEQUAL: if (zz!=depth) break; goto discard; 892 case GGL_GEQUAL: if (zz>=depth) break; goto discard; 893 } 894 // depth buffer is not enabled, if depth-test is not enabled 895 /* 896 fragment.s[1] = fragment.s[2] = 897 fragment.s[3] = fragment.s[0] = 8; 898 fragment.c[1] = 899 fragment.c[2] = 900 fragment.c[3] = 901 fragment.c[0] = 255 - (zz>>8); 902 */ 903 if (c->state.mask.depth) { 904 *p = zz; 905 } 906 } 907 } 908 909 // fog 910 if (enables & GGL_ENABLE_FOG) { 911 for (int i=1 ; i<=3 ; i++) { 912 GGLfixed fc = (c->state.fog.color[i] * 0x10000) / 0xFF; 913 uint32_t& c = fragment.c[i]; 914 uint8_t& s = fragment.s[i]; 915 c = (c * 0x10000) / ((1<<s)-1); 916 c = gglMulAddx(c, f, gglMulx(fc, 0x10000 - 
f)); 917 s = 16; 918 } 919 } 920 921 // blending 922 if (enables & GGL_ENABLE_BLENDING) { 923 fb.c[1] = fb.c[2] = fb.c[3] = fb.c[0] = 0; // placate valgrind 924 fb.s[1] = fb.s[2] = fb.s[3] = fb.s[0] = 0; 925 c->state.buffers.color.read( 926 &(c->state.buffers.color), c, x, y, &fb); 927 blending( c, &fragment, &fb ); 928 } 929 930 // write 931 c->state.buffers.color.write( 932 &(c->state.buffers.color), c, x, y, &fragment); 933 } 934 935 discard: 936 // iterate... 937 x += 1; 938 if (enables & GGL_ENABLE_SMOOTH) { 939 r += c->shade.drdx; 940 g += c->shade.dgdx; 941 b += c->shade.dbdx; 942 a += c->shade.dadx; 943 } 944 z += c->shade.dzdx; 945 f += c->shade.dfdx; 946 } 947 } 948 949 #endif // ANDROID_ARM_CODEGEN && (ANDROID_CODEGEN == ANDROID_CODEGEN_GENERATED) 950 951 // ---------------------------------------------------------------------------- 952 #if 0 953 #pragma mark - 954 #pragma mark Scanline 955 #endif 956 957 /* Used to parse a 32-bit source texture linearly. Usage is: 958 * 959 * horz_iterator32 hi(context); 960 * while (...) { 961 * uint32_t src_pixel = hi.get_pixel32(); 962 * ... 963 * } 964 * 965 * Use only for one-to-one texture mapping. 966 */ 967 struct horz_iterator32 { 968 horz_iterator32(context_t* c) { 969 const int x = c->iterators.xl; 970 const int y = c->iterators.y; 971 texture_t& tx = c->state.texture[0]; 972 const int32_t u = (tx.shade.is0>>16) + x; 973 const int32_t v = (tx.shade.it0>>16) + y; 974 m_src = reinterpret_cast<uint32_t*>(tx.surface.data)+(u+(tx.surface.stride*v)); 975 } 976 uint32_t get_pixel32() { 977 return *m_src++; 978 } 979 protected: 980 uint32_t* m_src; 981 }; 982 983 /* A variant for 16-bit source textures. */ 984 struct horz_iterator16 { 985 horz_iterator16(context_t* c) { 986 const int x = c->iterators.xl; 987 const int y = c->iterators.y; 988 texture_t& tx = c->state.texture[0]; 989 const int32_t u = (tx.shade.is0>>16) + x; 990 const int32_t v = (tx.shade.it0>>16) + y; 991 m_src = reinterpret_cast<uint16_t*>(tx.surface.data)+(u+(tx.surface.stride*v)); 992 } 993 uint16_t get_pixel16() { 994 return *m_src++; 995 } 996 protected: 997 uint16_t* m_src; 998 }; 999 1000 /* A clamp iterator is used to iterate inside a texture with GGL_CLAMP. 1001 * After initialization, call get_src16() or get_src32() to get the current 1002 * texture pixel value. 
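 *
 * (In the struct below the accessors are named get_pixel16()/get_pixel32().)
 * Typical usage, as in scanline_t32cb16_clamp():
 *
 *   clamp_iterator ci(c);
 *   dst_iterator16 di(c);
 *   while (di.count--) {
 *       *di.dst++ = convertAbgr8888ToRgb565(ci.get_pixel32());
 *   }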
1003 */ 1004 struct clamp_iterator { 1005 clamp_iterator(context_t* c) { 1006 const int xs = c->iterators.xl; 1007 texture_t& tx = c->state.texture[0]; 1008 texture_iterators_t& ti = tx.iterators; 1009 m_s = (xs * ti.dsdx) + ti.ydsdy; 1010 m_t = (xs * ti.dtdx) + ti.ydtdy; 1011 m_ds = ti.dsdx; 1012 m_dt = ti.dtdx; 1013 m_width_m1 = tx.surface.width - 1; 1014 m_height_m1 = tx.surface.height - 1; 1015 m_data = tx.surface.data; 1016 m_stride = tx.surface.stride; 1017 } 1018 uint16_t get_pixel16() { 1019 int u, v; 1020 get_uv(u, v); 1021 uint16_t* src = reinterpret_cast<uint16_t*>(m_data) + (u + (m_stride*v)); 1022 return src[0]; 1023 } 1024 uint32_t get_pixel32() { 1025 int u, v; 1026 get_uv(u, v); 1027 uint32_t* src = reinterpret_cast<uint32_t*>(m_data) + (u + (m_stride*v)); 1028 return src[0]; 1029 } 1030 private: 1031 void get_uv(int& u, int& v) { 1032 int uu = m_s >> 16; 1033 int vv = m_t >> 16; 1034 if (uu < 0) 1035 uu = 0; 1036 if (uu > m_width_m1) 1037 uu = m_width_m1; 1038 if (vv < 0) 1039 vv = 0; 1040 if (vv > m_height_m1) 1041 vv = m_height_m1; 1042 u = uu; 1043 v = vv; 1044 m_s += m_ds; 1045 m_t += m_dt; 1046 } 1047 1048 GGLfixed m_s, m_t; 1049 GGLfixed m_ds, m_dt; 1050 int m_width_m1, m_height_m1; 1051 uint8_t* m_data; 1052 int m_stride; 1053 }; 1054 1055 /* 1056 * The 'horizontal clamp iterator' variant corresponds to the case where 1057 * the 'v' coordinate doesn't change. This is useful to avoid one mult and 1058 * extra adds / checks per pixels, if the blending/processing operation after 1059 * this is very fast. 1060 */ 1061 static int is_context_horizontal(const context_t* c) { 1062 return (c->state.texture[0].iterators.dtdx == 0); 1063 } 1064 1065 struct horz_clamp_iterator { 1066 uint16_t get_pixel16() { 1067 int u = m_s >> 16; 1068 m_s += m_ds; 1069 if (u < 0) 1070 u = 0; 1071 if (u > m_width_m1) 1072 u = m_width_m1; 1073 const uint16_t* src = reinterpret_cast<const uint16_t*>(m_data); 1074 return src[u]; 1075 } 1076 uint32_t get_pixel32() { 1077 int u = m_s >> 16; 1078 m_s += m_ds; 1079 if (u < 0) 1080 u = 0; 1081 if (u > m_width_m1) 1082 u = m_width_m1; 1083 const uint32_t* src = reinterpret_cast<const uint32_t*>(m_data); 1084 return src[u]; 1085 } 1086 protected: 1087 void init(const context_t* c, int shift); 1088 GGLfixed m_s; 1089 GGLfixed m_ds; 1090 int m_width_m1; 1091 const uint8_t* m_data; 1092 }; 1093 1094 void horz_clamp_iterator::init(const context_t* c, int shift) 1095 { 1096 const int xs = c->iterators.xl; 1097 const texture_t& tx = c->state.texture[0]; 1098 const texture_iterators_t& ti = tx.iterators; 1099 m_s = (xs * ti.dsdx) + ti.ydsdy; 1100 m_ds = ti.dsdx; 1101 m_width_m1 = tx.surface.width-1; 1102 m_data = tx.surface.data; 1103 1104 GGLfixed t = (xs * ti.dtdx) + ti.ydtdy; 1105 int v = t >> 16; 1106 if (v < 0) 1107 v = 0; 1108 else if (v >= (int)tx.surface.height) 1109 v = (int)tx.surface.height-1; 1110 1111 m_data += (tx.surface.stride*v) << shift; 1112 } 1113 1114 struct horz_clamp_iterator16 : horz_clamp_iterator { 1115 horz_clamp_iterator16(const context_t* c) { 1116 init(c,1); 1117 }; 1118 }; 1119 1120 struct horz_clamp_iterator32 : horz_clamp_iterator { 1121 horz_clamp_iterator32(context_t* c) { 1122 init(c,2); 1123 }; 1124 }; 1125 1126 /* This is used to perform dithering operations. 
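 *
 * The constructor selects the ditherMatrix row for the current y; get_value()
 * returns the threshold for the current x and advances. Typical usage, as in
 * scanline_t32cb16blend_dither():
 *
 *   horz_iterator32 hi(c);
 *   dst_iterator16 di(c);
 *   ditherer dither(c);
 *   blender_32to16 bl(c);
 *   while (di.count--) {
 *       bl.write(hi.get_pixel32(), di.dst, dither);
 *       di.dst++;
 *   }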
1127 */ 1128 struct ditherer { 1129 ditherer(const context_t* c) { 1130 const int x = c->iterators.xl; 1131 const int y = c->iterators.y; 1132 m_line = &c->ditherMatrix[ ((y & GGL_DITHER_MASK)<<GGL_DITHER_ORDER_SHIFT) ]; 1133 m_index = x & GGL_DITHER_MASK; 1134 } 1135 void step(void) { 1136 m_index++; 1137 } 1138 int get_value(void) { 1139 int ret = m_line[m_index & GGL_DITHER_MASK]; 1140 m_index++; 1141 return ret; 1142 } 1143 uint16_t abgr8888ToRgb565(uint32_t s) { 1144 uint32_t r = s & 0xff; 1145 uint32_t g = (s >> 8) & 0xff; 1146 uint32_t b = (s >> 16) & 0xff; 1147 return rgb888ToRgb565(r,g,b); 1148 } 1149 /* The following assumes that r/g/b are in the 0..255 range each */ 1150 uint16_t rgb888ToRgb565(uint32_t& r, uint32_t& g, uint32_t &b) { 1151 int threshold = get_value(); 1152 /* dither in on GGL_DITHER_BITS, and each of r, g, b is on 8 bits */ 1153 r += (threshold >> (GGL_DITHER_BITS-8 +5)); 1154 g += (threshold >> (GGL_DITHER_BITS-8 +6)); 1155 b += (threshold >> (GGL_DITHER_BITS-8 +5)); 1156 if (r > 0xff) 1157 r = 0xff; 1158 if (g > 0xff) 1159 g = 0xff; 1160 if (b > 0xff) 1161 b = 0xff; 1162 return uint16_t(((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3)); 1163 } 1164 protected: 1165 const uint8_t* m_line; 1166 int m_index; 1167 }; 1168 1169 /* This structure is used to blend (SRC_OVER) 32-bit source pixels 1170 * onto 16-bit destination ones. Usage is simply: 1171 * 1172 * blender.blend(<32-bit-src-pixel-value>,<ptr-to-16-bit-dest-pixel>) 1173 */ 1174 struct blender_32to16 { 1175 blender_32to16(context_t* /*c*/) { } 1176 void write(uint32_t s, uint16_t* dst) { 1177 if (s == 0) 1178 return; 1179 s = GGL_RGBA_TO_HOST(s); 1180 int sA = (s>>24); 1181 if (sA == 0xff) { 1182 *dst = convertAbgr8888ToRgb565(s); 1183 } else { 1184 int f = 0x100 - (sA + (sA>>7)); 1185 int sR = (s >> ( 3))&0x1F; 1186 int sG = (s >> ( 8+2))&0x3F; 1187 int sB = (s >> (16+3))&0x1F; 1188 uint16_t d = *dst; 1189 int dR = (d>>11)&0x1f; 1190 int dG = (d>>5)&0x3f; 1191 int dB = (d)&0x1f; 1192 sR += (f*dR)>>8; 1193 sG += (f*dG)>>8; 1194 sB += (f*dB)>>8; 1195 *dst = uint16_t((sR<<11)|(sG<<5)|sB); 1196 } 1197 } 1198 void write(uint32_t s, uint16_t* dst, ditherer& di) { 1199 if (s == 0) { 1200 di.step(); 1201 return; 1202 } 1203 s = GGL_RGBA_TO_HOST(s); 1204 int sA = (s>>24); 1205 if (sA == 0xff) { 1206 *dst = di.abgr8888ToRgb565(s); 1207 } else { 1208 int threshold = di.get_value() << (8 - GGL_DITHER_BITS); 1209 int f = 0x100 - (sA + (sA>>7)); 1210 int sR = (s >> ( 3))&0x1F; 1211 int sG = (s >> ( 8+2))&0x3F; 1212 int sB = (s >> (16+3))&0x1F; 1213 uint16_t d = *dst; 1214 int dR = (d>>11)&0x1f; 1215 int dG = (d>>5)&0x3f; 1216 int dB = (d)&0x1f; 1217 sR = ((sR << 8) + f*dR + threshold)>>8; 1218 sG = ((sG << 8) + f*dG + threshold)>>8; 1219 sB = ((sB << 8) + f*dB + threshold)>>8; 1220 if (sR > 0x1f) sR = 0x1f; 1221 if (sG > 0x3f) sG = 0x3f; 1222 if (sB > 0x1f) sB = 0x1f; 1223 *dst = uint16_t((sR<<11)|(sG<<5)|sB); 1224 } 1225 } 1226 }; 1227 1228 /* This blender does the same for the 'blend_srca' operation. 
1229 * where dstFactor=srcA*(1-srcA) srcFactor=srcA 1230 */ 1231 struct blender_32to16_srcA { 1232 blender_32to16_srcA(const context_t* /*c*/) { } 1233 void write(uint32_t s, uint16_t* dst) { 1234 if (!s) { 1235 return; 1236 } 1237 uint16_t d = *dst; 1238 s = GGL_RGBA_TO_HOST(s); 1239 int sR = (s >> ( 3))&0x1F; 1240 int sG = (s >> ( 8+2))&0x3F; 1241 int sB = (s >> (16+3))&0x1F; 1242 int sA = (s>>24); 1243 int f1 = (sA + (sA>>7)); 1244 int f2 = 0x100-f1; 1245 int dR = (d>>11)&0x1f; 1246 int dG = (d>>5)&0x3f; 1247 int dB = (d)&0x1f; 1248 sR = (f1*sR + f2*dR)>>8; 1249 sG = (f1*sG + f2*dG)>>8; 1250 sB = (f1*sB + f2*dB)>>8; 1251 *dst = uint16_t((sR<<11)|(sG<<5)|sB); 1252 } 1253 }; 1254 1255 /* Common init code the modulating blenders */ 1256 struct blender_modulate { 1257 void init(const context_t* c) { 1258 const int r = c->iterators.ydrdy >> (GGL_COLOR_BITS-8); 1259 const int g = c->iterators.ydgdy >> (GGL_COLOR_BITS-8); 1260 const int b = c->iterators.ydbdy >> (GGL_COLOR_BITS-8); 1261 const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8); 1262 m_r = r + (r >> 7); 1263 m_g = g + (g >> 7); 1264 m_b = b + (b >> 7); 1265 m_a = a + (a >> 7); 1266 } 1267 protected: 1268 int m_r, m_g, m_b, m_a; 1269 }; 1270 1271 /* This blender does a normal blend after modulation. 1272 */ 1273 struct blender_32to16_modulate : blender_modulate { 1274 blender_32to16_modulate(const context_t* c) { 1275 init(c); 1276 } 1277 void write(uint32_t s, uint16_t* dst) { 1278 // blend source and destination 1279 if (!s) { 1280 return; 1281 } 1282 s = GGL_RGBA_TO_HOST(s); 1283 1284 /* We need to modulate s */ 1285 uint32_t sA = (s >> 24); 1286 uint32_t sB = (s >> 16) & 0xff; 1287 uint32_t sG = (s >> 8) & 0xff; 1288 uint32_t sR = s & 0xff; 1289 1290 sA = (sA*m_a) >> 8; 1291 /* Keep R/G/B scaled to 5.8 or 6.8 fixed float format */ 1292 sR = (sR*m_r) >> (8 - 5); 1293 sG = (sG*m_g) >> (8 - 6); 1294 sB = (sB*m_b) >> (8 - 5); 1295 1296 /* Now do a normal blend */ 1297 int f = 0x100 - (sA + (sA>>7)); 1298 uint16_t d = *dst; 1299 int dR = (d>>11)&0x1f; 1300 int dG = (d>>5)&0x3f; 1301 int dB = (d)&0x1f; 1302 sR = (sR + f*dR)>>8; 1303 sG = (sG + f*dG)>>8; 1304 sB = (sB + f*dB)>>8; 1305 *dst = uint16_t((sR<<11)|(sG<<5)|sB); 1306 } 1307 void write(uint32_t s, uint16_t* dst, ditherer& di) { 1308 // blend source and destination 1309 if (!s) { 1310 di.step(); 1311 return; 1312 } 1313 s = GGL_RGBA_TO_HOST(s); 1314 1315 /* We need to modulate s */ 1316 uint32_t sA = (s >> 24); 1317 uint32_t sB = (s >> 16) & 0xff; 1318 uint32_t sG = (s >> 8) & 0xff; 1319 uint32_t sR = s & 0xff; 1320 1321 sA = (sA*m_a) >> 8; 1322 /* keep R/G/B scaled to 5.8 or 6.8 fixed float format */ 1323 sR = (sR*m_r) >> (8 - 5); 1324 sG = (sG*m_g) >> (8 - 6); 1325 sB = (sB*m_b) >> (8 - 5); 1326 1327 /* Scale threshold to 0.8 fixed float format */ 1328 int threshold = di.get_value() << (8 - GGL_DITHER_BITS); 1329 int f = 0x100 - (sA + (sA>>7)); 1330 uint16_t d = *dst; 1331 int dR = (d>>11)&0x1f; 1332 int dG = (d>>5)&0x3f; 1333 int dB = (d)&0x1f; 1334 sR = (sR + f*dR + threshold)>>8; 1335 sG = (sG + f*dG + threshold)>>8; 1336 sB = (sB + f*dB + threshold)>>8; 1337 if (sR > 0x1f) sR = 0x1f; 1338 if (sG > 0x3f) sG = 0x3f; 1339 if (sB > 0x1f) sB = 0x1f; 1340 *dst = uint16_t((sR<<11)|(sG<<5)|sB); 1341 } 1342 }; 1343 1344 /* same as 32to16_modulate, except that the input is xRGB, instead of ARGB */ 1345 struct blender_x32to16_modulate : blender_modulate { 1346 blender_x32to16_modulate(const context_t* c) { 1347 init(c); 1348 } 1349 void write(uint32_t s, uint16_t* dst) { 1350 s = 
GGL_RGBA_TO_HOST(s); 1351 1352 uint32_t sB = (s >> 16) & 0xff; 1353 uint32_t sG = (s >> 8) & 0xff; 1354 uint32_t sR = s & 0xff; 1355 1356 /* Keep R/G/B in 5.8 or 6.8 format */ 1357 sR = (sR*m_r) >> (8 - 5); 1358 sG = (sG*m_g) >> (8 - 6); 1359 sB = (sB*m_b) >> (8 - 5); 1360 1361 int f = 0x100 - m_a; 1362 uint16_t d = *dst; 1363 int dR = (d>>11)&0x1f; 1364 int dG = (d>>5)&0x3f; 1365 int dB = (d)&0x1f; 1366 sR = (sR + f*dR)>>8; 1367 sG = (sG + f*dG)>>8; 1368 sB = (sB + f*dB)>>8; 1369 *dst = uint16_t((sR<<11)|(sG<<5)|sB); 1370 } 1371 void write(uint32_t s, uint16_t* dst, ditherer& di) { 1372 s = GGL_RGBA_TO_HOST(s); 1373 1374 uint32_t sB = (s >> 16) & 0xff; 1375 uint32_t sG = (s >> 8) & 0xff; 1376 uint32_t sR = s & 0xff; 1377 1378 sR = (sR*m_r) >> (8 - 5); 1379 sG = (sG*m_g) >> (8 - 6); 1380 sB = (sB*m_b) >> (8 - 5); 1381 1382 /* Now do a normal blend */ 1383 int threshold = di.get_value() << (8 - GGL_DITHER_BITS); 1384 int f = 0x100 - m_a; 1385 uint16_t d = *dst; 1386 int dR = (d>>11)&0x1f; 1387 int dG = (d>>5)&0x3f; 1388 int dB = (d)&0x1f; 1389 sR = (sR + f*dR + threshold)>>8; 1390 sG = (sG + f*dG + threshold)>>8; 1391 sB = (sB + f*dB + threshold)>>8; 1392 if (sR > 0x1f) sR = 0x1f; 1393 if (sG > 0x3f) sG = 0x3f; 1394 if (sB > 0x1f) sB = 0x1f; 1395 *dst = uint16_t((sR<<11)|(sG<<5)|sB); 1396 } 1397 }; 1398 1399 /* Same as above, but source is 16bit rgb565 */ 1400 struct blender_16to16_modulate : blender_modulate { 1401 blender_16to16_modulate(const context_t* c) { 1402 init(c); 1403 } 1404 void write(uint16_t s16, uint16_t* dst) { 1405 uint32_t s = s16; 1406 1407 uint32_t sR = s >> 11; 1408 uint32_t sG = (s >> 5) & 0x3f; 1409 uint32_t sB = s & 0x1f; 1410 1411 sR = (sR*m_r); 1412 sG = (sG*m_g); 1413 sB = (sB*m_b); 1414 1415 int f = 0x100 - m_a; 1416 uint16_t d = *dst; 1417 int dR = (d>>11)&0x1f; 1418 int dG = (d>>5)&0x3f; 1419 int dB = (d)&0x1f; 1420 sR = (sR + f*dR)>>8; 1421 sG = (sG + f*dG)>>8; 1422 sB = (sB + f*dB)>>8; 1423 *dst = uint16_t((sR<<11)|(sG<<5)|sB); 1424 } 1425 }; 1426 1427 /* This is used to iterate over a 16-bit destination color buffer. 
1428 * Usage is: 1429 * 1430 * dst_iterator16 di(context); 1431 * while (di.count--) { 1432 * <do stuff with dest pixel at di.dst> 1433 * di.dst++; 1434 * } 1435 */ 1436 struct dst_iterator16 { 1437 dst_iterator16(const context_t* c) { 1438 const int x = c->iterators.xl; 1439 const int width = c->iterators.xr - x; 1440 const int32_t y = c->iterators.y; 1441 const surface_t* cb = &(c->state.buffers.color); 1442 count = width; 1443 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 1444 } 1445 int count; 1446 uint16_t* dst; 1447 }; 1448 1449 1450 static void scanline_t32cb16_clamp(context_t* c) 1451 { 1452 dst_iterator16 di(c); 1453 1454 if (is_context_horizontal(c)) { 1455 /* Special case for simple horizontal scaling */ 1456 horz_clamp_iterator32 ci(c); 1457 while (di.count--) { 1458 uint32_t s = ci.get_pixel32(); 1459 *di.dst++ = convertAbgr8888ToRgb565(s); 1460 } 1461 } else { 1462 /* General case */ 1463 clamp_iterator ci(c); 1464 while (di.count--) { 1465 uint32_t s = ci.get_pixel32(); 1466 *di.dst++ = convertAbgr8888ToRgb565(s); 1467 } 1468 } 1469 } 1470 1471 static void scanline_t32cb16_dither(context_t* c) 1472 { 1473 horz_iterator32 si(c); 1474 dst_iterator16 di(c); 1475 ditherer dither(c); 1476 1477 while (di.count--) { 1478 uint32_t s = si.get_pixel32(); 1479 *di.dst++ = dither.abgr8888ToRgb565(s); 1480 } 1481 } 1482 1483 static void scanline_t32cb16_clamp_dither(context_t* c) 1484 { 1485 dst_iterator16 di(c); 1486 ditherer dither(c); 1487 1488 if (is_context_horizontal(c)) { 1489 /* Special case for simple horizontal scaling */ 1490 horz_clamp_iterator32 ci(c); 1491 while (di.count--) { 1492 uint32_t s = ci.get_pixel32(); 1493 *di.dst++ = dither.abgr8888ToRgb565(s); 1494 } 1495 } else { 1496 /* General case */ 1497 clamp_iterator ci(c); 1498 while (di.count--) { 1499 uint32_t s = ci.get_pixel32(); 1500 *di.dst++ = dither.abgr8888ToRgb565(s); 1501 } 1502 } 1503 } 1504 1505 static void scanline_t32cb16blend_dither(context_t* c) 1506 { 1507 dst_iterator16 di(c); 1508 ditherer dither(c); 1509 blender_32to16 bl(c); 1510 horz_iterator32 hi(c); 1511 while (di.count--) { 1512 uint32_t s = hi.get_pixel32(); 1513 bl.write(s, di.dst, dither); 1514 di.dst++; 1515 } 1516 } 1517 1518 static void scanline_t32cb16blend_clamp(context_t* c) 1519 { 1520 dst_iterator16 di(c); 1521 blender_32to16 bl(c); 1522 1523 if (is_context_horizontal(c)) { 1524 horz_clamp_iterator32 ci(c); 1525 while (di.count--) { 1526 uint32_t s = ci.get_pixel32(); 1527 bl.write(s, di.dst); 1528 di.dst++; 1529 } 1530 } else { 1531 clamp_iterator ci(c); 1532 while (di.count--) { 1533 uint32_t s = ci.get_pixel32(); 1534 bl.write(s, di.dst); 1535 di.dst++; 1536 } 1537 } 1538 } 1539 1540 static void scanline_t32cb16blend_clamp_dither(context_t* c) 1541 { 1542 dst_iterator16 di(c); 1543 ditherer dither(c); 1544 blender_32to16 bl(c); 1545 1546 clamp_iterator ci(c); 1547 while (di.count--) { 1548 uint32_t s = ci.get_pixel32(); 1549 bl.write(s, di.dst, dither); 1550 di.dst++; 1551 } 1552 } 1553 1554 void scanline_t32cb16blend_clamp_mod(context_t* c) 1555 { 1556 dst_iterator16 di(c); 1557 blender_32to16_modulate bl(c); 1558 1559 clamp_iterator ci(c); 1560 while (di.count--) { 1561 uint32_t s = ci.get_pixel32(); 1562 bl.write(s, di.dst); 1563 di.dst++; 1564 } 1565 } 1566 1567 void scanline_t32cb16blend_clamp_mod_dither(context_t* c) 1568 { 1569 dst_iterator16 di(c); 1570 blender_32to16_modulate bl(c); 1571 ditherer dither(c); 1572 1573 clamp_iterator ci(c); 1574 while (di.count--) { 1575 uint32_t s = ci.get_pixel32(); 
1576 bl.write(s, di.dst, dither); 1577 di.dst++; 1578 } 1579 } 1580 1581 /* Variant of scanline_t32cb16blend_clamp_mod with a xRGB texture */ 1582 void scanline_x32cb16blend_clamp_mod(context_t* c) 1583 { 1584 dst_iterator16 di(c); 1585 blender_x32to16_modulate bl(c); 1586 1587 clamp_iterator ci(c); 1588 while (di.count--) { 1589 uint32_t s = ci.get_pixel32(); 1590 bl.write(s, di.dst); 1591 di.dst++; 1592 } 1593 } 1594 1595 void scanline_x32cb16blend_clamp_mod_dither(context_t* c) 1596 { 1597 dst_iterator16 di(c); 1598 blender_x32to16_modulate bl(c); 1599 ditherer dither(c); 1600 1601 clamp_iterator ci(c); 1602 while (di.count--) { 1603 uint32_t s = ci.get_pixel32(); 1604 bl.write(s, di.dst, dither); 1605 di.dst++; 1606 } 1607 } 1608 1609 void scanline_t16cb16_clamp(context_t* c) 1610 { 1611 dst_iterator16 di(c); 1612 1613 /* Special case for simple horizontal scaling */ 1614 if (is_context_horizontal(c)) { 1615 horz_clamp_iterator16 ci(c); 1616 while (di.count--) { 1617 *di.dst++ = ci.get_pixel16(); 1618 } 1619 } else { 1620 clamp_iterator ci(c); 1621 while (di.count--) { 1622 *di.dst++ = ci.get_pixel16(); 1623 } 1624 } 1625 } 1626 1627 1628 1629 template <typename T, typename U> 1630 static inline __attribute__((const)) 1631 T interpolate(int y, T v0, U dvdx, U dvdy) { 1632 // interpolates in pixel's centers 1633 // v = v0 + (y + 0.5) * dvdy + (0.5 * dvdx) 1634 return (y * dvdy) + (v0 + ((dvdy + dvdx) >> 1)); 1635 } 1636 1637 // ---------------------------------------------------------------------------- 1638 #if 0 1639 #pragma mark - 1640 #endif 1641 1642 void init_y(context_t* c, int32_t ys) 1643 { 1644 const uint32_t enables = c->state.enables; 1645 1646 // compute iterators... 1647 iterators_t& ci = c->iterators; 1648 1649 // sample in the center 1650 ci.y = ys; 1651 1652 if (enables & (GGL_ENABLE_DEPTH_TEST|GGL_ENABLE_W|GGL_ENABLE_FOG)) { 1653 ci.ydzdy = interpolate(ys, c->shade.z0, c->shade.dzdx, c->shade.dzdy); 1654 ci.ydwdy = interpolate(ys, c->shade.w0, c->shade.dwdx, c->shade.dwdy); 1655 ci.ydfdy = interpolate(ys, c->shade.f0, c->shade.dfdx, c->shade.dfdy); 1656 } 1657 1658 if (ggl_unlikely(enables & GGL_ENABLE_SMOOTH)) { 1659 ci.ydrdy = interpolate(ys, c->shade.r0, c->shade.drdx, c->shade.drdy); 1660 ci.ydgdy = interpolate(ys, c->shade.g0, c->shade.dgdx, c->shade.dgdy); 1661 ci.ydbdy = interpolate(ys, c->shade.b0, c->shade.dbdx, c->shade.dbdy); 1662 ci.ydady = interpolate(ys, c->shade.a0, c->shade.dadx, c->shade.dady); 1663 c->step_y = step_y__smooth; 1664 } else { 1665 ci.ydrdy = c->shade.r0; 1666 ci.ydgdy = c->shade.g0; 1667 ci.ydbdy = c->shade.b0; 1668 ci.ydady = c->shade.a0; 1669 // XXX: do only if needed, or make sure this is fast 1670 c->packed = ggl_pack_color(c, c->state.buffers.color.format, 1671 ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady); 1672 c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888, 1673 ci.ydrdy, ci.ydgdy, ci.ydbdy, ci.ydady); 1674 } 1675 1676 // initialize the variables we need in the shader 1677 generated_vars_t& gen = c->generated_vars; 1678 gen.argb[GGLFormat::ALPHA].c = ci.ydady; 1679 gen.argb[GGLFormat::ALPHA].dx = c->shade.dadx; 1680 gen.argb[GGLFormat::RED ].c = ci.ydrdy; 1681 gen.argb[GGLFormat::RED ].dx = c->shade.drdx; 1682 gen.argb[GGLFormat::GREEN].c = ci.ydgdy; 1683 gen.argb[GGLFormat::GREEN].dx = c->shade.dgdx; 1684 gen.argb[GGLFormat::BLUE ].c = ci.ydbdy; 1685 gen.argb[GGLFormat::BLUE ].dx = c->shade.dbdx; 1686 gen.dzdx = c->shade.dzdx; 1687 gen.f = ci.ydfdy; 1688 gen.dfdx = c->shade.dfdx; 1689 1690 if (enables & GGL_ENABLE_TMUS) { 
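        /* For each enabled TMU: in 1:1 mode the s/t steps are zeroed and the
         * per-pixel code derives the coordinate from is0/it0 plus x/y itself;
         * otherwise ydsdy/ydtdy and the per-pixel steps are the interpolated
         * s/t at this y, shifted by sscale/tscale (non-CLAMP wrap modes get
         * 16 extra bits of scale). With GGL_ENABLE_W the per-span setup in
         * scanline_perspective() fills the iterators in instead. */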
1691 for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) { 1692 texture_t& t = c->state.texture[i]; 1693 if (!t.enable) continue; 1694 1695 texture_iterators_t& ti = t.iterators; 1696 if (t.s_coord == GGL_ONE_TO_ONE && t.t_coord == GGL_ONE_TO_ONE) { 1697 // we need to set all of these to 0 because in some cases 1698 // step_y__generic() or step_y__tmu() will be used and 1699 // therefore will update dtdy, however, in 1:1 mode 1700 // this is always done by the scanline rasterizer. 1701 ti.dsdx = ti.dsdy = ti.dtdx = ti.dtdy = 0; 1702 ti.ydsdy = t.shade.is0; 1703 ti.ydtdy = t.shade.it0; 1704 } else { 1705 const int adjustSWrap = ((t.s_wrap==GGL_CLAMP)?0:16); 1706 const int adjustTWrap = ((t.t_wrap==GGL_CLAMP)?0:16); 1707 ti.sscale = t.shade.sscale + adjustSWrap; 1708 ti.tscale = t.shade.tscale + adjustTWrap; 1709 if (!(enables & GGL_ENABLE_W)) { 1710 // S coordinate 1711 const int32_t sscale = ti.sscale; 1712 const int32_t sy = interpolate(ys, 1713 t.shade.is0, t.shade.idsdx, t.shade.idsdy); 1714 if (sscale>=0) { 1715 ti.ydsdy= sy << sscale; 1716 ti.dsdx = t.shade.idsdx << sscale; 1717 ti.dsdy = t.shade.idsdy << sscale; 1718 } else { 1719 ti.ydsdy= sy >> -sscale; 1720 ti.dsdx = t.shade.idsdx >> -sscale; 1721 ti.dsdy = t.shade.idsdy >> -sscale; 1722 } 1723 // T coordinate 1724 const int32_t tscale = ti.tscale; 1725 const int32_t ty = interpolate(ys, 1726 t.shade.it0, t.shade.idtdx, t.shade.idtdy); 1727 if (tscale>=0) { 1728 ti.ydtdy= ty << tscale; 1729 ti.dtdx = t.shade.idtdx << tscale; 1730 ti.dtdy = t.shade.idtdy << tscale; 1731 } else { 1732 ti.ydtdy= ty >> -tscale; 1733 ti.dtdx = t.shade.idtdx >> -tscale; 1734 ti.dtdy = t.shade.idtdy >> -tscale; 1735 } 1736 } 1737 } 1738 // mirror for generated code... 1739 generated_tex_vars_t& gen = c->generated_vars.texture[i]; 1740 gen.width = t.surface.width; 1741 gen.height = t.surface.height; 1742 gen.stride = t.surface.stride; 1743 gen.data = uintptr_t(t.surface.data); 1744 gen.dsdx = ti.dsdx; 1745 gen.dtdx = ti.dtdx; 1746 } 1747 } 1748 1749 // choose the y-stepper 1750 c->step_y = step_y__nop; 1751 if (enables & GGL_ENABLE_FOG) { 1752 c->step_y = step_y__generic; 1753 } else if (enables & GGL_ENABLE_TMUS) { 1754 if (enables & GGL_ENABLE_SMOOTH) { 1755 c->step_y = step_y__generic; 1756 } else if (enables & GGL_ENABLE_W) { 1757 c->step_y = step_y__w; 1758 } else { 1759 c->step_y = step_y__tmu; 1760 } 1761 } else { 1762 if (enables & GGL_ENABLE_SMOOTH) { 1763 c->step_y = step_y__smooth; 1764 } 1765 } 1766 1767 // choose the rectangle blitter 1768 c->rect = rect_generic; 1769 if ((c->step_y == step_y__nop) && 1770 (c->scanline == scanline_memcpy)) 1771 { 1772 c->rect = rect_memcpy; 1773 } 1774 } 1775 1776 void init_y_packed(context_t* c, int32_t y0) 1777 { 1778 uint8_t f = c->state.buffers.color.format; 1779 c->packed = ggl_pack_color(c, f, 1780 c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0); 1781 c->packed8888 = ggl_pack_color(c, GGL_PIXEL_FORMAT_RGBA_8888, 1782 c->shade.r0, c->shade.g0, c->shade.b0, c->shade.a0); 1783 c->iterators.y = y0; 1784 c->step_y = step_y__nop; 1785 // choose the rectangle blitter 1786 c->rect = rect_generic; 1787 if (c->scanline == scanline_memcpy) { 1788 c->rect = rect_memcpy; 1789 } 1790 } 1791 1792 void init_y_noop(context_t* c, int32_t y0) 1793 { 1794 c->iterators.y = y0; 1795 c->step_y = step_y__nop; 1796 // choose the rectangle blitter 1797 c->rect = rect_generic; 1798 if (c->scanline == scanline_memcpy) { 1799 c->rect = rect_memcpy; 1800 } 1801 } 1802 1803 void init_y_error(context_t* c, int32_t y0) 1804 { 1805 // 
whoops, should never happen,
    // fail gracefully (don't display anything)
    init_y_noop(c, y0);
    ALOGE("color-buffer has an invalid format!");
}

// ----------------------------------------------------------------------------
#if 0
#pragma mark -
#endif

void step_y__generic(context_t* c)
{
    const uint32_t enables = c->state.enables;

    // iterate...
    iterators_t& ci = c->iterators;
    ci.y += 1;

    if (enables & GGL_ENABLE_SMOOTH) {
        ci.ydrdy += c->shade.drdy;
        ci.ydgdy += c->shade.dgdy;
        ci.ydbdy += c->shade.dbdy;
        ci.ydady += c->shade.dady;
    }

    const uint32_t mask =
            GGL_ENABLE_DEPTH_TEST |
            GGL_ENABLE_W |
            GGL_ENABLE_FOG;
    if (enables & mask) {
        ci.ydzdy += c->shade.dzdy;
        ci.ydwdy += c->shade.dwdy;
        ci.ydfdy += c->shade.dfdy;
    }

    if ((enables & GGL_ENABLE_TMUS) && (!(enables & GGL_ENABLE_W))) {
        for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
            if (c->state.texture[i].enable) {
                texture_iterators_t& ti = c->state.texture[i].iterators;
                ti.ydsdy += ti.dsdy;
                ti.ydtdy += ti.dtdy;
            }
        }
    }
}

void step_y__nop(context_t* c)
{
    c->iterators.y += 1;
    c->iterators.ydzdy += c->shade.dzdy;
}

void step_y__smooth(context_t* c)
{
    iterators_t& ci = c->iterators;
    ci.y += 1;
    ci.ydrdy += c->shade.drdy;
    ci.ydgdy += c->shade.dgdy;
    ci.ydbdy += c->shade.dbdy;
    ci.ydady += c->shade.dady;
    ci.ydzdy += c->shade.dzdy;
}

void step_y__w(context_t* c)
{
    iterators_t& ci = c->iterators;
    ci.y += 1;
    ci.ydzdy += c->shade.dzdy;
    ci.ydwdy += c->shade.dwdy;
}

void step_y__tmu(context_t* c)
{
    iterators_t& ci = c->iterators;
    ci.y += 1;
    ci.ydzdy += c->shade.dzdy;
    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
        if (c->state.texture[i].enable) {
            texture_iterators_t& ti = c->state.texture[i].iterators;
            ti.ydsdy += ti.dsdy;
            ti.ydtdy += ti.dtdy;
        }
    }
}

// ----------------------------------------------------------------------------
#if 0
#pragma mark -
#endif

void scanline_perspective(context_t* c)
{
    struct {
        union {
            struct {
                int32_t s, sq;
                int32_t t, tq;
            } sqtq;
            struct {
                int32_t v, q;
            } st[2];
        };
    } tc[GGL_TEXTURE_UNIT_COUNT] __attribute__((aligned(16)));

    // XXX: we should have a special case when dwdx = 0

    // 32-pixel spans work okay. 16 is a lot better,
    // but hey, it's a software renderer...
    const uint32_t SPAN_BITS = 5;
    const uint32_t ys = c->iterators.y;
    const uint32_t xs = c->iterators.xl;
    const uint32_t x1 = c->iterators.xr;
    const uint32_t xc = x1 - xs;
    uint32_t remainder = xc & ((1<<SPAN_BITS)-1);
    uint32_t numSpans = xc >> SPAN_BITS;

    const iterators_t& ci = c->iterators;
    int32_t w0 = (xs * c->shade.dwdx) + ci.ydwdy;
    int32_t q0 = gglRecipQ(w0, 30);
    const int iwscale = 32 - gglClz(q0);

    const int32_t dwdx = c->shade.dwdx << SPAN_BITS;
    int32_t xl = c->iterators.xl;

    // We process s & t with a loop to reduce the code size
    // (and i-cache pressure).
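
    // --- Editorial note (added; illustrative, not part of the original code) ---
    // tc[] holds, per TMU, the linear texture coordinate and its perspective-
    // divided counterpart at the start of the current span: the sqtq view
    // (s, sq, t, tq) aliases st[0] = {s, sq} and st[1] = {t, tq}.  q0 is a
    // fixed-point approximation of 1/w0 from gglRecipQ(), and iwscale records
    // its magnitude so the gglMulx() products stay within 32-bit range; each
    // span then derives dsdx/dtdx by differencing the perspective-divided
    // endpoints of the span.
    // ---------------------------------------------------------------------------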

    for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
        const texture_t& tmu = c->state.texture[i];
        if (!tmu.enable) continue;
        int32_t s = tmu.shade.is0 +
                (tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) +
                ((tmu.shade.idsdx + tmu.shade.idsdy)>>1);
        int32_t t = tmu.shade.it0 +
                (tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) +
                ((tmu.shade.idtdx + tmu.shade.idtdy)>>1);
        tc[i].sqtq.s = s;
        tc[i].sqtq.t = t;
        tc[i].sqtq.sq = gglMulx(s, q0, iwscale);
        tc[i].sqtq.tq = gglMulx(t, q0, iwscale);
    }

    int32_t span = 0;
    do {
        int32_t w1;
        if (ggl_likely(numSpans)) {
            w1 = w0 + dwdx;
        } else {
            if (remainder) {
                // finish off the scanline...
                span = remainder;
                w1 = (c->shade.dwdx * span) + w0;
            } else {
                break;
            }
        }
        int32_t q1 = gglRecipQ(w1, 30);
        for (int i=0 ; i<GGL_TEXTURE_UNIT_COUNT ; ++i) {
            texture_t& tmu = c->state.texture[i];
            if (!tmu.enable) continue;
            texture_iterators_t& ti = tmu.iterators;

            for (int j=0 ; j<2 ; j++) {
                int32_t v = tc[i].st[j].v;
                if (span) v += (tmu.shade.st[j].dx)*span;
                else      v += (tmu.shade.st[j].dx)<<SPAN_BITS;
                const int32_t v0 = tc[i].st[j].q;
                const int32_t v1 = gglMulx(v, q1, iwscale);
                int32_t dvdx = v1 - v0;
                if (span) dvdx /= span;
                else      dvdx >>= SPAN_BITS;
                tc[i].st[j].v = v;
                tc[i].st[j].q = v1;

                const int scale = ti.st[j].scale + (iwscale - 30);
                if (scale >= 0) {
                    ti.st[j].ydvdy = v0 << scale;
                    ti.st[j].dvdx  = dvdx << scale;
                } else {
                    ti.st[j].ydvdy = v0 >> -scale;
                    ti.st[j].dvdx  = dvdx >> -scale;
                }
            }
            generated_tex_vars_t& gen = c->generated_vars.texture[i];
            gen.dsdx = ti.st[0].dvdx;
            gen.dtdx = ti.st[1].dvdx;
        }
        c->iterators.xl = xl;
        c->iterators.xr = xl = xl + (span ? span : (1<<SPAN_BITS));
        w0 = w1;
        q0 = q1;
        c->span(c);
    } while(numSpans--);
}

void scanline_perspective_single(context_t* c)
{
    // 32-pixel spans work okay. 16 is a lot better,
    // but hey, it's a software renderer...
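    // --- Editorial sketch (added; not part of the original code) ----------------
    // For reference, a fully per-pixel perspective-correct loop would divide by w
    // at every pixel, roughly as sketched below.  Both scanline_perspective()
    // above and this function avoid that cost by recomputing 1/w only once per
    // 2^SPAN_BITS pixels and interpolating s/w and t/w linearly inside each span.
#if 0
    // Illustration only: the names s, t, w, dsdx, dtdx, dwdx, count and
    // sample_texture() are hypothetical stand-ins for this file's per-pixel
    // increments and sampling step.
    while (count--) {
        const int32_t q = gglRecipQ(w, 30);      // ~1/w in fixed point
        const int32_t u = gglMulx(s, q, 30);     // ~s/w
        const int32_t v = gglMulx(t, q, 30);     // ~t/w
        sample_texture(u, v);                    // hypothetical sampling step
        s += dsdx; t += dtdx; w += dwdx;
    }
#endif
    // -----------------------------------------------------------------------------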
2005 const uint32_t SPAN_BITS = 5; 2006 const uint32_t ys = c->iterators.y; 2007 const uint32_t xs = c->iterators.xl; 2008 const uint32_t x1 = c->iterators.xr; 2009 const uint32_t xc = x1 - xs; 2010 2011 const iterators_t& ci = c->iterators; 2012 int32_t w = (xs * c->shade.dwdx) + ci.ydwdy; 2013 int32_t iw = gglRecipQ(w, 30); 2014 const int iwscale = 32 - gglClz(iw); 2015 2016 const int i = 31 - gglClz(c->state.enabled_tmu); 2017 generated_tex_vars_t& gen = c->generated_vars.texture[i]; 2018 texture_t& tmu = c->state.texture[i]; 2019 texture_iterators_t& ti = tmu.iterators; 2020 const int sscale = ti.sscale + (iwscale - 30); 2021 const int tscale = ti.tscale + (iwscale - 30); 2022 int32_t s = tmu.shade.is0 + 2023 (tmu.shade.idsdy * ys) + (tmu.shade.idsdx * xs) + 2024 ((tmu.shade.idsdx + tmu.shade.idsdy)>>1); 2025 int32_t t = tmu.shade.it0 + 2026 (tmu.shade.idtdy * ys) + (tmu.shade.idtdx * xs) + 2027 ((tmu.shade.idtdx + tmu.shade.idtdy)>>1); 2028 int32_t s0 = gglMulx(s, iw, iwscale); 2029 int32_t t0 = gglMulx(t, iw, iwscale); 2030 int32_t xl = c->iterators.xl; 2031 2032 int32_t sq, tq, dsdx, dtdx; 2033 int32_t premainder = xc & ((1<<SPAN_BITS)-1); 2034 uint32_t numSpans = xc >> SPAN_BITS; 2035 if (c->shade.dwdx == 0) { 2036 // XXX: we could choose to do this if the error is small enough 2037 numSpans = 0; 2038 premainder = xc; 2039 goto no_perspective; 2040 } 2041 2042 if (premainder) { 2043 w += c->shade.dwdx * premainder; 2044 iw = gglRecipQ(w, 30); 2045 no_perspective: 2046 s += tmu.shade.idsdx * premainder; 2047 t += tmu.shade.idtdx * premainder; 2048 sq = gglMulx(s, iw, iwscale); 2049 tq = gglMulx(t, iw, iwscale); 2050 dsdx = (sq - s0) / premainder; 2051 dtdx = (tq - t0) / premainder; 2052 c->iterators.xl = xl; 2053 c->iterators.xr = xl = xl + premainder; 2054 goto finish; 2055 } 2056 2057 while (numSpans--) { 2058 w += c->shade.dwdx << SPAN_BITS; 2059 s += tmu.shade.idsdx << SPAN_BITS; 2060 t += tmu.shade.idtdx << SPAN_BITS; 2061 iw = gglRecipQ(w, 30); 2062 sq = gglMulx(s, iw, iwscale); 2063 tq = gglMulx(t, iw, iwscale); 2064 dsdx = (sq - s0) >> SPAN_BITS; 2065 dtdx = (tq - t0) >> SPAN_BITS; 2066 c->iterators.xl = xl; 2067 c->iterators.xr = xl = xl + (1<<SPAN_BITS); 2068 finish: 2069 if (sscale >= 0) { 2070 ti.ydsdy = s0 << sscale; 2071 ti.dsdx = dsdx << sscale; 2072 } else { 2073 ti.ydsdy = s0 >>-sscale; 2074 ti.dsdx = dsdx >>-sscale; 2075 } 2076 if (tscale >= 0) { 2077 ti.ydtdy = t0 << tscale; 2078 ti.dtdx = dtdx << tscale; 2079 } else { 2080 ti.ydtdy = t0 >>-tscale; 2081 ti.dtdx = dtdx >>-tscale; 2082 } 2083 s0 = sq; 2084 t0 = tq; 2085 gen.dsdx = ti.dsdx; 2086 gen.dtdx = ti.dtdx; 2087 c->span(c); 2088 } 2089 } 2090 2091 // ---------------------------------------------------------------------------- 2092 2093 void scanline_col32cb16blend(context_t* c) 2094 { 2095 int32_t x = c->iterators.xl; 2096 size_t ct = c->iterators.xr - x; 2097 int32_t y = c->iterators.y; 2098 surface_t* cb = &(c->state.buffers.color); 2099 union { 2100 uint16_t* dst; 2101 uint32_t* dst32; 2102 }; 2103 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 2104 2105 #if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && defined(__arm__)) 2106 #if defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN 2107 scanline_col32cb16blend_neon(dst, &(c->packed8888), ct); 2108 #else // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN 2109 scanline_col32cb16blend_arm(dst, GGL_RGBA_TO_HOST(c->packed8888), ct); 2110 #endif // defined(__ARM_HAVE_NEON) && BYTE_ORDER == LITTLE_ENDIAN 2111 #elif ((ANDROID_CODEGEN 
>= ANDROID_CODEGEN_ASM) && defined(__aarch64__)) 2112 scanline_col32cb16blend_arm64(dst, GGL_RGBA_TO_HOST(c->packed8888), ct); 2113 #elif ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__mips__) && defined(__LP64__))) 2114 scanline_col32cb16blend_mips64(dst, GGL_RGBA_TO_HOST(c->packed8888), ct); 2115 #else 2116 uint32_t s = GGL_RGBA_TO_HOST(c->packed8888); 2117 int sA = (s>>24); 2118 int f = 0x100 - (sA + (sA>>7)); 2119 while (ct--) { 2120 uint16_t d = *dst; 2121 int dR = (d>>11)&0x1f; 2122 int dG = (d>>5)&0x3f; 2123 int dB = (d)&0x1f; 2124 int sR = (s >> ( 3))&0x1F; 2125 int sG = (s >> ( 8+2))&0x3F; 2126 int sB = (s >> (16+3))&0x1F; 2127 sR += (f*dR)>>8; 2128 sG += (f*dG)>>8; 2129 sB += (f*dB)>>8; 2130 *dst++ = uint16_t((sR<<11)|(sG<<5)|sB); 2131 } 2132 #endif 2133 2134 } 2135 2136 void scanline_t32cb16(context_t* c) 2137 { 2138 int32_t x = c->iterators.xl; 2139 size_t ct = c->iterators.xr - x; 2140 int32_t y = c->iterators.y; 2141 surface_t* cb = &(c->state.buffers.color); 2142 union { 2143 uint16_t* dst; 2144 uint32_t* dst32; 2145 }; 2146 dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 2147 2148 surface_t* tex = &(c->state.texture[0].surface); 2149 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 2150 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 2151 uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v)); 2152 int sR, sG, sB; 2153 uint32_t s, d; 2154 2155 if (ct==1 || uintptr_t(dst)&2) { 2156 last_one: 2157 s = GGL_RGBA_TO_HOST( *src++ ); 2158 *dst++ = convertAbgr8888ToRgb565(s); 2159 ct--; 2160 } 2161 2162 while (ct >= 2) { 2163 #if BYTE_ORDER == BIG_ENDIAN 2164 s = GGL_RGBA_TO_HOST( *src++ ); 2165 d = convertAbgr8888ToRgb565_hi16(s); 2166 2167 s = GGL_RGBA_TO_HOST( *src++ ); 2168 d |= convertAbgr8888ToRgb565(s); 2169 #else 2170 s = GGL_RGBA_TO_HOST( *src++ ); 2171 d = convertAbgr8888ToRgb565(s); 2172 2173 s = GGL_RGBA_TO_HOST( *src++ ); 2174 d |= convertAbgr8888ToRgb565(s) << 16; 2175 #endif 2176 *dst32++ = d; 2177 ct -= 2; 2178 } 2179 2180 if (ct > 0) { 2181 goto last_one; 2182 } 2183 } 2184 2185 void scanline_t32cb16blend(context_t* c) 2186 { 2187 #if ((ANDROID_CODEGEN >= ANDROID_CODEGEN_ASM) && (defined(__arm__) || defined(__aarch64__) || \ 2188 (defined(__mips__) && ((!defined(__LP64__) && __mips_isa_rev < 6) || defined(__LP64__))))) 2189 int32_t x = c->iterators.xl; 2190 size_t ct = c->iterators.xr - x; 2191 int32_t y = c->iterators.y; 2192 surface_t* cb = &(c->state.buffers.color); 2193 uint16_t* dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 2194 2195 surface_t* tex = &(c->state.texture[0].surface); 2196 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 2197 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 2198 uint32_t *src = reinterpret_cast<uint32_t*>(tex->data)+(u+(tex->stride*v)); 2199 2200 #ifdef __arm__ 2201 scanline_t32cb16blend_arm(dst, src, ct); 2202 #elif defined(__aarch64__) 2203 scanline_t32cb16blend_arm64(dst, src, ct); 2204 #elif defined(__mips__) && !defined(__LP64__) && __mips_isa_rev < 6 2205 scanline_t32cb16blend_mips(dst, src, ct); 2206 #elif defined(__mips__) && defined(__LP64__) 2207 scanline_t32cb16blend_mips64(dst, src, ct); 2208 #endif 2209 #else 2210 dst_iterator16 di(c); 2211 horz_iterator32 hi(c); 2212 blender_32to16 bl(c); 2213 while (di.count--) { 2214 uint32_t s = hi.get_pixel32(); 2215 bl.write(s, di.dst); 2216 di.dst++; 2217 } 2218 #endif 2219 } 2220 2221 void scanline_t32cb16blend_srca(context_t* c) 2222 { 2223 dst_iterator16 di(c); 2224 
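    // Editorial note (added): horz_iterator32 walks the 32-bpp source texel row,
    // and blender_32to16_srcA performs the source-alpha variant of the 32-bit to
    // RGB565 blend (see its definition earlier in this file).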
horz_iterator32 hi(c); 2225 blender_32to16_srcA blender(c); 2226 2227 while (di.count--) { 2228 uint32_t s = hi.get_pixel32(); 2229 blender.write(s,di.dst); 2230 di.dst++; 2231 } 2232 } 2233 2234 void scanline_t16cb16blend_clamp_mod(context_t* c) 2235 { 2236 const int a = c->iterators.ydady >> (GGL_COLOR_BITS-8); 2237 if (a == 0) { 2238 return; 2239 } 2240 2241 if (a == 255) { 2242 scanline_t16cb16_clamp(c); 2243 return; 2244 } 2245 2246 dst_iterator16 di(c); 2247 blender_16to16_modulate blender(c); 2248 clamp_iterator ci(c); 2249 2250 while (di.count--) { 2251 uint16_t s = ci.get_pixel16(); 2252 blender.write(s, di.dst); 2253 di.dst++; 2254 } 2255 } 2256 2257 void scanline_memcpy(context_t* c) 2258 { 2259 int32_t x = c->iterators.xl; 2260 size_t ct = c->iterators.xr - x; 2261 int32_t y = c->iterators.y; 2262 surface_t* cb = &(c->state.buffers.color); 2263 const GGLFormat* fp = &(c->formats[cb->format]); 2264 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 2265 (x + (cb->stride * y)) * fp->size; 2266 2267 surface_t* tex = &(c->state.texture[0].surface); 2268 const int32_t u = (c->state.texture[0].shade.is0>>16) + x; 2269 const int32_t v = (c->state.texture[0].shade.it0>>16) + y; 2270 uint8_t *src = reinterpret_cast<uint8_t*>(tex->data) + 2271 (u + (tex->stride * v)) * fp->size; 2272 2273 const size_t size = ct * fp->size; 2274 memcpy(dst, src, size); 2275 } 2276 2277 void scanline_memset8(context_t* c) 2278 { 2279 int32_t x = c->iterators.xl; 2280 size_t ct = c->iterators.xr - x; 2281 int32_t y = c->iterators.y; 2282 surface_t* cb = &(c->state.buffers.color); 2283 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + (x+(cb->stride*y)); 2284 uint32_t packed = c->packed; 2285 memset(dst, packed, ct); 2286 } 2287 2288 void scanline_memset16(context_t* c) 2289 { 2290 int32_t x = c->iterators.xl; 2291 size_t ct = c->iterators.xr - x; 2292 int32_t y = c->iterators.y; 2293 surface_t* cb = &(c->state.buffers.color); 2294 uint16_t* dst = reinterpret_cast<uint16_t*>(cb->data) + (x+(cb->stride*y)); 2295 uint32_t packed = c->packed; 2296 android_memset16(dst, packed, ct*2); 2297 } 2298 2299 void scanline_memset32(context_t* c) 2300 { 2301 int32_t x = c->iterators.xl; 2302 size_t ct = c->iterators.xr - x; 2303 int32_t y = c->iterators.y; 2304 surface_t* cb = &(c->state.buffers.color); 2305 uint32_t* dst = reinterpret_cast<uint32_t*>(cb->data) + (x+(cb->stride*y)); 2306 uint32_t packed = GGL_HOST_TO_RGBA(c->packed); 2307 android_memset32(dst, packed, ct*4); 2308 } 2309 2310 void scanline_clear(context_t* c) 2311 { 2312 int32_t x = c->iterators.xl; 2313 size_t ct = c->iterators.xr - x; 2314 int32_t y = c->iterators.y; 2315 surface_t* cb = &(c->state.buffers.color); 2316 const GGLFormat* fp = &(c->formats[cb->format]); 2317 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 2318 (x + (cb->stride * y)) * fp->size; 2319 const size_t size = ct * fp->size; 2320 memset(dst, 0, size); 2321 } 2322 2323 void scanline_set(context_t* c) 2324 { 2325 int32_t x = c->iterators.xl; 2326 size_t ct = c->iterators.xr - x; 2327 int32_t y = c->iterators.y; 2328 surface_t* cb = &(c->state.buffers.color); 2329 const GGLFormat* fp = &(c->formats[cb->format]); 2330 uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) + 2331 (x + (cb->stride * y)) * fp->size; 2332 const size_t size = ct * fp->size; 2333 memset(dst, 0xFF, size); 2334 } 2335 2336 void scanline_noop(context_t* /*c*/) 2337 { 2338 } 2339 2340 void rect_generic(context_t* c, size_t yc) 2341 { 2342 do { 2343 c->scanline(c); 2344 c->step_y(c); 2345 } while 
(--yc);
}

void rect_memcpy(context_t* c, size_t yc)
{
    int32_t x = c->iterators.xl;
    size_t ct = c->iterators.xr - x;
    int32_t y = c->iterators.y;
    surface_t* cb = &(c->state.buffers.color);
    const GGLFormat* fp = &(c->formats[cb->format]);
    uint8_t* dst = reinterpret_cast<uint8_t*>(cb->data) +
            (x + (cb->stride * y)) * fp->size;

    surface_t* tex = &(c->state.texture[0].surface);
    const int32_t u = (c->state.texture[0].shade.is0>>16) + x;
    const int32_t v = (c->state.texture[0].shade.it0>>16) + y;
    uint8_t *src = reinterpret_cast<uint8_t*>(tex->data) +
            (u + (tex->stride * v)) * fp->size;

    if (cb->stride == tex->stride && ct == size_t(cb->stride)) {
        memcpy(dst, src, ct * fp->size * yc);
    } else {
        const size_t size = ct * fp->size;
        const size_t dbpr = cb->stride * fp->size;
        const size_t sbpr = tex->stride * fp->size;
        do {
            memcpy(dst, src, size);
            dst += dbpr;
            src += sbpr;
        } while (--yc);
    }
}
// ----------------------------------------------------------------------------
} // namespace android
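
// ----------------------------------------------------------------------------
// Editorial sketch (added; illustrative only).  Assuming context_t exposes
// init_y, scanline and step_y as function pointers -- as the assignments in
// init_y() and the loop in rect_generic() above suggest -- a caller would
// typically drive the selected callbacks roughly like this:
#if 0
static void raster_rows(android::context_t* c, int32_t y0, size_t rows)
{
    c->init_y(c, y0);          // picks the scanline / step_y / rect callbacks
    while (rows--) {
        c->scanline(c);        // fill [xl, xr) on the current row
        c->step_y(c);          // advance the per-row iterators
    }
}
#endif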