/*
    SDL - Simple DirectMedia Layer
    Copyright (C) 1997-2012 Sam Lantinga

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

    Sam Lantinga
    slouken (at) libsdl.org
*/
#include "SDL_config.h"

/*
 * RLE encoding for software colorkey and alpha-channel acceleration
 *
 * Original version by Sam Lantinga
 *
 * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
 * decoder. Added per-surface alpha blitter. Added per-pixel alpha
 * format, encoder and blitter.
 *
 * Many thanks to Xark and johns for hints, benchmarks and useful comments
 * leading to this code.
 *
 * Welcome to Macro Mayhem.
 */

/*
 * The encoding translates the image data to a stream of segments of the form
 *
 * <skip> <run> <data>
 *
 * where <skip> is the number of transparent pixels to skip,
 *       <run>  is the number of opaque pixels to blit,
 * and   <data> are the pixels themselves.
 *
 * This basic structure is used both for colorkeyed surfaces, used for simple
 * binary transparency and for per-surface alpha blending, and for surfaces
 * with per-pixel alpha. The details differ, however:
 *
 * Encoding of colorkeyed surfaces:
 *
 *   Encoded pixels always have the same format as the target surface.
 *   <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth
 *   where they are 16 bit. This makes the pixel data aligned at all times.
 *   Segments never wrap around from one scan line to the next.
 *
 *   The end of the sequence is marked by a zero <skip>,<run> pair at the
 *   beginning of a line.
 *
 * Encoding of surfaces with per-pixel alpha:
 *
 *   The sequence begins with a struct RLEDestFormat describing the target
 *   pixel format, to provide reliable un-encoding.
 *
 *   Each scan line is encoded twice: First all completely opaque pixels,
 *   encoded in the target format as described above, and then all
 *   partially transparent (translucent) pixels (where 1 <= alpha <= 254),
 *   in the following 32-bit format:
 *
 *   For 32-bit targets, each pixel has the target RGB format but with
 *   the alpha value occupying the highest 8 bits. The <skip> and <run>
 *   counts are 16 bit.
 *
 *   For 16-bit targets, each pixel has the target RGB format, but with
 *   the middle component (usually green) shifted 16 steps to the left,
 *   and the hole filled with the 5 most significant bits of the alpha value.
 *   i.e. if the target has the format         rrrrrggggggbbbbb,
 *   the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb.
 *   The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit
 *   for the translucent lines. Two padding bytes may be inserted
 *   before each translucent line to keep them 32-bit aligned.
 *
 *   The end of the sequence is marked by a zero <skip>,<run> pair at the
 *   beginning of an opaque line.
 */

#include "SDL_video.h"
#include "SDL_sysvideo.h"
#include "SDL_blit.h"
#include "SDL_RLEaccel_c.h"

/* Force MMX to 0; this blows up on almost every major compiler now. --ryan. */
/*
 * MMX_ASMBLIT gate: the leading "0 &&" makes the condition below always
 * false, so MMX_ASMBLIT is never defined here and every
 * "#ifdef MMX_ASMBLIT" section of this file is compiled out.
 */
#if 0 && defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && SDL_ASSEMBLY_ROUTINES
#define MMX_ASMBLIT
#endif

#ifdef MMX_ASMBLIT
#include "mmx.h"
#include "SDL_cpuinfo.h"
#endif

/* NOTE: both macros evaluate their arguments more than once. */
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
#ifndef MIN
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#endif

/*
 * Copy `len` pixels of `bpp` bytes each from `from` to `to`.
 * 4-byte pixels go through SDL_memcpy4 (length passed in pixels), all other
 * sizes through SDL_memcpy (length passed in bytes).
 * `bpp` is parenthesized so that an expression argument is compared as a
 * whole against 4.
 */
#define PIXEL_COPY(to, from, len, bpp)                      \
do {                                                        \
    if((bpp) == 4) {                                        \
        SDL_memcpy4(to, from, (size_t)(len));               \
    } else {                                                \
        SDL_memcpy(to, from, (size_t)(len) * (bpp));        \
    }                                                       \
} while(0)

/*
 * Various colorkey blit methods, for opaque and per-surface alpha
 */

/* Opaque run: plain copy; the alpha argument is ignored. */
#define OPAQUE_BLIT(to, from, length, bpp, alpha)           \
    PIXEL_COPY(to, from, length, bpp)

#ifdef MMX_ASMBLIT

/*
 * MMX per-surface alpha blend of `length` 32bpp pixels.
 * Handles one leading pixel when `length` is odd, then two pixels per
 * loop iteration (the loop counter is decremented twice per pass).
 */
#define ALPHA_BLIT32_888MMX(to, from, length, bpp, alpha)   \
do {                                                        \
    Uint32 *srcp = (Uint32 *)(from);                        \
    Uint32 *dstp = (Uint32 *)(to);                          \
    int i = 0x00FF00FF;                                     \
    movd_m2r(*(&i), mm3);                                   \
    punpckldq_r2r(mm3, mm3);                                \
    i = 0xFF000000;                                         \
    movd_m2r(*(&i), mm7);                                   \
    punpckldq_r2r(mm7, mm7);                                \
    i = alpha | alpha << 16;                                \
    movd_m2r(*(&i), mm4);                                   \
    punpckldq_r2r(mm4, mm4);                                \
    pcmpeqd_r2r(mm5,mm5); /* set mm5 to "1" */              \
    pxor_r2r(mm7, mm5); /* make clear alpha mask */         \
    i = length;                                             \
    if(i & 1) {                                             \
        movd_m2r((*srcp), mm1); /* src -> mm1 */            \
        punpcklbw_r2r(mm1, mm1);                            \
        pand_r2r(mm3, mm1);                                 \
        movd_m2r((*dstp), mm2); /* dst -> mm2 */            \
        punpcklbw_r2r(mm2, mm2);                            \
        pand_r2r(mm3, mm2);                                 \
        psubw_r2r(mm2, mm1);                                \
        pmullw_r2r(mm4, mm1);                               \
        psrlw_i2r(8, mm1);                                  \
        paddw_r2r(mm1, mm2);                                \
        pand_r2r(mm3, mm2);                                 \
        packuswb_r2r(mm2, mm2);                             \
        pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */           \
        movd_r2m(mm2, *dstp);                               \
        ++srcp;                                             \
        ++dstp;                                             \
        i--;                                                \
    }                                                       \
    for(; i > 0; --i) {                                     \
        movq_m2r((*srcp), mm0);                             \
        movq_r2r(mm0, mm1);                                 \
        punpcklbw_r2r(mm0, mm0);                            \
        movq_m2r((*dstp), mm2);                             \
        punpckhbw_r2r(mm1, mm1);                            \
        movq_r2r(mm2, mm6);                                 \
        pand_r2r(mm3, mm0);                                 \
        punpcklbw_r2r(mm2, mm2);                            \
        pand_r2r(mm3, mm1);                                 \
        punpckhbw_r2r(mm6, mm6);                            \
        pand_r2r(mm3, mm2);                                 \
        psubw_r2r(mm2, mm0);                                \
        pmullw_r2r(mm4, mm0);                               \
        pand_r2r(mm3, mm6);                                 \
        psubw_r2r(mm6, mm1);                                \
        pmullw_r2r(mm4, mm1);                               \
        psrlw_i2r(8, mm0);                                  \
        paddw_r2r(mm0, mm2);                                \
        psrlw_i2r(8, mm1);                                  \
        paddw_r2r(mm1, mm6);                                \
        pand_r2r(mm3, mm2);                                 \
        pand_r2r(mm3, mm6);                                 \
        packuswb_r2r(mm2, mm2);                             \
        packuswb_r2r(mm6, mm6);                             \
        psrlq_i2r(32, mm2);                                 \
        psllq_i2r(32, mm6);                                 \
        por_r2r(mm6, mm2);                                  \
        pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */           \
        movq_r2m(mm2, *dstp);                               \
        srcp += 2;                                          \
        dstp += 2;                                          \
        i--;                                                \
    }                                                       \
    emms();                                                 \
} while(0)

/*
 * MMX per-surface alpha blend of `length` 16bpp 565 pixels.
 * The first (length & 3) pixels are blended in plain C, the rest four at a
 * time. NOTE(review): this macro writes to its `alpha` argument
 * ("alpha &= ~(1+2+4)"), so the caller must pass a modifiable lvalue and
 * sees its low three bits cleared afterwards.
 */
#define ALPHA_BLIT16_565MMX(to, from, length, bpp, alpha)   \
do {                                                        \
    int i, n = 0;                                           \
    Uint16 *srcp = (Uint16 *)(from);                        \
    Uint16 *dstp = (Uint16 *)(to);                          \
    Uint32 ALPHA = 0xF800;                                  \
    movd_m2r(*(&ALPHA), mm1);                               \
    punpcklwd_r2r(mm1, mm1);                                \
    punpcklwd_r2r(mm1, mm1);                                \
    ALPHA = 0x07E0;                                         \
    movd_m2r(*(&ALPHA), mm4);                               \
    punpcklwd_r2r(mm4, mm4);                                \
    punpcklwd_r2r(mm4, mm4);                                \
    ALPHA = 0x001F;                                         \
    movd_m2r(*(&ALPHA), mm7);                               \
    punpcklwd_r2r(mm7, mm7);                                \
    punpcklwd_r2r(mm7, mm7);                                \
    alpha &= ~(1+2+4);                                      \
    i = (Uint32)alpha | (Uint32)alpha << 16;                \
    movd_m2r(*(&i), mm0);                                   \
    punpckldq_r2r(mm0, mm0);                                \
    ALPHA = alpha >> 3;                                     \
    i = ((int)(length) & 3);                                \
    for(; i > 0; --i) {                                     \
        Uint32 s = *srcp++;                                 \
        Uint32 d = *dstp;                                   \
        s = (s | s << 16) & 0x07e0f81f;                     \
        d = (d | d << 16) & 0x07e0f81f;                     \
        d += (s - d) * ALPHA >> 5;                          \
        d &= 0x07e0f81f;                                    \
        *dstp++ = d | d >> 16;                              \
        n++;                                                \
    }                                                       \
    i = (int)(length) - n;                                  \
    for(; i > 0; --i) {                                     \
        movq_m2r((*dstp), mm3);                             \
        movq_m2r((*srcp), mm2);                             \
        movq_r2r(mm2, mm5);                                 \
        pand_r2r(mm1 , mm5);                                \
        psrlq_i2r(11, mm5);                                 \
        movq_r2r(mm3, mm6);                                 \
        pand_r2r(mm1 , mm6);                                \
        psrlq_i2r(11, mm6);                                 \
        psubw_r2r(mm6, mm5);                                \
        pmullw_r2r(mm0, mm5);                               \
        psrlw_i2r(8, mm5);                                  \
        paddw_r2r(mm5, mm6);                                \
        psllq_i2r(11, mm6);                                 \
        pand_r2r(mm1, mm6);                                 \
        movq_r2r(mm4, mm5);                                 \
        por_r2r(mm7, mm5);                                  \
        pand_r2r(mm5, mm3);                                 \
        por_r2r(mm6, mm3);                                  \
        movq_r2r(mm2, mm5);                                 \
        pand_r2r(mm4 , mm5);                                \
        psrlq_i2r(5, mm5);                                  \
        movq_r2r(mm3, mm6);                                 \
        pand_r2r(mm4 , mm6);                                \
        psrlq_i2r(5, mm6);                                  \
        psubw_r2r(mm6, mm5);                                \
        pmullw_r2r(mm0, mm5);                               \
        psrlw_i2r(8, mm5);                                  \
        paddw_r2r(mm5, mm6);                                \
        psllq_i2r(5, mm6);                                  \
        pand_r2r(mm4, mm6);                                 \
        movq_r2r(mm1, mm5);                                 \
        por_r2r(mm7, mm5);                                  \
        pand_r2r(mm5, mm3);                                 \
        por_r2r(mm6, mm3);                                  \
        movq_r2r(mm2, mm5);                                 \
        pand_r2r(mm7 , mm5);                                \
        movq_r2r(mm3, mm6);                                 \
        pand_r2r(mm7 , mm6);                                \
        psubw_r2r(mm6, mm5);                                \
        pmullw_r2r(mm0, mm5);                               \
        psrlw_i2r(8, mm5);                                  \
        paddw_r2r(mm5, mm6);                                \
        pand_r2r(mm7, mm6);                                 \
        movq_r2r(mm1, mm5);                                 \
        por_r2r(mm4, mm5);                                  \
        pand_r2r(mm5, mm3);                                 \
        por_r2r(mm6, mm3);                                  \
        movq_r2m(mm3, *dstp);                               \
        srcp += 4;                                          \
        dstp += 4;                                          \
        i -= 3;                                             \
    }                                                       \
    emms();                                                 \
} while(0)

/*
 * MMX per-surface alpha blend of `length` 16bpp 555 pixels; same structure
 * as the 565 variant with 555 masks and a red shift of 10.
 * NOTE(review): also writes to its `alpha` argument.
 */
#define ALPHA_BLIT16_555MMX(to, from, length, bpp, alpha)   \
do {                                                        \
    int i, n = 0;                                           \
    Uint16 *srcp = (Uint16 *)(from);                        \
    Uint16 *dstp = (Uint16 *)(to);                          \
    Uint32 ALPHA = 0x7C00;                                  \
    movd_m2r(*(&ALPHA), mm1);                               \
    punpcklwd_r2r(mm1, mm1);                                \
    punpcklwd_r2r(mm1, mm1);                                \
    ALPHA = 0x03E0;                                         \
    movd_m2r(*(&ALPHA), mm4);                               \
    punpcklwd_r2r(mm4, mm4);                                \
    punpcklwd_r2r(mm4, mm4);                                \
    ALPHA = 0x001F;                                         \
    movd_m2r(*(&ALPHA), mm7);                               \
    punpcklwd_r2r(mm7, mm7);                                \
    punpcklwd_r2r(mm7, mm7);                                \
    alpha &= ~(1+2+4);                                      \
    i = (Uint32)alpha | (Uint32)alpha << 16;                \
    movd_m2r(*(&i), mm0);                                   \
    punpckldq_r2r(mm0, mm0);                                \
    i = ((int)(length) & 3);                                \
    ALPHA = alpha >> 3;                                     \
    for(; i > 0; --i) {                                     \
        Uint32 s = *srcp++;                                 \
        Uint32 d = *dstp;                                   \
        s = (s | s << 16) & 0x03e07c1f;                     \
        d = (d | d << 16) & 0x03e07c1f;                     \
        d += (s - d) * ALPHA >> 5;                          \
        d &= 0x03e07c1f;                                    \
        *dstp++ = d | d >> 16;                              \
        n++;                                                \
    }                                                       \
    i = (int)(length) - n;                                  \
    for(; i > 0; --i) {                                     \
        movq_m2r((*dstp), mm3);                             \
        movq_m2r((*srcp), mm2);                             \
        movq_r2r(mm2, mm5);                                 \
        pand_r2r(mm1 , mm5);                                \
        psrlq_i2r(10, mm5);                                 \
        movq_r2r(mm3, mm6);                                 \
        pand_r2r(mm1 , mm6);                                \
        psrlq_i2r(10, mm6);                                 \
        psubw_r2r(mm6, mm5);                                \
        pmullw_r2r(mm0, mm5);                               \
        psrlw_i2r(8, mm5);                                  \
        paddw_r2r(mm5, mm6);                                \
        psllq_i2r(10, mm6);                                 \
        pand_r2r(mm1, mm6);                                 \
        movq_r2r(mm4, mm5);                                 \
        por_r2r(mm7, mm5);                                  \
        pand_r2r(mm5, mm3);                                 \
        por_r2r(mm6, mm3);                                  \
        movq_r2r(mm2, mm5);                                 \
        pand_r2r(mm4 , mm5);                                \
        psrlq_i2r(5, mm5);                                  \
        movq_r2r(mm3, mm6);                                 \
        pand_r2r(mm4 , mm6);                                \
        psrlq_i2r(5, mm6);                                  \
        psubw_r2r(mm6, mm5);                                \
        pmullw_r2r(mm0, mm5);                               \
        psrlw_i2r(8, mm5);                                  \
        paddw_r2r(mm5, mm6);                                \
        psllq_i2r(5, mm6);                                  \
        pand_r2r(mm4, mm6);                                 \
        movq_r2r(mm1, mm5);                                 \
        por_r2r(mm7, mm5);                                  \
        pand_r2r(mm5, mm3);                                 \
        por_r2r(mm6, mm3);                                  \
        movq_r2r(mm2, mm5);                                 \
        pand_r2r(mm7 , mm5);                                \
        movq_r2r(mm3, mm6);                                 \
        pand_r2r(mm7 , mm6);                                \
        psubw_r2r(mm6, mm5);                                \
        pmullw_r2r(mm0, mm5);                               \
        psrlw_i2r(8, mm5);                                  \
        paddw_r2r(mm5, mm6);                                \
        pand_r2r(mm7, mm6);                                 \
        movq_r2r(mm1, mm5);                                 \
        por_r2r(mm4, mm5);                                  \
        pand_r2r(mm5, mm3);                                 \
        por_r2r(mm6, mm3);                                  \
        movq_r2m(mm3, *dstp);                               \
        srcp += 4;                                          \
        dstp += 4;                                          \
        i -= 3;                                             \
    }                                                       \
    emms();                                                 \
} while(0)

#endif

/*
 * For 32bpp pixels of the form 0x00rrggbb:
 * If we treat the middle component separately, we can process the two
 * remaining in parallel. This is safe to do because of the gap to the left
 * of each component, so the bits from the multiplication don't collide.
 * This can be used for any RGB permutation of course.
 */
/*
 * Per-surface alpha blend of `length` 32bpp pixels from `from` onto `to`.
 * The 0xff00ff pair and the 0xff00 component are blended separately; the
 * top byte of every written pixel is zero. `bpp` is unused.
 * `alpha` is parenthesized so an expression argument is applied as a whole.
 */
#define ALPHA_BLIT32_888(to, from, length, bpp, alpha)          \
do {                                                            \
    int i;                                                      \
    Uint32 *src = (Uint32 *)(from);                             \
    Uint32 *dst = (Uint32 *)(to);                               \
    for(i = 0; i < (int)(length); i++) {                        \
        Uint32 s = *src++;                                      \
        Uint32 d = *dst;                                        \
        Uint32 s1 = s & 0xff00ff;                               \
        Uint32 d1 = d & 0xff00ff;                               \
        d1 = (d1 + ((s1 - d1) * (alpha) >> 8)) & 0xff00ff;      \
        s &= 0xff00;                                            \
        d &= 0xff00;                                            \
        d = (d + ((s - d) * (alpha) >> 8)) & 0xff00;            \
        *dst++ = d1 | d;                                        \
    }                                                           \
} while(0)

/*
 * For 16bpp pixels we can go a step further: put the middle component
 * in the high 16 bits of a 32 bit word, and process all three RGB
 * components at the same time. Since the smallest gap is here just
 * 5 bits, we have to scale alpha down to 5 bits as well.
 */
#define ALPHA_BLIT16_565(to, from, length, bpp, alpha)          \
do {                                                            \
    int i;                                                      \
    Uint16 *src = (Uint16 *)(from);                             \
    Uint16 *dst = (Uint16 *)(to);                               \
    Uint32 ALPHA = (alpha) >> 3;                                \
    for(i = 0; i < (int)(length); i++) {                        \
        Uint32 s = *src++;                                      \
        Uint32 d = *dst;                                        \
        s = (s | s << 16) & 0x07e0f81f;                         \
        d = (d | d << 16) & 0x07e0f81f;                         \
        d += (s - d) * ALPHA >> 5;                              \
        d &= 0x07e0f81f;                                        \
        *dst++ = (Uint16)(d | d >> 16);                         \
    }                                                           \
} while(0)

/* Same as ALPHA_BLIT16_565, with the 555 component masks. */
#define ALPHA_BLIT16_555(to, from, length, bpp, alpha)          \
do {                                                            \
    int i;                                                      \
    Uint16 *src = (Uint16 *)(from);                             \
    Uint16 *dst = (Uint16 *)(to);                               \
    Uint32 ALPHA = (alpha) >> 3;                                \
    for(i = 0; i < (int)(length); i++) {                        \
        Uint32 s = *src++;                                      \
        Uint32 d = *dst;                                        \
        s = (s | s << 16) & 0x03e07c1f;                         \
        d = (d | d << 16) & 0x03e07c1f;                         \
        d += (s - d) * ALPHA >> 5;                              \
        d &= 0x03e07c1f;                                        \
        *dst++ = (Uint16)(d | d >> 16);                         \
    }                                                           \
} while(0)

/*
 * The general slow catch-all function, for remaining depths and formats.
 * Reads a variable named `fmt` from the expansion site (it is not a macro
 * parameter). `s` and `d` start at 0 so they are never read uninitialized
 * when `bpp` is not 2, 3 or 4.
 */
#define ALPHA_BLIT_ANY(to, from, length, bpp, alpha)            \
do {                                                            \
    int i;                                                      \
    Uint8 *src = (Uint8 *)(from);                               \
    Uint8 *dst = (Uint8 *)(to);                                 \
    for(i = 0; i < (int)(length); i++) {                        \
        Uint32 s = 0, d = 0;                                    \
        unsigned rs, gs, bs, rd, gd, bd;                        \
        switch(bpp) {                                           \
        case 2:                                                 \
            s = *(Uint16 *)src;                                 \
            d = *(Uint16 *)dst;                                 \
            break;                                              \
        case 3:                                                 \
            if(SDL_BYTEORDER == SDL_BIG_ENDIAN) {               \
                s = (src[0] << 16) | (src[1] << 8) | src[2];    \
                d = (dst[0] << 16) | (dst[1] << 8) | dst[2];    \
            } else {                                            \
                s = (src[2] << 16) | (src[1] << 8) | src[0];    \
                d = (dst[2] << 16) | (dst[1] << 8) | dst[0];    \
            }                                                   \
            break;                                              \
        case 4:                                                 \
            s = *(Uint32 *)src;                                 \
            d = *(Uint32 *)dst;                                 \
            break;                                              \
        }                                                       \
        RGB_FROM_PIXEL(s, fmt, rs, gs, bs);                     \
        RGB_FROM_PIXEL(d, fmt, rd, gd, bd);                     \
        rd += (rs - rd) * (alpha) >> 8;                         \
        gd += (gs - gd) * (alpha) >> 8;                         \
        bd += (bs - bd) * (alpha) >> 8;                         \
        PIXEL_FROM_RGB(d, fmt, rd, gd, bd);                     \
        switch(bpp) {                                           \
        case 2:                                                 \
            *(Uint16 *)dst = (Uint16)d;                         \
            break;                                              \
        case 3:                                                 \
            if(SDL_BYTEORDER == SDL_BIG_ENDIAN) {               \
                dst[0] = (Uint8)(d >> 16);                      \
                dst[1] = (Uint8)(d >> 8);                       \
                dst[2] = (Uint8)(d);                            \
            } else {                                            \
                dst[0] = (Uint8)d;                              \
                dst[1] = (Uint8)(d >> 8);                       \
                dst[2] = (Uint8)(d >> 16);                      \
            }                                                   \
            break;                                              \
        case 4:                                                 \
            *(Uint32 *)dst = d;                                 \
            break;                                              \
        }                                                       \
        src += (bpp);                                           \
        dst += (bpp);                                           \
    }                                                           \
} while(0)

#ifdef MMX_ASMBLIT

/*
 * MMX 50% blend of `length` 32bpp pixels: one leading pixel in plain C when
 * `length` is odd, then two pixels per loop pass (the counter is
 * decremented twice per pass).
 */
#define ALPHA_BLIT32_888_50MMX(to, from, length, bpp, alpha)    \
do {                                                            \
    Uint32 *srcp = (Uint32 *)(from);                            \
    Uint32 *dstp = (Uint32 *)(to);                              \
    int i = 0x00fefefe;                                         \
    movd_m2r(*(&i), mm4);                                       \
    punpckldq_r2r(mm4, mm4);                                    \
    i = 0x00010101;                                             \
    movd_m2r(*(&i), mm3);                                       \
    punpckldq_r2r(mm3, mm3);                                    \
    i = (int)(length);                                          \
    if( i & 1 ) {                                               \
        Uint32 s = *srcp++;                                     \
        Uint32 d = *dstp;                                       \
        *dstp++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)  \
                  + (s & d & 0x00010101);                       \
        i--;                                                    \
    }                                                           \
    for(; i > 0; --i) {                                         \
        movq_m2r((*dstp), mm2); /* dst -> mm2 */                \
        movq_r2r(mm2, mm6); /* dst -> mm6 */                    \
        movq_m2r((*srcp), mm1); /* src -> mm1 */                \
        movq_r2r(mm1, mm5); /* src -> mm5 */                    \
        pand_r2r(mm4, mm6); /* dst & 0x00fefefe -> mm6 */       \
        pand_r2r(mm4, mm5); /* src & 0x00fefefe -> mm5 */       \
        paddd_r2r(mm6, mm5); /* (dst & 0x00fefefe) + (src & 0x00fefefe) -> mm5 */ \
        psrld_i2r(1, mm5);                                      \
        pand_r2r(mm1, mm2); /* s & d -> mm2 */                  \
        pand_r2r(mm3, mm2); /* s & d & 0x00010101 -> mm2 */     \
        paddd_r2r(mm5, mm2);                                    \
        movq_r2m(mm2, (*dstp));                                 \
        dstp += 2;                                              \
        srcp += 2;                                              \
        i--;                                                    \
    }                                                           \
    emms();                                                     \
} while(0)

#endif

/*
 * Special case: 50% alpha (alpha=128)
 * This is treated specially because it can be optimized very well, and
 * since it is good for many cases of semi-translucency.
 * The theory is to do all three components at the same time:
 * First zero the lowest bit of each component, which gives us room to
 * add them. Then shift right and add the sum of the lowest bits.
 */
#define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha)       \
do {                                                            \
    int i;                                                      \
    Uint32 *src = (Uint32 *)(from);                             \
    Uint32 *dst = (Uint32 *)(to);                               \
    for(i = 0; i < (int)(length); i++) {                        \
        Uint32 s = *src++;                                      \
        Uint32 d = *dst;                                        \
        *dst++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1)   \
                 + (s & d & 0x00010101);                        \
    }                                                           \
} while(0)

/*
 * For 16bpp, we can actually blend two pixels in parallel, if we take
 * care to shift before we add, not after.
 */

/*
 * helper: blend a single 16 bit pixel at 50%.
 * `dst` and `src` must be modifiable Uint16 pointers; both are advanced by
 * one pixel. `mask` is parenthesized so an expression argument is applied
 * as a whole.
 */
#define BLEND16_50(dst, src, mask)                              \
do {                                                            \
    Uint32 s = *src++;                                          \
    Uint32 d = *dst;                                            \
    *dst++ = (Uint16)((((s & (mask)) + (d & (mask))) >> 1) +    \
                      (s & d & (~(mask) & 0xffff)));            \
} while(0)

/* basic 16bpp blender. mask is the pixels to keep when adding. */
/*
 * 50% blend of `length` 16bpp pixels from `from` onto `to`.
 * `mask` selects the bits kept before adding (every bit except the lowest
 * bit of each component). Relies on the BLEND16_50 helper macro for single
 * pixels. When source and destination share the same 32-bit phase, pixels
 * are blended two at a time through 32-bit accesses.
 *
 * The doubled mask is built in Uint32 so that shifting it left by 16 never
 * overflows a signed int, and a zero `length` touches no pixel.
 */
#define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask)             \
do {                                                                    \
    unsigned n = (length);                                              \
    Uint16 *src = (Uint16 *)(from);                                     \
    Uint16 *dst = (Uint16 *)(to);                                       \
    if(((uintptr_t)src ^ (uintptr_t)dst) & 3) {                         \
        /* source and destination not in phase, blit one by one */      \
        while(n--)                                                      \
            BLEND16_50(dst, src, mask);                                 \
    } else {                                                            \
        const Uint32 mask2 = (Uint32)(mask) | ((Uint32)(mask) << 16);   \
        if(n && ((uintptr_t)src & 3)) {                                 \
            /* first odd pixel */                                       \
            BLEND16_50(dst, src, mask);                                 \
            n--;                                                        \
        }                                                               \
        for(; n > 1; n -= 2) {                                          \
            Uint32 s = *(Uint32 *)src;                                  \
            Uint32 d = *(Uint32 *)dst;                                  \
            *(Uint32 *)dst = ((s & mask2) >> 1)                         \
                + ((d & mask2) >> 1)                                    \
                + (s & d & ~mask2);                                     \
            src += 2;                                                   \
            dst += 2;                                                   \
        }                                                               \
        if(n)                                                           \
            BLEND16_50(dst, src, mask); /* last odd pixel */            \
    }                                                                   \
} while(0)

#define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha)       \
    ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7de)

#define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha)       \
    ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde)

/*
 * CHOOSE_BLIT(blitter, alpha, fmt)
 *
 * Expands `blitter(bpp, CountType, do_blit)` exactly once, picking the
 * per-run blit macro from `alpha` and the pixel format `fmt`:
 *   - alpha == 255: OPAQUE_BLIT for every depth;
 *   - 16bpp 565 / 555 layouts: the matching ALPHA_BLIT16_* macro, or the
 *     *_50 variant when alpha == 128;
 *   - 32bpp with 24 bits of RGB and one component in 0xff00:
 *     ALPHA_BLIT32_888, or the *_50 variant when alpha == 128;
 *   - everything else: ALPHA_BLIT_ANY (nothing at all for 8bpp).
 * CountType is Uint16 for 32bpp and Uint8 otherwise.
 * The expansion defines the label `general16`, so it can be used at most
 * once per function.
 */
#ifdef MMX_ASMBLIT

/* Variant that prefers the MMX blitters when SDL_HasMMX() is true. */
#define CHOOSE_BLIT(blitter, alpha, fmt)                                \
do {                                                                    \
    if((alpha) == 255) {                                                \
        switch((fmt)->BytesPerPixel) {                                  \
        case 1: blitter(1, Uint8, OPAQUE_BLIT); break;                  \
        case 2: blitter(2, Uint8, OPAQUE_BLIT); break;                  \
        case 3: blitter(3, Uint8, OPAQUE_BLIT); break;                  \
        case 4: blitter(4, Uint16, OPAQUE_BLIT); break;                 \
        }                                                               \
    } else {                                                            \
        switch((fmt)->BytesPerPixel) {                                  \
        case 1:                                                         \
            /* No 8bpp alpha blitting */                                \
            break;                                                      \
                                                                        \
        case 2:                                                         \
            switch((fmt)->Rmask | (fmt)->Gmask | (fmt)->Bmask) {        \
            case 0xffff:                                                \
                if((fmt)->Gmask == 0x07e0                               \
                   || (fmt)->Rmask == 0x07e0                            \
                   || (fmt)->Bmask == 0x07e0) {                         \
                    if((alpha) == 128)                                  \
                        blitter(2, Uint8, ALPHA_BLIT16_565_50);         \
                    else {                                              \
                        if(SDL_HasMMX())                                \
                            blitter(2, Uint8, ALPHA_BLIT16_565MMX);     \
                        else                                            \
                            blitter(2, Uint8, ALPHA_BLIT16_565);        \
                    }                                                   \
                } else                                                  \
                    goto general16;                                     \
                break;                                                  \
                                                                        \
            case 0x7fff:                                                \
                if((fmt)->Gmask == 0x03e0                               \
                   || (fmt)->Rmask == 0x03e0                            \
                   || (fmt)->Bmask == 0x03e0) {                         \
                    if((alpha) == 128)                                  \
                        blitter(2, Uint8, ALPHA_BLIT16_555_50);         \
                    else {                                              \
                        if(SDL_HasMMX())                                \
                            blitter(2, Uint8, ALPHA_BLIT16_555MMX);     \
                        else                                            \
                            blitter(2, Uint8, ALPHA_BLIT16_555);        \
                    }                                                   \
                    break;                                              \
                }                                                       \
                /* fallthrough */                                       \
                                                                        \
            default:                                                    \
            general16:                                                  \
                blitter(2, Uint8, ALPHA_BLIT_ANY);                      \
            }                                                           \
            break;                                                      \
                                                                        \
        case 3:                                                         \
            blitter(3, Uint8, ALPHA_BLIT_ANY);                          \
            break;                                                      \
                                                                        \
        case 4:                                                         \
            if(((fmt)->Rmask | (fmt)->Gmask | (fmt)->Bmask) == 0x00ffffff \
               && ((fmt)->Gmask == 0xff00 || (fmt)->Rmask == 0xff00     \
                   || (fmt)->Bmask == 0xff00)) {                        \
                if((alpha) == 128)                                      \
                {                                                       \
                    if(SDL_HasMMX())                                    \
                        blitter(4, Uint16, ALPHA_BLIT32_888_50MMX);     \
                    else                                                \
                        blitter(4, Uint16, ALPHA_BLIT32_888_50);        \
                }                                                       \
                else                                                    \
                {                                                       \
                    if(SDL_HasMMX())                                    \
                        blitter(4, Uint16, ALPHA_BLIT32_888MMX);        \
                    else                                                \
                        blitter(4, Uint16, ALPHA_BLIT32_888);           \
                }                                                       \
            } else                                                      \
                blitter(4, Uint16, ALPHA_BLIT_ANY);                     \
            break;                                                      \
        }                                                               \
    }                                                                   \
} while(0)

#else

/* Plain C variant: same selection without the MMX blitters. */
#define CHOOSE_BLIT(blitter, alpha, fmt)                                \
do {                                                                    \
    if((alpha) == 255) {                                                \
        switch((fmt)->BytesPerPixel) {                                  \
        case 1: blitter(1, Uint8, OPAQUE_BLIT); break;                  \
        case 2: blitter(2, Uint8, OPAQUE_BLIT); break;                  \
        case 3: blitter(3, Uint8, OPAQUE_BLIT); break;                  \
        case 4: blitter(4, Uint16, OPAQUE_BLIT); break;                 \
        }                                                               \
    } else {                                                            \
        switch((fmt)->BytesPerPixel) {                                  \
        case 1:                                                         \
            /* No 8bpp alpha blitting */                                \
            break;                                                      \
                                                                        \
        case 2:                                                         \
            switch((fmt)->Rmask | (fmt)->Gmask | (fmt)->Bmask) {        \
            case 0xffff:                                                \
                if((fmt)->Gmask == 0x07e0                               \
                   || (fmt)->Rmask == 0x07e0                            \
                   || (fmt)->Bmask == 0x07e0) {                         \
                    if((alpha) == 128)                                  \
                        blitter(2, Uint8, ALPHA_BLIT16_565_50);         \
                    else {                                              \
                        blitter(2, Uint8, ALPHA_BLIT16_565);            \
                    }                                                   \
                } else                                                  \
                    goto general16;                                     \
                break;                                                  \
                                                                        \
            case 0x7fff:                                                \
                if((fmt)->Gmask == 0x03e0                               \
                   || (fmt)->Rmask == 0x03e0                            \
                   || (fmt)->Bmask == 0x03e0) {                         \
                    if((alpha) == 128)                                  \
                        blitter(2, Uint8, ALPHA_BLIT16_555_50);         \
                    else {                                              \
                        blitter(2, Uint8, ALPHA_BLIT16_555);            \
                    }                                                   \
                    break;                                              \
                }                                                       \
                /* fallthrough */                                       \
                                                                        \
            default:                                                    \
            general16:                                                  \
                blitter(2, Uint8, ALPHA_BLIT_ANY);                      \
            }                                                           \
            break;                                                      \
                                                                        \
        case 3:                                                         \
            blitter(3, Uint8, ALPHA_BLIT_ANY);                          \
            break;                                                      \
                                                                        \
        case 4:                                                         \
            if(((fmt)->Rmask | (fmt)->Gmask | (fmt)->Bmask) == 0x00ffffff \
               && ((fmt)->Gmask == 0xff00 || (fmt)->Rmask == 0xff00     \
                   || (fmt)->Bmask == 0xff00)) {                        \
                if((alpha) == 128)                                      \
                    blitter(4, Uint16, ALPHA_BLIT32_888_50);            \
                else                                                    \
                    blitter(4, Uint16, ALPHA_BLIT32_888);               \
            } else                                                      \
                blitter(4, Uint16, ALPHA_BLIT_ANY);                     \
            break;                                                      \
        }                                                               \
    }                                                                   \
} while(0)

#endif

/*
 * This takes care of the case when the surface is clipped on the left and/or
 * right. Top clipping has already been taken care of.
 */
782 */ 783 static void RLEClipBlit(int w, Uint8 *srcbuf, SDL_Surface *dst, 784 Uint8 *dstbuf, SDL_Rect *srcrect, unsigned alpha) 785 { 786 SDL_PixelFormat *fmt = dst->format; 787 788 #define RLECLIPBLIT(bpp, Type, do_blit) \ 789 do { \ 790 int linecount = srcrect->h; \ 791 int ofs = 0; \ 792 int left = srcrect->x; \ 793 int right = left + srcrect->w; \ 794 dstbuf -= left * bpp; \ 795 for(;;) { \ 796 int run; \ 797 ofs += *(Type *)srcbuf; \ 798 run = ((Type *)srcbuf)[1]; \ 799 srcbuf += 2 * sizeof(Type); \ 800 if(run) { \ 801 /* clip to left and right borders */ \ 802 if(ofs < right) { \ 803 int start = 0; \ 804 int len = run; \ 805 int startcol; \ 806 if(left - ofs > 0) { \ 807 start = left - ofs; \ 808 len -= start; \ 809 if(len <= 0) \ 810 goto nocopy ## bpp ## do_blit; \ 811 } \ 812 startcol = ofs + start; \ 813 if(len > right - startcol) \ 814 len = right - startcol; \ 815 do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \ 816 len, bpp, alpha); \ 817 } \ 818 nocopy ## bpp ## do_blit: \ 819 srcbuf += run * bpp; \ 820 ofs += run; \ 821 } else if(!ofs) \ 822 break; \ 823 if(ofs == w) { \ 824 ofs = 0; \ 825 dstbuf += dst->pitch; \ 826 if(!--linecount) \ 827 break; \ 828 } \ 829 } \ 830 } while(0) 831 832 CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt); 833 834 #undef RLECLIPBLIT 835 836 } 837 838 839 /* blit a colorkeyed RLE surface */ 840 int SDL_RLEBlit(SDL_Surface *src, SDL_Rect *srcrect, 841 SDL_Surface *dst, SDL_Rect *dstrect) 842 { 843 Uint8 *dstbuf; 844 Uint8 *srcbuf; 845 int x, y; 846 int w = src->w; 847 unsigned alpha; 848 849 /* Lock the destination if necessary */ 850 if ( SDL_MUSTLOCK(dst) ) { 851 if ( SDL_LockSurface(dst) < 0 ) { 852 return(-1); 853 } 854 } 855 856 /* Set up the source and destination pointers */ 857 x = dstrect->x; 858 y = dstrect->y; 859 dstbuf = (Uint8 *)dst->pixels 860 + y * dst->pitch + x * src->format->BytesPerPixel; 861 srcbuf = (Uint8 *)src->map->sw_data->aux_data; 862 863 { 864 /* skip lines at the top if neccessary */ 865 int 
vskip = srcrect->y; 866 int ofs = 0; 867 if(vskip) { 868 869 #define RLESKIP(bpp, Type) \ 870 for(;;) { \ 871 int run; \ 872 ofs += *(Type *)srcbuf; \ 873 run = ((Type *)srcbuf)[1]; \ 874 srcbuf += sizeof(Type) * 2; \ 875 if(run) { \ 876 srcbuf += run * bpp; \ 877 ofs += run; \ 878 } else if(!ofs) \ 879 goto done; \ 880 if(ofs == w) { \ 881 ofs = 0; \ 882 if(!--vskip) \ 883 break; \ 884 } \ 885 } 886 887 switch(src->format->BytesPerPixel) { 888 case 1: RLESKIP(1, Uint8); break; 889 case 2: RLESKIP(2, Uint8); break; 890 case 3: RLESKIP(3, Uint8); break; 891 case 4: RLESKIP(4, Uint16); break; 892 } 893 894 #undef RLESKIP 895 896 } 897 } 898 899 alpha = (src->flags & SDL_SRCALPHA) == SDL_SRCALPHA 900 ? src->format->alpha : 255; 901 /* if left or right edge clipping needed, call clip blit */ 902 if ( srcrect->x || srcrect->w != src->w ) { 903 RLEClipBlit(w, srcbuf, dst, dstbuf, srcrect, alpha); 904 } else { 905 SDL_PixelFormat *fmt = src->format; 906 907 #define RLEBLIT(bpp, Type, do_blit) \ 908 do { \ 909 int linecount = srcrect->h; \ 910 int ofs = 0; \ 911 for(;;) { \ 912 unsigned run; \ 913 ofs += *(Type *)srcbuf; \ 914 run = ((Type *)srcbuf)[1]; \ 915 srcbuf += 2 * sizeof(Type); \ 916 if(run) { \ 917 do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \ 918 srcbuf += run * bpp; \ 919 ofs += run; \ 920 } else if(!ofs) \ 921 break; \ 922 if(ofs == w) { \ 923 ofs = 0; \ 924 dstbuf += dst->pitch; \ 925 if(!--linecount) \ 926 break; \ 927 } \ 928 } \ 929 } while(0) 930 931 CHOOSE_BLIT(RLEBLIT, alpha, fmt); 932 933 #undef RLEBLIT 934 } 935 936 done: 937 /* Unlock the destination if necessary */ 938 if ( SDL_MUSTLOCK(dst) ) { 939 SDL_UnlockSurface(dst); 940 } 941 return(0); 942 } 943 944 #undef OPAQUE_BLIT 945 946 /* 947 * Per-pixel blitting macros for translucent pixels: 948 * These use the same techniques as the per-surface blitting macros 949 */ 950 951 /* 952 * For 32bpp pixels, we have made sure the alpha is stored in the top 953 * 8 bits, so proceed as usual 954 
*/ 955 #define BLIT_TRANSL_888(src, dst) \ 956 do { \ 957 Uint32 s = src; \ 958 Uint32 d = dst; \ 959 unsigned alpha = s >> 24; \ 960 Uint32 s1 = s & 0xff00ff; \ 961 Uint32 d1 = d & 0xff00ff; \ 962 d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \ 963 s &= 0xff00; \ 964 d &= 0xff00; \ 965 d = (d + ((s - d) * alpha >> 8)) & 0xff00; \ 966 dst = d1 | d; \ 967 } while(0) 968 969 /* 970 * For 16bpp pixels, we have stored the 5 most significant alpha bits in 971 * bits 5-10. As before, we can process all 3 RGB components at the same time. 972 */ 973 #define BLIT_TRANSL_565(src, dst) \ 974 do { \ 975 Uint32 s = src; \ 976 Uint32 d = dst; \ 977 unsigned alpha = (s & 0x3e0) >> 5; \ 978 s &= 0x07e0f81f; \ 979 d = (d | d << 16) & 0x07e0f81f; \ 980 d += (s - d) * alpha >> 5; \ 981 d &= 0x07e0f81f; \ 982 dst = (Uint16)(d | d >> 16); \ 983 } while(0) 984 985 #define BLIT_TRANSL_555(src, dst) \ 986 do { \ 987 Uint32 s = src; \ 988 Uint32 d = dst; \ 989 unsigned alpha = (s & 0x3e0) >> 5; \ 990 s &= 0x03e07c1f; \ 991 d = (d | d << 16) & 0x03e07c1f; \ 992 d += (s - d) * alpha >> 5; \ 993 d &= 0x03e07c1f; \ 994 dst = (Uint16)(d | d >> 16); \ 995 } while(0) 996 997 /* used to save the destination format in the encoding. Designed to be 998 macro-compatible with SDL_PixelFormat but without the unneeded fields */ 999 typedef struct { 1000 Uint8 BytesPerPixel; 1001 Uint8 Rloss; 1002 Uint8 Gloss; 1003 Uint8 Bloss; 1004 Uint8 Rshift; 1005 Uint8 Gshift; 1006 Uint8 Bshift; 1007 Uint8 Ashift; 1008 Uint32 Rmask; 1009 Uint32 Gmask; 1010 Uint32 Bmask; 1011 Uint32 Amask; 1012 } RLEDestFormat; 1013 1014 /* blit a pixel-alpha RLE surface clipped at the right and/or left edges */ 1015 static void RLEAlphaClipBlit(int w, Uint8 *srcbuf, SDL_Surface *dst, 1016 Uint8 *dstbuf, SDL_Rect *srcrect) 1017 { 1018 SDL_PixelFormat *df = dst->format; 1019 /* 1020 * clipped blitter: Ptype is the destination pixel type, 1021 * Ctype the translucent count type, and do_blend the macro 1022 * to blend one pixel. 
1023 */ 1024 #define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend) \ 1025 do { \ 1026 int linecount = srcrect->h; \ 1027 int left = srcrect->x; \ 1028 int right = left + srcrect->w; \ 1029 dstbuf -= left * sizeof(Ptype); \ 1030 do { \ 1031 int ofs = 0; \ 1032 /* blit opaque pixels on one line */ \ 1033 do { \ 1034 unsigned run; \ 1035 ofs += ((Ctype *)srcbuf)[0]; \ 1036 run = ((Ctype *)srcbuf)[1]; \ 1037 srcbuf += 2 * sizeof(Ctype); \ 1038 if(run) { \ 1039 /* clip to left and right borders */ \ 1040 int cofs = ofs; \ 1041 int crun = run; \ 1042 if(left - cofs > 0) { \ 1043 crun -= left - cofs; \ 1044 cofs = left; \ 1045 } \ 1046 if(crun > right - cofs) \ 1047 crun = right - cofs; \ 1048 if(crun > 0) \ 1049 PIXEL_COPY(dstbuf + cofs * sizeof(Ptype), \ 1050 srcbuf + (cofs - ofs) * sizeof(Ptype), \ 1051 (unsigned)crun, sizeof(Ptype)); \ 1052 srcbuf += run * sizeof(Ptype); \ 1053 ofs += run; \ 1054 } else if(!ofs) \ 1055 return; \ 1056 } while(ofs < w); \ 1057 /* skip padding if necessary */ \ 1058 if(sizeof(Ptype) == 2) \ 1059 srcbuf += (uintptr_t)srcbuf & 2; \ 1060 /* blit translucent pixels on the same line */ \ 1061 ofs = 0; \ 1062 do { \ 1063 unsigned run; \ 1064 ofs += ((Uint16 *)srcbuf)[0]; \ 1065 run = ((Uint16 *)srcbuf)[1]; \ 1066 srcbuf += 4; \ 1067 if(run) { \ 1068 /* clip to left and right borders */ \ 1069 int cofs = ofs; \ 1070 int crun = run; \ 1071 if(left - cofs > 0) { \ 1072 crun -= left - cofs; \ 1073 cofs = left; \ 1074 } \ 1075 if(crun > right - cofs) \ 1076 crun = right - cofs; \ 1077 if(crun > 0) { \ 1078 Ptype *dst = (Ptype *)dstbuf + cofs; \ 1079 Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs); \ 1080 int i; \ 1081 for(i = 0; i < crun; i++) \ 1082 do_blend(src[i], dst[i]); \ 1083 } \ 1084 srcbuf += run * 4; \ 1085 ofs += run; \ 1086 } \ 1087 } while(ofs < w); \ 1088 dstbuf += dst->pitch; \ 1089 } while(--linecount); \ 1090 } while(0) 1091 1092 switch(df->BytesPerPixel) { 1093 case 2: 1094 if(df->Gmask == 0x07e0 || df->Rmask == 0x07e0 1095 || df->Bmask 
== 0x07e0) 1096 RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565); 1097 else 1098 RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555); 1099 break; 1100 case 4: 1101 RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888); 1102 break; 1103 } 1104 } 1105 1106 /* blit a pixel-alpha RLE surface */ 1107 int SDL_RLEAlphaBlit(SDL_Surface *src, SDL_Rect *srcrect, 1108 SDL_Surface *dst, SDL_Rect *dstrect) 1109 { 1110 int x, y; 1111 int w = src->w; 1112 Uint8 *srcbuf, *dstbuf; 1113 SDL_PixelFormat *df = dst->format; 1114 1115 /* Lock the destination if necessary */ 1116 if ( SDL_MUSTLOCK(dst) ) { 1117 if ( SDL_LockSurface(dst) < 0 ) { 1118 return -1; 1119 } 1120 } 1121 1122 x = dstrect->x; 1123 y = dstrect->y; 1124 dstbuf = (Uint8 *)dst->pixels 1125 + y * dst->pitch + x * df->BytesPerPixel; 1126 srcbuf = (Uint8 *)src->map->sw_data->aux_data + sizeof(RLEDestFormat); 1127 1128 { 1129 /* skip lines at the top if necessary */ 1130 int vskip = srcrect->y; 1131 if(vskip) { 1132 int ofs; 1133 if(df->BytesPerPixel == 2) { 1134 /* the 16/32 interleaved format */ 1135 do { 1136 /* skip opaque line */ 1137 ofs = 0; 1138 do { 1139 int run; 1140 ofs += srcbuf[0]; 1141 run = srcbuf[1]; 1142 srcbuf += 2; 1143 if(run) { 1144 srcbuf += 2 * run; 1145 ofs += run; 1146 } else if(!ofs) 1147 goto done; 1148 } while(ofs < w); 1149 1150 /* skip padding */ 1151 srcbuf += (uintptr_t)srcbuf & 2; 1152 1153 /* skip translucent line */ 1154 ofs = 0; 1155 do { 1156 int run; 1157 ofs += ((Uint16 *)srcbuf)[0]; 1158 run = ((Uint16 *)srcbuf)[1]; 1159 srcbuf += 4 * (run + 1); 1160 ofs += run; 1161 } while(ofs < w); 1162 } while(--vskip); 1163 } else { 1164 /* the 32/32 interleaved format */ 1165 vskip <<= 1; /* opaque and translucent have same format */ 1166 do { 1167 ofs = 0; 1168 do { 1169 int run; 1170 ofs += ((Uint16 *)srcbuf)[0]; 1171 run = ((Uint16 *)srcbuf)[1]; 1172 srcbuf += 4; 1173 if(run) { 1174 srcbuf += 4 * run; 1175 ofs += run; 1176 } else if(!ofs) 1177 goto done; 1178 } while(ofs < w); 1179 } 
while(--vskip); 1180 } 1181 } 1182 } 1183 1184 /* if left or right edge clipping needed, call clip blit */ 1185 if(srcrect->x || srcrect->w != src->w) { 1186 RLEAlphaClipBlit(w, srcbuf, dst, dstbuf, srcrect); 1187 } else { 1188 1189 /* 1190 * non-clipped blitter. Ptype is the destination pixel type, 1191 * Ctype the translucent count type, and do_blend the 1192 * macro to blend one pixel. 1193 */ 1194 #define RLEALPHABLIT(Ptype, Ctype, do_blend) \ 1195 do { \ 1196 int linecount = srcrect->h; \ 1197 do { \ 1198 int ofs = 0; \ 1199 /* blit opaque pixels on one line */ \ 1200 do { \ 1201 unsigned run; \ 1202 ofs += ((Ctype *)srcbuf)[0]; \ 1203 run = ((Ctype *)srcbuf)[1]; \ 1204 srcbuf += 2 * sizeof(Ctype); \ 1205 if(run) { \ 1206 PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \ 1207 run, sizeof(Ptype)); \ 1208 srcbuf += run * sizeof(Ptype); \ 1209 ofs += run; \ 1210 } else if(!ofs) \ 1211 goto done; \ 1212 } while(ofs < w); \ 1213 /* skip padding if necessary */ \ 1214 if(sizeof(Ptype) == 2) \ 1215 srcbuf += (uintptr_t)srcbuf & 2; \ 1216 /* blit translucent pixels on the same line */ \ 1217 ofs = 0; \ 1218 do { \ 1219 unsigned run; \ 1220 ofs += ((Uint16 *)srcbuf)[0]; \ 1221 run = ((Uint16 *)srcbuf)[1]; \ 1222 srcbuf += 4; \ 1223 if(run) { \ 1224 Ptype *dst = (Ptype *)dstbuf + ofs; \ 1225 unsigned i; \ 1226 for(i = 0; i < run; i++) { \ 1227 Uint32 src = *(Uint32 *)srcbuf; \ 1228 do_blend(src, *dst); \ 1229 srcbuf += 4; \ 1230 dst++; \ 1231 } \ 1232 ofs += run; \ 1233 } \ 1234 } while(ofs < w); \ 1235 dstbuf += dst->pitch; \ 1236 } while(--linecount); \ 1237 } while(0) 1238 1239 switch(df->BytesPerPixel) { 1240 case 2: 1241 if(df->Gmask == 0x07e0 || df->Rmask == 0x07e0 1242 || df->Bmask == 0x07e0) 1243 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565); 1244 else 1245 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555); 1246 break; 1247 case 4: 1248 RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888); 1249 break; 1250 } 1251 } 1252 1253 done: 1254 /* Unlock the destination if necessary 
*/ 1255 if ( SDL_MUSTLOCK(dst) ) { 1256 SDL_UnlockSurface(dst); 1257 } 1258 return 0; 1259 } 1260 1261 /* 1262 * Auxiliary functions: 1263 * The encoding functions take 32bpp rgb + a, and 1264 * return the number of bytes copied to the destination. 1265 * The decoding functions copy to 32bpp rgb + a, and 1266 * return the number of bytes copied from the source. 1267 * These are only used in the encoder and un-RLE code and are therefore not 1268 * highly optimised. 1269 */ 1270 1271 /* encode 32bpp rgb + a into 16bpp rgb, losing alpha */ 1272 static int copy_opaque_16(void *dst, Uint32 *src, int n, 1273 SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt) 1274 { 1275 int i; 1276 Uint16 *d = dst; 1277 for(i = 0; i < n; i++) { 1278 unsigned r, g, b; 1279 RGB_FROM_PIXEL(*src, sfmt, r, g, b); 1280 PIXEL_FROM_RGB(*d, dfmt, r, g, b); 1281 src++; 1282 d++; 1283 } 1284 return n * 2; 1285 } 1286 1287 /* decode opaque pixels from 16bpp to 32bpp rgb + a */ 1288 static int uncopy_opaque_16(Uint32 *dst, void *src, int n, 1289 RLEDestFormat *sfmt, SDL_PixelFormat *dfmt) 1290 { 1291 int i; 1292 Uint16 *s = src; 1293 unsigned alpha = dfmt->Amask ? 
255 : 0; 1294 for(i = 0; i < n; i++) { 1295 unsigned r, g, b; 1296 RGB_FROM_PIXEL(*s, sfmt, r, g, b); 1297 PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha); 1298 s++; 1299 dst++; 1300 } 1301 return n * 2; 1302 } 1303 1304 1305 1306 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */ 1307 static int copy_transl_565(void *dst, Uint32 *src, int n, 1308 SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt) 1309 { 1310 int i; 1311 Uint32 *d = dst; 1312 for(i = 0; i < n; i++) { 1313 unsigned r, g, b, a; 1314 Uint16 pix; 1315 RGBA_FROM_8888(*src, sfmt, r, g, b, a); 1316 PIXEL_FROM_RGB(pix, dfmt, r, g, b); 1317 *d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0); 1318 src++; 1319 d++; 1320 } 1321 return n * 4; 1322 } 1323 1324 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */ 1325 static int copy_transl_555(void *dst, Uint32 *src, int n, 1326 SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt) 1327 { 1328 int i; 1329 Uint32 *d = dst; 1330 for(i = 0; i < n; i++) { 1331 unsigned r, g, b, a; 1332 Uint16 pix; 1333 RGBA_FROM_8888(*src, sfmt, r, g, b, a); 1334 PIXEL_FROM_RGB(pix, dfmt, r, g, b); 1335 *d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0); 1336 src++; 1337 d++; 1338 } 1339 return n * 4; 1340 } 1341 1342 /* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */ 1343 static int uncopy_transl_16(Uint32 *dst, void *src, int n, 1344 RLEDestFormat *sfmt, SDL_PixelFormat *dfmt) 1345 { 1346 int i; 1347 Uint32 *s = src; 1348 for(i = 0; i < n; i++) { 1349 unsigned r, g, b, a; 1350 Uint32 pix = *s++; 1351 a = (pix & 0x3e0) >> 2; 1352 pix = (pix & ~0x3e0) | pix >> 16; 1353 RGB_FROM_PIXEL(pix, sfmt, r, g, b); 1354 PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a); 1355 dst++; 1356 } 1357 return n * 4; 1358 } 1359 1360 /* encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */ 1361 static int copy_32(void *dst, Uint32 *src, int n, 1362 SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt) 1363 { 1364 int i; 1365 Uint32 *d = dst; 
1366 for(i = 0; i < n; i++) { 1367 unsigned r, g, b, a; 1368 Uint32 pixel; 1369 RGBA_FROM_8888(*src, sfmt, r, g, b, a); 1370 PIXEL_FROM_RGB(pixel, dfmt, r, g, b); 1371 *d++ = pixel | a << 24; 1372 src++; 1373 } 1374 return n * 4; 1375 } 1376 1377 /* decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */ 1378 static int uncopy_32(Uint32 *dst, void *src, int n, 1379 RLEDestFormat *sfmt, SDL_PixelFormat *dfmt) 1380 { 1381 int i; 1382 Uint32 *s = src; 1383 for(i = 0; i < n; i++) { 1384 unsigned r, g, b, a; 1385 Uint32 pixel = *s++; 1386 RGB_FROM_PIXEL(pixel, sfmt, r, g, b); 1387 a = pixel >> 24; 1388 PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a); 1389 dst++; 1390 } 1391 return n * 4; 1392 } 1393 1394 #define ISOPAQUE(pixel, fmt) ((((pixel) & fmt->Amask) >> fmt->Ashift) == 255) 1395 1396 #define ISTRANSL(pixel, fmt) \ 1397 ((unsigned)((((pixel) & fmt->Amask) >> fmt->Ashift) - 1U) < 254U) 1398 1399 /* convert surface to be quickly alpha-blittable onto dest, if possible */ 1400 static int RLEAlphaSurface(SDL_Surface *surface) 1401 { 1402 SDL_Surface *dest; 1403 SDL_PixelFormat *df; 1404 int maxsize = 0; 1405 int max_opaque_run; 1406 int max_transl_run = 65535; 1407 unsigned masksum; 1408 Uint8 *rlebuf, *dst; 1409 int (*copy_opaque)(void *, Uint32 *, int, 1410 SDL_PixelFormat *, SDL_PixelFormat *); 1411 int (*copy_transl)(void *, Uint32 *, int, 1412 SDL_PixelFormat *, SDL_PixelFormat *); 1413 1414 dest = surface->map->dst; 1415 if(!dest) 1416 return -1; 1417 df = dest->format; 1418 if(surface->format->BitsPerPixel != 32) 1419 return -1; /* only 32bpp source supported */ 1420 1421 /* find out whether the destination is one we support, 1422 and determine the max size of the encoded result */ 1423 masksum = df->Rmask | df->Gmask | df->Bmask; 1424 switch(df->BytesPerPixel) { 1425 case 2: 1426 /* 16bpp: only support 565 and 555 formats */ 1427 switch(masksum) { 1428 case 0xffff: 1429 if(df->Gmask == 0x07e0 1430 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) { 1431 
copy_opaque = copy_opaque_16; 1432 copy_transl = copy_transl_565; 1433 } else 1434 return -1; 1435 break; 1436 case 0x7fff: 1437 if(df->Gmask == 0x03e0 1438 || df->Rmask == 0x03e0 || df->Bmask == 0x03e0) { 1439 copy_opaque = copy_opaque_16; 1440 copy_transl = copy_transl_555; 1441 } else 1442 return -1; 1443 break; 1444 default: 1445 return -1; 1446 } 1447 max_opaque_run = 255; /* runs stored as bytes */ 1448 1449 /* worst case is alternating opaque and translucent pixels, 1450 with room for alignment padding between lines */ 1451 maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2; 1452 break; 1453 case 4: 1454 if(masksum != 0x00ffffff) 1455 return -1; /* requires unused high byte */ 1456 copy_opaque = copy_32; 1457 copy_transl = copy_32; 1458 max_opaque_run = 255; /* runs stored as short ints */ 1459 1460 /* worst case is alternating opaque and translucent pixels */ 1461 maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4; 1462 break; 1463 default: 1464 return -1; /* anything else unsupported right now */ 1465 } 1466 1467 maxsize += sizeof(RLEDestFormat); 1468 rlebuf = (Uint8 *)SDL_malloc(maxsize); 1469 if(!rlebuf) { 1470 SDL_OutOfMemory(); 1471 return -1; 1472 } 1473 { 1474 /* save the destination format so we can undo the encoding later */ 1475 RLEDestFormat *r = (RLEDestFormat *)rlebuf; 1476 r->BytesPerPixel = df->BytesPerPixel; 1477 r->Rloss = df->Rloss; 1478 r->Gloss = df->Gloss; 1479 r->Bloss = df->Bloss; 1480 r->Rshift = df->Rshift; 1481 r->Gshift = df->Gshift; 1482 r->Bshift = df->Bshift; 1483 r->Ashift = df->Ashift; 1484 r->Rmask = df->Rmask; 1485 r->Gmask = df->Gmask; 1486 r->Bmask = df->Bmask; 1487 r->Amask = df->Amask; 1488 } 1489 dst = rlebuf + sizeof(RLEDestFormat); 1490 1491 /* Do the actual encoding */ 1492 { 1493 int x, y; 1494 int h = surface->h, w = surface->w; 1495 SDL_PixelFormat *sf = surface->format; 1496 Uint32 *src = (Uint32 *)surface->pixels; 1497 Uint8 *lastline = dst; /* end of last non-blank line */ 1498 1499 /* opaque counts 
are 8 or 16 bits, depending on target depth */ 1500 #define ADD_OPAQUE_COUNTS(n, m) \ 1501 if(df->BytesPerPixel == 4) { \ 1502 ((Uint16 *)dst)[0] = n; \ 1503 ((Uint16 *)dst)[1] = m; \ 1504 dst += 4; \ 1505 } else { \ 1506 dst[0] = n; \ 1507 dst[1] = m; \ 1508 dst += 2; \ 1509 } 1510 1511 /* translucent counts are always 16 bit */ 1512 #define ADD_TRANSL_COUNTS(n, m) \ 1513 (((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4) 1514 1515 for(y = 0; y < h; y++) { 1516 int runstart, skipstart; 1517 int blankline = 0; 1518 /* First encode all opaque pixels of a scan line */ 1519 x = 0; 1520 do { 1521 int run, skip, len; 1522 skipstart = x; 1523 while(x < w && !ISOPAQUE(src[x], sf)) 1524 x++; 1525 runstart = x; 1526 while(x < w && ISOPAQUE(src[x], sf)) 1527 x++; 1528 skip = runstart - skipstart; 1529 if(skip == w) 1530 blankline = 1; 1531 run = x - runstart; 1532 while(skip > max_opaque_run) { 1533 ADD_OPAQUE_COUNTS(max_opaque_run, 0); 1534 skip -= max_opaque_run; 1535 } 1536 len = MIN(run, max_opaque_run); 1537 ADD_OPAQUE_COUNTS(skip, len); 1538 dst += copy_opaque(dst, src + runstart, len, sf, df); 1539 runstart += len; 1540 run -= len; 1541 while(run) { 1542 len = MIN(run, max_opaque_run); 1543 ADD_OPAQUE_COUNTS(0, len); 1544 dst += copy_opaque(dst, src + runstart, len, sf, df); 1545 runstart += len; 1546 run -= len; 1547 } 1548 } while(x < w); 1549 1550 /* Make sure the next output address is 32-bit aligned */ 1551 dst += (uintptr_t)dst & 2; 1552 1553 /* Next, encode all translucent pixels of the same scan line */ 1554 x = 0; 1555 do { 1556 int run, skip, len; 1557 skipstart = x; 1558 while(x < w && !ISTRANSL(src[x], sf)) 1559 x++; 1560 runstart = x; 1561 while(x < w && ISTRANSL(src[x], sf)) 1562 x++; 1563 skip = runstart - skipstart; 1564 blankline &= (skip == w); 1565 run = x - runstart; 1566 while(skip > max_transl_run) { 1567 ADD_TRANSL_COUNTS(max_transl_run, 0); 1568 skip -= max_transl_run; 1569 } 1570 len = MIN(run, max_transl_run); 1571 
ADD_TRANSL_COUNTS(skip, len); 1572 dst += copy_transl(dst, src + runstart, len, sf, df); 1573 runstart += len; 1574 run -= len; 1575 while(run) { 1576 len = MIN(run, max_transl_run); 1577 ADD_TRANSL_COUNTS(0, len); 1578 dst += copy_transl(dst, src + runstart, len, sf, df); 1579 runstart += len; 1580 run -= len; 1581 } 1582 if(!blankline) 1583 lastline = dst; 1584 } while(x < w); 1585 1586 src += surface->pitch >> 2; 1587 } 1588 dst = lastline; /* back up past trailing blank lines */ 1589 ADD_OPAQUE_COUNTS(0, 0); 1590 } 1591 1592 #undef ADD_OPAQUE_COUNTS 1593 #undef ADD_TRANSL_COUNTS 1594 1595 /* Now that we have it encoded, release the original pixels */ 1596 if((surface->flags & SDL_PREALLOC) != SDL_PREALLOC 1597 && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE) { 1598 SDL_free( surface->pixels ); 1599 surface->pixels = NULL; 1600 } 1601 1602 /* realloc the buffer to release unused memory */ 1603 { 1604 Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf); 1605 if(!p) 1606 p = rlebuf; 1607 surface->map->sw_data->aux_data = p; 1608 } 1609 1610 return 0; 1611 } 1612 1613 static Uint32 getpix_8(Uint8 *srcbuf) 1614 { 1615 return *srcbuf; 1616 } 1617 1618 static Uint32 getpix_16(Uint8 *srcbuf) 1619 { 1620 return *(Uint16 *)srcbuf; 1621 } 1622 1623 static Uint32 getpix_24(Uint8 *srcbuf) 1624 { 1625 #if SDL_BYTEORDER == SDL_LIL_ENDIAN 1626 return srcbuf[0] + (srcbuf[1] << 8) + (srcbuf[2] << 16); 1627 #else 1628 return (srcbuf[0] << 16) + (srcbuf[1] << 8) + srcbuf[2]; 1629 #endif 1630 } 1631 1632 static Uint32 getpix_32(Uint8 *srcbuf) 1633 { 1634 return *(Uint32 *)srcbuf; 1635 } 1636 1637 typedef Uint32 (*getpix_func)(Uint8 *); 1638 1639 static getpix_func getpixes[4] = { 1640 getpix_8, getpix_16, getpix_24, getpix_32 1641 }; 1642 1643 static int RLEColorkeySurface(SDL_Surface *surface) 1644 { 1645 Uint8 *rlebuf, *dst; 1646 int maxn; 1647 int y; 1648 Uint8 *srcbuf, *lastline; 1649 int maxsize = 0; 1650 int bpp = surface->format->BytesPerPixel; 1651 getpix_func getpix; 1652 
Uint32 ckey, rgbmask; 1653 int w, h; 1654 1655 /* calculate the worst case size for the compressed surface */ 1656 switch(bpp) { 1657 case 1: 1658 /* worst case is alternating opaque and transparent pixels, 1659 starting with an opaque pixel */ 1660 maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2; 1661 break; 1662 case 2: 1663 case 3: 1664 /* worst case is solid runs, at most 255 pixels wide */ 1665 maxsize = surface->h * (2 * (surface->w / 255 + 1) 1666 + surface->w * bpp) + 2; 1667 break; 1668 case 4: 1669 /* worst case is solid runs, at most 65535 pixels wide */ 1670 maxsize = surface->h * (4 * (surface->w / 65535 + 1) 1671 + surface->w * 4) + 4; 1672 break; 1673 } 1674 1675 rlebuf = (Uint8 *)SDL_malloc(maxsize); 1676 if ( rlebuf == NULL ) { 1677 SDL_OutOfMemory(); 1678 return(-1); 1679 } 1680 1681 /* Set up the conversion */ 1682 srcbuf = (Uint8 *)surface->pixels; 1683 maxn = bpp == 4 ? 65535 : 255; 1684 dst = rlebuf; 1685 rgbmask = ~surface->format->Amask; 1686 ckey = surface->format->colorkey & rgbmask; 1687 lastline = dst; 1688 getpix = getpixes[bpp - 1]; 1689 w = surface->w; 1690 h = surface->h; 1691 1692 #define ADD_COUNTS(n, m) \ 1693 if(bpp == 4) { \ 1694 ((Uint16 *)dst)[0] = n; \ 1695 ((Uint16 *)dst)[1] = m; \ 1696 dst += 4; \ 1697 } else { \ 1698 dst[0] = n; \ 1699 dst[1] = m; \ 1700 dst += 2; \ 1701 } 1702 1703 for(y = 0; y < h; y++) { 1704 int x = 0; 1705 int blankline = 0; 1706 do { 1707 int run, skip, len; 1708 int runstart; 1709 int skipstart = x; 1710 1711 /* find run of transparent, then opaque pixels */ 1712 while(x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey) 1713 x++; 1714 runstart = x; 1715 while(x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey) 1716 x++; 1717 skip = runstart - skipstart; 1718 if(skip == w) 1719 blankline = 1; 1720 run = x - runstart; 1721 1722 /* encode segment */ 1723 while(skip > maxn) { 1724 ADD_COUNTS(maxn, 0); 1725 skip -= maxn; 1726 } 1727 len = MIN(run, maxn); 1728 ADD_COUNTS(skip, len); 1729 
SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp); 1730 dst += len * bpp; 1731 run -= len; 1732 runstart += len; 1733 while(run) { 1734 len = MIN(run, maxn); 1735 ADD_COUNTS(0, len); 1736 SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp); 1737 dst += len * bpp; 1738 runstart += len; 1739 run -= len; 1740 } 1741 if(!blankline) 1742 lastline = dst; 1743 } while(x < w); 1744 1745 srcbuf += surface->pitch; 1746 } 1747 dst = lastline; /* back up bast trailing blank lines */ 1748 ADD_COUNTS(0, 0); 1749 1750 #undef ADD_COUNTS 1751 1752 /* Now that we have it encoded, release the original pixels */ 1753 if((surface->flags & SDL_PREALLOC) != SDL_PREALLOC 1754 && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE) { 1755 SDL_free( surface->pixels ); 1756 surface->pixels = NULL; 1757 } 1758 1759 /* realloc the buffer to release unused memory */ 1760 { 1761 /* If realloc returns NULL, the original block is left intact */ 1762 Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf); 1763 if(!p) 1764 p = rlebuf; 1765 surface->map->sw_data->aux_data = p; 1766 } 1767 1768 return(0); 1769 } 1770 1771 int SDL_RLESurface(SDL_Surface *surface) 1772 { 1773 int retcode; 1774 1775 /* Clear any previous RLE conversion */ 1776 if ( (surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL ) { 1777 SDL_UnRLESurface(surface, 1); 1778 } 1779 1780 /* We don't support RLE encoding of bitmaps */ 1781 if ( surface->format->BitsPerPixel < 8 ) { 1782 return(-1); 1783 } 1784 1785 /* Lock the surface if it's in hardware */ 1786 if ( SDL_MUSTLOCK(surface) ) { 1787 if ( SDL_LockSurface(surface) < 0 ) { 1788 return(-1); 1789 } 1790 } 1791 1792 /* Encode */ 1793 if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) { 1794 retcode = RLEColorkeySurface(surface); 1795 } else { 1796 if((surface->flags & SDL_SRCALPHA) == SDL_SRCALPHA 1797 && surface->format->Amask != 0) 1798 retcode = RLEAlphaSurface(surface); 1799 else 1800 retcode = -1; /* no RLE for per-surface alpha sans ckey */ 1801 } 1802 1803 /* Unlock the surface 
if it's in hardware */ 1804 if ( SDL_MUSTLOCK(surface) ) { 1805 SDL_UnlockSurface(surface); 1806 } 1807 1808 if(retcode < 0) 1809 return -1; 1810 1811 /* The surface is now accelerated */ 1812 surface->flags |= SDL_RLEACCEL; 1813 1814 return(0); 1815 } 1816 1817 /* 1818 * Un-RLE a surface with pixel alpha 1819 * This may not give back exactly the image before RLE-encoding; all 1820 * completely transparent pixels will be lost, and colour and alpha depth 1821 * may have been reduced (when encoding for 16bpp targets). 1822 */ 1823 static SDL_bool UnRLEAlpha(SDL_Surface *surface) 1824 { 1825 Uint8 *srcbuf; 1826 Uint32 *dst; 1827 SDL_PixelFormat *sf = surface->format; 1828 RLEDestFormat *df = surface->map->sw_data->aux_data; 1829 int (*uncopy_opaque)(Uint32 *, void *, int, 1830 RLEDestFormat *, SDL_PixelFormat *); 1831 int (*uncopy_transl)(Uint32 *, void *, int, 1832 RLEDestFormat *, SDL_PixelFormat *); 1833 int w = surface->w; 1834 int bpp = df->BytesPerPixel; 1835 1836 if(bpp == 2) { 1837 uncopy_opaque = uncopy_opaque_16; 1838 uncopy_transl = uncopy_transl_16; 1839 } else { 1840 uncopy_opaque = uncopy_transl = uncopy_32; 1841 } 1842 1843 surface->pixels = SDL_malloc(surface->h * surface->pitch); 1844 if ( !surface->pixels ) { 1845 return(SDL_FALSE); 1846 } 1847 /* fill background with transparent pixels */ 1848 SDL_memset(surface->pixels, 0, surface->h * surface->pitch); 1849 1850 dst = surface->pixels; 1851 srcbuf = (Uint8 *)(df + 1); 1852 for(;;) { 1853 /* copy opaque pixels */ 1854 int ofs = 0; 1855 do { 1856 unsigned run; 1857 if(bpp == 2) { 1858 ofs += srcbuf[0]; 1859 run = srcbuf[1]; 1860 srcbuf += 2; 1861 } else { 1862 ofs += ((Uint16 *)srcbuf)[0]; 1863 run = ((Uint16 *)srcbuf)[1]; 1864 srcbuf += 4; 1865 } 1866 if(run) { 1867 srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf); 1868 ofs += run; 1869 } else if(!ofs) 1870 return(SDL_TRUE); 1871 } while(ofs < w); 1872 1873 /* skip padding if needed */ 1874 if(bpp == 2) 1875 srcbuf += (uintptr_t)srcbuf & 2; 
1876 1877 /* copy translucent pixels */ 1878 ofs = 0; 1879 do { 1880 unsigned run; 1881 ofs += ((Uint16 *)srcbuf)[0]; 1882 run = ((Uint16 *)srcbuf)[1]; 1883 srcbuf += 4; 1884 if(run) { 1885 srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf); 1886 ofs += run; 1887 } 1888 } while(ofs < w); 1889 dst += surface->pitch >> 2; 1890 } 1891 /* Make the compiler happy */ 1892 return(SDL_TRUE); 1893 } 1894 1895 void SDL_UnRLESurface(SDL_Surface *surface, int recode) 1896 { 1897 if ( (surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL ) { 1898 surface->flags &= ~SDL_RLEACCEL; 1899 1900 if(recode && (surface->flags & SDL_PREALLOC) != SDL_PREALLOC 1901 && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE) { 1902 if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) { 1903 SDL_Rect full; 1904 unsigned alpha_flag; 1905 1906 /* re-create the original surface */ 1907 surface->pixels = SDL_malloc(surface->h * surface->pitch); 1908 if ( !surface->pixels ) { 1909 /* Oh crap... */ 1910 surface->flags |= SDL_RLEACCEL; 1911 return; 1912 } 1913 1914 /* fill it with the background colour */ 1915 SDL_FillRect(surface, NULL, surface->format->colorkey); 1916 1917 /* now render the encoded surface */ 1918 full.x = full.y = 0; 1919 full.w = surface->w; 1920 full.h = surface->h; 1921 alpha_flag = surface->flags & SDL_SRCALPHA; 1922 surface->flags &= ~SDL_SRCALPHA; /* opaque blit */ 1923 SDL_RLEBlit(surface, &full, surface, &full); 1924 surface->flags |= alpha_flag; 1925 } else { 1926 if ( !UnRLEAlpha(surface) ) { 1927 /* Oh crap... */ 1928 surface->flags |= SDL_RLEACCEL; 1929 return; 1930 } 1931 } 1932 } 1933 1934 if ( surface->map && surface->map->sw_data->aux_data ) { 1935 SDL_free(surface->map->sw_data->aux_data); 1936 surface->map->sw_data->aux_data = NULL; 1937 } 1938 } 1939 } 1940 1941 1942