/*
    SDL - Simple DirectMedia Layer
    Copyright (C) 1997-2006 Sam Lantinga

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Lesser General Public
    License as published by the Free Software Foundation; either
    version 2.1 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
    Lesser General Public License for more details.

    You should have received a copy of the GNU Lesser General Public
    License along with this library; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA

    Sam Lantinga
    slouken (at) libsdl.org
*/
#include "SDL_config.h"

/*
 * RLE encoding for software colorkey and alpha-channel acceleration
 *
 * Original version by Sam Lantinga
 *
 * Mattias Engdegård (Yorick): Rewrite. New encoding format, encoder and
 * decoder. Added per-surface alpha blitter. Added per-pixel alpha
 * format, encoder and blitter.
 *
 * Many thanks to Xark and johns for hints, benchmarks and useful comments
 * leading to this code.
 *
 * Welcome to Macro Mayhem.
 */

/*
 * The encoding translates the image data to a stream of segments of the form
 *
 * <skip> <run> <data>
 *
 * where <skip> is the number of transparent pixels to skip,
 *       <run>  is the number of opaque pixels to blit,
 * and   <data> are the pixels themselves.
 *
 * This basic structure is used both for colorkeyed surfaces, used for simple
 * binary transparency and for per-surface alpha blending, and for surfaces
 * with per-pixel alpha. The details differ, however:
 *
 * Encoding of colorkeyed surfaces:
 *
 *   Encoded pixels always have the same format as the target surface.
55 * <skip> and <run> are unsigned 8 bit integers, except for 32 bit depth 56 * where they are 16 bit. This makes the pixel data aligned at all times. 57 * Segments never wrap around from one scan line to the next. 58 * 59 * The end of the sequence is marked by a zero <skip>,<run> pair at the * 60 * beginning of a line. 61 * 62 * Encoding of surfaces with per-pixel alpha: 63 * 64 * The sequence begins with a struct RLEDestFormat describing the target 65 * pixel format, to provide reliable un-encoding. 66 * 67 * Each scan line is encoded twice: First all completely opaque pixels, 68 * encoded in the target format as described above, and then all 69 * partially transparent (translucent) pixels (where 1 <= alpha <= 254), 70 * in the following 32-bit format: 71 * 72 * For 32-bit targets, each pixel has the target RGB format but with 73 * the alpha value occupying the highest 8 bits. The <skip> and <run> 74 * counts are 16 bit. 75 * 76 * For 16-bit targets, each pixel has the target RGB format, but with 77 * the middle component (usually green) shifted 16 steps to the left, 78 * and the hole filled with the 5 most significant bits of the alpha value. 79 * i.e. if the target has the format rrrrrggggggbbbbb, 80 * the encoded pixel will be 00000gggggg00000rrrrr0aaaaabbbbb. 81 * The <skip> and <run> counts are 8 bit for the opaque lines, 16 bit 82 * for the translucent lines. Two padding bytes may be inserted 83 * before each translucent line to keep them 32-bit aligned. 84 * 85 * The end of the sequence is marked by a zero <skip>,<run> pair at the 86 * beginning of an opaque line. 87 */ 88 89 #include "SDL_video.h" 90 #include "SDL_sysvideo.h" 91 #include "SDL_blit.h" 92 #include "SDL_RLEaccel_c.h" 93 94 #if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) && SDL_ASSEMBLY_ROUTINES 95 #define MMX_ASMBLIT 96 #endif 97 98 #ifdef MMX_ASMBLIT 99 #include "mmx.h" 100 #include "SDL_cpuinfo.h" 101 #endif 102 103 #ifndef MAX 104 #define MAX(a, b) ((a) > (b) ? 
(a) : (b)) 105 #endif 106 #ifndef MIN 107 #define MIN(a, b) ((a) < (b) ? (a) : (b)) 108 #endif 109 110 #define PIXEL_COPY(to, from, len, bpp) \ 111 do { \ 112 if(bpp == 4) { \ 113 SDL_memcpy4(to, from, (size_t)(len)); \ 114 } else { \ 115 SDL_memcpy(to, from, (size_t)(len) * (bpp)); \ 116 } \ 117 } while(0) 118 119 /* 120 * Various colorkey blit methods, for opaque and per-surface alpha 121 */ 122 123 #define OPAQUE_BLIT(to, from, length, bpp, alpha) \ 124 PIXEL_COPY(to, from, length, bpp) 125 126 #ifdef MMX_ASMBLIT 127 128 #define ALPHA_BLIT32_888MMX(to, from, length, bpp, alpha) \ 129 do { \ 130 Uint32 *srcp = (Uint32 *)(from); \ 131 Uint32 *dstp = (Uint32 *)(to); \ 132 int i = 0x00FF00FF; \ 133 movd_m2r(*(&i), mm3); \ 134 punpckldq_r2r(mm3, mm3); \ 135 i = 0xFF000000; \ 136 movd_m2r(*(&i), mm7); \ 137 punpckldq_r2r(mm7, mm7); \ 138 i = alpha | alpha << 16; \ 139 movd_m2r(*(&i), mm4); \ 140 punpckldq_r2r(mm4, mm4); \ 141 pcmpeqd_r2r(mm5,mm5); /* set mm5 to "1" */ \ 142 pxor_r2r(mm7, mm5); /* make clear alpha mask */ \ 143 i = length; \ 144 if(i & 1) { \ 145 movd_m2r((*srcp), mm1); /* src -> mm1 */ \ 146 punpcklbw_r2r(mm1, mm1); \ 147 pand_r2r(mm3, mm1); \ 148 movd_m2r((*dstp), mm2); /* dst -> mm2 */ \ 149 punpcklbw_r2r(mm2, mm2); \ 150 pand_r2r(mm3, mm2); \ 151 psubw_r2r(mm2, mm1); \ 152 pmullw_r2r(mm4, mm1); \ 153 psrlw_i2r(8, mm1); \ 154 paddw_r2r(mm1, mm2); \ 155 pand_r2r(mm3, mm2); \ 156 packuswb_r2r(mm2, mm2); \ 157 pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */ \ 158 movd_r2m(mm2, *dstp); \ 159 ++srcp; \ 160 ++dstp; \ 161 i--; \ 162 } \ 163 for(; i > 0; --i) { \ 164 movq_m2r((*srcp), mm0); \ 165 movq_r2r(mm0, mm1); \ 166 punpcklbw_r2r(mm0, mm0); \ 167 movq_m2r((*dstp), mm2); \ 168 punpckhbw_r2r(mm1, mm1); \ 169 movq_r2r(mm2, mm6); \ 170 pand_r2r(mm3, mm0); \ 171 punpcklbw_r2r(mm2, mm2); \ 172 pand_r2r(mm3, mm1); \ 173 punpckhbw_r2r(mm6, mm6); \ 174 pand_r2r(mm3, mm2); \ 175 psubw_r2r(mm2, mm0); \ 176 pmullw_r2r(mm4, mm0); \ 177 pand_r2r(mm3, mm6); \ 178 
psubw_r2r(mm6, mm1); \ 179 pmullw_r2r(mm4, mm1); \ 180 psrlw_i2r(8, mm0); \ 181 paddw_r2r(mm0, mm2); \ 182 psrlw_i2r(8, mm1); \ 183 paddw_r2r(mm1, mm6); \ 184 pand_r2r(mm3, mm2); \ 185 pand_r2r(mm3, mm6); \ 186 packuswb_r2r(mm2, mm2); \ 187 packuswb_r2r(mm6, mm6); \ 188 psrlq_i2r(32, mm2); \ 189 psllq_i2r(32, mm6); \ 190 por_r2r(mm6, mm2); \ 191 pand_r2r(mm5, mm2); /* 00000RGB -> mm2 */ \ 192 movq_r2m(mm2, *dstp); \ 193 srcp += 2; \ 194 dstp += 2; \ 195 i--; \ 196 } \ 197 emms(); \ 198 } while(0) 199 200 #define ALPHA_BLIT16_565MMX(to, from, length, bpp, alpha) \ 201 do { \ 202 int i, n = 0; \ 203 Uint16 *srcp = (Uint16 *)(from); \ 204 Uint16 *dstp = (Uint16 *)(to); \ 205 Uint32 ALPHA = 0xF800; \ 206 movd_m2r(*(&ALPHA), mm1); \ 207 punpcklwd_r2r(mm1, mm1); \ 208 punpcklwd_r2r(mm1, mm1); \ 209 ALPHA = 0x07E0; \ 210 movd_m2r(*(&ALPHA), mm4); \ 211 punpcklwd_r2r(mm4, mm4); \ 212 punpcklwd_r2r(mm4, mm4); \ 213 ALPHA = 0x001F; \ 214 movd_m2r(*(&ALPHA), mm7); \ 215 punpcklwd_r2r(mm7, mm7); \ 216 punpcklwd_r2r(mm7, mm7); \ 217 alpha &= ~(1+2+4); \ 218 i = (Uint32)alpha | (Uint32)alpha << 16; \ 219 movd_m2r(*(&i), mm0); \ 220 punpckldq_r2r(mm0, mm0); \ 221 ALPHA = alpha >> 3; \ 222 i = ((int)(length) & 3); \ 223 for(; i > 0; --i) { \ 224 Uint32 s = *srcp++; \ 225 Uint32 d = *dstp; \ 226 s = (s | s << 16) & 0x07e0f81f; \ 227 d = (d | d << 16) & 0x07e0f81f; \ 228 d += (s - d) * ALPHA >> 5; \ 229 d &= 0x07e0f81f; \ 230 *dstp++ = d | d >> 16; \ 231 n++; \ 232 } \ 233 i = (int)(length) - n; \ 234 for(; i > 0; --i) { \ 235 movq_m2r((*dstp), mm3); \ 236 movq_m2r((*srcp), mm2); \ 237 movq_r2r(mm2, mm5); \ 238 pand_r2r(mm1 , mm5); \ 239 psrlq_i2r(11, mm5); \ 240 movq_r2r(mm3, mm6); \ 241 pand_r2r(mm1 , mm6); \ 242 psrlq_i2r(11, mm6); \ 243 psubw_r2r(mm6, mm5); \ 244 pmullw_r2r(mm0, mm5); \ 245 psrlw_i2r(8, mm5); \ 246 paddw_r2r(mm5, mm6); \ 247 psllq_i2r(11, mm6); \ 248 pand_r2r(mm1, mm6); \ 249 movq_r2r(mm4, mm5); \ 250 por_r2r(mm7, mm5); \ 251 pand_r2r(mm5, mm3); \ 252 
por_r2r(mm6, mm3); \ 253 movq_r2r(mm2, mm5); \ 254 pand_r2r(mm4 , mm5); \ 255 psrlq_i2r(5, mm5); \ 256 movq_r2r(mm3, mm6); \ 257 pand_r2r(mm4 , mm6); \ 258 psrlq_i2r(5, mm6); \ 259 psubw_r2r(mm6, mm5); \ 260 pmullw_r2r(mm0, mm5); \ 261 psrlw_i2r(8, mm5); \ 262 paddw_r2r(mm5, mm6); \ 263 psllq_i2r(5, mm6); \ 264 pand_r2r(mm4, mm6); \ 265 movq_r2r(mm1, mm5); \ 266 por_r2r(mm7, mm5); \ 267 pand_r2r(mm5, mm3); \ 268 por_r2r(mm6, mm3); \ 269 movq_r2r(mm2, mm5); \ 270 pand_r2r(mm7 , mm5); \ 271 movq_r2r(mm3, mm6); \ 272 pand_r2r(mm7 , mm6); \ 273 psubw_r2r(mm6, mm5); \ 274 pmullw_r2r(mm0, mm5); \ 275 psrlw_i2r(8, mm5); \ 276 paddw_r2r(mm5, mm6); \ 277 pand_r2r(mm7, mm6); \ 278 movq_r2r(mm1, mm5); \ 279 por_r2r(mm4, mm5); \ 280 pand_r2r(mm5, mm3); \ 281 por_r2r(mm6, mm3); \ 282 movq_r2m(mm3, *dstp); \ 283 srcp += 4; \ 284 dstp += 4; \ 285 i -= 3; \ 286 } \ 287 emms(); \ 288 } while(0) 289 290 #define ALPHA_BLIT16_555MMX(to, from, length, bpp, alpha) \ 291 do { \ 292 int i, n = 0; \ 293 Uint16 *srcp = (Uint16 *)(from); \ 294 Uint16 *dstp = (Uint16 *)(to); \ 295 Uint32 ALPHA = 0x7C00; \ 296 movd_m2r(*(&ALPHA), mm1); \ 297 punpcklwd_r2r(mm1, mm1); \ 298 punpcklwd_r2r(mm1, mm1); \ 299 ALPHA = 0x03E0; \ 300 movd_m2r(*(&ALPHA), mm4); \ 301 punpcklwd_r2r(mm4, mm4); \ 302 punpcklwd_r2r(mm4, mm4); \ 303 ALPHA = 0x001F; \ 304 movd_m2r(*(&ALPHA), mm7); \ 305 punpcklwd_r2r(mm7, mm7); \ 306 punpcklwd_r2r(mm7, mm7); \ 307 alpha &= ~(1+2+4); \ 308 i = (Uint32)alpha | (Uint32)alpha << 16; \ 309 movd_m2r(*(&i), mm0); \ 310 punpckldq_r2r(mm0, mm0); \ 311 i = ((int)(length) & 3); \ 312 ALPHA = alpha >> 3; \ 313 for(; i > 0; --i) { \ 314 Uint32 s = *srcp++; \ 315 Uint32 d = *dstp; \ 316 s = (s | s << 16) & 0x03e07c1f; \ 317 d = (d | d << 16) & 0x03e07c1f; \ 318 d += (s - d) * ALPHA >> 5; \ 319 d &= 0x03e07c1f; \ 320 *dstp++ = d | d >> 16; \ 321 n++; \ 322 } \ 323 i = (int)(length) - n; \ 324 for(; i > 0; --i) { \ 325 movq_m2r((*dstp), mm3); \ 326 movq_m2r((*srcp), mm2); \ 327 movq_r2r(mm2, 
mm5); \ 328 pand_r2r(mm1 , mm5); \ 329 psrlq_i2r(10, mm5); \ 330 movq_r2r(mm3, mm6); \ 331 pand_r2r(mm1 , mm6); \ 332 psrlq_i2r(10, mm6); \ 333 psubw_r2r(mm6, mm5); \ 334 pmullw_r2r(mm0, mm5); \ 335 psrlw_i2r(8, mm5); \ 336 paddw_r2r(mm5, mm6); \ 337 psllq_i2r(10, mm6); \ 338 pand_r2r(mm1, mm6); \ 339 movq_r2r(mm4, mm5); \ 340 por_r2r(mm7, mm5); \ 341 pand_r2r(mm5, mm3); \ 342 por_r2r(mm6, mm3); \ 343 movq_r2r(mm2, mm5); \ 344 pand_r2r(mm4 , mm5); \ 345 psrlq_i2r(5, mm5); \ 346 movq_r2r(mm3, mm6); \ 347 pand_r2r(mm4 , mm6); \ 348 psrlq_i2r(5, mm6); \ 349 psubw_r2r(mm6, mm5); \ 350 pmullw_r2r(mm0, mm5); \ 351 psrlw_i2r(8, mm5); \ 352 paddw_r2r(mm5, mm6); \ 353 psllq_i2r(5, mm6); \ 354 pand_r2r(mm4, mm6); \ 355 movq_r2r(mm1, mm5); \ 356 por_r2r(mm7, mm5); \ 357 pand_r2r(mm5, mm3); \ 358 por_r2r(mm6, mm3); \ 359 movq_r2r(mm2, mm5); \ 360 pand_r2r(mm7 , mm5); \ 361 movq_r2r(mm3, mm6); \ 362 pand_r2r(mm7 , mm6); \ 363 psubw_r2r(mm6, mm5); \ 364 pmullw_r2r(mm0, mm5); \ 365 psrlw_i2r(8, mm5); \ 366 paddw_r2r(mm5, mm6); \ 367 pand_r2r(mm7, mm6); \ 368 movq_r2r(mm1, mm5); \ 369 por_r2r(mm4, mm5); \ 370 pand_r2r(mm5, mm3); \ 371 por_r2r(mm6, mm3); \ 372 movq_r2m(mm3, *dstp); \ 373 srcp += 4; \ 374 dstp += 4; \ 375 i -= 3; \ 376 } \ 377 emms(); \ 378 } while(0) 379 380 #endif 381 382 /* 383 * For 32bpp pixels on the form 0x00rrggbb: 384 * If we treat the middle component separately, we can process the two 385 * remaining in parallel. This is safe to do because of the gap to the left 386 * of each component, so the bits from the multiplication don't collide. 387 * This can be used for any RGB permutation of course. 
388 */ 389 #define ALPHA_BLIT32_888(to, from, length, bpp, alpha) \ 390 do { \ 391 int i; \ 392 Uint32 *src = (Uint32 *)(from); \ 393 Uint32 *dst = (Uint32 *)(to); \ 394 for(i = 0; i < (int)(length); i++) { \ 395 Uint32 s = *src++; \ 396 Uint32 d = *dst; \ 397 Uint32 s1 = s & 0xff00ff; \ 398 Uint32 d1 = d & 0xff00ff; \ 399 d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \ 400 s &= 0xff00; \ 401 d &= 0xff00; \ 402 d = (d + ((s - d) * alpha >> 8)) & 0xff00; \ 403 *dst++ = d1 | d; \ 404 } \ 405 } while(0) 406 407 /* 408 * For 16bpp pixels we can go a step further: put the middle component 409 * in the high 16 bits of a 32 bit word, and process all three RGB 410 * components at the same time. Since the smallest gap is here just 411 * 5 bits, we have to scale alpha down to 5 bits as well. 412 */ 413 #define ALPHA_BLIT16_565(to, from, length, bpp, alpha) \ 414 do { \ 415 int i; \ 416 Uint16 *src = (Uint16 *)(from); \ 417 Uint16 *dst = (Uint16 *)(to); \ 418 Uint32 ALPHA = alpha >> 3; \ 419 for(i = 0; i < (int)(length); i++) { \ 420 Uint32 s = *src++; \ 421 Uint32 d = *dst; \ 422 s = (s | s << 16) & 0x07e0f81f; \ 423 d = (d | d << 16) & 0x07e0f81f; \ 424 d += (s - d) * ALPHA >> 5; \ 425 d &= 0x07e0f81f; \ 426 *dst++ = (Uint16)(d | d >> 16); \ 427 } \ 428 } while(0) 429 430 #define ALPHA_BLIT16_555(to, from, length, bpp, alpha) \ 431 do { \ 432 int i; \ 433 Uint16 *src = (Uint16 *)(from); \ 434 Uint16 *dst = (Uint16 *)(to); \ 435 Uint32 ALPHA = alpha >> 3; \ 436 for(i = 0; i < (int)(length); i++) { \ 437 Uint32 s = *src++; \ 438 Uint32 d = *dst; \ 439 s = (s | s << 16) & 0x03e07c1f; \ 440 d = (d | d << 16) & 0x03e07c1f; \ 441 d += (s - d) * ALPHA >> 5; \ 442 d &= 0x03e07c1f; \ 443 *dst++ = (Uint16)(d | d >> 16); \ 444 } \ 445 } while(0) 446 447 /* 448 * The general slow catch-all function, for remaining depths and formats 449 */ 450 #define ALPHA_BLIT_ANY(to, from, length, bpp, alpha) \ 451 do { \ 452 int i; \ 453 Uint8 *src = from; \ 454 Uint8 *dst = to; \ 455 for(i = 0; 
i < (int)(length); i++) { \ 456 Uint32 s, d; \ 457 unsigned rs, gs, bs, rd, gd, bd; \ 458 switch(bpp) { \ 459 case 2: \ 460 s = *(Uint16 *)src; \ 461 d = *(Uint16 *)dst; \ 462 break; \ 463 case 3: \ 464 if(SDL_BYTEORDER == SDL_BIG_ENDIAN) { \ 465 s = (src[0] << 16) | (src[1] << 8) | src[2]; \ 466 d = (dst[0] << 16) | (dst[1] << 8) | dst[2]; \ 467 } else { \ 468 s = (src[2] << 16) | (src[1] << 8) | src[0]; \ 469 d = (dst[2] << 16) | (dst[1] << 8) | dst[0]; \ 470 } \ 471 break; \ 472 case 4: \ 473 s = *(Uint32 *)src; \ 474 d = *(Uint32 *)dst; \ 475 break; \ 476 } \ 477 RGB_FROM_PIXEL(s, fmt, rs, gs, bs); \ 478 RGB_FROM_PIXEL(d, fmt, rd, gd, bd); \ 479 rd += (rs - rd) * alpha >> 8; \ 480 gd += (gs - gd) * alpha >> 8; \ 481 bd += (bs - bd) * alpha >> 8; \ 482 PIXEL_FROM_RGB(d, fmt, rd, gd, bd); \ 483 switch(bpp) { \ 484 case 2: \ 485 *(Uint16 *)dst = (Uint16)d; \ 486 break; \ 487 case 3: \ 488 if(SDL_BYTEORDER == SDL_BIG_ENDIAN) { \ 489 dst[0] = (Uint8)(d >> 16); \ 490 dst[1] = (Uint8)(d >> 8); \ 491 dst[2] = (Uint8)(d); \ 492 } else { \ 493 dst[0] = (Uint8)d; \ 494 dst[1] = (Uint8)(d >> 8); \ 495 dst[2] = (Uint8)(d >> 16); \ 496 } \ 497 break; \ 498 case 4: \ 499 *(Uint32 *)dst = d; \ 500 break; \ 501 } \ 502 src += bpp; \ 503 dst += bpp; \ 504 } \ 505 } while(0) 506 507 #ifdef MMX_ASMBLIT 508 509 #define ALPHA_BLIT32_888_50MMX(to, from, length, bpp, alpha) \ 510 do { \ 511 Uint32 *srcp = (Uint32 *)(from); \ 512 Uint32 *dstp = (Uint32 *)(to); \ 513 int i = 0x00fefefe; \ 514 movd_m2r(*(&i), mm4); \ 515 punpckldq_r2r(mm4, mm4); \ 516 i = 0x00010101; \ 517 movd_m2r(*(&i), mm3); \ 518 punpckldq_r2r(mm3, mm3); \ 519 i = (int)(length); \ 520 if( i & 1 ) { \ 521 Uint32 s = *srcp++; \ 522 Uint32 d = *dstp; \ 523 *dstp++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) \ 524 + (s & d & 0x00010101); \ 525 i--; \ 526 } \ 527 for(; i > 0; --i) { \ 528 movq_m2r((*dstp), mm2); /* dst -> mm2 */ \ 529 movq_r2r(mm2, mm6); /* dst -> mm6 */ \ 530 movq_m2r((*srcp), mm1); /* src -> mm1 */ \ 
531 movq_r2r(mm1, mm5); /* src -> mm5 */ \ 532 pand_r2r(mm4, mm6); /* dst & 0x00fefefe -> mm6 */ \ 533 pand_r2r(mm4, mm5); /* src & 0x00fefefe -> mm5 */ \ 534 paddd_r2r(mm6, mm5); /* (dst & 0x00fefefe) + (dst & 0x00fefefe) -> mm5 */ \ 535 psrld_i2r(1, mm5); \ 536 pand_r2r(mm1, mm2); /* s & d -> mm2 */ \ 537 pand_r2r(mm3, mm2); /* s & d & 0x00010101 -> mm2 */ \ 538 paddd_r2r(mm5, mm2); \ 539 movq_r2m(mm2, (*dstp)); \ 540 dstp += 2; \ 541 srcp += 2; \ 542 i--; \ 543 } \ 544 emms(); \ 545 } while(0) 546 547 #endif 548 549 /* 550 * Special case: 50% alpha (alpha=128) 551 * This is treated specially because it can be optimized very well, and 552 * since it is good for many cases of semi-translucency. 553 * The theory is to do all three components at the same time: 554 * First zero the lowest bit of each component, which gives us room to 555 * add them. Then shift right and add the sum of the lowest bits. 556 */ 557 #define ALPHA_BLIT32_888_50(to, from, length, bpp, alpha) \ 558 do { \ 559 int i; \ 560 Uint32 *src = (Uint32 *)(from); \ 561 Uint32 *dst = (Uint32 *)(to); \ 562 for(i = 0; i < (int)(length); i++) { \ 563 Uint32 s = *src++; \ 564 Uint32 d = *dst; \ 565 *dst++ = (((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) \ 566 + (s & d & 0x00010101); \ 567 } \ 568 } while(0) 569 570 /* 571 * For 16bpp, we can actually blend two pixels in parallel, if we take 572 * care to shift before we add, not after. 573 */ 574 575 /* helper: blend a single 16 bit pixel at 50% */ 576 #define BLEND16_50(dst, src, mask) \ 577 do { \ 578 Uint32 s = *src++; \ 579 Uint32 d = *dst; \ 580 *dst++ = (Uint16)((((s & mask) + (d & mask)) >> 1) + \ 581 (s & d & (~mask & 0xffff))); \ 582 } while(0) 583 584 /* basic 16bpp blender. mask is the pixels to keep when adding. 
*/ 585 #define ALPHA_BLIT16_50(to, from, length, bpp, alpha, mask) \ 586 do { \ 587 unsigned n = (length); \ 588 Uint16 *src = (Uint16 *)(from); \ 589 Uint16 *dst = (Uint16 *)(to); \ 590 if(((uintptr_t)src ^ (uintptr_t)dst) & 3) { \ 591 /* source and destination not in phase, blit one by one */ \ 592 while(n--) \ 593 BLEND16_50(dst, src, mask); \ 594 } else { \ 595 if((uintptr_t)src & 3) { \ 596 /* first odd pixel */ \ 597 BLEND16_50(dst, src, mask); \ 598 n--; \ 599 } \ 600 for(; n > 1; n -= 2) { \ 601 Uint32 s = *(Uint32 *)src; \ 602 Uint32 d = *(Uint32 *)dst; \ 603 *(Uint32 *)dst = ((s & (mask | mask << 16)) >> 1) \ 604 + ((d & (mask | mask << 16)) >> 1) \ 605 + (s & d & (~(mask | mask << 16))); \ 606 src += 2; \ 607 dst += 2; \ 608 } \ 609 if(n) \ 610 BLEND16_50(dst, src, mask); /* last odd pixel */ \ 611 } \ 612 } while(0) 613 614 #define ALPHA_BLIT16_565_50(to, from, length, bpp, alpha) \ 615 ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xf7de) 616 617 #define ALPHA_BLIT16_555_50(to, from, length, bpp, alpha) \ 618 ALPHA_BLIT16_50(to, from, length, bpp, alpha, 0xfbde) 619 620 #ifdef MMX_ASMBLIT 621 622 #define CHOOSE_BLIT(blitter, alpha, fmt) \ 623 do { \ 624 if(alpha == 255) { \ 625 switch(fmt->BytesPerPixel) { \ 626 case 1: blitter(1, Uint8, OPAQUE_BLIT); break; \ 627 case 2: blitter(2, Uint8, OPAQUE_BLIT); break; \ 628 case 3: blitter(3, Uint8, OPAQUE_BLIT); break; \ 629 case 4: blitter(4, Uint16, OPAQUE_BLIT); break; \ 630 } \ 631 } else { \ 632 switch(fmt->BytesPerPixel) { \ 633 case 1: \ 634 /* No 8bpp alpha blitting */ \ 635 break; \ 636 \ 637 case 2: \ 638 switch(fmt->Rmask | fmt->Gmask | fmt->Bmask) { \ 639 case 0xffff: \ 640 if(fmt->Gmask == 0x07e0 \ 641 || fmt->Rmask == 0x07e0 \ 642 || fmt->Bmask == 0x07e0) { \ 643 if(alpha == 128) \ 644 blitter(2, Uint8, ALPHA_BLIT16_565_50); \ 645 else { \ 646 if(SDL_HasMMX()) \ 647 blitter(2, Uint8, ALPHA_BLIT16_565MMX); \ 648 else \ 649 blitter(2, Uint8, ALPHA_BLIT16_565); \ 650 } \ 651 } else \ 652 goto 
general16; \ 653 break; \ 654 \ 655 case 0x7fff: \ 656 if(fmt->Gmask == 0x03e0 \ 657 || fmt->Rmask == 0x03e0 \ 658 || fmt->Bmask == 0x03e0) { \ 659 if(alpha == 128) \ 660 blitter(2, Uint8, ALPHA_BLIT16_555_50); \ 661 else { \ 662 if(SDL_HasMMX()) \ 663 blitter(2, Uint8, ALPHA_BLIT16_555MMX); \ 664 else \ 665 blitter(2, Uint8, ALPHA_BLIT16_555); \ 666 } \ 667 break; \ 668 } \ 669 /* fallthrough */ \ 670 \ 671 default: \ 672 general16: \ 673 blitter(2, Uint8, ALPHA_BLIT_ANY); \ 674 } \ 675 break; \ 676 \ 677 case 3: \ 678 blitter(3, Uint8, ALPHA_BLIT_ANY); \ 679 break; \ 680 \ 681 case 4: \ 682 if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff \ 683 && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00 \ 684 || fmt->Bmask == 0xff00)) { \ 685 if(alpha == 128) \ 686 { \ 687 if(SDL_HasMMX()) \ 688 blitter(4, Uint16, ALPHA_BLIT32_888_50MMX);\ 689 else \ 690 blitter(4, Uint16, ALPHA_BLIT32_888_50);\ 691 } \ 692 else \ 693 { \ 694 if(SDL_HasMMX()) \ 695 blitter(4, Uint16, ALPHA_BLIT32_888MMX);\ 696 else \ 697 blitter(4, Uint16, ALPHA_BLIT32_888); \ 698 } \ 699 } else \ 700 blitter(4, Uint16, ALPHA_BLIT_ANY); \ 701 break; \ 702 } \ 703 } \ 704 } while(0) 705 706 #else 707 708 #define CHOOSE_BLIT(blitter, alpha, fmt) \ 709 do { \ 710 if(alpha == 255) { \ 711 switch(fmt->BytesPerPixel) { \ 712 case 1: blitter(1, Uint8, OPAQUE_BLIT); break; \ 713 case 2: blitter(2, Uint8, OPAQUE_BLIT); break; \ 714 case 3: blitter(3, Uint8, OPAQUE_BLIT); break; \ 715 case 4: blitter(4, Uint16, OPAQUE_BLIT); break; \ 716 } \ 717 } else { \ 718 switch(fmt->BytesPerPixel) { \ 719 case 1: \ 720 /* No 8bpp alpha blitting */ \ 721 break; \ 722 \ 723 case 2: \ 724 switch(fmt->Rmask | fmt->Gmask | fmt->Bmask) { \ 725 case 0xffff: \ 726 if(fmt->Gmask == 0x07e0 \ 727 || fmt->Rmask == 0x07e0 \ 728 || fmt->Bmask == 0x07e0) { \ 729 if(alpha == 128) \ 730 blitter(2, Uint8, ALPHA_BLIT16_565_50); \ 731 else { \ 732 blitter(2, Uint8, ALPHA_BLIT16_565); \ 733 } \ 734 } else \ 735 goto general16; \ 736 break; \ 
737 \ 738 case 0x7fff: \ 739 if(fmt->Gmask == 0x03e0 \ 740 || fmt->Rmask == 0x03e0 \ 741 || fmt->Bmask == 0x03e0) { \ 742 if(alpha == 128) \ 743 blitter(2, Uint8, ALPHA_BLIT16_555_50); \ 744 else { \ 745 blitter(2, Uint8, ALPHA_BLIT16_555); \ 746 } \ 747 break; \ 748 } \ 749 /* fallthrough */ \ 750 \ 751 default: \ 752 general16: \ 753 blitter(2, Uint8, ALPHA_BLIT_ANY); \ 754 } \ 755 break; \ 756 \ 757 case 3: \ 758 blitter(3, Uint8, ALPHA_BLIT_ANY); \ 759 break; \ 760 \ 761 case 4: \ 762 if((fmt->Rmask | fmt->Gmask | fmt->Bmask) == 0x00ffffff \ 763 && (fmt->Gmask == 0xff00 || fmt->Rmask == 0xff00 \ 764 || fmt->Bmask == 0xff00)) { \ 765 if(alpha == 128) \ 766 blitter(4, Uint16, ALPHA_BLIT32_888_50); \ 767 else \ 768 blitter(4, Uint16, ALPHA_BLIT32_888); \ 769 } else \ 770 blitter(4, Uint16, ALPHA_BLIT_ANY); \ 771 break; \ 772 } \ 773 } \ 774 } while(0) 775 776 #endif 777 778 /* 779 * This takes care of the case when the surface is clipped on the left and/or 780 * right. Top clipping has already been taken care of. 
781 */ 782 static void RLEClipBlit(int w, Uint8 *srcbuf, SDL_Surface *dst, 783 Uint8 *dstbuf, SDL_Rect *srcrect, unsigned alpha) 784 { 785 SDL_PixelFormat *fmt = dst->format; 786 787 #define RLECLIPBLIT(bpp, Type, do_blit) \ 788 do { \ 789 int linecount = srcrect->h; \ 790 int ofs = 0; \ 791 int left = srcrect->x; \ 792 int right = left + srcrect->w; \ 793 dstbuf -= left * bpp; \ 794 for(;;) { \ 795 int run; \ 796 ofs += *(Type *)srcbuf; \ 797 run = ((Type *)srcbuf)[1]; \ 798 srcbuf += 2 * sizeof(Type); \ 799 if(run) { \ 800 /* clip to left and right borders */ \ 801 if(ofs < right) { \ 802 int start = 0; \ 803 int len = run; \ 804 int startcol; \ 805 if(left - ofs > 0) { \ 806 start = left - ofs; \ 807 len -= start; \ 808 if(len <= 0) \ 809 goto nocopy ## bpp ## do_blit; \ 810 } \ 811 startcol = ofs + start; \ 812 if(len > right - startcol) \ 813 len = right - startcol; \ 814 do_blit(dstbuf + startcol * bpp, srcbuf + start * bpp, \ 815 len, bpp, alpha); \ 816 } \ 817 nocopy ## bpp ## do_blit: \ 818 srcbuf += run * bpp; \ 819 ofs += run; \ 820 } else if(!ofs) \ 821 break; \ 822 if(ofs == w) { \ 823 ofs = 0; \ 824 dstbuf += dst->pitch; \ 825 if(!--linecount) \ 826 break; \ 827 } \ 828 } \ 829 } while(0) 830 831 CHOOSE_BLIT(RLECLIPBLIT, alpha, fmt); 832 833 #undef RLECLIPBLIT 834 835 } 836 837 838 /* blit a colorkeyed RLE surface */ 839 int SDL_RLEBlit(SDL_Surface *src, SDL_Rect *srcrect, 840 SDL_Surface *dst, SDL_Rect *dstrect) 841 { 842 Uint8 *dstbuf; 843 Uint8 *srcbuf; 844 int x, y; 845 int w = src->w; 846 unsigned alpha; 847 848 /* Lock the destination if necessary */ 849 if ( SDL_MUSTLOCK(dst) ) { 850 if ( SDL_LockSurface(dst) < 0 ) { 851 return(-1); 852 } 853 } 854 855 /* Set up the source and destination pointers */ 856 x = dstrect->x; 857 y = dstrect->y; 858 dstbuf = (Uint8 *)dst->pixels 859 + y * dst->pitch + x * src->format->BytesPerPixel; 860 srcbuf = (Uint8 *)src->map->sw_data->aux_data; 861 862 { 863 /* skip lines at the top if neccessary */ 864 int 
vskip = srcrect->y; 865 int ofs = 0; 866 if(vskip) { 867 868 #define RLESKIP(bpp, Type) \ 869 for(;;) { \ 870 int run; \ 871 ofs += *(Type *)srcbuf; \ 872 run = ((Type *)srcbuf)[1]; \ 873 srcbuf += sizeof(Type) * 2; \ 874 if(run) { \ 875 srcbuf += run * bpp; \ 876 ofs += run; \ 877 } else if(!ofs) \ 878 goto done; \ 879 if(ofs == w) { \ 880 ofs = 0; \ 881 if(!--vskip) \ 882 break; \ 883 } \ 884 } 885 886 switch(src->format->BytesPerPixel) { 887 case 1: RLESKIP(1, Uint8); break; 888 case 2: RLESKIP(2, Uint8); break; 889 case 3: RLESKIP(3, Uint8); break; 890 case 4: RLESKIP(4, Uint16); break; 891 } 892 893 #undef RLESKIP 894 895 } 896 } 897 898 alpha = (src->flags & SDL_SRCALPHA) == SDL_SRCALPHA 899 ? src->format->alpha : 255; 900 /* if left or right edge clipping needed, call clip blit */ 901 if ( srcrect->x || srcrect->w != src->w ) { 902 RLEClipBlit(w, srcbuf, dst, dstbuf, srcrect, alpha); 903 } else { 904 SDL_PixelFormat *fmt = src->format; 905 906 #define RLEBLIT(bpp, Type, do_blit) \ 907 do { \ 908 int linecount = srcrect->h; \ 909 int ofs = 0; \ 910 for(;;) { \ 911 unsigned run; \ 912 ofs += *(Type *)srcbuf; \ 913 run = ((Type *)srcbuf)[1]; \ 914 srcbuf += 2 * sizeof(Type); \ 915 if(run) { \ 916 do_blit(dstbuf + ofs * bpp, srcbuf, run, bpp, alpha); \ 917 srcbuf += run * bpp; \ 918 ofs += run; \ 919 } else if(!ofs) \ 920 break; \ 921 if(ofs == w) { \ 922 ofs = 0; \ 923 dstbuf += dst->pitch; \ 924 if(!--linecount) \ 925 break; \ 926 } \ 927 } \ 928 } while(0) 929 930 CHOOSE_BLIT(RLEBLIT, alpha, fmt); 931 932 #undef RLEBLIT 933 } 934 935 done: 936 /* Unlock the destination if necessary */ 937 if ( SDL_MUSTLOCK(dst) ) { 938 SDL_UnlockSurface(dst); 939 } 940 return(0); 941 } 942 943 #undef OPAQUE_BLIT 944 945 /* 946 * Per-pixel blitting macros for translucent pixels: 947 * These use the same techniques as the per-surface blitting macros 948 */ 949 950 /* 951 * For 32bpp pixels, we have made sure the alpha is stored in the top 952 * 8 bits, so proceed as usual 953 
*/ 954 #define BLIT_TRANSL_888(src, dst) \ 955 do { \ 956 Uint32 s = src; \ 957 Uint32 d = dst; \ 958 unsigned alpha = s >> 24; \ 959 Uint32 s1 = s & 0xff00ff; \ 960 Uint32 d1 = d & 0xff00ff; \ 961 d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \ 962 s &= 0xff00; \ 963 d &= 0xff00; \ 964 d = (d + ((s - d) * alpha >> 8)) & 0xff00; \ 965 dst = d1 | d; \ 966 } while(0) 967 968 /* 969 * For 16bpp pixels, we have stored the 5 most significant alpha bits in 970 * bits 5-10. As before, we can process all 3 RGB components at the same time. 971 */ 972 #define BLIT_TRANSL_565(src, dst) \ 973 do { \ 974 Uint32 s = src; \ 975 Uint32 d = dst; \ 976 unsigned alpha = (s & 0x3e0) >> 5; \ 977 s &= 0x07e0f81f; \ 978 d = (d | d << 16) & 0x07e0f81f; \ 979 d += (s - d) * alpha >> 5; \ 980 d &= 0x07e0f81f; \ 981 dst = (Uint16)(d | d >> 16); \ 982 } while(0) 983 984 #define BLIT_TRANSL_555(src, dst) \ 985 do { \ 986 Uint32 s = src; \ 987 Uint32 d = dst; \ 988 unsigned alpha = (s & 0x3e0) >> 5; \ 989 s &= 0x03e07c1f; \ 990 d = (d | d << 16) & 0x03e07c1f; \ 991 d += (s - d) * alpha >> 5; \ 992 d &= 0x03e07c1f; \ 993 dst = (Uint16)(d | d >> 16); \ 994 } while(0) 995 996 /* used to save the destination format in the encoding. Designed to be 997 macro-compatible with SDL_PixelFormat but without the unneeded fields */ 998 typedef struct { 999 Uint8 BytesPerPixel; 1000 Uint8 Rloss; 1001 Uint8 Gloss; 1002 Uint8 Bloss; 1003 Uint8 Rshift; 1004 Uint8 Gshift; 1005 Uint8 Bshift; 1006 Uint8 Ashift; 1007 Uint32 Rmask; 1008 Uint32 Gmask; 1009 Uint32 Bmask; 1010 Uint32 Amask; 1011 } RLEDestFormat; 1012 1013 /* blit a pixel-alpha RLE surface clipped at the right and/or left edges */ 1014 static void RLEAlphaClipBlit(int w, Uint8 *srcbuf, SDL_Surface *dst, 1015 Uint8 *dstbuf, SDL_Rect *srcrect) 1016 { 1017 SDL_PixelFormat *df = dst->format; 1018 /* 1019 * clipped blitter: Ptype is the destination pixel type, 1020 * Ctype the translucent count type, and do_blend the macro 1021 * to blend one pixel. 
1022 */ 1023 #define RLEALPHACLIPBLIT(Ptype, Ctype, do_blend) \ 1024 do { \ 1025 int linecount = srcrect->h; \ 1026 int left = srcrect->x; \ 1027 int right = left + srcrect->w; \ 1028 dstbuf -= left * sizeof(Ptype); \ 1029 do { \ 1030 int ofs = 0; \ 1031 /* blit opaque pixels on one line */ \ 1032 do { \ 1033 unsigned run; \ 1034 ofs += ((Ctype *)srcbuf)[0]; \ 1035 run = ((Ctype *)srcbuf)[1]; \ 1036 srcbuf += 2 * sizeof(Ctype); \ 1037 if(run) { \ 1038 /* clip to left and right borders */ \ 1039 int cofs = ofs; \ 1040 int crun = run; \ 1041 if(left - cofs > 0) { \ 1042 crun -= left - cofs; \ 1043 cofs = left; \ 1044 } \ 1045 if(crun > right - cofs) \ 1046 crun = right - cofs; \ 1047 if(crun > 0) \ 1048 PIXEL_COPY(dstbuf + cofs * sizeof(Ptype), \ 1049 srcbuf + (cofs - ofs) * sizeof(Ptype), \ 1050 (unsigned)crun, sizeof(Ptype)); \ 1051 srcbuf += run * sizeof(Ptype); \ 1052 ofs += run; \ 1053 } else if(!ofs) \ 1054 return; \ 1055 } while(ofs < w); \ 1056 /* skip padding if necessary */ \ 1057 if(sizeof(Ptype) == 2) \ 1058 srcbuf += (uintptr_t)srcbuf & 2; \ 1059 /* blit translucent pixels on the same line */ \ 1060 ofs = 0; \ 1061 do { \ 1062 unsigned run; \ 1063 ofs += ((Uint16 *)srcbuf)[0]; \ 1064 run = ((Uint16 *)srcbuf)[1]; \ 1065 srcbuf += 4; \ 1066 if(run) { \ 1067 /* clip to left and right borders */ \ 1068 int cofs = ofs; \ 1069 int crun = run; \ 1070 if(left - cofs > 0) { \ 1071 crun -= left - cofs; \ 1072 cofs = left; \ 1073 } \ 1074 if(crun > right - cofs) \ 1075 crun = right - cofs; \ 1076 if(crun > 0) { \ 1077 Ptype *dst = (Ptype *)dstbuf + cofs; \ 1078 Uint32 *src = (Uint32 *)srcbuf + (cofs - ofs); \ 1079 int i; \ 1080 for(i = 0; i < crun; i++) \ 1081 do_blend(src[i], dst[i]); \ 1082 } \ 1083 srcbuf += run * 4; \ 1084 ofs += run; \ 1085 } \ 1086 } while(ofs < w); \ 1087 dstbuf += dst->pitch; \ 1088 } while(--linecount); \ 1089 } while(0) 1090 1091 switch(df->BytesPerPixel) { 1092 case 2: 1093 if(df->Gmask == 0x07e0 || df->Rmask == 0x07e0 1094 || df->Bmask 
== 0x07e0) 1095 RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_565); 1096 else 1097 RLEALPHACLIPBLIT(Uint16, Uint8, BLIT_TRANSL_555); 1098 break; 1099 case 4: 1100 RLEALPHACLIPBLIT(Uint32, Uint16, BLIT_TRANSL_888); 1101 break; 1102 } 1103 } 1104 1105 /* blit a pixel-alpha RLE surface */ 1106 int SDL_RLEAlphaBlit(SDL_Surface *src, SDL_Rect *srcrect, 1107 SDL_Surface *dst, SDL_Rect *dstrect) 1108 { 1109 int x, y; 1110 int w = src->w; 1111 Uint8 *srcbuf, *dstbuf; 1112 SDL_PixelFormat *df = dst->format; 1113 1114 /* Lock the destination if necessary */ 1115 if ( SDL_MUSTLOCK(dst) ) { 1116 if ( SDL_LockSurface(dst) < 0 ) { 1117 return -1; 1118 } 1119 } 1120 1121 x = dstrect->x; 1122 y = dstrect->y; 1123 dstbuf = (Uint8 *)dst->pixels 1124 + y * dst->pitch + x * df->BytesPerPixel; 1125 srcbuf = (Uint8 *)src->map->sw_data->aux_data + sizeof(RLEDestFormat); 1126 1127 { 1128 /* skip lines at the top if necessary */ 1129 int vskip = srcrect->y; 1130 if(vskip) { 1131 int ofs; 1132 if(df->BytesPerPixel == 2) { 1133 /* the 16/32 interleaved format */ 1134 do { 1135 /* skip opaque line */ 1136 ofs = 0; 1137 do { 1138 int run; 1139 ofs += srcbuf[0]; 1140 run = srcbuf[1]; 1141 srcbuf += 2; 1142 if(run) { 1143 srcbuf += 2 * run; 1144 ofs += run; 1145 } else if(!ofs) 1146 goto done; 1147 } while(ofs < w); 1148 1149 /* skip padding */ 1150 srcbuf += (uintptr_t)srcbuf & 2; 1151 1152 /* skip translucent line */ 1153 ofs = 0; 1154 do { 1155 int run; 1156 ofs += ((Uint16 *)srcbuf)[0]; 1157 run = ((Uint16 *)srcbuf)[1]; 1158 srcbuf += 4 * (run + 1); 1159 ofs += run; 1160 } while(ofs < w); 1161 } while(--vskip); 1162 } else { 1163 /* the 32/32 interleaved format */ 1164 vskip <<= 1; /* opaque and translucent have same format */ 1165 do { 1166 ofs = 0; 1167 do { 1168 int run; 1169 ofs += ((Uint16 *)srcbuf)[0]; 1170 run = ((Uint16 *)srcbuf)[1]; 1171 srcbuf += 4; 1172 if(run) { 1173 srcbuf += 4 * run; 1174 ofs += run; 1175 } else if(!ofs) 1176 goto done; 1177 } while(ofs < w); 1178 } 
while(--vskip); 1179 } 1180 } 1181 } 1182 1183 /* if left or right edge clipping needed, call clip blit */ 1184 if(srcrect->x || srcrect->w != src->w) { 1185 RLEAlphaClipBlit(w, srcbuf, dst, dstbuf, srcrect); 1186 } else { 1187 1188 /* 1189 * non-clipped blitter. Ptype is the destination pixel type, 1190 * Ctype the translucent count type, and do_blend the 1191 * macro to blend one pixel. 1192 */ 1193 #define RLEALPHABLIT(Ptype, Ctype, do_blend) \ 1194 do { \ 1195 int linecount = srcrect->h; \ 1196 do { \ 1197 int ofs = 0; \ 1198 /* blit opaque pixels on one line */ \ 1199 do { \ 1200 unsigned run; \ 1201 ofs += ((Ctype *)srcbuf)[0]; \ 1202 run = ((Ctype *)srcbuf)[1]; \ 1203 srcbuf += 2 * sizeof(Ctype); \ 1204 if(run) { \ 1205 PIXEL_COPY(dstbuf + ofs * sizeof(Ptype), srcbuf, \ 1206 run, sizeof(Ptype)); \ 1207 srcbuf += run * sizeof(Ptype); \ 1208 ofs += run; \ 1209 } else if(!ofs) \ 1210 goto done; \ 1211 } while(ofs < w); \ 1212 /* skip padding if necessary */ \ 1213 if(sizeof(Ptype) == 2) \ 1214 srcbuf += (uintptr_t)srcbuf & 2; \ 1215 /* blit translucent pixels on the same line */ \ 1216 ofs = 0; \ 1217 do { \ 1218 unsigned run; \ 1219 ofs += ((Uint16 *)srcbuf)[0]; \ 1220 run = ((Uint16 *)srcbuf)[1]; \ 1221 srcbuf += 4; \ 1222 if(run) { \ 1223 Ptype *dst = (Ptype *)dstbuf + ofs; \ 1224 unsigned i; \ 1225 for(i = 0; i < run; i++) { \ 1226 Uint32 src = *(Uint32 *)srcbuf; \ 1227 do_blend(src, *dst); \ 1228 srcbuf += 4; \ 1229 dst++; \ 1230 } \ 1231 ofs += run; \ 1232 } \ 1233 } while(ofs < w); \ 1234 dstbuf += dst->pitch; \ 1235 } while(--linecount); \ 1236 } while(0) 1237 1238 switch(df->BytesPerPixel) { 1239 case 2: 1240 if(df->Gmask == 0x07e0 || df->Rmask == 0x07e0 1241 || df->Bmask == 0x07e0) 1242 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_565); 1243 else 1244 RLEALPHABLIT(Uint16, Uint8, BLIT_TRANSL_555); 1245 break; 1246 case 4: 1247 RLEALPHABLIT(Uint32, Uint16, BLIT_TRANSL_888); 1248 break; 1249 } 1250 } 1251 1252 done: 1253 /* Unlock the destination if necessary 
*/ 1254 if ( SDL_MUSTLOCK(dst) ) { 1255 SDL_UnlockSurface(dst); 1256 } 1257 return 0; 1258 } 1259 1260 /* 1261 * Auxiliary functions: 1262 * The encoding functions take 32bpp rgb + a, and 1263 * return the number of bytes copied to the destination. 1264 * The decoding functions copy to 32bpp rgb + a, and 1265 * return the number of bytes copied from the source. 1266 * These are only used in the encoder and un-RLE code and are therefore not 1267 * highly optimised. 1268 */ 1269 1270 /* encode 32bpp rgb + a into 16bpp rgb, losing alpha */ 1271 static int copy_opaque_16(void *dst, Uint32 *src, int n, 1272 SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt) 1273 { 1274 int i; 1275 Uint16 *d = dst; 1276 for(i = 0; i < n; i++) { 1277 unsigned r, g, b; 1278 RGB_FROM_PIXEL(*src, sfmt, r, g, b); 1279 PIXEL_FROM_RGB(*d, dfmt, r, g, b); 1280 src++; 1281 d++; 1282 } 1283 return n * 2; 1284 } 1285 1286 /* decode opaque pixels from 16bpp to 32bpp rgb + a */ 1287 static int uncopy_opaque_16(Uint32 *dst, void *src, int n, 1288 RLEDestFormat *sfmt, SDL_PixelFormat *dfmt) 1289 { 1290 int i; 1291 Uint16 *s = src; 1292 unsigned alpha = dfmt->Amask ? 
255 : 0; 1293 for(i = 0; i < n; i++) { 1294 unsigned r, g, b; 1295 RGB_FROM_PIXEL(*s, sfmt, r, g, b); 1296 PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, alpha); 1297 s++; 1298 dst++; 1299 } 1300 return n * 2; 1301 } 1302 1303 1304 1305 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 565 */ 1306 static int copy_transl_565(void *dst, Uint32 *src, int n, 1307 SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt) 1308 { 1309 int i; 1310 Uint32 *d = dst; 1311 for(i = 0; i < n; i++) { 1312 unsigned r, g, b, a; 1313 Uint16 pix; 1314 RGBA_FROM_8888(*src, sfmt, r, g, b, a); 1315 PIXEL_FROM_RGB(pix, dfmt, r, g, b); 1316 *d = ((pix & 0x7e0) << 16) | (pix & 0xf81f) | ((a << 2) & 0x7e0); 1317 src++; 1318 d++; 1319 } 1320 return n * 4; 1321 } 1322 1323 /* encode 32bpp rgb + a into 32bpp G0RAB format for blitting into 555 */ 1324 static int copy_transl_555(void *dst, Uint32 *src, int n, 1325 SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt) 1326 { 1327 int i; 1328 Uint32 *d = dst; 1329 for(i = 0; i < n; i++) { 1330 unsigned r, g, b, a; 1331 Uint16 pix; 1332 RGBA_FROM_8888(*src, sfmt, r, g, b, a); 1333 PIXEL_FROM_RGB(pix, dfmt, r, g, b); 1334 *d = ((pix & 0x3e0) << 16) | (pix & 0xfc1f) | ((a << 2) & 0x3e0); 1335 src++; 1336 d++; 1337 } 1338 return n * 4; 1339 } 1340 1341 /* decode translucent pixels from 32bpp GORAB to 32bpp rgb + a */ 1342 static int uncopy_transl_16(Uint32 *dst, void *src, int n, 1343 RLEDestFormat *sfmt, SDL_PixelFormat *dfmt) 1344 { 1345 int i; 1346 Uint32 *s = src; 1347 for(i = 0; i < n; i++) { 1348 unsigned r, g, b, a; 1349 Uint32 pix = *s++; 1350 a = (pix & 0x3e0) >> 2; 1351 pix = (pix & ~0x3e0) | pix >> 16; 1352 RGB_FROM_PIXEL(pix, sfmt, r, g, b); 1353 PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a); 1354 dst++; 1355 } 1356 return n * 4; 1357 } 1358 1359 /* encode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */ 1360 static int copy_32(void *dst, Uint32 *src, int n, 1361 SDL_PixelFormat *sfmt, SDL_PixelFormat *dfmt) 1362 { 1363 int i; 1364 Uint32 *d = dst; 
1365 for(i = 0; i < n; i++) { 1366 unsigned r, g, b, a; 1367 Uint32 pixel; 1368 RGBA_FROM_8888(*src, sfmt, r, g, b, a); 1369 PIXEL_FROM_RGB(pixel, dfmt, r, g, b); 1370 *d++ = pixel | a << 24; 1371 src++; 1372 } 1373 return n * 4; 1374 } 1375 1376 /* decode 32bpp rgba into 32bpp rgba, keeping alpha (dual purpose) */ 1377 static int uncopy_32(Uint32 *dst, void *src, int n, 1378 RLEDestFormat *sfmt, SDL_PixelFormat *dfmt) 1379 { 1380 int i; 1381 Uint32 *s = src; 1382 for(i = 0; i < n; i++) { 1383 unsigned r, g, b, a; 1384 Uint32 pixel = *s++; 1385 RGB_FROM_PIXEL(pixel, sfmt, r, g, b); 1386 a = pixel >> 24; 1387 PIXEL_FROM_RGBA(*dst, dfmt, r, g, b, a); 1388 dst++; 1389 } 1390 return n * 4; 1391 } 1392 1393 #define ISOPAQUE(pixel, fmt) ((((pixel) & fmt->Amask) >> fmt->Ashift) == 255) 1394 1395 #define ISTRANSL(pixel, fmt) \ 1396 ((unsigned)((((pixel) & fmt->Amask) >> fmt->Ashift) - 1U) < 254U) 1397 1398 /* convert surface to be quickly alpha-blittable onto dest, if possible */ 1399 static int RLEAlphaSurface(SDL_Surface *surface) 1400 { 1401 SDL_Surface *dest; 1402 SDL_PixelFormat *df; 1403 int maxsize = 0; 1404 int max_opaque_run; 1405 int max_transl_run = 65535; 1406 unsigned masksum; 1407 Uint8 *rlebuf, *dst; 1408 int (*copy_opaque)(void *, Uint32 *, int, 1409 SDL_PixelFormat *, SDL_PixelFormat *); 1410 int (*copy_transl)(void *, Uint32 *, int, 1411 SDL_PixelFormat *, SDL_PixelFormat *); 1412 1413 dest = surface->map->dst; 1414 if(!dest) 1415 return -1; 1416 df = dest->format; 1417 if(surface->format->BitsPerPixel != 32) 1418 return -1; /* only 32bpp source supported */ 1419 1420 /* find out whether the destination is one we support, 1421 and determine the max size of the encoded result */ 1422 masksum = df->Rmask | df->Gmask | df->Bmask; 1423 switch(df->BytesPerPixel) { 1424 case 2: 1425 /* 16bpp: only support 565 and 555 formats */ 1426 switch(masksum) { 1427 case 0xffff: 1428 if(df->Gmask == 0x07e0 1429 || df->Rmask == 0x07e0 || df->Bmask == 0x07e0) { 1430 
copy_opaque = copy_opaque_16; 1431 copy_transl = copy_transl_565; 1432 } else 1433 return -1; 1434 break; 1435 case 0x7fff: 1436 if(df->Gmask == 0x03e0 1437 || df->Rmask == 0x03e0 || df->Bmask == 0x03e0) { 1438 copy_opaque = copy_opaque_16; 1439 copy_transl = copy_transl_555; 1440 } else 1441 return -1; 1442 break; 1443 default: 1444 return -1; 1445 } 1446 max_opaque_run = 255; /* runs stored as bytes */ 1447 1448 /* worst case is alternating opaque and translucent pixels, 1449 with room for alignment padding between lines */ 1450 maxsize = surface->h * (2 + (4 + 2) * (surface->w + 1)) + 2; 1451 break; 1452 case 4: 1453 if(masksum != 0x00ffffff) 1454 return -1; /* requires unused high byte */ 1455 copy_opaque = copy_32; 1456 copy_transl = copy_32; 1457 max_opaque_run = 255; /* runs stored as short ints */ 1458 1459 /* worst case is alternating opaque and translucent pixels */ 1460 maxsize = surface->h * 2 * 4 * (surface->w + 1) + 4; 1461 break; 1462 default: 1463 return -1; /* anything else unsupported right now */ 1464 } 1465 1466 maxsize += sizeof(RLEDestFormat); 1467 rlebuf = (Uint8 *)SDL_malloc(maxsize); 1468 if(!rlebuf) { 1469 SDL_OutOfMemory(); 1470 return -1; 1471 } 1472 { 1473 /* save the destination format so we can undo the encoding later */ 1474 RLEDestFormat *r = (RLEDestFormat *)rlebuf; 1475 r->BytesPerPixel = df->BytesPerPixel; 1476 r->Rloss = df->Rloss; 1477 r->Gloss = df->Gloss; 1478 r->Bloss = df->Bloss; 1479 r->Rshift = df->Rshift; 1480 r->Gshift = df->Gshift; 1481 r->Bshift = df->Bshift; 1482 r->Ashift = df->Ashift; 1483 r->Rmask = df->Rmask; 1484 r->Gmask = df->Gmask; 1485 r->Bmask = df->Bmask; 1486 r->Amask = df->Amask; 1487 } 1488 dst = rlebuf + sizeof(RLEDestFormat); 1489 1490 /* Do the actual encoding */ 1491 { 1492 int x, y; 1493 int h = surface->h, w = surface->w; 1494 SDL_PixelFormat *sf = surface->format; 1495 Uint32 *src = (Uint32 *)surface->pixels; 1496 Uint8 *lastline = dst; /* end of last non-blank line */ 1497 1498 /* opaque counts 
are 8 or 16 bits, depending on target depth */ 1499 #define ADD_OPAQUE_COUNTS(n, m) \ 1500 if(df->BytesPerPixel == 4) { \ 1501 ((Uint16 *)dst)[0] = n; \ 1502 ((Uint16 *)dst)[1] = m; \ 1503 dst += 4; \ 1504 } else { \ 1505 dst[0] = n; \ 1506 dst[1] = m; \ 1507 dst += 2; \ 1508 } 1509 1510 /* translucent counts are always 16 bit */ 1511 #define ADD_TRANSL_COUNTS(n, m) \ 1512 (((Uint16 *)dst)[0] = n, ((Uint16 *)dst)[1] = m, dst += 4) 1513 1514 for(y = 0; y < h; y++) { 1515 int runstart, skipstart; 1516 int blankline = 0; 1517 /* First encode all opaque pixels of a scan line */ 1518 x = 0; 1519 do { 1520 int run, skip, len; 1521 skipstart = x; 1522 while(x < w && !ISOPAQUE(src[x], sf)) 1523 x++; 1524 runstart = x; 1525 while(x < w && ISOPAQUE(src[x], sf)) 1526 x++; 1527 skip = runstart - skipstart; 1528 if(skip == w) 1529 blankline = 1; 1530 run = x - runstart; 1531 while(skip > max_opaque_run) { 1532 ADD_OPAQUE_COUNTS(max_opaque_run, 0); 1533 skip -= max_opaque_run; 1534 } 1535 len = MIN(run, max_opaque_run); 1536 ADD_OPAQUE_COUNTS(skip, len); 1537 dst += copy_opaque(dst, src + runstart, len, sf, df); 1538 runstart += len; 1539 run -= len; 1540 while(run) { 1541 len = MIN(run, max_opaque_run); 1542 ADD_OPAQUE_COUNTS(0, len); 1543 dst += copy_opaque(dst, src + runstart, len, sf, df); 1544 runstart += len; 1545 run -= len; 1546 } 1547 } while(x < w); 1548 1549 /* Make sure the next output address is 32-bit aligned */ 1550 dst += (uintptr_t)dst & 2; 1551 1552 /* Next, encode all translucent pixels of the same scan line */ 1553 x = 0; 1554 do { 1555 int run, skip, len; 1556 skipstart = x; 1557 while(x < w && !ISTRANSL(src[x], sf)) 1558 x++; 1559 runstart = x; 1560 while(x < w && ISTRANSL(src[x], sf)) 1561 x++; 1562 skip = runstart - skipstart; 1563 blankline &= (skip == w); 1564 run = x - runstart; 1565 while(skip > max_transl_run) { 1566 ADD_TRANSL_COUNTS(max_transl_run, 0); 1567 skip -= max_transl_run; 1568 } 1569 len = MIN(run, max_transl_run); 1570 
ADD_TRANSL_COUNTS(skip, len); 1571 dst += copy_transl(dst, src + runstart, len, sf, df); 1572 runstart += len; 1573 run -= len; 1574 while(run) { 1575 len = MIN(run, max_transl_run); 1576 ADD_TRANSL_COUNTS(0, len); 1577 dst += copy_transl(dst, src + runstart, len, sf, df); 1578 runstart += len; 1579 run -= len; 1580 } 1581 if(!blankline) 1582 lastline = dst; 1583 } while(x < w); 1584 1585 src += surface->pitch >> 2; 1586 } 1587 dst = lastline; /* back up past trailing blank lines */ 1588 ADD_OPAQUE_COUNTS(0, 0); 1589 } 1590 1591 #undef ADD_OPAQUE_COUNTS 1592 #undef ADD_TRANSL_COUNTS 1593 1594 /* Now that we have it encoded, release the original pixels */ 1595 if((surface->flags & SDL_PREALLOC) != SDL_PREALLOC 1596 && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE) { 1597 SDL_free( surface->pixels ); 1598 surface->pixels = NULL; 1599 } 1600 1601 /* realloc the buffer to release unused memory */ 1602 { 1603 Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf); 1604 if(!p) 1605 p = rlebuf; 1606 surface->map->sw_data->aux_data = p; 1607 } 1608 1609 return 0; 1610 } 1611 1612 static Uint32 getpix_8(Uint8 *srcbuf) 1613 { 1614 return *srcbuf; 1615 } 1616 1617 static Uint32 getpix_16(Uint8 *srcbuf) 1618 { 1619 return *(Uint16 *)srcbuf; 1620 } 1621 1622 static Uint32 getpix_24(Uint8 *srcbuf) 1623 { 1624 #if SDL_BYTEORDER == SDL_LIL_ENDIAN 1625 return srcbuf[0] + (srcbuf[1] << 8) + (srcbuf[2] << 16); 1626 #else 1627 return (srcbuf[0] << 16) + (srcbuf[1] << 8) + srcbuf[2]; 1628 #endif 1629 } 1630 1631 static Uint32 getpix_32(Uint8 *srcbuf) 1632 { 1633 return *(Uint32 *)srcbuf; 1634 } 1635 1636 typedef Uint32 (*getpix_func)(Uint8 *); 1637 1638 static getpix_func getpixes[4] = { 1639 getpix_8, getpix_16, getpix_24, getpix_32 1640 }; 1641 1642 static int RLEColorkeySurface(SDL_Surface *surface) 1643 { 1644 Uint8 *rlebuf, *dst; 1645 int maxn; 1646 int y; 1647 Uint8 *srcbuf, *curbuf, *lastline; 1648 int maxsize = 0; 1649 int skip, run; 1650 int bpp = surface->format->BytesPerPixel; 
1651 getpix_func getpix; 1652 Uint32 ckey, rgbmask; 1653 int w, h; 1654 1655 /* calculate the worst case size for the compressed surface */ 1656 switch(bpp) { 1657 case 1: 1658 /* worst case is alternating opaque and transparent pixels, 1659 starting with an opaque pixel */ 1660 maxsize = surface->h * 3 * (surface->w / 2 + 1) + 2; 1661 break; 1662 case 2: 1663 case 3: 1664 /* worst case is solid runs, at most 255 pixels wide */ 1665 maxsize = surface->h * (2 * (surface->w / 255 + 1) 1666 + surface->w * bpp) + 2; 1667 break; 1668 case 4: 1669 /* worst case is solid runs, at most 65535 pixels wide */ 1670 maxsize = surface->h * (4 * (surface->w / 65535 + 1) 1671 + surface->w * 4) + 4; 1672 break; 1673 } 1674 1675 rlebuf = (Uint8 *)SDL_malloc(maxsize); 1676 if ( rlebuf == NULL ) { 1677 SDL_OutOfMemory(); 1678 return(-1); 1679 } 1680 1681 /* Set up the conversion */ 1682 srcbuf = (Uint8 *)surface->pixels; 1683 curbuf = srcbuf; 1684 maxn = bpp == 4 ? 65535 : 255; 1685 skip = run = 0; 1686 dst = rlebuf; 1687 rgbmask = ~surface->format->Amask; 1688 ckey = surface->format->colorkey & rgbmask; 1689 lastline = dst; 1690 getpix = getpixes[bpp - 1]; 1691 w = surface->w; 1692 h = surface->h; 1693 1694 #define ADD_COUNTS(n, m) \ 1695 if(bpp == 4) { \ 1696 ((Uint16 *)dst)[0] = n; \ 1697 ((Uint16 *)dst)[1] = m; \ 1698 dst += 4; \ 1699 } else { \ 1700 dst[0] = n; \ 1701 dst[1] = m; \ 1702 dst += 2; \ 1703 } 1704 1705 for(y = 0; y < h; y++) { 1706 int x = 0; 1707 int blankline = 0; 1708 do { 1709 int run, skip, len; 1710 int runstart; 1711 int skipstart = x; 1712 1713 /* find run of transparent, then opaque pixels */ 1714 while(x < w && (getpix(srcbuf + x * bpp) & rgbmask) == ckey) 1715 x++; 1716 runstart = x; 1717 while(x < w && (getpix(srcbuf + x * bpp) & rgbmask) != ckey) 1718 x++; 1719 skip = runstart - skipstart; 1720 if(skip == w) 1721 blankline = 1; 1722 run = x - runstart; 1723 1724 /* encode segment */ 1725 while(skip > maxn) { 1726 ADD_COUNTS(maxn, 0); 1727 skip -= maxn; 
1728 } 1729 len = MIN(run, maxn); 1730 ADD_COUNTS(skip, len); 1731 SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp); 1732 dst += len * bpp; 1733 run -= len; 1734 runstart += len; 1735 while(run) { 1736 len = MIN(run, maxn); 1737 ADD_COUNTS(0, len); 1738 SDL_memcpy(dst, srcbuf + runstart * bpp, len * bpp); 1739 dst += len * bpp; 1740 runstart += len; 1741 run -= len; 1742 } 1743 if(!blankline) 1744 lastline = dst; 1745 } while(x < w); 1746 1747 srcbuf += surface->pitch; 1748 } 1749 dst = lastline; /* back up bast trailing blank lines */ 1750 ADD_COUNTS(0, 0); 1751 1752 #undef ADD_COUNTS 1753 1754 /* Now that we have it encoded, release the original pixels */ 1755 if((surface->flags & SDL_PREALLOC) != SDL_PREALLOC 1756 && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE) { 1757 SDL_free( surface->pixels ); 1758 surface->pixels = NULL; 1759 } 1760 1761 /* realloc the buffer to release unused memory */ 1762 { 1763 /* If realloc returns NULL, the original block is left intact */ 1764 Uint8 *p = SDL_realloc(rlebuf, dst - rlebuf); 1765 if(!p) 1766 p = rlebuf; 1767 surface->map->sw_data->aux_data = p; 1768 } 1769 1770 return(0); 1771 } 1772 1773 int SDL_RLESurface(SDL_Surface *surface) 1774 { 1775 int retcode; 1776 1777 /* Clear any previous RLE conversion */ 1778 if ( (surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL ) { 1779 SDL_UnRLESurface(surface, 1); 1780 } 1781 1782 /* We don't support RLE encoding of bitmaps */ 1783 if ( surface->format->BitsPerPixel < 8 ) { 1784 return(-1); 1785 } 1786 1787 /* Lock the surface if it's in hardware */ 1788 if ( SDL_MUSTLOCK(surface) ) { 1789 if ( SDL_LockSurface(surface) < 0 ) { 1790 return(-1); 1791 } 1792 } 1793 1794 /* Encode */ 1795 if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) { 1796 retcode = RLEColorkeySurface(surface); 1797 } else { 1798 if((surface->flags & SDL_SRCALPHA) == SDL_SRCALPHA 1799 && surface->format->Amask != 0) 1800 retcode = RLEAlphaSurface(surface); 1801 else 1802 retcode = -1; /* no RLE for 
per-surface alpha sans ckey */ 1803 } 1804 1805 /* Unlock the surface if it's in hardware */ 1806 if ( SDL_MUSTLOCK(surface) ) { 1807 SDL_UnlockSurface(surface); 1808 } 1809 1810 if(retcode < 0) 1811 return -1; 1812 1813 /* The surface is now accelerated */ 1814 surface->flags |= SDL_RLEACCEL; 1815 1816 return(0); 1817 } 1818 1819 /* 1820 * Un-RLE a surface with pixel alpha 1821 * This may not give back exactly the image before RLE-encoding; all 1822 * completely transparent pixels will be lost, and colour and alpha depth 1823 * may have been reduced (when encoding for 16bpp targets). 1824 */ 1825 static SDL_bool UnRLEAlpha(SDL_Surface *surface) 1826 { 1827 Uint8 *srcbuf; 1828 Uint32 *dst; 1829 SDL_PixelFormat *sf = surface->format; 1830 RLEDestFormat *df = surface->map->sw_data->aux_data; 1831 int (*uncopy_opaque)(Uint32 *, void *, int, 1832 RLEDestFormat *, SDL_PixelFormat *); 1833 int (*uncopy_transl)(Uint32 *, void *, int, 1834 RLEDestFormat *, SDL_PixelFormat *); 1835 int w = surface->w; 1836 int bpp = df->BytesPerPixel; 1837 1838 if(bpp == 2) { 1839 uncopy_opaque = uncopy_opaque_16; 1840 uncopy_transl = uncopy_transl_16; 1841 } else { 1842 uncopy_opaque = uncopy_transl = uncopy_32; 1843 } 1844 1845 surface->pixels = SDL_malloc(surface->h * surface->pitch); 1846 if ( !surface->pixels ) { 1847 return(SDL_FALSE); 1848 } 1849 /* fill background with transparent pixels */ 1850 SDL_memset(surface->pixels, 0, surface->h * surface->pitch); 1851 1852 dst = surface->pixels; 1853 srcbuf = (Uint8 *)(df + 1); 1854 for(;;) { 1855 /* copy opaque pixels */ 1856 int ofs = 0; 1857 do { 1858 unsigned run; 1859 if(bpp == 2) { 1860 ofs += srcbuf[0]; 1861 run = srcbuf[1]; 1862 srcbuf += 2; 1863 } else { 1864 ofs += ((Uint16 *)srcbuf)[0]; 1865 run = ((Uint16 *)srcbuf)[1]; 1866 srcbuf += 4; 1867 } 1868 if(run) { 1869 srcbuf += uncopy_opaque(dst + ofs, srcbuf, run, df, sf); 1870 ofs += run; 1871 } else if(!ofs) 1872 return(SDL_TRUE); 1873 } while(ofs < w); 1874 1875 /* skip padding 
if needed */ 1876 if(bpp == 2) 1877 srcbuf += (uintptr_t)srcbuf & 2; 1878 1879 /* copy translucent pixels */ 1880 ofs = 0; 1881 do { 1882 unsigned run; 1883 ofs += ((Uint16 *)srcbuf)[0]; 1884 run = ((Uint16 *)srcbuf)[1]; 1885 srcbuf += 4; 1886 if(run) { 1887 srcbuf += uncopy_transl(dst + ofs, srcbuf, run, df, sf); 1888 ofs += run; 1889 } 1890 } while(ofs < w); 1891 dst += surface->pitch >> 2; 1892 } 1893 /* Make the compiler happy */ 1894 return(SDL_TRUE); 1895 } 1896 1897 void SDL_UnRLESurface(SDL_Surface *surface, int recode) 1898 { 1899 if ( (surface->flags & SDL_RLEACCEL) == SDL_RLEACCEL ) { 1900 surface->flags &= ~SDL_RLEACCEL; 1901 1902 if(recode && (surface->flags & SDL_PREALLOC) != SDL_PREALLOC 1903 && (surface->flags & SDL_HWSURFACE) != SDL_HWSURFACE) { 1904 if((surface->flags & SDL_SRCCOLORKEY) == SDL_SRCCOLORKEY) { 1905 SDL_Rect full; 1906 unsigned alpha_flag; 1907 1908 /* re-create the original surface */ 1909 surface->pixels = SDL_malloc(surface->h * surface->pitch); 1910 if ( !surface->pixels ) { 1911 /* Oh crap... */ 1912 surface->flags |= SDL_RLEACCEL; 1913 return; 1914 } 1915 1916 /* fill it with the background colour */ 1917 SDL_FillRect(surface, NULL, surface->format->colorkey); 1918 1919 /* now render the encoded surface */ 1920 full.x = full.y = 0; 1921 full.w = surface->w; 1922 full.h = surface->h; 1923 alpha_flag = surface->flags & SDL_SRCALPHA; 1924 surface->flags &= ~SDL_SRCALPHA; /* opaque blit */ 1925 SDL_RLEBlit(surface, &full, surface, &full); 1926 surface->flags |= alpha_flag; 1927 } else { 1928 if ( !UnRLEAlpha(surface) ) { 1929 /* Oh crap... */ 1930 surface->flags |= SDL_RLEACCEL; 1931 return; 1932 } 1933 } 1934 } 1935 1936 if ( surface->map && surface->map->sw_data->aux_data ) { 1937 SDL_free(surface->map->sw_data->aux_data); 1938 surface->map->sw_data->aux_data = NULL; 1939 } 1940 } 1941 } 1942 1943 1944