Home | History | Annotate | Download | only in hermes
      1 ;
      2 ; x86 format converters for HERMES
      3 ; Some routines Copyright (c) 1998 Christian Nentwich (brn (a] eleet.mcb.at)
      4 ; This source code is licensed under the GNU LGPL
      5 ; 
      6 ; Please refer to the file COPYING.LIB contained in the distribution for
      7 ; licensing conditions		
      8 ;
      9 ; Most routines are (c) Glenn Fiedler (ptc (a] gaffer.org), used with permission
     10 ; 
     11 
     12 BITS 32
     13 
     14 %include "common.inc"
     15 
     16 SDL_FUNC _ConvertX86p32_32BGR888
     17 SDL_FUNC _ConvertX86p32_32RGBA888
     18 SDL_FUNC _ConvertX86p32_32BGRA888
     19 SDL_FUNC _ConvertX86p32_24RGB888	
     20 SDL_FUNC _ConvertX86p32_24BGR888
     21 SDL_FUNC _ConvertX86p32_16RGB565
     22 SDL_FUNC _ConvertX86p32_16BGR565
     23 SDL_FUNC _ConvertX86p32_16RGB555
     24 SDL_FUNC _ConvertX86p32_16BGR555
     25 SDL_FUNC _ConvertX86p32_8RGB332
     26 
     27 SECTION .text
     28 
;; _Convert_*
;; Parameters:
;;   ESI = source
;;   EDI = dest
;;   ECX = amount (must NOT be 0; the _ConvertX86 routine checks for that)
;; Destroys:
;;   EAX, EBX, EDX (ECX, ESI and EDI are modified as well)
     36 
     37 
     38 _ConvertX86p32_32BGR888:
     39 
     40     ; check short
     41     cmp ecx,BYTE 32
     42     ja .L3
     43 
     44 .L1 ; short loop
     45     mov edx,[esi]
     46     bswap edx
     47     ror edx,8
     48     mov [edi],edx
     49     add esi,BYTE 4
     50     add edi,BYTE 4
     51     dec ecx
     52     jnz .L1
     53 .L2
     54     retn
     55 
     56 .L3 ; save ebp
     57     push ebp
     58 
     59     ; unroll four times
     60     mov ebp,ecx
     61     shr ebp,2
     62     
     63     ; save count
     64     push ecx
     65 
     66 .L4     mov eax,[esi]
     67         mov ebx,[esi+4]
     68 
     69         bswap eax
     70 
     71         bswap ebx
     72 
     73         ror eax,8
     74         mov ecx,[esi+8]
     75 
     76         ror ebx,8
     77         mov edx,[esi+12]
     78 
     79         bswap ecx
     80 
     81         bswap edx
     82 
     83         ror ecx,8
     84         mov [edi+0],eax
     85 
     86         ror edx,8
     87         mov [edi+4],ebx
     88 
     89         mov [edi+8],ecx
     90         mov [edi+12],edx
     91 
     92         add esi,BYTE 16
     93         add edi,BYTE 16
     94 
     95         dec ebp
     96         jnz .L4                 
     97 
     98     ; check tail
     99     pop ecx
    100     and ecx,BYTE 11b
    101     jz .L6
    102 
    103 .L5 ; tail loop
    104     mov edx,[esi]
    105     bswap edx
    106     ror edx,8
    107     mov [edi],edx
    108     add esi,BYTE 4
    109     add edi,BYTE 4
    110     dec ecx
    111     jnz .L5
    112 
    113 .L6 pop ebp
    114     retn
    115 	
    116 
    117 	
    118 		
    119 _ConvertX86p32_32RGBA888:
    120 	
    121     ; check short
    122     cmp ecx,BYTE 32
    123     ja .L3
    124 
    125 .L1 ; short loop
    126     mov edx,[esi]
    127     rol edx,8
    128     mov [edi],edx
    129     add esi,BYTE 4
    130     add edi,BYTE 4
    131     dec ecx
    132     jnz .L1
    133 .L2
    134     retn
    135 
    136 .L3 ; save ebp
    137     push ebp
    138 
    139     ; unroll four times
    140     mov ebp,ecx
    141     shr ebp,2
    142     
    143     ; save count
    144     push ecx
    145 
    146 .L4     mov eax,[esi]
    147         mov ebx,[esi+4]
    148 
    149         rol eax,8
    150         mov ecx,[esi+8]
    151 
    152         rol ebx,8
    153         mov edx,[esi+12]
    154 
    155         rol ecx,8
    156         mov [edi+0],eax
    157 
    158         rol edx,8
    159         mov [edi+4],ebx
    160 
    161         mov [edi+8],ecx
    162         mov [edi+12],edx
    163 
    164         add esi,BYTE 16
    165         add edi,BYTE 16
    166 
    167         dec ebp
    168         jnz .L4                 
    169 
    170     ; check tail
    171     pop ecx
    172     and ecx,BYTE 11b
    173     jz .L6
    174 
    175 .L5 ; tail loop
    176     mov edx,[esi]
    177     rol edx,8
    178     mov [edi],edx
    179     add esi,BYTE 4
    180     add edi,BYTE 4
    181     dec ecx
    182     jnz .L5
    183 
    184 .L6 pop ebp
    185     retn
    186 
    187 	
    188 
    189 
    190 _ConvertX86p32_32BGRA888:
    191 
    192     ; check short
    193     cmp ecx,BYTE 32
    194     ja .L3
    195 
    196 .L1 ; short loop
    197     mov edx,[esi]
    198     bswap edx
    199     mov [edi],edx
    200     add esi,BYTE 4
    201     add edi,BYTE 4
    202     dec ecx
    203     jnz .L1
    204 .L2
    205     retn
    206 
    207 .L3 ; save ebp
    208     push ebp
    209 
    210     ; unroll four times
    211     mov ebp,ecx
    212     shr ebp,2
    213     
    214     ; save count
    215     push ecx
    216 
    217 .L4     mov eax,[esi]
    218         mov ebx,[esi+4]
    219 
    220         mov ecx,[esi+8]
    221         mov edx,[esi+12]
    222 
    223         bswap eax
    224 
    225         bswap ebx
    226 
    227         bswap ecx
    228 
    229         bswap edx
    230 
    231         mov [edi+0],eax
    232         mov [edi+4],ebx
    233 
    234         mov [edi+8],ecx
    235         mov [edi+12],edx
    236 
    237         add esi,BYTE 16
    238         add edi,BYTE 16
    239 
    240         dec ebp
    241         jnz .L4                 
    242 
    243     ; check tail
    244     pop ecx
    245     and ecx,BYTE 11b
    246     jz .L6
    247 
    248 .L5 ; tail loop
    249     mov edx,[esi]
    250     bswap edx
    251     mov [edi],edx
    252     add esi,BYTE 4
    253     add edi,BYTE 4
    254     dec ecx
    255     jnz .L5
    256 
    257 .L6 pop ebp
    258     retn
    259 
    260 
    261 	
    262 	
    263 ;; 32 bit RGB 888 to 24 BIT RGB 888
    264 
    265 _ConvertX86p32_24RGB888:
    266 
    267 	; check short
    268 	cmp ecx,BYTE 32
    269 	ja .L3
    270 
    271 .L1	; short loop
    272 	mov al,[esi]
    273 	mov bl,[esi+1]
    274 	mov dl,[esi+2]
    275 	mov [edi],al
    276 	mov [edi+1],bl
    277 	mov [edi+2],dl
    278 	add esi,BYTE 4
    279 	add edi,BYTE 3
    280 	dec ecx
    281 	jnz .L1
    282 .L2 
    283 	retn
    284 
    285 .L3	;	 head
    286 	mov edx,edi
    287 	and edx,BYTE 11b
    288 	jz .L4
    289 	mov al,[esi]
    290 	mov bl,[esi+1]
    291 	mov dl,[esi+2]
    292 	mov [edi],al
    293 	mov [edi+1],bl
    294 	mov [edi+2],dl
    295 	add esi,BYTE 4
    296 	add edi,BYTE 3
    297 	dec ecx
    298 	jmp SHORT .L3
    299 
    300 .L4 ; unroll 4 times
    301 	push ebp
    302 	mov ebp,ecx
    303 	shr ebp,2
    304 
    305     ; save count
    306 	push ecx
    307 
    308 .L5     mov eax,[esi]                   ; first dword            eax = [A][R][G][B]
    309         mov ebx,[esi+4]                 ; second dword           ebx = [a][r][g][b]
    310 
    311         shl eax,8                       ;                        eax = [R][G][B][.]
    312         mov ecx,[esi+12]                ; third dword            ecx = [a][r][g][b]
    313 
    314         shl ebx,8                       ;                        ebx = [r][g][b][.]
    315         mov al,[esi+4]                  ;                        eax = [R][G][B][b]
    316 
    317         ror eax,8                       ;                        eax = [b][R][G][B] (done)
    318         mov bh,[esi+8+1]                ;                        ebx = [r][g][G][.]
    319 
    320         mov [edi],eax
    321         add edi,BYTE 3*4
    322 
    323         shl ecx,8                       ;                        ecx = [r][g][b][.]
    324         mov bl,[esi+8+0]                ;                        ebx = [r][g][G][B]
    325 
    326         rol ebx,16                      ;                        ebx = [G][B][r][g] (done)
    327         mov cl,[esi+8+2]                ;                        ecx = [r][g][b][R] (done)
    328 
    329         mov [edi+4-3*4],ebx
    330         add esi,BYTE 4*4
    331         
    332         mov [edi+8-3*4],ecx
    333         dec ebp
    334 
    335         jnz .L5
    336 
    337     ; check tail
    338 	pop ecx
    339 	and ecx,BYTE 11b
    340 	jz .L7
    341 
    342 .L6 ; tail loop
    343 	mov al,[esi]
    344 	mov bl,[esi+1]
    345 	mov dl,[esi+2]
    346 	mov [edi],al
    347 	mov [edi+1],bl
    348 	mov [edi+2],dl
    349 	add esi,BYTE 4
    350 	add edi,BYTE 3
    351 	dec ecx
    352 	jnz .L6
    353 
    354 .L7	pop ebp
    355 	retn
    356 
    357 
    358 
    359 
    360 ;; 32 bit RGB 888 to 24 bit BGR 888
    361 
    362 _ConvertX86p32_24BGR888:
    363 
    364 	; check short
    365 	cmp ecx,BYTE 32
    366 	ja .L3
    367 
    368 	
    369 .L1	; short loop
    370 	mov dl,[esi]
    371 	mov bl,[esi+1]
    372 	mov al,[esi+2]
    373 	mov [edi],al
    374 	mov [edi+1],bl
    375 	mov [edi+2],dl
    376 	add esi,BYTE 4
    377 	add edi,BYTE 3
    378 	dec ecx
    379 	jnz .L1
    380 .L2
    381 	retn
    382 
    383 .L3 ; head
    384 	mov edx,edi
    385 	and edx,BYTE 11b
    386 	jz .L4
    387 	mov dl,[esi]
    388 	mov bl,[esi+1]
    389 	mov al,[esi+2]
    390 	mov [edi],al
    391 	mov [edi+1],bl
    392 	mov [edi+2],dl
    393 	add esi,BYTE 4
    394 	add edi,BYTE 3
    395 	dec ecx
    396 	jmp SHORT .L3
    397 
    398 .L4	; unroll 4 times
    399 	push ebp
    400 	mov ebp,ecx
    401 	shr ebp,2
    402 
    403 	; save count
    404 	push ecx
    405 
    406 .L5     
    407 	mov eax,[esi]                   ; first dword            eax = [A][R][G][B]
    408         mov ebx,[esi+4]                 ; second dword           ebx = [a][r][g][b]
    409         
    410         bswap eax                       ;                        eax = [B][G][R][A]
    411 
    412         bswap ebx                       ;                        ebx = [b][g][r][a]
    413 
    414         mov al,[esi+4+2]                ;                        eax = [B][G][R][r] 
    415         mov bh,[esi+4+4+1]              ;                        ebx = [b][g][G][a]
    416 
    417         ror eax,8                       ;                        eax = [r][B][G][R] (done)
    418         mov bl,[esi+4+4+2]              ;                        ebx = [b][g][G][R]
    419 
    420         ror ebx,16                      ;                        ebx = [G][R][b][g] (done)
    421         mov [edi],eax
    422     
    423         mov [edi+4],ebx
    424         mov ecx,[esi+12]                ; third dword            ecx = [a][r][g][b]
    425         
    426         bswap ecx                       ;                        ecx = [b][g][r][a]
    427         
    428         mov cl,[esi+8]                  ;                        ecx = [b][g][r][B] (done)
    429         add esi,BYTE 4*4
    430 
    431         mov [edi+8],ecx
    432         add edi,BYTE 3*4
    433 
    434         dec ebp
    435         jnz .L5
    436 
    437 	; check tail
    438 	pop ecx
    439 	and ecx,BYTE 11b
    440 	jz .L7
    441 
    442 .L6	; tail loop
    443 	mov dl,[esi]
    444 	mov bl,[esi+1]
    445 	mov al,[esi+2]
    446 	mov [edi],al
    447 	mov [edi+1],bl
    448 	mov [edi+2],dl
    449 	add esi,BYTE 4
    450 	add edi,BYTE 3
    451 	dec ecx
    452 	jnz .L6
    453 
    454 .L7 
    455 	pop ebp
    456 	retn
    457  
    458 
    459 	
    460 		
    461 ;; 32 bit RGB 888 to 16 BIT RGB 565 
    462 
    463 _ConvertX86p32_16RGB565:
    464 	; check short
    465 	cmp ecx,BYTE 16
    466 	ja .L3
    467 
    468 .L1 ; short loop
    469 	mov bl,[esi+0]    ; blue
    470 	mov al,[esi+1]    ; green
    471 	mov ah,[esi+2]    ; red
    472 	shr ah,3
    473         and al,11111100b
    474 	shl eax,3
    475 	shr bl,3
    476 	add al,bl
    477 	mov [edi+0],al
    478 	mov [edi+1],ah
    479 	add esi,BYTE 4
    480 	add edi,BYTE 2
    481 	dec ecx
    482 	jnz .L1
    483 
    484 .L2:				; End of short loop
    485 	retn
    486 
    487 	
    488 .L3	; head
    489 	mov ebx,edi
    490 	and ebx,BYTE 11b
    491 	jz .L4
    492 	
    493 	mov bl,[esi+0]    ; blue
    494 	mov al,[esi+1]    ; green
    495 	mov ah,[esi+2]    ; red
    496 	shr ah,3
    497 	and al,11111100b
    498 	shl eax,3
    499 	shr bl,3
    500 	add al,bl
    501 	mov [edi+0],al
    502 	mov [edi+1],ah
    503 	add esi,BYTE 4
    504 	add edi,BYTE 2
    505 	dec ecx
    506 
    507 .L4:	 
    508     ; save count
    509 	push ecx
    510 
    511     ; unroll twice
    512 	shr ecx,1
    513     
    514     ; point arrays to end
    515 	lea esi,[esi+ecx*8]
    516 	lea edi,[edi+ecx*4]
    517 
    518     ; negative counter 
    519 	neg ecx
    520 	jmp SHORT .L6
    521 
    522 .L5:	    
    523 	mov [edi+ecx*4-4],eax
    524 .L6:	
    525 	mov eax,[esi+ecx*8]
    526 
    527         shr ah,2
    528         mov ebx,[esi+ecx*8+4]
    529 
    530         shr eax,3
    531         mov edx,[esi+ecx*8+4]
    532 
    533         shr bh,2
    534         mov dl,[esi+ecx*8+2]
    535 
    536         shl ebx,13
    537         and eax,000007FFh
    538         
    539         shl edx,8
    540         and ebx,07FF0000h
    541 
    542         and edx,0F800F800h
    543         add eax,ebx
    544 
    545         add eax,edx
    546         inc ecx
    547 
    548         jnz .L5                 
    549 
    550 	mov [edi+ecx*4-4],eax
    551 
    552     ; tail
    553 	pop ecx
    554 	test cl,1
    555 	jz .L7
    556 	
    557 	mov bl,[esi+0]    ; blue
    558 	mov al,[esi+1]    ; green
    559 	mov ah,[esi+2]    ; red
    560 	shr ah,3
    561 	and al,11111100b
    562 	shl eax,3
    563 	shr bl,3
    564 	add al,bl
    565 	mov [edi+0],al
    566 	mov [edi+1],ah
    567 	add esi,BYTE 4
    568 	add edi,BYTE 2
    569 
    570 .L7:	
    571 	retn
    572 
    573 
    574 
    575 	
    576 ;; 32 bit RGB 888 to 16 BIT BGR 565 
    577 
    578 _ConvertX86p32_16BGR565:
    579 	
    580 	; check short
    581 	cmp ecx,BYTE 16
    582 	ja .L3
    583 
    584 .L1	; short loop
    585 	mov ah,[esi+0]    ; blue
    586 	mov al,[esi+1]    ; green
    587 	mov bl,[esi+2]    ; red
    588 	shr ah,3
    589 	and al,11111100b
    590 	shl eax,3
    591 	shr bl,3
    592 	add al,bl
    593 	mov [edi+0],al
    594 	mov [edi+1],ah
    595 	add esi,BYTE 4
    596 	add edi,BYTE 2
    597 	dec ecx
    598 	jnz .L1
    599 .L2
    600 	retn
    601 
    602 .L3	; head
    603 	mov ebx,edi
    604 	and ebx,BYTE 11b
    605 	jz .L4   
    606 	mov ah,[esi+0]    ; blue
    607 	mov al,[esi+1]    ; green
    608 	mov bl,[esi+2]    ; red
    609 	shr ah,3
    610 	and al,11111100b
    611 	shl eax,3
    612 	shr bl,3
    613 	add al,bl
    614 	mov [edi+0],al
    615 	mov [edi+1],ah
    616 	add esi,BYTE 4
    617 	add edi,BYTE 2
    618 	dec ecx
    619 
    620 .L4	; save count
    621 	push ecx
    622 
    623 	; unroll twice
    624 	shr ecx,1
    625     
    626 	; point arrays to end
    627 	lea esi,[esi+ecx*8]
    628 	lea edi,[edi+ecx*4]
    629 
    630 	; negative count
    631 	neg ecx
    632 	jmp SHORT .L6
    633 
    634 .L5     
    635 	mov [edi+ecx*4-4],eax            
    636 .L6     
    637 	mov edx,[esi+ecx*8+4]
    638 
    639         mov bh,[esi+ecx*8+4]                       
    640         mov ah,[esi+ecx*8]                       
    641 
    642         shr bh,3
    643         mov al,[esi+ecx*8+1]             
    644 
    645         shr ah,3
    646         mov bl,[esi+ecx*8+5]           
    647 
    648         shl eax,3
    649         mov dl,[esi+ecx*8+2]
    650 
    651         shl ebx,19
    652         and eax,0000FFE0h              
    653                 
    654         shr edx,3
    655         and ebx,0FFE00000h             
    656         
    657         and edx,001F001Fh               
    658         add eax,ebx
    659 
    660         add eax,edx
    661         inc ecx
    662 
    663         jnz .L5                 
    664 
    665 	mov [edi+ecx*4-4],eax            
    666 
    667 	; tail
    668 	pop ecx
    669 	and ecx,BYTE 1
    670 	jz .L7
    671 	mov ah,[esi+0]    ; blue
    672 	mov al,[esi+1]    ; green
    673 	mov bl,[esi+2]    ; red
    674 	shr ah,3
    675 	and al,11111100b
    676 	shl eax,3
    677 	shr bl,3
    678 	add al,bl
    679 	mov [edi+0],al
    680 	mov [edi+1],ah
    681 	add esi,BYTE 4
    682 	add edi,BYTE 2
    683 
    684 .L7 
    685 	retn
    686 
    687 
    688 	
    689 	
    690 ;; 32 BIT RGB TO 16 BIT RGB 555
    691 
    692 _ConvertX86p32_16RGB555:
    693 
    694 	; check short
    695 	cmp ecx,BYTE 16
    696 	ja .L3
    697 
    698 .L1	; short loop
    699 	mov bl,[esi+0]    ; blue
    700 	mov al,[esi+1]    ; green
    701 	mov ah,[esi+2]    ; red
    702 	shr ah,3
    703 	and al,11111000b
    704 	shl eax,2
    705 	shr bl,3
    706 	add al,bl
    707 	mov [edi+0],al
    708 	mov [edi+1],ah
    709 	add esi,BYTE 4
    710 	add edi,BYTE 2
    711 	dec ecx
    712 	jnz .L1
    713 .L2
    714 	retn
    715 
    716 .L3	; head
    717 	mov ebx,edi
    718         and ebx,BYTE 11b
    719 	jz .L4   
    720 	mov bl,[esi+0]    ; blue
    721 	mov al,[esi+1]    ; green
    722 	mov ah,[esi+2]    ; red
    723 	shr ah,3
    724 	and al,11111000b
    725 	shl eax,2
    726 	shr bl,3
    727 	add al,bl
    728 	mov [edi+0],al
    729 	mov [edi+1],ah
    730 	add esi,BYTE 4
    731 	add edi,BYTE 2
    732 	dec ecx
    733 
    734 .L4	; save count
    735 	push ecx
    736 
    737 	; unroll twice
    738 	shr ecx,1
    739     
    740 	; point arrays to end
    741 	lea esi,[esi+ecx*8]
    742 	lea edi,[edi+ecx*4]
    743 
    744 	; negative counter 
    745 	neg ecx
    746 	jmp SHORT .L6
    747 
    748 .L5     
    749 	mov [edi+ecx*4-4],eax
    750 .L6     
    751 	mov eax,[esi+ecx*8]
    752 
    753         shr ah,3
    754         mov ebx,[esi+ecx*8+4]
    755 
    756         shr eax,3
    757         mov edx,[esi+ecx*8+4]
    758 
    759         shr bh,3
    760         mov dl,[esi+ecx*8+2]
    761 
    762         shl ebx,13
    763         and eax,000007FFh
    764         
    765         shl edx,7
    766         and ebx,07FF0000h
    767 
    768         and edx,07C007C00h
    769         add eax,ebx
    770 
    771         add eax,edx
    772         inc ecx
    773 
    774         jnz .L5                 
    775 
    776 	mov [edi+ecx*4-4],eax
    777 
    778 	; tail
    779 	pop ecx
    780 	and ecx,BYTE 1
    781 	jz .L7
    782 	mov bl,[esi+0]    ; blue
    783 	mov al,[esi+1]    ; green
    784 	mov ah,[esi+2]    ; red
    785 	shr ah,3
    786 	and al,11111000b
    787 	shl eax,2
    788 	shr bl,3
    789 	add al,bl
    790 	mov [edi+0],al
    791 	mov [edi+1],ah
    792 	add esi,BYTE 4
    793 	add edi,BYTE 2
    794 
    795 .L7
    796 	retn
    797 
    798 
    799 
    800 
    801 ;; 32 BIT RGB TO 16 BIT BGR 555
    802 	
    803 _ConvertX86p32_16BGR555:
    804 	
    805 	; check short
    806 	cmp ecx,BYTE 16
    807 	ja .L3
    808 
    809 
    810 .L1	; short loop
    811 	mov ah,[esi+0]    ; blue
    812 	mov al,[esi+1]    ; green
    813 	mov bl,[esi+2]    ; red
    814 	shr ah,3
    815 	and al,11111000b
    816 	shl eax,2
    817 	shr bl,3
    818 	add al,bl
    819 	mov [edi+0],al
    820 	mov [edi+1],ah
    821 	add esi,BYTE 4
    822 	add edi,BYTE 2
    823 	dec ecx
    824 	jnz .L1
    825 .L2 
    826 	retn
    827 
    828 .L3	; head
    829 	mov ebx,edi
    830         and ebx,BYTE 11b
    831 	jz .L4   
    832 	mov ah,[esi+0]    ; blue
    833 	mov al,[esi+1]    ; green
    834 	mov bl,[esi+2]    ; red
    835 	shr ah,3
    836 	and al,11111000b
    837 	shl eax,2
    838 	shr bl,3
    839 	add al,bl
    840 	mov [edi+0],al
    841 	mov [edi+1],ah
    842 	add esi,BYTE 4
    843 	add edi,BYTE 2
    844 	dec ecx
    845 
    846 .L4	; save count
    847 	push ecx
    848 
    849 	; unroll twice
    850 	shr ecx,1
    851     
    852 	; point arrays to end
    853 	lea esi,[esi+ecx*8]
    854 	lea edi,[edi+ecx*4]
    855 
    856 	; negative counter 
    857 	neg ecx
    858 	jmp SHORT .L6
    859 
    860 .L5     
    861 	mov [edi+ecx*4-4],eax            
    862 .L6     
    863 	mov edx,[esi+ecx*8+4]
    864 
    865         mov bh,[esi+ecx*8+4]                       
    866         mov ah,[esi+ecx*8]                       
    867 
    868         shr bh,3
    869         mov al,[esi+ecx*8+1]             
    870 
    871         shr ah,3
    872         mov bl,[esi+ecx*8+5]           
    873 
    874         shl eax,2
    875         mov dl,[esi+ecx*8+2]
    876 
    877         shl ebx,18
    878         and eax,00007FE0h              
    879                 
    880         shr edx,3
    881         and ebx,07FE00000h             
    882         
    883         and edx,001F001Fh               
    884         add eax,ebx
    885 
    886         add eax,edx
    887         inc ecx
    888 
    889         jnz .L5                 
    890 
    891 	mov [edi+ecx*4-4],eax            
    892 
    893 	; tail
    894 	pop ecx
    895 	and ecx,BYTE 1
    896 	jz .L7
    897 	mov ah,[esi+0]    ; blue
    898 	mov al,[esi+1]    ; green
    899 	mov bl,[esi+2]    ; red
    900 	shr ah,3
    901 	and al,11111000b
    902 	shl eax,2
    903 	shr bl,3
    904 	add al,bl
    905 	mov [edi+0],al
    906 	mov [edi+1],ah
    907 	add esi,BYTE 4
    908 	add edi,BYTE 2
    909 
    910 .L7
    911 	retn
    912 
    913 
    914 
    915 
    916 	
    917 ;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbbb)
    918 ;; This routine writes FOUR pixels at once (dword) and then, if they exist
    919 ;; the trailing three pixels
    920 _ConvertX86p32_8RGB332:
    921 
    922 	
    923 .L_ALIGNED
    924 	push ecx
    925 
    926 	shr ecx,2		; We will draw 4 pixels at once
    927 	jnz .L1
    928 	
    929 	jmp .L2			; short jump out of range :(
    930 	
    931 .L1:
    932 	mov eax,[esi]		; first pair of pixels
    933 	mov edx,[esi+4]
    934 
    935 	shr dl,6
    936 	mov ebx,eax
    937 
    938 	shr al,6
    939 	and ah,0e0h
    940 
    941 	shr ebx,16
    942 	and dh,0e0h
    943 	
    944 	shr ah,3
    945 	and bl,0e0h
    946 
    947 	shr dh,3
    948 	
    949 	or al,bl
    950 	
    951 	mov ebx,edx	
    952 	or al,ah
    953 	
    954 	shr ebx,16
    955 	or dl,dh
    956 
    957 	and bl,0e0h
    958 	
    959 	or dl,bl
    960 
    961 	mov ah,dl
    962 
    963 	
    964 		
    965 	mov ebx,[esi+8]		; second pair of pixels
    966 
    967 	mov edx,ebx
    968 	and bh,0e0h
    969 
    970 	shr bl,6
    971 	and edx,0e00000h
    972 
    973 	shr edx,16
    974 
    975 	shr bh,3
    976 
    977 	ror eax,16
    978 	or bl,dl
    979 
    980 	mov edx,[esi+12]
    981 	or bl,bh
    982 	
    983 	mov al,bl
    984 
    985 	mov ebx,edx
    986 	and dh,0e0h
    987 
    988 	shr dl,6
    989 	and ebx,0e00000h
    990 	
    991 	shr dh,3
    992 	mov ah,dl
    993 
    994 	shr ebx,16
    995 	or ah,dh
    996 
    997 	or ah,bl
    998 
    999 	rol eax,16
   1000 	add esi,BYTE 16
   1001 			
   1002 	mov [edi],eax	
   1003 	add edi,BYTE 4
   1004 	
   1005 	dec ecx
   1006 	jz .L2			; L1 out of range for short jump :(
   1007 	
   1008 	jmp .L1
   1009 .L2:
   1010 	
   1011 	pop ecx
   1012 	and ecx,BYTE 3		; mask out number of pixels to draw
   1013 	
   1014 	jz .L4			; Nothing to do anymore
   1015 
   1016 .L3:
   1017 	mov eax,[esi]		; single pixel conversion for trailing pixels
   1018 
   1019         mov ebx,eax
   1020 
   1021         shr al,6
   1022         and ah,0e0h
   1023 
   1024         shr ebx,16
   1025 
   1026         shr ah,3
   1027         and bl,0e0h
   1028 
   1029         or al,ah
   1030         or al,bl
   1031 
   1032         mov [edi],al
   1033 
   1034         inc edi
   1035         add esi,BYTE 4
   1036 
   1037 	dec ecx
   1038 	jnz .L3
   1039 	
   1040 .L4:	
   1041 	retn
   1042 
   1043 %ifidn __OUTPUT_FORMAT__,elf
   1044 section .note.GNU-stack noalloc noexec nowrite progbits
   1045 %endif
   1046