Home | History | Annotate | Download | only in hermes
      1 ;
      2 ; x86 format converters for HERMES
      3 ; Some routines Copyright (c) 1998 Christian Nentwich (brn (a] eleet.mcb.at)
      4 ; This source code is licensed under the GNU LGPL
      5 ; 
      6 ; Please refer to the file COPYING.LIB contained in the distribution for
      7 ; licensing conditions		
      8 ;
      9 ; Most routines are (c) Glenn Fiedler (ptc (a] gaffer.org), used with permission
     10 ; 
     11 
     12 BITS 32                         ; assemble everything below as 32-bit code
     13 ; SDL_FUNC is not defined in this file; presumably it comes from common.inc (TODO confirm)
     14 %include "common.inc"
     15 ; One SDL_FUNC line per converter label defined below (presumably declares/exports the symbol)
     16 SDL_FUNC _ConvertX86p32_32BGR888
     17 SDL_FUNC _ConvertX86p32_32RGBA888
     18 SDL_FUNC _ConvertX86p32_32BGRA888
     19 SDL_FUNC _ConvertX86p32_24RGB888	
     20 SDL_FUNC _ConvertX86p32_24BGR888
     21 SDL_FUNC _ConvertX86p32_16RGB565
     22 SDL_FUNC _ConvertX86p32_16BGR565
     23 SDL_FUNC _ConvertX86p32_16RGB555
     24 SDL_FUNC _ConvertX86p32_16BGR555
     25 SDL_FUNC _ConvertX86p32_8RGB332
     26 
     27 SECTION .text
     28 
     29 ;; _Convert_*
     30 ;; Parameters:	
     31 ;;   ESI = source 
     32 ;;   EDI = dest
     33 ;;   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
     34 ;; Destroys:
     35 ;;   EAX, EBX, EDX (ESI, EDI and ECX are modified as well; routines that use EBP save and restore it)
     36 ;; 32 bit -> 32 bit: exchanges byte 0 and byte 2 of every pixel (bswap, then
     37 ;; ror 8); bytes 1 and 3 keep their position.
     38 _ConvertX86p32_32BGR888:
     39     ; ESI = source, EDI = dest, ECX = pixel count (not tested for 0 here)
     40     ; check short
     41     cmp ecx,BYTE 32
     42     ja .L3                      ; more than 32 pixels: take the unrolled path
     43 
     44 .L1: ; short loop, one pixel per iteration
     45     mov edx,[esi]
     46     bswap edx                   ; reverse the four bytes
     47     ror edx,8                   ; bytes 1 and 3 return to their slots, 0 and 2 end up exchanged
     48     mov [edi],edx
     49     add esi,BYTE 4
     50     add edi,BYTE 4
     51     dec ecx
     52     jnz .L1
     53 .L2: ; no jump to this label is visible in this routine
     54     retn
     55 
     56 .L3: ; save ebp
     57     push ebp
     58 
     59     ; unroll four times
     60     mov ebp,ecx
     61     shr ebp,2                   ; ebp = number of 4-pixel groups
     62     
     63     ; save count
     64     push ecx                    ; ecx is reused as a data register in the loop below
     65 
     66 .L4:    mov eax,[esi]           ; pixel 0
     67         mov ebx,[esi+4]         ; pixel 1
     68 
     69         bswap eax
     70 
     71         bswap ebx
     72 
     73         ror eax,8
     74         mov ecx,[esi+8]         ; pixel 2
     75 
     76         ror ebx,8
     77         mov edx,[esi+12]        ; pixel 3
     78 
     79         bswap ecx
     80 
     81         bswap edx
     82 
     83         ror ecx,8
     84         mov [edi+0],eax
     85 
     86         ror edx,8
     87         mov [edi+4],ebx
     88 
     89         mov [edi+8],ecx
     90         mov [edi+12],edx
     91 
     92         add esi,BYTE 16
     93         add edi,BYTE 16
     94 
     95         dec ebp
     96         jnz .L4                 
     97 
     98     ; check tail
     99     pop ecx                     ; restore the full pixel count
    100     and ecx,BYTE 11b            ; pixels left after the groups of four (0..3)
    101     jz .L6
    102 
    103 .L5: ; tail loop, same per-pixel sequence as the short loop
    104     mov edx,[esi]
    105     bswap edx
    106     ror edx,8
    107     mov [edi],edx
    108     add esi,BYTE 4
    109     add edi,BYTE 4
    110     dec ecx
    111     jnz .L5
    112 
    113 .L6: pop ebp
    114     retn
    115 	
    116 ;; 32 bit -> 32 bit: rotates every pixel left by 8 bits, so each byte moves up
    117 ;; one position and the top byte wraps around into byte 0.
    118 ;; ESI = source, EDI = dest, ECX = pixel count (not tested for 0 here)
    119 _ConvertX86p32_32RGBA888:
    120 	
    121     ; check short
    122     cmp ecx,BYTE 32
    123     ja .L3                      ; more than 32 pixels: take the unrolled path
    124 
    125 .L1: ; short loop, one pixel per iteration
    126     mov edx,[esi]
    127     rol edx,8                   ; bytes 0..2 move up to 1..3, old byte 3 becomes byte 0
    128     mov [edi],edx
    129     add esi,BYTE 4
    130     add edi,BYTE 4
    131     dec ecx
    132     jnz .L1
    133 .L2: ; no jump to this label is visible in this routine
    134     retn
    135 
    136 .L3: ; save ebp
    137     push ebp
    138 
    139     ; unroll four times
    140     mov ebp,ecx
    141     shr ebp,2                   ; ebp = number of 4-pixel groups
    142     
    143     ; save count
    144     push ecx                    ; ecx is reused as a data register in the loop below
    145 
    146 .L4:    mov eax,[esi]           ; pixel 0
    147         mov ebx,[esi+4]         ; pixel 1
    148 
    149         rol eax,8
    150         mov ecx,[esi+8]         ; pixel 2
    151 
    152         rol ebx,8
    153         mov edx,[esi+12]        ; pixel 3
    154 
    155         rol ecx,8
    156         mov [edi+0],eax
    157 
    158         rol edx,8
    159         mov [edi+4],ebx
    160 
    161         mov [edi+8],ecx
    162         mov [edi+12],edx
    163 
    164         add esi,BYTE 16
    165         add edi,BYTE 16
    166 
    167         dec ebp
    168         jnz .L4                 
    169 
    170     ; check tail
    171     pop ecx                     ; restore the full pixel count
    172     and ecx,BYTE 11b            ; pixels left after the groups of four (0..3)
    173     jz .L6
    174 
    175 .L5: ; tail loop, same per-pixel sequence as the short loop
    176     mov edx,[esi]
    177     rol edx,8
    178     mov [edi],edx
    179     add esi,BYTE 4
    180     add edi,BYTE 4
    181     dec ecx
    182     jnz .L5
    183 
    184 .L6: pop ebp
    185     retn
    186 
    187 ;; 32 bit -> 32 bit: reverses the byte order of every pixel (bswap).
    188 ;; ESI = source, EDI = dest, ECX = pixel count (not tested for 0 here)
    189 
    190 _ConvertX86p32_32BGRA888:
    191 
    192     ; check short
    193     cmp ecx,BYTE 32
    194     ja .L3                      ; more than 32 pixels: take the unrolled path
    195 
    196 .L1: ; short loop, one pixel per iteration
    197     mov edx,[esi]
    198     bswap edx                   ; byte 0 <-> byte 3, byte 1 <-> byte 2
    199     mov [edi],edx
    200     add esi,BYTE 4
    201     add edi,BYTE 4
    202     dec ecx
    203     jnz .L1
    204 .L2: ; no jump to this label is visible in this routine
    205     retn
    206 
    207 .L3: ; save ebp
    208     push ebp
    209 
    210     ; unroll four times
    211     mov ebp,ecx
    212     shr ebp,2                   ; ebp = number of 4-pixel groups
    213     
    214     ; save count
    215     push ecx                    ; ecx is reused as a data register in the loop below
    216 
    217 .L4:    mov eax,[esi]           ; pixel 0
    218         mov ebx,[esi+4]         ; pixel 1
    219 
    220         mov ecx,[esi+8]         ; pixel 2
    221         mov edx,[esi+12]        ; pixel 3
    222 
    223         bswap eax
    224 
    225         bswap ebx
    226 
    227         bswap ecx
    228 
    229         bswap edx
    230 
    231         mov [edi+0],eax
    232         mov [edi+4],ebx
    233 
    234         mov [edi+8],ecx
    235         mov [edi+12],edx
    236 
    237         add esi,BYTE 16
    238         add edi,BYTE 16
    239 
    240         dec ebp
    241         jnz .L4                 
    242 
    243     ; check tail
    244     pop ecx                     ; restore the full pixel count
    245     and ecx,BYTE 11b            ; pixels left after the groups of four (0..3)
    246     jz .L6
    247 
    248 .L5: ; tail loop, same per-pixel sequence as the short loop
    249     mov edx,[esi]
    250     bswap edx
    251     mov [edi],edx
    252     add esi,BYTE 4
    253     add edi,BYTE 4
    254     dec ecx
    255     jnz .L5
    256 
    257 .L6: pop ebp
    258     retn
    259 
    260 
    261 ;; Copies bytes 0, 1 and 2 of every 4-byte source pixel to 3 consecutive dest
    262 ;; bytes, dropping byte 3. ESI = source, EDI = dest, ECX = pixel count (not 0).
    263 ;; 32 bit RGB 888 to 24 BIT RGB 888
    264 
    265 _ConvertX86p32_24RGB888:
    266 
    267 	; check short
    268 	cmp ecx,BYTE 32
    269 	ja .L3                  ; more than 32 pixels: take the aligned, unrolled path
    270 
    271 .L1:	; short loop, one pixel per iteration
    272 	mov al,[esi]
    273 	mov bl,[esi+1]
    274 	mov dl,[esi+2]
    275 	mov [edi],al
    276 	mov [edi+1],bl
    277 	mov [edi+2],dl
    278 	add esi,BYTE 4
    279 	add edi,BYTE 3
    280 	dec ecx
    281 	jnz .L1
    282 .L2:
    283 	retn
    284 
    285 .L3:	;	 head: convert single pixels until EDI is a multiple of 4
    286 	mov edx,edi
    287 	and edx,BYTE 11b
    288 	jz .L4
    289 	mov al,[esi]
    290 	mov bl,[esi+1]
    291 	mov dl,[esi+2]          ; overwrites the alignment scratch; it is recomputed at .L3
    292 	mov [edi],al
    293 	mov [edi+1],bl
    294 	mov [edi+2],dl
    295 	add esi,BYTE 4
    296 	add edi,BYTE 3          ; EDI moves by 3, so at most three passes are needed
    297 	dec ecx
    298 	jmp SHORT .L3
    299 
    300 .L4: ; unroll 4 times: 4 source pixels (16 bytes) -> 3 dest dwords (12 bytes)
    301 	push ebp
    302 	mov ebp,ecx
    303 	shr ebp,2               ; ebp = number of 4-pixel groups
    304 
    305     ; save count
    306 	push ecx                ; ecx is reused as a data register in the loop below
    307 
    308 .L5:    mov eax,[esi]                   ; pixel 0                eax = [A0][R0][G0][B0]
    309         mov ebx,[esi+4]                 ; pixel 1                ebx = [A1][R1][G1][B1]
    310 
    311         shl eax,8                       ;                        eax = [R0][G0][B0][..]
    312         mov ecx,[esi+12]                ; pixel 3 (fourth dword) ecx = [A3][R3][G3][B3]
    313 
    314         shl ebx,8                       ;                        ebx = [R1][G1][B1][..]
    315         mov al,[esi+4]                  ; al = B1                eax = [R0][G0][B0][B1]
    316 
    317         ror eax,8                       ;                        eax = [B1][R0][G0][B0] (done)
    318         mov bh,[esi+8+1]                ; bh = G2                ebx = [R1][G1][G2][..]
    319 
    320         mov [edi],eax
    321         add edi,BYTE 3*4                ; dest advanced early; the stores below compensate with -3*4
    322 
    323         shl ecx,8                       ;                        ecx = [R3][G3][B3][..]
    324         mov bl,[esi+8+0]                ; bl = B2                ebx = [R1][G1][G2][B2]
    325 
    326         rol ebx,16                      ;                        ebx = [G2][B2][R1][G1] (done)
    327         mov cl,[esi+8+2]                ; cl = R2                ecx = [R3][G3][B3][R2] (done)
    328 
    329         mov [edi+4-3*4],ebx
    330         add esi,BYTE 4*4
    331         
    332         mov [edi+8-3*4],ecx
    333         dec ebp
    334 
    335         jnz .L5
    336 
    337     ; check tail
    338 	pop ecx                 ; count as it was after the head pixels
    339 	and ecx,BYTE 11b        ; pixels left after the groups of four (0..3)
    340 	jz .L7
    341 
    342 .L6: ; tail loop, same per-pixel sequence as the short loop
    343 	mov al,[esi]
    344 	mov bl,[esi+1]
    345 	mov dl,[esi+2]
    346 	mov [edi],al
    347 	mov [edi+1],bl
    348 	mov [edi+2],dl
    349 	add esi,BYTE 4
    350 	add edi,BYTE 3
    351 	dec ecx
    352 	jnz .L6
    353 
    354 .L7:	pop ebp
    355 	retn
    356 
    357 
    358 ;; Writes source bytes 2, 1, 0 (in that order) of every 4-byte pixel to 3
    359 ;; consecutive dest bytes. ESI = source, EDI = dest, ECX = pixel count (not 0).
    360 ;; 32 bit RGB 888 to 24 bit BGR 888
    361 
    362 _ConvertX86p32_24BGR888:
    363 
    364 	; check short
    365 	cmp ecx,BYTE 32
    366 	ja .L3                  ; more than 32 pixels: take the aligned, unrolled path
    367 
    368 .L1:	; short loop, one pixel per iteration
    369 	mov dl,[esi]
    370 	mov bl,[esi+1]
    371 	mov al,[esi+2]
    372 	mov [edi],al            ; source byte 2 goes first
    373 	mov [edi+1],bl
    374 	mov [edi+2],dl          ; source byte 0 goes last
    375 	add esi,BYTE 4
    376 	add edi,BYTE 3
    377 	dec ecx
    378 	jnz .L1
    379 .L2:
    380 	retn
    381 
    382 .L3: ; head: convert single pixels until EDI is a multiple of 4
    383 	mov edx,edi
    384 	and edx,BYTE 11b
    385 	jz .L4
    386 	mov dl,[esi]            ; overwrites the alignment scratch; it is recomputed at .L3
    387 	mov bl,[esi+1]
    388 	mov al,[esi+2]
    389 	mov [edi],al
    390 	mov [edi+1],bl
    391 	mov [edi+2],dl
    392 	add esi,BYTE 4
    393 	add edi,BYTE 3          ; EDI moves by 3, so at most three passes are needed
    394 	dec ecx
    395 	jmp SHORT .L3
    396 
    397 .L4:	; unroll 4 times: 4 source pixels (16 bytes) -> 3 dest dwords (12 bytes)
    398 	push ebp
    399 	mov ebp,ecx
    400 	shr ebp,2               ; ebp = number of 4-pixel groups
    401 
    402 	; save count
    403 	push ecx                ; ecx is reused as a data register in the loop below
    404 
    405 .L5:
    406 	mov eax,[esi]                   ; pixel 0                eax = [A0][R0][G0][B0]
    407         mov ebx,[esi+4]                 ; pixel 1                ebx = [A1][R1][G1][B1]
    408 
    409         bswap eax                       ;                        eax = [B0][G0][R0][A0]
    410 
    411         bswap ebx                       ;                        ebx = [B1][G1][R1][A1]
    412 
    413         mov al,[esi+4+2]                ; al = R1                eax = [B0][G0][R0][R1] 
    414         mov bh,[esi+4+4+1]              ; bh = G2                ebx = [B1][G1][G2][A1]
    415 
    416         ror eax,8                       ;                        eax = [R1][B0][G0][R0] (done)
    417         mov bl,[esi+4+4+2]              ; bl = R2                ebx = [B1][G1][G2][R2]
    418 
    419         ror ebx,16                      ;                        ebx = [G2][R2][B1][G1] (done)
    420         mov [edi],eax
    421     
    422         mov [edi+4],ebx
    423         mov ecx,[esi+12]                ; pixel 3 (fourth dword) ecx = [A3][R3][G3][B3]
    424         
    425         bswap ecx                       ;                        ecx = [B3][G3][R3][A3]
    426         
    427         mov cl,[esi+8]                  ; cl = B2                ecx = [B3][G3][R3][B2] (done)
    428         add esi,BYTE 4*4
    429 
    430         mov [edi+8],ecx
    431         add edi,BYTE 3*4
    432 
    433         dec ebp
    434         jnz .L5
    435 
    436 	; check tail
    437 	pop ecx                 ; count as it was after the head pixels
    438 	and ecx,BYTE 11b        ; pixels left after the groups of four (0..3)
    439 	jz .L7
    440 
    441 .L6:	; tail loop, same per-pixel sequence as the short loop
    442 	mov dl,[esi]
    443 	mov bl,[esi+1]
    444 	mov al,[esi+2]
    445 	mov [edi],al
    446 	mov [edi+1],bl
    447 	mov [edi+2],dl
    448 	add esi,BYTE 4
    449 	add edi,BYTE 3
    450 	dec ecx
    451 	jnz .L6
    452 
    453 .L7:
    454 	pop ebp
    455 	retn
    456  
    457 
    458 ;; Packs each 4-byte pixel into 16 bits: red (top 5 bits) in bits 15..11, green
    459 ;; (top 6) in bits 10..5, blue (top 5) in bits 4..0. ESI/EDI/ECX as documented above.
    460 ;; 32 bit RGB 888 to 16 BIT RGB 565 
    461 
    462 _ConvertX86p32_16RGB565:
    463 	; check short
    464 	cmp ecx,BYTE 16
    465 	ja .L3                  ; more than 16 pixels: take the two-at-a-time path
    466 
    467 .L1: ; short loop, one pixel per iteration
    468 	mov bl,[esi+0]    ; blue
    469 	mov al,[esi+1]    ; green
    470 	mov ah,[esi+2]    ; red
    471 	shr ah,3          ; ah = 000rrrrr
    472         and al,11111100b  ; al = gggggg00
    473 	shl eax,3         ; ax = rrrrrggg ggg00000
    474 	shr bl,3          ; bl = 000bbbbb
    475 	add al,bl         ; fill the low five bits with blue
    476 	mov [edi+0],al
    477 	mov [edi+1],ah
    478 	add esi,BYTE 4
    479 	add edi,BYTE 2
    480 	dec ecx
    481 	jnz .L1
    482 
    483 .L2:				; End of short loop
    484 	retn
    485 
    486 	
    487 .L3:	; head: if EDI is not a multiple of 4, convert one pixel first
    488 	mov ebx,edi
    489 	and ebx,BYTE 11b
    490 	jz .L4
    491 	
    492 	mov bl,[esi+0]    ; blue
    493 	mov al,[esi+1]    ; green
    494 	mov ah,[esi+2]    ; red
    495 	shr ah,3
    496 	and al,11111100b
    497 	shl eax,3
    498 	shr bl,3
    499 	add al,bl
    500 	mov [edi+0],al
    501 	mov [edi+1],ah
    502 	add esi,BYTE 4
    503 	add edi,BYTE 2
    504 	dec ecx
    505 
    506 .L4:	 
    507     ; save count
    508 	push ecx
    509 
    510     ; unroll twice
    511 	shr ecx,1               ; ecx = number of pixel pairs
    512     
    513     ; point arrays to end
    514 	lea esi,[esi+ecx*8]     ; 8 source bytes per pair
    515 	lea edi,[edi+ecx*4]     ; 4 dest bytes per pair
    516 
    517     ; negative counter 
    518 	neg ecx                 ; index runs from -pairs up to 0
    519 	jmp SHORT .L6           ; first pass has nothing to store yet
    520 
    521 .L5:	    
    522 	mov [edi+ecx*4-4],eax   ; store the pair computed in the previous pass
    523 .L6:	
    524 	mov eax,[esi+ecx*8]     ; first pixel of the pair
    525 
    526         shr ah,2                ; first green: top 6 bits
    527         mov ebx,[esi+ecx*8+4]   ; second pixel (for blue/green)
    528 
    529         shr eax,3               ; first blue>>3 in bits 4..0, green in bits 10..5
    530         mov edx,[esi+ecx*8+4]   ; second pixel again (for red)
    531 
    532         shr bh,2                ; second green: top 6 bits
    533         mov dl,[esi+ecx*8+2]    ; dl = first pixel's red
    534 
    535         shl ebx,13              ; second blue/green moved to bits 26..16
    536         and eax,000007FFh       ; keep first green+blue only
    537         
    538         shl edx,8               ; reds now in bits 31..24 and 15..8
    539         and ebx,07FF0000h       ; keep second green+blue only
    540 
    541         and edx,0F800F800h      ; top 5 bits of each red
    542         add eax,ebx
    543 
    544         add eax,edx             ; low word = first pixel, high word = second pixel
    545         inc ecx
    546 
    547         jnz .L5                 
    548 
    549 	mov [edi+ecx*4-4],eax   ; ecx is 0 here: store the last pair
    550 
    551     ; tail
    552 	pop ecx                 ; count as it was after the head pixel
    553 	test cl,1               ; odd count: one pixel is left
    554 	jz .L7
    555 	
    556 	mov bl,[esi+0]    ; blue
    557 	mov al,[esi+1]    ; green
    558 	mov ah,[esi+2]    ; red
    559 	shr ah,3
    560 	and al,11111100b
    561 	shl eax,3
    562 	shr bl,3
    563 	add al,bl
    564 	mov [edi+0],al
    565 	mov [edi+1],ah
    566 	add esi,BYTE 4
    567 	add edi,BYTE 2
    568 
    569 .L7:	
    570 	retn
    571 
    572 
    573 ;; Packs each 4-byte pixel into 16 bits: blue (top 5 bits) in bits 15..11, green
    574 ;; (top 6) in bits 10..5, red (top 5) in bits 4..0. ESI/EDI/ECX as documented above.
    575 ;; 32 bit RGB 888 to 16 BIT BGR 565 
    576 
    577 _ConvertX86p32_16BGR565:
    578 	
    579 	; check short
    580 	cmp ecx,BYTE 16
    581 	ja .L3                  ; more than 16 pixels: take the two-at-a-time path
    582 
    583 .L1:	; short loop, one pixel per iteration
    584 	mov ah,[esi+0]    ; blue
    585 	mov al,[esi+1]    ; green
    586 	mov bl,[esi+2]    ; red
    587 	shr ah,3          ; ah = 000bbbbb
    588 	and al,11111100b  ; al = gggggg00
    589 	shl eax,3         ; ax = bbbbbggg ggg00000
    590 	shr bl,3          ; bl = 000rrrrr
    591 	add al,bl         ; fill the low five bits with red
    592 	mov [edi+0],al
    593 	mov [edi+1],ah
    594 	add esi,BYTE 4
    595 	add edi,BYTE 2
    596 	dec ecx
    597 	jnz .L1
    598 .L2:
    599 	retn
    600 
    601 .L3:	; head: if EDI is not a multiple of 4, convert one pixel first
    602 	mov ebx,edi
    603 	and ebx,BYTE 11b
    604 	jz .L4   
    605 	mov ah,[esi+0]    ; blue
    606 	mov al,[esi+1]    ; green
    607 	mov bl,[esi+2]    ; red
    608 	shr ah,3
    609 	and al,11111100b
    610 	shl eax,3
    611 	shr bl,3
    612 	add al,bl
    613 	mov [edi+0],al
    614 	mov [edi+1],ah
    615 	add esi,BYTE 4
    616 	add edi,BYTE 2
    617 	dec ecx
    618 
    619 .L4:	; save count
    620 	push ecx
    621 
    622 	; unroll twice
    623 	shr ecx,1               ; ecx = number of pixel pairs
    624     
    625 	; point arrays to end
    626 	lea esi,[esi+ecx*8]     ; 8 source bytes per pair
    627 	lea edi,[edi+ecx*4]     ; 4 dest bytes per pair
    628 
    629 	; negative count
    630 	neg ecx                 ; index runs from -pairs up to 0
    631 	jmp SHORT .L6           ; first pass has nothing to store yet
    632 
    633 .L5:
    634 	mov [edi+ecx*4-4],eax            ; store the pair computed in the previous pass
    635 .L6:
    636 	mov edx,[esi+ecx*8+4]            ; second pixel (for its red in bits 23..16)
    637 
    638         mov bh,[esi+ecx*8+4]             ; second blue
    639         mov ah,[esi+ecx*8]               ; first blue
    640 
    641         shr bh,3
    642         mov al,[esi+ecx*8+1]             ; first green
    643 
    644         shr ah,3
    645         mov bl,[esi+ecx*8+5]             ; second green
    646 
    647         shl eax,3                        ; first blue to bits 15..11, green to bits 10..3
    648         mov dl,[esi+ecx*8+2]             ; dl = first pixel's red
    649 
    650         shl ebx,19                       ; second blue to bits 31..27, green to bits 26..19
    651         and eax,0000FFE0h                ; keep first blue + top 6 green bits
    652                 
    653         shr edx,3                        ; reds now in bits 20..13 and 4..0
    654         and ebx,0FFE00000h               ; keep second blue + top 6 green bits
    655         
    656         and edx,001F001Fh                ; top 5 bits of each red
    657         add eax,ebx
    658 
    659         add eax,edx                      ; low word = first pixel, high word = second pixel
    660         inc ecx
    661 
    662         jnz .L5                 
    663 
    664 	mov [edi+ecx*4-4],eax            ; ecx is 0 here: store the last pair
    665 
    666 	; tail
    667 	pop ecx                 ; count as it was after the head pixel
    668 	and ecx,BYTE 1          ; odd count: one pixel is left
    669 	jz .L7
    670 	mov ah,[esi+0]    ; blue
    671 	mov al,[esi+1]    ; green
    672 	mov bl,[esi+2]    ; red
    673 	shr ah,3
    674 	and al,11111100b
    675 	shl eax,3
    676 	shr bl,3
    677 	add al,bl
    678 	mov [edi+0],al
    679 	mov [edi+1],ah
    680 	add esi,BYTE 4
    681 	add edi,BYTE 2
    682 
    683 .L7:
    684 	retn
    685 
    686 
    687 ;; Packs each 4-byte pixel into 16 bits: red (top 5 bits) in bits 14..10, green
    688 ;; (top 5) in bits 9..5, blue (top 5) in bits 4..0; bit 15 is written as 0.
    689 ;; 32 BIT RGB TO 16 BIT RGB 555
    690 
    691 _ConvertX86p32_16RGB555:
    692 
    693 	; check short
    694 	cmp ecx,BYTE 16
    695 	ja .L3                  ; more than 16 pixels: take the two-at-a-time path
    696 
    697 .L1:	; short loop, one pixel per iteration
    698 	mov bl,[esi+0]    ; blue
    699 	mov al,[esi+1]    ; green
    700 	mov ah,[esi+2]    ; red
    701 	shr ah,3          ; ah = 000rrrrr
    702 	and al,11111000b  ; al = ggggg000
    703 	shl eax,2         ; ax = 0rrrrrgg ggg00000
    704 	shr bl,3          ; bl = 000bbbbb
    705 	add al,bl         ; fill the low five bits with blue
    706 	mov [edi+0],al
    707 	mov [edi+1],ah
    708 	add esi,BYTE 4
    709 	add edi,BYTE 2
    710 	dec ecx
    711 	jnz .L1
    712 .L2:
    713 	retn
    714 
    715 .L3:	; head: if EDI is not a multiple of 4, convert one pixel first
    716 	mov ebx,edi
    717         and ebx,BYTE 11b
    718 	jz .L4   
    719 	mov bl,[esi+0]    ; blue
    720 	mov al,[esi+1]    ; green
    721 	mov ah,[esi+2]    ; red
    722 	shr ah,3
    723 	and al,11111000b
    724 	shl eax,2
    725 	shr bl,3
    726 	add al,bl
    727 	mov [edi+0],al
    728 	mov [edi+1],ah
    729 	add esi,BYTE 4
    730 	add edi,BYTE 2
    731 	dec ecx
    732 
    733 .L4:	; save count
    734 	push ecx
    735 
    736 	; unroll twice
    737 	shr ecx,1               ; ecx = number of pixel pairs
    738     
    739 	; point arrays to end
    740 	lea esi,[esi+ecx*8]     ; 8 source bytes per pair
    741 	lea edi,[edi+ecx*4]     ; 4 dest bytes per pair
    742 
    743 	; negative counter 
    744 	neg ecx                 ; index runs from -pairs up to 0
    745 	jmp SHORT .L6           ; first pass has nothing to store yet
    746 
    747 .L5:
    748 	mov [edi+ecx*4-4],eax   ; store the pair computed in the previous pass
    749 .L6:
    750 	mov eax,[esi+ecx*8]     ; first pixel of the pair
    751 
    752         shr ah,3                ; first green: top 5 bits
    753         mov ebx,[esi+ecx*8+4]   ; second pixel (for blue/green)
    754 
    755         shr eax,3               ; first blue>>3 in bits 4..0, green in bits 9..5
    756         mov edx,[esi+ecx*8+4]   ; second pixel again (for red)
    757 
    758         shr bh,3                ; second green: top 5 bits
    759         mov dl,[esi+ecx*8+2]    ; dl = first pixel's red
    760 
    761         shl ebx,13              ; second blue/green moved to bits 25..16
    762         and eax,000007FFh       ; drop red; bit 10 is already 0 after the shr ah,3
    763         
    764         shl edx,7               ; reds now in bits 30..23 and 14..7
    765         and ebx,07FF0000h       ; drop everything but second green+blue
    766 
    767         and edx,07C007C00h      ; top 5 bits of each red
    768         add eax,ebx
    769 
    770         add eax,edx             ; low word = first pixel, high word = second pixel
    771         inc ecx
    772 
    773         jnz .L5                 
    774 
    775 	mov [edi+ecx*4-4],eax   ; ecx is 0 here: store the last pair
    776 
    777 	; tail
    778 	pop ecx                 ; count as it was after the head pixel
    779 	and ecx,BYTE 1          ; odd count: one pixel is left
    780 	jz .L7
    781 	mov bl,[esi+0]    ; blue
    782 	mov al,[esi+1]    ; green
    783 	mov ah,[esi+2]    ; red
    784 	shr ah,3
    785 	and al,11111000b
    786 	shl eax,2
    787 	shr bl,3
    788 	add al,bl
    789 	mov [edi+0],al
    790 	mov [edi+1],ah
    791 	add esi,BYTE 4
    792 	add edi,BYTE 2
    793 
    794 .L7:
    795 	retn
    796 
    797 
    798 ;; Packs each 4-byte pixel into 16 bits: blue (top 5 bits) in bits 14..10, green
    799 ;; (top 5) in bits 9..5, red (top 5) in bits 4..0; bit 15 is written as 0.
    800 ;; 32 BIT RGB TO 16 BIT BGR 555
    801 	
    802 _ConvertX86p32_16BGR555:
    803 	
    804 	; check short
    805 	cmp ecx,BYTE 16
    806 	ja .L3                  ; more than 16 pixels: take the two-at-a-time path
    807 
    808 
    809 .L1:	; short loop, one pixel per iteration
    810 	mov ah,[esi+0]    ; blue
    811 	mov al,[esi+1]    ; green
    812 	mov bl,[esi+2]    ; red
    813 	shr ah,3          ; ah = 000bbbbb
    814 	and al,11111000b  ; al = ggggg000
    815 	shl eax,2         ; ax = 0bbbbbgg ggg00000
    816 	shr bl,3          ; bl = 000rrrrr
    817 	add al,bl         ; fill the low five bits with red
    818 	mov [edi+0],al
    819 	mov [edi+1],ah
    820 	add esi,BYTE 4
    821 	add edi,BYTE 2
    822 	dec ecx
    823 	jnz .L1
    824 .L2:
    825 	retn
    826 
    827 .L3:	; head: if EDI is not a multiple of 4, convert one pixel first
    828 	mov ebx,edi
    829         and ebx,BYTE 11b
    830 	jz .L4   
    831 	mov ah,[esi+0]    ; blue
    832 	mov al,[esi+1]    ; green
    833 	mov bl,[esi+2]    ; red
    834 	shr ah,3
    835 	and al,11111000b
    836 	shl eax,2
    837 	shr bl,3
    838 	add al,bl
    839 	mov [edi+0],al
    840 	mov [edi+1],ah
    841 	add esi,BYTE 4
    842 	add edi,BYTE 2
    843 	dec ecx
    844 
    845 .L4:	; save count
    846 	push ecx
    847 
    848 	; unroll twice
    849 	shr ecx,1               ; ecx = number of pixel pairs
    850     
    851 	; point arrays to end
    852 	lea esi,[esi+ecx*8]     ; 8 source bytes per pair
    853 	lea edi,[edi+ecx*4]     ; 4 dest bytes per pair
    854 
    855 	; negative counter 
    856 	neg ecx                 ; index runs from -pairs up to 0
    857 	jmp SHORT .L6           ; first pass has nothing to store yet
    858 
    859 .L5:
    860 	mov [edi+ecx*4-4],eax            ; store the pair computed in the previous pass
    861 .L6:
    862 	mov edx,[esi+ecx*8+4]            ; second pixel (for its red in bits 23..16)
    863 
    864         mov bh,[esi+ecx*8+4]             ; second blue
    865         mov ah,[esi+ecx*8]               ; first blue
    866 
    867         shr bh,3
    868         mov al,[esi+ecx*8+1]             ; first green
    869 
    870         shr ah,3
    871         mov bl,[esi+ecx*8+5]             ; second green
    872 
    873         shl eax,2                        ; first blue to bits 14..10, green to bits 9..2
    874         mov dl,[esi+ecx*8+2]             ; dl = first pixel's red
    875 
    876         shl ebx,18                       ; second blue to bits 30..26, green to bits 25..18
    877         and eax,00007FE0h                ; keep first blue + top 5 green bits
    878                 
    879         shr edx,3                        ; reds now in bits 20..13 and 4..0
    880         and ebx,07FE00000h               ; keep second blue + top 5 green bits
    881         
    882         and edx,001F001Fh                ; top 5 bits of each red
    883         add eax,ebx
    884 
    885         add eax,edx                      ; low word = first pixel, high word = second pixel
    886         inc ecx
    887 
    888         jnz .L5                 
    889 
    890 	mov [edi+ecx*4-4],eax            ; ecx is 0 here: store the last pair
    891 
    892 	; tail
    893 	pop ecx                 ; count as it was after the head pixel
    894 	and ecx,BYTE 1          ; odd count: one pixel is left
    895 	jz .L7
    896 	mov ah,[esi+0]    ; blue
    897 	mov al,[esi+1]    ; green
    898 	mov bl,[esi+2]    ; red
    899 	shr ah,3
    900 	and al,11111000b
    901 	shl eax,2
    902 	shr bl,3
    903 	add al,bl
    904 	mov [edi+0],al
    905 	mov [edi+1],ah
    906 	add esi,BYTE 4
    907 	add edi,BYTE 2
    908 
    909 .L7:
    910 	retn
    911 
    912 
    913 
    914 
    915 	
    916 ;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbb)
    917 ;; This routine writes FOUR pixels at once (dword) and then, if they exist,
    918 ;; up to three trailing pixels one byte at a time
    919 _ConvertX86p32_8RGB332:
    920 	; ESI = source, EDI = dest, ECX = pixel count; a count below 4 skips the dword loop
    921 	
    922 .L_ALIGNED:			; no jump to this label is visible in this routine
    923 	push ecx		; keep the full count for the tail
    924 
    925 	shr ecx,2		; We will draw 4 pixels at once
    926 	jnz .L1
    927 	
    928 	jmp .L2			; short jump out of range :(
    929 	
    930 .L1:
    931 	mov eax,[esi]		; first pair of pixels: eax = pixel 0
    932 	mov edx,[esi+4]		; edx = pixel 1
    933 
    934 	shr dl,6		; pixel 1 blue: top 2 bits -> bits 1..0
    935 	mov ebx,eax		; copy of pixel 0, used for its red
    936 
    937 	shr al,6		; pixel 0 blue: top 2 bits -> bits 1..0
    938 	and ah,0e0h		; pixel 0 green: keep top 3 bits
    939 
    940 	shr ebx,16		; bl = pixel 0 red
    941 	and dh,0e0h		; pixel 1 green: keep top 3 bits
    942 	
    943 	shr ah,3		; pixel 0 green -> bits 4..2
    944 	and bl,0e0h		; pixel 0 red: top 3 bits stay in bits 7..5
    945 
    946 	shr dh,3		; pixel 1 green -> bits 4..2
    947 	
    948 	or al,bl
    949 	
    950 	mov ebx,edx		; copy of pixel 1 (red byte still untouched)
    951 	or al,ah		; al = pixel 0 packed
    952 	
    953 	shr ebx,16		; bl = pixel 1 red
    954 	or dl,dh
    955 
    956 	and bl,0e0h
    957 	
    958 	or dl,bl		; dl = pixel 1 packed
    959 
    960 	mov ah,dl		; ax = pixel 1 : pixel 0
    961 
    962 	
    963 		
    964 	mov ebx,[esi+8]		; second pair of pixels: ebx = pixel 2
    965 
    966 	mov edx,ebx
    967 	and bh,0e0h		; pixel 2 green: keep top 3 bits
    968 
    969 	shr bl,6		; pixel 2 blue: top 2 bits
    970 	and edx,0e00000h	; pixel 2 red: keep top 3 bits (bits 23..21)
    971 
    972 	shr edx,16		; red -> dl bits 7..5
    973 
    974 	shr bh,3		; green -> bits 4..2
    975 
    976 	ror eax,16		; park pixels 0/1 in the high word so al/ah are free
    977 	or bl,dl
    978 
    979 	mov edx,[esi+12]	; edx = pixel 3
    980 	or bl,bh		; bl = pixel 2 packed
    981 	
    982 	mov al,bl
    983 
    984 	mov ebx,edx
    985 	and dh,0e0h		; pixel 3 green: keep top 3 bits
    986 
    987 	shr dl,6		; pixel 3 blue: top 2 bits
    988 	and ebx,0e00000h	; pixel 3 red: keep top 3 bits
    989 	
    990 	shr dh,3
    991 	mov ah,dl
    992 
    993 	shr ebx,16
    994 	or ah,dh
    995 
    996 	or ah,bl		; ah = pixel 3 packed
    997 
    998 	rol eax,16		; pixels 0/1 back in the low word, 2/3 in the high word
    999 	add esi,BYTE 16
   1000 			
   1001 	mov [edi],eax		; four packed pixels in one store
   1002 	add edi,BYTE 4
   1003 	
   1004 	dec ecx
   1005 	jz .L2			; L1 out of range for short jump :(
   1006 	
   1007 	jmp .L1
   1008 .L2:
   1009 	
   1010 	pop ecx
   1011 	and ecx,BYTE 3		; mask out number of pixels to draw
   1012 	
   1013 	jz .L4			; Nothing to do anymore
   1014 
   1015 .L3:
   1016 	mov eax,[esi]		; single pixel conversion for trailing pixels
   1017 
   1018         mov ebx,eax
   1019 
   1020         shr al,6                ; blue: top 2 bits
   1021         and ah,0e0h             ; green: keep top 3 bits
   1022 
   1023         shr ebx,16              ; bl = red
   1024 
   1025         shr ah,3                ; green -> bits 4..2
   1026         and bl,0e0h             ; red: top 3 bits stay in bits 7..5
   1027 
   1028         or al,ah
   1029         or al,bl                ; al = rrrgggbb
   1030 
   1031         mov [edi],al
   1032 
   1033         inc edi
   1034         add esi,BYTE 4
   1035 
   1036 	dec ecx
   1037 	jnz .L3
   1038 	
   1039 .L4:	
   1040 	retn
   1041 
   1042 %ifidn __OUTPUT_FORMAT__,elf32  ; only when assembling to the elf32 output format
   1043 section .note.GNU-stack noalloc noexec nowrite progbits ; empty note section; by GNU toolchain convention it marks the object as not needing an executable stack
   1044 %endif
   1045