Home | History | Annotate | Download | only in core
      1 ;; -----------------------------------------------------------------------
      2 ;;
      3 ;;   Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
      4 ;;   Copyright 2009-2010 Intel Corporation; author: H. Peter Anvin
      5 ;;
      6 ;;   This program is free software; you can redistribute it and/or modify
      7 ;;   it under the terms of the GNU General Public License as published by
      8 ;;   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
      9 ;;   Boston MA 02111-1307, USA; either version 2 of the License, or
     10 ;;   (at your option) any later version; incorporated herein by reference.
     11 ;;
     12 ;; -----------------------------------------------------------------------
     13 
     14 ;;
     15 ;; bcopy32xx.inc
     16 ;;
     17 
     18 
     19 ;
     20 ; 32-bit bcopy routine
     21 ;
     22 ; This is the actual 32-bit portion of the bcopy and shuffle and boot
     23 ; routines.  ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the
     24 ; sole exception being the actual relocation code at the beginning of
     25 ; pm_shuffle_boot.
     26 ;
     27 ; It also really needs to live all in a single segment, for the
     28 ; address calculations to actually work.
     29 ;
     30 
     31 		bits 32
     32 		section .bcopyxx.text
     33 		align 16
     34 ;
     35 ; pm_bcopy:
     36 ;
     37 ;	This is the protected-mode core of the "bcopy" routine.
     38 ;	Try to do aligned transfers; if the src and dst are relatively
     39 ;	misaligned, align the dst.
     40 ;
     41 ;	ECX is guaranteed to not be zero on entry.
     42 ;
     43 ;	Clobbers ESI, EDI, ECX.
     44 ;
     45 
     46 pm_bcopy:
     47 		push ebx
     48 		push edx
     49 		push eax
     50 
     51 		cmp esi,-1
     52 		je .bzero
     53 
     54 		cmp esi,edi		; If source < destination, we might
     55 		jb .reverse		; have to copy backwards
     56 
     57 .forward:
     58 		; Initial alignment
     59 		mov edx,edi
     60 		shr edx,1
     61 		jnc .faa1
     62 		movsb
     63 		dec ecx
     64 .faa1:
     65 		mov al,cl
     66 		cmp ecx,2
     67 		jb .f_tiny
     68 
     69 		shr edx,1
     70 		jnc .faa2
     71 		movsw
     72 		sub ecx,2
     73 .faa2:
     74 
     75 		; Bulk transfer
     76 		mov al,cl		; Save low bits
     77 		shr ecx,2		; Convert to dwords
     78 		rep movsd		; Do our business
     79 		; At this point ecx == 0
     80 
     81 		test al,2
     82 		jz .fab2
     83 		movsw
     84 .fab2:
     85 .f_tiny:
     86 		test al,1
     87 		jz .fab1
     88 		movsb
     89 .fab1:
     90 .done:
     91 		pop eax
     92 		pop edx
     93 		pop ebx
     94 		ret
     95 
     96 .reverse:
     97 		lea eax,[esi+ecx-1]	; Point to final byte
     98 		cmp edi,eax
     99 		ja .forward		; No overlap, do forward copy
    100 
    101 		std			; Reverse copy
    102 		lea edi,[edi+ecx-1]
    103 		mov esi,eax
    104 
    105 		; Initial alignment
    106 		mov edx,edi
    107 		shr edx,1
    108 		jc .raa1
    109 		movsb
    110 		dec ecx
    111 .raa1:
    112 
    113 		dec esi
    114 		dec edi
    115 		mov al,cl
    116 		cmp ecx,2
    117 		jb .r_tiny
    118 		shr edx,1
    119 		jc .raa2
    120 		movsw
    121 		sub ecx,2
    122 .raa2:
    123 
    124 		; Bulk copy
    125 		sub esi,2
    126 		sub edi,2
    127 		mov al,cl		; Save low bits
    128 		shr ecx,2
    129 		rep movsd
    130 
    131 		; Final alignment
    132 .r_final:
    133 		add esi,2
    134 		add edi,2
    135 		test al,2
    136 		jz .rab2
    137 		movsw
    138 .rab2:
    139 .r_tiny:
    140 		inc esi
    141 		inc edi
    142 		test al,1
    143 		jz .rab1
    144 		movsb
    145 .rab1:
    146 		cld
    147 		jmp short .done
    148 
    149 .bzero:
    150 		xor eax,eax
    151 
    152 		; Initial alignment
    153 		mov edx,edi
    154 		shr edx,1
    155 		jnc .zaa1
    156 		stosb
    157 		dec ecx
    158 .zaa1:
    159 
    160 		mov bl,cl
    161 		cmp ecx,2
    162 		jb .z_tiny
    163 		shr edx,1
    164 		jnc .zaa2
    165 		stosw
    166 		sub ecx,2
    167 .zaa2:
    168 
    169 		; Bulk
    170 		mov bl,cl		; Save low bits
    171 		shr ecx,2
    172 		rep stosd
    173 
    174 		test bl,2
    175 		jz .zab2
    176 		stosw
    177 .zab2:
    178 .z_tiny:
    179 		test bl,1
    180 		jz .zab1
    181 		stosb
    182 .zab1:
    183 		jmp short .done
    184 
    185 ;
    186 ; shuffle_and_boot:
    187 ;
    188 ; This routine is used to shuffle memory around, followed by
    189 ; invoking an entry point somewhere in low memory.  This routine
    190 ; can clobber any memory outside the bcopy special area.
    191 ;
    192 ; IMPORTANT: This routine does not set up any registers.
    193 ; It is the responsibility of the caller to generate an appropriate entry
    194 ; stub; *especially* when going to real mode.
    195 ;
    196 ; Inputs:
    197 ;	ESI		-> Pointer to list of (dst, src, len) pairs(*)
    198 ;	EDI		-> Pointer to safe area for list + shuffler
    199 ;			   (must not overlap this code nor the RM stack)
    200 ;	ECX		-> Byte count of list area (for initial copy)
    201 ;
    202 ;     If src == -1: then the memory pointed to by (dst, len) is bzeroed;
    203 ;		    this is handled inside the bcopy routine.
    204 ;
    205 ;     If len == 0:  this marks the end of the list; dst indicates
    206 ;		    the entry point and src the mode (0 = pm, 1 = rm)
    207 ;
    208 ;     (*) dst, src, and len are four bytes each
    209 ;
    210 ; do_raw_shuffle_and_boot is the same entry point, but with a C ABI:
    211 ; do_raw_shuffle_and_boot(safearea, descriptors, bytecount)
    212 ;
    213 		global do_raw_shuffle_and_boot
    214 do_raw_shuffle_and_boot:
    215 		mov edi,eax
    216 		mov esi,edx
    217 
    218 pm_shuffle:
    219 		cli			; End interrupt service (for good)
    220 		mov ebx,edi		; EBX <- descriptor list
    221 		lea edx,[edi+ecx+15]	; EDX <- where to relocate our code to
    222 		and edx,~15		; Align 16 to benefit the GDT
    223 		call pm_bcopy
    224 		mov esi,__bcopyxx_start	; Absolute source address
    225 		mov edi,edx		; Absolute target address
    226 		sub edx,esi		; EDX <- address delta
    227 		mov ecx,__bcopyxx_dwords
    228 		lea eax,[edx+.safe]	; Resume point
    229 		; Relocate this code
    230 		rep movsd
    231 		jmp eax			; Jump to safe location
    232 .safe:
    233 		; Give ourselves a safe stack
    234 		lea esp,[edx+bcopyxx_stack+__bcopyxx_end]
    235 		add edx,bcopy_gdt	; EDX <- new GDT
    236 		mov [edx+2],edx		; GDT self-pointer
    237 		lgdt [edx]		; Switch to local GDT
    238 
    239 		; Now for the actual shuffling...
    240 .loop:
    241 		mov edi,[ebx]
    242 		mov esi,[ebx+4]
    243 		mov ecx,[ebx+8]
    244 		add ebx,12
    245 		jecxz .done
    246 		call pm_bcopy
    247 		jmp .loop
    248 .done:
    249 		lidt [edx+RM_IDT_ptr-bcopy_gdt]	; RM-like IDT
    250 		push ecx		; == 0, for cleaning the flags register
    251 		and esi,esi
    252 		jz pm_shuffle_16
    253 		popfd			; Clean the flags
    254 		jmp edi			; Protected mode entry
    255 
    256 		; We have a 16-bit entry point, so we need to return
    257 		; to 16-bit mode.  Note: EDX already points to the GDT.
    258 pm_shuffle_16:
    259 		mov eax,edi
    260 		mov [edx+PM_CS16+2],ax
    261 		mov [edx+PM_DS16+2],ax
    262 		shr eax,16
    263 		mov [edx+PM_CS16+4],al
    264 		mov [edx+PM_CS16+7],ah
    265 		mov [edx+PM_DS16+4],al
    266 		mov [edx+PM_DS16+7],ah
    267 		mov eax,cr0
    268 		and al,~1
    269 		popfd			; Clean the flags
    270 		; No flag-changing instructions below...
    271 		mov dx,PM_DS16
    272 		mov ds,edx
    273 		mov es,edx
    274 		mov fs,edx
    275 		mov gs,edx
    276 		mov ss,edx
    277 		jmp PM_CS16:0
    278 
    279 		section	.bcopyxx.data
    280 
    281 		alignz 16
    282 ; GDT descriptor entry
    283 %macro desc 1
    284 bcopy_gdt.%1:
    285 PM_%1		equ bcopy_gdt.%1-bcopy_gdt
    286 %endmacro
    287 
    288 bcopy_gdt:
    289 		dw bcopy_gdt_size-1	; Null descriptor - contains GDT
    290 		dd bcopy_gdt		; pointer for LGDT instruction
    291 		dw 0
    292 
    293 		; TSS segment to keep Intel VT happy.  Intel VT is
    294 		; unhappy about anything that doesn't smell like a
    295 		; full-blown 32-bit OS.
    296 	desc TSS
    297 		dw 104-1, DummyTSS	; 08h 32-bit task state segment
    298 		dd 00008900h		; present, dpl 0, 104 bytes @DummyTSS
    299 
    300 	desc CS16
    301 		dd 0000ffffh		; 10h Code segment, use16, readable,
    302 		dd 00009b00h		; present, dpl 0, cover 64K
    303 	desc DS16
    304 		dd 0000ffffh		; 18h Data segment, use16, read/write,
    305 		dd 00009300h		; present, dpl 0, cover 64K
    306 	desc CS32
    307 		dd 0000ffffh		; 20h Code segment, use32, readable,
    308 		dd 00cf9b00h		; present, dpl 0, cover all 4G
    309 	desc DS32
    310 		dd 0000ffffh		; 28h Data segment, use32, read/write,
    311 		dd 00cf9300h		; present, dpl 0, cover all 4G
    312 
    313 bcopy_gdt_size:	equ $-bcopy_gdt
    314 ;
    315 ; Space for a dummy task state segment.  It should never be actually
    316 ; accessed, but just in case it is, point to a chunk of memory that
    317 ; has a chance to not be used for anything real...
    318 ;
    319 DummyTSS	equ 0x580
    320 
    321 		align 4
    322 RM_IDT_ptr:	dw 0FFFFh		; Length (nonsense, but matches CPU)
    323 		dd 0			; Offset
    324 
    325 bcopyxx_stack	equ 128			; We want this much stack
    326 
    327 		section .rodata
    328 		global __syslinux_shuffler_size
    329 		extern __bcopyxx_len
    330 		align 4
    331 __syslinux_shuffler_size:
    332 		dd __bcopyxx_len
    333 
    334 		bits 16
    335 		section .text16
    336