;; -----------------------------------------------------------------------
;;
;;   Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
;;   Copyright 2009-2010 Intel Corporation; author: H. Peter Anvin
;;
;;   This program is free software; you can redistribute it and/or modify
;;   it under the terms of the GNU General Public License as published by
;;   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
;;   Boston MA 02111-1307, USA; either version 2 of the License, or
;;   (at your option) any later version; incorporated herein by reference.
;;
;; -----------------------------------------------------------------------

;;
;; bcopy32xx.inc
;;


;
; 32-bit bcopy routine
;
; This is the actual 32-bit portion of the bcopy and shuffle and boot
; routines.  ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the
; sole exception being the actual relocation code at the beginning of
; pm_shuffle_boot.
;
; It also really needs to live all in a single segment, for the
; address calculations to actually work.
;

		bits 32
		section .bcopyxx.text
		align 16
;
; pm_bcopy:
;
;	This is the protected-mode core of the "bcopy" routine.
;	Try to do aligned transfers; if the src and dst are relatively
;	misaligned, align the dst.
;
;	Inputs:
;	  ESI = source address, or -1 to zero-fill the destination
;	  EDI = destination address
;	  ECX = byte count
;
;	ECX is guaranteed to not be zero on entry.
;
;	If the source lies below the destination and its final byte
;	reaches the destination (ESI < EDI <= ESI+ECX-1) the copy is
;	done backwards with DF set; DF is cleared again before return.
;	The forward and zero-fill paths issue no CLD of their own, so
;	they rely on DF being clear on entry.
;
;	Clobbers ESI, EDI, ECX.
;	EAX and EBX are saved and restored; EDX is not touched.
;

pm_bcopy:
		push ebx
		push eax

		cmp esi,-1
		je near .bzero		; near: .bzero is out of short reach

		cmp esi,edi		; If source < destination, we might
		jb .reverse		; have to copy backwards

.forward:
		; Initial alignment.  Every test looks at the live EDI,
		; so the word step sees the address the byte step left.
		test edi,1
		jz .faa1
		movsb
		dec ecx
.faa1:
		mov al,cl		; Low bits, in case we go to .f_tiny
		cmp ecx,2
		jb .f_tiny

		test edi,2
		jz .faa2
		movsw
		sub ecx,2
.faa2:

		; Bulk transfer; EDI is dword-aligned here
		mov al,cl		; Save low bits
		shr ecx,2		; Convert to dwords
		rep movsd		; Do our business
		; At this point ecx == 0

		test al,2
		jz .fab2
		movsw
.fab2:
.f_tiny:
		test al,1
		jz .fab1
		movsb
.fab1:
.done:
		pop eax
		pop ebx
		ret

.reverse:
		lea eax,[esi+ecx-1]	; Point to final byte
		cmp edi,eax
		ja .forward		; No overlap, do forward copy

		std			; Reverse copy
		lea edi,[edi+ecx-1]
		mov esi,eax

		; Initial alignment.  EDI addresses the last byte still
		; to be written; if that address is even, peel one byte
		; so that the remaining data ends on a word boundary.
		test edi,1
		jnz .raa1
		movsb
		dec ecx
.raa1:

		dec esi			; Step back to the start of the
		dec edi			; last word
		mov al,cl
		cmp ecx,2
		jb .r_tiny
		test edi,2		; Last word already ends a dword?
		jnz .raa2
		movsw
		sub ecx,2
.raa2:

		; Bulk copy; after the adjustment EDI is dword-aligned
		sub esi,2		; Step back to the start of the
		sub edi,2		; last dword
		mov al,cl		; Save low bits
		shr ecx,2
		rep movsd

		; Final alignment
.r_final:
		add esi,2		; Back to word granularity
		add edi,2
		test al,2
		jz .rab2
		movsw
.rab2:
.r_tiny:
		inc esi			; Back to byte granularity
		inc edi
		test al,1
		jz .rab1
		movsb
.rab1:
		cld
		jmp short .done

.bzero:
		xor eax,eax		; Fill value

		; Initial alignment, again driven by the live EDI
		test edi,1
		jz .zaa1
		stosb
		dec ecx
.zaa1:

		mov bl,cl		; Low bits, in case we go to .z_tiny
		cmp ecx,2
		jb .z_tiny
		test edi,2
		jz .zaa2
		stosw
		sub ecx,2
.zaa2:

		; Bulk; EDI is dword-aligned here
		mov bl,cl		; Save low bits
		shr ecx,2
		rep stosd

		test bl,2
		jz .zab2
		stosw
.zab2:
.z_tiny:
		test bl,1
		jz .zab1
		stosb
.zab1:
		jmp .done		; No "short": let the assembler size it

;
; shuffle_and_boot:
;
; This routine is used to shuffle memory around,
; followed by invoking an entry point somewhere in low memory.  This
; routine can clobber any memory outside the bcopy special area.
;
; IMPORTANT: This routine does not set up any registers.
; It is the responsibility of the caller to generate an appropriate entry
; stub; *especially* when going to real mode.
;
; Inputs:
;	ESI		-> Pointer to list of (dst, src, len) triples(*)
;	EDI		-> Pointer to safe area for list + shuffler
;			   (must not overlap this code nor the RM stack)
;	ECX		-> Byte count of list area (for initial copy)
;
;     If src == -1: then the memory pointed to by (dst, len) is bzeroed;
;		    this is handled inside the bcopy routine.
;
;     If len == 0:  this marks the end of the list; dst indicates
;		    the entry point and src the mode (0 = 16-bit entry
;		    via pm_shuffle_16, nonzero = protected-mode entry)
;
;     (*) dst, src, and len are four bytes each
;
; do_raw_shuffle_and_boot is the same entry point, but with a C ABI:
; do_raw_shuffle_and_boot(safearea, descriptors, bytecount)
; The three arguments are taken from EAX, EDX and ECX respectively.
;
		global do_raw_shuffle_and_boot
do_raw_shuffle_and_boot:
		mov edi,eax		; safearea    -> EDI
		mov esi,edx		; descriptors -> ESI
					; bytecount is already in ECX

pm_shuffle:
		cli			; End interrupt service (for good)
		mov ebx,edi		; EBX <- descriptor list
					; (the copy made in the safe area)
		lea edx,[edi+ecx+15]	; EDX <- where to relocate our code to
		and edx,~15		; Align 16 to benefit the GDT
		call pm_bcopy		; Copy the list into the safe area
		mov esi,__bcopyxx_start	; Absolute source address
		mov edi,edx		; Absolute target address
		sub edx,esi		; EDX <- address delta
		mov ecx,__bcopyxx_dwords ; Presumably the shuffler size in
					; dwords (defined outside this file)
		lea eax,[edx+.safe]	; Resume point
		; Relocate this code
		rep movsd
		jmp eax			; Jump to safe location
.safe:
		; From here on we execute from the relocated copy;
		; EDX still holds the relocation delta.

		; Give ourselves a safe stack
		lea esp,[edx+bcopyxx_stack+__bcopyxx_end]
		add edx,bcopy_gdt	; EDX <- new GDT
		mov [edx+2],edx		; GDT self-pointer
		lgdt [edx]		; Switch to local GDT

		; Now for the actual shuffling...
.loop:
		mov edi,[ebx]		; dst
		mov esi,[ebx+4]		; src (-1 = zero-fill)
		mov ecx,[ebx+8]		; len (0 = end of list)
		add ebx,12		; Advance to the next descriptor
		jecxz .done
		call pm_bcopy
		jmp .loop
.done:
		; Here EDI = entry point and ESI = mode, both taken from
		; the terminating descriptor, and ECX == 0.
		lidt [edx+RM_IDT_ptr-bcopy_gdt]	; RM-like IDT
		push ecx		; == 0, for cleaning the flags register
		and esi,esi
		jz pm_shuffle_16	; Mode 0: 16-bit entry point
		popfd			; Clean the flags
		jmp edi			; Protected mode entry

		; We have a 16-bit entry point, so we need to return
		; to 16-bit mode.  Note: EDX already points to the GDT.
pm_shuffle_16:
		; Patch the base address of the CS16 and DS16 descriptors
		; to the entry point in EDI: bytes 2-3 take base bits
		; 15:0, byte 4 bits 23:16 and byte 7 bits 31:24.
		mov eax,edi
		mov [edx+PM_CS16+2],ax
		mov [edx+PM_DS16+2],ax
		shr eax,16
		mov [edx+PM_CS16+4],al
		mov [edx+PM_CS16+7],ah
		mov [edx+PM_DS16+4],al
		mov [edx+PM_DS16+7],ah
		; EAX <- CR0 with bit 0 (PE) cleared.  It is not written
		; back to CR0 here; presumably the caller-supplied 16-bit
		; entry stub does that -- confirm against the stub.
		mov eax,cr0
		and al,~1
		popfd			; Clean the flags
		; No flag-changing instructions below...
		mov dx,PM_DS16		; Only the low 16 bits of EDX are
		mov ds,edx		; used as the selector
		mov es,edx
		mov fs,edx
		mov gs,edx
		mov ss,edx
		jmp PM_CS16:0		; Offset 0 of CS16 == the entry point

		section .bcopyxx.data

		alignz 16
; GDT descriptor entry
; "desc NAME" defines the label bcopy_gdt.NAME at the current position
; and PM_NAME as its offset from bcopy_gdt, i.e. its selector value.
%macro desc 1
bcopy_gdt.%1:
PM_%1		equ bcopy_gdt.%1-bcopy_gdt
%endmacro

bcopy_gdt:
		dw bcopy_gdt_size-1	; Null descriptor - contains GDT
		dd bcopy_gdt		; pointer for LGDT instruction
		dw 0			; (the dd is rewritten by pm_shuffle
					; once the GDT has been relocated)

		; TSS segment to keep Intel VT happy.  Intel VT is
		; unhappy about anything that doesn't smell like a
		; full-blown 32-bit OS.
	desc TSS
		dw 104-1, DummyTSS	; 08h 32-bit task state segment
		dd 00008900h		; present, dpl 0, 104 bytes @DummyTSS

	desc CS16
		dd 0000ffffh		; 10h Code segment, use16, readable,
		dd 00009b00h		; present, dpl 0, cover 64K
	desc DS16
		dd 0000ffffh		; 18h Data segment, use16, read/write,
		dd 00009300h		; present, dpl 0, cover 64K
	desc CS32
		dd 0000ffffh		; 20h Code segment, use32, readable,
		dd 00cf9b00h		; present, dpl 0, cover all 4G
	desc DS32
		dd 0000ffffh		; 28h Data segment, use32, read/write,
		dd 00cf9300h		; present, dpl 0, cover all 4G

bcopy_gdt_size:	equ $-bcopy_gdt
;
; Space for a dummy task state segment.  It should never be actually
; accessed, but just in case it is, point to a chunk of memory that
; has a chance to not be used for anything real...
;
DummyTSS	equ 0x580

		align 4
RM_IDT_ptr:	dw 0FFFFh		; Length (nonsense, but matches CPU)
		dd 0			; Offset

bcopyxx_stack	equ 128			; We want this much stack

; Exported dword holding __bcopyxx_len, a symbol defined outside this
; file (presumably the byte length of the shuffler image -- confirm
; against the linker script).
		section .rodata
		global __syslinux_shuffler_size
		extern __bcopyxx_len
		align 4
__syslinux_shuffler_size:
		dd __bcopyxx_len

		bits 16
		section .text16