;; -*- fundamental -*-
;; -----------------------------------------------------------------------
;;
;;   Copyright 1994-2008 H. Peter Anvin - All Rights Reserved
;;   Copyright 2009 Intel Corporation; author: H. Peter Anvin
;;
;;   This program is free software; you can redistribute it and/or modify
;;   it under the terms of the GNU General Public License as published by
;;   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
;;   Boston MA 02111-1307, USA; either version 2 of the License, or
;;   (at your option) any later version; incorporated herein by reference.
;;
;; -----------------------------------------------------------------------

;;
;; init16.asm
;;
;; Routine to initialize and to trampoline into 32-bit
;; protected memory.  This code is derived from bcopy32.inc and
;; com32.inc in the main SYSLINUX distribution.
;;

%include '../version.gen'

MY_CS           equ 0x0800              ; Real-mode segment this code runs from
CS_BASE         equ (MY_CS << 4)        ; Linear address of that segment

; Low memory bounce buffer: the 64K segment right above ours
BOUNCE_SEG      equ (MY_CS+0x1000)

%define DO_WBINVD 0                     ; Nonzero: flush caches around A20 toggles

; Declare every section up front with its alignment
                section .rodata align=16
                section .data   align=16
                section .bss    align=16
                section .stack  align=16 nobits
stack           resb 512                ; Real-mode stack (grows down from stack_end)
stack_end       equ $

;; -----------------------------------------------------------------------
;;  Kernel image header
;;
;;  The offsets noted below follow from the sizes of the directives,
;;  counting from the first byte of .text.
;; -----------------------------------------------------------------------

                section .text           ; Must be first in image
                bits 16

cmdline         times 497 db 0          ; 0x000: we put the command line here
setup_sects     db 0                    ; 0x1F1
root_flags      dw 0                    ; 0x1F2
syssize         dw 0                    ; 0x1F4
swap_dev        dw 0                    ; 0x1F6
ram_size        dw 0                    ; 0x1F8
vid_mode        dw 0                    ; 0x1FA
root_dev        dw 0                    ; 0x1FC
boot_flag       dw 0xAA55               ; 0x1FE

_start:         jmp short start         ; 0x200: skip over the header fields

                db "HdrS"               ; 0x202: header signature
                dw 0x0203               ; 0x206: header version number

realmode_swtch  dw 0, 0                 ; 0x208: default_switch, SETUPSEG
start_sys_seg   dw 0x1000               ; 0x20C: obsolete
version_ptr     dw memdisk_version-0x200 ; 0x20E: version string ptr
type_of_loader  db 0                    ; 0x210: filled in by boot loader
loadflags       db 1                    ; 0x211: please load high
setup_move_size dw 0                    ; 0x212: unused
code32_start    dd 0x100000             ; 0x214: 32-bit start address
ramdisk_image   dd 0                    ; 0x218: loaded ramdisk image address
ramdisk_size    dd 0                    ; 0x21C: size of loaded ramdisk
bootsect_kludge dw 0, 0                 ; 0x220
heap_end_ptr    dw 0                    ; 0x224
pad1            dw 0                    ; 0x226
cmd_line_ptr    dd 0                    ; 0x228: command line
ramdisk_max     dd 0xffffffff           ; 0x22C: highest allowed ramdisk address

;
; These fields aren't real setup fields, they're poked in by the
; 32-bit code.
;
b_esdi          dd 0                    ; ES:DI for boot sector invocation
b_edx           dd 0                    ; EDX for boot sector invocation
b_sssp          dd 0                    ; SS:SP on boot sector invocation
b_csip          dd 0                    ; CS:IP on boot sector invocation

                section .rodata
memdisk_version:
                db "MEMDISK ", VERSION_STR, " ", DATE, 0

;; -----------------------------------------------------------------------
;;  End kernel image header
;; -----------------------------------------------------------------------

;
; Move ourselves down into memory to reduce the risk of conflicts;
; then canonicalize CS to match the other segments.
;

                section .text
                bits 16

;
; start: copy (setup_sects + 1) sectors from DS:0 to MY_CS:0, then
; reload DS/SS/CS so every segment register refers to the copy.
; [setup_sects] is read through DS as it is on entry, so DS must already
; address this image at that point.
;
start:
                cld                     ; String ops count upwards
                mov ax,MY_CS
                mov es,ax               ; Destination segment
                movzx cx,byte [setup_sects]
                inc cx                  ; Add one for the boot sector
                shl cx,7                ; Sectors -> dwords (512/4 = 128)
                xor di,di               ; Destination offset 0
                xor si,si               ; Source offset 0
                mov fs,si               ; fs <- 0
                rep movsd
                mov ds,ax
                mov ss,ax
                mov esp,stack_end
                jmp MY_CS:.next         ; Far jump reloads CS with MY_CS
.next:

;
; Copy the command line, if there is one
;
; The string is fetched through GS:SI, derived from the linear address in
; [cmd_line_ptr], and stored at ES:0 (the cmdline field at the bottom of
; our own segment).  At most 496 characters are copied and a terminating
; NUL is always written.
;
copy_cmdline:
                xor di,di               ; Bottom of our own segment (= "boot sector")
                mov eax,[cmd_line_ptr]
                test eax,eax
                jz .endcmd              ; No command line
                mov si,ax
                and si,0x000F           ; Starting offset only
                shr eax,4               ; Convert to segment
                mov gs,ax
                mov cx,496              ; Max number of bytes
.next_byte:
                gs lodsb
                test al,al
                jz .endcmd              ; Hit the source terminator
                stosb
                loop .next_byte
.endcmd:
                xor al,al
                stosb                   ; Terminate the copy

;
; Now jump to 32-bit code
;
                sti
                call init32
;
; When init32 returns, we have been set up, the new boot sector loaded,
; and we should go and run the newly loaded boot sector.
;
; The setup function will have poked values into the setup area.
;
                movzx edi,word [cs:b_esdi]
                mov es,word [cs:b_esdi+2]
                mov edx,[cs:b_edx]

                cli
                xor esi,esi             ; No partition table involved
                mov ds,si               ; Make all the segments consistent
                mov fs,si
                mov gs,si
                lss sp,[cs:b_sssp]      ; Load SS:SP in one instruction
                movzx esp,sp            ; Clear the upper half of ESP
                jmp far [cs:b_csip]     ; Enter the boot sector

;
; We enter protected mode, set up a flat 32-bit environment, run rep movsd
; and then exit.  IMPORTANT: This code assumes cs == MY_CS.
;
; This code is probably excessively anal-retentive in its handling of
; segments, but this stuff is painful enough as it is without having to rely
; on everything happening "as it ought to."
;
DummyTSS        equ 0x580               ; Hopefully safe place in low memory

                section .data

;
; desc base, limit, flags
;
; Emits one 8-byte segment descriptor.  Bits 0-7 of "flags" become the
; access byte and bits 12-15 become the upper flag nibble; bits 8-11 are
; masked off because that nibble holds limit[19:16].
;
%macro desc 3
                dd (%2 & 0xffff) | ((%1 & 0xffff) << 16)
                dd (%1 & 0xff000000) | (%2 & 0xf0000) | ((%3 & 0xf0ff) << 8) | ((%1 & 0x00ff0000) >> 16)
%endmacro

                align 8, db 0
call32_gdt:     dw call32_gdt_size-1    ; Null descriptor - contains GDT
.adj1:          dd call32_gdt+CS_BASE   ; pointer for LGDT instruction
                dw 0

                ; 0008: Dummy TSS to make Intel VT happy
                ; Should never be actually accessed...
                desc DummyTSS, 103, 0x8089

                ; 0010: Code segment, use16, readable, dpl 0, base CS_BASE, 64K
                desc CS_BASE, 0xffff, 0x009b

                ; 0018: Data segment, use16, read/write, dpl 0, base CS_BASE, 64K
                desc CS_BASE, 0xffff, 0x0093

                ; 0020: Code segment, use32, readable, dpl 0, base 0, 4G
                desc 0, 0xfffff, 0xc09b

                ; 0028: Data segment, use32, read/write, dpl 0, base 0, 4G
                desc 0, 0xfffff, 0xc093

call32_gdt_size: equ $-call32_gdt

err_a20:        db 'ERROR: A20 gate not responding!',13,10,0

                section .bss
                alignb 4
Return          resd 1                  ; Return value
SavedSP         resw 1                  ; Real-mode SP saved across mode switches
A20Tries        resb 1                  ; Remaining enable_a20 retries

                section .data
                align 4, db 0
Target          dd 0                    ; Target address
Target_Seg      dw 20h                  ; Target CS

A20Type         dw 0                    ; Default = unknown

                section .text
                bits 16
;
; Routines to enable and disable (yuck) A20.  These routines are gathered
; from tips from a couple of sources, including the Linux kernel and
; http://www.x86.org/.  The need for the delay to be as large as given here
; is indicated by Donnie Barnes of RedHat, the problematic system being an
; IBM ThinkPad 760EL.
;
; We typically toggle A20 twice for every 64K transferred.
;
%define io_delay        call _io_delay
%define IO_DELAY_PORT   80h             ; Invalid port (we hope!)
%define disable_wait    32              ; How long to wait for a disable

; Values stored in A20Type; each doubles as an index into the tables below
%define A20_DUNNO       0               ; A20 type unknown
%define A20_NONE        1               ; A20 always on?
%define A20_BIOS        2               ; A20 BIOS enable
%define A20_KBC         3               ; A20 through KBC
%define A20_FAST        4               ; A20 through port 92h

                align 2, db 0
; Per-type entry points: A20List to enable, A20DList to disable
A20List         dw a20_dunno, a20_none, a20_bios, a20_kbc, a20_fast
A20DList        dw a20d_dunno, a20d_none, a20d_bios, a20d_kbc, a20d_fast
a20_adjust_cnt  equ ($-A20List)/2

;
; slow_out: write AL to port DX, then delay.
; _io_delay: two dummy writes to IO_DELAY_PORT.
;
slow_out:       out dx, al              ; Fall through

_io_delay:      out IO_DELAY_PORT,al
                out IO_DELAY_PORT,al
                ret

;
; enable_a20: turn the A20 gate on, trying each known method in turn.
; All general registers are preserved (pushad/popad).  The method last
; attempted is recorded in A20Type.  If nothing works after 255 rounds,
; an error message is printed and the machine halts; this routine then
; never returns.
;
enable_a20:
                pushad
                mov byte [A20Tries],255 ; Times to try to make this work

try_enable_a20:

;
; Flush the caches
;
%if DO_WBINVD
                call try_wbinvd
%endif

;
; If the A20 type is known, jump straight to type
;
                mov bp,[A20Type]
                add bp,bp               ; Convert to word offset
.adj4:          jmp word [bp+A20List]

;
; First, see if we are on a system with no A20 gate
;
a20_dunno:
a20_none:
                mov byte [A20Type], A20_NONE
                call a20_test
                jnz a20_done            ; Already enabled

;
; Next, try the BIOS (INT 15h AX=2401h)
;
a20_bios:
                mov byte [A20Type], A20_BIOS
                mov ax,2401h
                pushf                   ; Some BIOSes muck with IF
                int 15h
                popf

                call a20_test
                jnz a20_done

;
; Enable the keyboard controller A20 gate
;
a20_kbc:
                mov dl, 1               ; Allow early exit
                call empty_8042
                jnz a20_done            ; A20 live, no need to use KBC

                mov byte [A20Type], A20_KBC ; Starting KBC command sequence

                mov al,0D1h             ; Write output port
                out 064h, al
                call empty_8042_uncond

                mov al,0DFh             ; A20 on
                out 060h, al
                call empty_8042_uncond

                ; Apparently the UHCI spec assumes that A20 toggle
                ; ends with a null command (assumed to be for synchronization?)
                ; Put it here to see if it helps anything...
                mov al,0FFh             ; Null command
                out 064h, al
                call empty_8042_uncond

                ; Verify that A20 actually is enabled.  Do that by
                ; observing a word in low memory and the same word in
                ; the HMA until they are no longer coherent.  Note that
                ; we don't do the same check in the disable case, because
                ; we don't want to *require* A20 masking (SYSLINUX should
                ; work fine without it, if the BIOS does.)
.kbc_wait:      push cx
                xor cx,cx               ; CX = 0: "loop" runs 65536 times
.kbc_poll:
                call a20_test
                jnz a20_done_pop
                loop .kbc_poll

                pop cx
;
; Running out of options here.  Final attempt: enable the "fast A20 gate"
;
a20_fast:
                mov byte [A20Type], A20_FAST ; Haven't used the KBC yet
                in al, 092h
                or al,02h
                and al,~01h             ; Don't accidentally reset the machine!
                out 092h, al

.fast_wait:     push cx
                xor cx,cx               ; CX = 0: "loop" runs 65536 times
.fast_poll:
                call a20_test
                jnz a20_done_pop
                loop .fast_poll

                pop cx

;
; Oh bugger.  A20 is not responding.  Try frobbing it again; eventually give up
; and report failure to the user.
;

                dec byte [A20Tries]
                jnz try_enable_a20


                ; Error message time
                mov si,err_a20
print_err:
                lodsb
                test al,al
                jz die                  ; End of string
                mov bx,7
                mov ah,0xe              ; INT 10h teletype output
                int 10h
                jmp print_err


die:
                sti
.halt:          hlt
                jmp short .halt

;
; A20 unmasked, proceed...
;
a20_done_pop:   pop cx                  ; Drop the CX saved by the wait loops
a20_done:       popad
                ret

;
; This routine tests if A20 is enabled (ZF = 0).  This routine
; must not destroy any register contents.
;

; This is the INT 1Fh vector, which on standard PCs is used by the
; BIOS when the screen is in graphics mode.  Even if it is, it points to
; data, not code, so it should be safe enough to fiddle with.
A20Test         equ (1Fh*4)

;
; a20_test: compare the dword at 0000:A20Test with the one at
; FFFF:A20Test+10h, which is the same physical location when address
; line 20 is masked.  If they match, the low copy is bumped and compared
; again, up to 32 times, so that a coincidental match is not taken for
; aliasing.
;
; Out:   ZF = 0 if A20 is enabled, ZF = 1 if it still appears masked
; Saves: DS, ES, CX and EAX are pushed and restored
;
a20_test:
                push ds
                push es
                push cx
                push eax
                xor ax,ax
                mov ds,ax               ; DS == 0
                dec ax
                mov es,ax               ; ES == 0FFFFh
                mov cx,32               ; Loop count
                mov eax,[A20Test]
                cmp eax,[es:A20Test+10h]
                jne .exit               ; Differ already: A20 is on
                push eax                ; Keep the original vector contents
.retry:
                inc eax
                mov [A20Test],eax
                io_delay
                cmp eax,[es:A20Test+10h]
                loopz .retry            ; Repeat while still equal and CX != 0
                pop dword [A20Test]     ; Restore original value
.exit:
                pop eax                 ; The pops leave the flags untouched
                pop cx
                pop es
                pop ds
                ret

;
; disable_a20: undo whatever method enable_a20 recorded in A20Type.
; All general registers are preserved (pushad/popad).
;
disable_a20:
                pushad
;
; Flush the caches
;
%if DO_WBINVD
                call try_wbinvd
%endif

                mov bp,[A20Type]
                add bp,bp               ; Convert to word offset
.adj5:          jmp word [bp+A20DList]

;
; Disable through the BIOS (INT 15h AX=2400h)
;
a20d_bios:
                mov ax,2400h
                pushf                   ; Some BIOSes muck with IF
                int 15h
                popf
                jmp short a20d_snooze

;
; Disable the "fast A20 gate"
;
a20d_fast:
                in al, 092h
                and al,~03h
                out 092h, al
                jmp short a20d_snooze

;
; Disable the keyboard controller A20 gate
;
a20d_kbc:
                call empty_8042_uncond

                mov al,0D1h
                out 064h, al            ; Write output port
                call empty_8042_uncond

                mov al,0DDh             ; A20 off
                out 060h, al
                call empty_8042_uncond

                mov al,0FFh             ; Null command/synchronization
                out 064h, al
                call empty_8042_uncond

                ; Wait a bit for it to take effect
a20d_snooze:
                push cx
                mov cx, disable_wait
.poll:          call a20_test
                jz .off                 ; ZF = 1: A20 now reads as masked
                loop .poll
.off:           pop cx
a20d_dunno:
a20d_none:
                popad
                ret

;
; Routine to empty the 8042 KBC controller.  If dl != 0
; then we will test A20 in the loop and exit if A20 is
; suddenly enabled.
;
; Out: ZF = 0 only on the early exit (dl != 0 and A20 found enabled);
;      ZF = 1 once the controller has been drained.  AL is clobbered.
;
empty_8042_uncond:
                xor dl,dl               ; Never take the early exit
empty_8042:
                call a20_test
                jz .a20_on              ; ZF = 1: A20 still masked, keep draining
                and dl,dl
                jnz .done               ; A20 is live and the caller allows early exit
.a20_on:        io_delay
                in al, 064h             ; Status port
                test al,1
                jz .no_output
                io_delay
                in al, 060h             ; Read input
                jmp short empty_8042
.no_output:
                test al,2
                jnz empty_8042          ; Controller input buffer still full
                io_delay
.done:          ret

;
; Execute a WBINVD instruction if possible on this CPU
;
%if DO_WBINVD
try_wbinvd:
                wbinvd
                ret
%endif

                section .bss
                alignb 4
PMESP           resd 1                  ; Protected mode %esp

                section .idt nobits align=4096
                alignb 4096
pm_idt          resb 4096               ; Protected-mode IDT, followed by interrupt stubs




pm_entry:       equ 0x100000            ; Entry point of the 32-bit code

                section .rodata
                align 2, db 0
call32_rmidt:
                dw 0ffffh               ; Limit
                dd 0                    ; Address

                section .data
                align 2, db 0           ; Initialized section, so pad with zero bytes
call32_pmidt:
                dw 8*256-1              ; Limit = last valid byte of 256 8-byte gates
                dd 0                    ; Address (entered later)

                section .text
;
; This is the main entrypoint in this function
;
init32:
                mov bx,call32_call_start ; Where to go in PM

;
; Enter protected mode.  BX contains the entry point relative to the
; real-mode CS.
;
; On arrival at the entry point: EBP = linear base of the real-mode code
; segment, DS = ES = SS = 28h (flat data), FS = GS = 0 and
; ESP = [PMESP].  The real-mode SP is kept in [SavedSP].
;
call32_enter_pm:
                mov ax,cs
                mov ds,ax
                movzx ebp,ax
                shl ebp,4               ; EBP <- CS_BASE
                movzx ebx,bx
                add ebx,ebp             ; entry point += CS_BASE
                cli
                mov [SavedSP],sp
                cld
                call enable_a20
                mov byte [call32_gdt+8+5],89h ; Mark TSS unbusy
                o32 lgdt [call32_gdt]   ; Set up GDT
                o32 lidt [call32_pmidt] ; Set up IDT
                mov eax,cr0
                or al,1
                mov cr0,eax             ; Enter protected mode
                jmp 20h:strict dword .in_pm+CS_BASE
.pm_jmp         equ $-6                 ; Address of the 32-bit offset in the jump above


                bits 32
.in_pm:
                xor eax,eax             ; Available for future use...
                mov fs,eax
                mov gs,eax
                lldt ax

                mov al,28h              ; Set up data segments
                mov es,eax
                mov ds,eax
                mov ss,eax

                mov al,08h
                ltr ax                  ; Selector 08h is the dummy TSS

                mov esp,[ebp+PMESP]     ; Load protmode %esp if available
                jmp ebx                 ; Go to where we need to go

;
; This is invoked before first dispatch of the 32-bit code, in 32-bit mode
;
call32_call_start:
                ;
                ; Set up a temporary stack in the bounce buffer;
                ; start32.S will override this to point us to the real
                ; high-memory stack.
                ;
                mov esp, (BOUNCE_SEG << 4) + 0x10000

                ; Arguments for the 32-bit entry point, pushed last-to-first
                push dword call32_enter_rm.rm_jmp+CS_BASE
                push dword call32_enter_pm.pm_jmp+CS_BASE
                push dword stack_end    ; RM size
                push dword call32_gdt+CS_BASE
                push dword call32_handle_interrupt+CS_BASE
                push dword CS_BASE      ; Segment base
                push dword (BOUNCE_SEG << 4) ; Bounce buffer address
                push dword call32_syscall+CS_BASE ; Syscall entry point

                call pm_entry-CS_BASE   ; Run the program...

                ; ... fall through to call32_exit ...

call32_exit:
                mov bx,call32_done      ; Return to command loop

;
; Return to real mode.  BX contains the real-mode offset to continue at.
; The current protected-mode ESP is saved in [PMESP] and the current
; IDTR in call32_pmidt; SP is reloaded from [SavedSP].
;
call32_enter_rm:
                ; Careful here... the PM code may have relocated the
                ; entire RM code, so we need to figure out exactly
                ; where we are executing from.  If the PM code has
                ; relocated us, it *will* have adjusted the GDT to
                ; match, though.
                call .here
.here:          pop ebp
                sub ebp,.here           ; EBP <- linear base we are running from
                o32 sidt [ebp+call32_pmidt]
                cli
                cld
                mov [ebp+PMESP],esp     ; Save exit %esp
                xor esp,esp             ; Make sure the high bits are zero
                jmp 10h:.in_pm16        ; Return to 16-bit mode first

                bits 16
.in_pm16:
                mov ax,18h              ; Real-mode-like segment
                mov es,ax
                mov ds,ax
                mov ss,ax
                mov fs,ax
                mov gs,ax

                lidt [call32_rmidt]     ; Real-mode IDT (rm needs no GDT)
                mov eax,cr0
                and al,~1
                mov cr0,eax
                jmp MY_CS:.in_rm
.rm_jmp         equ $-2                 ; Address of the segment word in the jump above

.in_rm:                                 ; Back in real mode
                mov ax,cs
                mov ds,ax
                mov es,ax
                mov fs,ax
                mov gs,ax
                mov ss,ax
                mov sp,[SavedSP]        ; Restore stack
                jmp bx                  ; Go to wherever we need to go...

call32_done:
                call disable_a20
                sti
                ret

;
; 16-bit support code
;
                bits 16

;
; 16-bit interrupt-handling code
;
; EDX holds the segment:offset of the handler.  The frame built here
; makes the handler's IRET land on .cont.
;
call32_int_rm:
                pushf                   ; Flags on stack
                push cs                 ; Return segment
                push word .cont         ; Return address
                push dword edx          ; Segment:offset of IVT entry
                retf                    ; Invoke IVT routine
.cont:                                  ; ... on resume ...
                mov bx,call32_int_resume
                jmp call32_enter_pm     ; Go back to PM

;
; 16-bit system call handling code
;
; Entered with the frame built by call32_syscall on the real-mode stack:
; the register image is popped, then RETF jumps to the target routine.
; .return captures the registers again, in the same layout.
;
call32_sys_rm:
                pop gs
                pop fs
                pop es
                pop ds
                popad
                popfd
                retf                    ; Invoke routine
.return:
                pushfd
                pushad
                push ds
                push es
                push fs
                push gs
                mov bx,call32_sys_resume
                jmp call32_enter_pm

;
; 32-bit support code
;
                bits 32

;
; This is invoked on getting an interrupt in protected mode.  At
; this point, we need to context-switch to real mode and invoke
; the interrupt routine.
;
; When this gets invoked, the registers are saved on the stack and
; AL contains the interrupt number.
;
call32_handle_interrupt:
                movzx eax,al            ; EAX = index into the real-mode IVT
                xor ebx,ebx             ; Actually makes the code smaller
                mov edx,[ebx+eax*4]     ; Get the segment:offset of the routine
                mov bx,call32_int_rm    ; Real-mode continuation
                jmp call32_enter_rm     ; Go to real mode

;
; Reached in protected mode once the real-mode handler has finished
;
call32_int_resume:
                popad
                iret

;
; Syscall invocation.  We manifest a structure on the real-mode stack,
; containing the call32sys_t structure from <call32.h> as well as
; the following entries (from low to high address):
; - Target offset
; - Target segment
; - Return offset
; - Return segment (== real mode cs)
; - Return flags
;
; After the pushfd/pushad below, the stack holds (relative to esp):
;   +10*4  interrupt number
;   +11*4  pointer to the source register block
;   +12*4  pointer to the destination register block (may be NULL)
;
call32_syscall:
                pushfd                  ; Save IF among other things...
                pushad                  ; We only need to save some, but...
                call .here
.here:          pop ebp
                sub ebp,.here           ; EBP <- linear base we are running from
                cld

                movzx edi,word [ebp+SavedSP]
                sub edi,54              ; Allocate 54 bytes (44 regs + 10 frame)
                mov [ebp+SavedSP],di
                add edi,ebp             ; Create linear address

                mov esi,[esp+11*4]      ; Source regs
                xor ecx,ecx
                mov cl,11               ; 44 bytes to copy
                rep movsd

                movzx eax,byte [esp+10*4] ; Interrupt number
                ; ecx == 0 here; adding it to the EA makes the
                ; encoding smaller
                mov eax,[ecx+eax*4]     ; Get IVT entry
                stosd                   ; Save in stack frame
                mov ax,call32_sys_rm.return ; Return offset
                stosw                   ; Save in stack frame
                mov eax,ebp
                shr eax,4               ; Return segment
                stosw                   ; Save in stack frame
                mov eax,[edi-12]        ; Return flags
                and eax,0x200cd7        ; Mask (potentially) unsafe flags
                mov [edi-12],eax        ; Primary flags entry
                stosw                   ; Return flags

                mov bx,call32_sys_rm    ; Real-mode continuation
                jmp call32_enter_rm     ; Go to real mode

                ; On return, the 44-byte return structure is on the
                ; real-mode stack.  call32_enter_pm will leave ebp
                ; pointing to the real-mode base.
call32_sys_resume:
                movzx esi,word [ebp+SavedSP]
                add esi,ebp             ; Create linear address
                mov edi,[esp+12*4]      ; Dest regs
                test edi,edi            ; NULL pointer?
                jnz .do_copy
                mov edi,esi             ; Do a dummy copy-to-self
.do_copy:       xor ecx,ecx
                mov cl,11               ; 44 bytes
                rep movsd               ; Copy register block

                add word [ebp+SavedSP],44 ; Remove from stack

                popad
                popfd
                ret                     ; Return to 32-bit program