Home | History | Annotate | Download | only in X64
      1 ;------------------------------------------------------------------------------ ;
      2 ; Copyright (c) 2015 - 2016, Intel Corporation. All rights reserved.<BR>
      3 ; This program and the accompanying materials
      4 ; are licensed and made available under the terms and conditions of the BSD License
      5 ; which accompanies this distribution.  The full text of the license may be found at
      6 ; http://opensource.org/licenses/bsd-license.php.
      7 ;
      8 ; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
      9 ; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
     10 ;
     11 ; Module Name:
     12 ;
     13 ;   MpFuncs.nasm
     14 ;
     15 ; Abstract:
     16 ;
     17 ;   This is the assembly code for MP support
     18 ;
     19 ;-------------------------------------------------------------------------------
     20 
     21 %include "MpEqu.inc"
     22 extern ASM_PFX(InitializeFloatingPointUnits)
     23 
     24 DEFAULT REL
     25 
     26 SECTION .text
     27 
     28 ;-------------------------------------------------------------------------------------
     29 ;RendezvousFunnelProc  procedure follows. All APs execute this procedure. It takes an
     30 ;AP from its raw start-up state (real mode, no stack) into long mode, gives it a
     31 ;stack and calls the C procedure whose address is stored at ApProcedureLocation.
     32 ;THE CODE UP TO THE FAR JUMP IS EXECUTED BY APs IN 16 BIT MODE, HENCE IT IS ASSEMBLED
     33 ;UNDER "BITS 16"; EVERYTHING FROM LongModeStart ON IS 64-BIT CODE.
        ;
        ;Register roles:
        ;  ebp  - BIST value received in eax at entry; pushed on the AP stack before the calls
        ;  ebx  - (16-bit part) dword read from BufferStartLocation
        ;  esi  - (64-bit part) copy of that buffer start; base for all *Location accesses
        ;  edi  - (end of 16-bit part) long mode data selector read from DataSegmentLocation
        ;  ebx  - (64-bit part) incremented NumApsExecuting, or the index of the CPU info
        ;         entry whose first dword matches this CPU's APIC ID; passed on in edx
        ;
        ;The *Location constants, NotVacantFlag and VacantFlag are not defined in this file
        ;(presumably they come from MpEqu.inc).
     34 ;-------------------------------------------------------------------------------------
     35 global ASM_PFX(RendezvousFunnelProc)
     36 ASM_PFX(RendezvousFunnelProc):
     37 RendezvousFunnelProcStart:
     38 ; At this point CS = 0x(vv00) and ip= 0x0.
     39 ; Save BIST information to ebp firstly
     40 
     41 BITS 16
     42     mov        ebp, eax                        ; Save BIST information
     43 
     44     mov        ax, cs                          ; DS = ES = SS = CS, so the 16-bit offsets
     45     mov        ds, ax                          ; used below are relative to the start of
     46     mov        es, ax                          ; this code
     47     mov        ss, ax
     48     xor        ax, ax
     49     mov        fs, ax                          ; FS = GS = 0
     50     mov        gs, ax
     51 
     52     mov        si,  BufferStartLocation
     53     mov        ebx, [si]                       ; ebx = buffer start value
     54 
     55     mov        di,  ModeOffsetLocation
     56     mov        eax, [di]                       ; eax = mode offset value
     57     mov        di,  CodeSegmentLocation
     58     mov        edx, [di]                       ; edx = long mode code selector
     59     mov        di,  ax                         ; di = mode offset (presumably the offset of
        ;                                                LongModeStart, i.e. just past the far jump)
     60     sub        di,  02h                        ; di -> 16-bit selector field of the far jump
     61     mov        [di],dx                         ; Patch long mode CS
     62     sub        di,  04h                        ; di -> 32-bit offset field of the far jump
     63     add        eax, ebx                        ; eax = buffer start + mode offset
     64     mov        [di],eax                        ; Patch address
     65 
     66     mov        si, GdtrLocation
     67 o32 lgdt       [cs:si]                         ; o32: use 32-bit operand size so the full
        ;                                                32-bit base is loaded
     68 
     69     mov        si, IdtrLocation
     70 o32 lidt       [cs:si]
     71 
     72     mov        si, EnableExecuteDisableLocation
     73     cmp        byte [si], 0                    ; flag byte == 0 -> leave EFER bit 11 alone
     74     jz         SkipEnableExecuteDisableBit
     75 
     76     ;
     77     ; Enable execute disable bit
     78     ;
     79     mov        ecx, 0c0000080h             ; EFER MSR number
     80     rdmsr                                  ; Read EFER
     81     bts        eax, 11                     ; Enable Execute Disable Bit
     82     wrmsr                                  ; Write EFER
     83 
     84 SkipEnableExecuteDisableBit:
     85 
     86     mov        di,  DataSegmentLocation
     87     mov        edi, [di]                   ; Save long mode DS in edi
     88 
     89     mov        si, Cr3Location             ; Save CR3 in ecx
     90     mov        ecx, [si]
     91 
        ; Everything needed from the data area has been read into registers; from here on
        ; the sequence is: set CR0.PE/MP, set CR4.PAE, load CR3, set EFER.LME, set CR0.PG,
        ; then far-jump through the operand patched above.
     92     xor        ax,  ax
     93     mov        ds,  ax                     ; Clear data segment
     94 
     95     mov        eax, cr0                    ; Get control register 0
     96     or         eax, 000000003h             ; Set PE bit (bit #0) & MP
     97     mov        cr0, eax
     98 
     99     mov        eax, cr4
    100     bts        eax, 5                      ; Set CR4 bit 5 (PAE)
    101     mov        cr4, eax
    102 
    103     mov        cr3, ecx                    ; Load CR3
    104 
    105     mov        ecx, 0c0000080h             ; EFER MSR number
    106     rdmsr                                  ; Read EFER
    107     bts        eax, 8                      ; Set LME=1
    108     wrmsr                                  ; Write EFER
    109 
    110     mov        eax, cr0                    ; Read CR0
    111     bts        eax, 31                     ; Set PG=1
    112     mov        cr0, eax                    ; Write CR0
    113 
        ; The selector:offset below are placeholders; both were overwritten at run time by
        ; the "Patch long mode CS" / "Patch address" stores above.
    114     jmp        0:strict dword 0  ; far jump to long mode
    115 BITS 64
    116 LongModeStart:
    117     mov        eax, edi                    ; eax = long mode data selector saved earlier
    118     mov        ds,  ax
    119     mov        es,  ax
    120     mov        ss,  ax
    121 
    122     mov        esi, ebx                    ; esi = buffer start (base of the data area)
    123     lea        edi, [esi + InitFlagLocation]
    124     cmp        qword [edi], 1       ; ApInitConfig
    125     jnz        GetApicId            ; any other value: pick the stack by APIC ID lookup
    126 
    127     ; AP init
    128     mov        edi, esi
    129     add        edi, LockLocation           ; edi -> lock qword
    130     mov        rax, NotVacantFlag
    131 
    132 TestLock:
        ; Spin lock: swap NotVacantFlag into the lock; if the previous value was already
        ; NotVacantFlag the lock is held by someone else, so try again.
    133     xchg       qword [edi], rax
    134     cmp        rax, NotVacantFlag
    135     jz         TestLock
    136 
    137     lea        ecx, [esi + NumApsExecutingLocation]
    138     inc        dword [ecx]                 ; NumApsExecuting++ (under the lock)
    139     mov        ebx, [ecx]                  ; ebx = incremented count, used as CpuNumber below
    140 
    141 Releaselock:
    142     mov        rax, VacantFlag
    143     xchg       qword [edi], rax            ; release the lock
    144     ; program stack
    145     mov        edi, esi
    146     add        edi, StackSizeLocation
    147     mov        eax, dword [edi]            ; eax = stack size (upper half of rax zeroed)
    148     mov        ecx, ebx
    149     inc        ecx
    150     mul        ecx                               ; EAX = StackSize * (CpuNumber + 1)
        ;                                                  (EDX receives the high half and is not used)
    151     mov        edi, esi
    152     add        edi, StackStartAddressLocation
    153     add        rax, qword [edi]            ; rax = stack start + StackSize * (CpuNumber + 1)
    154     mov        rsp, rax
    155     jmp        CProcedureInvoke
    156 
    157 GetApicId:
    158     mov        eax, 0
    159     cpuid                           ; leaf 0: eax = highest basic CPUID leaf
    160     cmp        eax, 0bh
    161     jb         NoX2Apic             ; CPUID level below CPUID_EXTENDED_TOPOLOGY
    162 
    163     mov        eax, 0bh
    164     xor        ecx, ecx             ; sub-leaf 0
    165     cpuid
    166     test       ebx, 0ffffh
    167     jz         NoX2Apic             ; CPUID.0BH:EBX[15:0] is zero
    168 
    169     ; Processor is x2APIC capable; 32-bit x2APIC ID is already in EDX
    170     jmp        GetProcessorNumber
    171 
    172 NoX2Apic:
    173     ; Processor is not x2APIC capable, so get 8-bit APIC ID
    174     mov        eax, 1
    175     cpuid
    176     shr        ebx, 24              ; APIC ID is in CPUID.01H:EBX[31:24]
    177     mov        edx, ebx             ; edx = APIC ID, same register as the x2APIC path
    178 
    179 GetProcessorNumber:
    180     ;
    181     ; Get processor number for this AP
    182     ; Note that BSP may become an AP due to SwitchBsp()
    183     ;
    184     xor         ebx, ebx                              ; ebx = index of the entry being examined
    185     lea         eax, [esi + CpuInfoLocation]
    186     mov         edi, [eax]                            ; edi = 32-bit pointer to the first entry
    187 
    188 GetNextProcNumber:
        ; Entries are 20 bytes apart; the dword at offset 0 is compared with the APIC ID and
        ; the qword at offset 12 is loaded as the stack pointer on a match.
        ; NOTE(review): the loop has no upper bound - it relies on a matching entry existing.
    189     cmp         dword [edi], edx                      ; APIC ID match?
    190     jz          ProgramStack
    191     add         edi, 20
    192     inc         ebx
    193     jmp         GetNextProcNumber    
    194 
    195 ProgramStack:
    196     mov         rsp, qword [edi + 12]                 ; rsp = stack value stored in the matching entry
    197 
    198 CProcedureInvoke:
    199     push       rbp               ; Push BIST data at top of AP stack
    200     xor        rbp, rbp          ; Clear ebp for call stack trace
    201     push       rbp
    202     mov        rbp, rsp
    203 
        ; The target is loaded as an absolute 64-bit immediate and called through rax rather
        ; than with a relative call (presumably because this code executes from a copy at a
        ; different address than the one it was linked at - TODO confirm against the caller).
    204     mov        rax, ASM_PFX(InitializeFloatingPointUnits)
    205     sub        rsp, 20h          ; 32 bytes of shadow space for the callee
    206     call       rax               ; Call assembly function to initialize FPU per UEFI spec
    207     add        rsp, 20h
    208 
        ; ebx is read again here, so it is assumed to survive the call above (it is a
        ; callee-saved register in the Microsoft x64 convention).
    209     mov        edx, ebx          ; edx is NumApsExecuting (ApInit path) or the index found by
        ;                                  GetNextProcNumber (APIC ID path)
    210     mov        ecx, esi
    211     add        ecx, LockLocation ; rcx is address of exchange info data buffer
    212 
    213     mov        edi, esi
    214     add        edi, ApProcedureLocation
    215     mov        rax, qword [edi]  ; rax = C procedure address stored in the data area
    216 
    217     sub        rsp, 20h          ; 32 bytes of shadow space for the callee
    218     call       rax               ; Invoke C function
    219     add        rsp, 20h
    220     jmp        $                 ; Should never reach here
    221 
    222 RendezvousFunnelProcEnd:
    223 
    224 ;-------------------------------------------------------------------------------------
    225 ;  AsmRelocateApLoop (MwaitSupport, ApTargetCState, PmCodeSegment, TopOfApStack, CountTofinish);
        ;
        ;  Decrements *CountTofinish, switches to the stack at TopOfApStack, far-returns into
        ;  32-bit code through PmCodeSegment, turns off paging, EFER.LME and CR4.PAE, and then
        ;  parks the CPU forever in either a MONITOR/MWAIT loop or a CLI/HLT loop.
        ;  This procedure never returns.
        ;
        ;  In (Microsoft x64 convention):
        ;    rcx        = MwaitSupport   (low byte == 1 selects the MWAIT loop)
        ;    rdx        = ApTargetCState (placed in eax[7:4] as the MWAIT hint)
        ;    r8         = PmCodeSegment  (selector used for the far return to PmEntry)
        ;    r9         = TopOfApStack   (new stack pointer; also the MONITOR address)
        ;    [rsp + 40] = CountTofinish  (pointer to a dword that is decremented with LOCK)
    226 ;-------------------------------------------------------------------------------------
    227 global ASM_PFX(AsmRelocateApLoop)
    228 ASM_PFX(AsmRelocateApLoop):
    229 AsmRelocateApLoopStart:
    230     mov        rax, [rsp + 40]   ; CountTofinish
    231     lock dec   dword [rax]       ; (*CountTofinish)--
    232     mov        rsp, r9           ; Switch to TopOfApStack
    233     push       rcx               ; Keep MwaitSupport on the new stack for the 32-bit code
    234     push       rdx               ; Keep ApTargetCState on the new stack for the 32-bit code
    235 
    236     lea        rsi, [PmEntry]    ; rsi <- The start address of transition code
    237 
    238     push       r8                ; Far-return frame: selector = PmCodeSegment ...
    239     push       rsi               ; ... offset = PmEntry
    240     DB         0x48              ; REX.W prefix: make the retf below pop 64-bit RIP and CS
    241     retf
    242 BITS 32
    243 PmEntry:
    244     mov        eax, cr0
    245     btr        eax, 31           ; Clear CR0.PG
    246     mov        cr0, eax          ; Disable paging
    247 
    248     mov        ebx, edx          ; ebx <- ApTargetCState; not read before ebx is reloaded below
    249     mov        ecx, 0xc0000080   ; EFER MSR number
    250     rdmsr                        ; Overwrites eax and edx
    251     and        ah, ~ 1           ; Clear LME
    252     wrmsr
    253     mov        eax, cr4
    254     and        al, ~ (1 << 5)    ; Clear PAE
    255     mov        cr4, eax
    256 
        ; The two values were pushed as 8-byte quantities; each is fetched with a 4-byte pop
        ; followed by skipping its upper 4 bytes.
    257     pop        edx               ; edx <- ApTargetCState (low dword)
    258     add        esp, 4
    259     pop        ecx               ; ecx <- MwaitSupport (low dword)
    260     add        esp, 4            ; esp is back at TopOfApStack
    261     cmp        cl, 1              ; Check mwait-monitor support
    262     jnz        HltLoop
    263     mov        ebx, edx           ; Save C-State to ebx
    264 MwaitLoop:
    265     mov        eax, esp           ; Set Monitor Address
    266     xor        ecx, ecx           ; ecx = 0
    267     xor        edx, edx           ; edx = 0
    268     monitor
    269     mov        eax, ebx           ; Mwait Cx, Target C-State per eax[7:4]
    270     shl        eax, 4
    271     mwait
    272     jmp        MwaitLoop          ; Re-arm and wait again after every wake-up
    273 HltLoop:
    274     cli
    275     hlt
    276     jmp        HltLoop            ; Halt again if execution ever resumes
    277 BITS 64
    278 AsmRelocateApLoopEnd:
    279 
    280 ;-------------------------------------------------------------------------------------
    281 ;  AsmGetAddressMap (&AddressMap);
        ;
        ;  Fills five qword fields of the structure whose address is passed in rcx:
        ;    +00h  address of RendezvousFunnelProc
        ;    +08h  LongModeStart - RendezvousFunnelProcStart
        ;    +10h  RendezvousFunnelProcEnd - RendezvousFunnelProcStart
        ;    +18h  address of AsmRelocateApLoop
        ;    +20h  AsmRelocateApLoopEnd - AsmRelocateApLoopStart
        ;  rax is used as scratch and holds the address of AsmRelocateApLoop on return.
    282 ;-------------------------------------------------------------------------------------
    283 global ASM_PFX(AsmGetAddressMap)
    284 ASM_PFX(AsmGetAddressMap):
        ; Assemble-time constants first: the offset and the two code sizes.
    285     mov        qword [rcx +  8h], LongModeStart - RendezvousFunnelProcStart
    286     mov        qword [rcx + 10h], RendezvousFunnelProcEnd - RendezvousFunnelProcStart
    287     mov        qword [rcx + 20h], AsmRelocateApLoopEnd - AsmRelocateApLoopStart
        ; Then the two run-time addresses, formed RIP-relative.
    288     lea        rax, [rel ASM_PFX(RendezvousFunnelProc)]
    289     mov        qword [rcx], rax
    290     lea        rax, [rel ASM_PFX(AsmRelocateApLoop)]
    291     mov        qword [rcx + 18h], rax
    292     ret
    293 
    294 ;-------------------------------------------------------------------------------------
    295 ;AsmExchangeRole procedure follows. This procedure is executed by the current BSP, that is
    296 ;about to become an AP. It switches its stack with the current AP.
    297 ;AsmExchangeRole (IN   CPU_EXCHANGE_INFO    *MyInfo, IN   CPU_EXCHANGE_INFO    *OthersInfo);
        ;
        ;  In:  rcx = MyInfo, rdx = OthersInfo
        ;  Fields of the info structure as accessed here:
        ;    +0   byte   switch state (CPU_SWITCH_STATE_STORED / CPU_SWITCH_STATE_LOADED)
        ;    +8   qword  saved stack pointer
        ;    +16         GDTR image written by sgdt
        ;    +26         IDTR image written by sidt
        ;  The general registers, CR0, CR4 and RFLAGS are saved on this CPU's stack and then
        ;  restored from the stack recorded in OthersInfo, so the procedure returns on the
        ;  other CPU's stack. The handshake only completes once the other CPU has published
        ;  STORED in its own structure and written LOADED into this one (presumably by
        ;  running this same procedure with the two arguments swapped).
    298 ;-------------------------------------------------------------------------------------
    299 global ASM_PFX(AsmExchangeRole)
    300 ASM_PFX(AsmExchangeRole):
    301     ; DO NOT call other functions in this function, since 2 CPU may use 1 stack
    302     ; at the same time. If 1 CPU try to call a function, stack will be corrupted.
    303 
        ; Save all general registers; the pops at OtherLoaded are the exact mirror image.
    304     push       rax
    305     push       rbx
    306     push       rcx
    307     push       rdx
    308     push       rsi
    309     push       rdi
    310     push       rbp
    311     push       r8
    312     push       r9
    313     push       r10
    314     push       r11
    315     push       r12
    316     push       r13
    317     push       r14
    318     push       r15
    319 
    320     mov        rax, cr0
    321     push       rax
    322 
    323     mov        rax, cr4
    324     push       rax
    325 
    326     ; rsi contains MyInfo pointer
    327     mov        rsi, rcx
    328 
    329     ; rdi contains OthersInfo pointer
    330     mov        rdi, rdx
    331 
    332     ; Store EFLAGS on the stack, and the GDTR and IDTR registers in MyInfo
    333     pushfq
    334     sgdt       [rsi + 16]
    335     sidt       [rsi + 26]
    336 
    337     ; Store this CPU's stack pointer in MyInfo
    338     mov        [rsi + 8], rsp
    339 
    340     ; update its switch state to STORED
    341     mov        byte [rsi], CPU_SWITCH_STATE_STORED
    342 
    343 WaitForOtherStored:
    344     ; wait until the other CPU finish storing its state
    345     cmp        byte [rdi], CPU_SWITCH_STATE_STORED
    346     jz         OtherStored
    347     pause
    348     jmp        WaitForOtherStored
    349 
    350 OtherStored:
    351     ; Since another CPU already stored its state, load them
    352     ; load GDTR value
    353     lgdt       [rdi + 16]
    354 
    355     ; load IDTR value
    356     lidt       [rdi + 26]
    357 
    358     ; load its future StackPointer
    359     mov        rsp, [rdi + 8]
    360 
    361     ; update the other CPU's switch state to LOADED
    362     mov        byte [rdi], CPU_SWITCH_STATE_LOADED
    363 
    364 WaitForOtherLoaded:
    365     ; wait until the other CPU finish loading new state,
    366     ; otherwise the data in stack may corrupt
    367     cmp        byte [rsi], CPU_SWITCH_STATE_LOADED
    368     jz         OtherLoaded
    369     pause
    370     jmp        WaitForOtherLoaded
    371 
    372 OtherLoaded:
    373     ; since the other CPU already get the data it want, leave this procedure
        ; rsp now points into the stack recorded in OthersInfo, so everything popped below
        ; (and the return address used by ret) is what was saved on that stack.
    374     popfq
    375 
    376     pop        rax
    377     mov        cr4, rax
    378 
    379     pop        rax
    380     mov        cr0, rax
    381 
    382     pop        r15
    383     pop        r14
    384     pop        r13
    385     pop        r12
    386     pop        r11
    387     pop        r10
    388     pop        r9
    389     pop        r8
    390     pop        rbp
    391     pop        rdi
    392     pop        rsi
    393     pop        rdx
    394     pop        rcx
    395     pop        rbx
    396     pop        rax
    397 
    398     ret
    399