Home | History | Annotate | Download | only in Ipf
      1 /// @file
      2 ///  Low level IPF routines used by the debug support driver
      3 ///
      4 /// Copyright (c) 2006 - 2008, Intel Corporation. All rights reserved.<BR>
      5 /// This program and the accompanying materials
      6 /// are licensed and made available under the terms and conditions of the BSD License
      7 /// which accompanies this distribution.  The full text of the license may be found at
      8 /// http://opensource.org/licenses/bsd-license.php
      9 ///
     10 /// THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
     11 /// WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
     12 ///
     13 ///
     14 
     15 
     16 #include "Common.i"
     17 #include "Ds64Macros.i"
     18 
     19 ASM_GLOBAL PatchSaveBuffer
     20 ASM_GLOBAL IpfContextBuf
     21 ASM_GLOBAL CommonHandler
     22 ASM_GLOBAL ExternalInterruptCount
     23 
     24 
     25 /////////////////////////////////////////////
     26 //
     27 //  Name:
     28 //      InstructionCacheFlush
     29 //
     30 //  Description:
     31 //      Flushes instruction cache for specified number of bytes.
     32 //      r32 = start address, r33 = byte count (treated as 32-bit); memory is walked in 32-byte steps.
     33         ASM_GLOBAL InstructionCacheFlush
     34         .proc   InstructionCacheFlush
     35         .align 32
     36 InstructionCacheFlush::
     37  {      .mii
     38         alloc   r3=2, 0, 0, 0               // frame with 2 inputs: r32, r33
     39         cmp4.leu p0,p6=32, r33;;            // p6 = (count < 32), unsigned 32-bit compare
     40         (p6)    mov r33=32;;                // always flush at least 32 bytes
     41  }
     42  {      .mii
     43         nop.m    0
     44         zxt4    r29=r33;;                   // r29 = count, zero-extended from 32 bits
     45         dep.z   r30=r29, 0, 5;;             // r30 = count mod 32
     46  }
     47  {      .mii
     48         cmp4.eq p0,p7=r0, r30               // p7 = (count is not a multiple of 32)
     49         shr.u   r28=r29, 5;;                // r28 = count / 32
     50         (p7)    adds    r28=1, r28;;        // round up to cover a partial trailing block
     51  }
     52  {      .mii
     53         nop.m    0
     54         shl r27=r28, 5;;                    // r27 = rounded-up length in bytes
     55         zxt4    r26=r27;;                   // keep only the low 32 bits of the length
     56  }
     57  {      .mfb
     58         add r31=r26, r32                    // r31 = end address (exclusive)
     59         nop.f    0
     60         nop.b    0
     61  }
     62 LoopBack:   // $L143:
     63  {      .mii
     64         fc   r32                            // flush the cache line containing address r32
     65         adds    r32=32, r32;;               // step to the next 32-byte block
     66         cmp.ltu p14,p15=r32, r31            // p14 = (r32 < end address), unsigned
     67  }
     68  {      .mfb
     69         nop.m    0
     70         nop.f    0
     71         //(p14) br.cond.dptk.few $L143#;;
     72         (p14)   br.cond.dptk.few LoopBack;; // repeat until the end address is reached
     73  }
     74  {      .mmi
     75         sync.i;;                            // make the preceding fc operations visible
     76         srlz.i                              // serialize the instruction stream
     77         nop.i   0;;
     78  }
     79  {      .mfb
     80         nop.m    0
     81         nop.f    0
     82         br.ret.sptk.few b0;;
     83  }
     84         .endp   InstructionCacheFlush
     85 
     86 
     87 /////////////////////////////////////////////
     88 //
     89 //  Name:
     90 //      ChainHandler
     91 //
     92 //  Description:
     93 //      Chains an interrupt handler
     94 //
     95 //      The purpose of this function is to enable chaining of the external interrupt.
     96 //      Since there's no clean SAL abstraction for doing this, we must do it
     97 //      surreptitiously.
     98 //
     99 //      The reserved IVT entry at offset 0x3400 is coopted for use by this handler.
    100 //      According to Itanium architecture, it is reserved.  Strictly speaking, this is
    101 //      not safe, as we're cheating and violating the Itanium architecture.  However,
    102 //      as long as we're the only ones cheating, we should be OK.  Without hooks in
    103 //      the SAL to enable IVT management, there aren't many good options.
    104 //
    105 //      The strategy is to replace the first bundle of the external interrupt handler
    106 //      with our own that will branch into a piece of code we've supplied and located
    107 //      in the reserved IVT entry.  Only the first bundle of the external interrupt
    108 //      IVT entry is modified.
    109 //
    110 //      The original bundle is moved and relocated to space
    111 //      allocated within the reserved IVT entry.  The bundle following it
    112 //      is generated as a hard-coded branch back to the second bundle of the
    113 //      external interrupt IVT entry just in case the first bundle had no branch.
    114 //
    115 //      Our new code will execute our handler, and then fall through to the
    116 //      original bundle after restoring all context appropriately.
    117 //
    118 //      The following is a representation of what the IVT memory map looks like with
    119 //      our chained handler installed:
    120 //
    121 //
    122 //
    123 //
    124 //
    125 //      This IVT entry is      Failsafe bundle
    126 //      reserved by the
    127 //      Itanium architecture   Original bundle 0
    128 //      and is used for
    129 //      for locating our
    130 //      handler and the
    131 //      original bundle        Patch code...
    132 //      zero of the ext
    133 //      interrupt handler
    134 //
    135 //      RSVD    (3400)         Unused
    136 //
    137 //
    138 //
    139 //
    140 //
    141 //
    142 //
    143 //
    144 //
    145 //
    146 //
    147 //
    148 //      EXT_INT (3000)         Bundle 0               Bundle zero - This one is
    149 //                                modified, all other bundles
    150 //                                                       in the EXT_INT entry are
    151 //                                                       untouched.
    152 //
    153 //
    154 //       Arguments:
    155 //
    156 //       Returns:
    157 //
    158 //       Notes:
    159 //
    160 //
    161         ASM_GLOBAL ChainHandler
    162         .proc ChainHandler
    163 ChainHandler:
    164 
    165         NESTED_SETUP( 0,2+3,3,0 )
    166 
    167         mov         r8=1                           // r8 = success
    168         mov         r2=cr.iva;;                    // r2 = interrupt vector address (base of the IVT)
    169 //
    170 // NOTE: There's a potential hazard here in that we're simply stealing a bunch of
    171 // bundles (memory) from the IVT and assuming there's no catastrophic side effect.
    172 // NOTE(review): r2 is reused after every br.call below, so the callees must leave it intact.
    173 // First, save IVT area we're taking over with the patch so we can restore it later
    174 //
    175         addl        out0=PATCH_ENTRY_OFFSET, r2    // out0 = source buffer
    176         movl        out1=PatchSaveBuffer           // out1 = destination buffer
    177         mov         out2=0x40;;                    // out2 = number of bundles to copy... save entire IVT entry
    178         br.call.sptk.few    b0 = CopyBundles
    179 
    180 // Next, copy the patch code into the IVT
    181         movl        out0=PatchCode                 // out0 = source buffer of patch code
    182         addl        out1=PATCH_OFFSET, r2          // out1 = destination buffer - in IVT
    183         mov         out2=PATCH_CODE_SIZE;;         // presumably a byte count (divided by 16 below) - defined elsewhere
    184         shr         out2=out2, 4;;                 // out2 = number of bundles to copy
    185         br.call.sptk.few    b0 = CopyBundles
    186 
    187 
    188 // copy original bundle 0 from the external interrupt handler to the
    189 // appropriate place in the reserved IVT interrupt slot
    190         addl        out0=EXT_INT_ENTRY_OFFSET, r2  // out0 = source buffer
    191         addl        out1=RELOCATED_EXT_INT, r2     // out1 = destination buffer - in reserved IVT
    192         mov         out2=1;;                       // out2 = copy 1 bundle
    193         br.call.sptk.few    b0 = CopyBundles
    194 
    195 // Now relocate it there because it very likely had a branch instruction
    196 // that must now be fixed up.
    197         addl        out0=RELOCATED_EXT_INT, r2     // out0 = new runtime address of bundle - in reserved IVT
    198         addl        out1=EXT_INT_ENTRY_OFFSET, r2;;// out1 = IP address of previous location
    199         mov         out2=out0;;                    // out2 = IP address of new location
    200         br.call.sptk.few    b0 = RelocateBundle    // NOTE(review): status returned in out0 is not checked; r8 stays 1
    201 
    202 // Now copy the failsafe branch into the next bundle just in case
    203 // the original ext int bundle 0 did not contain a branch instruction
    204         movl        out0=FailsafeBranch            // out0 = source buffer
    205         addl        out1=FAILSAFE_BRANCH_OFFSET, r2  // out1 = destination buffer - in reserved IVT
    206         mov         out2=1;;                       // out2 = copy 1 bundle
    207         br.call.sptk.few    b0 = CopyBundles
    208 
    209 // Last, copy in our replacement for the external interrupt IVT entry bundle 0
    210         movl        out0=PatchCodeNewBun0          // out0 = source buffer - our replacement bundle 0
    211         addl        out1=EXT_INT_ENTRY_OFFSET, r2  // out1 = destination buffer - bundle 0 of External interrupt entry
    212         mov         out2=1;;                       // out2 = copy 1 bundle
    213         br.call.sptk.few    b0 = CopyBundles
    214 
    215 ChainHandlerDone:
    216         NESTED_RETURN
    217 
    218         .endp ChainHandler
    219 
    220 
    221 /////////////////////////////////////////////
    222 //
    223 //  Name:
    224 //      UnchainHandler
    225 //
    226 //  Description:
    227 //      Unchains an interrupt handler
    228 //
    229 //  Arguments:
    230 //      None are read by the code below
    231 //  Returns:
    232 //      r8 = 1 (set on entry and not modified afterwards in the visible code)
    233 //  Notes:
    234 //      Moves the original external interrupt bundle 0 back, re-relocates it,
    235 //      then restores the reserved IVT entry from PatchSaveBuffer.
    236         ASM_GLOBAL UnchainHandler
    237         .proc UnchainHandler
    238 
    239 UnchainHandler:
    240 
    241         NESTED_SETUP( 0,2+3,3,0 )
    242 
    243         mov         r8=1                        // r8 = success
    244         mov         r2=cr.iva;;                 // r2 = interrupt vector address
    245 
    246 // First copy original Ext Int bundle 0 back to its proper home...
    247         addl        out0=RELOCATED_EXT_INT, r2     // out0 = source - in reserved IVT
    248         addl        out1=EXT_INT_ENTRY_OFFSET, r2  // out1 = destination buffer - first bundle of Ext Int entry
    249         mov         out2=1;;                       // out2 = copy 1 bundle
    250         br.call.sptk.few    b0 = CopyBundles
    251 
    252 // Now, relocate it again...
    253         addl        out0=EXT_INT_ENTRY_OFFSET, r2  // out0 = new runtime address of bundle
    254         addl        out1=RELOCATED_EXT_INT, r2;;   // out1 = IP address of previous location
    255         mov         out2=out0;;                    // out2 = IP address of new location
    256         br.call.sptk.few    b0 = RelocateBundle    // NOTE(review): status returned in out0 is not checked
    257 
    258 // Last, restore the patch area
    259         movl        out0=PatchSaveBuffer           // out0 = source buffer
    260         addl        out1=PATCH_ENTRY_OFFSET, r2    // out1 = destination buffer
    261         mov         out2=0x40;;                    // out2 = number of bundles to copy... restore entire IVT entry
    262         br.call.sptk.few    b0 = CopyBundles
    263 
    264 UnchainHandlerDone:
    265         NESTED_RETURN
    266 
    267         .endp UnchainHandler
    268 
    269 
    270 /////////////////////////////////////////////
    271 //
    272 //  Name:
    273 //      CopyBundles
    274 //
    275 //  Description:
    276 //      Copies instruction bundles - flushes icache as necessary
    277 //
    278 //  Arguments:
    279 //      in0 - Bundle source
    280 //      in1 - Bundle destination
    281 //      in2 - Bundle count
    282 //
    283 //  Returns:
    284 //
    285 //  Notes:
    286 //      This procedure is a leaf routine
    287 //
    288         .proc   CopyBundles
    289 // Copies in2 bundles from [in0] to [in1], 8 bytes at a time, flushing after every store.
    290 CopyBundles:
    291 
    292         NESTED_SETUP(3,2+1,0,0)
    293 
    294         shl         in2=in2, 1;;                // in2 = count of 8 byte blocks to copy (2 per bundle)
    295 
    296 CopyBundlesLoop:
    297 
    298         cmp.eq      p14, p15 = 0, in2;;         // Check if done (a count of 0 copies nothing)
    299 (p14)   br.sptk.few CopyBundlesDone;;
    300 
    301         ld8         loc2=[in0], 0x8;;           // loc2 = source bytes; in0 post-increments to the next 8 bytes
    302         st8         [in1]=loc2;;                // [in1] = destination bytes
    303         fc          in1;;                       // Flush the cache line holding the bytes just stored
    304         sync.i;;                                // Ensure local and remote data/inst caches in sync
    305         srlz.i;;                                // Ensure sync has been observed
    306         add         in1=0x8, in1;;              // in1 = next destination
    307         add         in2=-1, in2;;               // in2 = remaining 8 byte blocks to copy
    308         br.sptk.few CopyBundlesLoop;;
    309 
    310 CopyBundlesDone:
    311         NESTED_RETURN
    312 
    313         .endp   CopyBundles
    314 
    315 
    316 /////////////////////////////////////////////
    317 //
    318 //  Name:
    319 //      RelocateBundle
    320 //
    321 //  Description:
    322 //      Relocates an instruction bundle by updating any ip-relative branch instructions.
    323 //
    324 //  Arguments:
    325 //      in0 - Runtime address of bundle
    326 //      in1 - IP address of previous location of bundle
    327 //      in2 - IP address of new location of bundle
    328 //
    329 //  Returns:
    330 //      in0 - 1 if successful or 0 if unsuccessful
    331 //
    332 //  Notes:
    333 //      This routine examines all slots in the given bundle that are destined for the
    334 //      branch execution unit.  If any of these slots contain an IP-relative branch
    335 //      namely instructions B1, B2, B3, or B6, the slot is fixed-up with a new relative
    336 //      address.  Errors can occur if a branch cannot be reached.
    337 //
    338         .proc   RelocateBundle
    339 // Callees hand results back in their in0/in1, which this frame reads as out0/out1.
    340 RelocateBundle:
    341 
    342         NESTED_SETUP(3,2+4,3,0)
    343 
    344         mov         loc2=SLOT0                  // loc2 = slot index
    345         mov         loc5=in0;;                  // loc5 = runtime address of bundle
    346         mov         in0=1;;                     // in0 = success (return value until a slot fails)
    347 
    348 RelocateBundleNextSlot:
    349 
    350         cmp.ge      p14, p15 = SLOT2, loc2;;    // p15 = (loc2 > SLOT2): every slot has been visited
    351 (p15)   br.sptk.few RelocateBundleDone
    352 
    353         mov         out0=loc5;;                 // out0 = runtime address of bundle
    354         br.call.sptk.few    b0 = GetTemplate
    355         mov         loc3=out0;;                 // loc3 = instruction template
    356         mov         out0=loc5                   // out0 = runtime address of bundle
    357         mov         out1=loc2;;                 // out1 = instruction slot number
    358         br.call.sptk.few    b0 = GetSlot
    359         mov         loc4=out0;;                 // loc4 = instruction encoding
    360         mov         out0=loc4                   // out0 = instruction encoding
    361         mov         out1=loc2                   // out1 = instruction slot number
    362         mov         out2=loc3;;                 // out2 = instruction template
    363         br.call.sptk.few    b0 = IsSlotBranch
    364         cmp.eq      p14, p15 = 1, out0;;        // Check if branch slot
    365 (p15)   add         loc2=1,loc2                 // Not a branch slot: increment slot
    366 (p15)   br.sptk.few RelocateBundleNextSlot
    367         mov         out0=loc4                   // out0 = instruction encoding
    368         mov         out1=in1                    // out1 = IP address of previous location
    369         mov         out2=in2;;                  // out2 = IP address of new location
    370         br.call.sptk.few    b0 = RelocateSlot
    371         cmp.eq      p14, p15 = 1, out1;;        // Check if relocated slot
    372 (p15)   mov         in0=0                       // in0 = failure
    373 (p15)   br.sptk.few RelocateBundleDone          // give up; the bundle is left without this slot rewritten
    374         mov         out2=out0;;                 // out2 = instruction encoding (as returned by RelocateSlot)
    375         mov         out0=loc5                   // out0 = runtime address of bundle
    376         mov         out1=loc2;;                 // out1 = instruction slot number
    377         br.call.sptk.few    b0 = SetSlot
    378         add         loc2=1,loc2;;               // Increment slot
    379         br.sptk.few RelocateBundleNextSlot
    380 
    381 RelocateBundleDone:
    382         NESTED_RETURN
    383 
    384         .endp   RelocateBundle
    385 
    386 
    387 /////////////////////////////////////////////
    388 //
    389 //  Name:
    390 //      RelocateSlot
    391 //
    392 //  Description:
    393 //      Relocates one instruction slot by re-encoding its ip-relative branch displacement, if it has one.
    394 //
    395 //  Arguments:
    396 //      in0 - Instruction encoding (41-bits, right justified)
    397 //      in1 - IP address of previous location of bundle
    398 //      in2 - IP address of new location of bundle
    399 //
    400 //  Returns:
    401 //      in0 - Instruction encoding (41-bits, right justified)
    402 //      in1 - 1 if successful otherwise 0
    403 //
    404 //  Notes:
    405 //      This procedure is a leaf routine
    406 //      A new displacement of exactly zero is encoded with the sign bit clear.
    407         .proc   RelocateSlot
    408 
    409 RelocateSlot:
    410         NESTED_SETUP(3,2+5,0,0)
    411         extr.u      loc2=in0, 37, 4;;           // loc2 = instruction opcode
    412         cmp.eq      p14, p15 = 4, loc2;;        // IP-relative branch (B1) or
    413                                                 // IP-relative counted branch (B2)
    414 (p15)   cmp.eq      p14, p15 = 5, loc2;;        // IP-relative call (B3)
    415 (p15)   cmp.eq      p14, p15 = 7, loc2;;        // IP-relative predict (B6)
    416 (p15)   mov         in1=1                       // Instruction did not need to be reencoded
    417 (p15)   br.sptk.few RelocateSlotDone
    418         tbit.nz     p14, p15 = in0, 36;;        // put relative offset sign bit in p14
    419         extr.u      loc2=in0, 13, 20;;          // loc2 = relative offset in instruction
    420 (p14)   movl        loc3=0xfffffffffff00000;;   // extend sign
    421 (p14)   or          loc2=loc2, loc3;;
    422         shl         loc2=loc2,4;;               // convert to byte offset instead of bundle offset
    423         add         loc3=loc2, in1;;            // loc3 = physical address of branch target
    424 (p14)   sub         loc2=r0,loc2;;              // flip sign in loc2 if offset is negative (loc2 is not read again)
    425         sub         loc4=loc3,in2;;             // loc4 = relative offset from new ip to branch target
    426         cmp.lt      p15, p14 = 0, loc4;;        // p15 = offset > 0, p14 = offset <= 0 (only used for abs value)
    427 (p14)   sub         loc5=r0,loc4                // get absolute value of offset
    428 (p15)   mov         loc5=loc4;;
    429         movl        loc6=0x0FFFFFF;;            // maximum offset in bytes for ip-rel branch
    430         cmp.gt      p14, p15 = loc5, loc6;;     // check to see we're not out of range for an ip-relative branch
    431 (p14)   br.sptk.few RelocateSlotError
    432         tbit.nz     p14, p15 = loc4, 63;;       // p14 = new offset is negative; a zero offset must keep sign = 0
    433 (p14)   dep         in0=-1,in0,36,1              // store sign bit in instruction
    434 (p15)   dep         in0=0,in0,36,1
    435         shr         loc4=loc4, 4;;              // convert back to bundle offset
    436         dep         in0=loc4,in0,13,16;;        // put first 16 bits of new offset into instruction
    437         shr         loc4=loc4,16;;
    438         dep         in0=loc4,in0,13+16,4        // put last 4 bits of new offset into instruction
    439         mov         in1=1;;                     // in1 = success
    440         br.sptk.few RelocateSlotDone;;
    441 
    442 RelocateSlotError:
    443         mov         in1=0;;                     // in1 = failure
    444 
    445 RelocateSlotDone:
    446         NESTED_RETURN
    447 
    448         .endp   RelocateSlot
    449 
    450 
    451 /////////////////////////////////////////////
    452 //
    453 //  Name:
    454 //      IsSlotBranch
    455 //
    456 //  Description:
    457 //      Determines if the given instruction is a branch instruction.
    458 //
    459 //  Arguments:
    460 //      in0 - Instruction encoding (41-bits, right justified)
    461 //      in1 - Instruction slot number
    462 //      in2 - Bundle template
    463 //
    464 //  Returns:
    465 //      in0 - 1 if branch or 0 if not branch
    466 //
    467 //  Notes:
    468 //      This procedure is a leaf routine
    469 //
    470 //      IsSlotBranch recognizes all branch instructions by looking at the provided template.
    471 //      The instruction encoding is only passed to this routine for future expansion.
    472 //
    473         .proc   IsSlotBranch
    474 // Decides purely from the template (in2) and slot number (in1); the encoding in in0 is overwritten.
    475 IsSlotBranch:
    476 
    477         NESTED_SETUP (3,2+0,0,0)
    478 
    479         mov         in0=1;;                     // in0 = 1, used as a bit mask (this destroys the instruction encoding)
    480         andcm       in2=in2,in0;;               // in2 = even template (bit 0 cleared) to reduce compares
    481         mov         in0=0;;                     // in0 = not a branch
    482         cmp.eq      p14, p15 = 0x16, in2;;      // Template 0x16 is BBB
    483 (p14)   br.sptk.few IsSlotBranchTrue
    484         cmp.eq      p14, p15 = SLOT0, in1;;     // Slot 0 has no other possibilities
    485 (p14)   br.sptk.few IsSlotBranchDone
    486         cmp.eq      p14, p15 = 0x12, in2;;      // Template 0x12 is MBB
    487 (p14)   br.sptk.few IsSlotBranchTrue
    488         cmp.eq      p14, p15 = SLOT1, in1;;     // Slot 1 has no other possibilities
    489 (p14)   br.sptk.few IsSlotBranchDone
    490         cmp.eq      p14, p15 = 0x10, in2;;      // Template 0x10 is MIB
    491 (p14)   br.sptk.few IsSlotBranchTrue
    492         cmp.eq      p14, p15 = 0x18, in2;;      // Template 0x18 is MMB
    493 (p14)   br.sptk.few IsSlotBranchTrue
    494         cmp.eq      p14, p15 = 0x1C, in2;;      // Template 0x1C is MFB
    495 (p14)   br.sptk.few IsSlotBranchTrue
    496         br.sptk.few IsSlotBranchDone            // no branch template matched: in0 stays 0
    497 
    498 IsSlotBranchTrue:
    499         mov         in0=1;;                     // in0 = branch
    500 
    501 IsSlotBranchDone:
    502         NESTED_RETURN
    503 
    504         .endp   IsSlotBranch
    505 
    506 
    507 /////////////////////////////////////////////
    508 //
    509 //  Name:
    510 //      GetTemplate
    511 //
    512 //  Description:
    513 //      Retrieves the instruction template for an instruction bundle
    514 //
    515 //  Arguments:
    516 //      in0 - Runtime address of bundle
    517 //
    518 //  Returns:
    519 //      in0 - Instruction template (5-bits, right-justified)
    520 //
    521 //  Notes:
    522 //      This procedure is a leaf routine
    523 //
    524         .proc   GetTemplate
    525 // Returns the bits of the bundle's first 8 bytes selected by MASK_0_4 in in0.
    526 GetTemplate:
    527 
    528         NESTED_SETUP (1,2+2,0,0)
    529 
    530         ld8     loc2=[in0], 0x8             // loc2 = first 8 bytes of branch bundle (in0 post-increment is discarded below)
    531         movl    loc3=MASK_0_4;;             // loc3 = template mask
    532         and     loc2=loc2,loc3;;            // loc2 = template, right justified
    533         mov     in0=loc2;;                  // in0 = template, right justified
    534 
    535         NESTED_RETURN
    536 
    537         .endp   GetTemplate
    538 
    539 
    540 /////////////////////////////////////////////
    541 //
    542 //  Name:
    543 //      GetSlot
    544 //
    545 //  Description:
    546 //      Gets the instruction encoding for an instruction slot and bundle
    547 //
    548 //  Arguments:
    549 //      in0 - Runtime address of bundle
    550 //      in1 - Instruction slot (either 0, 1, or 2)
    551 //
    552 //  Returns:
    553 //      in0 - Instruction encoding (41-bits, right justified)
    554 //
    555 //  Notes:
    556 //      This procedure is a leaf routine
    557 //
    558 //      Slot0 - [in0] Bits 45-5
    559 //      Slot1 - [in0] Bits 63-46 (low 18 bits) and [in0 + 0x8] Bits 22-0 (high 23 bits)
    560 //      Slot2 - [in0 + 0x8] Bits 63-23
    561 //
    562         .proc   GetSlot
    563 
    564 GetSlot:
    565         NESTED_SETUP (2,2+3,0,0)
    566 
    567         ld8     loc2=[in0], 0x8;;           // loc2 = first 8 bytes of branch bundle; in0 now points at the second 8 bytes
    568         ld8     loc3=[in0];;                // loc3 = second 8 bytes of branch bundle
    569         cmp.eq  p14, p15 = 2, in1;;         // check if slot 2 specified
    570  (p14)  br.cond.sptk.few    GetSlot2;;      // get slot 2
    571         cmp.eq  p14, p15 = 1, in1;;         // check if slot 1 specified
    572  (p14)  br.cond.sptk.few    GetSlot1;;      // get slot 1
    573 
    574 GetSlot0:
    575         extr.u  in0=loc2, 5, 41             // in0 = extracted slot 0 (exactly 41 bits, none of slot 1)
    576         br.sptk.few GetSlotDone;;
    577 
    578 GetSlot1:
    579         extr.u  in0=loc2, 46, 18            // in0 = bits 63-46 of loc2 right-justified
    580         extr.u  loc4=loc3, 0, 23;;          // loc4 = bits 22-0 of loc3 right-justified
    581         dep     in0=loc4, in0, 18, 15;;     // insert the low 15 of those 23 bits above the first 18
    582         shr.u   loc4=loc4,15;;              // loc4 = remaining 8 bits
    583         dep     in0=loc4, in0, 33, 8;;      // in0 = extracted slot 1
    584         br.sptk.few GetSlotDone;;
    585 
    586 GetSlot2:
    587         extr.u  in0=loc3, 23, 41;;          // in0 = extracted slot 2
    588 
    589 GetSlotDone:
    590         NESTED_RETURN
    591 
    592         .endp   GetSlot
    593 
    594 
    595 /////////////////////////////////////////////
    596 //
    597 //  Name:
    598 //      SetSlot
    599 //
    600 //  Description:
    601 //      Sets the instruction encoding for an instruction slot and bundle
    602 //
    603 //  Arguments:
    604 //      in0 - Runtime address of bundle
    605 //      in1 - Instruction slot (either 0, 1, or 2)
    606 //      in2 - Instruction encoding (41-bits, right justified)
    607 //
    608 //  Returns:
    609 //
    610 //  Notes:
    611 //      This procedure is a leaf routine
    612 //
    613         .proc       SetSlot
    614 // Rewrites one 41-bit slot of the bundle at in0 in place; the other slots and the template are preserved.
    615 SetSlot:
    616         NESTED_SETUP (3,2+3,0,0)
    617 
    618         ld8     loc2=[in0], 0x8;;           // loc2 = first 8 bytes of bundle; in0 now points at the second 8 bytes
    619         ld8     loc3=[in0];;                // loc3 = second 8 bytes of bundle
    620         cmp.eq  p14, p15 = 2, in1;;         // check if slot 2 specified
    621  (p14)  br.cond.sptk.few    SetSlot2;;      // set slot 2
    622         cmp.eq  p14, p15 = 1, in1;;         // check if slot 1 specified
    623  (p14)  br.cond.sptk.few    SetSlot1;;      // set slot 1
    624 
    625 SetSlot0:
    626         dep     loc2=0, loc2, 5, 41;;       // remove old instruction from slot 0
    627         shl     loc4=in2, 5;;               // loc4 = new instruction ready to be inserted
    628         or      loc2=loc2, loc4;;           // loc2 = updated first 8 bytes of bundle
    629         add     loc4=-8,in0;;               // loc4 = address of first 8 bytes (in0 was post-incremented by the load)
    630         st8     [loc4]=loc2                 // [loc4] = updated bundle
    631         br.sptk.few SetSlotDone;;
    632         ;;
    633 
    634 SetSlot1:
    635         dep     loc2=0, loc2, 46, 18        // remove old instruction from slot 1
    636         dep     loc3=0, loc3, 0, 23;;
    637         shl     loc4=in2, 46;;              // loc4 = partial instruction ready to be inserted
    638         or      loc2=loc2, loc4;;           // loc2 = updated first 8 bytes of bundle
    639         add     loc4=-8,in0;;               // loc4 = address of first 8 bytes (in0 was post-incremented by the load)
    640         st8     [loc4]=loc2;;               // [loc4] = updated bundle
    641         shr.u   loc4=in2, 18;;              // loc4 = partial instruction ready to be inserted
    642         or      loc3=loc3, loc4;;           // loc3 = updated second 8 bytes of bundle
    643         st8     [in0]=loc3;;                // [in0] = updated bundle (second 8 bytes)
    644         br.sptk.few SetSlotDone;;
    645 
    646 SetSlot2:
    647         dep     loc3=0, loc3, 23, 41;;      // remove old instruction from slot 2
    648         shl     loc4=in2, 23;;              // loc4 = instruction ready to be inserted
    649         or      loc3=loc3, loc4;;           // loc3 = updated second 8 bytes of bundle
    650         st8     [in0]=loc3;;                // [in0] = updated bundle (second 8 bytes)
    651 
    652 SetSlotDone:
    653                                             // NOTE(review): no fc/sync.i/srlz.i follows these stores - confirm callers do not need it
    654         NESTED_RETURN
    655         .endp       SetSlot
    656 
    657 
    658 /////////////////////////////////////////////
    659 //
    660 //  Name:
    661 //      GetIva
    662 //
    663 //  Description:
    664 //      C callable accessor for the interruption vector address register
    665 //
    666 //  Returns:
    667 //      r8 = current value of IVA
    668 
    669         ASM_GLOBAL     GetIva
    670         .proc       GetIva
    671 GetIva:
    672         mov         r8=cr.iva;;             // cr.iva is control register 2
    673         br.ret.sptk.many    b0              // hand the value back in r8
    674 
    675         .endp       GetIva
    676 
    677 
    678 /////////////////////////////////////////////
    679 //
    680 //  Name:
    681 //      ProgramInterruptFlags
    682 //
    683 //  Description:
    684 //      C callable function to enable/disable interrupts
    685 //      in0 = new psr.i / psr.ic bits; bits outside mask 0x6000 are ignored
    686 //  Returns:
    687 //      r8 = previous state of psr.i and psr.ic (psr masked with 0x6000)
    688 //
    689         ASM_GLOBAL     ProgramInterruptFlags
    690         .proc       ProgramInterruptFlags
    691 ProgramInterruptFlags:
    692         alloc       loc0=1,2,0,0;;          // 1 input, 2 locals; loc0 is reused as scratch right away
    693         mov         loc0=psr                // loc0 = current psr
    694         mov         loc1=0x6000;;           // loc1 = mask covering psr.i and psr.ic
    695         and         r8=loc0, loc1           // obtain current psr.ic and psr.i state
    696         and         in0=in0, loc1           // ensure no extra bits set in input
    697         andcm       loc0=loc0,loc1;;        // clear original psr.i and psr.ic
    698         or          loc0=loc0,in0;;         // OR in new psr.i and psr.ic values
    699         mov         psr.l=loc0;;            // write new psr
    700         srlz.d                              // data serialization after the psr write
    701         br.ret.sptk.many    b0              // return
    702 
    703         .endp       ProgramInterruptFlags
    704 
    705 
    706 /////////////////////////////////////////////
    707 //
    708 //  Name:
    709 //      SpillContext
    710 //
    711 //  Description:
    712 //      Saves system context to context record.
    713 //
    714 //  Arguments:
    715 //          in0 = 512 byte aligned context record address
    716 //          in1 = original B0
    717 //          in2 = original ar.bsp
    718 //          in3 = original ar.bspstore
    719 //          in4 = original ar.rnat
    720 //          in5 = original ar.pfs
    721 //
    722 //  Notes:
    723 //      loc0 - scratch
    724 //      loc1 - scratch
    725 //      loc2 - temporary application unat storage
    726 //      loc3 - temporary exception handler unat storage
    727 
    728         .proc       SpillContext
    729 // Writes the context record in ascending address order through in0, which each store post-increments.
    730 SpillContext:
    731         alloc       loc0=6,4,0,0;;          // alloc 6 input, 4 locals, 0 outs
    732         mov         loc2=ar.unat;;          // save application context unat (spilled later)
    733         mov         ar.unat=r0;;            // set UNAT=0 so it holds only NaT bits from the spills below
    734         st8.spill   [in0]=r0,8;;            // r0 slot; each store post-increments in0 by 8
    735         st8.spill   [in0]=r1,8;;            // save R1 - R31
    736         st8.spill   [in0]=r2,8;;
    737         st8.spill   [in0]=r3,8;;
    738         st8.spill   [in0]=r4,8;;
    739         st8.spill   [in0]=r5,8;;
    740         st8.spill   [in0]=r6,8;;
    741         st8.spill   [in0]=r7,8;;
    742         st8.spill   [in0]=r8,8;;
    743         st8.spill   [in0]=r9,8;;
    744         st8.spill   [in0]=r10,8;;
    745         st8.spill   [in0]=r11,8;;
    746         st8.spill   [in0]=r12,8;;
    747         st8.spill   [in0]=r13,8;;
    748         st8.spill   [in0]=r14,8;;
    749         st8.spill   [in0]=r15,8;;
    750         st8.spill   [in0]=r16,8;;
    751         st8.spill   [in0]=r17,8;;
    752         st8.spill   [in0]=r18,8;;
    753         st8.spill   [in0]=r19,8;;
    754         st8.spill   [in0]=r20,8;;
    755         st8.spill   [in0]=r21,8;;
    756         st8.spill   [in0]=r22,8;;
    757         st8.spill   [in0]=r23,8;;
    758         st8.spill   [in0]=r24,8;;
    759         st8.spill   [in0]=r25,8;;
    760         st8.spill   [in0]=r26,8;;
    761         st8.spill   [in0]=r27,8;;
    762         st8.spill   [in0]=r28,8;;
    763         st8.spill   [in0]=r29,8;;
    764         st8.spill   [in0]=r30,8;;
    765         st8.spill   [in0]=r31,8;;
    766         mov         loc3=ar.unat;;          // save debugger context unat (UNAT after the r0-r31 spills; spilled last)
    767         stf.spill   [in0]=f2,16;;           // save f2 - f31 (in0 advances 16 bytes per register)
    768         stf.spill   [in0]=f3,16;;
    769         stf.spill   [in0]=f4,16;;
    770         stf.spill   [in0]=f5,16;;
    771         stf.spill   [in0]=f6,16;;
    772         stf.spill   [in0]=f7,16;;
    773         stf.spill   [in0]=f8,16;;
    774         stf.spill   [in0]=f9,16;;
    775         stf.spill   [in0]=f10,16;;
    776         stf.spill   [in0]=f11,16;;
    777         stf.spill   [in0]=f12,16;;
    778         stf.spill   [in0]=f13,16;;
    779         stf.spill   [in0]=f14,16;;
    780         stf.spill   [in0]=f15,16;;
    781         stf.spill   [in0]=f16,16;;
    782         stf.spill   [in0]=f17,16;;
    783         stf.spill   [in0]=f18,16;;
    784         stf.spill   [in0]=f19,16;;
    785         stf.spill   [in0]=f20,16;;
    786         stf.spill   [in0]=f21,16;;
    787         stf.spill   [in0]=f22,16;;
    788         stf.spill   [in0]=f23,16;;
    789         stf.spill   [in0]=f24,16;;
    790         stf.spill   [in0]=f25,16;;
    791         stf.spill   [in0]=f26,16;;
    792         stf.spill   [in0]=f27,16;;
    793         stf.spill   [in0]=f28,16;;
    794         stf.spill   [in0]=f29,16;;
    795         stf.spill   [in0]=f30,16;;
    796         stf.spill   [in0]=f31,16;;
    797         mov         loc0=pr;;               // save predicates
    798         st8.spill   [in0]=loc0,8;;
    799         st8.spill   [in0]=in1,8;;           // save b0 - b7... in1 already equals saved b0
    800         mov         loc0=b1;;
    801         st8.spill   [in0]=loc0,8;;
    802         mov         loc0=b2;;
    803         st8.spill   [in0]=loc0,8;;
    804         mov         loc0=b3;;
    805         st8.spill   [in0]=loc0,8;;
    806         mov         loc0=b4;;
    807         st8.spill   [in0]=loc0,8;;
    808         mov         loc0=b5;;
    809         st8.spill   [in0]=loc0,8;;
    810         mov         loc0=b6;;
    811         st8.spill   [in0]=loc0,8;;
    812         mov         loc0=b7;;
    813         st8.spill   [in0]=loc0,8;;
    814         mov         loc0=ar.rsc;;           // save ar.rsc
    815         st8.spill   [in0]=loc0,8;;
    816         st8.spill   [in0]=in2,8;;           // save ar.bsp (in2)
    817         st8.spill   [in0]=in3,8;;           // save ar.bspstore (in3)
    818         st8.spill   [in0]=in4,8;;           // save ar.rnat (in4)
    819         mov         loc0=ar.fcr;;           // save ar.fcr (ar21 - IA32 floating-point control register)
    820         st8.spill   [in0]=loc0,8;;
    821         mov         loc0=ar.eflag;;         // save ar.eflag (ar24)
    822         st8.spill   [in0]=loc0,8;;
    823         mov         loc0=ar.csd;;           // save ar.csd (ar25 - ia32 CS descriptor)
    824         st8.spill   [in0]=loc0,8;;
    825         mov         loc0=ar.ssd;;           // save ar.ssd (ar26 - ia32 ss descriptor)
    826         st8.spill   [in0]=loc0,8;;
    827         mov         loc0=ar.cflg;;          // save ar.cflg (ar27 - ia32 cr0 and cr4)
    828         st8.spill   [in0]=loc0,8;;
    829         mov         loc0=ar.fsr;;           // save ar.fsr (ar28 - ia32 floating-point status register)
    830         st8.spill   [in0]=loc0,8;;
    831         mov         loc0=ar.fir;;           // save ar.fir (ar29 - ia32 floating-point instruction register)
    832         st8.spill   [in0]=loc0,8;;
    833         mov         loc0=ar.fdr;;           // save ar.fdr (ar30 - ia32 floating-point data register)
    834         st8.spill   [in0]=loc0,8;;
    835         mov         loc0=ar.ccv;;           // save ar.ccv
    836         st8.spill   [in0]=loc0,8;;
    837         st8.spill   [in0]=loc2,8;;          // save ar.unat (saved to loc2 earlier)
    838         mov         loc0=ar.fpsr;;          // save floating point status register
    839         st8.spill   [in0]=loc0,8;;
    840         st8.spill   [in0]=in5,8;;           // save ar.pfs
    841         mov         loc0=ar.lc;;            // save ar.lc
    842         st8.spill   [in0]=loc0,8;;
    843         mov         loc0=ar.ec;;            // save ar.ec
    844         st8.spill   [in0]=loc0,8;;
    845 
    846         // save control registers
    847         mov         loc0=cr.dcr;;           // save dcr
    848         st8.spill   [in0]=loc0,8;;
    849         mov         loc0=cr.itm;;           // save itm
    850         st8.spill   [in0]=loc0,8;;
    851         mov         loc0=cr.iva;;           // save iva
    852         st8.spill   [in0]=loc0,8;;
    853         mov         loc0=cr.pta;;           // save pta
    854         st8.spill   [in0]=loc0,8;;
    855         mov         loc0=cr.ipsr;;          // save ipsr
    856         st8.spill   [in0]=loc0,8;;
    857         mov         loc0=cr.isr;;           // save isr
    858         st8.spill   [in0]=loc0,8;;
    859         mov         loc0=cr.iip;;           // save iip
    860         st8.spill   [in0]=loc0,8;;
    861         mov         loc0=cr.ifa;;           // save ifa
    862         st8.spill   [in0]=loc0,8;;
    863         mov         loc0=cr.itir;;          // save itir
    864         st8.spill   [in0]=loc0,8;;
    865         mov         loc0=cr.iipa;;          // save iipa
    866         st8.spill   [in0]=loc0,8;;
    867         mov         loc0=cr.ifs;;           // save ifs
    868         st8.spill   [in0]=loc0,8;;
    869         mov         loc0=cr.iim;;           // save iim
    870         st8.spill   [in0]=loc0,8;;
    871         mov         loc0=cr.iha;;           // save iha
    872         st8.spill   [in0]=loc0,8;;
    873 
    874         // save debug registers
    875         mov         loc0=dbr[r0];;          // save dbr0 - dbr7
    876         st8.spill   [in0]=loc0,8;;
    877         movl        loc1=1;;                // loc1 = index of the debug register to read
    878         mov         loc0=dbr[loc1];;
    879         st8.spill   [in0]=loc0,8;;
    880         movl        loc1=2;;
    881         mov         loc0=dbr[loc1];;
    882         st8.spill   [in0]=loc0,8;;
    883         movl        loc1=3;;
    884         mov         loc0=dbr[loc1];;
    885         st8.spill   [in0]=loc0,8;;
    886         movl        loc1=4;;
    887         mov         loc0=dbr[loc1];;
    888         st8.spill   [in0]=loc0,8;;
    889         movl        loc1=5;;
    890         mov         loc0=dbr[loc1];;
    891         st8.spill   [in0]=loc0,8;;
    892         movl        loc1=6;;
    893         mov         loc0=dbr[loc1];;
    894         st8.spill   [in0]=loc0,8;;
    895         movl        loc1=7;;
    896         mov         loc0=dbr[loc1];;
    897         st8.spill   [in0]=loc0,8;;
    898         mov         loc0=ibr[r0];;          // save ibr0 - ibr7
    899         st8.spill   [in0]=loc0,8;;
    900         movl        loc1=1;;
    901         mov         loc0=ibr[loc1];;
    902         st8.spill   [in0]=loc0,8;;
    903         movl        loc1=2;;
    904         mov         loc0=ibr[loc1];;
    905         st8.spill   [in0]=loc0,8;;
    906         movl        loc1=3;;
    907         mov         loc0=ibr[loc1];;
    908         st8.spill   [in0]=loc0,8;;
    909         movl        loc1=4;;
    910         mov         loc0=ibr[loc1];;
    911         st8.spill   [in0]=loc0,8;;
    912         movl        loc1=5;;
    913         mov         loc0=ibr[loc1];;
    914         st8.spill   [in0]=loc0,8;;
    915         movl        loc1=6;;
    916         mov         loc0=ibr[loc1];;
    917         st8.spill   [in0]=loc0,8;;
    918         movl        loc1=7;;
    919         mov         loc0=ibr[loc1];;
    920         st8.spill   [in0]=loc0,8;;
    921         st8.spill   [in0]=loc3;;            // save debugger unat (loc3) last; no post-increment, so in0 stays on this element
    922 
    923         br.ret.sptk.few     b0              // return; in0 holds the address of the last element written
    924 
    925         .endp       SpillContext
    926 
    927 
    928 /////////////////////////////////////////////
    929 //
    930 //  Name:
    931 //      FillContext
    932 //
    933 //  Description:
    934 //      Restores register context from context record.
    935 //
    936 //  Arguments:
    937 //          in0 = address of the last element of the 512 byte aligned context record
    938 //          in1 = modified B0
    939 //          in2 = modified ar.bsp
    940 //          in3 = modified ar.bspstore
    941 //          in4 = modified ar.rnat
    942 //          in5 = modified ar.pfs
    943 //
    944 //  Notes:
    945 //      loc0 - scratch
    946 //      loc1 - scratch
    947 //      loc2 - temporary application unat storage
    948 //      loc3 - temporary exception handler unat storage
    949 
    950         .proc       FillContext
    951 FillContext:
    952         alloc       loc0=6,4,0,0;;          // alloc 6 inputs, 4 locals, 0 outs
    953         ld8.fill    loc3=[in0],-8;;         // int_nat (nat bits for R1-31); record is read back-to-front, in0 post-decrements
    954         movl        loc1=7;;                // ibr7
    955         ld8.fill    loc0=[in0],-8;;
    956         mov         ibr[loc1]=loc0;;
    957         movl        loc1=6;;                // ibr6
    958         ld8.fill    loc0=[in0],-8;;
    959         mov         ibr[loc1]=loc0;;
    960         movl        loc1=5;;                // ibr5
    961         ld8.fill    loc0=[in0],-8;;
    962         mov         ibr[loc1]=loc0;;
    963         movl        loc1=4;;                // ibr4
    964         ld8.fill    loc0=[in0],-8;;
    965         mov         ibr[loc1]=loc0;;
    966         movl        loc1=3;;                // ibr3
    967         ld8.fill    loc0=[in0],-8;;
    968         mov         ibr[loc1]=loc0;;
    969         movl        loc1=2;;                // ibr2
    970         ld8.fill    loc0=[in0],-8;;
    971         mov         ibr[loc1]=loc0;;
    972         movl        loc1=1;;                // ibr1
    973         ld8.fill    loc0=[in0],-8;;
    974         mov         ibr[loc1]=loc0;;
    975         ld8.fill    loc0=[in0],-8;;         // ibr0
    976         mov         ibr[r0]=loc0;;
    977         movl        loc1=7;;                // dbr7
    978         ld8.fill    loc0=[in0],-8;;
    979         mov         dbr[loc1]=loc0;;
    980         movl        loc1=6;;                // dbr6
    981         ld8.fill    loc0=[in0],-8;;
    982         mov         dbr[loc1]=loc0;;
    983         movl        loc1=5;;                // dbr5
    984         ld8.fill    loc0=[in0],-8;;
    985         mov         dbr[loc1]=loc0;;
    986         movl        loc1=4;;                // dbr4
    987         ld8.fill    loc0=[in0],-8;;
    988         mov         dbr[loc1]=loc0;;
    989         movl        loc1=3;;                // dbr3
    990         ld8.fill    loc0=[in0],-8;;
    991         mov         dbr[loc1]=loc0;;
    992         movl        loc1=2;;                // dbr2
    993         ld8.fill    loc0=[in0],-8;;
    994         mov         dbr[loc1]=loc0;;
    995         movl        loc1=1;;                // dbr1
    996         ld8.fill    loc0=[in0],-8;;
    997         mov         dbr[loc1]=loc0;;
    998         ld8.fill    loc0=[in0],-8;;         // dbr0
    999         mov         dbr[r0]=loc0;;
   1000         ld8.fill    loc0=[in0],-8;;         // iha
   1001         mov         cr.iha=loc0;;
   1002         ld8.fill    loc0=[in0],-8;;         // iim
   1003         mov         cr.iim=loc0;;
   1004         ld8.fill    loc0=[in0],-8;;         // ifs
   1005         mov         cr.ifs=loc0;;
   1006         ld8.fill    loc0=[in0],-8;;         // iipa
   1007         mov         cr.iipa=loc0;;
   1008         ld8.fill    loc0=[in0],-8;;         // itir
   1009         mov         cr.itir=loc0;;
   1010         ld8.fill    loc0=[in0],-8;;         // ifa
   1011         mov         cr.ifa=loc0;;
   1012         ld8.fill    loc0=[in0],-8;;         // iip
   1013         mov         cr.iip=loc0;;
   1014         ld8.fill    loc0=[in0],-8;;         // isr
   1015         mov         cr.isr=loc0;;
   1016         ld8.fill    loc0=[in0],-8;;         // ipsr
   1017         mov         cr.ipsr=loc0;;
   1018         ld8.fill    loc0=[in0],-8;;         // pta
   1019         mov         cr.pta=loc0;;
   1020         ld8.fill    loc0=[in0],-8;;         // iva
   1021         mov         cr.iva=loc0;;
   1022         ld8.fill    loc0=[in0],-8;;         // itm
   1023         mov         cr.itm=loc0;;
   1024         ld8.fill    loc0=[in0],-8;;         // dcr
   1025         mov         cr.dcr=loc0;;
   1026         ld8.fill    loc0=[in0],-8;;         // ec
   1027         mov         ar.ec=loc0;;
   1028         ld8.fill    loc0=[in0],-8;;         // lc
   1029         mov         ar.lc=loc0;;
   1030         ld8.fill    in5=[in0],-8;;          // ar.pfs - only loaded into in5 here, not written to ar.pfs
   1031         ld8.fill    loc0=[in0],-8;;         // ar.fpsr
   1032         mov         ar.fpsr=loc0;;
   1033         ld8.fill    loc2=[in0],-8;;         // ar.unat - restored later...
   1034         ld8.fill    loc0=[in0],-8;;         // ar.ccv
   1035         mov         ar.ccv=loc0;;
   1036         ld8.fill    loc0=[in0],-8;;         // ar.fdr
   1037         mov         ar.fdr=loc0;;
   1038         ld8.fill    loc0=[in0],-8;;         // ar.fir
   1039         mov         ar.fir=loc0;;
   1040         ld8.fill    loc0=[in0],-8;;         // ar.fsr
   1041         mov         ar.fsr=loc0;;
   1042         ld8.fill    loc0=[in0],-8;;         // ar.cflg
   1043         mov         ar.cflg=loc0;;
   1044         ld8.fill    loc0=[in0],-8;;         // ar.ssd
   1045         mov         ar.ssd=loc0;;
   1046         ld8.fill    loc0=[in0],-8;;         // ar.csd
   1047         mov         ar.csd=loc0;;
   1048         ld8.fill    loc0=[in0],-8;;         // ar.eflag
   1049         mov         ar.eflag=loc0;;
   1050         ld8.fill    loc0=[in0],-8;;         // ar.fcr
   1051         mov         ar.fcr=loc0;;
   1052         ld8.fill    in4=[in0],-8;;          // ar.rnat - only loaded into in4 here
   1053         ld8.fill    in3=[in0],-8;;          // bspstore - only loaded into in3 here
   1054         ld8.fill    in2=[in0],-8;;          // bsp - only loaded into in2 here
   1055         ld8.fill    loc0=[in0],-8;;         // ar.rsc
   1056         mov         ar.rsc=loc0;;
   1057         ld8.fill    loc0=[in0],-8;;         // B7 - B0
   1058         mov         b7=loc0;;
   1059         ld8.fill    loc0=[in0],-8;;
   1060         mov         b6=loc0;;
   1061         ld8.fill    loc0=[in0],-8;;
   1062         mov         b5=loc0;;
   1063         ld8.fill    loc0=[in0],-8;;
   1064         mov         b4=loc0;;
   1065         ld8.fill    loc0=[in0],-8;;
   1066         mov         b3=loc0;;
   1067         ld8.fill    loc0=[in0],-8;;
   1068         mov         b2=loc0;;
   1069         ld8.fill    loc0=[in0],-8;;
   1070         mov         b1=loc0;;
   1071         ld8.fill    in1=[in0],-8;;          // b0 is temporarily stored in in1
   1072         ld8.fill    loc0=[in0],-16;;        // predicates; step back 16 bytes to reach the f31 slot
   1073         mov         pr=loc0;;
   1074         ldf.fill    f31=[in0],-16;;         // restore f31 - f2 (in0 steps back 16 bytes per register)
   1075         ldf.fill    f30=[in0],-16;;
   1076         ldf.fill    f29=[in0],-16;;
   1077         ldf.fill    f28=[in0],-16;;
   1078         ldf.fill    f27=[in0],-16;;
   1079         ldf.fill    f26=[in0],-16;;
   1080         ldf.fill    f25=[in0],-16;;
   1081         ldf.fill    f24=[in0],-16;;
   1082         ldf.fill    f23=[in0],-16;;
   1083         ldf.fill    f22=[in0],-16;;
   1084         ldf.fill    f21=[in0],-16;;
   1085         ldf.fill    f20=[in0],-16;;
   1086         ldf.fill    f19=[in0],-16;;
   1087         ldf.fill    f18=[in0],-16;;
   1088         ldf.fill    f17=[in0],-16;;
   1089         ldf.fill    f16=[in0],-16;;
   1090         ldf.fill    f15=[in0],-16;;
   1091         ldf.fill    f14=[in0],-16;;
   1092         ldf.fill    f13=[in0],-16;;
   1093         ldf.fill    f12=[in0],-16;;
   1094         ldf.fill    f11=[in0],-16;;
   1095         ldf.fill    f10=[in0],-16;;
   1096         ldf.fill    f9=[in0],-16;;
   1097         ldf.fill    f8=[in0],-16;;
   1098         ldf.fill    f7=[in0],-16;;
   1099         ldf.fill    f6=[in0],-16;;
   1100         ldf.fill    f5=[in0],-16;;
   1101         ldf.fill    f4=[in0],-16;;
   1102         ldf.fill    f3=[in0],-16;;
   1103         ldf.fill    f2=[in0],-8;;           // f2; step back only 8 bytes to reach the r31 slot
   1104         mov         ar.unat=loc3;;          // restore unat (int_nat) before fill of general registers
   1105         ld8.fill    r31=[in0],-8;;          // restore r31 - r1
   1106         ld8.fill    r30=[in0],-8;;
   1107         ld8.fill    r29=[in0],-8;;
   1108         ld8.fill    r28=[in0],-8;;
   1109         ld8.fill    r27=[in0],-8;;
   1110         ld8.fill    r26=[in0],-8;;
   1111         ld8.fill    r25=[in0],-8;;
   1112         ld8.fill    r24=[in0],-8;;
   1113         ld8.fill    r23=[in0],-8;;
   1114         ld8.fill    r22=[in0],-8;;
   1115         ld8.fill    r21=[in0],-8;;
   1116         ld8.fill    r20=[in0],-8;;
   1117         ld8.fill    r19=[in0],-8;;
   1118         ld8.fill    r18=[in0],-8;;
   1119         ld8.fill    r17=[in0],-8;;
   1120         ld8.fill    r16=[in0],-8;;
   1121         ld8.fill    r15=[in0],-8;;
   1122         ld8.fill    r14=[in0],-8;;
   1123         ld8.fill    r13=[in0],-8;;
   1124         ld8.fill    r12=[in0],-8;;
   1125         ld8.fill    r11=[in0],-8;;
   1126         ld8.fill    r10=[in0],-8;;
   1127         ld8.fill    r9=[in0],-8;;
   1128         ld8.fill    r8=[in0],-8;;
   1129         ld8.fill    r7=[in0],-8;;
   1130         ld8.fill    r6=[in0],-8;;
   1131         ld8.fill    r5=[in0],-8;;
   1132         ld8.fill    r4=[in0],-8;;
   1133         ld8.fill    r3=[in0],-8;;
   1134         ld8.fill    r2=[in0],-8;;
   1135         ld8.fill    r1=[in0],-8;;
   1136         mov         ar.unat=loc2;;          // restore application context unat
   1137 
   1138         br.ret.sptk.many    b0              // return; b0, bsp, bspstore, rnat and pfs values are left in in1 - in5
   1139 
   1140         .endp       FillContext
   1141 
   1142 
   1143 /////////////////////////////////////////////
   1144 //
   1145 //  Name:
   1146 //      HookHandler
   1147 //
   1148 //  Description:
   1149 //      Common branch target from hooked IVT entries.  Runs in interrupt context.
   1150 //      Responsible for saving and restoring context and calling common C
   1151 //      handler.  Banked registers running on bank 0 at entry.
   1152 //
   1153 //  Arguments:
   1154 //      All arguments are passed in banked registers:
   1155 //          B0_REG = Original B0
   1156 //          SCRATCH_REG1 = IVT entry index
   1157 //
   1158 //  Returns:
   1159 //      Returns via rfi
   1160 //
   1161 //  Notes:
   1162 //      loc0 - scratch
   1163 //      loc1 - scratch
   1164 //      loc2 - vector number / mask
   1165 //      loc3 - 512 byte aligned context record address
   1166 //      loc4 - temporary storage of last address in context record
   1167 
   1168 HookHandler:
   1169         flushrs;;                               // Synch RSE with backing store
   1170         mov         SCRATCH_REG2=ar.bsp         // save interrupted context bsp
   1171         mov         SCRATCH_REG3=ar.bspstore    // save interrupted context bspstore
   1172         mov         SCRATCH_REG4=ar.rnat        // save interrupted context rnat
   1173         mov         SCRATCH_REG6=cr.ifs;;       // save IFS in case we need to chain...
   1174         cover;;                                 // creates new frame, moves old
   1175                                                 //   CFM to IFS.
   1176         alloc       SCRATCH_REG5=0,5,6,0        // alloc 5 locals, 6 outs; SCRATCH_REG5 receives ar.pfs
   1177         ;;
   1178         // copy banked registers to outs/locals before switching banks
   1179         mov         out1=B0_REG                 // out1 = Original B0
   1180         mov         out2=SCRATCH_REG2           // out2 = original ar.bsp
   1181         mov         out3=SCRATCH_REG3           // out3 = original ar.bspstore
   1182         mov         out4=SCRATCH_REG4           // out4 = original ar.rnat
   1183         mov         out5=SCRATCH_REG5           // out5 = original ar.pfs
   1184         mov         loc2=SCRATCH_REG1;;         // loc2 = vector number + chain flag
   1185         bsw.1;;                                 // switch banked registers to bank 1
   1186         srlz.d                                  // explicit serialize required
   1187                                                 // now fill in context record structure
   1188         movl        loc3=IpfContextBuf          // Ensure context record is aligned
   1189         add         loc0=-0x200,r0;;            // mask the lower 9 bits (align on 512 byte boundary)
   1190         and         loc3=loc3,loc0;;
   1191         add         loc3=0x200,loc3;;           // move to next 512 byte boundary
   1192                                                 // loc3 now contains the 512 byte aligned context record
   1193                                                 // spill register context into context record
   1194         mov         out0=loc3;;                 // Context record base in out0
   1195                                                 // original B0 in out1 already
   1196                                                 // original ar.bsp in out2 already
   1197                                                 // original ar.bspstore in out3 already
   1198         br.call.sptk.few b0=SpillContext;;      // spill context
   1199         mov         loc4=out0                   // save modified address (SpillContext leaves it on the last element)
   1200 
   1201     // At this point, the context has been saved to the context record and we're
   1202     // ready to call the C part of the handler...
   1203 
   1204         movl        loc0=CommonHandler;;        // obtain address of plabel
   1205         ld8         loc1=[loc0];;               // get entry point of CommonHandler
   1206         mov         b6=loc1;;                   // put it in a branch register
   1207         adds        loc1= 8, loc0;;             // index to GP in plabel
   1208         ld8         r1=[loc1];;                 // set up gp for C call
   1209         mov         loc1=0xfffff;;              // mask off so only vector bits are present
   1210         and         out0=loc2,loc1;;            // pass vector number (exception type)
   1211         mov         out1=loc3;;                 // pass context record address
   1212         br.call.sptk.few b0=b6;;                // call C handler
   1213 
   1214     // We've returned from the C call, so restore the context and either rfi
   1215     // back to interrupted thread, or chain into the SAL if this was an external interrupt
   1216         mov         out0=loc4;;                 // pass address of last element in context record
   1217         br.call.sptk.few b0=FillContext;;       // Fill context
   1218         mov         b0=out1                     // fill in b0
   1219         mov         ar.rnat=out4                // rnat value loaded from the record by FillContext
   1220         mov         ar.pfs=out5                 // pfs value loaded from the record by FillContext
   1221 
   1222   // Loadrs is necessary because the debugger may have changed some values in
   1223   // the backing store.  The processor, however may not be aware that the
   1224   // stacked registers need to be reloaded from the backing store.  Therefore,
   1225   // we explicitly cause the RSE to refresh the stacked register's contents
   1226   // from the backing store.
   1227         mov         loc0=ar.rsc                 // get RSC value
   1228         mov         loc1=ar.rsc                 // save it so we can restore it
   1229         movl        loc3=0xffffffffc000ffff;;   // create mask for clearing RSC.loadrs
   1230         and         loc0=loc0,loc3;;            // create value for RSC with RSC.loadrs==0
   1231         mov         ar.rsc=loc0;;               // modify RSC
   1232         loadrs;;                                // invalidate register stack
   1233         mov         ar.rsc=loc1;;               // restore original RSC
   1234 
   1235         bsw.0;;                                 // switch banked registers back to bank 0
   1236         srlz.d;;                                // explicit serialize required
   1237         mov         PR_REG=pr                   // save predicates - to be restored after chaining decision
   1238         mov         B0_REG=b0                   // save b0 - required by chain code
   1239         mov         loc2=EXCPT_EXTERNAL_INTERRUPT;;
   1240         cmp.eq      p7,p0=SCRATCH_REG1,loc2;;   // p7 set when the vector is the external interrupt (timer tick)
   1241   (p7)  br.cond.dpnt.few    DO_CHAIN;;
   1242 
   1243 NO_CHAIN:
   1244         mov         pr=PR_REG;;
   1245         rfi;;                                   // we're outa here.
   1246 
   1247 DO_CHAIN:
   1248         mov         pr=PR_REG                   // restore predicates changed by the compare above
   1249         mov         SCRATCH_REG1=cr.iva         // IVT base address
   1250         mov         SCRATCH_REG2=PATCH_RETURN_OFFSET;; // NOTE(review): presumably the IVT offset of PatchCodeRet - confirm in Ds64Macros.i
   1251         add         SCRATCH_REG1=SCRATCH_REG1, SCRATCH_REG2;; // target = IVT base + PATCH_RETURN_OFFSET
   1252         mov         b0=SCRATCH_REG1;;
   1253         br.cond.sptk.few  b0;;                  // continue in the patch code inside the IVT
   1254 
   1255 EndHookHandler:
   1256 
   1257 
   1258 /////////////////////////////////////////////
   1259 //
   1260 //  Name:
   1261 //      HookStub
   1262 //
   1263 //  Description:
   1264 //      HookStub will be copied from its loaded location into the IVT when
   1265 //      an IVT entry is hooked.  The IVT entry does an indirect jump via B0 to
   1266 //      HookHandler, which in turn calls into the default C handler, which calls
   1267 //      the user-installed C handler.  The calls return and HookHandler executes
   1268 //      an rfi.
   1269 //
   1270 //  Notes:
   1271 //      Saves B0 to B0_REG
   1272 //      Saves IVT index to SCRATCH_REG1 (immediate value is fixed up when code is copied
   1273 //          to the IVT entry.
   1274 
   1275         ASM_GLOBAL HookStub
   1276         .proc   HookStub
   1277 HookStub:
   1278 
   1279         mov         B0_REG=b0                   // preserve interrupted context's b0 for HookHandler
   1280         movl        SCRATCH_REG1=HookHandler;;  // absolute address of the common handler
   1281         mov         b0=SCRATCH_REG1;;
   1282         mov         SCRATCH_REG1=0;;// immediate value is fixed up during install of handler to be the vector number
   1283         br.cond.sptk.few b0                     // jump to HookHandler with the vector number in SCRATCH_REG1
   1284 
   1285         .endp       HookStub
   1286 
   1287 
   1288 /////////////////////////////////////////////
   1289 // The following code is moved into IVT entry 14 (offset 0x3400) which is reserved
   1290 // in the Itanium architecture.  The patch code is located at the end of the
   1291 // IVT entry.
   1292 
   1293 PatchCode:
   1294         mov       SCRATCH_REG0=psr                    // current psr; written to cr.ipsr in PatchCodeRet
   1295         mov       SCRATCH_REG6=cr.ipsr                // ipsr, tested below for PSR_IS
   1296         mov       PR_REG=pr                           // save predicates (p14/p15 are used below)
   1297         mov       B0_REG=b0;;                         // save b0 (b0 is used for the branch to HookHandler)
   1298 
   1299         // turn off any virtual translations
   1300         movl      SCRATCH_REG1 = ~( MASK(PSR_DT,1) | MASK(PSR_RT,1));;
   1301         and       SCRATCH_REG1 = SCRATCH_REG0, SCRATCH_REG1;;
   1302         mov       psr.l = SCRATCH_REG1;;
   1303         srlz.d
   1304         tbit.z    p14, p15 = SCRATCH_REG6, PSR_IS;;   // Check to see if we were
   1305                                                       // interrupted from IA32
   1306                                                       // context.  If so, bail out
   1307                                                       // and chain to SAL immediately
   1308  (p15)  br.cond.sptk.few Stub_IVT_Passthru;;
   1309         // we only want to take 1 out of 32 external interrupts to minimize the
   1310         // impact to system performance.  Check our interrupt count and bail
   1311         // out if we're not up to 32
   1312         movl      SCRATCH_REG1=ExternalInterruptCount;;
   1313         ld8       SCRATCH_REG2=[SCRATCH_REG1];;       // ExternalInterruptCount
   1314         tbit.z    p14, p15 = SCRATCH_REG2, 5;;        // bit 5 set?
   1315  (p14)  add       SCRATCH_REG2=1, SCRATCH_REG2;;      // No?  Then increment
   1316                                                       // ExternalInterruptCount
   1317                                                       // and Chain to SAL
   1318                                                       // immediately
   1319  (p14)  st8       [SCRATCH_REG1]=SCRATCH_REG2;;
   1320  (p14)  br.cond.sptk.few Stub_IVT_Passthru;;
   1321  (p15)  mov       SCRATCH_REG2=0;;                    // Yes?  Then reset
   1322                                                         // ExternalInterruptCount
   1323                                                         // and branch to
   1324                                                         // HookHandler
   1325  (p15)  st8       [SCRATCH_REG1]=SCRATCH_REG2;;
   1326         mov       pr=PR_REG                           // restore predicates changed by the tbit tests
   1327         movl      SCRATCH_REG1=HookHandler;;          // SCRATCH_REG1 = entrypoint of HookHandler
   1328         mov       b0=SCRATCH_REG1;;                   // b0 = entrypoint of HookHandler
   1329         mov       SCRATCH_REG1=EXCPT_EXTERNAL_INTERRUPT;; // vector number argument for HookHandler
   1330         br.sptk.few b0;;                                // branch to HookHandler
   1331 
   1332 PatchCodeRet:
   1333         // fake-up an rfi to get RSE back to being coherent and ensure psr has
   1334         // original contents when interrupt occurred, then exit to SAL
   1335         // at this point:
   1336         //      cr.ifs has been modified by previous "cover"
   1337         //      SCRATCH_REG6 has original cr.ifs
   1338 
   1339         mov       SCRATCH_REG5=cr.ipsr                // keep ipsr; put back at Stub_RfiTarget
   1340         mov       SCRATCH_REG4=cr.iip;;               // keep iip; put back at Stub_RfiTarget
   1341         mov       cr.ipsr=SCRATCH_REG0                // psr value captured at PatchCode entry
   1342         mov       SCRATCH_REG1=ip;;
   1343         add       SCRATCH_REG1=0x30, SCRATCH_REG1;;   // NOTE(review): 0x30 presumably = 3 bundles ahead, i.e. Stub_RfiTarget - confirm bundle layout
   1344         mov       cr.iip=SCRATCH_REG1;;
   1345         rfi;;                       // rfi to next instruction
   1346 
   1347 Stub_RfiTarget:
   1348         mov       cr.ifs=SCRATCH_REG6                 // put back original cr.ifs (see note above)
   1349         mov       cr.ipsr=SCRATCH_REG5                // put back ipsr saved in PatchCodeRet
   1350         mov       cr.iip=SCRATCH_REG4;;               // put back iip saved in PatchCodeRet
   1351 
   1352 Stub_IVT_Passthru:
   1353         mov       pr=PR_REG                         // pr = saved predicate registers
   1354         mov       b0=B0_REG;;                       // b0 = saved b0
   1355 EndPatchCode:
   1356 
   1357 
   1358 /////////////////////////////////////////////
   1359 // The following bundle is moved into IVT entry 14 (offset 0x3400) which is reserved
   1360 // in the Itanium architecture.  This bundle will be the last bundle and will
   1361 // be located at offset 0x37F0 in the IVT.
   1362 
   1363 FailsafeBranch:
   1364 {
   1365         .mib
   1366         nop.m     0
   1367         nop.i     0
   1368         br.sptk.few -(FAILSAFE_BRANCH_OFFSET - EXT_INT_ENTRY_OFFSET - 0x10) // backward displacement; NOTE(review): presumably lands on EXT_INT_ENTRY_OFFSET + 0x10 once placed at FAILSAFE_BRANCH_OFFSET - confirm
   1369 }
   1370 
   1371 
   1372 /////////////////////////////////////////////
   1373 // The following bundle is moved into IVT entry 13 (offset 0x3000) which is the
   1374 // external interrupt.  It branches to the patch code.
   1375 
   1376 PatchCodeNewBun0:
   1377 {
   1378         .mib
   1379         nop.m     0
   1380         nop.i     0
   1381         br.cond.sptk.few PATCH_BRANCH       // NOTE(review): PATCH_BRANCH presumably resolves to the patch code in IVT entry 14 - confirm in Ds64Macros.i
   1382 }
   1383