Home | History | Annotate | Download | only in arm
      1 @/******************************************************************************
      2 @ *
      3 @ * Copyright (C) 2015 The Android Open Source Project
      4 @ *
      5 @ * Licensed under the Apache License, Version 2.0 (the "License");
      6 @ * you may not use this file except in compliance with the License.
      7 @ * You may obtain a copy of the License at:
      8 @ *
      9 @ * http://www.apache.org/licenses/LICENSE-2.0
     10 @ *
     11 @ * Unless required by applicable law or agreed to in writing, software
     12 @ * distributed under the License is distributed on an "AS IS" BASIS,
     13 @ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 @ * See the License for the specific language governing permissions and
     15 @ * limitations under the License.
     16 @ *
     17 @ *****************************************************************************
     18 @ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 @*/
     20 @*
     21 @ *******************************************************************************
     22 @ * @file
     23 @ *  ih264_padding_neon.s
     24 @ *
     25 @ * @brief
@ *  Contains function definitions for padding
     27 @ *
     28 @ * @author
     29 @ *  Ittiam
     30 @ *
     31 @ * @par List of Functions:
     32 @ *  - ih264_pad_top_a9q()
     33 @ *  - ih264_pad_left_luma_a9q()
     34 @ *  - ih264_pad_left_chroma_a9q()
     35 @ *  - ih264_pad_right_luma_a9q()
     36 @ *  - ih264_pad_right_chroma_a9q()
     37 @ *
     38 @ * @remarks
     39 @ *  None
     40 @ *
     41 @ *******************************************************************************
     42 @*
     43 
     44 
     45 @**
     46 @*******************************************************************************
     47 @*
     48 @* @brief pad at the top of a 2d array
     49 @*
     50 @* @par Description:
     51 @*  The top row of a 2d array is replicated for pad_size times at the top
     52 @*
     53 @* @param[in] pu1_src
     54 @*  UWORD8 pointer to the source
     55 @*
     56 @* @param[in] src_strd
     57 @*  integer source stride
     58 @*
     59 @* @param[in] wd
     60 @*  integer width of the array
     61 @*
     62 @* @param[in] pad_size
     63 @*  integer -padding size of the array
     64 @*
     65 @* @returns none
     66 @*
     67 @* @remarks none
     68 @*
     69 @*******************************************************************************
     70 @*
     71 @void ih264_pad_top(UWORD8 *pu1_src,
     72 @                   WORD32 src_strd,
     73 @                   WORD32 wd,
     74 @                   WORD32 pad_size)
     75 @**************Variables Vs Registers*************************
     76 @   r0 => *pu1_src
     77 @   r1 => src_strd
     78 @   r2 => wd
     79 @   r3 => pad_size
     80 
     81 .text
     82 .p2align 2
     83 
     84     .global ih264_pad_top_a9q
     85 
     86 ih264_pad_top_a9q:
     87 
     88     stmfd         sp!, {r4-r11, lr}     @stack stores the values of the arguments
     89 
     90     sub           r5, r0, r1
     91     rsb           r6, r1, #0
     92 
     93 loop_neon_memcpy_mul_16:
     94     @ Load 16 bytes
     95     vld1.8        {d0, d1}, [r0]!
     96     mov           r4, r5
     97     mov           r7, r3
     98     add           r5, r5, #16
     99 
    100 loop_neon_pad_top:
    101     vst1.8        {d0, d1}, [r4], r6
    102     subs          r7, r7, #1
    103     bne           loop_neon_pad_top
    104 
    105     subs          r2, r2, #16
    106     bne           loop_neon_memcpy_mul_16
    107 
    108     ldmfd         sp!, {r4-r11, pc}     @Reload the registers from SP
    109 
    110 
    111 
    112 
    113 @**
    114 @*******************************************************************************
    115 @*
    116 @* @brief
    117 @*   Padding (luma block) at the left of a 2d array
    118 @*
    119 @* @par Description:
    120 @*   The left column of a 2d array is replicated for pad_size times at the left
    121 @*
    122 @*
    123 @* @param[in] pu1_src
    124 @*  UWORD8 pointer to the source
    125 @*
    126 @* @param[in] src_strd
    127 @*  integer source stride
    128 @*
    129 @* @param[in] ht
    130 @*  integer height of the array
    131 @*
    132 @* @param[in] wd
    133 @*  integer width of the array
    134 @*
    135 @* @param[in] pad_size
    136 @*  integer -padding size of the array
    137 @*
    138 @* @param[in] ht
    139 @*  integer height of the array
    140 @*
    141 @* @param[in] wd
    142 @*  integer width of the array
    143 @*
    144 @* @returns
    145 @*
    146 @* @remarks
    147 @*  None
    148 @*
    149 @*******************************************************************************
    150 @*
    151 @#if PAD_LEFT_LUMA == C
    152 @void ih264_pad_left_luma(UWORD8 *pu1_src,
    153 @                        WORD32 src_strd,
    154 @                        WORD32 ht,
    155 @                        WORD32 pad_size)
    156 @**************Variables Vs Registers*************************
    157 @   r0 => *pu1_src
    158 @   r1 => src_strd
    159 @   r2 => ht
    160 @   r3 => pad_size
    161 
    162 
    163 
    164     .global ih264_pad_left_luma_a9q
    165 
    166 ih264_pad_left_luma_a9q:
    167 
    168     stmfd         sp!, {r4-r11, lr}     @stack stores the values of the arguments
    169 
    170 
    171     sub           r4, r0, r3
    172     sub           r6, r1, #16
    173     subs          r5, r3, #16
    174     bne           loop_32
    175 loop_16:                                @  /*hard coded for width=16  ,height =8,16*/
    176     ldrb          r8, [r0], r1
    177     ldrb          r9, [r0], r1
    178     vdup.u8       q0, r8
    179     ldrb          r10, [r0], r1
    180     vst1.8        {q0}, [r4], r1        @ 16 bytes store
    181     vdup.u8       q1, r9
    182     vst1.8        {q1}, [r4], r1        @ 16 bytes store
    183     ldrb          r11, [r0], r1
    184     vdup.u8       q2, r10
    185     vdup.u8       q3, r11
    186     vst1.8        {q2}, [r4], r1        @ 16 bytes store
    187     ldrb          r8, [r0], r1
    188     vst1.8        {q3}, [r4], r1        @ 16 bytes store
    189     ldrb          r9, [r0], r1
    190     vdup.u8       q0, r8
    191     ldrb          r10, [r0], r1
    192     vst1.8        {q0}, [r4], r1        @ 16 bytes store
    193     vdup.u8       q1, r9
    194     ldrb          r11, [r0], r1
    195     vst1.8        {q1}, [r4], r1        @ 16 bytes store
    196     vdup.u8       q2, r10
    197     vdup.u8       q3, r11
    198     subs          r2, r2, #8
    199     vst1.8        {q2}, [r4], r1        @ 16 bytes store
    200     vst1.8        {q3}, [r4], r1        @ 16 bytes store
    201     bne           loop_16
    202     b             end_func
    203 
    204 loop_32:                                @  /*hard coded for width=32 ,height =8,16*/
    205     ldrb          r8, [r0], r1
    206     ldrb          r9, [r0], r1
    207     vdup.u8       q0, r8
    208     ldrb          r10, [r0], r1
    209     vst1.8        {q0}, [r4]!           @ 16 bytes store
    210     vdup.u8       q1, r9
    211     vst1.8        {q0}, [r4], r6
    212     vst1.8        {q1}, [r4]!           @ 16 bytes store
    213     vdup.u8       q2, r10
    214     vst1.8        {q1}, [r4], r6        @ 16 bytes store
    215     ldrb          r11, [r0], r1
    216     vst1.8        {q2}, [r4]!           @ 16 bytes store
    217     vdup.u8       q3, r11
    218     vst1.8        {q2}, [r4], r6        @ 16 bytes store
    219     ldrb          r8, [r0], r1
    220     vst1.8        {q3}, [r4]!           @ 16 bytes store
    221     vdup.u8       q0, r8
    222     ldrb          r9, [r0], r1
    223     vst1.8        {q3}, [r4], r6        @ 16 bytes store
    224     ldrb          r10, [r0], r1
    225     vst1.8        {q0}, [r4]!           @ 16 bytes store
    226     vdup.u8       q1, r9
    227     vst1.8        {q0}, [r4], r6        @ 16 bytes store
    228     ldrb          r11, [r0], r1
    229     vst1.8        {q1}, [r4]!           @ 16 bytes store
    230     vdup.u8       q2, r10
    231     vst1.8        {q1}, [r4], r6        @ 16 bytes store
    232     vst1.8        {q2}, [r4]!           @ 16 bytes store
    233     vdup.u8       q3, r11
    234     vst1.8        {q2}, [r4], r6        @ 16 bytes store
    235     subs          r2, r2, #8
    236     vst1.8        {q3}, [r4]!           @ 16 bytes store
    237     vst1.8        {q3}, [r4], r6        @ 16 bytes store
    238     bne           loop_32
    239 
    240 
    241 
    242 end_func:
    243     ldmfd         sp!, {r4-r11, pc}     @Reload the registers from SP
    244 
    245 
    246 
    247 
    248 
    249 @**
    250 @*******************************************************************************
    251 @*
    252 @* @brief
    253 @*   Padding (chroma block) at the left of a 2d array
    254 @*
    255 @* @par Description:
    256 @*   The left column of a 2d array is replicated for pad_size times at the left
    257 @*
    258 @*
    259 @* @param[in] pu1_src
    260 @*  UWORD8 pointer to the source
    261 @*
    262 @* @param[in] src_strd
    263 @*  integer source stride
    264 @*
    265 @* @param[in] ht
    266 @*  integer height of the array
    267 @*
    268 @* @param[in] wd
    269 @*  integer width of the array (each colour component)
    270 @*
    271 @* @param[in] pad_size
    272 @*  integer -padding size of the array
    273 @*
    274 @* @param[in] ht
    275 @*  integer height of the array
    276 @*
    277 @* @param[in] wd
    278 @*  integer width of the array
    279 @*
    280 @* @returns
    281 @*
    282 @* @remarks
    283 @*  None
    284 @*
    285 @*******************************************************************************
    286 @*
    287 @#if PAD_LEFT_CHROMA == C
    288 @void ih264_pad_left_chroma(UWORD8 *pu1_src,
    289 @                            WORD32 src_strd,
    290 @                            WORD32 ht,
    291 @                            WORD32 pad_size)
    292 @{
    293 @   r0 => *pu1_src
    294 @   r1 => src_strd
    295 @   r2 => ht
    296 @   r3 => pad_size
    297 
    298 
    299 
    300     .global ih264_pad_left_chroma_a9q
    301 
    302 ih264_pad_left_chroma_a9q:
    303 
    304     stmfd         sp!, {r4-r11, lr}     @stack stores the values of the arguments
    305 
    306     sub           r4, r0, r3
    307     sub           r6, r1, #16
    308 
    309 
    310 loop_32_l_c:                            @  /*hard coded for width=32  ,height =4,8,12*/
    311     ldrh          r8, [r0], r1
    312     ldrh          r9, [r0], r1
    313     vdup.u16      q0, r8
    314     ldrh          r10, [r0], r1
    315     vst1.8        {q0}, [r4]!           @ 16 bytes store
    316     vdup.u16      q1, r9
    317     vst1.8        {q0}, [r4], r6        @ 16 bytes store
    318     ldrh          r11, [r0], r1
    319     vst1.8        {q1}, [r4]!           @ 16 bytes store
    320     vdup.u16      q2, r10
    321     vst1.8        {q1}, [r4], r6        @ 16 bytes store
    322     vdup.u16      q3, r11
    323     vst1.8        {q2}, [r4]!           @ 16 bytes store
    324     vst1.8        {q2}, [r4], r6        @ 16 bytes store
    325     subs          r2, r2, #4
    326     vst1.8        {q3}, [r4]!           @ 16 bytes store
    327     vst1.8        {q3}, [r4], r6        @ 16 bytes store
    328 
    329 
    330     beq           end_func_l_c          @/* Branching when ht=4*/
    331 
    332     ldrh          r8, [r0], r1
    333     ldrh          r9, [r0], r1
    334     vdup.u16      q0, r8
    335     ldrh          r10, [r0], r1
    336     vst1.8        {q0}, [r4]!           @ 16 bytes store
    337     vdup.u16      q1, r9
    338     vst1.8        {q0}, [r4], r6
    339     ldrh          r11, [r0], r1
    340     vst1.8        {q1}, [r4]!           @ 16 bytes store
    341     vdup.u16      q2, r10
    342     vst1.8        {q1}, [r4], r6        @ 16 bytes store
    343     vdup.u16      q3, r11
    344     vst1.8        {q2}, [r4]!           @ 16 bytes store
    345     vst1.8        {q2}, [r4], r6        @ 16 bytes store
    346     subs          r2, r2, #4
    347     vst1.8        {q3}, [r4]!           @ 16 bytes store
    348     vst1.8        {q3}, [r4], r6        @ 16 bytes store
    349 
    350     beq           end_func_l_c          @/* Branching when ht=8*/
    351     bne           loop_32_l_c
    352 
    353     ldrh          r8, [r0], r1
    354     ldrh          r9, [r0], r1
    355     vdup.u16      q0, r8
    356     ldrh          r10, [r0], r1
    357     vst1.8        {q0}, [r4]!           @ 16 bytes store
    358     vdup.u16      q1, r9
    359     vst1.8        {q0}, [r4], r6
    360     ldrh          r11, [r0], r1
    361     vst1.8        {q1}, [r4]!           @ 16 bytes store
    362     vdup.u16      q2, r10
    363     vst1.8        {q1}, [r4], r6        @ 16 bytes store
    364     vdup.u16      q3, r11
    365     vst1.8        {q2}, [r4]!           @ 16 bytes store
    366     vst1.8        {q2}, [r4], r6        @ 16 bytes store
    367     vst1.8        {q3}, [r4]!           @ 16 bytes store
    368     vst1.8        {q3}, [r4], r6        @ 16 bytes store
    369 
    370 end_func_l_c:
    371     ldmfd         sp!, {r4-r11, pc}     @Reload the registers from SP
    372 
    373 
    374 
    375 
    376 
    377 @**
    378 @*******************************************************************************
    379 @*
    380 @* @brief
    381 @* Padding (luma block) at the right of a 2d array
    382 @*
    383 @* @par Description:
    384 @* The right column of a 2d array is replicated for pad_size times at the right
    385 @*
    386 @*
    387 @* @param[in] pu1_src
    388 @*  UWORD8 pointer to the source
    389 @*
    390 @* @param[in] src_strd
    391 @*  integer source stride
    392 @*
    393 @* @param[in] ht
    394 @*  integer height of the array
    395 @*
    396 @* @param[in] wd
    397 @*  integer width of the array
    398 @*
    399 @* @param[in] pad_size
    400 @*  integer -padding size of the array
    401 @*
    402 @* @param[in] ht
    403 @*  integer height of the array
    404 @*
    405 @* @param[in] wd
    406 @*  integer width of the array
    407 @*
    408 @* @returns
    409 @*
    410 @* @remarks
    411 @*  None
    412 @*
    413 @*******************************************************************************
    414 @*
    415 @#if PAD_RIGHT_LUMA == C
    416 @void ih264_pad_right_luma(UWORD8 *pu1_src,
    417 @                        WORD32 src_strd,
    418 @                        WORD32 ht,
    419 @                        WORD32 pad_size)
    420 @{
    421 @    WORD32 row;
    422 @
    423 @    for(row = 0; row < ht; row++)
    424 @    {
    425 @        memset(pu1_src, *(pu1_src -1), pad_size);
    426 @
    427 @        pu1_src += src_strd;
    428 @    }
    429 @}
    430 @
    431 @   r0 => *pu1_src
    432 @   r1 => src_strd
    433 @   r2 => ht
    434 @   r3 => pad_size
    435 
    436 
    437 
    438     .global ih264_pad_right_luma_a9q
    439 
    440 ih264_pad_right_luma_a9q:
    441 
    442     stmfd         sp!, {r4-r11, lr}     @stack stores the values of the arguments
    443 
    444     mov           r4, r0
    445     sub           r6, r1, #16
    446     sub           r0, r0, #1
    447     subs          r5, r3, #16
    448     bne           loop_32
    449 loop_16_r: @  /*hard coded for width=16  ,height =8,16*/
    450     ldrb          r8, [r0], r1
    451     ldrb          r9, [r0], r1
    452     vdup.u8       q0, r8
    453     ldrb          r10, [r0], r1
    454     vst1.8        {q0}, [r4], r1        @ 16 bytes store
    455     vdup.u8       q1, r9
    456     vst1.8        {q1}, [r4], r1        @ 16 bytes store
    457     ldrb          r11, [r0], r1
    458     vdup.u8       q2, r10
    459     vdup.u8       q3, r11
    460     vst1.8        {q2}, [r4], r1        @ 16 bytes store
    461     ldrb          r8, [r0], r1
    462     vst1.8        {q3}, [r4], r1        @ 16 bytes store
    463     ldrb          r9, [r0], r1
    464     vdup.u8       q0, r8
    465     ldrb          r10, [r0], r1
    466     vst1.8        {q0}, [r4], r1        @ 16 bytes store
    467     vdup.u8       q1, r9
    468     ldrb          r11, [r0], r1
    469     vst1.8        {q1}, [r4], r1        @ 16 bytes store
    470     vdup.u8       q2, r10
    471     vdup.u8       q3, r11
    472     subs          r2, r2, #8
    473     vst1.8        {q2}, [r4], r1        @ 16 bytes store
    474     vst1.8        {q3}, [r4], r1        @ 16 bytes store
    475     bne           loop_16_r
    476     b             end_func_r
    477 
    478 loop_32_r:                              @  /*hard coded for width=32  ,height =8,16*/
    479     ldrb          r8, [r0], r1
    480     ldrb          r9, [r0], r1
    481     vdup.u8       q0, r8
    482     ldrb          r10, [r0], r1
    483     vst1.8        {q0}, [r4]!           @ 16 bytes store
    484     vdup.u8       q1, r9
    485     vst1.8        {q0}, [r4], r6
    486     vst1.8        {q1}, [r4]!           @ 16 bytes store
    487     vdup.u8       q2, r10
    488     vst1.8        {q1}, [r4], r6        @ 16 bytes store
    489     ldrb          r11, [r0], r1
    490     vst1.8        {q2}, [r4]!           @ 16 bytes store
    491     vdup.u8       q3, r11
    492     vst1.8        {q2}, [r4], r6        @ 16 bytes store
    493     ldrb          r8, [r0], r1
    494     vst1.8        {q3}, [r4]!           @ 16 bytes store
    495     ldrb          r9, [r0], r1
    496     vdup.u8       q0, r8
    497     vst1.8        {q3}, [r4], r6        @ 16 bytes store
    498     ldrb          r10, [r0], r1
    499     vst1.8        {q0}, [r4]!           @ 16 bytes store
    500     vdup.u8       q1, r9
    501     vst1.8        {q0}, [r4], r6        @ 16 bytes store
    502     ldrb          r11, [r0], r1
    503     vst1.8        {q1}, [r4]!           @ 16 bytes store
    504     vdup.u8       q2, r10
    505     vst1.8        {q1}, [r4], r6        @ 16 bytes store
    506     vst1.8        {q2}, [r4]!           @ 16 bytes store
    507     vdup.u8       q3, r11
    508     vst1.8        {q2}, [r4], r6        @ 16 bytes store
    509     subs          r2, r2, #8
    510     vst1.8        {q3}, [r4]!           @ 16 bytes store
    511     vst1.8        {q3}, [r4], r6        @ 16 bytes store
    512     bne           loop_32_r
    513 
    514 
    515 
    516 end_func_r:
    517     ldmfd         sp!, {r4-r11, pc}     @Reload the registers from SP
    518 
    519 
    520 
    521 
    522 
    523 @**
    524 @*******************************************************************************
    525 @*
    526 @* @brief
    527 @;* Padding (chroma block) at the right of a 2d array
    528 @*
    529 @* @par Description:
    530 @* The right column of a 2d array is replicated for pad_size times at the right
    531 @*
    532 @*
    533 @* @param[in] pu1_src
    534 @;*  UWORD8 pointer to the source
    535 @*
    536 @* @param[in] src_strd
    537 @*  integer source stride
    538 @*
    539 @* @param[in] ht
    540 @;*  integer height of the array
    541 @*
    542 @* @param[in] wd
    543 @*  integer width of the array (each colour component)
    544 @*
    545 @* @param[in] pad_size
    546 @*  integer -padding size of the array
    547 @*
    548 @* @param[in] ht
    549 @;*  integer height of the array
    550 @*
    551 @* @param[in] wd
    552 @*  integer width of the array
    553 @*
    554 @* @returns
    555 @*
    556 @* @remarks
    557 @*  None
    558 @*
    559 @*******************************************************************************
    560 @*
    561 @#if PAD_RIGHT_CHROMA == C
    562 @void ih264_pad_right_chroma(UWORD8 *pu1_src,
    563 @                        WORD32 src_strd,
    564 @                        WORD32 ht,
    565 @                        WORD32 pad_size)
    566 @   r0 => *pu1_src
    567 @   r1 => src_strd
    568 @   r2 => ht
    569 @   r3 => pad_size
    570 
    571 
    572 
    573     .global ih264_pad_right_chroma_a9q
    574 
    575 ih264_pad_right_chroma_a9q:
    576 
    577     stmfd         sp!, {r4-r11, lr}     @stack stores the values of the arguments
    578 
    579     mov           r4, r0
    580     sub           r6, r1, #16
    581     sub           r0, r0, #2
    582 loop_32_r_c: @  /*hard coded for width=32 ,height =8,4*/
    583     ldrh          r8, [r0], r1
    584     ldrh          r9, [r0], r1
    585     vdup.u16      q0, r8
    586     ldrh          r10, [r0], r1
    587     vst1.8        {q0}, [r4]!           @ 16 bytes store
    588     vdup.u16      q1, r9
    589     vst1.8        {q0}, [r4], r6
    590     vst1.8        {q1}, [r4]!           @ 16 bytes store
    591     vdup.u16      q2, r10
    592     vst1.8        {q1}, [r4], r6        @ 16 bytes store
    593     subs          r2, r2, #4
    594     ldrh          r11, [r0], r1
    595     vst1.8        {q2}, [r4]!           @ 16 bytes store
    596     vdup.u16      q3, r11
    597     vst1.8        {q2}, [r4], r6        @ 16 bytes store
    598     vst1.8        {q3}, [r4]!           @ 16 bytes store
    599     vst1.8        {q3}, [r4], r6        @ 16 bytes store
    600 
    601     beq           end_func_r_c          @/* Branching when ht=4*/
    602 
    603     ldrh          r8, [r0], r1
    604     vdup.u16      q0, r8
    605     ldrh          r9, [r0], r1
    606     ldrh          r10, [r0], r1
    607     vst1.8        {q0}, [r4]!           @ 16 bytes store
    608     vdup.u16      q1, r9
    609     vst1.8        {q0}, [r4], r6        @ 16 bytes store
    610     ldrh          r11, [r0], r1
    611     vst1.8        {q1}, [r4]!           @ 16 bytes store
    612     vdup.u16      q2, r10
    613     vst1.8        {q1}, [r4], r6        @ 16 bytes store
    614     vst1.8        {q2}, [r4]!           @ 16 bytes store
    615     vdup.u16      q3, r11
    616     vst1.8        {q2}, [r4], r6        @ 16 bytes store
    617     subs          r2, r2, #4
    618     vst1.8        {q3}, [r4]!           @ 16 bytes store
    619     vst1.8        {q3}, [r4], r6        @ 16 bytes store
    620 
    621     beq           end_func_r_c          @/* Branching when ht=8*/
    622     bne           loop_32_r_c
    623 
    624     ldrh          r8, [r0], r1
    625     vdup.u16      q0, r8
    626     ldrh          r9, [r0], r1
    627     ldrh          r10, [r0], r1
    628     vst1.8        {q0}, [r4]!           @ 16 bytes store
    629     vdup.u16      q1, r9
    630     vst1.8        {q0}, [r4], r6        @ 16 bytes store
    631     ldrh          r11, [r0], r1
    632     vst1.8        {q1}, [r4]!           @ 16 bytes store
    633     vdup.u16      q2, r10
    634     vst1.8        {q1}, [r4], r6        @ 16 bytes store
    635     vst1.8        {q2}, [r4]!           @ 16 bytes store
    636     vdup.u16      q3, r11
    637     vst1.8        {q2}, [r4], r6        @ 16 bytes store
    638     vst1.8        {q3}, [r4]!           @ 16 bytes store
    639     vst1.8        {q3}, [r4], r6        @ 16 bytes store
    640 
    641 end_func_r_c:
    642     ldmfd         sp!, {r4-r11, pc}     @Reload the registers from SP
    643 
    644 
    645 
    646 
    647 
    648