Home | History | Annotate | Download | only in Arm
      1 #------------------------------------------------------------------------------
      2 #
      3 # CopyMem() worker for ARM
      4 #
      5 # This file started out as C code that did 64 bit moves if the buffer was
      6 # 32-bit aligned, else it does a byte copy. It also does a byte copy for
      7 # any trailing bytes. It was updated to do 32-byte copies using stm/ldm.
      8 #
      9 # Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR>
     10 # This program and the accompanying materials
     11 # are licensed and made available under the terms and conditions of the BSD License
     12 # which accompanies this distribution.  The full text of the license may be found at
     13 # http://opensource.org/licenses/bsd-license.php
     14 #
     15 # THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
     16 # WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
     17 #
     18 #------------------------------------------------------------------------------
     19 
     20 /**
     21   Copy Length bytes from Source to Destination. Overlap is OK.
     22 
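     23   This implementation copies backward, starting from the end of both buffers, when Destination lies above Source at a distance of at most Length bytes; otherwise it copies forward. In both directions it moves 32 bytes at a time with ldm/stm while at least 32 bytes remain and the pointers involved are 16-byte aligned, and byte by byte otherwise.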
     24 
     25   @param  Destination Target of copy
     26   @param  Source      Place to copy from
     27   @param  Length      Number of bytes to copy
     28 
     29   @return Destination
     30 
     31 
     32 VOID *
     33 EFIAPI
     34 InternalMemCopyMem (
     35   OUT     VOID                      *DestinationBuffer,
     36   IN      CONST VOID                *SourceBuffer,
     37   IN      UINTN                     Length
     38   )
     39 **/
     40 .text
     41 .align 2
          // GCC_ASM_EXPORT and ASM_PFX are macros defined outside this file; presumably they
          // export the symbol and apply the toolchain's symbol prefix -- TODO confirm.
     42 GCC_ASM_EXPORT(InternalMemCopyMem)
     43 
          // InternalMemCopyMem: overlap-safe memory copy.
          //   In:  r0 = DestinationBuffer, r1 = SourceBuffer, r2 = Length (bytes)
          //   Out: r0 = the original DestinationBuffer
          // The copy runs forward (memcopy_default) unless dest > source and
          // length >= dest - source, in which case it runs backward from the end of
          // both buffers (memcopy_overlapped). Each direction has a 32-byte ldm/stm
          // path and a byte-by-byte fallback, selected by the flag kept in r0.
     44 ASM_PFX(InternalMemCopyMem):
          // Save r4-r11 and lr: r2-r9 are the ldm/stm transfer registers, r10/r11 hold
          // destination pointers and lr (r14) is reused below as the source pointer.
     45   stmfd  sp!, {r4-r11, lr}
     46   // Keep the inputs in extra registers: r11 = original destination (returned at memcopy_end),
          // r10 = working destination pointer, r14 = working source pointer, r12 = bytes remaining.
          // r0 is reused further down as the "32-byte path enabled" flag (1 = enabled, 0 = byte copy).
     47   mov  r11, r0
     48   mov  r10, r0
     49   mov  r12, r2
     50   mov  r14, r1
     51 
     52 memcopy_check_overlapped:
          // Choose the copy direction. Both branches below test the flags of this one cmp.
     53   cmp  r11, r1
     54   // If (dest < source), unsigned compare -> copy forward
     55   bcc  memcopy_check_optim_default
     56   // If (dest <= source). But with the previous condition ->  If (dest == source): nothing to copy
     57   bls  memcopy_end
     58 
     59   // Here dest > source. r3 = dest - source; if (length < dest - source), i.e. (source + length < dest),
          // the buffers do not overlap and the forward copy is used.
     60   rsb  r3, r1, r11
     61   cmp  r12, r3
     62   bcc  memcopy_check_optim_default
     63 
     64   // If (length == 0)
          // NOTE(review): this looks unreachable -- r3 >= 1 at this point, so a zero length
          // has already taken the bcc above. Confirm before removing.
     65   cmp  r12, #0
     66   beq  memcopy_end
     67 
          // dest > source and length >= dest - source: copy backward from the end
     68   b     memcopy_check_optim_overlap
     69 
     70 memcopy_check_optim_default:
     71   // Check if we can use the optimized path ((length >= 32) && destination 16-byte aligned && source 16-byte aligned) for the memcopy (optimized path if r0 == 1)
          // The mask 0xF tests the low four address bits, i.e. 16-byte alignment.
     72   tst  r0, #0xF
          // Destination not aligned: r0 = 0 selects the byte copy
     73   movne  r0, #0
     74   bne   memcopy_default
          // r3 = 1 if the source is 16-byte aligned, else 0
     75   tst  r1, #0xF
     76   movne  r3, #0
     77   moveq  r3, #1
          // length <= 31 -> r0 = 0; length > 31 -> r0 = source-aligned flag (the destination is known to be aligned here)
     78   cmp  r2, #31
     79   movls  r0, #0
     80   andhi  r0, r3, #1
     81   b     memcopy_default
     82 
     83 memcopy_check_optim_overlap:
     84   // r10 = dest_end, r14 = source_end (one past the last byte of each buffer)
     85   add  r10, r11, r12
     86   add  r14, r12, r1
     87 
     88   // Are we in the optimized case ((length >= 32) && dest_end 16-byte aligned && source_end 16-byte aligned)
          // r0 starts as (length > 31) and is cleared if either end pointer has any of its low four bits set.
     89   cmp  r2, #31
     90   movls  r0, #0
     91   movhi  r0, #1
     92   tst  r10, #0xF
     93   movne  r0, #0
     94   tst  r14, #0xF
     95   movne  r0, #0
     96   b  memcopy_overlapped
     97 
          // Backward byte copy. r12 is non-zero on every entry to this loop.
     98 memcopy_overlapped_non_optim:
     99   // We read 1 byte from the end of the source buffer
    100   sub  r3, r14, #1
    101   sub  r12, r12, #1
    102   ldrb  r3, [r3, #0]
    103   sub  r2, r10, #1
          // Flags set here are consumed by the bne below; the sub/strb in between do not modify them
    104   cmp  r12, #0
    105   // We write 1 byte at the end of the dest buffer
    106   sub  r10, r10, #1
    107   sub  r14, r14, #1
    108   strb  r3, [r2, #0]
    109   bne  memcopy_overlapped_non_optim
    110   b   memcopy_end
    111 
    112 // r10 = dest_end, r14 = source_end
    113 memcopy_overlapped:
    114   // Are we in the optimized case ? (r0 == 0 -> fall back to the backward byte copy)
    115   cmp  r0, #0
    116   beq  memcopy_overlapped_non_optim
    117 
    118   // Optimized Overlapped - Read 32 bytes
          // r14 is stepped back by 32 first, then all eight words are loaded (no writeback) before anything is stored
    119   sub  r14, r14, #32
    120   sub  r12, r12, #32
    121   cmp  r12, #31
    122   ldmia  r14, {r2-r9}
    123 
    124   // If the remaining length is less than 32 then disable optim (uses the flags of "cmp r12, #31")
    125   movls  r0, #0
    126 
          // Flags for the loop-closing bne below; the sub/stmia in between do not modify them
    127   cmp  r12, #0
    128 
    129   // Optimized Overlapped - Write 32 bytes
    130   sub  r10, r10, #32
    131   stmia  r10, {r2-r9}
    132 
    133   // while (length != 0)
    134   bne  memcopy_overlapped
    135   b   memcopy_end
    136 
    137 memcopy_default_non_optim:
    138   // Byte copy (forward): post-indexed load/store advance r14 and r10 by one
    139   ldrb  r3, [r14], #1
    140   sub  r12, r12, #1
    141   strb  r3, [r10], #1
          // Falls through to the remaining-length check below
    142 
    143 memcopy_default:
          // Nothing (left) to copy -> done
    144   cmp  r12, #0
    145   beq  memcopy_end
    146 
    147 // r10 = dest, r14 = source
    148 memcopy_default_loop:
          // r0 == 0 -> forward byte copy
    149   cmp  r0, #0
    150   beq  memcopy_default_non_optim
    151 
    152   // Optimized memcopy - Read 32 Bytes (ldmia with writeback advances r14 by 32)
    153   sub  r12, r12, #32
    154   cmp  r12, #31
    155   ldmia  r14!, {r2-r9}
    156 
    157   // If the remaining length is less than 32 then disable optim (uses the flags of "cmp r12, #31")
    158   movls  r0, #0
    159 
          // Flags for the loop-closing bne below; stmia does not modify them
    160   cmp  r12, #0
    161 
    162   // Optimized memcopy - Write 32 Bytes (stmia with writeback advances r10 by 32)
    163   stmia  r10!, {r2-r9}
    164 
    165   // while (length != 0)
    166   bne  memcopy_default_loop
    167 
    168 memcopy_end:
          // Return the original destination; restore r4-r11 and return by loading the saved lr into pc
    169   mov  r0, r11
    170   ldmfd  sp!, {r4-r11, pc}
    171 
    172