;------------------------------------------------------------------------------
;
; CopyMem() worker for ARM
;
; This file started out as C code that did 64-bit moves if the buffer was
; 32-bit aligned and a byte copy otherwise, with a byte copy for any trailing
; bytes. It was later updated to do 32-byte copies using stm/ldm.
;
; Copyright (c) 2008 - 2010, Apple Inc. All rights reserved.<BR>
; Copyright (c) 2016, Linaro Ltd. All rights reserved.<BR>
; This program and the accompanying materials
; are licensed and made available under the terms and conditions of the BSD License
; which accompanies this distribution.  The full text of the license may be found at
; http://opensource.org/licenses/bsd-license.php
;
; THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
; WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
;
;------------------------------------------------------------------------------
     20 
     EXPORT  InternalMemCopyMem
     ; NOTE(review): the area is named SetMem although this file implements
     ; CopyMem. The name is left unchanged in case anything refers to it.
     AREA    SetMem, CODE, READONLY
     THUMB

;------------------------------------------------------------------------------
; InternalMemCopyMem
;
; Copies a block of bytes between two buffers that may overlap. The copy runs
; forwards when that cannot overwrite unread source bytes and backwards (from
; the end of both buffers) otherwise.
;
; In:    r0 = destination
;        r1 = source
;        r2 = length in bytes
; Out:   r0 = destination (the value passed in)
; Clobb: r2, r3, r12, flags. r4-r11 are saved and restored. lr is saved on
;        entry and used as a scratch register; the return is done by popping
;        the saved lr into pc.
; Stack: 36 bytes (r4-r11, lr)
;
; Register roles inside the routine:
;   r11    original destination, kept for the return value
;   r10    destination cursor
;   r14    source cursor
;   r12    bytes left to copy
;   r0     after the checks: 1 = copy 32-byte blocks, 0 = copy single bytes
;   r2-r9  32-byte transfer buffer for ldm/stm
;
; The 32-byte block path is taken only when the length is at least 32 and both
; cursors are 16-byte aligned. Once fewer than 32 bytes remain the routine
; drops to the byte loop for the tail.
;------------------------------------------------------------------------------
InternalMemCopyMem
  stmfd   sp!, {r4-r11, lr}

  ; Move the arguments out of r0-r2, which are reused as flag and buffer below
  mov     r11, r0                 ; r11 = destination, returned to the caller
  mov     r10, r0                 ; r10 = destination cursor
  mov     r12, r2                 ; r12 = bytes left to copy
  mov     r14, r1                 ; r14 = source cursor

  ; An empty copy has nothing to do. This check is required for correctness:
  ; with length == 0 and destination == source the backward byte loop would
  ; be entered, and it decrements the count before testing it, so the count
  ; would wrap around to 0xFFFFFFFF.
  cmp     r12, #0
  beq     memcopy_end

memcopy_check_overlapped
  cmp     r11, r1
  ; destination == source: the data is already in place
  beq     memcopy_end
  ; destination < source: a forward copy never overwrites unread source bytes
  bcc     memcopy_check_optim_default

  ; destination > source: a forward copy is still safe when
  ; length < (destination - source), because the buffers do not overlap
  rsb     r3, r1, r11             ; r3 = destination - source
  cmp     r12, r3
  bcc     memcopy_check_optim_default
  b       memcopy_check_optim_overlap

memcopy_check_optim_default
  ; Forward copy. Select the 32-byte block path (r0 = 1) only when
  ; length >= 32 and both destination and source are 16-byte aligned,
  ; otherwise select the byte path (r0 = 0).
  tst     r0, #0xF
  movne   r0, #0
  bne     memcopy_default
  tst     r1, #0xF
  movne   r3, #0
  moveq   r3, #1                  ; r3 = 1 when the source is 16-byte aligned
  cmp     r2, #31
  movls   r0, #0
  andhi   r0, r3, #1
  b       memcopy_default

memcopy_check_optim_overlap
  ; Backward copy: both cursors start one byte past the end of their buffer
  add     r10, r11, r12           ; r10 = destination end
  add     r14, r12, r1            ; r14 = source end

  ; Select the 32-byte block path (r0 = 1) only when length >= 32 and both
  ; end addresses are 16-byte aligned, otherwise the byte path (r0 = 0)
  cmp     r2, #31
  movls   r0, #0
  movhi   r0, #1
  tst     r10, #0xF
  movne   r0, #0
  tst     r14, #0xF
  movne   r0, #0
  b       memcopy_overlapped

memcopy_overlapped_non_optim
  ; Backward byte loop. r12 is never 0 on entry: a zero length exits early
  ; above and the block loop below only comes back here with bytes left.
  ldrb    r3, [r14, #-1]!         ; step the source cursor back, then read
  strb    r3, [r10, #-1]!         ; step the destination cursor back, then write
  subs    r12, r12, #1
  bne     memcopy_overlapped_non_optim
  b       memcopy_end

; r10 = destination end, r14 = source end
memcopy_overlapped
  ; Fall back to the byte loop when the block path is not (or no longer) selected
  cmp     r0, #0
  beq     memcopy_overlapped_non_optim

  ; Move one 32-byte block, reading all of it before writing any of it
  sub     r14, r14, #32
  sub     r10, r10, #32
  ldmia   r14, {r2-r9}
  stmia   r10, {r2-r9}
  sub     r12, r12, #32

  ; Fewer than 32 bytes left: finish the tail with the byte loop
  cmp     r12, #31
  movls   r0, #0

  ; while (length != 0)
  cmp     r12, #0
  bne     memcopy_overlapped
  b       memcopy_end

memcopy_default_non_optim
  ; Forward byte copy of one byte, then fall through to the length check
  ldrb    r3, [r14], #1
  sub     r12, r12, #1
  strb    r3, [r10], #1

memcopy_default
  cmp     r12, #0
  beq     memcopy_end

; r10 = destination cursor, r14 = source cursor
memcopy_default_loop
  ; Fall back to the byte copy when the block path is not (or no longer) selected
  cmp     r0, #0
  beq     memcopy_default_non_optim

  ; Move one 32-byte block and advance both cursors
  ldmia   r14!, {r2-r9}
  stmia   r10!, {r2-r9}
  sub     r12, r12, #32

  ; Fewer than 32 bytes left: finish the tail with the byte copy
  cmp     r12, #31
  movls   r0, #0

  ; while (length != 0)
  cmp     r12, #0
  bne     memcopy_default_loop

memcopy_end
  mov     r0, r11                 ; return the original destination
  ldmfd   sp!, {r4-r11, pc}

  END
    147 
    148