Home | History | Annotate | Download | only in Arm
      1 ; Copyright (c) 2010-2011, Linaro Limited
      2 ; All rights reserved.
      3 ;
      4 ; Redistribution and use in source and binary forms, with or without
      5 ; modification, are permitted provided that the following conditions
      6 ; are met:
      7 ;
      8 ;    * Redistributions of source code must retain the above copyright
      9 ;    notice, this list of conditions and the following disclaimer.
     10 ;
     11 ;    * Redistributions in binary form must reproduce the above copyright
     12 ;    notice, this list of conditions and the following disclaimer in the
     13 ;    documentation and/or other materials provided with the distribution.
     14 ;
     15 ;    * Neither the name of Linaro Limited nor the names of its
     16 ;    contributors may be used to endorse or promote products derived
     17 ;    from this software without specific prior written permission.
     18 ;
     19 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 ; HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 ;
     31 
     32 ;
     33 ; Written by Dave Gilbert <david.gilbert (a] linaro.org>
     34 ;
     35 ; This memchr routine is optimised on a Cortex-A9 and should work on
     36 ; all ARMv7 processors.   It has a fast past for short sizes, and has
     37 ; an optimised path for large data sets; the worst case is finding the
     38 ; match early in a large data set.
     39 ;
     40 
     41 
     42 ; 2011-02-07 david.gilbert (a] linaro.org
     43 ;    Extracted from local git a5b438d861
     44 ; 2011-07-14 david.gilbert (a] linaro.org
     45 ;    Import endianness fix from local git ea786f1b
     46 ; 2011-12-07 david.gilbert (a] linaro.org
     47 ;    Removed unneeded cbz from align loop
     48 
     49 ; this lets us check a flag in a 00/ff byte easily in either endianness
     50 #define CHARTSTMASK(c) 1<<(c*8)
     51 
     52     EXPORT  InternalMemScanMem8
     53     AREA    ScanMem, CODE, READONLY
     54     THUMB
     55 
     56 InternalMemScanMem8
     57     ; r0 = start of memory to scan
     58     ; r1 = length
     59     ; r2 = character to look for
     60     ; returns r0 = pointer to character or NULL if not found
     61     uxtb    r2, r2        ; Don't think we can trust the caller to actually pass a char
     62 
     63     cmp     r1, #16       ; If it's short don't bother with anything clever
     64     blt     L20
     65 
     66     tst     r0, #7        ; If it's already aligned skip the next bit
     67     beq     L10
     68 
     69     ; Work up to an aligned point
     70 L5
     71     ldrb    r3, [r0],#1
     72     subs    r1, r1, #1
     73     cmp     r3, r2
     74     beq     L50           ; If it matches exit found
     75     tst     r0, #7
     76     bne     L5            ; If not aligned yet then do next byte
     77 
     78 L10
     79     ; At this point, we are aligned, we know we have at least 8 bytes to work with
     80     push    {r4-r7}
     81     orr     r2, r2, r2, lsl #8  ; expand the match word across to all bytes
     82     orr     r2, r2, r2, lsl #16
     83     bic     r4, r1, #7    ; Number of double words to work with
     84     mvns    r7, #0        ; all F's
     85     movs    r3, #0
     86 
     87 L15
     88     ldmia   r0!, {r5,r6}
     89     subs    r4, r4, #8
     90     eor     r5, r5, r2    ; Get it so that r5,r6 have 00's where the bytes match the target
     91     eor     r6, r6, r2
     92     uadd8   r5, r5, r7    ; Parallel add 0xff - sets the GE bits for anything that wasn't 0
     93     sel     r5, r3, r7    ; bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
     94     uadd8   r6, r6, r7    ; Parallel add 0xff - sets the GE bits for anything that wasn't 0
     95     sel     r6, r5, r7    ; chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION
     96     cbnz    r6, L60
     97     bne     L15           ; (Flags from the subs above) If not run out of bytes then go around again
     98 
     99     pop     {r4-r7}
    100     and     r2, r2, #0xff ; Get r2 back to a single character from the expansion above
    101     and     r1, r1, #7    ; Leave the count remaining as the number after the double words have been done
    102 
    103 L20
    104     cbz     r1, L40       ; 0 length or hit the end already then not found
    105 
    106 L21 ; Post aligned section, or just a short call
    107     ldrb    r3, [r0], #1
    108     subs    r1, r1, #1
    109     eor     r3, r3, r2    ; r3 = 0 if match - doesn't break flags from sub
    110     cbz     r3, L50
    111     bne     L21           ; on r1 flags
    112 
    113 L40
    114     movs    r0, #0        ; not found
    115     bx      lr
    116 
    117 L50
    118     subs    r0, r0, #1    ; found
    119     bx      lr
    120 
    121 L60 ; We're here because the fast path found a hit - now we have to track down exactly which word it was
    122     ; r0 points to the start of the double word after the one that was tested
    123     ; r5 has the 00/ff pattern for the first word, r6 has the chained value
    124     cmp     r5, #0
    125     itte    eq
    126     moveq   r5, r6        ; the end is in the 2nd word
    127     subeq   r0, r0, #3    ; Points to 2nd byte of 2nd word
    128     subne   r0, r0, #7    ; or 2nd byte of 1st word
    129 
    130     ; r0 currently points to the 3rd byte of the word containing the hit
    131     tst     r5, #CHARTSTMASK(0)     ; 1st character
    132     bne     L61
    133     adds    r0, r0, #1
    134     tst     r5, #CHARTSTMASK(1)     ; 2nd character
    135     ittt    eq
    136     addeq   r0, r0 ,#1
    137     tsteq   r5, #(3 << 15)          ; 2nd & 3rd character
    138     ; If not the 3rd must be the last one
    139     addeq   r0, r0, #1
    140 
    141 L61
    142     pop     {r4-r7}
    143     subs    r0, r0, #1
    144     bx      lr
    145 
    146     END
    147 
    148