Home | History | Annotate | Download | only in Arm
      1 ;
      2 ; Copyright (c) 2013 - 2016, Linaro Limited
      3 ; All rights reserved.
      4 ;
      5 ; Redistribution and use in source and binary forms, with or without
      6 ; modification, are permitted provided that the following conditions are met:
      7 ;     * Redistributions of source code must retain the above copyright
      8 ;       notice, this list of conditions and the following disclaimer.
      9 ;     * Redistributions in binary form must reproduce the above copyright
     10 ;       notice, this list of conditions and the following disclaimer in the
     11 ;       documentation and/or other materials provided with the distribution.
     12 ;     * Neither the name of the Linaro nor the
     13 ;       names of its contributors may be used to endorse or promote products
     14 ;       derived from this software without specific prior written permission.
     15 ;
     16 ; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 ; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 ; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 ; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 ; HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 ; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 ; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 ; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 ; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 ; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 ; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 ;
     28 
     29 ; Parameters and result.
     30 #define src1      r0
     31 #define src2      r1
     32 #define limit     r2
     33 #define result    r0
     34 
     35 ; Internal variables.
     36 #define data1     r3
     37 #define data2     r4
     38 #define limit_wd  r5
     39 #define diff      r6
     40 #define tmp1      r7
     41 #define tmp2      r12
     42 #define pos       r8
     43 #define mask      r14
     44 
     45     EXPORT  InternalMemCompareMem
     46     THUMB
     47     AREA    CompareMem, CODE, READONLY
     48 
     49 InternalMemCompareMem
     50     push    {r4-r8, lr}
     51     eor     tmp1, src1, src2
     52     tst     tmp1, #3
     53     bne     Lmisaligned4
     54     ands    tmp1, src1, #3
     55     bne     Lmutual_align
     56     add     limit_wd, limit, #3
     57     nop.w
     58     lsr     limit_wd, limit_wd, #2
     59 
     60     ; Start of performance-critical section  -- one 32B cache line.
     61 Lloop_aligned
     62     ldr     data1, [src1], #4
     63     ldr     data2, [src2], #4
     64 Lstart_realigned
     65     subs    limit_wd, limit_wd, #1
     66     eor     diff, data1, data2        ; Non-zero if differences found.
     67     cbnz    diff, L0
     68     bne     Lloop_aligned
     69     ; End of performance-critical section  -- one 32B cache line.
     70 
     71     ; Not reached the limit, must have found a diff.
     72 L0
     73     cbnz    limit_wd, Lnot_limit
     74 
     75     // Limit % 4 == 0 => all bytes significant.
     76     ands    limit, limit, #3
     77     beq     Lnot_limit
     78 
     79     lsl     limit, limit, #3              // Bits -> bytes.
     80     mov     mask, #~0
     81     lsl     mask, mask, limit
     82     bic     data1, data1, mask
     83     bic     data2, data2, mask
     84 
     85     orr     diff, diff, mask
     86 
     87 Lnot_limit
     88     rev     diff, diff
     89     rev     data1, data1
     90     rev     data2, data2
     91 
     92     ; The MS-non-zero bit of DIFF marks either the first bit
     93     ; that is different, or the end of the significant data.
     94     ; Shifting left now will bring the critical information into the
     95     ; top bits.
     96     clz     pos, diff
     97     lsl     data1, data1, pos
     98     lsl     data2, data2, pos
     99 
    100     ; But we need to zero-extend (char is unsigned) the value and then
    101     ; perform a signed 32-bit subtraction.
    102     lsr     data1, data1, #28
    103     sub     result, data1, data2, lsr #28
    104     pop     {r4-r8, pc}
    105 
    106 Lmutual_align
    107     ; Sources are mutually aligned, but are not currently at an
    108     ; alignment boundary.  Round down the addresses and then mask off
    109     ; the bytes that precede the start point.
    110     bic     src1, src1, #3
    111     bic     src2, src2, #3
    112     add     limit, limit, tmp1          ; Adjust the limit for the extra.
    113     lsl     tmp1, tmp1, #2              ; Bytes beyond alignment -> bits.
    114     ldr     data1, [src1], #4
    115     neg     tmp1, tmp1                  ; Bits to alignment -32.
    116     ldr     data2, [src2], #4
    117     mov     tmp2, #~0
    118 
    119     ; Little-endian.  Early bytes are at LSB.
    120     lsr     tmp2, tmp2, tmp1            ; Shift (tmp1 & 31).
    121     add     limit_wd, limit, #3
    122     orr     data1, data1, tmp2
    123     orr     data2, data2, tmp2
    124     lsr     limit_wd, limit_wd, #2
    125     b       Lstart_realigned
    126 
    127 Lmisaligned4
    128     sub     limit, limit, #1
    129 L1
    130     // Perhaps we can do better than this.
    131     ldrb    data1, [src1], #1
    132     ldrb    data2, [src2], #1
    133     subs    limit, limit, #1
    134     it      cs
    135     cmpcs   data1, data2
    136     beq     L1
    137     sub     result, data1, data2
    138     pop     {r4-r8, pc}
    139 
    140     END
    141