;
; Copyright (c) 2013 - 2016, Linaro Limited
; All rights reserved.
;
; Redistribution and use in source and binary forms, with or without
; modification, are permitted provided that the following conditions are met:
;     * Redistributions of source code must retain the above copyright
;       notice, this list of conditions and the following disclaimer.
;     * Redistributions in binary form must reproduce the above copyright
;       notice, this list of conditions and the following disclaimer in the
;       documentation and/or other materials provided with the distribution.
;     * Neither the name of the Linaro nor the
;       names of its contributors may be used to endorse or promote products
;       derived from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;

; Parameters and result.
#define src1      r0
#define src2      r1
#define limit     r2
#define result    r0

; Internal variables.
#define data1     r3
#define data2     r4
#define limit_wd  r5
#define diff      r6
#define tmp1      r7
#define tmp2      r12
#define pos       r8
#define mask      r14

    EXPORT  InternalMemCompareMem
    THUMB
    AREA    CompareMem, CODE, READONLY

;-----------------------------------------------------------------------
; InternalMemCompareMem -- compare two byte buffers.
;
; NOTE(review): presumably called from C as
;   INTN InternalMemCompareMem (CONST VOID *, CONST VOID *, UINTN)
; -- confirm against the C prototype, which is not visible in this file.
;
; In:    r0 = first buffer, r1 = second buffer, r2 = number of bytes.
; Out:   r0 = 0 when the first r2 bytes are identical; otherwise a
;        non-zero value whose SIGN is that of (first differing byte of
;        the first buffer) - (first differing byte of the second buffer).
;        On the word-at-a-time paths the magnitude is NOT the byte
;        difference (only four bits are kept); on the byte-at-a-time
;        path it is.
; Clobb: r0-r3, r12 and the flags.  r4-r8 and lr are pushed on entry
;        and restored on exit (the return is the pop into pc).
; Notes: * A byte count of zero is not handled: the word and byte
;          counters wrap and the loops run past the buffers.  The
;          caller has to filter that case out.
;        * When both addresses have the same offset within a word the
;          buffers are read with aligned 32-bit loads, so up to three
;          bytes before the start and after the end are read (never
;          outside the aligned words that contain the buffers).
;        * The byte-order handling (rev, masks) assumes little-endian
;          data.
;-----------------------------------------------------------------------
InternalMemCompareMem
    push    {r4-r8, lr}
    eor     tmp1, src1, src2
    tst     tmp1, #3                    ; Different offset within a word?
    bne     Lmisaligned4                ; Yes: fall back to a byte loop.
    ands    tmp1, src1, #3              ; tmp1 = bytes past the word boundary.
    bne     Lmutual_align
    add     limit_wd, limit, #3
    nop.w                               ; Padding ahead of the hot loop.
    lsr     limit_wd, limit_wd, #2      ; limit_wd = byte count rounded up to words.

    ; Start of performance-critical section  -- one 32B cache line.
Lloop_aligned
    ldr     data1, [src1], #4
    ldr     data2, [src2], #4
Lstart_realigned
    subs    limit_wd, limit_wd, #1
    eor     diff, data1, data2          ; Non-zero if differences found.
    cbnz    diff, L0                    ; cbnz leaves the flags from subs intact.
    bne     Lloop_aligned
    ; End of performance-critical section  -- one 32B cache line.

    ; Either a difference was found or the word count ran out.
L0
    cbnz    limit_wd, Lnot_limit        ; Words remain: the difference is in range.

    ; Last word.  Limit % 4 == 0 => all bytes significant.
    ands    limit, limit, #3
    beq     Lnot_limit

    lsl     limit, limit, #3            ; Bytes -> bits.
    mov     mask, #~0
    lsl     mask, mask, limit           ; Ones over the bytes past the end.
    bic     data1, data1, mask
    bic     data2, data2, mask

    orr     diff, diff, mask            ; Mark the end of the significant data.

Lnot_limit
    rev     diff, diff
    rev     data1, data1
    rev     data2, data2

    ; The MS-non-zero bit of DIFF marks either the first bit
    ; that is different, or the end of the significant data.
    ; Shifting left now will bring the critical information into the
    ; top bits.  When DIFF is zero the shift count is 32 and both
    ; values become zero.
    clz     pos, diff
    lsl     data1, data1, pos
    lsl     data2, data2, pos

    ; But we need to zero-extend (char is unsigned) the value and then
    ; perform a signed 32-bit subtraction.  Only the top four bits,
    ; starting at the first differing bit, take part.
    lsr     data1, data1, #28
    sub     result, data1, data2, lsr #28
    pop     {r4-r8, pc}

Lmutual_align
    ; Sources are mutually aligned, but are not currently at an
    ; alignment boundary.  Round down the addresses and then mask off
    ; the bytes that precede the start point.
    bic     src1, src1, #3
    bic     src2, src2, #3
    add     limit, limit, tmp1          ; Adjust the limit for the extra.
    lsl     tmp1, tmp1, #3              ; Bytes beyond alignment -> bits (8, 16, 24).
    ldr     data1, [src1], #4
    rsb     tmp1, tmp1, #32             ; 32 - bits = shift count (24, 16, 8).
    ldr     data2, [src2], #4
    mov     tmp2, #~0

    ; Little-endian.  Early bytes are at LSB.
    ; A register-specified shift takes its count from the bottom byte
    ; of the register and LSR by 32 or more gives zero, so the count
    ; has to be an explicit value in the range 1..31: a negated count
    ; would always produce an empty mask here.
    lsr     tmp2, tmp2, tmp1            ; Ones over the bytes before the start.
    add     limit_wd, limit, #3
    orr     data1, data1, tmp2          ; Force the leading bytes equal
    orr     data2, data2, tmp2          ; in both words.
    lsr     limit_wd, limit_wd, #2
    b       Lstart_realigned

Lmisaligned4
    sub     limit, limit, #1            ; Bias so the last byte produces a borrow.
L1
    ; Perhaps we can do better than this.
    ldrb    data1, [src1], #1
    ldrb    data2, [src2], #1
    subs    limit, limit, #1            ; Carry clear once the last byte is loaded.
    it      cs
    cmpcs   data1, data2                ; Compare only while bytes remain.
    beq     L1
    sub     result, data1, data2
    pop     {r4-r8, pc}

    END