/* Copyright (c) 2012, Linaro Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:
       * Redistributions of source code must retain the above copyright
         notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above copyright
         notice, this list of conditions and the following disclaimer in the
         documentation and/or other materials provided with the distribution.
       * Neither the name of the Linaro nor the
         names of its contributors may be used to endorse or promote products
         derived from this software without specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include <private/bionic_asm.h>

/* File-local label helper: L(foo) expands to .Lfoo.  */
#define L(label) .L ## label

/* Per-byte replicated constants used by the word-at-a-time NUL test.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define result		x0

/* Internal variables.  */
#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define syndrome	x6
#define tmp1		x7
#define tmp2		x8
#define tmp3		x9
#define zeroones	x10
#define pos		x11

/* int strcmp(const char *s1, const char *s2)

   In:     x0 = src1, x1 = src2.
   Out:    x0 = result: zero when both strings match up to and including
           the NUL terminator, otherwise a value whose sign gives the
           ordering of the first differing bytes, compared as unsigned.
   Uses:   x2-x11 and the condition flags; the stack is not touched.

   Strategy, chosen from the low three bits of the two pointers:
     - both 8-byte aligned:           compare a dword per iteration;
     - same misalignment in both:     round both down, mask off the
                                      leading bytes, join the dword loop;
     - different misalignment:        compare bytes until src1 is aligned,
                                      then compare dwords while guarding
                                      src2 against crossing a 4K page.  */

	/* Start of performance-critical section  -- one 64B cache line.  */
ENTRY(strcmp)
	/* NOTE(review): this alignment directive follows ENTRY(strcmp);
	   where the padding lands relative to the strcmp symbol depends on
	   how ENTRY emits its label -- confirm against bionic_asm.h.  */
	.p2align  6
	eor	tmp1, src1, src2
	mov	zeroones, #REP8_01
	/* Low three address bits differ: no common 8-byte alignment.  */
	tst	tmp1, #7
	b.ne	L(misaligned8)
	/* tmp1 = offset of src1 within its dword; it is consumed by
	   L(mutual_align) when non-zero.  */
	ands	tmp1, src1, #7
	b.ne	L(mutual_align)
	/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
	   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
	   can be done in parallel across the entire word.  */
L(loop_aligned):
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8
L(start_realigned):
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	orr	syndrome, diff, has_nul
	cbz	syndrome, L(loop_aligned)
	/* End of performance-critical section  -- one 64B cache line.  */

	/* Reached with data1/data2 holding the dword pair that contains the
	   first difference and/or the NUL terminator of src1.  */
L(end):
#ifndef	__AARCH64EB__
	/* Little-endian: byte-reverse so that the earliest string byte is
	   the most significant one.  */
	rev	syndrome, syndrome
	rev	data1, data1
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	clz	pos, syndrome
	rev	data2, data2
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#else
	/* For big-endian we cannot use the trick with the syndrome value
	   as carry-propagation can corrupt the upper bits if the trailing
	   bytes in the string contain 0x01.  */
	/* However, if there is no NUL byte in the dword, we can generate
	   the result directly.  We can't just subtract the bytes as the
	   MSB might be significant.  */
	cbnz	has_nul, 1f
	/* No NUL, so the dwords differ: yield +1, or -1 when data1 is the
	   lower of the two as an unsigned value.  */
	cmp	data1, data2
	cset	result, ne
	cneg	result, result, lo
	ret
1:
	/* Re-compute the NUL-byte detection, using a byte-reversed value.  */
	rev	tmp3, data1
	sub	tmp1, tmp3, zeroones
	orr	tmp2, tmp3, #REP8_7f
	bic	has_nul, tmp1, tmp2
	rev	has_nul, has_nul
	orr	syndrome, diff, has_nul
	clz	pos, syndrome
	/* The MS-non-zero bit of the syndrome marks either the first bit
	   that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, pos
	lsl	data2, data2, pos
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, #56
	sub	result, data1, data2, lsr #56
	ret
#endif

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, #7
	bic	src2, src2, #7
	lsl	tmp1, tmp1, #3		/* Bytes beyond alignment -> bits.  */
	ldr	data1, [src1], #8
	neg	tmp1, tmp1		/* Bits to alignment -64.  */
	ldr	data2, [src2], #8
	mov	tmp2, #~0
#ifdef __AARCH64EB__
	/* Big-endian.  Early bytes are at MSB.  */
	lsl	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#else
	/* Little-endian.  Early bytes are at LSB.  */
	lsr	tmp2, tmp2, tmp1	/* Shift (tmp1 & 63).  */
#endif
	/* Set every bit of the bytes before the start point in both words,
	   so they compare equal and cannot be mistaken for a NUL.  */
	orr	data1, data1, tmp2
	orr	data2, data2, tmp2
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond page boundary in
	   SRC2.  */
	tst	src1, #7
	b.eq	L(loop_misaligned)
L(do_misaligned):
	ldrb	data1w, [src1], #1
	ldrb	data2w, [src2], #1
	/* Carry is set iff data1 is not NUL.  In that case compare the two
	   bytes; otherwise force "not equal" so that a NUL also exits.  */
	cmp	data1w, #1
	ccmp	data1w, data2w, #0, cs	/* NZCV = 0b0000.  */
	b.ne	L(done)
	/* Keep going byte by byte until src1 sits on an 8-byte boundary.  */
	tst	src1, #7
	b.ne	L(do_misaligned)

L(loop_misaligned):
	/* Test if we are within the last dword of the end of a 4K page.  If
	   yes then jump back to the misaligned loop to compare a byte at a
	   time.  */
	and	tmp1, src2, #0xff8
	eor	tmp1, tmp1, #0xff8
	cbz	tmp1, L(do_misaligned)
	ldr	data1, [src1], #8
	ldr	data2, [src2], #8

	/* Same difference/NUL syndrome computation as the aligned loop.  */
	sub	tmp1, data1, zeroones
	orr	tmp2, data1, #REP8_7f
	eor	diff, data1, data2	/* Non-zero if differences found.  */
	bic	has_nul, tmp1, tmp2	/* Non-zero if NUL terminator.  */
	orr	syndrome, diff, has_nul
	cbz	syndrome, L(loop_misaligned)
	b	L(end)

L(done):
	/* Byte-loop exit: data1 and data2 each hold one zero-extended byte.  */
	sub	result, data1, data2
	ret
END(strcmp)