/*
 * Copyright (c) 2017 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

#include <private/bionic_asm.h>

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		w0

/* Internal variables.  */
#define data1		x3
#define data1w		w3
#define data2		x4
#define data2w		w4
#define tmp1		x5

/* int memcmp(const void *src1, const void *src2, size_t limit)

   In:       x0 = src1, x1 = src2, x2 = limit (number of bytes to compare).
   Out:      w0 = 0 if the buffers are equal over `limit` bytes; negative if
             the first differing byte (compared as unsigned) is smaller in
             src1; positive if it is larger.  The word-compare paths return
             exactly -1/0/1, the byte loop returns the byte difference.
   Clobbers: x0-x5 and the condition flags.  No stack is used.

   Small inputs of less than 8 bytes are handled separately.  This allows the
   main code to be sped up using unaligned loads since there are now at least
   8 bytes to be compared.  If the first 8 bytes are equal, align src1.
   This ensures each iteration does at most one unaligned access even if both
   src1 and src2 are unaligned, and mutually aligned inputs behave as if
   aligned.  After the main loop, process the last 8 bytes using unaligned
   accesses.  */

	.p2align 6
ENTRY(memcmp)
	subs	limit, limit, 8
	b.lo	.Lless8

	/* Limit >= 8, so check first 8 bytes using unaligned loads.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	and	tmp1, src1, 7		/* Misalignment of src1 within 8 bytes.  */
	add	limit, limit, tmp1	/* Limit = bytes left once src1 is aligned.  */
	cmp	data1, data2
	b.ne	.Lreturn

	/* Align src1 and adjust src2 with bytes not yet done.  The bytes that
	   are stepped back over have already compared equal above.  */
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	subs	limit, limit, 8
	b.ls	.Llast_bytes

	/* Loop performing 8 bytes per iteration using aligned src1.
	   Limit is pre-decremented by 8 and must be larger than zero.
	   Exit if <= 8 bytes left to do or if the data is not equal.  */
	.p2align 4
.Lloop8:
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	subs	limit, limit, 8
	/* If more than 8 bytes remain, compare the data; otherwise force
	   "not equal" flags so that the loop terminates.  */
	ccmp	data1, data2, 0, hi  /* NZCV = 0b0000.  */
	b.eq	.Lloop8

	/* The loop may have ended on the byte count alone, so the flags do not
	   say whether the last words matched: compare them again.  */
	cmp	data1, data2
	b.ne	.Lreturn

	/* Compare last 1-8 bytes using unaligned access.  Limit is in [-8..0]
	   here, so src + limit addresses the final 8 bytes of each buffer; the
	   load may overlap bytes that have already compared equal.  */
.Llast_bytes:
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]

	/* Compare data bytes and set return value to 0, -1 or 1.  */
.Lreturn:
#ifndef __AARCH64EB__
	/* Little-endian: byte-reverse so that the byte at the lowest address
	   becomes the most significant and decides the unsigned compare.  */
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
	/* Also entered from .Lless4 with Z set (no bytes left), yielding 0.  */
.Lret_eq:
	cset	result, ne		/* 0 if equal, 1 otherwise.  */
	cneg	result, result, lo	/* Negate to -1 if data1 < data2.  */
	ret

	.p2align 4
	/* Compare 0-7 bytes.  Limit is [-8..-1] (byte count minus 8).  */
.Lless8:
	adds	limit, limit, 4
	b.lo	.Lless4			/* Fewer than 4 bytes to compare.  */
	/* 4-7 bytes: compare the first word.  The 32-bit loads zero-extend, so
	   the 64-bit compare at .Lreturn remains valid.  */
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	.Lreturn
	sub	limit, limit, 4
.Lless4:
	adds	limit, limit, 4		/* Limit = bytes still to compare, 0-3.  */
	b.eq	.Lret_eq		/* None left: buffers are equal.  */
.Lbyte_loop:
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	/* Keep looping while bytes remain and the current pair is equal; when
	   the count reaches zero the flags are forced to "not equal".  */
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.eq	.Lbyte_loop
	sub	result, data1w, data2w	/* Zero if the final pair was equal.  */
	ret
END(memcmp)