1 /* 2 * Copyright (c) 2017 Imagination Technologies. 3 * 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 10 * * Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * * Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer 14 * in the documentation and/or other materials provided with 15 * the distribution. 16 * * Neither the name of Imagination Technologies nor the names of its 17 * contributors may be used to endorse or promote products derived 18 * from this software without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <string.h> 34 #include <stdint.h> 35 36 #define ENABLE_PREFETCH 1 37 38 #define STRNG(X) #X 39 #define PREFETCH(src_ptr, offset) \ 40 asm("pref 0, " STRNG(offset) "(%[src]) \n\t" : : [src] "r" (src_ptr)); 41 42 #if !defined(UNALIGNED_INSTR_SUPPORT) 43 /* does target have unaligned lw/ld/ualw/uald instructions? */ 44 #define UNALIGNED_INSTR_SUPPORT 0 45 #if __mips_isa_rev < 6 && !__mips1 46 #undef UNALIGNED_INSTR_SUPPORT 47 #define UNALIGNED_INSTR_SUPPORT 1 48 #endif 49 #endif 50 51 #if !defined(HW_UNALIGNED_SUPPORT) 52 /* Does target have hardware support for unaligned accesses? */ 53 #define HW_UNALIGNED_SUPPORT 0 54 #if __mips_isa_rev >= 6 55 #undef HW_UNALIGNED_SUPPORT 56 #define HW_UNALIGNED_SUPPORT 1 57 #endif 58 #endif 59 60 #define SIZEOF_reg_t 4 61 #if _MIPS_SIM == _ABIO32 62 typedef unsigned long reg_t; 63 typedef struct bits 64 { 65 reg_t B0:8, B1:8, B2:8, B3:8; 66 } bits_t; 67 #else 68 #undef SIZEOF_reg_t 69 #define SIZEOF_reg_t 8 70 typedef unsigned long long reg_t; 71 typedef struct bits 72 { 73 reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8; 74 } bits_t; 75 #endif 76 77 /* This union assumes that small structures can be in registers. If 78 not, then memory accesses will be done - not optimal, but ok. */ 79 typedef union 80 { 81 reg_t v; 82 bits_t b; 83 } bitfields_t; 84 85 #define do_bitfield(__i) \ 86 if (x.b.B##__i != y.b.B##__i) return x.b.B##__i - y.b.B##__i; 87 88 /* pull apart the words to find the first differing unsigned byte. */ 89 static int __attribute__ ((noinline)) do_by_bitfields (reg_t a, reg_t b) 90 { 91 bitfields_t x, y; 92 x.v = a; 93 y.v = b; 94 do_bitfield (0); 95 do_bitfield (1); 96 do_bitfield (2); 97 #if SIZEOF_reg_t == 4 98 return x.b.B3 - y.b.B3; 99 #else 100 do_bitfield (3); 101 do_bitfield (4); 102 do_bitfield (5); 103 do_bitfield (6); 104 return x.b.B7 - y.b.B7; 105 #endif 106 } 107 108 /* This code is called when aligning a pointer, there are remaining bytes 109 after doing word compares, or architecture does not have some form 110 of unaligned support. */ 111 static inline int __attribute__ ((always_inline)) 112 do_bytes (const void *a, const void *b, unsigned long len) 113 { 114 unsigned char *x = (unsigned char *) a; 115 unsigned char *y = (unsigned char *) b; 116 unsigned long i; 117 118 /* 'len' might be zero here, so preloading the first two values 119 before the loop may access unallocated memory. */ 120 for (i = 0; i < len; i++) { 121 if (*x != *y) 122 return *x - *y; 123 x++; 124 y++; 125 } 126 return 0; 127 } 128 129 #if !HW_UNALIGNED_SUPPORT 130 #if UNALIGNED_INSTR_SUPPORT 131 /* for MIPS GCC, there are no unaligned builtins - so this struct forces 132 the compiler to treat the pointer access as unaligned. */ 133 struct ulw 134 { 135 reg_t uli; 136 } __attribute__ ((packed)); 137 138 /* first pointer is not aligned while second pointer is. */ 139 static int unaligned_words (const struct ulw *a, const reg_t *b, 140 unsigned long words, unsigned long bytes) 141 { 142 #if ENABLE_PREFETCH 143 /* prefetch pointer aligned to 32 byte boundary */ 144 const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31); 145 const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31); 146 #endif 147 for (; words >= 16; words -= 8) { 148 #if ENABLE_PREFETCH 149 pref_ptr += 8; 150 PREFETCH(pref_ptr, 0); 151 PREFETCH(pref_ptr, 32); 152 153 pref_ptr_a += 8; 154 PREFETCH(pref_ptr_a, 0); 155 PREFETCH(pref_ptr_a, 32); 156 #endif 157 reg_t x0 = a[0].uli, x1 = a[1].uli; 158 reg_t x2 = a[2].uli, x3 = a[3].uli; 159 reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3]; 160 if (x0 != y0) 161 return do_by_bitfields (x0, y0); 162 if (x1 != y1) 163 return do_by_bitfields (x1, y1); 164 if (x2 != y2) 165 return do_by_bitfields (x2, y2); 166 if (x3 != y3) 167 return do_by_bitfields (x3, y3); 168 169 x0 = a[4].uli; x1 = a[5].uli; 170 x2 = a[6].uli; x3 = a[7].uli; 171 y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7]; 172 if (x0 != y0) 173 return do_by_bitfields (x0, y0); 174 if (x1 != y1) 175 return do_by_bitfields (x1, y1); 176 if (x2 != y2) 177 return do_by_bitfields (x2, y2); 178 if (x3 != y3) 179 return do_by_bitfields (x3, y3); 180 181 a += 8; 182 b += 8; 183 } 184 185 for (; words >= 4; words -= 4) { 186 reg_t x0 = a[0].uli, x1 = a[1].uli; 187 reg_t x2 = a[2].uli, x3 = a[3].uli; 188 reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3]; 189 if (x0 != y0) 190 return do_by_bitfields (x0, y0); 191 if (x1 != y1) 192 return do_by_bitfields (x1, y1); 193 if (x2 != y2) 194 return do_by_bitfields (x2, y2); 195 if (x3 != y3) 196 return do_by_bitfields (x3, y3); 197 a += 4; 198 b += 4; 199 } 200 201 /* do remaining words. */ 202 while (words--) { 203 reg_t x0 = a->uli; 204 reg_t y0 = *b; 205 a += 1; 206 b += 1; 207 if (x0 != y0) 208 return do_by_bitfields (x0, y0); 209 } 210 211 /* mop up any remaining bytes. */ 212 return do_bytes (a, b, bytes); 213 } 214 #else 215 /* no HW support or unaligned lw/ld/ualw/uald instructions. */ 216 static int unaligned_words (const reg_t *a, const reg_t *b, 217 unsigned long words, unsigned long bytes) 218 { 219 return do_bytes (a, b, (sizeof (reg_t) * words) + bytes); 220 } 221 #endif /* UNALIGNED_INSTR_SUPPORT */ 222 #endif /* HW_UNALIGNED_SUPPORT */ 223 224 /* both pointers are aligned, or first isn't and HW support for unaligned. */ 225 static int aligned_words (const reg_t *a, const reg_t *b, 226 unsigned long words, unsigned long bytes) 227 { 228 #if ENABLE_PREFETCH 229 /* prefetch pointer aligned to 32 byte boundary */ 230 const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31); 231 const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31); 232 #endif 233 234 for (; words >= 24; words -= 12) { 235 #if ENABLE_PREFETCH 236 pref_ptr += 12; 237 PREFETCH(pref_ptr, 0); 238 PREFETCH(pref_ptr, 32); 239 PREFETCH(pref_ptr, 64); 240 241 pref_ptr_a += 12; 242 PREFETCH(pref_ptr_a, 0); 243 PREFETCH(pref_ptr_a, 32); 244 PREFETCH(pref_ptr_a, 64); 245 #endif 246 reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3]; 247 reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3]; 248 if (x0 != y0) 249 return do_by_bitfields (x0, y0); 250 if (x1 != y1) 251 return do_by_bitfields (x1, y1); 252 if (x2 != y2) 253 return do_by_bitfields (x2, y2); 254 if (x3 != y3) 255 return do_by_bitfields (x3, y3); 256 257 x0 = a[4]; x1 = a[5]; x2 = a[6]; x3 = a[7]; 258 y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7]; 259 if (x0 != y0) 260 return do_by_bitfields (x0, y0); 261 if (x1 != y1) 262 return do_by_bitfields (x1, y1); 263 if (x2 != y2) 264 return do_by_bitfields (x2, y2); 265 if (x3 != y3) 266 return do_by_bitfields (x3, y3); 267 268 x0 = a[8]; x1 = a[9]; x2 = a[10]; x3 = a[11]; 269 y0 = b[8]; y1 = b[9]; y2 = b[10]; y3 = b[11]; 270 if (x0 != y0) 271 return do_by_bitfields (x0, y0); 272 if (x1 != y1) 273 return do_by_bitfields (x1, y1); 274 if (x2 != y2) 275 return do_by_bitfields (x2, y2); 276 if (x3 != y3) 277 return do_by_bitfields (x3, y3); 278 279 a += 12; 280 b += 12; 281 } 282 283 for (; words >= 4; words -= 4) { 284 reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3]; 285 reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3]; 286 if (x0 != y0) 287 return do_by_bitfields (x0, y0); 288 if (x1 != y1) 289 return do_by_bitfields (x1, y1); 290 if (x2 != y2) 291 return do_by_bitfields (x2, y2); 292 if (x3 != y3) 293 return do_by_bitfields (x3, y3); 294 a += 4; 295 b += 4; 296 } 297 298 /* do remaining words. */ 299 while (words--) { 300 reg_t x0 = *a; 301 reg_t y0 = *b; 302 a += 1; 303 b += 1; 304 if (x0 != y0) 305 return do_by_bitfields (x0, y0); 306 } 307 308 /* mop up any remaining bytes. */ 309 return do_bytes (a, b, bytes); 310 } 311 312 int memcmp (const void *a, const void *b, size_t len) 313 { 314 unsigned long bytes, words; 315 316 /* shouldn't hit that often. */ 317 if (len < sizeof (reg_t) * 4) { 318 return do_bytes (a, b, len); 319 } 320 321 /* Align the second pointer to word/dword alignment. 322 Note that the pointer is only 32-bits for o32/n32 ABIs. For 323 n32, loads are done as 64-bit while address remains 32-bit. */ 324 bytes = ((unsigned long) b) % sizeof (reg_t); 325 if (bytes) { 326 int res; 327 bytes = sizeof (reg_t) - bytes; 328 if (bytes > len) 329 bytes = len; 330 res = do_bytes (a, b, bytes); 331 if (res || len == bytes) 332 return res; 333 len -= bytes; 334 a = (const void *) (((unsigned char *) a) + bytes); 335 b = (const void *) (((unsigned char *) b) + bytes); 336 } 337 338 /* Second pointer now aligned. */ 339 words = len / sizeof (reg_t); 340 bytes = len % sizeof (reg_t); 341 342 #if HW_UNALIGNED_SUPPORT 343 /* treat possible unaligned first pointer as aligned. */ 344 return aligned_words (a, b, words, bytes); 345 #else 346 if (((unsigned long) a) % sizeof (reg_t) == 0) { 347 return aligned_words (a, b, words, bytes); 348 } 349 /* need to use unaligned instructions on first pointer. */ 350 return unaligned_words (a, b, words, bytes); 351 #endif 352 } 353