/*
 * Copyright (c) 2017 Imagination Technologies.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      * Redistributions of source code must retain the above copyright
 *        notice, this list of conditions and the following disclaimer.
 *      * Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimer
 *        in the documentation and/or other materials provided with
 *        the distribution.
 *      * Neither the name of Imagination Technologies nor the names of its
 *        contributors may be used to endorse or promote products derived
 *        from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <string.h>

#if !defined(UNALIGNED_INSTR_SUPPORT)
/* Does the target have unaligned lw/ld/ualw/uald instructions?  */
#define UNALIGNED_INSTR_SUPPORT 0
#if __mips_isa_rev < 6 && !__mips1
#undef UNALIGNED_INSTR_SUPPORT
#define UNALIGNED_INSTR_SUPPORT 1
#endif
#endif

#if !defined(HW_UNALIGNED_SUPPORT)
/* Does the target have hardware support for unaligned accesses?  */
#define HW_UNALIGNED_SUPPORT 0
#if __mips_isa_rev >= 6
#undef HW_UNALIGNED_SUPPORT
#define HW_UNALIGNED_SUPPORT 1
#endif
#endif

/* The definition of memcpy below is tagged with __overloadable.  When no
   header has supplied that macro, fall back to an empty expansion so the
   file still builds.  */
#if !defined(__overloadable)
#define __overloadable
#endif

#define ENABLE_PREFETCH 1

/* PREFETCH expands to a single expression without a trailing semicolon,
   so `PREFETCH (p);` is exactly one statement and is safe as the body of
   an unbraced if/else.  */
#if ENABLE_PREFETCH
#define PREFETCH(addr) __builtin_prefetch ((addr), 0, 1)
#else
#define PREFETCH(addr) ((void) 0)
#endif

/* reg_t is the unit used for the word-at-a-time copy loops.  */
#if _MIPS_SIM == _ABIO32
typedef unsigned long reg_t;
#else
typedef unsigned long long reg_t;
#endif

/* View of one reg_t as its bytes in memory order.  c[i] is the byte that
   lives at offset i of the word, independent of endianness and of how the
   compiler lays out bit-fields.  */
typedef union
{
  reg_t v;
  unsigned char c[sizeof (reg_t)];
} word_bytes_t;

/* Copy LEN bytes from B to A one byte at a time and return RET.

   This code is called when aligning a pointer, or when the copy is too
   short to be worth doing word by word.  */
static inline void * __attribute__ ((always_inline))
do_bytes (void *a, const void *b, size_t len, void *ret)
{
  unsigned char *dst = (unsigned char *) a;
  const unsigned char *src = (const unsigned char *) b;
  size_t i;

  /* 'len' might be zero here, so preloading the first two values
     before the loop may access unallocated memory.  */
  for (i = 0; i < len; i++)
    dst[i] = src[i];

  return ret;
}

/* Copy the LEN bytes (fewer than one word) that remain after the word
   loops and return RET.

   A whole reg_t is loaded from B with a single access and its leading
   LEN bytes are stored to A one at a time; callers in this file pass a
   B that is reg_t-aligned.  The number of bytes handled is bounded by
   sizeof (reg_t) rather than by a separate target macro, so it always
   agrees with the width of reg_t.

   NOTE(review): the load reads up to sizeof (reg_t) - LEN bytes beyond
   the requested range; presumably acceptable because it is one aligned
   word access - confirm for sanitizer builds.  */
static inline void * __attribute__ ((always_inline))
do_bytes_remaining (void *a, const void *b, size_t len, void *ret)
{
  unsigned char *dst = (unsigned char *) a;

  if (len > 0) {
    word_bytes_t w;
    size_t i;

    w.v = *((const reg_t *) b);
    for (i = 0; i < sizeof (reg_t) && i < len; i++)
      dst[i] = w.c[i];
  }

  return ret;
}

#if !HW_UNALIGNED_SUPPORT
#if UNALIGNED_INSTR_SUPPORT
/* For MIPS GCC, there are no unaligned builtins - so this struct forces
   the compiler to treat the pointer access as unaligned.  */
struct ulw
{
  reg_t uli;
} __attribute__ ((packed));

/* Copy WORDS whole words followed by BYTES trailing bytes from B to A and
   return RET.  The first pointer is not aligned while the second is.  */
static void *
unaligned_words (struct ulw *a, const reg_t *b,
                 size_t words, size_t bytes, void *ret)
{
  size_t i, blocks, words_by_1;

#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
  /* Blocks of eight words; all loads of a block precede its stores.  */
  words_by_1 = words % 8;
  for (blocks = words >> 3; blocks > 0; blocks--) {
    /* No prefetch on the last block: there is no further full block.  */
    if (blocks != 1)
      PREFETCH (b + 8);
    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
    reg_t y4 = b[4], y5 = b[5], y6 = b[6], y7 = b[7];
    a[0].uli = y0;
    a[1].uli = y1;
    a[2].uli = y2;
    a[3].uli = y3;
    a[4].uli = y4;
    a[5].uli = y5;
    a[6].uli = y6;
    a[7].uli = y7;
    a += 8;
    b += 8;
  }
#else
  /* Blocks of four words; all loads of a block precede its stores.  */
  words_by_1 = words % 4;
  for (blocks = words >> 2; blocks > 0; blocks--) {
    /* No prefetch on the last block: there is no further full block.  */
    if (blocks != 1)
      PREFETCH (b + 4);
    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
    a[0].uli = y0;
    a[1].uli = y1;
    a[2].uli = y2;
    a[3].uli = y3;
    a += 4;
    b += 4;
  }
#endif

  /* Do remaining words.  */
  for (i = 0; i < words_by_1; i++) {
    a->uli = *b;
    a += 1;
    b += 1;
  }

  /* Mop up any remaining bytes.  */
  return do_bytes_remaining (a, b, bytes, ret);
}
#else
/* Copy WORDS whole words followed by BYTES trailing bytes from B to A and
   return RET.  Used when there is neither HW support for unaligned
   accesses nor unaligned lw/ld/ualw/uald instructions: each word is
   loaded from the aligned source and stored to the destination one byte
   at a time.  */
static void *
unaligned_words (reg_t *a, const reg_t *b,
                 size_t words, size_t bytes, void *ret)
{
  unsigned char *dst = (unsigned char *) a;
  size_t i, j;

  for (i = 0; i < words; i++) {
    word_bytes_t w;

    w.v = *b;
    for (j = 0; j < sizeof (reg_t); j++)
      dst[j] = w.c[j];
    dst += sizeof (reg_t);
    b += 1;
  }

  /* Mop up any remaining bytes.  */
  return do_bytes_remaining (dst, b, bytes, ret);
}
#endif /* UNALIGNED_INSTR_SUPPORT */
#endif /* HW_UNALIGNED_SUPPORT */

/* Copy WORDS whole words followed by BYTES trailing bytes from B to A and
   return RET.  Both pointers are aligned, or the first is not and the
   hardware supports unaligned accesses.  */
static void *
aligned_words (reg_t *a, const reg_t *b,
               size_t words, size_t bytes, void *ret)
{
  size_t i, blocks, words_by_1;

#if ((_MIPS_SIM == _ABIO32) || _MIPS_TUNE_I6400)
  /* Blocks of eight words; all loads of a block precede its stores.  */
  words_by_1 = words % 8;
  for (blocks = words >> 3; blocks > 0; blocks--) {
    /* No prefetch on the last block: there is no further full block.  */
    if (blocks != 1)
      PREFETCH (b + 8);
    reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];
    reg_t x4 = b[4], x5 = b[5], x6 = b[6], x7 = b[7];
    a[0] = x0;
    a[1] = x1;
    a[2] = x2;
    a[3] = x3;
    a[4] = x4;
    a[5] = x5;
    a[6] = x6;
    a[7] = x7;
    a += 8;
    b += 8;
  }
#else
  /* Blocks of four words; all loads of a block precede its stores.  */
  words_by_1 = words % 4;
  for (blocks = words >> 2; blocks > 0; blocks--) {
    /* No prefetch on the last block: there is no further full block.  */
    if (blocks != 1)
      PREFETCH (b + 4);
    reg_t x0 = b[0], x1 = b[1], x2 = b[2], x3 = b[3];
    a[0] = x0;
    a[1] = x1;
    a[2] = x2;
    a[3] = x3;
    a += 4;
    b += 4;
  }
#endif

  /* Do remaining words.  */
  for (i = 0; i < words_by_1; i++) {
    *a = *b;
    a += 1;
    b += 1;
  }

  /* Mop up any remaining bytes.  */
  return do_bytes_remaining (a, b, bytes, ret);
}

/* Copy LEN bytes from B to A and return A.

   Copies shorter than four words are done byte by byte.  Otherwise the
   source pointer is first brought to reg_t alignment with a short byte
   copy, the bulk is moved a word at a time (through aligned or unaligned
   stores depending on the destination and on the target's capabilities),
   and the final partial word is copied by do_bytes_remaining.  */
void *
memcpy (void *a, const void *b, size_t len) __overloadable
{
  size_t bytes, words;
  void *ret = a;

  /* Shouldn't hit that often.  */
  if (len < sizeof (reg_t) * 4)
    return do_bytes (a, b, len, a);

  /* Align the second pointer to word/dword alignment.
     Note that the pointer is only 32-bits for o32/n32 ABIs.  For
     n32, loads are done as 64-bit while address remains 32-bit.  */
  bytes = ((unsigned long) b) % sizeof (reg_t);
  if (bytes) {
    /* At most sizeof (reg_t) - 1 bytes are needed to reach alignment,
       and len is at least 4 * sizeof (reg_t) here, so the head copy can
       neither exceed nor exhaust len.  */
    bytes = sizeof (reg_t) - bytes;
    do_bytes (a, b, bytes, ret);
    len -= bytes;
    a = (void *) (((unsigned char *) a) + bytes);
    b = (const void *) (((const unsigned char *) b) + bytes);
  }

  /* Second pointer now aligned.  */
  words = len / sizeof (reg_t);
  bytes = len % sizeof (reg_t);
#if HW_UNALIGNED_SUPPORT
  /* Treat possible unaligned first pointer as aligned.  */
  return aligned_words (a, b, words, bytes, ret);
#else
  if (((unsigned long) a) % sizeof (reg_t) == 0)
    return aligned_words (a, b, words, bytes, ret);

  /* Need to use unaligned instructions on first pointer.  */
  return unaligned_words (a, b, words, bytes, ret);
#endif
}