/* MIPS-optimized memcmp implementation (string/). */
      1 /*
      2  * Copyright (c) 2017 Imagination Technologies.
      3  *
      4  * All rights reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  *
     10  *      * Redistributions of source code must retain the above copyright
     11  *        notice, this list of conditions and the following disclaimer.
     12  *      * Redistributions in binary form must reproduce the above copyright
     13  *        notice, this list of conditions and the following disclaimer
     14  *        in the documentation and/or other materials provided with
     15  *        the distribution.
     16  *      * Neither the name of Imagination Technologies nor the names of its
     17  *        contributors may be used to endorse or promote products derived
     18  *        from this software without specific prior written permission.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     21  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     23  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     24  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     25  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     26  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     27  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     28  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     29  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     30  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     31  */
     32 
     33 #include <string.h>
     34 #include <stdint.h>
     35 
/* Set to 0 to drop the software prefetch hints from the compare loops.  */
#define ENABLE_PREFETCH 1

#define STRNG(X) #X
/* Emit a MIPS "pref" hint 0 (load) for src_ptr + offset.  'offset' must be
   a literal constant because it is stringized into the asm template.  */
#define PREFETCH(src_ptr, offset)  \
  asm("pref 0, " STRNG(offset) "(%[src]) \n\t" : : [src] "r" (src_ptr));

#if !defined(UNALIGNED_INSTR_SUPPORT)
/* does target have unaligned lw/ld/ualw/uald instructions? */
#define UNALIGNED_INSTR_SUPPORT 0
#if __mips_isa_rev < 6 && !__mips1
#undef UNALIGNED_INSTR_SUPPORT
#define UNALIGNED_INSTR_SUPPORT 1
#endif
#endif

#if !defined(HW_UNALIGNED_SUPPORT)
/* Does target have hardware support for unaligned accesses?  */
#define HW_UNALIGNED_SUPPORT 0
#if __mips_isa_rev >= 6
#undef HW_UNALIGNED_SUPPORT
#define HW_UNALIGNED_SUPPORT 1
#endif
#endif

/* Byte width of the register-sized compare unit (reg_t) below.  */
#define SIZEOF_reg_t 4
#if _MIPS_SIM == _ABIO32
typedef unsigned long reg_t;
/* Per-byte-lane view of a reg_t.  NOTE(review): which lane maps to the
   lowest address depends on the target's endianness and bitfield ABI --
   do_by_bitfields only relies on the same mapping for both operands.  */
typedef struct bits
{
  reg_t B0:8, B1:8, B2:8, B3:8;
} bits_t;
#else
/* 64-bit ABIs (n32/n64): loads are 64-bit wide.  */
#undef SIZEOF_reg_t
#define SIZEOF_reg_t 8
typedef unsigned long long reg_t;
typedef struct bits
{
    reg_t B0:8, B1:8, B2:8, B3:8, B4:8, B5:8, B6:8, B7:8;
} bits_t;
#endif

/* This union assumes that small structures can be in registers.  If
   not, then memory accesses will be done - not optimal, but ok.  */
typedef union
{
  reg_t v;
  bits_t b;
} bitfields_t;

/* Compare lane __i of x and y; on mismatch return the difference of the
   two (zero-extended) bytes.  Expanded repeatedly in do_by_bitfields.  */
#define do_bitfield(__i) \
  if (x.b.B##__i != y.b.B##__i) return x.b.B##__i - y.b.B##__i;
     87 
     88 /* pull apart the words to find the first differing unsigned byte.  */
     89 static int __attribute__ ((noinline)) do_by_bitfields (reg_t a, reg_t b)
     90 {
     91   bitfields_t x, y;
     92   x.v = a;
     93   y.v = b;
     94   do_bitfield (0);
     95   do_bitfield (1);
     96   do_bitfield (2);
     97 #if SIZEOF_reg_t == 4
     98   return x.b.B3 - y.b.B3;
     99 #else
    100   do_bitfield (3);
    101   do_bitfield (4);
    102   do_bitfield (5);
    103   do_bitfield (6);
    104   return x.b.B7 - y.b.B7;
    105 #endif
    106 }
    107 
    108 /* This code is called when aligning a pointer, there are remaining bytes
    109    after doing word compares, or architecture does not have some form
    110    of unaligned support.  */
    111 static inline int __attribute__ ((always_inline))
    112 do_bytes (const void *a, const void *b, unsigned long len)
    113 {
    114   unsigned char *x = (unsigned char *) a;
    115   unsigned char *y = (unsigned char *) b;
    116   unsigned long i;
    117 
    118   /* 'len' might be zero here, so preloading the first two values
    119      before the loop may access unallocated memory.  */
    120   for (i = 0; i < len; i++) {
    121     if (*x != *y)
    122       return *x - *y;
    123     x++;
    124     y++;
    125   }
    126   return 0;
    127 }
    128 
    129 #if !HW_UNALIGNED_SUPPORT
    130 #if UNALIGNED_INSTR_SUPPORT
/* for MIPS GCC, there are no unaligned builtins - so this struct forces
   the compiler to treat the pointer access as unaligned.  */
struct ulw
{
  reg_t uli;
} __attribute__ ((packed));

/* first pointer is not aligned while second pointer is.
   Compares 'words' register-sized units, then 'bytes' trailing bytes;
   result follows memcmp sign semantics.  */
static int unaligned_words (const struct ulw *a, const reg_t *b,
                            unsigned long words, unsigned long bytes)
{
#if ENABLE_PREFETCH
  /* prefetch pointer aligned to 32 byte boundary */
  const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
  const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
#endif
  /* Unrolled main loop: consumes 8 words per pass but is entered only
     while >= 16 words remain.  NOTE(review): the 8-word slack presumably
     keeps the prefetches below from reaching far past the buffers --
     confirm before changing either constant.  */
  for (; words >= 16; words -= 8) {
#if ENABLE_PREFETCH
    pref_ptr += 8;
    PREFETCH(pref_ptr, 0);
    PREFETCH(pref_ptr, 32);

    pref_ptr_a += 8;
    PREFETCH(pref_ptr_a, 0);
    PREFETCH(pref_ptr_a, 32);
#endif
    /* Load 4 pairs up front so the (possibly multi-instruction)
       unaligned loads can be scheduled before the compares.  */
    reg_t x0 = a[0].uli, x1 = a[1].uli;
    reg_t x2 = a[2].uli, x3 = a[3].uli;
    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);

    /* Second half of the 8-word unroll.  */
    x0 = a[4].uli; x1 = a[5].uli;
    x2 = a[6].uli; x3 = a[7].uli;
    y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);

    a += 8;
    b += 8;
  }

  /* Medium tail: 4 words at a time, without prefetch.  */
  for (; words >= 4; words -= 4) {
    reg_t x0 = a[0].uli, x1 = a[1].uli;
    reg_t x2 = a[2].uli, x3 = a[3].uli;
    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);
    a += 4;
    b += 4;
  }

  /* do remaining words.  */
  while (words--) {
    reg_t x0 = a->uli;
    reg_t y0 = *b;
    a += 1;
    b += 1;
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
  }

  /* mop up any remaining bytes.  */
  return do_bytes (a, b, bytes);
}
    214 #else
    215 /* no HW support or unaligned lw/ld/ualw/uald instructions.  */
    216 static int unaligned_words (const reg_t *a, const reg_t *b,
    217                             unsigned long words, unsigned long bytes)
    218 {
    219   return do_bytes (a, b, (sizeof (reg_t) * words) + bytes);
    220 }
    221 #endif /* UNALIGNED_INSTR_SUPPORT */
    222 #endif /* HW_UNALIGNED_SUPPORT */
    223 
/* both pointers are aligned, or first isn't and HW support for unaligned.
   Compares 'words' register-sized units, then 'bytes' trailing bytes;
   result follows memcmp sign semantics.  */
static int aligned_words (const reg_t *a, const reg_t *b,
                          unsigned long words, unsigned long bytes)
{
#if ENABLE_PREFETCH
  /* prefetch pointer aligned to 32 byte boundary */
  const reg_t *pref_ptr = (const reg_t *) (((uintptr_t) b + 31) & ~31);
  const reg_t *pref_ptr_a = (const reg_t *) (((uintptr_t) a + 31) & ~31);
#endif

  /* Unrolled main loop: consumes 12 words per pass but is entered only
     while >= 24 words remain.  NOTE(review): the 12-word slack presumably
     keeps the 64-byte-ahead prefetches below within the buffers --
     confirm before changing either constant.  */
  for (; words >= 24; words -= 12) {
#if ENABLE_PREFETCH
    pref_ptr += 12;
    PREFETCH(pref_ptr, 0);
    PREFETCH(pref_ptr, 32);
    PREFETCH(pref_ptr, 64);

    pref_ptr_a += 12;
    PREFETCH(pref_ptr_a, 0);
    PREFETCH(pref_ptr_a, 32);
    PREFETCH(pref_ptr_a, 64);
#endif
    /* Load 4 pairs up front so loads can be scheduled ahead of compares.  */
    reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);

    /* Second group of 4 words.  */
    x0 = a[4]; x1 = a[5]; x2 = a[6]; x3 = a[7];
    y0 = b[4]; y1 = b[5]; y2 = b[6]; y3 = b[7];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);

    /* Third group of 4 words.  */
    x0 = a[8]; x1 = a[9]; x2 = a[10]; x3 = a[11];
    y0 = b[8]; y1 = b[9]; y2 = b[10]; y3 = b[11];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);

    a += 12;
    b += 12;
  }

  /* Medium tail: 4 words at a time, without prefetch.  */
  for (; words >= 4; words -= 4) {
    reg_t x0 = a[0], x1 = a[1], x2 = a[2], x3 = a[3];
    reg_t y0 = b[0], y1 = b[1], y2 = b[2], y3 = b[3];
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
    if (x1 != y1)
      return do_by_bitfields (x1, y1);
    if (x2 != y2)
      return do_by_bitfields (x2, y2);
    if (x3 != y3)
      return do_by_bitfields (x3, y3);
    a += 4;
    b += 4;
  }

  /* do remaining words.  */
  while (words--) {
    reg_t x0 = *a;
    reg_t y0 = *b;
    a += 1;
    b += 1;
    if (x0 != y0)
      return do_by_bitfields (x0, y0);
  }

  /* mop up any remaining bytes.  */
  return do_bytes (a, b, bytes);
}
    311 
    312 int memcmp (const void *a, const void *b, size_t len)
    313 {
    314   unsigned long bytes, words;
    315 
    316   /* shouldn't hit that often.  */
    317   if (len < sizeof (reg_t) * 4) {
    318     return do_bytes (a, b, len);
    319   }
    320 
    321   /* Align the second pointer to word/dword alignment.
    322      Note that the pointer is only 32-bits for o32/n32 ABIs. For
    323      n32, loads are done as 64-bit while address remains 32-bit.   */
    324   bytes = ((unsigned long) b) % sizeof (reg_t);
    325   if (bytes) {
    326     int res;
    327     bytes = sizeof (reg_t) - bytes;
    328     if (bytes > len)
    329       bytes = len;
    330     res = do_bytes (a, b, bytes);
    331     if (res || len == bytes)
    332       return res;
    333     len -= bytes;
    334     a = (const void *) (((unsigned char *) a) + bytes);
    335     b = (const void *) (((unsigned char *) b) + bytes);
    336   }
    337 
    338   /* Second pointer now aligned.  */
    339   words = len / sizeof (reg_t);
    340   bytes = len % sizeof (reg_t);
    341 
    342 #if HW_UNALIGNED_SUPPORT
    343   /* treat possible unaligned first pointer as aligned.  */
    344   return aligned_words (a, b, words, bytes);
    345 #else
    346   if (((unsigned long) a) % sizeof (reg_t) == 0) {
    347     return aligned_words (a, b, words, bytes);
    348   }
    349   /* need to use unaligned instructions on first pointer.  */
    350   return unaligned_words (a, b, words, bytes);
    351 #endif
    352 }
    353