Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright  2011,2012  Google, Inc.
      3  *
      4  *  This is part of HarfBuzz, a text shaping library.
      5  *
      6  * Permission is hereby granted, without written agreement and without
      7  * license or royalty fees, to use, copy, modify, and distribute this
      8  * software and its documentation for any purpose, provided that the
      9  * above copyright notice and the following two paragraphs appear in
     10  * all copies of this software.
     11  *
     12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     16  * DAMAGE.
     17  *
     18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     23  *
     24  * Google Author(s): Behdad Esfahbod
     25  */
     26 
     27 #ifndef HB_UTF_PRIVATE_HH
     28 #define HB_UTF_PRIVATE_HH
     29 
     30 #include "hb-private.hh"
     31 
     32 
     33 /* UTF-8 */
     34 
     35 #define HB_UTF8_COMPUTE(Char, Mask, Len) \
     36   if (Char < 128) { Len = 1; Mask = 0x7f; } \
     37   else if ((Char & 0xe0) == 0xc0) { Len = 2; Mask = 0x1f; } \
     38   else if ((Char & 0xf0) == 0xe0) { Len = 3; Mask = 0x0f; } \
     39   else if ((Char & 0xf8) == 0xf0) { Len = 4; Mask = 0x07; } \
     40   else Len = 0;
     41 
     42 static inline const uint8_t *
     43 hb_utf_next (const uint8_t *text,
     44 	     const uint8_t *end,
     45 	     hb_codepoint_t *unicode)
     46 {
     47   hb_codepoint_t c = *text, mask;
     48   unsigned int len;
     49 
     50   /* TODO check for overlong sequences? */
     51 
     52   HB_UTF8_COMPUTE (c, mask, len);
     53   if (unlikely (!len || (unsigned int) (end - text) < len)) {
     54     *unicode = -1;
     55     return text + 1;
     56   } else {
     57     hb_codepoint_t result;
     58     unsigned int i;
     59     result = c & mask;
     60     for (i = 1; i < len; i++)
     61       {
     62 	if (unlikely ((text[i] & 0xc0) != 0x80))
     63 	  {
     64 	    *unicode = -1;
     65 	    return text + 1;
     66 	  }
     67 	result <<= 6;
     68 	result |= (text[i] & 0x3f);
     69       }
     70     *unicode = result;
     71     return text + len;
     72   }
     73 }
     74 
     75 static inline const uint8_t *
     76 hb_utf_prev (const uint8_t *text,
     77 	     const uint8_t *start,
     78 	     hb_codepoint_t *unicode)
     79 {
     80   const uint8_t *end = text--;
     81   while (start < text && (*text & 0xc0) == 0x80 && end - text < 4)
     82     text--;
     83 
     84   hb_codepoint_t c = *text, mask;
     85   unsigned int len;
     86 
     87   /* TODO check for overlong sequences? */
     88 
     89   HB_UTF8_COMPUTE (c, mask, len);
     90   if (unlikely (!len || (unsigned int) (end - text) != len)) {
     91     *unicode = -1;
     92     return end - 1;
     93   } else {
     94     hb_codepoint_t result;
     95     unsigned int i;
     96     result = c & mask;
     97     for (i = 1; i < len; i++)
     98       {
     99 	result <<= 6;
    100 	result |= (text[i] & 0x3f);
    101       }
    102     *unicode = result;
    103     return text;
    104   }
    105 }
    106 
    107 
    108 static inline unsigned int
    109 hb_utf_strlen (const uint8_t *text)
    110 {
    111   return strlen ((const char *) text);
    112 }
    113 
    114 
    115 /* UTF-16 */
    116 
    117 static inline const uint16_t *
    118 hb_utf_next (const uint16_t *text,
    119 	     const uint16_t *end,
    120 	     hb_codepoint_t *unicode)
    121 {
    122   hb_codepoint_t c = *text++;
    123 
    124   if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xd800, 0xdbff)))
    125   {
    126     /* high surrogate */
    127     hb_codepoint_t l;
    128     if (text < end && ((l = *text), likely (hb_in_range<hb_codepoint_t> (l, 0xdc00, 0xdfff))))
    129     {
    130       /* low surrogate */
    131       *unicode = (c << 10) + l - ((0xd800 << 10) - 0x10000 + 0xdc00);
    132        text++;
    133     } else
    134       *unicode = -1;
    135   } else
    136     *unicode = c;
    137 
    138   return text;
    139 }
    140 
    141 static inline const uint16_t *
    142 hb_utf_prev (const uint16_t *text,
    143 	     const uint16_t *start,
    144 	     hb_codepoint_t *unicode)
    145 {
    146   hb_codepoint_t c = *--text;
    147 
    148   if (unlikely (hb_in_range<hb_codepoint_t> (c, 0xdc00, 0xdfff)))
    149   {
    150     /* low surrogate */
    151     hb_codepoint_t h;
    152     if (start < text && ((h = *(text - 1)), likely (hb_in_range<hb_codepoint_t> (h, 0xd800, 0xdbff))))
    153     {
    154       /* high surrogate */
    155       *unicode = (h << 10) + c - ((0xd800 << 10) - 0x10000 + 0xdc00);
    156        text--;
    157     } else
    158       *unicode = -1;
    159   } else
    160     *unicode = c;
    161 
    162   return text;
    163 }
    164 
    165 
    166 static inline unsigned int
    167 hb_utf_strlen (const uint16_t *text)
    168 {
    169   unsigned int l = 0;
    170   while (*text++) l++;
    171   return l;
    172 }
    173 
    174 
    175 /* UTF-32 */
    176 
    177 static inline const uint32_t *
    178 hb_utf_next (const uint32_t *text,
    179 	     const uint32_t *end HB_UNUSED,
    180 	     hb_codepoint_t *unicode)
    181 {
    182   *unicode = *text++;
    183   return text;
    184 }
    185 
    186 static inline const uint32_t *
    187 hb_utf_prev (const uint32_t *text,
    188 	     const uint32_t *start HB_UNUSED,
    189 	     hb_codepoint_t *unicode)
    190 {
    191   *unicode = *--text;
    192   return text;
    193 }
    194 
    195 static inline unsigned int
    196 hb_utf_strlen (const uint32_t *text)
    197 {
    198   unsigned int l = 0;
    199   while (*text++) l++;
    200   return l;
    201 }
    202 
    203 
    204 #endif /* HB_UTF_PRIVATE_HH */
    205