Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright  2014  Google, Inc.
      3  *
      4  *  This is part of HarfBuzz, a text shaping library.
      5  *
      6  * Permission is hereby granted, without written agreement and without
      7  * license or royalty fees, to use, copy, modify, and distribute this
      8  * software and its documentation for any purpose, provided that the
      9  * above copyright notice and the following two paragraphs appear in
     10  * all copies of this software.
     11  *
     12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     16  * DAMAGE.
     17  *
     18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     23  *
     24  * Google Author(s): Behdad Esfahbod
     25  */
     26 
     27 #ifndef HB_OT_CMAP_TABLE_HH
     28 #define HB_OT_CMAP_TABLE_HH
     29 
     30 #include "hb-open-type-private.hh"
     31 
     32 
     33 namespace OT {
     34 
     35 
/*
 * cmap -- Character To Glyph Index Mapping Table
 */

/* Table tag built from the four characters 'c','m','a','p'. */
#define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
     41 
     42 
     43 struct CmapSubtableFormat0
     44 {
     45   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
     46   {
     47     hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
     48     if (!gid)
     49       return false;
     50     *glyph = gid;
     51     return true;
     52   }
     53 
     54   inline bool sanitize (hb_sanitize_context_t *c) const
     55   {
     56     TRACE_SANITIZE (this);
     57     return_trace (c->check_struct (this));
     58   }
     59 
     60   protected:
     61   USHORT	format;		/* Format number is set to 0. */
     62   USHORT	lengthZ;	/* Byte length of this subtable. */
     63   USHORT	languageZ;	/* Ignore. */
     64   BYTE		glyphIdArray[256];/* An array that maps character
     65 				 * code to glyph index values. */
     66   public:
     67   DEFINE_SIZE_STATIC (6 + 256);
     68 };
     69 
     70 struct CmapSubtableFormat4
     71 {
     72   struct accelerator_t
     73   {
     74     inline void init (const CmapSubtableFormat4 *subtable)
     75     {
     76       segCount = subtable->segCountX2 / 2;
     77       endCount = subtable->values;
     78       startCount = endCount + segCount + 1;
     79       idDelta = startCount + segCount;
     80       idRangeOffset = idDelta + segCount;
     81       glyphIdArray = idRangeOffset + segCount;
     82       glyphIdArrayLength = (subtable->length - 16 - 8 * segCount) / 2;
     83     }
     84 
     85     static inline bool get_glyph_func (const void *obj, hb_codepoint_t codepoint, hb_codepoint_t *glyph)
     86     {
     87       const accelerator_t *thiz = (const accelerator_t *) obj;
     88 
     89       /* Custom two-array bsearch. */
     90       int min = 0, max = (int) thiz->segCount - 1;
     91       const USHORT *startCount = thiz->startCount;
     92       const USHORT *endCount = thiz->endCount;
     93       unsigned int i;
     94       while (min <= max)
     95       {
     96 	int mid = (min + max) / 2;
     97 	if (codepoint < startCount[mid])
     98 	  max = mid - 1;
     99 	else if (codepoint > endCount[mid])
    100 	  min = mid + 1;
    101 	else
    102 	{
    103 	  i = mid;
    104 	  goto found;
    105 	}
    106       }
    107       return false;
    108 
    109     found:
    110       hb_codepoint_t gid;
    111       unsigned int rangeOffset = thiz->idRangeOffset[i];
    112       if (rangeOffset == 0)
    113 	gid = codepoint + thiz->idDelta[i];
    114       else
    115       {
    116 	/* Somebody has been smoking... */
    117 	unsigned int index = rangeOffset / 2 + (codepoint - thiz->startCount[i]) + i - thiz->segCount;
    118 	if (unlikely (index >= thiz->glyphIdArrayLength))
    119 	  return false;
    120 	gid = thiz->glyphIdArray[index];
    121 	if (unlikely (!gid))
    122 	  return false;
    123 	gid += thiz->idDelta[i];
    124       }
    125 
    126       *glyph = gid & 0xFFFFu;
    127       return true;
    128     }
    129 
    130     const USHORT *endCount;
    131     const USHORT *startCount;
    132     const USHORT *idDelta;
    133     const USHORT *idRangeOffset;
    134     const USHORT *glyphIdArray;
    135     unsigned int segCount;
    136     unsigned int glyphIdArrayLength;
    137   };
    138 
    139   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
    140   {
    141     accelerator_t accel;
    142     accel.init (this);
    143     return accel.get_glyph_func (&accel, codepoint, glyph);
    144   }
    145 
    146   inline bool sanitize (hb_sanitize_context_t *c) const
    147   {
    148     TRACE_SANITIZE (this);
    149     if (unlikely (!c->check_struct (this)))
    150       return_trace (false);
    151 
    152     if (unlikely (!c->check_range (this, length)))
    153     {
    154       /* Some broken fonts have too long of a "length" value.
    155        * If that is the case, just change the value to truncate
    156        * the subtable at the end of the blob. */
    157       uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535,
    158 					    (uintptr_t) (c->end -
    159 							 (char *) this));
    160       if (!c->try_set (&length, new_length))
    161 	return_trace (false);
    162     }
    163 
    164     return_trace (16 + 4 * (unsigned int) segCountX2 <= length);
    165   }
    166 
    167   protected:
    168   USHORT	format;		/* Format number is set to 4. */
    169   USHORT	length;		/* This is the length in bytes of the
    170 				 * subtable. */
    171   USHORT	languageZ;	/* Ignore. */
    172   USHORT	segCountX2;	/* 2 x segCount. */
    173   USHORT	searchRangeZ;	/* 2 * (2**floor(log2(segCount))) */
    174   USHORT	entrySelectorZ;	/* log2(searchRange/2) */
    175   USHORT	rangeShiftZ;	/* 2 x segCount - searchRange */
    176 
    177   USHORT	values[VAR];
    178 #if 0
    179   USHORT	endCount[segCount];	/* End characterCode for each segment,
    180 					 * last=0xFFFFu. */
    181   USHORT	reservedPad;		/* Set to 0. */
    182   USHORT	startCount[segCount];	/* Start character code for each segment. */
    183   SHORT		idDelta[segCount];	/* Delta for all character codes in segment. */
    184   USHORT	idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
    185   USHORT	glyphIdArray[VAR];	/* Glyph index array (arbitrary length) */
    186 #endif
    187 
    188   public:
    189   DEFINE_SIZE_ARRAY (14, values);
    190 };
    191 
    192 struct CmapSubtableLongGroup
    193 {
    194   friend struct CmapSubtableFormat12;
    195   friend struct CmapSubtableFormat13;
    196 
    197   int cmp (hb_codepoint_t codepoint) const
    198   {
    199     if (codepoint < startCharCode) return -1;
    200     if (codepoint > endCharCode)   return +1;
    201     return 0;
    202   }
    203 
    204   inline bool sanitize (hb_sanitize_context_t *c) const
    205   {
    206     TRACE_SANITIZE (this);
    207     return_trace (c->check_struct (this));
    208   }
    209 
    210   private:
    211   ULONG		startCharCode;	/* First character code in this group. */
    212   ULONG		endCharCode;	/* Last character code in this group. */
    213   ULONG		glyphID;	/* Glyph index; interpretation depends on
    214 				 * subtable format. */
    215   public:
    216   DEFINE_SIZE_STATIC (12);
    217 };
    218 
    219 template <typename UINT>
    220 struct CmapSubtableTrimmed
    221 {
    222   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
    223   {
    224     /* Rely on our implicit array bound-checking. */
    225     hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
    226     if (!gid)
    227       return false;
    228     *glyph = gid;
    229     return true;
    230   }
    231 
    232   inline bool sanitize (hb_sanitize_context_t *c) const
    233   {
    234     TRACE_SANITIZE (this);
    235     return_trace (c->check_struct (this) && glyphIdArray.sanitize (c));
    236   }
    237 
    238   protected:
    239   UINT		formatReserved;	/* Subtable format and (maybe) padding. */
    240   UINT		lengthZ;	/* Byte length of this subtable. */
    241   UINT		languageZ;	/* Ignore. */
    242   UINT		startCharCode;	/* First character code covered. */
    243   ArrayOf<GlyphID, UINT>
    244 		glyphIdArray;	/* Array of glyph index values for character
    245 				 * codes in the range. */
    246   public:
    247   DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
    248 };
    249 
    250 struct CmapSubtableFormat6  : CmapSubtableTrimmed<USHORT> {};
    251 struct CmapSubtableFormat10 : CmapSubtableTrimmed<ULONG > {};
    252 
    253 template <typename T>
    254 struct CmapSubtableLongSegmented
    255 {
    256   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
    257   {
    258     int i = groups.bsearch (codepoint);
    259     if (i == -1)
    260       return false;
    261     *glyph = T::group_get_glyph (groups[i], codepoint);
    262     return true;
    263   }
    264 
    265   inline bool sanitize (hb_sanitize_context_t *c) const
    266   {
    267     TRACE_SANITIZE (this);
    268     return_trace (c->check_struct (this) && groups.sanitize (c));
    269   }
    270 
    271   protected:
    272   USHORT	format;		/* Subtable format; set to 12. */
    273   USHORT	reservedZ;	/* Reserved; set to 0. */
    274   ULONG		lengthZ;	/* Byte length of this subtable. */
    275   ULONG		languageZ;	/* Ignore. */
    276   SortedArrayOf<CmapSubtableLongGroup, ULONG>
    277 		groups;		/* Groupings. */
    278   public:
    279   DEFINE_SIZE_ARRAY (16, groups);
    280 };
    281 
    282 struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
    283 {
    284   static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
    285 						hb_codepoint_t u)
    286   { return group.glyphID + (u - group.startCharCode); }
    287 };
    288 
    289 struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
    290 {
    291   static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
    292 						hb_codepoint_t u HB_UNUSED)
    293   { return group.glyphID; }
    294 };
    295 
    296 typedef enum
    297 {
    298   GLYPH_VARIANT_NOT_FOUND = 0,
    299   GLYPH_VARIANT_FOUND = 1,
    300   GLYPH_VARIANT_USE_DEFAULT = 2
    301 } glyph_variant_t;
    302 
    303 struct UnicodeValueRange
    304 {
    305   inline int cmp (const hb_codepoint_t &codepoint) const
    306   {
    307     if (codepoint < startUnicodeValue) return -1;
    308     if (codepoint > startUnicodeValue + additionalCount) return +1;
    309     return 0;
    310   }
    311 
    312   inline bool sanitize (hb_sanitize_context_t *c) const
    313   {
    314     TRACE_SANITIZE (this);
    315     return_trace (c->check_struct (this));
    316   }
    317 
    318   UINT24	startUnicodeValue;	/* First value in this range. */
    319   BYTE		additionalCount;	/* Number of additional values in this
    320 					 * range. */
    321   public:
    322   DEFINE_SIZE_STATIC (4);
    323 };
    324 
    325 typedef SortedArrayOf<UnicodeValueRange, ULONG> DefaultUVS;
    326 
    327 struct UVSMapping
    328 {
    329   inline int cmp (const hb_codepoint_t &codepoint) const
    330   {
    331     return unicodeValue.cmp (codepoint);
    332   }
    333 
    334   inline bool sanitize (hb_sanitize_context_t *c) const
    335   {
    336     TRACE_SANITIZE (this);
    337     return_trace (c->check_struct (this));
    338   }
    339 
    340   UINT24	unicodeValue;	/* Base Unicode value of the UVS */
    341   GlyphID	glyphID;	/* Glyph ID of the UVS */
    342   public:
    343   DEFINE_SIZE_STATIC (5);
    344 };
    345 
    346 typedef SortedArrayOf<UVSMapping, ULONG> NonDefaultUVS;
    347 
    348 struct VariationSelectorRecord
    349 {
    350   inline glyph_variant_t get_glyph (hb_codepoint_t codepoint,
    351 				    hb_codepoint_t *glyph,
    352 				    const void *base) const
    353   {
    354     int i;
    355     const DefaultUVS &defaults = base+defaultUVS;
    356     i = defaults.bsearch (codepoint);
    357     if (i != -1)
    358       return GLYPH_VARIANT_USE_DEFAULT;
    359     const NonDefaultUVS &nonDefaults = base+nonDefaultUVS;
    360     i = nonDefaults.bsearch (codepoint);
    361     if (i != -1)
    362     {
    363       *glyph = nonDefaults[i].glyphID;
    364        return GLYPH_VARIANT_FOUND;
    365     }
    366     return GLYPH_VARIANT_NOT_FOUND;
    367   }
    368 
    369   inline int cmp (const hb_codepoint_t &variation_selector) const
    370   {
    371     return varSelector.cmp (variation_selector);
    372   }
    373 
    374   inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
    375   {
    376     TRACE_SANITIZE (this);
    377     return_trace (c->check_struct (this) &&
    378 		  defaultUVS.sanitize (c, base) &&
    379 		  nonDefaultUVS.sanitize (c, base));
    380   }
    381 
    382   UINT24	varSelector;	/* Variation selector. */
    383   OffsetTo<DefaultUVS, ULONG>
    384 		defaultUVS;	/* Offset to Default UVS Table. May be 0. */
    385   OffsetTo<NonDefaultUVS, ULONG>
    386 		nonDefaultUVS;	/* Offset to Non-Default UVS Table. May be 0. */
    387   public:
    388   DEFINE_SIZE_STATIC (11);
    389 };
    390 
    391 struct CmapSubtableFormat14
    392 {
    393   inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
    394 					    hb_codepoint_t variation_selector,
    395 					    hb_codepoint_t *glyph) const
    396   {
    397     return record[record.bsearch(variation_selector)].get_glyph (codepoint, glyph, this);
    398   }
    399 
    400   inline bool sanitize (hb_sanitize_context_t *c) const
    401   {
    402     TRACE_SANITIZE (this);
    403     return_trace (c->check_struct (this) &&
    404 		  record.sanitize (c, this));
    405   }
    406 
    407   protected:
    408   USHORT	format;		/* Format number is set to 14. */
    409   ULONG		lengthZ;	/* Byte length of this subtable. */
    410   SortedArrayOf<VariationSelectorRecord, ULONG>
    411 		record;		/* Variation selector records; sorted
    412 				 * in increasing order of `varSelector'. */
    413   public:
    414   DEFINE_SIZE_ARRAY (10, record);
    415 };
    416 
    417 struct CmapSubtable
    418 {
    419   /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
    420 
    421   inline bool get_glyph (hb_codepoint_t codepoint,
    422 			 hb_codepoint_t *glyph) const
    423   {
    424     switch (u.format) {
    425     case  0: return u.format0 .get_glyph(codepoint, glyph);
    426     case  4: return u.format4 .get_glyph(codepoint, glyph);
    427     case  6: return u.format6 .get_glyph(codepoint, glyph);
    428     case 10: return u.format10.get_glyph(codepoint, glyph);
    429     case 12: return u.format12.get_glyph(codepoint, glyph);
    430     case 13: return u.format13.get_glyph(codepoint, glyph);
    431     case 14:
    432     default: return false;
    433     }
    434   }
    435 
    436   inline bool sanitize (hb_sanitize_context_t *c) const
    437   {
    438     TRACE_SANITIZE (this);
    439     if (!u.format.sanitize (c)) return_trace (false);
    440     switch (u.format) {
    441     case  0: return_trace (u.format0 .sanitize (c));
    442     case  4: return_trace (u.format4 .sanitize (c));
    443     case  6: return_trace (u.format6 .sanitize (c));
    444     case 10: return_trace (u.format10.sanitize (c));
    445     case 12: return_trace (u.format12.sanitize (c));
    446     case 13: return_trace (u.format13.sanitize (c));
    447     case 14: return_trace (u.format14.sanitize (c));
    448     default:return_trace (true);
    449     }
    450   }
    451 
    452   public:
    453   union {
    454   USHORT		format;		/* Format identifier */
    455   CmapSubtableFormat0	format0;
    456   CmapSubtableFormat4	format4;
    457   CmapSubtableFormat6	format6;
    458   CmapSubtableFormat10	format10;
    459   CmapSubtableFormat12	format12;
    460   CmapSubtableFormat13	format13;
    461   CmapSubtableFormat14	format14;
    462   } u;
    463   public:
    464   DEFINE_SIZE_UNION (2, format);
    465 };
    466 
    467 
    468 struct EncodingRecord
    469 {
    470   inline int cmp (const EncodingRecord &other) const
    471   {
    472     int ret;
    473     ret = platformID.cmp (other.platformID);
    474     if (ret) return ret;
    475     ret = encodingID.cmp (other.encodingID);
    476     if (ret) return ret;
    477     return 0;
    478   }
    479 
    480   inline bool sanitize (hb_sanitize_context_t *c, const void *base) const
    481   {
    482     TRACE_SANITIZE (this);
    483     return_trace (c->check_struct (this) &&
    484 		  subtable.sanitize (c, base));
    485   }
    486 
    487   USHORT	platformID;	/* Platform ID. */
    488   USHORT	encodingID;	/* Platform-specific encoding ID. */
    489   OffsetTo<CmapSubtable, ULONG>
    490 		subtable;	/* Byte offset from beginning of table to the subtable for this encoding. */
    491   public:
    492   DEFINE_SIZE_STATIC (8);
    493 };
    494 
    495 struct cmap
    496 {
    497   static const hb_tag_t tableTag	= HB_OT_TAG_cmap;
    498 
    499   inline const CmapSubtable *find_subtable (unsigned int platform_id,
    500 					    unsigned int encoding_id) const
    501   {
    502     EncodingRecord key;
    503     key.platformID.set (platform_id);
    504     key.encodingID.set (encoding_id);
    505 
    506     /* Note: We can use bsearch, but since it has no performance
    507      * implications, we use lsearch and as such accept fonts with
    508      * unsorted subtable list. */
    509     int result = encodingRecord./*bsearch*/lsearch (key);
    510     if (result == -1 || !encodingRecord[result].subtable)
    511       return NULL;
    512 
    513     return &(this+encodingRecord[result].subtable);
    514   }
    515 
    516   inline bool sanitize (hb_sanitize_context_t *c) const
    517   {
    518     TRACE_SANITIZE (this);
    519     return_trace (c->check_struct (this) &&
    520 		  likely (version == 0) &&
    521 		  encodingRecord.sanitize (c, this));
    522   }
    523 
    524   USHORT		version;	/* Table version number (0). */
    525   SortedArrayOf<EncodingRecord>
    526 			encodingRecord;	/* Encoding tables. */
    527   public:
    528   DEFINE_SIZE_ARRAY (4, encodingRecord);
    529 };
    530 
    531 
    532 } /* namespace OT */
    533 
    534 
    535 #endif /* HB_OT_CMAP_TABLE_HH */
    536