Home | History | Annotate | Download | only in src
      1 /*
      2  * Copyright  2014  Google, Inc.
      3  *
      4  *  This is part of HarfBuzz, a text shaping library.
      5  *
      6  * Permission is hereby granted, without written agreement and without
      7  * license or royalty fees, to use, copy, modify, and distribute this
      8  * software and its documentation for any purpose, provided that the
      9  * above copyright notice and the following two paragraphs appear in
     10  * all copies of this software.
     11  *
     12  * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR
     13  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
     14  * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN
     15  * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
     16  * DAMAGE.
     17  *
     18  * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING,
     19  * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
     20  * FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     21  * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
     22  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     23  *
     24  * Google Author(s): Behdad Esfahbod
     25  */
     26 
     27 #ifndef HB_OT_CMAP_TABLE_HH
     28 #define HB_OT_CMAP_TABLE_HH
     29 
     30 #include "hb-open-type-private.hh"
     31 
     32 
     33 namespace OT {
     34 
     35 
/*
 * cmap -- Character To Glyph Index Mapping Table
 */

/* Table tag for the cmap table, built from the characters 'c','m','a','p'. */
#define HB_OT_TAG_cmap HB_TAG('c','m','a','p')
     41 
     42 
     43 struct CmapSubtableFormat0
     44 {
     45   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
     46   {
     47     hb_codepoint_t gid = codepoint < 256 ? glyphIdArray[codepoint] : 0;
     48     if (!gid)
     49       return false;
     50     *glyph = gid;
     51     return true;
     52   }
     53 
     54   inline bool sanitize (hb_sanitize_context_t *c) {
     55     TRACE_SANITIZE (this);
     56     return TRACE_RETURN (c->check_struct (this));
     57   }
     58 
     59   protected:
     60   USHORT	format;		/* Format number is set to 0. */
     61   USHORT	lengthZ;	/* Byte length of this subtable. */
     62   USHORT	languageZ;	/* Ignore. */
     63   BYTE		glyphIdArray[256];/* An array that maps character
     64 				 * code to glyph index values. */
     65   public:
     66   DEFINE_SIZE_STATIC (6 + 256);
     67 };
     68 
     69 struct CmapSubtableFormat4
     70 {
     71   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
     72   {
     73     unsigned int segCount;
     74     const USHORT *endCount;
     75     const USHORT *startCount;
     76     const USHORT *idDelta;
     77     const USHORT *idRangeOffset;
     78     const USHORT *glyphIdArray;
     79     unsigned int glyphIdArrayLength;
     80 
     81     segCount = this->segCountX2 / 2;
     82     endCount = this->values;
     83     startCount = endCount + segCount + 1;
     84     idDelta = startCount + segCount;
     85     idRangeOffset = idDelta + segCount;
     86     glyphIdArray = idRangeOffset + segCount;
     87     glyphIdArrayLength = (this->length - 16 - 8 * segCount) / 2;
     88 
     89     /* Custom two-array bsearch. */
     90     int min = 0, max = (int) segCount - 1;
     91     unsigned int i;
     92     while (min <= max)
     93     {
     94       int mid = (min + max) / 2;
     95       if (codepoint < startCount[mid])
     96         max = mid - 1;
     97       else if (codepoint > endCount[mid])
     98         min = mid + 1;
     99       else
    100       {
    101 	i = mid;
    102 	goto found;
    103       }
    104     }
    105     return false;
    106 
    107   found:
    108     hb_codepoint_t gid;
    109     unsigned int rangeOffset = idRangeOffset[i];
    110     if (rangeOffset == 0)
    111       gid = codepoint + idDelta[i];
    112     else
    113     {
    114       /* Somebody has been smoking... */
    115       unsigned int index = rangeOffset / 2 + (codepoint - startCount[i]) + i - segCount;
    116       if (unlikely (index >= glyphIdArrayLength))
    117 	return false;
    118       gid = glyphIdArray[index];
    119       if (unlikely (!gid))
    120 	return false;
    121       gid += idDelta[i];
    122     }
    123 
    124     *glyph = gid & 0xFFFFu;
    125     return true;
    126   }
    127 
    128   inline bool sanitize (hb_sanitize_context_t *c)
    129   {
    130     TRACE_SANITIZE (this);
    131     if (unlikely (!c->check_struct (this)))
    132       return TRACE_RETURN (false);
    133 
    134     if (unlikely (!c->check_range (this, length)))
    135     {
    136       /* Some broken fonts have too long of a "length" value.
    137        * If that is the case, just change the value to truncate
    138        * the subtable at the end of the blob. */
    139       uint16_t new_length = (uint16_t) MIN ((uintptr_t) 65535,
    140 					    (uintptr_t) (c->end -
    141 							 (char *) this));
    142       if (!c->try_set (&length, new_length))
    143 	return TRACE_RETURN (false);
    144     }
    145 
    146     return TRACE_RETURN (16 + 4 * (unsigned int) segCountX2 <= length);
    147   }
    148 
    149   protected:
    150   USHORT	format;		/* Format number is set to 4. */
    151   USHORT	length;		/* This is the length in bytes of the
    152 				 * subtable. */
    153   USHORT	languageZ;	/* Ignore. */
    154   USHORT	segCountX2;	/* 2 x segCount. */
    155   USHORT	searchRangeZ;	/* 2 * (2**floor(log2(segCount))) */
    156   USHORT	entrySelectorZ;	/* log2(searchRange/2) */
    157   USHORT	rangeShiftZ;	/* 2 x segCount - searchRange */
    158 
    159   USHORT	values[VAR];
    160 #if 0
    161   USHORT	endCount[segCount];	/* End characterCode for each segment,
    162 					 * last=0xFFFFu. */
    163   USHORT	reservedPad;		/* Set to 0. */
    164   USHORT	startCount[segCount];	/* Start character code for each segment. */
    165   SHORT		idDelta[segCount];	/* Delta for all character codes in segment. */
    166   USHORT	idRangeOffset[segCount];/* Offsets into glyphIdArray or 0 */
    167   USHORT	glyphIdArray[VAR];	/* Glyph index array (arbitrary length) */
    168 #endif
    169 
    170   public:
    171   DEFINE_SIZE_ARRAY (14, values);
    172 };
    173 
    174 struct CmapSubtableLongGroup
    175 {
    176   friend struct CmapSubtableFormat12;
    177   friend struct CmapSubtableFormat13;
    178 
    179   int cmp (hb_codepoint_t codepoint) const
    180   {
    181     if (codepoint < startCharCode) return -1;
    182     if (codepoint > endCharCode)   return +1;
    183     return 0;
    184   }
    185 
    186   inline bool sanitize (hb_sanitize_context_t *c) {
    187     TRACE_SANITIZE (this);
    188     return TRACE_RETURN (c->check_struct (this));
    189   }
    190 
    191   private:
    192   ULONG		startCharCode;	/* First character code in this group. */
    193   ULONG		endCharCode;	/* Last character code in this group. */
    194   ULONG		glyphID;	/* Glyph index; interpretation depends on
    195 				 * subtable format. */
    196   public:
    197   DEFINE_SIZE_STATIC (12);
    198 };
    199 
    200 template <typename UINT>
    201 struct CmapSubtableTrimmed
    202 {
    203   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
    204   {
    205     /* Rely on our implicit array bound-checking. */
    206     hb_codepoint_t gid = glyphIdArray[codepoint - startCharCode];
    207     if (!gid)
    208       return false;
    209     *glyph = gid;
    210     return true;
    211   }
    212 
    213   inline bool sanitize (hb_sanitize_context_t *c) {
    214     TRACE_SANITIZE (this);
    215     return TRACE_RETURN (c->check_struct (this) && glyphIdArray.sanitize (c));
    216   }
    217 
    218   protected:
    219   UINT		formatReserved;	/* Subtable format and (maybe) padding. */
    220   UINT		lengthZ;	/* Byte length of this subtable. */
    221   UINT		languageZ;	/* Ignore. */
    222   UINT		startCharCode;	/* First character code covered. */
    223   ArrayOf<GlyphID, UINT>
    224 		glyphIdArray;	/* Array of glyph index values for character
    225 				 * codes in the range. */
    226   public:
    227   DEFINE_SIZE_ARRAY (5 * sizeof (UINT), glyphIdArray);
    228 };
    229 
    230 struct CmapSubtableFormat6  : CmapSubtableTrimmed<USHORT> {};
    231 struct CmapSubtableFormat10 : CmapSubtableTrimmed<ULONG > {};
    232 
    233 template <typename T>
    234 struct CmapSubtableLongSegmented
    235 {
    236   inline bool get_glyph (hb_codepoint_t codepoint, hb_codepoint_t *glyph) const
    237   {
    238     int i = groups.bsearch (codepoint);
    239     if (i == -1)
    240       return false;
    241     *glyph = T::group_get_glyph (groups[i], codepoint);
    242     return true;
    243   }
    244 
    245   inline bool sanitize (hb_sanitize_context_t *c) {
    246     TRACE_SANITIZE (this);
    247     return TRACE_RETURN (c->check_struct (this) && groups.sanitize (c));
    248   }
    249 
    250   protected:
    251   USHORT	format;		/* Subtable format; set to 12. */
    252   USHORT	reservedZ;	/* Reserved; set to 0. */
    253   ULONG		lengthZ;	/* Byte length of this subtable. */
    254   ULONG		languageZ;	/* Ignore. */
    255   SortedArrayOf<CmapSubtableLongGroup, ULONG>
    256 		groups;		/* Groupings. */
    257   public:
    258   DEFINE_SIZE_ARRAY (16, groups);
    259 };
    260 
    261 struct CmapSubtableFormat12 : CmapSubtableLongSegmented<CmapSubtableFormat12>
    262 {
    263   static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
    264 						hb_codepoint_t u)
    265   { return group.glyphID + (u - group.startCharCode); }
    266 };
    267 
    268 struct CmapSubtableFormat13 : CmapSubtableLongSegmented<CmapSubtableFormat13>
    269 {
    270   static inline hb_codepoint_t group_get_glyph (const CmapSubtableLongGroup &group,
    271 						hb_codepoint_t u HB_UNUSED)
    272   { return group.glyphID; }
    273 };
    274 
/* Result of a variation-sequence lookup (see
 * VariationSelectorRecord::get_glyph). */
typedef enum
{
  GLYPH_VARIANT_NOT_FOUND = 0,	/* Sequence is in neither UVS table. */
  GLYPH_VARIANT_FOUND = 1,	/* Non-default mapping found; glyph was stored. */
  GLYPH_VARIANT_USE_DEFAULT = 2	/* Sequence is listed in the default UVS table;
				 * no glyph is stored. */
} glyph_variant_t;
    281 
    282 struct UnicodeValueRange
    283 {
    284   inline int cmp (const hb_codepoint_t &codepoint) const
    285   {
    286     if (codepoint < startUnicodeValue) return -1;
    287     if (codepoint > startUnicodeValue + additionalCount) return +1;
    288     return 0;
    289   }
    290 
    291   inline bool sanitize (hb_sanitize_context_t *c) {
    292     TRACE_SANITIZE (this);
    293     return TRACE_RETURN (c->check_struct (this));
    294   }
    295 
    296   UINT24	startUnicodeValue;	/* First value in this range. */
    297   BYTE		additionalCount;	/* Number of additional values in this
    298 					 * range. */
    299   public:
    300   DEFINE_SIZE_STATIC (4);
    301 };
    302 
    303 typedef SortedArrayOf<UnicodeValueRange, ULONG> DefaultUVS;
    304 
    305 struct UVSMapping
    306 {
    307   inline int cmp (const hb_codepoint_t &codepoint) const
    308   {
    309     return unicodeValue.cmp (codepoint);
    310   }
    311 
    312   inline bool sanitize (hb_sanitize_context_t *c) {
    313     TRACE_SANITIZE (this);
    314     return TRACE_RETURN (c->check_struct (this));
    315   }
    316 
    317   UINT24	unicodeValue;	/* Base Unicode value of the UVS */
    318   GlyphID	glyphID;	/* Glyph ID of the UVS */
    319   public:
    320   DEFINE_SIZE_STATIC (5);
    321 };
    322 
    323 typedef SortedArrayOf<UVSMapping, ULONG> NonDefaultUVS;
    324 
    325 struct VariationSelectorRecord
    326 {
    327   inline glyph_variant_t get_glyph (hb_codepoint_t codepoint,
    328 				    hb_codepoint_t *glyph,
    329 				    const void *base) const
    330   {
    331     int i;
    332     const DefaultUVS &defaults = base+defaultUVS;
    333     i = defaults.bsearch (codepoint);
    334     if (i != -1)
    335       return GLYPH_VARIANT_USE_DEFAULT;
    336     const NonDefaultUVS &nonDefaults = base+nonDefaultUVS;
    337     i = nonDefaults.bsearch (codepoint);
    338     if (i != -1)
    339     {
    340       *glyph = nonDefaults[i].glyphID;
    341        return GLYPH_VARIANT_FOUND;
    342     }
    343     return GLYPH_VARIANT_NOT_FOUND;
    344   }
    345 
    346   inline int cmp (const hb_codepoint_t &variation_selector) const
    347   {
    348     return varSelector.cmp (variation_selector);
    349   }
    350 
    351   inline bool sanitize (hb_sanitize_context_t *c, void *base) {
    352     TRACE_SANITIZE (this);
    353     return TRACE_RETURN (c->check_struct (this) &&
    354 			 defaultUVS.sanitize (c, base) &&
    355 			 nonDefaultUVS.sanitize (c, base));
    356   }
    357 
    358   UINT24	varSelector;	/* Variation selector. */
    359   OffsetTo<DefaultUVS, ULONG>
    360 		defaultUVS;	/* Offset to Default UVS Table. May be 0. */
    361   OffsetTo<NonDefaultUVS, ULONG>
    362 		nonDefaultUVS;	/* Offset to Non-Default UVS Table. May be 0. */
    363   public:
    364   DEFINE_SIZE_STATIC (11);
    365 };
    366 
    367 struct CmapSubtableFormat14
    368 {
    369   inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
    370 					    hb_codepoint_t variation_selector,
    371 					    hb_codepoint_t *glyph) const
    372   {
    373     return record[record.bsearch(variation_selector)].get_glyph (codepoint, glyph, this);
    374   }
    375 
    376   inline bool sanitize (hb_sanitize_context_t *c) {
    377     TRACE_SANITIZE (this);
    378     return TRACE_RETURN (c->check_struct (this) &&
    379 			 record.sanitize (c, this));
    380   }
    381 
    382   protected:
    383   USHORT	format;		/* Format number is set to 0. */
    384   ULONG		lengthZ;	/* Byte length of this subtable. */
    385   SortedArrayOf<VariationSelectorRecord, ULONG>
    386 		record;		/* Variation selector records; sorted
    387 				 * in increasing order of `varSelector'. */
    388   public:
    389   DEFINE_SIZE_ARRAY (10, record);
    390 };
    391 
    392 struct CmapSubtable
    393 {
    394   /* Note: We intentionally do NOT implement subtable formats 2 and 8. */
    395 
    396   inline bool get_glyph (hb_codepoint_t codepoint,
    397 			 hb_codepoint_t *glyph) const
    398   {
    399     switch (u.format) {
    400     case  0: return u.format0 .get_glyph(codepoint, glyph);
    401     case  4: return u.format4 .get_glyph(codepoint, glyph);
    402     case  6: return u.format6 .get_glyph(codepoint, glyph);
    403     case 10: return u.format10.get_glyph(codepoint, glyph);
    404     case 12: return u.format12.get_glyph(codepoint, glyph);
    405     case 13: return u.format13.get_glyph(codepoint, glyph);
    406     case 14:
    407     default: return false;
    408     }
    409   }
    410 
    411   inline glyph_variant_t get_glyph_variant (hb_codepoint_t codepoint,
    412 					    hb_codepoint_t variation_selector,
    413 					    hb_codepoint_t *glyph) const
    414   {
    415     switch (u.format) {
    416     case 14: return u.format14.get_glyph_variant(codepoint, variation_selector, glyph);
    417     default: return GLYPH_VARIANT_NOT_FOUND;
    418     }
    419   }
    420 
    421   inline bool sanitize (hb_sanitize_context_t *c) {
    422     TRACE_SANITIZE (this);
    423     if (!u.format.sanitize (c)) return TRACE_RETURN (false);
    424     switch (u.format) {
    425     case  0: return TRACE_RETURN (u.format0 .sanitize (c));
    426     case  4: return TRACE_RETURN (u.format4 .sanitize (c));
    427     case  6: return TRACE_RETURN (u.format6 .sanitize (c));
    428     case 10: return TRACE_RETURN (u.format10.sanitize (c));
    429     case 12: return TRACE_RETURN (u.format12.sanitize (c));
    430     case 13: return TRACE_RETURN (u.format13.sanitize (c));
    431     case 14: return TRACE_RETURN (u.format14.sanitize (c));
    432     default:return TRACE_RETURN (true);
    433     }
    434   }
    435 
    436   protected:
    437   union {
    438   USHORT		format;		/* Format identifier */
    439   CmapSubtableFormat0	format0;
    440   CmapSubtableFormat4	format4;
    441   CmapSubtableFormat6	format6;
    442   CmapSubtableFormat10	format10;
    443   CmapSubtableFormat12	format12;
    444   CmapSubtableFormat13	format13;
    445   CmapSubtableFormat14	format14;
    446   } u;
    447   public:
    448   DEFINE_SIZE_UNION (2, format);
    449 };
    450 
    451 
    452 struct EncodingRecord
    453 {
    454   inline int cmp (const EncodingRecord &other) const
    455   {
    456     int ret;
    457     ret = platformID.cmp (other.platformID);
    458     if (ret) return ret;
    459     ret = encodingID.cmp (other.encodingID);
    460     if (ret) return ret;
    461     return 0;
    462   }
    463 
    464   inline bool sanitize (hb_sanitize_context_t *c, void *base) {
    465     TRACE_SANITIZE (this);
    466     return TRACE_RETURN (c->check_struct (this) &&
    467 			 subtable.sanitize (c, base));
    468   }
    469 
    470   USHORT	platformID;	/* Platform ID. */
    471   USHORT	encodingID;	/* Platform-specific encoding ID. */
    472   OffsetTo<CmapSubtable, ULONG>
    473 		subtable;	/* Byte offset from beginning of table to the subtable for this encoding. */
    474   public:
    475   DEFINE_SIZE_STATIC (8);
    476 };
    477 
    478 struct cmap
    479 {
    480   static const hb_tag_t tableTag	= HB_OT_TAG_cmap;
    481 
    482   inline const CmapSubtable *find_subtable (unsigned int platform_id,
    483 					    unsigned int encoding_id) const
    484   {
    485     EncodingRecord key;
    486     key.platformID.set (platform_id);
    487     key.encodingID.set (encoding_id);
    488 
    489     /* Note: We can use bsearch, but since it has no performance
    490      * implications, we use lsearch and as such accept fonts with
    491      * unsorted subtable list. */
    492     int result = encodingRecord./*bsearch*/lsearch (key);
    493     if (result == -1 || !encodingRecord[result].subtable)
    494       return NULL;
    495 
    496     return &(this+encodingRecord[result].subtable);
    497   }
    498 
    499   inline bool sanitize (hb_sanitize_context_t *c) {
    500     TRACE_SANITIZE (this);
    501     return TRACE_RETURN (c->check_struct (this) &&
    502 			 likely (version == 0) &&
    503 			 encodingRecord.sanitize (c, this));
    504   }
    505 
    506   USHORT		version;	/* Table version number (0). */
    507   SortedArrayOf<EncodingRecord>
    508 			encodingRecord;	/* Encoding tables. */
    509   public:
    510   DEFINE_SIZE_ARRAY (4, encodingRecord);
    511 };
    512 
    513 
    514 } /* namespace OT */
    515 
    516 
    517 #endif /* HB_OT_CMAP_TABLE_HH */
    518