1 /* 2 * Copyright 2018 Google, Inc. 3 * 4 * This is part of HarfBuzz, a text shaping library. 5 * 6 * Permission is hereby granted, without written agreement and without 7 * license or royalty fees, to use, copy, modify, and distribute this 8 * software and its documentation for any purpose, provided that the 9 * above copyright notice and the following two paragraphs appear in 10 * all copies of this software. 11 * 12 * IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE TO ANY PARTY FOR 13 * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES 14 * ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN 15 * IF THE COPYRIGHT HOLDER HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH 16 * DAMAGE. 17 * 18 * THE COPYRIGHT HOLDER SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, 19 * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND 20 * FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS 21 * ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO 22 * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. 23 * 24 * Google Author(s): Garret Rieger 25 */ 26 27 #ifndef HB_OT_OS2_UNICODE_RANGES_HH 28 #define HB_OT_OS2_UNICODE_RANGES_HH 29 30 #include "hb.hh" 31 32 namespace OT { 33 34 struct OS2Range 35 { 36 static int 37 cmp (const void *_key, const void *_item) 38 { 39 hb_codepoint_t cp = *((hb_codepoint_t *) _key); 40 const OS2Range *range = (OS2Range *) _item; 41 42 if (cp < range->start) 43 return -1; 44 else if (cp <= range->end) 45 return 0; 46 else 47 return +1; 48 } 49 50 hb_codepoint_t start; 51 hb_codepoint_t end; 52 unsigned int bit; 53 }; 54 55 /* Note: The contents of this array was generated using gen-os2-unicode-ranges.py. */ 56 static const OS2Range _hb_os2_unicode_ranges[] = 57 { 58 { 0x0, 0x7F, 0}, // Basic Latin 59 { 0x80, 0xFF, 1}, // Latin-1 Supplement 60 { 0x100, 0x17F, 2}, // Latin Extended-A 61 { 0x180, 0x24F, 3}, // Latin Extended-B 62 { 0x250, 0x2AF, 4}, // IPA Extensions 63 { 0x2B0, 0x2FF, 5}, // Spacing Modifier Letters 64 { 0x300, 0x36F, 6}, // Combining Diacritical Marks 65 { 0x370, 0x3FF, 7}, // Greek and Coptic 66 { 0x400, 0x4FF, 9}, // Cyrillic 67 { 0x500, 0x52F, 9}, // Cyrillic Supplement 68 { 0x530, 0x58F, 10}, // Armenian 69 { 0x590, 0x5FF, 11}, // Hebrew 70 { 0x600, 0x6FF, 13}, // Arabic 71 { 0x700, 0x74F, 71}, // Syriac 72 { 0x750, 0x77F, 13}, // Arabic Supplement 73 { 0x780, 0x7BF, 72}, // Thaana 74 { 0x7C0, 0x7FF, 14}, // NKo 75 { 0x900, 0x97F, 15}, // Devanagari 76 { 0x980, 0x9FF, 16}, // Bengali 77 { 0xA00, 0xA7F, 17}, // Gurmukhi 78 { 0xA80, 0xAFF, 18}, // Gujarati 79 { 0xB00, 0xB7F, 19}, // Oriya 80 { 0xB80, 0xBFF, 20}, // Tamil 81 { 0xC00, 0xC7F, 21}, // Telugu 82 { 0xC80, 0xCFF, 22}, // Kannada 83 { 0xD00, 0xD7F, 23}, // Malayalam 84 { 0xD80, 0xDFF, 73}, // Sinhala 85 { 0xE00, 0xE7F, 24}, // Thai 86 { 0xE80, 0xEFF, 25}, // Lao 87 { 0xF00, 0xFFF, 70}, // Tibetan 88 { 0x1000, 0x109F, 74}, // Myanmar 89 { 0x10A0, 0x10FF, 26}, // Georgian 90 { 0x1100, 0x11FF, 28}, // Hangul Jamo 91 { 0x1200, 0x137F, 75}, // Ethiopic 92 { 0x1380, 0x139F, 75}, // Ethiopic Supplement 93 { 0x13A0, 0x13FF, 76}, // Cherokee 94 { 0x1400, 0x167F, 77}, // Unified Canadian Aboriginal Syllabics 95 { 0x1680, 0x169F, 78}, // Ogham 96 { 0x16A0, 0x16FF, 79}, // Runic 97 { 0x1700, 0x171F, 84}, // Tagalog 98 { 0x1720, 0x173F, 84}, // Hanunoo 99 { 0x1740, 0x175F, 84}, // Buhid 100 { 0x1760, 0x177F, 84}, // Tagbanwa 101 { 0x1780, 0x17FF, 80}, // Khmer 102 { 0x1800, 0x18AF, 81}, // Mongolian 103 { 0x1900, 0x194F, 93}, // Limbu 104 { 0x1950, 0x197F, 94}, // Tai Le 105 { 0x1980, 0x19DF, 95}, // New Tai Lue 106 { 0x19E0, 0x19FF, 80}, // Khmer Symbols 107 { 0x1A00, 0x1A1F, 96}, // Buginese 108 { 0x1B00, 0x1B7F, 27}, // Balinese 109 { 0x1B80, 0x1BBF, 112}, // Sundanese 110 { 0x1C00, 0x1C4F, 113}, // Lepcha 111 { 0x1C50, 0x1C7F, 114}, // Ol Chiki 112 { 0x1D00, 0x1D7F, 4}, // Phonetic Extensions 113 { 0x1D80, 0x1DBF, 4}, // Phonetic Extensions Supplement 114 { 0x1DC0, 0x1DFF, 6}, // Combining Diacritical Marks Supplement 115 { 0x1E00, 0x1EFF, 29}, // Latin Extended Additional 116 { 0x1F00, 0x1FFF, 30}, // Greek Extended 117 { 0x2000, 0x206F, 31}, // General Punctuation 118 { 0x2070, 0x209F, 32}, // Superscripts And Subscripts 119 { 0x20A0, 0x20CF, 33}, // Currency Symbols 120 { 0x20D0, 0x20FF, 34}, // Combining Diacritical Marks For Symbols 121 { 0x2100, 0x214F, 35}, // Letterlike Symbols 122 { 0x2150, 0x218F, 36}, // Number Forms 123 { 0x2190, 0x21FF, 37}, // Arrows 124 { 0x2200, 0x22FF, 38}, // Mathematical Operators 125 { 0x2300, 0x23FF, 39}, // Miscellaneous Technical 126 { 0x2400, 0x243F, 40}, // Control Pictures 127 { 0x2440, 0x245F, 41}, // Optical Character Recognition 128 { 0x2460, 0x24FF, 42}, // Enclosed Alphanumerics 129 { 0x2500, 0x257F, 43}, // Box Drawing 130 { 0x2580, 0x259F, 44}, // Block Elements 131 { 0x25A0, 0x25FF, 45}, // Geometric Shapes 132 { 0x2600, 0x26FF, 46}, // Miscellaneous Symbols 133 { 0x2700, 0x27BF, 47}, // Dingbats 134 { 0x27C0, 0x27EF, 38}, // Miscellaneous Mathematical Symbols-A 135 { 0x27F0, 0x27FF, 37}, // Supplemental Arrows-A 136 { 0x2800, 0x28FF, 82}, // Braille Patterns 137 { 0x2900, 0x297F, 37}, // Supplemental Arrows-B 138 { 0x2980, 0x29FF, 38}, // Miscellaneous Mathematical Symbols-B 139 { 0x2A00, 0x2AFF, 38}, // Supplemental Mathematical Operators 140 { 0x2B00, 0x2BFF, 37}, // Miscellaneous Symbols and Arrows 141 { 0x2C00, 0x2C5F, 97}, // Glagolitic 142 { 0x2C60, 0x2C7F, 29}, // Latin Extended-C 143 { 0x2C80, 0x2CFF, 8}, // Coptic 144 { 0x2D00, 0x2D2F, 26}, // Georgian Supplement 145 { 0x2D30, 0x2D7F, 98}, // Tifinagh 146 { 0x2D80, 0x2DDF, 75}, // Ethiopic Extended 147 { 0x2DE0, 0x2DFF, 9}, // Cyrillic Extended-A 148 { 0x2E00, 0x2E7F, 31}, // Supplemental Punctuation 149 { 0x2E80, 0x2EFF, 59}, // CJK Radicals Supplement 150 { 0x2F00, 0x2FDF, 59}, // Kangxi Radicals 151 { 0x2FF0, 0x2FFF, 59}, // Ideographic Description Characters 152 { 0x3000, 0x303F, 48}, // CJK Symbols And Punctuation 153 { 0x3040, 0x309F, 49}, // Hiragana 154 { 0x30A0, 0x30FF, 50}, // Katakana 155 { 0x3100, 0x312F, 51}, // Bopomofo 156 { 0x3130, 0x318F, 52}, // Hangul Compatibility Jamo 157 { 0x3190, 0x319F, 59}, // Kanbun 158 { 0x31A0, 0x31BF, 51}, // Bopomofo Extended 159 { 0x31C0, 0x31EF, 61}, // CJK Strokes 160 { 0x31F0, 0x31FF, 50}, // Katakana Phonetic Extensions 161 { 0x3200, 0x32FF, 54}, // Enclosed CJK Letters And Months 162 { 0x3300, 0x33FF, 55}, // CJK Compatibility 163 { 0x3400, 0x4DBF, 59}, // CJK Unified Ideographs Extension A 164 { 0x4DC0, 0x4DFF, 99}, // Yijing Hexagram Symbols 165 { 0x4E00, 0x9FFF, 59}, // CJK Unified Ideographs 166 { 0xA000, 0xA48F, 83}, // Yi Syllables 167 { 0xA490, 0xA4CF, 83}, // Yi Radicals 168 { 0xA500, 0xA63F, 12}, // Vai 169 { 0xA640, 0xA69F, 9}, // Cyrillic Extended-B 170 { 0xA700, 0xA71F, 5}, // Modifier Tone Letters 171 { 0xA720, 0xA7FF, 29}, // Latin Extended-D 172 { 0xA800, 0xA82F, 100}, // Syloti Nagri 173 { 0xA840, 0xA87F, 53}, // Phags-pa 174 { 0xA880, 0xA8DF, 115}, // Saurashtra 175 { 0xA900, 0xA92F, 116}, // Kayah Li 176 { 0xA930, 0xA95F, 117}, // Rejang 177 { 0xAA00, 0xAA5F, 118}, // Cham 178 { 0xAC00, 0xD7AF, 56}, // Hangul Syllables 179 { 0xD800, 0xDFFF, 57}, // Non-Plane 0 * 180 { 0xE000, 0xF8FF, 60}, // Private Use Area (plane 0) 181 { 0xF900, 0xFAFF, 61}, // CJK Compatibility Ideographs 182 { 0xFB00, 0xFB4F, 62}, // Alphabetic Presentation Forms 183 { 0xFB50, 0xFDFF, 63}, // Arabic Presentation Forms-A 184 { 0xFE00, 0xFE0F, 91}, // Variation Selectors 185 { 0xFE10, 0xFE1F, 65}, // Vertical Forms 186 { 0xFE20, 0xFE2F, 64}, // Combining Half Marks 187 { 0xFE30, 0xFE4F, 65}, // CJK Compatibility Forms 188 { 0xFE50, 0xFE6F, 66}, // Small Form Variants 189 { 0xFE70, 0xFEFF, 67}, // Arabic Presentation Forms-B 190 { 0xFF00, 0xFFEF, 68}, // Halfwidth And Fullwidth Forms 191 { 0xFFF0, 0xFFFF, 69}, // Specials 192 { 0x10000, 0x1007F, 101}, // Linear B Syllabary 193 { 0x10080, 0x100FF, 101}, // Linear B Ideograms 194 { 0x10100, 0x1013F, 101}, // Aegean Numbers 195 { 0x10140, 0x1018F, 102}, // Ancient Greek Numbers 196 { 0x10190, 0x101CF, 119}, // Ancient Symbols 197 { 0x101D0, 0x101FF, 120}, // Phaistos Disc 198 { 0x10280, 0x1029F, 121}, // Lycian 199 { 0x102A0, 0x102DF, 121}, // Carian 200 { 0x10300, 0x1032F, 85}, // Old Italic 201 { 0x10330, 0x1034F, 86}, // Gothic 202 { 0x10380, 0x1039F, 103}, // Ugaritic 203 { 0x103A0, 0x103DF, 104}, // Old Persian 204 { 0x10400, 0x1044F, 87}, // Deseret 205 { 0x10450, 0x1047F, 105}, // Shavian 206 { 0x10480, 0x104AF, 106}, // Osmanya 207 { 0x10800, 0x1083F, 107}, // Cypriot Syllabary 208 { 0x10900, 0x1091F, 58}, // Phoenician 209 { 0x10920, 0x1093F, 121}, // Lydian 210 { 0x10A00, 0x10A5F, 108}, // Kharoshthi 211 { 0x12000, 0x123FF, 110}, // Cuneiform 212 { 0x12400, 0x1247F, 110}, // Cuneiform Numbers and Punctuation 213 { 0x1D000, 0x1D0FF, 88}, // Byzantine Musical Symbols 214 { 0x1D100, 0x1D1FF, 88}, // Musical Symbols 215 { 0x1D200, 0x1D24F, 88}, // Ancient Greek Musical Notation 216 { 0x1D300, 0x1D35F, 109}, // Tai Xuan Jing Symbols 217 { 0x1D360, 0x1D37F, 111}, // Counting Rod Numerals 218 { 0x1D400, 0x1D7FF, 89}, // Mathematical Alphanumeric Symbols 219 { 0x1F000, 0x1F02F, 122}, // Mahjong Tiles 220 { 0x1F030, 0x1F09F, 122}, // Domino Tiles 221 { 0x20000, 0x2A6DF, 59}, // CJK Unified Ideographs Extension B 222 { 0x2F800, 0x2FA1F, 61}, // CJK Compatibility Ideographs Supplement 223 { 0xE0000, 0xE007F, 92}, // Tags 224 { 0xE0100, 0xE01EF, 91}, // Variation Selectors Supplement 225 { 0xF0000, 0xFFFFD, 90}, // Private Use (plane 15) 226 {0x100000, 0x10FFFD, 90}, // Private Use (plane 16) 227 }; 228 229 /** 230 * _hb_ot_os2_get_unicode_range_bit: 231 * Returns the bit to be set in os/2 ulUnicodeOS2Range for a given codepoint. 232 **/ 233 static unsigned int 234 _hb_ot_os2_get_unicode_range_bit (hb_codepoint_t cp) 235 { 236 OS2Range *range = (OS2Range*) hb_bsearch (&cp, _hb_os2_unicode_ranges, 237 ARRAY_LENGTH (_hb_os2_unicode_ranges), 238 sizeof (OS2Range), 239 OS2Range::cmp); 240 if (range != nullptr) 241 return range->bit; 242 return -1; 243 } 244 245 } /* namespace OT */ 246 247 #endif /* HB_OT_OS2_UNICODE_RANGES_HH */ 248