#!/usr/bin/env python
"""Consistency checks for a built system font collection.

Usage: <script> TARGET_OUT CHECK_EMOJI [UCD_PATH]

TARGET_OUT is the directory that contains 'etc/fonts.xml', 'fonts/' and
'usr/hyphen-data/'.  When CHECK_EMOJI is the string 'true', UCD_PATH must
point at a directory with the Unicode emoji data files and the emoji
coverage of the fallback chain is checked as well.

Failed checks are reported through assertions (or sys.exit), so this script
must not be run with 'python -O'.
"""

import collections
import copy
import glob
import io
from os import path
import sys
from xml.etree import ElementTree

try:
    from fontTools import ttLib
except ImportError:
    # Only open_font() needs fontTools.  Deferring the failure lets the pure
    # parsing helpers in this module be imported without it; open_font()
    # raises a clear ImportError when a font actually has to be read.
    ttLib = None

EMOJI_VS = 0xFE0F

LANG_TO_SCRIPT = {
    'as': 'Beng',
    'bg': 'Cyrl',
    'bn': 'Beng',
    'cu': 'Cyrl',
    'cy': 'Latn',
    'da': 'Latn',
    'de': 'Latn',
    'en': 'Latn',
    'es': 'Latn',
    'et': 'Latn',
    'eu': 'Latn',
    'fr': 'Latn',
    'ga': 'Latn',
    'gu': 'Gujr',
    'hi': 'Deva',
    'hr': 'Latn',
    'hu': 'Latn',
    'hy': 'Armn',
    'ja': 'Jpan',
    'kn': 'Knda',
    'ko': 'Kore',
    'ml': 'Mlym',
    'mn': 'Cyrl',
    'mr': 'Deva',
    'nb': 'Latn',
    'nn': 'Latn',
    'or': 'Orya',
    'pa': 'Guru',
    'pt': 'Latn',
    'sl': 'Latn',
    'ta': 'Taml',
    'te': 'Telu',
    'tk': 'Latn',
}


def lang_to_script(lang_code):
    """Returns the four-letter script code for a language tag.

    The tag is lower-cased and then shortened from the right, one
    hyphen-separated subtag at a time, until it is found in LANG_TO_SCRIPT.
    A four-letter alphabetic subtag met on the way is taken to be the script
    itself and returned in title case.

    Raises:
        AssertionError: if no script can be determined for lang_code.
    """
    lang = lang_code.lower()
    while lang not in LANG_TO_SCRIPT:
        hyphen_idx = lang.rfind('-')
        assert hyphen_idx != -1, (
            'We do not know what script the "%s" language is written in.'
            % lang_code)
        assumed_script = lang[hyphen_idx+1:]
        if len(assumed_script) == 4 and assumed_script.isalpha():
            # This is actually the script
            return assumed_script.title()
        lang = lang[:hyphen_idx]
    return LANG_TO_SCRIPT[lang]


def _sequence_sort_key(item):
    """Returns a key that orders code points and sequences together.

    Python 3 refuses to compare an int with a tuple, so single code points
    are wrapped into one-element tuples for sorting purposes.
    """
    return item if isinstance(item, tuple) else (item,)


def printable(inp):
    """Formats a code point, a sequence (tuple) or a set of them for messages.

    A code point becomes 'U+XXXX', a tuple '<a, b, ...>' and a set or
    frozenset '{a, b, ...}' (in the set's iteration order).
    """
    if isinstance(inp, (set, frozenset)):  # set of character sequences
        return '{' + ', '.join([printable(seq) for seq in inp]) + '}'
    if isinstance(inp, tuple):  # character sequence
        return '<' + (', '.join([printable(ch) for ch in inp])) + '>'
    # single character
    return 'U+%04X' % inp


def open_font(font):
    """Opens a font given as a (file name, collection index or None) pair.

    The file name is resolved against the module-level _fonts_dir, which
    main() sets before any font is opened.

    Raises:
        ImportError: if fontTools could not be imported.
    """
    if ttLib is None:
        raise ImportError('fontTools is required to open font files')
    font_file, index = font
    font_path = path.join(_fonts_dir, font_file)
    if index is not None:
        return ttLib.TTFont(font_path, fontNumber=index)
    else:
        return ttLib.TTFont(font_path)


def get_best_cmap(font):
    """Returns the code point -> glyph name dict of the font's best cmap.

    The format 12 (platform 3, encoding 10) subtable is preferred; the
    format 4 (platform 3, encoding 1) subtable is used otherwise.

    Raises:
        AssertionError: if either kind of subtable appears more than once,
            or if the font has neither of them.
    """
    ttfont = open_font(font)
    all_unicode_cmap = None
    bmp_cmap = None
    for cmap in ttfont['cmap'].tables:
        specifier = (cmap.format, cmap.platformID, cmap.platEncID)
        if specifier == (4, 3, 1):
            assert bmp_cmap is None, 'More than one BMP cmap in %s' % (font, )
            bmp_cmap = cmap
        elif specifier == (12, 3, 10):
            assert all_unicode_cmap is None, (
                'More than one UCS-4 cmap in %s' % (font, ))
            all_unicode_cmap = cmap

    best = all_unicode_cmap if all_unicode_cmap is not None else bmp_cmap
    # Without this check a font lacking both subtables would fail with an
    # unhelpful AttributeError on None.
    assert best is not None, 'No Unicode cmap found in %s' % (font, )
    return best.cmap


def get_variation_sequences_cmap(font):
    """Returns the font's format 14 (platform 0, encoding 5) cmap subtable.

    Returns None when the font has no such subtable.

    Raises:
        AssertionError: if there is more than one such subtable.
    """
    ttfont = open_font(font)
    vs_cmap = None
    for cmap in ttfont['cmap'].tables:
        specifier = (cmap.format, cmap.platformID, cmap.platEncID)
        if specifier == (14, 0, 5):
            assert vs_cmap is None, 'More than one VS cmap in %s' % (font, )
            vs_cmap = cmap
    return vs_cmap


def get_emoji_map(font):
    """Builds a map from code points and sequences to glyph names.

    The result combines the best cmap (single code points), the variation
    sequence cmap ((base, selector) pairs) and the GSUB ligature lookups
    (longer sequences, expressed as tuples of code points).

    Raises:
        AssertionError: if the font has no variation sequence cmap, or if a
            prefix (of length >= 2) of a ligature sequence is already mapped.
    """
    # Add normal characters
    emoji_map = copy.copy(get_best_cmap(font))
    reverse_cmap = {glyph: code for code, glyph in emoji_map.items()}

    # Add variation sequences
    vs_cmap = get_variation_sequences_cmap(font)
    assert vs_cmap is not None, (
        'No variation sequence cmap in %s' % (font, ))
    vs_dict = vs_cmap.uvsDict
    for vs in vs_dict:
        for base, glyph in vs_dict[vs]:
            if glyph is None:
                # No dedicated glyph: the sequence renders as the base
                # character's glyph.
                emoji_map[(base, vs)] = emoji_map[base]
            else:
                emoji_map[(base, vs)] = glyph

    # Add GSUB rules
    ttfont = open_font(font)
    for lookup in ttfont['GSUB'].table.LookupList.Lookup:
        if lookup.LookupType != 4:
            # Other lookups are used in the emoji font for fallback.
            # We ignore them for now.
            continue
        for subtable in lookup.SubTable:
            ligatures = subtable.ligatures
            for first_glyph in ligatures:
                for ligature in ligatures[first_glyph]:
                    sequence = [first_glyph] + ligature.Component
                    sequence = [reverse_cmap[glyph] for glyph in sequence]
                    sequence = tuple(sequence)
                    # Make sure no starting subsequence of 'sequence' has been
                    # seen before.
                    for sub_len in range(2, len(sequence)+1):
                        subsequence = sequence[:sub_len]
                        assert subsequence not in emoji_map
                    emoji_map[sequence] = ligature.LigGlyph

    return emoji_map


def assert_font_supports_any_of_chars(font, chars):
    """Exits the program unless the font maps at least one of chars."""
    best_cmap = get_best_cmap(font)
    if any(char in best_cmap for char in chars):
        return
    sys.exit('None of characters in %s were found in %s' % (chars, font))


def assert_font_supports_all_of_chars(font, chars):
    """Asserts that the font's best cmap maps every character in chars."""
    best_cmap = get_best_cmap(font)
    for char in chars:
        assert char in best_cmap, (
            'U+%04X was not found in %s' % (char, font))


def assert_font_supports_none_of_chars(font, chars):
    """Asserts that the font's best cmap maps no character in chars."""
    best_cmap = get_best_cmap(font)
    for char in chars:
        assert char not in best_cmap, (
            'U+%04X was found in %s' % (char, font))


def assert_font_supports_all_sequences(font, sequences):
    """Asserts that every (base, selector) pair is a default-glyph entry
    of the font's variation sequence cmap."""
    vs_cmap = get_variation_sequences_cmap(font)
    assert vs_cmap is not None, (
        'No variation sequence cmap in %s' % (font, ))
    vs_dict = vs_cmap.uvsDict
    for base, vs in sorted(sequences):
        assert vs in vs_dict and (base, None) in vs_dict[vs], (
            '<U+%04X, U+%04X> was not found in %s' % (base, vs, font))


def check_hyphens(hyphens_dir):
    """Checks that fonts for hyphenated scripts contain a hyphen character.

    The scripts are derived from the 'hyph-<lang>.hyb' files in hyphens_dir;
    every font mapped to such a script must support U+002D or U+2010.
    """
    # Find all the scripts that need automatic hyphenation
    scripts = set()
    for hyb_file in glob.iglob(path.join(hyphens_dir, '*.hyb')):
        hyb_file = path.basename(hyb_file)
        assert hyb_file.startswith('hyph-'), (
            'Unknown hyphenation file %s' % hyb_file)
        lang_code = hyb_file[hyb_file.index('-')+1:hyb_file.index('.')]
        scripts.add(lang_to_script(lang_code))

    HYPHENS = {0x002D, 0x2010}
    for script in scripts:
        fonts = _script_to_font_map[script]
        assert fonts, 'No fonts found for the "%s" script' % script
        for font in fonts:
            assert_font_supports_any_of_chars(font, HYPHENS)


class FontRecord(object):
    """One <font> entry of fonts.xml together with its family attributes."""

    def __init__(self, name, scripts, variant, weight, style, font):
        self.name = name          # family name, or None for fallback fonts
        self.scripts = scripts    # frozenset of script codes of the family
        self.variant = variant    # None, 'elegant' or 'compact'
        self.weight = weight      # integer weight, a multiple of 100
        self.style = style        # 'normal' or 'italic'
        self.font = font          # (file name, collection index or None)


def parse_fonts_xml(fonts_xml_path):
    """Parses fonts.xml into _script_to_font_map and _fallback_chain.

    _fallback_chain receives one FontRecord per <font> element, in document
    order.  _script_to_font_map maps a script code to the set of
    (file name, index) pairs of the fonts declared for that script; named
    families are registered for 'Latn', 'Grek' and 'Cyrl'.

    Raises:
        AssertionError: on too many families or unexpected attribute values.
    """
    global _script_to_font_map, _fallback_chain
    _script_to_font_map = collections.defaultdict(set)
    _fallback_chain = []
    tree = ElementTree.parse(fonts_xml_path)
    families = tree.findall('family')
    # Minikin supports up to 254 but users can place their own font at the
    # first place. Thus, 253 is the maximum allowed number of font families
    # in the default collection.
    assert len(families) < 254, (
        'System font collection can contains up to 253 font families.')
    for family in families:
        name = family.get('name')
        variant = family.get('variant')
        langs = family.get('lang')
        if name:
            assert variant is None, (
                'No variant expected for LGC font %s.' % name)
            assert langs is None, (
                'No language expected for LGC fonts %s.' % name)
        else:
            assert variant in {None, 'elegant', 'compact'}, (
                'Unexpected value for variant: %s' % variant)

        if langs:
            langs = langs.split()
            scripts = {lang_to_script(lang) for lang in langs}
        else:
            scripts = set()

        for child in family:
            assert child.tag == 'font', (
                'Unknown tag <%s>' % child.tag)
            font_file = child.text.rstrip()
            weight = int(child.get('weight'))
            assert weight % 100 == 0, (
                'Font weight "%d" is not a multiple of 100.' % weight)

            style = child.get('style')
            assert style in {'normal', 'italic'}, (
                'Unknown style "%s"' % style)

            index = child.get('index')
            if index:
                index = int(index)

            _fallback_chain.append(FontRecord(
                name,
                frozenset(scripts),
                variant,
                weight,
                style,
                (font_file, index)))

            if name:  # non-empty names are used for default LGC fonts
                map_scripts = {'Latn', 'Grek', 'Cyrl'}
            else:
                map_scripts = scripts
            for script in map_scripts:
                _script_to_font_map[script].add((font_file, index))


def check_emoji_coverage(all_emoji, equivalent_emoji):
    """Checks the coverage of the single emoji font in the fallback chain."""
    emoji_font = get_emoji_font()
    check_emoji_font_coverage(emoji_font, all_emoji, equivalent_emoji)


def get_emoji_font():
    """Returns the (file name, index) of the only font tagged 'Zsye'.

    Raises:
        AssertionError: if the fallback chain does not contain exactly one.
    """
    emoji_fonts = [
        record.font for record in _fallback_chain
        if 'Zsye' in record.scripts]
    assert len(emoji_fonts) == 1, 'There are %d emoji fonts.' % len(emoji_fonts)
    return emoji_fonts[0]


def check_emoji_font_coverage(emoji_font, all_emoji, equivalent_emoji):
    """Checks that the emoji font supports exactly the expected emoji.

    Args:
        emoji_font: (file name, index) pair of the emoji font.
        all_emoji: set of code points and sequences the font must support.
        equivalent_emoji: dict mapping a code point or sequence to the one
            it must share a glyph with.

    Raises:
        AssertionError: if an expected emoji is missing, an unexpected one
            is present, equivalent emoji use different glyphs, or
            unrelated emoji share a glyph.
    """
    coverage = get_emoji_map(emoji_font)
    for sequence in all_emoji:
        assert sequence in coverage, (
            '%s is not supported in the emoji font.' % printable(sequence))

    for sequence in coverage:
        if sequence in {0x0000, 0x000D, 0x0020}:
            # The font needs to support a few extra characters, which is OK
            continue
        assert sequence in all_emoji, (
            'Emoji font should not support %s.' % printable(sequence))

    # The keys mix ints and tuples, which Python 3 cannot order directly;
    # sort through a key so failures are still reported deterministically.
    for first in sorted(equivalent_emoji, key=_sequence_sort_key):
        second = equivalent_emoji[first]
        assert coverage[first] == coverage[second], (
            '%s and %s should map to the same glyph.' % (
                printable(first),
                printable(second)))

    # Group the sequences by glyph once instead of rescanning the whole
    # coverage map for every glyph.
    seqs_by_glyph = collections.defaultdict(list)
    for seq, glyph in coverage.items():
        seqs_by_glyph[glyph].append(seq)

    for glyph, maps_to_glyph in seqs_by_glyph.items():
        if len(maps_to_glyph) > 1:
            # There are more than one sequences mapping to the same glyph. We
            # need to make sure they were expected to be equivalent.
            equivalent_seqs = set()
            for seq in maps_to_glyph:
                equivalent_seq = seq
                while equivalent_seq in equivalent_emoji:
                    equivalent_seq = equivalent_emoji[equivalent_seq]
                equivalent_seqs.add(equivalent_seq)
            assert len(equivalent_seqs) == 1, (
                'The sequences %s should not result in the same glyph %s' % (
                    printable(equivalent_seqs),
                    glyph))


def check_emoji_defaults(default_emoji):
    """Checks that default-emoji characters are left to the emoji font.

    No checked font other than the emoji font may map a character from
    default_emoji, and every 'Emoji' character outside default_emoji must be
    mapped by some font placed before the emoji font (Unicode 7.0
    characters excepted).
    """
    missing_text_chars = _emoji_properties['Emoji'] - default_emoji
    # The set mixes ints and tuples, which Python 3 cannot order directly.
    # The sorted list is the same for every font, so build it once.
    sorted_default_emoji = sorted(default_emoji, key=_sequence_sort_key)
    emoji_font_seen = False
    for record in _fallback_chain:
        if 'Zsye' in record.scripts:
            emoji_font_seen = True
            # No need to check the emoji font
            continue
        # For later fonts, we only check them if they have a script
        # defined, since the defined script may get them to a higher
        # score even if they appear after the emoji font. However,
        # we should skip checking the text symbols font, since
        # symbol fonts should be able to override the emoji display
        # style when 'Zsym' is explicitly specified by the user.
        if emoji_font_seen and (not record.scripts or 'Zsym' in record.scripts):
            continue

        # Check default emoji-style characters
        assert_font_supports_none_of_chars(record.font, sorted_default_emoji)

        # Mark default text-style characters appearing in fonts above the
        # emoji font as seen
        if not emoji_font_seen:
            missing_text_chars -= set(get_best_cmap(record.font))

    # Noto does not have monochrome glyphs for Unicode 7.0 wingdings and
    # webdings yet.
    missing_text_chars -= _chars_by_age['7.0']
    assert missing_text_chars == set(), (
        'Text style version of some emoji characters are missing: ' +
            repr(missing_text_chars))


def _read_data_lines(file_path):
    """Yields the non-empty lines of a Unicode data file, comments removed.

    The file is decoded as UTF-8 explicitly, because the data files contain
    non-ASCII characters and the locale encoding is not reliable.
    """
    with io.open(file_path, encoding='utf-8') as datafile:
        for line in datafile:
            line = line.partition('#')[0].strip()
            if line:
                yield line


# Setting reverse to true returns a dictionary that maps the values to sets of
# characters, useful for some binary properties. Otherwise, we get a
# dictionary that maps characters to the property values, assuming there's only
# one property in the file.
def parse_unicode_datafile(file_path, reverse=False):
    """Parses a semicolon-separated Unicode data file.

    The first field of a line is a code point, a 'start..end' range or a
    space-separated sequence (stored as a tuple); the second field is the
    property value.

    Raises:
        AssertionError: if reverse is false and a character appears twice.
    """
    if reverse:
        output_dict = collections.defaultdict(set)
    else:
        output_dict = {}
    for line in _read_data_lines(file_path):
        chars, prop = line.split(';')[:2]
        chars = chars.strip()
        prop = prop.strip()

        if ' ' in chars:  # character sequence
            sequence = [int(ch, 16) for ch in chars.split(' ')]
            additions = [tuple(sequence)]
        elif '..' in chars:  # character range
            char_start, char_end = chars.split('..')
            char_start = int(char_start, 16)
            char_end = int(char_end, 16)
            # range() rather than xrange(): works on Python 2 and 3.
            additions = range(char_start, char_end+1)
        else:  # single character
            additions = [int(chars, 16)]
        if reverse:
            output_dict[prop].update(additions)
        else:
            for addition in additions:
                assert addition not in output_dict
                output_dict[addition] = prop
    return output_dict


def parse_emoji_variants(file_path):
    """Parses emoji-variation-sequences.txt.

    Returns:
        A (text_set, emoji_set) pair of sets of (base, selector) tuples, for
        the lines described as 'text style' and 'emoji style' respectively.
    """
    emoji_set = set()
    text_set = set()
    for line in _read_data_lines(file_path):
        sequence, description, _ = line.split(';')
        sequence = sequence.strip().split(' ')
        base = int(sequence[0], 16)
        vs = int(sequence[1], 16)
        description = description.strip()
        if description == 'text style':
            text_set.add((base, vs))
        elif description == 'emoji style':
            emoji_set.add((base, vs))
    return text_set, emoji_set


def parse_ucd(ucd_path):
    """Loads the Unicode data files under ucd_path into module globals.

    Files in the 'additions' subdirectory are merged into the data read from
    the files of the same name in ucd_path.
    """
    global _emoji_properties, _chars_by_age
    global _text_variation_sequences, _emoji_variation_sequences
    global _emoji_sequences, _emoji_zwj_sequences
    _emoji_properties = parse_unicode_datafile(
        path.join(ucd_path, 'emoji-data.txt'), reverse=True)
    emoji_properties_additions = parse_unicode_datafile(
        path.join(ucd_path, 'additions', 'emoji-data.txt'), reverse=True)
    for prop in emoji_properties_additions:
        _emoji_properties[prop].update(emoji_properties_additions[prop])

    _chars_by_age = parse_unicode_datafile(
        path.join(ucd_path, 'DerivedAge.txt'), reverse=True)
    sequences = parse_emoji_variants(
        path.join(ucd_path, 'emoji-variation-sequences.txt'))
    _text_variation_sequences, _emoji_variation_sequences = sequences
    _emoji_sequences = parse_unicode_datafile(
        path.join(ucd_path, 'emoji-sequences.txt'))
    _emoji_sequences.update(parse_unicode_datafile(
        path.join(ucd_path, 'additions', 'emoji-sequences.txt')))
    _emoji_zwj_sequences = parse_unicode_datafile(
        path.join(ucd_path, 'emoji-zwj-sequences.txt'))
    _emoji_zwj_sequences.update(parse_unicode_datafile(
        path.join(ucd_path, 'additions', 'emoji-zwj-sequences.txt')))


def flag_sequence(territory_code):
    """Maps each letter 'A'..'Z' of territory_code to the code point at the
    same offset from U+1F1E6 and returns the result as a tuple."""
    return tuple(0x1F1E6 + ord(ch) - ord('A') for ch in territory_code)


UNSUPPORTED_FLAGS = frozenset({
    flag_sequence('BL'), flag_sequence('BQ'), flag_sequence('DG'),
    flag_sequence('EA'), flag_sequence('EH'), flag_sequence('FK'),
    flag_sequence('GF'), flag_sequence('GP'), flag_sequence('GS'),
    flag_sequence('MF'), flag_sequence('MQ'), flag_sequence('NC'),
    flag_sequence('PM'), flag_sequence('RE'), flag_sequence('TF'),
    flag_sequence('WF'), flag_sequence('XK'), flag_sequence('YT'),
})

EQUIVALENT_FLAGS = {
    flag_sequence('BV'): flag_sequence('NO'),
    flag_sequence('CP'): flag_sequence('FR'),
    flag_sequence('HM'): flag_sequence('AU'),
    flag_sequence('SJ'): flag_sequence('NO'),
    flag_sequence('UM'): flag_sequence('US'),
}

COMBINING_KEYCAP = 0x20E3

LEGACY_ANDROID_EMOJI = {
    0xFE4E5: flag_sequence('JP'),
    0xFE4E6: flag_sequence('US'),
    0xFE4E7: flag_sequence('FR'),
    0xFE4E8: flag_sequence('DE'),
    0xFE4E9: flag_sequence('IT'),
    0xFE4EA: flag_sequence('GB'),
    0xFE4EB: flag_sequence('ES'),
    0xFE4EC: flag_sequence('RU'),
    0xFE4ED: flag_sequence('CN'),
    0xFE4EE: flag_sequence('KR'),
    0xFE82C: (ord('#'), COMBINING_KEYCAP),
    0xFE82E: (ord('1'), COMBINING_KEYCAP),
    0xFE82F: (ord('2'), COMBINING_KEYCAP),
    0xFE830: (ord('3'), COMBINING_KEYCAP),
    0xFE831: (ord('4'), COMBINING_KEYCAP),
    0xFE832: (ord('5'), COMBINING_KEYCAP),
    0xFE833: (ord('6'), COMBINING_KEYCAP),
    0xFE834: (ord('7'), COMBINING_KEYCAP),
    0xFE835: (ord('8'), COMBINING_KEYCAP),
    0xFE836: (ord('9'), COMBINING_KEYCAP),
    0xFE837: (ord('0'), COMBINING_KEYCAP),
}

ZWJ_IDENTICALS = {
    # KISS
    (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F48B, 0x200D, 0x1F468): 0x1F48F,
    # COUPLE WITH HEART
    (0x1F469, 0x200D, 0x2764, 0x200D, 0x1F468): 0x1F491,
    # FAMILY
    (0x1F468, 0x200D, 0x1F469, 0x200D, 0x1F466): 0x1F46A,
}

ZWJ = 0x200D
FEMALE_SIGN = 0x2640
MALE_SIGN = 0x2642

GENDER_DEFAULTS = [
    (0x26F9, MALE_SIGN),  # PERSON WITH BALL
    (0x1F3C3, MALE_SIGN),  # RUNNER
    (0x1F3C4, MALE_SIGN),  # SURFER
    (0x1F3CA, MALE_SIGN),  # SWIMMER
    (0x1F3CB, MALE_SIGN),  # WEIGHT LIFTER
    (0x1F3CC, MALE_SIGN),  # GOLFER
    (0x1F46E, MALE_SIGN),  # POLICE OFFICER
    (0x1F46F, FEMALE_SIGN),  # WOMAN WITH BUNNY EARS
    (0x1F471, MALE_SIGN),  # PERSON WITH BLOND HAIR
    (0x1F473, MALE_SIGN),  # MAN WITH TURBAN
    (0x1F477, MALE_SIGN),  # CONSTRUCTION WORKER
    (0x1F481, FEMALE_SIGN),  # INFORMATION DESK PERSON
    (0x1F482, MALE_SIGN),  # GUARDSMAN
    (0x1F486, FEMALE_SIGN),  # FACE MASSAGE
    (0x1F487, FEMALE_SIGN),  # HAIRCUT
    (0x1F575, MALE_SIGN),  # SLEUTH OR SPY
    (0x1F645, FEMALE_SIGN),  # FACE WITH NO GOOD GESTURE
    (0x1F646, FEMALE_SIGN),  # FACE WITH OK GESTURE
    (0x1F647, MALE_SIGN),  # PERSON BOWING DEEPLY
    (0x1F64B, FEMALE_SIGN),  # HAPPY PERSON RAISING ONE HAND
    (0x1F64D, FEMALE_SIGN),  # PERSON FROWNING
    (0x1F64E, FEMALE_SIGN),  # PERSON WITH POUTING FACE
    (0x1F6A3, MALE_SIGN),  # ROWBOAT
    (0x1F6B4, MALE_SIGN),  # BICYCLIST
    (0x1F6B5, MALE_SIGN),  # MOUNTAIN BICYCLIST
    (0x1F6B6, MALE_SIGN),  # PEDESTRIAN
    (0x1F926, FEMALE_SIGN),  # FACE PALM
    (0x1F937, FEMALE_SIGN),  # SHRUG
    (0x1F938, MALE_SIGN),  # PERSON DOING CARTWHEEL
    (0x1F939, MALE_SIGN),  # JUGGLING
    (0x1F93C, MALE_SIGN),  # WRESTLERS
    (0x1F93D, MALE_SIGN),  # WATER POLO
    (0x1F93E, MALE_SIGN),  # HANDBALL
    (0x1F9D6, FEMALE_SIGN),  # PERSON IN STEAMY ROOM
    (0x1F9D7, FEMALE_SIGN),  # PERSON CLIMBING
    (0x1F9D8, FEMALE_SIGN),  # PERSON IN LOTUS POSITION
    (0x1F9D9, FEMALE_SIGN),  # MAGE
    (0x1F9DA, FEMALE_SIGN),  # FAIRY
    (0x1F9DB, FEMALE_SIGN),  # VAMPIRE
    (0x1F9DC, FEMALE_SIGN),  # MERPERSON
    (0x1F9DD, FEMALE_SIGN),  # ELF
    (0x1F9DE, FEMALE_SIGN),  # GENIE
    (0x1F9DF, FEMALE_SIGN),  # ZOMBIE
]


def is_fitzpatrick_modifier(cp):
    """Returns whether cp lies in the range U+1F3FB..U+1F3FF."""
    return 0x1F3FB <= cp <= 0x1F3FF


def reverse_emoji(seq):
    """Returns seq reversed, as a tuple, keeping each skin tone modifier
    directly after the element it followed in seq."""
    rev = list(reversed(seq))
    # if there are fitzpatrick modifiers in the sequence, keep them after
    # the emoji they modify
    for i in range(1, len(rev)):
        if is_fitzpatrick_modifier(rev[i-1]):
            rev[i], rev[i-1] = rev[i-1], rev[i]
    return tuple(rev)


def compute_expected_emoji():
    """Computes the emoji the emoji font is expected to support.

    Uses the module globals filled in by parse_ucd().

    Returns:
        An (all_emoji, default_emoji, equivalent_emoji) triple: the set of
        every code point and sequence the emoji font must support, the set
        of those that default to emoji presentation, and a dict mapping a
        code point or sequence to the one it must share a glyph with.
    """
    equivalent_emoji = {}
    sequence_pieces = set()
    all_sequences = set()
    all_sequences.update(_emoji_variation_sequences)

    # Placeholder for zwj sequences not in the current
    # emoji-zwj-sequences.txt; at the moment there are none to add.
    adjusted_emoji_zwj_sequences = dict(_emoji_zwj_sequences)

    # Add empty flag tag sequence that is supported as fallback. Work on a
    # copy so that the parsed module-level data is left untouched.
    emoji_sequences = dict(_emoji_sequences)
    emoji_sequences[(0x1F3F4, 0xE007F)] = 'Emoji_Tag_Sequence'

    for sequence in emoji_sequences:
        sequence = tuple(ch for ch in sequence if ch != EMOJI_VS)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)
        if emoji_sequences.get(sequence, None) == 'Emoji_Tag_Sequence':
            # Add reverse of all emoji tag sequences, which are added to the
            # fonts as a workaround to get the sequences work in RTL text.
            # TODO: test if these are actually needed by Minikin/HarfBuzz.
            reversed_seq = reverse_emoji(sequence)
            all_sequences.add(reversed_seq)
            equivalent_emoji[reversed_seq] = sequence

    for sequence in adjusted_emoji_zwj_sequences:
        sequence = tuple(ch for ch in sequence if ch != EMOJI_VS)
        all_sequences.add(sequence)
        sequence_pieces.update(sequence)
        # Add reverse of all emoji ZWJ sequences, which are added to the fonts
        # as a workaround to get the sequences work in RTL text.
        reversed_seq = reverse_emoji(sequence)
        all_sequences.add(reversed_seq)
        equivalent_emoji[reversed_seq] = sequence

    # Remove unsupported flags
    all_sequences.difference_update(UNSUPPORTED_FLAGS)

    # Add all tag characters used in flags
    sequence_pieces.update(range(0xE0030, 0xE0039 + 1))
    sequence_pieces.update(range(0xE0061, 0xE007A + 1))

    all_emoji = (
        _emoji_properties['Emoji'] |
        all_sequences |
        sequence_pieces |
        set(LEGACY_ANDROID_EMOJI.keys()))
    default_emoji = (
        _emoji_properties['Emoji_Presentation'] |
        all_sequences |
        set(LEGACY_ANDROID_EMOJI.keys()))

    equivalent_emoji.update(EQUIVALENT_FLAGS)
    equivalent_emoji.update(LEGACY_ANDROID_EMOJI)
    equivalent_emoji.update(ZWJ_IDENTICALS)

    for ch, gender in GENDER_DEFAULTS:
        equivalent_emoji[(ch, ZWJ, gender)] = ch
        for skin_tone in range(0x1F3FB, 0x1F3FF+1):
            skin_toned = (ch, skin_tone, ZWJ, gender)
            if skin_toned in all_emoji:
                equivalent_emoji[skin_toned] = (ch, skin_tone)

    for seq in _emoji_variation_sequences:
        equivalent_emoji[seq] = seq[0]

    return all_emoji, default_emoji, equivalent_emoji


def check_vertical_metrics():
    """Checks the 'head' and 'hhea' vertical metrics of the named families
    against the fixed values expected for them."""
    for record in _fallback_chain:
        if record.name in ['sans-serif', 'sans-serif-condensed']:
            font = open_font(record.font)
            assert font['head'].yMax == 2163 and font['head'].yMin == -555, (
                'yMax and yMin of %s do not match expected values.' % (
                    record.font,))

        if record.name in ['sans-serif', 'sans-serif-condensed',
                           'serif', 'monospace']:
            font = open_font(record.font)
            assert (font['hhea'].ascent == 1900 and
                    font['hhea'].descent == -500), (
                        'ascent and descent of %s do not match expected '
                        'values.' % (record.font,))


def check_cjk_punctuation():
    """Checks that no font placed before the first CJK font in the fallback
    chain maps any character in U+3000..U+301F."""
    cjk_scripts = {'Hans', 'Hant', 'Jpan', 'Kore'}
    cjk_punctuation = range(0x3000, 0x301F + 1)
    for record in _fallback_chain:
        if record.scripts.intersection(cjk_scripts):
            # CJK font seen. Stop checking the rest of the fonts.
            break
        assert_font_supports_none_of_chars(record.font, cjk_punctuation)


def main():
    """Runs all checks; see the module docstring for the arguments."""
    global _fonts_dir
    target_out = sys.argv[1]
    _fonts_dir = path.join(target_out, 'fonts')

    fonts_xml_path = path.join(target_out, 'etc', 'fonts.xml')
    parse_fonts_xml(fonts_xml_path)

    check_vertical_metrics()

    hyphens_dir = path.join(target_out, 'usr', 'hyphen-data')
    check_hyphens(hyphens_dir)

    check_cjk_punctuation()

    check_emoji = sys.argv[2]
    if check_emoji == 'true':
        ucd_path = sys.argv[3]
        parse_ucd(ucd_path)
        all_emoji, default_emoji, equivalent_emoji = compute_expected_emoji()
        check_emoji_coverage(all_emoji, equivalent_emoji)
        check_emoji_defaults(default_emoji)


if __name__ == '__main__':
    main()