Home | History | Annotate | Download | only in emoji-compat
      1 #!/usr/bin/python
      2 #
      3 # Copyright (C) 2017 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 """
     18 Creates the EmojiCompat font with the metadata. Metadata is embedded in FlatBuffers binary format
     19 under a meta tag with name 'Emji'.
     20 
In order to create the final font, the following are used as inputs:
     22 
     23 - NotoColorEmoji.ttf: Emoji font in the Android framework. Currently at
     24 external/noto-fonts/emoji/NotoColorEmoji.ttf
     25 
     26 - Unicode files: Unicode files that are in the framework, and lists information about all the
     27 emojis. These files are emoji-data.txt, emoji-sequences.txt, emoji-zwj-sequences.txt,
     28 and emoji-variation-sequences.txt. Currently at external/unicode/.
     29 
     30 - additions/emoji-zwj-sequences.txt: Includes emojis that are not defined in Unicode files, but are
     31 in the Android font. Resides in framework and currently under external/unicode/.
     32 
     33 - data/emoji_metadata.txt: The file that includes the id, codepoints, the first
     34 Android OS version that the emoji was added (sdkAdded), and finally the first EmojiCompat font
     35 version that the emoji was added (compatAdded). Updated when the script is executed.
     36 
     37 - data/emoji_metadata.fbs: The flatbuffer schema file. See http://google.github.io/flatbuffers/.
     38 
After execution, the following files are created if they do not exist; otherwise they are updated:
- font/NotoColorEmojiCompat.ttf
- supported-emojis/emojis.txt
- data/emoji_metadata.txt
- src/java/androidx/text/emoji/flatbuffer/*
     44 """
     45 
     46 from __future__ import print_function
     47 
     48 import contextlib
     49 import csv
     50 import hashlib
     51 import itertools
     52 import json
     53 import os
     54 import shutil
     55 import sys
     56 import tempfile
     57 from fontTools import ttLib
     58 
     59 ########### UPDATE OR CHECK WHEN A NEW FONT IS BEING GENERATED ###########
     60 # Last Android SDK Version
     61 SDK_VERSION = 26
     62 # metadata version that will be embedded into font. If there are updates to the font that would
     63 # cause data/emoji_metadata.txt to change, this integer number should be incremented. This number
     64 # defines in which EmojiCompat metadata version the emoji is added to the font.
     65 METADATA_VERSION = 2
     66 
     67 ####### main directories where output files are created #######
     68 SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
     69 FONT_DIR = os.path.join(SCRIPT_DIR, 'font')
     70 DATA_DIR = os.path.join(SCRIPT_DIR, 'data')
     71 SUPPORTED_EMOJIS_DIR = os.path.join(SCRIPT_DIR, 'supported-emojis')
     72 JAVA_SRC_DIR = os.path.join(SCRIPT_DIR, 'src', 'java')
     73 ####### output files #######
     74 # font file
     75 FONT_PATH = os.path.join(FONT_DIR, 'NotoColorEmojiCompat.ttf')
     76 # emoji metadata json output file
     77 OUTPUT_META_FILE = os.path.join(DATA_DIR, 'emoji_metadata.txt')
     78 # emojis test file
     79 TEST_DATA_PATH = os.path.join(SUPPORTED_EMOJIS_DIR, 'emojis.txt')
     80 ####### input files #######
     81 # Unicode file names to read emoji data
     82 EMOJI_DATA_FILE = 'emoji-data.txt'
     83 EMOJI_SEQ_FILE = 'emoji-sequences.txt'
     84 EMOJI_ZWJ_FILE = 'emoji-zwj-sequences.txt'
     85 EMOJI_VARIATION_SEQ_FILE = 'emoji-variation-sequences.txt'
     86 # Android OS emoji file for emojis that are not in Unicode files
     87 ANDROID_EMOJI_ZWJ_SEQ_FILE = os.path.join('additions', 'emoji-zwj-sequences.txt')
     88 ANDROID_EMOJIS_SEQ_FILE = os.path.join('additions', 'emoji-sequences.txt')
     89 # Android OS emoji style override file. Codepoints that are rendered with emoji style by default
     90 # even though not defined so in <code>emoji-data.txt</code>.
     91 EMOJI_STYLE_OVERRIDE_FILE = os.path.join('additions', 'emoji-data.txt')
     92 # emoji metadata file
     93 INPUT_META_FILE = OUTPUT_META_FILE
     94 # flatbuffer schema
     95 FLATBUFFER_SCHEMA = os.path.join(DATA_DIR, 'emoji_metadata.fbs')
     96 # file path for java header, it will be prepended to flatbuffer java files
     97 FLATBUFFER_HEADER = os.path.join(DATA_DIR, "flatbuffer_header.txt")
     98 # temporary emoji metadata json output file
     99 OUTPUT_JSON_FILE_NAME = 'emoji_metadata.json'
    100 # temporary binary file generated by flatbuffer
    101 FLATBUFFER_BIN = 'emoji_metadata.bin'
    102 # directory representation for flatbuffer java package
    103 FLATBUFFER_PACKAGE_PATH = os.path.join('androidx', 'text', 'emoji', 'flatbuffer', '')
    104 # temporary directory that contains flatbuffer java files
    105 FLATBUFFER_JAVA_PATH = os.path.join(FLATBUFFER_PACKAGE_PATH)
    106 FLATBUFFER_METADATA_LIST_JAVA = "MetadataList.java"
    107 FLATBUFFER_METADATA_ITEM_JAVA = "MetadataItem.java"
    108 # directory under source where flatbuffer java files will be copied into
    109 FLATBUFFER_JAVA_TARGET = os.path.join(JAVA_SRC_DIR, FLATBUFFER_PACKAGE_PATH)
    110 # meta tag name used in the font to embed the emoji metadata. This value is also used in
    111 # MetadataListReader.java in order to locate the metadata location.
    112 EMOJI_META_TAG_NAME = 'Emji'
    113 
    114 EMOJI_PRESENTATION_STR = 'EMOJI_PRESENTATION'
    115 STD_VARIANTS_EMOJI_STYLE = 'EMOJI STYLE'
    116 
    117 DEFAULT_EMOJI_ID = 0xF0001
    118 EMOJI_STYLE_VS = 0xFE0F
    119 
    120 def to_hex_str(value):
    121     """Converts given int value to hex without the 0x prefix"""
    122     return format(value, 'X')
    123 
    124 def hex_str_to_int(string):
    125     """Convert a hex string into int"""
    126     return int(string, 16)
    127 
    128 def codepoint_to_string(codepoints):
    129     """Converts a list of codepoints into a string separated with space."""
    130     return ' '.join([to_hex_str(x) for x in codepoints])
    131 
    132 def prepend_header_to_file(file_path):
    133     """Prepends the header to the file. Used to update flatbuffer java files with header, comments
    134     and annotations."""
    135     with open(file_path, "r+") as original_file:
    136         with open(FLATBUFFER_HEADER, "r") as copyright_file:
    137             original_content = original_file.read()
    138             start_index = original_content.index("public final class")
    139             original_file.seek(0)
    140             original_file.write(copyright_file.read() + "\n" + original_content[start_index:])
    141 
    142 
    143 def update_flatbuffer_java_files(flatbuffer_java_dir):
    144     """Prepends headers to flatbuffer java files and copies to the final destination"""
    145     tmp_metadata_list = flatbuffer_java_dir + FLATBUFFER_METADATA_LIST_JAVA
    146     tmp_metadata_item = flatbuffer_java_dir + FLATBUFFER_METADATA_ITEM_JAVA
    147     prepend_header_to_file(tmp_metadata_list)
    148     prepend_header_to_file(tmp_metadata_item)
    149 
    150     if not os.path.exists(FLATBUFFER_JAVA_TARGET):
    151         os.makedirs(FLATBUFFER_JAVA_TARGET)
    152 
    153     shutil.copy(tmp_metadata_list, FLATBUFFER_JAVA_TARGET + FLATBUFFER_METADATA_LIST_JAVA)
    154     shutil.copy(tmp_metadata_item, FLATBUFFER_JAVA_TARGET + FLATBUFFER_METADATA_ITEM_JAVA)
    155 
    156 def create_test_data(unicode_path):
    157     """Read all the emojis in the unicode files and update the test file"""
    158     lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_ZWJ_FILE))
    159     lines += read_emoji_lines(os.path.join(unicode_path, EMOJI_SEQ_FILE))
    160 
    161     lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE), optional=True)
    162     lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE), optional=True)
    163 
    164     # standardized variants contains a huge list of sequences, only read the ones that are emojis
    165     # and also the ones with FE0F (emoji style)
    166     standardized_variants_lines = read_emoji_lines(
    167         os.path.join(unicode_path, EMOJI_VARIATION_SEQ_FILE))
    168     for line in standardized_variants_lines:
    169         if STD_VARIANTS_EMOJI_STYLE in line:
    170             lines.append(line)
    171 
    172     emojis_set = set()
    173     for line in lines:
    174         codepoints = [hex_str_to_int(x) for x in line.split(';')[0].strip().split(' ')]
    175         emojis_set.add(codepoint_to_string(codepoints).upper())
    176 
    177     emoji_data_lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_DATA_FILE))
    178     for line in emoji_data_lines:
    179         codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
    180         is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR
    181         if is_emoji_style:
    182             codepoints = [to_hex_str(x) for x in
    183                           codepoints_for_emojirange(codepoints_range)]
    184             emojis_set.update(codepoints)
    185 
    186     emoji_style_exceptions = get_emoji_style_exceptions(unicode_path)
    187     #  finally add the android default emoji exceptions
    188     emojis_set.update([to_hex_str(x) for x in emoji_style_exceptions])
    189 
    190     emojis_list = list(emojis_set)
    191     emojis_list.sort()
    192     with open(TEST_DATA_PATH, "w") as test_file:
    193         for line in emojis_list:
    194             test_file.write("%s\n" % line)
    195 
    196 class _EmojiData(object):
    197     """Holds the information about a single emoji."""
    198 
    199     def __init__(self, codepoints, is_emoji_style):
    200         self.codepoints = codepoints
    201         self.emoji_style = is_emoji_style
    202         self.emoji_id = 0
    203         self.width = 0
    204         self.height = 0
    205         self.sdk_added = SDK_VERSION
    206         self.compat_added = METADATA_VERSION
    207 
    208     def update_metrics(self, metrics):
    209         """Updates width/height instance variables with the values given in metrics dictionary.
    210         :param metrics: a dictionary object that has width and height values.
    211         """
    212         self.width = metrics.width
    213         self.height = metrics.height
    214 
    215     def __repr__(self):
    216         return '<EmojiData {0} - {1}>'.format(self.emoji_style,
    217                                               codepoint_to_string(self.codepoints))
    218 
    219     def create_json_element(self):
    220         """Creates the json representation of EmojiData."""
    221         json_element = {}
    222         json_element['id'] = self.emoji_id
    223         json_element['emojiStyle'] = self.emoji_style
    224         json_element['sdkAdded'] = self.sdk_added
    225         json_element['compatAdded'] = self.compat_added
    226         json_element['width'] = self.width
    227         json_element['height'] = self.height
    228         json_element['codepoints'] = self.codepoints
    229         return json_element
    230 
    231     def create_txt_row(self):
    232         """Creates array of values for CSV of EmojiData."""
    233         row = [to_hex_str(self.emoji_id), self.sdk_added, self.compat_added]
    234         row += [to_hex_str(x) for x in self.codepoints]
    235         return row
    236 
    237     def update(self, emoji_id, sdk_added, compat_added):
    238         """Updates current EmojiData with the values in a json element"""
    239         self.emoji_id = emoji_id
    240         self.sdk_added = sdk_added
    241         self.compat_added = compat_added
    242 
    243 
    244 def read_emoji_lines(file_path, optional=False):
    245     """Read all lines in an unicode emoji file into a list of uppercase strings. Ignore the empty
    246     lines and comments
    247     :param file_path: unicode emoji file path
    248     :param optional: if True no exception is raised when the file cannot be read
    249     :return: list of uppercase strings
    250     """
    251     result = []
    252     try:
    253         with open(file_path) as file_stream:
    254             for line in file_stream:
    255                 line = line.strip()
    256                 if line and not line.startswith('#'):
    257                     result.append(line.upper())
    258     except IOError:
    259         if optional:
    260             pass
    261         else:
    262             raise
    263 
    264     return result
    265 
    266 def get_emoji_style_exceptions(unicode_path):
    267     """Read EMOJI_STYLE_OVERRIDE_FILE and return the codepoints as integers"""
    268     lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_STYLE_OVERRIDE_FILE))
    269     exceptions = []
    270     for line in lines:
    271         codepoint = hex_str_to_int(codepoints_and_emoji_prop(line)[0])
    272         exceptions.append(codepoint)
    273     return exceptions
    274 
    275 def codepoints_for_emojirange(codepoints_range):
    276     """ Return codepoints given in emoji files. Expand the codepoints that are given as a range
    277     such as XYZ ... UVT
    278     """
    279     codepoints = []
    280     if '..' in codepoints_range:
    281         range_start, range_end = codepoints_range.split('..')
    282         codepoints_range = range(hex_str_to_int(range_start),
    283                                  hex_str_to_int(range_end) + 1)
    284         codepoints.extend(codepoints_range)
    285     else:
    286         codepoints.append(hex_str_to_int(codepoints_range))
    287     return codepoints
    288 
    289 def codepoints_and_emoji_prop(line):
    290     """For a given emoji file line, return codepoints and emoji property in the line.
    291     1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base] # [...]"""
    292     line = line.strip()
    293     if '#' in line:
    294         line = line[:line.index('#')]
    295     else:
    296         raise ValueError("Line is expected to have # in it")
    297     line = line.split(';')
    298     codepoints_range = line[0].strip()
    299     emoji_property = line[1].strip()
    300 
    301     return codepoints_range, emoji_property
    302 
    303 def read_emoji_intervals(emoji_data_map, file_path, emoji_style_exceptions):
    304     """Read unicode lines of unicode emoji file in which each line describes a set of codepoint
    305     intervals. Expands the interval on a line and inserts related EmojiDatas into emoji_data_map.
    306     A line format that is expected is as follows:
    307     1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base] # [...]"""
    308     lines = read_emoji_lines(file_path)
    309 
    310     for line in lines:
    311         codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
    312         is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR
    313         codepoints = codepoints_for_emojirange(codepoints_range)
    314 
    315         for codepoint in codepoints:
    316             key = codepoint_to_string([codepoint])
    317             codepoint_is_emoji_style = is_emoji_style or codepoint in emoji_style_exceptions
    318             if key in emoji_data_map:
    319                 # since there are multiple definitions of emojis, only update when emoji style is
    320                 # True
    321                 if codepoint_is_emoji_style:
    322                     emoji_data_map[key].emoji_style = True
    323             else:
    324                 emoji_data = _EmojiData([codepoint], codepoint_is_emoji_style)
    325                 emoji_data_map[key] = emoji_data
    326 
    327 
    328 def read_emoji_sequences(emoji_data_map, file_path, optional=False):
    329     """Reads the content of the file which contains emoji sequences. Creates EmojiData for each
    330     line and puts into emoji_data_map."""
    331     lines = read_emoji_lines(file_path, optional)
    332     # 1F1E6 1F1E8 ; Name ; [...]
    333     for line in lines:
    334         codepoints = [hex_str_to_int(x) for x in line.split(';')[0].strip().split(' ')]
    335         codepoints = [x for x in codepoints if x != EMOJI_STYLE_VS]
    336         key = codepoint_to_string(codepoints)
    337         if not key in emoji_data_map:
    338             emoji_data = _EmojiData(codepoints, False)
    339             emoji_data_map[key] = emoji_data
    340 
    341 
    342 def load_emoji_data_map(unicode_path):
    343     """Reads the emoji data files, constructs a map of space separated codepoints to EmojiData.
    344     :return: map of space separated codepoints to EmojiData
    345     """
    346     emoji_data_map = {}
    347     emoji_style_exceptions = get_emoji_style_exceptions(unicode_path)
    348     read_emoji_intervals(emoji_data_map, os.path.join(unicode_path, EMOJI_DATA_FILE),
    349                          emoji_style_exceptions)
    350     read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_ZWJ_FILE))
    351     read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_SEQ_FILE))
    352 
    353     # Add the optional ANDROID_EMOJI_ZWJ_SEQ_FILE if it exists.
    354     read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE),
    355                          optional=True)
    356     # Add the optional ANDROID_EMOJIS_SEQ_FILE if it exists.
    357     read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE),
    358                          optional=True)
    359 
    360     return emoji_data_map
    361 
    362 
    363 def load_previous_metadata(emoji_data_map):
    364     """Updates emoji data elements in emoji_data_map using the id, sdk_added and compat_added fields
    365        in emoji_metadata.txt. Returns the smallest available emoji id to use. i.e. if the largest
    366        emoji id emoji_metadata.txt is 1, function would return 2. If emoji_metadata.txt does not
    367        exist, or contains no emojis defined returns DEFAULT_EMOJI_ID"""
    368     current_emoji_id = DEFAULT_EMOJI_ID
    369     if os.path.isfile(INPUT_META_FILE):
    370         with open(INPUT_META_FILE) as csvfile:
    371             reader = csv.reader(csvfile, delimiter=' ')
    372             for row in reader:
    373                 if row[0].startswith('#'):
    374                     continue
    375                 emoji_id = hex_str_to_int(row[0])
    376                 sdk_added = int(row[1])
    377                 compat_added = int(row[2])
    378                 key = codepoint_to_string(hex_str_to_int(x) for x in row[3:])
    379                 if key in emoji_data_map:
    380                     emoji_data = emoji_data_map[key]
    381                     emoji_data.update(emoji_id, sdk_added, compat_added)
    382                     if emoji_data.emoji_id >= current_emoji_id:
    383                         current_emoji_id = emoji_data.emoji_id + 1
    384 
    385     return current_emoji_id
    386 
    387 
    388 def update_ttlib_orig_sort():
    389     """Updates the ttLib tag sort with a closure that makes the meta table first."""
    390     orig_sort = ttLib.sortedTagList
    391 
    392     def meta_first_table_sort(tag_list, table_order=None):
    393         """Sorts the tables with the original ttLib sort, then makes the meta table first."""
    394         tag_list = orig_sort(tag_list, table_order)
    395         tag_list.remove('meta')
    396         tag_list.insert(0, 'meta')
    397         return tag_list
    398 
    399     ttLib.sortedTagList = meta_first_table_sort
    400 
    401 
    402 def inject_meta_into_font(ttf, flatbuffer_bin_filename):
    403     """inject metadata binary into font"""
    404     if not 'meta' in ttf:
    405         ttf['meta'] = ttLib.getTableClass('meta')()
    406     meta = ttf['meta']
    407     with open(flatbuffer_bin_filename) as flatbuffer_bin_file:
    408         meta.data[EMOJI_META_TAG_NAME] = flatbuffer_bin_file.read()
    409 
    410     # sort meta tables for faster access
    411     update_ttlib_orig_sort()
    412 
    413 
    414 def validate_input_files(font_path, unicode_path):
    415     """Validate the existence of font file and the unicode files"""
    416     if not os.path.isfile(font_path):
    417         raise ValueError("Font file does not exist: " + font_path)
    418 
    419     if not os.path.isdir(unicode_path):
    420         raise ValueError(
    421             "Unicode directory does not exist or is not a directory " + unicode_path)
    422 
    423     emoji_filenames = [os.path.join(unicode_path, EMOJI_DATA_FILE),
    424                        os.path.join(unicode_path, EMOJI_ZWJ_FILE),
    425                        os.path.join(unicode_path, EMOJI_SEQ_FILE)]
    426     for emoji_filename in emoji_filenames:
    427         if not os.path.isfile(emoji_filename):
    428             raise ValueError("Unicode emoji data file does not exist: " + emoji_filename)
    429 
    430 
    431 def add_file_to_sha(sha_algo, file_path):
    432     with open(file_path, 'rb') as input_file:
    433         for data in iter(lambda: input_file.read(8192), ''):
    434             sha_algo.update(data)
    435 
    436 def create_sha_from_source_files(font_paths):
    437     """Creates a SHA from the given font files"""
    438     sha_algo = hashlib.sha256()
    439     for file_path in font_paths:
    440         add_file_to_sha(sha_algo, file_path)
    441     return sha_algo.hexdigest()
    442 
    443 
    444 class EmojiFontCreator(object):
    445     """Creates the EmojiCompat font"""
    446 
    447     def __init__(self, font_path, unicode_path):
    448         validate_input_files(font_path, unicode_path)
    449 
    450         self.font_path = font_path
    451         self.unicode_path = unicode_path
    452         self.emoji_data_map = {}
    453         self.remapped_codepoints = {}
    454         self.glyph_to_image_metrics_map = {}
    455         # set default emoji id to start of Supplemental Private Use Area-A
    456         self.emoji_id = DEFAULT_EMOJI_ID
    457 
    458     def update_emoji_data(self, codepoints, glyph_name):
    459         """Updates the existing EmojiData identified with codepoints. The fields that are set are:
    460         - emoji_id (if it does not exist)
    461         - image width/height"""
    462         key = codepoint_to_string(codepoints)
    463         if key in self.emoji_data_map:
    464             # add emoji to final data
    465             emoji_data = self.emoji_data_map[key]
    466             emoji_data.update_metrics(self.glyph_to_image_metrics_map[glyph_name])
    467             if emoji_data.emoji_id == 0:
    468                 emoji_data.emoji_id = self.emoji_id
    469                 self.emoji_id = self.emoji_id + 1
    470             self.remapped_codepoints[emoji_data.emoji_id] = glyph_name
    471 
    472     def read_cbdt(self, ttf):
    473         """Read image size data from CBDT."""
    474         cbdt = ttf['CBDT']
    475         for strike_data in cbdt.strikeData:
    476             for key, data in strike_data.iteritems():
    477                 data.decompile()
    478                 self.glyph_to_image_metrics_map[key] = data.metrics
    479 
    480     def read_cmap12(self, ttf, glyph_to_codepoint_map):
    481         """Reads single code point emojis that are in cmap12, updates glyph_to_codepoint_map and
    482         finally clears all elements in CMAP 12"""
    483         cmap = ttf['cmap']
    484         for table in cmap.tables:
    485             if table.format == 12 and table.platformID == 3 and table.platEncID == 10:
    486                 for codepoint, glyph_name in table.cmap.iteritems():
    487                     glyph_to_codepoint_map[glyph_name] = codepoint
    488                     self.update_emoji_data([codepoint], glyph_name)
    489                 return table
    490         raise ValueError("Font doesn't contain cmap with format:12, platformID:3 and platEncID:10")
    491 
    492     def read_gsub(self, ttf, glyph_to_codepoint_map):
    493         """Reads the emoji sequences defined in GSUB and clear all elements under GSUB"""
    494         gsub = ttf['GSUB']
    495         ligature_subtables = []
    496         context_subtables = []
    497         # this code is font dependent, implementing all gsub rules is out of scope of EmojiCompat
    498         # and would be expensive with little value
    499         for lookup in gsub.table.LookupList.Lookup:
    500             for subtable in lookup.SubTable:
    501                 if subtable.LookupType == 5:
    502                     context_subtables.append(subtable)
    503                 elif subtable.LookupType == 4:
    504                     ligature_subtables.append(subtable)
    505 
    506         for subtable in context_subtables:
    507             self.add_gsub_context_subtable(subtable, gsub.table.LookupList, glyph_to_codepoint_map)
    508 
    509         for subtable in ligature_subtables:
    510             self.add_gsub_ligature_subtable(subtable, glyph_to_codepoint_map)
    511 
    512     def add_gsub_context_subtable(self, subtable, lookup_list, glyph_to_codepoint_map):
    513         """Add substitutions defined as OpenType Context Substitution"""
    514         for sub_class_set in subtable.SubClassSet:
    515             if sub_class_set:
    516                 for sub_class_rule in sub_class_set.SubClassRule:
    517                     # prepare holder for substitution list. each rule will have a list that is added
    518                     # to the subs_list.
    519                     subs_list = len(sub_class_rule.SubstLookupRecord) * [None]
    520                     for record in sub_class_rule.SubstLookupRecord:
    521                         subs_list[record.SequenceIndex] = self.get_substitutions(lookup_list,
    522                                                                             record.LookupListIndex)
    523                     # create combinations or all lists. the combinations will be filtered by
    524                     # emoji_data_map. the first element that contain as a valid glyph will be used
    525                     # as the final glyph
    526                     combinations = list(itertools.product(*subs_list))
    527                     for seq in combinations:
    528                         glyph_names = [x["input"] for x in seq]
    529                         codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
    530                         outputs = [x["output"] for x in seq if x["output"]]
    531                         nonempty_outputs = filter(lambda x: x.strip() , outputs)
    532                         if len(nonempty_outputs) == 0:
    533                             print("Warning: no output glyph is set for " + str(glyph_names))
    534                             continue
    535                         elif len(nonempty_outputs) > 1:
    536                             print(
    537                                 "Warning: multiple glyph is set for "
    538                                     + str(glyph_names) + ", will use the first one")
    539 
    540                         glyph = nonempty_outputs[0]
    541                         self.update_emoji_data(codepoints, glyph)
    542 
    543     def get_substitutions(self, lookup_list, index):
    544         result = []
    545         for x in lookup_list.Lookup[index].SubTable:
    546             for input, output in x.mapping.iteritems():
    547                 result.append({"input": input, "output": output})
    548         return result
    549 
    550     def add_gsub_ligature_subtable(self, subtable, glyph_to_codepoint_map):
    551         for name, ligatures in subtable.ligatures.iteritems():
    552             for ligature in ligatures:
    553                 glyph_names = [name] + ligature.Component
    554                 codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
    555                 self.update_emoji_data(codepoints, ligature.LigGlyph)
    556 
    557     def write_metadata_json(self, output_json_file_path):
    558         """Writes the emojis into a json file"""
    559         output_json = {}
    560         output_json['version'] = METADATA_VERSION
    561         output_json['sourceSha'] = create_sha_from_source_files(
    562             [self.font_path, OUTPUT_META_FILE, FLATBUFFER_SCHEMA])
    563         output_json['list'] = []
    564 
    565         emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
    566 
    567         total_emoji_count = 0
    568         for emoji_data in emoji_data_list:
    569             element = emoji_data.create_json_element()
    570             output_json['list'].append(element)
    571             total_emoji_count = total_emoji_count + 1
    572 
    573         # write the new json file to be processed by FlatBuffers
    574         with open(output_json_file_path, 'w') as json_file:
    575             print(json.dumps(output_json, indent=4, sort_keys=True, separators=(',', ':')),
    576                   file=json_file)
    577 
    578         return total_emoji_count
    579 
    580     def write_metadata_csv(self):
    581         """Writes emoji metadata into space separated file"""
    582         with open(OUTPUT_META_FILE, 'w') as csvfile:
    583             csvwriter = csv.writer(csvfile, delimiter=' ')
    584             emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
    585             csvwriter.writerow(['#id', 'sdkAdded', 'compatAdded', 'codepoints'])
    586             for emoji_data in emoji_data_list:
    587                 csvwriter.writerow(emoji_data.create_txt_row())
    588 
    589     def create_font(self):
    590         """Creates the EmojiCompat font.
    591         :param font_path: path to Android NotoColorEmoji font
    592         :param unicode_path: path to directory that contains unicode files
    593         """
    594 
    595         tmp_dir = tempfile.mkdtemp()
    596 
    597         # create emoji codepoints to EmojiData map
    598         self.emoji_data_map = load_emoji_data_map(self.unicode_path)
    599 
    600         # read previous metadata file to update id, sdkAdded and compatAdded. emoji id that is
    601         # returned is either default or 1 greater than the largest id in previous data
    602         self.emoji_id = load_previous_metadata(self.emoji_data_map)
    603 
    604         # recalcTimestamp parameter will keep the modified field same as the original font. Changing
    605         # the modified field in the font causes the font ttf file to change, which makes it harder
    606         # to understand if something really changed in the font.
    607         with contextlib.closing(ttLib.TTFont(self.font_path, recalcTimestamp=False)) as ttf:
    608             # read image size data
    609             self.read_cbdt(ttf)
    610 
    611             # glyph name to codepoint map
    612             glyph_to_codepoint_map = {}
    613 
    614             # read single codepoint emojis under cmap12 and clear the table contents
    615             cmap12_table = self.read_cmap12(ttf, glyph_to_codepoint_map)
    616 
    617             # read emoji sequences gsub and clear the table contents
    618             self.read_gsub(ttf, glyph_to_codepoint_map)
    619 
    620             # add all new codepoint to glyph mappings
    621             cmap12_table.cmap.update(self.remapped_codepoints)
    622 
    623             # final metadata csv will be used to generate the sha, therefore write it before
    624             # metadata json is written.
    625             self.write_metadata_csv()
    626 
    627             output_json_file = os.path.join(tmp_dir, OUTPUT_JSON_FILE_NAME)
    628             flatbuffer_bin_file = os.path.join(tmp_dir, FLATBUFFER_BIN)
    629             flatbuffer_java_dir = os.path.join(tmp_dir, FLATBUFFER_JAVA_PATH)
    630 
    631             total_emoji_count = self.write_metadata_json(output_json_file)
    632 
    633             # create the flatbuffers binary and java classes
    634             sys_command = 'flatc -o {0} -b -j {1} {2}'
    635             os.system(sys_command.format(tmp_dir, FLATBUFFER_SCHEMA, output_json_file))
    636 
    637             # inject metadata binary into font
    638             inject_meta_into_font(ttf, flatbuffer_bin_file)
    639 
    640             # update CBDT and CBLC versions since older android versions cannot read > 2.0
    641             ttf['CBDT'].version = 2.0
    642             ttf['CBLC'].version = 2.0
    643 
    644             # save the new font
    645             ttf.save(FONT_PATH)
    646 
    647             update_flatbuffer_java_files(flatbuffer_java_dir)
    648 
    649             create_test_data(self.unicode_path)
    650 
    651             # clear the tmp output directory
    652             shutil.rmtree(tmp_dir, ignore_errors=True)
    653 
    654             print(
    655                 "{0} emojis are written to\n{1}".format(total_emoji_count, FONT_DIR))
    656 
    657 
    658 def print_usage():
    659     """Prints how to use the script."""
    660     print("Please specify a path to font and unicode files.\n"
    661           "usage: createfont.py noto-color-emoji-path unicode-dir-path")
    662 
    663 
    664 if __name__ == '__main__':
    665     if len(sys.argv) < 3:
    666         print_usage()
    667         sys.exit(1)
    668     EmojiFontCreator(sys.argv[1], sys.argv[2]).create_font()
    669