#!/usr/bin/python
#
# Copyright (C) 2017 The Android Open Source Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Creates the EmojiCompat font with the metadata. Metadata is embedded in FlatBuffers binary format
under a meta tag with name 'Emji'.

In order to create the final font the following are used as inputs:

- NotoColorEmoji.ttf: Emoji font in the Android framework. Currently at
external/noto-fonts/emoji/NotoColorEmoji.ttf

- Unicode files: Unicode files that are in the framework, and list information about all the
emojis. These files are emoji-data.txt, emoji-sequences.txt, emoji-zwj-sequences.txt,
and emoji-variation-sequences.txt. Currently at external/unicode/.

- additions/emoji-zwj-sequences.txt and additions/emoji-sequences.txt: Optional files that include
emojis that are not defined in Unicode files, but are in the Android font. Reside in framework and
currently under external/unicode/.

- additions/emoji-data.txt: Codepoints that are rendered with emoji style by default even though
they are not defined so in emoji-data.txt.

- data/emoji_metadata.txt: The file that includes the id, codepoints, the first
Android OS version that the emoji was added (sdkAdded), and finally the first EmojiCompat font
version that the emoji was added (compatAdded). Updated when the script is executed.

- data/emoji_metadata.fbs: The flatbuffer schema file. See http://google.github.io/flatbuffers/.

After execution the following files are generated if they don't exist, otherwise they are updated:
- font/NotoColorEmojiCompat.ttf
- supported-emojis/emojis.txt
- data/emoji_metadata.txt
- src/java/androidx/text/emoji/flatbuffer/*
"""

from __future__ import print_function

import contextlib
import csv
import hashlib
import itertools
import json
import os
import shutil
import subprocess
import sys
import tempfile

from fontTools import ttLib

########### UPDATE OR CHECK WHEN A NEW FONT IS BEING GENERATED ###########
# Last Android SDK Version
SDK_VERSION = 26
# metadata version that will be embedded into font. If there are updates to the font that would
# cause data/emoji_metadata.txt to change, this integer number should be incremented. This number
# defines in which EmojiCompat metadata version the emoji is added to the font.
METADATA_VERSION = 2

####### main directories where output files are created #######
SCRIPT_DIR = os.path.abspath(os.path.dirname(__file__))
FONT_DIR = os.path.join(SCRIPT_DIR, 'font')
DATA_DIR = os.path.join(SCRIPT_DIR, 'data')
SUPPORTED_EMOJIS_DIR = os.path.join(SCRIPT_DIR, 'supported-emojis')
JAVA_SRC_DIR = os.path.join(SCRIPT_DIR, 'src', 'java')
####### output files #######
# font file
FONT_PATH = os.path.join(FONT_DIR, 'NotoColorEmojiCompat.ttf')
# emoji metadata (space separated text) output file
OUTPUT_META_FILE = os.path.join(DATA_DIR, 'emoji_metadata.txt')
# emojis test file
TEST_DATA_PATH = os.path.join(SUPPORTED_EMOJIS_DIR, 'emojis.txt')
####### input files #######
# Unicode file names to read emoji data
EMOJI_DATA_FILE = 'emoji-data.txt'
EMOJI_SEQ_FILE = 'emoji-sequences.txt'
EMOJI_ZWJ_FILE = 'emoji-zwj-sequences.txt'
EMOJI_VARIATION_SEQ_FILE = 'emoji-variation-sequences.txt'
# Android OS emoji file for emojis that are not in Unicode files
ANDROID_EMOJI_ZWJ_SEQ_FILE = os.path.join('additions', 'emoji-zwj-sequences.txt')
ANDROID_EMOJIS_SEQ_FILE = os.path.join('additions', 'emoji-sequences.txt')
# Android OS emoji style override file. Codepoints that are rendered with emoji style by default
# even though not defined so in <code>emoji-data.txt</code>.
EMOJI_STYLE_OVERRIDE_FILE = os.path.join('additions', 'emoji-data.txt')
# emoji metadata file
INPUT_META_FILE = OUTPUT_META_FILE
# flatbuffer schema
FLATBUFFER_SCHEMA = os.path.join(DATA_DIR, 'emoji_metadata.fbs')
# file path for java header, it will be prepended to flatbuffer java files
FLATBUFFER_HEADER = os.path.join(DATA_DIR, "flatbuffer_header.txt")
# temporary emoji metadata json output file
OUTPUT_JSON_FILE_NAME = 'emoji_metadata.json'
# temporary binary file generated by flatbuffer
FLATBUFFER_BIN = 'emoji_metadata.bin'
# directory representation for flatbuffer java package. The trailing empty component keeps a
# trailing path separator on the value.
FLATBUFFER_PACKAGE_PATH = os.path.join('androidx', 'text', 'emoji', 'flatbuffer', '')
# temporary directory that contains flatbuffer java files
FLATBUFFER_JAVA_PATH = os.path.join(FLATBUFFER_PACKAGE_PATH)
FLATBUFFER_METADATA_LIST_JAVA = "MetadataList.java"
FLATBUFFER_METADATA_ITEM_JAVA = "MetadataItem.java"
# directory under source where flatbuffer java files will be copied into
FLATBUFFER_JAVA_TARGET = os.path.join(JAVA_SRC_DIR, FLATBUFFER_PACKAGE_PATH)
# meta tag name used in the font to embed the emoji metadata. This value is also used in
# MetadataListReader.java in order to locate the metadata location.
EMOJI_META_TAG_NAME = 'Emji'

# Property / description markers as they appear after read_emoji_lines() upper-cases each line.
EMOJI_PRESENTATION_STR = 'EMOJI_PRESENTATION'
STD_VARIANTS_EMOJI_STYLE = 'EMOJI STYLE'

# First id handed out to emojis; start of Supplemental Private Use Area-A.
DEFAULT_EMOJI_ID = 0xF0001
# Variation selector that requests emoji style.
EMOJI_STYLE_VS = 0xFE0F


def to_hex_str(value):
    """Converts given int value to upper case hex without the 0x prefix"""
    return format(value, 'X')


def hex_str_to_int(string):
    """Convert a hex string into int"""
    return int(string, 16)


def codepoint_to_string(codepoints):
    """Converts an iterable of codepoints into a string of hex values separated with space."""
    return ' '.join([to_hex_str(x) for x in codepoints])


def _ensure_dir(dir_path):
    """Creates dir_path (and any missing parents) if it does not exist yet."""
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)


def _sequence_codepoints(line):
    """Parses the codepoints field of a sequence line (the text before the first ';') into a list
    of ints. e.g. '1F1E6 1F1E8 ; Name ; [...]' returns [0x1F1E6, 0x1F1E8].
    :raises ValueError: if the field is empty or contains a non-hex value
    """
    codepoints = [hex_str_to_int(x) for x in line.split(';')[0].split()]
    if not codepoints:
        raise ValueError("Line is expected to start with codepoints: " + line)
    return codepoints


def prepend_header_to_file(file_path):
    """Prepends the header to the file. Used to update flatbuffer java files with header, comments
    and annotations. Everything in the file that precedes the first "public final class" is
    replaced with the content of FLATBUFFER_HEADER.
    :raises ValueError: if the file does not contain "public final class"
    """
    with open(FLATBUFFER_HEADER, "r") as copyright_file:
        header = copyright_file.read()

    with open(file_path, "r+") as original_file:
        original_content = original_file.read()
        start_index = original_content.index("public final class")
        original_file.seek(0)
        original_file.write(header + "\n" + original_content[start_index:])
        # the file is rewritten in place; drop leftover bytes if the new content is shorter
        original_file.truncate()


def update_flatbuffer_java_files(flatbuffer_java_dir):
    """Prepends headers to flatbuffer java files and copies to the final destination"""
    _ensure_dir(FLATBUFFER_JAVA_TARGET)

    for java_file in (FLATBUFFER_METADATA_LIST_JAVA, FLATBUFFER_METADATA_ITEM_JAVA):
        tmp_java_file = os.path.join(flatbuffer_java_dir, java_file)
        prepend_header_to_file(tmp_java_file)
        shutil.copy(tmp_java_file, os.path.join(FLATBUFFER_JAVA_TARGET, java_file))


def create_test_data(unicode_path):
    """Read all the emojis in the unicode files and update the test file (TEST_DATA_PATH). Each
    line of the test file is one emoji written as space separated hex codepoints."""
    lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_ZWJ_FILE))
    lines += read_emoji_lines(os.path.join(unicode_path, EMOJI_SEQ_FILE))

    lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE), optional=True)
    lines += read_emoji_lines(os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE), optional=True)

    # standardized variants contains a huge list of sequences, only read the ones that are emojis
    # and also the ones with FE0F (emoji style)
    standardized_variants_lines = read_emoji_lines(
        os.path.join(unicode_path, EMOJI_VARIATION_SEQ_FILE))
    lines += [line for line in standardized_variants_lines if STD_VARIANTS_EMOJI_STYLE in line]

    emojis_set = set()
    for line in lines:
        emojis_set.add(codepoint_to_string(_sequence_codepoints(line)).upper())

    # single codepoints that have emoji presentation by default
    emoji_data_lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_DATA_FILE))
    for line in emoji_data_lines:
        codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
        if emoji_property == EMOJI_PRESENTATION_STR:
            emojis_set.update(
                [to_hex_str(x) for x in codepoints_for_emojirange(codepoints_range)])

    # finally add the android default emoji exceptions
    emoji_style_exceptions = get_emoji_style_exceptions(unicode_path)
    emojis_set.update([to_hex_str(x) for x in emoji_style_exceptions])

    _ensure_dir(SUPPORTED_EMOJIS_DIR)
    with open(TEST_DATA_PATH, "w") as test_file:
        for line in sorted(emojis_set):
            test_file.write("%s\n" % line)


class _EmojiData(object):
    """Holds the information about a single emoji."""

    def __init__(self, codepoints, is_emoji_style):
        self.codepoints = codepoints
        self.emoji_style = is_emoji_style
        # 0 means that no id is assigned yet, see EmojiFontCreator.update_emoji_data
        self.emoji_id = 0
        self.width = 0
        self.height = 0
        self.sdk_added = SDK_VERSION
        self.compat_added = METADATA_VERSION

    def update_metrics(self, metrics):
        """Updates width/height instance variables with the values given in metrics.
        :param metrics: an object that has width and height attributes.
        """
        self.width = metrics.width
        self.height = metrics.height

    def __repr__(self):
        return '<EmojiData {0} - {1}>'.format(self.emoji_style,
                                              codepoint_to_string(self.codepoints))

    def create_json_element(self):
        """Creates the json representation of EmojiData."""
        return {
            'id': self.emoji_id,
            'emojiStyle': self.emoji_style,
            'sdkAdded': self.sdk_added,
            'compatAdded': self.compat_added,
            'width': self.width,
            'height': self.height,
            'codepoints': self.codepoints,
        }

    def create_txt_row(self):
        """Creates array of values for CSV of EmojiData: id (hex), sdkAdded, compatAdded followed
        by the codepoints (hex)."""
        row = [to_hex_str(self.emoji_id), self.sdk_added, self.compat_added]
        row += [to_hex_str(x) for x in self.codepoints]
        return row

    def update(self, emoji_id, sdk_added, compat_added):
        """Updates current EmojiData with the given id, sdk_added and compat_added values"""
        self.emoji_id = emoji_id
        self.sdk_added = sdk_added
        self.compat_added = compat_added


def read_emoji_lines(file_path, optional=False):
    """Read all lines in an unicode emoji file into a list of uppercase strings. Ignore the empty
    lines and comments
    :param file_path: unicode emoji file path
    :param optional: if True no exception is raised when the file cannot be read
    :return: list of uppercase strings
    """
    result = []
    try:
        with open(file_path) as file_stream:
            for line in file_stream:
                line = line.strip()
                if line and not line.startswith('#'):
                    result.append(line.upper())
    except IOError:
        # a file that is marked optional is allowed to be missing / unreadable
        if not optional:
            raise

    return result


def get_emoji_style_exceptions(unicode_path):
    """Read EMOJI_STYLE_OVERRIDE_FILE and return the codepoints as integers. Both single
    codepoints and ranges (XYZ..UVT) are accepted."""
    lines = read_emoji_lines(os.path.join(unicode_path, EMOJI_STYLE_OVERRIDE_FILE))
    exceptions = []
    for line in lines:
        codepoints_range = codepoints_and_emoji_prop(line)[0]
        exceptions.extend(codepoints_for_emojirange(codepoints_range))
    return exceptions


def codepoints_for_emojirange(codepoints_range):
    """ Return codepoints given in emoji files. Expand the codepoints that are given as a range
    such as XYZ..UVT (both ends inclusive)
    """
    if '..' in codepoints_range:
        range_start, range_end = codepoints_range.split('..')
        return list(range(hex_str_to_int(range_start), hex_str_to_int(range_end) + 1))
    return [hex_str_to_int(codepoints_range)]


def codepoints_and_emoji_prop(line):
    """For a given emoji file line, return codepoints and emoji property in the line.
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base] # [...]
    :raises ValueError: if the line does not contain a '#'
    """
    line = line.strip()
    if '#' not in line:
        raise ValueError("Line is expected to have # in it")
    # only the part before the trailing comment carries data
    fields = line[:line.index('#')].split(';')
    codepoints_range = fields[0].strip()
    emoji_property = fields[1].strip()

    return codepoints_range, emoji_property


def read_emoji_intervals(emoji_data_map, file_path, emoji_style_exceptions):
    """Read unicode lines of unicode emoji file in which each line describes a set of codepoint
    intervals. Expands the interval on a line and inserts related EmojiDatas into emoji_data_map.
    A line format that is expected is as follows:
    1F93C..1F93E ; [Emoji|Emoji_Presentation|Emoji_Modifier_Base] # [...]"""
    lines = read_emoji_lines(file_path)
    # membership is tested for every codepoint, use a set for constant time lookup
    emoji_style_exceptions = set(emoji_style_exceptions)

    for line in lines:
        codepoints_range, emoji_property = codepoints_and_emoji_prop(line)
        is_emoji_style = emoji_property == EMOJI_PRESENTATION_STR
        codepoints = codepoints_for_emojirange(codepoints_range)

        for codepoint in codepoints:
            key = codepoint_to_string([codepoint])
            codepoint_is_emoji_style = is_emoji_style or codepoint in emoji_style_exceptions
            if key in emoji_data_map:
                # since there are multiple definitions of emojis, only update when emoji style is
                # True
                if codepoint_is_emoji_style:
                    emoji_data_map[key].emoji_style = True
            else:
                emoji_data_map[key] = _EmojiData([codepoint], codepoint_is_emoji_style)


def read_emoji_sequences(emoji_data_map, file_path, optional=False):
    """Reads the content of the file which contains emoji sequences. Creates EmojiData for each
    line and puts into emoji_data_map. The emoji style variation selector (FE0F) is removed from
    the sequences; existing entries in emoji_data_map are not overwritten."""
    lines = read_emoji_lines(file_path, optional)
    # 1F1E6 1F1E8 ; Name ; [...]
    for line in lines:
        codepoints = [x for x in _sequence_codepoints(line) if x != EMOJI_STYLE_VS]
        key = codepoint_to_string(codepoints)
        if key not in emoji_data_map:
            emoji_data_map[key] = _EmojiData(codepoints, False)


def load_emoji_data_map(unicode_path):
    """Reads the emoji data files, constructs a map of space separated codepoints to EmojiData.
    :return: map of space separated codepoints to EmojiData
    """
    emoji_data_map = {}
    emoji_style_exceptions = get_emoji_style_exceptions(unicode_path)
    read_emoji_intervals(emoji_data_map, os.path.join(unicode_path, EMOJI_DATA_FILE),
                         emoji_style_exceptions)
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_ZWJ_FILE))
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, EMOJI_SEQ_FILE))

    # Add the optional ANDROID_EMOJI_ZWJ_SEQ_FILE if it exists.
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJI_ZWJ_SEQ_FILE),
                         optional=True)
    # Add the optional ANDROID_EMOJIS_SEQ_FILE if it exists.
    read_emoji_sequences(emoji_data_map, os.path.join(unicode_path, ANDROID_EMOJIS_SEQ_FILE),
                         optional=True)

    return emoji_data_map


def load_previous_metadata(emoji_data_map):
    """Updates emoji data elements in emoji_data_map using the id, sdk_added and compat_added fields
    in emoji_metadata.txt. Returns the smallest available emoji id to use, i.e. one greater than
    the largest id that was applied to an element of emoji_data_map. Rows whose codepoints are not
    in emoji_data_map are ignored. If emoji_metadata.txt does not exist, or no row matches,
    returns DEFAULT_EMOJI_ID"""
    current_emoji_id = DEFAULT_EMOJI_ID
    if not os.path.isfile(INPUT_META_FILE):
        return current_emoji_id

    with open(INPUT_META_FILE) as csvfile:
        reader = csv.reader(csvfile, delimiter=' ')
        for row in reader:
            # skip blank lines and the header / comment lines
            if not row or row[0].startswith('#'):
                continue
            emoji_id = hex_str_to_int(row[0])
            sdk_added = int(row[1])
            compat_added = int(row[2])
            # empty fields (e.g. caused by a trailing space) are not codepoints
            key = codepoint_to_string([hex_str_to_int(x) for x in row[3:] if x])
            if key in emoji_data_map:
                emoji_data = emoji_data_map[key]
                emoji_data.update(emoji_id, sdk_added, compat_added)
                if emoji_data.emoji_id >= current_emoji_id:
                    current_emoji_id = emoji_data.emoji_id + 1

    return current_emoji_id


def update_ttlib_orig_sort():
    """Updates the ttLib tag sort with a closure that makes the meta table first."""
    # NOTE(review): this only has an effect if the fontTools version in use looks up
    # ttLib.sortedTagList at save time - confirm when upgrading fontTools.
    orig_sort = ttLib.sortedTagList

    def meta_first_table_sort(tag_list, table_order=None):
        """Sorts the tables with the original ttLib sort, then makes the meta table first."""
        tag_list = orig_sort(tag_list, table_order)
        # a tag list without meta table is left in the original order
        if 'meta' in tag_list:
            tag_list.remove('meta')
            tag_list.insert(0, 'meta')
        return tag_list

    ttLib.sortedTagList = meta_first_table_sort


def inject_meta_into_font(ttf, flatbuffer_bin_filename):
    """inject metadata binary into font"""
    if 'meta' not in ttf:
        ttf['meta'] = ttLib.getTableClass('meta')()
    meta = ttf['meta']
    # the flatbuffer output is binary, it has to be read without any text mode translation
    with open(flatbuffer_bin_filename, 'rb') as flatbuffer_bin_file:
        meta.data[EMOJI_META_TAG_NAME] = flatbuffer_bin_file.read()

    # sort meta tables for faster access
    update_ttlib_orig_sort()


def validate_input_files(font_path, unicode_path):
    """Validate the existence of font file and the unicode files
    :raises ValueError: if the font file, the unicode directory or one of the required unicode
    files does not exist
    """
    if not os.path.isfile(font_path):
        raise ValueError("Font file does not exist: " + font_path)

    if not os.path.isdir(unicode_path):
        raise ValueError(
            "Unicode directory does not exist or is not a directory " + unicode_path)

    # every file that is read without optional=True; checking all of them here fails before any
    # output file is modified
    required_files = [EMOJI_DATA_FILE, EMOJI_ZWJ_FILE, EMOJI_SEQ_FILE, EMOJI_VARIATION_SEQ_FILE,
                      EMOJI_STYLE_OVERRIDE_FILE]
    for required_file in required_files:
        emoji_filename = os.path.join(unicode_path, required_file)
        if not os.path.isfile(emoji_filename):
            raise ValueError("Unicode emoji data file does not exist: " + emoji_filename)


def add_file_to_sha(sha_algo, file_path):
    """Feeds the content of the file into sha_algo in chunks of 8192 bytes"""
    with open(file_path, 'rb') as input_file:
        # the sentinel has to be bytes: in Python 3 read() of a binary file returns b'' at the end
        # of the file, which never equals ''
        for data in iter(lambda: input_file.read(8192), b''):
            sha_algo.update(data)


def create_sha_from_source_files(font_paths):
    """Creates a SHA-256 hex digest from the content of the given files"""
    sha_algo = hashlib.sha256()
    for file_path in font_paths:
        add_file_to_sha(sha_algo, file_path)
    return sha_algo.hexdigest()


class EmojiFontCreator(object):
    """Creates the EmojiCompat font"""

    def __init__(self, font_path, unicode_path):
        """
        :param font_path: path to Android NotoColorEmoji font
        :param unicode_path: path to directory that contains unicode files
        :raises ValueError: if one of the input files does not exist
        """
        validate_input_files(font_path, unicode_path)

        self.font_path = font_path
        self.unicode_path = unicode_path
        # space separated codepoints -> _EmojiData
        self.emoji_data_map = {}
        # emoji id -> glyph name, added to cmap 12 when the font is created
        self.remapped_codepoints = {}
        # glyph name -> image metrics read from CBDT
        self.glyph_to_image_metrics_map = {}
        # set default emoji id to start of Supplemental Private Use Area-A
        self.emoji_id = DEFAULT_EMOJI_ID

    def update_emoji_data(self, codepoints, glyph_name):
        """Updates the existing EmojiData identified with codepoints. The fields that are set are:
        - emoji_id (if it does not exist)
        - image width/height
        Codepoints that are not in emoji_data_map are ignored."""
        key = codepoint_to_string(codepoints)
        if key not in self.emoji_data_map:
            return

        # add emoji to final data
        emoji_data = self.emoji_data_map[key]
        emoji_data.update_metrics(self.glyph_to_image_metrics_map[glyph_name])
        if emoji_data.emoji_id == 0:
            emoji_data.emoji_id = self.emoji_id
            self.emoji_id = self.emoji_id + 1
        self.remapped_codepoints[emoji_data.emoji_id] = glyph_name

    def read_cbdt(self, ttf):
        """Read image size data from CBDT."""
        cbdt = ttf['CBDT']
        for strike_data in cbdt.strikeData:
            for key, data in strike_data.items():
                data.decompile()
                self.glyph_to_image_metrics_map[key] = data.metrics

    def read_cmap12(self, ttf, glyph_to_codepoint_map):
        """Reads single code point emojis that are in cmap12 and updates glyph_to_codepoint_map.
        :return: the cmap table with format 12, platformID 3 and platEncID 10
        :raises ValueError: if the font does not contain such a table
        """
        cmap = ttf['cmap']
        for table in cmap.tables:
            if table.format == 12 and table.platformID == 3 and table.platEncID == 10:
                for codepoint, glyph_name in table.cmap.items():
                    glyph_to_codepoint_map[glyph_name] = codepoint
                    self.update_emoji_data([codepoint], glyph_name)
                return table
        raise ValueError("Font doesn't contain cmap with format:12, platformID:3 and platEncID:10")

    def read_gsub(self, ttf, glyph_to_codepoint_map):
        """Reads the emoji sequences defined in GSUB"""
        gsub = ttf['GSUB']
        ligature_subtables = []
        context_subtables = []
        # this code is font dependent, implementing all gsub rules is out of scope of EmojiCompat
        # and would be expensive with little value
        for lookup in gsub.table.LookupList.Lookup:
            for subtable in lookup.SubTable:
                if subtable.LookupType == 5:
                    context_subtables.append(subtable)
                elif subtable.LookupType == 4:
                    ligature_subtables.append(subtable)

        for subtable in context_subtables:
            self.add_gsub_context_subtable(subtable, gsub.table.LookupList, glyph_to_codepoint_map)

        for subtable in ligature_subtables:
            self.add_gsub_ligature_subtable(subtable, glyph_to_codepoint_map)

    def add_gsub_context_subtable(self, subtable, lookup_list, glyph_to_codepoint_map):
        """Add substitutions defined as OpenType Context Substitution"""
        for sub_class_set in subtable.SubClassSet:
            if not sub_class_set:
                continue
            for sub_class_rule in sub_class_set.SubClassRule:
                # prepare holder for substitution list. each rule will have a list that is added
                # to the subs_list.
                subs_list = len(sub_class_rule.SubstLookupRecord) * [None]
                for record in sub_class_rule.SubstLookupRecord:
                    subs_list[record.SequenceIndex] = self.get_substitutions(
                        lookup_list, record.LookupListIndex)
                # create combinations of all lists. the combinations will be filtered by
                # emoji_data_map. the first element that contains a valid glyph will be used
                # as the final glyph
                for seq in itertools.product(*subs_list):
                    glyph_names = [x["input"] for x in seq]
                    codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                    # a list (not filter()) is required since the length and first element are
                    # used below
                    nonempty_outputs = [x["output"] for x in seq
                                        if x["output"] and x["output"].strip()]
                    if not nonempty_outputs:
                        print("Warning: no output glyph is set for " + str(glyph_names))
                        continue
                    if len(nonempty_outputs) > 1:
                        print(
                            "Warning: multiple glyph is set for "
                            + str(glyph_names) + ", will use the first one")

                    self.update_emoji_data(codepoints, nonempty_outputs[0])

    def get_substitutions(self, lookup_list, index):
        """Returns the mappings of all subtables of the lookup at the given index as a list of
        {"input": glyph name, "output": glyph name} dictionaries"""
        result = []
        for subtable in lookup_list.Lookup[index].SubTable:
            for input_glyph, output_glyph in subtable.mapping.items():
                result.append({"input": input_glyph, "output": output_glyph})
        return result

    def add_gsub_ligature_subtable(self, subtable, glyph_to_codepoint_map):
        """Add substitutions defined as OpenType Ligature Substitution"""
        for name, ligatures in subtable.ligatures.items():
            for ligature in ligatures:
                glyph_names = [name] + ligature.Component
                codepoints = [glyph_to_codepoint_map[x] for x in glyph_names]
                self.update_emoji_data(codepoints, ligature.LigGlyph)

    def write_metadata_json(self, output_json_file_path):
        """Writes the emojis into a json file
        :return: number of emojis that are written
        """
        emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)

        output_json = {
            'version': METADATA_VERSION,
            'sourceSha': create_sha_from_source_files(
                [self.font_path, OUTPUT_META_FILE, FLATBUFFER_SCHEMA]),
            'list': [emoji_data.create_json_element() for emoji_data in emoji_data_list],
        }

        # write the new json file to be processed by FlatBuffers
        with open(output_json_file_path, 'w') as json_file:
            print(json.dumps(output_json, indent=4, sort_keys=True, separators=(',', ':')),
                  file=json_file)

        return len(emoji_data_list)

    def write_metadata_csv(self):
        """Writes emoji metadata into space separated file"""
        with open(OUTPUT_META_FILE, 'w') as csvfile:
            csvwriter = csv.writer(csvfile, delimiter=' ')
            emoji_data_list = sorted(self.emoji_data_map.values(), key=lambda x: x.emoji_id)
            csvwriter.writerow(['#id', 'sdkAdded', 'compatAdded', 'codepoints'])
            for emoji_data in emoji_data_list:
                csvwriter.writerow(emoji_data.create_txt_row())

    def create_font(self):
        """Creates the EmojiCompat font from the font and the unicode files that were given to
        the constructor. Also updates the metadata file, the flatbuffer java files and the test
        data file.
        :raises RuntimeError: if the flatc command fails
        """
        tmp_dir = tempfile.mkdtemp()
        try:
            # create emoji codepoints to EmojiData map
            self.emoji_data_map = load_emoji_data_map(self.unicode_path)

            # read previous metadata file to update id, sdkAdded and compatAdded. emoji id that is
            # returned is either default or 1 greater than the largest id in previous data
            self.emoji_id = load_previous_metadata(self.emoji_data_map)

            # recalcTimestamp parameter will keep the modified field same as the original font.
            # Changing the modified field in the font causes the font ttf file to change, which
            # makes it harder to understand if something really changed in the font.
            with contextlib.closing(ttLib.TTFont(self.font_path, recalcTimestamp=False)) as ttf:
                # read image size data
                self.read_cbdt(ttf)

                # glyph name to codepoint map
                glyph_to_codepoint_map = {}

                # read single codepoint emojis under cmap12
                cmap12_table = self.read_cmap12(ttf, glyph_to_codepoint_map)

                # read emoji sequences in gsub
                self.read_gsub(ttf, glyph_to_codepoint_map)

                # add all new codepoint to glyph mappings
                cmap12_table.cmap.update(self.remapped_codepoints)

                # final metadata csv will be used to generate the sha, therefore write it before
                # metadata json is written.
                self.write_metadata_csv()

                output_json_file = os.path.join(tmp_dir, OUTPUT_JSON_FILE_NAME)
                flatbuffer_bin_file = os.path.join(tmp_dir, FLATBUFFER_BIN)
                flatbuffer_java_dir = os.path.join(tmp_dir, FLATBUFFER_JAVA_PATH)

                total_emoji_count = self.write_metadata_json(output_json_file)

                # create the flatbuffers binary and java classes. The command is passed as an
                # argument list so that paths with spaces or shell characters are handled.
                flatc_command = ['flatc', '-o', tmp_dir, '-b', '-j', FLATBUFFER_SCHEMA,
                                 output_json_file]
                return_code = subprocess.call(flatc_command)
                if return_code != 0:
                    raise RuntimeError("flatc failed with exit code {0}: {1}".format(
                        return_code, ' '.join(flatc_command)))

                # inject metadata binary into font
                inject_meta_into_font(ttf, flatbuffer_bin_file)

                # update CBDT and CBLC versions since older android versions cannot read > 2.0
                ttf['CBDT'].version = 2.0
                ttf['CBLC'].version = 2.0

                # save the new font
                _ensure_dir(FONT_DIR)
                ttf.save(FONT_PATH)

            update_flatbuffer_java_files(flatbuffer_java_dir)

            create_test_data(self.unicode_path)
        finally:
            # clear the tmp output directory, also when one of the steps above fails
            shutil.rmtree(tmp_dir, ignore_errors=True)

        print(
            "{0} emojis are written to\n{1}".format(total_emoji_count, FONT_DIR))


def print_usage():
    """Prints how to use the script."""
    print("Please specify a path to font and unicode files.\n"
          "usage: createfont.py noto-color-emoji-path unicode-dir-path")


if __name__ == '__main__':
    if len(sys.argv) < 3:
        print_usage()
        sys.exit(1)
    EmojiFontCreator(sys.argv[1], sys.argv[2]).create_font()