Home | History | Annotate | Download | only in cjk
      1 #!/usr/bin/python
      2 # coding=UTF-8
      3 #
      4 # Copyright 2016 Google Inc. All rights reserved.
      5 #
      6 # Licensed under the Apache License, Version 2.0 (the "License");
      7 # you may not use this file except in compliance with the License.
      8 # You may obtain a copy of the License at
      9 #
     10 #     http://www.apache.org/licenses/LICENSE-2.0
     11 #
     12 # Unless required by applicable law or agreed to in writing, software
     13 # distributed under the License is distributed on an "AS IS" BASIS,
     14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 # See the License for the specific language governing permissions and
     16 # limitations under the License.
     17 
     18 """Create a curated subset of Noto CJK for Android."""
     19 
     20 import os
     21 
     22 from fontTools import ttLib
     23 from nototools import font_data
     24 from nototools import tool_utils
     25 from nototools import ttc_utils
     26 
     27 # Characters supported in Noto CJK fonts that UTR #51 recommends default to
     28 # emoji-style.
     29 EMOJI_IN_CJK = {
     30     0x26BD, #  SOCCER BALL
     31     0x26BE, #  BASEBALL
     32     0x1F18E, #  NEGATIVE SQUARED AB
     33     0x1F191, #  SQUARED CL
     34     0x1F192, #  SQUARED COOL
     35     0x1F193, #  SQUARED FREE
     36     0x1F194, #  SQUARED ID
     37     0x1F195, #  SQUARED NEW
     38     0x1F196, #  SQUARED NG
     39     0x1F197, #  SQUARED OK
     40     0x1F198, #  SQUARED SOS
     41     0x1F199, #  SQUARED UP WITH EXCLAMATION MARK
     42     0x1F19A, #  SQUARED VS
     43     0x1F201, #  SQUARED KATAKANA KOKO
     44     0x1F21A, #  SQUARED CJK UNIFIED IDEOGRAPH-7121
     45     0x1F22F, #  SQUARED CJK UNIFIED IDEOGRAPH-6307
     46     0x1F232, #  SQUARED CJK UNIFIED IDEOGRAPH-7981
     47     0x1F233, #  SQUARED CJK UNIFIED IDEOGRAPH-7A7A
     48     0x1F234, #  SQUARED CJK UNIFIED IDEOGRAPH-5408
     49     0x1F235, #  SQUARED CJK UNIFIED IDEOGRAPH-6E80
     50     0x1F236, #  SQUARED CJK UNIFIED IDEOGRAPH-6709
     51     0x1F238, #  SQUARED CJK UNIFIED IDEOGRAPH-7533
     52     0x1F239, #  SQUARED CJK UNIFIED IDEOGRAPH-5272
     53     0x1F23A, #  SQUARED CJK UNIFIED IDEOGRAPH-55B6
     54     0x1F250, #  CIRCLED IDEOGRAPH ADVANTAGE
     55     0x1F251, #  CIRCLED IDEOGRAPH ACCEPT
     56 }
     57 
     58 # Characters we have decided we are doing as emoji-style in Android,
     59 # despite UTR #51's recommendation
     60 ANDROID_EMOJI = {
     61     0x2600, #  BLACK SUN WITH RAYS
     62     0x2601, #  CLOUD
     63     0X260E, #  BLACK TELEPHONE
     64     0x261D, #  WHITE UP POINTING INDEX
     65     0x263A, #  WHITE SMILING FACE
     66     0x2660, #  BLACK SPADE SUIT
     67     0x2663, #  BLACK CLUB SUIT
     68     0x2665, #  BLACK HEART SUIT
     69     0x2666, #  BLACK DIAMOND SUIT
     70     0x270C, #  VICTORY HAND
     71     0x2744, #  SNOWFLAKE
     72     0x2764, #  HEAVY BLACK HEART
     73 }
     74 
     75 # We don't want support for ASCII control chars.
     76 CONTROL_CHARS = tool_utils.parse_int_ranges('0000-001F');
     77 
     78 EXCLUDED_CODEPOINTS = sorted(EMOJI_IN_CJK | ANDROID_EMOJI | CONTROL_CHARS)
     79 
     80 
     81 def remove_from_cmap(infile, outfile, exclude=frozenset()):
     82     """Removes a set of characters from a font file's cmap table."""
     83     font = ttLib.TTFont(infile)
     84     font_data.delete_from_cmap(font, exclude)
     85     font.save(outfile)
     86 
     87 
# Directory handed to ttc_utils.ttcfile_extract() and used as the working
# directory while the extracted fonts are processed.
TEMP_DIR = 'subsetted'
     89 
     90 def remove_codepoints_from_ttc(ttc_name):
     91     otf_names = ttc_utils.ttcfile_extract(ttc_name, TEMP_DIR)
     92 
     93     with tool_utils.temp_chdir(TEMP_DIR):
     94         for index, otf_name in enumerate(otf_names):
     95             print 'Subsetting %s...' % otf_name
     96             remove_from_cmap(otf_name, otf_name, exclude=EXCLUDED_CODEPOINTS)
     97         ttc_utils.ttcfile_build(ttc_name, otf_names)
     98         for f in otf_names:
     99             os.remove(f)
    100 
    101 
    102 remove_codepoints_from_ttc('NotoSansCJK-Regular.ttc')
    103 remove_codepoints_from_ttc('NotoSerifCJK-Regular.ttc')
    104