Home | History | Annotate | Download | only in other
      1 #!/usr/bin/python
      2 # coding=UTF-8
      3 #
      4 # Copyright 2014 Google Inc. All rights reserved.
      5 #
      6 # Licensed under the Apache License, Version 2.0 (the "License");
      7 # you may not use this file except in compliance with the License.
      8 # You may obtain a copy of the License at
      9 #
     10 #     http://www.apache.org/licenses/LICENSE-2.0
     11 #
     12 # Unless required by applicable law or agreed to in writing, software
     13 # distributed under the License is distributed on an "AS IS" BASIS,
     14 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     15 # See the License for the specific language governing permissions and
     16 # limitations under the License.
     17 
     18 """Create a curated subset of NotoSansSymbols for Android."""
     19 
     20 __author__ = 'roozbeh (at] google.com (Roozbeh Pournader)'
     21 
     22 import sys
     23 
     24 from nototools import subset
     25 from nototools import unicode_data
     26 
     27 # Unicode blocks that we want to include in the font
     28 BLOCKS_TO_INCLUDE = """
     29 20D0..20FF; Combining Diacritical Marks for Symbols
     30 2100..214F; Letterlike Symbols
     31 2190..21FF; Arrows
     32 2200..22FF; Mathematical Operators
     33 2300..23FF; Miscellaneous Technical
     34 2400..243F; Control Pictures
     35 2440..245F; Optical Character Recognition
     36 2460..24FF; Enclosed Alphanumerics
     37 2500..257F; Box Drawing
     38 2580..259F; Block Elements
     39 25A0..25FF; Geometric Shapes
     40 2600..26FF; Miscellaneous Symbols
     41 2700..27BF; Dingbats
     42 27C0..27EF; Miscellaneous Mathematical Symbols-A
     43 27F0..27FF; Supplemental Arrows-A
     44 2800..28FF; Braille Patterns
     45 2A00..2AFF; Supplemental Mathematical Operators
     46 """
     47 
     48 # One-off characters to be included, needed for backward compatibility and
     49 # supporting various character sets, including ARIB sets and black and white
     50 # emoji
     51 ONE_OFF_ADDITIONS = {
     52     0x27D0, #  WHITE DIAMOND WITH CENTRED DOT
     53     0x2934, #  ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS
     54     0x2935, #  ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS
     55     0x2985, #  LEFT WHITE PARENTHESIS
     56     0x2986, #  RIGHT WHITE PARENTHESIS
     57     0x2B05, #  LEFTWARDS BLACK ARROW
     58     0x2B06, #  UPWARDS BLACK ARROW
     59     0x2B07, #  DOWNWARDS BLACK ARROW
     60     0x2B24, #  BLACK LARGE CIRCLE
     61     0x2B2E, #  BLACK VERTICAL ELLIPSE
     62     0x2B2F, #  WHITE VERTICAL ELLIPSE
     63     0x2B56, #  HEAVY OVAL WITH OVAL INSIDE
     64     0x2B57, #  HEAVY CIRCLE WITH CIRCLE INSIDE
     65     0x2B58, #  HEAVY CIRCLE
     66     0x2B59, #  HEAVY CIRCLED SALTIRE
     67 }
     68 
     69 # letter-based characters, provided by Roboto
     70 LETTERLIKE_CHARS_IN_ROBOTO = {
     71     0x2100, #  ACCOUNT OF
     72     0x2101, #  ADDRESSED TO THE SUBJECT
     73     0x2103, #  DEGREE CELSIUS
     74     0x2105, #  CARE OF
     75     0x2106, #  CADA UNA
     76     0x2109, #  DEGREE FAHRENHEIT
     77     0x2113, #  SCRIPT SMALL L
     78     0x2116, #  NUMERO SIGN
     79     0x2117, #  SOUND RECORDING COPYRIGHT
     80     0x211E, #  PRESCRIPTION TAKE
     81     0x211F, #  RESPONSE
     82     0x2120, #  SERVICE MARK
     83     0x2121, #  TELEPHONE SIGN
     84     0x2122, #  TRADE MARK SIGN
     85     0x2123, #  VERSICLE
     86     0x2125, #  OUNCE SIGN
     87     0x2126, #  OHM SIGN
     88     0x212A, #  KELVIN SIGN
     89     0x212B, #  ANGSTROM SIGN
     90     0x212E, #  ESTIMATED SYMBOL
     91     0x2132, #  TURNED CAPITAL F
     92     0x213B, #  FACSIMILE SIGN
     93     0x214D, #  AKTIESELSKAB
     94     0x214F, #  SYMBOL FOR SAMARITAN SOURCE
     95 }
     96 
     97 # default emoji characters in the BMP, based on
     98 # http://www.unicode.org/draft/Public/emoji/1.0/emoji-data.txt
     99 # We exclude these, so we don't block color emoji.
    100 BMP_DEFAULT_EMOJI = {
    101     0x231A, #  WATCH
    102     0x231B, #  HOURGLASS
    103     0x23E9, #  BLACK RIGHT-POINTING DOUBLE TRIANGLE
    104     0x23EA, #  BLACK LEFT-POINTING DOUBLE TRIANGLE
    105     0x23EB, #  BLACK UP-POINTING DOUBLE TRIANGLE
    106     0x23EC, #  BLACK DOWN-POINTING DOUBLE TRIANGLE
    107     0x23F0, #  ALARM CLOCK
    108     0x23F3, #  HOURGLASS WITH FLOWING SAND
    109     0x25FD, #  WHITE MEDIUM SMALL SQUARE
    110     0x25FE, #  BLACK MEDIUM SMALL SQUARE
    111     0x2614, #  UMBRELLA WITH RAIN DROPS
    112     0x2615, #  HOT BEVERAGE
    113     0x2648, #  ARIES
    114     0x2649, #  TAURUS
    115     0x264A, #  GEMINI
    116     0x264B, #  CANCER
    117     0x264C, #  LEO
    118     0x264D, #  VIRGO
    119     0x264E, #  LIBRA
    120     0x264F, #  SCORPIUS
    121     0x2650, #  SAGITTARIUS
    122     0x2651, #  CAPRICORN
    123     0x2652, #  AQUARIUS
    124     0x2653, #  PISCES
    125     0x267F, #  WHEELCHAIR SYMBOL
    126     0x2693, #  ANCHOR
    127     0x26A1, #  HIGH VOLTAGE SIGN
    128     0x26AA, #  MEDIUM WHITE CIRCLE
    129     0x26AB, #  MEDIUM BLACK CIRCLE
    130     0x26BD, #  SOCCER BALL
    131     0x26BE, #  BASEBALL
    132     0x26C4, #  SNOWMAN WITHOUT SNOW
    133     0x26C5, #  SUN BEHIND CLOUD
    134     0x26CE, #  OPHIUCHUS
    135     0x26D4, #  NO ENTRY
    136     0x26EA, #  CHURCH
    137     0x26F2, #  FOUNTAIN
    138     0x26F3, #  FLAG IN HOLE
    139     0x26F5, #  SAILBOAT
    140     0x26FA, #  TENT
    141     0x26FD, #  FUEL PUMP
    142     0x2705, #  WHITE HEAVY CHECK MARK
    143     0x270A, #  RAISED FIST
    144     0x270B, #  RAISED HAND
    145     0x2728, #  SPARKLES
    146     0x274C, #  CROSS MARK
    147     0x274E, #  NEGATIVE SQUARED CROSS MARK
    148     0x2753, #  BLACK QUESTION MARK ORNAMENT
    149     0x2754, #  WHITE QUESTION MARK ORNAMENT
    150     0x2755, #  WHITE EXCLAMATION MARK ORNAMENT
    151     0x2757, #  HEAVY EXCLAMATION MARK SYMBOL
    152     0x2795, #  HEAVY PLUS SIGN
    153     0x2796, #  HEAVY MINUS SIGN
    154     0x2797, #  HEAVY DIVISION SIGN
    155     0x27B0, #  CURLY LOOP
    156     0x27BF, #  DOUBLE CURLY LOOP
    157     0x2B1B, #  BLACK LARGE SQUARE
    158     0x2B1C, #  WHITE LARGE SQUARE
    159     0x2B50, #  WHITE MEDIUM STAR
    160     0x2B55, #  HEAVY LARGE CIRCLE
    161 }
    162 
    163 # Characters we have decided we are doing as emoji-style in Android,
    164 # despite UTR#51's recommendation
    165 ANDROID_EMOJI = {
    166     0x2600, #  BLACK SUN WITH RAYS
    167     0x2601, #  CLOUD
    168     0X260E, #  BLACK TELEPHONE
    169     0x261D, #  WHITE UP POINTING INDEX
    170     0x263A, #  WHITE SMILING FACE
    171     0x2660, #  BLACK SPADE SUIT
    172     0x2663, #  BLACK CLUB SUIT
    173     0x2665, #  BLACK HEART SUIT
    174     0x2666, #  BLACK DIAMOND SUIT
    175     0x270C, #  VICTORY HAND
    176     0x2744, #  SNOWFLAKE
    177     0x2764, #  HEAVY BLACK HEART
    178 }
    179 
    180 def main(argv):
    181     """Subset the Noto Symbols font.
    182 
    183     The first argument is the source file name, and the second argument is
    184     the target file name.
    185     """
    186 
    187     target_coverage = set()
    188     # Add all characters in BLOCKS_TO_INCLUDE
    189     for first, last, _ in unicode_data._parse_code_ranges(BLOCKS_TO_INCLUDE):
    190         target_coverage.update(range(first, last+1))
    191 
    192     # Add one-off characters
    193     target_coverage |= ONE_OFF_ADDITIONS
    194     # Remove characters preferably coming from Roboto
    195     target_coverage -= LETTERLIKE_CHARS_IN_ROBOTO
    196     # Remove characters that are supposed to default to emoji
    197     target_coverage -= BMP_DEFAULT_EMOJI | ANDROID_EMOJI
    198 
    199     # Remove dentistry symbols, as their main use appears to be for CJK:
    200     # http://www.unicode.org/L2/L2000/00098-n2195.pdf
    201     target_coverage -= set(range(0x23BE, 0x23CC+1))
    202 
    203     # Remove COMBINING ENCLOSING KEYCAP. It's needed for Android's color emoji
    204     # mechanism to work properly
    205     target_coverage.remove(0x20E3)
    206 
    207     source_file_name = argv[1]
    208     target_file_name = argv[2]
    209     subset.subset_font(
    210         source_file_name,
    211         target_file_name,
    212         include=target_coverage)
    213 
    214 
    215 if __name__ == '__main__':
    216     main(sys.argv)
    217