Home | History | Annotate | Download | only in scripts
      1 #!/usr/bin/env python3
      2 
      3 """
      4 For each argument on the command line, look for it in the set of all Unicode
      5 names.  Arguments are treated as case-insensitive regular expressions, e.g.:
      6 
      7     % find-uname 'small letter a$' 'horizontal line'
      8     *** small letter a$ matches ***
      9     LATIN SMALL LETTER A (97)
     10     COMBINING LATIN SMALL LETTER A (867)
     11     CYRILLIC SMALL LETTER A (1072)
     12     PARENTHESIZED LATIN SMALL LETTER A (9372)
     13     CIRCLED LATIN SMALL LETTER A (9424)
     14     FULLWIDTH LATIN SMALL LETTER A (65345)
     15     *** horizontal line matches ***
     16     HORIZONTAL LINE EXTENSION (9135)
     17 """
     18 
     19 import unicodedata
     20 import sys
     21 import re
     22 
     23 def main(args):
     24     unicode_names = []
     25     for ix in range(sys.maxunicode+1):
     26         try:
     27             unicode_names.append((ix, unicodedata.name(chr(ix))))
     28         except ValueError: # no name for the character
     29             pass
     30     for arg in args:
     31         pat = re.compile(arg, re.I)
     32         matches = [(y,x) for (x,y) in unicode_names
     33                    if pat.search(y) is not None]
     34         if matches:
     35             print("***", arg, "matches", "***")
     36             for match in matches:
     37                 print("%s (%d)" % match)
     38 
     39 if __name__ == "__main__":
     40     main(sys.argv[1:])
     41