Home | History | Annotate | Download | only in shaping
      1 #!/usr/bin/env python
      2 
      3 from __future__ import print_function, division, absolute_import
      4 
      5 import sys, os, re, difflib, unicodedata, errno, cgi
      6 from itertools import *
      7 try:
      8 	import unicodedata2 as unicodedata
      9 except Exception:
     10 	pass
     11 
# One-character line prefixes used to tag diff lines.  ZipDiffer prefixes a
# differing line from the i-th input with diff_symbols[i]; DiffColorizer maps
# a line's prefix back to an index with diff_symbols.index ().
diff_symbols = "-+=*&^%$#@!~/"
# Highlight color names, indexed in parallel with diff_symbols by
# DiffColorizer (colors[i] highlights text tagged with symbols[i]).
diff_colors = ['red', 'green', 'blue']
     14 
     15 def codepoints(s):
     16 	return (ord (u) for u in s)
     17 
# Make a module-level `unichr` available everywhere.  Where the builtin
# `unichr` does not exist, the lookup below raises NameError and the handler
# at the bottom aliases `unichr` to `chr` instead.
try:
	unichr = unichr

	if sys.maxunicode < 0x10FFFF:
		# workarounds for Python 2 "narrow" builds with UCS2-only support.

		# Keep a reference to the builtin before it is shadowed below.
		_narrow_unichr = unichr

		def unichr(i):
			"""
			Return the unicode character whose Unicode code is the integer 'i'.
			The valid range is 0 to 0x10FFFF inclusive.

			>>> _narrow_unichr(0xFFFF + 1)
			Traceback (most recent call last):
			  File "<stdin>", line 1, in ?
			ValueError: unichr() arg not in range(0x10000) (narrow Python build)
			>>> unichr(0xFFFF + 1) == u'\U00010000'
			True
			>>> unichr(1114111) == u'\U0010FFFF'
			True
			>>> unichr(0x10FFFF + 1)
			Traceback (most recent call last):
			  File "<stdin>", line 1, in ?
			ValueError: unichr() arg not in range(0x110000)
			"""
			try:
				return _narrow_unichr(i)
			except ValueError:
				# The builtin rejected the value: build a "\UXXXXXXXX"
				# escape (8 hex digits) and decode it instead.
				try:
					padded_hex_str = hex(i)[2:].zfill(8)
					escape_str = "\\U" + padded_hex_str
					return escape_str.decode("unicode-escape")
				except UnicodeDecodeError:
					# Report decode failures with the same exception type
					# the builtin uses for out-of-range arguments.
					raise ValueError('unichr() arg not in range(0x110000)')

		def codepoints(s):
			# Replaces the module-level codepoints () on narrow builds:
			# yields one integer per code point, combining surrogate pairs
			# and yielding U+FFFD for every unpaired surrogate.
			high_surrogate = None
			for u in s:
				cp = ord (u)
				if 0xDC00 <= cp <= 0xDFFF:
					# Low surrogate: valid only right after a high surrogate.
					if high_surrogate:
						yield 0x10000 + (high_surrogate - 0xD800) * 0x400 + (cp - 0xDC00)
						high_surrogate = None
					else:
						yield 0xFFFD
				else:
					# A pending high surrogate not followed by a low one
					# is unpaired.
					if high_surrogate:
						yield 0xFFFD
						high_surrogate = None
					if 0xD800 <= cp <= 0xDBFF:
						# Hold the high surrogate until the next element.
						high_surrogate = cp
					else:
						yield cp
						high_surrogate = None
			# Input ended while a high surrogate was still pending.
			if high_surrogate:
				yield 0xFFFD

except NameError:
	unichr = chr
     78 
     79 try:
     80 	unicode = unicode
     81 except NameError:
     82 	unicode = str
     83 
     84 def tounicode(s, encoding='ascii', errors='strict'):
     85 	if not isinstance(s, unicode):
     86 		return s.decode(encoding, errors)
     87 	else:
     88 		return s
     89 
     90 class ColorFormatter:
     91 
     92 	class Null:
     93 		@staticmethod
     94 		def start_color (c): return ''
     95 		@staticmethod
     96 		def end_color (): return ''
     97 		@staticmethod
     98 		def escape (s): return s
     99 		@staticmethod
    100 		def newline (): return '\n'
    101 
    102 	class ANSI:
    103 		@staticmethod
    104 		def start_color (c):
    105 			return {
    106 				'red': '\033[41;37;1m',
    107 				'green': '\033[42;37;1m',
    108 				'blue': '\033[44;37;1m',
    109 			}[c]
    110 		@staticmethod
    111 		def end_color ():
    112 			return '\033[m'
    113 		@staticmethod
    114 		def escape (s): return s
    115 		@staticmethod
    116 		def newline (): return '\n'
    117 
    118 	class HTML:
    119 		@staticmethod
    120 		def start_color (c):
    121 			return '<span style="background:%s">' % c
    122 		@staticmethod
    123 		def end_color ():
    124 			return '</span>'
    125 		@staticmethod
    126 		def escape (s): return cgi.escape (s)
    127 		@staticmethod
    128 		def newline (): return '<br/>\n'
    129 
    130 	@staticmethod
    131 	def Auto (argv = [], out = sys.stdout):
    132 		format = ColorFormatter.ANSI
    133 		if "--format" in argv:
    134 			argv.remove ("--format")
    135 			format = ColorFormatter.ANSI
    136 		if "--format=ansi" in argv:
    137 			argv.remove ("--format=ansi")
    138 			format = ColorFormatter.ANSI
    139 		if "--format=html" in argv:
    140 			argv.remove ("--format=html")
    141 			format = ColorFormatter.HTML
    142 		if "--no-format" in argv:
    143 			argv.remove ("--no-format")
    144 			format = ColorFormatter.Null
    145 		return format
    146 
    147 
    148 class DiffColorizer:
    149 
    150 	diff_regex = re.compile ('([a-za-z0-9_]*)([^a-za-z0-9_]?)')
    151 
    152 	def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
    153 		self.formatter = formatter
    154 		self.colors = colors
    155 		self.symbols = symbols
    156 
    157 	def colorize_lines (self, lines):
    158 		lines = (l if l else '' for l in lines)
    159 		ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
    160 		oo = ["",""]
    161 		st = [False, False]
    162 		for l in difflib.Differ().compare (*ss):
    163 			if l[0] == '?':
    164 				continue
    165 			if l[0] == ' ':
    166 				for i in range(2):
    167 					if st[i]:
    168 						oo[i] += self.formatter.end_color ()
    169 						st[i] = False
    170 				oo = [o + self.formatter.escape (l[2:]) for o in oo]
    171 				continue
    172 			if l[0] in self.symbols:
    173 				i = self.symbols.index (l[0])
    174 				if not st[i]:
    175 					oo[i] += self.formatter.start_color (self.colors[i])
    176 					st[i] = True
    177 				oo[i] += self.formatter.escape (l[2:])
    178 				continue
    179 		for i in range(2):
    180 			if st[i]:
    181 				oo[i] += self.formatter.end_color ()
    182 				st[i] = False
    183 		oo = [o.replace ('\n', '') for o in oo]
    184 		return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]
    185 
    186 	def colorize_diff (self, f):
    187 		lines = [None, None]
    188 		for l in f:
    189 			if l[0] not in self.symbols:
    190 				yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
    191 				continue
    192 			i = self.symbols.index (l[0])
    193 			if lines[i]:
    194 				# Flush
    195 				for line in self.colorize_lines (lines):
    196 					yield line
    197 				lines = [None, None]
    198 			lines[i] = l[1:]
    199 			if (all (lines)):
    200 				# Flush
    201 				for line in self.colorize_lines (lines):
    202 					yield line
    203 				lines = [None, None]
    204 		if (any (lines)):
    205 			# Flush
    206 			for line in self.colorize_lines (lines):
    207 				yield line
    208 
    209 
    210 class ZipDiffer:
    211 
    212 	@staticmethod
    213 	def diff_files (files, symbols=diff_symbols):
    214 		files = tuple (files) # in case it's a generator, copy it
    215 		try:
    216 			for lines in izip_longest (*files):
    217 				if all (lines[0] == line for line in lines[1:]):
    218 					sys.stdout.writelines ([" ", lines[0]])
    219 					continue
    220 
    221 				for i, l in enumerate (lines):
    222 					if l:
    223 						sys.stdout.writelines ([symbols[i], l])
    224 		except IOError as e:
    225 			if e.errno != errno.EPIPE:
    226 				print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
    227 				sys.exit (1)
    228 
    229 
    230 class DiffFilters:
    231 
    232 	@staticmethod
    233 	def filter_failures (f):
    234 		for key, lines in DiffHelpers.separate_test_cases (f):
    235 			lines = list (lines)
    236 			if not DiffHelpers.test_passed (lines):
    237 				for l in lines: yield l
    238 
    239 class Stat:
    240 
    241 	def __init__ (self):
    242 		self.count = 0
    243 		self.freq = 0
    244 
    245 	def add (self, test):
    246 		self.count += 1
    247 		self.freq += test.freq
    248 
    249 class Stats:
    250 
    251 	def __init__ (self):
    252 		self.passed = Stat ()
    253 		self.failed = Stat ()
    254 		self.total  = Stat ()
    255 
    256 	def add (self, test):
    257 		self.total.add (test)
    258 		if test.passed:
    259 			self.passed.add (test)
    260 		else:
    261 			self.failed.add (test)
    262 
    263 	def mean (self):
    264 		return float (self.passed.count) / self.total.count
    265 
    266 	def variance (self):
    267 		return (float (self.passed.count) / self.total.count) * \
    268 		       (float (self.failed.count) / self.total.count)
    269 
    270 	def stddev (self):
    271 		return self.variance () ** .5
    272 
    273 	def zscore (self, population):
    274 		"""Calculate the standard score.
    275 		   Population is the Stats for population.
    276 		   Self is Stats for sample.
    277 		   Returns larger absolute value if sample is highly unlikely to be random.
    278 		   Anything outside of -3..+3 is very unlikely to be random.
    279 		   See: http://en.wikipedia.org/wiki/Standard_score"""
    280 
    281 		return (self.mean () - population.mean ()) / population.stddev ()
    282 
    283 
    284 
    285 
    286 class DiffSinks:
    287 
    288 	@staticmethod
    289 	def print_stat (f):
    290 		passed = 0
    291 		failed = 0
    292 		# XXX port to Stats, but that would really slow us down here
    293 		for key, lines in DiffHelpers.separate_test_cases (f):
    294 			if DiffHelpers.test_passed (lines):
    295 				passed += 1
    296 			else:
    297 				failed += 1
    298 		total = passed + failed
    299 		print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, 100. * failed / total))
    300 
    301 
    302 class Test:
    303 
    304 	def __init__ (self, lines):
    305 		self.freq = 1
    306 		self.passed = True
    307 		self.identifier = None
    308 		self.text = None
    309 		self.unicodes = None
    310 		self.glyphs = None
    311 		for l in lines:
    312 			symbol = l[0]
    313 			if symbol != ' ':
    314 				self.passed = False
    315 			i = 1
    316 			if ':' in l:
    317 				i = l.index (':')
    318 				if not self.identifier:
    319 					self.identifier = l[1:i]
    320 				i = i + 2 # Skip colon and space
    321 			j = -1
    322 			if l[j] == '\n':
    323 				j -= 1
    324 			brackets = l[i] + l[j]
    325 			l = l[i+1:-2]
    326 			if brackets == '()':
    327 				self.text = l
    328 			elif brackets == '<>':
    329 				self.unicodes = Unicode.parse (l)
    330 			elif brackets == '[]':
    331 				# XXX we don't handle failed tests here
    332 				self.glyphs = l
    333 
    334 
    335 class DiffHelpers:
    336 
    337 	@staticmethod
    338 	def separate_test_cases (f):
    339 		'''Reads lines from f, and if the lines have identifiers, ie.
    340 		   have a colon character, groups them by identifier,
    341 		   yielding lists of all lines with the same identifier.'''
    342 
    343 		def identifier (l):
    344 			if ':' in l[1:]:
    345 				return l[1:l.index (':')]
    346 			return l
    347 		return groupby (f, key=identifier)
    348 
    349 	@staticmethod
    350 	def test_passed (lines):
    351 		lines = list (lines)
    352 		# XXX This is a hack, but does the job for now.
    353 		if any (l.find("space+0|space+0") >= 0 for l in lines if l[0] == '+'): return True
    354 		if any (l.find("uni25CC") >= 0 for l in lines if l[0] == '+'): return True
    355 		if any (l.find("dottedcircle") >= 0 for l in lines if l[0] == '+'): return True
    356 		if any (l.find("glyph0") >= 0 for l in lines if l[0] == '+'): return True
    357 		if any (l.find("gid0") >= 0 for l in lines if l[0] == '+'): return True
    358 		if any (l.find("notdef") >= 0 for l in lines if l[0] == '+'): return True
    359 		return all (l[0] == ' ' for l in lines)
    360 
    361 
    362 class FilterHelpers:
    363 
    364 	@staticmethod
    365 	def filter_printer_function (filter_callback):
    366 		def printer (f):
    367 			for line in filter_callback (f):
    368 				print (line)
    369 		return printer
    370 
    371 	@staticmethod
    372 	def filter_printer_function_no_newline (filter_callback):
    373 		def printer (f):
    374 			for line in filter_callback (f):
    375 				sys.stdout.writelines ([line])
    376 		return printer
    377 
    378 
    379 class Ngram:
    380 
    381 	@staticmethod
    382 	def generator (n):
    383 
    384 		def gen (f):
    385 			l = []
    386 			for x in f:
    387 				l.append (x)
    388 				if len (l) == n:
    389 					yield tuple (l)
    390 					l[:1] = []
    391 
    392 		gen.n = n
    393 		return gen
    394 
    395 
    396 class UtilMains:
    397 
    398 	@staticmethod
    399 	def process_multiple_files (callback, mnemonic = "FILE"):
    400 
    401 		if "--help" in sys.argv:
    402 			print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
    403 			sys.exit (1)
    404 
    405 		try:
    406 			files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
    407 			for s in files:
    408 				callback (FileHelpers.open_file_or_stdin (s))
    409 		except IOError as e:
    410 			if e.errno != errno.EPIPE:
    411 				print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
    412 				sys.exit (1)
    413 
    414 	@staticmethod
    415 	def process_multiple_args (callback, mnemonic):
    416 
    417 		if len (sys.argv) == 1 or "--help" in sys.argv:
    418 			print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
    419 			sys.exit (1)
    420 
    421 		try:
    422 			for s in sys.argv[1:]:
    423 				callback (s)
    424 		except IOError as e:
    425 			if e.errno != errno.EPIPE:
    426 				print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
    427 				sys.exit (1)
    428 
    429 	@staticmethod
    430 	def filter_multiple_strings_or_stdin (callback, mnemonic, \
    431 					      separator = " ", \
    432 					      concat_separator = False):
    433 
    434 		if "--help" in sys.argv:
    435 			print ("Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input." \
    436 			      % (sys.argv[0], mnemonic, sys.argv[0]))
    437 			sys.exit (1)
    438 
    439 		try:
    440 			if len (sys.argv) == 1:
    441 				while (1):
    442 					line = sys.stdin.readline ()
    443 					if not len (line):
    444 						break
    445 					if line[-1] == '\n':
    446 						line = line[:-1]
    447 					print (callback (line))
    448 			else:
    449 				args = sys.argv[1:]
    450 				if concat_separator != False:
    451 					args = [concat_separator.join (args)]
    452 				print (separator.join (callback (x) for x in (args)))
    453 		except IOError as e:
    454 			if e.errno != errno.EPIPE:
    455 				print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
    456 				sys.exit (1)
    457 
    458 
    459 class Unicode:
    460 
    461 	@staticmethod
    462 	def decode (s):
    463 		return u','.join ("U+%04X" % cp for cp in codepoints (tounicode (s, 'utf-8')))
    464 
    465 	@staticmethod
    466 	def parse (s):
    467 		s = re.sub (r"0[xX]", " ", s)
    468 		s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n\t]", " ", s)
    469 		return [int (x, 16) for x in s.split ()]
    470 
    471 	@staticmethod
    472 	def encode (s):
    473 		s = u''.join (unichr (x) for x in Unicode.parse (s))
    474 		if sys.version_info[0] == 2: s = s.encode ('utf-8')
    475 		return s
    476 
    477 	shorthands = {
    478 		"ZERO WIDTH NON-JOINER": "ZWNJ",
    479 		"ZERO WIDTH JOINER": "ZWJ",
    480 		"NARROW NO-BREAK SPACE": "NNBSP",
    481 		"COMBINING GRAPHEME JOINER": "CGJ",
    482 		"LEFT-TO-RIGHT MARK": "LRM",
    483 		"RIGHT-TO-LEFT MARK": "RLM",
    484 		"LEFT-TO-RIGHT EMBEDDING": "LRE",
    485 		"RIGHT-TO-LEFT EMBEDDING": "RLE",
    486 		"POP DIRECTIONAL FORMATTING": "PDF",
    487 		"LEFT-TO-RIGHT OVERRIDE": "LRO",
    488 		"RIGHT-TO-LEFT OVERRIDE": "RLO",
    489 	}
    490 
    491 	@staticmethod
    492 	def pretty_name (u):
    493 		try:
    494 			s = unicodedata.name (u)
    495 		except ValueError:
    496 			return "XXX"
    497 		s = re.sub (".* LETTER ", "", s)
    498 		s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
    499 		s = re.sub (".* SIGN ", "", s)
    500 		s = re.sub (".* COMBINING ", "", s)
    501 		if re.match (".* VIRAMA", s):
    502 			s = "HALANT"
    503 		if s in Unicode.shorthands:
    504 			s = Unicode.shorthands[s]
    505 		return s
    506 
    507 	@staticmethod
    508 	def pretty_names (s):
    509 		s = re.sub (r"[<+>\\uU]", " ", s)
    510 		s = re.sub (r"0[xX]", " ", s)
    511 		s = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
    512 		return u' + '.join (Unicode.pretty_name (x) for x in s).encode ('utf-8')
    513 
    514 
    515 class FileHelpers:
    516 
    517 	@staticmethod
    518 	def open_file_or_stdin (f):
    519 		if f == '-':
    520 			return sys.stdin
    521 		return open (f)
    522 
    523 
    524 class Manifest:
    525 
    526 	@staticmethod
    527 	def read (s, strict = True):
    528 
    529 		if not os.path.exists (s):
    530 			if strict:
    531 				print ("%s: %s does not exist" % (sys.argv[0], s), file=sys.stderr)
    532 				sys.exit (1)
    533 			return
    534 
    535 		s = os.path.normpath (s)
    536 
    537 		if os.path.isdir (s):
    538 
    539 			try:
    540 				m = open (os.path.join (s, "MANIFEST"))
    541 				items = [x.strip () for x in m.readlines ()]
    542 				for f in items:
    543 					for p in Manifest.read (os.path.join (s, f)):
    544 						yield p
    545 			except IOError:
    546 				if strict:
    547 					print ("%s: %s does not exist" % (sys.argv[0], os.path.join (s, "MANIFEST")), file=sys.stderr)
    548 					sys.exit (1)
    549 				return
    550 		else:
    551 			yield s
    552 
    553 	@staticmethod
    554 	def update_recursive (s):
    555 
    556 		for dirpath, dirnames, filenames in os.walk (s, followlinks=True):
    557 
    558 			for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
    559 				if f in dirnames:
    560 					dirnames.remove (f)
    561 				if f in filenames:
    562 					filenames.remove (f)
    563 			dirnames.sort ()
    564 			filenames.sort ()
    565 			ms = os.path.join (dirpath, "MANIFEST")
    566 			print ("  GEN    %s" % ms)
    567 			m = open (ms, "w")
    568 			for f in filenames:
    569 				print (f, file=m)
    570 			for f in dirnames:
    571 				print (f, file=m)
    572 			for f in dirnames:
    573 				Manifest.update_recursive (os.path.join (dirpath, f))
    574 
# This module only provides helpers; running it directly does nothing.
if __name__ == '__main__':
	pass
    577