Home | History | Annotate | Download | only in shaping
      1 #!/usr/bin/env python
      2 
      3 from __future__ import print_function
      4 import sys, os, re, difflib, unicodedata, errno, cgi
      5 from itertools import *
      6 
      7 diff_symbols = "-+=*&^%$#@!~/"
      8 diff_colors = ['red', 'green', 'blue']
      9 
     10 if sys.version_info[0] >= 3:
     11 	unichr = chr
     12 
     13 class ColorFormatter:
     14 
     15 	class Null:
     16 		@staticmethod
     17 		def start_color (c): return ''
     18 		@staticmethod
     19 		def end_color (): return ''
     20 		@staticmethod
     21 		def escape (s): return s
     22 		@staticmethod
     23 		def newline (): return '\n'
     24 
     25 	class ANSI:
     26 		@staticmethod
     27 		def start_color (c):
     28 			return {
     29 				'red': '\033[41;37;1m',
     30 				'green': '\033[42;37;1m',
     31 				'blue': '\033[44;37;1m',
     32 			}[c]
     33 		@staticmethod
     34 		def end_color ():
     35 			return '\033[m'
     36 		@staticmethod
     37 		def escape (s): return s
     38 		@staticmethod
     39 		def newline (): return '\n'
     40 
     41 	class HTML:
     42 		@staticmethod
     43 		def start_color (c):
     44 			return '<span style="background:%s">' % c
     45 		@staticmethod
     46 		def end_color ():
     47 			return '</span>'
     48 		@staticmethod
     49 		def escape (s): return cgi.escape (s)
     50 		@staticmethod
     51 		def newline (): return '<br/>\n'
     52 
     53 	@staticmethod
     54 	def Auto (argv = [], out = sys.stdout):
     55 		format = ColorFormatter.ANSI
     56 		if "--format" in argv:
     57 			argv.remove ("--format")
     58 			format = ColorFormatter.ANSI
     59 		if "--format=ansi" in argv:
     60 			argv.remove ("--format=ansi")
     61 			format = ColorFormatter.ANSI
     62 		if "--format=html" in argv:
     63 			argv.remove ("--format=html")
     64 			format = ColorFormatter.HTML
     65 		if "--no-format" in argv:
     66 			argv.remove ("--no-format")
     67 			format = ColorFormatter.Null
     68 		return format
     69 
     70 
     71 class DiffColorizer:
     72 
     73 	diff_regex = re.compile ('([a-za-z0-9_]*)([^a-za-z0-9_]?)')
     74 
     75 	def __init__ (self, formatter, colors=diff_colors, symbols=diff_symbols):
     76 		self.formatter = formatter
     77 		self.colors = colors
     78 		self.symbols = symbols
     79 
     80 	def colorize_lines (self, lines):
     81 		lines = (l if l else '' for l in lines)
     82 		ss = [self.diff_regex.sub (r'\1\n\2\n', l).splitlines (True) for l in lines]
     83 		oo = ["",""]
     84 		st = [False, False]
     85 		for l in difflib.Differ().compare (*ss):
     86 			if l[0] == '?':
     87 				continue
     88 			if l[0] == ' ':
     89 				for i in range(2):
     90 					if st[i]:
     91 						oo[i] += self.formatter.end_color ()
     92 						st[i] = False
     93 				oo = [o + self.formatter.escape (l[2:]) for o in oo]
     94 				continue
     95 			if l[0] in self.symbols:
     96 				i = self.symbols.index (l[0])
     97 				if not st[i]:
     98 					oo[i] += self.formatter.start_color (self.colors[i])
     99 					st[i] = True
    100 				oo[i] += self.formatter.escape (l[2:])
    101 				continue
    102 		for i in range(2):
    103 			if st[i]:
    104 				oo[i] += self.formatter.end_color ()
    105 				st[i] = False
    106 		oo = [o.replace ('\n', '') for o in oo]
    107 		return [s1+s2+self.formatter.newline () for (s1,s2) in zip (self.symbols, oo) if s2]
    108 
    109 	def colorize_diff (self, f):
    110 		lines = [None, None]
    111 		for l in f:
    112 			if l[0] not in self.symbols:
    113 				yield self.formatter.escape (l).replace ('\n', self.formatter.newline ())
    114 				continue
    115 			i = self.symbols.index (l[0])
    116 			if lines[i]:
    117 				# Flush
    118 				for line in self.colorize_lines (lines):
    119 					yield line
    120 				lines = [None, None]
    121 			lines[i] = l[1:]
    122 			if (all (lines)):
    123 				# Flush
    124 				for line in self.colorize_lines (lines):
    125 					yield line
    126 				lines = [None, None]
    127 		if (any (lines)):
    128 			# Flush
    129 			for line in self.colorize_lines (lines):
    130 				yield line
    131 
    132 
    133 class ZipDiffer:
    134 
    135 	@staticmethod
    136 	def diff_files (files, symbols=diff_symbols):
    137 		files = tuple (files) # in case it's a generator, copy it
    138 		try:
    139 			for lines in izip_longest (*files):
    140 				if all (lines[0] == line for line in lines[1:]):
    141 					sys.stdout.writelines ([" ", lines[0]])
    142 					continue
    143 
    144 				for i, l in enumerate (lines):
    145 					if l:
    146 						sys.stdout.writelines ([symbols[i], l])
    147 		except IOError as e:
    148 			if e.errno != errno.EPIPE:
    149 				print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
    150 				sys.exit (1)
    151 
    152 
    153 class DiffFilters:
    154 
    155 	@staticmethod
    156 	def filter_failures (f):
    157 		for key, lines in DiffHelpers.separate_test_cases (f):
    158 			lines = list (lines)
    159 			if not DiffHelpers.test_passed (lines):
    160 				for l in lines: yield l
    161 
    162 class Stat:
    163 
    164 	def __init__ (self):
    165 		self.count = 0
    166 		self.freq = 0
    167 
    168 	def add (self, test):
    169 		self.count += 1
    170 		self.freq += test.freq
    171 
    172 class Stats:
    173 
    174 	def __init__ (self):
    175 		self.passed = Stat ()
    176 		self.failed = Stat ()
    177 		self.total  = Stat ()
    178 
    179 	def add (self, test):
    180 		self.total.add (test)
    181 		if test.passed:
    182 			self.passed.add (test)
    183 		else:
    184 			self.failed.add (test)
    185 
    186 	def mean (self):
    187 		return float (self.passed.count) / self.total.count
    188 
    189 	def variance (self):
    190 		return (float (self.passed.count) / self.total.count) * \
    191 		       (float (self.failed.count) / self.total.count)
    192 
    193 	def stddev (self):
    194 		return self.variance () ** .5
    195 
    196 	def zscore (self, population):
    197 		"""Calculate the standard score.
    198 		   Population is the Stats for population.
    199 		   Self is Stats for sample.
    200 		   Returns larger absolute value if sample is highly unlikely to be random.
    201 		   Anything outside of -3..+3 is very unlikely to be random.
    202 		   See: http://en.wikipedia.org/wiki/Standard_score"""
    203 
    204 		return (self.mean () - population.mean ()) / population.stddev ()
    205 
    206 
    207 
    208 
    209 class DiffSinks:
    210 
    211 	@staticmethod
    212 	def print_stat (f):
    213 		passed = 0
    214 		failed = 0
    215 		# XXX port to Stats, but that would really slow us down here
    216 		for key, lines in DiffHelpers.separate_test_cases (f):
    217 			if DiffHelpers.test_passed (lines):
    218 				passed += 1
    219 			else:
    220 				failed += 1
    221 		total = passed + failed
    222 		print ("%d out of %d tests passed.  %d failed (%g%%)" % (passed, total, failed, 100. * failed / total))
    223 
    224 	@staticmethod
    225 	def print_ngrams (f, ns=(1,2,3)):
    226 		gens = tuple (Ngram.generator (n) for n in ns)
    227 		allstats = Stats ()
    228 		allgrams = {}
    229 		for key, lines in DiffHelpers.separate_test_cases (f):
    230 			test = Test (lines)
    231 			allstats.add (test)
    232 
    233 			for gen in gens:
    234 				for ngram in gen (test.unicodes):
    235 					if ngram not in allgrams:
    236 						allgrams[ngram] = Stats ()
    237 					allgrams[ngram].add (test)
    238 
    239 		importantgrams = {}
    240 		for ngram, stats in allgrams.iteritems ():
    241 			if stats.failed.count >= 30: # for statistical reasons
    242 				importantgrams[ngram] = stats
    243 		allgrams = importantgrams
    244 		del importantgrams
    245 
    246 		for ngram, stats in allgrams.iteritems ():
    247 			print ("zscore: %9f failed: %6d passed: %6d ngram: <%s>" % (stats.zscore (allstats), stats.failed.count, stats.passed.count, ','.join ("U+%04X" % u for u in ngram)))
    248 
    249 
    250 
    251 class Test:
    252 
    253 	def __init__ (self, lines):
    254 		self.freq = 1
    255 		self.passed = True
    256 		self.identifier = None
    257 		self.text = None
    258 		self.unicodes = None
    259 		self.glyphs = None
    260 		for l in lines:
    261 			symbol = l[0]
    262 			if symbol != ' ':
    263 				self.passed = False
    264 			i = 1
    265 			if ':' in l:
    266 				i = l.index (':')
    267 				if not self.identifier:
    268 					self.identifier = l[1:i]
    269 				i = i + 2 # Skip colon and space
    270 			j = -1
    271 			if l[j] == '\n':
    272 				j -= 1
    273 			brackets = l[i] + l[j]
    274 			l = l[i+1:-2]
    275 			if brackets == '()':
    276 				self.text = l
    277 			elif brackets == '<>':
    278 				self.unicodes = Unicode.parse (l)
    279 			elif brackets == '[]':
    280 				# XXX we don't handle failed tests here
    281 				self.glyphs = l
    282 
    283 
    284 class DiffHelpers:
    285 
    286 	@staticmethod
    287 	def separate_test_cases (f):
    288 		'''Reads lines from f, and if the lines have identifiers, ie.
    289 		   have a colon character, groups them by identifier,
    290 		   yielding lists of all lines with the same identifier.'''
    291 
    292 		def identifier (l):
    293 			if ':' in l[1:]:
    294 				return l[1:l.index (':')]
    295 			return l
    296 		return groupby (f, key=identifier)
    297 
    298 	@staticmethod
    299 	def test_passed (lines):
    300 		lines = list (lines)
    301 		# XXX This is a hack, but does the job for now.
    302 		if any (l.find("space+0|space+0") >= 0 for l in lines if l[0] == '+'): return True
    303 		if any (l.find("uni25CC") >= 0 for l in lines if l[0] == '+'): return True
    304 		if any (l.find("dottedcircle") >= 0 for l in lines if l[0] == '+'): return True
    305 		if any (l.find("glyph0") >= 0 for l in lines if l[0] == '+'): return True
    306 		if any (l.find("gid0") >= 0 for l in lines if l[0] == '+'): return True
    307 		if any (l.find("notdef") >= 0 for l in lines if l[0] == '+'): return True
    308 		return all (l[0] == ' ' for l in lines)
    309 
    310 
    311 class FilterHelpers:
    312 
    313 	@staticmethod
    314 	def filter_printer_function (filter_callback):
    315 		def printer (f):
    316 			for line in filter_callback (f):
    317 				print (line)
    318 		return printer
    319 
    320 	@staticmethod
    321 	def filter_printer_function_no_newline (filter_callback):
    322 		def printer (f):
    323 			for line in filter_callback (f):
    324 				sys.stdout.writelines ([line])
    325 		return printer
    326 
    327 
    328 class Ngram:
    329 
    330 	@staticmethod
    331 	def generator (n):
    332 
    333 		def gen (f):
    334 			l = []
    335 			for x in f:
    336 				l.append (x)
    337 				if len (l) == n:
    338 					yield tuple (l)
    339 					l[:1] = []
    340 
    341 		gen.n = n
    342 		return gen
    343 
    344 
    345 class UtilMains:
    346 
    347 	@staticmethod
    348 	def process_multiple_files (callback, mnemonic = "FILE"):
    349 
    350 		if "--help" in sys.argv:
    351 			print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
    352 			sys.exit (1)
    353 
    354 		try:
    355 			files = sys.argv[1:] if len (sys.argv) > 1 else ['-']
    356 			for s in files:
    357 				callback (FileHelpers.open_file_or_stdin (s))
    358 		except IOError as e:
    359 			if e.errno != errno.EPIPE:
    360 				print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
    361 				sys.exit (1)
    362 
    363 	@staticmethod
    364 	def process_multiple_args (callback, mnemonic):
    365 
    366 		if len (sys.argv) == 1 or "--help" in sys.argv:
    367 			print ("Usage: %s %s..." % (sys.argv[0], mnemonic))
    368 			sys.exit (1)
    369 
    370 		try:
    371 			for s in sys.argv[1:]:
    372 				callback (s)
    373 		except IOError as e:
    374 			if e.errno != errno.EPIPE:
    375 				print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
    376 				sys.exit (1)
    377 
    378 	@staticmethod
    379 	def filter_multiple_strings_or_stdin (callback, mnemonic, \
    380 					      separator = " ", \
    381 					      concat_separator = False):
    382 
    383 		if "--help" in sys.argv:
    384 			print ("Usage:\n  %s %s...\nor:\n  %s\n\nWhen called with no arguments, input is read from standard input." \
    385 			      % (sys.argv[0], mnemonic, sys.argv[0]))
    386 			sys.exit (1)
    387 
    388 		try:
    389 			if len (sys.argv) == 1:
    390 				while (1):
    391 					line = sys.stdin.readline ()
    392 					if not len (line):
    393 						break
    394 					if line[-1] == '\n':
    395 						line = line[:-1]
    396 					print (callback (line))
    397 			else:
    398 				args = sys.argv[1:]
    399 				if concat_separator != False:
    400 					args = [concat_separator.join (args)]
    401 				print (separator.join (callback (x) for x in (args)))
    402 		except IOError as e:
    403 			if e.errno != errno.EPIPE:
    404 				print ("%s: %s: %s" % (sys.argv[0], e.filename, e.strerror), file=sys.stderr)
    405 				sys.exit (1)
    406 
    407 
    408 class Unicode:
    409 
    410 	@staticmethod
    411 	def decode (s):
    412 		return u','.join ("U+%04X" % ord (u) for u in unicode (s, 'utf-8')).encode ('utf-8')
    413 
    414 	@staticmethod
    415 	def parse (s):
    416 		s = re.sub (r"0[xX]", " ", s)
    417 		s = re.sub (r"[<+>{},;&#\\xXuUnNiI\n	]", " ", s)
    418 		return [int (x, 16) for x in s.split ()]
    419 
    420 	@staticmethod
    421 	def encode (s):
    422 		s = u''.join (unichr (x) for x in Unicode.parse (s))
    423 		if sys.version_info[0] == 2: s = s.encode ('utf-8')
    424 		return s
    425 
    426 	shorthands = {
    427 		"ZERO WIDTH NON-JOINER": "ZWNJ",
    428 		"ZERO WIDTH JOINER": "ZWJ",
    429 		"NARROW NO-BREAK SPACE": "NNBSP",
    430 		"COMBINING GRAPHEME JOINER": "CGJ",
    431 		"LEFT-TO-RIGHT MARK": "LRM",
    432 		"RIGHT-TO-LEFT MARK": "RLM",
    433 		"LEFT-TO-RIGHT EMBEDDING": "LRE",
    434 		"RIGHT-TO-LEFT EMBEDDING": "RLE",
    435 		"POP DIRECTIONAL FORMATTING": "PDF",
    436 		"LEFT-TO-RIGHT OVERRIDE": "LRO",
    437 		"RIGHT-TO-LEFT OVERRIDE": "RLO",
    438 	}
    439 
    440 	@staticmethod
    441 	def pretty_name (u):
    442 		try:
    443 			s = unicodedata.name (u)
    444 		except ValueError:
    445 			return "XXX"
    446 		s = re.sub (".* LETTER ", "", s)
    447 		s = re.sub (".* VOWEL SIGN (.*)", r"\1-MATRA", s)
    448 		s = re.sub (".* SIGN ", "", s)
    449 		s = re.sub (".* COMBINING ", "", s)
    450 		if re.match (".* VIRAMA", s):
    451 			s = "HALANT"
    452 		if s in Unicode.shorthands:
    453 			s = Unicode.shorthands[s]
    454 		return s
    455 
    456 	@staticmethod
    457 	def pretty_names (s):
    458 		s = re.sub (r"[<+>\\uU]", " ", s)
    459 		s = re.sub (r"0[xX]", " ", s)
    460 		s = [unichr (int (x, 16)) for x in re.split ('[, \n]', s) if len (x)]
    461 		return u' + '.join (Unicode.pretty_name (x) for x in s).encode ('utf-8')
    462 
    463 
    464 class FileHelpers:
    465 
    466 	@staticmethod
    467 	def open_file_or_stdin (f):
    468 		if f == '-':
    469 			return sys.stdin
    470 		return file (f)
    471 
    472 
    473 class Manifest:
    474 
    475 	@staticmethod
    476 	def read (s, strict = True):
    477 
    478 		if not os.path.exists (s):
    479 			if strict:
    480 				print ("%s: %s does not exist" % (sys.argv[0], s), file=sys.stderr)
    481 				sys.exit (1)
    482 			return
    483 
    484 		s = os.path.normpath (s)
    485 
    486 		if os.path.isdir (s):
    487 
    488 			try:
    489 				m = file (os.path.join (s, "MANIFEST"))
    490 				items = [x.strip () for x in m.readlines ()]
    491 				for f in items:
    492 					for p in Manifest.read (os.path.join (s, f)):
    493 						yield p
    494 			except IOError:
    495 				if strict:
    496 					print ("%s: %s does not exist" % (sys.argv[0], os.path.join (s, "MANIFEST")), file=sys.stderr)
    497 					sys.exit (1)
    498 				return
    499 		else:
    500 			yield s
    501 
    502 	@staticmethod
    503 	def update_recursive (s):
    504 
    505 		for dirpath, dirnames, filenames in os.walk (s, followlinks=True):
    506 
    507 			for f in ["MANIFEST", "README", "LICENSE", "COPYING", "AUTHORS", "SOURCES", "ChangeLog"]:
    508 				if f in dirnames:
    509 					dirnames.remove (f)
    510 				if f in filenames:
    511 					filenames.remove (f)
    512 			dirnames.sort ()
    513 			filenames.sort ()
    514 			ms = os.path.join (dirpath, "MANIFEST")
    515 			print ("  GEN    %s" % ms)
    516 			m = open (ms, "w")
    517 			for f in filenames:
    518 				print (f, file=m)
    519 			for f in dirnames:
    520 				print (f, file=m)
    521 			for f in dirnames:
    522 				Manifest.update_recursive (os.path.join (dirpath, f))
    523 
# This module only provides helpers; running it directly does nothing.
if __name__ == '__main__':
	pass
    526