Home | History | Annotate | Download | only in tables
      1 from __future__ import print_function, division, absolute_import
      2 from fontTools.misc.py23 import *
      3 from fontTools.misc.textTools import safeEval, readHex
      4 from fontTools.ttLib import getSearchRange
      5 from fontTools.unicode import Unicode
      6 from . import DefaultTable
      7 import sys
      8 import struct
      9 import array
     10 import operator
     11 
     12 
     13 class table__c_m_a_p(DefaultTable.DefaultTable):
     14 	
     15 	def getcmap(self, platformID, platEncID):
     16 		for subtable in self.tables:
     17 			if (subtable.platformID == platformID and 
     18 					subtable.platEncID == platEncID):
     19 				return subtable
     20 		return None # not found
     21 	
     22 	def decompile(self, data, ttFont):
     23 		tableVersion, numSubTables = struct.unpack(">HH", data[:4])
     24 		self.tableVersion = int(tableVersion)
     25 		self.tables = tables = []
     26 		seenOffsets = {}
     27 		for i in range(numSubTables):
     28 			platformID, platEncID, offset = struct.unpack(
     29 					">HHl", data[4+i*8:4+(i+1)*8])
     30 			platformID, platEncID = int(platformID), int(platEncID)
     31 			format, length = struct.unpack(">HH", data[offset:offset+4])
     32 			if format in [8,10,12,13]:
     33 				format, reserved, length = struct.unpack(">HHL", data[offset:offset+8])
     34 			elif format in [14]:
     35 				format, length = struct.unpack(">HL", data[offset:offset+6])
     36 				
     37 			if not length:
     38 				print("Error: cmap subtable is reported as having zero length: platformID %s, platEncID %s,  format %s offset %s. Skipping table." % (platformID, platEncID,format, offset))
     39 				continue
     40 			if format not in cmap_classes:
     41 				table = cmap_format_unknown(format)
     42 			else:
     43 				table = cmap_classes[format](format)
     44 			table.platformID = platformID
     45 			table.platEncID = platEncID
     46 			# Note that by default we decompile only the subtable header info;
     47 			# any other data gets decompiled only when an attribute of the
     48 			# subtable is referenced.
     49 			table.decompileHeader(data[offset:offset+int(length)], ttFont)
     50 			if offset in seenOffsets:
     51 				table.cmap = tables[seenOffsets[offset]].cmap
     52 			else:
     53 				seenOffsets[offset] = i
     54 			tables.append(table)
     55 	
     56 	def compile(self, ttFont):
     57 		self.tables.sort()    # sort according to the spec; see CmapSubtable.__lt__()
     58 		numSubTables = len(self.tables)
     59 		totalOffset = 4 + 8 * numSubTables
     60 		data = struct.pack(">HH", self.tableVersion, numSubTables)
     61 		tableData = b""
     62 		seen = {}  # Some tables are the same object reference. Don't compile them twice.
     63 		done = {}  # Some tables are different objects, but compile to the same data chunk
     64 		for table in self.tables:
     65 			try:
     66 				offset = seen[id(table.cmap)]
     67 			except KeyError:
     68 				chunk = table.compile(ttFont)
     69 				if chunk in done:
     70 					offset = done[chunk]
     71 				else:
     72 					offset = seen[id(table.cmap)] = done[chunk] = totalOffset + len(tableData)
     73 					tableData = tableData + chunk
     74 			data = data + struct.pack(">HHl", table.platformID, table.platEncID, offset)
     75 		return data + tableData
     76 	
     77 	def toXML(self, writer, ttFont):
     78 		writer.simpletag("tableVersion", version=self.tableVersion)
     79 		writer.newline()
     80 		for table in self.tables:
     81 			table.toXML(writer, ttFont)
     82 	
     83 	def fromXML(self, name, attrs, content, ttFont):
     84 		if name == "tableVersion":
     85 			self.tableVersion = safeEval(attrs["version"])
     86 			return
     87 		if name[:12] != "cmap_format_":
     88 			return
     89 		if not hasattr(self, "tables"):
     90 			self.tables = []
     91 		format = safeEval(name[12:])
     92 		if format not in cmap_classes:
     93 			table = cmap_format_unknown(format)
     94 		else:
     95 			table = cmap_classes[format](format)
     96 		table.platformID = safeEval(attrs["platformID"])
     97 		table.platEncID = safeEval(attrs["platEncID"])
     98 		table.fromXML(name, attrs, content, ttFont)
     99 		self.tables.append(table)
    100 
    101 
    102 class CmapSubtable(object):
    103 	
    104 	def __init__(self, format):
    105 		self.format = format
    106 		self.data = None
    107 		self.ttFont = None
    108 
    109 	def __getattr__(self, attr):
    110 		# allow lazy decompilation of subtables.
    111 		if attr[:2] == '__': # don't handle requests for member functions like '__lt__'
    112 			raise AttributeError(attr)
    113 		if self.data is None:
    114 			raise AttributeError(attr)
    115 		self.decompile(None, None) # use saved data.
    116 		self.data = None # Once this table has been decompiled, make sure we don't
    117 						# just return the original data. Also avoids recursion when
    118 						# called with an attribute that the cmap subtable doesn't have.
    119 		return getattr(self, attr)
    120 	
    121 	def decompileHeader(self, data, ttFont):
    122 		format, length, language = struct.unpack(">HHH", data[:6])
    123 		assert len(data) == length, "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
    124 		self.format = int(format)
    125 		self.length = int(length)
    126 		self.language = int(language)
    127 		self.data = data[6:]
    128 		self.ttFont = ttFont
    129 
    130 	def toXML(self, writer, ttFont):
    131 		writer.begintag(self.__class__.__name__, [
    132 				("platformID", self.platformID),
    133 				("platEncID", self.platEncID),
    134 				("language", self.language),
    135 				])
    136 		writer.newline()
    137 		codes = sorted(self.cmap.items())
    138 		self._writeCodes(codes, writer)
    139 		writer.endtag(self.__class__.__name__)
    140 		writer.newline()
    141 
    142 	def isUnicode(self):
    143 		return (self.platformID == 0 or
    144 			(self.platformID == 3 and self.platEncID in [1, 10]))
    145 
    146 	def isSymbol(self):
    147 		return self.platformID == 3 and self.platEncID == 0
    148 
    149 	def _writeCodes(self, codes, writer):
    150 		isUnicode = self.isUnicode()
    151 		for code, name in codes:
    152 			writer.simpletag("map", code=hex(code), name=name)
    153 			if isUnicode:
    154 				writer.comment(Unicode[code])
    155 			writer.newline()
    156 	
    157 	def __lt__(self, other):
    158 		if not isinstance(other, CmapSubtable):
    159 			return NotImplemented
    160 
    161 		# implemented so that list.sort() sorts according to the spec.
    162 		selfTuple = (
    163 			getattr(self, "platformID", None),
    164 			getattr(self, "platEncID", None),
    165 			getattr(self, "language", None),
    166 			self.__dict__)
    167 		otherTuple = (
    168 			getattr(other, "platformID", None),
    169 			getattr(other, "platEncID", None),
    170 			getattr(other, "language", None),
    171 			other.__dict__)
    172 		return selfTuple < otherTuple
    173 
    174 
    175 class cmap_format_0(CmapSubtable):
    176 	
    177 	def decompile(self, data, ttFont):
    178 		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
    179 		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
    180 		if data is not None and ttFont is not None:
    181 			self.decompileHeader(data[offset:offset+int(length)], ttFont)
    182 		else:
    183 			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
    184 		data = self.data # decompileHeader assigns the data after the header to self.data
    185 		assert 262 == self.length, "Format 0 cmap subtable not 262 bytes"
    186 		glyphIdArray = array.array("B")
    187 		glyphIdArray.fromstring(self.data)
    188 		self.cmap = cmap = {}
    189 		lenArray = len(glyphIdArray)
    190 		charCodes = list(range(lenArray))
    191 		names = map(self.ttFont.getGlyphName, glyphIdArray)
    192 		list(map(operator.setitem, [cmap]*lenArray, charCodes, names))
    193 
    194 	
    195 	def compile(self, ttFont):
    196 		if self.data:
    197 			return struct.pack(">HHH", 0, 262, self.language) + self.data
    198 
    199 		charCodeList = sorted(self.cmap.items())
    200 		charCodes = [entry[0] for entry in charCodeList]
    201 		valueList = [entry[1] for entry in charCodeList]
    202 		assert charCodes == list(range(256))
    203 		valueList = map(ttFont.getGlyphID, valueList)
    204 
    205 		glyphIdArray = array.array("B", valueList)
    206 		data = struct.pack(">HHH", 0, 262, self.language) + glyphIdArray.tostring()
    207 		assert len(data) == 262
    208 		return data
    209 	
    210 	def fromXML(self, name, attrs, content, ttFont):
    211 		self.language = safeEval(attrs["language"])
    212 		if not hasattr(self, "cmap"):
    213 			self.cmap = {}
    214 		cmap = self.cmap
    215 		for element in content:
    216 			if not isinstance(element, tuple):
    217 				continue
    218 			name, attrs, content = element
    219 			if name != "map":
    220 				continue
    221 			cmap[safeEval(attrs["code"])] = attrs["name"]
    222 
    223 
# struct format of one format 2 subHeader record, big-endian:
# firstCode (uint16), entryCount (uint16), idDelta (int16), idRangeOffset (uint16).
subHeaderFormat = ">HHhH"
    225 class SubHeader(object):
    226 	def __init__(self):
    227 		self.firstCode = None
    228 		self.entryCount = None
    229 		self.idDelta = None
    230 		self.idRangeOffset = None
    231 		self.glyphIndexArray = []
    232 		
    233 class cmap_format_2(CmapSubtable):
    234 	
    235 	def setIDDelta(self, subHeader):
    236 		subHeader.idDelta = 0
    237 		# find the minGI which is not zero.
    238 		minGI = subHeader.glyphIndexArray[0]
    239 		for gid in subHeader.glyphIndexArray:
    240 			if (gid != 0) and (gid < minGI):
    241 				minGI = gid
    242 		# The lowest gid in glyphIndexArray, after subtracting idDelta, must be 1.
    243 		# idDelta is a short, and must be between -32K and 32K. minGI can be between 1 and 64K.
    244 		# We would like to pick an idDelta such that the first glyphArray GID is 1, 
    245 		# so that we are more likely to be able to combine glypharray GID subranges.
    246 		# This means that we have a problem when minGI is > 32K
    247 		# Since the final gi is reconstructed from the glyphArray GID by:
    248 		#    (short)finalGID = (gid +  idDelta) % 0x10000),
    249 		# we can get from a glypharray GID of 1 to a final GID of 65K by subtracting 2, and casting the
    250 		# negative number to an unsigned short. 
    251 
    252 		if  (minGI > 1):
    253 			if  minGI > 0x7FFF:
    254 				subHeader.idDelta = -(0x10000 - minGI) -1
    255 			else:
    256 				subHeader.idDelta =  minGI -1
    257 			idDelta = subHeader.idDelta
    258 			for i in range(subHeader.entryCount):
    259 				gid = subHeader.glyphIndexArray[i]
    260 				if gid > 0: 
    261 					subHeader.glyphIndexArray[i] = gid - idDelta 
    262 
    263 
    264 	def decompile(self, data, ttFont):
    265 		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
    266 		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
    267 		if data is not None and ttFont is not None:
    268 			self.decompileHeader(data[offset:offset+int(length)], ttFont)
    269 		else:
    270 			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
    271 
    272 		data = self.data # decompileHeader assigns the data after the header to self.data
    273 		subHeaderKeys = []
    274 		maxSubHeaderindex = 0
    275 		# get the key array, and determine the number of subHeaders.
    276 		allKeys = array.array("H")
    277 		allKeys.fromstring(data[:512])
    278 		data = data[512:]
    279 		if sys.byteorder != "big":
    280 			allKeys.byteswap()
    281 		subHeaderKeys = [ key//8 for key in allKeys]
    282 		maxSubHeaderindex = max(subHeaderKeys)
    283 	
    284 		#Load subHeaders
    285 		subHeaderList = []
    286 		pos = 0
    287 		for i in range(maxSubHeaderindex + 1):
    288 			subHeader = SubHeader()
    289 			(subHeader.firstCode, subHeader.entryCount, subHeader.idDelta, \
    290 				subHeader.idRangeOffset) = struct.unpack(subHeaderFormat, data[pos:pos + 8])
    291 			pos += 8
    292 			giDataPos = pos + subHeader.idRangeOffset-2
    293 			giList = array.array("H")
    294 			giList.fromstring(data[giDataPos:giDataPos + subHeader.entryCount*2])
    295 			if sys.byteorder != "big":
    296 				giList.byteswap()
    297 			subHeader.glyphIndexArray = giList
    298 			subHeaderList.append(subHeader)
    299 		# How this gets processed. 
    300 		# Charcodes may be one or two bytes.
    301 		# The first byte of a charcode is mapped through the  subHeaderKeys, to select
    302 		# a subHeader. For any subheader but 0, the next byte is then mapped through the
    303 		# selected subheader. If subheader Index 0 is selected, then the byte itself is 
    304 		# mapped through the subheader, and there is no second byte.
    305 		# Then assume that the subsequent byte is the first byte of the next charcode,and repeat.
    306 		# 
    307 		# Each subheader references a range in the glyphIndexArray whose length is entryCount.
    308 		# The range in glyphIndexArray referenced by a sunheader may overlap with the range in glyphIndexArray
    309 		# referenced by another subheader.
    310 		# The only subheader that will be referenced by more than one first-byte value is the subheader
    311 		# that maps the entire range of glyphID values to glyphIndex 0, e.g notdef:
    312 		#	 {firstChar 0, EntryCount 0,idDelta 0,idRangeOffset xx}
    313 		# A byte being mapped though a subheader is treated as in index into a mapping of array index to font glyphIndex.
    314 		# A subheader specifies a subrange within (0...256) by the
    315 		# firstChar and EntryCount values. If the byte value is outside the subrange, then the glyphIndex is zero
    316 		# (e.g. glyph not in font).
    317 		# If the byte index is in the subrange, then an offset index is calculated as (byteIndex - firstChar).
    318 		# The index to glyphIndex mapping is a subrange of the glyphIndexArray. You find the start of the subrange by 
    319 		# counting idRangeOffset bytes from the idRangeOffset word. The first value in this subrange is the
    320 		# glyphIndex for the index firstChar. The offset index should then be used in this array to get the glyphIndex.
    321 		# Example for Logocut-Medium
    322 		# first byte of charcode = 129; selects subheader 1.
    323 		# subheader 1 = {firstChar 64, EntryCount 108,idDelta 42,idRangeOffset 0252}
    324 		# second byte of charCode = 66
    325 		# the index offset = 66-64 = 2.
    326 		# The subrange of the glyphIndexArray starting at 0x0252 bytes from the idRangeOffset word is:
    327 		# [glyphIndexArray index], [subrange array index] = glyphIndex
    328 		# [256], [0]=1 	from charcode [129, 64]
    329 		# [257], [1]=2  	from charcode [129, 65]
    330 		# [258], [2]=3  	from charcode [129, 66]
    331 		# [259], [3]=4  	from charcode [129, 67]
    332 		# So, the glyphIndex = 3 from the array. Then if idDelta is not zero and the glyph ID is not zero, 
    333 		# add it to the glyphID to get the final glyphIndex
    334 		# value. In this case the final glyph index = 3+ 42 -> 45 for the final glyphIndex. Whew!
    335 		
    336 		self.data = b""
    337 		self.cmap = cmap = {}
    338 		notdefGI = 0
    339 		for firstByte in range(256):
    340 			subHeadindex = subHeaderKeys[firstByte]
    341 			subHeader = subHeaderList[subHeadindex]
    342 			if subHeadindex == 0:
    343 				if (firstByte < subHeader.firstCode) or (firstByte >= subHeader.firstCode + subHeader.entryCount):
    344 					continue # gi is notdef.
    345 				else:
    346 					charCode = firstByte
    347 					offsetIndex = firstByte - subHeader.firstCode
    348 					gi = subHeader.glyphIndexArray[offsetIndex]
    349 					if gi != 0:
    350 						gi = (gi + subHeader.idDelta) % 0x10000
    351 					else:
    352 						continue # gi is notdef.
    353 				cmap[charCode] = gi
    354 			else:
    355 				if subHeader.entryCount:
    356 					charCodeOffset = firstByte * 256 + subHeader.firstCode
    357 					for offsetIndex in range(subHeader.entryCount):
    358 						charCode = charCodeOffset + offsetIndex
    359 						gi = subHeader.glyphIndexArray[offsetIndex]
    360 						if gi != 0:
    361 							gi = (gi + subHeader.idDelta) % 0x10000
    362 						else:
    363 							continue
    364 						cmap[charCode] = gi
    365 				# If not subHeader.entryCount, then all char codes with this first byte are
    366 				# mapped to .notdef. We can skip this subtable, and leave the glyphs un-encoded, which is the 
    367 				# same as mapping it to .notdef.
    368 		# cmap values are GID's.
    369 		glyphOrder = self.ttFont.getGlyphOrder()
    370 		gids = list(cmap.values())
    371 		charCodes = list(cmap.keys())
    372 		lenCmap = len(gids)
    373 		try:
    374 			names = list(map(operator.getitem, [glyphOrder]*lenCmap, gids ))
    375 		except IndexError:
    376 			getGlyphName = self.ttFont.getGlyphName
    377 			names = list(map(getGlyphName, gids ))
    378 		list(map(operator.setitem, [cmap]*lenCmap, charCodes, names))
    379 	
    380 		
    381 	def compile(self, ttFont):
    382 		if self.data:
    383 			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
    384 		kEmptyTwoCharCodeRange = -1
    385 		notdefGI = 0
    386 
    387 		items = sorted(self.cmap.items())
    388 		charCodes = [item[0] for item in items]
    389 		names = [item[1] for item in items]
    390 		nameMap = ttFont.getReverseGlyphMap()
    391 		lenCharCodes = len(charCodes) 
    392 		try:
    393 			gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names))
    394 		except KeyError:
    395 			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
    396 			try:
    397 				gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names))
    398 			except KeyError:
    399 				# allow virtual GIDs in format 2 tables
    400 				gids = []
    401 				for name in names:
    402 					try:
    403 						gid = nameMap[name]
    404 					except KeyError:
    405 						try:
    406 							if (name[:3] == 'gid'):
    407 								gid = eval(name[3:])
    408 							else:
    409 								gid = ttFont.getGlyphID(name)
    410 						except:
    411 							raise KeyError(name)
    412 
    413 					gids.append(gid)
    414 
    415 		# Process the (char code to gid) item list  in char code order.
    416 		# By definition, all one byte char codes map to subheader 0. 
    417 		# For all the two byte char codes, we assume that the first byte maps maps to the empty subhead (with an entry count of 0, 
    418 		# which defines all char codes in its range to map to notdef) unless proven otherwise.
    419 		# Note that since the char code items are processed in char code order, all the char codes with the
    420 		# same first byte are in sequential order.
    421 
    422 		subHeaderKeys = [ kEmptyTwoCharCodeRange for x in  range(256)] # list of indices into subHeaderList.
    423 		subHeaderList = []
    424 
    425 		# We force this subheader entry 0  to exist in the subHeaderList in the case where some one comes up
    426 		# with a cmap where all the one byte char codes map to notdef,
    427 		# with the result that the subhead 0 would not get created just by processing the item list.
    428 		charCode = charCodes[0]
    429 		if charCode > 255:
    430 			subHeader = SubHeader()
    431 			subHeader.firstCode = 0
    432 			subHeader.entryCount = 0
    433 			subHeader.idDelta = 0
    434 			subHeader.idRangeOffset = 0
    435 			subHeaderList.append(subHeader)
    436 			
    437 		
    438 		lastFirstByte = -1
    439 		items = zip(charCodes, gids)
    440 		for charCode, gid in items:
    441 			if gid == 0:
    442 				continue
    443 			firstbyte = charCode >> 8
    444 			secondByte = charCode & 0x00FF
    445 
    446 			if firstbyte != lastFirstByte: # Need to update the current subhead, and start a new one.
    447 				if lastFirstByte > -1:
    448 					# fix GI's and iDelta of current subheader.
    449 					self.setIDDelta(subHeader)
    450 
    451 					# If it was sunheader 0 for one-byte charCodes, then we need to set the subHeaderKeys value to zero
    452 					# for the indices matching the char codes.
    453 					if lastFirstByte == 0:
    454 						for index in range(subHeader.entryCount):
    455 							charCode = subHeader.firstCode + index
    456 							subHeaderKeys[charCode] = 0
    457 
    458 					assert (subHeader.entryCount == len(subHeader.glyphIndexArray)), "Error - subhead entry count does not match len of glyphID subrange."
    459 				# init new subheader
    460 				subHeader = SubHeader()
    461 				subHeader.firstCode = secondByte
    462 				subHeader.entryCount = 1
    463 				subHeader.glyphIndexArray.append(gid)
    464 				subHeaderList.append(subHeader)
    465 				subHeaderKeys[firstbyte] = len(subHeaderList) -1
    466 				lastFirstByte = firstbyte
    467 			else:
    468 				# need to fill in with notdefs all the code points between the last charCode and the current charCode.
    469 				codeDiff = secondByte - (subHeader.firstCode + subHeader.entryCount)
    470 				for i in range(codeDiff):
    471 					subHeader.glyphIndexArray.append(notdefGI)
    472 				subHeader.glyphIndexArray.append(gid)
    473 				subHeader.entryCount = subHeader.entryCount + codeDiff + 1
    474 					
    475 		# fix GI's and iDelta of last subheader that we we added to the subheader array.
    476 		self.setIDDelta(subHeader)
    477 
    478 		# Now we add a final subheader for the subHeaderKeys which maps to empty two byte charcode ranges.
    479 		subHeader = SubHeader()
    480 		subHeader.firstCode = 0
    481 		subHeader.entryCount = 0
    482 		subHeader.idDelta = 0
    483 		subHeader.idRangeOffset = 2
    484 		subHeaderList.append(subHeader)
    485 		emptySubheadIndex = len(subHeaderList) - 1
    486 		for index in range(256):
    487 			if subHeaderKeys[index] == kEmptyTwoCharCodeRange:
    488 				subHeaderKeys[index] = emptySubheadIndex
    489 		# Since this is the last subheader, the GlyphIndex Array starts two bytes after the start of the
    490 		# idRangeOffset word of this subHeader. We can safely point to the first entry in the GlyphIndexArray,
    491 		# since the first subrange of the GlyphIndexArray is for subHeader 0, which always starts with 
    492 		# charcode 0 and GID 0.
    493 		
    494 		idRangeOffset = (len(subHeaderList)-1)*8  + 2 # offset to beginning of glyphIDArray from first subheader idRangeOffset.
    495 		subheadRangeLen = len(subHeaderList) -1 # skip last special empty-set subheader; we've already hardocodes its idRangeOffset to 2.
    496 		for index in range(subheadRangeLen): 
    497 			subHeader = subHeaderList[index]
    498 			subHeader.idRangeOffset = 0
    499 			for j  in range(index):
    500 				prevSubhead = subHeaderList[j]
    501 				if prevSubhead.glyphIndexArray == subHeader.glyphIndexArray: # use the glyphIndexArray subarray
    502 					subHeader.idRangeOffset = prevSubhead.idRangeOffset - (index-j)*8
    503 					subHeader.glyphIndexArray = []
    504 					break
    505 			if subHeader.idRangeOffset == 0: # didn't find one. 
    506 				subHeader.idRangeOffset = idRangeOffset
    507 				idRangeOffset = (idRangeOffset - 8) + subHeader.entryCount*2 # one less subheader, one more subArray.
    508 			else:
    509 				idRangeOffset = idRangeOffset - 8  # one less subheader
    510 
    511 		# Now we can write out the data!
    512 		length = 6 + 512 + 8*len(subHeaderList) # header, 256 subHeaderKeys, and subheader array.
    513 		for subhead in 	subHeaderList[:-1]:
    514 			length = length + len(subhead.glyphIndexArray)*2  # We can't use subhead.entryCount, as some of the subhead may share subArrays.
    515 		dataList = [struct.pack(">HHH", 2, length, self.language)]
    516 		for index in subHeaderKeys:
    517 			dataList.append(struct.pack(">H", index*8))
    518 		for subhead in 	subHeaderList:
    519 			dataList.append(struct.pack(subHeaderFormat, subhead.firstCode, subhead.entryCount, subhead.idDelta, subhead.idRangeOffset))
    520 		for subhead in 	subHeaderList[:-1]:
    521 			for gi in subhead.glyphIndexArray:
    522 				dataList.append(struct.pack(">H", gi))
    523 		data = bytesjoin(dataList)
    524 		assert (len(data) == length), "Error: cmap format 2 is not same length as calculated! actual: " + str(len(data))+ " calc : " + str(length)
    525 		return data
    526 
    527 
    528 	def fromXML(self, name, attrs, content, ttFont):
    529 		self.language = safeEval(attrs["language"])
    530 		if not hasattr(self, "cmap"):
    531 			self.cmap = {}
    532 		cmap = self.cmap
    533 
    534 		for element in content:
    535 			if not isinstance(element, tuple):
    536 				continue
    537 			name, attrs, content = element
    538 			if name != "map":
    539 				continue
    540 			cmap[safeEval(attrs["code"])] = attrs["name"]
    541 
    542 
# struct format of seven big-endian uint16 values.
# NOTE(review): presumably the fixed-size part of a format 4 subtable (format, length,
# language, segCountX2, searchRange, entrySelector, rangeShift) -- confirm against its
# use in cmap_format_4.compile().
cmap_format_4_format = ">7H"

# Layout of the variable-length arrays that follow the fixed-size fields of a
# format 4 subtable, in the order cmap_format_4.decompile() slices them:
#uint16  endCode[segCount]          # Ending character code for each segment, last = 0xFFFF.
#uint16  reservedPad                # This value should be zero
#uint16  startCode[segCount]        # Starting character code for each segment
#uint16  idDelta[segCount]          # Delta for all character codes in segment
#uint16  idRangeOffset[segCount]    # Offset in bytes to glyph indexArray, or 0
#uint16  glyphIndexArray[variable]  # Glyph index array
    551 
    552 def splitRange(startCode, endCode, cmap):
    553 	# Try to split a range of character codes into subranges with consecutive
    554 	# glyph IDs in such a way that the cmap4 subtable can be stored "most"
    555 	# efficiently. I can't prove I've got the optimal solution, but it seems
    556 	# to do well with the fonts I tested: none became bigger, many became smaller.
    557 	if startCode == endCode:
    558 		return [], [endCode]
    559 	
    560 	lastID = cmap[startCode]
    561 	lastCode = startCode
    562 	inOrder = None
    563 	orderedBegin = None
    564 	subRanges = []
    565 	
    566 	# Gather subranges in which the glyph IDs are consecutive.
    567 	for code in range(startCode + 1, endCode + 1):
    568 		glyphID = cmap[code]
    569 		
    570 		if glyphID - 1 == lastID:
    571 			if inOrder is None or not inOrder:
    572 				inOrder = 1
    573 				orderedBegin = lastCode
    574 		else:
    575 			if inOrder:
    576 				inOrder = 0
    577 				subRanges.append((orderedBegin, lastCode))
    578 				orderedBegin = None
    579 				
    580 		lastID = glyphID
    581 		lastCode = code
    582 	
    583 	if inOrder:
    584 		subRanges.append((orderedBegin, lastCode))
    585 	assert lastCode == endCode
    586 	
    587 	# Now filter out those new subranges that would only make the data bigger.
    588 	# A new segment cost 8 bytes, not using a new segment costs 2 bytes per
    589 	# character.
    590 	newRanges = []
    591 	for b, e in subRanges:
    592 		if b == startCode and e == endCode:
    593 			break  # the whole range, we're fine
    594 		if b == startCode or e == endCode:
    595 			threshold = 4  # split costs one more segment
    596 		else:
    597 			threshold = 8  # split costs two more segments
    598 		if (e - b + 1) > threshold:
    599 			newRanges.append((b, e))
    600 	subRanges = newRanges
    601 	
    602 	if not subRanges:
    603 		return [], [endCode]
    604 	
    605 	if subRanges[0][0] != startCode:
    606 		subRanges.insert(0, (startCode, subRanges[0][0] - 1))
    607 	if subRanges[-1][1] != endCode:
    608 		subRanges.append((subRanges[-1][1] + 1, endCode))
    609 	
    610 	# Fill the "holes" in the segments list -- those are the segments in which
    611 	# the glyph IDs are _not_ consecutive.
    612 	i = 1
    613 	while i < len(subRanges):
    614 		if subRanges[i-1][1] + 1 != subRanges[i][0]:
    615 			subRanges.insert(i, (subRanges[i-1][1] + 1, subRanges[i][0] - 1))
    616 			i = i + 1
    617 		i = i + 1
    618 	
    619 	# Transform the ranges into startCode/endCode lists.
    620 	start = []
    621 	end = []
    622 	for b, e in subRanges:
    623 		start.append(b)
    624 		end.append(e)
    625 	start.pop(0)
    626 	
    627 	assert len(start) + 1 == len(end)
    628 	return start, end
    629 
    630 
    631 class cmap_format_4(CmapSubtable):
    632 	
    633 	def decompile(self, data, ttFont):
    634 		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
    635 		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
    636 		if data is not None and ttFont is not None:
    637 			self.decompileHeader(self.data[offset:offset+int(length)], ttFont)
    638 		else:
    639 			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
    640 
    641 		data = self.data # decompileHeader assigns the data after the header to self.data
    642 		(segCountX2, searchRange, entrySelector, rangeShift) = \
    643 					struct.unpack(">4H", data[:8])
    644 		data = data[8:]
    645 		segCount = segCountX2 // 2
    646 		
    647 		allCodes = array.array("H")
    648 		allCodes.fromstring(data)
    649 		self.data = data = None
    650 
    651 		if sys.byteorder != "big":
    652 			allCodes.byteswap()
    653 		
    654 		# divide the data
    655 		endCode = allCodes[:segCount]
    656 		allCodes = allCodes[segCount+1:]  # the +1 is skipping the reservedPad field
    657 		startCode = allCodes[:segCount]
    658 		allCodes = allCodes[segCount:]
    659 		idDelta = allCodes[:segCount]
    660 		allCodes = allCodes[segCount:]
    661 		idRangeOffset = allCodes[:segCount]
    662 		glyphIndexArray = allCodes[segCount:]
    663 		lenGIArray = len(glyphIndexArray)
    664 
    665 		# build 2-byte character mapping
    666 		charCodes = []
    667 		gids = []
    668 		for i in range(len(startCode) - 1):	# don't do 0xffff!
    669 			start = startCode[i]
    670 			delta = idDelta[i]
    671 			rangeOffset = idRangeOffset[i]
    672 			# *someone* needs to get killed.
    673 			partial = rangeOffset // 2 - start + i - len(idRangeOffset)
    674 
    675 			rangeCharCodes = list(range(startCode[i], endCode[i] + 1))
    676 			charCodes.extend(rangeCharCodes)
    677 			if rangeOffset == 0:
    678 				gids.extend([(charCode + delta) & 0xFFFF for charCode in rangeCharCodes])
    679 			else:
    680 				for charCode in rangeCharCodes:
    681 					index = charCode + partial
    682 					assert (index < lenGIArray), "In format 4 cmap, range (%d), the calculated index (%d) into the glyph index array  is not less than the length of the array (%d) !" % (i, index, lenGIArray)
    683 					if glyphIndexArray[index] != 0:  # if not missing glyph
    684 						glyphID = glyphIndexArray[index] + delta
    685 					else:
    686 						glyphID = 0  # missing glyph
    687 					gids.append(glyphID & 0xFFFF)
    688 
    689 		self.cmap = cmap = {}
    690 		lenCmap = len(gids)
    691 		glyphOrder = self.ttFont.getGlyphOrder()
    692 		try:
    693 			names = list(map(operator.getitem, [glyphOrder]*lenCmap, gids ))
    694 		except IndexError:
    695 			getGlyphName = self.ttFont.getGlyphName
    696 			names = list(map(getGlyphName, gids ))
    697 		list(map(operator.setitem, [cmap]*lenCmap, charCodes, names))
    698 
    699 
    700 	def compile(self, ttFont):
    701 		if self.data:
    702 			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
    703 		
    704 		charCodes = list(self.cmap.keys())
    705 		lenCharCodes = len(charCodes)
    706 		if lenCharCodes == 0:
    707 			startCode = [0xffff]
    708 			endCode = [0xffff]
    709 		else:
    710 			charCodes.sort()
    711 			names = list(map(operator.getitem, [self.cmap]*lenCharCodes, charCodes))
    712 			nameMap = ttFont.getReverseGlyphMap()
    713 			try:
    714 				gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names))
    715 			except KeyError:
    716 				nameMap = ttFont.getReverseGlyphMap(rebuild=True)
    717 				try:
    718 					gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names))
    719 				except KeyError:
    720 					# allow virtual GIDs in format 4 tables
    721 					gids = []
    722 					for name in names:
    723 						try:
    724 							gid = nameMap[name]
    725 						except KeyError:
    726 							try:
    727 								if (name[:3] == 'gid'):
    728 									gid = eval(name[3:])
    729 								else:
    730 									gid = ttFont.getGlyphID(name)
    731 							except:
    732 								raise KeyError(name)
    733 	
    734 						gids.append(gid)
    735 			cmap = {}  # code:glyphID mapping
    736 			list(map(operator.setitem, [cmap]*len(charCodes), charCodes, gids))
    737 		
    738 			# Build startCode and endCode lists.
    739 			# Split the char codes in ranges of consecutive char codes, then split
    740 			# each range in more ranges of consecutive/not consecutive glyph IDs.
    741 			# See splitRange().
    742 			lastCode = charCodes[0]
    743 			endCode = []
    744 			startCode = [lastCode]
    745 			for charCode in charCodes[1:]:  # skip the first code, it's the first start code
    746 				if charCode == lastCode + 1:
    747 					lastCode = charCode
    748 					continue
    749 				start, end = splitRange(startCode[-1], lastCode, cmap)
    750 				startCode.extend(start)
    751 				endCode.extend(end)
    752 				startCode.append(charCode)
    753 				lastCode = charCode
    754 			endCode.append(lastCode)
    755 			startCode.append(0xffff)
    756 			endCode.append(0xffff)
    757 		
    758 		# build up rest of cruft
    759 		idDelta = []
    760 		idRangeOffset = []
    761 		glyphIndexArray = []
    762 		for i in range(len(endCode)-1):  # skip the closing codes (0xffff)
    763 			indices = []
    764 			for charCode in range(startCode[i], endCode[i] + 1):
    765 				indices.append(cmap[charCode])
    766 			if  (indices == list(range(indices[0], indices[0] + len(indices)))):
    767 				idDelta.append((indices[0] - startCode[i]) % 0x10000)
    768 				idRangeOffset.append(0)
    769 			else:
    770 				# someone *definitely* needs to get killed.
    771 				idDelta.append(0)
    772 				idRangeOffset.append(2 * (len(endCode) + len(glyphIndexArray) - i))
    773 				glyphIndexArray.extend(indices)
    774 		idDelta.append(1)  # 0xffff + 1 == (tadaa!) 0. So this end code maps to .notdef
    775 		idRangeOffset.append(0)
    776 		
    777 		# Insane.
    778 		segCount = len(endCode)
    779 		segCountX2 = segCount * 2
    780 		searchRange, entrySelector, rangeShift = getSearchRange(segCount, 2)
    781 		
    782 		charCodeArray = array.array("H", endCode + [0] + startCode)
    783 		idDeltaArray = array.array("H", idDelta)
    784 		restArray = array.array("H", idRangeOffset + glyphIndexArray)
    785 		if sys.byteorder != "big":
    786 			charCodeArray.byteswap()
    787 			idDeltaArray.byteswap()
    788 			restArray.byteswap()
    789 		data = charCodeArray.tostring() + idDeltaArray.tostring() + restArray.tostring()
    790 
    791 		length = struct.calcsize(cmap_format_4_format) + len(data)
    792 		header = struct.pack(cmap_format_4_format, self.format, length, self.language, 
    793 				segCountX2, searchRange, entrySelector, rangeShift)
    794 		return header + data
    795 	
    796 	def fromXML(self, name, attrs, content, ttFont):
    797 		self.language = safeEval(attrs["language"])
    798 		if not hasattr(self, "cmap"):
    799 			self.cmap = {}
    800 		cmap = self.cmap
    801 
    802 		for element in content:
    803 			if not isinstance(element, tuple):
    804 				continue
    805 			nameMap, attrsMap, dummyContent = element
    806 			if nameMap != "map":
    807 				assert 0, "Unrecognized keyword in cmap subtable"
    808 			cmap[safeEval(attrsMap["code"])] = attrsMap["name"]
    809 
    810 
    811 class cmap_format_6(CmapSubtable):
    812 	
    813 	def decompile(self, data, ttFont):
    814 		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
    815 		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
    816 		if data is not None and ttFont is not None:
    817 			self.decompileHeader(data[offset:offset+int(length)], ttFont)
    818 		else:
    819 			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
    820 
    821 		data = self.data # decompileHeader assigns the data after the header to self.data
    822 		firstCode, entryCount = struct.unpack(">HH", data[:4])
    823 		firstCode = int(firstCode)
    824 		data = data[4:]
    825 		#assert len(data) == 2 * entryCount  # XXX not true in Apple's Helvetica!!!
    826 		glyphIndexArray = array.array("H")
    827 		glyphIndexArray.fromstring(data[:2 * int(entryCount)])
    828 		if sys.byteorder != "big":
    829 			glyphIndexArray.byteswap()
    830 		self.data = data = None
    831 
    832 		self.cmap = cmap = {}
    833 
    834 		lenArray = len(glyphIndexArray)
    835 		charCodes = list(range(firstCode, firstCode + lenArray))
    836 		glyphOrder = self.ttFont.getGlyphOrder()
    837 		try:
    838 			names = list(map(operator.getitem, [glyphOrder]*lenArray, glyphIndexArray ))
    839 		except IndexError:
    840 			getGlyphName = self.ttFont.getGlyphName
    841 			names = list(map(getGlyphName, glyphIndexArray ))
    842 		list(map(operator.setitem, [cmap]*lenArray, charCodes, names))
    843 	
    844 	def compile(self, ttFont):
    845 		if self.data:
    846 			return struct.pack(">HHH", self.format, self.length, self.language) + self.data
    847 		cmap = self.cmap
    848 		codes = list(cmap.keys())
    849 		if codes: # yes, there are empty cmap tables.
    850 			codes = list(range(codes[0], codes[-1] + 1))
    851 			firstCode = codes[0]
    852 			valueList = [cmap.get(code, ".notdef") for code in codes]
    853 			valueList = map(ttFont.getGlyphID, valueList)
    854 			glyphIndexArray = array.array("H", valueList)
    855 			if sys.byteorder != "big":
    856 				glyphIndexArray.byteswap()
    857 			data = glyphIndexArray.tostring()
    858 		else:
    859 			data = b""
    860 			firstCode = 0
    861 		header = struct.pack(">HHHHH", 
    862 				6, len(data) + 10, self.language, firstCode, len(codes))
    863 		return header + data
    864 	
    865 	def fromXML(self, name, attrs, content, ttFont):
    866 		self.language = safeEval(attrs["language"])
    867 		if not hasattr(self, "cmap"):
    868 			self.cmap = {}
    869 		cmap = self.cmap
    870 
    871 		for element in content:
    872 			if not isinstance(element, tuple):
    873 				continue
    874 			name, attrs, content = element
    875 			if name != "map":
    876 				continue
    877 			cmap[safeEval(attrs["code"])] = attrs["name"]
    878 
    879 
    880 class cmap_format_12_or_13(CmapSubtable):
    881 	
    882 	def __init__(self, format):
    883 		self.format = format
    884 		self.reserved = 0
    885 		self.data = None
    886 		self.ttFont = None
    887 
    888 	def decompileHeader(self, data, ttFont):
    889 		format, reserved, length, language, nGroups = struct.unpack(">HHLLL", data[:16])
    890 		assert len(data) == (16 + nGroups*12) == (length), "corrupt cmap table format %d (data length: %d, header length: %d)" % (format, len(data), length)
    891 		self.format = format
    892 		self.reserved = reserved
    893 		self.length = length
    894 		self.language = language
    895 		self.nGroups = nGroups
    896 		self.data = data[16:]
    897 		self.ttFont = ttFont
    898 
    899 	def decompile(self, data, ttFont):
    900 		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
    901 		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
    902 		if data is not None and ttFont is not None:
    903 			self.decompileHeader(data[offset:offset+int(length)], ttFont)
    904 		else:
    905 			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
    906 
    907 		data = self.data # decompileHeader assigns the data after the header to self.data
    908 		charCodes = []
    909 		gids = []
    910 		pos = 0
    911 		for i in range(self.nGroups):
    912 			startCharCode, endCharCode, glyphID = struct.unpack(">LLL",data[pos:pos+12] )
    913 			pos += 12
    914 			lenGroup = 1 + endCharCode - startCharCode
    915 			charCodes.extend(list(range(startCharCode, endCharCode +1)))
    916 			gids.extend(self._computeGIDs(glyphID, lenGroup))
    917 		self.data = data = None
    918 		self.cmap = cmap = {}
    919 		lenCmap = len(gids)
    920 		glyphOrder = self.ttFont.getGlyphOrder()
    921 		try:
    922 			names = list(map(operator.getitem, [glyphOrder]*lenCmap, gids ))
    923 		except IndexError:
    924 			getGlyphName = self.ttFont.getGlyphName
    925 			names = list(map(getGlyphName, gids ))
    926 		list(map(operator.setitem, [cmap]*lenCmap, charCodes, names))
    927 	
    928 	def compile(self, ttFont):
    929 		if self.data:
    930 			return struct.pack(">HHLLL", self.format, self.reserved, self.length, self.language, self.nGroups) + self.data
    931 		charCodes = list(self.cmap.keys())
    932 		lenCharCodes = len(charCodes) 
    933 		names = list(self.cmap.values())
    934 		nameMap = ttFont.getReverseGlyphMap()
    935 		try:
    936 			gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names))
    937 		except KeyError:
    938 			nameMap = ttFont.getReverseGlyphMap(rebuild=True)
    939 			try:
    940 				gids = list(map(operator.getitem, [nameMap]*lenCharCodes, names))
    941 			except KeyError:
    942 				# allow virtual GIDs in format 12 tables
    943 				gids = []
    944 				for name in names:
    945 					try:
    946 						gid = nameMap[name]
    947 					except KeyError:
    948 						try:
    949 							if (name[:3] == 'gid'):
    950 								gid = eval(name[3:])
    951 							else:
    952 								gid = ttFont.getGlyphID(name)
    953 						except:
    954 							raise KeyError(name)
    955 
    956 					gids.append(gid)
    957 		
    958 		cmap = {}  # code:glyphID mapping
    959 		list(map(operator.setitem, [cmap]*len(charCodes), charCodes, gids))
    960 
    961 		charCodes.sort()
    962 		index = 0
    963 		startCharCode = charCodes[0]
    964 		startGlyphID = cmap[startCharCode]
    965 		lastGlyphID = startGlyphID - self._format_step
    966 		lastCharCode = startCharCode - 1
    967 		nGroups = 0
    968 		dataList =  []
    969 		maxIndex = len(charCodes)
    970 		for index in range(maxIndex):
    971 			charCode = charCodes[index]
    972 			glyphID = cmap[charCode]
    973 			if not self._IsInSameRun(glyphID, lastGlyphID, charCode, lastCharCode):
    974 				dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
    975 				startCharCode = charCode
    976 				startGlyphID = glyphID
    977 				nGroups = nGroups + 1
    978 			lastGlyphID = glyphID
    979 			lastCharCode = charCode
    980 		dataList.append(struct.pack(">LLL", startCharCode, lastCharCode, startGlyphID))
    981 		nGroups = nGroups + 1
    982 		data = bytesjoin(dataList)
    983 		lengthSubtable = len(data) +16
    984 		assert len(data) == (nGroups*12) == (lengthSubtable-16) 
    985 		return struct.pack(">HHLLL", self.format, self.reserved , lengthSubtable, self.language, nGroups) + data
    986 	
    987 	def toXML(self, writer, ttFont):
    988 		writer.begintag(self.__class__.__name__, [
    989 				("platformID", self.platformID),
    990 				("platEncID", self.platEncID),
    991 				("format", self.format),
    992 				("reserved", self.reserved),
    993 				("length", self.length),
    994 				("language", self.language),
    995 				("nGroups", self.nGroups),
    996 				])
    997 		writer.newline()
    998 		codes = sorted(self.cmap.items())
    999 		self._writeCodes(codes, writer)
   1000 		writer.endtag(self.__class__.__name__)
   1001 		writer.newline()
   1002 	
   1003 	def fromXML(self, name, attrs, content, ttFont):
   1004 		self.format = safeEval(attrs["format"])
   1005 		self.reserved = safeEval(attrs["reserved"])
   1006 		self.length = safeEval(attrs["length"])
   1007 		self.language = safeEval(attrs["language"])
   1008 		self.nGroups = safeEval(attrs["nGroups"])
   1009 		if not hasattr(self, "cmap"):
   1010 			self.cmap = {}
   1011 		cmap = self.cmap
   1012 
   1013 		for element in content:
   1014 			if not isinstance(element, tuple):
   1015 				continue
   1016 			name, attrs, content = element
   1017 			if name != "map":
   1018 				continue
   1019 			cmap[safeEval(attrs["code"])] = attrs["name"]
   1020 
   1021 
   1022 class cmap_format_12(cmap_format_12_or_13):
   1023 	def __init__(self, format):
   1024 		cmap_format_12_or_13.__init__(self, format)
   1025 		self._format_step = 1
   1026 
   1027 	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
   1028 		return list(range(startingGlyph, startingGlyph + numberOfGlyphs))
   1029 
   1030 	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
   1031 		return (glyphID == 1 + lastGlyphID) and (charCode == 1 + lastCharCode)
   1032 
   1033 
   1034 class cmap_format_13(cmap_format_12_or_13):
   1035 	def __init__(self, format):
   1036 		cmap_format_12_or_13.__init__(self, format)
   1037 		self._format_step = 0
   1038 
   1039 	def _computeGIDs(self, startingGlyph, numberOfGlyphs):
   1040 		return [startingGlyph] * numberOfGlyphs
   1041 
   1042 	def _IsInSameRun(self, glyphID, lastGlyphID, charCode, lastCharCode):
   1043 		return (glyphID == lastGlyphID) and (charCode == 1 + lastCharCode)
   1044 
   1045 
   1046 def  cvtToUVS(threeByteString):
   1047 	data = b"\0" + threeByteString
   1048 	val, = struct.unpack(">L", data)
   1049 	return val
   1050 
   1051 def  cvtFromUVS(val):
   1052 	assert 0 <= val < 0x1000000
   1053 	fourByteString = struct.pack(">L", val)
   1054 	return fourByteString[1:]
   1055 
   1056 
   1057 class cmap_format_14(CmapSubtable):
   1058 
   1059 	def decompileHeader(self, data, ttFont):
   1060 		format, length, numVarSelectorRecords = struct.unpack(">HLL", data[:10])
   1061 		self.data = data[10:]
   1062 		self.length = length
   1063 		self.numVarSelectorRecords = numVarSelectorRecords
   1064 		self.ttFont = ttFont
   1065 		self.language = 0xFF # has no language.
   1066 
   1067 	def decompile(self, data, ttFont):
   1068 		if data is not None and ttFont is not None and ttFont.lazy:
   1069 			self.decompileHeader(data, ttFont)
   1070 		else:
   1071 			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
   1072 		data = self.data
   1073 		
   1074 		self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
   1075 		uvsDict = {}
   1076 		recOffset = 0
   1077 		for n in range(self.numVarSelectorRecords):
   1078 			uvs, defOVSOffset, nonDefUVSOffset =  struct.unpack(">3sLL", data[recOffset:recOffset +11])		
   1079 			recOffset += 11
   1080 			varUVS = cvtToUVS(uvs)
   1081 			if defOVSOffset:
   1082 				startOffset = defOVSOffset  - 10
   1083 				numValues, = struct.unpack(">L", data[startOffset:startOffset+4])
   1084 				startOffset +=4
   1085 				for r in range(numValues):
   1086 					uv, addtlCnt = struct.unpack(">3sB", data[startOffset:startOffset+4])
   1087 					startOffset += 4
   1088 					firstBaseUV = cvtToUVS(uv)
   1089 					cnt = addtlCnt+1
   1090 					baseUVList = list(range(firstBaseUV, firstBaseUV+cnt))
   1091 					glyphList = [None]*cnt
   1092 					localUVList = zip(baseUVList, glyphList)
   1093 					try:
   1094 						uvsDict[varUVS].extend(localUVList)
   1095 					except KeyError:
   1096 						uvsDict[varUVS] = list(localUVList)
   1097 				
   1098 			if nonDefUVSOffset:
   1099 				startOffset = nonDefUVSOffset  - 10
   1100 				numRecs, = struct.unpack(">L", data[startOffset:startOffset+4])
   1101 				startOffset +=4
   1102 				localUVList = []
   1103 				for r in range(numRecs):
   1104 					uv, gid = struct.unpack(">3sH", data[startOffset:startOffset+5])
   1105 					startOffset += 5
   1106 					uv = cvtToUVS(uv)
   1107 					glyphName = self.ttFont.getGlyphName(gid)
   1108 					localUVList.append( [uv, glyphName] )
   1109 				try:
   1110 					uvsDict[varUVS].extend(localUVList)
   1111 				except KeyError:
   1112 					uvsDict[varUVS] = localUVList
   1113 					
   1114 		self.uvsDict = uvsDict
   1115 							
   1116 	def toXML(self, writer, ttFont):
   1117 		writer.begintag(self.__class__.__name__, [
   1118 				("platformID", self.platformID),
   1119 				("platEncID", self.platEncID),
   1120 				("format", self.format),
   1121 				("length", self.length),
   1122 				("numVarSelectorRecords", self.numVarSelectorRecords),
   1123 				])
   1124 		writer.newline()
   1125 		uvsDict = self.uvsDict
   1126 		uvsList = sorted(uvsDict.keys())
   1127 		for uvs in uvsList:
   1128 			uvList = uvsDict[uvs]
   1129 			uvList.sort(key=lambda item: (item[1] is not None, item[0], item[1]))
   1130 			for uv, gname in uvList:
   1131 				if gname is None:
   1132 					gname = "None"
   1133 				# I use the arg rather than th keyword syntax in order to preserve the attribute order.
   1134 				writer.simpletag("map", [ ("uvs",hex(uvs)), ("uv",hex(uv)), ("name", gname)]  )
   1135 				writer.newline()
   1136 		writer.endtag(self.__class__.__name__)
   1137 		writer.newline()
   1138 
   1139 	def fromXML(self, name, attrs, content, ttFont):
   1140 		self.format = safeEval(attrs["format"])
   1141 		self.length = safeEval(attrs["length"])
   1142 		self.numVarSelectorRecords = safeEval(attrs["numVarSelectorRecords"])
   1143 		self.language = 0xFF # provide a value so that  CmapSubtable.__lt__() won't fail
   1144 		if not hasattr(self, "cmap"):
   1145 			self.cmap = {} # so that clients that expect this to exist in a cmap table won't fail.
   1146 		if not hasattr(self, "uvsDict"):
   1147 			self.uvsDict  = {}
   1148 			uvsDict = self.uvsDict 
   1149 
   1150 		for element in content:
   1151 			if not isinstance(element, tuple):
   1152 				continue
   1153 			name, attrs, content = element
   1154 			if name != "map":
   1155 				continue
   1156 			uvs = safeEval(attrs["uvs"])
   1157 			uv = safeEval(attrs["uv"])
   1158 			gname = attrs["name"]
   1159 			if gname == "None":
   1160 				gname = None
   1161 			try:
   1162 				uvsDict[uvs].append( [uv, gname])
   1163 			except KeyError:
   1164 				uvsDict[uvs] = [ [uv, gname] ]
   1165 			
   1166 
   1167 	def compile(self, ttFont):
   1168 		if self.data:
   1169 			return struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords) + self.data
   1170 
   1171 		uvsDict = self.uvsDict
   1172 		uvsList = sorted(uvsDict.keys())
   1173 		self.numVarSelectorRecords = len(uvsList)
   1174 		offset = 10 + self.numVarSelectorRecords*11 # current value is end of VarSelectorRecords block.
   1175 		data = []
   1176 		varSelectorRecords =[]
   1177 		for uvs in uvsList:
   1178 			entryList = uvsDict[uvs]
   1179 
   1180 			defList = [entry for entry in entryList if entry[1] is None]
   1181 			if defList:
   1182 				defList = [entry[0] for entry in defList]
   1183 				defOVSOffset = offset
   1184 				defList.sort()
   1185 
   1186 				lastUV = defList[0]
   1187 				cnt = -1
   1188 				defRecs = []
   1189 				for defEntry in defList:
   1190 					cnt +=1
   1191 					if (lastUV+cnt) != defEntry:
   1192 						rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt-1)
   1193 						lastUV = defEntry
   1194 						defRecs.append(rec)
   1195 						cnt = 0
   1196 					
   1197 				rec = struct.pack(">3sB", cvtFromUVS(lastUV), cnt)
   1198 				defRecs.append(rec)
   1199 
   1200 				numDefRecs = len(defRecs)
   1201 				data.append(struct.pack(">L", numDefRecs))
   1202 				data.extend(defRecs)
   1203 				offset += 4 + numDefRecs*4
   1204 			else:
   1205 				defOVSOffset = 0
   1206 
   1207 			ndefList = [entry for entry in entryList if entry[1] is not None]
   1208 			if ndefList:
   1209 				nonDefUVSOffset = offset
   1210 				ndefList.sort()
   1211 				numNonDefRecs = len(ndefList)
   1212 				data.append(struct.pack(">L", numNonDefRecs))
   1213 				offset += 4 + numNonDefRecs*5
   1214 
   1215 				for uv, gname in ndefList:
   1216 					gid = ttFont.getGlyphID(gname)
   1217 					ndrec = struct.pack(">3sH", cvtFromUVS(uv), gid)
   1218 					data.append(ndrec)
   1219 			else:
   1220 				nonDefUVSOffset = 0
   1221 				
   1222 			vrec = struct.pack(">3sLL", cvtFromUVS(uvs), defOVSOffset, nonDefUVSOffset)
   1223 			varSelectorRecords.append(vrec)
   1224 				
   1225 		data = bytesjoin(varSelectorRecords) + bytesjoin(data)
   1226 		self.length = 10 + len(data)
   1227 		headerdata = struct.pack(">HLL", self.format, self.length , self.numVarSelectorRecords)
   1228 		self.data = headerdata + data
   1229 	
   1230 		return self.data
   1231 		
   1232 		
   1233 class cmap_format_unknown(CmapSubtable):
   1234 	
   1235 	def toXML(self, writer, ttFont):
   1236 		cmapName = self.__class__.__name__[:12] + str(self.format)
   1237 		writer.begintag(cmapName, [
   1238 				("platformID", self.platformID),
   1239 				("platEncID", self.platEncID),
   1240 				])
   1241 		writer.newline()
   1242 		writer.dumphex(self.data)
   1243 		writer.endtag(cmapName)
   1244 		writer.newline()
   1245 	
   1246 	def fromXML(self, name, attrs, content, ttFont):
   1247 		self.data = readHex(content)
   1248 		self.cmap = {}
   1249 	
   1250 	def decompileHeader(self, data, ttFont):
   1251 		self.language = 0  # dummy value
   1252 		self.data = data
   1253 	
   1254 	def decompile(self, data, ttFont):
   1255 		# we usually get here indirectly from the subtable __getattr__ function, in which case both args must be None.
   1256 		# If not, someone is calling  the subtable decompile() directly, and must provide both args.
   1257 		if data is not None and ttFont is not None:
   1258 			self.decompileHeader(data[offset:offset+int(length)], ttFont)
   1259 		else:
   1260 			assert (data is None and ttFont is None), "Need both data and ttFont arguments"
   1261 
   1262 	def compile(self, ttFont):
   1263 		if self.data:
   1264 			return self.data
   1265 		else:
   1266 			return None
   1267 
# Maps a cmap subtable format number to the class that handles it.
# table__c_m_a_p.decompile() falls back to cmap_format_unknown for any
# format that is not listed here.
cmap_classes = {
		0: cmap_format_0,
		2: cmap_format_2,
		4: cmap_format_4,
		6: cmap_format_6,
		12: cmap_format_12,
		13: cmap_format_13,
		14: cmap_format_14,
		}
   1277