# Home | History | Annotate | Download | only in format
      1 #!/usr/bin/env python
      2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Support for formatting a data pack file used for platform agnostic resource
      7 files.
      8 """
      9 
     10 import collections
     11 import exceptions
     12 import os
     13 import struct
     14 import sys
     15 if __name__ == '__main__':
     16   sys.path.append(os.path.join(os.path.dirname(__file__), '../..'))
     17 
     18 from grit import util
     19 from grit.node import include
     20 from grit.node import message
     21 from grit.node import structure
     22 
     23 
# Format version written into the pack header; ReadDataPack rejects others.
PACK_FILE_VERSION = 4
HEADER_LENGTH = 2 * 4 + 1  # Two uint32s. (file version, number of entries) and
                           # one uint8 (encoding of text resources)
# Values stored in the header's uint8 "encoding" field.
BINARY, UTF8, UTF16 = range(3)


class WrongFileVersion(Exception):
  """Raised when a pack file's header version != PACK_FILE_VERSION."""
  pass


# resources: dict mapping resource id -> raw data string.
# encoding: one of BINARY/UTF8/UTF16, as read from the pack header.
DataPackContents = collections.namedtuple(
    'DataPackContents', 'resources encoding')
     36 
     37 
     38 def Format(root, lang='en', output_dir='.'):
     39   """Writes out the data pack file format (platform agnostic resource file)."""
     40   data = {}
     41   for node in root.ActiveDescendants():
     42     with node:
     43       if isinstance(node, (include.IncludeNode, message.MessageNode,
     44                            structure.StructureNode)):
     45         id, value = node.GetDataPackPair(lang, UTF8)
     46         if value is not None:
     47           data[id] = value
     48   return WriteDataPackToString(data, UTF8)
     49 
     50 
     51 def ReadDataPack(input_file):
     52   """Reads a data pack file and returns a dictionary."""
     53   data = util.ReadFile(input_file, util.BINARY)
     54   original_data = data
     55 
     56   # Read the header.
     57   version, num_entries, encoding = struct.unpack('<IIB', data[:HEADER_LENGTH])
     58   if version != PACK_FILE_VERSION:
     59     print 'Wrong file version in ', input_file
     60     raise WrongFileVersion
     61 
     62   resources = {}
     63   if num_entries == 0:
     64     return DataPackContents(resources, encoding)
     65 
     66   # Read the index and data.
     67   data = data[HEADER_LENGTH:]
     68   kIndexEntrySize = 2 + 4  # Each entry is a uint16 and a uint32.
     69   for _ in range(num_entries):
     70     id, offset = struct.unpack('<HI', data[:kIndexEntrySize])
     71     data = data[kIndexEntrySize:]
     72     next_id, next_offset = struct.unpack('<HI', data[:kIndexEntrySize])
     73     resources[id] = original_data[offset:next_offset]
     74 
     75   return DataPackContents(resources, encoding)
     76 
     77 
     78 def WriteDataPackToString(resources, encoding):
     79   """Returns a string with a map of id=>data in the data pack format."""
     80   ids = sorted(resources.keys())
     81   ret = []
     82 
     83   # Write file header.
     84   ret.append(struct.pack('<IIB', PACK_FILE_VERSION, len(ids), encoding))
     85   HEADER_LENGTH = 2 * 4 + 1            # Two uint32s and one uint8.
     86 
     87   # Each entry is a uint16 + a uint32s. We have one extra entry for the last
     88   # item.
     89   index_length = (len(ids) + 1) * (2 + 4)
     90 
     91   # Write index.
     92   data_offset = HEADER_LENGTH + index_length
     93   for id in ids:
     94     ret.append(struct.pack('<HI', id, data_offset))
     95     data_offset += len(resources[id])
     96 
     97   ret.append(struct.pack('<HI', 0, data_offset))
     98 
     99   # Write data.
    100   for id in ids:
    101     ret.append(resources[id])
    102   return ''.join(ret)
    103 
    104 
    105 def WriteDataPack(resources, output_file, encoding):
    106   """Writes a map of id=>data into output_file as a data pack."""
    107   content = WriteDataPackToString(resources, encoding)
    108   with open(output_file, 'wb') as file:
    109     file.write(content)
    110 
    111 
    112 def RePack(output_file, input_files, whitelist_file=None):
    113   """Write a new data pack file by combining input pack files.
    114 
    115   Args:
    116       output_file: path to the new data pack file.
    117       input_files: a list of paths to the data pack files to combine.
    118       whitelist_file: path to the file that contains the list of resource IDs
    119                       that should be kept in the output file or None to include
    120                       all resources.
    121 
    122   Raises:
    123       KeyError: if there are duplicate keys or resource encoding is
    124       inconsistent.
    125   """
    126   input_data_packs = [ReadDataPack(filename) for filename in input_files]
    127   whitelist = None
    128   if whitelist_file:
    129     whitelist = util.ReadFile(whitelist_file, util.RAW_TEXT).strip().split('\n')
    130     whitelist = set(map(int, whitelist))
    131   resources, encoding = RePackFromDataPackStrings(input_data_packs, whitelist)
    132   WriteDataPack(resources, output_file, encoding)
    133 
    134 
    135 def RePackFromDataPackStrings(inputs, whitelist):
    136   """Returns a data pack string that combines the resources from inputs.
    137 
    138   Args:
    139       inputs: a list of data pack strings that need to be combined.
    140       whitelist: a list of resource IDs that should be kept in the output string
    141                  or None to include all resources.
    142 
    143   Returns:
    144       DataPackContents: a tuple containing the new combined data pack and its
    145                         encoding.
    146 
    147   Raises:
    148       KeyError: if there are duplicate keys or resource encoding is
    149       inconsistent.
    150   """
    151   resources = {}
    152   encoding = None
    153   for content in inputs:
    154     # Make sure we have no dups.
    155     duplicate_keys = set(content.resources.keys()) & set(resources.keys())
    156     if duplicate_keys:
    157       raise exceptions.KeyError('Duplicate keys: ' + str(list(duplicate_keys)))
    158 
    159     # Make sure encoding is consistent.
    160     if encoding in (None, BINARY):
    161       encoding = content.encoding
    162     elif content.encoding not in (BINARY, encoding):
    163       raise exceptions.KeyError('Inconsistent encodings: ' + str(encoding) +
    164                                 ' vs ' + str(content.encoding))
    165 
    166     if whitelist:
    167       whitelisted_resources = dict([(key, content.resources[key])
    168                                     for key in content.resources.keys()
    169                                     if key in whitelist])
    170       resources.update(whitelisted_resources)
    171       removed_keys = [key for key in content.resources.keys()
    172                       if key not in whitelist]
    173       for key in removed_keys:
    174         print 'RePackFromDataPackStrings Removed Key:', key
    175     else:
    176       resources.update(content.resources)
    177 
    178   # Encoding is 0 for BINARY, 1 for UTF8 and 2 for UTF16
    179   if encoding is None:
    180     encoding = BINARY
    181   return DataPackContents(resources, encoding)
    182 
    183 
    184 # Temporary hack for external programs that import data_pack.
    185 # TODO(benrg): Remove this.
    186 class DataPack(object):
    187   pass
    188 DataPack.ReadDataPack = staticmethod(ReadDataPack)
    189 DataPack.WriteDataPackToString = staticmethod(WriteDataPackToString)
    190 DataPack.WriteDataPack = staticmethod(WriteDataPack)
    191 DataPack.RePack = staticmethod(RePack)
    192 
    193 
    194 def main():
    195   if len(sys.argv) > 1:
    196     # When an argument is given, read and explode the file to text
    197     # format, for easier diffing.
    198     data = ReadDataPack(sys.argv[1])
    199     print data.encoding
    200     for (resource_id, text) in data.resources.iteritems():
    201       print '%s: %s' % (resource_id, text)
    202   else:
    203     # Just write a simple file.
    204     data = {1: '', 4: 'this is id 4', 6: 'this is id 6', 10: ''}
    205     WriteDataPack(data, 'datapack1.pak', UTF8)
    206     data2 = {1000: 'test', 5: 'five'}
    207     WriteDataPack(data2, 'datapack2.pak', UTF8)
    208     print 'wrote datapack1 and datapack2 to current directory.'
    209 
    210 
# Allow running this formatter directly from the command line (see main()).
if __name__ == '__main__':
  main()
    213