Home | History | Annotate | Download | only in tool
      1 #!/usr/bin/env python
      2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 '''The 'grit rc2grd' tool.'''
      7 
      8 
      9 import os.path
     10 import getopt
     11 import re
     12 import StringIO
     13 import types
     14 
     15 import grit.node.empty
     16 from grit.node import include
     17 from grit.node import structure
     18 from grit.node import message
     19 
     20 from grit.gather import rc
     21 from grit.gather import tr_html
     22 
     23 from grit.tool import interface
     24 from grit.tool import postprocess_interface
     25 from grit.tool import preprocess_interface
     26 
     27 from grit import grd_reader
     28 from grit import lazy_re
     29 from grit import tclib
     30 from grit import util
     31 
     32 
# Matches files referenced from an .rc file, i.e. statements of the form
#   ID  TYPE  "file"
# that start at the beginning of a line and whose fields are separated by
# spaces or tabs.  Named groups:
#   id:   the resource ID (upper-case letters, digits, '_' and '.')
#   type: the resource type (upper-case letters, digits and '_')
#   file: the text between the quotes, still RC-escaped; the closing quote
#         must follow either a non-quote character or a doubled quote ("")
_FILE_REF = lazy_re.compile('''
  ^(?P<id>[A-Z_0-9.]+)[ \t]+
  (?P<type>[A-Z_0-9]+)[ \t]+
  "(?P<file>.*?([^"]|""))"[ \t]*$''', re.VERBOSE | re.MULTILINE)
     38 
     39 
# Matches a dialog section: a line starting with the dialog's ID (captured as
# the named group 'id') followed by DIALOG or DIALOGEX, then, as lazily as
# possible, everything up to a line consisting of BEGIN and on to the first
# following line consisting of END.
_DIALOG = lazy_re.compile(
    '^(?P<id>[A-Z0-9_]+)\s+DIALOG(EX)?\s.+?^BEGIN\s*$.+?^END\s*$',
    re.MULTILINE | re.DOTALL)
     44 
     45 
# Matches a menu section: a line starting with the menu's ID (captured as the
# named group 'id') followed by the keyword MENU, then the BEGIN ... END body.
# Nothing is required directly after MENU, so keywords that merely start with
# MENU (such as MENUEX) are matched as well.
_MENU = lazy_re.compile('^(?P<id>[A-Z0-9_]+)\s+MENU.+?^BEGIN\s*$.+?^END\s*$',
                        re.MULTILINE | re.DOTALL)
     49 
     50 
# Matches a versioninfo section: a line starting with the resource ID
# (captured as the named group 'id') followed by VERSIONINFO, then, as lazily
# as possible, everything up to a BEGIN line and on to the first END line.
_VERSIONINFO = lazy_re.compile(
    '^(?P<id>[A-Z0-9_]+)\s+VERSIONINFO\s.+?^BEGIN\s*$.+?^END\s*$',
    re.MULTILINE | re.DOTALL)
     55 
     56 
# Matches a stringtable: the STRINGTABLE keyword at the start of a line,
# optionally followed by any number of PRELOAD, DISCARDABLE,
# CHARACTERISTICS ..., LANGUAGE ... or VERSION ... modifiers, then a BEGIN
# line.  Everything between BEGIN and the first END line is captured as the
# named group 'body'.
_STRING_TABLE = lazy_re.compile(
    ('^STRINGTABLE(\s+(PRELOAD|DISCARDABLE|CHARACTERISTICS.+|LANGUAGE.+|'
     'VERSION.+))*\s*\nBEGIN\s*$(?P<body>.+?)^END\s*$'),
    re.MULTILINE | re.DOTALL)
     62 
     63 
# Matches each message inside a stringtable, breaking it up into comments,
# the ID of the message, and the (RC-escaped) message text.  Named groups:
#   comment: the block of '//' comment lines directly preceding the message
#            (empty when there are none)
#   id:      the message ID
#   text:    the quoted message text without the surrounding quotes; a
#            doubled quote ("") inside the text does not terminate it
_MESSAGE = lazy_re.compile('''
  (?P<comment>(^\s+//.+?)*)  # 0 or more lines of comments preceding the message
  ^\s*
  (?P<id>[A-Za-z0-9_]+)  # id
  \s+
  "(?P<text>.*?([^"]|""))"([^"]|$)  # The message itself
  ''', re.MULTILINE | re.DOTALL | re.VERBOSE)
     73 
     74 
# Matches each line of comment text in a multi-line comment.  The named group
# 'text' holds the line's content with the leading whitespace, the '//' marker
# and the whitespace following the marker stripped off.
_COMMENT_TEXT = lazy_re.compile('^\s*//\s*(?P<text>.+?)$', re.MULTILINE)
     77 
     78 
# Matches a string that is empty or all whitespace.  The pattern is anchored
# with \A and \Z, so it only succeeds when the whole string qualifies.
_WHITESPACE_ONLY = lazy_re.compile('\A\s*\Z', re.MULTILINE)
     81 
     82 
# Finds printf and FormatMessage style format specifiers
# Uses non-capturing groups except for the outermost group, so the output of
# re.split() should include both the normal text and what we intend to
# replace with placeholders.
# The printf branch accepts an optional flag, an optional width and precision
# (digits or '*'), an optional h/l/L length modifier and a conversion
# character; the FormatMessage branch accepts '$' followed by a number that
# does not start with 0 ($1, $2, ...).
# TODO(joi) Check documentation for printf (and Windows variants) and FormatMessage
_FORMAT_SPECIFIER = lazy_re.compile(
  '(%[-# +]?(?:[0-9]*|\*)(?:\.(?:[0-9]+|\*))?(?:h|l|L)?' # printf up to last char
  '(?:d|i|o|u|x|X|e|E|f|F|g|G|c|r|s|ls|ws)'              # printf last char
  '|\$[1-9][0-9]*)')                                     # FormatMessage
     92 
     93 
     94 class Rc2Grd(interface.Tool):
     95   '''A tool for converting .rc files to .grd files.  This tool is only for
     96 converting the source (nontranslated) .rc file to a .grd file.  For importing
     97 existing translations, use the rc2xtb tool.
     98 
     99 Usage:  grit [global options] rc2grd [OPTIONS] RCFILE
    100 
    101 The tool takes a single argument, which is the path to the .rc file to convert.
    102 It outputs a .grd file with the same name in the same directory as the .rc file.
    103 The .grd file may have one or more TODO comments for things that have to be
    104 cleaned up manually.
    105 
    106 OPTIONS may be any of the following:
    107 
    108   -e ENCODING    Specify the ENCODING of the .rc file. Default is 'cp1252'.
    109 
    110   -h TYPE        Specify the TYPE attribute for HTML structures.
    111                  Default is 'tr_html'.
    112 
    113   -u ENCODING    Specify the ENCODING of HTML files. Default is 'utf-8'.
    114 
    115   -n MATCH       Specify the regular expression to match in comments that will
    116                  indicate that the resource the comment belongs to is not
    117                  translateable. Default is 'Not locali(s|z)able'.
    118 
    119   -r GRDFILE     Specify that GRDFILE should be used as a "role model" for
    120                  any placeholders that otherwise would have had TODO names.
    121                  This attempts to find an identical message in the GRDFILE
    122                  and uses that instead of the automatically placeholderized
    123                  message.
    124 
    125   --pre CLASS    Specify an optional, fully qualified classname, which
    126                  has to be a subclass of grit.tool.PreProcessor, to
    127                  run on the text of the RC file before conversion occurs.
    128                  This can be used to support constructs in the RC files
    129                  that GRIT cannot handle on its own.
    130 
    131   --post CLASS   Specify an optional, fully qualified classname, which
    132                  has to be a subclass of grit.tool.PostProcessor, to
    133                  run on the text of the converted RC file.
    134                  This can be used to alter the content of the RC file
                 based on the conversion that occurred.
    136 
    137 For menus, dialogs and version info, the .grd file will refer to the original
    138 .rc file.  Once conversion is complete, you can strip the original .rc file
    139 of its string table and all comments as these will be available in the .grd
    140 file.
    141 
    142 Note that this tool WILL NOT obey C preprocessor rules, so even if something
is #if 0-ed out it will still be included in the output of this tool.
    144 Therefore, if your .rc file contains sections like this, you should run the
    145 C preprocessor on the .rc file or manually edit it before using this tool.
    146 '''
    147 
    148   def ShortDescription(self):
    149     return 'A tool for converting .rc source files to .grd files.'
    150 
    151   def __init__(self):
    152     self.input_encoding = 'cp1252'
    153     self.html_type = 'tr_html'
    154     self.html_encoding = 'utf-8'
    155     self.not_localizable_re = re.compile('Not locali(s|z)able')
    156     self.role_model = None
    157     self.pre_process = None
    158     self.post_process = None
    159 
    160   def ParseOptions(self, args):
    161     '''Given a list of arguments, set this object's options and return
    162     all non-option arguments.
    163     '''
    164     (own_opts, args) = getopt.getopt(args, 'e:h:u:n:r', ['pre=', 'post='])
    165     for (key, val) in own_opts:
    166       if key == '-e':
    167         self.input_encoding = val
    168       elif key == '-h':
    169         self.html_type = val
    170       elif key == '-u':
    171         self.html_encoding = val
    172       elif key == '-n':
    173         self.not_localizable_re = re.compile(val)
    174       elif key == '-r':
    175         self.role_model = grd_reader.Parse(val)
    176       elif key == '--pre':
    177         self.pre_process = val
    178       elif key == '--post':
    179         self.post_process = val
    180     return args
    181 
    182   def Run(self, opts, args):
    183     args = self.ParseOptions(args)
    184     if len(args) != 1:
    185       print ('This tool takes a single tool-specific argument, the path to the\n'
    186              '.rc file to process.')
    187       return 2
    188     self.SetOptions(opts)
    189 
    190     path = args[0]
    191     out_path = os.path.join(util.dirname(path),
    192                 os.path.splitext(os.path.basename(path))[0] + '.grd')
    193 
    194     rctext = util.ReadFile(path, self.input_encoding)
    195     grd_text = unicode(self.Process(rctext, path))
    196     with util.WrapOutputStream(file(out_path, 'w'), 'utf-8') as outfile:
    197       outfile.write(grd_text)
    198 
    199     print 'Wrote output file %s.\nPlease check for TODO items in the file.' % out_path
    200 
    201 
    202   def Process(self, rctext, rc_path):
    203     '''Processes 'rctext' and returns a resource tree corresponding to it.
    204 
    205     Args:
    206       rctext: complete text of the rc file
    207       rc_path: 'resource\resource.rc'
    208 
    209     Return:
    210       grit.node.base.Node subclass
    211     '''
    212 
    213     if self.pre_process:
    214       preprocess_class = util.NewClassInstance(self.pre_process,
    215                                                preprocess_interface.PreProcessor)
    216       if preprocess_class:
    217         rctext = preprocess_class.Process(rctext, rc_path)
    218       else:
    219         self.Out(
    220           'PreProcessing class could not be found. Skipping preprocessing.\n')
    221 
    222     # Start with a basic skeleton for the .grd file
    223     root = grd_reader.Parse(StringIO.StringIO(
    224       '''<?xml version="1.0" encoding="UTF-8"?>
    225       <grit base_dir="." latest_public_release="0"
    226           current_release="1" source_lang_id="en">
    227         <outputs />
    228         <translations />
    229         <release seq="1">
    230           <includes />
    231           <structures />
    232           <messages />
    233         </release>
    234       </grit>'''), util.dirname(rc_path))
    235     includes = root.children[2].children[0]
    236     structures = root.children[2].children[1]
    237     messages = root.children[2].children[2]
    238     assert (isinstance(includes, grit.node.empty.IncludesNode) and
    239             isinstance(structures, grit.node.empty.StructuresNode) and
    240             isinstance(messages, grit.node.empty.MessagesNode))
    241 
    242     self.AddIncludes(rctext, includes)
    243     self.AddStructures(rctext, structures, os.path.basename(rc_path))
    244     self.AddMessages(rctext, messages)
    245 
    246     self.VerboseOut('Validating that all IDs are unique...\n')
    247     root.ValidateUniqueIds()
    248     self.ExtraVerboseOut('Done validating that all IDs are unique.\n')
    249 
    250     if self.post_process:
    251       postprocess_class = util.NewClassInstance(self.post_process,
    252                                                 postprocess_interface.PostProcessor)
    253       if postprocess_class:
    254         root = postprocess_class.Process(rctext, rc_path, root)
    255       else:
    256         self.Out(
    257           'PostProcessing class could not be found. Skipping postprocessing.\n')
    258 
    259     return root
    260 
    261 
    262   def IsHtml(self, res_type, fname):
    263     '''Check whether both the type and file extension indicate HTML'''
    264     fext = fname.split('.')[-1].lower()
    265     return res_type == 'HTML' and fext in ('htm', 'html')
    266 
    267 
    268   def AddIncludes(self, rctext, node):
    269     '''Scans 'rctext' for included resources (e.g. BITMAP, ICON) and
    270     adds each included resource as an <include> child node of 'node'.'''
    271     for m in _FILE_REF.finditer(rctext):
    272       id = m.group('id')
    273       res_type = m.group('type').upper()
    274       fname = rc.Section.UnEscape(m.group('file'))
    275       assert fname.find('\n') == -1
    276       if not self.IsHtml(res_type, fname):
    277         self.VerboseOut('Processing %s with ID %s (filename: %s)\n' %
    278                         (res_type, id, fname))
    279         node.AddChild(include.IncludeNode.Construct(node, id, res_type, fname))
    280 
    281 
    282   def AddStructures(self, rctext, node, rc_filename):
    283     '''Scans 'rctext' for structured resources (e.g. menus, dialogs, version
    284     information resources and HTML templates) and adds each as a <structure>
    285     child of 'node'.'''
    286     # First add HTML includes
    287     for m in _FILE_REF.finditer(rctext):
    288       id = m.group('id')
    289       res_type = m.group('type').upper()
    290       fname = rc.Section.UnEscape(m.group('file'))
    291       if self.IsHtml(type, fname):
    292         node.AddChild(structure.StructureNode.Construct(
    293           node, id, self.html_type, fname, self.html_encoding))
    294 
    295     # Then add all RC includes
    296     def AddStructure(res_type, id):
    297       self.VerboseOut('Processing %s with ID %s\n' % (res_type, id))
    298       node.AddChild(structure.StructureNode.Construct(node, id, res_type,
    299                                                       rc_filename,
    300                                                       encoding=self.input_encoding))
    301     for m in _MENU.finditer(rctext):
    302       AddStructure('menu', m.group('id'))
    303     for m in _DIALOG.finditer(rctext):
    304       AddStructure('dialog', m.group('id'))
    305     for m in _VERSIONINFO.finditer(rctext):
    306       AddStructure('version', m.group('id'))
    307 
    308 
    309   def AddMessages(self, rctext, node):
    310     '''Scans 'rctext' for all messages in string tables, preprocesses them as
    311     much as possible for placeholders (e.g. messages containing $1, $2 or %s, %d
    312     type format specifiers get those specifiers replaced with placeholders, and
    313     HTML-formatted messages get run through the HTML-placeholderizer).  Adds
    314     each message as a <message> node child of 'node'.'''
    315     for tm in _STRING_TABLE.finditer(rctext):
    316       table = tm.group('body')
    317       for mm in _MESSAGE.finditer(table):
    318         comment_block = mm.group('comment')
    319         comment_text = []
    320         for cm in _COMMENT_TEXT.finditer(comment_block):
    321           comment_text.append(cm.group('text'))
    322         comment_text = ' '.join(comment_text)
    323 
    324         id = mm.group('id')
    325         text = rc.Section.UnEscape(mm.group('text'))
    326 
    327         self.VerboseOut('Processing message %s (text: "%s")\n' % (id, text))
    328 
    329         msg_obj = self.Placeholderize(text)
    330 
    331         # Messages that contain only placeholders do not need translation.
    332         is_translateable = False
    333         for item in msg_obj.GetContent():
    334           if isinstance(item, types.StringTypes):
    335             if not _WHITESPACE_ONLY.match(item):
    336               is_translateable = True
    337 
    338         if self.not_localizable_re.search(comment_text):
    339           is_translateable = False
    340 
    341         message_meaning = ''
    342         internal_comment = ''
    343 
    344         # If we have a "role model" (existing GRD file) and this node exists
    345         # in the role model, use the description, meaning and translateable
    346         # attributes from the role model.
    347         if self.role_model:
    348           role_node = self.role_model.GetNodeById(id)
    349           if role_node:
    350             is_translateable = role_node.IsTranslateable()
    351             message_meaning = role_node.attrs['meaning']
    352             comment_text = role_node.attrs['desc']
    353             internal_comment = role_node.attrs['internal_comment']
    354 
    355         # For nontranslateable messages, we don't want the complexity of
    356         # placeholderizing everything.
    357         if not is_translateable:
    358           msg_obj = tclib.Message(text=text)
    359 
    360         msg_node = message.MessageNode.Construct(node, msg_obj, id,
    361                                                  desc=comment_text,
    362                                                  translateable=is_translateable,
    363                                                  meaning=message_meaning)
    364         msg_node.attrs['internal_comment'] = internal_comment
    365 
    366         node.AddChild(msg_node)
    367         self.ExtraVerboseOut('Done processing message %s\n' % id)
    368 
    369 
    370   def Placeholderize(self, text):
    371     '''Creates a tclib.Message object from 'text', attempting to recognize
    372     a few different formats of text that can be automatically placeholderized
    373     (HTML code, printf-style format strings, and FormatMessage-style format
    374     strings).
    375     '''
    376 
    377     try:
    378       # First try HTML placeholderizing.
    379       # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing
    380       msg = tr_html.HtmlToMessage(text, True)
    381       for item in msg.GetContent():
    382         if not isinstance(item, types.StringTypes):
    383           return msg  # Contained at least one placeholder, so we're done
    384 
    385       # HTML placeholderization didn't do anything, so try to find printf or
    386       # FormatMessage format specifiers and change them into placeholders.
    387       msg = tclib.Message()
    388       parts = _FORMAT_SPECIFIER.split(text)
    389       todo_counter = 1  # We make placeholder IDs 'TODO_0001' etc.
    390       for part in parts:
    391         if _FORMAT_SPECIFIER.match(part):
    392           msg.AppendPlaceholder(tclib.Placeholder(
    393             'TODO_%04d' % todo_counter, part, 'TODO'))
    394           todo_counter += 1
    395         elif part != '':
    396           msg.AppendText(part)
    397 
    398       if self.role_model and len(parts) > 1:  # there are TODO placeholders
    399         role_model_msg = self.role_model.UberClique().BestCliqueByOriginalText(
    400           msg.GetRealContent(), '')
    401         if role_model_msg:
    402           # replace wholesale to get placeholder names and examples
    403           msg = role_model_msg
    404 
    405       return msg
    406     except:
    407       print 'Exception processing message with text "%s"' % text
    408       raise
    409 
    410