      1 #!/usr/bin/env python
      2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      6 '''The 'grit rc2grd' tool.'''
      9 import os.path
     10 import getopt
     11 import re
     12 import StringIO
     13 import types
     15 import grit.node.empty
     16 from grit.node import include
     17 from grit.node import structure
     18 from grit.node import message
     20 from grit.gather import rc
     21 from grit.gather import tr_html
     23 from grit.tool import interface
     24 from grit.tool import postprocess_interface
     25 from grit.tool import preprocess_interface
     27 from grit import grd_reader
     28 from grit import lazy_re
     29 from grit import tclib
     30 from grit import util
# Matches files referenced from an .rc file: an identifier, a resource type
# and a double-quoted file name, separated by spaces or tabs.  The three parts
# are exposed as the named groups 'id', 'type' and 'file'.  The pattern is
# compiled with re.VERBOSE, so the line breaks and indentation used to lay it
# out are not part of the expression.
_FILE_REF = lazy_re.compile('''
  ^(?P<id>[A-Z_0-9.]+)[ \t]+
  (?P<type>[A-Z_0-9]+)[ \t]+
  "(?P<file>.*?([^"]|""))"[ \t]*$''', re.VERBOSE | re.MULTILINE)

# Matches a dialog section: an identifier (named group 'id') followed by
# DIALOG or DIALOGEX, then everything through the BEGIN line and up to the
# first END found at the start of a line.  re.DOTALL lets '.' cross line
# boundaries so that one match spans the whole section.
_DIALOG = lazy_re.compile(
    '^(?P<id>[A-Z0-9_]+)\s+DIALOG(EX)?\s.+?^BEGIN\s*$.+?^END\s*$',
    re.MULTILINE | re.DOTALL)

# Matches a menu section, with the identifier in the named group 'id'.  Unlike
# the dialog pattern there is no separator required right after MENU, so any
# keyword that merely starts with MENU is accepted as well.
_MENU = lazy_re.compile('^(?P<id>[A-Z0-9_]+)\s+MENU.+?^BEGIN\s*$.+?^END\s*$',
                        re.MULTILINE | re.DOTALL)

# Matches a versioninfo section, with the identifier in the named group 'id'.
# The section runs from the VERSIONINFO line through the first END found at
# the start of a line.
_VERSIONINFO = lazy_re.compile(
    '^(?P<id>[A-Z0-9_]+)\s+VERSIONINFO\s.+?^BEGIN\s*$.+?^END\s*$',
    re.MULTILINE | re.DOTALL)
     57 # Matches a stringtable
     58 _STRING_TABLE = lazy_re.compile(
     60      'VERSION.+))*\s*\nBEGIN\s*$(?P<body>.+?)^END\s*$'),
     61     re.MULTILINE | re.DOTALL)
# Matches each message inside a stringtable, breaking it up into comments,
# the ID of the message, and the (RC-escaped) message text.  These are exposed
# as the named groups 'comment', 'id' and 'text'.  The closing quote must be
# followed by a non-quote character or the end of a line, so that a doubled
# quote ("") inside the text is not mistaken for the end of the message.
# NOTE: the '#' remarks inside the pattern are part of the string literal;
# they are ignored by the regex engine only because of re.VERBOSE.
_MESSAGE = lazy_re.compile('''
  (?P<comment>(^\s+//.+?)*)  # 0 or more lines of comments preceding the message
  ^\s*
  (?P<id>[A-Za-z0-9_]+)  # id
  \s+
  "(?P<text>.*?([^"]|""))"([^"]|$)  # The message itself
  ''', re.MULTILINE | re.DOTALL | re.VERBOSE)

# Matches each line of comment text in a multi-line comment.  The text after
# the leading '//' (and any whitespace following it) is exposed as the named
# group 'text'.
_COMMENT_TEXT = lazy_re.compile('^\s*//\s*(?P<text>.+?)$', re.MULTILINE)

# Matches a string that is empty or all whitespace
_WHITESPACE_ONLY = lazy_re.compile('\A\s*\Z', re.MULTILINE)

# Finds printf and FormatMessage style format specifiers
# Uses non-capturing groups except for the outermost group, so the output of
# re.split() should include both the normal text and what we intend to
# replace with placeholders.
# The three string literals below are concatenated into a single pattern:
# the first two form the printf alternative, the third the '$' + number
# alternative.
# TODO(joi) Check documentation for printf (and Windows variants) and FormatMessage
_FORMAT_SPECIFIER = lazy_re.compile(
  '(%[-# +]?(?:[0-9]*|\*)(?:\.(?:[0-9]+|\*))?(?:h|l|L)?' # printf up to last char
  '(?:d|i|o|u|x|X|e|E|f|F|g|G|c|r|s|ls|ws)'              # printf last char
  '|\$[1-9][0-9]*)')                                     # FormatMessage
     94 class Rc2Grd(interface.Tool):
     95   '''A tool for converting .rc files to .grd files.  This tool is only for
     96 converting the source (nontranslated) .rc file to a .grd file.  For importing
     97 existing translations, use the rc2xtb tool.
     99 Usage:  grit [global options] rc2grd [OPTIONS] RCFILE
    101 The tool takes a single argument, which is the path to the .rc file to convert.
    102 It outputs a .grd file with the same name in the same directory as the .rc file.
    103 The .grd file may have one or more TODO comments for things that have to be
    104 cleaned up manually.
    106 OPTIONS may be any of the following:
    108   -e ENCODING    Specify the ENCODING of the .rc file. Default is 'cp1252'.
    110   -h TYPE        Specify the TYPE attribute for HTML structures.
    111                  Default is 'tr_html'.
    113   -u ENCODING    Specify the ENCODING of HTML files. Default is 'utf-8'.
    115   -n MATCH       Specify the regular expression to match in comments that will
    116                  indicate that the resource the comment belongs to is not
    117                  translateable. Default is 'Not locali(s|z)able'.
    119   -r GRDFILE     Specify that GRDFILE should be used as a "role model" for
    120                  any placeholders that otherwise would have had TODO names.
    121                  This attempts to find an identical message in the GRDFILE
    122                  and uses that instead of the automatically placeholderized
    123                  message.
    125   --pre CLASS    Specify an optional, fully qualified classname, which
    126                  has to be a subclass of grit.tool.PreProcessor, to
    127                  run on the text of the RC file before conversion occurs.
    128                  This can be used to support constructs in the RC files
    129                  that GRIT cannot handle on its own.
    131   --post CLASS   Specify an optional, fully qualified classname, which
    132                  has to be a subclass of grit.tool.PostProcessor, to
    133                  run on the text of the converted RC file.
    134                  This can be used to alter the content of the RC file
                 based on the conversion that occurred.
    137 For menus, dialogs and version info, the .grd file will refer to the original
    138 .rc file.  Once conversion is complete, you can strip the original .rc file
    139 of its string table and all comments as these will be available in the .grd
    140 file.
    142 Note that this tool WILL NOT obey C preprocessor rules, so even if something
is #if 0-ed out it will still be included in the output of this tool.
    144 Therefore, if your .rc file contains sections like this, you should run the
    145 C preprocessor on the .rc file or manually edit it before using this tool.
    146 '''
    148   def ShortDescription(self):
    149     return 'A tool for converting .rc source files to .grd files.'
    151   def __init__(self):
    152     self.input_encoding = 'cp1252'
    153     self.html_type = 'tr_html'
    154     self.html_encoding = 'utf-8'
    155     self.not_localizable_re = re.compile('Not locali(s|z)able')
    156     self.role_model = None
    157     self.pre_process = None
    158     self.post_process = None
    160   def ParseOptions(self, args):
    161     '''Given a list of arguments, set this object's options and return
    162     all non-option arguments.
    163     '''
    164     (own_opts, args) = getopt.getopt(args, 'e:h:u:n:r', ['pre=', 'post='])
    165     for (key, val) in own_opts:
    166       if key == '-e':
    167         self.input_encoding = val
    168       elif key == '-h':
    169         self.html_type = val
    170       elif key == '-u':
    171         self.html_encoding = val
    172       elif key == '-n':
    173         self.not_localizable_re = re.compile(val)
    174       elif key == '-r':
    175         self.role_model = grd_reader.Parse(val)
    176       elif key == '--pre':
    177         self.pre_process = val
    178       elif key == '--post':
    179         self.post_process = val
    180     return args
    182   def Run(self, opts, args):
    183     args = self.ParseOptions(args)
    184     if len(args) != 1:
    185       print ('This tool takes a single tool-specific argument, the path to the\n'
    186              '.rc file to process.')
    187       return 2
    188     self.SetOptions(opts)
    190     path = args[0]
    191     out_path = os.path.join(util.dirname(path),
    192                 os.path.splitext(os.path.basename(path))[0] + '.grd')
    194     rctext = util.ReadFile(path, self.input_encoding)
    195     grd_text = unicode(self.Process(rctext, path))
    196     with util.WrapOutputStream(file(out_path, 'w'), 'utf-8') as outfile:
    197       outfile.write(grd_text)
    199     print 'Wrote output file %s.\nPlease check for TODO items in the file.' % out_path
    202   def Process(self, rctext, rc_path):
    203     '''Processes 'rctext' and returns a resource tree corresponding to it.
    205     Args:
    206       rctext: complete text of the rc file
    207       rc_path: 'resource\resource.rc'
    209     Return:
    210       grit.node.base.Node subclass
    211     '''
    213     if self.pre_process:
    214       preprocess_class = util.NewClassInstance(self.pre_process,
    215                                                preprocess_interface.PreProcessor)
    216       if preprocess_class:
    217         rctext = preprocess_class.Process(rctext, rc_path)
    218       else:
    219         self.Out(
    220           'PreProcessing class could not be found. Skipping preprocessing.\n')
    222     # Start with a basic skeleton for the .grd file
    223     root = grd_reader.Parse(StringIO.StringIO(
    224       '''<?xml version="1.0" encoding="UTF-8"?>
    225       <grit base_dir="." latest_public_release="0"
    226           current_release="1" source_lang_id="en">
    227         <outputs />
    228         <translations />
    229         <release seq="1">
    230           <includes />
    231           <structures />
    232           <messages />
    233         </release>
    234       </grit>'''), util.dirname(rc_path))
    235     includes = root.children[2].children[0]
    236     structures = root.children[2].children[1]
    237     messages = root.children[2].children[2]
    238     assert (isinstance(includes, grit.node.empty.IncludesNode) and
    239             isinstance(structures, grit.node.empty.StructuresNode) and
    240             isinstance(messages, grit.node.empty.MessagesNode))
    242     self.AddIncludes(rctext, includes)
    243     self.AddStructures(rctext, structures, os.path.basename(rc_path))
    244     self.AddMessages(rctext, messages)
    246     self.VerboseOut('Validating that all IDs are unique...\n')
    247     root.ValidateUniqueIds()
    248     self.ExtraVerboseOut('Done validating that all IDs are unique.\n')
    250     if self.post_process:
    251       postprocess_class = util.NewClassInstance(self.post_process,
    252                                                 postprocess_interface.PostProcessor)
    253       if postprocess_class:
    254         root = postprocess_class.Process(rctext, rc_path, root)
    255       else:
    256         self.Out(
    257           'PostProcessing class could not be found. Skipping postprocessing.\n')
    259     return root
    262   def IsHtml(self, res_type, fname):
    263     '''Check whether both the type and file extension indicate HTML'''
    264     fext = fname.split('.')[-1].lower()
    265     return res_type == 'HTML' and fext in ('htm', 'html')
    268   def AddIncludes(self, rctext, node):
    269     '''Scans 'rctext' for included resources (e.g. BITMAP, ICON) and
    270     adds each included resource as an <include> child node of 'node'.'''
    271     for m in _FILE_REF.finditer(rctext):
    272       id = m.group('id')
    273       res_type = m.group('type').upper()
    274       fname = rc.Section.UnEscape(m.group('file'))
    275       assert fname.find('\n') == -1
    276       if not self.IsHtml(res_type, fname):
    277         self.VerboseOut('Processing %s with ID %s (filename: %s)\n' %
    278                         (res_type, id, fname))
    279         node.AddChild(include.IncludeNode.Construct(node, id, res_type, fname))
    282   def AddStructures(self, rctext, node, rc_filename):
    283     '''Scans 'rctext' for structured resources (e.g. menus, dialogs, version
    284     information resources and HTML templates) and adds each as a <structure>
    285     child of 'node'.'''
    286     # First add HTML includes
    287     for m in _FILE_REF.finditer(rctext):
    288       id = m.group('id')
    289       res_type = m.group('type').upper()
    290       fname = rc.Section.UnEscape(m.group('file'))
    291       if self.IsHtml(type, fname):
    292         node.AddChild(structure.StructureNode.Construct(
    293           node, id, self.html_type, fname, self.html_encoding))
    295     # Then add all RC includes
    296     def AddStructure(res_type, id):
    297       self.VerboseOut('Processing %s with ID %s\n' % (res_type, id))
    298       node.AddChild(structure.StructureNode.Construct(node, id, res_type,
    299                                                       rc_filename,
    300                                                       encoding=self.input_encoding))
    301     for m in _MENU.finditer(rctext):
    302       AddStructure('menu', m.group('id'))
    303     for m in _DIALOG.finditer(rctext):
    304       AddStructure('dialog', m.group('id'))
    305     for m in _VERSIONINFO.finditer(rctext):
    306       AddStructure('version', m.group('id'))
    309   def AddMessages(self, rctext, node):
    310     '''Scans 'rctext' for all messages in string tables, preprocesses them as
    311     much as possible for placeholders (e.g. messages containing $1, $2 or %s, %d
    312     type format specifiers get those specifiers replaced with placeholders, and
    313     HTML-formatted messages get run through the HTML-placeholderizer).  Adds
    314     each message as a <message> node child of 'node'.'''
    315     for tm in _STRING_TABLE.finditer(rctext):
    316       table = tm.group('body')
    317       for mm in _MESSAGE.finditer(table):
    318         comment_block = mm.group('comment')
    319         comment_text = []
    320         for cm in _COMMENT_TEXT.finditer(comment_block):
    321           comment_text.append(cm.group('text'))
    322         comment_text = ' '.join(comment_text)
    324         id = mm.group('id')
    325         text = rc.Section.UnEscape(mm.group('text'))
    327         self.VerboseOut('Processing message %s (text: "%s")\n' % (id, text))
    329         msg_obj = self.Placeholderize(text)
    331         # Messages that contain only placeholders do not need translation.
    332         is_translateable = False
    333         for item in msg_obj.GetContent():
    334           if isinstance(item, types.StringTypes):
    335             if not _WHITESPACE_ONLY.match(item):
    336               is_translateable = True
    338         if self.not_localizable_re.search(comment_text):
    339           is_translateable = False
    341         message_meaning = ''
    342         internal_comment = ''
    344         # If we have a "role model" (existing GRD file) and this node exists
    345         # in the role model, use the description, meaning and translateable
    346         # attributes from the role model.
    347         if self.role_model:
    348           role_node = self.role_model.GetNodeById(id)
    349           if role_node:
    350             is_translateable = role_node.IsTranslateable()
    351             message_meaning = role_node.attrs['meaning']
    352             comment_text = role_node.attrs['desc']
    353             internal_comment = role_node.attrs['internal_comment']
    355         # For nontranslateable messages, we don't want the complexity of
    356         # placeholderizing everything.
    357         if not is_translateable:
    358           msg_obj = tclib.Message(text=text)
    360         msg_node = message.MessageNode.Construct(node, msg_obj, id,
    361                                                  desc=comment_text,
    362                                                  translateable=is_translateable,
    363                                                  meaning=message_meaning)
    364         msg_node.attrs['internal_comment'] = internal_comment
    366         node.AddChild(msg_node)
    367         self.ExtraVerboseOut('Done processing message %s\n' % id)
    370   def Placeholderize(self, text):
    371     '''Creates a tclib.Message object from 'text', attempting to recognize
    372     a few different formats of text that can be automatically placeholderized
    373     (HTML code, printf-style format strings, and FormatMessage-style format
    374     strings).
    375     '''
    377     try:
    378       # First try HTML placeholderizing.
    379       # TODO(joi) Allow use of non-TotalRecall flavors of HTML placeholderizing
    380       msg = tr_html.HtmlToMessage(text, True)
    381       for item in msg.GetContent():
    382         if not isinstance(item, types.StringTypes):
    383           return msg  # Contained at least one placeholder, so we're done
    385       # HTML placeholderization didn't do anything, so try to find printf or
    386       # FormatMessage format specifiers and change them into placeholders.
    387       msg = tclib.Message()
    388       parts = _FORMAT_SPECIFIER.split(text)
    389       todo_counter = 1  # We make placeholder IDs 'TODO_0001' etc.
    390       for part in parts:
    391         if _FORMAT_SPECIFIER.match(part):
    392           msg.AppendPlaceholder(tclib.Placeholder(
    393             'TODO_%04d' % todo_counter, part, 'TODO'))
    394           todo_counter += 1
    395         elif part != '':
    396           msg.AppendText(part)
    398       if self.role_model and len(parts) > 1:  # there are TODO placeholders
    399         role_model_msg = self.role_model.UberClique().BestCliqueByOriginalText(
    400           msg.GetRealContent(), '')
    401         if role_model_msg:
    402           # replace wholesale to get placeholder names and examples
    403           msg = role_model_msg
    405       return msg
    406     except:
    407       print 'Exception processing message with text "%s"' % text
    408       raise