Home | History | Annotate | Download | only in i18n
      1 #! /usr/bin/env python
      2 # -*- coding: iso-8859-1 -*-
      3 # Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
      4 
      5 """Generate binary message catalog from textual translation description.
      6 
      7 This program converts a textual Uniforum-style message catalog (.po file) into
      8 a binary GNU catalog (.mo file).  This is essentially the same function as the
      9 GNU msgfmt program, however, it is a simpler implementation.
     10 
     11 Usage: msgfmt.py [OPTIONS] filename.po
     12 
     13 Options:
     14     -o file
     15     --output-file=file
     16         Specify the output file to write to.  If omitted, output will go to a
     17         file named filename.mo (based off the input file name).
     18 
     19     -h
     20     --help
     21         Print this message and exit.
     22 
     23     -V
     24     --version
     25         Display version information and exit.
     26 """
     27 
     28 import os
     29 import sys
     30 import ast
     31 import getopt
     32 import struct
     33 import array
     34 
# Version string written to stderr by the -V/--version option.
__version__ = "1.1"

# Maps each msgid to its msgstr; filled by add() and read by generate().
MESSAGES = {}
     38 
     39 
     40 
     42 def usage(code, msg=''):
     43     print >> sys.stderr, __doc__
     44     if msg:
     45         print >> sys.stderr, msg
     46     sys.exit(code)
     47 
     48 
     49 
     51 def add(id, str, fuzzy):
     52     "Add a non-fuzzy translation to the dictionary."
     53     global MESSAGES
     54     if not fuzzy and str:
     55         MESSAGES[id] = str
     56 
     57 
     58 
     60 def generate():
     61     "Return the generated output."
     62     global MESSAGES
     63     keys = MESSAGES.keys()
     64     # the keys are sorted in the .mo file
     65     keys.sort()
     66     offsets = []
     67     ids = strs = ''
     68     for id in keys:
     69         # For each string, we need size and file offset.  Each string is NUL
     70         # terminated; the NUL does not count into the size.
     71         offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id])))
     72         ids += id + '\0'
     73         strs += MESSAGES[id] + '\0'
     74     output = ''
     75     # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
     76     # the keys start right after the index tables.
     77     # translated string.
     78     keystart = 7*4+16*len(keys)
     79     # and the values start after the keys
     80     valuestart = keystart + len(ids)
     81     koffsets = []
     82     voffsets = []
     83     # The string table first has the list of keys, then the list of values.
     84     # Each entry has first the size of the string, then the file offset.
     85     for o1, l1, o2, l2 in offsets:
     86         koffsets += [l1, o1+keystart]
     87         voffsets += [l2, o2+valuestart]
     88     offsets = koffsets + voffsets
     89     output = struct.pack("Iiiiiii",
     90                          0x950412deL,       # Magic
     91                          0,                 # Version
     92                          len(keys),         # # of entries
     93                          7*4,               # start of key index
     94                          7*4+len(keys)*8,   # start of value index
     95                          0, 0)              # size and offset of hash table
     96     output += array.array("i", offsets).tostring()
     97     output += ids
     98     output += strs
     99     return output
    100 
    101 
    102 
    104 def make(filename, outfile):
    105     ID = 1
    106     STR = 2
    107 
    108     # Compute .mo name from .po name and arguments
    109     if filename.endswith('.po'):
    110         infile = filename
    111     else:
    112         infile = filename + '.po'
    113     if outfile is None:
    114         outfile = os.path.splitext(infile)[0] + '.mo'
    115 
    116     try:
    117         lines = open(infile).readlines()
    118     except IOError, msg:
    119         print >> sys.stderr, msg
    120         sys.exit(1)
    121 
    122     section = None
    123     fuzzy = 0
    124 
    125     # Parse the catalog
    126     lno = 0
    127     for l in lines:
    128         lno += 1
    129         # If we get a comment line after a msgstr, this is a new entry
    130         if l[0] == '#' and section == STR:
    131             add(msgid, msgstr, fuzzy)
    132             section = None
    133             fuzzy = 0
    134         # Record a fuzzy mark
    135         if l[:2] == '#,' and 'fuzzy' in l:
    136             fuzzy = 1
    137         # Skip comments
    138         if l[0] == '#':
    139             continue
    140         # Now we are in a msgid section, output previous section
    141         if l.startswith('msgid') and not l.startswith('msgid_plural'):
    142             if section == STR:
    143                 add(msgid, msgstr, fuzzy)
    144             section = ID
    145             l = l[5:]
    146             msgid = msgstr = ''
    147             is_plural = False
    148         # This is a message with plural forms
    149         elif l.startswith('msgid_plural'):
    150             if section != ID:
    151                 print >> sys.stderr, 'msgid_plural not preceded by msgid on %s:%d' %\
    152                     (infile, lno)
    153                 sys.exit(1)
    154             l = l[12:]
    155             msgid += '\0' # separator of singular and plural
    156             is_plural = True
    157         # Now we are in a msgstr section
    158         elif l.startswith('msgstr'):
    159             section = STR
    160             if l.startswith('msgstr['):
    161                 if not is_plural:
    162                     print >> sys.stderr, 'plural without msgid_plural on %s:%d' %\
    163                         (infile, lno)
    164                     sys.exit(1)
    165                 l = l.split(']', 1)[1]
    166                 if msgstr:
    167                     msgstr += '\0' # Separator of the various plural forms
    168             else:
    169                 if is_plural:
    170                     print >> sys.stderr, 'indexed msgstr required for plural on  %s:%d' %\
    171                         (infile, lno)
    172                     sys.exit(1)
    173                 l = l[6:]
    174         # Skip empty lines
    175         l = l.strip()
    176         if not l:
    177             continue
    178         l = ast.literal_eval(l)
    179         if section == ID:
    180             msgid += l
    181         elif section == STR:
    182             msgstr += l
    183         else:
    184             print >> sys.stderr, 'Syntax error on %s:%d' % (infile, lno), \
    185                   'before:'
    186             print >> sys.stderr, l
    187             sys.exit(1)
    188     # Add last entry
    189     if section == STR:
    190         add(msgid, msgstr, fuzzy)
    191 
    192     # Compute output
    193     output = generate()
    194 
    195     try:
    196         open(outfile,"wb").write(output)
    197     except IOError,msg:
    198         print >> sys.stderr, msg
    199 
    200 
    201 
    203 def main():
    204     try:
    205         opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
    206                                    ['help', 'version', 'output-file='])
    207     except getopt.error, msg:
    208         usage(1, msg)
    209 
    210     outfile = None
    211     # parse options
    212     for opt, arg in opts:
    213         if opt in ('-h', '--help'):
    214             usage(0)
    215         elif opt in ('-V', '--version'):
    216             print >> sys.stderr, "msgfmt.py", __version__
    217             sys.exit(0)
    218         elif opt in ('-o', '--output-file'):
    219             outfile = arg
    220     # do it
    221     if not args:
    222         print >> sys.stderr, 'No input file given'
    223         print >> sys.stderr, "Try `msgfmt --help' for more information."
    224         return
    225 
    226     for filename in args:
    227         make(filename, outfile)
    228 
    229 
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
    232