Home | History | Annotate | Download | only in i18n
      1 #! /usr/bin/env python

      2 # -*- coding: iso-8859-1 -*-

# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>

      4 
      5 """Generate binary message catalog from textual translation description.
      6 
      7 This program converts a textual Uniforum-style message catalog (.po file) into
      8 a binary GNU catalog (.mo file).  This is essentially the same function as the
      9 GNU msgfmt program, however, it is a simpler implementation.
     10 
     11 Usage: msgfmt.py [OPTIONS] filename.po
     12 
     13 Options:
     14     -o file
     15     --output-file=file
     16         Specify the output file to write to.  If omitted, output will go to a
     17         file named filename.mo (based off the input file name).
     18 
     19     -h
     20     --help
     21         Print this message and exit.
     22 
     23     -V
     24     --version
     25         Display version information and exit.
     26 """
     27 
     28 import sys
     29 import os
     30 import getopt
     31 import struct
     32 import array
     33 
# Version string printed by the -V / --version option.
__version__ = "1.1"

# Catalog under construction: maps msgid -> msgstr.  Filled by add() and
# read by generate().
MESSAGES = {}
     37 
     38 
     39 
     41 def usage(code, msg=''):
     42     print >> sys.stderr, __doc__
     43     if msg:
     44         print >> sys.stderr, msg
     45     sys.exit(code)
     46 
     47 
     48 
     50 def add(id, str, fuzzy):
     51     "Add a non-fuzzy translation to the dictionary."
     52     global MESSAGES
     53     if not fuzzy and str:
     54         MESSAGES[id] = str
     55 
     56 
     57 
     59 def generate():
     60     "Return the generated output."
     61     global MESSAGES
     62     keys = MESSAGES.keys()
     63     # the keys are sorted in the .mo file

     64     keys.sort()
     65     offsets = []
     66     ids = strs = ''
     67     for id in keys:
     68         # For each string, we need size and file offset.  Each string is NUL

     69         # terminated; the NUL does not count into the size.

     70         offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id])))
     71         ids += id + '\0'
     72         strs += MESSAGES[id] + '\0'
     73     output = ''
     74     # The header is 7 32-bit unsigned integers.  We don't use hash tables, so

     75     # the keys start right after the index tables.

     76     # translated string.

     77     keystart = 7*4+16*len(keys)
     78     # and the values start after the keys

     79     valuestart = keystart + len(ids)
     80     koffsets = []
     81     voffsets = []
     82     # The string table first has the list of keys, then the list of values.

     83     # Each entry has first the size of the string, then the file offset.

     84     for o1, l1, o2, l2 in offsets:
     85         koffsets += [l1, o1+keystart]
     86         voffsets += [l2, o2+valuestart]
     87     offsets = koffsets + voffsets
     88     output = struct.pack("Iiiiiii",
     89                          0x950412deL,       # Magic

     90                          0,                 # Version

     91                          len(keys),         # # of entries

     92                          7*4,               # start of key index

     93                          7*4+len(keys)*8,   # start of value index

     94                          0, 0)              # size and offset of hash table

     95     output += array.array("i", offsets).tostring()
     96     output += ids
     97     output += strs
     98     return output
     99 
    100 
    101 
    103 def make(filename, outfile):
    104     ID = 1
    105     STR = 2
    106 
    107     # Compute .mo name from .po name and arguments

    108     if filename.endswith('.po'):
    109         infile = filename
    110     else:
    111         infile = filename + '.po'
    112     if outfile is None:
    113         outfile = os.path.splitext(infile)[0] + '.mo'
    114 
    115     try:
    116         lines = open(infile).readlines()
    117     except IOError, msg:
    118         print >> sys.stderr, msg
    119         sys.exit(1)
    120 
    121     section = None
    122     fuzzy = 0
    123 
    124     # Parse the catalog

    125     lno = 0
    126     for l in lines:
    127         lno += 1
    128         # If we get a comment line after a msgstr, this is a new entry

    129         if l[0] == '#' and section == STR:
    130             add(msgid, msgstr, fuzzy)
    131             section = None
    132             fuzzy = 0
    133         # Record a fuzzy mark

    134         if l[:2] == '#,' and 'fuzzy' in l:
    135             fuzzy = 1
    136         # Skip comments

    137         if l[0] == '#':
    138             continue
    139         # Now we are in a msgid section, output previous section

    140         if l.startswith('msgid') and not l.startswith('msgid_plural'):
    141             if section == STR:
    142                 add(msgid, msgstr, fuzzy)
    143             section = ID
    144             l = l[5:]
    145             msgid = msgstr = ''
    146             is_plural = False
    147         # This is a message with plural forms

    148         elif l.startswith('msgid_plural'):
    149             if section != ID:
    150                 print >> sys.stderr, 'msgid_plural not preceeded by msgid on %s:%d' %\
    151                     (infile, lno)
    152                 sys.exit(1)
    153             l = l[12:]
    154             msgid += '\0' # separator of singular and plural

    155             is_plural = True
    156         # Now we are in a msgstr section

    157         elif l.startswith('msgstr'):
    158             section = STR
    159             if l.startswith('msgstr['):
    160                 if not is_plural:
    161                     print >> sys.stderr, 'plural without msgid_plural on %s:%d' %\
    162                         (infile, lno)
    163                     sys.exit(1)
    164                 l = l.split(']', 1)[1]
    165                 if msgstr:
    166                     msgstr += '\0' # Separator of the various plural forms

    167             else:
    168                 if is_plural:
    169                     print >> sys.stderr, 'indexed msgstr required for plural on  %s:%d' %\
    170                         (infile, lno)
    171                     sys.exit(1)
    172                 l = l[6:]
    173         # Skip empty lines

    174         l = l.strip()
    175         if not l:
    176             continue
    177         # XXX: Does this always follow Python escape semantics?

    178         l = eval(l)
    179         if section == ID:
    180             msgid += l
    181         elif section == STR:
    182             msgstr += l
    183         else:
    184             print >> sys.stderr, 'Syntax error on %s:%d' % (infile, lno), \
    185                   'before:'
    186             print >> sys.stderr, l
    187             sys.exit(1)
    188     # Add last entry

    189     if section == STR:
    190         add(msgid, msgstr, fuzzy)
    191 
    192     # Compute output

    193     output = generate()
    194 
    195     try:
    196         open(outfile,"wb").write(output)
    197     except IOError,msg:
    198         print >> sys.stderr, msg
    199 
    200 
    201 
    203 def main():
    204     try:
    205         opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
    206                                    ['help', 'version', 'output-file='])
    207     except getopt.error, msg:
    208         usage(1, msg)
    209 
    210     outfile = None
    211     # parse options

    212     for opt, arg in opts:
    213         if opt in ('-h', '--help'):
    214             usage(0)
    215         elif opt in ('-V', '--version'):
    216             print >> sys.stderr, "msgfmt.py", __version__
    217             sys.exit(0)
    218         elif opt in ('-o', '--output-file'):
    219             outfile = arg
    220     # do it

    221     if not args:
    222         print >> sys.stderr, 'No input file given'
    223         print >> sys.stderr, "Try `msgfmt --help' for more information."
    224         return
    225 
    226     for filename in args:
    227         make(filename, outfile)
    228 
    229 
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
    232