Home | History | Annotate | Download | only in i18n
      1 #! /usr/bin/env python3
      2 # Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
      3 
      4 """Generate binary message catalog from textual translation description.
      5 
      6 This program converts a textual Uniforum-style message catalog (.po file) into
      7 a binary GNU catalog (.mo file).  This is essentially the same function as the
      8 GNU msgfmt program, however, it is a simpler implementation.
      9 
     10 Usage: msgfmt.py [OPTIONS] filename.po
     11 
     12 Options:
     13     -o file
     14     --output-file=file
     15         Specify the output file to write to.  If omitted, output will go to a
     16         file named filename.mo (based off the input file name).
     17 
     18     -h
     19     --help
     20         Print this message and exit.
     21 
     22     -V
     23     --version
     24         Display version information and exit.
     25 """
     26 
     27 import os
     28 import sys
     29 import ast
     30 import getopt
     31 import struct
     32 import array
     33 from email.parser import HeaderParser
     34 
     35 __version__ = "1.1"
     36 
     37 MESSAGES = {}
     38 
     39 
     40 
     42 def usage(code, msg=''):
     43     print(__doc__, file=sys.stderr)
     44     if msg:
     45         print(msg, file=sys.stderr)
     46     sys.exit(code)
     47 
     48 
     49 
     51 def add(id, str, fuzzy):
     52     "Add a non-fuzzy translation to the dictionary."
     53     global MESSAGES
     54     if not fuzzy and str:
     55         MESSAGES[id] = str
     56 
     57 
     58 
     60 def generate():
     61     "Return the generated output."
     62     global MESSAGES
     63     # the keys are sorted in the .mo file
     64     keys = sorted(MESSAGES.keys())
     65     offsets = []
     66     ids = strs = b''
     67     for id in keys:
     68         # For each string, we need size and file offset.  Each string is NUL
     69         # terminated; the NUL does not count into the size.
     70         offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id])))
     71         ids += id + b'\0'
     72         strs += MESSAGES[id] + b'\0'
     73     output = ''
     74     # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
     75     # the keys start right after the index tables.
     76     # translated string.
     77     keystart = 7*4+16*len(keys)
     78     # and the values start after the keys
     79     valuestart = keystart + len(ids)
     80     koffsets = []
     81     voffsets = []
     82     # The string table first has the list of keys, then the list of values.
     83     # Each entry has first the size of the string, then the file offset.
     84     for o1, l1, o2, l2 in offsets:
     85         koffsets += [l1, o1+keystart]
     86         voffsets += [l2, o2+valuestart]
     87     offsets = koffsets + voffsets
     88     output = struct.pack("Iiiiiii",
     89                          0x950412de,       # Magic
     90                          0,                 # Version
     91                          len(keys),         # # of entries
     92                          7*4,               # start of key index
     93                          7*4+len(keys)*8,   # start of value index
     94                          0, 0)              # size and offset of hash table
     95     output += array.array("i", offsets).tostring()
     96     output += ids
     97     output += strs
     98     return output
     99 
    100 
    101 
    103 def make(filename, outfile):
    104     ID = 1
    105     STR = 2
    106 
    107     # Compute .mo name from .po name and arguments
    108     if filename.endswith('.po'):
    109         infile = filename
    110     else:
    111         infile = filename + '.po'
    112     if outfile is None:
    113         outfile = os.path.splitext(infile)[0] + '.mo'
    114 
    115     try:
    116         lines = open(infile, 'rb').readlines()
    117     except IOError as msg:
    118         print(msg, file=sys.stderr)
    119         sys.exit(1)
    120 
    121     section = None
    122     fuzzy = 0
    123 
    124     # Start off assuming Latin-1, so everything decodes without failure,
    125     # until we know the exact encoding
    126     encoding = 'latin-1'
    127 
    128     # Parse the catalog
    129     lno = 0
    130     for l in lines:
    131         l = l.decode(encoding)
    132         lno += 1
    133         # If we get a comment line after a msgstr, this is a new entry
    134         if l[0] == '#' and section == STR:
    135             add(msgid, msgstr, fuzzy)
    136             section = None
    137             fuzzy = 0
    138         # Record a fuzzy mark
    139         if l[:2] == '#,' and 'fuzzy' in l:
    140             fuzzy = 1
    141         # Skip comments
    142         if l[0] == '#':
    143             continue
    144         # Now we are in a msgid section, output previous section
    145         if l.startswith('msgid') and not l.startswith('msgid_plural'):
    146             if section == STR:
    147                 add(msgid, msgstr, fuzzy)
    148                 if not msgid:
    149                     # See whether there is an encoding declaration
    150                     p = HeaderParser()
    151                     charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
    152                     if charset:
    153                         encoding = charset
    154             section = ID
    155             l = l[5:]
    156             msgid = msgstr = b''
    157             is_plural = False
    158         # This is a message with plural forms
    159         elif l.startswith('msgid_plural'):
    160             if section != ID:
    161                 print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno),
    162                       file=sys.stderr)
    163                 sys.exit(1)
    164             l = l[12:]
    165             msgid += b'\0' # separator of singular and plural
    166             is_plural = True
    167         # Now we are in a msgstr section
    168         elif l.startswith('msgstr'):
    169             section = STR
    170             if l.startswith('msgstr['):
    171                 if not is_plural:
    172                     print('plural without msgid_plural on %s:%d' % (infile, lno),
    173                           file=sys.stderr)
    174                     sys.exit(1)
    175                 l = l.split(']', 1)[1]
    176                 if msgstr:
    177                     msgstr += b'\0' # Separator of the various plural forms
    178             else:
    179                 if is_plural:
    180                     print('indexed msgstr required for plural on  %s:%d' % (infile, lno),
    181                           file=sys.stderr)
    182                     sys.exit(1)
    183                 l = l[6:]
    184         # Skip empty lines
    185         l = l.strip()
    186         if not l:
    187             continue
    188         l = ast.literal_eval(l)
    189         if section == ID:
    190             msgid += l.encode(encoding)
    191         elif section == STR:
    192             msgstr += l.encode(encoding)
    193         else:
    194             print('Syntax error on %s:%d' % (infile, lno), \
    195                   'before:', file=sys.stderr)
    196             print(l, file=sys.stderr)
    197             sys.exit(1)
    198     # Add last entry
    199     if section == STR:
    200         add(msgid, msgstr, fuzzy)
    201 
    202     # Compute output
    203     output = generate()
    204 
    205     try:
    206         open(outfile,"wb").write(output)
    207     except IOError as msg:
    208         print(msg, file=sys.stderr)
    209 
    210 
    211 
    213 def main():
    214     try:
    215         opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
    216                                    ['help', 'version', 'output-file='])
    217     except getopt.error as msg:
    218         usage(1, msg)
    219 
    220     outfile = None
    221     # parse options
    222     for opt, arg in opts:
    223         if opt in ('-h', '--help'):
    224             usage(0)
    225         elif opt in ('-V', '--version'):
    226             print("msgfmt.py", __version__)
    227             sys.exit(0)
    228         elif opt in ('-o', '--output-file'):
    229             outfile = arg
    230     # do it
    231     if not args:
    232         print('No input file given', file=sys.stderr)
    233         print("Try `msgfmt --help' for more information.", file=sys.stderr)
    234         return
    235 
    236     for filename in args:
    237         make(filename, outfile)
    238 
    239 
    240 if __name__ == '__main__':
    241     main()
    242