Home | History | Annotate | Download | only in i18n
      1 #! /usr/bin/env python3
      2 # Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>
      3 
      4 """Generate binary message catalog from textual translation description.
      5 
      6 This program converts a textual Uniforum-style message catalog (.po file) into
      7 a binary GNU catalog (.mo file).  This is essentially the same function as the
      8 GNU msgfmt program, however, it is a simpler implementation.
      9 
     10 Usage: msgfmt.py [OPTIONS] filename.po
     11 
     12 Options:
     13     -o file
     14     --output-file=file
     15         Specify the output file to write to.  If omitted, output will go to a
     16         file named filename.mo (based off the input file name).
     17 
     18     -h
     19     --help
     20         Print this message and exit.
     21 
     22     -V
     23     --version
     24         Display version information and exit.
     25 """
     26 
     27 import os
     28 import sys
     29 import ast
     30 import getopt
     31 import struct
     32 import array
     33 from email.parser import HeaderParser
     34 
# Version string printed by the -V / --version command-line option.
__version__ = "1.1"

# Translations collected so far: add() stores msgid -> msgstr pairs here and
# generate() serialises whatever the dictionary currently holds.
MESSAGES = {}
     38 
     39 
     40 
     42 def usage(code, msg=''):
     43     print(__doc__, file=sys.stderr)
     44     if msg:
     45         print(msg, file=sys.stderr)
     46     sys.exit(code)
     47 
     48 
     49 
     51 def add(id, str, fuzzy):
     52     "Add a non-fuzzy translation to the dictionary."
     53     global MESSAGES
     54     if not fuzzy and str:
     55         MESSAGES[id] = str
     56 
     57 
     58 
     60 def generate():
     61     "Return the generated output."
     62     global MESSAGES
     63     # the keys are sorted in the .mo file
     64     keys = sorted(MESSAGES.keys())
     65     offsets = []
     66     ids = strs = b''
     67     for id in keys:
     68         # For each string, we need size and file offset.  Each string is NUL
     69         # terminated; the NUL does not count into the size.
     70         offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id])))
     71         ids += id + b'\0'
     72         strs += MESSAGES[id] + b'\0'
     73     output = ''
     74     # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
     75     # the keys start right after the index tables.
     76     # translated string.
     77     keystart = 7*4+16*len(keys)
     78     # and the values start after the keys
     79     valuestart = keystart + len(ids)
     80     koffsets = []
     81     voffsets = []
     82     # The string table first has the list of keys, then the list of values.
     83     # Each entry has first the size of the string, then the file offset.
     84     for o1, l1, o2, l2 in offsets:
     85         koffsets += [l1, o1+keystart]
     86         voffsets += [l2, o2+valuestart]
     87     offsets = koffsets + voffsets
     88     output = struct.pack("Iiiiiii",
     89                          0x950412de,       # Magic
     90                          0,                 # Version
     91                          len(keys),         # # of entries
     92                          7*4,               # start of key index
     93                          7*4+len(keys)*8,   # start of value index
     94                          0, 0)              # size and offset of hash table
     95     output += array.array("i", offsets).tobytes()
     96     output += ids
     97     output += strs
     98     return output
     99 
    100 
    101 
    103 def make(filename, outfile):
    104     ID = 1
    105     STR = 2
    106 
    107     # Compute .mo name from .po name and arguments
    108     if filename.endswith('.po'):
    109         infile = filename
    110     else:
    111         infile = filename + '.po'
    112     if outfile is None:
    113         outfile = os.path.splitext(infile)[0] + '.mo'
    114 
    115     try:
    116         with open(infile, 'rb') as f:
    117             lines = f.readlines()
    118     except IOError as msg:
    119         print(msg, file=sys.stderr)
    120         sys.exit(1)
    121 
    122     section = None
    123     fuzzy = 0
    124 
    125     # Start off assuming Latin-1, so everything decodes without failure,
    126     # until we know the exact encoding
    127     encoding = 'latin-1'
    128 
    129     # Parse the catalog
    130     lno = 0
    131     for l in lines:
    132         l = l.decode(encoding)
    133         lno += 1
    134         # If we get a comment line after a msgstr, this is a new entry
    135         if l[0] == '#' and section == STR:
    136             add(msgid, msgstr, fuzzy)
    137             section = None
    138             fuzzy = 0
    139         # Record a fuzzy mark
    140         if l[:2] == '#,' and 'fuzzy' in l:
    141             fuzzy = 1
    142         # Skip comments
    143         if l[0] == '#':
    144             continue
    145         # Now we are in a msgid section, output previous section
    146         if l.startswith('msgid') and not l.startswith('msgid_plural'):
    147             if section == STR:
    148                 add(msgid, msgstr, fuzzy)
    149                 if not msgid:
    150                     # See whether there is an encoding declaration
    151                     p = HeaderParser()
    152                     charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
    153                     if charset:
    154                         encoding = charset
    155             section = ID
    156             l = l[5:]
    157             msgid = msgstr = b''
    158             is_plural = False
    159         # This is a message with plural forms
    160         elif l.startswith('msgid_plural'):
    161             if section != ID:
    162                 print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno),
    163                       file=sys.stderr)
    164                 sys.exit(1)
    165             l = l[12:]
    166             msgid += b'\0' # separator of singular and plural
    167             is_plural = True
    168         # Now we are in a msgstr section
    169         elif l.startswith('msgstr'):
    170             section = STR
    171             if l.startswith('msgstr['):
    172                 if not is_plural:
    173                     print('plural without msgid_plural on %s:%d' % (infile, lno),
    174                           file=sys.stderr)
    175                     sys.exit(1)
    176                 l = l.split(']', 1)[1]
    177                 if msgstr:
    178                     msgstr += b'\0' # Separator of the various plural forms
    179             else:
    180                 if is_plural:
    181                     print('indexed msgstr required for plural on  %s:%d' % (infile, lno),
    182                           file=sys.stderr)
    183                     sys.exit(1)
    184                 l = l[6:]
    185         # Skip empty lines
    186         l = l.strip()
    187         if not l:
    188             continue
    189         l = ast.literal_eval(l)
    190         if section == ID:
    191             msgid += l.encode(encoding)
    192         elif section == STR:
    193             msgstr += l.encode(encoding)
    194         else:
    195             print('Syntax error on %s:%d' % (infile, lno), \
    196                   'before:', file=sys.stderr)
    197             print(l, file=sys.stderr)
    198             sys.exit(1)
    199     # Add last entry
    200     if section == STR:
    201         add(msgid, msgstr, fuzzy)
    202 
    203     # Compute output
    204     output = generate()
    205 
    206     try:
    207         with open(outfile,"wb") as f:
    208             f.write(output)
    209     except IOError as msg:
    210         print(msg, file=sys.stderr)
    211 
    212 
    213 
    215 def main():
    216     try:
    217         opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
    218                                    ['help', 'version', 'output-file='])
    219     except getopt.error as msg:
    220         usage(1, msg)
    221 
    222     outfile = None
    223     # parse options
    224     for opt, arg in opts:
    225         if opt in ('-h', '--help'):
    226             usage(0)
    227         elif opt in ('-V', '--version'):
    228             print("msgfmt.py", __version__)
    229             sys.exit(0)
    230         elif opt in ('-o', '--output-file'):
    231             outfile = arg
    232     # do it
    233     if not args:
    234         print('No input file given', file=sys.stderr)
    235         print("Try `msgfmt --help' for more information.", file=sys.stderr)
    236         return
    237 
    238     for filename in args:
    239         make(filename, outfile)
    240 
    241 
# Run the command-line interface only when this file is executed as a script.
if __name__ == '__main__':
    main()
    244