#! /usr/bin/env python
# -*- coding: iso-8859-1 -*-
# Written by Martin v. Loewis <loewis (at] informatik.hu-berlin.de>

"""Generate binary message catalog from textual translation description.

This program converts a textual Uniforum-style message catalog (.po file) into
a binary GNU catalog (.mo file). This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation.

Usage: msgfmt.py [OPTIONS] filename.po

Options:
    -o file
    --output-file=file
        Specify the output file to write to. If omitted, output will go to a
        file named filename.mo (based off the input file name).

    -h
    --help
        Print this message and exit.

    -V
    --version
        Display version information and exit.
"""

# Implementation note: the code below is written to run unchanged on
# Python 2.6+ and Python 3.  All catalog data (message ids and translations)
# is handled as byte strings; on Python 2 that is simply ``str``.

import sys
import os
import getopt
import struct
import array
import ast

__version__ = "1.1"

# Catalog being built: maps message id -> translation (both byte strings).
# Plural entries store "singular\0plural" as the id and the plural forms
# joined by "\0" as the translation.
MESSAGES = {}


def _err(*parts):
    """Write *parts*, space-separated and newline-terminated, to stderr."""
    sys.stderr.write(' '.join([str(part) for part in parts]) + '\n')


def _text(data):
    """Return *data* in a form suitable for an error message.

    On Python 2 byte strings are ``str`` and are returned unchanged; on
    Python 3 ``bytes`` are decoded as Latin-1 so that every byte maps to a
    character.
    """
    if isinstance(data, str):
        return data
    return data.decode('latin-1')


def _unquote(literal):
    """Evaluate a quoted string literal from a .po line and return bytes.

    *literal* is the stripped byte string taken from the catalog, e.g.
    ``"Hello\\n"`` including its quotes.  ``ast.literal_eval`` is used
    instead of ``eval`` so that the content of a .po file can never execute
    code.

    NOTE: escape sequences follow Python string-literal rules, which is not
    guaranteed to match the C rules used by GNU gettext in every corner case.
    """
    if isinstance(literal, str):
        # Python 2: ``str`` already is the byte string type.
        return ast.literal_eval(literal)
    # Python 3: round-trip through Latin-1 so raw bytes and \xNN escapes map
    # one-to-one onto output bytes.  Escapes that produce code points above
    # 255 (e.g. \uXXXX) cannot be represented and raise UnicodeEncodeError.
    return ast.literal_eval(literal.decode('latin-1')).encode('latin-1')


def usage(code, msg=''):
    """Print the module help text and optional *msg* to stderr, then exit.

    code -- process exit status passed to sys.exit()
    msg  -- extra message (or exception) printed after the help text
    """
    _err(__doc__)
    if msg:
        _err(msg)
    sys.exit(code)


def add(id, str, fuzzy):
    """Add a non-fuzzy translation to the dictionary.

    id    -- message id (byte string)
    str   -- translation (byte string); empty translations are ignored
    fuzzy -- true if the entry is marked fuzzy, in which case it is skipped
    """
    if not fuzzy and str:
        MESSAGES[id] = str


def generate():
    """Return the binary .mo representation of MESSAGES as a byte string.

    Layout: a header of seven 32-bit integers, the key index table, the value
    index table, all NUL-terminated keys, then all NUL-terminated values.
    Integers are written in native byte order; no hash table is emitted.
    """
    # The keys are sorted in the .mo file.
    keys = sorted(MESSAGES)
    offsets = []
    id_chunks = []
    str_chunks = []
    id_pos = str_pos = 0
    for key in keys:
        value = MESSAGES[key]
        # For each string, we need size and offset.  Each string is NUL
        # terminated; the NUL does not count into the size.
        offsets.append((id_pos, len(key), str_pos, len(value)))
        id_chunks.append(key + b'\0')
        str_chunks.append(value + b'\0')
        id_pos += len(key) + 1
        str_pos += len(value) + 1
    ids = b''.join(id_chunks)
    strs = b''.join(str_chunks)
    # The header is 7 32-bit integers and each message needs 16 bytes of
    # index data (two 8-byte entries).  We don't use hash tables, so the keys
    # start right after the index tables...
    keystart = 7 * 4 + 16 * len(keys)
    # ...and the values start after the keys.
    valuestart = keystart + len(ids)
    koffsets = []
    voffsets = []
    # The index first lists all keys, then all values.  Each entry has first
    # the size of the string, then its offset from the start of the file.
    for o1, l1, o2, l2 in offsets:
        koffsets += [l1, o1 + keystart]
        voffsets += [l2, o2 + valuestart]
    table = array.array("i", koffsets + voffsets)
    # array.tostring() was renamed tobytes() in Python 3.2 and removed in 3.9.
    table_bytes = getattr(table, 'tobytes', None) or table.tostring
    header = struct.pack("Iiiiiii",
                         0x950412de,            # Magic
                         0,                     # Version
                         len(keys),             # Number of entries
                         7 * 4,                 # start of key index
                         7 * 4 + len(keys) * 8, # start of value index
                         0, 0)                  # size and offset of hash table
    return header + table_bytes() + ids + strs


def make(filename, outfile):
    """Compile the catalog *filename* into the binary file *outfile*.

    filename -- path of the .po file; '.po' is appended if missing
    outfile  -- path of the .mo file to write, or None to derive it from the
                input name by replacing the extension with '.mo'

    When *outfile* is None every input has its own output, so the global
    MESSAGES catalog is emptied first; otherwise messages of a previous input
    would leak into this output.  With an explicit *outfile* the catalog keeps
    accumulating across calls, so the last write holds the merged messages.

    Exits the process with status 1 if the input cannot be read or contains a
    structural error.  A failure to write the output is reported on stderr
    but does not terminate the process.
    """
    ID = 1
    STR = 2

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'
        MESSAGES.clear()

    # Read as bytes: the catalog is copied byte for byte into the output, and
    # stripping each line below also removes any '\r' left by CRLF endings.
    try:
        with open(infile, 'rb') as po_file:
            lines = po_file.readlines()
    except IOError as msg:
        _err(msg)
        sys.exit(1)

    section = None
    fuzzy = 0
    msgid = None        # None until the first msgid line has been seen
    msgstr = b''
    is_plural = False

    # Parse the catalog
    for lno, l in enumerate(lines, 1):
        if l.startswith(b'#'):
            # If we get a comment line after a msgstr, this is a new entry
            if section == STR:
                add(msgid, msgstr, fuzzy)
                section = None
                fuzzy = 0
            # Record a fuzzy mark
            if l.startswith(b'#,') and b'fuzzy' in l:
                fuzzy = 1
            # Skip comments
            continue
        # Now we are in a msgid section, output previous section
        if l.startswith(b'msgid') and not l.startswith(b'msgid_plural'):
            if section == STR:
                add(msgid, msgstr, fuzzy)
                # No comment separated the two entries, so the fuzzy mark
                # belonged to the entry just stored, not to this one.
                fuzzy = 0
            section = ID
            l = l[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif l.startswith(b'msgid_plural'):
            if section != ID:
                _err('msgid_plural not preceeded by msgid on %s:%d' %
                     (infile, lno))
                sys.exit(1)
            l = l[12:]
            msgid += b'\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif l.startswith(b'msgstr'):
            if msgid is None:
                _err('msgstr not preceded by msgid on %s:%d' % (infile, lno))
                sys.exit(1)
            section = STR
            if l.startswith(b'msgstr['):
                if not is_plural:
                    _err('plural without msgid_plural on %s:%d' %
                         (infile, lno))
                    sys.exit(1)
                l = l.split(b']', 1)[1]
                if msgstr:
                    msgstr += b'\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    _err('indexed msgstr required for plural on %s:%d' %
                         (infile, lno))
                    sys.exit(1)
                l = l[6:]
        # Skip empty lines
        l = l.strip()
        if not l:
            continue
        l = _unquote(l)
        if section == ID:
            msgid += l
        elif section == STR:
            msgstr += l
        else:
            _err('Syntax error on %s:%d' % (infile, lno), 'before:')
            _err(_text(l))
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as mo_file:
            mo_file.write(output)
    except IOError as msg:
        _err(msg)


def main():
    """Command-line entry point: parse options and compile each input file."""
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            _err("msgfmt.py", __version__)
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        _err('No input file given')
        _err("Try `msgfmt --help' for more information.")
        return

    for filename in args:
        make(filename, outfile)


if __name__ == '__main__':
    main()