#! /usr/bin/env python
# -*- coding: iso-8859-1 -*-
# Written by Martin v. Loewis <loewis@informatik.hu-berlin.de>

"""Generate binary message catalog from textual translation description.

This program converts a textual Uniforum-style message catalog (.po file) into
a binary GNU catalog (.mo file). This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation.

Usage: msgfmt.py [OPTIONS] filename.po

Options:
    -o file
    --output-file=file
        Specify the output file to write to. If omitted, output will go to a
        file named filename.mo (based off the input file name).

    -h
    --help
        Print this message and exit.

    -V
    --version
        Display version information and exit.
"""

import os
import sys
import ast
import getopt
import struct
import array

__version__ = "1.1"

# Catalog under construction: maps msgid -> msgstr, both byte strings.
# Filled by add() and serialized by generate().
MESSAGES = {}

# On Python 3 text (str) and binary data (bytes) are distinct types; on
# Python 2 the native str already is a byte string.
_PY3 = sys.version_info[0] >= 3


def _warn(msg):
    """Write msg (any object) followed by a newline to standard error."""
    sys.stderr.write('%s\n' % (msg,))


def _to_text(data):
    """Return the byte string data as a native str.

    Latin-1 maps every byte to exactly one character, so the conversion is
    lossless and _to_bytes() restores the original bytes.
    """
    if _PY3:
        return data.decode('latin-1')
    return data


def _to_bytes(text):
    """Return the native str text as a byte string (inverse of _to_text)."""
    if _PY3:
        return text.encode('latin-1')
    return text


def usage(code, msg=''):
    """Print the module help and an optional message to stderr, then exit.

    code is the process exit status; msg, when non-empty, is printed after
    the help text.
    """
    _warn(__doc__)
    if msg:
        _warn(msg)
    sys.exit(code)


def add(id, str, fuzzy):
    """Add a non-fuzzy translation to the dictionary.

    id and str are the message id and its translation.  Entries that are
    marked fuzzy or have an empty translation are ignored.
    """
    if not fuzzy and str:
        MESSAGES[id] = str


def generate():
    """Return the generated output.

    The result is the complete binary .mo image, as a byte string, for the
    entries currently held in MESSAGES.
    """
    # the keys are sorted in the .mo file
    keys = sorted(MESSAGES)
    offsets = []
    id_pos = str_pos = 0
    for key in keys:
        value = MESSAGES[key]
        # For each string, we need size and file offset.  Each string is NUL
        # terminated; the NUL does not count into the size.
        offsets.append((id_pos, len(key), str_pos, len(value)))
        id_pos += len(key) + 1
        str_pos += len(value) + 1
    ids = b''.join([key + b'\0' for key in keys])
    strs = b''.join([MESSAGES[key] + b'\0' for key in keys])
    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the index tables.
    keystart = 7 * 4 + 16 * len(keys)
    # and the values start after the keys
    valuestart = keystart + len(ids)
    koffsets = []
    voffsets = []
    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    for o1, l1, o2, l2 in offsets:
        koffsets += [l1, o1 + keystart]
        voffsets += [l2, o2 + valuestart]
    output = struct.pack("Iiiiiii",
                         0x950412de,              # Magic
                         0,                       # Version
                         len(keys),               # # of entries
                         7 * 4,                   # start of key index
                         7 * 4 + len(keys) * 8,   # start of value index
                         0, 0)                    # size and offset of hash table
    table = array.array("i", koffsets + voffsets)
    # array.tostring() only exists on older interpreters; newer ones call the
    # same operation tobytes().
    serialize = getattr(table, 'tobytes', None) or table.tostring
    return output + serialize() + ids + strs


def make(filename, outfile):
    """Compile the .po catalog filename into the .mo file outfile.

    '.po' is appended to filename when it is missing.  When outfile is None
    the output name is the input name with its extension replaced by '.mo'.

    The input is processed as bytes, so translations are copied to the
    output in whatever encoding the .po file uses.

    Parsed entries are added to the module-level MESSAGES dictionary, which
    is not emptied first: entries from earlier calls in the same process are
    written again.

    Exits with status 1 if the input cannot be read or contains a syntax
    error.  A failure to write the output is reported on stderr only.
    """
    ID = 1
    STR = 2

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        with open(infile, 'rb') as source:
            lines = source.readlines()
    except IOError as msg:
        _warn(msg)
        sys.exit(1)

    section = None
    fuzzy = 0

    # Parse the catalog
    lno = 0
    for l in lines:
        lno += 1
        is_comment = l.startswith(b'#')
        # If we get a comment line after a msgstr, this is a new entry
        if is_comment and section == STR:
            add(msgid, msgstr, fuzzy)
            section = None
            fuzzy = 0
        # Record a fuzzy mark
        if l.startswith(b'#,') and b'fuzzy' in l:
            fuzzy = 1
        # Skip comments
        if is_comment:
            continue
        # Now we are in a msgid section, output previous section
        if l.startswith(b'msgid') and not l.startswith(b'msgid_plural'):
            if section == STR:
                add(msgid, msgstr, fuzzy)
                # A fuzzy mark applies only to the entry it precedes; do not
                # let it leak into an entry that follows without comments.
                fuzzy = 0
            section = ID
            l = l[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif l.startswith(b'msgid_plural'):
            if section != ID:
                _warn('msgid_plural not preceded by msgid on %s:%d' %
                      (infile, lno))
                sys.exit(1)
            l = l[12:]
            msgid += b'\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif l.startswith(b'msgstr'):
            section = STR
            if l.startswith(b'msgstr['):
                if not is_plural:
                    _warn('plural without msgid_plural on %s:%d' %
                          (infile, lno))
                    sys.exit(1)
                l = l.split(b']', 1)[1]
                if msgstr:
                    msgstr += b'\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    _warn('indexed msgstr required for plural on %s:%d' %
                          (infile, lno))
                    sys.exit(1)
                l = l[6:]
        # Skip empty lines
        l = l.strip()
        if not l:
            continue
        # What remains is a quoted string; evaluating it as a Python literal
        # removes the quotes and resolves the backslash escapes.
        value = ast.literal_eval(_to_text(l))
        if section == ID:
            msgid += _to_bytes(value)
        elif section == STR:
            msgstr += _to_bytes(value)
        else:
            _warn('Syntax error on %s:%d before:' % (infile, lno))
            _warn(value)
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as target:
            target.write(output)
    except IOError as msg:
        _warn(msg)


def main():
    """Command-line entry point: parse the options and compile each input."""
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            _warn("msgfmt.py %s" % (__version__,))
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        _warn('No input file given')
        _warn("Try `msgfmt --help' for more information.")
        # Nothing was compiled, so report failure to the caller.
        sys.exit(1)

    for filename in args:
        make(filename, outfile)


if __name__ == '__main__':
    main()