#! /usr/bin/env python3
# Written by Martin v. Löwis <loewis (at] informatik.hu-berlin.de>

"""Generate binary message catalog from textual translation description.

This program converts a textual Uniforum-style message catalog (.po file) into
a binary GNU catalog (.mo file).  This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation.

Usage: msgfmt.py [OPTIONS] filename.po

Options:
    -o file
    --output-file=file
        Specify the output file to write to.  If omitted, output will go to a
        file named filename.mo (based off the input file name).

    -h
    --help
        Print this message and exit.

    -V
    --version
        Display version information and exit.
"""

import os
import sys
import ast
import getopt
import struct
import array
from email.parser import HeaderParser

__version__ = "1.1"

# Maps msgid -> msgstr (both bytes) for every translation accepted by add().
# Plural entries keep their forms joined with NUL bytes.
MESSAGES = {}


def usage(code, msg=''):
    """Print the module docstring and an optional message, then exit.

    code -- exit status handed to sys.exit()
    msg  -- extra text printed to stderr after the usage text, if non-empty
    """
    print(__doc__, file=sys.stderr)
    if msg:
        print(msg, file=sys.stderr)
    sys.exit(code)


def add(id, str, fuzzy):
    """Add a non-fuzzy translation to the dictionary.

    id    -- the message id, as bytes
    str   -- the translation, as bytes; empty translations are ignored
    fuzzy -- true if the entry carries a fuzzy mark, in which case it is
             ignored
    """
    if not fuzzy and str:
        MESSAGES[id] = str


def generate():
    """Return the binary .mo catalog for the current MESSAGES, as bytes."""
    # the keys are sorted in the .mo file
    keys = sorted(MESSAGES)
    offsets = []
    id_pos = str_pos = 0
    for key in keys:
        value = MESSAGES[key]
        # For each string, we need size and file offset.  Each string is NUL
        # terminated; the NUL does not count into the size.
        offsets.append((id_pos, len(key), str_pos, len(value)))
        id_pos += len(key) + 1
        str_pos += len(value) + 1
    ids = b''.join(key + b'\0' for key in keys)
    strs = b''.join(MESSAGES[key] + b'\0' for key in keys)
    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the two index tables (8 bytes per entry in
    # each table, hence 16 bytes per message).
    keystart = 7*4 + 16*len(keys)
    # and the values start after the keys
    valuestart = keystart + len(ids)
    koffsets = []
    voffsets = []
    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    for o1, l1, o2, l2 in offsets:
        koffsets += [l1, o1 + keystart]
        voffsets += [l2, o2 + valuestart]
    output = struct.pack("Iiiiiii",
                         0x950412de,          # Magic
                         0,                   # Version
                         len(keys),           # # of entries
                         7*4,                 # start of key index
                         7*4 + len(keys)*8,   # start of value index
                         0, 0)                # size and offset of hash table
    # array.tostring() was removed in Python 3.9; tobytes() is its replacement
    output += array.array("i", koffsets + voffsets).tobytes()
    output += ids
    output += strs
    return output


def make(filename, outfile):
    """Compile the .po catalog *filename* into a binary .mo file.

    filename -- path of the input catalog; '.po' is appended if it is missing
    outfile  -- path of the output file, or None to derive it from the input
                name by replacing the extension with '.mo'

    When outfile is None every input gets its own catalog, so MESSAGES is
    emptied first.  With an explicit outfile the entries already collected
    are kept, which means several inputs compiled to the same output are
    merged.

    Exits with status 1 if the input cannot be read or is malformed.  A
    failure to write the output is only reported on stderr.
    """
    ID = 1
    STR = 2

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'
        # Don't leak the entries of a previously compiled file into this one
        MESSAGES.clear()

    try:
        with open(infile, 'rb') as f:
            lines = f.readlines()
    except IOError as msg:
        print(msg, file=sys.stderr)
        sys.exit(1)

    section = None
    fuzzy = 0
    msgid = None            # None until the first msgid keyword is seen
    msgstr = b''
    is_plural = False

    # Start off assuming Latin-1, so everything decodes without failure,
    # until we know the exact encoding
    encoding = 'latin-1'

    # Parse the catalog
    for lno, raw in enumerate(lines, 1):
        line = raw.decode(encoding)
        if line.startswith('#'):
            # If we get a comment line after a msgstr, this is a new entry
            if section == STR:
                add(msgid, msgstr, fuzzy)
                section = None
                fuzzy = 0
            # Record a fuzzy mark
            if line.startswith('#,') and 'fuzzy' in line:
                fuzzy = 1
            # Skip comments
            continue
        # Now we are in a msgid section, output previous section
        if line.startswith('msgid') and not line.startswith('msgid_plural'):
            if section == STR:
                add(msgid, msgstr, fuzzy)
                # No comment separated the two entries, so any fuzzy mark
                # seen so far belonged to the entry that was just stored.
                fuzzy = 0
                if not msgid:
                    # See whether there is an encoding declaration
                    p = HeaderParser()
                    charset = p.parsestr(
                        msgstr.decode(encoding)).get_content_charset()
                    if charset:
                        encoding = charset
            section = ID
            line = line[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif line.startswith('msgid_plural'):
            if section != ID:
                print('msgid_plural not preceded by msgid on %s:%d'
                      % (infile, lno), file=sys.stderr)
                sys.exit(1)
            line = line[12:]
            msgid += b'\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif line.startswith('msgstr'):
            if msgid is None:
                print('msgstr not preceded by msgid on %s:%d'
                      % (infile, lno), file=sys.stderr)
                sys.exit(1)
            section = STR
            if line.startswith('msgstr['):
                if not is_plural:
                    print('plural without msgid_plural on %s:%d'
                          % (infile, lno), file=sys.stderr)
                    sys.exit(1)
                line = line.split(']', 1)[1]
                if msgstr:
                    msgstr += b'\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    print('indexed msgstr required for plural on %s:%d'
                          % (infile, lno), file=sys.stderr)
                    sys.exit(1)
                line = line[6:]
        # Skip empty lines
        text = line.strip()
        if not text:
            continue
        # What is left must be a quoted string; anything that does not
        # evaluate to a str is reported instead of raising a traceback.
        try:
            value = ast.literal_eval(text)
        except (SyntaxError, ValueError, TypeError):
            value = None
        if isinstance(value, str) and section == ID:
            msgid += value.encode(encoding)
        elif isinstance(value, str) and section == STR:
            msgstr += value.encode(encoding)
        else:
            print('Syntax error on %s:%d' % (infile, lno),
                  'before:', file=sys.stderr)
            print(value if isinstance(value, str) else text, file=sys.stderr)
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as f:
            f.write(output)
    except IOError as msg:
        print(msg, file=sys.stderr)


def main():
    """Parse the command line and compile every .po file named on it.

    Exits with status 1 on bad options or when no input file is given.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            print("msgfmt.py", __version__)
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        print('No input file given', file=sys.stderr)
        print("Try `msgfmt --help' for more information.", file=sys.stderr)
        # Missing input is an error, so don't report success to the shell
        sys.exit(1)

    for filename in args:
        make(filename, outfile)


if __name__ == '__main__':
    main()