#! /usr/bin/env python3
# Written by Martin v. Loewis <loewis (at] informatik.hu-berlin.de>

"""Generate binary message catalog from textual translation description.

This program converts a textual Uniforum-style message catalog (.po file) into
a binary GNU catalog (.mo file). This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation.

Usage: msgfmt.py [OPTIONS] filename.po

Options:
    -o file
    --output-file=file
        Specify the output file to write to. If omitted, output will go to a
        file named filename.mo (based off the input file name).

    -h
    --help
        Print this message and exit.

    -V
    --version
        Display version information and exit.
"""

import os
import sys
import ast
import getopt
import struct
import array
from email.parser import HeaderParser

__version__ = "1.1"

# Maps encoded msgid (bytes) -> encoded msgstr (bytes).
# NOTE(review): this dictionary is module-level state that is filled by add()
# and never cleared here, so entries accumulate across successive make()
# calls within one process.
MESSAGES = {}


def usage(code, msg=''):
    """Print the module docstring (and *msg*, if given) to stderr, then exit.

    *code* is passed to sys.exit() as the process exit status.
    """
    print(__doc__, file=sys.stderr)
    if msg:
        print(msg, file=sys.stderr)
    sys.exit(code)


def add(id, str, fuzzy):
    """Add a non-fuzzy translation to the dictionary.

    The entry is stored in MESSAGES only when *fuzzy* is false and the
    translation *str* is non-empty; otherwise the call is a no-op.
    """
    if not fuzzy and str:
        MESSAGES[id] = str


def generate():
    """Return the generated output.

    Serializes MESSAGES into the binary catalog layout: a header of seven
    32-bit integers, the key index table, the value index table, then the
    NUL-terminated keys followed by the NUL-terminated values.
    """
    # the keys are sorted in the .mo file
    keys = sorted(MESSAGES)
    offsets = []
    # bytearray gives cheap in-place appends while the running length is
    # still needed to compute each string's offset.
    ids = bytearray()
    strs = bytearray()
    for key in keys:
        value = MESSAGES[key]
        # For each string, we need size and file offset.  Each string is NUL
        # terminated; the NUL does not count into the size.
        offsets.append((len(ids), len(key), len(strs), len(value)))
        ids += key + b'\0'
        strs += value + b'\0'
    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the index tables.  Each index table holds one
    # (length, offset) pair of 32-bit integers per entry, i.e. 8 bytes per
    # entry per table and 16 bytes per entry for both tables.
    keystart = 7*4 + 16*len(keys)
    # and the values start after the keys
    valuestart = keystart + len(ids)
    koffsets = []
    voffsets = []
    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    for o1, l1, o2, l2 in offsets:
        koffsets += [l1, o1 + keystart]
        voffsets += [l2, o2 + valuestart]
    header = struct.pack("Iiiiiii",
                         0x950412de,           # Magic
                         0,                    # Version
                         len(keys),            # # of entries
                         7*4,                  # start of key index
                         7*4 + len(keys)*8,    # start of value index
                         0, 0)                 # size and offset of hash table
    index = array.array("i", koffsets + voffsets).tobytes()
    return b''.join([header, index, ids, strs])


def make(filename, outfile):
    """Compile the .po catalog *filename* and write the binary catalog.

    *filename* gets a '.po' suffix appended unless it already ends with one.
    If *outfile* is None, the output path is the input path with its
    extension replaced by '.mo'.

    Parsed entries are stored in the module-level MESSAGES dictionary (via
    add()) before the output is generated.  The process exits with status 1
    if the input cannot be read or a structural error is found in the
    catalog; a failure to write the output is only reported on stderr.
    """
    ID = 1
    STR = 2

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        with open(infile, 'rb') as f:
            lines = f.readlines()
    except OSError as msg:
        print(msg, file=sys.stderr)
        sys.exit(1)

    section = None
    fuzzy = 0

    # Start off assuming Latin-1, so everything decodes without failure,
    # until we know the exact encoding
    encoding = 'latin-1'

    # Parse the catalog
    for lno, raw in enumerate(lines, 1):
        l = raw.decode(encoding)
        # If we get a comment line after a msgstr, this is a new entry
        if l[0] == '#' and section == STR:
            add(msgid, msgstr, fuzzy)
            section = None
            fuzzy = 0
        # Record a fuzzy mark
        if l[:2] == '#,' and 'fuzzy' in l:
            fuzzy = 1
        # Skip comments
        if l[0] == '#':
            continue
        # Now we are in a msgid section, output previous section
        if l.startswith('msgid') and not l.startswith('msgid_plural'):
            if section == STR:
                add(msgid, msgstr, fuzzy)
                # No comment line separated the previous entry from this one
                # (otherwise section would have been reset above), so any
                # fuzzy mark still set belongs to the entry just closed and
                # must not leak onto the entry that starts here.
                fuzzy = 0
                if not msgid:
                    # See whether there is an encoding declaration
                    # NOTE(review): this check only runs when the header entry
                    # is directly followed by a msgid line; when a comment
                    # line closes the header entry instead, the declared
                    # charset is not picked up.  Confirm whether intended.
                    p = HeaderParser()
                    charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
                    if charset:
                        encoding = charset
            section = ID
            l = l[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif l.startswith('msgid_plural'):
            if section != ID:
                print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno),
                      file=sys.stderr)
                sys.exit(1)
            l = l[12:]
            msgid += b'\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif l.startswith('msgstr'):
            section = STR
            if l.startswith('msgstr['):
                if not is_plural:
                    print('plural without msgid_plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                l = l.split(']', 1)[1]
                if msgstr:
                    msgstr += b'\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    print('indexed msgstr required for plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                l = l[6:]
        # Skip empty lines
        l = l.strip()
        if not l:
            continue
        # What remains is a quoted string literal; literal_eval resolves its
        # escape sequences without executing arbitrary code.
        l = ast.literal_eval(l)
        if section == ID:
            msgid += l.encode(encoding)
        elif section == STR:
            msgstr += l.encode(encoding)
        else:
            print('Syntax error on %s:%d' % (infile, lno),
                  'before:', file=sys.stderr)
            print(l, file=sys.stderr)
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgid, msgstr, fuzzy)

    # Compute output
    output = generate()

    try:
        with open(outfile, "wb") as f:
            f.write(output)
    except OSError as msg:
        print(msg, file=sys.stderr)


def main():
    """Parse the command line and compile every .po file given on it."""
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        # usage() exits, so opts/args are always bound below.
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            print("msgfmt.py", __version__)
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        print('No input file given', file=sys.stderr)
        print("Try `msgfmt --help' for more information.", file=sys.stderr)
        return

    for filename in args:
        make(filename, outfile)


if __name__ == '__main__':
    main()