#!lua
-----------------------------------------------------------------------------
-- lua script picoloadphones.lua --- creates pkb containing phones table.
--
-- Copyright (C) 2009 SVOX AG. All rights reserved.
-----------------------------------------------------------------------------

-- load pico phones src file and create phones pkb file
--
-- usage: picoloadphones.lua <phones src file> <phones pkb file>

-- accepted syntax:
-- - parses line of the following format:
--   :SYM "<sym>" :PROP mapval = <uint8> { , <propname> = <uint> }
--   (<sym> may be enclosed in single or double quotes; values are
--   unsigned decimal digits only)
-- - initial '!' and trailing '!.*' are treated as comments, no '[]'

-- output layout (as written at the end of this script):
-- - 8 bytes of special symbol ids (slots 1..5 used, remaining slots 0)
-- - 256 bytes of property flags, one byte per mapval 0..255


--- valid property names (used as a set, the values are not significant)
local propnames = {mapval=0, vowel=0, diphth=0, glott=0, nonsyllvowel=0, syllcons=0}
--- valid property names (that may occur once only)
local upropnames = {primstress=0, secstress=0, syllbound=0, wordbound=0, pause=0}

--- largest mapval that fits into the one-byte-per-symbol output tables
local MAXMAPVAL = 255

--- unique properties in the order of their slot in the special id header;
--- the order also is the priority if one symbol carries several of them
local specprops = {"primstress", "secstress", "syllbound", "pause", "wordbound"}

--- flag bit contributed by each phone property to the property byte
local propbits = {
  {"vowel", 1}, {"diphth", 2}, {"glott", 4}, {"nonsyllvowel", 8}, {"syllcons", 16},
}


-- init
if #arg ~= 2 then
  print("*** error: wrong number of arguments, must be 2"); return
end
local infile = io.open(arg[1], "r")
if not infile then
  print("*** error: could not open input file: " .. arg[1]); return
end
local outfile = io.open(arg[2], "wb")
if not outfile then
  print("*** error: could not open output file: " .. arg[2])
  infile:close()
  return
end


-- parsing

--- Parses one non-comment source line.
-- @param line  raw input line
-- @return table {sym = <name>, symnr = <first mapval>, props = <property table>},
--         or nil plus an error message if the line is malformed
local function parseline(line)
  local cline = string.gsub(line, "^%s*", "")
  -- get :SYM (double quoted first, then single quoted)
  local sym = string.match(cline, "^:SYM%s+\"([^\"]-)\"%s+")
    or string.match(cline, "^:SYM%s+'([^']-)'%s+")
  if not sym then
    return nil, "*** error: no symbol found"
  end
  cline = string.gsub(cline, "^:SYM%s+['\"].-['\"]%s+", "")
  -- get :PROP and mapval prop/propval, mapval must be the first property
  local mapval = string.match(cline, "^:PROP%s+mapval%s*=%s*(%d+)%s*")
  if not mapval then
    return nil, "*** error: no mapval property found"
  end
  cline = string.gsub(cline, "^:PROP%s+mapval%s*=%s*%d+%s*", "")
  local symnr = tonumber(mapval)
  local props = {mapval = symnr}
  -- drop a trailing comment, then consume ", name = value" items
  cline = string.gsub(cline, "^!.*", "")
  while #cline > 0 do
    local prop, propval = string.match(cline, "^,%s*(%w+)%s*=%s*(%d+)%s*")
    if not prop then
      return nil, "*** error: syntax error in property list"
    end
    cline = string.gsub(cline, "^,%s*%w+%s*=%s*%d+%s*", "")
    props[prop] = tonumber(propval)
    cline = string.gsub(cline, "^!.*", "")
  end
  return {sym = sym, symnr = symnr, props = props}
end

--- Reads all lines of the phones source and builds the symbol tables.
-- Errors are printed together with the offending line.
-- @param file  open input file handle
-- @return syms (keyed by symbol name) and symnrs (keyed by mapval),
--         or nil if an error was reported
local function parsephones(file)
  --- table with symbol name keys (not really used currently)
  local syms = {}
  --- table with symbol number keys (specified with property mapval)
  local symnrs = {}
  for line in file:lines() do
    local skip = string.match(line, "^%s*!.*$") or string.match(line, "^%s*$")
    if not skip then
      local entry, err = parseline(line)
      if not entry then
        print(err)
        print("line: ", line)
        return nil
      end
      local symnr = entry.symnr
      if symnr > MAXMAPVAL then
        -- such a symbol has no slot in the 256 entry property table and
        -- cannot be stored in a one byte special id
        io.write("*** error: mapval must be in range 0..255, ", symnr, "\n")
        print("line: ", line)
        return nil
      end
      if symnrs[symnr] then
        io.write("*** error: mapval values must be unique, ", symnr, "\n")
        print("line: ", line)
        return nil
      end
      syms[entry.sym] = entry.props
      symnrs[symnr] = entry.props
    end
  end
  return syms, symnrs
end


-- check syms and symnrs

--- Checks that all properties are known and that the properties listed in
-- upropnames are used by at most one symbol of the table.
-- @param st  symbol table (values are property tables)
-- @return true if the table is valid, false after printing an error
local function checksymtable(st)
  local used = {}
  for _, props in pairs(st) do
    for prop in pairs(props) do
      if upropnames[prop] then
        if used[prop] then
          io.write("*** error: property '", prop, "' must be unique\n")
          return false
        end
        used[prop] = true
      elseif not propnames[prop] then
        io.write("*** error: invalid property name '", prop, "'\n")
        return false
      end
    end
  end
  return true
end


-- get IDs of unique specids

--- Collects the mapvals of the symbols carrying one of the unique properties.
-- @param symnrs  symbol table keyed by mapval
-- @return array of 8 ids, 0 where no symbol was found
local function getspecids(symnrs)
  local specid = {}
  for i = 1, 8 do specid[i] = 0 end
  for symnr, props in pairs(symnrs) do
    for slot, name in ipairs(specprops) do
      if props[name] then
        -- the table key is the range checked mapval of the symbol
        specid[slot] = symnr
        break
      end
    end
  end
  return specid
end


-- write out Phones pkb

--- Encodes the phone properties of one symbol into a flag byte.
-- NOTE(review): a property counts as set whenever it is present, even if
-- its value is 0 (0 is true in Lua) -- confirm that this is intended.
-- @param props  property table of the symbol, or nil if the mapval is unused
-- @return flag byte value, 0 for an unused mapval
local function encodeprops(props)
  local flags = 0
  if props then
    for _, pb in ipairs(propbits) do
      if props[pb[1]] then flags = flags + pb[2] end
    end
  end
  return flags
end


-- main

local syms, symnrs = parsephones(infile)
infile:close()
if not syms then
  outfile:close(); return
end

-- do not write a pkb for an invalid phones table
if not (checksymtable(syms) and checksymtable(symnrs)) then
  outfile:close(); return
end

local specid = getspecids(symnrs)
local bytes = {}
for i = 1, 8 do
  -- string.char also yields a proper "\0" byte for the value 0
  bytes[#bytes + 1] = string.char(specid[i])
end
for i = 0, MAXMAPVAL do
  bytes[#bytes + 1] = string.char(encodeprops(symnrs[i]))
end
outfile:write(table.concat(bytes))


-- tini

outfile:close()

-- end