Home | History | Annotate | Download | only in tools
      1 #!lua

      2 -----------------------------------------------------------------------------
      3 -- lua script picoloadphones.lua --- creates pkb containing phones table.
      4 --
      5 -- Copyright (C) 2009 SVOX AG. All rights reserved.
      6 -----------------------------------------------------------------------------
      7 
      8 -- load pico phones src file and create phones pkb file
      9 
     10 -- accepted syntax:
     11 -- - parses line of the following format:
     12 --   :SYM "<sym>" :PROP mapval = <uint8> { , <propname> = <int> }
     13 -- - initial '!' and trailing '!.*' are treated as comments, no '[]'
     14 
     15 
     16 --- valid property names
     17 propnames = {mapval=0, vowel=0, diphth=0, glott=0, nonsyllvowel=0, syllcons=0}
     18 --- valid property names (that may occur once only)
     19 upropnames = {primstress=0, secstress=0, syllbound=0, wordbound=0, pause=0}
     20 
     21 
     22 -- init
     23 if #arg ~= 2 then

     24   print("*** error: wrong number of arguments, must be 2"); return
     25 end
     26 local infile = io.open(arg[1], "r")
     27 if not infile then
     28   print("*** error: could not open input file: " .. arg[1]); return
     29 end
     30 local outfile = io.open(arg[2], "wb")
     31 if not outfile then
     32   print("*** error: could not open output file: " .. arg[2]); return
     33 end
     34 
     35 
     36 -- tables
     37 --- table with symbol name keys (not really used currently)
     38 local syms = {}
     39 --- table with symbol name number keys (specified with property mapval)
     40 local symnrs = {}
     41 --- array of symbol name numer keys used (to check for unique mapvals)
     42 local symnrsused = {}
     43 
     44 
     45 -- parse input file, build up syms and symnrs tables
     46 for line in infile:lines() do
     47   if string.match(line, "^%s*!.*$") or string.match(line, "^%s*$") then
     48     -- discard comment-only lines
     49   else
     50     cline = string.gsub(line, "^%s*", "")
     51     -- get :SYM
     52     sym = string.match(cline, "^:SYM%s+\"([^\"]-)\"%s+")
     53     if not sym then
     54       sym = string.match(cline, "^:SYM%s+'([^']-)'%s+")
     55     end
     56     if sym then
     57       cline = string.gsub(cline, "^:SYM%s+['\"].-['\"]%s+", "")
     58       -- get :PROP and mapval prop/propval
     59       propval = string.match(cline, "^:PROP%s+mapval%s*=%s*(%d+)%s*")
     60       if propval then
     61 	cline = string.gsub(cline, "^:PROP%s+mapval%s*=%s*%d+%s*", "")
     62 	-- construct props table and add first mapval property
     63 	props = {mapval = tonumber(propval)}
     64 	symnr = tonumber(propval)
     65 	if not symnrsused[symnr] then
     66 	  symnrsused[symnr] = true
     67 	else
     68 	  io.write("*** error: mapval values must be unique, ", symnr, "\n")
     69 	  print("line: ", line); return
     70 	end
     71 	-- check if remaining part are comments only
     72 	cline = string.gsub(cline, "^!.*", "")
     73 	while (#cline > 0) do

     74 	  -- try to get next prop/propval and add to props
     75 	  prop, propval = string.match(cline, "^,%s*(%w+)%s*=%s*(%d+)%s*")
     76 	  if prop and propval then
     77 	    cline = string.gsub(cline, "^,%s*%w+%s*=%s*%d+%s*", "")
     78 	    props[prop] = tonumber(propval)
     79 	  else
     80 	    print("*** error: syntax error in property list")
     81 	    print("line: ", line); return
     82 	  end
     83 	  -- cleanup if only comments remaining
     84 	  cline = string.gsub(cline, "^!.*", "")
     85 	end
     86       else
     87 	print("*** error: no mapval property found")
     88 	print("line: ", line); return
     89       end
     90       syms[sym] = props
     91       symnrs[symnr] = props
     92     else
     93       print("*** error: no symbol found")
     94       print("line: ", line)
     95       return
     96     end
     97   end
     98 end
     99 
    100 
    101 -- check syms and symnrs
    102 
    103 function checksymtable (st)
    104   for s in pairs(propnames) do propnames[s] = 0 end
    105   for s in pairs(upropnames) do upropnames[s] = 0 end
    106   for s, p in pairs(st) do
    107     for prop, propval in pairs(p) do
    108       if not propnames[prop] and not upropnames[prop] then
    109 	io.write("*** error: invalid property name '", prop, "'\n")
    110 	return
    111       end
    112       if propnames[prop] then
    113 	propnames[prop] = propnames[prop] + 1
    114       elseif upropnames[prop] then
    115 	upropnames[prop] = upropnames[prop] + 1
    116       end
    117     end
    118     for prop, propval in pairs(upropnames) do
    119       if propval > 1  then
    120 	io.write("*** error: property '", prop, "' must be unique\n"); return
    121       end
    122     end
    123   end
    124 end
    125 
    126 checksymtable(syms)
    127 checksymtable(symnrs)
    128 
    129 
    130 -- get IDs of unique specids
    131 
    132 specid = {}
    133 for i = 1, 8 do specid[i] = 0 end
    134 for s, pl in pairs(symnrs) do
    135   if pl["primstress"] then    specid[1] = pl["mapval"]
    136   elseif pl["secstress"] then specid[2] = pl["mapval"]
    137   elseif pl["syllbound"] then specid[3] = pl["mapval"]
    138   elseif pl["pause"] then     specid[4] = pl["mapval"]
    139   elseif pl["wordbound"] then specid[5] = pl["mapval"]
    140   end
    141 end
    142 
    143 
    144 -- write out Phones pkb
    145 
    146 function encodeprops (n)
    147   rv = 0
    148   pl = symnrs[n]
    149   if pl then
    150     if pl["vowel"] then rv = 1 end
    151     if pl["diphth"]then rv = rv + 2 end
    152     if pl["glott"] then rv = rv + 4 end
    153     if pl["nonsyllvowel"] then rv = rv + 8 end
    154     if pl["syllcons"] then rv = rv + 16 end
    155   end
    156   return rv
    157 end
    158 
    159 for i=1,8 do
    160   if specid[i] == 0 then outfile:write("\0")
    161   else outfile:write(string.format("%c", specid[i]))
    162   end
    163 end
    164 for i = 0, 255 do
    165   nr = encodeprops(i)
    166   if nr == 0 then outfile:write("\0")
    167   else outfile:write(string.format("%c", nr))
    168   end
    169 end
    170 
    171 
    172 -- tini
    173 
    174 infile:close()
    175 outfile:close()
    176 
    177 -- end
    178