Home | History | Annotate | Download | only in tools
      1 #!lua

      2 -----------------------------------------------------------------------------
      3 -- lua script picoloaddbg.lua --- creates pkb containing phoneme information.
      4 --                                This pkb is only used for debug purposes. 
      5 --
      6 -- Copyright (C) 2009 SVOX AG. All rights reserved.
      7 -----------------------------------------------------------------------------
      8 
      9 -- load pico phones src file and create dbg pkb file
     10 
     11 -- accepted syntax:
     12 -- - parses line of the following format:
     13 --   :SYM "<sym>" :PROP mapval = <uint8> { , <propname> = <int> }
     14 -- - initial '!' and trailing '!.*' are treated as comments, no '[]'
     15 
     16 
     17 --- valid property names
     18 propnames = {mapval=0, vowel=0, diphth=0, glott=0, nonsyllvowel=0, syllcons=0}
     19 
     20 --- valid property names (that may occur once only)
     21 upropnames = {primstress=0, secstress=0, syllbound=0, wordbound=0, pause=0}
     22 
     23 
     24 -- init
     25 if #arg ~= 2 then

     26   print("*** error: wrong number of arguments, must be 2"); return
     27 end
     28 local infile = io.open(arg[1], "r")
     29 if not infile then
     30   print("*** error: could not open input file: " .. arg[1]); return
     31 end
     32 local outfile = io.open(arg[2], "wb")
     33 if not outfile then
     34   print("*** error: could not open output file: " .. arg[2]); return
     35 end
     36 
     37 
     38 -- tables
     39 --- table with symbol name keys (not really used currently)
     40 local syms = {}
     41 --- table with symbol name number keys (specified with property mapval)
     42 local symnrs = {}
     43 --- array of symbol name numer keys used (to check for unique mapvals)
     44 local symnrsused = {}
     45 
     46 
     47 -- parse input file, build up syms and symnrs tables
     48 for line in infile:lines() do
     49   if string.match(line, "^%s*!.*$") or string.match(line, "^%s*$") then
     50     -- discard comment-only lines
     51   else
     52     cline = string.gsub(line, "^%s*", "")
     53     -- get :SYM
     54     sym = string.match(cline, "^:SYM%s+\"([^\"]-)\"%s+")
     55     if not sym then
     56       sym = string.match(cline, "^:SYM%s+'([^']-)'%s+")
     57     end
     58     if sym then
     59       cline = string.gsub(cline, "^:SYM%s+['\"].-['\"]%s+", "")
     60       -- get :PROP and mapval prop/propval
     61       propval = string.match(cline, "^:PROP%s+mapval%s*=%s*(%d+)%s*")
     62       if propval then
     63 	cline = string.gsub(cline, "^:PROP%s+mapval%s*=%s*%d+%s*", "")
     64 	-- construct props table and add first mapval property
     65 	props = {mapval = tonumber(propval)}
     66 	symnr = tonumber(propval)
     67 	if not symnrsused[symnr] then
     68 	  symnrsused[symnr] = true
     69 	else
     70 	  io.write("*** error: mapval values must be unique, ", symnr, "\n")
     71 	  print("line: ", line); return
     72 	end
     73 	-- check if remaining part are comments only
     74 	cline = string.gsub(cline, "^!.*", "")
     75 	while (#cline > 0) do

     76 	  -- try to get next prop/propval and add to props
     77 	  prop, propval = string.match(cline, "^,%s*(%w+)%s*=%s*(%d+)%s*")
     78 	  if prop and propval then
     79 	    cline = string.gsub(cline, "^,%s*%w+%s*=%s*%d+%s*", "")
     80 	    props[prop] = tonumber(propval)
     81 	  else
     82 	    print("*** error: syntax error in property list")
     83 	    print("line: ", line); return
     84 	  end
     85 	  -- cleanup if only comments remaining
     86 	  cline = string.gsub(cline, "^!.*", "")
     87 	end
     88       else
     89 	print("*** error: no mapval property found")
     90 	print("line: ", line); return
     91       end
     92       syms[sym] = props
     93       symnrs[symnr] = props
     94     else
     95       print("*** error: no symbol found")
     96       print("line: ", line)
     97       return
     98     end
     99   end
    100 end
    101 
    102 
    103 -- check syms and symnrs
    104 
    105 function checksymtable (st)
    106   for s in pairs(propnames) do propnames[s] = 0 end
    107   for s in pairs(upropnames) do upropnames[s] = 0 end
    108   for s, p in pairs(st) do
    109     for prop, propval in pairs(p) do
    110       if not propnames[prop] and not upropnames[prop] then
    111 	io.write("*** error: invalid property name '", prop, "'\n")
    112 	return
    113       end
    114       if propnames[prop] then
    115 	propnames[prop] = propnames[prop] + 1
    116       elseif upropnames[prop] then
    117 	upropnames[prop] = upropnames[prop] + 1
    118       end
    119     end
    120     for prop, propval in pairs(upropnames) do
    121       if propval > 1  then
    122 	io.write("*** error: property '", prop, "' must be unique\n"); return
    123       end
    124     end
    125   end
    126 end
    127 
    128 checksymtable(syms)
    129 checksymtable(symnrs)
    130 
    131 
    132 -- get IDs of unique specids
    133 
    134 specid = {}
    135 for i = 1, 8 do specid[i] = 0 end
    136 for s, pl in pairs(symnrs) do
    137   if pl["primstress"] then    specid[1] = pl["mapval"]
    138   elseif pl["secstress"] then specid[2] = pl["mapval"]
    139   elseif pl["syllbound"] then specid[3] = pl["mapval"]
    140   elseif pl["pause"] then     specid[4] = pl["mapval"]
    141   elseif pl["wordbound"] then specid[5] = pl["mapval"]
    142   end
    143 end
    144 
    145 
    146 -- write out Phones pkb
    147 
    148 function encodeprops (n)
    149   rv = 0
    150   pl = symnrs[n]
    151   if pl then
    152     if pl["vowel"] then rv = 1 end
    153     if pl["diphth"]then rv = rv + 2 end
    154     if pl["glott"] then rv = rv + 4 end
    155     if pl["nonsyllvowel"] then rv = rv + 8 end
    156     if pl["syllcons"] then rv = rv + 16 end
    157   end
    158   return rv
    159 end
    160 
    161 
    162 symtab = {}
    163 for k, v in pairs(syms) do
    164   symtab[tonumber(v["mapval"])] = k
    165 end
    166 
    167 for i = 0, 255 do
    168   if symtab[i] then
    169     for j = 1, 8 do
    170       if (j <= string.len(symtab[i])) then
    171 	outfile:write(string.sub(symtab[i], j, j))
    172       else
    173 	outfile:write("\0")
    174       end
    175 
    176     end
    177   else
    178     outfile:write("\0\0\0\0\0\0\0\0")
    179   end
    180 end
    181 
    182 
    183 
    184 -- tini
    185 
    186 infile:close()
    187 outfile:close()
    188 
    189 -- end
    190