Home | History | Annotate | Download | only in xmlwf
#include <stdio.h>
#include <string.h>

/* Size in bytes of a charset buffer, including the terminating NUL. */
#define CHARSET_MAX 41
      2 
      3 static const char *
      4 getTok(const char **pp)
      5 {
      6   enum { inAtom, inString, init, inComment };
      7   int state = init;
      8   const char *tokStart = 0;
      9   for (;;) {
     10     switch (**pp) {
     11     case '\0':
     12       return 0;
     13     case ' ':
     14     case '\r':
     15     case '\t':
     16     case '\n':
     17       if (state == inAtom)
     18         return tokStart;
     19       break;
     20     case '(':
     21       if (state == inAtom)
     22         return tokStart;
     23       if (state != inString)
     24         state++;
     25       break;
     26     case ')':
     27       if (state > init)
     28         --state;
     29       else if (state != inString)
     30         return 0;
     31       break;
     32     case ';':
     33     case '/':
     34     case '=':
     35       if (state == inAtom)
     36         return tokStart;
     37       if (state == init)
     38         return (*pp)++;
     39       break;
     40     case '\\':
     41       ++*pp;
     42       if (**pp == '\0')
     43         return 0;
     44       break;
     45     case '"':
     46       switch (state) {
     47       case inString:
     48         ++*pp;
     49         return tokStart;
     50       case inAtom:
     51         return tokStart;
     52       case init:
     53         tokStart = *pp;
     54         state = inString;
     55         break;
     56       }
     57       break;
     58     default:
     59       if (state == init) {
     60         tokStart = *pp;
     61         state = inAtom;
     62       }
     63       break;
     64     }
     65     ++*pp;
     66   }
     67   /* not reached */
     68 }
     69 
     70 /* key must be lowercase ASCII */
     71 
     72 static int
     73 matchkey(const char *start, const char *end, const char *key)
     74 {
     75   if (!start)
     76     return 0;
     77   for (; start != end; start++, key++)
     78     if (*start != *key && *start != 'A' + (*key - 'a'))
     79       return 0;
     80   return *key == '\0';
     81 }
     82 
     83 void
     84 getXMLCharset(const char *buf, char *charset)
     85 {
     86   const char *next, *p;
     87 
     88   charset[0] = '\0';
     89   next = buf;
     90   p = getTok(&next);
     91   if (matchkey(p, next, "text"))
     92     strcpy(charset, "us-ascii");
     93   else if (!matchkey(p, next, "application"))
     94     return;
     95   p = getTok(&next);
     96   if (!p || *p != '/')
     97     return;
     98   p = getTok(&next);
     99   if (matchkey(p, next, "xml"))
    100     isXml = 1;
    101   p = getTok(&next);
    102   while (p) {
    103     if (*p == ';') {
    104       p = getTok(&next);
    105       if (matchkey(p, next, "charset")) {
    106         p = getTok(&next);
    107         if (p && *p == '=') {
    108           p = getTok(&next);
    109           if (p) {
    110             char *s = charset;
    111             if (*p == '"') {
    112               while (++p != next - 1) {
    113                 if (*p == '\\')
    114                   ++p;
    115                 if (s == charset + CHARSET_MAX - 1) {
    116                   charset[0] = '\0';
    117                   break;
    118                 }
    119                 *s++ = *p;
    120               }
    121               *s++ = '\0';
    122             }
    123             else {
    124               if (next - p > CHARSET_MAX - 1)
    125                 break;
    126               while (p != next)
    127                 *s++ = *p++;
    128               *s = 0;
    129               break;
    130             }
    131           }
    132         }
    133       }
    134     }
    135   else
    136     p = getTok(&next);
    137   }
    138 }
    139 
    140 int
    141 main(int argc, char **argv)
    142 {
    143   char buf[CHARSET_MAX];
    144   getXMLCharset(argv[1], buf);
    145   printf("charset = \"%s\"\n", buf);
    146   return 0;
    147 }
    148