1 #include <string.h> 2 #include "xmlmime.h" 3 4 static const char * 5 getTok(const char **pp) 6 { 7 /* inComment means one level of nesting; inComment+1 means two levels etc */ 8 enum { inAtom, inString, init, inComment }; 9 int state = init; 10 const char *tokStart = 0; 11 for (;;) { 12 switch (**pp) { 13 case '\0': 14 if (state == inAtom) 15 return tokStart; 16 return 0; 17 case ' ': 18 case '\r': 19 case '\t': 20 case '\n': 21 if (state == inAtom) 22 return tokStart; 23 break; 24 case '(': 25 if (state == inAtom) 26 return tokStart; 27 if (state != inString) 28 state++; 29 break; 30 case ')': 31 if (state > init) 32 --state; 33 else if (state != inString) 34 return 0; 35 break; 36 case ';': 37 case '/': 38 case '=': 39 if (state == inAtom) 40 return tokStart; 41 if (state == init) 42 return (*pp)++; 43 break; 44 case '\\': 45 ++*pp; 46 if (**pp == '\0') 47 return 0; 48 break; 49 case '"': 50 switch (state) { 51 case inString: 52 ++*pp; 53 return tokStart; 54 case inAtom: 55 return tokStart; 56 case init: 57 tokStart = *pp; 58 state = inString; 59 break; 60 } 61 break; 62 default: 63 if (state == init) { 64 tokStart = *pp; 65 state = inAtom; 66 } 67 break; 68 } 69 ++*pp; 70 } 71 /* not reached */ 72 } 73 74 /* key must be lowercase ASCII */ 75 76 static int 77 matchkey(const char *start, const char *end, const char *key) 78 { 79 if (!start) 80 return 0; 81 for (; start != end; start++, key++) 82 if (*start != *key && *start != 'A' + (*key - 'a')) 83 return 0; 84 return *key == '\0'; 85 } 86 87 void 88 getXMLCharset(const char *buf, char *charset) 89 { 90 const char *next, *p; 91 92 charset[0] = '\0'; 93 next = buf; 94 p = getTok(&next); 95 if (matchkey(p, next, "text")) 96 strcpy(charset, "us-ascii"); 97 else if (!matchkey(p, next, "application")) 98 return; 99 p = getTok(&next); 100 if (!p || *p != '/') 101 return; 102 p = getTok(&next); 103 #if 0 104 if (!matchkey(p, next, "xml") && charset[0] == '\0') 105 return; 106 #endif 107 p = getTok(&next); 108 while (p) { 109 if (*p == ';') { 110 p = getTok(&next); 111 if (matchkey(p, next, "charset")) { 112 p = getTok(&next); 113 if (p && *p == '=') { 114 p = getTok(&next); 115 if (p) { 116 char *s = charset; 117 if (*p == '"') { 118 while (++p != next - 1) { 119 if (*p == '\\') 120 ++p; 121 if (s == charset + CHARSET_MAX - 1) { 122 charset[0] = '\0'; 123 break; 124 } 125 *s++ = *p; 126 } 127 *s++ = '\0'; 128 } 129 else { 130 if (next - p > CHARSET_MAX - 1) 131 break; 132 while (p != next) 133 *s++ = *p++; 134 *s = 0; 135 break; 136 } 137 } 138 } 139 break; 140 } 141 } 142 else 143 p = getTok(&next); 144 } 145 } 146 147 #ifdef TEST 148 149 #include <stdio.h> 150 151 int 152 main(int argc, char *argv[]) 153 { 154 char buf[CHARSET_MAX]; 155 if (argc <= 1) 156 return 1; 157 printf("%s\n", argv[1]); 158 getXMLCharset(argv[1], buf); 159 printf("charset=\"%s\"\n", buf); 160 return 0; 161 } 162 163 #endif /* TEST */ 164