Home | History | Annotate | Download | only in wps
      1 /*
      2  * UPnP XML helper routines
      3  * Copyright (c) 2000-2003 Intel Corporation
      4  * Copyright (c) 2006-2007 Sony Corporation
      5  * Copyright (c) 2008-2009 Atheros Communications
      6  * Copyright (c) 2009, Jouni Malinen <j (at) w1.fi>
      7  *
      8  * See wps_upnp.c for more details on licensing and code history.
      9  */
     10 
     11 #include "includes.h"
     12 
     13 #include "common.h"
     14 #include "base64.h"
     15 #include "http.h"
     16 #include "upnp_xml.h"
     17 
     18 
     19 /*
     20  * XML parsing and formatting
     21  *
     22  * XML is a markup language based on unicode; usually (and in our case,
     23  * always!) based on utf-8. utf-8 uses a variable number of bytes per
     24  * character. utf-8 has the advantage that all non-ASCII unicode characters are
     25  * represented by sequences of non-ascii (high bit set) bytes, whereas ASCII
     26  * characters are single ascii bytes, thus we can use typical text processing.
     27  *
     28  * (One other interesting thing about utf-8 is that it is possible to look at
     29  * any random byte and determine if it is the first byte of a character as
     30  * versus a continuation byte).
     31  *
     32  * The base syntax of XML uses a few ASCII punctuation characters; any
     33  * characters that would appear in the payload data are rewritten using
     34  * sequences, e.g., &amp; for ampersand (&) and &lt; for left angle bracket (<).
     35  * Five such escapes total (more can be defined but that does not apply to our
     36  * case). Thus we can safely parse for angle brackets etc.
     37  *
     38  * XML describes tree structures of tagged data, with each element beginning
     39  * with an opening tag <label> and ending with a closing tag </label> with
     40  * matching label. (There is also a self-closing tag <label/> which is supposed
     41  * to be equivalent to <label></label>, i.e., no payload, but we are unlikely
     42  * to see it for our purpose).
     43  *
     44  * Actually the opening tags are a little more complicated because they can
     45  * contain "attributes" after the label (delimited by ascii space or tab chars)
     46  * of the form attribute_label="value" or attribute_label='value'; as it turns
     47  * out we do not have to read any of these attributes, just ignore them.
     48  *
     49  * Labels are any sequence of chars other than space, tab, right angle bracket
     50  * (and ?), but may have an inner structure of <namespace><colon><plain_label>.
     51  * As it turns out, we can ignore the namespaces, in fact we can ignore the
     52  * entire tree hierarchy, because the plain labels we are looking for will be
     53  * unique (not in general, but for this application). We do however have to be
     54  * careful to skip over the namespaces.
     55  *
     56  * In generating XML we have to be more careful, but that is easy because
     57  * everything we do is pretty canned. The only real care to take is to escape
     58  * any special chars in our payload.
     59  */
     60 
     61 /**
     62  * xml_next_tag - Advance to next tag
     63  * @in: Input
     64  * @out: OUT: start of tag just after '<'
     65  * @out_tagname: OUT: start of name of tag, skipping namespace
     66  * @end: OUT: one after tag
     67  * Returns: 0 on success, 1 on failure
     68  *
     69  * A tag has form:
     70  *     <left angle bracket><...><right angle bracket>
     71  * Within the angle brackets, there is an optional leading forward slash (which
     72  * makes the tag an ending tag), then an optional leading label (followed by
     73  * colon) and then the tag name itself.
     74  *
     75  * Note that angle brackets present in the original data must have been encoded
     76  * as &lt; and &gt; so they will not trouble us.
     77  */
     78 int xml_next_tag(const char *in, const char **out,
     79 		 const char **out_tagname, const char **end)
     80 {
     81 	while (*in && *in != '<')
     82 		in++;
     83 	if (*in != '<')
     84 		return 1;
     85 	*out = ++in;
     86 	if (*in == '/')
     87 		in++;
     88 	*out_tagname = in; /* maybe */
     89 	while (isalnum(*in) || *in == '-')
     90 		in++;
     91 	if (*in == ':')
     92 		*out_tagname = ++in;
     93 	while (*in && *in != '>')
     94 		in++;
     95 	if (*in != '>')
     96 		return 1;
     97 	*end = ++in;
     98 	return 0;
     99 }
    100 
    101 
    102 /* xml_data_encode -- format data for xml file, escaping special characters.
    103  *
    104  * Note that we assume we are using utf8 both as input and as output!
    105  * In utf8, characters may be classed as follows:
    106  *     0xxxxxxx(2) -- 1 byte ascii char
    107  *     11xxxxxx(2) -- 1st byte of multi-byte char w/ unicode value >= 0x80
    108  *         110xxxxx(2) -- 1st byte of 2 byte sequence (5 payload bits here)
    109  *         1110xxxx(2) -- 1st byte of 3 byte sequence (4 payload bits here)
    110  *         11110xxx(2) -- 1st byte of 4 byte sequence (3 payload bits here)
    111  *      10xxxxxx(2) -- extension byte (6 payload bits per byte)
    112  *      Some values implied by the above are however illegal because they
    113  *      do not represent unicode chars or are not the shortest encoding.
    114  * Actually, we can almost entirely ignore the above and just do
    115  * text processing same as for ascii text.
    116  *
    117  * XML is written with arbitrary unicode characters, except that five
    118  * characters have special meaning and so must be escaped where they
    119  * appear in payload data... which we do here.
    120  */
    121 void xml_data_encode(struct wpabuf *buf, const char *data, int len)
    122 {
    123 	int i;
    124 	for (i = 0; i < len; i++) {
    125 		u8 c = ((u8 *) data)[i];
    126 		if (c == '<') {
    127 			wpabuf_put_str(buf, "&lt;");
    128 			continue;
    129 		}
    130 		if (c == '>') {
    131 			wpabuf_put_str(buf, "&gt;");
    132 			continue;
    133 		}
    134 		if (c == '&') {
    135 			wpabuf_put_str(buf, "&amp;");
    136 			continue;
    137 		}
    138 		if (c == '\'') {
    139 			wpabuf_put_str(buf, "&apos;");
    140 			continue;
    141 		}
    142 		if (c == '"') {
    143 			wpabuf_put_str(buf, "&quot;");
    144 			continue;
    145 		}
    146 		/*
    147 		 * We could try to represent control characters using the
    148 		 * sequence: &#x; where x is replaced by a hex numeral, but not
    149 		 * clear why we would do this.
    150 		 */
    151 		wpabuf_put_u8(buf, c);
    152 	}
    153 }
    154 
    155 
    156 /* xml_add_tagged_data -- format tagged data as a new xml line.
    157  *
    158  * tag must not have any special chars.
    159  * data may have special chars, which are escaped.
    160  */
    161 void xml_add_tagged_data(struct wpabuf *buf, const char *tag, const char *data)
    162 {
    163 	wpabuf_printf(buf, "<%s>", tag);
    164 	xml_data_encode(buf, data, os_strlen(data));
    165 	wpabuf_printf(buf, "</%s>\n", tag);
    166 }
    167 
    168 
    169 /* A POST body looks something like (per upnp spec):
    170  * <?xml version="1.0"?>
    171  * <s:Envelope
    172  *     xmlns:s="http://schemas.xmlsoap.org/soap/envelope/"
    173  *     s:encodingStyle="http://schemas.xmlsoap.org/soap/encoding/">
    174  *   <s:Body>
    175  *     <u:actionName xmlns:u="urn:schemas-upnp-org:service:serviceType:v">
    176  *       <argumentName>in arg value</argumentName>
    177  *       other in args and their values go here, if any
    178  *     </u:actionName>
    179  *   </s:Body>
    180  * </s:Envelope>
    181  *
    182  * where :
    183  *      s: might be some other namespace name followed by colon
    184  *      u: might be some other namespace name followed by colon
    185  *      actionName will be replaced according to action requested
    186  *      schema following actionName will be WFA scheme instead
    187  *      argumentName will be actual argument name
    188  *      (in arg value) will be actual argument value
    189  */
    190 char * xml_get_first_item(const char *doc, const char *item)
    191 {
    192 	const char *match = item;
    193 	int match_len = os_strlen(item);
    194 	const char *tag, *tagname, *end;
    195 	char *value;
    196 
    197 	/*
    198 	 * This is crude: ignore any possible tag name conflicts and go right
    199 	 * to the first tag of this name. This should be ok for the limited
    200 	 * domain of UPnP messages.
    201 	 */
    202 	for (;;) {
    203 		if (xml_next_tag(doc, &tag, &tagname, &end))
    204 			return NULL;
    205 		doc = end;
    206 		if (!os_strncasecmp(tagname, match, match_len) &&
    207 		    *tag != '/' &&
    208 		    (tagname[match_len] == '>' ||
    209 		     !isgraph(tagname[match_len]))) {
    210 			break;
    211 		}
    212 	}
    213 	end = doc;
    214 	while (*end && *end != '<')
    215 		end++;
    216 	value = os_zalloc(1 + (end - doc));
    217 	if (value == NULL)
    218 		return NULL;
    219 	os_memcpy(value, doc, end - doc);
    220 	return value;
    221 }
    222 
    223 
    224 struct wpabuf * xml_get_base64_item(const char *data, const char *name,
    225 				    enum http_reply_code *ret)
    226 {
    227 	char *msg;
    228 	struct wpabuf *buf;
    229 	unsigned char *decoded;
    230 	size_t len;
    231 
    232 	msg = xml_get_first_item(data, name);
    233 	if (msg == NULL) {
    234 		*ret = UPNP_ARG_VALUE_INVALID;
    235 		return NULL;
    236 	}
    237 
    238 	decoded = base64_decode((unsigned char *) msg, os_strlen(msg), &len);
    239 	os_free(msg);
    240 	if (decoded == NULL) {
    241 		*ret = UPNP_OUT_OF_MEMORY;
    242 		return NULL;
    243 	}
    244 
    245 	buf = wpabuf_alloc_ext_data(decoded, len);
    246 	if (buf == NULL) {
    247 		os_free(decoded);
    248 		*ret = UPNP_OUT_OF_MEMORY;
    249 		return NULL;
    250 	}
    251 	return buf;
    252 }
    253