Home | History | Annotate | Download | only in lib
      1 /*
      2  * vsscanf.c
      3  *
      4  * vsscanf(), from which the rest of the scanf()
      5  * family is built
      6  */
      7 
      8 #include <ctype.h>
      9 #include <stdarg.h>
     10 #include <stddef.h>
     11 #include <inttypes.h>
     12 #include <string.h>
     13 #include <limits.h>
     14 #include <stdio.h>
     15 #include <sys/bitops.h>
     16 
     17 #ifndef LONG_BIT
     18 #define LONG_BIT (CHAR_BIT*sizeof(long))
     19 #endif
     20 
     21 enum flags {
     22     FL_SPLAT = 0x01,		/* Drop the value, do not assign */
     23     FL_WIDTH = 0x02,		/* Field width specified */
     24     FL_MINUS = 0x04,		/* Negative number */
     25 };
     26 
     27 enum ranks {
     28     rank_char = -2,
     29     rank_short = -1,
     30     rank_int = 0,
     31     rank_long = 1,
     32     rank_longlong = 2,
     33     rank_ptr = INT_MAX		/* Special value used for pointers */
     34 };
     35 
/* Bounds the rank is clamped to once all length modifiers have been read */
#define MIN_RANK	rank_char
#define MAX_RANK	rank_longlong

/* Ranks selected by the 'j', 'z' and 't' length modifiers respectively */
#define INTMAX_RANK	rank_longlong
#define SIZE_T_RANK	rank_long
#define PTRDIFF_T_RANK	rank_long
     42 
     43 enum bail {
     44     bail_none = 0,		/* No error condition */
     45     bail_eof,			/* Hit EOF */
     46     bail_err			/* Conversion mismatch */
     47 };
     48 
     49 int vsscanf(const char *buffer, const char *format, va_list ap)
     50 {
     51     const char *p = format;
     52     char ch;
     53     const char *q = buffer;
     54     const char *qq;
     55     uintmax_t val = 0;
     56     int rank = rank_int;	/* Default rank */
     57     unsigned int width = UINT_MAX;
     58     int base;
     59     enum flags flags = 0;
     60     enum {
     61 	st_normal,		/* Ground state */
     62 	st_flags,		/* Special flags */
     63 	st_width,		/* Field width */
     64 	st_modifiers,		/* Length or conversion modifiers */
     65 	st_match_init,		/* Initial state of %[ sequence */
     66 	st_match,		/* Main state of %[ sequence */
     67 	st_match_range,		/* After - in a %[ sequence */
     68     } state = st_normal;
     69     char *sarg = NULL;		/* %s %c or %[ string argument */
     70     enum bail bail = bail_none;
     71     int converted = 0;		/* Successful conversions */
     72     unsigned long matchmap[((1 << CHAR_BIT) + (LONG_BIT - 1)) / LONG_BIT];
     73     int matchinv = 0;		/* Is match map inverted? */
     74     unsigned char range_start = 0;
     75 
     76     while ((ch = *p++) && !bail) {
     77 	switch (state) {
     78 	case st_normal:
     79 	    if (ch == '%') {
     80 		state = st_flags;
     81 		flags = 0;
     82 		rank = rank_int;
     83 		width = UINT_MAX;
     84 	    } else if (isspace((unsigned char)ch)) {
     85 		q = skipspace(q);
     86 	    } else {
     87 		if (*q == ch)
     88 		    q++;
     89 		else
     90 		    bail = bail_err;	/* Match failure */
     91 	    }
     92 	    break;
     93 
     94 	case st_flags:
     95 	    switch (ch) {
     96 	    case '*':
     97 		flags |= FL_SPLAT;
     98 		break;
     99 	    case '0' ... '9':
    100 		width = (ch - '0');
    101 		state = st_width;
    102 		flags |= FL_WIDTH;
    103 		break;
    104 	    default:
    105 		state = st_modifiers;
    106 		p--;		/* Process this character again */
    107 		break;
    108 	    }
    109 	    break;
    110 
    111 	case st_width:
    112 	    if (ch >= '0' && ch <= '9') {
    113 		width = width * 10 + (ch - '0');
    114 	    } else {
    115 		state = st_modifiers;
    116 		p--;		/* Process this character again */
    117 	    }
    118 	    break;
    119 
    120 	case st_modifiers:
    121 	    switch (ch) {
    122 		/* Length modifiers - nonterminal sequences */
    123 	    case 'h':
    124 		rank--;		/* Shorter rank */
    125 		break;
    126 	    case 'l':
    127 		rank++;		/* Longer rank */
    128 		break;
    129 	    case 'j':
    130 		rank = INTMAX_RANK;
    131 		break;
    132 	    case 'z':
    133 		rank = SIZE_T_RANK;
    134 		break;
    135 	    case 't':
    136 		rank = PTRDIFF_T_RANK;
    137 		break;
    138 	    case 'L':
    139 	    case 'q':
    140 		rank = rank_longlong;	/* long double/long long */
    141 		break;
    142 
    143 	    default:
    144 		/* Output modifiers - terminal sequences */
    145 		state = st_normal;	/* Next state will be normal */
    146 		if (rank < MIN_RANK)	/* Canonicalize rank */
    147 		    rank = MIN_RANK;
    148 		else if (rank > MAX_RANK)
    149 		    rank = MAX_RANK;
    150 
    151 		switch (ch) {
    152 		case 'P':	/* Upper case pointer */
    153 		case 'p':	/* Pointer */
    154 #if 0				/* Enable this to allow null pointers by name */
    155 		    q = skipspace(q);
    156 		    if (!isdigit((unsigned char)*q)) {
    157 			static const char *const nullnames[] =
    158 			    { "null", "nul", "nil", "(null)", "(nul)", "(nil)",
    159 0 };
    160 			const char *const *np;
    161 
    162 			/* Check to see if it's a null pointer by name */
    163 			for (np = nullnames; *np; np++) {
    164 			    if (!strncasecmp(q, *np, strlen(*np))) {
    165 				val = (uintmax_t) ((void *)NULL);
    166 				goto set_integer;
    167 			    }
    168 			}
    169 			/* Failure */
    170 			bail = bail_err;
    171 			break;
    172 		    }
    173 		    /* else */
    174 #endif
    175 		    rank = rank_ptr;
    176 		    base = 0;
    177 		    goto scan_int;
    178 
    179 		case 'i':	/* Base-independent integer */
    180 		    base = 0;
    181 		    goto scan_int;
    182 
    183 		case 'd':	/* Decimal integer */
    184 		    base = 10;
    185 		    goto scan_int;
    186 
    187 		case 'o':	/* Octal integer */
    188 		    base = 8;
    189 		    goto scan_int;
    190 
    191 		case 'u':	/* Unsigned decimal integer */
    192 		    base = 10;
    193 		    goto scan_int;
    194 
    195 		case 'x':	/* Hexadecimal integer */
    196 		case 'X':
    197 		    base = 16;
    198 		    goto scan_int;
    199 
    200 		case 'n':	/* Number of characters consumed */
    201 		    val = (q - buffer);
    202 		    goto set_integer;
    203 
    204 scan_int:
    205 		    q = skipspace(q);
    206 		    if (!*q) {
    207 			bail = bail_eof;
    208 			break;
    209 		    }
    210 		    val = strntoumax(q, (char **)&qq, base, width);
    211 		    if (qq == q) {
    212 			bail = bail_err;
    213 			break;
    214 		    }
    215 		    q = qq;
    216 		    converted++;
    217 		    /* fall through */
    218 
    219 set_integer:
    220 		    if (!(flags & FL_SPLAT)) {
    221 			switch (rank) {
    222 			case rank_char:
    223 			    *va_arg(ap, unsigned char *) = (unsigned char)val;
    224 			    break;
    225 			case rank_short:
    226 			    *va_arg(ap, unsigned short *) = (unsigned short)val;
    227 			    break;
    228 			case rank_int:
    229 			    *va_arg(ap, unsigned int *) = (unsigned int)val;
    230 			    break;
    231 			case rank_long:
    232 			    *va_arg(ap, unsigned long *) = (unsigned long)val;
    233 			    break;
    234 			case rank_longlong:
    235 			    *va_arg(ap, unsigned long long *) =
    236 				(unsigned long long)val;
    237 			    break;
    238 			case rank_ptr:
    239 			    *va_arg(ap, void **) = (void *)(uintptr_t) val;
    240 			    break;
    241 			}
    242 		    }
    243 		    break;
    244 
    245 		case 'c':	/* Character */
    246 		    width = (flags & FL_WIDTH) ? width : 1;	/* Default width == 1 */
    247 		    sarg = va_arg(ap, char *);
    248 		    while (width--) {
    249 			if (!*q) {
    250 			    bail = bail_eof;
    251 			    break;
    252 			}
    253 			*sarg++ = *q++;
    254 		    }
    255 		    if (!bail)
    256 			converted++;
    257 		    break;
    258 
    259 		case 's':	/* String */
    260 		    {
    261 			char *sp;
    262 			sp = sarg = va_arg(ap, char *);
    263 			while (width-- && *q && !isspace((unsigned char)*q)) {
    264 			    *sp++ = *q++;
    265 			}
    266 			if (sarg != sp) {
    267 			    *sp = '\0';	/* Terminate output */
    268 			    converted++;
    269 			} else {
    270 			    bail = bail_eof;
    271 			}
    272 		    }
    273 		    break;
    274 
    275 		case '[':	/* Character range */
    276 		    sarg = va_arg(ap, char *);
    277 		    state = st_match_init;
    278 		    matchinv = 0;
    279 		    memset(matchmap, 0, sizeof matchmap);
    280 		    break;
    281 
    282 		case '%':	/* %% sequence */
    283 		    if (*q == '%')
    284 			q++;
    285 		    else
    286 			bail = bail_err;
    287 		    break;
    288 
    289 		default:	/* Anything else */
    290 		    bail = bail_err;	/* Unknown sequence */
    291 		    break;
    292 		}
    293 	    }
    294 	    break;
    295 
    296 	case st_match_init:	/* Initial state for %[ match */
    297 	    if (ch == '^' && !matchinv) {
    298 		matchinv = 1;
    299 	    } else {
    300 		range_start = (unsigned char)ch;
    301 		set_bit((unsigned char)ch, matchmap);
    302 		state = st_match;
    303 	    }
    304 	    break;
    305 
    306 	case st_match:		/* Main state for %[ match */
    307 	    if (ch == ']') {
    308 		goto match_run;
    309 	    } else if (ch == '-') {
    310 		state = st_match_range;
    311 	    } else {
    312 		range_start = (unsigned char)ch;
    313 		set_bit((unsigned char)ch, matchmap);
    314 	    }
    315 	    break;
    316 
    317 	case st_match_range:	/* %[ match after - */
    318 	    if (ch == ']') {
    319 		set_bit((unsigned char)'-', matchmap);	/* - was last character */
    320 		goto match_run;
    321 	    } else {
    322 		int i;
    323 		for (i = range_start; i <= (unsigned char)ch; i++)
    324 		    set_bit(i, matchmap);
    325 		state = st_match;
    326 	    }
    327 	    break;
    328 
    329 match_run:			/* Match expression finished */
    330 	    qq = q;
    331 	    while (width && *q
    332 		   && test_bit((unsigned char)*q, matchmap) ^ matchinv) {
    333 		*sarg++ = *q++;
    334 	    }
    335 	    if (q != qq) {
    336 		*sarg = '\0';
    337 		converted++;
    338 	    } else {
    339 		bail = *q ? bail_err : bail_eof;
    340 	    }
    341 	    break;
    342 	}
    343     }
    344 
    345     if (bail == bail_eof && !converted)
    346 	converted = -1;		/* Return EOF (-1) */
    347 
    348     return converted;
    349 }
    350