Home | History | Annotate | Download | only in stdio
      1 /*	$OpenBSD: vfwscanf.c,v 1.4 2014/03/19 05:17:01 guenther Exp $ */
      2 /*-
      3  * Copyright (c) 1990, 1993
      4  *	The Regents of the University of California.  All rights reserved.
      5  *
      6  * This code is derived from software contributed to Berkeley by
      7  * Chris Torek.
      8  *
      9  * Redistribution and use in source and binary forms, with or without
     10  * modification, are permitted provided that the following conditions
     11  * are met:
     12  * 1. Redistributions of source code must retain the above copyright
     13  *    notice, this list of conditions and the following disclaimer.
     14  * 2. Redistributions in binary form must reproduce the above copyright
     15  *    notice, this list of conditions and the following disclaimer in the
     16  *    documentation and/or other materials provided with the distribution.
     17  * 3. Neither the name of the University nor the names of its contributors
     18  *    may be used to endorse or promote products derived from this software
     19  *    without specific prior written permission.
     20  *
     21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     31  * SUCH DAMAGE.
     32  */
     33 
     34 #include <inttypes.h>
     35 #include <limits.h>
     36 #include <locale.h>
     37 #include <stdarg.h>
     38 #include <stddef.h>
     39 #include <stdio.h>
     40 #include <stdlib.h>
     41 #include <string.h>
     42 #include <wctype.h>
     43 #include "local.h"
     44 
/*
 * Capacity, in wide characters, of the scratch buffer used to collect the
 * text of a numeric field.  The scanner caps the field width at one less
 * than this so a terminating NUL always fits.
 */
#define BUF 513 /* Maximum length of numeric string. */

/*
 * Flags used during conversion.
 *
 * These are OR-ed into a single per-directive `flags` word while the
 * conversion specification is parsed: length modifiers (LONG .. PTRINT),
 * assignment suppression, and properties implied by the conversion
 * character itself (POINTER, NOSKIP, UNSIGNED).
 */
#define LONG 0x00001       /* l: long or double */
#define LONGDBL 0x00002    /* L: long double */
#define SHORT 0x00004      /* h: short */
#define SHORTSHORT 0x00008 /* hh: 8 bit integer */
#define LLONG 0x00010      /* ll: long long (+ deprecated q: quad) */
#define POINTER 0x00020    /* p: void * (as hex) */
#define SIZEINT 0x00040    /* z: (signed) size_t */
#define MAXINT 0x00080     /* j: intmax_t */
#define PTRINT 0x00100     /* t: ptrdiff_t */
#define NOSKIP 0x00200     /* [ or c: do not skip blanks */
#define SUPPRESS 0x00400   /* *: suppress assignment */
#define UNSIGNED 0x00800   /* %[oupxX] conversions */

/*
 * The following are used in numeric conversions only:
 * SIGNOK, HAVESIGN, NDIGITS, DPTOK, and EXPOK are for floating point;
 * SIGNOK, HAVESIGN, NDIGITS, PFXOK, and NZDIGITS are for integral.
 *
 * Note that DPTOK/EXPOK deliberately reuse the bit values of PFXOK/NZDIGITS:
 * a single conversion is either floating point or integral, so the two pairs
 * are never meaningful at the same time.
 */
#define SIGNOK 0x01000   /* +/- is (still) legal */
#define HAVESIGN 0x02000 /* sign detected */
#define NDIGITS 0x04000  /* no digits detected */

/* Floating-point-only scanner state (shares bits with the integral pair). */
#define DPTOK 0x08000 /* (float) decimal point is still legal */
#define EXPOK 0x10000 /* (float) exponent (e+3, etc) still legal */

/* Integral-only scanner state (shares bits with the float pair above). */
#define PFXOK 0x08000    /* 0x prefix is (still) legal */
#define NZDIGITS 0x10000 /* no zero digits detected */

/*
 * Conversion types.
 *
 * Once the conversion character has been recognised, it is replaced by one
 * of these small codes, which select the scanning routine to run.
 */
#define CT_CHAR 0   /* %c conversion */
#define CT_CCL 1    /* %[...] conversion */
#define CT_STRING 2 /* %s conversion */
#define CT_INT 3    /* integer, i.e., strtoimax or strtoumax */
#define CT_FLOAT 4  /* floating, i.e., strtod */
     86 
     87 // An interpretive version of __sccl from vfscanf.c --- a table of all wchar_t values would
     88 // be a little too expensive, and some kind of compressed version isn't worth the trouble.
     89 static inline bool in_ccl(wchar_t wc, const wchar_t* ccl) {
     90   // Is this a negated set?
     91   bool member_result = true;
     92   if (*ccl == '^') {
     93     member_result = false;
     94     ++ccl;
     95   }
     96 
     97   // The first character may be ']' or '-' without being special.
     98   if (*ccl == '-' || *ccl == ']') {
     99     // A literal match?
    100     if (*ccl == wc) return member_result;
    101     ++ccl;
    102   }
    103 
    104   while (*ccl && *ccl != ']') {
    105     // The last character may be '-' without being special.
    106     if (*ccl == '-' && ccl[1] != '\0' && ccl[1] != ']') {
    107       wchar_t first = *(ccl - 1);
    108       wchar_t last = *(ccl + 1);
    109       if (first <= last) {
    110         // In the range?
    111         if (wc >= first && wc <= last) return member_result;
    112         ccl += 2;
    113         continue;
    114       }
    115       // A '-' is not considered to be part of a range if the character after
    116       // is not greater than the character before, so fall through...
    117     }
    118     // A literal match?
    119     if (*ccl == wc) return member_result;
    120     ++ccl;
    121   }
    122   return !member_result;
    123 }
    124 
    125 #pragma GCC diagnostic push
    126 #pragma GCC diagnostic ignored "-Wframe-larger-than="
    127 
    128 /*
    129  * vfwscanf
    130  */
    131 int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) {
    132   wint_t c;               /* character from format, or conversion */
    133   size_t width;           /* field width, or 0 */
    134   wchar_t* p;             /* points into all kinds of strings */
    135   int n;                  /* handy integer */
    136   int flags;              /* flags as defined above */
    137   wchar_t* p0;            /* saves original value of p when necessary */
    138   int nassigned;          /* number of fields assigned */
    139   int nconversions;       /* number of conversions */
    140   int nread;              /* number of characters consumed from fp */
    141   int base;               /* base argument to strtoimax/strtouimax */
    142   wchar_t buf[BUF];       /* buffer for numeric conversions */
    143   const wchar_t* ccl;
    144   wint_t wi;              /* handy wint_t */
    145   char* mbp;              /* multibyte string pointer for %c %s %[ */
    146   size_t nconv;           /* number of bytes in mb. conversion */
    147   char mbbuf[MB_LEN_MAX]; /* temporary mb. character buffer */
    148   mbstate_t mbs;
    149 
    150   /* `basefix' is used to avoid `if' tests in the integer scanner */
    151   static short basefix[17] = { 10, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
    152 
    153   _SET_ORIENTATION(fp, 1);
    154 
    155   nassigned = 0;
    156   nconversions = 0;
    157   nread = 0;
    158   base = 0; /* XXX just to keep gcc happy */
    159   for (;;) {
    160     c = *fmt++;
    161     if (c == 0) {
    162       return (nassigned);
    163     }
    164     if (iswspace(c)) {
    165       while ((c = __fgetwc_unlock(fp)) != WEOF && iswspace(c))
    166         ;
    167       if (c != WEOF) __ungetwc(c, fp);
    168       continue;
    169     }
    170     if (c != '%') goto literal;
    171     width = 0;
    172     flags = 0;
    173     /*
    174      * switch on the format.  continue if done;
    175      * break once format type is derived.
    176      */
    177   again:
    178     c = *fmt++;
    179     switch (c) {
    180       case '%':
    181       literal:
    182         if ((wi = __fgetwc_unlock(fp)) == WEOF) goto input_failure;
    183         if (wi != c) {
    184           __ungetwc(wi, fp);
    185           goto match_failure;
    186         }
    187         nread++;
    188         continue;
    189 
    190       case '*':
    191         flags |= SUPPRESS;
    192         goto again;
    193       case 'j':
    194         flags |= MAXINT;
    195         goto again;
    196       case 'L':
    197         flags |= LONGDBL;
    198         goto again;
    199       case 'h':
    200         if (*fmt == 'h') {
    201           fmt++;
    202           flags |= SHORTSHORT;
    203         } else {
    204           flags |= SHORT;
    205         }
    206         goto again;
    207       case 'l':
    208         if (*fmt == 'l') {
    209           fmt++;
    210           flags |= LLONG;
    211         } else {
    212           flags |= LONG;
    213         }
    214         goto again;
    215       case 'q':
    216         flags |= LLONG; /* deprecated */
    217         goto again;
    218       case 't':
    219         flags |= PTRINT;
    220         goto again;
    221       case 'z':
    222         flags |= SIZEINT;
    223         goto again;
    224 
    225       case '0':
    226       case '1':
    227       case '2':
    228       case '3':
    229       case '4':
    230       case '5':
    231       case '6':
    232       case '7':
    233       case '8':
    234       case '9':
    235         width = width * 10 + c - '0';
    236         goto again;
    237 
    238       /*
    239        * Conversions.
    240        * Those marked `compat' are for 4.[123]BSD compatibility.
    241        */
    242       case 'D': /* compat */
    243         flags |= LONG;
    244         /* FALLTHROUGH */
    245       case 'd':
    246         c = CT_INT;
    247         base = 10;
    248         break;
    249 
    250       case 'i':
    251         c = CT_INT;
    252         base = 0;
    253         break;
    254 
    255       case 'O': /* compat */
    256         flags |= LONG;
    257         /* FALLTHROUGH */
    258       case 'o':
    259         c = CT_INT;
    260         flags |= UNSIGNED;
    261         base = 8;
    262         break;
    263 
    264       case 'u':
    265         c = CT_INT;
    266         flags |= UNSIGNED;
    267         base = 10;
    268         break;
    269 
    270       case 'X':
    271       case 'x':
    272         flags |= PFXOK; /* enable 0x prefixing */
    273         c = CT_INT;
    274         flags |= UNSIGNED;
    275         base = 16;
    276         break;
    277 
    278       case 'e':
    279       case 'E':
    280       case 'f':
    281       case 'F':
    282       case 'g':
    283       case 'G':
    284       case 'a':
    285       case 'A':
    286         c = CT_FLOAT;
    287         break;
    288 
    289       case 's':
    290         c = CT_STRING;
    291         break;
    292 
    293       case '[':
    294         ccl = fmt;
    295         if (*fmt == '^') fmt++;
    296         if (*fmt == ']') fmt++;
    297         while (*fmt != '\0' && *fmt != ']') fmt++;
    298         fmt++;
    299         flags |= NOSKIP;
    300         c = CT_CCL;
    301         break;
    302 
    303       case 'c':
    304         flags |= NOSKIP;
    305         c = CT_CHAR;
    306         break;
    307 
    308       case 'p': /* pointer format is like hex */
    309         flags |= POINTER | PFXOK;
    310         c = CT_INT;
    311         flags |= UNSIGNED;
    312         base = 16;
    313         break;
    314 
    315       case 'n':
    316         nconversions++;
    317         if (flags & SUPPRESS) continue;
    318         if (flags & SHORTSHORT)
    319           *va_arg(ap, signed char*) = nread;
    320         else if (flags & SHORT)
    321           *va_arg(ap, short*) = nread;
    322         else if (flags & LONG)
    323           *va_arg(ap, long*) = nread;
    324         else if (flags & SIZEINT)
    325           *va_arg(ap, ssize_t*) = nread;
    326         else if (flags & PTRINT)
    327           *va_arg(ap, ptrdiff_t*) = nread;
    328         else if (flags & LLONG)
    329           *va_arg(ap, long long*) = nread;
    330         else if (flags & MAXINT)
    331           *va_arg(ap, intmax_t*) = nread;
    332         else
    333           *va_arg(ap, int*) = nread;
    334         continue;
    335 
    336       /*
    337        * Disgusting backwards compatibility hacks.	XXX
    338        */
    339       case '\0': /* compat */
    340         return (EOF);
    341 
    342       default: /* compat */
    343         if (iswupper(c)) flags |= LONG;
    344         c = CT_INT;
    345         base = 10;
    346         break;
    347     }
    348 
    349     /*
    350      * Consume leading white space, except for formats
    351      * that suppress this.
    352      */
    353     if ((flags & NOSKIP) == 0) {
    354       while ((wi = __fgetwc_unlock(fp)) != WEOF && iswspace(wi)) nread++;
    355       if (wi == WEOF) goto input_failure;
    356       __ungetwc(wi, fp);
    357     }
    358 
    359     /*
    360      * Do the conversion.
    361      */
    362     switch (c) {
    363       case CT_CHAR:
    364         /* scan arbitrary characters (sets NOSKIP) */
    365         if (width == 0) width = 1;
    366         if (flags & LONG) {
    367           if (!(flags & SUPPRESS)) p = va_arg(ap, wchar_t*);
    368           n = 0;
    369           while (width-- != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) {
    370             if (!(flags & SUPPRESS)) *p++ = (wchar_t)wi;
    371             n++;
    372           }
    373           if (n == 0) goto input_failure;
    374           nread += n;
    375           if (!(flags & SUPPRESS)) nassigned++;
    376         } else {
    377           if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
    378           n = 0;
    379           memset(&mbs, 0, sizeof(mbs));
    380           while (width != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) {
    381             if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
    382               nconv = wcrtomb(mbp, wi, &mbs);
    383               if (nconv == (size_t)-1) goto input_failure;
    384             } else {
    385               nconv = wcrtomb(mbbuf, wi, &mbs);
    386               if (nconv == (size_t)-1) goto input_failure;
    387               if (nconv > width) {
    388                 __ungetwc(wi, fp);
    389                 break;
    390               }
    391               if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv);
    392             }
    393             if (!(flags & SUPPRESS)) mbp += nconv;
    394             width -= nconv;
    395             n++;
    396           }
    397           if (n == 0) goto input_failure;
    398           nread += n;
    399           if (!(flags & SUPPRESS)) nassigned++;
    400         }
    401         nconversions++;
    402         break;
    403 
    404       case CT_CCL:
    405       case CT_STRING:
    406         // CT_CCL: scan a (nonempty) character class (sets NOSKIP).
    407         // CT_STRING: like CCL, but zero-length string OK, & no NOSKIP.
    408         if (width == 0) width = (size_t)~0; // 'infinity'.
    409         if ((flags & SUPPRESS) && (flags & LONG)) {
    410           n = 0;
    411           while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) n++;
    412           if (wi != WEOF) __ungetwc(wi, fp);
    413         } else if (flags & LONG) {
    414           p0 = p = va_arg(ap, wchar_t*);
    415           while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) {
    416             *p++ = (wchar_t)wi;
    417           }
    418           if (wi != WEOF) __ungetwc(wi, fp);
    419           n = p - p0;
    420         } else {
    421           if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
    422           n = 0;
    423           memset(&mbs, 0, sizeof(mbs));
    424           while ((wi = __fgetwc_unlock(fp)) != WEOF && width != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) {
    425             if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
    426               nconv = wcrtomb(mbp, wi, &mbs);
    427               if (nconv == (size_t)-1) goto input_failure;
    428             } else {
    429               nconv = wcrtomb(mbbuf, wi, &mbs);
    430               if (nconv == (size_t)-1) goto input_failure;
    431               if (nconv > width) break;
    432               if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv);
    433             }
    434             if (!(flags & SUPPRESS)) mbp += nconv;
    435             width -= nconv;
    436             n++;
    437           }
    438           if (wi != WEOF) __ungetwc(wi, fp);
    439         }
    440         if (c == CT_CCL && n == 0) goto match_failure;
    441         if (!(flags & SUPPRESS)) {
    442           if (flags & LONG) {
    443             *p = L'\0';
    444           } else {
    445             *mbp = '\0';
    446           }
    447           ++nassigned;
    448         }
    449         nread += n;
    450         nconversions++;
    451         break;
    452 
    453       case CT_INT:
    454         /* scan an integer as if by strtoimax/strtoumax */
    455         if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1)
    456           width = sizeof(buf) / sizeof(*buf) - 1;
    457         flags |= SIGNOK | NDIGITS | NZDIGITS;
    458         for (p = buf; width; width--) {
    459           c = __fgetwc_unlock(fp);
    460           /*
    461            * Switch on the character; `goto ok'
    462            * if we accept it as a part of number.
    463            */
    464           switch (c) {
    465             /*
    466              * The digit 0 is always legal, but is
    467              * special.  For %i conversions, if no
    468              * digits (zero or nonzero) have been
    469              * scanned (only signs), we will have
    470              * base==0.  In that case, we should set
    471              * it to 8 and enable 0x prefixing.
    472              * Also, if we have not scanned zero digits
    473              * before this, do not turn off prefixing
    474              * (someone else will turn it off if we
    475              * have scanned any nonzero digits).
    476              */
    477             case '0':
    478               if (base == 0) {
    479                 base = 8;
    480                 flags |= PFXOK;
    481               }
    482               if (flags & NZDIGITS)
    483                 flags &= ~(SIGNOK | NZDIGITS | NDIGITS);
    484               else
    485                 flags &= ~(SIGNOK | PFXOK | NDIGITS);
    486               goto ok;
    487 
    488             /* 1 through 7 always legal */
    489             case '1':
    490             case '2':
    491             case '3':
    492             case '4':
    493             case '5':
    494             case '6':
    495             case '7':
    496               base = basefix[base];
    497               flags &= ~(SIGNOK | PFXOK | NDIGITS);
    498               goto ok;
    499 
    500             /* digits 8 and 9 ok iff decimal or hex */
    501             case '8':
    502             case '9':
    503               base = basefix[base];
    504               if (base <= 8) break; /* not legal here */
    505               flags &= ~(SIGNOK | PFXOK | NDIGITS);
    506               goto ok;
    507 
    508             /* letters ok iff hex */
    509             case 'A':
    510             case 'B':
    511             case 'C':
    512             case 'D':
    513             case 'E':
    514             case 'F':
    515             case 'a':
    516             case 'b':
    517             case 'c':
    518             case 'd':
    519             case 'e':
    520             case 'f':
    521               /* no need to fix base here */
    522               if (base <= 10) break; /* not legal here */
    523               flags &= ~(SIGNOK | PFXOK | NDIGITS);
    524               goto ok;
    525 
    526             /* sign ok only as first character */
    527             case '+':
    528             case '-':
    529               if (flags & SIGNOK) {
    530                 flags &= ~SIGNOK;
    531                 flags |= HAVESIGN;
    532                 goto ok;
    533               }
    534               break;
    535 
    536             /*
    537              * x ok iff flag still set and 2nd char (or
    538              * 3rd char if we have a sign).
    539              */
    540             case 'x':
    541             case 'X':
    542               if ((flags & PFXOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
    543                 base = 16; /* if %i */
    544                 flags &= ~PFXOK;
    545                 goto ok;
    546               }
    547               break;
    548           }
    549 
    550           /*
    551            * If we got here, c is not a legal character
    552            * for a number.  Stop accumulating digits.
    553            */
    554           if (c != WEOF) __ungetwc(c, fp);
    555           break;
    556         ok:
    557           /*
    558            * c is legal: store it and look at the next.
    559            */
    560           *p++ = (wchar_t)c;
    561         }
    562         /*
    563          * If we had only a sign, it is no good; push
    564          * back the sign.  If the number ends in `x',
    565          * it was [sign] '0' 'x', so push back the x
    566          * and treat it as [sign] '0'.
    567          */
    568         if (flags & NDIGITS) {
    569           if (p > buf) __ungetwc(*--p, fp);
    570           goto match_failure;
    571         }
    572         c = p[-1];
    573         if (c == 'x' || c == 'X') {
    574           --p;
    575           __ungetwc(c, fp);
    576         }
    577         if ((flags & SUPPRESS) == 0) {
    578           uintmax_t res;
    579 
    580           *p = '\0';
    581           if (flags & UNSIGNED)
    582             res = wcstoimax(buf, NULL, base);
    583           else
    584             res = wcstoumax(buf, NULL, base);
    585           if (flags & POINTER)
    586             *va_arg(ap, void**) = (void*)(uintptr_t)res;
    587           else if (flags & MAXINT)
    588             *va_arg(ap, intmax_t*) = res;
    589           else if (flags & LLONG)
    590             *va_arg(ap, long long*) = res;
    591           else if (flags & SIZEINT)
    592             *va_arg(ap, ssize_t*) = res;
    593           else if (flags & PTRINT)
    594             *va_arg(ap, ptrdiff_t*) = res;
    595           else if (flags & LONG)
    596             *va_arg(ap, long*) = res;
    597           else if (flags & SHORT)
    598             *va_arg(ap, short*) = res;
    599           else if (flags & SHORTSHORT)
    600             *va_arg(ap, signed char*) = res;
    601           else
    602             *va_arg(ap, int*) = res;
    603           nassigned++;
    604         }
    605         nread += p - buf;
    606         nconversions++;
    607         break;
    608 
    609       case CT_FLOAT:
    610         /* scan a floating point number as if by strtod */
    611         if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1)
    612           width = sizeof(buf) / sizeof(*buf) - 1;
    613         if ((width = wparsefloat(fp, buf, buf + width)) == 0) goto match_failure;
    614         if ((flags & SUPPRESS) == 0) {
    615           if (flags & LONGDBL) {
    616             long double res = wcstold(buf, &p);
    617             *va_arg(ap, long double*) = res;
    618           } else if (flags & LONG) {
    619             double res = wcstod(buf, &p);
    620             *va_arg(ap, double*) = res;
    621           } else {
    622             float res = wcstof(buf, &p);
    623             *va_arg(ap, float*) = res;
    624           }
    625           if (p - buf != (ptrdiff_t)width) abort();
    626           nassigned++;
    627         }
    628         nread += width;
    629         nconversions++;
    630         break;
    631     }
    632   }
    633 input_failure:
    634   return (nconversions != 0 ? nassigned : EOF);
    635 match_failure:
    636   return (nassigned);
    637 }
    638 #pragma GCC diagnostic pop
    639