Home | History | Annotate | Download | only in lib
      1 /* Convert string representation of a number into an integer value.
      2 
      3    Copyright (C) 1991-1992, 1994-1999, 2003, 2005-2007, 2009-2012 Free Software
      4    Foundation, Inc.
      5 
      6    NOTE: The canonical source of this file is maintained with the GNU C
      7    Library.  Bugs can be reported to bug-glibc (at) gnu.org.
      8 
      9    This program is free software: you can redistribute it and/or modify it
     10    under the terms of the GNU General Public License as published by the
     11    Free Software Foundation; either version 3 of the License, or any
     12    later version.
     13 
     14    This program is distributed in the hope that it will be useful,
     15    but WITHOUT ANY WARRANTY; without even the implied warranty of
     16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17    GNU General Public License for more details.
     18 
     19    You should have received a copy of the GNU General Public License
     20    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
     21 
     22 #ifdef _LIBC
     23 # define USE_NUMBER_GROUPING
     24 #else
     25 # include <config.h>
     26 #endif
     27 
     28 #include <ctype.h>
     29 #include <errno.h>
     30 #ifndef __set_errno
     31 # define __set_errno(Val) errno = (Val)
     32 #endif
     33 
     34 #include <limits.h>
     35 #include <stddef.h>
     36 #include <stdlib.h>
     37 #include <string.h>
     38 
     39 #ifdef USE_NUMBER_GROUPING
     40 # include "../locale/localeinfo.h"
     41 #endif
     42 
/* Nonzero if we are defining 'strtoul' or 'strtoull', operating on
   unsigned integers.  If the including file did not define UNSIGNED,
   it defaults to 0, i.e. the signed variants are built.

   INT is the return type of the functions defined in this file.  It is
   spelled in terms of LONG, which is only defined further down (it
   depends on QUAD); that is fine because macros are expanded at the
   point of use.

   NOTE: the '#else' branch is selected whenever UNSIGNED is defined at
   all, regardless of its value, so an includer must leave UNSIGNED
   undefined (not define it to 0) to obtain the signed type.  */
#ifndef UNSIGNED
# define UNSIGNED 0
# define INT LONG int
#else
# define INT unsigned LONG int
#endif
     51 
/* Determine the name.

   The identifier 'strtol' is turned into a macro that expands to the
   name of the function actually being built.  The choice depends on:
     - USE_IN_EXTENDED_LOCALE_MODEL: the '__..._l' names are used;
     - UNSIGNED (nonzero): the 'u' variants are used;
     - USE_WIDE_CHAR: the 'wcs' variants are used instead of 'str';
     - QUAD: the 'll' variants are used instead of 'l'.
   In the single remaining case (no extended locale model, signed,
   narrow characters, no QUAD) no macro is defined and the function
   keeps the plain name 'strtol'.  */
#ifdef USE_IN_EXTENDED_LOCALE_MODEL
# if UNSIGNED
#  ifdef USE_WIDE_CHAR
#   ifdef QUAD
#    define strtol __wcstoull_l
#   else
#    define strtol __wcstoul_l
#   endif
#  else
#   ifdef QUAD
#    define strtol __strtoull_l
#   else
#    define strtol __strtoul_l
#   endif
#  endif
# else
#  ifdef USE_WIDE_CHAR
#   ifdef QUAD
#    define strtol __wcstoll_l
#   else
#    define strtol __wcstol_l
#   endif
#  else
#   ifdef QUAD
#    define strtol __strtoll_l
#   else
#    define strtol __strtol_l
#   endif
#  endif
# endif
#else
# if UNSIGNED
#  ifdef USE_WIDE_CHAR
#   ifdef QUAD
#    define strtol wcstoull
#   else
#    define strtol wcstoul
#   endif
#  else
#   ifdef QUAD
#    define strtol strtoull
#   else
#    define strtol strtoul
#   endif
#  endif
# else
#  ifdef USE_WIDE_CHAR
#   ifdef QUAD
#    define strtol wcstoll
#   else
#    define strtol wcstol
#   endif
#  else
    /* Signed, narrow, non-QUAD: the name is already 'strtol', so only
       the QUAD case needs a redefinition here.  */
#   ifdef QUAD
#    define strtol strtoll
#   endif
#  endif
# endif
#endif
    112 
/* If QUAD is defined, we are defining 'strtoll' or 'strtoull',
   operating on 'long long int's.  Otherwise LONG is plain 'long' and
   the limits are the LONG_MIN / LONG_MAX / ULONG_MAX constants.

   In both cases this section provides:
     LONG              the width keyword(s) spliced into INT;
     STRTOL_LONG_MIN   smallest value of the signed result type;
     STRTOL_LONG_MAX   largest value of the signed result type;
     STRTOL_ULONG_MAX  largest value of the unsigned result type.  */
#ifdef QUAD
# define LONG long long
# define STRTOL_LONG_MIN LLONG_MIN
# define STRTOL_LONG_MAX LLONG_MAX
# define STRTOL_ULONG_MAX ULLONG_MAX

/* The TYPE_* helpers below are used in this section only to supply
   LLONG_MIN, LLONG_MAX and ULLONG_MAX when they are not already
   defined as macros.  */

/* The extra casts in the following macros work around compiler bugs,
   e.g., in Cray C 5.0.3.0.  */

/* True if negative values of the signed integer type T use two's
   complement, ones' complement, or signed magnitude representation,
   respectively.  Much GNU code assumes two's complement, but some
   people like to be portable to all possible C hosts.  */
# define TYPE_TWOS_COMPLEMENT(t) ((t) ~ (t) 0 == (t) -1)
# define TYPE_ONES_COMPLEMENT(t) ((t) ~ (t) 0 == 0)
# define TYPE_SIGNED_MAGNITUDE(t) ((t) ~ (t) 0 < (t) -1)

/* True if the arithmetic type T is signed.  */
# define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))

/* The maximum and minimum values for the integer type T.  These
   macros have undefined behavior if T is signed and has padding bits.
   If this is a problem for you, please let us know how to fix it for
   your host.

   For a signed T, TYPE_MAXIMUM builds 2**(N-1) - 1 (N being the number
   of bits in T) as (2**(N-2) - 1) * 2 + 1, so that no intermediate
   value exceeds the result.  TYPE_MINIMUM refers to TYPE_MAXIMUM, which
   is defined after it; this is harmless because the expansion happens
   only where TYPE_MINIMUM is used.  */
# define TYPE_MINIMUM(t) \
   ((t) (! TYPE_SIGNED (t) \
         ? (t) 0 \
         : TYPE_SIGNED_MAGNITUDE (t) \
         ? ~ (t) 0 \
         : ~ TYPE_MAXIMUM (t)))
# define TYPE_MAXIMUM(t) \
   ((t) (! TYPE_SIGNED (t) \
         ? (t) -1 \
         : ((((t) 1 << (sizeof (t) * CHAR_BIT - 2)) - 1) * 2 + 1)))

/* Fallback definitions of the 'long long' limits.  */
# ifndef ULLONG_MAX
#  define ULLONG_MAX TYPE_MAXIMUM (unsigned long long)
# endif
# ifndef LLONG_MAX
#  define LLONG_MAX TYPE_MAXIMUM (long long int)
# endif
# ifndef LLONG_MIN
#  define LLONG_MIN TYPE_MINIMUM (long long int)
# endif

# if __GNUC__ == 2 && __GNUC_MINOR__ < 7
   /* Work around gcc bug with using this constant.  The limit is read
      from a static object instead of being used as a constant
      expression.  */
   static const unsigned long long int maxquad = ULLONG_MAX;
#  undef STRTOL_ULONG_MAX
#  define STRTOL_ULONG_MAX maxquad
# endif
#else
# define LONG long
# define STRTOL_LONG_MIN LONG_MIN
# define STRTOL_LONG_MAX LONG_MAX
# define STRTOL_ULONG_MAX ULONG_MAX
#endif
    172 
    173 
/* We use this code also for the extended locale handling where the
   function gets as an additional argument the locale which has to be
   used.  To access the values we have to redefine the _NL_CURRENT
   macro.

   The redefined _NL_CURRENT ignores its CATEGORY argument and reads
   from a variable named 'current', which must therefore be in scope
   wherever the macro is used.

   LOCALE_PARAM_PROTO is appended to parameter lists and LOCALE_PARAM
   to the matching argument lists; both expand to nothing when the
   extended locale model is not in use, and to the extra 'loc'
   parameter/argument (including the leading comma) when it is.  */
#ifdef USE_IN_EXTENDED_LOCALE_MODEL
# undef _NL_CURRENT
# define _NL_CURRENT(category, item) \
  (current->values[_NL_ITEM_INDEX (item)].string)
# define LOCALE_PARAM , loc
# define LOCALE_PARAM_PROTO , __locale_t loc
#else
# define LOCALE_PARAM
# define LOCALE_PARAM_PROTO
#endif
    188 
/* Character-type abstraction, so that the same code can be compiled
   for narrow and for wide strings:
     L_(Ch)       a character literal of the string's character type;
     STRING_TYPE  the element type of the input string;
     UCHAR_TYPE   the type used to hold one character while classifying
                  and converting it;
     ISSPACE, ISALPHA, TOUPPER
                  classification/conversion, using the explicit locale
                  'loc' in the extended locale model.

   In the narrow, non-extended case these expand to the <ctype.h>
   functions, whose argument must be representable as 'unsigned char'
   (or be EOF); callers should therefore pass a UCHAR_TYPE value
   rather than a plain 'char'.  */
#ifdef USE_WIDE_CHAR
# include <wchar.h>
# include <wctype.h>
# define L_(Ch) L##Ch
# define UCHAR_TYPE wint_t
# define STRING_TYPE wchar_t
# ifdef USE_IN_EXTENDED_LOCALE_MODEL
#  define ISSPACE(Ch) __iswspace_l ((Ch), loc)
#  define ISALPHA(Ch) __iswalpha_l ((Ch), loc)
#  define TOUPPER(Ch) __towupper_l ((Ch), loc)
# else
#  define ISSPACE(Ch) iswspace (Ch)
#  define ISALPHA(Ch) iswalpha (Ch)
#  define TOUPPER(Ch) towupper (Ch)
# endif
#else
# define L_(Ch) Ch
# define UCHAR_TYPE unsigned char
# define STRING_TYPE char
# ifdef USE_IN_EXTENDED_LOCALE_MODEL
#  define ISSPACE(Ch) __isspace_l ((Ch), loc)
#  define ISALPHA(Ch) __isalpha_l ((Ch), loc)
#  define TOUPPER(Ch) __toupper_l ((Ch), loc)
# else
#  define ISSPACE(Ch) isspace (Ch)
#  define ISALPHA(Ch) isalpha (Ch)
#  define TOUPPER(Ch) toupper (Ch)
# endif
#endif
    218 
/* INTERNAL (name) yields '__<name>_internal'.  The two-level definition
   makes sure the argument is macro-expanded first, so INTERNAL (strtol)
   follows whatever name 'strtol' was redefined to above.  */
#define INTERNAL(X) INTERNAL1(X)
#define INTERNAL1(X) __##X##_internal
/* NOTE(review): WEAKNAME1 is not defined anywhere in the visible part of
   this file and WEAKNAME is not used in it; confirm whether this macro
   is still needed.  */
#define WEAKNAME(X) WEAKNAME1(X)

#ifdef USE_NUMBER_GROUPING
/* This file defines a function to check for correct grouping.  */
# include "grouping.h"
#endif
    227 
    228 
    229 
    230 /* Convert NPTR to an 'unsigned long int' or 'long int' in base BASE.
    231    If BASE is 0 the base is determined by the presence of a leading
    232    zero, indicating octal or a leading "0x" or "0X", indicating hexadecimal.
    233    If BASE is < 2 or > 36, it is reset to 10.
    234    If ENDPTR is not NULL, a pointer to the character after the last
    235    one converted is stored in *ENDPTR.  */
    236 
    237 INT
    238 INTERNAL (strtol) (const STRING_TYPE *nptr, STRING_TYPE **endptr,
    239                    int base, int group LOCALE_PARAM_PROTO)
    240 {
    241   int negative;
    242   register unsigned LONG int cutoff;
    243   register unsigned int cutlim;
    244   register unsigned LONG int i;
    245   register const STRING_TYPE *s;
    246   register UCHAR_TYPE c;
    247   const STRING_TYPE *save, *end;
    248   int overflow;
    249 
    250 #ifdef USE_NUMBER_GROUPING
    251 # ifdef USE_IN_EXTENDED_LOCALE_MODEL
    252   struct locale_data *current = loc->__locales[LC_NUMERIC];
    253 # endif
    254   /* The thousands character of the current locale.  */
    255   wchar_t thousands = L'\0';
    256   /* The numeric grouping specification of the current locale,
    257      in the format described in <locale.h>.  */
    258   const char *grouping;
    259 
    260   if (group)
    261     {
    262       grouping = _NL_CURRENT (LC_NUMERIC, GROUPING);
    263       if (*grouping <= 0 || *grouping == CHAR_MAX)
    264         grouping = NULL;
    265       else
    266         {
    267           /* Figure out the thousands separator character.  */
    268 # if defined _LIBC || defined _HAVE_BTOWC
    269           thousands = __btowc (*_NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP));
    270           if (thousands == WEOF)
    271             thousands = L'\0';
    272 # endif
    273           if (thousands == L'\0')
    274             grouping = NULL;
    275         }
    276     }
    277   else
    278     grouping = NULL;
    279 #endif
    280 
    281   if (base < 0 || base == 1 || base > 36)
    282     {
    283       __set_errno (EINVAL);
    284       return 0;
    285     }
    286 
    287   save = s = nptr;
    288 
    289   /* Skip white space.  */
    290   while (ISSPACE (*s))
    291     ++s;
    292   if (*s == L_('\0'))
    293     goto noconv;
    294 
    295   /* Check for a sign.  */
    296   if (*s == L_('-'))
    297     {
    298       negative = 1;
    299       ++s;
    300     }
    301   else if (*s == L_('+'))
    302     {
    303       negative = 0;
    304       ++s;
    305     }
    306   else
    307     negative = 0;
    308 
    309   /* Recognize number prefix and if BASE is zero, figure it out ourselves.  */
    310   if (*s == L_('0'))
    311     {
    312       if ((base == 0 || base == 16) && TOUPPER (s[1]) == L_('X'))
    313         {
    314           s += 2;
    315           base = 16;
    316         }
    317       else if (base == 0)
    318         base = 8;
    319     }
    320   else if (base == 0)
    321     base = 10;
    322 
    323   /* Save the pointer so we can check later if anything happened.  */
    324   save = s;
    325 
    326 #ifdef USE_NUMBER_GROUPING
    327   if (group)
    328     {
    329       /* Find the end of the digit string and check its grouping.  */
    330       end = s;
    331       for (c = *end; c != L_('\0'); c = *++end)
    332         if ((wchar_t) c != thousands
    333             && ((wchar_t) c < L_('0') || (wchar_t) c > L_('9'))
    334             && (!ISALPHA (c) || (int) (TOUPPER (c) - L_('A') + 10) >= base))
    335           break;
    336       if (*s == thousands)
    337         end = s;
    338       else
    339         end = correctly_grouped_prefix (s, end, thousands, grouping);
    340     }
    341   else
    342 #endif
    343     end = NULL;
    344 
    345   cutoff = STRTOL_ULONG_MAX / (unsigned LONG int) base;
    346   cutlim = STRTOL_ULONG_MAX % (unsigned LONG int) base;
    347 
    348   overflow = 0;
    349   i = 0;
    350   for (c = *s; c != L_('\0'); c = *++s)
    351     {
    352       if (s == end)
    353         break;
    354       if (c >= L_('0') && c <= L_('9'))
    355         c -= L_('0');
    356       else if (ISALPHA (c))
    357         c = TOUPPER (c) - L_('A') + 10;
    358       else
    359         break;
    360       if ((int) c >= base)
    361         break;
    362       /* Check for overflow.  */
    363       if (i > cutoff || (i == cutoff && c > cutlim))
    364         overflow = 1;
    365       else
    366         {
    367           i *= (unsigned LONG int) base;
    368           i += c;
    369         }
    370     }
    371 
    372   /* Check if anything actually happened.  */
    373   if (s == save)
    374     goto noconv;
    375 
    376   /* Store in ENDPTR the address of one character
    377      past the last character we converted.  */
    378   if (endptr != NULL)
    379     *endptr = (STRING_TYPE *) s;
    380 
    381 #if !UNSIGNED
    382   /* Check for a value that is within the range of
    383      'unsigned LONG int', but outside the range of 'LONG int'.  */
    384   if (overflow == 0
    385       && i > (negative
    386               ? -((unsigned LONG int) (STRTOL_LONG_MIN + 1)) + 1
    387               : (unsigned LONG int) STRTOL_LONG_MAX))
    388     overflow = 1;
    389 #endif
    390 
    391   if (overflow)
    392     {
    393       __set_errno (ERANGE);
    394 #if UNSIGNED
    395       return STRTOL_ULONG_MAX;
    396 #else
    397       return negative ? STRTOL_LONG_MIN : STRTOL_LONG_MAX;
    398 #endif
    399     }
    400 
    401   /* Return the result of the appropriate sign.  */
    402   return negative ? -i : i;
    403 
    404 noconv:
    405   /* We must handle a special case here: the base is 0 or 16 and the
    406      first two characters are '0' and 'x', but the rest are no
    407      hexadecimal digits.  This is no error case.  We return 0 and
    408      ENDPTR points to the 'x'.  */
    409   if (endptr != NULL)
    410     {
    411       if (save - nptr >= 2 && TOUPPER (save[-1]) == L_('X')
    412           && save[-2] == L_('0'))
    413         *endptr = (STRING_TYPE *) &save[-1];
    414       else
    415         /*  There was no number to convert.  */
    416         *endptr = (STRING_TYPE *) nptr;
    417     }
    418 
    419   return 0L;
    420 }
    421 
    422 /* External user entry point.  */
    424 
    425 
    426 INT
    427 #ifdef weak_function
    428 weak_function
    429 #endif
    430 strtol (const STRING_TYPE *nptr, STRING_TYPE **endptr,
    431         int base LOCALE_PARAM_PROTO)
    432 {
    433   return INTERNAL (strtol) (nptr, endptr, base, 0 LOCALE_PARAM);
    434 }
    435