Home | History | Annotate | Download | only in wtf
      1 /*
      2  * Copyright (C) 2007, 2008, 2009 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  *
      8  * 1.  Redistributions of source code must retain the above copyright
      9  *     notice, this list of conditions and the following disclaimer.
     10  * 2.  Redistributions in binary form must reproduce the above copyright
     11  *     notice, this list of conditions and the following disclaimer in the
     12  *     documentation and/or other materials provided with the distribution.
     13  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
     14  *     its contributors may be used to endorse or promote products derived
     15  *     from this software without specific prior written permission.
     16  *
     17  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
     18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     20  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
     21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #ifndef WTF_ASCIICType_h
     30 #define WTF_ASCIICType_h
     31 
     32 #include <wtf/Assertions.h>
     33 
     34 // The behavior of many of the functions in the <ctype.h> header is dependent
     35 // on the current locale. But in the WebKit project, all uses of those functions
     36 // are in code processing something that's not locale-specific. These equivalents
     37 // for some of the <ctype.h> functions are named more explicitly, not dependent
     38 // on the C library locale, and we should also optimize them as needed.
     39 
     40 // All functions return false or leave the character unchanged if passed a character
     41 // that is outside the range 0-7F. So they can be used on Unicode strings or
     42 // characters if the intent is to do processing only if the character is ASCII.
     43 
     44 namespace WTF {
     45 
     46     inline bool isASCII(char c) { return !(c & ~0x7F); }
     47     inline bool isASCII(unsigned short c) { return !(c & ~0x7F); }
     48 #if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
     49     inline bool isASCII(wchar_t c) { return !(c & ~0x7F); }
     50 #endif
     51     inline bool isASCII(int c) { return !(c & ~0x7F); }
     52     inline bool isASCII(unsigned c) { return !(c & ~0x7F); }
     53 
     54     inline bool isASCIIAlpha(char c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
     55     inline bool isASCIIAlpha(unsigned short c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
     56 #if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
     57     inline bool isASCIIAlpha(wchar_t c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
     58 #endif
     59     inline bool isASCIIAlpha(int c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
     60     inline bool isASCIIAlpha(unsigned c) { return (c | 0x20) >= 'a' && (c | 0x20) <= 'z'; }
     61 
     62     inline bool isASCIIAlphanumeric(char c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
     63     inline bool isASCIIAlphanumeric(unsigned short c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
     64 #if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
     65     inline bool isASCIIAlphanumeric(wchar_t c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
     66 #endif
     67     inline bool isASCIIAlphanumeric(int c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
     68     inline bool isASCIIAlphanumeric(unsigned c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'z'); }
     69 
     70     inline bool isASCIIDigit(char c) { return (c >= '0') & (c <= '9'); }
     71     inline bool isASCIIDigit(unsigned short c) { return (c >= '0') & (c <= '9'); }
     72 #if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
     73     inline bool isASCIIDigit(wchar_t c) { return (c >= '0') & (c <= '9'); }
     74 #endif
     75     inline bool isASCIIDigit(int c) { return (c >= '0') & (c <= '9'); }
     76     inline bool isASCIIDigit(unsigned c) { return (c >= '0') & (c <= '9'); }
     77 
     78     inline bool isASCIIHexDigit(char c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
     79     inline bool isASCIIHexDigit(unsigned short c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
     80 #if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
     81     inline bool isASCIIHexDigit(wchar_t c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
     82 #endif
     83     inline bool isASCIIHexDigit(int c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
     84     inline bool isASCIIHexDigit(unsigned c) { return (c >= '0' && c <= '9') || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f'); }
     85 
     86     inline bool isASCIIOctalDigit(char c) { return (c >= '0') & (c <= '7'); }
     87     inline bool isASCIIOctalDigit(unsigned short c) { return (c >= '0') & (c <= '7'); }
     88 #if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
     89     inline bool isASCIIOctalDigit(wchar_t c) { return (c >= '0') & (c <= '7'); }
     90 #endif
     91     inline bool isASCIIOctalDigit(int c) { return (c >= '0') & (c <= '7'); }
     92     inline bool isASCIIOctalDigit(unsigned c) { return (c >= '0') & (c <= '7'); }
     93 
     94     inline bool isASCIILower(char c) { return c >= 'a' && c <= 'z'; }
     95     inline bool isASCIILower(unsigned short c) { return c >= 'a' && c <= 'z'; }
     96 #if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
     97     inline bool isASCIILower(wchar_t c) { return c >= 'a' && c <= 'z'; }
     98 #endif
     99     inline bool isASCIILower(int c) { return c >= 'a' && c <= 'z'; }
    100     inline bool isASCIILower(unsigned c) { return c >= 'a' && c <= 'z'; }
    101 
    // isASCIIUpper: true only for the uppercase ASCII letters 'A'-'Z'.
    inline bool isASCIIUpper(char c)
    {
        return !(c < 'A' || c > 'Z');
    }

    inline bool isASCIIUpper(unsigned short c)
    {
        return !(c < 'A' || c > 'Z');
    }

    // Compiled out on MSVC when wchar_t is not a native type.
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline bool isASCIIUpper(wchar_t c)
    {
        return !(c < 'A' || c > 'Z');
    }
#endif

    inline bool isASCIIUpper(int c)
    {
        return !(c < 'A' || c > 'Z');
    }

    inline bool isASCIIUpper(unsigned c)
    {
        return !(c < 'A' || c > 'Z');
    }
    109 
    /*
        isASCIISpace: true for space (0x20) and for the control characters
        0x09-0x0D (\t, \n, \v, \f, \r); false for everything else.

        The tests are ordered by how often each case was seen in a run of
        Apple's page load test for callers of isASCIISpace:

            character          count
            ---------          -----
            non-spaces         689383
        20  space              294720
        0A  \n                 89059
        09  \t                 28320
        0D  \r                 0
        0C  \f                 0
        0B  \v                 0

        Non-spaces dominate, so anything above ' ' is rejected with a single
        comparison; space itself is checked next, and the rarer control
        characters last.
    */
    inline bool isASCIISpace(char c)
    {
        if (c > ' ')
            return false;
        if (c == ' ')
            return true;
        return c <= 0xD && c >= 0x9;
    }

    inline bool isASCIISpace(unsigned short c)
    {
        if (c > ' ')
            return false;
        if (c == ' ')
            return true;
        return c <= 0xD && c >= 0x9;
    }

    // Compiled out on MSVC when wchar_t is not a native type.
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline bool isASCIISpace(wchar_t c)
    {
        if (c > ' ')
            return false;
        if (c == ' ')
            return true;
        return c <= 0xD && c >= 0x9;
    }
#endif

    inline bool isASCIISpace(int c)
    {
        if (c > ' ')
            return false;
        if (c == ' ')
            return true;
        return c <= 0xD && c >= 0x9;
    }

    inline bool isASCIISpace(unsigned c)
    {
        if (c > ' ')
            return false;
        if (c == ' ')
            return true;
        return c <= 0xD && c >= 0x9;
    }
    130 
    // toASCIILower: maps 'A'-'Z' to 'a'-'z' and returns every other value
    // unchanged. The range test yields 0 or 1; shifted left by five it becomes
    // 0 or 0x20, the bit that distinguishes the two cases in ASCII, so OR-ing
    // it in converts without branching on the result. The arithmetic is done in
    // a promoted type, so the narrow overloads cast back explicitly.
    inline char toASCIILower(char c)
    {
        const int caseBit = (c >= 'A' && c <= 'Z') << 5;
        return static_cast<char>(c | caseBit);
    }

    inline unsigned short toASCIILower(unsigned short c)
    {
        const int caseBit = (c >= 'A' && c <= 'Z') << 5;
        return static_cast<unsigned short>(c | caseBit);
    }

    // Compiled out on MSVC when wchar_t is not a native type.
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline wchar_t toASCIILower(wchar_t c)
    {
        const int caseBit = (c >= 'A' && c <= 'Z') << 5;
        return static_cast<wchar_t>(c | caseBit);
    }
#endif

    inline int toASCIILower(int c)
    {
        const int caseBit = (c >= 'A' && c <= 'Z') << 5;
        return c | caseBit;
    }

    inline unsigned toASCIILower(unsigned c)
    {
        const int caseBit = (c >= 'A' && c <= 'Z') << 5;
        return c | caseBit;
    }
    138 
    // toASCIIUpper: maps 'a'-'z' to 'A'-'Z' and returns every other value
    // unchanged. The range test yields 0 or 1; shifted left by five it becomes
    // 0 or 0x20, and clearing that bit turns a lowercase ASCII letter into its
    // uppercase form.
    // FIXME: Why do the narrow overloads need static_cast? The expression is
    // evaluated in a promoted type, so the cast only makes the conversion back
    // to the return type explicit (presumably to silence a narrowing warning).
    inline char toASCIIUpper(char c)
    {
        const int caseBit = (c >= 'a' && c <= 'z') << 5;
        return static_cast<char>(c & ~caseBit);
    }

    inline unsigned short toASCIIUpper(unsigned short c)
    {
        const int caseBit = (c >= 'a' && c <= 'z') << 5;
        return static_cast<unsigned short>(c & ~caseBit);
    }

    // Compiled out on MSVC when wchar_t is not a native type.
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline wchar_t toASCIIUpper(wchar_t c)
    {
        const int caseBit = (c >= 'a' && c <= 'z') << 5;
        return static_cast<wchar_t>(c & ~caseBit);
    }
#endif

    // For int and unsigned the expression already has the return type, so no
    // cast is written.
    inline int toASCIIUpper(int c)
    {
        const int caseBit = (c >= 'a' && c <= 'z') << 5;
        return c & ~caseBit;
    }

    inline unsigned toASCIIUpper(unsigned c)
    {
        const int caseBit = (c >= 'a' && c <= 'z') << 5;
        return c & ~caseBit;
    }
    147 
    148     inline int toASCIIHexValue(char c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
    149     inline int toASCIIHexValue(unsigned short c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
    150 #if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    151     inline int toASCIIHexValue(wchar_t c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
    152 #endif
    153     inline int toASCIIHexValue(int c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
    154     inline int toASCIIHexValue(unsigned c) { ASSERT(isASCIIHexDigit(c)); return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF; }
    155 
    // isASCIIPrintable: true for the characters from space (0x20) through
    // tilde (0x7E) inclusive; control characters, DEL (0x7F) and anything
    // outside the ASCII range yield false.
    inline bool isASCIIPrintable(char c)
    {
        if (c < ' ')
            return false;
        return c <= '~';
    }

    inline bool isASCIIPrintable(unsigned short c)
    {
        if (c < ' ')
            return false;
        return c <= '~';
    }

    // Compiled out on MSVC when wchar_t is not a native type.
#if !COMPILER(MSVC) || defined(_NATIVE_WCHAR_T_DEFINED)
    inline bool isASCIIPrintable(wchar_t c)
    {
        if (c < ' ')
            return false;
        return c <= '~';
    }
#endif

    inline bool isASCIIPrintable(int c)
    {
        if (c < ' ')
            return false;
        return c <= '~';
    }

    inline bool isASCIIPrintable(unsigned c)
    {
        if (c < ' ')
            return false;
        return c <= '~';
    }
    163 }
    164 
    165 using WTF::isASCII;
    166 using WTF::isASCIIAlpha;
    167 using WTF::isASCIIAlphanumeric;
    168 using WTF::isASCIIDigit;
    169 using WTF::isASCIIHexDigit;
    170 using WTF::isASCIILower;
    171 using WTF::isASCIIOctalDigit;
    172 using WTF::isASCIIPrintable;
    173 using WTF::isASCIISpace;
    174 using WTF::isASCIIUpper;
    175 using WTF::toASCIIHexValue;
    176 using WTF::toASCIILower;
    177 using WTF::toASCIIUpper;
    178 
    179 #endif
    180