Home | History | Annotate | Download | only in wtf
      1 /*
      2  * Copyright (C) 2007, 2008, 2009, 2011 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  *
      8  * 1.  Redistributions of source code must retain the above copyright
      9  *     notice, this list of conditions and the following disclaimer.
     10  * 2.  Redistributions in binary form must reproduce the above copyright
     11  *     notice, this list of conditions and the following disclaimer in the
     12  *     documentation and/or other materials provided with the distribution.
     13  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
     14  *     its contributors may be used to endorse or promote products derived
     15  *     from this software without specific prior written permission.
     16  *
     17  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
     18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     19  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     20  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
     21  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     22  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     23  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     24  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #ifndef WTF_ASCIICType_h
     30 #define WTF_ASCIICType_h
     31 
     32 #include "wtf/Assertions.h"
     33 
     34 // The behavior of many of the functions in the <ctype.h> header is dependent
     35 // on the current locale. But in the WebKit project, all uses of those functions
     36 // are in code processing something that's not locale-specific. These equivalents
     37 // for some of the <ctype.h> functions are named more explicitly, not dependent
     38 // on the C library locale, and we should also optimize them as needed.
     39 
// The is* predicates return false, and toASCIILower/toASCIIUpper leave the
// character unchanged, if passed a character that is outside the range 0-7F.
// So they can be used on Unicode strings or characters if the intent is to do
// processing only if the character is ASCII. The exception is
// toASCIILowerUnchecked, which ORs 0x20 into whatever character it is given.
     43 
     44 namespace WTF {
     45 
     46 template<typename CharType> inline bool isASCII(CharType c)
     47 {
     48     return !(c & ~0x7F);
     49 }
     50 
     51 template<typename CharType> inline bool isASCIIAlpha(CharType c)
     52 {
     53     return (c | 0x20) >= 'a' && (c | 0x20) <= 'z';
     54 }
     55 
     56 template<typename CharType> inline bool isASCIIDigit(CharType c)
     57 {
     58     return c >= '0' && c <= '9';
     59 }
     60 
     61 template<typename CharType> inline bool isASCIIAlphanumeric(CharType c)
     62 {
     63     return isASCIIDigit(c) || isASCIIAlpha(c);
     64 }
     65 
     66 template<typename CharType> inline bool isASCIIHexDigit(CharType c)
     67 {
     68     return isASCIIDigit(c) || ((c | 0x20) >= 'a' && (c | 0x20) <= 'f');
     69 }
     70 
     71 template<typename CharType> inline bool isASCIILower(CharType c)
     72 {
     73     return c >= 'a' && c <= 'z';
     74 }
     75 
     76 template<typename CharType> inline bool isASCIIOctalDigit(CharType c)
     77 {
     78     return (c >= '0') & (c <= '7');
     79 }
     80 
     81 template<typename CharType> inline bool isASCIIPrintable(CharType c)
     82 {
     83     return c >= ' ' && c <= '~';
     84 }
     85 
     86 /*
     87  Statistics from a run of Apple's page load test for callers of isASCIISpace:
     88 
     89  character          count
     90  ---------          -----
     91  non-spaces         689383
     92  20  space          294720
     93  0A  \n             89059
     94  09  \t             28320
     95  0D  \r             0
     96  0C  \f             0
     97  0B  \v             0
     98  */
     99 template<typename CharType> inline bool isASCIISpace(CharType c)
    100 {
    101     return c <= ' ' && (c == ' ' || (c <= 0xD && c >= 0x9));
    102 }
    103 
    104 template<typename CharType> inline bool isASCIIUpper(CharType c)
    105 {
    106     return c >= 'A' && c <= 'Z';
    107 }
    108 
    109 template<typename CharType> inline CharType toASCIILower(CharType c)
    110 {
    111 #if defined(_MSC_FULL_VER) && _MSC_FULL_VER == 170060610
    112     // Make a workaround for VS2012 update 3 optimizer bug, remove once VS2012 fix it.
    113     return (c >= 'A' && c <= 'Z') ? c + 0x20 : c;
    114 #else
    115     return c | ((c >= 'A' && c <= 'Z') << 5);
    116 #endif
    117 }
    118 
    119 template<typename CharType> inline CharType toASCIILowerUnchecked(CharType character)
    120 {
    121     // This function can be used for comparing any input character
    122     // to a lowercase English character. The isASCIIAlphaCaselessEqual
    123     // below should be used for regular comparison of ASCII alpha
    124     // characters, but switch statements in CSS tokenizer require
    125     // direct use of this function.
    126     return character | 0x20;
    127 }
    128 
    129 template<typename CharType> inline CharType toASCIIUpper(CharType c)
    130 {
    131     return c & ~((c >= 'a' && c <= 'z') << 5);
    132 }
    133 
    134 template<typename CharType> inline int toASCIIHexValue(CharType c)
    135 {
    136     ASSERT(isASCIIHexDigit(c));
    137     return c < 'A' ? c - '0' : (c - 'A' + 10) & 0xF;
    138 }
    139 
    140 template<typename CharType> inline int toASCIIHexValue(CharType upperValue, CharType lowerValue)
    141 {
    142     ASSERT(isASCIIHexDigit(upperValue) && isASCIIHexDigit(lowerValue));
    143     return ((toASCIIHexValue(upperValue) << 4) & 0xF0) | toASCIIHexValue(lowerValue);
    144 }
    145 
    146 inline char lowerNibbleToASCIIHexDigit(char c)
    147 {
    148     char nibble = c & 0xF;
    149     return nibble < 10 ? '0' + nibble : 'A' + nibble - 10;
    150 }
    151 
    152 inline char upperNibbleToASCIIHexDigit(char c)
    153 {
    154     char nibble = (c >> 4) & 0xF;
    155     return nibble < 10 ? '0' + nibble : 'A' + nibble - 10;
    156 }
    157 
    158 template<typename CharType> inline bool isASCIIAlphaCaselessEqual(CharType cssCharacter, char character)
    159 {
    160     // This function compares a (preferrably) constant ASCII
    161     // lowercase letter to any input character.
    162     ASSERT(character >= 'a' && character <= 'z');
    163     return LIKELY(toASCIILowerUnchecked(cssCharacter) == character);
    164 }
    165 
    166 }
    167 
    168 using WTF::isASCII;
    169 using WTF::isASCIIAlpha;
    170 using WTF::isASCIIAlphanumeric;
    171 using WTF::isASCIIDigit;
    172 using WTF::isASCIIHexDigit;
    173 using WTF::isASCIILower;
    174 using WTF::isASCIIOctalDigit;
    175 using WTF::isASCIIPrintable;
    176 using WTF::isASCIISpace;
    177 using WTF::isASCIIUpper;
    178 using WTF::toASCIIHexValue;
    179 using WTF::toASCIILower;
    180 using WTF::toASCIILowerUnchecked;
    181 using WTF::toASCIIUpper;
    182 using WTF::lowerNibbleToASCIIHexDigit;
    183 using WTF::upperNibbleToASCIIHexDigit;
    184 using WTF::isASCIIAlphaCaselessEqual;
    185 
    186 #endif
    187