1 From 4cc54e7dcd19f78a3ebaaa3c6f05885029c53b01 Mon Sep 17 00:00:00 2001 2 From: David 'Digit' Turner <digit (a] google.com> 3 Date: Wed, 12 Feb 2014 20:04:18 +0800 4 Subject: [PATCH 01/12] android: Add locale support. 5 6 This is based on the Bionic <ctype.h> declarations. Note that 7 unfortunately, the _ctype_ table exposed by this header has a bug 8 so a fixed copy is included here instead. 9 10 See src/support/android/locale_android.cpp for details. 11 --- 12 include/__locale | 17 +++++- 13 src/locale.cpp | 25 +++++++- 14 src/support/android/locale_android.cpp | 101 +++++++++++++++++++++++++++++++++ 15 3 files changed, 140 insertions(+), 3 deletions(-) 16 create mode 100644 src/support/android/locale_android.cpp 17 18 diff --git a/include/__locale b/include/__locale 19 index fb5b196..c793cfe 100644 20 --- a/include/__locale 21 +++ b/include/__locale 22 @@ -375,7 +375,20 @@ public: 23 static const mask punct = _ISPUNCT; 24 static const mask xdigit = _ISXDIGIT; 25 static const mask blank = _ISBLANK; 26 -#else // __GLIBC__ || _WIN32 || __APPLE__ || __FreeBSD__ || __EMSCRIPTEN__ || __sun__ 27 +#elif defined(__ANDROID__) 28 + typedef unsigned short mask; 29 + static const mask space = _S; 30 + static const mask print = _P | _U | _L | _N | _B; 31 + static const mask cntrl = _C; 32 + static const mask upper = _U; 33 + static const mask lower = _L; 34 + static const mask alpha = _U | _L; 35 + static const mask digit = _N; 36 + static const mask punct = _P; 37 + static const mask xdigit = _N | _X; 38 + // See src/support/android/locale_android.cpp for details! 
39 + static const mask blank = 0x100; 40 +#else // __ANDROID__ 41 typedef unsigned long mask; 42 static const mask space = 1<<0; 43 static const mask print = 1<<1; 44 @@ -387,7 +400,7 @@ public: 45 static const mask punct = 1<<7; 46 static const mask xdigit = 1<<8; 47 static const mask blank = 1<<9; 48 -#endif // __GLIBC__ || _WIN32 || __APPLE__ || __FreeBSD__ 49 +#endif // __GLIBC__ || _WIN32 || __APPLE__ || __FreeBSD__ || __EMSCRIPTEN__ || __sun__ || __ANDROID__ 50 static const mask alnum = alpha | digit; 51 static const mask graph = alnum | punct; 52 53 diff --git a/src/locale.cpp b/src/locale.cpp 54 index 4877f2b..66aaca9 100644 55 --- a/src/locale.cpp 56 +++ b/src/locale.cpp 57 @@ -814,6 +814,8 @@ ctype<wchar_t>::do_toupper(char_type c) const 58 return isascii(c) ? _DefaultRuneLocale.__mapupper[c] : c; 59 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) 60 return isascii(c) ? ctype<char>::__classic_upper_table()[c] : c; 61 +#elif defined(__ANDROID__) 62 + return isascii(c) ? _toupper_tab_[c + 1] : c; 63 #else 64 return (isascii(c) && iswlower_l(c, __cloc())) ? c-L'a'+L'A' : c; 65 #endif 66 @@ -828,6 +830,8 @@ ctype<wchar_t>::do_toupper(char_type* low, const char_type* high) const 67 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) 68 *low = isascii(*low) ? ctype<char>::__classic_upper_table()[*low] 69 : *low; 70 +#elif defined(__ANDROID__) 71 + *low = isascii(*low) ? _toupper_tab_[*low + 1] : *low; 72 #else 73 *low = (isascii(*low) && islower_l(*low, __cloc())) ? (*low-L'a'+L'A') : *low; 74 #endif 75 @@ -841,6 +845,8 @@ ctype<wchar_t>::do_tolower(char_type c) const 76 return isascii(c) ? _DefaultRuneLocale.__maplower[c] : c; 77 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) 78 return isascii(c) ? ctype<char>::__classic_lower_table()[c] : c; 79 +#elif defined(__ANDROID__) 80 + return isascii(c) ? _tolower_tab_[c + 1] : c; 81 #else 82 return (isascii(c) && isupper_l(c, __cloc())) ? 
c-L'A'+'a' : c; 83 #endif 84 @@ -855,6 +861,8 @@ ctype<wchar_t>::do_tolower(char_type* low, const char_type* high) const 85 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) 86 *low = isascii(*low) ? ctype<char>::__classic_lower_table()[*low] 87 : *low; 88 +#elif defined(__ANDROID__) 89 + *low = isascii(*low) ? _tolower_tab_[*low + 1] : *low; 90 #else 91 *low = (isascii(*low) && isupper_l(*low, __cloc())) ? *low-L'A'+L'a' : *low; 92 #endif 93 @@ -924,6 +932,8 @@ ctype<char>::do_toupper(char_type c) const 94 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) 95 return isascii(c) ? 96 static_cast<char>(__classic_upper_table()[static_cast<unsigned char>(c)]) : c; 97 +#elif defined(__ANDROID__) 98 + return isascii(c) ? _toupper_tab_[c + 1] : c; 99 #else 100 return (isascii(c) && islower_l(c, __cloc())) ? c-'a'+'A' : c; 101 #endif 102 @@ -941,6 +951,8 @@ ctype<char>::do_toupper(char_type* low, const char_type* high) const 103 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) 104 *low = isascii(*low) ? 105 static_cast<char>(__classic_upper_table()[static_cast<size_t>(*low)]) : *low; 106 +#elif defined(__ANDROID__) 107 + *low = isascii(*low) ? _toupper_tab_[*low + 1] : *low; 108 #else 109 *low = (isascii(*low) && islower_l(*low, __cloc())) ? *low-'a'+'A' : *low; 110 #endif 111 @@ -958,6 +970,8 @@ ctype<char>::do_tolower(char_type c) const 112 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) 113 return isascii(c) ? 114 static_cast<char>(__classic_lower_table()[static_cast<size_t>(c)]) : c; 115 +#elif defined(__ANDROID__) 116 + return isascii(c) ? _tolower_tab_[c + 1] : c; 117 #else 118 return (isascii(c) && isupper_l(c, __cloc())) ? c-'A'+'a' : c; 119 #endif 120 @@ -973,6 +987,8 @@ ctype<char>::do_tolower(char_type* low, const char_type* high) const 121 *low = static_cast<char>(__classic_lower_table()[static_cast<unsigned char>(*low)]); 122 #elif defined(__GLIBC__) || defined(__EMSCRIPTEN__) 123 *low = isascii(*low) ? 
static_cast<char>(__classic_lower_table()[static_cast<size_t>(*low)]) : *low; 124 +#elif defined(__ANDROID__) 125 + *low = isascii(*low) ? _tolower_tab_[*low + 1] : *low; 126 #else 127 *low = (isascii(*low) && isupper_l(*low, __cloc())) ? *low-'A'+'a' : *low; 128 #endif 129 @@ -1018,6 +1034,11 @@ extern "C" const int ** __ctype_tolower_loc(); 130 extern "C" const int ** __ctype_toupper_loc(); 131 #endif 132 133 +#if defined(__ANDROID__) 134 +// See src/support/android/locale_android.cpp 135 +extern "C" const unsigned short* const _ctype_android; 136 +#endif 137 + 138 const ctype<char>::mask* 139 ctype<char>::classic_table() _NOEXCEPT 140 { 141 @@ -1035,6 +1056,8 @@ ctype<char>::classic_table() _NOEXCEPT 142 // going to end up dereferencing it later... 143 #elif defined(__EMSCRIPTEN__) 144 return *__ctype_b_loc(); 145 +#elif defined(__ANDROID__) 146 + return _ctype_android; 147 #elif defined(_AIX) 148 return (const unsigned int *)__lc_ctype_ptr->obj->mask; 149 #else 150 @@ -1422,7 +1445,7 @@ locale::id codecvt<wchar_t, char, mbstate_t>::id; 151 152 codecvt<wchar_t, char, mbstate_t>::codecvt(size_t refs) 153 : locale::facet(refs), 154 - __l(_LIBCPP_GET_C_LOCALE) 155 + __l(0) 156 { 157 } 158 159 diff --git a/src/support/android/locale_android.cpp b/src/support/android/locale_android.cpp 160 new file mode 100644 161 index 0000000..7193028 162 --- /dev/null 163 +++ b/src/support/android/locale_android.cpp 164 @@ -0,0 +1,101 @@ 165 +// -*- C++ -*- 166 +//===------------------ support/android/locale_android.cpp ----------------===// 167 +// 168 +// The LLVM Compiler Infrastructure 169 +// 170 +// This file is dual licensed under the MIT and the University of Illinois Open 171 +// Source Licenses. See LICENSE.TXT for details. 
172 +// 173 +//===----------------------------------------------------------------------===// 174 + 175 +#include <ctype.h> 176 + 177 +// Bionic exports the non-standard _ctype_ array in <ctype.h>, which 178 +// unfortunately cannot be used directly for libc++ because it doesn't 179 +// have a proper bit-flag for blank characters. 180 +// 181 +// Note that the header does define a _B flag (as 0x80), but it 182 +// is only set on the space (32) character, and used to implement 183 +// isprint() properly. The implementation of isblank() relies on 184 +// direct comparisons with 9 and 32 instead. 185 +// 186 +// The following is a local copy of the Bionic _ctype_ array that has 187 +// been modified in the following way: 188 +// 189 +// - It stores 16-bit unsigned values, instead of 8-bit char ones. 190 +// 191 +// - Bit flag _BLANK (0x100) is used to indicate blank characters. 192 +// It is only set for indices 9 (TAB) and 32 (SPACE). 193 +// 194 +// - Support signed char properly for indexing. 195 + 196 +// Used to tag blank characters, this doesn't appear in <ctype.h> nor 197 +// the original Bionic _ctype_ array. 198 +#define _BLANK 0x100 199 + 200 +// NOTE: A standalone forward declaration is required to ensure that this 201 +// variable is properly exported with a C name. 
In other words, this does 202 +// _not_ work: 203 +// 204 +// extern "C" { 205 +// const unsigned short* const _ctype_android = ...; 206 +// } 207 +// 208 +extern "C" const unsigned short* const _ctype_android; 209 + 210 +static const unsigned short ctype_android_tab[256+128] = { 211 + /* -128..-1 */ 212 + _C, _C, _C, _C, _C, _C, _C, _C, /* 80 */ 213 + _C, _C, _C, _C, _C, _C, _C, _C, /* 88 */ 214 + _C, _C, _C, _C, _C, _C, _C, _C, /* 90 */ 215 + _C, _C, _C, _C, _C, _C, _C, _C, /* 98 */ 216 + _P, _P, _P, _P, _P, _P, _P, _P, /* A0 */ 217 + _P, _P, _P, _P, _P, _P, _P, _P, /* A8 */ 218 + _P, _P, _P, _P, _P, _P, _P, _P, /* B0 */ 219 + _P, _P, _P, _P, _P, _P, _P, _P, /* B8 */ 220 + _P, _P, _P, _P, _P, _P, _P, _P, /* C0 */ 221 + _P, _P, _P, _P, _P, _P, _P, _P, /* C8 */ 222 + _P, _P, _P, _P, _P, _P, _P, _P, /* D0 */ 223 + _P, _P, _P, _P, _P, _P, _P, _P, /* D8 */ 224 + _P, _P, _P, _P, _P, _P, _P, _P, /* E0 */ 225 + _P, _P, _P, _P, _P, _P, _P, _P, /* E8 */ 226 + _P, _P, _P, _P, _P, _P, _P, _P, /* F0 */ 227 + _P, _P, _P, _P, _P, _P, _P, _P, /* F8 */ 228 + /* 0..127 */ 229 + _C, _C, _C, _C, _C, _C, _C, _C, 230 + _C, _C|_S|_BLANK, _C|_S, _C|_S, _C|_S, _C|_S, _C, _C, 231 + _C, _C, _C, _C, _C, _C, _C, _C, 232 + _C, _C, _C, _C, _C, _C, _C, _C, 233 + _S|_B|_BLANK, _P, _P, _P, _P, _P, _P, _P, 234 + _P, _P, _P, _P, _P, _P, _P, _P, 235 + _N, _N, _N, _N, _N, _N, _N, _N, 236 + _N, _N, _P, _P, _P, _P, _P, _P, 237 + _P, _U|_X, _U|_X, _U|_X, _U|_X, _U|_X, _U|_X, _U, 238 + _U, _U, _U, _U, _U, _U, _U, _U, 239 + _U, _U, _U, _U, _U, _U, _U, _U, 240 + _U, _U, _U, _P, _P, _P, _P, _P, 241 + _P, _L|_X, _L|_X, _L|_X, _L|_X, _L|_X, _L|_X, _L, 242 + _L, _L, _L, _L, _L, _L, _L, _L, 243 + _L, _L, _L, _L, _L, _L, _L, _L, 244 + /* determine printability based on the ISO 8859 8-bit standard */ 245 + _L, _L, _L, _P, _P, _P, _P, _C, 246 + /* 128..255, same as -128..-1 */ 247 + _C, _C, _C, _C, _C, _C, _C, _C, /* 80 */ 248 + _C, _C, _C, _C, _C, _C, _C, _C, /* 88 */ 249 + _C, _C, _C, _C, _C, _C, _C, _C, /* 90 */ 250 + 
_C, _C, _C, _C, _C, _C, _C, _C, /* 98 */ 251 + _P, _P, _P, _P, _P, _P, _P, _P, /* A0 */ 252 + _P, _P, _P, _P, _P, _P, _P, _P, /* A8 */ 253 + _P, _P, _P, _P, _P, _P, _P, _P, /* B0 */ 254 + _P, _P, _P, _P, _P, _P, _P, _P, /* B8 */ 255 + _P, _P, _P, _P, _P, _P, _P, _P, /* C0 */ 256 + _P, _P, _P, _P, _P, _P, _P, _P, /* C8 */ 257 + _P, _P, _P, _P, _P, _P, _P, _P, /* D0 */ 258 + _P, _P, _P, _P, _P, _P, _P, _P, /* D8 */ 259 + _P, _P, _P, _P, _P, _P, _P, _P, /* E0 */ 260 + _P, _P, _P, _P, _P, _P, _P, _P, /* E8 */ 261 + _P, _P, _P, _P, _P, _P, _P, _P, /* F0 */ 262 + _P, _P, _P, _P, _P, _P, _P, _P, /* F8 */ 263 +}; 264 + 265 +const unsigned short* const _ctype_android = ctype_android_tab + 128; 266 -- 267 1.9.1.423.g4596e3a 268 269