1 Index: source/common/ucnv2022.cpp 2 =================================================================== 3 --- source/common/ucnv2022.cpp (revision 259715) 4 +++ source/common/ucnv2022.cpp (working copy) 5 @@ -167,13 +167,19 @@ 6 * all versions, not just JIS7 and JIS8. 7 * - ICU does not distinguish between different versions of JIS X 0208. 8 */ 9 +#if UCONFIG_NO_NON_HTML5_CONVERSION 10 +enum { MAX_JA_VERSION=0 }; 11 +#else 12 enum { MAX_JA_VERSION=4 }; 13 +#endif 14 static const uint16_t jpCharsetMasks[MAX_JA_VERSION+1]={ 15 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT), 16 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 17 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212), 18 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 19 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7), 20 CSM(ASCII)|CSM(JISX201)|CSM(JISX208)|CSM(HWKANA_7BIT)|CSM(JISX212)|CSM(GB2312)|CSM(KSC5601)|CSM(ISO8859_1)|CSM(ISO8859_7) 21 +#endif 22 }; 23 24 typedef enum { 25 @@ -361,14 +367,25 @@ 26 }; 27 28 29 +/* Enable ISO-2022-{KR,CN,CN-Ext} for now. 30 + * TODO(jshin): Disable it when we know what to do about 'replacement' 31 + * encodings. 
See http://crbug.com/277037 and 32 + * https://codereview.chromium.org/145973021/ 33 + * Tested by value (#if): predefine as 0 to exclude the guarded code. */ 34 +#ifndef U_ENABLE_ISO_2022_KR_CN 35 +#define U_ENABLE_ISO_2022_KR_CN 1 36 +#endif 37 + 38 /* Type def for refactoring changeState_2022 code*/ 39 typedef enum{ 40 #ifdef U_ENABLE_GENERIC_ISO_2022 41 ISO_2022=0, 42 #endif 43 ISO_2022_JP=1, 44 +#if U_ENABLE_ISO_2022_KR_CN 45 ISO_2022_KR=2, 46 ISO_2022_CN=3 47 +#endif 48 } Variant2022; 49 50 /*********** ISO 2022 Converter Protos ***********/ 51 @@ -485,24 +502,28 @@ 52 /* prevent indexing beyond jpCharsetMasks[] */ 53 myConverterData->version = version = 0; 54 } 55 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 56 if(jpCharsetMasks[version]&CSM(ISO8859_7)) { 57 myConverterData->myConverterArray[ISO8859_7] = 58 ucnv_loadSharedData("ISO8859_7", &stackPieces, &stackArgs, errorCode); 59 } 60 +#endif 61 myConverterData->myConverterArray[JISX208] = 62 ucnv_loadSharedData("Shift-JIS", &stackPieces, &stackArgs, errorCode); 63 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 64 if(jpCharsetMasks[version]&CSM(JISX212)) { 65 myConverterData->myConverterArray[JISX212] = 66 ucnv_loadSharedData("jisx-212", &stackPieces, &stackArgs, errorCode); 67 } 68 if(jpCharsetMasks[version]&CSM(GB2312)) { 69 myConverterData->myConverterArray[GB2312] = 70 - ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */ 71 + ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode); /* gb_2312_80-1 */ 72 } 73 if(jpCharsetMasks[version]&CSM(KSC5601)) { 74 myConverterData->myConverterArray[KSC5601] = 75 ucnv_loadSharedData("ksc_5601", &stackPieces, &stackArgs, errorCode); 76 } 77 +#endif 78 79 /* set the function pointers to appropriate funtions */ 80 cnv->sharedData=(UConverterSharedData*)(&_ISO2022JPData); 81 @@ -513,6 +534,7 @@ 82 myConverterData->name[len]=(char)(myConverterData->version+(int)'0'); 83 myConverterData->name[len+1]='\0'; 84 } 85 +#if U_ENABLE_ISO_2022_KR_CN 86 else if(myLocale[0]=='k' && (myLocale[1]=='o'|| 
myLocale[1]=='r') && 87 (myLocale[2]=='_' || myLocale[2]=='\0')) 88 { 89 @@ -558,13 +580,13 @@ 90 91 /* open the required converters and cache them */ 92 myConverterData->myConverterArray[GB2312_1] = 93 - ucnv_loadSharedData("ibm-5478", &stackPieces, &stackArgs, errorCode); 94 + ucnv_loadSharedData("noop-gb2312_gl", &stackPieces, &stackArgs, errorCode); 95 if(version==1) { 96 myConverterData->myConverterArray[ISO_IR_165] = 97 - ucnv_loadSharedData("iso-ir-165", &stackPieces, &stackArgs, errorCode); 98 + ucnv_loadSharedData("noop-iso-ir-165", &stackPieces, &stackArgs, errorCode); 99 } 100 myConverterData->myConverterArray[CNS_11643] = 101 - ucnv_loadSharedData("cns-11643-1992", &stackPieces, &stackArgs, errorCode); 102 + ucnv_loadSharedData("noop-cns-11643", &stackPieces, &stackArgs, errorCode); 103 104 105 /* set the function pointers to appropriate funtions */ 106 @@ -582,6 +604,7 @@ 107 (void)uprv_strcpy(myConverterData->name,"ISO_2022,locale=zh,version=2"); 108 } 109 } 110 +#endif // U_ENABLE_ISO_2022_KR_CN 111 else{ 112 #ifdef U_ENABLE_GENERIC_ISO_2022 113 myConverterData->isFirstBuffer = TRUE; 114 Index: source/common/ucnvbocu.cpp 115 =================================================================== 116 --- source/common/ucnvbocu.cpp (revision 259715) 117 +++ source/common/ucnvbocu.cpp (working copy) 118 @@ -19,7 +19,7 @@ 119 120 #include "unicode/utypes.h" 121 122 -#if !UCONFIG_NO_CONVERSION 123 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION 124 125 #include "unicode/ucnv.h" 126 #include "unicode/ucnv_cb.h" 127 Index: source/common/ucnvisci.c 128 =================================================================== 129 --- source/common/ucnvisci.c (revision 259715) 130 +++ source/common/ucnvisci.c (working copy) 131 @@ -17,7 +17,7 @@ 132 133 #include "unicode/utypes.h" 134 135 -#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION 136 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_LEGACY_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION 137 
138 #include "unicode/ucnv.h" 139 #include "unicode/ucnv_cb.h" 140 Index: source/common/ucnvscsu.c 141 =================================================================== 142 --- source/common/ucnvscsu.c (revision 259715) 143 +++ source/common/ucnvscsu.c (working copy) 144 @@ -21,7 +21,7 @@ 145 146 #include "unicode/utypes.h" 147 148 -#if !UCONFIG_NO_CONVERSION 149 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION 150 151 #include "unicode/ucnv.h" 152 #include "unicode/ucnv_cb.h" 153 Index: source/common/ucnv_u7.c 154 =================================================================== 155 --- source/common/ucnv_u7.c (revision 259715) 156 +++ source/common/ucnv_u7.c (working copy) 157 @@ -16,7 +16,7 @@ 158 159 #include "unicode/utypes.h" 160 161 -#if !UCONFIG_NO_CONVERSION 162 +#if !UCONFIG_NO_CONVERSION && !UCONFIG_NO_NON_HTML5_CONVERSION 163 164 #include "unicode/ucnv.h" 165 #include "ucnv_bld.h" 166 Index: source/common/unicode/uconfig.h 167 =================================================================== 168 --- source/common/unicode/uconfig.h (revision 259715) 169 +++ source/common/unicode/uconfig.h (working copy) 170 @@ -265,6 +265,14 @@ 171 #endif 172 173 /** 174 + * \def UCONFIG_NO_NON_HTML5_CONVERSION 175 + * This switch turns off all the converters NOT listed in the WHATWG Encoding Standard: https://encoding.spec.whatwg.org/ 176 + */ 177 +#ifndef UCONFIG_NO_NON_HTML5_CONVERSION 178 +#define UCONFIG_NO_NON_HTML5_CONVERSION 0 179 +#endif 180 + 181 +/** 182 * \def UCONFIG_NO_LEGACY_CONVERSION 183 * This switch turns off all converters except for 184 * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1) 185 Index: source/common/ucnv_bld.cpp 186 =================================================================== 187 --- source/common/ucnv_bld.cpp (revision 259715) 188 +++ source/common/ucnv_bld.cpp (working copy) 189 @@ -79,16 +79,25 @@ 190 &_HZData, 191 #endif 192 193 +#if UCONFIG_NO_NON_HTML5_CONVERSION 194 + NULL, 195 +#else 196 &_SCSUData, 197 +#endif 198 199 +#if 
UCONFIG_NO_LEGACY_CONVERSION 200 + 201 +#if UCONFIG_NO_LEGACY_CONVERSION || UCONFIG_NO_NON_HTML5_CONVERSION 202 NULL, 203 #else 204 &_ISCIIData, 205 #endif 206 207 &_ASCIIData, 208 +#if UCONFIG_NO_NON_HTML5_CONVERSION 209 + NULL, NULL, &_UTF16Data, &_UTF32Data, NULL, NULL, 210 +#else 211 &_UTF7Data, &_Bocu1Data, &_UTF16Data, &_UTF32Data, &_CESU8Data, &_IMAPData, 212 +#endif 213 214 #if UCONFIG_NO_LEGACY_CONVERSION 215 NULL, 216 Index: source/common/ucnv_u8.c 217 =================================================================== 218 --- source/common/ucnv_u8.c (revision 259715) 219 +++ source/common/ucnv_u8.c (working copy) 220 @@ -87,6 +87,15 @@ 221 static const uint32_t 222 utf8_minChar32[7]={ 0, 0, 0x80, 0x800, 0x10000, 0xffffffff, 0xffffffff }; 223 224 +static UBool hasCESU8Data(const UConverter *cnv) 225 +{ 226 +#if UCONFIG_NO_NON_HTML5_CONVERSION 227 + return FALSE; 228 +#else 229 + return (UBool)(cnv->sharedData == &_CESU8Data); 230 +#endif 231 +} 232 + 233 static void ucnv_toUnicode_UTF8 (UConverterToUnicodeArgs * args, 234 UErrorCode * err) 235 { 236 @@ -96,10 +105,10 @@ 237 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 238 const UChar *targetLimit = args->targetLimit; 239 unsigned char *toUBytes = cnv->toUBytes; 240 - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); 241 + UBool isCESU8 = hasCESU8Data(cnv); 242 uint32_t ch, ch2 = 0; 243 int32_t i, inBytes; 244 - 245 + 246 /* Restore size of current sequence */ 247 if (cnv->toUnicodeStatus && myTarget < targetLimit) 248 { 249 @@ -226,7 +235,7 @@ 250 const unsigned char *sourceLimit = (unsigned char *) args->sourceLimit; 251 const UChar *targetLimit = args->targetLimit; 252 unsigned char *toUBytes = cnv->toUBytes; 253 - UBool isCESU8 = (UBool)(cnv->sharedData == &_CESU8Data); 254 + UBool isCESU8 = hasCESU8Data(cnv); 255 uint32_t ch, ch2 = 0; 256 int32_t i, inBytes; 257 258 @@ -357,7 +366,7 @@ 259 UChar32 ch; 260 uint8_t tempBuf[4]; 261 int32_t indexToWrite; 262 - UBool 
isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); 263 + UBool isNotCESU8 = !hasCESU8Data(cnv); 264 265 if (cnv->fromUChar32 && myTarget < targetLimit) 266 { 267 @@ -473,7 +482,7 @@ 268 int32_t offsetNum, nextSourceIndex; 269 int32_t indexToWrite; 270 uint8_t tempBuf[4]; 271 - UBool isNotCESU8 = (UBool)(cnv->sharedData != &_CESU8Data); 272 + UBool isNotCESU8 = !hasCESU8Data(cnv); 273 274 if (cnv->fromUChar32 && myTarget < targetLimit) 275 { 276 Index: source/common/unicode/urename.h 277 =================================================================== 278 --- source/common/unicode/urename.h (revision 259715) 279 +++ source/common/unicode/urename.h (working copy) 280 @@ -73,12 +73,16 @@ 281 #define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData) 282 #define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign) 283 #define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData) 284 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 285 #define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data) 286 #define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data) 287 +#endif 288 #define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData) 289 #define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData) 290 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 291 #define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData) 292 #define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData) 293 +#endif 294 #define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data) 295 #define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1) 296 #define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11) 297 @@ -94,14 +98,18 @@ 298 #define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8) 299 #define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data) 300 #define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData) 301 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 302 #define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData) 303 +#endif 304 #define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData) 305 #define _UTF16Data 
U_ICU_ENTRY_POINT_RENAME(_UTF16Data) 306 #define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData) 307 #define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData) 308 #define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data) 309 #define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData) 310 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 311 #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) 312 +#endif 313 #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) 314 #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) 315 #define cmemory_inUse U_ICU_ENTRY_POINT_RENAME(cmemory_inUse) 316 Index: source/common/ucnv_cnv.h 317 =================================================================== 318 --- source/common/ucnv_cnv.h (revision 259715) 319 +++ source/common/ucnv_cnv.h (working copy) 320 @@ -259,8 +259,13 @@ 321 _ISO2022Data, 322 _LMBCSData1,_LMBCSData2, _LMBCSData3, _LMBCSData4, _LMBCSData5, _LMBCSData6, 323 _LMBCSData8,_LMBCSData11,_LMBCSData16,_LMBCSData17,_LMBCSData18,_LMBCSData19, 324 +#if !UCONFIG_NO_NON_HTML5_CONVERSION 325 _HZData,_ISCIIData, _SCSUData, _ASCIIData, 326 _UTF7Data, _Bocu1Data, _UTF16Data, _UTF32Data, _CESU8Data, _IMAPData, _CompoundTextData; 327 +#else 328 + _HZData, _ASCIIData, 329 + _UTF16Data, _UTF32Data, _CompoundTextData; 330 +#endif 331 332 U_CDECL_END 333 334