1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2003-2011, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: nptrans.h 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2003feb1 16 * created by: Ram Viswanadha 17 */ 18 19 #ifndef NPTRANS_H 20 #define NPTRANS_H 21 22 #include "unicode/utypes.h" 23 24 #if !UCONFIG_NO_IDNA 25 #if !UCONFIG_NO_TRANSLITERATION 26 27 #include "unicode/uniset.h" 28 #include "unicode/ures.h" 29 #include "unicode/translit.h" 30 31 #include "intltest.h" 32 33 34 #define ASCII_SPACE 0x0020 35 36 class NamePrepTransform { 37 38 private : 39 Transliterator *mapping; 40 UnicodeSet unassigned; 41 UnicodeSet prohibited; 42 UnicodeSet labelSeparatorSet; 43 UResourceBundle *bundle; 44 NamePrepTransform(UParseError& parseError, UErrorCode& status); 45 46 47 public : 48 49 static NamePrepTransform* createInstance(UParseError& parseError, UErrorCode& status); 50 51 virtual ~NamePrepTransform(); 52 53 54 inline UBool isProhibited(UChar32 ch); 55 56 /** 57 * ICU "poor man's RTTI", returns a UClassID for the actual class. 58 */ 59 inline UClassID getDynamicClassID() const { return getStaticClassID(); } 60 61 /** 62 * ICU "poor man's RTTI", returns a UClassID for this class. 63 */ 64 static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; } 65 66 /** 67 * Map every character in input stream with mapping character 68 * in the mapping table and populate the output stream. 69 * For any individual character the mapping table may specify 70 * that that a character be mapped to nothing, mapped to one 71 * other character or to a string of other characters. 72 * 73 * @param src Pointer to UChar buffer containing a single label 74 * @param srcLength Number of characters in the source label 75 * @param dest Pointer to the destination buffer to receive the output 76 * @param destCapacity The capacity of destination array 77 * @param allowUnassigned Unassigned values can be converted to ASCII for query operations 78 * If TRUE unassigned values are treated as normal Unicode code point. 79 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 80 * @param status ICU error code in/out parameter. 81 * Must fulfill U_SUCCESS before the function call. 82 * @return The number of UChars in the destination buffer 83 */ 84 int32_t map(const UChar* src, int32_t srcLength, 85 UChar* dest, int32_t destCapacity, 86 UBool allowUnassigned, 87 UParseError* parseError, 88 UErrorCode& status ); 89 90 /** 91 * Prepare the input stream with for use. This operation maps, normalizes(NFKC), 92 * checks for prohited and BiDi characters in the order defined by RFC 3454 93 * 94 * @param src Pointer to UChar buffer containing a single label 95 * @param srcLength Number of characters in the source label 96 * @param dest Pointer to the destination buffer to receive the output 97 * @param destCapacity The capacity of destination array 98 * @param allowUnassigned Unassigned values can be converted to ASCII for query operations 99 * If TRUE unassigned values are treated as normal Unicode code point. 100 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code. 101 * @param status ICU error code in/out parameter. 102 * Must fulfill U_SUCCESS before the function call. 103 * @return The number of UChars in the destination buffer 104 */ 105 int32_t process(const UChar* src, int32_t srcLength, 106 UChar* dest, int32_t destCapacity, 107 UBool allowUnassigned, 108 UParseError* parseError, 109 UErrorCode& status ); 110 111 /** 112 * Ascertain if the given code point is a label separator as specified by IDNA 113 * 114 * @return TRUE is the code point is a label separator 115 */ 116 UBool isLabelSeparator(UChar32 ch, UErrorCode& status); 117 118 inline UBool isLDHChar(UChar32 ch); 119 120 private: 121 /** 122 * The address of this static class variable serves as this class's ID 123 * for ICU "poor man's RTTI". 124 */ 125 static const char fgClassID; 126 }; 127 128 inline UBool NamePrepTransform::isLDHChar(UChar32 ch){ 129 // high runner case 130 if(ch>0x007A){ 131 return FALSE; 132 } 133 //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A] 134 if( (ch==0x002D) || 135 (0x0030 <= ch && ch <= 0x0039) || 136 (0x0041 <= ch && ch <= 0x005A) || 137 (0x0061 <= ch && ch <= 0x007A) 138 ){ 139 return TRUE; 140 } 141 return FALSE; 142 } 143 144 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ 145 #else 146 class NamePrepTransform { 147 }; 148 #endif /* #if !UCONFIG_NO_IDNA */ 149 150 #endif 151 152 /* 153 * Hey, Emacs, please set the following: 154 * 155 * Local Variables: 156 * indent-tabs-mode: nil 157 * End: 158 * 159 */ 160