1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2003-2011, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: nptrans.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2003feb1 14 * created by: Ram Viswanadha 15 */ 16 17 #ifndef NPTRANS_H 18 #define NPTRANS_H 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_IDNA 23 #if !UCONFIG_NO_TRANSLITERATION 24 25 #include "unicode/uniset.h" 26 #include "unicode/ures.h" 27 #include "unicode/translit.h" 28 29 #include "intltest.h" 30 31 32 #define ASCII_SPACE 0x0020 33 34 class NamePrepTransform { 35 36 private : 37 Transliterator *mapping; 38 UnicodeSet unassigned; 39 UnicodeSet prohibited; 40 UnicodeSet labelSeparatorSet; 41 UResourceBundle *bundle; 42 NamePrepTransform(UParseError& parseError, UErrorCode& status); 43 44 45 public : 46 47 static NamePrepTransform* createInstance(UParseError& parseError, UErrorCode& status); 48 49 virtual ~NamePrepTransform(); 50 51 52 inline UBool isProhibited(UChar32 ch); 53 54 /** 55 * ICU "poor man's RTTI", returns a UClassID for the actual class. 56 */ 57 inline UClassID getDynamicClassID() const { return getStaticClassID(); } 58 59 /** 60 * ICU "poor man's RTTI", returns a UClassID for this class. 61 */ 62 static inline UClassID getStaticClassID() { return (UClassID)&fgClassID; } 63 64 /** 65 * Map every character in input stream with mapping character 66 * in the mapping table and populate the output stream. 67 * For any individual character the mapping table may specify 68 * that that a character be mapped to nothing, mapped to one 69 * other character or to a string of other characters. 70 * 71 * @param src Pointer to UChar buffer containing a single label 72 * @param srcLength Number of characters in the source label 73 * @param dest Pointer to the destination buffer to receive the output 74 * @param destCapacity The capacity of destination array 75 * @param allowUnassigned Unassigned values can be converted to ASCII for query operations 76 * If TRUE unassigned values are treated as normal Unicode code point. 77 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 78 * @param status ICU error code in/out parameter. 79 * Must fulfill U_SUCCESS before the function call. 80 * @return The number of UChars in the destination buffer 81 */ 82 int32_t map(const UChar* src, int32_t srcLength, 83 UChar* dest, int32_t destCapacity, 84 UBool allowUnassigned, 85 UParseError* parseError, 86 UErrorCode& status ); 87 88 /** 89 * Prepare the input stream with for use. This operation maps, normalizes(NFKC), 90 * checks for prohited and BiDi characters in the order defined by RFC 3454 91 * 92 * @param src Pointer to UChar buffer containing a single label 93 * @param srcLength Number of characters in the source label 94 * @param dest Pointer to the destination buffer to receive the output 95 * @param destCapacity The capacity of destination array 96 * @param allowUnassigned Unassigned values can be converted to ASCII for query operations 97 * If TRUE unassigned values are treated as normal Unicode code point. 98 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT error code. 99 * @param status ICU error code in/out parameter. 100 * Must fulfill U_SUCCESS before the function call. 101 * @return The number of UChars in the destination buffer 102 */ 103 int32_t process(const UChar* src, int32_t srcLength, 104 UChar* dest, int32_t destCapacity, 105 UBool allowUnassigned, 106 UParseError* parseError, 107 UErrorCode& status ); 108 109 /** 110 * Ascertain if the given code point is a label separator as specified by IDNA 111 * 112 * @return TRUE is the code point is a label separator 113 */ 114 UBool isLabelSeparator(UChar32 ch, UErrorCode& status); 115 116 inline UBool isLDHChar(UChar32 ch); 117 118 private: 119 /** 120 * The address of this static class variable serves as this class's ID 121 * for ICU "poor man's RTTI". 122 */ 123 static const char fgClassID; 124 }; 125 126 inline UBool NamePrepTransform::isLDHChar(UChar32 ch){ 127 // high runner case 128 if(ch>0x007A){ 129 return FALSE; 130 } 131 //[\\u002D \\u0030-\\u0039 \\u0041-\\u005A \\u0061-\\u007A] 132 if( (ch==0x002D) || 133 (0x0030 <= ch && ch <= 0x0039) || 134 (0x0041 <= ch && ch <= 0x005A) || 135 (0x0061 <= ch && ch <= 0x007A) 136 ){ 137 return TRUE; 138 } 139 return FALSE; 140 } 141 142 #endif /* #if !UCONFIG_NO_TRANSLITERATION */ 143 #else 144 class NamePrepTransform { 145 }; 146 #endif /* #if !UCONFIG_NO_IDNA */ 147 148 #endif 149 150 /* 151 * Hey, Emacs, please set the following: 152 * 153 * Local Variables: 154 * indent-tabs-mode: nil 155 * End: 156 * 157 */ 158