1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2003-2011, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: idnaref.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2003feb1 14 * created by: Ram Viswanadha 15 */ 16 17 #ifndef __IDNAREF_H__ 18 #define __IDNAREF_H__ 19 20 #include "unicode/utypes.h" 21 22 #if !UCONFIG_NO_IDNA 23 24 #include "unicode/parseerr.h" 25 26 #define IDNAREF_DEFAULT 0x0000 27 #define IDNAREF_ALLOW_UNASSIGNED 0x0001 28 #define IDNAREF_USE_STD3_RULES 0x0002 29 30 /** 31 * This function implements the ToASCII operation as defined in the IDNA draft. 32 * This operation is done on <b>single labels</b> before sending it to something that expects 33 * ASCII names. A label is an individual part of a domain name. Labels are usually 34 * separated by dots; for e.g." "www.example.com" is composed of 3 labels 35 * "www","example", and "com". 36 * 37 * 38 * @param src Input Unicode label. 39 * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 40 * @param dest Output Unicode array with ACE encoded ASCII label. 41 * @param destCapacity Size of dest. 42 * @param options A bit set of options: 43 * 44 * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 45 * If TRUE unassigned values are treated as normal Unicode code points. 46 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 47 * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 48 * If TRUE and the input does not statisfy STD3 rules, the operation 49 * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 50 * 51 * @param parseError Pointer to UParseError struct to recieve information on position 52 * of error if an error is encountered. Can be NULL. 53 * @param status ICU in/out error code parameter. 54 * U_INVALID_CHAR_FOUND if src contains 55 * unmatched single surrogates. 56 * U_INDEX_OUTOFBOUNDS_ERROR if src contains 57 * too many code points. 58 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 59 * @return Number of ASCII characters converted. 60 */ 61 U_CFUNC int32_t U_EXPORT2 62 idnaref_toASCII(const UChar* src, int32_t srcLength, 63 UChar* dest, int32_t destCapacity, 64 int32_t options, 65 UParseError* parseError, 66 UErrorCode* status); 67 68 69 /** 70 * This function implements the ToUnicode operation as defined in the IDNA draft. 71 * This operation is done on <b>single labels</b> before sending it to something that expects 72 * ASCII names. A label is an individual part of a domain name. Labels are usually 73 * separated by dots; for e.g." "www.example.com" is composed of 3 labels 74 * "www","example", and "com". 75 * 76 * @param src Input ASCII (ACE encoded) label. 77 * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 78 * @param dest Output Converted Unicode array. 79 * @param destCapacity Size of dest. 80 * @param options A bit set of options: 81 * 82 * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 83 * If TRUE unassigned values are treated as normal Unicode code points. 84 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 85 * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 86 * If TRUE and the input does not statisfy STD3 rules, the operation 87 * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 88 * 89 * @param parseError Pointer to UParseError struct to recieve information on position 90 * of error if an error is encountered. Can be NULL. 91 * @param status ICU in/out error code parameter. 92 * U_INVALID_CHAR_FOUND if src contains 93 * unmatched single surrogates. 94 * U_INDEX_OUTOFBOUNDS_ERROR if src contains 95 * too many code points. 96 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 97 * @return Number of Unicode characters converted. 98 */ 99 U_CFUNC int32_t U_EXPORT2 100 idnaref_toUnicode(const UChar* src, int32_t srcLength, 101 UChar* dest, int32_t destCapacity, 102 int32_t options, 103 UParseError* parseError, 104 UErrorCode* status); 105 106 107 /** 108 * Convenience function that implements the IDNToASCII operation as defined in the IDNA draft. 109 * This operation is done on complete domain names, e.g: "www.example.com". 110 * It is important to note that this operation can fail. If it fails, then the input 111 * domain name cannot be used as an Internationalized Domain Name and the application 112 * should have methods defined to deal with the failure. 113 * 114 * <b>Note:</b> IDNA draft specifies that a conformant application should divide a domain name 115 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 116 * and then convert. This function does not offer that level of granularity. The options once 117 * set will apply to all labels in the domain name 118 * 119 * @param src Input ASCII IDN. 120 * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 121 * @param dest Output Unicode array. 122 * @param destCapacity Size of dest. 123 * @param options A bit set of options: 124 * 125 * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 126 * If TRUE unassigned values are treated as normal Unicode code points. 127 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 128 * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 129 * If TRUE and the input does not statisfy STD3 rules, the operation 130 * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 131 * 132 * @param parseError Pointer to UParseError struct to recieve information on position 133 * of error if an error is encountered. Can be NULL. 134 * @param status ICU in/out error code parameter. 135 * U_INVALID_CHAR_FOUND if src contains 136 * unmatched single surrogates. 137 * U_INDEX_OUTOFBOUNDS_ERROR if src contains 138 * too many code points. 139 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 140 * @return Number of ASCII characters converted. 141 */ 142 U_CFUNC int32_t U_EXPORT2 143 idnaref_IDNToASCII( const UChar* src, int32_t srcLength, 144 UChar* dest, int32_t destCapacity, 145 int32_t options, 146 UParseError* parseError, 147 UErrorCode* status); 148 149 /** 150 * Convenience function that implements the IDNToUnicode operation as defined in the IDNA draft. 151 * This operation is done on complete domain names, e.g: "www.example.com". 152 * 153 * <b>Note:</b> IDNA draft specifies that a conformant application should divide a domain name 154 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 155 * and then convert. This function does not offer that level of granularity. The options once 156 * set will apply to all labels in the domain name 157 * 158 * @param src Input Unicode IDN. 159 * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 160 * @param dest Output ASCII array. 161 * @param destCapacity Size of dest. 162 * @param options A bit set of options: 163 * 164 * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 165 * If TRUE unassigned values are treated as normal Unicode code points. 166 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 167 * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 168 * If TRUE and the input does not statisfy STD3 rules, the operation 169 * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 170 * 171 * @param parseError Pointer to UParseError struct to recieve information on position 172 * of error if an error is encountered. Can be NULL. 173 * @param status ICU in/out error code parameter. 174 * U_INVALID_CHAR_FOUND if src contains 175 * unmatched single surrogates. 176 * U_INDEX_OUTOFBOUNDS_ERROR if src contains 177 * too many code points. 178 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 179 * @return Number of ASCII characters converted. 180 */ 181 U_CFUNC int32_t U_EXPORT2 182 idnaref_IDNToUnicode( const UChar* src, int32_t srcLength, 183 UChar* dest, int32_t destCapacity, 184 int32_t options, 185 UParseError* parseError, 186 UErrorCode* status); 187 188 /** 189 * Compare two strings for IDNs for equivalence. 190 * This function splits the domain names into labels and compares them. 191 * According to IDN draft, whenever two labels are compared, they are 192 * considered equal if and only if their ASCII forms (obtained by 193 * applying toASCII) match using an case-insensitive ASCII comparison. 194 * Two domain names are considered a match if and only if all labels 195 * match regardless of whether label separators match. 196 * 197 * @param s1 First source string. 198 * @param length1 Length of first source string, or -1 if NUL-terminated. 199 * 200 * @param s2 Second source string. 201 * @param length2 Length of second source string, or -1 if NUL-terminated. 202 * @param options A bit set of options: 203 * 204 * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 205 * If TRUE unassigned values are treated as normal Unicode code points. 206 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 207 * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 208 * If TRUE and the input does not statisfy STD3 rules, the operation 209 * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 210 * 211 * @param status ICU error code in/out parameter. 212 * Must fulfill U_SUCCESS before the function call. 213 * @return <0 or 0 or >0 as usual for string comparisons 214 */ 215 U_CFUNC int32_t U_EXPORT2 216 idnaref_compare( const UChar *s1, int32_t length1, 217 const UChar *s2, int32_t length2, 218 int32_t options, 219 UErrorCode* status); 220 221 #endif /* #if !UCONFIG_NO_IDNA */ 222 223 #endif 224