1 // 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 2003-2011, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * file name: idnaref.h 11 * encoding: UTF-8 12 * tab size: 8 (not used) 13 * indentation:4 14 * 15 * created on: 2003feb1 16 * created by: Ram Viswanadha 17 */ 18 19 #ifndef __IDNAREF_H__ 20 #define __IDNAREF_H__ 21 22 #include "unicode/utypes.h" 23 24 #if !UCONFIG_NO_IDNA 25 26 #include "unicode/parseerr.h" 27 28 #define IDNAREF_DEFAULT 0x0000 29 #define IDNAREF_ALLOW_UNASSIGNED 0x0001 30 #define IDNAREF_USE_STD3_RULES 0x0002 31 32 /** 33 * This function implements the ToASCII operation as defined in the IDNA draft. 34 * This operation is done on <b>single labels</b> before sending it to something that expects 35 * ASCII names. A label is an individual part of a domain name. Labels are usually 36 * separated by dots; for e.g." "www.example.com" is composed of 3 labels 37 * "www","example", and "com". 38 * 39 * 40 * @param src Input Unicode label. 41 * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 42 * @param dest Output Unicode array with ACE encoded ASCII label. 43 * @param destCapacity Size of dest. 44 * @param options A bit set of options: 45 * 46 * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 47 * If TRUE unassigned values are treated as normal Unicode code points. 48 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 49 * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 50 * If TRUE and the input does not statisfy STD3 rules, the operation 51 * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 52 * 53 * @param parseError Pointer to UParseError struct to recieve information on position 54 * of error if an error is encountered. Can be NULL. 55 * @param status ICU in/out error code parameter. 56 * U_INVALID_CHAR_FOUND if src contains 57 * unmatched single surrogates. 58 * U_INDEX_OUTOFBOUNDS_ERROR if src contains 59 * too many code points. 60 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 61 * @return Number of ASCII characters converted. 62 */ 63 U_CFUNC int32_t U_EXPORT2 64 idnaref_toASCII(const UChar* src, int32_t srcLength, 65 UChar* dest, int32_t destCapacity, 66 int32_t options, 67 UParseError* parseError, 68 UErrorCode* status); 69 70 71 /** 72 * This function implements the ToUnicode operation as defined in the IDNA draft. 73 * This operation is done on <b>single labels</b> before sending it to something that expects 74 * ASCII names. A label is an individual part of a domain name. Labels are usually 75 * separated by dots; for e.g." "www.example.com" is composed of 3 labels 76 * "www","example", and "com". 77 * 78 * @param src Input ASCII (ACE encoded) label. 79 * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 80 * @param dest Output Converted Unicode array. 81 * @param destCapacity Size of dest. 82 * @param options A bit set of options: 83 * 84 * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 85 * If TRUE unassigned values are treated as normal Unicode code points. 86 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 87 * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 88 * If TRUE and the input does not statisfy STD3 rules, the operation 89 * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 90 * 91 * @param parseError Pointer to UParseError struct to recieve information on position 92 * of error if an error is encountered. Can be NULL. 93 * @param status ICU in/out error code parameter. 94 * U_INVALID_CHAR_FOUND if src contains 95 * unmatched single surrogates. 96 * U_INDEX_OUTOFBOUNDS_ERROR if src contains 97 * too many code points. 98 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 99 * @return Number of Unicode characters converted. 100 */ 101 U_CFUNC int32_t U_EXPORT2 102 idnaref_toUnicode(const UChar* src, int32_t srcLength, 103 UChar* dest, int32_t destCapacity, 104 int32_t options, 105 UParseError* parseError, 106 UErrorCode* status); 107 108 109 /** 110 * Convenience function that implements the IDNToASCII operation as defined in the IDNA draft. 111 * This operation is done on complete domain names, e.g: "www.example.com". 112 * It is important to note that this operation can fail. If it fails, then the input 113 * domain name cannot be used as an Internationalized Domain Name and the application 114 * should have methods defined to deal with the failure. 115 * 116 * <b>Note:</b> IDNA draft specifies that a conformant application should divide a domain name 117 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 118 * and then convert. This function does not offer that level of granularity. The options once 119 * set will apply to all labels in the domain name 120 * 121 * @param src Input ASCII IDN. 122 * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 123 * @param dest Output Unicode array. 124 * @param destCapacity Size of dest. 125 * @param options A bit set of options: 126 * 127 * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 128 * If TRUE unassigned values are treated as normal Unicode code points. 129 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 130 * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 131 * If TRUE and the input does not statisfy STD3 rules, the operation 132 * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 133 * 134 * @param parseError Pointer to UParseError struct to recieve information on position 135 * of error if an error is encountered. Can be NULL. 136 * @param status ICU in/out error code parameter. 137 * U_INVALID_CHAR_FOUND if src contains 138 * unmatched single surrogates. 139 * U_INDEX_OUTOFBOUNDS_ERROR if src contains 140 * too many code points. 141 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 142 * @return Number of ASCII characters converted. 143 */ 144 U_CFUNC int32_t U_EXPORT2 145 idnaref_IDNToASCII( const UChar* src, int32_t srcLength, 146 UChar* dest, int32_t destCapacity, 147 int32_t options, 148 UParseError* parseError, 149 UErrorCode* status); 150 151 /** 152 * Convenience function that implements the IDNToUnicode operation as defined in the IDNA draft. 153 * This operation is done on complete domain names, e.g: "www.example.com". 154 * 155 * <b>Note:</b> IDNA draft specifies that a conformant application should divide a domain name 156 * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each, 157 * and then convert. This function does not offer that level of granularity. The options once 158 * set will apply to all labels in the domain name 159 * 160 * @param src Input Unicode IDN. 161 * @param srcLength Number of UChars in src, or -1 if NUL-terminated. 162 * @param dest Output ASCII array. 163 * @param destCapacity Size of dest. 164 * @param options A bit set of options: 165 * 166 * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 167 * If TRUE unassigned values are treated as normal Unicode code points. 168 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 169 * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 170 * If TRUE and the input does not statisfy STD3 rules, the operation 171 * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 172 * 173 * @param parseError Pointer to UParseError struct to recieve information on position 174 * of error if an error is encountered. Can be NULL. 175 * @param status ICU in/out error code parameter. 176 * U_INVALID_CHAR_FOUND if src contains 177 * unmatched single surrogates. 178 * U_INDEX_OUTOFBOUNDS_ERROR if src contains 179 * too many code points. 180 * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough 181 * @return Number of ASCII characters converted. 182 */ 183 U_CFUNC int32_t U_EXPORT2 184 idnaref_IDNToUnicode( const UChar* src, int32_t srcLength, 185 UChar* dest, int32_t destCapacity, 186 int32_t options, 187 UParseError* parseError, 188 UErrorCode* status); 189 190 /** 191 * Compare two strings for IDNs for equivalence. 192 * This function splits the domain names into labels and compares them. 193 * According to IDN draft, whenever two labels are compared, they are 194 * considered equal if and only if their ASCII forms (obtained by 195 * applying toASCII) match using an case-insensitive ASCII comparison. 196 * Two domain names are considered a match if and only if all labels 197 * match regardless of whether label separators match. 198 * 199 * @param s1 First source string. 200 * @param length1 Length of first source string, or -1 if NUL-terminated. 201 * 202 * @param s2 Second source string. 203 * @param length2 Length of second source string, or -1 if NUL-terminated. 204 * @param options A bit set of options: 205 * 206 * - idnaref_UNASSIGNED Unassigned values can be converted to ASCII for query operations 207 * If TRUE unassigned values are treated as normal Unicode code points. 208 * If FALSE the operation fails with U_UNASSIGNED_CODE_POINT_FOUND error code. 209 * - idnaref_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions 210 * If TRUE and the input does not statisfy STD3 rules, the operation 211 * will fail with U_IDNA_STD3_ASCII_RULES_ERROR 212 * 213 * @param status ICU error code in/out parameter. 214 * Must fulfill U_SUCCESS before the function call. 215 * @return <0 or 0 or >0 as usual for string comparisons 216 */ 217 U_CFUNC int32_t U_EXPORT2 218 idnaref_compare( const UChar *s1, int32_t length1, 219 const UChar *s2, int32_t length2, 220 int32_t options, 221 UErrorCode* status); 222 223 #endif /* #if !UCONFIG_NO_IDNA */ 224 225 #endif 226