Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2002-2003, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  punycode.h
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2002jan31
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 /* This ICU code derived from: */
     18 /*
     19 punycode.c 0.4.0 (2001-Nov-17-Sat)
     20 http://www.cs.berkeley.edu/~amc/idn/
     21 Adam M. Costello
     22 http://www.nicemice.net/amc/
     23 */
     24 
     25 #ifndef __PUNYCODE_H__
     26 #define __PUNYCODE_H__
     27 
     28 #include "unicode/utypes.h"
     29 
     30 #if !UCONFIG_NO_IDNA
     31 
     32 /**
     33  * u_strToPunycode() converts Unicode to Punycode.
     34  *
     35  * The input string must not contain single, unpaired surrogates.
     36  * The output will be represented as an array of ASCII code points.
     37  *
     38  * The output string is NUL-terminated according to normal ICU
     39  * string output rules.
     40  *
     41  * @param src Input Unicode string.
     42  *            This function handles a limited number of code points
     43  *            (the limit is >=64).
     44  *            U_INDEX_OUTOFBOUNDS_ERROR is set if the limit is exceeded.
     45  * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
     46  * @param dest Output Punycode array.
     47  * @param destCapacity Size of dest.
     48  * @param caseFlags Vector of boolean values, one per input UChar,
     49  *                  indicating how the corresponding character is to be
     50  *                  marked for the decoder, which may optionally
     51  *                  uppercase (TRUE) or lowercase (FALSE)
     52  *                  the character.
     53  *                  ASCII characters are output directly in the case as marked.
     54  *                  Flags corresponding to trail surrogates are ignored.
     55  *                  If caseFlags==NULL then input characters are not
     56  *                  case-mapped.
     57  * @param pErrorCode ICU in/out error code parameter.
     58  *                   U_INVALID_CHAR_FOUND if src contains
     59  *                   unmatched single surrogates.
     60  *                   U_INDEX_OUTOFBOUNDS_ERROR if src contains
     61  *                   too many code points.
     62  * @return Number of ASCII characters in dest.
     63  *
     64  * @see u_strFromPunycode
     65  */
     66 U_CFUNC int32_t
     67 u_strToPunycode(const UChar *src, int32_t srcLength,
     68                 UChar *dest, int32_t destCapacity,
     69                 const UBool *caseFlags,
     70                 UErrorCode *pErrorCode);
     71 
     72 /**
     73  * u_strFromPunycode() converts Punycode to Unicode.
     74  * The Unicode string will be at most as long (in UChars)
     75  * as the Punycode string (in chars).
     76  *
     77  * @param src Input Punycode string.
     78  * @param srcLength Length of src, or -1 if NUL-terminated.
     79  * @param dest Output Unicode string buffer.
     80  * @param destCapacity Size of dest in number of UChars,
     81  *                     and of caseFlags in numbers of UBools.
     82  * @param caseFlags Output array for case flags as
     83  *                  defined by the Punycode string.
     84  *                  The caller should uppercase (TRUE) or lowercase (FALSE)
     85  *                  the corresponding character in dest.
     86  *                  For supplementary characters, only the lead surrogate
     87  *                  is marked, and FALSE is stored for the trail surrogate.
     88  *                  This is redundant and not necessary for ASCII characters
     89  *                  because they are already in the case indicated.
     90  *                  Can be NULL if the case flags are not needed.
     91  * @param pErrorCode ICU in/out error code parameter.
     92  *                   U_INVALID_CHAR_FOUND if a non-ASCII character
     93  *                   precedes the last delimiter ('-'),
     94  *                   or if an invalid character (not a-zA-Z0-9) is found
     95  *                   after the last delimiter.
     96  *                   U_ILLEGAL_CHAR_FOUND if the delta sequence is ill-formed.
     97  * @return Number of UChars written to dest.
     98  *
     99  * @see u_strToPunycode
    100  */
    101 U_CFUNC int32_t
    102 u_strFromPunycode(const UChar *src, int32_t srcLength,
    103                   UChar *dest, int32_t destCapacity,
    104                   UBool *caseFlags,
    105                   UErrorCode *pErrorCode);
    106 
    107 #endif /* #if !UCONFIG_NO_IDNA */
    108 
    109 #endif
    110 
    111 /*
    112  * Hey, Emacs, please set the following:
    113  *
    114  * Local Variables:
    115  * indent-tabs-mode: nil
    116  * End:
    117  *
    118  */
    119