Home | History | Annotate | Download | only in Include
      1 #ifndef Py_UNICODEOBJECT_H
      2 #define Py_UNICODEOBJECT_H
      3 
      4 #include <stdarg.h>
      5 
      6 /*
      7 
      8 Unicode implementation based on original code by Fredrik Lundh,
      9 modified by Marc-Andre Lemburg (mal (at) lemburg.com) according to the
     10 Unicode Integration Proposal. (See
     11 http://www.egenix.com/files/python/unicode-proposal.txt).
     12 
     13 Copyright (c) Corporation for National Research Initiatives.
     14 
     15 
     16  Original header:
     17  --------------------------------------------------------------------
     18 
     19  * Yet another Unicode string type for Python.  This type supports the
     20  * 16-bit Basic Multilingual Plane (BMP) only.
     21  *
     22  * Written by Fredrik Lundh, January 1999.
     23  *
     24  * Copyright (c) 1999 by Secret Labs AB.
     25  * Copyright (c) 1999 by Fredrik Lundh.
     26  *
     27  * fredrik (at) pythonware.com
     28  * http://www.pythonware.com
     29  *
     30  * --------------------------------------------------------------------
     31  * This Unicode String Type is
     32  *
     33  * Copyright (c) 1999 by Secret Labs AB
     34  * Copyright (c) 1999 by Fredrik Lundh
     35  *
     36  * By obtaining, using, and/or copying this software and/or its
     37  * associated documentation, you agree that you have read, understood,
     38  * and will comply with the following terms and conditions:
     39  *
     40  * Permission to use, copy, modify, and distribute this software and its
     41  * associated documentation for any purpose and without fee is hereby
     42  * granted, provided that the above copyright notice appears in all
     43  * copies, and that both that copyright notice and this permission notice
     44  * appear in supporting documentation, and that the name of Secret Labs
     45  * AB or the author not be used in advertising or publicity pertaining to
     46  * distribution of the software without specific, written prior
     47  * permission.
     48  *
     49  * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
     50  * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
     51  * FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
     52  * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     53  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
     54  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
     55  * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     56  * -------------------------------------------------------------------- */
     57 
     58 #include <ctype.h>
     59 
     60 /* === Internal API ======================================================= */
     61 
     62 /* --- Internal Unicode Format -------------------------------------------- */
     63 
     64 /* Python 3.x requires unicode */
     65 #define Py_USING_UNICODE
     66 
     67 #ifndef SIZEOF_WCHAR_T
     68 #error Must define SIZEOF_WCHAR_T
     69 #endif
     70 
     71 #define Py_UNICODE_SIZE SIZEOF_WCHAR_T
     72 
     73 /* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
     74    Otherwise, Unicode strings are stored as UCS-2 (with limited support
     75    for UTF-16) */
     76 
     77 #if Py_UNICODE_SIZE >= 4
     78 #define Py_UNICODE_WIDE
     79 #endif
     80 
     81 /* Set these flags if the platform has "wchar.h" and the
     82    wchar_t type is a 16-bit unsigned type */
     83 /* #define HAVE_WCHAR_H */
     84 /* #define HAVE_USABLE_WCHAR_T */
     85 
     /* Py_UNICODE used to be the native code-unit type of the Unicode
        implementation, representing a single Unicode element.  Since PEP 393
        it is deprecated and is nothing more than an alias for wchar_t.
        Neither name is declared when Py_LIMITED_API is defined. */

     #ifndef Py_LIMITED_API
     #define PY_UNICODE_TYPE wchar_t
     typedef PY_UNICODE_TYPE Py_UNICODE /* Py_DEPRECATED(3.3) */;
     #endif
     95 
     96 /* If the compiler provides a wchar_t type we try to support it
     97    through the interface functions PyUnicode_FromWideChar(),
     98    PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
     99 
    100 #ifdef HAVE_USABLE_WCHAR_T
    101 # ifndef HAVE_WCHAR_H
    102 #  define HAVE_WCHAR_H
    103 # endif
    104 #endif
    105 
    106 #ifdef HAVE_WCHAR_H
    107 #  include <wchar.h>
    108 #endif
    109 
    /* Fixed-width unsigned code-unit types, one for each storage width
       (8, 16 and 32 bits) of the respective unicode representations. */
    typedef uint8_t  Py_UCS1;
    typedef uint16_t Py_UCS2;
    typedef uint32_t Py_UCS4;
    115 
    116 /* --- Internal Unicode Operations ---------------------------------------- */
    117 
    118 /* Since splitting on whitespace is an important use case, and
    119    whitespace in most situations is solely ASCII whitespace, we
    120    optimize for the common case by using a quick look-up table
    121    _Py_ascii_whitespace (see below) with an inlined check.
    122 
    123  */
    124 #ifndef Py_LIMITED_API
    /* Whitespace test with an inlined fast path: values below 128 are
       answered by indexing the _Py_ascii_whitespace table, everything else
       is handed to _PyUnicode_IsWhitespace().  `ch` is expanded more than
       once, so it must be free of side effects. */
    #define Py_UNICODE_ISSPACE(ch)              \
        ((ch) >= 128U                           \
             ? _PyUnicode_IsWhitespace(ch)      \
             : _Py_ascii_whitespace[(ch)])
    127 
    /* Character-class predicates: each is a thin alias for the matching
       _PyUnicode_Is*() helper. */
    #define Py_UNICODE_ISLOWER(ch)      _PyUnicode_IsLowercase(ch)
    #define Py_UNICODE_ISUPPER(ch)      _PyUnicode_IsUppercase(ch)
    #define Py_UNICODE_ISTITLE(ch)      _PyUnicode_IsTitlecase(ch)
    #define Py_UNICODE_ISLINEBREAK(ch)  _PyUnicode_IsLinebreak(ch)
    #define Py_UNICODE_ISDECIMAL(ch)    _PyUnicode_IsDecimalDigit(ch)
    #define Py_UNICODE_ISDIGIT(ch)      _PyUnicode_IsDigit(ch)
    #define Py_UNICODE_ISNUMERIC(ch)    _PyUnicode_IsNumeric(ch)
    #define Py_UNICODE_ISPRINTABLE(ch)  _PyUnicode_IsPrintable(ch)
    #define Py_UNICODE_ISALPHA(ch)      _PyUnicode_IsAlpha(ch)

    /* Case conversions: aliases for the _PyUnicode_To*case() helpers. */
    #define Py_UNICODE_TOLOWER(ch)      _PyUnicode_ToLowercase(ch)
    #define Py_UNICODE_TOUPPER(ch)      _PyUnicode_ToUppercase(ch)
    #define Py_UNICODE_TOTITLE(ch)      _PyUnicode_ToTitlecase(ch)

    /* Numeric value lookups: aliases for the remaining _PyUnicode_To*()
       helpers. */
    #define Py_UNICODE_TODECIMAL(ch)    _PyUnicode_ToDecimalDigit(ch)
    #define Py_UNICODE_TODIGIT(ch)      _PyUnicode_ToDigit(ch)
    #define Py_UNICODE_TONUMERIC(ch)    _PyUnicode_ToNumeric(ch)
    147 
    /* True when `ch` passes any of the alpha, decimal, digit or numeric
       tests.  The tests are tried in that order and stop at the first hit;
       `ch` is expanded once per test. */
    #define Py_UNICODE_ISALNUM(ch)          \
        (Py_UNICODE_ISALPHA(ch)             \
         || Py_UNICODE_ISDECIMAL(ch)        \
         || Py_UNICODE_ISDIGIT(ch)          \
         || Py_UNICODE_ISNUMERIC(ch))
    153 
    /* Copy `length` Py_UNICODE code units from `source` to `target` with
       memcpy(); like memcpy(), the two buffers must not overlap. */
    #define Py_UNICODE_COPY(target, source, length)                     \
        memcpy((target), (source), sizeof(Py_UNICODE) * (length))
    156 
    /* Store `value` into the first `length` code units of `target`.

       Every argument is evaluated exactly once and captured in a local
       before the loop starts, so expressions with side effects are safe to
       pass (the loop bound used to be re-evaluated on every iteration).
       A `length` of zero or less writes nothing. */
    #define Py_UNICODE_FILL(target, value, length)                          \
        do {                                                                \
            Py_UNICODE *fill_ptr_ = (target);                               \
            const Py_UNICODE fill_val_ = (value);                           \
            const Py_ssize_t fill_len_ = (length);                          \
            Py_ssize_t fill_idx_;                                           \
            for (fill_idx_ = 0; fill_idx_ < fill_len_; fill_idx_++) {       \
                fill_ptr_[fill_idx_] = fill_val_;                           \
            }                                                               \
        } while (0)
    161 
    /* Surrogate helpers.  The three range tests expand `ch` twice. */
    #define Py_UNICODE_IS_SURROGATE(ch)      ((ch) >= 0xD800 && (ch) <= 0xDFFF)
    #define Py_UNICODE_IS_HIGH_SURROGATE(ch) ((ch) >= 0xD800 && (ch) <= 0xDBFF)
    #define Py_UNICODE_IS_LOW_SURROGATE(ch)  ((ch) >= 0xDC00 && (ch) <= 0xDFFF)

    /* Combine a high and a low surrogate into the single Py_UCS4 value they
       encode: ten payload bits from each, offset by 0x10000. */
    #define Py_UNICODE_JOIN_SURROGATES(high, low)               \
        (0x10000 + ((((Py_UCS4)(high) & 0x03FF) << 10) |        \
                    ((Py_UCS4)(low) & 0x03FF)))

    /* High surrogate of `ch`: its top 10 bits added to 0xD800. */
    #define Py_UNICODE_HIGH_SURROGATE(ch) \
        (((ch) >> 10) + (0xD800 - (0x10000 >> 10)))

    /* Low surrogate of `ch`: its bottom 10 bits added to 0xDC00. */
    #define Py_UNICODE_LOW_SURROGATE(ch) (((ch) & 0x3FF) + 0xDC00)
    174 
    /* Test whether `substring` occurs in `string` at `offset`, comparing the
       wstr buffers.  The first and the last code unit are compared before
       falling back to memcmp() over the whole substring.  The offset must be
       valid and the substring must not be empty.  Arguments are expanded
       several times. */

    #define Py_UNICODE_MATCH(string, offset, substring)                         \
        (((string)->wstr[(offset)] == (substring)->wstr[0]) &&                  \
         ((string)->wstr[(offset) + (substring)->wstr_length - 1] ==            \
          (substring)->wstr[(substring)->wstr_length - 1]) &&                   \
         memcmp((string)->wstr + (offset),                                      \
                (substring)->wstr,                                              \
                (substring)->wstr_length * sizeof(Py_UNICODE)) == 0)
    182 
    183 #endif /* Py_LIMITED_API */
    184 
    185 #ifdef __cplusplus
    186 extern "C" {
    187 #endif
    188 
    189 /* --- Unicode Type ------------------------------------------------------- */
    190 
    191 #ifndef Py_LIMITED_API
    192 
    193 /* ASCII-only strings created through PyUnicode_New use the PyASCIIObject
    194    structure. state.ascii and state.compact are set, and the data
    195    immediately follow the structure. utf8_length and wstr_length can be found
    196    in the length field; the utf8 pointer is equal to the data pointer. */
    197 typedef struct {
    198     /* There are 4 forms of Unicode strings:
    199 
    200        - compact ascii:
    201 
    202          * structure = PyASCIIObject
    203          * test: PyUnicode_IS_COMPACT_ASCII(op)
    204          * kind = PyUnicode_1BYTE_KIND
    205          * compact = 1
    206          * ascii = 1
    207          * ready = 1
    208          * (length is the length of the utf8 and wstr strings)
    209          * (data starts just after the structure)
    210          * (since ASCII is decoded from UTF-8, the utf8 string are the data)
    211 
    212        - compact:
    213 
    214          * structure = PyCompactUnicodeObject
    215          * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op)
    216          * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
    217            PyUnicode_4BYTE_KIND
    218          * compact = 1
    219          * ready = 1
    220          * ascii = 0
    221          * utf8 is not shared with data
    222          * utf8_length = 0 if utf8 is NULL
    223          * wstr is shared with data and wstr_length=length
    224            if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
    225            or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_t)=4
    226          * wstr_length = 0 if wstr is NULL
    227          * (data starts just after the structure)
    228 
    229        - legacy string, not ready:
    230 
    231          * structure = PyUnicodeObject
    232          * test: kind == PyUnicode_WCHAR_KIND
    233          * length = 0 (use wstr_length)
    234          * hash = -1
    235          * kind = PyUnicode_WCHAR_KIND
    236          * compact = 0
    237          * ascii = 0
    238          * ready = 0
    239          * interned = SSTATE_NOT_INTERNED
    240          * wstr is not NULL
    241          * data.any is NULL
    242          * utf8 is NULL
    243          * utf8_length = 0
    244 
    245        - legacy string, ready:
    246 
    247          * structure = PyUnicodeObject structure
    248          * test: !PyUnicode_IS_COMPACT(op) && kind != PyUnicode_WCHAR_KIND
    249          * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
    250            PyUnicode_4BYTE_KIND
    251          * compact = 0
    252          * ready = 1
    253          * data.any is not NULL
    254          * utf8 is shared and utf8_length = length with data.any if ascii = 1
    255          * utf8_length = 0 if utf8 is NULL
    256          * wstr is shared with data.any and wstr_length = length
    257            if kind=PyUnicode_2BYTE_KIND and sizeof(wchar_t)=2
    258            or if kind=PyUnicode_4BYTE_KIND and sizeof(wchar_4)=4
    259          * wstr_length = 0 if wstr is NULL
    260 
    261        Compact strings use only one memory block (structure + characters),
    262        whereas legacy strings use one block for the structure and one block
    263        for characters.
    264 
    265        Legacy strings are created by PyUnicode_FromUnicode() and
    266        PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
    267        when PyUnicode_READY() is called.
    268 
    269        See also _PyUnicode_CheckConsistency().
    270     */
    271     PyObject_HEAD
    272     Py_ssize_t length;          /* Number of code points in the string */
    273     Py_hash_t hash;             /* Hash value; -1 if not set */
    274     struct {
    275         /*
    276            SSTATE_NOT_INTERNED (0)
    277            SSTATE_INTERNED_MORTAL (1)
    278            SSTATE_INTERNED_IMMORTAL (2)
    279 
    280            If interned != SSTATE_NOT_INTERNED, the two references from the
    281            dictionary to this object are *not* counted in ob_refcnt.
    282          */
    283         unsigned int interned:2;
    284         /* Character size:
    285 
    286            - PyUnicode_WCHAR_KIND (0):
    287 
    288              * character type = wchar_t (16 or 32 bits, depending on the
    289                platform)
    290 
    291            - PyUnicode_1BYTE_KIND (1):
    292 
    293              * character type = Py_UCS1 (8 bits, unsigned)
    294              * all characters are in the range U+0000-U+00FF (latin1)
    295              * if ascii is set, all characters are in the range U+0000-U+007F
    296                (ASCII), otherwise at least one character is in the range
    297                U+0080-U+00FF
    298 
    299            - PyUnicode_2BYTE_KIND (2):
    300 
    301              * character type = Py_UCS2 (16 bits, unsigned)
    302              * all characters are in the range U+0000-U+FFFF (BMP)
    303              * at least one character is in the range U+0100-U+FFFF
    304 
    305            - PyUnicode_4BYTE_KIND (4):
    306 
    307              * character type = Py_UCS4 (32 bits, unsigned)
    308              * all characters are in the range U+0000-U+10FFFF
    309              * at least one character is in the range U+10000-U+10FFFF
    310          */
    311         unsigned int kind:3;
    312         /* Compact is with respect to the allocation scheme. Compact unicode
    313            objects only require one memory block while non-compact objects use
    314            one block for the PyUnicodeObject struct and another for its data
    315            buffer. */
    316         unsigned int compact:1;
    317         /* The string only contains characters in the range U+0000-U+007F (ASCII)
    318            and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
    319            set, use the PyASCIIObject structure. */
    320         unsigned int ascii:1;
    321         /* The ready flag indicates whether the object layout is initialized
    322            completely. This means that this is either a compact object, or
    323            the data pointer is filled out. The bit is redundant, and helps
    324            to minimize the test in PyUnicode_IS_READY(). */
    325         unsigned int ready:1;
    326         /* Padding to ensure that PyUnicode_DATA() is always aligned to
    327            4 bytes (see issue #19537 on m68k). */
    328         unsigned int :24;
    329     } state;
    330     wchar_t *wstr;              /* wchar_t representation (null-terminated) */
    331 } PyASCIIObject;
    332 
    333 /* Non-ASCII strings allocated through PyUnicode_New use the
    334    PyCompactUnicodeObject structure. state.compact is set, and the data
    335    immediately follow the structure. */
    336 typedef struct {
    337     PyASCIIObject _base;
    338     Py_ssize_t utf8_length;     /* Number of bytes in utf8, excluding the
    339                                  * terminating \0. */
    340     char *utf8;                 /* UTF-8 representation (null-terminated) */
    341     Py_ssize_t wstr_length;     /* Number of code points in wstr, possible
    342                                  * surrogates count as two code points. */
    343 } PyCompactUnicodeObject;
    344 
    345 /* Strings allocated through PyUnicode_FromUnicode(NULL, len) use the
    346    PyUnicodeObject structure. The actual string data is initially in the wstr
    347    block, and copied into the data block using _PyUnicode_Ready. */
    348 typedef struct {
    349     PyCompactUnicodeObject _base;
    350     union {
    351         void *any;
    352         Py_UCS1 *latin1;
    353         Py_UCS2 *ucs2;
    354         Py_UCS4 *ucs4;
    355     } data;                     /* Canonical, smallest-form Unicode buffer */
    356 } PyUnicodeObject;
    357 #endif
    358 
    359 PyAPI_DATA(PyTypeObject) PyUnicode_Type;
    360 PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
    361 
    362 #define PyUnicode_Check(op) \
    363                  PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
    364 #define PyUnicode_CheckExact(op) (Py_TYPE(op) == &PyUnicode_Type)
    365 
    366 /* Fast access macros */
    367 #ifndef Py_LIMITED_API
    368 
    /* Length of the wstr representation of `op`.  A compact ASCII string
       keeps it in the PyASCIIObject length field; every other form keeps it
       in PyCompactUnicodeObject.wstr_length.

       `op` is parenthesized inside each cast so that an argument such as
       `p + 1` is converted as a whole instead of having only `p` cast.
       `op` is expanded more than once. */
    #define PyUnicode_WSTR_LENGTH(op)                       \
        (PyUnicode_IS_COMPACT_ASCII(op) ?                   \
         ((PyASCIIObject *)(op))->length :                  \
         ((PyCompactUnicodeObject *)(op))->wstr_length)
    373 
    /* Size of the deprecated Py_UNICODE representation in code units (a
       surrogate pair counts as 2 units).  When that representation does not
       exist yet, it is computed on request through PyUnicode_AsUnicode().
       Use PyUnicode_GET_LENGTH() for the length in code points.  `op` is
       expanded several times. */

    #define PyUnicode_GET_SIZE(op)                                  \
        (assert(PyUnicode_Check(op)),                               \
         !(((PyASCIIObject *)(op))->wstr)                           \
             ? ((void)PyUnicode_AsUnicode((PyObject *)(op)),        \
                assert(((PyASCIIObject *)(op))->wstr),              \
                PyUnicode_WSTR_LENGTH(op))                          \
             : PyUnicode_WSTR_LENGTH(op))
        /* Py_DEPRECATED(3.3) */

    /* The same size expressed in bytes: code units times Py_UNICODE_SIZE. */
    #define PyUnicode_GET_DATA_SIZE(op) \
        (Py_UNICODE_SIZE * PyUnicode_GET_SIZE(op))
        /* Py_DEPRECATED(3.3) */
    391 
    /* Shortcut for PyUnicode_AsUnicode(): yields the cached wstr pointer when
       there is one and otherwise calls PyUnicode_AsUnicode(), which creates
       the wchar_t/Py_UNICODE representation on demand.  Using this macro is
       very inefficient now; port code to the PyUnicode_*BYTE_DATA() macros or
       to PyUnicode_WRITE() and PyUnicode_READ() instead. */

    #define PyUnicode_AS_UNICODE(op)                        \
        (assert(PyUnicode_Check(op)),                       \
         !(((PyASCIIObject *)(op))->wstr)                   \
             ? PyUnicode_AsUnicode((PyObject *)(op))        \
             : (((PyASCIIObject *)(op))->wstr))
        /* Py_DEPRECATED(3.3) */

    /* The same buffer viewed as bytes. */
    #define PyUnicode_AS_DATA(op) \
        ((const char *)(PyUnicode_AS_UNICODE(op)))
        /* Py_DEPRECATED(3.3) */
    406 
    407 
    408 /* --- Flexible String Representation Helper Macros (PEP 393) -------------- */
    409 
    /* Values stored in PyASCIIObject.state.interned (the interning state). */
    #define SSTATE_NOT_INTERNED       0
    #define SSTATE_INTERNED_MORTAL    1
    #define SSTATE_INTERNED_IMMORTAL  2
    416 
    /* Return true if the string contains only ASCII characters, or 0 if not.
       The string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but it
       must be ready; both conditions are only checked through assert().

       `op` is parenthesized inside the cast so that an argument such as
       `p + 1` is converted as a whole instead of having only `p` cast. */
    #define PyUnicode_IS_ASCII(op)                   \
        (assert(PyUnicode_Check(op)),                \
         assert(PyUnicode_IS_READY(op)),             \
         ((PyASCIIObject *)(op))->state.ascii)
    424 
    /* Return true if the string is compact or 0 if not.
       No type checks or Ready calls are performed. */
    #define PyUnicode_IS_COMPACT(op) \
        (((PyASCIIObject *)(op))->state.compact)

    /* Return true if the string is a compact ASCII string (one that uses the
       PyASCIIObject structure), or 0 if not.  No type checks or Ready calls
       are performed.

       `op` is parenthesized inside the cast so that an argument such as
       `p + 1` is converted as a whole instead of having only `p` cast.
       `op` is expanded twice. */
    #define PyUnicode_IS_COMPACT_ASCII(op)                  \
        (((PyASCIIObject *)(op))->state.ascii && PyUnicode_IS_COMPACT(op))
    434 
    /* Character widths a string can be stored in. */
    enum PyUnicode_Kind {
        /* The string only has a wstr buffer.  This is only possible when it
           was created with a legacy API and _PyUnicode_Ready() has not been
           called yet. */
        PyUnicode_WCHAR_KIND = 0,

        /* The values below are what the PyUnicode_KIND() macro returns. */
        PyUnicode_1BYTE_KIND = 1,
        PyUnicode_2BYTE_KIND = 2,
        PyUnicode_4BYTE_KIND = 4
    };
    445 
    /* Typed views of the canonical buffer for direct character access: the
       pointer from PyUnicode_DATA() cast to Py_UCS1, Py_UCS2 or Py_UCS4.
       Nothing is checked here; consult PyUnicode_KIND() first to pick the
       view that matches the string. */

    #define PyUnicode_1BYTE_DATA(op) ((Py_UCS1 *)PyUnicode_DATA(op))
    #define PyUnicode_2BYTE_DATA(op) ((Py_UCS2 *)PyUnicode_DATA(op))
    #define PyUnicode_4BYTE_DATA(op) ((Py_UCS4 *)PyUnicode_DATA(op))
    454 
    /* Return the state.kind field of `op`, i.e. one of the PyUnicode_*_KIND
       values.  That `op` is a unicode object and is ready is only checked
       through assert(). */
    #define PyUnicode_KIND(op)                      \
        (assert(PyUnicode_Check(op)),               \
         assert(PyUnicode_IS_READY(op)),            \
         ((PyASCIIObject *)(op))->state.kind)
    460 
    /* Raw buffer of a compact string: the memory that starts right after the
       PyASCIIObject header for ASCII strings, or right after the
       PyCompactUnicodeObject header otherwise. */
    #define _PyUnicode_COMPACT_DATA(op)                         \
        (!PyUnicode_IS_ASCII(op)                                \
             ? ((void *)((PyCompactUnicodeObject *)(op) + 1))   \
             : ((void *)((PyASCIIObject *)(op) + 1)))

    /* Raw buffer of a non-compact string: the data.any pointer, which is
       asserted to be set. */
    #define _PyUnicode_NONCOMPACT_DATA(op)                      \
        (assert(((PyUnicodeObject *)(op))->data.any),           \
         (((PyUnicodeObject *)(op))->data.any))

    /* Return a void pointer to the raw unicode buffer, choosing between the
       two helpers above according to PyUnicode_IS_COMPACT(). */
    #define PyUnicode_DATA(op)                                  \
        (assert(PyUnicode_Check(op)),                           \
         (PyUnicode_IS_COMPACT(op)                              \
              ? _PyUnicode_COMPACT_DATA(op)                     \
              : _PyUnicode_NONCOMPACT_DATA(op)))
    475 
    /* Note for the access macros below: "kind" may be evaluated more than
       once.  Every other macro parameter is evaluated exactly once, so side
       effects (such as incrementing the index) are safe there. */

    /* Store the code point `value` at position `index` (counted from 0) of
       the canonical buffer `data`, whose element width is given by `kind`.
       No sanity checks are made; the macro is meant for loops in which the
       caller has cached the kind and data pointer obtained from other macro
       calls.  Any kind other than 1BYTE or 2BYTE is treated as 4BYTE, which
       is only verified through assert(). */
    #define PyUnicode_WRITE(kind, data, index, value)                       \
        do {                                                                \
            switch ((kind)) {                                               \
            case PyUnicode_1BYTE_KIND:                                      \
                ((Py_UCS1 *)(data))[(index)] = (Py_UCS1)(value);            \
                break;                                                      \
            case PyUnicode_2BYTE_KIND:                                      \
                ((Py_UCS2 *)(data))[(index)] = (Py_UCS2)(value);            \
                break;                                                      \
            default:                                                        \
                assert((kind) == PyUnicode_4BYTE_KIND);                     \
                ((Py_UCS4 *)(data))[(index)] = (Py_UCS4)(value);            \
                break;                                                      \
            }                                                               \
        } while (0)
    502 
    /* Fetch the code point at `index` from the canonical buffer `data`,
       interpreting it according to `kind`, and return it as Py_UCS4.  No
       checks or ready calls are performed; any kind other than 1BYTE or
       2BYTE is read as 4BYTE. */
    #define PyUnicode_READ(kind, data, index)                       \
        ((Py_UCS4)(                                                 \
            (kind) == PyUnicode_1BYTE_KIND                          \
                ? ((const Py_UCS1 *)(data))[(index)]                \
            : (kind) == PyUnicode_2BYTE_KIND                        \
                ? ((const Py_UCS2 *)(data))[(index)]                \
                : ((const Py_UCS4 *)(data))[(index)]))
    514 
    /* Read the code point at `index` straight from a unicode object.  This
       is less efficient than PyUnicode_READ() because it calls
       PyUnicode_KIND(), possibly twice.  It is fine for single reads; for
       multiple consecutive reads, cache the kind and use PyUnicode_READ()
       instead.  `unicode` is expanded several times. */
    #define PyUnicode_READ_CHAR(unicode, index)                                 \
        (assert(PyUnicode_Check(unicode)),                                      \
         assert(PyUnicode_IS_READY(unicode)),                                   \
         (Py_UCS4)(                                                             \
             PyUnicode_KIND((unicode)) == PyUnicode_1BYTE_KIND                  \
                 ? ((const Py_UCS1 *)(PyUnicode_DATA((unicode))))[(index)]      \
             : PyUnicode_KIND((unicode)) == PyUnicode_2BYTE_KIND                \
                 ? ((const Py_UCS2 *)(PyUnicode_DATA((unicode))))[(index)]      \
                 : ((const Py_UCS4 *)(PyUnicode_DATA((unicode))))[(index)]))
    530 
    /* Return the length field of the unicode string.  The caller has to make
       sure that the string has its canonical representation set before using
       this macro (see PyUnicode_READY()); readiness is only checked through
       assert(). */
    #define PyUnicode_GET_LENGTH(op)                    \
        (assert(PyUnicode_Check(op)),                   \
         assert(PyUnicode_IS_READY(op)),                \
         ((PyASCIIObject *)(op))->length)
    538 
    539 
    /* Fast check to determine whether an object is ready: reads the
       state.ready bit.  Equivalent to
       PyUnicode_IS_COMPACT(op) || ((PyUnicodeObject*)(op))->data.any)

       `op` is parenthesized inside the cast so that an argument such as
       `p + 1` is converted as a whole instead of having only `p` cast. */

    #define PyUnicode_IS_READY(op) (((PyASCIIObject *)(op))->state.ready)
    544 
    /* Make sure the canonical representation of `op` is set.  In the best
       case (the object is already ready) this does less work than
       _PyUnicode_Ready(); otherwise it still calls _PyUnicode_Ready().
       Returns 0 on success and -1 on errors. */
    #define PyUnicode_READY(op)                             \
        (assert(PyUnicode_Check(op)),                       \
         (!PyUnicode_IS_READY(op)                           \
              ? _PyUnicode_Ready((PyObject *)(op))          \
              : 0))
    553 
    /* Return a maximum character value that is suitable for creating another
       string based on `op`: 0x7f for ASCII strings, otherwise the largest
       value representable in the string's kind.  This is always an
       approximation, but it is more efficient than iterating over the
       string. */
    #define PyUnicode_MAX_CHAR_VALUE(op)                                \
        (assert(PyUnicode_IS_READY(op)),                                \
         (PyUnicode_IS_ASCII(op)                       ? (0x7f) :       \
          PyUnicode_KIND(op) == PyUnicode_1BYTE_KIND   ? (0xffU) :      \
          PyUnicode_KIND(op) == PyUnicode_2BYTE_KIND   ? (0xffffU) :    \
                                                         (0x10ffffU)))
    566 
    567 #endif
    568 
    569 /* --- Constants ---------------------------------------------------------- */
    570 
    /* Character substituted during decoding when the errors argument is set
       to "replace".  U+FFFD is the official REPLACEMENT CHARACTER in
       Unicode 3.0. */

    #define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4)0xFFFD)
    577 
    578 /* === Public API ========================================================= */
    579 
    580 /* --- Plain Py_UNICODE --------------------------------------------------- */
    581 
    582 /* With PEP 393, this is the recommended way to allocate a new unicode object.
    583    This function will allocate the object and its buffer in a single memory
    584    block.  Objects created using this function are not resizable. */
    585 #ifndef Py_LIMITED_API
    586 PyAPI_FUNC(PyObject*) PyUnicode_New(
    587     Py_ssize_t size,            /* Number of code points in the new string */
    588     Py_UCS4 maxchar             /* maximum code point value in the string */
    589     );
    590 #endif
    591 
    592 /* Initializes the canonical string representation from the deprecated
    593    wstr/Py_UNICODE representation. This function is used to convert Unicode
    594    objects which were created using the old API to the new flexible format
    595    introduced with PEP 393.
    596 
    597    Don't call this function directly, use the public PyUnicode_READY() macro
    598    instead. */
    599 #ifndef Py_LIMITED_API
    600 PyAPI_FUNC(int) _PyUnicode_Ready(
    601     PyObject *unicode           /* Unicode object */
    602     );
    603 #endif
    604 
    605 /* Get a copy of a Unicode string. */
    606 #ifndef Py_LIMITED_API
    607 PyAPI_FUNC(PyObject*) _PyUnicode_Copy(
    608     PyObject *unicode
    609     );
    610 #endif
    611 
    612 /* Copy character from one unicode object into another, this function performs
    613    character conversion when necessary and falls back to memcpy() if possible.
    614 
    615    Fail if to is too small (smaller than *how_many* or smaller than
    616    len(from)-from_start), or if kind(from[from_start:from_start+how_many]) >
    617    kind(to), or if *to* has more than 1 reference.
    618 
    619    Return the number of written character, or return -1 and raise an exception
    620    on error.
    621 
    622    Pseudo-code:
    623 
    624        how_many = min(how_many, len(from) - from_start)
    625        to[to_start:to_start+how_many] = from[from_start:from_start+how_many]
    626        return how_many
    627 
    628    Note: The function doesn't write a terminating null character.
    629    */
    630 #ifndef Py_LIMITED_API
    631 PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters(
    632     PyObject *to,
    633     Py_ssize_t to_start,
    634     PyObject *from,
    635     Py_ssize_t from_start,
    636     Py_ssize_t how_many
    637     );
    638 
    639 /* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so
    640    may crash if parameters are invalid (e.g. if the output string
    641    is too short). */
    642 PyAPI_FUNC(void) _PyUnicode_FastCopyCharacters(
    643     PyObject *to,
    644     Py_ssize_t to_start,
    645     PyObject *from,
    646     Py_ssize_t from_start,
    647     Py_ssize_t how_many
    648     );
    649 #endif
    650 
    651 #ifndef Py_LIMITED_API
    652 /* Fill a string with a character: write fill_char into
    653    unicode[start:start+length].
    654 
    655    Fail if fill_char is bigger than the string maximum character, or if the
    656    string has more than 1 reference.
    657 
    658    Return the number of written character, or return -1 and raise an exception
    659    on error. */
    660 PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill(
    661     PyObject *unicode,
    662     Py_ssize_t start,
    663     Py_ssize_t length,
    664     Py_UCS4 fill_char
    665     );
    666 
    667 /* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash
    668    if parameters are invalid (e.g. if length is longer than the string). */
    669 PyAPI_FUNC(void) _PyUnicode_FastFill(
    670     PyObject *unicode,
    671     Py_ssize_t start,
    672     Py_ssize_t length,
    673     Py_UCS4 fill_char
    674     );
    675 #endif
    676 
    677 /* Create a Unicode Object from the Py_UNICODE buffer u of the given
    678    size.
    679 
    680    u may be NULL which causes the contents to be undefined. It is the
    681    user's responsibility to fill in the needed data afterwards. Note
    682    that modifying the Unicode object contents after construction is
    683    only allowed if u was set to NULL.
    684 
    685    The buffer is copied into the new object. */
    686 
    687 #ifndef Py_LIMITED_API
    688 PyAPI_FUNC(PyObject*) PyUnicode_FromUnicode(
    689     const Py_UNICODE *u,        /* Unicode buffer */
    690     Py_ssize_t size             /* size of buffer */
    691     ) /* Py_DEPRECATED(3.3) */;
    692 #endif
    693 
    694 /* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
    695 PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
    696     const char *u,             /* UTF-8 encoded string */
    697     Py_ssize_t size            /* size of buffer */
    698     );
    699 
    700 /* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
    701    UTF-8 encoded bytes.  The size is determined with strlen(). */
    702 PyAPI_FUNC(PyObject*) PyUnicode_FromString(
    703     const char *u              /* UTF-8 encoded string */
    704     );
    705 
    706 #ifndef Py_LIMITED_API
    707 /* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters.
    708    Scan the string to find the maximum character. */
    709 PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData(
    710     int kind,
    711     const void *buffer,
    712     Py_ssize_t size);
    713 
    714 /* Create a new string from a buffer of ASCII characters.
    715    WARNING: Don't check if the string contains any non-ASCII character. */
    716 PyAPI_FUNC(PyObject*) _PyUnicode_FromASCII(
    717     const char *buffer,
    718     Py_ssize_t size);
    719 #endif
    720 
    721 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
    722 PyAPI_FUNC(PyObject*) PyUnicode_Substring(
    723     PyObject *str,              /* source string (presumably a Unicode object) */
    724     Py_ssize_t start,           /* NOTE(review): presumably first index of str[start:end] -- confirm */
    725     Py_ssize_t end);            /* NOTE(review): presumably exclusive end index -- confirm */
    726 #endif
    727 
    728 #ifndef Py_LIMITED_API
    729 /* Compute the maximum character of the substring unicode[start:end].
    730    Return 127 for an empty string. */
    731 PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
    732     PyObject *unicode,
    733     Py_ssize_t start,
    734     Py_ssize_t end);
    735 #endif
    736 
    737 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
    738 /* Copy the string into a UCS4 buffer including the null character if copy_null
    739    is set. Return NULL and raise an exception on error. Raise a SystemError if
    740    the buffer is smaller than the string. Return buffer on success.
    741 
    742    buflen is the length of the buffer in (Py_UCS4) characters. */
    743 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
    744     PyObject *unicode,
    745     Py_UCS4* buffer,
    746     Py_ssize_t buflen,
    747     int copy_null);
    748 
    749 /* Copy the string into a UCS4 buffer. A new buffer is allocated using
    750    PyMem_Malloc; if this fails, NULL is returned with a memory error
    751    exception set. */
    752 PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
    753 #endif
    754 
    755 #ifndef Py_LIMITED_API
    756 /* Return a read-only pointer to the Unicode object's internal
    757    Py_UNICODE buffer.
    758    If the wchar_t/Py_UNICODE representation is not yet available, this
    759    function will calculate it. */
    760 
    761 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicode(
    762     PyObject *unicode           /* Unicode object */
    763     ) /* Py_DEPRECATED(3.3) */;
    764 
    765 /* Similar to PyUnicode_AsUnicode(), but raises a ValueError if the string
    766    contains null characters. */
    767 PyAPI_FUNC(const Py_UNICODE *) _PyUnicode_AsUnicode(
    768     PyObject *unicode           /* Unicode object */
    769     );
    770 
    771 /* Return a read-only pointer to the Unicode object's internal
    772    Py_UNICODE buffer and save the length at size.
    773    If the wchar_t/Py_UNICODE representation is not yet available, this
    774    function will calculate it. */
    775 
    776 PyAPI_FUNC(Py_UNICODE *) PyUnicode_AsUnicodeAndSize(
    777     PyObject *unicode,          /* Unicode object */
    778     Py_ssize_t *size            /* location where to save the length */
    779     )  /* Py_DEPRECATED(3.3) */;
    780 #endif
    781 
    782 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
    783 /* Get the length of the Unicode object. */
    784 
    785 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
    786     PyObject *unicode
    787 );
    788 #endif
    789 
    790 /* Get the number of Py_UNICODE units in the
    791    string representation. */
    792 
    793 PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
    794     PyObject *unicode           /* Unicode object */
    795     ) Py_DEPRECATED(3.3);
    796 
    797 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
    798 /* Read a character from the string. */
    799 
    800 PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
    801     PyObject *unicode,
    802     Py_ssize_t index
    803     );
    804 
    805 /* Write a character to the string. The string must have been created through
    806    PyUnicode_New, must not be shared, and must not have been hashed yet.
    807 
    808    Return 0 on success, -1 on error. */
    809 
    810 PyAPI_FUNC(int) PyUnicode_WriteChar(
    811     PyObject *unicode,
    812     Py_ssize_t index,
    813     Py_UCS4 character
    814     );
    815 #endif
    816 
    817 #ifndef Py_LIMITED_API
    818 /* Get the maximum ordinal for a Unicode character. */
    819 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void) Py_DEPRECATED(3.3);
    820 #endif
    821 
    822 /* Resize a Unicode object. The length is the number of characters, except
    823    if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length
    824    is the number of Py_UNICODE characters.
    825 
    826    *unicode is modified to point to the new (resized) object and 0
    827    returned on success.
    828 
    829    Try to resize the string in place (which is usually faster than allocating
    830    a new string and copying characters), or create a new string.
    831 
    832    Error handling is implemented as follows: an exception is set, -1
    833    is returned and *unicode left untouched.
    834 
    835    WARNING: The function doesn't check string content, the result may not be a
    836             string in canonical representation. */
    837 
    838 PyAPI_FUNC(int) PyUnicode_Resize(
    839     PyObject **unicode,         /* Pointer to the Unicode object */
    840     Py_ssize_t length           /* New length */
    841     );
    842 
    843 /* Decode obj to a Unicode object.
    844 
    845    bytes, bytearray and other bytes-like objects are decoded according to the
    846    given encoding and error handler. The encoding and error handler can be
    847    NULL to have the interface use UTF-8 and "strict".
    848 
    849    All other objects (including Unicode objects) raise an exception.
    850 
    851    The API returns NULL in case of an error. The caller is responsible
    852    for decref'ing the returned objects.
    853 
    854 */
    855 
    856 PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
    857     PyObject *obj,              /* Object */
    858     const char *encoding,       /* encoding */
    859     const char *errors          /* error handling */
    860     );
    861 
    862 /* Copy an instance of a Unicode subtype to a new true Unicode object if
    863    necessary. If obj is already a true Unicode object (not a subtype), return
    864    the reference with *incremented* refcount.
    865 
    866    The API returns NULL in case of an error. The caller is responsible
    867    for decref'ing the returned objects.
    868 
    869 */
    870 
    871 PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
    872     PyObject *obj      /* Object */
    873     );
    874 
    875 PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
    876     const char *format,   /* ASCII-encoded string  */
    877     va_list vargs         /* presumably the arguments consumed by format -- confirm */
    878     );
    879 PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
    880     const char *format,   /* ASCII-encoded string  */
    881     ...                   /* variadic arguments; presumably consumed by format -- confirm */
    882     );
    883 
    884 #ifndef Py_LIMITED_API
    885 typedef struct {                /* State of a Unicode writer; set up by _PyUnicodeWriter_Init(). */
    886     PyObject *buffer;           /* writer's buffer object; a shared string when readonly is set */
    887     void *data;                 /* NOTE(review): presumably the character storage of buffer -- confirm */
    888     enum PyUnicode_Kind kind;   /* current kind; _PyUnicodeWriter_PrepareKind() is a no-op for KIND <= kind */
    889     Py_UCS4 maxchar;            /* _PyUnicodeWriter_Prepare() needs no upgrade for MAXCHAR <= maxchar */
    890     Py_ssize_t size;            /* size - pos is the room tested by _PyUnicodeWriter_Prepare() */
    891     Py_ssize_t pos;             /* NOTE(review): presumably the number of characters written so far -- confirm */
    892
    893     /* minimum number of allocated characters (default: 0) */
    894     Py_ssize_t min_length;
    895
    896     /* minimum character (default: 127, ASCII) */
    897     Py_UCS4 min_char;
    898
    899     /* If non-zero, overallocate the buffer (default: 0). */
    900     unsigned char overallocate;
    901
    902     /* If readonly is 1, buffer is a shared string (cannot be modified)
    903        and size is set to 0. */
    904     unsigned char readonly;
    905 } _PyUnicodeWriter ;
    906 
    907 /* Initialize a Unicode writer.
    908  *
    909  * By default, the minimum buffer size is 0 characters and overallocation is
    910  * disabled. Set min_length, min_char and overallocate attributes to control
    911  * the allocation of the buffer. */
    912 PyAPI_FUNC(void)
    913 _PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
    914 
    915 /* Prepare the buffer to write 'length' characters
    916    with the specified maximum character.
    917
    918    Return 0 on success, raise an exception and return -1 on error.

           The macro evaluates to 0 without calling
           _PyUnicodeWriter_PrepareInternal() in two cases: when
           MAXCHAR <= writer->maxchar and LENGTH <= writer->size - writer->pos,
           or when LENGTH is 0.

           WRITER, LENGTH and MAXCHAR each appear several times in the expansion,
           so they may be evaluated more than once: pass expressions without
           side effects. */
    919 #define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR)             \
    920     (((MAXCHAR) <= (WRITER)->maxchar                                  \
    921       && (LENGTH) <= (WRITER)->size - (WRITER)->pos)                  \
    922      ? 0                                                              \
    923      : (((LENGTH) == 0)                                               \
    924         ? 0                                                           \
    925         : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
    926 
    927 /* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
    928    instead. */
    929 PyAPI_FUNC(int)
    930 _PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
    931                                  Py_ssize_t length, Py_UCS4 maxchar);
    932 
    933 /* Prepare the buffer to have at least the kind KIND.
    934    For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
    935    support characters in range U+0000-U+FFFF.
    936
    937    Return 0 on success, raise an exception and return -1 on error.

           KIND must not be PyUnicode_WCHAR_KIND (checked with assert()).
           The macro evaluates to 0 without calling
           _PyUnicodeWriter_PrepareKindInternal() when KIND <= writer->kind.

           WRITER and KIND each appear several times in the expansion, so they
           may be evaluated more than once: pass expressions without side
           effects. */
    938 #define _PyUnicodeWriter_PrepareKind(WRITER, KIND)                    \
    939     (assert((KIND) != PyUnicode_WCHAR_KIND),                          \
    940      (KIND) <= (WRITER)->kind                                         \
    941      ? 0                                                              \
    942      : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
    943 
    944 /* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
    945    macro instead. */
    946 PyAPI_FUNC(int)
    947 _PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
    948                                      enum PyUnicode_Kind kind);
    949 
    950 /* Append a Unicode character.
    951    Return 0 on success, raise an exception and return -1 on error. */
    952 PyAPI_FUNC(int)
    953 _PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
    954     Py_UCS4 ch
    955     );
    956 
    957 /* Append a Unicode string.
    958    Return 0 on success, raise an exception and return -1 on error. */
    959 PyAPI_FUNC(int)
    960 _PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
    961     PyObject *str               /* Unicode string */
    962     );
    963 
    964 /* Append a substring of a Unicode string.
    965    Return 0 on success, raise an exception and return -1 on error. */
    966 PyAPI_FUNC(int)
    967 _PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
    968     PyObject *str,              /* Unicode string */
    969     Py_ssize_t start,
    970     Py_ssize_t end
    971     );
    972 
    973 /* Append an ASCII-encoded byte string.
    974    Return 0 on success, raise an exception and return -1 on error. */
    975 PyAPI_FUNC(int)
    976 _PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
    977     const char *str,           /* ASCII-encoded byte string */
    978     Py_ssize_t len             /* number of bytes, or -1 if unknown */
    979     );
    980 
    981 /* Append a latin1-encoded byte string.
    982    Return 0 on success, raise an exception and return -1 on error. */
    983 PyAPI_FUNC(int)
    984 _PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
    985     const char *str,           /* latin1-encoded byte string */
    986     Py_ssize_t len             /* length in bytes */
    987     );
    988 
    989 /* Get the value of the writer as a Unicode string. Clear the
    990    buffer of the writer. Raise an exception and return NULL
    991    on error. */
    992 PyAPI_FUNC(PyObject *)
    993 _PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
    994 
    995 /* Deallocate memory of a writer (clear its internal buffer). */
    996 PyAPI_FUNC(void)
    997 _PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
    998 #endif
    999 
   1000 #ifndef Py_LIMITED_API
   1001 /* Format the object based on the format_spec, as defined in PEP 3101
   1002    (Advanced String Formatting).
           NOTE(review): the meaning of the int result is not stated in this
           header; presumably 0 on success and -1 on error, like the
           _PyUnicodeWriter_* functions -- confirm. */
   1003 PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
   1004     _PyUnicodeWriter *writer,   /* presumably receives the formatted output -- confirm */
   1005     PyObject *obj,              /* object to format */
   1006     PyObject *format_spec,      /* format specification */
   1007     Py_ssize_t start,           /* NOTE(review): presumably start of the range of format_spec to use -- confirm */
   1008     Py_ssize_t end);            /* NOTE(review): presumably end of that range -- confirm */
   1009 #endif
   1010 
   1011 PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);  /* NOTE(review): undocumented here; takes a pointer to an object pointer, so presumably may replace the pointed-to reference with the interned string -- confirm */
   1012 PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);  /* NOTE(review): undocumented here; same argument shape as PyUnicode_InternInPlace(), how it differs is not visible in this header -- confirm */
   1013 PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
   1014     const char *u              /* UTF-8 encoded string */
   1015     );
   1016 #ifndef Py_LIMITED_API
   1017 PyAPI_FUNC(void) _Py_ReleaseInternedUnicodeStrings(void);
   1018 #endif
   1019 
   1020 /* Use only if you know it's a string: op is cast to PyASCIIObject *
           without any type check and its state.interned field is read. */
   1021 #define PyUnicode_CHECK_INTERNED(op) \
   1022     (((PyASCIIObject *)(op))->state.interned)
   1023 
   1024 /* --- wchar_t support for platforms which support it --------------------- */
   1025 
   1026 #ifdef HAVE_WCHAR_H
   1027 
   1028 /* Create a Unicode Object from the wchar_t buffer w of the given
   1029    size.
   1030 
   1031    The buffer is copied into the new object. */
   1032 
   1033 PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
   1034     const wchar_t *w,           /* wchar_t buffer */
   1035     Py_ssize_t size             /* size of buffer */
   1036     );
   1037 
   1038 /* Copies the Unicode Object contents into the wchar_t buffer w.  At
   1039    most size wchar_t characters are copied.
   1040 
   1041    Note that the resulting wchar_t string may or may not be
   1042    0-terminated.  It is the responsibility of the caller to make sure
   1043    that the wchar_t string is 0-terminated in case this is required by
   1044    the application.
   1045 
   1046    Returns the number of wchar_t characters copied (excluding a
   1047    possible trailing 0-termination character) or -1 in case of an
   1048    error. */
   1049 
   1050 PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
   1051     PyObject *unicode,          /* Unicode object */
   1052     wchar_t *w,                 /* wchar_t buffer */
   1053     Py_ssize_t size             /* size of buffer */
   1054     );
   1055 
   1056 /* Convert the Unicode object to a wide character string. The output string
   1057    always ends with a nul character. If size is not NULL, write the number of
   1058    wide characters (excluding the null character) into *size.
   1059 
   1060    Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
   1061    on success. On error, raises a MemoryError and returns NULL; *size is
   1062    undefined. */
   1063 
   1064 PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
   1065     PyObject *unicode,          /* Unicode object */
   1066     Py_ssize_t *size            /* number of characters of the result */
   1067     );
   1068 
   1069 #ifndef Py_LIMITED_API
   1070 PyAPI_FUNC(void*) _PyUnicode_AsKind(PyObject *s, unsigned int kind);  /* NOTE(review): undocumented here; presumably returns the characters of s converted to the given kind -- confirm, including who owns the returned buffer */
   1071 #endif
   1072 
   1073 #endif
   1074 
   1075 /* --- Unicode ordinals --------------------------------------------------- */
   1076 
   1077 /* Create a Unicode Object from the given Unicode code point ordinal.
   1078 
   1079    The ordinal must be in range(0x110000). A ValueError is
   1080    raised in case it is not.
   1081 
   1082 */
   1083 
   1084 PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
   1085 
   1086 /* --- Free-list management ----------------------------------------------- */
   1087 
   1088 /* Clear the free list used by the Unicode implementation.
   1089 
   1090    This can be used to release memory used for objects on the free
   1091    list back to the Python memory allocator.
   1092 
   1093 */
   1094 
   1095 PyAPI_FUNC(int) PyUnicode_ClearFreeList(void);
   1096 
   1097 /* === Builtin Codecs =====================================================
   1098 
   1099    Many of these APIs take two arguments encoding and errors. These
   1100    parameters encoding and errors have the same semantics as the ones
   1101    of the builtin str() API.
   1102 
   1103    Setting encoding to NULL causes the default encoding (UTF-8) to be used.
   1104 
   1105    Error handling is set by errors which may also be set to NULL
   1106    meaning to use the default handling defined for the codec. Default
   1107    error handling for all builtin codecs is "strict" (ValueErrors are
   1108    raised).
   1109 
   1110    The codecs all use a similar interface. Only deviations from the
   1111    generic ones are documented.
   1112 
   1113 */
   1114 
   1115 /* --- Manage the default encoding ---------------------------------------- */
   1116 
   1117 /* Returns a pointer to the default encoding (UTF-8) of the
   1118    Unicode object unicode and the size of the encoded representation
   1119    in bytes stored in *size.
   1120 
   1121    In case of an error, *size is not set.
   1122 
   1123    This function caches the UTF-8 encoded string in the unicodeobject
   1124    and subsequent calls will return the same string.  The memory is released
   1125    when the unicodeobject is deallocated.
   1126 
   1127    _PyUnicode_AsStringAndSize is a #define for PyUnicode_AsUTF8AndSize to
   1128    support the previous internal function with the same behaviour.
   1129 
   1130    *** This API is for interpreter INTERNAL USE ONLY and will likely
   1131    *** be removed or changed in the future.
   1132 
   1133    *** If you need to access the Unicode object as UTF-8 bytes string,
   1134    *** please use PyUnicode_AsUTF8String() instead.
   1135 */
   1136 
   1137 #ifndef Py_LIMITED_API
   1138 PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize(
   1139     PyObject *unicode,
   1140     Py_ssize_t *size);
   1141 #define _PyUnicode_AsStringAndSize PyUnicode_AsUTF8AndSize
   1142 #endif
   1143 
   1144 /* Returns a pointer to the default encoding (UTF-8) of the
   1145    Unicode object unicode.
   1146 
   1147    Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation
   1148    in the unicodeobject.
   1149 
   1150    _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to
   1151    support the previous internal function with the same behaviour.
   1152 
   1153    Use of this API is DEPRECATED since no size information can be
   1154    extracted from the returned data.
   1155 
   1156    *** This API is for interpreter INTERNAL USE ONLY and will likely
   1157    *** be removed or changed in the future.
   1158 
   1159    *** If you need to access the Unicode object as UTF-8 bytes string,
   1160    *** please use PyUnicode_AsUTF8String() instead.
   1161 
   1162 */
   1163 
   1164 #ifndef Py_LIMITED_API
   1165 PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode);
   1166 #define _PyUnicode_AsString PyUnicode_AsUTF8
   1167 #endif
   1168 
   1169 /* Returns "utf-8".  */
   1170 
   1171 PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
   1172 
   1173 /* --- Generic Codecs ----------------------------------------------------- */
   1174 
   1175 /* Create a Unicode object by decoding the encoded string s of the
   1176    given size. */
   1177 
   1178 PyAPI_FUNC(PyObject*) PyUnicode_Decode(
   1179     const char *s,              /* encoded string */
   1180     Py_ssize_t size,            /* size of buffer */
   1181     const char *encoding,       /* encoding */
   1182     const char *errors          /* error handling */
   1183     );
   1184 
   1185 /* Decode a Unicode object unicode and return the result as Python
   1186    object.
   1187 
   1188    This API is DEPRECATED. The only supported standard encoding is rot13.
   1189    Use PyCodec_Decode() to decode with rot13 and non-standard codecs
   1190    that decode from str. */
   1191 
   1192 PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
   1193     PyObject *unicode,          /* Unicode object */
   1194     const char *encoding,       /* encoding */
   1195     const char *errors          /* error handling */
   1196     ) Py_DEPRECATED(3.6);
   1197 
   1198 /* Decode a Unicode object unicode and return the result as Unicode
   1199    object.
   1200 
   1201    This API is DEPRECATED. The only supported standard encoding is rot13.
   1202    Use PyCodec_Decode() to decode with rot13 and non-standard codecs
   1203    that decode from str to str. */
   1204 
   1205 PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
   1206     PyObject *unicode,          /* Unicode object */
   1207     const char *encoding,       /* encoding */
   1208     const char *errors          /* error handling */
   1209     ) Py_DEPRECATED(3.6);
   1210 
   1211 /* Encodes a Py_UNICODE buffer of the given size and returns a
   1212    Python string object. */
   1213 
   1214 #ifndef Py_LIMITED_API
   1215 PyAPI_FUNC(PyObject*) PyUnicode_Encode(
   1216     const Py_UNICODE *s,        /* Unicode char buffer */
   1217     Py_ssize_t size,            /* number of Py_UNICODE chars to encode */
   1218     const char *encoding,       /* encoding */
   1219     const char *errors          /* error handling */
   1220     ) Py_DEPRECATED(3.3);
   1221 #endif
   1222 
   1223 /* Encodes a Unicode object and returns the result as Python
   1224    object.
   1225 
   1226    This API is DEPRECATED.  It is superseded by PyUnicode_AsEncodedString()
   1227    since all standard encodings (except rot13) encode str to bytes.
   1228    Use PyCodec_Encode() for encoding with rot13 and non-standard codecs
   1229    that encode from str to non-bytes. */
   1230 
   1231 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
   1232     PyObject *unicode,          /* Unicode object */
   1233     const char *encoding,       /* encoding */
   1234     const char *errors          /* error handling */
   1235     ) Py_DEPRECATED(3.6);
   1236 
   1237 /* Encodes a Unicode object and returns the result as Python string
   1238    object. */
   1239 
   1240 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
   1241     PyObject *unicode,          /* Unicode object */
   1242     const char *encoding,       /* encoding */
   1243     const char *errors          /* error handling */
   1244     );
   1245 
   1246 /* Encodes a Unicode object and returns the result as Unicode
   1247    object.
   1248 
   1249    This API is DEPRECATED.  The only supported standard encoding is rot13.
   1250    Use PyCodec_Encode() to encode with rot13 and non-standard codecs
   1251    that encode from str to str. */
   1252 
   1253 PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
   1254     PyObject *unicode,          /* Unicode object */
   1255     const char *encoding,       /* encoding */
   1256     const char *errors          /* error handling */
   1257     ) Py_DEPRECATED(3.6);
   1258 
   1259 /* Build an encoding map. */
   1260 
   1261 PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
   1262     PyObject* string            /* 256 character map */
   1263    );
   1264 
   1265 /* --- UTF-7 Codecs ------------------------------------------------------- */
   1266 
   1267 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
   1268     const char *string,         /* UTF-7 encoded string */
   1269     Py_ssize_t length,          /* size of string */
   1270     const char *errors          /* error handling */
   1271     );
   1272 
   1273 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
   1274     const char *string,         /* UTF-7 encoded string */
   1275     Py_ssize_t length,          /* size of string */
   1276     const char *errors,         /* error handling */
   1277     Py_ssize_t *consumed        /* bytes consumed */
   1278     );
   1279 
   1280 #ifndef Py_LIMITED_API
   1281 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF7(
   1282     const Py_UNICODE *data,     /* Unicode char buffer */
   1283     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
   1284     int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
   1285     int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
   1286     const char *errors          /* error handling */
   1287     ) Py_DEPRECATED(3.3);
   1288 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF7(
   1289     PyObject *unicode,          /* Unicode object */
   1290     int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
   1291     int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
   1292     const char *errors          /* error handling */
   1293     );
   1294 #endif
   1295 
   1296 /* --- UTF-8 Codecs ------------------------------------------------------- */
   1297 
   1298 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
   1299     const char *string,         /* UTF-8 encoded string */
   1300     Py_ssize_t length,          /* size of string */
   1301     const char *errors          /* error handling */
   1302     );
   1303 
   1304 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
   1305     const char *string,         /* UTF-8 encoded string */
   1306     Py_ssize_t length,          /* size of string */
   1307     const char *errors,         /* error handling */
   1308     Py_ssize_t *consumed        /* bytes consumed */
   1309     );
   1310 
   1311 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
   1312     PyObject *unicode           /* Unicode object */
   1313     );
   1314 
   1315 #ifndef Py_LIMITED_API
   1316 PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
   1317     PyObject *unicode,          /* Unicode object */
   1318     const char *errors);        /* error handling */
   1319 
   1320 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF8(
   1321     const Py_UNICODE *data,     /* Unicode char buffer */
   1322     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
   1323     const char *errors          /* error handling */
   1324     ) Py_DEPRECATED(3.3);
   1325 #endif
   1326 
   1327 /* --- UTF-32 Codecs ------------------------------------------------------ */
   1328 
   1329 /* Decodes length bytes from a UTF-32 encoded buffer string and returns
   1330    the corresponding Unicode object.
   1331 
   1332    errors (if non-NULL) defines the error handling. It defaults
   1333    to "strict".
   1334 
   1335    If byteorder is non-NULL, the decoder starts decoding using the
   1336    given byte order:
   1337 
   1338     *byteorder == -1: little endian
   1339     *byteorder == 0:  native order
   1340     *byteorder == 1:  big endian
   1341 
   1342    In native mode, the first four bytes of the stream are checked for a
   1343    BOM mark. If found, the BOM mark is analysed, the byte order
   1344    adjusted and the BOM skipped.  In the other modes, no BOM mark
   1345    interpretation is done. After completion, *byteorder is set to the
   1346    current byte order at the end of input data.
   1347 
   1348    If byteorder is NULL, the codec starts in native order mode.
   1349 
   1350 */
   1351 
   1352 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
   1353     const char *string,         /* UTF-32 encoded string */
   1354     Py_ssize_t length,          /* size of string */
   1355     const char *errors,         /* error handling */
   1356     int *byteorder              /* pointer to byteorder to use
   1357                                    0=native;-1=LE,1=BE; updated on
   1358                                    exit */
   1359     );
   1360 
   1361 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
   1362     const char *string,         /* UTF-32 encoded string */
   1363     Py_ssize_t length,          /* size of string */
   1364     const char *errors,         /* error handling */
   1365     int *byteorder,             /* pointer to byteorder to use
   1366                                    0=native;-1=LE,1=BE; updated on
   1367                                    exit */
   1368     Py_ssize_t *consumed        /* bytes consumed */
   1369     );
   1370 
   1371 /* Returns a Python string using the UTF-32 encoding in native byte
   1372    order. The string always starts with a BOM mark.  */
   1373 
   1374 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
   1375     PyObject *unicode           /* Unicode object */
   1376     );
   1377 
   1378 /* Returns a Python string object holding the UTF-32 encoded value of
   1379    the Unicode data.
   1380 
   1381    If byteorder is not 0, output is written according to the following
   1382    byte order:
   1383 
   1384    byteorder == -1: little endian
   1385    byteorder == 0:  native byte order (writes a BOM mark)
   1386    byteorder == 1:  big endian
   1387 
   1388    If byteorder is 0, the output string will always start with the
   1389    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
   1390    prepended.
   1391 
   1392 */
   1393 
   1394 #ifndef Py_LIMITED_API
   1395 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF32(
   1396     const Py_UNICODE *data,     /* Unicode char buffer */
   1397     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
   1398     const char *errors,         /* error handling */
   1399     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
   1400     ) Py_DEPRECATED(3.3);
   1401 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
   1402     PyObject *object,           /* Unicode object */
   1403     const char *errors,         /* error handling */
   1404     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
   1405     );
   1406 #endif
   1407 
   1408 /* --- UTF-16 Codecs ------------------------------------------------------ */
   1409 
   1410 /* Decodes length bytes from a UTF-16 encoded buffer string and returns
   1411    the corresponding Unicode object.
   1412 
   1413    errors (if non-NULL) defines the error handling. It defaults
   1414    to "strict".
   1415 
   1416    If byteorder is non-NULL, the decoder starts decoding using the
   1417    given byte order:
   1418 
   1419     *byteorder == -1: little endian
   1420     *byteorder == 0:  native order
   1421     *byteorder == 1:  big endian
   1422 
   1423    In native mode, the first two bytes of the stream are checked for a
   1424    BOM mark. If found, the BOM mark is analysed, the byte order
   1425    adjusted and the BOM skipped.  In the other modes, no BOM mark
   1426    interpretation is done. After completion, *byteorder is set to the
   1427    current byte order at the end of input data.
   1428 
   1429    If byteorder is NULL, the codec starts in native order mode.
   1430 
   1431 */
   1432 
   1433 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
   1434     const char *string,         /* UTF-16 encoded string */
   1435     Py_ssize_t length,          /* size of string */
   1436     const char *errors,         /* error handling */
   1437     int *byteorder              /* pointer to byteorder to use
   1438                                    0=native;-1=LE,1=BE; updated on
   1439                                    exit */
   1440     );
   1441 
   1442 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
   1443     const char *string,         /* UTF-16 encoded string */
   1444     Py_ssize_t length,          /* size of string */
   1445     const char *errors,         /* error handling */
   1446     int *byteorder,             /* pointer to byteorder to use
   1447                                    0=native;-1=LE,1=BE; updated on
   1448                                    exit */
   1449     Py_ssize_t *consumed        /* bytes consumed */
   1450     );
   1451 
   1452 /* Returns a Python bytes object using the UTF-16 encoding in native byte
   1453    order. The bytes object always starts with a BOM mark.  */
   1454 
   1455 PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
   1456     PyObject *unicode           /* Unicode object */
   1457     );
   1458 
   1459 /* Returns a Python bytes object holding the UTF-16 encoded value of
   1460    the Unicode data.
   1461 
   1462    If byteorder is not 0, output is written according to the following
   1463    byte order:
   1464 
   1465    byteorder == -1: little endian
   1466    byteorder == 0:  native byte order (writes a BOM mark)
   1467    byteorder == 1:  big endian
   1468 
   1469    If byteorder is 0, the output string will always start with the
   1470    Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
   1471    prepended.
   1472 
   1473    Note that Py_UNICODE data is being interpreted as UTF-16 reduced to
   1474    UCS-2. This trick makes it possible to add full UTF-16 capabilities
   1475    at a later point without compromising the APIs.
   1476 
   1477 */
   1478 
   1479 #ifndef Py_LIMITED_API
   1480 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUTF16(
   1481     const Py_UNICODE *data,     /* Unicode char buffer */
   1482     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
   1483     const char *errors,         /* error handling */
   1484     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
   1485     ) Py_DEPRECATED(3.3);
   1486 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
   1487     PyObject* unicode,          /* Unicode object */
   1488     const char *errors,         /* error handling */
   1489     int byteorder               /* byteorder to use 0=BOM+native;-1=LE,1=BE */
   1490     );
   1491 #endif
   1492 
   1493 /* --- Unicode-Escape Codecs ---------------------------------------------- */
   1494 
   1495 PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
   1496     const char *string,         /* Unicode-Escape encoded string */
   1497     Py_ssize_t length,          /* size of string */
   1498     const char *errors          /* error handling */
   1499     );
   1500 
   1501 #ifndef Py_LIMITED_API
   1502 /* Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
   1503    chars. */
   1504 PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscape(
   1505         const char *string,     /* Unicode-Escape encoded string */
   1506         Py_ssize_t length,      /* size of string */
   1507         const char *errors,     /* error handling */
   1508         const char **first_invalid_escape  /* on return, points to first
   1509                                               invalid escaped char in
   1510                                               string. */
   1511 );
   1512 #endif
   1513 
   1514 PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
   1515     PyObject *unicode           /* Unicode object */
   1516     );
   1517 
   1518 #ifndef Py_LIMITED_API
   1519 PyAPI_FUNC(PyObject*) PyUnicode_EncodeUnicodeEscape(
   1520     const Py_UNICODE *data,     /* Unicode char buffer */
   1521     Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
   1522     ) Py_DEPRECATED(3.3);
   1523 #endif
   1524 
   1525 /* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
   1526 
   1527 PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
   1528     const char *string,         /* Raw-Unicode-Escape encoded string */
   1529     Py_ssize_t length,          /* size of string */
   1530     const char *errors          /* error handling */
   1531     );
   1532 
   1533 PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
   1534     PyObject *unicode           /* Unicode object */
   1535     );
   1536 
   1537 #ifndef Py_LIMITED_API
   1538 PyAPI_FUNC(PyObject*) PyUnicode_EncodeRawUnicodeEscape(
   1539     const Py_UNICODE *data,     /* Unicode char buffer */
   1540     Py_ssize_t length           /* Number of Py_UNICODE chars to encode */
   1541     ) Py_DEPRECATED(3.3);
   1542 #endif
   1543 
   1544 /* --- Unicode Internal Codec ---------------------------------------------
   1545 
   1546     Only for internal use in _codecsmodule.c */
   1547 
   1548 #ifndef Py_LIMITED_API
   1549 PyObject *_PyUnicode_DecodeUnicodeInternal(
   1550     const char *string,         /* encoded string */
   1551     Py_ssize_t length,          /* size of string */
   1552     const char *errors          /* error handling */
   1553     );
   1554 #endif
   1555 
   1556 /* --- Latin-1 Codecs -----------------------------------------------------
   1557 
   1558    Note: Latin-1 corresponds to the first 256 Unicode ordinals.
   1559 
   1560 */
   1561 
   1562 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
   1563     const char *string,         /* Latin-1 encoded string */
   1564     Py_ssize_t length,          /* size of string */
   1565     const char *errors          /* error handling */
   1566     );
   1567 
   1568 PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
   1569     PyObject *unicode           /* Unicode object */
   1570     );
   1571 
   1572 #ifndef Py_LIMITED_API
   1573 PyAPI_FUNC(PyObject*) _PyUnicode_AsLatin1String(
   1574     PyObject* unicode,
   1575     const char* errors);
   1576 
   1577 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLatin1(
   1578     const Py_UNICODE *data,     /* Unicode char buffer */
   1579     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
   1580     const char *errors          /* error handling */
   1581     ) Py_DEPRECATED(3.3);
   1582 #endif
   1583 
   1584 /* --- ASCII Codecs -------------------------------------------------------
   1585 
   1586    Only 7-bit ASCII data is accepted. All other codes generate errors.
   1587 
   1588 */
   1589 
   1590 PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
   1591     const char *string,         /* ASCII encoded string */
   1592     Py_ssize_t length,          /* size of string */
   1593     const char *errors          /* error handling */
   1594     );
   1595 
   1596 PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
   1597     PyObject *unicode           /* Unicode object */
   1598     );
   1599 
   1600 #ifndef Py_LIMITED_API
   1601 PyAPI_FUNC(PyObject*) _PyUnicode_AsASCIIString(
   1602     PyObject* unicode,
   1603     const char* errors);
   1604 
   1605 PyAPI_FUNC(PyObject*) PyUnicode_EncodeASCII(
   1606     const Py_UNICODE *data,     /* Unicode char buffer */
   1607     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
   1608     const char *errors          /* error handling */
   1609     ) Py_DEPRECATED(3.3);
   1610 #endif
   1611 
   1612 /* --- Character Map Codecs -----------------------------------------------
   1613 
   1614    This codec uses mappings to encode and decode characters.
   1615 
   1616    Decoding mappings must map byte ordinals (integers in the range from 0 to
   1617    255) to Unicode strings, integers (which are then interpreted as Unicode
   1618    ordinals) or None.  Unmapped data bytes (ones which cause a LookupError)
   1619    as well as ones mapped to None, 0xFFFE or '\ufffe' are treated as "undefined
   1620    mapping" and cause an error.
   1621 
   1622    Encoding mappings must map Unicode ordinal integers to bytes objects,
   1623    integers in the range from 0 to 255 or None.  Unmapped character
   1624    ordinals (ones which cause a LookupError) as well as ones mapped to
   1625    None are treated as "undefined mapping" and cause an error.
   1626 
   1627 */
   1628 
   1629 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
   1630     const char *string,         /* Encoded string */
   1631     Py_ssize_t length,          /* size of string */
   1632     PyObject *mapping,          /* decoding mapping */
   1633     const char *errors          /* error handling */
   1634     );
   1635 
   1636 PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
   1637     PyObject *unicode,          /* Unicode object */
   1638     PyObject *mapping           /* encoding mapping */
   1639     );
   1640 
   1641 #ifndef Py_LIMITED_API
   1642 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCharmap(
   1643     const Py_UNICODE *data,     /* Unicode char buffer */
   1644     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
   1645     PyObject *mapping,          /* encoding mapping */
   1646     const char *errors          /* error handling */
   1647     ) Py_DEPRECATED(3.3);
   1648 PyAPI_FUNC(PyObject*) _PyUnicode_EncodeCharmap(
   1649     PyObject *unicode,          /* Unicode object */
   1650     PyObject *mapping,          /* encoding mapping */
   1651     const char *errors          /* error handling */
   1652     );
   1653 #endif
   1654 
   1655 /* Translate a Py_UNICODE buffer of the given length by applying a
   1656    character mapping table to it and return the resulting Unicode
   1657    object.
   1658 
   1659    The mapping table must map Unicode ordinal integers to Unicode strings,
   1660    Unicode ordinal integers or None (causing deletion of the character).
   1661 
   1662    Mapping tables may be dictionaries or sequences. Unmapped character
   1663    ordinals (ones which cause a LookupError) are left untouched and
   1664    are copied as-is.
   1665 
   1666 */
   1667 
   1668 #ifndef Py_LIMITED_API
   1669 PyAPI_FUNC(PyObject *) PyUnicode_TranslateCharmap(
   1670     const Py_UNICODE *data,     /* Unicode char buffer */
   1671     Py_ssize_t length,          /* Number of Py_UNICODE chars to translate */
   1672     PyObject *table,            /* Translate table */
   1673     const char *errors          /* error handling */
   1674     ) Py_DEPRECATED(3.3);
   1675 #endif
   1676 
   1677 #ifdef MS_WINDOWS
   1678 
   1679 /* --- MBCS codecs for Windows -------------------------------------------- */
   1680 
   1681 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
   1682     const char *string,         /* MBCS encoded string */
   1683     Py_ssize_t length,          /* size of string */
   1684     const char *errors          /* error handling */
   1685     );
   1686 
   1687 PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
   1688     const char *string,         /* MBCS encoded string */
   1689     Py_ssize_t length,          /* size of string */
   1690     const char *errors,         /* error handling */
   1691     Py_ssize_t *consumed        /* bytes consumed */
   1692     );
   1693 
   1694 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
   1695 PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
   1696     int code_page,              /* code page number */
   1697     const char *string,         /* encoded string */
   1698     Py_ssize_t length,          /* size of string */
   1699     const char *errors,         /* error handling */
   1700     Py_ssize_t *consumed        /* bytes consumed */
   1701     );
   1702 #endif
   1703 
   1704 PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
   1705     PyObject *unicode           /* Unicode object */
   1706     );
   1707 
   1708 #ifndef Py_LIMITED_API
   1709 PyAPI_FUNC(PyObject*) PyUnicode_EncodeMBCS(
   1710     const Py_UNICODE *data,     /* Unicode char buffer */
   1711     Py_ssize_t length,          /* number of Py_UNICODE chars to encode */
   1712     const char *errors          /* error handling */
   1713     ) Py_DEPRECATED(3.3);
   1714 #endif
   1715 
   1716 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
   1717 PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
   1718     int code_page,              /* code page number */
   1719     PyObject *unicode,          /* Unicode object */
   1720     const char *errors          /* error handling */
   1721     );
   1722 #endif
   1723 
   1724 #endif /* MS_WINDOWS */
   1725 
   1726 #ifndef Py_LIMITED_API
   1727 /* --- Decimal Encoder ---------------------------------------------------- */
   1728 
   1729 /* Takes a Unicode string holding a decimal value and writes it into
   1730    an output buffer using standard ASCII digit codes.
   1731 
   1732    The output buffer has to provide at least length+1 bytes of storage
   1733    area. The output string is 0-terminated.
   1734 
   1735    The encoder converts whitespace to ' ', decimal characters to their
   1736    corresponding ASCII digit and all other Latin-1 characters except
   1737    \0 as-is. Characters outside this range (Unicode ordinals 1-255)
   1738    are treated as errors. This includes embedded null characters.
   1739 
   1740    Error handling is defined by the errors argument:
   1741 
   1742       NULL or "strict": raise a ValueError
   1743       "ignore": ignore the wrong characters (these are not copied to the
   1744                 output buffer)
   1745       "replace": replaces illegal characters with '?'
   1746 
   1747    Returns 0 on success, -1 on failure.
   1748 
   1749 */
   1750 
   1751 PyAPI_FUNC(int) PyUnicode_EncodeDecimal(
   1752     Py_UNICODE *s,              /* Unicode buffer */
   1753     Py_ssize_t length,          /* Number of Py_UNICODE chars to encode */
   1754     char *output,               /* Output buffer; must have size >= length+1 */
   1755     const char *errors          /* error handling */
   1756     ) /* Py_DEPRECATED(3.3) */;
   1757 
   1758 /* Transforms code points that have decimal digit property to the
   1759    corresponding ASCII digit code points.
   1760 
   1761    Returns a new Unicode string on success, NULL on failure.
   1762 */
   1763 
   1764 PyAPI_FUNC(PyObject*) PyUnicode_TransformDecimalToASCII(
   1765     Py_UNICODE *s,              /* Unicode buffer */
   1766     Py_ssize_t length           /* Number of Py_UNICODE chars to transform */
   1767     ) /* Py_DEPRECATED(3.3) */;
   1768 
   1769 /* Converts a Unicode object holding a decimal value to an ASCII string
   1770    for use in int, float and complex parsers.
   1771    Transforms code points that have decimal digit property to the
   1772    corresponding ASCII digit code points.  Transforms spaces to ASCII.
   1773    Transforms code points starting from the first non-ASCII code point that
   1774    is neither a decimal digit nor a space to the end into '?'. */
   1775 
   1776 PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
   1777     PyObject *unicode           /* Unicode object */
   1778     );
   1779 #endif
   1780 
   1781 /* --- Locale encoding --------------------------------------------------- */
   1782 
   1783 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
   1784 /* Decode a string from the current locale encoding. *errors* selects the
   1785    error handler: "strict" (also used if *errors* is NULL) or
   1786    "surrogateescape" (PEP 383), which escapes undecodable bytes. If a byte
   1787    sequence can be decoded as a surrogate character and the
   1788    "surrogateescape" error handler is used, the byte sequence is escaped
   1789    instead of being decoded. *str* must end with a null character but cannot
   1790    contain embedded null characters. */
   1791 
   1792 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
   1793     const char *str,            /* null-terminated encoded string */
   1794     Py_ssize_t len,             /* size of string */
   1795     const char *errors);        /* error handling */
   1796 
   1797 /* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
   1798    length using strlen(). */
   1799 
   1800 PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
   1801     const char *str,            /* null-terminated encoded string */
   1802     const char *errors);        /* error handling */
   1803 
   1804 /* Encode a Unicode object to the current locale encoding. *errors* selects
   1805    the error handler: "strict" (also used if *errors* is NULL) or
   1806    "surrogateescape" (PEP 383). Return a bytes object. The string
   1807    cannot contain embedded null characters. */
   1808 
   1809 PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
   1810     PyObject *unicode,          /* Unicode object */
   1811     const char *errors          /* error handling */
   1812     );
   1813 #endif
   1814 
   1815 /* --- File system encoding ---------------------------------------------- */
   1816 
   1817 /* ParseTuple converter: encode str objects to bytes using
   1818    PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
   1819 
   1820 PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
   1821 
   1822 /* ParseTuple converter: decode bytes objects to unicode using
   1823    PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
   1824 
   1825 PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
   1826 
   1827 /* Decode a null-terminated string using Py_FileSystemDefaultEncoding
   1828    and the "surrogateescape" error handler.
   1829 
   1830    If Py_FileSystemDefaultEncoding is not set, fall back to the locale
   1831    encoding.
   1832 
   1833    Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
   1834 */
   1835 
   1836 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
   1837     const char *s               /* encoded string */
   1838     );
   1839 
   1840 /* Decode a string using Py_FileSystemDefaultEncoding
   1841    and the "surrogateescape" error handler.
   1842 
   1843    If Py_FileSystemDefaultEncoding is not set, fall back to the locale
   1844    encoding.
   1845 */
   1846 
   1847 PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
   1848     const char *s,               /* encoded string */
   1849     Py_ssize_t size              /* size */
   1850     );
   1851 
   1852 /* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
   1853    "surrogateescape" error handler, and return bytes.
   1854 
   1855    If Py_FileSystemDefaultEncoding is not set, fall back to the locale
   1856    encoding.
   1857 */
   1858 
   1859 PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
   1860     PyObject *unicode
   1861     );
   1862 
   1863 /* --- Methods & Slots ----------------------------------------------------
   1864 
   1865    These are capable of handling Unicode objects and strings on input
   1866    (we refer to them as strings in the descriptions) and return
   1867    Unicode objects or integers as appropriate. */
   1868 
   1869 /* Concat two strings giving a new Unicode string. */
   1870 
   1871 PyAPI_FUNC(PyObject*) PyUnicode_Concat(
   1872     PyObject *left,             /* Left string */
   1873     PyObject *right             /* Right string */
   1874     );
   1875 
   1876 /* Concat two strings and put the result in *pleft
   1877    (sets *pleft to NULL on error) */
   1878 
   1879 PyAPI_FUNC(void) PyUnicode_Append(
   1880     PyObject **pleft,           /* Pointer to left string */
   1881     PyObject *right             /* Right string */
   1882     );
   1883 
   1884 /* Concat two strings, put the result in *pleft and drop the right object
   1885    (sets *pleft to NULL on error) */
   1886 
   1887 PyAPI_FUNC(void) PyUnicode_AppendAndDel(
   1888     PyObject **pleft,           /* Pointer to left string */
   1889     PyObject *right             /* Right string */
   1890     );
   1891 
   1892 /* Split a string giving a list of Unicode strings.
   1893 
   1894    If sep is NULL, splitting will be done at all whitespace
   1895    substrings. Otherwise, splits occur at the given separator.
   1896 
   1897    At most maxsplit splits will be done. If negative, no limit is set.
   1898 
   1899    Separators are not included in the resulting list.
   1900 
   1901 */
   1902 
   1903 PyAPI_FUNC(PyObject*) PyUnicode_Split(
   1904     PyObject *s,                /* String to split */
   1905     PyObject *sep,              /* String separator */
   1906     Py_ssize_t maxsplit         /* Maxsplit count */
   1907     );
   1908 
   1909 /* Ditto, but split at line breaks.
   1910 
   1911    CRLF is considered to be one line break. Line breaks are not
   1912    included in the resulting strings unless keepends is true. */
   1913 
   1914 PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
   1915     PyObject *s,                /* String to split */
   1916     int keepends                /* If true, line end markers are included */
   1917     );
   1918 
   1919 /* Partition a string using a given separator. */
   1920 
   1921 PyAPI_FUNC(PyObject*) PyUnicode_Partition(
   1922     PyObject *s,                /* String to partition */
   1923     PyObject *sep               /* String separator */
   1924     );
   1925 
   1926 /* Partition a string using a given separator, searching from the end of the
   1927    string. */
   1928 
   1929 PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
   1930     PyObject *s,                /* String to partition */
   1931     PyObject *sep               /* String separator */
   1932     );
   1933 
   1934 /* Split a string giving a list of Unicode strings.
   1935 
   1936    If sep is NULL, splitting will be done at all whitespace
   1937    substrings. Otherwise, splits occur at the given separator.
   1938 
   1939    At most maxsplit splits will be done. But unlike PyUnicode_Split
   1940    PyUnicode_RSplit splits from the end of the string. If negative,
   1941    no limit is set.
   1942 
   1943    Separators are not included in the resulting list.
   1944 
   1945 */
   1946 
   1947 PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
   1948     PyObject *s,                /* String to split */
   1949     PyObject *sep,              /* String separator */
   1950     Py_ssize_t maxsplit         /* Maxsplit count */
   1951     );
   1952 
   1953 /* Translate a string by applying a character mapping table to it and
   1954    return the resulting Unicode object.
   1955 
   1956    The mapping table must map Unicode ordinal integers to Unicode strings,
   1957    Unicode ordinal integers or None (causing deletion of the character).
   1958 
   1959    Mapping tables may be dictionaries or sequences. Unmapped character
   1960    ordinals (ones which cause a LookupError) are left untouched and
   1961    are copied as-is.
   1962 
   1963 */
   1964 
   1965 PyAPI_FUNC(PyObject *) PyUnicode_Translate(
   1966     PyObject *str,              /* String */
   1967     PyObject *table,            /* Translate table */
   1968     const char *errors          /* error handling */
   1969     );
   1970 
   1971 /* Join a sequence of strings using the given separator and return
   1972    the resulting Unicode string. */
   1973 
   1974 PyAPI_FUNC(PyObject*) PyUnicode_Join(
   1975     PyObject *separator,        /* Separator string */
   1976     PyObject *seq               /* Sequence object */
   1977     );
   1978 
   1979 #ifndef Py_LIMITED_API
   1980 PyAPI_FUNC(PyObject *) _PyUnicode_JoinArray(
   1981     PyObject *separator,
   1982     PyObject *const *items,
   1983     Py_ssize_t seqlen
   1984     );
   1985 #endif /* Py_LIMITED_API */
   1986 
   1987 /* Return 1 if substr matches str[start:end] at the given tail end, 0
   1988    otherwise. Return -1 if an error occurred. */
   1989 
   1990 PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
   1991     PyObject *str,              /* String */
   1992     PyObject *substr,           /* Prefix or Suffix string */
   1993     Py_ssize_t start,           /* Start index */
   1994     Py_ssize_t end,             /* Stop index */
   1995     int direction               /* Tail end: -1 prefix, +1 suffix */
   1996     );
   1997 
   1998 /* Return the first position of substr in str[start:end] using the
   1999    given search direction or -1 if not found. -2 is returned in case
   2000    an error occurred and an exception is set. */
   2001 
   2002 PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
   2003     PyObject *str,              /* String */
   2004     PyObject *substr,           /* Substring to find */
   2005     Py_ssize_t start,           /* Start index */
   2006     Py_ssize_t end,             /* Stop index */
   2007     int direction               /* Find direction: +1 forward, -1 backward */
   2008     );
   2009 
   2010 #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
   2011 /* Like PyUnicode_Find, but search for single character only. */
   2012 PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
   2013     PyObject *str,              /* String */
   2014     Py_UCS4 ch,                 /* Character to find */
   2015     Py_ssize_t start,           /* Start index */
   2016     Py_ssize_t end,             /* Stop index */
   2017     int direction               /* Find direction: +1 forward, -1 backward */
   2018     );
   2019 #endif
   2020 
   2021 /* Count the number of occurrences of substr in str[start:end]. */
   2022 
   2023 PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
   2024     PyObject *str,              /* String */
   2025     PyObject *substr,           /* Substring to count */
   2026     Py_ssize_t start,           /* Start index */
   2027     Py_ssize_t end              /* Stop index */
   2028     );
   2029 
   2030 /* Replace at most maxcount occurrences of substr in str with replstr
   2031    and return the resulting Unicode object. */
   2032 
   2033 PyAPI_FUNC(PyObject *) PyUnicode_Replace(
   2034     PyObject *str,              /* String */
   2035     PyObject *substr,           /* Substring to find */
   2036     PyObject *replstr,          /* Replacement string */
   2037     Py_ssize_t maxcount         /* Max. number of replacements to apply;
   2038                                    -1 = all */
   2039     );
   2040 
   2041 /* Compare two strings and return -1, 0, 1 for less than, equal,
   2042    greater than resp.
   2043    Raise an exception and return -1 on error. */
   2044 
   2045 PyAPI_FUNC(int) PyUnicode_Compare(
   2046     PyObject *left,             /* Left string */
   2047     PyObject *right             /* Right string */
   2048     );
   2049 
   2050 #ifndef Py_LIMITED_API
   2051 /* Test whether a Unicode object is equal to an ASCII identifier.  Return 1
   2052    if true, 0 otherwise.  The right argument must be an ASCII identifier.
   2053    Any error that occurs inside will be cleared before returning. */
   2054 
   2055 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIId(
   2056     PyObject *left,             /* Left string */
   2057     _Py_Identifier *right       /* Right identifier */
   2058     );
   2059 #endif
   2060 
   2061 /* Compare a Unicode object with C string and return -1, 0, 1 for less than,
   2062    equal, and greater than, respectively.  It is best to pass only
   2063    ASCII-encoded strings, but the function interprets the input string as
   2064    ISO-8859-1 if it contains non-ASCII characters.
   2065    This function does not raise exceptions. */
   2066 
   2067 PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
   2068     PyObject *left,
   2069     const char *right           /* ASCII-encoded string */
   2070     );
   2071 
   2072 #ifndef Py_LIMITED_API
   2073 /* Test whether a Unicode object is equal to an ASCII string.  Return 1 if
   2074    true, 0 otherwise.  The right argument must be an ASCII-encoded string.
   2075    Any error that occurs inside will be cleared before returning. */
   2076 
   2077 PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
   2078     PyObject *left,
   2079     const char *right           /* ASCII-encoded string */
   2080     );
   2081 #endif
   2082 
   2083 /* Rich compare two strings and return one of the following:
   2084 
   2085    - NULL in case an exception was raised
   2086    - Py_True or Py_False for successful comparisons
   2087    - Py_NotImplemented in case the type combination is unknown
   2088 
   2089    Possible values for op:
   2090 
   2091      Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
   2092 
   2093 */
   2094 
   2095 PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
   2096     PyObject *left,             /* Left string */
   2097     PyObject *right,            /* Right string */
   2098     int op                      /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
   2099     );
   2100 
   2101 /* Apply an argument tuple or dictionary to a format string and return
   2102    the resulting Unicode string. */
   2103 
   2104 PyAPI_FUNC(PyObject *) PyUnicode_Format(
   2105     PyObject *format,           /* Format string */
   2106     PyObject *args              /* Argument tuple or dictionary */
   2107     );
   2108 
   2109 /* Checks whether element is contained in container and returns 1/0
   2110    accordingly.
   2111 
   2112    element has to coerce to a one element Unicode string. -1 is
   2113    returned in case of an error. */
   2114 
   2115 PyAPI_FUNC(int) PyUnicode_Contains(
   2116     PyObject *container,        /* Container string */
   2117     PyObject *element           /* Element string */
   2118     );
   2119 
   2120 /* Checks whether argument is a valid identifier. */
   2121 
   2122 PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s);
   2123 
   2124 #ifndef Py_LIMITED_API
   2125 /* Externally visible for str.strip(unicode) */
   2126 PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
   2127     PyObject *self,
   2128     int striptype,
   2129     PyObject *sepobj
   2130     );
   2131 #endif
   2132 
   2133 /* Using explicit passed-in values, insert the thousands grouping
   2134    into the string pointed to by buffer.  For the argument descriptions,
   2135    see Objects/stringlib/localeutil.h */
   2136 #ifndef Py_LIMITED_API
   2137 PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
   2138     _PyUnicodeWriter *writer,
   2139     Py_ssize_t n_buffer,
   2140     PyObject *digits,
   2141     Py_ssize_t d_pos,
   2142     Py_ssize_t n_digits,
   2143     Py_ssize_t min_width,
   2144     const char *grouping,
   2145     PyObject *thousands_sep,
   2146     Py_UCS4 *maxchar);
   2147 #endif
   2148 /* === Characters Type APIs =============================================== */
   2149 
   2150 /* Helper array used by Py_UNICODE_ISSPACE(). */
   2151 
   2152 #ifndef Py_LIMITED_API
   2153 PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[];
   2154 
   2155 /* These should not be used directly. Use the Py_UNICODE_IS* and
   2156    Py_UNICODE_TO* macros instead.
   2157 
   2158    These APIs are implemented in Objects/unicodectype.c.
   2159 
   2160 */
   2161 
   2162 PyAPI_FUNC(int) _PyUnicode_IsLowercase(
   2163     Py_UCS4 ch       /* Unicode character */
   2164     );
   2165 
   2166 PyAPI_FUNC(int) _PyUnicode_IsUppercase(
   2167     Py_UCS4 ch       /* Unicode character */
   2168     );
   2169 
   2170 PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
   2171     Py_UCS4 ch       /* Unicode character */
   2172     );
   2173 
   2174 PyAPI_FUNC(int) _PyUnicode_IsXidStart(
   2175     Py_UCS4 ch       /* Unicode character */
   2176     );
   2177 
   2178 PyAPI_FUNC(int) _PyUnicode_IsXidContinue(
   2179     Py_UCS4 ch       /* Unicode character */
   2180     );
   2181 
   2182 PyAPI_FUNC(int) _PyUnicode_IsWhitespace(
   2183     const Py_UCS4 ch         /* Unicode character */
   2184     );
   2185 
   2186 PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
   2187     const Py_UCS4 ch         /* Unicode character */
   2188     );
   2189 
   2190 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
   2191     Py_UCS4 ch       /* Unicode character */
   2192     ) /* Py_DEPRECATED(3.3) */;
   2193 
   2194 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
   2195     Py_UCS4 ch       /* Unicode character */
   2196     ) /* Py_DEPRECATED(3.3) -- NOTE(review): marker is commented out here, unlike _PyUnicode_ToTitlecase; confirm this is intentional */;
   2197 
   2198 PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
   2199     Py_UCS4 ch       /* Unicode character */
   2200     ) Py_DEPRECATED(3.3);  /* returns a single Py_UCS4; the only single-character case mapping here with an active deprecation marker */
   2201 
   2202 PyAPI_FUNC(int) _PyUnicode_ToLowerFull(
   2203     Py_UCS4 ch,       /* Unicode character */
   2204     Py_UCS4 *res      /* NOTE(review): presumably output buffer for the mapped character(s); required capacity is not stated here -- confirm */
   2205     );  /* NOTE(review): int result presumably counts the characters stored in res -- confirm */
   2206 
   2207 PyAPI_FUNC(int) _PyUnicode_ToTitleFull(
   2208     Py_UCS4 ch,       /* Unicode character */
   2209     Py_UCS4 *res      /* NOTE(review): presumably output buffer for the mapped character(s); required capacity is not stated here -- confirm */
   2210     );  /* NOTE(review): int result presumably counts the characters stored in res -- confirm */
   2211 
   2212 PyAPI_FUNC(int) _PyUnicode_ToUpperFull(
   2213     Py_UCS4 ch,       /* Unicode character */
   2214     Py_UCS4 *res      /* NOTE(review): presumably output buffer for the mapped character(s); required capacity is not stated here -- confirm */
   2215     );  /* NOTE(review): int result presumably counts the characters stored in res -- confirm */
   2216 
   2217 PyAPI_FUNC(int) _PyUnicode_ToFoldedFull(
   2218     Py_UCS4 ch,       /* Unicode character */
   2219     Py_UCS4 *res      /* NOTE(review): presumably output buffer for the case-folded character(s); required capacity is not stated here -- confirm */
   2220     );  /* NOTE(review): int result presumably counts the characters stored in res -- confirm */
   2221 
   2222 PyAPI_FUNC(int) _PyUnicode_IsCaseIgnorable(
   2223     Py_UCS4 ch         /* Unicode character */
   2224     );  /* NOTE(review): presumably tests the Case_Ignorable property; confirm in Objects/unicodectype.c */
   2225 
   2226 PyAPI_FUNC(int) _PyUnicode_IsCased(
   2227     Py_UCS4 ch         /* Unicode character */
   2228     );  /* NOTE(review): presumably tests the Cased property; confirm in Objects/unicodectype.c */
   2229 
   2230 PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit(
   2231     Py_UCS4 ch       /* Unicode character */
   2232     );  /* NOTE(review): result for characters without a decimal value is not stated here (presumably negative) -- confirm */
   2233 
   2234 PyAPI_FUNC(int) _PyUnicode_ToDigit(
   2235     Py_UCS4 ch       /* Unicode character */
   2236     );  /* NOTE(review): result for characters without a digit value is not stated here (presumably negative) -- confirm */
   2237 
   2238 PyAPI_FUNC(double) _PyUnicode_ToNumeric(
   2239     Py_UCS4 ch       /* Unicode character */
   2240     );  /* returns double, unlike the int digit helpers; NOTE(review): result for non-numeric characters is not stated here -- confirm */
   2241 
   2242 PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit(
   2243     Py_UCS4 ch       /* Unicode character */
   2244     );  /* NOTE(review): presumably nonzero iff ch has a decimal digit value; confirm in Objects/unicodectype.c */
   2245 
   2246 PyAPI_FUNC(int) _PyUnicode_IsDigit(
   2247     Py_UCS4 ch       /* Unicode character */
   2248     );  /* NOTE(review): presumably nonzero iff ch has a digit value; confirm in Objects/unicodectype.c */
   2249 
   2250 PyAPI_FUNC(int) _PyUnicode_IsNumeric(
   2251     Py_UCS4 ch       /* Unicode character */
   2252     );  /* NOTE(review): presumably nonzero iff ch has a numeric value; confirm in Objects/unicodectype.c */
   2253 
   2254 PyAPI_FUNC(int) _PyUnicode_IsPrintable(
   2255     Py_UCS4 ch       /* Unicode character */
   2256     );  /* NOTE(review): presumably nonzero iff ch is printable; exact definition of "printable" to be confirmed in Objects/unicodectype.c */
   2257 
   2258 PyAPI_FUNC(int) _PyUnicode_IsAlpha(
   2259     Py_UCS4 ch       /* Unicode character */
   2260     );  /* NOTE(review): presumably nonzero iff ch is alphabetic; confirm in Objects/unicodectype.c */
   2261 
   2262 PyAPI_FUNC(size_t) Py_UNICODE_strlen(
   2263     const Py_UNICODE *u     /* NOTE(review): presumably a NUL-terminated Py_UNICODE string -- confirm */
   2264     ) Py_DEPRECATED(3.3);   /* marked deprecated; length is reported as size_t */
   2265 
   2266 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcpy(
   2267     Py_UNICODE *s1,         /* non-const (presumably the destination); the prototype has no capacity argument */
   2268     const Py_UNICODE *s2) Py_DEPRECATED(3.3);  /* s2 is read-only; marked deprecated */
   2269 
   2270 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strcat(
   2271     Py_UNICODE *s1, const Py_UNICODE *s2) Py_DEPRECATED(3.3);  /* s1 non-const (presumably the destination), s2 read-only; no capacity argument; marked deprecated */
   2272 
   2273 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strncpy(
   2274     Py_UNICODE *s1,         /* non-const (presumably the destination) */
   2275     const Py_UNICODE *s2,   /* read-only source */
   2276     size_t n) Py_DEPRECATED(3.3);  /* NOTE(review): n presumably bounds the copy in Py_UNICODE units; whether s1 ends up NUL-terminated is not stated here -- confirm */
   2277 
   2278 PyAPI_FUNC(int) Py_UNICODE_strcmp(
   2279     const Py_UNICODE *s1,
   2280     const Py_UNICODE *s2
   2281     ) Py_DEPRECATED(3.3);   /* NOTE(review): presumably a strcmp()-style negative/zero/positive result -- confirm */
   2282 
   2283 PyAPI_FUNC(int) Py_UNICODE_strncmp(
   2284     const Py_UNICODE *s1,
   2285     const Py_UNICODE *s2,
   2286     size_t n                /* NOTE(review): presumably the maximum number of Py_UNICODE units compared -- confirm */
   2287     ) Py_DEPRECATED(3.3);   /* NOTE(review): presumably a strncmp()-style negative/zero/positive result -- confirm */
   2288 
   2289 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strchr(
   2290     const Py_UNICODE *s,
   2291     Py_UNICODE c
   2292     ) Py_DEPRECATED(3.3);   /* result is a non-const pointer although s is const; NOTE(review): presumably NULL when c is absent -- confirm */
   2293 
   2294 PyAPI_FUNC(Py_UNICODE*) Py_UNICODE_strrchr(
   2295     const Py_UNICODE *s,
   2296     Py_UNICODE c
   2297     ) Py_DEPRECATED(3.3);   /* result is a non-const pointer although s is const; NOTE(review): presumably NULL when c is absent -- confirm */
   2298 
   2299 PyAPI_FUNC(PyObject*) _PyUnicode_FormatLong(PyObject *, int, int, int);  /* NOTE(review): parameters are unnamed here; consult the definition for the meaning of the three ints */
   2300 
   2301 /* Create a copy of a unicode string ending with a nul character. Return NULL
   2302    and raise a MemoryError exception on memory allocation failure, otherwise
   2303    return a newly allocated buffer (use PyMem_Free() to free the buffer). */
   2304 
   2305 PyAPI_FUNC(Py_UNICODE*) PyUnicode_AsUnicodeCopy(
   2306     PyObject *unicode       /* the unicode string to copy */
   2307     ) Py_DEPRECATED(3.3);   /* marked deprecated; the returned buffer is released with PyMem_Free() */
   2308 #endif /* Py_LIMITED_API */
   2309 
   2310 #if defined(Py_DEBUG) && !defined(Py_LIMITED_API)
   2311 PyAPI_FUNC(int) _PyUnicode_CheckConsistency(
   2312     PyObject *op,               /* object to check */
   2313     int check_content);         /* NOTE(review): presumably enables deeper content checks -- confirm */
   2314 #elif !defined(NDEBUG)          /* assert() is active, but the real function is not declared */
   2315 /* Fallback so that asserts calling _PyUnicode_CheckConsistency() still compile
   2316  * in that configuration: the call reduces to PyUnicode_Check(op) and check_content is ignored. */
   2317 #define _PyUnicode_CheckConsistency(op, check_content) PyUnicode_Check(op)
   2318 #endif                          /* otherwise (NDEBUG without Py_DEBUG) the name is not provided at all */
   2319 
   2320 #ifndef Py_LIMITED_API
   2321 /* Return an interned Unicode object for an Identifier; may fail if there is no memory. */
   2322 PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*);  /* NOTE(review): failure result is not stated here (presumably NULL) -- confirm */
   2323 /* Clear all static strings. */
   2324 PyAPI_FUNC(void) _PyUnicode_ClearStaticStrings(void);
   2325 
   2326 /* Fast equality check when the inputs are known to be exact unicode types
   2327    and where the hash values are equal (i.e. a very probable match) */
   2328 PyAPI_FUNC(int) _PyUnicode_EQ(PyObject *, PyObject *);  /* NOTE(review): presumably nonzero when equal; behavior when the stated assumptions do not hold is not described here -- confirm */
   2329 #endif /* !Py_LIMITED_API */
   2330 
   2331 #ifdef __cplusplus
   2332 }
   2333 #endif
   2334 #endif /* !Py_UNICODEOBJECT_H */
   2335