Home | History | Annotate | Download | only in unicode
      1 //  2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 ******************************************************************************
      5 *
      6 *   Copyright (C) 1999-2015, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 ******************************************************************************
     10 *   file name:  umachine.h
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 1999sep13
     16 *   created by: Markus W. Scherer
     17 *
     18 *   This file defines basic types and constants for ICU to be
     19 *   platform-independent. umachine.h and utf.h are included into
     20 *   utypes.h to provide all the general definitions for ICU.
     21 *   All of these definitions used to be in utypes.h before
     22 *   the UTF-handling macros made this unmaintainable.
     23 */
     24 
     25 #ifndef __UMACHINE_H__
     26 #define __UMACHINE_H__
     27 
     28 
     29 /**
     30  * \file
     31  * \brief Basic types and constants for UTF
     32  *
     33  * <h2> Basic types and constants for UTF </h2>
     34  *   This file defines basic types and constants for utf.h to be
     35  *   platform-independent. umachine.h and utf.h are included into
     36  *   utypes.h to provide all the general definitions for ICU.
     37  *   All of these definitions used to be in utypes.h before
     38  *   the UTF-handling macros made this unmaintainable.
     39  *
     40  */
     41 /*==========================================================================*/
     42 /* Include platform-dependent definitions                                   */
     43 /* which are contained in the platform-specific file platform.h             */
     44 /*==========================================================================*/
     45 
     46 #include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
     47 
     48 /*
     49  * ANSI C headers:
     50  * stddef.h defines wchar_t
     51  */
     52 #include <stddef.h>
     53 
     54 /*==========================================================================*/
     55 /* For C wrappers, we use the symbol U_STABLE.                              */
     56 /* This works properly if the includer is C or C++.                         */
     57 /* Functions are declared U_STABLE return-type U_EXPORT2 function-name()... */
     58 /*==========================================================================*/
     59 
     60 /**
     61  * \def U_CFUNC
     62  * This is used in a declaration of a library private ICU C function (declared with C linkage when compiled as C++, plain extern in C).
     63  * @stable ICU 2.4
     64  */
     65 
     66 /**
     67  * \def U_CDECL_BEGIN
     68  * This is used to begin a declaration of a library private ICU C API (opens a C-linkage block in C++, expands to nothing in C).
     69  * @stable ICU 2.4
     70  */
     71 
     72 /**
     73  * \def U_CDECL_END
     74  * This is used to end a declaration of a library private ICU C API (closes the U_CDECL_BEGIN block in C++, expands to nothing in C).
     75  * @stable ICU 2.4
     76  */
     77 
     78 #ifdef __cplusplus
     79 #   define U_CFUNC extern "C"
     80 #   define U_CDECL_BEGIN extern "C" {
     81 #   define U_CDECL_END   }
     82 #else
     83 #   define U_CFUNC extern
     84 #   define U_CDECL_BEGIN
     85 #   define U_CDECL_END
     86 #endif
     87 
     88 #ifndef U_ATTRIBUTE_DEPRECATED
     89 /**
     90  * \def U_ATTRIBUTE_DEPRECATED
     91  * Deprecation marker; an existing definition is kept. GCC form: __attribute__ ((deprecated)), chosen when U_GCC_MAJOR_MINOR >= 302.
     92  * @internal
     93  */
     94 #if U_GCC_MAJOR_MINOR >= 302
     95 #    define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
     96 /**
     97  * \def U_ATTRIBUTE_DEPRECATED
     98  * Visual C++ form: __declspec(deprecated), chosen when _MSC_VER >= 1400. On any other compiler the macro expands to nothing.
     99  * @internal
    100  */
    101 #elif defined(_MSC_VER) && (_MSC_VER >= 1400)
    102 #    define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
    103 #else
    104 #    define U_ATTRIBUTE_DEPRECATED
    105 #endif
    106 #endif
    107 
    108 /** This is used to declare a function as a public ICU C API (U_CFUNC followed by U_EXPORT). @stable ICU 2.0 */
    109 #define U_CAPI U_CFUNC U_EXPORT
    110 /** This is used to declare a function as a stable public ICU C API (same expansion as U_CAPI). */
    111 #define U_STABLE U_CAPI
    112 /** This is used to declare a function as a draft public ICU C API (same expansion as U_CAPI). */
    113 #define U_DRAFT  U_CAPI
    114 /** This is used to declare a function as a deprecated public ICU C API (U_CAPI followed by U_ATTRIBUTE_DEPRECATED). */
    115 #define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
    116 /** This is used to declare a function as an obsolete public ICU C API (same expansion as U_CAPI). */
    117 #define U_OBSOLETE U_CAPI
    118 /** This is used to declare a function as an internal ICU C API (same expansion as U_CAPI). */
    119 #define U_INTERNAL U_CAPI
    120 
    121 /**
    122  * \def U_OVERRIDE
    123  * Defined to the C++11 "override" keyword if available, otherwise to nothing; an existing definition is kept.
    124  * Denotes a class or member which is an override of the base class.
    125  * May result in an error if it is applied to something that is not an override.
    126  * @internal
    127  */
    128 
    129 /**
    130  * \def U_FINAL
    131  * Defined to the C++11 "final" keyword if available, otherwise to nothing; an existing definition is kept.
    132  * Denotes a class or member which may not be overridden in subclasses.
    133  * May result in an error if subclasses attempt to override.
    134  * @internal
    135  */
    136 
    137 #if U_CPLUSPLUS_VERSION >= 11
    138 /* C++11 */
    139 #ifndef U_OVERRIDE
    140 #define U_OVERRIDE override
    141 #endif
    142 #ifndef U_FINAL
    143 #define U_FINAL final
    144 #endif
    145 #else
    146 /* not C++11 - define to nothing */
    147 #ifndef U_OVERRIDE
    148 #define U_OVERRIDE
    149 #endif
    150 #ifndef U_FINAL
    151 #define U_FINAL
    152 #endif
    153 #endif
    154 
    155 /*==========================================================================*/
    156 /* limits for int32_t etc., like in POSIX inttypes.h (only if undefined)    */
    157 /*==========================================================================*/
    158 
    159 #ifndef INT8_MIN
    160 /** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
    161 #   define INT8_MIN        ((int8_t)(-128))
    162 #endif
    163 #ifndef INT16_MIN
    164 /** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
    165 #   define INT16_MIN       ((int16_t)(-32767-1))
    166 #endif
    167 #ifndef INT32_MIN
    168 /** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
    169 #   define INT32_MIN       ((int32_t)(-2147483647-1))
    170 #endif
    171 
    172 #ifndef INT8_MAX
    173 /** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
    174 #   define INT8_MAX        ((int8_t)(127))
    175 #endif
    176 #ifndef INT16_MAX
    177 /** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
    178 #   define INT16_MAX       ((int16_t)(32767))
    179 #endif
    180 #ifndef INT32_MAX
    181 /** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
    182 #   define INT32_MAX       ((int32_t)(2147483647))
    183 #endif
    184 
    185 #ifndef UINT8_MAX
    186 /** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
    187 #   define UINT8_MAX       ((uint8_t)(255U))
    188 #endif
    189 #ifndef UINT16_MAX
    190 /** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
    191 #   define UINT16_MAX      ((uint16_t)(65535U))
    192 #endif
    193 #ifndef UINT32_MAX
    194 /** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
    195 #   define UINT32_MAX      ((uint32_t)(4294967295U))
    196 #endif
    197 
    198 #if defined(U_INT64_T_UNAVAILABLE)
    199 # error int64_t is required for decimal format and rule-based number format.
    200 #else
    201 # ifndef INT64_C
    202 /**
    203  * Provides a platform independent way to specify a signed 64-bit integer constant.
    204  * note: this fallback appends the LL suffix, which may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
    205  * @stable ICU 2.8
    206  */
    207 #   define INT64_C(c) c ## LL
    208 # endif
    209 # ifndef UINT64_C
    210 /**
    211  * Provides a platform independent way to specify an unsigned 64-bit integer constant.
    212  * note: this fallback appends the ULL suffix, which may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
    213  * @stable ICU 2.8
    214  */
    215 #   define UINT64_C(c) c ## ULL
    216 # endif
    217 # ifndef U_INT64_MIN
    218 /** The smallest value a 64 bit signed integer can hold (written as -9223372036854775807 minus 1) @stable ICU 2.8 */
    219 #     define U_INT64_MIN       ((int64_t)(INT64_C(-9223372036854775807)-1))
    220 # endif
    221 # ifndef U_INT64_MAX
    222 /** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
    223 #     define U_INT64_MAX       ((int64_t)(INT64_C(9223372036854775807)))
    224 # endif
    225 # ifndef U_UINT64_MAX
    226 /** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
    227 #     define U_UINT64_MAX      ((uint64_t)(UINT64_C(18446744073709551615)))
    228 # endif
    229 #endif
    230 
    231 /*==========================================================================*/
    232 /* Boolean data type                                                        */
    233 /*==========================================================================*/
    234 
    235 /** The ICU boolean type, a typedef of int8_t. @stable ICU 2.0 */
    236 typedef int8_t UBool;
    237 
    238 #ifndef TRUE
    239 /** The TRUE value of a UBool (defined here only if TRUE is not already defined) @stable ICU 2.0 */
    240 #   define TRUE  1
    241 #endif
    242 #ifndef FALSE
    243 /** The FALSE value of a UBool (defined here only if FALSE is not already defined) @stable ICU 2.0 */
    244 #   define FALSE 0
    245 #endif
    246 
    247 
    248 /*==========================================================================*/
    249 /* Unicode data types                                                       */
    250 /*==========================================================================*/
    251 
    252 /* wchar_t-related definitions -------------------------------------------- */
    253 
    254 /*
    255  * \def U_WCHAR_IS_UTF16
    256  * Defined if wchar_t uses UTF-16. Detected below unless U_WCHAR_IS_UTF16 or U_WCHAR_IS_UTF32 is already defined.
    257  *
    258  * @stable ICU 2.0
    259  */
    260 /*
    261  * \def U_WCHAR_IS_UTF32
    262  * Defined if wchar_t uses UTF-32. Detected below unless U_WCHAR_IS_UTF16 or U_WCHAR_IS_UTF32 is already defined.
    263  *
    264  * @stable ICU 2.0
    265  */
    266 #if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
    267 #   ifdef __STDC_ISO_10646__
    268 #       if (U_SIZEOF_WCHAR_T==2)
    269 #           define U_WCHAR_IS_UTF16
    270 #       elif (U_SIZEOF_WCHAR_T==4)
    271 #           define  U_WCHAR_IS_UTF32
    272 #       endif
    273 #   elif defined __UCS2__
    274 #       if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
    275 #           define U_WCHAR_IS_UTF16
    276 #       endif
    277 #   elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
    278 #       if (U_SIZEOF_WCHAR_T==4)
    279 #           define U_WCHAR_IS_UTF32
    280 #       endif
    281 #   elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
    282 #       define U_WCHAR_IS_UTF32
    283 #   elif U_PLATFORM_HAS_WIN32_API
    284 #       define U_WCHAR_IS_UTF16
    285 #   endif
    286 #endif
    287 
    288 /* UChar and UChar32 definitions -------------------------------------------- */
    289 
    290 /** Number of bytes in a UChar. @stable ICU 2.0 */
    291 #define U_SIZEOF_UCHAR 2
    292 
    293 /**
    294  * \def U_CHAR16_IS_TYPEDEF
    295  * If 1, then char16_t is a typedef and not a real type (yet); set to 1 only for C++ before C++11 on AIX, 0 otherwise.
    296  * @internal
    297  */
    298 #if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
    299 // for AIX, uchar.h needs to be included (presumably to obtain char16_t there)
    300 # include <uchar.h>
    301 # define U_CHAR16_IS_TYPEDEF 1
    302 #else
    303 # define U_CHAR16_IS_TYPEDEF 0
    304 #endif
    305 
    306 
    307 /**
    308  * \var UChar
    309  *
    310  * The base type for UTF-16 code units and pointers.
    311  * Unsigned 16-bit integer.
    312  * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
    313  *
    314  * UChar is configurable by defining the macro UCHAR_TYPE
    315  * on the preprocessor or compiler command line:
    316  * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
    317  * (The UCHAR_TYPE can also be #defined earlier in this file, for outside the ICU library code.)
    318  * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
    319  *
    320  * Without UCHAR_TYPE, the default is UChar=char16_t in C++ and UChar=uint16_t in C; inside the ICU library code UChar is always char16_t.
    321  *
    322  * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
    323  *
    324  * In C, char16_t is a simple typedef of uint_least16_t.
    325  * ICU requires uint_least16_t=uint16_t for data memory mapping.
    326  * On macOS, char16_t is not available because the uchar.h standard header is missing.
    327  *
    328  * @stable ICU 4.4
    329  */
    330 
    331 #if 1
    332     // #if 1 is normal. UChar defaults to char16_t in C++.
    333     // For configuration testing of UChar=uint16_t temporarily change this to #if 0.
    334     // The intltest Makefile #defines UCHAR_TYPE=char16_t,
    335     // so we only #define it to uint16_t if it is undefined so far.
    336 #elif !defined(UCHAR_TYPE)
    337 #   define UCHAR_TYPE uint16_t
    338 #endif
    339 
    340 #if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
    341         defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
    342     // Inside the ICU library code, never configurable.
    343     typedef char16_t UChar;
    344 #elif defined(UCHAR_TYPE)
    345     typedef UCHAR_TYPE UChar;
    346 #elif defined(__cplusplus)
    347     typedef char16_t UChar;
    348 #else
    349     typedef uint16_t UChar;
    350 #endif
    351 
    352 /**
    353  * \var OldUChar
    354  * Default ICU 58 definition of UChar.
    355  * A base type for UTF-16 code units and pointers.
    356  * Unsigned 16-bit integer.
    357  *
    358  * Define OldUChar to be wchar_t if that is 16 bits wide.
    359  * If wchar_t is not 16 bits wide, then define OldUChar to be __CHAR16_TYPE__ if that is defined, or else uint16_t.
    360  *
    361  * This makes the definition of OldUChar platform-dependent
    362  * but allows direct string type compatibility with platforms with
    363  * 16-bit wchar_t types.
    364  *
    365  * This is how UChar was defined in ICU 58, for transition convenience.
    366  * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
    367  * The current UChar responds to UCHAR_TYPE but OldUChar does not.
    368  *
    369  * @draft ICU 59
    370  */
    371 #if U_SIZEOF_WCHAR_T==2
    372     typedef wchar_t OldUChar;
    373 #elif defined(__CHAR16_TYPE__)
    374     typedef __CHAR16_TYPE__ OldUChar;
    375 #else
    376     typedef uint16_t OldUChar;
    377 #endif
    378 
    379 /**
    380  * UChar32 is the type for single Unicode code points.
    381  * It is a signed 32-bit integer (a typedef of int32_t).
    382  *
    383  * The Unicode code point range is 0..0x10ffff.
    384  * All other values (negative or >=0x110000) are illegal as Unicode code points.
    385  * They may be used as sentinel values to indicate "done", "error"
    386  * or similar non-code point conditions.
    387  *
    388  * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
    389  * to be wchar_t if that was 32 bits wide (wchar_t may be signed or unsigned)
    390  * or else to be uint32_t.
    391  * That is, the definition of UChar32 was platform-dependent.
    392  *
    393  * @see U_SENTINEL
    394  * @stable ICU 2.4
    395  */
    396 typedef int32_t UChar32;
    397 
    398 /**
    399  * This value is intended for sentinel values for APIs that
    400  * (take or) return single code points (UChar32).
    401  * Its value (-1) is outside of the Unicode code point range 0..0x10ffff.
    402  *
    403  * For example, a "done" or "error" value in a new API
    404  * could be indicated with U_SENTINEL.
    405  *
    406  * ICU APIs designed before ICU 2.4 usually define service-specific "done"
    407  * values, mostly 0xffff.
    408  * Those may need to be distinguished from
    409  * actual U+ffff text contents by calling functions like
    410  * CharacterIterator::hasNext() or UnicodeString::length().
    411  *
    412  * @return -1
    413  * @see UChar32
    414  * @stable ICU 2.4
    415  */
    416 #define U_SENTINEL (-1)
    417 
    418 #include "unicode/urename.h"
    419 
    420 #endif
    421