Home | History | Annotate | Download | only in unicode
      1 // © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 1999-2012, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  utf16.h
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 1999sep09
     16 *   created by: Markus W. Scherer
     17 */
     18 
     19 /**
     20  * \file
     21  * \brief C API: 16-bit Unicode handling macros
     22  *
     23  * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
     24  *
     25  * For more information see utf.h and the ICU User Guide Strings chapter
     26  * (http://userguide.icu-project.org/strings).
     27  *
     28  * <em>Usage:</em>
     29  * ICU coding guidelines for if() statements should be followed when using these macros.
     30  * Compound statements (curly braces {}) must be used  for if-else-while...
     31  * bodies and all macro statements should be terminated with semicolon.
     32  */
     33 
     34 #ifndef __UTF16_H__
     35 #define __UTF16_H__
     36 
     37 #include "unicode/umachine.h"
     38 #ifndef __UTF_H__
     39 #   include "unicode/utf.h"
     40 #endif
     41 
     42 /* single-code point definitions -------------------------------------------- */
     43 
     44 /**
     45  * Does this code unit alone encode a code point (BMP, not a surrogate)?
     46  * @param c 16-bit code unit
     47  * @return TRUE or FALSE
     48  * @stable ICU 2.4
     49  */
     50 #define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
     51 
     52 /**
     53  * Is this code unit a lead surrogate (U+d800..U+dbff)?
     54  * @param c 16-bit code unit
     55  * @return TRUE or FALSE
     56  * @stable ICU 2.4
     57  */
     58 #define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
     59 
     60 /**
     61  * Is this code unit a trail surrogate (U+dc00..U+dfff)?
     62  * @param c 16-bit code unit
     63  * @return TRUE or FALSE
     64  * @stable ICU 2.4
     65  */
     66 #define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
     67 
     68 /**
     69  * Is this code unit a surrogate (U+d800..U+dfff)?
     70  * @param c 16-bit code unit
     71  * @return TRUE or FALSE
     72  * @stable ICU 2.4
     73  */
     74 #define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
     75 
     76 /**
     77  * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
     78  * is it a lead surrogate?
     79  * @param c 16-bit code unit
     80  * @return TRUE or FALSE
     81  * @stable ICU 2.4
     82  */
     83 #define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
     84 
     85 /**
     86  * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
     87  * is it a trail surrogate?
     88  * @param c 16-bit code unit
     89  * @return TRUE or FALSE
     90  * @stable ICU 4.2
     91  */
     92 #define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
     93 
     94 /**
     95  * Helper constant for U16_GET_SUPPLEMENTARY.
     96  * @internal
     97  */
     98 #define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
     99 
    100 /**
    101  * Get a supplementary code point value (U+10000..U+10ffff)
    102  * from its lead and trail surrogates.
    103  * The result is undefined if the input values are not
    104  * lead and trail surrogates.
    105  *
    106  * @param lead lead surrogate (U+d800..U+dbff)
    107  * @param trail trail surrogate (U+dc00..U+dfff)
    108  * @return supplementary code point (U+10000..U+10ffff)
    109  * @stable ICU 2.4
    110  */
    111 #define U16_GET_SUPPLEMENTARY(lead, trail) \
    112     (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
    113 
    114 
    115 /**
    116  * Get the lead surrogate (0xd800..0xdbff) for a
    117  * supplementary code point (0x10000..0x10ffff).
    118  * @param supplementary 32-bit code point (U+10000..U+10ffff)
    119  * @return lead surrogate (U+d800..U+dbff) for supplementary
    120  * @stable ICU 2.4
    121  */
    122 #define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
    123 
    124 /**
    125  * Get the trail surrogate (0xdc00..0xdfff) for a
    126  * supplementary code point (0x10000..0x10ffff).
    127  * @param supplementary 32-bit code point (U+10000..U+10ffff)
    128  * @return trail surrogate (U+dc00..U+dfff) for supplementary
    129  * @stable ICU 2.4
    130  */
    131 #define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
    132 
    133 /**
    134  * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
    135  * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
    136  * @param c 32-bit code point
    137  * @return 1 or 2
    138  * @stable ICU 2.4
    139  */
    140 #define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
    141 
    142 /**
    143  * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
    144  * @return 2
    145  * @stable ICU 2.4
    146  */
    147 #define U16_MAX_LENGTH 2
    148 
    149 /**
    150  * Get a code point from a string at a random-access offset,
    151  * without changing the offset.
    152  * "Unsafe" macro, assumes well-formed UTF-16.
    153  *
    154  * The offset may point to either the lead or trail surrogate unit
    155  * for a supplementary code point, in which case the macro will read
    156  * the adjacent matching surrogate as well.
    157  * The result is undefined if the offset points to a single, unpaired surrogate.
    158  * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
    159  *
    160  * @param s const UChar * string
    161  * @param i string offset
    162  * @param c output UChar32 variable
    163  * @see U16_GET
    164  * @stable ICU 2.4
    165  */
    166 #define U16_GET_UNSAFE(s, i, c) { \
    167     (c)=(s)[i]; \
    168     if(U16_IS_SURROGATE(c)) { \
    169         if(U16_IS_SURROGATE_LEAD(c)) { \
    170             (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
    171         } else { \
    172             (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
    173         } \
    174     } \
    175 }
    176 
    177 /**
    178  * Get a code point from a string at a random-access offset,
    179  * without changing the offset.
    180  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    181  *
    182  * The offset may point to either the lead or trail surrogate unit
    183  * for a supplementary code point, in which case the macro will read
    184  * the adjacent matching surrogate as well.
    185  *
    186  * The length can be negative for a NUL-terminated string.
    187  *
    188  * If the offset points to a single, unpaired surrogate, then
    189  * c is set to that unpaired surrogate.
    190  * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
    191  *
    192  * @param s const UChar * string
    193  * @param start starting string offset (usually 0)
    194  * @param i string offset, must be start<=i<length
    195  * @param length string length
    196  * @param c output UChar32 variable
    197  * @see U16_GET_UNSAFE
    198  * @stable ICU 2.4
    199  */
    200 #define U16_GET(s, start, i, length, c) { \
    201     (c)=(s)[i]; \
    202     if(U16_IS_SURROGATE(c)) { \
    203         uint16_t __c2; \
    204         if(U16_IS_SURROGATE_LEAD(c)) { \
    205             if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
    206                 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
    207             } \
    208         } else { \
    209             if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
    210                 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
    211             } \
    212         } \
    213     } \
    214 }
    215 
    216 #ifndef U_HIDE_DRAFT_API
    217 
    218 /**
    219  * Get a code point from a string at a random-access offset,
    220  * without changing the offset.
    221  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    222  *
    223  * The offset may point to either the lead or trail surrogate unit
    224  * for a supplementary code point, in which case the macro will read
    225  * the adjacent matching surrogate as well.
    226  *
    227  * The length can be negative for a NUL-terminated string.
    228  *
    229  * If the offset points to a single, unpaired surrogate, then
    230  * c is set to U+FFFD.
    231  * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
    232  *
    233  * @param s const UChar * string
    234  * @param start starting string offset (usually 0)
    235  * @param i string offset, must be start<=i<length
    236  * @param length string length
    237  * @param c output UChar32 variable
    238  * @see U16_GET_UNSAFE
    239  * @draft ICU 60
    240  */
    241 #define U16_GET_OR_FFFD(s, start, i, length, c) { \
    242     (c)=(s)[i]; \
    243     if(U16_IS_SURROGATE(c)) { \
    244         uint16_t __c2; \
    245         if(U16_IS_SURROGATE_LEAD(c)) { \
    246             if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
    247                 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
    248             } else { \
    249                 (c)=0xfffd; \
    250             } \
    251         } else { \
    252             if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
    253                 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
    254             } else { \
    255                 (c)=0xfffd; \
    256             } \
    257         } \
    258     } \
    259 }
    260 
    261 #endif  // U_HIDE_DRAFT_API
    262 
    263 /* definitions with forward iteration --------------------------------------- */
    264 
    265 /**
    266  * Get a code point from a string at a code point boundary offset,
    267  * and advance the offset to the next code point boundary.
    268  * (Post-incrementing forward iteration.)
    269  * "Unsafe" macro, assumes well-formed UTF-16.
    270  *
    271  * The offset may point to the lead surrogate unit
    272  * for a supplementary code point, in which case the macro will read
    273  * the following trail surrogate as well.
    274  * If the offset points to a trail surrogate, then that itself
    275  * will be returned as the code point.
    276  * The result is undefined if the offset points to a single, unpaired lead surrogate.
    277  *
    278  * @param s const UChar * string
    279  * @param i string offset
    280  * @param c output UChar32 variable
    281  * @see U16_NEXT
    282  * @stable ICU 2.4
    283  */
    284 #define U16_NEXT_UNSAFE(s, i, c) { \
    285     (c)=(s)[(i)++]; \
    286     if(U16_IS_LEAD(c)) { \
    287         (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
    288     } \
    289 }
    290 
    291 /**
    292  * Get a code point from a string at a code point boundary offset,
    293  * and advance the offset to the next code point boundary.
    294  * (Post-incrementing forward iteration.)
    295  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    296  *
    297  * The length can be negative for a NUL-terminated string.
    298  *
    299  * The offset may point to the lead surrogate unit
    300  * for a supplementary code point, in which case the macro will read
    301  * the following trail surrogate as well.
    302  * If the offset points to a trail surrogate or
    303  * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
    304  *
    305  * @param s const UChar * string
    306  * @param i string offset, must be i<length
    307  * @param length string length
    308  * @param c output UChar32 variable
    309  * @see U16_NEXT_UNSAFE
    310  * @stable ICU 2.4
    311  */
    312 #define U16_NEXT(s, i, length, c) { \
    313     (c)=(s)[(i)++]; \
    314     if(U16_IS_LEAD(c)) { \
    315         uint16_t __c2; \
    316         if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
    317             ++(i); \
    318             (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
    319         } \
    320     } \
    321 }
    322 
    323 #ifndef U_HIDE_DRAFT_API
    324 
    325 /**
    326  * Get a code point from a string at a code point boundary offset,
    327  * and advance the offset to the next code point boundary.
    328  * (Post-incrementing forward iteration.)
    329  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    330  *
    331  * The length can be negative for a NUL-terminated string.
    332  *
    333  * The offset may point to the lead surrogate unit
    334  * for a supplementary code point, in which case the macro will read
    335  * the following trail surrogate as well.
    336  * If the offset points to a trail surrogate or
    337  * to a single, unpaired lead surrogate, then c is set to U+FFFD.
    338  *
    339  * @param s const UChar * string
    340  * @param i string offset, must be i<length
    341  * @param length string length
    342  * @param c output UChar32 variable
    343  * @see U16_NEXT_UNSAFE
    344  * @draft ICU 60
    345  */
    346 #define U16_NEXT_OR_FFFD(s, i, length, c) { \
    347     (c)=(s)[(i)++]; \
    348     if(U16_IS_SURROGATE(c)) { \
    349         uint16_t __c2; \
    350         if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
    351             ++(i); \
    352             (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
    353         } else { \
    354             (c)=0xfffd; \
    355         } \
    356     } \
    357 }
    358 
    359 #endif  // U_HIDE_DRAFT_API
    360 
    361 /**
    362  * Append a code point to a string, overwriting 1 or 2 code units.
    363  * The offset points to the current end of the string contents
    364  * and is advanced (post-increment).
    365  * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
    366  * Otherwise, the result is undefined.
    367  *
    368  * @param s const UChar * string buffer
    369  * @param i string offset
    370  * @param c code point to append
    371  * @see U16_APPEND
    372  * @stable ICU 2.4
    373  */
    374 #define U16_APPEND_UNSAFE(s, i, c) { \
    375     if((uint32_t)(c)<=0xffff) { \
    376         (s)[(i)++]=(uint16_t)(c); \
    377     } else { \
    378         (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
    379         (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    380     } \
    381 }
    382 
    383 /**
    384  * Append a code point to a string, overwriting 1 or 2 code units.
    385  * The offset points to the current end of the string contents
    386  * and is advanced (post-increment).
    387  * "Safe" macro, checks for a valid code point.
    388  * If a surrogate pair is written, checks for sufficient space in the string.
    389  * If the code point is not valid or a trail surrogate does not fit,
    390  * then isError is set to TRUE.
    391  *
    392  * @param s const UChar * string buffer
    393  * @param i string offset, must be i<capacity
    394  * @param capacity size of the string buffer
    395  * @param c code point to append
    396  * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
    397  * @see U16_APPEND_UNSAFE
    398  * @stable ICU 2.4
    399  */
    400 #define U16_APPEND(s, i, capacity, c, isError) { \
    401     if((uint32_t)(c)<=0xffff) { \
    402         (s)[(i)++]=(uint16_t)(c); \
    403     } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
    404         (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
    405         (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    406     } else /* c>0x10ffff or not enough space */ { \
    407         (isError)=TRUE; \
    408     } \
    409 }
    410 
    411 /**
    412  * Advance the string offset from one code point boundary to the next.
    413  * (Post-incrementing iteration.)
    414  * "Unsafe" macro, assumes well-formed UTF-16.
    415  *
    416  * @param s const UChar * string
    417  * @param i string offset
    418  * @see U16_FWD_1
    419  * @stable ICU 2.4
    420  */
    421 #define U16_FWD_1_UNSAFE(s, i) { \
    422     if(U16_IS_LEAD((s)[(i)++])) { \
    423         ++(i); \
    424     } \
    425 }
    426 
    427 /**
    428  * Advance the string offset from one code point boundary to the next.
    429  * (Post-incrementing iteration.)
    430  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    431  *
    432  * The length can be negative for a NUL-terminated string.
    433  *
    434  * @param s const UChar * string
    435  * @param i string offset, must be i<length
    436  * @param length string length
    437  * @see U16_FWD_1_UNSAFE
    438  * @stable ICU 2.4
    439  */
    440 #define U16_FWD_1(s, i, length) { \
    441     if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
    442         ++(i); \
    443     } \
    444 }
    445 
    446 /**
    447  * Advance the string offset from one code point boundary to the n-th next one,
    448  * i.e., move forward by n code points.
    449  * (Post-incrementing iteration.)
    450  * "Unsafe" macro, assumes well-formed UTF-16.
    451  *
    452  * @param s const UChar * string
    453  * @param i string offset
    454  * @param n number of code points to skip
    455  * @see U16_FWD_N
    456  * @stable ICU 2.4
    457  */
    458 #define U16_FWD_N_UNSAFE(s, i, n) { \
    459     int32_t __N=(n); \
    460     while(__N>0) { \
    461         U16_FWD_1_UNSAFE(s, i); \
    462         --__N; \
    463     } \
    464 }
    465 
    466 /**
    467  * Advance the string offset from one code point boundary to the n-th next one,
    468  * i.e., move forward by n code points.
    469  * (Post-incrementing iteration.)
    470  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    471  *
    472  * The length can be negative for a NUL-terminated string.
    473  *
    474  * @param s const UChar * string
    475  * @param i int32_t string offset, must be i<length
    476  * @param length int32_t string length
    477  * @param n number of code points to skip
    478  * @see U16_FWD_N_UNSAFE
    479  * @stable ICU 2.4
    480  */
    481 #define U16_FWD_N(s, i, length, n) { \
    482     int32_t __N=(n); \
    483     while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
    484         U16_FWD_1(s, i, length); \
    485         --__N; \
    486     } \
    487 }
    488 
    489 /**
    490  * Adjust a random-access offset to a code point boundary
    491  * at the start of a code point.
    492  * If the offset points to the trail surrogate of a surrogate pair,
    493  * then the offset is decremented.
    494  * Otherwise, it is not modified.
    495  * "Unsafe" macro, assumes well-formed UTF-16.
    496  *
    497  * @param s const UChar * string
    498  * @param i string offset
    499  * @see U16_SET_CP_START
    500  * @stable ICU 2.4
    501  */
    502 #define U16_SET_CP_START_UNSAFE(s, i) { \
    503     if(U16_IS_TRAIL((s)[i])) { \
    504         --(i); \
    505     } \
    506 }
    507 
    508 /**
    509  * Adjust a random-access offset to a code point boundary
    510  * at the start of a code point.
    511  * If the offset points to the trail surrogate of a surrogate pair,
    512  * then the offset is decremented.
    513  * Otherwise, it is not modified.
    514  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    515  *
    516  * @param s const UChar * string
    517  * @param start starting string offset (usually 0)
    518  * @param i string offset, must be start<=i
    519  * @see U16_SET_CP_START_UNSAFE
    520  * @stable ICU 2.4
    521  */
    522 #define U16_SET_CP_START(s, start, i) { \
    523     if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
    524         --(i); \
    525     } \
    526 }
    527 
    528 /* definitions with backward iteration -------------------------------------- */
    529 
    530 /**
    531  * Move the string offset from one code point boundary to the previous one
    532  * and get the code point between them.
    533  * (Pre-decrementing backward iteration.)
    534  * "Unsafe" macro, assumes well-formed UTF-16.
    535  *
    536  * The input offset may be the same as the string length.
    537  * If the offset is behind a trail surrogate unit
    538  * for a supplementary code point, then the macro will read
    539  * the preceding lead surrogate as well.
    540  * If the offset is behind a lead surrogate, then that itself
    541  * will be returned as the code point.
    542  * The result is undefined if the offset is behind a single, unpaired trail surrogate.
    543  *
    544  * @param s const UChar * string
    545  * @param i string offset
    546  * @param c output UChar32 variable
    547  * @see U16_PREV
    548  * @stable ICU 2.4
    549  */
    550 #define U16_PREV_UNSAFE(s, i, c) { \
    551     (c)=(s)[--(i)]; \
    552     if(U16_IS_TRAIL(c)) { \
    553         (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
    554     } \
    555 }
    556 
    557 /**
    558  * Move the string offset from one code point boundary to the previous one
    559  * and get the code point between them.
    560  * (Pre-decrementing backward iteration.)
    561  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    562  *
    563  * The input offset may be the same as the string length.
    564  * If the offset is behind a trail surrogate unit
    565  * for a supplementary code point, then the macro will read
    566  * the preceding lead surrogate as well.
    567  * If the offset is behind a lead surrogate or behind a single, unpaired
    568  * trail surrogate, then c is set to that unpaired surrogate.
    569  *
    570  * @param s const UChar * string
    571  * @param start starting string offset (usually 0)
    572  * @param i string offset, must be start<i
    573  * @param c output UChar32 variable
    574  * @see U16_PREV_UNSAFE
    575  * @stable ICU 2.4
    576  */
    577 #define U16_PREV(s, start, i, c) { \
    578     (c)=(s)[--(i)]; \
    579     if(U16_IS_TRAIL(c)) { \
    580         uint16_t __c2; \
    581         if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
    582             --(i); \
    583             (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
    584         } \
    585     } \
    586 }
    587 
    588 #ifndef U_HIDE_DRAFT_API
    589 
    590 /**
    591  * Move the string offset from one code point boundary to the previous one
    592  * and get the code point between them.
    593  * (Pre-decrementing backward iteration.)
    594  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    595  *
    596  * The input offset may be the same as the string length.
    597  * If the offset is behind a trail surrogate unit
    598  * for a supplementary code point, then the macro will read
    599  * the preceding lead surrogate as well.
    600  * If the offset is behind a lead surrogate or behind a single, unpaired
    601  * trail surrogate, then c is set to U+FFFD.
    602  *
    603  * @param s const UChar * string
    604  * @param start starting string offset (usually 0)
    605  * @param i string offset, must be start<i
    606  * @param c output UChar32 variable
    607  * @see U16_PREV_UNSAFE
    608  * @draft ICU 60
    609  */
    610 #define U16_PREV_OR_FFFD(s, start, i, c) { \
    611     (c)=(s)[--(i)]; \
    612     if(U16_IS_SURROGATE(c)) { \
    613         uint16_t __c2; \
    614         if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
    615             --(i); \
    616             (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
    617         } else { \
    618             (c)=0xfffd; \
    619         } \
    620     } \
    621 }
    622 
    623 #endif  // U_HIDE_DRAFT_API
    624 
    625 /**
    626  * Move the string offset from one code point boundary to the previous one.
    627  * (Pre-decrementing backward iteration.)
    628  * The input offset may be the same as the string length.
    629  * "Unsafe" macro, assumes well-formed UTF-16.
    630  *
    631  * @param s const UChar * string
    632  * @param i string offset
    633  * @see U16_BACK_1
    634  * @stable ICU 2.4
    635  */
    636 #define U16_BACK_1_UNSAFE(s, i) { \
    637     if(U16_IS_TRAIL((s)[--(i)])) { \
    638         --(i); \
    639     } \
    640 }
    641 
    642 /**
    643  * Move the string offset from one code point boundary to the previous one.
    644  * (Pre-decrementing backward iteration.)
    645  * The input offset may be the same as the string length.
    646  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    647  *
    648  * @param s const UChar * string
    649  * @param start starting string offset (usually 0)
    650  * @param i string offset, must be start<i
    651  * @see U16_BACK_1_UNSAFE
    652  * @stable ICU 2.4
    653  */
    654 #define U16_BACK_1(s, start, i) { \
    655     if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
    656         --(i); \
    657     } \
    658 }
    659 
    660 /**
    661  * Move the string offset from one code point boundary to the n-th one before it,
    662  * i.e., move backward by n code points.
    663  * (Pre-decrementing backward iteration.)
    664  * The input offset may be the same as the string length.
    665  * "Unsafe" macro, assumes well-formed UTF-16.
    666  *
    667  * @param s const UChar * string
    668  * @param i string offset
    669  * @param n number of code points to skip
    670  * @see U16_BACK_N
    671  * @stable ICU 2.4
    672  */
    673 #define U16_BACK_N_UNSAFE(s, i, n) { \
    674     int32_t __N=(n); \
    675     while(__N>0) { \
    676         U16_BACK_1_UNSAFE(s, i); \
    677         --__N; \
    678     } \
    679 }
    680 
    681 /**
    682  * Move the string offset from one code point boundary to the n-th one before it,
    683  * i.e., move backward by n code points.
    684  * (Pre-decrementing backward iteration.)
    685  * The input offset may be the same as the string length.
    686  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    687  *
    688  * @param s const UChar * string
    689  * @param start start of string
    690  * @param i string offset, must be start<i
    691  * @param n number of code points to skip
    692  * @see U16_BACK_N_UNSAFE
    693  * @stable ICU 2.4
    694  */
    695 #define U16_BACK_N(s, start, i, n) { \
    696     int32_t __N=(n); \
    697     while(__N>0 && (i)>(start)) { \
    698         U16_BACK_1(s, start, i); \
    699         --__N; \
    700     } \
    701 }
    702 
    703 /**
    704  * Adjust a random-access offset to a code point boundary after a code point.
    705  * If the offset is behind the lead surrogate of a surrogate pair,
    706  * then the offset is incremented.
    707  * Otherwise, it is not modified.
    708  * The input offset may be the same as the string length.
    709  * "Unsafe" macro, assumes well-formed UTF-16.
    710  *
    711  * @param s const UChar * string
    712  * @param i string offset
    713  * @see U16_SET_CP_LIMIT
    714  * @stable ICU 2.4
    715  */
    716 #define U16_SET_CP_LIMIT_UNSAFE(s, i) { \
    717     if(U16_IS_LEAD((s)[(i)-1])) { \
    718         ++(i); \
    719     } \
    720 }
    721 
    722 /**
    723  * Adjust a random-access offset to a code point boundary after a code point.
    724  * If the offset is behind the lead surrogate of a surrogate pair,
    725  * then the offset is incremented.
    726  * Otherwise, it is not modified.
    727  * The input offset may be the same as the string length.
    728  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    729  *
    730  * The length can be negative for a NUL-terminated string.
    731  *
    732  * @param s const UChar * string
    733  * @param start int32_t starting string offset (usually 0)
    734  * @param i int32_t string offset, start<=i<=length
    735  * @param length int32_t string length
    736  * @see U16_SET_CP_LIMIT_UNSAFE
    737  * @stable ICU 2.4
    738  */
    739 #define U16_SET_CP_LIMIT(s, start, i, length) { \
    740     if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
    741         ++(i); \
    742     } \
    743 }
    744 
    745 #endif
    746