Home | History | Annotate | Download | only in unicode
// © 2016 and later: Unicode, Inc. and others.
      2 // License & terms of use: http://www.unicode.org/copyright.html
      3 /*
      4 *******************************************************************************
      5 *
      6 *   Copyright (C) 1999-2012, International Business Machines
      7 *   Corporation and others.  All Rights Reserved.
      8 *
      9 *******************************************************************************
     10 *   file name:  utf16.h
     11 *   encoding:   UTF-8
     12 *   tab size:   8 (not used)
     13 *   indentation:4
     14 *
     15 *   created on: 1999sep09
     16 *   created by: Markus W. Scherer
     17 */
     18 
     19 /**
     20  * \file
     21  * \brief C API: 16-bit Unicode handling macros
     22  *
     23  * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
     24  *
     25  * For more information see utf.h and the ICU User Guide Strings chapter
     26  * (http://userguide.icu-project.org/strings).
     27  *
     28  * <em>Usage:</em>
 * ICU coding guidelines for if() statements should be followed when using these macros.
 * Compound statements (curly braces {}) must be used for if-else-while...
 * bodies, and all macro statements should be terminated with a semicolon.
     32  */
     33 
     34 #ifndef __UTF16_H__
     35 #define __UTF16_H__
     36 
     37 #include "unicode/umachine.h"
     38 #ifndef __UTF_H__
     39 #   include "unicode/utf.h"
     40 #endif
     41 
     42 /* single-code point definitions -------------------------------------------- */
     43 
     44 /**
     45  * Does this code unit alone encode a code point (BMP, not a surrogate)?
     46  * @param c 16-bit code unit
     47  * @return TRUE or FALSE
     48  * @stable ICU 2.4
     49  */
     50 #define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
     51 
     52 /**
     53  * Is this code unit a lead surrogate (U+d800..U+dbff)?
     54  * @param c 16-bit code unit
     55  * @return TRUE or FALSE
     56  * @stable ICU 2.4
     57  */
     58 #define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
     59 
     60 /**
     61  * Is this code unit a trail surrogate (U+dc00..U+dfff)?
     62  * @param c 16-bit code unit
     63  * @return TRUE or FALSE
     64  * @stable ICU 2.4
     65  */
     66 #define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
     67 
     68 /**
     69  * Is this code unit a surrogate (U+d800..U+dfff)?
     70  * @param c 16-bit code unit
     71  * @return TRUE or FALSE
     72  * @stable ICU 2.4
     73  */
     74 #define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
     75 
     76 /**
     77  * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
     78  * is it a lead surrogate?
     79  * @param c 16-bit code unit
     80  * @return TRUE or FALSE
     81  * @stable ICU 2.4
     82  */
     83 #define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
     84 
     85 /**
     86  * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
     87  * is it a trail surrogate?
     88  * @param c 16-bit code unit
     89  * @return TRUE or FALSE
     90  * @stable ICU 4.2
     91  */
     92 #define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
     93 
     94 /**
     95  * Helper constant for U16_GET_SUPPLEMENTARY.
     96  * @internal
     97  */
     98 #define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
     99 
    100 /**
    101  * Get a supplementary code point value (U+10000..U+10ffff)
    102  * from its lead and trail surrogates.
    103  * The result is undefined if the input values are not
    104  * lead and trail surrogates.
    105  *
    106  * @param lead lead surrogate (U+d800..U+dbff)
    107  * @param trail trail surrogate (U+dc00..U+dfff)
    108  * @return supplementary code point (U+10000..U+10ffff)
    109  * @stable ICU 2.4
    110  */
    111 #define U16_GET_SUPPLEMENTARY(lead, trail) \
    112     (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
    113 
    114 
    115 /**
    116  * Get the lead surrogate (0xd800..0xdbff) for a
    117  * supplementary code point (0x10000..0x10ffff).
    118  * @param supplementary 32-bit code point (U+10000..U+10ffff)
    119  * @return lead surrogate (U+d800..U+dbff) for supplementary
    120  * @stable ICU 2.4
    121  */
    122 #define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
    123 
    124 /**
    125  * Get the trail surrogate (0xdc00..0xdfff) for a
    126  * supplementary code point (0x10000..0x10ffff).
    127  * @param supplementary 32-bit code point (U+10000..U+10ffff)
    128  * @return trail surrogate (U+dc00..U+dfff) for supplementary
    129  * @stable ICU 2.4
    130  */
    131 #define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
    132 
    133 /**
    134  * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
    135  * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
    136  * @param c 32-bit code point
    137  * @return 1 or 2
    138  * @stable ICU 2.4
    139  */
    140 #define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
    141 
/**
 * Upper bound on the number of 16-bit code units used to encode a single
 * Unicode code point (U+0000..U+10ffff) in UTF-16.
 * @return 2
 * @stable ICU 2.4
 */
#define U16_MAX_LENGTH 2
    148 
    149 /**
    150  * Get a code point from a string at a random-access offset,
    151  * without changing the offset.
    152  * "Unsafe" macro, assumes well-formed UTF-16.
    153  *
    154  * The offset may point to either the lead or trail surrogate unit
    155  * for a supplementary code point, in which case the macro will read
    156  * the adjacent matching surrogate as well.
    157  * The result is undefined if the offset points to a single, unpaired surrogate.
    158  * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
    159  *
    160  * @param s const UChar * string
    161  * @param i string offset
    162  * @param c output UChar32 variable
    163  * @see U16_GET
    164  * @stable ICU 2.4
    165  */
    166 #define U16_GET_UNSAFE(s, i, c) { \
    167     (c)=(s)[i]; \
    168     if(U16_IS_SURROGATE(c)) { \
    169         if(U16_IS_SURROGATE_LEAD(c)) { \
    170             (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
    171         } else { \
    172             (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
    173         } \
    174     } \
    175 }
    176 
    177 /**
    178  * Get a code point from a string at a random-access offset,
    179  * without changing the offset.
    180  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    181  *
    182  * The offset may point to either the lead or trail surrogate unit
    183  * for a supplementary code point, in which case the macro will read
    184  * the adjacent matching surrogate as well.
    185  *
    186  * The length can be negative for a NUL-terminated string.
    187  *
    188  * If the offset points to a single, unpaired surrogate, then
    189  * c is set to that unpaired surrogate.
    190  * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
    191  *
    192  * @param s const UChar * string
    193  * @param start starting string offset (usually 0)
    194  * @param i string offset, must be start<=i<length
    195  * @param length string length
    196  * @param c output UChar32 variable
    197  * @see U16_GET_UNSAFE
    198  * @stable ICU 2.4
    199  */
    200 #define U16_GET(s, start, i, length, c) { \
    201     (c)=(s)[i]; \
    202     if(U16_IS_SURROGATE(c)) { \
    203         uint16_t __c2; \
    204         if(U16_IS_SURROGATE_LEAD(c)) { \
    205             if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
    206                 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
    207             } \
    208         } else { \
    209             if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
    210                 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
    211             } \
    212         } \
    213     } \
    214 }
    215 
    216 /**
    217  * Get a code point from a string at a random-access offset,
    218  * without changing the offset.
    219  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    220  *
    221  * The offset may point to either the lead or trail surrogate unit
    222  * for a supplementary code point, in which case the macro will read
    223  * the adjacent matching surrogate as well.
    224  *
    225  * The length can be negative for a NUL-terminated string.
    226  *
    227  * If the offset points to a single, unpaired surrogate, then
    228  * c is set to U+FFFD.
    229  * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
    230  *
    231  * @param s const UChar * string
    232  * @param start starting string offset (usually 0)
    233  * @param i string offset, must be start<=i<length
    234  * @param length string length
    235  * @param c output UChar32 variable
    236  * @see U16_GET_UNSAFE
    237  * @stable ICU 60
    238  */
    239 #define U16_GET_OR_FFFD(s, start, i, length, c) { \
    240     (c)=(s)[i]; \
    241     if(U16_IS_SURROGATE(c)) { \
    242         uint16_t __c2; \
    243         if(U16_IS_SURROGATE_LEAD(c)) { \
    244             if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
    245                 (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
    246             } else { \
    247                 (c)=0xfffd; \
    248             } \
    249         } else { \
    250             if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
    251                 (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
    252             } else { \
    253                 (c)=0xfffd; \
    254             } \
    255         } \
    256     } \
    257 }
    258 
    259 /* definitions with forward iteration --------------------------------------- */
    260 
    261 /**
    262  * Get a code point from a string at a code point boundary offset,
    263  * and advance the offset to the next code point boundary.
    264  * (Post-incrementing forward iteration.)
    265  * "Unsafe" macro, assumes well-formed UTF-16.
    266  *
    267  * The offset may point to the lead surrogate unit
    268  * for a supplementary code point, in which case the macro will read
    269  * the following trail surrogate as well.
    270  * If the offset points to a trail surrogate, then that itself
    271  * will be returned as the code point.
    272  * The result is undefined if the offset points to a single, unpaired lead surrogate.
    273  *
    274  * @param s const UChar * string
    275  * @param i string offset
    276  * @param c output UChar32 variable
    277  * @see U16_NEXT
    278  * @stable ICU 2.4
    279  */
    280 #define U16_NEXT_UNSAFE(s, i, c) { \
    281     (c)=(s)[(i)++]; \
    282     if(U16_IS_LEAD(c)) { \
    283         (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
    284     } \
    285 }
    286 
    287 /**
    288  * Get a code point from a string at a code point boundary offset,
    289  * and advance the offset to the next code point boundary.
    290  * (Post-incrementing forward iteration.)
    291  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    292  *
    293  * The length can be negative for a NUL-terminated string.
    294  *
    295  * The offset may point to the lead surrogate unit
    296  * for a supplementary code point, in which case the macro will read
    297  * the following trail surrogate as well.
    298  * If the offset points to a trail surrogate or
    299  * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
    300  *
    301  * @param s const UChar * string
    302  * @param i string offset, must be i<length
    303  * @param length string length
    304  * @param c output UChar32 variable
    305  * @see U16_NEXT_UNSAFE
    306  * @stable ICU 2.4
    307  */
    308 #define U16_NEXT(s, i, length, c) { \
    309     (c)=(s)[(i)++]; \
    310     if(U16_IS_LEAD(c)) { \
    311         uint16_t __c2; \
    312         if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
    313             ++(i); \
    314             (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
    315         } \
    316     } \
    317 }
    318 
    319 /**
    320  * Get a code point from a string at a code point boundary offset,
    321  * and advance the offset to the next code point boundary.
    322  * (Post-incrementing forward iteration.)
    323  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    324  *
    325  * The length can be negative for a NUL-terminated string.
    326  *
    327  * The offset may point to the lead surrogate unit
    328  * for a supplementary code point, in which case the macro will read
    329  * the following trail surrogate as well.
    330  * If the offset points to a trail surrogate or
    331  * to a single, unpaired lead surrogate, then c is set to U+FFFD.
    332  *
    333  * @param s const UChar * string
    334  * @param i string offset, must be i<length
    335  * @param length string length
    336  * @param c output UChar32 variable
    337  * @see U16_NEXT_UNSAFE
    338  * @stable ICU 60
    339  */
    340 #define U16_NEXT_OR_FFFD(s, i, length, c) { \
    341     (c)=(s)[(i)++]; \
    342     if(U16_IS_SURROGATE(c)) { \
    343         uint16_t __c2; \
    344         if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
    345             ++(i); \
    346             (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
    347         } else { \
    348             (c)=0xfffd; \
    349         } \
    350     } \
    351 }
    352 
    353 /**
    354  * Append a code point to a string, overwriting 1 or 2 code units.
    355  * The offset points to the current end of the string contents
    356  * and is advanced (post-increment).
    357  * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
    358  * Otherwise, the result is undefined.
    359  *
    360  * @param s const UChar * string buffer
    361  * @param i string offset
    362  * @param c code point to append
    363  * @see U16_APPEND
    364  * @stable ICU 2.4
    365  */
    366 #define U16_APPEND_UNSAFE(s, i, c) { \
    367     if((uint32_t)(c)<=0xffff) { \
    368         (s)[(i)++]=(uint16_t)(c); \
    369     } else { \
    370         (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
    371         (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    372     } \
    373 }
    374 
    375 /**
    376  * Append a code point to a string, overwriting 1 or 2 code units.
    377  * The offset points to the current end of the string contents
    378  * and is advanced (post-increment).
    379  * "Safe" macro, checks for a valid code point.
    380  * If a surrogate pair is written, checks for sufficient space in the string.
    381  * If the code point is not valid or a trail surrogate does not fit,
    382  * then isError is set to TRUE.
    383  *
    384  * @param s const UChar * string buffer
    385  * @param i string offset, must be i<capacity
    386  * @param capacity size of the string buffer
    387  * @param c code point to append
    388  * @param isError output UBool set to TRUE if an error occurs, otherwise not modified
    389  * @see U16_APPEND_UNSAFE
    390  * @stable ICU 2.4
    391  */
    392 #define U16_APPEND(s, i, capacity, c, isError) { \
    393     if((uint32_t)(c)<=0xffff) { \
    394         (s)[(i)++]=(uint16_t)(c); \
    395     } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
    396         (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
    397         (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
    398     } else /* c>0x10ffff or not enough space */ { \
    399         (isError)=TRUE; \
    400     } \
    401 }
    402 
    403 /**
    404  * Advance the string offset from one code point boundary to the next.
    405  * (Post-incrementing iteration.)
    406  * "Unsafe" macro, assumes well-formed UTF-16.
    407  *
    408  * @param s const UChar * string
    409  * @param i string offset
    410  * @see U16_FWD_1
    411  * @stable ICU 2.4
    412  */
    413 #define U16_FWD_1_UNSAFE(s, i) { \
    414     if(U16_IS_LEAD((s)[(i)++])) { \
    415         ++(i); \
    416     } \
    417 }
    418 
    419 /**
    420  * Advance the string offset from one code point boundary to the next.
    421  * (Post-incrementing iteration.)
    422  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    423  *
    424  * The length can be negative for a NUL-terminated string.
    425  *
    426  * @param s const UChar * string
    427  * @param i string offset, must be i<length
    428  * @param length string length
    429  * @see U16_FWD_1_UNSAFE
    430  * @stable ICU 2.4
    431  */
    432 #define U16_FWD_1(s, i, length) { \
    433     if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
    434         ++(i); \
    435     } \
    436 }
    437 
    438 /**
    439  * Advance the string offset from one code point boundary to the n-th next one,
    440  * i.e., move forward by n code points.
    441  * (Post-incrementing iteration.)
    442  * "Unsafe" macro, assumes well-formed UTF-16.
    443  *
    444  * @param s const UChar * string
    445  * @param i string offset
    446  * @param n number of code points to skip
    447  * @see U16_FWD_N
    448  * @stable ICU 2.4
    449  */
    450 #define U16_FWD_N_UNSAFE(s, i, n) { \
    451     int32_t __N=(n); \
    452     while(__N>0) { \
    453         U16_FWD_1_UNSAFE(s, i); \
    454         --__N; \
    455     } \
    456 }
    457 
    458 /**
    459  * Advance the string offset from one code point boundary to the n-th next one,
    460  * i.e., move forward by n code points.
    461  * (Post-incrementing iteration.)
    462  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    463  *
    464  * The length can be negative for a NUL-terminated string.
    465  *
    466  * @param s const UChar * string
    467  * @param i int32_t string offset, must be i<length
    468  * @param length int32_t string length
    469  * @param n number of code points to skip
    470  * @see U16_FWD_N_UNSAFE
    471  * @stable ICU 2.4
    472  */
    473 #define U16_FWD_N(s, i, length, n) { \
    474     int32_t __N=(n); \
    475     while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
    476         U16_FWD_1(s, i, length); \
    477         --__N; \
    478     } \
    479 }
    480 
    481 /**
    482  * Adjust a random-access offset to a code point boundary
    483  * at the start of a code point.
    484  * If the offset points to the trail surrogate of a surrogate pair,
    485  * then the offset is decremented.
    486  * Otherwise, it is not modified.
    487  * "Unsafe" macro, assumes well-formed UTF-16.
    488  *
    489  * @param s const UChar * string
    490  * @param i string offset
    491  * @see U16_SET_CP_START
    492  * @stable ICU 2.4
    493  */
    494 #define U16_SET_CP_START_UNSAFE(s, i) { \
    495     if(U16_IS_TRAIL((s)[i])) { \
    496         --(i); \
    497     } \
    498 }
    499 
    500 /**
    501  * Adjust a random-access offset to a code point boundary
    502  * at the start of a code point.
    503  * If the offset points to the trail surrogate of a surrogate pair,
    504  * then the offset is decremented.
    505  * Otherwise, it is not modified.
    506  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    507  *
    508  * @param s const UChar * string
    509  * @param start starting string offset (usually 0)
    510  * @param i string offset, must be start<=i
    511  * @see U16_SET_CP_START_UNSAFE
    512  * @stable ICU 2.4
    513  */
    514 #define U16_SET_CP_START(s, start, i) { \
    515     if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
    516         --(i); \
    517     } \
    518 }
    519 
    520 /* definitions with backward iteration -------------------------------------- */
    521 
    522 /**
    523  * Move the string offset from one code point boundary to the previous one
    524  * and get the code point between them.
    525  * (Pre-decrementing backward iteration.)
    526  * "Unsafe" macro, assumes well-formed UTF-16.
    527  *
    528  * The input offset may be the same as the string length.
    529  * If the offset is behind a trail surrogate unit
    530  * for a supplementary code point, then the macro will read
    531  * the preceding lead surrogate as well.
    532  * If the offset is behind a lead surrogate, then that itself
    533  * will be returned as the code point.
    534  * The result is undefined if the offset is behind a single, unpaired trail surrogate.
    535  *
    536  * @param s const UChar * string
    537  * @param i string offset
    538  * @param c output UChar32 variable
    539  * @see U16_PREV
    540  * @stable ICU 2.4
    541  */
    542 #define U16_PREV_UNSAFE(s, i, c) { \
    543     (c)=(s)[--(i)]; \
    544     if(U16_IS_TRAIL(c)) { \
    545         (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
    546     } \
    547 }
    548 
    549 /**
    550  * Move the string offset from one code point boundary to the previous one
    551  * and get the code point between them.
    552  * (Pre-decrementing backward iteration.)
    553  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    554  *
    555  * The input offset may be the same as the string length.
    556  * If the offset is behind a trail surrogate unit
    557  * for a supplementary code point, then the macro will read
    558  * the preceding lead surrogate as well.
    559  * If the offset is behind a lead surrogate or behind a single, unpaired
    560  * trail surrogate, then c is set to that unpaired surrogate.
    561  *
    562  * @param s const UChar * string
    563  * @param start starting string offset (usually 0)
    564  * @param i string offset, must be start<i
    565  * @param c output UChar32 variable
    566  * @see U16_PREV_UNSAFE
    567  * @stable ICU 2.4
    568  */
    569 #define U16_PREV(s, start, i, c) { \
    570     (c)=(s)[--(i)]; \
    571     if(U16_IS_TRAIL(c)) { \
    572         uint16_t __c2; \
    573         if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
    574             --(i); \
    575             (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
    576         } \
    577     } \
    578 }
    579 
    580 /**
    581  * Move the string offset from one code point boundary to the previous one
    582  * and get the code point between them.
    583  * (Pre-decrementing backward iteration.)
    584  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    585  *
    586  * The input offset may be the same as the string length.
    587  * If the offset is behind a trail surrogate unit
    588  * for a supplementary code point, then the macro will read
    589  * the preceding lead surrogate as well.
    590  * If the offset is behind a lead surrogate or behind a single, unpaired
    591  * trail surrogate, then c is set to U+FFFD.
    592  *
    593  * @param s const UChar * string
    594  * @param start starting string offset (usually 0)
    595  * @param i string offset, must be start<i
    596  * @param c output UChar32 variable
    597  * @see U16_PREV_UNSAFE
    598  * @stable ICU 60
    599  */
    600 #define U16_PREV_OR_FFFD(s, start, i, c) { \
    601     (c)=(s)[--(i)]; \
    602     if(U16_IS_SURROGATE(c)) { \
    603         uint16_t __c2; \
    604         if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
    605             --(i); \
    606             (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
    607         } else { \
    608             (c)=0xfffd; \
    609         } \
    610     } \
    611 }
    612 
    613 /**
    614  * Move the string offset from one code point boundary to the previous one.
    615  * (Pre-decrementing backward iteration.)
    616  * The input offset may be the same as the string length.
    617  * "Unsafe" macro, assumes well-formed UTF-16.
    618  *
    619  * @param s const UChar * string
    620  * @param i string offset
    621  * @see U16_BACK_1
    622  * @stable ICU 2.4
    623  */
    624 #define U16_BACK_1_UNSAFE(s, i) { \
    625     if(U16_IS_TRAIL((s)[--(i)])) { \
    626         --(i); \
    627     } \
    628 }
    629 
    630 /**
    631  * Move the string offset from one code point boundary to the previous one.
    632  * (Pre-decrementing backward iteration.)
    633  * The input offset may be the same as the string length.
    634  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    635  *
    636  * @param s const UChar * string
    637  * @param start starting string offset (usually 0)
    638  * @param i string offset, must be start<i
    639  * @see U16_BACK_1_UNSAFE
    640  * @stable ICU 2.4
    641  */
    642 #define U16_BACK_1(s, start, i) { \
    643     if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
    644         --(i); \
    645     } \
    646 }
    647 
    648 /**
    649  * Move the string offset from one code point boundary to the n-th one before it,
    650  * i.e., move backward by n code points.
    651  * (Pre-decrementing backward iteration.)
    652  * The input offset may be the same as the string length.
    653  * "Unsafe" macro, assumes well-formed UTF-16.
    654  *
    655  * @param s const UChar * string
    656  * @param i string offset
    657  * @param n number of code points to skip
    658  * @see U16_BACK_N
    659  * @stable ICU 2.4
    660  */
    661 #define U16_BACK_N_UNSAFE(s, i, n) { \
    662     int32_t __N=(n); \
    663     while(__N>0) { \
    664         U16_BACK_1_UNSAFE(s, i); \
    665         --__N; \
    666     } \
    667 }
    668 
    669 /**
    670  * Move the string offset from one code point boundary to the n-th one before it,
    671  * i.e., move backward by n code points.
    672  * (Pre-decrementing backward iteration.)
    673  * The input offset may be the same as the string length.
    674  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    675  *
    676  * @param s const UChar * string
    677  * @param start start of string
    678  * @param i string offset, must be start<i
    679  * @param n number of code points to skip
    680  * @see U16_BACK_N_UNSAFE
    681  * @stable ICU 2.4
    682  */
    683 #define U16_BACK_N(s, start, i, n) { \
    684     int32_t __N=(n); \
    685     while(__N>0 && (i)>(start)) { \
    686         U16_BACK_1(s, start, i); \
    687         --__N; \
    688     } \
    689 }
    690 
    691 /**
    692  * Adjust a random-access offset to a code point boundary after a code point.
    693  * If the offset is behind the lead surrogate of a surrogate pair,
    694  * then the offset is incremented.
    695  * Otherwise, it is not modified.
    696  * The input offset may be the same as the string length.
    697  * "Unsafe" macro, assumes well-formed UTF-16.
    698  *
    699  * @param s const UChar * string
    700  * @param i string offset
    701  * @see U16_SET_CP_LIMIT
    702  * @stable ICU 2.4
    703  */
    704 #define U16_SET_CP_LIMIT_UNSAFE(s, i) { \
    705     if(U16_IS_LEAD((s)[(i)-1])) { \
    706         ++(i); \
    707     } \
    708 }
    709 
    710 /**
    711  * Adjust a random-access offset to a code point boundary after a code point.
    712  * If the offset is behind the lead surrogate of a surrogate pair,
    713  * then the offset is incremented.
    714  * Otherwise, it is not modified.
    715  * The input offset may be the same as the string length.
    716  * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
    717  *
    718  * The length can be negative for a NUL-terminated string.
    719  *
    720  * @param s const UChar * string
    721  * @param start int32_t starting string offset (usually 0)
    722  * @param i int32_t string offset, start<=i<=length
    723  * @param length int32_t string length
    724  * @see U16_SET_CP_LIMIT_UNSAFE
    725  * @stable ICU 2.4
    726  */
    727 #define U16_SET_CP_LIMIT(s, start, i, length) { \
    728     if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
    729         ++(i); \
    730     } \
    731 }
    732 
    733 #endif
    734