Home | History | Annotate | Download | only in common
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2003, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  unorm_it.h
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2003jan21
     14 *   created by: Markus W. Scherer
     15 */
     16 
     17 #ifndef __UNORM_IT_H__
     18 #define __UNORM_IT_H__
     19 
     20 #include "unicode/utypes.h"
     21 
     22 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION
     23 
     24 #include "unicode/uiter.h"
     25 #include "unicode/unorm.h"
     26 
     27 /**
     28  * Normalizing UCharIterator wrapper.
     29  * This internal API basically duplicates the functionality of the C++ Normalizer
     30  * but
     31  * - it actually implements a character iterator (UCharIterator)
     32  *   with few restrictions (see unorm_setIter())
     33  * - it supports UCharIterator getState()/setState()
     34  * - it uses lower-level APIs and buffers more text and states,
     35  *   hopefully resulting in higher performance
     36  *
     37  * Usage example:
     38  * \code
     39  * function(UCharIterator *srcIter) {
     40  *     UNormIterator *uni;
     41  *     UCharIterator *iter;
     42  *     UErrorCode errorCode;
     43  *
     44  *     errorCode=U_ZERO_ERROR;
     45  *     uni=unorm_openIter(NULL, 0, &errorCode);
     46  *     if(U_FAILURE(errorCode)) {
     47  *         // report error
     48  *         return;
     49  *     }
     50  *
     51  *     iter=unorm_setIter(uni, srcIter, UNORM_FCD, &errorCode);
     52  *     if(U_FAILURE(errorCode)) {
     53  *         // report error
     54  *     } else {
     55  *         // use iter to iterate over the canonically ordered
     56  *         // version of srcIter's text
     57  *         uint32_t state;
     58  *
     59  *         ...
     60  *
     61  *         state=uiter_getState(iter);
     62  *         if(state!=UITER_NO_STATE) {
     63  *             // use valid state, store it, use iter some more
     64  *             ...
     65  *
     66  *             // later restore iter to the saved state:
     67  *             uiter_setState(iter, state, &errorCode);
     68  *
     69  *             ...
     70  *         }
     71  *
     72  *         ...
     73  *     }
     74  *     unorm_closeIter(uni);
     75  * }
     76  * \endcode
     77  *
     78  * See also the ICU test suites.
     79  *
     80  * @internal
     81  */
     82 struct UNormIterator;  /* forward declaration only; members are not visible in this header */
     83 typedef struct UNormIterator UNormIterator;
     84 
     85 /**
     86  * Size, in bytes, of a stack buffer large enough to hold a UNormIterator;
     87  * see the stackMem and stackMemSize parameters of unorm_openIter().
     88  *
     89  * @internal
     90  */
     91 #define UNORM_ITER_SIZE 1024
     92 
     93 /**
     94  * Open a normalizing iterator. Must be closed later with unorm_closeIter().
     95  * Call unorm_setIter() on the result to set the source iterator and the mode.
     96  *
     97  * @param stackMem Pointer to preallocated (stack-allocated) buffer to hold
     98  *                 the UNormIterator if possible; can be NULL.
     99  * @param stackMemSize Number of bytes at stackMem; can be 0,
    100  *                     or should be >= UNORM_ITER_SIZE for a non-NULL stackMem.
    101  * @param pErrorCode ICU error code
    102  * @return an allocated and pre-initialized UNormIterator
    103  * @internal
    104  */
    105 U_CAPI UNormIterator * U_EXPORT2
    106 unorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode);
    107 
    108 /**
    109  * Close a normalizing iterator that was opened with unorm_openIter().
    110  *
    111  * @param uni UNormIterator from unorm_openIter()
    112  * @internal
    113  */
    114 U_CAPI void U_EXPORT2
    115 unorm_closeIter(UNormIterator *uni);
    116 
    117 /**
    118  * Set a UCharIterator and a normalization mode for the normalizing iterator
    119  * to wrap. The normalizing iterator will read from the character iterator,
    120  * normalize the text, and in turn deliver it with its own wrapper UCharIterator
    121  * interface which it returns.
    122  *
    123  * The source iterator remains at its current position through the unorm_setIter()
    124  * call but will be used and moved as soon as
    125  * the returned normalizing iterator is.
    126  *
    127  * The returned interface pointer is valid for as long as the normalizing iterator
    128  * is open and until another unorm_setIter() call is made on it.
    129  *
    130  * The normalizing iterator's UCharIterator interface has the following properties:
    131  * - getIndex() and move() will almost always return UITER_UNKNOWN_INDEX
    132  * - getState() will return UITER_NO_STATE for unknown states for positions
    133  *              that are not at normalization boundaries
    134  *
    135  * @param uni UNormIterator from unorm_openIter()
    136  * @param iter The source text UCharIterator to be wrapped. It is aliased into the normalizing iterator.
    137  *             Must support getState() and setState().
    138  * @param mode The normalization mode.
    139  * @param pErrorCode ICU error code
    140  * @return an alias to the normalizing iterator's UCharIterator interface
    141  * @internal
    142  */
    143 U_CAPI UCharIterator * U_EXPORT2
    144 unorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode, UErrorCode *pErrorCode);
    145 
    146 #endif /* uconfig.h switches */
    147 
    148 #endif
    149