Home | History | Annotate | Download | only in toolutil
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 2000-2004, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *   file name:  uparse.h
      9 *   encoding:   US-ASCII
     10 *   tab size:   8 (not used)
     11 *   indentation:4
     12 *
     13 *   created on: 2000apr18
     14 *   created by: Markus W. Scherer
     15 *
     16 *   This file provides a parser for files that are delimited by one single
     17 *   character like ';' or TAB. Example: the Unicode Character Properties files
     18 *   like UnicodeData.txt are semicolon-delimited.
     19 */
     20 
     21 #ifndef __UPARSE_H__
     22 #define __UPARSE_H__
     23 
     24 #include "unicode/utypes.h"
     25 
     26 U_CDECL_BEGIN
     27 
     28 /**
     29  * Skip over space ' ' and TAB '\t' characters, starting at s.
     30  * Only these two characters are documented as being skipped.
     31  * @param s Pointer to characters.
     32  * @return Pointer to first character at or after s that is not a space or TAB.
     33  */
     34 U_CAPI const char * U_EXPORT2
     35 u_skipWhitespace(const char *s);
     36 
     37 /**
         * Function type for the line function of u_parseDelimitedFile().
         * The parser calls a function of this type for each line, after
         * segmenting the line into fields.
         *
         * @param context The same context pointer that was passed to
         *                u_parseDelimitedFile().
         * @param fields The array of field start and limit pointers that was passed
         *               into the parser and filled by it for the current line:
         *               fields[i][0] points to the beginning of field i,
         *               fields[i][1] points behind it (to the delimiter or line end).
         * @param fieldCount Number of fields (presumably the fieldCount given to
         *                   u_parseDelimitedFile() - TODO confirm against the
         *                   implementation).
         * @param pErrorCode ICU error code. If the function sets an error here,
         *                   the parser returns with *pErrorCode set to an error code.
         */
     38 typedef void U_CALLCONV
     39 UParseLineFn(void *context,
     40               char *fields[][2],
     41               int32_t fieldCount,
     42               UErrorCode *pErrorCode);
     43 
     44 /**
     45  * Parser for files that are similar to UnicodeData.txt:
     46  * This function opens the file and reads it line by line. It skips empty lines
     47  * and comment lines that start with a '#'.
     48  * All other lines are separated into fields with one delimiter character
     49  * (semicolon for Unicode Properties files) between two fields. The last field in
     50  * a line does not need to be terminated with a delimiter.
     51  *
     52  * For each line, after segmenting it, a line function is called.
     53  * It gets passed the array of field start and limit pointers that is
     54  * passed into this parser and filled by it for each line.
     55  * For each field i of the line, the start pointer in fields[i][0]
     56  * points to the beginning of the field, while the limit pointer in fields[i][1]
     57  * points behind the field, i.e., to the delimiter or the line end.
     58  *
     59  * The context parameter of the line function is
     60  * the same as the one for the parse function.
     61  *
     62  * The line function may modify the contents of the fields including the
     63  * limit characters.
     64  *
     65  * If the file cannot be opened, or there is a parsing error or the line function
     66  * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code.
         *
         * @param filename Name of the file to open and parse.
         * @param delimiter The single character that separates two fields in a line.
         * @param fields Caller-provided array of start/limit pointer pairs; the parser
         *               fills it for each line and passes it to the line function.
         * @param fieldCount Number of fields (presumably the number of entries
         *                   available in fields - TODO confirm against the
         *                   implementation).
         * @param lineFn The line function that is called for each segmented line.
         * @param context Pointer that is passed through to the line function.
         * @param pErrorCode ICU error code.
     67  */
     68 U_CAPI void U_EXPORT2
     69 u_parseDelimitedFile(const char *filename, char delimiter,
     70                      char *fields[][2], int32_t fieldCount,
     71                      UParseLineFn *lineFn, void *context,
     72                      UErrorCode *pErrorCode);
     73 
     74 /**
     75  * Parse a string of code points like 0061 0308 0300.
     76  * s must end with either ';' or NUL.
     77  *
         * @param s Input char * string.
         * @param dest Output buffer for the parsed code points.
         * @param destCapacity Capacity of dest (presumably counted in code points,
         *                     i.e., uint32_t elements - TODO confirm).
         * @param pErrorCode ICU error code.
     78  * @return Number of code points.
     79  */
     80 U_CAPI int32_t U_EXPORT2
     81 u_parseCodePoints(const char *s,
     82                   uint32_t *dest, int32_t destCapacity,
     83                   UErrorCode *pErrorCode);
     84 
     85 /**
     86  * Parse a list of code points like 0061 0308 0300
     87  * into a UChar * string.
     88  * s must end with either ';' or NUL.
     89  *
     90  * If pFirst is not NULL, the first code point is also stored in *pFirst.
     91  *
     92  * @param s Input char * string.
     93  * @param dest Output string buffer.
     94  * @param destCapacity Capacity of dest in numbers of UChars.
     95  * @param pFirst If pFirst!=NULL then *pFirst will be set to the first
     96  *               code point in the string.
     97  * @param pErrorCode ICU error code.
     98  * @return The length of the string in numbers of UChars.
     99  */
    100 U_CAPI int32_t U_EXPORT2
    101 u_parseString(const char *s,
    102               UChar *dest, int32_t destCapacity,
    103               uint32_t *pFirst,
    104               UErrorCode *pErrorCode);
    105 
    106 /**
    107  * Parse a code point range like
    108  * 0085 or
    109  * 4E00..9FA5.
    110  *
    111  * s must contain such a range and end with either ';' or NUL.
    112  *
         * @param s Input char * string.
         * @param pStart Output: receives the start code point of the range.
         * @param pEnd Output: receives the end code point of the range
         *             (inclusive, as implied by the end-start+1 length).
         * @param pErrorCode ICU error code.
    113  * @return Length of code point range, end-start+1
    114  */
    115 U_CAPI int32_t U_EXPORT2
    116 u_parseCodePointRange(const char *s,
    117                       uint32_t *pStart, uint32_t *pEnd,
    118                       UErrorCode *pErrorCode);
    119 
    120 /**
         * NOTE(review): this function is not documented in this header.
         * Judging only by its name and signature, it presumably parses the text in
         * source (sLen chars) into UTF-8 bytes written to dest (capacity
         * destCapacity), returns the output length, and reports errors via
         * *status - TODO confirm all of this against the implementation.
         */
    121 U_CAPI int32_t U_EXPORT2
    122 u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status);
    123 
    124 U_CDECL_END
    125 
    126 #endif
    127