1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 2000-2004, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * file name: uparse.h 9 * encoding: US-ASCII 10 * tab size: 8 (not used) 11 * indentation:4 12 * 13 * created on: 2000apr18 14 * created by: Markus W. Scherer 15 * 16 * This file provides a parser for files that are delimited by one single 17 * character like ';' or TAB. Example: the Unicode Character Properties files 18 * like UnicodeData.txt are semicolon-delimited. 19 */ 20 21 #ifndef __UPARSE_H__ 22 #define __UPARSE_H__ 23 24 #include "unicode/utypes.h" 25 26 U_CDECL_BEGIN 27 28 /** 29 * Skip space ' ' and TAB '\t' characters. 30 * 31 * @param s Pointer to characters. 32 * @return Pointer to first character at or after s that is not a space or TAB. 33 */ 34 U_CAPI const char * U_EXPORT2 35 u_skipWhitespace(const char *s); 36 37 /** Function type for u_parseDelimitedFile(). */ 38 typedef void U_CALLCONV 39 UParseLineFn(void *context, 40 char *fields[][2], 41 int32_t fieldCount, 42 UErrorCode *pErrorCode); 43 44 /** 45 * Parser for files that are similar to UnicodeData.txt: 46 * This function opens the file and reads it line by line. It skips empty lines 47 * and comment lines that start with a '#'. 48 * All other lines are separated into fields with one delimiter character 49 * (semicolon for Unicode Properties files) between two fields. The last field in 50 * a line does not need to be terminated with a delimiter. 51 * 52 * For each line, after segmenting it, a line function is called. 53 * It gets passed the array of field start and limit pointers that is 54 * passed into this parser and filled by it for each line. 55 * For each field i of the line, the start pointer in fields[i][0] 56 * points to the beginning of the field, while the limit pointer in fields[i][1] 57 * points behind the field, i.e., to the delimiter or the line end. 58 * 59 * The context parameter of the line function is 60 * the same as the one for the parse function. 61 * 62 * The line function may modify the contents of the fields including the 63 * limit characters. 64 * 65 * If the file cannot be opened, or there is a parsing error or a field function 66 * sets *pErrorCode, then the parser returns with *pErrorCode set to an error code. 67 */ 68 U_CAPI void U_EXPORT2 69 u_parseDelimitedFile(const char *filename, char delimiter, 70 char *fields[][2], int32_t fieldCount, 71 UParseLineFn *lineFn, void *context, 72 UErrorCode *pErrorCode); 73 74 /** 75 * Parse a string of code points like 0061 0308 0300. 76 * s must end with either ';' or NUL. 77 * 78 * @return Number of code points. 79 */ 80 U_CAPI int32_t U_EXPORT2 81 u_parseCodePoints(const char *s, 82 uint32_t *dest, int32_t destCapacity, 83 UErrorCode *pErrorCode); 84 85 /** 86 * Parse a list of code points like 0061 0308 0300 87 * into a UChar * string. 88 * s must end with either ';' or NUL. 89 * 90 * Set the first code point in *pFirst. 91 * 92 * @param s Input char * string. 93 * @param dest Output string buffer. 94 * @param destCapacity Capacity of dest in numbers of UChars. 95 * @param pFirst If pFirst!=NULL the *pFirst will be set to the first 96 * code point in the string. 97 * @param pErrorCode ICU error code. 98 * @return The length of the string in numbers of UChars. 99 */ 100 U_CAPI int32_t U_EXPORT2 101 u_parseString(const char *s, 102 UChar *dest, int32_t destCapacity, 103 uint32_t *pFirst, 104 UErrorCode *pErrorCode); 105 106 /** 107 * Parse a code point range like 108 * 0085 or 109 * 4E00..9FA5. 110 * 111 * s must contain such a range and end with either ';' or NUL. 112 * 113 * @return Length of code point range, end-start+1 114 */ 115 U_CAPI int32_t U_EXPORT2 116 u_parseCodePointRange(const char *s, 117 uint32_t *pStart, uint32_t *pEnd, 118 UErrorCode *pErrorCode); 119 120 121 U_CAPI int32_t U_EXPORT2 122 u_parseUTF8(const char *source, int32_t sLen, char *dest, int32_t destCapacity, UErrorCode *status); 123 124 U_CDECL_END 125 126 #endif 127