Home | History | Annotate | Download | only in toolutil
      1 /*
      2 *******************************************************************************
      3 *
      4 *   Copyright (C) 1998-2008, International Business Machines
      5 *   Corporation and others.  All Rights Reserved.
      6 *
      7 *******************************************************************************
      8 *
      9 * File ucbuf.h
     10 *
     11 * Modification History:
     12 *
     13 *   Date        Name        Description
     14 *   05/10/01    Ram         Creation.
     15 *
     16 * This API reads in files and returns UChars
     17 *******************************************************************************
     18 */
     19 
     20 #include "unicode/ucnv.h"
     21 #include "filestrm.h"
     22 
     23 #if !UCONFIG_NO_CONVERSION
     24 
     25 #ifndef UCBUF_H
     26 #define UCBUF_H 1
     27 
     28 typedef struct UCHARBUF UCHARBUF;
     29 /**
     30  * End of file value
     31  */
     32 #define U_EOF 0xFFFFFFFF
     33 /**
     34  * Error value if a sequence cannot be unescaped
     35  */
     36 #define U_ERR 0xFFFFFFFE
     37 
     38 typedef struct ULine ULine;
     39 
     40 struct  ULine {         /* bundles a UChar pointer with a 32-bit length */
     41     UChar     *name;    /* NOTE(review): ownership of the pointed-to UChars is not visible in this header */
     42     int32_t   len;      /* presumably the number of UChars at name -- TODO confirm */
     43 };
     44 
     45 /**
     46  * Opens the UCHARBUF with the given file stream and code page for conversion
     47  * @param fileName  Name of the file to open.
     48  * @param codepage  The encoding of the file stream to convert to Unicode.
     49  *                  If *codepage is NULL on input the API will try to autodetect
     50  *                  popular Unicode encodings
     51  * @param showWarning Flag to print out warnings to STDOUT
     52  * @param buffered  If TRUE performs a buffered read of the input file. If FALSE reads
     53  *                  the whole file into memory and converts it.
     54  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
     55  *        indicates a failure on entry, the function will immediately return.
     56  *        On exit the value will indicate the success of the operation.
     57  * @return pointer to the newly opened UCHARBUF
     58  */
     59 U_CAPI UCHARBUF* U_EXPORT2
     60 ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err);
     61 
     62 /**
     63  * Gets a UTF-16 code unit at the current position from the converted buffer
     64  * and increments the current position
     65  * @param buf Pointer to UCHARBUF structure
     66  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
     67  *        indicates a failure on entry, the function will immediately return.
     68  *        On exit the value will indicate the success of the operation.
     69  */
     70 U_CAPI int32_t U_EXPORT2
     71 ucbuf_getc(UCHARBUF* buf,UErrorCode* err);
     72 
     73 /**
     74  * Gets a UTF-32 code point at the current position from the converted buffer
     75  * and increments the current position
     76  * @param buf Pointer to UCHARBUF structure
     77  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
     78  *        indicates a failure on entry, the function will immediately return.
     79  *        On exit the value will indicate the success of the operation.
     80  */
     81 U_CAPI int32_t U_EXPORT2
     82 ucbuf_getc32(UCHARBUF* buf,UErrorCode* err);
     83 
     84 /**
     85  * Gets a UTF-16 code unit at the current position from the converted buffer after
     86  * unescaping and increments the current position. If the escape sequence is for UTF-32
     87  * code point (\\Uxxxxxxxx) then a UTF-32 codepoint is returned
     88  * @param buf Pointer to UCHARBUF structure
     89  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
     90  *        indicates a failure on entry, the function will immediately return.
     91  *        On exit the value will indicate the success of the operation.
     92  */
     93 U_CAPI int32_t U_EXPORT2
     94 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err);
     95 
     96 /**
     97  * Gets a pointer to the current position in the internal buffer and length of the line.
     98  * It is imperative to make a copy of the returned buffer before performing operations on it.
     99  * @param buf Pointer to UCHARBUF structure
    100  * @param len Output param to receive the len of the buffer returned till end of the line
    101  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
    102  *        indicates a failure on entry, the function will immediately return.
    103  *        On exit the value will indicate the success of the operation.
    104  *        Error: U_TRUNCATED_CHAR_FOUND
    105  * @return Pointer to the internal buffer, NULL if EOF
    106  */
    107 U_CAPI const UChar* U_EXPORT2
    108 ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err);
    109 
    110 
    111 /**
    112  * Resets the buffers and the underlying file stream.
    113  * @param buf Pointer to UCHARBUF structure
    114  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
    115  *        indicates a failure on entry, the function will immediately return.
    116  *        On exit the value will indicate the success of the operation.
    117  */
    118 U_CAPI void U_EXPORT2
    119 ucbuf_rewind(UCHARBUF* buf,UErrorCode* err);
    120 
    121 /**
    122  * Returns a pointer to the internal converted buffer
    123  * @param buf Pointer to UCHARBUF structure
    124  * @param len Pointer to int32_t to receive the length of buffer
    125  * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value
    126  *        indicates a failure on entry, the function will immediately return.
    127  *        On exit the value will indicate the success of the operation.
    128  * @return Pointer to internal UChar buffer
    129  */
    130 U_CAPI const UChar* U_EXPORT2
    131 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err);
    132 
    133 /**
    134  * Closes the UCHARBUF structure members and cleans up the malloc'ed memory
    135  * @param buf Pointer to UCHARBUF structure
    136  */
    137 U_CAPI void U_EXPORT2
    138 ucbuf_close(UCHARBUF* buf);
    139 
    140 /**
    141  * Rewinds the buffer by one codepoint. Does not rewind over escaped characters.
    142  */
    143 U_CAPI void U_EXPORT2
    144 ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf);
    145 
    146 
    147 /**
    148  * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
    149  * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
    150  * the converter to correct state for converting the rest of the stream. So the UConverter parameter
    151  * is necessary.
    152  * If the charset was autodetected, the caller must close both the input FileStream
    153  * and the converter.
    154  *
    155  * @param fileName The file name to be opened and encoding autodetected
    156  * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
    157  * @param cp Output param to receive the detected encoding
    158  * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value
    159  *        indicates a failure on entry, the function will immediately return.
    160  *        On exit the value will indicate the success of the operation.
    161  * @return The input FileStream if its charset was autodetected; NULL otherwise.
    162  */
    163 U_CAPI FileStream * U_EXPORT2
    164 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv,
    165 int32_t* signatureLength, UErrorCode* status);
    166 
    167 /**
    168  * Autodetects the encoding of the file stream. Only Unicode charsets are autodetected.
    169  * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring
    170  * the converter to correct state for converting the rest of the stream. So the UConverter parameter
    171  * is necessary.
    172  * If the charset was autodetected, the caller must close the converter.
    173  *
    174  * @param in The file stream whose encoding is to be detected
    175  * @param conv  Output param to receive the opened converter if autodetected; NULL otherwise.
    176  * @param cp Output param to receive the detected encoding
    177  * @param status is a pointer to a valid <code>UErrorCode</code> value. If this value
    178  *        indicates a failure on entry, the function will immediately return.
    179  *        On exit the value will indicate the success of the operation.
    180  * @return Boolean whether the Unicode charset was autodetected.
    181  */
    182 
    183 U_CAPI UBool U_EXPORT2
    184 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status);
    185 
    186 /**
    187  * Returns the approximate size in UChars required for converting the file to UChars
    188  */
    189 U_CAPI int32_t U_EXPORT2
    190 ucbuf_size(UCHARBUF* buf);
    191 
    192 U_CAPI const char* U_EXPORT2
    193 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status); /* NOTE(review): undocumented in this header; presumably builds a path from inputDir and fileName into target -- confirm against the implementation */
    194 
    195 #endif
    196 #endif
    197 
    198