1 /* 2 ******************************************************************************* 3 * 4 * Copyright (C) 1998-2015, International Business Machines 5 * Corporation and others. All Rights Reserved. 6 * 7 ******************************************************************************* 8 * 9 * File ucbuf.c 10 * 11 * Modification History: 12 * 13 * Date Name Description 14 * 05/10/01 Ram Creation. 15 * 16 * This API reads in files and returns UChars 17 ******************************************************************************* 18 */ 19 20 #include "unicode/ucnv.h" 21 #include "filestrm.h" 22 23 #if !UCONFIG_NO_CONVERSION 24 25 #ifndef UCBUF_H 26 #define UCBUF_H 1 27 28 typedef struct UCHARBUF UCHARBUF; 29 /** 30 * End of file value 31 */ 32 #define U_EOF 0xFFFFFFFF 33 /** 34 * Error value if a sequence cannot be unescaped 35 */ 36 #define U_ERR 0xFFFFFFFE 37 38 typedef struct ULine ULine; 39 40 struct ULine { 41 UChar *name; 42 int32_t len; 43 }; 44 45 /** 46 * Opens the UCHARBUF with the given file stream and code page for conversion 47 * @param fileName Name of the file to open. 48 * @param codepage The encoding of the file stream to convert to Unicode. 49 * If *codepoge is NULL on input the API will try to autodetect 50 * popular Unicode encodings 51 * @param showWarning Flag to print out warnings to STDOUT 52 * @param buffered If TRUE performs a buffered read of the input file. If FALSE reads 53 * the whole file into memory and converts it. 54 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 55 * indicates a failure on entry, the function will immediately return. 56 * On exit the value will indicate the success of the operation. 57 * @return pointer to the newly opened UCHARBUF 58 */ 59 U_CAPI UCHARBUF* U_EXPORT2 60 ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err); 61 62 /** 63 * Gets a UTF-16 code unit at the current position from the converted buffer 64 * and increments the current position 65 * @param buf Pointer to UCHARBUF structure 66 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 67 * indicates a failure on entry, the function will immediately return. 68 * On exit the value will indicate the success of the operation. 69 */ 70 U_CAPI int32_t U_EXPORT2 71 ucbuf_getc(UCHARBUF* buf,UErrorCode* err); 72 73 /** 74 * Gets a UTF-32 code point at the current position from the converted buffer 75 * and increments the current position 76 * @param buf Pointer to UCHARBUF structure 77 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 78 * indicates a failure on entry, the function will immediately return. 79 * On exit the value will indicate the success of the operation. 80 */ 81 U_CAPI int32_t U_EXPORT2 82 ucbuf_getc32(UCHARBUF* buf,UErrorCode* err); 83 84 /** 85 * Gets a UTF-16 code unit at the current position from the converted buffer after 86 * unescaping and increments the current position. If the escape sequence is for UTF-32 87 * code point (\\Uxxxxxxxx) then a UTF-32 codepoint is returned 88 * @param buf Pointer to UCHARBUF structure 89 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 90 * indicates a failure on entry, the function will immediately return. 91 * On exit the value will indicate the success of the operation. 92 */ 93 U_CAPI int32_t U_EXPORT2 94 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err); 95 96 /** 97 * Gets a pointer to the current position in the internal buffer and length of the line. 98 * It imperative to make a copy of the returned buffere before performing operations on it. 99 * @param buf Pointer to UCHARBUF structure 100 * @param len Output param to receive the len of the buffer returned till end of the line 101 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 102 * indicates a failure on entry, the function will immediately return. 103 * On exit the value will indicate the success of the operation. 104 * Error: U_TRUNCATED_CHAR_FOUND 105 * @return Pointer to the internal buffer, NULL if EOF 106 */ 107 U_CAPI const UChar* U_EXPORT2 108 ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err); 109 110 111 /** 112 * Resets the buffers and the underlying file stream. 113 * @param buf Pointer to UCHARBUF structure 114 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 115 * indicates a failure on entry, the function will immediately return. 116 * On exit the value will indicate the success of the operation. 117 */ 118 U_CAPI void U_EXPORT2 119 ucbuf_rewind(UCHARBUF* buf,UErrorCode* err); 120 121 /** 122 * Returns a pointer to the internal converted buffer 123 * @param buf Pointer to UCHARBUF structure 124 * @param len Pointer to int32_t to receive the lenth of buffer 125 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 126 * indicates a failure on entry, the function will immediately return. 127 * On exit the value will indicate the success of the operation. 128 * @return Pointer to internal UChar buffer 129 */ 130 U_CAPI const UChar* U_EXPORT2 131 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err); 132 133 /** 134 * Closes the UCHARBUF structure members and cleans up the malloc'ed memory 135 * @param buf Pointer to UCHARBUF structure 136 */ 137 U_CAPI void U_EXPORT2 138 ucbuf_close(UCHARBUF* buf); 139 140 #if U_SHOW_CPLUSPLUS_API 141 142 U_NAMESPACE_BEGIN 143 144 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCHARBUFPointer, UCHARBUF, ucbuf_close); 145 146 U_NAMESPACE_END 147 148 #endif 149 150 /** 151 * Rewinds the buffer by one codepoint. Does not rewind over escaped characters. 152 */ 153 U_CAPI void U_EXPORT2 154 ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf); 155 156 157 /** 158 * Autodetects the encoding of the file stream. Only Unicode charsets are autodectected. 159 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring 160 * the converter to correct state for converting the rest of the stream. So the UConverter parameter 161 * is necessary. 162 * If the charset was autodetected, the caller must close both the input FileStream 163 * and the converter. 164 * 165 * @param fileName The file name to be opened and encoding autodected 166 * @param conv Output param to receive the opened converter if autodetected; NULL otherwise. 167 * @param cp Output param to receive the detected encoding 168 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 169 * indicates a failure on entry, the function will immediately return. 170 * On exit the value will indicate the success of the operation. 171 * @return The input FileStream if its charset was autodetected; NULL otherwise. 172 */ 173 U_CAPI FileStream * U_EXPORT2 174 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, 175 int32_t* signatureLength, UErrorCode* status); 176 177 /** 178 * Autodetects the encoding of the file stream. Only Unicode charsets are autodectected. 179 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring 180 * the converter to correct state for converting the rest of the stream. So the UConverter parameter 181 * is necessary. 182 * If the charset was autodetected, the caller must close the converter. 183 * 184 * @param fileStream The file stream whose encoding is to be detected 185 * @param conv Output param to receive the opened converter if autodetected; NULL otherwise. 186 * @param cp Output param to receive the detected encoding 187 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 188 * indicates a failure on entry, the function will immediately return. 189 * On exit the value will indicate the success of the operation. 190 * @return Boolean whether the Unicode charset was autodetected. 191 */ 192 193 U_CAPI UBool U_EXPORT2 194 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status); 195 196 /** 197 * Returns the approximate size in UChars required for converting the file to UChars 198 */ 199 U_CAPI int32_t U_EXPORT2 200 ucbuf_size(UCHARBUF* buf); 201 202 U_CAPI const char* U_EXPORT2 203 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status); 204 205 #endif 206 #endif 207 208