1 // Copyright (C) 2016 and later: Unicode, Inc. and others. 2 // License & terms of use: http://www.unicode.org/copyright.html 3 /* 4 ******************************************************************************* 5 * 6 * Copyright (C) 1998-2016, International Business Machines 7 * Corporation and others. All Rights Reserved. 8 * 9 ******************************************************************************* 10 * 11 * File ucbuf.h 12 * 13 * Modification History: 14 * 15 * Date Name Description 16 * 05/10/01 Ram Creation. 17 * 18 * This API reads in files and returns UChars 19 ******************************************************************************* 20 */ 21 22 #include "unicode/localpointer.h" 23 #include "unicode/ucnv.h" 24 #include "filestrm.h" 25 26 #if !UCONFIG_NO_CONVERSION 27 28 #ifndef UCBUF_H 29 #define UCBUF_H 1 30 31 typedef struct UCHARBUF UCHARBUF; 32 /** 33 * End of file value 34 */ 35 #define U_EOF 0xFFFFFFFF 36 /** 37 * Error value if a sequence cannot be unescaped 38 */ 39 #define U_ERR 0xFFFFFFFE 40 41 typedef struct ULine ULine; 42 43 struct ULine { 44 UChar *name; 45 int32_t len; 46 }; 47 48 /** 49 * Opens the UCHARBUF with the given file stream and code page for conversion 50 * @param fileName Name of the file to open. 51 * @param codepage The encoding of the file stream to convert to Unicode. 52 * If *codepoge is NULL on input the API will try to autodetect 53 * popular Unicode encodings 54 * @param showWarning Flag to print out warnings to STDOUT 55 * @param buffered If TRUE performs a buffered read of the input file. If FALSE reads 56 * the whole file into memory and converts it. 57 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 58 * indicates a failure on entry, the function will immediately return. 59 * On exit the value will indicate the success of the operation. 60 * @return pointer to the newly opened UCHARBUF 61 */ 62 U_CAPI UCHARBUF* U_EXPORT2 63 ucbuf_open(const char* fileName,const char** codepage,UBool showWarning, UBool buffered, UErrorCode* err); 64 65 /** 66 * Gets a UTF-16 code unit at the current position from the converted buffer 67 * and increments the current position 68 * @param buf Pointer to UCHARBUF structure 69 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 70 * indicates a failure on entry, the function will immediately return. 71 * On exit the value will indicate the success of the operation. 72 */ 73 U_CAPI int32_t U_EXPORT2 74 ucbuf_getc(UCHARBUF* buf,UErrorCode* err); 75 76 /** 77 * Gets a UTF-32 code point at the current position from the converted buffer 78 * and increments the current position 79 * @param buf Pointer to UCHARBUF structure 80 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 81 * indicates a failure on entry, the function will immediately return. 82 * On exit the value will indicate the success of the operation. 83 */ 84 U_CAPI int32_t U_EXPORT2 85 ucbuf_getc32(UCHARBUF* buf,UErrorCode* err); 86 87 /** 88 * Gets a UTF-16 code unit at the current position from the converted buffer after 89 * unescaping and increments the current position. If the escape sequence is for UTF-32 90 * code point (\\Uxxxxxxxx) then a UTF-32 codepoint is returned 91 * @param buf Pointer to UCHARBUF structure 92 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 93 * indicates a failure on entry, the function will immediately return. 94 * On exit the value will indicate the success of the operation. 95 */ 96 U_CAPI int32_t U_EXPORT2 97 ucbuf_getcx32(UCHARBUF* buf,UErrorCode* err); 98 99 /** 100 * Gets a pointer to the current position in the internal buffer and length of the line. 101 * It imperative to make a copy of the returned buffer before performing operations on it. 102 * @param buf Pointer to UCHARBUF structure 103 * @param len Output param to receive the len of the buffer returned till end of the line 104 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 105 * indicates a failure on entry, the function will immediately return. 106 * On exit the value will indicate the success of the operation. 107 * Error: U_TRUNCATED_CHAR_FOUND 108 * @return Pointer to the internal buffer, NULL if EOF 109 */ 110 U_CAPI const UChar* U_EXPORT2 111 ucbuf_readline(UCHARBUF* buf,int32_t* len, UErrorCode* err); 112 113 114 /** 115 * Resets the buffers and the underlying file stream. 116 * @param buf Pointer to UCHARBUF structure 117 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 118 * indicates a failure on entry, the function will immediately return. 119 * On exit the value will indicate the success of the operation. 120 */ 121 U_CAPI void U_EXPORT2 122 ucbuf_rewind(UCHARBUF* buf,UErrorCode* err); 123 124 /** 125 * Returns a pointer to the internal converted buffer 126 * @param buf Pointer to UCHARBUF structure 127 * @param len Pointer to int32_t to receive the lenth of buffer 128 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 129 * indicates a failure on entry, the function will immediately return. 130 * On exit the value will indicate the success of the operation. 131 * @return Pointer to internal UChar buffer 132 */ 133 U_CAPI const UChar* U_EXPORT2 134 ucbuf_getBuffer(UCHARBUF* buf,int32_t* len,UErrorCode* err); 135 136 /** 137 * Closes the UCHARBUF structure members and cleans up the malloc'ed memory 138 * @param buf Pointer to UCHARBUF structure 139 */ 140 U_CAPI void U_EXPORT2 141 ucbuf_close(UCHARBUF* buf); 142 143 #if U_SHOW_CPLUSPLUS_API 144 145 U_NAMESPACE_BEGIN 146 147 /** 148 * \class LocalUCHARBUFPointer 149 * "Smart pointer" class, closes a UCHARBUF via ucbuf_close(). 150 * For most methods see the LocalPointerBase base class. 151 * 152 * @see LocalPointerBase 153 * @see LocalPointer 154 */ 155 U_DEFINE_LOCAL_OPEN_POINTER(LocalUCHARBUFPointer, UCHARBUF, ucbuf_close); 156 157 U_NAMESPACE_END 158 159 #endif 160 161 /** 162 * Rewinds the buffer by one codepoint. Does not rewind over escaped characters. 163 */ 164 U_CAPI void U_EXPORT2 165 ucbuf_ungetc(int32_t ungetChar,UCHARBUF* buf); 166 167 168 /** 169 * Autodetects the encoding of the file stream. Only Unicode charsets are autodectected. 170 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring 171 * the converter to correct state for converting the rest of the stream. So the UConverter parameter 172 * is necessary. 173 * If the charset was autodetected, the caller must close both the input FileStream 174 * and the converter. 175 * 176 * @param fileName The file name to be opened and encoding autodected 177 * @param conv Output param to receive the opened converter if autodetected; NULL otherwise. 178 * @param cp Output param to receive the detected encoding 179 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 180 * indicates a failure on entry, the function will immediately return. 181 * On exit the value will indicate the success of the operation. 182 * @return The input FileStream if its charset was autodetected; NULL otherwise. 183 */ 184 U_CAPI FileStream * U_EXPORT2 185 ucbuf_autodetect(const char* fileName, const char** cp,UConverter** conv, 186 int32_t* signatureLength, UErrorCode* status); 187 188 /** 189 * Autodetects the encoding of the file stream. Only Unicode charsets are autodectected. 190 * Some Unicode charsets are stateful and need byte identifiers to be converted also to bring 191 * the converter to correct state for converting the rest of the stream. So the UConverter parameter 192 * is necessary. 193 * If the charset was autodetected, the caller must close the converter. 194 * 195 * @param fileStream The file stream whose encoding is to be detected 196 * @param conv Output param to receive the opened converter if autodetected; NULL otherwise. 197 * @param cp Output param to receive the detected encoding 198 * @param err is a pointer to a valid <code>UErrorCode</code> value. If this value 199 * indicates a failure on entry, the function will immediately return. 200 * On exit the value will indicate the success of the operation. 201 * @return Boolean whether the Unicode charset was autodetected. 202 */ 203 204 U_CAPI UBool U_EXPORT2 205 ucbuf_autodetect_fs(FileStream* in, const char** cp, UConverter** conv, int32_t* signatureLength, UErrorCode* status); 206 207 /** 208 * Returns the approximate size in UChars required for converting the file to UChars 209 */ 210 U_CAPI int32_t U_EXPORT2 211 ucbuf_size(UCHARBUF* buf); 212 213 U_CAPI const char* U_EXPORT2 214 ucbuf_resolveFileName(const char* inputDir, const char* fileName, char* target, int32_t* len, UErrorCode* status); 215 216 #endif 217 #endif 218 219