1 /* 2 ****************************************************************************** 3 * 2016 and later: Unicode, Inc. and others. * 4 * License & terms of use: http://www.unicode.org/copyright.html#License * 5 ****************************************************************************** 6 ****************************************************************************** 7 * Copyright (C) 1998-2005, International Business Machines Corporation and * 8 * others. All Rights Reserved. * 9 ****************************************************************************** 10 */ 11 12 #include <errno.h> 13 #include <stdio.h> 14 #include <string.h> 15 16 #include "unicode/utypes.h" 17 #include "unicode/unistr.h" 18 19 #include "layout/LETypes.h" 20 21 #include "GUISupport.h" 22 #include "UnicodeReader.h" 23 24 #define BYTE(b) (((int) b) & 0xFF) 25 26 /* 27 * Read the text from a file. The text must start with a Unicode Byte 28 * Order Mark (BOM) so that we know what order to read the bytes in. 29 */ 30 const UChar *UnicodeReader::readFile(const char *fileName, GUISupport *guiSupport, int32_t &charCount) 31 { 32 FILE *f; 33 int32_t fileSize; 34 35 UChar *charBuffer; 36 char *byteBuffer; 37 char startBytes[4] = {'\xA5', '\xA5', '\xA5', '\xA5'}; 38 char errorMessage[128]; 39 const char *cp = ""; 40 int32_t signatureLength = 0; 41 42 f = fopen(fileName, "rb"); 43 44 if( f == NULL ) { 45 sprintf(errorMessage,"Couldn't open %s: %s \n", fileName, strerror(errno)); 46 guiSupport->postErrorMessage(errorMessage, "Text File Error"); 47 return 0; 48 } 49 50 fseek(f, 0, SEEK_END); 51 fileSize = ftell(f); 52 53 fseek(f, 0, SEEK_SET); 54 fread(startBytes, sizeof(char), 4, f); 55 56 if (startBytes[0] == '\xFE' && startBytes[1] == '\xFF') { 57 cp = "UTF-16BE"; 58 signatureLength = 2; 59 } else if (startBytes[0] == '\xFF' && startBytes[1] == '\xFE') { 60 if (startBytes[2] == '\x00' && startBytes[3] == '\x00') { 61 cp = "UTF-32LE"; 62 signatureLength = 4; 63 } else { 64 cp = "UTF-16LE"; 65 signatureLength = 2; 66 } 67 } else if (startBytes[0] == '\xEF' && startBytes[1] == '\xBB' && startBytes[2] == '\xBF') { 68 cp = "UTF-8"; 69 signatureLength = 3; 70 } else if (startBytes[0] == '\x0E' && startBytes[1] == '\xFE' && startBytes[2] == '\xFF') { 71 cp = "SCSU"; 72 signatureLength = 3; 73 } else if (startBytes[0] == '\x00' && startBytes[1] == '\x00' && 74 startBytes[2] == '\xFE' && startBytes[3] == '\xFF') { 75 cp = "UTF-32BE"; 76 signatureLength = 4; 77 } else { 78 sprintf(errorMessage, "Couldn't detect the encoding of %s: (%2.2X, %2.2X, %2.2X, %2.2X)\n", fileName, 79 BYTE(startBytes[0]), BYTE(startBytes[1]), BYTE(startBytes[2]), BYTE(startBytes[3])); 80 guiSupport->postErrorMessage(errorMessage, "Text File Error"); 81 fclose(f); 82 return 0; 83 } 84 85 fileSize -= signatureLength; 86 fseek(f, signatureLength, SEEK_SET); 87 byteBuffer = new char[fileSize]; 88 89 if(byteBuffer == 0) { 90 sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno)); 91 guiSupport->postErrorMessage(errorMessage, "Text File Error"); 92 fclose(f); 93 return 0; 94 } 95 96 fread(byteBuffer, sizeof(char), fileSize, f); 97 if( ferror(f) ) { 98 sprintf(errorMessage,"Couldn't read %s: %s \n", fileName, strerror(errno)); 99 guiSupport->postErrorMessage(errorMessage, "Text File Error"); 100 fclose(f); 101 delete[] byteBuffer; 102 return 0; 103 } 104 fclose(f); 105 106 UnicodeString myText(byteBuffer, fileSize, cp); 107 108 delete[] byteBuffer; 109 110 charCount = myText.length(); 111 charBuffer = LE_NEW_ARRAY(UChar, charCount + 1); 112 if(charBuffer == 0) { 113 sprintf(errorMessage,"Couldn't get memory for reading %s: %s \n", fileName, strerror(errno)); 114 guiSupport->postErrorMessage(errorMessage, "Text File Error"); 115 return 0; 116 } 117 118 myText.extract(0, myText.length(), charBuffer); 119 charBuffer[charCount] = 0; // NULL terminate for easier reading in the debugger 120 121 return charBuffer; 122 } 123 124