1 /* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <media/mediascanner.h> 18 19 #include <utils/StringArray.h> 20 21 #include "autodetect.h" 22 #include "unicode/ucnv.h" 23 #include "unicode/ustring.h" 24 25 namespace android { 26 27 MediaScannerClient::MediaScannerClient() 28 : mNames(NULL), 29 mValues(NULL), 30 mLocaleEncoding(kEncodingNone) 31 { 32 } 33 34 MediaScannerClient::~MediaScannerClient() 35 { 36 delete mNames; 37 delete mValues; 38 } 39 40 void MediaScannerClient::setLocale(const char* locale) 41 { 42 if (!locale) return; 43 44 if (!strncmp(locale, "ja", 2)) 45 mLocaleEncoding = kEncodingShiftJIS; 46 else if (!strncmp(locale, "ko", 2)) 47 mLocaleEncoding = kEncodingEUCKR; 48 else if (!strncmp(locale, "zh", 2)) { 49 if (!strcmp(locale, "zh_CN")) { 50 // simplified chinese for mainland China 51 mLocaleEncoding = kEncodingGBK; 52 } else { 53 // assume traditional for non-mainland Chinese locales (Taiwan, Hong Kong, Singapore) 54 mLocaleEncoding = kEncodingBig5; 55 } 56 } 57 } 58 59 void MediaScannerClient::beginFile() 60 { 61 mNames = new StringArray; 62 mValues = new StringArray; 63 } 64 65 status_t MediaScannerClient::addStringTag(const char* name, const char* value) 66 { 67 if (mLocaleEncoding != kEncodingNone) { 68 // don't bother caching strings that are all ASCII. 69 // call handleStringTag directly instead. 70 // check to see if value (which should be utf8) has any non-ASCII characters 71 bool nonAscii = false; 72 const char* chp = value; 73 char ch; 74 while ((ch = *chp++)) { 75 if (ch & 0x80) { 76 nonAscii = true; 77 break; 78 } 79 } 80 81 if (nonAscii) { 82 // save the strings for later so they can be used for native encoding detection 83 mNames->push_back(name); 84 mValues->push_back(value); 85 return OK; 86 } 87 // else fall through 88 } 89 90 // autodetection is not necessary, so no need to cache the values 91 // pass directly to the client instead 92 return handleStringTag(name, value); 93 } 94 95 static uint32_t possibleEncodings(const char* s) 96 { 97 uint32_t result = kEncodingAll; 98 // if s contains a native encoding, then it was mistakenly encoded in utf8 as if it were latin-1 99 // so we need to reverse the latin-1 -> utf8 conversion to get the native chars back 100 uint8_t ch1, ch2; 101 uint8_t* chp = (uint8_t *)s; 102 103 while ((ch1 = *chp++)) { 104 if (ch1 & 0x80) { 105 ch2 = *chp++; 106 ch1 = ((ch1 << 6) & 0xC0) | (ch2 & 0x3F); 107 // ch1 is now the first byte of the potential native char 108 109 ch2 = *chp++; 110 if (ch2 & 0x80) 111 ch2 = ((ch2 << 6) & 0xC0) | (*chp++ & 0x3F); 112 // ch2 is now the second byte of the potential native char 113 int ch = (int)ch1 << 8 | (int)ch2; 114 result &= findPossibleEncodings(ch); 115 } 116 // else ASCII character, which could be anything 117 } 118 119 return result; 120 } 121 122 void MediaScannerClient::convertValues(uint32_t encoding) 123 { 124 const char* enc = NULL; 125 switch (encoding) { 126 case kEncodingShiftJIS: 127 enc = "shift-jis"; 128 break; 129 case kEncodingGBK: 130 enc = "gbk"; 131 break; 132 case kEncodingBig5: 133 enc = "Big5"; 134 break; 135 case kEncodingEUCKR: 136 enc = "EUC-KR"; 137 break; 138 } 139 140 if (enc) { 141 UErrorCode status = U_ZERO_ERROR; 142 143 UConverter *conv = ucnv_open(enc, &status); 144 if (U_FAILURE(status)) { 145 ALOGE("could not create UConverter for %s", enc); 146 return; 147 } 148 UConverter *utf8Conv = ucnv_open("UTF-8", &status); 149 if (U_FAILURE(status)) { 150 ALOGE("could not create UConverter for UTF-8"); 151 ucnv_close(conv); 152 return; 153 } 154 155 // for each value string, convert from native encoding to UTF-8 156 for (int i = 0; i < mNames->size(); i++) { 157 // first we need to untangle the utf8 and convert it back to the original bytes 158 // since we are reducing the length of the string, we can do this in place 159 uint8_t* src = (uint8_t *)mValues->getEntry(i); 160 int len = strlen((char *)src); 161 uint8_t* dest = src; 162 163 uint8_t uch; 164 while ((uch = *src++)) { 165 if (uch & 0x80) 166 *dest++ = ((uch << 6) & 0xC0) | (*src++ & 0x3F); 167 else 168 *dest++ = uch; 169 } 170 *dest = 0; 171 172 // now convert from native encoding to UTF-8 173 const char* source = mValues->getEntry(i); 174 int targetLength = len * 3 + 1; 175 char* buffer = new char[targetLength]; 176 // don't normally check for NULL, but in this case targetLength may be large 177 if (!buffer) 178 break; 179 char* target = buffer; 180 181 ucnv_convertEx(utf8Conv, conv, &target, target + targetLength, 182 &source, (const char *)dest, NULL, NULL, NULL, NULL, TRUE, TRUE, &status); 183 if (U_FAILURE(status)) { 184 ALOGE("ucnv_convertEx failed: %d", status); 185 mValues->setEntry(i, "???"); 186 } else { 187 // zero terminate 188 *target = 0; 189 mValues->setEntry(i, buffer); 190 } 191 192 delete[] buffer; 193 } 194 195 ucnv_close(conv); 196 ucnv_close(utf8Conv); 197 } 198 } 199 200 void MediaScannerClient::endFile() 201 { 202 if (mLocaleEncoding != kEncodingNone) { 203 int size = mNames->size(); 204 uint32_t encoding = kEncodingAll; 205 206 // compute a bit mask containing all possible encodings 207 for (int i = 0; i < mNames->size(); i++) 208 encoding &= possibleEncodings(mValues->getEntry(i)); 209 210 // if the locale encoding matches, then assume we have a native encoding. 211 if (encoding & mLocaleEncoding) 212 convertValues(mLocaleEncoding); 213 214 // finally, push all name/value pairs to the client 215 for (int i = 0; i < mNames->size(); i++) { 216 status_t status = handleStringTag(mNames->getEntry(i), mValues->getEntry(i)); 217 if (status) { 218 break; 219 } 220 } 221 } 222 // else addStringTag() has done all the work so we have nothing to do 223 224 delete mNames; 225 delete mValues; 226 mNames = NULL; 227 mValues = NULL; 228 } 229 230 } // namespace android 231