Home | History | Annotate | Download | only in libmedia
      1 /*
      2  * Copyright (C) 2009 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <media/mediascanner.h>
     18 
     19 #include "StringArray.h"
     20 
     21 #include "autodetect.h"
     22 #include "unicode/ucnv.h"
     23 #include "unicode/ustring.h"
     24 
     25 namespace android {
     26 
     27 MediaScannerClient::MediaScannerClient()
     28     :   mNames(NULL),
     29         mValues(NULL),
     30         mLocaleEncoding(kEncodingNone)
     31 {
     32 }
     33 
     34 MediaScannerClient::~MediaScannerClient()
     35 {
     36     delete mNames;
     37     delete mValues;
     38 }
     39 
     40 void MediaScannerClient::setLocale(const char* locale)
     41 {
     42     if (!locale) return;
     43 
     44     if (!strncmp(locale, "ja", 2))
     45         mLocaleEncoding = kEncodingShiftJIS;
     46     else if (!strncmp(locale, "ko", 2))
     47         mLocaleEncoding = kEncodingEUCKR;
     48     else if (!strncmp(locale, "zh", 2)) {
     49         if (!strcmp(locale, "zh_CN")) {
     50             // simplified chinese for mainland China
     51             mLocaleEncoding = kEncodingGBK;
     52         } else {
     53             // assume traditional for non-mainland Chinese locales (Taiwan, Hong Kong, Singapore)
     54             mLocaleEncoding = kEncodingBig5;
     55         }
     56     }
     57 }
     58 
     59 void MediaScannerClient::beginFile()
     60 {
     61     mNames = new StringArray;
     62     mValues = new StringArray;
     63 }
     64 
     65 status_t MediaScannerClient::addStringTag(const char* name, const char* value)
     66 {
     67     if (mLocaleEncoding != kEncodingNone) {
     68         // don't bother caching strings that are all ASCII.
     69         // call handleStringTag directly instead.
     70         // check to see if value (which should be utf8) has any non-ASCII characters
     71         bool nonAscii = false;
     72         const char* chp = value;
     73         char ch;
     74         while ((ch = *chp++)) {
     75             if (ch & 0x80) {
     76                 nonAscii = true;
     77                 break;
     78             }
     79         }
     80 
     81         if (nonAscii) {
     82             // save the strings for later so they can be used for native encoding detection
     83             mNames->push_back(name);
     84             mValues->push_back(value);
     85             return OK;
     86         }
     87         // else fall through
     88     }
     89 
     90     // autodetection is not necessary, so no need to cache the values
     91     // pass directly to the client instead
     92     return handleStringTag(name, value);
     93 }
     94 
     95 static uint32_t possibleEncodings(const char* s)
     96 {
     97     uint32_t result = kEncodingAll;
     98     // if s contains a native encoding, then it was mistakenly encoded in utf8 as if it were latin-1
     99     // so we need to reverse the latin-1 -> utf8 conversion to get the native chars back
    100     uint8_t ch1, ch2;
    101     uint8_t* chp = (uint8_t *)s;
    102 
    103     while ((ch1 = *chp++)) {
    104         if (ch1 & 0x80) {
    105             ch2 = *chp++;
    106             ch1 = ((ch1 << 6) & 0xC0) | (ch2 & 0x3F);
    107             // ch1 is now the first byte of the potential native char
    108 
    109             ch2 = *chp++;
    110             if (ch2 & 0x80)
    111                 ch2 = ((ch2 << 6) & 0xC0) | (*chp++ & 0x3F);
    112             // ch2 is now the second byte of the potential native char
    113             int ch = (int)ch1 << 8 | (int)ch2;
    114             result &= findPossibleEncodings(ch);
    115         }
    116         // else ASCII character, which could be anything
    117     }
    118 
    119     return result;
    120 }
    121 
    122 void MediaScannerClient::convertValues(uint32_t encoding)
    123 {
    124     const char* enc = NULL;
    125     switch (encoding) {
    126         case kEncodingShiftJIS:
    127             enc = "shift-jis";
    128             break;
    129         case kEncodingGBK:
    130             enc = "gbk";
    131             break;
    132         case kEncodingBig5:
    133             enc = "Big5";
    134             break;
    135         case kEncodingEUCKR:
    136             enc = "EUC-KR";
    137             break;
    138     }
    139 
    140     if (enc) {
    141         UErrorCode status = U_ZERO_ERROR;
    142 
    143         UConverter *conv = ucnv_open(enc, &status);
    144         if (U_FAILURE(status)) {
    145             ALOGE("could not create UConverter for %s", enc);
    146             return;
    147         }
    148         UConverter *utf8Conv = ucnv_open("UTF-8", &status);
    149         if (U_FAILURE(status)) {
    150             ALOGE("could not create UConverter for UTF-8");
    151             ucnv_close(conv);
    152             return;
    153         }
    154 
    155         // for each value string, convert from native encoding to UTF-8
    156         for (int i = 0; i < mNames->size(); i++) {
    157             // first we need to untangle the utf8 and convert it back to the original bytes
    158             // since we are reducing the length of the string, we can do this in place
    159             uint8_t* src = (uint8_t *)mValues->getEntry(i);
    160             int len = strlen((char *)src);
    161             uint8_t* dest = src;
    162 
    163             uint8_t uch;
    164             while ((uch = *src++)) {
    165                 if (uch & 0x80)
    166                     *dest++ = ((uch << 6) & 0xC0) | (*src++ & 0x3F);
    167                 else
    168                     *dest++ = uch;
    169             }
    170             *dest = 0;
    171 
    172             // now convert from native encoding to UTF-8
    173             const char* source = mValues->getEntry(i);
    174             int targetLength = len * 3 + 1;
    175             char* buffer = new char[targetLength];
    176             // don't normally check for NULL, but in this case targetLength may be large
    177             if (!buffer)
    178                 break;
    179             char* target = buffer;
    180 
    181             ucnv_convertEx(utf8Conv, conv, &target, target + targetLength,
    182                     &source, (const char *)dest, NULL, NULL, NULL, NULL, TRUE, TRUE, &status);
    183             if (U_FAILURE(status)) {
    184                 ALOGE("ucnv_convertEx failed: %d", status);
    185                 mValues->setEntry(i, "???");
    186             } else {
    187                 // zero terminate
    188                 *target = 0;
    189                 mValues->setEntry(i, buffer);
    190             }
    191 
    192             delete[] buffer;
    193         }
    194 
    195         ucnv_close(conv);
    196         ucnv_close(utf8Conv);
    197     }
    198 }
    199 
    200 void MediaScannerClient::endFile()
    201 {
    202     if (mLocaleEncoding != kEncodingNone) {
    203         int size = mNames->size();
    204         uint32_t encoding = kEncodingAll;
    205 
    206         // compute a bit mask containing all possible encodings
    207         for (int i = 0; i < mNames->size(); i++)
    208             encoding &= possibleEncodings(mValues->getEntry(i));
    209 
    210         // if the locale encoding matches, then assume we have a native encoding.
    211         if (encoding & mLocaleEncoding)
    212             convertValues(mLocaleEncoding);
    213 
    214         // finally, push all name/value pairs to the client
    215         for (int i = 0; i < mNames->size(); i++) {
    216             status_t status = handleStringTag(mNames->getEntry(i), mValues->getEntry(i));
    217             if (status) {
    218                 break;
    219             }
    220         }
    221     }
    222     // else addStringTag() has done all the work so we have nothing to do
    223 
    224     delete mNames;
    225     delete mValues;
    226     mNames = NULL;
    227     mValues = NULL;
    228 }
    229 
    230 }  // namespace android
    231