Home | History | Annotate | Download | only in native
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #define LOG_TAG "String"
     18 
     19 #include "JNIHelp.h"
     20 #include "JniConstants.h"
     21 #include "ScopedPrimitiveArray.h"
     22 #include "jni.h"
     23 #include "unicode/utf16.h"
     24 
     25 #include <string.h>
     26 
     27 /**
     28  * Approximates java.lang.UnsafeByteSequence so we don't have to pay the cost of calling back into
     29  * Java when converting a char[] to a UTF-8 byte[]. This lets us have UTF-8 conversions slightly
     30  * faster than ICU for large char[]s without paying for the NIO overhead with small char[]s.
     31  *
     32  * We could avoid this by keeping the UTF-8 bytes on the native heap until we're done and only
     33  * creating a byte[] on the Java heap when we know how big it needs to be, but one shouldn't lie
     34  * to the garbage collector (nor hide potentially large allocations from it).
     35  *
     36  * Because a call to append might require an allocation, it might fail. Callers should always
     37  * check the return value of append.
     38  */
     39 class NativeUnsafeByteSequence {
     40 public:
     41     NativeUnsafeByteSequence(JNIEnv* env)
     42         : mEnv(env), mJavaArray(NULL), mRawArray(NULL), mSize(-1), mOffset(0)
     43     {
     44     }
     45 
     46     ~NativeUnsafeByteSequence() {
     47         // Release our pointer to the raw array, copying changes back to the Java heap.
     48         if (mRawArray != NULL) {
     49             mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, 0);
     50         }
     51     }
     52 
     53     bool append(jbyte b) {
     54         if (mOffset == mSize && !resize(mSize * 2)) {
     55             return false;
     56         }
     57         mRawArray[mOffset++] = b;
     58         return true;
     59     }
     60 
     61     bool resize(int newSize) {
     62         if (newSize == mSize) {
     63             return true;
     64         }
     65 
     66         // Allocate a new array.
     67         jbyteArray newJavaArray = mEnv->NewByteArray(newSize);
     68         if (newJavaArray == NULL) {
     69             return false;
     70         }
     71         jbyte* newRawArray = mEnv->GetByteArrayElements(newJavaArray, NULL);
     72         if (newRawArray == NULL) {
     73             return false;
     74         }
     75 
     76         // Copy data out of the old array and then let go of it.
     77         // Note that we may be trimming the array.
     78         if (mRawArray != NULL) {
     79             memcpy(newRawArray, mRawArray, mOffset);
     80             mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, JNI_ABORT);
     81             mEnv->DeleteLocalRef(mJavaArray);
     82         }
     83 
     84         // Point ourselves at the new array.
     85         mJavaArray = newJavaArray;
     86         mRawArray = newRawArray;
     87         mSize = newSize;
     88         return true;
     89     }
     90 
     91     jbyteArray toByteArray() {
     92         // Trim any unused space, if necessary.
     93         bool okay = resize(mOffset);
     94         return okay ? mJavaArray : NULL;
     95     }
     96 
     97 private:
     98     JNIEnv* mEnv;
     99     jbyteArray mJavaArray;
    100     jbyte* mRawArray;
    101     jint mSize;
    102     jint mOffset;
    103 
    104     // Disallow copy and assignment.
    105     NativeUnsafeByteSequence(const NativeUnsafeByteSequence&);
    106     void operator=(const NativeUnsafeByteSequence&);
    107 };
    108 
    109 static void Charsets_asciiBytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, jint length, jcharArray javaChars) {
    110     ScopedByteArrayRO bytes(env, javaBytes);
    111     if (bytes.get() == NULL) {
    112         return;
    113     }
    114     ScopedCharArrayRW chars(env, javaChars);
    115     if (chars.get() == NULL) {
    116         return;
    117     }
    118 
    119     const jbyte* src = &bytes[offset];
    120     jchar* dst = &chars[0];
    121     static const jchar REPLACEMENT_CHAR = 0xfffd;
    122     for (int i = length - 1; i >= 0; --i) {
    123         jchar ch = static_cast<jchar>(*src++ & 0xff);
    124         *dst++ = (ch <= 0x7f) ? ch : REPLACEMENT_CHAR;
    125     }
    126 }
    127 
    128 static void Charsets_isoLatin1BytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, jint length, jcharArray javaChars) {
    129     ScopedByteArrayRO bytes(env, javaBytes);
    130     if (bytes.get() == NULL) {
    131         return;
    132     }
    133     ScopedCharArrayRW chars(env, javaChars);
    134     if (chars.get() == NULL) {
    135         return;
    136     }
    137 
    138     const jbyte* src = &bytes[offset];
    139     jchar* dst = &chars[0];
    140     for (int i = length - 1; i >= 0; --i) {
    141         *dst++ = static_cast<jchar>(*src++ & 0xff);
    142     }
    143 }
    144 
    145 /**
    146  * Translates the given characters to US-ASCII or ISO-8859-1 bytes, using the fact that
    147  * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while
    148  * U+0000 to U+00ff inclusive are identical to ISO-8859-1.
    149  */
    150 static jbyteArray charsToBytes(JNIEnv* env, jcharArray javaChars, jint offset, jint length, jchar maxValidChar) {
    151     ScopedCharArrayRO chars(env, javaChars);
    152     if (chars.get() == NULL) {
    153         return NULL;
    154     }
    155 
    156     jbyteArray javaBytes = env->NewByteArray(length);
    157     ScopedByteArrayRW bytes(env, javaBytes);
    158     if (bytes.get() == NULL) {
    159         return NULL;
    160     }
    161 
    162     const jchar* src = &chars[offset];
    163     jbyte* dst = &bytes[0];
    164     for (int i = length - 1; i >= 0; --i) {
    165         jchar ch = *src++;
    166         if (ch > maxValidChar) {
    167             ch = '?';
    168         }
    169         *dst++ = static_cast<jbyte>(ch);
    170     }
    171 
    172     return javaBytes;
    173 }
    174 
    175 static jbyteArray Charsets_toAsciiBytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) {
    176     return charsToBytes(env, javaChars, offset, length, 0x7f);
    177 }
    178 
    179 static jbyteArray Charsets_toIsoLatin1Bytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) {
    180     return charsToBytes(env, javaChars, offset, length, 0xff);
    181 }
    182 
    183 static jbyteArray Charsets_toUtf8Bytes(JNIEnv* env, jclass, jcharArray javaChars, jint offset, jint length) {
    184     ScopedCharArrayRO chars(env, javaChars);
    185     if (chars.get() == NULL) {
    186         return NULL;
    187     }
    188 
    189     NativeUnsafeByteSequence out(env);
    190     if (!out.resize(length)) {
    191         return NULL;
    192     }
    193 
    194     const int end = offset + length;
    195     for (int i = offset; i < end; ++i) {
    196         jint ch = chars[i];
    197         if (ch < 0x80) {
    198             // One byte.
    199             if (!out.append(ch)) {
    200                 return NULL;
    201             }
    202         } else if (ch < 0x800) {
    203             // Two bytes.
    204             if (!out.append((ch >> 6) | 0xc0) || !out.append((ch & 0x3f) | 0x80)) {
    205                 return NULL;
    206             }
    207         } else if (U16_IS_SURROGATE(ch)) {
    208             // A supplementary character.
    209             jchar high = (jchar) ch;
    210             jchar low = (i + 1 != end) ? chars[i + 1] : 0;
    211             if (!U16_IS_SURROGATE_LEAD(high) || !U16_IS_SURROGATE_TRAIL(low)) {
    212                 if (!out.append('?')) {
    213                     return NULL;
    214                 }
    215                 continue;
    216             }
    217             // Now we know we have a *valid* surrogate pair, we can consume the low surrogate.
    218             ++i;
    219             ch = U16_GET_SUPPLEMENTARY(high, low);
    220             // Four bytes.
    221             jbyte b1 = (ch >> 18) | 0xf0;
    222             jbyte b2 = ((ch >> 12) & 0x3f) | 0x80;
    223             jbyte b3 = ((ch >> 6) & 0x3f) | 0x80;
    224             jbyte b4 = (ch & 0x3f) | 0x80;
    225             if (!out.append(b1) || !out.append(b2) || !out.append(b3) || !out.append(b4)) {
    226                 return NULL;
    227             }
    228         } else {
    229             // Three bytes.
    230             jbyte b1 = (ch >> 12) | 0xe0;
    231             jbyte b2 = ((ch >> 6) & 0x3f) | 0x80;
    232             jbyte b3 = (ch & 0x3f) | 0x80;
    233             if (!out.append(b1) || !out.append(b2) || !out.append(b3)) {
    234                 return NULL;
    235             }
    236         }
    237     }
    238     return out.toByteArray();
    239 }
    240 
// Table of the native methods implemented in this file, passed to jniRegisterNativeMethods
// below. Each entry names a Charsets method and gives its JNI type signature:
//   "([BII[C)V" is (byte[], int, int, char[]) returning void;
//   "([CII)[B"  is (char[], int, int) returning byte[].
// NOTE(review): NATIVE_METHOD is defined outside this file; presumably it pairs each name with
// the matching Charsets_<name> function above -- confirm against its definition.
static JNINativeMethod gMethods[] = {
    NATIVE_METHOD(Charsets, asciiBytesToChars, "([BII[C)V"),
    NATIVE_METHOD(Charsets, isoLatin1BytesToChars, "([BII[C)V"),
    NATIVE_METHOD(Charsets, toAsciiBytes, "([CII)[B"),
    NATIVE_METHOD(Charsets, toIsoLatin1Bytes, "([CII)[B"),
    NATIVE_METHOD(Charsets, toUtf8Bytes, "([CII)[B"),
};
    248 void register_java_nio_charset_Charsets(JNIEnv* env) {
    249     jniRegisterNativeMethods(env, "java/nio/charset/Charsets", gMethods, NELEM(gMethods));
    250 }
    251