1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "jni_internal.h" 18 #include "mirror/string.h" 19 #include "mirror/string-inl.h" 20 #include "native/libcore_util_CharsetUtils.h" 21 #include "scoped_fast_native_object_access-inl.h" 22 #include "ScopedPrimitiveArray.h" 23 #include "unicode/utf16.h" 24 25 #include <string.h> 26 27 namespace art { 28 29 /** 30 * Approximates java.lang.UnsafeByteSequence so we don't have to pay the cost of calling back into 31 * Java when converting a char[] to a UTF-8 byte[]. This lets us have UTF-8 conversions slightly 32 * faster than ICU for large char[]s without paying for the NIO overhead with small char[]s. 33 * 34 * We could avoid this by keeping the UTF-8 bytes on the native heap until we're done and only 35 * creating a byte[] on the Java heap when we know how big it needs to be, but one shouldn't lie 36 * to the garbage collector (nor hide potentially large allocations from it). 37 * 38 * Because a call to append might require an allocation, it might fail. Callers should always 39 * check the return value of append. 40 */ 41 class NativeUnsafeByteSequence { 42 public: 43 explicit NativeUnsafeByteSequence(JNIEnv* env) 44 : mEnv(env), mJavaArray(nullptr), mRawArray(nullptr), mSize(-1), mOffset(0) { 45 } 46 47 ~NativeUnsafeByteSequence() { 48 // Release our pointer to the raw array, copying changes back to the Java heap. 49 if (mRawArray != nullptr) { 50 mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, 0); 51 } 52 } 53 54 bool append(jbyte b) { 55 if (mOffset == mSize && !resize(mSize * 2)) { 56 return false; 57 } 58 mRawArray[mOffset++] = b; 59 return true; 60 } 61 62 bool resize(int newSize) { 63 if (newSize == mSize) { 64 return true; 65 } 66 67 // Allocate a new array. 68 jbyteArray newJavaArray = mEnv->NewByteArray(newSize); 69 if (newJavaArray == nullptr) { 70 return false; 71 } 72 jbyte* newRawArray = mEnv->GetByteArrayElements(newJavaArray, nullptr); 73 if (newRawArray == nullptr) { 74 return false; 75 } 76 77 // Copy data out of the old array and then let go of it. 78 // Note that we may be trimming the array. 79 if (mRawArray != nullptr) { 80 memcpy(newRawArray, mRawArray, mOffset); 81 mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, JNI_ABORT); 82 mEnv->DeleteLocalRef(mJavaArray); 83 } 84 85 // Point ourselves at the new array. 86 mJavaArray = newJavaArray; 87 mRawArray = newRawArray; 88 mSize = newSize; 89 return true; 90 } 91 92 jbyteArray toByteArray() { 93 // Trim any unused space, if necessary. 94 bool okay = resize(mOffset); 95 return okay ? mJavaArray : nullptr; 96 } 97 98 private: 99 JNIEnv* mEnv; 100 jbyteArray mJavaArray; 101 jbyte* mRawArray; 102 jint mSize; 103 jint mOffset; 104 105 // Disallow copy and assignment. 106 NativeUnsafeByteSequence(const NativeUnsafeByteSequence&); 107 void operator=(const NativeUnsafeByteSequence&); 108 }; 109 110 static void CharsetUtils_asciiBytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, 111 jint length, jcharArray javaChars) { 112 ScopedByteArrayRO bytes(env, javaBytes); 113 if (bytes.get() == nullptr) { 114 return; 115 } 116 ScopedCharArrayRW chars(env, javaChars); 117 if (chars.get() == nullptr) { 118 return; 119 } 120 121 const jbyte* src = &bytes[offset]; 122 jchar* dst = &chars[0]; 123 static const jchar REPLACEMENT_CHAR = 0xfffd; 124 for (int i = length - 1; i >= 0; --i) { 125 jchar ch = static_cast<jchar>(*src++ & 0xff); 126 *dst++ = (ch <= 0x7f) ? ch : REPLACEMENT_CHAR; 127 } 128 } 129 130 static void CharsetUtils_isoLatin1BytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, 131 jint offset, jint length, jcharArray javaChars) { 132 ScopedByteArrayRO bytes(env, javaBytes); 133 if (bytes.get() == nullptr) { 134 return; 135 } 136 ScopedCharArrayRW chars(env, javaChars); 137 if (chars.get() == nullptr) { 138 return; 139 } 140 141 const jbyte* src = &bytes[offset]; 142 jchar* dst = &chars[0]; 143 for (int i = length - 1; i >= 0; --i) { 144 *dst++ = static_cast<jchar>(*src++ & 0xff); 145 } 146 } 147 148 /** 149 * Translates the given characters to US-ASCII or ISO-8859-1 bytes, using the fact that 150 * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while 151 * U+0000 to U+00ff inclusive are identical to ISO-8859-1. 152 */ 153 static jbyteArray charsToBytes(JNIEnv* env, jstring java_string, jint offset, jint length, 154 jchar maxValidChar) { 155 ScopedObjectAccess soa(env); 156 StackHandleScope<1> hs(soa.Self()); 157 Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(java_string))); 158 if (string == nullptr) { 159 return nullptr; 160 } 161 162 jbyteArray javaBytes = env->NewByteArray(length); 163 ScopedByteArrayRW bytes(env, javaBytes); 164 if (bytes.get() == nullptr) { 165 return nullptr; 166 } 167 168 jbyte* dst = &bytes[0]; 169 for (int i = 0; i < length; ++i) { 170 jchar ch = string->CharAt(offset + i); 171 if (ch > maxValidChar) { 172 ch = '?'; 173 } 174 *dst++ = static_cast<jbyte>(ch); 175 } 176 177 return javaBytes; 178 } 179 180 static jbyteArray CharsetUtils_toAsciiBytes(JNIEnv* env, jclass, jstring java_string, jint offset, 181 jint length) { 182 return charsToBytes(env, java_string, offset, length, 0x7f); 183 } 184 185 static jbyteArray CharsetUtils_toIsoLatin1Bytes(JNIEnv* env, jclass, jstring java_string, 186 jint offset, jint length) { 187 return charsToBytes(env, java_string, offset, length, 0xff); 188 } 189 190 static jbyteArray CharsetUtils_toUtf8Bytes(JNIEnv* env, jclass, jstring java_string, jint offset, 191 jint length) { 192 ScopedObjectAccess soa(env); 193 StackHandleScope<1> hs(soa.Self()); 194 Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(java_string))); 195 if (string == nullptr) { 196 return nullptr; 197 } 198 199 NativeUnsafeByteSequence out(env); 200 if (!out.resize(length)) { 201 return nullptr; 202 } 203 204 const int end = offset + length; 205 for (int i = offset; i < end; ++i) { 206 jint ch = string->CharAt(i); 207 if (ch < 0x80) { 208 // One byte. 209 if (!out.append(ch)) { 210 return nullptr; 211 } 212 } else if (ch < 0x800) { 213 // Two bytes. 214 if (!out.append((ch >> 6) | 0xc0) || !out.append((ch & 0x3f) | 0x80)) { 215 return nullptr; 216 } 217 } else if (U16_IS_SURROGATE(ch)) { 218 // A supplementary character. 219 jchar high = static_cast<jchar>(ch); 220 jchar low = (i + 1 != end) ? string->CharAt(i + 1) : 0; 221 if (!U16_IS_SURROGATE_LEAD(high) || !U16_IS_SURROGATE_TRAIL(low)) { 222 if (!out.append('?')) { 223 return nullptr; 224 } 225 continue; 226 } 227 // Now we know we have a *valid* surrogate pair, we can consume the low surrogate. 228 ++i; 229 ch = U16_GET_SUPPLEMENTARY(high, low); 230 // Four bytes. 231 jbyte b1 = (ch >> 18) | 0xf0; 232 jbyte b2 = ((ch >> 12) & 0x3f) | 0x80; 233 jbyte b3 = ((ch >> 6) & 0x3f) | 0x80; 234 jbyte b4 = (ch & 0x3f) | 0x80; 235 if (!out.append(b1) || !out.append(b2) || !out.append(b3) || !out.append(b4)) { 236 return nullptr; 237 } 238 } else { 239 // Three bytes. 240 jbyte b1 = (ch >> 12) | 0xe0; 241 jbyte b2 = ((ch >> 6) & 0x3f) | 0x80; 242 jbyte b3 = (ch & 0x3f) | 0x80; 243 if (!out.append(b1) || !out.append(b2) || !out.append(b3)) { 244 return nullptr; 245 } 246 } 247 } 248 return out.toByteArray(); 249 } 250 251 static JNINativeMethod gMethods[] = { 252 FAST_NATIVE_METHOD(CharsetUtils, asciiBytesToChars, "([BII[C)V"), 253 FAST_NATIVE_METHOD(CharsetUtils, isoLatin1BytesToChars, "([BII[C)V"), 254 FAST_NATIVE_METHOD(CharsetUtils, toAsciiBytes, "(Ljava/lang/String;II)[B"), 255 FAST_NATIVE_METHOD(CharsetUtils, toIsoLatin1Bytes, "(Ljava/lang/String;II)[B"), 256 FAST_NATIVE_METHOD(CharsetUtils, toUtf8Bytes, "(Ljava/lang/String;II)[B"), 257 }; 258 259 void register_libcore_util_CharsetUtils(JNIEnv* env) { 260 REGISTER_NATIVE_METHODS("libcore/util/CharsetUtils"); 261 } 262 263 } // namespace art 264