1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "jni_internal.h" 18 #include "mirror/string.h" 19 #include "mirror/string-inl.h" 20 #include "native/libcore_util_CharsetUtils.h" 21 #include "scoped_fast_native_object_access.h" 22 #include "ScopedPrimitiveArray.h" 23 #include "unicode/utf16.h" 24 25 #include <string.h> 26 27 namespace art { 28 29 /** 30 * Approximates java.lang.UnsafeByteSequence so we don't have to pay the cost of calling back into 31 * Java when converting a char[] to a UTF-8 byte[]. This lets us have UTF-8 conversions slightly 32 * faster than ICU for large char[]s without paying for the NIO overhead with small char[]s. 33 * 34 * We could avoid this by keeping the UTF-8 bytes on the native heap until we're done and only 35 * creating a byte[] on the Java heap when we know how big it needs to be, but one shouldn't lie 36 * to the garbage collector (nor hide potentially large allocations from it). 37 * 38 * Because a call to append might require an allocation, it might fail. Callers should always 39 * check the return value of append. 40 */ 41 class NativeUnsafeByteSequence { 42 public: 43 explicit NativeUnsafeByteSequence(JNIEnv* env) 44 : mEnv(env), mJavaArray(nullptr), mRawArray(nullptr), mSize(-1), mOffset(0) { 45 } 46 47 ~NativeUnsafeByteSequence() { 48 // Release our pointer to the raw array, copying changes back to the Java heap. 49 if (mRawArray != nullptr) { 50 mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, 0); 51 } 52 } 53 54 bool append(jbyte b) { 55 if (mOffset == mSize && !resize(mSize * 2)) { 56 return false; 57 } 58 mRawArray[mOffset++] = b; 59 return true; 60 } 61 62 bool resize(int newSize) { 63 if (newSize == mSize) { 64 return true; 65 } 66 67 // Allocate a new array. 68 jbyteArray newJavaArray = mEnv->NewByteArray(newSize); 69 if (newJavaArray == nullptr) { 70 return false; 71 } 72 jbyte* newRawArray = mEnv->GetByteArrayElements(newJavaArray, nullptr); 73 if (newRawArray == nullptr) { 74 return false; 75 } 76 77 // Copy data out of the old array and then let go of it. 78 // Note that we may be trimming the array. 79 if (mRawArray != nullptr) { 80 memcpy(newRawArray, mRawArray, mOffset); 81 mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, JNI_ABORT); 82 mEnv->DeleteLocalRef(mJavaArray); 83 } 84 85 // Point ourselves at the new array. 86 mJavaArray = newJavaArray; 87 mRawArray = newRawArray; 88 mSize = newSize; 89 return true; 90 } 91 92 jbyteArray toByteArray() { 93 // Trim any unused space, if necessary. 94 bool okay = resize(mOffset); 95 return okay ? mJavaArray : nullptr; 96 } 97 98 private: 99 JNIEnv* mEnv; 100 jbyteArray mJavaArray; 101 jbyte* mRawArray; 102 jint mSize; 103 jint mOffset; 104 105 // Disallow copy and assignment. 106 NativeUnsafeByteSequence(const NativeUnsafeByteSequence&); 107 void operator=(const NativeUnsafeByteSequence&); 108 }; 109 110 static void CharsetUtils_asciiBytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, 111 jint length, jcharArray javaChars) { 112 ScopedByteArrayRO bytes(env, javaBytes); 113 if (bytes.get() == nullptr) { 114 return; 115 } 116 ScopedCharArrayRW chars(env, javaChars); 117 if (chars.get() == nullptr) { 118 return; 119 } 120 121 const jbyte* src = &bytes[offset]; 122 jchar* dst = &chars[0]; 123 static const jchar REPLACEMENT_CHAR = 0xfffd; 124 for (int i = length - 1; i >= 0; --i) { 125 jchar ch = static_cast<jchar>(*src++ & 0xff); 126 *dst++ = (ch <= 0x7f) ? ch : REPLACEMENT_CHAR; 127 } 128 } 129 130 static void CharsetUtils_isoLatin1BytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, 131 jint offset, jint length, jcharArray javaChars) { 132 ScopedByteArrayRO bytes(env, javaBytes); 133 if (bytes.get() == nullptr) { 134 return; 135 } 136 ScopedCharArrayRW chars(env, javaChars); 137 if (chars.get() == nullptr) { 138 return; 139 } 140 141 const jbyte* src = &bytes[offset]; 142 jchar* dst = &chars[0]; 143 for (int i = length - 1; i >= 0; --i) { 144 *dst++ = static_cast<jchar>(*src++ & 0xff); 145 } 146 } 147 148 /** 149 * Translates the given characters to US-ASCII or ISO-8859-1 bytes, using the fact that 150 * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while 151 * U+0000 to U+00ff inclusive are identical to ISO-8859-1. 152 */ 153 static jbyteArray charsToBytes(JNIEnv* env, jstring java_string, jint offset, jint length, 154 jchar maxValidChar) { 155 ScopedObjectAccess soa(env); 156 StackHandleScope<1> hs(soa.Self()); 157 Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String*>(java_string))); 158 if (string.Get() == nullptr) { 159 return nullptr; 160 } 161 162 jbyteArray javaBytes = env->NewByteArray(length); 163 ScopedByteArrayRW bytes(env, javaBytes); 164 if (bytes.get() == nullptr) { 165 return nullptr; 166 } 167 168 const jchar* src = &(string->GetValue()[offset]); 169 jbyte* dst = &bytes[0]; 170 for (int i = length - 1; i >= 0; --i) { 171 jchar ch = *src++; 172 if (ch > maxValidChar) { 173 ch = '?'; 174 } 175 *dst++ = static_cast<jbyte>(ch); 176 } 177 178 return javaBytes; 179 } 180 181 static jbyteArray CharsetUtils_toAsciiBytes(JNIEnv* env, jclass, jstring java_string, jint offset, 182 jint length) { 183 return charsToBytes(env, java_string, offset, length, 0x7f); 184 } 185 186 static jbyteArray CharsetUtils_toIsoLatin1Bytes(JNIEnv* env, jclass, jstring java_string, 187 jint offset, jint length) { 188 return charsToBytes(env, java_string, offset, length, 0xff); 189 } 190 191 static jbyteArray CharsetUtils_toUtf8Bytes(JNIEnv* env, jclass, jstring java_string, jint offset, 192 jint length) { 193 ScopedObjectAccess soa(env); 194 StackHandleScope<1> hs(soa.Self()); 195 Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String*>(java_string))); 196 if (string.Get() == nullptr) { 197 return nullptr; 198 } 199 200 NativeUnsafeByteSequence out(env); 201 if (!out.resize(length)) { 202 return nullptr; 203 } 204 205 const int end = offset + length; 206 for (int i = offset; i < end; ++i) { 207 jint ch = string->CharAt(i); 208 if (ch < 0x80) { 209 // One byte. 210 if (!out.append(ch)) { 211 return nullptr; 212 } 213 } else if (ch < 0x800) { 214 // Two bytes. 215 if (!out.append((ch >> 6) | 0xc0) || !out.append((ch & 0x3f) | 0x80)) { 216 return nullptr; 217 } 218 } else if (U16_IS_SURROGATE(ch)) { 219 // A supplementary character. 220 jchar high = static_cast<jchar>(ch); 221 jchar low = (i + 1 != end) ? string->CharAt(i + 1) : 0; 222 if (!U16_IS_SURROGATE_LEAD(high) || !U16_IS_SURROGATE_TRAIL(low)) { 223 if (!out.append('?')) { 224 return nullptr; 225 } 226 continue; 227 } 228 // Now we know we have a *valid* surrogate pair, we can consume the low surrogate. 229 ++i; 230 ch = U16_GET_SUPPLEMENTARY(high, low); 231 // Four bytes. 232 jbyte b1 = (ch >> 18) | 0xf0; 233 jbyte b2 = ((ch >> 12) & 0x3f) | 0x80; 234 jbyte b3 = ((ch >> 6) & 0x3f) | 0x80; 235 jbyte b4 = (ch & 0x3f) | 0x80; 236 if (!out.append(b1) || !out.append(b2) || !out.append(b3) || !out.append(b4)) { 237 return nullptr; 238 } 239 } else { 240 // Three bytes. 241 jbyte b1 = (ch >> 12) | 0xe0; 242 jbyte b2 = ((ch >> 6) & 0x3f) | 0x80; 243 jbyte b3 = (ch & 0x3f) | 0x80; 244 if (!out.append(b1) || !out.append(b2) || !out.append(b3)) { 245 return nullptr; 246 } 247 } 248 } 249 return out.toByteArray(); 250 } 251 252 static JNINativeMethod gMethods[] = { 253 NATIVE_METHOD(CharsetUtils, asciiBytesToChars, "!([BII[C)V"), 254 NATIVE_METHOD(CharsetUtils, isoLatin1BytesToChars, "!([BII[C)V"), 255 NATIVE_METHOD(CharsetUtils, toAsciiBytes, "!(Ljava/lang/String;II)[B"), 256 NATIVE_METHOD(CharsetUtils, toIsoLatin1Bytes, "!(Ljava/lang/String;II)[B"), 257 NATIVE_METHOD(CharsetUtils, toUtf8Bytes, "!(Ljava/lang/String;II)[B"), 258 }; 259 260 void register_libcore_util_CharsetUtils(JNIEnv* env) { 261 REGISTER_NATIVE_METHODS("libcore/util/CharsetUtils"); 262 } 263 264 } // namespace art 265