1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "libcore_util_CharsetUtils.h" 18 19 #include <string.h> 20 21 #include "jni_internal.h" 22 #include "mirror/string-inl.h" 23 #include "mirror/string.h" 24 #include "native_util.h" 25 #include "nativehelper/scoped_primitive_array.h" 26 #include "nativehelper/jni_macros.h" 27 #include "scoped_fast_native_object_access-inl.h" 28 #include "unicode/utf16.h" 29 30 namespace art { 31 32 /** 33 * Approximates java.lang.UnsafeByteSequence so we don't have to pay the cost of calling back into 34 * Java when converting a char[] to a UTF-8 byte[]. This lets us have UTF-8 conversions slightly 35 * faster than ICU for large char[]s without paying for the NIO overhead with small char[]s. 36 * 37 * We could avoid this by keeping the UTF-8 bytes on the native heap until we're done and only 38 * creating a byte[] on the Java heap when we know how big it needs to be, but one shouldn't lie 39 * to the garbage collector (nor hide potentially large allocations from it). 40 * 41 * Because a call to append might require an allocation, it might fail. Callers should always 42 * check the return value of append. 43 */ 44 class NativeUnsafeByteSequence { 45 public: 46 explicit NativeUnsafeByteSequence(JNIEnv* env) 47 : mEnv(env), mJavaArray(nullptr), mRawArray(nullptr), mSize(-1), mOffset(0) { 48 } 49 50 ~NativeUnsafeByteSequence() { 51 // Release our pointer to the raw array, copying changes back to the Java heap. 52 if (mRawArray != nullptr) { 53 mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, 0); 54 } 55 } 56 57 bool append(jbyte b) { 58 if (mOffset == mSize && !resize(mSize * 2)) { 59 return false; 60 } 61 mRawArray[mOffset++] = b; 62 return true; 63 } 64 65 bool resize(int newSize) { 66 if (newSize == mSize) { 67 return true; 68 } 69 70 // Allocate a new array. 71 jbyteArray newJavaArray = mEnv->NewByteArray(newSize); 72 if (newJavaArray == nullptr) { 73 return false; 74 } 75 jbyte* newRawArray = mEnv->GetByteArrayElements(newJavaArray, nullptr); 76 if (newRawArray == nullptr) { 77 return false; 78 } 79 80 // Copy data out of the old array and then let go of it. 81 // Note that we may be trimming the array. 82 if (mRawArray != nullptr) { 83 memcpy(newRawArray, mRawArray, mOffset); 84 mEnv->ReleaseByteArrayElements(mJavaArray, mRawArray, JNI_ABORT); 85 mEnv->DeleteLocalRef(mJavaArray); 86 } 87 88 // Point ourselves at the new array. 89 mJavaArray = newJavaArray; 90 mRawArray = newRawArray; 91 mSize = newSize; 92 return true; 93 } 94 95 jbyteArray toByteArray() { 96 // Trim any unused space, if necessary. 97 bool okay = resize(mOffset); 98 return okay ? mJavaArray : nullptr; 99 } 100 101 private: 102 JNIEnv* mEnv; 103 jbyteArray mJavaArray; 104 jbyte* mRawArray; 105 jint mSize; 106 jint mOffset; 107 108 // Disallow copy and assignment. 109 NativeUnsafeByteSequence(const NativeUnsafeByteSequence&); 110 void operator=(const NativeUnsafeByteSequence&); 111 }; 112 113 static void CharsetUtils_asciiBytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, jint offset, 114 jint length, jcharArray javaChars) { 115 ScopedByteArrayRO bytes(env, javaBytes); 116 if (bytes.get() == nullptr) { 117 return; 118 } 119 ScopedCharArrayRW chars(env, javaChars); 120 if (chars.get() == nullptr) { 121 return; 122 } 123 124 const jbyte* src = &bytes[offset]; 125 jchar* dst = &chars[0]; 126 static const jchar REPLACEMENT_CHAR = 0xfffd; 127 for (int i = length - 1; i >= 0; --i) { 128 jchar ch = static_cast<jchar>(*src++ & 0xff); 129 *dst++ = (ch <= 0x7f) ? ch : REPLACEMENT_CHAR; 130 } 131 } 132 133 static void CharsetUtils_isoLatin1BytesToChars(JNIEnv* env, jclass, jbyteArray javaBytes, 134 jint offset, jint length, jcharArray javaChars) { 135 ScopedByteArrayRO bytes(env, javaBytes); 136 if (bytes.get() == nullptr) { 137 return; 138 } 139 ScopedCharArrayRW chars(env, javaChars); 140 if (chars.get() == nullptr) { 141 return; 142 } 143 144 const jbyte* src = &bytes[offset]; 145 jchar* dst = &chars[0]; 146 for (int i = length - 1; i >= 0; --i) { 147 *dst++ = static_cast<jchar>(*src++ & 0xff); 148 } 149 } 150 151 /** 152 * Translates the given characters to US-ASCII or ISO-8859-1 bytes, using the fact that 153 * Unicode code points between U+0000 and U+007f inclusive are identical to US-ASCII, while 154 * U+0000 to U+00ff inclusive are identical to ISO-8859-1. 155 */ 156 static jbyteArray charsToBytes(JNIEnv* env, jstring java_string, jint offset, jint length, 157 jchar maxValidChar) { 158 ScopedObjectAccess soa(env); 159 StackHandleScope<1> hs(soa.Self()); 160 Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(java_string))); 161 if (string == nullptr) { 162 return nullptr; 163 } 164 165 jbyteArray javaBytes = env->NewByteArray(length); 166 ScopedByteArrayRW bytes(env, javaBytes); 167 if (bytes.get() == nullptr) { 168 return nullptr; 169 } 170 171 jbyte* dst = &bytes[0]; 172 for (int i = 0; i < length; ++i) { 173 jchar ch = string->CharAt(offset + i); 174 if (ch > maxValidChar) { 175 ch = '?'; 176 } 177 *dst++ = static_cast<jbyte>(ch); 178 } 179 180 return javaBytes; 181 } 182 183 static jbyteArray CharsetUtils_toAsciiBytes(JNIEnv* env, jclass, jstring java_string, jint offset, 184 jint length) { 185 return charsToBytes(env, java_string, offset, length, 0x7f); 186 } 187 188 static jbyteArray CharsetUtils_toIsoLatin1Bytes(JNIEnv* env, jclass, jstring java_string, 189 jint offset, jint length) { 190 return charsToBytes(env, java_string, offset, length, 0xff); 191 } 192 193 static jbyteArray CharsetUtils_toUtf8Bytes(JNIEnv* env, jclass, jstring java_string, jint offset, 194 jint length) { 195 ScopedObjectAccess soa(env); 196 StackHandleScope<1> hs(soa.Self()); 197 Handle<mirror::String> string(hs.NewHandle(soa.Decode<mirror::String>(java_string))); 198 if (string == nullptr) { 199 return nullptr; 200 } 201 202 NativeUnsafeByteSequence out(env); 203 if (!out.resize(length)) { 204 return nullptr; 205 } 206 207 const int end = offset + length; 208 for (int i = offset; i < end; ++i) { 209 jint ch = string->CharAt(i); 210 if (ch < 0x80) { 211 // One byte. 212 if (!out.append(ch)) { 213 return nullptr; 214 } 215 } else if (ch < 0x800) { 216 // Two bytes. 217 if (!out.append((ch >> 6) | 0xc0) || !out.append((ch & 0x3f) | 0x80)) { 218 return nullptr; 219 } 220 } else if (U16_IS_SURROGATE(ch)) { 221 // A supplementary character. 222 jchar high = static_cast<jchar>(ch); 223 jchar low = (i + 1 != end) ? string->CharAt(i + 1) : 0; 224 if (!U16_IS_SURROGATE_LEAD(high) || !U16_IS_SURROGATE_TRAIL(low)) { 225 if (!out.append('?')) { 226 return nullptr; 227 } 228 continue; 229 } 230 // Now we know we have a *valid* surrogate pair, we can consume the low surrogate. 231 ++i; 232 ch = U16_GET_SUPPLEMENTARY(high, low); 233 // Four bytes. 234 jbyte b1 = (ch >> 18) | 0xf0; 235 jbyte b2 = ((ch >> 12) & 0x3f) | 0x80; 236 jbyte b3 = ((ch >> 6) & 0x3f) | 0x80; 237 jbyte b4 = (ch & 0x3f) | 0x80; 238 if (!out.append(b1) || !out.append(b2) || !out.append(b3) || !out.append(b4)) { 239 return nullptr; 240 } 241 } else { 242 // Three bytes. 243 jbyte b1 = (ch >> 12) | 0xe0; 244 jbyte b2 = ((ch >> 6) & 0x3f) | 0x80; 245 jbyte b3 = (ch & 0x3f) | 0x80; 246 if (!out.append(b1) || !out.append(b2) || !out.append(b3)) { 247 return nullptr; 248 } 249 } 250 } 251 return out.toByteArray(); 252 } 253 254 static JNINativeMethod gMethods[] = { 255 FAST_NATIVE_METHOD(CharsetUtils, asciiBytesToChars, "([BII[C)V"), 256 FAST_NATIVE_METHOD(CharsetUtils, isoLatin1BytesToChars, "([BII[C)V"), 257 FAST_NATIVE_METHOD(CharsetUtils, toAsciiBytes, "(Ljava/lang/String;II)[B"), 258 FAST_NATIVE_METHOD(CharsetUtils, toIsoLatin1Bytes, "(Ljava/lang/String;II)[B"), 259 FAST_NATIVE_METHOD(CharsetUtils, toUtf8Bytes, "(Ljava/lang/String;II)[B"), 260 }; 261 262 void register_libcore_util_CharsetUtils(JNIEnv* env) { 263 REGISTER_NATIVE_METHODS("libcore/util/CharsetUtils"); 264 } 265 266 } // namespace art 267