1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. 4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 5 * 6 * This code is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 only, as 8 * published by the Free Software Foundation. Oracle designates this 9 * particular file as subject to the "Classpath" exception as provided 10 * by Oracle in the LICENSE file that accompanied this code. 11 * 12 * This code is distributed in the hope that it will be useful, but WITHOUT 13 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 14 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 15 * version 2 for more details (a copy is included in the LICENSE file that 16 * accompanied this code). 17 * 18 * You should have received a copy of the GNU General Public License version 19 * 2 along with this work; if not, write to the Free Software Foundation, 20 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 21 * 22 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 23 * or visit www.oracle.com if you need additional information or have any 24 * questions. 25 */ 26 27 package java.lang; 28 29 import java.io.UnsupportedEncodingException; 30 import java.lang.ref.SoftReference; 31 import java.nio.ByteBuffer; 32 import java.nio.CharBuffer; 33 import java.nio.charset.Charset; 34 import java.nio.charset.CharsetDecoder; 35 import java.nio.charset.CharsetEncoder; 36 import java.nio.charset.CharacterCodingException; 37 import java.nio.charset.CoderResult; 38 import java.nio.charset.CodingErrorAction; 39 import java.nio.charset.IllegalCharsetNameException; 40 import java.nio.charset.UnsupportedCharsetException; 41 import java.util.Arrays; 42 import sun.misc.MessageUtils; 43 import sun.nio.cs.HistoricallyNamedCharset; 44 import sun.nio.cs.ArrayDecoder; 45 import sun.nio.cs.ArrayEncoder; 46 47 /** 48 * Utility class for string encoding and decoding. 49 */ 50 51 class StringCoding { 52 53 private StringCoding() { } 54 55 /** The cached coders for each thread */ 56 private final static ThreadLocal<SoftReference<StringDecoder>> decoder = 57 new ThreadLocal<>(); 58 private final static ThreadLocal<SoftReference<StringEncoder>> encoder = 59 new ThreadLocal<>(); 60 61 private static boolean warnUnsupportedCharset = true; 62 63 private static <T> T deref(ThreadLocal<SoftReference<T>> tl) { 64 SoftReference<T> sr = tl.get(); 65 if (sr == null) 66 return null; 67 return sr.get(); 68 } 69 70 private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) { 71 tl.set(new SoftReference<T>(ob)); 72 } 73 74 // Trim the given byte array to the given length 75 // 76 private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) { 77 78 // Android-changed: System.getSecurityManager() == null is always true on Android. 79 // Libcore tests expect a defensive copy in pretty much all cases. 80 // if (len == ba.length && (isTrusted || System.getSecurityManager() == null)) 81 if (len == ba.length && (isTrusted)) 82 return ba; 83 else 84 return Arrays.copyOf(ba, len); 85 } 86 87 // Trim the given char array to the given length 88 // 89 private static char[] safeTrim(char[] ca, int len, 90 Charset cs, boolean isTrusted) { 91 // Android-changed: System.getSecurityManager() == null is always true on Android. 92 // Libcore tests expect a defensive copy in pretty much all cases. 93 // if (len == ca.length && (isTrusted || System.getSecurityManager() == null)) 94 if (len == ca.length && (isTrusted)) 95 return ca; 96 else 97 return Arrays.copyOf(ca, len); 98 } 99 100 private static int scale(int len, float expansionFactor) { 101 // We need to perform double, not float, arithmetic; otherwise 102 // we lose low order bits when len is larger than 2**24. 103 return (int)(len * (double)expansionFactor); 104 } 105 106 private static Charset lookupCharset(String csn) { 107 if (Charset.isSupported(csn)) { 108 try { 109 return Charset.forName(csn); 110 } catch (UnsupportedCharsetException x) { 111 throw new Error(x); 112 } 113 } 114 return null; 115 } 116 117 private static void warnUnsupportedCharset(String csn) { 118 if (warnUnsupportedCharset) { 119 // Use sun.misc.MessageUtils rather than the Logging API or 120 // System.err since this method may be called during VM 121 // initialization before either is available. 122 MessageUtils.err("WARNING: Default charset " + csn + 123 " not supported, using ISO-8859-1 instead"); 124 warnUnsupportedCharset = false; 125 } 126 } 127 128 129 // -- Decoding -- 130 private static class StringDecoder { 131 private final String requestedCharsetName; 132 private final Charset cs; 133 private final CharsetDecoder cd; 134 private final boolean isTrusted; 135 136 private StringDecoder(Charset cs, String rcn) { 137 this.requestedCharsetName = rcn; 138 this.cs = cs; 139 this.cd = cs.newDecoder() 140 .onMalformedInput(CodingErrorAction.REPLACE) 141 .onUnmappableCharacter(CodingErrorAction.REPLACE); 142 this.isTrusted = (cs.getClass().getClassLoader() == null); 143 } 144 145 String charsetName() { 146 if (cs instanceof HistoricallyNamedCharset) 147 return ((HistoricallyNamedCharset)cs).historicalName(); 148 return cs.name(); 149 } 150 151 final String requestedCharsetName() { 152 return requestedCharsetName; 153 } 154 155 char[] decode(byte[] ba, int off, int len) { 156 int en = scale(len, cd.maxCharsPerByte()); 157 char[] ca = new char[en]; 158 if (len == 0) 159 return ca; 160 if (cd instanceof ArrayDecoder) { 161 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); 162 return safeTrim(ca, clen, cs, isTrusted); 163 } else { 164 cd.reset(); 165 ByteBuffer bb = ByteBuffer.wrap(ba, off, len); 166 CharBuffer cb = CharBuffer.wrap(ca); 167 try { 168 CoderResult cr = cd.decode(bb, cb, true); 169 if (!cr.isUnderflow()) 170 cr.throwException(); 171 cr = cd.flush(cb); 172 if (!cr.isUnderflow()) 173 cr.throwException(); 174 } catch (CharacterCodingException x) { 175 // Substitution is always enabled, 176 // so this shouldn't happen 177 throw new Error(x); 178 } 179 return safeTrim(ca, cb.position(), cs, isTrusted); 180 } 181 } 182 } 183 184 static char[] decode(String charsetName, byte[] ba, int off, int len) 185 throws UnsupportedEncodingException 186 { 187 StringDecoder sd = deref(decoder); 188 String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; 189 if ((sd == null) || !(csn.equals(sd.requestedCharsetName()) 190 || csn.equals(sd.charsetName()))) { 191 sd = null; 192 try { 193 Charset cs = lookupCharset(csn); 194 if (cs != null) 195 sd = new StringDecoder(cs, csn); 196 } catch (IllegalCharsetNameException x) {} 197 if (sd == null) 198 throw new UnsupportedEncodingException(csn); 199 set(decoder, sd); 200 } 201 return sd.decode(ba, off, len); 202 } 203 204 static char[] decode(Charset cs, byte[] ba, int off, int len) { 205 // (1)We never cache the "external" cs, the only benefit of creating 206 // an additional StringDe/Encoder object to wrap it is to share the 207 // de/encode() method. These SD/E objects are short-lifed, the young-gen 208 // gc should be able to take care of them well. But the best approash 209 // is still not to generate them if not really necessary. 210 // (2)The defensive copy of the input byte/char[] has a big performance 211 // impact, as well as the outgoing result byte/char[]. Need to do the 212 // optimization check of (sm==null && classLoader0==null) for both. 213 // (3)getClass().getClassLoader0() is expensive 214 // (4)There might be a timing gap in isTrusted setting. getClassLoader0() 215 // is only chcked (and then isTrusted gets set) when (SM==null). It is 216 // possible that the SM==null for now but then SM is NOT null later 217 // when safeTrim() is invoked...the "safe" way to do is to redundant 218 // check (... && (isTrusted || SM == null || getClassLoader0())) in trim 219 // but it then can be argued that the SM is null when the opertaion 220 // is started... 221 CharsetDecoder cd = cs.newDecoder(); 222 int en = scale(len, cd.maxCharsPerByte()); 223 char[] ca = new char[en]; 224 if (len == 0) 225 return ca; 226 boolean isTrusted = false; 227 if (System.getSecurityManager() != null) { 228 if (!(isTrusted = (cs.getClass().getClassLoader() == null))) { 229 ba = Arrays.copyOfRange(ba, off, off + len); 230 off = 0; 231 } 232 } 233 cd.onMalformedInput(CodingErrorAction.REPLACE) 234 .onUnmappableCharacter(CodingErrorAction.REPLACE) 235 .reset(); 236 if (cd instanceof ArrayDecoder) { 237 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca); 238 return safeTrim(ca, clen, cs, isTrusted); 239 } else { 240 ByteBuffer bb = ByteBuffer.wrap(ba, off, len); 241 CharBuffer cb = CharBuffer.wrap(ca); 242 try { 243 CoderResult cr = cd.decode(bb, cb, true); 244 if (!cr.isUnderflow()) 245 cr.throwException(); 246 cr = cd.flush(cb); 247 if (!cr.isUnderflow()) 248 cr.throwException(); 249 } catch (CharacterCodingException x) { 250 // Substitution is always enabled, 251 // so this shouldn't happen 252 throw new Error(x); 253 } 254 return safeTrim(ca, cb.position(), cs, isTrusted); 255 } 256 } 257 258 static char[] decode(byte[] ba, int off, int len) { 259 String csn = Charset.defaultCharset().name(); 260 try { 261 // use charset name decode() variant which provides caching. 262 return decode(csn, ba, off, len); 263 } catch (UnsupportedEncodingException x) { 264 warnUnsupportedCharset(csn); 265 } 266 try { 267 return decode("ISO-8859-1", ba, off, len); 268 } catch (UnsupportedEncodingException x) { 269 // If this code is hit during VM initialization, MessageUtils is 270 // the only way we will be able to get any kind of error message. 271 MessageUtils.err("ISO-8859-1 charset not available: " 272 + x.toString()); 273 // If we can not find ISO-8859-1 (a required encoding) then things 274 // are seriously wrong with the installation. 275 System.exit(1); 276 return null; 277 } 278 } 279 280 // -- Encoding -- 281 private static class StringEncoder { 282 private Charset cs; 283 private CharsetEncoder ce; 284 private final String requestedCharsetName; 285 private final boolean isTrusted; 286 287 private StringEncoder(Charset cs, String rcn) { 288 this.requestedCharsetName = rcn; 289 this.cs = cs; 290 this.ce = cs.newEncoder() 291 .onMalformedInput(CodingErrorAction.REPLACE) 292 .onUnmappableCharacter(CodingErrorAction.REPLACE); 293 this.isTrusted = (cs.getClass().getClassLoader() == null); 294 } 295 296 String charsetName() { 297 if (cs instanceof HistoricallyNamedCharset) 298 return ((HistoricallyNamedCharset)cs).historicalName(); 299 return cs.name(); 300 } 301 302 final String requestedCharsetName() { 303 return requestedCharsetName; 304 } 305 306 byte[] encode(char[] ca, int off, int len) { 307 int en = scale(len, ce.maxBytesPerChar()); 308 byte[] ba = new byte[en]; 309 if (len == 0) 310 return ba; 311 if (ce instanceof ArrayEncoder) { 312 int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba); 313 return safeTrim(ba, blen, cs, isTrusted); 314 } else { 315 ce.reset(); 316 ByteBuffer bb = ByteBuffer.wrap(ba); 317 CharBuffer cb = CharBuffer.wrap(ca, off, len); 318 try { 319 // Android-changed: Pass read-only buffer, so the encoder can't alter it 320 CoderResult cr = ce.encode(cb.asReadOnlyBuffer(), bb, true); 321 if (!cr.isUnderflow()) 322 cr.throwException(); 323 cr = ce.flush(bb); 324 if (!cr.isUnderflow()) 325 cr.throwException(); 326 } catch (CharacterCodingException x) { 327 // Substitution is always enabled, 328 // so this shouldn't happen 329 throw new Error(x); 330 } 331 return safeTrim(ba, bb.position(), cs, isTrusted); 332 } 333 } 334 } 335 336 static byte[] encode(String charsetName, char[] ca, int off, int len) 337 throws UnsupportedEncodingException 338 { 339 StringEncoder se = deref(encoder); 340 String csn = (charsetName == null) ? "ISO-8859-1" : charsetName; 341 if ((se == null) || !(csn.equals(se.requestedCharsetName()) 342 || csn.equals(se.charsetName()))) { 343 se = null; 344 try { 345 Charset cs = lookupCharset(csn); 346 if (cs != null) 347 se = new StringEncoder(cs, csn); 348 } catch (IllegalCharsetNameException x) {} 349 if (se == null) 350 throw new UnsupportedEncodingException (csn); 351 set(encoder, se); 352 } 353 return se.encode(ca, off, len); 354 } 355 356 static byte[] encode(Charset cs, char[] ca, int off, int len) { 357 CharsetEncoder ce = cs.newEncoder(); 358 int en = scale(len, ce.maxBytesPerChar()); 359 byte[] ba = new byte[en]; 360 if (len == 0) 361 return ba; 362 boolean isTrusted = false; 363 if (System.getSecurityManager() != null) { 364 if (!(isTrusted = (cs.getClass().getClassLoader() == null))) { 365 ca = Arrays.copyOfRange(ca, off, off + len); 366 off = 0; 367 } 368 } 369 ce.onMalformedInput(CodingErrorAction.REPLACE) 370 .onUnmappableCharacter(CodingErrorAction.REPLACE) 371 .reset(); 372 if (ce instanceof ArrayEncoder) { 373 int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba); 374 return safeTrim(ba, blen, cs, isTrusted); 375 } else { 376 ByteBuffer bb = ByteBuffer.wrap(ba); 377 CharBuffer cb = CharBuffer.wrap(ca, off, len); 378 try { 379 // Android-changed: Pass read-only buffer, so the encoder can't alter it 380 CoderResult cr = ce.encode(cb.asReadOnlyBuffer(), bb, true); 381 if (!cr.isUnderflow()) 382 cr.throwException(); 383 cr = ce.flush(bb); 384 if (!cr.isUnderflow()) 385 cr.throwException(); 386 } catch (CharacterCodingException x) { 387 throw new Error(x); 388 } 389 return safeTrim(ba, bb.position(), cs, isTrusted); 390 } 391 } 392 393 static byte[] encode(char[] ca, int off, int len) { 394 String csn = Charset.defaultCharset().name(); 395 try { 396 // use charset name encode() variant which provides caching. 397 return encode(csn, ca, off, len); 398 } catch (UnsupportedEncodingException x) { 399 warnUnsupportedCharset(csn); 400 } 401 try { 402 return encode("ISO-8859-1", ca, off, len); 403 } catch (UnsupportedEncodingException x) { 404 // If this code is hit during VM initialization, MessageUtils is 405 // the only way we will be able to get any kind of error message. 406 MessageUtils.err("ISO-8859-1 charset not available: " 407 + x.toString()); 408 // If we can not find ISO-8859-1 (a required encoding) then things 409 // are seriously wrong with the installation. 410 System.exit(1); 411 return null; 412 } 413 } 414 } 415