Home | History | Annotate | Download | only in lang
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
      4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
      5  *
      6  * This code is free software; you can redistribute it and/or modify it
      7  * under the terms of the GNU General Public License version 2 only, as
      8  * published by the Free Software Foundation.  Oracle designates this
      9  * particular file as subject to the "Classpath" exception as provided
     10  * by Oracle in the LICENSE file that accompanied this code.
     11  *
     12  * This code is distributed in the hope that it will be useful, but WITHOUT
     13  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
     14  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     15  * version 2 for more details (a copy is included in the LICENSE file that
     16  * accompanied this code).
     17  *
     18  * You should have received a copy of the GNU General Public License version
     19  * 2 along with this work; if not, write to the Free Software Foundation,
     20  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
     21  *
     22  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
     23  * or visit www.oracle.com if you need additional information or have any
     24  * questions.
     25  */
     26 
     27 package java.lang;
     28 
     29 import java.io.UnsupportedEncodingException;
     30 import java.lang.ref.SoftReference;
     31 import java.nio.ByteBuffer;
     32 import java.nio.CharBuffer;
     33 import java.nio.charset.Charset;
     34 import java.nio.charset.CharsetDecoder;
     35 import java.nio.charset.CharsetEncoder;
     36 import java.nio.charset.CharacterCodingException;
     37 import java.nio.charset.CoderResult;
     38 import java.nio.charset.CodingErrorAction;
     39 import java.nio.charset.IllegalCharsetNameException;
     40 import java.nio.charset.UnsupportedCharsetException;
     41 import java.util.Arrays;
     42 import sun.misc.MessageUtils;
     43 import sun.nio.cs.HistoricallyNamedCharset;
     44 import sun.nio.cs.ArrayDecoder;
     45 import sun.nio.cs.ArrayEncoder;
     46 
     47 /**
     48  * Utility class for string encoding and decoding.
     49  */
     50 
     51 class StringCoding {
     52 
     53     private StringCoding() { }
     54 
     55     /** The cached coders for each thread */
     56     private final static ThreadLocal<SoftReference<StringDecoder>> decoder =
     57         new ThreadLocal<>();
     58     private final static ThreadLocal<SoftReference<StringEncoder>> encoder =
     59         new ThreadLocal<>();
     60 
     61     private static boolean warnUnsupportedCharset = true;
     62 
     63     private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
     64         SoftReference<T> sr = tl.get();
     65         if (sr == null)
     66             return null;
     67         return sr.get();
     68     }
     69 
     70     private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
     71         tl.set(new SoftReference<T>(ob));
     72     }
     73 
     74     // Trim the given byte array to the given length
     75     //
     76     private static byte[] safeTrim(byte[] ba, int len, Charset cs, boolean isTrusted) {
     77 
     78         // Android-changed: System.getSecurityManager() == null is always true on Android.
     79         // Libcore tests expect a defensive copy in pretty much all cases.
     80         // if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
     81         if (len == ba.length && (isTrusted))
     82             return ba;
     83         else
     84             return Arrays.copyOf(ba, len);
     85     }
     86 
     87     // Trim the given char array to the given length
     88     //
     89     private static char[] safeTrim(char[] ca, int len,
     90                                    Charset cs, boolean isTrusted) {
     91         // Android-changed: System.getSecurityManager() == null is always true on Android.
     92         // Libcore tests expect a defensive copy in pretty much all cases.
     93         // if (len == ca.length && (isTrusted || System.getSecurityManager() == null))
     94         if (len == ca.length && (isTrusted))
     95             return ca;
     96         else
     97             return Arrays.copyOf(ca, len);
     98     }
     99 
    100     private static int scale(int len, float expansionFactor) {
    101         // We need to perform double, not float, arithmetic; otherwise
    102         // we lose low order bits when len is larger than 2**24.
    103         return (int)(len * (double)expansionFactor);
    104     }
    105 
    106     private static Charset lookupCharset(String csn) {
    107         if (Charset.isSupported(csn)) {
    108             try {
    109                 return Charset.forName(csn);
    110             } catch (UnsupportedCharsetException x) {
    111                 throw new Error(x);
    112             }
    113         }
    114         return null;
    115     }
    116 
    117     private static void warnUnsupportedCharset(String csn) {
    118         if (warnUnsupportedCharset) {
    119             // Use sun.misc.MessageUtils rather than the Logging API or
    120             // System.err since this method may be called during VM
    121             // initialization before either is available.
    122             MessageUtils.err("WARNING: Default charset " + csn +
    123                              " not supported, using ISO-8859-1 instead");
    124             warnUnsupportedCharset = false;
    125         }
    126     }
    127 
    128 
    129     // -- Decoding --
    130     private static class StringDecoder {
    131         private final String requestedCharsetName;
    132         private final Charset cs;
    133         private final CharsetDecoder cd;
    134         private final boolean isTrusted;
    135 
    136         private StringDecoder(Charset cs, String rcn) {
    137             this.requestedCharsetName = rcn;
    138             this.cs = cs;
    139             this.cd = cs.newDecoder()
    140                 .onMalformedInput(CodingErrorAction.REPLACE)
    141                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
    142             this.isTrusted = (cs.getClass().getClassLoader() == null);
    143         }
    144 
    145         String charsetName() {
    146             if (cs instanceof HistoricallyNamedCharset)
    147                 return ((HistoricallyNamedCharset)cs).historicalName();
    148             return cs.name();
    149         }
    150 
    151         final String requestedCharsetName() {
    152             return requestedCharsetName;
    153         }
    154 
    155         char[] decode(byte[] ba, int off, int len) {
    156             int en = scale(len, cd.maxCharsPerByte());
    157             char[] ca = new char[en];
    158             if (len == 0)
    159                 return ca;
    160             if (cd instanceof ArrayDecoder) {
    161                 int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
    162                 return safeTrim(ca, clen, cs, isTrusted);
    163             } else {
    164                 cd.reset();
    165                 ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
    166                 CharBuffer cb = CharBuffer.wrap(ca);
    167                 try {
    168                     CoderResult cr = cd.decode(bb, cb, true);
    169                     if (!cr.isUnderflow())
    170                         cr.throwException();
    171                     cr = cd.flush(cb);
    172                     if (!cr.isUnderflow())
    173                         cr.throwException();
    174                 } catch (CharacterCodingException x) {
    175                     // Substitution is always enabled,
    176                     // so this shouldn't happen
    177                     throw new Error(x);
    178                 }
    179                 return safeTrim(ca, cb.position(), cs, isTrusted);
    180             }
    181         }
    182     }
    183 
    184     static char[] decode(String charsetName, byte[] ba, int off, int len)
    185         throws UnsupportedEncodingException
    186     {
    187         StringDecoder sd = deref(decoder);
    188         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
    189         if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
    190                               || csn.equals(sd.charsetName()))) {
    191             sd = null;
    192             try {
    193                 Charset cs = lookupCharset(csn);
    194                 if (cs != null)
    195                     sd = new StringDecoder(cs, csn);
    196             } catch (IllegalCharsetNameException x) {}
    197             if (sd == null)
    198                 throw new UnsupportedEncodingException(csn);
    199             set(decoder, sd);
    200         }
    201         return sd.decode(ba, off, len);
    202     }
    203 
    204     static char[] decode(Charset cs, byte[] ba, int off, int len) {
    205         // (1)We never cache the "external" cs, the only benefit of creating
    206         // an additional StringDe/Encoder object to wrap it is to share the
    207         // de/encode() method. These SD/E objects are short-lifed, the young-gen
    208         // gc should be able to take care of them well. But the best approash
    209         // is still not to generate them if not really necessary.
    210         // (2)The defensive copy of the input byte/char[] has a big performance
    211         // impact, as well as the outgoing result byte/char[]. Need to do the
    212         // optimization check of (sm==null && classLoader0==null) for both.
    213         // (3)getClass().getClassLoader0() is expensive
    214         // (4)There might be a timing gap in isTrusted setting. getClassLoader0()
    215         // is only chcked (and then isTrusted gets set) when (SM==null). It is
    216         // possible that the SM==null for now but then SM is NOT null later
    217         // when safeTrim() is invoked...the "safe" way to do is to redundant
    218         // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
    219         // but it then can be argued that the SM is null when the opertaion
    220         // is started...
    221         CharsetDecoder cd = cs.newDecoder();
    222         int en = scale(len, cd.maxCharsPerByte());
    223         char[] ca = new char[en];
    224         if (len == 0)
    225             return ca;
    226         boolean isTrusted = false;
    227         if (System.getSecurityManager() != null) {
    228             if (!(isTrusted = (cs.getClass().getClassLoader() == null))) {
    229                 ba =  Arrays.copyOfRange(ba, off, off + len);
    230                 off = 0;
    231             }
    232         }
    233         cd.onMalformedInput(CodingErrorAction.REPLACE)
    234           .onUnmappableCharacter(CodingErrorAction.REPLACE)
    235           .reset();
    236         if (cd instanceof ArrayDecoder) {
    237             int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
    238             return safeTrim(ca, clen, cs, isTrusted);
    239         } else {
    240             ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
    241             CharBuffer cb = CharBuffer.wrap(ca);
    242             try {
    243                 CoderResult cr = cd.decode(bb, cb, true);
    244                 if (!cr.isUnderflow())
    245                     cr.throwException();
    246                 cr = cd.flush(cb);
    247                 if (!cr.isUnderflow())
    248                     cr.throwException();
    249             } catch (CharacterCodingException x) {
    250                 // Substitution is always enabled,
    251                 // so this shouldn't happen
    252                 throw new Error(x);
    253             }
    254             return safeTrim(ca, cb.position(), cs, isTrusted);
    255         }
    256     }
    257 
    258     static char[] decode(byte[] ba, int off, int len) {
    259         String csn = Charset.defaultCharset().name();
    260         try {
    261             // use charset name decode() variant which provides caching.
    262             return decode(csn, ba, off, len);
    263         } catch (UnsupportedEncodingException x) {
    264             warnUnsupportedCharset(csn);
    265         }
    266         try {
    267             return decode("ISO-8859-1", ba, off, len);
    268         } catch (UnsupportedEncodingException x) {
    269             // If this code is hit during VM initialization, MessageUtils is
    270             // the only way we will be able to get any kind of error message.
    271             MessageUtils.err("ISO-8859-1 charset not available: "
    272                              + x.toString());
    273             // If we can not find ISO-8859-1 (a required encoding) then things
    274             // are seriously wrong with the installation.
    275             System.exit(1);
    276             return null;
    277         }
    278     }
    279 
    280     // -- Encoding --
    281     private static class StringEncoder {
    282         private Charset cs;
    283         private CharsetEncoder ce;
    284         private final String requestedCharsetName;
    285         private final boolean isTrusted;
    286 
    287         private StringEncoder(Charset cs, String rcn) {
    288             this.requestedCharsetName = rcn;
    289             this.cs = cs;
    290             this.ce = cs.newEncoder()
    291                 .onMalformedInput(CodingErrorAction.REPLACE)
    292                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
    293             this.isTrusted = (cs.getClass().getClassLoader() == null);
    294         }
    295 
    296         String charsetName() {
    297             if (cs instanceof HistoricallyNamedCharset)
    298                 return ((HistoricallyNamedCharset)cs).historicalName();
    299             return cs.name();
    300         }
    301 
    302         final String requestedCharsetName() {
    303             return requestedCharsetName;
    304         }
    305 
    306         byte[] encode(char[] ca, int off, int len) {
    307             int en = scale(len, ce.maxBytesPerChar());
    308             byte[] ba = new byte[en];
    309             if (len == 0)
    310                 return ba;
    311             if (ce instanceof ArrayEncoder) {
    312                 int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
    313                 return safeTrim(ba, blen, cs, isTrusted);
    314             } else {
    315                 ce.reset();
    316                 ByteBuffer bb = ByteBuffer.wrap(ba);
    317                 CharBuffer cb = CharBuffer.wrap(ca, off, len);
    318                 try {
    319                     // Android-changed:  Pass read-only buffer, so the encoder can't alter it
    320                     CoderResult cr = ce.encode(cb.asReadOnlyBuffer(), bb, true);
    321                     if (!cr.isUnderflow())
    322                         cr.throwException();
    323                     cr = ce.flush(bb);
    324                     if (!cr.isUnderflow())
    325                         cr.throwException();
    326                 } catch (CharacterCodingException x) {
    327                     // Substitution is always enabled,
    328                     // so this shouldn't happen
    329                     throw new Error(x);
    330                 }
    331                 return safeTrim(ba, bb.position(), cs, isTrusted);
    332             }
    333         }
    334     }
    335 
    336     static byte[] encode(String charsetName, char[] ca, int off, int len)
    337         throws UnsupportedEncodingException
    338     {
    339         StringEncoder se = deref(encoder);
    340         String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
    341         if ((se == null) || !(csn.equals(se.requestedCharsetName())
    342                               || csn.equals(se.charsetName()))) {
    343             se = null;
    344             try {
    345                 Charset cs = lookupCharset(csn);
    346                 if (cs != null)
    347                     se = new StringEncoder(cs, csn);
    348             } catch (IllegalCharsetNameException x) {}
    349             if (se == null)
    350                 throw new UnsupportedEncodingException (csn);
    351             set(encoder, se);
    352         }
    353         return se.encode(ca, off, len);
    354     }
    355 
    356     static byte[] encode(Charset cs, char[] ca, int off, int len) {
    357         CharsetEncoder ce = cs.newEncoder();
    358         int en = scale(len, ce.maxBytesPerChar());
    359         byte[] ba = new byte[en];
    360         if (len == 0)
    361             return ba;
    362         boolean isTrusted = false;
    363         if (System.getSecurityManager() != null) {
    364             if (!(isTrusted = (cs.getClass().getClassLoader() == null))) {
    365                 ca =  Arrays.copyOfRange(ca, off, off + len);
    366                 off = 0;
    367             }
    368         }
    369         ce.onMalformedInput(CodingErrorAction.REPLACE)
    370           .onUnmappableCharacter(CodingErrorAction.REPLACE)
    371           .reset();
    372         if (ce instanceof ArrayEncoder) {
    373             int blen = ((ArrayEncoder)ce).encode(ca, off, len, ba);
    374             return safeTrim(ba, blen, cs, isTrusted);
    375         } else {
    376             ByteBuffer bb = ByteBuffer.wrap(ba);
    377             CharBuffer cb = CharBuffer.wrap(ca, off, len);
    378             try {
    379                 // Android-changed:  Pass read-only buffer, so the encoder can't alter it
    380                 CoderResult cr = ce.encode(cb.asReadOnlyBuffer(), bb, true);
    381                 if (!cr.isUnderflow())
    382                     cr.throwException();
    383                 cr = ce.flush(bb);
    384                 if (!cr.isUnderflow())
    385                     cr.throwException();
    386             } catch (CharacterCodingException x) {
    387                 throw new Error(x);
    388             }
    389             return safeTrim(ba, bb.position(), cs, isTrusted);
    390         }
    391     }
    392 
    393     static byte[] encode(char[] ca, int off, int len) {
    394         String csn = Charset.defaultCharset().name();
    395         try {
    396             // use charset name encode() variant which provides caching.
    397             return encode(csn, ca, off, len);
    398         } catch (UnsupportedEncodingException x) {
    399             warnUnsupportedCharset(csn);
    400         }
    401         try {
    402             return encode("ISO-8859-1", ca, off, len);
    403         } catch (UnsupportedEncodingException x) {
    404             // If this code is hit during VM initialization, MessageUtils is
    405             // the only way we will be able to get any kind of error message.
    406             MessageUtils.err("ISO-8859-1 charset not available: "
    407                              + x.toString());
    408             // If we can not find ISO-8859-1 (a required encoding) then things
    409             // are seriously wrong with the installation.
    410             System.exit(1);
    411             return null;
    412         }
    413     }
    414 }
    415