Home | History | Annotate | Download | only in binary
      1 /*
      2  * Licensed to the Apache Software Foundation (ASF) under one or more
      3  * contributor license agreements.  See the NOTICE file distributed with
      4  * this work for additional information regarding copyright ownership.
      5  * The ASF licenses this file to You under the Apache License, Version 2.0
      6  * (the "License"); you may not use this file except in compliance with
      7  * the License.  You may obtain a copy of the License at
      8  *
      9  *      http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 package org.apache.commons.codec.binary;
     19 
     20 import java.io.UnsupportedEncodingException;
     21 
     22 import org.apache.commons.codec.CharEncoding;
     23 
     24 /**
     25  * Converts String to and from bytes using the encodings required by the Java specification. These encodings are specified in <a
     26  * href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     27  *
     28  * @see CharEncoding
     29  * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     30  * @version $Id: StringUtils.java 801391 2009-08-05 19:55:54Z ggregory $
     31  * @since 1.4
     32  */
     33 public class StringUtils {
     34 
     35     /**
     36      * Encodes the given string into a sequence of bytes using the ISO-8859-1 charset, storing the result into a new
     37      * byte array.
     38      *
     39      * @param string
     40      *            the String to encode
     41      * @return encoded bytes
     42      * @throws IllegalStateException
     43      *             Thrown when the charset is missing, which should be never according the the Java specification.
     44      * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     45      * @see #getBytesUnchecked(String, String)
     46      */
     47     public static byte[] getBytesIso8859_1(String string) {
     48         return StringUtils.getBytesUnchecked(string, CharEncoding.ISO_8859_1);
     49     }
     50 
     51     /**
     52      * Encodes the given string into a sequence of bytes using the US-ASCII charset, storing the result into a new byte
     53      * array.
     54      *
     55      * @param string
     56      *            the String to encode
     57      * @return encoded bytes
     58      * @throws IllegalStateException
     59      *             Thrown when the charset is missing, which should be never according the the Java specification.
     60      * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     61      * @see #getBytesUnchecked(String, String)
     62      */
     63     public static byte[] getBytesUsAscii(String string) {
     64         return StringUtils.getBytesUnchecked(string, CharEncoding.US_ASCII);
     65     }
     66 
     67     /**
     68      * Encodes the given string into a sequence of bytes using the UTF-16 charset, storing the result into a new byte
     69      * array.
     70      *
     71      * @param string
     72      *            the String to encode
     73      * @return encoded bytes
     74      * @throws IllegalStateException
     75      *             Thrown when the charset is missing, which should be never according the the Java specification.
     76      * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     77      * @see #getBytesUnchecked(String, String)
     78      */
     79     public static byte[] getBytesUtf16(String string) {
     80         return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16);
     81     }
     82 
     83     /**
     84      * Encodes the given string into a sequence of bytes using the UTF-16BE charset, storing the result into a new byte
     85      * array.
     86      *
     87      * @param string
     88      *            the String to encode
     89      * @return encoded bytes
     90      * @throws IllegalStateException
     91      *             Thrown when the charset is missing, which should be never according the the Java specification.
     92      * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
     93      * @see #getBytesUnchecked(String, String)
     94      */
     95     public static byte[] getBytesUtf16Be(String string) {
     96         return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16BE);
     97     }
     98 
     99     /**
    100      * Encodes the given string into a sequence of bytes using the UTF-16LE charset, storing the result into a new byte
    101      * array.
    102      *
    103      * @param string
    104      *            the String to encode
    105      * @return encoded bytes
    106      * @throws IllegalStateException
    107      *             Thrown when the charset is missing, which should be never according the the Java specification.
    108      * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
    109      * @see #getBytesUnchecked(String, String)
    110      */
    111     public static byte[] getBytesUtf16Le(String string) {
    112         return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_16LE);
    113     }
    114 
    115     /**
    116      * Encodes the given string into a sequence of bytes using the UTF-8 charset, storing the result into a new byte
    117      * array.
    118      *
    119      * @param string
    120      *            the String to encode
    121      * @return encoded bytes
    122      * @throws IllegalStateException
    123      *             Thrown when the charset is missing, which should be never according the the Java specification.
    124      * @see <a href="http://java.sun.com/j2se/1.4.2/docs/api/java/nio/charset/Charset.html">Standard charsets</a>
    125      * @see #getBytesUnchecked(String, String)
    126      */
    127     public static byte[] getBytesUtf8(String string) {
    128         return StringUtils.getBytesUnchecked(string, CharEncoding.UTF_8);
    129     }
    130 
    131     /**
    132      * Encodes the given string into a sequence of bytes using the named charset, storing the result into a new byte
    133      * array.
    134      * <p>
    135      * This method catches {@link UnsupportedEncodingException} and rethrows it as {@link IllegalStateException}, which
    136      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
    137      * </p>
    138      *
    139      * @param string
    140      *            the String to encode
    141      * @param charsetName
    142      *            The name of a required {@link java.nio.charset.Charset}
    143      * @return encoded bytes
    144      * @throws IllegalStateException
    145      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
    146      *             required charset name.
    147      * @see CharEncoding
    148      * @see String#getBytes(String)
    149      */
    150     public static byte[] getBytesUnchecked(String string, String charsetName) {
    151         if (string == null) {
    152             return null;
    153         }
    154         try {
    155             return string.getBytes(charsetName);
    156         } catch (UnsupportedEncodingException e) {
    157             throw StringUtils.newIllegalStateException(charsetName, e);
    158         }
    159     }
    160 
    161     private static IllegalStateException newIllegalStateException(String charsetName, UnsupportedEncodingException e) {
    162         return new IllegalStateException(charsetName + ": " + e);
    163     }
    164 
    165     /**
    166      * Constructs a new <code>String</code> by decoding the specified array of bytes using the given charset.
    167      * <p>
    168      * This method catches {@link UnsupportedEncodingException} and re-throws it as {@link IllegalStateException}, which
    169      * should never happen for a required charset name. Use this method when the encoding is required to be in the JRE.
    170      * </p>
    171      *
    172      * @param bytes
    173      *            The bytes to be decoded into characters
    174      * @param charsetName
    175      *            The name of a required {@link java.nio.charset.Charset}
    176      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
    177      * @throws IllegalStateException
    178      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen for a
    179      *             required charset name.
    180      * @see CharEncoding
    181      * @see String#String(byte[], String)
    182      */
    183     public static String newString(byte[] bytes, String charsetName) {
    184         if (bytes == null) {
    185             return null;
    186         }
    187         try {
    188             return new String(bytes, charsetName);
    189         } catch (UnsupportedEncodingException e) {
    190             throw StringUtils.newIllegalStateException(charsetName, e);
    191         }
    192     }
    193 
    194     /**
    195      * Constructs a new <code>String</code> by decoding the specified array of bytes using the ISO-8859-1 charset.
    196      *
    197      * @param bytes
    198      *            The bytes to be decoded into characters
    199      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
    200      * @throws IllegalStateException
    201      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
    202      *             charset is required.
    203      */
    204     public static String newStringIso8859_1(byte[] bytes) {
    205         return StringUtils.newString(bytes, CharEncoding.ISO_8859_1);
    206     }
    207 
    208     /**
    209      * Constructs a new <code>String</code> by decoding the specified array of bytes using the US-ASCII charset.
    210      *
    211      * @param bytes
    212      *            The bytes to be decoded into characters
    213      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
    214      * @throws IllegalStateException
    215      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
    216      *             charset is required.
    217      */
    218     public static String newStringUsAscii(byte[] bytes) {
    219         return StringUtils.newString(bytes, CharEncoding.US_ASCII);
    220     }
    221 
    222     /**
    223      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16 charset.
    224      *
    225      * @param bytes
    226      *            The bytes to be decoded into characters
    227      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
    228      * @throws IllegalStateException
    229      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
    230      *             charset is required.
    231      */
    232     public static String newStringUtf16(byte[] bytes) {
    233         return StringUtils.newString(bytes, CharEncoding.UTF_16);
    234     }
    235 
    236     /**
    237      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16BE charset.
    238      *
    239      * @param bytes
    240      *            The bytes to be decoded into characters
    241      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
    242      * @throws IllegalStateException
    243      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
    244      *             charset is required.
    245      */
    246     public static String newStringUtf16Be(byte[] bytes) {
    247         return StringUtils.newString(bytes, CharEncoding.UTF_16BE);
    248     }
    249 
    250     /**
    251      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-16LE charset.
    252      *
    253      * @param bytes
    254      *            The bytes to be decoded into characters
    255      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
    256      * @throws IllegalStateException
    257      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
    258      *             charset is required.
    259      */
    260     public static String newStringUtf16Le(byte[] bytes) {
    261         return StringUtils.newString(bytes, CharEncoding.UTF_16LE);
    262     }
    263 
    264     /**
    265      * Constructs a new <code>String</code> by decoding the specified array of bytes using the UTF-8 charset.
    266      *
    267      * @param bytes
    268      *            The bytes to be decoded into characters
    269      * @return A new <code>String</code> decoded from the specified array of bytes using the given charset.
    270      * @throws IllegalStateException
    271      *             Thrown when a {@link UnsupportedEncodingException} is caught, which should never happen since the
    272      *             charset is required.
    273      */
    274     public static String newStringUtf8(byte[] bytes) {
    275         return StringUtils.newString(bytes, CharEncoding.UTF_8);
    276     }
    277 
    278 }
    279