Home | History | Annotate | Download | only in jutf7
      1 /* ====================================================================
      2  * Copyright (c) 2006 J.T. Beetstra
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining
      5  * a copy of this software and associated documentation files (the
      6  * "Software"), to deal in the Software without restriction, including
      7  * without limitation the rights to use, copy, modify, merge, publish,
      8  * distribute, sublicense, and/or sell copies of the Software, and to
      9  * permit persons to whom the Software is furnished to do so, subject to
     10  * the following conditions:
     11  *
     12  * The above copyright notice and this permission notice shall be
     13  * included in all copies or substantial portions of the Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
     16  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
     17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
     18  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
     19  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
     20  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
     21  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  * ====================================================================
     23  */
     24 
     25 package com.beetstra.jutf7;
     26 
     27 import java.util.Arrays;
     28 
     29 /**
     30  * <p>
     31  * Represent a base 64 mapping. The 64 characters used in the encoding can be
     32  * specified, since modified-UTF-7 uses other characters than UTF-7 (',' instead
     33  * of '/').
     34  * </p>
     35  * <p>
     36  * The exact type of the arguments and result values is adapted to the needs of
     37  * the encoder and decoder, as opposed to following a strict interpretation of
     38  * base 64.
     39  * </p>
     40  * <p>
     41  * Base 64, as specified in RFC 2045, is an encoding used to encode bytes as
     42  * characters. In (modified-)UTF-7 however, it is used to encode characters as
     43  * bytes, using some intermediate steps:
     44  * </p>
     45  * <ol>
     46  * <li>Encode all characters as a 16-bit (UTF-16) integer value</li>
     47  * <li>Write this as stream of bytes (most-significant first)</li>
     48  * <li>Encode these bytes using (modified) base 64 encoding</li>
     49  * <li>Write the thus formed stream of characters as a stream of bytes, using
     50  * ASCII encoding</li>
     51  * </ol>
     52  *
     53  * @author Jaap Beetstra
     54  */
     55 class Base64Util {
     56     private static final int ALPHABET_LENGTH = 64;
     57     private final char[] alphabet;
     58     private final int[] inverseAlphabet;
     59 
     60     /**
     61      * Initializes the class with the specified encoding/decoding alphabet.
     62      *
     63      * @param alphabet
     64      * @throws IllegalArgumentException if alphabet is not 64 characters long or
     65      *             contains characters which are not 7-bit ASCII
     66      */
     67     Base64Util(final String alphabet) {
     68         this.alphabet = alphabet.toCharArray();
     69         if (alphabet.length() != ALPHABET_LENGTH)
     70             throw new IllegalArgumentException("alphabet has incorrect length (should be 64, not "
     71                     + alphabet.length() + ")");
     72         inverseAlphabet = new int[128];
     73         Arrays.fill(inverseAlphabet, -1);
     74         for (int i = 0; i < this.alphabet.length; i++) {
     75             final char ch = this.alphabet[i];
     76             if (ch >= 128)
     77                 throw new IllegalArgumentException("invalid character in alphabet: " + ch);
     78             inverseAlphabet[ch] = i;
     79         }
     80     }
     81 
     82     /**
     83      * Returns the integer value of the six bits represented by the specified
     84      * character.
     85      *
     86      * @param ch The character, as a ASCII encoded byte
     87      * @return The six bits, as an integer value, or -1 if the byte is not in
     88      *         the alphabet
     89      */
     90     int getSextet(final byte ch) {
     91         if (ch >= 128)
     92             return -1;
     93         return inverseAlphabet[ch];
     94     }
     95 
     96     /**
     97      * Tells whether the alphabet contains the specified character.
     98      *
     99      * @param ch The character
    100      * @return true if the alphabet contains <code>ch</code>, false otherwise
    101      */
    102     boolean contains(final char ch) {
    103         if (ch >= 128)
    104             return false;
    105         return inverseAlphabet[ch] >= 0;
    106     }
    107 
    108     /**
    109      * Encodes the six bit group as a character.
    110      *
    111      * @param sextet The six bit group to be encoded
    112      * @return The ASCII value of the character
    113      */
    114     byte getChar(final int sextet) {
    115         return (byte)alphabet[sextet];
    116     }
    117 }
    118