Home | History | Annotate | Download | only in io
      1 /*
      2  * Copyright (C) 2012 The Guava Authors
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
      5  * in compliance with the License. You may obtain a copy of the License at
      6  *
      7  * http://www.apache.org/licenses/LICENSE-2.0
      8  *
      9  * Unless required by applicable law or agreed to in writing, software distributed under the License
     10  * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
     11  * or implied. See the License for the specific language governing permissions and limitations under
     12  * the License.
     13  */
     14 
     15 package com.google.common.io;
     16 
     17 import static com.google.common.base.Preconditions.checkArgument;
     18 import static com.google.common.base.Preconditions.checkNotNull;
     19 import static com.google.common.base.Preconditions.checkPositionIndexes;
     20 import static com.google.common.base.Preconditions.checkState;
     21 import static com.google.common.io.GwtWorkarounds.asCharInput;
     22 import static com.google.common.io.GwtWorkarounds.asCharOutput;
     23 import static com.google.common.io.GwtWorkarounds.asInputStream;
     24 import static com.google.common.io.GwtWorkarounds.asOutputStream;
     25 import static com.google.common.io.GwtWorkarounds.stringBuilderOutput;
     26 import static com.google.common.math.IntMath.divide;
     27 import static com.google.common.math.IntMath.log2;
     28 import static java.math.RoundingMode.CEILING;
     29 import static java.math.RoundingMode.FLOOR;
     30 import static java.math.RoundingMode.UNNECESSARY;
     31 
     32 import com.google.common.annotations.Beta;
     33 import com.google.common.annotations.GwtCompatible;
     34 import com.google.common.annotations.GwtIncompatible;
     35 import com.google.common.base.Ascii;
     36 import com.google.common.base.CharMatcher;
     37 import com.google.common.io.GwtWorkarounds.ByteInput;
     38 import com.google.common.io.GwtWorkarounds.ByteOutput;
     39 import com.google.common.io.GwtWorkarounds.CharInput;
     40 import com.google.common.io.GwtWorkarounds.CharOutput;
     41 
     42 import java.io.IOException;
     43 import java.io.InputStream;
     44 import java.io.OutputStream;
     45 import java.io.Reader;
     46 import java.io.Writer;
     47 import java.util.Arrays;
     48 
     49 import javax.annotation.CheckReturnValue;
     50 import javax.annotation.Nullable;
     51 
     52 /**
     53  * A binary encoding scheme for reversibly translating between byte sequences and printable ASCII
     54  * strings. This class includes several constants for encoding schemes specified by <a
     55  * href="http://tools.ietf.org/html/rfc4648">RFC 4648</a>. For example, the expression:
     56  *
     57  * <pre>   {@code
     58  *   BaseEncoding.base32().encode("foo".getBytes(Charsets.US_ASCII))}</pre>
     59  *
     60  * <p>returns the string {@code "MZXW6==="}, and <pre>   {@code
     61  *  byte[] decoded = BaseEncoding.base32().decode("MZXW6===");}</pre>
     62  *
     63  * <p>...returns the ASCII bytes of the string {@code "foo"}.
     64  *
     65  * <p>By default, {@code BaseEncoding}'s behavior is relatively strict and in accordance with
     66  * RFC 4648.  Decoding rejects characters in the wrong case, though padding is optional.
     67  * To modify encoding and decoding behavior, use configuration methods to obtain a new encoding
     68  * with modified behavior:
     69  *
     70  * <pre>   {@code
     71  *  BaseEncoding.base16().lowerCase().decode("deadbeef");}</pre>
     72  *
     73  * <p>Warning: BaseEncoding instances are immutable.  Invoking a configuration method has no effect
     74  * on the receiving instance; you must store and use the new encoding instance it returns, instead.
     75  *
     76  * <pre>   {@code
     77  *   // Do NOT do this
     78  *   BaseEncoding hex = BaseEncoding.base16();
     79  *   hex.lowerCase(); // does nothing!
     80  *   return hex.decode("deadbeef"); // throws an IllegalArgumentException}</pre>
     81  *
     82  * <p>It is guaranteed that {@code encoding.decode(encoding.encode(x))} is always equal to
     83  * {@code x}, but the reverse does not necessarily hold.
     84  *
     85  * <p>
     86  * <table>
     87  * <tr>
     88  * <th>Encoding
     89  * <th>Alphabet
     90  * <th>{@code char:byte} ratio
     91  * <th>Default padding
     92  * <th>Comments
     93  * <tr>
     94  * <td>{@link #base16()}
     95  * <td>0-9 A-F
     96  * <td>2.00
     97  * <td>N/A
     98  * <td>Traditional hexadecimal.  Defaults to upper case.
     99  * <tr>
    100  * <td>{@link #base32()}
    101  * <td>A-Z 2-7
    102  * <td>1.60
    103  * <td>=
    104  * <td>Human-readable; no possibility of mixing up 0/O or 1/I.  Defaults to upper case.
    105  * <tr>
    106  * <td>{@link #base32Hex()}
    107  * <td>0-9 A-V
    108  * <td>1.60
    109  * <td>=
    110  * <td>"Numerical" base 32; extended from the traditional hex alphabet.  Defaults to upper case.
    111  * <tr>
    112  * <td>{@link #base64()}
    113  * <td>A-Z a-z 0-9 + /
    114  * <td>1.33
    115  * <td>=
    116  * <td>
    117  * <tr>
    118  * <td>{@link #base64Url()}
    119  * <td>A-Z a-z 0-9 - _
    120  * <td>1.33
    121  * <td>=
    122  * <td>Safe to use as filenames, or to pass in URLs without escaping
    123  * </table>
    124  *
    125  * <p>
    126  * All instances of this class are immutable, so they may be stored safely as static constants.
    127  *
    128  * @author Louis Wasserman
    129  * @since 14.0
    130  */
    131 @Beta
    132 @GwtCompatible(emulated = true)
    133 public abstract class BaseEncoding {
    134   // TODO(user): consider adding encodeTo(Appendable, byte[], [int, int])
    135 
  // Declared without an access modifier (package-private), so this constructor cannot be
  // invoked from code outside this package.
  BaseEncoding() {}
    137 
    138   /**
    139    * Exception indicating invalid base-encoded input encountered while decoding.
    140    *
    141    * @author Louis Wasserman
    142    * @since 15.0
    143    */
    144   public static final class DecodingException extends IOException {
    145     DecodingException(String message) {
    146       super(message);
    147     }
    148 
    149     DecodingException(Throwable cause) {
    150       initCause(cause);
    151     }
    152   }
    153 
    154   /**
    155    * Encodes the specified byte array, and returns the encoded {@code String}.
    156    */
    157   public String encode(byte[] bytes) {
    158     return encode(checkNotNull(bytes), 0, bytes.length);
    159   }
    160 
    161   /**
    162    * Encodes the specified range of the specified byte array, and returns the encoded
    163    * {@code String}.
    164    */
    165   public final String encode(byte[] bytes, int off, int len) {
    166     checkNotNull(bytes);
    167     checkPositionIndexes(off, off + len, bytes.length);
    168     CharOutput result = stringBuilderOutput(maxEncodedSize(len));
    169     ByteOutput byteOutput = encodingStream(result);
    170     try {
    171       for (int i = 0; i < len; i++) {
    172         byteOutput.write(bytes[off + i]);
    173       }
    174       byteOutput.close();
    175     } catch (IOException impossible) {
    176       throw new AssertionError("impossible");
    177     }
    178     return result.toString();
    179   }
    180 
    181   /**
    182    * Returns an {@code OutputStream} that encodes bytes using this encoding into the specified
    183    * {@code Writer}.  When the returned {@code OutputStream} is closed, so is the backing
    184    * {@code Writer}.
    185    */
    186   @GwtIncompatible("Writer,OutputStream")
    187   public final OutputStream encodingStream(Writer writer) {
    188     return asOutputStream(encodingStream(asCharOutput(writer)));
    189   }
    190 
    191   /**
    192    * Returns an {@code OutputSupplier} that supplies streams that encode bytes using this encoding
    193    * into writers from the specified {@code OutputSupplier}.
    194    *
    195    * @deprecated Use {@link #encodingSink(CharSink)} instead. This method is scheduled to be
    196    *     removed in Guava 16.0.
    197    */
    198   @Deprecated
    199   @GwtIncompatible("Writer,OutputStream")
    200   public final OutputSupplier<OutputStream> encodingStream(
    201       final OutputSupplier<? extends Writer> writerSupplier) {
    202     checkNotNull(writerSupplier);
    203     return new OutputSupplier<OutputStream>() {
    204       @Override
    205       public OutputStream getOutput() throws IOException {
    206         return encodingStream(writerSupplier.getOutput());
    207       }
    208     };
    209   }
    210 
    211   /**
    212    * Returns a {@code ByteSink} that writes base-encoded bytes to the specified {@code CharSink}.
    213    */
    214   @GwtIncompatible("ByteSink,CharSink")
    215   public final ByteSink encodingSink(final CharSink encodedSink) {
    216     checkNotNull(encodedSink);
    217     return new ByteSink() {
    218       @Override
    219       public OutputStream openStream() throws IOException {
    220         return encodingStream(encodedSink.openStream());
    221       }
    222     };
    223   }
    224 
    225   // TODO(user): document the extent of leniency, probably after adding ignore(CharMatcher)
    226 
    227   private static byte[] extract(byte[] result, int length) {
    228     if (length == result.length) {
    229       return result;
    230     } else {
    231       byte[] trunc = new byte[length];
    232       System.arraycopy(result, 0, trunc, 0, length);
    233       return trunc;
    234     }
    235   }
    236 
    237   /**
    238    * Decodes the specified character sequence, and returns the resulting {@code byte[]}.
    239    * This is the inverse operation to {@link #encode(byte[])}.
    240    *
    241    * @throws IllegalArgumentException if the input is not a valid encoded string according to this
    242    *         encoding.
    243    */
    244   public final byte[] decode(CharSequence chars) {
    245     try {
    246       return decodeChecked(chars);
    247     } catch (DecodingException badInput) {
    248       throw new IllegalArgumentException(badInput);
    249     }
    250   }
    251 
    252   /**
    253    * Decodes the specified character sequence, and returns the resulting {@code byte[]}.
    254    * This is the inverse operation to {@link #encode(byte[])}.
    255    *
    256    * @throws DecodingException if the input is not a valid encoded string according to this
    257    *         encoding.
    258    */
    259   final byte[] decodeChecked(CharSequence chars) throws DecodingException {
    260     chars = padding().trimTrailingFrom(chars);
    261     ByteInput decodedInput = decodingStream(asCharInput(chars));
    262     byte[] tmp = new byte[maxDecodedSize(chars.length())];
    263     int index = 0;
    264     try {
    265       for (int i = decodedInput.read(); i != -1; i = decodedInput.read()) {
    266         tmp[index++] = (byte) i;
    267       }
    268     } catch (DecodingException badInput) {
    269       throw badInput;
    270     } catch (IOException impossible) {
    271       throw new AssertionError(impossible);
    272     }
    273     return extract(tmp, index);
    274   }
    275 
    276   /**
    277    * Returns an {@code InputStream} that decodes base-encoded input from the specified
    278    * {@code Reader}.  The returned stream throws a {@link DecodingException} upon decoding-specific
    279    * errors.
    280    */
    281   @GwtIncompatible("Reader,InputStream")
    282   public final InputStream decodingStream(Reader reader) {
    283     return asInputStream(decodingStream(asCharInput(reader)));
    284   }
    285 
    286   /**
    287    * Returns an {@code InputSupplier} that supplies input streams that decode base-encoded input
    288    * from readers from the specified supplier.
    289    *
    290    * @deprecated Use {@link #decodingSource(CharSource)} instead. This method is scheduled to be
    291    *     removed in Guava 16.0.
    292    */
    293   @Deprecated
    294   @GwtIncompatible("Reader,InputStream")
    295   public final InputSupplier<InputStream> decodingStream(
    296       final InputSupplier<? extends Reader> readerSupplier) {
    297     checkNotNull(readerSupplier);
    298     return new InputSupplier<InputStream>() {
    299       @Override
    300       public InputStream getInput() throws IOException {
    301         return decodingStream(readerSupplier.getInput());
    302       }
    303     };
    304   }
    305 
    306   /**
    307    * Returns a {@code ByteSource} that reads base-encoded bytes from the specified
    308    * {@code CharSource}.
    309    */
    310   @GwtIncompatible("ByteSource,CharSource")
    311   public final ByteSource decodingSource(final CharSource encodedSource) {
    312     checkNotNull(encodedSource);
    313     return new ByteSource() {
    314       @Override
    315       public InputStream openStream() throws IOException {
    316         return decodingStream(encodedSource.openStream());
    317       }
    318     };
    319   }
    320 
    321   // Implementations for encoding/decoding
    322 
  /**
   * Returns a number of characters sufficient to hold the encoding of {@code bytes} bytes.
   * {@code encode(byte[], int, int)} passes this value when creating its string-builder output.
   */
  abstract int maxEncodedSize(int bytes);

  /**
   * Returns a {@code ByteOutput} that encodes the bytes written to it and writes the resulting
   * characters to {@code charOutput}.  {@code encode(byte[], int, int)} closes the returned
   * output before reading the result back from {@code charOutput}.
   */
  abstract ByteOutput encodingStream(CharOutput charOutput);

  /**
   * Returns an upper bound on the number of bytes that {@code chars} encoded characters can
   * decode to.  {@code decodeChecked} allocates its temporary array with exactly this size, so
   * the value must never be smaller than the number of bytes actually decoded.
   */
  abstract int maxDecodedSize(int chars);

  /**
   * Returns a {@code ByteInput} that yields the bytes decoded from {@code charInput}.
   * {@code decodeChecked} reads from it until {@code read()} returns {@code -1}.
   */
  abstract ByteInput decodingStream(CharInput charInput);

  /**
   * Returns a matcher for this encoding's padding characters.  {@code decodeChecked} uses it to
   * trim trailing padding from the input before decoding.
   */
  abstract CharMatcher padding();
    332 
    333   // Modified encoding generators
    334 
  /**
   * Returns an encoding that behaves equivalently to this encoding, but omits any padding
   * characters as specified by <a href="http://tools.ietf.org/html/rfc4648#section-3.2">RFC 4648
   * section 3.2</a>, Padding of Encoded Data.
   *
   * <p>The result may be this same instance: the standard implementation returns itself when it
   * already has no padding character.
   */
  @CheckReturnValue
  public abstract BaseEncoding omitPadding();
    342 
  /**
   * Returns an encoding that behaves equivalently to this encoding, but uses an alternate character
   * for padding.
   *
   * <p>The result may be this same instance: the standard implementation returns itself when the
   * alphabet never needs padding (its bits-per-character evenly divides 8) or when
   * {@code padChar} is already its padding character.
   *
   * @param padChar the character to use for padding
   * @throws IllegalArgumentException if this padding character is already used in the alphabet or a
   *         separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withPadChar(char padChar);
    352 
  /**
   * Returns an encoding that behaves equivalently to this encoding, but adds a separator string
   * after every {@code n} characters. Any occurrences of any characters that occur in the separator
   * are skipped over in decoding.
   *
   * @param separator the string to insert between groups of encoded characters
   * @param n the number of encoded characters in each group
   * @throws IllegalArgumentException if any alphabet or padding characters appear in the separator
   *         string, or if {@code n <= 0}
   * @throws UnsupportedOperationException if this encoding already uses a separator
   */
  @CheckReturnValue
  public abstract BaseEncoding withSeparator(String separator, int n);
    364 
  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * uppercase letters. Padding and separator characters remain in their original case.
   *
   * <p>The result may be this same instance: the standard implementation returns itself when its
   * alphabet contains no lowercase characters.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding upperCase();
    374 
  /**
   * Returns an encoding that behaves equivalently to this encoding, but encodes and decodes with
   * lowercase letters. Padding and separator characters remain in their original case.
   *
   * <p>The result may be this same instance: the standard implementation returns itself when its
   * alphabet contains no uppercase characters.
   *
   * @throws IllegalStateException if the alphabet used by this encoding contains mixed upper- and
   *         lower-case characters
   */
  @CheckReturnValue
  public abstract BaseEncoding lowerCase();
    384 
  // Single shared instance handed out by base64(): 64-character alphabet, '=' padding.
  private static final BaseEncoding BASE64 = new StandardBaseEncoding(
      "base64()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/", '=');

  /**
   * The "base64" base encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-4">RFC 4648 section 4</a>, Base 64 Encoding.
   * (This is the same as the base 64 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-3">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * <p>Every call returns the same instance.
   */
  public static BaseEncoding base64() {
    return BASE64;
  }
    404 
  // Single shared instance handed out by base64Url(): same as base64 except that '-' and '_'
  // take the place of '+' and '/'.
  private static final BaseEncoding BASE64_URL = new StandardBaseEncoding(
      "base64Url()", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_", '=');

  /**
   * The "base64url" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-5">RFC 4648 section 5</a>, Base 64 Encoding
   * with URL and Filename Safe Alphabet, also sometimes referred to as the "web safe Base64."
   * (This is the same as the base 64 encoding with URL and filename safe alphabet from <a
   * href="http://tools.ietf.org/html/rfc3548#section-4">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * <p>Every call returns the same instance.
   */
  public static BaseEncoding base64Url() {
    return BASE64_URL;
  }
    425 
  // Single shared instance handed out by base32(): alphabet A-Z then 2-7, '=' padding.
  private static final BaseEncoding BASE32 =
      new StandardBaseEncoding("base32()", "ABCDEFGHIJKLMNOPQRSTUVWXYZ234567", '=');

  /**
   * The "base32" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-6">RFC 4648 section 6</a>, Base 32 Encoding.
   * (This is the same as the base 32 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-5">RFC 3548</a>.)
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * <p>Every call returns the same instance.
   */
  public static BaseEncoding base32() {
    return BASE32;
  }
    445 
  // Single shared instance handed out by base32Hex(): alphabet 0-9 then A-V, '=' padding.
  private static final BaseEncoding BASE32_HEX =
      new StandardBaseEncoding("base32Hex()", "0123456789ABCDEFGHIJKLMNOPQRSTUV", '=');

  /**
   * The "base32hex" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-7">RFC 4648 section 7</a>, Base 32 Encoding
   * with Extended Hex Alphabet.  There is no corresponding encoding in RFC 3548.
   *
   * <p>The character {@code '='} is used for padding, but can be {@linkplain #omitPadding()
   * omitted} or {@linkplain #withPadChar(char) replaced}.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * <p>Every call returns the same instance.
   */
  public static BaseEncoding base32Hex() {
    return BASE32_HEX;
  }
    464 
  // Single shared instance handed out by base16(): upper-case hex digits and no padding
  // character (null).
  private static final BaseEncoding BASE16 =
      new StandardBaseEncoding("base16()", "0123456789ABCDEF", null);

  /**
   * The "base16" encoding specified by <a
   * href="http://tools.ietf.org/html/rfc4648#section-8">RFC 4648 section 8</a>, Base 16 Encoding.
   * (This is the same as the base 16 encoding from <a
   * href="http://tools.ietf.org/html/rfc3548#section-6">RFC 3548</a>.) This is commonly known as
   * "hexadecimal" format.
   *
   * <p>No padding is necessary in base 16, so {@link #withPadChar(char)} and
   * {@link #omitPadding()} have no effect.
   *
   * <p>No line feeds are added by default, as per <a
   * href="http://tools.ietf.org/html/rfc4648#section-3.1"> RFC 4648 section 3.1</a>, Line Feeds in
   * Encoded Data. Line feeds may be added using {@link #withSeparator(String, int)}.
   *
   * <p>Every call returns the same instance.
   */
  public static BaseEncoding base16() {
    return BASE16;
  }
    485 
    486   private static final class Alphabet extends CharMatcher {
    487     private final String name;
    488     // this is meant to be immutable -- don't modify it!
    489     private final char[] chars;
    490     final int mask;
    491     final int bitsPerChar;
    492     final int charsPerChunk;
    493     final int bytesPerChunk;
    494     private final byte[] decodabet;
    495     private final boolean[] validPadding;
    496 
    497     Alphabet(String name, char[] chars) {
    498       this.name = checkNotNull(name);
    499       this.chars = checkNotNull(chars);
    500       try {
    501         this.bitsPerChar = log2(chars.length, UNNECESSARY);
    502       } catch (ArithmeticException e) {
    503         throw new IllegalArgumentException("Illegal alphabet length " + chars.length, e);
    504       }
    505 
    506       /*
    507        * e.g. for base64, bitsPerChar == 6, charsPerChunk == 4, and bytesPerChunk == 3. This makes
    508        * for the smallest chunk size that still has charsPerChunk * bitsPerChar be a multiple of 8.
    509        */
    510       int gcd = Math.min(8, Integer.lowestOneBit(bitsPerChar));
    511       this.charsPerChunk = 8 / gcd;
    512       this.bytesPerChunk = bitsPerChar / gcd;
    513 
    514       this.mask = chars.length - 1;
    515 
    516       byte[] decodabet = new byte[Ascii.MAX + 1];
    517       Arrays.fill(decodabet, (byte) -1);
    518       for (int i = 0; i < chars.length; i++) {
    519         char c = chars[i];
    520         checkArgument(CharMatcher.ASCII.matches(c), "Non-ASCII character: %s", c);
    521         checkArgument(decodabet[c] == -1, "Duplicate character: %s", c);
    522         decodabet[c] = (byte) i;
    523       }
    524       this.decodabet = decodabet;
    525 
    526       boolean[] validPadding = new boolean[charsPerChunk];
    527       for (int i = 0; i < bytesPerChunk; i++) {
    528         validPadding[divide(i * 8, bitsPerChar, CEILING)] = true;
    529       }
    530       this.validPadding = validPadding;
    531     }
    532 
    533     char encode(int bits) {
    534       return chars[bits];
    535     }
    536 
    537     boolean isValidPaddingStartPosition(int index) {
    538       return validPadding[index % charsPerChunk];
    539     }
    540 
    541     int decode(char ch) throws IOException {
    542       if (ch > Ascii.MAX || decodabet[ch] == -1) {
    543         throw new DecodingException("Unrecognized character: " + ch);
    544       }
    545       return decodabet[ch];
    546     }
    547 
    548     private boolean hasLowerCase() {
    549       for (char c : chars) {
    550         if (Ascii.isLowerCase(c)) {
    551           return true;
    552         }
    553       }
    554       return false;
    555     }
    556 
    557     private boolean hasUpperCase() {
    558       for (char c : chars) {
    559         if (Ascii.isUpperCase(c)) {
    560           return true;
    561         }
    562       }
    563       return false;
    564     }
    565 
    566     Alphabet upperCase() {
    567       if (!hasLowerCase()) {
    568         return this;
    569       } else {
    570         checkState(!hasUpperCase(), "Cannot call upperCase() on a mixed-case alphabet");
    571         char[] upperCased = new char[chars.length];
    572         for (int i = 0; i < chars.length; i++) {
    573           upperCased[i] = Ascii.toUpperCase(chars[i]);
    574         }
    575         return new Alphabet(name + ".upperCase()", upperCased);
    576       }
    577     }
    578 
    579     Alphabet lowerCase() {
    580       if (!hasUpperCase()) {
    581         return this;
    582       } else {
    583         checkState(!hasLowerCase(), "Cannot call lowerCase() on a mixed-case alphabet");
    584         char[] lowerCased = new char[chars.length];
    585         for (int i = 0; i < chars.length; i++) {
    586           lowerCased[i] = Ascii.toLowerCase(chars[i]);
    587         }
    588         return new Alphabet(name + ".lowerCase()", lowerCased);
    589       }
    590     }
    591 
    592     @Override
    593     public boolean matches(char c) {
    594       return CharMatcher.ASCII.matches(c) && decodabet[c] != -1;
    595     }
    596 
    597     @Override
    598     public String toString() {
    599       return name;
    600     }
    601   }
    602 
    603   static final class StandardBaseEncoding extends BaseEncoding {
    604     // TODO(user): provide a useful toString
    605     private final Alphabet alphabet;
    606 
    607     @Nullable
    608     private final Character paddingChar;
    609 
    610     StandardBaseEncoding(String name, String alphabetChars, @Nullable Character paddingChar) {
    611       this(new Alphabet(name, alphabetChars.toCharArray()), paddingChar);
    612     }
    613 
    614     StandardBaseEncoding(Alphabet alphabet, @Nullable Character paddingChar) {
    615       this.alphabet = checkNotNull(alphabet);
    616       checkArgument(paddingChar == null || !alphabet.matches(paddingChar),
    617           "Padding character %s was already in alphabet", paddingChar);
    618       this.paddingChar = paddingChar;
    619     }
    620 
    621     @Override
    622     CharMatcher padding() {
    623       return (paddingChar == null) ? CharMatcher.NONE : CharMatcher.is(paddingChar.charValue());
    624     }
    625 
    626     @Override
    627     int maxEncodedSize(int bytes) {
    628       return alphabet.charsPerChunk * divide(bytes, alphabet.bytesPerChunk, CEILING);
    629     }
    630 
    631     @Override
    632     ByteOutput encodingStream(final CharOutput out) {
    633       checkNotNull(out);
    634       return new ByteOutput() {
    635         int bitBuffer = 0;
    636         int bitBufferLength = 0;
    637         int writtenChars = 0;
    638 
    639         @Override
    640         public void write(byte b) throws IOException {
    641           bitBuffer <<= 8;
    642           bitBuffer |= b & 0xFF;
    643           bitBufferLength += 8;
    644           while (bitBufferLength >= alphabet.bitsPerChar) {
    645             int charIndex = (bitBuffer >> (bitBufferLength - alphabet.bitsPerChar))
    646                 & alphabet.mask;
    647             out.write(alphabet.encode(charIndex));
    648             writtenChars++;
    649             bitBufferLength -= alphabet.bitsPerChar;
    650           }
    651         }
    652 
    653         @Override
    654         public void flush() throws IOException {
    655           out.flush();
    656         }
    657 
    658         @Override
    659         public void close() throws IOException {
    660           if (bitBufferLength > 0) {
    661             int charIndex = (bitBuffer << (alphabet.bitsPerChar - bitBufferLength))
    662                 & alphabet.mask;
    663             out.write(alphabet.encode(charIndex));
    664             writtenChars++;
    665             if (paddingChar != null) {
    666               while (writtenChars % alphabet.charsPerChunk != 0) {
    667                 out.write(paddingChar.charValue());
    668                 writtenChars++;
    669               }
    670             }
    671           }
    672           out.close();
    673         }
    674       };
    675     }
    676 
    677     @Override
    678     int maxDecodedSize(int chars) {
    679       return (int) ((alphabet.bitsPerChar * (long) chars + 7L) / 8L);
    680     }
    681 
    682     @Override
    683     ByteInput decodingStream(final CharInput reader) {
    684       checkNotNull(reader);
    685       return new ByteInput() {
    686         int bitBuffer = 0;
    687         int bitBufferLength = 0;
    688         int readChars = 0;
    689         boolean hitPadding = false;
    690         final CharMatcher paddingMatcher = padding();
    691 
    692         @Override
    693         public int read() throws IOException {
    694           while (true) {
    695             int readChar = reader.read();
    696             if (readChar == -1) {
    697               if (!hitPadding && !alphabet.isValidPaddingStartPosition(readChars)) {
    698                 throw new DecodingException("Invalid input length " + readChars);
    699               }
    700               return -1;
    701             }
    702             readChars++;
    703             char ch = (char) readChar;
    704             if (paddingMatcher.matches(ch)) {
    705               if (!hitPadding
    706                   && (readChars == 1 || !alphabet.isValidPaddingStartPosition(readChars - 1))) {
    707                 throw new DecodingException("Padding cannot start at index " + readChars);
    708               }
    709               hitPadding = true;
    710             } else if (hitPadding) {
    711               throw new DecodingException(
    712                   "Expected padding character but found '" + ch + "' at index " + readChars);
    713             } else {
    714               bitBuffer <<= alphabet.bitsPerChar;
    715               bitBuffer |= alphabet.decode(ch);
    716               bitBufferLength += alphabet.bitsPerChar;
    717 
    718               if (bitBufferLength >= 8) {
    719                 bitBufferLength -= 8;
    720                 return (bitBuffer >> bitBufferLength) & 0xFF;
    721               }
    722             }
    723           }
    724         }
    725 
    726         @Override
    727         public void close() throws IOException {
    728           reader.close();
    729         }
    730       };
    731     }
    732 
    733     @Override
    734     public BaseEncoding omitPadding() {
    735       return (paddingChar == null) ? this : new StandardBaseEncoding(alphabet, null);
    736     }
    737 
    738     @Override
    739     public BaseEncoding withPadChar(char padChar) {
    740       if (8 % alphabet.bitsPerChar == 0 ||
    741           (paddingChar != null && paddingChar.charValue() == padChar)) {
    742         return this;
    743       } else {
    744         return new StandardBaseEncoding(alphabet, padChar);
    745       }
    746     }
    747 
    748     @Override
    749     public BaseEncoding withSeparator(String separator, int afterEveryChars) {
    750       checkNotNull(separator);
    751       checkArgument(padding().or(alphabet).matchesNoneOf(separator),
    752           "Separator cannot contain alphabet or padding characters");
    753       return new SeparatedBaseEncoding(this, separator, afterEveryChars);
    754     }
    755 
    756     private transient BaseEncoding upperCase;
    757     private transient BaseEncoding lowerCase;
    758 
    759     @Override
    760     public BaseEncoding upperCase() {
    761       BaseEncoding result = upperCase;
    762       if (result == null) {
    763         Alphabet upper = alphabet.upperCase();
    764         result = upperCase =
    765             (upper == alphabet) ? this : new StandardBaseEncoding(upper, paddingChar);
    766       }
    767       return result;
    768     }
    769 
    770     @Override
    771     public BaseEncoding lowerCase() {
    772       BaseEncoding result = lowerCase;
    773       if (result == null) {
    774         Alphabet lower = alphabet.lowerCase();
    775         result = lowerCase =
    776             (lower == alphabet) ? this : new StandardBaseEncoding(lower, paddingChar);
    777       }
    778       return result;
    779     }
    780 
    781     @Override
    782     public String toString() {
    783       StringBuilder builder = new StringBuilder("BaseEncoding.");
    784       builder.append(alphabet.toString());
    785       if (8 % alphabet.bitsPerChar != 0) {
    786         if (paddingChar == null) {
    787           builder.append(".omitPadding()");
    788         } else {
    789           builder.append(".withPadChar(").append(paddingChar).append(')');
    790         }
    791       }
    792       return builder.toString();
    793     }
    794   }
    795 
    796   static CharInput ignoringInput(final CharInput delegate, final CharMatcher toIgnore) {
    797     checkNotNull(delegate);
    798     checkNotNull(toIgnore);
    799     return new CharInput() {
    800       @Override
    801       public int read() throws IOException {
    802         int readChar;
    803         do {
    804           readChar = delegate.read();
    805         } while (readChar != -1 && toIgnore.matches((char) readChar));
    806         return readChar;
    807       }
    808 
    809       @Override
    810       public void close() throws IOException {
    811         delegate.close();
    812       }
    813     };
    814   }
    815 
    816   static CharOutput separatingOutput(
    817       final CharOutput delegate, final String separator, final int afterEveryChars) {
    818     checkNotNull(delegate);
    819     checkNotNull(separator);
    820     checkArgument(afterEveryChars > 0);
    821     return new CharOutput() {
    822       int charsUntilSeparator = afterEveryChars;
    823 
    824       @Override
    825       public void write(char c) throws IOException {
    826         if (charsUntilSeparator == 0) {
    827           for (int i = 0; i < separator.length(); i++) {
    828             delegate.write(separator.charAt(i));
    829           }
    830           charsUntilSeparator = afterEveryChars;
    831         }
    832         delegate.write(c);
    833         charsUntilSeparator--;
    834       }
    835 
    836       @Override
    837       public void flush() throws IOException {
    838         delegate.flush();
    839       }
    840 
    841       @Override
    842       public void close() throws IOException {
    843         delegate.close();
    844       }
    845     };
    846   }
    847 
    848   static final class SeparatedBaseEncoding extends BaseEncoding {
    849     private final BaseEncoding delegate;
    850     private final String separator;
    851     private final int afterEveryChars;
    852     private final CharMatcher separatorChars;
    853 
    854     SeparatedBaseEncoding(BaseEncoding delegate, String separator, int afterEveryChars) {
    855       this.delegate = checkNotNull(delegate);
    856       this.separator = checkNotNull(separator);
    857       this.afterEveryChars = afterEveryChars;
    858       checkArgument(
    859           afterEveryChars > 0, "Cannot add a separator after every %s chars", afterEveryChars);
    860       this.separatorChars = CharMatcher.anyOf(separator).precomputed();
    861     }
    862 
    863     @Override
    864     CharMatcher padding() {
    865       return delegate.padding();
    866     }
    867 
    868     @Override
    869     int maxEncodedSize(int bytes) {
    870       int unseparatedSize = delegate.maxEncodedSize(bytes);
    871       return unseparatedSize + separator.length()
    872           * divide(Math.max(0, unseparatedSize - 1), afterEveryChars, FLOOR);
    873     }
    874 
    875     @Override
    876     ByteOutput encodingStream(final CharOutput output) {
    877       return delegate.encodingStream(separatingOutput(output, separator, afterEveryChars));
    878     }
    879 
    880     @Override
    881     int maxDecodedSize(int chars) {
    882       return delegate.maxDecodedSize(chars);
    883     }
    884 
    885     @Override
    886     ByteInput decodingStream(final CharInput input) {
    887       return delegate.decodingStream(ignoringInput(input, separatorChars));
    888     }
    889 
    890     @Override
    891     public BaseEncoding omitPadding() {
    892       return delegate.omitPadding().withSeparator(separator, afterEveryChars);
    893     }
    894 
    895     @Override
    896     public BaseEncoding withPadChar(char padChar) {
    897       return delegate.withPadChar(padChar).withSeparator(separator, afterEveryChars);
    898     }
    899 
    900     @Override
    901     public BaseEncoding withSeparator(String separator, int afterEveryChars) {
    902       throw new UnsupportedOperationException("Already have a separator");
    903     }
    904 
    905     @Override
    906     public BaseEncoding upperCase() {
    907       return delegate.upperCase().withSeparator(separator, afterEveryChars);
    908     }
    909 
    910     @Override
    911     public BaseEncoding lowerCase() {
    912       return delegate.lowerCase().withSeparator(separator, afterEveryChars);
    913     }
    914 
    915     @Override
    916     public String toString() {
    917       return delegate.toString() +
    918           ".withSeparator(\"" + separator + "\", " + afterEveryChars + ")";
    919     }
    920   }
    921 }
    922