Home | History | Annotate | Download | only in adapter
      1 /*
      2  * Copyright (C) 2008-2009 Marc Blank
      3  * Licensed to The Android Open Source Project.
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at
      8  *
      9  *      http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 package com.android.exchange.adapter;
     19 
     20 import android.content.Context;
     21 
     22 import com.android.exchange.Eas;
     23 import com.android.exchange.EasException;
     24 import com.android.exchange.service.EasService;
     25 import com.android.exchange.utility.FileLogger;
     26 import com.android.mail.utils.LogUtils;
     27 import com.google.common.annotations.VisibleForTesting;
     28 
     29 import java.io.ByteArrayOutputStream;
     30 import java.io.FileNotFoundException;
     31 import java.io.FileOutputStream;
     32 import java.io.IOException;
     33 import java.io.InputStream;
     34 import java.util.ArrayDeque;
     35 import java.util.ArrayList;
     36 import java.util.Arrays;
     37 import java.util.Deque;
     38 
     39 /**
     40  * Extremely fast and lightweight WBXML parser, implementing only the subset of WBXML that
     41  * EAS uses (as defined in the EAS specification).
     42  *
     43  * Supports:
     44  *      WBXML tokens to encode XML tags
     45  *      WBXML code pages to support multiple XML namespaces
     46  *      Inline strings
     47  *      Opaque data
     48  *
     49  * Does not support: (throws EasParserException)
     50  *      String tables
     51  *      Entities
     52  *      Processing instructions
     53  *      Attribute encoding
     54  *
     55  */
     56 public abstract class Parser {
    // Compile-time switch for logVerbose(); when false, verbose messages are dropped.
    private static final boolean LOG_VERBOSE = false;

    // Tag used for all log output from this class.
    private static final String LOG_TAG = Eas.LOG_TAG;

    // The following constants are Wbxml standard
    // They are the token types stored in "type" and returned by getNext(); the public ones are
    // also returned from / passed to nextTag().
    public static final int START_DOCUMENT = 0;
    public static final int END_DOCUMENT = 1;
    // Note: DONE has the same value as END_DOCUMENT.
    private static final int DONE = 1;
    private static final int START = 2;
    public static final int END = 3;
    private static final int TEXT = 4;
    private static final int OPAQUE = 5;
    // Not referenced anywhere else in this class.
    private static final int NOT_ENDED = Integer.MIN_VALUE;
    // Value returned by InputStream.read() at end of stream.
    private static final int EOF_BYTE = -1;

    // When true, read() appends every value it reads to captureArray.
    private boolean capture = false;

    // Values recorded while capture is on; allocated by captureOn().
    private ArrayList<Integer> captureArray;

    // The input stream for this parser
    private InputStream in;

    // The stack of names of tags being processed; used when debug = true
    // NOTE(review): this array is not referenced anywhere else in this class; the tag stack
    // actually used is startTagArray.
    private String[] nameArray = new String[32];
     81 
     82     public class Tag {
     83         private final int mPage;
     84         private final int mIndex;
     85         // Whether the tag is associated with content (a value)
     86         public final boolean mNoContent;
     87         private final String mName;
     88 
     89         public Tag(final int page, final int id) {
     90             mPage = page;
     91             // The tag is in the low 6 bits
     92             mIndex = id & Tags.PAGE_MASK;
     93             // If the high bit is set, there is content (a value) to be read
     94             mNoContent = (id & Wbxml.WITH_CONTENT) == 0;
     95             if (Tags.isGlobalTag(mIndex)) {
     96                 mName = "unsupported-WBXML";
     97             } else if (!Tags.isValidTag(mPage, mIndex)) {
     98                 mName = "unknown";
     99             } else {
    100                 mName = Tags.getTagName(mPage, mIndex);
    101             }
    102         }
    103 
    104         public int getTagNum() {
    105             if (Tags.isGlobalTag(mIndex)) {
    106                 return mIndex;
    107             }
    108             return (mPage << Tags.PAGE_SHIFT) | mIndex;
    109         }
    110 
    111         @Override
    112         public String toString() {
    113             return mName;
    114         }
    115     }
    116 
    // The stack of tags being processed
    // (push() adds at the head; pop() and the no-content path in getNext() remove from the head)
    private final Deque<Tag> startTagArray = new ArrayDeque<Tag>();

    // The tag most recently pushed by push() or removed by pop()
    private Tag startTag;

    // The type of the last token read (eg, TEXT, OPAQUE, END, etc).
    private int type;

    // The current page. As of EAS 14.1, this is a value 0-24.
    private int page;

    // The current tag. The low order 6 bits contain the tag index and the
    // higher order bits the page number. The format matches that used for
    // the tag enums defined in Tags.java.
    // Assigned by nextTag() each time it returns a start tag.
    public int tag;

    // Whether the current tag is associated with content (a value)
    // Set by push(); when true, the next getNext() call reports END without reading the stream.
    public boolean noContent;

    // The value read, as a String
    // (set for inline strings; reset to null at the start of every getNext() call)
    private String text;

    // The value read, as bytes
    // (set for opaque data; reset to null at the start of every getNext() call)
    private byte[] bytes;
    141 
    142     // TODO: Define a new parse exception type rather than lumping these in as IOExceptions.
    143 
    /**
     * Thrown when the end of the input stream is reached while the parser still expects more
     * data (i.e. a premature EOF, which is an error).
     */
    public class EofException extends IOException {
        private static final long serialVersionUID = 1L;

        /** Creates the exception with no detail message. */
        public EofException() {
            super();
        }
    }
    150 
    151     /**
    152      * An EmptyStreamException is an EofException that occurs reading the first byte in the parser's
    153      * input stream; in other words, the stream had no content.
    154      */
    155     public class EmptyStreamException extends EofException {
    156         private static final long serialVersionUID = 1L;
    157     }
    158 
    /**
     * Thrown by nextTag() when the end of the document is reached while the caller was waiting
     * for the end of a tag rather than for the end of the document.
     */
    public class EodException extends IOException {
        private static final long serialVersionUID = 1L;

        /** Creates the exception with no detail message. */
        public EodException() {
            super();
        }
    }
    162 
    /**
     * Thrown when the stream contains data this parser cannot handle or that is malformed.
     */
    public class EasParserException extends IOException {
        private static final long serialVersionUID = 1L;

        /** Creates the exception with the generic "WBXML format error" message. */
        EasParserException() {
            this("WBXML format error");
        }

        /**
         * @param reason the detail message describing what was wrong with the data
         */
        EasParserException(final String reason) {
            super(reason);
        }
    }
    174 
    /**
     * Base implementation: reads nothing from the stream and always returns false.
     * NOTE(review): presumably concrete parsers override this to do the real work; the meaning
     * of the boolean result is not defined in this class -- confirm against the subclasses.
     *
     * @return false
     * @throws IOException declared for overriding implementations; never thrown here
     * @throws EasException declared for overriding implementations; never thrown here
     */
    public boolean parse() throws IOException, EasException {
        return false;
    }
    178 
    /**
     * Creates a parser over the given stream and, via setInput(in, true), consumes the WBXML
     * header from it (when the stream is non-null).
     *
     * @param in the stream to parse
     * @throws IOException if the header cannot be read or announces a string table
     */
    public Parser(final InputStream in) throws IOException {
        // Note: setInput is public and non-final, so an override would run from this constructor.
        setInput(in, true);
    }
    182 
    /**
     * Constructor for use when switching parsers within a input stream
     * The new parser shares the existing parser's stream and does not read a header from it
     * (setInput is called with initialize == false). Only the stream is taken from the other
     * parser; no other state (page, tag stack) is copied here.
     *
     * @param parser an existing, initialized parser
     * @throws IOException
     */
    public Parser(final Parser parser) throws IOException {
        setInput(parser.in, false);
    }
    191 
    /**
     * @return the stream this parser is currently reading from (may be null if none was set)
     */
    protected InputStream getInput() {
        return in;
    }
    195 
    196     /**
    197      * Turns on data capture; this is used to create test streams that represent "live" data and
    198      * can be used against the various parsers.
    199      */
    200     public void captureOn() {
    201         capture = true;
    202         captureArray = new ArrayList<Integer>();
    203     }
    204 
    205     /**
    206      * Turns off data capture; writes the captured data to a specified file.
    207      */
    208     public void captureOff(final Context context, final String file) {
    209         try {
    210             final FileOutputStream out = context.openFileOutput(file,
    211                     Context.MODE_WORLD_WRITEABLE);
    212             out.write(captureArray.toString().getBytes());
    213             out.close();
    214         } catch (FileNotFoundException e) {
    215             // This is debug code; exceptions aren't interesting.
    216         } catch (IOException e) {
    217             // This is debug code; exceptions aren't interesting.
    218         }
    219     }
    220 
    221     /**
    222      * Return the value of the current tag, as a byte array. Throws EasParserException
    223      * if neither opaque nor text data is present. Never returns null--returns
    224      * an empty byte[] array for empty data.
    225      *
    226      * @return the byte array value of the current tag
    227      * @throws IOException
    228      */
    229     public byte[] getValueBytes() throws IOException {
    230         final String name = startTag.toString();
    231 
    232         getNext();
    233         // This means there was no value given, just <Foo/>; we'll return empty array
    234         if (type == END) {
    235             log("No value for tag: " + name);
    236             return new byte[0];
    237         } else if (type != OPAQUE && type != TEXT) {
    238             throw new EasParserException("Expected OPAQUE or TEXT data for tag " + name);
    239         }
    240 
    241         // Save the value
    242         final byte[] val = type == OPAQUE ? bytes : text.getBytes("UTF-8");
    243         // Read the next token; it had better be the end of the current tag
    244         getNext();
    245         // If not, throw an exception
    246         if (type != END) {
    247             throw new EasParserException("No END found for tag " + name);
    248         }
    249         return val;
    250     }
    251 
    252     /**
    253      * Return the value of the current tag, as a String. Throws EasParserException
    254      * for non-text data. Never returns null--returns an empty string if no data.
    255      *
    256      * @return the String value of the current tag
    257      * @throws IOException
    258      */
    259     public String getValue() throws IOException {
    260         final String name = startTag.toString();
    261 
    262         getNext();
    263         // This means there was no value given, just <Foo/>; we'll return empty string for now
    264         if (type == END) {
    265             log("No value for tag: " + name);
    266             return "";
    267         } else if (type != TEXT) {
    268             throw new EasParserException("Expected TEXT data for tag " + name);
    269         }
    270 
    271         // Save the value
    272         final String val = text;
    273         // Read the next token; it had better be the end of the current tag
    274         getNext();
    275         // If not, throw an exception
    276         if (type != END) {
    277             throw new EasParserException("No END found for tag " + name);
    278         }
    279         return val;
    280     }
    281 
    282     /**
    283      * Return the value of the current tag, as an integer. Throws EasParserException
    284      * for non text data, and text data that doesn't parse as an integer. Returns
    285      * 0 for empty data.
    286      *
    287      * @return the integer value of the current tag
    288      * @throws IOException
    289      */
    290     public int getValueInt() throws IOException {
    291         final String val = getValue();
    292         if (val.length() == 0) {
    293             return 0;
    294         }
    295 
    296         int num;
    297         try {
    298             num = Integer.parseInt(val);
    299         } catch (NumberFormatException e) {
    300             throw new EasParserException("Tag " + startTag + ": " + e.getMessage());
    301         }
    302         return num;
    303     }
    304 
    305     /**
    306      * Return the next tag found in the stream; special tags END and END_DOCUMENT are used to
    307      * mark the end of the current tag and end of document.  If we hit end of document without
    308      * looking for it, generate an EodException.  The tag returned consists of the page number
    309      * shifted PAGE_SHIFT bits OR'd with the tag retrieved from the stream.  Thus, all tags returned
    310      * are unique.
    311      *
    312      * @param endingTag the tag that would represent the end of the tag we're processing
    313      * @return the next tag found
    314      * @throws IOException
    315      */
    316     public int nextTag(final int endingTag) throws IOException {
    317         while (getNext() != DONE) {
    318             // If we're a start, set tag to include the page and return it
    319             if (type == START) {
    320                 tag = startTag.getTagNum();
    321                 return tag;
    322             // If we're at the ending tag we're looking for, return the END signal
    323             } else if (type == END && startTag.getTagNum() == endingTag) {
    324                 return END;
    325             }
    326         }
    327         // We're at end of document here.  If we're looking for it, return END_DOCUMENT
    328         if (endingTag == START_DOCUMENT) {
    329             return END_DOCUMENT;
    330         }
    331         // Otherwise, we've prematurely hit end of document, so exception out
    332         // EodException is a subclass of IOException; this will be treated as an IO error by
    333         // EasService
    334         throw new EodException();
    335     }
    336 
    337     /**
    338      * Skip anything found in the stream until the end of the current tag is reached.  This can be
    339      * used to ignore stretches of xml that aren't needed by the parser.
    340      *
    341      * @throws IOException
    342      */
    343     public void skipTag() throws IOException {
    344         final int thisTag = startTag.getTagNum();
    345         // Just loop until we hit the end of the current tag
    346         while (getNext() != DONE) {
    347             if (type == END && startTag.getTagNum() == thisTag) {
    348                 return;
    349             }
    350         }
    351 
    352         // If we're at end of document, that's bad
    353         throw new EofException();
    354     }
    355 
    356     /**
    357      * Initializes the parser with an input stream; reads the first 4 bytes (which are always the
    358      * same in EAS, and then sets the tag table to point to page 0 (by definition, the starting
    359      * page).
    360      *
    361      * @param in the InputStream associated with this parser
    362      * @throws IOException
    363      */
    364     public void setInput(final InputStream in, final boolean initialize) throws IOException {
    365         this.in = in;
    366         if ((in != null) && initialize) {
    367             // If we fail on the very first byte, report an empty stream
    368             try {
    369                 final int version = readByte(); // version
    370             } catch (EofException e) {
    371                 throw new EmptyStreamException();
    372             }
    373             readInt();  // public identifier
    374             readInt();  // 106 (UTF-8)
    375             final int stringTableLength = readInt();  // string table length
    376             if (stringTableLength != 0) {
    377                 throw new EasParserException("WBXML string table unsupported");
    378             }
    379         }
    380     }
    381 
    382     @VisibleForTesting
    383     void resetInput(final InputStream in) {
    384         this.in = in;
    385         try {
    386             // Read leading zero
    387             read();
    388         } catch (IOException e) {
    389         }
    390     }
    391 
    392     void log(final String str) {
    393         if (!EasService.getProtocolLogging()) {
    394             return;
    395         }
    396         final String logStr;
    397         int cr = str.indexOf('\n');
    398         if (cr > 0) {
    399             logStr = str.substring(0, cr);
    400         } else {
    401             logStr = str;
    402         }
    403         final char [] charArray = new char[startTagArray.size() * 2];
    404         Arrays.fill(charArray, ' ');
    405         final String indent = new String(charArray);
    406         LogUtils.d(LOG_TAG, "%s", indent + logStr);
    407         if (EasService.getFileLogging()) {
    408             FileLogger.log(LOG_TAG, logStr);
    409         }
    410     }
    411 
    412     void logVerbose(final String str) {
    413         if (LOG_VERBOSE) {
    414             log(str);
    415         }
    416     }
    417 
    /**
     * Pushes a tag onto the tag stack as though its start token had just been read from the
     * stream. The current page is switched to the page encoded in the id.
     *
     * @param id a page-qualified tag number (page in the bits above Tags.PAGE_SHIFT, tag index
     *        in the low bits)
     */
    protected void pushTag(final int id) {
        // Must happen before push(): push() builds the Tag from the current page
        page = id >>> Tags.PAGE_SHIFT;
        push(id);
    }
    422 
    423     protected void pop() {
    424         // Retrieve the now-current startTag from our stack
    425         startTag = startTagArray.removeFirst();
    426         log("</" + startTag + '>');
    427     }
    428 
    429     private void push(final int id) {
    430         startTag = new Tag(page, id);
    431         noContent = startTag.mNoContent;
    432         log("<" + startTag + (noContent ? '/' : "") + '>');
    433         // Save the startTag to our stack
    434         startTagArray.addFirst(startTag);
    435     }
    436 
    437     /**
    438      * Return the next piece of data from the stream.  The return value indicates the type of data
    439      * that has been retrieved - START (start of tag), END (end of tag), DONE (end of stream), or
    440      * TEXT (the value of a tag)
    441      *
    442      * @return the type of data retrieved
    443      * @throws IOException
    444      */
    445     private final int getNext() throws IOException {
    446         bytes = null;
    447         text = null;
    448 
    449         if (noContent) {
    450             startTagArray.removeFirst();
    451             type = END;
    452             noContent = false;
    453             return type;
    454         }
    455 
    456         int id = read();
    457         while (id == Wbxml.SWITCH_PAGE) {
    458             // Get the new page number
    459             page = readByte();
    460             // Retrieve the current tag table
    461             if (!Tags.isValidPage(page)) {
    462                 // Unknown code page. These seem to happen mostly because of
    463                 // invalid data from the server so throw an exception here.
    464                 throw new EasParserException("Unknown code page " + page);
    465             }
    466             logVerbose("Page: " + page);
    467             id = read();
    468         }
    469 
    470         switch (id) {
    471             case EOF_BYTE:
    472                 // End of document
    473                 type = DONE;
    474                 break;
    475 
    476             case Wbxml.END:
    477                 type = END;
    478                 pop();
    479                 break;
    480 
    481             case Wbxml.STR_I:
    482                 // Inline string
    483                 type = TEXT;
    484                 text = readInlineString();
    485                 log(startTag + ": " + text);
    486                 break;
    487 
    488             case Wbxml.OPAQUE:
    489                 // Integer length + opaque data
    490                 type = OPAQUE;
    491                 final int length = readInt();
    492                 bytes = new byte[length];
    493                 for (int i = 0; i < length; i++) {
    494                     bytes[i] = (byte)readByte();
    495                 }
    496                 log(startTag + ": (opaque:" + length + ") ");
    497                 break;
    498 
    499             default:
    500                 if (Tags.isGlobalTag(id & Tags.PAGE_MASK)) {
    501                     throw new EasParserException(String.format(
    502                                     "Unhandled WBXML global token 0x%02X", id));
    503                 }
    504                 if ((id & Wbxml.WITH_ATTRIBUTES) != 0) {
    505                     throw new EasParserException(String.format(
    506                                     "Attributes unsupported, tag 0x%02X", id));
    507                 }
    508                 type = START;
    509                 push(id);
    510         }
    511 
    512         // Return the type of data we're dealing with
    513         return type;
    514     }
    515 
    516     /**
    517      * Read an int from the input stream, and capture it if necessary for debugging.  Seems a small
    518      * price to pay...
    519      *
    520      * @return the int read
    521      * @throws IOException
    522      */
    523     private int read() throws IOException {
    524         int i;
    525         i = in.read();
    526         if (capture) {
    527             captureArray.add(i);
    528         }
    529         logVerbose("Byte: " + i);
    530         return i;
    531     }
    532 
    533     private int readByte() throws IOException {
    534         int i = read();
    535         if (i == EOF_BYTE) {
    536             throw new EofException();
    537         }
    538         return i;
    539     }
    540 
    541     /**
    542      * Throws EasParserException if detects integer encoded with more than 5
    543      * bytes. A uint_32 needs 5 bytes to fully encode 32 bits so if the high
    544      * bit is set for more than 4 bytes, something is wrong with the data
    545      * stream.
    546      */
    547     private int readInt() throws IOException {
    548         int result = 0;
    549         int i;
    550         int numBytes = 0;
    551 
    552         do {
    553             if (++numBytes > 5) {
    554                 throw new EasParserException("Invalid integer encoding, too many bytes");
    555             }
    556             i = readByte();
    557             result = (result << 7) | (i & 0x7f);
    558         } while ((i & 0x80) != 0);
    559 
    560         return result;
    561     }
    562 
    563     /**
    564      * Read an inline string from the stream
    565      *
    566      * @return the String as parsed from the stream
    567      * @throws IOException
    568      */
    569     private String readInlineString() throws IOException {
    570         final ByteArrayOutputStream outputStream = new ByteArrayOutputStream(256);
    571         while (true) {
    572             final int i = read();
    573             if (i == 0) {
    574                 break;
    575             } else if (i == EOF_BYTE) {
    576                 throw new EofException();
    577             }
    578             outputStream.write(i);
    579         }
    580         outputStream.flush();
    581         final String res = outputStream.toString("UTF-8");
    582         outputStream.close();
    583         return res;
    584     }
    585 }
    586