Home | History | Annotate | Download | only in adapter
      1 /*
      2  * Copyright (C) 2008-2009 Marc Blank
      3  * Licensed to The Android Open Source Project.
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at
      8  *
      9  *      http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 package com.android.exchange.adapter;
     19 
     20 import android.content.Context;
     21 
     22 import com.android.exchange.Eas;
     23 import com.android.exchange.EasException;
     24 import com.android.exchange.utility.FileLogger;
     25 import com.android.mail.utils.LogUtils;
     26 import com.google.common.annotations.VisibleForTesting;
     27 
     28 import java.io.ByteArrayOutputStream;
     29 import java.io.FileNotFoundException;
     30 import java.io.FileOutputStream;
     31 import java.io.IOException;
     32 import java.io.InputStream;
     33 import java.util.ArrayList;
     34 
     35 /**
     36  * Extremely fast and lightweight WBXML parser, implementing only the subset of WBXML that
     37  * EAS uses (as defined in the EAS specification)
     38  *
     39  */
     40 public abstract class Parser {
     41     private static final boolean LOG_VERBOSE = false;
     42 
     43     private static final String LOG_TAG = Eas.LOG_TAG;
     44 
     45     // The following constants are Wbxml standard
     46     public static final int START_DOCUMENT = 0;
     47     public static final int DONE = 1;
     48     public static final int START = 2;
     49     public static final int END = 3;
     50     public static final int TEXT = 4;
     51     public static final int END_DOCUMENT = 3;
     52     private static final int NOT_FETCHED = Integer.MIN_VALUE;
     53     private static final int NOT_ENDED = Integer.MIN_VALUE;
     54     private static final int EOF_BYTE = -1;
     55 
     56     // Where tags start in a page
     57     private static final int TAG_BASE = 5;
     58 
     59     private boolean logging = false;
     60     private boolean capture = false;
     61 
     62     private ArrayList<Integer> captureArray;
     63 
     64     // The input stream for this parser
     65     private InputStream in;
     66 
     67     // The current tag depth
     68     private int depth;
     69 
     70     // The upcoming (saved) id from the stream
     71     private int nextId = NOT_FETCHED;
     72 
     73     // The current tag table (i.e. the tag table for the current page)
     74     private String[] tagTable;
     75 
     76     // An array of tag tables, as defined in EasTags
     77     static private String[][] tagTables = new String[Tags.pages.length + 1][];
     78 
     79     // The stack of names of tags being processed; used when debug = true
     80     private String[] nameArray = new String[32];
     81 
     82     // The stack of tags being processed
     83     private int[] startTagArray = new int[32];
     84 
     85     // The following vars are available to all to avoid method calls that represent the state of
     86     // the parser at any given time
     87     public int endTag = NOT_ENDED;
     88 
     89     public int startTag;
     90 
     91     // The type of the last token read
     92     public int type;
     93 
     94     // The current page
     95     public int page;
     96 
     97     // The current tag
     98     public int tag;
     99 
    100     // The name of the current tag
    101     public String name;
    102 
    103     // Whether the current tag is associated with content (a value)
    104     public boolean noContent;
    105 
    106     // The value read, as a String.  Only one of text or num will be valid, depending on whether the
    107     // value was requested as a String or an int (to avoid wasted effort in parsing)
    108     public String text;
    109 
    110     // The value read, as an int
    111     public int num;
    112 
    113     // The value read, as bytes
    114     public byte[] bytes;
    115 
    116     // TODO: Define a new parse exception type rather than lumping these in as IOExceptions.
    117 
    /**
     * Thrown when the input stream ends while the parser still needs data: in the middle of a
     * value, an integer, or a tag that is being skipped.
     */
    public class EofException extends IOException {
        private static final long serialVersionUID = 1L;
    }
    124 
    /**
     * The special case of an {@link EofException} raised by {@link #setInput} when not even the
     * first byte of the stream could be read, i.e. the stream had no content at all.
     */
    public class EmptyStreamException extends EofException {
        private static final long serialVersionUID = 1L;
    }
    132 
    /**
     * Thrown by {@link #nextTag} when the end of the document is reached although the caller
     * was waiting for the end of a tag rather than for the end of the document.
     */
    public class EodException extends IOException {
        private static final long serialVersionUID = 1L;
    }
    136 
    /**
     * Signals malformed WBXML.  It is an IOException so that it travels through the same error
     * paths as the other parser failures.
     */
    public class EasParserException extends IOException {
        private static final long serialVersionUID = 1L;

        /** Creates the exception with the generic format-error message. */
        EasParserException() {
            this("WBXML format error");
        }

        /**
         * Creates the exception with a specific explanation.
         *
         * @param reason text describing what was wrong with the input
         */
        EasParserException(String reason) {
            super(reason);
        }
    }
    148 
    /**
     * Parse the document.  This base implementation reads nothing and always returns false;
     * presumably concrete parsers override it - confirm against the subclasses.
     *
     * @return false in this implementation
     * @throws IOException declared for overriding implementations
     * @throws EasException declared for overriding implementations
     */
    public boolean parse() throws IOException, EasException {
        return false;
    }
    152 
    153     /**
    154      * Initialize the tag tables; they are constant
    155      *
    156      */
    157     {
    158         String[][] pages = Tags.pages;
    159         for (int i = 0; i < pages.length; i++) {
    160             String[] page = pages[i];
    161             if (page.length > 0) {
    162                 tagTables[i] = page;
    163             }
    164         }
    165     }
    166 
    /**
     * Creates a parser positioned at the start of a document: the document header is consumed
     * from the stream right away.
     *
     * @param in the stream to parse
     * @throws IOException if the header cannot be read; an EmptyStreamException if the stream
     *     has no content at all
     */
    public Parser(InputStream in) throws IOException {
        this.setInput(in, true);
        this.logging = Eas.PARSER_LOG;
    }
    171 
    /**
     * Creates a parser that continues reading from the stream of another parser.  Nothing is
     * consumed from the stream, since the other parser has already read the document header.
     *
     * @param parser an existing, initialized parser whose input stream is taken over
     * @throws IOException declared by setInput; no stream access happens here
     */
    public Parser(Parser parser) throws IOException {
        this.setInput(parser.in, false);
        this.logging = Eas.PARSER_LOG;
    }
    181 
    182     /**
    183      * Set the debug state of the parser.  When debugging is on, every token is logged (LogUtils.v)
    184      * to the console.
    185      *
    186      * @param val the desired state for debug output
    187      */
    188     public void setDebug(boolean val) {
    189         logging = val;
    190     }
    191 
    /**
     * @return the input stream this parser reads from; may be null if none has been set
     */
    protected InputStream getInput() {
        return in;
    }
    195 
    196     /**
    197      * Turns on data capture; this is used to create test streams that represent "live" data and
    198      * can be used against the various parsers.
    199      */
    200     public void captureOn() {
    201         capture = true;
    202         captureArray = new ArrayList<Integer>();
    203     }
    204 
    205     /**
    206      * Turns off data capture; writes the captured data to a specified file.
    207      */
    208     public void captureOff(Context context, String file) {
    209         try {
    210             FileOutputStream out = context.openFileOutput(file, Context.MODE_WORLD_WRITEABLE);
    211             out.write(captureArray.toString().getBytes());
    212             out.close();
    213         } catch (FileNotFoundException e) {
    214             // This is debug code; exceptions aren't interesting.
    215         } catch (IOException e) {
    216             // This is debug code; exceptions aren't interesting.
    217         }
    218     }
    219 
    220     /**
    221      * Return the value of the current tag, as a byte array.  Note that the result of this call
    222      * is indeterminate, and possibly null, if the value of the tag is not a byte array
    223      *
    224      * @return the byte array value of the current tag
    225      * @throws IOException
    226      */
    227     public byte[] getValueBytes() throws IOException {
    228         getValue();
    229         return bytes;
    230     }
    231 
    232     /**
    233      * Return the value of the current tag, as a String.  Note that the result of this call is
    234      * indeterminate, and possibly null, if the value of the tag is not an immediate string
    235      *
    236      * @return the String value of the current tag
    237      * @throws IOException
    238      */
    239     public String getValue() throws IOException {
    240         // The false argument tells getNext to return the value as a String
    241         getNext(false);
    242         // This means there was no value given, just <Foo/>; we'll return empty string for now
    243         if (type == END) {
    244             if (logging) {
    245                 log("No value for tag: " + tagTable[startTag - TAG_BASE]);
    246             }
    247             return "";
    248         }
    249         // Save the value
    250         String val = text;
    251         // Read the next token; it had better be the end of the current tag
    252         getNext(false);
    253         // If not, throw an exception
    254         if (type != END) {
    255             throw new IOException("No END found!");
    256         }
    257         return val;
    258     }
    259 
    260     /**
    261      * Return the value of the current tag, as an integer.  Note that the value of this call is
    262      * indeterminate if the value of this tag is not an immediate string parsed as an integer
    263      *
    264      * @return the integer value of the current tag
    265      * @throws IOException
    266      */
    267    public int getValueInt() throws IOException {
    268         // The true argument to getNext indicates the desire for an integer return value
    269         getNext(true);
    270         if (type == END) {
    271             return 0;
    272         }
    273         // Save the value
    274         int val = num;
    275         // Read the next token; it had better be the end of the current tag
    276         getNext(false);
    277         // If not, throw an exception
    278         if (type != END) {
    279             throw new IOException("No END found!");
    280         }
    281         return val;
    282     }
    283 
    284     /**
    285      * Return the next tag found in the stream; special tags END and END_DOCUMENT are used to
    286      * mark the end of the current tag and end of document.  If we hit end of document without
    287      * looking for it, generate an EodException.  The tag returned consists of the page number
    288      * shifted PAGE_SHIFT bits OR'd with the tag retrieved from the stream.  Thus, all tags returned
    289      * are unique.
    290      *
    291      * @param endingTag the tag that would represent the end of the tag we're processing
    292      * @return the next tag found
    293      * @throws IOException
    294      */
    295     public int nextTag(int endingTag) throws IOException {
    296         // Lose the page information
    297         endTag = endingTag &= Tags.PAGE_MASK;
    298         while (getNext(false) != DONE) {
    299             // If we're a start, set tag to include the page and return it
    300             if (type == START) {
    301                 tag = page | startTag;
    302                 return tag;
    303             // If we're at the ending tag we're looking for, return the END signal
    304             } else if (type == END && startTag == endTag) {
    305                 return END;
    306             }
    307         }
    308         // We're at end of document here.  If we're looking for it, return END_DOCUMENT
    309         if (endTag == START_DOCUMENT) {
    310             return END_DOCUMENT;
    311         }
    312         // Otherwise, we've prematurely hit end of document, so exception out
    313         // EodException is a subclass of IOException; this will be treated as an IO error by
    314         // ExchangeService
    315         throw new EodException();
    316     }
    317 
    318     /**
    319      * Skip anything found in the stream until the end of the current tag is reached.  This can be
    320      * used to ignore stretches of xml that aren't needed by the parser.
    321      *
    322      * @throws IOException
    323      */
    324     public void skipTag() throws IOException {
    325         int thisTag = startTag;
    326         // Just loop until we hit the end of the current tag
    327         while (getNext(false) != DONE) {
    328             if (type == END && startTag == thisTag) {
    329                 return;
    330             }
    331         }
    332 
    333         // If we're at end of document, that's bad
    334         throw new EofException();
    335     }
    336 
    337     /**
    338      * Retrieve the next token from the input stream
    339      *
    340      * @return the token found
    341      * @throws IOException
    342      */
    343     public int nextToken() throws IOException {
    344         getNext(false);
    345         return type;
    346     }
    347 
    348     /**
    349      * Initializes the parser with an input stream; reads the first 4 bytes (which are always the
    350      * same in EAS, and then sets the tag table to point to page 0 (by definition, the starting
    351      * page).
    352      *
    353      * @param in the InputStream associated with this parser
    354      * @throws IOException
    355      */
    356     public void setInput(InputStream in, boolean initialize) throws IOException {
    357         this.in = in;
    358         if ((in != null) && initialize) {
    359             // If we fail on the very first byte, report an empty stream
    360             try {
    361                 readByte(); // version
    362             } catch (EofException e) {
    363                 throw new EmptyStreamException();
    364             }
    365             readInt();  // ?
    366             readInt();  // 106 (UTF-8)
    367             readInt();  // string table length
    368         }
    369         tagTable = tagTables[0];
    370     }
    371 
    372     @VisibleForTesting
    373     void resetInput(InputStream in) {
    374         this.in = in;
    375         try {
    376             // Read leading zero
    377             read();
    378         } catch (IOException e) {
    379         }
    380     }
    381 
    382     void log(String str) {
    383         int cr = str.indexOf('\n');
    384         if (cr > 0) {
    385             str = str.substring(0, cr);
    386         }
    387         LogUtils.v(LOG_TAG, str);
    388         if (Eas.FILE_LOG) {
    389             FileLogger.log(LOG_TAG, str);
    390         }
    391     }
    392 
    393     protected void pushTag(int id) {
    394         page = id >> Tags.PAGE_SHIFT;
    395         tagTable = tagTables[page];
    396         push(id);
    397     }
    398 
    399     private void pop() {
    400         if (logging) {
    401             name = nameArray[depth];
    402             log("</" + name + '>');
    403         }
    404         // Retrieve the now-current startTag from our stack
    405         startTag = endTag = startTagArray[depth];
    406         depth--;
    407     }
    408 
    409     private void push(int id) {
    410         // The tag is in the low 6 bits
    411         startTag = id & 0x3F;
    412         // If the high bit is set, there is content (a value) to be read
    413         noContent = (id & 0x40) == 0;
    414         depth++;
    415         if (logging) {
    416             name = tagTable[startTag - TAG_BASE];
    417             nameArray[depth] = name;
    418             log("<" + name + (noContent ? '/' : "") + '>');
    419         }
    420         // Save the startTag to our stack
    421         startTagArray[depth] = startTag;
    422     }
    423 
    424     /**
    425      * Return the next piece of data from the stream.  The return value indicates the type of data
    426      * that has been retrieved - START (start of tag), END (end of tag), DONE (end of stream), or
    427      * TEXT (the value of a tag)
    428      *
    429      * @param asInt whether a TEXT value should be parsed as a String or an int.
    430      * @return the type of data retrieved
    431      * @throws IOException
    432      */
    433     private final int getNext(boolean asInt) throws IOException {
    434         if (noContent) {
    435             nameArray[depth--] = null;
    436             type = END;
    437             noContent = false;
    438             return type;
    439         }
    440 
    441         text = null;
    442         name = null;
    443 
    444         int id = nextId ();
    445         while (id == Wbxml.SWITCH_PAGE) {
    446             nextId = NOT_FETCHED;
    447             // Get the new page number
    448             int pg = readByte();
    449             // Save the shifted page to add into the startTag in nextTag
    450             page = pg << Tags.PAGE_SHIFT;
    451             if (LOG_VERBOSE) {
    452                 log("Page: " + page);
    453             }
    454             // Retrieve the current tag table
    455             tagTable = tagTables[pg];
    456             id = nextId();
    457         }
    458         nextId = NOT_FETCHED;
    459 
    460         switch (id) {
    461             case EOF_BYTE:
    462                 // End of document
    463                 type = DONE;
    464                 break;
    465 
    466             case Wbxml.END:
    467                 type = END;
    468                 pop();
    469                 break;
    470 
    471             case Wbxml.STR_I:
    472                 // Inline string
    473                 type = TEXT;
    474                 if (asInt) {
    475                     num = readInlineInt();
    476                 } else {
    477                     text = readInlineString();
    478                 }
    479                 if (logging) {
    480                     name = tagTable[startTag - TAG_BASE];
    481                     log(name + ": " + (asInt ? Integer.toString(num) : text));
    482                 }
    483                 break;
    484 
    485             case Wbxml.OPAQUE:
    486                 // Integer length + opaque data
    487                 int length = readInt();
    488                 bytes = new byte[length];
    489                 for (int i = 0; i < length; i++) {
    490                     bytes[i] = (byte)readByte();
    491                 }
    492                 if (logging) {
    493                     name = tagTable[startTag - TAG_BASE];
    494                     log(name + ": (opaque:" + length + ") ");
    495                 }
    496                 break;
    497 
    498             default:
    499                 type = START;
    500                 push(id);
    501         }
    502 
    503         // Return the type of data we're dealing with
    504         return type;
    505     }
    506 
    507     /**
    508      * Read an int from the input stream, and capture it if necessary for debugging.  Seems a small
    509      * price to pay...
    510      *
    511      * @return the int read
    512      * @throws IOException
    513      */
    514     private int read() throws IOException {
    515         int i;
    516         i = in.read();
    517         if (capture) {
    518             captureArray.add(i);
    519         }
    520         if (LOG_VERBOSE) {
    521             log("Byte: " + i);
    522         }
    523         return i;
    524     }
    525 
    526     private int nextId() throws IOException {
    527         if (nextId == NOT_FETCHED) {
    528             nextId = read();
    529         }
    530         return nextId;
    531     }
    532 
    533     private int readByte() throws IOException {
    534         int i = read();
    535         if (i == EOF_BYTE) {
    536             throw new EofException();
    537         }
    538         return i;
    539     }
    540 
    541     /**
    542      * Read an integer from the stream; this is called when the parser knows that what follows is
    543      * an inline string representing an integer (e.g. the Read tag in Email has a value known to
    544      * be either "0" or "1")
    545      *
    546      * @return the integer as parsed from the stream
    547      * @throws IOException
    548      */
    549     private int readInlineInt() throws IOException {
    550         int result = 0;
    551 
    552         while (true) {
    553             int i = readByte();
    554             // Inline strings are always terminated with a zero byte
    555             if (i == 0) {
    556                 return result;
    557             }
    558             if (i >= '0' && i <= '9') {
    559                 result = (result * 10) + (i - '0');
    560             } else {
    561                 throw new IOException("Non integer");
    562             }
    563         }
    564     }
    565 
    566     private int readInt() throws IOException {
    567         int result = 0;
    568         int i;
    569 
    570         do {
    571             i = readByte();
    572             result = (result << 7) | (i & 0x7f);
    573         } while ((i & 0x80) != 0);
    574 
    575         return result;
    576     }
    577 
    578     /**
    579      * Read an inline string from the stream
    580      *
    581      * @return the String as parsed from the stream
    582      * @throws IOException
    583      */
    584     private String readInlineString() throws IOException {
    585         ByteArrayOutputStream outputStream = new ByteArrayOutputStream(256);
    586         while (true) {
    587             int i = read();
    588             if (i == 0) {
    589                 break;
    590             } else if (i == EOF_BYTE) {
    591                 throw new EofException();
    592             }
    593             outputStream.write(i);
    594         }
    595         outputStream.flush();
    596         String res = outputStream.toString("UTF-8");
    597         outputStream.close();
    598         return res;
    599     }
    600 }
    601