Home | History | Annotate | Download | only in adapter
      1 /*
      2  * Copyright (C) 2008-2009 Marc Blank
      3  * Licensed to The Android Open Source Project.
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at
      8  *
      9  *      http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  */
     17 
     18 package com.android.exchange.adapter;
     19 
     20 import android.content.Context;
     21 import android.util.Log;
     22 
     23 import com.android.exchange.Eas;
     24 import com.android.exchange.EasException;
     25 import com.android.exchange.utility.FileLogger;
     26 import com.google.common.annotations.VisibleForTesting;
     27 
     28 import java.io.ByteArrayOutputStream;
     29 import java.io.FileNotFoundException;
     30 import java.io.FileOutputStream;
     31 import java.io.IOException;
     32 import java.io.InputStream;
     33 import java.util.ArrayList;
     34 
     35 /**
     36  * Extremely fast and lightweight WBXML parser, implementing only the subset of WBXML that
     37  * EAS uses (as defined in the EAS specification)
     38  *
     39  */
     40 public abstract class Parser {
     41     private static final boolean LOG_VERBOSE = false;
     42 
     43     // The following constants are Wbxml standard
     44     public static final int START_DOCUMENT = 0;
     45     public static final int DONE = 1;
     46     public static final int START = 2;
     47     public static final int END = 3;
     48     public static final int TEXT = 4;
     49     public static final int END_DOCUMENT = 3;
     50     private static final int NOT_FETCHED = Integer.MIN_VALUE;
     51     private static final int NOT_ENDED = Integer.MIN_VALUE;
     52     private static final int EOF_BYTE = -1;
     53     private boolean logging = false;
     54     private boolean capture = false;
     55     private String logTag = "EAS Parser";
     56 
     57     // Where tags start in a page
     58     private static final int TAG_BASE = 5;
     59 
     60     private ArrayList<Integer> captureArray;
     61 
     62     // The input stream for this parser
     63     private InputStream in;
     64 
     65     // The current tag depth
     66     private int depth;
     67 
     68     // The upcoming (saved) id from the stream
     69     private int nextId = NOT_FETCHED;
     70 
     71     // The current tag table (i.e. the tag table for the current page)
     72     private String[] tagTable;
     73 
     74     // An array of tag tables, as defined in EasTags
     75     static private String[][] tagTables = new String[Tags.pages.length + 1][];
     76 
     77     // The stack of names of tags being processed; used when debug = true
     78     private String[] nameArray = new String[32];
     79 
     80     // The stack of tags being processed
     81     private int[] startTagArray = new int[32];
     82 
     83     // The following vars are available to all to avoid method calls that represent the state of
     84     // the parser at any given time
     85     public int endTag = NOT_ENDED;
     86 
     87     public int startTag;
     88 
     89     // The type of the last token read
     90     public int type;
     91 
     92     // The current page
     93     public int page;
     94 
     95     // The current tag
     96     public int tag;
     97 
     98     // The name of the current tag
     99     public String name;
    100 
    101     // Whether the current tag is associated with content (a value)
    102     private boolean noContent;
    103 
    104     // The value read, as a String.  Only one of text or num will be valid, depending on whether the
    105     // value was requested as a String or an int (to avoid wasted effort in parsing)
    106     public String text;
    107 
    108     // The value read, as an int
    109     public int num;
    110 
    111     // The value read, as bytes
    112     public byte[] bytes;
    113 
    114     /**
    115      * Generated when the parser comes to EOF prematurely during parsing (i.e. in error)
    116      */
    117     public class EofException extends IOException {
    118         private static final long serialVersionUID = 1L;
    119     }
    120 
    121     /**
    122      * An EmptyStreamException is an EofException that occurs reading the first byte in the parser's
    123      * input stream; in other words, the stream had no content.
    124      */
    125     public class EmptyStreamException extends EofException {
    126         private static final long serialVersionUID = 1L;
    127     }
    128 
    129     public class EodException extends IOException {
    130         private static final long serialVersionUID = 1L;
    131     }
    132 
    133     public class EasParserException extends IOException {
    134         private static final long serialVersionUID = 1L;
    135 
    136         EasParserException() {
    137             super("WBXML format error");
    138         }
    139 
    140         EasParserException(String reason) {
    141             super(reason);
    142         }
    143     }
    144 
    145     public boolean parse() throws IOException, EasException {
    146         return false;
    147     }
    148 
    149     /**
    150      * Initialize the tag tables; they are constant
    151      *
    152      */
    153     {
    154         String[][] pages = Tags.pages;
    155         for (int i = 0; i < pages.length; i++) {
    156             String[] page = pages[i];
    157             if (page.length > 0) {
    158                 tagTables[i] = page;
    159             }
    160         }
    161     }
    162 
    163     public Parser(InputStream in) throws IOException {
    164         setInput(in, true);
    165         logging = Eas.PARSER_LOG;
    166     }
    167 
    168     /**
    169      * Constructor for use when switching parsers within a input stream
    170      * @param parser an existing, initialized parser
    171      * @throws IOException
    172      */
    173     public Parser(Parser parser) throws IOException {
    174         setInput(parser.in, false);
    175         logging = Eas.PARSER_LOG;
    176     }
    177 
    178     /**
    179      * Set the debug state of the parser.  When debugging is on, every token is logged (Log.v) to
    180      * the console.
    181      *
    182      * @param val the desired state for debug output
    183      */
    184     public void setDebug(boolean val) {
    185         logging = val;
    186     }
    187 
    188     protected InputStream getInput() {
    189         return in;
    190     }
    191 
    192     /**
    193      * Set the tag used for logging.  When debugging is on, every token is logged (Log.v) to
    194      * the console.
    195      *
    196      * @param val the logging tag
    197      */
    198     public void setLoggingTag(String val) {
    199         logTag = val;
    200     }
    201 
    202     /**
    203      * Turns on data capture; this is used to create test streams that represent "live" data and
    204      * can be used against the various parsers.
    205      */
    206     public void captureOn() {
    207         capture = true;
    208         captureArray = new ArrayList<Integer>();
    209     }
    210 
    211     /**
    212      * Turns off data capture; writes the captured data to a specified file.
    213      */
    214     public void captureOff(Context context, String file) {
    215         try {
    216             FileOutputStream out = context.openFileOutput(file, Context.MODE_WORLD_WRITEABLE);
    217             out.write(captureArray.toString().getBytes());
    218             out.close();
    219         } catch (FileNotFoundException e) {
    220             // This is debug code; exceptions aren't interesting.
    221         } catch (IOException e) {
    222             // This is debug code; exceptions aren't interesting.
    223         }
    224     }
    225 
    226     /**
    227      * Return the value of the current tag, as a byte array.  Note that the result of this call
    228      * is indeterminate, and possibly null, if the value of the tag is not a byte array
    229      *
    230      * @return the byte array value of the current tag
    231      * @throws IOException
    232      */
    233     public byte[] getValueBytes() throws IOException {
    234         getValue();
    235         return bytes;
    236     }
    237 
    238     /**
    239      * Return the value of the current tag, as a String.  Note that the result of this call is
    240      * indeterminate, and possibly null, if the value of the tag is not an immediate string
    241      *
    242      * @return the String value of the current tag
    243      * @throws IOException
    244      */
    245     public String getValue() throws IOException {
    246         // The false argument tells getNext to return the value as a String
    247         getNext(false);
    248         // This means there was no value given, just <Foo/>; we'll return empty string for now
    249         if (type == END) {
    250             if (logging) {
    251                 log("No value for tag: " + tagTable[startTag - TAG_BASE]);
    252             }
    253             return "";
    254         }
    255         // Save the value
    256         String val = text;
    257         // Read the next token; it had better be the end of the current tag
    258         getNext(false);
    259         // If not, throw an exception
    260         if (type != END) {
    261             throw new IOException("No END found!");
    262         }
    263         return val;
    264     }
    265 
    266     /**
    267      * Return the value of the current tag, as an integer.  Note that the value of this call is
    268      * indeterminate if the value of this tag is not an immediate string parsed as an integer
    269      *
    270      * @return the integer value of the current tag
    271      * @throws IOException
    272      */
    273    public int getValueInt() throws IOException {
    274         // The true argument to getNext indicates the desire for an integer return value
    275         getNext(true);
    276         if (type == END) {
    277             return 0;
    278         }
    279         // Save the value
    280         int val = num;
    281         // Read the next token; it had better be the end of the current tag
    282         getNext(false);
    283         // If not, throw an exception
    284         if (type != END) {
    285             throw new IOException("No END found!");
    286         }
    287         return val;
    288     }
    289 
    290     /**
    291      * Return the next tag found in the stream; special tags END and END_DOCUMENT are used to
    292      * mark the end of the current tag and end of document.  If we hit end of document without
    293      * looking for it, generate an EodException.  The tag returned consists of the page number
    294      * shifted PAGE_SHIFT bits OR'd with the tag retrieved from the stream.  Thus, all tags returned
    295      * are unique.
    296      *
    297      * @param endingTag the tag that would represent the end of the tag we're processing
    298      * @return the next tag found
    299      * @throws IOException
    300      */
    301     public int nextTag(int endingTag) throws IOException {
    302         // Lose the page information
    303         endTag = endingTag &= Tags.PAGE_MASK;
    304         while (getNext(false) != DONE) {
    305             // If we're a start, set tag to include the page and return it
    306             if (type == START) {
    307                 tag = page | startTag;
    308                 return tag;
    309             // If we're at the ending tag we're looking for, return the END signal
    310             } else if (type == END && startTag == endTag) {
    311                 return END;
    312             }
    313         }
    314         // We're at end of document here.  If we're looking for it, return END_DOCUMENT
    315         if (endTag == START_DOCUMENT) {
    316             return END_DOCUMENT;
    317         }
    318         // Otherwise, we've prematurely hit end of document, so exception out
    319         // EodException is a subclass of IOException; this will be treated as an IO error by
    320         // ExchangeService
    321         throw new EodException();
    322     }
    323 
    324     /**
    325      * Skip anything found in the stream until the end of the current tag is reached.  This can be
    326      * used to ignore stretches of xml that aren't needed by the parser.
    327      *
    328      * @throws IOException
    329      */
    330     public void skipTag() throws IOException {
    331         int thisTag = startTag;
    332         // Just loop until we hit the end of the current tag
    333         while (getNext(false) != DONE) {
    334             if (type == END && startTag == thisTag) {
    335                 return;
    336             }
    337         }
    338 
    339         // If we're at end of document, that's bad
    340         throw new EofException();
    341     }
    342 
    343     /**
    344      * Retrieve the next token from the input stream
    345      *
    346      * @return the token found
    347      * @throws IOException
    348      */
    349     public int nextToken() throws IOException {
    350         getNext(false);
    351         return type;
    352     }
    353 
    354     /**
    355      * Initializes the parser with an input stream; reads the first 4 bytes (which are always the
    356      * same in EAS, and then sets the tag table to point to page 0 (by definition, the starting
    357      * page).
    358      *
    359      * @param in the InputStream associated with this parser
    360      * @throws IOException
    361      */
    362     public void setInput(InputStream in, boolean initialize) throws IOException {
    363         this.in = in;
    364         if (initialize) {
    365             // If we fail on the very first byte, report an empty stream
    366             try {
    367                 readByte(); // version
    368             } catch (EofException e) {
    369                 throw new EmptyStreamException();
    370             }
    371             readInt();  // ?
    372             readInt();  // 106 (UTF-8)
    373             readInt();  // string table length
    374         }
    375         tagTable = tagTables[0];
    376     }
    377 
    378     @VisibleForTesting
    379     void resetInput(InputStream in) {
    380         this.in = in;
    381         try {
    382             // Read leading zero
    383             read();
    384         } catch (IOException e) {
    385         }
    386     }
    387 
    388     void log(String str) {
    389         int cr = str.indexOf('\n');
    390         if (cr > 0) {
    391             str = str.substring(0, cr);
    392         }
    393         Log.v(logTag, str);
    394         if (Eas.FILE_LOG) {
    395             FileLogger.log(logTag, str);
    396         }
    397     }
    398 
    399     protected void pushTag(int id) {
    400         page = id >> Tags.PAGE_SHIFT;
    401         tagTable = tagTables[page];
    402         push(id);
    403     }
    404 
    405     private void pop() {
    406         if (logging) {
    407             name = nameArray[depth];
    408             log("</" + name + '>');
    409         }
    410         // Retrieve the now-current startTag from our stack
    411         startTag = endTag = startTagArray[depth];
    412         depth--;
    413     }
    414 
    415     private void push(int id) {
    416         // The tag is in the low 6 bits
    417         startTag = id & 0x3F;
    418         // If the high bit is set, there is content (a value) to be read
    419         noContent = (id & 0x40) == 0;
    420         depth++;
    421         if (logging) {
    422             name = tagTable[startTag - TAG_BASE];
    423             nameArray[depth] = name;
    424             log("<" + name + (noContent ? '/' : "") + '>');
    425         }
    426         // Save the startTag to our stack
    427         startTagArray[depth] = startTag;
    428     }
    429 
    430     /**
    431      * Return the next piece of data from the stream.  The return value indicates the type of data
    432      * that has been retrieved - START (start of tag), END (end of tag), DONE (end of stream), or
    433      * TEXT (the value of a tag)
    434      *
    435      * @param asInt whether a TEXT value should be parsed as a String or an int.
    436      * @return the type of data retrieved
    437      * @throws IOException
    438      */
    439     private final int getNext(boolean asInt) throws IOException {
    440         if (noContent) {
    441             nameArray[depth--] = null;
    442             type = END;
    443             noContent = false;
    444             return type;
    445         }
    446 
    447         text = null;
    448         name = null;
    449 
    450         int id = nextId ();
    451         while (id == Wbxml.SWITCH_PAGE) {
    452             nextId = NOT_FETCHED;
    453             // Get the new page number
    454             int pg = readByte();
    455             // Save the shifted page to add into the startTag in nextTag
    456             page = pg << Tags.PAGE_SHIFT;
    457             if (LOG_VERBOSE) {
    458                 log("Page: " + page);
    459             }
    460             // Retrieve the current tag table
    461             tagTable = tagTables[pg];
    462             id = nextId();
    463         }
    464         nextId = NOT_FETCHED;
    465 
    466         switch (id) {
    467             case EOF_BYTE:
    468                 // End of document
    469                 type = DONE;
    470                 break;
    471 
    472             case Wbxml.END:
    473                 type = END;
    474                 pop();
    475                 break;
    476 
    477             case Wbxml.STR_I:
    478                 // Inline string
    479                 type = TEXT;
    480                 if (asInt) {
    481                     num = readInlineInt();
    482                 } else {
    483                     text = readInlineString();
    484                 }
    485                 if (logging) {
    486                     name = tagTable[startTag - TAG_BASE];
    487                     log(name + ": " + (asInt ? Integer.toString(num) : text));
    488                 }
    489                 break;
    490 
    491             case Wbxml.OPAQUE:
    492                 // Integer length + opaque data
    493                 int length = readInt();
    494                 bytes = new byte[length];
    495                 for (int i = 0; i < length; i++) {
    496                     bytes[i] = (byte)readByte();
    497                 }
    498                 if (logging) {
    499                     name = tagTable[startTag - TAG_BASE];
    500                     log(name + ": (opaque:" + length + ") ");
    501                 }
    502                 break;
    503 
    504             default:
    505                 type = START;
    506                 push(id);
    507         }
    508 
    509         // Return the type of data we're dealing with
    510         return type;
    511     }
    512 
    513     /**
    514      * Read an int from the input stream, and capture it if necessary for debugging.  Seems a small
    515      * price to pay...
    516      *
    517      * @return the int read
    518      * @throws IOException
    519      */
    520     private int read() throws IOException {
    521         int i;
    522         i = in.read();
    523         if (capture) {
    524             captureArray.add(i);
    525         }
    526         if (LOG_VERBOSE) {
    527             log("Byte: " + i);
    528         }
    529         return i;
    530     }
    531 
    532     private int nextId() throws IOException {
    533         if (nextId == NOT_FETCHED) {
    534             nextId = read();
    535         }
    536         return nextId;
    537     }
    538 
    539     private int readByte() throws IOException {
    540         int i = read();
    541         if (i == EOF_BYTE) {
    542             throw new EofException();
    543         }
    544         return i;
    545     }
    546 
    547     /**
    548      * Read an integer from the stream; this is called when the parser knows that what follows is
    549      * an inline string representing an integer (e.g. the Read tag in Email has a value known to
    550      * be either "0" or "1")
    551      *
    552      * @return the integer as parsed from the stream
    553      * @throws IOException
    554      */
    555     private int readInlineInt() throws IOException {
    556         int result = 0;
    557 
    558         while (true) {
    559             int i = readByte();
    560             // Inline strings are always terminated with a zero byte
    561             if (i == 0) {
    562                 return result;
    563             }
    564             if (i >= '0' && i <= '9') {
    565                 result = (result * 10) + (i - '0');
    566             } else {
    567                 throw new IOException("Non integer");
    568             }
    569         }
    570     }
    571 
    572     private int readInt() throws IOException {
    573         int result = 0;
    574         int i;
    575 
    576         do {
    577             i = readByte();
    578             result = (result << 7) | (i & 0x7f);
    579         } while ((i & 0x80) != 0);
    580 
    581         return result;
    582     }
    583 
    584     /**
    585      * Read an inline string from the stream
    586      *
    587      * @return the String as parsed from the stream
    588      * @throws IOException
    589      */
    590     private String readInlineString() throws IOException {
    591         ByteArrayOutputStream outputStream = new ByteArrayOutputStream(256);
    592         while (true) {
    593             int i = read();
    594             if (i == 0) {
    595                 break;
    596             } else if (i == EOF_BYTE) {
    597                 throw new EofException();
    598             }
    599             outputStream.write(i);
    600         }
    601         outputStream.flush();
    602         String res = outputStream.toString("UTF-8");
    603         outputStream.close();
    604         return res;
    605     }
    606 }
    607