/*
 * Copyright (C) 2008-2009 Marc Blank
 * Licensed to The Android Open Source Project.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.android.exchange.adapter;

import android.content.Context;

import com.android.exchange.Eas;
import com.android.exchange.EasException;
import com.android.exchange.utility.FileLogger;
import com.android.mail.utils.LogUtils;
import com.google.common.annotations.VisibleForTesting;

import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;

/**
 * Extremely fast and lightweight WBXML parser, implementing only the subset of WBXML that
 * EAS uses (as defined in the EAS specification).
 *
 * <p>The parser is a pull parser: callers repeatedly ask for the next tag/token and then read
 * the value of the current tag with {@link #getValue()}, {@link #getValueInt()} or
 * {@link #getValueBytes()}. Parser state is exposed through public fields to avoid method calls.
 */
public abstract class Parser {
    private static final boolean LOG_VERBOSE = false;

    private static final String LOG_TAG = Eas.LOG_TAG;

    // Token types reported in {@link #type} and returned by getNext()/nextToken()/nextTag()
    public static final int START_DOCUMENT = 0;
    public static final int DONE = 1;
    public static final int START = 2;
    public static final int END = 3;
    public static final int TEXT = 4;
    // NOTE(review): END_DOCUMENT has the same value as END; presumably callers rely on this,
    // so it is left unchanged.
    public static final int END_DOCUMENT = 3;
    private static final int NOT_FETCHED = Integer.MIN_VALUE;
    private static final int NOT_ENDED = Integer.MIN_VALUE;
    private static final int EOF_BYTE = -1;

    // Where tags start in a page
    private static final int TAG_BASE = 5;

    // Initial capacity of the tag stacks; they grow on demand (see push())
    private static final int INITIAL_STACK_SIZE = 32;

    // A WBXML multi-byte integer (mb_u_int32) occupies at most 5 bytes
    private static final int MAX_MULTI_BYTE_INT_LENGTH = 5;

    private boolean logging = false;
    private boolean capture = false;

    // The bytes captured since captureOn() was called; null until then
    private ArrayList<Integer> captureArray;

    // The input stream for this parser
    private InputStream in;

    // The current tag depth
    private int depth;

    // The upcoming (saved) id from the stream
    private int nextId = NOT_FETCHED;

    // The current tag table (i.e. the tag table for the current page)
    private String[] tagTable;

    // An array of tag tables, as defined in EasTags
    private static String[][] tagTables = new String[Tags.pages.length + 1][];

    /*
     * Initialize the tag tables; they are constant, so this is done once when the class is
     * loaded rather than every time a parser is constructed.
     */
    static {
        String[][] pages = Tags.pages;
        for (int i = 0; i < pages.length; i++) {
            String[] page = pages[i];
            if (page.length > 0) {
                tagTables[i] = page;
            }
        }
    }

    // The stack of names of tags being processed; used when debug = true
    private String[] nameArray = new String[INITIAL_STACK_SIZE];

    // The stack of tags being processed
    private int[] startTagArray = new int[INITIAL_STACK_SIZE];

    // The following vars are available to all to avoid method calls that represent the state of
    // the parser at any given time
    public int endTag = NOT_ENDED;

    public int startTag;

    // The type of the last token read
    public int type;

    // The current page
    public int page;

    // The current tag
    public int tag;

    // The name of the current tag
    public String name;

    // Whether the current tag is associated with content (a value)
    public boolean noContent;

    // The value read, as a String. Only one of text or num will be valid, depending on whether the
    // value was requested as a String or an int (to avoid wasted effort in parsing)
    public String text;

    // The value read, as an int
    public int num;

    // The value read, as bytes
    public byte[] bytes;

    // TODO: Define a new parse exception type rather than lumping these in as IOExceptions.

    /**
     * Generated when the parser comes to EOF prematurely during parsing (i.e. in error)
     */
    public class EofException extends IOException {
        private static final long serialVersionUID = 1L;
    }

    /**
     * An EmptyStreamException is an EofException that occurs reading the first byte in the parser's
     * input stream; in other words, the stream had no content.
     */
    public class EmptyStreamException extends EofException {
        private static final long serialVersionUID = 1L;
    }

    /**
     * Generated by {@link #nextTag(int)} when the end of the document is reached while a tag
     * other than START_DOCUMENT was still being processed.
     */
    public class EodException extends IOException {
        private static final long serialVersionUID = 1L;
    }

    /**
     * Generated when the stream does not conform to the WBXML subset this parser understands.
     */
    public class EasParserException extends IOException {
        private static final long serialVersionUID = 1L;

        EasParserException() {
            super("WBXML format error");
        }

        EasParserException(String reason) {
            super(reason);
        }
    }

    /**
     * Parse the stream. The base implementation does nothing and returns false; subclasses
     * override this with their own parsing logic.
     *
     * @return false in this base implementation
     * @throws IOException
     * @throws EasException
     */
    public boolean parse() throws IOException, EasException {
        return false;
    }

    /**
     * Create a parser reading from the start of a WBXML stream; the WBXML header is consumed.
     *
     * @param in the InputStream associated with this parser
     * @throws IOException if the header cannot be read (EmptyStreamException if the stream is
     * empty)
     */
    public Parser(InputStream in) throws IOException {
        setInput(in, true);
        logging = Eas.PARSER_LOG;
    }

    /**
     * Constructor for use when switching parsers within a input stream
     * @param parser an existing, initialized parser
     * @throws IOException
     */
    public Parser(Parser parser) throws IOException {
        setInput(parser.in, false);
        logging = Eas.PARSER_LOG;
    }

    /**
     * Set the debug state of the parser. When debugging is on, every token is logged (LogUtils.v)
     * to the console.
     *
     * @param val the desired state for debug output
     */
    public void setDebug(boolean val) {
        logging = val;
    }

    /**
     * @return the InputStream this parser reads from
     */
    protected InputStream getInput() {
        return in;
    }

    /**
     * Turns on data capture; this is used to create test streams that represent "live" data and
     * can be used against the various parsers.
     */
    public void captureOn() {
        capture = true;
        captureArray = new ArrayList<Integer>();
    }

    /**
     * Turns off data capture; writes the captured data to a specified file. Failures to write
     * the file are ignored, as this is debug-only functionality.
     *
     * @param context the Context used to open the output file
     * @param file the name of the file to write the captured data to
     */
    public void captureOff(Context context, String file) {
        // Stop recording first, so that bytes read from here on are no longer accumulated
        capture = false;
        if (captureArray == null) {
            // captureOn() was never called; there is nothing to write
            return;
        }
        FileOutputStream out = null;
        try {
            // NOTE(review): MODE_WORLD_WRITEABLE is deprecated on Android; consider MODE_PRIVATE
            // if nothing outside this app needs to modify the capture file.
            out = context.openFileOutput(file, Context.MODE_WORLD_WRITEABLE);
            out.write(captureArray.toString().getBytes());
        } catch (FileNotFoundException e) {
            // This is debug code; exceptions aren't interesting.
        } catch (IOException e) {
            // This is debug code; exceptions aren't interesting.
        } finally {
            // Always release the file, even if the write failed
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e) {
                    // This is debug code; exceptions aren't interesting.
                }
            }
        }
    }

    /**
     * Return the value of the current tag, as a byte array. Note that the result of this call
     * is indeterminate, and possibly null, if the value of the tag is not a byte array
     *
     * @return the byte array value of the current tag
     * @throws IOException
     */
    public byte[] getValueBytes() throws IOException {
        getValue();
        return bytes;
    }

    /**
     * Return the value of the current tag, as a String. Note that the result of this call is
     * indeterminate, and possibly null, if the value of the tag is not an immediate string
     *
     * @return the String value of the current tag
     * @throws IOException
     */
    public String getValue() throws IOException {
        // The false argument tells getNext to return the value as a String
        getNext(false);
        // This means there was no value given, just <Foo/>; we'll return empty string for now
        if (type == END) {
            if (logging) {
                log("No value for tag: " + tagTable[startTag - TAG_BASE]);
            }
            return "";
        }
        // Save the value
        String val = text;
        // Read the next token; it had better be the end of the current tag
        getNext(false);
        // If not, throw an exception
        if (type != END) {
            throw new IOException("No END found!");
        }
        return val;
    }

    /**
     * Return the value of the current tag, as an integer. Note that the value of this call is
     * indeterminate if the value of this tag is not an immediate string parsed as an integer
     *
     * @return the integer value of the current tag
     * @throws IOException
     */
    public int getValueInt() throws IOException {
        // The true argument to getNext indicates the desire for an integer return value
        getNext(true);
        if (type == END) {
            return 0;
        }
        // Save the value
        int val = num;
        // Read the next token; it had better be the end of the current tag
        getNext(false);
        // If not, throw an exception
        if (type != END) {
            throw new IOException("No END found!");
        }
        return val;
    }

    /**
     * Return the next tag found in the stream; special tags END and END_DOCUMENT are used to
     * mark the end of the current tag and end of document. If we hit end of document without
     * looking for it, generate an EodException. The tag returned consists of the page number
     * shifted PAGE_SHIFT bits OR'd with the tag retrieved from the stream. Thus, all tags returned
     * are unique.
     *
     * @param endingTag the tag that would represent the end of the tag we're processing
     * @return the next tag found
     * @throws IOException
     */
    public int nextTag(int endingTag) throws IOException {
        // Lose the page information
        endTag = endingTag & Tags.PAGE_MASK;
        while (getNext(false) != DONE) {
            if (type == START) {
                // If we're a start, set tag to include the page and return it
                tag = page | startTag;
                return tag;
            } else if (type == END && startTag == endTag) {
                // If we're at the ending tag we're looking for, return the END signal
                return END;
            }
        }
        // We're at end of document here. If we're looking for it, return END_DOCUMENT
        if (endTag == START_DOCUMENT) {
            return END_DOCUMENT;
        }
        // Otherwise, we've prematurely hit end of document, so exception out
        // EodException is a subclass of IOException; this will be treated as an IO error by
        // ExchangeService
        throw new EodException();
    }

    /**
     * Skip anything found in the stream until the end of the current tag is reached. This can be
     * used to ignore stretches of xml that aren't needed by the parser.
     *
     * @throws IOException
     */
    public void skipTag() throws IOException {
        int thisTag = startTag;
        // Just loop until we hit the end of the current tag
        while (getNext(false) != DONE) {
            if (type == END && startTag == thisTag) {
                return;
            }
        }

        // If we're at end of document, that's bad
        throw new EofException();
    }

    /**
     * Retrieve the next token from the input stream
     *
     * @return the token found
     * @throws IOException
     */
    public int nextToken() throws IOException {
        getNext(false);
        return type;
    }

    /**
     * Initializes the parser with an input stream; optionally reads the WBXML header (a version
     * byte followed by three multi-byte integers, which are always the same in EAS), and then
     * sets the tag table to point to page 0 (by definition, the starting page).
     *
     * @param in the InputStream associated with this parser
     * @param initialize whether the WBXML header should be read from the stream; false when the
     * stream is already positioned past the header
     * @throws IOException
     */
    public void setInput(InputStream in, boolean initialize) throws IOException {
        this.in = in;
        if ((in != null) && initialize) {
            // If we fail on the very first byte, report an empty stream
            try {
                readByte(); // version
            } catch (EofException e) {
                throw new EmptyStreamException();
            }
            readInt();  // ?
            readInt();  // 106 (UTF-8)
            readInt();  // string table length
        }
        tagTable = tagTables[0];
    }

    /**
     * Replace the input stream and consume its first byte. Failure to read that byte is
     * ignored.
     *
     * @param in the new InputStream for this parser
     */
    @VisibleForTesting
    void resetInput(InputStream in) {
        this.in = in;
        try {
            // Read leading zero
            read();
        } catch (IOException ignored) {
            // Best effort; a failure here will resurface on the next read from the stream
        }
    }

    /**
     * Log a string (only its first line, if it contains a newline after the first character) to
     * the verbose log, and to the file log if that is enabled.
     *
     * @param str the string to log
     */
    void log(String str) {
        int cr = str.indexOf('\n');
        if (cr > 0) {
            str = str.substring(0, cr);
        }
        LogUtils.v(LOG_TAG, str);
        if (Eas.FILE_LOG) {
            FileLogger.log(LOG_TAG, str);
        }
    }

    /**
     * Push a tag onto the tag stack as though it had been read from the stream, selecting the
     * tag table of the page encoded in the id.
     *
     * @param id the tag, including its page information
     */
    protected void pushTag(int id) {
        // NOTE(review): page is stored unshifted here, whereas getNext() stores it shifted by
        // Tags.PAGE_SHIFT - confirm which form callers of pushTag expect.
        page = id >> Tags.PAGE_SHIFT;
        tagTable = tagTables[page];
        push(id);
    }

    /**
     * Pop the current tag off the tag stack; called when an END token is read.
     */
    private void pop() {
        if (logging) {
            name = nameArray[depth];
            log("</" + name + '>');
        }
        // Retrieve the now-current startTag from our stack
        startTag = endTag = startTagArray[depth];
        depth--;
    }

    /**
     * Push a tag read from the stream onto the tag stack, growing the stack if necessary.
     *
     * @param id the raw tag id, consisting of the tag (low 6 bits) and the content flag (0x40)
     */
    private void push(int id) {
        // The tag is in the low 6 bits
        startTag = id & 0x3F;
        // If the content bit (0x40) is set, there is content (a value) to be read
        noContent = (id & 0x40) == 0;
        depth++;
        // Deeply nested documents must not overflow the fixed-size stacks
        if (depth >= startTagArray.length) {
            growStacks();
        }
        if (logging) {
            name = tagTable[startTag - TAG_BASE];
            nameArray[depth] = name;
            log("<" + name + (noContent ? '/' : "") + '>');
        }
        // Save the startTag to our stack
        startTagArray[depth] = startTag;
    }

    /**
     * Double the capacity of the tag and name stacks, preserving their contents.
     */
    private void growStacks() {
        int newSize = startTagArray.length * 2;

        int[] newTags = new int[newSize];
        System.arraycopy(startTagArray, 0, newTags, 0, startTagArray.length);
        startTagArray = newTags;

        String[] newNames = new String[newSize];
        System.arraycopy(nameArray, 0, newNames, 0, nameArray.length);
        nameArray = newNames;
    }

    /**
     * Return the next piece of data from the stream. The return value indicates the type of data
     * that has been retrieved - START (start of tag), END (end of tag), DONE (end of stream), or
     * TEXT (the value of a tag, either an inline string or opaque data)
     *
     * @param asInt whether a TEXT value should be parsed as a String or an int.
     * @return the type of data retrieved
     * @throws IOException
     */
    private final int getNext(boolean asInt) throws IOException {
        // A tag without content has no END token in the stream; synthesize one
        if (noContent) {
            nameArray[depth--] = null;
            type = END;
            noContent = false;
            return type;
        }

        text = null;
        name = null;

        int id = nextId();
        while (id == Wbxml.SWITCH_PAGE) {
            nextId = NOT_FETCHED;
            // Get the new page number
            int pg = readByte();
            // A page we have no table for means the stream is malformed (or unsupported);
            // report it as a parse error rather than failing with an unchecked exception
            if (pg >= tagTables.length) {
                throw new EasParserException("Unknown code page: " + pg);
            }
            // Save the shifted page to add into the startTag in nextTag
            page = pg << Tags.PAGE_SHIFT;
            if (LOG_VERBOSE) {
                log("Page: " + page);
            }
            // Retrieve the current tag table
            tagTable = tagTables[pg];
            id = nextId();
        }
        nextId = NOT_FETCHED;

        switch (id) {
            case EOF_BYTE:
                // End of document
                type = DONE;
                break;

            case Wbxml.END:
                type = END;
                pop();
                break;

            case Wbxml.STR_I:
                // Inline string
                type = TEXT;
                if (asInt) {
                    num = readInlineInt();
                } else {
                    text = readInlineString();
                }
                if (logging) {
                    name = tagTable[startTag - TAG_BASE];
                    log(name + ": " + (asInt ? Integer.toString(num) : text));
                }
                break;

            case Wbxml.OPAQUE:
                // Integer length + opaque data. This is a value, so report it as TEXT rather
                // than leaving the type of the previous token in place.
                type = TEXT;
                int length = readInt();
                if (length < 0) {
                    throw new EasParserException("Invalid opaque data length");
                }
                bytes = new byte[length];
                for (int i = 0; i < length; i++) {
                    bytes[i] = (byte)readByte();
                }
                if (logging) {
                    name = tagTable[startTag - TAG_BASE];
                    log(name + ": (opaque:" + length + ") ");
                }
                break;

            default:
                type = START;
                push(id);
        }

        // Return the type of data we're dealing with
        return type;
    }

    /**
     * Read an int from the input stream, and capture it if necessary for debugging. Seems a small
     * price to pay...
     *
     * @return the int read, or EOF_BYTE (-1) at end of stream
     * @throws IOException
     */
    private int read() throws IOException {
        int i;
        i = in.read();
        if (capture) {
            captureArray.add(i);
        }
        if (LOG_VERBOSE) {
            log("Byte: " + i);
        }
        return i;
    }

    /**
     * Return the upcoming id from the stream, reading it only if it hasn't been fetched yet.
     *
     * @return the upcoming id, or EOF_BYTE (-1) at end of stream
     * @throws IOException
     */
    private int nextId() throws IOException {
        if (nextId == NOT_FETCHED) {
            nextId = read();
        }
        return nextId;
    }

    /**
     * Read a byte from the stream, treating end of stream as an error.
     *
     * @return the byte read
     * @throws IOException an EofException if the stream has ended
     */
    private int readByte() throws IOException {
        int i = read();
        if (i == EOF_BYTE) {
            throw new EofException();
        }
        return i;
    }

    /**
     * Read an integer from the stream; this is called when the parser knows that what follows is
     * an inline string representing an integer (e.g. the Read tag in Email has a value known to
     * be either "0" or "1")
     *
     * @return the integer as parsed from the stream
     * @throws IOException
     */
    private int readInlineInt() throws IOException {
        int result = 0;

        while (true) {
            int i = readByte();
            // Inline strings are always terminated with a zero byte
            if (i == 0) {
                return result;
            }
            if (i >= '0' && i <= '9') {
                result = (result * 10) + (i - '0');
            } else {
                throw new IOException("Non integer");
            }
        }
    }

    /**
     * Read a multi-byte integer from the stream: 7 bits per byte, most significant group first,
     * with the high bit of each byte set when more bytes follow.
     *
     * @return the integer read
     * @throws IOException an EasParserException if the integer is longer than a WBXML
     * multi-byte integer may be, or an EofException if the stream ends first
     */
    private int readInt() throws IOException {
        int result = 0;
        int count = 0;
        int i;

        do {
            // Refuse over-long encodings instead of silently overflowing the result
            if (++count > MAX_MULTI_BYTE_INT_LENGTH) {
                throw new EasParserException("Multi-byte integer too long");
            }
            i = readByte();
            result = (result << 7) | (i & 0x7f);
        } while ((i & 0x80) != 0);

        return result;
    }

    /**
     * Read an inline string from the stream
     *
     * @return the String as parsed from the stream
     * @throws IOException
     */
    private String readInlineString() throws IOException {
        ByteArrayOutputStream outputStream = new ByteArrayOutputStream(256);
        while (true) {
            int i = read();
            if (i == 0) {
                break;
            } else if (i == EOF_BYTE) {
                throw new EofException();
            }
            outputStream.write(i);
        }
        outputStream.flush();
        String res = outputStream.toString("UTF-8");
        outputStream.close();
        return res;
    }
}