/*
 * Copyright (C) 2010 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.android.vcard;

import android.text.TextUtils;
import android.util.Base64;
import android.util.Log;

import com.android.vcard.exception.VCardAgentNotSupportedException;
import com.android.vcard.exception.VCardException;
import com.android.vcard.exception.VCardInvalidCommentLineException;
import com.android.vcard.exception.VCardInvalidLineException;
import com.android.vcard.exception.VCardVersionException;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * <p>
 * Basic implementation achieving vCard parsing. Based on vCard 2.1.
 * </p>
 * <p>
 * Parsed entries and properties are reported to every {@link VCardInterpreter}
 * registered through {@link #addInterpreter(VCardInterpreter)}.
 * </p>
 * @hide
 */
/* package */ class VCardParserImpl_V21 {
    private static final String LOG_TAG = VCardConstants.LOG_TAG;

    /**
     * A {@link BufferedReader} that supports a one-line look-ahead
     * ({@link #peekLine()}) and accumulates the wall-clock time spent in the
     * underlying {@code readLine()} calls.
     */
    protected static final class CustomBufferedReader extends BufferedReader {
        /** Total milliseconds spent inside the underlying readLine(). */
        private long mTime;

        /**
         * Needed since "next line" may be null due to end of input, so null
         * alone cannot signal "nothing has been peeked".
         */
        private boolean mNextLineIsValid;
        private String mNextLine;

        public CustomBufferedReader(Reader in) {
            super(in);
        }

        /**
         * Returns the previously peeked line if there is one, otherwise reads a
         * fresh line from the underlying reader.
         */
        @Override
        public String readLine() throws IOException {
            if (mNextLineIsValid) {
                final String ret = mNextLine;
                mNextLine = null;
                mNextLineIsValid = false;
                return ret;
            }
            return readLineTimed();
        }

        /**
         * Read one line, but make this object store it in its queue, so that the
         * next {@link #readLine()} returns the same line.
         */
        public String peekLine() throws IOException {
            if (!mNextLineIsValid) {
                mNextLine = readLineTimed();
                mNextLineIsValid = true;
            }
            return mNextLine;
        }

        /**
         * @return accumulated time in milliseconds spent reading lines from the
         * underlying reader.
         */
        public long getTotalmillisecond() {
            return mTime;
        }

        /** Reads a line from the underlying reader, adding the elapsed time to mTime. */
        private String readLineTimed() throws IOException {
            final long start = System.currentTimeMillis();
            final String line = super.readLine();
            mTime += System.currentTimeMillis() - start;
            return line;
        }
    }

    private static final String DEFAULT_ENCODING = "8BIT";
    private static final String DEFAULT_CHARSET = "UTF-8";

    protected final String mIntermediateCharset;

    private final List<VCardInterpreter> mInterpreterList = new ArrayList<VCardInterpreter>();
    private boolean mCanceled;

    /**
     * <p>
     * The encoding type for decoding byte streams. This member variable is
     * reset to a default encoding every time when a new item comes.
     * </p>
     * <p>
     * "Encoding" in vCard is different from "Charset". It is mainly used for
     * addresses, notes, images. "7BIT", "8BIT", "BASE64", and
     * "QUOTED-PRINTABLE" are known examples.
     * </p>
     */
    protected String mCurrentEncoding;

    protected String mCurrentCharset;

    /**
     * <p>
     * The reader object to be used internally.
     * </p>
     * <p>
     * Developers should not directly read a line from this object. Use
     * getLine() unless there is some reason not to.
     * </p>
     */
    protected CustomBufferedReader mReader;

    /**
     * <p>
     * Set for storing unknown TYPE attributes, which is not acceptable in vCard
     * specification, but happens to be seen in real world vCard.
     * </p>
     * <p>
     * We just accept those invalid types after emitting a warning for each of it.
     * </p>
     */
    protected final Set<String> mUnknownTypeSet = new HashSet<String>();

    /**
     * <p>
     * Set for storing unknown VALUE attributes, which is not acceptable in
     * vCard specification, but happens to be seen in real world vCard.
     * </p>
     * <p>
     * We just accept those invalid types after emitting a warning for each of it.
     * </p>
     */
    protected final Set<String> mUnknownValueSet = new HashSet<String>();


    public VCardParserImpl_V21() {
        this(VCardConfig.VCARD_TYPE_DEFAULT);
    }

    /**
     * @param vcardType currently not consulted by this constructor; the
     * intermediate charset is always {@link VCardConfig#DEFAULT_INTERMEDIATE_CHARSET}.
     */
    public VCardParserImpl_V21(int vcardType) {
        mIntermediateCharset = VCardConfig.DEFAULT_INTERMEDIATE_CHARSET;
    }

    /**
     * Warns (once per name) about property names that are neither known nor
     * "X-" extensions, but accepts them anyway.
     *
     * @return true when a given property name is a valid property name. This
     * implementation always returns true.
     */
    protected boolean isValidPropertyName(final String propertyName) {
        if (!(getKnownPropertyNameSet().contains(propertyName.toUpperCase()) ||
                propertyName.startsWith("X-"))
                && !mUnknownTypeSet.contains(propertyName)) {
            mUnknownTypeSet.add(propertyName);
            Log.w(LOG_TAG, "Property name unsupported by vCard 2.1: " + propertyName);
        }
        return true;
    }

    /**
     * @return String. It may be null, or its length may be 0
     * @throws IOException
     */
    protected String getLine() throws IOException {
        return mReader.readLine();
    }

    /**
     * @return the next line without consuming it. May be null at end of input.
     * @throws IOException
     */
    protected String peekLine() throws IOException {
        return mReader.peekLine();
    }

    /**
     * @return String with its length > 0
     * @throws IOException
     * @throws VCardException when the stream reached end of input
     */
    protected String getNonEmptyLine() throws IOException, VCardException {
        String line;
        while (true) {
            line = getLine();
            if (line == null) {
                throw new VCardException("Reached end of buffer.");
            } else if (line.trim().length() > 0) {
                return line;
            }
        }
    }

    /**
     * <code>
     * vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF
     *         items *CRLF
     *         "END" [ws] ":" [ws] "VCARD"
     * </code>
     * @return False when reaching end of file.
     */
    private boolean parseOneVCard() throws IOException, VCardException {
        // reset for this entire vCard.
        mCurrentEncoding = DEFAULT_ENCODING;
        mCurrentCharset = DEFAULT_CHARSET;

        boolean allowGarbage = false;
        if (!readBeginVCard(allowGarbage)) {
            return false;
        }
        for (VCardInterpreter interpreter : mInterpreterList) {
            interpreter.onEntryStarted();
        }
        parseItems();
        for (VCardInterpreter interpreter : mInterpreterList) {
            interpreter.onEntryEnded();
        }
        return true;
    }

    /**
     * Skips empty lines and consumes the "BEGIN:VCARD" line.
     *
     * @param allowGarbage when true, non-empty lines which are not "BEGIN:VCARD"
     * are skipped instead of raising an exception.
     * @return True when successful. False when reaching the end of input
     * @throws IOException
     * @throws VCardException when a non-"BEGIN:VCARD" line is found and
     * allowGarbage is false
     */
    protected boolean readBeginVCard(boolean allowGarbage) throws IOException, VCardException {
        // TODO: use constructPropertyData().
        String line;
        do {
            while (true) {
                line = getLine();
                if (line == null) {
                    return false;
                } else if (line.trim().length() > 0) {
                    break;
                }
            }
            final String[] strArray = line.split(":", 2);
            final int length = strArray.length;

            // Although vCard 2.1/3.0 specification does not allow lower cases,
            // we found vCard file emitted by some external vCard exporter have such
            // invalid Strings.
            // e.g. BEGIN:vCard
            if (length == 2 && strArray[0].trim().equalsIgnoreCase("BEGIN")
                    && strArray[1].trim().equalsIgnoreCase("VCARD")) {
                return true;
            } else if (!allowGarbage) {
                throw new VCardException("Expected String \"BEGIN:VCARD\" did not come "
                        + "(Instead, \"" + line + "\" came)");
            }
        } while (allowGarbage);

        throw new VCardException("Reached where must not be reached.");
    }

    /**
     * Parses lines other than the first "BEGIN:VCARD". Takes care of "END:VCARD" and
     * "BEGIN:VCARD" in nested vCard.
     */
    /*
     * items = *CRLF item / item
     *
     * Note: BEGIN/END aren't include in the original spec while this method handles them.
     */
    protected void parseItems() throws IOException, VCardException {
        boolean ended = false;

        // "ended" starts as false, so at least one item is always attempted.
        while (!ended) {
            try {
                ended = parseItem();
            } catch (VCardInvalidCommentLineException e) {
                // Comment-like lines are tolerated; keep parsing the following items.
                Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored.");
            }
        }
    }

    /*
     * item = [groups "."] name [params] ":" value CRLF / [groups "."] "ADR"
     * [params] ":" addressparts CRLF / [groups "."] "ORG" [params] ":" orgparts
     * CRLF / [groups "."] "N" [params] ":" nameparts CRLF / [groups "."]
     * "AGENT" [params] ":" vcard CRLF
     */
    /**
     * Parses one non-empty line as an item.
     *
     * @return true when the item was "END:VCARD", i.e. the current entry ended.
     */
    protected boolean parseItem() throws IOException, VCardException {
        // Reset for an item.
        mCurrentEncoding = DEFAULT_ENCODING;

        final String line = getNonEmptyLine();
        final VCardProperty propertyData = constructPropertyData(line);

        final String propertyNameUpper = propertyData.getName().toUpperCase();
        final String propertyRawValue = propertyData.getRawValue();

        if (propertyNameUpper.equals(VCardConstants.PROPERTY_BEGIN)) {
            if (propertyRawValue.equalsIgnoreCase("VCARD")) {
                handleNest();
            } else {
                throw new VCardException("Unknown BEGIN type: " + propertyRawValue);
            }
        } else if (propertyNameUpper.equals(VCardConstants.PROPERTY_END)) {
            if (propertyRawValue.equalsIgnoreCase("VCARD")) {
                return true;  // Ended.
            } else {
                throw new VCardException("Unknown END type: " + propertyRawValue);
            }
        } else {
            parseItemInter(propertyData, propertyNameUpper);
        }
        return false;
    }

    /**
     * Dispatches a property which is neither BEGIN nor END: AGENT goes to
     * {@link #handleAgent(VCardProperty)}, VERSION is checked against
     * {@link #getVersionString()}, everything else goes to
     * {@link #handlePropertyValue(VCardProperty, String)}.
     */
    private void parseItemInter(VCardProperty property, String propertyNameUpper)
            throws IOException, VCardException {
        String propertyRawValue = property.getRawValue();
        if (propertyNameUpper.equals(VCardConstants.PROPERTY_AGENT)) {
            handleAgent(property);
        } else if (isValidPropertyName(propertyNameUpper)) {
            if (propertyNameUpper.equals(VCardConstants.PROPERTY_VERSION) &&
                    !propertyRawValue.equals(getVersionString())) {
                throw new VCardVersionException(
                        "Incompatible version: " + propertyRawValue + " != " + getVersionString());
            }
            handlePropertyValue(property, propertyNameUpper);
        } else {
            throw new VCardException("Unknown property name: \"" + propertyNameUpper + "\"");
        }
    }

    /** Handles a nested "BEGIN:VCARD" by reporting and parsing it as its own entry. */
    private void handleNest() throws IOException, VCardException {
        for (VCardInterpreter interpreter : mInterpreterList) {
            interpreter.onEntryStarted();
        }
        parseItems();
        for (VCardInterpreter interpreter : mInterpreterList) {
            interpreter.onEntryEnded();
        }
    }

    // For performance reason, the states for group and property name are merged into one.
    private static final int STATE_GROUP_OR_PROPERTY_NAME = 0;
    private static final int STATE_PARAMS = 1;
    // vCard 3.0 specification allows double-quoted parameters, while vCard 2.1 does not.
    private static final int STATE_PARAMS_IN_DQUOTE = 2;

    /**
     * Splits one line into groups, property name, parameters and raw value.
     *
     * @param line a single (unfolded) vCard line
     * @return the property built from the line
     * @throws VCardInvalidCommentLineException when the line starts with '#'
     * @throws VCardInvalidLineException when the line has no ':' separating the
     * name/params part from the value
     */
    protected VCardProperty constructPropertyData(String line) throws VCardException {
        final VCardProperty propertyData = new VCardProperty();

        final int length = line.length();
        if (length > 0 && line.charAt(0) == '#') {
            throw new VCardInvalidCommentLineException();
        }

        int state = STATE_GROUP_OR_PROPERTY_NAME;
        int nameIndex = 0;

        // This loop is developed so that we don't have to take care of bottle neck here.
        // Refactor carefully when you need to do so.
        for (int i = 0; i < length; i++) {
            final char ch = line.charAt(i);
            switch (state) {
                case STATE_GROUP_OR_PROPERTY_NAME: {
                    if (ch == ':') {  // End of a property name.
                        final String propertyName = line.substring(nameIndex, i);
                        propertyData.setName(propertyName);
                        propertyData.setRawValue(i < length - 1 ? line.substring(i + 1) : "");
                        return propertyData;
                    } else if (ch == '.') {  // Each group is followed by the dot.
                        final String groupName = line.substring(nameIndex, i);
                        if (groupName.length() == 0) {
                            Log.w(LOG_TAG, "Empty group found. Ignoring.");
                        } else {
                            propertyData.addGroup(groupName);
                        }
                        nameIndex = i + 1;  // Next should be another group or a property name.
                    } else if (ch == ';') {  // End of property name and beginning of parameters.
                        final String propertyName = line.substring(nameIndex, i);
                        propertyData.setName(propertyName);
                        nameIndex = i + 1;
                        state = STATE_PARAMS;  // Start parameter parsing.
                    }
                    // TODO: comma support (in vCard 3.0 and 4.0).
                    break;
                }
                case STATE_PARAMS: {
                    if (ch == '"') {
                        if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
                            Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
                                    "Silently allow it");
                        }
                        state = STATE_PARAMS_IN_DQUOTE;
                    } else if (ch == ';') {  // Starts another param.
                        handleParams(propertyData, line.substring(nameIndex, i));
                        nameIndex = i + 1;
                    } else if (ch == ':') {  // End of param and beginning of values.
                        handleParams(propertyData, line.substring(nameIndex, i));
                        propertyData.setRawValue(i < length - 1 ? line.substring(i + 1) : "");
                        return propertyData;
                    }
                    break;
                }
                case STATE_PARAMS_IN_DQUOTE: {
                    if (ch == '"') {
                        if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
                            Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
                                    "Silently allow it");
                        }
                        state = STATE_PARAMS;
                    }
                    break;
                }
            }
        }

        throw new VCardInvalidLineException("Invalid line: \"" + line + "\"");
    }

    /*
     * params = ";" [ws] paramlist paramlist = paramlist [ws] ";" [ws] param /
     * param param = "TYPE" [ws] "=" [ws] ptypeval / "VALUE" [ws] "=" [ws]
     * pvalueval / "ENCODING" [ws] "=" [ws] pencodingval / "CHARSET" [ws] "="
     * [ws] charsetval / "LANGUAGE" [ws] "=" [ws] langval / "X-" word [ws] "="
     * [ws] word / knowntype
     */
    /**
     * Handles one parameter ("NAME=value" or a bare value) of a property.
     *
     * @throws VCardException when the parameter name is not one of TYPE, VALUE,
     * ENCODING, CHARSET, LANGUAGE or an "X-" name, or when a handler rejects the value
     */
    protected void handleParams(VCardProperty propertyData, String params)
            throws VCardException {
        final String[] strArray = params.split("=", 2);
        if (strArray.length == 2) {
            final String paramName = strArray[0].trim().toUpperCase();
            String paramValue = strArray[1].trim();
            if (paramName.equals("TYPE")) {
                handleType(propertyData, paramValue);
            } else if (paramName.equals("VALUE")) {
                handleValue(propertyData, paramValue);
            } else if (paramName.equals("ENCODING")) {
                handleEncoding(propertyData, paramValue.toUpperCase());
            } else if (paramName.equals("CHARSET")) {
                handleCharset(propertyData, paramValue);
            } else if (paramName.equals("LANGUAGE")) {
                handleLanguage(propertyData, paramValue);
            } else if (paramName.startsWith("X-")) {
                handleAnyParam(propertyData, paramName, paramValue);
            } else {
                throw new VCardException("Unknown type \"" + paramName + "\"");
            }
        } else {
            handleParamWithoutName(propertyData, strArray[0]);
        }
    }

    /**
     * Treats a parameter without a name as a TYPE value.
     * vCard 3.0 parser implementation may throw VCardException.
     */
    protected void handleParamWithoutName(VCardProperty propertyData, final String paramValue) {
        handleType(propertyData, paramValue);
    }

    /*
     * ptypeval = knowntype / "X-" word
     */
    /**
     * Adds a TYPE parameter, warning once per unknown (non "X-") type.
     */
    protected void handleType(VCardProperty propertyData, final String ptypeval) {
        if (!(getKnownTypeSet().contains(ptypeval.toUpperCase())
                || ptypeval.startsWith("X-"))
                && !mUnknownTypeSet.contains(ptypeval)) {
            mUnknownTypeSet.add(ptypeval);
            // Both arguments need a conversion; otherwise the offending type is dropped.
            Log.w(LOG_TAG, String.format("TYPE unsupported by %s: %s", getVersion(), ptypeval));
        }
        propertyData.addParameter(VCardConstants.PARAM_TYPE, ptypeval);
    }

    /*
     * pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word
     */
    /**
     * Adds a VALUE parameter, warning once per unknown (non "X-") value.
     */
    protected void handleValue(VCardProperty propertyData, final String pvalueval) {
        if (!(getKnownValueSet().contains(pvalueval.toUpperCase())
                || pvalueval.startsWith("X-")
                || mUnknownValueSet.contains(pvalueval))) {
            mUnknownValueSet.add(pvalueval);
            // Both arguments need a conversion; otherwise the offending value is dropped.
            Log.w(LOG_TAG, String.format(
                    "The value unsupported by TYPE of %s: %s", getVersion(), pvalueval));
        }
        propertyData.addParameter(VCardConstants.PARAM_VALUE, pvalueval);
    }

    /*
     * pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word
     */
    /**
     * Adds an ENCODING parameter and makes it the current encoding.
     *
     * @throws VCardException when the encoding is neither available nor an "X-" word
     */
    protected void handleEncoding(VCardProperty propertyData, String pencodingval)
            throws VCardException {
        if (getAvailableEncodingSet().contains(pencodingval) ||
                pencodingval.startsWith("X-")) {
            propertyData.addParameter(VCardConstants.PARAM_ENCODING, pencodingval);
            // Update encoding right away, as this is needed to understanding other params.
            mCurrentEncoding = pencodingval.toUpperCase();
        } else {
            throw new VCardException("Unknown encoding \"" + pencodingval + "\"");
        }
    }

    /**
     * <p>
     * vCard 2.1 specification only allows us-ascii and iso-8859-xxx (See RFC 1521),
     * but recent vCard files often contain other charset like UTF-8, SHIFT_JIS, etc.
     * We allow any charset.
     * </p>
     */
    protected void handleCharset(VCardProperty propertyData, String charsetval) {
        mCurrentCharset = charsetval;
        propertyData.addParameter(VCardConstants.PARAM_CHARSET, charsetval);
    }

    /**
     * Accepts a language of the form "xx-yy" where both parts consist of ASCII
     * letters only.
     * See also Section 7.1 of RFC 1521
     *
     * @throws VCardException when the value does not have that form
     */
    protected void handleLanguage(VCardProperty propertyData, String langval)
            throws VCardException {
        final String[] strArray = langval.split("-");
        if (strArray.length != 2
                || !isAsciiLetters(strArray[0])
                || !isAsciiLetters(strArray[1])) {
            throw new VCardException("Invalid Language: \"" + langval + "\"");
        }
        propertyData.addParameter(VCardConstants.PARAM_LANGUAGE, langval);
    }

    /** @return true when every character of text is an ASCII letter (true for ""). */
    private boolean isAsciiLetters(String text) {
        final int length = text.length();
        for (int i = 0; i < length; i++) {
            if (!isAsciiLetter(text.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    private boolean isAsciiLetter(char ch) {
        return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
    }

    /**
     * Mainly for "X-" type. This accepts any kind of type without check.
     */
    protected void handleAnyParam(
            VCardProperty propertyData, String paramName, String paramValue) {
        propertyData.addParameter(paramName, paramValue);
    }

    /**
     * Decodes the value of a property according to the current encoding and its
     * CHARSET parameter, stores the result in the property and reports it to all
     * interpreters.
     *
     * @param property the property whose raw value should be interpreted
     * @param propertyName not consulted here; the name is taken from property
     */
    protected void handlePropertyValue(VCardProperty property, String propertyName)
            throws IOException, VCardException {
        final String propertyNameUpper = property.getName().toUpperCase();
        String propertyRawValue = property.getRawValue();
        final String sourceCharset = VCardConfig.DEFAULT_INTERMEDIATE_CHARSET;
        final Collection<String> charsetCollection =
                property.getParameters(VCardConstants.PARAM_CHARSET);
        // Guard against an empty collection as well as null, so iterator().next()
        // cannot throw NoSuchElementException.
        String targetCharset = null;
        if (charsetCollection != null && !charsetCollection.isEmpty()) {
            targetCharset = charsetCollection.iterator().next();
        }
        if (TextUtils.isEmpty(targetCharset)) {
            targetCharset = VCardConfig.DEFAULT_IMPORT_CHARSET;
        }

        // TODO: have "separableProperty" which reflects vCard spec..
        if (propertyNameUpper.equals(VCardConstants.PROPERTY_ADR)
                || propertyNameUpper.equals(VCardConstants.PROPERTY_ORG)
                || propertyNameUpper.equals(VCardConstants.PROPERTY_N)) {
            handleAdrOrgN(property, propertyRawValue, sourceCharset, targetCharset);
            return;
        }

        if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_QP) ||
                // If encoding attribute is missing, then attempt to detect QP encoding.
                // This is to handle a bug where the android exporter was creating FN properties
                // with missing encoding.  b/7292017
                (propertyNameUpper.equals(VCardConstants.PROPERTY_FN) &&
                        property.getParameters(VCardConstants.PARAM_ENCODING) == null &&
                        VCardUtils.appearsLikeAndroidVCardQuotedPrintable(propertyRawValue))
                ) {
            final String quotedPrintablePart = getQuotedPrintablePart(propertyRawValue);
            final String propertyEncodedValue =
                    VCardUtils.parseQuotedPrintable(quotedPrintablePart,
                            false, sourceCharset, targetCharset);
            property.setRawValue(quotedPrintablePart);
            property.setValues(propertyEncodedValue);
            for (VCardInterpreter interpreter : mInterpreterList) {
                interpreter.onPropertyCreated(property);
            }
        } else if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_BASE64)
                || mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_B)) {
            // It is very rare, but some BASE64 data may be so big that
            // OutOfMemoryError occurs. To ignore such cases, use try-catch.
            try {
                final String base64Property = getBase64(propertyRawValue);
                try {
                    property.setByteValue(Base64.decode(base64Property, Base64.DEFAULT));
                } catch (IllegalArgumentException e) {
                    throw new VCardException("Decode error on base64 photo: " + propertyRawValue);
                }
                for (VCardInterpreter interpreter : mInterpreterList) {
                    interpreter.onPropertyCreated(property);
                }
            } catch (OutOfMemoryError error) {
                Log.e(LOG_TAG, "OutOfMemoryError happened during parsing BASE64 data!");
                // Still report the property (without byte value) so the entry stays consistent.
                for (VCardInterpreter interpreter : mInterpreterList) {
                    interpreter.onPropertyCreated(property);
                }
            }
        } else {
            if (!(mCurrentEncoding.equals("7BIT") || mCurrentEncoding.equals("8BIT") ||
                    mCurrentEncoding.startsWith("X-"))) {
                Log.w(LOG_TAG,
                        String.format("The encoding \"%s\" is unsupported by vCard %s",
                                mCurrentEncoding, getVersionString()));
            }

            // Some device uses line folding defined in RFC 2425, which is not allowed
            // in vCard 2.1 (while needed in vCard 3.0).
            //
            // e.g.
            // BEGIN:VCARD
            // VERSION:2.1
            // N:;Omega;;;
            // EMAIL;INTERNET:"Omega"
            //  <omega (at) example.com>
            // FN:Omega
            // END:VCARD
            //
            // The vCard above assumes that email address should become:
            // "Omega" <omega (at) example.com>
            //
            // But vCard 2.1 requires Quote-Printable when a line contains line break(s).
            //
            // For more information about line folding,
            // see "5.8.1. Line delimiting and folding" in RFC 2425.
            //
            // We take care of this case more formally in vCard 3.0, so we only need to
            // do this in vCard 2.1.
            if (getVersion() == VCardConfig.VERSION_21) {
                StringBuilder builder = null;
                while (true) {
                    final String nextLine = peekLine();
                    // We don't need to care too much about this exceptional case,
                    // but we should not wrongly eat up "END:VCARD", since it critically
                    // breaks this parser's state machine.
                    // Thus we roughly look over the next line and confirm it is at least not
                    // "END:VCARD". This extra fee is worth paying. This is exceptional
                    // anyway.
                    //
                    // The check must ask whether the line contains "END:VCARD"; the
                    // reverse can never match, because the line starts with a space.
                    if (!TextUtils.isEmpty(nextLine) &&
                            nextLine.charAt(0) == ' ' &&
                            !nextLine.toUpperCase().contains("END:VCARD")) {
                        getLine();  // Drop the next line.

                        if (builder == null) {
                            builder = new StringBuilder();
                            builder.append(propertyRawValue);
                        }
                        builder.append(nextLine.substring(1));
                    } else {
                        break;
                    }
                }
                if (builder != null) {
                    propertyRawValue = builder.toString();
                }
            }

            ArrayList<String> propertyValueList = new ArrayList<String>();
            String value = maybeUnescapeText(VCardUtils.convertStringCharset(
                    propertyRawValue, sourceCharset, targetCharset));
            propertyValueList.add(value);
            property.setValues(propertyValueList);
            for (VCardInterpreter interpreter : mInterpreterList) {
                interpreter.onPropertyCreated(property);
            }
        }
    }

    /**
     * Handles the structured properties ADR, ORG and N: splits the value into its
     * components, decodes each of them and reports the property to all interpreters.
     */
    private void handleAdrOrgN(VCardProperty property, String propertyRawValue,
            String sourceCharset, String targetCharset) throws VCardException, IOException {
        List<String> encodedValueList = new ArrayList<String>();

        // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some softwares/devices emit
        // such data.
        if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_QP)) {
            // First we retrieve Quoted-Printable String from vCard entry, which may include
            // multiple lines.
            final String quotedPrintablePart = getQuotedPrintablePart(propertyRawValue);

            // "Raw value" from the view of users should contain all part of QP string.
            // TODO: add test for this handling
            property.setRawValue(quotedPrintablePart);

            // We split Quoted-Printable String using semi-colon before decoding it, as
            // the Quoted-Printable may have semi-colon, which confuses splitter.
            final List<String> quotedPrintableValueList =
                    VCardUtils.constructListFromValue(quotedPrintablePart, getVersion());
            for (String quotedPrintableValue : quotedPrintableValueList) {
                String encoded = VCardUtils.parseQuotedPrintable(quotedPrintableValue,
                        false, sourceCharset, targetCharset);
                encodedValueList.add(encoded);
            }
        } else {
            final String propertyValue = VCardUtils.convertStringCharset(
                    getPotentialMultiline(propertyRawValue), sourceCharset, targetCharset);
            final List<String> valueList =
                    VCardUtils.constructListFromValue(propertyValue, getVersion());
            for (String value : valueList) {
                encodedValueList.add(value);
            }
        }

        property.setValues(encodedValueList);
        for (VCardInterpreter interpreter : mInterpreterList) {
            interpreter.onPropertyCreated(property);
        }
    }

    /**
     * <p>
     * Parses and returns Quoted-Printable.
     * </p>
     *
     * @param firstString The string following a parameter name and attributes.
     *            Example: "string" in
     *            "ADR:ENCODING=QUOTED-PRINTABLE:string\n\r".
     * @return whole Quoted-Printable string, including a given argument and
     *         following lines. Excludes the last empty line following to Quoted
     *         Printable lines.
     * @throws IOException
     * @throws VCardException when the input ends in the middle of a
     *         Quoted-Printable string
     */
    private String getQuotedPrintablePart(String firstString)
            throws IOException, VCardException {
        // Specifically, there may be some padding between = and CRLF.
        // See the following:
        //
        // qp-line := *(qp-segment transport-padding CRLF)
        //            qp-part transport-padding
        // qp-segment := qp-section *(SPACE / TAB) "="
        //             ; Maximum length of 76 characters
        //
        // e.g. (from RFC 2045)
        // Now's the time =
        // for all folk to come=
        //  to the aid of their country.
        if (firstString.trim().endsWith("=")) {
            // remove "transport-padding": everything after the last '=' is whitespace
            // here, because the trimmed string ends with '='.
            int pos = firstString.lastIndexOf('=');
            StringBuilder builder = new StringBuilder();
            builder.append(firstString.substring(0, pos + 1));
            builder.append("\r\n");
            String line;
            while (true) {
                line = getLine();
                if (line == null) {
                    throw new VCardException("File ended during parsing a Quoted-Printable String");
                }
                if (line.trim().endsWith("=")) {
                    // remove "transport-padding"
                    pos = line.lastIndexOf('=');
                    builder.append(line.substring(0, pos + 1));
                    builder.append("\r\n");
                } else {
                    builder.append(line);
                    break;
                }
            }
            return builder.toString();
        } else {
            return firstString;
        }
    }

    /**
     * Given the first line of a property, checks consecutive lines after it and builds a new
     * multi-line value if it exists.
     *
     * @param firstString The first line of the property.
     * @return A new property, potentially built from multiple lines.
     * @throws IOException
     */
    private String getPotentialMultiline(String firstString) throws IOException {
        final StringBuilder builder = new StringBuilder();
        builder.append(firstString);

        while (true) {
            final String line = peekLine();
            if (line == null || line.length() == 0) {
                break;
            }

            final String propertyName = getPropertyNameUpperCase(line);
            if (propertyName != null) {
                break;
            }

            // vCard 2.1 does not allow multi-line of adr but microsoft vcards may have it.
            // We will consider the next line to be a part of a multi-line value if it does not
            // contain a property name (i.e. a colon or semi-colon).
            // Consume the line.
            getLine();
            builder.append(" ").append(line);
        }

        return builder.toString();
    }

    /**
     * Collects a BASE64 value which may span multiple lines.
     *
     * @param firstString the part of the value found on the property line
     * @return the concatenated BASE64 text
     * @throws IOException
     * @throws VCardException when the input ends before the BASE64 value is terminated
     */
    protected String getBase64(String firstString) throws IOException, VCardException {
        final StringBuilder builder = new StringBuilder();
        builder.append(firstString);

        while (true) {
            final String line = peekLine();
            if (line == null) {
                throw new VCardException("File ended during parsing BASE64 binary");
            }

            // vCard 2.1 requires two spaces at the end of BASE64 strings, but some vCard doesn't
            // have them. We try to detect those cases using colon and semi-colon, given BASE64
            // does not contain it.
            // E.g.
            //      TEL;TYPE=WORK:+5555555
            // or
            //      END:VCARD
            String propertyName = getPropertyNameUpperCase(line);
            if (getKnownPropertyNameSet().contains(propertyName) ||
                    VCardConstants.PROPERTY_X_ANDROID_CUSTOM.equals(propertyName)) {
                Log.w(LOG_TAG, "Found a next property during parsing a BASE64 string, " +
                        "which must not contain semi-colon or colon. Treat the line as next "
                        + "property.");
                Log.w(LOG_TAG, "Problematic line: " + line.trim());
                break;
            }

            // Consume the line.
            getLine();

            if (line.length() == 0) {
                break;
            }
            // Trim off any extraneous whitespace to handle 2.1 implementations
            // that use 3.0 style line continuations. This is safe because space
            // isn't a Base64 encoding value.
            builder.append(line.trim());
        }

        return builder.toString();
    }

    /**
     * Extracts the property name portion of a given vCard line.
     * <p>
     * Properties must contain a colon.
     * <p>
     * E.g.
     *      TEL;TYPE=WORK:+5555555  // returns "TEL"
     *      END:VCARD // returns "END"
     *      TEL; // returns null
     *
     * @param line The vCard line.
     * @return The property name portion. {@literal null} if no property name found.
     */
    private String getPropertyNameUpperCase(String line) {
        final int colonIndex = line.indexOf(":");
        if (colonIndex > -1) {
            final int semiColonIndex = line.indexOf(";");

            // The name ends at whichever of ':' and ';' comes first; ':' is known to exist.
            final int minIndex = (semiColonIndex == -1)
                    ? colonIndex
                    : Math.min(colonIndex, semiColonIndex);
            return line.substring(0, minIndex).toUpperCase();
        }
        return null;
    }

    /*
     * vCard 2.1 specifies AGENT allows one vcard entry. Currently we emit an
     * error toward the AGENT property.
     * // TODO: Support AGENT property.
     * item =
     * ... / [groups "."] "AGENT" [params] ":" vcard CRLF vcard = "BEGIN" [ws]
     * ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] ":" [ws] "VCARD"
     */
    /**
     * Reports an AGENT property whose value does not embed a vCard; rejects one that does.
     *
     * @throws VCardAgentNotSupportedException when the raw value contains "BEGIN:VCARD"
     */
    protected void handleAgent(final VCardProperty property) throws VCardException {
        if (!property.getRawValue().toUpperCase().contains("BEGIN:VCARD")) {
            // Apparently invalid line seen in Windows Mobile 6.5. Ignore them.
            for (VCardInterpreter interpreter : mInterpreterList) {
                interpreter.onPropertyCreated(property);
            }
            return;
        } else {
            throw new VCardAgentNotSupportedException("AGENT Property is not supported now.");
        }
    }

    /**
     * For vCard 3.0. This implementation returns the text unchanged.
     */
    protected String maybeUnescapeText(final String text) {
        return text;
    }

    /**
     * Returns unescaped String if the character should be unescaped. Return
     * null otherwise. e.g. In vCard 2.1, "\;" should be unescaped into ";"
     * while "\x" should not be.
     */
    protected String maybeUnescapeCharacter(final char ch) {
        return unescapeCharacter(ch);
    }

    /* package */ static String unescapeCharacter(final char ch) {
        // Original vCard 2.1 specification does not allow transformation
        // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous
        // implementation of
        // this class allowed them, so keep it as is.
        if (ch == '\\' || ch == ';' || ch == ':' || ch == ',') {
            return String.valueOf(ch);
        } else {
            return null;
        }
    }

    /**
     * @return {@link VCardConfig#VERSION_21}
     */
    protected int getVersion() {
        return VCardConfig.VERSION_21;
    }

    /**
     * @return {@link VCardConstants#VERSION_V21}
     */
    protected String getVersionString() {
        return VCardConstants.VERSION_V21;
    }

    protected Set<String> getKnownPropertyNameSet() {
        return VCardParser_V21.sKnownPropertyNameSet;
    }

    protected Set<String> getKnownTypeSet() {
        return VCardParser_V21.sKnownTypeSet;
    }

    protected Set<String> getKnownValueSet() {
        return VCardParser_V21.sKnownValueSet;
    }

    protected Set<String> getAvailableEncodingSet() {
        return VCardParser_V21.sAvailableEncoding;
    }

    protected String getDefaultEncoding() {
        return DEFAULT_ENCODING;
    }

    protected String getDefaultCharset() {
        return DEFAULT_CHARSET;
    }

    protected String getCurrentCharset() {
        return mCurrentCharset;
    }

    /** Registers an interpreter which will be notified of parse events. */
    public void addInterpreter(VCardInterpreter interpreter) {
        mInterpreterList.add(interpreter);
    }

    /**
     * Parses every vCard entry in the stream until end of input or until
     * {@link #cancel()} is requested.
     *
     * @param is the stream to read; must not be null
     * @throws NullPointerException when is is null
     */
    public void parse(InputStream is) throws IOException, VCardException {
        openReader(is);

        for (VCardInterpreter interpreter : mInterpreterList) {
            interpreter.onVCardStarted();
        }

        // vcard_file = [wsls] vcard [wsls]
        while (true) {
            synchronized (this) {
                if (mCanceled) {
                    Log.i(LOG_TAG, "Cancel request has come. exitting parse operation.");
                    break;
                }
            }
            if (!parseOneVCard()) {
                break;
            }
        }

        for (VCardInterpreter interpreter : mInterpreterList) {
            interpreter.onVCardEnded();
        }
    }

    /**
     * Parses at most one vCard entry from the stream.
     *
     * @param is the stream to read; must not be null
     * @throws NullPointerException when is is null
     */
    public void parseOne(InputStream is) throws IOException, VCardException {
        openReader(is);

        for (VCardInterpreter interpreter : mInterpreterList) {
            interpreter.onVCardStarted();
        }
        parseOneVCard();
        for (VCardInterpreter interpreter : mInterpreterList) {
            interpreter.onVCardEnded();
        }
    }

    /** Validates the stream and wraps it in the internal reader using the intermediate charset. */
    private void openReader(InputStream is) throws IOException {
        if (is == null) {
            throw new NullPointerException("InputStream must not be null.");
        }

        final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset);
        mReader = new CustomBufferedReader(tmpReader);
    }

    /** Requests that {@link #parse(InputStream)} stop before the next entry. */
    public final synchronized void cancel() {
        Log.i(LOG_TAG, "ParserImpl received cancel operation.");
        mCanceled = true;
    }
}