Home | History | Annotate | Download | only in vcard
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 package com.android.vcard;
     17 
     18 import android.text.TextUtils;
     19 import android.util.Base64;
     20 import android.util.Log;
     21 
     22 import com.android.vcard.exception.VCardAgentNotSupportedException;
     23 import com.android.vcard.exception.VCardException;
     24 import com.android.vcard.exception.VCardInvalidCommentLineException;
     25 import com.android.vcard.exception.VCardInvalidLineException;
     26 import com.android.vcard.exception.VCardVersionException;
     27 
     28 import java.io.BufferedReader;
     29 import java.io.IOException;
     30 import java.io.InputStream;
     31 import java.io.InputStreamReader;
     32 import java.io.Reader;
     33 import java.util.ArrayList;
     34 import java.util.Collection;
     35 import java.util.HashSet;
     36 import java.util.List;
     37 import java.util.Set;
     38 
     39 /**
     40  * <p>
     41  * Basic implementation achieving vCard parsing. Based on vCard 2.1.
     42  * </p>
     43  * @hide
     44  */
     45 /* package */ class VCardParserImpl_V21 {
     46     private static final String LOG_TAG = VCardConstants.LOG_TAG;
     47 
     48     protected static final class CustomBufferedReader extends BufferedReader {
     49         private long mTime;
     50 
     51         /**
     52          * Needed since "next line" may be null due to end of line.
     53          */
     54         private boolean mNextLineIsValid;
     55         private String mNextLine;
     56 
     57         public CustomBufferedReader(Reader in) {
     58             super(in);
     59         }
     60 
     61         @Override
     62         public String readLine() throws IOException {
     63             if (mNextLineIsValid) {
     64                 final String ret = mNextLine;
     65                 mNextLine = null;
     66                 mNextLineIsValid = false;
     67                 return ret;
     68             }
     69 
     70             final long start = System.currentTimeMillis();
     71             final String line = super.readLine();
     72             final long end = System.currentTimeMillis();
     73             mTime += end - start;
     74             return line;
     75         }
     76 
     77         /**
     78          * Read one line, but make this object store it in its queue.
     79          */
     80         public String peekLine() throws IOException {
     81             if (!mNextLineIsValid) {
     82                 final long start = System.currentTimeMillis();
     83                 final String line = super.readLine();
     84                 final long end = System.currentTimeMillis();
     85                 mTime += end - start;
     86 
     87                 mNextLine = line;
     88                 mNextLineIsValid = true;
     89             }
     90 
     91             return mNextLine;
     92         }
     93 
     94         public long getTotalmillisecond() {
     95             return mTime;
     96         }
     97     }
     98 
    // Encoding that mCurrentEncoding is reset to before each vCard and each item.
    private static final String DEFAULT_ENCODING = "8BIT";
    // Charset that mCurrentCharset is reset to before each vCard.
    private static final String DEFAULT_CHARSET = "UTF-8";

    // Assigned from VCardConfig.DEFAULT_INTERMEDIATE_CHARSET by the constructor.
    protected final String mIntermediateCharset;

    // Interpreters that are notified when entries start/end and when properties are created.
    private final List<VCardInterpreter> mInterpreterList = new ArrayList<VCardInterpreter>();
    private boolean mCanceled;
    106 
    /**
     * <p>
     * The encoding type for decoding byte streams. This member variable is
     * reset to a default encoding every time a new item comes.
     * </p>
     * <p>
     * "Encoding" in vCard is different from "Charset". It is mainly used for
     * addresses, notes, images. "7BIT", "8BIT", "BASE64", and
     * "QUOTED-PRINTABLE" are known examples.
     * </p>
     */
    protected String mCurrentEncoding;

    /**
     * The charset given by the most recently handled CHARSET parameter. It is reset to the
     * default charset at the beginning of each vCard.
     */
    protected String mCurrentCharset;
    121 
    /**
     * <p>
     * The reader object to be used internally.
     * </p>
     * <p>
     * Developers should not read a line directly from this object. Use
     * getLine() (or peekLine()) unless there is some reason not to.
     * </p>
     */
    protected CustomBufferedReader mReader;
    132 
    /**
     * <p>
     * Set for storing unknown TYPE attributes, which are not acceptable in the vCard
     * specification, but happen to be seen in real world vCard.
     * </p>
     * <p>
     * We just accept those invalid types after emitting a warning for each of them.
     * </p>
     * <p>
     * Note: isValidPropertyName() stores unsupported property names in this set as well.
     * </p>
     */
    protected final Set<String> mUnknownTypeSet = new HashSet<String>();
    143 
    /**
     * <p>
     * Set for storing unknown VALUE attributes, which are not acceptable in the
     * vCard specification, but happen to be seen in real world vCard.
     * </p>
     * <p>
     * We just accept those invalid values after emitting a warning for each of them.
     * </p>
     */
    protected final Set<String> mUnknownValueSet = new HashSet<String>();
    154 
    155 
    /**
     * Creates a parser using {@link VCardConfig#VCARD_TYPE_DEFAULT} as its vCard type.
     */
    public VCardParserImpl_V21() {
        this(VCardConfig.VCARD_TYPE_DEFAULT);
    }

    /**
     * Creates a parser whose intermediate charset is
     * {@link VCardConfig#DEFAULT_INTERMEDIATE_CHARSET}.
     *
     * @param vcardType vCard type flags; the value is not read by this constructor
     */
    public VCardParserImpl_V21(int vcardType) {
        mIntermediateCharset = VCardConfig.DEFAULT_INTERMEDIATE_CHARSET;
    }
    163 
    164     /**
    165      * @return true when a given property name is a valid property name.
    166      */
    167     protected boolean isValidPropertyName(final String propertyName) {
    168         if (!(getKnownPropertyNameSet().contains(propertyName.toUpperCase()) ||
    169                 propertyName.startsWith("X-"))
    170                 && !mUnknownTypeSet.contains(propertyName)) {
    171             mUnknownTypeSet.add(propertyName);
    172             Log.w(LOG_TAG, "Property name unsupported by vCard 2.1: " + propertyName);
    173         }
    174         return true;
    175     }
    176 
    177     /**
    178      * @return String. It may be null, or its length may be 0
    179      * @throws IOException
    180      */
    181     protected String getLine() throws IOException {
    182         return mReader.readLine();
    183     }
    184 
    185     protected String peekLine() throws IOException {
    186         return mReader.peekLine();
    187     }
    188 
    189     /**
    190      * @return String with it's length > 0
    191      * @throws IOException
    192      * @throws VCardException when the stream reached end of line
    193      */
    194     protected String getNonEmptyLine() throws IOException, VCardException {
    195         String line;
    196         while (true) {
    197             line = getLine();
    198             if (line == null) {
    199                 throw new VCardException("Reached end of buffer.");
    200             } else if (line.trim().length() > 0) {
    201                 return line;
    202             }
    203         }
    204     }
    205 
    206     /**
    207      * <code>
    208      * vcard = "BEGIN" [ws] ":" [ws] "VCARD" [ws] 1*CRLF
    209      *         items *CRLF
    210      *         "END" [ws] ":" [ws] "VCARD"
    211      * </code>
    212      * @return False when reaching end of file.
    213      */
    214     private boolean parseOneVCard() throws IOException, VCardException {
    215         // reset for this entire vCard.
    216         mCurrentEncoding = DEFAULT_ENCODING;
    217         mCurrentCharset = DEFAULT_CHARSET;
    218 
    219         boolean allowGarbage = false;
    220         if (!readBeginVCard(allowGarbage)) {
    221             return false;
    222         }
    223         for (VCardInterpreter interpreter : mInterpreterList) {
    224             interpreter.onEntryStarted();
    225         }
    226         parseItems();
    227         for (VCardInterpreter interpreter : mInterpreterList) {
    228             interpreter.onEntryEnded();
    229         }
    230         return true;
    231     }
    232 
    233     /**
    234      * @return True when successful. False when reaching the end of line
    235      * @throws IOException
    236      * @throws VCardException
    237      */
    238     protected boolean readBeginVCard(boolean allowGarbage) throws IOException, VCardException {
    239         // TODO: use consructPropertyLine().
    240         String line;
    241         do {
    242             while (true) {
    243                 line = getLine();
    244                 if (line == null) {
    245                     return false;
    246                 } else if (line.trim().length() > 0) {
    247                     break;
    248                 }
    249             }
    250             final String[] strArray = line.split(":", 2);
    251             final int length = strArray.length;
    252 
    253             // Although vCard 2.1/3.0 specification does not allow lower cases,
    254             // we found vCard file emitted by some external vCard expoter have such
    255             // invalid Strings.
    256             // e.g. BEGIN:vCard
    257             if (length == 2 && strArray[0].trim().equalsIgnoreCase("BEGIN")
    258                     && strArray[1].trim().equalsIgnoreCase("VCARD")) {
    259                 return true;
    260             } else if (!allowGarbage) {
    261                 throw new VCardException("Expected String \"BEGIN:VCARD\" did not come "
    262                         + "(Instead, \"" + line + "\" came)");
    263             }
    264         } while (allowGarbage);
    265 
    266         throw new VCardException("Reached where must not be reached.");
    267     }
    268 
    269     /**
    270      * Parses lines other than the first "BEGIN:VCARD". Takes care of "END:VCARD"n and
    271      * "BEGIN:VCARD" in nested vCard.
    272      */
    273     /*
    274      * items = *CRLF item / item
    275      *
    276      * Note: BEGIN/END aren't include in the original spec while this method handles them.
    277      */
    278     protected void parseItems() throws IOException, VCardException {
    279         boolean ended = false;
    280 
    281         try {
    282             ended = parseItem();
    283         } catch (VCardInvalidCommentLineException e) {
    284             Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored.");
    285         }
    286 
    287         while (!ended) {
    288             try {
    289                 ended = parseItem();
    290             } catch (VCardInvalidCommentLineException e) {
    291                 Log.e(LOG_TAG, "Invalid line which looks like some comment was found. Ignored.");
    292             }
    293         }
    294     }
    295 
    296     /*
    297      * item = [groups "."] name [params] ":" value CRLF / [groups "."] "ADR"
    298      * [params] ":" addressparts CRLF / [groups "."] "ORG" [params] ":" orgparts
    299      * CRLF / [groups "."] "N" [params] ":" nameparts CRLF / [groups "."]
    300      * "AGENT" [params] ":" vcard CRLF
    301      */
    302     protected boolean parseItem() throws IOException, VCardException {
    303         // Reset for an item.
    304         mCurrentEncoding = DEFAULT_ENCODING;
    305 
    306         final String line = getNonEmptyLine();
    307         final VCardProperty propertyData = constructPropertyData(line);
    308 
    309         final String propertyNameUpper = propertyData.getName().toUpperCase();
    310         final String propertyRawValue = propertyData.getRawValue();
    311 
    312         if (propertyNameUpper.equals(VCardConstants.PROPERTY_BEGIN)) {
    313             if (propertyRawValue.equalsIgnoreCase("VCARD")) {
    314                 handleNest();
    315             } else {
    316                 throw new VCardException("Unknown BEGIN type: " + propertyRawValue);
    317             }
    318         } else if (propertyNameUpper.equals(VCardConstants.PROPERTY_END)) {
    319             if (propertyRawValue.equalsIgnoreCase("VCARD")) {
    320                 return true;  // Ended.
    321             } else {
    322                 throw new VCardException("Unknown END type: " + propertyRawValue);
    323             }
    324         } else {
    325             parseItemInter(propertyData, propertyNameUpper);
    326         }
    327         return false;
    328     }
    329 
    330     private void parseItemInter(VCardProperty property, String propertyNameUpper)
    331             throws IOException, VCardException {
    332         String propertyRawValue = property.getRawValue();
    333         if (propertyNameUpper.equals(VCardConstants.PROPERTY_AGENT)) {
    334             handleAgent(property);
    335         } else if (isValidPropertyName(propertyNameUpper)) {
    336             if (propertyNameUpper.equals(VCardConstants.PROPERTY_VERSION) &&
    337                     !propertyRawValue.equals(getVersionString())) {
    338                 throw new VCardVersionException(
    339                         "Incompatible version: " + propertyRawValue + " != " + getVersionString());
    340             }
    341             handlePropertyValue(property, propertyNameUpper);
    342         } else {
    343             throw new VCardException("Unknown property name: \"" + propertyNameUpper + "\"");
    344         }
    345     }
    346 
    347     private void handleNest() throws IOException, VCardException {
    348         for (VCardInterpreter interpreter : mInterpreterList) {
    349             interpreter.onEntryStarted();
    350         }
    351         parseItems();
    352         for (VCardInterpreter interpreter : mInterpreterList) {
    353             interpreter.onEntryEnded();
    354         }
    355     }
    356 
    357     // For performance reason, the states for group and property name are merged into one.
    358     static private final int STATE_GROUP_OR_PROPERTY_NAME = 0;
    359     static private final int STATE_PARAMS = 1;
    360     // vCard 3.0 specification allows double-quoted parameters, while vCard 2.1 does not.
    361     static private final int STATE_PARAMS_IN_DQUOTE = 2;
    362 
    363     protected VCardProperty constructPropertyData(String line) throws VCardException {
    364         final VCardProperty propertyData = new VCardProperty();
    365 
    366         final int length = line.length();
    367         if (length > 0 && line.charAt(0) == '#') {
    368             throw new VCardInvalidCommentLineException();
    369         }
    370 
    371         int state = STATE_GROUP_OR_PROPERTY_NAME;
    372         int nameIndex = 0;
    373 
    374         // This loop is developed so that we don't have to take care of bottle neck here.
    375         // Refactor carefully when you need to do so.
    376         for (int i = 0; i < length; i++) {
    377             final char ch = line.charAt(i);
    378             switch (state) {
    379                 case STATE_GROUP_OR_PROPERTY_NAME: {
    380                     if (ch == ':') {  // End of a property name.
    381                         final String propertyName = line.substring(nameIndex, i);
    382                         propertyData.setName(propertyName);
    383                         propertyData.setRawValue( i < length - 1 ? line.substring(i + 1) : "");
    384                         return propertyData;
    385                     } else if (ch == '.') {  // Each group is followed by the dot.
    386                         final String groupName = line.substring(nameIndex, i);
    387                         if (groupName.length() == 0) {
    388                             Log.w(LOG_TAG, "Empty group found. Ignoring.");
    389                         } else {
    390                             propertyData.addGroup(groupName);
    391                         }
    392                         nameIndex = i + 1;  // Next should be another group or a property name.
    393                     } else if (ch == ';') {  // End of property name and beginneng of parameters.
    394                         final String propertyName = line.substring(nameIndex, i);
    395                         propertyData.setName(propertyName);
    396                         nameIndex = i + 1;
    397                         state = STATE_PARAMS;  // Start parameter parsing.
    398                     }
    399                     // TODO: comma support (in vCard 3.0 and 4.0).
    400                     break;
    401                 }
    402                 case STATE_PARAMS: {
    403                     if (ch == '"') {
    404                         if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
    405                             Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
    406                                     "Silently allow it");
    407                         }
    408                         state = STATE_PARAMS_IN_DQUOTE;
    409                     } else if (ch == ';') {  // Starts another param.
    410                         handleParams(propertyData, line.substring(nameIndex, i));
    411                         nameIndex = i + 1;
    412                     } else if (ch == ':') {  // End of param and beginenning of values.
    413                         handleParams(propertyData, line.substring(nameIndex, i));
    414                         propertyData.setRawValue(i < length - 1 ? line.substring(i + 1) : "");
    415                         return propertyData;
    416                     }
    417                     break;
    418                 }
    419                 case STATE_PARAMS_IN_DQUOTE: {
    420                     if (ch == '"') {
    421                         if (VCardConstants.VERSION_V21.equalsIgnoreCase(getVersionString())) {
    422                             Log.w(LOG_TAG, "Double-quoted params found in vCard 2.1. " +
    423                                     "Silently allow it");
    424                         }
    425                         state = STATE_PARAMS;
    426                     }
    427                     break;
    428                 }
    429             }
    430         }
    431 
    432         throw new VCardInvalidLineException("Invalid line: \"" + line + "\"");
    433     }
    434 
    435     /*
    436      * params = ";" [ws] paramlist paramlist = paramlist [ws] ";" [ws] param /
    437      * param param = "TYPE" [ws] "=" [ws] ptypeval / "VALUE" [ws] "=" [ws]
    438      * pvalueval / "ENCODING" [ws] "=" [ws] pencodingval / "CHARSET" [ws] "="
    439      * [ws] charsetval / "LANGUAGE" [ws] "=" [ws] langval / "X-" word [ws] "="
    440      * [ws] word / knowntype
    441      */
    442     protected void handleParams(VCardProperty propertyData, String params)
    443             throws VCardException {
    444         final String[] strArray = params.split("=", 2);
    445         if (strArray.length == 2) {
    446             final String paramName = strArray[0].trim().toUpperCase();
    447             String paramValue = strArray[1].trim();
    448             if (paramName.equals("TYPE")) {
    449                 handleType(propertyData, paramValue);
    450             } else if (paramName.equals("VALUE")) {
    451                 handleValue(propertyData, paramValue);
    452             } else if (paramName.equals("ENCODING")) {
    453                 handleEncoding(propertyData, paramValue.toUpperCase());
    454             } else if (paramName.equals("CHARSET")) {
    455                 handleCharset(propertyData, paramValue);
    456             } else if (paramName.equals("LANGUAGE")) {
    457                 handleLanguage(propertyData, paramValue);
    458             } else if (paramName.startsWith("X-")) {
    459                 handleAnyParam(propertyData, paramName, paramValue);
    460             } else {
    461                 throw new VCardException("Unknown type \"" + paramName + "\"");
    462             }
    463         } else {
    464             handleParamWithoutName(propertyData, strArray[0]);
    465         }
    466     }
    467 
    468     /**
    469      * vCard 3.0 parser implementation may throw VCardException.
    470      */
    471     protected void handleParamWithoutName(VCardProperty propertyData, final String paramValue) {
    472         handleType(propertyData, paramValue);
    473     }
    474 
    475     /*
    476      * ptypeval = knowntype / "X-" word
    477      */
    478     protected void handleType(VCardProperty propertyData, final String ptypeval) {
    479         if (!(getKnownTypeSet().contains(ptypeval.toUpperCase())
    480                 || ptypeval.startsWith("X-"))
    481                 && !mUnknownTypeSet.contains(ptypeval)) {
    482             mUnknownTypeSet.add(ptypeval);
    483             Log.w(LOG_TAG, String.format("TYPE unsupported by %s: ", getVersion(), ptypeval));
    484         }
    485         propertyData.addParameter(VCardConstants.PARAM_TYPE, ptypeval);
    486     }
    487 
    488     /*
    489      * pvalueval = "INLINE" / "URL" / "CONTENT-ID" / "CID" / "X-" word
    490      */
    491     protected void handleValue(VCardProperty propertyData, final String pvalueval) {
    492         if (!(getKnownValueSet().contains(pvalueval.toUpperCase())
    493                 || pvalueval.startsWith("X-")
    494                 || mUnknownValueSet.contains(pvalueval))) {
    495             mUnknownValueSet.add(pvalueval);
    496             Log.w(LOG_TAG, String.format(
    497                     "The value unsupported by TYPE of %s: ", getVersion(), pvalueval));
    498         }
    499         propertyData.addParameter(VCardConstants.PARAM_VALUE, pvalueval);
    500     }
    501 
    502     /*
    503      * pencodingval = "7BIT" / "8BIT" / "QUOTED-PRINTABLE" / "BASE64" / "X-" word
    504      */
    505     protected void handleEncoding(VCardProperty propertyData, String pencodingval)
    506             throws VCardException {
    507         if (getAvailableEncodingSet().contains(pencodingval) ||
    508                 pencodingval.startsWith("X-")) {
    509             propertyData.addParameter(VCardConstants.PARAM_ENCODING, pencodingval);
    510             // Update encoding right away, as this is needed to understanding other params.
    511             mCurrentEncoding = pencodingval.toUpperCase();
    512         } else {
    513             throw new VCardException("Unknown encoding \"" + pencodingval + "\"");
    514         }
    515     }
    516 
    517     /**
    518      * <p>
    519      * vCard 2.1 specification only allows us-ascii and iso-8859-xxx (See RFC 1521),
    520      * but recent vCard files often contain other charset like UTF-8, SHIFT_JIS, etc.
    521      * We allow any charset.
    522      * </p>
    523      */
    524     protected void handleCharset(VCardProperty propertyData, String charsetval) {
    525         mCurrentCharset = charsetval;
    526         propertyData.addParameter(VCardConstants.PARAM_CHARSET, charsetval);
    527     }
    528 
    529     /**
    530      * See also Section 7.1 of RFC 1521
    531      */
    532     protected void handleLanguage(VCardProperty propertyData, String langval)
    533             throws VCardException {
    534         String[] strArray = langval.split("-");
    535         if (strArray.length != 2) {
    536             throw new VCardException("Invalid Language: \"" + langval + "\"");
    537         }
    538         String tmp = strArray[0];
    539         int length = tmp.length();
    540         for (int i = 0; i < length; i++) {
    541             if (!isAsciiLetter(tmp.charAt(i))) {
    542                 throw new VCardException("Invalid Language: \"" + langval + "\"");
    543             }
    544         }
    545         tmp = strArray[1];
    546         length = tmp.length();
    547         for (int i = 0; i < length; i++) {
    548             if (!isAsciiLetter(tmp.charAt(i))) {
    549                 throw new VCardException("Invalid Language: \"" + langval + "\"");
    550             }
    551         }
    552         propertyData.addParameter(VCardConstants.PARAM_LANGUAGE, langval);
    553     }
    554 
    555     private boolean isAsciiLetter(char ch) {
    556         if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
    557             return true;
    558         }
    559         return false;
    560     }
    561 
    562     /**
    563      * Mainly for "X-" type. This accepts any kind of type without check.
    564      */
    565     protected void handleAnyParam(
    566             VCardProperty propertyData, String paramName, String paramValue) {
    567         propertyData.addParameter(paramName, paramValue);
    568     }
    569 
    570     protected void handlePropertyValue(VCardProperty property, String propertyName)
    571             throws IOException, VCardException {
    572         final String propertyNameUpper = property.getName().toUpperCase();
    573         String propertyRawValue = property.getRawValue();
    574         final String sourceCharset = VCardConfig.DEFAULT_INTERMEDIATE_CHARSET;
    575         final Collection<String> charsetCollection =
    576                 property.getParameters(VCardConstants.PARAM_CHARSET);
    577         String targetCharset =
    578                 ((charsetCollection != null) ? charsetCollection.iterator().next() : null);
    579         if (TextUtils.isEmpty(targetCharset)) {
    580             targetCharset = VCardConfig.DEFAULT_IMPORT_CHARSET;
    581         }
    582 
    583         // TODO: have "separableProperty" which reflects vCard spec..
    584         if (propertyNameUpper.equals(VCardConstants.PROPERTY_ADR)
    585                 || propertyNameUpper.equals(VCardConstants.PROPERTY_ORG)
    586                 || propertyNameUpper.equals(VCardConstants.PROPERTY_N)) {
    587             handleAdrOrgN(property, propertyRawValue, sourceCharset, targetCharset);
    588             return;
    589         }
    590 
    591         if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_QP) ||
    592                 // If encoding attribute is missing, then attempt to detect QP encoding.
    593                 // This is to handle a bug where the android exporter was creating FN properties
    594                 // with missing encoding.  b/7292017
    595                 (propertyNameUpper.equals(VCardConstants.PROPERTY_FN) &&
    596                         property.getParameters(VCardConstants.PARAM_ENCODING) == null &&
    597                         VCardUtils.appearsLikeAndroidVCardQuotedPrintable(propertyRawValue))
    598                 ) {
    599             final String quotedPrintablePart = getQuotedPrintablePart(propertyRawValue);
    600             final String propertyEncodedValue =
    601                     VCardUtils.parseQuotedPrintable(quotedPrintablePart,
    602                             false, sourceCharset, targetCharset);
    603             property.setRawValue(quotedPrintablePart);
    604             property.setValues(propertyEncodedValue);
    605             for (VCardInterpreter interpreter : mInterpreterList) {
    606                 interpreter.onPropertyCreated(property);
    607             }
    608         } else if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_BASE64)
    609                 || mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_B)) {
    610             // It is very rare, but some BASE64 data may be so big that
    611             // OutOfMemoryError occurs. To ignore such cases, use try-catch.
    612             try {
    613                 final String base64Property = getBase64(propertyRawValue);
    614                 try {
    615                     property.setByteValue(Base64.decode(base64Property, Base64.DEFAULT));
    616                 } catch (IllegalArgumentException e) {
    617                     throw new VCardException("Decode error on base64 photo: " + propertyRawValue);
    618                 }
    619                 for (VCardInterpreter interpreter : mInterpreterList) {
    620                     interpreter.onPropertyCreated(property);
    621                 }
    622             } catch (OutOfMemoryError error) {
    623                 Log.e(LOG_TAG, "OutOfMemoryError happened during parsing BASE64 data!");
    624                 for (VCardInterpreter interpreter : mInterpreterList) {
    625                     interpreter.onPropertyCreated(property);
    626                 }
    627             }
    628         } else {
    629             if (!(mCurrentEncoding.equals("7BIT") || mCurrentEncoding.equals("8BIT") ||
    630                     mCurrentEncoding.startsWith("X-"))) {
    631                 Log.w(LOG_TAG,
    632                         String.format("The encoding \"%s\" is unsupported by vCard %s",
    633                                 mCurrentEncoding, getVersionString()));
    634             }
    635 
    636             // Some device uses line folding defined in RFC 2425, which is not allowed
    637             // in vCard 2.1 (while needed in vCard 3.0).
    638             //
    639             // e.g.
    640             // BEGIN:VCARD
    641             // VERSION:2.1
    642             // N:;Omega;;;
    643             // EMAIL;INTERNET:"Omega"
    644             //   <omega (at) example.com>
    645             // FN:Omega
    646             // END:VCARD
    647             //
    648             // The vCard above assumes that email address should become:
    649             // "Omega" <omega (at) example.com>
    650             //
    651             // But vCard 2.1 requires Quote-Printable when a line contains line break(s).
    652             //
    653             // For more information about line folding,
    654             // see "5.8.1. Line delimiting and folding" in RFC 2425.
    655             //
    656             // We take care of this case more formally in vCard 3.0, so we only need to
    657             // do this in vCard 2.1.
    658             if (getVersion() == VCardConfig.VERSION_21) {
    659                 StringBuilder builder = null;
    660                 while (true) {
    661                     final String nextLine = peekLine();
    662                     // We don't need to care too much about this exceptional case,
    663                     // but we should not wrongly eat up "END:VCARD", since it critically
    664                     // breaks this parser's state machine.
    665                     // Thus we roughly look over the next line and confirm it is at least not
    666                     // "END:VCARD". This extra fee is worth paying. This is exceptional
    667                     // anyway.
    668                     if (!TextUtils.isEmpty(nextLine) &&
    669                             nextLine.charAt(0) == ' ' &&
    670                             !"END:VCARD".contains(nextLine.toUpperCase())) {
    671                         getLine();  // Drop the next line.
    672 
    673                         if (builder == null) {
    674                             builder = new StringBuilder();
    675                             builder.append(propertyRawValue);
    676                         }
    677                         builder.append(nextLine.substring(1));
    678                     } else {
    679                         break;
    680                     }
    681                 }
    682                 if (builder != null) {
    683                     propertyRawValue = builder.toString();
    684                 }
    685             }
    686 
    687             ArrayList<String> propertyValueList = new ArrayList<String>();
    688             String value = VCardUtils.convertStringCharset(
    689                     maybeUnescapeText(propertyRawValue), sourceCharset, targetCharset);
    690             propertyValueList.add(value);
    691             property.setValues(propertyValueList);
    692             for (VCardInterpreter interpreter : mInterpreterList) {
    693                 interpreter.onPropertyCreated(property);
    694             }
    695         }
    696     }
    697 
    698     private void handleAdrOrgN(VCardProperty property, String propertyRawValue,
    699             String sourceCharset, String targetCharset) throws VCardException, IOException {
    700         List<String> encodedValueList = new ArrayList<String>();
    701 
    702         // vCard 2.1 does not allow QUOTED-PRINTABLE here, but some softwares/devices emit
    703         // such data.
    704         if (mCurrentEncoding.equals(VCardConstants.PARAM_ENCODING_QP)) {
    705             // First we retrieve Quoted-Printable String from vCard entry, which may include
    706             // multiple lines.
    707             final String quotedPrintablePart = getQuotedPrintablePart(propertyRawValue);
    708 
    709             // "Raw value" from the view of users should contain all part of QP string.
    710             // TODO: add test for this handling
    711             property.setRawValue(quotedPrintablePart);
    712 
    713             // We split Quoted-Printable String using semi-colon before decoding it, as
    714             // the Quoted-Printable may have semi-colon, which confuses splitter.
    715             final List<String> quotedPrintableValueList =
    716                     VCardUtils.constructListFromValue(quotedPrintablePart, getVersion());
    717             for (String quotedPrintableValue : quotedPrintableValueList) {
    718                 String encoded = VCardUtils.parseQuotedPrintable(quotedPrintableValue,
    719                         false, sourceCharset, targetCharset);
    720                 encodedValueList.add(encoded);
    721             }
    722         } else {
    723             final String propertyValue = getPotentialMultiline(propertyRawValue);
    724             final List<String> rawValueList =
    725                     VCardUtils.constructListFromValue(propertyValue, getVersion());
    726             for (String rawValue : rawValueList) {
    727                 encodedValueList.add(VCardUtils.convertStringCharset(
    728                         rawValue, sourceCharset, targetCharset));
    729             }
    730         }
    731 
    732         property.setValues(encodedValueList);
    733         for (VCardInterpreter interpreter : mInterpreterList) {
    734             interpreter.onPropertyCreated(property);
    735         }
    736     }
    737 
    738     /**
    739      * <p>
    740      * Parses and returns Quoted-Printable.
    741      * </p>
    742      *
    743      * @param firstString The string following a parameter name and attributes.
    744      *            Example: "string" in
    745      *            "ADR:ENCODING=QUOTED-PRINTABLE:string\n\r".
    746      * @return whole Quoted-Printable string, including a given argument and
    747      *         following lines. Excludes the last empty line following to Quoted
    748      *         Printable lines.
    749      * @throws IOException
    750      * @throws VCardException
    751      */
    752     private String getQuotedPrintablePart(String firstString)
    753             throws IOException, VCardException {
    754         // Specifically, there may be some padding between = and CRLF.
    755         // See the following:
    756         //
    757         // qp-line := *(qp-segment transport-padding CRLF)
    758         // qp-part transport-padding
    759         // qp-segment := qp-section *(SPACE / TAB) "="
    760         // ; Maximum length of 76 characters
    761         //
    762         // e.g. (from RFC 2045)
    763         // Now's the time =
    764         // for all folk to come=
    765         // to the aid of their country.
    766         if (firstString.trim().endsWith("=")) {
    767             // remove "transport-padding"
    768             int pos = firstString.length() - 1;
    769             while (firstString.charAt(pos) != '=') {
    770             }
    771             StringBuilder builder = new StringBuilder();
    772             builder.append(firstString.substring(0, pos + 1));
    773             builder.append("\r\n");
    774             String line;
    775             while (true) {
    776                 line = getLine();
    777                 if (line == null) {
    778                     throw new VCardException("File ended during parsing a Quoted-Printable String");
    779                 }
    780                 if (line.trim().endsWith("=")) {
    781                     // remove "transport-padding"
    782                     pos = line.length() - 1;
    783                     while (line.charAt(pos) != '=') {
    784                     }
    785                     builder.append(line.substring(0, pos + 1));
    786                     builder.append("\r\n");
    787                 } else {
    788                     builder.append(line);
    789                     break;
    790                 }
    791             }
    792             return builder.toString();
    793         } else {
    794             return firstString;
    795         }
    796     }
    797 
    798     /**
    799      * Given the first line of a property, checks consecutive lines after it and builds a new
    800      * multi-line value if it exists.
    801      *
    802      * @param firstString The first line of the property.
    803      * @return A new property, potentially built from multiple lines.
    804      * @throws IOException
    805      */
    806     private String getPotentialMultiline(String firstString) throws IOException {
    807         final StringBuilder builder = new StringBuilder();
    808         builder.append(firstString);
    809 
    810         while (true) {
    811             final String line = peekLine();
    812             if (line == null || line.length() == 0) {
    813                 break;
    814             }
    815 
    816             final String propertyName = getPropertyNameUpperCase(line);
    817             if (propertyName != null) {
    818                 break;
    819             }
    820 
    821             // vCard 2.1 does not allow multi-line of adr but microsoft vcards may have it.
    822             // We will consider the next line to be a part of a multi-line value if it does not
    823             // contain a property name (i.e. a colon or semi-colon).
    824             // Consume the line.
    825             getLine();
    826             builder.append(" ").append(line);
    827         }
    828 
    829         return builder.toString();
    830     }
    831 
    832     protected String getBase64(String firstString) throws IOException, VCardException {
    833         final StringBuilder builder = new StringBuilder();
    834         builder.append(firstString);
    835 
    836         while (true) {
    837             final String line = peekLine();
    838             if (line == null) {
    839                 throw new VCardException("File ended during parsing BASE64 binary");
    840             }
    841 
    842             // vCard 2.1 requires two spaces at the end of BASE64 strings, but some vCard doesn't
    843             // have them. We try to detect those cases using colon and semi-colon, given BASE64
    844             // does not contain it.
    845             // E.g.
    846             //      TEL;TYPE=WORK:+5555555
    847             // or
    848             //      END:VCARD
    849             String propertyName = getPropertyNameUpperCase(line);
    850             if (getKnownPropertyNameSet().contains(propertyName) ||
    851                     VCardConstants.PROPERTY_X_ANDROID_CUSTOM.equals(propertyName)) {
    852                 Log.w(LOG_TAG, "Found a next property during parsing a BASE64 string, " +
    853                         "which must not contain semi-colon or colon. Treat the line as next "
    854                         + "property.");
    855                 Log.w(LOG_TAG, "Problematic line: " + line.trim());
    856                 break;
    857             }
    858 
    859             // Consume the line.
    860             getLine();
    861 
    862             if (line.length() == 0) {
    863                 break;
    864             }
    865             // Trim off any extraneous whitespace to handle 2.1 implementations
    866             // that use 3.0 style line continuations. This is safe because space
    867             // isn't a Base64 encoding value.
    868             builder.append(line.trim());
    869         }
    870 
    871         return builder.toString();
    872     }
    873 
    874     /**
    875      * Extracts the property name portion of a given vCard line.
    876      * <p>
    877      * Properties must contain a colon.
    878      * <p>
    879      * E.g.
    880      *      TEL;TYPE=WORK:+5555555  // returns "TEL"
    881      *      END:VCARD // returns "END"
    882      *      TEL; // returns null
    883      *
    884      * @param line The vCard line.
    885      * @return The property name portion. {@literal null} if no property name found.
    886      */
    887     private String getPropertyNameUpperCase(String line) {
    888         final int colonIndex = line.indexOf(":");
    889         if (colonIndex > -1) {
    890             final int semiColonIndex = line.indexOf(";");
    891 
    892             // Find the minimum index that is greater than -1.
    893             final int minIndex;
    894             if (colonIndex == -1) {
    895                 minIndex = semiColonIndex;
    896             } else if (semiColonIndex == -1) {
    897                 minIndex = colonIndex;
    898             } else {
    899                 minIndex = Math.min(colonIndex, semiColonIndex);
    900             }
    901             return line.substring(0, minIndex).toUpperCase();
    902         }
    903         return null;
    904     }
    905 
    906     /*
    907      * vCard 2.1 specifies AGENT allows one vcard entry. Currently we emit an
    908      * error toward the AGENT property.
    909      * // TODO: Support AGENT property.
    910      * item =
    911      * ... / [groups "."] "AGENT" [params] ":" vcard CRLF vcard = "BEGIN" [ws]
    912      * ":" [ws] "VCARD" [ws] 1*CRLF items *CRLF "END" [ws] ":" [ws] "VCARD"
    913      */
    914     protected void handleAgent(final VCardProperty property) throws VCardException {
    915         if (!property.getRawValue().toUpperCase().contains("BEGIN:VCARD")) {
    916             // Apparently invalid line seen in Windows Mobile 6.5. Ignore them.
    917             for (VCardInterpreter interpreter : mInterpreterList) {
    918                 interpreter.onPropertyCreated(property);
    919             }
    920             return;
    921         } else {
    922             throw new VCardAgentNotSupportedException("AGENT Property is not supported now.");
    923         }
    924     }
    925 
    926     /**
    927      * For vCard 3.0.
    928      */
    929     protected String maybeUnescapeText(final String text) {
    930         return text;
    931     }
    932 
    933     /**
    934      * Returns unescaped String if the character should be unescaped. Return
    935      * null otherwise. e.g. In vCard 2.1, "\;" should be unescaped into ";"
    936      * while "\x" should not be.
    937      */
    938     protected String maybeUnescapeCharacter(final char ch) {
    939         return unescapeCharacter(ch);
    940     }
    941 
    942     /* package */ static String unescapeCharacter(final char ch) {
    943         // Original vCard 2.1 specification does not allow transformation
    944         // "\:" -> ":", "\," -> ",", and "\\" -> "\", but previous
    945         // implementation of
    946         // this class allowed them, so keep it as is.
    947         if (ch == '\\' || ch == ';' || ch == ':' || ch == ',') {
    948             return String.valueOf(ch);
    949         } else {
    950             return null;
    951         }
    952     }
    953 
    954     /**
    955      * @return {@link VCardConfig#VERSION_21}
    956      */
    957     protected int getVersion() {
    958         return VCardConfig.VERSION_21;
    959     }
    960 
    961     /**
    962      * @return {@link VCardConfig#VERSION_30}
    963      */
    964     protected String getVersionString() {
    965         return VCardConstants.VERSION_V21;
    966     }
    967 
    968     protected Set<String> getKnownPropertyNameSet() {
    969         return VCardParser_V21.sKnownPropertyNameSet;
    970     }
    971 
    972     protected Set<String> getKnownTypeSet() {
    973         return VCardParser_V21.sKnownTypeSet;
    974     }
    975 
    976     protected Set<String> getKnownValueSet() {
    977         return VCardParser_V21.sKnownValueSet;
    978     }
    979 
    980     protected Set<String> getAvailableEncodingSet() {
    981         return VCardParser_V21.sAvailableEncoding;
    982     }
    983 
    984     protected String getDefaultEncoding() {
    985         return DEFAULT_ENCODING;
    986     }
    987 
    988     protected String getDefaultCharset() {
    989         return DEFAULT_CHARSET;
    990     }
    991 
    992     protected String getCurrentCharset() {
    993         return mCurrentCharset;
    994     }
    995 
    996     public void addInterpreter(VCardInterpreter interpreter) {
    997         mInterpreterList.add(interpreter);
    998     }
    999 
   1000     public void parse(InputStream is) throws IOException, VCardException {
   1001         if (is == null) {
   1002             throw new NullPointerException("InputStream must not be null.");
   1003         }
   1004 
   1005         final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset);
   1006         mReader = new CustomBufferedReader(tmpReader);
   1007 
   1008         final long start = System.currentTimeMillis();
   1009         for (VCardInterpreter interpreter : mInterpreterList) {
   1010             interpreter.onVCardStarted();
   1011         }
   1012 
   1013         // vcard_file = [wsls] vcard [wsls]
   1014         while (true) {
   1015             synchronized (this) {
   1016                 if (mCanceled) {
   1017                     Log.i(LOG_TAG, "Cancel request has come. exitting parse operation.");
   1018                     break;
   1019                 }
   1020             }
   1021             if (!parseOneVCard()) {
   1022                 break;
   1023             }
   1024         }
   1025 
   1026         for (VCardInterpreter interpreter : mInterpreterList) {
   1027             interpreter.onVCardEnded();
   1028         }
   1029     }
   1030 
   1031     public void parseOne(InputStream is) throws IOException, VCardException {
   1032         if (is == null) {
   1033             throw new NullPointerException("InputStream must not be null.");
   1034         }
   1035 
   1036         final InputStreamReader tmpReader = new InputStreamReader(is, mIntermediateCharset);
   1037         mReader = new CustomBufferedReader(tmpReader);
   1038 
   1039         final long start = System.currentTimeMillis();
   1040         for (VCardInterpreter interpreter : mInterpreterList) {
   1041             interpreter.onVCardStarted();
   1042         }
   1043         parseOneVCard();
   1044         for (VCardInterpreter interpreter : mInterpreterList) {
   1045             interpreter.onVCardEnded();
   1046         }
   1047     }
   1048 
   1049     public final synchronized void cancel() {
   1050         Log.i(LOG_TAG, "ParserImpl received cancel operation.");
   1051         mCanceled = true;
   1052     }
   1053 }
   1054