/*
 * Copyright 2013 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
     16 
     17 package com.example.android.basicsyncadapter.net;
     18 
     19 import android.text.format.Time;
     20 import android.util.Xml;
     21 
     22 import org.xmlpull.v1.XmlPullParser;
     23 import org.xmlpull.v1.XmlPullParserException;
     24 
     25 import java.io.IOException;
     26 import java.io.InputStream;
     27 import java.text.ParseException;
     28 import java.util.ArrayList;
     29 import java.util.List;
     30 
     31 /**
     32  * This class parses generic Atom feeds.
     33  *
     34  * <p>Given an InputStream representation of a feed, it returns a List of entries,
     35  * where each list element represents a single entry (post) in the XML feed.
     36  *
     37  * <p>An example of an Atom feed can be found at:
     38  * http://en.wikipedia.org/w/index.php?title=Atom_(standard)&oldid=560239173#Example_of_an_Atom_1.0_feed
     39  */
     40 public class FeedParser {
     41 
     42     // Constants indicting XML element names that we're interested in
     43     private static final int TAG_ID = 1;
     44     private static final int TAG_TITLE = 2;
     45     private static final int TAG_PUBLISHED = 3;
     46     private static final int TAG_LINK = 4;
     47 
     48     // We don't use XML namespaces
     49     private static final String ns = null;
     50 
     51     /** Parse an Atom feed, returning a collection of Entry objects.
     52      *
     53      * @param in Atom feed, as a stream.
     54      * @return List of {@link com.example.android.basicsyncadapter.net.FeedParser.Entry} objects.
     55      * @throws org.xmlpull.v1.XmlPullParserException on error parsing feed.
     56      * @throws java.io.IOException on I/O error.
     57      */
     58     public List<Entry> parse(InputStream in)
     59             throws XmlPullParserException, IOException, ParseException {
     60         try {
     61             XmlPullParser parser = Xml.newPullParser();
     62             parser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, false);
     63             parser.setInput(in, null);
     64             parser.nextTag();
     65             return readFeed(parser);
     66         } finally {
     67             in.close();
     68         }
     69     }
     70 
     71     /**
     72      * Decode a feed attached to an XmlPullParser.
     73      *
     74      * @param parser Incoming XMl
     75      * @return List of {@link com.example.android.basicsyncadapter.net.FeedParser.Entry} objects.
     76      * @throws org.xmlpull.v1.XmlPullParserException on error parsing feed.
     77      * @throws java.io.IOException on I/O error.
     78      */
     79     private List<Entry> readFeed(XmlPullParser parser)
     80             throws XmlPullParserException, IOException, ParseException {
     81         List<Entry> entries = new ArrayList<Entry>();
     82 
     83         // Search for <feed> tags. These wrap the beginning/end of an Atom document.
     84         //
     85         // Example:
     86         // <?xml version="1.0" encoding="utf-8"?>
     87         // <feed xmlns="http://www.w3.org/2005/Atom">
     88         // ...
     89         // </feed>
     90         parser.require(XmlPullParser.START_TAG, ns, "feed");
     91         while (parser.next() != XmlPullParser.END_TAG) {
     92             if (parser.getEventType() != XmlPullParser.START_TAG) {
     93                 continue;
     94             }
     95             String name = parser.getName();
     96             // Starts by looking for the <entry> tag. This tag repeates inside of <feed> for each
     97             // article in the feed.
     98             //
     99             // Example:
    100             // <entry>
    101             //   <title>Article title</title>
    102             //   <link rel="alternate" type="text/html" href="http://example.com/article/1234"/>
    103             //   <link rel="edit" href="http://example.com/admin/article/1234"/>
    104             //   <id>urn:uuid:218AC159-7F68-4CC6-873F-22AE6017390D</id>
    105             //   <published>2003-06-27T12:00:00Z</published>
    106             //   <updated>2003-06-28T12:00:00Z</updated>
    107             //   <summary>Article summary goes here.</summary>
    108             //   <author>
    109             //     <name>Rick Deckard</name>
    110             //     <email>deckard (at) example.com</email>
    111             //   </author>
    112             // </entry>
    113             if (name.equals("entry")) {
    114                 entries.add(readEntry(parser));
    115             } else {
    116                 skip(parser);
    117             }
    118         }
    119         return entries;
    120     }
    121 
    122     /**
    123      * Parses the contents of an entry. If it encounters a title, summary, or link tag, hands them
    124      * off to their respective "read" methods for processing. Otherwise, skips the tag.
    125      */
    126     private Entry readEntry(XmlPullParser parser)
    127             throws XmlPullParserException, IOException, ParseException {
    128         parser.require(XmlPullParser.START_TAG, ns, "entry");
    129         String id = null;
    130         String title = null;
    131         String link = null;
    132         long publishedOn = 0;
    133 
    134         while (parser.next() != XmlPullParser.END_TAG) {
    135             if (parser.getEventType() != XmlPullParser.START_TAG) {
    136                 continue;
    137             }
    138             String name = parser.getName();
    139             if (name.equals("id")){
    140                 // Example: <id>urn:uuid:218AC159-7F68-4CC6-873F-22AE6017390D</id>
    141                 id = readTag(parser, TAG_ID);
    142             } else if (name.equals("title")) {
    143                 // Example: <title>Article title</title>
    144                 title = readTag(parser, TAG_TITLE);
    145             } else if (name.equals("link")) {
    146                 // Example: <link rel="alternate" type="text/html" href="http://example.com/article/1234"/>
    147                 //
    148                 // Multiple link types can be included. readAlternateLink() will only return
    149                 // non-null when reading an "alternate"-type link. Ignore other responses.
    150                 String tempLink = readTag(parser, TAG_LINK);
    151                 if (tempLink != null) {
    152                     link = tempLink;
    153                 }
    154             } else if (name.equals("published")) {
    155                 // Example: <published>2003-06-27T12:00:00Z</published>
    156                 Time t = new Time();
    157                 t.parse3339(readTag(parser, TAG_PUBLISHED));
    158                 publishedOn = t.toMillis(false);
    159             } else {
    160                 skip(parser);
    161             }
    162         }
    163         return new Entry(id, title, link, publishedOn);
    164     }
    165 
    166     /**
    167      * Process an incoming tag and read the selected value from it.
    168      */
    169     private String readTag(XmlPullParser parser, int tagType)
    170             throws IOException, XmlPullParserException {
    171         String tag = null;
    172         String endTag = null;
    173 
    174         switch (tagType) {
    175             case TAG_ID:
    176                 return readBasicTag(parser, "id");
    177             case TAG_TITLE:
    178                 return readBasicTag(parser, "title");
    179             case TAG_PUBLISHED:
    180                 return readBasicTag(parser, "published");
    181             case TAG_LINK:
    182                 return readAlternateLink(parser);
    183             default:
    184                 throw new IllegalArgumentException("Unknown tag type: " + tagType);
    185         }
    186     }
    187 
    188     /**
    189      * Reads the body of a basic XML tag, which is guaranteed not to contain any nested elements.
    190      *
    191      * <p>You probably want to call readTag().
    192      *
    193      * @param parser Current parser object
    194      * @param tag XML element tag name to parse
    195      * @return Body of the specified tag
    196      * @throws java.io.IOException
    197      * @throws org.xmlpull.v1.XmlPullParserException
    198      */
    199     private String readBasicTag(XmlPullParser parser, String tag)
    200             throws IOException, XmlPullParserException {
    201         parser.require(XmlPullParser.START_TAG, ns, tag);
    202         String result = readText(parser);
    203         parser.require(XmlPullParser.END_TAG, ns, tag);
    204         return result;
    205     }
    206 
    207     /**
    208      * Processes link tags in the feed.
    209      */
    210     private String readAlternateLink(XmlPullParser parser)
    211             throws IOException, XmlPullParserException {
    212         String link = null;
    213         parser.require(XmlPullParser.START_TAG, ns, "link");
    214         String tag = parser.getName();
    215         String relType = parser.getAttributeValue(null, "rel");
    216         if (relType.equals("alternate")) {
    217             link = parser.getAttributeValue(null, "href");
    218         }
    219         while (true) {
    220             if (parser.nextTag() == XmlPullParser.END_TAG) break;
    221             // Intentionally break; consumes any remaining sub-tags.
    222         }
    223         return link;
    224     }
    225 
    226     /**
    227      * For the tags title and summary, extracts their text values.
    228      */
    229     private String readText(XmlPullParser parser) throws IOException, XmlPullParserException {
    230         String result = null;
    231         if (parser.next() == XmlPullParser.TEXT) {
    232             result = parser.getText();
    233             parser.nextTag();
    234         }
    235         return result;
    236     }
    237 
    238     /**
    239      * Skips tags the parser isn't interested in. Uses depth to handle nested tags. i.e.,
    240      * if the next tag after a START_TAG isn't a matching END_TAG, it keeps going until it
    241      * finds the matching END_TAG (as indicated by the value of "depth" being 0).
    242      */
    243     private void skip(XmlPullParser parser) throws XmlPullParserException, IOException {
    244         if (parser.getEventType() != XmlPullParser.START_TAG) {
    245             throw new IllegalStateException();
    246         }
    247         int depth = 1;
    248         while (depth != 0) {
    249             switch (parser.next()) {
    250                 case XmlPullParser.END_TAG:
    251                     depth--;
    252                     break;
    253                 case XmlPullParser.START_TAG:
    254                     depth++;
    255                     break;
    256             }
    257         }
    258     }
    259 
    260     /**
    261      * This class represents a single entry (post) in the XML feed.
    262      *
    263      * <p>It includes the data members "title," "link," and "summary."
    264      */
    265     public static class Entry {
    266         public final String id;
    267         public final String title;
    268         public final String link;
    269         public final long published;
    270 
    271         Entry(String id, String title, String link, long published) {
    272             this.id = id;
    273             this.title = title;
    274             this.link = link;
    275             this.published = published;
    276         }
    277     }
    278 }
    279