Home | History | Annotate | Download | only in v1
      1 /* -*-             c-basic-offset: 4; indent-tabs-mode: nil; -*-  //------100-columns-wide------>|*/
      2 // for license please see accompanying LICENSE.txt file (available also at http://www.xmlpull.org/)
      3 
      4 package org.xmlpull.v1;
      5 
      6 import java.io.InputStream;
      7 import java.io.IOException;
      8 import java.io.Reader;
      9 
     10 /**
     11  * XML Pull Parser is an interface that defines parsing functionality provided
     12  * in <a href="http://www.xmlpull.org/">XMLPULL V1 API</a> (visit this website to
     13  * learn more about API and its implementations).
     14  *
     15  * <p>The following kinds of parsers exist,
     16  * depending on which features are set:<ul>
     17  * <li><b>non-validating</b> parser as defined in XML 1.0 spec when
     18  *   FEATURE_PROCESS_DOCDECL is set to true
     19  * <li><b>validating parser</b> as defined in XML 1.0 spec when
     20  *   FEATURE_VALIDATION is true (and that implies that FEATURE_PROCESS_DOCDECL is true)
     21  * <li>when FEATURE_PROCESS_DOCDECL is false (this is the default; if a
     22  *   different value is required, it must be set before parsing is started)
     23  *   then the parser behaves like an XML 1.0 compliant non-validating parser under the condition that
     24  *  <em>no DOCDECL is present</em> in XML documents
     25  *   (internal entities can still be defined with defineEntityReplacementText()).
     26  *   This mode of operation is intended <b>for operation in constrained environments</b> such as J2ME.
     27  * </ul>
     28  *
     29  *
     30  * <p>There are two key methods: next() and nextToken(). While next() provides
     31  * access to high level parsing events, nextToken() allows access to lower
     32  * level tokens.
     33  *
     34  * <p>The current event state of the parser
     35  * can be determined by calling the
     36  * <a href="#getEventType()">getEventType()</a> method.
     37  * Initially, the parser is in the <a href="#START_DOCUMENT">START_DOCUMENT</a>
     38  * state.
     39  *
     40  * <p>The method <a href="#next()">next()</a> advances the parser to the
     41  * next event. The int value returned from next determines the current parser
     42  * state and is identical to the value returned from following calls to
     43  * getEventType ().
     44  *
     45  * <p>The following event types are seen by next()<dl>
     46  * <dt><a href="#START_TAG">START_TAG</a><dd> An XML start tag was read.
     47  * <dt><a href="#TEXT">TEXT</a><dd> Text content was read;
     48  * the text content can be retrieved using the getText() method.
     49  *  (when in validating mode next() will not report ignorable whitespace, use nextToken() instead)
     50  * <dt><a href="#END_TAG">END_TAG</a><dd> An end tag was read
     51  * <dt><a href="#END_DOCUMENT">END_DOCUMENT</a><dd> No more events are available
     52  * </dl>
     53  *
     54  * <p>after first next() or nextToken() (or any other next*() method)
     55  * is called user application can obtain
     56  * XML version, standalone and encoding from XML declaration
     57  * in following ways:<ul>
     58  * <li><b>version</b>:
     59  *  getProperty(&quot;<a href="http://xmlpull.org/v1/doc/properties.html#xmldecl-version">http://xmlpull.org/v1/doc/properties.html#xmldecl-version</a>&quot;)
     60  *       returns String ("1.0") or null if XMLDecl was not read or if property is not supported
     61  * <li><b>standalone</b>:
     62  *  getProperty(&quot;<a href="http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone">http://xmlpull.org/v1/doc/properties.html#xmldecl-standalone</a>&quot;)
     63  *       returns Boolean: null if there was no standalone declaration
     64  *  or if property is not supported
     65  *         otherwise returns Boolean(true) if standalone="yes" and Boolean(false) when standalone="no"
     66  * <li><b>encoding</b>: obtained from getInputEncoding()
     67  *       null if stream had unknown encoding (not set in setInputStream)
     68  *           and it was not declared in XMLDecl
     69  * </ul>
     70  *
     71  * A minimal example for using this API may look as follows:
     72  * <pre>
     73  * import java.io.IOException;
     74  * import java.io.StringReader;
     75  *
     76  * import org.xmlpull.v1.XmlPullParser;
     77  * import org.xmlpull.v1.<a href="XmlPullParserException.html">XmlPullParserException</a>;
     78  * import org.xmlpull.v1.<a href="XmlPullParserFactory.html">XmlPullParserFactory</a>;
     79  *
     80  * public class SimpleXmlPullApp
     81  * {
     82  *
     83  *     public static void main (String args[])
     84  *         throws XmlPullParserException, IOException
     85  *     {
     86  *         XmlPullParserFactory factory = XmlPullParserFactory.newInstance();
     87  *         factory.setNamespaceAware(true);
     88  *         XmlPullParser xpp = factory.newPullParser();
     89  *
     90  *         xpp.<a href="#setInput">setInput</a>( new StringReader ( "&lt;foo>Hello World!&lt;/foo>" ) );
     91  *         int eventType = xpp.getEventType();
     92  *         while (eventType != XmlPullParser.END_DOCUMENT) {
     93  *          if(eventType == XmlPullParser.START_DOCUMENT) {
     94  *              System.out.println("Start document");
     95  *          } else if(eventType == XmlPullParser.START_TAG) {
     96  *              System.out.println("Start tag "+xpp.<a href="#getName()">getName()</a>);
     97  *          } else if(eventType == XmlPullParser.END_TAG) {
     98  *              System.out.println("End tag "+xpp.getName());
     99  *          } else if(eventType == XmlPullParser.TEXT) {
    100  *              System.out.println("Text "+xpp.<a href="#getText()">getText()</a>);
    101  *          }
    102  *          eventType = xpp.next();
    103  *         }
    104  *         System.out.println("End document");
    105  *     }
    106  * }
    107  * </pre>
    108  *
    109  * <p>The above example will generate the following output:
    110  * <pre>
    111  * Start document
    112  * Start tag foo
    113  * Text Hello World!
    114  * End tag foo
    115  * End document
    116  * </pre>
    117  *
    118  * <p>For more details on API usage, please refer to the
    119  * quick Introduction available at <a href="http://www.xmlpull.org">http://www.xmlpull.org</a>
    120  *
    121  * @see XmlPullParserFactory
    122  * @see #defineEntityReplacementText
    123  * @see #getName
    124  * @see #getNamespace
    125  * @see #getText
    126  * @see #next
    127  * @see #nextToken
    128  * @see #setInput
    129  * @see #FEATURE_PROCESS_DOCDECL
    130  * @see #FEATURE_VALIDATION
    131  * @see #START_DOCUMENT
    132  * @see #START_TAG
    133  * @see #TEXT
    134  * @see #END_TAG
    135  * @see #END_DOCUMENT
    136  *
    137  * @author <a href="http://www-ai.cs.uni-dortmund.de/PERSONAL/haustein.html">Stefan Haustein</a>
    138  * @author <a href="http://www.extreme.indiana.edu/~aslom/">Aleksander Slominski</a>
    139  */
    140 
    141 public interface XmlPullParser {
    142 
    143     /** This constant represents the default namespace (the empty string {@code ""}). */
    144     String NO_NAMESPACE = "";
    145 
    146     // ----------------------------------------------------------------------------
    147     // EVENT TYPES as reported by next()
    148 
    149     /**
    150      * Signals that the parser is at the very beginning of the document
    151      * and nothing has been read yet.
    152      * This event type can only be observed by calling getEventType()
    153      * before the first call to next(), nextToken(), or nextTag().
    154      *
    155      * @see #next
    156      * @see #nextToken
    157      */
    158     int START_DOCUMENT = 0;
    159 
    160     /**
    161      * Logical end of the XML document. Returned from getEventType(), next()
    162      * and nextToken()
    163      * when the end of the input document has been reached.
    164      * <p><strong>NOTE:</strong> subsequent calls to
    165      * <a href="#next()">next()</a> or <a href="#nextToken()">nextToken()</a>
    166      * may result in an exception being thrown.
    167      *
    168      * @see #next
    169      * @see #nextToken
    170      */
    171     int END_DOCUMENT = 1;
    172 
    173     /**
    174      * Returned from getEventType(),
    175      * <a href="#next()">next()</a>, <a href="#nextToken()">nextToken()</a> when
    176      * a start tag was read.
    177      * The name of the start tag is available from getName(); its namespace and prefix are
    178      * available from getNamespace() and getPrefix()
    179      * if <a href='#FEATURE_PROCESS_NAMESPACES'>namespaces are enabled</a>.
    180      * See getAttribute* methods to retrieve element attributes.
    181      * See getNamespace* methods to retrieve newly declared namespaces.
    182      *
    183      * @see #next
    184      * @see #nextToken
    185      * @see #getName
    186      * @see #getPrefix
    187      * @see #getNamespace()
    188      * @see #getAttributeCount
    189      * @see #getDepth
    190      * @see #getNamespaceCount
    191      * @see #getNamespace(String)
    192      * @see #FEATURE_PROCESS_NAMESPACES
    193      */
    194     int START_TAG = 2;
    195 
    196     /**
    197      * Returned from getEventType(), <a href="#next()">next()</a>, or
    198      * <a href="#nextToken()">nextToken()</a> when an end tag was read.
    199      * The name of the end tag is available from getName(); its
    200      * namespace and prefix are
    201      * available from getNamespace() and getPrefix().
    202      *
    203      * @see #next
    204      * @see #nextToken
    205      * @see #getName
    206      * @see #getPrefix
    207      * @see #getNamespace
    208      * @see #FEATURE_PROCESS_NAMESPACES
    209      */
    210     int END_TAG = 3;
    211 
    212 
    213     /**
    214      * Character data was read and is available by calling getText().
    215      * <p><strong>Please note:</strong> <a href="#next()">next()</a> will
    216      * accumulate multiple
    217      * events into one TEXT event, skipping IGNORABLE_WHITESPACE,
    218      * PROCESSING_INSTRUCTION and COMMENT events.
    219      * In contrast, <a href="#nextToken()">nextToken()</a> will stop reading
    220      * text when any other event is observed.
    221      * Also, when the state was reached by calling next(), the text value will
    222      * be normalized, whereas getText() will
    223      * return unnormalized content in the case of nextToken(). This allows
    224      * an exact roundtrip without changing line ends when examining low
    225      * level events, whereas for high level applications the text is
    226      * normalized appropriately.
    227      *
    228      * @see #next
    229      * @see #nextToken
    230      * @see #getText
    231      */
    232     int TEXT = 4;
    233 
    234     // ----------------------------------------------------------------------------
    235     // additional events exposed by lower level nextToken()
    236 
    237     /**
    238      * A CDATA section was just read;
    239      * this token is available only from calls to <a href="#nextToken()">nextToken()</a>.
    240      * A call to next() will accumulate various text events into a single event
    241      * of type TEXT. The text contained in the CDATA section is available
    242      * by calling getText().
    243      *
    244      * @see #nextToken
    245      * @see #getText
    246      */
    247     int CDSECT = 5;
    248 
    249     /**
    250      * An entity reference was just read;
    251      * this token is available from <a href="#nextToken()">nextToken()</a>
    252      * only. The entity name is available by calling getName(). If available,
    253      * the replacement text can be obtained by calling getText(); otherwise,
    254      * the user is responsible for resolving the entity reference.
    255      * This event type is never returned from next(); next() will
    256      * accumulate the replacement text and other text
    257      * events into a single TEXT event.
    258      *
    259      * @see #nextToken
    260      * @see #getText
    261      */
    262     int ENTITY_REF = 6;
    263 
    264     /**
    265      * Ignorable whitespace was just read.
    266      * This token is available only from <a href="#nextToken()">nextToken()</a>.
    267      * For non-validating
    268      * parsers, this event is only reported by nextToken() when outside
    269      * the root element.
    270      * Validating parsers may be able to detect ignorable whitespace at
    271      * other locations.
    272      * The ignorable whitespace string is available by calling getText().
    273      *
    274      * <p><strong>NOTE:</strong> this is different from calling the
    275      *  isWhitespace() method, since text content
    276      *  may be whitespace but not ignorable.
    277      *
    278      * Ignorable whitespace is skipped by next() automatically; this event
    279      * type is never returned from next().
    280      *
    281      * @see #nextToken
    282      * @see #getText
    283      */
    284     int IGNORABLE_WHITESPACE = 7;
    285 
    286     /**
    287      * An XML processing instruction was just read. This
    288      * event type is available only via <a href="#nextToken()">nextToken()</a>.
    289      * getText() will return the text that is inside the processing instruction.
    290      * Calls to next() will skip processing instructions automatically.
    291      * @see #nextToken
    292      * @see #getText
    293      */
    294     int PROCESSING_INSTRUCTION = 8;
    295 
    296     /**
    297      * An XML comment was just read. This token is
    298      * available via <a href="#nextToken()">nextToken()</a> only;
    299      * calls to next() will skip comments automatically.
    300      * The content of the comment can be accessed using the getText()
    301      * method.
    302      *
    303      * @see #nextToken
    304      * @see #getText
    305      */
    306     int COMMENT = 9;
    307 
    308     /**
    309      * An XML document type declaration was just read. This token is
    310      * available from <a href="#nextToken()">nextToken()</a> only.
    311      * The unparsed text inside the document type declaration is available via
    312      * the getText() method.
    313      *
    314      * @see #nextToken
    315      * @see #getText
    316      */
    317     int DOCDECL = 10;
    318 
    319     /**
    320      * This array can be used to convert the event type integer constants
    321      * such as START_TAG or TEXT
    322      * to a string. For example, the value of TYPES[START_TAG] is
    323      * the string "START_TAG".
    324      *
    325      * This array is intended for diagnostic output only. Relying
    326      * on the contents of the array may be dangerous since malicious
    327      * applications may alter the array, although it is final, due
    328      * to limitations of the Java language.
    329      */
    330     String [] TYPES = {
    331         "START_DOCUMENT",
    332             "END_DOCUMENT",
    333             "START_TAG",
    334             "END_TAG",
    335             "TEXT",
    336             "CDSECT",
    337             "ENTITY_REF",
    338             "IGNORABLE_WHITESPACE",
    339             "PROCESSING_INSTRUCTION",
    340             "COMMENT",
    341             "DOCDECL"
    342     };
    343 
    344 
    345     // ----------------------------------------------------------------------------
    346     // namespace related features
    347 
    348     /**
    349      * This feature determines whether the parser processes
    350      * namespaces. As for all features, the default value is false.
    351      * <p><strong>NOTE:</strong> The value cannot be changed during
    352      * parsing and must be set before parsing.
    353      *
    354      * @see #getFeature
    355      * @see #setFeature
    356      */
    357     String FEATURE_PROCESS_NAMESPACES =
    358         "http://xmlpull.org/v1/doc/features.html#process-namespaces";
    359 
    360     /**
    361      * This feature determines whether namespace attributes are exposed via
    362      * the attribute access methods. Like all features, the default value is
    363      * false. This feature cannot be changed during parsing. Note that the
    364      * fragment of the feature URI is "report-namespace-prefixes".
    365      *
    366      * @see #getFeature
    367      * @see #setFeature
    368      */
    369     String FEATURE_REPORT_NAMESPACE_ATTRIBUTES =
    370         "http://xmlpull.org/v1/doc/features.html#report-namespace-prefixes";
    371 
    372     /**
    373      * This feature determines whether the document type declaration
    374      * is processed. If set to false,
    375      * the DOCDECL event type is reported by nextToken()
    376      * and ignored by next().
    377      *
    378      * If this feature is activated, then the document type declaration
    379      * must be processed by the parser.
    380      *
    381      * <p><strong>Please note:</strong> If the document type declaration
    382      * was ignored, entity references may cause exceptions
    383      * later in the parsing process.
    384      * The default value of this feature is false. It cannot be changed
    385      * during parsing.
    386      *
    387      * @see #getFeature
    388      * @see #setFeature
    389      */
    390     String FEATURE_PROCESS_DOCDECL =
    391         "http://xmlpull.org/v1/doc/features.html#process-docdecl";
    392 
    393     /**
    394      * If this feature is activated, all validation errors as
    395      * defined in the XML 1.0 specification are reported.
    396      * This implies that FEATURE_PROCESS_DOCDECL is true and that both the
    397      * internal and the external document type declaration will be processed.
    398      * <p><strong>Please Note:</strong> This feature cannot be changed
    399      * during parsing. The default value is false.
    400      *
    401      * @see #getFeature
    402      * @see #setFeature
    403      */
    404     String FEATURE_VALIDATION =
    405         "http://xmlpull.org/v1/doc/features.html#validation";
    406 
    407     /**
    408      * Use this call to change the general behaviour of the parser,
    409      * such as namespace processing or doctype declaration handling.
    410      * This method must be called before the first call to next() or
    411      * nextToken(). Otherwise, an exception is thrown.
    412      * <p>Example: call setFeature(FEATURE_PROCESS_NAMESPACES, true) in order
    413      * to switch on namespace processing. The initial settings correspond
    414      * to the properties requested from the XML Pull Parser factory.
    415      * If none were requested, all features are deactivated by default.
    416      *
             * @param name The name of the feature to be set.
             * @param state The new value of the feature.
    417      * @exception XmlPullParserException If the feature is not supported or can not be set
    418      * @exception IllegalArgumentException If string with the feature name is null
    419      */
    420     void setFeature(String name,
    421                            boolean state) throws XmlPullParserException;
    422 
    423     /**
    424      * Returns the current value of the given feature.
    425      * <p><strong>Please note:</strong> unknown features are
    426      * <strong>always</strong> returned as false.
    427      *
    428      * @param name The name of the feature to be retrieved.
    429      * @return The value of the feature.
    430      * @exception IllegalArgumentException If string with the feature name is null
    431      */
    432 
    433     boolean getFeature(String name);
    434 
    435     /**
    436      * Set the value of a property.
    437      *
    438      * The property name is any fully-qualified URI.
    439      *
             * @param name The name of the property to be set.
             * @param value The new value of the property.
    440      * @exception XmlPullParserException If the property is not supported or can not be set
    441      * @exception IllegalArgumentException If string with the property name is null
    442      */
    443     void setProperty(String name,
    444                             Object value) throws XmlPullParserException;
    445 
    446     /**
    447      * Look up the value of a property.
    448      *
    449      * The property name is any fully-qualified URI.
    450      * <p><strong>NOTE:</strong> unknown properties are <strong>always</strong>
    451      * returned as null.
    452      *
    453      * @param name The name of the property to be retrieved.
    454      * @return The value of the named property.
    455      */
    456     Object getProperty(String name);
    457 
    458 
    459     /**
    460      * Sets the input source for the parser to the given reader and
    461      * resets the parser. The event type is set to the initial value
    462      * START_DOCUMENT.
    463      * Setting the reader to null will just stop parsing and
    464      * reset parser state,
    465      * allowing the parser to free internal resources
    466      * such as parsing buffers.
             *
             * @param in The reader to parse from, or null to stop parsing and reset the parser.
    467      */
    468     void setInput(Reader in) throws XmlPullParserException;
    469 
    470 
    471     /**
    472      * Sets the input stream the parser is going to process.
    473      * This call resets the parser state and sets the event type
    474      * to the initial value START_DOCUMENT.
    475      *
    476      * <p><strong>NOTE:</strong> If an input encoding string is passed,
    477      *  it MUST be used. Otherwise,
    478      *  if inputEncoding is null, the parser SHOULD try to determine
    479      *  input encoding following XML 1.0 specification (see below).
    480      *  If encoding detection is supported then the following feature
    481      *  <a href="http://xmlpull.org/v1/doc/features.html#detect-encoding">http://xmlpull.org/v1/doc/features.html#detect-encoding</a>
    482      *  MUST be true and otherwise it must be false.
    483      *
    484      * @param inputStream contains a raw byte input stream of possibly
    485      *     unknown encoding (when inputEncoding is null).
    486      *
    487      * @param inputEncoding if not null it MUST be used as encoding for inputStream
    488      */
    489     void setInput(InputStream inputStream, String inputEncoding)
    490         throws XmlPullParserException;
    491 
    492     /**
    493      * Returns the input encoding if known, null otherwise.
    494      * If setInput(InputStream, inputEncoding) was called with an inputEncoding
    495      * value other than null, this value must be returned
    496      * from this method. Otherwise, if inputEncoding is null and
    497      * the parser supports the encoding detection feature
    498      * (http://xmlpull.org/v1/doc/features.html#detect-encoding),
    499      * it must return the detected encoding.
    500      * If setInput(Reader) was called, null is returned.
    501      * After the first call to next(), if an XML declaration was present, this method
    502      * will return the declared encoding.
    503      */
    504     String getInputEncoding();
    505 
    506     /**
    507      * Set new value for entity replacement text as defined in
    508      * <a href="http://www.w3.org/TR/REC-xml#intern-replacement">XML 1.0 Section 4.5
    509      * Construction of Internal Entity Replacement Text</a>.
    510      * If FEATURE_PROCESS_DOCDECL or FEATURE_VALIDATION are set, calling this
    511      * function will result in an exception -- when processing of DOCDECL is
    512      * enabled, there is no need to set the entity replacement text manually.
    513      *
    514      * <p>The motivation for this function is to allow very small
    515      * implementations of XMLPULL that will work in J2ME environments.
    516      * Though these implementations may not be able to process the document type
    517      * declaration, they still can work with known DTDs by using this function.
    518      *
    519      * <p><b>Please note:</b> The given value is used literally as replacement text
    520      * and it corresponds to declaring entity in DTD that has all special characters
    521      * escaped: left angle bracket is replaced with &amp;lt;, ampersand with &amp;amp;
    522      * and so on.
    523      *
    524      * <p><b>Note:</b> The given value is the literal replacement text and must not
    525      * contain any other entity reference (if it contains any entity reference
    526      * there will be no further replacement).
    527      *
    528      * <p><b>Note:</b> The list of pre-defined entity names will
    529      * always contain standard XML entities such as
    530      * amp (&amp;amp;), lt (&amp;lt;), gt (&amp;gt;), quot (&amp;quot;), and apos (&amp;apos;).
    531      * Those cannot be redefined by this method!
    532      *
    533      * @see #setInput
    534      * @see #FEATURE_PROCESS_DOCDECL
    535      * @see #FEATURE_VALIDATION
    536      */
    537     void defineEntityReplacementText( String entityName,
    538                                             String replacementText ) throws XmlPullParserException;
    539 
    540     /**
    541      * Returns the number of elements in the namespace stack for the given
    542      * depth.
    543      * If namespaces are not enabled, 0 is returned.
    544      *
    545      * <p><b>NOTE:</b> when parser is on END_TAG then it is allowed to call
    546      *  this function with getDepth()+1 argument to retrieve position of namespace
    547      *  prefixes and URIs that were declared on corresponding START_TAG.
    548      * <p><b>NOTE:</b> to retrieve list of namespaces declared in current element:<pre>
    549      *       XmlPullParser pp = ...
    550      *       int nsStart = pp.getNamespaceCount(pp.getDepth()-1);
    551      *       int nsEnd = pp.getNamespaceCount(pp.getDepth());
    552      *       for (int i = nsStart; i &lt; nsEnd; i++) {
    553      *          String prefix = pp.getNamespacePrefix(i);
    554      *          String ns = pp.getNamespaceUri(i);
    555      *           // ...
    556      *      }
    557      * </pre>
    558      *
    559      * @see #getNamespacePrefix
    560      * @see #getNamespaceUri
    561      * @see #getNamespace()
    562      * @see #getNamespace(String)
    563      */
    564     int getNamespaceCount(int depth) throws XmlPullParserException;
    565 
    566     /**
    567      * Returns the namespace prefix for the given position
    568      * in the namespace stack.
    569      * Default namespace declaration (xmlns='...') will have null as prefix.
    570      * If the given index is out of range, an exception is thrown.
    571      * <p><b>Please note:</b> when the parser is on an END_TAG,
    572      * namespace prefixes that were declared
    573      * in the corresponding START_TAG are still accessible
    574      * although they are no longer in scope.
             *
             * @param pos The position in the namespace stack.
    575      */
    576     String getNamespacePrefix(int pos) throws XmlPullParserException;
    577 
    578     /**
    579      * Returns the namespace URI for the given position in the
    580      * namespace stack.
    581      * If the position is out of range, an exception is thrown.
    582      * <p><b>NOTE:</b> when parser is on END_TAG then namespace prefixes that were declared
    583      *  in corresponding START_TAG are still accessible even though they are not in scope.
    584      */
    585     String getNamespaceUri(int pos) throws XmlPullParserException;
    586 
    587     /**
    588      * Returns the URI corresponding to the given prefix,
    589      * depending on current state of the parser.
    590      *
    591      * <p>If the prefix was not declared in the current scope,
    592      * null is returned. The default namespace is included
    593      * in the namespace table and is available via
    594      * getNamespace (null).
    595      *
    596      * <p>This method is a convenience method for
    597      *
    598      * <pre>
    599      *  for (int i = getNamespaceCount(getDepth ())-1; i >= 0; i--) {
    600      *   if (getNamespacePrefix(i).equals( prefix )) {
    601      *     return getNamespaceUri(i);
    602      *   }
    603      *  }
    604      *  return null;
    605      * </pre>
    606      *
    607      * <p><strong>Please note:</strong> parser implementations
    608      * may provide more efficient lookup, e.g. using a Hashtable.
    609      * The 'xml' prefix is bound to "http://www.w3.org/XML/1998/namespace", as
    610      * defined in the
    611      * <a href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a>
    612      * specification. Analogously, the 'xmlns' prefix is resolved to
    613      * <a href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a>.
    614      *
    615      * @see #getNamespaceCount
    616      * @see #getNamespacePrefix
    617      * @see #getNamespaceUri
    618      */
    619     String getNamespace (String prefix);
    620 
    621 
    622     // --------------------------------------------------------------------------
    623     // miscellaneous reporting methods
    624 
    625     /**
    626      * Returns the current depth of the element.
    627      * Outside the root element, the depth is 0. The
    628      * depth is incremented by 1 when a start tag is reached.
    629      * The depth is decremented AFTER the end tag
    630      * event was observed.
    631      *
    632      * <pre>
    633      * &lt;!-- outside --&gt;     0
    634      * &lt;root&gt;               1
    635      *   sometext                 1
    636      *     &lt;foobar&gt;         2
    637      *     &lt;/foobar&gt;        2
    638      * &lt;/root&gt;              1
    639      * &lt;!-- outside --&gt;     0
    640      * </pre>
    641      */
    642     int getDepth();
    643 
    644     /**
    645      * Returns a short text describing the current parser state, including
    646      * the position, a description of the current event
    647      * and the data source if known.
    648      * This method is especially useful to provide meaningful
    649      * error messages and for debugging purposes.
    650      */
    651     String getPositionDescription ();
    652 
    653 
    654     /**
    655      * Returns the current line number, starting from 1.
    656      * When the parser does not know the current line number
    657      * or cannot determine it, -1 is returned (e.g. for WBXML).
    658      *
    659      * @return current line number or -1 if unknown.
    660      */
    661     int getLineNumber();
    662 
    663     /**
    664      * Returns the current column number, starting from 0.
    665      * When the parser does not know the current column number
    666      * or cannot determine it, -1 is returned (e.g. for WBXML).
    667      *
    668      * @return current column number or -1 if unknown.
    669      */
    670     int getColumnNumber();
    671 
    672 
    673     // --------------------------------------------------------------------------
    674     // TEXT related methods
    675 
    676     /**
    677      * Checks whether the current TEXT event contains only whitespace
    678      * characters.
    679      * For IGNORABLE_WHITESPACE, this is always true.
    680      * For TEXT and CDSECT, false is returned when the current event text
    681      * contains at least one non-whitespace character. For any other
    682      * event type an exception is thrown.
    683      *
    684      * <p><b>Please note:</b> non-validating parsers are not
    685      * able to distinguish whitespace and ignorable whitespace,
    686      * except for whitespace outside the root element. Ignorable
    687      * whitespace is reported as a separate event, which is exposed
    688      * via nextToken only.
    689      *
    690      */
    691     boolean isWhitespace() throws XmlPullParserException;
    692 
    693     /**
    694      * Returns the text content of the current event as a String.
    695      * The value returned depends on the current event type;
    696      * for example, for a TEXT event it is the element content
    697      * (this is the typical case when next() is used).
    698      *
    699      * <p>See the description of nextToken() for a detailed description of
    700      * possible returned values for different types of events.
    701      *
    702      * <p><strong>NOTE:</strong> in case of ENTITY_REF, this method returns
    703      * the entity replacement text (or null if not available). This is
    704      * the only case where
    705      * getText() and getTextCharacters() return different values.
    706      *
    707      * @see #getEventType
    708      * @see #next
    709      * @see #nextToken
    710      */
    711     String getText ();
    712 
    713 
    714     /**
    715      * Returns the buffer that contains the text of the current event,
    716      * as well as the start offset and length relevant for the current
    717      * event. See getText(), next() and nextToken() for description of possible returned values.
    718      *
    719      * <p><strong>Please note:</strong> this buffer must not
    720      * be modified and its content MAY change after a call to
    721      * next() or nextToken(). This method will always return the
    722      * same value as getText(), except for ENTITY_REF. In the case
    723      * of ENTITY_REF, getText() returns the replacement text and
    724      * this method returns the actual input buffer containing the
    725      * entity name.
    726      * If getText() returns null, this method returns null as well and
    727      * the values returned in the holder array MUST be -1 (both start
    728      * and length).
    729      *
    730      * @see #getText
    731      * @see #next
    732      * @see #nextToken
    733      *
    734      * @param holderForStartAndLength Must hold a 2-element int array
    735      * into which the start offset and length values will be written.
    736      * @return char buffer that contains the text of the current event
    737      *  (null if the current event has no text associated).
    738      */
    739     char[] getTextCharacters(int [] holderForStartAndLength);
    740 
    741     // --------------------------------------------------------------------------
    742     // START_TAG / END_TAG shared methods
    743 
    744     /**
    745      * Returns the namespace URI of the current element.
    746      * The default namespace is represented
    747      * as an empty string.
    748      * If namespaces are not enabled, an empty String ("") is always returned.
    749      * The current event must be START_TAG or END_TAG; otherwise,
    750      * null is returned.
    751      */
    752     String getNamespace ();
    753 
    754     /**
    755      * For START_TAG or END_TAG events, the (local) name of the current
    756      * element is returned when namespaces are enabled. When namespace
    757      * processing is disabled, the raw name is returned.
    758      * For ENTITY_REF events, the entity name is returned.
    759      * If the current event is not START_TAG, END_TAG, or ENTITY_REF,
    760      * null is returned.
    761      * <p><b>Please note:</b> To reconstruct the raw element name
    762      * when namespaces are enabled and the prefix is not null,
    763      * you will need to add the prefix and a colon to the local name.
    764      *
    765      */
    766     String getName();
    767 
    768     /**
    769      * Returns the prefix of the current element.
    770      * If the element is in the default namespace (has no prefix),
    771      * null is returned.
    772      * If namespaces are not enabled, or the current event
    773      * is not START_TAG or END_TAG, null is returned.
    774      */
    775     String getPrefix();
    776 
    777     /**
    778      * Returns true if the current event is START_TAG and the tag
    779      * is degenerate, i.e. an empty-element tag
    780      * (e.g. &lt;foobar/&gt;).
    781      * <p><b>NOTE:</b> if the parser is not on START_TAG, an exception
    782      * will be thrown.
    783      */
    784     boolean isEmptyElementTag() throws XmlPullParserException;
    785 
    786     // --------------------------------------------------------------------------
    787     // START_TAG Attributes retrieval methods
    788 
    789     /**
    790      * Returns the number of attributes of the current start tag, or
    791      * -1 if the current event type is not START_TAG.
    792      *
    793      * @see #getAttributeNamespace
    794      * @see #getAttributeName
    795      * @see #getAttributePrefix
    796      * @see #getAttributeValue
    797      */
    798     int getAttributeCount();
    799 
    800     /**
    801      * Returns the namespace URI of the attribute
    802      * with the given index (starts from 0).
    803      * Returns an empty string ("") if namespaces are not enabled
    804      * or the attribute has no namespace.
    805      * Throws an IndexOutOfBoundsException if the index is out of range
    806      * or the current event type is not START_TAG.
    807      *
    808      * <p><strong>NOTE:</strong> if FEATURE_REPORT_NAMESPACE_ATTRIBUTES is set
    809      * then namespace attributes (xmlns:ns='...') must be reported
    810      * with namespace
    811      * <a href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a>
    812      * (visit this URL for description!).
    813      * The default namespace attribute (xmlns="...") will be reported with empty namespace.
    814      * <p><strong>NOTE:</strong> The xml prefix is bound as defined in
    815      * <a href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a>
    816      * specification to "http://www.w3.org/XML/1998/namespace".
    817      *
    818      * @param index zero-based index of attribute
    819      * @return attribute namespace,
    820      *   empty string ("") is returned if namespace processing is not enabled or
    821      *   namespace processing is enabled but attribute has no namespace (it has no prefix).
    822      */
    823     String getAttributeNamespace (int index);
    824 
    825     /**
    826      * Returns the local name of the specified attribute
    827      * if namespaces are enabled or just the attribute name if namespaces are disabled.
    828      * Throws an IndexOutOfBoundsException if the index is out of range
    829      * or the current event type is not START_TAG.
    830      *
    831      * @param index zero-based index of attribute
    832      * @return attribute name (null is never returned)
    833      */
    834     String getAttributeName (int index);
    835 
    836     /**
    837      * Returns the prefix of the specified attribute.
    838      * Returns null if the attribute has no prefix.
    839      * If namespaces are disabled it will always return null.
    840      * Throws an IndexOutOfBoundsException if the index is out of range
    841      * or the current event type is not START_TAG.
    842      *
    843      * @param index zero-based index of attribute
    844      * @return attribute prefix or null if it has none or namespace processing is not enabled.
    845      */
    846     String getAttributePrefix(int index);
    847 
    848     /**
    849      * Returns the type of the specified attribute.
    850      * If the parser is non-validating it MUST return CDATA.
    851      *
    852      * @param index zero-based index of attribute
    853      * @return attribute type (null is never returned)
    854      */
    855     String getAttributeType(int index);
    856 
    857     /**
    858      * Returns true if the specified attribute was not in the input but was declared in XML.
    859      * If the parser is non-validating it MUST always return false.
    860      * This information is part of the XML infoset.
    861      *
    862      * @param index zero-based index of attribute
    863      * @return false if attribute was in input
    864      */
    865     boolean isAttributeDefault(int index);
    866 
    867     /**
    868      * Returns the given attribute's value.
    869      * Throws an IndexOutOfBoundsException if the index is out of range
    870      * or the current event type is not START_TAG.
    871      *
    872      * <p><strong>NOTE:</strong> attribute value must be normalized
    873      * (including entity replacement text if PROCESS_DOCDECL is false) as described in
    874      * <a href="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section
    875      * 3.3.3 Attribute-Value Normalization</a>.
    876      *
    877      * @see #defineEntityReplacementText
    878      *
    879      * @param index zero-based index of attribute
    880      * @return value of attribute (null is never returned)
    881      */
    882     String getAttributeValue(int index);
    883 
    884     /**
    885      * Returns the attribute's value identified by namespace URI and local name.
    886      * If namespaces are disabled namespace must be null.
    887      * If current event type is not START_TAG then IndexOutOfBoundsException will be thrown.
    888      *
    889      * <p><strong>NOTE:</strong> attribute value must be normalized
    890      * (including entity replacement text if PROCESS_DOCDECL is false) as described in
    891      * <a href="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section
    892      * 3.3.3 Attribute-Value Normalization</a>.
    893      *
    894      * @see #defineEntityReplacementText
    895      *
    896      * @param namespace Namespace of the attribute if namespaces are enabled otherwise must be null
    897      * @param name If namespaces enabled local name of attribute otherwise just attribute name
    898      * @return value of attribute or null if attribute with given name does not exist
    899      */
    900     String getAttributeValue(String namespace,
    901                                     String name);
    902 
    903     // --------------------------------------------------------------------------
    904     // actual parsing methods
    905 
    906     /**
    907      * Returns the type of the current event (START_TAG, END_TAG, TEXT, etc.).
    908      *
    909      * @see #next()
    910      * @see #nextToken()
    911      */
    912     int getEventType()
    913         throws XmlPullParserException;
    914 
    915     /**
    916      * Gets the next parsing event - element content will be coalesced and only one
    917      * TEXT event must be returned for whole element content
    918      * (comments and processing instructions will be ignored and entity references
    919      * must be expanded or exception must be thrown if entity reference cannot be expanded).
    920      * If element content is empty (content is "") then no TEXT event will be reported.
    921      *
    922      * <p><b>NOTE:</b> an empty element (such as &lt;tag/>) will be reported
    923      * with two separate events: START_TAG, END_TAG - it must be so to preserve
    924      * parsing equivalency of empty element to &lt;tag>&lt;/tag>
    925      * (see isEmptyElementTag ()).
    926      *
    927      * @see #isEmptyElementTag
    928      * @see #START_TAG
    929      * @see #TEXT
    930      * @see #END_TAG
    931      * @see #END_DOCUMENT
    932      */
    933 
    934     int next()
    935         throws XmlPullParserException, IOException;
    936 
    937 
    938     /**
    939      * This method works similarly to next() but will expose
    940      * additional event types (COMMENT, CDSECT, DOCDECL, ENTITY_REF, PROCESSING_INSTRUCTION, or
    941      * IGNORABLE_WHITESPACE) if they are available in input.
    942      *
    943      * <p>If special feature
    944      * <a href="http://xmlpull.org/v1/doc/features.html#xml-roundtrip">FEATURE_XML_ROUNDTRIP</a>
    945      * (identified by URI: http://xmlpull.org/v1/doc/features.html#xml-roundtrip)
    946      * is enabled it is possible to do XML document round trip i.e. reproduce
    947      * exactly on output the XML input using getText():
    948      * returned content is always unnormalized (exactly as in input).
    949      * Otherwise returned content is end-of-line normalized as described in
    950      * <a href="http://www.w3.org/TR/REC-xml#sec-line-ends">XML 1.0 End-of-Line Handling</a>.
    951      * Also when this feature is enabled exact content of START_TAG, END_TAG,
    952      * DOCDECL and PROCESSING_INSTRUCTION is available.
    953      *
    954      * <p>Here is the list of tokens that can be returned from nextToken()
    955      * and what getText() and getTextCharacters() return:<dl>
    956      * <dt>START_DOCUMENT<dd>null
    957      * <dt>END_DOCUMENT<dd>null
    958      * <dt>START_TAG<dd>null unless FEATURE_XML_ROUNDTRIP
    959      *  is enabled and then returns XML tag, ex: &lt;tag attr='val'>
    960      * <dt>END_TAG<dd>null unless FEATURE_XML_ROUNDTRIP
    961      *  is enabled and then returns XML tag, ex: &lt;/tag>
    962      * <dt>TEXT<dd>return element content.
    963      *  <br>Note: that element content may be delivered in multiple consecutive TEXT events.
    964      * <dt>IGNORABLE_WHITESPACE<dd>return characters that are determined to be ignorable white
    965      * space. If the FEATURE_XML_ROUNDTRIP is enabled all whitespace content outside root
    966      * element will always be reported as IGNORABLE_WHITESPACE otherwise reporting is optional.
    967      *  <br>Note: that element content may be delivered in multiple consecutive IGNORABLE_WHITESPACE events.
    968      * <dt>CDSECT<dd>
    969      * return text <em>inside</em> CDATA
    970      *  (ex. 'fo&lt;o' from &lt;![CDATA[fo&lt;o]]>)
    971      * <dt>PROCESSING_INSTRUCTION<dd>
    972      *  if FEATURE_XML_ROUNDTRIP is true
    973      *  return exact PI content ex: 'pi foo' from &lt;?pi foo?>
    974      *  otherwise it may be exact PI content or concatenation of PI target,
    975      * space and data so for example for
    976      *   &lt;?target    data?> string &quot;target data&quot; may
    977      *       be returned if FEATURE_XML_ROUNDTRIP is false.
    978      * <dt>COMMENT<dd>return comment content ex. 'foo bar' from &lt;!--foo bar-->
    979      * <dt>ENTITY_REF<dd>getText() MUST return entity replacement text if PROCESS_DOCDECL is false
    980      * otherwise getText() MAY return null,
    981      * additionally getTextCharacters() MUST return entity name
    982      * (for example 'entity_name' for &amp;entity_name;).
    983      * <br><b>NOTE:</b> this is the only place where the values returned from getText() and
    984      *   getTextCharacters() <b>are different</b>
    985      * <br><b>NOTE:</b> it is the user's responsibility to resolve the entity reference
    986      *    if PROCESS_DOCDECL is false and there is no entity replacement text set in
    987      *    defineEntityReplacementText() method (getText() will be null)
    988      * <br><b>NOTE:</b> character entities (ex. &amp;#32;) and standard entities such as
    989      *  &amp;amp; &amp;lt; &amp;gt; &amp;quot; &amp;apos; are reported as well
    990      *  and are <b>not</b> reported as TEXT tokens but as ENTITY_REF tokens!
    991      *  This requirement is added to allow roundtrip of XML documents!
    992      * <dt>DOCDECL<dd>
    993      * if FEATURE_XML_ROUNDTRIP is true or PROCESS_DOCDECL is false
    994      * then return what is inside of DOCDECL for example it returns:<pre>
    995      * &quot; titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
    996      * [&lt;!ENTITY % active.links "INCLUDE">]&quot;</pre>
    997      * <p>for input document that contained:<pre>
    998      * &lt;!DOCTYPE titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
    999      * [&lt;!ENTITY % active.links "INCLUDE">]></pre>
   1000      * otherwise if FEATURE_XML_ROUNDTRIP is false and PROCESS_DOCDECL is true
   1001      *    then what is returned is undefined (it may be even null)
   1002      * </dd>
   1003      * </dl>
   1004      *
   1005      * <p><strong>NOTE:</strong> there is no guarantee that there will be only one TEXT or
   1006      * IGNORABLE_WHITESPACE event from nextToken() as parser may choose to deliver element content in
   1007      * multiple tokens (dividing element content into chunks)
   1008      *
   1009      * <p><strong>NOTE:</strong> whether returned text of token is end-of-line normalized
   1010      *  depends on FEATURE_XML_ROUNDTRIP.
   1011      *
   1012      * <p><strong>NOTE:</strong> XMLDecl (&lt;?xml ...?&gt;) is not reported but its content
   1013      * is available through optional properties (see class description above).
   1014      *
   1015      * @see #next
   1016      * @see #START_TAG
   1017      * @see #TEXT
   1018      * @see #END_TAG
   1019      * @see #END_DOCUMENT
   1020      * @see #COMMENT
   1021      * @see #DOCDECL
   1022      * @see #PROCESSING_INSTRUCTION
   1023      * @see #ENTITY_REF
   1024      * @see #IGNORABLE_WHITESPACE
   1025      */
   1026     int nextToken()
   1027         throws XmlPullParserException, IOException;
   1028 
   1029     //-----------------------------------------------------------------------------
   1030     // utility methods to make XML parsing easier ...
   1031 
   1032     /**
   1033      * Tests if the current event is of the given type and if the
   1034      * namespace and name match. null will match any namespace
   1035      * and any name. If the test is not passed, an exception is
   1036      * thrown. The exception text indicates the parser position,
   1037      * the expected event and the current event that is not meeting the
   1038      * requirement.
   1039      *
   1040      * <p>Essentially it does this:
   1041      * <pre>
   1042      *  if (type != getEventType()
   1043      *  || (namespace != null &amp;&amp;  !namespace.equals( getNamespace () ) )
   1044      *  || (name != null &amp;&amp;  !name.equals( getName() ) ) )
   1045      *     throw new XmlPullParserException( "expected "+ TYPES[ type ]+getPositionDescription());
   1046      * </pre>
   1047      */
   1048     void require(int type, String namespace, String name)
   1049         throws XmlPullParserException, IOException;
   1050 
   1051     /**
   1052      * If current event is START_TAG then if next element is TEXT then element content is returned
   1053      * or if next event is END_TAG then empty string is returned, otherwise exception is thrown.
   1054      * After calling this function successfully parser will be positioned on END_TAG.
   1055      *
   1056      * <p>The motivation for this function is to allow to parse consistently both
   1057      * empty elements and elements that have non-empty content, for example for input: <ol>
   1058      * <li>&lt;tag&gt;foo&lt;/tag&gt;
   1059      * <li>&lt;tag&gt;&lt;/tag&gt; (which is equivalent to &lt;tag/&gt;)</ol>
   1060      * both inputs can be parsed with the same code:
   1061      * <pre>
   1062      *   p.nextTag();
   1063      *   p.require(p.START_TAG, "", "tag");
   1064      *   String content = p.nextText();
   1065      *   p.require(p.END_TAG, "", "tag");
   1066      * </pre>
   1067      * This function together with nextTag makes it very easy to parse XML that has
   1068      * no mixed content.
   1069      *
   1070      *
   1071      * <p>Essentially it does this:
   1072      * <pre>
   1073      *  if(getEventType() != START_TAG) {
   1074      *     throw new XmlPullParserException(
   1075      *       "parser must be on START_TAG to read next text", this, null);
   1076      *  }
   1077      *  int eventType = next();
   1078      *  if(eventType == TEXT) {
   1079      *     String result = getText();
   1080      *     eventType = next();
   1081      *     if(eventType != END_TAG) {
   1082      *       throw new XmlPullParserException(
   1083      *          "event TEXT it must be immediately followed by END_TAG", this, null);
   1084      *      }
   1085      *      return result;
   1086      *  } else if(eventType == END_TAG) {
   1087      *     return "";
   1088      *  } else {
   1089      *     throw new XmlPullParserException(
   1090      *       "parser must be on START_TAG or TEXT to read text", this, null);
   1091      *  }
   1092      * </pre>
   1093      *
   1094      * <p><strong>Warning:</strong> Prior to API level 14, the pull parser returned by {@code
   1095      * android.util.Xml} did not always advance to the END_TAG event when this method was called.
   1096      * Work around by manually advancing after calls to nextText(): <pre>
   1097      *  String text = xpp.nextText();
   1098      *  if (xpp.getEventType() != XmlPullParser.END_TAG) {
   1099      *      xpp.next();
   1100      *  }
   1101      * </pre>
   1102      */
   1103     String nextText() throws XmlPullParserException, IOException;
   1104 
   1105     /**
   1106      * Calls next() and returns the event if it is START_TAG or END_TAG,
   1107      * otherwise throws an exception.
   1108      * It will skip whitespace TEXT before the actual tag, if any.
   1109      *
   1110      * <p>Essentially it does this:
   1111      * <pre>
   1112      *   int eventType = next();
   1113      *   if(eventType == TEXT &amp;&amp;  isWhitespace()) {   // skip whitespace
   1114      *      eventType = next();
   1115      *   }
   1116      *   if (eventType != START_TAG &amp;&amp;  eventType != END_TAG) {
   1117      *      throw new XmlPullParserException("expected start or end tag", this, null);
   1118      *   }
   1119      *   return eventType;
   1120      * </pre>
   1121      */
   1122     int nextTag() throws XmlPullParserException, IOException;
   1123 
   1124 }
   1125