Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 package com.android.utils;
     18 
     19 import com.android.annotations.NonNull;
     20 import com.android.annotations.Nullable;
     21 
     22 import org.w3c.dom.Attr;
     23 import org.w3c.dom.Document;
     24 import org.w3c.dom.Element;
     25 import org.w3c.dom.Node;
     26 import org.w3c.dom.Text;
     27 import org.xml.sax.Attributes;
     28 import org.xml.sax.InputSource;
     29 import org.xml.sax.Locator;
     30 import org.xml.sax.SAXException;
     31 import org.xml.sax.helpers.DefaultHandler;
     32 
     33 import java.io.ByteArrayOutputStream;
     34 import java.io.IOException;
     35 import java.io.InputStream;
     36 import java.io.StringReader;
     37 import java.io.UnsupportedEncodingException;
     38 import java.util.ArrayList;
     39 import java.util.List;
     40 import java.util.regex.Matcher;
     41 import java.util.regex.Pattern;
     42 
     43 import javax.xml.parsers.DocumentBuilder;
     44 import javax.xml.parsers.DocumentBuilderFactory;
     45 import javax.xml.parsers.ParserConfigurationException;
     46 import javax.xml.parsers.SAXParser;
     47 import javax.xml.parsers.SAXParserFactory;
     48 
     49 /**
     50  * A simple DOM XML parser which can retrieve exact beginning and end offsets
     51  * (and line and column numbers) for element nodes as well as attribute nodes.
     52  */
     53 public class PositionXmlParser {
     54     private static final String UTF_8 = "UTF-8";                 //$NON-NLS-1$
     55     private static final String UTF_16 = "UTF_16";               //$NON-NLS-1$
     56     private static final String UTF_16LE = "UTF_16LE";           //$NON-NLS-1$
     57     private static final String CONTENT_KEY = "contents";        //$NON-NLS-1$
     58     private final static String POS_KEY = "offsets";             //$NON-NLS-1$
     59     private static final String NAMESPACE_PREFIX_FEATURE =
     60             "http://xml.org/sax/features/namespace-prefixes";    //$NON-NLS-1$
     61     private static final String NAMESPACE_FEATURE =
     62             "http://xml.org/sax/features/namespaces";            //$NON-NLS-1$
     63     /** See http://www.w3.org/TR/REC-xml/#NT-EncodingDecl */
     64     private static final Pattern ENCODING_PATTERN =
     65             Pattern.compile("encoding=['\"](\\S*)['\"]");//$NON-NLS-1$
     66 
     67     /**
     68      * Parses the XML content from the given input stream.
     69      *
     70      * @param input the input stream containing the XML to be parsed
     71      * @return the corresponding document
     72      * @throws ParserConfigurationException if a SAX parser is not available
     73      * @throws SAXException if the document contains a parsing error
     74      * @throws IOException if something is seriously wrong. This should not
     75      *             happen since the input source is known to be constructed from
     76      *             a string.
     77      */
     78     @Nullable
     79     public Document parse(@NonNull InputStream input)
     80             throws ParserConfigurationException, SAXException, IOException {
     81         // Read in all the data
     82         ByteArrayOutputStream out = new ByteArrayOutputStream();
     83         byte[] buf = new byte[1024];
     84         while (true) {
     85           int r = input.read(buf);
     86           if (r == -1) {
     87             break;
     88           }
     89           out.write(buf, 0, r);
     90         }
     91         input.close();
     92         return parse(out.toByteArray());
     93     }
     94 
     95     /**
     96      * Parses the XML content from the given byte array
     97      *
     98      * @param data the raw XML data (with unknown encoding)
     99      * @return the corresponding document
    100      * @throws ParserConfigurationException if a SAX parser is not available
    101      * @throws SAXException if the document contains a parsing error
    102      * @throws IOException if something is seriously wrong. This should not
    103      *             happen since the input source is known to be constructed from
    104      *             a string.
    105      */
    106     @Nullable
    107     public Document parse(@NonNull byte[] data)
    108             throws ParserConfigurationException, SAXException, IOException {
    109         String xml = getXmlString(data);
    110         return parse(xml, new InputSource(new StringReader(xml)), true);
    111     }
    112 
    113     /**
    114      * Parses the given XML content.
    115      *
    116      * @param xml the XML string to be parsed. This must be in the correct
    117      *     encoding already.
    118      * @return the corresponding document
    119      * @throws ParserConfigurationException if a SAX parser is not available
    120      * @throws SAXException if the document contains a parsing error
    121      * @throws IOException if something is seriously wrong. This should not
    122      *             happen since the input source is known to be constructed from
    123      *             a string.
    124      */
    125     @Nullable
    126     public Document parse(@NonNull String xml)
    127             throws ParserConfigurationException, SAXException, IOException {
    128         return parse(xml, new InputSource(new StringReader(xml)), true);
    129     }
    130 
    131     @NonNull
    132     private Document parse(@NonNull String xml, @NonNull InputSource input, boolean checkBom)
    133             throws ParserConfigurationException, SAXException, IOException {
    134         try {
    135             SAXParserFactory factory = SAXParserFactory.newInstance();
    136             factory.setFeature(NAMESPACE_FEATURE, true);
    137             factory.setFeature(NAMESPACE_PREFIX_FEATURE, true);
    138             SAXParser parser = factory.newSAXParser();
    139             DomBuilder handler = new DomBuilder(xml);
    140             parser.parse(input, handler);
    141             return handler.getDocument();
    142         } catch (SAXException e) {
    143             if (checkBom && e.getMessage().contains("Content is not allowed in prolog")) {
    144                 // Byte order mark in the string? Skip it. There are many markers
    145                 // (see http://en.wikipedia.org/wiki/Byte_order_mark) so here we'll
    146                 // just skip those up to the XML prolog beginning character, <
    147                 xml = xml.replaceFirst("^([\\W]+)<","<");  //$NON-NLS-1$ //$NON-NLS-2$
    148                 return parse(xml, new InputSource(new StringReader(xml)), false);
    149             }
    150             throw e;
    151         }
    152     }
    153 
    154     /**
    155      * Returns the String corresponding to the given byte array of XML data
    156      * (with unknown encoding). This method attempts to guess the encoding based
    157      * on the XML prologue.
    158      * @param data the XML data to be decoded into a string
    159      * @return a string corresponding to the XML data
    160      */
    161     public static String getXmlString(byte[] data) {
    162         int offset = 0;
    163 
    164         String defaultCharset = UTF_8;
    165         String charset = null;
    166         // Look for the byte order mark, to see if we need to remove bytes from
    167         // the input stream (and to determine whether files are big endian or little endian) etc
    168         // for files which do not specify the encoding.
    169         // See http://unicode.org/faq/utf_bom.html#BOM for more.
    170         if (data.length > 4) {
    171             if (data[0] == (byte)0xef && data[1] == (byte)0xbb && data[2] == (byte)0xbf) {
    172                 // UTF-8
    173                 defaultCharset = charset = UTF_8;
    174                 offset += 3;
    175             } else if (data[0] == (byte)0xfe && data[1] == (byte)0xff) {
    176                 //  UTF-16, big-endian
    177                 defaultCharset = charset = UTF_16;
    178                 offset += 2;
    179             } else if (data[0] == (byte)0x0 && data[1] == (byte)0x0
    180                     && data[2] == (byte)0xfe && data[3] == (byte)0xff) {
    181                 // UTF-32, big-endian
    182                 defaultCharset = charset = "UTF_32";    //$NON-NLS-1$
    183                 offset += 4;
    184             } else if (data[0] == (byte)0xff && data[1] == (byte)0xfe
    185                     && data[2] == (byte)0x0 && data[3] == (byte)0x0) {
    186                 // UTF-32, little-endian. We must check for this *before* looking for
    187                 // UTF_16LE since UTF_32LE has the same prefix!
    188                 defaultCharset = charset = "UTF_32LE";  //$NON-NLS-1$
    189                 offset += 4;
    190             } else if (data[0] == (byte)0xff && data[1] == (byte)0xfe) {
    191                 //  UTF-16, little-endian
    192                 defaultCharset = charset = UTF_16LE;
    193                 offset += 2;
    194             }
    195         }
    196         int length = data.length - offset;
    197 
    198         // Guess encoding by searching for an encoding= entry in the first line.
    199         // The prologue, and the encoding names, will always be in ASCII - which means
    200         // we don't need to worry about strange character encodings for the prologue characters.
    201         // However, one wrinkle is that the whole file may be encoded in something like UTF-16
    202         // where there are two bytes per character, so we can't just look for
    203         //  ['e','n','c','o','d','i','n','g'] etc in the byte array since there could be
    204         // multiple bytes for each character. However, since again the prologue is in ASCII,
    205         // we can just drop the zeroes.
    206         boolean seenOddZero = false;
    207         boolean seenEvenZero = false;
    208         int prologueStart = -1;
    209         for (int lineEnd = offset; lineEnd < data.length; lineEnd++) {
    210             if (data[lineEnd] == 0) {
    211                 if ((lineEnd - offset) % 1 == 0) {
    212                     seenEvenZero = true;
    213                 } else {
    214                     seenOddZero = true;
    215                 }
    216             } else if (data[lineEnd] == '\n' || data[lineEnd] == '\r') {
    217                 break;
    218             } else if (data[lineEnd] == '<') {
    219                 prologueStart = lineEnd;
    220             } else if (data[lineEnd] == '>') {
    221                 // End of prologue. Quick check to see if this is a utf-8 file since that's
    222                 // common
    223                 for (int i = lineEnd - 4; i >= 0; i--) {
    224                     if ((data[i] == 'u' || data[i] == 'U')
    225                             && (data[i + 1] == 't' || data[i + 1] == 'T')
    226                             && (data[i + 2] == 'f' || data[i + 2] == 'F')
    227                             && (data[i + 3] == '-' || data[i + 3] == '_')
    228                             && (data[i + 4] == '8')
    229                             ) {
    230                         charset = UTF_8;
    231                         break;
    232                     }
    233                 }
    234 
    235                 if (charset == null) {
    236                     StringBuilder sb = new StringBuilder();
    237                     for (int i = prologueStart; i <= lineEnd; i++) {
    238                         if (data[i] != 0) {
    239                             sb.append((char) data[i]);
    240                         }
    241                     }
    242                     String prologue = sb.toString();
    243                     int encodingIndex = prologue.indexOf("encoding"); //$NON-NLS-1$
    244                     if (encodingIndex != -1) {
    245                         Matcher matcher = ENCODING_PATTERN.matcher(prologue);
    246                         if (matcher.find(encodingIndex)) {
    247                             charset = matcher.group(1);
    248                         }
    249                     }
    250                 }
    251 
    252                 break;
    253             }
    254         }
    255 
    256         // No prologue on the first line, and no byte order mark: Assume UTF-8/16
    257         if (charset == null) {
    258             charset = seenOddZero ? UTF_16 : seenEvenZero ? UTF_16LE : UTF_8;
    259         }
    260 
    261         String xml = null;
    262         try {
    263             xml = new String(data, offset, length, charset);
    264         } catch (UnsupportedEncodingException e) {
    265             try {
    266                 if (charset != defaultCharset) {
    267                     xml = new String(data, offset, length, defaultCharset);
    268                 }
    269             } catch (UnsupportedEncodingException u) {
    270                 // Just use the default encoding below
    271             }
    272         }
    273         if (xml == null) {
    274             xml = new String(data, offset, length);
    275         }
    276         return xml;
    277     }
    278 
    279     /**
    280      * Returns the position for the given node. This is the start position. The
    281      * end position can be obtained via {@link Position#getEnd()}.
    282      *
    283      * @param node the node to look up position for
    284      * @return the position, or null if the node type is not supported for
    285      *         position info
    286      */
    287     @Nullable
    288     public Position getPosition(@NonNull Node node) {
    289         return getPosition(node, -1, -1);
    290     }
    291 
    292     /**
    293      * Returns the position for the given node. This is the start position. The
    294      * end position can be obtained via {@link Position#getEnd()}. A specific
    295      * range within the node can be specified with the {@code start} and
    296      * {@code end} parameters.
    297      *
    298      * @param node the node to look up position for
    299      * @param start the relative offset within the node range to use as the
    300      *            starting position, inclusive, or -1 to not limit the range
    301      * @param end the relative offset within the node range to use as the ending
    302      *            position, or -1 to not limit the range
    303      * @return the position, or null if the node type is not supported for
    304      *         position info
    305      */
    306     @Nullable
    307     public Position getPosition(@NonNull Node node, int start, int end) {
    308         // Look up the position information stored while parsing for the given node.
    309         // Note however that we only store position information for elements (because
    310         // there is no SAX callback for individual attributes).
    311         // Therefore, this method special cases this:
    312         //  -- First, it looks at the owner element and uses its position
    313         //     information as a first approximation.
    314         //  -- Second, it uses that, as well as the original XML text, to search
    315         //     within the node range for an exact text match on the attribute name
    316         //     and if found uses that as the exact node offsets instead.
    317         if (node instanceof Attr) {
    318             Attr attr = (Attr) node;
    319             Position pos = (Position) attr.getOwnerElement().getUserData(POS_KEY);
    320             if (pos != null) {
    321                 int startOffset = pos.getOffset();
    322                 int endOffset = pos.getEnd().getOffset();
    323                 if (start != -1) {
    324                     startOffset += start;
    325                     if (end != -1) {
    326                         endOffset = start + end;
    327                     }
    328                 }
    329 
    330                 // Find attribute in the text
    331                 String contents = (String) node.getOwnerDocument().getUserData(CONTENT_KEY);
    332                 if (contents == null) {
    333                     return null;
    334                 }
    335 
    336                 // Locate the name=value attribute in the source text
    337                 // Fast string check first for the common occurrence
    338                 String name = attr.getName();
    339                 Pattern pattern = Pattern.compile(
    340                         String.format("%1$s\\s*=\\s*[\"'].*[\"']", name)); //$NON-NLS-1$
    341                 Matcher matcher = pattern.matcher(contents);
    342                 if (matcher.find(startOffset) && matcher.start() <= endOffset) {
    343                     int index = matcher.start();
    344                     // Adjust the line and column to this new offset
    345                     int line = pos.getLine();
    346                     int column = pos.getColumn();
    347                     for (int offset = pos.getOffset(); offset < index; offset++) {
    348                         char t = contents.charAt(offset);
    349                         if (t == '\n') {
    350                             line++;
    351                             column = 0;
    352                         } else {
    353                             column++;
    354                         }
    355                     }
    356 
    357                     Position attributePosition = createPosition(line, column, index);
    358                     // Also set end range for retrieval in getLocation
    359                     attributePosition.setEnd(createPosition(line, column + matcher.end() - index,
    360                             matcher.end()));
    361                     return attributePosition;
    362                 } else {
    363                     // No regexp match either: just fall back to element position
    364                     return pos;
    365                 }
    366             }
    367         } else if (node instanceof Text) {
    368             // Position of parent element, if any
    369             Position pos = null;
    370             if (node.getPreviousSibling() != null) {
    371                 pos = (Position) node.getPreviousSibling().getUserData(POS_KEY);
    372             }
    373             if (pos == null) {
    374                 pos = (Position) node.getParentNode().getUserData(POS_KEY);
    375             }
    376             if (pos != null) {
    377                 // Attempt to point forward to the actual text node
    378                 int startOffset = pos.getOffset();
    379                 int endOffset = pos.getEnd().getOffset();
    380                 int line = pos.getLine();
    381                 int column = pos.getColumn();
    382 
    383                 // Find attribute in the text
    384                 String contents = (String) node.getOwnerDocument().getUserData(CONTENT_KEY);
    385                 if (contents == null || contents.length() < endOffset) {
    386                     return null;
    387                 }
    388 
    389                 boolean inAttribute = false;
    390                 for (int offset = startOffset; offset <= endOffset; offset++) {
    391                     char c = contents.charAt(offset);
    392                     if (c == '>' && !inAttribute) {
    393                         // Found the end of the element open tag: this is where the
    394                         // text begins.
    395 
    396                         // Skip >
    397                         offset++;
    398                         column++;
    399 
    400                         String text = node.getNodeValue();
    401                         int textIndex = 0;
    402                         int textLength = text.length();
    403                         int newLine = line;
    404                         int newColumn = column;
    405                         if (start != -1) {
    406                             textLength = Math.min(textLength, start);
    407                             for (; textIndex < textLength; textIndex++) {
    408                                 char t = text.charAt(textIndex);
    409                                 if (t == '\n') {
    410                                     newLine++;
    411                                     newColumn = 0;
    412                                 } else {
    413                                     newColumn++;
    414                                 }
    415                             }
    416                         } else {
    417                             // Skip text whitespace prefix, if the text node contains
    418                             // non-whitespace characters
    419                             for (; textIndex < textLength; textIndex++) {
    420                                 char t = text.charAt(textIndex);
    421                                 if (t == '\n') {
    422                                     newLine++;
    423                                     newColumn = 0;
    424                                 } else if (!Character.isWhitespace(t)) {
    425                                     break;
    426                                 } else {
    427                                     newColumn++;
    428                                 }
    429                             }
    430                         }
    431                         if (textIndex == text.length()) {
    432                             textIndex = 0; // Whitespace node
    433                         } else {
    434                             line = newLine;
    435                             column = newColumn;
    436                         }
    437 
    438                         Position attributePosition = createPosition(line, column,
    439                                 offset + textIndex);
    440                         // Also set end range for retrieval in getLocation
    441                         if (end != -1) {
    442                             attributePosition.setEnd(createPosition(line, column,
    443                                     offset + end));
    444                         } else {
    445                             attributePosition.setEnd(createPosition(line, column,
    446                                     offset + textLength));
    447                         }
    448                         return attributePosition;
    449                     } else if (c == '"') {
    450                         inAttribute = !inAttribute;
    451                     } else if (c == '\n') {
    452                         line++;
    453                         column = -1; // pre-subtract column added below
    454                     }
    455                     column++;
    456                 }
    457 
    458                 return pos;
    459             }
    460         }
    461 
    462         return (Position) node.getUserData(POS_KEY);
    463     }
    464 
    465     /**
    466      * SAX parser handler which incrementally builds up a DOM document as we go
    467      * along, and updates position information along the way. Position
    468      * information is attached to the DOM nodes by setting user data with the
    469      * {@link POS_KEY} key.
    470      */
    471     private final class DomBuilder extends DefaultHandler {
    472         private final String mXml;
    473         private final Document mDocument;
    474         private Locator mLocator;
    475         private int mCurrentLine = 0;
    476         private int mCurrentOffset;
    477         private int mCurrentColumn;
    478         private final List<Element> mStack = new ArrayList<Element>();
    479         private final StringBuilder mPendingText = new StringBuilder();
    480 
    481         private DomBuilder(String xml) throws ParserConfigurationException {
    482             mXml = xml;
    483 
    484             DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    485             factory.setNamespaceAware(true);
    486             factory.setValidating(false);
    487             DocumentBuilder docBuilder = factory.newDocumentBuilder();
    488             mDocument = docBuilder.newDocument();
    489             mDocument.setUserData(CONTENT_KEY, xml, null);
    490         }
    491 
    492         /** Returns the document parsed by the handler */
    493         Document getDocument() {
    494             return mDocument;
    495         }
    496 
    497         @Override
    498         public void setDocumentLocator(Locator locator) {
    499             this.mLocator = locator;
    500         }
    501 
    502         @Override
    503         public void startElement(String uri, String localName, String qName,
    504                 Attributes attributes) throws SAXException {
    505             try {
    506                 flushText();
    507                 Element element = mDocument.createElement(qName);
    508                 for (int i = 0; i < attributes.getLength(); i++) {
    509                     if (attributes.getURI(i) != null && attributes.getURI(i).length() > 0) {
    510                         Attr attr = mDocument.createAttributeNS(attributes.getURI(i),
    511                                 attributes.getQName(i));
    512                         attr.setValue(attributes.getValue(i));
    513                         element.setAttributeNodeNS(attr);
    514                         assert attr.getOwnerElement() == element;
    515                     } else {
    516                         Attr attr = mDocument.createAttribute(attributes.getQName(i));
    517                         attr.setValue(attributes.getValue(i));
    518                         element.setAttributeNode(attr);
    519                         assert attr.getOwnerElement() == element;
    520                     }
    521                 }
    522 
    523                 Position pos = getCurrentPosition();
    524 
    525                 // The starting position reported to us by SAX is really the END of the
    526                 // open tag in an element, when all the attributes have been processed.
    527                 // We have to scan backwards to find the real beginning. We'll do that
    528                 // by scanning backwards.
    529                 // -1: Make sure that when we have <foo></foo> we don't consider </foo>
    530                 // the beginning since pos.offset will typically point to the first character
    531                 // AFTER the element open tag, which could be a closing tag or a child open
    532                 // tag
    533 
    534                 for (int offset = pos.getOffset() - 1; offset >= 0; offset--) {
    535                     char c = mXml.charAt(offset);
    536                     // < cannot appear in attribute values or anywhere else within
    537                     // an element open tag, so we know the first occurrence is the real
    538                     // element start
    539                     if (c == '<') {
    540                         // Adjust line position
    541                         int line = pos.getLine();
    542                         for (int i = offset, n = pos.getOffset(); i < n; i++) {
    543                             if (mXml.charAt(i) == '\n') {
    544                                 line--;
    545                             }
    546                         }
    547 
    548                         // Compute new column position
    549                         int column = 0;
    550                         for (int i = offset - 1; i >= 0; i--, column++) {
    551                             if (mXml.charAt(i) == '\n') {
    552                                 break;
    553                             }
    554                         }
    555 
    556                         pos = createPosition(line, column, offset);
    557                         break;
    558                     }
    559                 }
    560 
    561                 element.setUserData(POS_KEY, pos, null);
    562                 mStack.add(element);
    563             } catch (Exception t) {
    564                 throw new SAXException(t);
    565             }
    566         }
    567 
    568         @Override
    569         public void endElement(String uri, String localName, String qName) {
    570             flushText();
    571             Element element = mStack.remove(mStack.size() - 1);
    572 
    573             Position pos = (Position) element.getUserData(POS_KEY);
    574             assert pos != null;
    575             pos.setEnd(getCurrentPosition());
    576 
    577             if (mStack.isEmpty()) {
    578                 mDocument.appendChild(element);
    579             } else {
    580                 Element parent = mStack.get(mStack.size() - 1);
    581                 parent.appendChild(element);
    582             }
    583         }
    584 
    585         /**
    586          * Returns a position holder for the current position. The most
    587          * important part of this function is to incrementally compute the
    588          * offset as well, by counting forwards until it reaches the new line
    589          * number and column position of the XML parser, counting characters as
    590          * it goes along.
    591          */
    592         private Position getCurrentPosition() {
    593             int line = mLocator.getLineNumber() - 1;
    594             int column = mLocator.getColumnNumber() - 1;
    595 
    596             // Compute offset incrementally now that we have the new line and column
    597             // numbers
    598             int xmlLength = mXml.length();
    599             while (mCurrentLine < line && mCurrentOffset < xmlLength) {
    600                 char c = mXml.charAt(mCurrentOffset);
    601                 if (c == '\r' && mCurrentOffset < xmlLength - 1) {
    602                     if (mXml.charAt(mCurrentOffset + 1) != '\n') {
    603                         mCurrentLine++;
    604                         mCurrentColumn = 0;
    605                     }
    606                 } else if (c == '\n') {
    607                     mCurrentLine++;
    608                     mCurrentColumn = 0;
    609                 } else {
    610                     mCurrentColumn++;
    611                 }
    612                 mCurrentOffset++;
    613             }
    614 
    615             mCurrentOffset += column - mCurrentColumn;
    616             if (mCurrentOffset >= xmlLength) {
    617                 // The parser sometimes passes wrong column numbers at the
    618                 // end of the file: Ensure that the offset remains valid.
    619                 mCurrentOffset = xmlLength;
    620             }
    621             mCurrentColumn = column;
    622 
    623             return createPosition(mCurrentLine, mCurrentColumn, mCurrentOffset);
    624         }
    625 
    626         @Override
    627         public void characters(char c[], int start, int length) throws SAXException {
    628             mPendingText.append(c, start, length);
    629         }
    630 
    631         private void flushText() {
    632             if (mPendingText.length() > 0 && !mStack.isEmpty()) {
    633                 Element element = mStack.get(mStack.size() - 1);
    634                 Node textNode = mDocument.createTextNode(mPendingText.toString());
    635                 element.appendChild(textNode);
    636                 mPendingText.setLength(0);
    637             }
    638         }
    639     }
    640 
    /**
     * Creates a position while constructing the DOM document. This method is
     * the single factory hook for positions: a subclass can override it to
     * supply its own implementation of the {@link Position} interface. The
     * implementation here returns a {@link DefaultPosition} that stores the
     * three values unchanged.
     *
     * @param line the line number for the position
     * @param column the column number for the position
     * @param offset the character offset for the position
     * @return a new position; never null
     */
    @NonNull
    protected Position createPosition(int line, int column, int offset) {
        return new DefaultPosition(line, column, offset);
    }
    655 
    656     protected interface Position {
    657         /**
    658          * Linked position: for a begin position this will point to the
    659          * corresponding end position. For an end position this will be null.
    660          *
    661          * @return the end position, or null
    662          */
    663         @Nullable
    664         public Position getEnd();
    665 
    666         /**
    667          * Linked position: for a begin position this will point to the
    668          * corresponding end position. For an end position this will be null.
    669          *
    670          * @param end the end position
    671          */
    672         public void setEnd(@NonNull Position end);
    673 
    674         /** @return the line number, 0-based */
    675         public int getLine();
    676 
    677         /** @return the offset number, 0-based */
    678         public int getOffset();
    679 
    680         /** @return the column number, 0-based, and -1 if the column number if not known */
    681         public int getColumn();
    682     }
    683 
    684     protected static class DefaultPosition implements Position {
    685         /** The line number (0-based where the first line is line 0) */
    686         private final int mLine;
    687         private final int mColumn;
    688         private final int mOffset;
    689         private Position mEnd;
    690 
    691         /**
    692          * Creates a new {@link Position}
    693          *
    694          * @param line the 0-based line number, or -1 if unknown
    695          * @param column the 0-based column number, or -1 if unknown
    696          * @param offset the offset, or -1 if unknown
    697          */
    698         public DefaultPosition(int line, int column, int offset) {
    699             this.mLine = line;
    700             this.mColumn = column;
    701             this.mOffset = offset;
    702         }
    703 
    704         @Override
    705         public int getLine() {
    706             return mLine;
    707         }
    708 
    709         @Override
    710         public int getOffset() {
    711             return mOffset;
    712         }
    713 
    714         @Override
    715         public int getColumn() {
    716             return mColumn;
    717         }
    718 
    719         @Override
    720         public Position getEnd() {
    721             return mEnd;
    722         }
    723 
    724         @Override
    725         public void setEnd(@NonNull Position end) {
    726             mEnd = end;
    727         }
    728     }
    729 }
    730