Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef HTMLInputStream_h
     27 #define HTMLInputStream_h
     28 
     29 #include "core/html/parser/InputStreamPreprocessor.h"
     30 #include "platform/text/SegmentedString.h"
     31 
     32 namespace WebCore {
     33 
     34 // The InputStream is made up of a sequence of SegmentedStrings:
     35 //
     36 // [--current--][--next--][--next--] ... [--next--]
     37 //            /\                         (also called m_last)
     38 //            L_ current insertion point
     39 //
     40 // The current segmented string is stored in InputStream.  Each of the
     41 // afterInsertionPoint buffers is stored in an InsertionPointRecord on the
     42 // stack.
     43 //
     44 // We remove characters from the "current" string in the InputStream.
     45 // document.write() will add characters at the current insertion point,
     46 // which appends them to the "current" string.
     47 //
     48 // m_last is a pointer to the last of the afterInsertionPoint strings.
     49 // The network adds data at the end of the InputStream, which appends
     50 // it to the "last" string.
     51 class HTMLInputStream {
     52     WTF_MAKE_NONCOPYABLE(HTMLInputStream);
     53 public:
     54     HTMLInputStream()
     55         : m_last(&m_first)
     56     {
     57     }
     58 
     59     void appendToEnd(const SegmentedString& string)
     60     {
     61         m_last->append(string);
     62     }
     63 
     64     void insertAtCurrentInsertionPoint(const SegmentedString& string)
     65     {
     66         m_first.append(string);
     67     }
     68 
     69     bool hasInsertionPoint() const
     70     {
     71         return &m_first != m_last;
     72     }
     73 
     74     void markEndOfFile()
     75     {
     76         m_last->append(SegmentedString(String(&kEndOfFileMarker, 1)));
     77         m_last->close();
     78     }
     79 
     80     void closeWithoutMarkingEndOfFile()
     81     {
     82         m_last->close();
     83     }
     84 
     85     bool haveSeenEndOfFile() const
     86     {
     87         return m_last->isClosed();
     88     }
     89 
     90     SegmentedString& current() { return m_first; }
     91     const SegmentedString& current() const { return m_first; }
     92 
     93     void splitInto(SegmentedString& next)
     94     {
     95         next = m_first;
     96         m_first = SegmentedString();
     97         if (m_last == &m_first) {
     98             // We used to only have one SegmentedString in the InputStream
     99             // but now we have two.  That means m_first is no longer also
    100             // the m_last string, |next| is now the last one.
    101             m_last = &next;
    102         }
    103     }
    104 
    105     void mergeFrom(SegmentedString& next)
    106     {
    107         m_first.append(next);
    108         if (m_last == &next) {
    109             // The string |next| used to be the last SegmentedString in
    110             // the InputStream.  Now that it's been merged into m_first,
    111             // that makes m_first the last one.
    112             m_last = &m_first;
    113         }
    114         if (next.isClosed()) {
    115             // We also need to merge the "closed" state from next to
    116             // m_first.  Arguably, this work could be done in append().
    117             m_first.close();
    118         }
    119     }
    120 
    121 private:
    122     SegmentedString m_first;
    123     SegmentedString* m_last;
    124 };
    125 
    126 class InsertionPointRecord {
    127     WTF_MAKE_NONCOPYABLE(InsertionPointRecord);
    128 public:
    129     explicit InsertionPointRecord(HTMLInputStream& inputStream)
    130         : m_inputStream(&inputStream)
    131     {
    132         m_line = m_inputStream->current().currentLine();
    133         m_column = m_inputStream->current().currentColumn();
    134         m_inputStream->splitInto(m_next);
    135         // We 'fork' current position and use it for the generated script part.
    136         // This is a bit weird, because generated part does not have positions within an HTML document.
    137         m_inputStream->current().setCurrentPosition(m_line, m_column, 0);
    138     }
    139 
    140     ~InsertionPointRecord()
    141     {
    142         // Some inserted text may have remained in input stream. E.g. if script has written "&amp" or "<table",
    143         // it stays in buffer because it cannot be properly tokenized before we see next part.
    144         int unparsedRemainderLength = m_inputStream->current().length();
    145         m_inputStream->mergeFrom(m_next);
    146         // We restore position for the character that goes right after unparsed remainder.
    147         m_inputStream->current().setCurrentPosition(m_line, m_column, unparsedRemainderLength);
    148     }
    149 
    150 private:
    151     HTMLInputStream* m_inputStream;
    152     SegmentedString m_next;
    153     OrdinalNumber m_line;
    154     OrdinalNumber m_column;
    155 };
    156 
    157 }
    158 
    159 #endif
    160