Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Google, Inc. All Rights Reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     24  */
     25 
     26 #ifndef HTMLInputStream_h
     27 #define HTMLInputStream_h
     28 
     29 #include "SegmentedString.h"
     30 
     31 namespace WebCore {
     32 
     33 // The InputStream is made up of a sequence of SegmentedStrings:
     34 //
     35 // [--current--][--next--][--next--] ... [--next--]
     36 //            /\                         (also called m_last)
     37 //            L_ current insertion point
     38 //
     39 // The current segmented string is stored in HTMLInputStream.  Each of the
     40 // afterInsertionPoint buffers is stored in an InsertionPointRecord on the
     41 // stack.
     42 //
     43 // We remove characters from the "current" string in the InputStream.
     44 // document.write() will add characters at the current insertion point,
     45 // which appends them to the "current" string.
     46 //
     47 // m_last is a pointer to the last of the afterInsertionPoint strings.
     48 // The network adds data at the end of the InputStream, which appends
     49 // them to the "last" string.
     50 class HTMLInputStream {
     51     WTF_MAKE_NONCOPYABLE(HTMLInputStream);
     52 public:
     53     HTMLInputStream()
     54         : m_last(&m_first)
     55     {
     56     }
     57 
     58     void appendToEnd(const SegmentedString& string)
     59     {
     60         m_last->append(string);
     61     }
     62 
     63     void insertAtCurrentInsertionPoint(const SegmentedString& string)
     64     {
     65         m_first.append(string);
     66     }
     67 
     68     bool hasInsertionPoint() const
     69     {
     70         return &m_first != m_last;
     71     }
     72 
     73     void markEndOfFile()
     74     {
     75         // FIXME: This should use InputStreamPreprocessor::endOfFileMarker
     76         // once InputStreamPreprocessor is split off into its own header.
     77         static const UChar endOfFileMarker = 0;
     78         m_last->append(SegmentedString(String(&endOfFileMarker, 1)));
     79         m_last->close();
     80     }
     81 
     82     bool haveSeenEndOfFile() const
     83     {
     84         return m_last->isClosed();
     85     }
     86 
     87     SegmentedString& current() { return m_first; }
     88     const SegmentedString& current() const { return m_first; }
     89 
     90     void splitInto(SegmentedString& next)
     91     {
     92         next = m_first;
     93         m_first = SegmentedString();
     94         if (m_last == &m_first) {
     95             // We used to only have one SegmentedString in the InputStream
     96             // but now we have two.  That means m_first is no longer also
     97             // the m_last string, |next| is now the last one.
     98             m_last = &next;
     99         }
    100     }
    101 
    102     void mergeFrom(SegmentedString& next)
    103     {
    104         m_first.append(next);
    105         if (m_last == &next) {
    106             // The string |next| used to be the last SegmentedString in
    107             // the InputStream.  Now that it's been merged into m_first,
    108             // that makes m_first the last one.
    109             m_last = &m_first;
    110         }
    111         if (next.isClosed()) {
    112             // We also need to merge the "closed" state from next to
    113             // m_first.  Arguably, this work could be done in append().
    114             m_first.close();
    115         }
    116     }
    117 
    118 private:
    119     SegmentedString m_first;
    120     SegmentedString* m_last;
    121 };
    122 
    123 class InsertionPointRecord {
    124     WTF_MAKE_NONCOPYABLE(InsertionPointRecord);
    125 public:
    126     explicit InsertionPointRecord(HTMLInputStream& inputStream)
    127         : m_inputStream(&inputStream)
    128     {
    129         m_line = m_inputStream->current().currentLine();
    130         m_column = m_inputStream->current().currentColumn();
    131         m_inputStream->splitInto(m_next);
    132         // We 'fork' current position and use it for the generated script part.
    133         // This is a bit weird, because generated part does not have positions within an HTML document.
    134         m_inputStream->current().setCurrentPosition(m_line, m_column, 0);
    135     }
    136 
    137     ~InsertionPointRecord()
    138     {
    139         // Some inserted text may have remained in input stream. E.g. if script has written "&amp" or "<table",
    140         // it stays in buffer because it cannot be properly tokenized before we see next part.
    141         int unparsedRemainderLength = m_inputStream->current().length();
    142         m_inputStream->mergeFrom(m_next);
    143         // We restore position for the character that goes right after unparsed remainder.
    144         m_inputStream->current().setCurrentPosition(m_line, m_column, unparsedRemainderLength);
    145     }
    146 
    147 private:
    148     HTMLInputStream* m_inputStream;
    149     SegmentedString m_next;
    150     WTF::ZeroBasedNumber m_line;
    151     WTF::ZeroBasedNumber m_column;
    152 };
    153 
    154 }
    155 
    156 #endif
    157