1 /* 2 * Copyright (C) 2013 Google, Inc. All Rights Reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 16 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 24 */ 25 26 #ifndef AtomicHTMLToken_h 27 #define AtomicHTMLToken_h 28 29 #include "HTMLElementLookupTrie.h" 30 #include "core/dom/Attribute.h" 31 #include "core/html/parser/CompactHTMLToken.h" 32 #include "core/html/parser/HTMLToken.h" 33 #include "wtf/RefCounted.h" 34 #include "wtf/RefPtr.h" 35 36 namespace WebCore { 37 38 class AtomicHTMLToken { 39 WTF_MAKE_NONCOPYABLE(AtomicHTMLToken); 40 public: 41 42 bool forceQuirks() const 43 { 44 ASSERT(m_type == HTMLToken::DOCTYPE); 45 return m_doctypeData->m_forceQuirks; 46 } 47 48 HTMLToken::Type type() const { return m_type; } 49 50 const AtomicString& name() const 51 { 52 ASSERT(usesName()); 53 return m_name; 54 } 55 56 void setName(const AtomicString& name) 57 { 58 ASSERT(usesName()); 59 m_name = name; 60 } 61 62 bool selfClosing() const 63 { 64 ASSERT(m_type == HTMLToken::StartTag || m_type == HTMLToken::EndTag); 65 return m_selfClosing; 66 } 67 68 Attribute* getAttributeItem(const QualifiedName& attributeName) 69 { 70 ASSERT(usesAttributes()); 71 return findAttributeInVector(m_attributes, attributeName); 72 } 73 74 Vector<Attribute>& attributes() 75 { 76 ASSERT(usesAttributes()); 77 return m_attributes; 78 } 79 80 const Vector<Attribute>& attributes() const 81 { 82 ASSERT(usesAttributes()); 83 return m_attributes; 84 } 85 86 const String& characters() const 87 { 88 ASSERT(m_type == HTMLToken::Character); 89 return m_data; 90 } 91 92 const String& comment() const 93 { 94 ASSERT(m_type == HTMLToken::Comment); 95 return m_data; 96 } 97 98 // FIXME: Distinguish between a missing public identifer and an empty one. 99 Vector<UChar>& publicIdentifier() const 100 { 101 ASSERT(m_type == HTMLToken::DOCTYPE); 102 return m_doctypeData->m_publicIdentifier; 103 } 104 105 // FIXME: Distinguish between a missing system identifer and an empty one. 106 Vector<UChar>& systemIdentifier() const 107 { 108 ASSERT(m_type == HTMLToken::DOCTYPE); 109 return m_doctypeData->m_systemIdentifier; 110 } 111 112 explicit AtomicHTMLToken(HTMLToken& token) 113 : m_type(token.type()) 114 { 115 switch (m_type) { 116 case HTMLToken::Uninitialized: 117 ASSERT_NOT_REACHED(); 118 break; 119 case HTMLToken::DOCTYPE: 120 m_name = AtomicString(token.name()); 121 m_doctypeData = token.releaseDoctypeData(); 122 break; 123 case HTMLToken::EndOfFile: 124 break; 125 case HTMLToken::StartTag: 126 case HTMLToken::EndTag: { 127 m_selfClosing = token.selfClosing(); 128 if (StringImpl* tagName = lookupHTMLTag(token.name().data(), token.name().size())) 129 m_name = AtomicString(tagName); 130 else 131 m_name = AtomicString(token.name()); 132 initializeAttributes(token.attributes()); 133 break; 134 } 135 case HTMLToken::Character: 136 case HTMLToken::Comment: 137 if (token.isAll8BitData()) 138 m_data = String::make8BitFrom16BitSource(token.data()); 139 else 140 m_data = String(token.data()); 141 break; 142 } 143 } 144 145 explicit AtomicHTMLToken(const CompactHTMLToken& token) 146 : m_type(token.type()) 147 { 148 switch (m_type) { 149 case HTMLToken::Uninitialized: 150 ASSERT_NOT_REACHED(); 151 break; 152 case HTMLToken::DOCTYPE: 153 m_name = token.data(); 154 m_doctypeData = adoptPtr(new DoctypeData()); 155 m_doctypeData->m_hasPublicIdentifier = true; 156 append(m_doctypeData->m_publicIdentifier, token.publicIdentifier()); 157 m_doctypeData->m_hasSystemIdentifier = true; 158 append(m_doctypeData->m_systemIdentifier, token.systemIdentifier()); 159 m_doctypeData->m_forceQuirks = token.doctypeForcesQuirks(); 160 break; 161 case HTMLToken::EndOfFile: 162 break; 163 case HTMLToken::StartTag: 164 m_attributes.reserveInitialCapacity(token.attributes().size()); 165 for (Vector<CompactHTMLToken::Attribute>::const_iterator it = token.attributes().begin(); it != token.attributes().end(); ++it) { 166 QualifiedName name(nullAtom, it->name, nullAtom); 167 // FIXME: This is N^2 for the number of attributes. 168 if (!findAttributeInVector(m_attributes, name)) 169 m_attributes.append(Attribute(name, it->value)); 170 } 171 // Fall through! 172 case HTMLToken::EndTag: 173 m_selfClosing = token.selfClosing(); 174 m_name = token.data(); 175 break; 176 case HTMLToken::Character: 177 case HTMLToken::Comment: 178 m_data = token.data(); 179 break; 180 } 181 } 182 183 explicit AtomicHTMLToken(HTMLToken::Type type) 184 : m_type(type) 185 , m_selfClosing(false) 186 { 187 } 188 189 AtomicHTMLToken(HTMLToken::Type type, const AtomicString& name, const Vector<Attribute>& attributes = Vector<Attribute>()) 190 : m_type(type) 191 , m_name(name) 192 , m_selfClosing(false) 193 , m_attributes(attributes) 194 { 195 ASSERT(usesName()); 196 } 197 198 private: 199 HTMLToken::Type m_type; 200 201 void initializeAttributes(const HTMLToken::AttributeList& attributes); 202 QualifiedName nameForAttribute(const HTMLToken::Attribute&) const; 203 204 bool usesName() const; 205 206 bool usesAttributes() const; 207 208 // "name" for DOCTYPE, StartTag, and EndTag 209 AtomicString m_name; 210 211 // "data" for Comment, "characters" for Character 212 String m_data; 213 214 // For DOCTYPE 215 OwnPtr<DoctypeData> m_doctypeData; 216 217 // For StartTag and EndTag 218 bool m_selfClosing; 219 220 Vector<Attribute> m_attributes; 221 }; 222 223 inline void AtomicHTMLToken::initializeAttributes(const HTMLToken::AttributeList& attributes) 224 { 225 size_t size = attributes.size(); 226 if (!size) 227 return; 228 229 m_attributes.clear(); 230 m_attributes.reserveInitialCapacity(size); 231 for (size_t i = 0; i < size; ++i) { 232 const HTMLToken::Attribute& attribute = attributes[i]; 233 if (attribute.name.isEmpty()) 234 continue; 235 236 // FIXME: We should be able to add the following ASSERT once we fix 237 // https://bugs.webkit.org/show_bug.cgi?id=62971 238 // ASSERT(attribute.nameRange.start); 239 ASSERT(attribute.nameRange.end); 240 ASSERT(attribute.valueRange.start); 241 ASSERT(attribute.valueRange.end); 242 243 AtomicString value(attribute.value); 244 const QualifiedName& name = nameForAttribute(attribute); 245 // FIXME: This is N^2 for the number of attributes. 246 if (!findAttributeInVector(m_attributes, name)) 247 m_attributes.append(Attribute(name, value)); 248 } 249 } 250 251 } 252 253 #endif 254