Home | History | Annotate | Download | only in html
      1 <HTML>
      2 <BODY BGCOLOR="white">
      3 <PRE>
      4 <FONT color="green">001</FONT>    // Copyright (c) 2011, Mike Samuel<a name="line.1"></a>
      5 <FONT color="green">002</FONT>    // All rights reserved.<a name="line.2"></a>
      6 <FONT color="green">003</FONT>    //<a name="line.3"></a>
      7 <FONT color="green">004</FONT>    // Redistribution and use in source and binary forms, with or without<a name="line.4"></a>
      8 <FONT color="green">005</FONT>    // modification, are permitted provided that the following conditions<a name="line.5"></a>
      9 <FONT color="green">006</FONT>    // are met:<a name="line.6"></a>
     10 <FONT color="green">007</FONT>    //<a name="line.7"></a>
     11 <FONT color="green">008</FONT>    // Redistributions of source code must retain the above copyright<a name="line.8"></a>
     12 <FONT color="green">009</FONT>    // notice, this list of conditions and the following disclaimer.<a name="line.9"></a>
     13 <FONT color="green">010</FONT>    // Redistributions in binary form must reproduce the above copyright<a name="line.10"></a>
     14 <FONT color="green">011</FONT>    // notice, this list of conditions and the following disclaimer in the<a name="line.11"></a>
     15 <FONT color="green">012</FONT>    // documentation and/or other materials provided with the distribution.<a name="line.12"></a>
     16 <FONT color="green">013</FONT>    // Neither the name of the OWASP nor the names of its contributors may<a name="line.13"></a>
     17 <FONT color="green">014</FONT>    // be used to endorse or promote products derived from this software<a name="line.14"></a>
     18 <FONT color="green">015</FONT>    // without specific prior written permission.<a name="line.15"></a>
     19 <FONT color="green">016</FONT>    // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS<a name="line.16"></a>
     20 <FONT color="green">017</FONT>    // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT<a name="line.17"></a>
     21 <FONT color="green">018</FONT>    // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS<a name="line.18"></a>
     22 <FONT color="green">019</FONT>    // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE<a name="line.19"></a>
     23 <FONT color="green">020</FONT>    // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,<a name="line.20"></a>
     24 <FONT color="green">021</FONT>    // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,<a name="line.21"></a>
     25 <FONT color="green">022</FONT>    // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;<a name="line.22"></a>
     26 <FONT color="green">023</FONT>    // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER<a name="line.23"></a>
     27 <FONT color="green">024</FONT>    // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT<a name="line.24"></a>
     28 <FONT color="green">025</FONT>    // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN<a name="line.25"></a>
     29 <FONT color="green">026</FONT>    // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE<a name="line.26"></a>
     30 <FONT color="green">027</FONT>    // POSSIBILITY OF SUCH DAMAGE.<a name="line.27"></a>
     31 <FONT color="green">028</FONT>    <a name="line.28"></a>
     32 <FONT color="green">029</FONT>    package org.owasp.html;<a name="line.29"></a>
     33 <FONT color="green">030</FONT>    <a name="line.30"></a>
     34 <FONT color="green">031</FONT>    import com.google.common.collect.ImmutableMap;<a name="line.31"></a>
     35 <FONT color="green">032</FONT>    <a name="line.32"></a>
     36 <FONT color="green">033</FONT>    /**<a name="line.33"></a>
     37 <FONT color="green">034</FONT>     * From section 8.1.2.6 of http://www.whatwg.org/specs/web-apps/current-work/<a name="line.34"></a>
     38 <FONT color="green">035</FONT>     * &lt;p&gt;<a name="line.35"></a>
     39 <FONT color="green">036</FONT>     * The text in CDATA and RCDATA elements must not contain any<a name="line.36"></a>
     40 <FONT color="green">037</FONT>     * occurrences of the string "&lt;/" (U+003C LESS-THAN SIGN, U+002F<a name="line.37"></a>
     41 <FONT color="green">038</FONT>     * SOLIDUS) followed by characters that case-insensitively match the<a name="line.38"></a>
     42 <FONT color="green">039</FONT>     * tag name of the element followed by one of U+0009 CHARACTER<a name="line.39"></a>
     43 <FONT color="green">040</FONT>     * TABULATION, U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C<a name="line.40"></a>
     44 <FONT color="green">041</FONT>     * FORM FEED (FF), U+0020 SPACE, U+003E GREATER-THAN SIGN (&gt;), or<a name="line.41"></a>
     45 <FONT color="green">042</FONT>     * U+002F SOLIDUS (/), unless that string is part of an escaping<a name="line.42"></a>
     46 <FONT color="green">043</FONT>     * text span.<a name="line.43"></a>
     47 <FONT color="green">044</FONT>     * &lt;/p&gt;<a name="line.44"></a>
     48 <FONT color="green">045</FONT>     *<a name="line.45"></a>
     49 <FONT color="green">046</FONT>     * &lt;p&gt;<a name="line.46"></a>
     50 <FONT color="green">047</FONT>     * See also<a name="line.47"></a>
     51 <FONT color="green">048</FONT>     * http://www.whatwg.org/specs/web-apps/current-work/#cdata-rcdata-restrictions<a name="line.48"></a>
     52 <FONT color="green">049</FONT>     * for the elements which fall in each category.<a name="line.49"></a>
     53 <FONT color="green">050</FONT>     * &lt;/p&gt;<a name="line.50"></a>
     54 <FONT color="green">051</FONT>     *<a name="line.51"></a>
     55 <FONT color="green">052</FONT>     * @author Mike Samuel &lt;mikesamuel (a] gmail.com&gt;<a name="line.52"></a>
     56 <FONT color="green">053</FONT>     */<a name="line.53"></a>
     57 <FONT color="green">054</FONT>    public enum HtmlTextEscapingMode {<a name="line.54"></a>
     58 <FONT color="green">055</FONT>      /**<a name="line.55"></a>
     59 <FONT color="green">056</FONT>       * Normally escaped character data that breaks around comments and tags.<a name="line.56"></a>
     60 <FONT color="green">057</FONT>       */<a name="line.57"></a>
     61 <FONT color="green">058</FONT>      PCDATA,<a name="line.58"></a>
     62 <FONT color="green">059</FONT>      /**<a name="line.59"></a>
     63 <FONT color="green">060</FONT>       * A span of text where HTML special characters are interpreted literally,<a name="line.60"></a>
     64 <FONT color="green">061</FONT>       * as in a SCRIPT tag.<a name="line.61"></a>
     65 <FONT color="green">062</FONT>       */<a name="line.62"></a>
     66 <FONT color="green">063</FONT>      CDATA,<a name="line.63"></a>
     67 <FONT color="green">064</FONT>      /**<a name="line.64"></a>
     68 <FONT color="green">065</FONT>       * Like {@link #CDATA} but only for certain browsers.<a name="line.65"></a>
     69 <FONT color="green">066</FONT>       */<a name="line.66"></a>
     70 <FONT color="green">067</FONT>      CDATA_SOMETIMES,<a name="line.67"></a>
     71 <FONT color="green">068</FONT>      /**<a name="line.68"></a>
     72 <FONT color="green">069</FONT>       * A span of text and character entity references where HTML special<a name="line.69"></a>
     73 <FONT color="green">070</FONT>       * characters are interpreted literally, as in a TITLE tag.<a name="line.70"></a>
     74 <FONT color="green">071</FONT>       */<a name="line.71"></a>
     75 <FONT color="green">072</FONT>      RCDATA,<a name="line.72"></a>
     76 <FONT color="green">073</FONT>      /**<a name="line.73"></a>
     77 <FONT color="green">074</FONT>       * A span of text where HTML special characters are interpreted literally,<a name="line.74"></a>
     78 <FONT color="green">075</FONT>       * where there is no end tag.  PLAIN_TEXT runs until the end of the file.<a name="line.75"></a>
     79 <FONT color="green">076</FONT>       */<a name="line.76"></a>
     80 <FONT color="green">077</FONT>      PLAIN_TEXT,<a name="line.77"></a>
     81 <FONT color="green">078</FONT>    <a name="line.78"></a>
     82 <FONT color="green">079</FONT>      /**<a name="line.79"></a>
     83 <FONT color="green">080</FONT>       * Cannot contain data.<a name="line.80"></a>
     84 <FONT color="green">081</FONT>       */<a name="line.81"></a>
     85 <FONT color="green">082</FONT>      VOID,<a name="line.82"></a>
     86 <FONT color="green">083</FONT>      ;<a name="line.83"></a>
     87 <FONT color="green">084</FONT>    <a name="line.84"></a>
     88 <FONT color="green">085</FONT>      private static final ImmutableMap&lt;String, HtmlTextEscapingMode&gt; ESCAPING_MODES<a name="line.85"></a>
     89 <FONT color="green">086</FONT>          = ImmutableMap.&lt;String, HtmlTextEscapingMode&gt;builder()<a name="line.86"></a>
     90 <FONT color="green">087</FONT>          .put("iframe", CDATA)<a name="line.87"></a>
     91 <FONT color="green">088</FONT>          // HTML5 does not treat listing as CDATA and treats XMP as deprecated,<a name="line.88"></a>
     92 <FONT color="green">089</FONT>          // but HTML2 does; see<a name="line.89"></a>
     93 <FONT color="green">090</FONT>          // http://www.w3.org/MarkUp/1995-archive/NonStandard.html<a name="line.90"></a>
     94 <FONT color="green">091</FONT>          // Listing is not supported by browsers.<a name="line.91"></a>
     95 <FONT color="green">092</FONT>          .put("listing", CDATA_SOMETIMES)<a name="line.92"></a>
     96 <FONT color="green">093</FONT>          .put("xmp", CDATA)<a name="line.93"></a>
     97 <FONT color="green">094</FONT>    <a name="line.94"></a>
     98 <FONT color="green">095</FONT>          // Technically, noembed, noscript and noframes are CDATA_SOMETIMES but<a name="line.95"></a>
     99 <FONT color="green">096</FONT>          // we can only be hurt by allowing tag content that looks like text so<a name="line.96"></a>
    100 <FONT color="green">097</FONT>          // we treat them as regular PCDATA.<a name="line.97"></a>
    101 <FONT color="green">098</FONT>          //.put("noembed", CDATA_SOMETIMES)<a name="line.98"></a>
    102 <FONT color="green">099</FONT>          //.put("noframes", CDATA_SOMETIMES)<a name="line.99"></a>
    103 <FONT color="green">100</FONT>          //.put("noscript", CDATA_SOMETIMES)<a name="line.100"></a>
    104 <FONT color="green">101</FONT>          .put("comment", CDATA_SOMETIMES)  // IE only<a name="line.101"></a>
    105 <FONT color="green">102</FONT>    <a name="line.102"></a>
    106 <FONT color="green">103</FONT>          // Runs till end of file.<a name="line.103"></a>
    107 <FONT color="green">104</FONT>          .put("plaintext", PLAIN_TEXT)<a name="line.104"></a>
    108 <FONT color="green">105</FONT>    <a name="line.105"></a>
    109 <FONT color="green">106</FONT>          .put("script", CDATA)<a name="line.106"></a>
    110 <FONT color="green">107</FONT>          .put("style", CDATA)<a name="line.107"></a>
    111 <FONT color="green">108</FONT>    <a name="line.108"></a>
    112 <FONT color="green">109</FONT>          // Textarea and Title are RCDATA, not CDATA, so decode entity references.<a name="line.109"></a>
    113 <FONT color="green">110</FONT>          .put("textarea", RCDATA)<a name="line.110"></a>
    114 <FONT color="green">111</FONT>          .put("title", RCDATA)<a name="line.111"></a>
    115 <FONT color="green">112</FONT>    <a name="line.112"></a>
    116 <FONT color="green">113</FONT>          // Nodes that can't contain content.<a name="line.113"></a>
    117 <FONT color="green">114</FONT>          // http://www.w3.org/TR/html-markup/syntax.html#void-elements<a name="line.114"></a>
    118 <FONT color="green">115</FONT>          .put("area", VOID)<a name="line.115"></a>
    119 <FONT color="green">116</FONT>          .put("base", VOID)<a name="line.116"></a>
    120 <FONT color="green">117</FONT>          .put("br", VOID)<a name="line.117"></a>
    121 <FONT color="green">118</FONT>          .put("col", VOID)<a name="line.118"></a>
    122 <FONT color="green">119</FONT>          .put("command", VOID)<a name="line.119"></a>
    123 <FONT color="green">120</FONT>          .put("embed", VOID)<a name="line.120"></a>
    124 <FONT color="green">121</FONT>          .put("hr", VOID)<a name="line.121"></a>
    125 <FONT color="green">122</FONT>          .put("img", VOID)<a name="line.122"></a>
    126 <FONT color="green">123</FONT>          .put("input", VOID)<a name="line.123"></a>
    127 <FONT color="green">124</FONT>          .put("keygen", VOID)<a name="line.124"></a>
    128 <FONT color="green">125</FONT>          .put("link", VOID)<a name="line.125"></a>
    129 <FONT color="green">126</FONT>          .put("meta", VOID)<a name="line.126"></a>
    130 <FONT color="green">127</FONT>          .put("param", VOID)<a name="line.127"></a>
    131 <FONT color="green">128</FONT>          .put("source", VOID)<a name="line.128"></a>
    132 <FONT color="green">129</FONT>          .put("track", VOID)<a name="line.129"></a>
    133 <FONT color="green">130</FONT>          .put("wbr", VOID)<a name="line.130"></a>
    134 <FONT color="green">131</FONT>    <a name="line.131"></a>
    135 <FONT color="green">132</FONT>           // EMPTY per http://www.w3.org/TR/REC-html32#basefont<a name="line.132"></a>
    136 <FONT color="green">133</FONT>          .put("basefont", VOID)<a name="line.133"></a>
    137 <FONT color="green">134</FONT>          .build();<a name="line.134"></a>
    138 <FONT color="green">135</FONT>    <a name="line.135"></a>
    139 <FONT color="green">136</FONT>    <a name="line.136"></a>
    140 <FONT color="green">137</FONT>      /**<a name="line.137"></a>
    141 <FONT color="green">138</FONT>       * The mode used for content following a start tag with the given name.<a name="line.138"></a>
    142 <FONT color="green">139</FONT>       */<a name="line.139"></a>
    143 <FONT color="green">140</FONT>      public static HtmlTextEscapingMode getModeForTag(String canonTagName) {<a name="line.140"></a>
    144 <FONT color="green">141</FONT>        HtmlTextEscapingMode mode = ESCAPING_MODES.get(canonTagName);<a name="line.141"></a>
    145 <FONT color="green">142</FONT>        return mode != null ? mode : PCDATA;<a name="line.142"></a>
    146 <FONT color="green">143</FONT>      }<a name="line.143"></a>
    147 <FONT color="green">144</FONT>    <a name="line.144"></a>
    148 <FONT color="green">145</FONT>      /**<a name="line.145"></a>
    149 <FONT color="green">146</FONT>       * True iff the content following the given tag allows escaping text<a name="line.146"></a>
    150 <FONT color="green">147</FONT>       * spans: {@code &lt;!--&amp;hellip;--&gt;} that escape even things that might<a name="line.147"></a>
    151 <FONT color="green">148</FONT>       * be an end tag for the corresponding open tag.<a name="line.148"></a>
    152 <FONT color="green">149</FONT>       */<a name="line.149"></a>
    153 <FONT color="green">150</FONT>      public static boolean allowsEscapingTextSpan(String canonTagName) {<a name="line.150"></a>
    154 <FONT color="green">151</FONT>        // &lt;xmp&gt; and &lt;plaintext&gt; do not admit escaping text spans.<a name="line.151"></a>
    155 <FONT color="green">152</FONT>        return "style".equals(canonTagName) || "script".equals(canonTagName)<a name="line.152"></a>
    156 <FONT color="green">153</FONT>            || "noembed".equals(canonTagName) || "noscript".equals(canonTagName)<a name="line.153"></a>
    157 <FONT color="green">154</FONT>            || "noframes".equals(canonTagName);<a name="line.154"></a>
    158 <FONT color="green">155</FONT>      }<a name="line.155"></a>
    159 <FONT color="green">156</FONT>    <a name="line.156"></a>
    160 <FONT color="green">157</FONT>      /**<a name="line.157"></a>
    161 <FONT color="green">158</FONT>       * True if content immediately following the start tag must be treated as<a name="line.158"></a>
    162 <FONT color="green">159</FONT>       * special CDATA so that &amp;lt;'s are not treated as starting tags, comments<a name="line.159"></a>
    163 <FONT color="green">160</FONT>       * or directives.<a name="line.160"></a>
    164 <FONT color="green">161</FONT>       */<a name="line.161"></a>
    165 <FONT color="green">162</FONT>      public static boolean isTagFollowedByLiteralContent(String canonTagName) {<a name="line.162"></a>
    166 <FONT color="green">163</FONT>        HtmlTextEscapingMode mode = getModeForTag(canonTagName);<a name="line.163"></a>
    167 <FONT color="green">164</FONT>        return mode != PCDATA &amp;&amp; mode != VOID;<a name="line.164"></a>
    168 <FONT color="green">165</FONT>      }<a name="line.165"></a>
    169 <FONT color="green">166</FONT>    <a name="line.166"></a>
    170 <FONT color="green">167</FONT>      /**<a name="line.167"></a>
    171 <FONT color="green">168</FONT>       * True iff the tag cannot contain any content -- that is, an HTML parser will<a name="line.168"></a>
    172 <FONT color="green">169</FONT>       * consider the element to have ended immediately after the start tag.<a name="line.169"></a>
    173 <FONT color="green">170</FONT>       */<a name="line.170"></a>
    174 <FONT color="green">171</FONT>      public static boolean isVoidElement(String canonTagName) {<a name="line.171"></a>
    175 <FONT color="green">172</FONT>        return getModeForTag(canonTagName) == VOID;<a name="line.172"></a>
    176 <FONT color="green">173</FONT>      }<a name="line.173"></a>
    177 <FONT color="green">174</FONT>    }<a name="line.174"></a>
    178 
    179 
    180 
    181 
    182 
    183 
    184 
    185 
    186 
    187 
    188 
    189 
    190 
    191 
    192 
    193 
    194 
    195 
    196 
    197 
    198 
    199 
    200 
    201 
    202 
    203 
    204 
    205 
    206 
    207 
    208 
    209 
    210 
    211 
    212 
    213 
    214 
    215 
    216 
    217 
    218 
    219 
    220 
    221 
    222 
    223 
    224 
    225 
    226 
    227 
    228 
    229 
    230 
    231 
    232 
    233 
    234 
    235 
    236 
    237 
    238 </PRE>
    239 </BODY>
    240 </HTML>
    241