Home | History | Annotate | Download | only in examples
      1 // Copyright (c) 2011, Mike Samuel
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions
      6 // are met:
      7 //
      8 // Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 // Redistributions in binary form must reproduce the above copyright
     11 // notice, this list of conditions and the following disclaimer in the
     12 // documentation and/or other materials provided with the distribution.
     13 // Neither the name of the OWASP nor the names of its contributors may
     14 // be used to endorse or promote products derived from this software
     15 // without specific prior written permission.
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     19 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
     20 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     21 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     22 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     23 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     24 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     26 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     27 // POSSIBILITY OF SUCH DAMAGE.
     28 
     29 package org.owasp.html.examples;
     30 
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.util.regex.Pattern;

import org.owasp.html.Handler;
import org.owasp.html.HtmlPolicyBuilder;
import org.owasp.html.HtmlSanitizer;
import org.owasp.html.HtmlStreamEventReceiver;
import org.owasp.html.HtmlStreamRenderer;

import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Throwables;
import com.google.common.io.CharStreams;
     45 
     46 /**
     47  * Based on the
     48  * <a href="http://www.owasp.org/index.php/Category:OWASP_AntiSamy_Project#Stage_2_-_Choosing_a_base_policy_file">AntiSamy Slashdot example</a>.
     49  * <blockquote>
     50  * Slashdot (http://www.slashdot.org/) is a techie news site that allows users
     51  * to respond anonymously to news posts with very limited HTML markup. Now
     52  * Slashdot is not only one of the coolest sites around, it's also one that's
     53  * been subject to many different successful attacks. Even more unfortunate is
     54  * the fact that most of the attacks led users to the infamous goatse.cx picture
     55  * (please don't go look it up). The rules for Slashdot are fairly strict: users
     56  * can only submit the following HTML tags and no CSS: {@code <b>}, {@code <u>},
     57  * {@code <i>}, {@code <a>}, {@code <blockquote>}.
     58  * <br>
     59  * Accordingly, we've built a policy file that allows fairly similar
     60  * functionality. All text-formatting tags that operate directly on the font,
     61  * color or emphasis have been allowed.
     62  * </blockquote>
     63  */
     64 public class SlashdotPolicyExample {
     65 
     66   /** A policy definition that matches the minimal HTML that Slashdot allows. */
     67   public static final Function<HtmlStreamEventReceiver, HtmlSanitizer.Policy>
     68       POLICY_DEFINITION = new HtmlPolicyBuilder()
     69           .allowStandardUrlProtocols()
     70           // Allow title="..." on any element.
     71           .allowAttributes("title").globally()
     72           // Allow href="..." on <a> elements.
     73           .allowAttributes("href").onElements("a")
     74           // Defeat link spammers.
     75           .requireRelNofollowOnLinks()
     76           // Allow lang= with an alphabetic value on any element.
     77           .allowAttributes("lang").matching(Pattern.compile("[a-zA-Z]{2,20}"))
     78               .globally()
     79           // The align attribute on <p> elements can have any value below.
     80           .allowAttributes("align")
     81               .matching(true, "center", "left", "right", "justify", "char")
     82               .onElements("p")
     83           // These elements are allowed.
     84           .allowElements(
     85               "a", "p", "div", "i", "b", "em", "blockquote", "tt", "strong",
     86               "br", "ul", "ol", "li")
     87           // Custom slashdot tags.
     88           // These could be rewritten in the sanitizer using an ElementPolicy.
     89           .allowElements("quote", "ecode")
     90           .toFactory();
     91 
     92   public static void main(String[] args) throws IOException {
     93     if (args.length != 0) {
     94       System.err.println("Reads from STDIN and writes to STDOUT");
     95       System.exit(-1);
     96     }
     97     System.err.println("[Reading from STDIN]");
     98     // Fetch the HTML to sanitize.
     99     String html = CharStreams.toString(
    100         new InputStreamReader(System.in, Charsets.UTF_8));
    101     // Set up an output channel to receive the sanitized HTML.
    102     HtmlStreamRenderer renderer = HtmlStreamRenderer.create(
    103         System.out,
    104         // Receives notifications on a failure to write to the output.
    105         new Handler<IOException>() {
    106           public void handle(IOException ex) {
    107             Throwables.propagate(ex);  // System.out suppresses IOExceptions
    108           }
    109         },
    110         // Our HTML parser is very lenient, but this receives notifications on
    111         // truly bizarre inputs.
    112         new Handler<String>() {
    113           public void handle(String x) {
    114             throw new AssertionError(x);
    115           }
    116         });
    117     // Use the policy defined above to sanitize the HTML.
    118     HtmlSanitizer.sanitize(html, POLICY_DEFINITION.apply(renderer));
    119   }
    120 }
    121