Home | History | Annotate | Download | only in html
      1 // Copyright (c) 2013, Mike Samuel
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions
      6 // are met:
      7 //
      8 // Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 // Redistributions in binary form must reproduce the above copyright
     11 // notice, this list of conditions and the following disclaimer in the
     12 // documentation and/or other materials provided with the distribution.
     13 // Neither the name of the OWASP nor the names of its contributors may
     14 // be used to endorse or promote products derived from this software
     15 // without specific prior written permission.
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     19 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
     20 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     21 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     22 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     23 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
     24 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     25 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     26 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     27 // POSSIBILITY OF SUCH DAMAGE.
     28 
     29 package org.owasp.html;
     30 
     31 import java.util.Arrays;
     32 import java.util.EnumMap;
     33 import java.util.Random;
     34 import java.util.regex.Pattern;
     35 
     36 import org.junit.Test;
     37 import org.owasp.html.CssTokens.TokenType;
     38 
     39 import com.google.common.collect.Maps;
     40 
     41 public class CssFuzzerTest extends FuzzyTestCase {
     42 
     43   private static final String[] TOKEN_PARTS = new String[] {
     44     "'", "\"", "<!--", "-->", "/*", "*/", "***", "//", "\r", "\n",
     45     "<", ">", "/", ",", ";", ":", "(", "url", "Url", ")", "[", "]", "{", "}",
     46     "\\", "\\a", "\\d", "\\0", " ", "\t", "42", ".", "ex", "auto", "foo", "BAr",
     47     "important", "!", "\ufeff", "\u0000", "\u00a0", "\ufffd", "\ud801\udc02",
     48     "\u007f", "\u000c", "CDATA", "style"
     49   };
     50 
     51   private static final String[] FREQUENT_TOKEN_PARTS = new String[] {
     52     "*/", " ", "\t", "\r", "\n",
     53   };
     54 
     55   private static final String[] DISALLOWED_IN_OUTPUT = {
     56     "</style", "<![CDATA[", "]]>", "\r", "\n",
     57   };
     58 
     59   final class Watcher implements Runnable {
     60     String input;
     61     long started;
     62 
     63     public void run() {
     64       synchronized (this) {
     65         try {
     66           while (true) {
     67             this.wait(1000 /* ms = 1s */);
     68             String input = this.input;
     69             if (input == null) { break; }  // Done
     70             long started = this.started;
     71             long now = System.nanoTime();
     72             if (now - started >= 1000000000L /* ns = 1s */) {
     73               System.err.println(
     74                   "`" + input + "` is slow. seed=" + CssFuzzerTest.this.seed);
     75             }
     76           }
     77         } catch (InterruptedException ex) {
     78           // Done
     79         }
     80       }
     81     }
     82   }
     83 
     84   @Test
     85   public final void testUnderStress() {
     86     Random r = this.rnd;
     87     Watcher watcher = new Watcher();
     88     Thread watcherThread = null;
     89     for (int run = 0, nRuns = (1 << 16); run < nRuns; ++run) {
     90       // Compose a random string from token parts.
     91       StringBuilder sb = new StringBuilder();
     92       int nParts = r.nextInt(64) + 16;
     93       for (int j = nParts; --j >= 0;) {
     94         int die = r.nextInt(32);
     95         switch (die) {
     96         case 0: sb.append((char) rnd.nextInt(0x80)); break;
     97         case 1: sb.append((char) rnd.nextInt(0x1800)); break;
     98         default:
     99           String[] arr = (die & 1) != 0 ? TOKEN_PARTS : FREQUENT_TOKEN_PARTS;
    100           sb.append(arr[rnd.nextInt(arr.length)]);
    101           break;
    102         }
    103       }
    104       String randomCss = sb.toString();
    105 
    106       synchronized (watcher) {
    107         watcher.input = randomCss;
    108         watcher.started = System.nanoTime();
    109       }
    110       if (watcherThread == null) {
    111         watcherThread = new Thread(watcher);
    112         watcherThread.setDaemon(true);
    113         watcherThread.start();
    114       }
    115 
    116       String msg = "seed=" + this.seed + ", css=`" + randomCss + "`";
    117       CssTokens tokens = CssTokens.lex(randomCss);
    118 
    119       // Test idempotent
    120       String renormalized = CssTokens.lex(tokens.normalizedCss).normalizedCss;
    121       if (!renormalized.equals(tokens.normalizedCss)) {
    122         if (!renormalized.equals(fixDigitSpaceUnit(tokens))) {
    123           for (CssTokens.TokenIterator it = tokens.iterator(); it.hasNext();
    124                it.advance()) {
    125             System.err.println(it.token() + ":" + it.type());
    126           }
    127           assertEquals(
    128               "not idempotent, " + msg,
    129               tokens.normalizedCss,
    130               renormalized);
    131         }
    132       }
    133 
    134       // Test normalized CSS does not contain HTML/XML breaking tokens.
    135       for (String disallowed : DISALLOWED_IN_OUTPUT) {
    136         assertFalse(
    137             "contains " + disallowed + ", " + msg,
    138             tokens.normalizedCss.contains(disallowed));
    139       }
    140 
    141       // Test that tokens are roughly well-formed.
    142       int nTokens = 0;
    143       for (CssTokens.TokenIterator it = tokens.iterator(); it.hasNext();) {
    144         CssTokens.TokenType type = it.type();
    145         String token = it.next();
    146         Pattern filter = TOKEN_TYPE_FILTERS.get(type);
    147         if (filter != null && !filter.matcher(token).matches()) {
    148           fail(type + " `" + token + "`, " + msg);
    149         }
    150         ++nTokens;
    151       }
    152 
    153       // Test that walking the bracket list works.
    154       int[] reverse = new int[nTokens];
    155       Arrays.fill(reverse, -1);
    156       for (int j = 0; j < nTokens; ++j) {
    157         int partner = tokens.brackets.partner(j);
    158         if (partner != -1) {
    159           reverse[partner] = j;
    160         }
    161       }
    162       for (int j = 0; j < nTokens; ++j) {
    163         if (reverse[j] != -1) {
    164           assertEquals(msg, reverse[reverse[j]], j);
    165         }
    166       }
    167     }
    168     synchronized (watcher) {
    169       watcher.input = null;
    170       watcher.notifyAll();
    171     }
    172   }
    173 
    174   private static final EnumMap<CssTokens.TokenType, Pattern> TOKEN_TYPE_FILTERS
    175     = Maps.newEnumMap(CssTokens.TokenType.class);
    176   static {
    177     String NUMBER = "-?(?:0|[1-9][0-9]*)(?:\\.[0-9]*[1-9])?(?:e-?[1-9][0-9]*)?";
    178     String IDENT_START = "[a-zA-Z_\\u0080-\udbff\udfff\\-]";
    179     String IDENT_PART = "(?:" + IDENT_START + "|[0-9])";
    180     String IDENT = IDENT_START + IDENT_PART + "*";
    181     TOKEN_TYPE_FILTERS.put(
    182         CssTokens.TokenType.AT, Pattern.compile("@" + IDENT));
    183     TOKEN_TYPE_FILTERS.put(
    184         CssTokens.TokenType.COLON, Pattern.compile(":"));
    185     TOKEN_TYPE_FILTERS.put(
    186         CssTokens.TokenType.COLUMN, Pattern.compile("\\|\\|"));
    187     TOKEN_TYPE_FILTERS.put(
    188         CssTokens.TokenType.COMMA, Pattern.compile(","));
    189     TOKEN_TYPE_FILTERS.put(
    190         CssTokens.TokenType.DELIM,
    191         Pattern.compile("[^\\w\u0000- \u0080-\uffff\\-]"));
    192     TOKEN_TYPE_FILTERS.put(
    193         CssTokens.TokenType.DIMENSION, Pattern.compile(NUMBER + "[a-z]+"));
    194     TOKEN_TYPE_FILTERS.put(
    195         CssTokens.TokenType.DOT_IDENT, Pattern.compile("\\." + IDENT));
    196     TOKEN_TYPE_FILTERS.put(
    197         CssTokens.TokenType.FUNCTION, Pattern.compile(IDENT + "[(]"));
    198     TOKEN_TYPE_FILTERS.put(
    199         CssTokens.TokenType.HASH_ID, Pattern.compile("#" + IDENT_PART + "+"));
    200     TOKEN_TYPE_FILTERS.put(
    201         CssTokens.TokenType.HASH_UNRESTRICTED,
    202         Pattern.compile("#[a-fA-F0-9]+"));
    203     TOKEN_TYPE_FILTERS.put(
    204         CssTokens.TokenType.IDENT,
    205         Pattern.compile(IDENT));
    206     TOKEN_TYPE_FILTERS.put(
    207         CssTokens.TokenType.LEFT_CURLY,
    208         Pattern.compile("[{]"));
    209     TOKEN_TYPE_FILTERS.put(
    210         CssTokens.TokenType.LEFT_PAREN,
    211         Pattern.compile("[(]"));
    212     TOKEN_TYPE_FILTERS.put(
    213         CssTokens.TokenType.LEFT_SQUARE,
    214         Pattern.compile("[\\[]"));
    215     TOKEN_TYPE_FILTERS.put(
    216         CssTokens.TokenType.MATCH,
    217         Pattern.compile("[~^$|*]="));
    218     TOKEN_TYPE_FILTERS.put(
    219         CssTokens.TokenType.NUMBER,
    220         Pattern.compile(NUMBER));
    221     TOKEN_TYPE_FILTERS.put(
    222         CssTokens.TokenType.PERCENTAGE,
    223         Pattern.compile(NUMBER + "%"));
    224     TOKEN_TYPE_FILTERS.put(
    225         CssTokens.TokenType.RIGHT_CURLY,
    226         Pattern.compile("[}]"));
    227     TOKEN_TYPE_FILTERS.put(
    228         CssTokens.TokenType.RIGHT_PAREN,
    229         Pattern.compile("[)]"));
    230     TOKEN_TYPE_FILTERS.put(
    231         CssTokens.TokenType.RIGHT_SQUARE,
    232         Pattern.compile("[\\]]"));
    233     TOKEN_TYPE_FILTERS.put(
    234         CssTokens.TokenType.SEMICOLON,
    235         Pattern.compile(";"));
    236     TOKEN_TYPE_FILTERS.put(
    237         CssTokens.TokenType.STRING,
    238         Pattern.compile("'(?:[^'\r\n\\\\]|\\\\[^\r\n])*'"));
    239     TOKEN_TYPE_FILTERS.put(
    240         CssTokens.TokenType.UNICODE_RANGE,
    241         Pattern.compile("U\\+[0-9a-f]{1,6}(?:-[0-9a-f]{1,6}|\\?{0,5})?"));
    242     TOKEN_TYPE_FILTERS.put(
    243         CssTokens.TokenType.URL,
    244         Pattern.compile("url\\('[0-9A-Za-z\\-_.~:/?#\\[\\]@!$&+,;=%]*'\\)"));
    245     TOKEN_TYPE_FILTERS.put(
    246         CssTokens.TokenType.WHITESPACE,
    247         Pattern.compile(" "));
    248   }
    249 
    250   /**
    251    * "1:NUMBER ex:IDENT" -> "1ex:DIMENSION" is a common source source of
    252    * a-idempotency, but not one that causes problems in practice.
    253    * This hack helps ignore it.
    254    */
    255   static String fixDigitSpaceUnit(CssTokens tokens) {
    256     StringBuilder sb = new StringBuilder();
    257     for (CssTokens.TokenIterator it = tokens.iterator(); it.hasNext();) {
    258       if (it.type() != TokenType.NUMBER) {
    259         sb.append(it.next());
    260       } else {
    261         do {
    262           sb.append(it.next());
    263         } while (it.hasNext() && it.type() == TokenType.NUMBER);
    264         if (it.hasNext() && it.type() == TokenType.WHITESPACE) {
    265           it.advance();
    266           String numberFollower = null;
    267           if (it.hasNext()) {
    268             String token = it.token();
    269             switch (it.type()) {
    270               case IDENT:
    271                 if (CssTokens.isWellKnownUnit(token)) {
    272                   numberFollower = token;
    273                   it.advance();
    274                   if (it.hasNext() && it.token().startsWith(".")) {
    275                     numberFollower += " ";
    276                   }
    277                   it.backup();
    278                 }
    279                 break;
    280               case FUNCTION:
    281                 String name = token.substring(0, token.length() - 1);
    282                 if (CssTokens.isWellKnownUnit(name)) {
    283                   numberFollower = token;
    284                 }
    285                 break;
    286               case DELIM:
    287                 if ("%".equals(token)) {
    288                   numberFollower = token;
    289                 }
    290                 break;
    291               default: break;
    292             }
    293           }
    294           if (numberFollower == null) {
    295             sb.append(' ');
    296           } else {
    297             // Drop the space and append a lower-case version of the unit.
    298             sb.append(Strings.toLowerCase(numberFollower));
    299             it.advance();
    300           }
    301         }
    302       }
    303     }
    304     return sb.toString();
    305   }
    306 }
    307