1 // Copyright (c) 2013, Mike Samuel 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions 6 // are met: 7 // 8 // Redistributions of source code must retain the above copyright 9 // notice, this list of conditions and the following disclaimer. 10 // Redistributions in binary form must reproduce the above copyright 11 // notice, this list of conditions and the following disclaimer in the 12 // documentation and/or other materials provided with the distribution. 13 // Neither the name of the OWASP nor the names of its contributors may 14 // be used to endorse or promote products derived from this software 15 // without specific prior written permission. 16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 19 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 20 // COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 21 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 23 // LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 // LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 26 // ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 27 // POSSIBILITY OF SUCH DAMAGE. 28 29 package org.owasp.html; 30 31 import java.util.Arrays; 32 import java.util.EnumMap; 33 import java.util.Random; 34 import java.util.regex.Pattern; 35 36 import org.junit.Test; 37 import org.owasp.html.CssTokens.TokenType; 38 39 import com.google.common.collect.Maps; 40 41 public class CssFuzzerTest extends FuzzyTestCase { 42 43 private static final String[] TOKEN_PARTS = new String[] { 44 "'", "\"", "<!--", "-->", "/*", "*/", "***", "//", "\r", "\n", 45 "<", ">", "/", ",", ";", ":", "(", "url", "Url", ")", "[", "]", "{", "}", 46 "\\", "\\a", "\\d", "\\0", " ", "\t", "42", ".", "ex", "auto", "foo", "BAr", 47 "important", "!", "\ufeff", "\u0000", "\u00a0", "\ufffd", "\ud801\udc02", 48 "\u007f", "\u000c", "CDATA", "style" 49 }; 50 51 private static final String[] FREQUENT_TOKEN_PARTS = new String[] { 52 "*/", " ", "\t", "\r", "\n", 53 }; 54 55 private static final String[] DISALLOWED_IN_OUTPUT = { 56 "</style", "<![CDATA[", "]]>", "\r", "\n", 57 }; 58 59 final class Watcher implements Runnable { 60 String input; 61 long started; 62 63 public void run() { 64 synchronized (this) { 65 try { 66 while (true) { 67 this.wait(1000 /* ms = 1s */); 68 String input = this.input; 69 if (input == null) { break; } // Done 70 long started = this.started; 71 long now = System.nanoTime(); 72 if (now - started >= 1000000000L /* ns = 1s */) { 73 System.err.println( 74 "`" + input + "` is slow. seed=" + CssFuzzerTest.this.seed); 75 } 76 } 77 } catch (InterruptedException ex) { 78 // Done 79 } 80 } 81 } 82 } 83 84 @Test 85 public final void testUnderStress() { 86 Random r = this.rnd; 87 Watcher watcher = new Watcher(); 88 Thread watcherThread = null; 89 for (int run = 0, nRuns = (1 << 16); run < nRuns; ++run) { 90 // Compose a random string from token parts. 91 StringBuilder sb = new StringBuilder(); 92 int nParts = r.nextInt(64) + 16; 93 for (int j = nParts; --j >= 0;) { 94 int die = r.nextInt(32); 95 switch (die) { 96 case 0: sb.append((char) rnd.nextInt(0x80)); break; 97 case 1: sb.append((char) rnd.nextInt(0x1800)); break; 98 default: 99 String[] arr = (die & 1) != 0 ? TOKEN_PARTS : FREQUENT_TOKEN_PARTS; 100 sb.append(arr[rnd.nextInt(arr.length)]); 101 break; 102 } 103 } 104 String randomCss = sb.toString(); 105 106 synchronized (watcher) { 107 watcher.input = randomCss; 108 watcher.started = System.nanoTime(); 109 } 110 if (watcherThread == null) { 111 watcherThread = new Thread(watcher); 112 watcherThread.setDaemon(true); 113 watcherThread.start(); 114 } 115 116 String msg = "seed=" + this.seed + ", css=`" + randomCss + "`"; 117 CssTokens tokens = CssTokens.lex(randomCss); 118 119 // Test idempotent 120 String renormalized = CssTokens.lex(tokens.normalizedCss).normalizedCss; 121 if (!renormalized.equals(tokens.normalizedCss)) { 122 if (!renormalized.equals(fixDigitSpaceUnit(tokens))) { 123 for (CssTokens.TokenIterator it = tokens.iterator(); it.hasNext(); 124 it.advance()) { 125 System.err.println(it.token() + ":" + it.type()); 126 } 127 assertEquals( 128 "not idempotent, " + msg, 129 tokens.normalizedCss, 130 renormalized); 131 } 132 } 133 134 // Test normalized CSS does not contain HTML/XML breaking tokens. 135 for (String disallowed : DISALLOWED_IN_OUTPUT) { 136 assertFalse( 137 "contains " + disallowed + ", " + msg, 138 tokens.normalizedCss.contains(disallowed)); 139 } 140 141 // Test that tokens are roughly well-formed. 142 int nTokens = 0; 143 for (CssTokens.TokenIterator it = tokens.iterator(); it.hasNext();) { 144 CssTokens.TokenType type = it.type(); 145 String token = it.next(); 146 Pattern filter = TOKEN_TYPE_FILTERS.get(type); 147 if (filter != null && !filter.matcher(token).matches()) { 148 fail(type + " `" + token + "`, " + msg); 149 } 150 ++nTokens; 151 } 152 153 // Test that walking the bracket list works. 154 int[] reverse = new int[nTokens]; 155 Arrays.fill(reverse, -1); 156 for (int j = 0; j < nTokens; ++j) { 157 int partner = tokens.brackets.partner(j); 158 if (partner != -1) { 159 reverse[partner] = j; 160 } 161 } 162 for (int j = 0; j < nTokens; ++j) { 163 if (reverse[j] != -1) { 164 assertEquals(msg, reverse[reverse[j]], j); 165 } 166 } 167 } 168 synchronized (watcher) { 169 watcher.input = null; 170 watcher.notifyAll(); 171 } 172 } 173 174 private static final EnumMap<CssTokens.TokenType, Pattern> TOKEN_TYPE_FILTERS 175 = Maps.newEnumMap(CssTokens.TokenType.class); 176 static { 177 String NUMBER = "-?(?:0|[1-9][0-9]*)(?:\\.[0-9]*[1-9])?(?:e-?[1-9][0-9]*)?"; 178 String IDENT_START = "[a-zA-Z_\\u0080-\udbff\udfff\\-]"; 179 String IDENT_PART = "(?:" + IDENT_START + "|[0-9])"; 180 String IDENT = IDENT_START + IDENT_PART + "*"; 181 TOKEN_TYPE_FILTERS.put( 182 CssTokens.TokenType.AT, Pattern.compile("@" + IDENT)); 183 TOKEN_TYPE_FILTERS.put( 184 CssTokens.TokenType.COLON, Pattern.compile(":")); 185 TOKEN_TYPE_FILTERS.put( 186 CssTokens.TokenType.COLUMN, Pattern.compile("\\|\\|")); 187 TOKEN_TYPE_FILTERS.put( 188 CssTokens.TokenType.COMMA, Pattern.compile(",")); 189 TOKEN_TYPE_FILTERS.put( 190 CssTokens.TokenType.DELIM, 191 Pattern.compile("[^\\w\u0000- \u0080-\uffff\\-]")); 192 TOKEN_TYPE_FILTERS.put( 193 CssTokens.TokenType.DIMENSION, Pattern.compile(NUMBER + "[a-z]+")); 194 TOKEN_TYPE_FILTERS.put( 195 CssTokens.TokenType.DOT_IDENT, Pattern.compile("\\." + IDENT)); 196 TOKEN_TYPE_FILTERS.put( 197 CssTokens.TokenType.FUNCTION, Pattern.compile(IDENT + "[(]")); 198 TOKEN_TYPE_FILTERS.put( 199 CssTokens.TokenType.HASH_ID, Pattern.compile("#" + IDENT_PART + "+")); 200 TOKEN_TYPE_FILTERS.put( 201 CssTokens.TokenType.HASH_UNRESTRICTED, 202 Pattern.compile("#[a-fA-F0-9]+")); 203 TOKEN_TYPE_FILTERS.put( 204 CssTokens.TokenType.IDENT, 205 Pattern.compile(IDENT)); 206 TOKEN_TYPE_FILTERS.put( 207 CssTokens.TokenType.LEFT_CURLY, 208 Pattern.compile("[{]")); 209 TOKEN_TYPE_FILTERS.put( 210 CssTokens.TokenType.LEFT_PAREN, 211 Pattern.compile("[(]")); 212 TOKEN_TYPE_FILTERS.put( 213 CssTokens.TokenType.LEFT_SQUARE, 214 Pattern.compile("[\\[]")); 215 TOKEN_TYPE_FILTERS.put( 216 CssTokens.TokenType.MATCH, 217 Pattern.compile("[~^$|*]=")); 218 TOKEN_TYPE_FILTERS.put( 219 CssTokens.TokenType.NUMBER, 220 Pattern.compile(NUMBER)); 221 TOKEN_TYPE_FILTERS.put( 222 CssTokens.TokenType.PERCENTAGE, 223 Pattern.compile(NUMBER + "%")); 224 TOKEN_TYPE_FILTERS.put( 225 CssTokens.TokenType.RIGHT_CURLY, 226 Pattern.compile("[}]")); 227 TOKEN_TYPE_FILTERS.put( 228 CssTokens.TokenType.RIGHT_PAREN, 229 Pattern.compile("[)]")); 230 TOKEN_TYPE_FILTERS.put( 231 CssTokens.TokenType.RIGHT_SQUARE, 232 Pattern.compile("[\\]]")); 233 TOKEN_TYPE_FILTERS.put( 234 CssTokens.TokenType.SEMICOLON, 235 Pattern.compile(";")); 236 TOKEN_TYPE_FILTERS.put( 237 CssTokens.TokenType.STRING, 238 Pattern.compile("'(?:[^'\r\n\\\\]|\\\\[^\r\n])*'")); 239 TOKEN_TYPE_FILTERS.put( 240 CssTokens.TokenType.UNICODE_RANGE, 241 Pattern.compile("U\\+[0-9a-f]{1,6}(?:-[0-9a-f]{1,6}|\\?{0,5})?")); 242 TOKEN_TYPE_FILTERS.put( 243 CssTokens.TokenType.URL, 244 Pattern.compile("url\\('[0-9A-Za-z\\-_.~:/?#\\[\\]@!$&+,;=%]*'\\)")); 245 TOKEN_TYPE_FILTERS.put( 246 CssTokens.TokenType.WHITESPACE, 247 Pattern.compile(" ")); 248 } 249 250 /** 251 * "1:NUMBER ex:IDENT" -> "1ex:DIMENSION" is a common source source of 252 * a-idempotency, but not one that causes problems in practice. 253 * This hack helps ignore it. 254 */ 255 static String fixDigitSpaceUnit(CssTokens tokens) { 256 StringBuilder sb = new StringBuilder(); 257 for (CssTokens.TokenIterator it = tokens.iterator(); it.hasNext();) { 258 if (it.type() != TokenType.NUMBER) { 259 sb.append(it.next()); 260 } else { 261 do { 262 sb.append(it.next()); 263 } while (it.hasNext() && it.type() == TokenType.NUMBER); 264 if (it.hasNext() && it.type() == TokenType.WHITESPACE) { 265 it.advance(); 266 String numberFollower = null; 267 if (it.hasNext()) { 268 String token = it.token(); 269 switch (it.type()) { 270 case IDENT: 271 if (CssTokens.isWellKnownUnit(token)) { 272 numberFollower = token; 273 it.advance(); 274 if (it.hasNext() && it.token().startsWith(".")) { 275 numberFollower += " "; 276 } 277 it.backup(); 278 } 279 break; 280 case FUNCTION: 281 String name = token.substring(0, token.length() - 1); 282 if (CssTokens.isWellKnownUnit(name)) { 283 numberFollower = token; 284 } 285 break; 286 case DELIM: 287 if ("%".equals(token)) { 288 numberFollower = token; 289 } 290 break; 291 default: break; 292 } 293 } 294 if (numberFollower == null) { 295 sb.append(' '); 296 } else { 297 // Drop the space and append a lower-case version of the unit. 298 sb.append(Strings.toLowerCase(numberFollower)); 299 it.advance(); 300 } 301 } 302 } 303 } 304 return sb.toString(); 305 } 306 } 307