1 /* 2 * Copyright (C) 2010 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 /** 18 * This is a series of unit tests for snippet creation and highlighting 19 * 20 * You can run this entire test case with: 21 * runtest -c com.android.emailcommon.utility.TextUtilitiesTests email 22 */ 23 package com.android.emailcommon.utility; 24 25 import android.test.AndroidTestCase; 26 import android.test.suitebuilder.annotation.SmallTest; 27 28 import android.text.SpannableStringBuilder; 29 import android.text.style.BackgroundColorSpan; 30 31 @SmallTest 32 public class TextUtilitiesTests extends AndroidTestCase { 33 34 public void testPlainSnippet() { 35 // Test the simplest cases 36 assertEquals("", TextUtilities.makeSnippetFromPlainText(null)); 37 assertEquals("", TextUtilities.makeSnippetFromPlainText("")); 38 39 // Test handling leading, trailing, and duplicated whitespace 40 // Just test common whitespace characters; we calls Character.isWhitespace() internally, so 41 // other whitespace should be fine as well 42 assertEquals("", TextUtilities.makeSnippetFromPlainText(" \n\r\t\r\t\n")); 43 char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER; 44 assertEquals("foo", TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \n\t\r" + c)); 45 assertEquals("foo bar", 46 TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \r\n bar\n\t\r" + c)); 47 48 // Handle duplicated - and = 49 assertEquals("Foo-Bar=Bletch", 50 TextUtilities.makeSnippetFromPlainText("Foo-----Bar=======Bletch")); 51 52 // We shouldn't muck with HTML entities 53 assertEquals(" >", TextUtilities.makeSnippetFromPlainText(" >")); 54 } 55 56 public void testHtmlSnippet() { 57 // Test the simplest cases 58 assertEquals("", TextUtilities.makeSnippetFromHtmlText(null)); 59 assertEquals("", TextUtilities.makeSnippetFromHtmlText("")); 60 61 // Test handling leading, trailing, and duplicated whitespace 62 // Just test common whitespace characters; we calls Character.isWhitespace() internally, so 63 // other whitespace should be fine as well 64 assertEquals("", TextUtilities.makeSnippetFromHtmlText(" \n\r\t\r\t\n")); 65 char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER; 66 assertEquals("foo", TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \n\t\r" + c)); 67 assertEquals("foo bar", 68 TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \r\n bar\n\t\r" + c)); 69 70 // Handle duplicated - and = 71 assertEquals("Foo-Bar=Bletch", 72 TextUtilities.makeSnippetFromPlainText("Foo-----Bar=======Bletch")); 73 74 // We should catch HTML entities in these tests 75 assertEquals(">", TextUtilities.makeSnippetFromHtmlText(" >")); 76 assertEquals("&<> \"", TextUtilities.makeSnippetFromHtmlText("&<> "")); 77 // Test for decimal and hex entities 78 assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("ABC")); 79 assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("ABC")); 80 81 // Test for stripping simple tags 82 assertEquals("Hi there", TextUtilities.makeSnippetFromHtmlText("<html>Hi there</html>")); 83 // TODO: Add tests here if/when we find problematic HTML 84 } 85 86 public void testStripHtmlEntityEdgeCases() { 87 int[] skipCount = new int[1]; 88 // Bare & isn't an entity 89 char c = TextUtilities.stripHtmlEntity("&", 0, skipCount); 90 assertEquals(c, '&'); 91 assertEquals(0, skipCount[0]); 92 // Also not legal 93 c = TextUtilities.stripHtmlEntity("&;", 0, skipCount); 94 assertEquals(c, '&'); 95 assertEquals(0, skipCount[0]); 96 // This is an entity, but shouldn't be found 97 c = TextUtilities.stripHtmlEntity("&nosuch;", 0, skipCount); 98 assertEquals(c, '&'); 99 assertEquals(0, skipCount[0]); 100 // This is too long for an entity, even though it starts like a valid one 101 c = TextUtilities.stripHtmlEntity(" andmore;", 0, skipCount); 102 assertEquals(c, '&'); 103 assertEquals(0, skipCount[0]); 104 // Illegal decimal entities 105 c = TextUtilities.stripHtmlEntity("&#ABC", 0, skipCount); 106 assertEquals(c, '&'); 107 assertEquals(0, skipCount[0]); 108 c = TextUtilities.stripHtmlEntity("B", 0, skipCount); 109 assertEquals(c, '&'); 110 assertEquals(0, skipCount[0]); 111 // Illegal hex entities 112 c = TextUtilities.stripHtmlEntity("઼", 0, skipCount); 113 assertEquals(c, '&'); 114 assertEquals(0, skipCount[0]); 115 // Illegal hex entities 116 c = TextUtilities.stripHtmlEntity("G", 0, skipCount); 117 assertEquals(c, '&'); 118 assertEquals(0, skipCount[0]); 119 } 120 121 public void testStripContent() { 122 assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText( 123 "<html><style foo=\"bar\">Not</style>Visible</html>")); 124 assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText( 125 "<html><STYLE foo=\"bar\">Not</STYLE>Visible</html>")); 126 assertEquals("IsVisible", TextUtilities.makeSnippetFromHtmlText( 127 "<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>")); 128 assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText( 129 "<html>Visible<style foo=\"bar\">Not")); 130 assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText( 131 "<html>Visible<style foo=\"bar\">Not</style>AgainVisible")); 132 assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText( 133 "<html>Visible<style foo=\"bar\"/>AgainVisible")); 134 assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText( 135 "<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible")); 136 } 137 138 /** 139 * We pass in HTML text in which an ampersand (@) is two chars ahead of the correct end position 140 * for the tag named 'tag' and then check whether the calculated end position matches the known 141 * correct position. HTML text not containing an ampersand should generate a calculated end of 142 * -1 143 * @param text the HTML text to test 144 */ 145 private void findTagEnd(String text, String tag) { 146 int calculatedEnd = TextUtilities.findTagEnd(text , tag, 0); 147 int knownEnd = text.indexOf('@') + 2; 148 if (knownEnd == 1) { 149 // indexOf will return -1, so we'll get 1 as knownEnd 150 assertEquals(-1, calculatedEnd); 151 } else { 152 assertEquals(calculatedEnd, knownEnd); 153 } 154 } 155 156 public void testFindTagEnd() { 157 // Test with <tag ... /> 158 findTagEnd("<tag foo=\"bar\"@ /> <blah blah>", "tag"); 159 // Test with <tag ...> ... </tag> 160 findTagEnd("<tag foo=\"bar\">some text@</tag>some more text", "tag"); 161 // Test with incomplete tag 162 findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag"); 163 // Test with space at end of tag 164 findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag "); 165 } 166 167 private void assertHighlightUnchanged(String str) { 168 assertEquals(str, TextUtilities.highlightTermsInHtml(str, null)); 169 } 170 171 public void testHighlightNoTerm() { 172 // With no search terms, the html should be unchanged 173 assertHighlightUnchanged("<html><style foo=\"bar\">Not</style>Visible</html>"); 174 assertHighlightUnchanged("<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>"); 175 assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not"); 176 assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not</style>AgainVisible"); 177 assertHighlightUnchanged("<html>Visible<style foo=\"bar\"/>AgainVisible"); 178 assertHighlightUnchanged( 179 "<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible"); 180 } 181 182 public void testHighlightSingleTermHtml() { 183 String str = "<html><style foo=\"bar\">Not</style>Visible</html>"; 184 // Test that tags aren't highlighted 185 assertEquals(str, TextUtilities.highlightTermsInHtml( 186 "<html><style foo=\"bar\">Not</style>Visible</html>", "style")); 187 // Test that non-tags are 188 assertEquals("<html><style foo=\"bar\">Not</style><span " + 189 "style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING + 190 "\">Visi</span>ble</html>", 191 TextUtilities.highlightTermsInHtml(str, "Visi")); 192 assertEquals("<html>Visible<style foo=\"bar\">Not</style>A<span" + 193 " style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING + 194 "\">gain</span>Visible", 195 TextUtilities.highlightTermsInHtml( 196 "<html>Visible<style foo=\"bar\">Not</style>AgainVisible", "gain")); 197 } 198 199 public void brokentestHighlightSingleTermText() { 200 // Sprinkle text with a few HTML characters to make sure they're ignored 201 String text = "This< should be visibl>e"; 202 // We should find this, because search terms are case insensitive 203 SpannableStringBuilder ssb = 204 (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Visi"); 205 BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); 206 assertEquals(1, spans.length); 207 BackgroundColorSpan span = spans[0]; 208 assertEquals(text.indexOf("visi"), ssb.getSpanStart(span)); 209 assertEquals(text.indexOf("bl>e"), ssb.getSpanEnd(span)); 210 // Heh; this next test fails.. we use the search term! 211 assertEquals(text, ssb.toString()); 212 213 // Multiple instances of the term 214 text = "The research word should be a search result"; 215 ssb = (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Search"); 216 spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); 217 assertEquals(2, spans.length); 218 span = spans[0]; 219 assertEquals(text.indexOf("search word"), ssb.getSpanStart(span)); 220 assertEquals(text.indexOf(" word"), ssb.getSpanEnd(span)); 221 span = spans[1]; 222 assertEquals(text.indexOf("search result"), ssb.getSpanStart(span)); 223 assertEquals(text.indexOf(" result"), ssb.getSpanEnd(span)); 224 assertEquals(text, ssb.toString()); 225 } 226 227 public void brokentestHighlightTwoTermText() { 228 String text = "This should be visible"; 229 // We should find this, because search terms are case insensitive 230 SpannableStringBuilder ssb = 231 (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "visi should"); 232 BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); 233 assertEquals(2, spans.length); 234 BackgroundColorSpan span = spans[0]; 235 assertEquals(text.indexOf("should"), ssb.getSpanStart(span)); 236 assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span)); 237 span = spans[1]; 238 assertEquals(text.indexOf("visi"), ssb.getSpanStart(span)); 239 assertEquals(text.indexOf("ble"), ssb.getSpanEnd(span)); 240 assertEquals(text, ssb.toString()); 241 } 242 243 public void brokentestHighlightDuplicateTermText() { 244 String text = "This should be visible"; 245 // We should find this, because search terms are case insensitive 246 SpannableStringBuilder ssb = 247 (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should should"); 248 BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); 249 assertEquals(1, spans.length); 250 BackgroundColorSpan span = spans[0]; 251 assertEquals(text.indexOf("should"), ssb.getSpanStart(span)); 252 assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span)); 253 } 254 255 public void brokentestHighlightOverlapTermText() { 256 String text = "This shoulder is visible"; 257 // We should find this, because search terms are case insensitive 258 SpannableStringBuilder ssb = 259 (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should ould"); 260 BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); 261 assertEquals(1, spans.length); 262 BackgroundColorSpan span = spans[0]; 263 assertEquals(text.indexOf("should"), ssb.getSpanStart(span)); 264 assertEquals(text.indexOf("er is"), ssb.getSpanEnd(span)); 265 } 266 267 268 public void brokentestHighlightOverlapTermText2() { 269 String text = "The shoulders are visible"; 270 // We should find this, because search terms are case insensitive 271 SpannableStringBuilder ssb = 272 (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "shoulder shoulders"); 273 BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class); 274 assertEquals(2, spans.length); 275 BackgroundColorSpan span = spans[0]; 276 assertEquals(text.indexOf("shoulder"), ssb.getSpanStart(span)); 277 assertEquals(text.indexOf("s are visible"), ssb.getSpanEnd(span)); 278 span = spans[1]; 279 // Just the 's' should be caught in the 2nd span 280 assertEquals(text.indexOf("s are visible"), ssb.getSpanStart(span)); 281 assertEquals(text.indexOf(" are visible"), ssb.getSpanEnd(span)); 282 assertEquals(text, ssb.toString()); 283 } 284 // For debugging large HTML samples 285 286 // private String readLargeSnippet(String fn) { 287 // File file = mContext.getFileStreamPath(fn); 288 // StringBuffer sb = new StringBuffer(); 289 // BufferedReader reader = null; 290 // try { 291 // String text; 292 // reader = new BufferedReader(new FileReader(file)); 293 // while ((text = reader.readLine()) != null) { 294 // sb.append(text); 295 // sb.append(" "); 296 // } 297 // } catch (IOException e) { 298 // } 299 // return sb.toString(); 300 // } 301 } 302