Home | History | Annotate | Download | only in utility
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /**
     18  * This is a series of unit tests for snippet creation and highlighting
     19  *
     20  * You can run this entire test case with:
     21  *   runtest -c com.android.emailcommon.utility.TextUtilitiesTests email
     22  */
     23 package com.android.emailcommon.utility;
     24 
     25 import android.test.AndroidTestCase;
     26 import android.text.SpannableStringBuilder;
     27 import android.text.style.BackgroundColorSpan;
     28 
     29 public class TextUtilitiesTests extends AndroidTestCase {
     30 
     31     public void testPlainSnippet() {
     32         // Test the simplest cases
     33         assertEquals("", TextUtilities.makeSnippetFromPlainText(null));
     34         assertEquals("", TextUtilities.makeSnippetFromPlainText(""));
     35 
     36         // Test handling leading, trailing, and duplicated whitespace
     37         // Just test common whitespace characters; we calls Character.isWhitespace() internally, so
     38         // other whitespace should be fine as well
     39         assertEquals("", TextUtilities.makeSnippetFromPlainText(" \n\r\t\r\t\n"));
     40         char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER;
     41         assertEquals("foo", TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \n\t\r" + c));
     42         assertEquals("foo bar",
     43                 TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \r\n bar\n\t\r" + c));
     44 
     45         // Handle duplicated - and =
     46         assertEquals("Foo-Bar=Bletch",
     47                 TextUtilities.makeSnippetFromPlainText("Foo-----Bar=======Bletch"));
     48 
     49         // We shouldn't muck with HTML entities
     50         assertEquals(" >", TextUtilities.makeSnippetFromPlainText(" >"));
     51     }
     52 
     53     public void testHtmlSnippet() {
     54         // Test the simplest cases
     55         assertEquals("", TextUtilities.makeSnippetFromHtmlText(null));
     56         assertEquals("", TextUtilities.makeSnippetFromHtmlText(""));
     57 
     58         // Test handling leading, trailing, and duplicated whitespace
     59         // Just test common whitespace characters; we calls Character.isWhitespace() internally, so
     60         // other whitespace should be fine as well
     61         assertEquals("", TextUtilities.makeSnippetFromHtmlText(" \n\r\t\r\t\n"));
     62         char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER;
     63         assertEquals("foo", TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \n\t\r" + c));
     64         assertEquals("foo bar",
     65                 TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \r\n bar\n\t\r" + c));
     66 
     67         // Handle duplicated - and =
     68         assertEquals("Foo-Bar=Bletch",
     69                 TextUtilities.makeSnippetFromPlainText("Foo-----Bar=======Bletch"));
     70 
     71         // We should catch HTML entities in these tests
     72         assertEquals(">", TextUtilities.makeSnippetFromHtmlText(" >"));
     73         assertEquals("&<> \"", TextUtilities.makeSnippetFromHtmlText("&amp;&lt;&gt;&nbsp;&quot;"));
     74         // Test for decimal and hex entities
     75         assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("&#65;&#66;&#67;"));
     76         assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("&#x41;&#x42;&#x43;"));
     77 
     78         // Test for stripping simple tags
     79         assertEquals("Hi there", TextUtilities.makeSnippetFromHtmlText("<html>Hi there</html>"));
     80         // TODO: Add tests here if/when we find problematic HTML
     81     }
     82 
     83     public void testStripHtmlEntityEdgeCases() {
     84         int[] skipCount = new int[1];
     85         // Bare & isn't an entity
     86         char c = TextUtilities.stripHtmlEntity("&", 0, skipCount);
     87         assertEquals(c, '&');
     88         assertEquals(0, skipCount[0]);
     89         // Also not legal
     90         c = TextUtilities.stripHtmlEntity("&;", 0, skipCount);
     91         assertEquals(c, '&');
     92         assertEquals(0, skipCount[0]);
     93         // This is an entity, but shouldn't be found
     94         c = TextUtilities.stripHtmlEntity("&nosuch;", 0, skipCount);
     95         assertEquals(c, '&');
     96         assertEquals(0, skipCount[0]);
     97         // This is too long for an entity, even though it starts like a valid one
     98         c = TextUtilities.stripHtmlEntity("&nbspandmore;", 0, skipCount);
     99         assertEquals(c, '&');
    100         assertEquals(0, skipCount[0]);
    101         // Illegal decimal entities
    102         c = TextUtilities.stripHtmlEntity("&#ABC", 0, skipCount);
    103         assertEquals(c, '&');
    104         assertEquals(0, skipCount[0]);
    105         c = TextUtilities.stripHtmlEntity("&#12B", 0, skipCount);
    106         assertEquals(c, '&');
    107         assertEquals(0, skipCount[0]);
    108         // Illegal hex entities
    109         c = TextUtilities.stripHtmlEntity("&#xABC", 0, skipCount);
    110         assertEquals(c, '&');
    111         assertEquals(0, skipCount[0]);
    112         // Illegal hex entities
    113         c = TextUtilities.stripHtmlEntity("&#x19G", 0, skipCount);
    114         assertEquals(c, '&');
    115         assertEquals(0, skipCount[0]);
    116     }
    117 
    118     public void testStripContent() {
    119         assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
    120             "<html><style foo=\"bar\">Not</style>Visible</html>"));
    121         assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
    122             "<html><STYLE foo=\"bar\">Not</STYLE>Visible</html>"));
    123         assertEquals("IsVisible", TextUtilities.makeSnippetFromHtmlText(
    124             "<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>"));
    125         assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
    126             "<html>Visible<style foo=\"bar\">Not"));
    127         assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
    128             "<html>Visible<style foo=\"bar\">Not</style>AgainVisible"));
    129         assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
    130             "<html>Visible<style foo=\"bar\"/>AgainVisible"));
    131         assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
    132             "<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible"));
    133     }
    134 
    135     /**
    136      * We pass in HTML text in which an ampersand (@) is two chars ahead of the correct end position
    137      * for the tag named 'tag' and then check whether the calculated end position matches the known
    138      * correct position.  HTML text not containing an ampersand should generate a calculated end of
    139      * -1
    140      * @param text the HTML text to test
    141      */
    142     private void findTagEnd(String text, String tag) {
    143         int calculatedEnd = TextUtilities.findTagEnd(text , tag, 0);
    144         int knownEnd = text.indexOf('@') + 2;
    145         if (knownEnd == 1) {
    146             // indexOf will return -1, so we'll get 1 as knownEnd
    147             assertEquals(-1, calculatedEnd);
    148         } else {
    149             assertEquals(calculatedEnd, knownEnd);
    150         }
    151     }
    152 
    153     public void testFindTagEnd() {
    154         // Test with <tag ... />
    155         findTagEnd("<tag foo=\"bar\"@ /> <blah blah>", "tag");
    156         // Test with <tag ...> ... </tag>
    157         findTagEnd("<tag foo=\"bar\">some text@</tag>some more text", "tag");
    158         // Test with incomplete tag
    159         findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag");
    160         // Test with space at end of tag
    161         findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag ");
    162     }
    163 
    164     private void assertHighlightUnchanged(String str) {
    165         assertEquals(str, TextUtilities.highlightTermsInHtml(str, null));
    166     }
    167 
    168     public void testHighlightNoTerm() {
    169         // With no search terms, the html should be unchanged
    170         assertHighlightUnchanged("<html><style foo=\"bar\">Not</style>Visible</html>");
    171         assertHighlightUnchanged("<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>");
    172         assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not");
    173         assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not</style>AgainVisible");
    174         assertHighlightUnchanged("<html>Visible<style foo=\"bar\"/>AgainVisible");
    175         assertHighlightUnchanged(
    176                 "<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible");
    177     }
    178 
    179     public void testHighlightSingleTermHtml() {
    180         String str = "<html><style foo=\"bar\">Not</style>Visible</html>";
    181         // Test that tags aren't highlighted
    182         assertEquals(str, TextUtilities.highlightTermsInHtml(
    183                 "<html><style foo=\"bar\">Not</style>Visible</html>", "style"));
    184         // Test that non-tags are
    185         assertEquals("<html><style foo=\"bar\">Not</style><span " +
    186                 "style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING +
    187                 "\">Visi</span>ble</html>",
    188                 TextUtilities.highlightTermsInHtml(str, "Visi"));
    189         assertEquals("<html>Visible<style foo=\"bar\">Not</style>A<span" +
    190                 " style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING +
    191                 "\">gain</span>Visible",
    192                 TextUtilities.highlightTermsInHtml(
    193                         "<html>Visible<style foo=\"bar\">Not</style>AgainVisible", "gain"));
    194     }
    195 
    196     public void testHighlightSingleTermText() {
    197         // Sprinkle text with a few HTML characters to make sure they're ignored
    198         String text = "This< should be visibl>e";
    199         // We should find this, because search terms are case insensitive
    200         SpannableStringBuilder ssb =
    201             (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Visi");
    202         BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
    203         assertEquals(1, spans.length);
    204         BackgroundColorSpan span = spans[0];
    205         assertEquals(text.indexOf("visi"), ssb.getSpanStart(span));
    206         assertEquals(text.indexOf("bl>e"), ssb.getSpanEnd(span));
    207         // Heh; this next test fails.. we use the search term!
    208         assertEquals(text, ssb.toString());
    209 
    210         // Multiple instances of the term
    211         text = "The research word should be a search result";
    212         ssb = (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Search");
    213         spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
    214         assertEquals(2, spans.length);
    215         span = spans[0];
    216         assertEquals(text.indexOf("search word"), ssb.getSpanStart(span));
    217         assertEquals(text.indexOf(" word"), ssb.getSpanEnd(span));
    218         span = spans[1];
    219         assertEquals(text.indexOf("search result"), ssb.getSpanStart(span));
    220         assertEquals(text.indexOf(" result"), ssb.getSpanEnd(span));
    221         assertEquals(text, ssb.toString());
    222     }
    223 
    224     public void testHighlightTwoTermText() {
    225         String text = "This should be visible";
    226         // We should find this, because search terms are case insensitive
    227         SpannableStringBuilder ssb =
    228             (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "visi should");
    229         BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
    230         assertEquals(2, spans.length);
    231         BackgroundColorSpan span = spans[0];
    232         assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
    233         assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span));
    234         span = spans[1];
    235         assertEquals(text.indexOf("visi"), ssb.getSpanStart(span));
    236         assertEquals(text.indexOf("ble"), ssb.getSpanEnd(span));
    237         assertEquals(text, ssb.toString());
    238     }
    239 
    240     public void testHighlightDuplicateTermText() {
    241         String text = "This should be visible";
    242         // We should find this, because search terms are case insensitive
    243         SpannableStringBuilder ssb =
    244             (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should should");
    245         BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
    246         assertEquals(1, spans.length);
    247         BackgroundColorSpan span = spans[0];
    248         assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
    249         assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span));
    250     }
    251 
    252     public void testHighlightOverlapTermText() {
    253         String text = "This shoulder is visible";
    254         // We should find this, because search terms are case insensitive
    255         SpannableStringBuilder ssb =
    256             (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should ould");
    257         BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
    258         assertEquals(1, spans.length);
    259         BackgroundColorSpan span = spans[0];
    260         assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
    261         assertEquals(text.indexOf("er is"), ssb.getSpanEnd(span));
    262     }
    263 
    264 
    265     public void testHighlightOverlapTermText2() {
    266         String text = "The shoulders are visible";
    267         // We should find this, because search terms are case insensitive
    268         SpannableStringBuilder ssb =
    269             (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "shoulder shoulders");
    270         BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
    271         assertEquals(2, spans.length);
    272         BackgroundColorSpan span = spans[0];
    273         assertEquals(text.indexOf("shoulder"), ssb.getSpanStart(span));
    274         assertEquals(text.indexOf("s are visible"), ssb.getSpanEnd(span));
    275         span = spans[1];
    276         // Just the 's' should be caught in the 2nd span
    277         assertEquals(text.indexOf("s are visible"), ssb.getSpanStart(span));
    278         assertEquals(text.indexOf(" are visible"), ssb.getSpanEnd(span));
    279         assertEquals(text, ssb.toString());
    280     }
    281     // For debugging large HTML samples
    282 
    283 //    private String readLargeSnippet(String fn) {
    284 //        File file = mContext.getFileStreamPath(fn);
    285 //        StringBuffer sb = new StringBuffer();
    286 //        BufferedReader reader = null;
    287 //        try {
    288 //            String text;
    289 //            reader = new BufferedReader(new FileReader(file));
    290 //            while ((text = reader.readLine()) != null) {
    291 //                sb.append(text);
    292 //                sb.append(" ");
    293 //            }
    294 //        } catch (IOException e) {
    295 //        }
    296 //        return sb.toString();
    297 //    }
    298  }
    299