Home | History | Annotate | Download | only in utility
      1 /*
      2  * Copyright (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /**
     18  * This is a series of unit tests for snippet creation and highlighting
     19  *
     20  * You can run this entire test case with:
     21  *   runtest -c com.android.emailcommon.utility.TextUtilitiesTests email
     22  */
     23 package com.android.emailcommon.utility;
     24 
     25 import android.test.AndroidTestCase;
     26 import android.test.suitebuilder.annotation.SmallTest;
     27 
     28 import android.text.SpannableStringBuilder;
     29 import android.text.style.BackgroundColorSpan;
     30 
     31 @SmallTest
     32 public class TextUtilitiesTests extends AndroidTestCase {
     33 
     34     public void testPlainSnippet() {
     35         // Test the simplest cases
     36         assertEquals("", TextUtilities.makeSnippetFromPlainText(null));
     37         assertEquals("", TextUtilities.makeSnippetFromPlainText(""));
     38 
     39         // Test handling leading, trailing, and duplicated whitespace
     40         // Just test common whitespace characters; we calls Character.isWhitespace() internally, so
     41         // other whitespace should be fine as well
     42         assertEquals("", TextUtilities.makeSnippetFromPlainText(" \n\r\t\r\t\n"));
     43         char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER;
     44         assertEquals("foo", TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \n\t\r" + c));
     45         assertEquals("foo bar",
     46                 TextUtilities.makeSnippetFromPlainText(c + "\r\n\tfoo \r\n bar\n\t\r" + c));
     47 
     48         // Handle duplicated - and =
     49         assertEquals("Foo-Bar=Bletch",
     50                 TextUtilities.makeSnippetFromPlainText("Foo-----Bar=======Bletch"));
     51 
     52         // We shouldn't muck with HTML entities
     53         assertEquals(" >", TextUtilities.makeSnippetFromPlainText(" >"));
     54     }
     55 
     56     public void testHtmlSnippet() {
     57         // Test the simplest cases
     58         assertEquals("", TextUtilities.makeSnippetFromHtmlText(null));
     59         assertEquals("", TextUtilities.makeSnippetFromHtmlText(""));
     60 
     61         // Test handling leading, trailing, and duplicated whitespace
     62         // Just test common whitespace characters; we calls Character.isWhitespace() internally, so
     63         // other whitespace should be fine as well
     64         assertEquals("", TextUtilities.makeSnippetFromHtmlText(" \n\r\t\r\t\n"));
     65         char c = TextUtilities.NON_BREAKING_SPACE_CHARACTER;
     66         assertEquals("foo", TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \n\t\r" + c));
     67         assertEquals("foo bar",
     68                 TextUtilities.makeSnippetFromHtmlText(c + "\r\n\tfoo \r\n bar\n\t\r" + c));
     69 
     70         // Handle duplicated - and =
     71         assertEquals("Foo-Bar=Bletch",
     72                 TextUtilities.makeSnippetFromPlainText("Foo-----Bar=======Bletch"));
     73 
     74         // We should catch HTML entities in these tests
     75         assertEquals(">", TextUtilities.makeSnippetFromHtmlText(" >"));
     76         assertEquals("&<> \"", TextUtilities.makeSnippetFromHtmlText("&amp;&lt;&gt;&nbsp;&quot;"));
     77         // Test for decimal and hex entities
     78         assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("&#65;&#66;&#67;"));
     79         assertEquals("ABC", TextUtilities.makeSnippetFromHtmlText("&#x41;&#x42;&#x43;"));
     80 
     81         // Test for stripping simple tags
     82         assertEquals("Hi there", TextUtilities.makeSnippetFromHtmlText("<html>Hi there</html>"));
     83         // TODO: Add tests here if/when we find problematic HTML
     84     }
     85 
     86     public void testStripHtmlEntityEdgeCases() {
     87         int[] skipCount = new int[1];
     88         // Bare & isn't an entity
     89         char c = TextUtilities.stripHtmlEntity("&", 0, skipCount);
     90         assertEquals(c, '&');
     91         assertEquals(0, skipCount[0]);
     92         // Also not legal
     93         c = TextUtilities.stripHtmlEntity("&;", 0, skipCount);
     94         assertEquals(c, '&');
     95         assertEquals(0, skipCount[0]);
     96         // This is an entity, but shouldn't be found
     97         c = TextUtilities.stripHtmlEntity("&nosuch;", 0, skipCount);
     98         assertEquals(c, '&');
     99         assertEquals(0, skipCount[0]);
    100         // This is too long for an entity, even though it starts like a valid one
    101         c = TextUtilities.stripHtmlEntity("&nbspandmore;", 0, skipCount);
    102         assertEquals(c, '&');
    103         assertEquals(0, skipCount[0]);
    104         // Illegal decimal entities
    105         c = TextUtilities.stripHtmlEntity("&#ABC", 0, skipCount);
    106         assertEquals(c, '&');
    107         assertEquals(0, skipCount[0]);
    108         c = TextUtilities.stripHtmlEntity("&#12B", 0, skipCount);
    109         assertEquals(c, '&');
    110         assertEquals(0, skipCount[0]);
    111         // Illegal hex entities
    112         c = TextUtilities.stripHtmlEntity("&#xABC", 0, skipCount);
    113         assertEquals(c, '&');
    114         assertEquals(0, skipCount[0]);
    115         // Illegal hex entities
    116         c = TextUtilities.stripHtmlEntity("&#x19G", 0, skipCount);
    117         assertEquals(c, '&');
    118         assertEquals(0, skipCount[0]);
    119     }
    120 
    121     public void testStripContent() {
    122         assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
    123             "<html><style foo=\"bar\">Not</style>Visible</html>"));
    124         assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
    125             "<html><STYLE foo=\"bar\">Not</STYLE>Visible</html>"));
    126         assertEquals("IsVisible", TextUtilities.makeSnippetFromHtmlText(
    127             "<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>"));
    128         assertEquals("Visible", TextUtilities.makeSnippetFromHtmlText(
    129             "<html>Visible<style foo=\"bar\">Not"));
    130         assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
    131             "<html>Visible<style foo=\"bar\">Not</style>AgainVisible"));
    132         assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
    133             "<html>Visible<style foo=\"bar\"/>AgainVisible"));
    134         assertEquals("VisibleAgainVisible", TextUtilities.makeSnippetFromHtmlText(
    135             "<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible"));
    136     }
    137 
    138     /**
    139      * We pass in HTML text in which an ampersand (@) is two chars ahead of the correct end position
    140      * for the tag named 'tag' and then check whether the calculated end position matches the known
    141      * correct position.  HTML text not containing an ampersand should generate a calculated end of
    142      * -1
    143      * @param text the HTML text to test
    144      */
    145     private void findTagEnd(String text, String tag) {
    146         int calculatedEnd = TextUtilities.findTagEnd(text , tag, 0);
    147         int knownEnd = text.indexOf('@') + 2;
    148         if (knownEnd == 1) {
    149             // indexOf will return -1, so we'll get 1 as knownEnd
    150             assertEquals(-1, calculatedEnd);
    151         } else {
    152             assertEquals(calculatedEnd, knownEnd);
    153         }
    154     }
    155 
    156     public void testFindTagEnd() {
    157         // Test with <tag ... />
    158         findTagEnd("<tag foo=\"bar\"@ /> <blah blah>", "tag");
    159         // Test with <tag ...> ... </tag>
    160         findTagEnd("<tag foo=\"bar\">some text@</tag>some more text", "tag");
    161         // Test with incomplete tag
    162         findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag");
    163         // Test with space at end of tag
    164         findTagEnd("<tag foo=\"bar\">some more text but no end tag", "tag ");
    165     }
    166 
    167     private void assertHighlightUnchanged(String str) {
    168         assertEquals(str, TextUtilities.highlightTermsInHtml(str, null));
    169     }
    170 
    171     public void testHighlightNoTerm() {
    172         // With no search terms, the html should be unchanged
    173         assertHighlightUnchanged("<html><style foo=\"bar\">Not</style>Visible</html>");
    174         assertHighlightUnchanged("<html><nostrip foo=\"bar\">Is</nostrip>Visible</html>");
    175         assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not");
    176         assertHighlightUnchanged("<html>Visible<style foo=\"bar\">Not</style>AgainVisible");
    177         assertHighlightUnchanged("<html>Visible<style foo=\"bar\"/>AgainVisible");
    178         assertHighlightUnchanged(
    179                 "<html>Visible<style foo=\"bar\"/><head><//blah<style>Not</head>AgainVisible");
    180     }
    181 
    182     public void testHighlightSingleTermHtml() {
    183         String str = "<html><style foo=\"bar\">Not</style>Visible</html>";
    184         // Test that tags aren't highlighted
    185         assertEquals(str, TextUtilities.highlightTermsInHtml(
    186                 "<html><style foo=\"bar\">Not</style>Visible</html>", "style"));
    187         // Test that non-tags are
    188         assertEquals("<html><style foo=\"bar\">Not</style><span " +
    189                 "style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING +
    190                 "\">Visi</span>ble</html>",
    191                 TextUtilities.highlightTermsInHtml(str, "Visi"));
    192         assertEquals("<html>Visible<style foo=\"bar\">Not</style>A<span" +
    193                 " style=\"background-color: " + TextUtilities.HIGHLIGHT_COLOR_STRING +
    194                 "\">gain</span>Visible",
    195                 TextUtilities.highlightTermsInHtml(
    196                         "<html>Visible<style foo=\"bar\">Not</style>AgainVisible", "gain"));
    197     }
    198 
    199     public void brokentestHighlightSingleTermText() {
    200         // Sprinkle text with a few HTML characters to make sure they're ignored
    201         String text = "This< should be visibl>e";
    202         // We should find this, because search terms are case insensitive
    203         SpannableStringBuilder ssb =
    204             (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Visi");
    205         BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
    206         assertEquals(1, spans.length);
    207         BackgroundColorSpan span = spans[0];
    208         assertEquals(text.indexOf("visi"), ssb.getSpanStart(span));
    209         assertEquals(text.indexOf("bl>e"), ssb.getSpanEnd(span));
    210         // Heh; this next test fails.. we use the search term!
    211         assertEquals(text, ssb.toString());
    212 
    213         // Multiple instances of the term
    214         text = "The research word should be a search result";
    215         ssb = (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "Search");
    216         spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
    217         assertEquals(2, spans.length);
    218         span = spans[0];
    219         assertEquals(text.indexOf("search word"), ssb.getSpanStart(span));
    220         assertEquals(text.indexOf(" word"), ssb.getSpanEnd(span));
    221         span = spans[1];
    222         assertEquals(text.indexOf("search result"), ssb.getSpanStart(span));
    223         assertEquals(text.indexOf(" result"), ssb.getSpanEnd(span));
    224         assertEquals(text, ssb.toString());
    225     }
    226 
    227     public void brokentestHighlightTwoTermText() {
    228         String text = "This should be visible";
    229         // We should find this, because search terms are case insensitive
    230         SpannableStringBuilder ssb =
    231             (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "visi should");
    232         BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
    233         assertEquals(2, spans.length);
    234         BackgroundColorSpan span = spans[0];
    235         assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
    236         assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span));
    237         span = spans[1];
    238         assertEquals(text.indexOf("visi"), ssb.getSpanStart(span));
    239         assertEquals(text.indexOf("ble"), ssb.getSpanEnd(span));
    240         assertEquals(text, ssb.toString());
    241     }
    242 
    243     public void brokentestHighlightDuplicateTermText() {
    244         String text = "This should be visible";
    245         // We should find this, because search terms are case insensitive
    246         SpannableStringBuilder ssb =
    247             (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should should");
    248         BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
    249         assertEquals(1, spans.length);
    250         BackgroundColorSpan span = spans[0];
    251         assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
    252         assertEquals(text.indexOf(" be"), ssb.getSpanEnd(span));
    253     }
    254 
    255     public void brokentestHighlightOverlapTermText() {
    256         String text = "This shoulder is visible";
    257         // We should find this, because search terms are case insensitive
    258         SpannableStringBuilder ssb =
    259             (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "should ould");
    260         BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
    261         assertEquals(1, spans.length);
    262         BackgroundColorSpan span = spans[0];
    263         assertEquals(text.indexOf("should"), ssb.getSpanStart(span));
    264         assertEquals(text.indexOf("er is"), ssb.getSpanEnd(span));
    265     }
    266 
    267 
    268     public void brokentestHighlightOverlapTermText2() {
    269         String text = "The shoulders are visible";
    270         // We should find this, because search terms are case insensitive
    271         SpannableStringBuilder ssb =
    272             (SpannableStringBuilder)TextUtilities.highlightTermsInText(text, "shoulder shoulders");
    273         BackgroundColorSpan[] spans = ssb.getSpans(0, ssb.length(), BackgroundColorSpan.class);
    274         assertEquals(2, spans.length);
    275         BackgroundColorSpan span = spans[0];
    276         assertEquals(text.indexOf("shoulder"), ssb.getSpanStart(span));
    277         assertEquals(text.indexOf("s are visible"), ssb.getSpanEnd(span));
    278         span = spans[1];
    279         // Just the 's' should be caught in the 2nd span
    280         assertEquals(text.indexOf("s are visible"), ssb.getSpanStart(span));
    281         assertEquals(text.indexOf(" are visible"), ssb.getSpanEnd(span));
    282         assertEquals(text, ssb.toString());
    283     }
    284     // For debugging large HTML samples
    285 
    286 //    private String readLargeSnippet(String fn) {
    287 //        File file = mContext.getFileStreamPath(fn);
    288 //        StringBuffer sb = new StringBuffer();
    289 //        BufferedReader reader = null;
    290 //        try {
    291 //            String text;
    292 //            reader = new BufferedReader(new FileReader(file));
    293 //            while ((text = reader.readLine()) != null) {
    294 //                sb.append(text);
    295 //                sb.append(" ");
    296 //            }
    297 //        } catch (IOException e) {
    298 //        }
    299 //        return sb.toString();
    300 //    }
    301  }
    302