Home | History | Annotate | Download | only in intltest
      1 /*
      2 **********************************************************************
      3 * Copyright (C) 2011-2011, International Business Machines Corporation
      4 * and others.  All Rights Reserved.
      5 **********************************************************************
      6 ************************************************************************
      7 *   Date          Name        Description
      8 *   05/14/2011    grhoten     Creation.
      9 ************************************************************************/
     10 
     11 #include "unicode/utypes.h"
     12 
     13 #if !UCONFIG_NO_BREAK_ITERATION
     14 
     15 #include "dicttest.h"
     16 #include "textfile.h"
     17 #include "uvector.h"
     18 #include "unicode/rbbi.h"
     19 
     20 void DictionaryWordTest::TestThaiBreaks() {
     21     UErrorCode status=U_ZERO_ERROR;
     22     BreakIterator* b;
     23     Locale locale = Locale("th");
     24     int32_t p, index;
     25     UChar c[]= {
     26             0x0E01, 0x0E39, 0x0020, 0x0E01, 0x0E34, 0x0E19, 0x0E01, 0x0E38, 0x0E49, 0x0E07, 0x0020, 0x0E1B,
     27             0x0E34, 0x0E49, 0x0E48, 0x0E07, 0x0E2D, 0x0E22, 0x0E39, 0x0E48, 0x0E43, 0x0E19,
     28             0x0E16, 0x0E49, 0x0E33, 0x0000
     29     };
     30     int32_t expectedWordResult[] = {
     31             2, 3, 6, 10, 11, 15, 17, 20, 22
     32     };
     33     int32_t expectedLineResult[] = {
     34             3, 6, 11, 15, 17, 20, 22
     35     };
     36 
     37     int32_t size = u_strlen(c);
     38     UnicodeString text=UnicodeString(c);
     39 
     40     b = BreakIterator::createWordInstance(locale, status);
     41     if (U_FAILURE(status)) {
     42         errcheckln(status, "Unable to create thai word break iterator. - %s", u_errorName(status));
     43         return;
     44     }
     45     b->setText(text);
     46     p = index = 0;
     47     while ((p=b->next())!=BreakIterator::DONE && p < size) {
     48         if (p != expectedWordResult[index++]) {
     49             errln("Incorrect break given by thai word break iterator. Expected: %d  Got: %d", expectedWordResult[index-1], p);
     50         }
     51     }
     52     delete b;
     53 
     54     b = BreakIterator::createLineInstance(locale, status);
     55     if (U_FAILURE(status)) {
     56         errln("Unable to create thai line break iterator.");
     57         return;
     58     }
     59     b->setText(text);
     60     p = index = 0;
     61     while ((p=b->next())!=BreakIterator::DONE && p < size) {
     62         if (p != expectedLineResult[index++]) {
     63             errln("Incorrect break given by thai line break iterator. Expected: %d  Got: %d", expectedLineResult[index-1], p);
     64         }
     65     }
     66 
     67     delete b;
     68 }
     69 
     70 #define DICTIONARY_TEST_FILE "wordsegments.txt"
     71 
     72 void DictionaryWordTest::TestWordBoundaries() {
     73     UErrorCode      status  = U_ZERO_ERROR;
     74 
     75     TextFile phrases(DICTIONARY_TEST_FILE, "UTF8", status);
     76     if (U_FAILURE(status)) {
     77         dataerrln("Can't open "DICTIONARY_TEST_FILE": %s; skipping test",
     78               u_errorName(status));
     79         return;
     80     }
     81 
     82     // Due to how the word break iterator works,
     83     // scripts for languages that use no spaces should use the correct dictionary by default.
     84     BreakIterator *wb = BreakIterator::createWordInstance("en", status);
     85     if (U_FAILURE(status)) {
     86         dataerrln("Word break iterator can not be opened: %s; skipping test",
     87               u_errorName(status));
     88         return;
     89     }
     90 
     91     int32_t pos, pIdx;
     92     int32_t testLines = 0;
     93     UnicodeString phrase;
     94     while (phrases.readLineSkippingComments(phrase, status, FALSE) && U_SUCCESS(status)) {
     95         UVector breaks(status);
     96 
     97         for (pIdx = 0; pIdx < phrase.length(); pIdx++) {
     98             if (phrase.charAt(pIdx) == 0x007C /* | */) {
     99                 breaks.addElement(pIdx, status);
    100                 phrase.remove(pIdx, 1);
    101             }
    102         }
    103         breaks.addElement(pIdx, status);
    104 
    105         wb->setText(phrase);
    106         int32_t brkArrPos = 0;
    107         while ((pos=wb->next())!=BreakIterator::DONE) {
    108             int32_t expectedPos = breaks.elementAti(brkArrPos);
    109             if (expectedPos != pos) {
    110                 errln("Incorrect forward word break on line %d. Expected: %d  Got: %d",
    111                     phrases.getLineNumber(), breaks.elementAt(brkArrPos), pos);
    112             }
    113             brkArrPos++;
    114         }
    115         brkArrPos = breaks.size() - 1;
    116         while ((pos=wb->previous())!=BreakIterator::DONE) {
    117             brkArrPos--;
    118             int32_t expectedPos = breaks.elementAti(brkArrPos);
    119             if (expectedPos != pos) {
    120                 errln("Incorrect backward word break on line %d. Expected: %d  Got: %d",
    121                     phrases.getLineNumber(), breaks.elementAt(brkArrPos), pos);
    122             }
    123         }
    124         testLines++;
    125     }
    126     delete wb;
    127     logln("%d tests were run.", testLines);
    128 }
    129 
    130 void DictionaryWordTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */)
    131 {
    132     if (exec) logln("TestSuite DictionaryWordTest: ");
    133     TESTCASE_AUTO_BEGIN;
    134     TESTCASE_AUTO(TestThaiBreaks);
    135     TESTCASE_AUTO(TestWordBoundaries);
    136     TESTCASE_AUTO_END;
    137 }
    138 
    139 
    140 #endif
    141