1 /* 2 ********************************************************************** 3 * Copyright (C) 2011-2011, International Business Machines Corporation 4 * and others. All Rights Reserved. 5 ********************************************************************** 6 ************************************************************************ 7 * Date Name Description 8 * 05/14/2011 grhoten Creation. 9 ************************************************************************/ 10 11 #include "unicode/utypes.h" 12 13 #if !UCONFIG_NO_BREAK_ITERATION 14 15 #include "dicttest.h" 16 #include "textfile.h" 17 #include "uvector.h" 18 #include "unicode/rbbi.h" 19 20 void DictionaryWordTest::TestThaiBreaks() { 21 UErrorCode status=U_ZERO_ERROR; 22 BreakIterator* b; 23 Locale locale = Locale("th"); 24 int32_t p, index; 25 UChar c[]= { 26 0x0E01, 0x0E39, 0x0020, 0x0E01, 0x0E34, 0x0E19, 0x0E01, 0x0E38, 0x0E49, 0x0E07, 0x0020, 0x0E1B, 27 0x0E34, 0x0E49, 0x0E48, 0x0E07, 0x0E2D, 0x0E22, 0x0E39, 0x0E48, 0x0E43, 0x0E19, 28 0x0E16, 0x0E49, 0x0E33, 0x0000 29 }; 30 int32_t expectedWordResult[] = { 31 2, 3, 6, 10, 11, 15, 17, 20, 22 32 }; 33 int32_t expectedLineResult[] = { 34 3, 6, 11, 15, 17, 20, 22 35 }; 36 37 int32_t size = u_strlen(c); 38 UnicodeString text=UnicodeString(c); 39 40 b = BreakIterator::createWordInstance(locale, status); 41 if (U_FAILURE(status)) { 42 errcheckln(status, "Unable to create thai word break iterator. - %s", u_errorName(status)); 43 return; 44 } 45 b->setText(text); 46 p = index = 0; 47 while ((p=b->next())!=BreakIterator::DONE && p < size) { 48 if (p != expectedWordResult[index++]) { 49 errln("Incorrect break given by thai word break iterator. Expected: %d Got: %d", expectedWordResult[index-1], p); 50 } 51 } 52 delete b; 53 54 b = BreakIterator::createLineInstance(locale, status); 55 if (U_FAILURE(status)) { 56 errln("Unable to create thai line break iterator."); 57 return; 58 } 59 b->setText(text); 60 p = index = 0; 61 while ((p=b->next())!=BreakIterator::DONE && p < size) { 62 if (p != expectedLineResult[index++]) { 63 errln("Incorrect break given by thai line break iterator. Expected: %d Got: %d", expectedLineResult[index-1], p); 64 } 65 } 66 67 delete b; 68 } 69 70 #define DICTIONARY_TEST_FILE "wordsegments.txt" 71 72 void DictionaryWordTest::TestWordBoundaries() { 73 UErrorCode status = U_ZERO_ERROR; 74 75 TextFile phrases(DICTIONARY_TEST_FILE, "UTF8", status); 76 if (U_FAILURE(status)) { 77 dataerrln("Can't open "DICTIONARY_TEST_FILE": %s; skipping test", 78 u_errorName(status)); 79 return; 80 } 81 82 // Due to how the word break iterator works, 83 // scripts for languages that use no spaces should use the correct dictionary by default. 84 BreakIterator *wb = BreakIterator::createWordInstance("en", status); 85 if (U_FAILURE(status)) { 86 dataerrln("Word break iterator can not be opened: %s; skipping test", 87 u_errorName(status)); 88 return; 89 } 90 91 int32_t pos, pIdx; 92 int32_t testLines = 0; 93 UnicodeString phrase; 94 while (phrases.readLineSkippingComments(phrase, status, FALSE) && U_SUCCESS(status)) { 95 UVector breaks(status); 96 97 for (pIdx = 0; pIdx < phrase.length(); pIdx++) { 98 if (phrase.charAt(pIdx) == 0x007C /* | */) { 99 breaks.addElement(pIdx, status); 100 phrase.remove(pIdx, 1); 101 } 102 } 103 breaks.addElement(pIdx, status); 104 105 wb->setText(phrase); 106 int32_t brkArrPos = 0; 107 while ((pos=wb->next())!=BreakIterator::DONE) { 108 int32_t expectedPos = breaks.elementAti(brkArrPos); 109 if (expectedPos != pos) { 110 errln("Incorrect forward word break on line %d. Expected: %d Got: %d", 111 phrases.getLineNumber(), breaks.elementAt(brkArrPos), pos); 112 } 113 brkArrPos++; 114 } 115 brkArrPos = breaks.size() - 1; 116 while ((pos=wb->previous())!=BreakIterator::DONE) { 117 brkArrPos--; 118 int32_t expectedPos = breaks.elementAti(brkArrPos); 119 if (expectedPos != pos) { 120 errln("Incorrect backward word break on line %d. Expected: %d Got: %d", 121 phrases.getLineNumber(), breaks.elementAt(brkArrPos), pos); 122 } 123 } 124 testLines++; 125 } 126 delete wb; 127 logln("%d tests were run.", testLines); 128 } 129 130 void DictionaryWordTest::runIndexedTest( int32_t index, UBool exec, const char* &name, char* /*par */) 131 { 132 if (exec) logln("TestSuite DictionaryWordTest: "); 133 TESTCASE_AUTO_BEGIN; 134 TESTCASE_AUTO(TestThaiBreaks); 135 TESTCASE_AUTO(TestWordBoundaries); 136 TESTCASE_AUTO_END; 137 } 138 139 140 #endif 141