Home | History | Annotate | Download | only in rbbi
      1 /*
      2  *******************************************************************************
      3  * Copyright (C) 1996-2012, International Business Machines Corporation and    *
      4  * others. All Rights Reserved.                                                *
      5  *******************************************************************************
      6  */
      7 package com.ibm.icu.dev.test.rbbi;
      8 
      9 import java.util.ListResourceBundle;
     10 
     11 import com.ibm.icu.dev.test.TestFmwk;
     12 import com.ibm.icu.text.BreakIterator;
     13 import com.ibm.icu.text.RuleBasedBreakIterator;
     14 
     15 // TODO: {dlf} this test currently doesn't test anything!
     16 // You'll notice that the resource that uses the dictionary isn't even on the resource path,
     17 // so the dictionary never gets used.  Good thing, too, because it would throw a security
     18 // exception if run with a security manager.  Not that it would matter, the dictionary
     19 // resource isn't even in the icu source tree!
     20 // In order to fix this:
     21 // 1) make sure english.dict matches the current dictionary format required by dbbi
     22 // 2) make sure english.dict gets included in icu4jtests.jar
     23 // 3) have this test use getResourceAsStream to get a stream on the dictionary, and
     24 //    directly instantiate a DictionaryBasedBreakIterator.  It can use the rules from
     25 //    the appropriate section of ResourceBundle_en_US_TEST.  I'd suggest just copying
     26 //    the rules into this file.
     27 // 4) change the test text by inserting '|' at word breaks, and '||' at line breaks.
     28 // 5) process this text to a) create tables of break indices, and b) clean up the test
     29 //    for the break iterator to work on
     30 //
     31 // This would NOT test the ability to load dictionary-based break iterators through our
     32 // normal resource mechanism.  One could install such a break iterator and its
     33 // resources into the icu4j jar, and it would work, but there's no way to register entire
     34 // resources from outside yet.  Even if there were, the access restrictions are a bit
     35 // difficult to manage, if one wanted to register a break iterator whose code and data
     36 // reside outside the icu4j jar.  Since the code to instantiate would be going through
     37 // two protection domains, each domain would have to allow access to the data-- but
     38 // icu4j's domain wouldn't know about ours.  So we could instantiate before registering
     39 // the break iterator, but this would mean we'd have to fully initialize the dictionary(s)
     40 // at instantiation time, rather than let this be deferred until they are actually needed.
     41 //
     42 // I've done items 2 and 3 above.  Unfortunately, since I haven't done item 1, the
     43 // dictionary builder crashes.  So for now I'm disabling this test.  This is not
     44 // that important, since we have a thai dictionary that we do test thoroughly.
     45 //
     46 
     47 public class SimpleBITest extends TestFmwk{
     48     public static final String testText =
     49 //        "The rain in Spain stays mainly on the plain.  The plains in Spain are mainly pained with rain.";
     50 //"one-two now--  Hah!  You owe me exactly $1,345.67...  Pay up, huh?  By the way, why don't I send you my re\u0301sume\u0301?  This is a line\r\nbreak.";
     51 //"nowisthetimeforallgoodmen...  tocometothehelpoftheircountry";
     52 "When, in the course of human events, it becomes necessary for one people to dissolve the political bonds which have "
     53 //"When,inthecourseofhumanevents,itbecomesnecessaryforonepeopletodissolvethepoliticalbondswhichhave"
     54 + "connectedthemwithanother,andtoassumeamongthepowersoftheearth,theseparateandequalstationtowhichthelaws"
     55 + "ofnatureandofnature'sGodentitlethem,adecentrespecttotheopinionsofmankindrequiresthattheyshoulddeclarethe"
     56 + "causeswhichimpelthemtotheseparation\n"
     57 + "Weholdthesetruthstobeself-evident,thatallmenarecreatedequal,thattheyareendowedbytheirCreatorwithcertain"
     58 + "unalienablerights,thatamongthesearelife,libertyandthepursuitofhappiness.Thattosecuretheserights,governmentsare"
     59 + "institutedamongmen,derivingtheirjustpowersfromtheconsentofthegoverned.Thatwheneveranyformofgovernment"
     60 + "becomesdestructivetotheseends,itistherightofthepeopletoalterortoabolishit,andtoinstitutenewgovernment,laying"
     61 + "itsfoundationonsuchprinciplesandorganizingitspowersinsuchform,astothemshallseemmostlikelytoeffecttheirsafety"
     62 + "andhappiness.Prudence,indeed,willdictatethatgovernmentslongestablishedshouldnotbechangedforlightandtransient"
     63 + "causes;andaccordinglyallexperiencehathshownthatmankindaremoredisposedtosuffer,whileevilsaresufferable,than"
     64 + "torightthemselvesbyabolishingtheformstowhichtheyareaccustomed.Butwhenalongtrainofabusesandusurpations,"
     65 + "pursuinginvariablythesameobjectevincesadesigntoreducethemunderabsolutedespotism,itistheirright,itistheirduty,"
     66 + "tothrowoffsuchgovernment,andtoprovidenewguardsfortheirfuturesecurity.--Suchhasbeenthepatientsufferanceof"
     67 + "thesecolonies;andsuchisnowthenecessitywhichconstrainsthemtoaltertheirformersystemsofgovernment.Thehistory"
     68 + "ofthepresentKingofGreatBritainisahistoryofrepeatedinjuriesandusurpations,allhavingindirectobjectthe"
     69 + "establishmentofanabsolutetyrannyoverthesestates.Toprovethis,letfactsbesubmittedtoacandidworld.\n"
     70 + "Hehasrefusedhisassenttolaws,themostwholesomeandnecessaryforthepublicgood.\n"
     71 + "Hehasforbiddenhisgovernorstopasslawsofimmediateandpressingimportance,unlesssuspendedintheiroperationtill"
     72 + "hisassentshouldbeobtained;andwhensosuspended,hehasutterlyneglectedtoattendtothem.\n"
     73 + "Hehasrefusedtopassotherlawsfortheaccommodationoflargedistrictsofpeople,unlessthosepeoplewouldrelinquish"
     74 + "therightofrepresentationinthelegislature,arightinestimabletothemandformidabletotyrantsonly.\n"
     75 + "Hehascalledtogetherlegislativebodiesatplacesunusual,uncomfortable,anddistantfromthedepositoryoftheirpublic"
     76 + "records,forthesolepurposeoffatiguingthemintocompliancewithhismeasures.\n"
     77 + "Hehasdissolvedrepresentativehousesrepeatedly,foropposingwithmanlyfirmnesshisinvasionsontherightsofthepeople.\n"
     78 + "Hehasrefusedforalongtime,aftersuchdissolutions,tocauseotherstobeelected;wherebythelegislativepowers,"
     79 + "incapableofannihilation,havereturnedtothepeopleatlargefortheirexercise;thestateremaininginthemeantimeexposed"
     80 + "toallthedangersofinvasionfromwithout,andconvulsionswithin.\n"
     81 + "Hehasendeavoredtopreventthepopulationofthesestates;forthatpurposeobstructingthelawsfornaturalizationof"
     82 + "foreigners;refusingtopassotherstoencouragetheirmigrationhither,andraisingtheconditionsofnewappropriationsof"
     83 + "lands.\n"
     84 + "Hehasobstructedtheadministrationofjustice,byrefusinghisassenttolawsforestablishingjudiciarypowers.\n"
     85 + "Hehasmadejudgesdependentonhiswillalone,forthetenureoftheiroffices,andtheamountandpaymentoftheirsalaries.\n"
     86 + "Hehaserectedamultitudeofnewoffices,andsenthitherswarmsofofficerstoharassourpeople,andeatouttheir"
     87 + "substance.\n"
     88 + "Hehaskeptamongus,intimesofpeace,standingarmieswithouttheconsentofourlegislature.\n"
     89 + "Hehasaffectedtorenderthemilitaryindependentofandsuperiortocivilpower.\n"
     90 + "Hehascombinedwithotherstosubjectustoajurisdictionforeigntoourconstitution,andunacknowledgedbyourlaws;"
     91 + "givinghisassenttotheiractsofpretendedlegislation:\n"
     92 + "Forquarteringlargebodiesofarmedtroopsamongus:\n"
     93 + "Forprotectingthem,bymocktrial,frompunishmentforanymurderswhichtheyshouldcommitontheinhabitantsofthese"
     94 + "states:\n"
     95 + "Forcuttingoffourtradewithallpartsoftheworld:\n"
     96 + "Forimposingtaxesonuswithoutourconsent:\n"
     97 + "Fordeprivingusinmanycases,ofthebenefitsoftrialbyjury:\n"
     98 + "Fortransportingusbeyondseastobetriedforpretendedoffenses:\n"
     99 + "ForabolishingthefreesystemofEnglishlawsinaneighboringprovince,establishingthereinanarbitrarygovernment,and"
    100 + "enlargingitsboundariessoastorenderitatonceanexampleandfitinstrumentforintroducingthesameabsoluteruleinthese"
    101 + "colonies:\n"
    102 + "Fortakingawayourcharters,abolishingourmostvaluablelaws,andalteringfundamentallytheformsofourgovernments:\n"
    103 + "Forsuspendingourownlegislatures,anddeclaringthemselvesinvestedwithpowertolegislateforusinallcaseswhatsoever.\n"
    104 + "Hehasabdicatedgovernmenthere,bydeclaringusoutofhisprotectionandwagingwaragainstus.\n"
    105 + "Hehasplunderedourseas,ravagedourcoasts,burnedourtowns,anddestroyedthelivesofourpeople.\n"
    106 + "Heisatthistimetransportinglargearmiesofforeignmercenariestocompletetheworksofdeath,desolationandtyranny,"
    107 + "alreadybegunwithcircumstancesofcrueltyandperfidyscarcelyparalleledinthemostbarbarousages,andtotalyunworth"
    108 + "theheadofacivilizednation.\n"
    109 + "Hehasconstrainedourfellowcitizenstakencaptiveonthehighseastobeararmsagainsttheircountry,tobecomethe"
    110 + "executionersoftheirfriendsandbrethren,ortofallthemselvesbytheirhands.\n"
    111 + "Hehasexciteddomesticinsurrectionsamongstus,andhasendeavoredtobringontheinhabitantsofourfrontiers,the"
    112 + "mercilessIndiansavages,whoseknownruleofwarfare,isundistinguisheddestructionofallages,sexesandconditions.\n"
    113 + "Ineverystageoftheseoppressionswehavepetitionedforredressinthemosthumbleterms:ourrepeatedpetitionshave"
    114 + "beenansweredonlybyrepeatedinjury.Aprince,whosecharacteristhusmarkedbyeveryactwhichmaydefineatyrant,is"
    115 + "unfittobetherulerofafreepeople.\n"
    116 + "NorhavewebeenwantinginattentiontoourBritishbrethren.Wehavewarnedthemfromtimetotimeofattemptsbytheir"
    117 + "legislaturetoextendanunwarrantablejurisdictionoverus.Wehaveremindedthemofthecircumstancesofouremigration"
    118 + "andsettlementhere.Wehaveappealedtotheirnativejusticeandmagnanimity,andwehaveconjuredthembythetiesofour"
    119 + "commonkindredtodisavowtheseusurpations,which,wouldinevitablyinterruptourconnectionsandcorrespondence.We"
    120 + "must,therefore,acquiesceinthenecessity,whichdenouncesourseparation,andholdthem,asweholdtherestofmankind,"
    121 + "enemiesinwar,inpeacefriends.\n"
    122 + "We,therefore,therepresentativesoftheUnitedStatesofAmerica,inGeneralCongress,assembled,appealingtothe"
    123 + "SupremeJudgeoftheworldfortherectitudeofourintentions,do,inthename,andbytheauthorityofthegoodpeopleof"
    124 + "thesecolonies,solemnlypublishanddeclare,thattheseunitedcoloniesare,andofrightoughttobefreeandindependent"
    125 + "states;thattheyareabsolvedfromallallegiancetotheBritishCrown,andthatallpoliticalconnectionbetweenthemandthe"
    126 + "stateofGreatBritain,isandoughttobetotallydissolved;andthatasfreeandindependentstates,theyhavefullpowerto"
    127 + "leveywar,concludepeace,contractalliances,establishcommerce,andtodoallotheractsandthingswhichindependent"
    128 + "statesmayofrightdo.Andforthesupportofthisdeclaration,withafirmrelianceontheprotectionofDivineProvidence,we"
    129 + "mutuallypledgetoeachotherourlives,ourfortunesandoursacredhonor.\n";
    130 
    131     public static void main(String[] args) throws Exception {
    132         new SimpleBITest().run(args);
    133     }
    134 
    135     protected boolean validate() {
    136         // TODO: remove when english.dict gets fixed
    137         return false;
    138     }
    139 
    140     private BreakIterator createTestIterator(int kind) {
    141         final String bname = "com.ibm.icu.dev.test.rbbi.BreakIteratorRules_en_US_TEST";
    142 
    143         BreakIterator iter = null;
    144 
    145         ListResourceBundle bundle = null;
    146         try {
    147             Class cls = Class.forName(bname);
    148             bundle = (ListResourceBundle)cls.newInstance();
    149         }
    150         catch (Exception e) {
    151             errln("could not create bundle: " + bname + "exception: " + e.getMessage());
    152             return null;
    153         }
    154 
    155         final String[] kindNames = {
    156             "Character", "Word", "Line", "Sentence"
    157         };
    158         String rulesName = kindNames[kind] + "BreakRules";
    159 
    160         String[] classNames = bundle.getStringArray("BreakIteratorClasses");
    161         String rules = bundle.getString(rulesName);
    162         if (classNames[kind].equals("RuleBasedBreakIterator")) {
    163             iter = new RuleBasedBreakIterator(rules);
    164         }
    165         if (iter == null) {
    166             errln("could not create iterator");
    167         }
    168 
    169         return iter;
    170     }
    171 
    172     public void testWordBreak() throws Exception {
    173         BreakIterator wordBreak = createTestIterator(BreakIterator.KIND_WORD);
    174         int breaks = doTest(wordBreak);
    175         logln(String.valueOf(breaks));
    176     }
    177 
    178     public void testLineBreak() throws Exception {
    179         BreakIterator lineBreak = createTestIterator(BreakIterator.KIND_LINE);
    180         int breaks = doTest(lineBreak);
    181         logln(String.valueOf(breaks));
    182     }
    183 
    184     public void testSentenceBreak() throws Exception {
    185         BreakIterator sentenceBreak = createTestIterator(BreakIterator.KIND_SENTENCE);
    186         int breaks = doTest(sentenceBreak);
    187         logln(String.valueOf(breaks));
    188     }
    189 
    190     private int doTest(BreakIterator bi) {
    191         // forward
    192         bi.setText(testText);
    193         int p = bi.first();
    194         int lastP = p;
    195         String fragment;
    196         int breaks = 0;
    197         logln("Forward...");
    198         while (p != BreakIterator.DONE) {
    199             p = bi.next();
    200             if (p != BreakIterator.DONE) {
    201                 fragment = testText.substring(lastP, p);
    202             } else {
    203                 fragment = testText.substring(lastP);
    204             }
    205             debugPrintln(": >" + fragment + "<");
    206             ++breaks;
    207             lastP = p;
    208         }
    209         return breaks;
    210     }
    211 
    212     private void debugPrintln(String s) {
    213         final String zeros = "0000";
    214         String temp;
    215         StringBuffer out = new StringBuffer();
    216         for (int i = 0; i < s.length(); i++) {
    217             char c = s.charAt(i);
    218             if (c >= ' ' && c < '\u007f')
    219                 out.append(c);
    220             else {
    221                 out.append("\\u");
    222                 temp = Integer.toHexString((int)c);
    223                 out.append(zeros.substring(0, 4 - temp.length()));
    224                 out.append(temp);
    225             }
    226         }
    227         logln(out.toString());
    228     }
    229 
    230 /*    private void debugPrintln2(String s) {
    231         StringBuffer out = new StringBuffer();
    232         for (int i = 0; i < s.length(); i++) {
    233             char c = s.charAt(i);
    234             if (c >= '\u0100')
    235                 out.append("<" + ((int)c - 0x100) + ">");
    236             else
    237                 out.append(c);
    238         }
    239         logln(out.toString());
    240     }*/
    241 }
    242 
    243