Home | History | Annotate | Download | only in break-iterator
      1 // Copyright 2013 the V8 project authors. All rights reserved.
      2 // Redistribution and use in source and binary forms, with or without
      3 // modification, are permitted provided that the following conditions are
      4 // met:
      5 //
      6 //     * Redistributions of source code must retain the above copyright
      7 //       notice, this list of conditions and the following disclaimer.
      8 //     * Redistributions in binary form must reproduce the above
      9 //       copyright notice, this list of conditions and the following
     10 //       disclaimer in the documentation and/or other materials provided
     11 //       with the distribution.
     12 //     * Neither the name of Google Inc. nor the names of its
     13 //       contributors may be used to endorse or promote products derived
     14 //       from this software without specific prior written permission.
     15 //
     16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     27 
     28 // Segment plain Chinese sentence and check results.
     29 
     30 var iterator = new Intl.v8BreakIterator(['zh']);
     31 
     32 var textToSegment = '\u56FD\u52A1\u9662\u5173\u4E8E\u300A\u571F\u5730' +
     33                     '\u623F\u5C4B\u7BA1\u7406\u6761\u4F8B\u300B';
     34 iterator.adoptText(textToSegment);
     35 
     36 var slices = [];
     37 var types = [];
     38 var pos = iterator.first();
     39 while (pos !== -1) {
     40   var nextPos = iterator.next();
     41   if (nextPos === -1) break;
     42 
     43   slices.push(textToSegment.slice(pos, nextPos));
     44   types.push(iterator.breakType());
     45 
     46   pos = nextPos;
     47 }
     48 
     49 assertEquals('\u56FD\u52A1\u9662', slices[0]);
     50 assertEquals('\u5173\u4E8E', slices[1]);
     51 assertEquals('\u300A', slices[2]);
     52 assertEquals('\u571F\u5730', slices[3]);
     53 assertEquals('\u623F\u5C4B', slices[4]);
     54 assertEquals('\u7BA1\u7406', slices[5]);
     55 assertEquals('\u6761\u4F8B', slices[6]);
     56 assertEquals('\u300B', slices[7]);
     57 
     58 assertEquals('ideo', types[0]);
     59 assertEquals('ideo', types[1]);
     60 assertEquals('none', types[2]);
     61 assertEquals('ideo', types[3]);
     62 assertEquals('ideo', types[4]);
     63 assertEquals('none', types[types.length - 1]);
     64