Home | History | Annotate | Download | only in smartselect
      1 // Copyright (C) 2017 The Android Open Source Project
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //      http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     syntax = "proto2";

     package libtextclassifier;

     // Generate code that targets the lite protobuf runtime.
     option optimize_for = LITE_RUNTIME;
     19 
     // A half-open range of codepoints, [start, end), paired with the role
     // those codepoints play during tokenization.
     message TokenizationCodepointRange {
       // First codepoint of the range (inclusive).
       optional int32 start = 1;

       // Codepoint one past the last one of the range (exclusive).
       optional int32 end = 2;

       // Tokenization role applied to the codepoints in this range.
       optional Role role = 3;

       // Tokenization role of a codepoint. The numeric values are laid out so
       // that the combined roles at the bottom equal the bitwise OR of the
       // basic ones.
       enum Role {
         // Appends the codepoint to the run of codepoints currently being
         // built.
         DEFAULT_ROLE = 0x0;

         // Ends the current run of codepoints just before this codepoint.
         SPLIT_BEFORE = 0x1;

         // Ends the current run of codepoints just after this codepoint.
         SPLIT_AFTER = 0x2;

         // Drops the codepoint.
         DISCARD_CODEPOINT = 0x4;

         // Frequently used combinations:

         // Splits on the codepoint and discards it; suitable e.g. for the
         // space character.
         // Numerically SPLIT_BEFORE | SPLIT_AFTER | DISCARD_CODEPOINT.
         WHITESPACE_SEPARATOR = 0x7;

         // Turns every codepoint into its own token; suitable e.g. for
         // Chinese characters.
         // Numerically SPLIT_BEFORE | SPLIT_AFTER.
         TOKEN_SEPARATOR = 0x3;
       }
     }
     49