Home | History | Annotate | Download | only in smartselect
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef LIBTEXTCLASSIFIER_SMARTSELECT_TYPES_H_
     18 #define LIBTEXTCLASSIFIER_SMARTSELECT_TYPES_H_
     19 
     20 #include <ostream>
     21 #include <string>
     22 #include <utility>
     23 
     24 namespace libtextclassifier {
     25 
     26 constexpr int kInvalidIndex = -1;
     27 
     28 // Index for a 0-based array of tokens.
     29 using TokenIndex = int;
     30 
     31 // Index for a 0-based array of codepoints.
     32 using CodepointIndex = int;
     33 
     34 // Marks a span in a sequence of codepoints. The first element is the index of
     35 // the first codepoint of the span, and the second element is the index of the
     36 // codepoint one past the end of the span.
     37 using CodepointSpan = std::pair<CodepointIndex, CodepointIndex>;
     38 
     39 // Marks a span in a sequence of tokens. The first element is the index of the
     40 // first token in the span, and the second element is the index of the token one
     41 // past the end of the span.
     42 using TokenSpan = std::pair<TokenIndex, TokenIndex>;
     43 
     44 // Token holds a token, its position in the original string and whether it was
     45 // part of the input span.
     46 struct Token {
     47   std::string value;
     48   CodepointIndex start;
     49   CodepointIndex end;
     50 
     51   // Whether the token is a padding token.
     52   bool is_padding;
     53 
     54   // Default constructor constructs the padding-token.
     55   Token()
     56       : value(""), start(kInvalidIndex), end(kInvalidIndex), is_padding(true) {}
     57 
     58   Token(const std::string& arg_value, CodepointIndex arg_start,
     59         CodepointIndex arg_end)
     60       : value(arg_value), start(arg_start), end(arg_end), is_padding(false) {}
     61 
     62   bool operator==(const Token& other) const {
     63     return value == other.value && start == other.start && end == other.end &&
     64            is_padding == other.is_padding;
     65   }
     66 
     67   bool IsContainedInSpan(CodepointSpan span) const {
     68     return start >= span.first && end <= span.second;
     69   }
     70 };
     71 
     72 // Pretty-printing function for Token.
     73 inline std::ostream& operator<<(std::ostream& os, const Token& token) {
     74   return os << "Token(\"" << token.value << "\", " << token.start << ", "
     75             << token.end << ", is_padding=" << token.is_padding << ")";
     76 }
     77 
     78 }  // namespace libtextclassifier
     79 
     80 #endif  // LIBTEXTCLASSIFIER_SMARTSELECT_TYPES_H_
     81