1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "annotator/cached-features.h" 18 19 #include "utils/base/logging.h" 20 #include "utils/tensor-view.h" 21 22 namespace libtextclassifier3 { 23 24 namespace { 25 26 int CalculateOutputFeaturesSize(const FeatureProcessorOptions* options, 27 int feature_vector_size) { 28 const bool bounds_sensitive_enabled = 29 options->bounds_sensitive_features() && 30 options->bounds_sensitive_features()->enabled(); 31 32 int num_extracted_tokens = 0; 33 if (bounds_sensitive_enabled) { 34 const FeatureProcessorOptions_::BoundsSensitiveFeatures* config = 35 options->bounds_sensitive_features(); 36 num_extracted_tokens += config->num_tokens_before(); 37 num_extracted_tokens += config->num_tokens_inside_left(); 38 num_extracted_tokens += config->num_tokens_inside_right(); 39 num_extracted_tokens += config->num_tokens_after(); 40 if (config->include_inside_bag()) { 41 ++num_extracted_tokens; 42 } 43 } else { 44 num_extracted_tokens = 2 * options->context_size() + 1; 45 } 46 47 int output_features_size = num_extracted_tokens * feature_vector_size; 48 49 if (bounds_sensitive_enabled && 50 options->bounds_sensitive_features()->include_inside_length()) { 51 ++output_features_size; 52 } 53 54 return output_features_size; 55 } 56 57 } // namespace 58 59 std::unique_ptr<CachedFeatures> CachedFeatures::Create( 60 const TokenSpan& extraction_span, 61 std::unique_ptr<std::vector<float>> features, 62 std::unique_ptr<std::vector<float>> padding_features, 63 const FeatureProcessorOptions* options, int feature_vector_size) { 64 const int min_feature_version = 65 options->bounds_sensitive_features() && 66 options->bounds_sensitive_features()->enabled() 67 ? 2 68 : 1; 69 if (options->feature_version() < min_feature_version) { 70 TC3_LOG(ERROR) << "Unsupported feature version."; 71 return nullptr; 72 } 73 74 std::unique_ptr<CachedFeatures> cached_features(new CachedFeatures()); 75 cached_features->extraction_span_ = extraction_span; 76 cached_features->features_ = std::move(features); 77 cached_features->padding_features_ = std::move(padding_features); 78 cached_features->options_ = options; 79 80 cached_features->output_features_size_ = 81 CalculateOutputFeaturesSize(options, feature_vector_size); 82 83 return cached_features; 84 } 85 86 void CachedFeatures::AppendClickContextFeaturesForClick( 87 int click_pos, std::vector<float>* output_features) const { 88 click_pos -= extraction_span_.first; 89 90 AppendFeaturesInternal( 91 /*intended_span=*/ExpandTokenSpan(SingleTokenSpan(click_pos), 92 options_->context_size(), 93 options_->context_size()), 94 /*read_mask_span=*/{0, TokenSpanSize(extraction_span_)}, output_features); 95 } 96 97 void CachedFeatures::AppendBoundsSensitiveFeaturesForSpan( 98 TokenSpan selected_span, std::vector<float>* output_features) const { 99 const FeatureProcessorOptions_::BoundsSensitiveFeatures* config = 100 options_->bounds_sensitive_features(); 101 102 selected_span.first -= extraction_span_.first; 103 selected_span.second -= extraction_span_.first; 104 105 // Append the features for tokens around the left bound. Masks out tokens 106 // after the right bound, so that if num_tokens_inside_left goes past it, 107 // padding tokens will be used. 108 AppendFeaturesInternal( 109 /*intended_span=*/{selected_span.first - config->num_tokens_before(), 110 selected_span.first + 111 config->num_tokens_inside_left()}, 112 /*read_mask_span=*/{0, selected_span.second}, output_features); 113 114 // Append the features for tokens around the right bound. Masks out tokens 115 // before the left bound, so that if num_tokens_inside_right goes past it, 116 // padding tokens will be used. 117 AppendFeaturesInternal( 118 /*intended_span=*/{selected_span.second - 119 config->num_tokens_inside_right(), 120 selected_span.second + config->num_tokens_after()}, 121 /*read_mask_span=*/{selected_span.first, TokenSpanSize(extraction_span_)}, 122 output_features); 123 124 if (config->include_inside_bag()) { 125 AppendBagFeatures(selected_span, output_features); 126 } 127 128 if (config->include_inside_length()) { 129 output_features->push_back( 130 static_cast<float>(TokenSpanSize(selected_span))); 131 } 132 } 133 134 void CachedFeatures::AppendFeaturesInternal( 135 const TokenSpan& intended_span, const TokenSpan& read_mask_span, 136 std::vector<float>* output_features) const { 137 const TokenSpan copy_span = 138 IntersectTokenSpans(intended_span, read_mask_span); 139 for (int i = intended_span.first; i < copy_span.first; ++i) { 140 AppendPaddingFeatures(output_features); 141 } 142 output_features->insert( 143 output_features->end(), 144 features_->begin() + copy_span.first * NumFeaturesPerToken(), 145 features_->begin() + copy_span.second * NumFeaturesPerToken()); 146 for (int i = copy_span.second; i < intended_span.second; ++i) { 147 AppendPaddingFeatures(output_features); 148 } 149 } 150 151 void CachedFeatures::AppendPaddingFeatures( 152 std::vector<float>* output_features) const { 153 output_features->insert(output_features->end(), padding_features_->begin(), 154 padding_features_->end()); 155 } 156 157 void CachedFeatures::AppendBagFeatures( 158 const TokenSpan& bag_span, std::vector<float>* output_features) const { 159 const int offset = output_features->size(); 160 output_features->resize(output_features->size() + NumFeaturesPerToken()); 161 for (int i = bag_span.first; i < bag_span.second; ++i) { 162 for (int j = 0; j < NumFeaturesPerToken(); ++j) { 163 (*output_features)[offset + j] += 164 (*features_)[i * NumFeaturesPerToken() + j] / TokenSpanSize(bag_span); 165 } 166 } 167 } 168 169 int CachedFeatures::NumFeaturesPerToken() const { 170 return padding_features_->size(); 171 } 172 173 } // namespace libtextclassifier3 174