1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // http://code.google.com/p/protobuf/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 // Author: kenton (at) google.com (Kenton Varda) 32 // Based on original Protocol Buffers design by 33 // Sanjay Ghemawat, Jeff Dean, and others. 34 // 35 // Interface for manipulating databases of descriptors. 36 37 #ifndef GOOGLE_PROTOBUF_DESCRIPTOR_DATABASE_H__ 38 #define GOOGLE_PROTOBUF_DESCRIPTOR_DATABASE_H__ 39 40 #include <map> 41 #include <string> 42 #include <utility> 43 #include <vector> 44 #include <google/protobuf/descriptor.h> 45 46 namespace google { 47 namespace protobuf { 48 49 // Defined in this file. 50 class DescriptorDatabase; 51 class SimpleDescriptorDatabase; 52 class EncodedDescriptorDatabase; 53 class DescriptorPoolDatabase; 54 class MergedDescriptorDatabase; 55 56 // Abstract interface for a database of descriptors. 57 // 58 // This is useful if you want to create a DescriptorPool which loads 59 // descriptors on-demand from some sort of large database. If the database 60 // is large, it may be inefficient to enumerate every .proto file inside it 61 // calling DescriptorPool::BuildFile() for each one. Instead, a DescriptorPool 62 // can be created which wraps a DescriptorDatabase and only builds particular 63 // descriptors when they are needed. 64 class LIBPROTOBUF_EXPORT DescriptorDatabase { 65 public: 66 inline DescriptorDatabase() {} 67 virtual ~DescriptorDatabase(); 68 69 // Find a file by file name. Fills in in *output and returns true if found. 70 // Otherwise, returns false, leaving the contents of *output undefined. 71 virtual bool FindFileByName(const string& filename, 72 FileDescriptorProto* output) = 0; 73 74 // Find the file that declares the given fully-qualified symbol name. 75 // If found, fills in *output and returns true, otherwise returns false 76 // and leaves *output undefined. 77 virtual bool FindFileContainingSymbol(const string& symbol_name, 78 FileDescriptorProto* output) = 0; 79 80 // Find the file which defines an extension extending the given message type 81 // with the given field number. If found, fills in *output and returns true, 82 // otherwise returns false and leaves *output undefined. containing_type 83 // must be a fully-qualified type name. 84 virtual bool FindFileContainingExtension(const string& containing_type, 85 int field_number, 86 FileDescriptorProto* output) = 0; 87 88 // Finds the tag numbers used by all known extensions of 89 // extendee_type, and appends them to output in an undefined 90 // order. This method is best-effort: it's not guaranteed that the 91 // database will find all extensions, and it's not guaranteed that 92 // FindFileContainingExtension will return true on all of the found 93 // numbers. Returns true if the search was successful, otherwise 94 // returns false and leaves output unchanged. 95 // 96 // This method has a default implementation that always returns 97 // false. 98 virtual bool FindAllExtensionNumbers(const string& extendee_type, 99 vector<int>* output) { 100 return false; 101 } 102 103 private: 104 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DescriptorDatabase); 105 }; 106 107 // A DescriptorDatabase into which you can insert files manually. 108 // 109 // FindFileContainingSymbol() is fully-implemented. When you add a file, its 110 // symbols will be indexed for this purpose. Note that the implementation 111 // may return false positives, but only if it isn't possible for the symbol 112 // to be defined in any other file. In particular, if a file defines a symbol 113 // "Foo", then searching for "Foo.[anything]" will match that file. This way, 114 // the database does not need to aggressively index all children of a symbol. 115 // 116 // FindFileContainingExtension() is mostly-implemented. It works if and only 117 // if the original FieldDescriptorProto defining the extension has a 118 // fully-qualified type name in its "extendee" field (i.e. starts with a '.'). 119 // If the extendee is a relative name, SimpleDescriptorDatabase will not 120 // attempt to resolve the type, so it will not know what type the extension is 121 // extending. Therefore, calling FindFileContainingExtension() with the 122 // extension's containing type will never actually find that extension. Note 123 // that this is an unlikely problem, as all FileDescriptorProtos created by the 124 // protocol compiler (as well as ones created by calling 125 // FileDescriptor::CopyTo()) will always use fully-qualified names for all 126 // types. You only need to worry if you are constructing FileDescriptorProtos 127 // yourself, or are calling compiler::Parser directly. 128 class LIBPROTOBUF_EXPORT SimpleDescriptorDatabase : public DescriptorDatabase { 129 public: 130 SimpleDescriptorDatabase(); 131 ~SimpleDescriptorDatabase(); 132 133 // Adds the FileDescriptorProto to the database, making a copy. The object 134 // can be deleted after Add() returns. Returns false if the file conflicted 135 // with a file already in the database, in which case an error will have 136 // been written to GOOGLE_LOG(ERROR). 137 bool Add(const FileDescriptorProto& file); 138 139 // Adds the FileDescriptorProto to the database and takes ownership of it. 140 bool AddAndOwn(const FileDescriptorProto* file); 141 142 // implements DescriptorDatabase ----------------------------------- 143 bool FindFileByName(const string& filename, 144 FileDescriptorProto* output); 145 bool FindFileContainingSymbol(const string& symbol_name, 146 FileDescriptorProto* output); 147 bool FindFileContainingExtension(const string& containing_type, 148 int field_number, 149 FileDescriptorProto* output); 150 bool FindAllExtensionNumbers(const string& extendee_type, 151 vector<int>* output); 152 153 private: 154 // So that it can use DescriptorIndex. 155 friend class EncodedDescriptorDatabase; 156 157 // An index mapping file names, symbol names, and extension numbers to 158 // some sort of values. 159 template <typename Value> 160 class DescriptorIndex { 161 public: 162 // Helpers to recursively add particular descriptors and all their contents 163 // to the index. 164 bool AddFile(const FileDescriptorProto& file, 165 Value value); 166 bool AddSymbol(const string& name, Value value); 167 bool AddNestedExtensions(const DescriptorProto& message_type, 168 Value value); 169 bool AddExtension(const FieldDescriptorProto& field, 170 Value value); 171 172 Value FindFile(const string& filename); 173 Value FindSymbol(const string& name); 174 Value FindExtension(const string& containing_type, int field_number); 175 bool FindAllExtensionNumbers(const string& containing_type, 176 vector<int>* output); 177 178 private: 179 map<string, Value> by_name_; 180 map<string, Value> by_symbol_; 181 map<pair<string, int>, Value> by_extension_; 182 183 // Invariant: The by_symbol_ map does not contain any symbols which are 184 // prefixes of other symbols in the map. For example, "foo.bar" is a 185 // prefix of "foo.bar.baz" (but is not a prefix of "foo.barbaz"). 186 // 187 // This invariant is important because it means that given a symbol name, 188 // we can find a key in the map which is a prefix of the symbol in O(lg n) 189 // time, and we know that there is at most one such key. 190 // 191 // The prefix lookup algorithm works like so: 192 // 1) Find the last key in the map which is less than or equal to the 193 // search key. 194 // 2) If the found key is a prefix of the search key, then return it. 195 // Otherwise, there is no match. 196 // 197 // I am sure this algorithm has been described elsewhere, but since I 198 // wasn't able to find it quickly I will instead prove that it works 199 // myself. The key to the algorithm is that if a match exists, step (1) 200 // will find it. Proof: 201 // 1) Define the "search key" to be the key we are looking for, the "found 202 // key" to be the key found in step (1), and the "match key" to be the 203 // key which actually matches the serach key (i.e. the key we're trying 204 // to find). 205 // 2) The found key must be less than or equal to the search key by 206 // definition. 207 // 3) The match key must also be less than or equal to the search key 208 // (because it is a prefix). 209 // 4) The match key cannot be greater than the found key, because if it 210 // were, then step (1) of the algorithm would have returned the match 211 // key instead (since it finds the *greatest* key which is less than or 212 // equal to the search key). 213 // 5) Therefore, the found key must be between the match key and the search 214 // key, inclusive. 215 // 6) Since the search key must be a sub-symbol of the match key, if it is 216 // not equal to the match key, then search_key[match_key.size()] must 217 // be '.'. 218 // 7) Since '.' sorts before any other character that is valid in a symbol 219 // name, then if the found key is not equal to the match key, then 220 // found_key[match_key.size()] must also be '.', because any other value 221 // would make it sort after the search key. 222 // 8) Therefore, if the found key is not equal to the match key, then the 223 // found key must be a sub-symbol of the match key. However, this would 224 // contradict our map invariant which says that no symbol in the map is 225 // a sub-symbol of any other. 226 // 9) Therefore, the found key must match the match key. 227 // 228 // The above proof assumes the match key exists. In the case that the 229 // match key does not exist, then step (1) will return some other symbol. 230 // That symbol cannot be a super-symbol of the search key since if it were, 231 // then it would be a match, and we're assuming the match key doesn't exist. 232 // Therefore, step 2 will correctly return no match. 233 234 // Find the last entry in the by_symbol_ map whose key is less than or 235 // equal to the given name. 236 typename map<string, Value>::iterator FindLastLessOrEqual( 237 const string& name); 238 239 // True if either the arguments are equal or super_symbol identifies a 240 // parent symbol of sub_symbol (e.g. "foo.bar" is a parent of 241 // "foo.bar.baz", but not a parent of "foo.barbaz"). 242 bool IsSubSymbol(const string& sub_symbol, const string& super_symbol); 243 244 // Returns true if and only if all characters in the name are alphanumerics, 245 // underscores, or periods. 246 bool ValidateSymbolName(const string& name); 247 }; 248 249 250 DescriptorIndex<const FileDescriptorProto*> index_; 251 vector<const FileDescriptorProto*> files_to_delete_; 252 253 // If file is non-NULL, copy it into *output and return true, otherwise 254 // return false. 255 bool MaybeCopy(const FileDescriptorProto* file, 256 FileDescriptorProto* output); 257 258 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(SimpleDescriptorDatabase); 259 }; 260 261 // Very similar to SimpleDescriptorDatabase, but stores all the descriptors 262 // as raw bytes and generally tries to use as little memory as possible. 263 // 264 // The same caveats regarding FindFileContainingExtension() apply as with 265 // SimpleDescriptorDatabase. 266 class LIBPROTOBUF_EXPORT EncodedDescriptorDatabase : public DescriptorDatabase { 267 public: 268 EncodedDescriptorDatabase(); 269 ~EncodedDescriptorDatabase(); 270 271 // Adds the FileDescriptorProto to the database. The descriptor is provided 272 // in encoded form. The database does not make a copy of the bytes, nor 273 // does it take ownership; it's up to the caller to make sure the bytes 274 // remain valid for the life of the database. Returns false and logs an error 275 // if the bytes are not a valid FileDescriptorProto or if the file conflicted 276 // with a file already in the database. 277 bool Add(const void* encoded_file_descriptor, int size); 278 279 // Like Add(), but makes a copy of the data, so that the caller does not 280 // need to keep it around. 281 bool AddCopy(const void* encoded_file_descriptor, int size); 282 283 // Like FindFileContainingSymbol but returns only the name of the file. 284 bool FindNameOfFileContainingSymbol(const string& symbol_name, 285 string* output); 286 287 // implements DescriptorDatabase ----------------------------------- 288 bool FindFileByName(const string& filename, 289 FileDescriptorProto* output); 290 bool FindFileContainingSymbol(const string& symbol_name, 291 FileDescriptorProto* output); 292 bool FindFileContainingExtension(const string& containing_type, 293 int field_number, 294 FileDescriptorProto* output); 295 bool FindAllExtensionNumbers(const string& extendee_type, 296 vector<int>* output); 297 298 private: 299 SimpleDescriptorDatabase::DescriptorIndex<pair<const void*, int> > index_; 300 vector<void*> files_to_delete_; 301 302 // If encoded_file.first is non-NULL, parse the data into *output and return 303 // true, otherwise return false. 304 bool MaybeParse(pair<const void*, int> encoded_file, 305 FileDescriptorProto* output); 306 307 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(EncodedDescriptorDatabase); 308 }; 309 310 // A DescriptorDatabase that fetches files from a given pool. 311 class LIBPROTOBUF_EXPORT DescriptorPoolDatabase : public DescriptorDatabase { 312 public: 313 DescriptorPoolDatabase(const DescriptorPool& pool); 314 ~DescriptorPoolDatabase(); 315 316 // implements DescriptorDatabase ----------------------------------- 317 bool FindFileByName(const string& filename, 318 FileDescriptorProto* output); 319 bool FindFileContainingSymbol(const string& symbol_name, 320 FileDescriptorProto* output); 321 bool FindFileContainingExtension(const string& containing_type, 322 int field_number, 323 FileDescriptorProto* output); 324 bool FindAllExtensionNumbers(const string& extendee_type, 325 vector<int>* output); 326 327 private: 328 const DescriptorPool& pool_; 329 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(DescriptorPoolDatabase); 330 }; 331 332 // A DescriptorDatabase that wraps two or more others. It first searches the 333 // first database and, if that fails, tries the second, and so on. 334 class LIBPROTOBUF_EXPORT MergedDescriptorDatabase : public DescriptorDatabase { 335 public: 336 // Merge just two databases. The sources remain property of the caller. 337 MergedDescriptorDatabase(DescriptorDatabase* source1, 338 DescriptorDatabase* source2); 339 // Merge more than two databases. The sources remain property of the caller. 340 // The vector may be deleted after the constructor returns but the 341 // DescriptorDatabases need to stick around. 342 MergedDescriptorDatabase(const vector<DescriptorDatabase*>& sources); 343 ~MergedDescriptorDatabase(); 344 345 // implements DescriptorDatabase ----------------------------------- 346 bool FindFileByName(const string& filename, 347 FileDescriptorProto* output); 348 bool FindFileContainingSymbol(const string& symbol_name, 349 FileDescriptorProto* output); 350 bool FindFileContainingExtension(const string& containing_type, 351 int field_number, 352 FileDescriptorProto* output); 353 // Merges the results of calling all databases. Returns true iff any 354 // of the databases returned true. 355 bool FindAllExtensionNumbers(const string& extendee_type, 356 vector<int>* output); 357 358 private: 359 vector<DescriptorDatabase*> sources_; 360 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(MergedDescriptorDatabase); 361 }; 362 363 } // namespace protobuf 364 365 } // namespace google 366 #endif // GOOGLE_PROTOBUF_DESCRIPTOR_DATABASE_H__ 367