1 // Protocol Buffers - Google's data interchange format 2 // Copyright 2008 Google Inc. All rights reserved. 3 // http://code.google.com/p/protobuf/ 4 // 5 // Redistribution and use in source and binary forms, with or without 6 // modification, are permitted provided that the following conditions are 7 // met: 8 // 9 // * Redistributions of source code must retain the above copyright 10 // notice, this list of conditions and the following disclaimer. 11 // * Redistributions in binary form must reproduce the above 12 // copyright notice, this list of conditions and the following disclaimer 13 // in the documentation and/or other materials provided with the 14 // distribution. 15 // * Neither the name of Google Inc. nor the names of its 16 // contributors may be used to endorse or promote products derived from 17 // this software without specific prior written permission. 18 // 19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 31 // Author: kenton (at) google.com (Kenton Varda) 32 // Based on original Protocol Buffers design by 33 // Sanjay Ghemawat, Jeff Dean, and others. 
//
// This file contains the CodedInputStream and CodedOutputStream classes,
// which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
// and allow you to read or write individual pieces of data in various
// formats. In particular, these implement the varint encoding for
// integers, a simple variable-length encoding in which smaller numbers
// take fewer bytes.
//
// Typically these classes will only be used internally by the protocol
// buffer library in order to encode and decode protocol buffers. Clients
// of the library only need to know about these classes if they wish to write
// custom message parsing or serialization procedures.
//
// CodedOutputStream example:
//   // Write some data to "myfile". First we write a 4-byte "magic number"
//   // to identify the file type, then write a length-delimited string. The
//   // string is composed of a varint giving the length followed by the raw
//   // bytes.
//   int fd = open("myfile", O_WRONLY);
//   ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
//   CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
//
//   int magic_number = 1234;
//   char text[] = "Hello world!";
//   coded_output->WriteLittleEndian32(magic_number);
//   coded_output->WriteVarint32(strlen(text));
//   coded_output->WriteRaw(text, strlen(text));
//
//   delete coded_output;
//   delete raw_output;
//   close(fd);
//
// CodedInputStream example:
//   // Read a file created by the above code.
//   int fd = open("myfile", O_RDONLY);
//   ZeroCopyInputStream* raw_input = new FileInputStream(fd);
//   CodedInputStream* coded_input = new CodedInputStream(raw_input);
//
//   uint32 magic_number;
//   coded_input->ReadLittleEndian32(&magic_number);
//   if (magic_number != 1234) {
//     cerr << "File not in expected format." << endl;
//     return;
//   }
//
//   uint32 size;
//   coded_input->ReadVarint32(&size);
//
//   char* text = new char[size + 1];
//   coded_input->ReadRaw(text, size);
//   text[size] = '\0';
//
//   delete coded_input;
//   delete raw_input;
//   close(fd);
//
//   cout << "Text is: " << text << endl;
//   delete [] text;
//
// For those who are interested, varint encoding is defined as follows:
//
// The encoding operates on unsigned integers of up to 64 bits in length.
// Each byte of the encoded value has the format:
// * bits 0-6: Seven bits of the number being encoded.
// * bit 7: Zero if this is the last byte in the encoding (in which
//          case all remaining bits of the number are zero) or 1 if
//          more bytes follow.
// The first byte contains the least-significant 7 bits of the number, the
// second byte (if present) contains the next-least-significant 7 bits,
// and so on. So, the binary number 1011000101011 would be encoded in two
// bytes as "10101011 00101100".
//
// In theory, varint could be used to encode integers of any length.
// However, for practicality we set a limit at 64 bits. The maximum encoded
// length of a number is thus 10 bytes.

#ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
#define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__

#include <string>
#ifndef _MSC_VER
#include <sys/param.h>
#endif  // !_MSC_VER
#include <google/protobuf/stubs/common.h>
#include <google/protobuf/stubs/common.h>     // for GOOGLE_PREDICT_TRUE macro

namespace google {

namespace protobuf {

class DescriptorPool;
class MessageFactory;

namespace io {

// Defined in this file.
class CodedInputStream;
class CodedOutputStream;

// Defined in other files.
class ZeroCopyInputStream;             // zero_copy_stream.h
class ZeroCopyOutputStream;            // zero_copy_stream.h

// Class which reads and decodes binary data which is composed of varint-
// encoded integers and fixed-width pieces. Wraps a ZeroCopyInputStream.
// Most users will not need to deal with CodedInputStream.
//
// Most methods of CodedInputStream that return a bool return false if an
// underlying I/O error occurs or if the data is malformed. Once such a
// failure occurs, the CodedInputStream is broken and is no longer useful.
class LIBPROTOBUF_EXPORT CodedInputStream {
 public:
  // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
  explicit CodedInputStream(ZeroCopyInputStream* input);

  // Create a CodedInputStream that reads from the given flat array. This is
  // faster than using an ArrayInputStream. PushLimit(size) is implied by
  // this constructor.
  explicit CodedInputStream(const uint8* buffer, int size);

  // Destroy the CodedInputStream and position the underlying
  // ZeroCopyInputStream at the first unread byte. If an error occurred while
  // reading (causing a method to return false), then the exact position of
  // the input stream may be anywhere between the last value that was read
  // successfully and the stream's byte limit.
  ~CodedInputStream();


  // Skips a number of bytes. Returns false if an underlying read error
  // occurs.
  bool Skip(int count);

  // Sets *data to point directly at the unread part of the CodedInputStream's
  // underlying buffer, and *size to the size of that buffer, but does not
  // advance the stream's current position. This will always either produce
  // a non-empty buffer or return false. If the caller consumes any of
  // this data, it should then call Skip() to skip over the consumed bytes.
  // This may be useful for implementing external fast parsing routines for
  // types of data not covered by the CodedInputStream interface.
  bool GetDirectBufferPointer(const void** data, int* size);

  // Like GetDirectBufferPointer, but this method is inlined, and does not
  // attempt to Refresh() if the buffer is currently empty.
  inline void GetDirectBufferPointerInline(const void** data,
                                           int* size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;

  // Read raw bytes, copying them into the given buffer.
  bool ReadRaw(void* buffer, int size);

  // Like ReadRaw, but reads into a string.
  //
  // Implementation Note: ReadString() grows the string gradually as it
  // reads in the data, rather than allocating the entire requested size
  // upfront. This prevents denial-of-service attacks in which a client
  // could claim that a string is going to be MAX_INT bytes long in order to
  // crash the server because it can't allocate this much space at once.
  bool ReadString(string* buffer, int size);
  // Like the above, with inlined optimizations. This should only be used
  // by the protobuf implementation.
  inline bool InternalReadStringInline(string* buffer,
                                       int size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;


  // Read a 32-bit little-endian integer.
  bool ReadLittleEndian32(uint32* value);
  // Read a 64-bit little-endian integer.
  bool ReadLittleEndian64(uint64* value);

  // These methods read from an externally provided buffer. The caller is
  // responsible for ensuring that the buffer holds enough bytes to read
  // (4 for the 32-bit variant, 8 for the 64-bit variant). Each returns a
  // pointer just past the bytes it consumed.
  // Read a 32-bit little-endian integer.
  static const uint8* ReadLittleEndian32FromArray(const uint8* buffer,
                                                  uint32* value);
  // Read a 64-bit little-endian integer.
  static const uint8* ReadLittleEndian64FromArray(const uint8* buffer,
                                                  uint64* value);

  // Read an unsigned integer with Varint encoding, truncating to 32 bits.
  // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
  // it to uint32, but may be more efficient.
  bool ReadVarint32(uint32* value);
  // Read an unsigned integer with Varint encoding.
  bool ReadVarint64(uint64* value);

  // Read a tag. This calls ReadVarint32() and returns the result, or returns
  // zero (which is not a valid tag) if ReadVarint32() fails. Also, it updates
  // the last tag value, which can be checked with LastTagWas().
  // Always inline because this is only called in one place per parse loop
  // but it is called for every iteration of said loop, so it should be fast.
  // GCC doesn't want to inline this by default.
  uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE;

  // Usually returns true if calling ReadVarint32() now would produce the given
  // value. Will always return false if ReadVarint32() would not return the
  // given value. If ExpectTag() returns true, it also advances past
  // the varint. For best performance, use a compile-time constant as the
  // parameter.
  // Always inline because this collapses to a small number of instructions
  // when given a constant parameter, but GCC doesn't want to inline by default.
  bool ExpectTag(uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;

  // Like above, except this reads from the specified buffer. The caller is
  // responsible for ensuring that the buffer is large enough to read a varint
  // of the expected size. For best performance, use a compile-time constant as
  // the expected tag parameter.
  //
  // Returns a pointer beyond the expected tag if it was found, or NULL if it
  // was not.
  static const uint8* ExpectTagFromArray(
      const uint8* buffer,
      uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE;

  // Usually returns true if no more bytes can be read. Always returns false
  // if more bytes can be read. If ExpectAtEnd() returns true, a subsequent
  // call to LastTagWas() will act as if ReadTag() had been called and returned
  // zero, and ConsumedEntireMessage() will return true.
  bool ExpectAtEnd();

  // If the last call to ReadTag() returned the given value, returns true.
  // Otherwise, returns false.
  //
  // This is needed because parsers for some types of embedded messages
  // (with field type TYPE_GROUP) don't actually know that they've reached the
  // end of a message until they see an ENDGROUP tag, which was actually part
  // of the enclosing message. The enclosing message would like to check that
  // tag to make sure it had the right number, so it calls LastTagWas() on
  // return from the embedded parser to check.
  bool LastTagWas(uint32 expected);

  // When parsing a message (but NOT a group), this method must be called
  // immediately after MergeFromCodedStream() returns (if it returns true)
  // to further verify that the message ended in a legitimate way. For
  // example, this verifies that parsing did not end on an end-group tag.
  // It also checks for some cases where, due to optimizations,
  // MergeFromCodedStream() can incorrectly return true.
  bool ConsumedEntireMessage();

  // Limits ----------------------------------------------------------
  // Limits are used when parsing length-delimited embedded messages.
  // After the message's length is read, PushLimit() is used to prevent
  // the CodedInputStream from reading beyond that length. Once the
  // embedded message has been parsed, PopLimit() is called to undo the
  // limit.

  // Opaque type used with PushLimit() and PopLimit(). Do not modify
  // values of this type yourself. The only reason that this isn't a
  // struct with private internals is for efficiency.
  typedef int Limit;

  // Places a limit on the number of bytes that the stream may read,
  // starting from the current position. Once the stream hits this limit,
  // it will act like the end of the input has been reached until PopLimit()
  // is called.
  //
  // As the names imply, the stream conceptually has a stack of limits. The
  // shortest limit on the stack is always enforced, even if it is not the
  // top limit.
  //
  // The value returned by PushLimit() is opaque to the caller, and must
  // be passed unchanged to the corresponding call to PopLimit().
  Limit PushLimit(int byte_limit);

  // Pops the last limit pushed by PushLimit(). The input must be the value
  // returned by that call to PushLimit().
  void PopLimit(Limit limit);

  // Returns the number of bytes left until the nearest limit on the
  // stack is hit, or -1 if no limits are in place.
  int BytesUntilLimit();

  // Total Bytes Limit -----------------------------------------------
  // To prevent malicious users from sending excessively large messages
  // and causing integer overflows or memory exhaustion, CodedInputStream
  // imposes a hard limit on the total number of bytes it will read.

  // Sets the maximum number of bytes that this CodedInputStream will read
  // before refusing to continue. To prevent integer overflows in the
  // protocol buffers implementation, as well as to prevent servers from
  // allocating enormous amounts of memory to hold parsed messages, the
  // maximum message length should be limited to the shortest length that
  // will not harm usability. The theoretical shortest message that could
  // cause integer overflows is 512MB. The default limit is 64MB. Apps
  // should set shorter limits if possible. If warning_threshold is not -1,
  // a warning will be printed to stderr after warning_threshold bytes are
  // read. An error will always be printed to stderr if the limit is
  // reached.
  //
  // This is unrelated to PushLimit()/PopLimit().
  //
  // Hint: If you are reading this because your program is printing a
  //   warning about dangerously large protocol messages, you may be
  //   confused about what to do next. The best option is to change your
  //   design such that excessively large messages are not necessary.
  //   For example, try to design file formats to consist of many small
  //   messages rather than a single large one. If this is infeasible,
  //   you will need to increase the limit. Chances are, though, that
  //   your code never constructs a CodedInputStream on which the limit
  //   can be set. You probably parse messages by calling things like
  //   Message::ParseFromString(). In this case, you will need to change
  //   your code to instead construct some sort of ZeroCopyInputStream
  //   (e.g. an ArrayInputStream), construct a CodedInputStream around
  //   that, then call Message::ParseFromCodedStream() instead. Then
  //   you can adjust the limit. Yes, it's more work, but you're doing
  //   something unusual.
  void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);

  // Recursion Limit -------------------------------------------------
  // To prevent corrupt or malicious messages from causing stack overflows,
  // we must keep track of the depth of recursion when parsing embedded
  // messages and groups. CodedInputStream keeps track of this because it
  // is the only object that is passed down the stack during parsing.

  // Sets the maximum recursion depth. The default is 64.
  void SetRecursionLimit(int limit);

  // Increments the current recursion depth. Returns true if the depth is
  // under the limit, false if it has gone over.
  bool IncrementRecursionDepth();

  // Decrements the recursion depth.
  void DecrementRecursionDepth();

  // Extension Registry ----------------------------------------------
  // ADVANCED USAGE: 99.9% of people can ignore this section.
  //
  // By default, when parsing extensions, the parser looks for extension
  // definitions in the pool which owns the outer message's Descriptor.
  // However, you may call SetExtensionRegistry() to provide an alternative
  // pool instead. This makes it possible, for example, to parse a message
  // using a generated class, but represent some extensions using
  // DynamicMessage.

  // Set the pool used to look up extensions. Most users do not need to call
  // this as the correct pool will be chosen automatically.
  //
  // WARNING: It is very easy to misuse this. Carefully read the requirements
  //   below. Do not use this unless you are sure you need it. Almost no one
  //   does.
  //
  // Let's say you are parsing a message into message object m, and you want
  // to take advantage of SetExtensionRegistry(). You must follow these
  // requirements:
  //
  // The given DescriptorPool must contain m->GetDescriptor(). It is not
  // sufficient for it to simply contain a descriptor that has the same name
  // and content -- it must be the *exact object*. In other words:
  //   assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) ==
  //          m->GetDescriptor());
  // There are two ways to satisfy this requirement:
  // 1) Use m->GetDescriptor()->pool() as the pool. This is generally useless
  //    because this is the pool that would be used anyway if you didn't call
  //    SetExtensionRegistry() at all.
  // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an
  //    "underlay". Read the documentation for DescriptorPool for more
  //    information about underlays.
  //
  // You must also provide a MessageFactory. This factory will be used to
  // construct Message objects representing extensions. The factory's
  // GetPrototype() MUST return non-NULL for any Descriptor which can be found
  // through the provided pool.
  //
  // If the provided factory might return instances of protocol-compiler-
  // generated (i.e. compiled-in) types, or if the outer message object m is
  // a generated type, then the given factory MUST have this property: If
  // GetPrototype() is given a Descriptor which resides in
  // DescriptorPool::generated_pool(), the factory MUST return the same
  // prototype which MessageFactory::generated_factory() would return. That
  // is, given a descriptor for a generated type, the factory must return an
  // instance of the generated class (NOT DynamicMessage). However, when
  // given a descriptor for a type that is NOT in generated_pool, the factory
  // is free to return any implementation.
  //
  // The reason for this requirement is that generated sub-objects may be
  // accessed via the standard (non-reflection) extension accessor methods,
  // and these methods will down-cast the object to the generated class type.
  // If the object is not actually of that type, the results would be undefined.
  // On the other hand, if an extension is not compiled in, then there is no
  // way the code could end up accessing it via the standard accessors -- the
  // only way to access the extension is via reflection. When using reflection,
  // DynamicMessage and generated messages are indistinguishable, so it's fine
  // if these objects are represented using DynamicMessage.
  //
  // Using DynamicMessageFactory on which you have called
  // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the
  // above requirement.
  //
  // If either pool or factory is NULL, both must be NULL.
  //
  // Note that this feature is ignored when parsing "lite" messages as they do
  // not have descriptors.
  void SetExtensionRegistry(DescriptorPool* pool, MessageFactory* factory);

  // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
  // has been provided.
  const DescriptorPool* GetExtensionPool();

  // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no
  // factory has been provided.
  MessageFactory* GetExtensionFactory();

 private:
  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);

  // Underlying stream.
  // NOTE(review): presumably NULL when constructed from a flat array --
  // confirm against the constructor definitions.
  ZeroCopyInputStream* input_;
  // Next unread byte of the current buffer; the inline varint readers
  // dereference it only after checking buffer_ < buffer_end_.
  const uint8* buffer_;
  const uint8* buffer_end_;     // pointer to the end of the buffer.
  int total_bytes_read_;  // total bytes read from input_, including
                          // the current buffer

  // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
  // so that we can BackUp() on destruction.
  int overflow_bytes_;

  // LastTagWas() stuff.
  uint32 last_tag_;         // result of last ReadTag().

  // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
  // at EOF, or by ExpectAtEnd() when it returns true. This happens when we
  // reach the end of a message and attempt to read another tag.
  bool legitimate_message_end_;

  // See EnableAliasing().
  // NOTE(review): no EnableAliasing() method is declared in this class as
  // written here -- confirm whether this reference (or the flag) is stale.
  bool aliasing_enabled_;

  // Limits
  Limit current_limit_;   // if position = -1, no limit is applied

  // For simplicity, if the current buffer crosses a limit (either a normal
  // limit created by PushLimit() or the total bytes limit), buffer_size_
  // only tracks the number of bytes before that limit. This field
  // contains the number of bytes after it. Note that this implies that if
  // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've
  // hit a limit. However, if both are zero, it doesn't necessarily mean
  // we aren't at a limit -- the buffer may have ended exactly at the limit.
  //
  // NOTE(review): "buffer_size_" is not a member of this class as declared
  // here (the buffer is tracked by buffer_/buffer_end_); presumably the text
  // above means the value returned by BufferSize() -- confirm.
  int buffer_size_after_limit_;

  // Maximum number of bytes to read, period. This is unrelated to
  // current_limit_. Set using SetTotalBytesLimit().
  int total_bytes_limit_;
  // Second argument of SetTotalBytesLimit(); -1 disables the warning per that
  // method's documentation.
  int total_bytes_warning_threshold_;

  // Current recursion depth, controlled by IncrementRecursionDepth() and
  // DecrementRecursionDepth().
  int recursion_depth_;
  // Recursion depth limit, set by SetRecursionLimit().
  int recursion_limit_;

  // See SetExtensionRegistry().
  const DescriptorPool* extension_pool_;
  MessageFactory* extension_factory_;

  // Private member functions.

  // Advance the buffer by a given number of bytes.
  void Advance(int amount);

  // Back up input_ to the current buffer position.
  void BackUpInputToCurrentPosition();

  // Recomputes the value of buffer_size_after_limit_. Must be called after
  // current_limit_ or total_bytes_limit_ changes.
  void RecomputeBufferLimits();

  // Writes an error message saying that we hit total_bytes_limit_.
  void PrintTotalBytesLimitError();

  // Called when the buffer runs out to request more data. Implies an
  // Advance(BufferSize()).
  bool Refresh();

  // When parsing varints, we optimize for the common case of small values, and
  // then optimize for the case when the varint fits within the current buffer
  // piece. The Fallback method is used when we can't use the one-byte
  // optimization. The Slow method is yet another fallback when the buffer is
  // not large enough. Making the slow path out-of-line speeds up the common
  // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
  // message crosses multiple buffers.
  bool ReadVarint32Fallback(uint32* value);
  bool ReadVarint64Fallback(uint64* value);
  bool ReadVarint32Slow(uint32* value);
  bool ReadVarint64Slow(uint64* value);
  bool ReadLittleEndian32Fallback(uint32* value);
  bool ReadLittleEndian64Fallback(uint64* value);
  // Fallback/slow methods for reading tags. These do not update last_tag_,
  // but will set legitimate_message_end_ if we are at the end of the input
  // stream.
  uint32 ReadTagFallback();
  uint32 ReadTagSlow();
  bool ReadStringFallback(string* buffer, int size);

  // Return the size of the buffer.
  int BufferSize() const;

  // Defaults documented on SetTotalBytesLimit() and SetRecursionLimit().
  static const int kDefaultTotalBytesLimit = 64 << 20;  // 64MB

  static const int kDefaultTotalBytesWarningThreshold = 32 << 20;  // 32MB
  static const int kDefaultRecursionLimit = 64;
};

// Class which encodes and writes binary data which is composed of varint-
// encoded integers and fixed-width pieces. Wraps a ZeroCopyOutputStream.
// Most users will not need to deal with CodedOutputStream.
//
// Most methods of CodedOutputStream which return a bool return false if an
// underlying I/O error occurs. Once such a failure occurs, the
// CodedOutputStream is broken and is no longer useful. The Write* methods do
// not return the stream status, but will invalidate the stream if an error
// occurs. The client can probe HadError() to determine the status.
//
// Note that every method of CodedOutputStream which writes some data has
// a corresponding static "ToArray" version. These versions write directly
// to the provided buffer, returning a pointer past the last written byte.
// They require that the buffer has sufficient capacity for the encoded data.
// This allows an optimization where we check if an output stream has enough
// space for an entire message before we start writing and, if there is, we
// call only the ToArray methods to avoid doing bound checks for each
// individual value.
550 // i.e., in the example above: 551 // 552 // CodedOutputStream coded_output = new CodedOutputStream(raw_output); 553 // int magic_number = 1234; 554 // char text[] = "Hello world!"; 555 // 556 // int coded_size = sizeof(magic_number) + 557 // CodedOutputStream::Varint32Size(strlen(text)) + 558 // strlen(text); 559 // 560 // uint8* buffer = 561 // coded_output->GetDirectBufferForNBytesAndAdvance(coded_size); 562 // if (buffer != NULL) { 563 // // The output stream has enough space in the buffer: write directly to 564 // // the array. 565 // buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number, 566 // buffer); 567 // buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer); 568 // buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer); 569 // } else { 570 // // Make bound-checked writes, which will ask the underlying stream for 571 // // more space as needed. 572 // coded_output->WriteLittleEndian32(magic_number); 573 // coded_output->WriteVarint32(strlen(text)); 574 // coded_output->WriteRaw(text, strlen(text)); 575 // } 576 // 577 // delete coded_output; 578 class LIBPROTOBUF_EXPORT CodedOutputStream { 579 public: 580 // Create an CodedOutputStream that writes to the given ZeroCopyOutputStream. 581 explicit CodedOutputStream(ZeroCopyOutputStream* output); 582 583 // Destroy the CodedOutputStream and position the underlying 584 // ZeroCopyOutputStream immediately after the last byte written. 585 ~CodedOutputStream(); 586 587 // Skips a number of bytes, leaving the bytes unmodified in the underlying 588 // buffer. Returns false if an underlying write error occurs. This is 589 // mainly useful with GetDirectBufferPointer(). 590 bool Skip(int count); 591 592 // Sets *data to point directly at the unwritten part of the 593 // CodedOutputStream's underlying buffer, and *size to the size of that 594 // buffer, but does not advance the stream's current position. 
This will 595 // always either produce a non-empty buffer or return false. If the caller 596 // writes any data to this buffer, it should then call Skip() to skip over 597 // the consumed bytes. This may be useful for implementing external fast 598 // serialization routines for types of data not covered by the 599 // CodedOutputStream interface. 600 bool GetDirectBufferPointer(void** data, int* size); 601 602 // If there are at least "size" bytes available in the current buffer, 603 // returns a pointer directly into the buffer and advances over these bytes. 604 // The caller may then write directly into this buffer (e.g. using the 605 // *ToArray static methods) rather than go through CodedOutputStream. If 606 // there are not enough bytes available, returns NULL. The return pointer is 607 // invalidated as soon as any other non-const method of CodedOutputStream 608 // is called. 609 inline uint8* GetDirectBufferForNBytesAndAdvance(int size); 610 611 // Write raw bytes, copying them from the given buffer. 612 void WriteRaw(const void* buffer, int size); 613 // Like WriteRaw() but writing directly to the target array. 614 // This is _not_ inlined, as the compiler often optimizes memcpy into inline 615 // copy loops. Since this gets called by every field with string or bytes 616 // type, inlining may lead to a significant amount of code bloat, with only a 617 // minor performance gain. 618 static uint8* WriteRawToArray(const void* buffer, int size, uint8* target); 619 620 // Equivalent to WriteRaw(str.data(), str.size()). 621 void WriteString(const string& str); 622 // Like WriteString() but writing directly to the target array. 623 static uint8* WriteStringToArray(const string& str, uint8* target); 624 625 626 // Write a 32-bit little-endian integer. 627 void WriteLittleEndian32(uint32 value); 628 // Like WriteLittleEndian32() but writing directly to the target array. 
629 static uint8* WriteLittleEndian32ToArray(uint32 value, uint8* target); 630 // Write a 64-bit little-endian integer. 631 void WriteLittleEndian64(uint64 value); 632 // Like WriteLittleEndian64() but writing directly to the target array. 633 static uint8* WriteLittleEndian64ToArray(uint64 value, uint8* target); 634 635 // Write an unsigned integer with Varint encoding. Writing a 32-bit value 636 // is equivalent to casting it to uint64 and writing it as a 64-bit value, 637 // but may be more efficient. 638 void WriteVarint32(uint32 value); 639 // Like WriteVarint32() but writing directly to the target array. 640 static uint8* WriteVarint32ToArray(uint32 value, uint8* target); 641 // Write an unsigned integer with Varint encoding. 642 void WriteVarint64(uint64 value); 643 // Like WriteVarint64() but writing directly to the target array. 644 static uint8* WriteVarint64ToArray(uint64 value, uint8* target); 645 646 // Equivalent to WriteVarint32() except when the value is negative, 647 // in which case it must be sign-extended to a full 10 bytes. 648 void WriteVarint32SignExtended(int32 value); 649 // Like WriteVarint32SignExtended() but writing directly to the target array. 650 static uint8* WriteVarint32SignExtendedToArray(int32 value, uint8* target); 651 652 // This is identical to WriteVarint32(), but optimized for writing tags. 653 // In particular, if the input is a compile-time constant, this method 654 // compiles down to a couple instructions. 655 // Always inline because otherwise the aformentioned optimization can't work, 656 // but GCC by default doesn't want to inline this. 657 void WriteTag(uint32 value); 658 // Like WriteTag() but writing directly to the target array. 659 static uint8* WriteTagToArray( 660 uint32 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; 661 662 // Returns the number of bytes needed to encode the given value as a varint. 
// Values below 1 << 7 are answered inline (1 byte); everything else is
// delegated to VarintSize32Fallback().
static int VarintSize32(uint32 value);
// Returns the number of bytes needed to encode the given value as a varint.
static int VarintSize64(uint64 value);

// If negative, 10 bytes.  Otherwise, same as VarintSize32().
static int VarintSize32SignExtended(int32 value);

// Returns the total number of bytes written since this object was created.
// Computed as total_bytes_ - buffer_size_, i.e. the bytes of all buffers
// seen so far minus the part of the current buffer not yet advanced over.
inline int ByteCount() const;

// Returns true if there was an underlying I/O error since this object was
// created.
bool HadError() const { return had_error_; }

private:
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);

ZeroCopyOutputStream* output_;
uint8* buffer_;     // Next position to write at; moved forward by Advance().
int buffer_size_;   // Bytes left at buffer_; decremented by Advance().
int total_bytes_;   // Sum of sizes of all buffers seen so far.
bool had_error_;    // Whether an error occurred during output.

// Advance the buffer by a given number of bytes.
void Advance(int amount);

// Called when the buffer runs out to request more data.  Implies an
// Advance(buffer_size_).
bool Refresh();

static uint8* WriteVarint32FallbackToArray(uint32 value, uint8* target);

// Always-inlined versions of WriteVarint* functions so that code can be
// reused, while still controlling size.  For instance, WriteVarint32ToArray()
// should not directly call this: since it is inlined itself, doing so
// would greatly increase the size of generated code.  Instead, it should call
// WriteVarint32FallbackToArray.  Meanwhile, WriteVarint32() is already
// out-of-line, so it should just invoke this directly to avoid any extra
// function call overhead.
702 static uint8* WriteVarint32FallbackToArrayInline( 703 uint32 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; 704 static uint8* WriteVarint64ToArrayInline( 705 uint64 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; 706 707 static int VarintSize32Fallback(uint32 value); 708 }; 709 710 // inline methods ==================================================== 711 // The vast majority of varints are only one byte. These inline 712 // methods optimize for that case. 713 714 inline bool CodedInputStream::ReadVarint32(uint32* value) { 715 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) { 716 *value = *buffer_; 717 Advance(1); 718 return true; 719 } else { 720 return ReadVarint32Fallback(value); 721 } 722 } 723 724 inline bool CodedInputStream::ReadVarint64(uint64* value) { 725 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) { 726 *value = *buffer_; 727 Advance(1); 728 return true; 729 } else { 730 return ReadVarint64Fallback(value); 731 } 732 } 733 734 // static 735 inline const uint8* CodedInputStream::ReadLittleEndian32FromArray( 736 const uint8* buffer, 737 uint32* value) { 738 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ 739 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN 740 memcpy(value, buffer, sizeof(*value)); 741 return buffer + sizeof(*value); 742 #else 743 *value = (static_cast<uint32>(buffer[0]) ) | 744 (static_cast<uint32>(buffer[1]) << 8) | 745 (static_cast<uint32>(buffer[2]) << 16) | 746 (static_cast<uint32>(buffer[3]) << 24); 747 return buffer + sizeof(*value); 748 #endif 749 } 750 // static 751 inline const uint8* CodedInputStream::ReadLittleEndian64FromArray( 752 const uint8* buffer, 753 uint64* value) { 754 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ 755 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN 756 memcpy(value, buffer, sizeof(*value)); 757 return buffer + sizeof(*value); 758 #else 759 uint32 part0 = (static_cast<uint32>(buffer[0]) ) | 760 
(static_cast<uint32>(buffer[1]) << 8) | 761 (static_cast<uint32>(buffer[2]) << 16) | 762 (static_cast<uint32>(buffer[3]) << 24); 763 uint32 part1 = (static_cast<uint32>(buffer[4]) ) | 764 (static_cast<uint32>(buffer[5]) << 8) | 765 (static_cast<uint32>(buffer[6]) << 16) | 766 (static_cast<uint32>(buffer[7]) << 24); 767 *value = static_cast<uint64>(part0) | 768 (static_cast<uint64>(part1) << 32); 769 return buffer + sizeof(*value); 770 #endif 771 } 772 773 inline bool CodedInputStream::ReadLittleEndian32(uint32* value) { 774 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ 775 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN 776 if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) { 777 memcpy(value, buffer_, sizeof(*value)); 778 Advance(sizeof(*value)); 779 return true; 780 } else { 781 return ReadLittleEndian32Fallback(value); 782 } 783 #else 784 return ReadLittleEndian32Fallback(value); 785 #endif 786 } 787 788 inline bool CodedInputStream::ReadLittleEndian64(uint64* value) { 789 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ 790 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN 791 if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) { 792 memcpy(value, buffer_, sizeof(*value)); 793 Advance(sizeof(*value)); 794 return true; 795 } else { 796 return ReadLittleEndian64Fallback(value); 797 } 798 #else 799 return ReadLittleEndian64Fallback(value); 800 #endif 801 } 802 803 inline uint32 CodedInputStream::ReadTag() { 804 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] < 0x80) { 805 last_tag_ = buffer_[0]; 806 Advance(1); 807 return last_tag_; 808 } else { 809 last_tag_ = ReadTagFallback(); 810 return last_tag_; 811 } 812 } 813 814 inline bool CodedInputStream::LastTagWas(uint32 expected) { 815 return last_tag_ == expected; 816 } 817 818 inline bool CodedInputStream::ConsumedEntireMessage() { 819 return legitimate_message_end_; 820 } 821 822 inline bool CodedInputStream::ExpectTag(uint32 expected) { 823 
if (expected < (1 << 7)) { 824 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] == expected) { 825 Advance(1); 826 return true; 827 } else { 828 return false; 829 } 830 } else if (expected < (1 << 14)) { 831 if (GOOGLE_PREDICT_TRUE(BufferSize() >= 2) && 832 buffer_[0] == static_cast<uint8>(expected | 0x80) && 833 buffer_[1] == static_cast<uint8>(expected >> 7)) { 834 Advance(2); 835 return true; 836 } else { 837 return false; 838 } 839 } else { 840 // Don't bother optimizing for larger values. 841 return false; 842 } 843 } 844 845 inline const uint8* CodedInputStream::ExpectTagFromArray( 846 const uint8* buffer, uint32 expected) { 847 if (expected < (1 << 7)) { 848 if (buffer[0] == expected) { 849 return buffer + 1; 850 } 851 } else if (expected < (1 << 14)) { 852 if (buffer[0] == static_cast<uint8>(expected | 0x80) && 853 buffer[1] == static_cast<uint8>(expected >> 7)) { 854 return buffer + 2; 855 } 856 } 857 return NULL; 858 } 859 860 inline void CodedInputStream::GetDirectBufferPointerInline(const void** data, 861 int* size) { 862 *data = buffer_; 863 *size = buffer_end_ - buffer_; 864 } 865 866 inline bool CodedInputStream::ExpectAtEnd() { 867 // If we are at a limit we know no more bytes can be read. Otherwise, it's 868 // hard to say without calling Refresh(), and we'd rather not do that. 869 870 if (buffer_ == buffer_end_ && buffer_size_after_limit_ != 0) { 871 last_tag_ = 0; // Pretend we called ReadTag()... 872 legitimate_message_end_ = true; // ... and it hit EOF. 
873 return true; 874 } else { 875 return false; 876 } 877 } 878 879 inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) { 880 if (buffer_size_ < size) { 881 return NULL; 882 } else { 883 uint8* result = buffer_; 884 Advance(size); 885 return result; 886 } 887 } 888 889 inline uint8* CodedOutputStream::WriteVarint32ToArray(uint32 value, 890 uint8* target) { 891 if (value < 0x80) { 892 *target = value; 893 return target + 1; 894 } else { 895 return WriteVarint32FallbackToArray(value, target); 896 } 897 } 898 899 inline void CodedOutputStream::WriteVarint32SignExtended(int32 value) { 900 if (value < 0) { 901 WriteVarint64(static_cast<uint64>(value)); 902 } else { 903 WriteVarint32(static_cast<uint32>(value)); 904 } 905 } 906 907 inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray( 908 int32 value, uint8* target) { 909 if (value < 0) { 910 return WriteVarint64ToArray(static_cast<uint64>(value), target); 911 } else { 912 return WriteVarint32ToArray(static_cast<uint32>(value), target); 913 } 914 } 915 916 inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value, 917 uint8* target) { 918 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ 919 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN 920 memcpy(target, &value, sizeof(value)); 921 #else 922 target[0] = static_cast<uint8>(value); 923 target[1] = static_cast<uint8>(value >> 8); 924 target[2] = static_cast<uint8>(value >> 16); 925 target[3] = static_cast<uint8>(value >> 24); 926 #endif 927 return target + sizeof(value); 928 } 929 930 inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value, 931 uint8* target) { 932 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ 933 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN 934 memcpy(target, &value, sizeof(value)); 935 #else 936 uint32 part0 = static_cast<uint32>(value); 937 uint32 part1 = static_cast<uint32>(value >> 32); 938 939 target[0] = 
static_cast<uint8>(part0); 940 target[1] = static_cast<uint8>(part0 >> 8); 941 target[2] = static_cast<uint8>(part0 >> 16); 942 target[3] = static_cast<uint8>(part0 >> 24); 943 target[4] = static_cast<uint8>(part1); 944 target[5] = static_cast<uint8>(part1 >> 8); 945 target[6] = static_cast<uint8>(part1 >> 16); 946 target[7] = static_cast<uint8>(part1 >> 24); 947 #endif 948 return target + sizeof(value); 949 } 950 951 inline void CodedOutputStream::WriteTag(uint32 value) { 952 WriteVarint32(value); 953 } 954 955 inline uint8* CodedOutputStream::WriteTagToArray( 956 uint32 value, uint8* target) { 957 if (value < (1 << 7)) { 958 target[0] = value; 959 return target + 1; 960 } else if (value < (1 << 14)) { 961 target[0] = static_cast<uint8>(value | 0x80); 962 target[1] = static_cast<uint8>(value >> 7); 963 return target + 2; 964 } else { 965 return WriteVarint32FallbackToArray(value, target); 966 } 967 } 968 969 inline int CodedOutputStream::VarintSize32(uint32 value) { 970 if (value < (1 << 7)) { 971 return 1; 972 } else { 973 return VarintSize32Fallback(value); 974 } 975 } 976 977 inline int CodedOutputStream::VarintSize32SignExtended(int32 value) { 978 if (value < 0) { 979 return 10; // TODO(kenton): Make this a symbolic constant. 
980 } else { 981 return VarintSize32(static_cast<uint32>(value)); 982 } 983 } 984 985 inline void CodedOutputStream::WriteString(const string& str) { 986 WriteRaw(str.data(), str.size()); 987 } 988 989 inline uint8* CodedOutputStream::WriteStringToArray( 990 const string& str, uint8* target) { 991 return WriteRawToArray(str.data(), str.size(), target); 992 } 993 994 inline int CodedOutputStream::ByteCount() const { 995 return total_bytes_ - buffer_size_; 996 } 997 998 inline void CodedInputStream::Advance(int amount) { 999 buffer_ += amount; 1000 } 1001 1002 inline void CodedOutputStream::Advance(int amount) { 1003 buffer_ += amount; 1004 buffer_size_ -= amount; 1005 } 1006 1007 inline void CodedInputStream::SetRecursionLimit(int limit) { 1008 recursion_limit_ = limit; 1009 } 1010 1011 inline bool CodedInputStream::IncrementRecursionDepth() { 1012 ++recursion_depth_; 1013 return recursion_depth_ <= recursion_limit_; 1014 } 1015 1016 inline void CodedInputStream::DecrementRecursionDepth() { 1017 if (recursion_depth_ > 0) --recursion_depth_; 1018 } 1019 1020 inline void CodedInputStream::SetExtensionRegistry(DescriptorPool* pool, 1021 MessageFactory* factory) { 1022 extension_pool_ = pool; 1023 extension_factory_ = factory; 1024 } 1025 1026 inline const DescriptorPool* CodedInputStream::GetExtensionPool() { 1027 return extension_pool_; 1028 } 1029 1030 inline MessageFactory* CodedInputStream::GetExtensionFactory() { 1031 return extension_factory_; 1032 } 1033 1034 inline int CodedInputStream::BufferSize() const { 1035 return buffer_end_ - buffer_; 1036 } 1037 1038 inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input) 1039 : input_(input), 1040 buffer_(NULL), 1041 buffer_end_(NULL), 1042 total_bytes_read_(0), 1043 overflow_bytes_(0), 1044 last_tag_(0), 1045 legitimate_message_end_(false), 1046 aliasing_enabled_(false), 1047 current_limit_(INT_MAX), 1048 buffer_size_after_limit_(0), 1049 total_bytes_limit_(kDefaultTotalBytesLimit), 1050 
total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold), 1051 recursion_depth_(0), 1052 recursion_limit_(kDefaultRecursionLimit), 1053 extension_pool_(NULL), 1054 extension_factory_(NULL) { 1055 // Eagerly Refresh() so buffer space is immediately available. 1056 Refresh(); 1057 } 1058 1059 inline CodedInputStream::CodedInputStream(const uint8* buffer, int size) 1060 : input_(NULL), 1061 buffer_(buffer), 1062 buffer_end_(buffer + size), 1063 total_bytes_read_(size), 1064 overflow_bytes_(0), 1065 last_tag_(0), 1066 legitimate_message_end_(false), 1067 aliasing_enabled_(false), 1068 current_limit_(size), 1069 buffer_size_after_limit_(0), 1070 total_bytes_limit_(kDefaultTotalBytesLimit), 1071 total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold), 1072 recursion_depth_(0), 1073 recursion_limit_(kDefaultRecursionLimit), 1074 extension_pool_(NULL), 1075 extension_factory_(NULL) { 1076 // Note that setting current_limit_ == size is important to prevent some 1077 // code paths from trying to access input_ and segfaulting. 1078 } 1079 1080 inline CodedInputStream::~CodedInputStream() { 1081 if (input_ != NULL) { 1082 BackUpInputToCurrentPosition(); 1083 } 1084 } 1085 1086 } // namespace io 1087 } // namespace protobuf 1088 1089 } // namespace google 1090 #endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ 1091