BWAPI
|
00001 // Protocol Buffers - Google's data interchange format 00002 // Copyright 2008 Google Inc. All rights reserved. 00003 // http://code.google.com/p/protobuf/ 00004 // 00005 // Redistribution and use in source and binary forms, with or without 00006 // modification, are permitted provided that the following conditions are 00007 // met: 00008 // 00009 // * Redistributions of source code must retain the above copyright 00010 // notice, this list of conditions and the following disclaimer. 00011 // * Redistributions in binary form must reproduce the above 00012 // copyright notice, this list of conditions and the following disclaimer 00013 // in the documentation and/or other materials provided with the 00014 // distribution. 00015 // * Neither the name of Google Inc. nor the names of its 00016 // contributors may be used to endorse or promote products derived from 00017 // this software without specific prior written permission. 00018 // 00019 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 00020 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 00021 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 00022 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 00023 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00024 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00025 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00026 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00027 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00028 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00029 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00030 00031 // Author: kenton@google.com (Kenton Varda) 00032 // Based on original Protocol Buffers design by 00033 // Sanjay Ghemawat, Jeff Dean, and others. 
00034 // 00035 // This file contains the CodedInputStream and CodedOutputStream classes, 00036 // which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively, 00037 // and allow you to read or write individual pieces of data in various 00038 // formats. In particular, these implement the varint encoding for 00039 // integers, a simple variable-length encoding in which smaller numbers 00040 // take fewer bytes. 00041 // 00042 // Typically these classes will only be used internally by the protocol 00043 // buffer library in order to encode and decode protocol buffers. Clients 00044 // of the library only need to know about this class if they wish to write 00045 // custom message parsing or serialization procedures. 00046 // 00047 // CodedOutputStream example: 00048 // // Write some data to "myfile". First we write a 4-byte "magic number" 00049 // // to identify the file type, then write a length-delimited string. The 00050 // // string is composed of a varint giving the length followed by the raw 00051 // // bytes. 00052 // int fd = open("myfile", O_WRONLY); 00053 // ZeroCopyOutputStream* raw_output = new FileOutputStream(fd); 00054 // CodedOutputStream* coded_output = new CodedOutputStream(raw_output); 00055 // 00056 // int magic_number = 1234; 00057 // char text[] = "Hello world!"; 00058 // coded_output->WriteLittleEndian32(magic_number); 00059 // coded_output->WriteVarint32(strlen(text)); 00060 // coded_output->WriteRaw(text, strlen(text)); 00061 // 00062 // delete coded_output; 00063 // delete raw_output; 00064 // close(fd); 00065 // 00066 // CodedInputStream example: 00067 // // Read a file created by the above code. 00068 // int fd = open("myfile", O_RDONLY); 00069 // ZeroCopyInputStream* raw_input = new FileInputStream(fd); 00070 // CodedInputStream* coded_input = new CodedInputStream(raw_input); 00071 // 00072 // coded_input->ReadLittleEndian32(&magic_number); 00073 // if (magic_number != 1234) { 00074 // cerr << "File not in expected format." 
<< endl; 00075 // return; 00076 // } 00077 // 00078 // uint32 size; 00079 // coded_input->ReadVarint32(&size); 00080 // 00081 // char* text = new char[size + 1]; 00082 // coded_input->ReadRaw(text, size); 00083 // text[size] = '\0'; 00084 // 00085 // delete coded_input; 00086 // delete raw_input; 00087 // close(fd); 00088 // 00089 // cout << "Text is: " << text << endl; 00090 // delete [] text; 00091 // 00092 // For those who are interested, varint encoding is defined as follows: 00093 // 00094 // The encoding operates on unsigned integers of up to 64 bits in length. 00095 // Each byte of the encoded value has the format: 00096 // * bits 0-6: Seven bits of the number being encoded. 00097 // * bit 7: Zero if this is the last byte in the encoding (in which 00098 // case all remaining bits of the number are zero) or 1 if 00099 // more bytes follow. 00100 // The first byte contains the least-significant 7 bits of the number, the 00101 // second byte (if present) contains the next-least-significant 7 bits, 00102 // and so on. So, the binary number 1011000101011 would be encoded in two 00103 // bytes as "10101011 00101100". 00104 // 00105 // In theory, varint could be used to encode integers of any length. 00106 // However, for practicality we set a limit at 64 bits. The maximum encoded 00107 // length of a number is thus 10 bytes. 00108 00109 #ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ 00110 #define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__ 00111 00112 #include <string> 00113 #ifndef _MSC_VER 00114 #include <sys/param.h> 00115 #endif // !_MSC_VER 00116 #include <google/protobuf/stubs/common.h> 00117 #include <google/protobuf/stubs/common.h> // for GOOGLE_PREDICT_TRUE macro 00118 00119 namespace google { 00120 00121 namespace protobuf { 00122 00123 class DescriptorPool; 00124 class MessageFactory; 00125 00126 namespace io { 00127 00128 // Defined in this file. 00129 class CodedInputStream; 00130 class CodedOutputStream; 00131 00132 // Defined in other files. 
00133 class ZeroCopyInputStream; // zero_copy_stream.h 00134 class ZeroCopyOutputStream; // zero_copy_stream.h 00135 00136 // Class which reads and decodes binary data which is composed of varint- 00137 // encoded integers and fixed-width pieces. Wraps a ZeroCopyInputStream. 00138 // Most users will not need to deal with CodedInputStream. 00139 // 00140 // Most methods of CodedInputStream that return a bool return false if an 00141 // underlying I/O error occurs or if the data is malformed. Once such a 00142 // failure occurs, the CodedInputStream is broken and is no longer useful. 00143 class LIBPROTOBUF_EXPORT CodedInputStream { 00144 public: 00145 // Create a CodedInputStream that reads from the given ZeroCopyInputStream. 00146 explicit CodedInputStream(ZeroCopyInputStream* input); 00147 00148 // Create a CodedInputStream that reads from the given flat array. This is 00149 // faster than using an ArrayInputStream. PushLimit(size) is implied by 00150 // this constructor. 00151 explicit CodedInputStream(const uint8* buffer, int size); 00152 00153 // Destroy the CodedInputStream and position the underlying 00154 // ZeroCopyInputStream at the first unread byte. If an error occurred while 00155 // reading (causing a method to return false), then the exact position of 00156 // the input stream may be anywhere between the last value that was read 00157 // successfully and the stream's byte limit. 00158 ~CodedInputStream(); 00159 00160 00161 // Skips a number of bytes. Returns false if an underlying read error 00162 // occurs. 00163 bool Skip(int count); 00164 00165 // Sets *data to point directly at the unread part of the CodedInputStream's 00166 // underlying buffer, and *size to the size of that buffer, but does not 00167 // advance the stream's current position. This will always either produce 00168 // a non-empty buffer or return false. If the caller consumes any of 00169 // this data, it should then call Skip() to skip over the consumed bytes. 
00170 // This may be useful for implementing external fast parsing routines for 00171 // types of data not covered by the CodedInputStream interface. 00172 bool GetDirectBufferPointer(const void** data, int* size); 00173 00174 // Like GetDirectBufferPointer, but this method is inlined, and does not 00175 // attempt to Refresh() if the buffer is currently empty. 00176 inline void GetDirectBufferPointerInline(const void** data, 00177 int* size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; 00178 00179 // Read raw bytes, copying them into the given buffer. 00180 bool ReadRaw(void* buffer, int size); 00181 00182 // Like ReadRaw, but reads into a string. 00183 // 00184 // Implementation Note: ReadString() grows the string gradually as it 00185 // reads in the data, rather than allocating the entire requested size 00186 // upfront. This prevents denial-of-service attacks in which a client 00187 // could claim that a string is going to be MAX_INT bytes long in order to 00188 // crash the server because it can't allocate this much space at once. 00189 bool ReadString(string* buffer, int size); 00190 // Like the above, with inlined optimizations. This should only be used 00191 // by the protobuf implementation. 00192 inline bool InternalReadStringInline(string* buffer, 00193 int size) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; 00194 00195 00196 // Read a 32-bit little-endian integer. 00197 bool ReadLittleEndian32(uint32* value); 00198 // Read a 64-bit little-endian integer. 00199 bool ReadLittleEndian64(uint64* value); 00200 00201 // These methods read from an externally provided buffer. The caller is 00202 // responsible for ensuring that the buffer has sufficient space. 00203 // Read a 32-bit little-endian integer. 00204 static const uint8* ReadLittleEndian32FromArray(const uint8* buffer, 00205 uint32* value); 00206 // Read a 64-bit little-endian integer. 
00207 static const uint8* ReadLittleEndian64FromArray(const uint8* buffer, 00208 uint64* value); 00209 00210 // Read an unsigned integer with Varint encoding, truncating to 32 bits. 00211 // Reading a 32-bit value is equivalent to reading a 64-bit one and casting 00212 // it to uint32, but may be more efficient. 00213 bool ReadVarint32(uint32* value); 00214 // Read an unsigned integer with Varint encoding. 00215 bool ReadVarint64(uint64* value); 00216 00217 // Read a tag. This calls ReadVarint32() and returns the result, or returns 00218 // zero (which is not a valid tag) if ReadVarint32() fails. Also, it updates 00219 // the last tag value, which can be checked with LastTagWas(). 00220 // Always inline because this is only called in once place per parse loop 00221 // but it is called for every iteration of said loop, so it should be fast. 00222 // GCC doesn't want to inline this by default. 00223 uint32 ReadTag() GOOGLE_ATTRIBUTE_ALWAYS_INLINE; 00224 00225 // Usually returns true if calling ReadVarint32() now would produce the given 00226 // value. Will always return false if ReadVarint32() would not return the 00227 // given value. If ExpectTag() returns true, it also advances past 00228 // the varint. For best performance, use a compile-time constant as the 00229 // parameter. 00230 // Always inline because this collapses to a small number of instructions 00231 // when given a constant parameter, but GCC doesn't want to inline by default. 00232 bool ExpectTag(uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; 00233 00234 // Like above, except this reads from the specified buffer. The caller is 00235 // responsible for ensuring that the buffer is large enough to read a varint 00236 // of the expected size. For best performance, use a compile-time constant as 00237 // the expected tag parameter. 00238 // 00239 // Returns a pointer beyond the expected tag if it was found, or NULL if it 00240 // was not. 
00241 static const uint8* ExpectTagFromArray( 00242 const uint8* buffer, 00243 uint32 expected) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; 00244 00245 // Usually returns true if no more bytes can be read. Always returns false 00246 // if more bytes can be read. If ExpectAtEnd() returns true, a subsequent 00247 // call to LastTagWas() will act as if ReadTag() had been called and returned 00248 // zero, and ConsumedEntireMessage() will return true. 00249 bool ExpectAtEnd(); 00250 00251 // If the last call to ReadTag() returned the given value, returns true. 00252 // Otherwise, returns false; 00253 // 00254 // This is needed because parsers for some types of embedded messages 00255 // (with field type TYPE_GROUP) don't actually know that they've reached the 00256 // end of a message until they see an ENDGROUP tag, which was actually part 00257 // of the enclosing message. The enclosing message would like to check that 00258 // tag to make sure it had the right number, so it calls LastTagWas() on 00259 // return from the embedded parser to check. 00260 bool LastTagWas(uint32 expected); 00261 00262 // When parsing message (but NOT a group), this method must be called 00263 // immediately after MergeFromCodedStream() returns (if it returns true) 00264 // to further verify that the message ended in a legitimate way. For 00265 // example, this verifies that parsing did not end on an end-group tag. 00266 // It also checks for some cases where, due to optimizations, 00267 // MergeFromCodedStream() can incorrectly return true. 00268 bool ConsumedEntireMessage(); 00269 00270 // Limits ---------------------------------------------------------- 00271 // Limits are used when parsing length-delimited embedded messages. 00272 // After the message's length is read, PushLimit() is used to prevent 00273 // the CodedInputStream from reading beyond that length. Once the 00274 // embedded message has been parsed, PopLimit() is called to undo the 00275 // limit. 
00276 00277 // Opaque type used with PushLimit() and PopLimit(). Do not modify 00278 // values of this type yourself. The only reason that this isn't a 00279 // struct with private internals is for efficiency. 00280 typedef int Limit; 00281 00282 // Places a limit on the number of bytes that the stream may read, 00283 // starting from the current position. Once the stream hits this limit, 00284 // it will act like the end of the input has been reached until PopLimit() 00285 // is called. 00286 // 00287 // As the names imply, the stream conceptually has a stack of limits. The 00288 // shortest limit on the stack is always enforced, even if it is not the 00289 // top limit. 00290 // 00291 // The value returned by PushLimit() is opaque to the caller, and must 00292 // be passed unchanged to the corresponding call to PopLimit(). 00293 Limit PushLimit(int byte_limit); 00294 00295 // Pops the last limit pushed by PushLimit(). The input must be the value 00296 // returned by that call to PushLimit(). 00297 void PopLimit(Limit limit); 00298 00299 // Returns the number of bytes left until the nearest limit on the 00300 // stack is hit, or -1 if no limits are in place. 00301 int BytesUntilLimit(); 00302 00303 // Total Bytes Limit ----------------------------------------------- 00304 // To prevent malicious users from sending excessively large messages 00305 // and causing integer overflows or memory exhaustion, CodedInputStream 00306 // imposes a hard limit on the total number of bytes it will read. 00307 00308 // Sets the maximum number of bytes that this CodedInputStream will read 00309 // before refusing to continue. To prevent integer overflows in the 00310 // protocol buffers implementation, as well as to prevent servers from 00311 // allocating enormous amounts of memory to hold parsed messages, the 00312 // maximum message length should be limited to the shortest length that 00313 // will not harm usability. 
The theoretical shortest message that could 00314 // cause integer overflows is 512MB. The default limit is 64MB. Apps 00315 // should set shorter limits if possible. If warning_threshold is not -1, 00316 // a warning will be printed to stderr after warning_threshold bytes are 00317 // read. An error will always be printed to stderr if the limit is 00318 // reached. 00319 // 00320 // This is unrelated to PushLimit()/PopLimit(). 00321 // 00322 // Hint: If you are reading this because your program is printing a 00323 // warning about dangerously large protocol messages, you may be 00324 // confused about what to do next. The best option is to change your 00325 // design such that excessively large messages are not necessary. 00326 // For example, try to design file formats to consist of many small 00327 // messages rather than a single large one. If this is infeasible, 00328 // you will need to increase the limit. Chances are, though, that 00329 // your code never constructs a CodedInputStream on which the limit 00330 // can be set. You probably parse messages by calling things like 00331 // Message::ParseFromString(). In this case, you will need to change 00332 // your code to instead construct some sort of ZeroCopyInputStream 00333 // (e.g. an ArrayInputStream), construct a CodedInputStream around 00334 // that, then call Message::ParseFromCodedStream() instead. Then 00335 // you can adjust the limit. Yes, it's more work, but you're doing 00336 // something unusual. 00337 void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold); 00338 00339 // Recursion Limit ------------------------------------------------- 00340 // To prevent corrupt or malicious messages from causing stack overflows, 00341 // we must keep track of the depth of recursion when parsing embedded 00342 // messages and groups. CodedInputStream keeps track of this because it 00343 // is the only object that is passed down the stack during parsing. 
00344 00345 // Sets the maximum recursion depth. The default is 64. 00346 void SetRecursionLimit(int limit); 00347 00348 // Increments the current recursion depth. Returns true if the depth is 00349 // under the limit, false if it has gone over. 00350 bool IncrementRecursionDepth(); 00351 00352 // Decrements the recursion depth. 00353 void DecrementRecursionDepth(); 00354 00355 // Extension Registry ---------------------------------------------- 00356 // ADVANCED USAGE: 99.9% of people can ignore this section. 00357 // 00358 // By default, when parsing extensions, the parser looks for extension 00359 // definitions in the pool which owns the outer message's Descriptor. 00360 // However, you may call SetExtensionRegistry() to provide an alternative 00361 // pool instead. This makes it possible, for example, to parse a message 00362 // using a generated class, but represent some extensions using 00363 // DynamicMessage. 00364 00365 // Set the pool used to look up extensions. Most users do not need to call 00366 // this as the correct pool will be chosen automatically. 00367 // 00368 // WARNING: It is very easy to misuse this. Carefully read the requirements 00369 // below. Do not use this unless you are sure you need it. Almost no one 00370 // does. 00371 // 00372 // Let's say you are parsing a message into message object m, and you want 00373 // to take advantage of SetExtensionRegistry(). You must follow these 00374 // requirements: 00375 // 00376 // The given DescriptorPool must contain m->GetDescriptor(). It is not 00377 // sufficient for it to simply contain a descriptor that has the same name 00378 // and content -- it must be the *exact object*. In other words: 00379 // assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) == 00380 // m->GetDescriptor()); 00381 // There are two ways to satisfy this requirement: 00382 // 1) Use m->GetDescriptor()->pool() as the pool. 
This is generally useless 00383 // because this is the pool that would be used anyway if you didn't call 00384 // SetExtensionRegistry() at all. 00385 // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an 00386 // "underlay". Read the documentation for DescriptorPool for more 00387 // information about underlays. 00388 // 00389 // You must also provide a MessageFactory. This factory will be used to 00390 // construct Message objects representing extensions. The factory's 00391 // GetPrototype() MUST return non-NULL for any Descriptor which can be found 00392 // through the provided pool. 00393 // 00394 // If the provided factory might return instances of protocol-compiler- 00395 // generated (i.e. compiled-in) types, or if the outer message object m is 00396 // a generated type, then the given factory MUST have this property: If 00397 // GetPrototype() is given a Descriptor which resides in 00398 // DescriptorPool::generated_pool(), the factory MUST return the same 00399 // prototype which MessageFactory::generated_factory() would return. That 00400 // is, given a descriptor for a generated type, the factory must return an 00401 // instance of the generated class (NOT DynamicMessage). However, when 00402 // given a descriptor for a type that is NOT in generated_pool, the factory 00403 // is free to return any implementation. 00404 // 00405 // The reason for this requirement is that generated sub-objects may be 00406 // accessed via the standard (non-reflection) extension accessor methods, 00407 // and these methods will down-cast the object to the generated class type. 00408 // If the object is not actually of that type, the results would be undefined. 00409 // On the other hand, if an extension is not compiled in, then there is no 00410 // way the code could end up accessing it via the standard accessors -- the 00411 // only way to access the extension is via reflection. 
When using reflection, 00412 // DynamicMessage and generated messages are indistinguishable, so it's fine 00413 // if these objects are represented using DynamicMessage. 00414 // 00415 // Using DynamicMessageFactory on which you have called 00416 // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the 00417 // above requirement. 00418 // 00419 // If either pool or factory is NULL, both must be NULL. 00420 // 00421 // Note that this feature is ignored when parsing "lite" messages as they do 00422 // not have descriptors. 00423 void SetExtensionRegistry(DescriptorPool* pool, MessageFactory* factory); 00424 00425 // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool 00426 // has been provided. 00427 const DescriptorPool* GetExtensionPool(); 00428 00429 // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no 00430 // factory has been provided. 00431 MessageFactory* GetExtensionFactory(); 00432 00433 private: 00434 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream); 00435 00436 ZeroCopyInputStream* input_; 00437 const uint8* buffer_; 00438 const uint8* buffer_end_; // pointer to the end of the buffer. 00439 int total_bytes_read_; // total bytes read from input_, including 00440 // the current buffer 00441 00442 // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here 00443 // so that we can BackUp() on destruction. 00444 int overflow_bytes_; 00445 00446 // LastTagWas() stuff. 00447 uint32 last_tag_; // result of last ReadTag(). 00448 00449 // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly 00450 // at EOF, or by ExpectAtEnd() when it returns true. This happens when we 00451 // reach the end of a message and attempt to read another tag. 00452 bool legitimate_message_end_; 00453 00454 // See EnableAliasing(). 
00455 bool aliasing_enabled_; 00456 00457 // Limits 00458 Limit current_limit_; // if position = -1, no limit is applied 00459 00460 // For simplicity, if the current buffer crosses a limit (either a normal 00461 // limit created by PushLimit() or the total bytes limit), buffer_size_ 00462 // only tracks the number of bytes before that limit. This field 00463 // contains the number of bytes after it. Note that this implies that if 00464 // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've 00465 // hit a limit. However, if both are zero, it doesn't necessarily mean 00466 // we aren't at a limit -- the buffer may have ended exactly at the limit. 00467 int buffer_size_after_limit_; 00468 00469 // Maximum number of bytes to read, period. This is unrelated to 00470 // current_limit_. Set using SetTotalBytesLimit(). 00471 int total_bytes_limit_; 00472 int total_bytes_warning_threshold_; 00473 00474 // Current recursion depth, controlled by IncrementRecursionDepth() and 00475 // DecrementRecursionDepth(). 00476 int recursion_depth_; 00477 // Recursion depth limit, set by SetRecursionLimit(). 00478 int recursion_limit_; 00479 00480 // See SetExtensionRegistry(). 00481 const DescriptorPool* extension_pool_; 00482 MessageFactory* extension_factory_; 00483 00484 // Private member functions. 00485 00486 // Advance the buffer by a given number of bytes. 00487 void Advance(int amount); 00488 00489 // Back up input_ to the current buffer position. 00490 void BackUpInputToCurrentPosition(); 00491 00492 // Recomputes the value of buffer_size_after_limit_. Must be called after 00493 // current_limit_ or total_bytes_limit_ changes. 00494 void RecomputeBufferLimits(); 00495 00496 // Writes an error message saying that we hit total_bytes_limit_. 00497 void PrintTotalBytesLimitError(); 00498 00499 // Called when the buffer runs out to request more data. Implies an 00500 // Advance(BufferSize()). 
00501 bool Refresh(); 00502 00503 // When parsing varints, we optimize for the common case of small values, and 00504 // then optimize for the case when the varint fits within the current buffer 00505 // piece. The Fallback method is used when we can't use the one-byte 00506 // optimization. The Slow method is yet another fallback when the buffer is 00507 // not large enough. Making the slow path out-of-line speeds up the common 00508 // case by 10-15%. The slow path is fairly uncommon: it only triggers when a 00509 // message crosses multiple buffers. 00510 bool ReadVarint32Fallback(uint32* value); 00511 bool ReadVarint64Fallback(uint64* value); 00512 bool ReadVarint32Slow(uint32* value); 00513 bool ReadVarint64Slow(uint64* value); 00514 bool ReadLittleEndian32Fallback(uint32* value); 00515 bool ReadLittleEndian64Fallback(uint64* value); 00516 // Fallback/slow methods for reading tags. These do not update last_tag_, 00517 // but will set legitimate_message_end_ if we are at the end of the input 00518 // stream. 00519 uint32 ReadTagFallback(); 00520 uint32 ReadTagSlow(); 00521 bool ReadStringFallback(string* buffer, int size); 00522 00523 // Return the size of the buffer. 00524 int BufferSize() const; 00525 00526 static const int kDefaultTotalBytesLimit = 64 << 20; // 64MB 00527 00528 static const int kDefaultTotalBytesWarningThreshold = 32 << 20; // 32MB 00529 static const int kDefaultRecursionLimit = 64; 00530 }; 00531 00532 // Class which encodes and writes binary data which is composed of varint- 00533 // encoded integers and fixed-width pieces. Wraps a ZeroCopyOutputStream. 00534 // Most users will not need to deal with CodedOutputStream. 00535 // 00536 // Most methods of CodedOutputStream which return a bool return false if an 00537 // underlying I/O error occurs. Once such a failure occurs, the 00538 // CodedOutputStream is broken and is no longer useful. 
The Write* methods do 00539 // not return the stream status, but will invalidate the stream if an error 00540 // occurs. The client can probe HadError() to determine the status. 00541 // 00542 // Note that every method of CodedOutputStream which writes some data has 00543 // a corresponding static "ToArray" version. These versions write directly 00544 // to the provided buffer, returning a pointer past the last written byte. 00545 // They require that the buffer has sufficient capacity for the encoded data. 00546 // This allows an optimization where we check if an output stream has enough 00547 // space for an entire message before we start writing and, if there is, we 00548 // call only the ToArray methods to avoid doing bound checks for each 00549 // individual value. 00550 // i.e., in the example above: 00551 // 00552 // CodedOutputStream coded_output = new CodedOutputStream(raw_output); 00553 // int magic_number = 1234; 00554 // char text[] = "Hello world!"; 00555 // 00556 // int coded_size = sizeof(magic_number) + 00557 // CodedOutputStream::Varint32Size(strlen(text)) + 00558 // strlen(text); 00559 // 00560 // uint8* buffer = 00561 // coded_output->GetDirectBufferForNBytesAndAdvance(coded_size); 00562 // if (buffer != NULL) { 00563 // // The output stream has enough space in the buffer: write directly to 00564 // // the array. 00565 // buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number, 00566 // buffer); 00567 // buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer); 00568 // buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer); 00569 // } else { 00570 // // Make bound-checked writes, which will ask the underlying stream for 00571 // // more space as needed. 
00572 // coded_output->WriteLittleEndian32(magic_number); 00573 // coded_output->WriteVarint32(strlen(text)); 00574 // coded_output->WriteRaw(text, strlen(text)); 00575 // } 00576 // 00577 // delete coded_output; 00578 class LIBPROTOBUF_EXPORT CodedOutputStream { 00579 public: 00580 // Create an CodedOutputStream that writes to the given ZeroCopyOutputStream. 00581 explicit CodedOutputStream(ZeroCopyOutputStream* output); 00582 00583 // Destroy the CodedOutputStream and position the underlying 00584 // ZeroCopyOutputStream immediately after the last byte written. 00585 ~CodedOutputStream(); 00586 00587 // Skips a number of bytes, leaving the bytes unmodified in the underlying 00588 // buffer. Returns false if an underlying write error occurs. This is 00589 // mainly useful with GetDirectBufferPointer(). 00590 bool Skip(int count); 00591 00592 // Sets *data to point directly at the unwritten part of the 00593 // CodedOutputStream's underlying buffer, and *size to the size of that 00594 // buffer, but does not advance the stream's current position. This will 00595 // always either produce a non-empty buffer or return false. If the caller 00596 // writes any data to this buffer, it should then call Skip() to skip over 00597 // the consumed bytes. This may be useful for implementing external fast 00598 // serialization routines for types of data not covered by the 00599 // CodedOutputStream interface. 00600 bool GetDirectBufferPointer(void** data, int* size); 00601 00602 // If there are at least "size" bytes available in the current buffer, 00603 // returns a pointer directly into the buffer and advances over these bytes. 00604 // The caller may then write directly into this buffer (e.g. using the 00605 // *ToArray static methods) rather than go through CodedOutputStream. If 00606 // there are not enough bytes available, returns NULL. The return pointer is 00607 // invalidated as soon as any other non-const method of CodedOutputStream 00608 // is called. 
00609 inline uint8* GetDirectBufferForNBytesAndAdvance(int size); 00610 00611 // Write raw bytes, copying them from the given buffer. 00612 void WriteRaw(const void* buffer, int size); 00613 // Like WriteRaw() but writing directly to the target array. 00614 // This is _not_ inlined, as the compiler often optimizes memcpy into inline 00615 // copy loops. Since this gets called by every field with string or bytes 00616 // type, inlining may lead to a significant amount of code bloat, with only a 00617 // minor performance gain. 00618 static uint8* WriteRawToArray(const void* buffer, int size, uint8* target); 00619 00620 // Equivalent to WriteRaw(str.data(), str.size()). 00621 void WriteString(const string& str); 00622 // Like WriteString() but writing directly to the target array. 00623 static uint8* WriteStringToArray(const string& str, uint8* target); 00624 00625 00626 // Write a 32-bit little-endian integer. 00627 void WriteLittleEndian32(uint32 value); 00628 // Like WriteLittleEndian32() but writing directly to the target array. 00629 static uint8* WriteLittleEndian32ToArray(uint32 value, uint8* target); 00630 // Write a 64-bit little-endian integer. 00631 void WriteLittleEndian64(uint64 value); 00632 // Like WriteLittleEndian64() but writing directly to the target array. 00633 static uint8* WriteLittleEndian64ToArray(uint64 value, uint8* target); 00634 00635 // Write an unsigned integer with Varint encoding. Writing a 32-bit value 00636 // is equivalent to casting it to uint64 and writing it as a 64-bit value, 00637 // but may be more efficient. 00638 void WriteVarint32(uint32 value); 00639 // Like WriteVarint32() but writing directly to the target array. 00640 static uint8* WriteVarint32ToArray(uint32 value, uint8* target); 00641 // Write an unsigned integer with Varint encoding. 00642 void WriteVarint64(uint64 value); 00643 // Like WriteVarint64() but writing directly to the target array. 
00644 static uint8* WriteVarint64ToArray(uint64 value, uint8* target); 00645 00646 // Equivalent to WriteVarint32() except when the value is negative, 00647 // in which case it must be sign-extended to a full 10 bytes. 00648 void WriteVarint32SignExtended(int32 value); 00649 // Like WriteVarint32SignExtended() but writing directly to the target array. 00650 static uint8* WriteVarint32SignExtendedToArray(int32 value, uint8* target); 00651 00652 // This is identical to WriteVarint32(), but optimized for writing tags. 00653 // In particular, if the input is a compile-time constant, this method 00654 // compiles down to a couple instructions. 00655 // Always inline because otherwise the aformentioned optimization can't work, 00656 // but GCC by default doesn't want to inline this. 00657 void WriteTag(uint32 value); 00658 // Like WriteTag() but writing directly to the target array. 00659 static uint8* WriteTagToArray( 00660 uint32 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; 00661 00662 // Returns the number of bytes needed to encode the given value as a varint. 00663 static int VarintSize32(uint32 value); 00664 // Returns the number of bytes needed to encode the given value as a varint. 00665 static int VarintSize64(uint64 value); 00666 00667 // If negative, 10 bytes. Otheriwse, same as VarintSize32(). 00668 static int VarintSize32SignExtended(int32 value); 00669 00670 // Returns the total number of bytes written since this object was created. 00671 inline int ByteCount() const; 00672 00673 // Returns true if there was an underlying I/O error since this object was 00674 // created. 00675 bool HadError() const { return had_error_; } 00676 00677 private: 00678 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream); 00679 00680 ZeroCopyOutputStream* output_; 00681 uint8* buffer_; 00682 int buffer_size_; 00683 int total_bytes_; // Sum of sizes of all buffers seen so far. 00684 bool had_error_; // Whether an error occurred during output. 
00685 00686 // Advance the buffer by a given number of bytes. 00687 void Advance(int amount); 00688 00689 // Called when the buffer runs out to request more data. Implies an 00690 // Advance(buffer_size_). 00691 bool Refresh(); 00692 00693 static uint8* WriteVarint32FallbackToArray(uint32 value, uint8* target); 00694 00695 // Always-inlined versions of WriteVarint* functions so that code can be 00696 // reused, while still controlling size. For instance, WriteVarint32ToArray() 00697 // should not directly call this: since it is inlined itself, doing so 00698 // would greatly increase the size of generated code. Instead, it should call 00699 // WriteVarint32FallbackToArray. Meanwhile, WriteVarint32() is already 00700 // out-of-line, so it should just invoke this directly to avoid any extra 00701 // function call overhead. 00702 static uint8* WriteVarint32FallbackToArrayInline( 00703 uint32 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; 00704 static uint8* WriteVarint64ToArrayInline( 00705 uint64 value, uint8* target) GOOGLE_ATTRIBUTE_ALWAYS_INLINE; 00706 00707 static int VarintSize32Fallback(uint32 value); 00708 }; 00709 00710 // inline methods ==================================================== 00711 // The vast majority of varints are only one byte. These inline 00712 // methods optimize for that case. 
00713 00714 inline bool CodedInputStream::ReadVarint32(uint32* value) { 00715 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) { 00716 *value = *buffer_; 00717 Advance(1); 00718 return true; 00719 } else { 00720 return ReadVarint32Fallback(value); 00721 } 00722 } 00723 00724 inline bool CodedInputStream::ReadVarint64(uint64* value) { 00725 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) { 00726 *value = *buffer_; 00727 Advance(1); 00728 return true; 00729 } else { 00730 return ReadVarint64Fallback(value); 00731 } 00732 } 00733 00734 // static 00735 inline const uint8* CodedInputStream::ReadLittleEndian32FromArray( 00736 const uint8* buffer, 00737 uint32* value) { 00738 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ 00739 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN 00740 memcpy(value, buffer, sizeof(*value)); 00741 return buffer + sizeof(*value); 00742 #else 00743 *value = (static_cast<uint32>(buffer[0]) ) | 00744 (static_cast<uint32>(buffer[1]) << 8) | 00745 (static_cast<uint32>(buffer[2]) << 16) | 00746 (static_cast<uint32>(buffer[3]) << 24); 00747 return buffer + sizeof(*value); 00748 #endif 00749 } 00750 // static 00751 inline const uint8* CodedInputStream::ReadLittleEndian64FromArray( 00752 const uint8* buffer, 00753 uint64* value) { 00754 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ 00755 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN 00756 memcpy(value, buffer, sizeof(*value)); 00757 return buffer + sizeof(*value); 00758 #else 00759 uint32 part0 = (static_cast<uint32>(buffer[0]) ) | 00760 (static_cast<uint32>(buffer[1]) << 8) | 00761 (static_cast<uint32>(buffer[2]) << 16) | 00762 (static_cast<uint32>(buffer[3]) << 24); 00763 uint32 part1 = (static_cast<uint32>(buffer[4]) ) | 00764 (static_cast<uint32>(buffer[5]) << 8) | 00765 (static_cast<uint32>(buffer[6]) << 16) | 00766 (static_cast<uint32>(buffer[7]) << 24); 00767 *value = static_cast<uint64>(part0) | 00768 
(static_cast<uint64>(part1) << 32); 00769 return buffer + sizeof(*value); 00770 #endif 00771 } 00772 00773 inline bool CodedInputStream::ReadLittleEndian32(uint32* value) { 00774 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ 00775 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN 00776 if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) { 00777 memcpy(value, buffer_, sizeof(*value)); 00778 Advance(sizeof(*value)); 00779 return true; 00780 } else { 00781 return ReadLittleEndian32Fallback(value); 00782 } 00783 #else 00784 return ReadLittleEndian32Fallback(value); 00785 #endif 00786 } 00787 00788 inline bool CodedInputStream::ReadLittleEndian64(uint64* value) { 00789 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ 00790 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN 00791 if (GOOGLE_PREDICT_TRUE(BufferSize() >= sizeof(*value))) { 00792 memcpy(value, buffer_, sizeof(*value)); 00793 Advance(sizeof(*value)); 00794 return true; 00795 } else { 00796 return ReadLittleEndian64Fallback(value); 00797 } 00798 #else 00799 return ReadLittleEndian64Fallback(value); 00800 #endif 00801 } 00802 00803 inline uint32 CodedInputStream::ReadTag() { 00804 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] < 0x80) { 00805 last_tag_ = buffer_[0]; 00806 Advance(1); 00807 return last_tag_; 00808 } else { 00809 last_tag_ = ReadTagFallback(); 00810 return last_tag_; 00811 } 00812 } 00813 00814 inline bool CodedInputStream::LastTagWas(uint32 expected) { 00815 return last_tag_ == expected; 00816 } 00817 00818 inline bool CodedInputStream::ConsumedEntireMessage() { 00819 return legitimate_message_end_; 00820 } 00821 00822 inline bool CodedInputStream::ExpectTag(uint32 expected) { 00823 if (expected < (1 << 7)) { 00824 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] == expected) { 00825 Advance(1); 00826 return true; 00827 } else { 00828 return false; 00829 } 00830 } else if (expected < (1 << 14)) { 00831 if 
(GOOGLE_PREDICT_TRUE(BufferSize() >= 2) && 00832 buffer_[0] == static_cast<uint8>(expected | 0x80) && 00833 buffer_[1] == static_cast<uint8>(expected >> 7)) { 00834 Advance(2); 00835 return true; 00836 } else { 00837 return false; 00838 } 00839 } else { 00840 // Don't bother optimizing for larger values. 00841 return false; 00842 } 00843 } 00844 00845 inline const uint8* CodedInputStream::ExpectTagFromArray( 00846 const uint8* buffer, uint32 expected) { 00847 if (expected < (1 << 7)) { 00848 if (buffer[0] == expected) { 00849 return buffer + 1; 00850 } 00851 } else if (expected < (1 << 14)) { 00852 if (buffer[0] == static_cast<uint8>(expected | 0x80) && 00853 buffer[1] == static_cast<uint8>(expected >> 7)) { 00854 return buffer + 2; 00855 } 00856 } 00857 return NULL; 00858 } 00859 00860 inline void CodedInputStream::GetDirectBufferPointerInline(const void** data, 00861 int* size) { 00862 *data = buffer_; 00863 *size = buffer_end_ - buffer_; 00864 } 00865 00866 inline bool CodedInputStream::ExpectAtEnd() { 00867 // If we are at a limit we know no more bytes can be read. Otherwise, it's 00868 // hard to say without calling Refresh(), and we'd rather not do that. 00869 00870 if (buffer_ == buffer_end_ && buffer_size_after_limit_ != 0) { 00871 last_tag_ = 0; // Pretend we called ReadTag()... 00872 legitimate_message_end_ = true; // ... and it hit EOF. 
00873 return true; 00874 } else { 00875 return false; 00876 } 00877 } 00878 00879 inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) { 00880 if (buffer_size_ < size) { 00881 return NULL; 00882 } else { 00883 uint8* result = buffer_; 00884 Advance(size); 00885 return result; 00886 } 00887 } 00888 00889 inline uint8* CodedOutputStream::WriteVarint32ToArray(uint32 value, 00890 uint8* target) { 00891 if (value < 0x80) { 00892 *target = value; 00893 return target + 1; 00894 } else { 00895 return WriteVarint32FallbackToArray(value, target); 00896 } 00897 } 00898 00899 inline void CodedOutputStream::WriteVarint32SignExtended(int32 value) { 00900 if (value < 0) { 00901 WriteVarint64(static_cast<uint64>(value)); 00902 } else { 00903 WriteVarint32(static_cast<uint32>(value)); 00904 } 00905 } 00906 00907 inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray( 00908 int32 value, uint8* target) { 00909 if (value < 0) { 00910 return WriteVarint64ToArray(static_cast<uint64>(value), target); 00911 } else { 00912 return WriteVarint32ToArray(static_cast<uint32>(value), target); 00913 } 00914 } 00915 00916 inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value, 00917 uint8* target) { 00918 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ 00919 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN 00920 memcpy(target, &value, sizeof(value)); 00921 #else 00922 target[0] = static_cast<uint8>(value); 00923 target[1] = static_cast<uint8>(value >> 8); 00924 target[2] = static_cast<uint8>(value >> 16); 00925 target[3] = static_cast<uint8>(value >> 24); 00926 #endif 00927 return target + sizeof(value); 00928 } 00929 00930 inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value, 00931 uint8* target) { 00932 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST) && \ 00933 defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN 00934 memcpy(target, &value, sizeof(value)); 00935 #else 00936 uint32 
part0 = static_cast<uint32>(value); 00937 uint32 part1 = static_cast<uint32>(value >> 32); 00938 00939 target[0] = static_cast<uint8>(part0); 00940 target[1] = static_cast<uint8>(part0 >> 8); 00941 target[2] = static_cast<uint8>(part0 >> 16); 00942 target[3] = static_cast<uint8>(part0 >> 24); 00943 target[4] = static_cast<uint8>(part1); 00944 target[5] = static_cast<uint8>(part1 >> 8); 00945 target[6] = static_cast<uint8>(part1 >> 16); 00946 target[7] = static_cast<uint8>(part1 >> 24); 00947 #endif 00948 return target + sizeof(value); 00949 } 00950 00951 inline void CodedOutputStream::WriteTag(uint32 value) { 00952 WriteVarint32(value); 00953 } 00954 00955 inline uint8* CodedOutputStream::WriteTagToArray( 00956 uint32 value, uint8* target) { 00957 if (value < (1 << 7)) { 00958 target[0] = value; 00959 return target + 1; 00960 } else if (value < (1 << 14)) { 00961 target[0] = static_cast<uint8>(value | 0x80); 00962 target[1] = static_cast<uint8>(value >> 7); 00963 return target + 2; 00964 } else { 00965 return WriteVarint32FallbackToArray(value, target); 00966 } 00967 } 00968 00969 inline int CodedOutputStream::VarintSize32(uint32 value) { 00970 if (value < (1 << 7)) { 00971 return 1; 00972 } else { 00973 return VarintSize32Fallback(value); 00974 } 00975 } 00976 00977 inline int CodedOutputStream::VarintSize32SignExtended(int32 value) { 00978 if (value < 0) { 00979 return 10; // TODO(kenton): Make this a symbolic constant. 
00980 } else { 00981 return VarintSize32(static_cast<uint32>(value)); 00982 } 00983 } 00984 00985 inline void CodedOutputStream::WriteString(const string& str) { 00986 WriteRaw(str.data(), str.size()); 00987 } 00988 00989 inline uint8* CodedOutputStream::WriteStringToArray( 00990 const string& str, uint8* target) { 00991 return WriteRawToArray(str.data(), str.size(), target); 00992 } 00993 00994 inline int CodedOutputStream::ByteCount() const { 00995 return total_bytes_ - buffer_size_; 00996 } 00997 00998 inline void CodedInputStream::Advance(int amount) { 00999 buffer_ += amount; 01000 } 01001 01002 inline void CodedOutputStream::Advance(int amount) { 01003 buffer_ += amount; 01004 buffer_size_ -= amount; 01005 } 01006 01007 inline void CodedInputStream::SetRecursionLimit(int limit) { 01008 recursion_limit_ = limit; 01009 } 01010 01011 inline bool CodedInputStream::IncrementRecursionDepth() { 01012 ++recursion_depth_; 01013 return recursion_depth_ <= recursion_limit_; 01014 } 01015 01016 inline void CodedInputStream::DecrementRecursionDepth() { 01017 if (recursion_depth_ > 0) --recursion_depth_; 01018 } 01019 01020 inline void CodedInputStream::SetExtensionRegistry(DescriptorPool* pool, 01021 MessageFactory* factory) { 01022 extension_pool_ = pool; 01023 extension_factory_ = factory; 01024 } 01025 01026 inline const DescriptorPool* CodedInputStream::GetExtensionPool() { 01027 return extension_pool_; 01028 } 01029 01030 inline MessageFactory* CodedInputStream::GetExtensionFactory() { 01031 return extension_factory_; 01032 } 01033 01034 inline int CodedInputStream::BufferSize() const { 01035 return buffer_end_ - buffer_; 01036 } 01037 01038 inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input) 01039 : input_(input), 01040 buffer_(NULL), 01041 buffer_end_(NULL), 01042 total_bytes_read_(0), 01043 overflow_bytes_(0), 01044 last_tag_(0), 01045 legitimate_message_end_(false), 01046 aliasing_enabled_(false), 01047 current_limit_(INT_MAX), 01048 
buffer_size_after_limit_(0), 01049 total_bytes_limit_(kDefaultTotalBytesLimit), 01050 total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold), 01051 recursion_depth_(0), 01052 recursion_limit_(kDefaultRecursionLimit), 01053 extension_pool_(NULL), 01054 extension_factory_(NULL) { 01055 // Eagerly Refresh() so buffer space is immediately available. 01056 Refresh(); 01057 } 01058 01059 inline CodedInputStream::CodedInputStream(const uint8* buffer, int size) 01060 : input_(NULL), 01061 buffer_(buffer), 01062 buffer_end_(buffer + size), 01063 total_bytes_read_(size), 01064 overflow_bytes_(0), 01065 last_tag_(0), 01066 legitimate_message_end_(false), 01067 aliasing_enabled_(false), 01068 current_limit_(size), 01069 buffer_size_after_limit_(0), 01070 total_bytes_limit_(kDefaultTotalBytesLimit), 01071 total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold), 01072 recursion_depth_(0), 01073 recursion_limit_(kDefaultRecursionLimit), 01074 extension_pool_(NULL), 01075 extension_factory_(NULL) { 01076 // Note that setting current_limit_ == size is important to prevent some 01077 // code paths from trying to access input_ and segfaulting. 01078 } 01079 01080 inline CodedInputStream::~CodedInputStream() { 01081 if (input_ != NULL) { 01082 BackUpInputToCurrentPosition(); 01083 } 01084 } 01085 01086 } // namespace io 01087 } // namespace protobuf 01088 01089 } // namespace google 01090 #endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__