BWAPI
|
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: kenton@google.com (Kenton Varda)
//  Based on original Protocol Buffers design by
//  Sanjay Ghemawat, Jeff Dean, and others.
00034 // 00035 // Implements parsing of .proto files to FileDescriptorProtos. 00036 00037 #ifndef GOOGLE_PROTOBUF_COMPILER_PARSER_H__ 00038 #define GOOGLE_PROTOBUF_COMPILER_PARSER_H__ 00039 00040 #include <map> 00041 #include <string> 00042 #include <utility> 00043 #include <google/protobuf/stubs/common.h> 00044 #include <google/protobuf/descriptor.h> 00045 #include <google/protobuf/descriptor.pb.h> 00046 #include <google/protobuf/repeated_field.h> 00047 #include <google/protobuf/io/tokenizer.h> 00048 00049 namespace google { 00050 namespace protobuf { class Message; } 00051 00052 namespace protobuf { 00053 namespace compiler { 00054 00055 // Defined in this file. 00056 class Parser; 00057 class SourceLocationTable; 00058 00059 // Implements parsing of protocol definitions (such as .proto files). 00060 // 00061 // Note that most users will be more interested in the Importer class. 00062 // Parser is a lower-level class which simply converts a single .proto file 00063 // to a FileDescriptorProto. It does not resolve import directives or perform 00064 // many other kinds of validation needed to construct a complete 00065 // FileDescriptor. 00066 class LIBPROTOBUF_EXPORT Parser { 00067 public: 00068 Parser(); 00069 ~Parser(); 00070 00071 // Parse the entire input and construct a FileDescriptorProto representing 00072 // it. Returns true if no errors occurred, false otherwise. 00073 bool Parse(io::Tokenizer* input, FileDescriptorProto* file); 00074 00075 // Optional fetaures: 00076 00077 // Requests that locations of certain definitions be recorded to the given 00078 // SourceLocationTable while parsing. This can be used to look up exact line 00079 // and column numbers for errors reported by DescriptorPool during validation. 00080 // Set to NULL (the default) to discard source location information. 
00081 void RecordSourceLocationsTo(SourceLocationTable* location_table) { 00082 source_location_table_ = location_table; 00083 } 00084 00085 // Requsets that errors be recorded to the given ErrorCollector while 00086 // parsing. Set to NULL (the default) to discard error messages. 00087 void RecordErrorsTo(io::ErrorCollector* error_collector) { 00088 error_collector_ = error_collector; 00089 } 00090 00091 // Returns the identifier used in the "syntax = " declaration, if one was 00092 // seen during the last call to Parse(), or the empty string otherwise. 00093 const string& GetSyntaxIdentifier() { return syntax_identifier_; } 00094 00095 // If set true, input files will be required to begin with a syntax 00096 // identifier. Otherwise, files may omit this. If a syntax identifier 00097 // is provided, it must be 'syntax = "proto2";' and must appear at the 00098 // top of this file regardless of whether or not it was required. 00099 void SetRequireSyntaxIdentifier(bool value) { 00100 require_syntax_identifier_ = value; 00101 } 00102 00103 // Call SetStopAfterSyntaxIdentifier(true) to tell the parser to stop 00104 // parsing as soon as it has seen the syntax identifier, or lack thereof. 00105 // This is useful for quickly identifying the syntax of the file without 00106 // parsing the whole thing. If this is enabled, no error will be recorded 00107 // if the syntax identifier is something other than "proto2" (since 00108 // presumably the caller intends to deal with that), but other kinds of 00109 // errors (e.g. parse errors) will still be reported. When this is enabled, 00110 // you may pass a NULL FileDescriptorProto to Parse(). 00111 void SetStopAfterSyntaxIdentifier(bool value) { 00112 stop_after_syntax_identifier_ = value; 00113 } 00114 00115 private: 00116 // ================================================================= 00117 // Error recovery helpers 00118 00119 // Consume the rest of the current statement. 
This consumes tokens 00120 // until it sees one of: 00121 // ';' Consumes the token and returns. 00122 // '{' Consumes the brace then calls SkipRestOfBlock(). 00123 // '}' Returns without consuming. 00124 // EOF Returns (can't consume). 00125 // The Parser often calls SkipStatement() after encountering a syntax 00126 // error. This allows it to go on parsing the following lines, allowing 00127 // it to report more than just one error in the file. 00128 void SkipStatement(); 00129 00130 // Consume the rest of the current block, including nested blocks, 00131 // ending after the closing '}' is encountered and consumed, or at EOF. 00132 void SkipRestOfBlock(); 00133 00134 // ----------------------------------------------------------------- 00135 // Single-token consuming helpers 00136 // 00137 // These make parsing code more readable. 00138 00139 // True if the current token is TYPE_END. 00140 inline bool AtEnd(); 00141 00142 // True if the next token matches the given text. 00143 inline bool LookingAt(const char* text); 00144 // True if the next token is of the given type. 00145 inline bool LookingAtType(io::Tokenizer::TokenType token_type); 00146 00147 // If the next token exactly matches the text given, consume it and return 00148 // true. Otherwise, return false without logging an error. 00149 bool TryConsume(const char* text); 00150 00151 // These attempt to read some kind of token from the input. If successful, 00152 // they return true. Otherwise they return false and add the given error 00153 // to the error list. 00154 00155 // Consume a token with the exact text given. 00156 bool Consume(const char* text, const char* error); 00157 // Same as above, but automatically generates the error "Expected \"text\".", 00158 // where "text" is the expected token text. 00159 bool Consume(const char* text); 00160 // Consume a token of type IDENTIFIER and store its text in "output". 
00161 bool ConsumeIdentifier(string* output, const char* error); 00162 // Consume an integer and store its value in "output". 00163 bool ConsumeInteger(int* output, const char* error); 00164 // Consume a 64-bit integer and store its value in "output". If the value 00165 // is greater than max_value, an error will be reported. 00166 bool ConsumeInteger64(uint64 max_value, uint64* output, const char* error); 00167 // Consume a number and store its value in "output". This will accept 00168 // tokens of either INTEGER or FLOAT type. 00169 bool ConsumeNumber(double* output, const char* error); 00170 // Consume a string literal and store its (unescaped) value in "output". 00171 bool ConsumeString(string* output, const char* error); 00172 00173 // ----------------------------------------------------------------- 00174 // Error logging helpers 00175 00176 // Invokes error_collector_->AddError(), if error_collector_ is not NULL. 00177 void AddError(int line, int column, const string& error); 00178 00179 // Invokes error_collector_->AddError() with the line and column number 00180 // of the current token. 00181 void AddError(const string& error); 00182 00183 // Record the given line and column and associate it with this descriptor 00184 // in the SourceLocationTable. 00185 void RecordLocation(const Message* descriptor, 00186 DescriptorPool::ErrorCollector::ErrorLocation location, 00187 int line, int column); 00188 00189 // Record the current line and column and associate it with this descriptor 00190 // in the SourceLocationTable. 00191 void RecordLocation(const Message* descriptor, 00192 DescriptorPool::ErrorCollector::ErrorLocation location); 00193 00194 // ================================================================= 00195 // Parsers for various language constructs 00196 00197 // Parses the "syntax = \"proto2\";" line at the top of the file. Returns 00198 // false if it failed to parse or if the syntax identifier was not 00199 // recognized. 
00200 bool ParseSyntaxIdentifier(); 00201 00202 // These methods parse various individual bits of code. They return 00203 // false if they completely fail to parse the construct. In this case, 00204 // it is probably necessary to skip the rest of the statement to recover. 00205 // However, if these methods return true, it does NOT mean that there 00206 // were no errors; only that there were no *syntax* errors. For instance, 00207 // if a service method is defined using proper syntax but uses a primitive 00208 // type as its input or output, ParseMethodField() still returns true 00209 // and only reports the error by calling AddError(). In practice, this 00210 // makes logic much simpler for the caller. 00211 00212 // Parse a top-level message, enum, service, etc. 00213 bool ParseTopLevelStatement(FileDescriptorProto* file); 00214 00215 // Parse various language high-level language construrcts. 00216 bool ParseMessageDefinition(DescriptorProto* message); 00217 bool ParseEnumDefinition(EnumDescriptorProto* enum_type); 00218 bool ParseServiceDefinition(ServiceDescriptorProto* service); 00219 bool ParsePackage(FileDescriptorProto* file); 00220 bool ParseImport(string* import_filename); 00221 bool ParseOption(Message* options); 00222 00223 // These methods parse the contents of a message, enum, or service type and 00224 // add them to the given object. They consume the entire block including 00225 // the beginning and ending brace. 00226 bool ParseMessageBlock(DescriptorProto* message); 00227 bool ParseEnumBlock(EnumDescriptorProto* enum_type); 00228 bool ParseServiceBlock(ServiceDescriptorProto* service); 00229 00230 // Parse one statement within a message, enum, or service block, inclunding 00231 // final semicolon. 00232 bool ParseMessageStatement(DescriptorProto* message); 00233 bool ParseEnumStatement(EnumDescriptorProto* message); 00234 bool ParseServiceStatement(ServiceDescriptorProto* message); 00235 00236 // Parse a field of a message. 
If the field is a group, its type will be 00237 // added to "messages". 00238 bool ParseMessageField(FieldDescriptorProto* field, 00239 RepeatedPtrField<DescriptorProto>* messages); 00240 00241 // Parse an "extensions" declaration. 00242 bool ParseExtensions(DescriptorProto* message); 00243 00244 // Parse an "extend" declaration. 00245 bool ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions, 00246 RepeatedPtrField<DescriptorProto>* messages); 00247 00248 // Parse a single enum value within an enum block. 00249 bool ParseEnumConstant(EnumValueDescriptorProto* enum_value); 00250 00251 // Parse enum constant options, i.e. the list in square brackets at the end 00252 // of the enum constant value definition. 00253 bool ParseEnumConstantOptions(EnumValueDescriptorProto* value); 00254 00255 // Parse a single method within a service definition. 00256 bool ParseServiceMethod(MethodDescriptorProto* method); 00257 00258 // Parse "required", "optional", or "repeated" and fill in "label" 00259 // with the value. 00260 bool ParseLabel(FieldDescriptorProto::Label* label); 00261 00262 // Parse a type name and fill in "type" (if it is a primitive) or 00263 // "type_name" (if it is not) with the type parsed. 00264 bool ParseType(FieldDescriptorProto::Type* type, 00265 string* type_name); 00266 // Parse a user-defined type and fill in "type_name" with the name. 00267 // If a primitive type is named, it is treated as an error. 00268 bool ParseUserDefinedType(string* type_name); 00269 00270 // Parses field options, i.e. the stuff in square brackets at the end 00271 // of a field definition. Also parses default value. 00272 bool ParseFieldOptions(FieldDescriptorProto* field); 00273 00274 // Parse the "default" option. This needs special handling because its 00275 // type is the field's type. 00276 bool ParseDefaultAssignment(FieldDescriptorProto* field); 00277 00278 // Parse a single option name/value pair, e.g. "ctype = CORD". 
The name 00279 // identifies a field of the given Message, and the value of that field 00280 // is set to the parsed value. 00281 bool ParseOptionAssignment(Message* options); 00282 00283 // Parses a single part of a multipart option name. A multipart name consists 00284 // of names separated by dots. Each name is either an identifier or a series 00285 // of identifiers separated by dots and enclosed in parentheses. E.g., 00286 // "foo.(bar.baz).qux". 00287 bool ParseOptionNamePart(UninterpretedOption* uninterpreted_option); 00288 00289 // ================================================================= 00290 00291 io::Tokenizer* input_; 00292 io::ErrorCollector* error_collector_; 00293 SourceLocationTable* source_location_table_; 00294 bool had_errors_; 00295 bool require_syntax_identifier_; 00296 bool stop_after_syntax_identifier_; 00297 string syntax_identifier_; 00298 00299 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Parser); 00300 }; 00301 00302 // A table mapping (descriptor, ErrorLocation) pairs -- as reported by 00303 // DescriptorPool when validating descriptors -- to line and column numbers 00304 // within the original source code. 00305 class LIBPROTOBUF_EXPORT SourceLocationTable { 00306 public: 00307 SourceLocationTable(); 00308 ~SourceLocationTable(); 00309 00310 // Finds the precise location of the given error and fills in *line and 00311 // *column with the line and column numbers. If not found, sets *line to 00312 // -1 and *column to 0 (since line = -1 is used to mean "error has no exact 00313 // location" in the ErrorCollector interface). Returns true if found, false 00314 // otherwise. 00315 bool Find(const Message* descriptor, 00316 DescriptorPool::ErrorCollector::ErrorLocation location, 00317 int* line, int* column) const; 00318 00319 // Adds a location to the table. 00320 void Add(const Message* descriptor, 00321 DescriptorPool::ErrorCollector::ErrorLocation location, 00322 int line, int column); 00323 00324 // Clears the contents of the table. 
00325 void Clear(); 00326 00327 private: 00328 typedef map< 00329 pair<const Message*, DescriptorPool::ErrorCollector::ErrorLocation>, 00330 pair<int, int> > LocationMap; 00331 LocationMap location_map_; 00332 }; 00333 00334 } // namespace compiler 00335 } // namespace protobuf 00336 00337 } // namespace google 00338 #endif // GOOGLE_PROTOBUF_COMPILER_PARSER_H__