BWAPI
Undermind/proxy/cpp/include/google/protobuf/compiler/parser.h
Go to the documentation of this file.
00001 // Protocol Buffers - Google's data interchange format
00002 // Copyright 2008 Google Inc.  All rights reserved.
00003 // http://code.google.com/p/protobuf/
00004 //
00005 // Redistribution and use in source and binary forms, with or without
00006 // modification, are permitted provided that the following conditions are
00007 // met:
00008 //
00009 //     * Redistributions of source code must retain the above copyright
00010 // notice, this list of conditions and the following disclaimer.
00011 //     * Redistributions in binary form must reproduce the above
00012 // copyright notice, this list of conditions and the following disclaimer
00013 // in the documentation and/or other materials provided with the
00014 // distribution.
00015 //     * Neither the name of Google Inc. nor the names of its
00016 // contributors may be used to endorse or promote products derived from
00017 // this software without specific prior written permission.
00018 //
00019 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
00020 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
00021 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
00022 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00023 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00024 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00025 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00026 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00027 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00028 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00029 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00030 
00031 // Author: kenton@google.com (Kenton Varda)
00032 //  Based on original Protocol Buffers design by
00033 //  Sanjay Ghemawat, Jeff Dean, and others.
00034 //
00035 // Implements parsing of .proto files to FileDescriptorProtos.
00036 
00037 #ifndef GOOGLE_PROTOBUF_COMPILER_PARSER_H__
00038 #define GOOGLE_PROTOBUF_COMPILER_PARSER_H__
00039 
00040 #include <map>
00041 #include <string>
00042 #include <utility>
00043 #include <google/protobuf/stubs/common.h>
00044 #include <google/protobuf/descriptor.h>
00045 #include <google/protobuf/descriptor.pb.h>
00046 #include <google/protobuf/repeated_field.h>
00047 #include <google/protobuf/io/tokenizer.h>
00048 
00049 namespace google {
00050 namespace protobuf { class Message; }
00051 
00052 namespace protobuf {
00053 namespace compiler {
00054 
00055 // Defined in this file.
00056 class Parser;
00057 class SourceLocationTable;
00058 
00059 // Implements parsing of protocol definitions (such as .proto files).
00060 //
00061 // Note that most users will be more interested in the Importer class.
00062 // Parser is a lower-level class which simply converts a single .proto file
00063 // to a FileDescriptorProto.  It does not resolve import directives or perform
00064 // many other kinds of validation needed to construct a complete
00065 // FileDescriptor.
00066 class LIBPROTOBUF_EXPORT Parser {
00067  public:
00068   Parser();
00069   ~Parser();
00070 
00071   // Parse the entire input and construct a FileDescriptorProto representing
00072   // it.  Returns true if no errors occurred, false otherwise.
00073   bool Parse(io::Tokenizer* input, FileDescriptorProto* file);
00074 
00075   // Optional features:
00076 
00077   // Requests that locations of certain definitions be recorded to the given
00078   // SourceLocationTable while parsing.  This can be used to look up exact line
00079   // and column numbers for errors reported by DescriptorPool during validation.
00080   // Set to NULL (the default) to discard source location information.
00081   void RecordSourceLocationsTo(SourceLocationTable* location_table) {
00082     source_location_table_ = location_table;
00083   }
00084 
00085   // Requests that errors be recorded to the given ErrorCollector while
00086   // parsing.  Set to NULL (the default) to discard error messages.
00087   void RecordErrorsTo(io::ErrorCollector* error_collector) {
00088     error_collector_ = error_collector;
00089   }
00090 
00091   // Returns the identifier used in the "syntax = " declaration, if one was
00092   // seen during the last call to Parse(), or the empty string otherwise.
00093   const string& GetSyntaxIdentifier() { return syntax_identifier_; }
00094 
00095   // If set true, input files will be required to begin with a syntax
00096   // identifier.  Otherwise, files may omit this.  If a syntax identifier
00097   // is provided, it must be 'syntax = "proto2";' and must appear at the
00098   // top of this file regardless of whether or not it was required.
00099   void SetRequireSyntaxIdentifier(bool value) {
00100     require_syntax_identifier_ = value;
00101   }
00102 
00103   // Call SetStopAfterSyntaxIdentifier(true) to tell the parser to stop
00104   // parsing as soon as it has seen the syntax identifier, or lack thereof.
00105   // This is useful for quickly identifying the syntax of the file without
00106   // parsing the whole thing.  If this is enabled, no error will be recorded
00107   // if the syntax identifier is something other than "proto2" (since
00108   // presumably the caller intends to deal with that), but other kinds of
00109   // errors (e.g. parse errors) will still be reported.  When this is enabled,
00110   // you may pass a NULL FileDescriptorProto to Parse().
00111   void SetStopAfterSyntaxIdentifier(bool value) {
00112     stop_after_syntax_identifier_ = value;
00113   }
00114 
00115  private:
00116   // =================================================================
00117   // Error recovery helpers
00118 
00119   // Consume the rest of the current statement.  This consumes tokens
00120   // until it sees one of:
00121   //   ';'  Consumes the token and returns.
00122   //   '{'  Consumes the brace then calls SkipRestOfBlock().
00123   //   '}'  Returns without consuming.
00124   //   EOF  Returns (can't consume).
00125   // The Parser often calls SkipStatement() after encountering a syntax
00126   // error.  This allows it to go on parsing the following lines, allowing
00127   // it to report more than just one error in the file.
00128   void SkipStatement();
00129 
00130   // Consume the rest of the current block, including nested blocks,
00131   // ending after the closing '}' is encountered and consumed, or at EOF.
00132   void SkipRestOfBlock();
00133 
00134   // -----------------------------------------------------------------
00135   // Single-token consuming helpers
00136   //
00137   // These make parsing code more readable.
00138 
00139   // True if the current token is TYPE_END.
00140   inline bool AtEnd();
00141 
00142   // True if the next token matches the given text.
00143   inline bool LookingAt(const char* text);
00144   // True if the next token is of the given type.
00145   inline bool LookingAtType(io::Tokenizer::TokenType token_type);
00146 
00147   // If the next token exactly matches the text given, consume it and return
00148   // true.  Otherwise, return false without logging an error.
00149   bool TryConsume(const char* text);
00150 
00151   // These attempt to read some kind of token from the input.  If successful,
00152   // they return true.  Otherwise they return false and add the given error
00153   // to the error list.
00154 
00155   // Consume a token with the exact text given.
00156   bool Consume(const char* text, const char* error);
00157   // Same as above, but automatically generates the error "Expected \"text\".",
00158   // where "text" is the expected token text.
00159   bool Consume(const char* text);
00160   // Consume a token of type IDENTIFIER and store its text in "output".
00161   bool ConsumeIdentifier(string* output, const char* error);
00162   // Consume an integer and store its value in "output".
00163   bool ConsumeInteger(int* output, const char* error);
00164   // Consume a 64-bit integer and store its value in "output".  If the value
00165   // is greater than max_value, an error will be reported.
00166   bool ConsumeInteger64(uint64 max_value, uint64* output, const char* error);
00167   // Consume a number and store its value in "output".  This will accept
00168   // tokens of either INTEGER or FLOAT type.
00169   bool ConsumeNumber(double* output, const char* error);
00170   // Consume a string literal and store its (unescaped) value in "output".
00171   bool ConsumeString(string* output, const char* error);
00172 
00173   // -----------------------------------------------------------------
00174   // Error logging helpers
00175 
00176   // Invokes error_collector_->AddError(), if error_collector_ is not NULL.
00177   void AddError(int line, int column, const string& error);
00178 
00179   // Invokes error_collector_->AddError() with the line and column number
00180   // of the current token.
00181   void AddError(const string& error);
00182 
00183   // Record the given line and column and associate it with this descriptor
00184   // in the SourceLocationTable.
00185   void RecordLocation(const Message* descriptor,
00186                       DescriptorPool::ErrorCollector::ErrorLocation location,
00187                       int line, int column);
00188 
00189   // Record the current line and column and associate it with this descriptor
00190   // in the SourceLocationTable.
00191   void RecordLocation(const Message* descriptor,
00192                       DescriptorPool::ErrorCollector::ErrorLocation location);
00193 
00194   // =================================================================
00195   // Parsers for various language constructs
00196 
00197   // Parses the "syntax = \"proto2\";" line at the top of the file.  Returns
00198   // false if it failed to parse or if the syntax identifier was not
00199   // recognized.
00200   bool ParseSyntaxIdentifier();
00201 
00202   // These methods parse various individual bits of code.  They return
00203   // false if they completely fail to parse the construct.  In this case,
00204   // it is probably necessary to skip the rest of the statement to recover.
00205   // However, if these methods return true, it does NOT mean that there
00206   // were no errors; only that there were no *syntax* errors.  For instance,
00207   // if a service method is defined using proper syntax but uses a primitive
00208   // type as its input or output, ParseServiceMethod() still returns true
00209   // and only reports the error by calling AddError().  In practice, this
00210   // makes logic much simpler for the caller.
00211 
00212   // Parse a top-level message, enum, service, etc.
00213   bool ParseTopLevelStatement(FileDescriptorProto* file);
00214 
00215   // Parse various high-level language constructs.
00216   bool ParseMessageDefinition(DescriptorProto* message);
00217   bool ParseEnumDefinition(EnumDescriptorProto* enum_type);
00218   bool ParseServiceDefinition(ServiceDescriptorProto* service);
00219   bool ParsePackage(FileDescriptorProto* file);
00220   bool ParseImport(string* import_filename);
00221   bool ParseOption(Message* options);
00222 
00223   // These methods parse the contents of a message, enum, or service type and
00224   // add them to the given object.  They consume the entire block including
00225   // the beginning and ending brace.
00226   bool ParseMessageBlock(DescriptorProto* message);
00227   bool ParseEnumBlock(EnumDescriptorProto* enum_type);
00228   bool ParseServiceBlock(ServiceDescriptorProto* service);
00229 
00230   // Parse one statement within a message, enum, or service block, including
00231   // the final semicolon.
00232   bool ParseMessageStatement(DescriptorProto* message);
00233   bool ParseEnumStatement(EnumDescriptorProto* message);  // NOTE(review): parameter is an enum proto despite being named "message".
00234   bool ParseServiceStatement(ServiceDescriptorProto* message);  // NOTE(review): parameter is a service proto despite being named "message".
00235 
00236   // Parse a field of a message.  If the field is a group, its type will be
00237   // added to "messages".
00238   bool ParseMessageField(FieldDescriptorProto* field,
00239                          RepeatedPtrField<DescriptorProto>* messages);
00240 
00241   // Parse an "extensions" declaration.
00242   bool ParseExtensions(DescriptorProto* message);
00243 
00244   // Parse an "extend" declaration.
00245   bool ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
00246                    RepeatedPtrField<DescriptorProto>* messages);
00247 
00248   // Parse a single enum value within an enum block.
00249   bool ParseEnumConstant(EnumValueDescriptorProto* enum_value);
00250 
00251   // Parse enum constant options, i.e. the list in square brackets at the end
00252   // of the enum constant value definition.
00253   bool ParseEnumConstantOptions(EnumValueDescriptorProto* value);
00254 
00255   // Parse a single method within a service definition.
00256   bool ParseServiceMethod(MethodDescriptorProto* method);
00257 
00258   // Parse "required", "optional", or "repeated" and fill in "label"
00259   // with the value.
00260   bool ParseLabel(FieldDescriptorProto::Label* label);
00261 
00262   // Parse a type name and fill in "type" (if it is a primitive) or
00263   // "type_name" (if it is not) with the type parsed.
00264   bool ParseType(FieldDescriptorProto::Type* type,
00265                  string* type_name);
00266   // Parse a user-defined type and fill in "type_name" with the name.
00267   // If a primitive type is named, it is treated as an error.
00268   bool ParseUserDefinedType(string* type_name);
00269 
00270   // Parses field options, i.e. the stuff in square brackets at the end
00271   // of a field definition.  Also parses default value.
00272   bool ParseFieldOptions(FieldDescriptorProto* field);
00273 
00274   // Parse the "default" option.  This needs special handling because its
00275   // type is the field's type.
00276   bool ParseDefaultAssignment(FieldDescriptorProto* field);
00277 
00278   // Parse a single option name/value pair, e.g. "ctype = CORD".  The name
00279   // identifies a field of the given Message, and the value of that field
00280   // is set to the parsed value.
00281   bool ParseOptionAssignment(Message* options);
00282 
00283   // Parses a single part of a multipart option name. A multipart name consists
00284   // of names separated by dots. Each name is either an identifier or a series
00285   // of identifiers separated by dots and enclosed in parentheses. E.g.,
00286   // "foo.(bar.baz).qux".
00287   bool ParseOptionNamePart(UninterpretedOption* uninterpreted_option);
00288 
00289   // =================================================================
00290 
00291   io::Tokenizer* input_;
00292   io::ErrorCollector* error_collector_;  // Set by RecordErrorsTo(); may be NULL.
00293   SourceLocationTable* source_location_table_;  // Set by RecordSourceLocationsTo(); may be NULL.
00294   bool had_errors_;
00295   bool require_syntax_identifier_;  // Set by SetRequireSyntaxIdentifier().
00296   bool stop_after_syntax_identifier_;  // Set by SetStopAfterSyntaxIdentifier().
00297   string syntax_identifier_;  // Returned by GetSyntaxIdentifier().
00298 
00299   GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Parser);
00300 };
00301 
00302 // A table mapping (descriptor, ErrorLocation) pairs -- as reported by
00303 // DescriptorPool when validating descriptors -- to line and column numbers
00304 // within the original source code.
00305 class LIBPROTOBUF_EXPORT SourceLocationTable {
00306  public:
00307   SourceLocationTable();
00308   ~SourceLocationTable();
00309 
00310   // Finds the precise location of the given error and fills in *line and
00311   // *column with the line and column numbers.  If not found, sets *line to
00312   // -1 and *column to 0 (since line = -1 is used to mean "error has no exact
00313   // location" in the ErrorCollector interface).  Returns true if found, false
00314   // otherwise.
00315   bool Find(const Message* descriptor,
00316             DescriptorPool::ErrorCollector::ErrorLocation location,
00317             int* line, int* column) const;
00318 
00319   // Adds a location to the table.
00320   void Add(const Message* descriptor,
00321            DescriptorPool::ErrorCollector::ErrorLocation location,
00322            int line, int column);
00323 
00324   // Clears the contents of the table.
00325   void Clear();
00326 
00327  private:
00328   typedef map<
00329     pair<const Message*, DescriptorPool::ErrorCollector::ErrorLocation>,
00330     pair<int, int> > LocationMap;  // Key: (descriptor, location); value presumably (line, column), mirroring Add()/Find() -- confirm in parser.cc.
00331   LocationMap location_map_;
00332 };
00333 
00334 }  // namespace compiler
00335 }  // namespace protobuf
00336 
00337 }  // namespace google
00338 #endif  // GOOGLE_PROTOBUF_COMPILER_PARSER_H__
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines