435 lines
19 KiB
C++
435 lines
19 KiB
C++
// Protocol Buffers - Google's data interchange format
|
|
// Copyright 2008 Google Inc. All rights reserved.
|
|
// http://code.google.com/p/protobuf/
|
|
//
|
|
// Redistribution and use in source and binary forms, with or without
|
|
// modification, are permitted provided that the following conditions are
|
|
// met:
|
|
//
|
|
// * Redistributions of source code must retain the above copyright
|
|
// notice, this list of conditions and the following disclaimer.
|
|
// * Redistributions in binary form must reproduce the above
|
|
// copyright notice, this list of conditions and the following disclaimer
|
|
// in the documentation and/or other materials provided with the
|
|
// distribution.
|
|
// * Neither the name of Google Inc. nor the names of its
|
|
// contributors may be used to endorse or promote products derived from
|
|
// this software without specific prior written permission.
|
|
//
|
|
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
|
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
|
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
|
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
|
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
|
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
|
|
// Author: kenton@google.com (Kenton Varda)
|
|
// Based on original Protocol Buffers design by
|
|
// Sanjay Ghemawat, Jeff Dean, and others.
|
|
//
|
|
// Implements parsing of .proto files to FileDescriptorProtos.
|
|
|
|
#ifndef GOOGLE_PROTOBUF_COMPILER_PARSER_H__
|
|
#define GOOGLE_PROTOBUF_COMPILER_PARSER_H__
|
|
|
|
#include <map>
|
|
#include <string>
|
|
#include <utility>
|
|
#include <google/protobuf/stubs/common.h>
|
|
#include <google/protobuf/descriptor.h>
|
|
#include <google/protobuf/descriptor.pb.h>
|
|
#include <google/protobuf/repeated_field.h>
|
|
#include <google/protobuf/io/tokenizer.h>
|
|
|
|
namespace google {
|
|
namespace protobuf { class Message; }
|
|
|
|
namespace protobuf {
|
|
namespace compiler {
|
|
|
|
// Defined in this file.
|
|
class Parser;
|
|
class SourceLocationTable;
|
|
|
|
// Implements parsing of protocol definitions (such as .proto files).
|
|
//
|
|
// Note that most users will be more interested in the Importer class.
|
|
// Parser is a lower-level class which simply converts a single .proto file
|
|
// to a FileDescriptorProto. It does not resolve import directives or perform
|
|
// many other kinds of validation needed to construct a complete
|
|
// FileDescriptor.
|
|
class LIBPROTOBUF_EXPORT Parser {
|
|
public:
|
|
Parser();
|
|
~Parser();
|
|
|
|
// Parse the entire input and construct a FileDescriptorProto representing
|
|
// it. Returns true if no errors occurred, false otherwise.
|
|
bool Parse(io::Tokenizer* input, FileDescriptorProto* file);
|
|
|
|
// Optional fetaures:
|
|
|
|
// DEPRECATED: New code should use the SourceCodeInfo embedded in the
|
|
// FileDescriptorProto.
|
|
//
|
|
// Requests that locations of certain definitions be recorded to the given
|
|
// SourceLocationTable while parsing. This can be used to look up exact line
|
|
// and column numbers for errors reported by DescriptorPool during validation.
|
|
// Set to NULL (the default) to discard source location information.
|
|
void RecordSourceLocationsTo(SourceLocationTable* location_table) {
|
|
source_location_table_ = location_table;
|
|
}
|
|
|
|
// Requests that errors be recorded to the given ErrorCollector while
|
|
// parsing. Set to NULL (the default) to discard error messages.
|
|
void RecordErrorsTo(io::ErrorCollector* error_collector) {
|
|
error_collector_ = error_collector;
|
|
}
|
|
|
|
// Returns the identifier used in the "syntax = " declaration, if one was
|
|
// seen during the last call to Parse(), or the empty string otherwise.
|
|
const string& GetSyntaxIdentifier() { return syntax_identifier_; }
|
|
|
|
// If set true, input files will be required to begin with a syntax
|
|
// identifier. Otherwise, files may omit this. If a syntax identifier
|
|
// is provided, it must be 'syntax = "proto2";' and must appear at the
|
|
// top of this file regardless of whether or not it was required.
|
|
void SetRequireSyntaxIdentifier(bool value) {
|
|
require_syntax_identifier_ = value;
|
|
}
|
|
|
|
// Call SetStopAfterSyntaxIdentifier(true) to tell the parser to stop
|
|
// parsing as soon as it has seen the syntax identifier, or lack thereof.
|
|
// This is useful for quickly identifying the syntax of the file without
|
|
// parsing the whole thing. If this is enabled, no error will be recorded
|
|
// if the syntax identifier is something other than "proto2" (since
|
|
// presumably the caller intends to deal with that), but other kinds of
|
|
// errors (e.g. parse errors) will still be reported. When this is enabled,
|
|
// you may pass a NULL FileDescriptorProto to Parse().
|
|
void SetStopAfterSyntaxIdentifier(bool value) {
|
|
stop_after_syntax_identifier_ = value;
|
|
}
|
|
|
|
private:
|
|
// =================================================================
|
|
// Error recovery helpers
|
|
|
|
// Consume the rest of the current statement. This consumes tokens
|
|
// until it sees one of:
|
|
// ';' Consumes the token and returns.
|
|
// '{' Consumes the brace then calls SkipRestOfBlock().
|
|
// '}' Returns without consuming.
|
|
// EOF Returns (can't consume).
|
|
// The Parser often calls SkipStatement() after encountering a syntax
|
|
// error. This allows it to go on parsing the following lines, allowing
|
|
// it to report more than just one error in the file.
|
|
void SkipStatement();
|
|
|
|
// Consume the rest of the current block, including nested blocks,
|
|
// ending after the closing '}' is encountered and consumed, or at EOF.
|
|
void SkipRestOfBlock();
|
|
|
|
// -----------------------------------------------------------------
|
|
// Single-token consuming helpers
|
|
//
|
|
// These make parsing code more readable.
|
|
|
|
// True if the current token is TYPE_END.
|
|
inline bool AtEnd();
|
|
|
|
// True if the next token matches the given text.
|
|
inline bool LookingAt(const char* text);
|
|
// True if the next token is of the given type.
|
|
inline bool LookingAtType(io::Tokenizer::TokenType token_type);
|
|
|
|
// If the next token exactly matches the text given, consume it and return
|
|
// true. Otherwise, return false without logging an error.
|
|
bool TryConsume(const char* text);
|
|
|
|
// These attempt to read some kind of token from the input. If successful,
|
|
// they return true. Otherwise they return false and add the given error
|
|
// to the error list.
|
|
|
|
// Consume a token with the exact text given.
|
|
bool Consume(const char* text, const char* error);
|
|
// Same as above, but automatically generates the error "Expected \"text\".",
|
|
// where "text" is the expected token text.
|
|
bool Consume(const char* text);
|
|
// Consume a token of type IDENTIFIER and store its text in "output".
|
|
bool ConsumeIdentifier(string* output, const char* error);
|
|
// Consume an integer and store its value in "output".
|
|
bool ConsumeInteger(int* output, const char* error);
|
|
// Consume a 64-bit integer and store its value in "output". If the value
|
|
// is greater than max_value, an error will be reported.
|
|
bool ConsumeInteger64(uint64 max_value, uint64* output, const char* error);
|
|
// Consume a number and store its value in "output". This will accept
|
|
// tokens of either INTEGER or FLOAT type.
|
|
bool ConsumeNumber(double* output, const char* error);
|
|
// Consume a string literal and store its (unescaped) value in "output".
|
|
bool ConsumeString(string* output, const char* error);
|
|
|
|
// -----------------------------------------------------------------
|
|
// Error logging helpers
|
|
|
|
// Invokes error_collector_->AddError(), if error_collector_ is not NULL.
|
|
void AddError(int line, int column, const string& error);
|
|
|
|
// Invokes error_collector_->AddError() with the line and column number
|
|
// of the current token.
|
|
void AddError(const string& error);
|
|
|
|
// Records a location in the SourceCodeInfo.location table (see
|
|
// descriptor.proto). We use RAII to ensure that the start and end locations
|
|
// are recorded -- the constructor records the start location and the
|
|
// destructor records the end location. Since the parser is
|
|
// recursive-descent, this works out beautifully.
|
|
class LIBPROTOBUF_EXPORT LocationRecorder {
|
|
public:
|
|
// Construct the file's "root" location.
|
|
LocationRecorder(Parser* parser);
|
|
|
|
// Construct a location that represents a declaration nested within the
|
|
// given parent. E.g. a field's location is nested within the location
|
|
// for a message type. The parent's path will be copied, so you should
|
|
// call AddPath() only to add the path components leading from the parent
|
|
// to the child (as opposed to leading from the root to the child).
|
|
LocationRecorder(const LocationRecorder& parent);
|
|
|
|
// Convenience constructors that call AddPath() one or two times.
|
|
LocationRecorder(const LocationRecorder& parent, int path1);
|
|
LocationRecorder(const LocationRecorder& parent, int path1, int path2);
|
|
|
|
~LocationRecorder();
|
|
|
|
// Add a path component. See SourceCodeInfo.Location.path in
|
|
// descriptor.proto.
|
|
void AddPath(int path_component);
|
|
|
|
// By default the location is considered to start at the current token at
|
|
// the time the LocationRecorder is created. StartAt() sets the start
|
|
// location to the given token instead.
|
|
void StartAt(const io::Tokenizer::Token& token);
|
|
|
|
// By default the location is considered to end at the previous token at
|
|
// the time the LocationRecorder is destroyed. EndAt() sets the end
|
|
// location to the given token instead.
|
|
void EndAt(const io::Tokenizer::Token& token);
|
|
|
|
// Records the start point of this location to the SourceLocationTable that
|
|
// was passed to RecordSourceLocationsTo(), if any. SourceLocationTable
|
|
// is an older way of keeping track of source locations which is still
|
|
// used in some places.
|
|
void RecordLegacyLocation(const Message* descriptor,
|
|
DescriptorPool::ErrorCollector::ErrorLocation location);
|
|
|
|
private:
|
|
Parser* parser_;
|
|
SourceCodeInfo::Location* location_;
|
|
|
|
void Init(const LocationRecorder& parent);
|
|
};
|
|
|
|
// =================================================================
|
|
// Parsers for various language constructs
|
|
|
|
// Parses the "syntax = \"proto2\";" line at the top of the file. Returns
|
|
// false if it failed to parse or if the syntax identifier was not
|
|
// recognized.
|
|
bool ParseSyntaxIdentifier();
|
|
|
|
// These methods parse various individual bits of code. They return
|
|
// false if they completely fail to parse the construct. In this case,
|
|
// it is probably necessary to skip the rest of the statement to recover.
|
|
// However, if these methods return true, it does NOT mean that there
|
|
// were no errors; only that there were no *syntax* errors. For instance,
|
|
// if a service method is defined using proper syntax but uses a primitive
|
|
// type as its input or output, ParseMethodField() still returns true
|
|
// and only reports the error by calling AddError(). In practice, this
|
|
// makes logic much simpler for the caller.
|
|
|
|
// Parse a top-level message, enum, service, etc.
|
|
bool ParseTopLevelStatement(FileDescriptorProto* file,
|
|
const LocationRecorder& root_location);
|
|
|
|
// Parse various language high-level language construrcts.
|
|
bool ParseMessageDefinition(DescriptorProto* message,
|
|
const LocationRecorder& message_location);
|
|
bool ParseEnumDefinition(EnumDescriptorProto* enum_type,
|
|
const LocationRecorder& enum_location);
|
|
bool ParseServiceDefinition(ServiceDescriptorProto* service,
|
|
const LocationRecorder& service_location);
|
|
bool ParsePackage(FileDescriptorProto* file,
|
|
const LocationRecorder& root_location);
|
|
bool ParseImport(string* import_filename,
|
|
const LocationRecorder& root_location,
|
|
int index);
|
|
bool ParseOption(Message* options,
|
|
const LocationRecorder& options_location);
|
|
|
|
// These methods parse the contents of a message, enum, or service type and
|
|
// add them to the given object. They consume the entire block including
|
|
// the beginning and ending brace.
|
|
bool ParseMessageBlock(DescriptorProto* message,
|
|
const LocationRecorder& message_location);
|
|
bool ParseEnumBlock(EnumDescriptorProto* enum_type,
|
|
const LocationRecorder& enum_location);
|
|
bool ParseServiceBlock(ServiceDescriptorProto* service,
|
|
const LocationRecorder& service_location);
|
|
|
|
// Parse one statement within a message, enum, or service block, inclunding
|
|
// final semicolon.
|
|
bool ParseMessageStatement(DescriptorProto* message,
|
|
const LocationRecorder& message_location);
|
|
bool ParseEnumStatement(EnumDescriptorProto* message,
|
|
const LocationRecorder& enum_location);
|
|
bool ParseServiceStatement(ServiceDescriptorProto* message,
|
|
const LocationRecorder& service_location);
|
|
|
|
// Parse a field of a message. If the field is a group, its type will be
|
|
// added to "messages".
|
|
//
|
|
// parent_location and location_field_number_for_nested_type are needed when
|
|
// parsing groups -- we need to generate a nested message type within the
|
|
// parent and record its location accordingly. Since the parent could be
|
|
// either a FileDescriptorProto or a DescriptorProto, we must pass in the
|
|
// correct field number to use.
|
|
bool ParseMessageField(FieldDescriptorProto* field,
|
|
RepeatedPtrField<DescriptorProto>* messages,
|
|
const LocationRecorder& parent_location,
|
|
int location_field_number_for_nested_type,
|
|
const LocationRecorder& field_location);
|
|
|
|
// Parse an "extensions" declaration.
|
|
bool ParseExtensions(DescriptorProto* message,
|
|
const LocationRecorder& extensions_location);
|
|
|
|
// Parse an "extend" declaration. (See also comments for
|
|
// ParseMessageField().)
|
|
bool ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
|
|
RepeatedPtrField<DescriptorProto>* messages,
|
|
const LocationRecorder& parent_location,
|
|
int location_field_number_for_nested_type,
|
|
const LocationRecorder& extend_location);
|
|
|
|
// Parse a single enum value within an enum block.
|
|
bool ParseEnumConstant(EnumValueDescriptorProto* enum_value,
|
|
const LocationRecorder& enum_value_location);
|
|
|
|
// Parse enum constant options, i.e. the list in square brackets at the end
|
|
// of the enum constant value definition.
|
|
bool ParseEnumConstantOptions(EnumValueDescriptorProto* value,
|
|
const LocationRecorder& enum_value_location);
|
|
|
|
// Parse a single method within a service definition.
|
|
bool ParseServiceMethod(MethodDescriptorProto* method,
|
|
const LocationRecorder& method_location);
|
|
|
|
// Parse "required", "optional", or "repeated" and fill in "label"
|
|
// with the value.
|
|
bool ParseLabel(FieldDescriptorProto::Label* label);
|
|
|
|
// Parse a type name and fill in "type" (if it is a primitive) or
|
|
// "type_name" (if it is not) with the type parsed.
|
|
bool ParseType(FieldDescriptorProto::Type* type,
|
|
string* type_name);
|
|
// Parse a user-defined type and fill in "type_name" with the name.
|
|
// If a primitive type is named, it is treated as an error.
|
|
bool ParseUserDefinedType(string* type_name);
|
|
|
|
// Parses field options, i.e. the stuff in square brackets at the end
|
|
// of a field definition. Also parses default value.
|
|
bool ParseFieldOptions(FieldDescriptorProto* field,
|
|
const LocationRecorder& field_location);
|
|
|
|
// Parse the "default" option. This needs special handling because its
|
|
// type is the field's type.
|
|
bool ParseDefaultAssignment(FieldDescriptorProto* field,
|
|
const LocationRecorder& field_location);
|
|
|
|
// Parse a single option name/value pair, e.g. "ctype = CORD". The name
|
|
// identifies a field of the given Message, and the value of that field
|
|
// is set to the parsed value.
|
|
bool ParseOptionAssignment(Message* options,
|
|
const LocationRecorder& options_location);
|
|
|
|
// Parses a single part of a multipart option name. A multipart name consists
|
|
// of names separated by dots. Each name is either an identifier or a series
|
|
// of identifiers separated by dots and enclosed in parentheses. E.g.,
|
|
// "foo.(bar.baz).qux".
|
|
bool ParseOptionNamePart(UninterpretedOption* uninterpreted_option,
|
|
const LocationRecorder& part_location);
|
|
|
|
// Parses a string surrounded by balanced braces. Strips off the outer
|
|
// braces and stores the enclosed string in *value.
|
|
// E.g.,
|
|
// { foo } *value gets 'foo'
|
|
// { foo { bar: box } } *value gets 'foo { bar: box }'
|
|
// {} *value gets ''
|
|
//
|
|
// REQUIRES: LookingAt("{")
|
|
// When finished successfully, we are looking at the first token past
|
|
// the ending brace.
|
|
bool ParseUninterpretedBlock(string* value);
|
|
|
|
// =================================================================
|
|
|
|
io::Tokenizer* input_;
|
|
io::ErrorCollector* error_collector_;
|
|
SourceCodeInfo* source_code_info_;
|
|
SourceLocationTable* source_location_table_; // legacy
|
|
bool had_errors_;
|
|
bool require_syntax_identifier_;
|
|
bool stop_after_syntax_identifier_;
|
|
string syntax_identifier_;
|
|
|
|
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Parser);
|
|
};
|
|
|
|
// A table mapping (descriptor, ErrorLocation) pairs -- as reported by
|
|
// DescriptorPool when validating descriptors -- to line and column numbers
|
|
// within the original source code.
|
|
//
|
|
// This is semi-obsolete: FileDescriptorProto.source_code_info now contains
|
|
// far more complete information about source locations. However, as of this
|
|
// writing you still need to use SourceLocationTable when integrating with
|
|
// DescriptorPool.
|
|
class LIBPROTOBUF_EXPORT SourceLocationTable {
|
|
public:
|
|
SourceLocationTable();
|
|
~SourceLocationTable();
|
|
|
|
// Finds the precise location of the given error and fills in *line and
|
|
// *column with the line and column numbers. If not found, sets *line to
|
|
// -1 and *column to 0 (since line = -1 is used to mean "error has no exact
|
|
// location" in the ErrorCollector interface). Returns true if found, false
|
|
// otherwise.
|
|
bool Find(const Message* descriptor,
|
|
DescriptorPool::ErrorCollector::ErrorLocation location,
|
|
int* line, int* column) const;
|
|
|
|
// Adds a location to the table.
|
|
void Add(const Message* descriptor,
|
|
DescriptorPool::ErrorCollector::ErrorLocation location,
|
|
int line, int column);
|
|
|
|
// Clears the contents of the table.
|
|
void Clear();
|
|
|
|
private:
|
|
typedef map<
|
|
pair<const Message*, DescriptorPool::ErrorCollector::ErrorLocation>,
|
|
pair<int, int> > LocationMap;
|
|
LocationMap location_map_;
|
|
};
|
|
|
|
} // namespace compiler
|
|
} // namespace protobuf
|
|
|
|
} // namespace google
|
|
#endif // GOOGLE_PROTOBUF_COMPILER_PARSER_H__
|