Home | History | Annotate | Download | only in compiler
      1 // Protocol Buffers - Google's data interchange format
      2 // Copyright 2008 Google Inc.  All rights reserved.
      3 // http://code.google.com/p/protobuf/
      4 //
      5 // Redistribution and use in source and binary forms, with or without
      6 // modification, are permitted provided that the following conditions are
      7 // met:
      8 //
      9 //     * Redistributions of source code must retain the above copyright
     10 // notice, this list of conditions and the following disclaimer.
     11 //     * Redistributions in binary form must reproduce the above
     12 // copyright notice, this list of conditions and the following disclaimer
     13 // in the documentation and/or other materials provided with the
     14 // distribution.
     15 //     * Neither the name of Google Inc. nor the names of its
     16 // contributors may be used to endorse or promote products derived from
     17 // this software without specific prior written permission.
     18 //
     19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 // Author: kenton (at) google.com (Kenton Varda)
     32 //  Based on original Protocol Buffers design by
     33 //  Sanjay Ghemawat, Jeff Dean, and others.
     34 
     35 #ifdef _MSC_VER
     36 #include <io.h>
     37 #else
     38 #include <unistd.h>
     39 #endif
     40 #include <sys/types.h>
     41 #include <sys/stat.h>
     42 #include <fcntl.h>
     43 #include <errno.h>
     44 
     45 #include <algorithm>
     46 
     47 #include <google/protobuf/compiler/importer.h>
     48 
     49 #include <google/protobuf/compiler/parser.h>
     50 #include <google/protobuf/io/tokenizer.h>
     51 #include <google/protobuf/io/zero_copy_stream_impl.h>
     52 #include <google/protobuf/stubs/strutil.h>
     53 
     54 namespace google {
     55 namespace protobuf {
     56 namespace compiler {
     57 
     58 #ifdef _WIN32
     59 #ifndef F_OK
     60 #define F_OK 00  // not defined by MSVC for whatever reason
     61 #endif
     62 #include <ctype.h>
     63 #endif
     64 
     65 // Returns true if the text looks like a Windows-style absolute path, starting
     66 // with a drive letter.  Example:  "C:\foo".  TODO(kenton):  Share this with
     67 // copy in command_line_interface.cc?
     68 static bool IsWindowsAbsolutePath(const string& text) {
     69 #if defined(_WIN32) || defined(__CYGWIN__)
     70   return text.size() >= 3 && text[1] == ':' &&
     71          isalpha(text[0]) &&
     72          (text[2] == '/' || text[2] == '\\') &&
     73          text.find_last_of(':') == 1;
     74 #else
     75   return false;
     76 #endif
     77 }
     78 
     79 MultiFileErrorCollector::~MultiFileErrorCollector() {}
     80 
     81 // This class serves two purposes:
     82 // - It implements the ErrorCollector interface (used by Tokenizer and Parser)
     83 //   in terms of MultiFileErrorCollector, using a particular filename.
     84 // - It lets us check if any errors have occurred.
     85 class SourceTreeDescriptorDatabase::SingleFileErrorCollector
     86     : public io::ErrorCollector {
     87  public:
     88   SingleFileErrorCollector(const string& filename,
     89                            MultiFileErrorCollector* multi_file_error_collector)
     90     : filename_(filename),
     91       multi_file_error_collector_(multi_file_error_collector),
     92       had_errors_(false) {}
     93   ~SingleFileErrorCollector() {}
     94 
     95   bool had_errors() { return had_errors_; }
     96 
     97   // implements ErrorCollector ---------------------------------------
     98   void AddError(int line, int column, const string& message) {
     99     if (multi_file_error_collector_ != NULL) {
    100       multi_file_error_collector_->AddError(filename_, line, column, message);
    101     }
    102     had_errors_ = true;
    103   }
    104 
    105  private:
    106   string filename_;
    107   MultiFileErrorCollector* multi_file_error_collector_;
    108   bool had_errors_;
    109 };
    110 
    111 // ===================================================================
    112 
    113 SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase(
    114     SourceTree* source_tree)
    115   : source_tree_(source_tree),
    116     error_collector_(NULL),
    117     using_validation_error_collector_(false),
    118     validation_error_collector_(this) {}
    119 
    120 SourceTreeDescriptorDatabase::~SourceTreeDescriptorDatabase() {}
    121 
    122 bool SourceTreeDescriptorDatabase::FindFileByName(
    123     const string& filename, FileDescriptorProto* output) {
    124   scoped_ptr<io::ZeroCopyInputStream> input(source_tree_->Open(filename));
    125   if (input == NULL) {
    126     if (error_collector_ != NULL) {
    127       error_collector_->AddError(filename, -1, 0, "File not found.");
    128     }
    129     return false;
    130   }
    131 
    132   // Set up the tokenizer and parser.
    133   SingleFileErrorCollector file_error_collector(filename, error_collector_);
    134   io::Tokenizer tokenizer(input.get(), &file_error_collector);
    135 
    136   Parser parser;
    137   if (error_collector_ != NULL) {
    138     parser.RecordErrorsTo(&file_error_collector);
    139   }
    140   if (using_validation_error_collector_) {
    141     parser.RecordSourceLocationsTo(&source_locations_);
    142   }
    143 
    144   // Parse it.
    145   output->set_name(filename);
    146   return parser.Parse(&tokenizer, output) &&
    147          !file_error_collector.had_errors();
    148 }
    149 
    150 bool SourceTreeDescriptorDatabase::FindFileContainingSymbol(
    151     const string& symbol_name, FileDescriptorProto* output) {
    152   return false;
    153 }
    154 
    155 bool SourceTreeDescriptorDatabase::FindFileContainingExtension(
    156     const string& containing_type, int field_number,
    157     FileDescriptorProto* output) {
    158   return false;
    159 }
    160 
    161 // -------------------------------------------------------------------
    162 
    163 SourceTreeDescriptorDatabase::ValidationErrorCollector::
    164 ValidationErrorCollector(SourceTreeDescriptorDatabase* owner)
    165   : owner_(owner) {}
    166 
    167 SourceTreeDescriptorDatabase::ValidationErrorCollector::
    168 ~ValidationErrorCollector() {}
    169 
    170 void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddError(
    171     const string& filename,
    172     const string& element_name,
    173     const Message* descriptor,
    174     ErrorLocation location,
    175     const string& message) {
    176   if (owner_->error_collector_ == NULL) return;
    177 
    178   int line, column;
    179   owner_->source_locations_.Find(descriptor, location, &line, &column);
    180   owner_->error_collector_->AddError(filename, line, column, message);
    181 }
    182 
    183 // ===================================================================
    184 
    185 Importer::Importer(SourceTree* source_tree,
    186                    MultiFileErrorCollector* error_collector)
    187   : database_(source_tree),
    188     pool_(&database_, database_.GetValidationErrorCollector()) {
    189   database_.RecordErrorsTo(error_collector);
    190 }
    191 
    192 Importer::~Importer() {}
    193 
    194 const FileDescriptor* Importer::Import(const string& filename) {
    195   return pool_.FindFileByName(filename);
    196 }
    197 
    198 // ===================================================================
    199 
    200 SourceTree::~SourceTree() {}
    201 
    202 DiskSourceTree::DiskSourceTree() {}
    203 
    204 DiskSourceTree::~DiskSourceTree() {}
    205 
    206 static inline char LastChar(const string& str) {
    207   return str[str.size() - 1];
    208 }
    209 
    210 // Given a path, returns an equivalent path with these changes:
    211 // - On Windows, any backslashes are replaced with forward slashes.
    212 // - Any instances of the directory "." are removed.
    213 // - Any consecutive '/'s are collapsed into a single slash.
    214 // Note that the resulting string may be empty.
    215 //
    216 // TODO(kenton):  It would be nice to handle "..", e.g. so that we can figure
    217 //   out that "foo/bar.proto" is inside "baz/../foo".  However, if baz is a
    218 //   symlink or doesn't exist, then things get complicated, and we can't
    219 //   actually determine this without investigating the filesystem, probably
    220 //   in non-portable ways.  So, we punt.
    221 //
    222 // TODO(kenton):  It would be nice to use realpath() here except that it
    223 //   resolves symbolic links.  This could cause problems if people place
    224 //   symbolic links in their source tree.  For example, if you executed:
    225 //     protoc --proto_path=foo foo/bar/baz.proto
    226 //   then if foo/bar is a symbolic link, foo/bar/baz.proto will canonicalize
    227 //   to a path which does not appear to be under foo, and thus the compiler
    228 //   will complain that baz.proto is not inside the --proto_path.
    229 static string CanonicalizePath(string path) {
    230 #ifdef _WIN32
    231   // The Win32 API accepts forward slashes as a path delimiter even though
    232   // backslashes are standard.  Let's avoid confusion and use only forward
    233   // slashes.
    234   path = StringReplace(path, "\\", "/", true);
    235 #endif
    236 
    237   vector<string> parts;
    238   vector<string> canonical_parts;
    239   SplitStringUsing(path, "/", &parts);  // Note:  Removes empty parts.
    240   for (int i = 0; i < parts.size(); i++) {
    241     if (parts[i] == ".") {
    242       // Ignore.
    243     } else {
    244       canonical_parts.push_back(parts[i]);
    245     }
    246   }
    247   string result = JoinStrings(canonical_parts, "/");
    248   if (!path.empty() && path[0] == '/') {
    249     // Restore leading slash.
    250     result = '/' + result;
    251   }
    252   if (!path.empty() && LastChar(path) == '/' &&
    253       !result.empty() && LastChar(result) != '/') {
    254     // Restore trailing slash.
    255     result += '/';
    256   }
    257   return result;
    258 }
    259 
    260 static inline bool ContainsParentReference(const string& path) {
    261   return path == ".." ||
    262          HasPrefixString(path, "../") ||
    263          HasSuffixString(path, "/..") ||
    264          path.find("/../") != string::npos;
    265 }
    266 
    267 // Maps a file from an old location to a new one.  Typically, old_prefix is
    268 // a virtual path and new_prefix is its corresponding disk path.  Returns
    269 // false if the filename did not start with old_prefix, otherwise replaces
    270 // old_prefix with new_prefix and stores the result in *result.  Examples:
    271 //   string result;
    272 //   assert(ApplyMapping("foo/bar", "", "baz", &result));
    273 //   assert(result == "baz/foo/bar");
    274 //
    275 //   assert(ApplyMapping("foo/bar", "foo", "baz", &result));
    276 //   assert(result == "baz/bar");
    277 //
    278 //   assert(ApplyMapping("foo", "foo", "bar", &result));
    279 //   assert(result == "bar");
    280 //
    281 //   assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
    282 //   assert(!ApplyMapping("foo/bar", "baz", "qux", &result));
    283 //   assert(!ApplyMapping("foobar", "foo", "baz", &result));
    284 static bool ApplyMapping(const string& filename,
    285                          const string& old_prefix,
    286                          const string& new_prefix,
    287                          string* result) {
    288   if (old_prefix.empty()) {
    289     // old_prefix matches any relative path.
    290     if (ContainsParentReference(filename)) {
    291       // We do not allow the file name to use "..".
    292       return false;
    293     }
    294     if (HasPrefixString(filename, "/") ||
    295         IsWindowsAbsolutePath(filename)) {
    296       // This is an absolute path, so it isn't matched by the empty string.
    297       return false;
    298     }
    299     result->assign(new_prefix);
    300     if (!result->empty()) result->push_back('/');
    301     result->append(filename);
    302     return true;
    303   } else if (HasPrefixString(filename, old_prefix)) {
    304     // old_prefix is a prefix of the filename.  Is it the whole filename?
    305     if (filename.size() == old_prefix.size()) {
    306       // Yep, it's an exact match.
    307       *result = new_prefix;
    308       return true;
    309     } else {
    310       // Not an exact match.  Is the next character a '/'?  Otherwise,
    311       // this isn't actually a match at all.  E.g. the prefix "foo/bar"
    312       // does not match the filename "foo/barbaz".
    313       int after_prefix_start = -1;
    314       if (filename[old_prefix.size()] == '/') {
    315         after_prefix_start = old_prefix.size() + 1;
    316       } else if (filename[old_prefix.size() - 1] == '/') {
    317         // old_prefix is never empty, and canonicalized paths never have
    318         // consecutive '/' characters.
    319         after_prefix_start = old_prefix.size();
    320       }
    321       if (after_prefix_start != -1) {
    322         // Yep.  So the prefixes are directories and the filename is a file
    323         // inside them.
    324         string after_prefix = filename.substr(after_prefix_start);
    325         if (ContainsParentReference(after_prefix)) {
    326           // We do not allow the file name to use "..".
    327           return false;
    328         }
    329         result->assign(new_prefix);
    330         if (!result->empty()) result->push_back('/');
    331         result->append(after_prefix);
    332         return true;
    333       }
    334     }
    335   }
    336 
    337   return false;
    338 }
    339 
    340 void DiskSourceTree::MapPath(const string& virtual_path,
    341                              const string& disk_path) {
    342   mappings_.push_back(Mapping(virtual_path, CanonicalizePath(disk_path)));
    343 }
    344 
    345 DiskSourceTree::DiskFileToVirtualFileResult
    346 DiskSourceTree::DiskFileToVirtualFile(
    347     const string& disk_file,
    348     string* virtual_file,
    349     string* shadowing_disk_file) {
    350   int mapping_index = -1;
    351   string canonical_disk_file = CanonicalizePath(disk_file);
    352 
    353   for (int i = 0; i < mappings_.size(); i++) {
    354     // Apply the mapping in reverse.
    355     if (ApplyMapping(canonical_disk_file, mappings_[i].disk_path,
    356                      mappings_[i].virtual_path, virtual_file)) {
    357       // Success.
    358       mapping_index = i;
    359       break;
    360     }
    361   }
    362 
    363   if (mapping_index == -1) {
    364     return NO_MAPPING;
    365   }
    366 
    367   // Iterate through all mappings with higher precedence and verify that none
    368   // of them map this file to some other existing file.
    369   for (int i = 0; i < mapping_index; i++) {
    370     if (ApplyMapping(*virtual_file, mappings_[i].virtual_path,
    371                      mappings_[i].disk_path, shadowing_disk_file)) {
    372       if (access(shadowing_disk_file->c_str(), F_OK) >= 0) {
    373         // File exists.
    374         return SHADOWED;
    375       }
    376     }
    377   }
    378   shadowing_disk_file->clear();
    379 
    380   // Verify that we can open the file.  Note that this also has the side-effect
    381   // of verifying that we are not canonicalizing away any non-existent
    382   // directories.
    383   scoped_ptr<io::ZeroCopyInputStream> stream(OpenDiskFile(disk_file));
    384   if (stream == NULL) {
    385     return CANNOT_OPEN;
    386   }
    387 
    388   return SUCCESS;
    389 }
    390 
    391 bool DiskSourceTree::VirtualFileToDiskFile(const string& virtual_file,
    392                                            string* disk_file) {
    393   scoped_ptr<io::ZeroCopyInputStream> stream(OpenVirtualFile(virtual_file,
    394                                                              disk_file));
    395   return stream != NULL;
    396 }
    397 
    398 io::ZeroCopyInputStream* DiskSourceTree::Open(const string& filename) {
    399   return OpenVirtualFile(filename, NULL);
    400 }
    401 
    402 io::ZeroCopyInputStream* DiskSourceTree::OpenVirtualFile(
    403     const string& virtual_file,
    404     string* disk_file) {
    405   if (virtual_file != CanonicalizePath(virtual_file) ||
    406       ContainsParentReference(virtual_file)) {
    407     // We do not allow importing of paths containing things like ".." or
    408     // consecutive slashes since the compiler expects files to be uniquely
    409     // identified by file name.
    410     return NULL;
    411   }
    412 
    413   for (int i = 0; i < mappings_.size(); i++) {
    414     string temp_disk_file;
    415     if (ApplyMapping(virtual_file, mappings_[i].virtual_path,
    416                      mappings_[i].disk_path, &temp_disk_file)) {
    417       io::ZeroCopyInputStream* stream = OpenDiskFile(temp_disk_file);
    418       if (stream != NULL) {
    419         if (disk_file != NULL) {
    420           *disk_file = temp_disk_file;
    421         }
    422         return stream;
    423       }
    424 
    425       if (errno == EACCES) {
    426         // The file exists but is not readable.
    427         // TODO(kenton):  Find a way to report this more nicely.
    428         GOOGLE_LOG(WARNING) << "Read access is denied for file: " << temp_disk_file;
    429         return NULL;
    430       }
    431     }
    432   }
    433 
    434   return NULL;
    435 }
    436 
    437 io::ZeroCopyInputStream* DiskSourceTree::OpenDiskFile(
    438     const string& filename) {
    439   int file_descriptor;
    440   do {
    441     file_descriptor = open(filename.c_str(), O_RDONLY);
    442   } while (file_descriptor < 0 && errno == EINTR);
    443   if (file_descriptor >= 0) {
    444     io::FileInputStream* result = new io::FileInputStream(file_descriptor);
    445     result->SetCloseOnDelete(true);
    446     return result;
    447   } else {
    448     return NULL;
    449   }
    450 }
    451 
    452 }  // namespace compiler
    453 }  // namespace protobuf
    454 }  // namespace google
    455