// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: kenton (at) google.com (Kenton Varda)
//  Based on original Protocol Buffers design by
//  Sanjay Ghemawat, Jeff Dean, and others.
34 35 #ifdef _MSC_VER 36 #include <io.h> 37 #else 38 #include <unistd.h> 39 #endif 40 #include <sys/types.h> 41 #include <sys/stat.h> 42 #include <fcntl.h> 43 #include <errno.h> 44 45 #include <algorithm> 46 47 #include <google/protobuf/compiler/importer.h> 48 49 #include <google/protobuf/compiler/parser.h> 50 #include <google/protobuf/io/tokenizer.h> 51 #include <google/protobuf/io/zero_copy_stream_impl.h> 52 #include <google/protobuf/stubs/strutil.h> 53 54 namespace google { 55 namespace protobuf { 56 namespace compiler { 57 58 #ifdef _WIN32 59 #ifndef F_OK 60 #define F_OK 00 // not defined by MSVC for whatever reason 61 #endif 62 #include <ctype.h> 63 #endif 64 65 // Returns true if the text looks like a Windows-style absolute path, starting 66 // with a drive letter. Example: "C:\foo". TODO(kenton): Share this with 67 // copy in command_line_interface.cc? 68 static bool IsWindowsAbsolutePath(const string& text) { 69 #if defined(_WIN32) || defined(__CYGWIN__) 70 return text.size() >= 3 && text[1] == ':' && 71 isalpha(text[0]) && 72 (text[2] == '/' || text[2] == '\\') && 73 text.find_last_of(':') == 1; 74 #else 75 return false; 76 #endif 77 } 78 79 MultiFileErrorCollector::~MultiFileErrorCollector() {} 80 81 // This class serves two purposes: 82 // - It implements the ErrorCollector interface (used by Tokenizer and Parser) 83 // in terms of MultiFileErrorCollector, using a particular filename. 84 // - It lets us check if any errors have occurred. 
85 class SourceTreeDescriptorDatabase::SingleFileErrorCollector 86 : public io::ErrorCollector { 87 public: 88 SingleFileErrorCollector(const string& filename, 89 MultiFileErrorCollector* multi_file_error_collector) 90 : filename_(filename), 91 multi_file_error_collector_(multi_file_error_collector), 92 had_errors_(false) {} 93 ~SingleFileErrorCollector() {} 94 95 bool had_errors() { return had_errors_; } 96 97 // implements ErrorCollector --------------------------------------- 98 void AddError(int line, int column, const string& message) { 99 if (multi_file_error_collector_ != NULL) { 100 multi_file_error_collector_->AddError(filename_, line, column, message); 101 } 102 had_errors_ = true; 103 } 104 105 private: 106 string filename_; 107 MultiFileErrorCollector* multi_file_error_collector_; 108 bool had_errors_; 109 }; 110 111 // =================================================================== 112 113 SourceTreeDescriptorDatabase::SourceTreeDescriptorDatabase( 114 SourceTree* source_tree) 115 : source_tree_(source_tree), 116 error_collector_(NULL), 117 using_validation_error_collector_(false), 118 validation_error_collector_(this) {} 119 120 SourceTreeDescriptorDatabase::~SourceTreeDescriptorDatabase() {} 121 122 bool SourceTreeDescriptorDatabase::FindFileByName( 123 const string& filename, FileDescriptorProto* output) { 124 scoped_ptr<io::ZeroCopyInputStream> input(source_tree_->Open(filename)); 125 if (input == NULL) { 126 if (error_collector_ != NULL) { 127 error_collector_->AddError(filename, -1, 0, "File not found."); 128 } 129 return false; 130 } 131 132 // Set up the tokenizer and parser. 
133 SingleFileErrorCollector file_error_collector(filename, error_collector_); 134 io::Tokenizer tokenizer(input.get(), &file_error_collector); 135 136 Parser parser; 137 if (error_collector_ != NULL) { 138 parser.RecordErrorsTo(&file_error_collector); 139 } 140 if (using_validation_error_collector_) { 141 parser.RecordSourceLocationsTo(&source_locations_); 142 } 143 144 // Parse it. 145 output->set_name(filename); 146 return parser.Parse(&tokenizer, output) && 147 !file_error_collector.had_errors(); 148 } 149 150 bool SourceTreeDescriptorDatabase::FindFileContainingSymbol( 151 const string& symbol_name, FileDescriptorProto* output) { 152 return false; 153 } 154 155 bool SourceTreeDescriptorDatabase::FindFileContainingExtension( 156 const string& containing_type, int field_number, 157 FileDescriptorProto* output) { 158 return false; 159 } 160 161 // ------------------------------------------------------------------- 162 163 SourceTreeDescriptorDatabase::ValidationErrorCollector:: 164 ValidationErrorCollector(SourceTreeDescriptorDatabase* owner) 165 : owner_(owner) {} 166 167 SourceTreeDescriptorDatabase::ValidationErrorCollector:: 168 ~ValidationErrorCollector() {} 169 170 void SourceTreeDescriptorDatabase::ValidationErrorCollector::AddError( 171 const string& filename, 172 const string& element_name, 173 const Message* descriptor, 174 ErrorLocation location, 175 const string& message) { 176 if (owner_->error_collector_ == NULL) return; 177 178 int line, column; 179 owner_->source_locations_.Find(descriptor, location, &line, &column); 180 owner_->error_collector_->AddError(filename, line, column, message); 181 } 182 183 // =================================================================== 184 185 Importer::Importer(SourceTree* source_tree, 186 MultiFileErrorCollector* error_collector) 187 : database_(source_tree), 188 pool_(&database_, database_.GetValidationErrorCollector()) { 189 database_.RecordErrorsTo(error_collector); 190 } 191 192 Importer::~Importer() {} 
193 194 const FileDescriptor* Importer::Import(const string& filename) { 195 return pool_.FindFileByName(filename); 196 } 197 198 // =================================================================== 199 200 SourceTree::~SourceTree() {} 201 202 DiskSourceTree::DiskSourceTree() {} 203 204 DiskSourceTree::~DiskSourceTree() {} 205 206 static inline char LastChar(const string& str) { 207 return str[str.size() - 1]; 208 } 209 210 // Given a path, returns an equivalent path with these changes: 211 // - On Windows, any backslashes are replaced with forward slashes. 212 // - Any instances of the directory "." are removed. 213 // - Any consecutive '/'s are collapsed into a single slash. 214 // Note that the resulting string may be empty. 215 // 216 // TODO(kenton): It would be nice to handle "..", e.g. so that we can figure 217 // out that "foo/bar.proto" is inside "baz/../foo". However, if baz is a 218 // symlink or doesn't exist, then things get complicated, and we can't 219 // actually determine this without investigating the filesystem, probably 220 // in non-portable ways. So, we punt. 221 // 222 // TODO(kenton): It would be nice to use realpath() here except that it 223 // resolves symbolic links. This could cause problems if people place 224 // symbolic links in their source tree. For example, if you executed: 225 // protoc --proto_path=foo foo/bar/baz.proto 226 // then if foo/bar is a symbolic link, foo/bar/baz.proto will canonicalize 227 // to a path which does not appear to be under foo, and thus the compiler 228 // will complain that baz.proto is not inside the --proto_path. 229 static string CanonicalizePath(string path) { 230 #ifdef _WIN32 231 // The Win32 API accepts forward slashes as a path delimiter even though 232 // backslashes are standard. Let's avoid confusion and use only forward 233 // slashes. 
234 path = StringReplace(path, "\\", "/", true); 235 #endif 236 237 vector<string> parts; 238 vector<string> canonical_parts; 239 SplitStringUsing(path, "/", &parts); // Note: Removes empty parts. 240 for (int i = 0; i < parts.size(); i++) { 241 if (parts[i] == ".") { 242 // Ignore. 243 } else { 244 canonical_parts.push_back(parts[i]); 245 } 246 } 247 string result = JoinStrings(canonical_parts, "/"); 248 if (!path.empty() && path[0] == '/') { 249 // Restore leading slash. 250 result = '/' + result; 251 } 252 if (!path.empty() && LastChar(path) == '/' && 253 !result.empty() && LastChar(result) != '/') { 254 // Restore trailing slash. 255 result += '/'; 256 } 257 return result; 258 } 259 260 static inline bool ContainsParentReference(const string& path) { 261 return path == ".." || 262 HasPrefixString(path, "../") || 263 HasSuffixString(path, "/..") || 264 path.find("/../") != string::npos; 265 } 266 267 // Maps a file from an old location to a new one. Typically, old_prefix is 268 // a virtual path and new_prefix is its corresponding disk path. Returns 269 // false if the filename did not start with old_prefix, otherwise replaces 270 // old_prefix with new_prefix and stores the result in *result. Examples: 271 // string result; 272 // assert(ApplyMapping("foo/bar", "", "baz", &result)); 273 // assert(result == "baz/foo/bar"); 274 // 275 // assert(ApplyMapping("foo/bar", "foo", "baz", &result)); 276 // assert(result == "baz/bar"); 277 // 278 // assert(ApplyMapping("foo", "foo", "bar", &result)); 279 // assert(result == "bar"); 280 // 281 // assert(!ApplyMapping("foo/bar", "baz", "qux", &result)); 282 // assert(!ApplyMapping("foo/bar", "baz", "qux", &result)); 283 // assert(!ApplyMapping("foobar", "foo", "baz", &result)); 284 static bool ApplyMapping(const string& filename, 285 const string& old_prefix, 286 const string& new_prefix, 287 string* result) { 288 if (old_prefix.empty()) { 289 // old_prefix matches any relative path. 
290 if (ContainsParentReference(filename)) { 291 // We do not allow the file name to use "..". 292 return false; 293 } 294 if (HasPrefixString(filename, "/") || 295 IsWindowsAbsolutePath(filename)) { 296 // This is an absolute path, so it isn't matched by the empty string. 297 return false; 298 } 299 result->assign(new_prefix); 300 if (!result->empty()) result->push_back('/'); 301 result->append(filename); 302 return true; 303 } else if (HasPrefixString(filename, old_prefix)) { 304 // old_prefix is a prefix of the filename. Is it the whole filename? 305 if (filename.size() == old_prefix.size()) { 306 // Yep, it's an exact match. 307 *result = new_prefix; 308 return true; 309 } else { 310 // Not an exact match. Is the next character a '/'? Otherwise, 311 // this isn't actually a match at all. E.g. the prefix "foo/bar" 312 // does not match the filename "foo/barbaz". 313 int after_prefix_start = -1; 314 if (filename[old_prefix.size()] == '/') { 315 after_prefix_start = old_prefix.size() + 1; 316 } else if (filename[old_prefix.size() - 1] == '/') { 317 // old_prefix is never empty, and canonicalized paths never have 318 // consecutive '/' characters. 319 after_prefix_start = old_prefix.size(); 320 } 321 if (after_prefix_start != -1) { 322 // Yep. So the prefixes are directories and the filename is a file 323 // inside them. 324 string after_prefix = filename.substr(after_prefix_start); 325 if (ContainsParentReference(after_prefix)) { 326 // We do not allow the file name to use "..". 
327 return false; 328 } 329 result->assign(new_prefix); 330 if (!result->empty()) result->push_back('/'); 331 result->append(after_prefix); 332 return true; 333 } 334 } 335 } 336 337 return false; 338 } 339 340 void DiskSourceTree::MapPath(const string& virtual_path, 341 const string& disk_path) { 342 mappings_.push_back(Mapping(virtual_path, CanonicalizePath(disk_path))); 343 } 344 345 DiskSourceTree::DiskFileToVirtualFileResult 346 DiskSourceTree::DiskFileToVirtualFile( 347 const string& disk_file, 348 string* virtual_file, 349 string* shadowing_disk_file) { 350 int mapping_index = -1; 351 string canonical_disk_file = CanonicalizePath(disk_file); 352 353 for (int i = 0; i < mappings_.size(); i++) { 354 // Apply the mapping in reverse. 355 if (ApplyMapping(canonical_disk_file, mappings_[i].disk_path, 356 mappings_[i].virtual_path, virtual_file)) { 357 // Success. 358 mapping_index = i; 359 break; 360 } 361 } 362 363 if (mapping_index == -1) { 364 return NO_MAPPING; 365 } 366 367 // Iterate through all mappings with higher precedence and verify that none 368 // of them map this file to some other existing file. 369 for (int i = 0; i < mapping_index; i++) { 370 if (ApplyMapping(*virtual_file, mappings_[i].virtual_path, 371 mappings_[i].disk_path, shadowing_disk_file)) { 372 if (access(shadowing_disk_file->c_str(), F_OK) >= 0) { 373 // File exists. 374 return SHADOWED; 375 } 376 } 377 } 378 shadowing_disk_file->clear(); 379 380 // Verify that we can open the file. Note that this also has the side-effect 381 // of verifying that we are not canonicalizing away any non-existent 382 // directories. 
383 scoped_ptr<io::ZeroCopyInputStream> stream(OpenDiskFile(disk_file)); 384 if (stream == NULL) { 385 return CANNOT_OPEN; 386 } 387 388 return SUCCESS; 389 } 390 391 bool DiskSourceTree::VirtualFileToDiskFile(const string& virtual_file, 392 string* disk_file) { 393 scoped_ptr<io::ZeroCopyInputStream> stream(OpenVirtualFile(virtual_file, 394 disk_file)); 395 return stream != NULL; 396 } 397 398 io::ZeroCopyInputStream* DiskSourceTree::Open(const string& filename) { 399 return OpenVirtualFile(filename, NULL); 400 } 401 402 io::ZeroCopyInputStream* DiskSourceTree::OpenVirtualFile( 403 const string& virtual_file, 404 string* disk_file) { 405 if (virtual_file != CanonicalizePath(virtual_file) || 406 ContainsParentReference(virtual_file)) { 407 // We do not allow importing of paths containing things like ".." or 408 // consecutive slashes since the compiler expects files to be uniquely 409 // identified by file name. 410 return NULL; 411 } 412 413 for (int i = 0; i < mappings_.size(); i++) { 414 string temp_disk_file; 415 if (ApplyMapping(virtual_file, mappings_[i].virtual_path, 416 mappings_[i].disk_path, &temp_disk_file)) { 417 io::ZeroCopyInputStream* stream = OpenDiskFile(temp_disk_file); 418 if (stream != NULL) { 419 if (disk_file != NULL) { 420 *disk_file = temp_disk_file; 421 } 422 return stream; 423 } 424 425 if (errno == EACCES) { 426 // The file exists but is not readable. 427 // TODO(kenton): Find a way to report this more nicely. 
428 GOOGLE_LOG(WARNING) << "Read access is denied for file: " << temp_disk_file; 429 return NULL; 430 } 431 } 432 } 433 434 return NULL; 435 } 436 437 io::ZeroCopyInputStream* DiskSourceTree::OpenDiskFile( 438 const string& filename) { 439 int file_descriptor; 440 do { 441 file_descriptor = open(filename.c_str(), O_RDONLY); 442 } while (file_descriptor < 0 && errno == EINTR); 443 if (file_descriptor >= 0) { 444 io::FileInputStream* result = new io::FileInputStream(file_descriptor); 445 result->SetCloseOnDelete(true); 446 return result; 447 } else { 448 return NULL; 449 } 450 } 451 452 } // namespace compiler 453 } // namespace protobuf 454 } // namespace google 455