1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "nacl_io/httpfs/http_fs_node.h" 6 7 #include <assert.h> 8 #include <errno.h> 9 #include <stdio.h> 10 #include <string.h> 11 12 #include <ppapi/c/pp_errors.h> 13 14 #include "nacl_io/httpfs/http_fs.h" 15 #include "nacl_io/kernel_handle.h" 16 #include "nacl_io/osinttypes.h" 17 18 #if defined(WIN32) 19 #define snprintf _snprintf 20 #endif 21 22 namespace nacl_io { 23 24 namespace { 25 26 // If we're attempting to read a partial request, but the server returns a full 27 // request, we need to read all of the data up to the start of our partial 28 // request into a dummy buffer. This is the maximum size of that buffer. 29 const int MAX_READ_BUFFER_SIZE = 64 * 1024; 30 const int32_t STATUSCODE_OK = 200; 31 const int32_t STATUSCODE_PARTIAL_CONTENT = 206; 32 const int32_t STATUSCODE_FORBIDDEN = 403; 33 const int32_t STATUSCODE_NOT_FOUND = 404; 34 const int32_t STATUSCODE_REQUESTED_RANGE_NOT_SATISFIABLE = 416; 35 36 StringMap_t ParseHeaders(const char* headers, int32_t headers_length) { 37 enum State { 38 FINDING_KEY, 39 SKIPPING_WHITESPACE, 40 FINDING_VALUE, 41 }; 42 43 StringMap_t result; 44 std::string key; 45 std::string value; 46 47 State state = FINDING_KEY; 48 const char* start = headers; 49 for (int i = 0; i < headers_length; ++i) { 50 switch (state) { 51 case FINDING_KEY: 52 if (headers[i] == ':') { 53 // Found key. 54 key.assign(start, &headers[i] - start); 55 key = NormalizeHeaderKey(key); 56 state = SKIPPING_WHITESPACE; 57 } 58 break; 59 60 case SKIPPING_WHITESPACE: 61 if (headers[i] == ' ') { 62 // Found whitespace, keep going... 63 break; 64 } 65 66 // Found a non-whitespace, mark this as the start of the value. 67 start = &headers[i]; 68 state = FINDING_VALUE; 69 // Fallthrough to start processing value without incrementing i. 70 71 case FINDING_VALUE: 72 if (headers[i] == '\n') { 73 // Found value. 74 value.assign(start, &headers[i] - start); 75 result[key] = value; 76 start = &headers[i + 1]; 77 state = FINDING_KEY; 78 } 79 break; 80 } 81 } 82 83 return result; 84 } 85 86 bool ParseContentLength(const StringMap_t& headers, off_t* content_length) { 87 StringMap_t::const_iterator iter = headers.find("Content-Length"); 88 if (iter == headers.end()) 89 return false; 90 91 *content_length = strtoull(iter->second.c_str(), NULL, 10); 92 return true; 93 } 94 95 bool ParseContentRange(const StringMap_t& headers, 96 off_t* read_start, 97 off_t* read_end, 98 off_t* entity_length) { 99 StringMap_t::const_iterator iter = headers.find("Content-Range"); 100 if (iter == headers.end()) 101 return false; 102 103 // The key should look like "bytes ##-##/##" or "bytes ##-##/*". The last 104 // value is the entity length, which can potentially be * (i.e. unknown). 105 off_t read_start_int; 106 off_t read_end_int; 107 off_t entity_length_int; 108 int result = sscanf(iter->second.c_str(), 109 "bytes %" SCNi64 "-%" SCNi64 "/%" SCNi64, 110 &read_start_int, 111 &read_end_int, 112 &entity_length_int); 113 114 // The Content-Range header specifies an inclusive range: e.g. the first ten 115 // bytes is "bytes 0-9/*". Convert it to a half-open range by incrementing 116 // read_end. 117 if (result == 2) { 118 if (read_start) 119 *read_start = read_start_int; 120 if (read_end) 121 *read_end = read_end_int + 1; 122 if (entity_length) 123 *entity_length = 0; 124 return true; 125 } else if (result == 3) { 126 if (read_start) 127 *read_start = read_start_int; 128 if (read_end) 129 *read_end = read_end_int + 1; 130 if (entity_length) 131 *entity_length = entity_length_int; 132 return true; 133 } 134 135 return false; 136 } 137 138 // Maps an HTTP |status_code| onto the appropriate errno code. 139 int HTTPStatusCodeToErrno(int status_code) { 140 switch (status_code) { 141 case STATUSCODE_OK: 142 case STATUSCODE_PARTIAL_CONTENT: 143 return 0; 144 case STATUSCODE_FORBIDDEN: 145 return EACCES; 146 case STATUSCODE_NOT_FOUND: 147 return ENOENT; 148 } 149 if (status_code >= 400 && status_code < 500) 150 return EINVAL; 151 return EIO; 152 } 153 154 } // namespace 155 156 void HttpFsNode::SetCachedSize(off_t size) { 157 has_cached_size_ = true; 158 stat_.st_size = size; 159 } 160 161 Error HttpFsNode::FSync() { 162 return EACCES; 163 } 164 165 Error HttpFsNode::GetDents(size_t offs, 166 struct dirent* pdir, 167 size_t count, 168 int* out_bytes) { 169 *out_bytes = 0; 170 return EACCES; 171 } 172 173 Error HttpFsNode::GetStat(struct stat* stat) { 174 AUTO_LOCK(node_lock_); 175 return GetStat_Locked(stat); 176 } 177 178 Error HttpFsNode::Read(const HandleAttr& attr, 179 void* buf, 180 size_t count, 181 int* out_bytes) { 182 *out_bytes = 0; 183 184 AUTO_LOCK(node_lock_); 185 if (cache_content_) { 186 if (cached_data_.empty()) { 187 Error error = DownloadToCache(); 188 if (error) 189 return error; 190 } 191 192 return ReadPartialFromCache(attr, buf, count, out_bytes); 193 } 194 195 return DownloadPartial(attr, buf, count, out_bytes); 196 } 197 198 Error HttpFsNode::FTruncate(off_t size) { 199 return EACCES; 200 } 201 202 Error HttpFsNode::Write(const HandleAttr& attr, 203 const void* buf, 204 size_t count, 205 int* out_bytes) { 206 // TODO(binji): support POST? 207 *out_bytes = 0; 208 return EACCES; 209 } 210 211 Error HttpFsNode::GetSize(off_t* out_size) { 212 *out_size = 0; 213 214 // TODO(binji): This value should be cached properly; i.e. obey the caching 215 // headers returned by the server. 216 AUTO_LOCK(node_lock_); 217 struct stat statbuf; 218 Error error = GetStat_Locked(&statbuf); 219 if (error) 220 return error; 221 222 *out_size = stat_.st_size; 223 return 0; 224 } 225 226 HttpFsNode::HttpFsNode(Filesystem* filesystem, 227 const std::string& url, 228 bool cache_content) 229 : Node(filesystem), 230 url_(url), 231 buffer_(NULL), 232 buffer_len_(0), 233 cache_content_(cache_content), 234 has_cached_size_(false) { 235 // http nodes are read-only by default 236 SetMode(S_IRALL); 237 } 238 239 HttpFsNode::~HttpFsNode() { 240 free(buffer_); 241 } 242 243 Error HttpFsNode::GetStat_Locked(struct stat* stat) { 244 // Assume we need to 'HEAD' if we do not know the size, otherwise, assume 245 // that the information is constant. We can add a timeout if needed. 246 HttpFs* filesystem = static_cast<HttpFs*>(filesystem_); 247 if (!has_cached_size_ || !filesystem->cache_stat_) { 248 StringMap_t headers; 249 ScopedResource loader(filesystem_->ppapi()); 250 ScopedResource request(filesystem_->ppapi()); 251 ScopedResource response(filesystem_->ppapi()); 252 int32_t statuscode; 253 StringMap_t response_headers; 254 const char* method = "HEAD"; 255 256 if (filesystem->is_blob_url_) { 257 // Blob URLs do not support HEAD requests, but do give the content length 258 // in their response headers. We issue a single-byte GET request to 259 // retrieve the content length. 260 method = "GET"; 261 headers["Range"] = "bytes=0-0"; 262 } 263 264 Error error = OpenUrl(method, 265 &headers, 266 &loader, 267 &request, 268 &response, 269 &statuscode, 270 &response_headers); 271 if (error) 272 return error; 273 274 off_t entity_length; 275 if (ParseContentRange(response_headers, NULL, NULL, &entity_length)) { 276 SetCachedSize(static_cast<off_t>(entity_length)); 277 } else if (ParseContentLength(response_headers, &entity_length)) { 278 SetCachedSize(static_cast<off_t>(entity_length)); 279 } else if (cache_content_) { 280 // The server didn't give a content length; download the data to memory 281 // via DownloadToCache, which will also set stat_.st_size; 282 error = DownloadToCache(); 283 if (error) 284 return error; 285 } else { 286 // The user doesn't want to cache content, but we didn't get a 287 // "Content-Length" header. Read the entire entity, and throw it away. 288 // Don't use DownloadToCache, as that will still allocate enough memory 289 // for the entire entity. 290 off_t bytes_read; 291 error = DownloadToTemp(&bytes_read); 292 if (error) 293 return error; 294 295 SetCachedSize(bytes_read); 296 } 297 298 stat_.st_atime = 0; // TODO(binji): Use "Last-Modified". 299 stat_.st_mtime = 0; 300 stat_.st_ctime = 0; 301 302 SetType(S_IFREG); 303 } 304 305 // Fill the stat structure if provided 306 if (stat) 307 *stat = stat_; 308 309 return 0; 310 } 311 312 Error HttpFsNode::OpenUrl(const char* method, 313 StringMap_t* request_headers, 314 ScopedResource* out_loader, 315 ScopedResource* out_request, 316 ScopedResource* out_response, 317 int32_t* out_statuscode, 318 StringMap_t* out_response_headers) { 319 // Clear all out parameters. 320 *out_statuscode = 0; 321 out_response_headers->clear(); 322 323 // Assume lock_ is already held. 324 PepperInterface* ppapi = filesystem_->ppapi(); 325 326 HttpFs* mount_http = static_cast<HttpFs*>(filesystem_); 327 out_request->Reset( 328 mount_http->MakeUrlRequestInfo(url_, method, request_headers)); 329 if (!out_request->pp_resource()) 330 return EINVAL; 331 332 URLLoaderInterface* loader_interface = ppapi->GetURLLoaderInterface(); 333 URLResponseInfoInterface* response_interface = 334 ppapi->GetURLResponseInfoInterface(); 335 VarInterface* var_interface = ppapi->GetVarInterface(); 336 337 out_loader->Reset(loader_interface->Create(ppapi->GetInstance())); 338 if (!out_loader->pp_resource()) 339 return EINVAL; 340 341 int32_t result = loader_interface->Open(out_loader->pp_resource(), 342 out_request->pp_resource(), 343 PP_BlockUntilComplete()); 344 if (result != PP_OK) 345 return PPErrorToErrno(result); 346 347 out_response->Reset( 348 loader_interface->GetResponseInfo(out_loader->pp_resource())); 349 if (!out_response->pp_resource()) 350 return EINVAL; 351 352 // Get response statuscode. 353 PP_Var statuscode = response_interface->GetProperty( 354 out_response->pp_resource(), PP_URLRESPONSEPROPERTY_STATUSCODE); 355 356 if (statuscode.type != PP_VARTYPE_INT32) 357 return EINVAL; 358 359 *out_statuscode = statuscode.value.as_int; 360 361 // Only accept OK or Partial Content. 362 Error error = HTTPStatusCodeToErrno(*out_statuscode); 363 if (error) 364 return error; 365 366 // Get response headers. 367 PP_Var response_headers_var = response_interface->GetProperty( 368 out_response->pp_resource(), PP_URLRESPONSEPROPERTY_HEADERS); 369 370 uint32_t response_headers_length; 371 const char* response_headers_str = 372 var_interface->VarToUtf8(response_headers_var, &response_headers_length); 373 374 *out_response_headers = 375 ParseHeaders(response_headers_str, response_headers_length); 376 377 var_interface->Release(response_headers_var); 378 379 return 0; 380 } 381 382 Error HttpFsNode::DownloadToCache() { 383 StringMap_t headers; 384 ScopedResource loader(filesystem_->ppapi()); 385 ScopedResource request(filesystem_->ppapi()); 386 ScopedResource response(filesystem_->ppapi()); 387 int32_t statuscode; 388 StringMap_t response_headers; 389 Error error = OpenUrl("GET", 390 &headers, 391 &loader, 392 &request, 393 &response, 394 &statuscode, 395 &response_headers); 396 if (error) 397 return error; 398 399 off_t content_length = 0; 400 if (ParseContentLength(response_headers, &content_length)) { 401 cached_data_.resize(content_length); 402 int real_size; 403 error = ReadResponseToBuffer( 404 loader, cached_data_.data(), content_length, &real_size); 405 if (error) 406 return error; 407 408 SetCachedSize(real_size); 409 cached_data_.resize(real_size); 410 return 0; 411 } 412 413 int bytes_read; 414 error = ReadEntireResponseToCache(loader, &bytes_read); 415 if (error) 416 return error; 417 418 SetCachedSize(bytes_read); 419 return 0; 420 } 421 422 Error HttpFsNode::ReadPartialFromCache(const HandleAttr& attr, 423 void* buf, 424 int count, 425 int* out_bytes) { 426 *out_bytes = 0; 427 off_t size = cached_data_.size(); 428 429 if (attr.offs + count > size) 430 count = size - attr.offs; 431 432 if (count <= 0) 433 return 0; 434 435 memcpy(buf, &cached_data_.data()[attr.offs], count); 436 *out_bytes = count; 437 return 0; 438 } 439 440 Error HttpFsNode::DownloadPartial(const HandleAttr& attr, 441 void* buf, 442 off_t count, 443 int* out_bytes) { 444 *out_bytes = 0; 445 446 StringMap_t headers; 447 448 char buffer[100]; 449 // Range request is inclusive: 0-99 returns 100 bytes. 450 snprintf(&buffer[0], 451 sizeof(buffer), 452 "bytes=%" PRIi64 "-%" PRIi64, 453 attr.offs, 454 attr.offs + count - 1); 455 headers["Range"] = buffer; 456 457 ScopedResource loader(filesystem_->ppapi()); 458 ScopedResource request(filesystem_->ppapi()); 459 ScopedResource response(filesystem_->ppapi()); 460 int32_t statuscode; 461 StringMap_t response_headers; 462 Error error = OpenUrl("GET", 463 &headers, 464 &loader, 465 &request, 466 &response, 467 &statuscode, 468 &response_headers); 469 if (error) { 470 if (statuscode == STATUSCODE_REQUESTED_RANGE_NOT_SATISFIABLE) { 471 // We're likely trying to read past the end. Return 0 bytes. 472 *out_bytes = 0; 473 return 0; 474 } 475 476 return error; 477 } 478 479 off_t read_start = 0; 480 if (statuscode == STATUSCODE_OK) { 481 // No partial result, read everything starting from the part we care about. 482 off_t content_length; 483 if (ParseContentLength(response_headers, &content_length)) { 484 if (attr.offs >= content_length) 485 return EINVAL; 486 487 // Clamp count, if trying to read past the end of the file. 488 if (attr.offs + count > content_length) { 489 count = content_length - attr.offs; 490 } 491 } 492 } else if (statuscode == STATUSCODE_PARTIAL_CONTENT) { 493 // Determine from the headers where we are reading. 494 off_t read_end; 495 off_t entity_length; 496 if (ParseContentRange( 497 response_headers, &read_start, &read_end, &entity_length)) { 498 if (read_start > attr.offs || read_start > read_end) { 499 // If this error occurs, the server is returning bogus values. 500 return EINVAL; 501 } 502 503 // Clamp count, if trying to read past the end of the file. 504 count = std::min(read_end - read_start, count); 505 } else { 506 // Partial Content without Content-Range. Assume that the server gave us 507 // exactly what we asked for. This can happen even when the server 508 // returns 200 -- the cache may return 206 in this case, but not modify 509 // the headers. 510 read_start = attr.offs; 511 } 512 } 513 514 if (read_start < attr.offs) { 515 // We aren't yet at the location where we want to start reading. Read into 516 // our dummy buffer until then. 517 int bytes_to_read = attr.offs - read_start; 518 int bytes_read; 519 error = ReadResponseToTemp(loader, bytes_to_read, &bytes_read); 520 if (error) 521 return error; 522 523 // Tried to read past the end of the entity. 524 if (bytes_read < bytes_to_read) { 525 *out_bytes = 0; 526 return 0; 527 } 528 } 529 530 return ReadResponseToBuffer(loader, buf, count, out_bytes); 531 } 532 533 Error HttpFsNode::DownloadToTemp(off_t* out_bytes) { 534 StringMap_t headers; 535 ScopedResource loader(filesystem_->ppapi()); 536 ScopedResource request(filesystem_->ppapi()); 537 ScopedResource response(filesystem_->ppapi()); 538 int32_t statuscode; 539 StringMap_t response_headers; 540 Error error = OpenUrl("GET", 541 &headers, 542 &loader, 543 &request, 544 &response, 545 &statuscode, 546 &response_headers); 547 if (error) 548 return error; 549 550 off_t content_length = 0; 551 if (ParseContentLength(response_headers, &content_length)) { 552 *out_bytes = content_length; 553 return 0; 554 } 555 556 return ReadEntireResponseToTemp(loader, out_bytes); 557 } 558 559 Error HttpFsNode::ReadEntireResponseToTemp(const ScopedResource& loader, 560 off_t* out_bytes) { 561 *out_bytes = 0; 562 563 const int kBytesToRead = MAX_READ_BUFFER_SIZE; 564 buffer_ = (char*)realloc(buffer_, kBytesToRead); 565 assert(buffer_); 566 if (!buffer_) { 567 buffer_len_ = 0; 568 return ENOMEM; 569 } 570 buffer_len_ = kBytesToRead; 571 572 while (true) { 573 int bytes_read; 574 Error error = 575 ReadResponseToBuffer(loader, buffer_, kBytesToRead, &bytes_read); 576 if (error) 577 return error; 578 579 *out_bytes += bytes_read; 580 581 if (bytes_read < kBytesToRead) 582 return 0; 583 } 584 } 585 586 Error HttpFsNode::ReadEntireResponseToCache(const ScopedResource& loader, 587 int* out_bytes) { 588 *out_bytes = 0; 589 const int kBytesToRead = MAX_READ_BUFFER_SIZE; 590 591 while (true) { 592 // Always recalculate the buf pointer because it may have moved when 593 // cached_data_ was resized. 594 cached_data_.resize(*out_bytes + kBytesToRead); 595 void* buf = cached_data_.data() + *out_bytes; 596 597 int bytes_read; 598 Error error = ReadResponseToBuffer(loader, buf, kBytesToRead, &bytes_read); 599 if (error) 600 return error; 601 602 *out_bytes += bytes_read; 603 604 if (bytes_read < kBytesToRead) { 605 // Shrink the cached data buffer to the correct size. 606 cached_data_.resize(*out_bytes); 607 return 0; 608 } 609 } 610 } 611 612 Error HttpFsNode::ReadResponseToTemp(const ScopedResource& loader, 613 int count, 614 int* out_bytes) { 615 *out_bytes = 0; 616 617 if (buffer_len_ < count) { 618 int new_len = std::min(count, MAX_READ_BUFFER_SIZE); 619 buffer_ = (char*)realloc(buffer_, new_len); 620 assert(buffer_); 621 if (!buffer_) { 622 buffer_len_ = 0; 623 return ENOMEM; 624 } 625 buffer_len_ = new_len; 626 } 627 628 int bytes_left = count; 629 while (bytes_left > 0) { 630 int bytes_to_read = std::min(bytes_left, buffer_len_); 631 int bytes_read; 632 Error error = ReadResponseToBuffer( 633 loader, buffer_, bytes_to_read, &bytes_read); 634 if (error) 635 return error; 636 637 if (bytes_read == 0) 638 return 0; 639 640 bytes_left -= bytes_read; 641 *out_bytes += bytes_read; 642 } 643 644 return 0; 645 } 646 647 Error HttpFsNode::ReadResponseToBuffer(const ScopedResource& loader, 648 void* buf, 649 int count, 650 int* out_bytes) { 651 *out_bytes = 0; 652 653 PepperInterface* ppapi = filesystem_->ppapi(); 654 URLLoaderInterface* loader_interface = ppapi->GetURLLoaderInterface(); 655 656 char* out_buffer = static_cast<char*>(buf); 657 int bytes_to_read = count; 658 while (bytes_to_read > 0) { 659 int bytes_read = 660 loader_interface->ReadResponseBody(loader.pp_resource(), 661 out_buffer, 662 bytes_to_read, 663 PP_BlockUntilComplete()); 664 665 if (bytes_read == 0) { 666 // This is not an error -- it may just be that we were trying to read 667 // more data than exists. 668 *out_bytes = count - bytes_to_read; 669 return 0; 670 } 671 672 if (bytes_read < 0) 673 return PPErrorToErrno(bytes_read); 674 675 assert(bytes_read <= bytes_to_read); 676 bytes_to_read -= bytes_read; 677 out_buffer += bytes_read; 678 } 679 680 *out_bytes = count; 681 return 0; 682 } 683 684 } // namespace nacl_io 685