Home | History | Annotate | Download | only in httpfs
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "nacl_io/httpfs/http_fs_node.h"
      6 
      7 #include <assert.h>
      8 #include <errno.h>
      9 #include <stdio.h>
     10 #include <string.h>
     11 
     12 #include <ppapi/c/pp_errors.h>
     13 
     14 #include "nacl_io/httpfs/http_fs.h"
     15 #include "nacl_io/kernel_handle.h"
     16 #include "nacl_io/osinttypes.h"
     17 
     18 #if defined(WIN32)
     19 #define snprintf _snprintf
     20 #endif
     21 
     22 namespace nacl_io {
     23 
     24 namespace {
     25 
// If we're attempting to read a partial request, but the server returns a full
// request, we need to read all of the data up to the start of our partial
// request into a dummy buffer. This is the maximum size of that buffer.
const int MAX_READ_BUFFER_SIZE = 64 * 1024;

// HTTP status codes that this file treats specially. 200 and 206 are the only
// codes mapped to success by HTTPStatusCodeToErrno; 403 and 404 get dedicated
// errno values there, and 416 is checked by DownloadPartial.
const int32_t STATUSCODE_OK = 200;
const int32_t STATUSCODE_PARTIAL_CONTENT = 206;
const int32_t STATUSCODE_FORBIDDEN = 403;
const int32_t STATUSCODE_NOT_FOUND = 404;
const int32_t STATUSCODE_REQUESTED_RANGE_NOT_SATISFIABLE = 416;
     35 
     36 StringMap_t ParseHeaders(const char* headers, int32_t headers_length) {
     37   enum State {
     38     FINDING_KEY,
     39     SKIPPING_WHITESPACE,
     40     FINDING_VALUE,
     41   };
     42 
     43   StringMap_t result;
     44   std::string key;
     45   std::string value;
     46 
     47   State state = FINDING_KEY;
     48   const char* start = headers;
     49   for (int i = 0; i < headers_length; ++i) {
     50     switch (state) {
     51       case FINDING_KEY:
     52         if (headers[i] == ':') {
     53           // Found key.
     54           key.assign(start, &headers[i] - start);
     55           key = NormalizeHeaderKey(key);
     56           state = SKIPPING_WHITESPACE;
     57         }
     58         break;
     59 
     60       case SKIPPING_WHITESPACE:
     61         if (headers[i] == ' ') {
     62           // Found whitespace, keep going...
     63           break;
     64         }
     65 
     66         // Found a non-whitespace, mark this as the start of the value.
     67         start = &headers[i];
     68         state = FINDING_VALUE;
     69       // Fallthrough to start processing value without incrementing i.
     70 
     71       case FINDING_VALUE:
     72         if (headers[i] == '\n') {
     73           // Found value.
     74           value.assign(start, &headers[i] - start);
     75           result[key] = value;
     76           start = &headers[i + 1];
     77           state = FINDING_KEY;
     78         }
     79         break;
     80     }
     81   }
     82 
     83   return result;
     84 }
     85 
     86 bool ParseContentLength(const StringMap_t& headers, off_t* content_length) {
     87   StringMap_t::const_iterator iter = headers.find("Content-Length");
     88   if (iter == headers.end())
     89     return false;
     90 
     91   *content_length = strtoull(iter->second.c_str(), NULL, 10);
     92   return true;
     93 }
     94 
     95 bool ParseContentRange(const StringMap_t& headers,
     96                        off_t* read_start,
     97                        off_t* read_end,
     98                        off_t* entity_length) {
     99   StringMap_t::const_iterator iter = headers.find("Content-Range");
    100   if (iter == headers.end())
    101     return false;
    102 
    103   // The key should look like "bytes ##-##/##" or "bytes ##-##/*". The last
    104   // value is the entity length, which can potentially be * (i.e. unknown).
    105   off_t read_start_int;
    106   off_t read_end_int;
    107   off_t entity_length_int;
    108   int result = sscanf(iter->second.c_str(),
    109                       "bytes %" SCNi64 "-%" SCNi64 "/%" SCNi64,
    110                       &read_start_int,
    111                       &read_end_int,
    112                       &entity_length_int);
    113 
    114   // The Content-Range header specifies an inclusive range: e.g. the first ten
    115   // bytes is "bytes 0-9/*". Convert it to a half-open range by incrementing
    116   // read_end.
    117   if (result == 2) {
    118     if (read_start)
    119       *read_start = read_start_int;
    120     if (read_end)
    121       *read_end = read_end_int + 1;
    122     if (entity_length)
    123       *entity_length = 0;
    124     return true;
    125   } else if (result == 3) {
    126     if (read_start)
    127       *read_start = read_start_int;
    128     if (read_end)
    129       *read_end = read_end_int + 1;
    130     if (entity_length)
    131       *entity_length = entity_length_int;
    132     return true;
    133   }
    134 
    135   return false;
    136 }
    137 
    138 // Maps an HTTP |status_code| onto the appropriate errno code.
    139 int HTTPStatusCodeToErrno(int status_code) {
    140   switch (status_code) {
    141     case STATUSCODE_OK:
    142     case STATUSCODE_PARTIAL_CONTENT:
    143       return 0;
    144     case STATUSCODE_FORBIDDEN:
    145       return EACCES;
    146     case STATUSCODE_NOT_FOUND:
    147       return ENOENT;
    148   }
    149   if (status_code >= 400 && status_code < 500)
    150     return EINVAL;
    151   return EIO;
    152 }
    153 
    154 }  // namespace
    155 
    156 void HttpFsNode::SetCachedSize(off_t size) {
    157   has_cached_size_ = true;
    158   stat_.st_size = size;
    159 }
    160 
    161 Error HttpFsNode::FSync() {
    162   return EACCES;
    163 }
    164 
    165 Error HttpFsNode::GetDents(size_t offs,
    166                            struct dirent* pdir,
    167                            size_t count,
    168                            int* out_bytes) {
    169   *out_bytes = 0;
    170   return EACCES;
    171 }
    172 
    173 Error HttpFsNode::GetStat(struct stat* stat) {
    174   AUTO_LOCK(node_lock_);
    175   return GetStat_Locked(stat);
    176 }
    177 
    178 Error HttpFsNode::Read(const HandleAttr& attr,
    179                        void* buf,
    180                        size_t count,
    181                        int* out_bytes) {
    182   *out_bytes = 0;
    183 
    184   AUTO_LOCK(node_lock_);
    185   if (cache_content_) {
    186     if (cached_data_.empty()) {
    187       Error error = DownloadToCache();
    188       if (error)
    189         return error;
    190     }
    191 
    192     return ReadPartialFromCache(attr, buf, count, out_bytes);
    193   }
    194 
    195   return DownloadPartial(attr, buf, count, out_bytes);
    196 }
    197 
    198 Error HttpFsNode::FTruncate(off_t size) {
    199   return EACCES;
    200 }
    201 
    202 Error HttpFsNode::Write(const HandleAttr& attr,
    203                         const void* buf,
    204                         size_t count,
    205                         int* out_bytes) {
    206   // TODO(binji): support POST?
    207   *out_bytes = 0;
    208   return EACCES;
    209 }
    210 
    211 Error HttpFsNode::GetSize(off_t* out_size) {
    212   *out_size = 0;
    213 
    214   // TODO(binji): This value should be cached properly; i.e. obey the caching
    215   // headers returned by the server.
    216   AUTO_LOCK(node_lock_);
    217   struct stat statbuf;
    218   Error error = GetStat_Locked(&statbuf);
    219   if (error)
    220     return error;
    221 
    222   *out_size = stat_.st_size;
    223   return 0;
    224 }
    225 
    226 HttpFsNode::HttpFsNode(Filesystem* filesystem,
    227                        const std::string& url,
    228                        bool cache_content)
    229     : Node(filesystem),
    230       url_(url),
    231       buffer_(NULL),
    232       buffer_len_(0),
    233       cache_content_(cache_content),
    234       has_cached_size_(false) {
    235   // http nodes are read-only by default
    236   SetMode(S_IRALL);
    237 }
    238 
    239 HttpFsNode::~HttpFsNode() {
    240   free(buffer_);
    241 }
    242 
    243 Error HttpFsNode::GetStat_Locked(struct stat* stat) {
    244   // Assume we need to 'HEAD' if we do not know the size, otherwise, assume
    245   // that the information is constant.  We can add a timeout if needed.
    246   HttpFs* filesystem = static_cast<HttpFs*>(filesystem_);
    247   if (!has_cached_size_ || !filesystem->cache_stat_) {
    248     StringMap_t headers;
    249     ScopedResource loader(filesystem_->ppapi());
    250     ScopedResource request(filesystem_->ppapi());
    251     ScopedResource response(filesystem_->ppapi());
    252     int32_t statuscode;
    253     StringMap_t response_headers;
    254     const char* method = "HEAD";
    255 
    256     if (filesystem->is_blob_url_) {
    257       // Blob URLs do not support HEAD requests, but do give the content length
    258       // in their response headers. We issue a single-byte GET request to
    259       // retrieve the content length.
    260       method = "GET";
    261       headers["Range"] = "bytes=0-0";
    262     }
    263 
    264     Error error = OpenUrl(method,
    265                           &headers,
    266                           &loader,
    267                           &request,
    268                           &response,
    269                           &statuscode,
    270                           &response_headers);
    271     if (error)
    272       return error;
    273 
    274     off_t entity_length;
    275     if (ParseContentRange(response_headers, NULL, NULL, &entity_length)) {
    276       SetCachedSize(static_cast<off_t>(entity_length));
    277     } else if (ParseContentLength(response_headers, &entity_length)) {
    278       SetCachedSize(static_cast<off_t>(entity_length));
    279     } else if (cache_content_) {
    280       // The server didn't give a content length; download the data to memory
    281       // via DownloadToCache, which will also set stat_.st_size;
    282       error = DownloadToCache();
    283       if (error)
    284         return error;
    285     } else {
    286       // The user doesn't want to cache content, but we didn't get a
    287       // "Content-Length" header. Read the entire entity, and throw it away.
    288       // Don't use DownloadToCache, as that will still allocate enough memory
    289       // for the entire entity.
    290       off_t bytes_read;
    291       error = DownloadToTemp(&bytes_read);
    292       if (error)
    293         return error;
    294 
    295       SetCachedSize(bytes_read);
    296     }
    297 
    298     stat_.st_atime = 0;  // TODO(binji): Use "Last-Modified".
    299     stat_.st_mtime = 0;
    300     stat_.st_ctime = 0;
    301 
    302     SetType(S_IFREG);
    303   }
    304 
    305   // Fill the stat structure if provided
    306   if (stat)
    307     *stat = stat_;
    308 
    309   return 0;
    310 }
    311 
    312 Error HttpFsNode::OpenUrl(const char* method,
    313                           StringMap_t* request_headers,
    314                           ScopedResource* out_loader,
    315                           ScopedResource* out_request,
    316                           ScopedResource* out_response,
    317                           int32_t* out_statuscode,
    318                           StringMap_t* out_response_headers) {
    319   // Clear all out parameters.
    320   *out_statuscode = 0;
    321   out_response_headers->clear();
    322 
    323   // Assume lock_ is already held.
    324   PepperInterface* ppapi = filesystem_->ppapi();
    325 
    326   HttpFs* mount_http = static_cast<HttpFs*>(filesystem_);
    327   out_request->Reset(
    328       mount_http->MakeUrlRequestInfo(url_, method, request_headers));
    329   if (!out_request->pp_resource())
    330     return EINVAL;
    331 
    332   URLLoaderInterface* loader_interface = ppapi->GetURLLoaderInterface();
    333   URLResponseInfoInterface* response_interface =
    334       ppapi->GetURLResponseInfoInterface();
    335   VarInterface* var_interface = ppapi->GetVarInterface();
    336 
    337   out_loader->Reset(loader_interface->Create(ppapi->GetInstance()));
    338   if (!out_loader->pp_resource())
    339     return EINVAL;
    340 
    341   int32_t result = loader_interface->Open(out_loader->pp_resource(),
    342                                           out_request->pp_resource(),
    343                                           PP_BlockUntilComplete());
    344   if (result != PP_OK)
    345     return PPErrorToErrno(result);
    346 
    347   out_response->Reset(
    348       loader_interface->GetResponseInfo(out_loader->pp_resource()));
    349   if (!out_response->pp_resource())
    350     return EINVAL;
    351 
    352   // Get response statuscode.
    353   PP_Var statuscode = response_interface->GetProperty(
    354       out_response->pp_resource(), PP_URLRESPONSEPROPERTY_STATUSCODE);
    355 
    356   if (statuscode.type != PP_VARTYPE_INT32)
    357     return EINVAL;
    358 
    359   *out_statuscode = statuscode.value.as_int;
    360 
    361   // Only accept OK or Partial Content.
    362   Error error = HTTPStatusCodeToErrno(*out_statuscode);
    363   if (error)
    364     return error;
    365 
    366   // Get response headers.
    367   PP_Var response_headers_var = response_interface->GetProperty(
    368       out_response->pp_resource(), PP_URLRESPONSEPROPERTY_HEADERS);
    369 
    370   uint32_t response_headers_length;
    371   const char* response_headers_str =
    372       var_interface->VarToUtf8(response_headers_var, &response_headers_length);
    373 
    374   *out_response_headers =
    375       ParseHeaders(response_headers_str, response_headers_length);
    376 
    377   var_interface->Release(response_headers_var);
    378 
    379   return 0;
    380 }
    381 
    382 Error HttpFsNode::DownloadToCache() {
    383   StringMap_t headers;
    384   ScopedResource loader(filesystem_->ppapi());
    385   ScopedResource request(filesystem_->ppapi());
    386   ScopedResource response(filesystem_->ppapi());
    387   int32_t statuscode;
    388   StringMap_t response_headers;
    389   Error error = OpenUrl("GET",
    390                         &headers,
    391                         &loader,
    392                         &request,
    393                         &response,
    394                         &statuscode,
    395                         &response_headers);
    396   if (error)
    397     return error;
    398 
    399   off_t content_length = 0;
    400   if (ParseContentLength(response_headers, &content_length)) {
    401     cached_data_.resize(content_length);
    402     int real_size;
    403     error = ReadResponseToBuffer(
    404         loader, cached_data_.data(), content_length, &real_size);
    405     if (error)
    406       return error;
    407 
    408     SetCachedSize(real_size);
    409     cached_data_.resize(real_size);
    410     return 0;
    411   }
    412 
    413   int bytes_read;
    414   error = ReadEntireResponseToCache(loader, &bytes_read);
    415   if (error)
    416     return error;
    417 
    418   SetCachedSize(bytes_read);
    419   return 0;
    420 }
    421 
    422 Error HttpFsNode::ReadPartialFromCache(const HandleAttr& attr,
    423                                        void* buf,
    424                                        int count,
    425                                        int* out_bytes) {
    426   *out_bytes = 0;
    427   off_t size = cached_data_.size();
    428 
    429   if (attr.offs + count > size)
    430     count = size - attr.offs;
    431 
    432   if (count <= 0)
    433     return 0;
    434 
    435   memcpy(buf, &cached_data_.data()[attr.offs], count);
    436   *out_bytes = count;
    437   return 0;
    438 }
    439 
    440 Error HttpFsNode::DownloadPartial(const HandleAttr& attr,
    441                                   void* buf,
    442                                   off_t count,
    443                                   int* out_bytes) {
    444   *out_bytes = 0;
    445 
    446   StringMap_t headers;
    447 
    448   char buffer[100];
    449   // Range request is inclusive: 0-99 returns 100 bytes.
    450   snprintf(&buffer[0],
    451            sizeof(buffer),
    452            "bytes=%" PRIi64 "-%" PRIi64,
    453            attr.offs,
    454            attr.offs + count - 1);
    455   headers["Range"] = buffer;
    456 
    457   ScopedResource loader(filesystem_->ppapi());
    458   ScopedResource request(filesystem_->ppapi());
    459   ScopedResource response(filesystem_->ppapi());
    460   int32_t statuscode;
    461   StringMap_t response_headers;
    462   Error error = OpenUrl("GET",
    463                         &headers,
    464                         &loader,
    465                         &request,
    466                         &response,
    467                         &statuscode,
    468                         &response_headers);
    469   if (error) {
    470     if (statuscode == STATUSCODE_REQUESTED_RANGE_NOT_SATISFIABLE) {
    471       // We're likely trying to read past the end. Return 0 bytes.
    472       *out_bytes = 0;
    473       return 0;
    474     }
    475 
    476     return error;
    477   }
    478 
    479   off_t read_start = 0;
    480   if (statuscode == STATUSCODE_OK) {
    481     // No partial result, read everything starting from the part we care about.
    482     off_t content_length;
    483     if (ParseContentLength(response_headers, &content_length)) {
    484       if (attr.offs >= content_length)
    485         return EINVAL;
    486 
    487       // Clamp count, if trying to read past the end of the file.
    488       if (attr.offs + count > content_length) {
    489         count = content_length - attr.offs;
    490       }
    491     }
    492   } else if (statuscode == STATUSCODE_PARTIAL_CONTENT) {
    493     // Determine from the headers where we are reading.
    494     off_t read_end;
    495     off_t entity_length;
    496     if (ParseContentRange(
    497             response_headers, &read_start, &read_end, &entity_length)) {
    498       if (read_start > attr.offs || read_start > read_end) {
    499         // If this error occurs, the server is returning bogus values.
    500         return EINVAL;
    501       }
    502 
    503       // Clamp count, if trying to read past the end of the file.
    504       count = std::min(read_end - read_start, count);
    505     } else {
    506       // Partial Content without Content-Range. Assume that the server gave us
    507       // exactly what we asked for. This can happen even when the server
    508       // returns 200 -- the cache may return 206 in this case, but not modify
    509       // the headers.
    510       read_start = attr.offs;
    511     }
    512   }
    513 
    514   if (read_start < attr.offs) {
    515     // We aren't yet at the location where we want to start reading. Read into
    516     // our dummy buffer until then.
    517     int bytes_to_read = attr.offs - read_start;
    518     int bytes_read;
    519     error = ReadResponseToTemp(loader, bytes_to_read, &bytes_read);
    520     if (error)
    521       return error;
    522 
    523     // Tried to read past the end of the entity.
    524     if (bytes_read < bytes_to_read) {
    525       *out_bytes = 0;
    526       return 0;
    527     }
    528   }
    529 
    530   return ReadResponseToBuffer(loader, buf, count, out_bytes);
    531 }
    532 
    533 Error HttpFsNode::DownloadToTemp(off_t* out_bytes) {
    534   StringMap_t headers;
    535   ScopedResource loader(filesystem_->ppapi());
    536   ScopedResource request(filesystem_->ppapi());
    537   ScopedResource response(filesystem_->ppapi());
    538   int32_t statuscode;
    539   StringMap_t response_headers;
    540   Error error = OpenUrl("GET",
    541                         &headers,
    542                         &loader,
    543                         &request,
    544                         &response,
    545                         &statuscode,
    546                         &response_headers);
    547   if (error)
    548     return error;
    549 
    550   off_t content_length = 0;
    551   if (ParseContentLength(response_headers, &content_length)) {
    552     *out_bytes = content_length;
    553     return 0;
    554   }
    555 
    556   return ReadEntireResponseToTemp(loader, out_bytes);
    557 }
    558 
    559 Error HttpFsNode::ReadEntireResponseToTemp(const ScopedResource& loader,
    560                                            off_t* out_bytes) {
    561   *out_bytes = 0;
    562 
    563   const int kBytesToRead = MAX_READ_BUFFER_SIZE;
    564   buffer_ = (char*)realloc(buffer_, kBytesToRead);
    565   assert(buffer_);
    566   if (!buffer_) {
    567     buffer_len_ = 0;
    568     return ENOMEM;
    569   }
    570   buffer_len_ = kBytesToRead;
    571 
    572   while (true) {
    573     int bytes_read;
    574     Error error =
    575         ReadResponseToBuffer(loader, buffer_, kBytesToRead, &bytes_read);
    576     if (error)
    577       return error;
    578 
    579     *out_bytes += bytes_read;
    580 
    581     if (bytes_read < kBytesToRead)
    582       return 0;
    583   }
    584 }
    585 
    586 Error HttpFsNode::ReadEntireResponseToCache(const ScopedResource& loader,
    587                                             int* out_bytes) {
    588   *out_bytes = 0;
    589   const int kBytesToRead = MAX_READ_BUFFER_SIZE;
    590 
    591   while (true) {
    592     // Always recalculate the buf pointer because it may have moved when
    593     // cached_data_ was resized.
    594     cached_data_.resize(*out_bytes + kBytesToRead);
    595     void* buf = cached_data_.data() + *out_bytes;
    596 
    597     int bytes_read;
    598     Error error = ReadResponseToBuffer(loader, buf, kBytesToRead, &bytes_read);
    599     if (error)
    600       return error;
    601 
    602     *out_bytes += bytes_read;
    603 
    604     if (bytes_read < kBytesToRead) {
    605       // Shrink the cached data buffer to the correct size.
    606       cached_data_.resize(*out_bytes);
    607       return 0;
    608     }
    609   }
    610 }
    611 
    612 Error HttpFsNode::ReadResponseToTemp(const ScopedResource& loader,
    613                                      int count,
    614                                      int* out_bytes) {
    615   *out_bytes = 0;
    616 
    617   if (buffer_len_ < count) {
    618     int new_len = std::min(count, MAX_READ_BUFFER_SIZE);
    619     buffer_ = (char*)realloc(buffer_, new_len);
    620     assert(buffer_);
    621     if (!buffer_) {
    622       buffer_len_ = 0;
    623       return ENOMEM;
    624     }
    625     buffer_len_ = new_len;
    626   }
    627 
    628   int bytes_left = count;
    629   while (bytes_left > 0) {
    630     int bytes_to_read = std::min(bytes_left, buffer_len_);
    631     int bytes_read;
    632     Error error = ReadResponseToBuffer(
    633         loader, buffer_, bytes_to_read, &bytes_read);
    634     if (error)
    635       return error;
    636 
    637     if (bytes_read == 0)
    638       return 0;
    639 
    640     bytes_left -= bytes_read;
    641     *out_bytes += bytes_read;
    642   }
    643 
    644   return 0;
    645 }
    646 
    647 Error HttpFsNode::ReadResponseToBuffer(const ScopedResource& loader,
    648                                        void* buf,
    649                                        int count,
    650                                        int* out_bytes) {
    651   *out_bytes = 0;
    652 
    653   PepperInterface* ppapi = filesystem_->ppapi();
    654   URLLoaderInterface* loader_interface = ppapi->GetURLLoaderInterface();
    655 
    656   char* out_buffer = static_cast<char*>(buf);
    657   int bytes_to_read = count;
    658   while (bytes_to_read > 0) {
    659     int bytes_read =
    660         loader_interface->ReadResponseBody(loader.pp_resource(),
    661                                            out_buffer,
    662                                            bytes_to_read,
    663                                            PP_BlockUntilComplete());
    664 
    665     if (bytes_read == 0) {
    666       // This is not an error -- it may just be that we were trying to read
    667       // more data than exists.
    668       *out_bytes = count - bytes_to_read;
    669       return 0;
    670     }
    671 
    672     if (bytes_read < 0)
    673       return PPErrorToErrno(bytes_read);
    674 
    675     assert(bytes_read <= bytes_to_read);
    676     bytes_to_read -= bytes_read;
    677     out_buffer += bytes_read;
    678   }
    679 
    680   *out_bytes = count;
    681   return 0;
    682 }
    683 
    684 }  // namespace nacl_io
    685