Home | History | Annotate | Download | only in httpfs
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
#include "nacl_io/httpfs/http_fs_node.h"

#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <algorithm>
#include <limits>

#include <ppapi/c/pp_errors.h>

#include "nacl_io/httpfs/http_fs.h"
#include "nacl_io/kernel_handle.h"
#include "nacl_io/osinttypes.h"
     17 
     18 #if defined(WIN32)
     19 #define snprintf _snprintf
     20 #endif
     21 
     22 namespace nacl_io {
     23 
     24 namespace {
     25 
     26 // If we're attempting to read a partial request, but the server returns a full
     27 // request, we need to read all of the data up to the start of our partial
     28 // request into a dummy buffer. This is the maximum size of that buffer.
     29 const int MAX_READ_BUFFER_SIZE = 64 * 1024;
     30 const int32_t STATUSCODE_OK = 200;
     31 const int32_t STATUSCODE_PARTIAL_CONTENT = 206;
     32 const int32_t STATUSCODE_FORBIDDEN = 403;
     33 const int32_t STATUSCODE_NOT_FOUND = 404;
     34 const int32_t STATUSCODE_REQUESTED_RANGE_NOT_SATISFIABLE = 416;
     35 
     36 StringMap_t ParseHeaders(const char* headers, int32_t headers_length) {
     37   enum State {
     38     FINDING_KEY,
     39     SKIPPING_WHITESPACE,
     40     FINDING_VALUE,
     41   };
     42 
     43   StringMap_t result;
     44   std::string key;
     45   std::string value;
     46 
     47   State state = FINDING_KEY;
     48   const char* start = headers;
     49   for (int i = 0; i < headers_length; ++i) {
     50     switch (state) {
     51       case FINDING_KEY:
     52         if (headers[i] == ':') {
     53           // Found key.
     54           key.assign(start, &headers[i] - start);
     55           key = NormalizeHeaderKey(key);
     56           state = SKIPPING_WHITESPACE;
     57         }
     58         break;
     59 
     60       case SKIPPING_WHITESPACE:
     61         if (headers[i] == ' ') {
     62           // Found whitespace, keep going...
     63           break;
     64         }
     65 
     66         // Found a non-whitespace, mark this as the start of the value.
     67         start = &headers[i];
     68         state = FINDING_VALUE;
     69       // Fallthrough to start processing value without incrementing i.
     70 
     71       case FINDING_VALUE:
     72         if (headers[i] == '\n') {
     73           // Found value.
     74           value.assign(start, &headers[i] - start);
     75           result[key] = value;
     76           start = &headers[i + 1];
     77           state = FINDING_KEY;
     78         }
     79         break;
     80     }
     81   }
     82 
     83   return result;
     84 }
     85 
     86 bool ParseContentLength(const StringMap_t& headers, off_t* content_length) {
     87   StringMap_t::const_iterator iter = headers.find("Content-Length");
     88   if (iter == headers.end())
     89     return false;
     90 
     91   *content_length = strtoull(iter->second.c_str(), NULL, 10);
     92   return true;
     93 }
     94 
     95 bool ParseContentRange(const StringMap_t& headers,
     96                        off_t* read_start,
     97                        off_t* read_end,
     98                        off_t* entity_length) {
     99   StringMap_t::const_iterator iter = headers.find("Content-Range");
    100   if (iter == headers.end())
    101     return false;
    102 
    103   // The key should look like "bytes ##-##/##" or "bytes ##-##/*". The last
    104   // value is the entity length, which can potentially be * (i.e. unknown).
    105   off_t read_start_int;
    106   off_t read_end_int;
    107   off_t entity_length_int;
    108   int result = sscanf(iter->second.c_str(),
    109                       "bytes %" SCNi64 "-%" SCNi64 "/%" SCNi64,
    110                       &read_start_int,
    111                       &read_end_int,
    112                       &entity_length_int);
    113 
    114   // The Content-Range header specifies an inclusive range: e.g. the first ten
    115   // bytes is "bytes 0-9/*". Convert it to a half-open range by incrementing
    116   // read_end.
    117   if (result == 2) {
    118     *read_start = read_start_int;
    119     *read_end = read_end_int + 1;
    120     *entity_length = 0;
    121     return true;
    122   } else if (result == 3) {
    123     *read_start = read_start_int;
    124     *read_end = read_end_int + 1;
    125     *entity_length = entity_length_int;
    126     return true;
    127   }
    128 
    129   return false;
    130 }
    131 
    132 // Maps an HTTP |status_code| onto the appropriate errno code.
    133 int HTTPStatusCodeToErrno(int status_code) {
    134   switch (status_code) {
    135     case STATUSCODE_OK:
    136     case STATUSCODE_PARTIAL_CONTENT:
    137       return 0;
    138     case STATUSCODE_FORBIDDEN:
    139       return EACCES;
    140     case STATUSCODE_NOT_FOUND:
    141       return ENOENT;
    142   }
    143   if (status_code >= 400 && status_code < 500)
    144     return EINVAL;
    145   return EIO;
    146 }
    147 
    148 }  // namespace
    149 
    150 void HttpFsNode::SetCachedSize(off_t size) {
    151   has_cached_size_ = true;
    152   stat_.st_size = size;
    153 }
    154 
    155 Error HttpFsNode::FSync() {
    156   return EACCES;
    157 }
    158 
    159 Error HttpFsNode::GetDents(size_t offs,
    160                            struct dirent* pdir,
    161                            size_t count,
    162                            int* out_bytes) {
    163   *out_bytes = 0;
    164   return EACCES;
    165 }
    166 
    167 Error HttpFsNode::GetStat(struct stat* stat) {
    168   AUTO_LOCK(node_lock_);
    169   return GetStat_Locked(stat);
    170 }
    171 
    172 Error HttpFsNode::Read(const HandleAttr& attr,
    173                        void* buf,
    174                        size_t count,
    175                        int* out_bytes) {
    176   *out_bytes = 0;
    177 
    178   AUTO_LOCK(node_lock_);
    179   if (cache_content_) {
    180     if (cached_data_.empty()) {
    181       Error error = DownloadToCache();
    182       if (error)
    183         return error;
    184     }
    185 
    186     return ReadPartialFromCache(attr, buf, count, out_bytes);
    187   }
    188 
    189   return DownloadPartial(attr, buf, count, out_bytes);
    190 }
    191 
    192 Error HttpFsNode::FTruncate(off_t size) {
    193   return EACCES;
    194 }
    195 
    196 Error HttpFsNode::Write(const HandleAttr& attr,
    197                         const void* buf,
    198                         size_t count,
    199                         int* out_bytes) {
    200   // TODO(binji): support POST?
    201   *out_bytes = 0;
    202   return EACCES;
    203 }
    204 
    205 Error HttpFsNode::GetSize(off_t* out_size) {
    206   *out_size = 0;
    207 
    208   // TODO(binji): This value should be cached properly; i.e. obey the caching
    209   // headers returned by the server.
    210   AUTO_LOCK(node_lock_);
    211   struct stat statbuf;
    212   Error error = GetStat_Locked(&statbuf);
    213   if (error)
    214     return error;
    215 
    216   *out_size = stat_.st_size;
    217   return 0;
    218 }
    219 
    220 HttpFsNode::HttpFsNode(Filesystem* filesystem,
    221                        const std::string& url,
    222                        bool cache_content)
    223     : Node(filesystem),
    224       url_(url),
    225       cache_content_(cache_content),
    226       has_cached_size_(false) {
    227 }
    228 
    229 void HttpFsNode::SetMode(int mode) {
    230   stat_.st_mode = mode;
    231 }
    232 
    233 Error HttpFsNode::GetStat_Locked(struct stat* stat) {
    234   // Assume we need to 'HEAD' if we do not know the size, otherwise, assume
    235   // that the information is constant.  We can add a timeout if needed.
    236   HttpFs* filesystem = static_cast<HttpFs*>(filesystem_);
    237   if (!has_cached_size_ || !filesystem->cache_stat_) {
    238     StringMap_t headers;
    239     ScopedResource loader(filesystem_->ppapi());
    240     ScopedResource request(filesystem_->ppapi());
    241     ScopedResource response(filesystem_->ppapi());
    242     int32_t statuscode;
    243     StringMap_t response_headers;
    244     Error error = OpenUrl("HEAD",
    245                           &headers,
    246                           &loader,
    247                           &request,
    248                           &response,
    249                           &statuscode,
    250                           &response_headers);
    251     if (error)
    252       return error;
    253 
    254     off_t entity_length;
    255     if (ParseContentLength(response_headers, &entity_length)) {
    256       SetCachedSize(static_cast<off_t>(entity_length));
    257     } else if (cache_content_) {
    258       // The server didn't give a content length; download the data to memory
    259       // via DownloadToCache, which will also set stat_.st_size;
    260       error = DownloadToCache();
    261       if (error)
    262         return error;
    263     } else {
    264       // The user doesn't want to cache content, but we didn't get a
    265       // "Content-Length" header. Read the entire entity, and throw it away.
    266       // Don't use DownloadToCache, as that will still allocate enough memory
    267       // for the entire entity.
    268       off_t bytes_read;
    269       error = DownloadToTemp(&bytes_read);
    270       if (error)
    271         return error;
    272 
    273       SetCachedSize(bytes_read);
    274     }
    275 
    276     stat_.st_atime = 0;  // TODO(binji): Use "Last-Modified".
    277     stat_.st_mtime = 0;
    278     stat_.st_ctime = 0;
    279 
    280     stat_.st_mode |= S_IFREG;
    281   }
    282 
    283   // Fill the stat structure if provided
    284   if (stat)
    285     *stat = stat_;
    286 
    287   return 0;
    288 }
    289 
    290 Error HttpFsNode::OpenUrl(const char* method,
    291                           StringMap_t* request_headers,
    292                           ScopedResource* out_loader,
    293                           ScopedResource* out_request,
    294                           ScopedResource* out_response,
    295                           int32_t* out_statuscode,
    296                           StringMap_t* out_response_headers) {
    297   // Clear all out parameters.
    298   *out_statuscode = 0;
    299   out_response_headers->clear();
    300 
    301   // Assume lock_ is already held.
    302   PepperInterface* ppapi = filesystem_->ppapi();
    303 
    304   HttpFs* mount_http = static_cast<HttpFs*>(filesystem_);
    305   out_request->Reset(
    306       mount_http->MakeUrlRequestInfo(url_, method, request_headers));
    307   if (!out_request->pp_resource())
    308     return EINVAL;
    309 
    310   URLLoaderInterface* loader_interface = ppapi->GetURLLoaderInterface();
    311   URLResponseInfoInterface* response_interface =
    312       ppapi->GetURLResponseInfoInterface();
    313   VarInterface* var_interface = ppapi->GetVarInterface();
    314 
    315   out_loader->Reset(loader_interface->Create(ppapi->GetInstance()));
    316   if (!out_loader->pp_resource())
    317     return EINVAL;
    318 
    319   int32_t result = loader_interface->Open(out_loader->pp_resource(),
    320                                           out_request->pp_resource(),
    321                                           PP_BlockUntilComplete());
    322   if (result != PP_OK)
    323     return PPErrorToErrno(result);
    324 
    325   out_response->Reset(
    326       loader_interface->GetResponseInfo(out_loader->pp_resource()));
    327   if (!out_response->pp_resource())
    328     return EINVAL;
    329 
    330   // Get response statuscode.
    331   PP_Var statuscode = response_interface->GetProperty(
    332       out_response->pp_resource(), PP_URLRESPONSEPROPERTY_STATUSCODE);
    333 
    334   if (statuscode.type != PP_VARTYPE_INT32)
    335     return EINVAL;
    336 
    337   *out_statuscode = statuscode.value.as_int;
    338 
    339   // Only accept OK or Partial Content.
    340   Error error = HTTPStatusCodeToErrno(*out_statuscode);
    341   if (error)
    342     return error;
    343 
    344   // Get response headers.
    345   PP_Var response_headers_var = response_interface->GetProperty(
    346       out_response->pp_resource(), PP_URLRESPONSEPROPERTY_HEADERS);
    347 
    348   uint32_t response_headers_length;
    349   const char* response_headers_str =
    350       var_interface->VarToUtf8(response_headers_var, &response_headers_length);
    351 
    352   *out_response_headers =
    353       ParseHeaders(response_headers_str, response_headers_length);
    354 
    355   var_interface->Release(response_headers_var);
    356 
    357   return 0;
    358 }
    359 
    360 Error HttpFsNode::DownloadToCache() {
    361   StringMap_t headers;
    362   ScopedResource loader(filesystem_->ppapi());
    363   ScopedResource request(filesystem_->ppapi());
    364   ScopedResource response(filesystem_->ppapi());
    365   int32_t statuscode;
    366   StringMap_t response_headers;
    367   Error error = OpenUrl("GET",
    368                         &headers,
    369                         &loader,
    370                         &request,
    371                         &response,
    372                         &statuscode,
    373                         &response_headers);
    374   if (error)
    375     return error;
    376 
    377   off_t content_length = 0;
    378   if (ParseContentLength(response_headers, &content_length)) {
    379     cached_data_.resize(content_length);
    380     int real_size;
    381     error = ReadResponseToBuffer(
    382         loader, cached_data_.data(), content_length, &real_size);
    383     if (error)
    384       return error;
    385 
    386     SetCachedSize(real_size);
    387     cached_data_.resize(real_size);
    388     return 0;
    389   }
    390 
    391   int bytes_read;
    392   error = ReadEntireResponseToCache(loader, &bytes_read);
    393   if (error)
    394     return error;
    395 
    396   SetCachedSize(bytes_read);
    397   return 0;
    398 }
    399 
    400 Error HttpFsNode::ReadPartialFromCache(const HandleAttr& attr,
    401                                        void* buf,
    402                                        int count,
    403                                        int* out_bytes) {
    404   *out_bytes = 0;
    405   off_t size = cached_data_.size();
    406 
    407   if (attr.offs + count > size)
    408     count = size - attr.offs;
    409 
    410   if (count <= 0)
    411     return 0;
    412 
    413   memcpy(buf, &cached_data_.data()[attr.offs], count);
    414   *out_bytes = count;
    415   return 0;
    416 }
    417 
    418 Error HttpFsNode::DownloadPartial(const HandleAttr& attr,
    419                                   void* buf,
    420                                   off_t count,
    421                                   int* out_bytes) {
    422   *out_bytes = 0;
    423 
    424   StringMap_t headers;
    425 
    426   char buffer[100];
    427   // Range request is inclusive: 0-99 returns 100 bytes.
    428   snprintf(&buffer[0],
    429            sizeof(buffer),
    430            "bytes=%" PRIi64 "-%" PRIi64,
    431            attr.offs,
    432            attr.offs + count - 1);
    433   headers["Range"] = buffer;
    434 
    435   ScopedResource loader(filesystem_->ppapi());
    436   ScopedResource request(filesystem_->ppapi());
    437   ScopedResource response(filesystem_->ppapi());
    438   int32_t statuscode;
    439   StringMap_t response_headers;
    440   Error error = OpenUrl("GET",
    441                         &headers,
    442                         &loader,
    443                         &request,
    444                         &response,
    445                         &statuscode,
    446                         &response_headers);
    447   if (error) {
    448     if (statuscode == STATUSCODE_REQUESTED_RANGE_NOT_SATISFIABLE) {
    449       // We're likely trying to read past the end. Return 0 bytes.
    450       *out_bytes = 0;
    451       return 0;
    452     }
    453 
    454     return error;
    455   }
    456 
    457   off_t read_start = 0;
    458   if (statuscode == STATUSCODE_OK) {
    459     // No partial result, read everything starting from the part we care about.
    460     off_t content_length;
    461     if (ParseContentLength(response_headers, &content_length)) {
    462       if (attr.offs >= content_length)
    463         return EINVAL;
    464 
    465       // Clamp count, if trying to read past the end of the file.
    466       if (attr.offs + count > content_length) {
    467         count = content_length - attr.offs;
    468       }
    469     }
    470   } else if (statuscode == STATUSCODE_PARTIAL_CONTENT) {
    471     // Determine from the headers where we are reading.
    472     off_t read_end;
    473     off_t entity_length;
    474     if (ParseContentRange(
    475             response_headers, &read_start, &read_end, &entity_length)) {
    476       if (read_start > attr.offs || read_start > read_end) {
    477         // If this error occurs, the server is returning bogus values.
    478         return EINVAL;
    479       }
    480 
    481       // Clamp count, if trying to read past the end of the file.
    482       count = std::min(read_end - read_start, count);
    483     } else {
    484       // Partial Content without Content-Range. Assume that the server gave us
    485       // exactly what we asked for. This can happen even when the server
    486       // returns 200 -- the cache may return 206 in this case, but not modify
    487       // the headers.
    488       read_start = attr.offs;
    489     }
    490   }
    491 
    492   if (read_start < attr.offs) {
    493     // We aren't yet at the location where we want to start reading. Read into
    494     // our dummy buffer until then.
    495     int bytes_to_read = attr.offs - read_start;
    496     int bytes_read;
    497     error = ReadResponseToTemp(loader, bytes_to_read, &bytes_read);
    498     if (error)
    499       return error;
    500 
    501     // Tried to read past the end of the entity.
    502     if (bytes_read < bytes_to_read) {
    503       *out_bytes = 0;
    504       return 0;
    505     }
    506   }
    507 
    508   return ReadResponseToBuffer(loader, buf, count, out_bytes);
    509 }
    510 
    511 Error HttpFsNode::DownloadToTemp(off_t* out_bytes) {
    512   StringMap_t headers;
    513   ScopedResource loader(filesystem_->ppapi());
    514   ScopedResource request(filesystem_->ppapi());
    515   ScopedResource response(filesystem_->ppapi());
    516   int32_t statuscode;
    517   StringMap_t response_headers;
    518   Error error = OpenUrl("GET",
    519                         &headers,
    520                         &loader,
    521                         &request,
    522                         &response,
    523                         &statuscode,
    524                         &response_headers);
    525   if (error)
    526     return error;
    527 
    528   off_t content_length = 0;
    529   if (ParseContentLength(response_headers, &content_length)) {
    530     *out_bytes = content_length;
    531     return 0;
    532   }
    533 
    534   return ReadEntireResponseToTemp(loader, out_bytes);
    535 }
    536 
    537 Error HttpFsNode::ReadEntireResponseToTemp(const ScopedResource& loader,
    538                                            off_t* out_bytes) {
    539   *out_bytes = 0;
    540 
    541   const int kBytesToRead = MAX_READ_BUFFER_SIZE;
    542   buffer_.resize(kBytesToRead);
    543 
    544   while (true) {
    545     int bytes_read;
    546     Error error =
    547         ReadResponseToBuffer(loader, buffer_.data(), kBytesToRead, &bytes_read);
    548     if (error)
    549       return error;
    550 
    551     *out_bytes += bytes_read;
    552 
    553     if (bytes_read < kBytesToRead)
    554       return 0;
    555   }
    556 }
    557 
    558 Error HttpFsNode::ReadEntireResponseToCache(const ScopedResource& loader,
    559                                             int* out_bytes) {
    560   *out_bytes = 0;
    561   const int kBytesToRead = MAX_READ_BUFFER_SIZE;
    562 
    563   while (true) {
    564     // Always recalculate the buf pointer because it may have moved when
    565     // cached_data_ was resized.
    566     cached_data_.resize(*out_bytes + kBytesToRead);
    567     void* buf = cached_data_.data() + *out_bytes;
    568 
    569     int bytes_read;
    570     Error error = ReadResponseToBuffer(loader, buf, kBytesToRead, &bytes_read);
    571     if (error)
    572       return error;
    573 
    574     *out_bytes += bytes_read;
    575 
    576     if (bytes_read < kBytesToRead) {
    577       // Shrink the cached data buffer to the correct size.
    578       cached_data_.resize(*out_bytes);
    579       return 0;
    580     }
    581   }
    582 }
    583 
    584 Error HttpFsNode::ReadResponseToTemp(const ScopedResource& loader,
    585                                      int count,
    586                                      int* out_bytes) {
    587   *out_bytes = 0;
    588 
    589   if (buffer_.size() < static_cast<size_t>(count))
    590     buffer_.resize(std::min(count, MAX_READ_BUFFER_SIZE));
    591 
    592   int bytes_left = count;
    593   while (bytes_left > 0) {
    594     int bytes_to_read =
    595         std::min(static_cast<size_t>(bytes_left), buffer_.size());
    596     int bytes_read;
    597     Error error = ReadResponseToBuffer(
    598         loader, buffer_.data(), bytes_to_read, &bytes_read);
    599     if (error)
    600       return error;
    601 
    602     if (bytes_read == 0)
    603       return 0;
    604 
    605     bytes_left -= bytes_read;
    606     *out_bytes += bytes_read;
    607   }
    608 
    609   return 0;
    610 }
    611 
    612 Error HttpFsNode::ReadResponseToBuffer(const ScopedResource& loader,
    613                                        void* buf,
    614                                        int count,
    615                                        int* out_bytes) {
    616   *out_bytes = 0;
    617 
    618   PepperInterface* ppapi = filesystem_->ppapi();
    619   URLLoaderInterface* loader_interface = ppapi->GetURLLoaderInterface();
    620 
    621   char* out_buffer = static_cast<char*>(buf);
    622   int bytes_to_read = count;
    623   while (bytes_to_read > 0) {
    624     int bytes_read =
    625         loader_interface->ReadResponseBody(loader.pp_resource(),
    626                                            out_buffer,
    627                                            bytes_to_read,
    628                                            PP_BlockUntilComplete());
    629 
    630     if (bytes_read == 0) {
    631       // This is not an error -- it may just be that we were trying to read
    632       // more data than exists.
    633       *out_bytes = count - bytes_to_read;
    634       return 0;
    635     }
    636 
    637     if (bytes_read < 0)
    638       return PPErrorToErrno(bytes_read);
    639 
    640     assert(bytes_read <= bytes_to_read);
    641     bytes_to_read -= bytes_read;
    642     out_buffer += bytes_read;
    643   }
    644 
    645   *out_bytes = count;
    646   return 0;
    647 }
    648 
    649 }  // namespace nacl_io
    650