Home | History | Annotate | Download | only in web_request
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/browser/extensions/api/web_request/form_data_parser.h"
      6 
      7 #include <vector>
      8 
      9 #include "base/lazy_instance.h"
     10 #include "base/logging.h"
     11 #include "base/macros.h"
     12 #include "base/strings/string_util.h"
     13 #include "base/values.h"
     14 #include "net/base/escape.h"
     15 #include "net/url_request/url_request.h"
     16 #include "third_party/re2/re2/re2.h"
     17 
     18 using base::DictionaryValue;
     19 using base::ListValue;
     20 using base::StringPiece;
     21 using re2::RE2;
     22 
     23 namespace extensions {
     24 
     25 namespace {
     26 
// Name of the MIME header (including the trailing colon) that carries the
// "name" and "filename" fields of a multipart body part. It is matched
// case-insensitively via |content_disposition_pattern|.
const char kContentDisposition[] = "content-disposition:";
// Length of |kContentDisposition| minus one for the terminating NUL; used as
// the offset at which the search for "name=" / "filename=" starts.
const size_t kContentDispositionLength = arraysize(kContentDisposition) - 1;
// kCharacterPattern is a regular expression matching a single allowed
// character in a URL encoding: either a literal character or a "%XX" escape
// with two hexadecimal digits. Definition is from RFC 1738, end of section 2.2.
const char kCharacterPattern[] =
    "(?:[a-zA-Z0-9$_.+!*'(),]|-|(?:%[a-fA-F0-9]{2}))";
// Regular expression matching the two characters "\E" literally (a doubly
// escaped backslash followed by 'E'). Used both to find "\E" inside a boundary
// literal and as the fragment spliced into the generated boundary pattern.
const char kEscapeClosingQuote[] = "\\\\E";
     34 
     35 // A wrapper struct for static RE2 objects to be held as LazyInstance.
// Holds all fixed (input-independent) regular expressions used by the parsers
// in this file. The members are initialised in declaration order by the
// constructor; see the constructor for the exact pattern strings.
struct Patterns {
  Patterns();
  ~Patterns();
  // Optional spaces/tabs followed by CRLF.
  const RE2 transfer_padding_pattern;
  // A single CRLF.
  const RE2 crlf_pattern;
  // The "--" that turns a dash-boundary into a close-delimiter, plus padding.
  const RE2 closing_pattern;
  // Either nothing, or CRLF followed by arbitrary text.
  const RE2 epilogue_pattern;
  // A (possibly empty) run of characters containing no CRLF.
  const RE2 crlf_free_pattern;
  // Used to step over the preamble before the first dash-boundary.
  const RE2 preamble_pattern;
  // One MIME header line, including its trailing CRLF.
  const RE2 header_pattern;
  // Case-insensitive match of |kContentDisposition|.
  const RE2 content_disposition_pattern;
  // name="..." field; the quoted text is the first capture group.
  const RE2 name_pattern;
  // filename="..." field; the quoted text is the first capture group.
  const RE2 value_pattern;
  // Literal "\E", see |kEscapeClosingQuote|.
  const RE2 unquote_pattern;
  // One "name=value" pair of a URL-encoded form; name and value are captured.
  const RE2 url_encoded_pattern;
};
     52 
// Compiles every fixed pattern once. The initialisers must stay in the same
// order as the member declarations in |Patterns|.
Patterns::Patterns()
    // Any number of spaces or tabs, then CRLF.
    : transfer_padding_pattern("[ \\t]*\\r\\n"),
      // Exactly CRLF.
      crlf_pattern("\\r\\n"),
      // Two dashes followed by any number of spaces or tabs.
      closing_pattern("--[ \\t]*"),
      // Empty string, or CRLF followed by any characters (newlines included,
      // because of the "s" flag).
      epilogue_pattern("|\\r\\n(?s:.)*"),
      // Repetition of: a non-CR character, or one or more CRs followed by a
      // character that is neither CR nor LF. Hence never contains CRLF.
      crlf_free_pattern("(?:[^\\r]|\\r+[^\\r\\n])*"),
      // Non-greedy "one or more characters"; consumed repeatedly until the
      // input starts with the dash-boundary.
      preamble_pattern(".+?"),
      // Field name made of printable ASCII except ':', then ':', then the
      // header value (which may be folded with CRLF + tab/space), then CRLF.
      header_pattern("[!-9;-~]+:(.|\\r\\n[\\t ])*\\r\\n"),
      // |kContentDisposition| wrapped in a case-insensitive group.
      content_disposition_pattern(std::string("(?i:") + kContentDisposition +
                                  ")"),
      // name="<captured>", where "name" starts at a word boundary (so it does
      // not match inside "filename").
      name_pattern("\\bname=\"([^\"]*)\""),
      // filename="<captured>".
      value_pattern("\\bfilename=\"([^\"]*)\""),
      // The literal two characters "\E".
      unquote_pattern(kEscapeClosingQuote),
      // "(<chars>*)=(<chars>*)" with <chars> being |kCharacterPattern|.
      url_encoded_pattern(std::string("(") + kCharacterPattern + "*)=(" +
                          kCharacterPattern +
                          "*)") {
}
     70 
// Out-of-line definition of the destructor; there is nothing to clean up
// beyond the members themselves.
Patterns::~Patterns() {}
     72 
// Lazily constructed holder of the compiled patterns, shared by all parsers in
// this file and accessed through Get() / Pointer().
// NOTE(review): the Leaky variant presumably means the instance is never
// destroyed at shutdown -- confirm against base/lazy_instance.h.
base::LazyInstance<Patterns>::Leaky g_patterns = LAZY_INSTANCE_INITIALIZER;
     74 
     75 }  // namespace
     76 
// Parses URLencoded forms, see
// http://www.w3.org/TR/REC-html40-971218/interact/forms.html#h-17.13.4.1 .
// The whole form must be supplied with a single call to SetSource(); the
// name-value pairs are then read one by one with GetNextNameValue().
class FormDataParserUrlEncoded : public FormDataParser {
 public:
  FormDataParserUrlEncoded();
  virtual ~FormDataParserUrlEncoded();

  // Implementation of FormDataParser.
  // True iff a source was set, it was consumed completely and no malformed
  // input was encountered.
  virtual bool AllDataReadOK() OVERRIDE;
  // Reads the next "name=value" pair into |result| (unescaped). Returns false
  // if no source is set, the input is malformed, or no pair could be read.
  virtual bool GetNextNameValue(Result* result) OVERRIDE;
  // Sets the input. Returns false if a source has already been set.
  virtual bool SetSource(base::StringPiece source) OVERRIDE;

 private:
  // Returns the pattern to match a single name-value pair. This could be even
  // static, but then we would have to spend more code on initializing the
  // cached pointer to g_patterns.Get().
  const RE2& pattern() const {
    return patterns_->url_encoded_pattern;
  }

  // Auxiliary constant for using RE2. Number of arguments for parsing
  // name-value pairs (one for name, one for value).
  static const size_t args_size_ = 2u;
  // Unescape rules applied to both the name and the value of each pair.
  static const net::UnescapeRule::Type unescape_rules_;

  // The not yet parsed remainder of the input.
  re2::StringPiece source_;
  // Whether SetSource() has been called.
  bool source_set_;
  // Set once a pair is not followed by '&' or by the end of the input.
  bool source_malformed_;

  // Auxiliary store for using RE2: |arg_name_| and |arg_value_| wrap |name_|
  // and |value_|, and |args_| holds pointers to those two wrappers so that
  // they can be handed to RE2::ConsumeN().
  std::string name_;
  std::string value_;
  const RE2::Arg arg_name_;
  const RE2::Arg arg_value_;
  const RE2::Arg* args_[args_size_];

  // Caching the pointer to g_patterns.Get().
  const Patterns* patterns_;

  DISALLOW_COPY_AND_ASSIGN(FormDataParserUrlEncoded);
};
    118 
    119 // The following class, FormDataParserMultipart, parses forms encoded as
    120 // multipart, defined in RFCs 2388 (specific to forms), 2046 (multipart
    121 // encoding) and 5322 (MIME-headers).
    122 //
    123 // Implementation details
    124 //
    125 // The original grammar from RFC 2046 is this, "multipart-body" being the root
    126 // non-terminal:
    127 //
    128 // boundary := 0*69<bchars> bcharsnospace
    129 // bchars := bcharsnospace / " "
    130 // bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" / "_" / ","
    131 //                  / "-" / "." / "/" / ":" / "=" / "?"
    132 // dash-boundary := "--" boundary
    133 // multipart-body := [preamble CRLF]
    134 //                        dash-boundary transport-padding CRLF
    135 //                        body-part *encapsulation
    136 //                        close-delimiter transport-padding
    137 //                        [CRLF epilogue]
    138 // transport-padding := *LWSP-char
    139 // encapsulation := delimiter transport-padding CRLF body-part
    140 // delimiter := CRLF dash-boundary
    141 // close-delimiter := delimiter "--"
    142 // preamble := discard-text
    143 // epilogue := discard-text
    144 // discard-text := *(*text CRLF) *text
    145 // body-part := MIME-part-headers [CRLF *OCTET]
    146 // OCTET := <any 0-255 octet value>
    147 //
    148 // Uppercase non-terminals are defined in RFC 5234, Appendix B.1; i.e. CRLF,
    149 // DIGIT, and ALPHA stand for "\r\n", '0'-'9' and the set of letters of the
    150 // English alphabet, respectively.
    151 // The non-terminal "text" is presumably just any text, excluding line breaks.
    152 // The non-terminal "LWSP-char" is not directly defined in the original grammar
    153 // but it means "linear whitespace", which is a space or a horizontal tab.
    154 // The non-terminal "MIME-part-headers" is not discussed in RFC 2046, so we use
    155 // the syntax for "optional fields" from Section 3.6.8 of RFC 5322:
    156 //
    157 // MIME-part-headers := field-name ":" unstructured CRLF
    158 // field-name := 1*ftext
    159 // ftext := %d33-57 /          ; Printable US-ASCII
    160 //          %d59-126           ;  characters not including ":".
    161 // Based on Section 2.2.1 of RFC 5322, "unstructured" matches any string which
    162 // does not contain a CRLF sub-string, except for substrings "CRLF<space>" and
    163 // "CRLF<horizontal tab>", which serve for "folding".
    164 //
// The FormDataParserMultipart class reads the input source and tries to parse
// it according to the grammar above, rooted at the "multipart-body"
// non-terminal.
    167 // This happens in stages:
    168 //
    169 // 1. The optional preamble and the initial dash-boundary with transport padding
    170 // and a CRLF are read and ignored.
    171 //
    172 // 2. Repeatedly each body part is read. The body parts can either serve to
    173 //    upload a file, or just a string of bytes.
    174 // 2.a. The headers of that part are searched for the "content-disposition"
    175 //      header, which contains the name of the value represented by that body
    176 //      part. If the body-part is for file upload, that header also contains a
    177 //      filename.
    178 // 2.b. The "*OCTET" part of the body part is then read and passed as the value
    179 //      of the name-value pair for body parts representing a string of bytes.
    180 //      For body parts for uploading a file the "*OCTET" part is just ignored
    181 //      and the filename is used for value instead.
    182 //
    183 // 3. The final close-delimiter and epilogue are read and ignored.
    184 //
    185 // IMPORTANT NOTE
    186 // This parser supports sources split into multiple chunks. Therefore SetSource
    187 // can be called multiple times if the source is spread over several chunks.
    188 // However, the split may only occur inside a body part, right after the
    189 // trailing CRLF of headers.
class FormDataParserMultipart : public FormDataParser {
 public:
  // |boundary_separator| is the boundary string without the leading "--".
  explicit FormDataParserMultipart(const std::string& boundary_separator);
  virtual ~FormDataParserMultipart();

  // Implementation of FormDataParser.
  // True iff the parser reached STATE_FINISHED.
  virtual bool AllDataReadOK() OVERRIDE;
  // Reads the next body part and stores its name and value (or filename) in
  // |result|. Only works in STATE_READY with a non-empty source.
  virtual bool GetNextNameValue(Result* result) OVERRIDE;
  // Supplies the next chunk of input. Fails if the previous chunk has not been
  // consumed completely or if |source| has a NULL data pointer.
  virtual bool SetSource(base::StringPiece source) OVERRIDE;

 private:
  enum State {
    STATE_INIT,      // No input read yet.
    STATE_READY,     // Ready to call GetNextNameValue.
    STATE_FINISHED,  // Read the input until the end.
    STATE_SUSPEND,   // Waiting until a new |source_| is set.
    STATE_ERROR      // Parsing failed, or the boundary pattern is invalid.
  };

  // Produces a regexp to match the string "--" + |literal|. The idea is to
  // represent "--" + |literal| as a "quoted pattern", a verbatim copy enclosed
  // in "\\Q" and "\\E". The only catch is to watch out for occurrences of "\\E"
  // inside |literal|. Those must be excluded from the quote and the backslash
  // doubly escaped. For example, for literal == "abc\\Edef" the result is
  // "\\Q--abc\\E\\\\E\\Qdef\\E".
  static std::string CreateBoundaryPatternFromLiteral(
      const std::string& literal);

  // Tests whether |input| has a prefix matching |pattern|.
  static bool StartsWithPattern(const re2::StringPiece& input,
                                const RE2& pattern);

  // If |source_| starts with a header, seeks |source_| beyond the header. If
  // the header is Content-Disposition, extracts |name| from "name=" and
  // possibly |value| from "filename=" fields of that header. Only if the
  // "name" or "filename" fields are found, then |name| or |value| are touched.
  // Returns true iff |source_| is seeked forward. Sets |value_assigned|
  // to true iff |value| has been assigned to. A Content-Disposition header
  // without a "name" field puts the parser into STATE_ERROR.
  bool TryReadHeader(base::StringPiece* name,
                     base::StringPiece* value,
                     bool* value_assigned);

  // Helper to GetNextNameValue. Expects that the input starts with a data
  // portion of a body part. An attempt is made to read the input until the end
  // of that body part. If |data| is not NULL, it is set to contain the data
  // portion. Returns true iff the reading was successful. Sets |state_| to
  // STATE_FINISHED when the close-delimiter is read and to STATE_ERROR on
  // failure; otherwise |state_| is left untouched.
  bool FinishReadingPart(base::StringPiece* data);

  // These methods could be even static, but then we would have to spend more
  // code on initializing the cached pointer to g_patterns.Get().
  const RE2& transfer_padding_pattern() const {
    return patterns_->transfer_padding_pattern;
  }
  const RE2& crlf_pattern() const {
    return patterns_->crlf_pattern;
  }
  const RE2& closing_pattern() const {
    return patterns_->closing_pattern;
  }
  const RE2& epilogue_pattern() const {
    return patterns_->epilogue_pattern;
  }
  const RE2& crlf_free_pattern() const {
    return patterns_->crlf_free_pattern;
  }
  const RE2& preamble_pattern() const {
    return patterns_->preamble_pattern;
  }
  const RE2& header_pattern() const {
    return patterns_->header_pattern;
  }
  const RE2& content_disposition_pattern() const {
    return patterns_->content_disposition_pattern;
  }
  const RE2& name_pattern() const {
    return patterns_->name_pattern;
  }
  const RE2& value_pattern() const {
    return patterns_->value_pattern;
  }
  // However, this is used in a static method so it needs to be static.
  static const RE2& unquote_pattern() {
    return g_patterns.Get().unquote_pattern;  // No caching g_patterns here.
  }

  // Pattern matching "--" followed by the boundary given to the constructor.
  const RE2 dash_boundary_pattern_;

  // Because of initialisation dependency, |state_| needs to be declared after
  // |dash_boundary_pattern_|.
  State state_;

  // The parsed message can be split into multiple sources which we read
  // sequentially.
  re2::StringPiece source_;

  // Caching the pointer to g_patterns.Get().
  const Patterns* patterns_;

  DISALLOW_COPY_AND_ASSIGN(FormDataParserMultipart);
};
    290 
// Out-of-line definitions of the (empty) constructor and destructor of the
// name-value holder filled in by GetNextNameValue().
FormDataParser::Result::Result() {}
FormDataParser::Result::~Result() {}

// Out-of-line definition of the (empty) base class destructor.
FormDataParser::~FormDataParser() {}
    295 
    296 // static
    297 scoped_ptr<FormDataParser> FormDataParser::Create(
    298     const net::URLRequest& request) {
    299   std::string value;
    300   const bool found = request.extra_request_headers().GetHeader(
    301       net::HttpRequestHeaders::kContentType, &value);
    302   return CreateFromContentTypeHeader(found ? &value : NULL);
    303 }
    304 
    305 // static
    306 scoped_ptr<FormDataParser> FormDataParser::CreateFromContentTypeHeader(
    307     const std::string* content_type_header) {
    308   enum ParserChoice {URL_ENCODED, MULTIPART, ERROR_CHOICE};
    309   ParserChoice choice = ERROR_CHOICE;
    310   std::string boundary;
    311 
    312   if (content_type_header == NULL) {
    313     choice = URL_ENCODED;
    314   } else {
    315     const std::string content_type(
    316         content_type_header->substr(0, content_type_header->find(';')));
    317 
    318     if (base::strcasecmp(
    319         content_type.c_str(), "application/x-www-form-urlencoded") == 0) {
    320       choice = URL_ENCODED;
    321     } else if (base::strcasecmp(
    322         content_type.c_str(), "multipart/form-data") == 0) {
    323       static const char kBoundaryString[] = "boundary=";
    324       size_t offset = content_type_header->find(kBoundaryString);
    325       if (offset == std::string::npos) {
    326         // Malformed header.
    327         return scoped_ptr<FormDataParser>();
    328       }
    329       offset += sizeof(kBoundaryString) - 1;
    330       boundary = content_type_header->substr(
    331           offset, content_type_header->find(';', offset));
    332       if (!boundary.empty())
    333         choice = MULTIPART;
    334     }
    335   }
    336   // Other cases are unparseable, including when |content_type| is "text/plain".
    337 
    338   switch (choice) {
    339     case URL_ENCODED:
    340       return scoped_ptr<FormDataParser>(new FormDataParserUrlEncoded());
    341     case MULTIPART:
    342       return scoped_ptr<FormDataParser>(new FormDataParserMultipart(boundary));
    343     case ERROR_CHOICE:
    344       return scoped_ptr<FormDataParser>();
    345   }
    346   NOTREACHED();  // Some compilers do not believe this is unreachable.
    347   return scoped_ptr<FormDataParser>();
    348 }
    349 
// Out-of-line definition of the (empty) base class constructor.
FormDataParser::FormDataParser() {}
    351 
// Rules passed to net::UnescapeURLComponent() for both names and values of
// URL-encoded pairs. Besides the escape-related rules, '+' is replaced with a
// space (REPLACE_PLUS_WITH_SPACE).
const net::UnescapeRule::Type FormDataParserUrlEncoded::unescape_rules_ =
    net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS |
    net::UnescapeRule::SPACES | net::UnescapeRule::REPLACE_PLUS_WITH_SPACE;
    355 
// Starts with no source set. |arg_name_| and |arg_value_| are bound to
// |name_| and |value_|, so a successful RE2 match writes the captured name
// and value into those strings.
FormDataParserUrlEncoded::FormDataParserUrlEncoded()
    : source_(NULL),
      source_set_(false),
      source_malformed_(false),
      arg_name_(&name_),
      arg_value_(&value_),
      patterns_(g_patterns.Pointer()) {
  // First capture group is the name, second is the value.
  args_[0] = &arg_name_;
  args_[1] = &arg_value_;
}
    366 
// Nothing to release explicitly; the destructor body is empty.
FormDataParserUrlEncoded::~FormDataParserUrlEncoded() {}
    368 
    369 bool FormDataParserUrlEncoded::AllDataReadOK() {
    370   // All OK means we read the whole source.
    371   return source_set_ && source_.empty() && !source_malformed_;
    372 }
    373 
    374 bool FormDataParserUrlEncoded::GetNextNameValue(Result* result) {
    375   if (!source_set_ || source_malformed_)
    376     return false;
    377 
    378   bool success = RE2::ConsumeN(&source_, pattern(), args_, args_size_);
    379   if (success) {
    380     result->set_name(net::UnescapeURLComponent(name_, unescape_rules_));
    381     result->set_value(net::UnescapeURLComponent(value_, unescape_rules_));
    382   }
    383   if (source_.length() > 0) {
    384     if (source_[0] == '&')
    385       source_.remove_prefix(1);  // Remove the leading '&'.
    386     else
    387       source_malformed_ = true;  // '&' missing between two name-value pairs.
    388   }
    389   return success && !source_malformed_;
    390 }
    391 
    392 bool FormDataParserUrlEncoded::SetSource(base::StringPiece source) {
    393   if (source_set_)
    394     return false;  // We do not allow multiple sources for this parser.
    395   source_.set(source.data(), source.size());
    396   source_set_ = true;
    397   source_malformed_ = false;
    398   return true;
    399 }
    400 
    401 // static
    402 std::string FormDataParserMultipart::CreateBoundaryPatternFromLiteral(
    403     const std::string& literal) {
    404   static const char quote[] = "\\Q";
    405   static const char unquote[] = "\\E";
    406 
    407   // The result always starts with opening the qoute and then "--".
    408   std::string result("\\Q--");
    409 
    410   // This StringPiece is used below to record the next occurrence of "\\E" in
    411   // |literal|.
    412   re2::StringPiece seek_unquote(literal);
    413   const char* copy_start = literal.data();
    414   size_t copy_length = literal.size();
    415 
    416   // Find all "\\E" in |literal| and exclude them from the \Q...\E quote.
    417   while (RE2::FindAndConsume(&seek_unquote, unquote_pattern())) {
    418     copy_length = seek_unquote.data() - copy_start;
    419     result.append(copy_start, copy_length);
    420     result.append(kEscapeClosingQuote);
    421     result.append(quote);
    422     copy_start = seek_unquote.data();
    423   }
    424 
    425   // Finish the last \Q...\E quote.
    426   copy_length = (literal.data() + literal.size()) - copy_start;
    427   result.append(copy_start, copy_length);
    428   result.append(unquote);
    429   return result;
    430 }
    431 
    432 // static
    433 bool FormDataParserMultipart::StartsWithPattern(const re2::StringPiece& input,
    434                                                 const RE2& pattern) {
    435   return pattern.Match(input, 0, input.size(), RE2::ANCHOR_START, NULL, 0);
    436 }
    437 
// Compiles the pattern for "--" + |boundary_separator|. If that pattern fails
// to compile, the parser starts out in STATE_ERROR; otherwise it starts in
// STATE_INIT and waits for the first call to SetSource().
FormDataParserMultipart::FormDataParserMultipart(
    const std::string& boundary_separator)
    : dash_boundary_pattern_(
          CreateBoundaryPatternFromLiteral(boundary_separator)),
      state_(dash_boundary_pattern_.ok() ? STATE_INIT : STATE_ERROR),
      patterns_(g_patterns.Pointer()) {}
    444 
// Nothing to release explicitly; the destructor body is empty.
FormDataParserMultipart::~FormDataParserMultipart() {}
    446 
// The input was read successfully iff the parser reached STATE_FINISHED, i.e.
// the close-delimiter (and the epilogue) have been read.
bool FormDataParserMultipart::AllDataReadOK() {
  return state_ == STATE_FINISHED;
}
    450 
    451 bool FormDataParserMultipart::FinishReadingPart(base::StringPiece* data) {
    452   const char* data_start = source_.data();
    453   while (!StartsWithPattern(source_, dash_boundary_pattern_)) {
    454     if (!RE2::Consume(&source_, crlf_free_pattern()) ||
    455         !RE2::Consume(&source_, crlf_pattern())) {
    456       state_ = STATE_ERROR;
    457       return false;
    458     }
    459   }
    460   if (data != NULL) {
    461     if (source_.data() == data_start) {
    462       // No data in this body part.
    463       state_ = STATE_ERROR;
    464       return false;
    465     }
    466     // Subtract 2 for the trailing "\r\n".
    467     data->set(data_start, source_.data() - data_start - 2);
    468   }
    469 
    470   // Finally, read the dash-boundary and either skip to the next body part, or
    471   // finish reading the source.
    472   CHECK(RE2::Consume(&source_, dash_boundary_pattern_));
    473   if (StartsWithPattern(source_, closing_pattern())) {
    474     CHECK(RE2::Consume(&source_, closing_pattern()));
    475     if (RE2::Consume(&source_, epilogue_pattern()))
    476       state_ = STATE_FINISHED;
    477     else
    478       state_ = STATE_ERROR;
    479   } else {  // Next body part ahead.
    480     if (!RE2::Consume(&source_, transfer_padding_pattern()))
    481       state_ = STATE_ERROR;
    482   }
    483   return state_ != STATE_ERROR;
    484 }
    485 
    486 bool FormDataParserMultipart::GetNextNameValue(Result* result) {
    487   if (source_.empty() || state_ != STATE_READY)
    488     return false;
    489 
    490   // 1. Read body-part headers.
    491   base::StringPiece name;
    492   base::StringPiece value;
    493   bool value_assigned = false;
    494   bool value_assigned_temp;
    495   while (TryReadHeader(&name, &value, &value_assigned_temp))
    496     value_assigned |= value_assigned_temp;
    497   if (name.empty() || state_ == STATE_ERROR) {
    498     state_ = STATE_ERROR;
    499     return false;
    500   }
    501 
    502   // 2. Read the trailing CRLF after headers.
    503   if (!RE2::Consume(&source_, crlf_pattern())) {
    504     state_ = STATE_ERROR;
    505     return false;
    506   }
    507 
    508   // 3. Read the data of this body part, i.e., everything until the first
    509   // dash-boundary.
    510   bool return_value;
    511   if (value_assigned && source_.empty()) {  // Wait for a new source?
    512     return_value = true;
    513     state_ = STATE_SUSPEND;
    514   } else {
    515     return_value = FinishReadingPart(value_assigned ? NULL : &value);
    516   }
    517 
    518   std::string unescaped_name = net::UnescapeURLComponent(
    519       name.as_string(),
    520       net::UnescapeRule::URL_SPECIAL_CHARS | net::UnescapeRule::CONTROL_CHARS);
    521   result->set_name(unescaped_name);
    522   result->set_value(value);
    523 
    524   return return_value;
    525 }
    526 
    527 bool FormDataParserMultipart::SetSource(base::StringPiece source) {
    528   if (source.data() == NULL || !source_.empty())
    529     return false;
    530   source_.set(source.data(), source.size());
    531 
    532   switch (state_) {
    533     case STATE_INIT:
    534       // Seek behind the preamble.
    535       while (!StartsWithPattern(source_, dash_boundary_pattern_)) {
    536         if (!RE2::Consume(&source_, preamble_pattern())) {
    537           state_ = STATE_ERROR;
    538           break;
    539         }
    540       }
    541       // Read dash-boundary, transfer padding, and CRLF.
    542       if (state_ != STATE_ERROR) {
    543         if (!RE2::Consume(&source_, dash_boundary_pattern_) ||
    544             !RE2::Consume(&source_, transfer_padding_pattern()))
    545           state_ = STATE_ERROR;
    546         else
    547           state_ = STATE_READY;
    548       }
    549       break;
    550     case STATE_READY:  // Nothing to do.
    551       break;
    552     case STATE_SUSPEND:
    553       state_ = FinishReadingPart(NULL) ? STATE_READY : STATE_ERROR;
    554       break;
    555     default:
    556       state_ = STATE_ERROR;
    557   }
    558   return state_ != STATE_ERROR;
    559 }
    560 
    561 bool FormDataParserMultipart::TryReadHeader(base::StringPiece* name,
    562                                             base::StringPiece* value,
    563                                             bool* value_assigned) {
    564   *value_assigned = false;
    565   const char* header_start = source_.data();
    566   if (!RE2::Consume(&source_, header_pattern()))
    567     return false;
    568   // (*) After this point we must return true, because we consumed one header.
    569 
    570   // Subtract 2 for the trailing "\r\n".
    571   re2::StringPiece header(header_start, source_.data() - header_start - 2);
    572 
    573   if (!StartsWithPattern(header, content_disposition_pattern()))
    574     return true;  // Skip headers that don't describe the content-disposition.
    575 
    576   re2::StringPiece groups[2];
    577 
    578   if (!name_pattern().Match(header,
    579                             kContentDispositionLength, header.size(),
    580                             RE2::UNANCHORED, groups, 2)) {
    581     state_ = STATE_ERROR;
    582     return true;  // See (*) for why true.
    583   }
    584   name->set(groups[1].data(), groups[1].size());
    585 
    586   if (value_pattern().Match(header,
    587                             kContentDispositionLength, header.size(),
    588                             RE2::UNANCHORED, groups, 2)) {
    589     value->set(groups[1].data(), groups[1].size());
    590     *value_assigned = true;
    591   }
    592   return true;
    593 }
    594 
    595 }  // namespace extensions
    596