1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "content/common/page_state_serialization.h" 6 7 #include <algorithm> 8 #include <limits> 9 10 #include "base/pickle.h" 11 #include "base/strings/string_number_conversions.h" 12 #include "base/strings/string_util.h" 13 #include "base/strings/utf_string_conversions.h" 14 #include "ui/gfx/screen.h" 15 16 namespace content { 17 namespace { 18 19 #if defined(OS_ANDROID) 20 float g_device_scale_factor_for_testing = 0.0; 21 #endif 22 23 //----------------------------------------------------------------------------- 24 25 void AppendDataToHttpBody(ExplodedHttpBody* http_body, const char* data, 26 int data_length) { 27 ExplodedHttpBodyElement element; 28 element.type = WebKit::WebHTTPBody::Element::TypeData; 29 element.data.assign(data, data_length); 30 http_body->elements.push_back(element); 31 } 32 33 void AppendFileRangeToHttpBody(ExplodedHttpBody* http_body, 34 const base::NullableString16& file_path, 35 int file_start, 36 int file_length, 37 double file_modification_time) { 38 ExplodedHttpBodyElement element; 39 element.type = WebKit::WebHTTPBody::Element::TypeFile; 40 element.file_path = file_path; 41 element.file_start = file_start; 42 element.file_length = file_length; 43 element.file_modification_time = file_modification_time; 44 http_body->elements.push_back(element); 45 } 46 47 void AppendURLRangeToHttpBody(ExplodedHttpBody* http_body, 48 const GURL& url, 49 int file_start, 50 int file_length, 51 double file_modification_time) { 52 ExplodedHttpBodyElement element; 53 element.type = WebKit::WebHTTPBody::Element::TypeURL; 54 element.url = url; 55 element.file_start = file_start; 56 element.file_length = file_length; 57 element.file_modification_time = file_modification_time; 58 http_body->elements.push_back(element); 59 } 60 61 void AppendBlobToHttpBody(ExplodedHttpBody* http_body, const GURL& url) { 62 ExplodedHttpBodyElement element; 63 element.type = WebKit::WebHTTPBody::Element::TypeBlob; 64 element.url = url; 65 http_body->elements.push_back(element); 66 } 67 68 //---------------------------------------------------------------------------- 69 70 void AppendReferencedFilesFromHttpBody( 71 const std::vector<ExplodedHttpBodyElement>& elements, 72 std::vector<base::NullableString16>* referenced_files) { 73 for (size_t i = 0; i < elements.size(); ++i) { 74 if (elements[i].type == WebKit::WebHTTPBody::Element::TypeFile) 75 referenced_files->push_back(elements[i].file_path); 76 } 77 } 78 79 bool AppendReferencedFilesFromDocumentState( 80 const std::vector<base::NullableString16>& document_state, 81 std::vector<base::NullableString16>* referenced_files) { 82 if (document_state.empty()) 83 return true; 84 85 // This algorithm is adapted from Blink's core/html/FormController.cpp code. 86 // We only care about how that code worked when this code snapshot was taken 87 // as this code is only needed for backwards compat. 88 // 89 // For reference, see FormController::formStatesFromStateVector at: 90 // http://src.chromium.org/viewvc/blink/trunk/Source/core/html/FormController.cpp?pathrev=152274 91 92 size_t index = 0; 93 94 if (document_state.size() < 3) 95 return false; 96 97 index++; // Skip over magic signature. 98 index++; // Skip over form key. 99 100 size_t item_count; 101 if (!base::StringToSizeT(document_state[index++].string(), &item_count)) 102 return false; 103 104 while (item_count--) { 105 if (index + 1 >= document_state.size()) 106 return false; 107 108 index++; // Skip over name. 109 const base::NullableString16& type = document_state[index++]; 110 111 if (index >= document_state.size()) 112 return false; 113 114 size_t value_size; 115 if (!base::StringToSizeT(document_state[index++].string(), &value_size)) 116 return false; 117 118 if (index + value_size > document_state.size() || 119 index + value_size < index) // Check for overflow. 120 return false; 121 122 if (EqualsASCII(type.string(), "file")) { 123 if (value_size != 2) 124 return false; 125 126 referenced_files->push_back(document_state[index++]); 127 index++; // Skip over display name. 128 } else { 129 index += value_size; 130 } 131 } 132 133 return true; 134 } 135 136 bool RecursivelyAppendReferencedFiles( 137 const ExplodedFrameState& frame_state, 138 std::vector<base::NullableString16>* referenced_files) { 139 if (!frame_state.http_body.is_null) { 140 AppendReferencedFilesFromHttpBody(frame_state.http_body.elements, 141 referenced_files); 142 } 143 144 if (!AppendReferencedFilesFromDocumentState(frame_state.document_state, 145 referenced_files)) 146 return false; 147 148 for (size_t i = 0; i < frame_state.children.size(); ++i) { 149 if (!RecursivelyAppendReferencedFiles(frame_state.children[i], 150 referenced_files)) 151 return false; 152 } 153 154 return true; 155 } 156 157 //---------------------------------------------------------------------------- 158 159 struct SerializeObject { 160 SerializeObject() 161 : version(0), 162 parse_error(false) { 163 } 164 165 SerializeObject(const char* data, int len) 166 : pickle(data, len), 167 version(0), 168 parse_error(false) { 169 iter = PickleIterator(pickle); 170 } 171 172 std::string GetAsString() { 173 return std::string(static_cast<const char*>(pickle.data()), pickle.size()); 174 } 175 176 Pickle pickle; 177 PickleIterator iter; 178 int version; 179 bool parse_error; 180 }; 181 182 // Version ID of serialized format. 183 // 11: Min version 184 // 12: Adds support for contains_passwords in HTTP body 185 // 13: Adds support for URL (FileSystem URL) 186 // 14: Adds list of referenced files, version written only for first item. 187 // 188 // NOTE: If the version is -1, then the pickle contains only a URL string. 189 // See ReadPageState. 190 // 191 const int kMinVersion = 11; 192 const int kCurrentVersion = 14; 193 194 // A bunch of convenience functions to read/write to SerializeObjects. The 195 // de-serializers assume the input data will be in the correct format and fall 196 // back to returning safe defaults when not. 197 198 void WriteData(const void* data, int length, SerializeObject* obj) { 199 obj->pickle.WriteData(static_cast<const char*>(data), length); 200 } 201 202 void ReadData(SerializeObject* obj, const void** data, int* length) { 203 const char* tmp; 204 if (obj->pickle.ReadData(&obj->iter, &tmp, length)) { 205 *data = tmp; 206 } else { 207 obj->parse_error = true; 208 *data = NULL; 209 *length = 0; 210 } 211 } 212 213 void WriteInteger(int data, SerializeObject* obj) { 214 obj->pickle.WriteInt(data); 215 } 216 217 int ReadInteger(SerializeObject* obj) { 218 int tmp; 219 if (obj->pickle.ReadInt(&obj->iter, &tmp)) 220 return tmp; 221 obj->parse_error = true; 222 return 0; 223 } 224 225 void ConsumeInteger(SerializeObject* obj) { 226 int unused ALLOW_UNUSED = ReadInteger(obj); 227 } 228 229 void WriteInteger64(int64 data, SerializeObject* obj) { 230 obj->pickle.WriteInt64(data); 231 } 232 233 int64 ReadInteger64(SerializeObject* obj) { 234 int64 tmp = 0; 235 if (obj->pickle.ReadInt64(&obj->iter, &tmp)) 236 return tmp; 237 obj->parse_error = true; 238 return 0; 239 } 240 241 void WriteReal(double data, SerializeObject* obj) { 242 WriteData(&data, sizeof(double), obj); 243 } 244 245 double ReadReal(SerializeObject* obj) { 246 const void* tmp = NULL; 247 int length = 0; 248 double value = 0.0; 249 ReadData(obj, &tmp, &length); 250 if (length == static_cast<int>(sizeof(double))) { 251 // Use memcpy, as tmp may not be correctly aligned. 252 memcpy(&value, tmp, sizeof(double)); 253 } else { 254 obj->parse_error = true; 255 } 256 return value; 257 } 258 259 void WriteBoolean(bool data, SerializeObject* obj) { 260 obj->pickle.WriteInt(data ? 1 : 0); 261 } 262 263 bool ReadBoolean(SerializeObject* obj) { 264 bool tmp; 265 if (obj->pickle.ReadBool(&obj->iter, &tmp)) 266 return tmp; 267 obj->parse_error = true; 268 return false; 269 } 270 271 void WriteGURL(const GURL& url, SerializeObject* obj) { 272 obj->pickle.WriteString(url.possibly_invalid_spec()); 273 } 274 275 GURL ReadGURL(SerializeObject* obj) { 276 std::string spec; 277 if (obj->pickle.ReadString(&obj->iter, &spec)) 278 return GURL(spec); 279 obj->parse_error = true; 280 return GURL(); 281 } 282 283 // WriteString pickles the NullableString16 as <int length><char16* data>. 284 // If length == -1, then the NullableString16 itself is null. Otherwise the 285 // length is the number of char16 (not bytes) in the NullableString16. 286 void WriteString(const base::NullableString16& str, SerializeObject* obj) { 287 if (str.is_null()) { 288 obj->pickle.WriteInt(-1); 289 } else { 290 const char16* data = str.string().data(); 291 size_t length_in_bytes = str.string().length() * sizeof(char16); 292 293 CHECK_LT(length_in_bytes, 294 static_cast<size_t>(std::numeric_limits<int>::max())); 295 obj->pickle.WriteInt(length_in_bytes); 296 obj->pickle.WriteBytes(data, length_in_bytes); 297 } 298 } 299 300 // This reads a serialized NullableString16 from obj. If a string can't be 301 // read, NULL is returned. 302 const char16* ReadStringNoCopy(SerializeObject* obj, int* num_chars) { 303 int length_in_bytes; 304 if (!obj->pickle.ReadInt(&obj->iter, &length_in_bytes)) { 305 obj->parse_error = true; 306 return NULL; 307 } 308 309 if (length_in_bytes < 0) 310 return NULL; 311 312 const char* data; 313 if (!obj->pickle.ReadBytes(&obj->iter, &data, length_in_bytes)) { 314 obj->parse_error = true; 315 return NULL; 316 } 317 318 if (num_chars) 319 *num_chars = length_in_bytes / sizeof(char16); 320 return reinterpret_cast<const char16*>(data); 321 } 322 323 base::NullableString16 ReadString(SerializeObject* obj) { 324 int num_chars; 325 const char16* chars = ReadStringNoCopy(obj, &num_chars); 326 return chars ? 327 base::NullableString16(base::string16(chars, num_chars), false) : 328 base::NullableString16(); 329 } 330 331 void ConsumeString(SerializeObject* obj) { 332 const char16* unused ALLOW_UNUSED = ReadStringNoCopy(obj, NULL); 333 } 334 335 template <typename T> 336 void WriteAndValidateVectorSize(const std::vector<T>& v, SerializeObject* obj) { 337 CHECK_LT(v.size(), std::numeric_limits<int>::max() / sizeof(T)); 338 WriteInteger(static_cast<int>(v.size()), obj); 339 } 340 341 size_t ReadAndValidateVectorSize(SerializeObject* obj, size_t element_size) { 342 size_t num_elements = static_cast<size_t>(ReadInteger(obj)); 343 344 // Ensure that resizing a vector to size num_elements makes sense. 345 if (std::numeric_limits<int>::max() / element_size <= num_elements) { 346 obj->parse_error = true; 347 return 0; 348 } 349 350 // Ensure that it is plausible for the pickle to contain num_elements worth 351 // of data. 352 if (obj->pickle.payload_size() <= num_elements) { 353 obj->parse_error = true; 354 return 0; 355 } 356 357 return num_elements; 358 } 359 360 // Writes a Vector of strings into a SerializeObject for serialization. 361 void WriteStringVector( 362 const std::vector<base::NullableString16>& data, SerializeObject* obj) { 363 WriteAndValidateVectorSize(data, obj); 364 for (size_t i = 0; i < data.size(); ++i) { 365 WriteString(data[i], obj); 366 } 367 } 368 369 void ReadStringVector(SerializeObject* obj, 370 std::vector<base::NullableString16>* result) { 371 size_t num_elements = 372 ReadAndValidateVectorSize(obj, sizeof(base::NullableString16)); 373 374 result->resize(num_elements); 375 for (size_t i = 0; i < num_elements; ++i) 376 (*result)[i] = ReadString(obj); 377 } 378 379 // Writes an ExplodedHttpBody object into a SerializeObject for serialization. 380 void WriteHttpBody(const ExplodedHttpBody& http_body, SerializeObject* obj) { 381 WriteBoolean(!http_body.is_null, obj); 382 383 if (http_body.is_null) 384 return; 385 386 WriteAndValidateVectorSize(http_body.elements, obj); 387 for (size_t i = 0; i < http_body.elements.size(); ++i) { 388 const ExplodedHttpBodyElement& element = http_body.elements[i]; 389 WriteInteger(element.type, obj); 390 if (element.type == WebKit::WebHTTPBody::Element::TypeData) { 391 WriteData(element.data.data(), static_cast<int>(element.data.size()), 392 obj); 393 } else if (element.type == WebKit::WebHTTPBody::Element::TypeFile) { 394 WriteString(element.file_path, obj); 395 WriteInteger64(element.file_start, obj); 396 WriteInteger64(element.file_length, obj); 397 WriteReal(element.file_modification_time, obj); 398 } else if (element.type == WebKit::WebHTTPBody::Element::TypeURL) { 399 WriteGURL(element.url, obj); 400 WriteInteger64(element.file_start, obj); 401 WriteInteger64(element.file_length, obj); 402 WriteReal(element.file_modification_time, obj); 403 } else { 404 WriteGURL(element.url, obj); 405 } 406 } 407 WriteInteger64(http_body.identifier, obj); 408 WriteBoolean(http_body.contains_passwords, obj); 409 } 410 411 void ReadHttpBody(SerializeObject* obj, ExplodedHttpBody* http_body) { 412 // An initial boolean indicates if we have an HTTP body. 413 if (!ReadBoolean(obj)) 414 return; 415 http_body->is_null = false; 416 417 int num_elements = ReadInteger(obj); 418 419 for (int i = 0; i < num_elements; ++i) { 420 int type = ReadInteger(obj); 421 if (type == WebKit::WebHTTPBody::Element::TypeData) { 422 const void* data; 423 int length = -1; 424 ReadData(obj, &data, &length); 425 if (length >= 0) { 426 AppendDataToHttpBody(http_body, static_cast<const char*>(data), 427 length); 428 } 429 } else if (type == WebKit::WebHTTPBody::Element::TypeFile) { 430 base::NullableString16 file_path = ReadString(obj); 431 int64 file_start = ReadInteger64(obj); 432 int64 file_length = ReadInteger64(obj); 433 double file_modification_time = ReadReal(obj); 434 AppendFileRangeToHttpBody(http_body, file_path, file_start, file_length, 435 file_modification_time); 436 } else if (type == WebKit::WebHTTPBody::Element::TypeURL) { 437 GURL url = ReadGURL(obj); 438 int64 file_start = ReadInteger64(obj); 439 int64 file_length = ReadInteger64(obj); 440 double file_modification_time = ReadReal(obj); 441 AppendURLRangeToHttpBody(http_body, url, file_start, file_length, 442 file_modification_time); 443 } else if (type == WebKit::WebHTTPBody::Element::TypeBlob) { 444 GURL blob_url = ReadGURL(obj); 445 AppendBlobToHttpBody(http_body, blob_url); 446 } 447 } 448 http_body->identifier = ReadInteger64(obj); 449 450 if (obj->version >= 12) 451 http_body->contains_passwords = ReadBoolean(obj); 452 } 453 454 // Writes the ExplodedFrameState data into the SerializeObject object for 455 // serialization. 456 void WriteFrameState( 457 const ExplodedFrameState& state, SerializeObject* obj, bool is_top) { 458 // WARNING: This data may be persisted for later use. As such, care must be 459 // taken when changing the serialized format. If a new field needs to be 460 // written, only adding at the end will make it easier to deal with loading 461 // older versions. Similarly, this should NOT save fields with sensitive 462 // data, such as password fields. 463 464 WriteString(state.url_string, obj); 465 WriteString(state.original_url_string, obj); 466 WriteString(state.target, obj); 467 WriteString(state.parent, obj); 468 WriteString(state.title, obj); 469 WriteString(state.alternate_title, obj); 470 WriteReal(state.visited_time, obj); 471 WriteInteger(state.scroll_offset.x(), obj); 472 WriteInteger(state.scroll_offset.y(), obj); 473 WriteBoolean(state.is_target_item, obj); 474 WriteInteger(state.visit_count, obj); 475 WriteString(state.referrer, obj); 476 477 WriteStringVector(state.document_state, obj); 478 479 WriteReal(state.page_scale_factor, obj); 480 WriteInteger64(state.item_sequence_number, obj); 481 WriteInteger64(state.document_sequence_number, obj); 482 483 bool has_state_object = !state.state_object.is_null(); 484 WriteBoolean(has_state_object, obj); 485 if (has_state_object) 486 WriteString(state.state_object, obj); 487 488 WriteHttpBody(state.http_body, obj); 489 490 // NOTE: It is a quirk of the format that we still have to write the 491 // http_content_type field when the HTTP body is null. That's why this code 492 // is here instead of inside WriteHttpBody. 493 WriteString(state.http_body.http_content_type, obj); 494 495 // Subitems 496 const std::vector<ExplodedFrameState>& children = state.children; 497 WriteAndValidateVectorSize(children, obj); 498 for (size_t i = 0; i < children.size(); ++i) 499 WriteFrameState(children[i], obj, false); 500 } 501 502 void ReadFrameState(SerializeObject* obj, bool is_top, 503 ExplodedFrameState* state) { 504 if (obj->version < 14 && !is_top) 505 ConsumeInteger(obj); // Skip over redundant version field. 506 507 state->url_string = ReadString(obj); 508 state->original_url_string = ReadString(obj); 509 state->target = ReadString(obj); 510 state->parent = ReadString(obj); 511 state->title = ReadString(obj); 512 state->alternate_title = ReadString(obj); 513 state->visited_time = ReadReal(obj); 514 515 int x = ReadInteger(obj); 516 int y = ReadInteger(obj); 517 state->scroll_offset = gfx::Point(x, y); 518 519 state->is_target_item = ReadBoolean(obj); 520 state->visit_count = ReadInteger(obj); 521 state->referrer = ReadString(obj); 522 523 ReadStringVector(obj, &state->document_state); 524 525 state->page_scale_factor = ReadReal(obj); 526 state->item_sequence_number = ReadInteger64(obj); 527 state->document_sequence_number = ReadInteger64(obj); 528 529 bool has_state_object = ReadBoolean(obj); 530 if (has_state_object) 531 state->state_object = ReadString(obj); 532 533 ReadHttpBody(obj, &state->http_body); 534 535 // NOTE: It is a quirk of the format that we still have to read the 536 // http_content_type field when the HTTP body is null. That's why this code 537 // is here instead of inside ReadHttpBody. 538 state->http_body.http_content_type = ReadString(obj); 539 540 if (obj->version < 14) 541 ConsumeString(obj); // Skip unused referrer string. 542 543 #if defined(OS_ANDROID) 544 if (obj->version == 11) { 545 // Now-unused values that shipped in this version of Chrome for Android when 546 // it was on a private branch. 547 ReadReal(obj); 548 ReadBoolean(obj); 549 550 // In this version, page_scale_factor included device_scale_factor and 551 // scroll offsets were premultiplied by pageScaleFactor. 552 if (state->page_scale_factor) { 553 float device_scale_factor = g_device_scale_factor_for_testing; 554 if (!device_scale_factor) { 555 device_scale_factor = 556 gfx::Screen::GetNativeScreen()->GetPrimaryDisplay(). 557 device_scale_factor(); 558 } 559 state->scroll_offset = 560 gfx::Point(state->scroll_offset.x() / state->page_scale_factor, 561 state->scroll_offset.y() / state->page_scale_factor); 562 state->page_scale_factor /= device_scale_factor; 563 } 564 } 565 #endif 566 567 // Subitems 568 size_t num_children = 569 ReadAndValidateVectorSize(obj, sizeof(ExplodedFrameState)); 570 state->children.resize(num_children); 571 for (size_t i = 0; i < num_children; ++i) 572 ReadFrameState(obj, false, &state->children[i]); 573 } 574 575 void WritePageState(const ExplodedPageState& state, SerializeObject* obj) { 576 WriteInteger(obj->version, obj); 577 WriteStringVector(state.referenced_files, obj); 578 WriteFrameState(state.top, obj, true); 579 } 580 581 void ReadPageState(SerializeObject* obj, ExplodedPageState* state) { 582 obj->version = ReadInteger(obj); 583 584 if (obj->version == -1) { 585 GURL url = ReadGURL(obj); 586 // NOTE: GURL::possibly_invalid_spec() always returns valid UTF-8. 587 state->top.url_string = state->top.original_url_string = 588 base::NullableString16(UTF8ToUTF16(url.possibly_invalid_spec()), false); 589 return; 590 } 591 592 if (obj->version > kCurrentVersion || obj->version < kMinVersion) { 593 obj->parse_error = true; 594 return; 595 } 596 597 if (obj->version >= 14) 598 ReadStringVector(obj, &state->referenced_files); 599 600 ReadFrameState(obj, true, &state->top); 601 602 if (obj->version < 14) 603 RecursivelyAppendReferencedFiles(state->top, &state->referenced_files); 604 605 // De-dupe 606 state->referenced_files.erase( 607 std::unique(state->referenced_files.begin(), 608 state->referenced_files.end()), 609 state->referenced_files.end()); 610 } 611 612 } // namespace 613 614 ExplodedHttpBodyElement::ExplodedHttpBodyElement() 615 : type(WebKit::WebHTTPBody::Element::TypeData), 616 file_start(0), 617 file_length(-1), 618 file_modification_time(std::numeric_limits<double>::quiet_NaN()) { 619 } 620 621 ExplodedHttpBodyElement::~ExplodedHttpBodyElement() { 622 } 623 624 ExplodedHttpBody::ExplodedHttpBody() 625 : identifier(0), 626 contains_passwords(false), 627 is_null(true) { 628 } 629 630 ExplodedHttpBody::~ExplodedHttpBody() { 631 } 632 633 ExplodedFrameState::ExplodedFrameState() 634 : item_sequence_number(0), 635 document_sequence_number(0), 636 visit_count(0), 637 visited_time(0.0), 638 page_scale_factor(0.0), 639 is_target_item(false) { 640 } 641 642 ExplodedFrameState::~ExplodedFrameState() { 643 } 644 645 ExplodedPageState::ExplodedPageState() { 646 } 647 648 ExplodedPageState::~ExplodedPageState() { 649 } 650 651 bool DecodePageState(const std::string& encoded, ExplodedPageState* exploded) { 652 *exploded = ExplodedPageState(); 653 654 if (encoded.empty()) 655 return true; 656 657 SerializeObject obj(encoded.data(), static_cast<int>(encoded.size())); 658 ReadPageState(&obj, exploded); 659 return !obj.parse_error; 660 } 661 662 bool EncodePageState(const ExplodedPageState& exploded, std::string* encoded) { 663 SerializeObject obj; 664 obj.version = kCurrentVersion; 665 WritePageState(exploded, &obj); 666 *encoded = obj.GetAsString(); 667 return true; 668 } 669 670 #if defined(OS_ANDROID) 671 bool DecodePageStateWithDeviceScaleFactorForTesting( 672 const std::string& encoded, 673 float device_scale_factor, 674 ExplodedPageState* exploded) { 675 g_device_scale_factor_for_testing = device_scale_factor; 676 bool rv = DecodePageState(encoded, exploded); 677 g_device_scale_factor_for_testing = 0.0; 678 return rv; 679 } 680 #endif 681 682 } // namespace content 683