1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/json/string_escape.h" 6 7 #include <stddef.h> 8 #include <stdint.h> 9 10 #include <limits> 11 #include <string> 12 13 #include "base/logging.h" 14 #include "base/strings/string_util.h" 15 #include "base/strings/stringprintf.h" 16 #include "base/strings/utf_string_conversion_utils.h" 17 #include "base/third_party/icu/icu_utf.h" 18 19 namespace base { 20 21 namespace { 22 23 // Format string for printing a \uXXXX escape sequence. 24 const char kU16EscapeFormat[] = "\\u%04X"; 25 26 // The code point to output for an invalid input code unit. 27 const uint32_t kReplacementCodePoint = 0xFFFD; 28 29 // Used below in EscapeSpecialCodePoint(). 30 static_assert('<' == 0x3C, "less than sign must be 0x3c"); 31 32 // Try to escape the |code_point| if it is a known special character. If 33 // successful, returns true and appends the escape sequence to |dest|. This 34 // isn't required by the spec, but it's more readable by humans. 35 bool EscapeSpecialCodePoint(uint32_t code_point, std::string* dest) { 36 // WARNING: if you add a new case here, you need to update the reader as well. 37 // Note: \v is in the reader, but not here since the JSON spec doesn't 38 // allow it. 39 switch (code_point) { 40 case '\b': 41 dest->append("\\b"); 42 break; 43 case '\f': 44 dest->append("\\f"); 45 break; 46 case '\n': 47 dest->append("\\n"); 48 break; 49 case '\r': 50 dest->append("\\r"); 51 break; 52 case '\t': 53 dest->append("\\t"); 54 break; 55 case '\\': 56 dest->append("\\\\"); 57 break; 58 case '"': 59 dest->append("\\\""); 60 break; 61 // Escape < to prevent script execution; escaping > is not necessary and 62 // not doing so save a few bytes. 63 case '<': 64 dest->append("\\u003C"); 65 break; 66 // Escape the "Line Separator" and "Paragraph Separator" characters, since 67 // they should be treated like a new line \r or \n. 68 case 0x2028: 69 dest->append("\\u2028"); 70 break; 71 case 0x2029: 72 dest->append("\\u2029"); 73 break; 74 default: 75 return false; 76 } 77 return true; 78 } 79 80 template <typename S> 81 bool EscapeJSONStringImpl(const S& str, bool put_in_quotes, std::string* dest) { 82 bool did_replacement = false; 83 84 if (put_in_quotes) 85 dest->push_back('"'); 86 87 // Casting is necessary because ICU uses int32_t. Try and do so safely. 88 CHECK_LE(str.length(), 89 static_cast<size_t>(std::numeric_limits<int32_t>::max())); 90 const int32_t length = static_cast<int32_t>(str.length()); 91 92 for (int32_t i = 0; i < length; ++i) { 93 uint32_t code_point; 94 if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point)) { 95 code_point = kReplacementCodePoint; 96 did_replacement = true; 97 } 98 99 if (EscapeSpecialCodePoint(code_point, dest)) 100 continue; 101 102 // Escape non-printing characters. 103 if (code_point < 32) 104 base::StringAppendF(dest, kU16EscapeFormat, code_point); 105 else 106 WriteUnicodeCharacter(code_point, dest); 107 } 108 109 if (put_in_quotes) 110 dest->push_back('"'); 111 112 return !did_replacement; 113 } 114 115 } // namespace 116 117 bool EscapeJSONString(const StringPiece& str, 118 bool put_in_quotes, 119 std::string* dest) { 120 return EscapeJSONStringImpl(str, put_in_quotes, dest); 121 } 122 123 std::string GetQuotedJSONString(const StringPiece& str) { 124 std::string dest; 125 bool ok = EscapeJSONStringImpl(str, true, &dest); 126 DCHECK(ok); 127 return dest; 128 } 129 130 std::string EscapeBytesAsInvalidJSONString(const StringPiece& str, 131 bool put_in_quotes) { 132 std::string dest; 133 134 if (put_in_quotes) 135 dest.push_back('"'); 136 137 for (StringPiece::const_iterator it = str.begin(); it != str.end(); ++it) { 138 unsigned char c = *it; 139 if (EscapeSpecialCodePoint(c, &dest)) 140 continue; 141 142 if (c < 32 || c > 126) 143 base::StringAppendF(&dest, kU16EscapeFormat, c); 144 else 145 dest.push_back(*it); 146 } 147 148 if (put_in_quotes) 149 dest.push_back('"'); 150 151 return dest; 152 } 153 154 } // namespace base 155