1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/json/string_escape.h" 6 7 #include <string> 8 9 #include "base/strings/string_util.h" 10 #include "base/strings/stringprintf.h" 11 #include "base/strings/utf_string_conversion_utils.h" 12 #include "base/strings/utf_string_conversions.h" 13 #include "base/third_party/icu/icu_utf.h" 14 15 namespace base { 16 17 namespace { 18 19 // Format string for printing a \uXXXX escape sequence. 20 const char kU16EscapeFormat[] = "\\u%04X"; 21 22 // The code point to output for an invalid input code unit. 23 const uint32 kReplacementCodePoint = 0xFFFD; 24 25 // Used below in EscapeSpecialCodePoint(). 26 COMPILE_ASSERT('<' == 0x3C, less_than_sign_is_0x3c); 27 28 // Try to escape the |code_point| if it is a known special character. If 29 // successful, returns true and appends the escape sequence to |dest|. This 30 // isn't required by the spec, but it's more readable by humans. 31 bool EscapeSpecialCodePoint(uint32 code_point, std::string* dest) { 32 // WARNING: if you add a new case here, you need to update the reader as well. 33 // Note: \v is in the reader, but not here since the JSON spec doesn't 34 // allow it. 35 switch (code_point) { 36 case '\b': 37 dest->append("\\b"); 38 break; 39 case '\f': 40 dest->append("\\f"); 41 break; 42 case '\n': 43 dest->append("\\n"); 44 break; 45 case '\r': 46 dest->append("\\r"); 47 break; 48 case '\t': 49 dest->append("\\t"); 50 break; 51 case '\\': 52 dest->append("\\\\"); 53 break; 54 case '"': 55 dest->append("\\\""); 56 break; 57 // Escape < to prevent script execution; escaping > is not necessary and 58 // not doing so save a few bytes. 59 case '<': 60 dest->append("\\u003C"); 61 break; 62 default: 63 return false; 64 } 65 return true; 66 } 67 68 template <typename S> 69 bool EscapeJSONStringImpl(const S& str, bool put_in_quotes, std::string* dest) { 70 bool did_replacement = false; 71 72 if (put_in_quotes) 73 dest->push_back('"'); 74 75 // Casting is necessary because ICU uses int32. Try and do so safely. 76 CHECK_LE(str.length(), static_cast<size_t>(kint32max)); 77 const int32 length = static_cast<int32>(str.length()); 78 79 for (int32 i = 0; i < length; ++i) { 80 uint32 code_point; 81 if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point)) { 82 code_point = kReplacementCodePoint; 83 did_replacement = true; 84 } 85 86 if (EscapeSpecialCodePoint(code_point, dest)) 87 continue; 88 89 // Escape non-printing characters. 90 if (code_point < 32) 91 base::StringAppendF(dest, kU16EscapeFormat, code_point); 92 else 93 WriteUnicodeCharacter(code_point, dest); 94 } 95 96 if (put_in_quotes) 97 dest->push_back('"'); 98 99 return !did_replacement; 100 } 101 102 } // namespace 103 104 bool EscapeJSONString(const StringPiece& str, 105 bool put_in_quotes, 106 std::string* dest) { 107 return EscapeJSONStringImpl(str, put_in_quotes, dest); 108 } 109 110 bool EscapeJSONString(const StringPiece16& str, 111 bool put_in_quotes, 112 std::string* dest) { 113 return EscapeJSONStringImpl(str, put_in_quotes, dest); 114 } 115 116 std::string GetQuotedJSONString(const StringPiece& str) { 117 std::string dest; 118 bool ok = EscapeJSONStringImpl(str, true, &dest); 119 DCHECK(ok); 120 return dest; 121 } 122 123 std::string GetQuotedJSONString(const StringPiece16& str) { 124 std::string dest; 125 bool ok = EscapeJSONStringImpl(str, true, &dest); 126 DCHECK(ok); 127 return dest; 128 } 129 130 std::string EscapeBytesAsInvalidJSONString(const StringPiece& str, 131 bool put_in_quotes) { 132 std::string dest; 133 134 if (put_in_quotes) 135 dest.push_back('"'); 136 137 for (StringPiece::const_iterator it = str.begin(); it != str.end(); ++it) { 138 ToUnsigned<StringPiece::value_type>::Unsigned c = *it; 139 if (EscapeSpecialCodePoint(c, &dest)) 140 continue; 141 142 if (c < 32 || c > 126) 143 base::StringAppendF(&dest, kU16EscapeFormat, c); 144 else 145 dest.push_back(*it); 146 } 147 148 if (put_in_quotes) 149 dest.push_back('"'); 150 151 return dest; 152 } 153 154 } // namespace base 155