Home | History | Annotate | Download | only in json
      1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/json/string_escape.h"
      6 
      7 #include <stddef.h>
      8 #include <stdint.h>
      9 
     10 #include <limits>
     11 #include <string>
     12 
     13 #include "base/strings/string_util.h"
     14 #include "base/strings/stringprintf.h"
     15 #include "base/strings/utf_string_conversion_utils.h"
     16 #include "base/strings/utf_string_conversions.h"
     17 #include "base/third_party/icu/icu_utf.h"
     18 
     19 namespace base {
     20 
     21 namespace {
     22 
     23 // Format string for printing a \uXXXX escape sequence.
     24 const char kU16EscapeFormat[] = "\\u%04X";
     25 
     26 // The code point to output for an invalid input code unit.
     27 const uint32_t kReplacementCodePoint = 0xFFFD;
     28 
     29 // Used below in EscapeSpecialCodePoint().
     30 static_assert('<' == 0x3C, "less than sign must be 0x3c");
     31 
     32 // Try to escape the |code_point| if it is a known special character. If
     33 // successful, returns true and appends the escape sequence to |dest|. This
     34 // isn't required by the spec, but it's more readable by humans.
     35 bool EscapeSpecialCodePoint(uint32_t code_point, std::string* dest) {
     36   // WARNING: if you add a new case here, you need to update the reader as well.
     37   // Note: \v is in the reader, but not here since the JSON spec doesn't
     38   // allow it.
     39   switch (code_point) {
     40     case '\b':
     41       dest->append("\\b");
     42       break;
     43     case '\f':
     44       dest->append("\\f");
     45       break;
     46     case '\n':
     47       dest->append("\\n");
     48       break;
     49     case '\r':
     50       dest->append("\\r");
     51       break;
     52     case '\t':
     53       dest->append("\\t");
     54       break;
     55     case '\\':
     56       dest->append("\\\\");
     57       break;
     58     case '"':
     59       dest->append("\\\"");
     60       break;
     61     // Escape < to prevent script execution; escaping > is not necessary and
     62     // not doing so save a few bytes.
     63     case '<':
     64       dest->append("\\u003C");
     65       break;
     66     // Escape the "Line Separator" and "Paragraph Separator" characters, since
     67     // they should be treated like a new line \r or \n.
     68     case 0x2028:
     69       dest->append("\\u2028");
     70       break;
     71     case 0x2029:
     72       dest->append("\\u2029");
     73       break;
     74     default:
     75       return false;
     76   }
     77   return true;
     78 }
     79 
     80 template <typename S>
     81 bool EscapeJSONStringImpl(const S& str, bool put_in_quotes, std::string* dest) {
     82   bool did_replacement = false;
     83 
     84   if (put_in_quotes)
     85     dest->push_back('"');
     86 
     87   // Casting is necessary because ICU uses int32_t. Try and do so safely.
     88   CHECK_LE(str.length(),
     89            static_cast<size_t>(std::numeric_limits<int32_t>::max()));
     90   const int32_t length = static_cast<int32_t>(str.length());
     91 
     92   for (int32_t i = 0; i < length; ++i) {
     93     uint32_t code_point;
     94     if (!ReadUnicodeCharacter(str.data(), length, &i, &code_point)) {
     95       code_point = kReplacementCodePoint;
     96       did_replacement = true;
     97     }
     98 
     99     if (EscapeSpecialCodePoint(code_point, dest))
    100       continue;
    101 
    102     // Escape non-printing characters.
    103     if (code_point < 32)
    104       base::StringAppendF(dest, kU16EscapeFormat, code_point);
    105     else
    106       WriteUnicodeCharacter(code_point, dest);
    107   }
    108 
    109   if (put_in_quotes)
    110     dest->push_back('"');
    111 
    112   return !did_replacement;
    113 }
    114 
    115 }  // namespace
    116 
    117 bool EscapeJSONString(const StringPiece& str,
    118                       bool put_in_quotes,
    119                       std::string* dest) {
    120   return EscapeJSONStringImpl(str, put_in_quotes, dest);
    121 }
    122 
    123 bool EscapeJSONString(const StringPiece16& str,
    124                       bool put_in_quotes,
    125                       std::string* dest) {
    126   return EscapeJSONStringImpl(str, put_in_quotes, dest);
    127 }
    128 
    129 std::string GetQuotedJSONString(const StringPiece& str) {
    130   std::string dest;
    131   bool ok = EscapeJSONStringImpl(str, true, &dest);
    132   DCHECK(ok);
    133   return dest;
    134 }
    135 
    136 std::string GetQuotedJSONString(const StringPiece16& str) {
    137   std::string dest;
    138   bool ok = EscapeJSONStringImpl(str, true, &dest);
    139   DCHECK(ok);
    140   return dest;
    141 }
    142 
    143 std::string EscapeBytesAsInvalidJSONString(const StringPiece& str,
    144                                            bool put_in_quotes) {
    145   std::string dest;
    146 
    147   if (put_in_quotes)
    148     dest.push_back('"');
    149 
    150   for (StringPiece::const_iterator it = str.begin(); it != str.end(); ++it) {
    151     unsigned char c = *it;
    152     if (EscapeSpecialCodePoint(c, &dest))
    153       continue;
    154 
    155     if (c < 32 || c > 126)
    156       base::StringAppendF(&dest, kU16EscapeFormat, c);
    157     else
    158       dest.push_back(*it);
    159   }
    160 
    161   if (put_in_quotes)
    162     dest.push_back('"');
    163 
    164   return dest;
    165 }
    166 
    167 }  // namespace base
    168