// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 package com.google.protobuf;
     32 
/**
 * Provides text format escaping support for proto2 instances.
 */
     36 final class TextFormatEscaper {
     37   private TextFormatEscaper() {}
     38 
     39   private interface ByteSequence {
     40     int size();
     41     byte byteAt(int offset);
     42   }
     43 
     44   /**
     45    * Escapes bytes in the format used in protocol buffer text format, which
     46    * is the same as the format used for C string literals.  All bytes
     47    * that are not printable 7-bit ASCII characters are escaped, as well as
     48    * backslash, single-quote, and double-quote characters.  Characters for
     49    * which no defined short-hand escape sequence is defined will be escaped
     50    * using 3-digit octal sequences.
     51    */
     52   static String escapeBytes(final ByteSequence input) {
     53     final StringBuilder builder = new StringBuilder(input.size());
     54     for (int i = 0; i < input.size(); i++) {
     55       final byte b = input.byteAt(i);
     56       switch (b) {
     57         // Java does not recognize \a or \v, apparently.
     58         case 0x07: builder.append("\\a"); break;
     59         case '\b': builder.append("\\b"); break;
     60         case '\f': builder.append("\\f"); break;
     61         case '\n': builder.append("\\n"); break;
     62         case '\r': builder.append("\\r"); break;
     63         case '\t': builder.append("\\t"); break;
     64         case 0x0b: builder.append("\\v"); break;
     65         case '\\': builder.append("\\\\"); break;
     66         case '\'': builder.append("\\\'"); break;
     67         case '"' : builder.append("\\\""); break;
     68         default:
     69           // Only ASCII characters between 0x20 (space) and 0x7e (tilde) are
     70           // printable.  Other byte values must be escaped.
     71           if (b >= 0x20 && b <= 0x7e) {
     72             builder.append((char) b);
     73           } else {
     74             builder.append('\\');
     75             builder.append((char) ('0' + ((b >>> 6) & 3)));
     76             builder.append((char) ('0' + ((b >>> 3) & 7)));
     77             builder.append((char) ('0' + (b & 7)));
     78           }
     79           break;
     80       }
     81     }
     82     return builder.toString();
     83   }
     84 
     85   /**
     86    * Escapes bytes in the format used in protocol buffer text format, which
     87    * is the same as the format used for C string literals.  All bytes
     88    * that are not printable 7-bit ASCII characters are escaped, as well as
     89    * backslash, single-quote, and double-quote characters.  Characters for
     90    * which no defined short-hand escape sequence is defined will be escaped
     91    * using 3-digit octal sequences.
     92    */
     93   static String escapeBytes(final ByteString input) {
     94     return escapeBytes(new ByteSequence() {
     95       @Override
     96       public int size() {
     97         return input.size();
     98       }
     99       @Override
    100       public byte byteAt(int offset) {
    101         return input.byteAt(offset);
    102       }
    103     });
    104   }
    105 
    106   /**
    107    * Like {@link #escapeBytes(ByteString)}, but used for byte array.
    108    */
    109   static String escapeBytes(final byte[] input) {
    110     return escapeBytes(new ByteSequence() {
    111       @Override
    112       public int size() {
    113         return input.length;
    114       }
    115       @Override
    116       public byte byteAt(int offset) {
    117         return input[offset];
    118       }
    119     });
    120   }
    121 
    122   /**
    123    * Like {@link #escapeBytes(ByteString)}, but escapes a text string.
    124    * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped
    125    * individually as a 3-digit octal escape.  Yes, it's weird.
    126    */
    127   static String escapeText(final String input) {
    128     return escapeBytes(ByteString.copyFromUtf8(input));
    129   }
    130 
    131   /**
    132    * Escape double quotes and backslashes in a String for unicode output of a message.
    133    */
    134   static String escapeDoubleQuotesAndBackslashes(final String input) {
    135     return input.replace("\\", "\\\\").replace("\"", "\\\"");
    136   }
    137 }
    138