Home | History | Annotate | Download | only in protozero
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_
     18 #define INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_
     19 
     20 #include <inttypes.h>
     21 #include <stddef.h>
     22 
     23 #include <type_traits>
     24 
     25 #include "perfetto/base/logging.h"
     26 #include "perfetto/base/utils.h"
     27 
     28 namespace protozero {
     29 namespace proto_utils {
     30 
     31 // See https://developers.google.com/protocol-buffers/docs/encoding wire types.
     32 // This is a type encoded into the proto that provides just enough info to
     33 // find the length of the following value.
     34 enum class ProtoWireType : uint32_t {
     35   kVarInt = 0,
     36   kFixed64 = 1,
     37   kLengthDelimited = 2,
     38   kFixed32 = 5,
     39 };
     40 
     41 // This is the type defined in the proto for each field. This information
     42 // is used to decide the translation strategy when writing the trace.
     43 enum class ProtoSchemaType {
     44   kUnknown = 0,
     45   kDouble,
     46   kFloat,
     47   kInt64,
     48   kUint64,
     49   kInt32,
     50   kFixed64,
     51   kFixed32,
     52   kBool,
     53   kString,
     54   kGroup,  // Deprecated (proto2 only)
     55   kMessage,
     56   kBytes,
     57   kUint32,
     58   kEnum,
     59   kSfixed32,
     60   kSfixed64,
     61   kSint32,
     62   kSint64,
     63 };
     64 
     65 inline const char* ProtoSchemaToString(ProtoSchemaType v) {
     66   switch (v) {
     67     case ProtoSchemaType::kUnknown:
     68       return "unknown";
     69     case ProtoSchemaType::kDouble:
     70       return "double";
     71     case ProtoSchemaType::kFloat:
     72       return "float";
     73     case ProtoSchemaType::kInt64:
     74       return "int64";
     75     case ProtoSchemaType::kUint64:
     76       return "uint64";
     77     case ProtoSchemaType::kInt32:
     78       return "int32";
     79     case ProtoSchemaType::kFixed64:
     80       return "fixed64";
     81     case ProtoSchemaType::kFixed32:
     82       return "fixed32";
     83     case ProtoSchemaType::kBool:
     84       return "bool";
     85     case ProtoSchemaType::kString:
     86       return "string";
     87     case ProtoSchemaType::kGroup:
     88       return "group";
     89     case ProtoSchemaType::kMessage:
     90       return "message";
     91     case ProtoSchemaType::kBytes:
     92       return "bytes";
     93     case ProtoSchemaType::kUint32:
     94       return "uint32";
     95     case ProtoSchemaType::kEnum:
     96       return "enum";
     97     case ProtoSchemaType::kSfixed32:
     98       return "sfixed32";
     99     case ProtoSchemaType::kSfixed64:
    100       return "sfixed64";
    101     case ProtoSchemaType::kSint32:
    102       return "sint32";
    103     case ProtoSchemaType::kSint64:
    104       return "sint64";
    105   }
    106   // For gcc:
    107   PERFETTO_DCHECK(false);
    108   return "";
    109 }
    110 
    111 // Maximum message size supported: 256 MiB (4 x 7-bit due to varint encoding).
    112 constexpr size_t kMessageLengthFieldSize = 4;
    113 constexpr size_t kMaxMessageLength = (1u << (kMessageLengthFieldSize * 7)) - 1;
    114 
    115 // Field tag is encoded as 32-bit varint (5 bytes at most).
    116 // Largest value of simple (not length-delimited) field is 64-bit varint
    117 // (10 bytes at most). 15 bytes buffer is enough to store a simple field.
    118 constexpr size_t kMaxTagEncodedSize = 5;
    119 constexpr size_t kMaxSimpleFieldEncodedSize = kMaxTagEncodedSize + 10;
    120 
    121 // Proto types: (int|uint|sint)(32|64), bool, enum.
    122 constexpr uint32_t MakeTagVarInt(uint32_t field_id) {
    123   return (field_id << 3) | static_cast<uint32_t>(ProtoWireType::kVarInt);
    124 }
    125 
    126 // Proto types: fixed64, sfixed64, fixed32, sfixed32, double, float.
    127 template <typename T>
    128 constexpr uint32_t MakeTagFixed(uint32_t field_id) {
    129   static_assert(sizeof(T) == 8 || sizeof(T) == 4, "Value must be 4 or 8 bytes");
    130   return (field_id << 3) |
    131          static_cast<uint32_t>((sizeof(T) == 8 ? ProtoWireType::kFixed64
    132                                                : ProtoWireType::kFixed32));
    133 }
    134 
    135 // Proto types: string, bytes, embedded messages.
    136 constexpr uint32_t MakeTagLengthDelimited(uint32_t field_id) {
    137   return (field_id << 3) |
    138          static_cast<uint32_t>(ProtoWireType::kLengthDelimited);
    139 }
    140 
    141 // Proto types: sint64, sint32.
    142 template <typename T>
    143 inline typename std::make_unsigned<T>::type ZigZagEncode(T value) {
    144   return static_cast<typename std::make_unsigned<T>::type>(
    145       (value << 1) ^ (value >> (sizeof(T) * 8 - 1)));
    146 }
    147 
    148 template <typename T>
    149 inline uint8_t* WriteVarInt(T value, uint8_t* target) {
    150   // If value is <= 0 we must first sign extend to int64_t (see [1]).
    151   // Finally we always cast to an unsigned value to to avoid arithmetic
    152   // (sign expanding) shifts in the while loop.
    153   // [1]: "If you use int32 or int64 as the type for a negative number, the
    154   // resulting varint is always ten bytes long".
    155   // - developers.google.com/protocol-buffers/docs/encoding
    156   // So for each input type we do the following casts:
    157   // uintX_t -> uintX_t -> uintX_t
    158   // int8_t  -> int64_t -> uint64_t
    159   // int16_t -> int64_t -> uint64_t
    160   // int32_t -> int64_t -> uint64_t
    161   // int64_t -> int64_t -> uint64_t
    162   using MaybeExtendedType =
    163       typename std::conditional<std::is_unsigned<T>::value, T, int64_t>::type;
    164   using UnsignedType = typename std::make_unsigned<MaybeExtendedType>::type;
    165 
    166   MaybeExtendedType extended_value = static_cast<MaybeExtendedType>(value);
    167   UnsignedType unsigned_value = static_cast<UnsignedType>(extended_value);
    168 
    169   while (unsigned_value >= 0x80) {
    170     *target++ = static_cast<uint8_t>(unsigned_value) | 0x80;
    171     unsigned_value >>= 7;
    172   }
    173   *target = static_cast<uint8_t>(unsigned_value);
    174   return target + 1;
    175 }
    176 
    177 // Writes a fixed-size redundant encoding of the given |value|. This is
    178 // used to backfill fixed-size reservations for the length field using a
    179 // non-canonical varint encoding (e.g. \x81\x80\x80\x00 instead of \x01).
    180 // See https://github.com/google/protobuf/issues/1530.
    181 // In particular, this is used for nested messages. The size of a nested message
    182 // is not known until all its field have been written. |kMessageLengthFieldSize|
    183 // bytes are reserved to encode the size field and backfilled at the end.
    184 inline void WriteRedundantVarInt(uint32_t value, uint8_t* buf) {
    185   for (size_t i = 0; i < kMessageLengthFieldSize; ++i) {
    186     const uint8_t msb = (i < kMessageLengthFieldSize - 1) ? 0x80 : 0;
    187     buf[i] = static_cast<uint8_t>(value) | msb;
    188     value >>= 7;
    189   }
    190 }
    191 
    192 template <uint32_t field_id>
    193 void StaticAssertSingleBytePreamble() {
    194   static_assert(field_id < 16,
    195                 "Proto field id too big to fit in a single byte preamble");
    196 }
    197 
    198 // Parses a VarInt from the encoded buffer [start, end). |end| is STL-style and
    199 // points one byte past the end of buffer.
    200 // The parsed int value is stored in the output arg |value|. Returns a pointer
    201 // to the next unconsumed byte (so start < retval <= end) or |start| if the
    202 // VarInt could not be fully parsed because there was not enough space in the
    203 // buffer.
    204 inline const uint8_t* ParseVarInt(const uint8_t* start,
    205                                   const uint8_t* end,
    206                                   uint64_t* value) {
    207   const uint8_t* pos = start;
    208   uint64_t shift = 0;
    209   *value = 0;
    210   do {
    211     if (PERFETTO_UNLIKELY(pos >= end)) {
    212       *value = 0;
    213       return start;
    214     }
    215     PERFETTO_DCHECK(shift < 64ull);
    216     *value |= static_cast<uint64_t>(*pos & 0x7f) << shift;
    217     shift += 7;
    218   } while (*pos++ & 0x80);
    219   return pos;
    220 }
    221 
    222 }  // namespace proto_utils
    223 }  // namespace protozero
    224 
    225 #endif  // INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_
    226