1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_ 18 #define INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_ 19 20 #include <inttypes.h> 21 #include <stddef.h> 22 23 #include <type_traits> 24 25 #include "perfetto/base/logging.h" 26 #include "perfetto/base/utils.h" 27 28 namespace protozero { 29 namespace proto_utils { 30 31 // See https://developers.google.com/protocol-buffers/docs/encoding wire types. 32 // This is a type encoded into the proto that provides just enough info to 33 // find the length of the following value. 34 enum class ProtoWireType : uint32_t { 35 kVarInt = 0, 36 kFixed64 = 1, 37 kLengthDelimited = 2, 38 kFixed32 = 5, 39 }; 40 41 // This is the type defined in the proto for each field. This information 42 // is used to decide the translation strategy when writing the trace. 43 enum class ProtoSchemaType { 44 kUnknown = 0, 45 kDouble, 46 kFloat, 47 kInt64, 48 kUint64, 49 kInt32, 50 kFixed64, 51 kFixed32, 52 kBool, 53 kString, 54 kGroup, // Deprecated (proto2 only) 55 kMessage, 56 kBytes, 57 kUint32, 58 kEnum, 59 kSfixed32, 60 kSfixed64, 61 kSint32, 62 kSint64, 63 }; 64 65 inline const char* ProtoSchemaToString(ProtoSchemaType v) { 66 switch (v) { 67 case ProtoSchemaType::kUnknown: 68 return "unknown"; 69 case ProtoSchemaType::kDouble: 70 return "double"; 71 case ProtoSchemaType::kFloat: 72 return "float"; 73 case ProtoSchemaType::kInt64: 74 return "int64"; 75 case ProtoSchemaType::kUint64: 76 return "uint64"; 77 case ProtoSchemaType::kInt32: 78 return "int32"; 79 case ProtoSchemaType::kFixed64: 80 return "fixed64"; 81 case ProtoSchemaType::kFixed32: 82 return "fixed32"; 83 case ProtoSchemaType::kBool: 84 return "bool"; 85 case ProtoSchemaType::kString: 86 return "string"; 87 case ProtoSchemaType::kGroup: 88 return "group"; 89 case ProtoSchemaType::kMessage: 90 return "message"; 91 case ProtoSchemaType::kBytes: 92 return "bytes"; 93 case ProtoSchemaType::kUint32: 94 return "uint32"; 95 case ProtoSchemaType::kEnum: 96 return "enum"; 97 case ProtoSchemaType::kSfixed32: 98 return "sfixed32"; 99 case ProtoSchemaType::kSfixed64: 100 return "sfixed64"; 101 case ProtoSchemaType::kSint32: 102 return "sint32"; 103 case ProtoSchemaType::kSint64: 104 return "sint64"; 105 } 106 // For gcc: 107 PERFETTO_DCHECK(false); 108 return ""; 109 } 110 111 // Maximum message size supported: 256 MiB (4 x 7-bit due to varint encoding). 112 constexpr size_t kMessageLengthFieldSize = 4; 113 constexpr size_t kMaxMessageLength = (1u << (kMessageLengthFieldSize * 7)) - 1; 114 115 // Field tag is encoded as 32-bit varint (5 bytes at most). 116 // Largest value of simple (not length-delimited) field is 64-bit varint 117 // (10 bytes at most). 15 bytes buffer is enough to store a simple field. 118 constexpr size_t kMaxTagEncodedSize = 5; 119 constexpr size_t kMaxSimpleFieldEncodedSize = kMaxTagEncodedSize + 10; 120 121 // Proto types: (int|uint|sint)(32|64), bool, enum. 122 constexpr uint32_t MakeTagVarInt(uint32_t field_id) { 123 return (field_id << 3) | static_cast<uint32_t>(ProtoWireType::kVarInt); 124 } 125 126 // Proto types: fixed64, sfixed64, fixed32, sfixed32, double, float. 127 template <typename T> 128 constexpr uint32_t MakeTagFixed(uint32_t field_id) { 129 static_assert(sizeof(T) == 8 || sizeof(T) == 4, "Value must be 4 or 8 bytes"); 130 return (field_id << 3) | 131 static_cast<uint32_t>((sizeof(T) == 8 ? ProtoWireType::kFixed64 132 : ProtoWireType::kFixed32)); 133 } 134 135 // Proto types: string, bytes, embedded messages. 136 constexpr uint32_t MakeTagLengthDelimited(uint32_t field_id) { 137 return (field_id << 3) | 138 static_cast<uint32_t>(ProtoWireType::kLengthDelimited); 139 } 140 141 // Proto types: sint64, sint32. 142 template <typename T> 143 inline typename std::make_unsigned<T>::type ZigZagEncode(T value) { 144 return static_cast<typename std::make_unsigned<T>::type>( 145 (value << 1) ^ (value >> (sizeof(T) * 8 - 1))); 146 } 147 148 template <typename T> 149 inline uint8_t* WriteVarInt(T value, uint8_t* target) { 150 // If value is <= 0 we must first sign extend to int64_t (see [1]). 151 // Finally we always cast to an unsigned value to to avoid arithmetic 152 // (sign expanding) shifts in the while loop. 153 // [1]: "If you use int32 or int64 as the type for a negative number, the 154 // resulting varint is always ten bytes long". 155 // - developers.google.com/protocol-buffers/docs/encoding 156 // So for each input type we do the following casts: 157 // uintX_t -> uintX_t -> uintX_t 158 // int8_t -> int64_t -> uint64_t 159 // int16_t -> int64_t -> uint64_t 160 // int32_t -> int64_t -> uint64_t 161 // int64_t -> int64_t -> uint64_t 162 using MaybeExtendedType = 163 typename std::conditional<std::is_unsigned<T>::value, T, int64_t>::type; 164 using UnsignedType = typename std::make_unsigned<MaybeExtendedType>::type; 165 166 MaybeExtendedType extended_value = static_cast<MaybeExtendedType>(value); 167 UnsignedType unsigned_value = static_cast<UnsignedType>(extended_value); 168 169 while (unsigned_value >= 0x80) { 170 *target++ = static_cast<uint8_t>(unsigned_value) | 0x80; 171 unsigned_value >>= 7; 172 } 173 *target = static_cast<uint8_t>(unsigned_value); 174 return target + 1; 175 } 176 177 // Writes a fixed-size redundant encoding of the given |value|. This is 178 // used to backfill fixed-size reservations for the length field using a 179 // non-canonical varint encoding (e.g. \x81\x80\x80\x00 instead of \x01). 180 // See https://github.com/google/protobuf/issues/1530. 181 // In particular, this is used for nested messages. The size of a nested message 182 // is not known until all its field have been written. |kMessageLengthFieldSize| 183 // bytes are reserved to encode the size field and backfilled at the end. 184 inline void WriteRedundantVarInt(uint32_t value, uint8_t* buf) { 185 for (size_t i = 0; i < kMessageLengthFieldSize; ++i) { 186 const uint8_t msb = (i < kMessageLengthFieldSize - 1) ? 0x80 : 0; 187 buf[i] = static_cast<uint8_t>(value) | msb; 188 value >>= 7; 189 } 190 } 191 192 template <uint32_t field_id> 193 void StaticAssertSingleBytePreamble() { 194 static_assert(field_id < 16, 195 "Proto field id too big to fit in a single byte preamble"); 196 } 197 198 // Parses a VarInt from the encoded buffer [start, end). |end| is STL-style and 199 // points one byte past the end of buffer. 200 // The parsed int value is stored in the output arg |value|. Returns a pointer 201 // to the next unconsumed byte (so start < retval <= end) or |start| if the 202 // VarInt could not be fully parsed because there was not enough space in the 203 // buffer. 204 inline const uint8_t* ParseVarInt(const uint8_t* start, 205 const uint8_t* end, 206 uint64_t* value) { 207 const uint8_t* pos = start; 208 uint64_t shift = 0; 209 *value = 0; 210 do { 211 if (PERFETTO_UNLIKELY(pos >= end)) { 212 *value = 0; 213 return start; 214 } 215 PERFETTO_DCHECK(shift < 64ull); 216 *value |= static_cast<uint64_t>(*pos & 0x7f) << shift; 217 shift += 7; 218 } while (*pos++ & 0x80); 219 return pos; 220 } 221 222 } // namespace proto_utils 223 } // namespace protozero 224 225 #endif // INCLUDE_PERFETTO_PROTOZERO_PROTO_UTILS_H_ 226