Home | History | Annotate | Download | only in internal
      1 # Protocol Buffers - Google's data interchange format
      2 # Copyright 2008 Google Inc.  All rights reserved.
      3 # http://code.google.com/p/protobuf/
      4 #
      5 # Redistribution and use in source and binary forms, with or without
      6 # modification, are permitted provided that the following conditions are
      7 # met:
      8 #
      9 #     * Redistributions of source code must retain the above copyright
     10 # notice, this list of conditions and the following disclaimer.
     11 #     * Redistributions in binary form must reproduce the above
     12 # copyright notice, this list of conditions and the following disclaimer
     13 # in the documentation and/or other materials provided with the
     14 # distribution.
     15 #     * Neither the name of Google Inc. nor the names of its
     16 # contributors may be used to endorse or promote products derived from
     17 # this software without specific prior written permission.
     18 #
     19 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30 
     31 """Code for decoding protocol buffer primitives.
     32 
     33 This code is very similar to encoder.py -- read the docs for that module first.
     34 
     35 A "decoder" is a function with the signature:
     36   Decode(buffer, pos, end, message, field_dict)
     37 The arguments are:
     38   buffer:     The string containing the encoded message.
     39   pos:        The current position in the string.
     40   end:        The position in the string where the current message ends.  May be
     41               less than len(buffer) if we're reading a sub-message.
     42   message:    The message object into which we're parsing.
     43   field_dict: message._fields (avoids a hashtable lookup).
     44 The decoder reads the field and stores it into field_dict, returning the new
     45 buffer position.  A decoder for a repeated field may proactively decode all of
     46 the elements of that field, if they appear consecutively.
     47 
     48 Note that decoders may throw any of the following:
     49   IndexError:  Indicates a truncated message.
     50   struct.error:  Unpacking of a fixed-width field failed.
     51   message.DecodeError:  Other errors.
     52 
     53 Decoders are expected to raise an exception if they are called with pos > end.
     54 This allows callers to be lax about bounds checking:  it's fine to read past
     55 "end" as long as you are sure that someone else will notice and throw an
     56 exception later on.
     57 
     58 Something up the call stack is expected to catch IndexError and struct.error
     59 and convert them to message.DecodeError.
     60 
     61 Decoders are constructed using decoder constructors with the signature:
     62   MakeDecoder(field_number, is_repeated, is_packed, key, new_default)
     63 The arguments are:
     64   field_number:  The field number of the field we want to decode.
     65   is_repeated:   Is the field a repeated field? (bool)
     66   is_packed:     Is the field a packed field? (bool)
     67   key:           The key to use when looking up the field within field_dict.
     68                  (This is actually the FieldDescriptor but nothing in this
     69                  file should depend on that.)
     70   new_default:   A function which takes a message object as a parameter and
     71                  returns a new instance of the default value for this field.
     72                  (This is called for repeated fields and sub-messages, when an
     73                  instance does not already exist.)
     74 
     75 As with encoders, we define a decoder constructor for every type of field.
     76 Then, for every field of every message class we construct an actual decoder.
     77 That decoder goes into a dict indexed by tag, so when we decode a message
     78 we repeatedly read a tag, look up the corresponding decoder, and invoke it.
     79 """
     80 
     81 __author__ = 'kenton (at] google.com (Kenton Varda)'
     82 
     83 import struct
     84 from google.protobuf.internal import encoder
     85 from google.protobuf.internal import wire_format
     86 from google.protobuf import message
     87 
     88 
     89 # This will overflow and thus become IEEE-754 "infinity".  We would use
     90 # "float('inf')" but it doesn't work on Windows pre-Python-2.6.
     91 _POS_INF = 1e10000
     92 _NEG_INF = -_POS_INF
     93 _NAN = _POS_INF * 0
     94 
     95 
     96 # This is not for optimization, but rather to avoid conflicts with local
     97 # variables named "message".
     98 _DecodeError = message.DecodeError
     99 
    100 
    101 def _VarintDecoder(mask):
    102   """Return an encoder for a basic varint value (does not include tag).
    103 
    104   Decoded values will be bitwise-anded with the given mask before being
    105   returned, e.g. to limit them to 32 bits.  The returned decoder does not
    106   take the usual "end" parameter -- the caller is expected to do bounds checking
    107   after the fact (often the caller can defer such checking until later).  The
    108   decoder returns a (value, new_pos) pair.
    109   """
    110 
    111   local_ord = ord
    112   def DecodeVarint(buffer, pos):
    113     result = 0
    114     shift = 0
    115     while 1:
    116       b = local_ord(buffer[pos])
    117       result |= ((b & 0x7f) << shift)
    118       pos += 1
    119       if not (b & 0x80):
    120         result &= mask
    121         return (result, pos)
    122       shift += 7
    123       if shift >= 64:
    124         raise _DecodeError('Too many bytes when decoding varint.')
    125   return DecodeVarint
    126 
    127 
    128 def _SignedVarintDecoder(mask):
    129   """Like _VarintDecoder() but decodes signed values."""
    130 
    131   local_ord = ord
    132   def DecodeVarint(buffer, pos):
    133     result = 0
    134     shift = 0
    135     while 1:
    136       b = local_ord(buffer[pos])
    137       result |= ((b & 0x7f) << shift)
    138       pos += 1
    139       if not (b & 0x80):
    140         if result > 0x7fffffffffffffff:
    141           result -= (1 << 64)
    142           result |= ~mask
    143         else:
    144           result &= mask
    145         return (result, pos)
    146       shift += 7
    147       if shift >= 64:
    148         raise _DecodeError('Too many bytes when decoding varint.')
    149   return DecodeVarint
    150 
    151 
    152 _DecodeVarint = _VarintDecoder((1 << 64) - 1)
    153 _DecodeSignedVarint = _SignedVarintDecoder((1 << 64) - 1)
    154 
    155 # Use these versions for values which must be limited to 32 bits.
    156 _DecodeVarint32 = _VarintDecoder((1 << 32) - 1)
    157 _DecodeSignedVarint32 = _SignedVarintDecoder((1 << 32) - 1)
    158 
    159 
    160 def ReadTag(buffer, pos):
    161   """Read a tag from the buffer, and return a (tag_bytes, new_pos) tuple.
    162 
    163   We return the raw bytes of the tag rather than decoding them.  The raw
    164   bytes can then be used to look up the proper decoder.  This effectively allows
    165   us to trade some work that would be done in pure-python (decoding a varint)
    166   for work that is done in C (searching for a byte string in a hash table).
    167   In a low-level language it would be much cheaper to decode the varint and
    168   use that, but not in Python.
    169   """
    170 
    171   start = pos
    172   while ord(buffer[pos]) & 0x80:
    173     pos += 1
    174   pos += 1
    175   return (buffer[start:pos], pos)
    176 
    177 
    178 # --------------------------------------------------------------------
    179 
    180 
    181 def _SimpleDecoder(wire_type, decode_value):
    182   """Return a constructor for a decoder for fields of a particular type.
    183 
    184   Args:
    185       wire_type:  The field's wire type.
    186       decode_value:  A function which decodes an individual value, e.g.
    187         _DecodeVarint()
    188   """
    189 
    190   def SpecificDecoder(field_number, is_repeated, is_packed, key, new_default):
    191     if is_packed:
    192       local_DecodeVarint = _DecodeVarint
    193       def DecodePackedField(buffer, pos, end, message, field_dict):
    194         value = field_dict.get(key)
    195         if value is None:
    196           value = field_dict.setdefault(key, new_default(message))
    197         (endpoint, pos) = local_DecodeVarint(buffer, pos)
    198         endpoint += pos
    199         if endpoint > end:
    200           raise _DecodeError('Truncated message.')
    201         while pos < endpoint:
    202           (element, pos) = decode_value(buffer, pos)
    203           value.append(element)
    204         if pos > endpoint:
    205           del value[-1]   # Discard corrupt value.
    206           raise _DecodeError('Packed element was truncated.')
    207         return pos
    208       return DecodePackedField
    209     elif is_repeated:
    210       tag_bytes = encoder.TagBytes(field_number, wire_type)
    211       tag_len = len(tag_bytes)
    212       def DecodeRepeatedField(buffer, pos, end, message, field_dict):
    213         value = field_dict.get(key)
    214         if value is None:
    215           value = field_dict.setdefault(key, new_default(message))
    216         while 1:
    217           (element, new_pos) = decode_value(buffer, pos)
    218           value.append(element)
    219           # Predict that the next tag is another copy of the same repeated
    220           # field.
    221           pos = new_pos + tag_len
    222           if buffer[new_pos:pos] != tag_bytes or new_pos >= end:
    223             # Prediction failed.  Return.
    224             if new_pos > end:
    225               raise _DecodeError('Truncated message.')
    226             return new_pos
    227       return DecodeRepeatedField
    228     else:
    229       def DecodeField(buffer, pos, end, message, field_dict):
    230         (field_dict[key], pos) = decode_value(buffer, pos)
    231         if pos > end:
    232           del field_dict[key]  # Discard corrupt value.
    233           raise _DecodeError('Truncated message.')
    234         return pos
    235       return DecodeField
    236 
    237   return SpecificDecoder
    238 
    239 
    240 def _ModifiedDecoder(wire_type, decode_value, modify_value):
    241   """Like SimpleDecoder but additionally invokes modify_value on every value
    242   before storing it.  Usually modify_value is ZigZagDecode.
    243   """
    244 
    245   # Reusing _SimpleDecoder is slightly slower than copying a bunch of code, but
    246   # not enough to make a significant difference.
    247 
    248   def InnerDecode(buffer, pos):
    249     (result, new_pos) = decode_value(buffer, pos)
    250     return (modify_value(result), new_pos)
    251   return _SimpleDecoder(wire_type, InnerDecode)
    252 
    253 
    254 def _StructPackDecoder(wire_type, format):
    255   """Return a constructor for a decoder for a fixed-width field.
    256 
    257   Args:
    258       wire_type:  The field's wire type.
    259       format:  The format string to pass to struct.unpack().
    260   """
    261 
    262   value_size = struct.calcsize(format)
    263   local_unpack = struct.unpack
    264 
    265   # Reusing _SimpleDecoder is slightly slower than copying a bunch of code, but
    266   # not enough to make a significant difference.
    267 
    268   # Note that we expect someone up-stack to catch struct.error and convert
    269   # it to _DecodeError -- this way we don't have to set up exception-
    270   # handling blocks every time we parse one value.
    271 
    272   def InnerDecode(buffer, pos):
    273     new_pos = pos + value_size
    274     result = local_unpack(format, buffer[pos:new_pos])[0]
    275     return (result, new_pos)
    276   return _SimpleDecoder(wire_type, InnerDecode)
    277 
    278 
    279 def _FloatDecoder():
    280   """Returns a decoder for a float field.
    281 
    282   This code works around a bug in struct.unpack for non-finite 32-bit
    283   floating-point values.
    284   """
    285 
    286   local_unpack = struct.unpack
    287 
    288   def InnerDecode(buffer, pos):
    289     # We expect a 32-bit value in little-endian byte order.  Bit 1 is the sign
    290     # bit, bits 2-9 represent the exponent, and bits 10-32 are the significand.
    291     new_pos = pos + 4
    292     float_bytes = buffer[pos:new_pos]
    293 
    294     # If this value has all its exponent bits set, then it's non-finite.
    295     # In Python 2.4, struct.unpack will convert it to a finite 64-bit value.
    296     # To avoid that, we parse it specially.
    297     if ((float_bytes[3] in '\x7F\xFF')
    298         and (float_bytes[2] >= '\x80')):
    299       # If at least one significand bit is set...
    300       if float_bytes[0:3] != '\x00\x00\x80':
    301         return (_NAN, new_pos)
    302       # If sign bit is set...
    303       if float_bytes[3] == '\xFF':
    304         return (_NEG_INF, new_pos)
    305       return (_POS_INF, new_pos)
    306 
    307     # Note that we expect someone up-stack to catch struct.error and convert
    308     # it to _DecodeError -- this way we don't have to set up exception-
    309     # handling blocks every time we parse one value.
    310     result = local_unpack('<f', float_bytes)[0]
    311     return (result, new_pos)
    312   return _SimpleDecoder(wire_format.WIRETYPE_FIXED32, InnerDecode)
    313 
    314 
    315 def _DoubleDecoder():
    316   """Returns a decoder for a double field.
    317 
    318   This code works around a bug in struct.unpack for not-a-number.
    319   """
    320 
    321   local_unpack = struct.unpack
    322 
    323   def InnerDecode(buffer, pos):
    324     # We expect a 64-bit value in little-endian byte order.  Bit 1 is the sign
    325     # bit, bits 2-12 represent the exponent, and bits 13-64 are the significand.
    326     new_pos = pos + 8
    327     double_bytes = buffer[pos:new_pos]
    328 
    329     # If this value has all its exponent bits set and at least one significand
    330     # bit set, it's not a number.  In Python 2.4, struct.unpack will treat it
    331     # as inf or -inf.  To avoid that, we treat it specially.
    332     if ((double_bytes[7] in '\x7F\xFF')
    333         and (double_bytes[6] >= '\xF0')
    334         and (double_bytes[0:7] != '\x00\x00\x00\x00\x00\x00\xF0')):
    335       return (_NAN, new_pos)
    336 
    337     # Note that we expect someone up-stack to catch struct.error and convert
    338     # it to _DecodeError -- this way we don't have to set up exception-
    339     # handling blocks every time we parse one value.
    340     result = local_unpack('<d', double_bytes)[0]
    341     return (result, new_pos)
    342   return _SimpleDecoder(wire_format.WIRETYPE_FIXED64, InnerDecode)
    343 
    344 
    345 # --------------------------------------------------------------------
    346 
    347 
    348 Int32Decoder = EnumDecoder = _SimpleDecoder(
    349     wire_format.WIRETYPE_VARINT, _DecodeSignedVarint32)
    350 
    351 Int64Decoder = _SimpleDecoder(
    352     wire_format.WIRETYPE_VARINT, _DecodeSignedVarint)
    353 
    354 UInt32Decoder = _SimpleDecoder(wire_format.WIRETYPE_VARINT, _DecodeVarint32)
    355 UInt64Decoder = _SimpleDecoder(wire_format.WIRETYPE_VARINT, _DecodeVarint)
    356 
    357 SInt32Decoder = _ModifiedDecoder(
    358     wire_format.WIRETYPE_VARINT, _DecodeVarint32, wire_format.ZigZagDecode)
    359 SInt64Decoder = _ModifiedDecoder(
    360     wire_format.WIRETYPE_VARINT, _DecodeVarint, wire_format.ZigZagDecode)
    361 
    362 # Note that Python conveniently guarantees that when using the '<' prefix on
    363 # formats, they will also have the same size across all platforms (as opposed
    364 # to without the prefix, where their sizes depend on the C compiler's basic
    365 # type sizes).
    366 Fixed32Decoder  = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<I')
    367 Fixed64Decoder  = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<Q')
    368 SFixed32Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED32, '<i')
    369 SFixed64Decoder = _StructPackDecoder(wire_format.WIRETYPE_FIXED64, '<q')
    370 FloatDecoder = _FloatDecoder()
    371 DoubleDecoder = _DoubleDecoder()
    372 
    373 BoolDecoder = _ModifiedDecoder(
    374     wire_format.WIRETYPE_VARINT, _DecodeVarint, bool)
    375 
    376 
    377 def StringDecoder(field_number, is_repeated, is_packed, key, new_default):
    378   """Returns a decoder for a string field."""
    379 
    380   local_DecodeVarint = _DecodeVarint
    381   local_unicode = unicode
    382 
    383   assert not is_packed
    384   if is_repeated:
    385     tag_bytes = encoder.TagBytes(field_number,
    386                                  wire_format.WIRETYPE_LENGTH_DELIMITED)
    387     tag_len = len(tag_bytes)
    388     def DecodeRepeatedField(buffer, pos, end, message, field_dict):
    389       value = field_dict.get(key)
    390       if value is None:
    391         value = field_dict.setdefault(key, new_default(message))
    392       while 1:
    393         (size, pos) = local_DecodeVarint(buffer, pos)
    394         new_pos = pos + size
    395         if new_pos > end:
    396           raise _DecodeError('Truncated string.')
    397         value.append(local_unicode(buffer[pos:new_pos], 'utf-8'))
    398         # Predict that the next tag is another copy of the same repeated field.
    399         pos = new_pos + tag_len
    400         if buffer[new_pos:pos] != tag_bytes or new_pos == end:
    401           # Prediction failed.  Return.
    402           return new_pos
    403     return DecodeRepeatedField
    404   else:
    405     def DecodeField(buffer, pos, end, message, field_dict):
    406       (size, pos) = local_DecodeVarint(buffer, pos)
    407       new_pos = pos + size
    408       if new_pos > end:
    409         raise _DecodeError('Truncated string.')
    410       field_dict[key] = local_unicode(buffer[pos:new_pos], 'utf-8')
    411       return new_pos
    412     return DecodeField
    413 
    414 
    415 def BytesDecoder(field_number, is_repeated, is_packed, key, new_default):
    416   """Returns a decoder for a bytes field."""
    417 
    418   local_DecodeVarint = _DecodeVarint
    419 
    420   assert not is_packed
    421   if is_repeated:
    422     tag_bytes = encoder.TagBytes(field_number,
    423                                  wire_format.WIRETYPE_LENGTH_DELIMITED)
    424     tag_len = len(tag_bytes)
    425     def DecodeRepeatedField(buffer, pos, end, message, field_dict):
    426       value = field_dict.get(key)
    427       if value is None:
    428         value = field_dict.setdefault(key, new_default(message))
    429       while 1:
    430         (size, pos) = local_DecodeVarint(buffer, pos)
    431         new_pos = pos + size
    432         if new_pos > end:
    433           raise _DecodeError('Truncated string.')
    434         value.append(buffer[pos:new_pos])
    435         # Predict that the next tag is another copy of the same repeated field.
    436         pos = new_pos + tag_len
    437         if buffer[new_pos:pos] != tag_bytes or new_pos == end:
    438           # Prediction failed.  Return.
    439           return new_pos
    440     return DecodeRepeatedField
    441   else:
    442     def DecodeField(buffer, pos, end, message, field_dict):
    443       (size, pos) = local_DecodeVarint(buffer, pos)
    444       new_pos = pos + size
    445       if new_pos > end:
    446         raise _DecodeError('Truncated string.')
    447       field_dict[key] = buffer[pos:new_pos]
    448       return new_pos
    449     return DecodeField
    450 
    451 
    452 def GroupDecoder(field_number, is_repeated, is_packed, key, new_default):
    453   """Returns a decoder for a group field."""
    454 
    455   end_tag_bytes = encoder.TagBytes(field_number,
    456                                    wire_format.WIRETYPE_END_GROUP)
    457   end_tag_len = len(end_tag_bytes)
    458 
    459   assert not is_packed
    460   if is_repeated:
    461     tag_bytes = encoder.TagBytes(field_number,
    462                                  wire_format.WIRETYPE_START_GROUP)
    463     tag_len = len(tag_bytes)
    464     def DecodeRepeatedField(buffer, pos, end, message, field_dict):
    465       value = field_dict.get(key)
    466       if value is None:
    467         value = field_dict.setdefault(key, new_default(message))
    468       while 1:
    469         value = field_dict.get(key)
    470         if value is None:
    471           value = field_dict.setdefault(key, new_default(message))
    472         # Read sub-message.
    473         pos = value.add()._InternalParse(buffer, pos, end)
    474         # Read end tag.
    475         new_pos = pos+end_tag_len
    476         if buffer[pos:new_pos] != end_tag_bytes or new_pos > end:
    477           raise _DecodeError('Missing group end tag.')
    478         # Predict that the next tag is another copy of the same repeated field.
    479         pos = new_pos + tag_len
    480         if buffer[new_pos:pos] != tag_bytes or new_pos == end:
    481           # Prediction failed.  Return.
    482           return new_pos
    483     return DecodeRepeatedField
    484   else:
    485     def DecodeField(buffer, pos, end, message, field_dict):
    486       value = field_dict.get(key)
    487       if value is None:
    488         value = field_dict.setdefault(key, new_default(message))
    489       # Read sub-message.
    490       pos = value._InternalParse(buffer, pos, end)
    491       # Read end tag.
    492       new_pos = pos+end_tag_len
    493       if buffer[pos:new_pos] != end_tag_bytes or new_pos > end:
    494         raise _DecodeError('Missing group end tag.')
    495       return new_pos
    496     return DecodeField
    497 
    498 
    499 def MessageDecoder(field_number, is_repeated, is_packed, key, new_default):
    500   """Returns a decoder for a message field."""
    501 
    502   local_DecodeVarint = _DecodeVarint
    503 
    504   assert not is_packed
    505   if is_repeated:
    506     tag_bytes = encoder.TagBytes(field_number,
    507                                  wire_format.WIRETYPE_LENGTH_DELIMITED)
    508     tag_len = len(tag_bytes)
    509     def DecodeRepeatedField(buffer, pos, end, message, field_dict):
    510       value = field_dict.get(key)
    511       if value is None:
    512         value = field_dict.setdefault(key, new_default(message))
    513       while 1:
    514         value = field_dict.get(key)
    515         if value is None:
    516           value = field_dict.setdefault(key, new_default(message))
    517         # Read length.
    518         (size, pos) = local_DecodeVarint(buffer, pos)
    519         new_pos = pos + size
    520         if new_pos > end:
    521           raise _DecodeError('Truncated message.')
    522         # Read sub-message.
    523         if value.add()._InternalParse(buffer, pos, new_pos) != new_pos:
    524           # The only reason _InternalParse would return early is if it
    525           # encountered an end-group tag.
    526           raise _DecodeError('Unexpected end-group tag.')
    527         # Predict that the next tag is another copy of the same repeated field.
    528         pos = new_pos + tag_len
    529         if buffer[new_pos:pos] != tag_bytes or new_pos == end:
    530           # Prediction failed.  Return.
    531           return new_pos
    532     return DecodeRepeatedField
    533   else:
    534     def DecodeField(buffer, pos, end, message, field_dict):
    535       value = field_dict.get(key)
    536       if value is None:
    537         value = field_dict.setdefault(key, new_default(message))
    538       # Read length.
    539       (size, pos) = local_DecodeVarint(buffer, pos)
    540       new_pos = pos + size
    541       if new_pos > end:
    542         raise _DecodeError('Truncated message.')
    543       # Read sub-message.
    544       if value._InternalParse(buffer, pos, new_pos) != new_pos:
    545         # The only reason _InternalParse would return early is if it encountered
    546         # an end-group tag.
    547         raise _DecodeError('Unexpected end-group tag.')
    548       return new_pos
    549     return DecodeField
    550 
    551 
    552 # --------------------------------------------------------------------
    553 
    554 MESSAGE_SET_ITEM_TAG = encoder.TagBytes(1, wire_format.WIRETYPE_START_GROUP)
    555 
    556 def MessageSetItemDecoder(extensions_by_number):
    557   """Returns a decoder for a MessageSet item.
    558 
    559   The parameter is the _extensions_by_number map for the message class.
    560 
    561   The message set message looks like this:
    562     message MessageSet {
    563       repeated group Item = 1 {
    564         required int32 type_id = 2;
    565         required string message = 3;
    566       }
    567     }
    568   """
    569 
    570   type_id_tag_bytes = encoder.TagBytes(2, wire_format.WIRETYPE_VARINT)
    571   message_tag_bytes = encoder.TagBytes(3, wire_format.WIRETYPE_LENGTH_DELIMITED)
    572   item_end_tag_bytes = encoder.TagBytes(1, wire_format.WIRETYPE_END_GROUP)
    573 
    574   local_ReadTag = ReadTag
    575   local_DecodeVarint = _DecodeVarint
    576   local_SkipField = SkipField
    577 
    578   def DecodeItem(buffer, pos, end, message, field_dict):
    579     message_set_item_start = pos
    580     type_id = -1
    581     message_start = -1
    582     message_end = -1
    583 
    584     # Technically, type_id and message can appear in any order, so we need
    585     # a little loop here.
    586     while 1:
    587       (tag_bytes, pos) = local_ReadTag(buffer, pos)
    588       if tag_bytes == type_id_tag_bytes:
    589         (type_id, pos) = local_DecodeVarint(buffer, pos)
    590       elif tag_bytes == message_tag_bytes:
    591         (size, message_start) = local_DecodeVarint(buffer, pos)
    592         pos = message_end = message_start + size
    593       elif tag_bytes == item_end_tag_bytes:
    594         break
    595       else:
    596         pos = SkipField(buffer, pos, end, tag_bytes)
    597         if pos == -1:
    598           raise _DecodeError('Missing group end tag.')
    599 
    600     if pos > end:
    601       raise _DecodeError('Truncated message.')
    602 
    603     if type_id == -1:
    604       raise _DecodeError('MessageSet item missing type_id.')
    605     if message_start == -1:
    606       raise _DecodeError('MessageSet item missing message.')
    607 
    608     extension = extensions_by_number.get(type_id)
    609     if extension is not None:
    610       value = field_dict.get(extension)
    611       if value is None:
    612         value = field_dict.setdefault(
    613             extension, extension.message_type._concrete_class())
    614       if value._InternalParse(buffer, message_start,message_end) != message_end:
    615         # The only reason _InternalParse would return early is if it encountered
    616         # an end-group tag.
    617         raise _DecodeError('Unexpected end-group tag.')
    618     else:
    619       if not message._unknown_fields:
    620         message._unknown_fields = []
    621       message._unknown_fields.append((MESSAGE_SET_ITEM_TAG,
    622                                       buffer[message_set_item_start:pos]))
    623 
    624     return pos
    625 
    626   return DecodeItem
    627 
    628 # --------------------------------------------------------------------
    629 # Optimization is not as heavy here because calls to SkipField() are rare,
    630 # except for handling end-group tags.
    631 
    632 def _SkipVarint(buffer, pos, end):
    633   """Skip a varint value.  Returns the new position."""
    634 
    635   while ord(buffer[pos]) & 0x80:
    636     pos += 1
    637   pos += 1
    638   if pos > end:
    639     raise _DecodeError('Truncated message.')
    640   return pos
    641 
    642 def _SkipFixed64(buffer, pos, end):
    643   """Skip a fixed64 value.  Returns the new position."""
    644 
    645   pos += 8
    646   if pos > end:
    647     raise _DecodeError('Truncated message.')
    648   return pos
    649 
    650 def _SkipLengthDelimited(buffer, pos, end):
    651   """Skip a length-delimited value.  Returns the new position."""
    652 
    653   (size, pos) = _DecodeVarint(buffer, pos)
    654   pos += size
    655   if pos > end:
    656     raise _DecodeError('Truncated message.')
    657   return pos
    658 
    659 def _SkipGroup(buffer, pos, end):
    660   """Skip sub-group.  Returns the new position."""
    661 
    662   while 1:
    663     (tag_bytes, pos) = ReadTag(buffer, pos)
    664     new_pos = SkipField(buffer, pos, end, tag_bytes)
    665     if new_pos == -1:
    666       return pos
    667     pos = new_pos
    668 
    669 def _EndGroup(buffer, pos, end):
    670   """Skipping an END_GROUP tag returns -1 to tell the parent loop to break."""
    671 
    672   return -1
    673 
    674 def _SkipFixed32(buffer, pos, end):
    675   """Skip a fixed32 value.  Returns the new position."""
    676 
    677   pos += 4
    678   if pos > end:
    679     raise _DecodeError('Truncated message.')
    680   return pos
    681 
    682 def _RaiseInvalidWireType(buffer, pos, end):
    683   """Skip function for unknown wire types.  Raises an exception."""
    684 
    685   raise _DecodeError('Tag had invalid wire type.')
    686 
    687 def _FieldSkipper():
    688   """Constructs the SkipField function."""
    689 
    690   WIRETYPE_TO_SKIPPER = [
    691       _SkipVarint,
    692       _SkipFixed64,
    693       _SkipLengthDelimited,
    694       _SkipGroup,
    695       _EndGroup,
    696       _SkipFixed32,
    697       _RaiseInvalidWireType,
    698       _RaiseInvalidWireType,
    699       ]
    700 
    701   wiretype_mask = wire_format.TAG_TYPE_MASK
    702   local_ord = ord
    703 
    704   def SkipField(buffer, pos, end, tag_bytes):
    705     """Skips a field with the specified tag.
    706 
    707     |pos| should point to the byte immediately after the tag.
    708 
    709     Returns:
    710         The new position (after the tag value), or -1 if the tag is an end-group
    711         tag (in which case the calling loop should break).
    712     """
    713 
    714     # The wire type is always in the first byte since varints are little-endian.
    715     wire_type = local_ord(tag_bytes[0]) & wiretype_mask
    716     return WIRETYPE_TO_SKIPPER[wire_type](buffer, pos, end)
    717 
    718   return SkipField
    719 
    720 SkipField = _FieldSkipper()
    721