Home | History | Annotate | Download | only in parsing
      1 // Copyright 2011 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "src/parsing/duplicate-finder.h"
      6 
      7 namespace v8 {
      8 namespace internal {
      9 
     10 bool DuplicateFinder::AddOneByteSymbol(Vector<const uint8_t> key) {
     11   return AddSymbol(key, true);
     12 }
     13 
     14 bool DuplicateFinder::AddTwoByteSymbol(Vector<const uint16_t> key) {
     15   return AddSymbol(Vector<const uint8_t>::cast(key), false);
     16 }
     17 
     18 bool DuplicateFinder::AddSymbol(Vector<const uint8_t> key, bool is_one_byte) {
     19   uint32_t hash = Hash(key, is_one_byte);
     20   byte* encoding = BackupKey(key, is_one_byte);
     21   base::HashMap::Entry* entry = map_.LookupOrInsert(encoding, hash);
     22   int old_value = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));
     23   entry->value = reinterpret_cast<void*>(1);
     24   return old_value;
     25 }
     26 
     27 uint32_t DuplicateFinder::Hash(Vector<const uint8_t> key, bool is_one_byte) {
     28   // Primitive hash function, almost identical to the one used
     29   // for strings (except that it's seeded by the length and representation).
     30   int length = key.length();
     31   uint32_t hash = (length << 1) | (is_one_byte ? 1 : 0);
     32   for (int i = 0; i < length; i++) {
     33     uint32_t c = key[i];
     34     hash = (hash + c) * 1025;
     35     hash ^= (hash >> 6);
     36   }
     37   return hash;
     38 }
     39 
     40 bool DuplicateFinder::Match(void* first, void* second) {
     41   // Decode lengths.
     42   // Length + representation is encoded as base 128, most significant heptet
     43   // first, with a 8th bit being non-zero while there are more heptets.
     44   // The value encodes the number of bytes following, and whether the original
     45   // was Latin1.
     46   byte* s1 = reinterpret_cast<byte*>(first);
     47   byte* s2 = reinterpret_cast<byte*>(second);
     48   uint32_t length_one_byte_field = 0;
     49   byte c1;
     50   do {
     51     c1 = *s1;
     52     if (c1 != *s2) return false;
     53     length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f);
     54     s1++;
     55     s2++;
     56   } while ((c1 & 0x80) != 0);
     57   int length = static_cast<int>(length_one_byte_field >> 1);
     58   return memcmp(s1, s2, length) == 0;
     59 }
     60 
     61 byte* DuplicateFinder::BackupKey(Vector<const uint8_t> bytes,
     62                                  bool is_one_byte) {
     63   uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0);
     64   backing_store_.StartSequence();
     65   // Emit one_byte_length as base-128 encoded number, with the 7th bit set
     66   // on the byte of every heptet except the last, least significant, one.
     67   if (one_byte_length >= (1 << 7)) {
     68     if (one_byte_length >= (1 << 14)) {
     69       if (one_byte_length >= (1 << 21)) {
     70         if (one_byte_length >= (1 << 28)) {
     71           backing_store_.Add(
     72               static_cast<uint8_t>((one_byte_length >> 28) | 0x80));
     73         }
     74         backing_store_.Add(
     75             static_cast<uint8_t>((one_byte_length >> 21) | 0x80u));
     76       }
     77       backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 14) | 0x80u));
     78     }
     79     backing_store_.Add(static_cast<uint8_t>((one_byte_length >> 7) | 0x80u));
     80   }
     81   backing_store_.Add(static_cast<uint8_t>(one_byte_length & 0x7f));
     82 
     83   backing_store_.AddBlock(bytes);
     84   return backing_store_.EndSequence().start();
     85 }
     86 
     87 }  // namespace internal
     88 }  // namespace v8
     89