Home | History | Annotate | Download | only in src
      1 // Copyright 2014 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 
      6 #include "src/unicode-inl.h"
      7 #include "src/unicode-decoder.h"
      8 #include <stdio.h>
      9 #include <stdlib.h>
     10 
     11 namespace unibrow {
     12 
     13 void Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length,
     14                             const uint8_t* stream, size_t stream_length) {
     15   // Assume everything will fit in the buffer and stream won't be needed.
     16   last_byte_of_buffer_unused_ = false;
     17   unbuffered_start_ = NULL;
     18   unbuffered_length_ = 0;
     19   bool writing_to_buffer = true;
     20   // Loop until stream is read, writing to buffer as long as buffer has space.
     21   size_t utf16_length = 0;
     22   while (stream_length != 0) {
     23     size_t cursor = 0;
     24     uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
     25     DCHECK(cursor > 0 && cursor <= stream_length);
     26     stream += cursor;
     27     stream_length -= cursor;
     28     bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode;
     29     utf16_length += is_two_characters ? 2 : 1;
     30     // Don't need to write to the buffer, but still need utf16_length.
     31     if (!writing_to_buffer) continue;
     32     // Write out the characters to the buffer.
     33     // Must check for equality with buffer_length as we've already updated it.
     34     if (utf16_length <= buffer_length) {
     35       if (is_two_characters) {
     36         *buffer++ = Utf16::LeadSurrogate(character);
     37         *buffer++ = Utf16::TrailSurrogate(character);
     38       } else {
     39         *buffer++ = character;
     40       }
     41       if (utf16_length == buffer_length) {
     42         // Just wrote last character of buffer
     43         writing_to_buffer = false;
     44         unbuffered_start_ = stream;
     45         unbuffered_length_ = stream_length;
     46       }
     47       continue;
     48     }
     49     // Have gone over buffer.
     50     // Last char of buffer is unused, set cursor back.
     51     DCHECK(is_two_characters);
     52     writing_to_buffer = false;
     53     last_byte_of_buffer_unused_ = true;
     54     unbuffered_start_ = stream - cursor;
     55     unbuffered_length_ = stream_length + cursor;
     56   }
     57   utf16_length_ = utf16_length;
     58 }
     59 
     60 
     61 void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream,
     62                                      size_t stream_length, uint16_t* data,
     63                                      size_t data_length) {
     64   while (data_length != 0) {
     65     size_t cursor = 0;
     66     uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor);
     67     // There's a total lack of bounds checking for stream
     68     // as it was already done in Reset.
     69     stream += cursor;
     70     DCHECK(stream_length >= cursor);
     71     stream_length -= cursor;
     72     if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) {
     73       *data++ = Utf16::LeadSurrogate(character);
     74       *data++ = Utf16::TrailSurrogate(character);
     75       DCHECK(data_length > 1);
     76       data_length -= 2;
     77     } else {
     78       *data++ = character;
     79       data_length -= 1;
     80     }
     81   }
     82 }
     83 
     84 }  // namespace unibrow
     85