1 // Copyright 2014 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 6 #include "src/unicode-inl.h" 7 #include "src/unicode-decoder.h" 8 #include <stdio.h> 9 #include <stdlib.h> 10 11 namespace unibrow { 12 13 void Utf8DecoderBase::Reset(uint16_t* buffer, size_t buffer_length, 14 const uint8_t* stream, size_t stream_length) { 15 // Assume everything will fit in the buffer and stream won't be needed. 16 last_byte_of_buffer_unused_ = false; 17 unbuffered_start_ = NULL; 18 unbuffered_length_ = 0; 19 bool writing_to_buffer = true; 20 // Loop until stream is read, writing to buffer as long as buffer has space. 21 size_t utf16_length = 0; 22 while (stream_length != 0) { 23 size_t cursor = 0; 24 uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor); 25 DCHECK(cursor > 0 && cursor <= stream_length); 26 stream += cursor; 27 stream_length -= cursor; 28 bool is_two_characters = character > Utf16::kMaxNonSurrogateCharCode; 29 utf16_length += is_two_characters ? 2 : 1; 30 // Don't need to write to the buffer, but still need utf16_length. 31 if (!writing_to_buffer) continue; 32 // Write out the characters to the buffer. 33 // Must check for equality with buffer_length as we've already updated it. 34 if (utf16_length <= buffer_length) { 35 if (is_two_characters) { 36 *buffer++ = Utf16::LeadSurrogate(character); 37 *buffer++ = Utf16::TrailSurrogate(character); 38 } else { 39 *buffer++ = character; 40 } 41 if (utf16_length == buffer_length) { 42 // Just wrote last character of buffer 43 writing_to_buffer = false; 44 unbuffered_start_ = stream; 45 unbuffered_length_ = stream_length; 46 } 47 continue; 48 } 49 // Have gone over buffer. 50 // Last char of buffer is unused, set cursor back. 51 DCHECK(is_two_characters); 52 writing_to_buffer = false; 53 last_byte_of_buffer_unused_ = true; 54 unbuffered_start_ = stream - cursor; 55 unbuffered_length_ = stream_length + cursor; 56 } 57 utf16_length_ = utf16_length; 58 } 59 60 61 void Utf8DecoderBase::WriteUtf16Slow(const uint8_t* stream, 62 size_t stream_length, uint16_t* data, 63 size_t data_length) { 64 while (data_length != 0) { 65 size_t cursor = 0; 66 uint32_t character = Utf8::ValueOf(stream, stream_length, &cursor); 67 // There's a total lack of bounds checking for stream 68 // as it was already done in Reset. 69 stream += cursor; 70 DCHECK(stream_length >= cursor); 71 stream_length -= cursor; 72 if (character > unibrow::Utf16::kMaxNonSurrogateCharCode) { 73 *data++ = Utf16::LeadSurrogate(character); 74 *data++ = Utf16::TrailSurrogate(character); 75 DCHECK(data_length > 1); 76 data_length -= 2; 77 } else { 78 *data++ = character; 79 data_length -= 1; 80 } 81 } 82 } 83 84 } // namespace unibrow 85