Home | History | Annotate | Download | only in fxcrt
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fxcrt/fx_basic.h"
      8 
      9 void CFX_UTF8Decoder::Clear() {
     10   m_Buffer.Clear();
     11   m_PendingBytes = 0;
     12 }
     13 void CFX_UTF8Decoder::AppendChar(uint32_t ch) {
     14   m_Buffer.AppendChar((FX_WCHAR)ch);
     15 }
     16 void CFX_UTF8Decoder::Input(uint8_t byte) {
     17   if (byte < 0x80) {
     18     m_PendingBytes = 0;
     19     m_Buffer.AppendChar(byte);
     20   } else if (byte < 0xc0) {
     21     if (m_PendingBytes == 0) {
     22       return;
     23     }
     24     m_PendingBytes--;
     25     m_PendingChar |= (byte & 0x3f) << (m_PendingBytes * 6);
     26     if (m_PendingBytes == 0) {
     27       AppendChar(m_PendingChar);
     28     }
     29   } else if (byte < 0xe0) {
     30     m_PendingBytes = 1;
     31     m_PendingChar = (byte & 0x1f) << 6;
     32   } else if (byte < 0xf0) {
     33     m_PendingBytes = 2;
     34     m_PendingChar = (byte & 0x0f) << 12;
     35   } else if (byte < 0xf8) {
     36     m_PendingBytes = 3;
     37     m_PendingChar = (byte & 0x07) << 18;
     38   } else if (byte < 0xfc) {
     39     m_PendingBytes = 4;
     40     m_PendingChar = (byte & 0x03) << 24;
     41   } else if (byte < 0xfe) {
     42     m_PendingBytes = 5;
     43     m_PendingChar = (byte & 0x01) << 30;
     44   }
     45 }
     46 void CFX_UTF8Encoder::Input(FX_WCHAR unicode) {
     47   if ((uint32_t)unicode < 0x80) {
     48     m_Buffer.AppendChar(unicode);
     49   } else {
     50     if ((uint32_t)unicode >= 0x80000000) {
     51       return;
     52     }
     53     int nbytes = 0;
     54     if ((uint32_t)unicode < 0x800) {
     55       nbytes = 2;
     56     } else if ((uint32_t)unicode < 0x10000) {
     57       nbytes = 3;
     58     } else if ((uint32_t)unicode < 0x200000) {
     59       nbytes = 4;
     60     } else if ((uint32_t)unicode < 0x4000000) {
     61       nbytes = 5;
     62     } else {
     63       nbytes = 6;
     64     }
     65     static uint8_t prefix[] = {0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
     66     int order = 1 << ((nbytes - 1) * 6);
     67     int code = unicode;
     68     m_Buffer.AppendChar(prefix[nbytes - 2] | (code / order));
     69     for (int i = 0; i < nbytes - 1; i++) {
     70       code = code % order;
     71       order >>= 6;
     72       m_Buffer.AppendChar(0x80 | (code / order));
     73     }
     74   }
     75 }
     76 
     77 CFX_ByteString FX_UTF8Encode(const CFX_WideStringC& wsStr) {
     78   FX_STRSIZE len = wsStr.GetLength();
     79   const FX_WCHAR* pStr = wsStr.c_str();
     80   CFX_UTF8Encoder encoder;
     81   while (len-- > 0)
     82     encoder.Input(*pStr++);
     83 
     84   return CFX_ByteString(encoder.GetResult());
     85 }
     86