Home | History | Annotate | Download | only in include
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #ifndef _FX_CODEPAGE
      8 #define _FX_CODEPAGE
      9 class IFX_CodePage;
     10 #define FX_CODEPAGE_DefANSI 0
     11 #define FX_CODEPAGE_DefOEM 1
     12 #define FX_CODEPAGE_DefMAC 2
     13 #define FX_CODEPAGE_Thread 3
     14 #define FX_CODEPAGE_Symbol 42
     15 #define FX_CODEPAGE_MSDOS_US 437
     16 #define FX_CODEPAGE_Arabic_ASMO708 708
     17 #define FX_CODEPAGE_Arabic_ASMO449Plus 709
     18 #define FX_CODEPAGE_Arabic_Transparent 710
     19 #define FX_CODEPAGE_Arabic_NafithaEnhanced 711
     20 #define FX_CODEPAGE_Arabic_TransparentASMO 720
     21 #define FX_CODEPAGE_MSDOS_Greek1 737
     22 #define FX_CODEPAGE_MSDOS_Baltic 775
     23 #define FX_CODEPAGE_MSWin31_WesternEuropean 819
     24 #define FX_CODEPAGE_MSDOS_WesternEuropean 850
     25 #define FX_CODEPAGE_MSDOS_EasternEuropean 852
     26 #define FX_CODEPAGE_MSDOS_Latin3 853
     27 #define FX_CODEPAGE_MSDOS_Cyrillic 855
     28 #define FX_CODEPAGE_MSDOS_Turkish 857
     29 #define FX_CODEPAGE_MSDOS_Latin1Euro 858
     30 #define FX_CODEPAGE_MSDOS_Portuguese 860
     31 #define FX_CODEPAGE_MSDOS_Icelandic 861
     32 #define FX_CODEPAGE_MSDOS_Hebrew 862
     33 #define FX_CODEPAGE_MSDOS_FrenchCanadian 863
     34 #define FX_CODEPAGE_MSDOS_Arabic 864
     35 #define FX_CODEPAGE_MSDOS_Norwegian 865
     36 #define FX_CODEPAGE_MSDOS_Russian 866
     37 #define FX_CODEPAGE_MSDOS_Greek2 869
     38 #define FX_CODEPAGE_MSDOS_Thai 874
     39 #define FX_CODEPAGE_MSDOS_KamenickyCS 895
     40 #define FX_CODEPAGE_ShiftJIS 932
     41 #define FX_CODEPAGE_ChineseSimplified 936
     42 #define FX_CODEPAGE_Korean 949
     43 #define FX_CODEPAGE_ChineseTraditional 950
     44 #define FX_CODEPAGE_UTF16LE 1200
     45 #define FX_CODEPAGE_UTF16BE 1201
     46 #define FX_CODEPAGE_MSWin_EasternEuropean 1250
     47 #define FX_CODEPAGE_MSWin_Cyrillic 1251
     48 #define FX_CODEPAGE_MSWin_WesternEuropean 1252
     49 #define FX_CODEPAGE_MSWin_Greek 1253
     50 #define FX_CODEPAGE_MSWin_Turkish 1254
     51 #define FX_CODEPAGE_MSWin_Hebrew 1255
     52 #define FX_CODEPAGE_MSWin_Arabic 1256
     53 #define FX_CODEPAGE_MSWin_Baltic 1257
     54 #define FX_CODEPAGE_MSWin_Vietnamese 1258
     55 #define FX_CODEPAGE_Johab 1361
     56 #define FX_CODEPAGE_MAC_Roman 10000
     57 #define FX_CODEPAGE_MAC_ShiftJIS 10001
     58 #define FX_CODEPAGE_MAC_ChineseTraditional 10002
     59 #define FX_CODEPAGE_MAC_Korean 10003
     60 #define FX_CODEPAGE_MAC_Arabic 10004
     61 #define FX_CODEPAGE_MAC_Hebrew 10005
     62 #define FX_CODEPAGE_MAC_Greek 10006
     63 #define FX_CODEPAGE_MAC_Cyrillic 10007
     64 #define FX_CODEPAGE_MAC_ChineseSimplified 10008
     65 #define FX_CODEPAGE_MAC_Thai 10021
     66 #define FX_CODEPAGE_MAC_EasternEuropean 10029
     67 #define FX_CODEPAGE_MAC_Turkish 10081
     68 #define FX_CODEPAGE_UTF32LE 12000
     69 #define FX_CODEPAGE_UTF32BE 12001
     70 #define FX_CODEPAGE_ISO8859_1 28591
     71 #define FX_CODEPAGE_ISO8859_2 28592
     72 #define FX_CODEPAGE_ISO8859_3 28593
     73 #define FX_CODEPAGE_ISO8859_4 28594
     74 #define FX_CODEPAGE_ISO8859_5 28595
     75 #define FX_CODEPAGE_ISO8859_6 28596
     76 #define FX_CODEPAGE_ISO8859_7 28597
     77 #define FX_CODEPAGE_ISO8859_8 28598
     78 #define FX_CODEPAGE_ISO8859_9 28599
     79 #define FX_CODEPAGE_ISO8859_10 28600
     80 #define FX_CODEPAGE_ISO8859_11 28601
     81 #define FX_CODEPAGE_ISO8859_12 28602
     82 #define FX_CODEPAGE_ISO8859_13 28603
     83 #define FX_CODEPAGE_ISO8859_14 28604
     84 #define FX_CODEPAGE_ISO8859_15 28605
     85 #define FX_CODEPAGE_ISO8859_16 28606
     86 #define FX_CODEPAGE_ISCII_Devanagari 57002
     87 #define FX_CODEPAGE_ISCII_Bengali 57003
     88 #define FX_CODEPAGE_ISCII_Tamil 57004
     89 #define FX_CODEPAGE_ISCII_Telugu 57005
     90 #define FX_CODEPAGE_ISCII_Assamese 57006
     91 #define FX_CODEPAGE_ISCII_Oriya 57007
     92 #define FX_CODEPAGE_ISCII_Kannada 57008
     93 #define FX_CODEPAGE_ISCII_Malayalam 57009
     94 #define FX_CODEPAGE_ISCII_Gujarati 57010
     95 #define FX_CODEPAGE_ISCII_Punjabi 57011
     96 #define FX_CODEPAGE_UTF7 65000
     97 #define FX_CODEPAGE_UTF8 65001
     98 #define FX_CHARSET_ANSI 0
     99 #define FX_CHARSET_Default 1
    100 #define FX_CHARSET_Symbol 2
    101 #define FX_CHARSET_MAC_Roman 77
    102 #define FX_CHARSET_MAC_ShiftJIS 78
    103 #define FX_CHARSET_MAC_Korean 79
    104 #define FX_CHARSET_MAC_ChineseSimplified 80
    105 #define FX_CHARSET_MAC_ChineseTriditional 81
    106 #define FX_CHARSET_MAC_Johab 82
    107 #define FX_CHARSET_MAC_Hebrew 83
    108 #define FX_CHARSET_MAC_Arabic 84
    109 #define FX_CHARSET_MAC_Greek 85
    110 #define FX_CHARSET_MAC_Turkish 86
    111 #define FX_CHARSET_MAC_Thai 87
    112 #define FX_CHARSET_MAC_EasternEuropean 88
    113 #define FX_CHARSET_MAC_Cyrillic 89
    114 #define FX_CHARSET_ShiftJIS 128
    115 #define FX_CHARSET_Korean 129
    116 #define FX_CHARSET_Johab 130
    117 #define FX_CHARSET_ChineseSimplified 134
    118 #define FX_CHARSET_ChineseTriditional 136
    119 #define FX_CHARSET_MSWin_Greek 161
    120 #define FX_CHARSET_MSWin_Turkish 162
    121 #define FX_CHARSET_MSWin_Vietnamese 163
    122 #define FX_CHARSET_MSWin_Hebrew 177
    123 #define FX_CHARSET_MSWin_Arabic 178
    124 #define FX_CHARSET_ArabicTraditional 179
    125 #define FX_CHARSET_ArabicUser 180
    126 #define FX_CHARSET_HebrewUser 181
    127 #define FX_CHARSET_MSWin_Baltic 186
    128 #define FX_CHARSET_MSWin_Cyrillic 204
    129 #define FX_CHARSET_Thai 222
    130 #define FX_CHARSET_MSWin_EasterEuropean 238
    131 #define FX_CHARSET_US 254
    132 #define FX_CHARSET_OEM 255
    133 FX_WORD FX_GetCodePageFromCharset(uint8_t charset);
    134 FX_WORD FX_GetCharsetFromCodePage(FX_WORD codepage);
    135 FX_WORD FX_GetCodePageFromStringA(const FX_CHAR* pStr, int32_t iLength);
    136 FX_WORD FX_GetCodePageFormStringW(const FX_WCHAR* pStr, int32_t iLength);
    137 FX_WORD FX_GetDefCodePageByLanguage(FX_WORD wLanguage);
    138 void FX_SwapByteOrder(FX_WCHAR* pStr, int32_t iLength);
    139 void FX_SwapByteOrderCopy(const FX_WCHAR* pSrc,
    140                           FX_WCHAR* pDst,
    141                           int32_t iLength);
    142 void FX_UTF16ToWChar(void* pBuffer, int32_t iLength);
    143 void FX_UTF16ToWCharCopy(const FX_WORD* pUTF16,
    144                          FX_WCHAR* pWChar,
    145                          int32_t iLength);
    146 void FX_WCharToUTF16(void* pBuffer, int32_t iLength);
    147 void FX_WCharToUTF16Copy(const FX_WCHAR* pWChar,
    148                          FX_WORD* pUTF16,
    149                          int32_t iLength);
    150 int32_t FX_DecodeString(FX_WORD wCodePage,
    151                         const FX_CHAR* pSrc,
    152                         int32_t* pSrcLen,
    153                         FX_WCHAR* pDst,
    154                         int32_t* pDstLen,
    155                         FX_BOOL bErrBreak = FALSE);
    156 int32_t FX_UTF8Decode(const FX_CHAR* pSrc,
    157                       int32_t* pSrcLen,
    158                       FX_WCHAR* pDst,
    159                       int32_t* pDstLen);
    160 enum FX_CODESYSTEM {
    161   FX_MBCS = 0,
    162   FX_SBCS,
    163   FX_DBCS,
    164 };
    165 typedef struct _FX_CODEPAGE_HEADER {
    166   uint16_t uCPID;
    167   uint8_t uMinCharBytes;
    168   uint8_t uMaxCharBytes;
    169   FX_CODESYSTEM eCPType;
    170   FX_BOOL bHasLeadByte;
    171   FX_WCHAR wMinChar;
    172   FX_WCHAR wMaxChar;
    173   FX_WCHAR wDefChar;
    174   FX_WCHAR wMinUnicode;
    175   FX_WCHAR wMaxUnicode;
    176   FX_WCHAR wDefUnicode;
    177 } FX_CODEPAGE_HEADER;
    178 #define FX_CPMAPTYPE_Consecution 1
    179 #define FX_CPMAPTYPE_Strict 2
    180 #define FX_CPMAPTYPE_NoMapping 3
    181 #define FX_CPMAPTYPE_Delta 4
    182 typedef struct _FX_CPCU_MAPTABLE1 {
    183   uint16_t uMapType;
    184   uint16_t uUniocde;
    185 } FX_CPCU_MAPTABLE1;
    186 typedef struct _FX_CPCU_MAPTABLE2 {
    187   uint8_t uTrailByte;
    188   uint8_t uMapType;
    189   uint16_t uOffset;
    190 } FX_CPCU_MAPTABLE2;
    191 typedef struct _FX_CPCU_MAPINFO {
    192   FX_CPCU_MAPTABLE1* pMapTable1;
    193   FX_CPCU_MAPTABLE2* pMapTable2;
    194   const uint8_t* pMapData;
    195 } FX_CPCU_MAPINFO;
    196 typedef struct _FX_CPUC_MAPTABLE {
    197   uint16_t uStartUnicode;
    198   uint16_t uEndUnicode;
    199   uint16_t uMapType;
    200   uint16_t uOffset;
    201 } FX_CPUC_MAPTABLE;
    202 typedef struct _FX_CPUC_MAPINFO {
    203   uint32_t uMapCount;
    204   FX_CPUC_MAPTABLE* pMapTable;
    205   const uint8_t* pMapData;
    206 } FX_CPUC_MAPINFO;
    207 typedef struct _FX_CODEPAGE {
    208   FX_CODEPAGE_HEADER const* pCPHeader;
    209   FX_CPCU_MAPINFO const* pCPCUMapInfo;
    210   FX_CPUC_MAPINFO const* pCPUCMapInfo;
    211 } FX_CODEPAGE, *FX_LPCODEPAGE;
    212 typedef FX_CODEPAGE const* FX_LPCCODEPAGE;
    213 typedef struct _FX_STR2CPHASH {
    214   uint32_t uHash;
    215   uint32_t uCodePage;
    216 } FX_STR2CPHASH;
    217 typedef struct _FX_CHARSET_MAP {
    218   uint16_t charset;
    219   uint16_t codepage;
    220 } FX_CHARSET_MAP;
    221 typedef struct _FX_LANG2CPMAP {
    222   FX_WORD wLanguage;
    223   FX_WORD wCodepage;
    224 } FX_LANG2CPMAP;
    225 
    226 class IFX_CodePage {
    227  public:
    228   static IFX_CodePage* Create(FX_WORD wCodePage);
    229   virtual ~IFX_CodePage() {}
    230   virtual void Release() = 0;
    231   virtual FX_WORD GetCodePageNumber() const = 0;
    232   virtual FX_CODESYSTEM GetCodeSystemType() const = 0;
    233   virtual FX_BOOL HasLeadByte() const = 0;
    234   virtual FX_BOOL IsLeadByte(uint8_t byte) const = 0;
    235   virtual int32_t GetMinBytesPerChar() const = 0;
    236   virtual int32_t GetMaxBytesPerChar() const = 0;
    237   virtual FX_WCHAR GetMinCharcode() const = 0;
    238   virtual FX_WCHAR GetMaxCharcode() const = 0;
    239   virtual FX_WCHAR GetDefCharcode() const = 0;
    240   virtual FX_WCHAR GetMinUnicode() const = 0;
    241   virtual FX_WCHAR GetMaxUnicode() const = 0;
    242   virtual FX_WCHAR GetDefUnicode() const = 0;
    243   virtual FX_BOOL IsValidCharcode(FX_WORD wCharcode) const = 0;
    244   virtual FX_WCHAR GetUnicode(FX_WORD wCharcode) const = 0;
    245   virtual FX_BOOL IsValidUnicode(FX_WCHAR wUnicode) const = 0;
    246   virtual FX_WORD GetCharcode(FX_WCHAR wUnicode) const = 0;
    247 };
    248 #endif
    249