1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef _FX_CODEPAGE 8 #define _FX_CODEPAGE 9 class IFX_CodePage; 10 #define FX_CODEPAGE_DefANSI 0 11 #define FX_CODEPAGE_DefOEM 1 12 #define FX_CODEPAGE_DefMAC 2 13 #define FX_CODEPAGE_Thread 3 14 #define FX_CODEPAGE_Symbol 42 15 #define FX_CODEPAGE_MSDOS_US 437 16 #define FX_CODEPAGE_Arabic_ASMO708 708 17 #define FX_CODEPAGE_Arabic_ASMO449Plus 709 18 #define FX_CODEPAGE_Arabic_Transparent 710 19 #define FX_CODEPAGE_Arabic_NafithaEnhanced 711 20 #define FX_CODEPAGE_Arabic_TransparentASMO 720 21 #define FX_CODEPAGE_MSDOS_Greek1 737 22 #define FX_CODEPAGE_MSDOS_Baltic 775 23 #define FX_CODEPAGE_MSWin31_WesternEuropean 819 24 #define FX_CODEPAGE_MSDOS_WesternEuropean 850 25 #define FX_CODEPAGE_MSDOS_EasternEuropean 852 26 #define FX_CODEPAGE_MSDOS_Latin3 853 27 #define FX_CODEPAGE_MSDOS_Cyrillic 855 28 #define FX_CODEPAGE_MSDOS_Turkish 857 29 #define FX_CODEPAGE_MSDOS_Latin1Euro 858 30 #define FX_CODEPAGE_MSDOS_Portuguese 860 31 #define FX_CODEPAGE_MSDOS_Icelandic 861 32 #define FX_CODEPAGE_MSDOS_Hebrew 862 33 #define FX_CODEPAGE_MSDOS_FrenchCanadian 863 34 #define FX_CODEPAGE_MSDOS_Arabic 864 35 #define FX_CODEPAGE_MSDOS_Norwegian 865 36 #define FX_CODEPAGE_MSDOS_Russian 866 37 #define FX_CODEPAGE_MSDOS_Greek2 869 38 #define FX_CODEPAGE_MSDOS_Thai 874 39 #define FX_CODEPAGE_MSDOS_KamenickyCS 895 40 #define FX_CODEPAGE_ShiftJIS 932 41 #define FX_CODEPAGE_ChineseSimplified 936 42 #define FX_CODEPAGE_Korean 949 43 #define FX_CODEPAGE_ChineseTraditional 950 44 #define FX_CODEPAGE_UTF16LE 1200 45 #define FX_CODEPAGE_UTF16BE 1201 46 #define FX_CODEPAGE_MSWin_EasternEuropean 1250 47 #define FX_CODEPAGE_MSWin_Cyrillic 1251 48 #define FX_CODEPAGE_MSWin_WesternEuropean 1252 49 #define FX_CODEPAGE_MSWin_Greek 1253 50 #define FX_CODEPAGE_MSWin_Turkish 1254 51 #define FX_CODEPAGE_MSWin_Hebrew 1255 52 #define FX_CODEPAGE_MSWin_Arabic 1256 53 #define FX_CODEPAGE_MSWin_Baltic 1257 54 #define FX_CODEPAGE_MSWin_Vietnamese 1258 55 #define FX_CODEPAGE_Johab 1361 56 #define FX_CODEPAGE_MAC_Roman 10000 57 #define FX_CODEPAGE_MAC_ShiftJIS 10001 58 #define FX_CODEPAGE_MAC_ChineseTraditional 10002 59 #define FX_CODEPAGE_MAC_Korean 10003 60 #define FX_CODEPAGE_MAC_Arabic 10004 61 #define FX_CODEPAGE_MAC_Hebrew 10005 62 #define FX_CODEPAGE_MAC_Greek 10006 63 #define FX_CODEPAGE_MAC_Cyrillic 10007 64 #define FX_CODEPAGE_MAC_ChineseSimplified 10008 65 #define FX_CODEPAGE_MAC_Thai 10021 66 #define FX_CODEPAGE_MAC_EasternEuropean 10029 67 #define FX_CODEPAGE_MAC_Turkish 10081 68 #define FX_CODEPAGE_UTF32LE 12000 69 #define FX_CODEPAGE_UTF32BE 12001 70 #define FX_CODEPAGE_ISO8859_1 28591 71 #define FX_CODEPAGE_ISO8859_2 28592 72 #define FX_CODEPAGE_ISO8859_3 28593 73 #define FX_CODEPAGE_ISO8859_4 28594 74 #define FX_CODEPAGE_ISO8859_5 28595 75 #define FX_CODEPAGE_ISO8859_6 28596 76 #define FX_CODEPAGE_ISO8859_7 28597 77 #define FX_CODEPAGE_ISO8859_8 28598 78 #define FX_CODEPAGE_ISO8859_9 28599 79 #define FX_CODEPAGE_ISO8859_10 28600 80 #define FX_CODEPAGE_ISO8859_11 28601 81 #define FX_CODEPAGE_ISO8859_12 28602 82 #define FX_CODEPAGE_ISO8859_13 28603 83 #define FX_CODEPAGE_ISO8859_14 28604 84 #define FX_CODEPAGE_ISO8859_15 28605 85 #define FX_CODEPAGE_ISO8859_16 28606 86 #define FX_CODEPAGE_ISCII_Devanagari 57002 87 #define FX_CODEPAGE_ISCII_Bengali 57003 88 #define FX_CODEPAGE_ISCII_Tamil 57004 89 #define FX_CODEPAGE_ISCII_Telugu 57005 90 #define FX_CODEPAGE_ISCII_Assamese 57006 91 #define FX_CODEPAGE_ISCII_Oriya 57007 92 #define FX_CODEPAGE_ISCII_Kannada 57008 93 #define FX_CODEPAGE_ISCII_Malayalam 57009 94 #define FX_CODEPAGE_ISCII_Gujarati 57010 95 #define FX_CODEPAGE_ISCII_Punjabi 57011 96 #define FX_CODEPAGE_UTF7 65000 97 #define FX_CODEPAGE_UTF8 65001 98 #define FX_CHARSET_ANSI 0 99 #define FX_CHARSET_Default 1 100 #define FX_CHARSET_Symbol 2 101 #define FX_CHARSET_MAC_Roman 77 102 #define FX_CHARSET_MAC_ShiftJIS 78 103 #define FX_CHARSET_MAC_Korean 79 104 #define FX_CHARSET_MAC_ChineseSimplified 80 105 #define FX_CHARSET_MAC_ChineseTriditional 81 106 #define FX_CHARSET_MAC_Johab 82 107 #define FX_CHARSET_MAC_Hebrew 83 108 #define FX_CHARSET_MAC_Arabic 84 109 #define FX_CHARSET_MAC_Greek 85 110 #define FX_CHARSET_MAC_Turkish 86 111 #define FX_CHARSET_MAC_Thai 87 112 #define FX_CHARSET_MAC_EasternEuropean 88 113 #define FX_CHARSET_MAC_Cyrillic 89 114 #define FX_CHARSET_ShiftJIS 128 115 #define FX_CHARSET_Korean 129 116 #define FX_CHARSET_Johab 130 117 #define FX_CHARSET_ChineseSimplified 134 118 #define FX_CHARSET_ChineseTriditional 136 119 #define FX_CHARSET_MSWin_Greek 161 120 #define FX_CHARSET_MSWin_Turkish 162 121 #define FX_CHARSET_MSWin_Vietnamese 163 122 #define FX_CHARSET_MSWin_Hebrew 177 123 #define FX_CHARSET_MSWin_Arabic 178 124 #define FX_CHARSET_ArabicTraditional 179 125 #define FX_CHARSET_ArabicUser 180 126 #define FX_CHARSET_HebrewUser 181 127 #define FX_CHARSET_MSWin_Baltic 186 128 #define FX_CHARSET_MSWin_Cyrillic 204 129 #define FX_CHARSET_Thai 222 130 #define FX_CHARSET_MSWin_EasterEuropean 238 131 #define FX_CHARSET_US 254 132 #define FX_CHARSET_OEM 255 133 FX_WORD FX_GetCodePageFromCharset(uint8_t charset); 134 FX_WORD FX_GetCharsetFromCodePage(FX_WORD codepage); 135 FX_WORD FX_GetCodePageFromStringA(const FX_CHAR* pStr, int32_t iLength); 136 FX_WORD FX_GetCodePageFormStringW(const FX_WCHAR* pStr, int32_t iLength); 137 FX_WORD FX_GetDefCodePageByLanguage(FX_WORD wLanguage); 138 void FX_SwapByteOrder(FX_WCHAR* pStr, int32_t iLength); 139 void FX_SwapByteOrderCopy(const FX_WCHAR* pSrc, 140 FX_WCHAR* pDst, 141 int32_t iLength); 142 void FX_UTF16ToWChar(void* pBuffer, int32_t iLength); 143 void FX_UTF16ToWCharCopy(const FX_WORD* pUTF16, 144 FX_WCHAR* pWChar, 145 int32_t iLength); 146 void FX_WCharToUTF16(void* pBuffer, int32_t iLength); 147 void FX_WCharToUTF16Copy(const FX_WCHAR* pWChar, 148 FX_WORD* pUTF16, 149 int32_t iLength); 150 int32_t FX_DecodeString(FX_WORD wCodePage, 151 const FX_CHAR* pSrc, 152 int32_t* pSrcLen, 153 FX_WCHAR* pDst, 154 int32_t* pDstLen, 155 FX_BOOL bErrBreak = FALSE); 156 int32_t FX_UTF8Decode(const FX_CHAR* pSrc, 157 int32_t* pSrcLen, 158 FX_WCHAR* pDst, 159 int32_t* pDstLen); 160 enum FX_CODESYSTEM { 161 FX_MBCS = 0, 162 FX_SBCS, 163 FX_DBCS, 164 }; 165 typedef struct _FX_CODEPAGE_HEADER { 166 uint16_t uCPID; 167 uint8_t uMinCharBytes; 168 uint8_t uMaxCharBytes; 169 FX_CODESYSTEM eCPType; 170 FX_BOOL bHasLeadByte; 171 FX_WCHAR wMinChar; 172 FX_WCHAR wMaxChar; 173 FX_WCHAR wDefChar; 174 FX_WCHAR wMinUnicode; 175 FX_WCHAR wMaxUnicode; 176 FX_WCHAR wDefUnicode; 177 } FX_CODEPAGE_HEADER; 178 #define FX_CPMAPTYPE_Consecution 1 179 #define FX_CPMAPTYPE_Strict 2 180 #define FX_CPMAPTYPE_NoMapping 3 181 #define FX_CPMAPTYPE_Delta 4 182 typedef struct _FX_CPCU_MAPTABLE1 { 183 uint16_t uMapType; 184 uint16_t uUniocde; 185 } FX_CPCU_MAPTABLE1; 186 typedef struct _FX_CPCU_MAPTABLE2 { 187 uint8_t uTrailByte; 188 uint8_t uMapType; 189 uint16_t uOffset; 190 } FX_CPCU_MAPTABLE2; 191 typedef struct _FX_CPCU_MAPINFO { 192 FX_CPCU_MAPTABLE1* pMapTable1; 193 FX_CPCU_MAPTABLE2* pMapTable2; 194 const uint8_t* pMapData; 195 } FX_CPCU_MAPINFO; 196 typedef struct _FX_CPUC_MAPTABLE { 197 uint16_t uStartUnicode; 198 uint16_t uEndUnicode; 199 uint16_t uMapType; 200 uint16_t uOffset; 201 } FX_CPUC_MAPTABLE; 202 typedef struct _FX_CPUC_MAPINFO { 203 uint32_t uMapCount; 204 FX_CPUC_MAPTABLE* pMapTable; 205 const uint8_t* pMapData; 206 } FX_CPUC_MAPINFO; 207 typedef struct _FX_CODEPAGE { 208 FX_CODEPAGE_HEADER const* pCPHeader; 209 FX_CPCU_MAPINFO const* pCPCUMapInfo; 210 FX_CPUC_MAPINFO const* pCPUCMapInfo; 211 } FX_CODEPAGE, *FX_LPCODEPAGE; 212 typedef FX_CODEPAGE const* FX_LPCCODEPAGE; 213 typedef struct _FX_STR2CPHASH { 214 uint32_t uHash; 215 uint32_t uCodePage; 216 } FX_STR2CPHASH; 217 typedef struct _FX_CHARSET_MAP { 218 uint16_t charset; 219 uint16_t codepage; 220 } FX_CHARSET_MAP; 221 typedef struct _FX_LANG2CPMAP { 222 FX_WORD wLanguage; 223 FX_WORD wCodepage; 224 } FX_LANG2CPMAP; 225 226 class IFX_CodePage { 227 public: 228 static IFX_CodePage* Create(FX_WORD wCodePage); 229 virtual ~IFX_CodePage() {} 230 virtual void Release() = 0; 231 virtual FX_WORD GetCodePageNumber() const = 0; 232 virtual FX_CODESYSTEM GetCodeSystemType() const = 0; 233 virtual FX_BOOL HasLeadByte() const = 0; 234 virtual FX_BOOL IsLeadByte(uint8_t byte) const = 0; 235 virtual int32_t GetMinBytesPerChar() const = 0; 236 virtual int32_t GetMaxBytesPerChar() const = 0; 237 virtual FX_WCHAR GetMinCharcode() const = 0; 238 virtual FX_WCHAR GetMaxCharcode() const = 0; 239 virtual FX_WCHAR GetDefCharcode() const = 0; 240 virtual FX_WCHAR GetMinUnicode() const = 0; 241 virtual FX_WCHAR GetMaxUnicode() const = 0; 242 virtual FX_WCHAR GetDefUnicode() const = 0; 243 virtual FX_BOOL IsValidCharcode(FX_WORD wCharcode) const = 0; 244 virtual FX_WCHAR GetUnicode(FX_WORD wCharcode) const = 0; 245 virtual FX_BOOL IsValidUnicode(FX_WCHAR wUnicode) const = 0; 246 virtual FX_WORD GetCharcode(FX_WCHAR wUnicode) const = 0; 247 }; 248 #endif 249