1 /** @file 2 Implementation of translation upon VT-UTF8. 3 4 Copyright (c) 2006 - 2010, Intel Corporation. All rights reserved.<BR> 5 This program and the accompanying materials 6 are licensed and made available under the terms and conditions of the BSD License 7 which accompanies this distribution. The full text of the license may be found at 8 http://opensource.org/licenses/bsd-license.php 9 10 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, 11 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. 12 13 **/ 14 15 #include "Terminal.h" 16 17 /** 18 Translate all VT-UTF8 characters in the Raw FIFI into unicode characters, 19 and insert them into Unicode FIFO. 20 21 @param TerminalDevice The terminal device. 22 23 **/ 24 VOID 25 VTUTF8RawDataToUnicode ( 26 IN TERMINAL_DEV *TerminalDevice 27 ) 28 { 29 UTF8_CHAR Utf8Char; 30 UINT8 ValidBytes; 31 UINT16 UnicodeChar; 32 33 ValidBytes = 0; 34 // 35 // pop the raw data out from the raw fifo, 36 // and translate it into unicode, then push 37 // the unicode into unicode fifo, until the raw fifo is empty. 38 // 39 while (!IsRawFiFoEmpty (TerminalDevice) && !IsUnicodeFiFoFull (TerminalDevice)) { 40 41 GetOneValidUtf8Char (TerminalDevice, &Utf8Char, &ValidBytes); 42 43 if (ValidBytes < 1 || ValidBytes > 3) { 44 continue; 45 } 46 47 Utf8ToUnicode (Utf8Char, ValidBytes, (CHAR16 *) &UnicodeChar); 48 49 UnicodeFiFoInsertOneKey (TerminalDevice, UnicodeChar); 50 } 51 } 52 53 /** 54 Get one valid VT-UTF8 characters set from Raw Data FIFO. 55 56 @param Utf8Device The terminal device. 57 @param Utf8Char Returned valid VT-UTF8 characters set. 58 @param ValidBytes The count of returned VT-VTF8 characters. 59 If ValidBytes is zero, no valid VT-UTF8 returned. 60 61 **/ 62 VOID 63 GetOneValidUtf8Char ( 64 IN TERMINAL_DEV *Utf8Device, 65 OUT UTF8_CHAR *Utf8Char, 66 OUT UINT8 *ValidBytes 67 ) 68 { 69 UINT8 Temp; 70 UINT8 Index; 71 BOOLEAN FetchFlag; 72 73 Temp = 0; 74 Index = 0; 75 FetchFlag = TRUE; 76 77 // 78 // if no valid Utf8 char is found in the RawFiFo, 79 // then *ValidBytes will be zero. 80 // 81 *ValidBytes = 0; 82 83 while (!IsRawFiFoEmpty (Utf8Device)) { 84 85 RawFiFoRemoveOneKey (Utf8Device, &Temp); 86 87 switch (*ValidBytes) { 88 89 case 0: 90 if ((Temp & 0x80) == 0) { 91 // 92 // one-byte utf8 char 93 // 94 *ValidBytes = 1; 95 96 Utf8Char->Utf8_1 = Temp; 97 98 FetchFlag = FALSE; 99 100 } else if ((Temp & 0xe0) == 0xc0) { 101 // 102 // two-byte utf8 char 103 // 104 *ValidBytes = 2; 105 106 Utf8Char->Utf8_2[1] = Temp; 107 108 } else if ((Temp & 0xf0) == 0xe0) { 109 // 110 // three-byte utf8 char 111 // 112 *ValidBytes = 3; 113 114 Utf8Char->Utf8_3[2] = Temp; 115 116 Index++; 117 118 } else { 119 // 120 // reset *ValidBytes to zero, let valid utf8 char search restart 121 // 122 *ValidBytes = 0; 123 } 124 125 break; 126 127 case 2: 128 // 129 // two-byte utf8 char go on 130 // 131 if ((Temp & 0xc0) == 0x80) { 132 133 Utf8Char->Utf8_2[0] = Temp; 134 135 FetchFlag = FALSE; 136 137 } else { 138 139 *ValidBytes = 0; 140 } 141 break; 142 143 case 3: 144 // 145 // three-byte utf8 char go on 146 // 147 if ((Temp & 0xc0) == 0x80) { 148 if (Index == 1) { 149 Utf8Char->Utf8_3[1] = Temp; 150 Index++; 151 } else { 152 Utf8Char->Utf8_3[0] = Temp; 153 FetchFlag = FALSE; 154 } 155 } else { 156 // 157 // reset *ValidBytes and Index to zero, let valid utf8 char search restart 158 // 159 *ValidBytes = 0; 160 Index = 0; 161 } 162 break; 163 164 default: 165 break; 166 } 167 168 if (!FetchFlag) { 169 break; 170 } 171 } 172 173 return ; 174 } 175 176 /** 177 Translate VT-UTF8 characters into one Unicode character. 178 179 UTF8 Encoding Table 180 Bits per Character | Unicode Character Range | Unicode Binary Encoding | UTF8 Binary Encoding 181 0-7 | 0x0000 - 0x007F | 00000000 0xxxxxxx | 0xxxxxxx 182 8-11 | 0x0080 - 0x07FF | 00000xxx xxxxxxxx | 110xxxxx 10xxxxxx 183 12-16 | 0x0800 - 0xFFFF | xxxxxxxx xxxxxxxx | 1110xxxx 10xxxxxx 10xxxxxx 184 185 186 @param Utf8Char VT-UTF8 character set needs translating. 187 @param ValidBytes The count of valid VT-UTF8 characters. 188 @param UnicodeChar Returned unicode character. 189 190 **/ 191 VOID 192 Utf8ToUnicode ( 193 IN UTF8_CHAR Utf8Char, 194 IN UINT8 ValidBytes, 195 OUT CHAR16 *UnicodeChar 196 ) 197 { 198 UINT8 UnicodeByte0; 199 UINT8 UnicodeByte1; 200 UINT8 Byte0; 201 UINT8 Byte1; 202 UINT8 Byte2; 203 204 *UnicodeChar = 0; 205 206 // 207 // translate utf8 code to unicode, in terminal standard, 208 // up to 3 bytes utf8 code is supported. 209 // 210 switch (ValidBytes) { 211 case 1: 212 // 213 // one-byte utf8 code 214 // 215 *UnicodeChar = (UINT16) Utf8Char.Utf8_1; 216 break; 217 218 case 2: 219 // 220 // two-byte utf8 code 221 // 222 Byte0 = Utf8Char.Utf8_2[0]; 223 Byte1 = Utf8Char.Utf8_2[1]; 224 225 UnicodeByte0 = (UINT8) ((Byte1 << 6) | (Byte0 & 0x3f)); 226 UnicodeByte1 = (UINT8) ((Byte1 >> 2) & 0x07); 227 *UnicodeChar = (UINT16) (UnicodeByte0 | (UnicodeByte1 << 8)); 228 break; 229 230 case 3: 231 // 232 // three-byte utf8 code 233 // 234 Byte0 = Utf8Char.Utf8_3[0]; 235 Byte1 = Utf8Char.Utf8_3[1]; 236 Byte2 = Utf8Char.Utf8_3[2]; 237 238 UnicodeByte0 = (UINT8) ((Byte1 << 6) | (Byte0 & 0x3f)); 239 UnicodeByte1 = (UINT8) ((Byte2 << 4) | ((Byte1 >> 2) & 0x0f)); 240 *UnicodeChar = (UINT16) (UnicodeByte0 | (UnicodeByte1 << 8)); 241 242 default: 243 break; 244 } 245 246 return ; 247 } 248 249 /** 250 Translate one Unicode character into VT-UTF8 characters. 251 252 UTF8 Encoding Table 253 Bits per Character | Unicode Character Range | Unicode Binary Encoding | UTF8 Binary Encoding 254 0-7 | 0x0000 - 0x007F | 00000000 0xxxxxxx | 0xxxxxxx 255 8-11 | 0x0080 - 0x07FF | 00000xxx xxxxxxxx | 110xxxxx 10xxxxxx 256 12-16 | 0x0800 - 0xFFFF | xxxxxxxx xxxxxxxx | 1110xxxx 10xxxxxx 10xxxxxx 257 258 259 @param Unicode Unicode character need translating. 260 @param Utf8Char Return VT-UTF8 character set. 261 @param ValidBytes The count of valid VT-UTF8 characters. If 262 ValidBytes is zero, no valid VT-UTF8 returned. 263 264 **/ 265 VOID 266 UnicodeToUtf8 ( 267 IN CHAR16 Unicode, 268 OUT UTF8_CHAR *Utf8Char, 269 OUT UINT8 *ValidBytes 270 ) 271 { 272 UINT8 UnicodeByte0; 273 UINT8 UnicodeByte1; 274 // 275 // translate unicode to utf8 code 276 // 277 UnicodeByte0 = (UINT8) Unicode; 278 UnicodeByte1 = (UINT8) (Unicode >> 8); 279 280 if (Unicode < 0x0080) { 281 282 Utf8Char->Utf8_1 = (UINT8) (UnicodeByte0 & 0x7f); 283 *ValidBytes = 1; 284 285 } else if (Unicode < 0x0800) { 286 // 287 // byte sequence: high -> low 288 // Utf8_2[0], Utf8_2[1] 289 // 290 Utf8Char->Utf8_2[1] = (UINT8) ((UnicodeByte0 & 0x3f) + 0x80); 291 Utf8Char->Utf8_2[0] = (UINT8) ((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x1f) + 0xc0); 292 293 *ValidBytes = 2; 294 295 } else { 296 // 297 // byte sequence: high -> low 298 // Utf8_3[0], Utf8_3[1], Utf8_3[2] 299 // 300 Utf8Char->Utf8_3[2] = (UINT8) ((UnicodeByte0 & 0x3f) + 0x80); 301 Utf8Char->Utf8_3[1] = (UINT8) ((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x3f) + 0x80); 302 Utf8Char->Utf8_3[0] = (UINT8) (((UnicodeByte1 >> 4) & 0x0f) + 0xe0); 303 304 *ValidBytes = 3; 305 } 306 } 307 308 309 /** 310 Check if input string is valid VT-UTF8 string. 311 312 @param TerminalDevice The terminal device. 313 @param WString The input string. 314 315 @retval EFI_SUCCESS If all input characters are valid. 316 317 **/ 318 EFI_STATUS 319 VTUTF8TestString ( 320 IN TERMINAL_DEV *TerminalDevice, 321 IN CHAR16 *WString 322 ) 323 { 324 // 325 // to utf8, all kind of characters are supported. 326 // 327 return EFI_SUCCESS; 328 } 329