Home | History | Annotate | Download | only in TerminalDxe
      1 /** @file
      2   Implementation of translation upon VT-UTF8.
      3 
      4 Copyright (c) 2006 - 2010, Intel Corporation. All rights reserved.<BR>
      5 This program and the accompanying materials
      6 are licensed and made available under the terms and conditions of the BSD License
      7 which accompanies this distribution.  The full text of the license may be found at
      8 http://opensource.org/licenses/bsd-license.php
      9 
     10 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
     12 
     13 **/
     14 
     15 #include "Terminal.h"
     16 
     17 /**
     18   Translate all VT-UTF8 characters in the Raw FIFI into unicode characters,
     19   and insert them into Unicode FIFO.
     20 
     21   @param TerminalDevice          The terminal device.
     22 
     23 **/
     24 VOID
     25 VTUTF8RawDataToUnicode (
     26   IN  TERMINAL_DEV    *TerminalDevice
     27   )
     28 {
     29   UTF8_CHAR Utf8Char;
     30   UINT8     ValidBytes;
     31   UINT16    UnicodeChar;
     32 
     33   ValidBytes = 0;
     34   //
     35   // pop the raw data out from the raw fifo,
     36   // and translate it into unicode, then push
     37   // the unicode into unicode fifo, until the raw fifo is empty.
     38   //
     39   while (!IsRawFiFoEmpty (TerminalDevice) && !IsUnicodeFiFoFull (TerminalDevice)) {
     40 
     41     GetOneValidUtf8Char (TerminalDevice, &Utf8Char, &ValidBytes);
     42 
     43     if (ValidBytes < 1 || ValidBytes > 3) {
     44       continue;
     45     }
     46 
     47     Utf8ToUnicode (Utf8Char, ValidBytes, (CHAR16 *) &UnicodeChar);
     48 
     49     UnicodeFiFoInsertOneKey (TerminalDevice, UnicodeChar);
     50   }
     51 }
     52 
     53 /**
     54   Get one valid VT-UTF8 characters set from Raw Data FIFO.
     55 
     56   @param  Utf8Device          The terminal device.
     57   @param  Utf8Char            Returned valid VT-UTF8 characters set.
     58   @param  ValidBytes          The count of returned VT-VTF8 characters.
     59                               If ValidBytes is zero, no valid VT-UTF8 returned.
     60 
     61 **/
     62 VOID
     63 GetOneValidUtf8Char (
     64   IN  TERMINAL_DEV      *Utf8Device,
     65   OUT UTF8_CHAR         *Utf8Char,
     66   OUT UINT8             *ValidBytes
     67   )
     68 {
     69   UINT8   Temp;
     70   UINT8   Index;
     71   BOOLEAN FetchFlag;
     72 
     73   Temp      = 0;
     74   Index     = 0;
     75   FetchFlag = TRUE;
     76 
     77   //
     78   // if no valid Utf8 char is found in the RawFiFo,
     79   // then *ValidBytes will be zero.
     80   //
     81   *ValidBytes = 0;
     82 
     83   while (!IsRawFiFoEmpty (Utf8Device)) {
     84 
     85     RawFiFoRemoveOneKey (Utf8Device, &Temp);
     86 
     87     switch (*ValidBytes) {
     88 
     89     case 0:
     90       if ((Temp & 0x80) == 0) {
     91         //
     92         // one-byte utf8 char
     93         //
     94         *ValidBytes       = 1;
     95 
     96         Utf8Char->Utf8_1  = Temp;
     97 
     98         FetchFlag         = FALSE;
     99 
    100       } else if ((Temp & 0xe0) == 0xc0) {
    101         //
    102         // two-byte utf8 char
    103         //
    104         *ValidBytes         = 2;
    105 
    106         Utf8Char->Utf8_2[1] = Temp;
    107 
    108       } else if ((Temp & 0xf0) == 0xe0) {
    109         //
    110         // three-byte utf8 char
    111         //
    112         *ValidBytes         = 3;
    113 
    114         Utf8Char->Utf8_3[2] = Temp;
    115 
    116         Index++;
    117 
    118       } else {
    119         //
    120         // reset *ValidBytes to zero, let valid utf8 char search restart
    121         //
    122         *ValidBytes = 0;
    123       }
    124 
    125       break;
    126 
    127     case 2:
    128       //
    129       // two-byte utf8 char go on
    130       //
    131       if ((Temp & 0xc0) == 0x80) {
    132 
    133         Utf8Char->Utf8_2[0] = Temp;
    134 
    135         FetchFlag           = FALSE;
    136 
    137       } else {
    138 
    139         *ValidBytes = 0;
    140       }
    141       break;
    142 
    143     case 3:
    144       //
    145       // three-byte utf8 char go on
    146       //
    147       if ((Temp & 0xc0) == 0x80) {
    148         if (Index == 1) {
    149           Utf8Char->Utf8_3[1] = Temp;
    150           Index++;
    151         } else {
    152           Utf8Char->Utf8_3[0] = Temp;
    153           FetchFlag = FALSE;
    154         }
    155       } else {
    156         //
    157         // reset *ValidBytes and Index to zero, let valid utf8 char search restart
    158         //
    159         *ValidBytes = 0;
    160         Index       = 0;
    161       }
    162       break;
    163 
    164     default:
    165       break;
    166     }
    167 
    168     if (!FetchFlag) {
    169       break;
    170     }
    171   }
    172 
    173   return ;
    174 }
    175 
    176 /**
    177   Translate VT-UTF8 characters into one Unicode character.
    178 
    179   UTF8 Encoding Table
    180   Bits per Character | Unicode Character Range | Unicode Binary  Encoding |	UTF8 Binary Encoding
    181         0-7	         |     0x0000 - 0x007F	   |     00000000 0xxxxxxx	  |   0xxxxxxx
    182         8-11 	       |     0x0080 - 0x07FF	   |     00000xxx xxxxxxxx 	  |   110xxxxx 10xxxxxx
    183        12-16	       |     0x0800 - 0xFFFF	   |     xxxxxxxx xxxxxxxx	  |   1110xxxx 10xxxxxx 10xxxxxx
    184 
    185 
    186   @param  Utf8Char         VT-UTF8 character set needs translating.
    187   @param  ValidBytes       The count of valid VT-UTF8 characters.
    188   @param  UnicodeChar      Returned unicode character.
    189 
    190 **/
    191 VOID
    192 Utf8ToUnicode (
    193   IN  UTF8_CHAR       Utf8Char,
    194   IN  UINT8           ValidBytes,
    195   OUT CHAR16          *UnicodeChar
    196   )
    197 {
    198   UINT8 UnicodeByte0;
    199   UINT8 UnicodeByte1;
    200   UINT8 Byte0;
    201   UINT8 Byte1;
    202   UINT8 Byte2;
    203 
    204   *UnicodeChar = 0;
    205 
    206   //
    207   // translate utf8 code to unicode, in terminal standard,
    208   // up to 3 bytes utf8 code is supported.
    209   //
    210   switch (ValidBytes) {
    211   case 1:
    212     //
    213     // one-byte utf8 code
    214     //
    215     *UnicodeChar = (UINT16) Utf8Char.Utf8_1;
    216     break;
    217 
    218   case 2:
    219     //
    220     // two-byte utf8 code
    221     //
    222     Byte0         = Utf8Char.Utf8_2[0];
    223     Byte1         = Utf8Char.Utf8_2[1];
    224 
    225     UnicodeByte0  = (UINT8) ((Byte1 << 6) | (Byte0 & 0x3f));
    226     UnicodeByte1  = (UINT8) ((Byte1 >> 2) & 0x07);
    227     *UnicodeChar  = (UINT16) (UnicodeByte0 | (UnicodeByte1 << 8));
    228     break;
    229 
    230   case 3:
    231     //
    232     // three-byte utf8 code
    233     //
    234     Byte0         = Utf8Char.Utf8_3[0];
    235     Byte1         = Utf8Char.Utf8_3[1];
    236     Byte2         = Utf8Char.Utf8_3[2];
    237 
    238     UnicodeByte0  = (UINT8) ((Byte1 << 6) | (Byte0 & 0x3f));
    239     UnicodeByte1  = (UINT8) ((Byte2 << 4) | ((Byte1 >> 2) & 0x0f));
    240     *UnicodeChar  = (UINT16) (UnicodeByte0 | (UnicodeByte1 << 8));
    241 
    242   default:
    243     break;
    244   }
    245 
    246   return ;
    247 }
    248 
    249 /**
    250   Translate one Unicode character into VT-UTF8 characters.
    251 
    252   UTF8 Encoding Table
    253   Bits per Character | Unicode Character Range | Unicode Binary  Encoding |	UTF8 Binary Encoding
    254         0-7	         |     0x0000 - 0x007F	   |     00000000 0xxxxxxx	  |   0xxxxxxx
    255         8-11 	       |     0x0080 - 0x07FF	   |     00000xxx xxxxxxxx 	  |   110xxxxx 10xxxxxx
    256        12-16	       |     0x0800 - 0xFFFF	   |     xxxxxxxx xxxxxxxx	  |   1110xxxx 10xxxxxx 10xxxxxx
    257 
    258 
    259   @param  Unicode          Unicode character need translating.
    260   @param  Utf8Char         Return VT-UTF8 character set.
    261   @param  ValidBytes       The count of valid VT-UTF8 characters. If
    262                            ValidBytes is zero, no valid VT-UTF8 returned.
    263 
    264 **/
    265 VOID
    266 UnicodeToUtf8 (
    267   IN  CHAR16      Unicode,
    268   OUT UTF8_CHAR   *Utf8Char,
    269   OUT UINT8       *ValidBytes
    270   )
    271 {
    272   UINT8 UnicodeByte0;
    273   UINT8 UnicodeByte1;
    274   //
    275   // translate unicode to utf8 code
    276   //
    277   UnicodeByte0  = (UINT8) Unicode;
    278   UnicodeByte1  = (UINT8) (Unicode >> 8);
    279 
    280   if (Unicode < 0x0080) {
    281 
    282     Utf8Char->Utf8_1  = (UINT8) (UnicodeByte0 & 0x7f);
    283     *ValidBytes       = 1;
    284 
    285   } else if (Unicode < 0x0800) {
    286     //
    287     // byte sequence: high -> low
    288     //                Utf8_2[0], Utf8_2[1]
    289     //
    290     Utf8Char->Utf8_2[1] = (UINT8) ((UnicodeByte0 & 0x3f) + 0x80);
    291     Utf8Char->Utf8_2[0] = (UINT8) ((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x1f) + 0xc0);
    292 
    293     *ValidBytes         = 2;
    294 
    295   } else {
    296     //
    297     // byte sequence: high -> low
    298     //                Utf8_3[0], Utf8_3[1], Utf8_3[2]
    299     //
    300     Utf8Char->Utf8_3[2] = (UINT8) ((UnicodeByte0 & 0x3f) + 0x80);
    301     Utf8Char->Utf8_3[1] = (UINT8) ((((UnicodeByte1 << 2) + (UnicodeByte0 >> 6)) & 0x3f) + 0x80);
    302     Utf8Char->Utf8_3[0] = (UINT8) (((UnicodeByte1 >> 4) & 0x0f) + 0xe0);
    303 
    304     *ValidBytes         = 3;
    305   }
    306 }
    307 
    308 
    309 /**
    310   Check if input string is valid VT-UTF8 string.
    311 
    312   @param  TerminalDevice          The terminal device.
    313   @param  WString                 The input string.
    314 
    315   @retval EFI_SUCCESS             If all input characters are valid.
    316 
    317 **/
    318 EFI_STATUS
    319 VTUTF8TestString (
    320   IN  TERMINAL_DEV    *TerminalDevice,
    321   IN  CHAR16          *WString
    322   )
    323 {
    324   //
    325   // to utf8, all kind of characters are supported.
    326   //
    327   return EFI_SUCCESS;
    328 }
    329