Home | History | Annotate | Download | only in UefiLanguageLib
      1 /** @file
      2   Language Library implementation that provides functions for language conversion
      3   between ISO 639-2 and RFC 4646 language codes.
      4 
      5   Copyright (c) 2009 - 2010, Intel Corporation. All rights reserved.<BR>
      6   This program and the accompanying materials
      7   are licensed and made available under the terms and conditions of the BSD License
      8   which accompanies this distribution.  The full text of the license may be found at
      9   http://opensource.org/licenses/bsd-license.php
     10 
     11   THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS,
     12   WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED.
     13 
     14 **/
     15 
     16 #include <Uefi.h>
     17 
     18 #include <Library/LanguageLib.h>
     19 
     20 #include <Library/BaseLib.h>
     21 #include <Library/DebugLib.h>
     22 #include <Library/MemoryAllocationLib.h>
     23 
//
// Lookup table of ISO 639-2 3-character language codes to ISO 639-1 2-character language codes.
// Each entry is 5 CHAR8 values long.  The first 3 CHAR8 values are the ISO 639-2 code.
// The last 2 CHAR8 values are the ISO 639-1 code.
//
// The entries are packed back to back in one string literal with no separators (every
// source line ends in a backslash line continuation), so entry N starts at offset N * 5.
// The conversion functions in this file search the table with AsciiStrStr() and accept a
// hit only when its offset modulo 5 is 0 (ISO 639-2 code) or 3 (ISO 639-1 code).  Every
// entry must therefore be exactly 5 lower case characters, and the continued lines must
// not carry any leading whitespace, or all following entries become misaligned.
//
// ISO 639-2 B codes and deprecated ISO 639-1 codes are not supported.
//
// Commonly used language codes such as English and French are put at the front of the
// table for a quick match.
//
GLOBAL_REMOVE_IF_UNREFERENCED CONST CHAR8 mIso639ToRfc4646ConversionTable[] =
"\
engen\
frafr\
aaraa\
abkab\
aveae\
afraf\
akaak\
amham\
argan\
araar\
asmas\
avaav\
aymay\
azeaz\
bakba\
belbe\
bulbg\
bihbh\
bisbi\
bambm\
benbn\
bodbo\
brebr\
bosbs\
catca\
chece\
chach\
cosco\
crecr\
cescs\
chucu\
chvcv\
cymcy\
danda\
deude\
divdv\
dzodz\
eweee\
ellel\
epoeo\
spaes\
estet\
euseu\
fasfa\
fulff\
finfi\
fijfj\
faofo\
fryfy\
glega\
glagd\
glggl\
grngn\
gujgu\
glvgv\
hauha\
hebhe\
hinhi\
hmoho\
hrvhr\
hatht\
hunhu\
hyehy\
herhz\
inaia\
indid\
ileie\
iboig\
iiiii\
ipkik\
idoio\
islis\
itait\
ikuiu\
jpnja\
javjv\
katka\
konkg\
kikki\
kuakj\
kazkk\
kalkl\
khmkm\
kankn\
korko\
kaukr\
kasks\
kurku\
komkv\
corkw\
kirky\
latla\
ltzlb\
luglg\
limli\
linln\
laolo\
litlt\
lublu\
lavlv\
mlgmg\
mahmh\
mrimi\
mkdmk\
malml\
monmn\
marmr\
msams\
mltmt\
myamy\
nauna\
nobnb\
ndend\
nepne\
ndong\
nldnl\
nnonn\
norno\
nblnr\
navnv\
nyany\
ocioc\
ojioj\
ormom\
orior\
ossos\
panpa\
plipi\
polpl\
pusps\
porpt\
quequ\
rohrm\
runrn\
ronro\
rusru\
kinrw\
sansa\
srdsc\
sndsd\
smese\
sagsg\
sinsi\
slksk\
slvsl\
smosm\
snasn\
somso\
sqisq\
srpsr\
sswss\
sotst\
sunsu\
swesv\
swasw\
tamta\
telte\
tgktg\
thath\
tirti\
tuktk\
tgltl\
tsntn\
tonto\
turtr\
tsots\
tattt\
twitw\
tahty\
uigug\
ukruk\
urdur\
uzbuz\
venve\
vievi\
volvo\
wlnwa\
wolwo\
xhoxh\
yidyi\
yoryo\
zhaza\
zhozh\
zulzu\
";
    220 
    221 /**
    222   Converts upper case ASCII characters in an ASCII string to lower case ASCII
    223   characters in an ASCII string.
    224 
    225   If a an ASCII character in Source is in the range 'A'..'Z', then it is converted
    226   to an ASCII character in the range 'a'..'z' in Destination.  Otherwise, no
    227   conversion is performed.  Length ASCII characters from Source are convertered and
    228   stored in Destination.
    229 
    230   @param  Destination  An ASCII string to store the results of the conversion.
    231   @param  Source       The source ASCII string of the conversion.
    232   @param  Length       The number of ASCII characters to convert.
    233 
    234 **/
    235 VOID
    236 EFIAPI
    237 InternalLanguageLibToLower (
    238   OUT CHAR8        *Destination,
    239   IN  CONST CHAR8  *Source,
    240   IN  UINTN        Length
    241   )
    242 {
    243   for (; Length > 0; Length--, Destination++, Source++) {
    244     *Destination = (CHAR8)((*Source >= 'A' && *Source <= 'Z') ? *Source + ('a' - 'A') : *Source);
    245   }
    246 }
    247 
    248 /**
    249   Convert an ISO 639-2 language code to a RFC 4646 language code.
    250   If the ISO 639-2 language code has a corresponding ISO 639-1 code, then the ISO 639-1
    251   code is returned. Else the original ISO 639-2 code is returned. The returned RFC 4646
    252   language code is composed of only a primary language subtag.
    253 
    254   If Iso639Language is NULL, then ASSERT.
    255   If Rfc4646Language is NULL, then ASSERT.
    256 
    257   @param[out] Rfc4646Language  Pointers to a buffer large enough for an ASCII string
    258                                which reprsents a RFC 4646 language code containging only
    259                                either a ISO 639-1 or ISO 639-2 primary language subtag.
    260                                This string is Null-terminated.
    261   @param[in]  Iso639Language   Pointer to a 3-letter ASCII string which represents
    262                                an ISO 639-2 language code. This string is not required
    263                                to be Null-terminated.
    264 
    265   @retval TRUE                 The ISO 639-2 language code was converted to a ISO 639-1 code.
    266   @retval FALSE                The language code does not have corresponding ISO 639-1 code.
    267 
    268 **/
    269 BOOLEAN
    270 EFIAPI
    271 ConvertIso639ToRfc4646 (
    272   OUT CHAR8        *Rfc4646Language,
    273   IN  CONST CHAR8  *Iso639Language
    274   )
    275 {
    276   CONST CHAR8  *Match;
    277 
    278   ASSERT (Iso639Language != NULL);
    279   ASSERT (Rfc4646Language != NULL);
    280 
    281   //
    282   // Convert first 3 characters of Iso639Language to lower case ASCII characters in Rfc4646Language
    283   //
    284   InternalLanguageLibToLower (Rfc4646Language, Iso639Language, 3);
    285   Rfc4646Language[3] = '\0';
    286 
    287   Match = mIso639ToRfc4646ConversionTable;
    288   do {
    289     Match = AsciiStrStr (Match, Rfc4646Language);
    290     if (Match == NULL) {
    291       return FALSE;
    292     }
    293     if (((Match - mIso639ToRfc4646ConversionTable) % 5) == 0) {
    294       break;
    295     }
    296     ++Match;
    297   } while (TRUE);
    298   Rfc4646Language[0] = Match[3];
    299   Rfc4646Language[1] = Match[4];
    300   Rfc4646Language[2] = '\0';
    301   return TRUE;
    302 }
    303 
    304 /**
    305   Convert a RFC 4646 language code to an ISO 639-2 language code. The primary language
    306   subtag of the RFC 4646 code must be either an ISO 639-1 or 639-2 code. If the primary
    307   language subtag is an ISO 639-1 code, then it is converted to its corresponding ISO 639-2
    308   code (T code if applies). Else the ISO 639-2 code is returned.
    309 
    310   If Rfc4646Language is NULL, then ASSERT.
    311   If Iso639Language is NULL, then ASSERT.
    312 
    313   @param[out] Iso639Language   Pointers to a buffer large enough for a 3-letter ASCII string
    314                                which reprsents an ISO 639-2 language code. The string is Null-terminated.
    315   @param[in]  Rfc4646Language  Pointer to a RFC 4646 language code string. This string is terminated
    316                                by a NULL or a ';' character.
    317 
    318   @retval TRUE                 Language code converted successfully.
    319   @retval FALSE                The RFC 4646 language code is invalid or unsupported.
    320 
    321 **/
    322 BOOLEAN
    323 EFIAPI
    324 ConvertRfc4646ToIso639 (
    325   OUT CHAR8        *Iso639Language,
    326   IN  CONST CHAR8  *Rfc4646Language
    327   )
    328 {
    329   CONST CHAR8 *Match;
    330 
    331   ASSERT (Rfc4646Language != NULL);
    332   ASSERT (Iso639Language != NULL);
    333 
    334   //
    335   // RFC 4646 language code check before determining
    336   // if the primary language subtag is ISO 639-1 or 639-2 code
    337   //
    338   if (Rfc4646Language[0] == '\0' || Rfc4646Language[1] == '\0') {
    339     return FALSE;
    340   }
    341 
    342   //
    343   // Check if the primary language subtag is ISO 639-1 code
    344   //
    345   if (Rfc4646Language[2] == ';' || Rfc4646Language[2] == '-' || Rfc4646Language[2] == '\0') {
    346     //
    347     // Convert first 2 characters of Rfc4646Language to lower case ASCII characters in Iso639Language
    348     //
    349     InternalLanguageLibToLower (Iso639Language, Rfc4646Language, 2);
    350     //
    351     // Convert ISO 639-1 code to ISO 639-2 code
    352     //
    353     Iso639Language[2] = '\0';
    354     Match = mIso639ToRfc4646ConversionTable;
    355     do {
    356       Match = AsciiStrStr (Match, Iso639Language);
    357       if (Match == NULL) {
    358         return FALSE;
    359       }
    360       if (((Match - mIso639ToRfc4646ConversionTable) % 5) == 3) {
    361         break;
    362       }
    363       ++Match;
    364     } while (TRUE);
    365     Rfc4646Language = Match - 3;
    366   } else if (!(Rfc4646Language[3] == ';' || Rfc4646Language[3] == '-' || Rfc4646Language[3] == '\0')) {
    367     return FALSE;
    368   }
    369   Iso639Language[0] = Rfc4646Language[0];
    370   Iso639Language[1] = Rfc4646Language[1];
    371   Iso639Language[2] = Rfc4646Language[2];
    372   Iso639Language[3] = '\0';
    373   return TRUE;
    374 }
    375 
    376 /**
    377   Convert ISO 639-2 language codes to RFC 4646 codes and return the converted codes.
    378   Caller is responsible for freeing the allocated buffer.
    379 
    380   If Iso639Languages is NULL, then ASSERT.
    381 
    382   @param[in] Iso639Languages  Pointers to a Null-terminated ISO 639-2 language codes string containing
    383                               one or more ISO 639-2 3-letter language codes.
    384 
    385   @retval NULL                Invalid ISO 639-2 language code found.
    386   @retval NULL                Out of memory.
    387   @return                     Pointer to the allocate buffer containing the Null-terminated converted language codes string.
    388                               This string is composed of one or more RFC4646 language codes each of which has only
    389                               ISO 639-1 2-letter primary language subtag.
    390 
    391 **/
    392 CHAR8 *
    393 EFIAPI
    394 ConvertLanguagesIso639ToRfc4646 (
    395   IN CONST CHAR8  *Iso639Languages
    396   )
    397 {
    398   UINTN  Length;
    399   UINTN  Iso639Index;
    400   UINTN  Rfc4646Index;
    401   CHAR8  *Rfc4646Languages;
    402 
    403   ASSERT (Iso639Languages != NULL);
    404 
    405   //
    406   // The length of ISO 639-2 lanugage codes string must be multiple of 3
    407   //
    408   Length = AsciiStrLen (Iso639Languages);
    409   if (Length % 3 != 0) {
    410     return NULL;
    411   }
    412 
    413   //
    414   // Allocate buffer for RFC 4646 language codes string
    415   //
    416   Rfc4646Languages = AllocatePool (Length + (Length / 3));
    417   if (Rfc4646Languages == NULL) {
    418     return NULL;
    419   }
    420 
    421   for (Iso639Index = 0, Rfc4646Index = 0; Iso639Languages[Iso639Index] != '\0'; Iso639Index += 3) {
    422     if (ConvertIso639ToRfc4646 (&Rfc4646Languages[Rfc4646Index], &Iso639Languages[Iso639Index])) {
    423       Rfc4646Index += 2;
    424     } else {
    425       Rfc4646Index += 3;
    426     }
    427     Rfc4646Languages[Rfc4646Index++] = ';';
    428   }
    429   Rfc4646Languages[Rfc4646Index - 1] = '\0';
    430   return Rfc4646Languages;
    431 }
    432 
    433 /**
    434   Convert RFC 4646 language codes to ISO 639-2 codes and return the converted codes.
    435   The primary language subtag of the RFC 4646 code must be either an ISO 639-1 or 639-2 code.
    436   Caller is responsible for freeing the allocated buffer.
    437 
    438   If Rfc4646Languages is NULL, then ASSERT.
    439 
    440   @param[in] Rfc4646Languages  Pointers to a Null-terminated RFC 4646 language codes string containing
    441                                one or more RFC 4646 language codes.
    442 
    443   @retval NULL                 Invalid or unsupported RFC 4646 language code found.
    444   @retval NULL                 Out of memory.
    445   @return                      Pointer to the allocate buffer containing the Null-terminated converted language codes string.
    446                                This string is composed of one or more ISO 639-2 language codes.
    447 
    448 **/
    449 CHAR8 *
    450 EFIAPI
    451 ConvertLanguagesRfc4646ToIso639 (
    452   IN CONST CHAR8  *Rfc4646Languages
    453   )
    454 {
    455   UINTN  NumLanguages;
    456   UINTN  Iso639Index;
    457   UINTN  Rfc4646Index;
    458   CHAR8  *Iso639Languages;
    459 
    460   ASSERT (Rfc4646Languages != NULL);
    461 
    462   //
    463   // Determine the number of languages in the RFC 4646 language codes string
    464   //
    465   for (Rfc4646Index = 0, NumLanguages = 1; Rfc4646Languages[Rfc4646Index] != '\0'; Rfc4646Index++) {
    466     if (Rfc4646Languages[Rfc4646Index] == ';') {
    467       NumLanguages++;
    468     }
    469   }
    470 
    471   //
    472   // Allocate buffer for ISO 639-2 language codes string
    473   //
    474   Iso639Languages = AllocateZeroPool (NumLanguages * 3 + 1);
    475   if (Iso639Languages == NULL) {
    476     return NULL;
    477   }
    478 
    479   //
    480   // Do the conversion for each RFC 4646 language code
    481   //
    482   for (Rfc4646Index = 0, Iso639Index = 0; Rfc4646Languages[Rfc4646Index] != '\0';) {
    483     if (ConvertRfc4646ToIso639 (&Iso639Languages[Iso639Index], &Rfc4646Languages[Rfc4646Index])) {
    484       Iso639Index += 3;
    485     } else {
    486       FreePool (Iso639Languages);
    487       return NULL;
    488     }
    489     //
    490     // Locate next language code
    491     //
    492     while (Rfc4646Languages[Rfc4646Index] != ';' && Rfc4646Languages[Rfc4646Index] != '\0') {
    493       Rfc4646Index++;
    494     }
    495     if (Rfc4646Languages[Rfc4646Index] == ';') {
    496       Rfc4646Index++;
    497     }
    498   }
    499   Iso639Languages[Iso639Index] = '\0';
    500   return Iso639Languages;
    501 }
    502