1 /** @file 2 Language Library implementation that provides functions for language conversion 3 between ISO 639-2 and RFC 4646 language codes. 4 5 Copyright (c) 2009 - 2010, Intel Corporation. All rights reserved.<BR> 6 This program and the accompanying materials 7 are licensed and made available under the terms and conditions of the BSD License 8 which accompanies this distribution. The full text of the license may be found at 9 http://opensource.org/licenses/bsd-license.php 10 11 THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS" BASIS, 12 WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED. 13 14 **/ 15 16 #include <Uefi.h> 17 18 #include <Library/LanguageLib.h> 19 20 #include <Library/BaseLib.h> 21 #include <Library/DebugLib.h> 22 #include <Library/MemoryAllocationLib.h> 23 24 // 25 // Lookup table of ISO639-2 3 character language codes to ISO 639-1 2 character language codes 26 // Each entry is 5 CHAR8 values long. The first 3 CHAR8 values are the ISO 639-2 code. 27 // The last 2 CHAR8 values are the ISO 639-1 code. 28 // 29 // ISO 639-2 B codes and deprecated ISO 639-1 codes are not supported. 30 // 31 // Commonly used language codes such as English and French are put in the front of the table for quick match. 32 // 33 GLOBAL_REMOVE_IF_UNREFERENCED CONST CHAR8 mIso639ToRfc4646ConversionTable[] = 34 "\ 35 engen\ 36 frafr\ 37 aaraa\ 38 abkab\ 39 aveae\ 40 afraf\ 41 akaak\ 42 amham\ 43 argan\ 44 araar\ 45 asmas\ 46 avaav\ 47 aymay\ 48 azeaz\ 49 bakba\ 50 belbe\ 51 bulbg\ 52 bihbh\ 53 bisbi\ 54 bambm\ 55 benbn\ 56 bodbo\ 57 brebr\ 58 bosbs\ 59 catca\ 60 chece\ 61 chach\ 62 cosco\ 63 crecr\ 64 cescs\ 65 chucu\ 66 chvcv\ 67 cymcy\ 68 danda\ 69 deude\ 70 divdv\ 71 dzodz\ 72 eweee\ 73 ellel\ 74 epoeo\ 75 spaes\ 76 estet\ 77 euseu\ 78 fasfa\ 79 fulff\ 80 finfi\ 81 fijfj\ 82 faofo\ 83 fryfy\ 84 glega\ 85 glagd\ 86 glggl\ 87 grngn\ 88 gujgu\ 89 glvgv\ 90 hauha\ 91 hebhe\ 92 hinhi\ 93 hmoho\ 94 hrvhr\ 95 hatht\ 96 hunhu\ 97 hyehy\ 98 herhz\ 99 inaia\ 100 indid\ 101 ileie\ 102 iboig\ 103 iiiii\ 104 ipkik\ 105 idoio\ 106 islis\ 107 itait\ 108 ikuiu\ 109 jpnja\ 110 javjv\ 111 katka\ 112 konkg\ 113 kikki\ 114 kuakj\ 115 kazkk\ 116 kalkl\ 117 khmkm\ 118 kankn\ 119 korko\ 120 kaukr\ 121 kasks\ 122 kurku\ 123 komkv\ 124 corkw\ 125 kirky\ 126 latla\ 127 ltzlb\ 128 luglg\ 129 limli\ 130 linln\ 131 laolo\ 132 litlt\ 133 lublu\ 134 lavlv\ 135 mlgmg\ 136 mahmh\ 137 mrimi\ 138 mkdmk\ 139 malml\ 140 monmn\ 141 marmr\ 142 msams\ 143 mltmt\ 144 myamy\ 145 nauna\ 146 nobnb\ 147 ndend\ 148 nepne\ 149 ndong\ 150 nldnl\ 151 nnonn\ 152 norno\ 153 nblnr\ 154 navnv\ 155 nyany\ 156 ocioc\ 157 ojioj\ 158 ormom\ 159 orior\ 160 ossos\ 161 panpa\ 162 plipi\ 163 polpl\ 164 pusps\ 165 porpt\ 166 quequ\ 167 rohrm\ 168 runrn\ 169 ronro\ 170 rusru\ 171 kinrw\ 172 sansa\ 173 srdsc\ 174 sndsd\ 175 smese\ 176 sagsg\ 177 sinsi\ 178 slksk\ 179 slvsl\ 180 smosm\ 181 snasn\ 182 somso\ 183 sqisq\ 184 srpsr\ 185 sswss\ 186 sotst\ 187 sunsu\ 188 swesv\ 189 swasw\ 190 tamta\ 191 telte\ 192 tgktg\ 193 thath\ 194 tirti\ 195 tuktk\ 196 tgltl\ 197 tsntn\ 198 tonto\ 199 turtr\ 200 tsots\ 201 tattt\ 202 twitw\ 203 tahty\ 204 uigug\ 205 ukruk\ 206 urdur\ 207 uzbuz\ 208 venve\ 209 vievi\ 210 volvo\ 211 wlnwa\ 212 wolwo\ 213 xhoxh\ 214 yidyi\ 215 yoryo\ 216 zhaza\ 217 zhozh\ 218 zulzu\ 219 "; 220 221 /** 222 Converts upper case ASCII characters in an ASCII string to lower case ASCII 223 characters in an ASCII string. 224 225 If a an ASCII character in Source is in the range 'A'..'Z', then it is converted 226 to an ASCII character in the range 'a'..'z' in Destination. Otherwise, no 227 conversion is performed. Length ASCII characters from Source are convertered and 228 stored in Destination. 229 230 @param Destination An ASCII string to store the results of the conversion. 231 @param Source The source ASCII string of the conversion. 232 @param Length The number of ASCII characters to convert. 233 234 **/ 235 VOID 236 EFIAPI 237 InternalLanguageLibToLower ( 238 OUT CHAR8 *Destination, 239 IN CONST CHAR8 *Source, 240 IN UINTN Length 241 ) 242 { 243 for (; Length > 0; Length--, Destination++, Source++) { 244 *Destination = (CHAR8)((*Source >= 'A' && *Source <= 'Z') ? *Source + ('a' - 'A') : *Source); 245 } 246 } 247 248 /** 249 Convert an ISO 639-2 language code to a RFC 4646 language code. 250 If the ISO 639-2 language code has a corresponding ISO 639-1 code, then the ISO 639-1 251 code is returned. Else the original ISO 639-2 code is returned. The returned RFC 4646 252 language code is composed of only a primary language subtag. 253 254 If Iso639Language is NULL, then ASSERT. 255 If Rfc4646Language is NULL, then ASSERT. 256 257 @param[out] Rfc4646Language Pointers to a buffer large enough for an ASCII string 258 which reprsents a RFC 4646 language code containging only 259 either a ISO 639-1 or ISO 639-2 primary language subtag. 260 This string is Null-terminated. 261 @param[in] Iso639Language Pointer to a 3-letter ASCII string which represents 262 an ISO 639-2 language code. This string is not required 263 to be Null-terminated. 264 265 @retval TRUE The ISO 639-2 language code was converted to a ISO 639-1 code. 266 @retval FALSE The language code does not have corresponding ISO 639-1 code. 267 268 **/ 269 BOOLEAN 270 EFIAPI 271 ConvertIso639ToRfc4646 ( 272 OUT CHAR8 *Rfc4646Language, 273 IN CONST CHAR8 *Iso639Language 274 ) 275 { 276 CONST CHAR8 *Match; 277 278 ASSERT (Iso639Language != NULL); 279 ASSERT (Rfc4646Language != NULL); 280 281 // 282 // Convert first 3 characters of Iso639Language to lower case ASCII characters in Rfc4646Language 283 // 284 InternalLanguageLibToLower (Rfc4646Language, Iso639Language, 3); 285 Rfc4646Language[3] = '\0'; 286 287 Match = mIso639ToRfc4646ConversionTable; 288 do { 289 Match = AsciiStrStr (Match, Rfc4646Language); 290 if (Match == NULL) { 291 return FALSE; 292 } 293 if (((Match - mIso639ToRfc4646ConversionTable) % 5) == 0) { 294 break; 295 } 296 ++Match; 297 } while (TRUE); 298 Rfc4646Language[0] = Match[3]; 299 Rfc4646Language[1] = Match[4]; 300 Rfc4646Language[2] = '\0'; 301 return TRUE; 302 } 303 304 /** 305 Convert a RFC 4646 language code to an ISO 639-2 language code. The primary language 306 subtag of the RFC 4646 code must be either an ISO 639-1 or 639-2 code. If the primary 307 language subtag is an ISO 639-1 code, then it is converted to its corresponding ISO 639-2 308 code (T code if applies). Else the ISO 639-2 code is returned. 309 310 If Rfc4646Language is NULL, then ASSERT. 311 If Iso639Language is NULL, then ASSERT. 312 313 @param[out] Iso639Language Pointers to a buffer large enough for a 3-letter ASCII string 314 which reprsents an ISO 639-2 language code. The string is Null-terminated. 315 @param[in] Rfc4646Language Pointer to a RFC 4646 language code string. This string is terminated 316 by a NULL or a ';' character. 317 318 @retval TRUE Language code converted successfully. 319 @retval FALSE The RFC 4646 language code is invalid or unsupported. 320 321 **/ 322 BOOLEAN 323 EFIAPI 324 ConvertRfc4646ToIso639 ( 325 OUT CHAR8 *Iso639Language, 326 IN CONST CHAR8 *Rfc4646Language 327 ) 328 { 329 CONST CHAR8 *Match; 330 331 ASSERT (Rfc4646Language != NULL); 332 ASSERT (Iso639Language != NULL); 333 334 // 335 // RFC 4646 language code check before determining 336 // if the primary language subtag is ISO 639-1 or 639-2 code 337 // 338 if (Rfc4646Language[0] == '\0' || Rfc4646Language[1] == '\0') { 339 return FALSE; 340 } 341 342 // 343 // Check if the primary language subtag is ISO 639-1 code 344 // 345 if (Rfc4646Language[2] == ';' || Rfc4646Language[2] == '-' || Rfc4646Language[2] == '\0') { 346 // 347 // Convert first 2 characters of Rfc4646Language to lower case ASCII characters in Iso639Language 348 // 349 InternalLanguageLibToLower (Iso639Language, Rfc4646Language, 2); 350 // 351 // Convert ISO 639-1 code to ISO 639-2 code 352 // 353 Iso639Language[2] = '\0'; 354 Match = mIso639ToRfc4646ConversionTable; 355 do { 356 Match = AsciiStrStr (Match, Iso639Language); 357 if (Match == NULL) { 358 return FALSE; 359 } 360 if (((Match - mIso639ToRfc4646ConversionTable) % 5) == 3) { 361 break; 362 } 363 ++Match; 364 } while (TRUE); 365 Rfc4646Language = Match - 3; 366 } else if (!(Rfc4646Language[3] == ';' || Rfc4646Language[3] == '-' || Rfc4646Language[3] == '\0')) { 367 return FALSE; 368 } 369 Iso639Language[0] = Rfc4646Language[0]; 370 Iso639Language[1] = Rfc4646Language[1]; 371 Iso639Language[2] = Rfc4646Language[2]; 372 Iso639Language[3] = '\0'; 373 return TRUE; 374 } 375 376 /** 377 Convert ISO 639-2 language codes to RFC 4646 codes and return the converted codes. 378 Caller is responsible for freeing the allocated buffer. 379 380 If Iso639Languages is NULL, then ASSERT. 381 382 @param[in] Iso639Languages Pointers to a Null-terminated ISO 639-2 language codes string containing 383 one or more ISO 639-2 3-letter language codes. 384 385 @retval NULL Invalid ISO 639-2 language code found. 386 @retval NULL Out of memory. 387 @return Pointer to the allocate buffer containing the Null-terminated converted language codes string. 388 This string is composed of one or more RFC4646 language codes each of which has only 389 ISO 639-1 2-letter primary language subtag. 390 391 **/ 392 CHAR8 * 393 EFIAPI 394 ConvertLanguagesIso639ToRfc4646 ( 395 IN CONST CHAR8 *Iso639Languages 396 ) 397 { 398 UINTN Length; 399 UINTN Iso639Index; 400 UINTN Rfc4646Index; 401 CHAR8 *Rfc4646Languages; 402 403 ASSERT (Iso639Languages != NULL); 404 405 // 406 // The length of ISO 639-2 lanugage codes string must be multiple of 3 407 // 408 Length = AsciiStrLen (Iso639Languages); 409 if (Length % 3 != 0) { 410 return NULL; 411 } 412 413 // 414 // Allocate buffer for RFC 4646 language codes string 415 // 416 Rfc4646Languages = AllocatePool (Length + (Length / 3)); 417 if (Rfc4646Languages == NULL) { 418 return NULL; 419 } 420 421 for (Iso639Index = 0, Rfc4646Index = 0; Iso639Languages[Iso639Index] != '\0'; Iso639Index += 3) { 422 if (ConvertIso639ToRfc4646 (&Rfc4646Languages[Rfc4646Index], &Iso639Languages[Iso639Index])) { 423 Rfc4646Index += 2; 424 } else { 425 Rfc4646Index += 3; 426 } 427 Rfc4646Languages[Rfc4646Index++] = ';'; 428 } 429 Rfc4646Languages[Rfc4646Index - 1] = '\0'; 430 return Rfc4646Languages; 431 } 432 433 /** 434 Convert RFC 4646 language codes to ISO 639-2 codes and return the converted codes. 435 The primary language subtag of the RFC 4646 code must be either an ISO 639-1 or 639-2 code. 436 Caller is responsible for freeing the allocated buffer. 437 438 If Rfc4646Languages is NULL, then ASSERT. 439 440 @param[in] Rfc4646Languages Pointers to a Null-terminated RFC 4646 language codes string containing 441 one or more RFC 4646 language codes. 442 443 @retval NULL Invalid or unsupported RFC 4646 language code found. 444 @retval NULL Out of memory. 445 @return Pointer to the allocate buffer containing the Null-terminated converted language codes string. 446 This string is composed of one or more ISO 639-2 language codes. 447 448 **/ 449 CHAR8 * 450 EFIAPI 451 ConvertLanguagesRfc4646ToIso639 ( 452 IN CONST CHAR8 *Rfc4646Languages 453 ) 454 { 455 UINTN NumLanguages; 456 UINTN Iso639Index; 457 UINTN Rfc4646Index; 458 CHAR8 *Iso639Languages; 459 460 ASSERT (Rfc4646Languages != NULL); 461 462 // 463 // Determine the number of languages in the RFC 4646 language codes string 464 // 465 for (Rfc4646Index = 0, NumLanguages = 1; Rfc4646Languages[Rfc4646Index] != '\0'; Rfc4646Index++) { 466 if (Rfc4646Languages[Rfc4646Index] == ';') { 467 NumLanguages++; 468 } 469 } 470 471 // 472 // Allocate buffer for ISO 639-2 language codes string 473 // 474 Iso639Languages = AllocateZeroPool (NumLanguages * 3 + 1); 475 if (Iso639Languages == NULL) { 476 return NULL; 477 } 478 479 // 480 // Do the conversion for each RFC 4646 language code 481 // 482 for (Rfc4646Index = 0, Iso639Index = 0; Rfc4646Languages[Rfc4646Index] != '\0';) { 483 if (ConvertRfc4646ToIso639 (&Iso639Languages[Iso639Index], &Rfc4646Languages[Rfc4646Index])) { 484 Iso639Index += 3; 485 } else { 486 FreePool (Iso639Languages); 487 return NULL; 488 } 489 // 490 // Locate next language code 491 // 492 while (Rfc4646Languages[Rfc4646Index] != ';' && Rfc4646Languages[Rfc4646Index] != '\0') { 493 Rfc4646Index++; 494 } 495 if (Rfc4646Languages[Rfc4646Index] == ';') { 496 Rfc4646Index++; 497 } 498 } 499 Iso639Languages[Iso639Index] = '\0'; 500 return Iso639Languages; 501 } 502