Home | History | Annotate | Download | only in compact_lang_det
      1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 //
      5 // This file extends lang_enc.h with additional languages and extended routines.
      6 // It is current with Unicode 5.1 (March 2008)
      7 //
      8 
      9 #ifndef ENCODINGS_COMPACT_LANG_DET_EXT_LANG_ENC_H__
     10 #define ENCODINGS_COMPACT_LANG_DET_EXT_LANG_ENC_H__
     11 
     12 #include "languages/public/languages.h"
     13 #include "encodings/compact_lang_det/letterscript_enum.h"
     14 
     15 
     16 // Leave a small gap after the base languages, so adding one or two is easy.
     17 // Just reduce the gap here (currently 5 entries)
     18 
     19 // Montengrin added, so reducing this from 5 to 4. dsites 2008.10.06
     20 #define EXT_LANGUAGE_BASE (NUM_LANGUAGES + 4)
     21 
     22 // Google UI languages
     23 #define X_BORK_BORK_BORK (Language)(EXT_LANGUAGE_BASE+0)
     24 #define X_PIG_LATIN (Language)(EXT_LANGUAGE_BASE+1)
     25 #define X_HACKER (Language)(EXT_LANGUAGE_BASE+2)
     26 #define X_KLINGON (Language)(EXT_LANGUAGE_BASE+3)
     27 #define X_ELMER_FUDD (Language)(EXT_LANGUAGE_BASE+4)
     28 
     29 // Pseudo-languages for Unicode scripts that express a single language
     30 #define X_OGHAM (Language)(EXT_LANGUAGE_BASE+5)
     31 #define X_RUNIC (Language)(EXT_LANGUAGE_BASE+6)
     32 #define X_YI (Language)(EXT_LANGUAGE_BASE+7)
     33 #define X_OLD_ITALIC (Language)(EXT_LANGUAGE_BASE+8)
     34 #define X_GOTHIC (Language)(EXT_LANGUAGE_BASE+9)
     35 #define X_DESERET (Language)(EXT_LANGUAGE_BASE+10)
     36 #define X_HANUNOO (Language)(EXT_LANGUAGE_BASE+11)
     37 #define X_BUHID (Language)(EXT_LANGUAGE_BASE+12)
     38 #define X_TAGBANWA (Language)(EXT_LANGUAGE_BASE+13)
     39 #define X_TAI_LE (Language)(EXT_LANGUAGE_BASE+14)
     40 #define X_LINEAR_B (Language)(EXT_LANGUAGE_BASE+15)
     41 #define X_UGARITIC (Language)(EXT_LANGUAGE_BASE+16)
     42 #define X_SHAVIAN (Language)(EXT_LANGUAGE_BASE+17)
     43 #define X_OSMANYA (Language)(EXT_LANGUAGE_BASE+18)
     44 #define X_CYPRIOT (Language)(EXT_LANGUAGE_BASE+19)
     45 #define X_BUGINESE (Language)(EXT_LANGUAGE_BASE+20)
     46 #define X_COPTIC (Language)(EXT_LANGUAGE_BASE+21)
     47 #define X_NEW_TAI_LUE (Language)(EXT_LANGUAGE_BASE+22)
     48 #define X_GLAGOLITIC (Language)(EXT_LANGUAGE_BASE+23)
     49 #define X_TIFINAGH (Language)(EXT_LANGUAGE_BASE+24)
     50 #define X_SYLOTI_NAGRI (Language)(EXT_LANGUAGE_BASE+25)
     51 #define X_OLD_PERSIAN (Language)(EXT_LANGUAGE_BASE+26)
     52 #define X_KHAROSHTHI (Language)(EXT_LANGUAGE_BASE+27)
     53 #define X_BALINESE (Language)(EXT_LANGUAGE_BASE+28)
     54 #define X_CUNEIFORM (Language)(EXT_LANGUAGE_BASE+29)
     55 #define X_PHOENICIAN (Language)(EXT_LANGUAGE_BASE+30)
     56 #define X_PHAGS_PA (Language)(EXT_LANGUAGE_BASE+31)
     57 #define X_NKO (Language)(EXT_LANGUAGE_BASE+32)
     58 
     59 // Unicode 5.1
     60 #define X_SUDANESE (Language)(EXT_LANGUAGE_BASE+33)
     61 #define X_LEPCHA (Language)(EXT_LANGUAGE_BASE+34)
     62 #define X_OL_CHIKI (Language)(EXT_LANGUAGE_BASE+35)
     63 #define X_VAI (Language)(EXT_LANGUAGE_BASE+36)
     64 #define X_SAURASHTRA (Language)(EXT_LANGUAGE_BASE+37)
     65 #define X_KAYAH_LI (Language)(EXT_LANGUAGE_BASE+38)
     66 #define X_REJANG (Language)(EXT_LANGUAGE_BASE+39)
     67 #define X_LYCIAN (Language)(EXT_LANGUAGE_BASE+40)
     68 #define X_CARIAN (Language)(EXT_LANGUAGE_BASE+41)
     69 #define X_LYDIAN (Language)(EXT_LANGUAGE_BASE+42)
     70 #define X_CHAM (Language)(EXT_LANGUAGE_BASE+43)
     71 
     72 #define EXT_NUM_LANGUAGES (Language)(EXT_LANGUAGE_BASE+44)
     73 
     74 
     75 
     76 // ExtLanguageName
     77 // ------------
     78 // Given the Language, returns its string name used as the output by
     79 // the lang/enc identifier, e.g. "Korean"
     80 // "invalid_language" if the input is invalid.
     81 extern const char* ExtLanguageName(const Language lang);
     82 
     83 // ExtLanguageDeclaredName
     84 // ------------
     85 // Given the Language, returns its Language enum spelling, for use by
     86 // programs that create C declarations, e.g. "KOREAN"
     87 // "UNKNOWN_LANGUAGE" if the input is invalid.
     88 extern const char* ExtLanguageDeclaredName(const Language lang);
     89 
     90 // ExtLanguageCode
     91 // ------------
     92 // Given the Language, return the language code, e.g. "ko"
     93 // This is determined by
     94 // the following (in order of preference):
     95 // - ISO-639-1 two-letter language code
     96 //   (all except those mentioned below)
     97 // - ISO-639-2 three-letter bibliographic language code
     98 //   (Tibetan, Dhivehi, Cherokee, Syriac)
     99 // - Google-specific language code
    100 //   (ChineseT ("zh-TW"), Teragram Unknown, Unknown,
    101 //   Portuguese-Portugal, Portuguese-Brazil, Limbu)
    102 extern const char * ExtLanguageCode(const Language lang);
    103 
    104 
    105 // Convert "en-Latn-GB" to ENGLISH
    106 // Normalize to PORTUGUESE, not PORTUGUESE_B nor PORTUGUESE_P
    107 // Consider for later: NORWEGIAN, NORWEGIAN_N
    108 // Consider for later: SCOTS, SCOTS_GAELIC
    109 // Consider for later: SERBO_CROATIAN, SERBIAN, CROATIAN, BOSNIAN
    110 //
    111 Language GetLanguageFromNumberOrName(const char* src);
    112 
    113 // Convert "en-Latn-GB" to ULScript_Latin
    114 UnicodeLScript GetLScriptFromNumberOrName(const char* src);
    115 
    116 // Merge together some languages, such as bo/hr/sr
    117 Language NormalizeLanguage(Language lang);
    118 
    119 #endif  // ENCODINGS_COMPACT_LANG_DET_EXT_LANG_ENC_H__
    120