1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 // 5 // This file extends lang_enc.h with additional languages and extended routines. 6 // It is current with Unicode 5.1 (March 2008) 7 // 8 9 #ifndef ENCODINGS_COMPACT_LANG_DET_EXT_LANG_ENC_H__ 10 #define ENCODINGS_COMPACT_LANG_DET_EXT_LANG_ENC_H__ 11 12 #include "languages/public/languages.h" 13 #include "encodings/compact_lang_det/letterscript_enum.h" 14 15 16 // Leave a small gap after the base languages, so adding one or two is easy. 17 // Just reduce the gap here (currently 5 entries) 18 19 // Montengrin added, so reducing this from 5 to 4. dsites 2008.10.06 20 #define EXT_LANGUAGE_BASE (NUM_LANGUAGES + 4) 21 22 // Google UI languages 23 #define X_BORK_BORK_BORK (Language)(EXT_LANGUAGE_BASE+0) 24 #define X_PIG_LATIN (Language)(EXT_LANGUAGE_BASE+1) 25 #define X_HACKER (Language)(EXT_LANGUAGE_BASE+2) 26 #define X_KLINGON (Language)(EXT_LANGUAGE_BASE+3) 27 #define X_ELMER_FUDD (Language)(EXT_LANGUAGE_BASE+4) 28 29 // Pseudo-languages for Unicode scripts that express a single language 30 #define X_OGHAM (Language)(EXT_LANGUAGE_BASE+5) 31 #define X_RUNIC (Language)(EXT_LANGUAGE_BASE+6) 32 #define X_YI (Language)(EXT_LANGUAGE_BASE+7) 33 #define X_OLD_ITALIC (Language)(EXT_LANGUAGE_BASE+8) 34 #define X_GOTHIC (Language)(EXT_LANGUAGE_BASE+9) 35 #define X_DESERET (Language)(EXT_LANGUAGE_BASE+10) 36 #define X_HANUNOO (Language)(EXT_LANGUAGE_BASE+11) 37 #define X_BUHID (Language)(EXT_LANGUAGE_BASE+12) 38 #define X_TAGBANWA (Language)(EXT_LANGUAGE_BASE+13) 39 #define X_TAI_LE (Language)(EXT_LANGUAGE_BASE+14) 40 #define X_LINEAR_B (Language)(EXT_LANGUAGE_BASE+15) 41 #define X_UGARITIC (Language)(EXT_LANGUAGE_BASE+16) 42 #define X_SHAVIAN (Language)(EXT_LANGUAGE_BASE+17) 43 #define X_OSMANYA (Language)(EXT_LANGUAGE_BASE+18) 44 #define X_CYPRIOT (Language)(EXT_LANGUAGE_BASE+19) 45 #define X_BUGINESE (Language)(EXT_LANGUAGE_BASE+20) 46 #define X_COPTIC (Language)(EXT_LANGUAGE_BASE+21) 47 #define X_NEW_TAI_LUE (Language)(EXT_LANGUAGE_BASE+22) 48 #define X_GLAGOLITIC (Language)(EXT_LANGUAGE_BASE+23) 49 #define X_TIFINAGH (Language)(EXT_LANGUAGE_BASE+24) 50 #define X_SYLOTI_NAGRI (Language)(EXT_LANGUAGE_BASE+25) 51 #define X_OLD_PERSIAN (Language)(EXT_LANGUAGE_BASE+26) 52 #define X_KHAROSHTHI (Language)(EXT_LANGUAGE_BASE+27) 53 #define X_BALINESE (Language)(EXT_LANGUAGE_BASE+28) 54 #define X_CUNEIFORM (Language)(EXT_LANGUAGE_BASE+29) 55 #define X_PHOENICIAN (Language)(EXT_LANGUAGE_BASE+30) 56 #define X_PHAGS_PA (Language)(EXT_LANGUAGE_BASE+31) 57 #define X_NKO (Language)(EXT_LANGUAGE_BASE+32) 58 59 // Unicode 5.1 60 #define X_SUDANESE (Language)(EXT_LANGUAGE_BASE+33) 61 #define X_LEPCHA (Language)(EXT_LANGUAGE_BASE+34) 62 #define X_OL_CHIKI (Language)(EXT_LANGUAGE_BASE+35) 63 #define X_VAI (Language)(EXT_LANGUAGE_BASE+36) 64 #define X_SAURASHTRA (Language)(EXT_LANGUAGE_BASE+37) 65 #define X_KAYAH_LI (Language)(EXT_LANGUAGE_BASE+38) 66 #define X_REJANG (Language)(EXT_LANGUAGE_BASE+39) 67 #define X_LYCIAN (Language)(EXT_LANGUAGE_BASE+40) 68 #define X_CARIAN (Language)(EXT_LANGUAGE_BASE+41) 69 #define X_LYDIAN (Language)(EXT_LANGUAGE_BASE+42) 70 #define X_CHAM (Language)(EXT_LANGUAGE_BASE+43) 71 72 #define EXT_NUM_LANGUAGES (Language)(EXT_LANGUAGE_BASE+44) 73 74 75 76 // ExtLanguageName 77 // ------------ 78 // Given the Language, returns its string name used as the output by 79 // the lang/enc identifier, e.g. "Korean" 80 // "invalid_language" if the input is invalid. 81 extern const char* ExtLanguageName(const Language lang); 82 83 // ExtLanguageDeclaredName 84 // ------------ 85 // Given the Language, returns its Language enum spelling, for use by 86 // programs that create C declarations, e.g. "KOREAN" 87 // "UNKNOWN_LANGUAGE" if the input is invalid. 88 extern const char* ExtLanguageDeclaredName(const Language lang); 89 90 // ExtLanguageCode 91 // ------------ 92 // Given the Language, return the language code, e.g. "ko" 93 // This is determined by 94 // the following (in order of preference): 95 // - ISO-639-1 two-letter language code 96 // (all except those mentioned below) 97 // - ISO-639-2 three-letter bibliographic language code 98 // (Tibetan, Dhivehi, Cherokee, Syriac) 99 // - Google-specific language code 100 // (ChineseT ("zh-TW"), Teragram Unknown, Unknown, 101 // Portuguese-Portugal, Portuguese-Brazil, Limbu) 102 extern const char * ExtLanguageCode(const Language lang); 103 104 105 // Convert "en-Latn-GB" to ENGLISH 106 // Normalize to PORTUGUESE, not PORTUGUESE_B nor PORTUGUESE_P 107 // Consider for later: NORWEGIAN, NORWEGIAN_N 108 // Consider for later: SCOTS, SCOTS_GAELIC 109 // Consider for later: SERBO_CROATIAN, SERBIAN, CROATIAN, BOSNIAN 110 // 111 Language GetLanguageFromNumberOrName(const char* src); 112 113 // Convert "en-Latn-GB" to ULScript_Latin 114 UnicodeLScript GetLScriptFromNumberOrName(const char* src); 115 116 // Merge together some languages, such as bo/hr/sr 117 Language NormalizeLanguage(Language lang); 118 119 #endif // ENCODINGS_COMPACT_LANG_DET_EXT_LANG_ENC_H__ 120