Home | History | Annotate | Download | only in compact_lang_det
      1 // Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "encodings/compact_lang_det/letterscript_enum.h"
      6 
      7 #include "encodings/compact_lang_det/win/cld_logging.h"
      8 
      9 static const char* kUnicodeLScriptNames[ULScript_NUM_SCRIPTS] = {
     10   "Common",
     11   "Latin",
     12   "Greek",
     13   "Cyrillic",
     14   "Armenian",
     15   "Hebrew",
     16   "Arabic",
     17   "Syriac",
     18   "Thaana",
     19   "Devanagari",
     20   "Bengali",
     21   "Gurmukhi",
     22   "Gujarati",
     23   "Oriya",
     24   "Tamil",
     25   "Telugu",
     26   "Kannada",
     27   "Malayalam",
     28   "Sinhala",
     29   "Thai",
     30   "Lao",
     31   "Tibetan",
     32   "Myanmar",
     33   "Georgian",
     34   "HanCJK",
     35   "Ethiopic",
     36   "Cherokee",
     37   "Canadian_Aboriginal",
     38   "Ogham",
     39   "Runic",
     40   "Khmer",
     41   "Mongolian",
     42   "Yi",
     43   "Old_Italic",
     44   "Gothic",
     45   "Deseret",
     46   "Inherited",
     47   "Tagalog",
     48   "Hanunoo",
     49   "Buhid",
     50   "Tagbanwa",
     51   "Limbu",
     52   "Tai_Le",
     53   "Linear_B",
     54   "Ugaritic",
     55   "Shavian",
     56   "Osmanya",
     57   "Cypriot",
     58   "Buginese",
     59   "Coptic",
     60   "New_Tai_Lue",
     61   "Glagolitic",
     62   "Tifinagh",
     63   "Syloti_Nagri",
     64   "Old_Persian",
     65   "Kharoshthi",
     66   "Balinese",
     67   "Cuneiform",
     68   "Phoenician",
     69   "Phags_Pa",
     70   "Nko",
     71 
     72   // Unicode 5.1 beta
     73   "Sundanese",
     74   "Lepcha",
     75   "Ol_Chiki",
     76   "Vai",
     77   "Saurashtra",
     78   "Kayah_Li",
     79   "Rejang",
     80   "Lycian",
     81   "Carian",
     82   "Lydian",
     83   "Cham",
     84 };
     85 
     86 
     87 // Unicode 5.1 beta script names from
     88 // http://www.unicode.org/Public/5.1.0/diffs/5.0.0-5.1.0.all.2.diffs
     89 // NOTE: 'Vai ' => "Vaii" to make four letters, not three
     90 // see http://unicode.org/iso15924/iso15924-codes.html
     91 const char* const kLScriptName4[ULScript_NUM_SCRIPTS] = {
     92   "Zyyy", "Latn", "Grek", "Cyrl",   "Armn", "Hebr", "Arab", "Syrc",
     93   "Thaa", "Deva", "Beng", "Guru",   "Gujr", "Orya", "Taml", "Telu",
     94   "Knda", "Mlym", "Sinh", "Thai",   "Laoo", "Tibt", "Mymr", "Geor",
     95   "Hani", "Ethi", "Cher", "Cans",   "Ogam", "Runr", "Khmr", "Mong",
     96 
     97   "Yiii", "Ital", "Goth", "Dsrt",   "Zzzz", "Tglg", "Hano", "Buhd",
     98   "Tagb", "Limb", "Tale", "Linb",   "Ugar", "Shaw", "Osma", "Cprt",
     99   "Bugi", "Copt", "Talu", "Glag",   "Tfng", "Sylo", "Xpeo", "Khar",
    100   "Bali", "Xsux", "Phnx", "Phag",   "Nkoo",
    101 
    102   // Unicode 5.1 beta
    103   "Sund", "Lepc", "Olck", "Vaii", "Saur", "Kali", "Rjng", "Lyci",
    104   "Cari", "Lydi", "Cham",
    105 };
    106 
    107 
    108 const char* UnicodeLScriptName(const UnicodeLScript ls) {
    109   CHECK(ls >= 0 && ls < ULScript_NUM_SCRIPTS);
    110   return kUnicodeLScriptNames[ls];
    111 }
    112 
    113 
    114 const char* UnicodeLScriptCode(const UnicodeLScript ls) {
    115   CHECK(ls >= 0 && ls < ULScript_NUM_SCRIPTS);
    116   return kLScriptName4[ls];
    117 }
    118