Home | History | Annotate | Download | only in scripts
      1 #!/bin/bash
      2 # Copyright (c) 2014 The Chromium Authors. All rights reserved.
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 
      7 # Remove display names for languages that are not listed in the accept-language
      8 # list of Chromium.
      9 function filter_display_language_names {
     10   for lang in $(grep -v '^#' accept_lang.list)
     11   do
     12     # Set $OP to '|' only if $ACCEPT_LANG_PATTERN is not empty.
     13     OP=${ACCEPT_LANG_PATTERN:+|}
     14     ACCEPT_LANG_PATTERN="${ACCEPT_LANG_PATTERN}${OP}${lang}"
     15   done
     16   ACCEPT_LANG_PATTERN="(${ACCEPT_LANG_PATTERN})[^a-z]"
     17 
     18   echo "Filtering out display names for non-A-L languages ${langdatapath}"
     19   for lang in $(grep -v '^#' chrome_ui_languages.list)
     20   do
     21     target=${langdatapath}/${lang}.txt
     22     echo Overwriting ${target} ...
     23     sed -r -i \
     24     '/^    Keys\{$/,/^    \}$/d
     25      /^    Languages\{$/, /^    \}$/ {
     26        /^    Languages\{$/p
     27        /^        '${ACCEPT_LANG_PATTERN}'/p
     28        /^    \}$/p
     29        d
     30      }
     31      /^    Types\{$/,/^    \}$/d
     32      /^    Variants\{$/,/^    \}$/d' ${target}
     33   done
     34 }
     35 
     36 
     37 # Keep only the minimum locale data for non-UI languages.
     38 function abridge_locale_data_for_non_ui_languages {
     39   for lang in $(grep -v '^#' chrome_ui_languages.list)
     40   do
     41     # Set $OP to '|' only if $UI_LANGUAGES is not empty.
     42     OP=${UI_LANGUAGES:+|}
     43     UI_LANGUAGES="${UI_LANGUAGES}${OP}${lang}"
     44   done
     45 
     46   EXTRA_LANGUAGES=$(egrep -v -e '^#' -e "(${UI_LANGUAGES})" accept_lang.list)
     47 
     48   echo Creating minimum locale data in ${localedatapath}
     49   for lang in ${EXTRA_LANGUAGES}
     50   do
     51     target=${localedatapath}/${lang}.txt
     52     [  -e ${target} ] || { echo "missing ${lang}"; continue; }
     53     echo Overwriting ${target} ...
     54     sed -n -r -i \
     55       '1, /^'${lang}'\{$/p
     56        /^    "%%ALIAS"\{/p
     57        /^    AuxExemplarCharacters\{.*\}$/p
     58        /^    AuxExemplarCharacters\{$/, /^    \}$/p
     59        /^    ExemplarCharacters\{.*\}$/p
     60        /^    ExemplarCharacters\{$/, /^    \}$/p
     61        /^    (LocaleScript|layout)\{$/, /^    \}$/p
     62        /^    Version\{.*$/p
     63        /^\}$/p' ${target}
     64   done
     65 
     66   echo Creating minimum locale data in ${langdatapath}
     67   for lang in ${EXTRA_LANGUAGES}
     68   do
     69     target=${langdatapath}/${lang}.txt
     70     [  -e ${target} ] || { echo "missing ${lang}"; continue; }
     71     echo Overwriting ${target} ...
     72     sed -n -r -i \
     73       '1, /^'${lang}'\{$/p
     74        /^    Languages\{$/, /^    \}$/ {
     75          /^    Languages\{$/p
     76          /^        '${lang}'\{.*\}$/p
     77          /^    \}$/p
     78        }
     79        /^\}$/p' ${target}
     80   done
     81 }
     82 
     83 # Drop historic currencies.
     84 # TODO(jshin): Use ucurr_isAvailable in ICU to drop more currencies.
     85 # See also http://en.wikipedia.org/wiki/List_of_circulating_currencies
     86 function filter_currency_data {
     87   for currency in $(grep -v '^#' currencies_to_drop.list)
     88   do
     89     OP=${DROPLIST:+|}
     90     DROPLIST=${DROPLIST}${OP}${currency}
     91   done
     92   DROPLIST="(${DROPLIST})\{"
     93 
     94   cd "${dataroot}/curr"
     95   for i in *.txt
     96   do
     97     [ $i != 'supplementalData.txt' ] && \
     98     sed -r -i '/^        '$DROPLIST'/, /^        }/ d' $i
     99   done
    100 }
    101 
    102 # Remove the display names for numeric region codes other than
    103 # 419 (Latin America) because we don't use them.
    104 function filter_region_data {
    105   cd "${dataroot}/region"
    106   sed -i  '/[0-35-9][0-9][0-9]{/ d' *.txt
    107 }
    108 
    109 
    110 
    111 function remove_exemplar_cities {
    112   cd "${dataroot}/zone"
    113   for i in *.txt
    114   do
    115     [ $i != 'root.txt' ] && \
    116     sed -i '/^    zoneStrings/, /^        "meta:/ {
    117       /^    zoneStrings/ p
    118       /^        "meta:/ p
    119       d
    120     }' $i
    121   done
    122 }
    123 
    124 # Keep only duration and compound in units* sections.
    125 function filter_locale_data {
    126   for i in ${dataroot}/locales/*.txt
    127   do
    128     echo Overwriting $i ...
    129     sed -r -i \
    130       '/^    units(|Narrow|Short)\{$/, /^    \}$/ {
    131          /^    units(|Narrow|Short)\{$/ p
    132          /^        (duration|compound)\{$/, /^        \}$/ p
    133          /^    \}$/ p
    134          d
    135        }' ${i}
    136   done
    137 }
    138 
    139 # big5han and gb2312han collation do not make any sense and nobody uses them.
    140 function remove_legacy_chinese_codepoint_collation {
    141   echo "Removing Big5 / GB2312 collation data from Chinese locale"
    142   target="${dataroot}/coll/zh.txt"
    143   echo "Overwriting ${target}"
    144   sed -r -i '/^        (big5|gb2312)han\{$/,/^        \}$/ d' ${target}
    145 }
    146 
    147 dataroot="$(dirname $0)/../source/data"
    148 localedatapath="${dataroot}/locales"
    149 langdatapath="${dataroot}/lang"
    150 
    151 
    152 
    153 filter_display_language_names
    154 abridge_locale_data_for_non_ui_languages
    155 filter_currency_data
    156 filter_region_data
    157 remove_legacy_chinese_codepoint_collation
    158 filter_locale_data
    159 
    160 # Chromium OS needs exemplar cities for timezones, but not Chromium.
    161 # It'll save 400kB (uncompressed), but the size difference in
    162 # 7z compressed installer is <= 100kB.
    163 # TODO(jshin): Make separate data files for CrOS and Chromium.
    164 #remove_exemplar_cities
    165