#!/bin/bash
# Copyright (c) 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

# Trims the text data files under ../source/data (relative to this script)
# in place, using GNU sed (-r / -i).
#
# The *.list files (accept_lang.list, chrome_ui_languages.list and
# currencies_to_drop.list) are read from the current working directory.
# None of the functions below changes the working directory, so a relative
# data root keeps resolving to the same place for every step.
#
# Every step is best-effort: a function that cannot find its inputs prints a
# message, returns non-zero, and the script carries on with the next step.
#
# NOTE(review): the whitespace that follows '^' inside the sed patterns has to
# equal the indentation used by the data files. The patterns are reproduced
# exactly as they appeared in the copy of this script this revision was made
# from, in which runs of spaces may have been collapsed: several nested ranges
# use the same terminator as their enclosing range, which suggests deeper
# indentation was lost. Confirm against the data files before running.


# Prints the whitespace-separated entries of a list file, one per line.
# Lines starting with '#' are skipped.
# Arguments: $1 - path of the list file
# Returns:   1 (with a message on stderr) if the file cannot be read
function list_entries {
  local list_file=$1
  if [[ ! -r "${list_file}" ]]; then
    echo "cannot read ${list_file}" >&2
    return 1
  fi

  local line
  local -a words
  # The '|| [[ -n ... ]]' part keeps a last line without trailing newline.
  while IFS= read -r line || [[ -n "${line}" ]]; do
    if [[ "${line}" == '#'* ]]; then
      continue
    fi
    words=()
    read -r -a words <<< "${line}"
    if (( ${#words[@]} > 0 )); then
      printf '%s\n' "${words[@]}"
    fi
  done < "${list_file}"
  return 0
}

# Prints the entries of a list file joined with '|' (e.g. "en|fr|de"), ready
# to be wrapped in a regex group.
# Arguments: $1 - path of the list file
# Returns:   1 if the file cannot be read or holds no entries; an empty
#            alternation would turn into a pattern that matches everywhere.
function alternation_from_list {
  local entries
  entries=$(list_entries "$1") || return 1
  if [[ -z "${entries}" ]]; then
    echo "no entries in $1" >&2
    return 1
  fi
  printf '%s\n' "${entries//$'\n'/|}"
}


# Remove display names for languages that are not listed in the accept-language
# list of Chromium.
# For every language in chrome_ui_languages.list, ${langdatapath}/<lang>.txt
# loses its Keys, Types and Variants sections, and its Languages section keeps
# only the entries named in accept_lang.list.
# Globals: langdatapath (read)
function filter_display_language_names {
  local accept_lang_pattern
  accept_lang_pattern=$(alternation_from_list accept_lang.list) || return 1
  # The trailing [^a-z] stops "en" from also matching e.g. "eng".
  accept_lang_pattern="(${accept_lang_pattern})[^a-z]"

  local ui_languages
  ui_languages=$(list_entries chrome_ui_languages.list) || return 1

  echo "Filtering out display names for non-A-L languages ${langdatapath}"
  local lang target
  while IFS= read -r lang; do
    if [[ -z "${lang}" ]]; then
      continue
    fi
    target="${langdatapath}/${lang}.txt"
    if [[ ! -e "${target}" ]]; then
      echo "missing ${lang}"
      continue
    fi
    echo "Overwriting ${target} ..."
    # Inside the Languages range: re-print the header, the accepted entries
    # and the closing brace, then delete everything (p + d, no -n).
    sed -r -i \
      '/^ Keys\{$/,/^ \}$/d
       /^ Languages\{$/, /^ \}$/ {
         /^ Languages\{$/p
         /^ '"${accept_lang_pattern}"'/p
         /^ \}$/p
         d
       }
       /^ Types\{$/,/^ \}$/d
       /^ Variants\{$/,/^ \}$/d' "${target}"
  done <<< "${ui_languages}"
}


# Keep only the minimum locale data for non-UI languages.
# "Non-UI" languages are the accept_lang.list entries that do not match any
# entry of chrome_ui_languages.list (unanchored regex match, so a UI language
# "xx" also excludes "xx_YY"). Their files under ${localedatapath} and
# ${langdatapath} are cut down to the sections selected below.
# Globals: localedatapath, langdatapath (read)
function abridge_locale_data_for_non_ui_languages {
  local ui_languages
  ui_languages=$(alternation_from_list chrome_ui_languages.list) || return 1

  local accept_entries
  accept_entries=$(list_entries accept_lang.list) || return 1

  local -a extra_languages=()
  local lang target
  while IFS= read -r lang; do
    if [[ -n "${lang}" ]]; then
      extra_languages+=("${lang}")
    fi
  done < <(grep -E -v -e "(${ui_languages})" <<< "${accept_entries}")

  echo "Creating minimum locale data in ${localedatapath}"
  for lang in "${extra_languages[@]}"; do
    target="${localedatapath}/${lang}.txt"
    if [[ ! -e "${target}" ]]; then
      echo "missing ${lang}"
      continue
    fi
    echo "Overwriting ${target} ..."
    # -n: only what is explicitly printed survives.
    sed -n -r -i \
      '1, /^'"${lang}"'\{$/p
       /^ "%%ALIAS"\{/p
       /^ AuxExemplarCharacters\{.*\}$/p
       /^ AuxExemplarCharacters\{$/, /^ \}$/p
       /^ ExemplarCharacters\{.*\}$/p
       /^ ExemplarCharacters\{$/, /^ \}$/p
       /^ (LocaleScript|layout)\{$/, /^ \}$/p
       /^ Version\{.*$/p
       /^\}$/p' "${target}"
  done

  echo "Creating minimum locale data in ${langdatapath}"
  for lang in "${extra_languages[@]}"; do
    target="${langdatapath}/${lang}.txt"
    if [[ ! -e "${target}" ]]; then
      echo "missing ${lang}"
      continue
    fi
    echo "Overwriting ${target} ..."
    # Keep the file header, the language's own display name and the braces.
    sed -n -r -i \
      '1, /^'"${lang}"'\{$/p
       /^ Languages\{$/, /^ \}$/ {
         /^ Languages\{$/p
         /^ '"${lang}"'\{.*\}$/p
         /^ \}$/p
       }
       /^\}$/p' "${target}"
  done
}

# Drop historic currencies.
# Deletes the entries named in currencies_to_drop.list from every *.txt file
# in ${dataroot}/curr except supplementalData.txt.
# Globals: dataroot (read)
# TODO(jshin): Use ucurr_isAvailable in ICU to drop more currencies.
# See also http://en.wikipedia.org/wiki/List_of_circulating_currencies
function filter_currency_data {
  local droplist
  droplist=$(alternation_from_list currencies_to_drop.list) || return 1
  droplist="(${droplist})\{"

  local path
  for path in "${dataroot}/curr"/*.txt; do
    # An unmatched glob stays literal; skip it instead of feeding it to sed.
    if [[ ! -e "${path}" ]]; then
      continue
    fi
    if [[ "${path##*/}" == 'supplementalData.txt' ]]; then
      continue
    fi
    sed -r -i '/^ '"${droplist}"'/, /^ }/ d' "${path}"
  done
}

# Remove the display names for numeric region codes because we don't use
# them. The pattern skips codes whose first digit is 4, so 419 (Latin
# America) is kept - and so is every other 4xx code.
# Globals: dataroot (read)
function filter_region_data {
  local -a region_files=("${dataroot}/region"/*.txt)
  if [[ ! -e "${region_files[0]}" ]]; then
    echo "no region data in ${dataroot}/region" >&2
    return 1
  fi
  sed -i '/[0-35-9][0-9][0-9]{/ d' "${region_files[@]}"
}



# Empties the zoneStrings section of every *.txt file in ${dataroot}/zone
# except root.txt: from the zoneStrings line through the first "meta: line,
# only those two lines are kept.
# Globals: dataroot (read)
function remove_exemplar_cities {
  local path
  for path in "${dataroot}/zone"/*.txt; do
    if [[ ! -e "${path}" ]]; then
      continue
    fi
    if [[ "${path##*/}" == 'root.txt' ]]; then
      continue
    fi
    sed -i '/^ zoneStrings/, /^ "meta:/ {
      /^ zoneStrings/ p
      /^ "meta:/ p
      d
    }' "${path}"
  done
}

# Keep only duration and compound in units* sections.
# Applies to units, unitsNarrow and unitsShort in every *.txt file under
# ${dataroot}/locales.
# Globals: dataroot (read)
function filter_locale_data {
  local path
  for path in "${dataroot}/locales"/*.txt; do
    if [[ ! -e "${path}" ]]; then
      continue
    fi
    echo "Overwriting ${path} ..."
    sed -r -i \
      '/^ units(|Narrow|Short)\{$/, /^ \}$/ {
         /^ units(|Narrow|Short)\{$/ p
         /^ (duration|compound)\{$/, /^ \}$/ p
         /^ \}$/ p
         d
       }' "${path}"
  done
}

# big5han and gb2312han collation do not make any sense and nobody uses them.
# Globals: dataroot (read)
function remove_legacy_chinese_codepoint_collation {
  echo "Removing Big5 / GB2312 collation data from Chinese locale"
  local target="${dataroot}/coll/zh.txt"
  if [[ ! -e "${target}" ]]; then
    echo "missing ${target}" >&2
    return 1
  fi
  echo "Overwriting ${target}"
  sed -r -i '/^ (big5|gb2312)han\{$/,/^ \}$/ d' "${target}"
}

dataroot="$(dirname "$0")/../source/data"
localedatapath="${dataroot}/locales"
langdatapath="${dataroot}/lang"



filter_display_language_names
abridge_locale_data_for_non_ui_languages
filter_currency_data
filter_region_data
remove_legacy_chinese_codepoint_collation
filter_locale_data

# Chromium OS needs exemplar cities for timezones, but not Chromium.
# It'll save 400kB (uncompressed), but the size difference in
# 7z compressed installer is <= 100kB.
# TODO(jshin): Make separate data files for CrOS and Chromium.
#remove_exemplar_cities