Home | History | Annotate | Download | only in tools
      1 #!/usr/bin/python2.4
      2 #
      3 # Copyright (C) 2010 The Android Open Source Project
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 #
     17 """
     18 Creates the list of search engines
     19 
     20 The created list is placed in the res/values-<locale> directory. Also updates
     21 res/values/all_search_engines.xml if required with new data.
     22 
     23 Usage: get_search_engines.py
     24 
     25 Copyright (C) 2010 The Android Open Source Project
     26 """
     27 
     28 import os
     29 import re
     30 import sys
     31 import urllib
     32 from xml.dom import minidom
     33 
     34 # Locales to generate search engine lists for
     35 locales = ["cs-CZ", "da-DK", "de-AT", "de-CH", "de-DE", "el-GR", "en-AU",
     36     "en-GB", "en-IE", "en-NZ", "en-SG", "en-ZA", "es-ES", "fr-BE", "fr-FR",
     37     "it-IT", "ja-JP", "ko-KR", "nb-NO", "nl-BE", "nl-NL", "pl-PL", "pt-PT",
     38     "pt-BR", "ru-RU", "sv-SE", "tr-TR", "zh-CN", "zh-HK", "zh-MO", "zh-TW"]
     39 
     40 google_data = ["google", "Google", "google.com",
     41   "http://www.google.com/favicon.ico",
     42   "http://www.google.com/search?ie={inputEncoding}&amp;source=android-browser&amp;q={searchTerms}",
     43   "UTF-8",
     44   "http://www.google.com/complete/search?client=android&q={searchTerms}"]
     45 
     46 class SearchEngineManager(object):
     47   """Manages list of search engines and creates locale specific lists.
     48 
     49   The main method useful for the caller is generateListForLocale(), which
     50   creates a locale specific donottranslate-search_engines.xml file.
     51   """
     52 
     53   def __init__(self):
     54     """Inits SearchEngineManager with relevant search engine data.
     55 
     56     The search engine data is downloaded from the Chrome source repository.
     57     """
     58     self.chrome_data = urllib.urlopen(
     59         'http://src.chromium.org/viewvc/chrome/trunk/src/chrome/'
     60         'browser/search_engines/template_url_prepopulate_data.cc').read()
     61     if self.chrome_data.lower().find('repository not found') != -1:
     62       print 'Unable to get Chrome source data for search engine list.\nExiting.'
     63       sys.exit(2)
     64 
     65     self.resdir = os.path.normpath(os.path.join(sys.path[0], '../res'))
     66 
     67     self.all_engines = set()
     68 
     69   def getXmlString(self, str):
     70     """Returns an XML-safe string for the given string.
     71 
     72     Given a string from the search engine data structure, convert it to a
     73     string suitable to write to our XML data file by stripping away NULLs,
     74     unwanted quotes, wide-string declarations (L"") and replacing C-style
     75     unicode characters with XML equivalents.
     76     """
     77     str = str.strip()
     78     if str.upper() == 'NULL':
     79       return ''
     80 
     81     if str.startswith('L"'):
     82       str = str[2:]
     83     if str.startswith('@') or str.startswith('?'):
     84       str = '\\' + str
     85 
     86     str = str.strip('"')
     87     str = str.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')
     88     str = str.replace('"', '&quot;').replace('\'', '&apos;')
     89     str = re.sub(r'\\x([a-fA-F0-9]{1,4})', r'&#x\1;', str)
     90 
     91     return str
     92 
     93   def getEngineData(self, name):
     94     """Returns an array of strings describing the specified search engine.
     95 
     96     The returned strings are in the same order as in the Chrome source data file
     97     except that the internal name of the search engine is inserted at the
     98     beginning of the list.
     99     """
    100 
    101     if name == "google":
    102       return google_data
    103 
    104     # Find the first occurance of this search engine name in the form
    105     # " <name> =" in the chrome data file.
    106     re_exp = '\s' + name + '\s*='
    107     search_obj = re.search(re_exp, self.chrome_data)
    108     if not search_obj:
    109       print ('Unable to find data for search engine ' + name +
    110              '. Please check the chrome data file for format changes.')
    111       return None
    112 
    113     # Extract the struct declaration between the curly braces.
    114     start_pos = self.chrome_data.find('{', search_obj.start()) + 1;
    115     end_pos = self.chrome_data.find('};', start_pos);
    116     engine_data_str = self.chrome_data[start_pos:end_pos]
    117 
    118     # Remove c++ style '//' comments at the ends of each line
    119     engine_data_lines = engine_data_str.split('\n')
    120     engine_data_str = ""
    121     for line in engine_data_lines:
    122         start_pos = line.find(' // ')
    123         if start_pos != -1:
    124             line = line[:start_pos]
    125         engine_data_str = engine_data_str + line + '\n'
    126 
    127     # Join multiple line strings into a single string.
    128     engine_data_str = re.sub('\"\s+\"', '', engine_data_str)
    129     engine_data_str = re.sub('\"\s+L\"', '', engine_data_str)
    130     engine_data_str = engine_data_str.replace('"L"', '')
    131 
    132     engine_data = engine_data_str.split(',')
    133     for i in range(len(engine_data)):
    134       engine_data[i] = self.getXmlString(engine_data[i])
    135 
    136     # If the last element was an empty string (due to an extra comma at the
    137     # end), ignore it.
    138     if not engine_data[len(engine_data) - 1]:
    139       engine_data.pop()
    140 
    141     engine_data.insert(0, name)
    142 
    143     return engine_data
    144 
    145   def getSearchEnginesForCountry(self, country):
    146     """Returns the list of search engine names for the given country.
    147 
    148     The data comes from the Chrome data file.
    149     """
    150     # The Chrome data file has an array defined with the name 'engines_XX'
    151     # where XX = country.
    152     pos = self.chrome_data.find('engines_' + country)
    153     if pos == -1:
    154       print ('Unable to find search engine data for country ' + country + '.')
    155       return
    156 
    157     # Extract the text between the curly braces for this array declaration
    158     engines_start = self.chrome_data.find('{', pos) + 1;
    159     engines_end = self.chrome_data.find('}', engines_start);
    160     engines_str = self.chrome_data[engines_start:engines_end]
    161 
    162     # Remove embedded /**/ style comments, white spaces, address-of operators
    163     # and the trailing comma if any.
    164     engines_str = re.sub('\/\*.+\*\/', '', engines_str)
    165     engines_str = re.sub('\s+', '', engines_str)
    166     engines_str = engines_str.replace('&','')
    167     engines_str = engines_str.rstrip(',')
    168 
    169     # Split the array into it's elements
    170     engines = engines_str.split(',')
    171 
    172     return engines
    173 
    174   def writeAllEngines(self):
    175     """Writes all search engines to the all_search_engines.xml file.
    176     """
    177 
    178     all_search_engines_path = os.path.join(self.resdir, 'values/all_search_engines.xml')
    179 
    180     text = []
    181 
    182     for engine_name in self.all_engines:
    183       engine_data = self.getEngineData(engine_name)
    184       text.append('  <string-array name="%s" translatable="false">\n' % (engine_data[0]))
    185       for i in range(1, 7):
    186         text.append('    <item>%s</item>\n' % (engine_data[i]))
    187       text.append('  </string-array>\n')
    188       print engine_data[1] + " added to all_search_engines.xml"
    189 
    190     self.generateXmlFromTemplate(os.path.join(sys.path[0], 'all_search_engines.template.xml'),
    191         all_search_engines_path, text)
    192 
    193   def generateDefaultList(self):
    194     self.writeEngineList(os.path.join(self.resdir, 'values'), "default")
    195 
    196   def generateListForLocale(self, locale):
    197     """Creates a new locale specific donottranslate-search_engines.xml file.
    198 
    199     The new file contains search engines specific to that country. If required
    200     this function updates all_search_engines.xml file with any new search
    201     engine data necessary.
    202     """
    203     separator_pos = locale.find('-')
    204     if separator_pos == -1:
    205       print ('Locale must be of format <language>-<country>. For e.g.'
    206              ' "es-US" or "en-GB"')
    207       return
    208 
    209     language = locale[0:separator_pos]
    210     country = locale[separator_pos + 1:].upper()
    211     dir_path = os.path.join(self.resdir, 'values-' + language + '-r' + country)
    212 
    213     self.writeEngineList(dir_path, country)
    214 
    215   def writeEngineList(self, dir_path, country):
    216     if os.path.exists(dir_path) and not os.path.isdir(dir_path):
    217       print "File exists in output directory path " + dir_path + ". Please remove it and try again."
    218       return
    219 
    220     engines = self.getSearchEnginesForCountry(country)
    221     if not engines:
    222       return
    223     for engine in engines:
    224       self.all_engines.add(engine)
    225 
    226     # Create the locale specific search_engines.xml file. Each
    227     # search_engines.xml file has a hardcoded list of 7 items. If there are less
    228     # than 7 search engines for this country, the remaining items are marked as
    229     # enabled=false.
    230     text = []
    231     text.append('  <string-array name="search_engines" translatable="false">\n');
    232     for engine in engines:
    233       engine_data = self.getEngineData(engine)
    234       name = engine_data[0]
    235       text.append('    <item>%s</item>\n' % (name))
    236     text.append('  </string-array>\n');
    237 
    238     self.generateXmlFromTemplate(os.path.join(sys.path[0], 'search_engines.template.xml'),
    239         os.path.join(dir_path, 'donottranslate-search_engines.xml'),
    240         text)
    241 
    242   def generateXmlFromTemplate(self, template_path, out_path, text):
    243     # Load the template file and insert the new contents before the last line.
    244     template_text = open(template_path).read()
    245     pos = template_text.rfind('\n', 0, -2) + 1
    246     contents = template_text[0:pos] + ''.join(text) + template_text[pos:]
    247 
    248     # Make sure what we have created is valid XML :) No need to check for errors
    249     # as the script will terminate with an exception if the XML was malformed.
    250     engines_dom = minidom.parseString(contents)
    251 
    252     dir_path = os.path.dirname(out_path)
    253     if not os.path.exists(dir_path):
    254       os.makedirs(dir_path)
    255       print 'Created directory ' + dir_path
    256     file = open(out_path, 'w')
    257     file.write(contents)
    258     file.close()
    259     print 'Wrote ' + out_path
    260 
    261 if __name__ == "__main__":
    262   manager = SearchEngineManager()
    263   manager.generateDefaultList()
    264   for locale in locales:
    265     manager.generateListForLocale(locale)
    266   manager.writeAllEngines()
    267