#!/usr/bin/python
# Copyright (c) 2010 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Class for parsing metadata about extension samples."""

import locale
import os
import os.path
import re
import hashlib
import zipfile

try:
  import simplejson as json
except ImportError:
  # simplejson is not installed; the standard-library module provides the
  # same load/loads/dumps entry points used in this file.
  import json

# Make sure we get consistent string sorting behavior by explicitly using the
# default C locale.
locale.setlocale(locale.LC_ALL, 'C')


def sorted_walk(path):
  """ A version of os.walk that yields results in order sorted by name.

  This is to prevent spurious docs changes due to os.walk returning items in a
  filesystem dependent order (by inode creation time, etc).

  The yielded `dirs` list is the live list os.walk uses to decide where to
  descend next, so callers may prune the walk by modifying it in place.
  """
  for base, dirs, files in os.walk(path):
    dirs.sort()
    files.sort()
    yield base, dirs, files


def parse_json_file(path, encoding="utf-8"):
  """ Load the specified file and parse it as JSON.

  Args:
    path: Path to a file containing JSON-encoded data.
    encoding: Encoding used in the file.  Defaults to utf-8.

  Returns:
    A Python object representing the data encoded in the file.

  Raises:
    Exception: If the file could not be read or its contents could not be
        parsed as JSON data.
  """
  try:
    json_file = open(path, 'r')
  except IOError as msg:
    raise Exception("Failed to read the file at %s: %s" % (path, msg))

  try:
    json_obj = json.load(json_file, encoding=encoding)
  except ValueError as msg:
    raise Exception("Failed to parse JSON out of file %s: %s" % (path, msg))
  finally:
    json_file.close()

  return json_obj


class ApiManifest(object):
  """ Represents the list of API methods contained in extension_api.json """

  # Keys which may be passed to the _parseModuleDocLinksByKey method.
  _MODULE_DOC_KEYS = ['functions', 'events']

  def __init__(self, manifest_path):
    """ Read the supplied manifest file and parse its contents.

    Args:
      manifest_path: Path to extension_api.json
    """
    self._manifest = parse_json_file(manifest_path)

  def _getDocLink(self, method, hashprefix):
    """
    Given an API method, return a partial URL corresponding to the doc
    file for that method.

    Args:
      method: A string like 'chrome.foo.bar' or 'chrome.experimental.foo.onBar'
      hashprefix: The prefix to put in front of hash links - 'method' for
          methods and 'event' for events.

    Returns:
      A string like 'foo.html#method-bar' or 'experimental.foo.html#event-onBar'
    """
    # Yields e.g. '%s.html#method-%s'; the two remaining slots are filled with
    # the module path and the method name.
    urlpattern = '%%s.html#%s-%%s' % hashprefix
    urlparts = tuple(method.replace('chrome.', '').rsplit('.', 1))
    return urlpattern % urlparts

  def _parseModuleDocLinksByKey(self, module, key):
    """
    Given a specific API module, returns a dict of methods or events mapped to
    documentation URLs.

    Args:
      module: The data in extension_api.json corresponding to a single module.
      key: A key belonging to _MODULE_DOC_KEYS to determine which set of
          methods to parse, and what kind of documentation URL to generate.

    Returns:
      A dict of extension methods mapped to file and hash URL parts for the
      corresponding documentation links, like:
        {
          "chrome.tabs.remove": "tabs.html#method-remove",
          "chrome.tabs.onDetached" : "tabs.html#event-onDetached"
        }

      If the API namespace is defined "nodoc" then an empty dict is returned.

    Raises:
      Exception: If the key supplied is not a member of _MODULE_DOC_KEYS.
    """
    api_dict = {}
    namespace = module['namespace']
    if 'nodoc' in module:
      return api_dict
    if key not in self._MODULE_DOC_KEYS:
      raise Exception("key %s must be one of %s" % (key, self._MODULE_DOC_KEYS))

    if key == 'events':
      hashprefix = 'event'
    else:
      hashprefix = 'method'

    for method in module.get(key, []):
      method_name = 'chrome.%s.%s' % (namespace, method['name'])
      api_dict[method_name] = self._getDocLink(method_name, hashprefix)
    return api_dict

  def getModuleNames(self):
    """ Returns the names of individual modules in the API.

    Returns:
      A set containing the encoded 'namespace' value of every module in the
      manifest that does not have a "nodoc" property.
    """
    # Exclude modules with a "nodoc" property.
    return set(module['namespace'].encode() for module in self._manifest
               if "nodoc" not in module)

  def getDocumentationLinks(self):
    """ Parses the extension_api.json manifest and returns a dict of all
    events and methods for every module, mapped to relative documentation links.

    Returns:
      A dict of methods/events => partial doc links for every module.
    """
    api_dict = {}
    for module in self._manifest:
      api_dict.update(self._parseModuleDocLinksByKey(module, 'functions'))
      api_dict.update(self._parseModuleDocLinksByKey(module, 'events'))
    return api_dict


class SamplesManifest(object):
  """ Represents a manifest file containing information about the sample
  extensions available in the codebase. """

  def __init__(self, base_sample_path, base_dir, api_manifest):
    """ Reads through the filesystem and obtains information about any Chrome
    extensions which exist underneath the specified folder.

    Args:
      base_sample_path: The directory under which to search for samples.
      base_dir: The base directory samples will be referenced from.
      api_manifest: An instance of the ApiManifest class, which will indicate
          which API methods are available.
    """
    self._base_dir = base_dir
    manifest_paths = self._locateManifestsFromPath(base_sample_path)
    self._manifest_data = self._parseManifestData(manifest_paths, api_manifest)

  def _locateManifestsFromPath(self, path):
    """
    Returns a list of paths to sample extension manifest.json files.

    Once a directory containing a manifest.json is found, the search does not
    descend any further below that directory.

    Args:
      path: Base path in which to start the search.

    Returns:
      A list of paths below path pointing at manifest.json files.
    """
    manifest_paths = []
    for root, directories, files in sorted_walk(path):
      if 'manifest.json' in files:
        # Don't go any further down this tree.  The list must be emptied in
        # place: rebinding the name would leave os.walk's own list untouched
        # and the walk would still descend into the sample.
        del directories[:]
        manifest_paths.append(os.path.join(root, 'manifest.json'))
      if '.svn' in directories:
        directories.remove('.svn')   # Don't go into SVN metadata directories
    return manifest_paths

  def _parseManifestData(self, manifest_paths, api_manifest):
    """ Returns metadata about the sample extensions given their manifest
    paths.

    Args:
      manifest_paths: A list of paths to extension manifests
      api_manifest: An instance of the ApiManifest class, which will indicate
          which API methods are available.

    Returns:
      Manifest data containing a list of samples and available API methods.
    """
    api_method_dict = api_manifest.getDocumentationLinks()
    api_methods = api_method_dict.keys()

    samples = []
    for path in manifest_paths:
      sample = Sample(path, api_methods, self._base_dir)
      # Don't render apps
      if not sample.is_app():
        samples.append(sample)

    # Order by case-insensitive name, falling back to path for equal names.
    samples.sort(key=lambda sample: (sample['name'].upper(), sample['path']))

    manifest_data = {'samples': samples, 'api': api_method_dict}
    return manifest_data

  def writeToFile(self, path):
    """ Writes the contents of this manifest file as a JSON-encoded text file.

    Args:
      path: The path to write the samples manifest file to.

    Raises:
      Exception: If the output file could not be opened for writing.
    """
    manifest_text = json.dumps(self._manifest_data, indent=2,
                               sort_keys=True, separators=(',', ': '))
    output_path = os.path.realpath(path)
    try:
      output_file = open(output_path, 'w')
    except IOError as msg:
      raise Exception("Failed to write the samples manifest file. "
                      "The specific error was: %s." % msg)
    try:
      output_file.write(manifest_text)
    finally:
      # Release the handle even if the write itself fails.
      output_file.close()

  def writeZippedSamples(self):
    """ For each sample in the current manifest, create a zip file with the
    sample contents in the sample's parent directory if no zip exists, or
    update the zip file if the sample has been updated.

    Returns:
      A list of paths representing zip files which have been modified.
    """
    modified_paths = []
    for sample in self._manifest_data['samples']:
      path = sample.write_zip()
      if path:
        modified_paths.append(path)
    return modified_paths


class Sample(dict):
  """ Represents metadata about a Chrome extension sample.

  Extends dict so that it can be easily JSON serialized.
  """

  # Attributes that will map to "features" if their corresponding key is
  # present in the extension manifest.
  _FEATURE_ATTRIBUTES = (
    'browser_action',
    'page_action',
    'background_page',
    'options_page',
    'plugins',
    'theme',
    'chrome_url_overrides'
  )

  # File extensions to files which may contain source code.
  _SOURCE_FILE_EXTENSIONS = ('.html', '.json', '.js', '.css', '.htm')

  # Locales from which translations may be used in the sample gallery.
  _ENGLISH_LOCALES = ['en_US', 'en', 'en_GB']

  # Matches a $PLACEHOLDER$ reference inside a translated message.
  _PLACEHOLDER_PATTERN = re.compile(r'\$(\w*)\$')

  def __init__(self, manifest_path, api_methods, base_dir):
    """ Initializes a Sample instance given a path to a manifest.

    Args:
      manifest_path: A filesystem path to a manifest file.
      api_methods: A list of strings containing all possible Chrome extension
          API calls.
      base_dir: The base directory where this sample will be referenced from -
          paths will be made relative to this directory.
    """
    self._base_dir = base_dir
    self._manifest_path = manifest_path
    self._manifest = parse_json_file(self._manifest_path)
    self._locale_data = self._parse_locale_data()

    # The following calls set data which will be serialized when converting
    # this object to JSON.
    source_data = self._parse_source_data(api_methods)
    self['api_calls'] = source_data['api_calls']
    self['source_files'] = source_data['source_files']
    self['source_hash'] = source_data['source_hash']

    self['name'] = self._parse_name()
    self['description'] = self._parse_description()
    self['icon'] = self._parse_icon()
    self['features'] = self._parse_features()
    self['protocols'] = self._parse_protocols()
    self['path'] = self._get_relative_path()
    # The search string is built from name, description, features and
    # api_calls, so it has to be computed after those keys are set.
    self['search_string'] = self._get_search_string()
    self['id'] = hashlib.sha1(self['path']).hexdigest()
    self['zip_path'] = self._get_relative_zip_path()

  def _get_localized_manifest_value(self, key):
    """ Returns a localized version of the requested manifest value.

    Args:
      key: The manifest key whose value the caller wants translated.

    Returns:
      If the supplied value exists and contains a ___MSG_token___ value, this
      method will resolve the appropriate translation and return the result.
      If no token exists, the manifest value will be returned.  If the key does
      not exist, an empty string will be returned.

    Raises:
      Exception: If the localized value for the given token could not be found.
    """
    if key not in self._manifest:
      return ''

    value = self._manifest[key]
    if value[:6] != '__MSG_':
      return value

    try:
      return self._get_localized_value(value)
    except Exception as msg:
      raise Exception("Could not translate manifest value for key %s: %s" %
                      (key, msg))

  def _get_localized_value(self, message_token):
    """ Returns the localized version of the requested MSG bundle token.

    Args:
      message_token: A message bundle token like __MSG_extensionName__.

    Returns:
      The translated text corresponding to the token, with every placeholder
      that is defined for the message substituted in.  Placeholder names are
      looked up in lowercase; $...$ sequences with no matching placeholder
      definition are left untouched.

    Raises:
      Exception: If a message bundle token is not found in the translations.
    """
    # Strip the leading '__MSG_' and the trailing '__'.
    token = message_token[6:-2]
    if token not in self._locale_data:
      raise Exception('Could not find localized string: %s' % message_token)

    translation = self._locale_data[token]
    placeholders = translation.get('placeholders', {})

    def substitute(match):
      """ Returns the content for one matched placeholder reference. """
      placeholder = placeholders.get(match.group(1).lower())
      if placeholder is None:
        return match.group(0)
      return placeholder['content']

    return self._PLACEHOLDER_PATTERN.sub(substitute, translation['message'])

  def _get_relative_path(self):
    """ Returns a relative path from the supplied base dir to the manifest dir.

    This method is used because we may not be able to rely on os.path.relpath
    which was introduced in Python 2.6 and only works on Windows and Unix.

    Since the example extensions should always be subdirectories of the
    base sample manifest path, we can get a relative path through a simple
    string substitution.

    Returns:
      A relative directory path from the sample manifest's directory to the
      directory containing this sample's manifest.json.
    """
    real_manifest_path = os.path.realpath(self._manifest_path)
    real_base_path = os.path.realpath(self._base_dir)
    below_base = real_manifest_path.replace(real_base_path, '')
    # Drop the file name, then the leading path separator left over from
    # removing the base path.  The trailing separator is kept.
    return below_base.replace('manifest.json', '')[1:]

  def _get_relative_zip_path(self):
    """ Returns a relative path from the base dir to the sample's zip file.

    Intended for locating the zip file for the sample in the samples manifest.

    Returns:
      A relative directory path from the sample manifest's directory to this
      sample's zip file.
    """
    zip_filename = self._get_zip_filename()
    # The relative path ends with a separator, so the first dirname only
    # strips that separator and the second one yields the parent directory.
    zip_relpath = os.path.dirname(os.path.dirname(self._get_relative_path()))
    return os.path.join(zip_relpath, zip_filename)

  def _get_search_string(self):
    """ Constructs a string to be used when searching the samples list.

    To make the implementation of the JavaScript-based search very direct, a
    string is constructed containing the title, description, API calls, and
    features that this sample uses, and is converted to uppercase.  This makes
    JavaScript sample searching very fast and easy to implement.

    Returns:
      An uppercase string containing information to match on for searching
      samples on the client.
    """
    search_terms = [
      self['name'],
      self['description'],
    ]
    search_terms.extend(self['features'])
    search_terms.extend(self['api_calls'])
    search_string = ' '.join(search_terms).replace('"', '')\
                                          .replace('\'', '')\
                                          .upper()
    return search_string

  def _get_zip_filename(self):
    """ Returns the filename to be used for a generated zip of the sample.

    Returns:
      A string in the form of "<dirname>.zip" where <dirname> is the name
      of the directory containing this sample's manifest.json.
    """
    sample_path = os.path.realpath(os.path.dirname(self._manifest_path))
    sample_dirname = os.path.basename(sample_path)
    return "%s.zip" % sample_dirname

  def _parse_description(self):
    """ Returns a localized description of the extension.

    Returns:
      A localized version of the sample's description.
    """
    return self._get_localized_manifest_value('description')

  def _parse_features(self):
    """ Returns a list of features the sample uses.

    Returns:
      A sorted list of features the extension uses: the keys of
      self._FEATURE_ATTRIBUTES present in the manifest, 'popup' if a page or
      browser action defines one, and every permission that is not a URL
      pattern.
    """
    features = set()
    for feature_attr in self._FEATURE_ATTRIBUTES:
      if feature_attr in self._manifest:
        features.add(feature_attr)

    if self._uses_popup():
      features.add('popup')

    for permission in self._manifest.get('permissions', []):
      # Permissions containing '://' are host patterns, which are reported
      # by _parse_protocols instead.
      split = permission.split('://')
      if len(split) == 1:
        features.add(split[0])
    return sorted(features)

  def _parse_icon(self):
    """ Returns the path to the 128px icon for this sample.

    Returns:
      The path to the 128px icon if defined in the manifest, None otherwise.
    """
    if 'icons' in self._manifest and '128' in self._manifest['icons']:
      return self._manifest['icons']['128']
    return None

  def _parse_locale_data(self):
    """ Parses this sample's locale data into a dict.

    Because the sample gallery is in English, this method only looks for
    translations as defined by self._ENGLISH_LOCALES.  The first locale in
    that list which has a messages.json file is used.

    Returns:
      A dict containing the translation keys and corresponding English text
      for this extension, or an empty dict if no English messages file exists.

    Raises:
      Exception: If the messages file cannot be read, or if it is improperly
          formatted JSON.
    """
    extension_dir_path = os.path.dirname(self._manifest_path)
    for locale_name in self._ENGLISH_LOCALES:
      en_messages_path = os.path.join(extension_dir_path, '_locales',
                                      locale_name, 'messages.json')
      if not os.path.isfile(en_messages_path):
        continue

      try:
        en_messages_file = open(en_messages_path, 'r')
      except IOError as msg:
        raise Exception("Failed to read %s: %s" % (en_messages_path, msg))
      try:
        en_messages_contents = en_messages_file.read()
      finally:
        en_messages_file.close()

      try:
        return json.loads(en_messages_contents)
      except ValueError as msg:
        raise Exception("File %s has a syntax error: %s" %
                        (en_messages_path, msg))
    return {}

  def _parse_name(self):
    """ Returns a localized name for the extension.

    Returns:
      A localized version of the sample's name.
    """
    return self._get_localized_manifest_value('name')

  def _parse_protocols(self):
    """ Returns a list of protocols this extension requests permission for.

    Returns:
      A list of every unique protocol listed in the manifest's permissions,
      in order of first appearance, each in the form "<scheme>://".
    """
    protocols = []
    for permission in self._manifest.get('permissions', []):
      split = permission.split('://')
      if len(split) != 2:
        continue
      # Compare the same form that is stored, otherwise the uniqueness check
      # never matches and duplicates slip through.
      protocol = split[0] + "://"
      if protocol not in protocols:
        protocols.append(protocol)
    return protocols

  def _parse_source_data(self, api_methods):
    """ Iterates over the sample's source files and parses data from them.

    Parses any files in the sample directory with known source extensions
    (as defined in self._SOURCE_FILE_EXTENSIONS).  For each file, this method:

      1. Stores a relative path from the manifest.json directory to the file.
      2. Searches through the contents of the file for chrome.* API calls.
      3. Calculates a SHA1 digest for the contents of the file.

    Args:
      api_methods: A list of strings containing the potential
          API calls the extension sample could be making.

    Raises:
      Exception: If any of the source files cannot be read.

    Returns:
      A dictionary containing the keys/values:
        'api_calls'     A sorted list of API calls the sample makes.
        'source_files'  A sorted list of paths to files the extension uses.
        'source_hash'   A hash of the individual file hashes.
    """
    data = {}
    source_paths = []
    source_hashes = []
    api_calls = set()
    base_path = os.path.realpath(os.path.dirname(self._manifest_path))
    for root, directories, files in sorted_walk(base_path):
      if '.svn' in directories:
        directories.remove('.svn')   # Don't go into SVN metadata directories

      for file_name in files:
        ext = os.path.splitext(file_name)[1]
        if ext not in self._SOURCE_FILE_EXTENSIONS:
          continue

        # Add the file path to the list of source paths.
        fullpath = os.path.realpath(os.path.join(root, file_name))
        path = fullpath.replace(base_path, '')[1:]
        source_paths.append(path)

        # Read the contents and parse out API calls.
        try:
          code_file = open(fullpath, "r")
        except IOError as msg:
          raise Exception("Failed to read %s: %s" % (fullpath, msg))
        try:
          code_contents = unicode(code_file.read(), errors="replace")
        finally:
          code_file.close()

        for method in api_methods:
          if method in code_contents:
            api_calls.add(method)

        # Get a hash of the file contents for zip file generation.
        file_hash = hashlib.sha1(code_contents.encode("ascii", "replace"))
        source_hashes.append(file_hash.hexdigest())

    data['api_calls'] = sorted(api_calls)
    data['source_files'] = sorted(source_paths)
    data['source_hash'] = hashlib.sha1(''.join(source_hashes)).hexdigest()
    return data

  def _uses_background(self):
    """ Returns true if the extension defines a background page. """
    return 'background_page' in self._manifest

  def _uses_browser_action(self):
    """ Returns true if the extension defines a browser action. """
    return 'browser_action' in self._manifest

  def _uses_content_scripts(self):
    """ Returns true if the extension uses content scripts. """
    return 'content_scripts' in self._manifest

  def _uses_options(self):
    """ Returns true if the extension defines an options page. """
    return 'options_page' in self._manifest

  def _uses_page_action(self):
    """ Returns true if the extension uses a page action. """
    return 'page_action' in self._manifest

  def _uses_popup(self):
    """ Returns true if the extension defines a popup on a page or browser
    action. """
    has_b_popup = (self._uses_browser_action() and
                   'popup' in self._manifest['browser_action'])
    has_p_popup = (self._uses_page_action() and
                   'popup' in self._manifest['page_action'])
    return has_b_popup or has_p_popup

  def is_app(self):
    """ Returns true if the extension has an 'app' section in its manifest."""
    return 'app' in self._manifest

  def write_zip(self):
    """ Writes a zip file containing all of the files in this Sample's dir.

    The zip is written next to the sample directory.  The sample's source hash
    is stored as the comment of the zipped manifest.json entry; if an existing
    zip already carries the current hash, nothing is written.

    Returns:
      The relative path to the zip file if it was (re)written, None if the
      existing zip was already up to date.

    Raises:
      Exception: If an existing zip could not be read, or the new zip could
          not be written.
    """
    sample_path = os.path.realpath(os.path.dirname(self._manifest_path))
    sample_dirname = os.path.basename(sample_path)
    sample_parentpath = os.path.dirname(sample_path)

    zip_filename = self._get_zip_filename()
    zip_path = os.path.join(sample_parentpath, zip_filename)
    # we pass zip_manifest_path to zipfile.getinfo(), which chokes on
    # backslashes, so don't rely on os.path.join, use forward slash on
    # all platforms.
    zip_manifest_path = sample_dirname + '/manifest.json'

    # NOTE(review): this sets the debug level on the ZipFile class itself, so
    # it affects every ZipFile in the process - confirm it is still wanted.
    zipfile.ZipFile.debug = 3

    if os.path.isfile(zip_path):
      try:
        old_zip_file = zipfile.ZipFile(zip_path, 'r')
      except IOError as msg:
        raise Exception("Could not read zip at %s: %s" % (zip_path, msg))
      except zipfile.BadZipfile as msg:
        raise Exception("File at %s is not a zip file: %s" % (zip_path, msg))

      try:
        info = old_zip_file.getinfo(zip_manifest_path)
        if info.comment == self['source_hash']:
          return None    # Hashes match - no need to generate file
      except KeyError:
        pass             # The old zip file doesn't contain a hash - overwrite
      finally:
        old_zip_file.close()

    zip_file = zipfile.ZipFile(zip_path, 'w')

    try:
      for root, dirs, files in sorted_walk(sample_path):
        if '.svn' in dirs:
          dirs.remove('.svn')
        for file_name in files:
          # Absolute path to the file to be added.
          abspath = os.path.realpath(os.path.join(root, file_name))
          # Relative path to store the file in under the zip.
          relpath = sample_dirname + abspath.replace(sample_path, "")

          zip_file.write(abspath, relpath)
          if file_name == 'manifest.json':
            # Tag the manifest entry with the source hash so the next run can
            # tell whether the zip is stale.
            info = zip_file.getinfo(zip_manifest_path)
            info.comment = self['source_hash']
    except RuntimeError as msg:
      raise Exception("Could not write zip at %s: %s" % (zip_path, msg))
    finally:
      zip_file.close()

    return self._get_relative_zip_path()