Home | History | Annotate | Download | only in closure_linter
      1 #!/usr/bin/env python
      2 #
      3 # Copyright 2008 The Closure Linter Authors. All Rights Reserved.
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS-IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 """Logic for computing dependency information for closurized JavaScript files.
     18 
     19 Closurized JavaScript files express dependencies using goog.require and
     20 goog.provide statements. In order for the linter to detect when a statement is
     21 missing or unnecessary, all identifiers in the JavaScript file must first be
     22 processed to determine if they constitute the creation or usage of a dependency.
     23 """
     24 
     25 
     26 
     27 from closure_linter import javascripttokens
     28 from closure_linter import tokenutil
     29 
# Short alias for javascripttokens.JavaScriptTokenType; its members
# (IDENTIFIER, SIMPLE_LVALUE, DOC_FLAG, STRING_TEXT, ...) are what the token
# types in this module are compared against.
# pylint: disable-msg=C6409
TokenType = javascripttokens.JavaScriptTokenType

# Namespaces that are never reported as an extra goog.require. They are
# appended to the caller-supplied ignore list when a ClosurizedNamespacesInfo
# is constructed.
DEFAULT_EXTRA_NAMESPACES = [
  'goog.testing.asserts',
  'goog.testing.jsunit',
]
     37 
     38 class ClosurizedNamespacesInfo(object):
     39   """Dependency information for closurized JavaScript files.
     40 
     41   Processes token streams for dependency creation or usage and provides logic
     42   for determining if a given require or provide statement is unnecessary or if
     43   there are missing require or provide statements.
     44   """
     45 
     46   def __init__(self, closurized_namespaces, ignored_extra_namespaces):
     47     """Initializes an instance the ClosurizedNamespacesInfo class.
     48 
     49     Args:
     50       closurized_namespaces: A list of namespace prefixes that should be
     51           processed for dependency information. Non-matching namespaces are
     52           ignored.
     53       ignored_extra_namespaces: A list of namespaces that should not be reported
     54           as extra regardless of whether they are actually used.
     55     """
     56     self._closurized_namespaces = closurized_namespaces
     57     self._ignored_extra_namespaces = (ignored_extra_namespaces +
     58                                       DEFAULT_EXTRA_NAMESPACES)
     59     self.Reset()
     60 
     61   def Reset(self):
     62     """Resets the internal state to prepare for processing a new file."""
     63 
     64     # A list of goog.provide tokens in the order they appeared in the file.
     65     self._provide_tokens = []
     66 
     67     # A list of goog.require tokens in the order they appeared in the file.
     68     self._require_tokens = []
     69 
     70     # Namespaces that are already goog.provided.
     71     self._provided_namespaces = []
     72 
     73     # Namespaces that are already goog.required.
     74     self._required_namespaces = []
     75 
     76     # Note that created_namespaces and used_namespaces contain both namespaces
     77     # and identifiers because there are many existing cases where a method or
     78     # constant is provided directly instead of its namespace. Ideally, these
     79     # two lists would only have to contain namespaces.
     80 
     81     # A list of tuples where the first element is the namespace of an identifier
     82     # created in the file and the second is the identifier itself.
     83     self._created_namespaces = []
     84 
     85     # A list of tuples where the first element is the namespace of an identifier
     86     # used in the file and the second is the identifier itself.
     87     self._used_namespaces = []
     88 
     89     # A list of seemingly-unnecessary namespaces that are goog.required() and
     90     # annotated with @suppress {extraRequire}.
     91     self._suppressed_requires = []
     92 
     93     # A list of goog.provide tokens which are duplicates.
     94     self._duplicate_provide_tokens = []
     95 
     96     # A list of goog.require tokens which are duplicates.
     97     self._duplicate_require_tokens = []
     98 
     99     # Whether this file is in a goog.scope. Someday, we may add support
    100     # for checking scopified namespaces, but for now let's just fail
    101     # in a more reasonable way.
    102     self._scopified_file = False
    103 
    104     # TODO(user): Handle the case where there are 2 different requires
    105     # that can satisfy the same dependency, but only one is necessary.
    106 
    107   def GetProvidedNamespaces(self):
    108     """Returns the namespaces which are already provided by this file.
    109 
    110     Returns:
    111       A list of strings where each string is a 'namespace' corresponding to an
    112       existing goog.provide statement in the file being checked.
    113     """
    114     return list(self._provided_namespaces)
    115 
    116   def GetRequiredNamespaces(self):
    117     """Returns the namespaces which are already required by this file.
    118 
    119     Returns:
    120       A list of strings where each string is a 'namespace' corresponding to an
    121       existing goog.require statement in the file being checked.
    122     """
    123     return list(self._required_namespaces)
    124 
    125   def IsExtraProvide(self, token):
    126     """Returns whether the given goog.provide token is unnecessary.
    127 
    128     Args:
    129       token: A goog.provide token.
    130 
    131     Returns:
    132       True if the given token corresponds to an unnecessary goog.provide
    133       statement, otherwise False.
    134     """
    135     if self._scopified_file:
    136       return False
    137 
    138     namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string
    139 
    140     base_namespace = namespace.split('.', 1)[0]
    141     if base_namespace not in self._closurized_namespaces:
    142       return False
    143 
    144     if token in self._duplicate_provide_tokens:
    145       return True
    146 
    147     # TODO(user): There's probably a faster way to compute this.
    148     for created_namespace, created_identifier in self._created_namespaces:
    149       if namespace == created_namespace or namespace == created_identifier:
    150         return False
    151 
    152     return True
    153 
    154   def IsExtraRequire(self, token):
    155     """Returns whether the given goog.require token is unnecessary.
    156 
    157     Args:
    158       token: A goog.require token.
    159 
    160     Returns:
    161       True if the given token corresponds to an unnecessary goog.require
    162       statement, otherwise False.
    163     """
    164     if self._scopified_file:
    165       return False
    166 
    167     namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string
    168 
    169     base_namespace = namespace.split('.', 1)[0]
    170     if base_namespace not in self._closurized_namespaces:
    171       return False
    172 
    173     if namespace in self._ignored_extra_namespaces:
    174       return False
    175 
    176     if token in self._duplicate_require_tokens:
    177       return True
    178 
    179     if namespace in self._suppressed_requires:
    180       return False
    181 
    182     # If the namespace contains a component that is initial caps, then that
    183     # must be the last component of the namespace.
    184     parts = namespace.split('.')
    185     if len(parts) > 1 and parts[-2][0].isupper():
    186       return True
    187 
    188     # TODO(user): There's probably a faster way to compute this.
    189     for used_namespace, used_identifier in self._used_namespaces:
    190       if namespace == used_namespace or namespace == used_identifier:
    191         return False
    192 
    193     return True
    194 
    195   def GetMissingProvides(self):
    196     """Returns the set of missing provided namespaces for the current file.
    197 
    198     Returns:
    199       Returns a set of strings where each string is a namespace that should be
    200       provided by this file, but is not.
    201     """
    202     if self._scopified_file:
    203       return set()
    204 
    205     missing_provides = set()
    206     for namespace, identifier in self._created_namespaces:
    207       if (not self._IsPrivateIdentifier(identifier) and
    208           namespace not in self._provided_namespaces and
    209           identifier not in self._provided_namespaces and
    210           namespace not in self._required_namespaces):
    211         missing_provides.add(namespace)
    212 
    213     return missing_provides
    214 
    215   def GetMissingRequires(self):
    216     """Returns the set of missing required namespaces for the current file.
    217 
    218     For each non-private identifier used in the file, find either a
    219     goog.require, goog.provide or a created identifier that satisfies it.
    220     goog.require statements can satisfy the identifier by requiring either the
    221     namespace of the identifier or the identifier itself. goog.provide
    222     statements can satisfy the identifier by providing the namespace of the
    223     identifier. A created identifier can only satisfy the used identifier if
    224     it matches it exactly (necessary since things can be defined on a
    225     namespace in more than one file). Note that provided namespaces should be
    226     a subset of created namespaces, but we check both because in some cases we
    227     can't always detect the creation of the namespace.
    228 
    229     Returns:
    230       Returns a set of strings where each string is a namespace that should be
    231       required by this file, but is not.
    232     """
    233     if self._scopified_file:
    234       return set()
    235 
    236     external_dependencies = set(self._required_namespaces)
    237 
    238     # Assume goog namespace is always available.
    239     external_dependencies.add('goog')
    240 
    241     created_identifiers = set()
    242     for namespace, identifier in self._created_namespaces:
    243       created_identifiers.add(identifier)
    244 
    245     missing_requires = set()
    246     for namespace, identifier in self._used_namespaces:
    247       if (not self._IsPrivateIdentifier(identifier) and
    248           namespace not in external_dependencies and
    249           namespace not in self._provided_namespaces and
    250           identifier not in external_dependencies and
    251           identifier not in created_identifiers):
    252         missing_requires.add(namespace)
    253 
    254     return missing_requires
    255 
    256   def _IsPrivateIdentifier(self, identifier):
    257     """Returns whether the given identifer is private."""
    258     pieces = identifier.split('.')
    259     for piece in pieces:
    260       if piece.endswith('_'):
    261         return True
    262     return False
    263 
    264   def IsFirstProvide(self, token):
    265     """Returns whether token is the first provide token."""
    266     return self._provide_tokens and token == self._provide_tokens[0]
    267 
    268   def IsFirstRequire(self, token):
    269     """Returns whether token is the first require token."""
    270     return self._require_tokens and token == self._require_tokens[0]
    271 
    272   def IsLastProvide(self, token):
    273     """Returns whether token is the last provide token."""
    274     return self._provide_tokens and token == self._provide_tokens[-1]
    275 
    276   def IsLastRequire(self, token):
    277     """Returns whether token is the last require token."""
    278     return self._require_tokens and token == self._require_tokens[-1]
    279 
    280   def ProcessToken(self, token, state_tracker):
    281     """Processes the given token for dependency information.
    282 
    283     Args:
    284       token: The token to process.
    285       state_tracker: The JavaScript state tracker.
    286     """
    287 
    288     # Note that this method is in the critical path for the linter and has been
    289     # optimized for performance in the following ways:
    290     # - Tokens are checked by type first to minimize the number of function
    291     #   calls necessary to determine if action needs to be taken for the token.
    292     # - The most common tokens types are checked for first.
    293     # - The number of function calls has been minimized (thus the length of this
    294     #   function.
    295 
    296     if token.type == TokenType.IDENTIFIER:
    297       # TODO(user): Consider saving the whole identifier in metadata.
    298       whole_identifier_string = self._GetWholeIdentifierString(token)
    299       if whole_identifier_string is None:
    300         # We only want to process the identifier one time. If the whole string
    301         # identifier is None, that means this token was part of a multi-token
    302         # identifier, but it was not the first token of the identifier.
    303         return
    304 
    305       # In the odd case that a goog.require is encountered inside a function,
    306       # just ignore it (e.g. dynamic loading in test runners).
    307       if token.string == 'goog.require' and not state_tracker.InFunction():
    308         self._require_tokens.append(token)
    309         namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string
    310         if namespace in self._required_namespaces:
    311           self._duplicate_require_tokens.append(token)
    312         else:
    313           self._required_namespaces.append(namespace)
    314 
    315         # If there is a suppression for the require, add a usage for it so it
    316         # gets treated as a regular goog.require (i.e. still gets sorted).
    317         jsdoc = state_tracker.GetDocComment()
    318         if jsdoc and ('extraRequire' in jsdoc.suppressions):
    319           self._suppressed_requires.append(namespace)
    320           self._AddUsedNamespace(state_tracker, namespace)
    321 
    322       elif token.string == 'goog.provide':
    323         self._provide_tokens.append(token)
    324         namespace = tokenutil.Search(token, TokenType.STRING_TEXT).string
    325         if namespace in self._provided_namespaces:
    326           self._duplicate_provide_tokens.append(token)
    327         else:
    328           self._provided_namespaces.append(namespace)
    329 
    330         # If there is a suppression for the provide, add a creation for it so it
    331         # gets treated as a regular goog.provide (i.e. still gets sorted).
    332         jsdoc = state_tracker.GetDocComment()
    333         if jsdoc and ('extraProvide' in jsdoc.suppressions):
    334           self._AddCreatedNamespace(state_tracker, namespace)
    335 
    336       elif token.string == 'goog.scope':
    337         self._scopified_file = True
    338 
    339       else:
    340         jsdoc = state_tracker.GetDocComment()
    341         if jsdoc and jsdoc.HasFlag('typedef'):
    342           self._AddCreatedNamespace(state_tracker, whole_identifier_string,
    343                                     self.GetClosurizedNamespace(
    344                                         whole_identifier_string))
    345         else:
    346           self._AddUsedNamespace(state_tracker, whole_identifier_string)
    347 
    348     elif token.type == TokenType.SIMPLE_LVALUE:
    349       identifier = token.values['identifier']
    350       namespace = self.GetClosurizedNamespace(identifier)
    351       if state_tracker.InFunction():
    352         self._AddUsedNamespace(state_tracker, identifier)
    353       elif namespace and namespace != 'goog':
    354         self._AddCreatedNamespace(state_tracker, identifier, namespace)
    355 
    356     elif token.type == TokenType.DOC_FLAG:
    357       flag_type = token.attached_object.flag_type
    358       is_interface = state_tracker.GetDocComment().HasFlag('interface')
    359       if flag_type == 'implements' or (flag_type == 'extends' and is_interface):
    360         # Interfaces should be goog.require'd.
    361         doc_start = tokenutil.Search(token, TokenType.DOC_START_BRACE)
    362         interface = tokenutil.Search(doc_start, TokenType.COMMENT)
    363         self._AddUsedNamespace(state_tracker, interface.string)
    364 
    365 
    366   def _GetWholeIdentifierString(self, token):
    367     """Returns the whole identifier string for the given token.
    368 
    369     Checks the tokens after the current one to see if the token is one in a
    370     sequence of tokens which are actually just one identifier (i.e. a line was
    371     wrapped in the middle of an identifier).
    372 
    373     Args:
    374       token: The token to check.
    375 
    376     Returns:
    377       The whole identifier string or None if this token is not the first token
    378       in a multi-token identifier.
    379     """
    380     result = ''
    381 
    382     # Search backward to determine if this token is the first token of the
    383     # identifier. If it is not the first token, return None to signal that this
    384     # token should be ignored.
    385     prev_token = token.previous
    386     while prev_token:
    387       if (prev_token.IsType(TokenType.IDENTIFIER) or
    388           prev_token.IsType(TokenType.NORMAL) and prev_token.string == '.'):
    389         return None
    390       elif (not prev_token.IsType(TokenType.WHITESPACE) and
    391             not prev_token.IsAnyType(TokenType.COMMENT_TYPES)):
    392         break
    393       prev_token = prev_token.previous
    394 
    395     # Search forward to find other parts of this identifier separated by white
    396     # space.
    397     next_token = token
    398     while next_token:
    399       if (next_token.IsType(TokenType.IDENTIFIER) or
    400           next_token.IsType(TokenType.NORMAL) and next_token.string == '.'):
    401         result += next_token.string
    402       elif (not next_token.IsType(TokenType.WHITESPACE) and
    403             not next_token.IsAnyType(TokenType.COMMENT_TYPES)):
    404         break
    405       next_token = next_token.next
    406 
    407     return result
    408 
    409   def _AddCreatedNamespace(self, state_tracker, identifier, namespace=None):
    410     """Adds the namespace of an identifier to the list of created namespaces.
    411 
    412     If the identifier is annotated with a 'missingProvide' suppression, it is
    413     not added.
    414 
    415     Args:
    416       state_tracker: The JavaScriptStateTracker instance.
    417       identifier: The identifier to add.
    418       namespace: The namespace of the identifier or None if the identifier is
    419           also the namespace.
    420     """
    421     if not namespace:
    422       namespace = identifier
    423 
    424     jsdoc = state_tracker.GetDocComment()
    425     if jsdoc and 'missingProvide' in jsdoc.suppressions:
    426       return
    427 
    428     self._created_namespaces.append([namespace, identifier])
    429 
    430   def _AddUsedNamespace(self, state_tracker, identifier):
    431     """Adds the namespace of an identifier to the list of used namespaces.
    432 
    433     If the identifier is annotated with a 'missingRequire' suppression, it is
    434     not added.
    435 
    436     Args:
    437       state_tracker: The JavaScriptStateTracker instance.
    438       identifier: An identifier which has been used.
    439     """
    440     jsdoc = state_tracker.GetDocComment()
    441     if jsdoc and 'missingRequire' in jsdoc.suppressions:
    442       return
    443 
    444     namespace = self.GetClosurizedNamespace(identifier)
    445     if namespace:
    446       self._used_namespaces.append([namespace, identifier])
    447 
    448   def GetClosurizedNamespace(self, identifier):
    449     """Given an identifier, returns the namespace that identifier is from.
    450 
    451     Args:
    452       identifier: The identifier to extract a namespace from.
    453 
    454     Returns:
    455       The namespace the given identifier resides in, or None if one could not
    456       be found.
    457     """
    458     if identifier.startswith('goog.global'):
    459       # Ignore goog.global, since it is, by definition, global.
    460       return None
    461 
    462     parts = identifier.split('.')
    463     for namespace in self._closurized_namespaces:
    464       if not identifier.startswith(namespace + '.'):
    465         continue
    466 
    467       last_part = parts[-1]
    468       if not last_part:
    469         # TODO(robbyw): Handle this: it's a multi-line identifier.
    470         return None
    471 
    472       # The namespace for a class is the shortest prefix ending in a class
    473       # name, which starts with a capital letter but is not a capitalized word.
    474       #
    475       # We ultimately do not want to allow requiring or providing of inner
    476       # classes/enums.  Instead, a file should provide only the top-level class
    477       # and users should require only that.
    478       namespace = []
    479       for part in parts:
    480         if part == 'prototype' or part.isupper():
    481           return '.'.join(namespace)
    482         namespace.append(part)
    483         if part[0].isupper():
    484           return '.'.join(namespace)
    485 
    486       # At this point, we know there's no class or enum, so the namespace is
    487       # just the identifier with the last part removed. With the exception of
    488       # apply, inherits, and call, which should also be stripped.
    489       if parts[-1] in ('apply', 'inherits', 'call'):
    490         parts.pop()
    491       parts.pop()
    492 
    493       # If the last part ends with an underscore, it is a private variable,
    494       # method, or enum. The namespace is whatever is before it.
    495       if parts and parts[-1].endswith('_'):
    496         parts.pop()
    497 
    498       return '.'.join(parts)
    499 
    500     return None
    501