Home | History | Annotate | Download | only in closure_linter
      1 #!/usr/bin/env python
      2 #
      3 # Copyright 2008 The Closure Linter Authors. All Rights Reserved.
      4 #
      5 # Licensed under the Apache License, Version 2.0 (the "License");
      6 # you may not use this file except in compliance with the License.
      7 # You may obtain a copy of the License at
      8 #
      9 #      http://www.apache.org/licenses/LICENSE-2.0
     10 #
     11 # Unless required by applicable law or agreed to in writing, software
     12 # distributed under the License is distributed on an "AS-IS" BASIS,
     13 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 # See the License for the specific language governing permissions and
     15 # limitations under the License.
     16 
     17 """Logic for computing dependency information for closurized JavaScript files.
     18 
     19 Closurized JavaScript files express dependencies using goog.require and
     20 goog.provide statements. In order for the linter to detect when a statement is
     21 missing or unnecessary, all identifiers in the JavaScript file must first be
     22 processed to determine if they constitute the creation or usage of a dependency.
     23 """
     24 
     25 
     26 
     27 import re
     28 
     29 from closure_linter import javascripttokens
     30 from closure_linter import tokenutil
     31 
# pylint: disable=g-bad-name
# Short alias for the JavaScript token type enumeration; ProcessToken compares
# token.type against its members.
TokenType = javascripttokens.JavaScriptTokenType

# Namespaces that ClosurizedNamespacesInfo.__init__ always appends to the
# caller-supplied ignore list, so IsExtraRequire never reports a goog.require
# of one of them as extra.
DEFAULT_EXTRA_NAMESPACES = [
    'goog.testing.asserts',
    'goog.testing.jsunit',
]
     39 
     40 
     41 class UsedNamespace(object):
     42   """A type for information about a used namespace."""
     43 
     44   def __init__(self, namespace, identifier, token, alias_definition):
     45     """Initializes the instance.
     46 
     47     Args:
     48       namespace: the namespace of an identifier used in the file
     49       identifier: the complete identifier
     50       token: the token that uses the namespace
     51       alias_definition: a boolean stating whether the namespace is only to used
     52           for an alias definition and should not be required.
     53     """
     54     self.namespace = namespace
     55     self.identifier = identifier
     56     self.token = token
     57     self.alias_definition = alias_definition
     58 
     59   def GetLine(self):
     60     return self.token.line_number
     61 
     62   def __repr__(self):
     63     return 'UsedNamespace(%s)' % ', '.join(
     64         ['%s=%s' % (k, repr(v)) for k, v in self.__dict__.iteritems()])
     65 
     66 
     67 class ClosurizedNamespacesInfo(object):
     68   """Dependency information for closurized JavaScript files.
     69 
     70   Processes token streams for dependency creation or usage and provides logic
     71   for determining if a given require or provide statement is unnecessary or if
     72   there are missing require or provide statements.
     73   """
     74 
     75   def __init__(self, closurized_namespaces, ignored_extra_namespaces):
     76     """Initializes an instance the ClosurizedNamespacesInfo class.
     77 
     78     Args:
     79       closurized_namespaces: A list of namespace prefixes that should be
     80           processed for dependency information. Non-matching namespaces are
     81           ignored.
     82       ignored_extra_namespaces: A list of namespaces that should not be reported
     83           as extra regardless of whether they are actually used.
     84     """
     85     self._closurized_namespaces = closurized_namespaces
     86     self._ignored_extra_namespaces = (ignored_extra_namespaces +
     87                                       DEFAULT_EXTRA_NAMESPACES)
     88     self.Reset()
     89 
     90   def Reset(self):
     91     """Resets the internal state to prepare for processing a new file."""
     92 
     93     # A list of goog.provide tokens in the order they appeared in the file.
     94     self._provide_tokens = []
     95 
     96     # A list of goog.require tokens in the order they appeared in the file.
     97     self._require_tokens = []
     98 
     99     # Namespaces that are already goog.provided.
    100     self._provided_namespaces = []
    101 
    102     # Namespaces that are already goog.required.
    103     self._required_namespaces = []
    104 
    105     # Note that created_namespaces and used_namespaces contain both namespaces
    106     # and identifiers because there are many existing cases where a method or
    107     # constant is provided directly instead of its namespace. Ideally, these
    108     # two lists would only have to contain namespaces.
    109 
    110     # A list of tuples where the first element is the namespace of an identifier
    111     # created in the file, the second is the identifier itself and the third is
    112     # the line number where it's created.
    113     self._created_namespaces = []
    114 
    115     # A list of UsedNamespace instances.
    116     self._used_namespaces = []
    117 
    118     # A list of seemingly-unnecessary namespaces that are goog.required() and
    119     # annotated with @suppress {extraRequire}.
    120     self._suppressed_requires = []
    121 
    122     # A list of goog.provide tokens which are duplicates.
    123     self._duplicate_provide_tokens = []
    124 
    125     # A list of goog.require tokens which are duplicates.
    126     self._duplicate_require_tokens = []
    127 
    128     # Whether this file is in a goog.scope. Someday, we may add support
    129     # for checking scopified namespaces, but for now let's just fail
    130     # in a more reasonable way.
    131     self._scopified_file = False
    132 
    133     # TODO(user): Handle the case where there are 2 different requires
    134     # that can satisfy the same dependency, but only one is necessary.
    135 
    136   def GetProvidedNamespaces(self):
    137     """Returns the namespaces which are already provided by this file.
    138 
    139     Returns:
    140       A list of strings where each string is a 'namespace' corresponding to an
    141       existing goog.provide statement in the file being checked.
    142     """
    143     return set(self._provided_namespaces)
    144 
    145   def GetRequiredNamespaces(self):
    146     """Returns the namespaces which are already required by this file.
    147 
    148     Returns:
    149       A list of strings where each string is a 'namespace' corresponding to an
    150       existing goog.require statement in the file being checked.
    151     """
    152     return set(self._required_namespaces)
    153 
    154   def IsExtraProvide(self, token):
    155     """Returns whether the given goog.provide token is unnecessary.
    156 
    157     Args:
    158       token: A goog.provide token.
    159 
    160     Returns:
    161       True if the given token corresponds to an unnecessary goog.provide
    162       statement, otherwise False.
    163     """
    164     namespace = tokenutil.GetStringAfterToken(token)
    165 
    166     if self.GetClosurizedNamespace(namespace) is None:
    167       return False
    168 
    169     if token in self._duplicate_provide_tokens:
    170       return True
    171 
    172     # TODO(user): There's probably a faster way to compute this.
    173     for created_namespace, created_identifier, _ in self._created_namespaces:
    174       if namespace == created_namespace or namespace == created_identifier:
    175         return False
    176 
    177     return True
    178 
    179   def IsExtraRequire(self, token):
    180     """Returns whether the given goog.require token is unnecessary.
    181 
    182     Args:
    183       token: A goog.require token.
    184 
    185     Returns:
    186       True if the given token corresponds to an unnecessary goog.require
    187       statement, otherwise False.
    188     """
    189     namespace = tokenutil.GetStringAfterToken(token)
    190 
    191     if self.GetClosurizedNamespace(namespace) is None:
    192       return False
    193 
    194     if namespace in self._ignored_extra_namespaces:
    195       return False
    196 
    197     if token in self._duplicate_require_tokens:
    198       return True
    199 
    200     if namespace in self._suppressed_requires:
    201       return False
    202 
    203     # If the namespace contains a component that is initial caps, then that
    204     # must be the last component of the namespace.
    205     parts = namespace.split('.')
    206     if len(parts) > 1 and parts[-2][0].isupper():
    207       return True
    208 
    209     # TODO(user): There's probably a faster way to compute this.
    210     for ns in self._used_namespaces:
    211       if (not ns.alias_definition and (
    212           namespace == ns.namespace or namespace == ns.identifier)):
    213         return False
    214 
    215     return True
    216 
    217   def GetMissingProvides(self):
    218     """Returns the dict of missing provided namespaces for the current file.
    219 
    220     Returns:
    221       Returns a dictionary of key as string and value as integer where each
    222       string(key) is a namespace that should be provided by this file, but is
    223       not and integer(value) is first line number where it's defined.
    224     """
    225     missing_provides = dict()
    226     for namespace, identifier, line_number in self._created_namespaces:
    227       if (not self._IsPrivateIdentifier(identifier) and
    228           namespace not in self._provided_namespaces and
    229           identifier not in self._provided_namespaces and
    230           namespace not in self._required_namespaces and
    231           namespace not in missing_provides):
    232         missing_provides[namespace] = line_number
    233 
    234     return missing_provides
    235 
    236   def GetMissingRequires(self):
    237     """Returns the dict of missing required namespaces for the current file.
    238 
    239     For each non-private identifier used in the file, find either a
    240     goog.require, goog.provide or a created identifier that satisfies it.
    241     goog.require statements can satisfy the identifier by requiring either the
    242     namespace of the identifier or the identifier itself. goog.provide
    243     statements can satisfy the identifier by providing the namespace of the
    244     identifier. A created identifier can only satisfy the used identifier if
    245     it matches it exactly (necessary since things can be defined on a
    246     namespace in more than one file). Note that provided namespaces should be
    247     a subset of created namespaces, but we check both because in some cases we
    248     can't always detect the creation of the namespace.
    249 
    250     Returns:
    251       Returns a dictionary of key as string and value integer where each
    252       string(key) is a namespace that should be required by this file, but is
    253       not and integer(value) is first line number where it's used.
    254     """
    255     external_dependencies = set(self._required_namespaces)
    256 
    257     # Assume goog namespace is always available.
    258     external_dependencies.add('goog')
    259     # goog.module is treated as a builtin, too (for goog.module.get).
    260     external_dependencies.add('goog.module')
    261 
    262     created_identifiers = set()
    263     for unused_namespace, identifier, unused_line_number in (
    264         self._created_namespaces):
    265       created_identifiers.add(identifier)
    266 
    267     missing_requires = dict()
    268     illegal_alias_statements = dict()
    269 
    270     def ShouldRequireNamespace(namespace, identifier):
    271       """Checks if a namespace would normally be required."""
    272       return (
    273           not self._IsPrivateIdentifier(identifier) and
    274           namespace not in external_dependencies and
    275           namespace not in self._provided_namespaces and
    276           identifier not in external_dependencies and
    277           identifier not in created_identifiers and
    278           namespace not in missing_requires)
    279 
    280     # First check all the used identifiers where we know that their namespace
    281     # needs to be provided (unless they are optional).
    282     for ns in self._used_namespaces:
    283       namespace = ns.namespace
    284       identifier = ns.identifier
    285       if (not ns.alias_definition and
    286           ShouldRequireNamespace(namespace, identifier)):
    287         missing_requires[namespace] = ns.GetLine()
    288 
    289     # Now that all required namespaces are known, we can check if the alias
    290     # definitions (that are likely being used for typeannotations that don't
    291     # need explicit goog.require statements) are already covered. If not
    292     # the user shouldn't use the alias.
    293     for ns in self._used_namespaces:
    294       if (not ns.alias_definition or
    295           not ShouldRequireNamespace(ns.namespace, ns.identifier)):
    296         continue
    297       if self._FindNamespace(ns.identifier, self._provided_namespaces,
    298                              created_identifiers, external_dependencies,
    299                              missing_requires):
    300         continue
    301       namespace = ns.identifier.rsplit('.', 1)[0]
    302       illegal_alias_statements[namespace] = ns.token
    303 
    304     return missing_requires, illegal_alias_statements
    305 
    306   def _FindNamespace(self, identifier, *namespaces_list):
    307     """Finds the namespace of an identifier given a list of other namespaces.
    308 
    309     Args:
    310       identifier: An identifier whose parent needs to be defined.
    311           e.g. for goog.bar.foo we search something that provides
    312           goog.bar.
    313       *namespaces_list: var args of iterables of namespace identifiers
    314     Returns:
    315       The namespace that the given identifier is part of or None.
    316     """
    317     identifier = identifier.rsplit('.', 1)[0]
    318     identifier_prefix = identifier + '.'
    319     for namespaces in namespaces_list:
    320       for namespace in namespaces:
    321         if namespace == identifier or namespace.startswith(identifier_prefix):
    322           return namespace
    323     return None
    324 
    325   def _IsPrivateIdentifier(self, identifier):
    326     """Returns whether the given identifier is private."""
    327     pieces = identifier.split('.')
    328     for piece in pieces:
    329       if piece.endswith('_'):
    330         return True
    331     return False
    332 
    333   def IsFirstProvide(self, token):
    334     """Returns whether token is the first provide token."""
    335     return self._provide_tokens and token == self._provide_tokens[0]
    336 
    337   def IsFirstRequire(self, token):
    338     """Returns whether token is the first require token."""
    339     return self._require_tokens and token == self._require_tokens[0]
    340 
    341   def IsLastProvide(self, token):
    342     """Returns whether token is the last provide token."""
    343     return self._provide_tokens and token == self._provide_tokens[-1]
    344 
    345   def IsLastRequire(self, token):
    346     """Returns whether token is the last require token."""
    347     return self._require_tokens and token == self._require_tokens[-1]
    348 
    349   def ProcessToken(self, token, state_tracker):
    350     """Processes the given token for dependency information.
    351 
    352     Args:
    353       token: The token to process.
    354       state_tracker: The JavaScript state tracker.
    355     """
    356 
    357     # Note that this method is in the critical path for the linter and has been
    358     # optimized for performance in the following ways:
    359     # - Tokens are checked by type first to minimize the number of function
    360     #   calls necessary to determine if action needs to be taken for the token.
    361     # - The most common tokens types are checked for first.
    362     # - The number of function calls has been minimized (thus the length of this
    363     #   function.
    364 
    365     if token.type == TokenType.IDENTIFIER:
    366       # TODO(user): Consider saving the whole identifier in metadata.
    367       whole_identifier_string = tokenutil.GetIdentifierForToken(token)
    368       if whole_identifier_string is None:
    369         # We only want to process the identifier one time. If the whole string
    370         # identifier is None, that means this token was part of a multi-token
    371         # identifier, but it was not the first token of the identifier.
    372         return
    373 
    374       # In the odd case that a goog.require is encountered inside a function,
    375       # just ignore it (e.g. dynamic loading in test runners).
    376       if token.string == 'goog.require' and not state_tracker.InFunction():
    377         self._require_tokens.append(token)
    378         namespace = tokenutil.GetStringAfterToken(token)
    379         if namespace in self._required_namespaces:
    380           self._duplicate_require_tokens.append(token)
    381         else:
    382           self._required_namespaces.append(namespace)
    383 
    384         # If there is a suppression for the require, add a usage for it so it
    385         # gets treated as a regular goog.require (i.e. still gets sorted).
    386         if self._HasSuppression(state_tracker, 'extraRequire'):
    387           self._suppressed_requires.append(namespace)
    388           self._AddUsedNamespace(state_tracker, namespace, token)
    389 
    390       elif token.string == 'goog.provide':
    391         self._provide_tokens.append(token)
    392         namespace = tokenutil.GetStringAfterToken(token)
    393         if namespace in self._provided_namespaces:
    394           self._duplicate_provide_tokens.append(token)
    395         else:
    396           self._provided_namespaces.append(namespace)
    397 
    398         # If there is a suppression for the provide, add a creation for it so it
    399         # gets treated as a regular goog.provide (i.e. still gets sorted).
    400         if self._HasSuppression(state_tracker, 'extraProvide'):
    401           self._AddCreatedNamespace(state_tracker, namespace, token.line_number)
    402 
    403       elif token.string == 'goog.scope':
    404         self._scopified_file = True
    405 
    406       elif token.string == 'goog.setTestOnly':
    407 
    408         # Since the message is optional, we don't want to scan to later lines.
    409         for t in tokenutil.GetAllTokensInSameLine(token):
    410           if t.type == TokenType.STRING_TEXT:
    411             message = t.string
    412 
    413             if re.match(r'^\w+(\.\w+)+$', message):
    414               # This looks like a namespace. If it's a Closurized namespace,
    415               # consider it created.
    416               base_namespace = message.split('.', 1)[0]
    417               if base_namespace in self._closurized_namespaces:
    418                 self._AddCreatedNamespace(state_tracker, message,
    419                                           token.line_number)
    420 
    421             break
    422       else:
    423         jsdoc = state_tracker.GetDocComment()
    424         if token.metadata and token.metadata.aliased_symbol:
    425           whole_identifier_string = token.metadata.aliased_symbol
    426         elif (token.string == 'goog.module.get' and
    427               not self._HasSuppression(state_tracker, 'extraRequire')):
    428           # Cannot use _AddUsedNamespace as this is not an identifier, but
    429           # already the entire namespace that's required.
    430           namespace = tokenutil.GetStringAfterToken(token)
    431           namespace = UsedNamespace(namespace, namespace, token,
    432                                     alias_definition=False)
    433           self._used_namespaces.append(namespace)
    434         if jsdoc and jsdoc.HasFlag('typedef'):
    435           self._AddCreatedNamespace(state_tracker, whole_identifier_string,
    436                                     token.line_number,
    437                                     namespace=self.GetClosurizedNamespace(
    438                                         whole_identifier_string))
    439         else:
    440           is_alias_definition = (token.metadata and
    441                                  token.metadata.is_alias_definition)
    442           self._AddUsedNamespace(state_tracker, whole_identifier_string,
    443                                  token, is_alias_definition)
    444 
    445     elif token.type == TokenType.SIMPLE_LVALUE:
    446       identifier = token.values['identifier']
    447       start_token = tokenutil.GetIdentifierStart(token)
    448       if start_token and start_token != token:
    449         # Multi-line identifier being assigned. Get the whole identifier.
    450         identifier = tokenutil.GetIdentifierForToken(start_token)
    451       else:
    452         start_token = token
    453       # If an alias is defined on the start_token, use it instead.
    454       if (start_token and
    455           start_token.metadata and
    456           start_token.metadata.aliased_symbol and
    457           not start_token.metadata.is_alias_definition):
    458         identifier = start_token.metadata.aliased_symbol
    459 
    460       if identifier:
    461         namespace = self.GetClosurizedNamespace(identifier)
    462         if state_tracker.InFunction():
    463           self._AddUsedNamespace(state_tracker, identifier, token)
    464         elif namespace and namespace != 'goog':
    465           self._AddCreatedNamespace(state_tracker, identifier,
    466                                     token.line_number, namespace=namespace)
    467 
    468     elif token.type == TokenType.DOC_FLAG:
    469       flag = token.attached_object
    470       flag_type = flag.flag_type
    471       if flag and flag.HasType() and flag.jstype:
    472         is_interface = state_tracker.GetDocComment().HasFlag('interface')
    473         if flag_type == 'implements' or (flag_type == 'extends'
    474                                          and is_interface):
    475           identifier = flag.jstype.alias or flag.jstype.identifier
    476           self._AddUsedNamespace(state_tracker, identifier, token)
    477           # Since we process doctypes only for implements and extends, the
    478           # type is a simple one and we don't need any iteration for subtypes.
    479 
    480   def _AddCreatedNamespace(self, state_tracker, identifier, line_number,
    481                            namespace=None):
    482     """Adds the namespace of an identifier to the list of created namespaces.
    483 
    484     If the identifier is annotated with a 'missingProvide' suppression, it is
    485     not added.
    486 
    487     Args:
    488       state_tracker: The JavaScriptStateTracker instance.
    489       identifier: The identifier to add.
    490       line_number: Line number where namespace is created.
    491       namespace: The namespace of the identifier or None if the identifier is
    492           also the namespace.
    493     """
    494     if not namespace:
    495       namespace = identifier
    496 
    497     if self._HasSuppression(state_tracker, 'missingProvide'):
    498       return
    499 
    500     self._created_namespaces.append([namespace, identifier, line_number])
    501 
    502   def _AddUsedNamespace(self, state_tracker, identifier, token,
    503                         is_alias_definition=False):
    504     """Adds the namespace of an identifier to the list of used namespaces.
    505 
    506     If the identifier is annotated with a 'missingRequire' suppression, it is
    507     not added.
    508 
    509     Args:
    510       state_tracker: The JavaScriptStateTracker instance.
    511       identifier: An identifier which has been used.
    512       token: The token in which the namespace is used.
    513       is_alias_definition: If the used namespace is part of an alias_definition.
    514           Aliased symbols need their parent namespace to be available, if it is
    515           not yet required through another symbol, an error will be thrown.
    516     """
    517     if self._HasSuppression(state_tracker, 'missingRequire'):
    518       return
    519 
    520     identifier = self._GetUsedIdentifier(identifier)
    521     namespace = self.GetClosurizedNamespace(identifier)
    522     # b/5362203 If its a variable in scope then its not a required namespace.
    523     if namespace and not state_tracker.IsVariableInScope(namespace):
    524       namespace = UsedNamespace(namespace, identifier, token,
    525                                 is_alias_definition)
    526       self._used_namespaces.append(namespace)
    527 
    528   def _HasSuppression(self, state_tracker, suppression):
    529     jsdoc = state_tracker.GetDocComment()
    530     return jsdoc and suppression in jsdoc.suppressions
    531 
    532   def _GetUsedIdentifier(self, identifier):
    533     """Strips apply/call/inherit calls from the identifier."""
    534     for suffix in ('.apply', '.call', '.inherit'):
    535       if identifier.endswith(suffix):
    536         return identifier[:-len(suffix)]
    537     return identifier
    538 
    539   def GetClosurizedNamespace(self, identifier):
    540     """Given an identifier, returns the namespace that identifier is from.
    541 
    542     Args:
    543       identifier: The identifier to extract a namespace from.
    544 
    545     Returns:
    546       The namespace the given identifier resides in, or None if one could not
    547       be found.
    548     """
    549     if identifier.startswith('goog.global'):
    550       # Ignore goog.global, since it is, by definition, global.
    551       return None
    552 
    553     parts = identifier.split('.')
    554     for namespace in self._closurized_namespaces:
    555       if not identifier.startswith(namespace + '.'):
    556         continue
    557 
    558       # The namespace for a class is the shortest prefix ending in a class
    559       # name, which starts with a capital letter but is not a capitalized word.
    560       #
    561       # We ultimately do not want to allow requiring or providing of inner
    562       # classes/enums.  Instead, a file should provide only the top-level class
    563       # and users should require only that.
    564       namespace = []
    565       for part in parts:
    566         if part == 'prototype' or part.isupper():
    567           return '.'.join(namespace)
    568         namespace.append(part)
    569         if part[0].isupper():
    570           return '.'.join(namespace)
    571 
    572       # At this point, we know there's no class or enum, so the namespace is
    573       # just the identifier with the last part removed. With the exception of
    574       # apply, inherits, and call, which should also be stripped.
    575       if parts[-1] in ('apply', 'inherits', 'call'):
    576         parts.pop()
    577       parts.pop()
    578 
    579       # If the last part ends with an underscore, it is a private variable,
    580       # method, or enum. The namespace is whatever is before it.
    581       if parts and parts[-1].endswith('_'):
    582         parts.pop()
    583 
    584       return '.'.join(parts)
    585 
    586     return None
    587