Home | History | Annotate | Download | only in docs
      1 # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 # ==============================================================================
     15 """Generate docs for the TensorFlow Python API."""
     16 
     17 from __future__ import absolute_import
     18 from __future__ import division
     19 from __future__ import print_function
     20 
     21 import argparse
     22 import fnmatch
     23 import os
     24 import sys
     25 
     26 import six
     27 
     28 from tensorflow.python.util import tf_inspect
     29 from tensorflow.tools.common import public_api
     30 from tensorflow.tools.common import traverse
     31 from tensorflow.tools.docs import doc_generator_visitor
     32 from tensorflow.tools.docs import parser
     33 from tensorflow.tools.docs import pretty_docs
     34 from tensorflow.tools.docs import py_guide_parser
     35 
     36 
     37 def _is_free_function(py_object, full_name, index):
     38   """Check if input is a free function (and not a class- or static method)."""
     39   if not tf_inspect.isfunction(py_object):
     40     return False
     41 
     42   # Static methods are functions to tf_inspect (in 2.7), so check if the parent
     43   # is a class. If there is no parent, it's not a function.
     44   if '.' not in full_name:
     45     return False
     46 
     47   parent_name = full_name.rsplit('.', 1)[0]
     48   if tf_inspect.isclass(index[parent_name]):
     49     return False
     50 
     51   return True
     52 
     53 
     54 def write_docs(output_dir, parser_config, yaml_toc, root_title='TensorFlow'):
     55   """Write previously extracted docs to disk.
     56 
     57   Write a docs page for each symbol included in the indices of parser_config to
     58   a tree of docs at `output_dir`.
     59 
     60   Symbols with multiple aliases will have only one page written about
     61   them, which is referenced for all aliases.
     62 
     63   Args:
     64     output_dir: Directory to write documentation markdown files to. Will be
     65       created if it doesn't exist.
     66     parser_config: A `parser.ParserConfig` object, containing all the necessary
     67       indices.
     68     yaml_toc: Set to `True` to generate a "_toc.yaml" file.
     69     root_title: The title name for the root level index.md.
     70 
     71   Raises:
     72     ValueError: if `output_dir` is not an absolute path
     73   """
     74   # Make output_dir.
     75   if not os.path.isabs(output_dir):
     76     raise ValueError("'output_dir' must be an absolute path.\n"
     77                      "    output_dir='%s'" % output_dir)
     78 
     79   try:
     80     if not os.path.exists(output_dir):
     81       os.makedirs(output_dir)
     82   except OSError as e:
     83     print('Creating output dir "%s" failed: %s' % (output_dir, e))
     84     raise
     85 
     86   # These dictionaries are used for table-of-contents generation below
     87   # They will contain, after the for-loop below::
     88   #  - module name(string):classes and functions the module contains(list)
     89   module_children = {}
     90   #  - symbol name(string):pathname (string)
     91   symbol_to_file = {}
     92 
     93   # Parse and write Markdown pages, resolving cross-links (@{symbol}).
     94   for full_name, py_object in six.iteritems(parser_config.index):
     95     parser_config.reference_resolver.current_doc_full_name = full_name
     96 
     97     if full_name in parser_config.duplicate_of:
     98       continue
     99 
    100     # Methods and some routines are documented only as part of their class.
    101     if not (tf_inspect.ismodule(py_object) or tf_inspect.isclass(py_object) or
    102             _is_free_function(py_object, full_name, parser_config.index)):
    103       continue
    104 
    105     sitepath = os.path.join('api_docs/python',
    106                             parser.documentation_path(full_name)[:-3])
    107 
    108     # For TOC, we need to store a mapping from full_name to the file
    109     # we're generating
    110     symbol_to_file[full_name] = sitepath
    111 
    112     # For a module, remember the module for the table-of-contents
    113     if tf_inspect.ismodule(py_object):
    114       if full_name in parser_config.tree:
    115         module_children.setdefault(full_name, [])
    116 
    117     # For something else that's documented,
    118     # figure out what module it lives in
    119     else:
    120       subname = str(full_name)
    121       while True:
    122         subname = subname[:subname.rindex('.')]
    123         if tf_inspect.ismodule(parser_config.index[subname]):
    124           module_children.setdefault(subname, []).append(full_name)
    125           break
    126 
    127     print('Writing docs for %s (%r).' % (full_name, py_object))
    128 
    129     # Generate docs for `py_object`, resolving references.
    130     page_info = parser.docs_for_object(full_name, py_object, parser_config)
    131 
    132     path = os.path.join(output_dir, parser.documentation_path(full_name))
    133     directory = os.path.dirname(path)
    134     try:
    135       if not os.path.exists(directory):
    136         os.makedirs(directory)
    137       with open(path, 'w') as f:
    138         f.write(pretty_docs.build_md_page(page_info))
    139     except OSError as e:
    140       print('Cannot write documentation for %s to %s: %s' % (full_name,
    141                                                              directory, e))
    142       raise
    143 
    144   if yaml_toc:
    145     # Generate table of contents
    146 
    147     # Put modules in alphabetical order, case-insensitive
    148     modules = sorted(module_children.keys(), key=lambda a: a.upper())
    149 
    150     leftnav_path = os.path.join(output_dir, '_toc.yaml')
    151     with open(leftnav_path, 'w') as f:
    152 
    153       # Generate header
    154       f.write('# Automatically generated file; please do not edit\ntoc:\n')
    155       for module in modules:
    156         indent_num = module.count('.')
    157         # Don't list `tf.submodule` inside `tf`
    158         indent_num = max(indent_num, 1)
    159         indent = '  '*indent_num
    160 
    161         if indent_num > 1:
    162           # tf.contrib.baysflow.entropy will be under
    163           #   tf.contrib->baysflow->entropy
    164           title = module.split('.')[-1]
    165         else:
    166           title = module
    167 
    168         header = [
    169             '- title: ' + title,
    170             '  section:',
    171             '  - title: Overview',
    172             '    path: /TARGET_DOC_ROOT/VERSION/' + symbol_to_file[module]]
    173         header = ''.join([indent+line+'\n' for line in header])
    174         f.write(header)
    175 
    176         symbols_in_module = module_children.get(module, [])
    177         # Sort case-insensitive, if equal sort case sensitive (upper first)
    178         symbols_in_module.sort(key=lambda a: (a.upper(), a))
    179 
    180         for full_name in symbols_in_module:
    181           item = [
    182               '  - title: ' + full_name[len(module) + 1:],
    183               '    path: /TARGET_DOC_ROOT/VERSION/' + symbol_to_file[full_name]]
    184           item = ''.join([indent+line+'\n' for line in item])
    185           f.write(item)
    186 
    187   # Write a global index containing all full names with links.
    188   with open(os.path.join(output_dir, 'index.md'), 'w') as f:
    189     f.write(
    190         parser.generate_global_index(root_title, parser_config.index,
    191                                      parser_config.reference_resolver))
    192 
    193 
    194 def add_dict_to_dict(add_from, add_to):
    195   for key in add_from:
    196     if key in add_to:
    197       add_to[key].extend(add_from[key])
    198     else:
    199       add_to[key] = add_from[key]
    200 
    201 
    202 # Exclude some libraries in contrib from the documentation altogether.
    203 def _get_default_private_map():
    204   return {'tf.test': ['mock']}
    205 
    206 
    207 # Exclude members of some libraries.
    208 def _get_default_do_not_descend_map():
    209   # TODO(wicke): Shrink this list once the modules get sealed.
    210   return {
    211       'tf': ['cli', 'lib', 'wrappers'],
    212       'tf.contrib': [
    213           'compiler',
    214           'grid_rnn',
    215           # Block contrib.keras to de-clutter the docs
    216           'keras',
    217           'labeled_tensor',
    218           'quantization',
    219           'session_bundle',
    220           'slim',
    221           'solvers',
    222           'specs',
    223           'tensor_forest',
    224           'tensorboard',
    225           'testing',
    226           'tfprof',
    227       ],
    228       'tf.contrib.bayesflow': [
    229           'special_math', 'stochastic_gradient_estimators',
    230           'stochastic_variables'
    231       ],
    232       'tf.contrib.ffmpeg': ['ffmpeg_ops'],
    233       'tf.contrib.graph_editor': [
    234           'edit', 'match', 'reroute', 'subgraph', 'transform', 'select', 'util'
    235       ],
    236       'tf.contrib.keras': ['api', 'python'],
    237       'tf.contrib.layers': ['feature_column', 'summaries'],
    238       'tf.contrib.learn': [
    239           'datasets',
    240           'head',
    241           'graph_actions',
    242           'io',
    243           'models',
    244           'monitors',
    245           'ops',
    246           'preprocessing',
    247           'utils',
    248       ],
    249       'tf.contrib.util': ['loader'],
    250   }
    251 
    252 
    253 def extract(py_modules, private_map, do_not_descend_map):
    254   """Extract docs from tf namespace and write them to disk."""
    255   # Traverse the first module.
    256   visitor = doc_generator_visitor.DocGeneratorVisitor(py_modules[0][0])
    257   api_visitor = public_api.PublicAPIVisitor(visitor)
    258   api_visitor.set_root_name(py_modules[0][0])
    259   add_dict_to_dict(private_map, api_visitor.private_map)
    260   add_dict_to_dict(do_not_descend_map, api_visitor.do_not_descend_map)
    261 
    262   traverse.traverse(py_modules[0][1], api_visitor)
    263 
    264   # Traverse all py_modules after the first:
    265   for module_name, module in py_modules[1:]:
    266     visitor.set_root_name(module_name)
    267     api_visitor.set_root_name(module_name)
    268     traverse.traverse(module, api_visitor)
    269 
    270   return visitor
    271 
    272 
    273 class _GetMarkdownTitle(py_guide_parser.PyGuideParser):
    274   """Extract the title from a .md file."""
    275 
    276   def __init__(self):
    277     self.title = None
    278     py_guide_parser.PyGuideParser.__init__(self)
    279 
    280   def process_title(self, _, title):
    281     if self.title is None:  # only use the first title
    282       self.title = title
    283 
    284 
    285 class _DocInfo(object):
    286   """A simple struct for holding a doc's url and title."""
    287 
    288   def __init__(self, url, title):
    289     self.url = url
    290     self.title = title
    291 
    292 
    293 def build_doc_index(src_dir):
    294   """Build an index from a keyword designating a doc to _DocInfo objects."""
    295   doc_index = {}
    296   if not os.path.isabs(src_dir):
    297     raise ValueError("'src_dir' must be an absolute path.\n"
    298                      "    src_dir='%s'" % src_dir)
    299 
    300   if not os.path.exists(src_dir):
    301     raise ValueError("'src_dir' path must exist.\n"
    302                      "    src_dir='%s'" % src_dir)
    303 
    304   for dirpath, _, filenames in os.walk(src_dir):
    305     suffix = os.path.relpath(path=dirpath, start=src_dir)
    306     for base_name in filenames:
    307       if not base_name.endswith('.md'):
    308         continue
    309       title_parser = _GetMarkdownTitle()
    310       title_parser.process(os.path.join(dirpath, base_name))
    311       key_parts = os.path.join(suffix, base_name[:-3]).split('/')
    312       if key_parts[-1] == 'index':
    313         key_parts = key_parts[:-1]
    314       doc_info = _DocInfo(os.path.join(suffix, base_name), title_parser.title)
    315       doc_index[key_parts[-1]] = doc_info
    316       if len(key_parts) > 1:
    317         doc_index['/'.join(key_parts[-2:])] = doc_info
    318 
    319   return doc_index
    320 
    321 
    322 class _GuideRef(object):
    323 
    324   def __init__(self, base_name, title, section_title, section_tag):
    325     self.url = 'api_guides/python/' + (('%s#%s' % (base_name, section_tag))
    326                                        if section_tag else base_name)
    327     self.link_text = (('%s > %s' % (title, section_title))
    328                       if section_title else title)
    329 
    330   def make_md_link(self, url_prefix):
    331     return '[%s](%s%s)' % (self.link_text, url_prefix, self.url)
    332 
    333 
    334 class _GenerateGuideIndex(py_guide_parser.PyGuideParser):
    335   """Turn guide files into an index from symbol name to a list of _GuideRefs."""
    336 
    337   def __init__(self):
    338     self.index = {}
    339     py_guide_parser.PyGuideParser.__init__(self)
    340 
    341   def process(self, full_path, base_name):
    342     """Index a file, reading from `full_path`, with `base_name` as the link."""
    343     self.full_path = full_path
    344     self.base_name = base_name
    345     self.title = None
    346     self.section_title = None
    347     self.section_tag = None
    348     py_guide_parser.PyGuideParser.process(self, full_path)
    349 
    350   def process_title(self, _, title):
    351     if self.title is None:  # only use the first title
    352       self.title = title
    353 
    354   def process_section(self, _, section_title, tag):
    355     self.section_title = section_title
    356     self.section_tag = tag
    357 
    358   def process_line(self, _, line):
    359     """Index @{symbol} references as in the current file & section."""
    360     for match in parser.SYMBOL_REFERENCE_RE.finditer(line):
    361       val = self.index.get(match.group(1), [])
    362       val.append(
    363           _GuideRef(self.base_name, self.title, self.section_title,
    364                     self.section_tag))
    365       self.index[match.group(1)] = val
    366 
    367 
    368 def _build_guide_index(guide_src_dir):
    369   """Return dict: symbol name -> _GuideRef from the files in `guide_src_dir`."""
    370   index_generator = _GenerateGuideIndex()
    371   if os.path.exists(guide_src_dir):
    372     for full_path, base_name in py_guide_parser.md_files_in_dir(guide_src_dir):
    373       index_generator.process(full_path, base_name)
    374   return index_generator.index
    375 
    376 
    377 class _UpdateTags(py_guide_parser.PyGuideParser):
    378   """Rewrites a Python guide so that each section has an explicit tag."""
    379 
    380   def process_section(self, line_number, section_title, tag):
    381     self.replace_line(line_number, '<h2 id="%s">%s</h2>' % (tag, section_title))
    382 
    383 
    384 EXCLUDED = set(['__init__.py', 'OWNERS', 'README.txt'])
    385 
    386 
    387 def _other_docs(src_dir, output_dir, reference_resolver, file_pattern='*.md'):
    388   """Fix @{} references in all files under `src_dir` matching `file_pattern`.
    389 
    390   A matching directory structure, with the modified files is
    391   written to `output_dir`.
    392 
    393   `{"__init__.py","OWNERS","README.txt"}` are skipped.
    394 
    395   Files not matching `file_pattern` (using `fnmatch`) are copied with no change.
    396 
    397   Also, files in the `api_guides/python` directory get explicit ids set on all
    398   heading-2s to ensure back-links work.
    399 
    400   Args:
    401     src_dir: The directory to convert files from.
    402     output_dir: The root directory to write the resulting files to.
    403     reference_resolver: A `parser.ReferenceResolver` to make the replacements.
    404     file_pattern: Only replace references in files matching file_patters,
    405       using fnmatch. Non-matching files are copied unchanged.
    406   """
    407   # Iterate through all the source files and process them.
    408   tag_updater = _UpdateTags()
    409   for dirpath, _, filenames in os.walk(src_dir):
    410     # How to get from `dirpath` to api_docs/python/
    411     relative_path_to_root = os.path.relpath(
    412         path=os.path.join(src_dir, 'api_docs/python'), start=dirpath)
    413 
    414     # Make the directory under output_dir.
    415     new_dir = os.path.join(output_dir,
    416                            os.path.relpath(path=dirpath, start=src_dir))
    417     try:
    418       if not os.path.exists(new_dir):
    419         os.makedirs(new_dir)
    420     except OSError as e:
    421       print('Creating output dir "%s" failed: %s' % (new_dir, e))
    422       raise
    423 
    424     for base_name in filenames:
    425       if base_name in EXCLUDED:
    426         print('Skipping excluded file %s...' % base_name)
    427         continue
    428       full_in_path = os.path.join(dirpath, base_name)
    429 
    430       reference_resolver.current_doc_full_name = full_in_path
    431 
    432       suffix = os.path.relpath(path=full_in_path, start=src_dir)
    433       full_out_path = os.path.join(output_dir, suffix)
    434       if not fnmatch.fnmatch(base_name, file_pattern):
    435         print('Copying un-matched file %s...' % suffix)
    436         open(full_out_path, 'w').write(open(full_in_path).read())
    437         continue
    438       if dirpath.endswith('/api_guides/python'):
    439         print('Processing Python guide %s...' % base_name)
    440         content = tag_updater.process(full_in_path)
    441       else:
    442         print('Processing doc %s...' % suffix)
    443         content = open(full_in_path).read()
    444 
    445       content = reference_resolver.replace_references(content,
    446                                                       relative_path_to_root)
    447       with open(full_out_path, 'w') as f:
    448         f.write(content)
    449 
    450   print('Done.')
    451 
    452 
    453 class DocGenerator(object):
    454   """Main entry point for generating docs."""
    455 
    456   def __init__(self):
    457     if sys.version_info >= (3, 0):
    458       sys.exit('Doc generation is not supported from python3.')
    459     self.argument_parser = argparse.ArgumentParser()
    460     self._py_modules = None
    461     self._private_map = _get_default_private_map()
    462     self._do_not_descend_map = _get_default_do_not_descend_map()
    463     self.yaml_toc = True
    464 
    465   def add_output_dir_argument(self):
    466     self.argument_parser.add_argument(
    467         '--output_dir',
    468         type=str,
    469         default=None,
    470         required=True,
    471         help='Directory to write docs to.')
    472 
    473   def add_src_dir_argument(self):
    474     self.argument_parser.add_argument(
    475         '--src_dir',
    476         type=str,
    477         default=None,
    478         required=True,
    479         help='Directory with the source docs.')
    480 
    481   def add_base_dir_argument(self, default_base_dir):
    482     self.argument_parser.add_argument(
    483         '--base_dir',
    484         type=str,
    485         default=default_base_dir,
    486         help='Base directory to strip from file names referenced in docs.')
    487 
    488   def parse_known_args(self):
    489     flags, _ = self.argument_parser.parse_known_args()
    490     return flags
    491 
    492   def add_to_private_map(self, d):
    493     add_dict_to_dict(d, self._private_map)
    494 
    495   def add_to_do_not_descend_map(self, d):
    496     add_dict_to_dict(d, self._do_not_descend_map)
    497 
    498   def set_private_map(self, d):
    499     self._private_map = d
    500 
    501   def set_do_not_descend_map(self, d):
    502     self._do_not_descend_map = d
    503 
    504   def set_py_modules(self, py_modules):
    505     self._py_modules = py_modules
    506 
    507   def py_module_names(self):
    508     if self._py_modules is None:
    509       raise RuntimeError(
    510           'Must call set_py_modules() before running py_module_names().')
    511     return [name for (name, _) in self._py_modules]
    512 
    513   def make_reference_resolver(self, visitor, doc_index):
    514     return parser.ReferenceResolver.from_visitor(
    515         visitor, doc_index, py_module_names=self.py_module_names())
    516 
    517   def make_parser_config(self, visitor, reference_resolver, guide_index,
    518                          base_dir):
    519     return parser.ParserConfig(
    520         reference_resolver=reference_resolver,
    521         duplicates=visitor.duplicates,
    522         duplicate_of=visitor.duplicate_of,
    523         tree=visitor.tree,
    524         index=visitor.index,
    525         reverse_index=visitor.reverse_index,
    526         guide_index=guide_index,
    527         base_dir=base_dir)
    528 
    529   def run_extraction(self):
    530     return extract(self._py_modules, self._private_map,
    531                    self._do_not_descend_map)
    532 
    533   def build(self, flags):
    534     """Actually build the docs."""
    535     doc_index = build_doc_index(flags.src_dir)
    536     visitor = self.run_extraction()
    537     reference_resolver = self.make_reference_resolver(visitor, doc_index)
    538 
    539     root_title = getattr(flags, 'root_title', 'TensorFlow')
    540     guide_index = _build_guide_index(
    541         os.path.join(flags.src_dir, 'api_guides/python'))
    542 
    543     parser_config = self.make_parser_config(visitor, reference_resolver,
    544                                             guide_index, flags.base_dir)
    545     output_dir = os.path.join(flags.output_dir, 'api_docs/python')
    546 
    547     write_docs(
    548         output_dir,
    549         parser_config,
    550         yaml_toc=self.yaml_toc,
    551         root_title=root_title)
    552     _other_docs(flags.src_dir, flags.output_dir, reference_resolver)
    553 
    554     parser_config.reference_resolver.log_errors()
    555 
    556     return parser_config.reference_resolver.num_errors()
    557