Home | History | Annotate | Download | only in keras
      1 # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 # ==============================================================================
     15 # pylint: disable=protected-access
     16 """Code for model cloning, plus model-related API entries.
     17 """
     18 from __future__ import absolute_import
     19 from __future__ import division
     20 from __future__ import print_function
     21 
     22 from tensorflow.python.keras import backend as K
     23 from tensorflow.python.keras import metrics as metrics_module
     24 from tensorflow.python.keras import optimizers
     25 from tensorflow.python.keras.engine import sequential
     26 from tensorflow.python.keras.engine import training
     27 from tensorflow.python.keras.engine.base_layer import Layer
     28 from tensorflow.python.keras.engine.input_layer import Input
     29 from tensorflow.python.keras.engine.input_layer import InputLayer
     30 from tensorflow.python.keras.engine.network import Network
     31 from tensorflow.python.keras.saving import hdf5_format
     32 from tensorflow.python.keras.saving import model_config
     33 from tensorflow.python.keras.utils import generic_utils
     34 from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
     35 from tensorflow.python.util import nest
     36 from tensorflow.python.util.tf_export import keras_export
     37 
     38 
# API entries importable from `keras.models`:
# Each name below is an alias bound to an object defined in another module
# (`training`, `sequential`, `hdf5_format`, `model_config`), so that it can be
# referenced from this module as well.
Model = training.Model  # pylint: disable=invalid-name
Sequential = sequential.Sequential  # pylint: disable=invalid-name
# Saving / loading entry points (aliases of the `hdf5_format` functions).
save_model = hdf5_format.save_model
load_model = hdf5_format.load_model
# Model deserialization entry points (aliases of the `model_config` functions).
model_from_config = model_config.model_from_config
model_from_yaml = model_config.model_from_yaml
model_from_json = model_config.model_from_json
     47 
     48 
     49 def _clone_layer(layer):
     50   return layer.__class__.from_config(layer.get_config())
     51 
     52 
     53 def _clone_functional_model(model, input_tensors=None, share_weights=False):
     54   """Clone a functional `Model` instance.
     55 
     56   Model cloning is similar to calling a model on new inputs,
     57   except that it creates new layers (and thus new weights) instead
     58   of sharing the weights of the existing layers.
     59 
     60   Arguments:
     61       model: Instance of `Model`.
     62       input_tensors: optional list of input tensors
     63           to build the model upon. If not provided,
     64           placeholders will be created.
     65       share_weights: flag to enable sharing of non-input layers between the
     66           cloned and original model. Note this still clones the input layers.
     67           This is required when we create a per-replica copy of the model with
     68           distribution strategy; we want the weights to be shared but still
     69           feed inputs separately so we create new input layers.
     70 
     71   Returns:
     72       An instance of `Model` reproducing the behavior
     73       of the original model, on top of new inputs tensors,
     74       using newly instantiated weights.
     75 
     76   Raises:
     77       ValueError: in case of invalid `model` argument value.
     78   """
     79   if not isinstance(model, Model):
     80     raise ValueError('Expected `model` argument '
     81                      'to be a `Model` instance, got ', model)
     82   if isinstance(model, Sequential):
     83     raise ValueError('Expected `model` argument '
     84                      'to be a functional `Model` instance, '
     85                      'got a `Sequential` instance instead:', model)
     86 
     87   layer_map = {}  # Cache for created layers.
     88   tensor_map = {}  # Map {reference_tensor: corresponding_tensor}
     89   if input_tensors is None:
     90     # Create placeholders to build the model on top of.
     91     input_tensors = []
     92     for layer in model._input_layers:
     93       input_tensor = Input(
     94           batch_shape=layer._batch_input_shape,
     95           dtype=layer.dtype,
     96           sparse=layer.sparse,
     97           name=layer.name)
     98       input_tensors.append(input_tensor)
     99       # Cache newly created input layer.
    100       newly_created_input_layer = input_tensor._keras_history[0]
    101       layer_map[layer] = newly_created_input_layer
    102   else:
    103     # Make sure that all input tensors come from a Keras layer.
    104     # If tensor comes from an input layer: cache the input layer.
    105     input_tensors = nest.flatten(input_tensors)
    106     input_tensors_ = []
    107     for i in range(len(input_tensors)):
    108       input_tensor = input_tensors[i]
    109       if not K.is_keras_tensor(input_tensor):
    110         original_input_layer = model._input_layers[i]
    111         name = original_input_layer.name
    112         input_tensor = Input(tensor=input_tensor,
    113                              name='input_wrapper_for_' + name)
    114 
    115         input_tensors_.append(input_tensor)
    116         # Cache newly created input layer.
    117         newly_created_input_layer = input_tensor._keras_history[0]
    118         layer_map[original_input_layer] = newly_created_input_layer
    119       else:
    120         input_tensors_.append(input_tensor)
    121     input_tensors = input_tensors_
    122 
    123   for x, y in zip(model.inputs, input_tensors):
    124     tensor_map[x] = y
    125 
    126   # Iterated over every node in the reference model, in depth order.
    127   depth_keys = list(model._nodes_by_depth.keys())
    128   depth_keys.sort(reverse=True)
    129   for depth in depth_keys:
    130     nodes = model._nodes_by_depth[depth]
    131     for node in nodes:
    132       # Recover the corresponding layer.
    133       layer = node.outbound_layer
    134 
    135       # Get or create layer.
    136       if layer not in layer_map:
    137         if not share_weights:
    138           # Clone layer.
    139           new_layer = _clone_layer(layer)
    140           layer_map[layer] = new_layer
    141           layer = new_layer
    142       else:
    143         # Reuse previously cloned layer.
    144         layer = layer_map[layer]
    145         # Don't call InputLayer multiple times.
    146         if isinstance(layer, InputLayer):
    147           continue
    148 
    149       # If all previous input tensors are available in tensor_map,
    150       # then call node.inbound_layer on them.
    151       if all(
    152           tensor in tensor_map for tensor in nest.flatten(node.input_tensors)):
    153         computed_tensors = nest.map_structure(lambda t: tensor_map[t],
    154                                               node.input_tensors)
    155         # Call layer.
    156         kwargs = node.arguments or {}
    157         output_tensors = layer(computed_tensors, **kwargs)
    158 
    159         for x, y in zip(
    160             nest.flatten(node.output_tensors), nest.flatten(output_tensors)):
    161           tensor_map[x] = y
    162 
    163   # Check that we did compute the model outputs,
    164   # then instantiate a new model from inputs and outputs.
    165   output_tensors = []
    166   for x in model.outputs:
    167     assert x in tensor_map, 'Could not compute output ' + str(x)
    168     output_tensors.append(tensor_map[x])
    169 
    170   input_tensors = nest.pack_sequence_as(model._nested_inputs, input_tensors)
    171   output_tensors = nest.pack_sequence_as(model._nested_outputs, output_tensors)
    172   return Model(input_tensors, output_tensors, name=model.name)
    173 
    174 
    175 def _clone_sequential_model(model, input_tensors=None, share_weights=False):
    176   """Clone a `Sequential` model instance.
    177 
    178   Model cloning is similar to calling a model on new inputs,
    179   except that it creates new layers (and thus new weights) instead
    180   of sharing the weights of the existing layers.
    181 
    182   Arguments:
    183       model: Instance of `Sequential`.
    184       input_tensors: optional list of input tensors
    185           to build the model upon. If not provided,
    186           placeholders will be created.
    187       share_weights: flag to enable sharing of non-input layers between the
    188           cloned and original model. Note this still clones the input layers.
    189           This is required when we create a per-replica copy of the model with
    190           distribution strategy; we want the weights to be shared but still
    191           feed inputs separately so we create new input layers.
    192 
    193   Returns:
    194       An instance of `Sequential` reproducing the behavior
    195       of the original model, on top of new inputs tensors,
    196       using newly instantiated weights.
    197 
    198   Raises:
    199       ValueError: in case of invalid `model` argument value.
    200   """
    201   if not isinstance(model, Sequential):
    202     raise ValueError('Expected `model` argument '
    203                      'to be a `Sequential` model instance, '
    204                      'but got:', model)
    205 
    206   # Use model._layers to ensure that all layers are cloned. The model's layers
    207   # property will exclude the initial InputLayer (if it exists) in the model,
    208   # resulting in a different Sequential model structure.
    209   if input_tensors is None:
    210     if share_weights:
    211       # In preserve weights case we still want the input layers to be cloned.
    212       layers = []
    213       for layer in model._layers:
    214         if isinstance(layer, InputLayer):
    215           layers.append(_clone_layer(layer))
    216         else:
    217           layers.append(layer)
    218     else:
    219       layers = [_clone_layer(layer) for layer in model._layers]
    220     return Sequential(layers=layers, name=model.name)
    221   else:
    222     # If input tensors are provided, the original model's InputLayer is
    223     # overwritten with a different InputLayer.
    224     layers = [
    225         layer for layer in model._layers if not isinstance(layer, InputLayer)]
    226     if not share_weights:
    227       layers = [_clone_layer(layer) for layer in layers]
    228     if len(generic_utils.to_list(input_tensors)) != 1:
    229       raise ValueError('To clone a `Sequential` model, we expect '
    230                        ' at most one tensor '
    231                        'as part of `input_tensors`.')
    232 
    233     if isinstance(input_tensors, tuple):
    234       input_tensors = list(input_tensors)
    235     x = generic_utils.to_list(input_tensors)[0]
    236     if K.is_keras_tensor(x):
    237       origin_layer = x._keras_history[0]
    238       if isinstance(origin_layer, InputLayer):
    239         return Sequential(layers=[origin_layer] + layers, name=model.name)
    240       else:
    241         raise ValueError('Cannot clone a `Sequential` model on top '
    242                          'of a tensor that comes from a Keras layer '
    243                          'other than an `InputLayer`. '
    244                          'Use the functional API instead.')
    245     input_tensor = Input(tensor=x, name='input_wrapper_for_' + str(x.name))
    246     input_layer = input_tensor._keras_history[0]
    247     return Sequential(layers=[input_layer] + layers, name=model.name)
    248 
    249 
    250 @keras_export('keras.models.clone_model')
    251 def clone_model(model, input_tensors=None):
    252   """Clone any `Model` instance.
    253 
    254   Model cloning is similar to calling a model on new inputs,
    255   except that it creates new layers (and thus new weights) instead
    256   of sharing the weights of the existing layers.
    257 
    258   Arguments:
    259       model: Instance of `Model`
    260           (could be a functional model or a Sequential model).
    261       input_tensors: optional list of input tensors or InputLayer objects
    262           to build the model upon. If not provided,
    263           placeholders will be created.
    264 
    265   Returns:
    266       An instance of `Model` reproducing the behavior
    267       of the original model, on top of new inputs tensors,
    268       using newly instantiated weights.
    269 
    270   Raises:
    271       ValueError: in case of invalid `model` argument value.
    272   """
    273   if isinstance(model, Sequential):
    274     return _clone_sequential_model(model, input_tensors=input_tensors)
    275   else:
    276     return _clone_functional_model(model, input_tensors=input_tensors)
    277 
    278 
# "Clone" a subclassed model by resetting all of the attributes.
def _in_place_subclassed_model_reset(model):
  """Substitute for model cloning that works for subclassed models.

  Subclassed models cannot be cloned because their topology is not serializable.
  To "instantiate" an identical model in a new TF graph, we reuse the original
  model object, but we clear its state.

  After calling this function on a model instance, you can use the model
  instance as if it were a model clone (in particular you can use it in a new
  graph).

  This method clears the state of the input model. It is thus destructive.
  However the original state can be restored fully by calling
  `_in_place_subclassed_model_state_restoration`.

  Concretely, this function:
    1. collects every attribute of `model` whose value is a `Layer`,
    2. replaces each layer in `model._layers` with a fresh instance built
       from the layer's config (under the same attribute name),
    3. stores the original layers (and, for a built model without an existing
       cache, a fixed list of build/compile attributes) in
       `model._original_attributes_cache`,
    4. resets the build/compile trackers.

  Args:
    model: Instance of a Keras model created via subclassing.

  Raises:
    ValueError: In case the model uses a subclassed model as inner layer, has
      a layer attribute that itself exposes non-empty `layers`, or has a
      non-empty list/tuple attribute made up only of layers.
  """
  assert not model._is_graph_network  # Only makes sense for subclassed networks
  # Retrieve all layers tracked by the model as well as their attribute names
  attributes_cache = {}
  for name in dir(model):
    try:
      value = getattr(model, name)
    except (AttributeError, ValueError, TypeError):
      # Attributes that cannot be read are skipped.
      continue
    if isinstance(value, Layer):
      attributes_cache[name] = value
      assert value in model.layers
      if hasattr(value, 'layers') and value.layers:
        raise ValueError('We do not support the use of nested layers '
                         'in `model_to_estimator` at this time. Found nested '
                         'layer: %s' % value)
    elif isinstance(
        value, (list, tuple)) and name not in ('layers', '_layers', 'metrics',
                                               '_compile_metric_functions',
                                               '_output_loss_metrics'):
      # Handle case: list/tuple of layers (also tracked by the Network API).
      if value and all(isinstance(val, Layer) for val in value):
        raise ValueError('We do not support the use of list-of-layers '
                         'attributes in subclassed models used with '
                         '`model_to_estimator` at this time. Found list '
                         'model: %s' % name)

  # Replace layers on the model with fresh layers
  # Reverse mapping {layer: attribute name}. If the same layer is reachable
  # through several attribute names, only one of them is kept here.
  layers_to_names = {value: key for key, value in attributes_cache.items()}
  original_layers = model._layers[:]
  # Attribute tracking is switched off while layers are swapped and restored
  # to its previous value at the end of the function.
  # NOTE(review): if any statement below raises (e.g. the nested-model
  # ValueError), tracking stays disabled and `model._layers` is left partially
  # rebuilt -- confirm whether callers rely on this.
  setattr_tracking = model._setattr_tracking
  model._setattr_tracking = False
  model._layers = []
  for layer in original_layers:  # We preserve layer order.
    config = layer.get_config()
    # This will not work for nested subclassed models used as layers.
    # This would be theoretically possible to support, but would add complexity.
    # Only do it if users complain.
    if isinstance(layer, Network) and not layer._is_graph_network:
      raise ValueError('We do not support the use of nested subclassed models '
                       'in `model_to_estimator` at this time. Found nested '
                       'model: %s' % layer)
    fresh_layer = layer.__class__.from_config(config)
    # NOTE(review): raises KeyError if a layer in `model._layers` was not found
    # as an attribute of the model in the scan above.
    name = layers_to_names[layer]
    setattr(model, name, fresh_layer)
    model._layers.append(fresh_layer)

  # Cache original model build attributes (in addition to layers)
  if (not hasattr(model, '_original_attributes_cache') or
      model._original_attributes_cache is None):
    if model.built:
      attributes_to_cache = [
          'inputs',
          'outputs',
          '_feed_outputs',
          '_feed_output_names',
          '_feed_output_shapes',
          '_feed_loss_fns',
          'loss_weights_list',
          'targets',
          '_feed_targets',
          'sample_weight_modes',
          'total_loss',
          'sample_weights',
          '_feed_sample_weights',
          'train_function',
          'test_function',
          'predict_function',
          '_collected_trainable_weights',
          '_feed_inputs',
          '_feed_input_names',
          '_feed_input_shapes',
          'optimizer',
      ]
      for name in attributes_to_cache:
        attributes_cache[name] = getattr(model, name)
  # NOTE(review): the cache is overwritten unconditionally, including when a
  # previous (non-None) cache already existed; in that case the new cache holds
  # only the layers collected above. Confirm this is intended.
  model._original_attributes_cache = attributes_cache
  _reset_build_compile_trackers(model)
  model._setattr_tracking = setattr_tracking
    379 
    380 
    381 def _reset_build_compile_trackers(model):
    382   """Reset state trackers for model.
    383 
    384   Note that we do not actually zero out attributes such as optimizer,
    385   but instead rely on the expectation that all of the attrs will be
    386   over-written on calling build/compile/etc. This is somewhat fragile,
    387   insofar as we check elsewhere for the presence of these attributes as
    388   evidence of having been built/compiled/etc. Pending a better way to do this,
    389   we reset key attributes here to allow building and compiling.
    390 
    391   Args:
    392     model: the model that is being reset
    393   """
    394   # Reset build state
    395   model.built = False
    396   model.inputs = None
    397   model.outputs = None
    398   # Reset compile state
    399   model._is_compiled = False  # pylint:disable=protected-access
    400   model.optimizer = None
    401 
    402 
    403 def in_place_subclassed_model_state_restoration(model):
    404   """Restores the original state of a model after it was "reset".
    405 
    406   This undoes this action of `_in_place_subclassed_model_reset`, which is called
    407   in `clone_and_build_model` if `in_place_reset` is set to True.
    408 
    409   Args:
    410     model: Instance of a Keras model created via subclassing, on which
    411       `_in_place_subclassed_model_reset` was previously called.
    412   """
    413   assert not model._is_graph_network
    414   # Restore layers and build attributes
    415   if (hasattr(model, '_original_attributes_cache') and
    416       model._original_attributes_cache is not None):
    417     # Models have sticky attribute assignment, so we want to be careful to add
    418     # back the previous attributes and track Layers by their original names
    419     # without adding dependencies on "utility" attributes which Models exempt
    420     # when they're constructed.
    421     setattr_tracking = model._setattr_tracking
    422     model._setattr_tracking = False
    423     model._layers = []
    424     for name, value in model._original_attributes_cache.items():
    425       setattr(model, name, value)
    426       if isinstance(value, Layer):
    427         model._layers.append(value)
    428     model._original_attributes_cache = None
    429     model._setattr_tracking = setattr_tracking
    430   else:
    431     # Restore to the state of a never-called model.
    432     _reset_build_compile_trackers(model)
    433 
    434 
    435 def clone_and_build_model(
    436     model, input_tensors=None, target_tensors=None, custom_objects=None,
    437     compile_clone=True, in_place_reset=False, optimizer_iterations=None):
    438   """Clone a `Model` and build/compile it with the same settings used before.
    439 
    440   This function can be be run in the same graph or in a separate graph from the
    441   model. When using a separate graph, `in_place_reset` must be `False`.
    442 
    443   Note that, currently, the clone produced from this function may not work with
    444   TPU DistributionStrategy. Try at your own risk.
    445 
    446   Args:
    447     model: `tf.keras.Model` object. Can be Functional, Sequential, or
    448       sub-classed.
    449     input_tensors: Optional list of input tensors to build the model upon. If
    450       not provided, placeholders will be created.
    451     target_tensors: Optional list of target tensors for compiling the model. If
    452       not provided, placeholders will be created.
    453     custom_objects: Optional dictionary mapping string names to custom classes
    454       or functions.
    455     compile_clone: Boolean, whether to compile model clone (default `True`).
    456     in_place_reset: Boolean, whether to reset the model in place. Only used if
    457       the model is a subclassed model. In the case of a subclassed model,
    458       this argument must be set to `True` (default `False`). To restore the
    459       original model, use the function
    460       `in_place_subclassed_model_state_restoration(model)`.
    461     optimizer_iterations: An iterations variable that will be incremented by the
    462       optimizer if the clone is compiled. This argument is used when a Keras
    463       model is cloned into an Estimator model function, because Estimators
    464       create their own global step variable.
    465 
    466   Returns:
    467     Clone of the model.
    468 
    469   Raises:
    470     ValueError: Cloning fails in the following cases
    471       - cloning a subclassed model with `in_place_reset` set to False.
    472       - compiling the clone when the original model has not been compiled.
    473   """
    474   # Grab optimizer now, as we reset-in-place for subclassed models, but
    475   # want to maintain access to the original optimizer.
    476   orig_optimizer = model.optimizer
    477   if compile_clone and not orig_optimizer:
    478     raise ValueError(
    479         'Error when cloning model: compile_clone was set to True, but the '
    480         'original model has not been compiled.')
    481 
    482   if model._is_graph_network or isinstance(model, Sequential):
    483     if custom_objects:
    484       with CustomObjectScope(custom_objects):
    485         clone = clone_model(model, input_tensors=input_tensors)
    486     else:
    487       clone = clone_model(model, input_tensors=input_tensors)
    488 
    489     if all([isinstance(clone, Sequential),
    490             not clone._is_graph_network,
    491             getattr(model, '_build_input_shape', None) is not None]):
    492       # Set model inputs to build the model and add input/output properties.
    493       # TODO(kathywu): Add multiple placeholders to handle edge case where
    494       # sequential model has multiple inputs.
    495       clone._set_inputs(
    496           K.placeholder(model._build_input_shape, dtype=model.inputs[0].dtype))
    497   else:
    498     if not in_place_reset:
    499       raise ValueError(
    500           'This model is a subclassed model. '
    501           'Such a model cannot be cloned, but there is a workaround where '
    502           'the model is reset in-place. To use this, please set the argument '
    503           '`in_place_reset` to `True`. This will reset the attributes in the '
    504           'original model. To restore the attributes, call '
    505           '`in_place_subclassed_model_state_restoration(model)`.')
    506     clone = model
    507     _in_place_subclassed_model_reset(clone)
    508     if input_tensors is not None:
    509       if isinstance(input_tensors, (list, tuple)) and len(input_tensors) == 1:
    510         input_tensors = input_tensors[0]
    511       clone._set_inputs(input_tensors)
    512 
    513   if compile_clone:
    514     if isinstance(orig_optimizer, optimizers.TFOptimizer):
    515       optimizer = optimizers.TFOptimizer(
    516           orig_optimizer.optimizer, optimizer_iterations)
    517       K.track_tf_optimizer(optimizer)
    518     else:
    519       optimizer_config = orig_optimizer.get_config()
    520       optimizer = orig_optimizer.__class__.from_config(optimizer_config)
    521       if optimizer_iterations is not None:
    522         optimizer.iterations = optimizer_iterations
    523 
    524     clone.compile(
    525         optimizer,
    526         model.loss,
    527         metrics=metrics_module.clone_metrics(model._compile_metrics),
    528         loss_weights=model.loss_weights,
    529         sample_weight_mode=model.sample_weight_mode,
    530         weighted_metrics=metrics_module.clone_metrics(
    531             model._compile_weighted_metrics),
    532         target_tensors=target_tensors)
    533 
    534   return clone
    535