Home | History | Annotate | Download | only in nets
      1 # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 # ==============================================================================
     15 """Contains a model definition for AlexNet.
     16 
     17 This work was first described in:
     18   ImageNet Classification with Deep Convolutional Neural Networks
     19   Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton
     20 
     21 and later refined in:
     22   One weird trick for parallelizing convolutional neural networks
     23   Alex Krizhevsky, 2014
     24 
     25 Here we provide the implementation proposed in "One weird trick" and not
     26 "ImageNet Classification", as per the paper, the LRN layers have been removed.
     27 
     28 Usage:
     29   with slim.arg_scope(alexnet.alexnet_v2_arg_scope()):
     30     outputs, end_points = alexnet.alexnet_v2(inputs)
     31 
     32 @@alexnet_v2
     33 """
     34 
     35 from __future__ import absolute_import
     36 from __future__ import division
     37 from __future__ import print_function
     38 
     39 from tensorflow.contrib import layers
     40 from tensorflow.contrib.framework.python.ops import arg_scope
     41 from tensorflow.contrib.layers.python.layers import layers as layers_lib
     42 from tensorflow.contrib.layers.python.layers import regularizers
     43 from tensorflow.contrib.layers.python.layers import utils
     44 from tensorflow.python.ops import array_ops
     45 from tensorflow.python.ops import init_ops
     46 from tensorflow.python.ops import nn_ops
     47 from tensorflow.python.ops import variable_scope
     48 
     49 trunc_normal = lambda stddev: init_ops.truncated_normal_initializer(0.0, stddev)
     50 
     51 
     52 def alexnet_v2_arg_scope(weight_decay=0.0005):
     53   with arg_scope(
     54       [layers.conv2d, layers_lib.fully_connected],
     55       activation_fn=nn_ops.relu,
     56       biases_initializer=init_ops.constant_initializer(0.1),
     57       weights_regularizer=regularizers.l2_regularizer(weight_decay)):
     58     with arg_scope([layers.conv2d], padding='SAME'):
     59       with arg_scope([layers_lib.max_pool2d], padding='VALID') as arg_sc:
     60         return arg_sc
     61 
     62 
     63 def alexnet_v2(inputs,
     64                num_classes=1000,
     65                is_training=True,
     66                dropout_keep_prob=0.5,
     67                spatial_squeeze=True,
     68                scope='alexnet_v2'):
     69   """AlexNet version 2.
     70 
     71   Described in: http://arxiv.org/pdf/1404.5997v2.pdf
     72   Parameters from:
     73   github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
     74   layers-imagenet-1gpu.cfg
     75 
     76   Note: All the fully_connected layers have been transformed to conv2d layers.
     77         To use in classification mode, resize input to 224x224. To use in fully
     78         convolutional mode, set spatial_squeeze to false.
     79         The LRN layers have been removed and change the initializers from
     80         random_normal_initializer to xavier_initializer.
     81 
     82   Args:
     83     inputs: a tensor of size [batch_size, height, width, channels].
     84     num_classes: number of predicted classes.
     85     is_training: whether or not the model is being trained.
     86     dropout_keep_prob: the probability that activations are kept in the dropout
     87       layers during training.
     88     spatial_squeeze: whether or not should squeeze the spatial dimensions of the
     89       outputs. Useful to remove unnecessary dimensions for classification.
     90     scope: Optional scope for the variables.
     91 
     92   Returns:
     93     the last op containing the log predictions and end_points dict.
     94   """
     95   with variable_scope.variable_scope(scope, 'alexnet_v2', [inputs]) as sc:
     96     end_points_collection = sc.original_name_scope + '_end_points'
     97     # Collect outputs for conv2d, fully_connected and max_pool2d.
     98     with arg_scope(
     99         [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
    100         outputs_collections=[end_points_collection]):
    101       net = layers.conv2d(
    102           inputs, 64, [11, 11], 4, padding='VALID', scope='conv1')
    103       net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool1')
    104       net = layers.conv2d(net, 192, [5, 5], scope='conv2')
    105       net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool2')
    106       net = layers.conv2d(net, 384, [3, 3], scope='conv3')
    107       net = layers.conv2d(net, 384, [3, 3], scope='conv4')
    108       net = layers.conv2d(net, 256, [3, 3], scope='conv5')
    109       net = layers_lib.max_pool2d(net, [3, 3], 2, scope='pool5')
    110 
    111       # Use conv2d instead of fully_connected layers.
    112       with arg_scope(
    113           [layers.conv2d],
    114           weights_initializer=trunc_normal(0.005),
    115           biases_initializer=init_ops.constant_initializer(0.1)):
    116         net = layers.conv2d(net, 4096, [5, 5], padding='VALID', scope='fc6')
    117         net = layers_lib.dropout(
    118             net, dropout_keep_prob, is_training=is_training, scope='dropout6')
    119         net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
    120         net = layers_lib.dropout(
    121             net, dropout_keep_prob, is_training=is_training, scope='dropout7')
    122         net = layers.conv2d(
    123             net,
    124             num_classes, [1, 1],
    125             activation_fn=None,
    126             normalizer_fn=None,
    127             biases_initializer=init_ops.zeros_initializer(),
    128             scope='fc8')
    129 
    130       # Convert end_points_collection into a end_point dict.
    131       end_points = utils.convert_collection_to_dict(end_points_collection)
    132       if spatial_squeeze:
    133         net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
    134         end_points[sc.name + '/fc8'] = net
    135       return net, end_points
    136 
    137 
# Default input height/width, attached to the function as an attribute; the
# alexnet_v2 docstring prescribes 224x224 inputs for classification mode.
alexnet_v2.default_image_size = 224
    139