# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains a model definition for AlexNet.

This work was first described in:
  ImageNet Classification with Deep Convolutional Neural Networks
  Alex Krizhevsky, Ilya Sutskever and Geoffrey E. Hinton

and later refined in:
  One weird trick for parallelizing convolutional neural networks
  Alex Krizhevsky, 2014

Here we provide the implementation proposed in "One weird trick" and not
"ImageNet Classification", as per the paper, the LRN layers have been removed.

Usage:
  with slim.arg_scope(alexnet.alexnet_v2_arg_scope()):
    outputs, end_points = alexnet.alexnet_v2(inputs)

@@alexnet_v2
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from tensorflow.contrib import layers
from tensorflow.contrib.framework.python.ops import arg_scope
from tensorflow.contrib.layers.python.layers import layers as layers_lib
from tensorflow.contrib.layers.python.layers import regularizers
from tensorflow.contrib.layers.python.layers import utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import variable_scope


def trunc_normal(stddev):
  """Returns a zero-mean truncated-normal initializer with the given stddev."""
  return init_ops.truncated_normal_initializer(0.0, stddev)


def alexnet_v2_arg_scope(weight_decay=0.0005):
  """Builds the default argument scope for the AlexNet v2 layers.

  The scope sets, for `conv2d` and `fully_connected`: a ReLU activation, biases
  initialised to the constant 0.1 and an L2 weights regularizer. It additionally
  sets 'SAME' padding for `conv2d` and 'VALID' padding for `max_pool2d`.

  Args:
    weight_decay: scale handed to the L2 weights regularizer.

  Returns:
    The value yielded by the innermost `arg_scope`, which can be passed back to
    `arg_scope` as shown in the module-level usage example.
  """
  # The context managers are only created here; they are entered below, in the
  # same outer-to-inner order as if they were written as nested `with` blocks.
  conv_fc_defaults = arg_scope(
      [layers.conv2d, layers_lib.fully_connected],
      activation_fn=nn_ops.relu,
      biases_initializer=init_ops.constant_initializer(0.1),
      weights_regularizer=regularizers.l2_regularizer(weight_decay))
  conv_padding = arg_scope([layers.conv2d], padding='SAME')
  pool_padding = arg_scope([layers_lib.max_pool2d], padding='VALID')

  with conv_fc_defaults, conv_padding, pool_padding as scope_defaults:
    return scope_defaults


def alexnet_v2(inputs,
               num_classes=1000,
               is_training=True,
               dropout_keep_prob=0.5,
               spatial_squeeze=True,
               scope='alexnet_v2'):
  """Builds the AlexNet version 2 network.

  Described in: http://arxiv.org/pdf/1404.5997v2.pdf
  Parameters from:
  github.com/akrizhevsky/cuda-convnet2/blob/master/layers/
  layers-imagenet-1gpu.cfg

  Note: All the fully_connected layers have been transformed to conv2d layers.
        To use in classification mode, resize input to 224x224. To use in fully
        convolutional mode, set spatial_squeeze to false.
        The LRN layers have been removed and the initializers changed from
        random_normal_initializer to xavier_initializer.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes; this is the number of outputs of
      the final `fc8` layer.
    is_training: whether or not the model is being trained; forwarded to the
      dropout layers.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not the spatial dimensions (axes 1 and 2) of the
      output should be squeezed. Useful to remove unnecessary dimensions for
      classification.
    scope: Optional scope for the variables.

  Returns:
    A `(net, end_points)` tuple. `net` is the output of the final `fc8` layer,
    which is built without an activation function, squeezed over axes 1 and 2
    when `spatial_squeeze` is true. `end_points` is the dict built from the
    outputs collected for the conv2d, fully_connected and max_pool2d layers.
  """
  with variable_scope.variable_scope(
      scope, 'alexnet_v2', [inputs]) as var_scope:
    collection_name = var_scope.original_name_scope + '_end_points'
    collected_layer_fns = [
        layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d
    ]

    # Every conv2d, fully_connected and max_pool2d output created below is
    # added to `collection_name`, so it can be turned into `end_points` later.
    with arg_scope(
        collected_layer_fns, outputs_collections=[collection_name]):
      # Convolutional feature extractor.
      net = layers.conv2d(
          inputs, 64, [11, 11], stride=4, padding='VALID', scope='conv1')
      net = layers_lib.max_pool2d(net, [3, 3], stride=2, scope='pool1')
      net = layers.conv2d(net, 192, [5, 5], scope='conv2')
      net = layers_lib.max_pool2d(net, [3, 3], stride=2, scope='pool2')
      net = layers.conv2d(net, 384, [3, 3], scope='conv3')
      net = layers.conv2d(net, 384, [3, 3], scope='conv4')
      net = layers.conv2d(net, 256, [3, 3], scope='conv5')
      net = layers_lib.max_pool2d(net, [3, 3], stride=2, scope='pool5')

      # The classifier head is expressed with conv2d instead of
      # fully_connected layers.
      with arg_scope(
          [layers.conv2d],
          weights_initializer=trunc_normal(0.005),
          biases_initializer=init_ops.constant_initializer(0.1)):
        net = layers.conv2d(net, 4096, [5, 5], padding='VALID', scope='fc6')
        net = layers_lib.dropout(
            net, dropout_keep_prob, is_training=is_training, scope='dropout6')
        net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
        net = layers_lib.dropout(
            net, dropout_keep_prob, is_training=is_training, scope='dropout7')
        # Final layer: no activation, no normalizer, zero-initialised biases.
        net = layers.conv2d(
            net,
            num_classes, [1, 1],
            activation_fn=None,
            normalizer_fn=None,
            biases_initializer=init_ops.zeros_initializer(),
            scope='fc8')

      # Turn the collected layer outputs into the end_points dict.
      end_points = utils.convert_collection_to_dict(collection_name)
      if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
        # Record the squeezed tensor under the fc8 key.
        # NOTE(review): presumably this replaces the unsqueezed fc8 entry --
        # confirm the key naming used by convert_collection_to_dict.
        end_points[var_scope.name + '/fc8'] = net
      return net, end_points


alexnet_v2.default_image_size = 224