Trains a ResNet on the CIFAR10 small images dataset

Sumber: https://github.com/keras-team/keras/blob/master/examples/cifar10_resnet.py

"""

Trains a ResNet on the CIFAR10 dataset.

ResNet v1: [Deep Residual Learning for Image Recognition ](https://arxiv.org/pdf/1512.03385.pdf) ResNet v2: [Identity Mappings in Deep Residual Networks ](https://arxiv.org/pdf/1603.05027.pdf) Model|n|200-epoch accuracy|Original paper accuracy |sec/epoch GTX1080Ti

------------|--:|-------:|-----------------------:|---:

ResNet20 v1| 3| 92.16 %| 91.25 %|35 ResNet32 v1| 5| 92.46 %| 92.49 %|50 ResNet44 v1| 7| 92.50 %| 92.83 %|70 ResNet56 v1| 9| 92.71 %| 93.03 %|90 ResNet110 v1| 18| 92.65 %| 93.39+-.16 %|165 ResNet164 v1| 27| - %| 94.07 %| - ResNet1001 v1|N/A| - %| 92.39 %| - Model|n|200-epoch accuracy|Original paper accuracy |sec/epoch GTX1080Ti

------------|--:|-------:|-----------------------:|---:

ResNet20 v2| 2| - %| - %|--- ResNet32 v2|N/A| NA %| NA %| NA ResNet44 v2|N/A| NA %| NA %| NA ResNet56 v2| 6| 93.01 %| NA %|100 ResNet110 v2| 12| 93.15 %| 93.63 %|180 ResNet164 v2| 18| - %| 94.54 %| - ResNet1001 v2|111| - %| 95.08+-.14 %| - """

from __future__ import print_function import keras from keras.layers import Dense, Conv2D, BatchNormalization, Activation from keras.layers import AveragePooling2D, Input, Flatten from keras.optimizers import Adam from keras.callbacks import ModelCheckpoint, LearningRateScheduler from keras.callbacks import ReduceLROnPlateau from keras.preprocessing.image import ImageDataGenerator from keras.regularizers import l2 from keras import backend as K from keras.models import Model from keras.datasets import cifar10 import numpy as np import os

Training parameters

batch_size = 32 # orig paper trained all networks with batch_size=128 epochs = 200 data_augmentation = True num_classes = 10

Subtracting pixel mean improves accuracy

subtract_pixel_mean = True

Model parameter
----------------------------------------------------------------------------
| | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch
Model | n | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti
|v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2)
----------------------------------------------------------------------------
ResNet20 | 3 (2)| 92.16 | 91.25 | ----- | ----- | 35 (---)
ResNet32 | 5(NA)| 92.46 | 92.49 | NA | NA | 50 ( NA)
ResNet44 | 7(NA)| 92.50 | 92.83 | NA | NA | 70 ( NA)
ResNet56 | 9 (6)| 92.71 | 93.03 | 93.01 | NA | 90 (100)
ResNet110 |18(12)| 92.65 | 93.39+-.16| 93.15 | 93.63 | 165(180)
ResNet164 |27(18)| ----- | 94.07 | ----- | 94.54 | ---(---)
ResNet1001| (111)| ----- | 92.39 | ----- | 95.08+-.14| ---(---)
---------------------------------------------------------------------------

n = 3

Model version
Orig paper: version = 1 (ResNet v1), Improved ResNet: version = 2 (ResNet v2)

version = 1

Computed depth from supplied model parameter n

if version == 1:

   depth = n * 6 + 2

elif version == 2:

   depth = n * 9 + 2

Model name, depth and version

model_type = 'ResNet%dv%d' % (depth, version)

Load the CIFAR10 data.

(x_train, y_train), (x_test, y_test) = cifar10.load_data()

Input image dimensions.

input_shape = x_train.shape[1:]

Normalize data.

x_train = x_train.astype('float32') / 255 x_test = x_test.astype('float32') / 255

If subtract pixel mean is enabled

if subtract_pixel_mean:

   x_train_mean = np.mean(x_train, axis=0)
   x_train -= x_train_mean
   x_test -= x_train_mean

print('x_train shape:', x_train.shape) print(x_train.shape[0], 'train samples') print(x_test.shape[0], 'test samples') print('y_train shape:', y_train.shape)

Convert class vectors to binary class matrices.

y_train = keras.utils.to_categorical(y_train, num_classes) y_test = keras.utils.to_categorical(y_test, num_classes)

def lr_schedule(epoch):

   """Learning Rate Schedule
   Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs.
   Called automatically every epoch as part of callbacks during training.
   # Arguments
       epoch (int): The number of epochs
   # Returns
       lr (float32): learning rate
   """
   lr = 1e-3
   if epoch > 180:
       lr *= 0.5e-3
   elif epoch > 160:
       lr *= 1e-3
   elif epoch > 120:
       lr *= 1e-2
   elif epoch > 80:
       lr *= 1e-1
   print('Learning rate: ', lr)
   return lr

def resnet_layer(inputs,

                num_filters=16,
                kernel_size=3,
                strides=1,
                activation='relu',
                batch_normalization=True,
                conv_first=True):
   """2D Convolution-Batch Normalization-Activation stack builder
   # Arguments
       inputs (tensor): input tensor from input image or previous layer
       num_filters (int): Conv2D number of filters
       kernel_size (int): Conv2D square kernel dimensions
       strides (int): Conv2D square stride dimensions
       activation (string): activation name
       batch_normalization (bool): whether to include batch normalization
       conv_first (bool): conv-bn-activation (True) or
           bn-activation-conv (False)
   # Returns
       x (tensor): tensor as input to the next layer
   """
   conv = Conv2D(num_filters,
                 kernel_size=kernel_size,
                 strides=strides,
                 padding='same',
                 kernel_initializer='he_normal',
                 kernel_regularizer=l2(1e-4))

   x = inputs
   if conv_first:
       x = conv(x)
       if batch_normalization:
           x = BatchNormalization()(x)
       if activation is not None:
           x = Activation(activation)(x)
   else:
       if batch_normalization:
           x = BatchNormalization()(x)
       if activation is not None:
           x = Activation(activation)(x)
       x = conv(x)
   return x

def resnet_v1(input_shape, depth, num_classes=10):

   """ResNet Version 1 Model builder [a]
   Stacks of 2 x (3 x 3) Conv2D-BN-ReLU
   Last ReLU is after the shortcut connection.
   At the beginning of each stage, the feature map size is halved (downsampled)
   by a convolutional layer with strides=2, while the number of filters is
   doubled. Within each stage, the layers have the same number filters and the
   same number of filters.
   Features maps sizes:
   stage 0: 32x32, 16
   stage 1: 16x16, 32
   stage 2:  8x8,  64
   The Number of parameters is approx the same as Table 6 of [a]:
   ResNet20 0.27M
   ResNet32 0.46M
   ResNet44 0.66M
   ResNet56 0.85M
   ResNet110 1.7M
   # Arguments
       input_shape (tensor): shape of input image tensor
       depth (int): number of core convolutional layers
       num_classes (int): number of classes (CIFAR10 has 10)
   # Returns
       model (Model): Keras model instance
   """
   if (depth - 2) % 6 != 0:
       raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
   # Start model definition.
   num_filters = 16
   num_res_blocks = int((depth - 2) / 6)

   inputs = Input(shape=input_shape)
   x = resnet_layer(inputs=inputs)
   # Instantiate the stack of residual units
   for stack in range(3):
       for res_block in range(num_res_blocks):
           strides = 1
           if stack > 0 and res_block == 0:  # first layer but not first stack
               strides = 2  # downsample
           y = resnet_layer(inputs=x,
                            num_filters=num_filters,
                            strides=strides)
           y = resnet_layer(inputs=y,
                            num_filters=num_filters,
                            activation=None)
           if stack > 0 and res_block == 0:  # first layer but not first stack
               # linear projection residual shortcut connection to match
               # changed dims
               x = resnet_layer(inputs=x,
                                num_filters=num_filters,
                                kernel_size=1,
                                strides=strides,
                                activation=None,
                                batch_normalization=False)
           x = keras.layers.add([x, y])
           x = Activation('relu')(x)
       num_filters *= 2

   # Add classifier on top.
   # v1 does not use BN after last shortcut connection-ReLU
   x = AveragePooling2D(pool_size=8)(x)
   y = Flatten()(x)
   outputs = Dense(num_classes,
                   activation='softmax',
                   kernel_initializer='he_normal')(y)

   # Instantiate model.
   model = Model(inputs=inputs, outputs=outputs)
   return model

def resnet_v2(input_shape, depth, num_classes=10):

   """ResNet Version 2 Model builder [b]
   Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or also known as
   bottleneck layer
   First shortcut connection per layer is 1 x 1 Conv2D.
   Second and onwards shortcut connection is identity.
   At the beginning of each stage, the feature map size is halved (downsampled)
   by a convolutional layer with strides=2, while the number of filter maps is
   doubled. Within each stage, the layers have the same number filters and the
   same filter map sizes.
   Features maps sizes:
   conv1  : 32x32,  16
   stage 0: 32x32,  64
   stage 1: 16x16, 128
   stage 2:  8x8,  256
   # Arguments
       input_shape (tensor): shape of input image tensor
       depth (int): number of core convolutional layers
       num_classes (int): number of classes (CIFAR10 has 10)
   # Returns
       model (Model): Keras model instance
   """
   if (depth - 2) % 9 != 0:
       raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])')
   # Start model definition.
   num_filters_in = 16
   num_res_blocks = int((depth - 2) / 9)

   inputs = Input(shape=input_shape)
   # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths
   x = resnet_layer(inputs=inputs,
                    num_filters=num_filters_in,
                    conv_first=True)

   # Instantiate the stack of residual units
   for stage in range(3):
       for res_block in range(num_res_blocks):
           activation = 'relu'
           batch_normalization = True
           strides = 1
           if stage == 0:
               num_filters_out = num_filters_in * 4
               if res_block == 0:  # first layer and first stage
                   activation = None
                   batch_normalization = False
           else:
               num_filters_out = num_filters_in * 2
               if res_block == 0:  # first layer but not first stage
                   strides = 2    # downsample

           # bottleneck residual unit
           y = resnet_layer(inputs=x,
                            num_filters=num_filters_in,
                            kernel_size=1,
                            strides=strides,
                            activation=activation,
                            batch_normalization=batch_normalization,
                            conv_first=False)
           y = resnet_layer(inputs=y,
                            num_filters=num_filters_in,
                            conv_first=False)
           y = resnet_layer(inputs=y,
                            num_filters=num_filters_out,
                            kernel_size=1,
                            conv_first=False)
           if res_block == 0:
               # linear projection residual shortcut connection to match
               # changed dims
               x = resnet_layer(inputs=x,
                                num_filters=num_filters_out,
                                kernel_size=1,
                                strides=strides,
                                activation=None,
                                batch_normalization=False)
           x = keras.layers.add([x, y])

       num_filters_in = num_filters_out

   # Add classifier on top.
   # v2 has BN-ReLU before Pooling
   x = BatchNormalization()(x)
   x = Activation('relu')(x)
   x = AveragePooling2D(pool_size=8)(x)
   y = Flatten()(x)
   outputs = Dense(num_classes,
                   activation='softmax',
                   kernel_initializer='he_normal')(y)

   # Instantiate model.
   model = Model(inputs=inputs, outputs=outputs)
   return model

if version == 2:

   model = resnet_v2(input_shape=input_shape, depth=depth)

else:

   model = resnet_v1(input_shape=input_shape, depth=depth)

model.compile(loss='categorical_crossentropy',

             optimizer=Adam(lr=lr_schedule(0)),
             metrics=['accuracy'])

model.summary() print(model_type)

Prepare model model saving directory.

save_dir = os.path.join(os.getcwd(), 'saved_models') model_name = 'cifar10_%s_model.{epoch:03d}.h5' % model_type if not os.path.isdir(save_dir):

   os.makedirs(save_dir)

filepath = os.path.join(save_dir, model_name)

Prepare callbacks for model saving and for learning rate adjustment.

checkpoint = ModelCheckpoint(filepath=filepath,

                            monitor='val_acc',
                            verbose=1,
                            save_best_only=True)

lr_scheduler = LearningRateScheduler(lr_schedule)

lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),

                              cooldown=0,
                              patience=5,
                              min_lr=0.5e-6)

callbacks = [checkpoint, lr_reducer, lr_scheduler]

Run training, with or without data augmentation.

if not data_augmentation:

   print('Not using data augmentation.')
   model.fit(x_train, y_train,
             batch_size=batch_size,
             epochs=epochs,
             validation_data=(x_test, y_test),
             shuffle=True,
             callbacks=callbacks)

else:

   print('Using real-time data augmentation.')
   # This will do preprocessing and realtime data augmentation:
   datagen = ImageDataGenerator(
       # set input mean to 0 over the dataset
       featurewise_center=False,
       # set each sample mean to 0
       samplewise_center=False,
       # divide inputs by std of dataset
       featurewise_std_normalization=False,
       # divide each input by its std
       samplewise_std_normalization=False,
       # apply ZCA whitening
       zca_whitening=False,
       # epsilon for ZCA whitening
       zca_epsilon=1e-06,
       # randomly rotate images in the range (deg 0 to 180)
       rotation_range=0,
       # randomly shift images horizontally
       width_shift_range=0.1,
       # randomly shift images vertically
       height_shift_range=0.1,
       # set range for random shear
       shear_range=0.,
       # set range for random zoom
       zoom_range=0.,
       # set range for random channel shifts
       channel_shift_range=0.,
       # set mode for filling points outside the input boundaries
       fill_mode='nearest',
       # value used for fill_mode = "constant"
       cval=0.,
       # randomly flip images
       horizontal_flip=True,
       # randomly flip images
       vertical_flip=False,
       # set rescaling factor (applied before any other transformation)
       rescale=None,
       # set function that will be applied on each input
       preprocessing_function=None,
       # image data format, either "channels_first" or "channels_last"
       data_format=None,
       # fraction of images reserved for validation (strictly between 0 and 1)
       validation_split=0.0)

   # Compute quantities required for featurewise normalization
   # (std, mean, and principal components if ZCA whitening is applied).
   datagen.fit(x_train)

   # Fit the model on the batches generated by datagen.flow().
   model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                       validation_data=(x_test, y_test),
                       epochs=epochs, verbose=1, workers=4,
                       callbacks=callbacks)

Score trained model.

scores = model.evaluate(x_test, y_test, verbose=1) print('Test loss:', scores[0]) print('Test accuracy:', scores[1])

Pranala Menarik

Keras

Trains a ResNet on the CIFAR10 small images dataset

Pranala Menarik

Navigation menu

Search