(Understanding intermediate layers using linear classifier probes) Images to Replicate: Figures 5 and 8
Figure 5:
Information for Figure 5:
For this part of the assignment, we had to replicate Figure 5, which uses the MNIST convolutional model given in tensorflow/models/image/mnist/convolutional.py. In this figure, the test prediction error of a probe inserted at each layer is plotted at the beginning and at the end of training. Things to note: for Figure 5a, there is a decrease over the first couple of layers, as the first ReLU has a big impact; for Figure 5b, the prediction error decreases at almost every layer.
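To illustrate the core mechanism before the full listing, here is a small self-contained sketch of our own (the ProbedNet name, layer sizes, and single probe location are made up for illustration and are not part of the assignment code): a single linear probe is trained on the frozen output of one intermediate layer, and tf.stop_gradient keeps the probe's loss from updating the layers underneath it.

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import Conv2D, ReLU, Flatten, Dense

class ProbedNet(Model):
    def __init__(self):
        super(ProbedNet, self).__init__()
        self.conv = Conv2D(32, 5, padding='same')   # the layer being probed
        self.act = ReLU()
        self.flat = Flatten()
        self.head = Dense(10)                       # the network's own classifier
        self.probe = Dense(10)                      # linear probe on the conv output

    def call(self, x, train_probe=False):
        h = self.act(self.conv(x))
        if train_probe:
            # gradients from the probe must not reach the conv layer
            return self.probe(self.flat(tf.stop_gradient(h)))
        return self.head(self.flat(h))

(x_train, y_train), _ = tf.keras.datasets.mnist.load_data()
x_train = x_train[..., None].astype('float32') / 255.0

net = ProbedNet()
loss_fn = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
opt = tf.optimizers.SGD(0.01)

# one probe-training step: only the probe's Dense weights receive a gradient
with tf.GradientTape() as tape:
    logits = net(x_train[:64], train_probe=True)
    loss = loss_fn(y_train[:64], logits)
grads = tape.gradient(loss, net.trainable_variables)
opt.apply_gradients([(g, v) for g, v in zip(grads, net.trainable_variables) if g is not None])

The full code below applies this same idea at every layer of the convolutional model.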
Code for Figure 5:
import tensorflow as tf
from tensorflow.keras import Model, Sequential, layers
from tensorflow.keras.layers import Conv2D, Flatten, Dense, ReLU, MaxPool2D, Softmax, Dropout
from tensorflow.keras.regularizers import l2
import numpy as np
import matplotlib
matplotlib.use('tkagg')
import matplotlib.pyplot as plt
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import logging
tf.get_logger().setLevel(logging.ERROR)
IMAGE_DIM = 28
NUM_CHANNELS = 1
NUM_LABELS = 10
NUM_EPOCHS = 10
BATCH_SIZE = 512
mnist = tf.keras.datasets.mnist
(x_train, y_train),(x_test, y_test) = mnist.load_data()
# shuffle the data (only the first 50000 of the 60000 training examples are kept)
rand_index = np.arange(50000)
np.random.shuffle(rand_index)
x_train = x_train[rand_index]
y_train = y_train[rand_index]
rand_index = np.arange(10000)
np.random.shuffle(rand_index)
x_test = x_test[rand_index]
y_test = y_test[rand_index]
x_test = x_test.reshape(x_test.shape[0], IMAGE_DIM, IMAGE_DIM, NUM_CHANNELS).astype('float32')/255.0
x_train = x_train.reshape(x_train.shape[0], IMAGE_DIM, IMAGE_DIM, NUM_CHANNELS).astype('float32')/255.0
XTRAIN_LENGTH = len(x_train)
probe_layer = {0:"input", 1:"conv1_preact", 2:"conv1_postact", 3:"conv1_postpool", 4:"conv2_preact", 5:"conv2_postact", 6:"conv2_postpool", 8:"fc1_preact", 9:"fc1_postact", 10:"logits"}
probe_for_graph = ["input", "conv1_preact", "conv1_postact", "conv1_postpool", "conv2_preact", "conv2_postact", "conv2_postpool", "fc1_preact", "fc1_postact", "logits"]
class linearClassifier(layers.Layer):
    def __init__(self):
        super(linearClassifier, self).__init__()
        self.f1 = Flatten()
        self.d1 = Dense(NUM_LABELS)

    def call(self, x):
        return self.d1(self.f1(x))
class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        # list of layers
        self.my_layers = []
        # the i-th entry represents a probe inserted before the i-th layer
        self.probes = {}
        # index of probe being trained (-1 means train the network itself)
        self.probe_layer_num = -1

        self.add_probe(0)
        self.my_layers.append(Conv2D(32, [5, 5], strides=(1, 1), padding='same'))
        self.add_probe(1)
        self.my_layers.append(ReLU())
        self.add_probe(2)
        self.my_layers.append(MaxPool2D(pool_size=(2, 2), padding='same'))
        self.add_probe(3)
        self.my_layers.append(Conv2D(64, [5, 5], strides=(1, 1), padding='same'))
        self.add_probe(4)
        self.my_layers.append(ReLU())
        self.add_probe(5)
        self.my_layers.append(MaxPool2D(pool_size=(2, 2), padding='same'))
        self.add_probe(6)
        self.my_layers.append(Flatten())
        # no probe at index 7: the flattened values carry the same information as probe 6
        self.my_layers.append(Dense(512, kernel_regularizer=l2(5e-4), bias_regularizer=l2(5e-4)))
        self.add_probe(8)
        self.my_layers.append(ReLU())
        self.d1 = Dropout(0.5)
        self.add_probe(9)
        self.my_layers.append(Dense(NUM_LABELS, kernel_regularizer=l2(5e-4), bias_regularizer=l2(5e-4)))
        self.add_probe(10)

    def add_probe(self, key):
        self.probes[key] = linearClassifier()

    def call(self, x):
        if self.probe_layer_num == -1:
            # for network training
            for (i, layer) in enumerate(self.my_layers):
                if i == 9:
                    # dropout is applied just before the final dense layer
                    x = self.d1(x)
                x = layer(x)
            return x
        else:
            # for probe training: run the frozen layers up to the probe position
            for layer in self.my_layers[0:self.probe_layer_num]:
                x = layer(x)
            x = tf.stop_gradient(x)
            probe = self.probes[self.probe_layer_num]
            return probe(x)
model = MyModel()
# Optimizer for probes
# optim_probe = tf.keras.optimizers.RMSprop(learning_rate=0.01, decay=0.9, momentum=0.9, \
# epsilon=1e-6, centered=True)
def train_probes(weights):
    probe_errors = []
    # early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3,
    #                                               restore_best_weights=True)
    # We previously implemented early stopping, but the results were not as good,
    # so we resorted to training for many epochs instead.
    for probe_layer_num in model.probes.keys():
        model.reset_metrics()
        model.probe_layer_num = probe_layer_num
        model.compile(optimizer=optimizer, loss=loss_object, metrics=['accuracy'])
        model.set_weights(weights)
        model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=500,
                  verbose=2, validation_split=1/6)
        test_loss, test_accuracy = model.evaluate(x_test, y_test, verbose=2)
        probe_errors.append(1 - test_accuracy)
        print("Error for probe ", probe_layer[probe_layer_num], ":", 1 - test_accuracy)
    return probe_errors
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(0.01, XTRAIN_LENGTH, 0.95, staircase=True)
optimizer = tf.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)
loss_object = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
model.run_eagerly = True
# compile model and save initial weights
model.compile(optimizer=optimizer, loss=loss_object, metrics=['accuracy'])
for probe_layer_num in model.probes.keys():
    model.probe_layer_num = probe_layer_num
    model(x_train[0:BATCH_SIZE])
weights = model.get_weights()
# train the probes with the initial weights (before network training)
probe_errors = train_probes(weights)

plt.figure(figsize=(20, 10))
index = range(len(probe_for_graph))
plt.plot(index, probe_errors)
plt.xticks(index, probe_for_graph, rotation=20)
axes = plt.gca()
axes.set(ylim=(0, 0.1))
plt.ylabel("test prediction error")
plt.title("Figure 5a")
plt.show()
# train the model and save the weights
model.probe_layer_num = -1
model.compile(optimizer=optimizer, loss=loss_object, metrics=['accuracy'])
model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=NUM_EPOCHS, verbose=2)
weights = model.get_weights()
# train the probes with the post-training weights
probe_errors_trained = train_probes(weights)

plt.figure(figsize=(20, 10))
index = range(len(probe_for_graph))
plt.plot(index, probe_errors_trained)
plt.xticks(index, probe_for_graph, rotation=20)
axes = plt.gca()
axes.set(ylim=(0, 0.1))
plt.ylabel("test prediction error")
plt.title("Figure 5b")
plt.show()
# fig1.savefig('midterm_figure1.png')
# fig2.savefig('midterm_figure2.png')
Obtained Figures for Figure 5:
Note that the two figures above come from different runs of the program; the best figures were chosen. With a different optimization setup and more time to train, we believe the figures would look more like those in the research paper, shown below:
Figure 8:
Information for Figure 8:
For this part of the assignment, we had to replicate Figure 8. The model in this figure has 128 fully-connected layers with a skip connection that feeds the input (layer 0) directly into layer 64. A probe is attached at every layer to see how well that layer's activations would perform if fed to a linear classifier. The probes let us observe that the first 64 layers are essentially ignored, even after extensive training.
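As a scaled-down sketch of the skip-connection wiring (our own example; LAYERS = 4, SKIP_LAYER = 2, and the layer widths are made-up stand-ins for the real 128/64 model), the snippet below only shows how the flattened input is concatenated back in right before the skip layer:

import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Concatenate

LAYERS = 4          # stands in for 128
SKIP_LAYER = 2      # stands in for 64

flatten = Flatten()
concat = Concatenate()
stack = [Dense(16, activation=lambda t: tf.keras.activations.relu(t, alpha=0.01))
         for _ in range(LAYERS)]
head = Dense(10)

def forward(images):
    x = flatten(images)
    r = x                          # saved input for the skip connection
    for i, layer in enumerate(stack):
        if i == SKIP_LAYER:
            x = concat([x, r])     # the skip layer sees both paths
        x = layer(x)
    return head(x)

logits = forward(tf.random.uniform((8, 28, 28)))   # dummy batch
print(logits.shape)                                 # (8, 10)

Because the layer at SKIP_LAYER receives the original input directly, the network can learn to rely on that path and effectively ignore everything computed by the earlier layers, which is exactly what the probes make visible.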
Code for Figure 8:
import tensorflow as tf
from tensorflow.keras import Model, layers
from tensorflow.keras.layers import Flatten, Dense, Concatenate
from tensorflow.keras.datasets import mnist
import sys
import numpy as np
import matplotlib.pyplot as plt
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import logging
tf.get_logger().setLevel(logging.ERROR)
NUM_LABELS = 10
SKIP_LAYER = 64
LAYER_COUNT = 128
BATCH_SIZE = 512
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_test = x_test.astype('float32')/255.0
x_train = x_train.astype('float32')/255.0
XTRAIN_LENGTH = len(x_train)
class MyModel(Model):
    def __init__(self):
        super(MyModel, self).__init__()
        # applied at the first layer
        self.f1 = Flatten()
        # list of layers
        self.my_layers = []
        # applied before the 64th layer
        self.c1 = Concatenate()
        # the i-th entry represents a probe inserted before the i-th layer;
        # each probe is basically a dense layer
        self.probes = []
        # index of probe being trained
        self.probe_num = -1

        # Pathologically deep model with 128 fully-connected layers
        # of 128 hidden units; the activation function is leaky ReLU
        lrelu = lambda x: tf.keras.activations.relu(x, alpha=0.01)
        for _ in range(LAYER_COUNT):
            self.my_layers.append(Dense(128, activation=lrelu))
            self.probes.append(Dense(NUM_LABELS))
        self.my_layers.append(Dense(NUM_LABELS))

    def call(self, x):
        x = self.f1(x)
        r = x  # residual / short-cut / skip connection
        if self.probe_num == -1:
            # for network training
            for (i, layer) in enumerate(self.my_layers):
                if i == SKIP_LAYER:
                    x = self.c1([x, r])
                x = layer(x)
            return x
        else:
            # for probe training
            for (i, layer) in enumerate(self.my_layers[0:self.probe_num]):
                if i == SKIP_LAYER:
                    x = self.c1([x, r])
                x = layer(x)
            x = tf.stop_gradient(x)
            probe = self.probes[self.probe_num]
            return probe(x)
model = MyModel()
def train_probes(weights):
    probe_errors = []
    callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=2, verbose=1)
    for probe_num in range(len(model.probes)):
        optim_probe = tf.keras.optimizers.Adam()
        model.reset_metrics()
        model.probe_num = probe_num
        model.compile(optimizer=optim_probe, loss=loss_object, metrics=['accuracy'])
        model.set_weights(weights)
        print("Starting Training and Evaluation of probe number: ", probe_num, flush=True)
        model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=200,
                  verbose=0, validation_split=1/6, callbacks=[callback])
        _, test_accuracy = model.evaluate(x_test, y_test, verbose=0)
        probe_errors.append(1 - test_accuracy)
        print("Probe Number:", probe_num, " Probe Error: ", 1 - test_accuracy, flush=True)
    return probe_errors
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(0.01, XTRAIN_LENGTH, 0.95, staircase=True)
optimizer = tf.optimizers.SGD(learning_rate=lr_schedule, momentum=0.9)
loss_object = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
# compile model and save initial weights
model.compile(optimizer=optimizer, loss=loss_object, metrics=['accuracy'])
model(x_train[0:BATCH_SIZE])
for probe_num in range(len(model.probes)):
    model.probe_num = probe_num
    model(x_train[0:BATCH_SIZE])
weights = model.get_weights()
probe_errors = train_probes(weights)
print("Training Model for 500 minibatches", flush=True) model.probe_num = -1 model.compile(optimizer=optimizer, loss=loss_object, metrics=['accuracy']) model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=int(500*BATCH_SIZE/XTRAIN_LENGTH), verbose=0) weights = model.get_weights()
probe_errors_trained = train_probes(weights)
print("Training Model for 1500 more minibatches", flush=True) model.probe_num = -1 model.compile(optimizer=optimizer, loss=loss_object, metrics=['accuracy']) model.set_weights(weights) model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=int(1500*BATCH_SIZE/XTRAIN_LENGTH), verbose=0) weights = model.get_weights()
probe_errors_trained2 = train_probes(weights)
x = np.arange(1, LAYER_COUNT + 1)

fig1 = plt.figure(figsize=(20, 10))
plt.bar(x, probe_errors)
plt.xlabel("Probes after 0 minibatches")
plt.ylabel("Optimal Prediction Error")
axes = plt.gca()
axes.set_ylim([0.0, 1.0])
# plt.show()
fig2 = plt.figure(figsize=(20, 10))
plt.bar(x, probe_errors_trained)
plt.xlabel("Probes after 500 minibatches")
plt.ylabel("Optimal Prediction Error")
axes = plt.gca()
axes.set_ylim([0.0, 1.0])
# plt.show()
fig3 = plt.figure(figsize=(20, 10))
plt.bar(x, probe_errors_trained2)
plt.xlabel("Probes after 2000 minibatches")
plt.ylabel("Optimal Prediction Error")
axes = plt.gca()
axes.set_ylim([0.0, 1.0])
# plt.show()
fig1.savefig('midterm_figure3_128.png')
fig2.savefig('midterm_figure4_128.png')
fig3.savefig('midterm_figure5_128.png')
Obtained Figures for Figure 8:
Figures for a 10-layer run: probes after 0, 500, and 2000 minibatches.
Figures for a 30-layer run: probes after 0, 500, and 2000 minibatches.
The article’s figures are shown below: