# import libraries
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.datasets import imdb
from tensorflow.keras import models, layers, optimizers, backend
# load dataset
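# each review is a list of integer word indices; num_words keeps only the 10,000 most frequent words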
num_words = 10000
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words = num_words)
train_data.shape, train_labels.shape, test_data.shape, test_labels.shape
# preprocess
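# multi-hot encode each review into a fixed-length 0/1 vector: X[i, j] = 1 when word
# index j appears anywhere in review i (word order and counts are discarded)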
X_train = np.zeros(shape = (len(train_data), num_words), dtype = float)
X_test = np.zeros(shape = (len(test_data), num_words), dtype = float)
for i, seq in enumerate(train_data):
    X_train[i, seq] = 1.
for i, seq in enumerate(test_data):
    X_test[i, seq] = 1.
y_train = train_labels.astype(float)
y_test = test_labels.astype(float)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
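# hold out half of the 25,000 training reviews as a validation set for the hyperparameter search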
partial_X_train = X_train[:12500]
partial_y_train = y_train[:12500]
X_val = X_train[12500:]
y_val = y_train[12500:]
def explore(X_train,
y_train,
X_val,
y_val,
n_units,
n_layers,
activation,
learning_rate,
momentum):
    # define ann architecture
    model = models.Sequential()
    for i in range(n_layers):
        model.add(layers.Dense(n_units, activation = activation))
    model.add(layers.Dense(1, activation = "sigmoid"))

    # define optimizer, loss function, and metrics
    optimizer = optimizers.RMSprop(learning_rate = learning_rate, momentum = momentum)

    # train ann model
    model.build(input_shape = (10000,))
    model.compile(optimizer = optimizer, loss = "binary_crossentropy", metrics = ["accuracy"])
    model.fit(X_train, y_train, epochs = 20, batch_size = 64, verbose = 0)

    # evaluate ann model
    val_loss, val_acc = model.evaluate(X_val, y_val, verbose = 0)
    return val_loss, val_acc
# set hyperparameters
learning_rate_list = np.logspace(-2, -4, 5)
momentum_list = np.linspace(0.1, 0.9, 5)
n_unit_list = [32, 64]
n_hidden_layer_list = [1, 3]
activation_list = ["relu", "tanh"]
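# full grid: 5 learning rates x 5 momentum values x 2 widths x 2 depths x 2 activations
# = 200 combinations, each trained for 20 epochs, so the loop below is slow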
param_list = []
for learning_rate in learning_rate_list:
    for momentum in momentum_list:
        for n_units in n_unit_list:
            for n_layers in n_hidden_layer_list:
                for activation in activation_list:
                    param_list.append({"learning_rate": learning_rate,
                                       "momentum": momentum,
                                       "n_units": n_units,
                                       "n_layers": n_layers,
                                       "activation": activation})
results = []
for params in param_list:
    val_loss, val_acc = explore(
        partial_X_train,
        partial_y_train,
        X_val,
        y_val,
        n_units = params["n_units"],
        n_layers = params["n_layers"],
        activation = params["activation"],
        learning_rate = params["learning_rate"],
        momentum = params["momentum"],
    )
    results.append({"val_loss": val_loss,
                    "val_acc": val_acc,
                    "params": params})
    backend.clear_session()  # release graph state between runs so memory does not accumulate
# get optimal parameters
val_accuracies = [result["val_acc"] for result in results]
opt_params = results[np.argmax(val_accuracies)]["params"]
opt_params
# define ann architecture
model = models.Sequential()
for i in range(opt_params["n_layers"]):
    model.add(layers.Dense(opt_params["n_units"], activation = opt_params["activation"]))
model.add(layers.Dense(1, activation = "sigmoid"))
# define optimizer, loss function, and metrics
optimizer = optimizers.RMSprop(learning_rate = opt_params["learning_rate"],
                               momentum = opt_params["momentum"])
# train ann model
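# retrain on the full training set (train + validation halves) with the best hyperparameters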
model.build(input_shape = (10000,))
model.compile(optimizer = optimizer, loss = "binary_crossentropy", metrics = ["accuracy"])

history = model.fit(X_train, y_train, epochs = 20, batch_size = 64, verbose = 0)
loss = history.history['loss']  # fit() returns a History object; per-epoch metrics live in its .history dict
epochs = range(1, len(loss) + 1)

blue_dots = 'bo'
solid_blue_line = 'b'

plt.plot(epochs, loss, solid_blue_line, label = 'Training loss')
plt.title('Training loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
accuracy = history.history['accuracy']
epochs = range(1, len(accuracy) + 1)

blue_dots = 'bo'
solid_blue_line = 'b'

plt.plot(epochs, accuracy, solid_blue_line, label = 'Training accuracy')
plt.title('Training accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
model.evaluate(X_test, y_test)
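# The trained model can also score raw text. A minimal sketch below, assuming the default
# load_data() index convention (indices 0-2 are reserved, so a word of frequency rank r
# maps to dataset index r + 3); encode_review is a hypothetical helper, not part of Keras.
word_index = imdb.get_word_index()

def encode_review(text):
    # multi-hot encode a whitespace-tokenized review the same way X_train was built
    vec = np.zeros(shape = (1, num_words), dtype = float)
    for word in text.lower().split():
        rank = word_index.get(word)
        if rank is not None and rank + 3 < num_words:
            vec[0, rank + 3] = 1.
    return vec

model.predict(encode_review("a wonderful film with brilliant acting"))  # outputs near 1 read as positive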