# import libraries
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.datasets import reuters
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import models, layers, optimizers, backend

# 6 Reuters
# load dataset
num_words = 10000  # vocabulary size; also reused as the Embedding input dimension
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=num_words)
# show the raw array shapes (this was a bare notebook expression that had been
# fused onto the following assignment, turning it into an invalid unpacking)
print(train_data.shape, train_labels.shape, test_data.shape, test_labels.shape)

seq_len = 300  # the avg is 145.54
def _left_pad(seq, length):
    """Return *seq* cut to its first *length* items, zero-padded on the left."""
    head = seq[:length]
    filler = np.zeros(length - len(head))
    return np.concatenate((filler, head))


# training split: fixed-length integer sequences plus categorical labels
X_train = np.array([_left_pad(seq, seq_len) for seq in train_data]).astype(int)
y_train = to_categorical(train_labels)

# test split: same preprocessing as the training split
X_test = np.array([_left_pad(seq, seq_len) for seq in test_data]).astype(int)
y_test = to_categorical(test_labels)
# show the preprocessed shapes (originally a bare notebook expression that had
# been fused onto the next assignment)
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

# the first 4500 training samples are used for fitting during the search;
# everything after index 4500 is held out for validation
partial_X_train = X_train[:4500]
partial_y_train = y_train[:4500]
X_val = X_train[4500:]
y_val = y_train[4500:]


def explore(X_train,
            y_train,
            X_val,
            y_val,
            embedding_dim,
            learning_rate,
            momentum):
    """Train one embedding classifier and score it on the validation data.

    Relies on the module-level ``num_words`` for the embedding vocabulary size.

    Args:
        X_train: 2-D integer array (samples, sequence length) used for fitting.
        y_train: 2-D one-hot label array (samples, classes) used for fitting.
        X_val: validation inputs, same layout as ``X_train``.
        y_val: validation labels, same layout as ``y_train``.
        embedding_dim: output dimension of the Embedding layer.
        learning_rate: learning rate passed to RMSprop.
        momentum: momentum passed to RMSprop.

    Returns:
        ``(val_loss, val_acc)`` as reported by ``model.evaluate`` on the
        validation data.
    """
    # take the input length and class count from the data so the model always
    # matches whatever arrays the caller passes in
    seq_length = X_train.shape[1]
    n_classes = y_train.shape[1]

    # define ann architecture
    model = models.Sequential()
    model.add(layers.Input(shape=(seq_length,)))
    # int() because the grid may hand over a NumPy integer
    model.add(layers.Embedding(num_words, int(embedding_dim)))
    # the embedding output has one vector per token; flatten it so the Dense
    # layers produce one prediction per sample, matching the 2-D labels
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation="relu"))
    # softmax: each sample has exactly one class and the loss below is
    # categorical cross-entropy
    model.add(layers.Dense(n_classes, activation="softmax"))

    # define optimizer, loss function, and metrics
    optimizer = optimizers.RMSprop(learning_rate=learning_rate, momentum=momentum)

    # train ann model
    model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])
    model.fit(X_train, y_train, epochs=20, batch_size=64, verbose=0)

    # evaluate ann model
    val_loss, val_acc = model.evaluate(X_val, y_val, verbose=0)
    return val_loss, val_acc


# set hyperparameters
learning_rate_list = np.logspace(-2, -4, 5)
momentum_list = np.linspace(0.1, 0.9, 5)
embedding_dim_list = 2 ** np.arange(3, 7)

# full grid: one entry per (learning rate, momentum, embedding size) combination
param_list = []
for learning_rate in learning_rate_list:
    for momentum in momentum_list:
        for embedding_dim in embedding_dim_list:
            # store plain Python numbers rather than NumPy scalars so the
            # parameter dicts print cleanly and can be passed anywhere
            param_list.append({
                "learning_rate": float(learning_rate),
                "momentum": float(momentum),
                "embedding_dim": int(embedding_dim),
            })

# filled by the search loop with one record per parameter combination
results = []
# run the search: train and validate one model per parameter combination
for params in param_list:
    val_loss, val_acc = explore(
        partial_X_train,
        partial_y_train,
        X_val,
        y_val,
        embedding_dim=params["embedding_dim"],
        learning_rate=params["learning_rate"],
        momentum=params["momentum"],
    )
    results.append({"val_loss": val_loss,
                    "val_acc": val_acc,
                    "params": params})
    # reset Keras' global state after each run so finished models do not
    # accumulate across the search
    backend.clear_session()

# get optimal parameters
# gather each run's validation accuracy, then keep the params of the best run
val_accuracies = [entry["val_acc"] for entry in results]
best_run = results[np.argmax(val_accuracies)]
opt_params = best_run["params"]
opt_params
# define ann architecture
# The grid only tunes learning_rate, momentum and embedding_dim, so the final
# model is the searched embedding classifier built with the winning values
# (the previous code read keys that are never produced and used a binary head).
model = models.Sequential()
model.add(layers.Input(shape=(X_train.shape[1],)))
model.add(layers.Embedding(num_words, int(opt_params["embedding_dim"])))
# flatten the per-token embedding vectors so the Dense layers emit one
# prediction per sample, matching the 2-D one-hot labels
model.add(layers.Flatten())
model.add(layers.Dense(64, activation="relu"))
# one softmax output per label column
model.add(layers.Dense(y_train.shape[1], activation="softmax"))
# define optimizer, loss function, and metrics
optimizer = optimizers.RMSprop(learning_rate=opt_params["learning_rate"],
                               momentum=opt_params["momentum"])
# train ann model on the full training set
model.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["accuracy"])
history = model.fit(X_train, y_train, epochs=20, batch_size=64, verbose=0)

# fit() returns a History object; the per-epoch values are in its .history dict
loss = history.history['loss']
# plot the training loss per epoch (epochs are numbered from 1)
epochs = range(1, len(loss) + 1)
blue_dots = 'bo'
solid_blue_line = 'b'
plt.plot(epochs, loss, solid_blue_line, label='Training loss')
plt.title('Training loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# the per-epoch metric values are in the History object's .history dict
accuracy = history.history['accuracy']
# plot the training accuracy per epoch (epochs are numbered from 1)
epochs = range(1, len(accuracy) + 1)
blue_dots = 'bo'
solid_blue_line = 'b'
plt.plot(epochs, accuracy, solid_blue_line, label='Training accuracy')
plt.title('Training accuracy')
plt.xlabel('Epochs')
plt.ylabel('accuracy')
plt.legend()
plt.show()

# final check on the test split; print the result because a bare expression
# shows nothing outside a notebook
test_metrics = model.evaluate(X_test, y_test)
print(test_metrics)