```python
# Load the pandas library
import pandas as pd
# Load numpy for array manipulation
import numpy as np
# Load seaborn plotting library
import seaborn as sns
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Set font sizes in plots
sns.set(font_scale=1.2)
# Display all columns
pd.set_option('display.max_columns', None)

# Drop rows with NaNs (note: not consistent with other workflows!)
Heart = pd.read_csv("../../data/Heart.csv").drop(['Unnamed: 0'], axis=1).dropna()
Heart['AHD'] = Heart['AHD'] == 'Yes'
# Summary statistics for every column (produces the table below)
Heart.describe(include='all')
```
```
               Age         Sex     ChestPain      RestBP        Chol  \
count   297.000000  297.000000           297  297.000000  297.000000
unique         NaN         NaN             4         NaN         NaN
top            NaN         NaN  asymptomatic         NaN         NaN
freq           NaN         NaN           142         NaN         NaN
mean     54.542088    0.676768           NaN  131.693603  247.350168
std       9.049736    0.468500           NaN   17.762806   51.997583
min      29.000000    0.000000           NaN   94.000000  126.000000
25%      48.000000    0.000000           NaN  120.000000  211.000000
50%      56.000000    1.000000           NaN  130.000000  243.000000
75%      61.000000    1.000000           NaN  140.000000  276.000000
max      77.000000    1.000000           NaN  200.000000  564.000000

               Fbs     RestECG       MaxHR       ExAng     Oldpeak  \
count   297.000000  297.000000  297.000000  297.000000  297.000000
unique         NaN         NaN         NaN         NaN         NaN
top            NaN         NaN         NaN         NaN         NaN
freq           NaN         NaN         NaN         NaN         NaN
mean      0.144781    0.996633  149.599327    0.326599    1.055556
std       0.352474    0.994914   22.941562    0.469761    1.166123
min       0.000000    0.000000   71.000000    0.000000    0.000000
25%       0.000000    0.000000  133.000000    0.000000    0.000000
50%       0.000000    1.000000  153.000000    0.000000    0.800000
75%       0.000000    2.000000  166.000000    1.000000    1.600000
max       1.000000    2.000000  202.000000    1.000000    6.200000

             Slope          Ca    Thal    AHD
count   297.000000  297.000000     297    297
unique         NaN         NaN       3      2
top            NaN         NaN  normal  False
freq           NaN         NaN     164    160
mean      1.602694    0.676768     NaN    NaN
std       0.618187    0.938965     NaN    NaN
min       1.000000    0.000000     NaN    NaN
25%       1.000000    0.000000     NaN    NaN
50%       2.000000    0.000000     NaN    NaN
75%       2.000000    1.000000     NaN    NaN
max       3.000000    3.000000     NaN    NaN
```
Below, we define two utility functions to do the operations:

- `encode_numerical_feature` applies featurewise normalization to numerical features.
- `encode_categorical_feature` first turns string or integer inputs into integer indices, then one-hot encodes those indices; its `is_string` flag selects between a `StringLookup` and an `IntegerLookup` layer.

A usage sketch showing how these encoders are wired into the model inputs follows the definitions.
```python
from tensorflow.keras.layers import IntegerLookup
from tensorflow.keras.layers import Normalization
from tensorflow.keras.layers import StringLookup


def encode_numerical_feature(feature, name, dataset):
    # Create a Normalization layer for our feature
    normalizer = Normalization()

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the statistics of the data
    normalizer.adapt(feature_ds)

    # Normalize the input feature
    encoded_feature = normalizer(feature)
    return encoded_feature


def encode_categorical_feature(feature, name, dataset, is_string):
    lookup_class = StringLookup if is_string else IntegerLookup
    # Create a lookup layer which will turn strings into integer indices
    lookup = lookup_class(output_mode="binary")

    # Prepare a Dataset that only yields our feature
    feature_ds = dataset.map(lambda x, y: x[name])
    feature_ds = feature_ds.map(lambda x: tf.expand_dims(x, -1))

    # Learn the set of possible string values and assign them a fixed integer index
    lookup.adapt(feature_ds)

    # Turn the string input into integer indices
    encoded_feature = lookup(feature)
    return encoded_feature
```
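The `build_model` function defined next consumes two globals, `all_inputs` and `all_features`, that are not defined anywhere in this section. Below is a minimal sketch, not from the original notebook, of how they are typically built with the encoders above; the train/validation split, the batch size, the helper name `df_to_ds`, and the column groupings are all assumptions:

```python
# Sketch only: wiring the encoders into model inputs. The split, batch size,
# helper names, and column groupings are assumptions, not from the notebook.

integer_categorical = ["Sex", "Fbs", "RestECG", "ExAng", "Slope", "Ca"]
string_categorical = ["ChestPain", "Thal"]
numerical = ["Age", "RestBP", "Chol", "MaxHR", "Oldpeak"]

# Hold out 20% of rows for validation (assumed split)
val_df = Heart.sample(frac=0.2, random_state=1)
train_df = Heart.drop(val_df.index)


def df_to_ds(df):
    # Turn a dataframe into a tf.data.Dataset of (features_dict, label) pairs
    df = df.copy()
    # IntegerLookup needs integer dtypes; Ca loads as float even after dropna()
    df[integer_categorical] = df[integer_categorical].astype("int64")
    labels = df.pop("AHD")
    return tf.data.Dataset.from_tensor_slices((dict(df), labels)).batch(32)


train_ds = df_to_ds(train_df)
val_ds = df_to_ds(val_df)

# One symbolic input per column, encoded with the utilities above
all_inputs, encoded_features = [], []
for name in numerical:
    inp = keras.Input(shape=(1,), name=name)
    all_inputs.append(inp)
    encoded_features.append(encode_numerical_feature(inp, name, train_ds))
for name in string_categorical:
    inp = keras.Input(shape=(1,), name=name, dtype="string")
    all_inputs.append(inp)
    encoded_features.append(encode_categorical_feature(inp, name, train_ds, is_string=True))
for name in integer_categorical:
    inp = keras.Input(shape=(1,), name=name, dtype="int64")
    all_inputs.append(inp)
    encoded_features.append(encode_categorical_feature(inp, name, train_ds, is_string=False))

# Single concatenated feature tensor consumed by build_model below
all_features = layers.concatenate(encoded_features)
```

With this wiring, the keys of the feature dictionary match the `name` of each `keras.Input`, which is how Keras routes dataset columns to the right inputs.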
```python
import keras_tuner


def build_model(hp):
    x = layers.Dense(
        # Tune number of units.
        units=hp.Int("units", min_value=16, max_value=48, step=16),
        # Tune the activation function to use.
        activation=hp.Choice("activation", ["relu", "tanh"]),
    )(all_features)
    # Tune whether to use dropout.
    if hp.Boolean("dropout"):
        x = layers.Dropout(0.5)(x)
    output = layers.Dense(1, activation="sigmoid")(x)
    model = keras.Model(all_inputs, output)
    model.compile(
        optimizer=keras.optimizers.Adam(
            learning_rate=hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
        ),
        loss="binary_crossentropy",
        metrics=["accuracy"],
    )
    return model


build_model(keras_tuner.HyperParameters())
```
```
<keras.engine.functional.Functional object at 0x7fe61fa00760>
```
## 7 Start the search
After defining the search space, we need to select a tuner class to run the search. We may choose from `RandomSearch`, `BayesianOptimization`, and `Hyperband`, which correspond to different tuning algorithms. Here we use `RandomSearch` as an example.
To initialize the tuner, we need to specify several arguments in the initializer; a sketch that instantiates the tuner follows the list below.
- `hypermodel`. The model-building function, which is `build_model` in our case.
- `objective`. The name of the objective to optimize (whether to minimize or maximize is automatically inferred for built-in metrics). We will introduce how to use custom metrics later in this tutorial.
- `max_trials`. The total number of trials to run during the search.
- `executions_per_trial`. The number of models that should be built and fit for each trial. Different trials have different hyperparameter values; the executions within the same trial share the same hyperparameter values. Running multiple executions per trial reduces the variance of the results and therefore gives a more accurate assessment of each configuration. If you want results faster, set `executions_per_trial=1` (a single round of training per model configuration).
- `overwrite`. Controls whether to overwrite previous results in the same directory or resume the previous search instead. Here we set `overwrite=True` to start a new search and ignore any previous results.
- `directory`. A path to a directory for storing the search results.
- `project_name`. The name of the subdirectory inside `directory`.
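The cell that actually instantiates the tuner and launches the search does not appear in this section, although the next cell uses `tuner`. Here is a minimal sketch consistent with the arguments just described; the `directory`, `project_name`, trial counts, and epoch count are illustrative assumptions, and `train_ds`/`val_ds` come from the wiring sketch earlier:

```python
tuner = keras_tuner.RandomSearch(
    hypermodel=build_model,
    objective="val_accuracy",   # maximize validation accuracy
    max_trials=3,               # assumption: small budget for a demo
    executions_per_trial=2,     # average two runs per hyperparameter set
    overwrite=True,             # start fresh rather than resume
    directory="tuner_results",  # assumption: any writable path
    project_name="heart_ahd",   # assumption: illustrative name
)

# Launch the search; extra arguments are forwarded to model.fit()
tuner.search(train_ds, epochs=10, validation_data=val_ds)
```

`RandomSearch` samples hyperparameter combinations uniformly at random from the space defined in `build_model`.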
```python
# Get the top 2 models.
models = tuner.get_best_models(num_models=2)
best_model = models[0]
# Build the model.
# Needed for `Sequential` without specified `input_shape`.
best_model.build(input_shape=(None, 13))
best_model.summary()
```
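Rather than reusing the models trained during the search, you can also query the best hyperparameter values and retrain a fresh model with them. A brief sketch; the epoch count is an assumption:

```python
# Retrieve the single best hyperparameter combination from the search
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]
print(best_hps.values)

# Rebuild a fresh model with those values and retrain it
# (epochs=10 is an illustrative choice, not from the notebook)
model = build_model(best_hps)
model.fit(train_ds, epochs=10, validation_data=val_ds)
```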