In [1]: import os, time, glob, socket
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import csv
import pickle

path = os.getcwd()
pd.set_option("display.max_columns", None)

import sys
import warnings
if not sys.warnoptions:
    warnings.simplefilter("ignore")
In [2]: from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import plot_roc_curve, roc_curve
from sklearn.metrics import average_precision_score, precision_recall_curve
from sklearn.metrics import auc, plot_precision_recall_curve
Read Data
In [3]: df = pd.read_csv("Data.csv")
print(df.shape)
df.head()
(1400, 31)
       Time        V1        V2        V3        V4        V5        V6        V7        V8        V9  ...
0  127459.0 -0.489190  0.783289 -1.659097 -1.366814  2.576846  3.513254  0.271305  1.304641 -0.238853  ...
1  128505.0  1.293556 -1.302381 -2.241085  0.393974  0.680825  0.821662  0.501478  0.104208  0.111651  ...
2  128393.0 -0.755894  0.121305  0.852314 -2.303416 -0.233670 -0.244191 -0.285440  0.424009 -1.072689  ...
3  128738.0  2.038750 -0.159488 -1.096570  0.425224 -0.214944 -1.151940  0.107112 -0.250273  0.701067  ...
4  140293.0  0.951025  3.252926 -5.039105  4.632411  3.014501 -1.349570  0.980940 -1.819539 -2.099049  ...
In [4]: Y = df.Class
X = df.drop(['Class'], axis=1)

In [5]: X_train, X_val, y_train, y_val = train_test_split(X, Y, test_size=0.2)
print(X_train.shape)
print(y_train.shape)
print(X_val.shape)
print(y_val.shape)
(1120, 30)
(1120,)
(280, 30)
(280,)
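Note that the split above is purely random, so the class ratio can drift between the train and validation sets. A minimal sketch of a stratified split (an alternative, not what was run in this notebook; random_state=0 is an arbitrary choice):

# Preserve the class ratio in both splits and fix the seed for reproducibility
X_train, X_val, y_train, y_val = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=0)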
Please construct a DNN for binary classification, trained with the cross-entropy error function.
You should decide the following hyperparameters:
· number of hidden layers
· number of hidden units
· learning rate
· number of iterations
· mini-batch size
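For reference, the error function minimized below (Keras's binary_crossentropy) is the binary cross-entropy

$$E(\mathbf{w}) = -\frac{1}{N}\sum_{n=1}^{N}\big[y_n \ln \hat{y}_n + (1-y_n)\ln(1-\hat{y}_n)\big]$$

where $\hat{y}_n \in (0,1)$ is the sigmoid output of the network for sample $n$ and $y_n \in \{0,1\}$ its label.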
In [6]: from tensorflow import keras
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.layers import Dense, Activation
from tensorflow.keras import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.models import load_model
In [7]: import tensorflow
In [8]: def create_model(learning_rate, activations, units, num_hidden_layers):
    # Initialize the constructor
    model = Sequential()
    # Input layer
    model.add(Dense(X_train.shape[1], input_dim=X_train.shape[1],
                    kernel_initializer="random_normal", activation=activations))
    for i in range(num_hidden_layers):
        # Add one hidden layer
        model.add(Dense(units, activation=activations))
    model.add(Dense(1, activation='sigmoid'))  # output layer
    opt = tensorflow.keras.optimizers.Adam(learning_rate=learning_rate)
    # Compile model
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model
Grid search
In [ ]: # create model
model = KerasClassifier(build_fn=create_model)
# define the grid search parameters
batch_size = [10, 20, 50]
epochs = [10, 30, 50]
learning_rate = [0.0001, 0.001, 0.01, 0.1, 0.2, 0.3]
activations = ['softmax', 'sigmoid', 'relu']
units = [1, 5, 10, 15, 20, 30]
num_hidden_layers = range(1, 5)
param_grid = dict(batch_size=batch_size, epochs=epochs, learning_rate=learning_rate,
                  activations=activations, units=units, num_hidden_layers=num_hidden_layers)
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=10)
grid_result = grid.fit(X_train, y_train)
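Note the cost of this search: 3 batch sizes × 3 epoch counts × 6 learning rates × 3 activations × 6 unit counts × 4 depths = 3888 combinations, and with cv=10 each combination is fit 10 times, i.e. 38,880 model fits. A cheaper alternative (a sketch only; this notebook ran the exhaustive search) is to sample the same space randomly:

from sklearn.model_selection import RandomizedSearchCV

# Try 50 random settings drawn from param_grid instead of all 3888
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_grid,
                                   n_iter=50, cv=10, n_jobs=-1)
# grid_result = random_search.fit(X_train, y_train)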
In [27]: # summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.923214 using {'activations': 'relu', 'batch_size': 20, 'epochs': 50, 'learning_rate': 0.0001, 'num_hidden_layers': 2, 'units': 15}
In [9]: model = create_model(learning_rate=0.0001, activations='relu', units=15, num_hidden_layers=2)
DNN_model = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=50, batch_size=20, verbose=2)
y_dnn_pred = model.predict_classes(X_val).flatten()
y_dnn_true = model.predict_classes(X_train).flatten()
Epoch 1/50
56/56 - 1s - loss: 697.5054 - accuracy: 0.3357 - val_loss: 275.4269 - val_accuracy: 0.3036
Epoch 2/50
56/56 - 0s - loss: 68.3296 - accuracy: 0.5045 - val_loss: 2.8939 - val_accuracy: 0.6964
Epoch 3/50
56/56 - 0s - loss: 1.0350 - accuracy: 0.6268 - val_loss: 0.6033 - val_accuracy: 0.7000
Epoch 4/50
56/56 - 0s - loss: 0.6031 - accuracy: 0.7196 - val_loss: 0.5192 - val_accuracy: 0.7821
Epoch 5/50
56/56 - 0s - loss: 0.5474 - accuracy: 0.7509 - val_loss: 0.4879 - val_accuracy: 0.7429
Epoch 6/50
56/56 - 0s - loss: 0.5608 - accuracy: 0.7563 - val_loss: 0.6351 - val_accuracy: 0.7321
Epoch 7/50
56/56 - 0s - loss: 0.6142 - accuracy: 0.7304 - val_loss: 0.5022 - val_accuracy: 0.8393
Epoch 8/50
56/56 - 0s - loss: 0.6448 - accuracy: 0.7330 - val_loss: 0.5357 - val_accuracy: 0.8964
Epoch 9/50
56/56 - 0s - loss: 0.5896 - accuracy: 0.7518 - val_loss: 0.4523 - val_accuracy: 0.7857
Epoch 10/50
56/56 - 0s - loss: 0.5960 - accuracy: 0.7527 - val_loss: 0.4165 - val_accuracy: 0.7929
Epoch 11/50
56/56 - 0s - loss: 0.5802 - accuracy: 0.7312 - val_loss: 0.4888 - val_accuracy: 0.9071
Epoch 12/50
56/56 - 0s - loss: 0.6065 - accuracy: 0.7411 - val_loss: 0.5473 - val_accuracy: 0.8750
Epoch 13/50
56/56 - 0s - loss: 0.4695 - accuracy: 0.7973 - val_loss: 0.6121 - val_accuracy: 0.5607
Epoch 14/50
56/56 - 0s - loss: 0.5372 - accuracy: 0.7598 - val_loss: 0.3802 - val_accuracy: 0.8714
Epoch 15/50
56/56 - 0s - loss: 0.5138 - accuracy: 0.7911 - val_loss: 0.3547 - val_accuracy: 0.8464
Epoch 16/50
56/56 - 0s - loss: 0.4844 - accuracy: 0.7973 - val_loss: 0.3891 - val_accuracy: 0.9036
Epoch 17/50
56/56 - 0s - loss: 0.5828 - accuracy: 0.7786 - val_loss: 0.6653 - val_accuracy: 0.3036
Epoch 18/50
56/56 - 0s - loss: 0.7239 - accuracy: 0.7411 - val_loss: 0.5436 - val_accuracy: 0.8500
Epoch 19/50
56/56 - 0s - loss: 0.4198 - accuracy: 0.8491 - val_loss: 0.3285 - val_accuracy: 0.8679
Epoch 20/50
56/56 - 0s - loss: 0.6916 - accuracy: 0.7563 - val_loss: 0.5040 - val_accuracy: 0.7964
Epoch 21/50
56/56 - 0s - loss: 0.5663 - accuracy: 0.7777 - val_loss: 0.3155 - val_accuracy: 0.8786
Epoch 22/50
56/56 - 0s - loss: 0.4719 - accuracy: 0.8143 - val_loss: 0.4551 - val_accuracy: 0.8107
Epoch 23/50
56/56 - 0s - loss: 0.5015 - accuracy: 0.8188 - val_loss: 0.4871 - val_accuracy: 0.8107
Epoch 24/50
56/56 - 0s - loss: 0.4903 - accuracy: 0.8143 - val_loss: 0.9859 - val_accuracy: 0.3036
Epoch 25/50
56/56 - 0s - loss: 0.7534 - accuracy: 0.7527 - val_loss: 0.4389 - val_accuracy: 0.8179
Epoch 26/50
56/56 - 0s - loss: 0.4444 - accuracy: 0.8259 - val_loss: 0.2919 - val_accuracy: 0.8821
Epoch 27/50
56/56 - 0s - loss: 0.4383 - accuracy: 0.8116 - val_loss: 0.5109 - val_accuracy: 0.8786
Epoch 28/50
56/56 - 0s - loss: 0.3931 - accuracy: 0.8357 - val_loss: 0.3135 - val_accuracy: 0.8536
Epoch 29/50
56/56 - 0s - loss: 0.3295 - accuracy: 0.8839 - val_loss: 0.4225 - val_accuracy: 0.8321
Epoch 30/50
56/56 - 0s - loss: 0.4366 - accuracy: 0.8411 - val_loss: 0.5693 - val_accuracy: 0.8071
Epoch 31/50
56/56 - 0s - loss: 0.4457 - accuracy: 0.8143 - val_loss: 0.4845
Epoch 32/50
56/56 - 0s - loss: 0.7262 - accuracy: 0.7723 - val_loss: 0.8425
Epoch 33/50
56/56 - 0s - loss: 0.6364 - accuracy: 0.7821 - val_loss: 0.5943
Epoch 34/50
56/56 - 0s - loss: 0.3407 - accuracy: 0.8732 - val_loss: 0.2614
Epoch 35/50
56/56 - 0s - loss: 0.3520 - accuracy: 0.8830 - val_loss: 0.2931
Epoch 36/50
56/56 - 0s - loss: 0.3741 - accuracy: 0.8571 - val_loss: 0.3627
Epoch 37/50
56/56 - 0s - loss: 0.3529 - accuracy: 0.8696 - val_loss: 0.2572
Epoch 38/50
56/56 - 0s - loss: 0.3376 - accuracy: 0.8813 - val_loss: 0.6919
Epoch 39/50
56/56 - 0s - loss: 0.4866 - accuracy: 0.8214 - val_loss: 0.8331
Epoch 40/50
56/56 - 0s - loss: 0.3511 - accuracy: 0.8607 - val_loss: 0.2860
Epoch 41/50
56/56 - 0s - loss: 0.3211 - accuracy: 0.8768 - val_loss: 0.3590
Epoch 42/50
56/56 - 0s - loss: 0.4443 - accuracy: 0.8259 - val_loss: 0.2406
Epoch 43/50
56/56 - 0s - loss: 0.3386 - accuracy: 0.8670 - val_loss: 0.3713
Epoch 44/50
56/56 - 0s - loss: 0.3775 - accuracy: 0.8446 - val_loss: 0.2344
Epoch 45/50
56/56 - 0s - loss: 0.3032 - accuracy: 0.8786 - val_loss: 0.2456
Epoch 46/50
56/56 - 0s - loss: 0.3831 - accuracy: 0.8455 - val_loss: 0.3610
Epoch 47/50
56/56 - 0s - loss: 0.3549 - accuracy: 0.8795 - val_loss: 0.6659
Epoch 48/50
56/56 - 0s - loss: 0.4210 - accuracy: 0.8375 - val_loss: 1.1288
Epoch 49/50
56/56 - 0s - loss: 0.4034 - accuracy: 0.8750 - val_loss: 0.2231
Epoch 50/50
56/56 - 0s - loss: 0.4474 - accuracy: 0.8420 - val_loss: 0.3609
In [10]: plt.figure(figsize=(18, 6))
plt.subplot(121)
plt.plot(DNN_model.history['loss'], label='train')
plt.plot(DNN_model.history['val_loss'], label='val')
plt.ylim(0.2, 10)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('model loss')
plt.legend(loc=1)
plt.subplot(122)
plt.plot(DNN_model.history['accuracy'], label='train')
plt.plot(DNN_model.history['val_accuracy'], label='val')
plt.ylim(0.2, 1)
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.title('model accuracy')
plt.legend(loc=1)
Out[10]: <matplotlib.legend.Legend at 0x1dc98f9bb88>
[Figure: 'model loss' (left) and 'model accuracy' (right) versus epoch]
Please plot the confusion matrices for (i), as in the example in Figure 2.
In [11]: def plot_confusion_matrix(y_true, y_pred, title):
    conf_matrix = confusion_matrix(y_true, y_pred)
    fig, ax = plt.subplots(figsize=(5, 5))
    ax.matshow(conf_matrix, cmap=plt.cm.Oranges, alpha=0.3)
    for i in range(conf_matrix.shape[0]):
        for j in range(conf_matrix.shape[1]):
            ax.text(x=j, y=i, s=conf_matrix[i, j], va='center', ha='center', size='xx-large')
    plt.xlabel('Predictions', fontsize=18)
    plt.ylabel('Actuals', fontsize=18)
    plt.title(title, fontsize=18)
    plt.show()
In [12]: plot_confusion_matrix(y_train, y_dnn_true, 'train_confusion_matrix_DNN')
plot_confusion_matrix(y_val, y_dnn_pred, 'val_confusion_matrix_DNN')
[Figure: train confusion matrix DNN]
[Figure: val confusion matrix DNN]
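scikit-learn also ships a ready-made display that produces essentially the same figure; a minimal sketch (not what this notebook used):

from sklearn.metrics import ConfusionMatrixDisplay

# Build the display from the raw confusion matrix and reuse the same colormap
ConfusionMatrixDisplay(confusion_matrix(y_val, y_dnn_pred)).plot(cmap=plt.cm.Oranges)
plt.title('val_confusion_matrix_DNN')
plt.show()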
In [13]: print('Precision: %.3f' % precision_score(y_val, y_dnn_pred))
print('Recall: %.3f' % recall_score(y_val, y_dnn_pred))
print('Accuracy: %.3f' % accuracy_score(y_val, y_dnn_pred))
print('F1 Score: %.3f' % f1_score(y_val, y_dnn_pred))

Precision: 0.929
Recall: 0.918
Accuracy: 0.954
F1 Score: 0.923
1-(iv)
What is the difference between a decision tree and a random forest?
A decision tree learns a single tree of if-then splits on the full training set, so it is easy to interpret but tends to overfit. A random forest (RF) is an ensemble of many decision trees, each trained on a bootstrap sample of the data and restricted to a random subset of features at every split; the forest predicts by majority vote over its trees. Averaging many de-correlated trees reduces variance, so a random forest usually generalizes better than a single decision tree, at the cost of interpretability and training time.
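To make the contrast concrete, a minimal sketch using the RandomForestClassifier already imported in In [2] (the hyperparameter values here are illustrative, not the ones tuned in this notebook):

# Each of the 100 trees is grown on a bootstrap sample, choosing among
# sqrt(n_features) randomly selected features at every split; prediction
# is the majority vote over the trees.
rf = RandomForestClassifier(n_estimators=100, max_features='sqrt', random_state=0)
rf.fit(X_train, y_train)
y_rf_pred = rf.predict(X_val)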
1-(v)
Please use a decision tree and a random forest to learn the binary classification task. Calculate the corresponding Accuracy, Precision, Recall and F1-Score on the validation set.
Decision tree
In [91]:
dt_gsc = GridSearchCV(DecisionTreeClassifier(random_state=0), param_grid={
imin_samples_split': range(2, 10), 'max_depth': range(1,7),
"criterion":["gini", "entropy"] }, verbose=0,n_jobs=-1, cv=10, refit=True)
dt_gsc.fit(X_train,y_train) dt_gsc.best_params_
Out[91]: {'criterion': 'gini', 'max_depth': 3, 'min_samples_split': 2}
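The cell that produced y_dt_pred and y_dt_true is not in the export; a minimal sketch, assuming the refitted best estimator from the grid search (refit=True) is used:

y_dt_pred = dt_gsc.predict(X_val)    # best tree's predictions on the validation set
y_dt_true = dt_gsc.predict(X_train)  # and on the training set, for the train confusion matrix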
In [239]: print('Precision: %.3f' % precision_score(y_val, y_dt_pred))
print('Recall: %.3f' % recall_score(y_val, y_dt_pred))
print('Accuracy: %.3f' % accuracy_score(y_val, y_dt_pred))
print('F1 Score: %.3f' % f1_score(y_val, y_dt_pred))

Precision: 0.990
Recall: 0.990
Accuracy: 0.993
F1 Score: 0.990
In [240]: plot_confusion_matrix(y_train, y_dt_true, 'train_confusion_matrix_Decision tree')
plot_confusion_matrix(y_val, y_dt_pred, 'val_confusion_matrix_Decision tree')
[Figure: train confusion matrix Decision tree, actual (rows) vs. predicted (columns): [[752, 0], [2, 366]]]
[Figure: val confusion matrix Decision tree]