Non-linear binary classification using a Neural Network with forward and backward propagation (sample code)
Before moving on, refresh yourself on the types of loss functions and gradient descent from the previous article: https://medium.com/@ranasinghiitkgp/sigmoid-neuron-model-gradient-descent-with-sample-code-4919bfc9d4c4
Applying the chain rule in a Neural Network:
Let’s start with some basic calculus (the chain rule) in order to calculate the derivative at each stage.
Here there are many paths between the loss and a given weight (e.g. W121). So we calculate the derivative along each path and sum them at the end. We do the same for every weight.
The figure above shows the derivative at each stage of the chain rule. Here the predicted y is the output of the activation function (soft-max in the figure), and a is the weighted sum of the inputs and weights. The loss is the summation of (actual − predicted)². The final derivative of the loss with respect to each weight is the input to the learning algorithm shown in fig 1; the values I obtained for the figure above are shown in figure 3.
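As a sketch in symbols: for a sigmoid output ŷ = σ(a) with a = w·h + b, and a squared-error loss (the code below drops the constant factor 2, which is equivalent to using L = ½(ŷ − y)²), one path of the chain rule composes as

\frac{\partial L}{\partial w} = \frac{\partial L}{\partial \hat{y}} \cdot \frac{\partial \hat{y}}{\partial a} \cdot \frac{\partial a}{\partial w} = (\hat{y}-y)\,\hat{y}(1-\hat{y})\,h

using the sigmoid identity \sigma'(a) = \sigma(a)\,(1-\sigma(a)). This is exactly the pattern every gradient line in the grad method below follows.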
Let’s move one step further to a three-layer network. The gradient contributions along the two (yellow) paths are summed, and then the gradient for W131 is calculated using the chain rule.
The figure below shows the derivative of the loss at each layer using the chain rule.
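Written out for a first-layer weight w1 (this mirrors the dw1 expression in the grad method below; the subscripts follow the code’s variable names, not the figures):

\frac{\partial L}{\partial w_1} = \Big[(h_5-y)\,h_5(1-h_5)\,w_{11}\,h_3(1-h_3)\,w_7 + (h_5-y)\,h_5(1-h_5)\,w_{12}\,h_4(1-h_4)\,w_9\Big]\,h_1(1-h_1)\,x_1

The two bracketed terms are the two paths from the loss to w1 (one through h3, one through h4), summed exactly as described above.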
Implementing a forward and backward propagation network with code (for binary classification)
Import libraries
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error, log_loss
from tqdm import tqdm_notebook
import seaborn as sns
import imageio
from IPython.display import HTML
from sklearn.preprocessing import OneHotEncoder
from sklearn.datasets import make_blobs
Dataset
data, labels = make_blobs(n_samples=1000, centers=4, n_features=2, random_state=0)
print(data.shape, labels.shape)
labels_orig = labels
labels = np.mod(labels_orig, 2)  # converting the four blobs into two binary classes
# ------ Splitting the dataset
X_train, X_val, Y_train, Y_val = train_test_split(data, labels, stratify=labels, random_state=0)
print(X_train.shape, X_val.shape)
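To sanity-check the generated blobs before training, a quick scatter plot helps (this visualization snippet is my addition, not part of the original listing):

# Plot the binarized blobs: the two classes interleave, so they are
# not linearly separable and a hidden layer is genuinely needed.
plt.scatter(data[:, 0], data[:, 1], c=labels, cmap=plt.cm.coolwarm, s=15)
plt.xlabel('x1')
plt.ylabel('x2')
plt.show()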
Problem description: we solve this with a three-layer neural network: two inputs (x1, x2), a first hidden layer of two sigmoid neurons (h1, h2), a second hidden layer of two sigmoid neurons (h3, h4), and a single sigmoid output neuron (h5).
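In symbols, the forward pass computed by the class below is (same variable names as the code, with \sigma(x) = 1/(1+e^{-x})):

a_1 = w_1 x_1 + w_2 x_2 + b_1, \quad h_1 = \sigma(a_1)
a_2 = w_3 x_1 + w_6 x_2 + b_2, \quad h_2 = \sigma(a_2)
a_3 = w_7 h_1 + w_8 h_2 + b_3, \quad h_3 = \sigma(a_3)
a_4 = w_9 h_1 + w_{10} h_2 + b_4, \quad h_4 = \sigma(a_4)
a_5 = w_{11} h_3 + w_{12} h_4 + b_5, \quad h_5 = \sigma(a_5)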
Creating the forward and backward propagation class
class FFNetwork:

    def __init__(self):
        np.random.seed(0)
        self.w1 = np.random.randn()
        self.w2 = np.random.randn()
        self.w3 = np.random.randn()
        self.w6 = np.random.randn()
        self.w7 = np.random.randn()
        self.w8 = np.random.randn()
        self.w9 = np.random.randn()
        self.w10 = np.random.randn()
        self.w11 = np.random.randn()
        self.w12 = np.random.randn()
        self.b1 = 0
        self.b2 = 0
        self.b3 = 0
        self.b4 = 0
        self.b5 = 0

    def sigmoid(self, x):
        return 1.0/(1.0 + np.exp(-x))
    def forward_pass(self, x):
        self.x1, self.x2 = x
        # first hidden layer: h1, h2
        self.a1 = self.w1*self.x1 + self.w2*self.x2 + self.b1
        self.h1 = self.sigmoid(self.a1)
        self.a2 = self.w3*self.x1 + self.w6*self.x2 + self.b2
        self.h2 = self.sigmoid(self.a2)
        # second hidden layer: h3, h4
        self.a3 = self.w7*self.h1 + self.w8*self.h2 + self.b3
        self.h3 = self.sigmoid(self.a3)
        self.a4 = self.w9*self.h1 + self.w10*self.h2 + self.b4
        self.h4 = self.sigmoid(self.a4)
        # output neuron: h5
        self.a5 = self.w11*self.h3 + self.w12*self.h4 + self.b5
        self.h5 = self.sigmoid(self.a5)
        return self.h5
    def grad(self, x, y):
        self.forward_pass(x)
        # output layer: a single path from the loss
        self.dw11 = (self.h5-y) * self.h5*(1-self.h5) * self.h3
        self.dw12 = (self.h5-y) * self.h5*(1-self.h5) * self.h4
        self.db5 = (self.h5-y) * self.h5*(1-self.h5)
        # second hidden layer: one path each, through w11 (via h3) or w12 (via h4)
        self.dw7 = (self.h5-y) * self.h5*(1-self.h5) * self.w11 * self.h3*(1-self.h3) * self.h1
        self.dw8 = (self.h5-y) * self.h5*(1-self.h5) * self.w11 * self.h3*(1-self.h3) * self.h2
        self.dw9 = (self.h5-y) * self.h5*(1-self.h5) * self.w12 * self.h4*(1-self.h4) * self.h1
        self.dw10 = (self.h5-y) * self.h5*(1-self.h5) * self.w12 * self.h4*(1-self.h4) * self.h2
        self.db3 = (self.h5-y) * self.h5*(1-self.h5) * self.w11 * self.h3*(1-self.h3)
        self.db4 = (self.h5-y) * self.h5*(1-self.h5) * self.w12 * self.h4*(1-self.h4)
        # first hidden layer: the two paths (through h3 and through h4) are summed
        self.dw1 = ((self.h5-y) * self.h5*(1-self.h5) * self.w11 * self.h3*(1-self.h3) * self.w7 + (self.h5-y) * self.h5*(1-self.h5) * self.w12 * self.h4*(1-self.h4) * self.w9) * self.h1*(1-self.h1) * self.x1
        self.dw2 = ((self.h5-y) * self.h5*(1-self.h5) * self.w11 * self.h3*(1-self.h3) * self.w7 + (self.h5-y) * self.h5*(1-self.h5) * self.w12 * self.h4*(1-self.h4) * self.w9) * self.h1*(1-self.h1) * self.x2
        self.dw3 = ((self.h5-y) * self.h5*(1-self.h5) * self.w11 * self.h3*(1-self.h3) * self.w8 + (self.h5-y) * self.h5*(1-self.h5) * self.w12 * self.h4*(1-self.h4) * self.w10) * self.h2*(1-self.h2) * self.x1
        self.dw6 = ((self.h5-y) * self.h5*(1-self.h5) * self.w11 * self.h3*(1-self.h3) * self.w8 + (self.h5-y) * self.h5*(1-self.h5) * self.w12 * self.h4*(1-self.h4) * self.w10) * self.h2*(1-self.h2) * self.x2
        self.db1 = ((self.h5-y) * self.h5*(1-self.h5) * self.w11 * self.h3*(1-self.h3) * self.w7 + (self.h5-y) * self.h5*(1-self.h5) * self.w12 * self.h4*(1-self.h4) * self.w9) * self.h1*(1-self.h1)
        # h2's paths run through w8 and w10 (its connections to h3 and h4)
        self.db2 = ((self.h5-y) * self.h5*(1-self.h5) * self.w11 * self.h3*(1-self.h3) * self.w8 + (self.h5-y) * self.h5*(1-self.h5) * self.w12 * self.h4*(1-self.h4) * self.w10) * self.h2*(1-self.h2)
    def fit(self, X, Y, epochs=1, learning_rate=1, initialise=True, display_loss=False, display_weight=False):
        # initialise w, b
        if initialise:
            np.random.seed(0)
            self.w1 = np.random.randn()
            self.w2 = np.random.randn()
            self.w3 = np.random.randn()
            self.w6 = np.random.randn()
            self.w7 = np.random.randn()
            self.w8 = np.random.randn()
            self.w9 = np.random.randn()
            self.w10 = np.random.randn()
            self.w11 = np.random.randn()
            self.w12 = np.random.randn()
            self.b1 = 0
            self.b2 = 0
            self.b3 = 0
            self.b4 = 0
            self.b5 = 0
        if display_loss:
            loss = {}
        for i in tqdm_notebook(range(epochs), total=epochs, unit="epoch"):
            # accumulate gradients over the whole training set (batch gradient descent)
            dw1, dw2, dw3, dw6, dw7, dw8, dw9, dw10, dw11, dw12, db1, db2, db3, db4, db5 = [0]*15
            for x, y in zip(X, Y):
                self.grad(x, y)
                dw1 += self.dw1
                dw2 += self.dw2
                dw3 += self.dw3
                dw6 += self.dw6
                dw7 += self.dw7
                dw8 += self.dw8
                dw9 += self.dw9
                dw10 += self.dw10
                dw11 += self.dw11
                dw12 += self.dw12
                db1 += self.db1
                db2 += self.db2
                db3 += self.db3
                db4 += self.db4
                db5 += self.db5
            # update every parameter with the averaged gradient
            m = X.shape[0]
            self.w1 -= learning_rate * dw1 / m
            self.w2 -= learning_rate * dw2 / m
            self.w3 -= learning_rate * dw3 / m
            self.w6 -= learning_rate * dw6 / m
            self.w7 -= learning_rate * dw7 / m
            self.w8 -= learning_rate * dw8 / m
            self.w9 -= learning_rate * dw9 / m
            self.w10 -= learning_rate * dw10 / m
            self.w11 -= learning_rate * dw11 / m
            self.w12 -= learning_rate * dw12 / m
            self.b1 -= learning_rate * db1 / m
            self.b2 -= learning_rate * db2 / m
            self.b3 -= learning_rate * db3 / m
            self.b4 -= learning_rate * db4 / m
            self.b5 -= learning_rate * db5 / m
            if display_loss:
                Y_pred = self.predict(X)
                loss[i] = mean_squared_error(Y_pred, Y)
            if display_weight:
                # snapshot of all parameters, appended to the global weight_matrices list
                weight_matrix = np.array([[0, self.b5, self.w11, self.w12, 0, 0], [self.b3, self.w7, self.w8, self.b4, self.w9, self.w10], [self.b1, self.w1, self.w2, self.b2, self.w3, self.w6]])
                weight_matrices.append(weight_matrix)
        if display_loss:
            plt.plot(list(loss.values()))
            plt.xlabel('Epochs')
            plt.ylabel('Mean Squared Error')
            plt.show()
    def predict(self, X):
        Y_pred = []
        for x in X:
            y_pred = self.forward_pass(x)
            Y_pred.append(y_pred)
        return np.array(Y_pred)

    # the predict_h* helpers run a forward pass and record one neuron's output
    def predict_h1(self, X):
        Y_pred = []
        for x in X:
            self.forward_pass(x)
            Y_pred.append(self.h1)
        return np.array(Y_pred)

    def predict_h2(self, X):
        Y_pred = []
        for x in X:
            self.forward_pass(x)
            Y_pred.append(self.h2)
        return np.array(Y_pred)

    def predict_h3(self, X):
        Y_pred = []
        for x in X:
            self.forward_pass(x)
            Y_pred.append(self.h3)
        return np.array(Y_pred)

    def predict_h4(self, X):
        Y_pred = []
        for x in X:
            self.forward_pass(x)
            Y_pred.append(self.h4)
        return np.array(Y_pred)

    def predict_h5(self, X):
        Y_pred = []
        for x in X:
            self.forward_pass(x)
            Y_pred.append(self.h5)
        return np.array(Y_pred)
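As an aside, the same forward pass can be written compactly with numpy matrices. A minimal sketch (my own addition, not the course code) that groups the scalar weights per layer:

# Hypothetical vectorized equivalent of forward_pass: W1 stacks rows (w1,w2) and (w3,w6),
# W2 stacks rows (w7,w8) and (w9,w10), W3 is (w11,w12); each b is that layer's bias vector.
def forward_pass_vectorized(x, W1, b1, W2, b2, W3, b3):
    sigmoid = lambda z: 1.0 / (1.0 + np.exp(-z))
    h12 = sigmoid(W1 @ x + b1)     # first hidden layer: h1, h2
    h34 = sigmoid(W2 @ h12 + b2)   # second hidden layer: h3, h4
    return sigmoid(W3 @ h34 + b3)  # output: h5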
Run the model
weight_matrices = []
ffn = FFNetwork()
ffn.fit(X_train, Y_train, epochs=1000, learning_rate=5, display_loss=True, display_weight=True)
Y_pred_train = ffn.predict(X_train)
Y_pred_binarised_train = (Y_pred_train >= 0.5).astype("int").ravel()
Y_pred_val = ffn.predict(X_val)
Y_pred_binarised_val = (Y_pred_val >= 0.5).astype("int").ravel()
accuracy_train = accuracy_score(Y_pred_binarised_train, Y_train)
accuracy_val = accuracy_score(Y_pred_binarised_val, Y_val)
print("Training accuracy", round(accuracy_train, 2))
print("Validation accuracy", round(accuracy_val, 2))
Initially the error fluctuates; it then saturates after about 800 epochs at a learning rate of 5.
# my_cmap is used below but was never defined in this snippet;
# a simple red-yellow-green colormap (an assumption) works:
my_cmap = matplotlib.colors.LinearSegmentedColormap.from_list("", ["red", "yellow", "green"])
plt.scatter(X_train[:,0], X_train[:,1], c=Y_pred_binarised_train, cmap=my_cmap, s=15*(np.abs(Y_pred_binarised_train-Y_train)+.2))
plt.show()
The scatter plot shows the binary data clearly classified, with an accuracy of about 91% at a learning rate of 5 and 1000 epochs. The marker size encodes the error term, so misclassified points are drawn larger.
def plot_heat_map(epoch):
    fig = plt.figure(figsize=(10, 1))
    sns.heatmap(weight_matrices[epoch], annot=True, cmap=my_cmap, vmin=-3, vmax=3)
    plt.title("Epoch " + str(epoch))
    fig.canvas.draw()
    # convert the rendered figure to an RGB array so imageio can write the GIF frame
    image = np.frombuffer(fig.canvas.tostring_rgb(), dtype='uint8')
    image = image.reshape(fig.canvas.get_width_height()[::-1] + (3,))
    return image
imageio.mimsave('./weights_viz.gif', [plot_heat_map(i) for i in range(0,len(weight_matrices),len(weight_matrices)//100)], fps=1)
The plot shows that the weights saturate as the number of epochs increases. x2 is the most important input variable, and a1, a3, a4 and a5 are the most important neurons in this network, because they carry the highest weights at the end of training (epoch 980).
======= Detailed code can be found at the GitHub link ============
https://github.com/ranasingh-gkp/padhaiAI/tree/master/Python_Scalar%20Backpropagation
Reference:
- One Fourth Labs (special thanks)
- Wikipedia
All images are taken from the One Fourth Labs video lectures.