import numpy as np
import matplotlib.pyplot as plt
import math
from tensorflow import keras
from tensorflow.keras import layers, metrics

def calc_metrics(y_true, y_pred):
  """
  Calculates and prints mean absolute error, mean squared error, root mean squared error, and r2 score
  @param y_true: Target labels
  @param y_pred: Target predictions
  @return:
  """
  # Residuals
  if len(y_true) != len(y_pred):
    raise ValueError("Mismatched input lengths")

  n = len(y_true)
  residuals_squared = []
  residuals_absolute = []
  sum_squares = 0
  y_mean = np.mean(y_true)
  for t, p in zip(y_true, y_pred):
    residuals_squared.append(math.pow(t - p, 2))
    residuals_absolute.append(abs(t - p))
    sum_squares += math.pow(t - y_mean, 2)

  mae = sum(residuals_absolute) / n
  mse = sum(residuals_squared) / n
  rmse = math.pow(mse, 0.5)
  r2 = 1 - (sum(residuals_squared) / sum_squares)

  print(f"Mean Absolute Error     = {mae:.5f}")
  print(f"Mean Squared Error      = {mse:.5f}")
  print(f"Root Mean Squared Error = {rmse:.5f}")
  print(f"R2 Score                = {r2:.5f}")

class ActivationFn(object):
  def compute(self, x):
    return x
  def derivative(self, x):
    return 1

class ReluActivationFn(ActivationFn):
  def compute(self, x):
    if x < 0:
      return 0
    return x
  def derivative(self, x):
    if x < 0:
      return 0.0
    return 1.0

class SigmoidActivationFn(ActivationFn):
  def compute(self, x):
    return 1.0 / (1.0 + np.exp(-1*x))
  def derivative(self, x):
    return self.compute(x) * (1 - self.compute(x))

class TanhActivationFn(ActivationFn):
  def compute(self, x):
    return np.tanh(x)
  def derivative(self, x):
    return 1.0 - np.power(self.compute(x), 2)

class SoftplusActivationFn(ActivationFn):
  def compute(self, x):
    return np.log(1 + np.exp(x))
  def derivative(self, x):
    return 1.0 / (1.0 + np.exp(-1*x))

class GaussianActivationFn(ActivationFn):
  def compute(self, x):
    return np.exp(-1*np.power(x, 2))
  def derivative(self, x):
    return -2*x*np.exp(-1*np.power(x, 2))

class XORModel(object):
  """
  Represents XOR Neural Network model in the figure above
  =============
  Model Summary
  =============

  input layer:
    input_a
    input_b

  hidden 1 neuron. Activation function = hidden_layer_activation
    h1
    h2
    h3
    h4

  output layers. Activation function = output_layer_activation
    o1

  Total Number of Parameters = 12
  """
  def __init__(self, hidden_layer_activation: ActivationFn, output_layer_activation: ActivationFn, learning_rate=0.01):
    self.n_input_units = 2
    self.n_dense_units = 4
    self.n_outputs = 1
    self.learning_rate = learning_rate
    self.hidden_layer_activation = hidden_layer_activation
    self.output_layer_activation = output_layer_activation

    np.random.seed(23)
    self.h1_weights = np.random.rand(self.n_input_units)
    np.random.seed(13)
    self.h2_weights = np.random.rand(self.n_input_units)
    np.random.seed(8962)
    self.h3_weights = np.random.rand(self.n_input_units)
    np.random.seed(65486)
    self.h4_weights = np.random.rand(self.n_input_units)

    self.layer01_weights = np.array([self.h1_weights, self.h2_weights, self.h3_weights, self.h4_weights])

    np.random.seed(42)
    self.o1_weights = np.random.rand(self.n_dense_units)
    self.layer12_weights = np.array([self.o1_weights])

    self.inputs_ab = np.array([0, 0])

  def predict(self, a, b):
    """
    Predict
    @param a: Input A value
    @param b: Input B value
    @return: Out o1
    """
    self.inputs_ab[0] = a
    self.inputs_ab[1] = b
    self.forward_propagation()
    return self.out_o1

  def fit(self, X, y, epochs=1):
    """
    Train the model using gradient descent with batch size = 1
    @param X: Training samples
    @param y: Labels
    @param epochs: Number of epochs to run
    @return: Loss history
    """
    history = []

    for i in range(epochs):
      epoch_error = 0
      for x, y_true in zip(X, y):
        self.inputs_ab[0] = x[0]
        self.inputs_ab[1] = x[1]
        self.y_true = y_true

        # Update model error state
        self.backpropagation()

        # Update weights
        self.layer01_weights[0][0] = self.layer01_weights[0][0] - self.learning_rate*self.derror_o1_dw1
        self.layer01_weights[0][1] = self.layer01_weights[0][1] - self.learning_rate*self.derror_o1_dw2

        self.layer01_weights[1][0] = self.layer01_weights[1][0] - self.learning_rate*self.derror_o1_dw3
        self.layer01_weights[1][1] = self.layer01_weights[1][1] - self.learning_rate*self.derror_o1_dw4

        self.layer01_weights[2][0] = self.layer01_weights[2][0] - self.learning_rate*self.derror_o1_dw5
        self.layer01_weights[2][1] = self.layer01_weights[2][1] - self.learning_rate*self.derror_o1_dw6

        self.layer01_weights[3][0] = self.layer01_weights[3][0] - self.learning_rate*self.derror_o1_dw7
        self.layer01_weights[3][1] = self.layer01_weights[3][1] - self.learning_rate*self.derror_o1_dw8

        self.layer12_weights[0][0] = self.layer12_weights[0][0] - self.learning_rate*self.derror_o1_dw9
        self.layer12_weights[0][1] = self.layer12_weights[0][1] - self.learning_rate*self.derror_o1_dw10
        self.layer12_weights[0][2] = self.layer12_weights[0][2] - self.learning_rate*self.derror_o1_dw11
        self.layer12_weights[0][3] = self.layer12_weights[0][3] - self.learning_rate*self.derror_o1_dw12

        # Get new error
        self.forward_propagation()

        epoch_error += ((self.y_true - self.out_o1)**2)

      history.append(epoch_error / len(x))
    return history

  def forward_propagation(self):
    """
    Update the state of this instance by forward propagation
    @return: None
    """
    self.net_h1 = np.tensordot(self.layer01_weights[0], self.inputs_ab, axes=1)
    self.out_h1 = self.hidden_layer_activation.compute(self.net_h1)

    self.net_h2 = np.tensordot(self.layer01_weights[1], self.inputs_ab, axes=1)
    self.out_h2 = self.hidden_layer_activation.compute(self.net_h2)

    self.net_h3 = np.tensordot(self.layer01_weights[2], self.inputs_ab, axes=1)
    self.out_h3 = self.hidden_layer_activation.compute(self.net_h3)

    self.net_h4 = np.tensordot(self.layer01_weights[3], self.inputs_ab, axes=1)
    self.out_h4 = self.hidden_layer_activation.compute(self.net_h4)

    self.out_h1234 = np.array([self.out_h1, self.out_h2, self.out_h3, self.out_h4])

    self.net_o1 = np.tensordot(self.out_h1234, self.layer12_weights[0], axes=1)
    self.out_o1 = self.output_layer_activation.compute(self.net_o1)

  def backpropagation(self):
    """
    Update the error states of this instance by backpropagation
    @return: None
    """
    ############
    # layer 12 #
    ############
    self.forward_propagation()

    derror_o1_dout_o1   = -(self.y_true - self.out_o1)
    dout_o1_dnet_o1     = self.output_layer_activation.derivative(self.net_o1)
    dnet_o1_dw9         = self.out_h1
    dnet_o1_dw10        = self.out_h2
    dnet_o1_dw11        = self.out_h3
    dnet_o1_dw12        = self.out_h4

    self.derror_o1_dw9  = derror_o1_dout_o1 * dout_o1_dnet_o1 * dnet_o1_dw9
    self.derror_o1_dw10 = derror_o1_dout_o1 * dout_o1_dnet_o1 * dnet_o1_dw10
    self.derror_o1_dw11 = derror_o1_dout_o1 * dout_o1_dnet_o1 * dnet_o1_dw11
    self.derror_o1_dw12 = derror_o1_dout_o1 * dout_o1_dnet_o1 * dnet_o1_dw12

    ############
    # layer 01 #
    ############
    derror_o1_dnet_o1   = derror_o1_dout_o1 * dout_o1_dnet_o1
    # w1, w2
    dnet_o1_dout_h1     = self.layer12_weights[0][0] # w9
    dout_h1_dnet_h1     = self.hidden_layer_activation.derivative(self.net_h1)
    dnet_h1_dw1         = self.inputs_ab[0] # a
    dnet_h1_dw2         = self.inputs_ab[1] # b

    self.derror_o1_dw1  = derror_o1_dnet_o1 * dnet_o1_dout_h1 * dout_h1_dnet_h1 * dnet_h1_dw1
    self.derror_o1_dw2  = derror_o1_dnet_o1 * dnet_o1_dout_h1 * dout_h1_dnet_h1 * dnet_h1_dw2

    # w3, w4
    dnet_o1_dout_h2     = self.layer12_weights[0][1] # w10
    dout_h2_dnet_h2     = self.hidden_layer_activation.derivative(self.net_h2)
    dnet_h2_dw3         = self.inputs_ab[0] # a
    dnet_h2_dw4         = self.inputs_ab[1] # b

    self.derror_o1_dw3  = derror_o1_dnet_o1 * dnet_o1_dout_h2 * dout_h2_dnet_h2 * dnet_h2_dw3
    self.derror_o1_dw4  = derror_o1_dnet_o1 * dnet_o1_dout_h2 * dout_h2_dnet_h2 * dnet_h2_dw4

    # w5, w6
    dnet_o1_dout_h3     = self.layer12_weights[0][2] # w11
    dout_h3_dnet_h3     = self.hidden_layer_activation.derivative(self.net_h3)
    dnet_h3_dw5         = self.inputs_ab[0] # a
    dnet_h3_dw6         = self.inputs_ab[1] # b

    self.derror_o1_dw5  = derror_o1_dnet_o1 * dnet_o1_dout_h3 * dout_h3_dnet_h3 * dnet_h3_dw5
    self.derror_o1_dw6  = derror_o1_dnet_o1 * dnet_o1_dout_h3 * dout_h3_dnet_h3 * dnet_h3_dw6

    # w7, w8
    dnet_o1_dout_h4     = self.layer12_weights[0][3] # w12
    dout_h4_dnet_h4     = self.hidden_layer_activation.derivative(self.net_h4)
    dnet_h4_dw7         = self.inputs_ab[0] # a
    dnet_h4_dw8         = self.inputs_ab[1] # b

    self.derror_o1_dw7  = derror_o1_dnet_o1 * dnet_o1_dout_h4 * dout_h4_dnet_h4 * dnet_h4_dw7
    self.derror_o1_dw8  = derror_o1_dnet_o1 * dnet_o1_dout_h4 * dout_h4_dnet_h4 * dnet_h4_dw8

X = [[0, 0], [0, 1], [1, 0], [1, 1]]
y = [0, 1, 1, 0]

def plot_xor_model_error(e):
  """
  Plot error of XOR model
  @param e: Loss history
  @return: None
  """
  fig, ax = plt.subplots()
  ax.set_title('Loss (Mean squared error)')
  ax.plot(e)
  ax.grid(visible=True)
  ax.set_ylabel("Loss")
  ax.set_xlabel("Epoch")

def test_xor_model(model_under_test, epochs):
  """
  Utility function to run various tests on an XOR model with the method "predict"
  @param model_under_test: The model with the predict method
  @param epochs: Number of epochs to train
  @return: None
  """
  y_pred_1 = []
  y_pred_2 = []

  print("Initial predictions")
  print(f"X | Y | Q True   | Q Predicted")
  print(f"------------------------------")
  for x, y_true in zip(X, y):
    y_pred = model_under_test.predict(x[0], x[1])
    y_pred_1.append(y_pred)
    print(f"{x[0]} | {x[1]} | {y_true}        | {y_pred}")
  calc_metrics(y, y_pred_1)
  error_m = m.fit(X, y, epochs=epochs)
  print("===========================")
  print("Predictions after training")
  print(f"X | Y | Q True   | Q Predicted")
  print(f"------------------------------")
  for x, y_true in zip(X, y):
    y_pred = model_under_test.predict(x[0], x[1])
    y_pred_2.append(y_pred)
    print(f"{x[0]} | {x[1]} | {y_true}        | {y_pred}")
  calc_metrics(y, y_pred_2)
  plot_xor_model_error(error_m)

m = XORModel(learning_rate=0.01, hidden_layer_activation=GaussianActivationFn(), output_layer_activation=GaussianActivationFn())
test_xor_model(m, 5000)

Initial predictions
X | Y | Q True   | Q Predicted
------------------------------
0 | 0 | 0        | 0.0008640834548181034
0 | 1 | 1        | 0.023038474655564935
1 | 0 | 1        | 0.019038121277748808
1 | 1 | 0        | 0.3781749380990913
Mean Absolute Error     = 0.58424
Mean Squared Error      = 0.51494
Root Mean Squared Error = 0.71759
R2 Score                = -1.05976
===========================
Predictions after training
X | Y | Q True   | Q Predicted
------------------------------
0 | 0 | 0        | 0.02830903562522035
0 | 1 | 1        | 0.9869397507295415
1 | 0 | 1        | 0.9888564478317782
1 | 1 | 0        | 0.020690989391717766
Mean Absolute Error     = 0.01830
Mean Squared Error      = 0.00038
Root Mean Squared Error = 0.01952
R2 Score                = 0.99848

m = XORModel(learning_rate=0.5, hidden_layer_activation=TanhActivationFn(), output_layer_activation=TanhActivationFn())
test_xor_model(m, 400)

Initial predictions
X | Y | Q True   | Q Predicted
------------------------------
0 | 0 | 0        | 0.0
0 | 1 | 1        | 0.8294150947494272
1 | 0 | 1        | 0.8366625823802332
1 | 1 | 0        | 0.962012506115427
Mean Absolute Error     = 0.32398
Mean Squared Error      = 0.24531
Root Mean Squared Error = 0.49529
R2 Score                = 0.01875
===========================
Predictions after training
X | Y | Q True   | Q Predicted
------------------------------
0 | 0 | 0        | 0.0
0 | 1 | 1        | 0.9693165049621585
1 | 0 | 1        | 0.967214884205445
1 | 1 | 0        | -0.1289160080290679
Mean Absolute Error     = 0.04810
Mean Squared Error      = 0.00466
Root Mean Squared Error = 0.06826
R2 Score                = 0.98136

m = XORModel(learning_rate=0.7, hidden_layer_activation=SoftplusActivationFn(), output_layer_activation=SoftplusActivationFn())
test_xor_model(m, 5000)

Initial predictions
X | Y | Q True   | Q Predicted
------------------------------
0 | 0 | 0        | 1.9882063689622802
0 | 1 | 1        | 2.728134832244606
1 | 0 | 1        | 2.695628231054407
1 | 1 | 0        | 3.6223532831710012
Mean Absolute Error     = 2.25858
Mean Squared Error      = 5.73400
Root Mean Squared Error = 2.39458
R2 Score                = -21.93601
===========================
Predictions after training
X | Y | Q True   | Q Predicted
------------------------------
0 | 0 | 0        | 0.0317827685033709
0 | 1 | 1        | 1.103553898571937
1 | 0 | 1        | 1.2122441621677822
1 | 1 | 0        | 0.13469700774795554
Mean Absolute Error     = 0.12057
Mean Squared Error      = 0.01873
Root Mean Squared Error = 0.13686
R2 Score                = 0.92508

x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
y = np.array([0, 1, 1, 0])
model5 = keras.models.Sequential()
model5.add(layers.InputLayer(input_shape=(2)))
model5.add(layers.Dense(4, activation="tanh", use_bias=False))
model5.add(layers.Dense(1, activation="tanh", use_bias=False))
model5.compile(loss="mean_squared_error", optimizer="Adam", metrics=[metrics.MeanSquaredError()])
model5_history = model5.fit(x, y, epochs=10000, verbose=False)
plot_xor_model_error(model5_history.history['loss'])
y_pred = []
print(f"X | Y | Q True   | Q Predicted")
print(f"------------------------------")
for test_x, y_true in zip(x, y):
  y_ = model5.predict(np.array([test_x]), verbose=False)[0][0]
  y_pred.append(y_)
  print(f"0 | 0 | {y_true}        | {y_}")
calc_metrics(y, y_pred)

X | Y | Q True   | Q Predicted
------------------------------
0 | 0 | 0        | 0.0
0 | 0 | 1        | 0.9880496263504028
0 | 0 | 1        | 0.9868031740188599
0 | 0 | 0        | 0.00022530555725097656
Mean Absolute Error     = 0.00634
Mean Squared Error      = 0.00008
Root Mean Squared Error = 0.00890
R2 Score                = 0.99968

Neural Network implementation from scratch¶

Metrics Functions¶

Theory¶

Forward Propagation¶

Backpropagation¶

Activation Functions¶

Neural Network Implementation¶

Training¶

Gaussian activation function¶

Other activation functions¶

Tanh Model¶

Softplus Model¶

Compare to Keras tanh Model¶