Backprop Algorithm

In [1]:
# liback is the module where the helper functions are stored
from liback import *
import numpy as np

import matplotlib.pyplot as plt
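
liback itself is not shown in this notebook; below is a minimal sketch of the helper functions it is assumed to provide (softmax, ReLU, DReLU, Dsoftmax, DCE), with the shapes inferred from how they are called in the training loop further down.

import numpy as np

def softmax(x):
    # softmax of a vector; works on 1-D rows and on column vectors alike
    e = np.exp(x - np.max(x))   # shift by the max for numerical stability
    return e / np.sum(e)

def ReLU(x):
    # elementwise rectified linear unit
    return np.maximum(x, 0.0)

def DReLU(x):
    # Jacobian of ReLU, returned as a diagonal matrix so that it can be
    # matrix-multiplied in the backward pass
    return np.diag((x > 0).astype(float).flatten())

def Dsoftmax(z):
    # Jacobian of softmax: diag(s) - s s^T
    s = softmax(z).reshape(-1, 1)
    return np.diagflat(s) - np.matmul(s, s.T)

def DCE(y, yhat):
    # gradient of the cross-entropy loss -sum(y * log(yhat)) w.r.t. yhat,
    # returned as a row vector so that it can left-multiply the Jacobian
    return (-y / yhat).reshape(1, -1)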
In [2]:
plt.rcParams['figure.figsize'] = (10.0, 10.0)

Nonlinear classifier

In [3]:
def NL3classifier(x,y):
    # two nonlinear decision functions that carve the plane into three regions
    def teste1(a,b): return a**2 * b - 3
    def teste2(a,b): return -a**2 + 6*a*b + 3*b**2 + 5

    out = np.zeros(3)

    # class 1: teste1 positive and teste2 negative
    if teste1(x,y) > 0 and teste2(x,y) < 0:
        out[1] = 1
    # class 2: teste2 positive
    if teste2(x,y) > 0:
        out[2] = 1
    # class 0: everything else
    if out[1] < 1 and out[2] < 1:
        out[0] = 1

    return out
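
For example, the origin satisfies teste2(0,0) = 5 > 0 (and teste1(0,0) = -3 < 0), so it gets class 2:

print(NL3classifier(0.0, 0.0))   # [0. 0. 1.]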
In [4]:
npts = 10000

lower_lim = -4
upper_lim = 8

xx = np.random.uniform(lower_lim,upper_lim,npts)
yy = np.random.uniform(lower_lim,upper_lim,npts)

classificadores = np.array(list(map(lambda a,b: NL3classifier(a,b) , xx , yy)))

print(classificadores.shape)

plt.scatter(xx , yy , s = 5 , c = classificadores)
plt.show()
(10000, 3)
In [5]:
train_npts = 100000 
data_train = np.random.uniform(lower_lim , upper_lim , 2 * train_npts).reshape(train_npts , 2)

# the class labels for the training data
ff = np.array(list(map(lambda x,y : NL3classifier(x,y) , data_train[:,0] , data_train[:,1])))

print(ff.shape)

# take the softmax of each row of ff to produce soft targets
softmax_ff = np.transpose(np.apply_along_axis(softmax , 1 , ff))

print(softmax_ff.shape)
(100000, 3)
(3, 100000)
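
Applying softmax to the one-hot rows turns the hard labels into soft targets; for example, a row [0, 1, 0] becomes approximately [0.21, 0.58, 0.21]:

print(softmax(np.array([0., 1., 0.])))   # ~ [0.2119 0.5761 0.2119]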

Training of the ANN

In [6]:
# the parameters of the ANN

# number of epochs
nepchs = 30
# number of hidden layers
nhls = 1

# dimension of the input
input_dim = 2

# dimension of the hidden layer 1
hid1_dim = 20

# dimension of the classifier
out_dim = 3
In [7]:
# the learning rate
learn_eta = 0.00001

# initialization of the matrices

AA1_mat = np.random.uniform(0,1, input_dim * hid1_dim).reshape(hid1_dim , input_dim)
bias1_mat = np.random.uniform(0,1,hid1_dim).reshape(hid1_dim,1)

AA2_mat = np.random.uniform(0,1,hid1_dim * out_dim).reshape(out_dim , hid1_dim)
bias2_mat = np.random.uniform(0,1,out_dim).reshape(out_dim , 1)

def affine1(x):
    # affine map into the hidden layer
    return np.matmul(AA1_mat, x) + bias1_mat

def affine2(x):
    # affine map into the output layer
    return np.matmul(AA2_mat, x) + bias2_mat

# all-ones vectors, the bias counterparts of the L matrices built in the loop below
Lbiasmenus1 = np.ones(out_dim).reshape(out_dim , 1)
Lbiasmenus2 = np.ones(hid1_dim).reshape(hid1_dim , 1)

for j in range(nepchs):
    # decay the learning rate slightly at each epoch
    learn_eta *= .999
    for i in range(train_npts):

        # input 
        inpt = np.array([data_train[i,0],data_train[i,1]]).reshape(2,1)
    
        # value at first layer
        phi = ReLU(affine1(inpt))

        # output layer
        sigma = softmax(affine2(phi))
    
        # fetch the soft target for this training point
        classff = softmax_ff[:,i].reshape(out_dim,1)
    
        # the gradient of the loss
        dce = DCE(classff, sigma)
        dsoft = Dsoftmax(affine2(phi))

        # now we build the matrices M_{-1} and L_{-1}
        Mmenus1 = np.diag(np.matmul(dce,dsoft).flatten())
        LAAmenus1 = np.transpose(np.repeat(phi , out_dim , axis = 1))
    
        ########################################################
        # update of the parameters of the output (second) layer
        delta_AA2_mat = -learn_eta * np.matmul(Mmenus1 , LAAmenus1)
        delta_bias2_mat = -learn_eta * np.matmul(Mmenus1 , Lbiasmenus1)
        
        AA2_mat += delta_AA2_mat
        bias2_mat += delta_bias2_mat
        ########################################################
    
        # backpropagate through AA2 to build M_{-2} and L_{-2} for the hidden layer
        Mmenus2 = np.diag(np.matmul(np.matmul(dce,dsoft),AA2_mat).flatten())
        drelu = DReLU(affine1(inpt))
        LAAmenus2 = np.transpose(np.repeat(inpt , hid1_dim , axis = 1))
    
        # and the update of the parameters of the first (hidden) layer
        delta_AA1_mat = -learn_eta * np.matmul(np.matmul(Mmenus2,drelu),LAAmenus2)
        delta_bias1_mat = -learn_eta * np.matmul(np.matmul(Mmenus2,drelu),Lbiasmenus2)
    
        AA1_mat += delta_AA1_mat
        bias1_mat += delta_bias1_mat
        ########################################################
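
In more standard notation, writing $z_1 = A_1 x + b_1$, $\phi = \mathrm{ReLU}(z_1)$, $z_2 = A_2 \phi + b_2$ and $\sigma = \mathrm{softmax}(z_2)$, the diagonal-matrix products in the loop above are a restatement of the usual outer-product form of the cross-entropy gradients:

$$\delta_2^{\top} = \nabla_{\sigma} L \, J_{\mathrm{softmax}}(z_2), \qquad \Delta A_2 = -\eta \, \delta_2 \phi^{\top}, \qquad \Delta b_2 = -\eta \, \delta_2,$$

$$\delta_1 = \mathrm{ReLU}'(z_1) \odot (A_2^{\top} \delta_2), \qquad \Delta A_1 = -\eta \, \delta_1 x^{\top}, \qquad \Delta b_1 = -\eta \, \delta_1.$$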

Test the ANN

In [8]:
test_npts = 10000
data_test = np.random.uniform(lower_lim , upper_lim , 2 * test_npts).reshape(test_npts , 2)

ff_test = np.array(list(map(lambda x,y : NL3classifier(x,y) , data_test[:,0] , data_test[:,1])))

ff_test = np.transpose(np.apply_along_axis(softmax,1,ff_test))
In [9]:
classificador_test = np.zeros(test_npts)

for i in range(test_npts):
    
    # input 
    inpt = np.array([data_test[i,0],data_test[i,1]]).reshape(2,1)
    
    # value at first layer
    phi = ReLU(affine1(inpt))

    # output layer
    sigma = softmax(affine2(phi))
    
    # flag the point when the predicted class disagrees with the true class
    if np.argmax(sigma) != np.argmax(ff_test[:,i]):
        classificador_test[i] = 1.0

# fraction of misclassified test points
print(classificador_test.sum()/classificador_test.size)
0.0817
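
So the trained network misclassifies roughly 8% of the test points. The scatter plot below colors the misclassified points differently from the correctly classified ones.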
In [10]:
xx_test = data_test[:,0]
yy_test = data_test[:,1]

plt.scatter(xx_test,yy_test,s=5,c=classificador_test)
plt.show()