import numpy as np
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = (20.0, 10.0)
plt.rcParams["figure.titlesize"] = 40
# We will need to generate matrices whose columns are successive powers of the first column.
# That tedious job is done by this workhorse; a vectorized version is sketched below it.
def generate_pow_cols(n,a):
    if n == 0:
        return a
    else:
        for i in range(0,n-1):
            tmp = np.array(a[:,0] * a[:,a.shape[1]-1]).reshape(a.shape[0],1)
            a = np.append(a,tmp,axis = 1)
        return a
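# A possible vectorized version (a sketch, not in the original; the name
# generate_pow_cols_fast is ours): broadcasting the first column against an
# exponent row builds x^1 ... x^n without the Python loop.
def generate_pow_cols_fast(n, a):
    if n == 0:
        return a
    exponents = np.arange(1, n + 1)        # powers 1 .. n
    return np.power(a[:, [0]], exponents)  # shape (a.shape[0], n)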
# to test the function above
teste = np.arange(5).reshape(5,1)
print(teste,end = '\n\n')
new_teste = generate_pow_cols(4,teste)
print(new_teste,end='\n\n')
# this generates an array of the powers of x, with n entries ranging from x^0 to x^(n-1)
def generate_pow_array(n,x):
    if n == 0:
        return 1
    else:
        out = np.ones(n)
        for i in range(0,n):
            out[i] = x**i
        return out
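# The same idea vectorized (a sketch, illustrative only):
def generate_pow_array_fast(n, x):
    if n == 0:
        return 1
    return x ** np.arange(n)  # [x^0, x^1, ..., x^(n-1)]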
# given an array of coefficients, this function evaluates the corresponding polynomial at x
def func(coeff , x):
    n = coeff.size
    xx = generate_pow_array(n,x)
    out = np.sum(coeff * xx)
    return out
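# A quick cross-check (a sketch, not in the original): NumPy's np.polyval
# expects the highest-degree coefficient first, so reversing the coefficient
# array should reproduce func. The array c below is purely illustrative.
c = np.array([2.0, 7.0, 3.4, 0.3])  # the polynomial 0.3*x**3 + 3.4*x**2 + 7*x + 2
print(np.isclose(func(c, 2.0), np.polyval(c[::-1], 2.0)))  # True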
# we need to apply the previous function to a column matrix
def func_over_array(coeff , x):
    # we assume that x is a float column matrix (an integer array would
    # truncate the results, since out inherits the dtype of x)
    out = np.zeros_like(x)
    for i in range(0,x.shape[0]):
        out[i,0] = func(coeff,x[i,0])
    return out
# a quick test of generate_pow_array
generate_pow_array(10,2)
# a fixed set of coefficients for testing (the random draw is commented out in favour of all ones)
coef_inf = -3
coef_sup = 2
coef_size = 5
#coef = np.random.uniform(coef_inf,coef_sup,coef_size)
coef = np.ones(coef_size)
coef
# using this coef array we can compute the value of the function func
func(coef,2)
# which, at x = 1, reduces to the sum of the elements of coef, so the difference below is 0.0
func(coef,1) - np.sum(coef)
teste = np.arange(5.0).reshape(5,1)
print(teste)
print(func_over_array(coef,teste))
func(coef,4)
%%capture
'''
we need to define a function like this
def funcao(x):
    return .3*x**3 + 3.4 * x**2 + 7 * x + 2
'''
# the parameters for all the runs
coef_inf = -10
coef_sup = 10
# we generate random values of x in the interval [-amplitude,amplitude]
amplitude = 1
# start by generating a set of coefficients for the polynomial function
coef_size = 5
coef = np.random.uniform(coef_inf,coef_sup,coef_size)
coef
# we create the training points
# the number of points that we are going to sample.
# In the noise-free polynomial case, it is enough for the number of points to equal the number of unknown coefficients
npts_treino = coef.size
xx_treino = np.random.uniform(-amplitude,amplitude,npts_treino).reshape(npts_treino,1)
# we need one less column than coef.size because below we will add the 0th-power column
data_treino = generate_pow_cols(coef.size-1,xx_treino)
# we need to add a column of ones to take care of the intercept term
bb_treino = np.ones_like(xx_treino)
# and we append it in the first column of data
data_treino = np.append(bb_treino,data_treino,axis = 1)
# from this matrix we build the least-squares transformation matrix (X^T X)^{-1} X^T
matrix = np.linalg.inv(np.matmul(data_treino.transpose(),data_treino))
matrix = np.matmul(matrix,data_treino.transpose())
yy_treino = func_over_array(coef,xx_treino)
ww = np.matmul(matrix,yy_treino)
print(ww)
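# Cross-check (a sketch, not in the original): np.linalg.lstsq solves the same
# least-squares problem directly and is numerically more stable than forming
# the inverse of X^T X explicitly.
ww_check, *_ = np.linalg.lstsq(data_treino, yy_treino, rcond=None)
print(np.allclose(ww, ww_check))  # True up to floating-point error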
# Now we repeat the whole procedure for the test set
# we generate random values of x in the interval [-amplitude,amplitude]
amplitude = 5
# the number of points that we are going to sample for the test.
npts_teste = 20
xx_teste = np.random.uniform(-amplitude,amplitude,npts_teste).reshape(npts_teste,1)
bb_teste = np.ones_like(xx_teste)
data_teste = generate_pow_cols(coef.size-1,xx_teste)
data_teste = np.append(bb_teste,data_teste,axis = 1)
# this is the same as computing X \cdot w = \hat{y}
yy_hat = np.matmul(data_teste,ww)
# the correct value of the test points
yy_teste = func_over_array(coef,xx_teste)
result = np.append(yy_hat,yy_teste,axis=1)
print(result)
# computing the mean squared error on the test set
np.square(yy_hat - yy_teste).mean()
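# Sanity check (a small addition, not in the original): with noise-free data
# and the exact model family, the recovered weights should match the
# generating coefficients, which is why the MSE above is essentially zero.
print(np.allclose(ww.flatten(), coef))  # True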
coef
xp = np.linspace(-5,5,100)
# do not forget that the input x of func_over_array is a column matrix
yp = func_over_array(coef,xp.reshape(xp.size,1))
plt.plot(xp,yp,c = 'k')
plt.scatter(xx_treino,yy_treino, c = 'r',alpha = 1,linewidths = 3)
plt.scatter(xx_teste,yy_hat, c = 'y', alpha = 1, linewidths = 3)
plt.show()
# this means that the generating polynomial will have degree coef2_size - 1
coef2_size = 7
coef2 = np.random.uniform(coef_inf,coef_sup,coef2_size)
coef2
# the number of points that we are going to sample.
# Notice that we generate twice as many training points as in the first example
npts_treino2 = 2 * coef2.size
# unlike the previous case, where the fitted coefficients were recovered exactly
# regardless of the training sample, here the fitted coefficients do depend on it
xx_treino2 = np.random.uniform(-amplitude,amplitude,npts_treino2).reshape(npts_treino2,1)
# Because our goal is to lower the capacity, which in this case corresponds to a lower-degree polynomial approximation
data_treino2 = generate_pow_cols(coef2.size-4,xx_treino2)
# like before
bb_treino2 = np.ones_like(xx_treino2)
# and we append it in the first column of data
data_treino2 = np.append(bb_treino2,data_treino2,axis = 1)
# from this matrix we build the least-squares transformation matrix, as before
matrix2 = np.linalg.inv(np.matmul(data_treino2.transpose(),data_treino2))
matrix2 = np.matmul(matrix2,data_treino2.transpose())
yy_treino2 = func_over_array(coef2,xx_treino2)
ww2 = np.matmul(matrix2,yy_treino2)
print(ww2)
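# To quantify the underfit (a small addition, not in the original), compute
# the mean squared error on the training points themselves:
yy_hat_treino2 = np.matmul(data_treino2, ww2)
print("Training MSE: %f" % np.square(yy_treino2 - yy_hat_treino2).mean())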
# as opposed to the previous scenario, there is a visible gap between the fitted curve and the training points
xp2 = np.linspace(-amplitude,amplitude,100)
yp2 = func_over_array(ww2,xp2.reshape(xp2.size,1))
plt.plot(xp2,yp2,c='k')
plt.scatter(xx_treino2,yy_treino2,c='r',linewidths=3)
plt.title('Lower Capacity leads to Underfitting')
plt.show()
We repeat the process, but this time, with exactly the same data-generating model, we fit a higher-degree approximation.
npts_treino3 = coef2.size
xx_treino3 = np.random.uniform(-amplitude,amplitude,npts_treino3).reshape(npts_treino3,1)
# Because our goal is to increase capacity, in this case this corresponds to higher degree polynomial approximation
data_treino3 = generate_pow_cols(coef2.size + 1,xx_treino3)
bb_treino3 = np.ones_like(xx_treino3)
data_treino3 = np.append(bb_treino3,data_treino3,axis = 1)
# with coef2.size + 2 parameters but only coef2.size training points,
# data_treino3^T * data_treino3 is singular, so we must use the pseudo-inverse
matrix3 = np.linalg.pinv(np.matmul(data_treino3.transpose(),data_treino3))
matrix3 = np.matmul(matrix3,data_treino3.transpose())
yy_treino3 = func_over_array(coef2,xx_treino3)
ww3 = np.matmul(matrix3,yy_treino3)
print(ww3)
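# With more free parameters than training points the fitted polynomial can
# interpolate the data, so the training error should be essentially zero
# (a small addition, not in the original):
yy_hat_treino3 = np.matmul(data_treino3, ww3)
print("Training MSE: %e" % np.square(yy_treino3 - yy_hat_treino3).mean())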
xp3 = np.linspace(-amplitude,amplitude,100)
# plot the fitted high-degree polynomial ww3 (not the generator coef2),
# since the point is to show the approximation passing through the red points
yp3 = func_over_array(ww3,xp3.reshape(xp3.size,1))
plt.plot(xp3,yp3,c='k')
plt.scatter(xx_treino3,yy_treino3,c='r',linewidths=3)
plt.title('Increasing the capacity improves the fit on the training data.')
plt.show()
There is a perfect fit between the approximation function and the training data. Overfitting appears when we try to generalize the approximation to the test set.
# the number of points that we are going to sample for the test.
npts_teste3 = 20
# the test interval matches the training interval
amplitude3 = amplitude
xx_teste3 = np.random.uniform(-amplitude3,amplitude3,npts_teste3).reshape(npts_teste3,1)
bb_teste3 = np.ones_like(xx_teste3)
data_teste3 = generate_pow_cols(coef2.size + 1,xx_teste3)
data_teste3 = np.append(bb_teste3,data_teste3,axis = 1)
# this is the same as computing X \cdot w = \hat{y}
yy_hat3 = np.matmul(data_teste3,ww3)
# the correct value of the test points
yy_teste3 = func_over_array(coef2,xx_teste3)
result = np.append(yy_hat3,yy_teste3,axis=1)
print(result,end='\n\n')
# the mean squared error on the test set
print("The mean squared error is %f.\n" % np.square(yy_teste3 - yy_hat3).mean())
xp3 = np.linspace(-amplitude3,amplitude3,100)
yp3 = func_over_array(coef2,xp3.reshape(xp3.size,1))
plt.plot(xp3,yp3,c='k')
plt.scatter(xx_treino3,yy_treino3,c='r',linewidths=3)
plt.scatter(xx_teste3,yy_hat3,c='g',linewidths=3)
plt.title('Increasing the capacity reduces the fit on the test data (overfitting).')
plt.show()