The following instructions compute a local minimum of a function using gradient descent.
from mpl_toolkits.mplot3d import Axes3D   # only needed on older Matplotlib versions
import matplotlib.pyplot as plt
import numpy as np

# this changes the size of all plots to the chosen size
plt.rcParams["figure.figsize"] = [20, 10]

def testefunc(x, y):
    # test function f(x, y) = cos(x) * (x^2 + y^2)
    return np.cos(x) * (x**2 + y**2)

def dtestefunc(x, y):
    # gradient of the test function
    return np.asarray([2*x*np.cos(x) - np.sin(x)*(x**2 + y**2),
                       2*y*np.cos(x)])

# sample the function on a grid and plot the surface
limsup = 2
x = np.linspace(-limsup, limsup, 1000)
y = np.linspace(-limsup, limsup, 1000)
X, Y = np.meshgrid(x, y)
z = testefunc(X, Y)

fig = plt.figure()
ax = fig.add_subplot(projection='3d')   # fig.gca(projection='3d') was removed in Matplotlib 3.6
ax.plot_surface(X, Y, z)
ax.view_init(60, 35)
plt.show()
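A contour view of the same surface can make the basins around the local minima easier to see. This is a minimal sketch, not part of the original listing; it reuses the X, Y and z arrays built above.
# optional: filled contour plot of the same test function, reusing X, Y, z from above
fig, ax = plt.subplots()
cs = ax.contourf(X, Y, z, levels=50)
fig.colorbar(cs)
ax.set_xlabel("x")
ax.set_ylabel("y")
plt.show()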
The next routine performs the gradient descent iteration. The step size is computed from the gradient $g$ and a constant Hessian approximation $H$ as $g^Tg/(g^THg)$, the exact minimizing step along $-g$ for a quadratic model.
# Hessian of the test function at the origin, used as a constant approximation
hessiana = [[2, 0], [0, 2]]
spot_pt = np.asarray([0.7, .2])
grad = dtestefunc(spot_pt[0], spot_pt[1])
# exact line-search step for the quadratic model: (g . g) / (g . H g)
learning_rate = np.inner(grad, grad) / np.inner(grad, np.matmul(hessiana, grad))
max_iterations = 10
min_precision = 1e-10
count_iterations = 0
precision = 10
while count_iterations < max_iterations and precision > min_precision:
    spotfunc = testefunc(spot_pt[0], spot_pt[1])
    grad = dtestefunc(spot_pt[0], spot_pt[1])
    # uncomment to recompute the step size at every iteration:
    # learning_rate = np.inner(grad, grad) / np.inner(grad, np.matmul(hessiana, grad))
    next_pt = spot_pt - learning_rate * grad
    precision = np.linalg.norm(next_pt - spot_pt)
    print(next_pt, precision)
    spot_pt = next_pt
    count_iterations += 1
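As a cross-check (a sketch, not part of the original routine; it assumes SciPy is installed), the point produced by the loop above can be compared with the minimizer returned by a general-purpose optimizer started from the same initial guess.
# optional cross-check with scipy.optimize.minimize, starting from the same point
from scipy.optimize import minimize

res = minimize(lambda p: testefunc(p[0], p[1]),
               x0=np.asarray([0.7, .2]),
               jac=lambda p: dtestefunc(p[0], p[1]))
print(res.x, res.fun)   # minimizer and minimum value found by SciPy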
The next set of instructions runs gradient descent on a least squares problem. Given the matrix $A$ and the vector $b$ defined by
\begin{equation}\label{eq:GD.100} A=\begin{bmatrix} 1 & 2\\ 1 & 1\\ 1 & 1 \end{bmatrix},\quad b=\begin{bmatrix} 0\\ 0\\ 1 \end{bmatrix}, \end{equation}
we want to compute the best approximation of the vector $b$ in the column space of $A$, that is, the $X$ that minimizes the distance
\begin{equation}\label{eq:GD.120} f(X)=\frac{1}{2}\Vert AX-b\Vert^2. \end{equation}
Computing the gradient of the function $f$ (write $f(X)=\tfrac{1}{2}(AX-b)^T(AX-b)$ and differentiate with respect to $X$) we obtain
\begin{equation}\label{eq:GD.130} \nabla f(X) = A^T(AX-b). \end{equation}
def LSfunc(x, MA, Mb):
    # squared residual ||MA x - Mb||^2 (the 1/2 factor is omitted; it does not change the minimizer)
    tmp = np.matmul(MA, x) - Mb
    out = np.matmul(np.transpose(tmp), tmp)
    return out

def dLSfunc(x, MA, Mb):
    # gradient of f: MA^T (MA x - Mb)
    tmp = np.matmul(MA, x) - Mb
    out = np.matmul(np.transpose(MA), tmp)
    return out
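A quick sanity check (a sketch, not in the original listing) is to compare dLSfunc against a central finite-difference approximation of LSfunc; since LSfunc omits the $1/2$ factor, its numerical gradient should be twice $A^T(AX-b)$. The helper fd_gradient, the test matrices and the test point below are illustrative.
# illustrative check: finite-difference gradient of LSfunc vs 2 * dLSfunc
def fd_gradient(x, MA, Mb, h=1e-6):
    x = np.asarray(x, dtype=float)
    g = np.zeros(x.shape[0])
    for i in range(x.shape[0]):
        e = np.zeros_like(x)
        e[i] = h
        g[i] = float(np.squeeze(LSfunc(x + e, MA, Mb) - LSfunc(x - e, MA, Mb))) / (2 * h)
    return g

A_test = np.asarray([[1, 2], [1, 1], [1, 1]], dtype=float)   # the matrix A defined above
b_test = np.asarray([[0], [0], [1]], dtype=float)            # the vector b defined above
x_test = np.asarray([[1.0], [1.0]])
print(fd_gradient(x_test, A_test, b_test))            # numerical gradient of ||Ax - b||^2
print(2 * dLSfunc(x_test, A_test, b_test).ravel())    # 2 * A^T(Ax - b), should agree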
mma = np.asarray([[1, 2], [1, 1], [1, 1]], dtype=float)   # the matrix A
mb = np.asarray([[0], [0], [1]], dtype=float)             # the vector b
spot = np.asarray([[1], [1]], dtype=float)                # starting point
learning_rate = .1
max_iterations = 1000
min_precision = 1e-10
count_iterations = 0
precision = 10
while count_iterations < max_iterations and precision > min_precision:
    spotfunc = LSfunc(spot, mma, mb)
    dspotfunc = dLSfunc(spot, mma, mb)
    next_spot = spot - learning_rate * dspotfunc
    precision = np.linalg.norm(next_spot - spot)
    spot = next_spot
    count_iterations += 1
print(next_spot, precision)   # final iterate and size of the last step
The exact minimizer and the corresponding projection of $b$ onto the column space of $A$ may be computed via the normal equations,
\begin{equation}\label{eq:GD.140} \hat{X} = (A^TA)^{-1}A^Tb, \qquad p = A\hat{X} = A(A^TA)^{-1}A^Tb. \end{equation}
smma = np.matmul(np.transpose(mma), mma)   # A^T A
tmab = np.matmul(np.transpose(mma), mb)          # A^T b
x_hat = np.matmul(np.linalg.inv(smma), tmab)     # exact minimizer (A^T A)^{-1} A^T b
exact_p = np.matmul(mma, x_hat)                  # projection p = A x_hat
print(x_hat)
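As a final cross-check (a sketch, not in the original), NumPy's built-in least squares solver should return the same minimizer as the normal equations, and the gradient descent iterate computed above should be close to it.
# optional cross-check: np.linalg.lstsq solves the same least squares problem
lstsq_x, residuals, rank, sv = np.linalg.lstsq(mma, mb, rcond=None)
print(lstsq_x)                          # should match x_hat
print(np.linalg.norm(lstsq_x - spot))   # distance to the gradient descent result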