0% found this document useful (0 votes)
16 views12 pages

Numerical Gradient and Optimization Methods

Uploaded by

Andres Alcivar
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
16 views12 pages

Numerical Gradient and Optimization Methods

Uploaded by

Andres Alcivar
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as PDF, TXT or read online on Scribd

ANDRES ALCIVAR

SOONGSIL UNIVERSITY
1. GRADIENT_1D

import numpy as np
import matplotlib.pyplot as plt

# Define the numerical differentiation function


def numerical_diff(f, x, h=1e-4):
    """Approximate the derivative of ``f`` at ``x`` with a central difference.

    Args:
        f: Callable of one argument; it is evaluated at ``x + h`` and
            ``x - h``.
        x: Point at which to differentiate.
        h: Half-width of the difference stencil. Defaults to ``1e-4``, the
            value that used to be hard-coded in the body.

    Returns:
        ``(f(x + h) - f(x - h)) / (2 * h)``.
    """
    return (f(x + h) - f(x - h)) / (2 * h)  # Central difference formula

# Define the function for which we want to calculate the derivative


def function_1(x):
    """Return ``0.01 * x ** 2 + 0.1 * x``.

    The expression uses only arithmetic operators, so ``x`` may be a scalar
    or anything that supports them element-wise (the script below passes a
    NumPy array).
    """
    return 0.01 * x ** 2 + 0.1 * x

# Function to create a tangent line at a given point x


def tangent_line(f, x):
    """Build the tangent line to ``f`` at ``x``.

    The slope is obtained from ``numerical_diff(f, x)`` and is printed as a
    side effect so it can be checked by eye.

    Args:
        f: Callable of one argument.
        x: Point of tangency.

    Returns:
        A callable ``t -> d * t + y`` where ``d`` is the slope at ``x`` and
        ``y`` is the line's y-intercept.
    """
    d = numerical_diff(f, x)  # Slope of f at x
    print(d)  # Print the derivative for verification
    y = f(x) - d * x  # y-intercept chosen so the line passes through (x, f(x))
    return lambda t: d * t + y

# Sample x from 0.0 up to (but not including) 20.0 in steps of 0.1
x = np.arange(0.0, 20.0, 0.1)
y = function_1(x)  # Calculate y values for the function

# Labels for the plot
plt.xlabel("x")
plt.ylabel("f(x)")

# Get the tangent line function at x = 5
tf = tangent_line(function_1, 5)
y2 = tf(x)  # Calculate y values for the tangent line

# Plot the original function and its tangent line at x = 5
plt.plot(x, y)
plt.plot(x, y2)
plt.show()  # Display the plot
ANDRES ALCIVAR
SOONGSIL UNIVERSITY
ANDRES ALCIVAR
SOONGSIL UNIVERSITY

2. GRADIENT_2D
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Function to compute the numerical gradient of a scalar function at a


single point
def _numerical_gradient_no_batch(f, x):
h = 1e-4 # Small number for finite difference approximation
grad = np.zeros_like(x) # Initialize gradient array with the same
shape as x

# Iterate over all dimensions of x to calculate partial


derivatives
for idx in range([Link]):
tmp_val = x[idx] # Store the original value
x[idx] = float(tmp_val) + h
fxh1 = f(x) # Evaluate f at x+h

x[idx] = tmp_val - h
fxh2 = f(x) # Evaluate f at x-h
grad[idx] = (fxh1 - fxh2) / (2 * h) # Compute the partial
derivative

x[idx] = tmp_val # Restore the original value of x[idx]

return grad # Return the gradient vector

# Function to compute the numerical gradient of a function at one or
# more points
def numerical_gradient(f, X):
    """Compute the numerical gradient of ``f`` at one point or at many.

    Args:
        f: Callable passed through to ``_numerical_gradient_no_batch``.
        X: NumPy array. If ``X.ndim == 1`` it is treated as a single point;
            otherwise each ``X[idx]`` is treated as a separate point.

    Returns:
        The gradient for the single point, or an array shaped like ``X``
        whose ``idx``-th entry is the gradient at ``X[idx]``.
    """
    if X.ndim == 1:
        return _numerical_gradient_no_batch(f, X)  # Handle single point case

    grad = np.zeros_like(X)  # Initialize gradient matrix

    # Compute gradient for each point in X
    for idx, x in enumerate(X):
        grad[idx] = _numerical_gradient_no_batch(f, x)

    return grad  # Return the gradient matrix

# Function to be minimized (in this case, a simple sum of squares function)
def function_2(x):
    """Return the sum of squares of ``x``.

    Args:
        x: NumPy array. A one-dimensional array is reduced to a single
            value; any other array is summed along ``axis=1``, giving one
            value per row for a 2-D input.

    Returns:
        ``np.sum(x ** 2)`` for 1-D input, else ``np.sum(x ** 2, axis=1)``.
    """
    if x.ndim == 1:
        return np.sum(x ** 2)
    return np.sum(x ** 2, axis=1)

# Main script execution starts here


if __name__ == '__main__':
    # Generate grid points for plotting
    x0 = np.arange(-2, 2.5, 0.25)
    x1 = np.arange(-2, 2.5, 0.25)
    X, Y = np.meshgrid(x0, x1)  # Create a meshgrid for the 2D plot

    # Flatten the grid arrays for processing
    X = X.flatten()
    Y = Y.flatten()

    # Compute the gradient of function_2 at each grid point. The points are
    # stacked one per row for numerical_gradient, and the result is
    # transposed back so grad[0] / grad[1] hold the two components.
    grad = numerical_gradient(function_2, np.array([X, Y]).T).T

    # Plotting
    plt.figure()
    # The arrows are drawn along -grad, i.e. the negative gradient.
    plt.quiver(X, Y, -grad[0], -grad[1], angles="xy", color="#666666")
    plt.xlim([-2, 2])
    plt.ylim([-2, 2])
    plt.xlabel('x0')  # Label for the x-axis
    plt.ylabel('x1')  # Label for the y-axis
    plt.grid()  # Add a grid to the plot
    plt.draw()  # Render the plot
    plt.show()  # Display the plot
ANDRES ALCIVAR
SOONGSIL UNIVERSITY

3. GRADIENT_METHOD

import numpy as np
import matplotlib.pyplot as plt
from gradient_2d import numerical_gradient  # Import the numerical gradient function

# Function to perform gradient descent on a given function f
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    """Minimise ``f`` with fixed-step gradient descent.

    Args:
        f: Callable handed to ``numerical_gradient`` together with the
            current position.
        init_x: Starting point (array-like). It is copied, so the caller's
            object is left untouched.
        lr: Learning rate multiplying the gradient at every step.
        step_num: Number of update steps to perform.

    Returns:
        A tuple ``(x, x_history)``: the final position, and an array
        stacking the position recorded *before* each update (so it has
        ``step_num`` rows and does not include the final position).
    """
    # Work on a private float copy: the update below is in place and would
    # otherwise silently overwrite the caller's init_x.
    x = np.array(init_x, dtype=float)
    x_history = []  # Positions visited during the descent

    # Perform the gradient descent for a given number of steps
    for _ in range(step_num):
        x_history.append(x.copy())  # Record the current position

        grad = numerical_gradient(f, x)  # Gradient at the current position
        x -= lr * grad  # Move against the gradient

    return x, np.array(x_history)

# Function to be optimized, in this case, a simple quadratic function


def function_2(x):
    """Return ``x[0] ** 2 + x[1] ** 2`` for an indexable ``x``."""
    return x[0] ** 2 + x[1] ** 2

# Initial point from which the gradient descent starts
init_x = np.array([-3.0, 4.0])

# Set the learning rate and number of steps for the gradient descent
lr = 0.1
step_num = 20
x, x_history = gradient_descent(function_2, init_x, lr=lr, step_num=step_num)

# Plotting setup
plt.plot([-5, 5], [0, 0], '--b')  # Draw x-axis
plt.plot([0, 0], [-5, 5], '--b')  # Draw y-axis
plt.plot(x_history[:, 0], x_history[:, 1], 'o')  # Plot the history of x positions

# Set plot limits and labels
plt.xlim(-3.5, 3.5)
plt.ylim(-4.5, 4.5)
plt.xlabel("X0")
plt.ylabel("X1")
plt.show()  # Display the plot
ANDRES ALCIVAR
SOONGSIL UNIVERSITY
4. GRADIENT_SIMPLENET
import sys, os
# Append the parent directory to sys.path to import modules from there
sys.path.append(os.pardir)
import numpy as np  # Import numpy for numerical operations
from common.functions import softmax, cross_entropy_error  # Import common functions used in neural networks
from common.gradient import numerical_gradient  # Import the gradient computation function

# Define a simple neural network class


class simpleNet:
    """Single-layer network with a 2x3 weight matrix and no bias."""

    def __init__(self):
        # Initialize the weights randomly using a Gaussian distribution
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        """Return the scores ``np.dot(x, self.W)`` for input ``x``."""
        return np.dot(x, self.W)

    def loss(self, x, t):
        """Return the cross-entropy loss for input ``x`` and target ``t``.

        The scores from ``predict`` are passed through ``softmax`` and the
        result is compared with ``t`` by ``cross_entropy_error``.
        """
        z = self.predict(x)  # Score vector
        y = softmax(z)  # Scores turned into a probability distribution
        loss = cross_entropy_error(y, t)

        return loss

# Create an instance of the neural network
net = simpleNet()

# Test data
x = np.array([0.6, 0.9])  # Input data
t = np.array([0, 0, 1])  # True label (one-hot encoded)


# Define the function to compute the loss
def f(w):
    # The argument is ignored: the loss is always read from net.W.
    # NOTE(review): this presumably relies on numerical_gradient perturbing
    # net.W in place - confirm against common.gradient.
    return net.loss(x, t)


# Calculate the gradient of the loss with respect to the weights
dW = numerical_gradient(f, net.W)

print(dW)  # Print the gradient of the weights


ANDRES ALCIVAR
SOONGSIL UNIVERSITY
5. TWO_LAYER_NET
import sys, os
# Append the parent directory to the system path to allow importing modules from there
sys.path.append(os.pardir)
from common.functions import *  # Import common neural network functions like sigmoid and softmax
from common.gradient import numerical_gradient  # Import the function to compute numerical gradients
import numpy as np  # Import numpy for matrix and vector computations

# Define a class for a two-layer neural network


class TwoLayerNet:
    """Two-layer fully connected network (sigmoid hidden layer, softmax output).

    Parameters live in the ``params`` dict under the keys ``'W1'``, ``'b1'``,
    ``'W2'`` and ``'b2'``.
    """

    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=0.01):
        """Create the parameters.

        Args:
            input_size: Number of input features (rows of ``W1``).
            hidden_size: Number of hidden units.
            output_size: Number of outputs (columns of ``W2``).
            weight_init_std: Factor multiplying the standard-normal samples
                used for the weights. Biases start at zero.
        """
        self.params = {}
        self.params['W1'] = weight_init_std * \
            np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * \
            np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Run the forward pass and return the softmax output for ``x``."""
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        # Forward pass through the network
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        return y

    def loss(self, x, t):
        """Return ``cross_entropy_error`` between ``predict(x)`` and ``t``."""
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` that are classified correctly.

        Both the prediction and ``t`` are reduced with ``argmax`` along
        axis 1, so ``t`` must be two-dimensional (e.g. one-hot rows).
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        """Compute the loss gradients by numerical differentiation.

        Returns:
            Dict with the keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``.
        """
        # The argument W is unused: the loss is always evaluated on the
        # arrays stored in self.params.
        # NOTE(review): presumably relies on the module-level
        # numerical_gradient perturbing those arrays in place - confirm.
        loss_W = lambda W: self.loss(x, t)

        # Inside a method the bare name below resolves to the imported
        # module-level function, not to this method.
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads

    def gradient(self, x, t):
        """Compute the loss gradients analytically (backward pass).

        Args:
            x: Input batch; ``x.shape[0]`` is used as the batch size.
            t: Targets, subtracted element-wise from the softmax output
                (assumes the same shape as the output, e.g. one-hot rows -
                TODO confirm against callers).

        Returns:
            Dict with the keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``.
        """
        # Unpack parameters
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}

        batch_num = x.shape[0]

        # Forward pass: compute activations
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        # Backward pass: compute gradients
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        dz1 = np.dot(dy, W2.T)
        da1 = sigmoid_grad(a1) * dz1
        grads['W1'] = np.dot(x.T, da1)
        grads['b1'] = np.sum(da1, axis=0)

        return grads


ANDRES ALCIVAR
SOONGSIL UNIVERSITY
6. TRAINING_NEURALNET
import sys, os
# Append the parent directory to the system path to allow importing from there
sys.path.append(os.pardir)
import numpy as np  # Numerical computing library
import matplotlib.pyplot as plt  # Plotting library
from dataset.mnist import load_mnist  # Function to load the MNIST dataset
from two_layer_net import TwoLayerNet  # Class representing a two-layer neural network

# Load the MNIST dataset with normalization and one-hot encoding of the labels
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                  one_hot_label=True)

# Initialize the neural network with the specified architecture
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Define training parameters
iters_num = 10000  # Number of training iterations
train_size = x_train.shape[0]  # Total number of training samples
batch_size = 100  # Number of samples in each mini-batch
learning_rate = 0.1  # Learning rate for weight updates

# Lists to store the loss and accuracy values for plotting
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Number of iterations per epoch (full pass through the training data).
# Integer division keeps this an int, so the `i % iter_per_epoch == 0` check
# below still fires regularly when train_size is not a multiple of
# batch_size; with true division the float remainder would almost never be 0.
iter_per_epoch = max(train_size // batch_size, 1)

# Training loop
for i in range(iters_num):
    # Randomly select a mini-batch of samples
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient of the loss function
    grad = network.gradient(x_batch, t_batch)

    # Update the network parameters based on the gradient
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Calculate and record the loss for the current mini-batch
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch, evaluate and print the training and test accuracy
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " +
              str(test_acc))

# Plotting the training and test accuracies
markers = {'train': 'o', 'test': 's'}  # Markers for the plot
x = np.arange(len(train_acc_list))  # Epochs on the x-axis
plt.plot(x, train_acc_list, label='train acc')  # Training accuracy plot
plt.plot(x, test_acc_list, label='test acc', linestyle='--')  # Test accuracy plot
plt.xlabel("epochs")  # X-axis label
plt.ylabel("accuracy")  # Y-axis label
plt.ylim(0, 1.0)  # Set y-axis limits
plt.legend(loc='lower right')  # Add a legend
plt.show()  # Display the plot
ANDRES ALCIVAR
SOONGSIL UNIVERSITY

You might also like