ANDRES ALCIVAR
SOONGSIL UNIVERSITY
1. GRADIENT_1D
import numpy as np
import [Link] as plt
# Define the numerical differentiation function
def numerical_diff(f, x, h=1e-4):
    """Approximate the derivative of ``f`` at ``x`` by a central difference.

    Args:
        f: Callable that is evaluated at ``x + h`` and ``x - h``.
        x: Point at which the derivative is estimated.
        h: Step size of the finite difference. Defaults to ``1e-4``, the
            value that was previously hard-coded.

    Returns:
        ``(f(x + h) - f(x - h)) / (2 * h)``.
    """
    # Central difference formula: symmetric around x, so the error is
    # second order in h rather than first order (forward difference).
    return (f(x + h) - f(x - h)) / (2 * h)
# Define the function for which we want to calculate the derivative
def function_1(x):
    """Evaluate the quadratic ``0.01 * x**2 + 0.1 * x``.

    Args:
        x: Value to evaluate at. Only ``*``, ``**`` and ``+`` are applied,
            so anything supporting those operators works (the script below
            passes a NumPy array).

    Returns:
        The value of the quadratic at ``x``.
    """
    return 0.01 * x ** 2 + 0.1 * x
# Function to create a tangent line at a given point x
def tangent_line(f, x):
    """Build the tangent line of ``f`` at the point ``x``.

    The slope is estimated with ``numerical_diff`` and printed to stdout
    for verification.

    Args:
        f: Function whose tangent is wanted.
        x: Point of tangency.

    Returns:
        A function ``t -> d * t + y`` where ``d`` is the estimated slope
        and ``y`` is the intercept chosen so the line passes through
        ``(x, f(x))``.
    """
    d = numerical_diff(f, x)  # Slope: numerical derivative of f at x
    print(d)  # Print the derivative for verification
    y = f(x) - d * x  # y-intercept of the tangent line
    return lambda t: d * t + y  # Linear function representing the tangent
# Sample x from 0.0 up to (but not including) 20.0 in steps of 0.1
x = np.arange(0.0, 20.0, 0.1)
y = function_1(x)  # Calculate y values for the function

# Labels for the plot
plt.xlabel("x")
plt.ylabel("f(x)")

# Get the tangent line function at x = 5
tf = tangent_line(function_1, 5)
y2 = tf(x)  # Calculate y values for the tangent line

# Plot the original function and its tangent line at x = 5
plt.plot(x, y)
plt.plot(x, y2)
plt.show()  # Display the plot
ANDRES ALCIVAR
SOONGSIL UNIVERSITY
ANDRES ALCIVAR
SOONGSIL UNIVERSITY
2. GRADIENT_2D
import numpy as np
import [Link] as plt
from mpl_toolkits.mplot3d import Axes3D
# Function to compute the numerical gradient of a scalar function at a
single point
def _numerical_gradient_no_batch(f, x):
h = 1e-4 # Small number for finite difference approximation
grad = np.zeros_like(x) # Initialize gradient array with the same
shape as x
# Iterate over all dimensions of x to calculate partial
derivatives
for idx in range([Link]):
tmp_val = x[idx] # Store the original value
x[idx] = float(tmp_val) + h
fxh1 = f(x) # Evaluate f at x+h
x[idx] = tmp_val - h
fxh2 = f(x) # Evaluate f at x-h
grad[idx] = (fxh1 - fxh2) / (2 * h) # Compute the partial
derivative
x[idx] = tmp_val # Restore the original value of x[idx]
return grad # Return the gradient vector
def numerical_gradient(f, X):
    """Estimate the gradient of ``f`` at one point or at a batch of points.

    Args:
        f: Scalar-valued callable, passed through to
            ``_numerical_gradient_no_batch``.
        X: NumPy array. If 1-D it is treated as a single point; otherwise
            it is iterated along its first axis and each item is treated
            as one point.

    Returns:
        For 1-D input, whatever ``_numerical_gradient_no_batch`` returns;
        otherwise an array shaped like ``X`` whose ``idx``-th entry is the
        gradient at ``X[idx]``.
    """
    if X.ndim == 1:
        return _numerical_gradient_no_batch(f, X)  # Single point case

    grad = np.zeros_like(X)  # Initialize gradient matrix

    # Compute gradient for each point in X
    for idx, x in enumerate(X):
        grad[idx] = _numerical_gradient_no_batch(f, x)

    return grad  # Return the gradient matrix
def function_2(x):
    """Sum-of-squares function used as the objective to be minimized.

    Args:
        x: NumPy array. A 1-D array is one point; for any other rank the
            squares are summed along axis 1, i.e. one value per row for
            the 2-D input used in this file.

    Returns:
        ``np.sum(x**2)`` for 1-D input, ``np.sum(x**2, axis=1)`` otherwise.
    """
    if x.ndim == 1:
        return np.sum(x ** 2)
    else:
        return np.sum(x ** 2, axis=1)
# Main script execution starts here
if __name__ == '__main__':
    # Generate grid points for plotting
    x0 = np.arange(-2, 2.5, 0.25)
    x1 = np.arange(-2, 2.5, 0.25)
    X, Y = np.meshgrid(x0, x1)  # Create a meshgrid for the 2D plot

    # Flatten the grid arrays for processing
    X = X.flatten()
    Y = Y.flatten()

    # Compute the gradient of function_2 at each grid point. The points are
    # stacked as rows (one (x0, x1) pair per row) for the batch call, and
    # the result is transposed back so grad[0] / grad[1] hold the two
    # partial derivatives for all points.
    grad = numerical_gradient(function_2, np.array([X, Y]).T).T

    # Plotting
    plt.figure()
    # Arrows are drawn along -grad, i.e. the direction of steepest descent
    plt.quiver(X, Y, -grad[0], -grad[1], angles="xy",
               color="#666666")
    plt.xlim([-2, 2])
    plt.ylim([-2, 2])
    plt.xlabel('x0')  # Label for the x-axis
    plt.ylabel('x1')  # Label for the y-axis
    plt.grid()  # Add a grid to the plot
    plt.draw()  # Render the plot
    plt.show()  # Display the plot
ANDRES ALCIVAR
SOONGSIL UNIVERSITY
3. GRADIENT_METHOD
import numpy as np
import [Link] as plt
from gradient_2d import numerical_gradient # Import the numerical
gradient function
# Function to perform gradient descent on a given function f
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    """Minimize ``f`` by plain gradient descent.

    Args:
        f: Scalar-valued function to minimize; its gradient is estimated
            with ``numerical_gradient``.
        init_x: Starting point. It is copied, so the caller's array is
            left untouched (previously it was updated in place as a side
            effect of ``x -= ...``).
        lr: Learning rate, i.e. the step size multiplying the gradient.
        step_num: Number of update steps to perform.

    Returns:
        A tuple ``(x, x_history)``: the final position and an array with
        the position *before* each of the ``step_num`` updates.
    """
    # Work on a float copy: avoids mutating the caller's init_x and lets
    # the in-place update below succeed for integer or list input too.
    x = np.array(init_x, dtype=float)
    x_history = []  # Positions visited during the descent

    # Perform the gradient descent for a given number of steps
    for _ in range(step_num):
        x_history.append(x.copy())  # Record the current position
        grad = numerical_gradient(f, x)  # Gradient at the current x
        x -= lr * grad  # Move against the gradient

    return x, np.array(x_history)
# Function to be optimized, in this case, a simple quadratic function
def function_2(x):
    """Return ``x[0]**2 + x[1]**2``, a simple quadratic bowl.

    Args:
        x: Indexable object; only elements 0 and 1 are read.

    Returns:
        The sum of the squares of the first two elements of ``x``.
    """
    return x[0] ** 2 + x[1] ** 2
# Initial point from which the gradient descent starts
init_x = np.array([-3.0, 4.0])

# Set the learning rate and number of steps for the gradient descent
lr = 0.1
step_num = 20
x, x_history = gradient_descent(function_2, init_x, lr=lr,
                                step_num=step_num)

# Plotting setup
plt.plot([-5, 5], [0, 0], '--b')  # Draw x-axis
plt.plot([0, 0], [-5, 5], '--b')  # Draw y-axis
plt.plot(x_history[:, 0], x_history[:, 1], 'o')  # History of x positions

# Set plot limits and labels
plt.xlim(-3.5, 3.5)
plt.ylim(-4.5, 4.5)
plt.xlabel("X0")
plt.ylabel("X1")
plt.show()  # Display the plot
ANDRES ALCIVAR
SOONGSIL UNIVERSITY
4. GRADIENT_SIMPLENET
import sys, os
# Append the parent directory to [Link] to import modules from there
[Link]([Link])
import numpy as np # Import numpy for numerical operations
from [Link] import softmax, cross_entropy_error # Import
common functions used in neural networks
from [Link] import numerical_gradient # Import the gradient
computation function
# Define a simple neural network class
class simpleNet:
    """Single-layer network with a 2x3 weight matrix and no bias."""

    def __init__(self):
        # Initialize the weights randomly using a Gaussian distribution
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        """Return the scores ``x . W`` (matrix product of input and weights)."""
        return np.dot(x, self.W)

    def loss(self, x, t):
        """Compute the loss for input ``x`` and true output ``t``.

        The scores from ``predict`` are passed through ``softmax`` and the
        result is compared with ``t`` by ``cross_entropy_error`` (both
        imported at the top of this script).
        """
        z = self.predict(x)  # Get the score vector by predicting
        y = softmax(z)  # Apply softmax to get probability distribution
        loss = cross_entropy_error(y, t)  # Calculate the cross-entropy loss
        return loss
# Create an instance of the neural network
net = simpleNet()

# Test data
x = np.array([0.6, 0.9])  # Input data
t = np.array([0, 0, 1])  # True label (one-hot encoded)

# Loss as a function of the weights. The argument ``w`` is deliberately
# ignored: the loss is read from net.W itself.
# NOTE(review): this only yields a meaningful gradient if
# numerical_gradient perturbs the array it is given (net.W) in place
# -- confirm against the imported implementation.
f = lambda w: net.loss(x, t)

# Gradient of the loss with respect to the weights
dW = numerical_gradient(f, net.W)
print(dW)  # Print the gradient of the weights
ANDRES ALCIVAR
SOONGSIL UNIVERSITY
5. TWO_LAYER_NET
import sys, os
# Append the parent directory to the system path to allow importing
modules from there
[Link]([Link])
from [Link] import * # Import common neural network
functions like sigmoid and softmax
from [Link] import numerical_gradient # Import the function
to compute numerical gradients
import numpy as np # Import numpy for matrix and vector computations
# Define a class for a two-layer neural network
class TwoLayerNet:
    """Two-layer network: affine -> sigmoid -> affine -> softmax.

    All parameters live in the dict ``self.params`` under the keys
    ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'``. ``sigmoid``, ``softmax``,
    ``sigmoid_grad`` and ``cross_entropy_error`` are expected to come from
    the star import at the top of this script.
    """

    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=0.01):
        """Create the parameters.

        Args:
            input_size: Number of input features (rows of ``W1``).
            hidden_size: Number of hidden units.
            output_size: Number of outputs (columns of ``W2``).
            weight_init_std: Factor multiplying the standard-normal
                samples used for the weights.
        """
        # Weights: scaled Gaussian samples; biases: zeros
        self.params = {}
        self.params['W1'] = weight_init_std * \
            np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * \
            np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Run the forward pass and return the softmax output for ``x``."""
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        # Forward pass through the network
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        return y  # Return the network output

    def loss(self, x, t):
        """Return the cross-entropy error between ``predict(x)`` and ``t``."""
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` that are classified correctly.

        Both the prediction and ``t`` are reduced with ``argmax`` along
        axis 1, so ``t`` must be 2-D (e.g. one-hot encoded labels).
        """
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy

    def numerical_gradient(self, x, t):
        """Estimate the loss gradient for every parameter numerically.

        Returns:
            Dict with the same keys as ``self.params``.
        """
        # The argument W is ignored: the loss is evaluated on self.params.
        # NOTE(review): relies on the module-level numerical_gradient
        # perturbing the parameter arrays in place -- confirm.
        loss_W = lambda W: self.loss(x, t)

        # Inside this method the bare name ``numerical_gradient`` resolves
        # to the module-level function, not to this method.
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads

    def gradient(self, x, t):
        """Compute the loss gradient for every parameter by backpropagation.

        Args:
            x: 2-D input batch; ``x.shape[0]`` is used as the batch size.
            t: Targets with the same shape as the network output.

        Returns:
            Dict with the same keys as ``self.params``.
        """
        # Unpack parameters
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}

        batch_num = x.shape[0]

        # Forward pass: compute activations
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        # Backward pass: compute gradients
        dy = (y - t) / batch_num  # Averaged over the batch
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        dz1 = np.dot(dy, W2.T)  # Gradient w.r.t. the hidden activation z1
        da1 = sigmoid_grad(a1) * dz1  # Back through the sigmoid
        grads['W1'] = np.dot(x.T, da1)
        grads['b1'] = np.sum(da1, axis=0)

        return grads
ANDRES ALCIVAR
SOONGSIL UNIVERSITY
6. TRAINING_NEURALNET
import sys, os
# Append the parent directory to the system path to allow importing from there
sys.path.append(os.pardir)
import numpy as np  # Numerical computing library
import matplotlib.pyplot as plt  # Plotting library
from dataset.mnist import load_mnist  # Function to load the MNIST dataset
from two_layer_net import TwoLayerNet  # Two-layer neural network class

# Load the MNIST dataset with normalization and one-hot encoding of the labels
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                  one_hot_label=True)

# Initialize the neural network with the specified architecture
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Define training parameters
iters_num = 10000  # Number of training iterations
train_size = x_train.shape[0]  # Total number of training samples
batch_size = 100  # Number of samples in each mini-batch
learning_rate = 0.1  # Learning rate for weight updates

# Lists to store the loss and accuracy values for plotting
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Number of iterations per epoch (full pass through the training data).
# Integer division keeps this an int, so the ``i % iter_per_epoch`` check
# below also fires when train_size is not a multiple of batch_size (with
# true division the float remainder would only ever be 0 at i == 0).
iter_per_epoch = max(train_size // batch_size, 1)

# Training loop
for i in range(iters_num):
    # Randomly select a mini-batch of samples
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient of the loss function
    grad = network.gradient(x_batch, t_batch)

    # Update the network parameters based on the gradient
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Calculate and record the loss for the current mini-batch
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch (including iteration 0), evaluate and print the
    # training and test accuracy
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " +
              str(test_acc))

# Plotting the training and test accuracies
markers = {'train': 'o', 'test': 's'}  # Markers for the plot
x = np.arange(len(train_acc_list))  # Epochs on the x-axis
plt.plot(x, train_acc_list, label='train acc')  # Training accuracy plot
plt.plot(x, test_acc_list, label='test acc', linestyle='--')  # Test accuracy
plt.xlabel("epochs")  # X-axis label
plt.ylabel("accuracy")  # Y-axis label
plt.ylim(0, 1.0)  # Set y-axis limits
plt.legend(loc='lower right')  # Add a legend
plt.show()  # Display the plot
ANDRES ALCIVAR
SOONGSIL UNIVERSITY