0% found this document useful (0 votes)

16 views12 pages

Numerical Gradient and Optimization Methods

Uploaded by

Andres Alcivar

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

16 views12 pages

Numerical Gradient and Optimization Methods

Uploaded by

Andres Alcivar

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

ANDRES ALCIVAR

SOONGSIL UNIVERSITY
1. GRADIENT_1D

import matplotlib.pyplot as plt
import numpy as np

# Define the numerical differentiation function

def numerical_diff(f, x, h=1e-4):
    """Approximate the derivative of ``f`` at ``x`` by a central difference.

    Args:
        f: Callable evaluated at ``x + h`` and ``x - h``.
        x: Point at which to differentiate.
        h: Step used on either side of ``x``. Defaults to ``1e-4``, the
            value that was previously hard-coded.

    Returns:
        ``(f(x + h) - f(x - h)) / (2 * h)``.
    """
    return (f(x + h) - f(x - h)) / (2 * h)  # Central difference formula

# Define the function for which we want to calculate the derivative

def function_1(x):
    """Evaluate the sample quadratic ``0.01 * x**2 + 0.1 * x`` at ``x``."""
    return 0.01 * x ** 2 + 0.1 * x

# Function to create a tangent line at a given point x

def tangent_line(f, x):
    """Build the tangent line to ``f`` at the point ``x``.

    The slope comes from ``numerical_diff(f, x)`` and is printed as a
    side effect so it can be checked by eye.

    Args:
        f: Function whose tangent is wanted.
        x: Point of tangency.

    Returns:
        A one-argument callable ``t -> slope * t + intercept``.
    """
    slope = numerical_diff(f, x)  # Derivative of f at x
    print(slope)  # Print the derivative for verification
    intercept = f(x) - slope * x  # y-intercept of the tangent line
    return lambda t: slope * t + intercept

# Create an array of x values from 0 to 20 with an increment of 0.1
x = np.arange(0.0, 20.0, 0.1)
y = function_1(x)  # Calculate y values for the function

# Labels for the plot
plt.xlabel("x")
plt.ylabel("f(x)")

# Get the tangent line function at x = 5
tf = tangent_line(function_1, 5)
y2 = tf(x)  # Calculate y values for the tangent line

# Plot the original function and its tangent line at x = 5
plt.plot(x, y)
plt.plot(x, y2)
plt.show()  # Display the plot
ANDRES ALCIVAR
SOONGSIL UNIVERSITY
ANDRES ALCIVAR
SOONGSIL UNIVERSITY

2. GRADIENT_2D
import numpy as np
import [Link] as plt
from mpl_toolkits.mplot3d import Axes3D

# Function to compute the numerical gradient of a scalar function at a

single point
def _numerical_gradient_no_batch(f, x):
h = 1e-4 # Small number for finite difference approximation
grad = np.zeros_like(x) # Initialize gradient array with the same
shape as x

# Iterate over all dimensions of x to calculate partial

derivatives
for idx in range([Link]):
tmp_val = x[idx] # Store the original value
x[idx] = float(tmp_val) + h
fxh1 = f(x) # Evaluate f at x+h

x[idx] = tmp_val - h
fxh2 = f(x) # Evaluate f at x-h
grad[idx] = (fxh1 - fxh2) / (2 * h) # Compute the partial
derivative

x[idx] = tmp_val # Restore the original value of x[idx]

return grad # Return the gradient vector

# Function to compute the numerical gradient of a function at one or
# more points
def numerical_gradient(f, X):
    """Estimate the gradient of ``f`` at one point or at each row of ``X``.

    Args:
        f: Callable passed through to ``_numerical_gradient_no_batch``.
        X: NumPy array. A 1-D array is treated as a single point; otherwise
            the array is iterated along its first axis and each item is
            handled as its own point.

    Returns:
        The gradient vector for 1-D input, otherwise an array shaped like
        ``X`` whose ``idx``-th entry is the gradient for ``X[idx]``.
    """
    if X.ndim == 1:
        return _numerical_gradient_no_batch(f, X)  # Single point case

    grad = np.zeros_like(X)  # Initialize gradient matrix

    # Compute gradient for each point in X
    for idx, x in enumerate(X):
        grad[idx] = _numerical_gradient_no_batch(f, x)

    return grad  # Return the gradient matrix

# Function to be minimized (in this case, a simple sum of squares
# function)
def function_2(x):
    """Return the sum of squares of ``x``.

    Args:
        x: NumPy array. A 1-D array is summed completely; any other array
            is summed along ``axis=1``.

    Returns:
        A scalar for 1-D input, otherwise the result of summing ``x**2``
        along axis 1.
    """
    if x.ndim == 1:
        return np.sum(x ** 2)
    return np.sum(x ** 2, axis=1)

# Main script execution starts here

if __name__ == '__main__':
    # Generate grid points for plotting
    x0 = np.arange(-2, 2.5, 0.25)
    x1 = np.arange(-2, 2.5, 0.25)
    X, Y = np.meshgrid(x0, x1)  # Create a meshgrid for the 2D plot

    # Flatten the grid arrays for processing
    X = X.flatten()
    Y = Y.flatten()

    # Compute the gradient of function_2 at each grid point. The points are
    # passed as rows of (x0, x1); the result is transposed back so grad[0]
    # and grad[1] hold the two components.
    grad = numerical_gradient(function_2, np.array([X, Y]).T).T

    # Plotting
    plt.figure()
    # Arrows use the negated gradient components
    plt.quiver(X, Y, -grad[0], -grad[1], angles="xy", color="#666666")
    plt.xlim([-2, 2])
    plt.ylim([-2, 2])
    plt.xlabel('x0')  # Label for the x-axis
    plt.ylabel('x1')  # Label for the y-axis
    plt.grid()  # Add a grid to the plot
    plt.draw()  # Render the plot
    plt.show()  # Display the plot
ANDRES ALCIVAR
SOONGSIL UNIVERSITY

3. GRADIENT_METHOD

import numpy as np
import [Link] as plt
from gradient_2d import numerical_gradient # Import the numerical
gradient function

# Function to perform gradient descent on a given function f

def gradient_descent(f, init_x, lr=0.01, step_num=100):
    """Run gradient descent on ``f`` starting from ``init_x``.

    Args:
        f: Function to minimise; it must use the array it is passed, since
            the descent runs on a private copy of ``init_x``.
        init_x: Starting point (array-like). It is copied, so the caller's
            object is left untouched.
        lr: Learning rate multiplying the gradient at each step.
        step_num: Number of update steps to perform.

    Returns:
        A tuple ``(x, x_history)``: the final position, and an array with
        the position recorded *before* each of the ``step_num`` updates.
    """
    # Copy as float: the update below is in place, and the previous
    # `x = init_x` made it overwrite the caller's starting point.
    x = np.array(init_x, dtype=float)
    x_history = []  # Positions visited during the descent

    # Perform the gradient descent for a given number of steps
    for _ in range(step_num):
        x_history.append(x.copy())  # Snapshot, not a reference to x

        grad = numerical_gradient(f, x)  # Gradient at the current x
        x -= lr * grad  # Move against the gradient

    return x, np.array(x_history)

# Function to be optimized, in this case, a simple quadratic function

def function_2(x):
    """Return ``x[0]**2 + x[1]**2`` for an indexable ``x``."""
    return x[0] ** 2 + x[1] ** 2

# Initial point from which the gradient descent starts
init_x = np.array([-3.0, 4.0])

# Set the learning rate and number of steps for the gradient descent
lr = 0.1
step_num = 20
x, x_history = gradient_descent(function_2, init_x, lr=lr,
                                step_num=step_num)

# Plotting setup
plt.plot([-5, 5], [0, 0], '--b')  # Draw x-axis
plt.plot([0, 0], [-5, 5], '--b')  # Draw y-axis
plt.plot(x_history[:, 0], x_history[:, 1], 'o')  # History of x positions

# Set plot limits and labels
plt.xlim(-3.5, 3.5)
plt.ylim(-4.5, 4.5)
plt.xlabel("X0")
plt.ylabel("X1")
plt.show()  # Display the plot
ANDRES ALCIVAR
SOONGSIL UNIVERSITY
4. GRADIENT_SIMPLENET
import sys, os
# Append the parent directory to [Link] to import modules from there
[Link]([Link])
import numpy as np # Import numpy for numerical operations
from [Link] import softmax, cross_entropy_error # Import
common functions used in neural networks
from [Link] import numerical_gradient # Import the gradient
computation function

# Define a simple neural network class

class simpleNet:
    """Single-layer network with one 2x3 weight matrix ``W`` and no bias."""

    def __init__(self):
        # Initialize the weights randomly using a Gaussian distribution
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        """Return the scores ``x . W`` (a plain matrix product)."""
        return np.dot(x, self.W)

    def loss(self, x, t):
        """Return the cross-entropy loss of the prediction for ``x``.

        Args:
            x: Input passed to ``predict``.
            t: Target passed to ``cross_entropy_error`` with the softmax
                output.
        """
        z = self.predict(x)  # Score vector
        y = softmax(z)  # Softmax applied to the scores
        loss = cross_entropy_error(y, t)  # Cross-entropy against t

        return loss

# Create an instance of the neural network
net = simpleNet()

# Test data
x = np.array([0.6, 0.9])  # Input data
t = np.array([0, 0, 1])  # True label (one-hot encoded)


# Define the function to compute the loss
def f(w):
    # `w` is deliberately unused: the loss is read from `net`, whose W is
    # the very array handed to numerical_gradient below.
    # NOTE(review): this only works if numerical_gradient perturbs that
    # array in place - confirm against common.gradient.
    return net.loss(x, t)


# Calculate the gradient of the loss with respect to the weights
dW = numerical_gradient(f, net.W)

print(dW)  # Print the gradient of the weights

ANDRES ALCIVAR
SOONGSIL UNIVERSITY
5. TWO_LAYER_NET
import sys, os
# Append the parent directory to the system path to allow importing
modules from there
[Link]([Link])
from [Link] import * # Import common neural network
functions like sigmoid and softmax
from [Link] import numerical_gradient # Import the function
to compute numerical gradients
import numpy as np # Import numpy for matrix and vector computations

# Method to compute the output of the network for a given input

def predict(self, x):
    """Run the forward pass and return the softmax output for ``x``.

    NOTE(review): this takes ``self`` but no enclosing class header is
    present in this file, and the attribute name ``params`` was lost in
    extraction - confirm both against the original module.

    Args:
        x: Input multiplied by ``W1`` in the first layer.

    Returns:
        ``softmax(sigmoid(x . W1 + b1) . W2 + b2)``.
    """
    W1, W2 = self.params['W1'], self.params['W2']
    b1, b2 = self.params['b1'], self.params['b2']

    # Forward pass through the network
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    y = softmax(a2)

    return y  # Return the network output

# Compute the loss function for a given set of input and true output
def loss(self, x, t):
    """Return ``cross_entropy_error`` between ``self.predict(x)`` and ``t``.

    Args:
        x: Input forwarded to ``self.predict``.
        t: Target handed to ``cross_entropy_error`` alongside the prediction.
    """
    y = self.predict(x)
    return cross_entropy_error(y, t)  # Return the cross-entropy loss

# Calculate the accuracy of the network on a given set of data

def accuracy(self, x, t):
    """Return the fraction of rows whose predicted class matches ``t``.

    Both the prediction and ``t`` are reduced with ``argmax`` along axis 1,
    so ``t`` must be 2-D with one row per sample.

    Args:
        x: Inputs forwarded to ``self.predict``; ``x.shape[0]`` is used as
            the number of samples.
        t: Targets, one row per sample.

    Returns:
        Number of matching rows divided by ``x.shape[0]``, as a float.
    """
    y = self.predict(x)
    y = np.argmax(y, axis=1)  # Predicted class index per row
    t = np.argmax(t, axis=1)  # Target class index per row

    accuracy = np.sum(y == t) / float(x.shape[0])  # Compute the accuracy
    return accuracy

# Compute numerical gradients of the loss with respect to the parameters
def numerical_gradient(self, x, t):
    """Return numerical gradients of the loss for every parameter.

    NOTE(review): this is written as a method. The inner
    ``numerical_gradient(...)`` calls are meant to reach the module-level
    function imported from ``common.gradient``, which only happens when this
    definition sits inside a class body - confirm the enclosing class.

    Args:
        x: Input batch forwarded to ``self.loss``.
        t: Targets forwarded to ``self.loss``.

    Returns:
        Dict with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``, each holding
        the gradient returned for the matching entry of ``self.params``.
    """
    # The argument W is ignored: the loss is always read from self.
    # NOTE(review): presumably relies on the gradient routine perturbing
    # each parameter array in place - confirm.
    loss_W = lambda W: self.loss(x, t)  # Define the loss function

    # Calculate gradients for each parameter
    grads = {}
    for key in ('W1', 'b1', 'W2', 'b2'):
        grads[key] = numerical_gradient(loss_W, self.params[key])

    return grads  # Return the computed gradients

# Compute analytical gradients of the loss with respect to the parameters
def gradient(self, x, t):
    """Return gradients for every parameter via a manual backward pass.

    Args:
        x: Input batch; ``x.shape[0]`` is used as the batch size.
        t: Targets, subtracted element-wise from the softmax output.

    Returns:
        Dict with keys ``'W1'``, ``'b1'``, ``'W2'``, ``'b2'``.
    """
    # Unpack parameters
    W1, W2 = self.params['W1'], self.params['W2']
    b1, b2 = self.params['b1'], self.params['b2']
    grads = {}

    batch_num = x.shape[0]

    # Forward pass: compute activations
    a1 = np.dot(x, W1) + b1
    z1 = sigmoid(a1)
    a2 = np.dot(z1, W2) + b2
    y = softmax(a2)

    # Backward pass: compute gradients
    dy = (y - t) / batch_num  # Output error, averaged over the batch
    grads['W2'] = np.dot(z1.T, dy)
    grads['b2'] = np.sum(dy, axis=0)

    dz1 = np.dot(dy, W2.T)  # Error propagated back to the hidden layer
    da1 = sigmoid_grad(a1) * dz1
    grads['W1'] = np.dot(x.T, da1)
    grads['b1'] = np.sum(da1, axis=0)

    return grads  # Return the computed gradients

ANDRES ALCIVAR
SOONGSIL UNIVERSITY
6. TRAINING_NEURALNET
import sys, os
# Append the parent directory to the system path to allow importing from
there
[Link]([Link])
import numpy as np # Numerical computing library
import [Link] as plt # Plotting library
from [Link] import load_mnist # Function to load the MNIST
dataset
from two_layer_net import TwoLayerNet # Class representing a two-layer
neural network

# Load the MNIST dataset with normalization and one-hot encoding of the
# labels
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True,
                                                  one_hot_label=True)

# Initialize the neural network with the specified architecture
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Define training parameters
iters_num = 10000  # Number of training iterations
train_size = x_train.shape[0]  # Total number of training samples
batch_size = 100  # Number of samples in each mini-batch
learning_rate = 0.1  # Learning rate for weight updates

# Lists to store the loss and accuracy values for plotting
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Number of iterations per epoch (full pass through the training data).
# Integer division keeps the `i % iter_per_epoch == 0` check below exact;
# with `/` a train_size that is not a multiple of batch_size produced a
# fractional period that only ever matched at i == 0.
iter_per_epoch = max(train_size // batch_size, 1)

# Training loop
for i in range(iters_num):
    # Randomly select a mini-batch of samples
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient of the loss function
    grad = network.gradient(x_batch, t_batch)

    # Update the network parameters based on the gradient
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Calculate and record the loss for the current mini-batch
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch, evaluate and print the training and test accuracy
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " +
              str(test_acc))

# Plotting the training and test accuracies
markers = {'train': 'o', 'test': 's'}  # Markers for the plot
x = np.arange(len(train_acc_list))  # Epochs on the x-axis
plt.plot(x, train_acc_list, label='train acc')  # Training accuracy plot
plt.plot(x, test_acc_list, label='test acc', linestyle='--')  # Test accuracy
plt.xlabel("epochs")  # X-axis label
plt.ylabel("accuracy")  # Y-axis label
plt.ylim(0, 1.0)  # Set y-axis limits
plt.legend(loc='lower right')  # Add a legend
plt.show()  # Display the plot
ANDRES ALCIVAR
SOONGSIL UNIVERSITY

Gaussian Distribution & Gradient Descent Lab
No ratings yet
Gaussian Distribution & Gradient Descent Lab
29 pages
PyTorch Gradient Descent Examples
No ratings yet
PyTorch Gradient Descent Examples
22 pages
Visual Guide to Gradient Descent
No ratings yet
Visual Guide to Gradient Descent
6 pages
3D Visualization of Gradient Descent
No ratings yet
3D Visualization of Gradient Descent
7 pages
Gradient Descent in Ridge Regression
No ratings yet
Gradient Descent in Ridge Regression
5 pages
Python Implementations of ML Algorithms
No ratings yet
Python Implementations of ML Algorithms
25 pages
Ilovepdf Merged Compressed
No ratings yet
Ilovepdf Merged Compressed
62 pages
21BDS0078 VL2024250102128 Ast01
No ratings yet
21BDS0078 VL2024250102128 Ast01
35 pages
All Sem
No ratings yet
All Sem
77 pages
Linear Regression for Restaurant Profits
No ratings yet
Linear Regression for Restaurant Profits
51 pages
DL Lab - Record
No ratings yet
DL Lab - Record
54 pages
Gaussian Distribution & Gradient Descent
No ratings yet
Gaussian Distribution & Gradient Descent
23 pages
Partial Derivatives
No ratings yet
Partial Derivatives
1 page
Gaussian Distribution & Gradient Descent Lab
No ratings yet
Gaussian Distribution & Gradient Descent Lab
48 pages
Automatic Differentiation with PyTorch
No ratings yet
Automatic Differentiation with PyTorch
228 pages
Programming Assignment 1
No ratings yet
Programming Assignment 1
12 pages
Pattern Recognition Lab Experiments
No ratings yet
Pattern Recognition Lab Experiments
30 pages
Machine Learning Labs Overview
No ratings yet
Machine Learning Labs Overview
46 pages
Simple Neural Network with Backpropagation
No ratings yet
Simple Neural Network with Backpropagation
18 pages
Pattern Recognition Lab
No ratings yet
Pattern Recognition Lab
16 pages
Numerical Methods for Root Finding
No ratings yet
Numerical Methods for Root Finding
18 pages
Activation Functions and Perceptron Models
No ratings yet
Activation Functions and Perceptron Models
27 pages
Python ML Lab: Gaussian & Regression Tasks
No ratings yet
Python ML Lab: Gaussian & Regression Tasks
24 pages
Neural Network Implementations in Python
No ratings yet
Neural Network Implementations in Python
30 pages
Steepest Descent and Newton Methods
No ratings yet
Steepest Descent and Newton Methods
19 pages
Gradient Checking in Backpropagation
No ratings yet
Gradient Checking in Backpropagation
12 pages
Gaussian Distribution and Gradient Descent Implementations
No ratings yet
Gaussian Distribution and Gradient Descent Implementations
11 pages
Linear Regression Tutorial with Boston Data
No ratings yet
Linear Regression Tutorial with Boston Data
8 pages
Spring 26 Regression Analysis Ridge Regression
No ratings yet
Spring 26 Regression Analysis Ridge Regression
24 pages
Python Numerical Methods Manual
No ratings yet
Python Numerical Methods Manual
26 pages
Mtech Programs AI Lab
No ratings yet
Mtech Programs AI Lab
11 pages
Linear Regression with Gradient Descent
No ratings yet
Linear Regression with Gradient Descent
11 pages
Python Gradient Descent Optimization
No ratings yet
Python Gradient Descent Optimization
29 pages
Gaussian Elimination in Python
No ratings yet
Gaussian Elimination in Python
139 pages
Gradient Descent Optimization Techniques
No ratings yet
Gradient Descent Optimization Techniques
4 pages
DNN Lab Manual for MCA Program
No ratings yet
DNN Lab Manual for MCA Program
34 pages
Linear Regression and Optimization Basics
No ratings yet
Linear Regression and Optimization Basics
27 pages
Numerical Methods in Python
No ratings yet
Numerical Methods in Python
8 pages
EE2211 Supervised Learning Overview
No ratings yet
EE2211 Supervised Learning Overview
15 pages
Exercices Revision
No ratings yet
Exercices Revision
6 pages
PRCV
No ratings yet
PRCV
30 pages
Exp 134567
No ratings yet
Exp 134567
13 pages
Gradient Descent in Neural Networks Explained
No ratings yet
Gradient Descent in Neural Networks Explained
4 pages
Linear Regression
No ratings yet
Linear Regression
64 pages
Linear Regression and Logistic Models
No ratings yet
Linear Regression and Logistic Models
6 pages
NC Lab Program1 Perceptron Algorithm
No ratings yet
NC Lab Program1 Perceptron Algorithm
36 pages
Numerical Methods: Newton-Raphson & Gauss-Seidel
No ratings yet
Numerical Methods: Newton-Raphson & Gauss-Seidel
49 pages
MP Neuron Model Logic Network Experiment
No ratings yet
MP Neuron Model Logic Network Experiment
41 pages
Numerical Methods in Python
No ratings yet
Numerical Methods in Python
6 pages
Machine Learning Cost Function Guide
No ratings yet
Machine Learning Cost Function Guide
9 pages
Machine Learning Algorithm Implementations
No ratings yet
Machine Learning Algorithm Implementations
22 pages
Machine Learning Lab Exercises Guide
No ratings yet
Machine Learning Lab Exercises Guide
41 pages
Python Practical Exercises on ML
No ratings yet
Python Practical Exercises on ML
14 pages
Advanced Neural Network Techniques
No ratings yet
Advanced Neural Network Techniques
16 pages
Neural Network Training with Python
No ratings yet
Neural Network Training with Python
3 pages
00 Submission For MCAM Replacement
No ratings yet
00 Submission For MCAM Replacement
1 page
Computer Clusters for Parallel Computing
No ratings yet
Computer Clusters for Parallel Computing
12 pages
Key Characteristics of P2P Networks
No ratings yet
Key Characteristics of P2P Networks
55 pages
Overview of Korea's Telecommunications Act
No ratings yet
Overview of Korea's Telecommunications Act
8 pages
Rethinking Board Technology Committees
No ratings yet
Rethinking Board Technology Committees
11 pages
MNIST Neural Network Image Prediction
No ratings yet
MNIST Neural Network Image Prediction
4 pages
Enhancing B2B Customer Health Metrics
100% (1)
Enhancing B2B Customer Health Metrics
11 pages
Boards Embracing New Tech Strategies
No ratings yet
Boards Embracing New Tech Strategies
23 pages
Technology Committee Best Practices
No ratings yet
Technology Committee Best Practices
10 pages
Fall 2024 Presentation Schedule
No ratings yet
Fall 2024 Presentation Schedule
6 pages
UI/UX Design Process and Principles
No ratings yet
UI/UX Design Process and Principles
25 pages
HCI Concepts in UI/UX Design Class
No ratings yet
HCI Concepts in UI/UX Design Class
9 pages
Korean Personal Information Protection Act Report
No ratings yet
Korean Personal Information Protection Act Report
9 pages
South Korea's Data Privacy Law Overview
No ratings yet
South Korea's Data Privacy Law Overview
37 pages
Answer Key for Set Theory Problems
No ratings yet
Answer Key for Set Theory Problems
3 pages
D.H. Fremlin - Measure Theory - Topological Measure Spaces (Vol. 4) (2003)
100% (1)
D.H. Fremlin - Measure Theory - Topological Measure Spaces (Vol. 4) (2003)
945 pages
Piecewise Functions and Graphing Guide
No ratings yet
Piecewise Functions and Graphing Guide
3 pages
HST2 Algebra 2
No ratings yet
HST2 Algebra 2
1 page
Grade 12 Functions Assignment 09
No ratings yet
Grade 12 Functions Assignment 09
16 pages
Magnetic Forces and Charged Particles
100% (2)
Magnetic Forces and Charged Particles
21 pages
Exploring Goldbach's Conjecture Insights
100% (1)
Exploring Goldbach's Conjecture Insights
36 pages
Elementary Proof of Prime Number Theorem
No ratings yet
Elementary Proof of Prime Number Theorem
11 pages
Integrating the Exponential Function
No ratings yet
Integrating the Exponential Function
7 pages
Convexity Analysis of f(x)=x1x2
No ratings yet
Convexity Analysis of f(x)=x1x2
8 pages
College Algebra Review Questions
No ratings yet
College Algebra Review Questions
2 pages
Chapter 2 Solutions Overview
70% (10)
Chapter 2 Solutions Overview
53 pages
Initial vs Boundary Value Problems Explained
No ratings yet
Initial vs Boundary Value Problems Explained
3 pages
Integrating with Trapezoidal and Simpson's Rules
No ratings yet
Integrating with Trapezoidal and Simpson's Rules
7 pages
EECS 50 Discrete Signals Midterm Exam
No ratings yet
EECS 50 Discrete Signals Midterm Exam
7 pages
Sum and Product of Quadratic Roots
No ratings yet
Sum and Product of Quadratic Roots
18 pages
Accelerated Algebra 2/Trig Course Overview
No ratings yet
Accelerated Algebra 2/Trig Course Overview
2 pages
Stresses in Rarified Gases from Temperature Inequalities
No ratings yet
Stresses in Rarified Gases from Temperature Inequalities
26 pages
JEE 2022 Mathematics Solutions: ALPS
No ratings yet
JEE 2022 Mathematics Solutions: ALPS
19 pages
Chapter 2, Solution 1.: COSMOS: Complete Online Solutions Manual Organization System
No ratings yet
Chapter 2, Solution 1.: COSMOS: Complete Online Solutions Manual Organization System
152 pages
Dynamical Systems Course Overview
No ratings yet
Dynamical Systems Course Overview
3 pages
Positive Solutions in Semilinear Equations
No ratings yet
Positive Solutions in Semilinear Equations
19 pages
Understanding Gibbs Sampling in MCMC
No ratings yet
Understanding Gibbs Sampling in MCMC
2 pages
Vector Applications and Problems
No ratings yet
Vector Applications and Problems
5 pages
NDA Maths Lecture Schedule 2025
No ratings yet
NDA Maths Lecture Schedule 2025
3 pages
Notes on Absolutely Continuous Functions
No ratings yet
Notes on Absolutely Continuous Functions
16 pages
Solving Linear Equations and Word Problems
No ratings yet
Solving Linear Equations and Word Problems
12 pages
Line Drawing Algorithms Overview
No ratings yet
Line Drawing Algorithms Overview
25 pages
Torque and Conditions of Equilibrium
No ratings yet
Torque and Conditions of Equilibrium
20 pages
Class 10 Trigonometry Assignment
No ratings yet
Class 10 Trigonometry Assignment
2 pages

Numerical Gradient and Optimization Methods

Uploaded by

Numerical Gradient and Optimization Methods

Uploaded by

ANDRES ALCIVAR

# Define the numerical differentiation function

# Define the function for which we want to calculate the derivative

# Function to create a tangent line at a given point x

# Create an array of x values from 0 to 20 with an increment of 0.1

# Labels for the plot

# Get the tangent line function at x = 5

# Plot the original function and its tangent line at x = 5

# Function to compute the numerical gradient of a scalar function at a

# Iterate over all dimensions of x to calculate partial

x[idx] = tmp_val # Restore the original value of x[idx]

return grad # Return the gradient vector

# Function to compute the numerical gradient of a function at one or

# Compute gradient for each point in X

return grad # Return the gradient matrix

# Function to be minimized (in this case, a simple sum of squares

# Main script execution starts here

X = [Link]() # Flatten the grid arrays for processing

# Compute the gradient of function_2 at each grid point

# Function to perform gradient descent on a given function f

# Perform the gradient descent for a given number of steps

grad = numerical_gradient(f, x) # Compute the gradient at

return x, [Link](x_history) # Return the final position and the

# Function to be optimized, in this case, a simple quadratic function

# Initial point from which the gradient descent starts

# Set plot limits and labels

# Define a simple neural network class

# Define the prediction function using matrix multiplication

# Create an instance of the neural network

# Define the function to compute the loss

print(dW) # Print the gradient of the weights

# Define a class for a two-layer neural network

# Method to compute the output of the network for a given input

# Forward pass through the network

return y # Return the network output

# Calculate the accuracy of the network on a given set of data

accuracy = [Link](y == t) / float([Link][0]) # Compute the

# Compute numerical gradients of the loss with respect to the

# Calculate gradients for each parameter

return grads # Return the computed gradients

# Compute analytical gradients of the loss with respect to the

# Forward pass: compute activations

# Backward pass: compute gradients

dz1 = [Link](dy, W2.T)

return grads # Return the computed gradients

# Initialize the neural network with the specified architecture

# Define training parameters

# Lists to store the loss and accuracy values for plotting

# Compute the gradient of the loss function

# Update the network parameters based on the gradient

# Calculate and record the loss for the current mini-batch

# Plotting the training and test accuracies

You might also like