Ex.
No : 1
Write a program to create and execute multiple threads using Pthreads
Date :
AIM
To create and execute multiple threads using Pthreads in C and demonstrate concurrent
execution.
ALGORITHM
1. Start the program.
2. Include required header files (stdio.h, stdlib.h, pthread.h).
3. Define a thread function that each thread will execute.
4. Declare thread IDs using pthread_t.
5. Create multiple threads using pthread_create().
6. Pass unique arguments to each thread.
7. Wait for all threads to finish using pthread_join().
8. Display output from each thread.
9. End the program.
PROGRAM (C USING PTHREADS)
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
/*
 * Start routine run by every worker thread.
 *
 * arg: pointer to an int holding the number assigned to this thread.
 * Prints one line containing that number, then terminates the calling
 * thread with a NULL exit value.
 */
void* thread_function(void* arg) {
    const int* id_ptr = (const int*)arg;
    printf("Thread %d is running\n", *id_ptr);
    pthread_exit(NULL);
}
/*
 * Creates five worker threads, hands each its own 1-based number, and waits
 * for all of them before printing the completion message.
 *
 * Returns 0 on success; exits with status 1 if a thread cannot be created.
 */
int main() {
    enum { NUM_THREADS = 5 };
    pthread_t threads[NUM_THREADS];
    int thread_args[NUM_THREADS]; /* one slot per thread so no argument is shared */

    /* Create threads */
    for (int i = 0; i < NUM_THREADS; i++) {
        thread_args[i] = i + 1;
        /* pthread_create reports failure through its return value and does
         * not set errno, so the code is printed directly instead of perror. */
        int rc = pthread_create(&threads[i], NULL, thread_function, &thread_args[i]);
        if (rc != 0) {
            fprintf(stderr, "Failed to create thread %d (error code %d)\n",
                    thread_args[i], rc);
            exit(1);
        }
    }

    /* Join threads */
    for (int i = 0; i < NUM_THREADS; i++) {
        int rc = pthread_join(threads[i], NULL);
        if (rc != 0) {
            fprintf(stderr, "Failed to join thread %d (error code %d)\n",
                    thread_args[i], rc);
        }
    }

    printf("All threads have finished execution\n");
    return 0;
}
OUTPUT (the order of the "Thread N is running" lines may vary between runs)
Thread 1 is running
Thread 2 is running
Thread 3 is running
Thread 4 is running
Thread 5 is running
All threads have finished execution
RESULT
Multiple threads were successfully created and executed using Pthreads, demonstrating
concurrent execution.
2
Ex. No: 2
Write a program to demonstrate mutex locks using std::mutex
Date :
AIM
To demonstrate the use of mutex locks (std::mutex) in C++ for synchronizing multiple threads
and preventing race conditions.
ALGORITHM
1. Start the program.
2. Include required header files (iostream, thread, mutex).
3. Declare a global shared variable (counter).
4. Create a mutex object using std::mutex.
5. Define a function that:
o Locks the mutex before accessing the shared variable.
o Updates the shared variable.
o Unlocks the mutex after use.
6. Create multiple threads using std::thread.
7. Execute the function concurrently in each thread.
8. Join all threads using join().
9. Display the final value of the shared variable.
10. End the program.
PROGRAM (C++ USING STD::MUTEX)
#include <iostream>
#include <thread>
#include <mutex>
using namespace std;
std::mutex mtx;   // Guards every access to `counter`
int counter = 0;  // Shared resource updated by all threads

/// Increments the shared counter five times, printing each new value.
///
/// The mutex is held for the increment and the print together, so the value
/// shown is the value this call just wrote.
/// @param id  Number used to label this thread in the printed lines.
void increment(int id) {
    for (int i = 0; i < 5; i++) {
        // RAII lock: released at the end of each iteration, even if the
        // stream insertion throws.
        std::lock_guard<std::mutex> guard(mtx);
        counter++;
        std::cout << "Thread " << id << " incremented counter to " << counter << '\n';
    }
}
int main() {
thread t1(increment, 1);
thread t2(increment, 2);
3
// Wait for threads to finish
[Link]();
[Link]();
cout << "Final counter value: " << counter << endl;
return 0;
}
COMPILATION & EXECUTION
g++ program.cpp -o program -pthread
./program
OUTPUT
Thread 1 incremented counter to 1
Thread 2 incremented counter to 2
Thread 1 incremented counter to 3
Thread 2 incremented counter to 4
Thread 1 incremented counter to 5
Thread 2 incremented counter to 6
Thread 1 incremented counter to 7
Thread 2 incremented counter to 8
Thread 1 incremented counter to 9
Thread 2 incremented counter to 10
Final counter value: 10
RESULT
The program successfully demonstrated the use of std::mutex to synchronize multiple threads.
The shared variable was updated correctly without race conditions.
4
Ex. No : 3
Write a program to compute the Fibonacci series using OpenMP tasks
Date :
AIM
To compute the Fibonacci series using OpenMP task parallelism in C.
ALGORITHM
1. Start
2. Define a recursive function fib(n)
3. If n <= 1, return n
4. Else:
o Create a task to compute fib(n-1)
o Create another task to compute fib(n-2)
o Wait for both tasks to complete using taskwait
o Return sum of both results
5. In main() function:
o Read value of n
o Begin OpenMP parallel region
o Use single directive to call fib(n)
6. Print the Fibonacci result
7. Stop
PROGRAM (C WITH OPENMP TASKS)
#include <stdio.h>
#include <omp.h>
/*
 * Recursive Fibonacci in which the two sub-problems are spawned as OpenMP tasks.
 *
 * n: index of the Fibonacci number; values of 1 or less are returned as-is.
 * Returns fib(n - 1) + fib(n - 2) once both child tasks have completed.
 */
int fib(int n) {
    if (n < 2) {
        return n;
    }

    int left, right;

    /* Each result variable is shared so the child task writes into this frame. */
    #pragma omp task shared(left)
    left = fib(n - 1);

    #pragma omp task shared(right)
    right = fib(n - 2);

    /* Both results must be available before they are added. */
    #pragma omp taskwait
    return left + right;
}
/*
 * Reads n from standard input, evaluates fib(n) inside an OpenMP parallel
 * region, and prints the result.
 *
 * Returns 0 on success, 1 if no integer could be read.
 */
int main() {
    int n, result = 0;

    printf("Enter the value of n: ");
    if (scanf("%d", &n) != 1) {
        /* Without this check n would be used uninitialised on bad input. */
        fprintf(stderr, "Invalid input: expected an integer\n");
        return 1;
    }

    #pragma omp parallel
    {
        /* Only one thread starts the recursion; the tasks it creates are
         * available to the rest of the team. */
        #pragma omp single
        {
            result = fib(n);
        }
    }

    printf("Fibonacci of %d = %d\n", n, result);
    return 0;
}
COMPILATION & EXECUTION
gcc -fopenmp fib.c -o fib
./fib
OUTPUT
Enter the value of n: 7
Fibonacci of 7 = 13
RESULT
The Fibonacci number is successfully computed using OpenMP task parallelism,
demonstrating parallel execution of recursive calls.
6
Ex. No: 4 Write a Rust program to spawn threads and use channels for
Date : communication
AIM
To create multiple threads in Rust and enable communication between them using channels.
ALGORITHM
1. Start
2. Import required modules (thread, mpsc, time)
3. Create a channel using mpsc::channel() (transmitter tx, receiver rx)
4. Spawn multiple threads: Each thread sends a message through the transmitter
5. Drop the original transmitter to close the channel
6. In the main thread:
o Receive messages using the receiver
o Print each received message
7. End
PROGRAM (RUST)
use std::thread;
use std::sync::mpsc;
use std::time::Duration;
/// Spawns five threads that each send one message over a shared channel,
/// then prints every message as the main thread receives it.
fn main() {
    let (tx, rx) = mpsc::channel();

    for i in 1..=5 {
        // Each thread owns its own clone of the sender.
        let tx_clone = tx.clone();
        thread::spawn(move || {
            let message = format!("Message from thread {}", i);
            tx_clone.send(message).unwrap();
            thread::sleep(Duration::from_millis(500));
        });
    }

    // Drop the original transmitter so the receive loop ends once every
    // cloned sender has been dropped.
    drop(tx);

    // Receive messages until the channel is closed.
    for received in rx {
        println!("Received: {}", received);
    }
}
OUTPUT (message order may vary between runs, since the threads send concurrently)
Received: Message from thread 1
Received: Message from thread 2
Received: Message from thread 3
Received: Message from thread 4
Received: Message from thread 5
RESULT
The program successfully demonstrates multi-threading in Rust and inter-thread
communication using channels, where multiple threads send messages safely to the main thread.
8
Ex. No: 5 Write a program using MPI to calculate the average of an array using
Date : collective communication
AIM
To calculate the average of an array using MPI collective communication functions.
ALGORITHM
1. Start
2. Initialize MPI environment
3. Get:
o Total number of processes (size)
o Rank of each process (rank)
4. In root process (rank 0):
o Initialize the array
5. Scatter the array elements to all processes using MPI_Scatter
6. Each process computes its local sum
7. Use MPI_Reduce to compute the global sum at root process
8. Root process calculates average: Average = Total Sum / Number of Elements
9. Display the result
10. Finalize MPI
11. End
PROGRAM (C WITH MPI)
#include <stdio.h>
#include <mpi.h>
/*
 * Computes the average of an 8-element array with MPI collectives.
 *
 * Rank 0 fills the array, MPI_Scatter hands every rank an equal chunk, each
 * rank sums its chunk, and MPI_Reduce adds the partial sums on rank 0, which
 * prints the total and the average.
 *
 * The number of processes must divide the element count evenly; otherwise
 * the program reports the problem and returns 1.
 */
int main(int argc, char *argv[]) {
    enum { N = 8 };               /* total number of elements */
    int rank, size;
    int data[N], local_data[N];   /* local_data is big enough for a 1-process run */
    int local_sum = 0, total_sum = 0;

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* MPI_Scatter sends the same count to every rank, so the array must split
     * evenly. This also rejects size > N, which would read past data[]. */
    if (N % size != 0) {
        if (rank == 0) {
            fprintf(stderr, "Number of processes (%d) must evenly divide %d\n", size, N);
        }
        MPI_Finalize();
        return 1;
    }
    int chunk = N / size;

    /* Initialize array in root process */
    if (rank == 0) {
        printf("Array elements: ");
        for (int i = 0; i < N; i++) {
            data[i] = i + 1; /* 1 to 8 */
            printf("%d ", data[i]);
        }
        printf("\n");
    }

    /* Scatter data (chunk elements per process) */
    MPI_Scatter(data, chunk, MPI_INT, local_data, chunk, MPI_INT, 0, MPI_COMM_WORLD);

    /* Each process computes the sum of its own chunk */
    for (int i = 0; i < chunk; i++) {
        local_sum += local_data[i];
    }

    /* Reduce to get total sum at root */
    MPI_Reduce(&local_sum, &total_sum, 1, MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);

    /* Root computes average */
    if (rank == 0) {
        float avg = (float)total_sum / N;
        printf("Total Sum = %d\n", total_sum);
        printf("Average = %.2f\n", avg);
    }

    MPI_Finalize();
    return 0;
}
OUTPUT
Array elements: 1 2 3 4 5 6 7 8
Total Sum = 36
Average = 4.50
RESULT
The average of the array is successfully computed using MPI collective communication
functions (MPI_Scatter and MPI_Reduce), demonstrating parallel data distribution and aggregation.
10
Ex. No: 6 Write a program using the threading module to demonstrate data race
Date : and synchronization
AIM
To demonstrate data race condition and its resolution using synchronization (Lock) in the
threading module.
ALGORITHM
Part A: Data Race (without synchronization)
1. Start
2. Import threading module
3. Initialize a shared variable counter = 0
4. Create a function to increment the counter multiple times
5. Spawn multiple threads executing the function
6. Wait for all threads to finish
7. Print the final counter value (incorrect due to race condition)
Part B: Synchronization (with Lock)
8. Create a Lock object
9. Modify the function:
o Acquire lock before updating counter
o Release lock after update
10. Repeat thread execution
11. Print final counter value (correct result)
12. Stop
PROGRAM (PYTHON)
import threading
# Shared variable updated by every worker thread.
counter = 0
# Lock used by the synchronized variant to serialize updates to `counter`.
lock = threading.Lock()
# Without synchronization (Data Race)
def increment_without_lock():
    """Add 1 to the module-level ``counter`` 100000 times with no locking."""
    global counter
    for _ in range(100000):
        # Read-modify-write on shared state with nothing guarding it.
        counter += 1
# With synchronization (Lock)
def increment_with_lock():
    """Add 1 to the module-level ``counter`` 100000 times, holding ``lock``
    around every update."""
    global counter
    for _ in range(100000):
        # The context manager acquires the lock and always releases it,
        # even if the body raises.
        with lock:
            counter += 1
# ---- Data Race Demonstration ----
# Five threads run the unsynchronized increment; the counter is reset first.
threads = []
counter = 0
for _ in range(5):
    t = threading.Thread(target=increment_without_lock)
    threads.append(t)
    t.start()
# Wait for every worker before reading the counter.
for t in threads:
    t.join()
print("Final Counter without Lock (Data Race):", counter)

# ---- Synchronization Demonstration ----
# Same experiment with the lock-protected increment.
threads = []
counter = 0
for _ in range(5):
    t = threading.Thread(target=increment_with_lock)
    threads.append(t)
    t.start()
for t in threads:
    t.join()
print("Final Counter with Lock (Synchronized):", counter)
OUTPUT
Final Counter without Lock (Data Race): 327845 # (varies each run)
Final Counter with Lock (Synchronized): 500000
RESULT
Without synchronization, the shared variable produces incorrect results due to a data race
condition.
Using a lock mechanism, threads are synchronized, ensuring correct and predictable output.
12
Ex. No: 7
Write a program to perform matrix multiplication using OpenMP
Date :
AIM
To perform matrix multiplication using OpenMP parallel programming.
ALGORITHM
1. Start
2. Declare matrices A, B, and result matrix C
3. Input the order of matrices (rows and columns)
4. Initialize matrices A and B
5. Use OpenMP parallel directive:
o Parallelize outer loop using #pragma omp parallel for
6. For each element of result matrix C[i][j]:
o Initialize C[i][j] = 0
o Compute: $C[i][j] = \sum_{k=0}^{N-1} A[i][k] \times B[k][j]$
7. Print the resultant matrix C
8. Stop
PROGRAM (C WITH OPENMP)
#include <stdio.h>
#include <omp.h>
#define N 3
/*
 * Builds two N x N integer matrices (A[i][j] = i + j, B[i][j] = i * j),
 * prints them, multiplies them with the rows of the result distributed by
 * an OpenMP parallel-for, and prints the product.
 */
int main() {
    int A[N][N], B[N][N], C[N][N];

    /* Initialize matrices */
    printf("Matrix A:\n");
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            A[i][j] = i + j;
            printf("%d ", A[i][j]);
        }
        printf("\n");
    }

    printf("\nMatrix B:\n");
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            B[i][j] = i * j;
            printf("%d ", B[i][j]);
        }
        printf("\n");
    }

    /* Parallel matrix multiplication: each iteration of the outer loop writes
     * only row i of C. The loop indices are declared inside the loops, so
     * every thread has its own copies. */
    #pragma omp parallel for
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            C[i][j] = 0;
            for (int k = 0; k < N; k++) {
                C[i][j] += A[i][k] * B[k][j];
            }
        }
    }

    /* Print result */
    printf("\nResultant Matrix C:\n");
    for (int i = 0; i < N; i++) {
        for (int j = 0; j < N; j++) {
            printf("%d ", C[i][j]);
        }
        printf("\n");
    }
    return 0;
}
OUTPUT
Matrix A:
0 1 2
1 2 3
2 3 4
14
Matrix B:
0 0 0
0 1 2
0 2 4
Resultant Matrix C:
0 5 10
0 8 16
0 11 22
RESULT
Matrix multiplication is successfully performed using OpenMP parallelization, improving
performance by distributing computations across multiple threads.
15
Ex. No: 8
Write a Rust program to parallelize a for loop using the rayon crate
Date :
AIM
To parallelize a for loop in Rust using the Rayon crate for efficient data parallelism.
ALGORITHM
1. Start
2. Add the Rayon crate to the project dependencies
3. Import Rayon prelude (rayon::prelude::*)
4. Create a vector (or range of numbers)
5. Convert the iterator into a parallel iterator using .par_iter()
6. Perform the desired operation (e.g., square each element)
7. Collect or display the results
8. Stop
PROGRAM (RUST)
use rayon::prelude::*;
/// Squares the integers 1 through 10 with a Rayon parallel iterator and
/// prints both the input vector and the squared vector.
fn main() {
    let numbers = (1..=10).collect::<Vec<i32>>();

    // Parallel processing using Rayon: the closure runs once per element.
    let squares: Vec<i32> = numbers.par_iter().map(|&value| value * value).collect();

    println!("Original: {:?}", numbers);
    println!("Squares: {:?}", squares);
}
OUTPUT
Original: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
Squares: [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
RESULT
The program successfully demonstrates parallel execution of a loop using Rayon, improving
performance by utilizing multiple CPU cores while maintaining simple and safe code.
16
Ex. No: 9
Write a program and profile its performance using ‘gprof’ or ‘perf’
Date :
AIM
To write a C program and analyze its performance using profiling tools like gprof or perf.
ALGORITHM
1. Start the program
2. Define a function that consumes time (e.g., loop or computation)
3. Call the function multiple times
4. Compile the program with profiling enabled
5. Run the program to generate profiling data
6. Use profiling tool (gprof or perf) to analyze performance
7. Display function execution time and call statistics
8. Stop
PROGRAM (C CODE)
#include <stdio.h>
// Function that consumes time
void slowFunction() {
long int i;
for(i = 0; i < 100000000; i++);
}
// Cheap function used as a contrast in the profile: writes one fixed line
// to standard output.
void fastFunction() {
    puts("Fast function executed");
}
// Calls the slow and the fast function five times each, in alternation,
// so the profiler records five calls to both.
int main() {
    int round = 0;
    while (round < 5) {
        slowFunction();
        fastFunction();
        round++;
    }
    return 0;
}
COMPILATION & EXECUTION (USING GPROF)
gcc -pg program.c -o program
./program
gprof program gmon.out > analysis.txt
17
OUTPUT (PROGRAM RUN)
Fast function executed
Fast function executed
Fast function executed
Fast function executed
Fast function executed
PROFILING OUTPUT (GPROF)
Flat profile:
Each sample counts as 0.01 seconds.
% cumulative self self total
time seconds seconds calls ms/call ms/call name
90.00 0.90 0.90 5 180.00 180.00 slowFunction
10.00 1.00 0.10 5 20.00 20.00 fastFunction
RESULT
The program was successfully executed and profiled using gprof/perf.
The analysis shows that slowFunction() consumes most of the execution time, while fastFunction()
takes minimal time. This helps identify performance bottlenecks.
18
Ex. No : 10
MINI PROJECT – Parallel Merge Sort (OpenMP + MPI)
Date :
AIM
To implement parallel merge sort using OpenMP and MPI and compare complexity.
PROBLEM STATEMENT
Sorting a large array using sequential merge sort is time-consuming. Parallelizing the algorithm
using OpenMP and MPI can significantly reduce execution time by utilizing multiple processors.
PART A – OpenMP Parallel Merge Sort
ALGORITHM (OpenMP)
1. Start the program.
2. Read number of elements and input array.
3. Divide array recursively into subarrays.
4. Use OpenMP parallel sections for recursive calls.
5. Sort subarrays in parallel.
6. Merge sorted subarrays.
7. Display sorted array and stop.
PROGRAM – OpenMP Merge Sort (C)
#include <stdio.h>
#include <omp.h>
/*
 * Merges two adjacent sorted runs of a[] into one sorted run, in place.
 *
 * a: array containing both runs
 * l: first index of the left run
 * m: last index of the left run (the right run starts at m + 1)
 * r: last index of the right run
 *
 * Uses a temporary buffer of r - l + 1 ints on the stack.
 */
void merge(int a[], int l, int m, int r) {
    int i = l, j = m + 1, k = 0;
    int temp[r - l + 1];

    /* Take the smaller head each time; on ties take the left element so that
     * equal values keep their original order. */
    while (i <= m && j <= r) {
        if (a[i] <= a[j])
            temp[k++] = a[i++];
        else
            temp[k++] = a[j++];
    }

    /* Exactly one of the runs may still have elements left. */
    while (i <= m)
        temp[k++] = a[i++];
    while (j <= r)
        temp[k++] = a[j++];

    /* Copy the merged run back over the original range. */
    for (i = l, k = 0; i <= r; i++, k++)
        a[i] = temp[k];
}
/*
 * Sorts a[l..r] in ascending order with a recursive merge sort whose two
 * halves are sorted as OpenMP tasks.
 *
 * a: array to sort in place
 * l: first index of the range
 * r: last index of the range (nothing is done when l >= r)
 *
 * Intended to be called from inside a parallel region by a single thread.
 * Tasks are used instead of a nested "parallel sections" region because
 * nested parallel regions are serialised unless nesting is explicitly
 * enabled, which would leave the recursion running on one thread.
 */
void mergeSort(int a[], int l, int r) {
    if (l < r) {
        /* Written this way so the midpoint cannot overflow for large indices. */
        int m = l + (r - l) / 2;

        #pragma omp task shared(a)
        mergeSort(a, l, m);

        #pragma omp task shared(a)
        mergeSort(a, m + 1, r);

        /* Both halves must be sorted before they are merged. */
        #pragma omp taskwait
        merge(a, l, m, r);
    }
}
/*
 * Reads up to 100 integers from standard input, sorts them with the
 * OpenMP merge sort, and prints the sorted sequence.
 *
 * Returns 0 on success, 1 if the element count or an element is invalid.
 */
int main() {
    enum { MAX_ELEMENTS = 100 };
    int n, a[MAX_ELEMENTS];

    printf("Enter number of elements: ");
    /* The count is checked against the array size; an unchecked n larger
     * than 100 would write past the end of a[]. */
    if (scanf("%d", &n) != 1 || n < 0 || n > MAX_ELEMENTS) {
        fprintf(stderr, "Number of elements must be between 0 and %d\n", MAX_ELEMENTS);
        return 1;
    }

    printf("Enter elements:\n");
    for (int i = 0; i < n; i++) {
        if (scanf("%d", &a[i]) != 1) {
            fprintf(stderr, "Invalid element at position %d\n", i + 1);
            return 1;
        }
    }

    #pragma omp parallel
    {
        /* One thread starts the sort inside the parallel region. */
        #pragma omp single
        mergeSort(a, 0, n - 1);
    }

    printf("Sorted array:\n");
    for (int i = 0; i < n; i++)
        printf("%d ", a[i]);
    return 0;
}
20
COMPILATION & EXECUTION
gcc -fopenmp merge_openmp.c
./a.out
OUTPUT
Sorted array:
2 5 7 9 12 18
RESULT
Merge sort is successfully parallelized using OpenMP.
21
PART B – MPI Parallel Merge Sort
ALGORITHM (MPI)
1. Initialize MPI environment.
2. Read input array in root process.
3. Divide array among processes using MPI_Scatter.
4. Each process sorts its local array.
5. Gather sorted subarrays using MPI_Gather.
6. Root process merges sorted subarrays.
7. Display sorted array and finalize MPI.
PROGRAM – MPI Merge Sort (C)
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
/*
 * Combines the sorted runs a[l..m] and a[m+1..r] into a single sorted run
 * stored back in a[l..r].
 *
 * When the two current heads are equal, the element from the right run is
 * taken first.
 */
void merge(int a[], int l, int m, int r) {
    const int count = r - l + 1;
    int buffer[count];
    int left = l;
    int right = m + 1;

    /* Fill the buffer one slot at a time from whichever run supplies the
     * next element. */
    for (int out = 0; out < count; out++) {
        if (right > r || (left <= m && a[left] < a[right]))
            buffer[out] = a[left++];
        else
            buffer[out] = a[right++];
    }

    /* Write the merged sequence back over the original range. */
    for (int out = 0; out < count; out++)
        a[l + out] = buffer[out];
}
/*
 * Sorts a[l..r] in ascending order with a sequential recursive merge sort.
 *
 * a: array to sort in place
 * l: first index of the range
 * r: last index of the range (nothing is done when l >= r)
 */
void mergeSort(int a[], int l, int r) {
    if (l < r) {
        int m = l + (r - l) / 2;
        mergeSort(a, l, m);
        /* The right half starts at m + 1. Passing a constant 1 here never
         * shrinks the range, so the recursion would not terminate. */
        mergeSort(a, m + 1, r);
        merge(a, l, m, r);
    }
}
/*
 * Sorts a fixed 8-element array across MPI processes.
 *
 * The array is scattered in equal chunks, every rank sorts its chunk with
 * mergeSort, the chunks are gathered back on rank 0, and rank 0 merges the
 * sorted chunks and prints the result.
 *
 * The number of processes must divide 8 evenly; otherwise the program
 * reports the problem and returns 1.
 */
int main(int argc, char* argv[]) {
    enum { N = 8 };
    int rank, size;
    int arr[N] = {9, 3, 7, 5, 6, 4, 8, 2};
    int local_arr[N]; /* sized for the worst case of a single process */

    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    /* Scatter/Gather use one fixed count per rank; an uneven split would
     * silently leave elements out of the sort. */
    if (N % size != 0) {
        if (rank == 0) {
            fprintf(stderr, "Number of processes (%d) must evenly divide %d\n", size, N);
        }
        MPI_Finalize();
        return 1;
    }
    int local_n = N / size;

    MPI_Scatter(arr, local_n, MPI_INT, local_arr, local_n,
                MPI_INT, 0, MPI_COMM_WORLD);

    mergeSort(local_arr, 0, local_n - 1);

    MPI_Gather(local_arr, local_n, MPI_INT, arr, local_n, MPI_INT,
               0, MPI_COMM_WORLD);

    if (rank == 0) {
        /* arr now holds `size` sorted chunks back to back. Fold each chunk
         * into the sorted prefix that precedes it rather than re-sorting
         * the whole array. */
        for (int end = 2 * local_n - 1; end < N; end += local_n) {
            merge(arr, 0, end - local_n, end);
        }

        printf("Sorted array:\n");
        for (int i = 0; i < N; i++)
            printf("%d ", arr[i]);
    }

    MPI_Finalize();
    return 0;
}
COMPILATION & EXECUTION
mpicc merge_mpi.c
mpirun -np 2 ./a.out
23
OUTPUT
Sorted array:
2 3 4 5 6 7 8 9
RESULT
MPI-based merge sort successfully distributes work across processes and sorts the
array.
24