RAJARAJESWARI COLLEGE OF ENGINEERING
MYSORE ROAD, BANGALORE-560074
(An ISO 9001:2008 Certified Institute)
(2024-25)
DEPARTMENT OF COMPUTER SCIENCE & ENGINEERING
[IoT, Cybersecurity including Blockchain Technology]
Generative AI Lab Manual
Prepared By
Ghouse Pasha
Assistant Professor
Dept of CSE(IC), RRCE
Lab 1: Explore pre-trained word vectors. Explore word relationships using
vector arithmetic. Perform arithmetic operations and analyze results.
Code:
# Lab 1 script: load pre-trained GloVe word vectors and probe word
# relationships with vector arithmetic, similarity and odd-one-out queries.
import gensim.downloader as api
from gensim.models import KeyedVectors
from gensim.downloader import load

print("Loading the model, please wait...")
# gensim's downloader fetches the vectors if they are not cached locally.
model = load('glove-wiki-gigaword-50')
print("Model loaded successfully!")

# Raw embedding for a single word.
word_vector = model['king']
print(f"\nVector for 'king':\n{word_vector}")

# Analogy via vector arithmetic: king - man + woman.
result = model.most_similar(positive=['king', 'woman'], negative=['man'], topn=1)
print(f"\n'king' - 'man' + 'woman' ≈ {result[0][0]} with similarity score {result[0][1]:.2f}")

# Cosine similarity between two words.
similarity = model.similarity('king', 'queen')
print(f"\nSimilarity between 'king' and 'queen': {similarity:.2f}")

# Word that fits least with the others.
odd_one = model.doesnt_match(['breakfast', 'lunch', 'dinner', 'car'])
print(f"\nOdd one out: {odd_one}")
Output:
Lab 2: Use dimensionality reduction (e.g., PCA or t-SNE) to visualize word
embeddings for Q1. Select 10 words from a specific domain (e.g., sports,
technology) and visualize their embeddings. Analyze clusters and
relationships. Generate contextually rich outputs using embeddings. Write
a program to generate 5 semantically similar words for a given input.
Code:
import gensim.downloader as api
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

# Pre-trained Word2Vec vectors, fetched through gensim's downloader.
word_vectors = api.load("word2vec-google-news-300")

# Ten technology-domain words whose embeddings will be visualized.
words = [
    "computer", "laptop", "AI", "machine", "robot", "software", "hardware",
    "algorithm", "network", "cybersecurity",
]

# One embedding row per word, in the same order as `words`.
vectors = np.array([word_vectors[word] for word in words])
def plot_embeddings(vectors, words, method="PCA"):
    """Project word vectors to 2-D and show a labelled scatter plot.

    Args:
        vectors: Array with one embedding row per entry of ``words``.
        words: Labels used to annotate the points, in row order.
        method: ``"PCA"`` selects PCA; any other value selects t-SNE.
            The value is also shown in the plot title.
    """
    if method == "PCA":
        reduced = PCA(n_components=2).fit_transform(vectors)
    else:
        # t-SNE requires perplexity < number of samples. Keep the lab's
        # value of 5 but cap it so word lists shorter than six do not raise.
        perplexity = min(5, max(1, len(words) - 1))
        reduced = TSNE(
            n_components=2, perplexity=perplexity, random_state=42
        ).fit_transform(vectors)

    plt.figure(figsize=(8, 6))
    plt.scatter(reduced[:, 0], reduced[:, 1])
    # Label every point with its word.
    for i, word in enumerate(words):
        plt.annotate(word, (reduced[i, 0], reduced[i, 1]), fontsize=12)
    plt.title(f"Word Embedding Visualization using {method}")
    plt.show()
# Draw the same word set twice: first projected with PCA, then with t-SNE.
for projection in ("PCA", "t-SNE"):
    plot_embeddings(vectors, words, method=projection)
Output:
Google Colab
Lab 3: Train a custom Word2Vec model on a small dataset. Train
embeddings on a domain-specific corpus (e.g., legal, medical) and analyze
how embeddings capture domain-specific semantics.
Code:
!pip install gensim matplotlib scikit-learn
import gensim
# NOTE(review): the module name on this line was lost in extraction;
# gensim.downloader is a guess. The import is not used below.
import gensim.downloader
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from gensim.models import Word2Vec

# Tiny medical-domain corpus: each inner list is one pre-tokenized sentence.
medical_sentences = [
    ['patient', 'diagnosed', 'cancer', 'treatment', 'chemotherapy'],
    ['doctor', 'prescribes', 'medication', 'therapy', 'recovery'],
    ['hospital', 'surgery', 'nurse', 'care', 'treatment'],
    ['virus', 'infection', 'vaccine', 'immune', 'system'],
    ['diabetes', 'insulin', 'blood', 'sugar', 'health'],
    ['heart', 'disease', 'cardiac', 'attack', 'stroke'],
    ['brain', 'neuroscience', 'mental', 'health', 'psychology'],
    ['radiology', 'MRI', 'X-ray', 'diagnosis', 'scan'],
    ['nutrition', 'diet', 'exercise', 'wellness', 'fitness'],
    ['epidemic', 'pandemic', 'COVID', 'quarantine', 'vaccine'],
]

# min_count=1 keeps every word, which matters because most words in this
# corpus occur only once.
model = Word2Vec(
    sentences=medical_sentences,
    vector_size=100,
    window=3,
    min_count=1,
    workers=4,
)
# Nearest neighbours of 'treatment' in the custom embedding space.
similar_words = model.wv.most_similar('treatment', topn=5)
print("\nTop 5 words similar to 'treatment':")
for word, score in similar_words:
    print(f"{word}: {score:.4f}")
# Collect the whole vocabulary and its vectors, in matching order.
words = list(model.wv.index_to_key)  # Get vocabulary
word_vectors = np.array([model.wv[word] for word in words])

# Reduce the embeddings to two dimensions for plotting.
tsne = TSNE(n_components=2, random_state=0, perplexity=3)
word_vectors_2d = tsne.fit_transform(word_vectors)

plt.figure(figsize=(8, 6))
for i, word in enumerate(words):
    plt.scatter(word_vectors_2d[i, 0], word_vectors_2d[i, 1])
    # Offset the label slightly so it does not sit on top of the marker.
    plt.text(word_vectors_2d[i, 0] + 0.05, word_vectors_2d[i, 1] + 0.05, word, fontsize=12)
plt.title("t-SNE Visualization of Custom Medical Word Embeddings")
plt.show()
Output:
Lab 4: Use word embeddings to improve prompts for a Generative AI
model. Retrieve similar words using word embeddings. Use the similar
words to enrich a GenAI prompt. Use the AI model to generate
responses for the original and enriched prompts. Compare the outputs
in terms of detail and relevance.
Code:
!pip install sentence-transformers
from sentence_transformers import SentenceTransformer, util
import torch
# Sentence-embedding model used by the functions in this lab.
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
model = SentenceTransformer(EMBEDDING_MODEL_NAME)
def get_similar_words(word, top_k=5):
    """Return the candidate words most similar to ``word``.

    Similarity is the cosine score between the embedding of ``word`` and
    the embeddings of a fixed candidate list; nothing outside that list
    can be returned.

    Args:
        word: The word to find similar words for.
        top_k: The number of similar words to return. Values larger than
            the candidate list are capped to its length.

    Returns:
        A list of similar words, most similar first.
    """
    # Defined once so the encoded list and the lookup list cannot drift apart.
    candidates = ['dog', 'cat', 'animal', 'pet', 'mammal', 'food']

    word_embedding = model.encode([word], convert_to_tensor=True)
    candidate_embeddings = model.encode(candidates, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(word_embedding, candidate_embeddings)

    # torch.topk raises when k exceeds the number of scores, so clamp it.
    k = max(0, min(top_k, len(candidates)))
    top_results = torch.topk(cosine_scores[0], k=k)
    return [candidates[idx.item()] for idx in top_results.indices]
def enrich_prompt(prompt):
    """Annotate every word of a prompt with its similar words.

    The prompt is split on whitespace and each word becomes
    ``"word (sim1, sim2, ...) "``; the pieces are concatenated, so the
    result ends with a trailing space.

    Args:
        prompt: The original prompt.

    Returns:
        The enriched prompt (an empty string for an empty prompt).
    """
    return "".join(
        f"{word} ({', '.join(get_similar_words(word))}) "
        for word in prompt.split()
    )
# Baseline prompt, and the same prompt with every word annotated.
original_prompt = "Describe the characteristics of a dog."
enriched_prompt = enrich_prompt(prompt=original_prompt)
def generate_response(prompt):
    """Return a placeholder response for a prompt.

    No generative model is called here; the prompt is only echoed back
    inside a fixed template.

    Args:
        prompt: The prompt to use.

    Returns:
        The string ``"Response for prompt: <prompt>"``.
    """
    response = f"Response for prompt: {prompt}"
    return response
# Generate a response for each prompt variant and print them for comparison.
original_response = generate_response(original_prompt)
enriched_response = generate_response(enriched_prompt)

for label, text in (
    ("Original Prompt", original_prompt),
    ("Original Response", original_response),
    ("Enriched Prompt", enriched_prompt),
    ("Enriched Response", enriched_response),
):
    print(f"{label}: {text}")
Output:
Lab 5: Use word embeddings to create meaningful sentences for creative
tasks. Retrieve similar words for a seed word. Create a sentence or story
using these words as a starting point. Write a program that: Takes a seed
word. Generates similar words. Constructs a short paragraph using these
words.
Code:
from sentence_transformers import SentenceTransformer, util
import torch
# Sentence-embedding model used by the functions in this lab.
EMBEDDING_MODEL_NAME = 'all-MiniLM-L6-v2'
model = SentenceTransformer(EMBEDDING_MODEL_NAME)
def get_similar_words(word, top_k=5):
    """Return the candidate words most similar to ``word``.

    Similarity is the cosine score between the embedding of ``word`` and
    the embeddings of a fixed candidate list; nothing outside that list
    can be returned.

    Args:
        word: The word to find similar words for.
        top_k: The number of similar words to return. Values larger than
            the candidate list are capped to its length.

    Returns:
        A list of similar words, most similar first.
    """
    # Defined once so the encoded list and the lookup list cannot drift apart.
    candidates = [
        'dog', 'cat', 'animal', 'pet', 'mammal', 'food',
        'happy', 'sad', 'excited', 'angry',
    ]

    word_embedding = model.encode([word], convert_to_tensor=True)
    candidate_embeddings = model.encode(candidates, convert_to_tensor=True)
    cosine_scores = util.pytorch_cos_sim(word_embedding, candidate_embeddings)

    # torch.topk raises when k exceeds the number of scores, so clamp it.
    k = max(0, min(top_k, len(candidates)))
    top_results = torch.topk(cosine_scores[0], k=k)
    return [candidates[idx.item()] for idx in top_results.indices]
def create_sentence(seed_word):
    """Build a short paragraph from a seed word and its similar words.

    Args:
        seed_word: The seed word to start with.

    Returns:
        A short paragraph that mentions the seed word and the first four
        similar words.

    Raises:
        ValueError: If fewer than four similar words are available for
            the sentence template.
    """
    similar_words = get_similar_words(seed_word)
    # The template below indexes positions 0..3.
    if len(similar_words) < 4:
        raise ValueError("At least four similar words are required to build the sentence")
    sentence = (
        f"The {seed_word} was {similar_words[0]}, and it made me feel "
        f"{similar_words[1]}. I wondered if it was like a {similar_words[2]}, "
        f"or maybe more like a {similar_words[3]}."
    )
    return sentence
# Generate a paragraph for a sample seed word.
seed_word = "sunrise"
paragraph = create_sentence(seed_word)
# A bare expression is only displayed as the last line of a notebook cell;
# print() shows the result when run as a script too.
print(paragraph)
Output:
Lab 6: Use a pre-trained Hugging Face model to analyze sentiment in text.
Assume a real-world application. Load the sentiment analysis pipeline.
Analyze the sentiment of sentences given as input.
Code:
from transformers import pipeline
# Sentiment-analysis pipeline; no model is named, so the library's default
# for this task is used.
sentiment_pipeline = pipeline(task="sentiment-analysis")
def analyze_sentiment(text):
    """Classify the sentiment of ``text`` and format the result.

    Args:
        text: The text to analyze.

    Returns:
        A string of the form ``"Sentiment: <label> (Confidence: <score>)"``
        with the score rounded to two decimals.
    """
    result = sentiment_pipeline(text)[0]  # Get the first result
    label = result["label"]
    confidence = result["score"]
    return f"Sentiment: {label} (Confidence: {confidence:.2f})"
# Sample review-style sentences to classify.
texts = [
    "I love this product! It's amazing.",
    "This is the worst experience I've ever had.",
    "The movie was okay, but nothing special.",
    "I'm extremely happy with my new laptop!",
    "This service is so frustrating and disappointing.",
]

# Print each text with its predicted sentiment, separated by a rule.
for text in texts:
    print(f"Text: {text}")
    print(analyze_sentiment(text))
    print("-" * 50)
Output:
Lab 7: Summarize long texts using a pre-trained summarization model
from Hugging Face. Load the summarization pipeline. Take a
passage as input and obtain the summarized text.
Code:
from transformers import pipeline
# Summarization pipeline; no model is named, so the library's default for
# this task is used.
summarizer = pipeline(task="summarization")
def summarize_text(text, max_length=130, min_length=30):
    """Summarize a long text with the pre-trained summarization pipeline.

    Args:
        text: The text to summarize.
        max_length: The maximum length of the summary.
        min_length: The minimum length of the summary.

    Returns:
        The summarized text.
    """
    # do_sample=False disables sampling during generation.
    summary = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
    )[0]['summary_text']
    return summary
# Sample passage to summarize.
passage = """
The Gemini API gives you access to Gemini models created by Google DeepMind. Gemini
models are built from the ground up to be multimodal, so you can reason seamlessly across
text, images, code, and audio.
"""
summary = summarize_text(passage)
# A bare expression is only displayed as the last line of a notebook cell;
# print() shows the result when run as a script too.
print(summary)
Output:
Lab 9: Take the Institution name as input. Use Pydantic to define the schema
for the desired output and create a custom output parser. Invoke the Chain
and Fetch Results. Extract the below Institution related details from
Wikipedia: The founder of the Institution. When it was founded. The current
branches of the institution. How many employees are working in it. A brief
4-line summary of the institution.
Code:
!pip install wikipedia
!pip install pydantic
import wikipedia
from pydantic import BaseModel, Field
from typing import List, Optional
class InstitutionDetails(BaseModel):
    """Pydantic schema for institution details.

    Every field is optional and defaults to ``None``, so an instance can
    be created empty and filled in as details are extracted.
    """

    founder: Optional[str] = Field(None, description="Founder of the institution")
    founded: Optional[int] = Field(None, description="Year of founding")
    branches: Optional[List[str]] = Field(
        None, description="Current branches of the institution"
    )
    num_employees: Optional[int] = Field(None, description="Number of employees")
    summary: Optional[str] = Field(
        None, description="A brief summary of the institution"
    )
def parse_wikipedia_page(page_content: str) -> InstitutionDetails:
    """Parse Wikipedia page content into an ``InstitutionDetails`` object.

    Only ``summary`` is filled in: the first four sentences of the text
    that precedes the first section heading, one sentence per line.
    Founder, founding year, branches and employee count stay ``None``;
    extracting them reliably needs NLP techniques such as NER or
    dependency parsing, because keyword search alone is unreliable.

    Args:
        page_content (str): The content of the Wikipedia page.

    Returns:
        InstitutionDetails: Parsed institution details.
    """
    import re  # local import so this block needs no change to the file's import list

    details = InstitutionDetails()
    text = (page_content or "").strip()
    if not text:
        return details

    # The summary comes from the content already in hand. Passing the full
    # page text to wikipedia.summary(), which expects a page title, fails
    # with "Expecting value: line 1 column 1 (char 0)".
    # NOTE(review): assumes section headings look like "== Title ==" in the
    # plain-text content - confirm against the wikipedia package output.
    lead = re.split(r"\n==+ ", text, maxsplit=1)[0]
    sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", lead) if s.strip()]
    if sentences:
        details.summary = "\n".join(sentences[:4])
    return details
# Entry point: ask for an institution name, fetch its Wikipedia page and
# print the parsed details as JSON.
if __name__ == "__main__":
    institution_name = input("Enter the institution name: ")
    try:
        page = wikipedia.page(institution_name)
        page_content = page.content
        details = parse_wikipedia_page(page_content)
        # Use .model_dump_json for proper output
        print(details.model_dump_json(indent=2))
    except wikipedia.exceptions.PageError:
        print(f"Wikipedia page not found for '{institution_name}'")
    except wikipedia.exceptions.DisambiguationError as e:
        # The name matched several pages; show the candidate titles.
        print(f"Disambiguation error: {e.options}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
Output:
Enter the institution name: Harvard Law School
Error parsing Wikipedia page: Expecting value: line 1 column 1 (char 0)
{
"founder": null,
"founded": null,
"branches": null,
"num_employees": null,
"summary": null
}
Lab 10: Build a chatbot for the Indian Penal Code. We'll start by
downloading the official Indian Penal Code document, and then we'll create
a chatbot that can interact with it. Users will be able to ask questions about
the Indian Penal Code and have a conversation with it.
Code:
import wikipedia
from pydantic import BaseModel, Field
from typing import List, Optional
import re
!pip install wikipedia
!pip install pydantic
class IPCSection(BaseModel):
    """Pydantic schema for an IPC section.

    ``section_number`` is required; ``description`` and ``punishment``
    default to ``None``.
    """

    section_number: str = Field(..., description="Section number of the IPC")
    description: Optional[str] = Field(None, description="Description of the section")
    punishment: Optional[str] = Field(
        None, description="Punishment prescribed for the offence"
    )
def parse_ipc_section(section_text: str) -> IPCSection:
    """Parse the text of an IPC section to extract relevant details.

    The section number is whatever precedes the first ``". "`` in the
    text. The description is the text between "Whoever" and "Shall be
    punished"; the punishment is the text after "Shall be punished" up to
    the last full stop. A part that cannot be found is replaced by a
    "... not found" message.

    Args:
        section_text: Raw text of one section.

    Returns:
        The populated ``IPCSection``.
    """
    section = IPCSection(section_number=section_text.split(". ")[0])

    # NOTE(review): the original flag argument was lost in extraction.
    # DOTALL lets the match span line breaks; IGNORECASE lets the
    # capitalised "Shall" in the pattern match lower-case text - confirm.
    flags = re.DOTALL | re.IGNORECASE

    # Use regular expressions to extract description and punishment
    description_match = re.search(
        r"(?<=Whoever).*(?=\s*Shall be punished)", section_text, flags
    )
    punishment_match = re.search(
        r"(?<=Shall be punished).*(?=\.)", section_text, flags
    )

    section.description = (
        description_match.group(0).strip()
        if description_match
        else "Description not found"
    )
    section.punishment = (
        punishment_match.group(0).strip()
        if punishment_match
        else "Punishment not found"
    )
    return section
def search_ipc(query: str) -> List[IPCSection]:
    """Search the "Indian Penal Code" Wikipedia page for a query.

    Every occurrence of ``query`` in the page content starts a match that
    runs until the next line beginning with ``<digits>.``; each match is
    parsed with ``parse_ipc_section``.

    Args:
        query: Text to look for. It is matched literally and
            case-insensitively.

    Returns:
        The parsed sections, or an empty list when the query is blank,
        nothing matches, or the page cannot be fetched. Errors are
        printed rather than raised.
    """
    query = query.strip()
    if not query:
        # A blank query would match at every position of the page.
        return []
    try:
        # auto_suggest=False looks up exactly this title; the library's
        # suggestion step can rewrite a valid title and raise PageError.
        page = wikipedia.page("Indian Penal Code", auto_suggest=False)
        content = page.content
        # re.escape keeps characters such as "(" or "?" in user input from
        # being read as regex syntax.
        pattern = re.compile(
            rf"{re.escape(query)}.*?(?=\n\d+\.)", re.DOTALL | re.IGNORECASE
        )
        return [parse_ipc_section(match.group()) for match in pattern.finditer(content)]
    except wikipedia.exceptions.PageError:
        print("Wikipedia page not found for 'Indian Penal Code'")
        return []
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return []
# Entry point: a simple question loop that runs until the user types 'exit'.
if __name__ == "__main__":
    while True:
        user_query = input("Ask a question about the Indian Penal Code (or type 'exit'): ")
        # Tolerate surrounding whitespace and any letter case in the exit command.
        if user_query.strip().lower() == 'exit':
            break
        results = search_ipc(user_query)
        if results:
            for section in results:
                print(section.model_dump_json(indent=2))
        else:
            print("No matching sections found.")
Output:
Ask a question about the Indian Penal Code (or type 'exit'): ospds
Wikipedia page not found for 'Indian Penal Code'
No matching sections found.
Ask a question about the Indian Penal Code (or type 'exit'): exit