✅ 1. Parse Text File
✅ 2. Parse CSV File
✅ 3. Parse JSON File
✅ 4. Parse HTML File (using BeautifulSoup)
📦 You need to install BeautifulSoup: pip install beautifulsoup4
from bs4 import BeautifulSoup
# Open and read the HTML file.
# NOTE(review): the file name in this path was lost during text extraction
# ('[Link]' is a placeholder) — substitute the real HTML file before running.
with open('E:/[Link]', 'r') as file:
    html_content = file.read()

# Parse the HTML using BeautifulSoup.
# NOTE(review): the parser name was also garbled; 'html.parser' (the parser
# bundled with Python) is presumed — confirm against the original material.
soup = BeautifulSoup(html_content, 'html.parser')

# Print the title of the page
print("Title:", soup.title.string)

# Print the text inside the h1 tag
print("Heading:", soup.h1.text)
✅ 5. Parse XML File (using ElementTree)
import xml.etree.ElementTree as ET

# Parse the XML file.
# NOTE(review): the file name in this path was lost during text extraction
# ('[Link]' is a placeholder) — substitute the real XML file before running.
tree = ET.parse('E:/[Link]')
root = tree.getroot()

# Print the root tag
print("Root tag:", root.tag)

# Print each direct child element of the root and its text
for child in root:
    print(child.tag + ":", child.text)
✅ Notes:
ElementTree is part of Python’s standard library — no need to install anything.
from numpy import *
# Demonstrates basic NumPy array creation, reshaping, slicing, element
# assignment, element-wise arithmetic/comparison and aggregation.

# 1. Create arrays
a1 = array([1, 2, 3, 4, 5])  # 1D array
a2 = arange(6, 16).reshape(2, 5)  # 2D array with shape (2, 5)
print("Array1:\n", a1)
print("Array2:\n", a2)

# 2. Reshape array
reshaped = a2.reshape(5, 2)
print("\nReshaped Array2 to (5,2):\n", reshaped)

# 3. Slice arrays
print("\n Slice first 3 elements of Array1:", a1[:3])
print("Slice second row of Array2:", a2[1])

# 4. Modify an element by index
a1[2] = 99  # change third element
print("\n Modified Array1 (index 2 changed to 99):", a1)

# 5. Apply arithmetic (element-wise; a1 already holds 99 at index 2)
x = a1 + 10
print("\nArray1 + 10:\n", x)
y = a1 - 5
print("\nArray1 - 5:\n", y)
z = a1 * 10
print("\nArray1 * 10:\n", z)
p = a1 / 10
print("\nArray1 / 10:\n", p)

# 6. Apply logic (element-wise comparison yields a boolean array)
l = a1 > 3
print("\nArray1 > 3:\n", l)

# 7. Aggregation
# Array methods are used so the results do not depend on whether the star
# import has replaced the built-in sum()/max() with NumPy's versions.
print("\nSum of Array2:", a2.sum())
print("Mean of Array2:", a2.mean())
print("Max of Array2:", a2.max())
2. Program for reading and writing a binary file.
import shutil

# Copy a binary (image) file from a source path to a destination path.
# NOTE(review): both file names were lost during text extraction ('[Link]'
# is a placeholder). Replace them with two DIFFERENT paths before running —
# as written, source and destination are the same literal, and opening the
# destination for writing would truncate the source.
source_path = 'E:/Data engineering/[Link]'
target_path = 'E:/Data engineering/[Link]'

try:
    # The context manager closes both files even if the copy fails, so no
    # manual close() or NameError guard is needed.
    with open(source_path, 'rb') as source, open(target_path, 'wb') as target:
        # Copy in fixed-size chunks rather than splitting binary data on
        # newline bytes.
        shutil.copyfileobj(source, target)
    print("Image copied successfully.")
except FileNotFoundError:
    print("Error: Source image not found.")
except OSError as e:
    print("I/O error occurred:", e)
import pandas as pd
# Single-level indexing: a DataFrame with the default integer row index
df_single = pd.DataFrame({
    'Name': ['Anusha', 'Keerthana', 'Ajay'],
    'Age': [25, 30, 35],
})
print("Single-level Indexing:")
print(df_single)

# Hierarchical (multi-level) indexing: passing two parallel label lists as
# the index gives every value a two-level (outer, inner) key
data = pd.Series(
    [100, 200, 300, 400],
    index=[['A', 'A', 'B', 'B'], ['Math', 'Science', 'Math', 'Science']],
)
print("\nHierarchical Indexing:")
print(data)
Program for handling missing data
import pandas as pd
import numpy as np
# Build a small DataFrame that contains missing values (NaN)
data = {
    'Name': ['Lily', 'Khushi', 'Mohan'],
    'Age': [25, np.nan, 35],
    'Score': [90, 85, np.nan],
}
df = pd.DataFrame(data)
print("Original DataFrame with NaNs:")
print(df)

# Fill missing values (returns a new DataFrame; df itself is unchanged)
df_filled = df.fillna(0)
print("\nFilled Missing Values with 0:")
print(df_filled)

# Drop rows with any NaN (also returns a new DataFrame)
df_dropped = df.dropna()
print("\nDropped Rows with Any NaN:")
print(df_dropped)
import pandas as pd
# Sample DataFrame with two numeric columns
df = pd.DataFrame({
    'A': [1, 2, 3],
    'B': [4, 5, 6],
})
print("Original DataFrame:")
print(df)

# Arithmetic operation: add a column holding the row-wise sum of A and B
df['C'] = df['A'] + df['B']
print("\nAfter Arithmetic Operation (A + B):")
print(df)

# Boolean operation: keep only the rows where the mask is True
filtered = df[df['C'] > 6]
print("\nFiltered Rows (C > 6):")
print(filtered)
import pandas as pd
# Merging: join the two frames on their shared 'ID' column
df1 = pd.DataFrame({'ID': [1, 2], 'Name': ['Khushi', 'Reshama']})
df2 = pd.DataFrame({'ID': [1, 2], 'Score': [90, 85]})
merged = pd.merge(df1, df2, on='ID')
print("Merged DataFrame:")
print(merged)

# Aggregation: average salary per department
df = pd.DataFrame({
    'Department': ['HR', 'HR', 'IT', 'IT'],
    'Salary': [50000, 55000, 60000, 65000],
})
agg = df.groupby('Department').mean()
print("\nAggregated Average Salary by Department:")
print(agg)
7. Plot Individual Columns and Entire Table.
import pandas as pd
import matplotlib.pyplot as plt

# Load or create a sample DataFrame
data = {
    'Year': [2018, 2019, 2020, 2021, 2022],
    'Sales': [100, 150, 130, 170, 200],
    'Profit': [20, 35, 30, 50, 60],
}
df = pd.DataFrame(data)

# Plot an individual column: Sales against Year
plt.figure(figsize=(8, 5))
plt.plot(df['Year'], df['Sales'], marker='o', label='Sales')
plt.title('Sales Over Years')
plt.xlabel('Year')
plt.ylabel('Sales')
plt.grid(True)
plt.legend()
plt.show()

# Plot another individual column: Profit against Year
plt.figure(figsize=(8, 5))
plt.plot(df['Year'], df['Profit'], color='green', marker='s', label='Profit')
plt.title('Profit Over Years')
plt.xlabel('Year')
plt.ylabel('Profit')
plt.grid(True)
plt.legend()
plt.show()

# Plot the whole table: every column except 'Year' against Year on one figure
plt.figure(figsize=(8, 5))
for column in df.columns:
    if column != 'Year':
        plt.plot(df['Year'], df[column], marker='o', label=column)
plt.title('Sales & Profit Over Years')
plt.xlabel('Year')
plt.ylabel('Value')
plt.legend()
plt.grid(True)
plt.show()
Reading data from a file and writing data to a file
# Writing data to the file.
# NOTE(review): the file name in this path was lost during text extraction
# ('[Link]' is a placeholder) — substitute the real file name before running.
try:
    # The context manager closes the file even if a write fails, replacing
    # the manual close() that relied on catching NameError.
    with open("E:/Data engineering/[Link]", 'w') as f:
        # Terminate each item with a newline so the two entries are stored
        # on separate lines instead of being fused into "LaptopMobile".
        f.write("Laptop\n")
        f.write("Mobile\n")
    print("Data written successfully.")
except OSError as e:
    print("An I/O error occurred:", e)
#appending data in the file
# Reading data from the file.
# NOTE(review): the file name in the path and in the error message was lost
# during text extraction ('[Link]' is a placeholder) — substitute the real
# file name before running.
try:
    # The context manager closes the file on every path; the previous
    # NameError-based cleanup was unreliable because the name `f` may already
    # be bound by code that ran earlier in the same session.
    with open("E:/Data engineering/[Link]", 'r') as f:
        # NOTE(review): the method name was garbled; readline() is presumed
        # because two consecutive reads are printed — confirm.
        print(f.readline(), end="")  # the line keeps its own trailing newline
        print(f.readline())
except FileNotFoundError:
    print("Error: The file '[Link]' was not found.")
except OSError as e:
    print("An I/O error occurred:", e)