7) Implement ARIMA on time series data
PROGRAM:
# ARIMA demo: build a synthetic daily series (linear trend plus Gaussian
# noise), inspect it (line plot, ADF test, ACF/PACF), fit an ARIMA(1, 1, 1)
# model and plot a 10-step forecast after the historical data.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller

# Generate synthetic time series data
np.random.seed(42)  # fixed seed so repeated runs draw the same noise
n = 100
time = np.arange(n)
data = 10 + 0.5 * time + np.random.normal(size=n)  # Linear trend with noise

# Create a DataFrame indexed by consecutive calendar days.
# Passing the date range as the index keeps its daily frequency attached.
ts_data = pd.DataFrame(
    {'Value': data},
    index=pd.date_range(start='2020-01-01', periods=n, freq='D', name='Time'),
)

# Plot the time series
plt.figure(figsize=(10, 6))
plt.plot(ts_data, label='Time Series Data')
plt.title('Synthetic Time Series Data')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()

# Check for stationarity with the Augmented Dickey-Fuller test.
# result[0] is the test statistic and result[1] the p-value.
result = adfuller(ts_data['Value'])
print(f'ADF Statistic: {result[0]}')
print(f'p-value: {result[1]}')

# Plot ACF and PACF
plot_acf(ts_data['Value'], lags=20)
plt.title('Autocorrelation Function (ACF)')
plt.show()
plot_pacf(ts_data['Value'], lags=20)
plt.title('Partial Autocorrelation Function (PACF)')
plt.show()

# Fit the ARIMA model
# For this example, let's assume we choose parameters (p=1, d=1, q=1)
model = ARIMA(ts_data['Value'], order=(1, 1, 1))
model_fit = model.fit()

# Print the summary of the model
print(model_fit.summary())

# Make forecasts for the 10 days that follow the last observation
forecast = model_fit.forecast(steps=10)
forecast_index = pd.date_range(
    start=ts_data.index[-1] + pd.Timedelta(days=1),
    periods=10,
    freq='D',
)

# Plot the original data and the forecast
plt.figure(figsize=(10, 6))
plt.plot(ts_data, label='Historical Data', color='blue')
plt.plot(forecast_index, forecast, label='Forecast', color='orange')
plt.title('ARIMA Forecast')
plt.xlabel('Date')
plt.ylabel('Value')
plt.legend()
plt.show()
Output:
ADF Statistic: 0.2480785554660348
p-value: 0.9748328625999818
8) Perform visualization techniques (types of maps - Bar, Column, Scatter,
3D, Cubes, etc.) in Python
Bar Chart
Column Chart (Vertical Bar Chart)
Scatter Plot
3D Cube Plot (using mpl_toolkits.mplot3d)
PROGRAM:
# Chart-type demo: draws four panels in one 2x2 figure from random sample
# data - a bar chart, a second bar-style chart, a 2D scatter plot and a 3D
# scatter plot.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D

# Sample Data
np.random.seed(0)  # fixed seed so repeated runs draw the same sample
categories = ['A', 'B', 'C', 'D', 'E']
values = np.random.randint(1, 10, size=len(categories))
x = np.random.rand(100)
y = np.random.rand(100)
z = np.random.rand(100) * 100  # for 3D plot

# 1. Bar Chart
plt.figure(figsize=(12, 8))
plt.subplot(2, 2, 1)
plt.bar(categories, values, color='skyblue')
plt.title('Bar Chart')
plt.xlabel('Categories')
plt.ylabel('Values')

# 2. Column Chart
# NOTE(review): the plotting call was lost in extraction. It is rebuilt as
# horizontal bars because the axis labels put 'Values' on x and 'Categories'
# on y. The exercise heading calls this a vertical chart - confirm which
# orientation is wanted.
plt.subplot(2, 2, 2)
plt.barh(categories, values, color='salmon')
plt.title('Column Chart')
plt.xlabel('Values')
plt.ylabel('Categories')

# 3. Scatter Plot
plt.subplot(2, 2, 3)
plt.scatter(x, y, color='green', alpha=0.6)
plt.title('Scatter Plot')
plt.xlabel('X-axis')
plt.ylabel('Y-axis')

# 4. 3D Cube Plot (3D scatter of the x, y, z samples)
ax = plt.subplot(2, 2, 4, projection='3d')
ax.scatter(x, y, z, color='purple')
ax.set_title('3D Cube Plot')
ax.set_xlabel('X-axis')
ax.set_ylabel('Y-axis')
ax.set_zlabel('Z-axis')

plt.tight_layout()
plt.show()
Output:
9) Perform descriptive analytics on healthcare data
PROGRAM:
# Descriptive analytics demo on a synthetic 100-patient healthcare table:
# summary statistics, gender counts, an age histogram, BMI box plots by
# gender, a blood-pressure/BMI scatter plot and a correlation heat map.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Create a synthetic healthcare dataset.
# No random seed is set here, so the values differ from run to run.
data = {
    'PatientID': range(1, 101),
    'Age': np.random.randint(20, 80, size=100),
    'Gender': np.random.choice(['Male', 'Female'], size=100),
    'BMI': np.random.uniform(18.5, 40.0, size=100),  # Body Mass Index
    'BloodPressure': np.random.randint(90, 180, size=100),
    'Cholesterol': np.random.choice(['Normal', 'High'], size=100),
    'Diabetes': np.random.choice(['Yes', 'No'], size=100),
}
df = pd.DataFrame(data)

# 1. Summary Statistics (include='all' also covers the text columns)
print("Summary Statistics:")
print(df.describe(include='all'))

# 2. Count of Gender
gender_count = df['Gender'].value_counts()
print("\nGender Distribution:")
print(gender_count)

# 3. Age Distribution
plt.figure(figsize=(12, 6))
sns.histplot(df['Age'], bins=10, kde=True)
plt.title('Age Distribution')
plt.xlabel('Age')
plt.ylabel('Frequency')
plt.show()

# 4. BMI Boxplot
plt.figure(figsize=(12, 6))
sns.boxplot(x='Gender', y='BMI', data=df)
plt.title('BMI by Gender')
plt.xlabel('Gender')
plt.ylabel('BMI')
plt.show()

# 5. Blood Pressure vs. BMI (colour = Diabetes, marker = Cholesterol)
plt.figure(figsize=(12, 6))
sns.scatterplot(x='BMI', y='BloodPressure', hue='Diabetes', style='Cholesterol', data=df)
plt.title('Blood Pressure vs. BMI')
plt.xlabel('BMI')
plt.ylabel('Blood Pressure')
plt.legend(title='Diabetes/Cholesterol')
plt.show()

# 6. Correlation Matrix of the numeric measurements
correlation_matrix = df[['Age', 'BMI', 'BloodPressure']].corr()
plt.figure(figsize=(8, 6))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Correlation Matrix')
plt.show()
Output:
Summary Statistics:
PatientID Age Gender ... BloodPressure Cholesterol Diabetes
count 100.000000 100.000000 100 ... 100.000000 100 100
unique NaN NaN 2 ... NaN 2 2
top NaN NaN Female ... NaN Normal Yes
freq NaN NaN 53 ... NaN 51 63
mean 50.500000 45.500000 NaN ... 135.530000 NaN NaN
std 29.011492 17.584141 NaN ... 25.007253 NaN NaN
min 1.000000 20.000000 NaN ... 91.000000 NaN NaN
25% 25.750000 28.750000 NaN ... 118.000000 NaN NaN
50% 50.500000 46.000000 NaN ... 136.000000 NaN NaN
75% 75.250000 60.250000 NaN ... 158.250000 NaN NaN
max 100.000000 79.000000 NaN ... 178.000000 NaN NaN
[11 rows x 7 columns]
Gender Distribution:
Gender
Female 53
Male 47
Name: count, dtype: int64
10) Perform predictive analysis on product sales data
PROGRAM:
# Predictive analysis demo on synthetic monthly sales: plot the series, fit
# a linear regression of sales on the date (as an ordinal day number),
# score it on a held-out split and extrapolate six further months.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Create a synthetic product sales dataset
np.random.seed(0)  # fixed seed so repeated runs draw the same sales
# A MonthEnd offset object gives month-end dates without relying on the
# 'M' string alias.
months = pd.date_range(start='2020-01-01', periods=24, freq=pd.offsets.MonthEnd())
# Random base level plus a linear ramp from 0 to 1000 (increasing trend)
sales = np.random.randint(1000, 5000, size=24) + np.linspace(0, 1000, 24)
data = pd.DataFrame({'Month': months, 'Sales': sales})

# 1. Visualize the Sales Data
plt.figure(figsize=(10, 6))
plt.plot(data['Month'], data['Sales'], marker='o')
plt.title('Monthly Product Sales')
plt.xlabel('Month')
plt.ylabel('Sales')
plt.xticks(rotation=45)
plt.grid()
plt.show()

# 2. Prepare the data for predictive analysis
# Convert dates to ordinal day numbers so they can serve as a numeric feature
data['Month_ordinal'] = data['Month'].map(pd.Timestamp.toordinal)
X = data[['Month_ordinal']]  # Feature
y = data['Sales']  # Target

# 3. Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# 4. Create and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# 5. Make predictions
y_pred = model.predict(X_test)

# 6. Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse:.2f}')
print(f'R-squared: {r2:.2f}')

# 7. Visualize the predictions
plt.figure(figsize=(10, 6))
plt.plot(data['Month'], data['Sales'], marker='o', label='Actual Sales')
# Map the test ordinals back to dates so they share the x-axis with the series
plt.scatter(X_test['Month_ordinal'].map(pd.Timestamp.fromordinal), y_pred, color='red',
            label='Predicted Sales')
plt.title('Sales Prediction')
plt.xlabel('Month')
plt.ylabel('Sales')
plt.xticks(rotation=45)
plt.legend()
plt.grid()
plt.show()

# 8. Forecast future sales (next 6 months)
future_months = pd.date_range(
    start=data['Month'].iloc[-1] + pd.DateOffset(months=1),
    periods=6,
    freq=pd.offsets.MonthEnd(),
)
# Use a one-column frame with the same column name the model was trained on,
# so the feature name matches at predict time.
future_months_ordinal = pd.DataFrame(
    {'Month_ordinal': future_months.map(pd.Timestamp.toordinal)}
)
future_sales = model.predict(future_months_ordinal)

# 9. Visualize future predictions
plt.figure(figsize=(10, 6))
plt.plot(data['Month'], data['Sales'], marker='o', label='Actual Sales')
plt.plot(future_months, future_sales, marker='o', color='orange', label='Forecasted Sales')
plt.title('Sales Prediction and Forecast')
plt.xlabel('Month')
plt.ylabel('Sales')
plt.xticks(rotation=45)
plt.legend()
plt.grid()
plt.show()
Output:
months = pd.date_range(start='2020-01-01', periods=24, freq='M')
Mean Squared Error: 1276134.25
R-squared: 0.23
11) Apply predictive analytics for weather forecasting
PROGRAM:
# Weather forecasting demo on synthetic data: one year of random daily
# temperature, humidity and pressure; a linear regression predicts
# temperature from humidity and pressure, is scored on a held-out split, and
# is then applied to seven further days of random inputs.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Create a synthetic weather dataset
# NOTE(review): the sampler name was lost in extraction; np.random.uniform
# over the given (low, high) ranges is assumed - confirm against the original.
np.random.seed(0)  # fixed seed so repeated runs draw the same data
days = pd.date_range(start='2020-01-01', periods=365, freq='D')
temperature = np.random.uniform(15, 30, size=365)  # Random temperatures
humidity = np.random.uniform(30, 90, size=365)  # Random humidity
pressure = np.random.uniform(980, 1030, size=365)  # Random atmospheric pressure
data = pd.DataFrame({'Date': days, 'Temperature': temperature, 'Humidity': humidity,
                     'Pressure': pressure})

# 1. Visualize the Temperature Data
plt.figure(figsize=(10, 6))
plt.plot(data['Date'], data['Temperature'], label='Temperature', color='orange')
plt.title('Daily Temperature')
plt.xlabel('Date')
plt.ylabel('Temperature (°C)')
plt.xticks(rotation=45)
plt.legend()
plt.show()

# 2. Prepare the data for predictive analysis
X = data[['Humidity', 'Pressure']]  # Features
y = data['Temperature']  # Target

# 3. Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# 4. Create and train the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# 5. Make predictions
y_pred = model.predict(X_test)

# 6. Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse:.2f}')
print(f'R-squared: {r2:.2f}')

# 7. Visualize the predictions vs actual temperatures
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred, color='blue')
# Dashed diagonal marks where predicted equals actual
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], color='red', linestyle='--')
plt.title('Actual vs Predicted Temperatures')
plt.xlabel('Actual Temperature (°C)')
plt.ylabel('Predicted Temperature (°C)')
plt.grid()
plt.show()

# 8. Forecasting future temperatures (for the next 7 days)
future_days = pd.date_range(start=data['Date'].iloc[-1] + pd.Timedelta(days=1), periods=7,
                            freq='D')
future_humidity = np.random.uniform(30, 90, size=7)  # Random future humidity
future_pressure = np.random.uniform(980, 1030, size=7)  # Random future pressure
# Same column names and order as the training features
future_X = pd.DataFrame({'Humidity': future_humidity, 'Pressure': future_pressure})

# Make future predictions
future_temperatures = model.predict(future_X)

# 9. Visualize future predictions
plt.figure(figsize=(10, 6))
plt.plot(data['Date'], data['Temperature'], label='Historical Temperature', color='orange')
plt.plot(future_days, future_temperatures, marker='o', label='Forecasted Temperature',
         color='blue')
plt.title('Temperature Forecasting')
plt.xlabel('Date')
plt.ylabel('Temperature (°C)')
plt.xticks(rotation=45)
plt.legend()
plt.grid()
plt.show()
Output:
Mean Squared Error: 20.41
R-squared: -0.00