0% found this document useful (0 votes)
2 views8 pages

PCA and Factor Analysis in R

Uploaded by

mirapalanani4
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
2 views8 pages

PCA and Factor Analysis in R

Uploaded by

mirapalanani4
Copyright
© All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd

Practical 10

Principal component analysis using R

data("iris")
> str(iris)
'[Link]': 150 obs. of 5 variables:
$ [Link]: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
$ [Link] : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
$ [Link]: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
$ [Link] : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
$ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1
1 1 1 1 1 1 ...
> #partition of data
> [Link](111)
> ind <- sample(2, nrow(iris),
+ replace = TRUE,
+ prob = c(0.8, 0.2))
> training <- iris[ind==1,];
> testing <- iris[ind==2,];
> #Scatter Plot & Correlations
> [Link]("psych")
> library(psych)
> [Link](training[,-5],
+ gap = 0,
+ bg = c("red", "yellow", "blue")[training$Species],
+ pch=21)
> #Principal Component Analysis
> pc <- prcomp(training[,-5],
+ center = TRUE,
+ scale. = TRUE)
> attributes(pc)
$names
[1] "sdev" "rotation" "center" "scale"
[5] "x"

$class
[1] "prcomp"

> print(pc)
Standard deviations (1, .., p=4):
[1] 1.7173318 0.9403519 0.3843232 0.1371332

Rotation (n x k) = (4 x 4):
PC1 PC2 PC3
[Link] 0.5147163 -0.39817685 0.7242679
[Link] -0.2926048 -0.91328503 -0.2557463
[Link] 0.5772530 -0.02932037 -0.1755427
[Link] 0.5623421 -0.08065952 -0.6158040
PC4
[Link] 0.2279438
[Link] -0.1220110
[Link] -0.7969342
[Link] 0.5459403
Practical 11
Factor analysis using R

# Load the dataset


> data(iris)
> # View the first few rows of the dataset
> head(iris)
[Link] [Link] [Link]
1 5.1 3.5 1.4
2 4.9 3.0 1.4
3 4.7 3.2 1.3
4 4.6 3.1 1.5
5 5.0 3.6 1.4
6 5.4 3.9 1.7
[Link] Species
1 0.2 setosa
2 0.2 setosa
3 0.2 setosa
4 0.2 setosa
5 0.2 setosa
6 0.4 setosa
> # Scale the data
> iris_scaled <- scale(iris[,1:4])
> # Perform factor analysis
> library(psych)
> fa <- fa(r = iris_scaled,
+ nfactors = 4,
+ rotate = "varimax")
> summary(fa)

Factor analysis with Call: fa(r = iris_scaled, nfactors = 4, rotate


= "varimax")

Test of the hypothesis that 4 factors are sufficient.


The degrees of freedom for the model is -4 and the objective
function was 0
The number of observations was 150 with Chi Square = 0
with prob < NA

The root mean square of the residuals (RMSA) is 0


The df corrected root mean square of the residuals is NA

Tucker Lewis Index of factoring reliability = 1.009


> # View the factor loadings
> fa$loadings

Loadings:
MR1 MR2 MR3 MR4
[Link] 0.997
[Link] -0.108 0.757
[Link] 0.861 -0.413 0.288
[Link] 0.801 -0.317 0.492

MR1 MR2 MR3 MR4


SS loadings 2.389 0.844 0.332 0.000
Proportion Var 0.597 0.211 0.083 0.000
Cumulative Var 0.597 0.808 0.891 0.891
>
LINEAR DISCREMINANT ANALYSIS
library(MASS)

> library(tidyverse)

> library(caret)

> theme_set(theme_classic())

> # Load the data

> data("iris")

> # Split the data into training (80%) and test set (20%)

> [Link](123)

> [Link] <- iris$Species %>%

+ createDataPartition(p = 0.8, list = FALSE)

> [Link] <- iris[[Link], ]

> [Link] <- iris[-[Link], ]

> # Estimate preprocessing parameters

> [Link] <- [Link] %>%

+ preProcess(method = c("center", "scale"))

> # Transform the data using the estimated parameters

> [Link] <- [Link] %>% predict([Link])

> [Link] <- [Link] %>% predict([Link])

> # Fit the model

> model <- lda(Species~., data = [Link])

> # Make predictions

> predictions <- model %>% predict([Link])

> # Model accuracy

> mean(predictions$class==[Link]$Species)

[1] 0.9666667

> model <- lda(Species~., data = [Link])

> model
Call:

lda(Species ~ ., data = [Link])

Prior probabilities of groups:

setosa versicolor virginica

0.3333333 0.3333333 0.3333333

Group means:

[Link] [Link] [Link]

setosa -1.0112835 0.78048647 -1.2900001

versicolor 0.1014181 -0.68674658 0.2566029

virginica 0.9098654 -0.09373989 1.0333972

[Link]

setosa -1.2453195

versicolor 0.1472614

virginica 1.0980581

Coefficients of linear discriminants:

LD1 LD2

[Link] 0.6794973 0.04463786

[Link] 0.6565085 -1.00330120

[Link] -3.8365047 1.44176147

[Link] -2.2722313 -1.96516251

Proportion of trace:

LD1 LD2

0.9902 0.0098

> cluster analysis


library(factoextra)
> # Loading dataset
> df <- mtcars
> # Omitting any NA values
> df <- [Link](df)
> # Scaling dataset
> df <- scale(df)
> # output to be present as PNG file
> png(file = "[Link]")
> km <- kmeans(df, centers = 4, nstart = 25)
> # Visualize the clusters
> fviz_cluster(km, data = df)
> # saving the file
> [Link]()
null device
1
> # output to be present as PNG file
> png(file = "[Link]")
> km <- kmeans(df, centers = 5, nstart = 25)
> # Visualize the clusters
> fviz_cluster(km, data = df)
> # saving the file
> [Link]()
null device
1
>

# Installing the package


[Link]("dplyr")
# Loading package
> library(dplyr)
> # Summary of dataset in package
> summary(mtcars)
mpg cyl disp
Min. :10.40 Min. :4.000 Min. : 71.1
1st Qu.:15.43 1st Qu.:4.000 1st Qu.:120.8
Median :19.20 Median :6.000 Median :196.3
Mean :20.09 Mean :6.188 Mean :230.7
3rd Qu.:22.80 3rd Qu.:8.000 3rd Qu.:326.0
Max. :33.90 Max. :8.000 Max. :472.0
hp drat wt
Min. : 52.0 Min. :2.760 Min. :1.513
1st Qu.: 96.5 1st Qu.:3.080 1st Qu.:2.581
Median :123.0 Median :3.695 Median :3.325
Mean :146.7 Mean :3.597 Mean :3.217
3rd Qu.:180.0 3rd Qu.:3.920 3rd Qu.:3.610
Max. :335.0 Max. :4.930 Max. :5.424
qsec vs
Min. :14.50 Min. :0.0000
1st Qu.:16.89 1st Qu.:0.0000
Median :17.71 Median :0.0000
Mean :17.85 Mean :0.4375
3rd Qu.:18.90 3rd Qu.:1.0000
Max. :22.90 Max. :1.0000
am gear carb
Min. :0.0000 Min. :3.000 Min. :1.000
1st Qu.:0.0000 1st Qu.:3.000 1st Qu.:2.000
Median :0.0000 Median :4.000 Median :2.000
Mean :0.4062 Mean :3.688 Mean :2.812
3rd Qu.:1.0000 3rd Qu.:4.000 3rd Qu.:4.000
Max. :1.0000 Max. :5.000 Max. :8.000
> Installing the package
> # For Logistic regression
> [Link]("caTools")
Error in [Link] : Updating loaded packages
> [Link]("caTools")
> # For ROC curve to evaluate model
> [Link]("ROCR")
Error in [Link] : Updating loaded packages
> [Link]("ROCR")

> # Loading package


> library(caTools)
> library(ROCR)
> # Splitting dataset
> split <- [Link](mtcars, SplitRatio = 0.8)
> split
[1] TRUE TRUE TRUE TRUE FALSE TRUE FALSE
[8] TRUE TRUE TRUE FALSE
> train_reg <- subset(mtcars, split == "TRUE")
> test_reg <- subset(mtcars, split == "FALSE")
> # Training model
> logistic_model <- glm(vs ~ wt + disp,
+ data = train_reg,
+ family = "binomial")
> logistic_model

Call: glm(formula = vs ~ wt + disp, family = "binomial", data =


train_reg)

Coefficients:
(Intercept) wt disp
3.65725 0.50755 -0.02638

Degrees of Freedom: 23 Total (i.e. Null); 21 Residual


Null Deviance: 33.27
Residual Deviance: 17.64 AIC: 23.64
> # Summary
> summary(logistic_model)

Call:
glm(formula = vs ~ wt + disp, family = "binomial", data =
train_reg)

Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 3.65725 3.20933 1.140 0.254
wt 0.50755 1.74052 0.292 0.771
disp -0.02638 0.01611 -1.638 0.102

(Dispersion parameter for binomial family taken to be 1)

Null deviance: 33.271 on 23 degrees of freedom


Residual deviance: 17.638 on 21 degrees of freedom
AIC: 23.638

Number of Fisher Scoring iterations: 6

> predict_reg <- predict(logistic_model,


+ test_reg, type = "response")
> predict_reg
Hornet Sportabout Duster 360
0.01642104 0.01752145
Merc 280C Lincoln Continental
0.72757938 0.00325796
Fiat 128 Dodge Challenger
0.93690626 0.05001231
Porsche 914-2 Ford Pantera L
0.82781339 0.01812313

>
# Create the data frame
> data <- [Link](
+ Years_Exp = c(1.1, 1.3, 1.5, 2.0, 2.2, 2.9, 3.0, 3.2, 3.2, 3.7),
+ Salary = c(39343.00, 46205.00, 37731.00, 43525.00,
+ 39891.00, 56642.00, 60150.00, 54445.00, 64445.00,
57189.00)
+)
> # Create the scatter plot
> plot(data$Years_Exp, data$Salary,
+ xlab = "Years Experienced",
+ ylab = "Salary",
+ main = "Scatter Plot of Years Experienced vs Salary")
> [Link]('caTools')
Error in [Link] : Updating loaded packages
> [Link]("caTools")
WARNING: Rtools is required to build R packages but is not
currently installed. Please download and install the appropriate
version of Rtools before proceeding:

[Link]
Warning in [Link] :
package ‘caTools’ is in use and will not be installed
> library(caTools)
> split = [Link](data$Salary, SplitRatio = 0.7)
> trainingset = subset(data, split == TRUE)
> testset = subset(data, split == FALSE)
> # Fitting Simple Linear Regression to the Training set
> lm.r= lm(formula = Salary ~ Years_Exp,
+ data = trainingset)
> #Summary of the model
> summary(lm.r)

Call:
lm(formula = Salary ~ Years_Exp, data = trainingset)

Residuals:
1 2 3 5 6 7
479.5 5713.8 -4387.8 -7924.6 3129.5 5823.7
10
-2834.1

Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 29911 5770 5.184 0.00351
Years_Exp 8138 2382 3.417 0.01890

(Intercept) **
Years_Exp *
---
Signif. codes:
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 5774 on 5 degrees of freedom


Multiple R-squared: 0.7002, Adjusted R-squared:
0.6402
F-statistic: 11.68 on 1 and 5 DF, p-value: 0.0189

> # Create a data frame with new input values


> new_data <- [Link](Years_Exp = c(4.0, 4.5, 5.0))
> # Predict using the linear regression model
> predicted_salaries <- predict(lm.r, newdata = new_data)
> # Display the predicted salaries
> print(predicted_salaries)
1 2 3
62464.62 66533.78 70602.94
>

> # given below is the data pertaining to income, years of service


and tax paid by
#by 10 individuals working in multinomial company

# given below is the data pertaining to income ,years of service and


tax paid by
#by 10 individuals working in multinomial company

Taxdata<-[Link](
taxpaid=c(7,10,8,6,5,5,9,6,9,7),
Income=c(2.5,3,2,2.2,1.8,1.5,1.5,1.9,2.3,1.6),
years=c(9,13,10,10,8,8,12,10,9,5))
# fitting of multiple regression
mulreg1<-lm(Taxdata$taxpaid~Taxdata$Income+Taxdata$years)
mulreg1
summary(mulreg)

You might also like