Compare lasso, ridge and elastic net regression
LINEAR_RIDGE_LASSO_REGRESSION
mugo_muiruri_james
# Load required packages --------------------------------------------------
library(caret)    # model training / resampling (loads ggplot2, lattice)
library(glmnet)   # ridge / lasso / elastic-net fitting (loads Matrix)
library(mlbench)  # provides the BostonHousing data set
library(psych)    # pairs.panels(); masks %+% and alpha from ggplot2
library(Amelia)   # missmap() for missing-value visualisation (loads Rcpp)
## Loading required package: Rcpp
##
##
##
##
##
##
##
##
##
##
##
##
## Amelia II: Multiple Imputation
## (Version 1.8.1)
## Copyright (C) James Honaker, Gary King and Matthew Blackwell
## Refer to http://gking.harvard.edu/amelia/ for more information
# Load the Boston housing data --------------------------------------------
data(BostonHousing)
data <- BostonHousing

# Data cleaning: visual check for missing values
missmap(BostonHousing, col = c("white", "black"), y.at = 1,
        y.labels = "", legend = TRUE)

# Explore the data
head(data)
##
crim zn indus chas
nox
rm age
dis rad tax ptratio
b
lstat
##-
##-
##-
##-
##-
##-
##
medv
## 1 24.0
## 2 21.6
## 3 34.7
## 4 33.4
## 5 36.2
## 6 28.7
# Inspect the variables: 506 obs. of 14 variables; chas is a 2-level factor
str(data)
## 'data.frame':
506 obs. of 14 variables:
## $ crim
: num- ...
## $ zn
: num- ...
## $ indus : num- ...
## $ chas
: Factor w/ 2 levels "0","1":- ...
## $ nox
: num- ...
## $ rm
: num- ...
## $ age
: num- ...
## $ dis
: num- ...
## $ rad
: num- ...
## $ tax
: num- ...
## $ ptratio: num- ...
## $ b
: num- ...
## $ lstat : num- ...
## $ medv
: num- ...
# Drop the factor column (chas, col 4) and the response (medv, col 14)
head(data[c(-4, -14)])
##
##
##
##
##
##
##
1
2
3
4
5
6
crim zn indus
nox
rm age
dis rad tax ptratio
b lstat-
# Correlation / scatterplot matrix of the numeric predictors
pairs.panels(data[c(-4, -14)])
# Partition the data: roughly 70% train / 30% test ------------------------
set.seed(123)
ind <- sample(2, nrow(data), replace = TRUE, prob = c(0.7, 0.3))
train <- data[ind == 1, ]
test <- data[ind == 2, ]

# Custom resampling scheme: 10-fold CV repeated 5 times, verbose logging
custom <- trainControl(method = "repeatedcv",
                       number = 10,
                       repeats = 5,
                       verboseIter = TRUE)
# Multiple linear regression baseline
# NOTE(review): the name `lm` shadows stats::lm for the rest of the script
set.seed(1234)
lm <- train(medv ~ ., train, method = "lm", trControl = custom)
##
##
##
##
##
##
##
##
##
##
##
##
+
+
+
+
+
+
-
Fold01.Rep1:
Fold01.Rep1:
Fold02.Rep1:
Fold02.Rep1:
Fold03.Rep1:
Fold03.Rep1:
Fold04.Rep1:
Fold04.Rep1:
Fold05.Rep1:
Fold05.Rep1:
Fold06.Rep1:
Fold06.Rep1:
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
-
Fold07.Rep1:
Fold07.Rep1:
Fold08.Rep1:
Fold08.Rep1:
Fold09.Rep1:
Fold09.Rep1:
Fold10.Rep1:
Fold10.Rep1:
Fold01.Rep2:
Fold01.Rep2:
Fold02.Rep2:
Fold02.Rep2:
Fold03.Rep2:
Fold03.Rep2:
Fold04.Rep2:
Fold04.Rep2:
Fold05.Rep2:
Fold05.Rep2:
Fold06.Rep2:
Fold06.Rep2:
Fold07.Rep2:
Fold07.Rep2:
Fold08.Rep2:
Fold08.Rep2:
Fold09.Rep2:
Fold09.Rep2:
Fold10.Rep2:
Fold10.Rep2:
Fold01.Rep3:
Fold01.Rep3:
Fold02.Rep3:
Fold02.Rep3:
Fold03.Rep3:
Fold03.Rep3:
Fold04.Rep3:
Fold04.Rep3:
Fold05.Rep3:
Fold05.Rep3:
Fold06.Rep3:
Fold06.Rep3:
Fold07.Rep3:
Fold07.Rep3:
Fold08.Rep3:
Fold08.Rep3:
Fold09.Rep3:
Fold09.Rep3:
Fold10.Rep3:
Fold10.Rep3:
Fold01.Rep4:
Fold01.Rep4:
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
intercept=TRUE
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
+ Fold02.Rep4: intercept=TRUE
- Fold02.Rep4: intercept=TRUE
+ Fold03.Rep4: intercept=TRUE
- Fold03.Rep4: intercept=TRUE
+ Fold04.Rep4: intercept=TRUE
- Fold04.Rep4: intercept=TRUE
+ Fold05.Rep4: intercept=TRUE
- Fold05.Rep4: intercept=TRUE
+ Fold06.Rep4: intercept=TRUE
- Fold06.Rep4: intercept=TRUE
+ Fold07.Rep4: intercept=TRUE
- Fold07.Rep4: intercept=TRUE
+ Fold08.Rep4: intercept=TRUE
- Fold08.Rep4: intercept=TRUE
+ Fold09.Rep4: intercept=TRUE
- Fold09.Rep4: intercept=TRUE
+ Fold10.Rep4: intercept=TRUE
- Fold10.Rep4: intercept=TRUE
+ Fold01.Rep5: intercept=TRUE
- Fold01.Rep5: intercept=TRUE
+ Fold02.Rep5: intercept=TRUE
- Fold02.Rep5: intercept=TRUE
+ Fold03.Rep5: intercept=TRUE
- Fold03.Rep5: intercept=TRUE
+ Fold04.Rep5: intercept=TRUE
- Fold04.Rep5: intercept=TRUE
+ Fold05.Rep5: intercept=TRUE
- Fold05.Rep5: intercept=TRUE
+ Fold06.Rep5: intercept=TRUE
- Fold06.Rep5: intercept=TRUE
+ Fold07.Rep5: intercept=TRUE
- Fold07.Rep5: intercept=TRUE
+ Fold08.Rep5: intercept=TRUE
- Fold08.Rep5: intercept=TRUE
+ Fold09.Rep5: intercept=TRUE
- Fold09.Rep5: intercept=TRUE
+ Fold10.Rep5: intercept=TRUE
- Fold10.Rep5: intercept=TRUE
## Aggregating results
## Fitting final model on full training set
lm
##
##
##
##
##
##
##
## Linear Regression
## 363 samples
## 13 predictor
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 5 times)
## Summary of sample sizes: 326, 327, 326, 327, 326, 327, ...
## Resampling results:
##
##
RMSE
Rsquared
MAE
##-
##
## Tuning parameter 'intercept' was held constant at a value of TRUE
# Cross-validated performance (RMSE, R-squared, MAE) of the linear model
lm$results
##
intercept
RMSE Rsquared
MAE
RMSESD RsquaredSD
MAESD
## 1
TRUE-
# Coefficient table and fit statistics of the final linear model
summary(lm)
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
Call:
lm(formula = .outcome ~ ., data = dat)
Residuals:
Min
1Q
-15.8373 -2.9216
Median
-0.6721
3Q
2.1082
Max
27.2939
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept-e-10 ***
crim
- - **
zn- ***
indus-
chas- *
nox
- -e-07 ***
rm-e-16 ***
age
- -
dis
- -e-12 ***
rad-e-05 ***
tax
- - *
ptratio
- -e-10 ***
b- **
lstat
- -e-12 ***
--Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 4.88 on 349 degrees of freedom
Multiple R-squared: 0.7463, Adjusted R-squared: 0.7368
F-statistic: 78.95 on 13 and 349 DF, p-value: < 2.2e-16
# Diagnostic plots for the final fitted linear model
plot(lm$finalModel)
# Ridge regression: alpha = 0, tune lambda over 5 values in [1e-4, 1]
set.seed(1234)
ridge <- train(medv ~ ., train,
               method = "glmnet",
               tuneGrid = expand.grid(alpha = 0,
                                      lambda = seq(0.0001, 1, length = 5)),
               trControl = custom)
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Fold01.Rep1:
Fold01.Rep1:
Fold02.Rep1:
Fold02.Rep1:
Fold03.Rep1:
Fold03.Rep1:
Fold04.Rep1:
Fold04.Rep1:
Fold05.Rep1:
Fold05.Rep1:
Fold06.Rep1:
Fold06.Rep1:
Fold07.Rep1:
Fold07.Rep1:
Fold08.Rep1:
Fold08.Rep1:
Fold09.Rep1:
Fold09.Rep1:
Fold10.Rep1:
Fold10.Rep1:
Fold01.Rep2:
Fold01.Rep2:
Fold02.Rep2:
Fold02.Rep2:
Fold03.Rep2:
Fold03.Rep2:
Fold04.Rep2:
Fold04.Rep2:
Fold05.Rep2:
Fold05.Rep2:
Fold06.Rep2:
Fold06.Rep2:
Fold07.Rep2:
Fold07.Rep2:
Fold08.Rep2:
Fold08.Rep2:
Fold09.Rep2:
Fold09.Rep2:
Fold10.Rep2:
Fold10.Rep2:
Fold01.Rep3:
Fold01.Rep3:
Fold02.Rep3:
Fold02.Rep3:
Fold03.Rep3:
Fold03.Rep3:
Fold04.Rep3:
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Fold04.Rep3:
Fold05.Rep3:
Fold05.Rep3:
Fold06.Rep3:
Fold06.Rep3:
Fold07.Rep3:
Fold07.Rep3:
Fold08.Rep3:
Fold08.Rep3:
Fold09.Rep3:
Fold09.Rep3:
Fold10.Rep3:
Fold10.Rep3:
Fold01.Rep4:
Fold01.Rep4:
Fold02.Rep4:
Fold02.Rep4:
Fold03.Rep4:
Fold03.Rep4:
Fold04.Rep4:
Fold04.Rep4:
Fold05.Rep4:
Fold05.Rep4:
Fold06.Rep4:
Fold06.Rep4:
Fold07.Rep4:
Fold07.Rep4:
Fold08.Rep4:
Fold08.Rep4:
Fold09.Rep4:
Fold09.Rep4:
Fold10.Rep4:
Fold10.Rep4:
Fold01.Rep5:
Fold01.Rep5:
Fold02.Rep5:
Fold02.Rep5:
Fold03.Rep5:
Fold03.Rep5:
Fold04.Rep5:
Fold04.Rep5:
Fold05.Rep5:
Fold05.Rep5:
Fold06.Rep5:
Fold06.Rep5:
Fold07.Rep5:
Fold07.Rep5:
Fold08.Rep5:
Fold08.Rep5:
Fold09.Rep5:
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
alpha=0,
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
lambda=1
##
##
##
##
##
##
- Fold09.Rep5: alpha=0, lambda=1
+ Fold10.Rep5: alpha=0, lambda=1
- Fold10.Rep5: alpha=0, lambda=1
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0, lambda = 0.5 on full training set
ridge
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
## glmnet
## 363 samples
## 13 predictor
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 5 times)
## Summary of sample sizes: 326, 327, 326, 327, 326, 327, ...
## Resampling results across tuning parameters:
lambda-
RMSE-
Rsquared-
MAE-
Tuning parameter 'alpha' was held constant at a value of 0
RMSE was used to select the optimal model using the smallest value.
The final values used for the model were alpha = 0 and lambda = 0.50005.
# Plot cross-validated RMSE against the lambda grid
plot(ridge)

# Coefficient paths versus log(lambda)
plot(ridge$finalModel, xvar = "lambda", label = TRUE)

# Coefficient paths versus fraction of deviance explained
plot(ridge$finalModel, xvar = "dev", label = TRUE)

# Variable importance (unscaled)
plot(varImp(ridge, scale = FALSE))
LASSO REGRESSION
set.seed(123)
lasso