13 Régularisation de la vraisemblance

Régressions pénalisées avec glmnet

library(bestglm)
data(SAheart)
SAheart.X <- model.matrix(chd~.,data=SAheart)[,-1]
SAheart.Y <- SAheart$chd 
library(glmnet)
ridge <- glmnet(SAheart.X,SAheart.Y,family="binomial",alpha=0)
lasso <- glmnet(SAheart.X,SAheart.Y,family="binomial",alpha=1)
par(mfrow=c(2,2))
plot(ridge,ylim=c(-0.1,0.2))
plot(lasso,ylim=c(-0.1,0.2))
plot(ridge,ylim=c(-0.1,0.2),xvar="lambda")
plot(lasso,ylim=c(-0.1,0.2),xvar="lambda")

plot(ridge,ylim=c(-0.1,0.2),cex.lab=0.5)
plot(lasso,ylim=c(-0.1,0.2),cex.lab=0.5)
plot(ridge,ylim=c(-0.1,0.2),xvar="lambda",cex.lab=0.5)
plot(lasso,ylim=c(-0.1,0.2),xvar="lambda",cex.lab=0.5)

Validation croisée

set.seed(2398)
m1.ridge <- cv.glmnet(SAheart.X,SAheart.Y,family="binomial",alpha=0)
m1.lasso <- cv.glmnet(SAheart.X,SAheart.Y,family="binomial",alpha=1)
m2.ridge <- cv.glmnet(SAheart.X,SAheart.Y,family="binomial",alpha=0,type.measure="class")
m2.lasso <- cv.glmnet(SAheart.X,SAheart.Y,family="binomial",alpha=1,type.measure="class")
m3.ridge <- cv.glmnet(SAheart.X,SAheart.Y,family="binomial",alpha=0,type.measure="auc")
m3.lasso <- cv.glmnet(SAheart.X,SAheart.Y,family="binomial",alpha=1,type.measure="auc")
m1.ridge$lambda.min
[1] 0.01774595
m1.ridge$lambda.1se
[1] 0.2892148
par(mfrow=c(3,2))
plot(m1.ridge,main="Ridge")
plot(m1.lasso,main="Lasso")
plot(m2.ridge,main="Ridge")
plot(m2.lasso,main="Lasso")
plot(m3.ridge,main="Ridge")
plot(m3.lasso,main="Lasso")

Group-lasso et elastic net

library(gglasso)
X1 <- c(rep("A",60),rep("B",90),rep("C",50))
X2 <- c(rep("E",40),rep("F",60),rep("G",55),rep("H",45))
set.seed(1298)
X_3 <- runif(200)
set.seed(2381)
Y <- round(runif(200))
donnees <- data.frame(X1,X2,X_3,Y)
D <- model.matrix(Y~.,data=donnees)[,-1]
lasso <- glmnet(D,Y,alpha=1,lambda=exp(seq(-3,-5,length=100)))
groupe <- c(1,1,2,2,2,3)
library(gglasso)
Y1 <- 2*Y-1 
g.lasso <- gglasso(D,Y1,group=groupe,loss="logit",lambda=exp(seq(-4.5,-5.5,length=100)))
plot(lasso,xvar="lambda",lwd=2,main="Lasso")

plot(g.lasso,main="Group-lasso")

library(caret)
alpha <- seq(0,1,by=0.1)
lambda <- exp(seq(-7,2,length=100))
grille <- expand.grid(alpha=alpha,lambda=lambda)
ctrl <- trainControl(method="cv")
SAheart$chd <- as.factor(SAheart$chd)
set.seed(1234)
sel <- train(chd~.,data=SAheart,method="glmnet",family="binomial",trControl=ctrl,tuneGrid=grille)
sel$bestTune
    alpha     lambda
747   0.7 0.05971442
getTrainPerf(sel)
  TrainAccuracy TrainKappa method
1     0.7489362  0.3759712 glmnet

Application : détection d’images publicitaires

ad.data <- read.table("../donnees/ad_data.txt",header=FALSE,sep=",",dec=".",na.strings = "?",strip.white = TRUE)
names(ad.data)[ncol(ad.data)] <- "Y"
ad.data$Y <- as.factor(ad.data$Y)

ad.data1 <- na.omit(ad.data)
dim(ad.data1)
[1] 2359 1559
set.seed(1234)
ind.app <- sample(nrow(ad.data1),1800)
dapp <- ad.data1[ind.app,]
dtest <- ad.data1[-ind.app,]
X.app <- model.matrix(Y~.,data=dapp)[,-1]
X.test <- model.matrix(Y~.,data=dtest)[,-1]
Y.app <- dapp$Y
Y.test <- dtest$Y
logit <- glm(Y~.,data=dapp,family="binomial") 
set.seed(123)
lasso.cv <- cv.glmnet(X.app,Y.app,family="binomial")
ridge.cv <- cv.glmnet(X.app,Y.app,family="binomial",alpha=0,lambda=exp(seq(-8,0,length=100)))
en.cv <- cv.glmnet(X.app,Y.app,family="binomial",alpha=0.5)
par(mfrow=c(1,3))
plot(lasso.cv,main="Lasso")
plot(ridge.cv,main="Ridge")
plot(en.cv,main="Elastic net")

score <- data.frame(obs=dtest$Y,logit=predict(logit,newdata=dtest,type="response"),
                                lasso=as.vector(predict(lasso.cv,newx = X.test,type="response")),
                                ridge=as.vector(predict(ridge.cv,newx = X.test,type="response")),
                                en=as.vector(predict(en.cv,newx = X.test,type="response")))
library(pROC)
roc.ad <- roc(obs~logit+lasso+ridge+en,data=score)

couleur <- c("black","red","blue","green")
mapply(plot,roc.ad,col=couleur,lty=1:4,add=c(F,T,T,T),lwd=2,legacy.axes=TRUE)
                   logit       lasso       ridge       en         
percent            FALSE       FALSE       FALSE       FALSE      
sensitivities      numeric,329 numeric,276 numeric,515 numeric,266
specificities      numeric,329 numeric,276 numeric,515 numeric,266
thresholds         numeric,329 numeric,276 numeric,515 numeric,266
direction          "<"         "<"         "<"         "<"        
cases              numeric,461 numeric,461 numeric,461 numeric,461
controls           numeric,98  numeric,98  numeric,98  numeric,98 
fun.sesp           ?           ?           ?           ?          
auc                0.8635619   0.9695648   0.9805879   0.969587   
call               expression  expression  expression  expression 
original.predictor numeric,559 numeric,559 numeric,559 numeric,559
original.response  factor,559  factor,559  factor,559  factor,559 
predictor          numeric,559 numeric,559 numeric,559 numeric,559
response           factor,559  factor,559  factor,559  factor,559 
levels             character,2 character,2 character,2 character,2
predictor.name     "logit"     "lasso"     "ridge"     "en"       
response.name      "obs"       "obs"       "obs"       "obs"      
legend("bottomright",legend=c("logit","lasso","ridge","elastic net"),col=couleur,lty=1:4,lwd=2,cex=0.65)

sort(round(unlist(lapply(roc.ad,auc)),3),decreasing=TRUE)
ridge lasso    en logit 
0.981 0.970 0.970 0.864 
prev1 <- data.frame(apply(round(score[,-1]),2,factor,labels=c("ad.","nonad.")))
err <- apply(sweep(prev1,1,dtest$Y,FUN="!="),2,mean)
sort(round(err,3))
ridge lasso    en logit 
0.030 0.034 0.036 0.077