# Load the ozone data set: semicolon-separated text file whose first row
# holds the column names.
ozone <- read.table("../donnees/ozone.txt", header = TRUE, sep = ";")
# 17 Estimateurs à noyau et \(k\) plus proches voisins ----
# Estimateurs à noyau ----
# Sort the observations by increasing T12 so that consecutive indices are
# neighbours along the T12 axis.
ind <- order(ozone[, "T12"])
T12o <- ozone[ind, "T12"]
O3o <- ozone[ind, "O3"]

# Intercept-only fits with 0/1 weights: each fit keeps one block of 10
# consecutive sorted observations (weight 1) and ignores the others (weight 0),
# so its intercept is the mean of O3o over that block.
# NOTE(review): the weight vectors have length 50, so this assumes `ozone`
# has exactly 50 rows -- confirm against the data file.
reg1 <- lm(O3o ~ 1, weights = c(rep(1, 10), rep(0, 40)))
reg2 <- lm(O3o ~ 1, weights = c(rep(0, 10), rep(1, 10), rep(0, 30)))
reg3 <- lm(O3o ~ 1, weights = c(rep(0, 20), rep(1, 10), rep(0, 20)))
reg4 <- lm(O3o ~ 1, weights = c(rep(0, 30), rep(1, 10), rep(0, 10)))
reg5 <- lm(O3o ~ 1, weights = c(rep(0, 40), rep(1, 10)))

# Scatter plot with the window [14, 18] marked in red, its centre (16) as a
# dashed blue line, and the mean of O3 inside the window as a blue triangle.
plot(T12o, O3o, pch = 20, xlab = "T12", ylab = "O3")
abline(v = c(14, 18), col = "red", lwd = 2)
abline(v = c(16), col = "blue", lty = 2)
points(16, mean(O3o[T12o >= 14 & T12o <= 18]), col = "blue", pch = 17, cex = 1.5)
library(ibr)

# Evaluation grid on the T12 axis, reused for every prediction curve.
x <- seq(7, 30, by = 0.01)

# One panel per bandwidth, side by side.
par(mfrow = c(1, 3))

# Candidate bandwidths, from the largest to the smallest.
h <- c(20, 3, 0.05)
for (i in h) {
  plot(T12o, O3o, pch = 20, xlab = "T12", ylab = "O3")
  # Fit with the bandwidth fixed to the current candidate, then predict on
  # the grid and overlay the fitted curve.
  tmp <- npregress(T12o, O3o, bandwidth = i)
  prev <- predict(tmp, newdata = x)
  lines(x, prev, col = "blue", lwd = 2)
}
# Les \(k\) plus proches voisins ----
# One panel per number of neighbours, side by side.
par(mfrow = c(1, 3))
library(FNN)

# Candidate numbers of neighbours, from the largest to the smallest.
k <- c(50, 10, 1)
for (i in k) {
  # k-NN regression of O3o on T12o, predicted on the grid `x`
  # (`x` is defined earlier in the script).
  mod <- knn.reg(train = T12o, test = as.matrix(x), y = O3o, k = i)
  plot(T12o, O3o, pch = 20, xlab = "T12", ylab = "O3")
  lines(x, mod$pred, col = "blue", lwd = 2)
}
# Sélection des paramètres ----
# Bandwidth retained by npregress() when none is supplied.
# NOTE(review): presumably chosen by cross-validation, as the name `hcv`
# suggests -- confirm in the ibr documentation.
hcv <- npregress(T12o, O3o)$bandwidth
hcv
# [1] 1.688373

# Without a `test` set, knn.reg() reports PRESS; dividing by the sample size
# gives a mean squared prediction error for k = 10.
knn.reg(train = T12o, y = O3o, k = 10)$PRESS / length(T12o)
# [1] 287.6629

# Same criterion for every candidate k.
# NOTE(review): the upper bound 49 looks like n - 1 for n = 50 observations --
# confirm against the data.
K_cand <- 1:49
loo <- rep(0, length(K_cand))
for (i in seq_along(K_cand)) {
  loo[i] <- knn.reg(train = T12o, y = O3o, k = K_cand[i])$PRESS / length(T12o)
}

# Candidate with the smallest criterion.
K_cand[which.min(loo)]
# [1] 8

# Refit both estimators with the selected parameters (k = 8 is the value
# printed above) and predict on the grid `x` defined earlier in the script.
mod.kppv <- knn.reg(train = T12o, test = as.matrix(x), y = O3o, k = 8)
mod.noyau <- npregress(T12o, O3o, bandwidth = hcv)
prev.noyau <- predict(mod.noyau, newdata = x)

# Overlay both fits: k-NN as a solid blue line, kernel as a dashed red line.
plot(T12o, O3o, pch = 20, xlab = "T12", ylab = "O3")
lines(x, mod.kppv$pred, col = "blue", lwd = 2)
lines(x, prev.noyau, col = "red", lty = 2, lwd = 2)

# `df` component of the kernel fit.
mod.noyau$df
# [1] 5.339428