12 Régression de Poisson

Le modèle de Poisson

# Load the malaria data set from a comma-separated file whose first row holds
# the column names, then print a per-column summary.
# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
Malaria <- read.table("../donnees/poissonData3.csv", sep = ",", header = TRUE)
summary(Malaria)
     Sexe                Age            Altitude     Prevention       
 Length:1627        Min.   :  10.0   Min.   :1129   Length:1627       
 Class :character   1st Qu.: 220.0   1st Qu.:1266   Class :character  
 Mode  :character   Median : 361.0   Median :1298   Mode  :character  
                    Mean   : 419.4   Mean   :1295                     
                    3rd Qu.: 555.0   3rd Qu.:1320                     
                    Max.   :1499.0   Max.   :1515                     
                                     NA's   :105                      
     Duree          N.malaria     
 Min.   :   0.0   Min.   : 0.000  
 1st Qu.: 172.0   1st Qu.: 1.000  
 Median : 721.0   Median : 4.000  
 Mean   : 619.3   Mean   : 4.687  
 3rd Qu.:1011.0   3rd Qu.: 7.000  
 Max.   :1464.0   Max.   :26.000  
                                  
# Poisson GLM of the malaria count on Duree alone; print the fitted object.
modP <- glm(
  formula = N.malaria ~ Duree,
  family = poisson,
  data = Malaria
)
modP

Call:  glm(formula = N.malaria ~ Duree, family = poisson, data = Malaria)

Coefficients:
(Intercept)        Duree  
   0.429459     0.001508  

Degrees of Freedom: 1626 Total (i.e. Null);  1625 Residual
Null Deviance:      5710 
Residual Deviance: 3325     AIC: 8125
# Scatter plot of the malaria count against Duree (small filled dots).
plot(N.malaria ~ Duree, data = Malaria, pch = 20, cex = 0.5)

# Ordinary linear fit, drawn as a solid straight line.
mod.lin <- lm(N.malaria ~ Duree, data = Malaria)
abline(reg = mod.lin, lwd = 2)

# Poisson fit: exponential of the linear predictor of modP, evaluated on a
# grid of Duree values from 0 to 1500, drawn as a dashed red curve.
x <- seq(from = 0, to = 1500, by = 1)
y <- exp(coef(modP)[1] + coef(modP)[2] * x)
lines(x, y, col = "red", lty = 2, lwd = 2.5)

# Poisson GLM with three explanatory variables: Duree, Sexe and Prevention.
modP3 <- glm(
  formula = N.malaria ~ Duree + Sexe + Prevention,
  family = poisson,
  data = Malaria
)

Tests et intervalles de confiance

# Turn Prevention into a factor whose reference level is "Rien", so that the
# Prevention coefficients are contrasts against that level.
Malaria$Prevention <- relevel(as.factor(Malaria$Prevention), ref = "Rien")

# Refit the three-variable Poisson model with the new coding and show the
# coefficient table.
modP3 <- glm(
  formula = N.malaria ~ Duree + Sexe + Prevention,
  family = poisson,
  data = Malaria
)
summary(modP3)

Call:
glm(formula = N.malaria ~ Duree + Sexe + Prevention, family = poisson, 
    data = Malaria)

Coefficients:
                            Estimate Std. Error z value Pr(>|z|)    
(Intercept)                0.3878929  0.0389800   9.951   <2e-16 ***
Duree                      0.0015101  0.0000343  44.031   <2e-16 ***
SexeM                      0.0550890  0.0229690   2.398   0.0165 *  
PreventionAutre           -0.2255828  0.1781379  -1.266   0.2054    
PreventionMoustiquaire     0.0176850  0.0255967   0.691   0.4896    
PreventionSerpentin/Spray  0.0196420  0.0590690   0.333   0.7395    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for poisson family taken to be 1)

    Null deviance: 5710.4  on 1626  degrees of freedom
Residual deviance: 3317.3  on 1621  degrees of freedom
AIC: 8124.6

Number of Fisher Scoring iterations: 5
# Nested model without Prevention, used to test Prevention as a whole.
modP2 <- glm(
  formula = N.malaria ~ Duree + Sexe,
  family = poisson,
  data = Malaria
)
# Likelihood-ratio statistic: minus twice the log-likelihood difference
# (smaller model minus larger model).
-2 * (logLik(modP2) - logLik(modP3))
'log Lik.' 2.448823 (df=3)
# 95% quantile of the chi-square distribution with 3 degrees of freedom.
qchisq(p = 0.95, df = 3)
[1] 7.814728
# Same comparison of the two nested models, done by anova() with an LRT.
anova(modP2, modP3, test = "LRT")
Analysis of Deviance Table

Model 1: N.malaria ~ Duree + Sexe
Model 2: N.malaria ~ Duree + Sexe + Prevention
  Resid. Df Resid. Dev Df Deviance Pr(>Chi)
1      1624     3319.8                     
2      1621     3317.3  3   2.4488   0.4846
# car::Anova gives one likelihood-ratio test per term of modP2.
library(car)
Anova(modP2, test = "LR")
Analysis of Deviance Table (Type II tests)

Response: N.malaria
      LR Chisq Df Pr(>Chisq)    
Duree  2386.56  1    < 2e-16 ***
Sexe      5.45  1    0.01961 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Confidence intervals from confint.default (normal approximation),
# rounded to 3 decimals.
round(confint.default(modP3), digits = 3)
                           2.5 % 97.5 %
(Intercept)                0.311  0.464
Duree                      0.001  0.002
SexeM                      0.010  0.100
PreventionAutre           -0.575  0.124
PreventionMoustiquaire    -0.032  0.068
PreventionSerpentin/Spray -0.096  0.135
# Confidence intervals from the confint() method for the fitted glm,
# rounded to 3 decimals, for comparison with confint.default.
round(confint(modP3), digits = 3)
                           2.5 % 97.5 %
(Intercept)                0.311  0.464
Duree                      0.001  0.002
SexeM                      0.010  0.100
PreventionAutre           -0.596  0.105
PreventionMoustiquaire    -0.032  0.068
PreventionSerpentin/Spray -0.098  0.134

Sélection de variables

# Reload the data for variable selection.
# NOTE(review): this reads "poissonData.csv" whereas the first section reads
# "poissonData3.csv" -- confirm that the difference is intended.
# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
Malaria <- read.table("../donnees/poissonData.csv", sep = ",", header = TRUE)

# Keep complete rows only, and code the two categorical columns as factors.
Malaria1 <- na.omit(Malaria)
Malaria1$Prevention <- as.factor(Malaria1$Prevention)
Malaria1$Sexe <- as.factor(Malaria1$Sexe)

# Best-subset selection for a Poisson model with bestglm.
# Presumably bestglm takes the last column of the data frame as the response
# and uses its default information criterion -- verify in the package docs.
library(bestglm)
mod_sel <- bestglm(Malaria1, family = poisson)
# Table of the best models found, with their criterion values.
mod_sel$BestModels
   Sexe   Age Altitude Prevention Duree Criterion
1 FALSE  TRUE     TRUE      FALSE  TRUE  7384.946
2 FALSE FALSE     TRUE      FALSE  TRUE  7387.814
3  TRUE  TRUE     TRUE      FALSE  TRUE  7390.053
4  TRUE FALSE     TRUE      FALSE  TRUE  7393.119
5 FALSE  TRUE    FALSE      FALSE  TRUE  7401.021