6 Variables qualitatives : ANCOVA et ANOVA

La concentration en ozone

ozone <- read.table("../donnees/ozone.txt", header = T, sep = ";")
plot(ozone[,"T12"], ozone[,"O3"],pch=as.numeric(ozone[,"vent"]),
     col = as.numeric(ozone[,"vent"]))
a1 <- lm(O3 ~ T12, data = ozone[ozone[,"vent"]=="EST",])
a2 <- lm(O3 ~ T12, data = ozone[ozone[,"vent"]=="NORD",])
a3 <- lm(O3 ~ T12, data = ozone[ozone[,"vent"]=="OUEST",])
a4 <- lm(O3 ~ T12, data = ozone[ozone[,"vent"]=="SUD",])
abline(a1, col=1)
abline(a2, col=2)
abline(a3, col=3)
abline(a4, col=4)

mod1b <- lm(formula = O3 ~ -1 + vent + T12:vent, data = ozone)
summary(mod1b)

Call:
lm(formula = O3 ~ -1 + vent + T12:vent, data = ozone)

Residuals:
    Min      1Q  Median      3Q     Max 
-29.903  -9.163   1.153  10.319  32.638 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
ventEST        45.6090    13.9343   3.273 0.002133 ** 
ventNORD      106.6345    28.0341   3.804 0.000456 ***
ventOUEST      64.6840    24.6208   2.627 0.011967 *  
ventSUD       -27.0602    26.5389  -1.020 0.313737    
ventEST:T12     2.7480     0.6342   4.333 8.96e-05 ***
ventNORD:T12   -1.6491     1.6058  -1.027 0.310327    
ventOUEST:T12   0.3407     1.2047   0.283 0.778709    
ventSUD:T12     5.3786     1.1497   4.678 3.00e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 14.71 on 42 degrees of freedom
Multiple R-squared:  0.9773,    Adjusted R-squared:  0.973 
F-statistic: 226.1 on 8 and 42 DF,  p-value: < 2.2e-16
mod1 <- lm(formula = O3 ~ vent + T12:vent, data = ozone)
summary(mod1)

Call:
lm(formula = O3 ~ vent + T12:vent, data = ozone)

Residuals:
    Min      1Q  Median      3Q     Max 
-29.903  -9.163   1.153  10.319  32.638 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)    45.6090    13.9343   3.273  0.00213 ** 
ventNORD       61.0255    31.3061   1.949  0.05796 .  
ventOUEST      19.0751    28.2905   0.674  0.50384    
ventSUD       -72.6691    29.9746  -2.424  0.01972 *  
ventEST:T12     2.7480     0.6342   4.333 8.96e-05 ***
ventNORD:T12   -1.6491     1.6058  -1.027  0.31033    
ventOUEST:T12   0.3407     1.2047   0.283  0.77871    
ventSUD:T12     5.3786     1.1497   4.678 3.00e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 14.71 on 42 degrees of freedom
Multiple R-squared:  0.6753,    Adjusted R-squared:  0.6212 
F-statistic: 12.48 on 7 and 42 DF,  p-value: 1.614e-08
mod2 <- lm(formula = O3 ~ vent + T12, data = ozone)
mod2b <- lm(formula = O3 ~ -1 + vent + T12, data = ozone)
mod3 <- lm(formula = O3 ~ vent:T12, data = ozone)
anova(mod2,mod1)
Analysis of Variance Table

Model 1: O3 ~ vent + T12
Model 2: O3 ~ vent + T12:vent
  Res.Df     RSS Df Sum of Sq      F   Pr(>F)   
1     45 12612.0                                
2     42  9087.4  3    3524.5 5.4298 0.003011 **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod3,mod1)
Analysis of Variance Table

Model 1: O3 ~ vent:T12
Model 2: O3 ~ vent + T12:vent
  Res.Df     RSS Df Sum of Sq      F  Pr(>F)  
1     45 11864.1                              
2     42  9087.4  3    2776.6 4.2776 0.01008 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(rstudent(mod2) ~ fitted(mod2),xlab="ychap",ylab="residus")

library(lattice)
xyplot(rstudent(mod2)~fitted(mod2)|vent,data = ozone, ylab="residus")

mod <- lm(formula = O3 ~ vent + T12 + T12:vent, data = ozone)
mod0 <- lm(formula = O3 ~ vent +T12 + T12:vent, data = ozone)
summary(mod0)

Call:
lm(formula = O3 ~ vent + T12 + T12:vent, data = ozone)

Residuals:
    Min      1Q  Median      3Q     Max 
-29.903  -9.163   1.153  10.319  32.638 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)    45.6090    13.9343   3.273  0.00213 ** 
ventNORD       61.0255    31.3061   1.949  0.05796 .  
ventOUEST      19.0751    28.2905   0.674  0.50384    
ventSUD       -72.6691    29.9746  -2.424  0.01972 *  
T12             2.7480     0.6342   4.333 8.96e-05 ***
ventNORD:T12   -4.3971     1.7265  -2.547  0.01462 *  
ventOUEST:T12  -2.4073     1.3614  -1.768  0.08429 .  
ventSUD:T12     2.6306     1.3130   2.004  0.05160 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 14.71 on 42 degrees of freedom
Multiple R-squared:  0.6753,    Adjusted R-squared:  0.6212 
F-statistic: 12.48 on 7 and 42 DF,  p-value: 1.614e-08

La hauteur des eucalyptus

eucalypt <- read.table("../donnees/eucalyptus.txt", header = T, sep = ";")
eucalypt[,"bloc"] <- as.factor(eucalypt[,"bloc"])
m.complet <- lm(ht ~ bloc - 1 + bloc:circ, data = eucalypt)
m.pente <- lm(ht ~ bloc - 1 + circ, data = eucalypt)
m.ordonne <- lm(ht ~ bloc:circ, data = eucalypt)
anova(m.pente, m.complet)
Analysis of Variance Table

Model 1: ht ~ bloc - 1 + circ
Model 2: ht ~ bloc - 1 + bloc:circ
  Res.Df    RSS Df Sum of Sq      F Pr(>F)
1   1425 2005.9                           
2   1423 2005.0  2   0.84752 0.3007 0.7403
anova(m.ordonne, m.complet)
Analysis of Variance Table

Model 1: ht ~ bloc:circ
Model 2: ht ~ bloc - 1 + bloc:circ
  Res.Df    RSS Df Sum of Sq      F Pr(>F)
1   1425 2009.2                           
2   1423 2005.0  2    4.1649 1.4779 0.2285
m.simple <- lm(ht ~ circ, data = eucalypt)
anova(m.simple, m.pente)
Analysis of Variance Table

Model 1: ht ~ circ
Model 2: ht ~ bloc - 1 + circ
  Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
1   1427 2052.1                                  
2   1425 2005.9  2    46.188 16.406 9.031e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

ANOVA

mod1 <- lm(O3~vent-1,data=ozone)
summary(mod1)

Call:
lm(formula = O3 ~ vent - 1, data = ozone)

Residuals:
    Min      1Q  Median      3Q     Max 
-40.250 -13.950  -2.233  14.972  39.857 

Coefficients:
          Estimate Std. Error t value Pr(>|t|)    
ventEST    103.850      4.963   20.92  < 2e-16 ***
ventNORD    78.289      6.618   11.83 1.49e-15 ***
ventOUEST   71.578      4.680   15.30  < 2e-16 ***
ventSUD     94.343      7.504   12.57  < 2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 19.85 on 46 degrees of freedom
Multiple R-squared:  0.9547,    Adjusted R-squared:  0.9508 
F-statistic: 242.4 on 4 and 46 DF,  p-value: < 2.2e-16
anova(mod1)
Analysis of Variance Table

Response: O3
          Df Sum Sq Mean Sq F value    Pr(>F)    
vent       4 382244   95561  242.44 < 2.2e-16 ***
Residuals 46  18131     394                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
mod2 <- lm(O3 ~ vent, data = ozone)
anova(mod2)
Analysis of Variance Table

Response: O3
          Df  Sum Sq Mean Sq F value    Pr(>F)    
vent       3  9859.8  3286.6  8.3383 0.0001556 ***
Residuals 46 18131.4   394.2                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mod2)

Call:
lm(formula = O3 ~ vent, data = ozone)

Residuals:
    Min      1Q  Median      3Q     Max 
-40.250 -13.950  -2.233  14.972  39.857 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  103.850      4.963  20.923  < 2e-16 ***
ventNORD     -25.561      8.272  -3.090  0.00339 ** 
ventOUEST    -32.272      6.821  -4.731 2.16e-05 ***
ventSUD       -9.507      8.997  -1.057  0.29616    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 19.85 on 46 degrees of freedom
Multiple R-squared:  0.3522,    Adjusted R-squared:   0.31 
F-statistic: 8.338 on 3 and 46 DF,  p-value: 0.0001556
lm(O3 ~ C(vent,treatment), data = ozone)

Call:
lm(formula = O3 ~ C(vent, treatment), data = ozone)

Coefficients:
            (Intercept)   C(vent, treatment)NORD  C(vent, treatment)OUEST  
                103.850                  -25.561                  -32.272  
  C(vent, treatment)SUD  
                 -9.507  
lm(O3 ~ C(vent,base=2), data = ozone)

Call:
lm(formula = O3 ~ C(vent, base = 2), data = ozone)

Coefficients:
           (Intercept)    C(vent, base = 2)EST  C(vent, base = 2)OUEST  
                78.289                  25.561                  -6.711  
  C(vent, base = 2)SUD  
                16.054  
II <- length(levels(as.factor(ozone$vent)))
nI <- table(ozone$vent)
contraste<-matrix(rbind(diag(II-1),-nI[-II]/nI[II]),II,II-1)
mod3 <- lm(O3 ~ C(vent,contraste), data = ozone)
anova(mod3)
Analysis of Variance Table

Response: O3
                   Df  Sum Sq Mean Sq F value    Pr(>F)    
C(vent, contraste)  3  9859.8  3286.6  8.3383 0.0001556 ***
Residuals          46 18131.4   394.2                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mod3)

Call:
lm(formula = O3 ~ C(vent, contraste), data = ozone)

Residuals:
    Min      1Q  Median      3Q     Max 
-40.250 -13.950  -2.233  14.972  39.857 

Coefficients:
                    Estimate Std. Error t value Pr(>|t|)    
(Intercept)           86.300      2.808  30.737  < 2e-16 ***
C(vent, contraste)1   17.550      4.093   4.288 9.15e-05 ***
C(vent, contraste)2   -8.011      5.993  -1.337 0.187858    
C(vent, contraste)3  -14.722      3.744  -3.933 0.000281 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 19.85 on 46 degrees of freedom
Multiple R-squared:  0.3522,    Adjusted R-squared:   0.31 
F-statistic: 8.338 on 3 and 46 DF,  p-value: 0.0001556
mod4 <- lm(O3 ~ C(vent,sum), data = ozone) 
anova(mod4)
Analysis of Variance Table

Response: O3
             Df  Sum Sq Mean Sq F value    Pr(>F)    
C(vent, sum)  3  9859.8  3286.6  8.3383 0.0001556 ***
Residuals    46 18131.4   394.2                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(mod4)

Call:
lm(formula = O3 ~ C(vent, sum), data = ozone)

Residuals:
    Min      1Q  Median      3Q     Max 
-40.250 -13.950  -2.233  14.972  39.857 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)     87.015      3.027  28.743  < 2e-16 ***
C(vent, sum)1   16.835      4.635   3.632 0.000705 ***
C(vent, sum)2   -8.726      5.573  -1.566 0.124284    
C(vent, sum)3  -15.437      4.485  -3.442 0.001240 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 19.85 on 46 degrees of freedom
Multiple R-squared:  0.3522,    Adjusted R-squared:   0.31 
F-statistic: 8.338 on 3 and 46 DF,  p-value: 0.0001556
resid2 <- resid(mod2)
ozone$vent <- as.factor(ozone$vent)
plot(resid2~vent,data=ozone, ylab="residus")

plot(resid2 ~ jitter(fitted(mod2)),xlab="ychap",ylab="residus")

xyplot(resid2 ~ I(1:50)|vent, data=ozone,
       xlab="index", ylab="residus")

par(mfrow=c(1,2))
with(ozone, interaction.plot(vent, nebulosite, O3, col=1:2))
with(ozone, interaction.plot(nebulosite, vent, O3, col=1:4))

mod1 <- lm(O3 ~ vent + nebulosite + vent:nebulosite, data = ozone)
mod2 <- lm(O3 ~ vent + nebulosite, data = ozone)
anova(mod2, mod1)
Analysis of Variance Table

Model 1: O3 ~ vent + nebulosite
Model 2: O3 ~ vent + nebulosite + vent:nebulosite
  Res.Df   RSS Df Sum of Sq     F Pr(>F)
1     45 11730                          
2     42 11246  3    483.62 0.602 0.6173
mod3 <- lm(O3 ~ vent, data = ozone)
anova(mod3, mod2)
Analysis of Variance Table

Model 1: O3 ~ vent
Model 2: O3 ~ vent + nebulosite
  Res.Df   RSS Df Sum of Sq      F    Pr(>F)    
1     46 18131                                  
2     45 11730  1    6401.5 24.558 1.066e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod3, mod2, mod1)
Analysis of Variance Table

Model 1: O3 ~ vent
Model 2: O3 ~ vent + nebulosite
Model 3: O3 ~ vent + nebulosite + vent:nebulosite
  Res.Df   RSS Df Sum of Sq      F    Pr(>F)    
1     46 18131                                  
2     45 11730  1    6401.5 23.907 1.523e-05 ***
3     42 11246  3     483.6  0.602    0.6173    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(mod1)
Analysis of Variance Table

Response: O3
                Df  Sum Sq Mean Sq F value    Pr(>F)    
vent             3  9859.8  3286.6  12.274 6.689e-06 ***
nebulosite       1  6401.5  6401.5  23.907 1.523e-05 ***
vent:nebulosite  3   483.6   161.2   0.602    0.6173    
Residuals       42 11246.2   267.8                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1