Unusual observations
Data Cars2004
Dataset including the hybrid cars:
data(Cars2004, package = "mffSM")
head(Cars2004)
## vname type drive hybrid price.retail price.dealer price cons.city
## 1 Chevrolet.Aveo.4dr 1 1 0 11690 10965 11327.5 8.4
## 2 Chevrolet.Aveo.LS.4dr.hatch 1 1 0 12585 11802 12193.5 8.4
## 3 Chevrolet.Cavalier.2dr 1 1 0 14610 13697 14153.5 9.0
## 4 Chevrolet.Cavalier.4dr 1 1 0 14810 13884 14347.0 9.0
## 5 Chevrolet.Cavalier.LS.2dr 1 1 0 16385 15357 15871.0 9.0
## 6 Dodge.Neon.SE.4dr 1 1 0 13670 12849 13259.5 8.1
## cons.highway consumption engine.size ncylinder horsepower weight iweight lweight wheel.base
## 1 6.9 7.65 1.6 4 103 1075 0.0009302326 6.980076 249
## 2 6.9 7.65 1.6 4 103 1065 0.0009389671 6.970730 249
## 3 6.4 7.70 2.2 4 140 1187 0.0008424600 7.079184 264
## 4 6.4 7.70 2.2 4 140 1214 0.0008237232 7.101676 264
## 5 6.4 7.70 2.2 4 140 1187 0.0008424600 7.079184 264
## 6 6.5 7.30 2.0 4 132 1171 0.0008539710 7.065613 267
## length width ftype fdrive fhybrid
## 1 424 168 personal front No
## 2 389 168 personal front No
## 3 465 175 personal front No
## 4 465 173 personal front No
## 5 465 175 personal front No
## 6 442 170 personal front No
dim(Cars2004)
## [1] 428 22
summary(Cars2004)
## vname type drive hybrid price.retail
## Length:428 Min. :1.00 Min. :1.000 Min. :0.000000 Min. : 10280
## Class :character 1st Qu.:1.00 1st Qu.:1.000 1st Qu.:0.000000 1st Qu.: 20334
## Mode :character Median :1.00 Median :1.000 Median :0.000000 Median : 27635
## Mean :2.21 Mean :1.687 Mean :0.007009 Mean : 32775
## 3rd Qu.:3.00 3rd Qu.:2.000 3rd Qu.:0.000000 3rd Qu.: 39205
## Max. :6.00 Max. :3.000 Max. :1.000000 Max. :192465
##
## price.dealer price cons.city cons.highway consumption engine.size
## Min. : 9875 Min. : 10078 Min. : 3.90 Min. : 3.600 Min. : 3.75 Min. :1.300
## 1st Qu.: 18866 1st Qu.: 19572 1st Qu.:11.20 1st Qu.: 8.100 1st Qu.: 9.65 1st Qu.:2.375
## Median : 25294 Median : 26426 Median :12.40 Median : 9.000 Median :10.65 Median :3.000
## Mean : 30015 Mean : 31395 Mean :12.31 Mean : 9.107 Mean :10.71 Mean :3.197
## 3rd Qu.: 35710 3rd Qu.: 37500 3rd Qu.:13.80 3rd Qu.: 9.800 3rd Qu.:11.64 3rd Qu.:3.900
## Max. :173560 Max. :183012 Max. :23.50 Max. :19.600 Max. :21.55 Max. :8.300
## NA's :14 NA's :14 NA's :14
## ncylinder horsepower weight iweight lweight
## Min. :-1.000 Min. : 73.0 Min. : 839 Min. :0.0003067 Min. :6.732
## 1st Qu.: 4.000 1st Qu.:165.0 1st Qu.:1407 1st Qu.:0.0005547 1st Qu.:7.249
## Median : 6.000 Median :210.0 Median :1576 Median :0.0006345 Median :7.363
## Mean : 5.776 Mean :215.9 Mean :1623 Mean :0.0006431 Mean :7.370
## 3rd Qu.: 6.000 3rd Qu.:255.0 3rd Qu.:1803 3rd Qu.:0.0007106 3rd Qu.:7.497
## Max. :12.000 Max. :500.0 Max. :3261 Max. :0.0011919 Max. :8.090
## NA's :2 NA's :2 NA's :2
## wheel.base length width ftype fdrive fhybrid
## Min. :226.0 Min. :363.0 Min. :163 personal:245 front:226 No :425
## 1st Qu.:262.0 1st Qu.:450.0 1st Qu.:175 wagon : 30 rear :110 Yes: 3
## Median :272.0 Median :472.0 Median :180 SUV : 60 4x4 : 92
## Mean :274.8 Mean :470.2 Mean :181 pickup : 24
## 3rd Qu.:284.0 3rd Qu.:490.0 3rd Qu.:185 sport : 49
## Max. :366.0 Max. :577.0 Max. :206 minivan : 20
## NA's :2 NA's :26 NA's :28
To be able to compare a model fitted here with other models where also other covariates will be included, we restrict ourselves to a subset of the dataset where all variables consumption
, lweight
and engine.size
are known.
isComplete <- complete.cases(Cars2004[, c("consumption", "lweight", "engine.size")])
sum(!isComplete)
## [1] 16
CarsUsed <- subset(Cars2004, isComplete, select = c("vname", "fhybrid", "consumption", "weight", "lweight"))
dim(CarsUsed)
## [1] 412 5
summary(CarsUsed)
## vname fhybrid consumption weight lweight
## Length:412 No :409 Min. : 3.750 Min. : 839 Min. :6.732
## Class :character Yes: 3 1st Qu.: 9.625 1st Qu.:1413 1st Qu.:7.253
## Mode :character Median :10.650 Median :1577 Median :7.363
## Mean :10.704 Mean :1618 Mean :7.369
## 3rd Qu.:11.650 3rd Qu.:1800 3rd Qu.:7.496
## Max. :21.550 Max. :2903 Max. :7.973
consumption
on ldrive
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ lweight, data = CarsUsed, pch = PCH, col = COL, bg = BGC,
xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]")
#lines(lowess(CarsUsed[, "lweight"], CarsUsed[, "consumption"]), col = "blue", lwd = 2)
m1 <- lm(consumption ~ lweight, data = CarsUsed)
summary(m1)
##
## Call:
## lm(formula = consumption ~ lweight, data = CarsUsed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -4.5974 -0.7224 -0.1317 0.5322 5.0968
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -59.3329 1.9306 -30.73 <2e-16 ***
## lweight 9.5048 0.2619 36.29 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.071 on 410 degrees of freedom
## Multiple R-squared: 0.7626, Adjusted R-squared: 0.762
## F-statistic: 1317 on 1 and 410 DF, p-value: < 2.2e-16
(Ybar <- round(with(CarsUsed, mean(consumption)), 2))
## [1] 10.7
(be0 <- round(coef(m1)[1], 2))
## (Intercept)
## -59.33
(be1 <- round(coef(m1)[2], 4))
## lweight
## 9.5048
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ lweight, data = CarsUsed, pch = PCH, col = COL2, bg = BGC2,
xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]")
abline(m1, col = "red2", lwd = 2)
h <- hatvalues(m1)
print(h[1:10])
## 1 2 3 4 5 6 7 8
## 0.011453373 0.011892770 0.007436292 0.006688146 0.007436292 0.007916965 0.007320542 0.007494823
## 9 10
## 0.007583444 0.007583444
m <- 1 - hatvalues(m1)
print(m[1:10])
## 1 2 3 4 5 6 7 8 9 10
## 0.9885466 0.9881072 0.9925637 0.9933119 0.9925637 0.9920830 0.9926795 0.9925052 0.9924166 0.9924166
u <- residuals(m1)
print(u[1:10])
## 1 2 3 4 5 6 7 8 9
## 0.6390508 0.7278809 -0.2529504 -0.4667273 -0.2529504 -0.5239612 -0.6849261 0.1130778 -0.3128290
## 10
## 0.1371710
ustd <- rstandard(m1)
print(ustd[1:10])
## 1 2 3 4 5 6 7 8 9
## 0.6000037 0.6835580 -0.2370136 -0.4371570 -0.2370136 -0.4910686 -0.6417358 0.1059566 -0.2931414
## 10
## 0.1285382
Tt <- rstudent(m1)
print(Tt[1:10])
## 1 2 3 4 5 6 7 8 9
## 0.5995348 0.6831133 -0.2367406 -0.4367254 -0.2367406 -0.4906137 -0.6412749 0.1058288 -0.2928143
## 10
## 0.1283840
= LSE's of the observation-specific corrections of the response expectation in the outlier models.
gamma <- residuals(m1) / (1 - hatvalues(m1))
print(gamma[1:10])
## 1 2 3 4 5 6 7 8 9
## 0.6464549 0.7366416 -0.2548455 -0.4698699 -0.2548455 -0.5281424 -0.6899771 0.1139317 -0.3152195
## 10
## 0.1382192
CarsUsed[, "h"] <- hatvalues(m1)
CarsUsed[, "m"] <- 1 - hatvalues(m1)
CarsUsed[, "u"] <- residuals(m1)
CarsUsed[, "ustd"] <- rstandard(m1)
CarsUsed[, "gamma"] <- CarsUsed[, "u"] / CarsUsed[, "m"]
CarsUsed[, "Tt"] <- rstudent(m1)
#
head(CarsUsed)
## vname fhybrid consumption weight lweight h m u
## 1 Chevrolet.Aveo.4dr No 7.65 1075 6.980076 0.011453373 0.9885466 0.6390508
## 2 Chevrolet.Aveo.LS.4dr.hatch No 7.65 1065 6.970730 0.011892770 0.9881072 0.7278809
## 3 Chevrolet.Cavalier.2dr No 7.70 1187 7.079184 0.007436292 0.9925637 -0.2529504
## 4 Chevrolet.Cavalier.4dr No 7.70 1214 7.101676 0.006688146 0.9933119 -0.4667273
## 5 Chevrolet.Cavalier.LS.2dr No 7.70 1187 7.079184 0.007436292 0.9925637 -0.2529504
## 6 Dodge.Neon.SE.4dr No 7.30 1171 7.065613 0.007916965 0.9920830 -0.5239612
## ustd gamma Tt
## 1 0.6000037 0.6464549 0.5995348
## 2 0.6835580 0.7366416 0.6831133
## 3 -0.2370136 -0.2548455 -0.2367406
## 4 -0.4371570 -0.4698699 -0.4367254
## 5 -0.2370136 -0.2548455 -0.2367406
## 6 -0.4910686 -0.5281424 -0.4906137
ind <- order(abs(CarsUsed[, "Tt"]), decreasing = TRUE)
print(CarsUsed[ind,][1:5,])
## vname fhybrid consumption weight lweight h
## 305 Hummer.H2 No 21.55 2903 7.973500 0.024295022
## 94 Toyota.Prius.4dr.(gas/electric) Yes 4.30 1311 7.178545 0.004587768
## 348 Land.Rover.Discovery.SE No 17.15 2076 7.638198 0.006769897
## 97 Volkswagen.Jetta.GLS.TDI.4dr No 5.65 1362 7.216709 0.003807403
## 69 Honda.Civic.Hybrid.4dr.manual.(gas/electric) Yes 4.85 1239 7.122060 0.006062351
## m u ustd gamma Tt
## 305 0.9757050 5.096802 4.816766 5.223712 4.953073
## 94 0.9954122 -4.597353 -4.301535 -4.618542 -4.396641
## 348 0.9932301 3.883762 3.637849 3.910233 3.693509
## 97 0.9961926 -3.610092 -3.376477 -3.623890 -3.420244
## 69 0.9939376 -3.510471 -3.287024 -3.531883 -3.327145
#
print(CarsUsed[ind, 1:5][1:5,])
## vname fhybrid consumption weight lweight
## 305 Hummer.H2 No 21.55 2903 7.973500
## 94 Toyota.Prius.4dr.(gas/electric) Yes 4.30 1311 7.178545
## 348 Land.Rover.Discovery.SE No 17.15 2076 7.638198
## 97 Volkswagen.Jetta.GLS.TDI.4dr No 5.65 1362 7.216709
## 69 Honda.Civic.Hybrid.4dr.manual.(gas/electric) Yes 4.85 1239 7.122060
print(CarsUsed[ind, -(1:5)][1:5,])
## h m u ustd gamma Tt
## 305 0.024295022 0.9757050 5.096802 4.816766 5.223712 4.953073
## 94 0.004587768 0.9954122 -4.597353 -4.301535 -4.618542 -4.396641
## 348 0.006769897 0.9932301 3.883762 3.637849 3.910233 3.693509
## 97 0.003807403 0.9961926 -3.610092 -3.376477 -3.623890 -3.420244
## 69 0.006062351 0.9939376 -3.510471 -3.287024 -3.531883 -3.327145
xshow <- CarsUsed[ind,][1:5, "lweight"]
yshow <- CarsUsed[ind,][1:5, "consumption"]
tshow <- rownames(CarsUsed[ind,])[1:5]
#
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ lweight, data = CarsUsed, pch = PCH, col = COL2, bg = BGC2,
xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]", ylim = c(3.75, 22))
abline(m1, col = "red2", lwd = 2)
text(7.5, 8, labels = eval(substitute(expression(paste(hat(E), "(Y|X=x) = ", be0, " + ", be1, "x", sep="")),
list(be0 = be0, be1 = be1))), pos = 4)
text(7.5, 7, labels = eval(substitute(expression(paste(bar(Y), " = ", Ybar, sep="")),
list(Ybar = Ybar))), pos = 4)
text(xshow, yshow, labels = tshow, pos = 3)
PvalUnadj <- 2 * pt(-abs(rstudent(m1)), df = m1[["df.residual"]] - 1)
min(PvalUnadj)
## [1] 1.070856e-06
sum(PvalUnadj < 0.05)
## [1] 23
PvalBonf <- p.adjust(PvalUnadj, method = "bonferroni")
min(PvalBonf)
## [1] 0.0004411927
sum(PvalBonf < 0.05)
## [1] 2
CarsUsed <- transform(CarsUsed, PvalUnadj = PvalUnadj,
PvalBonf = PvalBonf)
print(subset(CarsUsed, PvalBonf < 0.05, select = c("vname", "fhybrid", "consumption", "gamma", "Tt", "PvalUnadj", "PvalBonf")))
## vname fhybrid consumption gamma Tt PvalUnadj
## 94 Toyota.Prius.4dr.(gas/electric) Yes 4.30 -4.618542 -4.396641 1.403358e-05
## 305 Hummer.H2 No 21.55 5.223712 4.953073 1.070856e-06
## PvalBonf
## 94 0.0057818340
## 305 0.0004411927
print(CarsUsed[ind, c("vname", "fhybrid", "consumption", "gamma", "Tt", "PvalUnadj", "PvalBonf")][1:5,])
## vname fhybrid consumption gamma Tt
## 305 Hummer.H2 No 21.55 5.223712 4.953073
## 94 Toyota.Prius.4dr.(gas/electric) Yes 4.30 -4.618542 -4.396641
## 348 Land.Rover.Discovery.SE No 17.15 3.910233 3.693509
## 97 Volkswagen.Jetta.GLS.TDI.4dr No 5.65 -3.623890 -3.420244
## 69 Honda.Civic.Hybrid.4dr.manual.(gas/electric) Yes 4.85 -3.531883 -3.327145
## PvalUnadj PvalBonf
## 305 1.070856e-06 0.0004411927
## 94 1.403358e-05 0.0057818340
## 348 2.512115e-04 0.1034991460
## 97 6.885730e-04 0.2836920724
## 69 9.567628e-04 0.3941862851
xout <- subset(CarsUsed, PvalBonf < 0.05)[, "lweight"]
yout <- subset(CarsUsed, PvalBonf < 0.05)[, "consumption"]
#
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ lweight, data = CarsUsed, pch = PCH, col = COL3, bg = BGC3,
xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]", ylim = c(3.75, 22))
abline(m1, col = "red2", lwd = 2)
text(7.5, 8, labels = eval(substitute(expression(paste(hat(E), "(Y|X=x) = ", be0, " + ", be1, "x", sep="")),
list(be0 = be0, be1 = be1))), pos = 4)
text(7.5, 7, labels = eval(substitute(expression(paste(bar(Y), " = ", Ybar, sep="")),
list(Ybar = Ybar))), pos = 4)
text(xshow, yshow, labels = tshow, pos = 3)
points(xout, yout, pch = PCH, col = "orange", bg = "red", cex = 2)
inflMeas <- influence.measures(m1)
#print(inflMeas) ## full table with some stars
print(inflMeas$infmat[1:10,]) ## the first 10 rows without stars
## dfb.1_ dfb.lwgh dffit cov.r cook.d hat
## 1 0.058079251 -0.057288572 0.064533096 1.014754 2.085519e-03 0.011453373
## 2 0.067760218 -0.066859700 0.074943193 1.014674 2.811899e-03 0.011892770
## 3 -0.017131716 0.016817978 -0.020491409 1.012147 2.104334e-04 0.007436292
## 4 -0.029182966 0.028603518 -0.035835916 1.010719 6.433764e-04 0.006688146
## 5 -0.017131716 0.016817978 -0.020491409 1.012147 2.104334e-04 0.007436292
## 6 -0.037145548 0.036495821 -0.043827329 1.011724 9.621993e-04 0.007916965
## 7 -0.045873896 0.045023905 -0.055069523 1.010274 1.518507e-03 0.007320542
## 8 0.007702297 -0.007562061 0.009196404 1.012429 4.238916e-05 0.007494823
## 9 -0.021494294 0.021106330 -0.025596382 1.012150 3.283195e-04 0.007583444
## 10 0.009424138 -0.009254036 0.011222692 1.012493 6.312584e-05 0.007583444
summary(influence.measures(m1))
## Potentially influential observations of
## lm(formula = consumption ~ lweight, data = CarsUsed) :
##
## dfb.1_ dfb.lwgh dffit cov.r cook.d hat
## 1 0.06 -0.06 0.06 1.01_* 0.00 0.01
## 2 0.07 -0.07 0.07 1.01_* 0.00 0.01
## 17 0.07 -0.07 0.07 1.01_* 0.00 0.01
## 39 -0.01 0.01 -0.01 1.02_* 0.00 0.01
## 47 0.07 -0.07 0.07 1.02_* 0.00 0.02_*
## 48 0.09 -0.09 0.10 1.02_* 0.00 0.02_*
## 49 0.06 -0.06 0.06 1.02_* 0.00 0.02_*
## 69 -0.21 0.20 -0.26_* 0.96_* 0.03 0.01
## 70 -0.14 0.14 -0.14 1.03_* 0.01 0.03_*
## 94 -0.21 0.20 -0.30_* 0.92_* 0.04 0.00
## 97 -0.13 0.13 -0.21_* 0.95_* 0.02 0.00
## 204 -0.05 0.06 0.14 0.98_* 0.01 0.00
## 270 0.20 -0.20 0.22_* 0.99 0.02 0.01
## 271 0.20 -0.20 0.22_* 0.99 0.02 0.01
## 278 0.05 -0.04 0.12 0.98_* 0.01 0.00
## 294 0.21 -0.21 0.23_* 1.00 0.03 0.02_*
## 295 -0.02 0.02 0.02 1.02_* 0.00 0.01
## 301 0.00 0.00 -0.01 1.02_* 0.00 0.01
## 302 0.00 0.00 0.00 1.01_* 0.00 0.01
## 304 0.01 -0.01 -0.01 1.03_* 0.00 0.02_*
## 305 -0.73 0.74 0.78_* 0.92_* 0.29 0.02_*
## 307 0.02 -0.02 -0.03 1.02_* 0.00 0.02_*
## 309 -0.06 0.07 0.07 1.02_* 0.00 0.01
## 321 -0.23 0.24 0.26_* 0.99 0.03 0.01
## 323 -0.08 0.09 0.09 1.02_* 0.00 0.02_*
## 326 -0.26 0.26 0.29_* 0.99 0.04 0.01
## 339 0.04 -0.04 -0.05 1.01_* 0.00 0.01
## 341 0.13 -0.13 0.18 0.98_* 0.02 0.00
## 347 -0.01 0.01 0.12 0.98_* 0.01 0.00
## 348 -0.24 0.24 0.30_* 0.95_* 0.05 0.01
## 375 -0.01 0.01 -0.01 1.02_* 0.00 0.01
## 405 -0.04 0.04 0.04 1.02_* 0.00 0.02_*
## 406 0.04 -0.04 -0.04 1.02_* 0.00 0.02_*
## 415 0.00 0.00 0.00 1.02_* 0.00 0.01
## 422 -0.01 0.02 0.15 0.97_* 0.01 0.00
## 424 -0.03 0.03 0.03 1.02_* 0.00 0.01
(r <- m1[["rank"]])
## [1] 2
(n <- m1[["df.residual"]] + r)
## [1] 412
h <- hatvalues(m1)
print(h[1:10])
## 1 2 3 4 5 6 7 8
## 0.011453373 0.011892770 0.007436292 0.006688146 0.007436292 0.007916965 0.007320542 0.007494823
## 9 10
## 0.007583444 0.007583444
3 * r / n
## [1] 0.01456311
sum(hatvalues(m1) > 3 * r / n)
## [1] 11
isLeverage <- (hatvalues(m1) > 3 * r /n)
subset(CarsUsed, isLeverage, select = c("vname", "consumption", "weight", "lweight", "h"))
## vname consumption weight lweight h
## 47 Toyota.Echo.2dr.manual 6.10 923 6.827629 0.01992471
## 48 Toyota.Echo.2dr.auto 6.55 946 6.852243 0.01836889
## 49 Toyota.Echo.4dr 6.10 932 6.837333 0.01930270
## 70 Honda.Insight.2dr.(gas/electric) 3.75 839 6.732211 0.02664081
## 294 Toyota.MR2.Spyder.convertible.2dr 8.20 996 6.903747 0.01534760
## 304 GMC.Yukon.XL.2500.SLT 15.95 2782 7.930925 0.02132481
## 305 Hummer.H2 21.55 2903 7.973500 0.02429502
## 307 Lincoln.Navigator.Luxury 15.60 2707 7.903596 0.01953240
## 323 Lexus.LX.470 15.95 2536 7.838343 0.01561382
## 405 Cadillac.Escalade.EXT 15.95 2667 7.888710 0.01859360
## 406 Chevrolet.Avalanche.1500 14.95 2575 7.853605 0.01648470
xlev <- subset(CarsUsed, isLeverage)[, "lweight"]
ylev <- subset(CarsUsed, isLeverage)[, "consumption"]
tlev <- rownames(subset(CarsUsed, isLeverage))
#
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ lweight, data = CarsUsed, pch = PCH, col = COL3, bg = BGC3,
xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]", ylim = c(3.75, 22))
abline(m1, col = "red2", lwd = 2)
text(7.5, 8, labels = eval(substitute(expression(paste(hat(E), "(Y|X=x) = ", be0, " + ", be1, "x", sep="")),
list(be0 = be0, be1 = be1))), pos = 4)
text(7.5, 7, labels = eval(substitute(expression(paste(bar(Y), " = ", Ybar, sep="")),
list(Ybar = Ybar))), pos = 4)
text(xlev, ylev, labels = tlev, pos = 3)
points(xlev, ylev, pch = PCH, col = "orange", bg = "red", cex = 2)
dfbts <- dfbetas(m1)
print(dfbts[1:10,])
## (Intercept) lweight
## 1 0.058079251 -0.057288572
## 2 0.067760218 -0.066859700
## 3 -0.017131716 0.016817978
## 4 -0.029182966 0.028603518
## 5 -0.017131716 0.016817978
## 6 -0.037145548 0.036495821
## 7 -0.045873896 0.045023905
## 8 0.007702297 -0.007562061
## 9 -0.021494294 0.021106330
## 10 0.009424138 -0.009254036
sum(abs(dfbetas(m1)[, 1]) > 1)
## [1] 0
sum(abs(dfbetas(m1)[, 2]) > 1)
## [1] 0
apply(abs(dfbetas(m1)), 2, max)
## (Intercept) lweight
## 0.7344821 0.7415123
dffts <- dffits(m1)
print(dffts[1:10])
## 1 2 3 4 5 6 7
## 0.064533096 0.074943193 -0.020491409 -0.035835916 -0.020491409 -0.043827329 -0.055069523
## 8 9 10
## 0.009196404 -0.025596382 0.011222692
3 * sqrt(r / (n-r))
## [1] 0.2095291
sum(abs(dffits(m1)) > 3 * sqrt(r / (n-r)))
## [1] 10
CarsUsed[, "dffits"] <- dffits(m1)
isDFFITS <- (abs(dffits(m1)) > 3 * sqrt(r / (n-r)))
subset(CarsUsed, isDFFITS, select = c("vname", "consumption", "weight", "lweight", "dffits"))
## vname consumption weight lweight dffits
## 69 Honda.Civic.Hybrid.4dr.manual.(gas/electric) 4.85 1239 7.122060 -0.2598440
## 94 Toyota.Prius.4dr.(gas/electric) 4.30 1311 7.178545 -0.2984834
## 97 Volkswagen.Jetta.GLS.TDI.4dr 5.65 1362 7.216709 -0.2114462
## 270 Mazda.MX-5.Miata.convertible.2dr 9.30 1083 6.987490 0.2216790
## 271 Mazda.MX-5.Miata.LS.convertible.2dr 9.30 1083 6.987490 0.2216790
## 294 Toyota.MR2.Spyder.convertible.2dr 8.20 996 6.903747 0.2254823
## 305 Hummer.H2 21.55 2903 7.973500 0.7815812
## 321 Land.Rover.Range.Rover.HSE 17.15 2440 7.799753 0.2597672
## 326 Mercedes-Benz.G500 17.45 2460 7.807917 0.2892681
## 348 Land.Rover.Discovery.SE 17.15 2076 7.638198 0.3049335
We also include the fitted line from a model without observation 305 (with the highest DFFITS).
xdffits <- subset(CarsUsed, isDFFITS)[, "lweight"]
ydffits <- subset(CarsUsed, isDFFITS)[, "consumption"]
tdffits <- rownames(subset(CarsUsed, isDFFITS))
#
m1without305 <- lm(consumption ~ lweight, data = subset(CarsUsed, vname != "Hummer.H2"))
#
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ lweight, data = CarsUsed, pch = PCH, col = COL3, bg = BGC3,
xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]", ylim = c(3.75, 22))
abline(m1, col = "red2", lwd = 2)
text(7.5, 8, labels = eval(substitute(expression(paste(hat(E), "(Y|X=x) = ", be0, " + ", be1, "x", sep="")),
list(be0 = be0, be1 = be1))), pos = 4)
text(7.5, 7, labels = eval(substitute(expression(paste(bar(Y), " = ", Ybar, sep="")),
list(Ybar = Ybar))), pos = 4)
text(xdffits, ydffits, labels = tdffits, pos = 3)
points(xdffits, ydffits, pch = PCH, col = "orange", bg = "red", cex = 2)
#
abline(m1without305, col = "darkgreen", lwd = 2)
legend(6.7, 22, legend = paste("Fit without obs. 305 with the highest DFFITS value", sep = ""), lty = 1, col = "darkgreen", lwd = 2)
cookd <- cooks.distance(m1)
print(cookd[1:10])
## 1 2 3 4 5 6 7
## 2.085519e-03 2.811899e-03 2.104334e-04 6.433764e-04 2.104334e-04 9.621993e-04 1.518507e-03
## 8 9 10
## 4.238916e-05 3.283195e-04 6.312584e-05
qf(0.50, r, n-r)
## [1] 0.6943203
sum(cooks.distance(m1) > qf(0.50, r, n-r))
## [1] 0
max(cooks.distance(m1))
## [1] 0.288855
covrt <- covratio(m1)
print(covrt[1:10])
## 1 2 3 4 5 6 7 8 9 10
## 1.014754 1.014674 1.012147 1.010719 1.012147 1.011724 1.010274 1.012429 1.012150 1.012493
3 * (r / (n-r))
## [1] 0.01463415
sum(abs(1 - covratio(m1)) > 3 * (r / (n-r)))
## [1] 31
sum(abs(1 - covratio(m1)) > 3 * (r / (n-r)))
## [1] 31
CarsUsed[, "covratio"] <- covratio(m1)
isCOVRATIO <- (abs(1 - covratio(m1)) > 3 * (r / (n-r)))
subset(CarsUsed, isCOVRATIO, select = c("vname", "consumption", "weight", "lweight", "covratio"))
## vname consumption weight lweight covratio
## 1 Chevrolet.Aveo.4dr 7.65 1075 6.980076 1.0147544
## 2 Chevrolet.Aveo.LS.4dr.hatch 7.65 1065 6.970730 1.0146741
## 17 Hyundai.Accent.GT.2dr.hatch 7.60 1061 6.966967 1.0149481
## 39 Scion.xA.4dr.hatch 6.80 1061 6.966967 1.0171433
## 47 Toyota.Echo.2dr.manual 6.10 923 6.827629 1.0240384
## 48 Toyota.Echo.2dr.auto 6.55 946 6.852243 1.0211810
## 49 Toyota.Echo.4dr 6.10 932 6.837333 1.0237925
## 69 Honda.Civic.Hybrid.4dr.manual.(gas/electric) 4.85 1239 7.122060 0.9584411
## 70 Honda.Insight.2dr.(gas/electric) 3.75 839 6.732211 1.0287100
## 94 Toyota.Prius.4dr.(gas/electric) 4.30 1311 7.178545 0.9204641
## 97 Volkswagen.Jetta.GLS.TDI.4dr 5.65 1362 7.216709 0.9534180
## 204 Audi-S4-Quattro.4dr 14.30 1735 7.458763 0.9758488
## 278 Mercedes-Benz.SLK32.AMG.2dr 12.25 1461 7.286876 0.9846960
## 295 Cadillac.Escaladet 14.95 2434 7.797291 1.0184249
## 301 Ford.Expedition-4.6-XLT 14.05 2268 7.726654 1.0151225
## 302 GMC.Envoy.XUV.SLE 14.05 2243 7.715570 1.0146477
## 304 GMC.Yukon.XL.2500.SLT 15.95 2782 7.930925 1.0267488
## 305 Hummer.H2 21.55 2903 7.973500 0.9166531
## 307 Lincoln.Navigator.Luxury 15.60 2707 7.903596 1.0247566
## 309 Toyota.Sequoia.SR5 15.30 2390 7.779049 1.0154956
## 323 Lexus.LX.470 15.95 2536 7.838343 1.0181450
## 339 Volkswagen.Touareg.V6 13.75 2307 7.743703 1.0147275
## 341 Chevrolet.Tracker 11.55 1300 7.170120 0.9777751
## 347 Jeep.Wrangler.Sahara.convertible.2dr 13.55 1622 7.391415 0.9779122
## 348 Land.Rover.Discovery.SE 17.15 2076 7.638198 0.9474854
## 375 Scion.xB 7.15 1100 7.003065 1.0154466
## 405 Cadillac.Escalade.EXT 15.95 2667 7.888710 1.0235282
## 406 Chevrolet.Avalanche.1500 14.95 2575 7.853605 1.0211552
## 415 Ford.F-150.Supercab.Lariat 14.95 2478 7.815207 1.0195227
## 422 Mazda.B4000.SE.Cab.Plus 14.05 1620 7.390181 0.9654596
## 424 Nissan.Titan.King.Cab.XE 14.95 2398 7.782390 1.0173502
xcovratio <- subset(CarsUsed, isCOVRATIO)[, "lweight"]
ycovratio <- subset(CarsUsed, isCOVRATIO)[, "consumption"]
tcovratio <- rownames(subset(CarsUsed, isCOVRATIO))
#
par(mfrow = c(1, 1), bty = BTY, mar = c(4, 4, 1, 1) + 0.1)
plot(consumption ~ lweight, data = CarsUsed, pch = PCH, col = COL3, bg = BGC3,
xlab = "Log(weight) [log(kg)]", ylab = "Consumption [l/100 km]", ylim = c(3.75, 22))
abline(m1, col = "red2", lwd = 2)
text(7.5, 8, labels = eval(substitute(expression(paste(hat(E), "(Y|X=x) = ", be0, " + ", be1, "x", sep="")),
list(be0 = be0, be1 = be1))), pos = 4)
text(7.5, 7, labels = eval(substitute(expression(paste(bar(Y), " = ", Ybar, sep="")),
list(Ybar = Ybar))), pos = 4)
text(xcovratio, ycovratio, labels = tcovratio, pos = 3)
points(xcovratio, ycovratio, pch = PCH, col = "orange", bg = "red", cex = 2)
plot.lm
par(mfrow = c(1, 1), mar = c(5, 4, 1, 1) + 0.1)
plot(m1, which = 4, col = "blue4")
par(mfrow = c(1, 1), mar = c(5, 4, 1, 1) + 0.1)
plot(m1, which = 5, pch = 21, col = "blue4", bg = "skyblue")
par(mfrow = c(1, 1), mar = c(5, 4, 2, 1) + 0.1)
plot(m1, which = 6, pch = 21, col = "blue4", bg = "skyblue")