ISLR Home

Question

p299

Fit some of the non-linear models investigated in this chapter to the Auto data set. Is there evidence for non-linear relationships in this data set? Create some informative plots to justify your answer.


# Compact overview of Auto: row/column counts plus each column's type and
# first few values.
glimpse(Auto)
## Rows: 392
## Columns: 9
## $ mpg          <dbl> 18, 15, 18, 16, 17, 15, 14, 14, 14, 15, 15, 14, 15, 14, …
## $ cylinders    <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 6, 6, 6, 4,…
## $ displacement <dbl> 307, 350, 318, 304, 302, 429, 454, 440, 455, 390, 383, 3…
## $ horsepower   <dbl> 130, 165, 150, 150, 140, 198, 220, 215, 225, 190, 170, 1…
## $ weight       <dbl> 3504, 3693, 3436, 3433, 3449, 4341, 4354, 4312, 4425, 38…
## $ acceleration <dbl> 12.0, 11.5, 11.0, 12.0, 10.5, 10.0, 9.0, 8.5, 10.0, 8.5,…
## $ year         <dbl> 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, …
## $ origin       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 3,…
## $ name         <fct> chevrolet chevelle malibu, buick skylark 320, plymouth s…
# Attach skimr, then print its per-column summary of Auto (missing counts,
# quantiles and inline histograms for the numeric columns).
library("skimr")
skim(data = Auto)
Data summary
Name Auto
Number of rows 392
Number of columns 9
_______________________
Column type frequency:
factor 1
numeric 8
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
name 0 1 FALSE 301 amc: 5, for: 5, toy: 5, amc: 4

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
mpg 0 1 23.45 7.81 9 17.00 22.75 29.00 46.6 ▆▇▆▃▁
cylinders 0 1 5.47 1.71 3 4.00 4.00 8.00 8.0 ▇▁▃▁▅
displacement 0 1 194.41 104.64 68 105.00 151.00 275.75 455.0 ▇▂▂▃▁
horsepower 0 1 104.47 38.49 46 75.00 93.50 126.00 230.0 ▆▇▃▁▁
weight 0 1 2977.58 849.40 1613 2225.25 2803.50 3614.75 5140.0 ▇▇▅▅▂
acceleration 0 1 15.54 2.76 8 13.78 15.50 17.02 24.8 ▁▆▇▂▁
year 0 1 75.98 3.68 70 73.00 76.00 79.00 82.0 ▇▆▇▆▇
origin 0 1 1.58 0.81 1 1.00 1.00 2.00 3.0 ▇▁▂▁▂
## The following object is masked from package:ggplot2:
## 
##     mpg

# List the column names of the Auto data frame.
colnames(Auto)
## [1] "mpg"          "cylinders"    "displacement" "horsepower"   "weight"      
## [6] "acceleration" "year"         "origin"       "name"

Cylinders vs MPG

# Box plot of mpg for each cylinder count. cylinders is numeric, so
# `group = cylinders` is what splits the data into one box per value.
# No legend-producing aesthetic (colour/fill/...) is mapped, so the former
# theme(legend.position = "none") had no effect and has been dropped.
g1 <- ggplot(Auto, aes(x = cylinders, y = mpg, group = cylinders)) +
  geom_boxplot()
g1

Displacement vs MPG

# mpg against displacement: half-transparent points (so overlapping
# observations stay visible) with geom_smooth()'s default smoother on top.
ggplot(data = Auto, mapping = aes(displacement, mpg)) +
  geom_point(alpha = 0.5) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Horsepower vs MPG

# mpg against horsepower: half-transparent points (so overlapping
# observations stay visible) with geom_smooth()'s default smoother on top.
ggplot(data = Auto, mapping = aes(horsepower, mpg)) +
  geom_point(alpha = 0.5) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Weight vs MPG

# mpg against weight: half-transparent points (so overlapping observations
# stay visible) with geom_smooth()'s default smoother on top.
ggplot(data = Auto, mapping = aes(weight, mpg)) +
  geom_point(alpha = 0.5) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Acceleration vs MPG

# mpg against acceleration with a loess smoother. The method and formula are
# spelled out explicitly rather than left to geom_smooth()'s defaults.
ggplot(data = Auto, mapping = aes(acceleration, mpg)) +
  geom_point(alpha = 0.5) +
  geom_smooth(formula = y ~ x, method = "loess")

Year vs MPG

# mpg against model year with a loess smoother. The method and formula given
# here are the ones geom_smooth() would pick by default for this data; they
# are only written out explicitly.
ggplot(data = Auto, mapping = aes(year, mpg)) +
  geom_point(alpha = 0.5) +
  geom_smooth(formula = y ~ x, method = "loess")

Origin vs MPG

# Box plot of mpg by origin. origin is stored as a numeric code (values 1-3),
# so it is converted to a factor: that gives one box per code on a discrete
# axis instead of a continuous axis with meaningless ticks between the codes.
# NOTE(review): the ISLR documentation lists the codes as 1 = American,
# 2 = European, 3 = Japanese -- confirm before adding text labels.
ggplot(Auto, aes(x = factor(origin), y = mpg)) +
  geom_boxplot() +
  labs(x = "origin")

GAM - Generalized Additive Models

library(gam)
# Fit mpg on cylinders alone. This formula contains no smooth terms, so the
# fitted model is linear in cylinders.
# A variant that adds s(., 4) smooth terms for acceleration and weight is kept
# here for reference:
# gam.fit <- gam(
#   mpg ~ cylinders + s(acceleration, 4) + s(weight, 4),
#   data = Auto
# )
gam.fit <- gam(mpg ~ cylinders, data = Auto)

Model Summary

# Print the fitted model's summary: deviance residuals, null/residual
# deviance, AIC, and the ANOVA table for the parametric term.
summary(object = gam.fit)
## 
## Call: gam(formula = mpg ~ cylinders, data = Auto)
## Deviance Residuals:
##      Min       1Q   Median       3Q      Max 
## -14.2413  -3.1832  -0.6332   2.5491  17.9168 
## 
## (Dispersion Parameter for gaussian family taken to be 24.1434)
## 
##     Null Deviance: 23818.99 on 391 degrees of freedom
## Residual Deviance: 9415.91 on 390 degrees of freedom
## AIC: 2364.574 
## 
## Number of Local Scoring Iterations: 2 
## 
## Anova for Parametric Effects
##            Df  Sum Sq Mean Sq F value    Pr(>F)    
## cylinders   1 14403.1 14403.1  596.57 < 2.2e-16 ***
## Residuals 390  9415.9    24.1                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

References