ISLR Home

p100

# Setup: switch to the chapter directory and load the packages used by the lab.
# NOTE(review): the relative path only resolves when the session starts in a
# sibling directory of chapter03 -- confirm the intended launch location.
setwd("../chapter03/")
#getwd()  # uncomment to check which directory is now current
library(MASS) # provides the Boston data set used below
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
# As the message above reports, a bare select() now resolves to MASS::select;
# write dplyr::select() explicitly when the dplyr version is wanted.
library(ISLR)
## Warning: package 'ISLR' was built under R version 4.0.3
# One-time installation, and a way to list the data sets shipped with ISLR:
# install.packages("ISLR")
# data(package = "ISLR") # View included datasets

Simple Linear Regression

# List the column names of the Boston data frame; medv and lstat are the two
# columns used by the regression below.
names(Boston)
##  [1] "crim"    "zn"      "indus"   "chas"    "nox"     "rm"      "age"    
##  [8] "dis"     "rad"     "tax"     "ptratio" "black"   "lstat"   "medv"

\(medv = \beta_0 + \beta_1 (lstat)\)

# Simple linear regression: medv is the response, lstat is the predictor.
# Preferred form: name the data frame explicitly.
lm.fit <- lm(formula = medv ~ lstat, data = Boston)

# Alternative form: attach Boston so its columns are found on the search path
# and the data argument can be left out. The explicit form above is preferred.
attach(Boston)
lm.fit <- lm(formula = medv ~ lstat)

# Printing the fitted model shows the call and the estimated coefficients
print(lm.fit)
## 
## Call:
## lm(formula = medv ~ lstat)
## 
## Coefficients:
## (Intercept)        lstat  
##       34.55        -0.95

\[ \hat{\beta}_0 = 34.55 \\ \hat{\beta}_1 = -0.95 \]

# Full regression summary: residual quantiles, the coefficient table with
# standard errors, t values and p-values, the residual standard error,
# R-squared and the overall F-statistic.
summary(lm.fit)
## 
## Call:
## lm(formula = medv ~ lstat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -15.168  -3.990  -1.318   2.034  24.500 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 34.55384    0.56263   61.41   <2e-16 ***
## lstat       -0.95005    0.03873  -24.53   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.216 on 504 degrees of freedom
## Multiple R-squared:  0.5441, Adjusted R-squared:  0.5432 
## F-statistic: 601.6 on 1 and 504 DF,  p-value: < 2.2e-16
# Names of the components stored in the fitted model object
names(lm.fit)
##  [1] "coefficients"  "residuals"     "effects"       "rank"         
##  [5] "fitted.values" "assign"        "qr"            "df.residual"  
##  [9] "xlevels"       "call"          "terms"         "model"

Coefficients

# Extract the estimated intercept and slope as a named numeric vector
coef(lm.fit)
## (Intercept)       lstat 
##  34.5538409  -0.9500494

Predict

# Predict medv at lstat = 5, 10 and 15; each output row matches one lstat value.
# Confidence intervals
predict(
  lm.fit,
  newdata = data.frame(lstat = c(5, 10, 15)),
  interval = "confidence"
)
##        fit      lwr      upr
## 1 29.80359 29.00741 30.59978
## 2 25.05335 24.47413 25.63256
## 3 20.30310 19.73159 20.87461
# Prediction intervals
predict(
  lm.fit,
  newdata = data.frame(lstat = c(5, 10, 15)),
  interval = "prediction"
)
##        fit       lwr      upr
## 1 29.80359 17.565675 42.04151
## 2 25.05335 12.827626 37.27907
## 3 20.30310  8.077742 32.52846

Plot

# Scatter plot of the response against the predictor. The formula interface
# with data = Boston gives the same axes and labels as plot(lstat, medv)
# without depending on Boston being attached.
plot(medv ~ lstat, data = Boston)
abline(lm.fit) # Add Least Squares Regression Line

# Line appearance is controlled through abline()'s graphical parameters
abline(lm.fit, lwd = 3) # Line width
abline(lm.fit, lwd = 3, col = "red") # Line width and color
abline(lm.fit, col = "red") # Color only

# pch (plot character) selects the symbol drawn for points, so it has no
# visible effect on abline(); it is demonstrated on plot() instead.
plot(medv ~ lstat, data = Boston, pch = 20) # Plot character: small solid dot
plot(medv ~ lstat, data = Boston, pch = "+") # Plot character: literal "+"
plot(1:20, 1:20, pch = 1:20) # Plot characters 1 to 20, one per point

Plot Analytics

  • Residuals vs Fitted
  • Normal Q-Q: shows whether the residuals are approximately normally distributed
  • Scale-Location
  • Residuals vs Leverage
# Split the graphics device into a 2 x 2 grid so the four diagnostic plots
# produced by plot(lm.fit) appear together in one figure.
# NOTE(review): the mfrow setting is not restored afterwards, so later plots on
# the same device keep the 2 x 2 layout unless it is reset -- confirm intended.
par(mfrow = c(2,2)) # 4 plots in same picture
plot(lm.fit) # 4 plots