ISLR Home

Q8

p54

8a

# Read College.csv from the working directory into a data frame.
college <- read.csv(file = "College.csv")

8b

fix function

{r, results='hide'}

Add row names

# Use the values of the first column as the row labels of the data frame.
row.names(college) <- college[[1]]

Remove first column

Remove column 1 because its values have already been assigned to the row names.

# Keep every column except the first one and store the result back.
college <- college[, -1L]

8c

# Per-column summary of the college data frame.
summary(object = college)
##    Private               Apps           Accept          Enroll    
##  Length:777         Min.   :   81   Min.   :   72   Min.   :  35  
##  Class :character   1st Qu.:  776   1st Qu.:  604   1st Qu.: 242  
##  Mode  :character   Median : 1558   Median : 1110   Median : 434  
##                     Mean   : 3002   Mean   : 2019   Mean   : 780  
##                     3rd Qu.: 3624   3rd Qu.: 2424   3rd Qu.: 902  
##                     Max.   :48094   Max.   :26330   Max.   :6392  
##    Top10perc       Top25perc      F.Undergrad     P.Undergrad     
##  Min.   : 1.00   Min.   :  9.0   Min.   :  139   Min.   :    1.0  
##  1st Qu.:15.00   1st Qu.: 41.0   1st Qu.:  992   1st Qu.:   95.0  
##  Median :23.00   Median : 54.0   Median : 1707   Median :  353.0  
##  Mean   :27.56   Mean   : 55.8   Mean   : 3700   Mean   :  855.3  
##  3rd Qu.:35.00   3rd Qu.: 69.0   3rd Qu.: 4005   3rd Qu.:  967.0  
##  Max.   :96.00   Max.   :100.0   Max.   :31643   Max.   :21836.0  
##     Outstate       Room.Board       Books           Personal   
##  Min.   : 2340   Min.   :1780   Min.   :  96.0   Min.   : 250  
##  1st Qu.: 7320   1st Qu.:3597   1st Qu.: 470.0   1st Qu.: 850  
##  Median : 9990   Median :4200   Median : 500.0   Median :1200  
##  Mean   :10441   Mean   :4358   Mean   : 549.4   Mean   :1341  
##  3rd Qu.:12925   3rd Qu.:5050   3rd Qu.: 600.0   3rd Qu.:1700  
##  Max.   :21700   Max.   :8124   Max.   :2340.0   Max.   :6800  
##       PhD            Terminal       S.F.Ratio      perc.alumni   
##  Min.   :  8.00   Min.   : 24.0   Min.   : 2.50   Min.   : 0.00  
##  1st Qu.: 62.00   1st Qu.: 71.0   1st Qu.:11.50   1st Qu.:13.00  
##  Median : 75.00   Median : 82.0   Median :13.60   Median :21.00  
##  Mean   : 72.66   Mean   : 79.7   Mean   :14.09   Mean   :22.74  
##  3rd Qu.: 85.00   3rd Qu.: 92.0   3rd Qu.:16.50   3rd Qu.:31.00  
##  Max.   :103.00   Max.   :100.0   Max.   :39.80   Max.   :64.00  
##      Expend        Grad.Rate     
##  Min.   : 3186   Min.   : 10.00  
##  1st Qu.: 6751   1st Qu.: 53.00  
##  Median : 8377   Median : 65.00  
##  Mean   : 9660   Mean   : 65.46  
##  3rd Qu.:10830   3rd Qu.: 78.00  
##  Max.   :56233   Max.   :118.00

Correlation Pairs

library(ISLR)
# Scatterplot matrix of the first ten columns. The packaged `College` data is
# used here because the CSV-based `college` data frame did not work for this
# call (its Private column was read as character text).
pairs(College[, 1:10])

# Side-by-side boxplots of Outstate for each level of Private. The grouping
# factor is passed first so that plot() draws boxplots; with the numeric
# variable first it only draws a scatter of the factor's integer codes.
plot(College$Private, College$Outstate, xlab = "Private", ylab = "Outstate")

# Remove the ISLR package from the search path and unload its namespace.
detach(name = "package:ISLR", unload = TRUE, character.only = TRUE)

# Boxplots of Outstate for each value of Private, using the CSV-based data.
# Private was read as character text, which plot() cannot place on an axis
# (it was coerced to NA with a warning), so it is converted to a factor first.
# The axis limits are left to plot(): a fixed 0-100 window lies far below the
# smallest Outstate value and would show an empty panel.
# Columns are referenced through `college$` so nothing has to be attach()ed.
plot(as.factor(college$Private), college$Outstate,
     xlab = "Private", ylab = "Outstate")

8c iv

# Label a college "Yes" when its Top10perc exceeds 50 and "No" otherwise,
# then store the labels as a factor.
Elite <- rep("No", nrow(college))
Elite[college$Top10perc > 50] <- "Yes"
Elite <- as.factor(Elite)
summary(Elite)
##  No Yes 
## 699  78

# Boxplots of Outstate for each Elite level. The factor must be the first
# argument; with the numeric variable first, plot() only scatters the points
# against the factor's integer codes, so that variant is not drawn.
plot(Elite, college$Outstate, xlab = "Elite", ylab = "Outstate")

# Draw four histograms on a single 2 x 2 page.
# par() returns the previous settings, which are restored afterwards so that
# later plots are not squeezed into the same grid.
old_par <- par(mfrow = c(2, 2))

# with() looks the column names up inside `college` for this expression only,
# so the data frame does not need to be attach()ed a second time (which would
# put a duplicate copy on the search path and mask the first one).
with(college, {
  hist(PhD, breaks = 10, col = 3)
  hist(F.Undergrad, breaks = 10)
  hist(P.Undergrad, col = 2, breaks = 15)
  hist(Outstate, col = 2, breaks = 15)
})

par(old_par)

# Scatterplot matrix restricted to three columns of the college data.
enrollment_vars <- ~ PhD + F.Undergrad + P.Undergrad
pairs(enrollment_vars, data = college)

  1. Continue exploring…

Q9 Auto

  1. Which predictors are quantitative, and which are qualitative.

  2. range of each quantitative predictor

# Load ISLR for the Auto data, then print the minimum and maximum of each
# quantitative column, one column at a time.
library(ISLR)
## Warning: package 'ISLR' was built under R version 4.0.3
range(Auto[["mpg"]])
## [1]  9.0 46.6
range(Auto[["cylinders"]])
## [1] 3 8
range(Auto[["displacement"]])
## [1]  68 455
range(Auto[["weight"]])
## [1] 1613 5140
range(Auto[["horsepower"]])
## [1]  46 230
range(Auto[["acceleration"]])
## [1]  8.0 24.8
range(Auto[["year"]])
## [1] 70 82

9c Mean and standard deviation of each quantitative predictor

# Mean of every quantitative predictor in a single call. This part of the
# exercise covers both the mean and the standard deviation, but only the
# standard deviations were computed before. No output is recorded for this
# call yet; it appears when the document is run again.
quantitative <- c("mpg", "cylinders", "displacement", "weight", "horsepower",
                  "acceleration", "year")
colMeans(Auto[, quantitative])

# Standard deviation of each quantitative predictor.
sd(Auto$mpg)
## [1] 7.805007
sd(Auto$cylinders)
## [1] 1.705783
sd(Auto$displacement)
## [1] 104.644
sd(Auto$weight)
## [1] 849.4026
sd(Auto$horsepower)
## [1] 38.49116
sd(Auto$acceleration)
## [1] 2.758864
sd(Auto$year)
## [1] 3.683737

9d

# Work on a copy of Auto so the packaged data stays untouched.
auto <- Auto
tenth <- auto[10, ]  # row 10 before the removal; not used in this section

# Drop observations 10 through 85.
auto <- auto[-(10:85), ]

# Standard deviations on the reduced data. Columns are read through `auto$`
# rather than attach(), so no extra copy of the data frame is left on the
# search path where it could shadow or be shadowed by other objects.
sd(auto$mpg)
## [1] 7.867283
sd(auto$cylinders)
## [1] 1.654179
sd(auto$displacement)
## [1] 99.67837
sd(auto$weight)
## [1] 811.3002
sd(auto$horsepower)
## [1] 35.70885
sd(auto$acceleration)
## [1] 2.693721
sd(auto$year)
## [1] 3.106217
## [1] 3.106217

Q10

# Load MASS, then show the first five rows of the Boston data frame.
library(MASS)
head(x = Boston, n = 5L)
##      crim zn indus chas   nox    rm  age    dis rad tax ptratio  black lstat
## 1 0.00632 18  2.31    0 0.538 6.575 65.2 4.0900   1 296    15.3 396.90  4.98
## 2 0.02731  0  7.07    0 0.469 6.421 78.9 4.9671   2 242    17.8 396.90  9.14
## 3 0.02729  0  7.07    0 0.469 7.185 61.1 4.9671   2 242    17.8 392.83  4.03
## 4 0.03237  0  2.18    0 0.458 6.998 45.8 6.0622   3 222    18.7 394.63  2.94
## 5 0.06905  0  2.18    0 0.458 7.147 54.2 6.0622   3 222    18.7 396.90  5.33
##   medv
## 1 24.0
## 2 21.6
## 3 34.7
## 4 33.4
## 5 36.2

Number of rows and columns

How many rows are in this data set? How many columns? What do the rows and columns represent?

# Number of rows followed by number of columns.
c(nrow(Boston), ncol(Boston))
## [1] 506  14

10b

Make some pairwise scatterplots of the predictors (columns) in this data set. Describe your findings.

# Scatterplot matrix of every column against every other column.
pairs(x = Boston)

10c

Are any of the predictors associated with per capita crime rate? If so, explain the relationship.

# Scatterplot matrix of crim and age.
pairs(~ crim + age, data = Boston)

# The same view with zn added as a third variable.
pairs(~ crim + age + zn, data = Boston)

10c Answer

Age of the housing: most likely, the older the neighborhood, the lower its income and the more experience it has with crime.

Radial highways: either criminals have a quick getaway, or the area is so far from a highway that it has not been gentrified.

10d

Do any of the suburbs of Boston appear to have particularly high crime rates? Tax rates? Pupil-teacher ratios? Comment on the range of each predictor.

# Dimensions of the subsets at or above each threshold; the first number of
# every result is the count of matching rows.
dim(Boston[Boston[["crim"]] >= 20, ])
## [1] 18 14
dim(Boston[Boston[["tax"]] >= 666, ])
## [1] 137  14
dim(Boston[Boston[["ptratio"]] >= 20, ])
## [1] 201  14

10e

How many of the suburbs in this data set bound the Charles river?

# Count the rows whose chas value equals 1.
nrow(Boston[Boston$chas == 1, ])
## [1] 35

10f

What is the median pupil-teacher ratio among the towns in this data set?

# Median of the ptratio column.
median(Boston[["ptratio"]])
## [1] 19.05

10g

Which suburb of Boston has the lowest median value of owner-occupied homes? What are the values of the other predictors for that suburb, and how do those values compare to the overall ranges for those predictors? Comment on your findings.

# Show every row whose medv equals the smallest medv in the data.
lowest_medv <- min(Boston$medv)
Boston[Boston$medv == lowest_medv, ]
##        crim zn indus chas   nox    rm age    dis rad tax ptratio  black lstat
## 399 38.3518  0  18.1    0 0.693 5.453 100 1.4896  24 666    20.2 396.90 30.59
## 406 67.9208  0  18.1    0 0.693 5.683 100 1.4254  24 666    20.2 384.97 22.98
##     medv
## 399    5
## 406    5

10h

In this data set, how many of the suburbs average more than seven rooms per dwelling? More than eight rooms per dwelling? Comment on the suburbs that average more than eight rooms per dwelling.

# Number of rows with rm above 7, then above 8.
nrow(Boston[Boston$rm > 7, ])
## [1] 64
nrow(Boston[Boston$rm > 8, ])
## [1] 13
# Scatterplot matrix limited to the rows with rm above 8.
pairs(x = Boston[Boston$rm > 8, ])