p42

Basic Commands

#getwd()
#setwd("../chapter02/")
# http://faculty.marshall.usc.edu/gareth-james/ISL/bios.html

# Build a numeric vector with c() and print it.
x <- c(1, 3, 2, 5)
x
## [1] 1 3 2 5
# Rebind x and create y, then check how many elements each one holds.
x <- c(1, 6, 2)
y <- c(1, 4, 3)
length(x)
## [1] 3
length(y)
## [1] 3

List and Remove objects

ls() # List the names of the objects in the current environment
## [1] "x" "y"
rm(list = ls()) # Remove every object whose name ls() returns

Matrix

# ?matrix
# Fill a 2 x 2 matrix from a vector; the values are laid out column by column.
x <- matrix(data = c(1, 2, 3, 4), nrow = 2, ncol = 2)
x
##      [,1] [,2]
## [1,]    1    3
## [2,]    2    4
# Same matrix, passing data, nrow and ncol positionally.
x <- matrix(c(1, 2, 3, 4), 2, 2)
x
##      [,1] [,2]
## [1,]    1    3
## [2,]    2    4
# byrow = TRUE lays the values out row by row instead.
matrix(c(1, 2, 3, 4), 2, 2, byrow = TRUE)
##      [,1] [,2]
## [1,]    1    2
## [2,]    3    4

Square Root

# sqrt() is applied to each element of the matrix x.
sqrt(x)
##          [,1]     [,2]
## [1,] 1.000000 1.732051
## [2,] 1.414214 2.000000

Correlation

# x: 50 standard-normal draws.
# y: x plus independent noise with mean 50 and standard deviation 0.1.
x <- rnorm(50)
y <- x + rnorm(50, mean = 50, sd = 0.1)
y
##  [1] 50.61508 49.78288 49.98360 52.24826 50.72951 48.92897 49.82495 50.03336
##  [9] 49.90008 51.32371 50.38523 47.90603 50.79670 48.46852 50.07275 50.36893
## [17] 50.89561 49.58554 50.64008 50.51077 51.49055 50.69971 50.47124 52.45478
## [25] 49.99722 50.48574 49.22556 50.69357 49.63148 51.42444 50.24742 50.47353
## [33] 49.77522 49.64214 49.09333 50.51206 49.58210 50.76035 50.76565 50.50143
## [41] 50.16230 50.69265 48.19904 52.20604 51.39416 52.11487 49.33432 49.65721
## [49] 49.06231 47.82384
# Correlation between x and y. No set.seed() call precedes the draws of x and
# y in this file, so this value is expected to differ from run to run.
cor(x,y)
## [1] 0.9951339
set.seed(1303) # Fix the RNG seed so that the draws below are reproducible.
rnorm(50) # First 50 draws after seeding
##  [1] -1.1439763145  1.3421293656  2.1853904757  0.5363925179  0.0631929665
##  [6]  0.5022344825 -0.0004167247  0.5658198405 -0.5725226890 -1.1102250073
## [11] -0.0486871234 -0.6956562176  0.8289174803  0.2066528551 -0.2356745091
## [16] -0.5563104914 -0.3647543571  0.8623550343 -0.6307715354  0.3136021252
## [21] -0.9314953177  0.8238676185  0.5233707021  0.7069214120  0.4202043256
## [26] -0.2690521547 -1.5103172999 -0.6902124766 -0.1434719524 -1.0135274099
## [31]  1.5732737361  0.0127465055  0.8726470499  0.4220661905 -0.0188157917
## [36]  2.6157489689 -0.6931401748 -0.2663217810 -0.7206364412  1.3677342065
## [41]  0.2640073322  0.6321868074 -1.3306509858  0.0268888182  1.0406363208
## [46]  1.3120237985 -0.0300020767 -0.2500257125  0.0234144857  1.6598706557
rnorm(50) # Next 50 draws from the same stream (not a repeat of the first 50)
##  [1]  1.00185188  0.26300143 -0.02835910 -0.55625904 -0.11956114 -1.03629594
##  [7] -0.65663801  0.53071490  0.11239650 -2.07756129  0.42047788  0.34127692
## [13] -1.11146959  0.84377453 -0.85525778  2.24788116 -1.37211474  0.93599500
## [19]  0.54973757  0.51758744 -0.56054669 -0.63876770 -0.06500831  0.37530956
## [25]  1.30692614 -0.61058086  0.32282993  1.75126495  1.55928971  0.64713105
## [31] -1.93202340 -0.96938200  1.00148882  0.15220012 -0.04515586 -0.50296757
## [37] -0.25911284  1.01738122 -1.72582568  0.93284077  0.02782077  1.58725296
## [43]  0.23574669 -0.21068373 -0.16983068  0.76280099  0.43017948  1.37181976
## [49]  1.57143594  0.13737399
set.seed(3) # Matches book
y <- rnorm(100)
mean(y)
## [1] 0.01103557
var(y)
## [1] 0.7328675
# The standard deviation is the square root of the variance ...
sqrt(var(y))
## [1] 0.8560768
# ... and sd() computes it directly.
sd(y)
## [1] 0.8560768

Graphics

# Two independent samples of 100 standard-normal values.
x <- rnorm(100)
y <- rnorm(100)

# Scatterplot with explicit axis labels and a title.
plot(x, y, xlab = "x-axis", ylab = "y-axis", main = "Title")

Save chart as PDF

# Open a PDF graphics device writing to Figure.pdf; the plot drawn next is
# sent to that file rather than to the screen.
pdf(file = "Figure.pdf")
plot(x, y, col = "green")
dev.off() # Close the device so the file is finalised
## quartz_off_screen 
##                 2

Sequences

# Integer sequence from 1 to 10 built with seq().
x <- seq(1, 10)
x
##  [1]  1  2  3  4  5  6  7  8  9 10
# The colon operator is shorthand for the same sequence.
x <- 1:10
x
##  [1]  1  2  3  4  5  6  7  8  9 10
# 50 equally spaced points from -pi to pi. The argument is spelled out as
# length.out instead of relying on partial matching of "length".
x <- seq(-pi, pi, length.out = 50)
x
##  [1] -3.14159265 -3.01336438 -2.88513611 -2.75690784 -2.62867957 -2.50045130
##  [7] -2.37222302 -2.24399475 -2.11576648 -1.98753821 -1.85930994 -1.73108167
## [13] -1.60285339 -1.47462512 -1.34639685 -1.21816858 -1.08994031 -0.96171204
## [19] -0.83348377 -0.70525549 -0.57702722 -0.44879895 -0.32057068 -0.19234241
## [25] -0.06411414  0.06411414  0.19234241  0.32057068  0.44879895  0.57702722
## [31]  0.70525549  0.83348377  0.96171204  1.08994031  1.21816858  1.34639685
## [37]  1.47462512  1.60285339  1.73108167  1.85930994  1.98753821  2.11576648
## [43]  2.24399475  2.37222302  2.50045130  2.62867957  2.75690784  2.88513611
## [49]  3.01336438  3.14159265

Contour Plot

# Evaluate f(x, y) = cos(y) / (1 + x^2) on the grid spanned by x and y.
y <- x
f <- outer(x, y, function(x, y) cos(y) / (1 + x^2))
contour(x, y, f)
# Overlay a denser set of contour levels on the plot that is already open.
# TRUE is written out because T is an ordinary variable that can be reassigned.
contour(x, y, f, nlevels = 45, add = TRUE)

# fa is the antisymmetric part of f: (f - t(f)) / 2.
fa <- (f - t(f)) / 2
contour(x, y, fa, nlevels = 15)

image() - Heatmap

# Colour-coded image (heatmap) of fa over the x-y grid.
image(x, y, fa)

Perspective Plots

# Default three-dimensional view of fa.
persp(x, y, fa)

# theta changes the horizontal viewing direction.
persp(x, y, fa, theta = 30)

# phi changes the vertical viewing angle; compare 20, 70 and 40 degrees.
persp(x, y, fa, theta = 30, phi = 20)

persp(x, y, fa, theta = 30, phi = 70)

persp(x, y, fa, theta = 30, phi = 40)

Indexing Data

p47

# 4 x 4 matrix holding 1..16, filled column by column.
A <- matrix(1:16, 4, 4)
A
##      [,1] [,2] [,3] [,4]
## [1,]    1    5    9   13
## [2,]    2    6   10   14
## [3,]    3    7   11   15
## [4,]    4    8   12   16
# Single element: row 2, column 3.
A[2, 3]
## [1] 10
# Rows 1 and 3 crossed with columns 2 and 4.
A[c(1, 3), c(2, 4)]
##      [,1] [,2]
## [1,]    5   13
## [2,]    7   15
# Rows 1 to 3 crossed with columns 2 to 4.
A[1:3, 2:4]
##      [,1] [,2] [,3]
## [1,]    5    9   13
## [2,]    6   10   14
## [3,]    7   11   15
# Leaving the column index empty keeps every column.
A[1:2, ]
##      [,1] [,2] [,3] [,4]
## [1,]    1    5    9   13
## [2,]    2    6   10   14
# A single row comes back as a plain vector.
A[1, ]
## [1]  1  5  9 13

Negative sign excludes

# Drop rows 1 and 3, keep all columns.
A[-c(1, 3), ]
##      [,1] [,2] [,3] [,4]
## [1,]    2    6   10   14
## [2,]    4    8   12   16
# Drop rows 1 and 3 and columns 1, 3 and 4; the one remaining column is
# returned as a vector.
A[-c(1, 3), -c(1, 3, 4)]
## [1] 6 8
dim(A)
## [1] 4 4

Loading Data

# Loads it incorrectly: no header is requested and "?" is not declared as a
# missing-value marker (presumably the header line ends up as a data row).
Auto <- read.table("../input/Auto.data", stringsAsFactors = TRUE)
#  fix(Auto) # X not working on Mac

# Loads it correctly: the first line supplies the column names and "?" is
# read as NA. TRUE is written out because T can be reassigned.
Auto <- read.table("../input/Auto.data", header = TRUE, na.strings = "?")

Load CSV

# Read the CSV version: the first line is the header, "?" marks missing
# values, and character columns are converted to factors.
Auto <- read.csv(
  "../input/Auto.csv",
  header = TRUE,
  na.strings = "?",
  stringsAsFactors = TRUE
)
dim(Auto)
## [1] 397   9
Auto[1:4, ] # First four rows
##   mpg cylinders displacement horsepower weight acceleration year origin
## 1  18         8          307        130   3504         12.0   70      1
## 2  15         8          350        165   3693         11.5   70      1
## 3  18         8          318        150   3436         11.0   70      1
## 4  16         8          304        150   3433         12.0   70      1
##                        name
## 1 chevrolet chevelle malibu
## 2         buick skylark 320
## 3        plymouth satellite
## 4             amc rebel sst
names(Auto) # Column names
## [1] "mpg"          "cylinders"    "displacement" "horsepower"   "weight"      
## [6] "acceleration" "year"         "origin"       "name"

Additional Graphical and Numerical Summaries

library(ISLR)
## Warning: package 'ISLR' was built under R version 4.0.3
## 
## Attaching package: 'ISLR'
## The following object is masked _by_ '.GlobalEnv':
## 
##     Auto
# Per the message above, the Auto already in the global environment is the
# one used below, not the copy shipped with ISLR.
plot(Auto$cylinders, Auto$mpg, xlim = c(0, 10), ylim = c(0, 100))

attach(Auto) # Columns of Auto can now be referenced by their bare names
## The following object is masked from package:ggplot2:
## 
##     mpg
# Same scatterplot, this time using the attached column names directly.
plot(cylinders, mpg, xlim = c(0, 10), ylim = c(0, 100))

Boxplot

# Treat the number of cylinders as a categorical variable.
cylinders <- as.factor(Auto$cylinders)

# We now get a boxplot because x-axis is categorical
plot(cylinders, mpg)

plot(cylinders, mpg, col = "red")

# TRUE is written out in the calls below because T can be reassigned.
plot(cylinders, mpg, col = "red", varwidth = TRUE)

# Flip x and y axis
plot(cylinders, mpg, col = "red", varwidth = TRUE, horizontal = TRUE)

plot(cylinders, mpg, col = "red", varwidth = TRUE, xlab = "cylinders", ylab = "MPG")

mpg
##   [1] 18.0 15.0 18.0 16.0 17.0 15.0 14.0 14.0 14.0 15.0 15.0 14.0 15.0 14.0 24.0
##  [16] 22.0 18.0 21.0 27.0 26.0 25.0 24.0 25.0 26.0 21.0 10.0 10.0 11.0  9.0 27.0
##  [31] 28.0 25.0 25.0 19.0 16.0 17.0 19.0 18.0 14.0 14.0 14.0 14.0 12.0 13.0 13.0
##  [46] 18.0 22.0 19.0 18.0 23.0 28.0 30.0 30.0 31.0 35.0 27.0 26.0 24.0 25.0 23.0
##  [61] 20.0 21.0 13.0 14.0 15.0 14.0 17.0 11.0 13.0 12.0 13.0 19.0 15.0 13.0 13.0
##  [76] 14.0 18.0 22.0 21.0 26.0 22.0 28.0 23.0 28.0 27.0 13.0 14.0 13.0 14.0 15.0
##  [91] 12.0 13.0 13.0 14.0 13.0 12.0 13.0 18.0 16.0 18.0 18.0 23.0 26.0 11.0 12.0
## [106] 13.0 12.0 18.0 20.0 21.0 22.0 18.0 19.0 21.0 26.0 15.0 16.0 29.0 24.0 20.0
## [121] 19.0 15.0 24.0 20.0 11.0 20.0 21.0 19.0 15.0 31.0 26.0 32.0 25.0 16.0 16.0
## [136] 18.0 16.0 13.0 14.0 14.0 14.0 29.0 26.0 26.0 31.0 32.0 28.0 24.0 26.0 24.0
## [151] 26.0 31.0 19.0 18.0 15.0 15.0 16.0 15.0 16.0 14.0 17.0 16.0 15.0 18.0 21.0
## [166] 20.0 13.0 29.0 23.0 20.0 23.0 24.0 25.0 24.0 18.0 29.0 19.0 23.0 23.0 22.0
## [181] 25.0 33.0 28.0 25.0 25.0 26.0 27.0 17.5 16.0 15.5 14.5 22.0 22.0 24.0 22.5
## [196] 29.0 24.5 29.0 33.0 20.0 18.0 18.5 17.5 29.5 32.0 28.0 26.5 20.0 13.0 19.0
## [211] 19.0 16.5 16.5 13.0 13.0 13.0 31.5 30.0 36.0 25.5 33.5 17.5 17.0 15.5 15.0
## [226] 17.5 20.5 19.0 18.5 16.0 15.5 15.5 16.0 29.0 24.5 26.0 25.5 30.5 33.5 30.0
## [241] 30.5 22.0 21.5 21.5 43.1 36.1 32.8 39.4 36.1 19.9 19.4 20.2 19.2 20.5 20.2
## [256] 25.1 20.5 19.4 20.6 20.8 18.6 18.1 19.2 17.7 18.1 17.5 30.0 27.5 27.2 30.9
## [271] 21.1 23.2 23.8 23.9 20.3 17.0 21.6 16.2 31.5 29.5 21.5 19.8 22.3 20.2 20.6
## [286] 17.0 17.6 16.5 18.2 16.9 15.5 19.2 18.5 31.9 34.1 35.7 27.4 25.4 23.0 27.2
## [301] 23.9 34.2 34.5 31.8 37.3 28.4 28.8 26.8 33.5 41.5 38.1 32.1 37.2 28.0 26.4
## [316] 24.3 19.1 34.3 29.8 31.3 37.0 32.2 46.6 27.9 40.8 44.3 43.4 36.4 30.0 44.6
## [331] 40.9 33.8 29.8 32.7 23.7 35.0 23.6 32.4 27.2 26.6 25.8 23.5 30.0 39.1 39.0
## [346] 35.1 32.3 37.0 37.7 34.1 34.7 34.4 29.9 33.0 34.5 33.7 32.4 32.9 31.6 28.1
## [361] 30.7 25.4 24.2 22.4 26.6 20.2 17.6 28.0 27.0 34.0 31.0 29.0 27.0 24.0 36.0
## [376] 37.0 31.0 38.0 36.0 36.0 36.0 34.0 38.0 32.0 38.0 25.0 38.0 26.0 22.0 32.0
## [391] 36.0 27.0 27.0 44.0 32.0 28.0 31.0

Histogram

hist(mpg)

hist(mpg, col = 2) # 2 = red

hist(mpg, col = 2, breaks = 15)

# pairs() scatterplot matrix

# Every column of Auto against every other column.
# NOTE(review): this includes the factor column `name`, whose panels are
# unlikely to be informative - confirm whether it should be excluded.
pairs(Auto)

# Restrict the matrix to a handful of numeric columns via a formula.
pairs(~ mpg + displacement + horsepower + weight + acceleration, data = Auto)

Make graph interactive

{
  # Draw the scatterplot, then let the user click on points to label them
  # with the car name. The braces keep both calls in a single expression.
  plot(horsepower, mpg)
  identify(horsepower, mpg, labels = name)
}

## integer(0)

Summarize Entire Table

summary(Auto) # Per-column summary: quartiles/mean for numeric columns, level counts for factors
##       mpg          cylinders      displacement     horsepower        weight    
##  Min.   : 9.00   Min.   :3.000   Min.   : 68.0   Min.   : 46.0   Min.   :1613  
##  1st Qu.:17.50   1st Qu.:4.000   1st Qu.:104.0   1st Qu.: 75.0   1st Qu.:2223  
##  Median :23.00   Median :4.000   Median :146.0   Median : 93.5   Median :2800  
##  Mean   :23.52   Mean   :5.458   Mean   :193.5   Mean   :104.5   Mean   :2970  
##  3rd Qu.:29.00   3rd Qu.:8.000   3rd Qu.:262.0   3rd Qu.:126.0   3rd Qu.:3609  
##  Max.   :46.60   Max.   :8.000   Max.   :455.0   Max.   :230.0   Max.   :5140  
##                                                  NA's   :5                     
##   acceleration        year           origin                  name    
##  Min.   : 8.00   Min.   :70.00   Min.   :1.000   ford pinto    :  6  
##  1st Qu.:13.80   1st Qu.:73.00   1st Qu.:1.000   amc matador   :  5  
##  Median :15.50   Median :76.00   Median :1.000   ford maverick :  5  
##  Mean   :15.56   Mean   :75.99   Mean   :1.574   toyota corolla:  5  
##  3rd Qu.:17.10   3rd Qu.:79.00   3rd Qu.:2.000   amc gremlin   :  4  
##  Max.   :24.80   Max.   :82.00   Max.   :3.000   amc hornet    :  4  
##                                                  (Other)       :368

Summarize One Variable

summary(mpg) # Summary of a single column (mpg)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    9.00   17.50   23.00   23.52   29.00   46.60