mtcars data

Note: The type argument in generate() is automatically filled based on the entries for specify() and hypothesize(). It can be removed throughout the examples that follow. It is left in to reiterate the type of generation process being performed.
library(infer)
library(dplyr)
# Recode the discrete mtcars columns as factors so they can serve as
# categorical variables in the examples that follow.
mtcars <- mtcars %>%
  mutate(
    cyl  = factor(cyl),
    vs   = factor(vs),
    am   = factor(am),
    gear = factor(gear),
    carb = factor(carb)
  )

# Fix the RNG seed so the resampled output below is reproducible.
set.seed(2018)
One numerical variable (mean)
# Point null (mu = 25) for mpg: draw 100 bootstrap replicates and
# compute the mean of each one.
# Formula alternative for specify(): mpg ~ NULL
mtcars %>%
  specify(response = mpg) %>%
  hypothesize(null = "point", mu = 25) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "mean")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 26.6
## 2 2 25.1
## 3 3 25.2
## 4 4 24.7
## 5 5 24.6
## 6 6 25.8
## 7 7 24.7
## 8 8 25.6
## 9 9 25.0
## 10 10 25.1
## # ... with 90 more rows
One numerical variable (median)
# Point null (med = 26) for mpg: draw 100 bootstrap replicates and
# compute the median of each one.
# Formula alternative for specify(): mpg ~ NULL
mtcars %>%
  specify(response = mpg) %>%
  hypothesize(null = "point", med = 26) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "median")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 28.2
## 2 2 27.2
## 3 3 26.2
## 4 4 26
## 5 5 26.5
## 6 6 24.5
## 7 7 26
## 8 8 28.2
## 9 9 28.2
## 10 10 23.2
## # ... with 90 more rows
One categorical (2 level) variable
# Point null (p = .25) for the proportion of am == "1": generate 100
# simulated replicates and compute the proportion in each one.
# Formula alternative for specify(): am ~ NULL
mtcars %>%
  specify(response = am, success = "1") %>%
  hypothesize(null = "point", p = .25) %>%
  generate(reps = 100, type = "simulate") %>%
  calculate(stat = "prop")
## # A tibble: 100 x 2
## replicate stat
## <fct> <dbl>
## 1 1 0.375
## 2 2 0.0625
## 3 3 0.125
## 4 4 0.25
## 5 5 0.188
## 6 6 0.406
## 7 7 0.219
## 8 8 0.375
## 9 9 0.344
## 10 10 0.188
## # ... with 90 more rows
Two categorical (2 level) variables
# Independence null for am versus vs: 100 permutation replicates, each
# summarised by the difference in proportions of am == "1" between the
# vs groups, taken in the order "0" then "1".
# Named-argument alternative for specify(): response = am, explanatory = vs
mtcars %>%
  specify(am ~ vs, success = "1") %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "diff in props", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.421
## 2 2 -0.167
## 3 3 -0.421
## 4 4 -0.0397
## 5 5 0.0873
## 6 6 -0.0397
## 7 7 -0.0397
## 8 8 -0.0397
## 9 9 0.0873
## 10 10 -0.167
## # ... with 90 more rows
One categorical (>2 level) - GoF
# Goodness of fit for cyl: point null with hypothesised proportions
# 4 -> .5, 6 -> .25, 8 -> .25; 100 simulated replicates, each summarised
# by a "Chisq" statistic.
# Named-argument alternative for specify(): response = cyl
mtcars %>%
  specify(cyl ~ NULL) %>%
  hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>%
  generate(reps = 100, type = "simulate") %>%
  calculate(stat = "Chisq")
## # A tibble: 100 x 2
## replicate stat
## <fct> <dbl>
## 1 1 6.75
## 2 2 1.69
## 3 3 3.19
## 4 4 1.69
## 5 5 6
## 6 6 2.69
## 7 7 4.75
## 8 8 0.75
## 9 9 0.688
## 10 10 3.69
## # ... with 90 more rows
Two categorical (>2 level) variables
# Independence null for cyl versus am: 100 permutation replicates, each
# summarised by a "Chisq" statistic.
# Named-argument alternative for specify(): response = cyl, explanatory = am
mtcars %>%
  specify(cyl ~ am) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "Chisq")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 1.34
## 2 2 1.63
## 3 3 1.63
## 4 4 2.63
## 5 5 3.90
## 6 6 1.74
## 7 7 0.126
## 8 8 1.74
## 9 9 1.34
## 10 10 1.34
## # ... with 90 more rows
One numerical variable one categorical (2 levels) (diff in means)
# Independence null for mpg versus am: 100 permutation replicates, each
# summarised by the difference in mean mpg between the am groups, taken
# in the order "0" then "1".
# Named-argument alternative for specify(): response = mpg, explanatory = am
mtcars %>%
  specify(mpg ~ am) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "diff in means", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -1.10
## 2 2 0.217
## 3 3 -1.08
## 4 4 -3.80
## 5 5 3.08
## 6 6 0.489
## 7 7 2.34
## 8 8 4.10
## 9 9 -1.86
## 10 10 -0.210
## # ... with 90 more rows
One numerical variable one categorical (2 levels) (diff in medians)
# Independence null for mpg versus am: 100 permutation replicates, each
# summarised by the difference in median mpg between the am groups,
# taken in the order "0" then "1".
# Named-argument alternative for specify(): response = mpg, explanatory = am
mtcars %>%
  specify(mpg ~ am) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "diff in medians", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 0.5
## 2 2 -1.10
## 3 3 5.20
## 4 4 1.8
## 5 5 0.5
## 6 6 3.3
## 7 7 -1.60
## 8 8 -2.3
## 9 9 2.90
## 10 10 -0.5
## # ... with 90 more rows
One numerical one categorical (>2 levels) - ANOVA
# Independence null for mpg versus cyl: 100 permutation replicates, each
# summarised by an "F" statistic.
# Named-argument alternative for specify(): response = mpg, explanatory = cyl
mtcars %>%
  specify(mpg ~ cyl) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "F")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 1.43
## 2 2 1.65
## 3 3 0.318
## 4 4 0.393
## 5 5 1.05
## 6 6 0.826
## 7 7 1.32
## 8 8 0.833
## 9 9 0.144
## 10 10 0.365
## # ... with 90 more rows
Two numerical vars - SLR
# Independence null for mpg versus hp: 100 permutation replicates, each
# summarised by the "slope" statistic.
mtcars %>%
specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = hp
hypothesize(null = "independence") %>%
generate(reps = 100, type = "permute") %>%
calculate(stat = "slope")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.0151
## 2 2 0.00224
## 3 3 -0.0120
## 4 4 0.00292
## 5 5 0.0203
## 6 6 -0.00730
## 7 7 -0.0246
## 8 8 0.00555
## 9 9 0.0109
## 10 10 0.0176
## # ... with 90 more rows
One numerical variable (standard deviation)
Not currently implemented
# Point null (sigma = 5) for the standard deviation of mpg, using 100
# bootstrap replicates and the "sd" statistic.
# NOTE(review): the surrounding text marks this case as not currently
# implemented, and no output is shown for it; confirm before relying on it.
# Formula alternative for specify(): mpg ~ NULL
mtcars %>%
  specify(response = mpg) %>%
  hypothesize(null = "point", sigma = 5) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "sd")
One numerical (one mean)
# No hypothesis step: draw 100 bootstrap replicates of mpg and compute
# the mean of each one.
mtcars %>%
  specify(response = mpg) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "mean")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 19.6
## 2 2 21.8
## 3 3 18.7
## 4 4 19.2
## 5 5 21.6
## 6 6 19.9
## 7 7 20.7
## 8 8 19.3
## 9 9 21.2
## 10 10 21.3
## # ... with 90 more rows
One numerical (one median)
# No hypothesis step: draw 100 bootstrap replicates of mpg and compute
# the median of each one.
mtcars %>%
  specify(response = mpg) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "median")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 19.2
## 2 2 20.1
## 3 3 21
## 4 4 17.8
## 5 5 20.1
## 6 6 19.2
## 7 7 18.4
## 8 8 19.2
## 9 9 19.2
## 10 10 18.0
## # ... with 90 more rows
One numerical (standard deviation)
# No hypothesis step: draw 100 bootstrap replicates of mpg and compute
# the standard deviation ("sd") of each one.
mtcars %>%
  specify(response = mpg) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "sd")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 5.28
## 2 2 6.74
## 3 3 5.29
## 4 4 5.41
## 5 5 5.56
## 6 6 5.65
## 7 7 6.17
## 8 8 6.40
## 9 9 6.31
## 10 10 6.11
## # ... with 90 more rows
One categorical (one proportion)
# No hypothesis step: draw 100 bootstrap replicates and compute the
# proportion of am == "1" in each one.
mtcars %>%
  specify(response = am, success = "1") %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "prop")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 0.375
## 2 2 0.406
## 3 3 0.406
## 4 4 0.312
## 5 5 0.312
## 6 6 0.469
## 7 7 0.438
## 8 8 0.281
## 9 9 0.438
## 10 10 0.5
## # ... with 90 more rows
One numerical variable one categorical (2 levels) (diff in means)
# No hypothesis step: draw 100 bootstrap replicates and compute the
# difference in mean mpg between the am groups, taken in the order
# "0" then "1".
mtcars %>%
  specify(mpg ~ am) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "diff in means", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -9.38
## 2 2 -5.11
## 3 3 -4.88
## 4 4 -5.39
## 5 5 -9.19
## 6 6 -7.20
## 7 7 -5.34
## 8 8 -3.20
## 9 9 -5.95
## 10 10 -11.0
## # ... with 90 more rows
Two categorical variables (diff in proportions)
# No hypothesis step: draw 100 bootstrap replicates and compute the
# difference in proportions of am == "1" between the vs groups, taken
# in the order "0" then "1".
mtcars %>%
  specify(am ~ vs, success = "1") %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "diff in props", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.352
## 2 2 -0.15
## 3 3 -0.294
## 4 4 -0.254
## 5 5 -0.438
## 6 6 -0.126
## 7 7 -0.188
## 8 8 0.167
## 9 9 -0.143
## 10 10 -0.5
## # ... with 90 more rows
Two numerical vars - SLR
# No hypothesis step: draw 100 bootstrap replicates and compute the
# "slope" statistic for mpg ~ hp in each one.
mtcars %>%
  specify(mpg ~ hp) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "slope")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.0850
## 2 2 -0.0512
## 3 3 -0.0736
## 4 4 -0.0569
## 5 5 -0.0930
## 6 6 -0.0659
## 7 7 -0.0710
## 8 8 -0.0767
## 9 9 -0.0556
## 10 10 -0.0627
## # ... with 90 more rows
Two numerical vars - correlation
# No hypothesis step: draw 100 bootstrap replicates and compute the
# "correlation" statistic for mpg ~ hp in each one.
mtcars %>%
  specify(mpg ~ hp) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "correlation")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.821
## 2 2 -0.812
## 3 3 -0.802
## 4 4 -0.723
## 5 5 -0.885
## 6 6 -0.777
## 7 7 -0.752
## 8 8 -0.758
## 9 9 -0.826
## 10 10 -0.779
## # ... with 90 more rows