mtcars data
library(infer)
library(dplyr)
# Recode the discrete columns of mtcars as factors; the pipelines below use
# cyl, vs and am as categorical variables with string levels such as "0"/"1".
mtcars <- mutate(
  as.data.frame(mtcars),
  cyl = factor(cyl),
  vs = factor(vs),
  am = factor(am),
  gear = factor(gear),
  carb = factor(carb)
)

# Fix the RNG seed so the resampled replicates are reproducible.
set.seed(2018)
One numerical variable (mean)
# Point null on a mean: 100 bootstrap replicates of mpg generated under
# mu = 25, with the mean computed for each replicate.
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  hypothesize(null = "point", mu = 25) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "mean")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 26.6
## 2 2 25.1
## 3 3 25.2
## 4 4 24.7
## 5 5 24.6
## 6 6 25.8
## 7 7 24.7
## 8 8 25.6
## 9 9 25.0
## 10 10 25.1
## # ... with 90 more rows
One numerical variable (median)
# Point null on a median: 100 bootstrap replicates of mpg generated under
# med = 26, with the median computed for each replicate.
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  hypothesize(null = "point", med = 26) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "median")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 28.2
## 2 2 27.2
## 3 3 26.2
## 4 4 26.0
## 5 5 26.5
## 6 6 24.5
## 7 7 26.0
## 8 8 28.2
## 9 9 28.2
## 10 10 23.2
## # ... with 90 more rows
One numerical variable (standard deviation)
# Point null on a standard deviation: 100 bootstrap replicates of mpg
# generated under sigma = 5, with the sd computed for each replicate.
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  hypothesize(null = "point", sigma = 5) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "sd")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 5.88
## 2 2 6.27
## 3 3 5.08
## 4 4 6.91
## 5 5 5.96
## 6 6 6.01
## 7 7 6.75
## 8 8 5.14
## 9 9 6.06
## 10 10 5.63
## # ... with 90 more rows
One categorical (2 level) variable
# Point null on a proportion: level "1" of am is the success category,
# 100 replicates are simulated under p = .25, and the proportion is
# computed for each replicate.
mtcars %>%
  specify(am ~ NULL, success = "1") %>% # named-argument alt: response = am
  hypothesize(null = "point", p = .25) %>%
  generate(reps = 100, type = "simulate") %>%
  calculate(stat = "prop")
## # A tibble: 100 x 2
## replicate stat
## <fct> <dbl>
## 1 1 0.281
## 2 2 0.281
## 3 3 0.281
## 4 4 0.219
## 5 5 0.250
## 6 6 0.250
## 7 7 0.281
## 8 8 0.250
## 9 9 0.188
## 10 10 0.344
## # ... with 90 more rows
Two categorical (2 level) variables
# Independence null for am vs. vs: 100 permutation replicates, each
# summarised by the difference in proportions of am == "1" between the
# vs groups, taken in the order "0", "1".
mtcars %>%
  specify(response = am, explanatory = vs, success = "1") %>% # alt: am ~ vs
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "diff in props", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.294
## 2 2 -0.167
## 3 3 -0.0397
## 4 4 0.214
## 5 5 -0.167
## 6 6 -0.167
## 7 7 0.0873
## 8 8 -0.0397
## 9 9 -0.0397
## 10 10 0.341
## # ... with 90 more rows
One categorical (>2 level) - GoF
# Goodness of fit for cyl: 100 replicates simulated under the hypothesised
# level probabilities 4 = .5, 6 = .25, 8 = .25, each summarised by a
# Chi-squared statistic.
mtcars %>%
  specify(response = cyl) %>% # formula alt: cyl ~ NULL
  hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>%
  generate(reps = 100, type = "simulate") %>%
  calculate(stat = "Chisq")
## # A tibble: 100 x 2
## replicate stat
## <fct> <dbl>
## 1 1 3.00
## 2 2 2.25
## 3 3 3.00
## 4 4 1.69
## 5 5 6.75
## 6 6 0.188
## 7 7 2.25
## 8 8 1.19
## 9 9 0.688
## 10 10 3.19
## # ... with 90 more rows
Two categorical (>2 level) variables
# Independence null for cyl vs. am: 100 permutation replicates, each
# summarised by a Chi-squared statistic.
mtcars %>%
  specify(response = cyl, explanatory = am) %>% # formula alt: cyl ~ am
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "Chisq")
## # A tibble: 100 x 2
## replicate stat
## <fct> <dbl>
## 1 1 3.68
## 2 2 0.557
## 3 3 5.00
## 4 4 4.57
## 5 5 6.48
## 6 6 0.126
## 7 7 3.71
## 8 8 6.91
## 9 9 1.45
## 10 10 1.01
## # ... with 90 more rows
One numerical variable one categorical (2 levels) (diff in means)
# Independence null for mpg vs. am: 100 permutation replicates, each
# summarised by the difference in mean mpg between the am groups, taken
# in the order "0", "1".
mtcars %>%
  specify(response = mpg, explanatory = am) %>% # formula alt: mpg ~ am
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "diff in means", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 0.360
## 2 2 0.0490
## 3 3 3.38
## 4 4 1.89
## 5 5 0.891
## 6 6 0.904
## 7 7 -0.327
## 8 8 -3.44
## 9 9 0.502
## 10 10 -1.92
## # ... with 90 more rows
One numerical variable one categorical (2 levels) (diff in medians)
# Independence null for mpg vs. am: 100 permutation replicates, each
# summarised by the difference in median mpg between the am groups, taken
# in the order "0", "1".
mtcars %>%
  specify(response = mpg, explanatory = am) %>% # formula alt: mpg ~ am
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "diff in medians", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 0.500
## 2 2 -2.70
## 3 3 2.40
## 4 4 0
## 5 5 -5.00
## 6 6 2.40
## 7 7 0
## 8 8 2.90
## 9 9 -0.500
## 10 10 0.500
## # ... with 90 more rows
One numerical one categorical (>2 levels) - ANOVA
# Independence null for mpg vs. cyl: 100 permutation replicates, each
# summarised by an F statistic.
mtcars %>%
  specify(response = mpg, explanatory = cyl) %>% # formula alt: mpg ~ cyl
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "F")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 2.53
## 2 2 0.104
## 3 3 0.238
## 4 4 1.12
## 5 5 0.660
## 6 6 0.173
## 7 7 0.799
## 8 8 0.800
## 9 9 0.255
## 10 10 0.0198
## # ... with 90 more rows
Two numerical vars - SLR
# Independence null for mpg vs. hp: 100 permutation replicates, each
# summarised by the slope statistic for mpg on hp.
mtcars %>%
  specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = hp
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "slope")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 0.0310
## 2 2 0.00706
## 3 3 -0.0231
## 4 4 -0.0285
## 5 5 -0.0124
## 6 6 -0.00164
## 7 7 -0.00587
## 8 8 -0.00369
## 9 9 0.00522
## 10 10 -0.00866
## # ... with 90 more rows
One numerical (one mean)
# Bootstrap distribution of the mean of mpg: 100 bootstrap replicates with
# no hypothesize() step, and the mean computed for each replicate.
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "mean")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 19.3
## 2 2 20.8
## 3 3 22.5
## 4 4 19.3
## 5 5 21.3
## 6 6 20.1
## 7 7 20.7
## 8 8 20.4
## 9 9 19.8
## 10 10 19.8
## # ... with 90 more rows
One numerical (one median)
# Bootstrap distribution of the median of mpg: 100 bootstrap replicates with
# no hypothesize() step, and the median computed for each replicate.
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "median")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 19.2
## 2 2 19.2
## 3 3 19.0
## 4 4 18.0
## 5 5 21.2
## 6 6 21.0
## 7 7 17.0
## 8 8 19.4
## 9 9 19.0
## 10 10 21.2
## # ... with 90 more rows
One numerical (standard deviation)
# Bootstrap distribution of the standard deviation of mpg: 100 bootstrap
# replicates with no hypothesize() step, and the sd computed for each.
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "sd")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 5.27
## 2 2 6.29
## 3 3 5.32
## 4 4 4.95
## 5 5 5.57
## 6 6 6.92
## 7 7 6.45
## 8 8 4.85
## 9 9 6.16
## 10 10 6.49
## # ... with 90 more rows
One categorical (one proportion)
# Bootstrap distribution of a proportion: level "1" of am is the success
# category, 100 bootstrap replicates are drawn with no hypothesize() step,
# and the proportion is computed for each replicate.
mtcars %>%
  specify(am ~ NULL, success = "1") %>% # named-argument alt: response = am
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "prop")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 0.344
## 2 2 0.312
## 3 3 0.344
## 4 4 0.438
## 5 5 0.406
## 6 6 0.438
## 7 7 0.469
## 8 8 0.375
## 9 9 0.344
## 10 10 0.469
## # ... with 90 more rows
One numerical variable one categorical (2 levels) (diff in means)
# Bootstrap distribution of the difference in mean mpg between the am
# groups (order "0", "1"): 100 bootstrap replicates, no hypothesize() step.
mtcars %>%
  specify(response = mpg, explanatory = am) %>% # formula alt: mpg ~ am
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "diff in means", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -6.90
## 2 2 -5.02
## 3 3 -7.51
## 4 4 -5.71
## 5 5 -8.25
## 6 6 -7.23
## 7 7 -8.45
## 8 8 -3.71
## 9 9 -6.54
## 10 10 -9.37
## # ... with 90 more rows
Two categorical variables (diff in proportions)
# Bootstrap distribution of the difference in proportions of am == "1"
# between the vs groups (order "0", "1"): 100 bootstrap replicates, no
# hypothesize() step.
mtcars %>%
  specify(response = am, explanatory = vs, success = "1") %>% # alt: am ~ vs
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "diff in props", order = c("0", "1"))
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.263
## 2 2 0.0167
## 3 3 0.108
## 4 4 -0.188
## 5 5 -0.408
## 6 6 0
## 7 7 -0.564
## 8 8 -0.563
## 9 9 -0.0338
## 10 10 0.133
## # ... with 90 more rows
Two numerical vars - SLR
# Bootstrap distribution of the slope statistic for mpg on hp: 100 bootstrap
# replicates with no hypothesize() step.
mtcars %>%
  specify(response = mpg, explanatory = hp) %>% # formula alt: mpg ~ hp
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "slope")
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.0951
## 2 2 -0.0951
## 3 3 -0.103
## 4 4 -0.0553
## 5 5 -0.104
## 6 6 -0.0677
## 7 7 -0.0588
## 8 8 -0.0650
## 9 9 -0.0987
## 10 10 -0.0657
## # ... with 90 more rows