mtcars data
library(infer)
library(dplyr)
# Recode the discrete mtcars columns as factors. across() (dplyr >= 1.0.0)
# applies factor() to each listed column, replacing it in place.
mtcars <- mtcars %>%
  mutate(across(c(cyl, vs, am, gear, carb), factor))

# Fix the RNG seed so the resampled results below are reproducible
set.seed(2018)
One numerical variable (mean)
# 100 bootstrap replicates of mpg under the point null mu = 25,
# summarised by the mean of each replicate.
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  hypothesize(null = "point", mu = 25) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "mean")
## Response: mpg (numeric)
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 26.6
## 2 2 25.1
## 3 3 25.2
## 4 4 24.7
## 5 5 24.6
## 6 6 25.8
## 7 7 24.7
## 8 8 25.6
## 9 9 25.0
## 10 10 25.1
## # ... with 90 more rows
One numerical variable (median)
# 100 bootstrap replicates of mpg under the point null med = 26,
# summarised by the median of each replicate.
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  hypothesize(null = "point", med = 26) %>%
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "median")
## Response: mpg (numeric)
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 28.2
## 2 2 27.2
## 3 3 26.2
## 4 4 26.0
## 5 5 26.5
## 6 6 24.5
## 7 7 26.0
## 8 8 28.2
## 9 9 28.2
## 10 10 23.2
## # ... with 90 more rows
One categorical (2 level) variable
# 100 simulated replicates of am under the point null p = .25, with "1"
# treated as the success level; the statistic is the sample proportion.
mtcars %>%
  specify(am ~ NULL, success = "1") %>% # named-argument alt: response = am
  hypothesize(null = "point", p = .25) %>%
  generate(reps = 100, type = "simulate") %>%
  calculate(stat = "prop")
## Response: am (factor)
## Null Hypothesis: point
## # A tibble: 100 x 2
## replicate stat
## <fct> <dbl>
## 1 1 0.375
## 2 2 0.0625
## 3 3 0.125
## 4 4 0.250
## 5 5 0.188
## 6 6 0.406
## 7 7 0.219
## 8 8 0.375
## 9 9 0.344
## 10 10 0.188
## # ... with 90 more rows
Two categorical (2 level) variables
# 100 permutation replicates under independence of am and vs; the
# statistic is the difference in proportions of am == "1" between the
# vs groups, taken in the order "0" then "1".
mtcars %>%
  specify(response = am, explanatory = vs, success = "1") %>% # formula alt: am ~ vs
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "diff in props", order = c("0", "1"))
## Response: am (factor)
## Explanatory: vs (factor)
## Null Hypothesis: independence
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.421
## 2 2 -0.167
## 3 3 -0.421
## 4 4 -0.0397
## 5 5 0.0873
## 6 6 -0.0397
## 7 7 -0.0397
## 8 8 -0.0397
## 9 9 0.0873
## 10 10 -0.167
## # ... with 90 more rows
One categorical (>2 level) - GoF
# Goodness of fit for cyl: 100 simulated replicates under the point null
# with level probabilities 4 = .5, 6 = .25, 8 = .25, summarised by the
# "Chisq" statistic.
mtcars %>%
  specify(response = cyl) %>% # formula alt: cyl ~ NULL
  hypothesize(null = "point", p = c("4" = .5, "6" = .25, "8" = .25)) %>%
  generate(reps = 100, type = "simulate") %>%
  calculate(stat = "Chisq")
## Response: cyl (factor)
## Null Hypothesis: point
## # A tibble: 100 x 2
## replicate stat
## <fct> <dbl>
## 1 1 6.75
## 2 2 1.69
## 3 3 3.19
## 4 4 1.69
## 5 5 6.00
## 6 6 2.69
## 7 7 4.75
## 8 8 0.750
## 9 9 0.688
## 10 10 3.69
## # ... with 90 more rows
Two categorical (>2 level) variables
# 100 permutation replicates under independence of cyl and am,
# summarised by the "Chisq" statistic.
mtcars %>%
  specify(response = cyl, explanatory = am) %>% # formula alt: cyl ~ am
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "Chisq")
## Response: cyl (factor)
## Explanatory: am (factor)
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 1.34
## 2 2 1.63
## 3 3 1.63
## 4 4 2.63
## 5 5 3.90
## 6 6 1.74
## 7 7 0.126
## 8 8 1.74
## 9 9 1.34
## 10 10 1.34
## # ... with 90 more rows
One numerical variable one categorical (2 levels) (diff in means)
# 100 permutation replicates under independence of mpg and am; the
# statistic is the difference in mean mpg between the am groups,
# taken in the order "0" then "1".
mtcars %>%
  specify(response = mpg, explanatory = am) %>% # formula alt: mpg ~ am
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "diff in means", order = c("0", "1"))
## Response: mpg (numeric)
## Explanatory: am (factor)
## Null Hypothesis: independence
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -1.10
## 2 2 0.217
## 3 3 -1.08
## 4 4 -3.80
## 5 5 3.08
## 6 6 0.489
## 7 7 2.34
## 8 8 4.10
## 9 9 -1.86
## 10 10 -0.210
## # ... with 90 more rows
One numerical variable one categorical (2 levels) (diff in medians)
# 100 permutation replicates under independence of mpg and am; the
# statistic is the difference in median mpg between the am groups,
# taken in the order "0" then "1".
mtcars %>%
  specify(response = mpg, explanatory = am) %>% # formula alt: mpg ~ am
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "diff in medians", order = c("0", "1"))
## Response: mpg (numeric)
## Explanatory: am (factor)
## Null Hypothesis: independence
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 0.500
## 2 2 -1.10
## 3 3 5.20
## 4 4 1.80
## 5 5 0.500
## 6 6 3.30
## 7 7 -1.60
## 8 8 -2.30
## 9 9 2.90
## 10 10 -0.500
## # ... with 90 more rows
One numerical one categorical (>2 levels) - ANOVA
# 100 permutation replicates under independence of mpg and cyl,
# summarised by the "F" statistic.
mtcars %>%
  specify(response = mpg, explanatory = cyl) %>% # formula alt: mpg ~ cyl
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "F")
## Response: mpg (numeric)
## Explanatory: cyl (factor)
## Null Hypothesis: independence
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 1.43
## 2 2 1.65
## 3 3 0.318
## 4 4 0.393
## 5 5 1.05
## 6 6 0.826
## 7 7 1.32
## 8 8 0.833
## 9 9 0.144
## 10 10 0.365
## # ... with 90 more rows
Two numerical vars - SLR
# 100 permutation replicates under independence of mpg and hp,
# summarised by the "slope" statistic.
mtcars %>%
  specify(mpg ~ hp) %>% # alt: response = mpg, explanatory = hp
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "slope")
## Response: mpg (numeric)
## Explanatory: hp (numeric)
## Null Hypothesis: independence
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.0151
## 2 2 0.00224
## 3 3 -0.0120
## 4 4 0.00292
## 5 5 0.0203
## 6 6 -0.00730
## 7 7 -0.0246
## 8 8 0.00555
## 9 9 0.0109
## 10 10 0.0176
## # ... with 90 more rows
One numerical variable (standard deviation)
Not currently implemented
# Intended pipeline for a point null on the standard deviation (sigma = 5)
# with the "sd" statistic over 100 bootstrap replicates.
# NOTE(review): the preceding heading marks this as "Not currently
# implemented" and no output follows it -- presumably this chunk is not
# meant to be evaluated; confirm against the installed infer version.
mtcars %>%
specify(response = mpg) %>% # formula alt: mpg ~ NULL
hypothesize(null = "point", sigma = 5) %>%
generate(reps = 100, type = "bootstrap") %>%
calculate(stat = "sd")
One numerical (one mean)
# Bootstrap distribution of the mean of mpg: 100 resamples, with no
# null hypothesis declared.
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "mean")
## Response: mpg (numeric)
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 19.6
## 2 2 21.8
## 3 3 18.7
## 4 4 19.2
## 5 5 21.6
## 6 6 19.9
## 7 7 20.7
## 8 8 19.3
## 9 9 21.2
## 10 10 21.3
## # ... with 90 more rows
One numerical (one median)
# Bootstrap distribution of the median of mpg: 100 resamples, with no
# null hypothesis declared.
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "median")
## Response: mpg (numeric)
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 19.2
## 2 2 20.1
## 3 3 21.0
## 4 4 17.8
## 5 5 20.1
## 6 6 19.2
## 7 7 18.4
## 8 8 19.2
## 9 9 19.2
## 10 10 18.0
## # ... with 90 more rows
One numerical (standard deviation)
# Bootstrap distribution of the standard deviation of mpg: 100 resamples,
# with no null hypothesis declared.
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "sd")
## Response: mpg (numeric)
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 5.28
## 2 2 6.74
## 3 3 5.29
## 4 4 5.41
## 5 5 5.56
## 6 6 5.65
## 7 7 6.17
## 8 8 6.40
## 9 9 6.31
## 10 10 6.11
## # ... with 90 more rows
One categorical (one proportion)
# Bootstrap distribution of the proportion of am == "1": 100 resamples,
# with no null hypothesis declared.
mtcars %>%
  specify(am ~ NULL, success = "1") %>% # named-argument alt: response = am
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "prop")
## Response: am (factor)
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 0.375
## 2 2 0.406
## 3 3 0.406
## 4 4 0.312
## 5 5 0.312
## 6 6 0.469
## 7 7 0.438
## 8 8 0.281
## 9 9 0.438
## 10 10 0.500
## # ... with 90 more rows
One numerical variable one categorical (2 levels) (diff in means)
# Bootstrap distribution of the difference in mean mpg between the am
# groups (order "0" then "1"): 100 resamples, no null hypothesis declared.
mtcars %>%
  specify(response = mpg, explanatory = am) %>% # formula alt: mpg ~ am
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "diff in means", order = c("0", "1"))
## Response: mpg (numeric)
## Explanatory: am (factor)
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 - 9.38
## 2 2 - 5.11
## 3 3 - 4.88
## 4 4 - 5.39
## 5 5 - 9.19
## 6 6 - 7.20
## 7 7 - 5.34
## 8 8 - 3.20
## 9 9 - 5.95
## 10 10 -11.0
## # ... with 90 more rows
Two categorical variables (diff in proportions)
# Bootstrap distribution of the difference in proportions of am == "1"
# between the vs groups (order "0" then "1"): 100 resamples, no null
# hypothesis declared.
mtcars %>%
  specify(response = am, explanatory = vs, success = "1") %>% # formula alt: am ~ vs
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "diff in props", order = c("0", "1"))
## Response: am (factor)
## Explanatory: vs (factor)
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.352
## 2 2 -0.150
## 3 3 -0.294
## 4 4 -0.254
## 5 5 -0.438
## 6 6 -0.126
## 7 7 -0.188
## 8 8 0.167
## 9 9 -0.143
## 10 10 -0.500
## # ... with 90 more rows
Two numerical vars - SLR
# Bootstrap distribution of the "slope" statistic for mpg on hp:
# 100 resamples, with no null hypothesis declared.
mtcars %>%
  specify(response = mpg, explanatory = hp) %>% # formula alt: mpg ~ hp
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "slope")
## Response: mpg (numeric)
## Explanatory: hp (numeric)
## # A tibble: 100 x 2
## replicate stat
## <int> <dbl>
## 1 1 -0.0850
## 2 2 -0.0512
## 3 3 -0.0736
## 4 4 -0.0569
## 5 5 -0.0930
## 6 6 -0.0659
## 7 7 -0.0710
## 8 8 -0.0767
## 9 9 -0.0556
## 10 10 -0.0627
## # ... with 90 more rows