# Examples using mtcars data

## Data preparation

library(infer)
library(dplyr)
# Recode cyl, vs, am, gear and carb as factors; the examples in this file
# use them as categorical variables.
mtcars <- mutate(
  mtcars,
  cyl = factor(cyl),
  vs = factor(vs),
  am = factor(am),
  gear = factor(gear),
  carb = factor(carb)
)

# Fix the random seed so the resampled results can be reproduced.
set.seed(2018)

One numerical variable (mean)

mtcars %>%
  # Formula alternative: mpg ~ NULL
  specify(response = mpg) %>%
  # Point null hypothesis on the mean: mu = 25
  hypothesize(null = "point", mu = 25) %>%
  # 100 bootstrap replicates, then the mean of each replicate
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "mean")
## Response: mpg (numeric)
## # A tibble: 100 x 2
##    replicate  stat
##        <int> <dbl>
##  1         1  26.6
##  2         2  25.1
##  3         3  25.2
##  4         4  24.7
##  5         5  24.6
##  6         6  25.8
##  7         7  24.7
##  8         8  25.6
##  9         9  25.0
## 10        10  25.1
## # ... with 90 more rows

One numerical variable (median)

mtcars %>%
  # Formula alternative: mpg ~ NULL
  specify(response = mpg) %>%
  # Point null hypothesis on the median: med = 26
  hypothesize(null = "point", med = 26) %>%
  # 100 bootstrap replicates, then the median of each replicate
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "median")
## Response: mpg (numeric)
## # A tibble: 100 x 2
##    replicate  stat
##        <int> <dbl>
##  1         1  28.2
##  2         2  27.2
##  3         3  26.2
##  4         4  26.0
##  5         5  26.5
##  6         6  24.5
##  7         7  26.0
##  8         8  28.2
##  9         9  28.2
## 10        10  23.2
## # ... with 90 more rows

One categorical (2 level) variable

mtcars %>%
  # Formula alternative: am ~ NULL; level "1" of am is counted as a success
  specify(response = am, success = "1") %>%
  # Point null hypothesis on the proportion: p = .25
  hypothesize(null = "point", p = .25) %>%
  # 100 simulated replicates, then the proportion in each replicate
  generate(reps = 100, type = "simulate") %>%
  calculate(stat = "prop")
## Response: am (factor)
## Null Hypothesis:  point
## # A tibble: 100 x 2
##    replicate   stat
##    <fct>      <dbl>
##  1 1         0.375
##  2 2         0.0625
##  3 3         0.125
##  4 4         0.250
##  5 5         0.188
##  6 6         0.406
##  7 7         0.219
##  8 8         0.375
##  9 9         0.344
## 10 10        0.188
## # ... with 90 more rows

Two categorical (2 level) variables

mtcars %>%
  # Named-argument alternative: response = am, explanatory = vs
  specify(am ~ vs, success = "1") %>%
  hypothesize(null = "independence") %>%
  # 100 permutation replicates
  generate(reps = 100, type = "permute") %>%
  # Difference in proportions, with the vs levels taken in the order "0", "1"
  calculate(stat = "diff in props", order = c("0", "1"))
## Response: am (factor)
## Explanatory: vs (factor)
## Null Hypothesis:  independence
## # A tibble: 100 x 2
##    replicate    stat
##        <int>   <dbl>
##  1         1 -0.421
##  2         2 -0.167
##  3         3 -0.421
##  4         4 -0.0397
##  5         5  0.0873
##  6         6 -0.0397
##  7         7 -0.0397
##  8         8 -0.0397
##  9         9  0.0873
## 10        10 -0.167
## # ... with 90 more rows

One categorical (>2 levels) variable - GoF (goodness of fit)

mtcars %>%
  # Named-argument alternative: response = cyl
  specify(cyl ~ NULL) %>%
  # Point null hypothesis: one hypothesized proportion per level of cyl
  hypothesize(
    null = "point",
    p = c("4" = .5, "6" = .25, "8" = .25)
  ) %>%
  # 100 simulated replicates, then the chi-squared statistic of each
  generate(reps = 100, type = "simulate") %>%
  calculate(stat = "Chisq")
## Response: cyl (factor)
## Null Hypothesis:  point
## # A tibble: 100 x 2
##    replicate  stat
##    <fct>     <dbl>
##  1 1         6.75
##  2 2         1.69
##  3 3         3.19
##  4 4         1.69
##  5 5         6.00
##  6 6         2.69
##  7 7         4.75
##  8 8         0.750
##  9 9         0.688
## 10 10        3.69
## # ... with 90 more rows

Two categorical (>2 level) variables

mtcars %>%
  # Named-argument alternative: response = cyl, explanatory = am
  specify(cyl ~ am) %>%
  hypothesize(null = "independence") %>%
  # 100 permutation replicates, then the chi-squared statistic of each
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "Chisq")
## Response: cyl (factor)
## Explanatory: am (factor)
## # A tibble: 100 x 2
##    replicate  stat
##        <int> <dbl>
##  1         1 1.34
##  2         2 1.63
##  3         3 1.63
##  4         4 2.63
##  5         5 3.90
##  6         6 1.74
##  7         7 0.126
##  8         8 1.74
##  9         9 1.34
## 10        10 1.34
## # ... with 90 more rows

One numerical variable, one categorical variable (2 levels) (diff in means)

mtcars %>%
  # Named-argument alternative: response = mpg, explanatory = am
  specify(mpg ~ am) %>%
  hypothesize(null = "independence") %>%
  # 100 permutation replicates
  generate(reps = 100, type = "permute") %>%
  # Difference in means, with the am levels taken in the order "0", "1"
  calculate(stat = "diff in means", order = c("0", "1"))
## Response: mpg (numeric)
## Explanatory: am (factor)
## Null Hypothesis:  independence
## # A tibble: 100 x 2
##    replicate   stat
##        <int>  <dbl>
##  1         1 -1.10
##  2         2  0.217
##  3         3 -1.08
##  4         4 -3.80
##  5         5  3.08
##  6         6  0.489
##  7         7  2.34
##  8         8  4.10
##  9         9 -1.86
## 10        10 -0.210
## # ... with 90 more rows

One numerical variable, one categorical variable (2 levels) (diff in medians)

mtcars %>%
  # Named-argument alternative: response = mpg, explanatory = am
  specify(mpg ~ am) %>%
  hypothesize(null = "independence") %>%
  # 100 permutation replicates
  generate(reps = 100, type = "permute") %>%
  # Difference in medians, with the am levels taken in the order "0", "1"
  calculate(stat = "diff in medians", order = c("0", "1"))
## Response: mpg (numeric)
## Explanatory: am (factor)
## Null Hypothesis:  independence
## # A tibble: 100 x 2
##    replicate   stat
##        <int>  <dbl>
##  1         1  0.500
##  2         2 -1.10
##  3         3  5.20
##  4         4  1.80
##  5         5  0.500
##  6         6  3.30
##  7         7 -1.60
##  8         8 -2.30
##  9         9  2.90
## 10        10 -0.500
## # ... with 90 more rows

One numerical variable, one categorical variable (>2 levels) - ANOVA

mtcars %>%
  # Named-argument alternative: response = mpg, explanatory = cyl
  specify(mpg ~ cyl) %>%
  hypothesize(null = "independence") %>%
  # 100 permutation replicates, then the F statistic of each
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "F")
## Response: mpg (numeric)
## Explanatory: cyl (factor)
## Null Hypothesis:  independence
## # A tibble: 100 x 2
##    replicate  stat
##        <int> <dbl>
##  1         1 1.43
##  2         2 1.65
##  3         3 0.318
##  4         4 0.393
##  5         5 1.05
##  6         6 0.826
##  7         7 1.32
##  8         8 0.833
##  9         9 0.144
## 10        10 0.365
## # ... with 90 more rows

Two numerical variables - SLR (simple linear regression)

mtcars %>%
  # Named-argument alternative: response = mpg, explanatory = hp
  specify(mpg ~ hp) %>%
  hypothesize(null = "independence") %>%
  # 100 permutation replicates, then the regression slope of each
  generate(reps = 100, type = "permute") %>%
  calculate(stat = "slope")
## Response: mpg (numeric)
## Explanatory: hp (numeric)
## Null Hypothesis:  independence
## # A tibble: 100 x 2
##    replicate     stat
##        <int>    <dbl>
##  1         1 -0.0151
##  2         2  0.00224
##  3         3 -0.0120
##  4         4  0.00292
##  5         5  0.0203
##  6         6 -0.00730
##  7         7 -0.0246
##  8         8  0.00555
##  9         9  0.0109
## 10        10  0.0176
## # ... with 90 more rows

One numerical variable (standard deviation)

Not currently implemented

mtcars %>%
  # Formula alternative: mpg ~ NULL
  specify(response = mpg) %>%
  # Point null hypothesis on the standard deviation: sigma = 5
  hypothesize(null = "point", sigma = 5) %>%
  # 100 bootstrap replicates, then the standard deviation of each
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "sd")

### Confidence intervals

One numerical (one mean)

mtcars %>%
  specify(response = mpg) %>%
  # No hypothesize() step: 100 bootstrap replicates of the data,
  # then the mean of each replicate
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "mean")
## Response: mpg (numeric)
## # A tibble: 100 x 2
##    replicate  stat
##        <int> <dbl>
##  1         1  19.6
##  2         2  21.8
##  3         3  18.7
##  4         4  19.2
##  5         5  21.6
##  6         6  19.9
##  7         7  20.7
##  8         8  19.3
##  9         9  21.2
## 10        10  21.3
## # ... with 90 more rows

One numerical (one median)

mtcars %>%
  specify(response = mpg) %>%
  # No hypothesize() step: 100 bootstrap replicates of the data,
  # then the median of each replicate
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "median")
## Response: mpg (numeric)
## # A tibble: 100 x 2
##    replicate  stat
##        <int> <dbl>
##  1         1  19.2
##  2         2  20.1
##  3         3  21.0
##  4         4  17.8
##  5         5  20.1
##  6         6  19.2
##  7         7  18.4
##  8         8  19.2
##  9         9  19.2
## 10        10  18.0
## # ... with 90 more rows

One numerical (standard deviation)

mtcars %>%
  specify(response = mpg) %>%
  # No hypothesize() step: 100 bootstrap replicates of the data,
  # then the standard deviation of each replicate
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "sd")
## Response: mpg (numeric)
## # A tibble: 100 x 2
##    replicate  stat
##        <int> <dbl>
##  1         1  5.28
##  2         2  6.74
##  3         3  5.29
##  4         4  5.41
##  5         5  5.56
##  6         6  5.65
##  7         7  6.17
##  8         8  6.40
##  9         9  6.31
## 10        10  6.11
## # ... with 90 more rows

One categorical (one proportion)

mtcars %>%
  # Level "1" of am is counted as a success
  specify(response = am, success = "1") %>%
  # No hypothesize() step: 100 bootstrap replicates of the data,
  # then the proportion in each replicate
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "prop")
## Response: am (factor)
## # A tibble: 100 x 2
##    replicate  stat
##        <int> <dbl>
##  1         1 0.375
##  2         2 0.406
##  3         3 0.406
##  4         4 0.312
##  5         5 0.312
##  6         6 0.469
##  7         7 0.438
##  8         8 0.281
##  9         9 0.438
## 10        10 0.500
## # ... with 90 more rows

One numerical variable, one categorical variable (2 levels) (diff in means)

mtcars %>%
  specify(mpg ~ am) %>%
  # No hypothesize() step: 100 bootstrap replicates of the data
  generate(reps = 100, type = "bootstrap") %>%
  # Difference in means, with the am levels taken in the order "0", "1"
  calculate(stat = "diff in means", order = c("0", "1"))
## Response: mpg (numeric)
## Explanatory: am (factor)
## # A tibble: 100 x 2
##    replicate   stat
##        <int>  <dbl>
##  1         1 - 9.38
##  2         2 - 5.11
##  3         3 - 4.88
##  4         4 - 5.39
##  5         5 - 9.19
##  6         6 - 7.20
##  7         7 - 5.34
##  8         8 - 3.20
##  9         9 - 5.95
## 10        10 -11.0
## # ... with 90 more rows

Two categorical variables (diff in proportions)

mtcars %>%
  # Level "1" of am is counted as a success
  specify(am ~ vs, success = "1") %>%
  # No hypothesize() step: 100 bootstrap replicates of the data
  generate(reps = 100, type = "bootstrap") %>%
  # Difference in proportions, with the vs levels taken in the order "0", "1"
  calculate(stat = "diff in props", order = c("0", "1"))
## Response: am (factor)
## Explanatory: vs (factor)
## # A tibble: 100 x 2
##    replicate   stat
##        <int>  <dbl>
##  1         1 -0.352
##  2         2 -0.150
##  3         3 -0.294
##  4         4 -0.254
##  5         5 -0.438
##  6         6 -0.126
##  7         7 -0.188
##  8         8  0.167
##  9         9 -0.143
## 10        10 -0.500
## # ... with 90 more rows

Two numerical variables - SLR (simple linear regression)

mtcars %>%
  specify(mpg ~ hp) %>%
  # No hypothesize() step: 100 bootstrap replicates of the data,
  # then the regression slope of each replicate
  generate(reps = 100, type = "bootstrap") %>%
  calculate(stat = "slope")
## Response: mpg (numeric)
## Explanatory: hp (numeric)
## # A tibble: 100 x 2
##    replicate    stat
##        <int>   <dbl>
##  1         1 -0.0850
##  2         2 -0.0512
##  3         3 -0.0736
##  4         4 -0.0569
##  5         5 -0.0930
##  6         6 -0.0659
##  7         7 -0.0710
##  8         8 -0.0767
##  9         9 -0.0556
## 10        10 -0.0627
## # ... with 90 more rows