Examples using mtcars data

Chester Ismay and Andrew Bray

2018-01-05

Data preparation

library(infer)
library(dplyr)
# Recode the discrete-valued columns as factors so the examples below can
# use them as categorical variables.
mtcars <- mtcars %>%
  as.data.frame() %>%
  mutate(
    cyl = factor(cyl),
    vs = factor(vs),
    am = factor(am),
    gear = factor(gear),
    carb = factor(carb)
  )

# Seed the random number generator so the resampled results are reproducible
set.seed(2018)

One numerical variable (mean)

# Test on one mean: point null with mu = 25
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  hypothesize(null = "point", mu = 25) %>%
  generate(reps = 100, type = "bootstrap") %>% # 100 bootstrap replicates
  calculate(stat = "mean") # one mean per replicate
## # A tibble: 100 x 2
##    replicate  stat
##        <int> <dbl>
##  1         1  26.6
##  2         2  25.1
##  3         3  25.2
##  4         4  24.7
##  5         5  24.6
##  6         6  25.8
##  7         7  24.7
##  8         8  25.6
##  9         9  25.0
## 10        10  25.1
## # ... with 90 more rows

One numerical variable (median)

# Test on one median: point null with med = 26
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  hypothesize(null = "point", med = 26) %>%
  generate(reps = 100, type = "bootstrap") %>% # 100 bootstrap replicates
  calculate(stat = "median") # one median per replicate
## # A tibble: 100 x 2
##    replicate  stat
##        <int> <dbl>
##  1         1  28.2
##  2         2  27.2
##  3         3  26.2
##  4         4  26.0
##  5         5  26.5
##  6         6  24.5
##  7         7  26.0
##  8         8  28.2
##  9         9  28.2
## 10        10  23.2
## # ... with 90 more rows

One numerical variable (standard deviation)

# Test on one standard deviation: point null with sigma = 5
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  hypothesize(null = "point", sigma = 5) %>%
  generate(reps = 100, type = "bootstrap") %>% # 100 bootstrap replicates
  calculate(stat = "sd") # one standard deviation per replicate
## # A tibble: 100 x 2
##    replicate  stat
##        <int> <dbl>
##  1         1  5.88
##  2         2  6.27
##  3         3  5.08
##  4         4  6.91
##  5         5  5.96
##  6         6  6.01
##  7         7  6.75
##  8         8  5.14
##  9         9  6.06
## 10        10  5.63
## # ... with 90 more rows

One categorical (2 level) variable

# Test on one proportion: point null with p = 0.25, level "1" as success
mtcars %>%
  specify(am ~ NULL, success = "1") %>% # named-argument alt: response = am
  hypothesize(null = "point", p = 0.25) %>%
  generate(reps = 100, type = "simulate") %>% # 100 simulated replicates
  calculate(stat = "prop") # one proportion per replicate
## # A tibble: 100 x 2
##    replicate  stat
##    <fct>     <dbl>
##  1 1         0.281
##  2 2         0.281
##  3 3         0.281
##  4 4         0.219
##  5 5         0.250
##  6 6         0.250
##  7 7         0.281
##  8 8         0.250
##  9 9         0.188
## 10 10        0.344
## # ... with 90 more rows

Two categorical (2 level) variables

# Test of independence between am and vs via a difference in proportions
mtcars %>%
  # formula alt: am ~ vs
  specify(response = am, explanatory = vs, success = "1") %>%
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>% # 100 permuted replicates
  # order names the vs levels used to form the difference
  calculate(stat = "diff in props", order = c("0", "1"))
## # A tibble: 100 x 2
##    replicate    stat
##        <int>   <dbl>
##  1         1 -0.294 
##  2         2 -0.167 
##  3         3 -0.0397
##  4         4  0.214 
##  5         5 -0.167 
##  6         6 -0.167 
##  7         7  0.0873
##  8         8 -0.0397
##  9         9 -0.0397
## 10        10  0.341 
## # ... with 90 more rows

One categorical (>2 level) - GoF

# Goodness-of-fit test: null proportions given per level of cyl
mtcars %>%
  specify(response = cyl) %>% # formula alt: cyl ~ NULL
  hypothesize(null = "point", p = c("4" = 0.5, "6" = 0.25, "8" = 0.25)) %>%
  generate(reps = 100, type = "simulate") %>% # 100 simulated replicates
  calculate(stat = "Chisq") # one chi-squared statistic per replicate
## # A tibble: 100 x 2
##    replicate  stat
##    <fct>     <dbl>
##  1 1         3.00 
##  2 2         2.25 
##  3 3         3.00 
##  4 4         1.69 
##  5 5         6.75 
##  6 6         0.188
##  7 7         2.25 
##  8 8         1.19 
##  9 9         0.688
## 10 10        3.19 
## # ... with 90 more rows

Two categorical (>2 level) variables

# Chi-squared test of independence between cyl and am
mtcars %>%
  specify(response = cyl, explanatory = am) %>% # formula alt: cyl ~ am
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>% # 100 permuted replicates
  calculate(stat = "Chisq") # one chi-squared statistic per replicate
## # A tibble: 100 x 2
##    replicate  stat
##    <fct>     <dbl>
##  1 1         3.68 
##  2 2         0.557
##  3 3         5.00 
##  4 4         4.57 
##  5 5         6.48 
##  6 6         0.126
##  7 7         3.71 
##  8 8         6.91 
##  9 9         1.45 
## 10 10        1.01 
## # ... with 90 more rows

One numerical variable, one categorical variable (2 levels) (diff in means)

# Test of independence between mpg and am via a difference in means
mtcars %>%
  specify(response = mpg, explanatory = am) %>% # formula alt: mpg ~ am
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>% # 100 permuted replicates
  # order names the am levels used to form the difference
  calculate(stat = "diff in means", order = c("0", "1"))
## # A tibble: 100 x 2
##    replicate    stat
##        <int>   <dbl>
##  1         1  0.360 
##  2         2  0.0490
##  3         3  3.38  
##  4         4  1.89  
##  5         5  0.891 
##  6         6  0.904 
##  7         7 -0.327 
##  8         8 -3.44  
##  9         9  0.502 
## 10        10 -1.92  
## # ... with 90 more rows

One numerical variable, one categorical variable (2 levels) (diff in medians)

# Test of independence between mpg and am via a difference in medians
mtcars %>%
  specify(response = mpg, explanatory = am) %>% # formula alt: mpg ~ am
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>% # 100 permuted replicates
  # order names the am levels used to form the difference
  calculate(stat = "diff in medians", order = c("0", "1"))
## # A tibble: 100 x 2
##    replicate   stat
##        <int>  <dbl>
##  1         1  0.500
##  2         2 -2.70 
##  3         3  2.40 
##  4         4  0    
##  5         5 -5.00 
##  6         6  2.40 
##  7         7  0    
##  8         8  2.90 
##  9         9 -0.500
## 10        10  0.500
## # ... with 90 more rows

One numerical variable, one categorical variable (>2 levels) - ANOVA

# Test of independence between mpg and cyl using the F statistic
mtcars %>%
  specify(response = mpg, explanatory = cyl) %>% # formula alt: mpg ~ cyl
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>% # 100 permuted replicates
  calculate(stat = "F") # one F statistic per replicate
## # A tibble: 100 x 2
##    replicate   stat
##        <int>  <dbl>
##  1         1 2.53  
##  2         2 0.104 
##  3         3 0.238 
##  4         4 1.12  
##  5         5 0.660 
##  6         6 0.173 
##  7         7 0.799 
##  8         8 0.800 
##  9         9 0.255 
## 10        10 0.0198
## # ... with 90 more rows

Two numerical vars - SLR

# Test of independence between mpg and hp using the regression slope
mtcars %>%
  specify(response = mpg, explanatory = hp) %>% # formula alt: mpg ~ hp
  hypothesize(null = "independence") %>%
  generate(reps = 100, type = "permute") %>% # 100 permuted replicates
  calculate(stat = "slope") # one slope per replicate
## # A tibble: 100 x 2
##    replicate     stat
##        <int>    <dbl>
##  1         1  0.0310 
##  2         2  0.00706
##  3         3 -0.0231 
##  4         4 -0.0285 
##  5         5 -0.0124 
##  6         6 -0.00164
##  7         7 -0.00587
##  8         8 -0.00369
##  9         9  0.00522
## 10        10 -0.00866
## # ... with 90 more rows

Confidence intervals

One numerical (one mean)

# Bootstrap distribution of the mean of mpg (no null hypothesis is set)
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  generate(reps = 100, type = "bootstrap") %>% # 100 bootstrap replicates
  calculate(stat = "mean") # one mean per replicate
## # A tibble: 100 x 2
##    replicate  stat
##        <int> <dbl>
##  1         1  19.3
##  2         2  20.8
##  3         3  22.5
##  4         4  19.3
##  5         5  21.3
##  6         6  20.1
##  7         7  20.7
##  8         8  20.4
##  9         9  19.8
## 10        10  19.8
## # ... with 90 more rows

One numerical (one median)

# Bootstrap distribution of the median of mpg (no null hypothesis is set)
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  generate(reps = 100, type = "bootstrap") %>% # 100 bootstrap replicates
  calculate(stat = "median") # one median per replicate
## # A tibble: 100 x 2
##    replicate  stat
##        <int> <dbl>
##  1         1  19.2
##  2         2  19.2
##  3         3  19.0
##  4         4  18.0
##  5         5  21.2
##  6         6  21.0
##  7         7  17.0
##  8         8  19.4
##  9         9  19.0
## 10        10  21.2
## # ... with 90 more rows

One numerical (standard deviation)

# Bootstrap distribution of the standard deviation of mpg (no null is set)
mtcars %>%
  specify(mpg ~ NULL) %>% # named-argument alt: response = mpg
  generate(reps = 100, type = "bootstrap") %>% # 100 bootstrap replicates
  calculate(stat = "sd") # one standard deviation per replicate
## # A tibble: 100 x 2
##    replicate  stat
##        <int> <dbl>
##  1         1  5.27
##  2         2  6.29
##  3         3  5.32
##  4         4  4.95
##  5         5  5.57
##  6         6  6.92
##  7         7  6.45
##  8         8  4.85
##  9         9  6.16
## 10        10  6.49
## # ... with 90 more rows

One categorical (one proportion)

# Bootstrap distribution of the proportion of am == "1" (no null is set)
mtcars %>%
  specify(am ~ NULL, success = "1") %>% # named-argument alt: response = am
  generate(reps = 100, type = "bootstrap") %>% # 100 bootstrap replicates
  calculate(stat = "prop") # one proportion per replicate
## # A tibble: 100 x 2
##    replicate  stat
##        <int> <dbl>
##  1         1 0.344
##  2         2 0.312
##  3         3 0.344
##  4         4 0.438
##  5         5 0.406
##  6         6 0.438
##  7         7 0.469
##  8         8 0.375
##  9         9 0.344
## 10        10 0.469
## # ... with 90 more rows

One numerical variable, one categorical variable (2 levels) (diff in means)

# Bootstrap distribution of the difference in mean mpg between am groups
mtcars %>%
  specify(response = mpg, explanatory = am) %>% # formula alt: mpg ~ am
  generate(reps = 100, type = "bootstrap") %>% # 100 bootstrap replicates
  # order names the am levels used to form the difference
  calculate(stat = "diff in means", order = c("0", "1"))
## # A tibble: 100 x 2
##    replicate  stat
##        <int> <dbl>
##  1         1 -6.90
##  2         2 -5.02
##  3         3 -7.51
##  4         4 -5.71
##  5         5 -8.25
##  6         6 -7.23
##  7         7 -8.45
##  8         8 -3.71
##  9         9 -6.54
## 10        10 -9.37
## # ... with 90 more rows

Two categorical variables (diff in proportions)

# Bootstrap distribution of the difference in proportion of am == "1"
# between vs groups
mtcars %>%
  # formula alt: am ~ vs
  specify(response = am, explanatory = vs, success = "1") %>%
  generate(reps = 100, type = "bootstrap") %>% # 100 bootstrap replicates
  # order names the vs levels used to form the difference
  calculate(stat = "diff in props", order = c("0", "1"))
## # A tibble: 100 x 2
##    replicate    stat
##        <int>   <dbl>
##  1         1 -0.263 
##  2         2  0.0167
##  3         3  0.108 
##  4         4 -0.188 
##  5         5 -0.408 
##  6         6  0     
##  7         7 -0.564 
##  8         8 -0.563 
##  9         9 -0.0338
## 10        10  0.133 
## # ... with 90 more rows

Two numerical vars - SLR

# Bootstrap distribution of the slope of mpg regressed on hp
mtcars %>%
  specify(response = mpg, explanatory = hp) %>% # formula alt: mpg ~ hp
  generate(reps = 100, type = "bootstrap") %>% # 100 bootstrap replicates
  calculate(stat = "slope") # one slope per replicate
## # A tibble: 100 x 2
##    replicate    stat
##        <int>   <dbl>
##  1         1 -0.0951
##  2         2 -0.0951
##  3         3 -0.103 
##  4         4 -0.0553
##  5         5 -0.104 
##  6         6 -0.0677
##  7         7 -0.0588
##  8         8 -0.0650
##  9         9 -0.0987
## 10        10 -0.0657
## # ... with 90 more rows