---
title: "Hypothesis Testing Demo"
output:
html_notebook:
toc: yes
editor_options:
chunk_output_type: inline
---
Load tidyverse package and set plot theme
```{r, message = F}
library(tidyverse)
theme_set(theme_classic(base_size = 16))
```
Let's take a quick look at the empirical data
```{r empirical}
meeting_admitted <- 388
meeting_died <- 66
meeting_mortality <- meeting_died / meeting_admitted
nonmeeting_admitted <- 2154
nonmeeting_died <- 535
nonmeeting_mortality <- nonmeeting_died / nonmeeting_admitted
mortality_diff <- meeting_mortality - nonmeeting_mortality
# Make a dataframe to show the results
data_frame(type = c("Nonmeeting", "Meeting", "Difference"),
mortality = c(nonmeeting_mortality, meeting_mortality,
mortality_diff))
```
Let's simulate it
```{r sample_func}
day_diff <- function() {
# Make an array with the right number of patients
patients <- c(rep("Meeting", meeting_admitted),
rep("NonMeeting", nonmeeting_admitted))
# randomly select the total who died from the array
died <- sample(patients, meeting_died + nonmeeting_died)
# Compute the difference in mortality between Meeting and NonMeeting Days
died_meeting <- sum(died == "Meeting") / meeting_admitted
died_nonmeeting <- sum(died == "NonMeeting") / nonmeeting_admitted
return(died_meeting - died_nonmeeting)
}
# One function call
day_diff()
```
Now lets draw samples and pot
```{r thousand_samples, fig.height=4, fig.width = 6}
thousand_diffs <- data_frame(diff = replicate(1000, day_diff()))
ggplot(thousand_diffs, aes(x = diff)) +
geom_histogram(bins = 25) +
geom_vline(xintercept = mortality_diff, color = "darkred", size = 2)
```
Sample and plot along with our original result
```{r empirical_data, fig.height=4, fig.width = 6}
thousand_diffs <- data_frame(diff = replicate(1000, day_diff()))
percentile <- .025
# Find top and bottom 2.5% percents
lower_percentile <- quantile(thousand_diffs$diff, percentile)
upper_percentile <- quantile(thousand_diffs$diff, 1-percentile)
#add a vertical line to the random samples to show the empirical data
ggplot(thousand_diffs, aes(x = diff)) +
geom_histogram(bins = 25) +
geom_vline(xintercept = mortality_diff, color = "darkred", size = 2) +
geom_vline(xintercept = lower_percentile, color = "blue", size = 2) +
geom_vline(xintercept = upper_percentile, color = "blue", size = 2)
```
Where does our difference lie?
```{r}
# Compute the proportion of samples that the actual difference was greater than
mean(mortality_diff >= thousand_diffs$diff)
```
What about nonteaching hospitals?
```{r non_teaching_empirical}
meeting_admitted <- 3709
meeting_died <- 901
meeting_mortality <- meeting_died / meeting_admitted
nonmeeting_admitted <- 22054
nonmeeting_died <- 5432
nonmeeting_mortality <- nonmeeting_died / nonmeeting_admitted
mortality_diff <- meeting_mortality - nonmeeting_mortality
# Make a dataframe to show the results
data_frame(type = c("Nonmeeting", "Meeting", "Difference"),
mortality = c(nonmeeting_mortality, meeting_mortality,
mortality_diff))
```
Sample and plot along with our original result
```{r nonteaching_empirical_data}
thousand_diffs <- data_frame(diff = replicate(1000, day_diff()))
# Find top and bottom 2.5% percents
lower_percentile <- quantile(thousand_diffs$diff, percentile)
upper_percentile <- quantile(thousand_diffs$diff, 1-percentile)
#add a vertical line to the random samples to show the empirical data
ggplot(thousand_diffs, aes(x = diff)) +
geom_histogram(bins = 25) +
geom_vline(xintercept = mortality_diff, color = "darkred", size = 2) +
geom_vline(xintercept = lower_percentile, color = "blue", size = 2) +
geom_vline(xintercept = upper_percentile, color = "blue", size = 2)
```