--- title: "Lab2CodeKey" author: "Kenya Amano" date: "10/9/2020" output: pdf_document editor_options: chunk_output_type: console --- {r setup, include=FALSE} knitr::opts_chunk\$set(echo = TRUE)  ## Prerequisite {r, warning=F, message=FALSE} # Good practice to remove all objects from the workspace rm(list = ls()) # Use library() for packages you need, or source() for other R files. library(tidyverse) # Setting the seed ensures that we get the same random draw over and over again. set.seed(20201009) rnorm(5) # Check  ## 0. Calculate the following operations by hand (... meaning by R) a) $$\sum\limits_{i=1}^{5} i =$$ b) $$\prod\limits_{i=1}^{5} i =$$ c) $$5! \times 10^{3!} \times e^{4} =$$ {r} # a) sum(1:5) # b) prod(1:5) #Check 1*2*3*4*5 # c) factorial(5) 5*4*3*2*1 factorial(5) * 10^factorial(3) * exp(4)  ## 1. Build a Bernoulli distribution using the sample() function, where the probability of "success" is 0.7. Run "?sample" if you are unsure how the function works. {r} # Create an imaginary person to flip the coin once for you sample(x = c(0, 1), size = 1, prob = c(0.3, 0.7) )  ## 2. How do you know if it is working properly? Conduct simulation to check if the assigned probabilities are matached with the empirics {r} # Specify the number of simulations sims <- 10000 # Specify the probability ProbSuccess <- 0.7 # Create an empty vector as "container" BernResult <- vector(mode = "numeric", length = sims) # For loop for (i in 1:sims) { BernResult[i] <- sample(c(0, 1), size = 1, prob = c(1- ProbSuccess, ProbSuccess) ) } mean(BernResult)  {r, results=FALSE} ## Faster way w/o loop...but we want to build the intuition sample(c(0, 1), size = sims, replace = TRUE, prob = c(0.3, 0.7) ) ## Or use rbinom (rmb: bernoulli is just a special case of binomial when N = 1) rbinom(sims, size = 1, prob = 0.7)  ## 3. Plot the above Bernoulli distribution {r, warning=F, message=F} # Base graphics hist(BernResult) # ggplot2 BernDF <- tibble(Outcome = BernResult) ggplot(BernDF, aes(x = Outcome)) + geom_histogram() + scale_x_continuous(breaks = c(0, 1), expand = c(1, 0)) + labs(y = "Frequency", x = "x", subtitle = "Bernoulli Distribution: Frequency")+ theme_bw() # Adding: aes(y = stat(count / sum(count))) in geom_hist ; What does it do? ggplot(BernDF, aes(x = Outcome)) + geom_histogram(aes(y = stat(count/sum(count)))) + scale_x_continuous(breaks = c(0, 1), expand = c(1, 0)) + labs(y = "Prob", x = "x", subtitle = "Bernoulli Distribution: Probability")+ theme_bw() # y = stat() means... BernResult %>% as_tibble %>% count(value) %>% mutate(TotalNum = sum(n), Prob = n/TotalNum)  ## 4. Based on the above, generate a binomial distribution, with number of trials equal to 10, without using rbinom() {r} # Create an imaginary person to flip the coin ten times for you # Let's test it outside of the loop: sample(c(0, 1), size = 10, replace = TRUE, prob = c(1- ProbSuccess, ProbSuccess) ) # Create number of simulations and an empty vector as container BinoResult <- vector(mode = "numeric", length = sims) for (i in 1:sims) { # Create an imaginary person to flip the coin ten times for you flips <- sample(c(0, 1), size = 10, replace = TRUE, prob = c(1- ProbSuccess, ProbSuccess) ) # Sum up the number of "success" for that person count <- sum(flips) # Store it into the container; repeat 10,000 BinoResult[i] <- count } # Faster way w/o loop and sample()...but we want to build the intuition # rbinom(n = sims, # size = 10, # prob = ProbSuccess)  ## 5. Plot the above binomial distribution {r} # Base graphics hist(BinoResult) # ggplot2 BinoResult %>% as_tibble %>% # Convert to data frame ggplot(aes(x = value)) + geom_histogram(aes(y = stat(count/sum(count)))) + scale_x_continuous(breaks = 1:10) + labs(y = "Prob", x = "x", subtitle = "Binomial Distribution")+ theme_bw()  {r, include=F} rbinom(n = sims, size = 10, prob = ProbSuccess) %>% as_tibble %>% # Convert to data frame ggplot(aes(x = value)) + geom_histogram(aes(y = stat(count/sum(count)))) + scale_x_continuous(breaks = 1:10) + labs(y = "Prob", x = "x", subtitle = "Binomial Distribution")+ theme_bw()  ## 6. Explore the rbinom, dbinom, pbinom functions. What do they do? Answer the following questions: a) The probability of a coin landing on head is 0.7. If you were to flip the coin 10 times, what is the probability of getting exactly 7 heads? b) What is the probability of getting 7 heads or less? c) How do you know (b) is true? {r} # a) Pr(exactly 7 heads) -> PDF dbinom(x = 7, size = 10, prob = 0.7) # b) Pr(7 heads or less) -> CDF pbinom(q = 7, size = 10, prob = 0.7) pbinom(q = 7, size = 10, prob = 0.7, lower.tail = FALSE) 1- pbinom(q = 7, size = 10, prob = 0.7) # c) Double check dbinom(x = c(0:7), size = 10, prob = 0.7) %>% sum()  # How to plot? {r} # PDF # Create x axis in data.frame tibble(x = seq(from = 0, to = 10, by =1)) %>% # Create y axis mutate(y = dbinom(x = x, size = 10, prob = 0.7)) %>% # Plot, map data in aesthetic ggplot(aes(x=x,y=y))+ # Specify the type of plot geom_line()+ # Specify x axis breaks #scale_x_continuous(breaks = 0:10)+ scale_x_continuous(breaks = 0:10)+ # Add vertical line at x = 0.7 geom_vline(xintercept = 7, color ="red", linetype = "dashed")+ # Change label on y axis labs(y="Prob", subtitle = "PDF")+ # (Optional: specify theme ) theme_bw() # CDF # Create x axis in data.frame tibble(x = seq(from = 0, to = 10, by =1)) %>% # Create y axis mutate(y = pbinom(q = x, size =10, prob=.7)) %>% # Plot, map data in aesthetic ggplot(aes(x=x,y=y))+ # Specify the type of plot geom_line()+ # Specify x axis breaks scale_x_continuous(breaks = 0:10)+ # Add vertical line at x = 0.7 geom_vline(xintercept = 7, color ="red", linetype = "dashed")+ # Change label on y axis labs(y="Prob", subtitle = "CDF")+ # (Optional: specify theme ) theme_bw()