library(tidyverse)
library(gridExtra)
  1. Using a for loop, write a function to calculate the number of zeroes in a numeric vector. Before entering the loop, set up a counter variable counter <- 0. Inside the loop, add 1 to counter each time you have a zero in the vector. Finally, use return(counter) for the output.
#############################################################
# FUNCTION: zeroes_counter
# Description: Counts the number of zeroes in a numeric vector
# Input(s): 'vec' (a vector of numerics)
# Output(s): 'counter' (the count of zeros in the vector)
# ------------------------------------------------------

zeroes_counter <- function(vec = NULL){
# function body
  counter <- 0 # tracks the number of 0s
  
  if (is.null(vec)){ # return a count of 0 if no input vector
    counter <- 0
  } else
  
  for (i in seq_along(vec)){
    if(vec[i] != 0) next # only increment 'counter' if 'vec[i]' is a 0
    counter = counter+1
  }
 
  return(counter)

} # end of zeroes_counter
#############################################################

# Utilizing the function ------------------------------------------------------


vec1 <- sample(c(1:100, rep(0, sample(1:50, 1)))) # create a shuffled vector of numbers including random number of 0's.
cat("Here's my vector: ", vec1, "\n",
    "The number of zeroes in my vector is: ",zeroes_counter(vec = vec1),"\n")
## Here's my vector:  2 99 14 63 0 1 44 37 95 94 48 22 82 86 16 8 90 24 3 0 85 83 0 0 93 100 42 20 36 38 17 27 73 97 58 87 77 4 12 55 80 28 0 31 34 11 15 40 0 67 76 0 33 30 60 0 51 39 70 59 52 79 19 62 35 84 45 43 54 18 64 41 71 13 88 6 10 46 89 50 66 0 32 72 57 25 0 92 78 74 0 96 56 26 65 53 0 69 29 9 75 0 91 21 47 0 5 49 98 61 7 23 81 0 68 
##  The number of zeroes in my vector is:  15
  1. Use subsetting instead of a loop to rewrite the function as a single line of code.
#############################################################
# FUNCTION: zeroes_counter2
# Description: Counts the number of zeroes in a numeric vector
# Input(s): 'vec' (a vector of numerics)
# Output(s): 'counter' (the count of zeros in the vector)
# ------------------------------------------------------

zeroes_counter2 <- function(vec = NULL){
# function body
  counter <- 0
  
  if (is.null(vec)){ # return a count of 0 if no input vector
    counter <- 0
  } else
  
  counter <- length(vec[vec == 0])
  return(counter)

} # end of zeroes_counter
#############################################################

# Utilizing the function ------------------------------------------------------

vec1 <- sample(c(1:100, rep(0, sample(1:50, 1)))) # create a shuffled vector of numbers including random number of 0's.
cat("Here's my vector: ", vec1, "\n",
    "The number of zeroes in my vector is: ",zeroes_counter2(vec = vec1),"\n")
## Here's my vector:  37 0 34 56 79 15 0 87 60 0 9 53 0 62 2 8 0 0 51 64 28 0 40 100 0 91 10 97 65 99 0 12 0 38 76 29 0 0 0 73 0 0 30 7 0 31 69 72 32 4 27 0 49 98 11 21 47 67 0 33 0 0 58 0 5 0 0 41 3 59 89 0 86 0 75 77 84 0 0 13 83 0 23 0 0 81 14 44 0 0 0 0 18 57 0 95 19 36 26 96 0 20 61 78 0 55 0 0 17 82 16 70 90 0 63 66 0 52 48 46 92 0 1 0 71 88 6 93 0 0 25 39 43 85 54 0 45 22 74 0 50 0 42 80 68 35 0 0 94 24 
##  The number of zeroes in my vector is:  50
  1. Write a function that takes as input two integers representing the number of rows and columns in a matrix. The output is a matrix of these dimensions in which each element is the product of the row number x the column number.
#############################################################
# FUNCTION: make_matrix
# Description:  create a matrix with specified number of rows and columns in which each element is the product of the row number x the column number.
# Input(s):  'rows' (no. of rows in the matrix)
#            'cols' (no. of columns in the matrix0
# output(s):  a matrix 'm'
# ------------------------------------------------------

make_matrix <- function(rows = NULL, cols = NULL){
  
   m <- NULL # initialize matrix 'm'
   
  if (is.null(rows)|is.null(cols)){ # Ensure the function only runs when rows and columns number are provided
   stop("Function Failure: Pls insert integer values for input parameters 'rows' (number of rows) and 'cols'(number of columns)")
  } else
  
  for (i in seq_len(rows)){
        mr <- matrix(i * seq_len(cols), ncol = cols) # create a 1 x (cols) matrix with the values being the current row number times the column numbers
    m <- rbind(m,mr) # adds the matrix 'mr' to the final matrix 'm'
    }
    
return(m)

} # end of make_matrix

#############################################################

# Utilizing the make_matrix() function ------------------------------------------------------

make_matrix(rows = 5, cols = 7)
##      [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,]    1    2    3    4    5    6    7
## [2,]    2    4    6    8   10   12   14
## [3,]    3    6    9   12   15   18   21
## [4,]    4    8   12   16   20   24   28
## [5,]    5   10   15   20   25   30   35
  1. Now let’s practice calling custom functions within a for loops. Use the code from previous lectures on loops and functions to complete the following steps:

    1. Simulate a dataset with 3 groups of data, each group drawn from a distribution with a different mean. The final data frame should have 1 column for group and 1 column for the response variable.
#############################################################
# FUNCTION: sim_df
# Description: create a dataframe with a simulated dataset of 3 groups of data
#              each group drawn from a normal distribution with a different mean
# Input(s):  'means' ( a atomic vector containing means for the 3 groups)
# output(s):  a dataframe 'df'
# ------------------------------------------------------

sim_df <- function(means = c(5, 16, 30)){
   if (is.null(means)){ # Ensure the function only runs when an input is given
   stop("Function Failure: Insert 'means' (an atomic vector containing means for the 3 groups")
  } else
  
  Group <- NULL # create an empty vector 'Group'
  Response <- NULL # create an empty vector 'Response'
  n <- 50
  set.seed(89)
  for(i in seq_along(means)){
    Group <- c(Group, rep(i, n)) # add the group number to the 'Group' vector
    Response <- c(Response, rnorm(n, mean = means[i])) # add the current 'Response' values to the 'Response' vector
  }
  
df <- data.frame(Group, Response) # join 'Group' and 'Response' vectors into a dataframe

return(df)

} # end of sim_df

#############################################################

# Utilizing the sim_df() function ------------------------------------------------------

a <- sim_df(c(18, 5, 29))
print(a)
##     Group  Response
## 1       1 16.548398
## 2       1 18.708100
## 3       1 19.593097
## 4       1 17.162404
## 5       1 19.929470
## 6       1 16.768930
## 7       1 18.597780
## 8       1 15.723113
## 9       1 17.295223
## 10      1 16.737407
## 11      1 18.096423
## 12      1 17.038105
## 13      1 17.684238
## 14      1 17.990089
## 15      1 18.884846
## 16      1 18.705247
## 17      1 18.487294
## 18      1 18.542293
## 19      1 18.742681
## 20      1 15.638443
## 21      1 19.377239
## 22      1 17.397413
## 23      1 18.656548
## 24      1 17.842920
## 25      1 18.791356
## 26      1 16.934100
## 27      1 15.981798
## 28      1 18.689317
## 29      1 19.571529
## 30      1 16.916901
## 31      1 16.141745
## 32      1 16.087518
## 33      1 18.301266
## 34      1 17.906916
## 35      1 17.142216
## 36      1 17.499024
## 37      1 18.104672
## 38      1 18.157348
## 39      1 17.162672
## 40      1 15.974028
## 41      1 17.135673
## 42      1 16.832684
## 43      1 17.831593
## 44      1 18.952340
## 45      1 17.706156
## 46      1 19.556656
## 47      1 18.094615
## 48      1 16.414982
## 49      1 18.112504
## 50      1 19.078087
## 51      2  4.554309
## 52      2  5.487409
## 53      2  3.947329
## 54      2  6.056855
## 55      2  3.745757
## 56      2  5.960639
## 57      2  3.945866
## 58      2  4.720163
## 59      2  6.263240
## 60      2  6.121210
## 61      2  4.073151
## 62      2  5.769954
## 63      2  5.290495
## 64      2  5.101467
## 65      2  6.194221
## 66      2  3.142266
## 67      2  3.872532
## 68      2  5.054438
## 69      2  5.747795
## 70      2  5.803128
## 71      2  5.268189
## 72      2  3.308383
## 73      2  4.102731
## 74      2  4.264430
## 75      2  5.972637
## 76      2  2.865160
## 77      2  4.895946
## 78      2  5.814757
## 79      2  4.456358
## 80      2  4.596987
## 81      2  3.463396
## 82      2  4.225546
## 83      2  4.197835
## 84      2  2.675862
## 85      2  6.211855
## 86      2  4.673535
## 87      2  6.239167
## 88      2  4.933422
## 89      2  6.139455
## 90      2  3.990342
## 91      2  5.832657
## 92      2  3.978332
## 93      2  5.680469
## 94      2  5.492903
## 95      2  5.540434
## 96      2  4.033185
## 97      2  4.166288
## 98      2  4.799996
## 99      2  6.218252
## 100     2  3.498044
## 101     3 29.656750
## 102     3 28.229897
## 103     3 29.489077
## 104     3 29.394825
## 105     3 29.035247
## 106     3 30.830415
## 107     3 28.159403
## 108     3 30.328067
## 109     3 27.474809
## 110     3 28.534991
## 111     3 28.267270
## 112     3 27.733672
## 113     3 29.766258
## 114     3 28.733967
## 115     3 30.380645
## 116     3 28.377274
## 117     3 27.983423
## 118     3 31.374048
## 119     3 29.237709
## 120     3 30.023559
## 121     3 29.427148
## 122     3 28.403185
## 123     3 29.184451
## 124     3 28.690602
## 125     3 28.237707
## 126     3 29.220364
## 127     3 27.784915
## 128     3 29.996499
## 129     3 28.823589
## 130     3 29.260799
## 131     3 29.094511
## 132     3 28.905088
## 133     3 27.850449
## 134     3 28.758538
## 135     3 27.467321
## 136     3 29.528008
## 137     3 28.210028
## 138     3 30.788339
## 139     3 28.244699
## 140     3 29.725614
## 141     3 30.921836
## 142     3 29.325745
## 143     3 30.083602
## 144     3 29.093262
## 145     3 28.215320
## 146     3 28.205215
## 147     3 27.354002
## 148     3 30.189446
## 149     3 27.463997
## 150     3 30.229392
  1. Write a custom function that 1) reshuffles the response variable, and 2) calculates the mean of each group in the reshuffled data. Store the means in a vector of length 3.
#############################################################
# FUNCTION: shuffle_means
# Description:  a custom function that 1) reshuffles the response variable, and 2) calculates the mean of each group in the reshuffled data. Store the means in a vector of length 3
# Input(s):  dataframe 'df'
# Output(s):  vector 'v'
# ------------------------------------------------------

shuffle_means <- function(df = NULL){

   if (is.null(df)){ # Ensure the function only runs when an input is given
   stop("Function Failure: Insert 'df' a dataframe")
  } else
  
  df[2] <- sample(df$Response) # shuffle 'Response' variable while holding 'Group' variable constant 
  v <-  df %>% group_by(Group) %>% 
               summarize(Mean_of_Group = mean(Response)) # computes the mean of the 'Response' variable for each group 
  
return(v)

} # end of shuffle_means

#############################################################

# Utilizing shuffle_means() function ------------------------------------------------------

s <- shuffle_means(a)
print(s)
## # A tibble: 3 × 2
##   Group Mean_of_Group
##   <int>         <dbl>
## 1     1          17.4
## 2     2          18.7
## 3     3          15.6
  1. Use a for loop to repeat the function in b 100 times. Store the results in a data frame that has 1 column indicating the replicate number and 1 column for each new group mean, for a total of 4 columns.
#Replicate_Number <- NULL
t <- data.frame(NULL)


for (i in seq_len(100)){
  
  reShape <- spread(as.data.frame(shuffle_means(a)), key = Group, value = Mean_of_Group) # convert the 'shuffle_means(a)' from long to wide format. 
  # Note: 'shuffle_means(a) was transform to a dataframe as it was a tibble and spread() only takes in dataframe objects.
  d <- data.frame(i, reShape)
  t <- rbind(t, d) # add the dataframe from each iteration to the output dataframe 't'
}

names(t) <- c("Replicate Number", "Group 1 Means", "Group 2 Means", "Group 3 Means")
print(t)
##     Replicate Number Group 1 Means Group 2 Means Group 3 Means
## 1                  1      16.82922      17.34623      17.49073
## 2                  2      17.57996      17.65011      16.43611
## 3                  3      19.47793      16.73275      15.45550
## 4                  4      18.33428      17.99471      15.33720
## 5                  5      15.98905      19.20649      16.47064
## 6                  6      18.10046      17.49473      16.07100
## 7                  7      15.14998      19.05654      17.45966
## 8                  8      17.37915      16.44665      17.84039
## 9                  9      18.29547      15.88396      17.48675
## 10                10      17.13135      16.75561      17.77922
## 11                11      17.90462      16.87722      16.88434
## 12                12      15.67611      18.02061      17.96946
## 13                13      17.65714      15.85050      18.15854
## 14                14      17.51080      18.31348      15.84191
## 15                15      18.92969      18.33364      14.40285
## 16                16      15.70352      18.78754      17.17512
## 17                17      17.54933      15.57594      18.54092
## 18                18      17.91360      17.17919      16.57339
## 19                19      16.22673      16.60600      18.83346
## 20                20      15.42462      17.64208      18.59948
## 21                21      15.95759      18.13796      17.57064
## 22                22      18.41319      17.25476      15.99824
## 23                23      16.81959      17.88178      16.96481
## 24                24      14.50942      20.07825      17.07851
## 25                25      17.54764      16.90142      17.21712
## 26                26      16.14028      18.31881      17.20709
## 27                27      16.62024      16.35036      18.69558
## 28                28      18.01546      16.32233      17.32838
## 29                29      16.74602      17.26371      17.65646
## 30                30      17.82266      17.53433      16.30919
## 31                31      15.96368      17.15588      18.54662
## 32                32      14.66397      18.62662      18.37559
## 33                33      16.71850      17.83258      17.11511
## 34                34      17.29853      16.83185      17.53580
## 35                35      18.37965      16.52597      16.76057
## 36                36      17.70144      17.62905      16.33570
## 37                37      17.93472      16.06415      17.66732
## 38                38      16.61194      17.09300      17.96124
## 39                39      15.13898      17.75234      18.77486
## 40                40      17.10110      17.03922      17.52586
## 41                41      15.60586      18.51818      17.54215
## 42                42      15.77381      20.39707      15.49530
## 43                43      18.04231      18.00210      15.62177
## 44                44      15.27914      17.67138      18.71566
## 45                45      16.93652      18.21641      16.51326
## 46                46      16.62316      18.49330      16.54973
## 47                47      16.73198      17.33936      17.59485
## 48                48      18.14591      16.18652      17.33376
## 49                49      16.16388      17.38073      18.12158
## 50                50      16.33262      17.66083      17.67273
## 51                51      16.40940      17.39862      17.85816
## 52                52      18.09628      17.00103      16.56887
## 53                53      18.19471      17.28514      16.18634
## 54                54      18.59270      16.97469      16.09879
## 55                55      17.72383      18.67972      15.26264
## 56                56      17.29035      17.19541      17.18042
## 57                57      17.26405      17.15887      17.24326
## 58                58      17.90195      15.89064      17.87359
## 59                59      16.52537      18.16555      16.97526
## 60                60      18.58850      16.30322      16.77445
## 61                61      17.27982      14.83004      19.55632
## 62                62      18.08558      16.37528      17.20532
## 63                63      18.11402      17.06062      16.49154
## 64                64      15.41618      19.03415      17.21585
## 65                65      16.45329      18.78665      16.42624
## 66                66      17.03902      16.30363      18.32354
## 67                67      16.17043      17.69087      17.80488
## 68                68      18.26730      15.54724      17.85164
## 69                69      16.81042      18.11251      16.74326
## 70                70      16.15940      17.17422      18.33256
## 71                71      16.92476      18.80001      15.94141
## 72                72      15.16704      18.54464      17.95450
## 73                73      17.40638      16.01181      18.24799
## 74                74      18.59511      17.24559      15.82548
## 75                75      15.72834      17.40836      18.52948
## 76                76      18.51694      17.40031      15.74893
## 77                77      17.24826      20.07441      14.34352
## 78                78      18.48277      15.82017      17.36324
## 79                79      21.05735      15.80914      14.79969
## 80                80      16.51416      17.14376      18.00826
## 81                81      17.16023      17.17458      17.33138
## 82                82      17.32410      17.42259      16.91949
## 83                83      16.69727      18.04490      16.92402
## 84                84      18.08714      15.91108      17.66796
## 85                85      16.91534      17.37168      17.37916
## 86                86      16.19650      18.05679      17.41290
## 87                87      16.45926      16.49363      18.71329
## 88                88      16.92947      18.93566      15.80105
## 89                89      19.66029      15.66769      16.33820
## 90                90      15.63450      18.04582      17.98587
## 91                91      17.13650      18.23403      16.29566
## 92                92      17.14903      17.25397      17.26318
## 93                93      19.24961      15.63211      16.78447
## 94                94      17.79812      16.39395      17.47411
## 95                95      16.80121      16.94531      17.91966
## 96                96      17.16149      17.34160      17.16310
## 97                97      15.28956      16.19269      20.18393
## 98                98      17.05102      17.73406      16.88110
## 99                99      16.43399      18.18979      17.04241
## 100              100      16.80349      16.80711      18.05559
  1. Use qplot() to create a histogram of the means for each reshuffled group. Or, if you want a challenge, use ggplot() to overlay all 3 histograms in the same figure. How do the distributions of reshuffled means compare to the original means?
# Reshuffled means plots ------------------------------------------------------

y <- sim_df(c(18, 5, 30)) # create a simulated dataframe
y[2] <- sample(y$Response) # shuffle the Response variable


plot_1 <- ggplot(data = y%>%filter(Group == 1),aes(x = Response)) + # plot group 1
               geom_histogram(binwidth = 0.5,fill = "royalblue", color = "black") +
               labs(title = "Shuffled Group 1 ", x = "Groups", y = "Means of the Groups")+
               theme(plot.title = element_text(hjust=0.5))
               
plot_2 <- ggplot(data = y%>%filter(Group == 2),aes(x = Response)) + # plot group 2
               geom_histogram(binwidth = 0.5,fill = "hotpink", color = "black") +
               labs(title = "Shuffled Group 2 ", x = "Groups", y = "Means of the Groups") +
               theme(plot.title = element_text(hjust=0.5))

plot_3 <- ggplot(data = y%>%filter(Group == 3),aes(x = Response)) + # plot group 3
               geom_histogram(binwidth = 0.5,fill =  "orange", color = "black") +
               labs(title = "Shuffled Group 3 ", x = "Groups", y = "Means of the Groups") +
               theme(plot.title = element_text(hjust=0.5))


plotList <-list(plot_1, plot_2, plot_3) # compress all the plots into a list
grid.arrange(grobs = plotList) # plot all 3 plots on a grid

# Original means plot (unshuffled) ------------------------------------------------------

f <- sim_df(c(18, 5, 30))

plot_4 <- ggplot(data = f%>%filter(Group == 1),aes(x = Response)) + # plot group 1
               geom_histogram(binwidth = 0.5,fill = "royalblue", color = "black") +
               labs(title = "Original Group 1 ", x = "Groups", y = "Means of the Groups")+
               theme(plot.title = element_text(hjust=0.5))
               
plot_5 <- ggplot(data = f%>%filter(Group == 2),aes(x = Response)) + # plot group 2
               geom_histogram(binwidth = 0.5,fill = "hotpink", color = "black") +
               labs(title = "Original Group 2 ", x = "Groups", y = "Means of the Groups") +
               theme(plot.title = element_text(hjust=0.5))

plot_6 <- ggplot(data = f%>%filter(Group == 3),aes(x = Response)) + # plot group 3
               geom_histogram(binwidth = 0.5,fill =  "orange", color = "black") +
               labs(title = "Original Group 3 ", x = "Groups", y = "Means of the Groups") +
               theme(plot.title = element_text(hjust=0.5))


plotList2 <-list(plot_4, plot_5, plot_6) # compress all the plots into a list
grid.arrange(grobs = plotList2) # plot all 3 plots on a grid