library(tidyverse)
library(gridExtra)
#############################################################
# FUNCTION: zeroes_counter
# Description: Counts the number of zeroes in a numeric vector
# Input(s): 'vec' (a vector of numerics)
# Output(s): 'counter' (the count of zeros in the vector)
# ------------------------------------------------------
zeroes_counter <- function(vec = NULL){
# function body
counter <- 0 # tracks the number of 0s
if (is.null(vec)){ # return a count of 0 if no input vector
counter <- 0
} else
for (i in seq_along(vec)){
if(vec[i] != 0) next # only increment 'counter' if 'vec[i]' is a 0
counter = counter+1
}
return(counter)
} # end of zeroes_counter
#############################################################
# Utilizing the function ------------------------------------------------------
vec1 <- sample(c(1:100, rep(0, sample(1:50, 1)))) # create a shuffled vector of numbers including random number of 0's.
cat("Here's my vector: ", vec1, "\n",
"The number of zeroes in my vector is: ",zeroes_counter(vec = vec1),"\n")
## Here's my vector: 2 99 14 63 0 1 44 37 95 94 48 22 82 86 16 8 90 24 3 0 85 83 0 0 93 100 42 20 36 38 17 27 73 97 58 87 77 4 12 55 80 28 0 31 34 11 15 40 0 67 76 0 33 30 60 0 51 39 70 59 52 79 19 62 35 84 45 43 54 18 64 41 71 13 88 6 10 46 89 50 66 0 32 72 57 25 0 92 78 74 0 96 56 26 65 53 0 69 29 9 75 0 91 21 47 0 5 49 98 61 7 23 81 0 68
## The number of zeroes in my vector is: 15
#############################################################
# FUNCTION: zeroes_counter2
# Description: Counts the number of zeroes in a numeric vector
# Input(s): 'vec' (a vector of numerics)
# Output(s): 'counter' (the count of zeros in the vector)
# ------------------------------------------------------
zeroes_counter2 <- function(vec = NULL){
# function body
counter <- 0
if (is.null(vec)){ # return a count of 0 if no input vector
counter <- 0
} else
counter <- length(vec[vec == 0])
return(counter)
} # end of zeroes_counter
#############################################################
# Utilizing the function ------------------------------------------------------
vec1 <- sample(c(1:100, rep(0, sample(1:50, 1)))) # create a shuffled vector of numbers including random number of 0's.
cat("Here's my vector: ", vec1, "\n",
"The number of zeroes in my vector is: ",zeroes_counter2(vec = vec1),"\n")
## Here's my vector: 37 0 34 56 79 15 0 87 60 0 9 53 0 62 2 8 0 0 51 64 28 0 40 100 0 91 10 97 65 99 0 12 0 38 76 29 0 0 0 73 0 0 30 7 0 31 69 72 32 4 27 0 49 98 11 21 47 67 0 33 0 0 58 0 5 0 0 41 3 59 89 0 86 0 75 77 84 0 0 13 83 0 23 0 0 81 14 44 0 0 0 0 18 57 0 95 19 36 26 96 0 20 61 78 0 55 0 0 17 82 16 70 90 0 63 66 0 52 48 46 92 0 1 0 71 88 6 93 0 0 25 39 43 85 54 0 45 22 74 0 50 0 42 80 68 35 0 0 94 24
## The number of zeroes in my vector is: 50
#############################################################
# FUNCTION: make_matrix
# Description: create a matrix with specified number of rows and columns in which each element is the product of the row number x the column number.
# Input(s): 'rows' (no. of rows in the matrix)
# 'cols' (no. of columns in the matrix0
# output(s): a matrix 'm'
# ------------------------------------------------------
make_matrix <- function(rows = NULL, cols = NULL){
m <- NULL # initialize matrix 'm'
if (is.null(rows)|is.null(cols)){ # Ensure the function only runs when rows and columns number are provided
stop("Function Failure: Pls insert integer values for input parameters 'rows' (number of rows) and 'cols'(number of columns)")
} else
for (i in seq_len(rows)){
mr <- matrix(i * seq_len(cols), ncol = cols) # create a 1 x (cols) matrix with the values being the current row number times the column numbers
m <- rbind(m,mr) # adds the matrix 'mr' to the final matrix 'm'
}
return(m)
} # end of make_matrix
#############################################################
# Utilizing the make_matrix() function ------------------------------------------------------
make_matrix(rows = 5, cols = 7)
## [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,] 1 2 3 4 5 6 7
## [2,] 2 4 6 8 10 12 14
## [3,] 3 6 9 12 15 18 21
## [4,] 4 8 12 16 20 24 28
## [5,] 5 10 15 20 25 30 35
Now let’s practice calling custom functions within a for loops. Use the code from previous lectures on loops and functions to complete the following steps:
#############################################################
# FUNCTION: sim_df
# Description: create a dataframe with a simulated dataset of 3 groups of data
# each group drawn from a normal distribution with a different mean
# Input(s): 'means' ( a atomic vector containing means for the 3 groups)
# output(s): a dataframe 'df'
# ------------------------------------------------------
sim_df <- function(means = c(5, 16, 30)){
if (is.null(means)){ # Ensure the function only runs when an input is given
stop("Function Failure: Insert 'means' (an atomic vector containing means for the 3 groups")
} else
Group <- NULL # create an empty vector 'Group'
Response <- NULL # create an empty vector 'Response'
n <- 50
set.seed(89)
for(i in seq_along(means)){
Group <- c(Group, rep(i, n)) # add the group number to the 'Group' vector
Response <- c(Response, rnorm(n, mean = means[i])) # add the current 'Response' values to the 'Response' vector
}
df <- data.frame(Group, Response) # join 'Group' and 'Response' vectors into a dataframe
return(df)
} # end of sim_df
#############################################################
# Utilizing the sim_df() function ------------------------------------------------------
a <- sim_df(c(18, 5, 29))
print(a)
## Group Response
## 1 1 16.548398
## 2 1 18.708100
## 3 1 19.593097
## 4 1 17.162404
## 5 1 19.929470
## 6 1 16.768930
## 7 1 18.597780
## 8 1 15.723113
## 9 1 17.295223
## 10 1 16.737407
## 11 1 18.096423
## 12 1 17.038105
## 13 1 17.684238
## 14 1 17.990089
## 15 1 18.884846
## 16 1 18.705247
## 17 1 18.487294
## 18 1 18.542293
## 19 1 18.742681
## 20 1 15.638443
## 21 1 19.377239
## 22 1 17.397413
## 23 1 18.656548
## 24 1 17.842920
## 25 1 18.791356
## 26 1 16.934100
## 27 1 15.981798
## 28 1 18.689317
## 29 1 19.571529
## 30 1 16.916901
## 31 1 16.141745
## 32 1 16.087518
## 33 1 18.301266
## 34 1 17.906916
## 35 1 17.142216
## 36 1 17.499024
## 37 1 18.104672
## 38 1 18.157348
## 39 1 17.162672
## 40 1 15.974028
## 41 1 17.135673
## 42 1 16.832684
## 43 1 17.831593
## 44 1 18.952340
## 45 1 17.706156
## 46 1 19.556656
## 47 1 18.094615
## 48 1 16.414982
## 49 1 18.112504
## 50 1 19.078087
## 51 2 4.554309
## 52 2 5.487409
## 53 2 3.947329
## 54 2 6.056855
## 55 2 3.745757
## 56 2 5.960639
## 57 2 3.945866
## 58 2 4.720163
## 59 2 6.263240
## 60 2 6.121210
## 61 2 4.073151
## 62 2 5.769954
## 63 2 5.290495
## 64 2 5.101467
## 65 2 6.194221
## 66 2 3.142266
## 67 2 3.872532
## 68 2 5.054438
## 69 2 5.747795
## 70 2 5.803128
## 71 2 5.268189
## 72 2 3.308383
## 73 2 4.102731
## 74 2 4.264430
## 75 2 5.972637
## 76 2 2.865160
## 77 2 4.895946
## 78 2 5.814757
## 79 2 4.456358
## 80 2 4.596987
## 81 2 3.463396
## 82 2 4.225546
## 83 2 4.197835
## 84 2 2.675862
## 85 2 6.211855
## 86 2 4.673535
## 87 2 6.239167
## 88 2 4.933422
## 89 2 6.139455
## 90 2 3.990342
## 91 2 5.832657
## 92 2 3.978332
## 93 2 5.680469
## 94 2 5.492903
## 95 2 5.540434
## 96 2 4.033185
## 97 2 4.166288
## 98 2 4.799996
## 99 2 6.218252
## 100 2 3.498044
## 101 3 29.656750
## 102 3 28.229897
## 103 3 29.489077
## 104 3 29.394825
## 105 3 29.035247
## 106 3 30.830415
## 107 3 28.159403
## 108 3 30.328067
## 109 3 27.474809
## 110 3 28.534991
## 111 3 28.267270
## 112 3 27.733672
## 113 3 29.766258
## 114 3 28.733967
## 115 3 30.380645
## 116 3 28.377274
## 117 3 27.983423
## 118 3 31.374048
## 119 3 29.237709
## 120 3 30.023559
## 121 3 29.427148
## 122 3 28.403185
## 123 3 29.184451
## 124 3 28.690602
## 125 3 28.237707
## 126 3 29.220364
## 127 3 27.784915
## 128 3 29.996499
## 129 3 28.823589
## 130 3 29.260799
## 131 3 29.094511
## 132 3 28.905088
## 133 3 27.850449
## 134 3 28.758538
## 135 3 27.467321
## 136 3 29.528008
## 137 3 28.210028
## 138 3 30.788339
## 139 3 28.244699
## 140 3 29.725614
## 141 3 30.921836
## 142 3 29.325745
## 143 3 30.083602
## 144 3 29.093262
## 145 3 28.215320
## 146 3 28.205215
## 147 3 27.354002
## 148 3 30.189446
## 149 3 27.463997
## 150 3 30.229392
#############################################################
# FUNCTION: shuffle_means
# Description: a custom function that 1) reshuffles the response variable, and 2) calculates the mean of each group in the reshuffled data. Store the means in a vector of length 3
# Input(s): dataframe 'df'
# Output(s): vector 'v'
# ------------------------------------------------------
shuffle_means <- function(df = NULL){
if (is.null(df)){ # Ensure the function only runs when an input is given
stop("Function Failure: Insert 'df' a dataframe")
} else
df[2] <- sample(df$Response) # shuffle 'Response' variable while holding 'Group' variable constant
v <- df %>% group_by(Group) %>%
summarize(Mean_of_Group = mean(Response)) # computes the mean of the 'Response' variable for each group
return(v)
} # end of shuffle_means
#############################################################
# Utilizing shuffle_means() function ------------------------------------------------------
s <- shuffle_means(a)
print(s)
## # A tibble: 3 × 2
## Group Mean_of_Group
## <int> <dbl>
## 1 1 17.4
## 2 2 18.7
## 3 3 15.6
#Replicate_Number <- NULL
t <- data.frame(NULL)
for (i in seq_len(100)){
reShape <- spread(as.data.frame(shuffle_means(a)), key = Group, value = Mean_of_Group) # convert the 'shuffle_means(a)' from long to wide format.
# Note: 'shuffle_means(a) was transform to a dataframe as it was a tibble and spread() only takes in dataframe objects.
d <- data.frame(i, reShape)
t <- rbind(t, d) # add the dataframe from each iteration to the output dataframe 't'
}
names(t) <- c("Replicate Number", "Group 1 Means", "Group 2 Means", "Group 3 Means")
print(t)
## Replicate Number Group 1 Means Group 2 Means Group 3 Means
## 1 1 16.82922 17.34623 17.49073
## 2 2 17.57996 17.65011 16.43611
## 3 3 19.47793 16.73275 15.45550
## 4 4 18.33428 17.99471 15.33720
## 5 5 15.98905 19.20649 16.47064
## 6 6 18.10046 17.49473 16.07100
## 7 7 15.14998 19.05654 17.45966
## 8 8 17.37915 16.44665 17.84039
## 9 9 18.29547 15.88396 17.48675
## 10 10 17.13135 16.75561 17.77922
## 11 11 17.90462 16.87722 16.88434
## 12 12 15.67611 18.02061 17.96946
## 13 13 17.65714 15.85050 18.15854
## 14 14 17.51080 18.31348 15.84191
## 15 15 18.92969 18.33364 14.40285
## 16 16 15.70352 18.78754 17.17512
## 17 17 17.54933 15.57594 18.54092
## 18 18 17.91360 17.17919 16.57339
## 19 19 16.22673 16.60600 18.83346
## 20 20 15.42462 17.64208 18.59948
## 21 21 15.95759 18.13796 17.57064
## 22 22 18.41319 17.25476 15.99824
## 23 23 16.81959 17.88178 16.96481
## 24 24 14.50942 20.07825 17.07851
## 25 25 17.54764 16.90142 17.21712
## 26 26 16.14028 18.31881 17.20709
## 27 27 16.62024 16.35036 18.69558
## 28 28 18.01546 16.32233 17.32838
## 29 29 16.74602 17.26371 17.65646
## 30 30 17.82266 17.53433 16.30919
## 31 31 15.96368 17.15588 18.54662
## 32 32 14.66397 18.62662 18.37559
## 33 33 16.71850 17.83258 17.11511
## 34 34 17.29853 16.83185 17.53580
## 35 35 18.37965 16.52597 16.76057
## 36 36 17.70144 17.62905 16.33570
## 37 37 17.93472 16.06415 17.66732
## 38 38 16.61194 17.09300 17.96124
## 39 39 15.13898 17.75234 18.77486
## 40 40 17.10110 17.03922 17.52586
## 41 41 15.60586 18.51818 17.54215
## 42 42 15.77381 20.39707 15.49530
## 43 43 18.04231 18.00210 15.62177
## 44 44 15.27914 17.67138 18.71566
## 45 45 16.93652 18.21641 16.51326
## 46 46 16.62316 18.49330 16.54973
## 47 47 16.73198 17.33936 17.59485
## 48 48 18.14591 16.18652 17.33376
## 49 49 16.16388 17.38073 18.12158
## 50 50 16.33262 17.66083 17.67273
## 51 51 16.40940 17.39862 17.85816
## 52 52 18.09628 17.00103 16.56887
## 53 53 18.19471 17.28514 16.18634
## 54 54 18.59270 16.97469 16.09879
## 55 55 17.72383 18.67972 15.26264
## 56 56 17.29035 17.19541 17.18042
## 57 57 17.26405 17.15887 17.24326
## 58 58 17.90195 15.89064 17.87359
## 59 59 16.52537 18.16555 16.97526
## 60 60 18.58850 16.30322 16.77445
## 61 61 17.27982 14.83004 19.55632
## 62 62 18.08558 16.37528 17.20532
## 63 63 18.11402 17.06062 16.49154
## 64 64 15.41618 19.03415 17.21585
## 65 65 16.45329 18.78665 16.42624
## 66 66 17.03902 16.30363 18.32354
## 67 67 16.17043 17.69087 17.80488
## 68 68 18.26730 15.54724 17.85164
## 69 69 16.81042 18.11251 16.74326
## 70 70 16.15940 17.17422 18.33256
## 71 71 16.92476 18.80001 15.94141
## 72 72 15.16704 18.54464 17.95450
## 73 73 17.40638 16.01181 18.24799
## 74 74 18.59511 17.24559 15.82548
## 75 75 15.72834 17.40836 18.52948
## 76 76 18.51694 17.40031 15.74893
## 77 77 17.24826 20.07441 14.34352
## 78 78 18.48277 15.82017 17.36324
## 79 79 21.05735 15.80914 14.79969
## 80 80 16.51416 17.14376 18.00826
## 81 81 17.16023 17.17458 17.33138
## 82 82 17.32410 17.42259 16.91949
## 83 83 16.69727 18.04490 16.92402
## 84 84 18.08714 15.91108 17.66796
## 85 85 16.91534 17.37168 17.37916
## 86 86 16.19650 18.05679 17.41290
## 87 87 16.45926 16.49363 18.71329
## 88 88 16.92947 18.93566 15.80105
## 89 89 19.66029 15.66769 16.33820
## 90 90 15.63450 18.04582 17.98587
## 91 91 17.13650 18.23403 16.29566
## 92 92 17.14903 17.25397 17.26318
## 93 93 19.24961 15.63211 16.78447
## 94 94 17.79812 16.39395 17.47411
## 95 95 16.80121 16.94531 17.91966
## 96 96 17.16149 17.34160 17.16310
## 97 97 15.28956 16.19269 20.18393
## 98 98 17.05102 17.73406 16.88110
## 99 99 16.43399 18.18979 17.04241
## 100 100 16.80349 16.80711 18.05559
# Reshuffled means plots ------------------------------------------------------
y <- sim_df(c(18, 5, 30)) # create a simulated dataframe
y[2] <- sample(y$Response) # shuffle the Response variable
plot_1 <- ggplot(data = y%>%filter(Group == 1),aes(x = Response)) + # plot group 1
geom_histogram(binwidth = 0.5,fill = "royalblue", color = "black") +
labs(title = "Shuffled Group 1 ", x = "Groups", y = "Means of the Groups")+
theme(plot.title = element_text(hjust=0.5))
plot_2 <- ggplot(data = y%>%filter(Group == 2),aes(x = Response)) + # plot group 2
geom_histogram(binwidth = 0.5,fill = "hotpink", color = "black") +
labs(title = "Shuffled Group 2 ", x = "Groups", y = "Means of the Groups") +
theme(plot.title = element_text(hjust=0.5))
plot_3 <- ggplot(data = y%>%filter(Group == 3),aes(x = Response)) + # plot group 3
geom_histogram(binwidth = 0.5,fill = "orange", color = "black") +
labs(title = "Shuffled Group 3 ", x = "Groups", y = "Means of the Groups") +
theme(plot.title = element_text(hjust=0.5))
plotList <-list(plot_1, plot_2, plot_3) # compress all the plots into a list
grid.arrange(grobs = plotList) # plot all 3 plots on a grid
# Original means plot (unshuffled) ------------------------------------------------------
f <- sim_df(c(18, 5, 30))
plot_4 <- ggplot(data = f%>%filter(Group == 1),aes(x = Response)) + # plot group 1
geom_histogram(binwidth = 0.5,fill = "royalblue", color = "black") +
labs(title = "Original Group 1 ", x = "Groups", y = "Means of the Groups")+
theme(plot.title = element_text(hjust=0.5))
plot_5 <- ggplot(data = f%>%filter(Group == 2),aes(x = Response)) + # plot group 2
geom_histogram(binwidth = 0.5,fill = "hotpink", color = "black") +
labs(title = "Original Group 2 ", x = "Groups", y = "Means of the Groups") +
theme(plot.title = element_text(hjust=0.5))
plot_6 <- ggplot(data = f%>%filter(Group == 3),aes(x = Response)) + # plot group 3
geom_histogram(binwidth = 0.5,fill = "orange", color = "black") +
labs(title = "Original Group 3 ", x = "Groups", y = "Means of the Groups") +
theme(plot.title = element_text(hjust=0.5))
plotList2 <-list(plot_4, plot_5, plot_6) # compress all the plots into a list
grid.arrange(grobs = plotList2) # plot all 3 plots on a grid