This document describes the growth curve analysis pipeline used in the Master's thesis. The required packages are: tidyverse, viridis, matrixStats, pracma, ggplot2, gridExtra, and lemon.
Data from the Tecan spectrometer are read in and tidied. The first column holds the time points, expressed as fractions of a day; each following column holds the OD reads for one replicate of an experiment (all experiments were done in duplicate or triplicate, and outliers have been removed).
# Read the plate-reader export. The first column holds the time points and
# every other column holds the OD600 reads of one strain replicate.
gc_rep <- read.csv(
  paste0(raw_path, "/gc_replicates.csv"),
  header = TRUE,
  fileEncoding = "UTF-8-BOM"
)

# Long format: one row per (time, column name). Every column except the first
# is pivoted (`-1` rather than `2:ncol(.)`, which counts backwards when there
# is only one column). Columns whose name contains "het" are dropped.
gc_long <- gc_rep %>%
  pivot_longer(-1, names_to = "strain", values_to = "OD600") %>%
  filter(!str_detect(strain, "het"))

# Split each column name into strain and replicate (separate() uses its
# default separator, i.e. any run of non-alphanumeric characters), then spread
# the replicates into one column each.
gc_org <- gc_long %>%
  separate(strain, into = c("strain", "replicate")) %>%
  pivot_wider(names_from = replicate, values_from = OD600)

# Positional rename: assumes the widened table has exactly five columns, with
# the replicate columns already in order 1, 2, 3 -- TODO confirm against the
# column order of the input file.
gc_names <- c("time", "strain", "rep_1", "rep_2", "rep_3")
names(gc_org) <- gc_names
Mean and standard error are calculated for every experimental read. (Names indicate which gene was humanized in that strain, except for the names RSHIP (HsS5.1), RSHIP4 (HsS6.1), RSHIP4N (HsS7.1), RSHIP4NT (HsS8.1), RSHIP4NTC (HsS9.2), RSHIP4NTK (HsS9.1).)
# Per-read mean and standard error across the replicate columns, restricted to
# the strains that are plotted.
#
# The standard error divides by the square root of the number of replicates
# actually present in each row. The previous `sqrt(length(.[...]))` was the
# column count of a 3-column data frame, i.e. always sqrt(3), which
# understated the error for strains measured in duplicate (rep_3 is NA).
rep_cols <- c("rep_1", "rep_2", "rep_3")
gc_stat <- gc_org %>%
  mutate(
    mean = rowMeans(.[rep_cols], na.rm = TRUE),
    error = rowSds(as.matrix(.[rep_cols]), na.rm = TRUE) /
      sqrt(rowSums(!is.na(.[rep_cols])))
  ) %>%
  filter(
    strain %in% c(
      "cyp51a1", "hmgcr", "lbr", "mvk", "nsdhl", "pmvk",
      "rship", "rship4", "rship4n", "rship4nt", "rship4ntc", "rship4ntk",
      "sc4mol", "sqle", "wt"
    )
  )
To get a quantitative measure of total growth, we calculate the area under each curve and plot the results as a bar chart, with error bars showing the standard error across replicates.
# Input for AUC(): strain replicate, time and OD600, in that column order
# (AUC() reads its columns by position).
AUCdata <- select(gc_long, strain, time, OD600)
# Area under the growth curve for every distinct identifier in `dat`.
#
# dat      data frame whose columns are, by position: (1) curve identifier,
#          (2) time, (3) OD600. Rows of one curve are integrated in the order
#          they appear, so they are expected to be sorted by time.
# ref_auc  value the areas are divided by to obtain `prop`. The default keeps
#          the constant previously hard-coded here -- presumably the wild-type
#          AUC, since the plots label `prop` "AUC compared to WT"; confirm.
#
# Returns a data frame with one row per identifier and the columns `strains`
# (identifier), `auc` (trapezoidal area, via trapz()) and `prop`
# (auc / ref_auc). An input with no rows yields a zero-row data frame.
AUC <- function(dat, ref_auc = 124.73264) {
  strains <- unique(dat[[1]])
  auc <- vapply(
    strains,
    function(id) {
      rows <- dat[[1]] %in% id
      trapz(dat[[2]][rows], dat[[3]][rows])
    },
    numeric(1),
    USE.NAMES = FALSE
  )
  data.frame(strains, auc, prop = auc / ref_auc)
}
# Relative AUC of every replicate, excluding the multi-gene ("rship")
# strains, spread into one column per replicate.
aucs <- AUCdata %>%
  AUC() %>%
  filter(!str_detect(strains, "rship")) %>%
  separate(strains, into = c("strain", "replicate")) %>%
  select(strain, replicate, prop) %>%
  pivot_wider(names_from = replicate, values_from = prop)

# Positional rename; relies on the replicate columns arriving in order 1, 2, 3.
aucs_names <- c("strain", "rep_1", "rep_2", "rep_3")
names(aucs) <- aucs_names
# Mean, standard error and standard deviation of the relative AUC across
# replicates for each single-gene strain.
#
# The standard error divides by the square root of the number of replicates
# present in each row. The previous `sqrt(length(.[...]))` was always sqrt(3)
# (the column count), which is wrong for strains with only two replicates.
rep_cols <- c("rep_1", "rep_2", "rep_3")
aucs_reps <- aucs %>%
  mutate(
    mean = rowMeans(.[rep_cols], na.rm = TRUE),
    error = rowSds(as.matrix(.[rep_cols]), na.rm = TRUE) /
      sqrt(rowSums(!is.na(.[rep_cols]))),
    sd = rowSds(as.matrix(.[rep_cols]), na.rm = TRUE)
  )

# Fix the display order of the strains (wild type first).
aucs_reps$strain <- factor(
  as.character(aucs_reps$strain),
  levels = c(
    "wt", "hmgcr", "mvk", "pmvk", "sqle",
    "cyp51a1", "lbr", "sc4mol", "nsdhl"
  )
)
# Bar chart of the mean relative AUC per single-gene strain, with error bars
# of mean +/- error and one viridis fill colour per strain.
auc_gg <- aucs_reps %>%
  select(strain, mean, error) %>%
  ggplot() +
  # geom_col() is geom_bar() with stat = "identity"
  geom_col(aes(x = strain, y = mean, fill = factor(strain))) +
  geom_errorbar(
    aes(x = strain, ymin = mean - error, ymax = mean + error),
    width = 0.2,
    position = position_dodge(0.9)
  ) +
  scale_x_discrete(
    breaks = c(
      "wt", "hmgcr", "mvk", "pmvk", "sqle",
      "cyp51a1", "lbr", "sc4mol", "nsdhl"
    ),
    labels = c(
      "WT", "HsS1.1", "HsS1.2", "HsS1.3", "HsS1.4",
      "HsS1.5", "HsS1.6", "HsS1.7", "HsS1.8"
    )
  ) +
  scale_fill_viridis_d(
    "Strain",
    labels = c(
      "WT", "HsS1.1 (HMGCR)", "HsS1.2 (MVK)", "HsS1.3 (PMVK)",
      "HsS1.4 (SQLE)", "HsS1.5 (CYP51A1)", "HsS1.6 (LBR)",
      "HsS1.7 (SC4MOL)", "HsS1.8 (NSDHL)"
    )
  ) +
  xlab("") +
  ylab("AUC compared to WT") +
  theme(panel.background = element_blank())
# print(ggplot_build(auc_gg)$data[[1]][,1])
# colours used:
# lbr = "#5DC863FF"
# hmgcr = "#472D7BFF"
# sqle = "#21908CFF"
# sc4mol = "#AADC32FF"
# pmvk = "#2C728EFF"
# nsdhl = "#FDE725FF"
# cyp51a1 = "#27AD81FF"
# wt = "#440154FF"
# mvk = "#3B528BFF"
For each of the single-gene humanized strains (HsS1.X), growth is plotted, matching colour values to the previous figure. Every strain is compared to the wild-type as a reference.
# Build one growth-curve panel: mean OD600 over time for `strain_id` and the
# wild-type reference ("wt"), with a ribbon spanning mean +/- error.
#
# data           data frame with the columns time, strain, mean and error
# strain_id      value of `strain` to draw next to "wt"
# title          panel title
# strain_colour  colour of the line and ribbon for `strain_id`
# wt_colour      colour of the line and ribbon for the wild type
# x_label,
# y_label        axis labels; the default "" leaves the axis unlabelled
#
# Returns a ggplot object without a legend.
plot_growth_vs_wt <- function(data, strain_id, title, strain_colour, wt_colour,
                              x_label = "", y_label = "") {
  # Naming the colours ties each one to its strain explicitly; unnamed values
  # are assigned in sorted order of the strain names and are only correct
  # while every strain name sorts before "wt".
  colours <- setNames(c(strain_colour, wt_colour), c(strain_id, "wt"))

  data %>%
    filter(strain %in% c(strain_id, "wt")) %>%
    ggplot(aes(x = time, y = mean)) +
    # NOTE(review): `size` for line width is deprecated in ggplot2 >= 3.4.0 in
    # favour of `linewidth`; kept so older ggplot2 versions still work.
    geom_line(mapping = aes(colour = strain), size = 3) +
    geom_ribbon(
      aes(ymin = mean - error, ymax = mean + error, fill = strain),
      alpha = 0.2
    ) +
    xlab(x_label) +
    ylab(y_label) +
    ggtitle(title) +
    theme(panel.background = element_blank(), legend.position = "none") +
    scale_colour_manual(values = colours, aesthetics = c("colour", "fill"))
}

# Wild-type colour in the single-gene (viridis) palette.
wt_viridis <- "#440154FF"

# One panel per single-gene strain. Only the first panel of each grid row
# carries the OD600 label, and only the first panel of the bottom row carries
# the time label.
hmgcr <- plot_growth_vs_wt(
  gc_stat, "hmgcr", "HsS1.1", "#472D7BFF", wt_viridis,
  y_label = "OD600"
)
mvk <- plot_growth_vs_wt(gc_stat, "mvk", "HsS1.2", "#3B528BFF", wt_viridis)
pmvk <- plot_growth_vs_wt(gc_stat, "pmvk", "HsS1.3", "#2C728EFF", wt_viridis)
sqle <- plot_growth_vs_wt(gc_stat, "sqle", "HsS1.4", "#21908CFF", wt_viridis)
cyp51a1 <- plot_growth_vs_wt(
  gc_stat, "cyp51a1", "HsS1.5", "#27AD81FF", wt_viridis,
  x_label = "Time", y_label = "OD600"
)
lbr <- plot_growth_vs_wt(gc_stat, "lbr", "HsS1.6", "#5DC863FF", wt_viridis)
sc4mol <- plot_growth_vs_wt(
  gc_stat, "sc4mol", "HsS1.7", "#AADC32FF", wt_viridis
)
nsdhl <- plot_growth_vs_wt(gc_stat, "nsdhl", "HsS1.8", "#FDE725FF", wt_viridis)

# AUC bar chart on top, the 4 x 2 grid of growth curves below.
grid_arrange_shared_legend(
  auc_gg,
  gridExtra::arrangeGrob(
    hmgcr, mvk, pmvk, sqle, cyp51a1, lbr, sc4mol, nsdhl,
    ncol = 4, nrow = 2
  ),
  ncol = 1, nrow = 2
)
We repeat the above (AUC data and growth curves) for the multi-gene strains (HsS5.1-HsS9.2).
# Relative AUC of every replicate of the multi-gene ("rship") strains and the
# wild type, spread into one column per replicate.
aucs_multi <- AUCdata %>%
  AUC() %>%
  filter(str_detect(strains, "rship") | str_detect(strains, "wt")) %>%
  separate(strains, into = c("strain", "replicate")) %>%
  select(strain, replicate, prop) %>%
  pivot_wider(names_from = replicate, values_from = prop)

# Same positional column names as the single-gene table.
names(aucs_multi) <- aucs_names
# Mean, standard error and standard deviation of the relative AUC across
# replicates for the multi-gene strains and the wild type.
#
# The standard error divides by the square root of the number of replicates
# present in each row. The previous `sqrt(length(.[...]))` was always sqrt(3)
# (the column count), which is wrong for strains with only two replicates.
rep_cols <- c("rep_1", "rep_2", "rep_3")
aucs_reps_multi <- aucs_multi %>%
  mutate(
    mean = rowMeans(.[rep_cols], na.rm = TRUE),
    error = rowSds(as.matrix(.[rep_cols]), na.rm = TRUE) /
      sqrt(rowSums(!is.na(.[rep_cols]))),
    sd = rowSds(as.matrix(.[rep_cols]), na.rm = TRUE)
  )

# Fix the display order of the strains (wild type first).
aucs_reps_multi$strain <- factor(
  as.character(aucs_reps_multi$strain),
  levels = c(
    "wt", "rship", "rship4", "rship4n",
    "rship4nt", "rship4ntk", "rship4ntc"
  )
)
# Bar chart of the mean relative AUC per multi-gene strain, with error bars
# of mean +/- error and one fill colour per strain from the "plasma" palette.
auc_gg_multi <- aucs_reps_multi %>%
  select(strain, mean, error) %>%
  ggplot() +
  # geom_col() is geom_bar() with stat = "identity"
  geom_col(aes(x = strain, y = mean, fill = factor(strain))) +
  geom_errorbar(
    aes(x = strain, ymin = mean - error, ymax = mean + error),
    width = 0.2,
    position = position_dodge(0.9)
  ) +
  scale_x_discrete(
    breaks = c(
      "wt", "rship", "rship4", "rship4n",
      "rship4nt", "rship4ntk", "rship4ntc"
    ),
    labels = c(
      "WT", "HsS5.1", "HsS6.1", "HsS7.1", "HsS8.1", "HsS9.1", "HsS9.2"
    )
  ) +
  scale_fill_viridis_d(
    "Strain",
    labels = c(
      "WT", "HsS5.1", "HsS6.1", "HsS7.1", "HsS8.1", "HsS9.1", "HsS9.2"
    ),
    option = "plasma"
  ) +
  xlab("") +
  ylab("AUC compared to WT") +
  theme(panel.background = element_blank())

# Print the first column of the built bar layer (the fill colour of each bar,
# in data row order) so the growth-curve panels can reuse the same colours.
print(ggplot_build(auc_gg_multi)$data[[1]][, 1])
## [1] "#5D01A6FF" "#9C179EFF" "#CC4678FF" "#0D0887FF" "#ED7953FF" "#F0F921FF"
## [7] "#FDB32FFF"
# colours used:
# wt = "#0D0887FF",
# rship = "#5D01A6FF",
# rship4 = "#9C179EFF",
# rship4n = "#CC4678FF",
# rship4nt = "#ED7953FF",
# rship4ntk = "#FDB32FFF",
# rship4ntc = "#F0F921FF"
# Build one growth-curve panel: mean OD600 over time for `strain_id` and the
# wild-type reference ("wt"), with a ribbon spanning mean +/- error.
#
# data           data frame with the columns time, strain, mean and error
# strain_id      value of `strain` to draw next to "wt"
# title          panel title
# strain_colour  colour of the line and ribbon for `strain_id`
# wt_colour      colour of the line and ribbon for the wild type
# x_label,
# y_label        axis labels; the default "" leaves the axis unlabelled
#
# Returns a ggplot object without a legend.
plot_growth_vs_wt <- function(data, strain_id, title, strain_colour, wt_colour,
                              x_label = "", y_label = "") {
  # Naming the colours ties each one to its strain explicitly; unnamed values
  # are assigned in sorted order of the strain names and are only correct
  # while every strain name sorts before "wt".
  colours <- setNames(c(strain_colour, wt_colour), c(strain_id, "wt"))

  data %>%
    filter(strain %in% c(strain_id, "wt")) %>%
    ggplot(aes(x = time, y = mean)) +
    # NOTE(review): `size` for line width is deprecated in ggplot2 >= 3.4.0 in
    # favour of `linewidth`; kept so older ggplot2 versions still work.
    geom_line(mapping = aes(colour = strain), size = 3) +
    geom_ribbon(
      aes(ymin = mean - error, ymax = mean + error, fill = strain),
      alpha = 0.2
    ) +
    xlab(x_label) +
    ylab(y_label) +
    ggtitle(title) +
    theme(panel.background = element_blank(), legend.position = "none") +
    scale_colour_manual(values = colours, aesthetics = c("colour", "fill"))
}

# Wild-type colour in the multi-gene (plasma) palette.
wt_plasma <- "#0D0887FF"

# One panel per multi-gene strain. Only the first panel of each grid row
# carries the OD600 label.
rship <- plot_growth_vs_wt(
  gc_stat, "rship", "HsS5.1", "#5D01A6FF", wt_plasma,
  y_label = "OD600"
)
rship4 <- plot_growth_vs_wt(
  gc_stat, "rship4", "HsS6.1", "#9C179EFF", wt_plasma
)
rship4n <- plot_growth_vs_wt(
  gc_stat, "rship4n", "HsS7.1", "#CC4678FF", wt_plasma
)
rship4nt <- plot_growth_vs_wt(
  gc_stat, "rship4nt", "HsS8.1", "#ED7953FF", wt_plasma,
  y_label = "OD600"
)
rship4ntk <- plot_growth_vs_wt(
  gc_stat, "rship4ntk", "HsS9.1", "#FDB32FFF", wt_plasma
)
rship4ntc <- plot_growth_vs_wt(
  gc_stat, "rship4ntc", "HsS9.2", "#F0F921FF", wt_plasma
)

# AUC bar chart on top, the 3 x 2 grid of growth curves below.
grid_arrange_shared_legend(
  auc_gg_multi,
  gridExtra::arrangeGrob(
    rship, rship4, rship4n, rship4nt, rship4ntk, rship4ntc,
    ncol = 3, nrow = 2
  ),
  ncol = 1, nrow = 2
)
Citations for packages used:
# Print the package citations so the URLs appear in the rendered output.
invisible(lapply(
  c(
    "Pracma: https://cran.r-project.org/web/packages/pracma/index.html",
    "Lemon: https://cran.r-project.org/web/packages/lemon/lemon.pdf"
  ),
  print
))
## [1] "Pracma: https://cran.r-project.org/web/packages/pracma/index.html"
## [1] "Lemon: https://cran.r-project.org/web/packages/lemon/lemon.pdf"