From 7d95c24c44ea02caf643a6fbe6d344e9b9212a15 Mon Sep 17 00:00:00 2001 From: Sam Horsfield Date: Thu, 28 Mar 2024 14:19:59 +0000 Subject: [PATCH] Updates CELEBRIMBOR notation in figures --- simulate_pangenomes/combined_performance.R | 20 ++++++++-------- simulate_pangenomes/mmseqs2_performance.R | 28 +++++++++++----------- simulate_pangenomes/panaroo_performance.R | 14 +++++------ 3 files changed, 31 insertions(+), 31 deletions(-) diff --git a/simulate_pangenomes/combined_performance.R b/simulate_pangenomes/combined_performance.R index 20930ed..64a7d39 100644 --- a/simulate_pangenomes/combined_performance.R +++ b/simulate_pangenomes/combined_performance.R @@ -36,7 +36,7 @@ panaroo.stringency <- "strict" core.df$variable[core.df$type == "sim" & core.df$analysis == "CGT" & core.df$tool == "freq_only"] <- "Unadjusted" core.df$split[core.df$type == "sim" & core.df$analysis == "CGT" & core.df$tool == "freq_only"] <- "mmseqs2" - core.df$variable[core.df$type == "sim" & core.df$analysis == "CGT" & core.df$tool == "cgt"] <- "Celebrimbor" + core.df$variable[core.df$type == "sim" & core.df$analysis == "CGT" & core.df$tool == "cgt"] <- "CELEBRIMBOR" core.df$split[core.df$type == "sim" & core.df$analysis == "CGT" & core.df$tool == "cgt"] <- "mmseqs2" core.df$variable[core.df$type == "ori" & core.df$analysis == "PANAROO" & core.df$tool == "panaroo" & core.df$stringency == panaroo.stringency] <- "True Total" @@ -45,20 +45,20 @@ panaroo.stringency <- "strict" core.df$variable[core.df$type == "sim" & core.df$analysis == "PANAROO" & core.df$tool == "panaroo" & core.df$stringency == panaroo.stringency] <- "Unadjusted" core.df$split[core.df$type == "sim" & core.df$analysis == "PANAROO" & core.df$tool == "panaroo" & core.df$stringency == panaroo.stringency] <- "Panaroo" - core.df$variable[core.df$type == "sim" & core.df$analysis == "PANAROO" & core.df$tool == "cgt" & core.df$stringency == panaroo.stringency] <- "Celebrimbor" + core.df$variable[core.df$type == "sim" & core.df$analysis == "PANAROO" & core.df$tool == "cgt" & core.df$stringency == panaroo.stringency] <- "CELEBRIMBOR" core.df$split[core.df$type == "sim" & core.df$analysis == "PANAROO" & core.df$tool == "cgt" & core.df$stringency == panaroo.stringency] <- "Panaroo" core.df.subset <- subset(core.df, variable != "NA") core.df.subset <- subset(core.df.subset, error == 0.05 | error == 0.0) core.df.subset <- subset(core.df.subset, core_lim >= 0.7 & core_lim <= 0.99) - core.df.subset$variable <- factor(core.df.subset$variable, levels = c("True Total", "Celebrimbor", "Unadjusted")) + core.df.subset$variable <- factor(core.df.subset$variable, levels = c("True Total", "CELEBRIMBOR", "Unadjusted")) core.df.subset$split <- factor(core.df.subset$split, levels = c("mmseqs2", "Panaroo")) - all_core_p <- ggplot(data=core.df.subset, aes(x=core_lim, y=core, colour=variable)) + facet_grid(.~split) + geom_point(size=2) + geom_line() + theme_light() + xlab("Core threshold") + ylab("Number of core genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, colour = "black", face = "bold"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg() + scale_y_continuous(breaks = seq(0, 100000, 1000)) + scale_x_continuous(breaks = seq(0, 1, 0.1)) + all_core_p <- ggplot(data=core.df.subset, aes(x=core_lim, y=core, colour=variable)) + facet_grid(.~split) + geom_point(size=2) + geom_line() + theme_light() + xlab("Core threshold") + ylab("Number of core genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, colour = "black", face = "bold"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm() + scale_y_continuous(breaks = seq(0, 100000, 1000)) + scale_x_continuous(breaks = seq(0, 1, 0.1)) all_core_p - ggsave(file="Celebrimbor_simulation_core_figure.svg", plot=all_core_p, width=11, height=5) + ggsave(file="CELEBRIMBOR_simulation_core_figure.svg", plot=all_core_p, width=11, height=5) } @@ -72,7 +72,7 @@ panaroo.stringency <- "strict" rare.df$variable[rare.df$type == "sim" & rare.df$analysis == "CGT" & rare.df$tool == "freq_only"] <- "Unadjusted" rare.df$split[rare.df$type == "sim" & rare.df$analysis == "CGT" & rare.df$tool == "freq_only"] <- "mmseqs2" - rare.df$variable[rare.df$type == "sim" & rare.df$analysis == "CGT" & rare.df$tool == "cgt"] <- "Celebrimbor" + rare.df$variable[rare.df$type == "sim" & rare.df$analysis == "CGT" & rare.df$tool == "cgt"] <- "CELEBRIMBOR" rare.df$split[rare.df$type == "sim" & rare.df$analysis == "CGT" & rare.df$tool == "cgt"] <- "mmseqs2" rare.df$variable[rare.df$type == "ori" & rare.df$analysis == "PANAROO" & rare.df$tool == "panaroo" & rare.df$stringency == panaroo.stringency] <- "True Total" @@ -81,20 +81,20 @@ panaroo.stringency <- "strict" rare.df$variable[rare.df$type == "sim" & rare.df$analysis == "PANAROO" & rare.df$tool == "panaroo" & rare.df$stringency == panaroo.stringency] <- "Unadjusted" rare.df$split[rare.df$type == "sim" & rare.df$analysis == "PANAROO" & rare.df$tool == "panaroo" & rare.df$stringency == panaroo.stringency] <- "Panaroo" - rare.df$variable[rare.df$type == "sim" & rare.df$analysis == "PANAROO" & rare.df$tool == "cgt" & rare.df$stringency == panaroo.stringency] <- "Celebrimbor" + rare.df$variable[rare.df$type == "sim" & rare.df$analysis == "PANAROO" & rare.df$tool == "cgt" & rare.df$stringency == panaroo.stringency] <- "CELEBRIMBOR" rare.df$split[rare.df$type == "sim" & rare.df$analysis == "PANAROO" & rare.df$tool == "cgt" & rare.df$stringency == panaroo.stringency] <- "Panaroo" rare.df.subset <- subset(rare.df, variable != "NA") rare.df.subset <- subset(rare.df.subset, error == 0.05 | error == 0.0) rare.df.subset <- subset(rare.df.subset, rare_lim <= 0.25) - rare.df.subset$variable <- factor(rare.df.subset$variable, levels = c("True Total", "Celebrimbor", "Unadjusted")) + rare.df.subset$variable <- factor(rare.df.subset$variable, levels = c("True Total", "CELEBRIMBOR", "Unadjusted")) rare.df.subset$split <- factor(rare.df.subset$split, levels = c("mmseqs2", "Panaroo")) - all_rare_p <- ggplot(data=rare.df.subset, aes(x=rare_lim, y=rare, colour=variable)) + facet_grid(.~split) + geom_point(size=2) + geom_line() + theme_light() + xlab("Rare threshold") + ylab("Number of rare genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, colour = "black", face = "bold"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg() + scale_y_continuous(breaks = seq(0, 100000, 5000)) + scale_x_continuous(breaks = seq(0, 1, 0.1)) + all_rare_p <- ggplot(data=rare.df.subset, aes(x=rare_lim, y=rare, colour=variable)) + facet_grid(.~split) + geom_point(size=2) + geom_line() + theme_light() + xlab("Rare threshold") + ylab("Number of rare genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, colour = "black", face = "bold"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm() + scale_y_continuous(breaks = seq(0, 100000, 5000)) + scale_x_continuous(breaks = seq(0, 1, 0.1)) all_rare_p - ggsave(file="Celebrimbor_simulation_rare_figure.svg", plot=all_rare_p, width=11, height=5) + ggsave(file="CELEBRIMBOR_simulation_rare_figure.svg", plot=all_rare_p, width=11, height=5) } pub.table <- subset(core.df, core_lim == 0.95 & (error == 0.05 | error == 0) & variable != "NA") diff --git a/simulate_pangenomes/mmseqs2_performance.R b/simulate_pangenomes/mmseqs2_performance.R index 9267922..2e8b5b3 100644 --- a/simulate_pangenomes/mmseqs2_performance.R +++ b/simulate_pangenomes/mmseqs2_performance.R @@ -130,22 +130,22 @@ if (to_analyse == "rare") subsample.df.rare$variable <- as.character(subsample.df.rare$variable) subsample.df.rare$variable[subsample.df.rare$variable == "total_rare"] <- "True Total" - subsample.df.rare$variable[subsample.df.rare$variable == "rare_cgt_pred"] <- "Celebrimbor" + subsample.df.rare$variable[subsample.df.rare$variable == "rare_cgt_pred"] <- "CELEBRIMBOR" subsample.df.rare$variable[subsample.df.rare$variable == "rare_freq_pred"] <- "Unadjusted" - subsample.df.rare$variable <- factor(subsample.df.rare$variable, levels = c("True Total", "Celebrimbor", "Unadjusted")) + subsample.df.rare$variable <- factor(subsample.df.rare$variable, levels = c("True Total", "CELEBRIMBOR", "Unadjusted")) - rare_p <- ggplot(data=subsample.df.rare, aes(x=rare_lim, y=value, colour=variable)) + geom_point(size=2) + geom_line() + theme_light() + xlab("Rare threshold") + ylab("Number of rare genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=20,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, face = "italic"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg() + rare_p <- ggplot(data=subsample.df.rare, aes(x=rare_lim, y=value, colour=variable)) + geom_point(size=2) + geom_line() + theme_light() + xlab("Rare threshold") + ylab("Number of rare genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=20,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, face = "italic"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm() rare_p subsample.df.mid1 <- subset(subsample.df, variable == "total_middle" | variable == "middle_cgt_pred" | variable == "middle_freq_pred") subsample.df.mid1$variable <- as.character(subsample.df.mid1$variable) subsample.df.mid1$variable[subsample.df.mid1$variable == "total_middle"] <- "True Total" - subsample.df.mid1$variable[subsample.df.mid1$variable == "middle_cgt_pred"] <- "Celebrimbor" + subsample.df.mid1$variable[subsample.df.mid1$variable == "middle_cgt_pred"] <- "CELEBRIMBOR" subsample.df.mid1$variable[subsample.df.mid1$variable == "middle_freq_pred"] <- "Unadjusted" - subsample.df.mid1$variable <- factor(subsample.df.mid1$variable, levels = c("True Total", "Celebrimbor", "Unadjusted")) + subsample.df.mid1$variable <- factor(subsample.df.mid1$variable, levels = c("True Total", "CELEBRIMBOR", "Unadjusted")) - middle_p1 <- ggplot(data=subsample.df.mid1, aes(x=rare_lim, y=value, colour=variable)) + geom_point(size=2) + geom_line() + theme_light() + xlab("Rare threshold") + ylab("Number of intermediate genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=20,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, face = "italic"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg() + middle_p1 <- ggplot(data=subsample.df.mid1, aes(x=rare_lim, y=value, colour=variable)) + geom_point(size=2) + geom_line() + theme_light() + xlab("Rare threshold") + ylab("Number of intermediate genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=20,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, face = "italic"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm() middle_p1 subsample.df.rare$type <-"Number of rare genes" @@ -154,7 +154,7 @@ if (to_analyse == "rare") subsample.df.all.rare$type <- factor(subsample.df.all.rare$type, levels = c("Number of rare genes", "Number of intermediate genes")) - all_rare_p <- ggplot(data=subsample.df.all.rare, aes(x=rare_lim, y=value, colour=variable)) + facet_grid(type~., switch = "y", scales = "free_y") + geom_point(size=2) + geom_line() + theme_light() + xlab("Rare threshold") + ylab("") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.placement = "outside", strip.background = element_blank(), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, colour = "black", face = "bold"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg() + scale_y_continuous(breaks = seq(0, 100000, 2000)) + all_rare_p <- ggplot(data=subsample.df.all.rare, aes(x=rare_lim, y=value, colour=variable)) + facet_grid(type~., switch = "y", scales = "free_y") + geom_point(size=2) + geom_line() + theme_light() + xlab("Rare threshold") + ylab("") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.placement = "outside", strip.background = element_blank(), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, colour = "black", face = "bold"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm() + scale_y_continuous(breaks = seq(0, 100000, 2000)) all_rare_p write.csv(print.df, file='mmseqs_rare.csv') } else if (to_analyse == "core") @@ -168,22 +168,22 @@ if (to_analyse == "rare") subsample.df.core$variable <- as.character(subsample.df.core$variable) subsample.df.core$variable[subsample.df.core$variable == "total_core"] <- "True Total" - subsample.df.core$variable[subsample.df.core$variable == "core_cgt_pred"] <- "Celebrimbor" + subsample.df.core$variable[subsample.df.core$variable == "core_cgt_pred"] <- "CELEBRIMBOR" subsample.df.core$variable[subsample.df.core$variable == "core_freq_pred"] <- "Unadjusted" - subsample.df.core$variable <- factor(subsample.df.core$variable, levels = c("True Total", "Celebrimbor", "Unadjusted")) + subsample.df.core$variable <- factor(subsample.df.core$variable, levels = c("True Total", "CELEBRIMBOR", "Unadjusted")) - core_p <- ggplot(data=subsample.df.core, aes(x=core_lim, y=value, colour=variable)) + geom_point(size=2) + geom_line() + theme_light() + xlab("Core threshold") + ylab("Number of core genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=20,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, face = "italic"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg() + core_p <- ggplot(data=subsample.df.core, aes(x=core_lim, y=value, colour=variable)) + geom_point(size=2) + geom_line() + theme_light() + xlab("Core threshold") + ylab("Number of core genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=20,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, face = "italic"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm() core_p subsample.df.mid2 <- subset(subsample.df, variable == "total_middle" | variable == "middle_cgt_pred" | variable == "middle_freq_pred") subsample.df.mid2$variable <- as.character(subsample.df.mid2$variable) subsample.df.mid2$variable[subsample.df.mid2$variable == "total_middle"] <- "True Total" - subsample.df.mid2$variable[subsample.df.mid2$variable == "middle_cgt_pred"] <- "Celebrimbor" + subsample.df.mid2$variable[subsample.df.mid2$variable == "middle_cgt_pred"] <- "CELEBRIMBOR" subsample.df.mid2$variable[subsample.df.mid2$variable == "middle_freq_pred"] <- "Unadjusted" - subsample.df.mid2$variable <- factor(subsample.df.mid2$variable, levels = c("True Total", "Celebrimbor", "Unadjusted")) + subsample.df.mid2$variable <- factor(subsample.df.mid2$variable, levels = c("True Total", "CELEBRIMBOR", "Unadjusted")) - middle_p2 <- ggplot(data=subsample.df.mid2, aes(x=core_lim, y=value, colour=variable)) + geom_point(size=2) + geom_line() + theme_light() + xlab("Core threshold") + ylab("Number of intermediate genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=20,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, face = "italic"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg() + middle_p2 <- ggplot(data=subsample.df.mid2, aes(x=core_lim, y=value, colour=variable)) + geom_point(size=2) + geom_line() + theme_light() + xlab("Core threshold") + ylab("Number of intermediate genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=20,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, face = "italic"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm() middle_p2 subsample.df.core$type <-"Number of core genes" @@ -193,7 +193,7 @@ if (to_analyse == "rare") subsample.df.all.core$type <- factor(subsample.df.all.core$type, levels = c("Number of core genes", "Number of intermediate genes")) - all_core_p <- ggplot(data=subsample.df.all.core, aes(x=core_lim, y=value, colour=variable)) + facet_grid(type~., switch = "y", scales = "free_y") + geom_point(size=2) + geom_line() + theme_light() + xlab("Core threshold") + ylab("") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.placement = "outside", strip.background = element_blank(), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, colour = "black", face = "bold"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg() + scale_y_continuous(breaks = seq(0, 100000, 1000)) + all_core_p <- ggplot(data=subsample.df.all.core, aes(x=core_lim, y=value, colour=variable)) + facet_grid(type~., switch = "y", scales = "free_y") + geom_point(size=2) + geom_line() + theme_light() + xlab("Core threshold") + ylab("") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.placement = "outside", strip.background = element_blank(), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, colour = "black", face = "bold"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm() + scale_y_continuous(breaks = seq(0, 100000, 1000)) all_core_p write.csv(print.df, file='mmseqs_core.csv') diff --git a/simulate_pangenomes/panaroo_performance.R b/simulate_pangenomes/panaroo_performance.R index 1b2ba1d..a1cb920 100644 --- a/simulate_pangenomes/panaroo_performance.R +++ b/simulate_pangenomes/panaroo_performance.R @@ -9,7 +9,7 @@ library(ggpubr) # run to grid.plot with "core" and then full script with "rare" to_analyse = "core" -# directory should contain TSV generated by Celebrimbor with names cgt_sim_panaroo_strict_params_x,y_err_z.txt (adjusted data) +# directory should contain TSV generated by CELEBRIMBOR with names cgt_sim_panaroo_strict_params_x,y_err_z.txt (adjusted data) # and panaroo_sim_strict_params_x,y.tsv (unadjusted data) where x is rare threshold, y is core threshold and z is error as decimals. if (to_analyse == "rare") { @@ -80,27 +80,27 @@ subsample.df$tool <- as.character(subsample.df$tool) subsample.df$stringency <- as.character(subsample.df$stringency) subsample.df$type[subsample.df$type == "ori"] <- "Pre-removal" subsample.df$type[subsample.df$type == "sim"] <- "Post-removal" -subsample.df$tool[subsample.df$tool == "cgt"] <- "Celebrimbor" +subsample.df$tool[subsample.df$tool == "cgt"] <- "CELEBRIMBOR" subsample.df$tool[subsample.df$tool == "panaroo"] <- "Unadjusted" subsample.df$stringency[subsample.df$stringency == "moderate"] <- "Moderate" subsample.df$stringency[subsample.df$stringency == "strict"] <- "Strict" subsample.df$stringency[subsample.df$stringency == "sensitive"] <- "Sensitive" subsample.df$stringency <- factor(subsample.df$stringency, levels = c("Sensitive", "Moderate", "Strict")) subsample.df$type <- factor(subsample.df$type, levels = c("Pre-removal", "Post-removal")) -subsample.df$tool <- factor(subsample.df$tool, levels = c("Celebrimbor", "Unadjusted")) +subsample.df$tool <- factor(subsample.df$tool, levels = c("CELEBRIMBOR", "Unadjusted")) subsample.df.true <- subset(subsample.df, type == "Pre-removal" & tool == "Unadjusted") subsample.df.true$tool <- "True Total" subsample.df <- subset(subsample.df, type == "Post-removal") subsample.df <- rbind(subsample.df, subsample.df.true) -subsample.df$tool <- factor(subsample.df$tool, levels = c("True Total", "Celebrimbor", "Unadjusted")) +subsample.df$tool <- factor(subsample.df$tool, levels = c("True Total", "CELEBRIMBOR", "Unadjusted")) if (to_analyse == "rare") { subsample.df.rare <- subset(subsample.df, variable == "rare" & rare_lim >= 0.0 & rare_lim <= 0.3) subsample.df.rare$variable <-"Number of rare genes" - rare_p <- ggplot(data=subsample.df.rare, aes(x=rare_lim, y=value, colour=tool)) + facet_grid(.~stringency, scales = "free_y") + geom_point(size=2, alpha=0.5) + geom_line(alpha=0.5) + theme_light() + xlab("Rare threshold") + ylab("Number of rare genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg() + scale_y_continuous(breaks = seq(0, 100000, 2000)) + rare_p <- ggplot(data=subsample.df.rare, aes(x=rare_lim, y=value, colour=tool)) + facet_grid(.~stringency, scales = "free_y") + geom_point(size=2, alpha=0.5) + geom_line(alpha=0.5) + theme_light() + xlab("Rare threshold") + ylab("Number of rare genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm() + scale_y_continuous(breaks = seq(0, 100000, 2000)) rare_p write.csv(total.df, file='panaroo_rare.csv') } else if (to_analyse == "core") @@ -108,7 +108,7 @@ if (to_analyse == "rare") subsample.df.core <- subset(subsample.df, variable == "core" & core_lim >= 0.7 & core_lim <= 0.99) subsample.df.core$variable <-"Number of core genes" - core_p <- ggplot(data=subsample.df.core, aes(x=core_lim, y=value, colour=tool)) + facet_grid(.~stringency, scales = "free_y") + geom_point(size=2, alpha=0.5) + geom_line(alpha=0.5) + theme_light() + xlab("Core threshold") + ylab("Number of core genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg() + scale_y_continuous(breaks = seq(0, 100000, 2000)) + core_p <- ggplot(data=subsample.df.core, aes(x=core_lim, y=value, colour=tool)) + facet_grid(.~stringency, scales = "free_y") + geom_point(size=2, alpha=0.5) + geom_line(alpha=0.5) + theme_light() + xlab("Core threshold") + ylab("Number of core genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm() + scale_y_continuous(breaks = seq(0, 100000, 2000)) core_p write.csv(total.df, file='panaroo_core.csv') } @@ -116,4 +116,4 @@ if (to_analyse == "rare") grid.plot <- ggarrange(core_p, rare_p, align="hv", ncol = 1, nrow = 2, common.legend = TRUE, legend="right", labels="AUTO") grid.plot -ggsave(file="Celebrimbor_panaroo_simulation_figure.png", plot=grid.plot, width=14, height=7) \ No newline at end of file +ggsave(file="CELEBRIMBOR_panaroo_simulation_figure.png", plot=grid.plot, width=14, height=7) \ No newline at end of file