Skip to content

Commit

Permalink
Updates CELEBRIMBOR notation in figures
Browse files Browse the repository at this point in the history
  • Loading branch information
samhorsfield96 committed Mar 28, 2024
1 parent fc70ce3 commit 7d95c24
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 31 deletions.
20 changes: 10 additions & 10 deletions simulate_pangenomes/combined_performance.R
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ panaroo.stringency <- "strict"
core.df$variable[core.df$type == "sim" & core.df$analysis == "CGT" & core.df$tool == "freq_only"] <- "Unadjusted"
core.df$split[core.df$type == "sim" & core.df$analysis == "CGT" & core.df$tool == "freq_only"] <- "mmseqs2"

core.df$variable[core.df$type == "sim" & core.df$analysis == "CGT" & core.df$tool == "cgt"] <- "Celebrimbor"
core.df$variable[core.df$type == "sim" & core.df$analysis == "CGT" & core.df$tool == "cgt"] <- "CELEBRIMBOR"
core.df$split[core.df$type == "sim" & core.df$analysis == "CGT" & core.df$tool == "cgt"] <- "mmseqs2"

core.df$variable[core.df$type == "ori" & core.df$analysis == "PANAROO" & core.df$tool == "panaroo" & core.df$stringency == panaroo.stringency] <- "True Total"
Expand All @@ -45,20 +45,20 @@ panaroo.stringency <- "strict"
core.df$variable[core.df$type == "sim" & core.df$analysis == "PANAROO" & core.df$tool == "panaroo" & core.df$stringency == panaroo.stringency] <- "Unadjusted"
core.df$split[core.df$type == "sim" & core.df$analysis == "PANAROO" & core.df$tool == "panaroo" & core.df$stringency == panaroo.stringency] <- "Panaroo"

core.df$variable[core.df$type == "sim" & core.df$analysis == "PANAROO" & core.df$tool == "cgt" & core.df$stringency == panaroo.stringency] <- "Celebrimbor"
core.df$variable[core.df$type == "sim" & core.df$analysis == "PANAROO" & core.df$tool == "cgt" & core.df$stringency == panaroo.stringency] <- "CELEBRIMBOR"
core.df$split[core.df$type == "sim" & core.df$analysis == "PANAROO" & core.df$tool == "cgt" & core.df$stringency == panaroo.stringency] <- "Panaroo"

core.df.subset <- subset(core.df, variable != "NA")
core.df.subset <- subset(core.df.subset, error == 0.05 | error == 0.0)
core.df.subset <- subset(core.df.subset, core_lim >= 0.7 & core_lim <= 0.99)

core.df.subset$variable <- factor(core.df.subset$variable, levels = c("True Total", "Celebrimbor", "Unadjusted"))
core.df.subset$variable <- factor(core.df.subset$variable, levels = c("True Total", "CELEBRIMBOR", "Unadjusted"))
core.df.subset$split <- factor(core.df.subset$split, levels = c("mmseqs2", "Panaroo"))

all_core_p <- ggplot(data=core.df.subset, aes(x=core_lim, y=core, colour=variable)) + facet_grid(.~split) + geom_point(size=2) + geom_line() + theme_light() + xlab("Core threshold") + ylab("Number of core genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, colour = "black", face = "bold"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg() + scale_y_continuous(breaks = seq(0, 100000, 1000)) + scale_x_continuous(breaks = seq(0, 1, 0.1))
all_core_p <- ggplot(data=core.df.subset, aes(x=core_lim, y=core, colour=variable)) + facet_grid(.~split) + geom_point(size=2) + geom_line() + theme_light() + xlab("Core threshold") + ylab("Number of core genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, colour = "black", face = "bold"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm() + scale_y_continuous(breaks = seq(0, 100000, 1000)) + scale_x_continuous(breaks = seq(0, 1, 0.1))
all_core_p

ggsave(file="Celebrimbor_simulation_core_figure.svg", plot=all_core_p, width=11, height=5)
ggsave(file="CELEBRIMBOR_simulation_core_figure.svg", plot=all_core_p, width=11, height=5)
}


Expand All @@ -72,7 +72,7 @@ panaroo.stringency <- "strict"
rare.df$variable[rare.df$type == "sim" & rare.df$analysis == "CGT" & rare.df$tool == "freq_only"] <- "Unadjusted"
rare.df$split[rare.df$type == "sim" & rare.df$analysis == "CGT" & rare.df$tool == "freq_only"] <- "mmseqs2"

rare.df$variable[rare.df$type == "sim" & rare.df$analysis == "CGT" & rare.df$tool == "cgt"] <- "Celebrimbor"
rare.df$variable[rare.df$type == "sim" & rare.df$analysis == "CGT" & rare.df$tool == "cgt"] <- "CELEBRIMBOR"
rare.df$split[rare.df$type == "sim" & rare.df$analysis == "CGT" & rare.df$tool == "cgt"] <- "mmseqs2"

rare.df$variable[rare.df$type == "ori" & rare.df$analysis == "PANAROO" & rare.df$tool == "panaroo" & rare.df$stringency == panaroo.stringency] <- "True Total"
Expand All @@ -81,20 +81,20 @@ panaroo.stringency <- "strict"
rare.df$variable[rare.df$type == "sim" & rare.df$analysis == "PANAROO" & rare.df$tool == "panaroo" & rare.df$stringency == panaroo.stringency] <- "Unadjusted"
rare.df$split[rare.df$type == "sim" & rare.df$analysis == "PANAROO" & rare.df$tool == "panaroo" & rare.df$stringency == panaroo.stringency] <- "Panaroo"

rare.df$variable[rare.df$type == "sim" & rare.df$analysis == "PANAROO" & rare.df$tool == "cgt" & rare.df$stringency == panaroo.stringency] <- "Celebrimbor"
rare.df$variable[rare.df$type == "sim" & rare.df$analysis == "PANAROO" & rare.df$tool == "cgt" & rare.df$stringency == panaroo.stringency] <- "CELEBRIMBOR"
rare.df$split[rare.df$type == "sim" & rare.df$analysis == "PANAROO" & rare.df$tool == "cgt" & rare.df$stringency == panaroo.stringency] <- "Panaroo"

rare.df.subset <- subset(rare.df, variable != "NA")
rare.df.subset <- subset(rare.df.subset, error == 0.05 | error == 0.0)
rare.df.subset <- subset(rare.df.subset, rare_lim <= 0.25)

rare.df.subset$variable <- factor(rare.df.subset$variable, levels = c("True Total", "Celebrimbor", "Unadjusted"))
rare.df.subset$variable <- factor(rare.df.subset$variable, levels = c("True Total", "CELEBRIMBOR", "Unadjusted"))
rare.df.subset$split <- factor(rare.df.subset$split, levels = c("mmseqs2", "Panaroo"))

all_rare_p <- ggplot(data=rare.df.subset, aes(x=rare_lim, y=rare, colour=variable)) + facet_grid(.~split) + geom_point(size=2) + geom_line() + theme_light() + xlab("Rare threshold") + ylab("Number of rare genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, colour = "black", face = "bold"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg() + scale_y_continuous(breaks = seq(0, 100000, 5000)) + scale_x_continuous(breaks = seq(0, 1, 0.1))
all_rare_p <- ggplot(data=rare.df.subset, aes(x=rare_lim, y=rare, colour=variable)) + facet_grid(.~split) + geom_point(size=2) + geom_line() + theme_light() + xlab("Rare threshold") + ylab("Number of rare genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, colour = "black", face = "bold"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm() + scale_y_continuous(breaks = seq(0, 100000, 5000)) + scale_x_continuous(breaks = seq(0, 1, 0.1))
all_rare_p

ggsave(file="Celebrimbor_simulation_rare_figure.svg", plot=all_rare_p, width=11, height=5)
ggsave(file="CELEBRIMBOR_simulation_rare_figure.svg", plot=all_rare_p, width=11, height=5)
}

pub.table <- subset(core.df, core_lim == 0.95 & (error == 0.05 | error == 0) & variable != "NA")
Expand Down
28 changes: 14 additions & 14 deletions simulate_pangenomes/mmseqs2_performance.R
Original file line number Diff line number Diff line change
Expand Up @@ -130,22 +130,22 @@ if (to_analyse == "rare")

subsample.df.rare$variable <- as.character(subsample.df.rare$variable)
subsample.df.rare$variable[subsample.df.rare$variable == "total_rare"] <- "True Total"
subsample.df.rare$variable[subsample.df.rare$variable == "rare_cgt_pred"] <- "Celebrimbor"
subsample.df.rare$variable[subsample.df.rare$variable == "rare_cgt_pred"] <- "CELEBRIMBOR"
subsample.df.rare$variable[subsample.df.rare$variable == "rare_freq_pred"] <- "Unadjusted"
subsample.df.rare$variable <- factor(subsample.df.rare$variable, levels = c("True Total", "Celebrimbor", "Unadjusted"))
subsample.df.rare$variable <- factor(subsample.df.rare$variable, levels = c("True Total", "CELEBRIMBOR", "Unadjusted"))

rare_p <- ggplot(data=subsample.df.rare, aes(x=rare_lim, y=value, colour=variable)) + geom_point(size=2) + geom_line() + theme_light() + xlab("Rare threshold") + ylab("Number of rare genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=20,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, face = "italic"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg()
rare_p <- ggplot(data=subsample.df.rare, aes(x=rare_lim, y=value, colour=variable)) + geom_point(size=2) + geom_line() + theme_light() + xlab("Rare threshold") + ylab("Number of rare genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=20,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, face = "italic"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm()
rare_p

subsample.df.mid1 <- subset(subsample.df, variable == "total_middle" | variable == "middle_cgt_pred" | variable == "middle_freq_pred")

subsample.df.mid1$variable <- as.character(subsample.df.mid1$variable)
subsample.df.mid1$variable[subsample.df.mid1$variable == "total_middle"] <- "True Total"
subsample.df.mid1$variable[subsample.df.mid1$variable == "middle_cgt_pred"] <- "Celebrimbor"
subsample.df.mid1$variable[subsample.df.mid1$variable == "middle_cgt_pred"] <- "CELEBRIMBOR"
subsample.df.mid1$variable[subsample.df.mid1$variable == "middle_freq_pred"] <- "Unadjusted"
subsample.df.mid1$variable <- factor(subsample.df.mid1$variable, levels = c("True Total", "Celebrimbor", "Unadjusted"))
subsample.df.mid1$variable <- factor(subsample.df.mid1$variable, levels = c("True Total", "CELEBRIMBOR", "Unadjusted"))

middle_p1 <- ggplot(data=subsample.df.mid1, aes(x=rare_lim, y=value, colour=variable)) + geom_point(size=2) + geom_line() + theme_light() + xlab("Rare threshold") + ylab("Number of intermediate genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=20,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, face = "italic"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg()
middle_p1 <- ggplot(data=subsample.df.mid1, aes(x=rare_lim, y=value, colour=variable)) + geom_point(size=2) + geom_line() + theme_light() + xlab("Rare threshold") + ylab("Number of intermediate genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=20,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, face = "italic"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm()
middle_p1

subsample.df.rare$type <-"Number of rare genes"
Expand All @@ -154,7 +154,7 @@ if (to_analyse == "rare")

subsample.df.all.rare$type <- factor(subsample.df.all.rare$type, levels = c("Number of rare genes", "Number of intermediate genes"))

all_rare_p <- ggplot(data=subsample.df.all.rare, aes(x=rare_lim, y=value, colour=variable)) + facet_grid(type~., switch = "y", scales = "free_y") + geom_point(size=2) + geom_line() + theme_light() + xlab("Rare threshold") + ylab("") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.placement = "outside", strip.background = element_blank(), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, colour = "black", face = "bold"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg() + scale_y_continuous(breaks = seq(0, 100000, 2000))
all_rare_p <- ggplot(data=subsample.df.all.rare, aes(x=rare_lim, y=value, colour=variable)) + facet_grid(type~., switch = "y", scales = "free_y") + geom_point(size=2) + geom_line() + theme_light() + xlab("Rare threshold") + ylab("") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.placement = "outside", strip.background = element_blank(), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, colour = "black", face = "bold"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm() + scale_y_continuous(breaks = seq(0, 100000, 2000))
all_rare_p
write.csv(print.df, file='mmseqs_rare.csv')
} else if (to_analyse == "core")
Expand All @@ -168,22 +168,22 @@ if (to_analyse == "rare")

subsample.df.core$variable <- as.character(subsample.df.core$variable)
subsample.df.core$variable[subsample.df.core$variable == "total_core"] <- "True Total"
subsample.df.core$variable[subsample.df.core$variable == "core_cgt_pred"] <- "Celebrimbor"
subsample.df.core$variable[subsample.df.core$variable == "core_cgt_pred"] <- "CELEBRIMBOR"
subsample.df.core$variable[subsample.df.core$variable == "core_freq_pred"] <- "Unadjusted"
subsample.df.core$variable <- factor(subsample.df.core$variable, levels = c("True Total", "Celebrimbor", "Unadjusted"))
subsample.df.core$variable <- factor(subsample.df.core$variable, levels = c("True Total", "CELEBRIMBOR", "Unadjusted"))

core_p <- ggplot(data=subsample.df.core, aes(x=core_lim, y=value, colour=variable)) + geom_point(size=2) + geom_line() + theme_light() + xlab("Core threshold") + ylab("Number of core genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=20,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, face = "italic"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg()
core_p <- ggplot(data=subsample.df.core, aes(x=core_lim, y=value, colour=variable)) + geom_point(size=2) + geom_line() + theme_light() + xlab("Core threshold") + ylab("Number of core genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=20,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, face = "italic"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm()
core_p

subsample.df.mid2 <- subset(subsample.df, variable == "total_middle" | variable == "middle_cgt_pred" | variable == "middle_freq_pred")

subsample.df.mid2$variable <- as.character(subsample.df.mid2$variable)
subsample.df.mid2$variable[subsample.df.mid2$variable == "total_middle"] <- "True Total"
subsample.df.mid2$variable[subsample.df.mid2$variable == "middle_cgt_pred"] <- "Celebrimbor"
subsample.df.mid2$variable[subsample.df.mid2$variable == "middle_cgt_pred"] <- "CELEBRIMBOR"
subsample.df.mid2$variable[subsample.df.mid2$variable == "middle_freq_pred"] <- "Unadjusted"
subsample.df.mid2$variable <- factor(subsample.df.mid2$variable, levels = c("True Total", "Celebrimbor", "Unadjusted"))
subsample.df.mid2$variable <- factor(subsample.df.mid2$variable, levels = c("True Total", "CELEBRIMBOR", "Unadjusted"))

middle_p2 <- ggplot(data=subsample.df.mid2, aes(x=core_lim, y=value, colour=variable)) + geom_point(size=2) + geom_line() + theme_light() + xlab("Core threshold") + ylab("Number of intermediate genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=20,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, face = "italic"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg()
middle_p2 <- ggplot(data=subsample.df.mid2, aes(x=core_lim, y=value, colour=variable)) + geom_point(size=2) + geom_line() + theme_light() + xlab("Core threshold") + ylab("Number of intermediate genes") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=20,face="bold"), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, face = "italic"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm()
middle_p2

subsample.df.core$type <-"Number of core genes"
Expand All @@ -193,7 +193,7 @@ if (to_analyse == "rare")
subsample.df.all.core$type <- factor(subsample.df.all.core$type, levels = c("Number of core genes", "Number of intermediate genes"))


all_core_p <- ggplot(data=subsample.df.all.core, aes(x=core_lim, y=value, colour=variable)) + facet_grid(type~., switch = "y", scales = "free_y") + geom_point(size=2) + geom_line() + theme_light() + xlab("Core threshold") + ylab("") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.placement = "outside", strip.background = element_blank(), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, colour = "black", face = "bold"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_npg() + scale_y_continuous(breaks = seq(0, 100000, 1000))
all_core_p <- ggplot(data=subsample.df.all.core, aes(x=core_lim, y=value, colour=variable)) + facet_grid(type~., switch = "y", scales = "free_y") + geom_point(size=2) + geom_line() + theme_light() + xlab("Core threshold") + ylab("") + theme(axis.text.x = element_text(size = 14), axis.text.y = element_text(size = 14), axis.title=element_text(size=16,face="bold"), strip.placement = "outside", strip.background = element_blank(), strip.text.x = element_text(size = 14), strip.text.y = element_text(size = 14, colour = "black", face = "bold"), legend.title=element_text(size=18,face="bold"), legend.text=element_text(size=16)) + guides(colour=guide_legend(title="Method")) + scale_colour_nejm() + scale_y_continuous(breaks = seq(0, 100000, 1000))
all_core_p

write.csv(print.df, file='mmseqs_core.csv')
Expand Down
Loading

0 comments on commit 7d95c24

Please sign in to comment.