Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
* remove prepare()

* Fix denominator

* simplify text rendering

* minor
  • Loading branch information
tanghaibao authored Jan 25, 2025
1 parent 5bad2c6 commit 32bd395
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 111 deletions.
152 changes: 42 additions & 110 deletions src/jcvi/projects/sugarcane.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
savefig,
)
from ..graphics.chromosome import Chromosome as ChromosomePlot
from ..utils.cbook import short_float

SoColor = "#7436a4" # Purple
SsColor = "#5a8340" # Green
Expand Down Expand Up @@ -356,23 +357,25 @@ def summary(self) -> List[Tuple[str, int, int, int]]:
group_chrom_count = group_count / len(chromosomes[0])
group_so_size = group_count if subgenome == "SO" else 0
group_ss_size = group_count if subgenome == "SS" else 0
group_size = group_so_size + group_ss_size
ans.append(
(
subgenome,
group_chrom_count,
group_so_size / SO_GENE_COUNT,
group_ss_size / SS_GENE_COUNT,
group_so_size / group_size,
group_ss_size / group_size,
)
)
total_chrom_count += group_chrom_count
total_so_size += group_so_size
total_ss_size += group_ss_size
total_size = total_so_size + total_ss_size
ans.append(
(
"Total",
total_chrom_count,
total_so_size / SO_GENE_COUNT,
total_ss_size / SS_GENE_COUNT,
total_so_size / total_size,
total_ss_size / total_size,
)
)
return ans
Expand All @@ -392,59 +395,61 @@ def __init__(self, SO_data, SS_data, percent_SO_data, percent_SS_data):
self.percent_SO_data = percent_SO_data
self.percent_SS_data = percent_SS_data

def _summary(self, a, tag, precision=0):
def _summary(self, a, tag, precision=0) -> Tuple[str, str]:
mean, mn, mx = (
round(np.mean(a), precision),
round(np.min(a), precision),
round(np.max(a), precision),
)
s = f"*{tag}* chr: {mean:.0f}"
if mn == mean and mx == mean:
return s
return s + f" ({mn:.0f}-{mx:.0f})"
return s, ""
return s, f" ({mn:.0f}-{mx:.0f})"

def _percent_summary(self, a, tag, precision=1):
def _percent_summary(self, a, tag, precision=1) -> Tuple[str, str]:
mean, mn, mx = (
round(np.mean(a), precision),
round(np.min(a), precision),
round(np.max(a), precision),
)
s = f"*{tag}*%: {mean:.1f}%"
s = f"*{tag}*%: {short_float(mean, precision)}%"
print(s)
if mn == mean and mx == mean:
return s
return s + f" ({mn:.1f}-{mx:.1f}%)"
return s, ""
return s, f"({short_float(mn, precision)}-{short_float(mx, precision)}%)"

@property
def percent_SO_summary(self):
def percent_SO_summary(self) -> Tuple[str, str]:
return self._percent_summary(self.percent_SO_data, "So")

@property
def percent_SS_summary(self):
def percent_SS_summary(self) -> Tuple[str, str]:
return self._percent_summary(self.percent_SS_data, "Ss")

@property
def SO_summary(self):
def SO_summary(self) -> Tuple[str, str]:
return self._summary(self.SO_data, "So")

@property
def SS_summary(self):
def SS_summary(self) -> Tuple[str, str]:
return self._summary(self.SS_data, "Ss")


def simulate_F1(SO: Genome, SS: Genome, mode: CrossMode, verbose: bool = False):
SO_SS_F1 = None
if mode == CrossMode.nx2plusn:
SO_SS_F1 = SO.mate_nx2plusn("SOxSS F1", SS, verbose=verbose)
elif mode == CrossMode.twoplusnFDR:
SO_SS_F1 = SO.mate_2nplusn_FDR("SOxSS F1", SS, verbose=verbose)
elif mode == CrossMode.twoplusnSDR:
SO_SS_F1 = SO.mate_2nplusn_SDR("SOxSS F1", SS, verbose=verbose)
if verbose:
if verbose and SO_SS_F1:
SO_SS_F1.print_summary()
return SO_SS_F1


def simulate_BC1(SO: Genome, SS_SO_F1: Genome, mode: CrossMode, verbose=False):
SS_SO_BC1 = None
if mode == CrossMode.nx2plusn:
SS_SO_BC1 = SO.mate_nx2plusn("SOxSS BC1", SS_SO_F1, verbose=verbose)
elif mode == CrossMode.twoplusnFDR:
Expand Down Expand Up @@ -541,47 +546,32 @@ def modify_range_end(d: dict, value: int):
summary = GenomeSummary(SO_data, SS_data, percent_SO_data, percent_SS_data)

# Write the stats summary within the plot
summary_style = dict(size=9, ha="center", va="center")
summary_style = dict(size=8, va="center")
SO_peak = SO_counter.most_common(1)[0][0]
SS_peak = SS_counter.most_common(1)[0][0]
SO_single = len(SO_counter) == 1
SS_single = len(SS_counter) == 1

# Offset the text to avoid overlapping
if SO_peak < SS_peak:
if SO_single:
SO_peak -= 8
if SS_single:
SS_peak += 8
else:
if SO_single:
if SO_peak > 79:
SO_peak -= 8
else:
SO_peak += 8
if SS_single:
SS_peak -= 8
ax.text(
SO_peak, ymax * 0.85, markup(summary.SO_summary), color=SoColor, **summary_style
)
ax.text(
SO_peak,
ymax * 0.65,
markup(summary.percent_SO_summary),
color=SoColor,
**summary_style,
)
ax.text(
SS_peak, ymax * 0.85, markup(summary.SS_summary), color=SsColor, **summary_style
)
ax.text(
SS_peak,
ymax * 0.65,
markup(summary.percent_SS_summary),
color=SsColor,
**summary_style,
)

for xpos, ypos, single, text, color, ha in zip(
[SO_peak] * 4 + [SS_peak] * 4,
([ymax * 0.85] * 2 + [ymax * 0.65] * 2) * 2,
[SO_single] * 4 + [SS_single] * 4,
summary.SO_summary
+ summary.percent_SO_summary
+ summary.SS_summary
+ summary.percent_SS_summary,
[SoColor] * 4 + [SsColor] * 4,
["right", "left"] * 4,
):
# Offset some text to avoid overlapping
if abs(SS_peak - SO_peak) < 16 and xpos == SO_peak:
xpos -= 12
PAD = 1 if single else 0.25
if ha == "left":
xpos -= PAD
else:
xpos += PAD
ax.text(xpos, ypos, markup(text), color=color, ha=ha, **summary_style)
return summary


Expand Down Expand Up @@ -726,63 +716,6 @@ def simulate(args):
write_chromosomes(genomes, op.join(outdir, f"all_{cross}"))


def _get_sizes(filename, prefix_length, tag, target_size=None):
    """Return a dictionary of chromosome lengths read from a given file.

    Only rows starting with `Chr` are used. The chromosome index is the
    integer parsed from characters 3..prefix_length of the seqid, and rows
    that map to the same index are averaged.

    Args:
        filename (str): Path to the input file. Input file is 2-column file
            with rows `seqid length`.
        prefix_length (int): Extract first N characters.
        tag (str): Prepend `tag-` to the seqid.
        target_size (int): Expected genome size. Defaults to None, in which
            case the averaged sizes are returned unscaled.

    Returns:
        dict: `{tag}-chrNN` -> size. When `target_size` is given, every size
        is multiplied by `target_size / sum(sizes)` and rounded.

    Raises:
        ValueError: If `target_size` is given but the sizes read from the
            file sum to zero (e.g. no `Chr` rows), so no scaling factor
            can be computed.
    """
    sizes_list = defaultdict(list)
    with open(filename, encoding="utf-8") as fp:
        for row in fp:
            if not row.startswith("Chr"):
                continue
            name, size = row.split()
            idx = int(name[3:prefix_length])
            sizes_list[f"{tag}-chr{idx:02d}"].append(int(size))

    # Get the average length
    sizes = {
        name: int(round(np.mean(size_list))) for name, size_list in sizes_list.items()
    }
    print(sizes)
    if target_size is None:
        return sizes

    total_size = sum(sizes.values())
    if not total_size:
        # Without this guard the division below fails with a bare
        # ZeroDivisionError that says nothing about the offending file.
        raise ValueError(
            f"Cannot scale {tag} sizes to {target_size}: "
            f"no chromosome lengths found in {filename}"
        )
    correction_factor = target_size / total_size
    print(
        f"{tag} total:{total_size} target:{target_size} correction:{correction_factor:.2f}x"
    )
    return {name: int(round(correction_factor * size)) for name, size in sizes.items()}


def prepare(args):
    """
    %prog SoChrLen.txt SsChrLen.txt
    Calculate lengths from real sugarcane data.
    """
    # NOTE: the docstring above doubles as the usage text handed to OptionParser.
    parser = OptionParser(prepare.__doc__)
    _, positional = parser.parse_args(args)
    if len(positional) != 2:
        sys.exit(not parser.print_help())

    so_file, ss_file = positional
    # The haploid set of LA Purple is 957.2 Mb and haploid set of US56-14-4 is 732.5 Mb
    so_target = int(957.2 * 1e6)
    ss_target = int(732.5 * 1e6)
    # SO seqids use a 5-character prefix, SS seqids a 4-character prefix.
    sizes = _get_sizes(so_file, 5, "SO", target_size=so_target)
    ss_sizes = _get_sizes(ss_file, 4, "SS", target_size=ss_target)
    sizes.update(ss_sizes)
    print(sizes)


def get_genome_wide_pct(summary: str) -> Dict[tuple, list]:
"""Collect genome-wide ungapped percent identity.
Specifically, from file `SS_SR_SO.summary.txt`.
Expand Down Expand Up @@ -1104,7 +1037,6 @@ def chromosome(args):
def main():

actions = (
("prepare", "Calculate lengths from real sugarcane data"),
("simulate", "Run simulation on female restitution"),
# Plot the simulated chromosomes
("chromosome", "Plot the chromosomes of the simulated genomes"),
Expand Down
16 changes: 16 additions & 0 deletions src/jcvi/utils/cbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,22 @@ def percentage(a, b, precision=1, mode: Optional[int] = 0):
return pct


def short_float(f, precision=1, trim_zeros=True):
    """
    Format a float to a string with a fixed precision, and optionally trim
    trailing zeros.

    Args:
        f: Number to format.
        precision (int): Number of digits after the decimal point.
        trim_zeros (bool): When true, drop trailing zeros of the fractional
            part, and the decimal point itself if nothing remains after it.

    Returns:
        str: The formatted number.

    >>> short_float(3.1415926)
    '3.1'
    >>> short_float(3.002)
    '3'
    >>> short_float(30.0, precision=0)
    '30'
    """
    text = f"{f:.{precision}f}"
    # Only trim when there is a fractional part: with precision=0 the string
    # has no ".", and stripping zeros would eat significant digits
    # ("30" -> "3", "0" -> "").
    if trim_zeros and "." in text:
        text = text.rstrip("0").rstrip(".")
    return text


def thousands(x):
"""
>>> thousands(12345)
Expand Down
15 changes: 14 additions & 1 deletion tests/utils/test_cbook.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import pytest

from jcvi.apps.base import cleanup
from jcvi.utils.cbook import autoscale, depends, gene_name, seqid_parse
from jcvi.utils.cbook import autoscale, depends, gene_name, seqid_parse, short_float


@pytest.mark.parametrize(
Expand Down Expand Up @@ -52,3 +52,16 @@ def func1(infile="a", outfile="b"):
func1(infile="a", outfile="b")
assert op.exists("b")
cleanup("a", "b")


@pytest.mark.parametrize(
    ["f", "precision", "trim_zeros", "output"],
    [
        (0.123456, 3, True, "0.123"),
        (0.0, 2, True, "0"),
        (3.1, 2, False, "3.10"),
        (3.0, 2, True, "3"),
    ],
)
def test_short_float(f, precision, trim_zeros, output):
    """Check short_float() output for each precision / trim_zeros combination."""
    formatted = short_float(f, precision, trim_zeros)
    assert formatted == output

0 comments on commit 32bd395

Please sign in to comment.