From 5c591fdeb20124caa0126055e0997c3e3db1dc27 Mon Sep 17 00:00:00 2001 From: aliciaaevans Date: Fri, 26 Jul 2024 17:32:44 -0400 Subject: [PATCH 1/2] feat: add platform (subdir) data to download stats --- src/package_downloads/stats_from_anaconda_org.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/package_downloads/stats_from_anaconda_org.py b/src/package_downloads/stats_from_anaconda_org.py index d364b161a71ad..672935a87e4a7 100644 --- a/src/package_downloads/stats_from_anaconda_org.py +++ b/src/package_downloads/stats_from_anaconda_org.py @@ -149,15 +149,22 @@ async def save_packages_stats(channel_dir: Path, totals: pd.DataFrame) -> None: versions_dir = channel_dir / "versions" versions_dir.mkdir(parents=True, exist_ok=True) + platforms_dir = channel_dir / "platforms" + platforms_dir.mkdir(parents=True, exist_ok=True) for package, package_totals in packages_totals: - version_totals = package_totals.groupby("version", sort=False) + version_totals = package_totals.groupby(["version"], sort=False) write_tsv(versions_dir / f"{package}.tsv", version_totals.sum("total")) + subdir_totals = package_totals.groupby(["subdir"], sort=False) + write_tsv(platforms_dir / f"{package}.tsv", subdir_totals.sum("total")) async def save_historic_channel_stats( date: str, channel_dir: Path, totals: pd.DataFrame ) -> None: - channel_totals = pd.DataFrame([{"date": date, "total": totals["total"].sum()}]) + subdirs_totals = totals.groupby("subdir", sort=True).sum(numeric_only=True)["total"] + total_dict = {"date": date, "total": totals["total"].sum()} + total_dict.update(subdirs_totals.to_dict()) + channel_totals = pd.DataFrame([total_dict]) channel_tsv = channel_dir / "channel.tsv" if channel_tsv.exists(): channel_totals = pd.concat([read_tsv(channel_tsv), channel_totals]) @@ -190,7 +197,7 @@ async def main() -> str: } date = session.date for channel_name, package_names in channel_package_names.items(): - await save_channel_stats(date, channel_name, package_names) + await save_channel_stats(date, channel_name, package_names[1:100]) return date From dd281a42f4f2d3f2a05ab6b62f57cd702c124ee3 Mon Sep 17 00:00:00 2001 From: aliciaaevans Date: Fri, 26 Jul 2024 17:40:46 -0400 Subject: [PATCH 2/2] remove testing limit --- src/package_downloads/stats_from_anaconda_org.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/package_downloads/stats_from_anaconda_org.py b/src/package_downloads/stats_from_anaconda_org.py index 672935a87e4a7..98c409285d2f6 100644 --- a/src/package_downloads/stats_from_anaconda_org.py +++ b/src/package_downloads/stats_from_anaconda_org.py @@ -197,7 +197,7 @@ async def main() -> str: } date = session.date for channel_name, package_names in channel_package_names.items(): - await save_channel_stats(date, channel_name, package_names[1:100]) + await save_channel_stats(date, channel_name, package_names) return date