From 0ea1c9275bf6216b6a2c73f8367eccfecc0addf3 Mon Sep 17 00:00:00 2001
From: Dax Pryce <daxpryce@microsoft.com>
Date: Wed, 8 Jan 2025 14:45:15 -0800
Subject: [PATCH] Preparing for 1.2.2 and updating versions (#46)

* Update build.yml

* Update pyproject.toml
  Expanding our supported python trove identifiers to include python 3.13

* Update Cargo.toml
  Updating pyo3 from 0.15 to 0.23

* Update pyproject.toml
  Moving 3.6 and 3.7 to EOL (not only are they EOL, even 3.8 is; pyo3
  currently does not support abi3-py36, and I'm betting 3.7 is going to
  follow suit sometime soon)

* Update Cargo.toml
  abi3-py36 changing to abi3-py38

* Updated a bunch of versions. pyo3 changed fairly substantially in how
  Python signatures are specified. Also updated my first name everywhere.

* Updating versions, running clippy, and yanking the logging nonsense that
  was never used and never should be used (there have to be better ways than
  that nonsense). Also about to have clippy fix my convention of always
  using an explicit return statement; I hate implicit returns, but that's a
  me thing, not a world thing.

* More clippy fixes

* More clippy suggestions. It's like I never ran this before.

* Adding ipython and networkx as dev deps. `cd packages/pyo3 && uv sync &&
  uv run ipython` gets you to a reasonable repl for manual testing. I cannot
  believe I did not do proper python testing here. Maybe I did it in
  graspologic?

* Running cargo fmt

* Misspelled repetition, which I have also done repeatedly in this commit
  message alone

* Committing some minor changes before I rebase on dev. I forgot the
  dev/main branching scheme.

* Updating the pyproject.toml to be correct as per the current PyPA
  specification. I really hope this doesn't break older versions.

* Fixing the changes clippy made to some of the commonmark documentation in
  the function. Too much was being treated as a quoted paragraph.
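Note on the pyo3 signature change mentioned above: as of pyo3 0.23, default
values and optional arguments are declared in an explicit `signature = (...)`
attribute rather than the old `#[args(...)]`-style attribute arguments. A
minimal sketch of the new style follows (illustrative function and parameter
names only, not the actual graspologic_native API):

    use pyo3::prelude::*;

    // Defaults and Option-typed arguments live in the signature attribute;
    // pyo3 maps them positionally onto the Rust parameters below.
    #[pyfunction]
    #[pyo3(signature = (edges, resolution=1.0, seed=None))]
    fn cluster_sketch(
        edges: Vec<(String, String, f64)>,
        resolution: f64,
        seed: Option<u64>,
    ) -> PyResult<usize> {
        let _ = (edges, resolution, seed);
        Ok(0)
    }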
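Note on the clap 2 -> 4 migration in the CLI below: most of the builder API
was renamed along the way. A rough sketch of the mapping, with illustrative
argument names rather than the real leiden_cli definition:

    use clap::{value_parser, Arg, ArgAction, Command};

    // clap 2 -> clap 4:
    //   App::new(..)          -> Command::new(..)
    //   Arg::with_name(..)    -> Arg::new(..)
    //   .short("q")           -> .short('q')            (char, not &str)
    //   .takes_value(true)    -> .action(ArgAction::Set)
    //   .possible_value(..)   -> .value_parser([..])
    //   matches.value_of(..)  -> matches.get_one::<T>(..)
    fn main() {
        let matches = Command::new("example")
            .arg(
                Arg::new("iterations")
                    .short('i')
                    .action(ArgAction::Set)
                    .value_parser(value_parser!(usize))
                    .default_value("1"),
            )
            .arg(
                Arg::new("quality")
                    .short('q')
                    .action(ArgAction::Set)
                    .value_parser(["modularity", "cpm"])
                    .default_value("modularity"),
            )
            .get_matches();
        let iterations: usize = *matches.get_one::<usize>("iterations").expect("defaulted");
        let quality: &String = matches.get_one::<String>("quality").expect("defaulted");
        println!("iterations={iterations}, quality={quality}");
    }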
---
 .github/workflows/build.yml                   |  12 +-
 .rustfmt.toml                                 |   3 +-
 clippy.toml                                   |   2 +
 .cargo/config => config.toml                  |   0
 packages/cli/Cargo.toml                       |   9 +-
 packages/cli/src/args.rs                      |  74 ++----
 packages/cli/src/leiden.rs                    |   4 +-
 packages/cli/src/main.rs                      |  58 ++---
 packages/network_partitions/Cargo.toml        |   6 +-
 packages/network_partitions/src/clustering.rs |  55 ++--
 .../src/leiden/full_network_clustering.rs     |  49 ++--
 .../src/leiden/full_network_work_queue.rs     |  22 +-
 .../src/leiden/hierarchical.rs                |  30 +--
 .../{leiden.rs => leiden_clustering.rs}       |  52 +---
 packages/network_partitions/src/leiden/mod.rs |   4 +-
 .../src/leiden/neighboring_clusters.rs        |  30 +--
 .../src/leiden/quality_value_increment.rs     |   4 +-
 .../src/leiden/subnetwork.rs                  |  37 ++-
 packages/network_partitions/src/lib.rs        |   1 -
 packages/network_partitions/src/macros.rs     |  46 ----
 .../src/network/compact_network.rs            |  85 +++---
 .../src/network/identifier.rs                 |  19 +-
 .../src/network/labeled_network.rs            |  47 ++--
 .../src/network/network_builder.rs            |  18 --
 packages/network_partitions/src/quality.rs    |   2 +-
 .../network_partitions/src/random_vector.rs   |   6 +-
 packages/network_partitions/src/resolution.rs |   4 +-
 .../network_partitions/src/safe_vectors.rs    |   6 +-
 packages/pyo3/Cargo.toml                      |   9 +-
 packages/pyo3/pyproject.toml                  |  26 +-
 packages/pyo3/src/lib.rs                      |  67 ++---
 packages/pyo3/src/mediator.rs                 |  38 +--
 packages/pyo3/uv.lock                         | 242 ++++++++++++++++++
 33 files changed, 550 insertions(+), 517 deletions(-)
 create mode 100644 clippy.toml
 rename .cargo/config => config.toml (100%)
 rename packages/network_partitions/src/leiden/{leiden.rs => leiden_clustering.rs} (88%)
 delete mode 100644 packages/network_partitions/src/macros.rs
 create mode 100644 packages/pyo3/uv.lock

diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index bf2944e..41605b6 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -12,10 +12,10 @@ jobs:
     runs-on: "ubuntu-latest"
     steps:
       - uses: actions/checkout@v4
-      - name: Set up Python 3.8
+      - name: Set up Python 3.12
         uses: actions/setup-python@v5
         with:
-          python-version: 3.8
+          python-version: 3.12
       - name: Materialize build number
         run: |
           pip install -U pip
@@ -38,10 +38,10 @@ jobs:
         os: ["ubuntu-latest", "windows-latest", "macos-latest"]
     steps:
       - uses: actions/checkout@v4
-      - name: Set up Python 3.9
+      - name: Set up Python 3.12
        uses: actions/setup-python@v5
        with:
-          python-version: 3.9
+          python-version: 3.12
      - uses: actions/download-artifact@v4
        with:
          name: cargo-toml
@@ -106,10 +106,10 @@ jobs:
     if: github.ref=='refs/heads/main' || github.ref=='refs/heads/dev'
     steps:
       - uses: actions/checkout@v4
-      - name: Set up Python 3.8
+      - name: Set up Python 3.12
         uses: actions/setup-python@v2
         with:
-          python-version: 3.8
+          python-version: 3.12
       - uses: actions/download-artifact@v4
         with:
           name: dist-ubuntu-latest
diff --git a/.rustfmt.toml b/.rustfmt.toml
index 3fd9bce..307cca5 100644
--- a/.rustfmt.toml
+++ b/.rustfmt.toml
@@ -1,3 +1,2 @@
-fn_args_layout = "vertical"
-empty_item_single_line = false
+fn_params_layout = "vertical"
diff --git a/clippy.toml b/clippy.toml
new file mode 100644
index 0000000..e910272
--- /dev/null
+++ b/clippy.toml
@@ -0,0 +1,2 @@
+too-many-arguments-threshold=20 # for what it is worth, clippy is absolutely right and pythonic-ness is absolutely wrong
+enum-variant-name-threshold=10 # it doesn't like the repetition in "Error" in my export-to-python error types
diff --git a/.cargo/config b/config.toml
similarity index 100%
rename from .cargo/config
rename to config.toml
diff --git a/packages/cli/Cargo.toml b/packages/cli/Cargo.toml
index 52514c1..92b07b4 100644
--- a/packages/cli/Cargo.toml
+++ b/packages/cli/Cargo.toml
@@ -1,16 +1,13 @@
 [package]
 name = "cli"
-version = "0.1.0"
-authors = ["Dwayne Pryce "]
+version = "0.1.1"
+authors = ["Dax Pryce "]
 edition = "2018"
 license = "MIT"
 description = "CLI Runner for the topologic associated crates (network_partitions and eventually network_automatic_layouts)"

 [dependencies]
-clap = "2.34"
+clap = "4.5"
 rand = "0.8"
 rand_xorshift = "0.3"
 network_partitions={path = "../network_partitions"}
-
-[features]
-logging = ["network_partitions/logging"]
diff --git a/packages/cli/src/args.rs b/packages/cli/src/args.rs
index af96c43..5dc665e 100644
--- a/packages/cli/src/args.rs
+++ b/packages/cli/src/args.rs
@@ -33,27 +33,30 @@ pub struct CliArgs {
     pub skip_first_line: bool,
 }

-impl TryFrom<ArgMatches<'_>> for CliArgs {
+impl TryFrom<ArgMatches> for CliArgs {
     type Error = ParseCliError;

-    fn try_from(matches: ArgMatches<'_>) -> Result<Self, Self::Error> {
-        let source_edges = matches
-            .value_of(SOURCE_EDGES)
+    fn try_from(matches: ArgMatches) -> Result<Self, Self::Error> {
+        let source_edges: &str = matches
+            .get_one(SOURCE_EDGES)
+            .cloned()
             .ok_or(ParseCliError::RequiredValueError)?;
-        let output = matches
-            .value_of(OUTPUT)
+        let output: &str = matches
+            .get_one(OUTPUT)
+            .cloned()
             .ok_or(ParseCliError::RequiredValueError)?;
-        let separator = matches
-            .value_of(SEPARATOR)
+        let separator: &str = matches
+            .get_one(SEPARATOR)
+            .cloned()
             .ok_or(ParseCliError::RequiredValueError)?;
-        let source_index: usize = matches.value_of(SOURCE_INDEX).as_a()?;
-        let target_index: usize = matches.value_of(TARGET_INDEX).as_a()?;
-        let weight_index: Option<usize> = matches.value_of(WEIGHT_INDEX).as_a()?;
-        let seed: Option<usize> = matches.value_of(SEED).as_a()?;
-        let iterations: usize = matches.value_of(ITERATIONS).as_a()?;
-        let resolution: f64 = matches.value_of(RESOLUTION).as_a()?;
-        let randomness: f64 = matches.value_of(RANDOMNESS).as_a()?;
-        let quality_option: Option<&str> = matches.value_of(QUALITY);
+        let source_index: usize = *matches.get_one(SOURCE_INDEX).unwrap();
+        let target_index: usize = *matches.get_one(TARGET_INDEX).unwrap();
+        let weight_index: Option<usize> = matches.get_one(WEIGHT_INDEX).copied();
+        let seed: Option<usize> = matches.get_one(SEED).cloned();
+        let iterations: usize = *matches.get_one(ITERATIONS).unwrap();
+        let resolution: f64 = *matches.get_one(RESOLUTION).unwrap();
+        let randomness: f64 = *matches.get_one(RANDOMNESS).unwrap();
+        let quality_option: Option<&str> = matches.get_one(QUALITY).cloned();
         let use_modularity: bool = match quality_option {
             Some(quality_value) => {
                 if quality_value == "cpm" {
@@ -66,7 +69,7 @@ impl TryFrom<ArgMatches<'_>> for CliArgs {
             }
             None => Err(ParseCliError::RequiredValueError),
         }?;
-        let skip_first_line: bool = matches.is_present(HAS_HEADER);
+        let skip_first_line: bool = matches.contains_id(HAS_HEADER);
         let cli_args: CliArgs = CliArgs {
             source_edges: source_edges.into(),
             output_path: output.into(),
@@ -81,7 +84,7 @@ impl TryFrom<ArgMatches<'_>> for CliArgs {
             use_modularity,
             skip_first_line,
         };
-        return Ok(cli_args);
+        Ok(cli_args)
     }
 }

@@ -94,43 +97,12 @@ pub enum ParseCliError {

 impl From<ParseFloatError> for ParseCliError {
     fn from(_: ParseFloatError) -> Self {
-        return ParseCliError::NotANumber;
+        ParseCliError::NotANumber
     }
 }

 impl From<ParseIntError> for ParseCliError {
     fn from(_: ParseIntError) -> Self {
-        return ParseCliError::NotANumber;
-    }
-}
-
-trait As<T> {
-    fn as_a(&self) -> Result<T, ParseCliError>;
-}
-
-impl As<usize> for Option<&str> {
-    fn as_a(&self) -> Result<usize, ParseCliError> {
-        self.map(|cli_arg| cli_arg.parse::<usize>().unwrap())
-            .ok_or(ParseCliError::RequiredValueError)
-    }
-}
-
-impl As<f64> for Option<&str> {
-    fn as_a(&self) -> Result<f64, ParseCliError> {
-        self.map(|cli_arg| cli_arg.parse::<f64>().unwrap())
-            .ok_or(ParseCliError::RequiredValueError)
-    }
-}
-
-impl As<Option<usize>> for Option<&str> {
-    fn as_a(&self) -> Result<Option<usize>, ParseCliError> {
-        let result = match self {
-            Some(cli_arg) => {
-                let parse_result = cli_arg.parse::<usize>();
-                Ok(parse_result.map(|value| Some(value))?)
-            }
-            None => Ok(None),
-        };
-        return result;
+        ParseCliError::NotANumber
     }
 }
diff --git a/packages/cli/src/leiden.rs b/packages/cli/src/leiden.rs
index f656af8..8e91e29 100644
--- a/packages/cli/src/leiden.rs
+++ b/packages/cli/src/leiden.rs
@@ -75,9 +75,9 @@ pub fn leiden(
     let mut output_file: File =
         File::create(output_path).expect("Unable to open output file for writing");
     for item in &clustering {
-        write!(
+        writeln!(
             output_file,
-            "{},{}\n",
+            "{},{}",
             labeled_network.label_for(item.node_id),
             item.cluster
         )
diff --git a/packages/cli/src/main.rs b/packages/cli/src/main.rs
index bd203f6..7545f07 100644
--- a/packages/cli/src/main.rs
+++ b/packages/cli/src/main.rs
@@ -1,8 +1,7 @@
 // Copyright (c) Microsoft Corporation.
 // Licensed under the MIT license.

-#![feature(in_band_lifetimes)]
-use clap::{App, Arg};
+use clap::{Arg, ArgAction, Command};
 use std::convert::TryFrom;

 mod args;
@@ -11,83 +10,82 @@ mod leiden;
 use crate::args::*;

 fn main() {
-    let matches = App::new("leiden_cli")
+    let matches = Command::new("leiden_cli")
         .version("0.1.0")
         .author("Dwayne Pryce ")
         .about("Runs leiden over a provided edge list and outputs the results")
         .arg(
-            Arg::with_name(SOURCE_EDGES)
+            Arg::new(SOURCE_EDGES)
                 .help("The edge list that defines the graph's connections")
                 .required(true)
                 .index(1),
         )
         .arg(
-            Arg::with_name(OUTPUT)
+            Arg::new(OUTPUT)
                 .help("The output for the communities detected")
                 .required(true)
                 .index(2),
         )
         .arg(
-            Arg::with_name(SEPARATOR)
-                .short("s")
+            Arg::new(SEPARATOR)
+                .short('s')
                 .help("The character to split the edge list on")
-                .takes_value(true)
+                .action(ArgAction::Set)
                 .default_value("\t"),
         )
         .arg(
-            Arg::with_name(SOURCE_INDEX)
-                .takes_value(true)
+            Arg::new(SOURCE_INDEX)
+                .action(ArgAction::Set)
                 .help("0-based index of source column from edge file")
                 .default_value("0"),
         )
         .arg(
-            Arg::with_name(TARGET_INDEX)
-                .takes_value(true)
+            Arg::new(TARGET_INDEX)
+                .action(ArgAction::Set)
                 .help("0-based index of target column from edge file")
                 .default_value("1"),
         )
         .arg(
-            Arg::with_name(WEIGHT_INDEX)
-                .takes_value(true)
+            Arg::new(WEIGHT_INDEX)
+                .action(ArgAction::Set)
                 .help("0-based index of weight column from edge file")
         )
         .arg(
-            Arg::with_name(SEED)
-                .takes_value(true)
+            Arg::new(SEED)
+                .action(ArgAction::Set)
                 .help("A seed value to start the PRNG")
                 .long("seed"),
         )
         .arg(
-            Arg::with_name(ITERATIONS)
-                .takes_value(true)
+            Arg::new(ITERATIONS)
+                .action(ArgAction::Set)
                 .help("Leiden is an inherently recursive algorithm, however it may find itself (due to randomness) at a localized maximum. Setting iterations to a number larger than 1 may allow you to jump out of a local maximum and continue until a better optimum partitioning is found (note that any n > 1 will mean that leiden will be run again for a minimum of n-1 more times, though it may be run for many more than that")
-                .short("i")
+                .short('i')
                 .default_value("1"),
         )
         .arg(
-            Arg::with_name(RESOLUTION)
-                .takes_value(true)
+            Arg::new(RESOLUTION)
+                .action(ArgAction::Set)
                 .help("")
-                .short("r")
+                .short('r')
                 .default_value("1.0")
         )
         .arg(
-            Arg::with_name(RANDOMNESS)
-                .takes_value(true)
+            Arg::new(RANDOMNESS)
+                .action(ArgAction::Set)
                 .help("")
                 .default_value("1E-2"),
         )
         .arg(
-            Arg::with_name(QUALITY)
-                .takes_value(true)
+            Arg::new(QUALITY)
+                .action(ArgAction::Set)
                 .help("Quality function to use")
-                .short("q")
-                .possible_value("modularity")
-                .possible_value("cpm")
+                .short('q')
+                .value_parser(["modularity", "cpm"])
                 .default_value("modularity"),
         )
         .arg(
-            Arg::with_name(HAS_HEADER)
+            Arg::new(HAS_HEADER)
                 .help("Flag must be added if the source file contains a header line")
                 .long("has_header")
         )
diff --git a/packages/network_partitions/Cargo.toml b/packages/network_partitions/Cargo.toml
index 21b1da8..bf9b9f2 100644
--- a/packages/network_partitions/Cargo.toml
+++ b/packages/network_partitions/Cargo.toml
@@ -1,18 +1,14 @@
 [package]
 name = "network_partitions"
 version = "0.1.0"
-authors = ["Dwayne Pryce "]
+authors = ["Dax Pryce "]
 edition = "2018"
 license = "MIT"
 description = "Leiden community detection as per https://arxiv.org/abs/1810.08473"

 [dependencies]
 rand = "0.8"
-chrono = { version = "0.4", optional = true }

 [dev-dependencies]
 rand_xorshift = "0.3"
-
-[features]
-logging = ["chrono"]
-debug = []
diff --git a/packages/network_partitions/src/clustering.rs b/packages/network_partitions/src/clustering.rs
index 28d7095..893211c 100644
--- a/packages/network_partitions/src/clustering.rs
+++ b/packages/network_partitions/src/clustering.rs
@@ -27,13 +27,19 @@ pub struct Clustering {
     node_to_cluster_mapping: Vec<usize>,
 }

+impl Default for Clustering {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
 impl Clustering {
     /// Creates an empty Clustering with no nodes. Can be added to by calling `update_node_cluster`.
     pub fn new() -> Clustering {
-        return Clustering {
+        Clustering {
             next_cluster_id: 0,
             node_to_cluster_mapping: Vec::new(),
-        };
+        }
     }

     /// Creates a Clustering with `num_nodes` entries in the `node_to_cluster_mapping` vector and
@@ -41,10 +47,10 @@ impl Clustering {
     pub fn as_self_clusters(num_nodes: usize) -> Clustering {
         let mut identity_mapping: Vec<usize> = Vec::with_capacity(num_nodes);
         identity_mapping.extend(0..num_nodes);
-        return Clustering {
+        Clustering {
             next_cluster_id: num_nodes,
             node_to_cluster_mapping: identity_mapping,
-        };
+        }
     }

     /// Creates a clustering (with ZERO sanity checking) of the values stored in Clustering.
@@ -53,28 +59,27 @@ impl Clustering {
         node_to_cluster_mapping: Vec<usize>,
         next_cluster_id: usize,
     ) -> Clustering {
-        return Clustering {
+        Clustering {
             next_cluster_id,
             node_to_cluster_mapping,
-        };
+        }
     }

     /// The actual number of nodes in this Clustering
     pub fn num_nodes(&self) -> usize {
-        return self.node_to_cluster_mapping.len();
+        self.node_to_cluster_mapping.len()
     }

     pub fn next_cluster_id(&self) -> usize {
-        return self.next_cluster_id;
+        self.next_cluster_id
     }

     pub fn cluster_at(
         &self,
         node: usize,
     ) -> Result<usize, CoreError> {
-        return self
-            .node_to_cluster_mapping
-            .get_or_err(node, CoreError::ClusterIndexingError);
+        self.node_to_cluster_mapping
+            .get_or_err(node, CoreError::ClusterIndexingError)
     }

     pub fn update_cluster_at(
@@ -82,23 +87,23 @@ impl Clustering {
         node: usize,
         cluster: usize,
     ) -> Result<(), CoreError> {
-        return if self.node_to_cluster_mapping.is_safe_access(node) {
+        if self.node_to_cluster_mapping.is_safe_access(node) {
             self.node_to_cluster_mapping[node] = cluster;
             self.next_cluster_id = self.next_cluster_id.max(cluster + 1);
             Ok(())
         } else {
             Err(CoreError::ClusterIndexingError)
-        };
+        }
     }

     /// Generates a vector of nodes for each cluster with the index referencing the cluster and the
     /// value being a count from 0 upward.
     pub fn num_nodes_per_cluster(&self) -> Vec<u64> {
-        let mut nodes_per_cluster: Vec<u64> = vec![0 as u64; self.next_cluster_id];
+        let mut nodes_per_cluster: Vec<u64> = vec![0_u64; self.next_cluster_id];
         for i in 0..self.node_to_cluster_mapping.len() {
             nodes_per_cluster[self.node_to_cluster_mapping[i]] += 1;
         }
-        return nodes_per_cluster;
+        nodes_per_cluster
     }

     /// Generates a vector containing every node id for every cluster id. The outer vector index
@@ -107,13 +112,13 @@ impl Clustering {
         let number_nodes_per_cluster: Vec<u64> = self.num_nodes_per_cluster();
         let mut nodes_per_cluster: Vec<Vec<usize>> = Vec::with_capacity(self.next_cluster_id);
-        for i in 0..self.next_cluster_id {
-            nodes_per_cluster.push(Vec::with_capacity(number_nodes_per_cluster[i] as usize));
+        for nodes_in_cluster in number_nodes_per_cluster.iter().take(self.next_cluster_id) {
+            nodes_per_cluster.push(Vec::with_capacity(*nodes_in_cluster as usize));
         }
         for (node_id, cluster) in self.node_to_cluster_mapping.iter().enumerate() {
             nodes_per_cluster[*cluster].push(node_id);
         }
-        return nodes_per_cluster;
+        nodes_per_cluster
     }

     /// This method compacts the Clustering, removing empty clusters and applying new cluster IDs
@@ -186,7 +191,7 @@ impl From<Clustering> for HashMap<usize, usize> {
         for i in 0..clustering.node_to_cluster_mapping.len() {
             map.insert(i, clustering.node_to_cluster_mapping[i]);
         }
-        return map;
+        map
     }
 }

@@ -195,11 +200,11 @@ pub struct ClusterIterator<'a> {
     next_cluster_id: usize,
 }

-impl<'a> Iterator for ClusterIterator<'a> {
+impl Iterator for ClusterIterator<'_> {
     type Item = ClusterItem;

     fn next(&mut self) -> Option<Self::Item> {
-        return if self.next_cluster_id == self.cluster_ref.node_to_cluster_mapping.len() {
+        if self.next_cluster_id == self.cluster_ref.node_to_cluster_mapping.len() {
             None
         } else {
             let item = ClusterItem {
@@ -208,7 +213,7 @@ impl Iterator for ClusterIterator<'_> {
             };
             self.next_cluster_id += 1;
             Some(item)
-        };
+        }
     }
 }

@@ -217,10 +222,10 @@ impl<'a> IntoIterator for &'a Clustering {
     type IntoIter = ClusterIterator<'a>;

     fn into_iter(self) -> Self::IntoIter {
-        return ClusterIterator {
-            cluster_ref: &self,
+        ClusterIterator {
+            cluster_ref: self,
             next_cluster_id: 0,
-        };
+        }
     }
 }
diff --git a/packages/network_partitions/src/leiden/full_network_clustering.rs b/packages/network_partitions/src/leiden/full_network_clustering.rs
index f267f04..fca9c01 100644
--- a/packages/network_partitions/src/leiden/full_network_clustering.rs
+++ b/packages/network_partitions/src/leiden/full_network_clustering.rs
@@ -6,9 +6,7 @@ use super::quality_value_increment;
 use crate::clustering::Clustering;
 use crate::errors::CoreError;
 use crate::leiden::neighboring_clusters::NeighboringClusters;
-use crate::log;
 use crate::network::prelude::*;
-use crate::progress_meter;
 use rand::Rng;

 pub fn full_network_clustering<T>(
@@ -20,13 +18,6 @@
 where
     T: Rng,
 {
-    log!(
-        "Full network clustering starting for provided network with {} nodes and {} edges and an initial clustering with a max cluster id of {}",
-        network.num_nodes(),
-        network.num_edges(),
-        clustering.next_cluster_id()
-    );
-
     if network.num_nodes() <= 1 {
         return Ok(false);
     }
@@ -46,12 +37,6 @@ where
         NeighboringClusters::with_capacity(network.num_nodes());

     while !work_queue.is_empty() {
-        progress_meter!(
-            "{}% complete (may repeat as nodes are marked unstable)",
-            network.num_nodes() - work_queue.len(),
-            network.num_nodes()
-        );
-
         let current_node: usize = work_queue.pop_front()?;
         let current_cluster: usize = clustering.cluster_at(current_node)?;
         let current_node_weight: f64 = network.node_weight(current_node);
@@ -77,7 +62,7 @@ where
             current_node,
             current_cluster,
             &mut neighboring_clusters,
-            &mut unused_clusters,
+            &unused_clusters,
             num_unused_clusters,
         )?;
@@ -130,7 +115,7 @@ where
     if improved {
         clustering.remove_empty_clusters();
     }
-    return Ok(improved);
+    Ok(improved)
 }

 fn weights_and_counts_per_cluster(
@@ -145,12 +130,12 @@ fn weights_and_counts_per_cluster(
         cluster_weights[cluster_id] += compact_node.weight;
         num_nodes_per_cluster[cluster_id] += 1;
     }
-    return Ok((cluster_weights, num_nodes_per_cluster));
+    Ok((cluster_weights, num_nodes_per_cluster))
 }

 fn unused_clusters(
     network: &CompactNetwork,
-    num_nodes_per_cluster: &Vec<usize>,
+    num_nodes_per_cluster: &[usize],
 ) -> (Vec<usize>, usize) {
     let size: usize = network.num_nodes() - 1;
     let mut unused_clusters: Vec<usize> = vec![0; size];
@@ -161,26 +146,26 @@ fn unused_clusters(
             num_unused_clusters += 1;
         }
     }
-    return (unused_clusters, num_unused_clusters);
+    (unused_clusters, num_unused_clusters)
 }

 fn leave_current_cluster(
     cluster: usize,
     node_weight: f64,
-    cluster_weights: &mut Vec<f64>,
-    num_nodes_per_cluster: &mut Vec<usize>,
-    unused_clusters: &mut Vec<usize>,
+    cluster_weights: &mut [f64],
+    num_nodes_per_cluster: &mut [usize],
+    unused_clusters: &mut [usize],
     num_unused_clusters: usize,
 ) -> usize {
     cluster_weights[cluster] -= node_weight;
     num_nodes_per_cluster[cluster] -= 1;
-    return if num_nodes_per_cluster[cluster] == 0 {
+    if num_nodes_per_cluster[cluster] == 0 {
         unused_clusters[num_unused_clusters] = cluster;
         num_unused_clusters + 1
     } else {
         num_unused_clusters
-    };
+    }
 }

 fn identify_neighboring_clusters(
@@ -189,7 +174,7 @@ fn identify_neighboring_clusters(
     current_node: usize,
     current_cluster: usize,
     neighboring_clusters: &mut NeighboringClusters,
-    unused_clusters: &Vec<usize>,
+    unused_clusters: &[usize],
     num_unused_clusters: usize,
 ) -> Result<(), CoreError> {
     neighboring_clusters.reset_for_current_cluster(current_cluster);
@@ -201,7 +186,7 @@ fn identify_neighboring_clusters(
         neighboring_clusters.increase_cluster_weight(neighbor_cluster, neighbor.edge_weight);
     }
     neighboring_clusters.freeze();
-    return Ok(());
+    Ok(())
 }

 fn best_cluster_for(
@@ -209,7 +194,7 @@ fn best_cluster_for(
     current_node_weight: f64,
     adjusted_resolution: f64,
     neighboring_clusters: &NeighboringClusters,
-    cluster_weights: &Vec<f64>,
+    cluster_weights: &[f64],
 ) -> usize {
     let mut best_cluster: usize = current_cluster;
     let mut max_quality_value_increment: f64 = quality_value_increment::calculate(
@@ -232,14 +217,14 @@ fn best_cluster_for(
             max_quality_value_increment = quality_value_increment;
         }
     }
-    return best_cluster;
+    best_cluster
 }

 fn join_cluster(
     cluster: usize,
     node_weight: f64,
-    cluster_weights: &mut Vec<f64>,
-    num_nodes_per_cluster: &mut Vec<usize>,
+    cluster_weights: &mut [f64],
+    num_nodes_per_cluster: &mut [usize],
     num_unused_clusters: &mut usize,
     last_unused_cluster: usize,
 ) {
@@ -263,7 +248,7 @@ fn trigger_cluster_change(
             work_queue.push_back(neighbor.id);
         }
     }
-    return Ok(());
+    Ok(())
 }

 #[cfg(test)]
diff --git a/packages/network_partitions/src/leiden/full_network_work_queue.rs b/packages/network_partitions/src/leiden/full_network_work_queue.rs
index 91c8a8a..ad905c9 100644
--- a/packages/network_partitions/src/leiden/full_network_work_queue.rs
+++ b/packages/network_partitions/src/leiden/full_network_work_queue.rs
@@ -2,17 +2,19 @@
 // Licensed under the MIT license.

 use crate::errors::CoreError;
-use crate::log;
 use std::collections::VecDeque;

 use rand::Rng;

 /// The FullNetworkWorkQueue is a composite class of a circular work queue and a vec of bools indicating
 /// when a node should be treated as stable
+///
 /// Node stability is a prerequisite for being added to the work queue, for if it is unstable
 /// it likewise means it is already on the work queue.
+///
 /// On `pop_front()`, presuming the work queue is not empty, the value is retrieved from the work queue,
 /// and immediately marked as stable; this guarantees consistency within this object.
+///
 /// If a recoverable error occurs while processing this item, the onus is on the user to
 /// reinsert the item via `push_front`.
 #[derive(Debug, PartialEq)]
@@ -24,10 +26,10 @@ pub struct FullNetworkWorkQueue {
 impl FullNetworkWorkQueue {
     #[allow(dead_code)]
     pub fn new() -> FullNetworkWorkQueue {
-        return FullNetworkWorkQueue {
+        FullNetworkWorkQueue {
             work_queue: VecDeque::new(),
             stable: Vec::new(),
-        };
+        }
     }

     /// Generates a random order from [0..len) in the work queue, and initializes the stability
@@ -40,6 +42,7 @@ impl FullNetworkWorkQueue {
     /// > This avoids reallocating where possible, but the conditions for that are strict, and subject
     /// > to change, and so shouldn't be relied upon unless the Vec<T> came from From<VecDeque<T>>
     /// > and hasn't been reallocated.
+    ///
     /// However, creation of this item is called infrequently, and our worst case scenario is 2 O(n)s
    /// instead of 1 O(n). We'll use the speed boost now, but this may be worth looking into for
    /// speed sake periodically to verify that the actual current Rust impl of the `From` trait for
@@ -60,12 +63,10 @@ impl FullNetworkWorkQueue {
         for i in 0..len {
             stable.push(false);
             let random_index: usize = rng.gen_range(0..len);
-            let old_value: usize = permutation[i];
-            permutation[i] = permutation[random_index];
-            permutation[random_index] = old_value;
+            permutation.swap(i, random_index);
         }
         let work_queue: VecDeque<usize> = VecDeque::from(permutation);
-        return FullNetworkWorkQueue { work_queue, stable };
+        FullNetworkWorkQueue { work_queue, stable }
     }

     /// Presuming the work queue contains a value, pops it from that queue, marks the node as stable,
@@ -74,7 +75,7 @@ impl FullNetworkWorkQueue {
     pub fn pop_front(&mut self) -> Result<usize, CoreError> {
         let front: usize = self.work_queue.pop_front().ok_or(CoreError::QueueError)?;
         self.stable[front] = true;
-        return Ok(front);
+        Ok(front)
     }

     /// If the item to be added to the work queue is not already on it, add it to the queue
@@ -87,7 +88,6 @@ impl FullNetworkWorkQueue {
         if self.stable.len() <= item {
             // increase the size to at least include item+1, and set all of the values to be stable
             // this shouldn't be happening, and if it is, I need to know about it
-            log!("We had to resize the FullNetworkWorkQueue's stability array from {} to {}. This is unexpected.", self.stable.len(), item+1);
             self.stable.resize(item + 1, true);
         }
         if self.stable[item] {
@@ -97,12 +97,12 @@ impl FullNetworkWorkQueue {
     }

     pub fn is_empty(&self) -> bool {
-        return self.work_queue.is_empty();
+        self.work_queue.is_empty()
     }

     #[allow(dead_code)]
     pub fn len(&self) -> usize {
-        return self.work_queue.len();
+        self.work_queue.len()
     }
 }
diff --git a/packages/network_partitions/src/leiden/hierarchical.rs b/packages/network_partitions/src/leiden/hierarchical.rs
index d81e12f..0e4de1e 100644
--- a/packages/network_partitions/src/leiden/hierarchical.rs
+++ b/packages/network_partitions/src/leiden/hierarchical.rs
@@ -1,9 +1,8 @@
 use rand::Rng;

-use super::leiden::leiden;
+use super::leiden_clustering::leiden;
 use crate::clustering::{ClusterItem, Clustering};
 use crate::errors::CoreError;
-use crate::log;
 use crate::network::prelude::*;
 use std::collections::HashSet;
 use std::collections::VecDeque;
@@ -54,10 +53,10 @@ impl HierarchicalClustering {
         }
         cluster_range.push((range_start, hierarchical.len()));

-        return HierarchicalClustering {
+        HierarchicalClustering {
             hierarchical_clusterings: hierarchical,
             cluster_range,
-        };
+        }
     }

     pub fn insert_subnetwork_clustering(
@@ -100,7 +99,7 @@ impl HierarchicalClustering {
             self.hierarchical_clusterings[old_hierarchical_cluster_entry].is_final_cluster = false;
         }

-        return final_cluster_id;
+        final_cluster_id
     }
 }

@@ -113,7 +112,7 @@ impl OrderedClustering for Clustering {
         let mut ordered_cluster_items: Vec<ClusterItem> = self.into_iter().collect();
         ordered_cluster_items
             .sort_by(|a, b| a.cluster.cmp(&b.cluster).then(a.node_id.cmp(&b.node_id)));
-        return ordered_cluster_items;
+        ordered_cluster_items
     }
 }

@@ -140,8 +139,6 @@ where
         use_modularity,
     )?;

-    log!("First clustering completed.");
-
     let mut hierarchical_clustering: HierarchicalClustering =
         HierarchicalClustering::new(&updated_clustering);
     let mut work_queue: VecDeque<HierarchicalWork> = VecDeque::new();
@@ -156,11 +153,6 @@ where
         max_cluster_size,
         use_modularity,
     ) {
-        log!(
-            "Cluster {} contains more than {} values and will be added to the work queue",
-            subnetwork.id,
-            max_cluster_size
-        );
         work_queue.push_back(HierarchicalWork {
             subnetwork: subnetwork.subnetwork,
             parent_cluster: subnetwork.id,
@@ -184,10 +176,6 @@ where
         if subnetwork_clustering.next_cluster_id() == 1 {
             // we couldn't break this cluster down any further.
             clusters_that_did_not_split.insert(work_item.parent_cluster);
-            log!(
-                "Cluster {} did not split so we will not re-process it.",
-                work_item.parent_cluster
-            );
         } else {
             hierarchical_clustering.insert_subnetwork_clustering(
                 &subnetwork,
@@ -204,7 +192,6 @@ where
         }

         if work_queue.is_empty() {
-            log!("Level {} complete, seeking any other clusters larger than {} size for further refinement", level, max_cluster_size);
             level += 1;
             let nodes_by_cluster: Vec<Vec<usize>> = updated_clustering.nodes_per_cluster();
             for subnetwork in network.subnetworks_iter(
@@ -225,10 +212,5 @@ where
             }
         }
     }
-    log!(
-        "Unable to break down {} clusters, {:?}",
-        clusters_that_did_not_split.len(),
-        clusters_that_did_not_split
-    );
-    return Ok(hierarchical_clustering.hierarchical_clusterings);
+    Ok(hierarchical_clustering.hierarchical_clusterings)
 }
diff --git a/packages/network_partitions/src/leiden/leiden.rs b/packages/network_partitions/src/leiden/leiden_clustering.rs
similarity index 88%
rename from packages/network_partitions/src/leiden/leiden.rs
rename to packages/network_partitions/src/leiden/leiden_clustering.rs
index 9e15474..281c898 100644
--- a/packages/network_partitions/src/leiden/leiden.rs
+++ b/packages/network_partitions/src/leiden/leiden_clustering.rs
@@ -8,9 +8,7 @@ use rand::Rng;

 use crate::clustering::{ClusterItem, Clustering};
 use crate::errors::CoreError;
-use crate::log;
 use crate::network::prelude::*;
-use crate::progress_meter;
 use crate::resolution::adjust_resolution;

 use super::full_network_clustering;
@@ -81,21 +79,12 @@ where
     }

     let mut clustering: Clustering =
-        clustering.unwrap_or(Clustering::as_self_clusters(network.num_nodes().clone()));
+        clustering.unwrap_or(Clustering::as_self_clusters(network.num_nodes()));

     guarantee_clustering_sanity(network, &mut clustering)?;

     let mut improved: bool = false;

-    log!(
-        "Running Leiden with the maximization function {} for {} iterations over a network with {} nodes and {} edges with a total edge weight of {} and total node weight of {}",
-        if use_modularity { "modularity" } else { "cpm" },
-        iterations,
-        &network.num_nodes(),
-        &network.num_edges(),
-        &network.total_edge_weight(),
-        &network.total_node_weight(),
-    );
     for _i in 0..iterations {
         improved |= improve_clustering(
             network,
@@ -107,7 +96,7 @@ where
         )?;
     }

-    return Ok((improved, clustering));
+    Ok((improved, clustering))
 }

 /// This function will be executed repeatedly as per number_iterations
@@ -130,13 +119,7 @@ where
         rng,
     )?;

-    log!(
-        "Full network clustering completed, determined there should be {} clusters for {} nodes",
-        &clustering.next_cluster_id(),
-        &clustering.num_nodes()
-    );
-
-    if clustering.next_cluster_id() < network.num_nodes().clone() {
+    if clustering.next_cluster_id() < network.num_nodes() {
         // given the updated clustering, generate subnetworks for each cluster comprised solely of the
         // nodes in that cluster, then fast, low-fidelity cluster the subnetworks, merging the results
         // back into the primary clustering before returning
@@ -155,7 +138,6 @@ where
         SubnetworkClusteringGenerator::with_capacity(max_subnetwork_size as usize);

     for item in subnetworks_iterator {
-        progress_meter!("{}% complete", item.id, num_subnetworks);
         if num_nodes_per_cluster[item.id] == 1 && item.subnetwork.num_nodes() == 0 {
             // this is a singleton cluster, and cannot move from what it previously was.
             // the subnetwork actually has no information about the nodes in it, because we don't
@@ -193,14 +175,6 @@ where
         induced_clustering_network.num_nodes(),
     );

-    log!(
-        "Induced network with {} nodes and {} edges with a total edge weight of {} and total node weight of {}",
-        &induced_clustering_network.num_nodes(),
-        &induced_clustering_network.num_edges(),
-        &induced_clustering_network.total_edge_weight(),
-        &induced_clustering_network.total_node_weight(),
-    );
-
     improved |= improve_clustering(
         &induced_clustering_network,
         &mut induced_network_clustering,
@@ -211,7 +185,7 @@ where
     )?;
         clustering.merge_clustering(&induced_network_clustering);
     }
-    return Ok(improved);
+    Ok(improved)
 }

 fn initial_clustering_for_induced(
@@ -220,18 +194,18 @@ fn initial_clustering_for_induced(
 ) -> Clustering {
     // Create an initial clustering for the induced network based on the non-refined clustering
     let mut clusters_induced_network: Vec<usize> = Vec::with_capacity(num_nodes);
-    for num_nodes_per_induced_cluster_index in 0..num_nodes_per_cluster_induced_network.len() {
+    for (num_nodes_per_induced_cluster_index, repetitions) in
+        num_nodes_per_cluster_induced_network.iter().enumerate()
+    {
         // fill num_nodes_per_induced_cluster_index into positions from clusters_induced_network_index to clusters_induced_network_index + num_nodes_per_cluster_reduced_network[num_nodes_per_induced_cluster_index]
-        let repetitions: usize =
-            num_nodes_per_cluster_induced_network[num_nodes_per_induced_cluster_index];
         clusters_induced_network
-            .extend(iter::repeat(num_nodes_per_induced_cluster_index).take(repetitions));
+            .extend(iter::repeat(num_nodes_per_induced_cluster_index).take(*repetitions));
     }
     let next_cluster_id: usize = match clusters_induced_network.last() {
-        Some(largest_cluster) => largest_cluster.clone() + 1,
+        Some(largest_cluster) => *largest_cluster + 1,
         None => 0,
     };
-    return Clustering::as_defined(clusters_induced_network, next_cluster_id);
+    Clustering::as_defined(clusters_induced_network, next_cluster_id)
 }

 fn guarantee_clustering_sanity(
@@ -252,11 +226,11 @@ fn guarantee_clustering_sanity(
     let mut cluster_membership: HashMap<usize, HashSet<usize>> = HashMap::new();
     for ClusterItem { node_id, cluster } in clustering.into_iter() {
         let cluster_members: &mut HashSet<usize> =
-            cluster_membership.entry(cluster).or_insert(HashSet::new());
+            cluster_membership.entry(cluster).or_default();
         cluster_members.insert(node_id);
     }

-    for (_cluster, cluster_members) in &cluster_membership {
+    for cluster_members in cluster_membership.values() {
         if cluster_members.len() > 1 {
             // we are only trying to move non-singletons if they don't have a possible connection
             for cluster_member in cluster_members {
@@ -270,7 +244,7 @@ fn guarantee_clustering_sanity(
             }
         }
     }
-    return Ok(());
+    Ok(())
 }

 #[cfg(test)]
diff --git a/packages/network_partitions/src/leiden/mod.rs b/packages/network_partitions/src/leiden/mod.rs
index 6ecba9b..e2beedc 100644
--- a/packages/network_partitions/src/leiden/mod.rs
+++ b/packages/network_partitions/src/leiden/mod.rs
@@ -2,12 +2,12 @@
 // Licensed under the MIT license.

 pub use self::hierarchical::{hierarchical_leiden, HierarchicalCluster};
-pub use self::leiden::leiden;
+pub use self::leiden_clustering::leiden;

 mod full_network_clustering;
 mod full_network_work_queue;
 mod hierarchical;
-mod leiden;
+mod leiden_clustering;
 mod neighboring_clusters;
 mod quality_value_increment;
 mod subnetwork;
diff --git a/packages/network_partitions/src/leiden/neighboring_clusters.rs b/packages/network_partitions/src/leiden/neighboring_clusters.rs
index 54506e3..20efae5 100644
--- a/packages/network_partitions/src/leiden/neighboring_clusters.rs
+++ b/packages/network_partitions/src/leiden/neighboring_clusters.rs
@@ -11,26 +11,23 @@ pub struct NeighboringClusters {

 impl NeighboringClusters {
     pub fn with_capacity(length: usize) -> NeighboringClusters {
-        return NeighboringClusters {
+        NeighboringClusters {
             neighboring_clusters: Vec::with_capacity(length + 1),
             neighbor_edge_weights_within_cluster: vec![f64::NAN; length + 1],
             current_cluster: None,
-        };
+        }
     }

     pub fn reset_for_current_cluster(
         &mut self,
         current_cluster: usize,
     ) {
-        match self.current_cluster {
-            Some(current_cluster) => {
-                self.neighbor_edge_weights_within_cluster[current_cluster] = f64::NAN;
-                for cluster in &self.neighboring_clusters {
-                    self.neighbor_edge_weights_within_cluster[*cluster] = f64::NAN;
-                }
-                self.neighboring_clusters.clear();
+        if let Some(current_cluster) = self.current_cluster {
+            self.neighbor_edge_weights_within_cluster[current_cluster] = f64::NAN;
+            for cluster in &self.neighboring_clusters {
+                self.neighbor_edge_weights_within_cluster[*cluster] = f64::NAN;
             }
-            None => {}
+            self.neighboring_clusters.clear();
         }
         self.current_cluster = Some(current_cluster);
     }
@@ -50,13 +47,10 @@ impl NeighboringClusters {

     pub fn freeze(&mut self) {
         // only set the weight for the current cluster if no other neighbors belong to it.
-        match self.current_cluster {
-            Some(current_cluster) => {
-                if self.neighbor_edge_weights_within_cluster[current_cluster].is_nan() {
-                    self.neighbor_edge_weights_within_cluster[current_cluster] = 0_f64;
-                }
+        if let Some(current_cluster) = self.current_cluster {
+            if self.neighbor_edge_weights_within_cluster[current_cluster].is_nan() {
+                self.neighbor_edge_weights_within_cluster[current_cluster] = 0_f64;
             }
-            None => {}
         }
     }

@@ -64,10 +58,10 @@ impl NeighboringClusters {
         &self,
         cluster: usize,
     ) -> f64 {
-        return self.neighbor_edge_weights_within_cluster[cluster];
+        self.neighbor_edge_weights_within_cluster[cluster]
     }

     pub fn iter(&self) -> Iter<usize> {
-        return self.neighboring_clusters.iter();
+        self.neighboring_clusters.iter()
     }
 }
diff --git a/packages/network_partitions/src/leiden/quality_value_increment.rs b/packages/network_partitions/src/leiden/quality_value_increment.rs
index 9a43e6b..1c390e6 100644
--- a/packages/network_partitions/src/leiden/quality_value_increment.rs
+++ b/packages/network_partitions/src/leiden/quality_value_increment.rs
@@ -11,7 +11,7 @@ pub fn as_reference_impl(
     cluster_weight: f64,
     adjusted_resolution: f64,
 ) -> f64 {
-    return cluster_edge_weights - (node_weight * cluster_weight * adjusted_resolution);
+    cluster_edge_weights - (node_weight * cluster_weight * adjusted_resolution)
 }

 #[allow(dead_code)] // there will inherently be dead code in this module
@@ -21,5 +21,5 @@ pub fn as_paper(
     cluster_weight: f64,
     adjusted_resolution: f64,
 ) -> f64 {
-    return cluster_edge_weights - ((node_weight + cluster_weight) * adjusted_resolution);
+    cluster_edge_weights - ((node_weight + cluster_weight) * adjusted_resolution)
 }
diff --git a/packages/network_partitions/src/leiden/subnetwork.rs b/packages/network_partitions/src/leiden/subnetwork.rs
index 3b305b0..0bd6ff5 100644
--- a/packages/network_partitions/src/leiden/subnetwork.rs
+++ b/packages/network_partitions/src/leiden/subnetwork.rs
@@ -31,13 +31,13 @@ impl SubnetworkClusteringGenerator {
         let neighboring_cluster_edge_weights: Vec<f64> = Vec::with_capacity(capacity);
         let singleton_clusters: Vec<bool> = Vec::with_capacity(capacity);
         let summed_qvi_records: Vec<f64> = Vec::with_capacity(capacity);
-        return SubnetworkClusteringGenerator {
+        SubnetworkClusteringGenerator {
             node_processing_order,
             neighboring_clusters,
             neighboring_cluster_edge_weights,
             singleton_clusters,
             summed_qvi_records,
-        };
+        }
     }

     pub fn subnetwork_clustering(
@@ -104,7 +104,7 @@ impl SubnetworkClusteringGenerator {
                 let chosen_cluster: usize = best_cluster_for_node(
                     node,
                     subnetwork.node(node).weight,
-                    &neighboring_clusters,
+                    neighboring_clusters,
                     neighboring_cluster_edge_weights,
                     &cluster_weights,
                     &external_edge_weight_per_cluster,
@@ -138,7 +138,7 @@ impl SubnetworkClusteringGenerator {
         if improved {
             clustering.remove_empty_clusters();
         }
-        return Ok(clustering);
+        Ok(clustering)
     }

     fn subnetwork_reset(
@@ -163,30 +163,27 @@ impl SubnetworkClusteringGenerator {

         for i in 0..length {
             let random_index: usize = rng.gen_range(0..length);
-            let old_value: usize = self.node_processing_order[i];
-            self.node_processing_order[i] = self.node_processing_order[random_index];
-            self.node_processing_order[random_index] = old_value;
+            self.node_processing_order.swap(i, random_index);
         }
     }
 }

 fn node_can_move(
     node: usize,
-    cluster_weights: &Vec<f64>,
-    external_edge_weight_per_cluster: &Vec<f64>,
+    cluster_weights: &[f64],
+    external_edge_weight_per_cluster: &[f64],
     total_node_weight: f64,
-    singleton_clusters: &Vec<bool>,
+    singleton_clusters: &[bool],
     adjusted_resolution: f64,
 ) -> bool {
     let connectivity_threshold: f64 = cluster_weights[node]
         * (total_node_weight - cluster_weights[node])
         * adjusted_resolution;

-    return singleton_clusters[node]
-        && external_edge_weight_per_cluster[node] >= connectivity_threshold;
+    singleton_clusters[node] && external_edge_weight_per_cluster[node] >= connectivity_threshold
 }

 fn node_reset(
     neighboring_clusters: &mut Vec<usize>,
-    neighboring_cluster_edge_weights: &mut Vec<f64>,
+    neighboring_cluster_edge_weights: &mut [f64],
     summed_qvi_records: &mut Vec<f64>,
     node: usize,
 ) {
@@ -201,10 +198,10 @@ fn node_reset(
 fn best_cluster_for_node(
     node: usize,
     node_weight: f64,
-    neighboring_clusters: &Vec<usize>,
-    neighboring_cluster_edge_weights: &mut Vec<f64>,
-    cluster_weights: &Vec<f64>,
-    external_edge_weight_per_cluster: &Vec<f64>,
+    neighboring_clusters: &[usize],
+    neighboring_cluster_edge_weights: &mut [f64],
+    cluster_weights: &[f64],
+    external_edge_weight_per_cluster: &[f64],
     total_node_weight: f64,
     summed_qvi_records: &mut Vec<f64>,
     adjusted_resolution: f64,
@@ -258,12 +255,12 @@ where
         } else {
             best_cluster
         };
-    return chosen_cluster;
+    chosen_cluster
 }

 /// Approximate the .exp() function, and more importantly, reduce the amount of times we will get infinite return values
 fn approximate_exponent(result: f64) -> f64 {
-    return if result < -256_f64 {
+    if result < -256_f64 {
         0_f64
     } else {
         let mut result = 1_f64 + result / 256_f64;
@@ -276,5 +273,5 @@ fn approximate_exponent(result: f64) -> f64 {
         result *= result;
         result *= result;
         result
-    };
+    }
 }
diff --git a/packages/network_partitions/src/lib.rs b/packages/network_partitions/src/lib.rs
index 3bdcfe7..55fa254 100644
--- a/packages/network_partitions/src/lib.rs
+++ b/packages/network_partitions/src/lib.rs
@@ -4,7 +4,6 @@
 pub mod clustering;
 pub mod errors;
 pub mod leiden;
-pub mod macros;
 pub mod network;
 pub mod quality;
 pub mod random_vector;
diff --git a/packages/network_partitions/src/macros.rs b/packages/network_partitions/src/macros.rs
deleted file mode 100644
index 5199505..0000000
--- a/packages/network_partitions/src/macros.rs
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright (c) Microsoft Corporation.
-// Licensed under the MIT license.
-
-#[macro_export]
-macro_rules! log {
-    ($message:expr) => {{
-        #[cfg(feature = "logging")]
-        {
-            use chrono::Local;
-            println!("{}: {}", Local::now().format("%H:%M:%S%.3f"), $message);
-            //println!($message);
-        }
-    }};
-    ($fmt:expr, $($args:tt)*) => {{
-        #[cfg(feature = "logging")]
-        {
-            use chrono::Local;
-            let message = format!($fmt, $($args)*);
-            println!("{}: {}", Local::now().format("%H:%M:%S%.3f"), message);
-            //println!($fmt, $($args)*);
-        }
-    }};
-}
-
-#[macro_export]
-macro_rules! progress_meter {
-    ($fmt: expr, $current_work_index: expr, $total_work_length: expr) => {{
-        #[cfg(feature = "logging")]
-        {
-            if $current_work_index == $total_work_length - 1 {
-                log!($fmt, "100");
-            } else {
-                let ten_percent: f64 = ($total_work_length as f64 / 10_f64).ceil();
-                if $current_work_index as f64 % ten_percent
-                    > ($current_work_index + 1) as f64 % ten_percent
-                {
-                    let numerator: f64 = ($current_work_index + 1) as f64;
-                    let denominator: f64 = $total_work_length as f64;
-
-                    let decile: f64 = (numerator / denominator * 10_f64).floor() * 10_f64;
-                    log!($fmt, decile);
-                }
-            }
-        }
-    }};
-}
diff --git a/packages/network_partitions/src/network/compact_network.rs b/packages/network_partitions/src/network/compact_network.rs
index d02dc51..5fbf59d 100644
--- a/packages/network_partitions/src/network/compact_network.rs
+++ b/packages/network_partitions/src/network/compact_network.rs
@@ -14,6 +14,7 @@
 ///   second collection
 /// - A second vector whose indices are the EdgeIds (usize) and whose values are a tuple of
 ///   (NodeId, weight (f64)).
+///
 /// The second vector's entries make two guarantees: that all of the neighbors for a given node
 /// will be continuous, and that the neighbors will be sorted in ascending order as per NodeId.
 ///
@@ -95,11 +96,11 @@ impl<'a> CompactNodeItem<'a> {
         // make the neighbor iterator here, not on compactnodeitem creation
         let neighbor_range: Range<ConnectionId> = self.compact_network.neighbor_range(self.id);
         let neighbor_start: ConnectionId = neighbor_range.start;
-        return NeighborIterator {
+        NeighborIterator {
             compact_network: self.compact_network,
             neighbor_range,
             current_neighbor: neighbor_start,
-        };
+        }
     }
 }

@@ -130,11 +131,11 @@ impl CompactNetwork {
         neighbors: Vec<CompactNeighbor>,
         total_self_links_edge_weight: f64,
     ) -> CompactNetwork {
-        return CompactNetwork {
+        CompactNetwork {
             nodes,
             neighbors,
             total_self_links_edge_weight,
-        };
+        }
     }

     fn neighbor_range(
@@ -155,11 +156,11 @@ impl CompactNetwork {
         id: CompactNodeId,
     ) -> CompactNodeItem {
         let weight: &f64 = &self.nodes[id].0;
-        return CompactNodeItem {
+        CompactNodeItem {
             id,
             weight: *weight,
             compact_network: self,
-        };
+        }
     }

     pub fn neighbors_for(
@@ -168,35 +169,34 @@ impl CompactNetwork {
     ) -> NeighborIterator {
         let neighbor_range: Range<ConnectionId> = self.neighbor_range(id);
         let neighbor_start: ConnectionId = neighbor_range.start;
-        return NeighborIterator {
+        NeighborIterator {
             compact_network: self,
             neighbor_range,
             current_neighbor: neighbor_start,
-        };
+        }
     }

     pub fn node_weight(
         &self,
         id: CompactNodeId,
     ) -> f64 {
-        return self.nodes[id].0;
+        self.nodes[id].0
     }

     pub fn node_weights(&self) -> Vec<f64> {
-        return self.nodes.iter().map(|(weight, _)| *weight).collect();
+        self.nodes.iter().map(|(weight, _)| *weight).collect()
     }

     pub fn total_edge_weight_per_node(&self) -> Vec<f64> {
         // when using modularity, this should return the exact same as node_weights.
- return self - .nodes + self.nodes .iter() .map(|(_, node_id)| { self.neighbors_for(*node_id) .map(|neighbor| neighbor.edge_weight) .sum::() }) - .collect(); + .collect() } pub fn subnetworks_iter<'a, 'b>( @@ -207,20 +207,20 @@ impl CompactNetwork { ) -> SubnetworkIterator<'a, 'b> { let clustering: Clustering = clustering.clone(); - return SubnetworkIterator { + SubnetworkIterator { compact_supernetwork: self, clustering, clustered_nodes: nodes_by_cluster, current_clustered_nodes_index: 0, builder: LabeledNetworkBuilder::new(), subnetwork_minimum_size, - }; + } } pub fn filtered_subnetworks<'a>( &'a self, clustering: &'a Clustering, - nodes_by_cluster: &'a Vec>, + nodes_by_cluster: &'a [Vec], subnetwork_minimum_size: u32, use_modularity: bool, ) -> impl Iterator> + 'a { @@ -233,7 +233,7 @@ impl CompactNetwork { nodes_in_cluster.len() >= subnetwork_minimum_size as usize }) .map(move |(cluster_id, nodes_in_cluster)| { - let subnetwork_edges = nodes_in_cluster.into_iter().flat_map(|node| { + let subnetwork_edges = nodes_in_cluster.iter().flat_map(|node| { self.neighbors_for(*node) .filter(|neighbor| clustering[neighbor.id] == cluster_id) .map(move |neighbor| (*node, neighbor.id, neighbor.edge_weight)) @@ -245,7 +245,7 @@ impl CompactNetwork { id: cluster_id, } }); - return subnetwork_iterator; + subnetwork_iterator } pub fn induce_clustering_network( @@ -268,7 +268,7 @@ impl CompactNetwork { } else { *cluster_to_cluster_edges .entry(node_cluster) - .or_insert(HashMap::new()) + .or_default() .entry(neighbor_cluster) .or_insert(0_f64) += neighbor.edge_weight; } @@ -278,11 +278,15 @@ impl CompactNetwork { let mut cluster_nodes: Vec = Vec::with_capacity(clustering.next_cluster_id()); let mut cluster_neighbors: Vec = Vec::new(); - for cluster in 0..clustering.next_cluster_id() { - cluster_nodes.push((cluster_weights[cluster], cluster_neighbors.len())); + for (cluster, cluster_weight) in cluster_weights + .iter() + .take(clustering.next_cluster_id()) + .enumerate() + { + cluster_nodes.push((*cluster_weight, cluster_neighbors.len())); let mut neighbors: Vec<(&usize, &f64)> = cluster_to_cluster_edges .entry(cluster) - .or_insert(HashMap::new()) + .or_default() .iter() .collect(); neighbors.sort_unstable_by(|a, b| a.0.cmp(b.0)); @@ -298,34 +302,33 @@ impl CompactNetwork { cluster_total_self_links_edge_weight, ); - return Ok(induced); + Ok(induced) } } impl NetworkDetails for CompactNetwork { fn num_nodes(&self) -> usize { - return self.nodes.len(); + self.nodes.len() } fn num_edges(&self) -> usize { - return (self.neighbors.len() as f64 / 2_f64) as usize; + (self.neighbors.len() as f64 / 2_f64) as usize } fn total_node_weight(&self) -> f64 { - return self.nodes.iter().map(|node| node.0).sum::(); + self.nodes.iter().map(|node| node.0).sum::() } fn total_edge_weight(&self) -> f64 { - return self - .neighbors + self.neighbors .iter() .map(|neighbor| neighbor.1) .sum::() - / 2_f64; + / 2_f64 } fn total_self_links_edge_weight(&self) -> f64 { - return self.total_self_links_edge_weight; + self.total_self_links_edge_weight } } @@ -334,10 +337,10 @@ impl<'a> IntoIterator for &'a CompactNetwork { type IntoIter = NodeIterator<'a>; fn into_iter(self) -> Self::IntoIter { - return NodeIterator { - compact_network: &self, + NodeIterator { + compact_network: self, current_node: 0, - }; + } } } @@ -349,13 +352,13 @@ pub struct NodeIterator<'a> { impl<'a> Iterator for NodeIterator<'a> { type Item = CompactNodeItem<'a>; fn next(&mut self) -> Option { - return if self.current_node == self.compact_network.nodes.len() { + 
if self.current_node == self.compact_network.nodes.len() { None } else { let item = self.compact_network.node(self.current_node); self.current_node += 1; Some(item) - }; + } } } @@ -366,11 +369,11 @@ pub struct NeighborIterator<'a> { current_neighbor: ConnectionId, } -impl<'a> Iterator for NeighborIterator<'a> { +impl Iterator for NeighborIterator<'_> { type Item = CompactNeighborItem; fn next(&mut self) -> Option { - return if self.neighbor_range.contains(&self.current_neighbor) { + if self.neighbor_range.contains(&self.current_neighbor) { let (neighbor_id, edge_weight) = self.compact_network.neighbors[self.current_neighbor]; let item = CompactNeighborItem { connection_id: self.current_neighbor, @@ -382,7 +385,7 @@ impl<'a> Iterator for NeighborIterator<'a> { Some(item) } else { None - }; + } } } @@ -395,7 +398,7 @@ pub struct SubnetworkIterator<'a, 'b> { subnetwork_minimum_size: Option, } -impl<'a, 'b> Iterator for SubnetworkIterator<'a, 'b> { +impl Iterator for SubnetworkIterator<'_, '_> { type Item = CompactSubnetworkItem; fn next(&mut self) -> Option { let next_valid_position: Option = match self.subnetwork_minimum_size { @@ -425,7 +428,7 @@ impl<'a, 'b> Iterator for SubnetworkIterator<'a, 'b> { } } }; - return match next_valid_position { + match next_valid_position { Some(current) => { self.current_clustered_nodes_index = current; @@ -461,7 +464,7 @@ impl<'a, 'b> Iterator for SubnetworkIterator<'a, 'b> { self.current_clustered_nodes_index = self.clustered_nodes.len(); None } - }; + } } } diff --git a/packages/network_partitions/src/network/identifier.rs b/packages/network_partitions/src/network/identifier.rs index b8125be..ef9c598 100644 --- a/packages/network_partitions/src/network/identifier.rs +++ b/packages/network_partitions/src/network/identifier.rs @@ -9,6 +9,15 @@ pub struct Identifier { new_to_original: Vec, } +impl Default for Identifier +where + T: Clone + Hash + Eq, +{ + fn default() -> Self { + Self::new() + } +} + impl Identifier where T: Clone + Hash + Eq, @@ -19,14 +28,14 @@ where original_to_new: map, new_to_original: Vec::new(), }; - return id; + id } pub fn identify( &mut self, original: T, ) -> usize { - return match self.original_to_new.get(&original) { + match self.original_to_new.get(&original) { Some(id) => *id, None => { let new_id: usize = self.new_to_original.len(); @@ -34,11 +43,11 @@ where self.new_to_original.push(original); new_id } - }; + } } pub fn identity_map(&self) -> Vec { - return self.new_to_original.clone(); + self.new_to_original.clone() } pub fn clear(&mut self) { @@ -53,6 +62,6 @@ where self.original_to_new.clear(); self.new_to_original.clear(); - return (id_to_label, label_to_id); + (id_to_label, label_to_id) } } diff --git a/packages/network_partitions/src/network/labeled_network.rs b/packages/network_partitions/src/network/labeled_network.rs index f538d50..747276b 100644 --- a/packages/network_partitions/src/network/labeled_network.rs +++ b/packages/network_partitions/src/network/labeled_network.rs @@ -22,23 +22,23 @@ pub struct LabeledNetwork { impl NetworkDetails for LabeledNetwork { fn num_nodes(&self) -> usize { - return self.network_structure.num_nodes(); + self.network_structure.num_nodes() } fn num_edges(&self) -> usize { - return self.network_structure.num_edges(); + self.network_structure.num_edges() } fn total_node_weight(&self) -> f64 { - return self.network_structure.total_node_weight(); + self.network_structure.total_node_weight() } fn total_edge_weight(&self) -> f64 { - return self.network_structure.total_edge_weight(); + 
self.network_structure.total_edge_weight() } fn total_self_links_edge_weight(&self) -> f64 { - return self.network_structure.total_self_links_edge_weight(); + self.network_structure.total_self_links_edge_weight() } } @@ -47,6 +47,15 @@ pub struct LabeledNetworkBuilder { identifier: Identifier, } +impl Default for LabeledNetworkBuilder +where + T: Clone + Eq + Hash + PartialEq + std::cmp::PartialEq, +{ + fn default() -> Self { + Self::new() + } +} + impl LabeledNetworkBuilder where T: Clone + Eq + Hash + PartialEq + std::cmp::PartialEq, @@ -56,7 +65,7 @@ where node_to_neighbors: HashMap::new(), identifier: Identifier::new(), }; - return builder; + builder } pub fn with_capacity(size: usize) -> Self { @@ -64,7 +73,7 @@ where node_to_neighbors: HashMap::with_capacity(size), identifier: Identifier::new(), }; - return builder; + builder } pub fn build( @@ -93,12 +102,12 @@ where let target_id: CompactNodeId = self.identifier.identify(target); node_to_neighbors .entry(source_id) - .or_insert(HashMap::new()) + .or_default() .entry(target_id) .or_insert(weight); node_to_neighbors .entry(target_id) - .or_insert(HashMap::new()) + .or_default() .entry(source_id) .or_insert(weight); } @@ -108,11 +117,8 @@ where let mut total_self_links_edge_weight: f64 = 0_f64; for node_id in 0..id_to_labels.len() { let mut node_weight: f64 = 0_f64; // we are going to set the node_weight as the summation of edge weights regardless of whether we're using modularity or CPM, but if we are using CPM we won't bother to use it. - let mut node_neighbors: Vec<(&CompactNodeId, &f64)> = node_to_neighbors - .get(&node_id) - .unwrap() - .into_iter() - .collect(); + let mut node_neighbors: Vec<(&CompactNodeId, &f64)> = + node_to_neighbors.get(&node_id).unwrap().iter().collect(); let neighbor_start: usize = neighbors.len(); node_neighbors.sort_by(|a, b| a.0.cmp(b.0)); for (neighbor_id, edge_weight) in node_neighbors { @@ -136,7 +142,7 @@ where network_structure: compact_network, }; - return labeled_network; + labeled_network } } @@ -148,27 +154,26 @@ where /// that the edges provided are already in sorted source order (e.g. all edges from A to /// all appear sequentially in the list. /// So we must collect and guarantee that behavior with this function. 
- pub fn compact(&self) -> &CompactNetwork { - return &self.network_structure; + &self.network_structure } pub fn compact_id_for( &self, id: T, ) -> Option { - return self.labels_to_id.get(&id).cloned(); + self.labels_to_id.get(&id).cloned() } pub fn label_for( &self, compact_id: CompactNodeId, ) -> &T { - return &self.id_to_labels[compact_id]; + &self.id_to_labels[compact_id] } pub fn labeled_ids(&self) -> impl Iterator + '_ { - return self.id_to_labels.iter().enumerate(); + self.id_to_labels.iter().enumerate() } pub fn load_from( @@ -211,7 +216,7 @@ where let labeled_network: LabeledNetwork = builder.build(edges.into_iter(), use_modularity); - return Ok(labeled_network); + Ok(labeled_network) } } diff --git a/packages/network_partitions/src/network/network_builder.rs b/packages/network_partitions/src/network/network_builder.rs index c2ef624..98c5869 100644 --- a/packages/network_partitions/src/network/network_builder.rs +++ b/packages/network_partitions/src/network/network_builder.rs @@ -202,25 +202,7 @@ impl NetworkBuilder { mut self, edges: Vec<(String, String, f64)>, ) -> NetworkBuilder { - #[cfg(feature = "logging")] - let mut index: usize = 0; - #[cfg(feature = "logging")] - let edge_len: usize = edges.len(); - #[cfg(feature = "logging")] - let step: usize = (edge_len as f64 / 10_f64).floor() as usize; for (source, target, weight) in edges { - #[cfg(feature = "logging")] - { - // this way of logging is required because we're actually doing some non logging - index += 1; - if index % step == 0 { - log!( - "Added {} edges of {} total to the network builder", - index, - edge_len - ); - } - } self = self.add_edge(source, target, weight); } return self; diff --git a/packages/network_partitions/src/quality.rs b/packages/network_partitions/src/quality.rs index be1e7fb..85a461f 100644 --- a/packages/network_partitions/src/quality.rs +++ b/packages/network_partitions/src/quality.rs @@ -35,5 +35,5 @@ pub fn quality( quality /= 2_f64 * network.total_edge_weight() + network.total_self_links_edge_weight(); - return Ok(quality); + Ok(quality) } diff --git a/packages/network_partitions/src/random_vector.rs b/packages/network_partitions/src/random_vector.rs index 7cf4023..a0a5c65 100644 --- a/packages/network_partitions/src/random_vector.rs +++ b/packages/network_partitions/src/random_vector.rs @@ -19,10 +19,8 @@ where for i in 0..length { let random_index: usize = rng.gen_range(0..length); - let old_value: usize = permutation[i]; - permutation[i] = permutation[random_index]; - permutation[random_index] = old_value; + permutation.swap(i, random_index); } - return permutation; + permutation } diff --git a/packages/network_partitions/src/resolution.rs b/packages/network_partitions/src/resolution.rs index 7168c5c..4766f56 100644 --- a/packages/network_partitions/src/resolution.rs +++ b/packages/network_partitions/src/resolution.rs @@ -20,7 +20,7 @@ pub fn adjust_resolution( use_modularity: bool, ) -> f64 { let resolution: f64 = resolution.unwrap_or(DEFAULT_RESOLUTION); - return if use_modularity { + if use_modularity { // Note: this is adjusted from the version @ // https://github.com/CWTSLeiden/networkanalysis/blob/master/src/cwts/networkanalysis/run/RunNetworkClustering.java#L331 // which seems to be a bug since this resolution factor when used for modularity is @@ -29,5 +29,5 @@ pub fn adjust_resolution( / (2_f64 * (network.total_edge_weight() + network.total_self_links_edge_weight())) } else { resolution - }; + } } diff --git a/packages/network_partitions/src/safe_vectors.rs 
diff --git a/packages/network_partitions/src/safe_vectors.rs b/packages/network_partitions/src/safe_vectors.rs index 14a23e0..b5a8fea 100644 --- a/packages/network_partitions/src/safe_vectors.rs +++ b/packages/network_partitions/src/safe_vectors.rs @@ -26,14 +26,14 @@ impl<T: Clone> SafeVectors<T> for Vec<T> { &self, index: usize, ) -> bool { - return index < self.len(); + index < self.len() } fn is_valid_range( &self, index: usize, ) -> bool { - return index <= self.len(); + index <= self.len() } fn get_or_err( &self, index: usize, err: CoreError, ) -> Result<T, CoreError> { - return self.get(index).cloned().ok_or(err); + self.get(index).cloned().ok_or(err) } } diff --git a/packages/pyo3/Cargo.toml b/packages/pyo3/Cargo.toml index 88e5999..d542bf8 100644 --- a/packages/pyo3/Cargo.toml +++ b/packages/pyo3/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "graspologic_native" -version = "1.2.1" +version = "1.2.2" authors = ["daxpryce@microsoft.com"] edition = "2018" license = "MIT" @@ -15,11 +15,8 @@ crate-type = ["rlib","cdylib"] rand = "0.8" rand_xorshift = "0.3" network_partitions = { path = "../network_partitions" } -chrono = { version = "0.4", optional = true } [dependencies.pyo3] -version = "0.15.1" -features = ["extension-module", "abi3-py36"] - -logging = ["network_partitions/logging", "chrono"] +version = "0.23" +features = ["extension-module", "abi3-py38"] diff --git a/packages/pyo3/pyproject.toml b/packages/pyo3/pyproject.toml index 7015b27..11c93a5 100644 --- a/packages/pyo3/pyproject.toml +++ b/packages/pyo3/pyproject.toml @@ -1,27 +1,35 @@ [project] name = "graspologic-native" -maintainer = "Dax Pryce" -maintainer-email = "daxpryce@microsoft.com" -requires-python = ">=3.6,<3.13" +authors = [ + {name = "Dax Pryce", email = "daxpryce@microsoft.com"} ] +maintainers = [ + {name = "Dax Pryce", email = "daxpryce@microsoft.com"} ] dynamic = ["version"] -classifier = [ +requires-python = ">=3.8,<3.14" +classifiers = [ "Development Status :: 5 - Production/Stable", "License :: OSI Approved :: MIT License", "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: 3.6", - "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Topic :: Scientific/Engineering :: Mathematics"] [project.urls] -Github = "https://github.com/microsoft/graspologic-native" -Graspologic = "https://github.com/microsoft/graspologic" - +Github = "https://github.com/graspologic-org/graspologic-native" +Graspologic = "https://github.com/graspologic-org/graspologic" [build-system] requires = ["maturin>=1.4,<2.0"] build-backend = "maturin" + +[tool.uv] +dev-dependencies = [ + "ipython>=8.12.3", + "networkx>=3,<4", +] diff --git a/packages/pyo3/src/lib.rs b/packages/pyo3/src/lib.rs index 4521f1b..2749b4d 100644 --- a/packages/pyo3/src/lib.rs +++ b/packages/pyo3/src/lib.rs @@ -8,15 +8,10 @@ mod mediator; use std::collections::{HashMap, HashSet}; -use pyo3::exceptions::{PyTypeError, PyValueError}; use pyo3::prelude::*; -use pyo3::{create_exception, wrap_pyfunction, wrap_pymodule, PyObjectProtocol}; - -use pyo3::types::{PyDict, PyInt, PyList, PyString, PyTuple}; use network_partitions::clustering::Clustering; use network_partitions::errors::CoreError; -use network_partitions::log; use network_partitions::network::prelude::*; use network_partitions::quality; @@ -45,32 +40,21 @@ impl HierarchicalCluster {
.parent_cluster .map(|level| level.to_string()) .unwrap_or("None".into()); - return Ok(format!( + Ok(format!( "HierarchicalCluster(node=\"{}\", cluster=\"{}\", level={}, parent_cluster={}, is_final_cluster={})", self.node, self.cluster, self.level, parent, self.is_final_cluster, - )); + )) } fn __str__(&self) -> PyResult<String> { - return self.__repr__(); + self.__repr__() } } -#[pyfunction( - "/", - resolution = "1.0", - randomness = "0.001", - iterations = "1", - use_modularity = "true", - trials = "1" -)] -#[pyo3( - text_signature = "(edges, /, starting_communities, resolution, randomness, iterations, use_modularity, seed, trials)" -)] /// Leiden is a global network partitioning algorithm. Given a list of edges and a maximization /// function, it will iterate through the network attempting to find an optimal partitioning of /// the entire network. @@ -107,6 +91,8 @@ impl HierarchicalCluster { /// :raises InternalNetworkIndexingError: An internal algorithm error. Please report with reproduction steps. /// :raises ParameterRangeError: One of the parameters provided did not meet the requirements in the documentation. /// :raises UnsafeInducementError: An internal algorithm error. Please report with reproduction steps. +#[pyfunction] +#[pyo3(signature=(edges, /, starting_communities=None, resolution=1.0, randomness=0.001, iterations=1, use_modularity=true, seed=None, trials=1))] fn leiden( py: Python, edges: Vec<(String, String, f64)>, @@ -118,13 +104,6 @@ fn leiden( seed: Option<u64>, trials: u64, ) -> PyResult<(f64, HashMap<String, usize>)> { - #[cfg(feature = "logging")] - use std::time::Instant; - #[cfg(feature = "logging")] - let now: Instant = Instant::now(); - - log!("pyo3 converted {} edges from Python's representation to a Vec<(String, String, f64)> representation at {:?}", edges.len(), now); - let result: Result<(f64, HashMap<String, usize>), PyLeidenError> = py.allow_threads(move || { mediator::leiden( edges, starting_communities, resolution, randomness, iterations, use_modularity, seed, trials, ) }); - return result.map_err(|err| PyErr::from(err)); + result.map_err(PyErr::from) }
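Since leiden() above is the first function ported to the new attribute style, here is a generic sketch of the pyo3 0.23 convention for reference; the function and its names are made up for illustration and are not part of this patch:

use pyo3::prelude::*;

// Under pyo3 0.23, defaults and the positional-only marker `/` live in
// #[pyo3(signature = ...)] instead of string-valued defaults inside
// #[pyfunction(...)], and the Python-visible text signature is derived
// from it automatically rather than spelled out in text_signature.
#[pyfunction]
#[pyo3(signature = (x, /, scale=1.0, seed=None))]
fn scaled(x: f64, scale: f64, seed: Option<u64>) -> f64 {
    let _ = seed; // accepted but unused; purely illustrative
    x * scale
}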
-#[pyfunction( - "/", - resolution = "1.0", - randomness = "0.001", - iterations = "1", - use_modularity = "true", - max_cluster_size = "1000" -)] -#[pyo3( - text_signature = "(edges, /, starting_communities, resolution, randomness, iterations, use_modularity, max_cluster_size, seed)" -)] /// Hierarchical leiden builds upon the leiden function by further breaking down exceptionally large clusters. /// /// The process followed is to run leiden the first time, then each cluster with membership @@ -201,6 +169,8 @@ fn leiden( /// :raises InternalNetworkIndexingError: An internal algorithm error. Please report with reproduction steps. /// :raises ParameterRangeError: One of the parameters provided did not meet the requirements in the documentation. /// :raises UnsafeInducementError: An internal algorithm error. Please report with reproduction steps. +#[pyfunction] +#[pyo3(signature=(edges, /, starting_communities=None, resolution=1.0, randomness=0.001, iterations=1, use_modularity=true, max_cluster_size=1000, seed=None))] fn hierarchical_leiden( py: Python, edges: Vec<(String, String, f64)>, @@ -212,13 +182,6 @@ fn hierarchical_leiden( max_cluster_size: u32, seed: Option<u64>, ) -> PyResult<Vec<HierarchicalCluster>> { - #[cfg(feature = "logging")] - use std::time::Instant; - #[cfg(feature = "logging")] - let now: Instant = Instant::now(); - - log!("pyo3 converted {} edges from Python's representation to a Vec<(String, String, f64)> representation at {:?}", edges.len(), now); - let result: Result<Vec<HierarchicalCluster>, PyLeidenError> = py.allow_threads(move || { mediator::hierarchical_leiden( edges, starting_communities, resolution, randomness, iterations, use_modularity, max_cluster_size, seed, ) }); - return result.map_err(|err| PyErr::from(err)); + result.map_err(PyErr::from) } -#[pyfunction("/", resolution = "1.0")] -#[pyo3(text_signature = "(edges, communities, /, resolution)")] /// Measures the modularity for a global partitioning of a network described by a list of edges. /// /// :param edges: A list of edges, defined with the source and target encoded as strings and the edge weight being a float. @@ -243,12 +204,14 @@ fn hierarchical_leiden( /// :param communities: An optional initial mapping of nodes to their community. Note that /// this function does require that all nodes in the edge list have a community and nodes in the /// community dictionary exist as a node in the provided edge list. The community values must -/// also be a non negative number. +/// also be a non-negative number. /// :type communities: Dict[str, int] /// :param float resolution: Default is `1.0`. Higher resolution values lead to more communities and /// lower resolution values lead to fewer communities. Must be greater than 0. /// :return: The modularity of the community partitioning provided for the network.
/// :rtype: float +#[pyfunction] +#[pyo3(signature=(edges, communities, /, resolution=1.0))] fn modularity( py: Python, edges: Vec<(String, String, f64)>, @@ -258,15 +221,15 @@ fn modularity( let result: Result<f64, PyLeidenError> = py.allow_threads(move || mediator::modularity(edges, communities, resolution)); - return result.map_err(|err| PyErr::from(err)); + result.map_err(PyErr::from) } /// graspologic_native currently supports global network partitioning via the Leiden algorithm /// from Leiden University, described by https://arxiv.org/abs/1810.08473 #[pymodule] fn graspologic_native( - py: Python, - module: &PyModule, + py: Python<'_>, + module: &Bound<'_, PyModule>, ) -> PyResult<()> { module.add_class::<HierarchicalCluster>()?; module.add_wrapped(wrap_pyfunction!(leiden))?; diff --git a/packages/pyo3/src/mediator.rs b/packages/pyo3/src/mediator.rs index c22b76d..f7d9e42 100644 --- a/packages/pyo3/src/mediator.rs +++ b/packages/pyo3/src/mediator.rs @@ -6,7 +6,6 @@ use std::collections::{HashMap, HashSet}; use network_partitions::clustering::Clustering; use network_partitions::errors::CoreError; use network_partitions::leiden; -use network_partitions::log; use network_partitions::network::prelude::*; use network_partitions::quality; use network_partitions::safe_vectors::SafeVectors; @@ -27,16 +26,9 @@ pub fn leiden( seed: Option<u64>, trials: u64, ) -> Result<(f64, HashMap<String, usize>), PyLeidenError> { - log!( - "Building a LabeledNetwork for quality measured by {}", - if use_modularity { "modularity" } else { "CPM" } - ); - log!("Adding {} edges to network builder", edges.len()); - let mut builder: LabeledNetworkBuilder<String> = LabeledNetworkBuilder::new(); let labeled_network: LabeledNetwork<String> = builder.build(edges.into_iter(), use_modularity); - log!("Network built from edges"); let initial_clustering: Option<Clustering> = match starting_communities { Some(starting_communities) => Some(communities_to_clustering( &labeled_network, @@ -45,8 +37,6 @@ pub fn leiden( None => None, }; - log!("Mapped any starting communities from a dictionary into a clustering"); - let mut rng: XorShiftRng = match seed { Some(seed) => XorShiftRng::seed_from_u64(seed), None => XorShiftRng::from_entropy(), @@ -68,7 +58,6 @@ pub fn leiden( use_modularity, )?; - log!("Completed leiden process"); let quality_score: f64 = quality::quality( compact_network, &clustering, @@ -80,13 +69,9 @@ pub fn leiden( best_clustering = Some(clustering); } } - - log!("Calculated quality score"); let clustering: HashMap<String, usize> = map_from(&labeled_network, &best_clustering.unwrap())?; - log!("Mapped the clustering back to a dictionary: {:?}"); - - return Ok((best_quality_score, clustering)); + Ok((best_quality_score, clustering)) } pub fn modularity( @@ -103,7 +88,7 @@ pub fn modularity( Some(resolution), true, )?; - return Ok(quality); + Ok(quality) } pub fn hierarchical_leiden( @@ -116,16 +101,9 @@ pub fn hierarchical_leiden( max_cluster_size: u32, seed: Option<u64>, ) -> Result<Vec<HierarchicalCluster>, PyLeidenError> { - log!( - "Building a LabeledNetwork for quality measured by {}", - if use_modularity { "modularity" } else { "CPM" } - ); - log!("Adding {} edges to network builder", edges.len()); - let mut builder: LabeledNetworkBuilder<String> = LabeledNetworkBuilder::new(); let labeled_network: LabeledNetwork<String> = builder.build(edges.into_iter(), use_modularity); - log!("Network built from edges"); let clustering: Option<Clustering> = match starting_communities { Some(starting_communities) => Some(communities_to_clustering( &labeled_network, starting_communities, )?), None => None, }; - - log!("Mapped any starting communities from a dictionary into a
clustering"); - let mut rng: XorShiftRng = match seed { Some(seed) => XorShiftRng::seed_from_u64(seed), None => XorShiftRng::from_entropy(), }; - log!("Running hierarchical leiden over a network of {} nodes, {} edges, with a max_cluster_size of {}", labeled_network.num_nodes(), labeled_network.num_edges(), max_cluster_size); let compact_network: &CompactNetwork = labeled_network.compact(); let internal_clusterings: Vec = leiden::hierarchical_leiden( compact_network, @@ -154,8 +128,6 @@ pub fn hierarchical_leiden( max_cluster_size, )?; - log!("Completed hierarchical leiden process"); - let mut hierarchical_clustering: Vec = Vec::with_capacity(internal_clusterings.len()); for internal in internal_clusterings { @@ -169,7 +141,7 @@ pub fn hierarchical_leiden( }); } - return Ok(hierarchical_clustering); + Ok(hierarchical_clustering) } fn map_from( @@ -181,7 +153,7 @@ fn map_from( let node_name = network.label_for(item.node_id); map.insert(node_name.into(), item.cluster); } - return Ok(map); + Ok(map) } fn communities_to_clustering( @@ -219,5 +191,5 @@ fn communities_to_clustering( // and compress any gaps clustering.remove_empty_clusters(); - return Ok(clustering); + Ok(clustering) } diff --git a/packages/pyo3/uv.lock b/packages/pyo3/uv.lock new file mode 100644 index 0000000..34bf690 --- /dev/null +++ b/packages/pyo3/uv.lock @@ -0,0 +1,242 @@ +version = 1 +requires-python = ">=3.8, <3.14" + +[[package]] +name = "appnope" +version = "0.1.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/35/5d/752690df9ef5b76e169e68d6a129fa6d08a7100ca7f754c89495db3c6019/appnope-0.1.4.tar.gz", hash = "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee", size = 4170 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/81/29/5ecc3a15d5a33e31b26c11426c45c501e439cb865d0bff96315d86443b78/appnope-0.1.4-py2.py3-none-any.whl", hash = "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c", size = 4321 }, +] + +[[package]] +name = "asttokens" +version = "3.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/4a/e7/82da0a03e7ba5141f05cce0d302e6eed121ae055e0456ca228bf693984bc/asttokens-3.0.0.tar.gz", hash = "sha256:0dcd8baa8d62b0c1d118b399b2ddba3c4aff271d0d7a9e0d4c1681c79035bbc7", size = 61978 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/25/8a/c46dcc25341b5bce5472c718902eb3d38600a903b14fa6aeecef3f21a46f/asttokens-3.0.0-py3-none-any.whl", hash = "sha256:e3078351a059199dd5138cb1c706e6430c05eff2ff136af5eb4790f9d28932e2", size = 26918 }, +] + +[[package]] +name = "backcall" +version = "0.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a2/40/764a663805d84deee23043e1426a9175567db89c8b3287b5c2ad9f71aa93/backcall-0.2.0.tar.gz", hash = "sha256:5cbdbf27be5e7cfadb448baf0aa95508f91f2bbc6c6437cd9cd06e2a4c215e1e", size = 18041 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/1c/ff6546b6c12603d8dd1070aa3c3d273ad4c07f5771689a7b69a550e8c951/backcall-0.2.0-py2.py3-none-any.whl", hash = "sha256:fbbce6a29f263178a1f7915c1940bde0ec2b2a967566fe1c65c1dfb7422bd255", size = 11157 }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = 
"sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335 }, +] + +[[package]] +name = "decorator" +version = "5.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/0c/8d907af351aa16b42caae42f9d6aa37b900c67308052d10fdce809f8d952/decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330", size = 35016 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d5/50/83c593b07763e1161326b3b8c6686f0f4b0f24d5526546bee538c89837d6/decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186", size = 9073 }, +] + +[[package]] +name = "executing" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/8c/e3/7d45f492c2c4a0e8e0fad57d081a7c8a0286cdd86372b070cca1ec0caa1e/executing-2.1.0.tar.gz", hash = "sha256:8ea27ddd260da8150fa5a708269c4a10e76161e2496ec3e587da9e3c0fe4b9ab", size = 977485 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b5/fd/afcd0496feca3276f509df3dbd5dae726fcc756f1a08d9e25abe1733f962/executing-2.1.0-py2.py3-none-any.whl", hash = "sha256:8d63781349375b5ebccc3142f4b30350c0cd9c79f921cde38be2be4637e98eaf", size = 25805 }, +] + +[[package]] +name = "graspologic-native" +version = "1.2.2" +source = { editable = "." } + +[package.dev-dependencies] +dev = [ + { name = "ipython" }, + { name = "networkx" }, +] + +[package.metadata] + +[package.metadata.requires-dev] +dev = [ + { name = "ipython", specifier = ">=8.12.3" }, + { name = "networkx", specifier = ">=3,<4" }, +] + +[[package]] +name = "ipython" +version = "8.12.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "appnope", marker = "sys_platform == 'darwin'" }, + { name = "backcall" }, + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "decorator" }, + { name = "jedi" }, + { name = "matplotlib-inline" }, + { name = "pexpect", marker = "sys_platform != 'win32'" }, + { name = "pickleshare" }, + { name = "prompt-toolkit" }, + { name = "pygments" }, + { name = "stack-data" }, + { name = "traitlets" }, + { name = "typing-extensions", marker = "python_full_version < '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9e/6a/44ef299b1762f5a73841e87fae8a73a8cc8aee538d6dc8c77a5afe1fd2ce/ipython-8.12.3.tar.gz", hash = "sha256:3910c4b54543c2ad73d06579aa771041b7d5707b033bd488669b4cf544e3b363", size = 5470171 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8d/97/8fe103906cd81bc42d3b0175b5534a9f67dccae47d6451131cf8d0d70bb2/ipython-8.12.3-py3-none-any.whl", hash = "sha256:b0340d46a933d27c657b211a329d0be23793c36595acf9e6ef4164bc01a1804c", size = 798307 }, +] + +[[package]] +name = "jedi" +version = "0.19.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "parso" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/72/3a/79a912fbd4d8dd6fbb02bf69afd3bb72cf0c729bb3063c6f4498603db17a/jedi-0.19.2.tar.gz", hash = "sha256:4770dc3de41bde3966b02eb84fbcf557fb33cce26ad23da12c742fb50ecb11f0", size = 1231287 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/c0/5a/9cac0c82afec3d09ccd97c8b6502d48f165f9124db81b4bcb90b4af974ee/jedi-0.19.2-py2.py3-none-any.whl", hash = "sha256:a8ef22bde8490f57fe5c7681a3c83cb58874daf72b4784de3cce5b6ef6edb5b9", size = 1572278 }, +] + +[[package]] +name = "matplotlib-inline" +version = "0.1.7" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "traitlets" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/99/5b/a36a337438a14116b16480db471ad061c36c3694df7c2084a0da7ba538b7/matplotlib_inline-0.1.7.tar.gz", hash = "sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90", size = 8159 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/8e/9ad090d3553c280a8060fbf6e24dc1c0c29704ee7d1c372f0c174aa59285/matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca", size = 9899 }, +] + +[[package]] +name = "networkx" +version = "3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/99/f9/d45c9ecf50a6b67a200e0bbd324201b5cd777dfc0e6c8f6d1620ce5a7ada/networkx-3.0.tar.gz", hash = "sha256:9a9992345353618ae98339c2b63d8201c381c2944f38a2ab49cb45a4c667e412", size = 1987075 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/11/eb/929b1a04b1778f4dd606c739c93c134306e4a31012e31e184c8308f3d985/networkx-3.0-py3-none-any.whl", hash = "sha256:58058d66b1818043527244fab9d41a51fcd7dcc271748015f3c181b8a90c8e2e", size = 2043929 }, +] + +[[package]] +name = "parso" +version = "0.8.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/66/94/68e2e17afaa9169cf6412ab0f28623903be73d1b32e208d9e8e541bb086d/parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d", size = 400609 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/ac/dac4a63f978e4dcb3c6d3a78c4d8e0192a113d288502a1216950c41b1027/parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18", size = 103650 }, +] + +[[package]] +name = "pexpect" +version = "4.9.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "ptyprocess" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/92/cc564bf6381ff43ce1f4d06852fc19a2f11d180f23dc32d9588bee2f149d/pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f", size = 166450 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772 }, +] + +[[package]] +name = "pickleshare" +version = "0.7.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/b6/df3c1c9b616e9c0edbc4fbab6ddd09df9535849c64ba51fcb6531c32d4d8/pickleshare-0.7.5.tar.gz", hash = "sha256:87683d47965c1da65cdacaf31c8441d12b8044cdec9aca500cd78fc2c683afca", size = 6161 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/41/220f49aaea88bc6fa6cba8d05ecf24676326156c23b991e80b3f2fc24c77/pickleshare-0.7.5-py2.py3-none-any.whl", hash = "sha256:9649af414d74d4df115d5d718f82acb59c9d418196b7b4290ed47a12ce62df56", size = 6877 }, +] + +[[package]] +name = "prompt-toolkit" +version = "3.0.48" +source = { registry = 
"https://pypi.org/simple" } +dependencies = [ + { name = "wcwidth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/2d/4f/feb5e137aff82f7c7f3248267b97451da3644f6cdc218edfe549fb354127/prompt_toolkit-3.0.48.tar.gz", hash = "sha256:d6623ab0477a80df74e646bdbc93621143f5caf104206aa29294d53de1a03d90", size = 424684 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/6a/fd08d94654f7e67c52ca30523a178b3f8ccc4237fce4be90d39c938a831a/prompt_toolkit-3.0.48-py3-none-any.whl", hash = "sha256:f49a827f90062e411f1ce1f854f2aedb3c23353244f8108b89283587397ac10e", size = 386595 }, +] + +[[package]] +name = "ptyprocess" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/20/e5/16ff212c1e452235a90aeb09066144d0c5a6a8c0834397e03f5224495c4e/ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220", size = 70762 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/22/a6/858897256d0deac81a172289110f31629fc4cee19b6f01283303e18c8db3/ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35", size = 13993 }, +] + +[[package]] +name = "pure-eval" +version = "0.2.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/05/0a34433a064256a578f1783a10da6df098ceaa4a57bbeaa96a6c0352786b/pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42", size = 19752 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842 }, +] + +[[package]] +name = "pygments" +version = "2.19.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/7c/2d/c3338d48ea6cc0feb8446d8e6937e1408088a72a39937982cc6111d17f84/pygments-2.19.1.tar.gz", hash = "sha256:61c16d2a8576dc0649d9f39e089b5f02bcd27fba10d8fb4dcc28173f7a45151f", size = 4968581 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0b/9fcc47d19c48b59121088dd6da2488a49d5f72dacf8262e2790a1d2c7d15/pygments-2.19.1-py3-none-any.whl", hash = "sha256:9ea1544ad55cecf4b8242fab6dd35a93bbce657034b0611ee383099054ab6d8c", size = 1225293 }, +] + +[[package]] +name = "stack-data" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asttokens" }, + { name = "executing" }, + { name = "pure-eval" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/28/e3/55dcc2cfbc3ca9c29519eb6884dd1415ecb53b0e934862d3559ddcb7e20b/stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9", size = 44707 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f1/7b/ce1eafaf1a76852e2ec9b22edecf1daa58175c090266e9f6c64afcd81d91/stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695", size = 24521 }, +] + +[[package]] +name = "traitlets" +version = "5.14.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/eb/79/72064e6a701c2183016abbbfedaba506d81e30e232a68c9f0d6f6fcd1574/traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7", size = 161621 } +wheels = [ + 
{ url = "https://files.pythonhosted.org/packages/00/c0/8f5d070730d7836adc9c9b6408dec68c6ced86b304a9b26a14df072a6e8c/traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f", size = 85359 }, +] + +[[package]] +name = "typing-extensions" +version = "4.12.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/df/db/f35a00659bc03fec321ba8bce9420de607a1d37f8342eee1863174c69557/typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8", size = 85321 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/26/9f/ad63fc0248c5379346306f8668cda6e2e2e9c95e01216d2b8ffd9ff037d0/typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d", size = 37438 }, +] + +[[package]] +name = "wcwidth" +version = "0.2.13" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6c/63/53559446a878410fc5a5974feb13d31d78d752eb18aeba59c7fef1af7598/wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5", size = 101301 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fd/84/fd2ba7aafacbad3c4201d395674fc6348826569da3c0937e75505ead3528/wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859", size = 34166 }, +]