From 12a32fefdf3e0a8af05b577c932feed49169ef68 Mon Sep 17 00:00:00 2001 From: Jonathan Pallant Date: Mon, 15 Jul 2024 17:59:08 +0100 Subject: [PATCH] generate-copyright: Marks which deps are in the stdlib --- .../generate-copyright/src/cargo_metadata.rs | 81 ++++++++++++------- src/tools/generate-copyright/src/main.rs | 74 ++++++++--------- 2 files changed, 83 insertions(+), 72 deletions(-) diff --git a/src/tools/generate-copyright/src/cargo_metadata.rs b/src/tools/generate-copyright/src/cargo_metadata.rs index 65ee40f91609b..4d366b01045fa 100644 --- a/src/tools/generate-copyright/src/cargo_metadata.rs +++ b/src/tools/generate-copyright/src/cargo_metadata.rs @@ -1,6 +1,6 @@ //! Gets metadata about a workspace from Cargo -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::BTreeMap; use std::ffi::{OsStr, OsString}; use std::path::Path; @@ -23,13 +23,18 @@ pub enum Error { RunningVendor, } -/// Describes one of our dependencies +/// Uniquely describes a package on crates.io #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct Dependency { +pub struct Package { /// The name of the package pub name: String, /// The version number pub version: String, +} + +/// Extra data about a package +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct PackageMetadata { /// The license it is under pub license: String, /// The list of authors from the package metadata @@ -40,20 +45,44 @@ pub struct Dependency { pub notices: BTreeMap, } -/// Use `cargo` to get a list of dependencies and their license data. +/// Use `cargo metadata` and `cargo vendor` to get a list of dependencies and their license data. /// /// This will involve running `cargo vendor` into `${BUILD}/vendor` so we can /// grab the license files. /// /// Any dependency with a path beginning with `root_path` is ignored, as we /// assume `reuse` has covered it already. -pub fn get( +pub fn get_metadata_and_notices( cargo: &Path, dest: &Path, root_path: &Path, manifest_paths: &[&Path], -) -> Result, Error> { - let mut temp_set = BTreeSet::new(); +) -> Result, Error> { + let mut output = get_metadata(cargo, root_path, manifest_paths)?; + + // Now do a cargo-vendor and grab everything + let vendor_path = dest.join("vendor"); + println!("Vendoring deps into {}...", vendor_path.display()); + run_cargo_vendor(cargo, &vendor_path, manifest_paths)?; + + // Now for each dependency we found, go and grab any important looking files + for (package, metadata) in output.iter_mut() { + load_important_files(package, metadata, &vendor_path)?; + } + + Ok(output) +} + +/// Use `cargo metadata` to get a list of dependencies and their license data. +/// +/// Any dependency with a path beginning with `root_path` is ignored, as we +/// assume `reuse` has covered it already. +pub fn get_metadata( + cargo: &Path, + root_path: &Path, + manifest_paths: &[&Path], +) -> Result, Error> { + let mut output = BTreeMap::new(); // Look at the metadata for each manifest for manifest_path in manifest_paths { if manifest_path.file_name() != Some(OsStr::new("Cargo.toml")) { @@ -71,7 +100,7 @@ pub fn get( .and_then(|v| v.as_str()) .map(Path::new) .ok_or_else(|| Error::MissingJsonElement("package.manifest_path"))?; - if manifest_path.starts_with(&root_path) { + if manifest_path.starts_with(root_path) { // it's an in-tree dependency and reuse covers it continue; } @@ -93,28 +122,14 @@ pub fn get( .ok_or_else(|| Error::MissingJsonElement("package.authors"))?; let authors: Vec = authors_list.iter().filter_map(|v| v.as_str()).map(|s| s.to_owned()).collect(); - temp_set.insert(Dependency { - name: name.to_owned(), - version: version.to_owned(), - license: license.to_owned(), - authors, - notices: BTreeMap::new(), - }); + let package = Package { name: name.to_owned(), version: version.to_owned() }; + output.insert( + package.clone(), + PackageMetadata { license: license.to_owned(), authors, notices: BTreeMap::new() }, + ); } } - // Now do a cargo-vendor and grab everything - let vendor_path = dest.join("vendor"); - println!("Vendoring deps into {}...", vendor_path.display()); - run_cargo_vendor(cargo, &vendor_path, manifest_paths)?; - - // Now for each dependency we found, go and grab any important looking files - let mut output = BTreeSet::new(); - for mut dep in temp_set { - load_important_files(&mut dep, &vendor_path)?; - output.insert(dep); - } - Ok(output) } @@ -128,7 +143,7 @@ fn get_metadata_json(cargo: &Path, manifest_path: &Path) -> Result Resu } vendor_command.arg(dest); - let vendor_status = vendor_command.status().map_err(|e| Error::LaunchingVendor(e))?; + let vendor_status = vendor_command.status().map_err(Error::LaunchingVendor)?; if !vendor_status.success() { return Err(Error::RunningVendor); @@ -164,8 +179,12 @@ fn run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Resu /// /// Maybe one-day Cargo.toml will contain enough information that we don't need /// to do this manual scraping. -fn load_important_files(dep: &mut Dependency, vendor_root: &Path) -> Result<(), Error> { - let name_version = format!("{}-{}", dep.name, dep.version); +fn load_important_files( + package: &Package, + dep: &mut PackageMetadata, + vendor_root: &Path, +) -> Result<(), Error> { + let name_version = format!("{}-{}", package.name, package.version); println!("Scraping notices for {}...", name_version); let dep_vendor_path = vendor_root.join(name_version); for entry in std::fs::read_dir(dep_vendor_path)? { diff --git a/src/tools/generate-copyright/src/main.rs b/src/tools/generate-copyright/src/main.rs index 29babd2282b1e..5d7710ba081fd 100644 --- a/src/tools/generate-copyright/src/main.rs +++ b/src/tools/generate-copyright/src/main.rs @@ -1,11 +1,11 @@ use anyhow::Error; -use std::collections::BTreeSet; +use std::collections::{BTreeMap, BTreeSet}; use std::io::Write; use std::path::{Path, PathBuf}; mod cargo_metadata; -static TOP_BOILERPLATE: &'static str = r##" +static TOP_BOILERPLATE: &str = r##" @@ -27,7 +27,7 @@ when building the Rust toolchain (including the Rust Standard Library).

"##; -static BOTTOM_BOILERPLATE: &'static str = r#" +static BOTTOM_BOILERPLATE: &str = r#" "#; @@ -53,9 +53,10 @@ fn main() -> Result<(), Error> { Path::new("./library/std/Cargo.toml"), ]; let collected_cargo_metadata = - cargo_metadata::get(&cargo, &out_dir, &root_path, &workspace_paths)?; + cargo_metadata::get_metadata_and_notices(&cargo, &out_dir, &root_path, &workspace_paths)?; - let mut license_set = BTreeSet::new(); + let stdlib_set = + cargo_metadata::get_metadata(&cargo, &root_path, &[Path::new("./library/std/Cargo.toml")])?; let mut buffer = Vec::new(); @@ -65,13 +66,13 @@ fn main() -> Result<(), Error> { buffer, r#"

In-tree files

The following licenses cover the in-tree source files that were used in this release:

"# )?; - render_tree_recursive(&collected_tree_metadata.files, &mut buffer, 0, &mut license_set)?; + render_tree_recursive(&collected_tree_metadata.files, &mut buffer)?; writeln!( buffer, r#"

Out-of-tree dependencies

The following licenses cover the out-of-tree crates that were used in this release:

"# )?; - render_deps(collected_cargo_metadata.iter(), &mut buffer, &mut license_set)?; + render_deps(&collected_cargo_metadata, &stdlib_set, &mut buffer)?; writeln!(buffer, "{}", BOTTOM_BOILERPLATE)?; @@ -82,46 +83,35 @@ fn main() -> Result<(), Error> { /// Recursively draw the tree of files/folders we found on disk and their licenses, as /// markdown, into the given Vec. -fn render_tree_recursive( - node: &Node, - buffer: &mut Vec, - depth: usize, - license_set: &mut BTreeSet, -) -> Result<(), Error> { +fn render_tree_recursive(node: &Node, buffer: &mut Vec) -> Result<(), Error> { writeln!(buffer, r#"
"#)?; match node { Node::Root { children } => { for child in children { - render_tree_recursive(child, buffer, depth, license_set)?; + render_tree_recursive(child, buffer)?; } } Node::Directory { name, children, license } => { - render_tree_license(std::iter::once(name), license.iter(), buffer, license_set)?; + render_tree_license(std::iter::once(name), license.iter(), buffer)?; if !children.is_empty() { writeln!(buffer, "

Exceptions:

")?; for child in children { - render_tree_recursive(child, buffer, depth + 1, license_set)?; + render_tree_recursive(child, buffer)?; } } } Node::CondensedDirectory { name, licenses } => { - render_tree_license(std::iter::once(name), licenses.iter(), buffer, license_set)?; + render_tree_license(std::iter::once(name), licenses.iter(), buffer)?; } Node::Group { files, directories, license } => { render_tree_license( directories.iter().chain(files.iter()), std::iter::once(license), buffer, - license_set, )?; } Node::File { name, license } => { - render_tree_license( - std::iter::once(name), - std::iter::once(license), - buffer, - license_set, - )?; + render_tree_license(std::iter::once(name), std::iter::once(license), buffer)?; } } writeln!(buffer, "
")?; @@ -134,14 +124,12 @@ fn render_tree_license<'a>( names: impl Iterator, licenses: impl Iterator, buffer: &mut Vec, - license_set: &mut BTreeSet, ) -> Result<(), Error> { // de-duplicate and sort SPDX and Copyright strings let mut spdxs = BTreeSet::new(); let mut copyrights = BTreeSet::new(); for license in licenses { spdxs.insert(&license.spdx); - license_set.insert(license.spdx.clone()); for copyright in &license.copyright { copyrights.insert(copyright); } @@ -168,34 +156,38 @@ fn render_tree_license<'a>( } /// Render a list of out-of-tree dependencies as markdown into the given Vec. -fn render_deps<'a, 'b>( - deps: impl Iterator, - buffer: &'b mut Vec, - license_set: &mut BTreeSet, +fn render_deps( + all_deps: &BTreeMap, + stdlib_set: &BTreeMap, + buffer: &mut Vec, ) -> Result<(), Error> { - for dep in deps { - let authors_list = if dep.authors.is_empty() { + for (package, metadata) in all_deps { + let authors_list = if metadata.authors.is_empty() { "None Specified".to_owned() } else { - dep.authors.join(", ") + metadata.authors.join(", ") }; - let url = format!("https://crates.io/crates/{}/{}", dep.name, dep.version); + let url = format!("https://crates.io/crates/{}/{}", package.name, package.version); writeln!(buffer)?; writeln!( buffer, r#"

📦 {name}-{version}

"#, - name = dep.name, - version = dep.version, + name = package.name, + version = package.version, )?; writeln!(buffer, r#"

URL: {url}

"#,)?; + writeln!( + buffer, + "

In libstd: {}

", + if stdlib_set.contains_key(package) { "Yes" } else { "No" } + )?; writeln!(buffer, "

Authors: {}

", escape_html(&authors_list))?; - writeln!(buffer, "

License: {}

", escape_html(&dep.license))?; - license_set.insert(dep.license.clone()); + writeln!(buffer, "

License: {}

", escape_html(&metadata.license))?; writeln!(buffer, "

Notices: ")?; - if dep.notices.is_empty() { + if metadata.notices.is_empty() { writeln!(buffer, "None")?; } else { - for (name, contents) in &dep.notices { + for (name, contents) in &metadata.notices { writeln!( buffer, "

{}", @@ -244,7 +236,7 @@ fn env_path(var: &str) -> Result { /// Escapes any invalid HTML characters fn escape_html(input: &str) -> String { - static MAPPING: [(char, &'static str); 3] = [('&', "&"), ('<', "<"), ('>', ">")]; + static MAPPING: [(char, &str); 3] = [('&', "&"), ('<', "<"), ('>', ">")]; let mut output = input.to_owned(); for (ch, s) in &MAPPING { output = output.replace(*ch, s);