From 12a32fefdf3e0a8af05b577c932feed49169ef68 Mon Sep 17 00:00:00 2001
From: Jonathan Pallant
Date: Mon, 15 Jul 2024 17:59:08 +0100
Subject: [PATCH] generate-copyright: Marks which deps are in the stdlib
---
.../generate-copyright/src/cargo_metadata.rs | 81 ++++++++++++-------
src/tools/generate-copyright/src/main.rs | 74 ++++++++---------
2 files changed, 83 insertions(+), 72 deletions(-)
diff --git a/src/tools/generate-copyright/src/cargo_metadata.rs b/src/tools/generate-copyright/src/cargo_metadata.rs
index 65ee40f91609b..4d366b01045fa 100644
--- a/src/tools/generate-copyright/src/cargo_metadata.rs
+++ b/src/tools/generate-copyright/src/cargo_metadata.rs
@@ -1,6 +1,6 @@
//! Gets metadata about a workspace from Cargo
-use std::collections::{BTreeMap, BTreeSet};
+use std::collections::BTreeMap;
use std::ffi::{OsStr, OsString};
use std::path::Path;
@@ -23,13 +23,18 @@ pub enum Error {
RunningVendor,
}
-/// Describes one of our dependencies
+/// Uniquely describes a package on crates.io
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
-pub struct Dependency {
+pub struct Package {
/// The name of the package
pub name: String,
/// The version number
pub version: String,
+}
+
+/// Extra data about a package
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
+pub struct PackageMetadata {
/// The license it is under
pub license: String,
/// The list of authors from the package metadata
@@ -40,20 +45,44 @@ pub struct Dependency {
pub notices: BTreeMap,
}
-/// Use `cargo` to get a list of dependencies and their license data.
+/// Use `cargo metadata` and `cargo vendor` to get a list of dependencies and their license data.
///
/// This will involve running `cargo vendor` into `${BUILD}/vendor` so we can
/// grab the license files.
///
/// Any dependency with a path beginning with `root_path` is ignored, as we
/// assume `reuse` has covered it already.
-pub fn get(
+pub fn get_metadata_and_notices(
cargo: &Path,
dest: &Path,
root_path: &Path,
manifest_paths: &[&Path],
-) -> Result, Error> {
- let mut temp_set = BTreeSet::new();
+) -> Result, Error> {
+ let mut output = get_metadata(cargo, root_path, manifest_paths)?;
+
+ // Now do a cargo-vendor and grab everything
+ let vendor_path = dest.join("vendor");
+ println!("Vendoring deps into {}...", vendor_path.display());
+ run_cargo_vendor(cargo, &vendor_path, manifest_paths)?;
+
+ // Now for each dependency we found, go and grab any important looking files
+ for (package, metadata) in output.iter_mut() {
+ load_important_files(package, metadata, &vendor_path)?;
+ }
+
+ Ok(output)
+}
+
+/// Use `cargo metadata` to get a list of dependencies and their license data.
+///
+/// Any dependency with a path beginning with `root_path` is ignored, as we
+/// assume `reuse` has covered it already.
+pub fn get_metadata(
+ cargo: &Path,
+ root_path: &Path,
+ manifest_paths: &[&Path],
+) -> Result, Error> {
+ let mut output = BTreeMap::new();
// Look at the metadata for each manifest
for manifest_path in manifest_paths {
if manifest_path.file_name() != Some(OsStr::new("Cargo.toml")) {
@@ -71,7 +100,7 @@ pub fn get(
.and_then(|v| v.as_str())
.map(Path::new)
.ok_or_else(|| Error::MissingJsonElement("package.manifest_path"))?;
- if manifest_path.starts_with(&root_path) {
+ if manifest_path.starts_with(root_path) {
// it's an in-tree dependency and reuse covers it
continue;
}
@@ -93,28 +122,14 @@ pub fn get(
.ok_or_else(|| Error::MissingJsonElement("package.authors"))?;
let authors: Vec =
authors_list.iter().filter_map(|v| v.as_str()).map(|s| s.to_owned()).collect();
- temp_set.insert(Dependency {
- name: name.to_owned(),
- version: version.to_owned(),
- license: license.to_owned(),
- authors,
- notices: BTreeMap::new(),
- });
+ let package = Package { name: name.to_owned(), version: version.to_owned() };
+ output.insert(
+ package.clone(),
+ PackageMetadata { license: license.to_owned(), authors, notices: BTreeMap::new() },
+ );
}
}
- // Now do a cargo-vendor and grab everything
- let vendor_path = dest.join("vendor");
- println!("Vendoring deps into {}...", vendor_path.display());
- run_cargo_vendor(cargo, &vendor_path, manifest_paths)?;
-
- // Now for each dependency we found, go and grab any important looking files
- let mut output = BTreeSet::new();
- for mut dep in temp_set {
- load_important_files(&mut dep, &vendor_path)?;
- output.insert(dep);
- }
-
Ok(output)
}
@@ -128,7 +143,7 @@ fn get_metadata_json(cargo: &Path, manifest_path: &Path) -> Result Resu
}
vendor_command.arg(dest);
- let vendor_status = vendor_command.status().map_err(|e| Error::LaunchingVendor(e))?;
+ let vendor_status = vendor_command.status().map_err(Error::LaunchingVendor)?;
if !vendor_status.success() {
return Err(Error::RunningVendor);
@@ -164,8 +179,12 @@ fn run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Resu
///
/// Maybe one-day Cargo.toml will contain enough information that we don't need
/// to do this manual scraping.
-fn load_important_files(dep: &mut Dependency, vendor_root: &Path) -> Result<(), Error> {
- let name_version = format!("{}-{}", dep.name, dep.version);
+fn load_important_files(
+ package: &Package,
+ dep: &mut PackageMetadata,
+ vendor_root: &Path,
+) -> Result<(), Error> {
+ let name_version = format!("{}-{}", package.name, package.version);
println!("Scraping notices for {}...", name_version);
let dep_vendor_path = vendor_root.join(name_version);
for entry in std::fs::read_dir(dep_vendor_path)? {
diff --git a/src/tools/generate-copyright/src/main.rs b/src/tools/generate-copyright/src/main.rs
index 29babd2282b1e..5d7710ba081fd 100644
--- a/src/tools/generate-copyright/src/main.rs
+++ b/src/tools/generate-copyright/src/main.rs
@@ -1,11 +1,11 @@
use anyhow::Error;
-use std::collections::BTreeSet;
+use std::collections::{BTreeMap, BTreeSet};
use std::io::Write;
use std::path::{Path, PathBuf};
mod cargo_metadata;
-static TOP_BOILERPLATE: &'static str = r##"
+static TOP_BOILERPLATE: &str = r##"
@@ -27,7 +27,7 @@ when building the Rust toolchain (including the Rust Standard Library).
"##;
-static BOTTOM_BOILERPLATE: &'static str = r#"
+static BOTTOM_BOILERPLATE: &str = r#"
"#;
@@ -53,9 +53,10 @@ fn main() -> Result<(), Error> {
Path::new("./library/std/Cargo.toml"),
];
let collected_cargo_metadata =
- cargo_metadata::get(&cargo, &out_dir, &root_path, &workspace_paths)?;
+ cargo_metadata::get_metadata_and_notices(&cargo, &out_dir, &root_path, &workspace_paths)?;
- let mut license_set = BTreeSet::new();
+ let stdlib_set =
+ cargo_metadata::get_metadata(&cargo, &root_path, &[Path::new("./library/std/Cargo.toml")])?;
let mut buffer = Vec::new();
@@ -65,13 +66,13 @@ fn main() -> Result<(), Error> {
buffer,
r#"In-tree files
The following licenses cover the in-tree source files that were used in this release:
"#
)?;
- render_tree_recursive(&collected_tree_metadata.files, &mut buffer, 0, &mut license_set)?;
+ render_tree_recursive(&collected_tree_metadata.files, &mut buffer)?;
writeln!(
buffer,
r#"Out-of-tree dependencies
The following licenses cover the out-of-tree crates that were used in this release:
"#
)?;
- render_deps(collected_cargo_metadata.iter(), &mut buffer, &mut license_set)?;
+ render_deps(&collected_cargo_metadata, &stdlib_set, &mut buffer)?;
writeln!(buffer, "{}", BOTTOM_BOILERPLATE)?;
@@ -82,46 +83,35 @@ fn main() -> Result<(), Error> {
/// Recursively draw the tree of files/folders we found on disk and their licenses, as
/// markdown, into the given Vec.
-fn render_tree_recursive(
- node: &Node,
- buffer: &mut Vec,
- depth: usize,
- license_set: &mut BTreeSet,
-) -> Result<(), Error> {
+fn render_tree_recursive(node: &Node, buffer: &mut Vec) -> Result<(), Error> {
writeln!(buffer, r#""#)?;
match node {
Node::Root { children } => {
for child in children {
- render_tree_recursive(child, buffer, depth, license_set)?;
+ render_tree_recursive(child, buffer)?;
}
}
Node::Directory { name, children, license } => {
- render_tree_license(std::iter::once(name), license.iter(), buffer, license_set)?;
+ render_tree_license(std::iter::once(name), license.iter(), buffer)?;
if !children.is_empty() {
writeln!(buffer, "
Exceptions:
")?;
for child in children {
- render_tree_recursive(child, buffer, depth + 1, license_set)?;
+ render_tree_recursive(child, buffer)?;
}
}
}
Node::CondensedDirectory { name, licenses } => {
- render_tree_license(std::iter::once(name), licenses.iter(), buffer, license_set)?;
+ render_tree_license(std::iter::once(name), licenses.iter(), buffer)?;
}
Node::Group { files, directories, license } => {
render_tree_license(
directories.iter().chain(files.iter()),
std::iter::once(license),
buffer,
- license_set,
)?;
}
Node::File { name, license } => {
- render_tree_license(
- std::iter::once(name),
- std::iter::once(license),
- buffer,
- license_set,
- )?;
+ render_tree_license(std::iter::once(name), std::iter::once(license), buffer)?;
}
}
writeln!(buffer, "
")?;
@@ -134,14 +124,12 @@ fn render_tree_license<'a>(
names: impl Iterator- ,
licenses: impl Iterator
- ,
buffer: &mut Vec,
- license_set: &mut BTreeSet,
) -> Result<(), Error> {
// de-duplicate and sort SPDX and Copyright strings
let mut spdxs = BTreeSet::new();
let mut copyrights = BTreeSet::new();
for license in licenses {
spdxs.insert(&license.spdx);
- license_set.insert(license.spdx.clone());
for copyright in &license.copyright {
copyrights.insert(copyright);
}
@@ -168,34 +156,38 @@ fn render_tree_license<'a>(
}
/// Render a list of out-of-tree dependencies as markdown into the given Vec.
-fn render_deps<'a, 'b>(
- deps: impl Iterator
- ,
- buffer: &'b mut Vec,
- license_set: &mut BTreeSet,
+fn render_deps(
+ all_deps: &BTreeMap,
+ stdlib_set: &BTreeMap,
+ buffer: &mut Vec,
) -> Result<(), Error> {
- for dep in deps {
- let authors_list = if dep.authors.is_empty() {
+ for (package, metadata) in all_deps {
+ let authors_list = if metadata.authors.is_empty() {
"None Specified".to_owned()
} else {
- dep.authors.join(", ")
+ metadata.authors.join(", ")
};
- let url = format!("https://crates.io/crates/{}/{}", dep.name, dep.version);
+ let url = format!("https://crates.io/crates/{}/{}", package.name, package.version);
writeln!(buffer)?;
writeln!(
buffer,
r#"
📦 {name}-{version}
"#,
- name = dep.name,
- version = dep.version,
+ name = package.name,
+ version = package.version,
)?;
writeln!(buffer, r#"URL: {url}
"#,)?;
+ writeln!(
+ buffer,
+ "In libstd: {}
",
+ if stdlib_set.contains_key(package) { "Yes" } else { "No" }
+ )?;
writeln!(buffer, "Authors: {}
", escape_html(&authors_list))?;
- writeln!(buffer, "License: {}
", escape_html(&dep.license))?;
- license_set.insert(dep.license.clone());
+ writeln!(buffer, "License: {}
", escape_html(&metadata.license))?;
writeln!(buffer, "Notices: ")?;
- if dep.notices.is_empty() {
+ if metadata.notices.is_empty() {
writeln!(buffer, "None")?;
} else {
- for (name, contents) in &dep.notices {
+ for (name, contents) in &metadata.notices {
writeln!(
buffer,
"{}
",
@@ -244,7 +236,7 @@ fn env_path(var: &str) -> Result {
/// Escapes any invalid HTML characters
fn escape_html(input: &str) -> String {
- static MAPPING: [(char, &'static str); 3] = [('&', "&"), ('<', "<"), ('>', ">")];
+ static MAPPING: [(char, &str); 3] = [('&', "&"), ('<', "<"), ('>', ">")];
let mut output = input.to_owned();
for (ch, s) in &MAPPING {
output = output.replace(*ch, s);