Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Change generate-copyright to generate HTML, with cargo dependencies included #128353

47 changes: 44 additions & 3 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1406,8 +1406,11 @@ name = "generate-copyright"
version = "0.1.0"
dependencies = [
"anyhow",
"cargo_metadata 0.18.1",
"rinja 0.2.0",
"serde",
"serde_json",
"thiserror",
]

[[package]]
Expand Down Expand Up @@ -3097,14 +3100,43 @@ dependencies = [
"walkdir",
]

[[package]]
name = "rinja"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2d47a46d7729e891c8accf260e9daa02ae6d570aa2a94fb1fb27eb5364a2323"
dependencies = [
"humansize",
"num-traits",
"percent-encoding",
"rinja_derive 0.2.0",
]

[[package]]
name = "rinja"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d3762e3740cdbf2fd2be465cc2c26d643ad17353cc2e0223d211c1b096118bd"
dependencies = [
"itoa",
"rinja_derive",
"rinja_derive 0.3.0",
]

[[package]]
name = "rinja_derive"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44dae9afe59d58ed8d988d67d1945f3638125d2fd2104058399382e11bd3ea2a"
dependencies = [
"basic-toml",
"mime",
"mime_guess",
"once_map",
"proc-macro2",
"quote",
"rinja_parser 0.2.0",
"serde",
"syn 2.0.67",
]

[[package]]
Expand All @@ -3120,11 +3152,20 @@ dependencies = [
"once_map",
"proc-macro2",
"quote",
"rinja_parser",
"rinja_parser 0.3.0",
"serde",
"syn 2.0.67",
]

[[package]]
name = "rinja_parser"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b1771c78cd5d3b1646ef8d8f2ed100db936e8b291d3cc06e92a339ff346858c"
dependencies = [
"nom",
]

[[package]]
name = "rinja_parser"
version = "0.3.0"
Expand Down Expand Up @@ -4603,7 +4644,7 @@ dependencies = [
"minifier",
"pulldown-cmark 0.9.6",
"regex",
"rinja",
"rinja 0.3.0",
"rustdoc-json-types",
"serde",
"serde_json",
Expand Down
2 changes: 1 addition & 1 deletion REUSE.toml
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ SPDX-License-Identifier = "MIT OR Apache-2.0"
path = "src/llvm-project/**"
precedence = "override"
SPDX-FileCopyrightText = [
"2003-2019 by the contributors listed in [CREDITS.TXT](https://github.com/rust-lang/llvm-project/blob/7738295178045041669876bf32b0543ec8319a5c/llvm/CREDITS.TXT)",
"2003-2019 by the contributors listed in CREDITS.TXT (https://github.com/rust-lang/llvm-project/blob/7738295178045041669876bf32b0543ec8319a5c/llvm/CREDITS.TXT)",
"2010 Apple Inc",
"2003-2019 University of Illinois at Urbana-Champaign.",
]
Expand Down
4 changes: 3 additions & 1 deletion src/bootstrap/src/core/build_steps/run.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,11 +212,13 @@ impl Step for GenerateCopyright {
let license_metadata = builder.ensure(CollectLicenseMetadata);

// Temporary location, it will be moved to the proper one once it's accurate.
let dest = builder.out.join("COPYRIGHT.md");
let dest = builder.out.join("COPYRIGHT.html");

let mut cmd = builder.tool_cmd(Tool::GenerateCopyright);
cmd.env("LICENSE_METADATA", &license_metadata);
cmd.env("DEST", &dest);
cmd.env("OUT_DIR", &builder.out);
cmd.env("CARGO", &builder.initial_cargo);
cmd.run(builder);

dest
Expand Down
2 changes: 2 additions & 0 deletions src/tools/collect-license-metadata/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
name = "collect-license-metadata"
version = "0.1.0"
edition = "2021"
description = "Runs the reuse tool and caches the output, so rust toolchain devs don't need to have reuse installed"
license = "MIT OR Apache-2.0"

[dependencies]
anyhow = "1.0.65"
Expand Down
5 changes: 5 additions & 0 deletions src/tools/collect-license-metadata/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,11 @@ use anyhow::Error;

use crate::licenses::LicensesInterner;

/// The entry point to the binary.
///
/// You should probably let `bootstrap` execute this program instead of running it directly.
///
/// Run `x.py run collect-license-metadata`
fn main() -> Result<(), Error> {
let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into();
let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into();
Expand Down
4 changes: 4 additions & 0 deletions src/tools/generate-copyright/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,14 @@
name = "generate-copyright"
version = "0.1.0"
edition = "2021"
description = "Produces a manifest of all the copyrighted materials in the Rust Toolchain"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
anyhow = "1.0.65"
cargo_metadata = "0.18.1"
rinja = "0.2.0"
serde = { version = "1.0.147", features = ["derive"] }
serde_json = "1.0.85"
thiserror = "1"
191 changes: 191 additions & 0 deletions src/tools/generate-copyright/src/cargo_metadata.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,191 @@
//! Gets metadata about a workspace from Cargo

use std::collections::BTreeMap;
use std::ffi::OsStr;
use std::path::{Path, PathBuf};

/// Describes how this module can fail
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("I/O Error: {0:?}")]
Io(#[from] std::io::Error),
#[error("Failed get output from cargo-metadata: {0:?}")]
GettingMetadata(#[from] cargo_metadata::Error),
#[error("Failed to run cargo vendor: {0:?}")]
LaunchingVendor(std::io::Error),
#[error("Failed to complete cargo vendor")]
RunningVendor,
#[error("Bad path {0:?} whilst scraping files")]
Scraping(PathBuf),
}

/// Identifies one package on crates.io by its name and version.
///
/// Values order by `name` first and then by `version` (compared as plain
/// strings), which makes this type usable as a `BTreeMap` key.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Package {
    /// Name of the package.
    pub name: String,
    /// Version number of the package, stored as text.
    pub version: String,
}

/// Additional information recorded for a package.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct PackageMetadata {
    /// The license the package is distributed under.
    pub license: String,
    /// The authors listed in the package metadata.
    pub authors: Vec<String>,
    /// Important files shipped with the package, keyed by file name and
    /// mapped to the file's contents.
    ///
    /// This covers files whose names contain *COPYRIGHT*, *NOTICE*, *AUTHOR*,
    /// *LICENSE*, or *LICENCE*, compared case-insensitively.
    pub notices: BTreeMap<String, String>,
    /// `Some(true)` when this dependency is part of the Rust Standard Library.
    ///
    /// NOTE(review): `None` presumably means "not determined yet" — confirm
    /// against the code that fills this field in.
    pub is_in_libstd: Option<bool>,
}

/// Collect every out-of-tree dependency together with its license data and
/// notice files, using `cargo metadata` and `cargo vendor`.
///
/// The dependencies are vendored into `dest/vendor`, and each package's
/// vendored directory is then scanned for license-related files.
///
/// Any dependency with a path beginning with `root_path` is ignored, as we
/// assume `reuse` has covered it already.
///
/// # Errors
///
/// Fails if gathering the metadata, running `cargo vendor`, or reading any of
/// the vendored files fails.
pub fn get_metadata_and_notices(
    cargo: &Path,
    dest: &Path,
    root_path: &Path,
    manifest_paths: &[&Path],
) -> Result<BTreeMap<Package, PackageMetadata>, Error> {
    let mut packages = get_metadata(cargo, root_path, manifest_paths)?;

    // Vendor all dependencies so that their files are available on disk.
    let vendor_path = dest.join("vendor");
    println!("Vendoring deps into {}...", vendor_path.display());
    run_cargo_vendor(cargo, &vendor_path, manifest_paths)?;

    // Attach the important-looking files to every dependency we found,
    // stopping at the first failure.
    packages.iter_mut().try_for_each(|(package, metadata)| {
        load_important_files(package, metadata, &vendor_path)
    })?;

    Ok(packages)
}

/// Use `cargo metadata` to get a list of dependencies and their license data.
///
/// `cargo metadata` is run once per entry in `manifest_paths` using the
/// `cargo` binary given, and the packages reported by all runs are merged into
/// one map. A package reported more than once (same name and version) keeps
/// the entry from the last run that reported it.
///
/// Any dependency with a path beginning with `root_path` is ignored, as we
/// assume `reuse` has covered it already.
///
/// The returned entries have empty `notices` and `is_in_libstd` set to `None`.
///
/// # Errors
///
/// Returns [`Error::GettingMetadata`] if running `cargo metadata` fails.
///
/// # Panics
///
/// Panics if any entry of `manifest_paths` does not name a `Cargo.toml` file.
pub fn get_metadata(
    cargo: &Path,
    root_path: &Path,
    manifest_paths: &[&Path],
) -> Result<BTreeMap<Package, PackageMetadata>, Error> {
    let mut output = BTreeMap::new();
    // Look at the metadata for each manifest
    for manifest_path in manifest_paths {
        if manifest_path.file_name() != Some(OsStr::new("Cargo.toml")) {
            panic!(
                "cargo_metadata::get_metadata requires a path to a Cargo.toml file, got {}",
                manifest_path.display()
            );
        }
        // NOTE(review): RUSTC_BOOTSTRAP=1 is presumably set so that manifests
        // relying on unstable Cargo features can be read — confirm.
        let metadata = cargo_metadata::MetadataCommand::new()
            .cargo_path(cargo)
            .env("RUSTC_BOOTSTRAP", "1")
            .manifest_path(manifest_path)
            .exec()?;
        for package in metadata.packages {
            let package_manifest = package.manifest_path.as_path();
            if package_manifest.starts_with(root_path) {
                // it's an in-tree dependency and reuse covers it
                continue;
            }
            // otherwise it's an out-of-tree dependency
            let package_id = Package { name: package.name, version: package.version.to_string() };
            output.insert(
                package_id,
                PackageMetadata {
                    license: package.license.unwrap_or_else(|| String::from("Unspecified")),
                    authors: package.authors,
                    notices: BTreeMap::new(),
                    is_in_libstd: None,
                },
            );
        }
    }

    Ok(output)
}

/// Run cargo-vendor, fetching into the given dir
fn run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Result<(), Error> {
let mut vendor_command = std::process::Command::new(cargo);
vendor_command.env("RUSTC_BOOTSTRAP", "1");
vendor_command.arg("vendor");
vendor_command.arg("--quiet");
vendor_command.arg("--versioned-dirs");
for manifest_path in manifest_paths {
vendor_command.arg("-s");
vendor_command.arg(manifest_path);
}
vendor_command.arg(dest);

let vendor_status = vendor_command.status().map_err(Error::LaunchingVendor)?;

if !vendor_status.success() {
return Err(Error::RunningVendor);
}

Ok(())
}

/// Read the important files of a vendored dependency into `dep.notices`.
///
/// The dependency is expected at `vendor_root/<name>-<version>`. Every
/// top-level entry whose lower-cased name contains one of `copyright`,
/// `licence`, `license`, `author` or `notice` is kept:
///
/// * a regular file is stored under its own file name;
/// * for a directory, each regular file directly inside it is stored under
///   `<lower-cased directory name>/<file name>` (nested directories are not
///   descended into).
///
/// Maybe one-day Cargo.toml will contain enough information that we don't need
/// to do this manual scraping.
///
/// # Errors
///
/// Returns [`Error::Io`] if a directory, an entry's metadata, or a file cannot
/// be read (including a file that is not valid UTF-8), and
/// [`Error::Scraping`] if an entry's path has no file name.
fn load_important_files(
    package: &Package,
    dep: &mut PackageMetadata,
    vendor_root: &Path,
) -> Result<(), Error> {
    // Substrings that mark an entry as interesting, matched against the
    // lower-cased entry name.
    const MARKERS: [&str; 5] = ["copyright", "licence", "license", "author", "notice"];

    let name_version = format!("{}-{}", package.name, package.version);
    println!("Scraping notices for {}...", name_version);
    let dep_vendor_path = vendor_root.join(name_version);

    for entry in std::fs::read_dir(dep_vendor_path)? {
        let entry = entry?;
        let metadata = entry.metadata()?;
        let path = entry.path();
        let Some(filename) = path.file_name() else {
            return Err(Error::Scraping(path));
        };
        let lc_filename = filename.to_ascii_lowercase();
        let lc_filename_str = lc_filename.to_string_lossy();

        if !MARKERS.iter().any(|marker| lc_filename_str.contains(*marker)) {
            continue;
        }

        if metadata.is_dir() {
            // Only the files directly inside the matching directory are taken.
            for inner_entry in std::fs::read_dir(&path)? {
                let inner_entry = inner_entry?;
                if !inner_entry.metadata()?.is_file() {
                    continue;
                }
                let inner_filename = inner_entry.file_name();
                let qualified_filename =
                    format!("{}/{}", lc_filename_str, inner_filename.to_string_lossy());
                println!("Scraping {}", qualified_filename);
                let contents = std::fs::read_to_string(inner_entry.path())?;
                dep.notices.insert(qualified_filename, contents);
            }
        } else if metadata.is_file() {
            // Files keep their original (not lower-cased) name as the key.
            let notice_name = filename.to_string_lossy().into_owned();
            println!("Scraping {}", notice_name);
            let contents = std::fs::read_to_string(&path)?;
            dep.notices.insert(notice_name, contents);
        }
    }

    Ok(())
}
Loading