uv/crates/puffin-build/src/lib.rs

688 lines
26 KiB
Rust

//! Build wheels from source distributions
//!
//! <https://packaging.python.org/en/latest/specifications/source-distribution-format/>
use std::fmt::{Display, Formatter};
use std::io;
use std::io::BufRead;
use std::path::{Path, PathBuf};
use std::process::{Command, Output};
use std::str::FromStr;
use std::sync::Arc;
use flate2::read::GzDecoder;
use fs_err as fs;
use fs_err::{DirEntry, File};
use indoc::formatdoc;
use once_cell::sync::Lazy;
use pyproject_toml::{BuildSystem, Project};
use regex::Regex;
use serde::{Deserialize, Serialize};
use tar::Archive;
use tempfile::{tempdir, TempDir};
use thiserror::Error;
use tokio::sync::Mutex;
use tracing::{debug, instrument};
use zip::ZipArchive;
use pep508_rs::Requirement;
use puffin_interpreter::{InterpreterInfo, Virtualenv};
use puffin_traits::BuildContext;
/// Matches a compiler diagnostic about a missing header file and captures the header's path in
/// the named group `header`.
///
/// e.g. `pygraphviz/graphviz_wrap.c:3020:10: fatal error: graphviz/cgraph.h: No such file or directory`
static MISSING_HEADER_RE: Lazy<Regex> = Lazy::new(|| {
Regex::new(
// `c..` / `h..` accept a `c` / `h` followed by any two characters (for example `cpp` or `hpp`).
// The source location is `<file>:<line>:<column>`.
r".*\.(c|c..|h|h..):\d+:\d+: fatal error: (?<header>.*\.(h|h..)): No such file or directory"
)
// The pattern is a constant, so a compile failure here would be a programming error.
.unwrap()
});
/// Everything that can go wrong while setting up a build environment or building a wheel.
#[derive(Error, Debug)]
pub enum Error {
/// Any I/O error, forwarded with its own message.
#[error(transparent)]
IO(#[from] io::Error),
/// The zip archive could not be read or extracted.
#[error("Failed to read zip file")]
Zip(#[from] zip::result::ZipError),
/// The archive is neither a `.zip` nor a `.tar.gz`; carries the offending file name.
#[error("Unsupported archive format (extension not recognized): {0}")]
UnsupportedArchiveType(String),
/// The source distribution does not have the expected layout; carries a description.
#[error("Invalid source distribution: {0}")]
InvalidSourceDistribution(String),
/// `pyproject.toml` exists but could not be deserialized.
#[error("Invalid pyproject.toml")]
InvalidPyprojectToml(#[from] toml::de::Error),
/// Resolving or installing build requirements failed; the string names the step that failed.
#[error("Failed to install requirements from {0}")]
RequirementsInstall(&'static str, #[source] anyhow::Error),
/// The temporary virtual environment could not be created.
#[error("Failed to create temporary virtual environment")]
Gourgeist(#[from] gourgeist::Error),
/// The given executable could not be run at all (as opposed to exiting with a failure status).
#[error("Failed to run {0}")]
CommandFailed(PathBuf, #[source] io::Error),
/// The build backend (or `setup.py`) ran but failed; includes its captured output.
#[error("{message}:\n--- stdout:\n{stdout}\n--- stderr:\n{stderr}\n---")]
BuildBackend {
message: String,
stdout: String,
stderr: String,
},
/// Nudge the user towards installing the missing dev library
///
/// Same display as [`Error::BuildBackend`], but with a [`MissingHeaderCause`] attached as the
/// error source.
#[error("{message}:\n--- stdout:\n{stdout}\n--- stderr:\n{stderr}\n---")]
MissingHeader {
message: String,
stdout: String,
stderr: String,
#[source]
missing_header_cause: MissingHeaderCause,
},
}
/// Error source attached to [`Error::MissingHeader`]: names the header the compiler could not
/// find and the package whose build needed it.
#[derive(Debug, Error)]
pub struct MissingHeaderCause {
/// The header path as captured from the compiler output, e.g. `graphviz/cgraph.h`.
header: String,
// I've picked this over the better readable package name to make clear that you need to
// look for the build dependencies of that version or git commit respectively
/// Package id such as `pygraphviz-1.11`.
package_id: String,
}
impl Display for MissingHeaderCause {
    /// Renders the hint that tells the user which header is missing and for which package.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let Self { header, package_id } = self;
        write!(
            f,
            "This error likely indicates that you need to install a library that provides \"{header}\" for {package_id}"
        )
    }
}
impl Error {
    /// Turns the captured output of a failed subprocess into an [`Error`].
    ///
    /// Both streams are decoded lossily as UTF-8 and trimmed. If one of the last ten stderr
    /// lines matches [`MISSING_HEADER_RE`], the result is [`Error::MissingHeader`] carrying the
    /// header name and `package_id`; otherwise it is a plain [`Error::BuildBackend`].
    fn from_command_output(message: String, output: &Output, package_id: &str) -> Self {
        let decode = |bytes: &[u8]| String::from_utf8_lossy(bytes).trim().to_string();
        let stdout = decode(&output.stdout);
        let stderr = decode(&output.stderr);

        // In the cases i've seen it was the 5th last line (see test case), 10 seems like a
        // reasonable cutoff
        let missing_header = stderr
            .lines()
            .rev()
            .take(10)
            .filter_map(|line| MISSING_HEADER_RE.captures(line.trim()))
            .map(|captures| captures["header"].to_string())
            .next();

        match missing_header {
            Some(header) => Self::MissingHeader {
                message,
                stdout,
                stderr,
                missing_header_cause: MissingHeaderCause {
                    header,
                    package_id: package_id.to_string(),
                },
            },
            None => Self::BuildBackend {
                message,
                stdout,
                stderr,
            },
        }
    }
}
/// A pyproject.toml as specified in PEP 517
///
/// Only the `build-system` and `project` tables are modelled; both are optional. Field names
/// are (de)serialized in kebab-case.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq, Eq)]
#[serde(rename_all = "kebab-case")]
pub struct PyProjectToml {
/// Build-related data
pub build_system: Option<BuildSystem>,
/// Project metadata
pub project: Option<Project>,
}
/// The PEP 517 relevant parts of `[build-system]` from pyproject.toml, for source trees that
/// declare a `build-backend`
struct Pep517Backend {
/// The build backend string such as `setuptools.build_meta:__legacy__` or `maturin` from
/// `build-system.build-backend` in pyproject.toml
///
/// <https://peps.python.org/pep-0517/#build-wheel>
backend: String,
/// `build-system.requires` in pyproject.toml
requirements: Vec<Requirement>,
/// `build-system.backend-path` in pyproject.toml
///
/// <https://peps.python.org/pep-0517/#in-tree-build-backends>
backend_path: Option<Vec<String>>,
}
impl Pep517Backend {
    /// Builds the Python import statement prefix for the backend.
    ///
    /// `module:object` becomes `from module import object`, a bare `module` becomes
    /// `import module`. Callers append ` as backend` to the result.
    fn backend_import(&self) -> String {
        match self.backend.split_once(':') {
            Some((module, object)) => format!("from {module} import {object}"),
            None => format!("import {}", self.backend),
        }
    }
}
/// State shared between source builds.
///
/// Uses an [`Arc`] internally, clone freely
#[derive(Debug, Default, Clone)]
pub struct SourceBuildContext {
/// Cache the first resolution of `pip`, `setuptools` and `wheel` we made for setup.py (and
/// some PEP 517) builds so we can reuse it
///
/// `None` until the first resolution has been stored.
setup_py_resolution: Arc<Mutex<Option<Vec<Requirement>>>>,
}
/// Holds the state through a series of PEP 517 frontend to backend calls or a single setup.py
/// invocation.
///
/// This keeps both the temp dir and the result of a potential `prepare_metadata_for_build_wheel`
/// call which changes how we call `build_wheel`.
pub struct SourceBuild {
/// Temporary directory that holds the virtual environment and, for archives, the extracted
/// sources. Kept here so it is not removed while the build is in use.
temp_dir: TempDir,
/// Directory the build commands run in (the one expected to contain `pyproject.toml` or
/// `setup.py`).
source_tree: PathBuf,
/// `Some` if this is a PEP 517 build
pep517_backend: Option<Pep517Backend>,
/// The virtual environment the build requirements were installed into.
venv: Virtualenv,
/// Populated if `prepare_metadata_for_build_wheel` was called.
///
/// > If the build frontend has previously called prepare_metadata_for_build_wheel and depends
/// > on the wheel resulting from this call to have metadata matching this earlier call, then
/// > it should provide the path to the created .dist-info directory as the metadata_directory
/// > argument. If this argument is provided, then build_wheel MUST produce a wheel with
/// > identical metadata. The directory passed in by the build frontend MUST be identical to the
/// > directory created by prepare_metadata_for_build_wheel, including any unrecognized files
/// > it created.
metadata_directory: Option<PathBuf>,
/// Package id such as `foo-1.2.3`, for error reporting
package_id: String,
}
impl SourceBuild {
/// Create a virtual environment in which to build a source distribution, extracting the
/// contents from an archive if necessary.
///
/// `package_id` is for error reporting only.
pub async fn setup(
source: &Path,
subdirectory: Option<&Path>,
interpreter_info: &InterpreterInfo,
build_context: &impl BuildContext,
source_build_context: SourceBuildContext,
package_id: &str,
) -> Result<SourceBuild, Error> {
let temp_dir = tempdir()?;
// TODO(konstin): Parse and verify filenames
let source_root = if fs::metadata(source)?.is_dir() {
source.to_path_buf()
} else {
debug!("Unpacking for build: {}", source.display());
let extracted = temp_dir.path().join("extracted");
extract_archive(source, &extracted)?
};
let source_tree = if let Some(subdir) = subdirectory {
source_root.join(subdir)
} else {
source_root
};
// Check if we have a PEP 517 build, a legacy setup.py, or an edge case
let build_system = if source_tree.join("pyproject.toml").is_file() {
let pyproject_toml: PyProjectToml =
toml::from_str(&fs::read_to_string(source_tree.join("pyproject.toml"))?)
.map_err(Error::InvalidPyprojectToml)?;
pyproject_toml.build_system
} else {
None
};
let venv = gourgeist::create_venv(
temp_dir.path().join(".venv"),
build_context.base_python(),
interpreter_info,
)?;
// There are packages such as DTLSSocket 0.1.16 that say
// ```toml
// [build-system]
// requires = ["Cython<3", "setuptools", "wheel"]
// ```
// In this case we need to install requires PEP 517 style but then call setup.py in the
// legacy way
let requirements = if let Some(build_system) = &build_system {
let resolved_requirements = build_context
.resolve(&build_system.requires)
.await
.map_err(|err| {
Error::RequirementsInstall("build-system.requires (resolve)", err)
})?;
build_context
.install(&resolved_requirements, &venv)
.await
.map_err(|err| {
Error::RequirementsInstall("build-system.requires (install)", err)
})?;
build_system.requires.clone()
} else {
let requirements = vec![
Requirement::from_str("wheel").unwrap(),
Requirement::from_str("setuptools").unwrap(),
Requirement::from_str("pip").unwrap(),
];
let mut resolution = source_build_context.setup_py_resolution.lock().await;
let resolved_requirements = if let Some(resolved_requirements) = &*resolution {
resolved_requirements.clone()
} else {
let resolved_requirements = build_context
.resolve(&requirements)
.await
.map_err(|err| Error::RequirementsInstall("setup.py build (resolve)", err))?;
*resolution = Some(resolved_requirements.clone());
resolved_requirements
};
build_context
.install(&resolved_requirements, &venv)
.await
.map_err(|err| Error::RequirementsInstall("setup.py build (install)", err))?;
requirements
};
// > If the pyproject.toml file is absent, or the build-backend key is missing, the
// > source tree is not using this specification, and tools should revert to the legacy
// > behaviour of running setup.py (either directly, or by implicitly invoking the
// > setuptools.build_meta:__legacy__ backend).
let pep517_backend = if let Some(build_system) = build_system {
if let Some(backend) = build_system.build_backend {
Some(Pep517Backend {
backend,
backend_path: build_system.backend_path,
requirements,
})
} else {
None
}
} else {
None
};
if let Some(pep517_backend) = &pep517_backend {
create_pep517_build_environment(
&source_tree,
&venv,
pep517_backend,
build_context,
package_id,
)
.await?;
} else {
if !source_tree.join("setup.py").is_file() {
return Err(Error::InvalidSourceDistribution(
"The archive contains neither a pyproject.toml or a setup.py at the top level"
.to_string(),
));
}
}
Ok(Self {
temp_dir,
source_tree,
pep517_backend,
venv,
metadata_directory: None,
package_id: package_id.to_string(),
})
}
/// Try calling `prepare_metadata_for_build_wheel` to get the metadata without executing the
/// actual build
///
/// Returns `Ok(None)` for setup.py builds and for backends that don't provide the hook.
/// Otherwise returns the path of the `.dist-info` directory the backend created, which is
/// also remembered so that `build_wheel` can be handed the same directory later. Once a
/// directory has been obtained, repeated calls return it without invoking the backend again.
///
/// TODO(konstin): Return the actual metadata instead of the dist-info dir
pub fn get_metadata_without_build(&mut self) -> Result<Option<&Path>, Error> {
    // setup.py builds don't support this
    let Some(pep517_backend) = &self.pep517_backend else {
        return Ok(None);
    };

    // Don't run the hook a second time into an already populated directory.
    if self.metadata_directory.is_some() {
        return Ok(self.metadata_directory.as_deref());
    }

    let metadata_directory = self.temp_dir.path().join("metadata_directory");
    // `create_dir_all` so that a retry after a hook-less or failed call doesn't trip over the
    // directory left behind by the previous attempt.
    fs::create_dir_all(&metadata_directory)?;

    debug!(
        "Calling `{}.prepare_metadata_for_build_wheel()`",
        pep517_backend.backend
    );
    // The bodies of `if`/`else` must be indented, otherwise Python rejects the script.
    // `formatdoc!` strips the common leading indentation.
    let script = formatdoc! {
        r#"{} as backend
        import json
        if prepare_metadata_for_build_wheel := getattr(backend, "prepare_metadata_for_build_wheel", None):
            print(prepare_metadata_for_build_wheel("{}"))
        else:
            print()
        "#, pep517_backend.backend_import(), escape_path_for_python(&metadata_directory)
    };
    let output = run_python_script(&self.venv.python_executable(), &script, &self.source_tree)?;
    if !output.status.success() {
        return Err(Error::from_command_output(
            "Build backend failed to determine metadata through `prepare_metadata_for_build_wheel`".to_string(),
            &output,
            &self.package_id,
        ));
    }

    // The hook's return value is the last line the script printed.
    let message = output
        .stdout
        .lines()
        .last()
        // flatten is nightly only :/
        .transpose()
        .map_err(|err| err.to_string())
        .and_then(|last_line| last_line.ok_or_else(|| "Missing message".to_string()))
        .map_err(|err| {
            Error::from_command_output(
                format!(
                    "Build backend failed to return metadata directory with \
                    `prepare_metadata_for_build_wheel`: {err}"
                ),
                &output,
                &self.package_id,
            )
        })?;
    // An empty line means the backend doesn't implement the optional hook.
    if message.is_empty() {
        return Ok(None);
    }

    self.metadata_directory = Some(metadata_directory.join(message));
    Ok(self.metadata_directory.as_deref())
}
/// Build a source distribution from an archive (`.zip` or `.tar.gz`), return the location of the
/// built wheel.
///
/// PEP 517 builds are delegated to the backend's `build_wheel`; otherwise
/// `setup.py bdist_wheel` is run in the source tree and the single file found in `dist/` is
/// copied into `wheel_dir`. The returned string is the wheel's file name inside `wheel_dir`.
///
/// The location will be inside `temp_dir`, i.e. you must use the wheel before dropping the temp
/// dir.
///
/// <https://packaging.python.org/en/latest/specifications/source-distribution-format/>
#[instrument(skip(self))]
pub fn build(&self, wheel_dir: &Path) -> Result<String, Error> {
    // The build scripts run with the extracted root as cwd, so they need the absolute path
    let wheel_dir = fs::canonicalize(wheel_dir)?;

    if let Some(pep517_backend) = &self.pep517_backend {
        self.pep517_build_wheel(&wheel_dir, pep517_backend)
    } else {
        // We checked earlier that setup.py exists
        let python_interpreter = self.venv.python_executable();
        let output = Command::new(&python_interpreter)
            .args(["setup.py", "bdist_wheel"])
            .current_dir(&self.source_tree)
            .output()
            .map_err(|err| Error::CommandFailed(python_interpreter, err))?;
        if !output.status.success() {
            return Err(Error::from_command_output(
                "Failed building wheel through setup.py".to_string(),
                &output,
                &self.package_id,
            ));
        }

        let dist = fs::read_dir(self.source_tree.join("dist"))?;
        let dist_dir = dist.collect::<io::Result<Vec<DirEntry>>>()?;
        let [dist_wheel] = dist_dir.as_slice() else {
            return Err(Error::from_command_output(
                format!(
                    "Expected exactly one wheel in `dist/` after invoking setup.py, found {dist_dir:?}"
                ),
                &output,
                &self.package_id,
            ));
        };

        // TODO(konstin): Faster copy such as reflink? Or maybe don't really let the user pick the target dir
        let wheel_filename = dist_wheel.file_name();
        fs::copy(dist_wheel.path(), wheel_dir.join(&wheel_filename))?;
        // TODO(konstin): Check wheel filename
        Ok(wheel_filename.to_string_lossy().to_string())
    }
}
fn pep517_build_wheel(
&self,
wheel_dir: &Path,
pep517_backend: &Pep517Backend,
) -> Result<String, Error> {
let metadata_directory = self
.metadata_directory
.as_deref()
.map_or("None".to_string(), |path| {
format!(r#""{}""#, escape_path_for_python(path))
});
debug!(
"Calling `{}.build_wheel(metadata_directory={})`",
pep517_backend.backend, metadata_directory
);
let escaped_wheel_dir = escape_path_for_python(wheel_dir);
let script = formatdoc! {
r#"{} as backend
print(backend.build_wheel("{}", metadata_directory={}))
"#, pep517_backend.backend_import(), escaped_wheel_dir, metadata_directory
};
let output = run_python_script(&self.venv.python_executable(), &script, &self.source_tree)?;
if !output.status.success() {
return Err(Error::from_command_output(
"Build backend failed to build wheel through `build_wheel()` ".to_string(),
&output,
&self.package_id,
));
}
let stdout = String::from_utf8_lossy(&output.stdout);
let distribution_filename = stdout.lines().last();
let Some(distribution_filename) =
distribution_filename.filter(|wheel| wheel_dir.join(wheel).is_file())
else {
return Err(Error::from_command_output(
"Build backend did not return the wheel filename through `build_wheel()`"
.to_string(),
&output,
&self.package_id,
));
};
Ok(distribution_filename.to_string())
}
}
/// Renders `path` (lossily, as UTF-8) so that it can be placed between double quotes in a
/// Python string literal: every backslash and double quote gets a backslash in front of it.
fn escape_path_for_python(path: &Path) -> String {
    let raw = path.to_string_lossy();
    let mut escaped = String::with_capacity(raw.len());
    for ch in raw.chars() {
        if matches!(ch, '\\' | '"') {
            escaped.push('\\');
        }
        escaped.push(ch);
    }
    escaped
}
/// Asks the backend for its extra build requirements via `get_requires_for_build_wheel` and
/// installs them into `venv` if they go beyond what `build-system.requires` already listed.
///
/// Fails with [`Error::InvalidSourceDistribution`] if the backend uses `backend-path`, which is
/// not supported yet. `package_id` is for error reporting only.
///
/// Not a method because we call it before the builder is completely initialized
async fn create_pep517_build_environment(
    source_tree: &Path,
    venv: &Virtualenv,
    pep517_backend: &Pep517Backend,
    build_context: &impl BuildContext,
    package_id: &str,
) -> Result<(), Error> {
    // Bail out before logging that we call the backend, since we never do in this case.
    if pep517_backend.backend_path.is_some() {
        return Err(Error::InvalidSourceDistribution(
            "backend-path is not supported yet".to_string(),
        ));
    }
    debug!(
        "Calling `{}.get_requires_for_build_wheel()`",
        pep517_backend.backend
    );

    // The bodies of `if`/`else` must be indented, otherwise Python rejects the script.
    // `formatdoc!` strips the common leading indentation.
    let script = formatdoc! {
        r#"{} as backend
        import json
        if get_requires_for_build_wheel := getattr(backend, "get_requires_for_build_wheel", None):
            requires = get_requires_for_build_wheel()
        else:
            requires = []
        print(json.dumps(requires))
        "#, pep517_backend.backend_import()
    };
    let output = run_python_script(&venv.python_executable(), &script, source_tree)?;
    if !output.status.success() {
        return Err(Error::from_command_output(
            "Build backend failed to determine extras requires with `get_requires_for_build_wheel`"
                .to_string(),
            &output,
            package_id,
        ));
    }

    // The JSON list of requirements is the last line the script printed.
    let extra_requires = output
        .stdout
        .lines()
        .last()
        // flatten is nightly only :/
        .transpose()
        .map_err(|err| err.to_string())
        .and_then(|last_line| last_line.ok_or_else(|| "Missing message".to_string()))
        .and_then(|message| {
            serde_json::from_str::<Vec<Requirement>>(&message).map_err(|err| err.to_string())
        })
        .map_err(|err| {
            Error::from_command_output(
                format!(
                    "Build backend failed to return extras requires with \
                    `get_requires_for_build_wheel`: {err}"
                ),
                &output,
                package_id,
            )
        })?;

    // Some packages (such as tqdm 4.66.1) list only extra requires that have already been part of
    // the pyproject.toml requires (in this case, `wheel`). We can skip doing the whole resolution
    // and installation again.
    // TODO(konstin): Do we still need this when we have a fast resolver?
    // `any` is false for an empty list, so no separate emptiness check is needed.
    let has_new_requirements = extra_requires
        .iter()
        .any(|req| !pep517_backend.requirements.contains(req));
    if has_new_requirements {
        debug!("Installing extra requirements for build backend");
        // TODO(konstin): Do we need to resolve them together?
        let requirements: Vec<Requirement> = pep517_backend
            .requirements
            .iter()
            .cloned()
            .chain(extra_requires)
            .collect();
        let resolved_requirements = build_context
            .resolve(&requirements)
            .await
            .map_err(|err| Error::RequirementsInstall("build-system.requires (resolve)", err))?;
        build_context
            .install(&resolved_requirements, venv)
            .await
            .map_err(|err| Error::RequirementsInstall("build-system.requires (install)", err))?;
    }
    Ok(())
}
/// Returns the directory with the `pyproject.toml`/`setup.py`
#[instrument(skip_all, fields(path))]
fn extract_archive(sdist: &Path, extracted: &PathBuf) -> Result<PathBuf, Error> {
// TODO(konstin): Simplify this with camino paths?
if sdist
.extension()
.is_some_and(|extension| extension == "zip")
{
let mut archive = ZipArchive::new(File::open(sdist)?)?;
archive.extract(extracted)?;
// .tar.gz
} else if sdist.extension().is_some_and(|extension| extension == "gz")
&& sdist.file_stem().is_some_and(|stem| {
Path::new(stem)
.extension()
.is_some_and(|extension| extension == "tar")
})
{
let mut archive = Archive::new(GzDecoder::new(File::open(sdist)?));
archive.unpack(extracted)?;
} else {
return Err(Error::UnsupportedArchiveType(
sdist
.file_name()
.unwrap_or(sdist.as_os_str())
.to_string_lossy()
.to_string(),
));
}
// > A .tar.gz source distribution (sdist) contains a single top-level directory called
// > `{name}-{version}` (e.g. foo-1.0), containing the source files of the package.
// TODO(konstin): Verify the name of the directory
let top_level = fs::read_dir(extracted)?.collect::<io::Result<Vec<DirEntry>>>()?;
let [root] = top_level.as_slice() else {
return Err(Error::InvalidSourceDistribution(format!(
"The top level of the archive must only contain a list directory, but it contains {top_level:?}"
)));
};
Ok(root.path())
}
#[instrument(skip(script, source_tree))]
fn run_python_script(
python_interpreter: &Path,
script: &str,
source_tree: &Path,
) -> Result<Output, Error> {
Command::new(python_interpreter)
.args(["-c", script])
.current_dir(source_tree)
.output()
.map_err(|err| Error::CommandFailed(python_interpreter.to_path_buf(), err))
}
/// Unit tests for turning captured command output into an [`Error`].
#[cfg(test)]
mod test {
use std::process::{ExitStatus, Output};
use crate::Error;
use indoc::indoc;
/// A compiler "No such file or directory" diagnostic among the last stderr lines must yield
/// `Error::MissingHeader`, whose source names the missing header and the package id.
#[test]
fn missing_header() {
// Captured-output fixture modelled on a failed pygraphviz build.
let output = Output {
status: ExitStatus::default(), // This is wrong but `from_raw` is platform gated
stdout: indoc!(r#"
running bdist_wheel
running build
[...]
creating build/temp.linux-x86_64-cpython-39/pygraphviz
gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -DOPENSSL_NO_SSL3 -fPIC -DSWIG_PYTHON_STRICT_BYTE_CHAR -I/tmp/.tmpy6vVes/.venv/include -I/home/konsti/.pyenv/versions/3.9.18/include/python3.9 -c pygraphviz/graphviz_wrap.c -o build/temp.linux-x86_64-cpython-39/pygraphviz/graphviz_wrap.o
"#
).as_bytes().to_vec(),
stderr: indoc!(r#"
warning: no files found matching '*.png' under directory 'doc'
warning: no files found matching '*.txt' under directory 'doc'
[...]
no previously-included directories found matching 'doc/build'
pygraphviz/graphviz_wrap.c:3020:10: fatal error: graphviz/cgraph.h: No such file or directory
3020 | #include "graphviz/cgraph.h"
| ^~~~~~~~~~~~~~~~~~~
compilation terminated.
error: command '/usr/bin/gcc' failed with exit code 1
"#
).as_bytes().to_vec(),
};
let err = Error::from_command_output(
"Failed building wheel through setup.py".to_string(),
&output,
"pygraphviz-1.11",
);
assert!(matches!(err, Error::MissingHeader { .. }));
// The top-level message shows the given message plus both captured streams.
insta::assert_display_snapshot!(err, @r###"
Failed building wheel through setup.py:
--- stdout:
running bdist_wheel
running build
[...]
creating build/temp.linux-x86_64-cpython-39/pygraphviz
gcc -Wno-unused-result -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -DOPENSSL_NO_SSL3 -fPIC -DSWIG_PYTHON_STRICT_BYTE_CHAR -I/tmp/.tmpy6vVes/.venv/include -I/home/konsti/.pyenv/versions/3.9.18/include/python3.9 -c pygraphviz/graphviz_wrap.c -o build/temp.linux-x86_64-cpython-39/pygraphviz/graphviz_wrap.o
--- stderr:
warning: no files found matching '*.png' under directory 'doc'
warning: no files found matching '*.txt' under directory 'doc'
[...]
no previously-included directories found matching 'doc/build'
pygraphviz/graphviz_wrap.c:3020:10: fatal error: graphviz/cgraph.h: No such file or directory
3020 | #include "graphviz/cgraph.h"
| ^~~~~~~~~~~~~~~~~~~
compilation terminated.
error: command '/usr/bin/gcc' failed with exit code 1
---
"###);
// The hint for the user lives in the error's source.
insta::assert_display_snapshot!(
std::error::Error::source(&err).unwrap(),
@r###"This error likely indicates that you need to install a library that provides "graphviz/cgraph.h" for pygraphviz-1.11"###
);
}
}