Files
uv/crates/uv-git/src/source.rs
Ibraheem Ahmed 261aa2c70a Port all git functionality to use git CLI (#3833)
## Summary

We currently rely on libgit2 for most git-related functionality.
However, libgit2 has long-standing performance issues, as well as lags
significantly behind git in terms of new features. For these reasons we
now use the git CLI by default for fetching repositories
(https://github.com/astral-sh/uv/pull/1781). This PR completely drops
libgit2 in favor of the git CLI for all git-related functionality, which
should allow us to use features such as partial clones and sparse
checkouts in the future for performance.

There is also a lot of technical debt in the current git code as it's
mostly taken from Cargo. Switching to the git CLI *vastly* simplifies
the `uv-git` codebase.

Eventually we might want to look into switching to
[`gitoxide`](https://github.com/Byron/gitoxide), but it's currently too
immature for our use case.
2024-05-30 15:28:48 -04:00

146 lines
4.7 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
//! Git support is derived from Cargo's implementation.
//! Cargo is dual-licensed under either Apache 2.0 or MIT, at the user's choice.
//! Source: <https://github.com/rust-lang/cargo/blob/23eb492cf920ce051abfc56bbaf838514dc8365c/src/cargo/sources/git/source.rs>
use std::path::{Path, PathBuf};
use anyhow::Result;
use reqwest::Client;
use tracing::{debug, instrument};
use url::Url;
use cache_key::{digest, RepositoryUrl};
use crate::git::GitRemote;
use crate::{GitOid, GitSha, GitUrl};
/// A remote Git source that can be checked out locally.
pub struct GitSource {
/// The Git reference from the manifest file.
git: GitUrl,
/// The HTTP client to use for fetching.
client: Client,
/// The path to the Git source database.
cache: PathBuf,
/// The reporter to use for this source.
reporter: Option<Box<dyn Reporter>>,
}
impl GitSource {
/// Initialize a new Git source.
pub fn new(git: GitUrl, cache: impl Into<PathBuf>) -> Self {
Self {
git,
client: Client::new(),
cache: cache.into(),
reporter: None,
}
}
/// Set the [`Reporter`] to use for this `GIt` source.
#[must_use]
pub fn with_reporter(self, reporter: impl Reporter + 'static) -> Self {
Self {
reporter: Some(Box::new(reporter)),
..self
}
}
/// Fetch the underlying Git repository at the given revision.
#[instrument(skip(self), fields(repository = %self.git.repository, rev = ?self.git.precise))]
pub fn fetch(self) -> Result<Fetch> {
// The path to the repo, within the Git database.
let ident = digest(&RepositoryUrl::new(&self.git.repository));
let db_path = self.cache.join("db").join(&ident);
let remote = GitRemote::new(&self.git.repository);
let (db, actual_rev, task) = match (self.git.precise, remote.db_at(&db_path).ok()) {
// If we have a locked revision, and we have a preexisting database
// which has that revision, then no update needs to happen.
(Some(rev), Some(db)) if db.contains(rev.into()) => (db, rev, None),
// ... otherwise we use this state to update the git database. Note
// that we still check for being offline here, for example in the
// situation that we have a locked revision but the database
// doesn't have it.
(locked_rev, db) => {
debug!("Updating git source `{:?}`", self.git.repository);
// Report the checkout operation to the reporter.
let task = self.reporter.as_ref().map(|reporter| {
reporter.on_checkout_start(remote.url(), self.git.reference.as_rev())
});
let (db, actual_rev) = remote.checkout(
&db_path,
db,
&self.git.reference,
locked_rev.map(GitOid::from),
&self.client,
)?;
(db, GitSha::from(actual_rev), task)
}
};
// Dont use the full hash, in order to contribute less to reaching the
// path length limit on Windows.
let short_id = db.to_short_id(actual_rev.into())?;
// Check out `actual_rev` from the database to a scoped location on the
// filesystem. This will use hard links and such to ideally make the
// checkout operation here pretty fast.
let checkout_path = self
.cache
.join("checkouts")
.join(&ident)
.join(short_id.as_str());
db.copy_to(actual_rev.into(), &checkout_path)?;
// Report the checkout operation to the reporter.
if let Some(task) = task {
if let Some(reporter) = self.reporter.as_ref() {
reporter.on_checkout_complete(remote.url(), short_id.as_str(), task);
}
}
Ok(Fetch {
git: self.git.with_precise(actual_rev),
path: checkout_path,
})
}
}
pub struct Fetch {
/// The [`GitUrl`] reference that was fetched.
git: GitUrl,
/// The path to the checked out repository.
path: PathBuf,
}
impl Fetch {
pub fn git(&self) -> &GitUrl {
&self.git
}
pub fn path(&self) -> &Path {
&self.path
}
pub fn into_git(self) -> GitUrl {
self.git
}
pub fn into_path(self) -> PathBuf {
self.path
}
}
pub trait Reporter: Send + Sync {
/// Callback to invoke when a repository checkout begins.
fn on_checkout_start(&self, url: &Url, rev: &str) -> usize;
/// Callback to invoke when a repository checkout completes.
fn on_checkout_complete(&self, url: &Url, rev: &str, index: usize);
}