mirror of https://github.com/astral-sh/ruff
More stability checker options (#5299)
## Summary This contains three changes: * repos in `check_ecosystem.py` are stored as `org:name` instead of `org/name` to create a flat directory layout * `check_ecosystem.py` performs a maximum of 50 parallel jobs at the same time to avoid consuming too much RAM * `check-formatter-stability` gets a new option `--multi-project` so it's possible to do `cargo run --bin ruff_dev -- check-formatter-stability --multi-project target/checkouts` With these three changes it becomes easy to check the formatter stability over a larger number of repositories. This is part of integrating formatter regression checks into the ecosystem checks. ## Test Plan ```shell python scripts/check_ecosystem.py --checkouts target/checkouts --projects github_search.jsonl -v $(which true) $(which true) cargo run --bin ruff_dev -- check-formatter-stability --multi-project target/checkouts ```
This commit is contained in:
parent
f9f0cf7524
commit
03694ef649
|
|
@ -42,6 +42,9 @@ pub(crate) struct Args {
|
||||||
/// Print only the first error and exit, `-x` is same as pytest
|
/// Print only the first error and exit, `-x` is same as pytest
|
||||||
#[arg(long, short = 'x')]
|
#[arg(long, short = 'x')]
|
||||||
pub(crate) exit_first_error: bool,
|
pub(crate) exit_first_error: bool,
|
||||||
|
/// Checks each project inside a directory
|
||||||
|
#[arg(long)]
|
||||||
|
pub(crate) multi_project: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Generate ourself a `try_parse_from` impl for `CheckArgs`. This is a strange way to use clap but
|
/// Generate ourself a `try_parse_from` impl for `CheckArgs`. This is a strange way to use clap but
|
||||||
|
|
@ -54,6 +57,35 @@ struct WrapperArgs {
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn main(args: &Args) -> anyhow::Result<ExitCode> {
|
pub(crate) fn main(args: &Args) -> anyhow::Result<ExitCode> {
|
||||||
|
let all_success = if args.multi_project {
|
||||||
|
let mut all_success = true;
|
||||||
|
for base_dir in &args.files {
|
||||||
|
for dir in base_dir.read_dir()? {
|
||||||
|
let dir = dir?;
|
||||||
|
println!("Starting {}", dir.path().display());
|
||||||
|
let success = check_repo(&Args {
|
||||||
|
files: vec![dir.path().clone()],
|
||||||
|
..*args
|
||||||
|
});
|
||||||
|
println!("Finished {}: {:?}", dir.path().display(), success);
|
||||||
|
if !matches!(success, Ok(true)) {
|
||||||
|
all_success = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
all_success
|
||||||
|
} else {
|
||||||
|
check_repo(args)?
|
||||||
|
};
|
||||||
|
if all_success {
|
||||||
|
Ok(ExitCode::SUCCESS)
|
||||||
|
} else {
|
||||||
|
Ok(ExitCode::FAILURE)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns whether the check was successful
|
||||||
|
pub(crate) fn check_repo(args: &Args) -> anyhow::Result<bool> {
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
// Find files to check (or in this case, format twice). Adapted from ruff_cli
|
// Find files to check (or in this case, format twice). Adapted from ruff_cli
|
||||||
|
|
@ -77,13 +109,20 @@ pub(crate) fn main(args: &Args) -> anyhow::Result<ExitCode> {
|
||||||
let (paths, _resolver) = python_files_in_path(&cli.files, &pyproject_config, &overrides)?;
|
let (paths, _resolver) = python_files_in_path(&cli.files, &pyproject_config, &overrides)?;
|
||||||
assert!(!paths.is_empty(), "no python files in {:?}", cli.files);
|
assert!(!paths.is_empty(), "no python files in {:?}", cli.files);
|
||||||
|
|
||||||
|
let mut formatted_counter = 0;
|
||||||
let errors = paths
|
let errors = paths
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|dir_entry| {
|
.map(|dir_entry| {
|
||||||
// Doesn't make sense to recover here in this test script
|
// Doesn't make sense to recover here in this test script
|
||||||
let file = dir_entry
|
dir_entry.expect("Iterating the files in the repository failed")
|
||||||
.expect("Iterating the files in the repository failed")
|
})
|
||||||
.into_path();
|
.filter(|dir_entry| {
|
||||||
|
// For some reason it does not filter in the beginning
|
||||||
|
dir_entry.file_name() != "pyproject.toml"
|
||||||
|
})
|
||||||
|
.map(|dir_entry| {
|
||||||
|
let file = dir_entry.path().to_path_buf();
|
||||||
|
formatted_counter += 1;
|
||||||
// Handle panics (mostly in `debug_assert!`)
|
// Handle panics (mostly in `debug_assert!`)
|
||||||
let result = match catch_unwind(|| check_file(&file)) {
|
let result = match catch_unwind(|| check_file(&file)) {
|
||||||
Ok(result) => result,
|
Ok(result) => result,
|
||||||
|
|
@ -166,20 +205,20 @@ Formatted twice:
|
||||||
}
|
}
|
||||||
|
|
||||||
if args.exit_first_error {
|
if args.exit_first_error {
|
||||||
return Ok(ExitCode::FAILURE);
|
return Ok(false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let duration = start.elapsed();
|
let duration = start.elapsed();
|
||||||
println!(
|
println!(
|
||||||
"Formatting {} files twice took {:.2}s",
|
"Formatting {} files twice took {:.2}s",
|
||||||
cli.files.len(),
|
formatted_counter,
|
||||||
duration.as_secs_f32()
|
duration.as_secs_f32()
|
||||||
);
|
);
|
||||||
|
|
||||||
if any_errors {
|
if any_errors {
|
||||||
Ok(ExitCode::FAILURE)
|
Ok(false)
|
||||||
} else {
|
} else {
|
||||||
Ok(ExitCode::SUCCESS)
|
Ok(true)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -44,11 +44,11 @@ class Repository(NamedTuple):
|
||||||
async def clone(self: Self, checkout_dir: Path) -> AsyncIterator[Path]:
|
async def clone(self: Self, checkout_dir: Path) -> AsyncIterator[Path]:
|
||||||
"""Shallow clone this repository to a temporary directory."""
|
"""Shallow clone this repository to a temporary directory."""
|
||||||
if checkout_dir.exists():
|
if checkout_dir.exists():
|
||||||
logger.debug(f"Reusing {self.org}/{self.repo}")
|
logger.debug(f"Reusing {self.org}:{self.repo}")
|
||||||
yield Path(checkout_dir)
|
yield Path(checkout_dir)
|
||||||
return
|
return
|
||||||
|
|
||||||
logger.debug(f"Cloning {self.org}/{self.repo}")
|
logger.debug(f"Cloning {self.org}:{self.repo}")
|
||||||
git_command = [
|
git_command = [
|
||||||
"git",
|
"git",
|
||||||
"clone",
|
"clone",
|
||||||
|
|
@ -177,18 +177,17 @@ async def compare(
|
||||||
"""Check a specific repository against two versions of ruff."""
|
"""Check a specific repository against two versions of ruff."""
|
||||||
removed, added = set(), set()
|
removed, added = set(), set()
|
||||||
|
|
||||||
# Allows to keep the checkouts locations
|
# By default, the git clones are transient, but if the user provides a
|
||||||
|
# directory for permanent storage we keep it there
|
||||||
if checkouts:
|
if checkouts:
|
||||||
checkout_parent = checkouts.joinpath(repo.org)
|
location_context = nullcontext(checkouts)
|
||||||
# Don't create the repodir itself, we need that for checking for existing
|
|
||||||
# clones
|
|
||||||
checkout_parent.mkdir(exist_ok=True, parents=True)
|
|
||||||
location_context = nullcontext(checkout_parent)
|
|
||||||
else:
|
else:
|
||||||
location_context = tempfile.TemporaryDirectory()
|
location_context = tempfile.TemporaryDirectory()
|
||||||
|
|
||||||
with location_context as checkout_parent:
|
with location_context as checkout_parent:
|
||||||
checkout_dir = Path(checkout_parent).joinpath(repo.repo)
|
assert ":" not in repo.org
|
||||||
|
assert ":" not in repo.repo
|
||||||
|
checkout_dir = Path(checkout_parent).joinpath(f"{repo.org}:{repo.repo}")
|
||||||
async with repo.clone(checkout_dir) as path:
|
async with repo.clone(checkout_dir) as path:
|
||||||
try:
|
try:
|
||||||
async with asyncio.TaskGroup() as tg:
|
async with asyncio.TaskGroup() as tg:
|
||||||
|
|
@ -284,8 +283,19 @@ async def main(
|
||||||
|
|
||||||
logger.debug(f"Checking {len(repositories)} projects")
|
logger.debug(f"Checking {len(repositories)} projects")
|
||||||
|
|
||||||
|
# https://stackoverflow.com/a/61478547/3549270
|
||||||
|
# Otherwise doing 3k repositories can take >8GB RAM
|
||||||
|
semaphore = asyncio.Semaphore(50)
|
||||||
|
|
||||||
|
async def limited_parallelism(coroutine): # noqa: ANN
|
||||||
|
async with semaphore:
|
||||||
|
return await coroutine
|
||||||
|
|
||||||
results = await asyncio.gather(
|
results = await asyncio.gather(
|
||||||
*[compare(ruff1, ruff2, repo, checkouts) for repo in repositories.values()],
|
*[
|
||||||
|
limited_parallelism(compare(ruff1, ruff2, repo, checkouts))
|
||||||
|
for repo in repositories.values()
|
||||||
|
],
|
||||||
return_exceptions=True,
|
return_exceptions=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -433,6 +443,8 @@ if __name__ == "__main__":
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
|
if args.checkouts:
|
||||||
|
args.checkouts.mkdir(exist_ok=True, parents=True)
|
||||||
main_task = asyncio.ensure_future(
|
main_task = asyncio.ensure_future(
|
||||||
main(
|
main(
|
||||||
ruff1=args.ruff1,
|
ruff1=args.ruff1,
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue