Basic concurrent checking (#13049)

This commit is contained in:
Micha Reiser 2024-08-24 10:53:27 +02:00 committed by GitHub
parent 8c09496b07
commit ecab04e338
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 129 additions and 35 deletions

2
Cargo.lock generated
View File

@ -1989,6 +1989,7 @@ dependencies = [
"anyhow",
"crossbeam",
"notify",
"rayon",
"red_knot_python_semantic",
"ruff_cache",
"ruff_db",
@ -2147,6 +2148,7 @@ dependencies = [
"criterion",
"mimalloc",
"once_cell",
"rayon",
"red_knot_python_semantic",
"red_knot_workspace",
"ruff_db",

View File

@ -11,11 +11,11 @@ fn check() {
};
let mut workspace = Workspace::new("/", &settings).expect("Workspace to be created");
let test = workspace
workspace
.open_file("test.py", "import random22\n")
.expect("File to be opened");
let result = workspace.check_file(&test).expect("Check to succeed");
let result = workspace.check().expect("Check to succeed");
assert_eq!(
result,

View File

@ -22,6 +22,7 @@ ruff_text_size = { workspace = true }
anyhow = { workspace = true }
crossbeam = { workspace = true }
notify = { workspace = true }
rayon = { workspace = true }
rustc-hash = { workspace = true }
salsa = { workspace = true }
tracing = { workspace = true }

View File

@ -56,6 +56,8 @@ impl RootDatabase {
}
pub fn check_file(&self, file: File) -> Result<Vec<String>, Cancelled> {
let _span = tracing::debug_span!("check_file", file=%file.path(self)).entered();
self.with_db(|db| check_file(db, file))
}

View File

@ -15,7 +15,8 @@ use ruff_db::{
use ruff_python_ast::{name::Name, PySourceType};
use ruff_text_size::Ranged;
use crate::workspace::files::{Index, Indexed, PackageFiles};
use crate::db::RootDatabase;
use crate::workspace::files::{Index, Indexed, IndexedIter, PackageFiles};
use crate::{
db::Db,
lint::{lint_semantic, lint_syntax},
@ -190,23 +191,35 @@ impl Workspace {
}
/// Checks all open files in the workspace and its dependencies.
#[tracing::instrument(level = "debug", skip_all)]
pub fn check(self, db: &dyn Db) -> Vec<String> {
pub fn check(self, db: &RootDatabase) -> Vec<String> {
let workspace_span = tracing::debug_span!("check_workspace");
let _span = workspace_span.enter();
tracing::debug!("Checking workspace");
let files = WorkspaceFiles::new(db, self);
let result = Arc::new(std::sync::Mutex::new(Vec::new()));
let inner_result = Arc::clone(&result);
let mut result = Vec::new();
let db = db.snapshot();
let workspace_span = workspace_span.clone();
if let Some(open_files) = self.open_files(db) {
for file in open_files {
result.extend_from_slice(&check_file(db, *file));
}
} else {
for package in self.packages(db) {
result.extend(package.check(db));
}
}
rayon::scope(move |scope| {
for file in &files {
let result = inner_result.clone();
let db = db.snapshot();
let workspace_span = workspace_span.clone();
result
scope.spawn(move |_| {
let check_file_span = tracing::debug_span!(parent: &workspace_span, "check_file", file=%file.path(&db));
let _entered = check_file_span.entered();
let file_diagnostics = check_file(&db, file);
result.lock().unwrap().extend(file_diagnostics);
});
}
});
Arc::into_inner(result).unwrap().into_inner().unwrap()
}
/// Opens a file in the workspace.
@ -324,19 +337,6 @@ impl Package {
index.insert(file);
}
#[tracing::instrument(level = "debug", skip(db))]
pub(crate) fn check(self, db: &dyn Db) -> Vec<String> {
tracing::debug!("Checking package '{}'", self.root(db));
let mut result = Vec::new();
for file in &self.files(db) {
let diagnostics = check_file(db, file);
result.extend_from_slice(&diagnostics);
}
result
}
/// Returns the files belonging to this package.
pub fn files(self, db: &dyn Db) -> Indexed<'_> {
let files = self.file_set(db);
@ -384,9 +384,7 @@ impl Package {
#[salsa::tracked]
pub(super) fn check_file(db: &dyn Db, file: File) -> Vec<String> {
let path = file.path(db);
let _span = tracing::debug_span!("check_file", file=%path).entered();
tracing::debug!("Checking file '{path}'");
tracing::debug!("Checking file '{path}'", path = file.path(db));
let mut diagnostics = Vec::new();
@ -474,6 +472,73 @@ fn discover_package_files(db: &dyn Db, path: &SystemPath) -> FxHashSet<File> {
files
}
#[derive(Debug)]
enum WorkspaceFiles<'a> {
OpenFiles(&'a FxHashSet<File>),
PackageFiles(Vec<Indexed<'a>>),
}
impl<'a> WorkspaceFiles<'a> {
fn new(db: &'a dyn Db, workspace: Workspace) -> Self {
if let Some(open_files) = workspace.open_files(db) {
WorkspaceFiles::OpenFiles(open_files)
} else {
WorkspaceFiles::PackageFiles(
workspace
.packages(db)
.map(|package| package.files(db))
.collect(),
)
}
}
}
impl<'a> IntoIterator for &'a WorkspaceFiles<'a> {
type Item = File;
type IntoIter = WorkspaceFilesIter<'a>;
fn into_iter(self) -> Self::IntoIter {
match self {
WorkspaceFiles::OpenFiles(files) => WorkspaceFilesIter::OpenFiles(files.iter()),
WorkspaceFiles::PackageFiles(package_files) => {
let mut package_files = package_files.iter();
WorkspaceFilesIter::PackageFiles {
current: package_files.next().map(IntoIterator::into_iter),
package_files,
}
}
}
}
}
enum WorkspaceFilesIter<'db> {
OpenFiles(std::collections::hash_set::Iter<'db, File>),
PackageFiles {
package_files: std::slice::Iter<'db, Indexed<'db>>,
current: Option<IndexedIter<'db>>,
},
}
impl Iterator for WorkspaceFilesIter<'_> {
type Item = File;
fn next(&mut self) -> Option<Self::Item> {
match self {
WorkspaceFilesIter::OpenFiles(files) => files.next().copied(),
WorkspaceFilesIter::PackageFiles {
package_files,
current,
} => loop {
if let Some(file) = current.as_mut().and_then(Iterator::next) {
return Some(file);
}
*current = Some(package_files.next()?.into_iter());
},
}
}
}
#[cfg(test)]
mod tests {
use ruff_db::files::system_path_to_file;

View File

@ -158,9 +158,11 @@ impl Deref for Indexed<'_> {
}
}
pub(super) type IndexedIter<'a> = std::iter::Copied<std::collections::hash_set::Iter<'a, File>>;
impl<'a> IntoIterator for &'a Indexed<'_> {
type Item = File;
type IntoIter = std::iter::Copied<std::collections::hash_set::Iter<'a, File>>;
type IntoIter = IndexedIter<'a>;
fn into_iter(self) -> Self::IntoIter {
self.files.iter().copied()

View File

@ -37,13 +37,14 @@ name = "red_knot"
harness = false
[dependencies]
codspeed-criterion-compat = { workspace = true, default-features = false, optional = true }
criterion = { workspace = true, default-features = false }
once_cell = { workspace = true }
rayon = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
url = { workspace = true }
ureq = { workspace = true }
criterion = { workspace = true, default-features = false }
codspeed-criterion-compat = { workspace = true, default-features = false, optional = true }
[dev-dependencies]
ruff_db = { workspace = true }

View File

@ -1,5 +1,6 @@
#![allow(clippy::disallowed_names)]
use rayon::ThreadPoolBuilder;
use red_knot_python_semantic::PythonVersion;
use red_knot_workspace::db::RootDatabase;
use red_knot_workspace::watch::{ChangeEvent, ChangedKind};
@ -97,7 +98,25 @@ fn setup_case() -> Case {
}
}
static RAYON_INITIALIZED: std::sync::Once = std::sync::Once::new();
fn setup_rayon() {
// Initialize the rayon thread pool outside the benchmark because it has a significant cost.
// We limit the thread pool to only one (the current thread) because we're focused on
// where red knot spends time and less about how well the code runs concurrently.
// We might want to add a benchmark focusing on concurrency to detect congestion in the future.
RAYON_INITIALIZED.call_once(|| {
ThreadPoolBuilder::new()
.num_threads(1)
.use_current_thread()
.build_global()
.unwrap();
});
}
fn benchmark_incremental(criterion: &mut Criterion) {
setup_rayon();
criterion.bench_function("red_knot_check_file[incremental]", |b| {
b.iter_batched_ref(
|| {
@ -134,6 +153,8 @@ fn benchmark_incremental(criterion: &mut Criterion) {
}
fn benchmark_cold(criterion: &mut Criterion) {
setup_rayon();
criterion.bench_function("red_knot_check_file[cold]", |b| {
b.iter_batched_ref(
setup_case,