Basic concurrent checking (#13049)

This commit is contained in:
Micha Reiser 2024-08-24 10:53:27 +02:00 committed by GitHub
parent 8c09496b07
commit ecab04e338
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 129 additions and 35 deletions

2
Cargo.lock generated
View File

@@ -1989,6 +1989,7 @@ dependencies = [
"anyhow", "anyhow",
"crossbeam", "crossbeam",
"notify", "notify",
"rayon",
"red_knot_python_semantic", "red_knot_python_semantic",
"ruff_cache", "ruff_cache",
"ruff_db", "ruff_db",
@@ -2147,6 +2148,7 @@ dependencies = [
"criterion", "criterion",
"mimalloc", "mimalloc",
"once_cell", "once_cell",
"rayon",
"red_knot_python_semantic", "red_knot_python_semantic",
"red_knot_workspace", "red_knot_workspace",
"ruff_db", "ruff_db",

View File

@@ -11,11 +11,11 @@ fn check() {
}; };
let mut workspace = Workspace::new("/", &settings).expect("Workspace to be created"); let mut workspace = Workspace::new("/", &settings).expect("Workspace to be created");
let test = workspace workspace
.open_file("test.py", "import random22\n") .open_file("test.py", "import random22\n")
.expect("File to be opened"); .expect("File to be opened");
let result = workspace.check_file(&test).expect("Check to succeed"); let result = workspace.check().expect("Check to succeed");
assert_eq!( assert_eq!(
result, result,

View File

@@ -22,6 +22,7 @@ ruff_text_size = { workspace = true }
anyhow = { workspace = true } anyhow = { workspace = true }
crossbeam = { workspace = true } crossbeam = { workspace = true }
notify = { workspace = true } notify = { workspace = true }
rayon = { workspace = true }
rustc-hash = { workspace = true } rustc-hash = { workspace = true }
salsa = { workspace = true } salsa = { workspace = true }
tracing = { workspace = true } tracing = { workspace = true }

View File

@@ -56,6 +56,8 @@ impl RootDatabase {
} }
pub fn check_file(&self, file: File) -> Result<Vec<String>, Cancelled> { pub fn check_file(&self, file: File) -> Result<Vec<String>, Cancelled> {
let _span = tracing::debug_span!("check_file", file=%file.path(self)).entered();
self.with_db(|db| check_file(db, file)) self.with_db(|db| check_file(db, file))
} }

View File

@@ -15,7 +15,8 @@ use ruff_db::{
use ruff_python_ast::{name::Name, PySourceType}; use ruff_python_ast::{name::Name, PySourceType};
use ruff_text_size::Ranged; use ruff_text_size::Ranged;
use crate::workspace::files::{Index, Indexed, PackageFiles}; use crate::db::RootDatabase;
use crate::workspace::files::{Index, Indexed, IndexedIter, PackageFiles};
use crate::{ use crate::{
db::Db, db::Db,
lint::{lint_semantic, lint_syntax}, lint::{lint_semantic, lint_syntax},
@@ -190,23 +191,35 @@ impl Workspace {
} }
/// Checks all open files in the workspace and its dependencies. /// Checks all open files in the workspace and its dependencies.
#[tracing::instrument(level = "debug", skip_all)] pub fn check(self, db: &RootDatabase) -> Vec<String> {
pub fn check(self, db: &dyn Db) -> Vec<String> { let workspace_span = tracing::debug_span!("check_workspace");
let _span = workspace_span.enter();
tracing::debug!("Checking workspace"); tracing::debug!("Checking workspace");
let files = WorkspaceFiles::new(db, self);
let result = Arc::new(std::sync::Mutex::new(Vec::new()));
let inner_result = Arc::clone(&result);
let mut result = Vec::new(); let db = db.snapshot();
let workspace_span = workspace_span.clone();
if let Some(open_files) = self.open_files(db) { rayon::scope(move |scope| {
for file in open_files { for file in &files {
result.extend_from_slice(&check_file(db, *file)); let result = inner_result.clone();
} let db = db.snapshot();
} else { let workspace_span = workspace_span.clone();
for package in self.packages(db) {
result.extend(package.check(db));
}
}
result scope.spawn(move |_| {
let check_file_span = tracing::debug_span!(parent: &workspace_span, "check_file", file=%file.path(&db));
let _entered = check_file_span.entered();
let file_diagnostics = check_file(&db, file);
result.lock().unwrap().extend(file_diagnostics);
});
}
});
Arc::into_inner(result).unwrap().into_inner().unwrap()
} }
/// Opens a file in the workspace. /// Opens a file in the workspace.
@@ -324,19 +337,6 @@ impl Package {
index.insert(file); index.insert(file);
} }
#[tracing::instrument(level = "debug", skip(db))]
pub(crate) fn check(self, db: &dyn Db) -> Vec<String> {
tracing::debug!("Checking package '{}'", self.root(db));
let mut result = Vec::new();
for file in &self.files(db) {
let diagnostics = check_file(db, file);
result.extend_from_slice(&diagnostics);
}
result
}
/// Returns the files belonging to this package. /// Returns the files belonging to this package.
pub fn files(self, db: &dyn Db) -> Indexed<'_> { pub fn files(self, db: &dyn Db) -> Indexed<'_> {
let files = self.file_set(db); let files = self.file_set(db);
@@ -384,9 +384,7 @@ impl Package {
#[salsa::tracked] #[salsa::tracked]
pub(super) fn check_file(db: &dyn Db, file: File) -> Vec<String> { pub(super) fn check_file(db: &dyn Db, file: File) -> Vec<String> {
let path = file.path(db); tracing::debug!("Checking file '{path}'", path = file.path(db));
let _span = tracing::debug_span!("check_file", file=%path).entered();
tracing::debug!("Checking file '{path}'");
let mut diagnostics = Vec::new(); let mut diagnostics = Vec::new();
@@ -474,6 +472,73 @@ fn discover_package_files(db: &dyn Db, path: &SystemPath) -> FxHashSet<File> {
files files
} }
#[derive(Debug)]
enum WorkspaceFiles<'a> {
OpenFiles(&'a FxHashSet<File>),
PackageFiles(Vec<Indexed<'a>>),
}
impl<'a> WorkspaceFiles<'a> {
fn new(db: &'a dyn Db, workspace: Workspace) -> Self {
if let Some(open_files) = workspace.open_files(db) {
WorkspaceFiles::OpenFiles(open_files)
} else {
WorkspaceFiles::PackageFiles(
workspace
.packages(db)
.map(|package| package.files(db))
.collect(),
)
}
}
}
impl<'a> IntoIterator for &'a WorkspaceFiles<'a> {
type Item = File;
type IntoIter = WorkspaceFilesIter<'a>;
fn into_iter(self) -> Self::IntoIter {
match self {
WorkspaceFiles::OpenFiles(files) => WorkspaceFilesIter::OpenFiles(files.iter()),
WorkspaceFiles::PackageFiles(package_files) => {
let mut package_files = package_files.iter();
WorkspaceFilesIter::PackageFiles {
current: package_files.next().map(IntoIterator::into_iter),
package_files,
}
}
}
}
}
enum WorkspaceFilesIter<'db> {
OpenFiles(std::collections::hash_set::Iter<'db, File>),
PackageFiles {
package_files: std::slice::Iter<'db, Indexed<'db>>,
current: Option<IndexedIter<'db>>,
},
}
impl Iterator for WorkspaceFilesIter<'_> {
type Item = File;
fn next(&mut self) -> Option<Self::Item> {
match self {
WorkspaceFilesIter::OpenFiles(files) => files.next().copied(),
WorkspaceFilesIter::PackageFiles {
package_files,
current,
} => loop {
if let Some(file) = current.as_mut().and_then(Iterator::next) {
return Some(file);
}
*current = Some(package_files.next()?.into_iter());
},
}
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use ruff_db::files::system_path_to_file; use ruff_db::files::system_path_to_file;

View File

@@ -158,9 +158,11 @@ impl Deref for Indexed<'_> {
} }
} }
pub(super) type IndexedIter<'a> = std::iter::Copied<std::collections::hash_set::Iter<'a, File>>;
impl<'a> IntoIterator for &'a Indexed<'_> { impl<'a> IntoIterator for &'a Indexed<'_> {
type Item = File; type Item = File;
type IntoIter = std::iter::Copied<std::collections::hash_set::Iter<'a, File>>; type IntoIter = IndexedIter<'a>;
fn into_iter(self) -> Self::IntoIter { fn into_iter(self) -> Self::IntoIter {
self.files.iter().copied() self.files.iter().copied()

View File

@@ -37,13 +37,14 @@ name = "red_knot"
harness = false harness = false
[dependencies] [dependencies]
codspeed-criterion-compat = { workspace = true, default-features = false, optional = true }
criterion = { workspace = true, default-features = false }
once_cell = { workspace = true } once_cell = { workspace = true }
rayon = { workspace = true }
serde = { workspace = true } serde = { workspace = true }
serde_json = { workspace = true } serde_json = { workspace = true }
url = { workspace = true } url = { workspace = true }
ureq = { workspace = true } ureq = { workspace = true }
criterion = { workspace = true, default-features = false }
codspeed-criterion-compat = { workspace = true, default-features = false, optional = true }
[dev-dependencies] [dev-dependencies]
ruff_db = { workspace = true } ruff_db = { workspace = true }

View File

@@ -1,5 +1,6 @@
#![allow(clippy::disallowed_names)] #![allow(clippy::disallowed_names)]
use rayon::ThreadPoolBuilder;
use red_knot_python_semantic::PythonVersion; use red_knot_python_semantic::PythonVersion;
use red_knot_workspace::db::RootDatabase; use red_knot_workspace::db::RootDatabase;
use red_knot_workspace::watch::{ChangeEvent, ChangedKind}; use red_knot_workspace::watch::{ChangeEvent, ChangedKind};
@@ -97,7 +98,25 @@ fn setup_case() -> Case {
} }
} }
static RAYON_INITIALIZED: std::sync::Once = std::sync::Once::new();
fn setup_rayon() {
// Initialize the rayon thread pool outside the benchmark because it has a significant cost.
// We limit the thread pool to only one (the current thread) because we're focused on
// where red knot spends time and less about how well the code runs concurrently.
// We might want to add a benchmark focusing on concurrency to detect congestion in the future.
RAYON_INITIALIZED.call_once(|| {
ThreadPoolBuilder::new()
.num_threads(1)
.use_current_thread()
.build_global()
.unwrap();
});
}
fn benchmark_incremental(criterion: &mut Criterion) { fn benchmark_incremental(criterion: &mut Criterion) {
setup_rayon();
criterion.bench_function("red_knot_check_file[incremental]", |b| { criterion.bench_function("red_knot_check_file[incremental]", |b| {
b.iter_batched_ref( b.iter_batched_ref(
|| { || {
@@ -134,6 +153,8 @@ fn benchmark_incremental(criterion: &mut Criterion) {
} }
fn benchmark_cold(criterion: &mut Criterion) { fn benchmark_cold(criterion: &mut Criterion) {
setup_rayon();
criterion.bench_function("red_knot_check_file[cold]", |b| { criterion.bench_function("red_knot_check_file[cold]", |b| {
b.iter_batched_ref( b.iter_batched_ref(
setup_case, setup_case,