ruff/crates/ruff_benchmark/benches/red_knot.rs

250 lines
7.7 KiB
Rust

#![allow(clippy::disallowed_names)]
use std::borrow::Cow;
use std::ops::Range;
use rayon::ThreadPoolBuilder;
use red_knot_project::metadata::options::{EnvironmentOptions, Options};
use red_knot_project::metadata::value::RangedValue;
use red_knot_project::watch::{ChangeEvent, ChangedKind};
use red_knot_project::{Db, ProjectDatabase, ProjectMetadata};
use ruff_benchmark::criterion::{criterion_group, criterion_main, BatchSize, Criterion};
use ruff_benchmark::TestFile;
use ruff_db::diagnostic::{Diagnostic, DiagnosticId, Severity};
use ruff_db::files::{system_path_to_file, File};
use ruff_db::source::source_text;
use ruff_db::system::{MemoryFileSystem, SystemPath, SystemPathBuf, TestSystem};
use ruff_python_ast::python_version::PythonVersion;
use rustc_hash::FxHashSet;
struct Case {
db: ProjectDatabase,
fs: MemoryFileSystem,
re: File,
re_path: SystemPathBuf,
}
// "https://raw.githubusercontent.com/python/cpython/8e8a4baf652f6e1cee7acde9d78c4b6154539748/Lib/tomllib";
static TOMLLIB_FILES: [TestFile; 4] = [
TestFile::new(
"tomllib/__init__.py",
include_str!("../resources/tomllib/__init__.py"),
),
TestFile::new(
"tomllib/_parser.py",
include_str!("../resources/tomllib/_parser.py"),
),
TestFile::new(
"tomllib/_re.py",
include_str!("../resources/tomllib/_re.py"),
),
TestFile::new(
"tomllib/_types.py",
include_str!("../resources/tomllib/_types.py"),
),
];
/// A structured set of fields we use to do diagnostic comparisons.
///
/// This helps assert benchmark results. Previously, we would compare
/// the actual diagnostic output, but using `insta` inside benchmarks is
/// problematic, and updating the strings otherwise when diagnostic rendering
/// changes is a PITA.
type KeyDiagnosticFields = (
DiagnosticId,
Option<&'static str>,
Option<Range<usize>>,
Cow<'static, str>,
Severity,
);
static EXPECTED_DIAGNOSTICS: &[KeyDiagnosticFields] = &[
// We don't support `*` imports yet:
(
DiagnosticId::lint("unresolved-import"),
Some("/src/tomllib/_parser.py"),
Some(192..200),
Cow::Borrowed("Module `collections.abc` has no member `Iterable`"),
Severity::Error,
),
// We don't handle intersections in `is_assignable_to` yet
(
DiagnosticId::lint("invalid-argument-type"),
Some("/src/tomllib/_parser.py"),
Some(20158..20172),
Cow::Borrowed("Object of type `Unknown & ~AlwaysFalsy | @Todo & ~AlwaysFalsy` cannot be assigned to parameter 1 (`match`) of function `match_to_datetime`; expected type `Match`"),
Severity::Error,
),
(
DiagnosticId::lint("invalid-argument-type"),
Some("/src/tomllib/_parser.py"),
Some(20464..20479),
Cow::Borrowed("Object of type `Unknown & ~AlwaysFalsy | @Todo & ~AlwaysFalsy` cannot be assigned to parameter 1 (`match`) of function `match_to_localtime`; expected type `Match`"),
Severity::Error,
),
(
DiagnosticId::lint("invalid-argument-type"),
Some("/src/tomllib/_parser.py"),
Some(20774..20786),
Cow::Borrowed("Object of type `Unknown & ~AlwaysFalsy | @Todo & ~AlwaysFalsy` cannot be assigned to parameter 1 (`match`) of function `match_to_number`; expected type `Match`"),
Severity::Error,
),
(
DiagnosticId::lint("unused-ignore-comment"),
Some("/src/tomllib/_parser.py"),
Some(22299..22333),
Cow::Borrowed("Unused blanket `type: ignore` directive"),
Severity::Warning,
),
];
fn tomllib_path(file: &TestFile) -> SystemPathBuf {
SystemPathBuf::from("src").join(file.name())
}
fn setup_case() -> Case {
let system = TestSystem::default();
let fs = system.memory_file_system().clone();
fs.write_files(
TOMLLIB_FILES
.iter()
.map(|file| (tomllib_path(file), file.code().to_string())),
)
.unwrap();
let src_root = SystemPath::new("/src");
let mut metadata = ProjectMetadata::discover(src_root, &system).unwrap();
metadata.apply_cli_options(Options {
environment: Some(EnvironmentOptions {
python_version: Some(RangedValue::cli(PythonVersion::PY312)),
..EnvironmentOptions::default()
}),
..Options::default()
});
let mut db = ProjectDatabase::new(metadata, system).unwrap();
let mut tomllib_files = FxHashSet::default();
let mut re: Option<File> = None;
for test_file in &TOMLLIB_FILES {
let file = system_path_to_file(&db, tomllib_path(test_file)).unwrap();
if test_file.name().ends_with("_re.py") {
re = Some(file);
}
tomllib_files.insert(file);
}
let re = re.unwrap();
db.project().set_open_files(&mut db, tomllib_files);
let re_path = re.path(&db).as_system_path().unwrap().to_owned();
Case {
db,
fs,
re,
re_path,
}
}
static RAYON_INITIALIZED: std::sync::Once = std::sync::Once::new();
fn setup_rayon() {
// Initialize the rayon thread pool outside the benchmark because it has a significant cost.
// We limit the thread pool to only one (the current thread) because we're focused on
// where red knot spends time and less about how well the code runs concurrently.
// We might want to add a benchmark focusing on concurrency to detect congestion in the future.
RAYON_INITIALIZED.call_once(|| {
ThreadPoolBuilder::new()
.num_threads(1)
.use_current_thread()
.build_global()
.unwrap();
});
}
fn benchmark_incremental(criterion: &mut Criterion) {
fn setup() -> Case {
let case = setup_case();
let result: Vec<_> = case.db.check().unwrap();
assert_diagnostics(&case.db, &result);
case.fs
.write_file(
&case.re_path,
format!("{}\n# A comment\n", source_text(&case.db, case.re).as_str()),
)
.unwrap();
case
}
fn incremental(case: &mut Case) {
let Case { db, .. } = case;
db.apply_changes(
vec![ChangeEvent::Changed {
path: case.re_path.clone(),
kind: ChangedKind::FileContent,
}],
None,
);
let result = db.check().unwrap();
assert_eq!(result.len(), EXPECTED_DIAGNOSTICS.len());
}
setup_rayon();
criterion.bench_function("red_knot_check_file[incremental]", |b| {
b.iter_batched_ref(setup, incremental, BatchSize::SmallInput);
});
}
fn benchmark_cold(criterion: &mut Criterion) {
setup_rayon();
criterion.bench_function("red_knot_check_file[cold]", |b| {
b.iter_batched_ref(
setup_case,
|case| {
let Case { db, .. } = case;
let result: Vec<_> = db.check().unwrap();
assert_diagnostics(db, &result);
},
BatchSize::SmallInput,
);
});
}
#[track_caller]
fn assert_diagnostics(db: &dyn Db, diagnostics: &[Box<dyn Diagnostic>]) {
let normalized: Vec<_> = diagnostics
.iter()
.map(|diagnostic| {
(
diagnostic.id(),
diagnostic
.span()
.map(|span| span.file())
.map(|file| file.path(db).as_str()),
diagnostic
.span()
.and_then(|span| span.range())
.map(Range::<usize>::from),
diagnostic.message(),
diagnostic.severity(),
)
})
.collect();
assert_eq!(&normalized, EXPECTED_DIAGNOSTICS);
}
criterion_group!(check_file, benchmark_cold, benchmark_incremental);
criterion_main!(check_file);