[red-knot] gather type prevalence statistics (#15834)

Something Alex and I threw together during our 1:1 this morning. Allows
us to collect statistics on the prevalence of various types in a file,
most usefully TODO types or other dynamic types.
This commit is contained in:
Carl Meyer 2025-01-31 07:10:00 -08:00 committed by GitHub
parent 44ac17b3ba
commit ce769f6ae2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 131 additions and 0 deletions

View File

@ -53,6 +53,7 @@ mod mro;
mod narrow; mod narrow;
mod signatures; mod signatures;
mod slots; mod slots;
mod statistics;
mod string_annotation; mod string_annotation;
mod subclass_of; mod subclass_of;
mod type_ordering; mod type_ordering;

View File

@ -61,6 +61,7 @@ use crate::types::diagnostic::{
UNDEFINED_REVEAL, UNRESOLVED_ATTRIBUTE, UNRESOLVED_IMPORT, UNSUPPORTED_OPERATOR, UNDEFINED_REVEAL, UNRESOLVED_ATTRIBUTE, UNRESOLVED_IMPORT, UNSUPPORTED_OPERATOR,
}; };
use crate::types::mro::MroErrorKind; use crate::types::mro::MroErrorKind;
use crate::types::statistics::TypeStatistics;
use crate::types::unpacker::{UnpackResult, Unpacker}; use crate::types::unpacker::{UnpackResult, Unpacker};
use crate::types::{ use crate::types::{
builtins_symbol, global_symbol, symbol, symbol_from_bindings, symbol_from_declarations, builtins_symbol, global_symbol, symbol, symbol_from_bindings, symbol_from_declarations,
@ -299,6 +300,14 @@ impl<'db> TypeInference<'db> {
self.diagnostics.shrink_to_fit(); self.diagnostics.shrink_to_fit();
self.deferred.shrink_to_fit(); self.deferred.shrink_to_fit();
} }
pub(super) fn statistics(&self) -> TypeStatistics {
let mut statistics = TypeStatistics::default();
for ty in self.expressions.values() {
statistics.increment(*ty);
}
statistics
}
} }
impl WithDiagnostics for TypeInference<'_> { impl WithDiagnostics for TypeInference<'_> {

View File

@ -0,0 +1,121 @@
use crate::types::{infer_scope_types, semantic_index, Type};
use crate::Db;
use ruff_db::files::File;
use rustc_hash::FxHashMap;
/// Get type-coverage statistics for a file.
#[salsa::tracked(return_ref)]
pub fn type_statistics<'db>(db: &'db dyn Db, file: File) -> TypeStatistics<'db> {
let _span = tracing::trace_span!("type_statistics", file=?file.path(db)).entered();
tracing::debug!(
"Gathering statistics for file '{path}'",
path = file.path(db)
);
let index = semantic_index(db, file);
let mut statistics = TypeStatistics::default();
for scope_id in index.scope_ids() {
let result = infer_scope_types(db, scope_id);
statistics.extend(&result.statistics());
}
statistics
}
/// Map each type to count of expressions with that type.
#[derive(Debug, Default, Eq, PartialEq)]
pub(super) struct TypeStatistics<'db>(FxHashMap<Type<'db>, u32>);
impl<'db> TypeStatistics<'db> {
fn extend(&mut self, other: &TypeStatistics<'db>) {
for (ty, count) in &other.0 {
self.0
.entry(*ty)
.and_modify(|my_count| *my_count += count)
.or_insert(*count);
}
}
pub(super) fn increment(&mut self, ty: Type<'db>) {
self.0
.entry(ty)
.and_modify(|count| *count += 1)
.or_insert(1);
}
#[allow(unused)]
fn expression_count(&self) -> u32 {
self.0.values().sum()
}
#[allow(unused)]
fn todo_count(&self) -> u32 {
self.0
.iter()
.filter(|(key, _)| key.is_todo())
.map(|(_, count)| count)
.sum()
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::db::tests::{setup_db, TestDb};
use ruff_db::files::system_path_to_file;
use ruff_db::system::DbWithTestSystem;
fn get_stats<'db>(
db: &'db mut TestDb,
filename: &str,
source: &str,
) -> &'db TypeStatistics<'db> {
db.write_dedented(filename, source).unwrap();
type_statistics(db, system_path_to_file(db, filename).unwrap())
}
#[test]
fn all_static() {
let mut db = setup_db();
let stats = get_stats(&mut db, "src/foo.py", "1");
assert_eq!(stats.0, FxHashMap::from_iter([(Type::IntLiteral(1), 1)]));
}
#[test]
fn todo_and_expression_count() {
let mut db = setup_db();
let stats = get_stats(
&mut db,
"src/foo.py",
r#"
x = [x for x in [1]]
"#,
);
assert_eq!(stats.todo_count(), 4);
assert_eq!(stats.expression_count(), 6);
}
#[test]
fn sum() {
let mut db = setup_db();
let stats = get_stats(
&mut db,
"src/foo.py",
r#"
1
def f():
1
"#,
);
assert_eq!(stats.0[&Type::IntLiteral(1)], 2);
}
}