From 7ee7c68f36dfac7713f24bf1bebb2b028d3d75b4 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Mon, 5 Aug 2024 21:33:36 +0100 Subject: [PATCH] Add a new script to generate builtin module names (#12696) --- .../red_knot_module_resolver/src/resolver.rs | 58 ++------- crates/ruff_db/src/program.rs | 12 ++ .../src/sys/builtin_modules.rs | 55 +++++++++ .../src/{sys.rs => sys/known_stdlib.rs} | 0 crates/ruff_python_stdlib/src/sys/mod.rs | 5 + scripts/generate_builtin_modules.py | 116 ++++++++++++++++++ scripts/generate_known_standard_library.py | 2 +- 7 files changed, 198 insertions(+), 50 deletions(-) create mode 100644 crates/ruff_python_stdlib/src/sys/builtin_modules.rs rename crates/ruff_python_stdlib/src/{sys.rs => sys/known_stdlib.rs} (100%) create mode 100644 crates/ruff_python_stdlib/src/sys/mod.rs create mode 100644 scripts/generate_builtin_modules.py diff --git a/crates/red_knot_module_resolver/src/resolver.rs b/crates/red_knot_module_resolver/src/resolver.rs index 39e98296d9..f11cd55cb4 100644 --- a/crates/red_knot_module_resolver/src/resolver.rs +++ b/crates/red_knot_module_resolver/src/resolver.rs @@ -1,7 +1,6 @@ use std::borrow::Cow; use std::iter::FusedIterator; -use once_cell::sync::Lazy; use ruff_db::files::{File, FilePath, FileRootKind}; use ruff_db::program::{Program, SearchPathSettings, TargetVersion}; use ruff_db::system::{DirectoryEntry, System, SystemPath, SystemPathBuf}; @@ -447,60 +446,21 @@ struct ModuleNameIngredient<'db> { pub(super) name: ModuleName, } -/// Modules that are builtin to the Python interpreter itself. -/// -/// When these module names are imported, standard module resolution is bypassed: -/// the module name always resolves to the stdlib module, -/// even if there's a module of the same name in the workspace root -/// (which would normally result in the stdlib module being overridden). 
-/// -/// TODO(Alex): write a script to generate this list, -/// similar to what we do in `crates/ruff_python_stdlib/src/sys.rs` -static BUILTIN_MODULES: Lazy> = Lazy::new(|| { - const BUILTIN_MODULE_NAMES: &[&str] = &[ - "_abc", - "_ast", - "_codecs", - "_collections", - "_functools", - "_imp", - "_io", - "_locale", - "_operator", - "_signal", - "_sre", - "_stat", - "_string", - "_symtable", - "_thread", - "_tokenize", - "_tracemalloc", - "_typing", - "_warnings", - "_weakref", - "atexit", - "builtins", - "errno", - "faulthandler", - "gc", - "itertools", - "marshal", - "posix", - "pwd", - "sys", - "time", - ]; - BUILTIN_MODULE_NAMES.iter().copied().collect() -}); - /// Given a module name and a list of search paths in which to lookup modules, /// attempt to resolve the module name fn resolve_name(db: &dyn Db, name: &ModuleName) -> Option<(SearchPath, File, ModuleKind)> { let resolver_settings = module_resolution_settings(db); - let resolver_state = ResolverState::new(db, resolver_settings.target_version()); - let is_builtin_module = BUILTIN_MODULES.contains(&name.as_str()); + let target_version = resolver_settings.target_version(); + let resolver_state = ResolverState::new(db, target_version); + let (_, minor_version) = target_version.as_tuple(); + let is_builtin_module = + ruff_python_stdlib::sys::is_builtin_module(minor_version, name.as_str()); for search_path in resolver_settings.search_paths(db) { + // When a builtin module is imported, standard module resolution is bypassed: + // the module name always resolves to the stdlib module, + // even if there's a module of the same name in the workspace root + // (which would normally result in the stdlib module being overridden). 
if is_builtin_module && !search_path.is_standard_library() { continue; } diff --git a/crates/ruff_db/src/program.rs b/crates/ruff_db/src/program.rs index 78f3fc5a3b..9fcc102f1f 100644 --- a/crates/ruff_db/src/program.rs +++ b/crates/ruff_db/src/program.rs @@ -39,6 +39,18 @@ pub enum TargetVersion { } impl TargetVersion { + pub const fn as_tuple(self) -> (u8, u8) { + match self { + Self::Py37 => (3, 7), + Self::Py38 => (3, 8), + Self::Py39 => (3, 9), + Self::Py310 => (3, 10), + Self::Py311 => (3, 11), + Self::Py312 => (3, 12), + Self::Py313 => (3, 13), + } + } + const fn as_str(self) -> &'static str { match self { Self::Py37 => "py37", diff --git a/crates/ruff_python_stdlib/src/sys/builtin_modules.rs b/crates/ruff_python_stdlib/src/sys/builtin_modules.rs new file mode 100644 index 0000000000..6000857a2f --- /dev/null +++ b/crates/ruff_python_stdlib/src/sys/builtin_modules.rs @@ -0,0 +1,55 @@ +//! This file is generated by `scripts/generate_builtin_modules.py` + +/// Return `true` if `module` is a [builtin module] on the given +/// Python 3 version. +/// +/// "Builtin modules" are modules that are compiled directly into the +/// Python interpreter. These can never be shadowed by first-party +/// modules; the normal rules of module resolution do not apply to these +/// modules. 
+/// +/// [builtin module]: https://docs.python.org/3/library/sys.html#sys.builtin_module_names +#[allow(clippy::unnested_or_patterns)] +pub fn is_builtin_module(minor_version: u8, module: &str) -> bool { + matches!( + (minor_version, module), + ( + _, + "_abc" + | "_ast" + | "_codecs" + | "_collections" + | "_functools" + | "_imp" + | "_io" + | "_locale" + | "_operator" + | "_signal" + | "_sre" + | "_stat" + | "_string" + | "_symtable" + | "_thread" + | "_tracemalloc" + | "_warnings" + | "_weakref" + | "atexit" + | "builtins" + | "errno" + | "faulthandler" + | "gc" + | "itertools" + | "marshal" + | "posix" + | "pwd" + | "sys" + | "time" + ) | (7, "xxsubtype" | "zipimport") + | (8, "xxsubtype") + | (9, "_peg_parser" | "xxsubtype") + | (10, "xxsubtype") + | (11, "_tokenize" | "xxsubtype") + | (12, "_tokenize" | "_typing") + | (13, "_suggestions" | "_sysconfig" | "_tokenize" | "_typing") + ) +} diff --git a/crates/ruff_python_stdlib/src/sys.rs b/crates/ruff_python_stdlib/src/sys/known_stdlib.rs similarity index 100% rename from crates/ruff_python_stdlib/src/sys.rs rename to crates/ruff_python_stdlib/src/sys/known_stdlib.rs diff --git a/crates/ruff_python_stdlib/src/sys/mod.rs b/crates/ruff_python_stdlib/src/sys/mod.rs new file mode 100644 index 0000000000..c600926955 --- /dev/null +++ b/crates/ruff_python_stdlib/src/sys/mod.rs @@ -0,0 +1,5 @@ +mod builtin_modules; +mod known_stdlib; + +pub use builtin_modules::is_builtin_module; +pub use known_stdlib::is_known_standard_library; diff --git a/scripts/generate_builtin_modules.py b/scripts/generate_builtin_modules.py new file mode 100644 index 0000000000..b1a1937731 --- /dev/null +++ b/scripts/generate_builtin_modules.py @@ -0,0 +1,116 @@ +"""Script to generate `crates/ruff_python_stdlib/src/sys/builtin_modules.rs`. 
+ +This script requires the following executables to be callable via a subprocess: +- `python3.7` +- `python3.8` +- `python3.9` +- `python3.10` +- `python3.11` +- `python3.12` +- `python3.13` +""" + +from __future__ import annotations + +import builtins +import subprocess +import textwrap +from functools import partial +from pathlib import Path + +MODULE_CRATE = "ruff_python_stdlib" +MODULE_PATH = Path("crates") / MODULE_CRATE / "src" / "sys" / "builtin_modules.rs" + +type Version = tuple[int, int] + +PYTHON_VERSIONS: list[Version] = [ + (3, 7), + (3, 8), + (3, 9), + (3, 10), + (3, 11), + (3, 12), + (3, 13), +] + + +def builtin_modules_on_version(major_version: int, minor_version: int) -> set[str]: + executable = f"python{major_version}.{minor_version}" + try: + proc = subprocess.run( + [executable, "-c", "import sys; print(sys.builtin_module_names)"], + check=True, + text=True, + capture_output=True, + ) + except subprocess.CalledProcessError as e: + print(e.stdout) + print(e.stderr) + raise + return set(eval(proc.stdout)) + + +def generate_module( + script_destination: Path, crate_name: str, python_versions: list[Version] +) -> None: + with script_destination.open("w") as f: + print = partial(builtins.print, file=f) + + print( + textwrap.dedent( + """\ + //! This file is generated by `scripts/generate_builtin_modules.py` + + /// Return `true` if `module` is a [builtin module] on the given + /// Python 3 version. + /// + /// "Builtin modules" are modules that are compiled directly into the + /// Python interpreter. These can never be shadowed by first-party + /// modules; the normal rules of module resolution do not apply to these + /// modules. 
+ /// + /// [builtin module]: https://docs.python.org/3/library/sys.html#sys.builtin_module_names + #[allow(clippy::unnested_or_patterns)] + pub fn is_builtin_module(minor_version: u8, module: &str) -> bool { + matches!((minor_version, module), + """, + ) + ) + + modules_by_version = { + minor_version: builtin_modules_on_version(major_version, minor_version) + for major_version, minor_version in python_versions + } + + # First, add a case for the modules that are in all versions. + ubiquitous_modules = set.intersection(*modules_by_version.values()) + + print("(_, ") + for i, module in enumerate(sorted(ubiquitous_modules)): + if i > 0: + print(" | ", end="") + print(f'"{module}"') + print(")") + + # Next, add any version-specific modules. + for _major_version, minor_version in python_versions: + version_modules = set.difference( + modules_by_version[minor_version], + ubiquitous_modules, + ) + + print(" | ") + print(f"({minor_version}, ") + for i, module in enumerate(sorted(version_modules)): + if i > 0: + print(" | ", end="") + print(f'"{module}"') + print(")") + + print(")}") + + subprocess.run(["cargo", "fmt", "--package", crate_name], check=True) + + +if __name__ == "__main__": + generate_module(MODULE_PATH, MODULE_CRATE, PYTHON_VERSIONS) diff --git a/scripts/generate_known_standard_library.py b/scripts/generate_known_standard_library.py index 6205908b14..22117b60d4 100644 --- a/scripts/generate_known_standard_library.py +++ b/scripts/generate_known_standard_library.py @@ -4,7 +4,7 @@ from pathlib import Path from stdlibs import stdlib_module_names -PATH = Path("crates") / "ruff_python_stdlib" / "src" / "sys.rs" +PATH = Path("crates") / "ruff_python_stdlib" / "src" / "sys" / "known_stdlib.rs" VERSIONS: list[tuple[int, int]] = [ (3, 7), (3, 8),