From 968c7df7708ade0ba9f4b41472c97255613e7946 Mon Sep 17 00:00:00 2001 From: Jonathan Plasse <13716151+JonathanPlasse@users.noreply.github.com> Date: Fri, 31 Mar 2023 21:15:36 +0200 Subject: [PATCH] Fix `is_module_name()` and improve perf of `is_identifier()` (#3795) --- .../0001_initial.py | 0 .../N999/module/invalid_name/__init__.py | 0 .../N999/module/invalid_name/import.py | 0 crates/ruff/src/rules/pep8_naming/mod.rs | 3 +- .../pep8_naming/rules/invalid_module_name.rs | 44 ++++++++--- ...module__invalid_name__0001_initial.py.snap | 19 +++++ ...N999__module__invalid_name__import.py.snap | 19 +++++ ...__module__valid_name__0001_initial.py.snap | 6 -- crates/ruff_python_stdlib/src/identifiers.rs | 73 +++++++++++++++---- 9 files changed, 135 insertions(+), 29 deletions(-) rename crates/ruff/resources/test/fixtures/pep8_naming/N999/module/{valid_name => invalid_name}/0001_initial.py (100%) create mode 100644 crates/ruff/resources/test/fixtures/pep8_naming/N999/module/invalid_name/__init__.py create mode 100644 crates/ruff/resources/test/fixtures/pep8_naming/N999/module/invalid_name/import.py create mode 100644 crates/ruff/src/rules/pep8_naming/snapshots/ruff__rules__pep8_naming__tests__N999_N999__module__invalid_name__0001_initial.py.snap create mode 100644 crates/ruff/src/rules/pep8_naming/snapshots/ruff__rules__pep8_naming__tests__N999_N999__module__invalid_name__import.py.snap delete mode 100644 crates/ruff/src/rules/pep8_naming/snapshots/ruff__rules__pep8_naming__tests__N999_N999__module__valid_name__0001_initial.py.snap diff --git a/crates/ruff/resources/test/fixtures/pep8_naming/N999/module/valid_name/0001_initial.py b/crates/ruff/resources/test/fixtures/pep8_naming/N999/module/invalid_name/0001_initial.py similarity index 100% rename from crates/ruff/resources/test/fixtures/pep8_naming/N999/module/valid_name/0001_initial.py rename to crates/ruff/resources/test/fixtures/pep8_naming/N999/module/invalid_name/0001_initial.py diff --git a/crates/ruff/resources/test/fixtures/pep8_naming/N999/module/invalid_name/__init__.py b/crates/ruff/resources/test/fixtures/pep8_naming/N999/module/invalid_name/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/crates/ruff/resources/test/fixtures/pep8_naming/N999/module/invalid_name/import.py b/crates/ruff/resources/test/fixtures/pep8_naming/N999/module/invalid_name/import.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/crates/ruff/src/rules/pep8_naming/mod.rs b/crates/ruff/src/rules/pep8_naming/mod.rs index 41dec1bacc..7c8800778b 100644 --- a/crates/ruff/src/rules/pep8_naming/mod.rs +++ b/crates/ruff/src/rules/pep8_naming/mod.rs @@ -41,9 +41,10 @@ mod tests { #[test_case(Rule::InvalidModuleName, Path::new("N999/module/no_module/test.txt"); "N999_8")] #[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/file-with-dashes.py"); "N999_9")] #[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/__main__.py"); "N999_10")] - #[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/0001_initial.py"); "N999_11")] + #[test_case(Rule::InvalidModuleName, Path::new("N999/module/invalid_name/0001_initial.py"); "N999_11")] #[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/__setup__.py"); "N999_12")] #[test_case(Rule::InvalidModuleName, Path::new("N999/module/valid_name/file-with-dashes"); "N999_13")] + #[test_case(Rule::InvalidModuleName, Path::new("N999/module/invalid_name/import.py"); "N999_14")] fn rules(rule_code: Rule, path: &Path) -> Result<()> { let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy()); let diagnostics = test_path( diff --git a/crates/ruff/src/rules/pep8_naming/rules/invalid_module_name.rs b/crates/ruff/src/rules/pep8_naming/rules/invalid_module_name.rs index f5c6697477..0d16c3e5e7 100644 --- a/crates/ruff/src/rules/pep8_naming/rules/invalid_module_name.rs +++ b/crates/ruff/src/rules/pep8_naming/rules/invalid_module_name.rs @@ -1,13 +1,14 @@ +use std::ffi::OsStr; use std::path::Path; use ruff_diagnostics::{Diagnostic, Violation}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_ast::types::Range; -use ruff_python_stdlib::identifiers::is_module_name; +use ruff_python_stdlib::identifiers::{is_migration_name, is_module_name}; /// ## What it does /// Checks for module names that do not follow the `snake_case` naming -/// convention. +/// convention or are otherwise invalid. /// /// ## Why is this bad? /// [PEP 8] recommends the use of the `snake_case` naming convention for @@ -21,6 +22,10 @@ use ruff_python_stdlib::identifiers::is_module_name; /// > provides a higher level (e.g. more object oriented) interface, the C/C++ module has /// > a leading underscore (e.g. `_socket`). /// +/// Further, in order for Python modules to be importable, they must be valid +/// identifiers. As such, they cannot start with a digit, or collide with hard +/// keywords, like `import` or `class`. +/// /// ## Example /// - Instead of `example-module-name` or `example module name`, use `example_module_name`. /// - Instead of `ExampleModule`, use `example_module`. @@ -49,18 +54,21 @@ pub fn invalid_module_name(path: &Path, package: Option<&Path>) -> Option) -> Option bool { + path.file_name().map_or(false, |file_name| { + file_name == "__init__.py" + || file_name == "__init__.pyi" + || file_name == "__main__.py" + || file_name == "__main__.pyi" + }) +} + +/// Return `true` if a [`Path`] refers to a migration file. +fn is_migration_file(path: &Path) -> bool { + path.parent() + .and_then(Path::file_name) + .and_then(OsStr::to_str) + .map_or(false, |parent| matches!(parent, "versions" | "migrations")) +} diff --git a/crates/ruff/src/rules/pep8_naming/snapshots/ruff__rules__pep8_naming__tests__N999_N999__module__invalid_name__0001_initial.py.snap b/crates/ruff/src/rules/pep8_naming/snapshots/ruff__rules__pep8_naming__tests__N999_N999__module__invalid_name__0001_initial.py.snap new file mode 100644 index 0000000000..468bace1c0 --- /dev/null +++ b/crates/ruff/src/rules/pep8_naming/snapshots/ruff__rules__pep8_naming__tests__N999_N999__module__invalid_name__0001_initial.py.snap @@ -0,0 +1,19 @@ +--- +source: crates/ruff/src/rules/pep8_naming/mod.rs +expression: diagnostics +--- +- kind: + name: InvalidModuleName + body: "Invalid module name: '0001_initial'" + suggestion: ~ + fixable: false + location: + row: 1 + column: 0 + end_location: + row: 1 + column: 0 + fix: + edits: [] + parent: ~ + diff --git a/crates/ruff/src/rules/pep8_naming/snapshots/ruff__rules__pep8_naming__tests__N999_N999__module__invalid_name__import.py.snap b/crates/ruff/src/rules/pep8_naming/snapshots/ruff__rules__pep8_naming__tests__N999_N999__module__invalid_name__import.py.snap new file mode 100644 index 0000000000..b8820fda65 --- /dev/null +++ b/crates/ruff/src/rules/pep8_naming/snapshots/ruff__rules__pep8_naming__tests__N999_N999__module__invalid_name__import.py.snap @@ -0,0 +1,19 @@ +--- +source: crates/ruff/src/rules/pep8_naming/mod.rs +expression: diagnostics +--- +- kind: + name: InvalidModuleName + body: "Invalid module name: 'import'" + suggestion: ~ + fixable: false + location: + row: 1 + column: 0 + end_location: + row: 1 + column: 0 + fix: + edits: [] + parent: ~ + diff --git a/crates/ruff/src/rules/pep8_naming/snapshots/ruff__rules__pep8_naming__tests__N999_N999__module__valid_name__0001_initial.py.snap b/crates/ruff/src/rules/pep8_naming/snapshots/ruff__rules__pep8_naming__tests__N999_N999__module__valid_name__0001_initial.py.snap deleted file mode 100644 index b0a1ebbaaf..0000000000 --- a/crates/ruff/src/rules/pep8_naming/snapshots/ruff__rules__pep8_naming__tests__N999_N999__module__valid_name__0001_initial.py.snap +++ /dev/null @@ -1,6 +0,0 @@ ---- -source: crates/ruff/src/rules/pep8_naming/mod.rs -expression: diagnostics ---- -[] - diff --git a/crates/ruff_python_stdlib/src/identifiers.rs b/crates/ruff_python_stdlib/src/identifiers.rs index c58be92325..c7f4463fe8 100644 --- a/crates/ruff_python_stdlib/src/identifiers.rs +++ b/crates/ruff_python_stdlib/src/identifiers.rs @@ -1,9 +1,11 @@ +use crate::keyword::KWLIST; + /// Returns `true` if a string is a valid Python identifier (e.g., variable /// name). -pub fn is_identifier(s: &str) -> bool { +pub fn is_identifier(name: &str) -> bool { // Is the first character a letter or underscore? - if !s - .chars() + let mut chars = name.chars(); + if !chars .next() .map_or(false, |c| c.is_alphabetic() || c == '_') { @@ -11,7 +13,7 @@ pub fn is_identifier(s: &str) -> bool { } // Are the rest of the characters letters, digits, or underscores? - s.chars().skip(1).all(|c| c.is_alphanumeric() || c == '_') + chars.all(|c| c.is_alphanumeric() || c == '_') } /// Returns `true` if a string is a private identifier, such that, when the @@ -24,26 +26,71 @@ pub fn is_mangled_private(id: &str) -> bool { } /// Returns `true` if a string is a PEP 8-compliant module name (i.e., consists of lowercase -/// letters, numbers, and underscores). -pub fn is_module_name(s: &str) -> bool { - s.chars() - .all(|c| c.is_lowercase() || c.is_numeric() || c == '_') +/// letters, numbers, underscores, and is not a keyword). +pub fn is_module_name(name: &str) -> bool { + // Is the string a keyword? + if KWLIST.contains(&name) { + return false; + } + + // Is the first character a letter or underscore? + let mut chars = name.chars(); + if !chars + .next() + .map_or(false, |c| c.is_ascii_lowercase() || c == '_') + { + return false; + } + + // Are the rest of the characters letters, digits, or underscores? + chars.all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_') +} + +/// Returns `true` if a string appears to be a valid migration file name (e.g., `0001_initial.py`). +pub fn is_migration_name(name: &str) -> bool { + // Is the string a keyword? + if KWLIST.contains(&name) { + return false; + } + + // Are characters letters, digits, or underscores? + name.chars() + .all(|c| c.is_ascii_lowercase() || c.is_ascii_digit() || c == '_') } #[cfg(test)] mod tests { - use crate::identifiers::is_module_name; + use crate::identifiers::{is_migration_name, is_module_name}; #[test] - fn test_is_module_name() { + fn module_name() { + assert!(is_module_name("_abc")); assert!(is_module_name("a")); + assert!(is_module_name("a_b_c")); assert!(is_module_name("abc")); assert!(is_module_name("abc0")); assert!(is_module_name("abc_")); - assert!(is_module_name("a_b_c")); - assert!(is_module_name("0abc")); - assert!(is_module_name("_abc")); + assert!(!is_module_name("0001_initial")); + assert!(!is_module_name("0abc")); assert!(!is_module_name("a-b-c")); assert!(!is_module_name("a_B_c")); + assert!(!is_module_name("class")); + assert!(!is_module_name("δ")); + } + + #[test] + fn migration_name() { + assert!(is_migration_name("0001_initial")); + assert!(is_migration_name("0abc")); + assert!(is_migration_name("_abc")); + assert!(is_migration_name("a")); + assert!(is_migration_name("a_b_c")); + assert!(is_migration_name("abc")); + assert!(is_migration_name("abc0")); + assert!(is_migration_name("abc_")); + assert!(!is_migration_name("a-b-c")); + assert!(!is_migration_name("a_B_c")); + assert!(!is_migration_name("class")); + assert!(!is_migration_name("δ")); } }