From f79c980e17bf1ef6064a589dfc5e6b1c288e860f Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Fri, 10 May 2024 20:27:56 +0530 Subject: [PATCH] Add support for attribute docstring in the semantic model (#11315) ## Summary This PR adds updates the semantic model to detect attribute docstring. Refer to [PEP 258](https://peps.python.org/pep-0258/#attribute-docstrings) for the definition of an attribute docstring. This PR doesn't add full support for it but only considers string literals as attribute docstring for the following cases: 1. A string literal following an assignment statement in the **global scope**. 2. A global class attribute For an assignment statement, it's considered an attribute docstring only if the target expression is a name expression (`x = 1`). So, chained assignment, multiple assignment or unpacking, and starred expression, which are all valid in the target position, aren't considered here. In `__init__` method, an assignment to the `self` variable like `self.x = 1` is also a candidate for an attribute docstring. **This PR does not support this position.** ## Test Plan I used the following source code along with a print statement to verify that the attribute docstring detection is correct. Refer to the PR description for the code snippet. I'll add this in the follow-up PR (https://github.com/astral-sh/ruff/pull/11302) which uses this method. --- crates/ruff_linter/src/checkers/ast/mod.rs | 107 ++++++++++++++++-- .../rules/avoidable_escaped_quote.rs | 2 +- .../rules/check_string_quotes.rs | 2 +- .../rules/unnecessary_escaped_quote.rs | 2 +- .../ruff/rules/ambiguous_unicode_character.rs | 2 +- crates/ruff_python_semantic/src/model.rs | 45 +++++++- 6 files changed, 141 insertions(+), 19 deletions(-) diff --git a/crates/ruff_linter/src/checkers/ast/mod.rs b/crates/ruff_linter/src/checkers/ast/mod.rs index f9876b421a..391012b0f1 100644 --- a/crates/ruff_linter/src/checkers/ast/mod.rs +++ b/crates/ruff_linter/src/checkers/ast/mod.rs @@ -73,7 +73,7 @@ mod annotation; mod deferred; /// State representing whether a docstring is expected or not for the next statement. -#[derive(Default, Debug, Copy, Clone, PartialEq)] +#[derive(Debug, Copy, Clone, PartialEq)] enum DocstringState { /// The next statement is expected to be a docstring, but not necessarily so. /// @@ -92,15 +92,84 @@ enum DocstringState { /// For `Foo`, the state is expected when the checker is visiting the class /// body but isn't going to be present. While, for `bar` function, the docstring /// is expected and present. - #[default] - Expected, + Expected(ExpectedDocstringKind), Other, } +impl Default for DocstringState { + /// Returns the default docstring state which is to expect a module-level docstring. + fn default() -> Self { + Self::Expected(ExpectedDocstringKind::Module) + } +} + impl DocstringState { - /// Returns `true` if the next statement is expected to be a docstring. - const fn is_expected(self) -> bool { - matches!(self, DocstringState::Expected) + /// Returns the docstring kind if the state is expecting a docstring. + const fn expected_kind(self) -> Option { + match self { + DocstringState::Expected(kind) => Some(kind), + DocstringState::Other => None, + } + } +} + +/// The kind of an expected docstring. +#[derive(Debug, Copy, Clone, PartialEq)] +enum ExpectedDocstringKind { + /// A module-level docstring. + /// + /// For example, + /// ```python + /// """This is a module-level docstring.""" + /// + /// a = 1 + /// ``` + Module, + + /// A class-level docstring. + /// + /// For example, + /// ```python + /// class Foo: + /// """This is the docstring for `Foo` class.""" + /// + /// def __init__(self) -> None: + /// ... + /// ``` + Class, + + /// A function-level docstring. + /// + /// For example, + /// ```python + /// def foo(): + /// """This is the docstring for `foo` function.""" + /// pass + /// ``` + Function, + + /// An attribute-level docstring. + /// + /// For example, + /// ```python + /// a = 1 + /// """This is the docstring for `a` variable.""" + /// + /// + /// class Foo: + /// b = 1 + /// """This is the docstring for `Foo.b` class variable.""" + /// ``` + Attribute, +} + +impl ExpectedDocstringKind { + /// Returns the semantic model flag that represents the current docstring state. + const fn as_flag(self) -> SemanticModelFlags { + match self { + ExpectedDocstringKind::Attribute => SemanticModelFlags::ATTRIBUTE_DOCSTRING, + _ => SemanticModelFlags::PEP_257_DOCSTRING, + } } } @@ -383,9 +452,9 @@ impl<'a> Visitor<'a> for Checker<'a> { // Update the semantic model if it is in a docstring. This should be done after the // flags snapshot to ensure that it gets reset once the statement is analyzed. - if self.docstring_state.is_expected() { + if let Some(kind) = self.docstring_state.expected_kind() { if is_docstring_stmt(stmt) { - self.semantic.flags |= SemanticModelFlags::DOCSTRING; + self.semantic.flags |= kind.as_flag(); } // Reset the state irrespective of whether the statement is a docstring or not. self.docstring_state = DocstringState::Other; @@ -709,7 +778,7 @@ impl<'a> Visitor<'a> for Checker<'a> { } // Set the docstring state before visiting the class body. - self.docstring_state = DocstringState::Expected; + self.docstring_state = DocstringState::Expected(ExpectedDocstringKind::Class); self.visit_body(body); let scope_id = self.semantic.scope_id; @@ -874,6 +943,24 @@ impl<'a> Visitor<'a> for Checker<'a> { _ => visitor::walk_stmt(self, stmt), }; + if self.semantic().at_top_level() || self.semantic().current_scope().kind.is_class() { + match stmt { + Stmt::Assign(ast::StmtAssign { targets, .. }) => { + if let [Expr::Name(_)] = targets.as_slice() { + self.docstring_state = + DocstringState::Expected(ExpectedDocstringKind::Attribute); + } + } + Stmt::AnnAssign(ast::StmtAnnAssign { target, .. }) => { + if target.is_name_expr() { + self.docstring_state = + DocstringState::Expected(ExpectedDocstringKind::Attribute); + } + } + _ => {} + } + } + // Step 3: Clean-up // Step 4: Analysis @@ -2122,7 +2209,7 @@ impl<'a> Checker<'a> { self.visit_parameters(parameters); // Set the docstring state before visiting the function body. - self.docstring_state = DocstringState::Expected; + self.docstring_state = DocstringState::Expected(ExpectedDocstringKind::Function); self.visit_body(body); } } diff --git a/crates/ruff_linter/src/rules/flake8_quotes/rules/avoidable_escaped_quote.rs b/crates/ruff_linter/src/rules/flake8_quotes/rules/avoidable_escaped_quote.rs index 560c1734c3..d2bcca756c 100644 --- a/crates/ruff_linter/src/rules/flake8_quotes/rules/avoidable_escaped_quote.rs +++ b/crates/ruff_linter/src/rules/flake8_quotes/rules/avoidable_escaped_quote.rs @@ -52,7 +52,7 @@ impl AlwaysFixableViolation for AvoidableEscapedQuote { /// Q003 pub(crate) fn avoidable_escaped_quote(checker: &mut Checker, string_like: StringLike) { - if checker.semantic().in_docstring() + if checker.semantic().in_pep_257_docstring() || checker.semantic().in_string_type_definition() // This rule has support for strings nested inside another f-strings but they're checked // via the outermost f-string. This means that we shouldn't be checking any nested string diff --git a/crates/ruff_linter/src/rules/flake8_quotes/rules/check_string_quotes.rs b/crates/ruff_linter/src/rules/flake8_quotes/rules/check_string_quotes.rs index 7c01db8c5a..5ccffe6316 100644 --- a/crates/ruff_linter/src/rules/flake8_quotes/rules/check_string_quotes.rs +++ b/crates/ruff_linter/src/rules/flake8_quotes/rules/check_string_quotes.rs @@ -460,7 +460,7 @@ pub(crate) fn check_string_quotes(checker: &mut Checker, string_like: StringLike StringLike::FString(node) => node.value.iter().map(Ranged::range).collect(), }; - if checker.semantic().in_docstring() { + if checker.semantic().in_pep_257_docstring() { if checker.enabled(Rule::BadQuotesDocstring) { for range in ranges { docstring(checker, range); diff --git a/crates/ruff_linter/src/rules/flake8_quotes/rules/unnecessary_escaped_quote.rs b/crates/ruff_linter/src/rules/flake8_quotes/rules/unnecessary_escaped_quote.rs index 251e196287..01a3c79f20 100644 --- a/crates/ruff_linter/src/rules/flake8_quotes/rules/unnecessary_escaped_quote.rs +++ b/crates/ruff_linter/src/rules/flake8_quotes/rules/unnecessary_escaped_quote.rs @@ -47,7 +47,7 @@ impl AlwaysFixableViolation for UnnecessaryEscapedQuote { /// Q004 pub(crate) fn unnecessary_escaped_quote(checker: &mut Checker, string_like: StringLike) { - if checker.semantic().in_docstring() { + if checker.semantic().in_pep_257_docstring() { return; } diff --git a/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs b/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs index 85f73f3db9..77c828848f 100644 --- a/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs +++ b/crates/ruff_linter/src/rules/ruff/rules/ambiguous_unicode_character.rs @@ -186,7 +186,7 @@ pub(crate) fn ambiguous_unicode_character_comment( /// RUF001, RUF002 pub(crate) fn ambiguous_unicode_character_string(checker: &mut Checker, string_like: StringLike) { - let context = if checker.semantic().in_docstring() { + let context = if checker.semantic().in_pep_257_docstring() { Context::Docstring } else { Context::String diff --git a/crates/ruff_python_semantic/src/model.rs b/crates/ruff_python_semantic/src/model.rs index 2b6bfdc64b..5bfa87821c 100644 --- a/crates/ruff_python_semantic/src/model.rs +++ b/crates/ruff_python_semantic/src/model.rs @@ -1643,9 +1643,17 @@ impl<'a> SemanticModel<'a> { .intersects(SemanticModelFlags::TYPE_CHECKING_BLOCK) } - /// Return `true` if the model is in a docstring. - pub const fn in_docstring(&self) -> bool { - self.flags.intersects(SemanticModelFlags::DOCSTRING) + /// Return `true` if the model is in a docstring as described in [PEP 257]. + /// + /// [PEP 257]: https://peps.python.org/pep-0257/#what-is-a-docstring + pub const fn in_pep_257_docstring(&self) -> bool { + self.flags.intersects(SemanticModelFlags::PEP_257_DOCSTRING) + } + + /// Return `true` if the model is in an attribute docstring. + pub const fn in_attribute_docstring(&self) -> bool { + self.flags + .intersects(SemanticModelFlags::ATTRIBUTE_DOCSTRING) } /// Return `true` if the model has traversed past the "top-of-file" import boundary. @@ -2082,7 +2090,7 @@ bitflags! { /// ``` const COMPREHENSION_ASSIGNMENT = 1 << 20; - /// The model is in a module / class / function docstring. + /// The model is in a docstring as described in [PEP 257]. /// /// For example, the model could be visiting either the module, class, /// or function docstring in: @@ -2099,7 +2107,9 @@ bitflags! { /// """Function docstring.""" /// pass /// ``` - const DOCSTRING = 1 << 21; + /// + /// [PEP 257]: https://peps.python.org/pep-0257/#what-is-a-docstring + const PEP_257_DOCSTRING = 1 << 21; /// The model is visiting the r.h.s. of a module-level `__all__` definition. /// @@ -2136,6 +2146,31 @@ bitflags! { /// while traversing the AST. (This only happens in stub files.) const DEFERRED_CLASS_BASE = 1 << 25; + /// The model is in an attribute docstring. + /// + /// An attribute docstring is a string literal immediately following an assignment or an + /// annotated assignment statement. The context in which this is valid are: + /// 1. At the top level of a module + /// 2. At the top level of a class definition i.e., a class attribute + /// + /// For example: + /// ```python + /// a = 1 + /// """This is an attribute docstring for `a` variable""" + /// + /// + /// class Foo: + /// b = 1 + /// """This is an attribute docstring for `Foo.b` class variable""" + /// ``` + /// + /// Unlike other kinds of docstrings as described in [PEP 257], attribute docstrings are + /// discarded at runtime. However, they are used by some documentation renderers and + /// static-analysis tools. + /// + /// [PEP 257]: https://peps.python.org/pep-0257/#what-is-a-docstring + const ATTRIBUTE_DOCSTRING = 1 << 26; + /// The context is in any type annotation. const ANNOTATION = Self::TYPING_ONLY_ANNOTATION.bits() | Self::RUNTIME_EVALUATED_ANNOTATION.bits() | Self::RUNTIME_REQUIRED_ANNOTATION.bits();