[ty] Classify docstrings in semantic tokens (syntax highlighting) (#22031)

## Summary

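* Adds a `documentation` modifier to the semantic tokens emitted for
docstring string literals (module, class, function, and attribute
docstrings).
* Related to, but does not resolve,
https://github.com/astral-sh/ty/issues/2021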

## Test Plan

Added snapshot tests for docstring classification in semantic tokens.
I also added some snapshot tests for future work on non-standard
attribute docstrings (I didn't want to highlight them if we don't
recognize them elsewhere).
This commit is contained in:
Aria Desires 2025-12-19 13:36:01 -05:00 committed by GitHub
parent df1552b9a4
commit cdb7a9fb33
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 789 additions and 20 deletions

View File

@ -2956,6 +2956,295 @@ def function():
assert_snapshot!(test.hover(), @"Hover provided no content");
}
#[test]
fn hover_func_with_concat_docstring() {
let test = cursor_test(
r#"
def a<CURSOR>b():
"""wow cool docs""" """and docs"""
return
"#,
);
assert_snapshot!(test.hover(), @r#"
def ab() -> Unknown
---------------------------------------------
wow cool docsand docs
---------------------------------------------
```python
def ab() -> Unknown
```
---
wow cool docsand docs
---------------------------------------------
info[hover]: Hovered content is
--> main.py:2:5
|
2 | def ab():
| ^-
| ||
| |Cursor offset
| source
3 | """wow cool docs""" """and docs"""
4 | return
|
"#);
}
#[test]
fn hover_func_with_plus_docstring() {
let test = cursor_test(
r#"
def a<CURSOR>b():
"""wow cool docs""" + """and docs"""
return
"#,
);
assert_snapshot!(test.hover(), @r#"
def ab() -> Unknown
---------------------------------------------
```python
def ab() -> Unknown
```
---------------------------------------------
info[hover]: Hovered content is
--> main.py:2:5
|
2 | def ab():
| ^-
| ||
| |Cursor offset
| source
3 | """wow cool docs""" + """and docs"""
4 | return
|
"#);
}
#[test]
fn hover_func_with_slash_docstring() {
let test = cursor_test(
r#"
def a<CURSOR>b():
"""wow cool docs""" \
"""and docs"""
return
"#,
);
assert_snapshot!(test.hover(), @r#"
def ab() -> Unknown
---------------------------------------------
wow cool docsand docs
---------------------------------------------
```python
def ab() -> Unknown
```
---
wow cool docsand docs
---------------------------------------------
info[hover]: Hovered content is
--> main.py:2:5
|
2 | def ab():
| ^-
| ||
| |Cursor offset
| source
3 | """wow cool docs""" \
4 | """and docs"""
|
"#);
}
#[test]
fn hover_func_with_sameline_commented_docstring() {
let test = cursor_test(
r#"
def a<CURSOR>b():
"""wow cool docs""" # and a comment
"""and docs""" # that shouldn't be included
return
"#,
);
assert_snapshot!(test.hover(), @r#"
def ab() -> Unknown
---------------------------------------------
wow cool docs
---------------------------------------------
```python
def ab() -> Unknown
```
---
wow cool docs
---------------------------------------------
info[hover]: Hovered content is
--> main.py:2:5
|
2 | def ab():
| ^-
| ||
| |Cursor offset
| source
3 | """wow cool docs""" # and a comment
4 | """and docs""" # that shouldn't be included
|
"#);
}
#[test]
fn hover_func_with_nextline_commented_docstring() {
let test = cursor_test(
r#"
def a<CURSOR>b():
"""wow cool docs"""
# and a comment that shouldn't be included
"""and docs"""
return
"#,
);
assert_snapshot!(test.hover(), @r#"
def ab() -> Unknown
---------------------------------------------
wow cool docs
---------------------------------------------
```python
def ab() -> Unknown
```
---
wow cool docs
---------------------------------------------
info[hover]: Hovered content is
--> main.py:2:5
|
2 | def ab():
| ^-
| ||
| |Cursor offset
| source
3 | """wow cool docs"""
4 | # and a comment that shouldn't be included
|
"#);
}
#[test]
fn hover_func_with_parens_docstring() {
let test = cursor_test(
r#"
def a<CURSOR>b():
(
"""wow cool docs"""
"""and docs"""
)
return
"#,
);
assert_snapshot!(test.hover(), @r#"
def ab() -> Unknown
---------------------------------------------
wow cool docsand docs
---------------------------------------------
```python
def ab() -> Unknown
```
---
wow cool docsand docs
---------------------------------------------
info[hover]: Hovered content is
--> main.py:2:5
|
2 | def ab():
| ^-
| ||
| |Cursor offset
| source
3 | (
4 | """wow cool docs"""
|
"#);
}
#[test]
fn hover_func_with_nextline_commented_parens_docstring() {
let test = cursor_test(
r#"
def a<CURSOR>b():
(
"""wow cool docs"""
# and a comment that shouldn't be included
"""and docs"""
)
return
"#,
);
assert_snapshot!(test.hover(), @r#"
def ab() -> Unknown
---------------------------------------------
wow cool docsand docs
---------------------------------------------
```python
def ab() -> Unknown
```
---
wow cool docsand docs
---------------------------------------------
info[hover]: Hovered content is
--> main.py:2:5
|
2 | def ab():
| ^-
| ||
| |Cursor offset
| source
3 | (
4 | """wow cool docs"""
|
"#);
}
#[test]
fn hover_attribute_docstring_spill() {
let test = cursor_test(
r#"
if True:
a<CURSOR>b = 1
"this shouldn't be a docstring but also it doesn't matter much"
"#,
);
assert_snapshot!(test.hover(), @r#"
Literal[1]
---------------------------------------------
```python
Literal[1]
```
---------------------------------------------
info[hover]: Hovered content is
--> main.py:3:5
|
2 | if True:
3 | ab = 1
| ^-
| ||
| |Cursor offset
| source
4 | "this shouldn't be a docstring but also it doesn't matter much"
|
"#);
}
#[test]
fn hover_class_typevar_variance() {
let test = cursor_test(

View File

@ -131,6 +131,7 @@ bitflags! {
const DEFINITION = 1 << 0;
const READONLY = 1 << 1;
const ASYNC = 1 << 2;
const DOCUMENTATION = 1 << 3;
}
}
@ -143,7 +144,7 @@ impl SemanticTokenModifier {
/// highlighting. For details, refer to this LSP specification:
/// <https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#semanticTokenModifiers>
pub fn all_names() -> Vec<&'static str> {
vec!["definition", "readonly", "async"]
vec!["definition", "readonly", "async", "documentation"]
}
}
@ -189,18 +190,22 @@ pub fn semantic_tokens(db: &dyn Db, file: File, range: Option<TextRange>) -> Sem
let model = SemanticModel::new(db, file);
let mut visitor = SemanticTokenVisitor::new(&model, range);
visitor.expecting_docstring = true;
visitor.visit_body(parsed.suite());
SemanticTokens::new(visitor.tokens)
}
/// AST visitor that collects semantic tokens.
///
/// Docstring detection is driven by two flags: `expecting_docstring` marks the
/// next statement as a docstring candidate, and `in_docstring` is raised while
/// such a statement is being walked so that string literals inside it are
/// tagged with the `documentation` modifier.
#[expect(clippy::struct_excessive_bools)]
struct SemanticTokenVisitor<'db> {
/// Semantic model of the file being highlighted.
model: &'db SemanticModel<'db>,
/// Tokens collected so far; handed to `SemanticTokens::new` once the walk ends.
tokens: Vec<SemanticToken>,
/// Set to `true` while visiting a class body and to `false` while visiting a
/// function body; the previous value is restored afterwards.
in_class_scope: bool,
in_type_annotation: bool,
/// Raised while visiting assignment / `for` targets and lowered before the
/// assigned value is visited.
in_target_creating_definition: bool,
/// `true` only while walking an expression statement that was accepted as a
/// docstring (the statement was expected to be a docstring and its value is
/// a string literal expression). Cleared again right after that statement.
in_docstring: bool,
/// `true` when the next statement visited is a docstring candidate.
///
/// Set before visiting a module, function, or class body, and after an
/// assignment or annotated assignment (attribute docstrings). `visit_stmt`
/// reads and clears it on entry, so it applies to at most one statement.
///
/// The flag set by an assignment is not cleared when an enclosing block
/// ends, so a string statement that follows a block whose last statement
/// was an assignment is also classified as documentation; the
/// `attribute_docstring_classification_spill` test pins this behavior.
expecting_docstring: bool,
/// Optional range passed in by the caller of `semantic_tokens`.
// NOTE(review): presumably tokens outside this range are dropped in
// `add_token` — confirm, that code is not part of this diff.
range_filter: Option<TextRange>,
}
@ -212,7 +217,9 @@ impl<'db> SemanticTokenVisitor<'db> {
in_class_scope: false,
in_target_creating_definition: false,
in_type_annotation: false,
in_docstring: false,
range_filter,
expecting_docstring: false,
}
}
@ -601,6 +608,8 @@ impl SourceOrderVisitor<'_> for SemanticTokenVisitor<'_> {
}
fn visit_stmt(&mut self, stmt: &Stmt) {
let expecting_docstring = self.expecting_docstring;
self.expecting_docstring = false;
match stmt {
ast::Stmt::FunctionDef(func) => {
// Visit decorator expressions
@ -642,7 +651,9 @@ impl SourceOrderVisitor<'_> for SemanticTokenVisitor<'_> {
let prev_in_class = self.in_class_scope;
self.in_class_scope = false;
self.expecting_docstring = true;
self.visit_body(&func.body);
self.expecting_docstring = false;
self.in_class_scope = prev_in_class;
}
ast::Stmt::ClassDef(class) => {
@ -672,7 +683,9 @@ impl SourceOrderVisitor<'_> for SemanticTokenVisitor<'_> {
let prev_in_class = self.in_class_scope;
self.in_class_scope = true;
self.expecting_docstring = true;
self.visit_body(&class.body);
self.expecting_docstring = false;
self.in_class_scope = prev_in_class;
}
ast::Stmt::TypeAlias(type_alias) => {
@ -754,6 +767,7 @@ impl SourceOrderVisitor<'_> for SemanticTokenVisitor<'_> {
self.in_target_creating_definition = false;
self.visit_expr(&assignment.value);
self.expecting_docstring = true;
}
ast::Stmt::AnnAssign(assignment) => {
self.in_target_creating_definition = true;
@ -765,6 +779,7 @@ impl SourceOrderVisitor<'_> for SemanticTokenVisitor<'_> {
if let Some(value) = &assignment.value {
self.visit_expr(value);
}
self.expecting_docstring = true;
}
ast::Stmt::For(for_stmt) => {
self.in_target_creating_definition = true;
@ -809,7 +824,13 @@ impl SourceOrderVisitor<'_> for SemanticTokenVisitor<'_> {
self.visit_body(&try_stmt.orelse);
self.visit_body(&try_stmt.finalbody);
}
ast::Stmt::Expr(expr) => {
if expecting_docstring && expr.value.is_string_literal_expr() {
self.in_docstring = true;
}
walk_stmt(self, stmt);
self.in_docstring = false;
}
_ => {
// For all other statement types, let the default visitor handle them
walk_stmt(self, stmt);
@ -903,11 +924,12 @@ impl SourceOrderVisitor<'_> for SemanticTokenVisitor<'_> {
fn visit_string_literal(&mut self, string_literal: &StringLiteral) {
// Emit a semantic token for this string literal part
self.add_token(
string_literal.range(),
SemanticTokenType::String,
SemanticTokenModifier::empty(),
);
// Literal parts visited while `in_docstring` is set (i.e. inside a
// statement that `visit_stmt` accepted as a docstring) are tagged with the
// `documentation` modifier; every other string gets no modifier. Each part
// of an implicitly concatenated docstring is visited separately, so every
// part is tagged.
let modifiers = if self.in_docstring {
SemanticTokenModifier::DOCUMENTATION
} else {
SemanticTokenModifier::empty()
};
self.add_token(string_literal.range(), SemanticTokenType::String, modifiers);
}
fn visit_bytes_literal(&mut self, bytes_literal: &BytesLiteral) {
@ -1852,6 +1874,456 @@ y: Optional[str] = None
"#);
}
#[test]
fn function_docstring_classification() {
let test = SemanticTokenTest::new(
r#"
def my_function(param1: int, param2: str) -> bool:
"""Example function
Args:
param1: The first parameter.
param2: The second parameter.
Returns:
The return value. True for success, False otherwise.
"""
x = "hello"
def other_func(): pass
"""unrelated string"""
return False
"#,
);
let tokens = test.highlight_file();
assert_snapshot!(test.to_snapshot(&tokens), @r#"
"my_function" @ 5..16: Function [definition]
"param1" @ 17..23: Parameter [definition]
"int" @ 25..28: Class
"param2" @ 30..36: Parameter [definition]
"str" @ 38..41: Class
"bool" @ 46..50: Class
"\"\"\"Example function\n\n Args:\n param1: The first parameter.\n param2: The second parameter.\n\n Returns:\n The return value. True for success, False otherwise.\n\n \"\"\"" @ 56..245: String [documentation]
"x" @ 251..252: Variable [definition]
"\"hello\"" @ 255..262: String
"other_func" @ 271..281: Function [definition]
"\"\"\"unrelated string\"\"\"" @ 295..317: String
"False" @ 330..335: BuiltinConstant
"#);
}
#[test]
fn class_docstring_classification() {
let test = SemanticTokenTest::new(
r#"
class MyClass:
"""Example class
What a good class wowwee
"""
def __init__(self): pass
"""unrelated string"""
x: str = "hello"
"#,
);
let tokens = test.highlight_file();
assert_snapshot!(test.to_snapshot(&tokens), @r#"
"MyClass" @ 7..14: Class [definition]
"\"\"\"Example class\n\n What a good class wowwee\n \"\"\"" @ 20..74: String [documentation]
"__init__" @ 84..92: Method [definition]
"self" @ 93..97: SelfParameter [definition]
"\"\"\"unrelated string\"\"\"" @ 110..132: String
"x" @ 138..139: Variable [definition]
"str" @ 141..144: Class
"\"hello\"" @ 147..154: String
"#);
}
#[test]
fn module_docstring_classification() {
let test = SemanticTokenTest::new(
r#"
"""Example module
What a good module wooo
"""
def my_func(): pass
"""unrelated string"""
x: str = "hello"
"#,
);
let tokens = test.highlight_file();
assert_snapshot!(test.to_snapshot(&tokens), @r#"
"\"\"\"Example module\n\nWhat a good module wooo\n\"\"\"" @ 1..47: String [documentation]
"my_func" @ 53..60: Function [definition]
"\"\"\"unrelated string\"\"\"" @ 70..92: String
"x" @ 94..95: Variable [definition]
"str" @ 97..100: Class
"\"hello\"" @ 103..110: String
"#);
}
#[test]
fn attribute_docstring_classification() {
let test = SemanticTokenTest::new(
r#"
important_value: str = "wow"
"""This is the most important value
Don't trust the other guy
"""
x = "unrelated string"
other_value: int = 2
"""This is such an import value omg
Trust me
"""
"#,
);
let tokens = test.highlight_file();
assert_snapshot!(test.to_snapshot(&tokens), @r#"
"important_value" @ 1..16: Variable [definition]
"str" @ 18..21: Class
"\"wow\"" @ 24..29: String
"\"\"\"This is the most important value\n\nDon't trust the other guy\n\"\"\"" @ 30..96: String [documentation]
"x" @ 98..99: Variable [definition]
"\"unrelated string\"" @ 102..120: String
"other_value" @ 122..133: Variable [definition]
"int" @ 135..138: Class
"2" @ 141..142: Number
"\"\"\"This is such an import value omg\n\nTrust me\n\"\"\"" @ 143..192: String [documentation]
"#);
}
#[test]
fn attribute_docstring_classification_spill() {
let test = SemanticTokenTest::new(
r#"
if True:
x = 1
"this shouldn't be a docstring but also it doesn't matter much"
"""
"#,
);
let tokens = test.highlight_file();
assert_snapshot!(test.to_snapshot(&tokens), @r#"
"True" @ 4..8: BuiltinConstant
"x" @ 14..15: Variable [definition]
"1" @ 18..19: Number
"\"this shouldn't be a docstring but also it doesn't matter much\"" @ 20..83: String [documentation]
"\"\"\"\n" @ 84..88: String
"#);
}
#[test]
fn docstring_classification_concat() {
let test = SemanticTokenTest::new(
r#"
class MyClass:
"""wow cool docs""" """and docs"""
def my_func():
"""wow cool docs""" """and docs"""
x = 1
"""wow cool docs""" """and docs"""
"#,
);
let tokens = test.highlight_file();
assert_snapshot!(test.to_snapshot(&tokens), @r#"
"MyClass" @ 7..14: Class [definition]
"\"\"\"wow cool docs\"\"\"" @ 20..39: String [documentation]
"\"\"\"and docs\"\"\"" @ 40..54: String [documentation]
"my_func" @ 60..67: Function [definition]
"\"\"\"wow cool docs\"\"\"" @ 75..94: String [documentation]
"\"\"\"and docs\"\"\"" @ 95..109: String [documentation]
"x" @ 111..112: Variable [definition]
"1" @ 115..116: Number
"\"\"\"wow cool docs\"\"\"" @ 117..136: String [documentation]
"\"\"\"and docs\"\"\"" @ 137..151: String [documentation]
"#);
}
#[test]
fn docstring_classification_concat_parens() {
let test = SemanticTokenTest::new(
r#"
class MyClass:
(
"""wow cool docs"""
"""and docs"""
)
def my_func():
(
"""wow cool docs"""
"""and docs"""
)
x = 1
(
"""wow cool docs"""
"""and docs"""
)
"#,
);
let tokens = test.highlight_file();
assert_snapshot!(test.to_snapshot(&tokens), @r#"
"MyClass" @ 7..14: Class [definition]
"\"\"\"wow cool docs\"\"\"" @ 30..49: String [documentation]
"\"\"\"and docs\"\"\"" @ 58..72: String [documentation]
"my_func" @ 84..91: Function [definition]
"\"\"\"wow cool docs\"\"\"" @ 109..128: String [documentation]
"\"\"\"and docs\"\"\"" @ 137..151: String [documentation]
"x" @ 159..160: Variable [definition]
"1" @ 163..164: Number
"\"\"\"wow cool docs\"\"\"" @ 171..190: String [documentation]
"\"\"\"and docs\"\"\"" @ 195..209: String [documentation]
"#);
}
#[test]
fn docstring_classification_concat_parens_commented_nextline() {
let test = SemanticTokenTest::new(
r#"
class MyClass:
(
"""wow cool docs"""
# and a comment that shouldn't be included
"""and docs"""
)
def my_func():
(
"""wow cool docs"""
# and a comment that shouldn't be included
"""and docs"""
)
x = 1
(
"""wow cool docs"""
# and a comment that shouldn't be included
"""and docs"""
)
"#,
);
let tokens = test.highlight_file();
assert_snapshot!(test.to_snapshot(&tokens), @r#"
"MyClass" @ 7..14: Class [definition]
"\"\"\"wow cool docs\"\"\"" @ 30..49: String [documentation]
"\"\"\"and docs\"\"\"" @ 109..123: String [documentation]
"my_func" @ 135..142: Function [definition]
"\"\"\"wow cool docs\"\"\"" @ 160..179: String [documentation]
"\"\"\"and docs\"\"\"" @ 239..253: String [documentation]
"x" @ 261..262: Variable [definition]
"1" @ 265..266: Number
"\"\"\"wow cool docs\"\"\"" @ 273..292: String [documentation]
"\"\"\"and docs\"\"\"" @ 344..358: String [documentation]
"#);
}
#[test]
fn docstring_classification_concat_commented_nextline() {
let test = SemanticTokenTest::new(
r#"
class MyClass:
"""wow cool docs"""
# and a comment that shouldn't be included
"""and docs"""
def my_func():
"""wow cool docs"""
# and a comment that shouldn't be included
"""and docs"""
x = 1
"""wow cool docs"""
# and a comment that shouldn't be included
"""and docs"""
"#,
);
let tokens = test.highlight_file();
assert_snapshot!(test.to_snapshot(&tokens), @r#"
"MyClass" @ 7..14: Class [definition]
"\"\"\"wow cool docs\"\"\"" @ 20..39: String [documentation]
"\"\"\"and docs\"\"\"" @ 91..105: String
"my_func" @ 111..118: Function [definition]
"\"\"\"wow cool docs\"\"\"" @ 126..145: String [documentation]
"\"\"\"and docs\"\"\"" @ 197..211: String
"x" @ 213..214: Variable [definition]
"1" @ 217..218: Number
"\"\"\"wow cool docs\"\"\"" @ 219..238: String [documentation]
"\"\"\"and docs\"\"\"" @ 282..296: String
"#);
}
#[test]
fn docstring_classification_concat_commented_sameline() {
let test = SemanticTokenTest::new(
r#"
class MyClass:
"""wow cool docs""" # and a comment
"""and docs""" # that shouldn't be included
def my_func():
"""wow cool docs""" # and a comment
"""and docs""" # that shouldn't be included
x = 1
"""wow cool docs""" # and a comment
"""and docs""" # that shouldn't be included
"#,
);
let tokens = test.highlight_file();
assert_snapshot!(test.to_snapshot(&tokens), @r#"
"MyClass" @ 7..14: Class [definition]
"\"\"\"wow cool docs\"\"\"" @ 20..39: String [documentation]
"\"\"\"and docs\"\"\"" @ 60..74: String
"my_func" @ 114..121: Function [definition]
"\"\"\"wow cool docs\"\"\"" @ 129..148: String [documentation]
"\"\"\"and docs\"\"\"" @ 169..183: String
"x" @ 219..220: Variable [definition]
"1" @ 223..224: Number
"\"\"\"wow cool docs\"\"\"" @ 225..244: String [documentation]
"\"\"\"and docs\"\"\"" @ 261..275: String
"#);
}
#[test]
fn docstring_classification_concat_slashed() {
let test = SemanticTokenTest::new(
r#"
class MyClass:
"""wow cool docs""" \
"""and docs"""
def my_func():
"""wow cool docs""" \
"""and docs"""
x = 1
"""wow cool docs""" \
"""and docs"""
"#,
);
let tokens = test.highlight_file();
assert_snapshot!(test.to_snapshot(&tokens), @r#"
"MyClass" @ 7..14: Class [definition]
"\"\"\"wow cool docs\"\"\"" @ 20..39: String [documentation]
"\"\"\"and docs\"\"\"" @ 46..60: String [documentation]
"my_func" @ 66..73: Function [definition]
"\"\"\"wow cool docs\"\"\"" @ 81..100: String [documentation]
"\"\"\"and docs\"\"\"" @ 107..121: String [documentation]
"x" @ 123..124: Variable [definition]
"1" @ 127..128: Number
"\"\"\"wow cool docs\"\"\"" @ 129..148: String [documentation]
"\"\"\"and docs\"\"\"" @ 151..165: String [documentation]
"#);
}
#[test]
fn docstring_classification_plus() {
let test = SemanticTokenTest::new(
r#"
class MyClass:
"wow cool docs" + "and docs"
def my_func():
"wow cool docs" + "and docs"
x = 1
"wow cool docs" + "and docs"
"#,
);
let tokens = test.highlight_file();
assert_snapshot!(test.to_snapshot(&tokens), @r#"
"MyClass" @ 7..14: Class [definition]
"\"wow cool docs\"" @ 20..35: String
"\"and docs\"" @ 38..48: String
"my_func" @ 54..61: Function [definition]
"\"wow cool docs\"" @ 69..84: String
"\"and docs\"" @ 87..97: String
"x" @ 99..100: Variable [definition]
"1" @ 103..104: Number
"\"wow cool docs\"" @ 105..120: String
"\"and docs\"" @ 123..133: String
"#);
}
#[test]
fn class_attribute_docstring_classification() {
let test = SemanticTokenTest::new(
r#"
class MyClass:
important_value: str = "wow"
"""This is the most important value
Don't trust the other guy
"""
x = "unrelated string"
other_value: int = 2
"""This is such an import value omg
Trust me
"""
"#,
);
let tokens = test.highlight_file();
assert_snapshot!(test.to_snapshot(&tokens), @r#"
"MyClass" @ 7..14: Class [definition]
"important_value" @ 20..35: Variable [definition]
"str" @ 37..40: Class
"\"wow\"" @ 43..48: String
"\"\"\"This is the most important value\n\n Don't trust the other guy\n \"\"\"" @ 53..127: String [documentation]
"x" @ 133..134: Variable [definition]
"\"unrelated string\"" @ 137..155: String
"other_value" @ 161..172: Variable [definition]
"int" @ 174..177: Class
"2" @ 180..181: Number
"\"\"\"This is such an import value omg\n\n Trust me\n \"\"\"" @ 186..243: String [documentation]
"#);
}
#[test]
fn test_debug_int_classification() {
let test = SemanticTokenTest::new(
@ -2856,6 +3328,12 @@ def foo(self, **key, value=10):
if token.modifiers.contains(SemanticTokenModifier::ASYNC) {
mods.push("async");
}
if token
.modifiers
.contains(SemanticTokenModifier::DOCUMENTATION)
{
mods.push("documentation");
}
format!(" [{}]", mods.join(", "))
};

View File

@ -80,7 +80,8 @@ expression: initialization_result
"tokenModifiers": [
"definition",
"readonly",
"async"
"async",
"documentation"
]
},
"range": true,

View File

@ -80,7 +80,8 @@ expression: initialization_result
"tokenModifiers": [
"definition",
"readonly",
"async"
"async",
"documentation"
]
},
"range": true,

View File

@ -38,51 +38,51 @@ expression: tokens
4,
51,
10,
0,
8,
1,
0,
1,
10,
0,
8,
1,
0,
10,
10,
0,
8,
1,
0,
37,
10,
0,
8,
1,
0,
38,
10,
0,
8,
1,
0,
1,
10,
0,
8,
1,
0,
13,
10,
0,
8,
1,
0,
61,
10,
0,
8,
1,
0,
1,
10,
0,
8,
1,
0,
7,
10,
0
8
]
}