Add a dedicated token indexer for continuations and comments (#1886)

The primary motivation is that, with the addition of `Tok::NonLogicalNewline`, we can now robustly detect `\` continuations. This PR generalizes the approach we took for comments (track every line that contains a comment) and applies it to continuations too.
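For context, a minimal sketch of the new indexer in use (the `demo` wrapper and input string are illustrative, not part of the diff; it assumes `Indexer` has been brought into scope, e.g. via `crate::source_code::Indexer`):

```rust
use rustpython_parser::lexer::{self, LexResult};

fn demo() {
    // Line 1 ends in a `\` continuation; line 2 carries a trailing comment.
    let contents = "x = \\\n    1  # comment";
    let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
    let indexer: Indexer = lxr.as_slice().into(); // assumes `use crate::source_code::Indexer`
    assert_eq!(indexer.continuation_lines(), [1]); // rows are 1-indexed
    assert_eq!(indexer.commented_lines(), [2]);
}
```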
Author: Charlie Marsh (committed by GitHub)
Date: 2023-01-15 01:57:31 -05:00
Commit: 3791ca721a (parent: 2c644619e0)
40 changed files with 259 additions and 114 deletions

src/source_code/indexer.rs (new file, 116 lines)

@@ -0,0 +1,116 @@
//! Struct used to index source code, to enable efficient lookup of tokens that
//! are omitted from the AST (e.g., commented lines).

use rustpython_ast::Location;
use rustpython_parser::lexer::{LexResult, Tok};

pub struct Indexer {
    commented_lines: Vec<usize>,
    continuation_lines: Vec<usize>,
}

impl Indexer {
    pub fn commented_lines(&self) -> &[usize] {
        &self.commented_lines
    }

    pub fn continuation_lines(&self) -> &[usize] {
        &self.continuation_lines
    }
}

impl From<&[LexResult]> for Indexer {
    fn from(lxr: &[LexResult]) -> Self {
        let mut commented_lines = Vec::new();
        let mut continuation_lines = Vec::new();
        let mut prev: Option<(&Location, &Tok, &Location)> = None;
        for (start, tok, end) in lxr.iter().flatten() {
            // Record every line that contains a comment token.
            if matches!(tok, Tok::Comment(_)) {
                commented_lines.push(start.row());
            }
            // If the previous token ended on an earlier line, and was neither a
            // newline (logical or non-logical) nor a comment, the intervening
            // lines must have ended in `\` continuations.
            if let Some((.., prev_tok, prev_end)) = prev {
                if !matches!(
                    prev_tok,
                    Tok::Newline | Tok::NonLogicalNewline | Tok::Comment(..)
                ) {
                    for line in prev_end.row()..start.row() {
                        continuation_lines.push(line);
                    }
                }
            }
            prev = Some((start, tok, end));
        }
        Self {
            commented_lines,
            continuation_lines,
        }
    }
}
#[cfg(test)]
mod tests {
    use rustpython_parser::lexer;
    use rustpython_parser::lexer::LexResult;

    use crate::source_code::Indexer;

    #[test]
    fn continuation() {
        let contents = r#"x = 1"#;
        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
        let indexer: Indexer = lxr.as_slice().into();
        assert_eq!(indexer.continuation_lines(), Vec::<usize>::new().as_slice());

        let contents = r#"
# Hello, world!

x = 1

y = 2
"#
        .trim();
        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
        let indexer: Indexer = lxr.as_slice().into();
        assert_eq!(indexer.continuation_lines(), Vec::<usize>::new().as_slice());

        let contents = r#"
x = \
    1

if True:
    z = \
        \
        2

(
    "abc" # Foo
    "def" \
    "ghi"
)
"#
        .trim();
        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
        let indexer: Indexer = lxr.as_slice().into();
        assert_eq!(indexer.continuation_lines(), [1, 5, 6, 11]);

        let contents = r#"
x = 1; import sys
import os

if True:
    x = 1; import sys
    import os

if True:
    x = 1; \
        import os

x = 1; \
    import os
"#
        .trim();
        let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
        let indexer: Indexer = lxr.as_slice().into();
        assert_eq!(indexer.continuation_lines(), [9, 12]);
    }
}
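Because tokens are visited in source order, both index vectors come out sorted by ascending row, so a downstream check can use a binary search instead of a linear scan. A minimal sketch (`is_continuation` is a hypothetical helper, not part of this PR):

```rust
// Hypothetical helper: the index vectors are built in ascending row order,
// so membership is an O(log n) binary search.
fn is_continuation(indexer: &Indexer, row: usize) -> bool {
    indexer.continuation_lines().binary_search(&row).is_ok()
}
```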

src/source_code/mod.rs

@@ -1,8 +1,10 @@
 mod generator;
+mod indexer;
 mod locator;
 mod stylist;
 
 pub(crate) use generator::Generator;
+pub(crate) use indexer::Indexer;
 pub(crate) use locator::Locator;
 use rustpython_parser::error::ParseError;
 use rustpython_parser::parser;