Run first lint rules atop tree-sitter

This commit is contained in:
Charlie Marsh 2022-10-06 22:17:39 -04:00
parent c8dad90115
commit cdae6cf1f3
6 changed files with 1341 additions and 502 deletions

View File

@ -1,459 +1,23 @@
extern crate core;
use std::path::PathBuf;
use anyhow::Result;
use num_bigint::BigInt;
use num_traits::{float, Num};
use rustpython_ast::{
Arguments, Constant, Expr, ExprContext, ExprKind, Keyword, KeywordData, Location, Operator,
Stmt, StmtKind, Withitem,
};
use tree_sitter::{Node, Parser, Point};
use clap::Parser as ClapParser;
use tree_sitter::Parser;
/// Convert a zero-based tree-sitter `Point` into a one-based `Location`.
fn to_location(point: Point) -> Location {
    let row = point.row + 1;
    let column = point.column + 1;
    Location::new(row, column)
}
use ruff::fs;
use ruff::tree_parser::extract_module;
/// Print a node's position, kind, and covered source text to stdout
/// (debugging aid used while the extractor is under development).
fn print_node(node: Node, source: &[u8]) {
    let range = node.range();
    let snippet = std::str::from_utf8(&source[range.start_byte..range.end_byte]).unwrap();
    println!(
        "[Line: {}, Col: {}] {}: `{}`",
        range.start_point.row,
        range.start_point.column,
        node.kind(),
        snippet
    );
}
fn extract_module(node: Node, source: &[u8]) -> Vec<Stmt> {
let mut cursor = node.walk();
node.children(&mut cursor)
.map(|node| extract_statement(node, source))
.collect()
}
/// Convert each child of a suite/block node into a `Stmt`.
fn extract_suite(node: Node, source: &[u8]) -> Vec<Stmt> {
    let mut cursor = node.walk();
    let mut statements = Vec::new();
    for child in node.children(&mut cursor) {
        statements.push(extract_statement(child, source));
    }
    statements
}
/// Return the slice of `source` covered by `node` as an owned `String`.
///
/// Panics if the node's byte range is not valid UTF-8.
fn extract_text(node: Node, source: &[u8]) -> String {
    let range = node.range();
    std::str::from_utf8(&source[range.start_byte..range.end_byte])
        .unwrap()
        .to_string()
}
/// Map an augmented-assignment operator token (e.g. `+=`) to its AST
/// `Operator`.
///
/// The source bytes are not needed — the token kind alone identifies the
/// operator — so the parameter is underscored to silence the unused-variable
/// warning while keeping the signature parallel to the other `extract_*`
/// helpers (callers are unaffected; Rust calls are positional).
///
/// # Panics
/// Panics on any node kind that is not an augmented-assignment operator.
fn extract_augmented_operator(node: Node, _source: &[u8]) -> Operator {
    match node.kind() {
        "+=" => Operator::Add,
        "-=" => Operator::Sub,
        "*=" => Operator::Mult,
        "@=" => Operator::MatMult,
        "/=" => Operator::Div,
        "%=" => Operator::Mod,
        "**=" => Operator::Pow,
        "<<=" => Operator::LShift,
        ">>=" => Operator::RShift,
        "|=" => Operator::BitOr,
        "^=" => Operator::BitXor,
        "&=" => Operator::BitAnd,
        "//=" => Operator::FloorDiv,
        _ => panic!("Invalid operator: {:?}", node),
    }
}
/// Map a binary operator token (e.g. `+`, `//`) to its AST `Operator`.
///
/// The source bytes are not needed — the token kind alone identifies the
/// operator — so the parameter is underscored to silence the unused-variable
/// warning while keeping the signature parallel to the other `extract_*`
/// helpers (callers are unaffected; Rust calls are positional).
///
/// # Panics
/// Panics on any node kind that is not a binary operator.
fn extract_operator(node: Node, _source: &[u8]) -> Operator {
    match node.kind() {
        "+" => Operator::Add,
        "-" => Operator::Sub,
        "*" => Operator::Mult,
        "@" => Operator::MatMult,
        "/" => Operator::Div,
        "%" => Operator::Mod,
        "**" => Operator::Pow,
        "<<" => Operator::LShift,
        ">>" => Operator::RShift,
        "|" => Operator::BitOr,
        "^" => Operator::BitXor,
        "&" => Operator::BitAnd,
        "//" => Operator::FloorDiv,
        _ => panic!("Invalid operator: {:?}", node),
    }
}
/// Convert a function's parameter-list node into an `Arguments` record.
///
/// Stub: every field is currently empty — parameter extraction is not
/// implemented yet, which is why `node` and `source` go unused here.
fn extract_arguments(node: Node, source: &[u8]) -> Arguments {
    Arguments {
        // All placeholders until parameter parsing is implemented.
        posonlyargs: vec![],
        args: vec![],
        vararg: None,
        kwonlyargs: vec![],
        kw_defaults: vec![],
        kwarg: None,
        defaults: vec![],
    }
}
/// Convert a `with_clause` node into its `Withitem`s.
///
/// Stub: currently only prints the clause's children (debugging aid) and
/// returns an empty list — `with`-item extraction is not implemented yet.
/// Idiom fix: the trailing `return vec![];` is replaced with the tail
/// expression form.
fn extract_with_clause(node: Node, source: &[u8]) -> Vec<Withitem> {
    let mut cursor = node.walk();
    for child in node.children(&mut cursor) {
        print_node(child, source);
    }
    vec![]
}
/// Convert a tree-sitter statement node into a `rustpython_ast` `Stmt`.
///
/// Only a subset of Python statements is handled so far; any other node
/// kind panics (catch-all arm at the bottom).
///
/// Fix: the `class_definition` branch previously duplicated the entire
/// `Stmt::new(.. ClassDef ..)` construction for the with- and
/// without-superclasses cases; the two are collapsed by defaulting
/// `(bases, keywords)` to empty vectors.
fn extract_statement(node: Node, source: &[u8]) -> Stmt {
    match node.kind() {
        "for_statement" => Stmt::new(
            to_location(node.start_position()),
            to_location(node.end_position()),
            StmtKind::For {
                target: Box::new(extract_expression(
                    node.child_by_field_name("left").unwrap(),
                    source,
                )),
                iter: Box::new(extract_expression(
                    node.child_by_field_name("right").unwrap(),
                    source,
                )),
                body: extract_suite(node.child_by_field_name("body").unwrap(), source),
                // STOPSHIP(charlie): Unimplemented.
                orelse: vec![],
                type_comment: None,
            },
        ),
        "while_statement" => Stmt::new(
            to_location(node.start_position()),
            to_location(node.end_position()),
            StmtKind::While {
                test: Box::new(extract_expression(
                    node.child_by_field_name("condition").unwrap(),
                    source,
                )),
                body: extract_suite(node.child_by_field_name("body").unwrap(), source),
                // STOPSHIP(charlie): Unimplemented.
                orelse: vec![],
            },
        ),
        "with_statement" => Stmt::new(
            to_location(node.start_position()),
            to_location(node.end_position()),
            StmtKind::With {
                // TODO(charlie): If async, this will be 2? Also, we need to iterate until we find
                // this, probably.
                items: extract_with_clause(node.child(1).unwrap(), source),
                body: extract_suite(node.child_by_field_name("body").unwrap(), source),
                type_comment: None,
            },
        ),
        "class_definition" => {
            // No `superclasses` field means no bases and no keywords.
            let (bases, keywords) = node
                .child_by_field_name("superclasses")
                .map(|node| extract_argument_list(node, source))
                .unwrap_or_default();
            Stmt::new(
                to_location(node.start_position()),
                to_location(node.end_position()),
                StmtKind::ClassDef {
                    name: extract_text(node.child_by_field_name("name").unwrap(), source),
                    bases,
                    keywords,
                    body: extract_suite(node.child_by_field_name("body").unwrap(), source),
                    // TODO(charlie): How do I access these? Probably need to pass them down or
                    // recurse.
                    decorator_list: vec![],
                },
            )
        }
        "function_definition" => Stmt::new(
            to_location(node.start_position()),
            to_location(node.end_position()),
            StmtKind::FunctionDef {
                // NOTE(review): positional children (1 = name, 2 = parameters)
                // appear to assume a plain, undecorated `def` — confirm
                // against the tree-sitter-python grammar.
                name: extract_text(node.child(1).unwrap(), source),
                args: Box::new(extract_arguments(node.child(2).unwrap(), source)),
                body: extract_suite(node.child_by_field_name("body").unwrap(), source),
                decorator_list: vec![],
                returns: None,
                type_comment: None,
            },
        ),
        "return_statement" => Stmt::new(
            to_location(node.start_position()),
            to_location(node.end_position()),
            StmtKind::Return {
                // A bare `return` has no child(1), yielding `value: None`.
                value: node
                    .child(1)
                    .map(|node| Box::new(extract_expression(node, source))),
            },
        ),
        "pass_statement" => Stmt::new(
            to_location(node.start_position()),
            to_location(node.end_position()),
            StmtKind::Pass,
        ),
        "expression_statement" => {
            // tree-sitter wraps assignments and bare expressions in an
            // `expression_statement`; unwrap to the inner node first.
            let node = node.child(0).unwrap();
            match node.kind() {
                "assignment" => Stmt::new(
                    to_location(node.start_position()),
                    to_location(node.end_position()),
                    StmtKind::Assign {
                        // TODO: assignment targets are not extracted yet.
                        targets: vec![],
                        value: Box::new(extract_expression(node.child(2).unwrap(), source)),
                        type_comment: None,
                    },
                ),
                "augmented_assignment" => Stmt::new(
                    to_location(node.start_position()),
                    to_location(node.end_position()),
                    StmtKind::AugAssign {
                        target: Box::new(extract_expression(
                            node.child_by_field_name("left").unwrap(),
                            source,
                        )),
                        value: Box::new(extract_expression(
                            node.child_by_field_name("right").unwrap(),
                            source,
                        )),
                        op: extract_augmented_operator(
                            node.child_by_field_name("operator").unwrap(),
                            source,
                        ),
                    },
                ),
                _ => Stmt::new(
                    to_location(node.start_position()),
                    to_location(node.end_position()),
                    StmtKind::Expr {
                        value: Box::new(extract_expression(node, source)),
                    },
                ),
            }
        }
        _ => panic!("Unhandled node: {}", node.kind()),
    }
}
/// Extract the expressions inside a comma-separated list node, skipping
/// the `(`, `)`, and `,` punctuation tokens.
fn extract_expression_list(node: Node, source: &[u8]) -> Vec<Expr> {
    let mut cursor = node.walk();
    node.children(&mut cursor)
        .filter(|child| !matches!(child.kind(), "(" | ")" | ","))
        .map(|child| extract_expression(child, source))
        .collect()
}
fn extract_keyword_argument(node: Node, source: &[u8]) -> Keyword {
Keyword::new(
Default::default(),
Default::default(),
KeywordData {
arg: Some(extract_text(
node.child_by_field_name("name").unwrap(),
source,
)),
value: Box::new(extract_expression(
node.child_by_field_name("value").unwrap(),
source,
)),
},
)
}
/// Split an `argument_list` node into positional expressions and keyword
/// arguments. Punctuation and unrecognized child kinds are ignored.
fn extract_argument_list(node: Node, source: &[u8]) -> (Vec<Expr>, Vec<Keyword>) {
    let mut cursor = node.walk();
    let mut positional = vec![];
    let mut named = vec![];
    for child in node.children(&mut cursor) {
        match child.kind() {
            "keyword_argument" => named.push(extract_keyword_argument(child, source)),
            "identifier" | "integer" => positional.push(extract_expression(child, source)),
            _ => {}
        }
    }
    (positional, named)
}
fn extract_expression(node: Node, source: &[u8]) -> Expr {
match node.kind() {
"integer" => Expr::new(
to_location(node.start_position()),
to_location(node.end_position()),
ExprKind::Constant {
value: Constant::Int(
BigInt::from_str_radix(&extract_text(node, source), 10).unwrap(),
),
kind: None,
},
),
"float" => Expr::new(
to_location(node.start_position()),
to_location(node.end_position()),
ExprKind::Constant {
value: Constant::Float(extract_text(node, source).parse::<f64>().unwrap()),
kind: None,
},
),
"string" => Expr::new(
to_location(node.start_position()),
to_location(node.end_position()),
ExprKind::Constant {
value: Constant::Str(extract_text(node, source)),
kind: None,
},
),
"tuple" => Expr::new(
to_location(node.start_position()),
to_location(node.end_position()),
ExprKind::Tuple {
elts: extract_expression_list(node, source),
ctx: ExprContext::Load,
},
),
"identifier" => Expr::new(
to_location(node.start_position()),
to_location(node.end_position()),
ExprKind::Name {
id: std::str::from_utf8(&source[node.range().start_byte..node.range().end_byte])
.unwrap()
.to_string(),
ctx: ExprContext::Load,
},
),
"call" => {
let argument_list =
extract_argument_list(node.child_by_field_name("arguments").unwrap(), source);
Expr::new(
to_location(node.start_position()),
to_location(node.end_position()),
ExprKind::Call {
func: Box::new(extract_expression(
node.child_by_field_name("function").unwrap(),
source,
)),
args: argument_list.0,
keywords: argument_list.1,
},
)
}
"binary_operator" => {
print_node(node, source);
Expr::new(
to_location(node.start_position()),
to_location(node.end_position()),
ExprKind::BinOp {
left: Box::new(extract_expression(
node.child_by_field_name("left").unwrap(),
source,
)),
op: extract_operator(node.child_by_field_name("operator").unwrap(), source),
right: Box::new(extract_expression(
node.child_by_field_name("right").unwrap(),
source,
)),
},
)
}
"true" => Expr::new(
to_location(node.start_position()),
to_location(node.end_position()),
ExprKind::Constant {
value: Constant::Bool(true),
kind: None,
},
),
"false" => Expr::new(
to_location(node.start_position()),
to_location(node.end_position()),
ExprKind::Constant {
value: Constant::Bool(false),
kind: None,
},
),
"ellipsis" => Expr::new(
to_location(node.start_position()),
to_location(node.end_position()),
ExprKind::Constant {
value: Constant::Ellipsis,
kind: None,
},
),
"yield" => match node.child(1) {
None => Expr::new(
to_location(node.start_position()),
to_location(node.end_position()),
ExprKind::Yield { value: None },
),
Some(node) => match node.kind() {
"from" => Expr::new(
to_location(node.start_position()),
to_location(node.end_position()),
ExprKind::YieldFrom {
value: Box::new(extract_expression(node.next_sibling().unwrap(), source)),
},
),
_ => Expr::new(
to_location(node.start_position()),
to_location(node.end_position()),
ExprKind::Yield {
value: Some(Box::new(extract_expression(node, source))),
},
),
},
},
_ => {
print_node(node, source);
panic!("Unhandled node: {}", node.kind())
}
}
/// Command-line interface: takes a single required path to the file to parse.
#[derive(Debug, ClapParser)]
struct Cli {
    /// Path to the Python source file to read and parse.
    #[arg(required = true)]
    file: PathBuf,
}
fn main() -> Result<()> {
let src = r#"
def double(x):
# Return a double.
return x * 2
let cli = Cli::parse();
x = (double(500), double(2, z=1))
x += 1
let src = fs::read_file(&cli.file)?;
class Foo:
pass
for x in range(5):
yield x
yield from x
x = True
x = b"abc"
while True:
pass
with (
foo as bar,
baz as wop):
pass
"#;
let mut parser = Parser::new();
parser
.set_language(tree_sitter_python::language())
@ -461,11 +25,11 @@ baz as wop):
let parse_tree = parser.parse(src.as_bytes(), None);
if let Some(parse_tree) = &parse_tree {
let _ = extract_module(parse_tree.root_node(), src.as_bytes());
// println!(
// "{:#?}",
// extract_module(parse_tree.root_node(), src.as_bytes())
// );
// let _ = extract_module(parse_tree.root_node(), src.as_bytes());
println!(
"{:#?}",
extract_module(parse_tree.root_node(), src.as_bytes())
);
}
Ok(())

1
foo.py Normal file
View File

@ -0,0 +1 @@
x = call(1)

View File

@ -2,10 +2,9 @@ use std::path::Path;
use anyhow::Result;
use log::debug;
use rustpython_parser::lexer::LexResult;
use crate::autofix::fixer::Mode;
use crate::linter::{check_path, tokenize};
use crate::linter::check_path;
use crate::message::Message;
use crate::settings::{RawSettings, Settings};
@ -27,6 +26,7 @@ pub mod printer;
pub mod pyproject;
mod python;
pub mod settings;
pub mod tree_parser;
/// Run ruff over Python source code directly.
pub fn check(path: &Path, contents: &str) -> Result<Vec<Message>> {
@ -44,21 +44,8 @@ pub fn check(path: &Path, contents: &str) -> Result<Vec<Message>> {
let settings = Settings::from_raw(RawSettings::from_pyproject(&pyproject, &project_root)?);
// Tokenize once.
let tokens: Vec<LexResult> = tokenize(contents);
// Determine the noqa line for every line in the source.
let noqa_line_for = noqa::extract_noqa_line_for(&tokens);
// Generate checks.
let checks = check_path(
path,
contents,
tokens,
&noqa_line_for,
&settings,
&Mode::None,
)?;
let checks = check_path(path, contents, &[], &settings, &Mode::None)?;
// Convert to messages.
let messages: Vec<Message> = checks

View File

@ -5,6 +5,7 @@ use anyhow::Result;
use log::debug;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::{lexer, parser};
use tree_sitter::Parser;
use crate::ast::types::Range;
use crate::autofix::fixer;
@ -16,7 +17,8 @@ use crate::code_gen::SourceGenerator;
use crate::message::Message;
use crate::noqa::add_noqa;
use crate::settings::Settings;
use crate::{cache, fs, noqa};
use crate::tree_parser::extract_module;
use crate::{cache, fs};
/// Collect tokens up to and including the first error.
pub(crate) fn tokenize(contents: &str) -> Vec<LexResult> {
@ -34,7 +36,6 @@ pub(crate) fn tokenize(contents: &str) -> Vec<LexResult> {
pub(crate) fn check_path(
path: &Path,
contents: &str,
tokens: Vec<LexResult>,
noqa_line_for: &[usize],
settings: &Settings,
autofix: &fixer::Mode,
@ -48,17 +49,25 @@ pub(crate) fn check_path(
.iter()
.any(|check_code| matches!(check_code.lint_source(), LintSource::AST))
{
match parser::parse_program_tokens(tokens, "<filename>") {
let src = contents.as_bytes();
let mut parser = Parser::new();
parser
.set_language(tree_sitter_python::language())
.expect("Error loading Python grammar");
let parse_tree = parser.parse(src, None).unwrap();
match extract_module(parse_tree.root_node(), src) {
Ok(python_ast) => {
checks.extend(check_ast(&python_ast, contents, settings, autofix, path))
}
Err(parse_error) => {
if settings.enabled.contains(&CheckCode::E999) {
checks.push(Check::new(
CheckKind::SyntaxError(parse_error.error.to_string()),
CheckKind::SyntaxError(parse_error.to_string()),
Range {
location: parse_error.location,
end_location: parse_error.location,
location: Default::default(),
end_location: Default::default(),
},
))
}
@ -100,14 +109,8 @@ pub fn lint_path(
// Read the file from disk.
let contents = fs::read_file(path)?;
// Tokenize once.
let tokens: Vec<LexResult> = tokenize(&contents);
// Determine the noqa line for every line in the source.
let noqa_line_for = noqa::extract_noqa_line_for(&tokens);
// Generate checks.
let mut checks = check_path(path, &contents, tokens, &noqa_line_for, settings, autofix)?;
let mut checks = check_path(path, &contents, &[], settings, autofix)?;
// Apply autofix.
if matches!(autofix, fixer::Mode::Apply) {
@ -134,23 +137,10 @@ pub fn add_noqa_to_path(path: &Path, settings: &Settings) -> Result<usize> {
// Read the file from disk.
let contents = fs::read_file(path)?;
// Tokenize once.
let tokens: Vec<LexResult> = tokenize(&contents);
// Determine the noqa line for every line in the source.
let noqa_line_for = noqa::extract_noqa_line_for(&tokens);
// Generate checks.
let checks = check_path(
path,
&contents,
tokens,
&noqa_line_for,
settings,
&fixer::Mode::None,
)?;
let checks = check_path(path, &contents, &[], settings, &fixer::Mode::None)?;
add_noqa(&checks, &contents, &noqa_line_for, path)
add_noqa(&checks, &contents, &[], path)
}
pub fn autoformat_path(path: &Path) -> Result<()> {
@ -175,14 +165,12 @@ mod tests {
use anyhow::Result;
use regex::Regex;
use rustpython_parser::lexer::LexResult;
use crate::autofix::fixer;
use crate::checks::{Check, CheckCode};
use crate::fs;
use crate::linter;
use crate::linter::tokenize;
use crate::settings;
use crate::{fs, noqa};
fn check_path(
path: &Path,
@ -190,9 +178,7 @@ mod tests {
autofix: &fixer::Mode,
) -> Result<Vec<Check>> {
let contents = fs::read_file(path)?;
let tokens: Vec<LexResult> = tokenize(&contents);
let noqa_line_for = noqa::extract_noqa_line_for(&tokens);
linter::check_path(path, &contents, tokens, &noqa_line_for, settings, autofix)
linter::check_path(path, &contents, &[], settings, autofix)
}
#[test]

View File

@ -44,6 +44,7 @@ pub fn extract_noqa_directive(line: &str) -> Directive {
}
}
#[allow(dead_code)]
pub fn extract_noqa_line_for(lxr: &[LexResult]) -> Vec<usize> {
let mut noqa_line_for: Vec<usize> = vec![];

1300
src/tree_parser.rs Normal file

File diff suppressed because it is too large Load Diff