Try out tree-sitter

This commit is contained in:
Charlie Marsh 2022-10-01 16:36:11 -04:00
parent 6a1d7d8a1c
commit 40ab5d353b
6 changed files with 191 additions and 0 deletions

3
.gitmodules vendored Normal file
View File

@ -0,0 +1,3 @@
[submodule "tree-sitter-python"]
path = tree-sitter-python
url = https://github.com/tree-sitter/tree-sitter-python.git

23
Cargo.lock generated
View File

@ -1912,6 +1912,7 @@ dependencies = [
"anyhow",
"bincode",
"cacache",
"cc",
"chrono",
"clap",
"clearscreen",
@ -1939,6 +1940,8 @@ dependencies = [
"strum",
"strum_macros",
"toml",
"tree-sitter",
"tree-sitter-python",
"update-informer",
"walkdir",
]
@ -2409,6 +2412,26 @@ dependencies = [
"serde",
]
[[package]]
name = "tree-sitter"
version = "0.20.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d4423c784fe11398ca91e505cdc71356b07b1a924fc8735cfab5333afe3e18bc"
dependencies = [
"cc",
"regex",
]
[[package]]
name = "tree-sitter-python"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dda114f58048f5059dcf158aff691dffb8e113e6d2b50d94263fd68711975287"
dependencies = [
"cc",
"tree-sitter",
]
[[package]]
name = "twox-hash"
version = "1.6.3"

View File

@ -6,10 +6,14 @@ edition = "2021"
[lib]
name = "ruff"
[build-dependencies]
cc="*"
[dependencies]
anyhow = { version = "1.0.60" }
bincode = { version = "1.3.3" }
cacache = { version = "10.0.1" }
cc = "*"
chrono = { version = "0.4.21" }
clap = { version = "4.0.1", features = ["derive"] }
clearscreen = { version = "1.0.10" }
@ -33,6 +37,8 @@ rustpython-common = { git = "https://github.com/charliermarsh/RustPython.git", r
serde = { version = "1.0.143", features = ["derive"] }
serde_json = { version = "1.0.83" }
toml = { version = "0.5.9" }
tree-sitter = "0.20.9"
tree-sitter-python = "0.20.2"
update-informer = { version = "0.5.0", default_features = false, features = ["pypi"], optional = true }
walkdir = { version = "2.3.2" }
strum = { version = "0.24.1", features = ["strum_macros"] }

28
build.rs Normal file
View File

@ -0,0 +1,28 @@
use std::path::Path;
extern crate cc;
/// Build-script entry point: compiles the vendored tree-sitter Python grammar.
///
/// Two libraries are built from the sources under `tree-sitter-python/src`:
/// `parser` from the C file `parser.c`, and `scanner` from the C++ file
/// `scanner.cc`. A `cargo:rerun-if-changed` directive is printed for each
/// source file before it is compiled.
///
/// # Panics
/// Panics if either source path is not valid UTF-8.
fn main() {
    let grammar_dir = Path::new("tree-sitter-python").join("src");

    // C half of the grammar: the generated parser.
    let parser_src = grammar_dir.join("parser.c");
    println!("cargo:rerun-if-changed={}", parser_src.to_str().unwrap());
    cc::Build::new()
        .include(&grammar_dir)
        .flag_if_supported("-Wno-unused-parameter")
        .flag_if_supported("-Wno-unused-but-set-variable")
        .flag_if_supported("-Wno-trigraphs")
        .file(&parser_src)
        .compile("parser");

    // C++ half of the grammar: the external scanner.
    let scanner_src = grammar_dir.join("scanner.cc");
    println!("cargo:rerun-if-changed={}", scanner_src.to_str().unwrap());
    cc::Build::new()
        .cpp(true)
        .include(&grammar_dir)
        .flag_if_supported("-Wno-unused-parameter")
        .flag_if_supported("-Wno-unused-but-set-variable")
        .file(&scanner_src)
        .compile("scanner");
}

130
examples/parse_python.rs Normal file
View File

@ -0,0 +1,130 @@
use anyhow::Result;
use tree_sitter::{Parser, Query, QueryCursor};
/// The cursor move that led to the current position during the tree walk.
///
/// The walk in `main` uses this to decide what to do next: a node reached via
/// `Down` or `Right` is printed, while a node reached via `Up` is only used as
/// a starting point for finding the next sibling or ancestor.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Action {
    /// The cursor just moved to the parent of the previous node.
    Up,
    /// The cursor just moved to a first child (also the walk's initial state).
    Down,
    /// The cursor just moved to the next sibling.
    Right,
}
fn main() -> Result<()> {
let src = r#"
def double(x):
# Return a double.
return x * 2
x = double(1)
y = (f"{x}" "b")
"#;
let mut parser = Parser::new();
parser
.set_language(tree_sitter_python::language())
.expect("Error loading Python grammar");
let parse_tree = parser.parse(src, None);
if let Some(parse_tree) = &parse_tree {
// Check for comments.
let query = Query::new(tree_sitter_python::language(), "(comment) @capture")?;
let mut query_cursor = QueryCursor::new();
let all_matches = query_cursor.matches(&query, parse_tree.root_node(), src.as_bytes());
for each_match in all_matches {
for capture in each_match.captures.iter() {
let range = capture.node.range();
let text = &src[range.start_byte..range.end_byte];
let line = range.start_point.row;
let col = range.start_point.column;
println!(
"[Line: {}, Col: {}] Offending source code: `{}`",
line, col, text
);
}
}
// Check for string concatenations.
let query = Query::new(
tree_sitter_python::language(),
"(concatenated_string) @capture",
)?;
let mut query_cursor = QueryCursor::new();
let all_matches = query_cursor.matches(&query, parse_tree.root_node(), src.as_bytes());
for each_match in all_matches {
for capture in each_match.captures.iter() {
let range = capture.node.range();
let text = &src[range.start_byte..range.end_byte];
let line = range.start_point.row;
let col = range.start_point.column;
println!(
"[Line: {}, Col: {}] Offending source code: `{}`",
line, col, text
);
}
}
// Walk the tree.
let mut cursor = parse_tree.walk();
let mut action = Action::Down;
loop {
match action {
Action::Up => {
if cursor.goto_next_sibling() {
action = Action::Right;
} else if cursor.goto_parent() {
action = Action::Up;
} else {
break;
}
}
Action::Down => {
let range = cursor.node().range();
let text = &src[range.start_byte..range.end_byte];
let line = range.start_point.row;
let col = range.start_point.column;
println!(
"[Line: {}, Col: {}] {}: `{}`",
line,
col,
cursor.node().kind(),
text
);
if cursor.goto_first_child() {
action = Action::Down;
} else if cursor.goto_next_sibling() {
action = Action::Right;
} else if cursor.goto_parent() {
action = Action::Up;
} else {
break;
}
}
Action::Right => {
let range = cursor.node().range();
let text = &src[range.start_byte..range.end_byte];
let line = range.start_point.row;
let col = range.start_point.column;
println!(
"[Line: {}, Col: {}] {}: `{}`",
line,
col,
cursor.node().kind(),
text
);
if cursor.goto_first_child() {
action = Action::Down;
} else if cursor.goto_next_sibling() {
action = Action::Right;
} else if cursor.goto_parent() {
action = Action::Up;
} else {
break;
}
}
}
}
}
Ok(())
}

1
tree-sitter-python Submodule

@ -0,0 +1 @@
Subproject commit de221eccf9a221f5b85474a553474a69b4b5784d