mirror of https://github.com/astral-sh/ruff
[red-knot] Add fuzzer to catch panics for invalid syntax (#14678)
## Summary

This PR adds a fuzzer harness for red knot that runs the type checker on source code that contains invalid syntax. Additionally, it updates the `init-fuzzer.sh` script to increase the corpus size:

* Include various crates that contain Python source code
* Use the CPython 3.13 source code

It also removes any non-Python files from the final corpus, so that when the fuzzer minimizes the corpus it doesn't produce files that contain only documentation content, which is just noise.

## Test Plan

Run `./fuzz/init-fuzzer.sh` and answer no to the large dataset. Then run the fuzzer with `cargo +nightly fuzz run red_knot_check_invalid_syntax -- -timeout=5`.
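For reference, the test plan above maps to the following commands, run from the repository root (this assumes `cargo-fuzz` is installed and a nightly toolchain is available):

```sh
# Build the fuzzing corpus; answer "n" when asked about the large Python source dataset.
./fuzz/init-fuzzer.sh

# Run the new harness with a 5-second per-input timeout.
cargo +nightly fuzz run red_knot_check_invalid_syntax -- -timeout=5
```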
This commit is contained in:
parent 575deb5d4d
commit 1685d95ed2
@ -32,6 +32,8 @@ jobs:
      # Flag that is raised when any code is changed
      # This is superset of the linter and formatter
      code: ${{ steps.changed.outputs.code_any_changed }}
      # Flag that is raised when any code that affects the fuzzer is changed
      fuzz: ${{ steps.changed.outputs.fuzz_any_changed }}
    steps:
      - uses: actions/checkout@v4
        with:
@ -79,6 +81,11 @@ jobs:
            - python/**
            - .github/workflows/ci.yaml

          fuzz:
            - fuzz/Cargo.toml
            - fuzz/Cargo.lock
            - fuzz/fuzz_targets/**

          code:
            - "**/*"
            - "!**/*.md"
@ -288,7 +295,7 @@ jobs:
    name: "cargo fuzz build"
    runs-on: ubuntu-latest
    needs: determine_changes
    if: ${{ github.ref == 'refs/heads/main' }}
    if: ${{ github.ref == 'refs/heads/main' || needs.determine_changes.outputs.fuzz == 'true' }}
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@v4
@ -17,6 +17,9 @@ libfuzzer = ["libfuzzer-sys/link_libfuzzer"]
cargo-fuzz = true

[dependencies]
red_knot_python_semantic = { path = "../crates/red_knot_python_semantic" }
red_knot_vendored = { path = "../crates/red_knot_vendored" }
ruff_db = { path = "../crates/ruff_db" }
ruff_linter = { path = "../crates/ruff_linter" }
ruff_python_ast = { path = "../crates/ruff_python_ast" }
ruff_python_codegen = { path = "../crates/ruff_python_codegen" }
@ -26,12 +29,18 @@ ruff_python_formatter = { path = "../crates/ruff_python_formatter"}
ruff_text_size = { path = "../crates/ruff_text_size" }

libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer", default-features = false }
salsa = { git = "https://github.com/salsa-rs/salsa.git", rev = "254c749b02cde2fd29852a7463a33e800b771758" }
similar = { version = "2.5.0" }
tracing = { version = "0.1.40" }

# Prevent this from interfering with workspaces
[workspace]
members = ["."]

[[bin]]
name = "red_knot_check_invalid_syntax"
path = "fuzz_targets/red_knot_check_invalid_syntax.rs"

[[bin]]
name = "ruff_parse_simple"
path = "fuzz_targets/ruff_parse_simple.rs"
@ -74,6 +74,15 @@ Each fuzzer harness in [`fuzz_targets`](fuzz_targets) targets a different aspect
them in different ways. While there is implementation-specific documentation in the source code
itself, each harness is briefly described below.

### `red_knot_check_invalid_syntax`

This fuzz harness checks that the type checker (Red Knot) does not panic when checking a source
file with invalid syntax. It rejects any corpus entry that is already valid Python code.
Currently, this is limited to the syntax errors produced by Ruff's Python parser, which means
that it does not cover all possible syntax errors (<https://github.com/astral-sh/ruff/issues/11934>).
A possible workaround for now would be to bypass the parser and run the type checker on all inputs
regardless of syntax errors.

### `ruff_parse_simple`

This fuzz harness does not perform any "smart" testing of Ruff; it merely checks that the parsing
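A usage note: when this harness does hit a panic, `cargo fuzz` saves the failing input under `fuzz/artifacts/red_knot_check_invalid_syntax/`, and that single input can be replayed directly. The `crash-<hash>` file name below is a placeholder:

```sh
# Replay one crashing input previously saved by the fuzzer.
cargo +nightly fuzz run red_knot_check_invalid_syntax fuzz/artifacts/red_knot_check_invalid_syntax/crash-<hash>
```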
@ -0,0 +1 @@
ruff_fix_validity
@ -0,0 +1,143 @@
//! Fuzzer harness that runs the type checker to catch panics for source code containing
//! syntax errors.

#![no_main]

use std::sync::{Mutex, OnceLock};

use libfuzzer_sys::{fuzz_target, Corpus};

use red_knot_python_semantic::types::check_types;
use red_knot_python_semantic::{
    Db as SemanticDb, Program, ProgramSettings, PythonVersion, SearchPathSettings,
};
use ruff_db::files::{system_path_to_file, File, Files};
use ruff_db::system::{DbWithTestSystem, System, SystemPathBuf, TestSystem};
use ruff_db::vendored::VendoredFileSystem;
use ruff_db::{Db as SourceDb, Upcast};
use ruff_python_parser::{parse_unchecked, Mode};

/// Database that can be used for testing.
///
/// Uses an in-memory filesystem and stubs out the vendored files by default.
#[salsa::db]
struct TestDb {
    storage: salsa::Storage<Self>,
    files: Files,
    system: TestSystem,
    vendored: VendoredFileSystem,
    events: std::sync::Arc<std::sync::Mutex<Vec<salsa::Event>>>,
}

impl TestDb {
    fn new() -> Self {
        Self {
            storage: salsa::Storage::default(),
            system: TestSystem::default(),
            vendored: red_knot_vendored::file_system().clone(),
            events: std::sync::Arc::default(),
            files: Files::default(),
        }
    }
}

#[salsa::db]
impl SourceDb for TestDb {
    fn vendored(&self) -> &VendoredFileSystem {
        &self.vendored
    }

    fn system(&self) -> &dyn System {
        &self.system
    }

    fn files(&self) -> &Files {
        &self.files
    }
}

impl DbWithTestSystem for TestDb {
    fn test_system(&self) -> &TestSystem {
        &self.system
    }

    fn test_system_mut(&mut self) -> &mut TestSystem {
        &mut self.system
    }
}

impl Upcast<dyn SourceDb> for TestDb {
    fn upcast(&self) -> &(dyn SourceDb + 'static) {
        self
    }
    fn upcast_mut(&mut self) -> &mut (dyn SourceDb + 'static) {
        self
    }
}

#[salsa::db]
impl SemanticDb for TestDb {
    fn is_file_open(&self, file: File) -> bool {
        !file.path(self).is_vendored_path()
    }
}

#[salsa::db]
impl salsa::Database for TestDb {
    fn salsa_event(&self, event: &dyn Fn() -> salsa::Event) {
        let event = event();
        tracing::trace!("event: {:?}", event);
        let mut events = self.events.lock().unwrap();
        events.push(event);
    }
}

fn setup_db() -> TestDb {
    let db = TestDb::new();

    let src_root = SystemPathBuf::from("/src");
    db.memory_file_system()
        .create_directory_all(&src_root)
        .unwrap();

    Program::from_settings(
        &db,
        &ProgramSettings {
            target_version: PythonVersion::default(),
            search_paths: SearchPathSettings::new(src_root),
        },
    )
    .expect("Valid search path settings");

    db
}

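// The test database is created lazily and reused across fuzz iterations, so the relatively
// expensive program/search-path setup in `setup_db` only runs once; the `Mutex` serializes the
// mutable file-system operations performed in `do_fuzz`.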
static TEST_DB: OnceLock<Mutex<TestDb>> = OnceLock::new();

fn do_fuzz(case: &[u8]) -> Corpus {
    let Ok(code) = std::str::from_utf8(case) else {
        return Corpus::Reject;
    };

    let parsed = parse_unchecked(code, Mode::Module);
    if parsed.is_valid() {
        return Corpus::Reject;
    }

    let mut db = TEST_DB
        .get_or_init(|| Mutex::new(setup_db()))
        .lock()
        .unwrap();

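    // Check the input both as a module (`a.py`) and as a stub (`a.pyi`), then remove the file
    // and sync the database so the next fuzz input starts from a clean state.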
    for path in &["/src/a.py", "/src/a.pyi"] {
        db.write_file(path, code).unwrap();
        let file = system_path_to_file(&*db, path).unwrap();
        check_types(&*db, file);
        db.memory_file_system().remove_file(path).unwrap();
        file.sync(&mut *db);
    }

    Corpus::Keep
}

fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) });
@ -11,16 +11,32 @@ fi

if [ ! -d corpus/ruff_fix_validity ]; then
    mkdir -p corpus/ruff_fix_validity
    read -p "Would you like to build a corpus from a python source code dataset? (this will take a long time!) [Y/n] " -n 1 -r
    echo
    cd corpus/ruff_fix_validity
    if [[ $REPLY =~ ^[Yy]$ ]]; then
        curl -L 'https://zenodo.org/record/3628784/files/python-corpus.tar.gz?download=1' | tar xz
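    # The parenthesized subshell keeps the `cd` scoped to the corpus directory.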
    (
        cd corpus/ruff_fix_validity

        read -p "Would you like to build a corpus from a python source code dataset? (this will take a long time!) [Y/n] " -n 1 -r
        echo
        if [[ $REPLY =~ ^[Yy]$ ]]; then
            curl -L 'https://zenodo.org/record/3628784/files/python-corpus.tar.gz?download=1' | tar xz
        fi

        # Build a smaller corpus in addition to the (optional) larger corpus
        curl -L 'https://github.com/python/cpython/archive/refs/tags/v3.13.0.tar.gz' | tar xz
        cp -r "../../../crates/red_knot_workspace/resources/test/corpus" "red_knot_workspace"
        cp -r "../../../crates/ruff_linter/resources/test/fixtures" "ruff_linter"
        cp -r "../../../crates/ruff_python_formatter/resources/test/fixtures" "ruff_python_formatter"
        cp -r "../../../crates/ruff_python_parser/resources" "ruff_python_parser"

        # Delete all non-Python files
        find . -type f -not -name "*.py" -delete
    )

    if [[ "$OSTYPE" == "darwin"* ]]; then
        cargo +nightly fuzz cmin ruff_fix_validity -- -timeout=5
    else
        cargo fuzz cmin -s none ruff_fix_validity -- -timeout=5
    fi
    curl -L 'https://github.com/python/cpython/archive/refs/tags/v3.12.0b2.tar.gz' | tar xz
    cp -r "../../../crates/ruff_linter/resources/test" .
    cd -
    cargo fuzz cmin -s none ruff_fix_validity -- -timeout=5
fi

echo "Done! You are ready to fuzz."