convert to structs

Move SARIF rendering to ruff_db

Summary
--

This is another mostly straightforward JSON-based output format. In the first
commit, I replaced the ad hoc `json!` serialization with actual structs that
derive `Serialize`. Along the way, I made a couple of tweaks to the format:

- The `text` field of `SarifRule::full_description` was implicitly an `Option`
before, but serializing it as `null` is invalid according to the [validator]. I
made `full_description` itself an `Option<MessageString>` (`MessageString` is a
shortened form of `multiformatMessageString`, which is what the schema calls
this type) and skip serializing it if it's `None`, which validates against the
schema.
- `SarifResult::code` was explicitly an `Option<&'a SecondaryCode>`, so a
missing code was serialized as `"ruleId": null`, which is also invalid according
to the schema. I made it a required field and fell back on the lint name, as in
some of the other recent formats. This currently only affects syntax errors in
Ruff.

In the second commit I moved the code to `ruff_db` and updated the Ruff-specific
`expect` calls.

Test Plan
--

Existing tests ported to `ruff_db`

[validator]: https://www.jsonschemavalidator.net/s/GlhhhHQ7
This commit is contained in:
Brent Westbrook 2025-07-15 21:59:59 -04:00
parent e73a8ba571
commit 98320690dd
2 changed files with 158 additions and 112 deletions

View File

@ -85,7 +85,7 @@ exit_code: 1
"message": { "message": {
"text": "SyntaxError: Cannot use `match` statement on Python 3.9 (syntax was added in Python 3.10)" "text": "SyntaxError: Cannot use `match` statement on Python 3.9 (syntax was added in Python 3.10)"
}, },
"ruleId": null "ruleId": "invalid-syntax"
} }
], ],
"tool": { "tool": {

View File

@ -2,8 +2,7 @@ use std::collections::HashSet;
use std::io::Write; use std::io::Write;
use anyhow::Result; use anyhow::Result;
use serde::{Serialize, Serializer}; use serde::Serialize;
use serde_json::json;
use ruff_db::diagnostic::{Diagnostic, SecondaryCode}; use ruff_db::diagnostic::{Diagnostic, SecondaryCode};
use ruff_source_file::OneIndexed; use ruff_source_file::OneIndexed;
@ -27,38 +26,43 @@ impl Emitter for SarifEmitter {
.map(SarifResult::from_message) .map(SarifResult::from_message)
.collect::<Result<Vec<_>>>()?; .collect::<Result<Vec<_>>>()?;
let unique_rules: HashSet<_> = results.iter().filter_map(|result| result.code).collect(); let unique_rules: HashSet<_> = diagnostics
.iter()
.filter_map(Diagnostic::secondary_code)
.collect();
let mut rules: Vec<SarifRule> = unique_rules.into_iter().map(SarifRule::from).collect(); let mut rules: Vec<SarifRule> = unique_rules.into_iter().map(SarifRule::from).collect();
rules.sort_by(|a, b| a.code.cmp(b.code)); rules.sort_by(|a, b| a.id.cmp(b.id));
let output = json!({ let output = SarifOutput {
"$schema": "https://json.schemastore.org/sarif-2.1.0.json", schema: "https://json.schemastore.org/sarif-2.1.0.json",
"version": "2.1.0", version: "2.1.0",
"runs": [{ runs: [SarifRun {
"tool": { tool: SarifTool {
"driver": { driver: SarifDriver {
"name": "ruff", name: "ruff",
"informationUri": "https://github.com/astral-sh/ruff", information_uri: "https://github.com/astral-sh/ruff",
"rules": rules, rules,
"version": VERSION.to_string(), version: VERSION,
} },
}, },
"results": results, results,
}], }],
}); };
serde_json::to_writer_pretty(writer, &output)?; serde_json::to_writer_pretty(writer, &output)?;
Ok(()) Ok(())
} }
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone, Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifRule<'a> { struct SarifRule<'a> {
name: &'a str, #[serde(skip_serializing_if = "Option::is_none")]
code: &'a SecondaryCode, full_description: Option<MessageString<'a>>,
linter: &'a str, help: MessageString<'a>,
summary: &'a str, help_uri: Option<String>,
explanation: Option<&'a str>, id: &'a SecondaryCode,
url: Option<String>, properties: SarifProperties<'a>,
short_description: MessageString<'a>,
} }
impl<'a> From<&'a SecondaryCode> for SarifRule<'a> { impl<'a> From<&'a SecondaryCode> for SarifRule<'a> {
@ -71,54 +75,28 @@ impl<'a> From<&'a SecondaryCode> for SarifRule<'a> {
.find(|rule| rule.noqa_code().suffix() == suffix) .find(|rule| rule.noqa_code().suffix() == suffix)
.expect("Expected a valid noqa code corresponding to a rule"); .expect("Expected a valid noqa code corresponding to a rule");
Self { Self {
name: rule.into(), id: code,
code, help_uri: rule.url(),
linter: linter.name(), short_description: MessageString::from(rule.message_formats()[0]),
summary: rule.message_formats()[0], full_description: rule.explanation().map(MessageString::from),
explanation: rule.explanation(), help: MessageString::from(rule.message_formats()[0]),
url: rule.url(), properties: SarifProperties {
id: code,
kind: linter.name(),
name: rule.into(),
problem_severity: "error",
},
} }
} }
} }
impl Serialize for SarifRule<'_> { #[derive(Debug, Serialize)]
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> #[serde(rename_all = "camelCase")]
where
S: Serializer,
{
json!({
"id": self.code,
"shortDescription": {
"text": self.summary,
},
"fullDescription": {
"text": self.explanation,
},
"help": {
"text": self.summary,
},
"helpUri": self.url,
"properties": {
"id": self.code,
"kind": self.linter,
"name": self.name,
"problem.severity": "error".to_string(),
},
})
.serialize(serializer)
}
}
#[derive(Debug)]
struct SarifResult<'a> { struct SarifResult<'a> {
code: Option<&'a SecondaryCode>, level: &'static str,
level: String, locations: [SarifLocation; 1],
message: String, message: MessageString<'a>,
uri: String, rule_id: &'a str,
start_line: OneIndexed,
start_column: OneIndexed,
end_line: OneIndexed,
end_column: OneIndexed,
} }
impl<'a> SarifResult<'a> { impl<'a> SarifResult<'a> {
@ -128,16 +106,28 @@ impl<'a> SarifResult<'a> {
let end_location = message.expect_ruff_end_location(); let end_location = message.expect_ruff_end_location();
let path = normalize_path(&*message.expect_ruff_filename()); let path = normalize_path(&*message.expect_ruff_filename());
Ok(Self { Ok(Self {
code: message.secondary_code(), rule_id: message
level: "error".to_string(), .secondary_code()
message: message.body().to_string(), .map_or_else(|| message.name(), SecondaryCode::as_str),
uri: url::Url::from_file_path(&path) level: "error",
.map_err(|()| anyhow::anyhow!("Failed to convert path to URL: {}", path.display()))? message: MessageString::from(message.body()),
.to_string(), locations: [SarifLocation {
start_line: start_location.line, physical_location: SarifPhysicalLocation {
start_column: start_location.column, artifact_location: SarifArtifactLocation {
end_line: end_location.line, uri: url::Url::from_file_path(&path)
end_column: end_location.column, .map_err(|()| {
anyhow::anyhow!("Failed to convert path to URL: {}", path.display())
})?
.to_string(),
},
region: SarifRegion {
start_line: start_location.line,
start_column: start_location.column,
end_line: end_location.line,
end_column: end_location.column,
},
},
}],
}) })
} }
@ -148,47 +138,103 @@ impl<'a> SarifResult<'a> {
let end_location = message.expect_ruff_end_location(); let end_location = message.expect_ruff_end_location();
let path = normalize_path(&*message.expect_ruff_filename()); let path = normalize_path(&*message.expect_ruff_filename());
Ok(Self { Ok(Self {
code: message.secondary_code(), rule_id: message
level: "error".to_string(), .secondary_code()
message: message.body().to_string(), .map_or_else(|| message.name(), SecondaryCode::as_str),
uri: path.display().to_string(), level: "error",
start_line: start_location.line, message: MessageString::from(message.body()),
start_column: start_location.column, locations: [SarifLocation {
end_line: end_location.line, physical_location: SarifPhysicalLocation {
end_column: end_location.column, artifact_location: SarifArtifactLocation {
uri: path.display().to_string(),
},
region: SarifRegion {
start_line: start_location.line,
start_column: start_location.column,
end_line: end_location.line,
end_column: end_location.column,
},
},
}],
}) })
} }
} }
impl Serialize for SarifResult<'_> { #[derive(Serialize)]
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> struct SarifOutput<'a> {
where #[serde(rename = "$schema")]
S: Serializer, schema: &'static str,
{ runs: [SarifRun<'a>; 1],
json!({ version: &'static str,
"level": self.level, }
"message": {
"text": self.message, #[derive(Serialize)]
}, struct SarifRun<'a> {
"locations": [{ results: Vec<SarifResult<'a>>,
"physicalLocation": { tool: SarifTool<'a>,
"artifactLocation": { }
"uri": self.uri,
}, #[derive(Serialize)]
"region": { struct SarifTool<'a> {
"startLine": self.start_line, driver: SarifDriver<'a>,
"startColumn": self.start_column, }
"endLine": self.end_line,
"endColumn": self.end_column, #[derive(Serialize)]
} struct SarifDriver<'a> {
} #[serde(rename = "informationUri")]
}], information_uri: &'static str,
"ruleId": self.code, name: &'static str,
}) rules: Vec<SarifRule<'a>>,
.serialize(serializer) version: &'static str,
}
#[derive(Debug, Clone, Serialize)]
struct SarifProperties<'a> {
id: &'a SecondaryCode,
kind: &'a str,
name: &'a str,
#[serde(rename = "problem.severity")]
problem_severity: &'static str,
}
#[derive(Debug, Clone, Serialize)]
struct MessageString<'a> {
text: &'a str,
}
impl<'a> From<&'a str> for MessageString<'a> {
fn from(text: &'a str) -> Self {
Self { text }
} }
} }
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifLocation {
physical_location: SarifPhysicalLocation,
}
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifPhysicalLocation {
artifact_location: SarifArtifactLocation,
region: SarifRegion,
}
#[derive(Debug, Serialize)]
struct SarifArtifactLocation {
uri: String,
}
#[derive(Debug, Serialize)]
#[serde(rename_all = "camelCase")]
struct SarifRegion {
end_column: OneIndexed,
end_line: OneIndexed,
start_column: OneIndexed,
start_line: OneIndexed,
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::message::SarifEmitter; use crate::message::SarifEmitter;