Move GitLab output rendering to `ruff_db` (#20117)

## Summary

This PR is a first step toward adding a GitLab output format to ty. It
converts the `GitlabEmitter` from `ruff_linter` to a `GitlabRenderer` in
`ruff_db` and updates its implementation to handle non-Ruff files and
diagnostics without primary spans. I tried to break up the changes here
so that they're easy to review commit-by-commit, or at least in groups
of commits:
- [preparatory changes in-place in `ruff_linter` and a `ruff_db`
skeleton](0761b73a61)
- [moving the code over with no implementation changes mixed
in](0761b73a61..8f909ea0bb)
- [tidying up the code now in
`ruff_db`](9f047c4f9f..e5e217fcd6)

This wasn't strictly necessary, but I also added some `Serialize`
structs instead of calling `json!` to make it a little clearer that we
weren't modifying the schema (e4c4bee35d).

I plan to follow this up with a separate PR exposing this output format
in the ty CLI, which should be quite straightforward.

## Test Plan

Existing tests, especially the two that show up in the diff as renamed
nearly without changes
This commit is contained in:
Brent Westbrook 2025-08-28 08:56:33 -04:00 committed by GitHub
parent d9aaacd01f
commit 1ce65714c0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
13 changed files with 277 additions and 244 deletions

2
Cargo.lock generated
View File

@ -2870,6 +2870,7 @@ dependencies = [
"insta",
"matchit",
"path-slash",
"pathdiff",
"quick-junit",
"ruff_annotate_snippets",
"ruff_cache",
@ -3020,7 +3021,6 @@ dependencies = [
"memchr",
"natord",
"path-absolutize",
"pathdiff",
"pep440_rs",
"pyproject-toml",
"regex",

View File

@ -15,8 +15,7 @@ use ruff_db::diagnostic::{
use ruff_linter::fs::relativize_path;
use ruff_linter::logging::LogLevel;
use ruff_linter::message::{
Emitter, EmitterContext, GithubEmitter, GitlabEmitter, GroupedEmitter, SarifEmitter,
TextEmitter,
Emitter, EmitterContext, GithubEmitter, GroupedEmitter, SarifEmitter, TextEmitter,
};
use ruff_linter::notify_user;
use ruff_linter::settings::flags::{self};
@ -296,7 +295,11 @@ impl Printer {
GithubEmitter.emit(writer, &diagnostics.inner, &context)?;
}
OutputFormat::Gitlab => {
GitlabEmitter::default().emit(writer, &diagnostics.inner, &context)?;
let config = DisplayDiagnosticConfig::default()
.format(DiagnosticFormat::Gitlab)
.preview(preview);
let value = DisplayDiagnostics::new(&context, &config, &diagnostics.inner);
write!(writer, "{value}")?;
}
OutputFormat::Pylint => {
let config = DisplayDiagnosticConfig::default()

View File

@ -20,59 +20,59 @@ exit_code: 1
{
"check_name": "F401",
"description": "F401: `os` imported but unused",
"severity": "major",
"fingerprint": "4dbad37161e65c72",
"location": {
"path": "input.py",
"positions": {
"begin": {
"column": 8,
"line": 1
"line": 1,
"column": 8
},
"end": {
"column": 10,
"line": 1
"line": 1,
"column": 10
}
}
},
"severity": "major"
}
},
{
"check_name": "F821",
"description": "F821: Undefined name `y`",
"severity": "major",
"fingerprint": "7af59862a085230",
"location": {
"path": "input.py",
"positions": {
"begin": {
"column": 5,
"line": 2
"line": 2,
"column": 5
},
"end": {
"column": 6,
"line": 2
"line": 2,
"column": 6
}
}
},
"severity": "major"
}
},
{
"check_name": "invalid-syntax",
"description": "invalid-syntax: Cannot use `match` statement on Python 3.9 (syntax was added in Python 3.10)",
"severity": "major",
"fingerprint": "e558cec859bb66e8",
"location": {
"path": "input.py",
"positions": {
"begin": {
"column": 1,
"line": 3
"line": 3,
"column": 1
},
"end": {
"column": 6,
"line": 3
"line": 3,
"column": 6
}
}
},
"severity": "major"
}
}
]
----- stderr -----

View File

@ -34,6 +34,7 @@ glob = { workspace = true }
ignore = { workspace = true, optional = true }
matchit = { workspace = true }
path-slash = { workspace = true }
pathdiff = { workspace = true }
quick-junit = { workspace = true, optional = true }
rustc-hash = { workspace = true }
salsa = { workspace = true }
@ -53,7 +54,7 @@ web-time = { version = "1.1.0" }
etcetera = { workspace = true, optional = true }
[dev-dependencies]
insta = { workspace = true }
insta = { workspace = true, features = ["filters"] }
tempfile = { workspace = true }
[features]

View File

@ -1435,6 +1435,11 @@ pub enum DiagnosticFormat {
/// Print diagnostics in the format expected by JUnit.
#[cfg(feature = "junit")]
Junit,
/// Print diagnostics in the JSON format used by GitLab [Code Quality] reports.
///
/// [Code Quality]: https://docs.gitlab.com/ee/ci/testing/code_quality.html#implement-a-custom-tool
#[cfg(feature = "serde")]
Gitlab,
}
/// A representation of the kinds of messages inside a diagnostic.

View File

@ -31,6 +31,8 @@ mod azure;
mod concise;
mod full;
#[cfg(feature = "serde")]
mod gitlab;
#[cfg(feature = "serde")]
mod json;
#[cfg(feature = "serde")]
mod json_lines;
@ -136,6 +138,10 @@ impl std::fmt::Display for DisplayDiagnostics<'_> {
DiagnosticFormat::Junit => {
junit::JunitRenderer::new(self.resolver).render(f, self.diagnostics)?;
}
#[cfg(feature = "serde")]
DiagnosticFormat::Gitlab => {
gitlab::GitlabRenderer::new(self.resolver).render(f, self.diagnostics)?;
}
}
Ok(())

View File

@ -0,0 +1,205 @@
use std::{
collections::HashSet,
hash::{DefaultHasher, Hash, Hasher},
path::Path,
};
use ruff_source_file::LineColumn;
use serde::{Serialize, Serializer, ser::SerializeSeq};
use crate::diagnostic::{Diagnostic, Severity};
use super::FileResolver;
pub(super) struct GitlabRenderer<'a> {
resolver: &'a dyn FileResolver,
}
impl<'a> GitlabRenderer<'a> {
pub(super) fn new(resolver: &'a dyn FileResolver) -> Self {
Self { resolver }
}
}
impl GitlabRenderer<'_> {
pub(super) fn render(
&self,
f: &mut std::fmt::Formatter,
diagnostics: &[Diagnostic],
) -> std::fmt::Result {
write!(
f,
"{}",
serde_json::to_string_pretty(&SerializedMessages {
diagnostics,
resolver: self.resolver,
#[expect(
clippy::disallowed_methods,
reason = "We don't have access to a `System` here, \
and this is only intended for use by GitLab CI, \
which runs on a real `System`."
)]
project_dir: std::env::var("CI_PROJECT_DIR").ok().as_deref(),
})
.unwrap()
)
}
}
struct SerializedMessages<'a> {
diagnostics: &'a [Diagnostic],
resolver: &'a dyn FileResolver,
project_dir: Option<&'a str>,
}
impl Serialize for SerializedMessages<'_> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let mut s = serializer.serialize_seq(Some(self.diagnostics.len()))?;
let mut fingerprints = HashSet::<u64>::with_capacity(self.diagnostics.len());
for diagnostic in self.diagnostics {
let location = diagnostic
.primary_span()
.map(|span| {
let file = span.file();
let positions = if self.resolver.is_notebook(file) {
// We can't give a reasonable location for the structured formats,
// so we show one that's clearly a fallback
Default::default()
} else {
let diagnostic_source = file.diagnostic_source(self.resolver);
let source_code = diagnostic_source.as_source_code();
span.range()
.map(|range| Positions {
begin: source_code.line_column(range.start()),
end: source_code.line_column(range.end()),
})
.unwrap_or_default()
};
let path = self.project_dir.as_ref().map_or_else(
|| file.relative_path(self.resolver).display().to_string(),
|project_dir| relativize_path_to(file.path(self.resolver), project_dir),
);
Location { path, positions }
})
.unwrap_or_default();
let mut message_fingerprint = fingerprint(diagnostic, &location.path, 0);
// Make sure that we do not get a fingerprint that is already in use
// by adding in the previously generated one.
while fingerprints.contains(&message_fingerprint) {
message_fingerprint = fingerprint(diagnostic, &location.path, message_fingerprint);
}
fingerprints.insert(message_fingerprint);
let description = diagnostic.body();
let check_name = diagnostic.secondary_code_or_id();
let severity = match diagnostic.severity() {
Severity::Info => "info",
Severity::Warning => "minor",
Severity::Error => "major",
// Another option here is `blocker`
Severity::Fatal => "critical",
};
let value = Message {
check_name,
// GitLab doesn't display the separate `check_name` field in a Code Quality report,
// so prepend it to the description too.
description: format!("{check_name}: {description}"),
severity,
fingerprint: format!("{:x}", message_fingerprint),
location,
};
s.serialize_element(&value)?;
}
s.end()
}
}
#[derive(Serialize)]
struct Message<'a> {
check_name: &'a str,
description: String,
severity: &'static str,
fingerprint: String,
location: Location,
}
/// The place in the source code where the issue was discovered.
///
/// According to the CodeClimate report format [specification] linked from the GitLab [docs], this
/// field is required, so we fall back on a default `path` and position if the diagnostic doesn't
/// have a primary span.
///
/// [specification]: https://github.com/codeclimate/platform/blob/master/spec/analyzers/SPEC.md#data-types
/// [docs]: https://docs.gitlab.com/ci/testing/code_quality/#code-quality-report-format
#[derive(Default, Serialize)]
struct Location {
path: String,
positions: Positions,
}
#[derive(Default, Serialize)]
struct Positions {
begin: LineColumn,
end: LineColumn,
}
/// Generate a unique fingerprint to identify a violation.
fn fingerprint(diagnostic: &Diagnostic, project_path: &str, salt: u64) -> u64 {
let mut hasher = DefaultHasher::new();
salt.hash(&mut hasher);
diagnostic.name().hash(&mut hasher);
project_path.hash(&mut hasher);
hasher.finish()
}
/// Convert an absolute path to be relative to the specified project root.
fn relativize_path_to<P: AsRef<Path>, R: AsRef<Path>>(path: P, project_root: R) -> String {
format!(
"{}",
pathdiff::diff_paths(&path, project_root)
.expect("Could not diff paths")
.display()
)
}
#[cfg(test)]
mod tests {
use crate::diagnostic::{
DiagnosticFormat,
render::tests::{create_diagnostics, create_syntax_error_diagnostics},
};
const FINGERPRINT_FILTERS: [(&str, &str); 1] = [(
r#""fingerprint": "[a-z0-9]+","#,
r#""fingerprint": "<redacted>","#,
)];
#[test]
fn output() {
let (env, diagnostics) = create_diagnostics(DiagnosticFormat::Gitlab);
insta::with_settings!({filters => FINGERPRINT_FILTERS}, {
insta::assert_snapshot!(env.render_diagnostics(&diagnostics));
});
}
#[test]
fn syntax_errors() {
let (env, diagnostics) = create_syntax_error_diagnostics(DiagnosticFormat::Gitlab);
insta::with_settings!({filters => FINGERPRINT_FILTERS}, {
insta::assert_snapshot!(env.render_diagnostics(&diagnostics));
});
}
}

View File

@ -1,63 +1,63 @@
---
source: crates/ruff_linter/src/message/gitlab.rs
expression: redact_fingerprint(&content)
source: crates/ruff_db/src/diagnostic/render/gitlab.rs
expression: env.render_diagnostics(&diagnostics)
---
[
{
"check_name": "F401",
"description": "F401: `os` imported but unused",
"severity": "major",
"fingerprint": "<redacted>",
"location": {
"path": "fib.py",
"positions": {
"begin": {
"column": 8,
"line": 1
"line": 1,
"column": 8
},
"end": {
"column": 10,
"line": 1
"line": 1,
"column": 10
}
}
},
"severity": "major"
}
},
{
"check_name": "F841",
"description": "F841: Local variable `x` is assigned to but never used",
"severity": "major",
"fingerprint": "<redacted>",
"location": {
"path": "fib.py",
"positions": {
"begin": {
"column": 5,
"line": 6
"line": 6,
"column": 5
},
"end": {
"column": 6,
"line": 6
"line": 6,
"column": 6
}
}
},
"severity": "major"
}
},
{
"check_name": "F821",
"description": "F821: Undefined name `a`",
"severity": "major",
"fingerprint": "<redacted>",
"location": {
"path": "undef.py",
"positions": {
"begin": {
"column": 4,
"line": 1
"line": 1,
"column": 4
},
"end": {
"column": 5,
"line": 1
"line": 1,
"column": 5
}
}
},
"severity": "major"
}
}
]

View File

@ -1,44 +1,44 @@
---
source: crates/ruff_linter/src/message/gitlab.rs
expression: redact_fingerprint(&content)
source: crates/ruff_db/src/diagnostic/render/gitlab.rs
expression: env.render_diagnostics(&diagnostics)
---
[
{
"check_name": "invalid-syntax",
"description": "invalid-syntax: Expected one or more symbol names after import",
"severity": "major",
"fingerprint": "<redacted>",
"location": {
"path": "syntax_errors.py",
"positions": {
"begin": {
"column": 15,
"line": 1
"line": 1,
"column": 15
},
"end": {
"column": 1,
"line": 2
"line": 2,
"column": 1
}
}
},
"severity": "major"
}
},
{
"check_name": "invalid-syntax",
"description": "invalid-syntax: Expected ')', found newline",
"severity": "major",
"fingerprint": "<redacted>",
"location": {
"path": "syntax_errors.py",
"positions": {
"begin": {
"column": 12,
"line": 3
"line": 3,
"column": 12
},
"end": {
"column": 1,
"line": 4
"line": 4,
"column": 1
}
}
},
"severity": "major"
}
}
]

View File

@ -51,7 +51,6 @@ path-absolutize = { workspace = true, features = [
"once_cell_cache",
"use_unix_paths_on_wasm",
] }
pathdiff = { workspace = true }
pep440_rs = { workspace = true }
pyproject-toml = { workspace = true }
regex = { workspace = true }

View File

@ -58,13 +58,3 @@ pub fn relativize_path<P: AsRef<Path>>(path: P) -> String {
}
format!("{}", path.display())
}
/// Convert an absolute path to be relative to the specified project root.
pub fn relativize_path_to<P: AsRef<Path>, R: AsRef<Path>>(path: P, project_root: R) -> String {
format!(
"{}",
pathdiff::diff_paths(&path, project_root)
.expect("Could not diff paths")
.display()
)
}

View File

@ -1,174 +0,0 @@
use std::collections::HashSet;
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
use std::io::Write;
use serde::ser::SerializeSeq;
use serde::{Serialize, Serializer};
use serde_json::json;
use ruff_db::diagnostic::Diagnostic;
use crate::fs::{relativize_path, relativize_path_to};
use crate::message::{Emitter, EmitterContext};
/// Generate JSON with violations in GitLab CI format
// https://docs.gitlab.com/ee/ci/testing/code_quality.html#implement-a-custom-tool
pub struct GitlabEmitter {
project_dir: Option<String>,
}
impl Default for GitlabEmitter {
fn default() -> Self {
Self {
project_dir: std::env::var("CI_PROJECT_DIR").ok(),
}
}
}
impl Emitter for GitlabEmitter {
fn emit(
&mut self,
writer: &mut dyn Write,
diagnostics: &[Diagnostic],
context: &EmitterContext,
) -> anyhow::Result<()> {
serde_json::to_writer_pretty(
writer,
&SerializedMessages {
diagnostics,
context,
project_dir: self.project_dir.as_deref(),
},
)?;
Ok(())
}
}
struct SerializedMessages<'a> {
diagnostics: &'a [Diagnostic],
context: &'a EmitterContext<'a>,
project_dir: Option<&'a str>,
}
impl Serialize for SerializedMessages<'_> {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
let mut s = serializer.serialize_seq(Some(self.diagnostics.len()))?;
let mut fingerprints = HashSet::<u64>::with_capacity(self.diagnostics.len());
for diagnostic in self.diagnostics {
let filename = diagnostic.expect_ruff_filename();
let (start_location, end_location) = if self.context.is_notebook(&filename) {
// We can't give a reasonable location for the structured formats,
// so we show one that's clearly a fallback
Default::default()
} else {
(
diagnostic.expect_ruff_start_location(),
diagnostic.expect_ruff_end_location(),
)
};
let path = self.project_dir.as_ref().map_or_else(
|| relativize_path(&filename),
|project_dir| relativize_path_to(&filename, project_dir),
);
let mut message_fingerprint = fingerprint(diagnostic, &path, 0);
// Make sure that we do not get a fingerprint that is already in use
// by adding in the previously generated one.
while fingerprints.contains(&message_fingerprint) {
message_fingerprint = fingerprint(diagnostic, &path, message_fingerprint);
}
fingerprints.insert(message_fingerprint);
let description = diagnostic.body();
let check_name = diagnostic.secondary_code_or_id();
let value = json!({
"check_name": check_name,
// GitLab doesn't display the separate `check_name` field in a Code Quality report,
// so prepend it to the description too.
"description": format!("{check_name}: {description}"),
"severity": "major",
"fingerprint": format!("{:x}", message_fingerprint),
"location": {
"path": path,
"positions": {
"begin": start_location,
"end": end_location,
},
},
});
s.serialize_element(&value)?;
}
s.end()
}
}
/// Generate a unique fingerprint to identify a violation.
fn fingerprint(message: &Diagnostic, project_path: &str, salt: u64) -> u64 {
let mut hasher = DefaultHasher::new();
salt.hash(&mut hasher);
message.name().hash(&mut hasher);
project_path.hash(&mut hasher);
hasher.finish()
}
#[cfg(test)]
mod tests {
use insta::assert_snapshot;
use crate::message::GitlabEmitter;
use crate::message::tests::{
capture_emitter_output, create_diagnostics, create_syntax_error_diagnostics,
};
#[test]
fn output() {
let mut emitter = GitlabEmitter::default();
let content = capture_emitter_output(&mut emitter, &create_diagnostics());
assert_snapshot!(redact_fingerprint(&content));
}
#[test]
fn syntax_errors() {
let mut emitter = GitlabEmitter::default();
let content = capture_emitter_output(&mut emitter, &create_syntax_error_diagnostics());
assert_snapshot!(redact_fingerprint(&content));
}
// Redact the fingerprint because the default hasher isn't stable across platforms.
fn redact_fingerprint(content: &str) -> String {
static FINGERPRINT_HAY_KEY: &str = r#""fingerprint": ""#;
let mut output = String::with_capacity(content.len());
let mut last = 0;
for (start, _) in content.match_indices(FINGERPRINT_HAY_KEY) {
let fingerprint_hash_start = start + FINGERPRINT_HAY_KEY.len();
output.push_str(&content[last..fingerprint_hash_start]);
output.push_str("<redacted>");
last = fingerprint_hash_start
+ content[fingerprint_hash_start..]
.find('"')
.expect("Expected terminating quote");
}
output.push_str(&content[last..]);
output
}
}

View File

@ -10,7 +10,6 @@ use ruff_db::diagnostic::{
use ruff_db::files::File;
pub use github::GithubEmitter;
pub use gitlab::GitlabEmitter;
pub use grouped::GroupedEmitter;
use ruff_notebook::NotebookIndex;
use ruff_source_file::SourceFile;
@ -22,7 +21,6 @@ use crate::Fix;
use crate::registry::Rule;
mod github;
mod gitlab;
mod grouped;
mod sarif;
mod text;