From 92f471a666fb82529b4553956c0a8bec3050cad8 Mon Sep 17 00:00:00 2001
From: konsti <konstin@mailbox.org>
Date: Thu, 20 Jul 2023 11:30:14 +0200
Subject: [PATCH] Handle io errors gracefully (#5611)

## Summary

It can happen that we can't read a file (a python file, a jupyter
notebook or pyproject.toml), which needs to be handled and handled
consistently for all file types. Instead of using `Err` or `error!`, we
emit E602 with the io error as message and continue. This PR makes sure
we handle all three cases consistently, emit E602.

I'm not convinced that it should be possible to disable io errors, but
we now handle the regular case consistently and at least print warning
consistently.

I went with `warn!` but i can change them all to `error!`, too.

It also checks the error case when a pyproject.toml is not readable. The
error message is not very helpful, but it's now a bit clearer that
actually ruff itself failed instead vs this being a diagnostic.

## Examples

This is how an Err of `run` looks now:


![image](https://github.com/astral-sh/ruff/assets/6826232/890f7ab2-2309-4b6f-a4b3-67161947cc83)

With an unreadable file and `IOError` disabled:


![image](https://github.com/astral-sh/ruff/assets/6826232/fd3d6959-fa23-4ddf-b2e5-8d6022df54b1)

(we lint zero files but count files before linting not during so we exit
0)

I'm not sure if it should (or if we should take a different path with
manual ExitStatus), but this currently also triggers when `files` is
empty:


![image](https://github.com/astral-sh/ruff/assets/6826232/f7ede301-41b5-4743-97fd-49149f750337)

## Test Plan

Unix only: Create a temporary directory with files with permissions
`000` (not readable by the owner) and run on that directory. Since this
breaks the assumptions of most of the test code (single file, `ruff`
instead of `ruff_cli`), the test code is rather cumbersome and looks a
bit misplaced; i'm happy about suggestions to fit it in closer with the
other tests or streamline it in other ways. I added another test for
when the entire directory is not readable.
---
 Cargo.lock                                    |   4 +
 Cargo.toml                                    |   2 +-
 crates/ruff/Cargo.toml                        |   1 +
 crates/ruff/src/pyproject_toml.rs             |  35 +++---
 crates/ruff/src/rules/ruff/mod.rs             |   2 +-
 crates/ruff/src/settings/mod.rs               |   2 -
 crates/ruff_cli/Cargo.toml                    |   2 +
 crates/ruff_cli/src/bin/ruff.rs               |  10 +-
 crates/ruff_cli/src/commands/run.rs           | 102 ++++++++++++++++--
 ...li__commands__run__test__E902_E902.py.snap |   7 ++
 crates/ruff_cli/src/diagnostics.rs            |  51 +++++++--
 crates/ruff_cli/tests/integration_test.rs     |  67 ++++++++++++
 crates/ruff_shrinking/src/main.rs             |   2 +-
 13 files changed, 250 insertions(+), 37 deletions(-)
 create mode 100644 crates/ruff_cli/src/commands/snapshots/ruff_cli__commands__run__test__E902_E902.py.snap

diff --git a/Cargo.lock b/Cargo.lock
index f7c0f7b355..33cf811709 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1005,6 +1005,7 @@ dependencies = [
  "globset",
  "lazy_static",
  "linked-hash-map",
+ "regex",
  "similar",
  "walkdir",
  "yaml-rust",
@@ -1945,6 +1946,7 @@ dependencies = [
  "smallvec",
  "strum",
  "strum_macros",
+ "tempfile",
  "test-case",
  "thiserror",
  "toml",
@@ -2003,6 +2005,7 @@ dependencies = [
  "filetime",
  "glob",
  "ignore",
+ "insta",
  "itertools",
  "itoa",
  "log",
@@ -2025,6 +2028,7 @@ dependencies = [
  "shellexpand",
  "similar",
  "strum",
+ "tempfile",
  "tikv-jemallocator",
  "ureq",
  "walkdir",
diff --git a/Cargo.toml b/Cargo.toml
index 63069a6a13..d06200abf3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -21,7 +21,7 @@ filetime = { version = "0.2.20" }
 glob = { version = "0.3.1" }
 globset = { version = "0.4.10" }
 ignore = { version = "0.4.20" }
-insta = { version = "1.31.0" }
+insta = { version = "1.31.0", feature = ["filters", "glob"] }
 is-macro = { version = "0.2.2" }
 itertools = { version = "0.10.5" }
 log = { version = "0.4.17" }
diff --git a/crates/ruff/Cargo.toml b/crates/ruff/Cargo.toml
index c99cce2846..094f67cdc8 100644
--- a/crates/ruff/Cargo.toml
+++ b/crates/ruff/Cargo.toml
@@ -86,6 +86,7 @@ pretty_assertions = "1.3.0"
 test-case = { workspace = true }
 # Disable colored output in tests
 colored = { workspace = true, features = ["no-color"] }
+tempfile = "3.6.0"
 
 [features]
 default = []
diff --git a/crates/ruff/src/pyproject_toml.rs b/crates/ruff/src/pyproject_toml.rs
index a0cfeb961b..d320f091a7 100644
--- a/crates/ruff/src/pyproject_toml.rs
+++ b/crates/ruff/src/pyproject_toml.rs
@@ -1,4 +1,5 @@
-use anyhow::Result;
+use colored::Colorize;
+use log::warn;
 use pyproject_toml::{BuildSystem, Project};
 use ruff_text_size::{TextRange, TextSize};
 use serde::{Deserialize, Serialize};
@@ -22,34 +23,38 @@ struct PyProjectToml {
     project: Option<Project>,
 }
 
-pub fn lint_pyproject_toml(source_file: SourceFile, settings: &Settings) -> Result<Vec<Message>> {
-    let mut messages = vec![];
-
-    let err = match toml::from_str::<PyProjectToml>(source_file.source_text()) {
-        Ok(_) => return Ok(messages),
-        Err(err) => err,
+pub fn lint_pyproject_toml(source_file: SourceFile, settings: &Settings) -> Vec<Message> {
+    let Some(err) = toml::from_str::<PyProjectToml>(source_file.source_text()).err() else {
+        return Vec::default();
     };
 
+    let mut messages = Vec::new();
     let range = match err.span() {
         // This is bad but sometimes toml and/or serde just don't give us spans
         // TODO(konstin,micha): https://github.com/astral-sh/ruff/issues/4571
         None => TextRange::default(),
         Some(range) => {
             let Ok(end) = TextSize::try_from(range.end) else {
+                let message = format!(
+                    "{} is larger than 4GB, but ruff assumes all files to be smaller",
+                    source_file.name(),
+                );
                 if settings.rules.enabled(Rule::IOError) {
-                    let diagnostic = Diagnostic::new(
-                        IOError {
-                            message: "pyproject.toml is larger than 4GB".to_string(),
-                        },
-                        TextRange::default(),
-                    );
+                    let diagnostic = Diagnostic::new(IOError { message }, TextRange::default());
                     messages.push(Message::from_diagnostic(
                         diagnostic,
                         source_file,
                         TextSize::default(),
                     ));
+                } else {
+                    warn!(
+                        "{}{}{} {message}",
+                        "Failed to lint ".bold(),
+                        source_file.name().bold(),
+                        ":".bold()
+                    );
                 }
-                return Ok(messages);
+                return messages;
             };
             TextRange::new(
                 // start <= end, so if end < 4GB follows start < 4GB
@@ -69,5 +74,5 @@ pub fn lint_pyproject_toml(source_file: SourceFile, settings: &Settings) -> Resu
         ));
     }
 
-    Ok(messages)
+    messages
 }
diff --git a/crates/ruff/src/rules/ruff/mod.rs b/crates/ruff/src/rules/ruff/mod.rs
index 79062bc839..18c15fda9e 100644
--- a/crates/ruff/src/rules/ruff/mod.rs
+++ b/crates/ruff/src/rules/ruff/mod.rs
@@ -212,7 +212,7 @@ mod tests {
         let messages = lint_pyproject_toml(
             source_file,
             &settings::Settings::for_rule(Rule::InvalidPyprojectToml),
-        )?;
+        );
         assert_messages!(snapshot, messages);
         Ok(())
     }
diff --git a/crates/ruff/src/settings/mod.rs b/crates/ruff/src/settings/mod.rs
index 1e2c08906d..a7933b2540 100644
--- a/crates/ruff/src/settings/mod.rs
+++ b/crates/ruff/src/settings/mod.rs
@@ -293,7 +293,6 @@ impl Settings {
         })
     }
 
-    #[cfg(test)]
     pub fn for_rule(rule_code: Rule) -> Self {
         Self {
             rules: RuleTable::from_iter([rule_code]),
@@ -301,7 +300,6 @@ impl Settings {
         }
     }
 
-    #[cfg(test)]
     pub fn for_rules(rules: impl IntoIterator<Item = Rule>) -> Self {
         Self {
             rules: RuleTable::from_iter(rules),
diff --git a/crates/ruff_cli/Cargo.toml b/crates/ruff_cli/Cargo.toml
index ca58bdcb42..122dd50d14 100644
--- a/crates/ruff_cli/Cargo.toml
+++ b/crates/ruff_cli/Cargo.toml
@@ -64,6 +64,8 @@ wild = { version = "2" }
 
 [dev-dependencies]
 assert_cmd = { version = "2.0.8" }
+insta = { workspace = true, features = ["filters"] }
+tempfile = "3.6.0"
 ureq = { version = "2.6.2", features = [] }
 
 [target.'cfg(target_os = "windows")'.dependencies]
diff --git a/crates/ruff_cli/src/bin/ruff.rs b/crates/ruff_cli/src/bin/ruff.rs
index 0c4ea2ef4d..97343ef798 100644
--- a/crates/ruff_cli/src/bin/ruff.rs
+++ b/crates/ruff_cli/src/bin/ruff.rs
@@ -50,7 +50,15 @@ pub fn main() -> ExitCode {
         Err(err) => {
             #[allow(clippy::print_stderr)]
             {
-                eprintln!("{}{} {err:?}", "error".red().bold(), ":".bold());
+                // This communicates that this isn't a linter error but ruff itself hard-errored for
+                // some reason (e.g. failed to resolve the configuration)
+                eprintln!("{}", "ruff failed".red().bold());
+                // Currently we generally only see one error, but e.g. with io errors when resolving
+                // the configuration it is help to chain errors ("resolving configuration failed" ->
+                // "failed to read file: subdir/pyproject.toml")
+                for cause in err.chain() {
+                    eprintln!("  {} {cause}", "Cause:".bold());
+                }
             }
             ExitStatus::Error.into()
         }
diff --git a/crates/ruff_cli/src/commands/run.rs b/crates/ruff_cli/src/commands/run.rs
index 61cdf6bf7a..fba477192d 100644
--- a/crates/ruff_cli/src/commands/run.rs
+++ b/crates/ruff_cli/src/commands/run.rs
@@ -125,7 +125,7 @@ pub(crate) fn run(
                         (Some(path.to_owned()), {
                             let mut error = e.to_string();
                             for cause in e.chain() {
-                                write!(&mut error, "\n  Caused by: {cause}").unwrap();
+                                write!(&mut error, "\n  Cause: {cause}").unwrap();
                             }
                             error
                         })
@@ -143,30 +143,30 @@ pub(crate) fn run(
             }
             .unwrap_or_else(|(path, message)| {
                 if let Some(path) = &path {
-                    error!(
-                        "{}{}{} {message}",
-                        "Failed to lint ".bold(),
-                        fs::relativize_path(path).bold(),
-                        ":".bold()
-                    );
                     let settings = resolver.resolve(path, pyproject_config);
                     if settings.rules.enabled(Rule::IOError) {
-                        let file =
+                        let dummy =
                             SourceFileBuilder::new(path.to_string_lossy().as_ref(), "").finish();
 
                         Diagnostics::new(
                             vec![Message::from_diagnostic(
                                 Diagnostic::new(IOError { message }, TextRange::default()),
-                                file,
+                                dummy,
                                 TextSize::default(),
                             )],
                             ImportMap::default(),
                         )
                     } else {
+                        warn!(
+                            "{}{}{} {message}",
+                            "Failed to lint ".bold(),
+                            fs::relativize_path(path).bold(),
+                            ":".bold()
+                        );
                         Diagnostics::default()
                     }
                 } else {
-                    error!("{} {message}", "Encountered error:".bold());
+                    warn!("{} {message}", "Encountered error:".bold());
                     Diagnostics::default()
                 }
             })
@@ -226,3 +226,85 @@ with the relevant file contents, the `pyproject.toml` settings, and the followin
         }
     }
 }
+
+#[cfg(test)]
+#[cfg(unix)]
+mod test {
+    use super::run;
+    use crate::args::Overrides;
+    use anyhow::Result;
+    use ruff::message::{Emitter, EmitterContext, TextEmitter};
+    use ruff::registry::Rule;
+    use ruff::resolver::{PyprojectConfig, PyprojectDiscoveryStrategy};
+    use ruff::settings::{flags, AllSettings, CliSettings, Settings};
+    use rustc_hash::FxHashMap;
+    use std::fs;
+    use std::os::unix::fs::OpenOptionsExt;
+    use tempfile::TempDir;
+
+    /// We check that regular python files, pyproject.toml and jupyter notebooks all handle io
+    /// errors gracefully
+    #[test]
+    fn unreadable_files() -> Result<()> {
+        let path = "E902.py";
+        let rule_code = Rule::IOError;
+
+        // Create inaccessible files
+        let tempdir = TempDir::new()?;
+        let pyproject_toml = tempdir.path().join("pyproject.toml");
+        let python_file = tempdir.path().join("code.py");
+        let notebook = tempdir.path().join("notebook.ipynb");
+        for file in [&pyproject_toml, &python_file, &notebook] {
+            fs::OpenOptions::new()
+                .create(true)
+                .write(true)
+                .mode(0o000)
+                .open(file)?;
+        }
+
+        // Configure
+        let snapshot = format!("{}_{}", rule_code.noqa_code(), path);
+        let settings = AllSettings {
+            cli: CliSettings::default(),
+            // invalid pyproject.toml is not active by default
+            lib: Settings::for_rules(vec![rule_code, Rule::InvalidPyprojectToml]),
+        };
+        let pyproject_config =
+            PyprojectConfig::new(PyprojectDiscoveryStrategy::Fixed, settings, None);
+
+        // Run
+        let diagnostics = run(
+            // Notebooks are not included by default
+            &[tempdir.path().to_path_buf(), notebook],
+            &pyproject_config,
+            &Overrides::default(),
+            flags::Cache::Disabled,
+            flags::Noqa::Disabled,
+            flags::FixMode::Generate,
+        )
+        .unwrap();
+        let mut output = Vec::new();
+
+        TextEmitter::default()
+            .with_show_fix_status(true)
+            .emit(
+                &mut output,
+                &diagnostics.messages,
+                &EmitterContext::new(&FxHashMap::default()),
+            )
+            .unwrap();
+
+        let messages = String::from_utf8(output).unwrap();
+
+        insta::with_settings!({
+            omit_expression => true,
+            filters => vec![
+                // The tempdir is always different (and platform dependent)
+                (tempdir.path().to_str().unwrap(), "/home/ferris/project"),
+            ]
+        }, {
+            insta::assert_snapshot!(snapshot, messages);
+        });
+        Ok(())
+    }
+}
diff --git a/crates/ruff_cli/src/commands/snapshots/ruff_cli__commands__run__test__E902_E902.py.snap b/crates/ruff_cli/src/commands/snapshots/ruff_cli__commands__run__test__E902_E902.py.snap
new file mode 100644
index 0000000000..7e0365d8f0
--- /dev/null
+++ b/crates/ruff_cli/src/commands/snapshots/ruff_cli__commands__run__test__E902_E902.py.snap
@@ -0,0 +1,7 @@
+---
+source: crates/ruff_cli/src/commands/run.rs
+---
+/home/ferris/project/code.py:1:1: E902 Permission denied (os error 13)
+/home/ferris/project/notebook.ipynb:1:1: E902 Permission denied (os error 13)
+/home/ferris/project/pyproject.toml:1:1: E902 Permission denied (os error 13)
+
diff --git a/crates/ruff_cli/src/diagnostics.rs b/crates/ruff_cli/src/diagnostics.rs
index 1d058b9b98..fcd0342106 100644
--- a/crates/ruff_cli/src/diagnostics.rs
+++ b/crates/ruff_cli/src/diagnostics.rs
@@ -8,19 +8,21 @@ use std::path::Path;
 
 use anyhow::{anyhow, Result};
 use colored::Colorize;
-use log::{debug, error};
-use ruff_text_size::TextSize;
+use log::{debug, error, warn};
+use ruff_text_size::{TextRange, TextSize};
 use rustc_hash::FxHashMap;
 use similar::TextDiff;
 
-use ruff::fs;
 use ruff::jupyter::Notebook;
 use ruff::linter::{lint_fix, lint_only, FixTable, FixerResult, LinterResult};
 use ruff::logging::DisplayParseError;
 use ruff::message::Message;
 use ruff::pyproject_toml::lint_pyproject_toml;
+use ruff::registry::Rule;
 use ruff::settings::{flags, AllSettings, Settings};
 use ruff::source_kind::SourceKind;
+use ruff::{fs, IOError};
+use ruff_diagnostics::Diagnostic;
 use ruff_python_ast::imports::ImportMap;
 use ruff_python_ast::source_code::{LineIndex, SourceCode, SourceFileBuilder};
 use ruff_python_stdlib::path::{is_jupyter_notebook, is_project_toml};
@@ -127,6 +129,31 @@ pub(crate) fn lint_path(
 
     debug!("Checking: {}", path.display());
 
+    // In case of an io error we want to exit early
+    let io_error_diagnostics = |err: io::Error, path: &Path| -> Diagnostics {
+        if settings.lib.rules.enabled(Rule::IOError) {
+            let io_err = Diagnostic::new(
+                IOError {
+                    message: err.to_string(),
+                },
+                TextRange::default(),
+            );
+            let dummy = SourceFileBuilder::new(path.to_string_lossy().as_ref(), "").finish();
+            Diagnostics::new(
+                vec![Message::from_diagnostic(io_err, dummy, TextSize::default())],
+                ImportMap::default(),
+            )
+        } else {
+            warn!(
+                "{}{}{} {err}",
+                "Failed to lint ".bold(),
+                fs::relativize_path(path).bold(),
+                ":".bold()
+            );
+            Diagnostics::default()
+        }
+    };
+
     // We have to special case this here since the Python tokenizer doesn't work with TOML.
     if is_project_toml(path) {
         let messages = if settings
@@ -135,9 +162,14 @@ pub(crate) fn lint_path(
             .iter_enabled()
             .any(|rule_code| rule_code.lint_source().is_pyproject_toml())
         {
-            let contents = std::fs::read_to_string(path)?;
+            let contents = match std::fs::read_to_string(path) {
+                Ok(contents) => contents,
+                Err(err) => {
+                    return Ok(io_error_diagnostics(err, path));
+                }
+            };
             let source_file = SourceFileBuilder::new(path.to_string_lossy(), contents).finish();
-            lint_pyproject_toml(source_file, &settings.lib)?
+            lint_pyproject_toml(source_file, &settings.lib)
         } else {
             vec![]
         };
@@ -154,7 +186,14 @@ pub(crate) fn lint_path(
             Err(diagnostic) => return Ok(*diagnostic),
         }
     } else {
-        SourceKind::Python(std::fs::read_to_string(path)?)
+        // This is tested by ruff_cli integration test `unreadable_file`
+        let contents = match std::fs::read_to_string(path) {
+            Ok(contents) => contents,
+            Err(err) => {
+                return Ok(io_error_diagnostics(err, path));
+            }
+        };
+        SourceKind::Python(contents)
     };
 
     let contents = source_kind.content().to_string();
diff --git a/crates/ruff_cli/tests/integration_test.rs b/crates/ruff_cli/tests/integration_test.rs
index 674b2fd5a2..f9126328b7 100644
--- a/crates/ruff_cli/tests/integration_test.rs
+++ b/crates/ruff_cli/tests/integration_test.rs
@@ -1,13 +1,28 @@
 #![cfg(not(target_family = "wasm"))]
 
+#[cfg(unix)]
+use std::fs;
+#[cfg(unix)]
+use std::fs::Permissions;
+#[cfg(unix)]
+use std::os::unix::fs::{OpenOptionsExt, PermissionsExt};
 #[cfg(unix)]
 use std::path::Path;
 use std::str;
 
+#[cfg(unix)]
+use anyhow::Context;
 use anyhow::Result;
 use assert_cmd::Command;
 #[cfg(unix)]
+use clap::Parser;
+#[cfg(unix)]
 use path_absolutize::path_dedot;
+#[cfg(unix)]
+use tempfile::TempDir;
+
+use ruff_cli::args::Args;
+use ruff_cli::run;
 
 const BIN_NAME: &str = "ruff";
 
@@ -278,3 +293,55 @@ Found 1 error.
 
     Ok(())
 }
+
+/// An unreadable pyproject.toml in non-isolated mode causes ruff to hard-error trying to build up
+/// configuration globs
+#[cfg(unix)]
+#[test]
+fn unreadable_pyproject_toml() -> Result<()> {
+    let tempdir = TempDir::new()?;
+    let pyproject_toml = tempdir.path().join("pyproject.toml");
+    // Create an empty file with 000 permissions
+    fs::OpenOptions::new()
+        .create(true)
+        .write(true)
+        .mode(0o000)
+        .open(pyproject_toml)?;
+
+    // Don't `--isolated` since the configuration discovery is where the error happens
+    let args = Args::parse_from(["", "check", "--no-cache", tempdir.path().to_str().unwrap()]);
+    let err = run(args).err().context("Unexpected success")?;
+    assert_eq!(
+        err.chain()
+            .map(std::string::ToString::to_string)
+            .collect::<Vec<_>>(),
+        vec!["Permission denied (os error 13)".to_string()],
+    );
+    Ok(())
+}
+
+/// Check the output with an unreadable directory
+#[cfg(unix)]
+#[test]
+fn unreadable_dir() -> Result<()> {
+    // Create a directory with 000 (not iterable/readable) permissions
+    let tempdir = TempDir::new()?;
+    let unreadable_dir = tempdir.path().join("unreadable_dir");
+    fs::create_dir(&unreadable_dir)?;
+    fs::set_permissions(&unreadable_dir, Permissions::from_mode(0o000))?;
+
+    // We (currently?) have to use a subcommand to check exit status (currently wrong) and logging
+    // output
+    let mut cmd = Command::cargo_bin(BIN_NAME)?;
+    let output = cmd
+        .args(["--no-cache", "--isolated"])
+        .arg(&unreadable_dir)
+        .assert()
+        // TODO(konstin): This should be a failure, but we currently can't track that
+        .success();
+    assert_eq!(
+        str::from_utf8(&output.get_output().stderr)?,
+        "warning: Encountered error: Permission denied (os error 13)\n"
+    );
+    Ok(())
+}
diff --git a/crates/ruff_shrinking/src/main.rs b/crates/ruff_shrinking/src/main.rs
index 24e8d509f4..249b444ce1 100644
--- a/crates/ruff_shrinking/src/main.rs
+++ b/crates/ruff_shrinking/src/main.rs
@@ -429,7 +429,7 @@ fn main() -> ExitCode {
     if let Err(e) = run() {
         eprintln!("💥 Minimizer failed");
         for cause in e.chain() {
-            eprintln!("  Caused by: {cause}");
+            eprintln!("  Cause: {cause}");
         }
         ExitCode::FAILURE
     } else {