diff --git a/crates/ruff/resources/test/fixtures/pandas_vet/pandas_use_of_dot_read_table.py b/crates/ruff/resources/test/fixtures/pandas_vet/pandas_use_of_dot_read_table.py new file mode 100644 index 0000000000..a0e48e223e --- /dev/null +++ b/crates/ruff/resources/test/fixtures/pandas_vet/pandas_use_of_dot_read_table.py @@ -0,0 +1,20 @@ +import pandas as pd + +# Errors. +df = pd.read_table("data.csv", sep=",") +df = pd.read_table("data.csv", sep=",", header=0) +filename = "data.csv" +df = pd.read_table(filename, sep=",") +df = pd.read_table(filename, sep=",", header=0) + +# Non-errors. +df = pd.read_csv("data.csv") +df = pd.read_table("data.tsv") +df = pd.read_table("data.tsv", sep="\t") +df = pd.read_table("data.tsv", sep=",,") +df = pd.read_table("data.tsv", sep=", ") +df = pd.read_table("data.tsv", sep=" ,") +df = pd.read_table("data.tsv", sep=" , ") +not_pd.read_table("data.csv", sep=",") +data = read_table("data.csv", sep=",") +data = read_table diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs index a974ff14ec..3c6e1b2b9e 100644 --- a/crates/ruff/src/checkers/ast/mod.rs +++ b/crates/ruff/src/checkers/ast/mod.rs @@ -2788,7 +2788,9 @@ where pandas_vet::rules::inplace_argument(self, expr, func, args, keywords); } pandas_vet::rules::call(self, func); - + if self.enabled(Rule::PandasUseOfDotReadTable) { + pandas_vet::rules::use_of_read_table(self, func, keywords); + } if self.enabled(Rule::PandasUseOfPdMerge) { pandas_vet::rules::use_of_pd_merge(self, func); } diff --git a/crates/ruff/src/rules/pandas_vet/mod.rs b/crates/ruff/src/rules/pandas_vet/mod.rs index 295a83cd24..6319a0f5cb 100644 --- a/crates/ruff/src/rules/pandas_vet/mod.rs +++ b/crates/ruff/src/rules/pandas_vet/mod.rs @@ -237,27 +237,6 @@ mod tests { "#, "PD011_pass_node_name" )] - #[test_case( - r#" - import pandas as pd - employees = pd.read_csv(input_file) - "#, - "PD012_pass_read_csv" - )] - #[test_case( - r#" - import pandas as pd - employees = pd.read_table(input_file) - "#, - "PD012_fail_read_table" - )] - #[test_case( - r#" - import pandas as pd - employees = read_table - "#, - "PD012_node_Name_pass" - )] #[test_case( r#" import pandas as pd @@ -360,6 +339,10 @@ mod tests { assert_messages!(snapshot, diagnostics); } + #[test_case( + Rule::PandasUseOfDotReadTable, + Path::new("pandas_use_of_dot_read_table.py") + )] #[test_case(Rule::PandasUseOfInplaceArgument, Path::new("PD002.py"))] fn paths(rule_code: Rule, path: &Path) -> Result<()> { let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy()); diff --git a/crates/ruff/src/rules/pandas_vet/rules/call.rs b/crates/ruff/src/rules/pandas_vet/rules/call.rs index 74bf8de249..20f1ae873d 100644 --- a/crates/ruff/src/rules/pandas_vet/rules/call.rs +++ b/crates/ruff/src/rules/pandas_vet/rules/call.rs @@ -126,18 +126,6 @@ impl Violation for PandasUseOfDotPivotOrUnstack { } } -// TODO(tjkuson): Add documentation for this rule once clarified. -// https://github.com/astral-sh/ruff/issues/5628 -#[violation] -pub struct PandasUseOfDotReadTable; - -impl Violation for PandasUseOfDotReadTable { - #[derive_message_formats] - fn message(&self) -> String { - format!("`.read_csv` is preferred to `.read_table`; provides same functionality") - } -} - /// ## What it does /// Checks for uses of `.stack` on Pandas objects. /// @@ -193,14 +181,6 @@ pub(crate) fn call(checker: &mut Checker, func: &Expr) { { PandasUseOfDotPivotOrUnstack.into() } - "read_table" - if checker - .settings - .rules - .enabled(Rule::PandasUseOfDotReadTable) => - { - PandasUseOfDotReadTable.into() - } "stack" if checker.settings.rules.enabled(Rule::PandasUseOfDotStack) => { PandasUseOfDotStack.into() } diff --git a/crates/ruff/src/rules/pandas_vet/rules/mod.rs b/crates/ruff/src/rules/pandas_vet/rules/mod.rs index a0c1ef0908..0a52e18783 100644 --- a/crates/ruff/src/rules/pandas_vet/rules/mod.rs +++ b/crates/ruff/src/rules/pandas_vet/rules/mod.rs @@ -3,6 +3,7 @@ pub(crate) use attr::*; pub(crate) use call::*; pub(crate) use inplace_argument::*; pub(crate) use pd_merge::*; +pub(crate) use read_table::*; pub(crate) use subscript::*; pub(crate) mod assignment_to_df; @@ -10,4 +11,5 @@ pub(crate) mod attr; pub(crate) mod call; pub(crate) mod inplace_argument; pub(crate) mod pd_merge; +pub(crate) mod read_table; pub(crate) mod subscript; diff --git a/crates/ruff/src/rules/pandas_vet/rules/read_table.rs b/crates/ruff/src/rules/pandas_vet/rules/read_table.rs new file mode 100644 index 0000000000..a6b6839d20 --- /dev/null +++ b/crates/ruff/src/rules/pandas_vet/rules/read_table.rs @@ -0,0 +1,76 @@ +use rustpython_parser::ast; +use rustpython_parser::ast::{Constant, Expr, Keyword, Ranged}; + +use ruff_diagnostics::{Diagnostic, Violation}; +use ruff_macros::{derive_message_formats, violation}; + +use crate::checkers::ast::Checker; + +/// ## What it does +/// Checks for uses of `pd.read_table` to read CSV files. +/// +/// ## Why is this bad? +/// In the Pandas API, `pd.read_csv` and `pd.read_table` are equivalent apart +/// from their default separator: `pd.read_csv` defaults to a comma (`,`), +/// while `pd.read_table` defaults to a tab (`\t`) as the default separator. +/// +/// Prefer `pd.read_csv` over `pd.read_table` when reading comma-separated +/// data (like CSV files), as it is more idiomatic. +/// +/// ## Example +/// ```python +/// import pandas as pd +/// +/// cities_df = pd.read_table("cities.csv", sep=",") +/// ``` +/// +/// Use instead: +/// ```python +/// import pandas as pd +/// +/// cities_df = pd.read_csv("cities.csv") +/// ``` +/// +/// ## References +/// - [Pandas documentation: `read_csv`](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html#pandas.read_csv) +/// - [Pandas documentation: `read_table`](https://pandas.pydata.org/docs/reference/api/pandas.read_table.html#pandas.read_table) +#[violation] +pub struct PandasUseOfDotReadTable; + +impl Violation for PandasUseOfDotReadTable { + #[derive_message_formats] + fn message(&self) -> String { + format!("Use `.read_csv` instead of `.read_table` to read CSV files") + } +} + +/// PD012 +pub(crate) fn use_of_read_table(checker: &mut Checker, func: &Expr, keywords: &[Keyword]) { + if checker + .semantic() + .resolve_call_path(func) + .map_or(false, |call_path| { + matches!(call_path.as_slice(), ["pandas", "read_table"]) + }) + { + if let Some(Expr::Constant(ast::ExprConstant { + value: Constant::Str(value), + .. + })) = keywords + .iter() + .find(|keyword| { + keyword + .arg + .as_ref() + .map_or(false, |keyword| keyword.as_str() == "sep") + }) + .map(|keyword| &keyword.value) + { + if value.as_str() == "," { + checker + .diagnostics + .push(Diagnostic::new(PandasUseOfDotReadTable, func.range())); + } + } + } +} diff --git a/crates/ruff/src/rules/pandas_vet/snapshots/ruff__rules__pandas_vet__tests__PD012_fail_read_table.snap b/crates/ruff/src/rules/pandas_vet/snapshots/ruff__rules__pandas_vet__tests__PD012_fail_read_table.snap deleted file mode 100644 index fb6b1dc9b1..0000000000 --- a/crates/ruff/src/rules/pandas_vet/snapshots/ruff__rules__pandas_vet__tests__PD012_fail_read_table.snap +++ /dev/null @@ -1,11 +0,0 @@ ---- -source: crates/ruff/src/rules/pandas_vet/mod.rs ---- -:3:13: PD012 `.read_csv` is preferred to `.read_table`; provides same functionality - | -2 | import pandas as pd -3 | employees = pd.read_table(input_file) - | ^^^^^^^^^^^^^ PD012 - | - - diff --git a/crates/ruff/src/rules/pandas_vet/snapshots/ruff__rules__pandas_vet__tests__PD012_node_Name_pass.snap b/crates/ruff/src/rules/pandas_vet/snapshots/ruff__rules__pandas_vet__tests__PD012_node_Name_pass.snap deleted file mode 100644 index cee5b2f842..0000000000 --- a/crates/ruff/src/rules/pandas_vet/snapshots/ruff__rules__pandas_vet__tests__PD012_node_Name_pass.snap +++ /dev/null @@ -1,4 +0,0 @@ ---- -source: crates/ruff/src/rules/pandas_vet/mod.rs ---- - diff --git a/crates/ruff/src/rules/pandas_vet/snapshots/ruff__rules__pandas_vet__tests__PD012_pandas_use_of_dot_read_table.py.snap b/crates/ruff/src/rules/pandas_vet/snapshots/ruff__rules__pandas_vet__tests__PD012_pandas_use_of_dot_read_table.py.snap new file mode 100644 index 0000000000..06fcb65b1e --- /dev/null +++ b/crates/ruff/src/rules/pandas_vet/snapshots/ruff__rules__pandas_vet__tests__PD012_pandas_use_of_dot_read_table.py.snap @@ -0,0 +1,42 @@ +--- +source: crates/ruff/src/rules/pandas_vet/mod.rs +--- +pandas_use_of_dot_read_table.py:4:6: PD012 Use `.read_csv` instead of `.read_table` to read CSV files + | +3 | # Errors. +4 | df = pd.read_table("data.csv", sep=",") + | ^^^^^^^^^^^^^ PD012 +5 | df = pd.read_table("data.csv", sep=",", header=0) +6 | filename = "data.csv" + | + +pandas_use_of_dot_read_table.py:5:6: PD012 Use `.read_csv` instead of `.read_table` to read CSV files + | +3 | # Errors. +4 | df = pd.read_table("data.csv", sep=",") +5 | df = pd.read_table("data.csv", sep=",", header=0) + | ^^^^^^^^^^^^^ PD012 +6 | filename = "data.csv" +7 | df = pd.read_table(filename, sep=",") + | + +pandas_use_of_dot_read_table.py:7:6: PD012 Use `.read_csv` instead of `.read_table` to read CSV files + | +5 | df = pd.read_table("data.csv", sep=",", header=0) +6 | filename = "data.csv" +7 | df = pd.read_table(filename, sep=",") + | ^^^^^^^^^^^^^ PD012 +8 | df = pd.read_table(filename, sep=",", header=0) + | + +pandas_use_of_dot_read_table.py:8:6: PD012 Use `.read_csv` instead of `.read_table` to read CSV files + | + 6 | filename = "data.csv" + 7 | df = pd.read_table(filename, sep=",") + 8 | df = pd.read_table(filename, sep=",", header=0) + | ^^^^^^^^^^^^^ PD012 + 9 | +10 | # Non-errors. + | + + diff --git a/crates/ruff/src/rules/pandas_vet/snapshots/ruff__rules__pandas_vet__tests__PD012_pass_read_csv.snap b/crates/ruff/src/rules/pandas_vet/snapshots/ruff__rules__pandas_vet__tests__PD012_pass_read_csv.snap deleted file mode 100644 index cee5b2f842..0000000000 --- a/crates/ruff/src/rules/pandas_vet/snapshots/ruff__rules__pandas_vet__tests__PD012_pass_read_csv.snap +++ /dev/null @@ -1,4 +0,0 @@ ---- -source: crates/ruff/src/rules/pandas_vet/mod.rs ---- -