Change `pandas-use-of-dot-read-table` rule to emit only when `read_table` is used on CSV data (#5807)

## Summary

Closes #5628 by only emitting if `sep=","`. Includes documentation
(completes the `pandas-vet` ruleset).

Related to #2646.

## Test Plan

`cargo test`
This commit is contained in:
Tom Kuson 2023-07-17 02:25:13 +01:00 committed by GitHub
parent 2cd117ba81
commit ae431df146
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 147 additions and 61 deletions

View File

@ -0,0 +1,20 @@
import pandas as pd
# Errors.
df = pd.read_table("data.csv", sep=",")
df = pd.read_table("data.csv", sep=",", header=0)
filename = "data.csv"
df = pd.read_table(filename, sep=",")
df = pd.read_table(filename, sep=",", header=0)
# Non-errors.
df = pd.read_csv("data.csv")
df = pd.read_table("data.tsv")
df = pd.read_table("data.tsv", sep="\t")
df = pd.read_table("data.tsv", sep=",,")
df = pd.read_table("data.tsv", sep=", ")
df = pd.read_table("data.tsv", sep=" ,")
df = pd.read_table("data.tsv", sep=" , ")
not_pd.read_table("data.csv", sep=",")
data = read_table("data.csv", sep=",")
data = read_table

View File

@ -2788,7 +2788,9 @@ where
pandas_vet::rules::inplace_argument(self, expr, func, args, keywords); pandas_vet::rules::inplace_argument(self, expr, func, args, keywords);
} }
pandas_vet::rules::call(self, func); pandas_vet::rules::call(self, func);
if self.enabled(Rule::PandasUseOfDotReadTable) {
pandas_vet::rules::use_of_read_table(self, func, keywords);
}
if self.enabled(Rule::PandasUseOfPdMerge) { if self.enabled(Rule::PandasUseOfPdMerge) {
pandas_vet::rules::use_of_pd_merge(self, func); pandas_vet::rules::use_of_pd_merge(self, func);
} }

View File

@ -237,27 +237,6 @@ mod tests {
"#, "#,
"PD011_pass_node_name" "PD011_pass_node_name"
)] )]
#[test_case(
r#"
import pandas as pd
employees = pd.read_csv(input_file)
"#,
"PD012_pass_read_csv"
)]
#[test_case(
r#"
import pandas as pd
employees = pd.read_table(input_file)
"#,
"PD012_fail_read_table"
)]
#[test_case(
r#"
import pandas as pd
employees = read_table
"#,
"PD012_node_Name_pass"
)]
#[test_case( #[test_case(
r#" r#"
import pandas as pd import pandas as pd
@ -360,6 +339,10 @@ mod tests {
assert_messages!(snapshot, diagnostics); assert_messages!(snapshot, diagnostics);
} }
#[test_case(
Rule::PandasUseOfDotReadTable,
Path::new("pandas_use_of_dot_read_table.py")
)]
#[test_case(Rule::PandasUseOfInplaceArgument, Path::new("PD002.py"))] #[test_case(Rule::PandasUseOfInplaceArgument, Path::new("PD002.py"))]
fn paths(rule_code: Rule, path: &Path) -> Result<()> { fn paths(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy()); let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());

View File

@ -126,18 +126,6 @@ impl Violation for PandasUseOfDotPivotOrUnstack {
} }
} }
// TODO(tjkuson): Add documentation for this rule once clarified.
// https://github.com/astral-sh/ruff/issues/5628
#[violation]
pub struct PandasUseOfDotReadTable;
impl Violation for PandasUseOfDotReadTable {
#[derive_message_formats]
fn message(&self) -> String {
format!("`.read_csv` is preferred to `.read_table`; provides same functionality")
}
}
/// ## What it does /// ## What it does
/// Checks for uses of `.stack` on Pandas objects. /// Checks for uses of `.stack` on Pandas objects.
/// ///
@ -193,14 +181,6 @@ pub(crate) fn call(checker: &mut Checker, func: &Expr) {
{ {
PandasUseOfDotPivotOrUnstack.into() PandasUseOfDotPivotOrUnstack.into()
} }
"read_table"
if checker
.settings
.rules
.enabled(Rule::PandasUseOfDotReadTable) =>
{
PandasUseOfDotReadTable.into()
}
"stack" if checker.settings.rules.enabled(Rule::PandasUseOfDotStack) => { "stack" if checker.settings.rules.enabled(Rule::PandasUseOfDotStack) => {
PandasUseOfDotStack.into() PandasUseOfDotStack.into()
} }

View File

@ -3,6 +3,7 @@ pub(crate) use attr::*;
pub(crate) use call::*; pub(crate) use call::*;
pub(crate) use inplace_argument::*; pub(crate) use inplace_argument::*;
pub(crate) use pd_merge::*; pub(crate) use pd_merge::*;
pub(crate) use read_table::*;
pub(crate) use subscript::*; pub(crate) use subscript::*;
pub(crate) mod assignment_to_df; pub(crate) mod assignment_to_df;
@ -10,4 +11,5 @@ pub(crate) mod attr;
pub(crate) mod call; pub(crate) mod call;
pub(crate) mod inplace_argument; pub(crate) mod inplace_argument;
pub(crate) mod pd_merge; pub(crate) mod pd_merge;
pub(crate) mod read_table;
pub(crate) mod subscript; pub(crate) mod subscript;

View File

@ -0,0 +1,76 @@
use rustpython_parser::ast;
use rustpython_parser::ast::{Constant, Expr, Keyword, Ranged};
use ruff_diagnostics::{Diagnostic, Violation};
use ruff_macros::{derive_message_formats, violation};
use crate::checkers::ast::Checker;
/// ## What it does
/// Checks for uses of `pd.read_table` to read CSV files.
///
/// ## Why is this bad?
/// In the Pandas API, `pd.read_csv` and `pd.read_table` are equivalent apart
/// from their default separator: `pd.read_csv` defaults to a comma (`,`),
/// while `pd.read_table` defaults to a tab (`\t`) as the default separator.
///
/// Prefer `pd.read_csv` over `pd.read_table` when reading comma-separated
/// data (like CSV files), as it is more idiomatic.
///
/// ## Example
/// ```python
/// import pandas as pd
///
/// cities_df = pd.read_table("cities.csv", sep=",")
/// ```
///
/// Use instead:
/// ```python
/// import pandas as pd
///
/// cities_df = pd.read_csv("cities.csv")
/// ```
///
/// ## References
/// - [Pandas documentation: `read_csv`](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html#pandas.read_csv)
/// - [Pandas documentation: `read_table`](https://pandas.pydata.org/docs/reference/api/pandas.read_table.html#pandas.read_table)
#[violation]
pub struct PandasUseOfDotReadTable;
impl Violation for PandasUseOfDotReadTable {
#[derive_message_formats]
fn message(&self) -> String {
format!("Use `.read_csv` instead of `.read_table` to read CSV files")
}
}
/// PD012
pub(crate) fn use_of_read_table(checker: &mut Checker, func: &Expr, keywords: &[Keyword]) {
if checker
.semantic()
.resolve_call_path(func)
.map_or(false, |call_path| {
matches!(call_path.as_slice(), ["pandas", "read_table"])
})
{
if let Some(Expr::Constant(ast::ExprConstant {
value: Constant::Str(value),
..
})) = keywords
.iter()
.find(|keyword| {
keyword
.arg
.as_ref()
.map_or(false, |keyword| keyword.as_str() == "sep")
})
.map(|keyword| &keyword.value)
{
if value.as_str() == "," {
checker
.diagnostics
.push(Diagnostic::new(PandasUseOfDotReadTable, func.range()));
}
}
}
}

View File

@ -1,11 +0,0 @@
---
source: crates/ruff/src/rules/pandas_vet/mod.rs
---
<filename>:3:13: PD012 `.read_csv` is preferred to `.read_table`; provides same functionality
|
2 | import pandas as pd
3 | employees = pd.read_table(input_file)
| ^^^^^^^^^^^^^ PD012
|

View File

@ -1,4 +0,0 @@
---
source: crates/ruff/src/rules/pandas_vet/mod.rs
---

View File

@ -0,0 +1,42 @@
---
source: crates/ruff/src/rules/pandas_vet/mod.rs
---
pandas_use_of_dot_read_table.py:4:6: PD012 Use `.read_csv` instead of `.read_table` to read CSV files
|
3 | # Errors.
4 | df = pd.read_table("data.csv", sep=",")
| ^^^^^^^^^^^^^ PD012
5 | df = pd.read_table("data.csv", sep=",", header=0)
6 | filename = "data.csv"
|
pandas_use_of_dot_read_table.py:5:6: PD012 Use `.read_csv` instead of `.read_table` to read CSV files
|
3 | # Errors.
4 | df = pd.read_table("data.csv", sep=",")
5 | df = pd.read_table("data.csv", sep=",", header=0)
| ^^^^^^^^^^^^^ PD012
6 | filename = "data.csv"
7 | df = pd.read_table(filename, sep=",")
|
pandas_use_of_dot_read_table.py:7:6: PD012 Use `.read_csv` instead of `.read_table` to read CSV files
|
5 | df = pd.read_table("data.csv", sep=",", header=0)
6 | filename = "data.csv"
7 | df = pd.read_table(filename, sep=",")
| ^^^^^^^^^^^^^ PD012
8 | df = pd.read_table(filename, sep=",", header=0)
|
pandas_use_of_dot_read_table.py:8:6: PD012 Use `.read_csv` instead of `.read_table` to read CSV files
|
6 | filename = "data.csv"
7 | df = pd.read_table(filename, sep=",")
8 | df = pd.read_table(filename, sep=",", header=0)
| ^^^^^^^^^^^^^ PD012
9 |
10 | # Non-errors.
|

View File

@ -1,4 +0,0 @@
---
source: crates/ruff/src/rules/pandas_vet/mod.rs
---