mirror of https://github.com/astral-sh/ruff
Change `pandas-use-of-dot-read-table` rule to emit only when `read_table` is used on CSV data (#5807)
## Summary Closes #5628 by only emitting if `sep=","`. Includes documentation (completes the `pandas-vet` ruleset). Related to #2646. ## Test Plan `cargo test`
This commit is contained in:
parent
2cd117ba81
commit
ae431df146
20
crates/ruff/resources/test/fixtures/pandas_vet/pandas_use_of_dot_read_table.py
vendored
Normal file
20
crates/ruff/resources/test/fixtures/pandas_vet/pandas_use_of_dot_read_table.py
vendored
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Errors.
|
||||||
|
df = pd.read_table("data.csv", sep=",")
|
||||||
|
df = pd.read_table("data.csv", sep=",", header=0)
|
||||||
|
filename = "data.csv"
|
||||||
|
df = pd.read_table(filename, sep=",")
|
||||||
|
df = pd.read_table(filename, sep=",", header=0)
|
||||||
|
|
||||||
|
# Non-errors.
|
||||||
|
df = pd.read_csv("data.csv")
|
||||||
|
df = pd.read_table("data.tsv")
|
||||||
|
df = pd.read_table("data.tsv", sep="\t")
|
||||||
|
df = pd.read_table("data.tsv", sep=",,")
|
||||||
|
df = pd.read_table("data.tsv", sep=", ")
|
||||||
|
df = pd.read_table("data.tsv", sep=" ,")
|
||||||
|
df = pd.read_table("data.tsv", sep=" , ")
|
||||||
|
not_pd.read_table("data.csv", sep=",")
|
||||||
|
data = read_table("data.csv", sep=",")
|
||||||
|
data = read_table
|
||||||
|
|
@ -2788,7 +2788,9 @@ where
|
||||||
pandas_vet::rules::inplace_argument(self, expr, func, args, keywords);
|
pandas_vet::rules::inplace_argument(self, expr, func, args, keywords);
|
||||||
}
|
}
|
||||||
pandas_vet::rules::call(self, func);
|
pandas_vet::rules::call(self, func);
|
||||||
|
if self.enabled(Rule::PandasUseOfDotReadTable) {
|
||||||
|
pandas_vet::rules::use_of_read_table(self, func, keywords);
|
||||||
|
}
|
||||||
if self.enabled(Rule::PandasUseOfPdMerge) {
|
if self.enabled(Rule::PandasUseOfPdMerge) {
|
||||||
pandas_vet::rules::use_of_pd_merge(self, func);
|
pandas_vet::rules::use_of_pd_merge(self, func);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -237,27 +237,6 @@ mod tests {
|
||||||
"#,
|
"#,
|
||||||
"PD011_pass_node_name"
|
"PD011_pass_node_name"
|
||||||
)]
|
)]
|
||||||
#[test_case(
|
|
||||||
r#"
|
|
||||||
import pandas as pd
|
|
||||||
employees = pd.read_csv(input_file)
|
|
||||||
"#,
|
|
||||||
"PD012_pass_read_csv"
|
|
||||||
)]
|
|
||||||
#[test_case(
|
|
||||||
r#"
|
|
||||||
import pandas as pd
|
|
||||||
employees = pd.read_table(input_file)
|
|
||||||
"#,
|
|
||||||
"PD012_fail_read_table"
|
|
||||||
)]
|
|
||||||
#[test_case(
|
|
||||||
r#"
|
|
||||||
import pandas as pd
|
|
||||||
employees = read_table
|
|
||||||
"#,
|
|
||||||
"PD012_node_Name_pass"
|
|
||||||
)]
|
|
||||||
#[test_case(
|
#[test_case(
|
||||||
r#"
|
r#"
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
@ -360,6 +339,10 @@ mod tests {
|
||||||
assert_messages!(snapshot, diagnostics);
|
assert_messages!(snapshot, diagnostics);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test_case(
|
||||||
|
Rule::PandasUseOfDotReadTable,
|
||||||
|
Path::new("pandas_use_of_dot_read_table.py")
|
||||||
|
)]
|
||||||
#[test_case(Rule::PandasUseOfInplaceArgument, Path::new("PD002.py"))]
|
#[test_case(Rule::PandasUseOfInplaceArgument, Path::new("PD002.py"))]
|
||||||
fn paths(rule_code: Rule, path: &Path) -> Result<()> {
|
fn paths(rule_code: Rule, path: &Path) -> Result<()> {
|
||||||
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
|
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
|
||||||
|
|
|
||||||
|
|
@ -126,18 +126,6 @@ impl Violation for PandasUseOfDotPivotOrUnstack {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(tjkuson): Add documentation for this rule once clarified.
|
|
||||||
// https://github.com/astral-sh/ruff/issues/5628
|
|
||||||
#[violation]
|
|
||||||
pub struct PandasUseOfDotReadTable;
|
|
||||||
|
|
||||||
impl Violation for PandasUseOfDotReadTable {
|
|
||||||
#[derive_message_formats]
|
|
||||||
fn message(&self) -> String {
|
|
||||||
format!("`.read_csv` is preferred to `.read_table`; provides same functionality")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// ## What it does
|
/// ## What it does
|
||||||
/// Checks for uses of `.stack` on Pandas objects.
|
/// Checks for uses of `.stack` on Pandas objects.
|
||||||
///
|
///
|
||||||
|
|
@ -193,14 +181,6 @@ pub(crate) fn call(checker: &mut Checker, func: &Expr) {
|
||||||
{
|
{
|
||||||
PandasUseOfDotPivotOrUnstack.into()
|
PandasUseOfDotPivotOrUnstack.into()
|
||||||
}
|
}
|
||||||
"read_table"
|
|
||||||
if checker
|
|
||||||
.settings
|
|
||||||
.rules
|
|
||||||
.enabled(Rule::PandasUseOfDotReadTable) =>
|
|
||||||
{
|
|
||||||
PandasUseOfDotReadTable.into()
|
|
||||||
}
|
|
||||||
"stack" if checker.settings.rules.enabled(Rule::PandasUseOfDotStack) => {
|
"stack" if checker.settings.rules.enabled(Rule::PandasUseOfDotStack) => {
|
||||||
PandasUseOfDotStack.into()
|
PandasUseOfDotStack.into()
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ pub(crate) use attr::*;
|
||||||
pub(crate) use call::*;
|
pub(crate) use call::*;
|
||||||
pub(crate) use inplace_argument::*;
|
pub(crate) use inplace_argument::*;
|
||||||
pub(crate) use pd_merge::*;
|
pub(crate) use pd_merge::*;
|
||||||
|
pub(crate) use read_table::*;
|
||||||
pub(crate) use subscript::*;
|
pub(crate) use subscript::*;
|
||||||
|
|
||||||
pub(crate) mod assignment_to_df;
|
pub(crate) mod assignment_to_df;
|
||||||
|
|
@ -10,4 +11,5 @@ pub(crate) mod attr;
|
||||||
pub(crate) mod call;
|
pub(crate) mod call;
|
||||||
pub(crate) mod inplace_argument;
|
pub(crate) mod inplace_argument;
|
||||||
pub(crate) mod pd_merge;
|
pub(crate) mod pd_merge;
|
||||||
|
pub(crate) mod read_table;
|
||||||
pub(crate) mod subscript;
|
pub(crate) mod subscript;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,76 @@
|
||||||
|
use rustpython_parser::ast;
|
||||||
|
use rustpython_parser::ast::{Constant, Expr, Keyword, Ranged};
|
||||||
|
|
||||||
|
use ruff_diagnostics::{Diagnostic, Violation};
|
||||||
|
use ruff_macros::{derive_message_formats, violation};
|
||||||
|
|
||||||
|
use crate::checkers::ast::Checker;
|
||||||
|
|
||||||
|
/// ## What it does
|
||||||
|
/// Checks for uses of `pd.read_table` to read CSV files.
|
||||||
|
///
|
||||||
|
/// ## Why is this bad?
|
||||||
|
/// In the Pandas API, `pd.read_csv` and `pd.read_table` are equivalent apart
|
||||||
|
/// from their default separator: `pd.read_csv` defaults to a comma (`,`),
|
||||||
|
/// while `pd.read_table` defaults to a tab (`\t`) as the default separator.
|
||||||
|
///
|
||||||
|
/// Prefer `pd.read_csv` over `pd.read_table` when reading comma-separated
|
||||||
|
/// data (like CSV files), as it is more idiomatic.
|
||||||
|
///
|
||||||
|
/// ## Example
|
||||||
|
/// ```python
|
||||||
|
/// import pandas as pd
|
||||||
|
///
|
||||||
|
/// cities_df = pd.read_table("cities.csv", sep=",")
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// Use instead:
|
||||||
|
/// ```python
|
||||||
|
/// import pandas as pd
|
||||||
|
///
|
||||||
|
/// cities_df = pd.read_csv("cities.csv")
|
||||||
|
/// ```
|
||||||
|
///
|
||||||
|
/// ## References
|
||||||
|
/// - [Pandas documentation: `read_csv`](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html#pandas.read_csv)
|
||||||
|
/// - [Pandas documentation: `read_table`](https://pandas.pydata.org/docs/reference/api/pandas.read_table.html#pandas.read_table)
|
||||||
|
#[violation]
|
||||||
|
pub struct PandasUseOfDotReadTable;
|
||||||
|
|
||||||
|
impl Violation for PandasUseOfDotReadTable {
|
||||||
|
#[derive_message_formats]
|
||||||
|
fn message(&self) -> String {
|
||||||
|
format!("Use `.read_csv` instead of `.read_table` to read CSV files")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// PD012
|
||||||
|
pub(crate) fn use_of_read_table(checker: &mut Checker, func: &Expr, keywords: &[Keyword]) {
|
||||||
|
if checker
|
||||||
|
.semantic()
|
||||||
|
.resolve_call_path(func)
|
||||||
|
.map_or(false, |call_path| {
|
||||||
|
matches!(call_path.as_slice(), ["pandas", "read_table"])
|
||||||
|
})
|
||||||
|
{
|
||||||
|
if let Some(Expr::Constant(ast::ExprConstant {
|
||||||
|
value: Constant::Str(value),
|
||||||
|
..
|
||||||
|
})) = keywords
|
||||||
|
.iter()
|
||||||
|
.find(|keyword| {
|
||||||
|
keyword
|
||||||
|
.arg
|
||||||
|
.as_ref()
|
||||||
|
.map_or(false, |keyword| keyword.as_str() == "sep")
|
||||||
|
})
|
||||||
|
.map(|keyword| &keyword.value)
|
||||||
|
{
|
||||||
|
if value.as_str() == "," {
|
||||||
|
checker
|
||||||
|
.diagnostics
|
||||||
|
.push(Diagnostic::new(PandasUseOfDotReadTable, func.range()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,11 +0,0 @@
|
||||||
---
|
|
||||||
source: crates/ruff/src/rules/pandas_vet/mod.rs
|
|
||||||
---
|
|
||||||
<filename>:3:13: PD012 `.read_csv` is preferred to `.read_table`; provides same functionality
|
|
||||||
|
|
|
||||||
2 | import pandas as pd
|
|
||||||
3 | employees = pd.read_table(input_file)
|
|
||||||
| ^^^^^^^^^^^^^ PD012
|
|
||||||
|
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,4 +0,0 @@
|
||||||
---
|
|
||||||
source: crates/ruff/src/rules/pandas_vet/mod.rs
|
|
||||||
---
|
|
||||||
|
|
||||||
|
|
@ -0,0 +1,42 @@
|
||||||
|
---
|
||||||
|
source: crates/ruff/src/rules/pandas_vet/mod.rs
|
||||||
|
---
|
||||||
|
pandas_use_of_dot_read_table.py:4:6: PD012 Use `.read_csv` instead of `.read_table` to read CSV files
|
||||||
|
|
|
||||||
|
3 | # Errors.
|
||||||
|
4 | df = pd.read_table("data.csv", sep=",")
|
||||||
|
| ^^^^^^^^^^^^^ PD012
|
||||||
|
5 | df = pd.read_table("data.csv", sep=",", header=0)
|
||||||
|
6 | filename = "data.csv"
|
||||||
|
|
|
||||||
|
|
||||||
|
pandas_use_of_dot_read_table.py:5:6: PD012 Use `.read_csv` instead of `.read_table` to read CSV files
|
||||||
|
|
|
||||||
|
3 | # Errors.
|
||||||
|
4 | df = pd.read_table("data.csv", sep=",")
|
||||||
|
5 | df = pd.read_table("data.csv", sep=",", header=0)
|
||||||
|
| ^^^^^^^^^^^^^ PD012
|
||||||
|
6 | filename = "data.csv"
|
||||||
|
7 | df = pd.read_table(filename, sep=",")
|
||||||
|
|
|
||||||
|
|
||||||
|
pandas_use_of_dot_read_table.py:7:6: PD012 Use `.read_csv` instead of `.read_table` to read CSV files
|
||||||
|
|
|
||||||
|
5 | df = pd.read_table("data.csv", sep=",", header=0)
|
||||||
|
6 | filename = "data.csv"
|
||||||
|
7 | df = pd.read_table(filename, sep=",")
|
||||||
|
| ^^^^^^^^^^^^^ PD012
|
||||||
|
8 | df = pd.read_table(filename, sep=",", header=0)
|
||||||
|
|
|
||||||
|
|
||||||
|
pandas_use_of_dot_read_table.py:8:6: PD012 Use `.read_csv` instead of `.read_table` to read CSV files
|
||||||
|
|
|
||||||
|
6 | filename = "data.csv"
|
||||||
|
7 | df = pd.read_table(filename, sep=",")
|
||||||
|
8 | df = pd.read_table(filename, sep=",", header=0)
|
||||||
|
| ^^^^^^^^^^^^^ PD012
|
||||||
|
9 |
|
||||||
|
10 | # Non-errors.
|
||||||
|
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -1,4 +0,0 @@
|
||||||
---
|
|
||||||
source: crates/ruff/src/rules/pandas_vet/mod.rs
|
|
||||||
---
|
|
||||||
|
|
||||||
Loading…
Reference in New Issue