mirror of https://github.com/astral-sh/ruff
Change `pandas-use-of-dot-read-table` rule to emit only when `read_table` is used on CSV data (#5807)
## Summary Closes #5628 by only emitting if `sep=","`. Includes documentation (completes the `pandas-vet` ruleset). Related to #2646. ## Test Plan `cargo test`
This commit is contained in:
parent
2cd117ba81
commit
ae431df146
20
crates/ruff/resources/test/fixtures/pandas_vet/pandas_use_of_dot_read_table.py
vendored
Normal file
20
crates/ruff/resources/test/fixtures/pandas_vet/pandas_use_of_dot_read_table.py
vendored
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
import pandas as pd
|
||||
|
||||
# Errors.
|
||||
df = pd.read_table("data.csv", sep=",")
|
||||
df = pd.read_table("data.csv", sep=",", header=0)
|
||||
filename = "data.csv"
|
||||
df = pd.read_table(filename, sep=",")
|
||||
df = pd.read_table(filename, sep=",", header=0)
|
||||
|
||||
# Non-errors.
|
||||
df = pd.read_csv("data.csv")
|
||||
df = pd.read_table("data.tsv")
|
||||
df = pd.read_table("data.tsv", sep="\t")
|
||||
df = pd.read_table("data.tsv", sep=",,")
|
||||
df = pd.read_table("data.tsv", sep=", ")
|
||||
df = pd.read_table("data.tsv", sep=" ,")
|
||||
df = pd.read_table("data.tsv", sep=" , ")
|
||||
not_pd.read_table("data.csv", sep=",")
|
||||
data = read_table("data.csv", sep=",")
|
||||
data = read_table
|
||||
|
|
@ -2788,7 +2788,9 @@ where
|
|||
pandas_vet::rules::inplace_argument(self, expr, func, args, keywords);
|
||||
}
|
||||
pandas_vet::rules::call(self, func);
|
||||
|
||||
if self.enabled(Rule::PandasUseOfDotReadTable) {
|
||||
pandas_vet::rules::use_of_read_table(self, func, keywords);
|
||||
}
|
||||
if self.enabled(Rule::PandasUseOfPdMerge) {
|
||||
pandas_vet::rules::use_of_pd_merge(self, func);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -237,27 +237,6 @@ mod tests {
|
|||
"#,
|
||||
"PD011_pass_node_name"
|
||||
)]
|
||||
#[test_case(
|
||||
r#"
|
||||
import pandas as pd
|
||||
employees = pd.read_csv(input_file)
|
||||
"#,
|
||||
"PD012_pass_read_csv"
|
||||
)]
|
||||
#[test_case(
|
||||
r#"
|
||||
import pandas as pd
|
||||
employees = pd.read_table(input_file)
|
||||
"#,
|
||||
"PD012_fail_read_table"
|
||||
)]
|
||||
#[test_case(
|
||||
r#"
|
||||
import pandas as pd
|
||||
employees = read_table
|
||||
"#,
|
||||
"PD012_node_Name_pass"
|
||||
)]
|
||||
#[test_case(
|
||||
r#"
|
||||
import pandas as pd
|
||||
|
|
@ -360,6 +339,10 @@ mod tests {
|
|||
assert_messages!(snapshot, diagnostics);
|
||||
}
|
||||
|
||||
#[test_case(
|
||||
Rule::PandasUseOfDotReadTable,
|
||||
Path::new("pandas_use_of_dot_read_table.py")
|
||||
)]
|
||||
#[test_case(Rule::PandasUseOfInplaceArgument, Path::new("PD002.py"))]
|
||||
fn paths(rule_code: Rule, path: &Path) -> Result<()> {
|
||||
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
|
||||
|
|
|
|||
|
|
@ -126,18 +126,6 @@ impl Violation for PandasUseOfDotPivotOrUnstack {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO(tjkuson): Add documentation for this rule once clarified.
|
||||
// https://github.com/astral-sh/ruff/issues/5628
|
||||
#[violation]
|
||||
pub struct PandasUseOfDotReadTable;
|
||||
|
||||
impl Violation for PandasUseOfDotReadTable {
|
||||
#[derive_message_formats]
|
||||
fn message(&self) -> String {
|
||||
format!("`.read_csv` is preferred to `.read_table`; provides same functionality")
|
||||
}
|
||||
}
|
||||
|
||||
/// ## What it does
|
||||
/// Checks for uses of `.stack` on Pandas objects.
|
||||
///
|
||||
|
|
@ -193,14 +181,6 @@ pub(crate) fn call(checker: &mut Checker, func: &Expr) {
|
|||
{
|
||||
PandasUseOfDotPivotOrUnstack.into()
|
||||
}
|
||||
"read_table"
|
||||
if checker
|
||||
.settings
|
||||
.rules
|
||||
.enabled(Rule::PandasUseOfDotReadTable) =>
|
||||
{
|
||||
PandasUseOfDotReadTable.into()
|
||||
}
|
||||
"stack" if checker.settings.rules.enabled(Rule::PandasUseOfDotStack) => {
|
||||
PandasUseOfDotStack.into()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ pub(crate) use attr::*;
|
|||
pub(crate) use call::*;
|
||||
pub(crate) use inplace_argument::*;
|
||||
pub(crate) use pd_merge::*;
|
||||
pub(crate) use read_table::*;
|
||||
pub(crate) use subscript::*;
|
||||
|
||||
pub(crate) mod assignment_to_df;
|
||||
|
|
@ -10,4 +11,5 @@ pub(crate) mod attr;
|
|||
pub(crate) mod call;
|
||||
pub(crate) mod inplace_argument;
|
||||
pub(crate) mod pd_merge;
|
||||
pub(crate) mod read_table;
|
||||
pub(crate) mod subscript;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,76 @@
|
|||
use rustpython_parser::ast;
|
||||
use rustpython_parser::ast::{Constant, Expr, Keyword, Ranged};
|
||||
|
||||
use ruff_diagnostics::{Diagnostic, Violation};
|
||||
use ruff_macros::{derive_message_formats, violation};
|
||||
|
||||
use crate::checkers::ast::Checker;
|
||||
|
||||
/// ## What it does
|
||||
/// Checks for uses of `pd.read_table` to read CSV files.
|
||||
///
|
||||
/// ## Why is this bad?
|
||||
/// In the Pandas API, `pd.read_csv` and `pd.read_table` are equivalent apart
|
||||
/// from their default separator: `pd.read_csv` defaults to a comma (`,`),
|
||||
/// while `pd.read_table` defaults to a tab (`\t`) as the default separator.
|
||||
///
|
||||
/// Prefer `pd.read_csv` over `pd.read_table` when reading comma-separated
|
||||
/// data (like CSV files), as it is more idiomatic.
|
||||
///
|
||||
/// ## Example
|
||||
/// ```python
|
||||
/// import pandas as pd
|
||||
///
|
||||
/// cities_df = pd.read_table("cities.csv", sep=",")
|
||||
/// ```
|
||||
///
|
||||
/// Use instead:
|
||||
/// ```python
|
||||
/// import pandas as pd
|
||||
///
|
||||
/// cities_df = pd.read_csv("cities.csv")
|
||||
/// ```
|
||||
///
|
||||
/// ## References
|
||||
/// - [Pandas documentation: `read_csv`](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html#pandas.read_csv)
|
||||
/// - [Pandas documentation: `read_table`](https://pandas.pydata.org/docs/reference/api/pandas.read_table.html#pandas.read_table)
|
||||
#[violation]
|
||||
pub struct PandasUseOfDotReadTable;
|
||||
|
||||
impl Violation for PandasUseOfDotReadTable {
|
||||
#[derive_message_formats]
|
||||
fn message(&self) -> String {
|
||||
format!("Use `.read_csv` instead of `.read_table` to read CSV files")
|
||||
}
|
||||
}
|
||||
|
||||
/// PD012
|
||||
pub(crate) fn use_of_read_table(checker: &mut Checker, func: &Expr, keywords: &[Keyword]) {
|
||||
if checker
|
||||
.semantic()
|
||||
.resolve_call_path(func)
|
||||
.map_or(false, |call_path| {
|
||||
matches!(call_path.as_slice(), ["pandas", "read_table"])
|
||||
})
|
||||
{
|
||||
if let Some(Expr::Constant(ast::ExprConstant {
|
||||
value: Constant::Str(value),
|
||||
..
|
||||
})) = keywords
|
||||
.iter()
|
||||
.find(|keyword| {
|
||||
keyword
|
||||
.arg
|
||||
.as_ref()
|
||||
.map_or(false, |keyword| keyword.as_str() == "sep")
|
||||
})
|
||||
.map(|keyword| &keyword.value)
|
||||
{
|
||||
if value.as_str() == "," {
|
||||
checker
|
||||
.diagnostics
|
||||
.push(Diagnostic::new(PandasUseOfDotReadTable, func.range()));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,11 +0,0 @@
|
|||
---
|
||||
source: crates/ruff/src/rules/pandas_vet/mod.rs
|
||||
---
|
||||
<filename>:3:13: PD012 `.read_csv` is preferred to `.read_table`; provides same functionality
|
||||
|
|
||||
2 | import pandas as pd
|
||||
3 | employees = pd.read_table(input_file)
|
||||
| ^^^^^^^^^^^^^ PD012
|
||||
|
|
||||
|
||||
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
---
|
||||
source: crates/ruff/src/rules/pandas_vet/mod.rs
|
||||
---
|
||||
|
||||
|
|
@ -0,0 +1,42 @@
|
|||
---
|
||||
source: crates/ruff/src/rules/pandas_vet/mod.rs
|
||||
---
|
||||
pandas_use_of_dot_read_table.py:4:6: PD012 Use `.read_csv` instead of `.read_table` to read CSV files
|
||||
|
|
||||
3 | # Errors.
|
||||
4 | df = pd.read_table("data.csv", sep=",")
|
||||
| ^^^^^^^^^^^^^ PD012
|
||||
5 | df = pd.read_table("data.csv", sep=",", header=0)
|
||||
6 | filename = "data.csv"
|
||||
|
|
||||
|
||||
pandas_use_of_dot_read_table.py:5:6: PD012 Use `.read_csv` instead of `.read_table` to read CSV files
|
||||
|
|
||||
3 | # Errors.
|
||||
4 | df = pd.read_table("data.csv", sep=",")
|
||||
5 | df = pd.read_table("data.csv", sep=",", header=0)
|
||||
| ^^^^^^^^^^^^^ PD012
|
||||
6 | filename = "data.csv"
|
||||
7 | df = pd.read_table(filename, sep=",")
|
||||
|
|
||||
|
||||
pandas_use_of_dot_read_table.py:7:6: PD012 Use `.read_csv` instead of `.read_table` to read CSV files
|
||||
|
|
||||
5 | df = pd.read_table("data.csv", sep=",", header=0)
|
||||
6 | filename = "data.csv"
|
||||
7 | df = pd.read_table(filename, sep=",")
|
||||
| ^^^^^^^^^^^^^ PD012
|
||||
8 | df = pd.read_table(filename, sep=",", header=0)
|
||||
|
|
||||
|
||||
pandas_use_of_dot_read_table.py:8:6: PD012 Use `.read_csv` instead of `.read_table` to read CSV files
|
||||
|
|
||||
6 | filename = "data.csv"
|
||||
7 | df = pd.read_table(filename, sep=",")
|
||||
8 | df = pd.read_table(filename, sep=",", header=0)
|
||||
| ^^^^^^^^^^^^^ PD012
|
||||
9 |
|
||||
10 | # Non-errors.
|
||||
|
|
||||
|
||||
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
---
|
||||
source: crates/ruff/src/rules/pandas_vet/mod.rs
|
||||
---
|
||||
|
||||
Loading…
Reference in New Issue