This commit is contained in:
Dan Parizher 2025-12-16 16:39:34 -05:00 committed by GitHub
commit 2b57ebdfc5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 109 additions and 7 deletions

View File

@ -0,0 +1,38 @@
import pandas as pd
import numpy as np
def test_numpy_unique_inverse():  # fixture: `.values` read from a name bound to a numpy call result
unique = np.unique_inverse([1, 2, 3, 2, 1])  # local is assigned from a numpy call, not a pandas object
result = unique.values  # the recorded PD011 snapshot contains no diagnostic for this access
def test_numpy_unique_all():  # fixture: same shape as the `unique_inverse` case, using `np.unique_all`
unique = np.unique_all([1, 2, 3, 2, 1])  # local is assigned from a numpy call, not a pandas object
result = unique.values  # the recorded PD011 snapshot contains no diagnostic for this access
def test_numpy_unique_counts():  # fixture: same shape as the `unique_inverse` case, using `np.unique_counts`
unique = np.unique_counts([1, 2, 3, 2, 1])  # local is assigned from a numpy call, not a pandas object
result = unique.values  # the recorded PD011 snapshot contains no diagnostic for this access
def test_numpy_typed_unique_inverse():  # fixture: numpy call result bound through an annotated assignment
from typing import TYPE_CHECKING
if TYPE_CHECKING:  # the guarded import is referenced only by the annotation two lines below
from numpy.lib._arraysetops_impl import UniqueInverseResult
unique: UniqueInverseResult[np.uint64] = np.unique_inverse([1, 2, 3, 2, 1])  # annotated local, value is a numpy call
result = unique.values  # the recorded PD011 snapshot contains no diagnostic for this access
def test_simple_non_pandas():  # fixture: `.values` read from a name bound to a plain literal
p = 1  # integer literal; nothing pandas-related is assigned here
result = p.values  # the recorded PD011 snapshot contains no diagnostic for this access
def test_pandas_dataframe_values():  # positive fixture: the one access the recorded snapshot reports
"""This should trigger PD011 - pandas DataFrame .values usage"""
import pandas as pd  # function-local import; `pd` is also imported at module level
x = pd.DataFrame()  # local is assigned from a pandas constructor call
result = x.values  # snapshot diagnostic: PD011 "Use `.to_numpy()` instead of `.values`"

View File

@ -1,5 +1,7 @@
use ruff_python_ast::Expr; use ruff_python_ast::Expr;
use ruff_python_semantic::{BindingKind, Imported, SemanticModel}; use ruff_python_semantic::{
BindingKind, Imported, SemanticModel, analyze::typing::find_binding_value,
};
#[derive(Debug)] #[derive(Debug)]
pub(super) enum Resolution { pub(super) enum Resolution {
@ -16,6 +18,7 @@ pub(super) enum Resolution {
/// Test an [`Expr`] for relevance to Pandas-related operations. /// Test an [`Expr`] for relevance to Pandas-related operations.
pub(super) fn test_expression(expr: &Expr, semantic: &SemanticModel) -> Resolution { pub(super) fn test_expression(expr: &Expr, semantic: &SemanticModel) -> Resolution {
match expr { match expr {
// Literals in the expression itself are definitely not pandas-related
Expr::StringLiteral(_) Expr::StringLiteral(_)
| Expr::BytesLiteral(_) | Expr::BytesLiteral(_)
| Expr::NumberLiteral(_) | Expr::NumberLiteral(_)
@ -34,12 +37,14 @@ pub(super) fn test_expression(expr: &Expr, semantic: &SemanticModel) -> Resoluti
semantic semantic
.resolve_name(name) .resolve_name(name)
.map_or(Resolution::IrrelevantBinding, |id| { .map_or(Resolution::IrrelevantBinding, |id| {
match &semantic.binding(id).kind { let binding = semantic.binding(id);
match &binding.kind {
BindingKind::Argument => { BindingKind::Argument => {
// Avoid, e.g., `self.values`. // Avoid, e.g., `self.values`.
if matches!(name.id.as_str(), "self" | "cls") { if matches!(name.id.as_str(), "self" | "cls") {
Resolution::IrrelevantBinding Resolution::IrrelevantBinding
} else { } else {
// Function arguments are treated as relevant unless proven otherwise
Resolution::RelevantLocal Resolution::RelevantLocal
} }
} }
@ -48,16 +53,63 @@ pub(super) fn test_expression(expr: &Expr, semantic: &SemanticModel) -> Resoluti
| BindingKind::NamedExprAssignment | BindingKind::NamedExprAssignment
| BindingKind::LoopVar | BindingKind::LoopVar
| BindingKind::Global(_) | BindingKind::Global(_)
| BindingKind::Nonlocal(_, _) => Resolution::RelevantLocal, | BindingKind::Nonlocal(_, _) => {
BindingKind::Import(import) // Check if this binding comes from a definitively non-pandas source
if matches!(import.qualified_name().segments(), ["pandas"]) => if let Some(assigned_value) = find_binding_value(binding, semantic) {
{ // Recurse to check the assigned value
Resolution::PandasModule match test_expression(assigned_value, semantic) {
// If the assigned value is definitively not pandas (literals, etc.)
Resolution::IrrelevantExpression => {
Resolution::IrrelevantBinding
}
// If it's clearly pandas-related, treat as relevant
Resolution::RelevantLocal | Resolution::PandasModule => {
Resolution::RelevantLocal
}
// If we got IrrelevantBinding, it means we traced it back to a
// non-pandas source (e.g., numpy import), so keep it as irrelevant
Resolution::IrrelevantBinding => Resolution::IrrelevantBinding,
}
} else {
// If we can't determine the source, be liberal and treat as relevant
// to avoid false negatives (e.g., function parameters with annotations)
Resolution::RelevantLocal
}
}
BindingKind::Import(import) => {
let segments = import.qualified_name().segments();
if matches!(segments, ["pandas"]) {
Resolution::PandasModule
} else if matches!(segments, ["numpy"]) {
// Explicitly exclude numpy imports
Resolution::IrrelevantBinding
} else {
Resolution::IrrelevantBinding
}
} }
_ => Resolution::IrrelevantBinding, _ => Resolution::IrrelevantBinding,
} }
}) })
} }
// Recurse for attribute access (e.g., df.values -> check df)
Expr::Attribute(attr) => test_expression(attr.value.as_ref(), semantic),
// Recurse for call expressions (e.g., pd.DataFrame() -> check pd)
Expr::Call(call) => {
// Check if this is a pandas function call
if let Some(qualified_name) = semantic.resolve_qualified_name(&call.func) {
let segments = qualified_name.segments();
if segments.starts_with(&["pandas"]) {
return Resolution::RelevantLocal;
}
// Explicitly exclude numpy function calls
if segments.starts_with(&["numpy"]) || segments.starts_with(&["np"]) {
return Resolution::IrrelevantBinding;
}
}
// For other calls, recurse on the function expression
test_expression(&call.func, semantic)
}
// For other expressions, default to relevant to avoid false negatives
_ => Resolution::RelevantLocal, _ => Resolution::RelevantLocal,
} }
} }

View File

@ -379,6 +379,7 @@ mod tests {
)] )]
#[test_case(Rule::PandasUseOfInplaceArgument, Path::new("PD002.py"))] #[test_case(Rule::PandasUseOfInplaceArgument, Path::new("PD002.py"))]
#[test_case(Rule::PandasNuniqueConstantSeriesCheck, Path::new("PD101.py"))] #[test_case(Rule::PandasNuniqueConstantSeriesCheck, Path::new("PD101.py"))]
#[test_case(Rule::PandasUseOfDotValues, Path::new("PD011.py"))]
fn paths(rule_code: Rule, path: &Path) -> Result<()> { fn paths(rule_code: Rule, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy()); let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
let diagnostics = test_path( let diagnostics = test_path(

View File

@ -0,0 +1,11 @@
---
source: crates/ruff_linter/src/rules/pandas_vet/mod.rs
---
PD011 Use `.to_numpy()` instead of `.values`
--> PD011.py:37:14
|
35 | import pandas as pd
36 | x = pd.DataFrame()
37 | result = x.values
| ^^^^^^^^
|