diff --git a/README.md b/README.md index 7927287dea..088c87bbe6 100644 --- a/README.md +++ b/README.md @@ -443,6 +443,7 @@ For more, see [pyupgrade](https://pypi.org/project/pyupgrade/3.2.0/) on PyPI. | U009 | PEP3120UnnecessaryCodingComment | utf-8 encoding declaration is unnecessary | 🛠 | | U010 | UnnecessaryFutureImport | Unnecessary `__future__` import `...` for target Python version | 🛠 | | U011 | UnnecessaryLRUCacheParams | Unnecessary parameters to functools.lru_cache | 🛠 | +| U012 | UnnecessaryEncodeUTF8 | Unnecessary call to `encode` as UTF-8 | 🛠 | ### pep8-naming @@ -687,7 +688,7 @@ including: - [`flake8-comprehensions`](https://pypi.org/project/flake8-comprehensions/) - [`flake8-bugbear`](https://pypi.org/project/flake8-bugbear/) (21/32) - [`flake8-2020`](https://pypi.org/project/flake8-2020/) -- [`pyupgrade`](https://pypi.org/project/pyupgrade/) (14/34) +- [`pyupgrade`](https://pypi.org/project/pyupgrade/) (15/34) - [`autoflake`](https://pypi.org/project/autoflake/) (1/7) Beyond rule-set parity, Ruff suffers from the following limitations vis-à-vis Flake8: @@ -713,7 +714,7 @@ Today, Ruff can be used to replace Flake8 when used with any of the following pl - [`flake8-2020`](https://pypi.org/project/flake8-2020/) Ruff can also replace [`isort`](https://pypi.org/project/isort/), [`yesqa`](https://github.com/asottile/yesqa), -and a subset of the rules implemented in [`pyupgrade`](https://pypi.org/project/pyupgrade/) (14/34). +and a subset of the rules implemented in [`pyupgrade`](https://pypi.org/project/pyupgrade/) (15/34). If you're looking to use Ruff, but rely on an unsupported Flake8 plugin, free to file an Issue. diff --git a/resources/test/fixtures/U012.py b/resources/test/fixtures/U012.py new file mode 100644 index 0000000000..d930bedc42 --- /dev/null +++ b/resources/test/fixtures/U012.py @@ -0,0 +1,52 @@ +# ASCII literals should be replaced by a bytes literal +"foo".encode("utf-8") # b"foo" +"foo".encode("u8") # b"foo" +"foo".encode() # b"foo" +"foo".encode("UTF8") # b"foo" +U"foo".encode("utf-8") # b"foo" +"foo".encode(encoding="utf-8") # b"foo" +""" +Lorem + +Ipsum +""".encode( + "utf-8" +) +# b""" +# Lorem +# +# Ipsum +# """ + +# `encode` on variables should not be processed. +string = "hello there" +string.encode("utf-8") + +bar = "bar" +f"foo{bar}".encode("utf-8") # f"foo{bar}".encode() +encoding = "latin" +"foo".encode(encoding) +f"foo{bar}".encode(encoding) + +# `encode` with custom args and kwargs should not be processed. +"foo".encode("utf-8", errors="replace") +"foo".encode("utf-8", "replace") +"foo".encode(errors="replace") +"foo".encode(encoding="utf-8", errors="replace") + +# `encode` with custom args and kwargs on unicode should not be processed. +"unicode text©".encode("utf-8", errors="replace") +"unicode text©".encode("utf-8", "replace") +"unicode text©".encode(errors="replace") +"unicode text©".encode(encoding="utf-8", errors="replace") + +# Unicode literals should only be stripped of default encoding. +"unicode text©".encode("utf-8") # "unicode text©".encode() +"unicode text©".encode() +"unicode text©".encode(encoding="UTF8") # "unicode text©".encode() + +r"fo\o".encode("utf-8") # br"fo\o" +u"foo".encode("utf-8") # b"foo" +R"fo\o".encode("utf-8") # br"fo\o" +U"foo".encode("utf-8") # b"foo" +print("foo".encode()) # print(b"foo") diff --git a/src/check_ast.rs b/src/check_ast.rs index 39dd16a5d5..44cbfaeb82 100644 --- a/src/check_ast.rs +++ b/src/check_ast.rs @@ -1073,6 +1073,10 @@ where pyupgrade::plugins::super_call_with_parameters(self, expr, func, args); } + if self.settings.enabled.contains(&CheckCode::U012) { + pyupgrade::plugins::unnecessary_encode_utf8(self, expr, func, args, keywords); + } + // flake8-print if self.settings.enabled.contains(&CheckCode::T201) || self.settings.enabled.contains(&CheckCode::T203) diff --git a/src/checks.rs b/src/checks.rs index ef224d21b4..49a005c250 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -156,6 +156,7 @@ pub enum CheckCode { U009, U010, U011, + U012, // pydocstyle D100, D101, @@ -444,6 +445,7 @@ pub enum CheckKind { PEP3120UnnecessaryCodingComment, UnnecessaryFutureImport(Vec), UnnecessaryLRUCacheParams, + UnnecessaryEncodeUTF8, // pydocstyle BlankLineAfterLastSection(String), BlankLineAfterSection(String), @@ -691,6 +693,7 @@ impl CheckCode { CheckCode::U009 => CheckKind::PEP3120UnnecessaryCodingComment, CheckCode::U010 => CheckKind::UnnecessaryFutureImport(vec!["...".to_string()]), CheckCode::U011 => CheckKind::UnnecessaryLRUCacheParams, + CheckCode::U012 => CheckKind::UnnecessaryEncodeUTF8, // pydocstyle CheckCode::D100 => CheckKind::PublicModule, CheckCode::D101 => CheckKind::PublicClass, @@ -901,6 +904,7 @@ impl CheckCode { CheckCode::U009 => CheckCategory::Pyupgrade, CheckCode::U010 => CheckCategory::Pyupgrade, CheckCode::U011 => CheckCategory::Pyupgrade, + CheckCode::U012 => CheckCategory::Pyupgrade, CheckCode::D100 => CheckCategory::Pydocstyle, CheckCode::D101 => CheckCategory::Pydocstyle, CheckCode::D102 => CheckCategory::Pydocstyle, @@ -1103,6 +1107,7 @@ impl CheckKind { CheckKind::PEP3120UnnecessaryCodingComment => &CheckCode::U009, CheckKind::UnnecessaryFutureImport(_) => &CheckCode::U010, CheckKind::UnnecessaryLRUCacheParams => &CheckCode::U011, + CheckKind::UnnecessaryEncodeUTF8 => &CheckCode::U012, // pydocstyle CheckKind::BlankLineAfterLastSection(_) => &CheckCode::D413, CheckKind::BlankLineAfterSection(_) => &CheckCode::D410, @@ -1607,6 +1612,7 @@ impl CheckKind { CheckKind::UnnecessaryLRUCacheParams => { "Unnecessary parameters to functools.lru_cache".to_string() } + CheckKind::UnnecessaryEncodeUTF8 => "Unnecessary call to `encode` as UTF-8".to_string(), // pydocstyle CheckKind::FitsOnOneLine => "One-line docstring should fit on one line".to_string(), CheckKind::BlankLineAfterSummary => { @@ -1873,6 +1879,7 @@ impl CheckKind { | CheckKind::UnnecessaryAbspath | CheckKind::UnnecessaryCollectionCall(_) | CheckKind::UnnecessaryComprehension(_) + | CheckKind::UnnecessaryEncodeUTF8 | CheckKind::UnnecessaryFutureImport(_) | CheckKind::UnnecessaryGeneratorDict | CheckKind::UnnecessaryGeneratorList diff --git a/src/checks_gen.rs b/src/checks_gen.rs index daf0409ce0..19f0c8e63f 100644 --- a/src/checks_gen.rs +++ b/src/checks_gen.rs @@ -265,6 +265,7 @@ pub enum CheckCodePrefix { U01, U010, U011, + U012, W, W2, W29, @@ -1004,6 +1005,7 @@ impl CheckCodePrefix { CheckCode::U009, CheckCode::U010, CheckCode::U011, + CheckCode::U012, ], CheckCodePrefix::U0 => vec![ CheckCode::U001, @@ -1017,6 +1019,7 @@ impl CheckCodePrefix { CheckCode::U009, CheckCode::U010, CheckCode::U011, + CheckCode::U012, ], CheckCodePrefix::U00 => vec![ CheckCode::U001, @@ -1038,9 +1041,10 @@ impl CheckCodePrefix { CheckCodePrefix::U007 => vec![CheckCode::U007], CheckCodePrefix::U008 => vec![CheckCode::U008], CheckCodePrefix::U009 => vec![CheckCode::U009], - CheckCodePrefix::U01 => vec![CheckCode::U010, CheckCode::U011], + CheckCodePrefix::U01 => vec![CheckCode::U010, CheckCode::U011, CheckCode::U012], CheckCodePrefix::U010 => vec![CheckCode::U010], CheckCodePrefix::U011 => vec![CheckCode::U011], + CheckCodePrefix::U012 => vec![CheckCode::U012], CheckCodePrefix::W => vec![CheckCode::W292, CheckCode::W605], CheckCodePrefix::W2 => vec![CheckCode::W292], CheckCodePrefix::W29 => vec![CheckCode::W292], @@ -1351,6 +1355,7 @@ impl CheckCodePrefix { CheckCodePrefix::U01 => PrefixSpecificity::Tens, CheckCodePrefix::U010 => PrefixSpecificity::Explicit, CheckCodePrefix::U011 => PrefixSpecificity::Explicit, + CheckCodePrefix::U012 => PrefixSpecificity::Explicit, CheckCodePrefix::W => PrefixSpecificity::Category, CheckCodePrefix::W2 => PrefixSpecificity::Hundreds, CheckCodePrefix::W29 => PrefixSpecificity::Tens, diff --git a/src/linter.rs b/src/linter.rs index 2351893f42..3db8ce4269 100644 --- a/src/linter.rs +++ b/src/linter.rs @@ -486,6 +486,7 @@ mod tests { #[test_case(CheckCode::U010, Path::new("U010.py"); "U010")] #[test_case(CheckCode::U011, Path::new("U011_0.py"); "U011_0")] #[test_case(CheckCode::U011, Path::new("U011_1.py"); "U011_1")] + #[test_case(CheckCode::U012, Path::new("U012.py"); "U012")] #[test_case(CheckCode::W292, Path::new("W292_0.py"); "W292_0")] #[test_case(CheckCode::W292, Path::new("W292_1.py"); "W292_1")] #[test_case(CheckCode::W292, Path::new("W292_2.py"); "W292_2")] diff --git a/src/pyupgrade/plugins/mod.rs b/src/pyupgrade/plugins/mod.rs index 9ca68d6eae..4b6a2eda95 100644 --- a/src/pyupgrade/plugins/mod.rs +++ b/src/pyupgrade/plugins/mod.rs @@ -2,6 +2,7 @@ pub use deprecated_unittest_alias::deprecated_unittest_alias; pub use super_call_with_parameters::super_call_with_parameters; pub use type_of_primitive::type_of_primitive; pub use unnecessary_abspath::unnecessary_abspath; +pub use unnecessary_encode_utf8::unnecessary_encode_utf8; pub use unnecessary_future_import::unnecessary_future_import; pub use unnecessary_lru_cache_params::unnecessary_lru_cache_params; pub use use_pep585_annotation::use_pep585_annotation; @@ -13,6 +14,7 @@ mod deprecated_unittest_alias; mod super_call_with_parameters; mod type_of_primitive; mod unnecessary_abspath; +mod unnecessary_encode_utf8; mod unnecessary_future_import; mod unnecessary_lru_cache_params; mod use_pep585_annotation; diff --git a/src/pyupgrade/plugins/unnecessary_encode_utf8.rs b/src/pyupgrade/plugins/unnecessary_encode_utf8.rs new file mode 100644 index 0000000000..c38dccc618 --- /dev/null +++ b/src/pyupgrade/plugins/unnecessary_encode_utf8.rs @@ -0,0 +1,152 @@ +use rustpython_ast::{Constant, Expr, ExprKind, Keyword}; + +use crate::ast::types::Range; +use crate::autofix::Fix; +use crate::check_ast::Checker; +use crate::checks::{Check, CheckKind}; +use crate::source_code_locator::SourceCodeLocator; + +const UTF8_LITERALS: &[&str] = &["utf-8", "utf8", "utf_8", "u8", "utf", "cp65001"]; + +fn match_encoded_variable(func: &Expr) -> Option<&Expr> { + if let ExprKind::Attribute { + value: variable, + attr, + .. + } = &func.node + { + if attr == "encode" { + return Some(variable); + } + } + None +} + +fn is_utf8_encoding_arg(arg: &Expr) -> bool { + if let ExprKind::Constant { + value: Constant::Str(value), + .. + } = &arg.node + { + UTF8_LITERALS.contains(&value.to_lowercase().as_str()) + } else { + false + } +} + +fn is_default_encode(args: &Vec, kwargs: &Vec) -> bool { + match (args.len(), kwargs.len()) { + // .encode() + (0, 0) => true, + // .encode(encoding) + (1, 0) => is_utf8_encoding_arg(&args[0]), + // .encode(kwarg=kwarg) + (0, 1) => { + kwargs[0].node.arg == Some("encoding".to_string()) + && is_utf8_encoding_arg(&kwargs[0].node.value) + } + // .encode(*args, **kwargs) + _ => false, + } +} + +// Return a Fix for a default `encode` call removing the encoding argument, +// keyword, or positional. +fn delete_default_encode_arg_or_kwarg( + expr: &Expr, + args: &[Expr], + kwargs: &[Keyword], + patch: bool, +) -> Option { + if let Some(arg) = args.get(0) { + let mut check = Check::new(CheckKind::UnnecessaryEncodeUTF8, Range::from_located(expr)); + if patch { + check.amend(Fix::deletion(arg.location, arg.end_location.unwrap())); + } + Some(check) + } else if let Some(kwarg) = kwargs.get(0) { + let mut check = Check::new(CheckKind::UnnecessaryEncodeUTF8, Range::from_located(expr)); + if patch { + check.amend(Fix::deletion(kwarg.location, kwarg.end_location.unwrap())); + } + Some(check) + } else { + None + } +} + +// Return a Fix replacing the call to encode by a `"b"` prefix on the string. +fn replace_with_bytes_literal( + expr: &Expr, + constant: &Expr, + locator: &SourceCodeLocator, + patch: bool, +) -> Check { + let mut check = Check::new(CheckKind::UnnecessaryEncodeUTF8, Range::from_located(expr)); + if patch { + let content = locator.slice_source_code_range(&Range { + location: constant.location, + end_location: constant.end_location.unwrap(), + }); + let content = format!( + "b{}", + content.trim_start_matches('u').trim_start_matches('U') + ); + check.amend(Fix::replacement( + content, + expr.location, + expr.end_location.unwrap(), + )) + } + check +} + +/// U012 +pub fn unnecessary_encode_utf8( + checker: &mut Checker, + expr: &Expr, + func: &Expr, + args: &Vec, + kwargs: &Vec, +) { + if let Some(variable) = match_encoded_variable(func) { + match &variable.node { + ExprKind::Constant { + value: Constant::Str(literal), + .. + } => { + // "str".encode() + // "str".encode("utf-8") + if is_default_encode(args, kwargs) { + if literal.is_ascii() { + // "foo".encode() + checker.add_check(replace_with_bytes_literal( + expr, + variable, + checker.locator, + checker.patch(), + )); + } else { + // "unicode text©".encode("utf-8") + if let Some(check) = + delete_default_encode_arg_or_kwarg(expr, args, kwargs, checker.patch()) + { + checker.add_check(check); + } + } + } + } + // f"foo{bar}".encode(*args, **kwargs) + ExprKind::JoinedStr { .. } => { + if is_default_encode(args, kwargs) { + if let Some(check) = + delete_default_encode_arg_or_kwarg(expr, args, kwargs, checker.patch()) + { + checker.add_check(check); + } + } + } + _ => {} + } + } +} diff --git a/src/snapshots/ruff__linter__tests__U012_U012.py.snap b/src/snapshots/ruff__linter__tests__U012_U012.py.snap new file mode 100644 index 0000000000..87561b6848 --- /dev/null +++ b/src/snapshots/ruff__linter__tests__U012_U012.py.snap @@ -0,0 +1,260 @@ +--- +source: src/linter.rs +expression: checks +--- +- kind: UnnecessaryEncodeUTF8 + location: + row: 2 + column: 0 + end_location: + row: 2 + column: 21 + fix: + patch: + content: "b\"foo\"" + location: + row: 2 + column: 0 + end_location: + row: 2 + column: 21 + applied: false +- kind: UnnecessaryEncodeUTF8 + location: + row: 3 + column: 0 + end_location: + row: 3 + column: 18 + fix: + patch: + content: "b\"foo\"" + location: + row: 3 + column: 0 + end_location: + row: 3 + column: 18 + applied: false +- kind: UnnecessaryEncodeUTF8 + location: + row: 4 + column: 0 + end_location: + row: 4 + column: 14 + fix: + patch: + content: "b\"foo\"" + location: + row: 4 + column: 0 + end_location: + row: 4 + column: 14 + applied: false +- kind: UnnecessaryEncodeUTF8 + location: + row: 5 + column: 0 + end_location: + row: 5 + column: 20 + fix: + patch: + content: "b\"foo\"" + location: + row: 5 + column: 0 + end_location: + row: 5 + column: 20 + applied: false +- kind: UnnecessaryEncodeUTF8 + location: + row: 6 + column: 0 + end_location: + row: 6 + column: 22 + fix: + patch: + content: "b\"foo\"" + location: + row: 6 + column: 0 + end_location: + row: 6 + column: 22 + applied: false +- kind: UnnecessaryEncodeUTF8 + location: + row: 7 + column: 0 + end_location: + row: 7 + column: 30 + fix: + patch: + content: "b\"foo\"" + location: + row: 7 + column: 0 + end_location: + row: 7 + column: 30 + applied: false +- kind: UnnecessaryEncodeUTF8 + location: + row: 8 + column: 0 + end_location: + row: 14 + column: 1 + fix: + patch: + content: "b\"\"\"\nLorem\n\nIpsum\n\"\"\"" + location: + row: 8 + column: 0 + end_location: + row: 14 + column: 1 + applied: false +- kind: UnnecessaryEncodeUTF8 + location: + row: 26 + column: 0 + end_location: + row: 26 + column: 27 + fix: + patch: + content: "" + location: + row: 26 + column: 19 + end_location: + row: 26 + column: 26 + applied: false +- kind: UnnecessaryEncodeUTF8 + location: + row: 44 + column: 0 + end_location: + row: 44 + column: 31 + fix: + patch: + content: "" + location: + row: 44 + column: 23 + end_location: + row: 44 + column: 30 + applied: false +- kind: UnnecessaryEncodeUTF8 + location: + row: 46 + column: 0 + end_location: + row: 46 + column: 39 + fix: + patch: + content: "" + location: + row: 46 + column: 23 + end_location: + row: 46 + column: 38 + applied: false +- kind: UnnecessaryEncodeUTF8 + location: + row: 48 + column: 0 + end_location: + row: 48 + column: 23 + fix: + patch: + content: "br\"fo\\o\"" + location: + row: 48 + column: 0 + end_location: + row: 48 + column: 23 + applied: false +- kind: UnnecessaryEncodeUTF8 + location: + row: 49 + column: 0 + end_location: + row: 49 + column: 22 + fix: + patch: + content: "b\"foo\"" + location: + row: 49 + column: 0 + end_location: + row: 49 + column: 22 + applied: false +- kind: UnnecessaryEncodeUTF8 + location: + row: 50 + column: 0 + end_location: + row: 50 + column: 23 + fix: + patch: + content: "bR\"fo\\o\"" + location: + row: 50 + column: 0 + end_location: + row: 50 + column: 23 + applied: false +- kind: UnnecessaryEncodeUTF8 + location: + row: 51 + column: 0 + end_location: + row: 51 + column: 22 + fix: + patch: + content: "b\"foo\"" + location: + row: 51 + column: 0 + end_location: + row: 51 + column: 22 + applied: false +- kind: UnnecessaryEncodeUTF8 + location: + row: 52 + column: 6 + end_location: + row: 52 + column: 20 + fix: + patch: + content: "b\"foo\"" + location: + row: 52 + column: 6 + end_location: + row: 52 + column: 20 + applied: false +