From 7ed5b3d3a2d1679b508a8b49a76492ab20b2bf8d Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 30 Aug 2022 13:19:22 -0400 Subject: [PATCH] Avoid re-reading + iterating over lines for ignores (#48) --- Cargo.lock | 20 ------------------- Cargo.toml | 1 - src/check_lines.rs | 48 +++++++++++++++++++++++++++++----------------- src/checks.rs | 26 +++++++++++++++++++++++++ src/fs.rs | 12 +----------- src/linter.rs | 9 +-------- src/message.rs | 35 --------------------------------- 7 files changed, 58 insertions(+), 93 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5fb33fe492..4f47481f5c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -188,25 +188,6 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" -[[package]] -name = "autoincrement" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc81b6ad0c9122715c741113fb97ba283f723b1dd86e186dd7df00a2b6877bfe" -dependencies = [ - "autoincrement_derive", -] - -[[package]] -name = "autoincrement_derive" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1259ce9b9a586a3f5ada3d236800ad0aa446b0d4c3dbac4158084a1dd704a228" -dependencies = [ - "quote", - "syn", -] - [[package]] name = "base64" version = "0.10.1" @@ -1657,7 +1638,6 @@ name = "ruff" version = "0.0.19" dependencies = [ "anyhow", - "autoincrement", "bincode", "cacache", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 138c512c51..98b6b13960 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,7 +8,6 @@ name = "ruff" [dependencies] anyhow = { version = "1.0.60" } -autoincrement = "1.0.1" bincode = { version = "1.3.3" } cacache = { version = "10.0.1" } chrono = { version = "0.4.21" } diff --git a/src/check_lines.rs b/src/check_lines.rs index 2f566d0568..a17ccbd98d 100644 --- a/src/check_lines.rs +++ b/src/check_lines.rs @@ -3,24 +3,36 @@ use rustpython_parser::ast::Location; use crate::checks::{Check, CheckKind}; use crate::settings::Settings; -pub fn check_lines(contents: &str, settings: &Settings) -> Vec { - contents - .lines() - .enumerate() - .filter_map(|(row, line)| { - if settings.select.contains(CheckKind::LineTooLong.code()) - && line.len() > settings.line_length - { - let chunks: Vec<&str> = line.split_whitespace().collect(); - if !(chunks.len() == 1 || (chunks.len() == 2 && chunks[0] == "#")) { - return Some(Check { - kind: CheckKind::LineTooLong, - location: Location::new(row + 1, settings.line_length + 1), - }); +pub fn check_lines(checks: &mut Vec, contents: &str, settings: &Settings) { + let enforce_line_too_ling = settings.select.contains(CheckKind::LineTooLong.code()); + + let mut line_checks = vec![]; + let mut ignored = vec![]; + for (row, line) in contents.lines().enumerate() { + // Remove any ignored checks. + // TODO(charlie): Only validate checks for the current line. + for (index, check) in checks.iter().enumerate() { + if check.location.row() == row + 1 && check.is_inline_ignored(line) { + ignored.push(index); + } + } + + // Enforce line length. + if enforce_line_too_ling && line.len() > settings.line_length { + let chunks: Vec<&str> = line.split_whitespace().collect(); + if !(chunks.len() == 1 || (chunks.len() == 2 && chunks[0] == "#")) { + let check = Check { + kind: CheckKind::LineTooLong, + location: Location::new(row + 1, settings.line_length + 1), + }; + if !check.is_inline_ignored(line) { + line_checks.push(check); } } - - None - }) - .collect() + } + } + for index in ignored.iter().rev() { + checks.remove(*index); + } + checks.extend(line_checks); } diff --git a/src/checks.rs b/src/checks.rs index 51cff6cede..dfa84ed63b 100644 --- a/src/checks.rs +++ b/src/checks.rs @@ -1,6 +1,7 @@ use std::str::FromStr; use anyhow::Result; +use regex::Regex; use rustpython_parser::ast::Location; use serde::{Deserialize, Serialize}; @@ -135,3 +136,28 @@ pub struct Check { pub kind: CheckKind, pub location: Location, } + +impl Check { + pub fn is_inline_ignored(&self, line: &str) -> bool { + let re = Regex::new(r"(?i)# noqa(?::\s?(?P([A-Z]+[0-9]+(?:[,\s]+)?)+))?").unwrap(); + match re.captures(line) { + Some(caps) => match caps.name("codes") { + Some(codes) => { + let re = Regex::new(r"[,\s]").unwrap(); + for code in re + .split(codes.as_str()) + .map(|code| code.trim()) + .filter(|code| !code.is_empty()) + { + if code == self.kind.code().as_str() { + return true; + } + } + false + } + None => true, + }, + None => false, + } + } +} diff --git a/src/fs.rs b/src/fs.rs index 09d5eb0769..1fe591ab42 100644 --- a/src/fs.rs +++ b/src/fs.rs @@ -1,5 +1,5 @@ use std::fs::File; -use std::io::{BufRead, BufReader, Read}; +use std::io::{BufReader, Read}; use std::path::{Path, PathBuf}; use anyhow::Result; @@ -22,16 +22,6 @@ pub fn iter_python_files(path: &PathBuf) -> impl Iterator { .filter(|entry| entry.path().to_string_lossy().ends_with(".py")) } -pub fn read_line(path: &Path, row: &usize) -> Result { - let file = File::open(path)?; - let buf_reader = BufReader::new(file); - buf_reader - .lines() - .nth(*row - 1) - .unwrap() - .map_err(|e| e.into()) -} - pub fn read_file(path: &Path) -> Result { let file = File::open(path)?; let mut buf_reader = BufReader::new(file); diff --git a/src/linter.rs b/src/linter.rs index 56bc06417c..15fe3de2f1 100644 --- a/src/linter.rs +++ b/src/linter.rs @@ -36,13 +36,7 @@ pub fn check_path(path: &Path, settings: &Settings, mode: &cache::Mode) -> Resul } // Run the lines-based checks. - if settings - .select - .iter() - .any(|check_code| matches!(check_code.lint_source(), LintSource::Lines)) - { - checks.extend(check_lines(&contents, settings)); - } + check_lines(&mut checks, &contents, settings); // Convert to messages. let messages: Vec = checks @@ -52,7 +46,6 @@ pub fn check_path(path: &Path, settings: &Settings, mode: &cache::Mode) -> Resul location: check.location, filename: path.to_string_lossy().to_string(), }) - .filter(|message| !message.is_inline_ignored()) .collect(); cache::set(path, settings, &messages, mode); diff --git a/src/message.rs b/src/message.rs index 0d3d0f451b..c3ba2fd5d0 100644 --- a/src/message.rs +++ b/src/message.rs @@ -1,13 +1,10 @@ use std::fmt; -use std::path::Path; use colored::Colorize; -use regex::Regex; use rustpython_parser::ast::Location; use serde::{Deserialize, Serialize}; use crate::checks::CheckKind; -use crate::fs; #[derive(Serialize, Deserialize)] #[serde(remote = "Location")] @@ -32,38 +29,6 @@ pub struct Message { pub filename: String, } -impl Message { - pub fn is_inline_ignored(&self) -> bool { - match fs::read_line(Path::new(&self.filename), &self.location.row()) { - Ok(line) => { - // https://github.com/PyCQA/flake8/blob/799c71eeb61cf26c7c176aed43e22523e2a6d991/src/flake8/defaults.py#L26 - let re = Regex::new(r"(?i)# noqa(?::\s?(?P([A-Z]+[0-9]+(?:[,\s]+)?)+))?") - .unwrap(); - match re.captures(&line) { - Some(caps) => match caps.name("codes") { - Some(codes) => { - let re = Regex::new(r"[,\s]").unwrap(); - for code in re - .split(codes.as_str()) - .map(|code| code.trim()) - .filter(|code| !code.is_empty()) - { - if code == self.kind.code().as_str() { - return true; - } - } - false - } - None => true, - }, - None => false, - } - } - Err(_) => false, - } - } -} - impl fmt::Display for Message { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(