From ed72c027a3d72c29e199f3e0a2ad8bb3697629fc Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Mon, 24 Jul 2023 19:41:57 -0400 Subject: [PATCH] Replace `NoHashHasher` usages with `FxHashMap` (#6049) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary I had always assumed that `NoHashHasher` would be faster when using integer keys, but benchmarking shows otherwise: ``` linter/default-rules/numpy/globals.py time: [66.544 µs 66.606 µs 66.678 µs] thrpt: [44.253 MiB/s 44.300 MiB/s 44.342 MiB/s] change: time: [-0.1843% +0.1087% +0.3718%] (p = 0.46 > 0.05) thrpt: [-0.3704% -0.1086% +0.1847%] No change in performance detected. Found 1 outliers among 100 measurements (1.00%) 1 (1.00%) high mild linter/default-rules/pydantic/types.py time: [1.3787 ms 1.3811 ms 1.3837 ms] thrpt: [18.431 MiB/s 18.466 MiB/s 18.498 MiB/s] change: time: [-0.4827% -0.1074% +0.1927%] (p = 0.56 > 0.05) thrpt: [-0.1924% +0.1075% +0.4850%] No change in performance detected. linter/default-rules/numpy/ctypeslib.py time: [624.82 µs 625.96 µs 627.17 µs] thrpt: [26.550 MiB/s 26.601 MiB/s 26.650 MiB/s] change: time: [-0.7071% -0.4908% -0.2736%] (p = 0.00 < 0.05) thrpt: [+0.2744% +0.4932% +0.7122%] Change within noise threshold. linter/default-rules/large/dataset.py time: [3.1585 ms 3.1634 ms 3.1685 ms] thrpt: [12.840 MiB/s 12.861 MiB/s 12.880 MiB/s] change: time: [-1.5338% -1.3463% -1.1476%] (p = 0.00 < 0.05) thrpt: [+1.1610% +1.3647% +1.5577%] Performance has improved. linter/all-rules/numpy/globals.py time: [140.17 µs 140.37 µs 140.58 µs] thrpt: [20.989 MiB/s 21.020 MiB/s 21.051 MiB/s] change: time: [-0.1066% +0.3140% +0.7479%] (p = 0.14 > 0.05) thrpt: [-0.7423% -0.3130% +0.1067%] No change in performance detected. Found 3 outliers among 100 measurements (3.00%) 2 (2.00%) high mild 1 (1.00%) high severe linter/all-rules/pydantic/types.py time: [2.7030 ms 2.7069 ms 2.7112 ms] thrpt: [9.4064 MiB/s 9.4216 MiB/s 9.4351 MiB/s] change: time: [-0.6721% -0.4874% -0.2974%] (p = 0.00 < 0.05) thrpt: [+0.2982% +0.4898% +0.6766%] Change within noise threshold. Found 14 outliers among 100 measurements (14.00%) 12 (12.00%) high mild 2 (2.00%) high severe linter/all-rules/numpy/ctypeslib.py time: [1.4709 ms 1.4727 ms 1.4749 ms] thrpt: [11.290 MiB/s 11.306 MiB/s 11.320 MiB/s] change: time: [-1.1617% -0.9766% -0.8094%] (p = 0.00 < 0.05) thrpt: [+0.8160% +0.9862% +1.1754%] Change within noise threshold. Found 12 outliers among 100 measurements (12.00%) 9 (9.00%) high mild 3 (3.00%) high severe linter/all-rules/large/dataset.py time: [5.8086 ms 5.8163 ms 5.8240 ms] thrpt: [6.9854 MiB/s 6.9946 MiB/s 7.0038 MiB/s] change: time: [-1.5651% -1.3536% -1.1584%] (p = 0.00 < 0.05) thrpt: [+1.1720% +1.3721% +1.5900%] Performance has improved. ``` My guess is that `NoHashHasher` underperforms because the keys are not randomly distributed... Anyway, it's a ~1% (significant) performance gain on some of the above, plus we get to remove a dependency. --- Cargo.lock | 8 -------- Cargo.toml | 1 - crates/ruff/Cargo.toml | 1 - crates/ruff/src/autofix/mod.rs | 5 ++--- crates/ruff_python_semantic/Cargo.toml | 1 - crates/ruff_python_semantic/src/binding.rs | 2 -- crates/ruff_python_semantic/src/model.rs | 15 +++++++-------- crates/ruff_python_semantic/src/scope.rs | 10 ++++------ 8 files changed, 13 insertions(+), 30 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4f477ceb4d..f2cffcef08 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1295,12 +1295,6 @@ dependencies = [ "static_assertions", ] -[[package]] -name = "nohash-hasher" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bf50223579dc7cdcfb3bfcacf7069ff68243f8c363f62ffa99cf000a6b9c451" - [[package]] name = "nom" version = "7.1.3" @@ -1895,7 +1889,6 @@ dependencies = [ "log", "memchr", "natord", - "nohash-hasher", "num-bigint", "num-traits", "once_cell", @@ -2164,7 +2157,6 @@ version = "0.0.0" dependencies = [ "bitflags 2.3.3", "is-macro", - "nohash-hasher", "num-traits", "ruff_index", "ruff_python_ast", diff --git a/Cargo.toml b/Cargo.toml index 51a34651bc..a7b55367a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,6 @@ is-macro = { version = "0.2.2" } itertools = { version = "0.10.5" } log = { version = "0.4.17" } memchr = "2.5.0" -nohash-hasher = { version = "0.2.0" } num-bigint = { version = "0.4.3" } num-traits = { version = "0.2.15" } once_cell = { version = "1.17.1" } diff --git a/crates/ruff/Cargo.toml b/crates/ruff/Cargo.toml index 31f80dd093..e3b52e0e76 100644 --- a/crates/ruff/Cargo.toml +++ b/crates/ruff/Cargo.toml @@ -45,7 +45,6 @@ libcst = { workspace = true } log = { workspace = true } memchr = { workspace = true } natord = { version = "1.0.9" } -nohash-hasher = { workspace = true } num-bigint = { workspace = true } num-traits = { workspace = true } once_cell = { workspace = true } diff --git a/crates/ruff/src/autofix/mod.rs b/crates/ruff/src/autofix/mod.rs index a666171b54..3aba3eb424 100644 --- a/crates/ruff/src/autofix/mod.rs +++ b/crates/ruff/src/autofix/mod.rs @@ -1,9 +1,8 @@ use std::collections::BTreeSet; use itertools::Itertools; -use nohash_hasher::IntSet; use ruff_text_size::{TextLen, TextRange, TextSize}; -use rustc_hash::FxHashMap; +use rustc_hash::{FxHashMap, FxHashSet}; use ruff_diagnostics::{Diagnostic, Edit, Fix, IsolationLevel}; use ruff_python_ast::source_code::Locator; @@ -47,7 +46,7 @@ fn apply_fixes<'a>( let mut output = String::with_capacity(locator.len()); let mut last_pos: Option = None; let mut applied: BTreeSet<&Edit> = BTreeSet::default(); - let mut isolated: IntSet = IntSet::default(); + let mut isolated: FxHashSet = FxHashSet::default(); let mut fixed = FxHashMap::default(); let mut source_map = SourceMap::default(); diff --git a/crates/ruff_python_semantic/Cargo.toml b/crates/ruff_python_semantic/Cargo.toml index 93c47dae9d..08d7ab19c2 100644 --- a/crates/ruff_python_semantic/Cargo.toml +++ b/crates/ruff_python_semantic/Cargo.toml @@ -20,7 +20,6 @@ ruff_index = { path = "../ruff_index" } bitflags = { workspace = true } is-macro = { workspace = true } -nohash-hasher = { workspace = true } num-traits = { workspace = true } rustc-hash = { workspace = true } rustpython-parser = { workspace = true } diff --git a/crates/ruff_python_semantic/src/binding.rs b/crates/ruff_python_semantic/src/binding.rs index 77bf59416a..eb93d6ffbd 100644 --- a/crates/ruff_python_semantic/src/binding.rs +++ b/crates/ruff_python_semantic/src/binding.rs @@ -275,8 +275,6 @@ bitflags! { #[newtype_index] pub struct BindingId; -impl nohash_hasher::IsEnabled for BindingId {} - /// The bindings in a program. /// /// Bindings are indexed by [`BindingId`] diff --git a/crates/ruff_python_semantic/src/model.rs b/crates/ruff_python_semantic/src/model.rs index 61726598d9..5171ff0b84 100644 --- a/crates/ruff_python_semantic/src/model.rs +++ b/crates/ruff_python_semantic/src/model.rs @@ -1,9 +1,8 @@ -use std::collections::HashMap; use std::path::Path; use bitflags::bitflags; -use nohash_hasher::{BuildNoHashHasher, IntMap}; use ruff_text_size::TextRange; +use rustc_hash::FxHashMap; use rustpython_parser::ast::{Expr, Ranged, Stmt}; use smallvec::SmallVec; @@ -74,7 +73,7 @@ pub struct SemanticModel<'a> { /// /// In this case, the binding created by `x = 1` shadows the binding created by `import x`, /// despite the fact that they're in different scopes. - pub shadowed_bindings: HashMap>, + pub shadowed_bindings: FxHashMap, /// Map from binding index to indexes of bindings that annotate it (in the same scope). /// @@ -97,7 +96,7 @@ pub struct SemanticModel<'a> { /// In this case, we _do_ store the binding created by `x: int` directly on the scope, and not /// as a delayed annotation. Annotations are thus treated as bindings only when they are the /// first binding in a scope; any annotations that follow are treated as "delayed" annotations. - delayed_annotations: HashMap, BuildNoHashHasher>, + delayed_annotations: FxHashMap>, /// Map from binding ID to the IDs of all scopes in which it is declared a `global` or /// `nonlocal`. @@ -112,7 +111,7 @@ pub struct SemanticModel<'a> { /// /// In this case, the binding created by `x = 1` is rebound within the scope created by `f` /// by way of the `global x` statement. - rebinding_scopes: HashMap, BuildNoHashHasher>, + rebinding_scopes: FxHashMap>, /// Flags for the semantic model. pub flags: SemanticModelFlags, @@ -137,9 +136,9 @@ impl<'a> SemanticModel<'a> { resolved_references: ResolvedReferences::default(), unresolved_references: UnresolvedReferences::default(), globals: GlobalsArena::default(), - shadowed_bindings: IntMap::default(), - delayed_annotations: IntMap::default(), - rebinding_scopes: IntMap::default(), + shadowed_bindings: FxHashMap::default(), + delayed_annotations: FxHashMap::default(), + rebinding_scopes: FxHashMap::default(), flags: SemanticModelFlags::new(path), handled_exceptions: Vec::default(), } diff --git a/crates/ruff_python_semantic/src/scope.rs b/crates/ruff_python_semantic/src/scope.rs index 431f22ab99..1437655a46 100644 --- a/crates/ruff_python_semantic/src/scope.rs +++ b/crates/ruff_python_semantic/src/scope.rs @@ -1,8 +1,6 @@ -use bitflags::bitflags; -use nohash_hasher::{BuildNoHashHasher, IntMap}; -use std::collections::HashMap; use std::ops::{Deref, DerefMut}; +use bitflags::bitflags; use rustc_hash::FxHashMap; use rustpython_parser::ast; @@ -37,7 +35,7 @@ pub struct Scope<'a> { /// ``` /// /// In this case, the binding created by `x = 2` shadows the binding created by `x = 1`. - shadowed_bindings: HashMap>, + shadowed_bindings: FxHashMap, /// Index into the globals arena, if the scope contains any globally-declared symbols. globals_id: Option, @@ -53,7 +51,7 @@ impl<'a> Scope<'a> { parent: None, star_imports: Vec::default(), bindings: FxHashMap::default(), - shadowed_bindings: IntMap::default(), + shadowed_bindings: FxHashMap::default(), globals_id: None, flags: ScopeFlags::empty(), } @@ -65,7 +63,7 @@ impl<'a> Scope<'a> { parent: Some(parent), star_imports: Vec::default(), bindings: FxHashMap::default(), - shadowed_bindings: IntMap::default(), + shadowed_bindings: FxHashMap::default(), globals_id: None, flags: ScopeFlags::empty(), }