// Patch summary and review notes. These `//` lines sit ahead of the first `diff --git` header and belong to no hunk.
//
// crates/bench/Cargo.toml
//   Adds the `blake3` dependency at version 1.5.0 with the `rayon` and `mmap` features.
//
// crates/bench/benches/checksum.rs
//   Adds two Criterion benchmark functions, each looping over `FILENAMES`:
//     - `blake3_wheel`: inside `b.iter`, reads the file with `fs::read` and hashes the bytes with `blake3::hash`.
//     - `blake3_mmap_wheel`: inside `b.iter`, creates a `blake3::Hasher`, passes the file path to
//       `update_mmap_rayon`, then calls `finalize`.
//   Both pass the digest through `std::hint::black_box` and call `.unwrap()` on the fallible step.
//   `criterion_group!` gains the two new entries; the seven previously registered `*_wheel` entries are
//   replaced by commented-out copies, so only the blake3 benchmarks stay registered.
//   NOTE(review): both functions format paths under `/Users/crmarsh/workspace/puffin/`, which looks
//   machine-specific - confirm before merging.
//   NOTE(review): presumably the other benchmarks were disabled only for a local run - confirm whether
//   they should stay registered.
//
// crates/puffin-extract/Cargo.toml
//   Adds `blake3 = "1.5.0"` with no feature list.
//
// crates/puffin-extract/src/lib.rs
//   Removes the `std::hash::Hasher` and `sha::utils::{Digest, DigestExt}` imports and drops
//   `TokioAsyncReadCompatExt` from the `tokio_util::compat` import.
//   `HashingReader` loses its generic hasher field and instead borrows `&'a mut blake3::Hasher`;
//   `new` takes that borrow and `into_inner` is removed.
//   `poll_read` polls the inner reader and, on `Poll::Ready(Ok(()))`, now passes `buf.filled()` to
//   `update_rayon` (previously `write`); every other poll result is returned unchanged.
//   `unzip_no_seek` now builds `blake3::Hasher::default()` where it built `seahash::SeaHasher::default()`.
//   NOTE(review): `update_rayon` presumably requires blake3's `rayon` feature, which this crate's manifest
//   line does not request - confirm the crate builds on its own.
//   NOTE(review): the whole filled region of `buf` is hashed on each ready poll - verify callers always
//   pass a `ReadBuf` with nothing filled yet, otherwise earlier bytes would be hashed again.
//   NOTE(review): the hunk adds a commented-out bound (`// H: std::hash::Hasher + Unpin,`) - looks like a
//   leftover; confirm it can be dropped.
//
// crates/puffin-installer/src/downloader.rs
//   The visible hunk removes the `std::hash::Hasher` and `sha2::Digest` imports.
//
diff --git a/crates/bench/Cargo.toml b/crates/bench/Cargo.toml index b86b3d7af..90d51f97b 100644 --- a/crates/bench/Cargo.toml +++ b/crates/bench/Cargo.toml @@ -46,3 +46,4 @@ sha2 = { workspace = true } metrohash = "1.0.6" cityhash = "0.1.1" xxhash-rust = { version = "0.8.8", features = ["xxh3"] } +blake3 = { version = "1.5.0", features = ["rayon", "mmap"] } diff --git a/crates/bench/benches/checksum.rs b/crates/bench/benches/checksum.rs index e648a76db..2e6e81582 100644 --- a/crates/bench/benches/checksum.rs +++ b/crates/bench/benches/checksum.rs @@ -185,6 +185,39 @@ fn cityhash128_wheel(c: &mut Criterion) { group.finish(); } +fn blake3_wheel(c: &mut Criterion) { + let mut group = c.benchmark_group("blake3_wheel"); + + for filename in FILENAMES { + group.bench_function(BenchmarkId::from_parameter(filename), |b| { + b.iter(|| { + let file = fs::read(format!("/Users/crmarsh/workspace/puffin/{filename}")).unwrap(); + std::hint::black_box(blake3::hash(&file)); + }); + }); + } + + group.finish(); +} + +fn blake3_mmap_wheel(c: &mut Criterion) { + let mut group = c.benchmark_group("blake3_mmap_wheel"); + + for filename in FILENAMES { + group.bench_function(BenchmarkId::from_parameter(filename), |b| { + b.iter(|| { + let mut hasher = blake3::Hasher::new(); + hasher + .update_mmap_rayon(format!("/Users/crmarsh/workspace/puffin/{filename}")) + .unwrap(); + std::hint::black_box(hasher.finalize()); + }); + }); + } + + group.finish(); +} + fn crc(c: &mut Criterion) { let mut group = c.benchmark_group("crc"); @@ -212,13 +245,15 @@ fn crc(c: &mut Criterion) { criterion_group!( checksum, - xxhash_wheel, - seahash_wheel, - metrohash_wheel, - cityhash64_wheel, - cityhash128_wheel, - sha256_wheel, - crc32_wheel, + blake3_mmap_wheel, + blake3_wheel, + // xxhash_wheel, + // seahash_wheel, + // metrohash_wheel, + // cityhash64_wheel, + // cityhash128_wheel, + // sha256_wheel, + // crc32_wheel, // sha256_record, // checksum_record, // seahash_record, diff --git 
a/crates/puffin-extract/Cargo.toml b/crates/puffin-extract/Cargo.toml index 51762ad60..d91cb43d2 100644 --- a/crates/puffin-extract/Cargo.toml +++ b/crates/puffin-extract/Cargo.toml @@ -26,3 +26,4 @@ seahash.workspace = true sha2.workspace = true sha = "1.0.3" hex.workspace = true +blake3 = "1.5.0" diff --git a/crates/puffin-extract/src/lib.rs b/crates/puffin-extract/src/lib.rs index 1a3572385..855fa9cd1 100644 --- a/crates/puffin-extract/src/lib.rs +++ b/crates/puffin-extract/src/lib.rs @@ -1,12 +1,10 @@ -use std::hash::Hasher; use std::path::{Path, PathBuf}; use std::pin::Pin; use std::task::{Context, Poll}; use rayon::prelude::*; -use sha::utils::{Digest, DigestExt}; use tokio::io::ReadBuf; -use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}; +use tokio_util::compat::FuturesAsyncReadCompatExt; use zip::result::ZipError; use zip::ZipArchive; @@ -30,29 +28,24 @@ pub enum Error { InvalidArchive(Vec), } -struct HashingReader { +struct HashingReader<'a, R> { reader: R, - hasher: H, + hasher: &'a mut blake3::Hasher, } -impl HashingReader +impl<'a, R> HashingReader<'a, R> where R: tokio::io::AsyncRead + Unpin, - H: std::hash::Hasher + Unpin, + // H: std::hash::Hasher + Unpin, { - fn new(reader: R, hasher: H) -> Self { + fn new(reader: R, hasher: &'a mut blake3::Hasher) -> Self { HashingReader { reader, hasher } } - - fn into_inner(self) -> (R, H) { - (self.reader, self.hasher) - } } -impl tokio::io::AsyncRead for HashingReader +impl<'a, R> tokio::io::AsyncRead for HashingReader<'a, R> where R: tokio::io::AsyncRead + Unpin, - H: std::hash::Hasher + Unpin, { fn poll_read( mut self: Pin<&mut Self>, @@ -62,7 +55,7 @@ where let reader = Pin::new(&mut self.reader); match reader.poll_read(cx, buf) { Poll::Ready(Ok(())) => { - self.hasher.write(buf.filled()); + self.hasher.update_rayon(buf.filled()); Poll::Ready(Ok(())) } other => other, @@ -79,7 +72,7 @@ pub async fn unzip_no_seek( reader: R, target: &Path, ) -> Result<(), Error> { - let mut hasher = 
seahash::SeaHasher::default(); + let mut hasher = blake3::Hasher::default(); let reader = HashingReader::new(reader, &mut hasher); let mut zip = async_zip::base::read::stream::ZipFileReader::with_tokio(reader); diff --git a/crates/puffin-installer/src/downloader.rs b/crates/puffin-installer/src/downloader.rs index 42692092e..1262366c7 100644 --- a/crates/puffin-installer/src/downloader.rs +++ b/crates/puffin-installer/src/downloader.rs @@ -1,10 +1,8 @@ use std::cmp::Reverse; -use std::hash::Hasher; use std::path::{Path, PathBuf}; use std::sync::Arc; use futures::{FutureExt, Stream, StreamExt, TryFutureExt, TryStreamExt}; -use sha2::Digest; use tokio::task::JoinError; use tracing::instrument; use url::Url;