This commit is contained in:
Charlie Marsh 2024-01-25 09:29:00 -05:00
parent 71c9a876aa
commit c2453f99ce
5 changed files with 53 additions and 25 deletions

View File

@ -46,3 +46,4 @@ sha2 = { workspace = true }
metrohash = "1.0.6"
cityhash = "0.1.1"
xxhash-rust = { version = "0.8.8", features = ["xxh3"] }
blake3 = { version = "1.5.0", features = ["rayon", "mmap"] }

View File

@ -185,6 +185,39 @@ fn cityhash128_wheel(c: &mut Criterion<WallTime>) {
group.finish();
}
/// Benchmarks BLAKE3 over every entry in `FILENAMES`, one Criterion benchmark per file.
///
/// Each measured iteration formats the path, reads the whole file into memory with
/// `fs::read`, and hashes the resulting buffer with `blake3::hash`, so the reported
/// time covers both the file read and the hash computation.
///
/// NOTE(review): the directory prefix is a hard-coded absolute path on the author's
/// machine; the benchmark panics (via `unwrap`) if a file cannot be read from there.
fn blake3_wheel(c: &mut Criterion<WallTime>) {
    let mut group = c.benchmark_group("blake3_wheel");
    for filename in FILENAMES {
        let id = BenchmarkId::from_parameter(filename);
        group.bench_function(id, |bencher| {
            bencher.iter(|| {
                // The path is rebuilt and the file re-read on every iteration on purpose:
                // both are part of the measured work.
                let path = format!("/Users/crmarsh/workspace/puffin/{filename}");
                let contents = fs::read(path).unwrap();
                let digest = blake3::hash(&contents);
                // Keep the optimizer from discarding the computed digest.
                std::hint::black_box(digest);
            });
        });
    }
    group.finish();
}
/// Benchmarks BLAKE3 over every entry in `FILENAMES` using the hasher's
/// `update_mmap_rayon` file input, one Criterion benchmark per file.
///
/// Each measured iteration creates a fresh `blake3::Hasher`, feeds it the file by
/// path through `update_mmap_rayon`, and finalizes the digest. Unlike
/// `blake3_wheel`, the file contents are never read into a buffer by this function.
///
/// NOTE(review): the directory prefix is a hard-coded absolute path on the author's
/// machine; the benchmark panics (via `unwrap`) if the update call returns an error.
fn blake3_mmap_wheel(c: &mut Criterion<WallTime>) {
    let mut group = c.benchmark_group("blake3_mmap_wheel");
    for filename in FILENAMES {
        let id = BenchmarkId::from_parameter(filename);
        group.bench_function(id, |bencher| {
            bencher.iter(|| {
                // A new hasher per iteration so no state carries over between runs.
                let mut state = blake3::Hasher::new();
                let path = format!("/Users/crmarsh/workspace/puffin/{filename}");
                state.update_mmap_rayon(path).unwrap();
                let digest = state.finalize();
                // Keep the optimizer from discarding the computed digest.
                std::hint::black_box(digest);
            });
        });
    }
    group.finish();
}
fn crc(c: &mut Criterion<WallTime>) {
let mut group = c.benchmark_group("crc");
@ -212,13 +245,15 @@ fn crc(c: &mut Criterion<WallTime>) {
criterion_group!(
checksum,
xxhash_wheel,
seahash_wheel,
metrohash_wheel,
cityhash64_wheel,
cityhash128_wheel,
sha256_wheel,
crc32_wheel,
blake3_mmap_wheel,
blake3_wheel,
// xxhash_wheel,
// seahash_wheel,
// metrohash_wheel,
// cityhash64_wheel,
// cityhash128_wheel,
// sha256_wheel,
// crc32_wheel,
// sha256_record,
// checksum_record,
// seahash_record,

View File

@ -26,3 +26,4 @@ seahash.workspace = true
sha2.workspace = true
sha = "1.0.3"
hex.workspace = true
blake3 = "1.5.0"

View File

@ -1,12 +1,10 @@
use std::hash::Hasher;
use std::path::{Path, PathBuf};
use std::pin::Pin;
use std::task::{Context, Poll};
use rayon::prelude::*;
use sha::utils::{Digest, DigestExt};
use tokio::io::ReadBuf;
use tokio_util::compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt};
use tokio_util::compat::FuturesAsyncReadCompatExt;
use zip::result::ZipError;
use zip::ZipArchive;
@ -30,29 +28,24 @@ pub enum Error {
InvalidArchive(Vec<fs_err::DirEntry>),
}
struct HashingReader<R, H> {
struct HashingReader<'a, R> {
reader: R,
hasher: H,
hasher: &'a mut blake3::Hasher,
}
impl<R, H> HashingReader<R, H>
impl<'a, R> HashingReader<'a, R>
where
R: tokio::io::AsyncRead + Unpin,
H: std::hash::Hasher + Unpin,
// H: std::hash::Hasher + Unpin,
{
fn new(reader: R, hasher: H) -> Self {
fn new(reader: R, hasher: &'a mut blake3::Hasher) -> Self {
HashingReader { reader, hasher }
}
fn into_inner(self) -> (R, H) {
(self.reader, self.hasher)
}
}
impl<R, H> tokio::io::AsyncRead for HashingReader<R, H>
impl<'a, R> tokio::io::AsyncRead for HashingReader<'a, R>
where
R: tokio::io::AsyncRead + Unpin,
H: std::hash::Hasher + Unpin,
{
fn poll_read(
mut self: Pin<&mut Self>,
@ -62,7 +55,7 @@ where
let reader = Pin::new(&mut self.reader);
match reader.poll_read(cx, buf) {
Poll::Ready(Ok(())) => {
self.hasher.write(buf.filled());
self.hasher.update_rayon(buf.filled());
Poll::Ready(Ok(()))
}
other => other,
@ -79,7 +72,7 @@ pub async fn unzip_no_seek<R: tokio::io::AsyncRead + Unpin>(
reader: R,
target: &Path,
) -> Result<(), Error> {
let mut hasher = seahash::SeaHasher::default();
let mut hasher = blake3::Hasher::default();
let reader = HashingReader::new(reader, &mut hasher);
let mut zip = async_zip::base::read::stream::ZipFileReader::with_tokio(reader);

View File

@ -1,10 +1,8 @@
use std::cmp::Reverse;
use std::hash::Hasher;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use futures::{FutureExt, Stream, StreamExt, TryFutureExt, TryStreamExt};
use sha2::Digest;
use tokio::task::JoinError;
use tracing::instrument;
use url::Url;