puffin-client: generalize SimpleMetadataRaw into OwnedArchive<A> (#1208)

It turns out that the pattern I coded up for SimpleMetadataRaw is
generally useful when working with rkyv. This commit makes it generic by
supporting any type that implements rkyv's traits, and makes a few
simplifying assumptions by picking a concrete serializer, validator and
deserializer. In effect, this lets us own any archived value.

We also rejigger the API a little bit and double-down on
`OwnedArchive<A>` just being an owned wrapper for `Archived<A>`. Namely,
we implement `Deref` and turn its inherent methods into methods that
require fully qualified syntax. (As is standard for things that
implement `Deref` to avoid ambiguity with the deref target's methods.)

(This PR also makes a couple small simplifications to our custom rkyv
serializer since we no longer need to use it directly. We do still need
to name the type in trait bounds, so it has to be public.)
This commit is contained in:
Andrew Gallant 2024-01-31 11:56:34 -05:00 committed by GitHub
parent 234e8d0bb7
commit b9d89e7624
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 158 additions and 108 deletions

View File

@ -2,9 +2,10 @@ pub use cached_client::{CacheControl, CachedClient, CachedClientError, DataWithC
pub use error::{Error, ErrorKind};
pub use flat_index::{FlatDistributions, FlatIndex, FlatIndexClient, FlatIndexError};
pub use registry_client::{
read_metadata_async, RegistryClient, RegistryClientBuilder, SimpleMetadata, SimpleMetadataRaw,
SimpleMetadatum, VersionFiles,
read_metadata_async, RegistryClient, RegistryClientBuilder, SimpleMetadata, SimpleMetadatum,
VersionFiles,
};
pub use rkyvutil::OwnedArchive;
mod cache_headers;
mod cached_client;

View File

@ -616,103 +616,6 @@ impl IntoIterator for SimpleMetadata {
}
}
/// Owns the serialized bytes of an archived `SimpleMetadata`.
///
/// This behaves like an `Archived<SimpleMetadata>` that carries its own
/// buffer instead of borrowing one. Bytes handed in from the outside are
/// validated once at construction time; borrowing the archived view
/// afterwards performs no further validation.
#[derive(Debug)]
pub struct SimpleMetadataRaw {
    raw: rkyv::util::AlignedVec,
}

impl SimpleMetadataRaw {
    /// Wraps the aligned bytes of a serialized `SimpleMetadata` after
    /// checking that they form a valid archive.
    ///
    /// # Errors
    ///
    /// Returns an error when validation rejects the bytes (e.g., they
    /// contain unaligned pointers or strings that aren't valid UTF-8).
    pub fn new(raw: rkyv::util::AlignedVec) -> Result<SimpleMetadataRaw, Error> {
        // The validation error is flattened into a string because its type
        // does not implement Send, and there is no real need to carry the
        // structured error around.
        rkyv::validation::validators::check_archived_root::<SimpleMetadata>(&raw)
            .map(|_| ())
            .map_err(|err| ErrorKind::ArchiveRead(err.to_string()))?;
        Ok(Self { raw })
    }

    /// Same as `SimpleMetadataRaw::new`, except the bytes are pulled from
    /// the given reader.
    ///
    /// The reader is consumed in its entirety.
    ///
    /// # Errors
    ///
    /// Returns an error when reading fails, or when validation rejects the
    /// bytes (e.g., they contain unaligned pointers or strings that aren't
    /// valid UTF-8).
    pub fn from_reader<R: std::io::Read>(mut rdr: R) -> Result<SimpleMetadataRaw, Error> {
        let mut aligned = rkyv::util::AlignedVec::with_capacity(1024);
        if let Err(err) = aligned.extend_from_reader(&mut rdr) {
            return Err(ErrorKind::Io(err).into());
        }
        Self::new(aligned)
    }

    /// Serializes an in-memory `SimpleMetadata` and takes ownership of the
    /// resulting bytes.
    ///
    /// The freshly serialized bytes are not run through validation again.
    ///
    /// # Errors
    ///
    /// Fails when the value cannot be archived. Currently, this, at
    /// minimum, includes cases where a `SimpleMetadata` contains a
    /// `PathBuf` that is not valid UTF-8.
    pub fn from_unarchived(unarchived: &SimpleMetadata) -> Result<SimpleMetadataRaw, Error> {
        use rkyv::ser::Serializer as _;

        let mut ser = crate::rkyvutil::Serializer::<4096>::new();
        ser.serialize_value(unarchived)
            .map_err(ErrorKind::ArchiveWrite)?;
        Ok(Self {
            raw: ser.into_serializer().into_inner(),
        })
    }

    /// Copies the serialized bytes of this archived value into the writer.
    ///
    /// # Errors
    ///
    /// Any failure reported by the writer is returned to the caller.
    pub fn write<W: std::io::Write>(&self, mut wtr: W) -> Result<(), Error> {
        wtr.write_all(&self.raw).map_err(ErrorKind::Io)?;
        Ok(())
    }

    /// Borrows this owned value as an archived `SimpleMetadata`.
    pub fn as_archived(&self) -> &rkyv::Archived<SimpleMetadata> {
        let bytes: &[u8] = &self.raw;
        // SAFETY: The buffer was either validated as an archived
        // `SimpleMetadata` by `new`, or produced by serializing a
        // `SimpleMetadata` in `from_unarchived`. Nothing in this type
        // mutates the buffer afterwards, so validation can be skipped here.
        unsafe { rkyv::archived_root::<SimpleMetadata>(bytes) }
    }

    /// Exposes the raw serialized bytes backing this value.
    ///
    /// They are guaranteed to be a valid serialization of
    /// `Archived<SimpleMetadata>`.
    pub fn as_bytes(&self) -> &[u8] {
        let bytes: &[u8] = &self.raw;
        bytes
    }

    /// Rebuilds the original `SimpleMetadata` from its archived form.
    ///
    /// # Panics
    ///
    /// Panics if deserialization fails, which is treated as a bug since the
    /// archive is expected to be valid.
    pub fn deserialize(&self) -> SimpleMetadata {
        use rkyv::Deserialize;

        let mut shared = rkyv::de::deserializers::SharedDeserializeMap::new();
        self.as_archived()
            .deserialize(&mut shared)
            .expect("valid archive must deserialize correctly")
    }
}
#[derive(Debug)]
enum MediaType {
Json,

View File

@ -1,7 +1,17 @@
/*!
Defines some helpers for use with `rkyv`.
Principally, we define our own implementation of the `Serializer` trait.
# Owned archived type
Typical usage patterns with rkyv involve using an `&Archived<T>`, where values
of that type are cast from a `&[u8]`. The owned archive type in this module
effectively provides a way to use `Archived<T>` without needing to worry about
the lifetime of the buffer it's attached to. This works by making the owned
archive type own the buffer itself.
# Custom serializer
This module provides our own implementation of the `Serializer` trait.
This involves a fair bit of boilerplate, but it was largely copied from
`CompositeSerializer`. (Indeed, our serializer wraps a `CompositeSerializer`.)
@ -17,15 +27,156 @@ fail.
use std::convert::Infallible;
use rkyv::{
de::deserializers::SharedDeserializeMap,
ser::serializers::{
AlignedSerializer, AllocScratch, AllocScratchError, AllocSerializer, CompositeSerializer,
AlignedSerializer, AllocScratch, AllocScratchError, CompositeSerializer,
CompositeSerializerError, FallbackScratch, HeapScratch, SharedSerializeMap,
SharedSerializeMapError,
},
util::AlignedVec,
Archive, ArchiveUnsized, Fallible,
validation::validators::DefaultValidator,
Archive, ArchiveUnsized, CheckBytes, Deserialize, Fallible, Serialize,
};
use crate::{Error, ErrorKind};
/// An owned archived type.
///
/// This type is effectively an owned version of `Archived<A>`. Normally, when
/// one gets an archived type from a buffer, the archive type is bound to the
/// lifetime of the buffer. This effectively provides a home for that buffer so
/// that one can pass around an archived type as if it were owned.
///
/// Constructing the type from raw bytes requires validating that the bytes
/// are a valid representation of an `Archived<A>`, but subsequent accesses
/// (via deref) are free.
///
/// Note that this type makes a number of assumptions about the specific
/// serializer, deserializer and validator used. This type could be made
/// more generic, but it's not clear we need that in puffin. By making our
/// choices concrete here, we make use of this type much simpler to understand.
/// Unfortunately, AG couldn't find a way of making the trait bounds simpler,
/// so if `OwnedArchive` is being used in trait implementations, the trait
/// bounds will likely need to be copied from here.
#[derive(Debug)]
pub struct OwnedArchive<A> {
// The aligned buffer holding the serialized bytes of an `Archived<A>`.
raw: rkyv::util::AlignedVec,
// Zero-sized marker tying this buffer to the unarchived type `A`; no value
// of type `A` is actually stored.
archive: std::marker::PhantomData<A>,
}
impl<A> OwnedArchive<A>
where
    A: Archive + Serialize<Serializer<4096>>,
    A::Archived: (for<'a> CheckBytes<DefaultValidator<'a>>) + Deserialize<A, SharedDeserializeMap>,
{
    /// Wraps the aligned bytes of a serialized `A` after checking that they
    /// form a valid archive.
    ///
    /// # Errors
    ///
    /// Returns an error when validation rejects the bytes (e.g., they
    /// contain unaligned pointers or strings that aren't valid UTF-8).
    pub fn new(raw: rkyv::util::AlignedVec) -> Result<OwnedArchive<A>, Error> {
        // The validation error is flattened into a string because its type
        // does not implement Send, and there is no real need to carry the
        // structured error around.
        rkyv::validation::validators::check_archived_root::<A>(&raw)
            .map(|_| ())
            .map_err(|err| ErrorKind::ArchiveRead(err.to_string()))?;
        Ok(Self {
            raw,
            archive: std::marker::PhantomData,
        })
    }

    /// Same as `OwnedArchive::new`, except the bytes are pulled from the
    /// given reader.
    ///
    /// The reader is consumed in its entirety.
    ///
    /// # Errors
    ///
    /// Returns an error when reading fails, or when validation rejects the
    /// bytes (e.g., they contain unaligned pointers or strings that aren't
    /// valid UTF-8).
    pub fn from_reader<R: std::io::Read>(mut rdr: R) -> Result<OwnedArchive<A>, Error> {
        let mut aligned = rkyv::util::AlignedVec::with_capacity(1024);
        if let Err(err) = aligned.extend_from_reader(&mut rdr) {
            return Err(ErrorKind::Io(err).into());
        }
        Self::new(aligned)
    }

    /// Serializes an in-memory `A` and takes ownership of the resulting
    /// bytes.
    ///
    /// The freshly serialized bytes are not run through validation again.
    ///
    /// # Errors
    ///
    /// Fails when the value cannot be archived. Currently, this, at
    /// minimum, includes cases where an `A` contains a `PathBuf` that is
    /// not valid UTF-8.
    pub fn from_unarchived(unarchived: &A) -> Result<OwnedArchive<A>, Error> {
        // Imported anonymously: only the trait's methods are needed, and the
        // name `Serializer` also refers to this module's own serializer type.
        use rkyv::ser::Serializer as _;

        let mut ser = crate::rkyvutil::Serializer::<4096>::default();
        ser.serialize_value(unarchived)
            .map_err(ErrorKind::ArchiveWrite)?;
        Ok(Self {
            raw: ser.into_serializer().into_inner(),
            archive: std::marker::PhantomData,
        })
    }

    /// Copies the serialized bytes of this archived value into the writer.
    ///
    /// Because this type has a `Deref` impl, this is an associated function
    /// rather than a method. So, if `o` is an `OwnedArchive`, then use
    /// `OwnedArchive::write(&o, wtr)`.
    ///
    /// # Errors
    ///
    /// Any failure reported by the writer is returned to the caller.
    pub fn write<W: std::io::Write>(this: &OwnedArchive<A>, mut wtr: W) -> Result<(), Error> {
        wtr.write_all(&this.raw).map_err(ErrorKind::Io)?;
        Ok(())
    }

    /// Exposes the raw serialized bytes backing this value.
    ///
    /// They are guaranteed to be a valid serialization of `Archived<A>`.
    ///
    /// Because this type has a `Deref` impl, this is an associated function
    /// rather than a method. So, if `o` is an `OwnedArchive`, then use
    /// `OwnedArchive::as_bytes(&o)`.
    pub fn as_bytes(this: &OwnedArchive<A>) -> &[u8] {
        let bytes: &[u8] = &this.raw;
        bytes
    }

    /// Rebuilds the original `A` from its archived form.
    ///
    /// Because this type has a `Deref` impl, this is an associated function
    /// rather than a method. So, if `o` is an `OwnedArchive`, then use
    /// `OwnedArchive::deserialize(&o)`.
    ///
    /// # Panics
    ///
    /// Panics if deserialization fails, which is treated as a bug since the
    /// archive is expected to be valid.
    pub fn deserialize(this: &OwnedArchive<A>) -> A {
        // Deref coercion yields the archived view of the owned buffer.
        let archived: &A::Archived = this;
        let mut shared = SharedDeserializeMap::new();
        archived
            .deserialize(&mut shared)
            .expect("valid archive must deserialize correctly")
    }
}
/// Lets an `OwnedArchive<A>` be used as a `&Archived<A>` by reinterpreting
/// the owned buffer as the archived root value.
impl<A> std::ops::Deref for OwnedArchive<A>
where
A: Archive + Serialize<Serializer<4096>>,
A::Archived: (for<'a> CheckBytes<DefaultValidator<'a>>) + Deserialize<A, SharedDeserializeMap>,
{
type Target = A::Archived;
fn deref(&self) -> &A::Archived {
// SAFETY: The buffer is only ever set by this type's constructors.
// `new` (and `from_reader`, which delegates to it) validates the bytes
// as an archive for `A`; `from_unarchived` fills the buffer by
// serializing an `A` and relies on that output being a valid archive
// without re-validating it. Since we don't mutate the buffer after
// construction, we can skip validation here.
unsafe { rkyv::archived_root::<A>(&self.raw) }
}
}
#[derive(Default)]
pub struct Serializer<const N: usize> {
composite: CompositeSerializer<
AlignedSerializer<AlignedVec>,
@ -35,12 +186,7 @@ pub struct Serializer<const N: usize> {
}
impl<const N: usize> Serializer<N> {
pub fn new() -> Serializer<N> {
let composite = AllocSerializer::<N>::default();
Serializer { composite }
}
pub fn into_serializer(self) -> AlignedSerializer<AlignedVec> {
fn into_serializer(self) -> AlignedSerializer<AlignedVec> {
self.composite.into_serializer()
}
}