From ae83a74309a49dc3a1863d5cf6bfb7ee2d5251e0 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Tue, 26 Dec 2023 16:57:20 -0500 Subject: [PATCH] Review feedback for HTML indexes (#733) See: https://github.com/astral-sh/puffin/pull/719 --- crates/puffin-client/src/error.rs | 2 +- crates/puffin-client/src/html.rs | 13 +++++++++---- crates/pypi-types/src/simple_json.rs | 1 + 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/crates/puffin-client/src/error.rs b/crates/puffin-client/src/error.rs index 77d8eed14..78b8e0594 100644 --- a/crates/puffin-client/src/error.rs +++ b/crates/puffin-client/src/error.rs @@ -93,7 +93,7 @@ pub enum Error { #[error("Invalid `Content-Type` header for {0}")] InvalidContentTypeHeader(Url, #[source] http::header::ToStrError), - #[error("Unsupported `Content-Type` \"{1}\" for {0}")] + #[error("Unsupported `Content-Type` \"{1}\" for {0}. Expected JSON or HTML.")] UnsupportedMediaType(Url, String), } diff --git a/crates/puffin-client/src/html.rs b/crates/puffin-client/src/html.rs index e3b9338fa..9c8916808 100644 --- a/crates/puffin-client/src/html.rs +++ b/crates/puffin-client/src/html.rs @@ -15,7 +15,7 @@ pub(crate) struct SimpleHtml { impl SimpleHtml { /// Parse the list of [`File`]s from the simple HTML page returned by the given URL. pub(crate) fn parse(text: &str, url: &Url) -> Result { - let dom = tl::parse(text, tl::ParserOptions::default()).unwrap(); + let dom = tl::parse(text, tl::ParserOptions::default())?; // Parse the first `` tag, if any, to determine the base URL to which all // relative URLs should be resolved. The HTML spec requires that the `` tag @@ -49,7 +49,7 @@ impl SimpleHtml { return Ok(None); }; let href = std::str::from_utf8(href.as_bytes())?; - let url = Url::parse(href)?; + let url = Url::parse(href).map_err(|err| Error::UrlParse(href.to_string(), err))?; Ok(Some(url)) } @@ -89,7 +89,9 @@ impl SimpleHtml { .flatten() .ok_or_else(|| Error::MissingHref(base.clone()))?; let href = std::str::from_utf8(href.as_bytes())?; - let url = base.join(href)?; + let url = base + .join(href) + .map_err(|err| Error::UrlParse(href.to_string(), err))?; // Extract the filename from the body text, which MUST match that of // the final path component of the URL. @@ -164,8 +166,11 @@ pub enum Error { #[error(transparent)] Utf8(#[from] std::str::Utf8Error), + #[error("Failed to parse URL: {0}")] + UrlParse(String, #[source] url::ParseError), + #[error(transparent)] - UrlParse(#[from] url::ParseError), + HtmlParse(#[from] tl::ParseError), #[error("Missing href attribute on URL: {0}")] MissingHref(Url), diff --git a/crates/pypi-types/src/simple_json.rs b/crates/pypi-types/src/simple_json.rs index 6ed31b5b1..acc76126c 100644 --- a/crates/pypi-types/src/simple_json.rs +++ b/crates/pypi-types/src/simple_json.rs @@ -83,5 +83,6 @@ impl Yanked { /// only support SHA 256 atm. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct Hashes { + // TODO(charlie): Hashes should be optional. pub sha256: String, }