6 Commits

Author SHA1 Message Date
Sunshine
8462b6bc31 Merge pull request #207 from snshn/bump-version
bump version (2.3.0 -> 2.3.1)
2020-08-01 21:00:26 -04:00
Sunshine
92f38556b6 bump version (2.3.0 -> 2.3.1) 2020-08-01 20:24:38 -04:00
Sunshine
c0bdeab2e3 Merge pull request #206 from snshn/update-crates
Update crates
2020-08-01 19:43:00 -04:00
Sunshine
5a502eab4b update crate versions 2020-08-01 19:20:20 -04:00
Sunshine
19f08265a2 Merge pull request #205 from snshn/base-tag
Implement support for BASE tag
2020-08-01 02:47:33 -04:00
Sunshine
1d6392cb28 implement support for BASE tag 2020-08-01 02:35:07 -04:00
4 changed files with 575 additions and 526 deletions

985
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
[package]
name = "monolith"
version = "2.3.0"
version = "2.3.1"
edition = "2018"
authors = [
"Sunshine <sunshine@uberspace.net>",
@@ -12,12 +12,12 @@ authors = [
description = "CLI tool for saving web pages as a single HTML file"
[dependencies]
base64 = "0.12.2"
chrono = "0.4.11" # Used to render comments indicating the time the page was saved
base64 = "0.12.3"
chrono = "0.4.13" # Used to render comments indicating the time the page was saved
clap = "2.33.1"
cssparser = "0.27.2"
html5ever = "0.24.1"
sha2 = "0.9.0" # Used in calculating checksums during integrity checks
sha2 = "0.9.1" # Used in calculating checksums during integrity checks
url = "2.1.1"
[dependencies.reqwest]

View File

@@ -29,6 +29,31 @@ struct SrcSetItem<'a> {
// String values treated as icon markers.
// NOTE(review): presumably compared against a LINK tag's rel attribute — confirm at the use site.
const ICON_VALUES: &[&str] = &["icon", "shortcut icon"];
/// Builds a new DOM from `document` and appends a BASE element whose `href`
/// attribute is set to `url`.
///
/// The input document is serialized and parsed again via `html_to_dom`; the
/// BASE element is created in that freshly parsed DOM and pushed as the last
/// child of the node that `get_child_node_by_name` returns for `"head"`
/// (looked up under the node returned for `"html"`).
///
/// # Panics
///
/// Panics if the document cannot be serialized, or if the serialized bytes
/// are not valid UTF-8.
pub fn add_base_tag(document: &Handle, url: String) -> RcDom {
    // Round-trip the document through its serialized form to get a DOM to modify
    let mut serialized: Vec<u8> = Vec::new();
    serialize(&mut serialized, document, SerializeOpts::default())
        .expect("unable to serialize DOM into buffer");
    let markup = String::from_utf8(serialized).unwrap();
    let mut dom = html_to_dom(&markup);

    // Locate HEAD within the re-parsed document
    let root = dom.get_document();
    let html_node = get_child_node_by_name(&root, "html");
    let head_node = get_child_node_by_name(&html_node, "head");

    // Build <base href="{url}">
    let href_attr = Attribute {
        name: QualName::new(None, ns!(), local_name!("href")),
        value: format_tendril!("{}", url),
    };
    let base_node = dom.create_element(
        QualName::new(None, ns!(), local_name!("base")),
        vec![href_attr],
        Default::default(),
    );

    // Insert BASE tag into HEAD (as its last child)
    head_node.children.borrow_mut().push(base_node);

    dom
}
pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom {
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, document, SerializeOpts::default())
@@ -54,7 +79,7 @@ pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom {
Default::default(),
);
// Append favicon node to HEAD
// Insert favicon LINK tag into HEAD
head.children.borrow_mut().push(favicon_node.clone());
dom
@@ -205,6 +230,56 @@ pub fn has_proper_integrity(data: &[u8], integrity: &str) -> bool {
}
}
/// Reports whether the DOM subtree rooted at `handle` contains a BASE element
/// carrying a non-blank `href` attribute.
///
/// Only `Document` and `Element` nodes are inspected and descended into; any
/// other node kind yields `false`. A BASE element whose `href` is absent or
/// contains nothing but whitespace does not count, and the search continues
/// into that element's children.
pub fn has_base_tag(handle: &Handle) -> bool {
    match handle.data {
        NodeData::Document => {
            // Dig deeper
            handle
                .children
                .borrow()
                .iter()
                .any(|child| has_base_tag(child))
        }
        NodeData::Element {
            ref name,
            ref attrs,
            ..
        } => {
            let tag_name: &str = name.local.as_ref();

            // This is a read-only check, so a shared borrow is enough; a mutable
            // borrow here would panic if the attribute list were borrowed elsewhere.
            let is_base_with_href = tag_name == "base"
                && attrs
                    .borrow()
                    .iter()
                    .any(|attr| &attr.name.local == "href" && !attr.value.trim().is_empty());

            // Dig deeper only when this element itself did not qualify
            is_base_with_href
                || handle
                    .children
                    .borrow()
                    .iter()
                    .any(|child| has_base_tag(child))
        }
        _ => false,
    }
}
pub fn has_favicon(handle: &Handle) -> bool {
let mut found_favicon: bool = false;
@@ -600,6 +675,7 @@ pub fn walk_and_embed_assets(
}
}
LinkType::Unknown => {
// Make sure that all other LINKs' href attributes are full URLs
for attr in attrs_mut.iter_mut() {
let attr_name: &str = &attr.name.local;
if attr_name.eq_ignore_ascii_case("href") {
@@ -612,6 +688,20 @@ pub fn walk_and_embed_assets(
}
}
}
"base" => {
if is_http_url(url) {
// Ensure BASE href is a full URL, not a relative one
for attr in attrs_mut.iter_mut() {
let attr_name: &str = &attr.name.local;
if attr_name.eq_ignore_ascii_case("href") {
let href_full_url =
resolve_url(&url, attr.value.trim()).unwrap_or_default();
attr.value.clear();
attr.value.push_slice(&href_full_url.as_str());
}
}
}
}
"body" => {
// Find and remove background attribute(s), keep value of the last found one
let mut background: String = str!();

View File

@@ -9,7 +9,8 @@ use std::process;
use std::time::Duration;
use monolith::html::{
add_favicon, has_favicon, html_to_dom, metadata_tag, stringify_document, walk_and_embed_assets,
add_base_tag, add_favicon, has_base_tag, has_favicon, html_to_dom, metadata_tag,
stringify_document, walk_and_embed_assets,
};
use monolith::opts::Options;
use monolith::url::{
@@ -141,6 +142,14 @@ fn main() {
process::exit(1);
}
// Embed remote assets
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
// Take care of BASE tag
if is_http_url(base_url.clone()) && !has_base_tag(&dom.document) {
dom = add_base_tag(&dom.document, base_url.clone());
}
// Request and embed /favicon.ico (unless it's already linked in the document)
if !options.no_images && is_http_url(target_url) && !has_favicon(&dom.document) {
let favicon_ico_url: String = resolve_url(&base_url, "/favicon.ico").unwrap();
@@ -163,15 +172,12 @@ fn main() {
}
}
// Embed remote assets
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
// Serialize DOM tree
let mut result: String = stringify_document(&dom.document, &options);
// Add metadata tag
if !options.no_metadata {
let metadata_comment = metadata_tag(&base_url);
let metadata_comment: String = metadata_tag(&base_url);
result.insert_str(0, &metadata_comment);
if metadata_comment.len() > 0 {
result.insert_str(metadata_comment.len(), "\n");