Compare commits
6 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8462b6bc31 | ||
|
|
92f38556b6 | ||
|
|
c0bdeab2e3 | ||
|
|
5a502eab4b | ||
|
|
19f08265a2 | ||
|
|
1d6392cb28 |
985
Cargo.lock
generated
985
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "monolith"
|
||||
version = "2.3.0"
|
||||
version = "2.3.1"
|
||||
edition = "2018"
|
||||
authors = [
|
||||
"Sunshine <sunshine@uberspace.net>",
|
||||
@@ -12,12 +12,12 @@ authors = [
|
||||
description = "CLI tool for saving web pages as a single HTML file"
|
||||
|
||||
[dependencies]
|
||||
base64 = "0.12.2"
|
||||
chrono = "0.4.11" # Used to render comments indicating the time the page was saved
|
||||
base64 = "0.12.3"
|
||||
chrono = "0.4.13" # Used to render comments indicating the time the page was saved
|
||||
clap = "2.33.1"
|
||||
cssparser = "0.27.2"
|
||||
html5ever = "0.24.1"
|
||||
sha2 = "0.9.0" # Used in calculating checksums during integrity checks
|
||||
sha2 = "0.9.1" # Used in calculating checksums during integrity checks
|
||||
url = "2.1.1"
|
||||
|
||||
[dependencies.reqwest]
|
||||
|
||||
92
src/html.rs
92
src/html.rs
@@ -29,6 +29,31 @@ struct SrcSetItem<'a> {
|
||||
|
||||
const ICON_VALUES: &[&str] = &["icon", "shortcut icon"];
|
||||
|
||||
pub fn add_base_tag(document: &Handle, url: String) -> RcDom {
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, document, SerializeOpts::default())
|
||||
.expect("unable to serialize DOM into buffer");
|
||||
let result = String::from_utf8(buf).unwrap();
|
||||
|
||||
let mut dom = html_to_dom(&result);
|
||||
let doc = dom.get_document();
|
||||
let html = get_child_node_by_name(&doc, "html");
|
||||
let head = get_child_node_by_name(&html, "head");
|
||||
let favicon_node = dom.create_element(
|
||||
QualName::new(None, ns!(), local_name!("base")),
|
||||
vec![Attribute {
|
||||
name: QualName::new(None, ns!(), local_name!("href")),
|
||||
value: format_tendril!("{}", url),
|
||||
}],
|
||||
Default::default(),
|
||||
);
|
||||
|
||||
// Insert BASE tag into HEAD
|
||||
head.children.borrow_mut().push(favicon_node.clone());
|
||||
|
||||
dom
|
||||
}
|
||||
|
||||
pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom {
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, document, SerializeOpts::default())
|
||||
@@ -54,7 +79,7 @@ pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom {
|
||||
Default::default(),
|
||||
);
|
||||
|
||||
// Append favicon node to HEAD
|
||||
// Insert favicon LINK tag into HEAD
|
||||
head.children.borrow_mut().push(favicon_node.clone());
|
||||
|
||||
dom
|
||||
@@ -205,6 +230,56 @@ pub fn has_proper_integrity(data: &[u8], integrity: &str) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn has_base_tag(handle: &Handle) -> bool {
|
||||
let mut found_base_tag: bool = false;
|
||||
|
||||
match handle.data {
|
||||
NodeData::Document => {
|
||||
// Dig deeper
|
||||
for child in handle.children.borrow().iter() {
|
||||
if has_base_tag(child) {
|
||||
found_base_tag = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
NodeData::Element {
|
||||
ref name,
|
||||
ref attrs,
|
||||
..
|
||||
} => {
|
||||
match name.local.as_ref() {
|
||||
"base" => {
|
||||
let attrs_mut = &mut attrs.borrow_mut();
|
||||
|
||||
for attr in attrs_mut.iter_mut() {
|
||||
if &attr.name.local == "href" {
|
||||
if !attr.value.trim().is_empty() {
|
||||
found_base_tag = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
if !found_base_tag {
|
||||
// Dig deeper
|
||||
for child in handle.children.borrow().iter() {
|
||||
if has_base_tag(child) {
|
||||
found_base_tag = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
found_base_tag
|
||||
}
|
||||
|
||||
pub fn has_favicon(handle: &Handle) -> bool {
|
||||
let mut found_favicon: bool = false;
|
||||
|
||||
@@ -600,6 +675,7 @@ pub fn walk_and_embed_assets(
|
||||
}
|
||||
}
|
||||
LinkType::Unknown => {
|
||||
// Make sure that all other LINKs' href attributes are full URLs
|
||||
for attr in attrs_mut.iter_mut() {
|
||||
let attr_name: &str = &attr.name.local;
|
||||
if attr_name.eq_ignore_ascii_case("href") {
|
||||
@@ -612,6 +688,20 @@ pub fn walk_and_embed_assets(
|
||||
}
|
||||
}
|
||||
}
|
||||
"base" => {
|
||||
if is_http_url(url) {
|
||||
// Ensure BASE href is a full URL, not a relative one
|
||||
for attr in attrs_mut.iter_mut() {
|
||||
let attr_name: &str = &attr.name.local;
|
||||
if attr_name.eq_ignore_ascii_case("href") {
|
||||
let href_full_url =
|
||||
resolve_url(&url, attr.value.trim()).unwrap_or_default();
|
||||
attr.value.clear();
|
||||
attr.value.push_slice(&href_full_url.as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
"body" => {
|
||||
// Find and remove background attribute(s), keep value of the last found one
|
||||
let mut background: String = str!();
|
||||
|
||||
16
src/main.rs
16
src/main.rs
@@ -9,7 +9,8 @@ use std::process;
|
||||
use std::time::Duration;
|
||||
|
||||
use monolith::html::{
|
||||
add_favicon, has_favicon, html_to_dom, metadata_tag, stringify_document, walk_and_embed_assets,
|
||||
add_base_tag, add_favicon, has_base_tag, has_favicon, html_to_dom, metadata_tag,
|
||||
stringify_document, walk_and_embed_assets,
|
||||
};
|
||||
use monolith::opts::Options;
|
||||
use monolith::url::{
|
||||
@@ -141,6 +142,14 @@ fn main() {
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
// Embed remote assets
|
||||
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
|
||||
|
||||
// Take care of BASE tag
|
||||
if is_http_url(base_url.clone()) && !has_base_tag(&dom.document) {
|
||||
dom = add_base_tag(&dom.document, base_url.clone());
|
||||
}
|
||||
|
||||
// Request and embed /favicon.ico (unless it's already linked in the document)
|
||||
if !options.no_images && is_http_url(target_url) && !has_favicon(&dom.document) {
|
||||
let favicon_ico_url: String = resolve_url(&base_url, "/favicon.ico").unwrap();
|
||||
@@ -163,15 +172,12 @@ fn main() {
|
||||
}
|
||||
}
|
||||
|
||||
// Embed remote assets
|
||||
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
|
||||
|
||||
// Serialize DOM tree
|
||||
let mut result: String = stringify_document(&dom.document, &options);
|
||||
|
||||
// Add metadata tag
|
||||
if !options.no_metadata {
|
||||
let metadata_comment = metadata_tag(&base_url);
|
||||
let metadata_comment: String = metadata_tag(&base_url);
|
||||
result.insert_str(0, &metadata_comment);
|
||||
if metadata_comment.len() > 0 {
|
||||
result.insert_str(metadata_comment.len(), "\n");
|
||||
|
||||
Reference in New Issue
Block a user