30 Commits

Author SHA1 Message Date
Sunshine
f067fc2324 bump version number (2.8.1 -> 2.8.2) 2024-09-03 00:01:37 -04:00
Sunshine
49d7585e02 get rid of useless docs 2024-09-02 23:50:09 -04:00
Caleb Hattingh
e25b7bc470 Link openssl statically - fixes #340 2024-09-02 23:40:15 -04:00
Sunshine
329c0568a4 update README.md 2024-08-19 07:21:10 -10:00
Andriy Rakhnin
f151a33c48 update authors 2024-08-16 10:52:58 -04:00
Andriy Rakhnin
ebf96bf1e5 referer_url refactoring 2024-08-16 10:52:58 -04:00
Andriy Rakhnin
c10c78a27d add HTTP referer header for resource requests 2024-08-16 10:52:58 -04:00
Andriy Rakhnin
64e84e4983 couple of cosmetic changes 2024-08-15 14:14:51 -04:00
Andriy Rakhnin
674d4085c7 fix srcset attribute parsing according to the specifications 2024-08-15 14:14:51 -04:00
Bryan Honof
e0fd5d4bb9 Update README.md 2024-07-30 14:54:01 -04:00
Bryan Honof
f4e360f09d Add flox installation option 2024-07-30 14:54:01 -04:00
Sunshine
0b4116f48a bump version in nuget manifest file 2024-06-24 12:06:22 -04:00
Orhun Parmaksız
084981a2ae Update instructions for installing on Arch Linux 2024-06-13 14:20:15 -04:00
Sunshine
a3feb7b721 update dependencies 2024-05-17 11:05:35 -04:00
Sunshine
87eb197e33 do not indent links based on depth in the output 2024-05-17 10:47:25 -04:00
Viktor Szépe
6798cad2b2 Fix typos 2024-03-29 06:24:14 -10:00
Waldir Pimenta
174cb50877 Add installation instructions for NixPkgs 2024-03-27 01:32:17 -10:00
Sunshine
e397a7532d Update README.md 2024-03-26 03:00:12 -10:00
Sunshine
91d8c146a9 Update README.md 2024-03-26 02:52:59 -10:00
Sunshine
67e07b91af provide more installation instructions 2024-03-26 02:51:44 -10:00
Sunshine
f797b8c999 get rid of outdated related projects 2024-03-26 02:05:07 -10:00
Sunshine
60251c6878 switch to new builder for Docker 2024-03-25 03:44:15 -10:00
Sunshine
b70801d55b ignore /target/ when building Docker image 2024-03-25 02:55:06 -10:00
Sunshine
2a50936990 update one of the author's email 2024-03-25 02:32:32 -10:00
Fred Weitendorf
f9e961f088 Add cargo setup to README
I did not have cargo installed on my system when I first tried to build the repo from a fresh git clone (I had not noticed it was a rust project), and it wasn't called out as a dependency for installation from source. I've added it as a noted dependency with some quick, collapsible installation instructions I followed to get it installed.
2024-03-25 01:33:23 -10:00
Thomas Kraxner
d8c3620d00 Scoop as install source 2024-03-25 01:30:17 -10:00
Thomas Merz
4aab0a64ee 🩹 fix README to prevent 'ERROR: failed to solve: invalid reference format: repository name must be lowercase'
and 'docker: invalid reference format: repository name must be lowercase.'
2024-03-25 01:06:43 -10:00
Sunshine
a2155e0af6 switch Rust edition from 2018 to 2021 2024-01-14 07:04:32 -10:00
Sunshine
f7e5527432 upgrade html5ever to 0.26.0 2024-01-14 06:41:46 -10:00
Sunshine
f7dd09d481 get rid of warnings from old version of Shopify/upload-to-release 2024-01-14 06:11:51 -10:00
42 changed files with 1020 additions and 1208 deletions

1
.dockerignore Normal file
View File

@@ -0,0 +1 @@
/target/

View File

@@ -20,7 +20,7 @@ jobs:
- name: Build the executable
run: cargo build --release
- uses: Shopify/upload-to-release@1.0.0
- uses: Shopify/upload-to-release@2.0.0
with:
name: monolith.exe
path: target\release\monolith.exe
@@ -53,7 +53,7 @@ jobs:
run: cargo build --release --target=arm-unknown-linux-gnueabihf
- name: Attach artifact to the release
uses: Shopify/upload-to-release@1.0.0
uses: Shopify/upload-to-release@2.0.0
with:
name: monolith-gnu-linux-armhf
path: target/arm-unknown-linux-gnueabihf/release/monolith
@@ -86,7 +86,7 @@ jobs:
run: cargo build --release --target=aarch64-unknown-linux-gnu
- name: Attach artifact to the release
uses: Shopify/upload-to-release@1.0.0
uses: Shopify/upload-to-release@2.0.0
with:
name: monolith-gnu-linux-aarch64
path: target/aarch64-unknown-linux-gnu/release/monolith
@@ -101,7 +101,7 @@ jobs:
- name: Build the executable
run: cargo build --release
- uses: Shopify/upload-to-release@1.0.0
- uses: Shopify/upload-to-release@2.0.0
with:
name: monolith-gnu-linux-x86_64
path: target/release/monolith

1203
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,14 +1,15 @@
[package]
name = "monolith"
version = "2.8.1"
version = "2.8.2"
authors = [
"Sunshine <sunshine@uberspace.net>",
"Sunshine <snshn@tutanota.com>",
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
"Emmanuel Delaborde <th3rac25@gmail.com>",
"Emi Simpson <emi@alchemi.dev>",
"rhysd <lin90162@yahoo.co.jp>",
"Andriy Rakhnin <a@rakhnin.com>",
]
edition = "2018"
edition = "2021"
description = "CLI tool for saving web pages as a single HTML file"
homepage = "https://github.com/Y2Z/monolith"
repository = "https://github.com/Y2Z/monolith"
@@ -23,15 +24,17 @@ license = "CC0-1.0"
[dependencies]
atty = "0.2.14" # Used for highlighting network errors
base64 = "0.21.7" # Used for integrity attributes
chrono = "0.4.31" # Used for formatting creation timestamp
base64 = "0.22.1" # Used for integrity attributes
chrono = "0.4.38" # Used for formatting output timestamp
clap = "3.2.25" # Used for processing CLI arguments
cssparser = "0.33.0" # Used for dealing with CSS
encoding_rs = "0.8.33" # Used for parsing and converting document charsets
html5ever = "0.24.1" # Used for all things DOM
percent-encoding = "2.3.1"
cssparser = "0.34.0" # Used for dealing with CSS
encoding_rs = "0.8.34" # Used for parsing and converting document charsets
html5ever = "0.27.0" # Used for all things DOM
markup5ever_rcdom = "0.3.0" # Used for manipulating DOM
percent-encoding = "2.3.1" # Used for encoding URLs
sha2 = "0.10.8" # Used for calculating checksums during integrity checks
url = "2.5.0"
url = "2.5.0" # Used for parsing URLs
openssl = { version = "0.10.64", features = ["vendored"] } # Used for making network requests
# Used for parsing srcset and NOSCRIPT
[dependencies.regex]

View File

@@ -1,4 +1,4 @@
FROM ekidd/rust-musl-builder as builder
FROM clux/muslrust:stable as builder
RUN curl -L -o monolith.tar.gz $(curl -s https://api.github.com/repos/y2z/monolith/releases/latest \
| grep "tarball_url.*\"," \
@@ -17,6 +17,6 @@ RUN apk update && \
apk add --no-cache openssl && \
rm -rf "/var/cache/apk/*"
COPY --from=builder /home/rust/.cargo/bin/monolith /usr/bin/monolith
COPY --from=builder /root/.cargo/bin/monolith /usr/bin/monolith
WORKDIR /tmp
ENTRYPOINT ["/usr/bin/monolith"]

View File

@@ -7,23 +7,30 @@ build:
@cargo build --locked
.PHONY: build
test: build
@cargo test --locked
@cargo fmt --all -- --check
.PHONY: test
lint:
@cargo fmt --all --
.PHONY: lint
clean:
@cargo clean
.PHONY: clean
install:
@cargo install --force --locked --path .
.PHONY: install
lint:
@cargo fmt --all --
.PHONY: lint
lint_check:
@cargo fmt --all -- --check
.PHONY: lint_check
test: build
@cargo test --locked
.PHONY: test
uninstall:
@cargo uninstall
.PHONY: uninstall
clean:
@cargo clean
update-lock-file:
@cargo update
.PHONY: clean

View File

@@ -42,6 +42,18 @@ brew install monolith
choco install monolith
```
#### Via [Scoop](https://scoop.sh/#/apps?q=monolith) (Windows)
```console
scoop install main/monolith
```
#### Via [Winget](https://winstall.app/apps/Y2Z.Monolith) (Windows)
```console
winget install --id=Y2Z.Monolith -e
```
#### Via [MacPorts](https://ports.macports.org/port/monolith/summary) (macOS)
```console
@@ -60,10 +72,22 @@ snap install monolith
guix install monolith
```
#### Using [AUR](https://aur.archlinux.org/packages/monolith) (Arch Linux)
#### Using [NixPkgs](https://search.nixos.org/packages?channel=unstable&show=monolith&query=monolith)
```console
yay monolith
nix-env -iA nixpkgs.monolith
```
#### Using [Flox](https://flox.dev)
```console
flox install monolith
```
#### Using [Pacman](https://archlinux.org/packages/extra/x86_64/monolith) (Arch Linux)
```console
pacman -S monolith
```
#### Using [aports](https://pkgs.alpinelinux.org/packages?name=monolith) (Alpine Linux)
@@ -72,6 +96,12 @@ yay monolith
apk add monolith
```
#### Using [XBPS Package Manager](https://voidlinux.org/packages/?q=monolith) (Void Linux)
```console
xbps-install -S monolith
```
#### Using [FreeBSD packages](https://svnweb.freebsd.org/ports/head/www/monolith/) (FreeBSD)
```console
@@ -95,13 +125,31 @@ make install clean
#### Using [containers](https://www.docker.com/)
```console
docker build -t Y2Z/monolith .
docker build -t y2z/monolith .
sudo install -b dist/run-in-container.sh /usr/local/bin/monolith
```
#### From [source](https://github.com/Y2Z/monolith)
Dependency: `libssl`
Dependencies: `libssl`, `cargo`
<details>
<summary>Install cargo (GNU/Linux)</summary>
Check if cargo is installed
```console
cargo -v
```
If cargo is not already installed, install and add it to your existing ```$PATH``` (paraphrasing the [official installation instructions](https://doc.rust-lang.org/cargo/getting-started/installation.html)):
```console
curl https://sh.rustup.rs -sSf | sh
. "$HOME/.cargo/env"
```
Proceed with installing from source:
</details>
```console
git clone https://github.com/Y2Z/monolith.git
@@ -124,7 +172,7 @@ monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roa
```
```console
cat index.html | monolith -aIiFfcMv -b https://original.site/ - > result.html
cat some-site-page.html | monolith -aIiFfcMv -b https://some.site/ - > some-site-page-with-assets.html
```
@@ -134,13 +182,13 @@ cat index.html | monolith -aIiFfcMv -b https://original.site/ - > result.html
## Options
- `-a`: Exclude audio sources
- `-b`: Use custom `base URL`
- `-b`: Use `custom base URL`
- `-B`: Forbid retrieving assets from specified domain(s)
- `-c`: Exclude CSS
- `-C`: Read cookies from `file`
- `-d`: Allow retrieving assets only from specified `domain(s)`
- `-e`: Ignore network errors
- `-E`: Save document using custom `encoding`
- `-E`: Save document using `custom encoding`
- `-f`: Omit frames
- `-F`: Exclude web fonts
- `-h`: Print help information
@@ -153,7 +201,7 @@ cat index.html | monolith -aIiFfcMv -b https://original.site/ - > result.html
- `-o`: Write output to `file` (use “-” for STDOUT)
- `-s`: Be quiet
- `-t`: Adjust `network request timeout`
- `-u`: Provide custom `User-Agent`
- `-u`: Provide `custom User-Agent`
- `-v`: Exclude videos
@@ -182,7 +230,7 @@ Monolith doesn't feature a JavaScript engine, hence websites that retrieve and d
For example, Chromium (Chrome) can be used to act as a pre-processor for such pages:
```console
chromium --headless --incognito --dump-dom https://github.com | monolith - -I -b https://github.com -o github.html
chromium --headless --window-size=1920,1080 --run-all-compositor-stages-before-draw --virtual-time-budget=9000 --incognito --dump-dom https://github.com | monolith - -I -b https://github.com -o github.html
```
@@ -205,26 +253,7 @@ Please open an issue if something is wrong, that helps make this project better.
---------------------------------------------------
## Related projects
- Monolith Chrome Extension: https://github.com/rhysd/monolith-of-web
- Pagesaver: https://github.com/distributed-mind/pagesaver
- Personal WayBack Machine: https://github.com/popey/pwbm
- Hako: https://github.com/dmpop/hako
- Monk: https://github.com/monk-dev/monk
---------------------------------------------------
## License
To the extent possible under law, the author(s) have dedicated all copyright related and neighboring rights to this software to the public domain worldwide.
This software is distributed without any warranty.
---------------------------------------------------
<!-- Microtext -->
<sub>Keep in mind that `monolith` is not aware of your browser's session</sub>

2
dist/run-in-container.sh vendored Normal file → Executable file
View File

@@ -7,4 +7,4 @@ if which podman 2>&1 > /dev/null; then
DOCKER=podman
fi
$DOCKER run --rm Y2Z/$PROG_NAME "$@"
$DOCKER run --rm y2z/$PROG_NAME "$@"

View File

@@ -1,19 +0,0 @@
# 1. Record architecture decisions
Date: 2019-12-25
## Status
Accepted
## Context
We need to record the architectural decisions made on this project.
## Decision
We will use Architecture Decision Records, as [described by Michael Nygard](http://thinkrelevance.com/blog/2011/11/15/documenting-architecture-decisions).
## Consequences
See Michael Nygard's article, linked above. For a lightweight ADR toolset, see Nat Pryce's [adr-tools](https://github.com/npryce/adr-tools).

View File

@@ -1,19 +0,0 @@
# 2. NOSCRIPT nodes
Date: 2020-04-16
## Status
Accepted
## Context
HTML pages can contain `noscript` nodes, which reveal their contents only in case when JavaScript is not available. Most of the time they contain hidden messages that inform about certain JavaScript-dependent features not being operational, however sometimes can also feature media assets or even iframes.
## Decision
When the document is being saved with or without JavaScript, each `noscript` node should be preserved while its children need to be processed exactly the same way as the rest of the document. This approach will ensure that even hidden remote assets are embedded — since those hidden elements may have to be displayed later in a browser that has JavaScript turned off. An option should be available to "unwrap" all `noscript` nodes in order to make their contents always visible in the document, complementing the "disable JS" function of the program.
## Consequences
Saved documents will have contents of all `noscript` nodes processed as if they are part of the document's DOM, therefore properly display images encapsulated within `noscript` nodes when being viewed in browsers that have JavaScript turned off (or have no JavaScript support in the first place). The new option to "unwrap" `noscript` elements will help the user ensure that the resulting document always represents what the original web page looked like in a browser that had JavaScript turned off.

View File

@@ -1,21 +0,0 @@
# 3. Network request timeout
Date: 2020-02-15
## Status
Accepted
## Context
A slow network connection and overloaded server may negatively impact network response time.
## Decision
Make the program simulate behavior of popular web browsers and CLI tools, where the default network response timeout is most often set to 120 seconds.
Instead of featuring retries for timed out network requests, the program should have an option to adjust the timeout length, along with making it indefinite when given "0" as its value.
## Consequences
The user is able to retrieve resources that have long response time, as well as obtain full control over how soon, and if at all, network requests should time out.

View File

@@ -1,21 +0,0 @@
# 4. Asset integrity check
Date: 2020-02-23
## Status
Accepted
## Context
In HTML5, `link` and `script` nodes have an attribute named `integrity`, which lets the browser check if the remote file is valid, mostly for the purpose of enhancing page security.
## Decision
In order to replicate the browser's behavior, the program should perform integrity check the same way it does, excluding the linked asset from the final result if such check fails.
The `integrity` attribute should be removed from nodes, as it bears no benefit for resources embedded as data URLs.
## Consequences
Assets that fail to pass the check get excluded from the saved document. Meanwhile, saved documents no longer contain integrity attributes on all `link` and `script` nodes.

View File

@@ -1,19 +0,0 @@
# 5. Asset Minimization
Date: 2020-03-14
## Status
Accepted
## Context
It may look like a good idea to make monolith compress retrieved assets while saving the page for the purpose of reducing the resulting document's file size.
## Decision
Given that the main purpose of this program is to save pages in a convenient to store and share manner — it's mostly an archiving tool, aside from being able to tell monolith to exclude certain types of assets (e.g. images, CSS, JavaScript), it would be outside of scope of this program to implement code for compressing assets. Minimizing files before embedding them does not reduce the amount of data that needs to be transferred either. A separate tool can be used later to compress and minimize pages saved by monolith, if needed.
## Consequences
Monolith will not support modification of original document assets for the purpose of reducing their size, sticking to performing only minimal amount of modifications to the original web page — whatever is needed to provide security or exclude unwanted asset types.

View File

@@ -1,19 +0,0 @@
# 6. Reload and location `meta` tags
Date: 2020-06-25
## Status
Accepted
## Context
HTML documents may contain `meta` tags capable of automatically refreshing the page or redirecting to another location.
## Decision
Since the resulting document is saved to disk and generally not intended to be served over the network, it only makes sense to remove `meta` tags that have `http-equiv` attribute equal to "Refresh" or "Location", in order to prevent them from reloading the page or redirecting to another location.
## Consequences
Monolith will ensure that saved documents do not contain `meta` tags capable of changing location or reloading the page.

View File

@@ -1,19 +0,0 @@
# 7. Network errors
Date: 2020-11-22
## Status
Accepted
## Context
Servers may return information with HTTP response codes other than `200`, however those responses may still contain useful data.
## Decision
Fail by default, notifying of the network error. Add option to continue retrieving assets by treating all response codes as `200`.
## Consequences
Monolith will fail to obtain resources with status other than `200`, unless told to ignore network errors.

View File

@@ -1,40 +0,0 @@
# 8. Base Tag
Date: 2020-12-25
## Status
Accepted
## Context
HTML documents may contain `base` tag, which influences resolution of anchor links and relative URLs as well as dynamically loaded resources.
Sometimes, in order to make certain saved documents function closer to how they operate while being served from a remote server, the `base` tag specifying the source page's URL may need to be added to the document.
There can be only one such tag. If multiple `base` tags are present, only the first encountered tag ends up being used.
## Decision
Adding the `base` tag should be optional — saved documents should not contain the `base` tag unless it was specified by the user, or the document originally had the `base` tag in it.
Existing `href` attribute's value of the original `base` tag should be used for resolving the document's relative links instead of document's own URL (precisely the way browsers do it).
## Consequences
#### If the base tag does not exist in the source document
- If the base tag does not exist in the source document
- With base URL option provided
- use the specified base URL value to retrieve assets, keep original base URL value in the document
- Without base URL option provided
- download document as usual, do not add base tag
- If the base tag already exists in the source document
- With base URL option provided
- we overwrite the original base URL before retrieving assets, keep new base URL value in the document
- Without base URL option provided:
- use the base URL from the original document to retrieve assets, keep original base URL value in the document
The program will obtain ability to retrieve remote assets for non-remote sources (such as data URLs and local files).
The program will obtain the ability to get rid of existing base tag values (by providing an empty one).

View File

@@ -1,3 +0,0 @@
# References
- https://content-security-policy.com/

View File

@@ -1,23 +0,0 @@
# Web apps that can be saved with Monolith
These apps retain all or most of their functionality when saved with Monolith:
## Converse
| Website | https://conversejs.org |
|:-----------------------|:--------------------------------------------------------------------|
| Description | An XMPP client built using web technologies |
| Functionality retained | **full** |
| Command to use | `monolith https://conversejs.org/fullscreen.html > conversejs.html` |
| Monolith version used | 2.2.7 |
## Markdown Tables generator
| Website | https://www.tablesgenerator.com |
|:--------------------------|:-----------------------------------------------------------------------------------------------|
| Description | Tool for creating tables in extended Markdown format |
| Functionality retained | **full** |
| Command to use | `monolith -I https://www.tablesgenerator.com/markdown_tables -o markdown-table-generator.html` |
| Monolith version used | 2.6.1 |

View File

@@ -2,7 +2,7 @@
<package xmlns="http://schemas.microsoft.com/packaging/2015/06/nuspec.xsd">
<metadata>
<id>monolith</id>
<version>2.4.0</version>
<version>2.8.1</version>
<title>Monolith</title>
<authors>Sunshine, Mahdi Robatipoor, Emmanuel Delaborde, Emi Simpson, rhysd</authors>
<projectUrl>https://github.com/Y2Z/monolith</projectUrl>

View File

@@ -36,7 +36,6 @@ pub fn embed_css(
document_url: &Url,
css: &str,
options: &Options,
depth: u32,
) -> String {
let mut input = ParserInput::new(&css);
let mut parser = Parser::new(&mut input);
@@ -47,7 +46,6 @@ pub fn embed_css(
document_url,
&mut parser,
options,
depth,
"",
"",
"",
@@ -81,7 +79,6 @@ pub fn process_css<'a>(
document_url: &Url,
parser: &mut Parser,
options: &Options,
depth: u32,
rule_name: &str,
prop_name: &str,
func_name: &str,
@@ -135,7 +132,6 @@ pub fn process_css<'a>(
document_url,
parser,
options,
depth,
rule_name,
curr_prop.as_str(),
func_name,
@@ -190,14 +186,7 @@ pub fn process_css<'a>(
}
let import_full_url: Url = resolve_url(&document_url, value);
match retrieve_asset(
cache,
client,
&document_url,
&import_full_url,
options,
depth + 1,
) {
match retrieve_asset(cache, client, &document_url, &import_full_url, options) {
Ok((
import_contents,
import_final_url,
@@ -213,7 +202,6 @@ pub fn process_css<'a>(
&import_final_url,
&String::from_utf8_lossy(&import_contents),
options,
depth + 1,
)
.as_bytes(),
&import_final_url,
@@ -251,7 +239,6 @@ pub fn process_css<'a>(
&document_url,
&resolved_url,
options,
depth + 1,
) {
Ok((data, final_url, media_type, charset)) => {
let mut data_url =
@@ -341,14 +328,7 @@ pub fn process_css<'a>(
result.push_str("url(");
if is_import {
let full_url: Url = resolve_url(&document_url, value);
match retrieve_asset(
cache,
client,
&document_url,
&full_url,
options,
depth + 1,
) {
match retrieve_asset(cache, client, &document_url, &full_url, options) {
Ok((css, final_url, media_type, charset)) => {
let mut data_url = create_data_url(
&media_type,
@@ -359,7 +339,6 @@ pub fn process_css<'a>(
&final_url,
&String::from_utf8_lossy(&css),
options,
depth + 1,
)
.as_bytes(),
&final_url,
@@ -380,14 +359,7 @@ pub fn process_css<'a>(
result.push_str(format_quoted_string(EMPTY_IMAGE_DATA_URL).as_str());
} else {
let full_url: Url = resolve_url(&document_url, value);
match retrieve_asset(
cache,
client,
&document_url,
&full_url,
options,
depth + 1,
) {
match retrieve_asset(cache, client, &document_url, &full_url, options) {
Ok((data, final_url, media_type, charset)) => {
let mut data_url =
create_data_url(&media_type, &charset, &data, &final_url);
@@ -423,7 +395,6 @@ pub fn process_css<'a>(
document_url,
parser,
options,
depth,
curr_rule.as_str(),
curr_prop.as_str(),
function_name,

View File

@@ -3,11 +3,11 @@ use chrono::prelude::*;
use encoding_rs::Encoding;
use html5ever::interface::QualName;
use html5ever::parse_document;
use html5ever::rcdom::{Handle, NodeData, RcDom};
use html5ever::serialize::{serialize, SerializeOpts};
use html5ever::tendril::{format_tendril, TendrilSink};
use html5ever::tree_builder::{Attribute, TreeSink};
use html5ever::{local_name, namespace_url, ns, LocalName};
use markup5ever_rcdom::{Handle, NodeData, RcDom, SerializableHandle};
use regex::Regex;
use reqwest::blocking::Client;
use reqwest::Url;
@@ -30,10 +30,16 @@ struct SrcSetItem<'a> {
const ICON_VALUES: &'static [&str] = &["icon", "shortcut icon"];
const WHITESPACES: &'static [char] = &['\t', '\n', '\x0c', '\r', ' '];
pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom {
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, document, SerializeOpts::default())
.expect("unable to serialize DOM into buffer");
serialize(
&mut buf,
&SerializableHandle::from(document.clone()),
SerializeOpts::default(),
)
.expect("unable to serialize DOM into buffer");
let mut dom = html_to_dom(&buf, "utf-8".to_string());
let doc = dom.get_document();
@@ -161,15 +167,44 @@ pub fn embed_srcset(
document_url: &Url,
srcset: &str,
options: &Options,
depth: u32,
) -> String {
let mut array: Vec<SrcSetItem> = vec![];
let re = Regex::new(r",\s+").unwrap();
for srcset_item in re.split(srcset) {
let parts: Vec<&str> = srcset_item.trim().split_whitespace().collect();
if parts.len() > 0 {
let path = parts[0].trim();
let descriptor = if parts.len() > 1 { parts[1].trim() } else { "" };
// Parse srcset attribute according to the specs
// https://html.spec.whatwg.org/multipage/images.html#srcset-attribute
let mut offset = 0;
let size = srcset.chars().count();
while offset < size {
let mut has_descriptor = true;
// Zero or more whitespaces + skip leading comma
let url_start = offset
+ srcset[offset..]
.chars()
.take_while(|&c| WHITESPACES.contains(&c) || c == ',')
.count();
if url_start >= size {
break;
}
// A valid non-empty URL that does not start or end with comma
let mut url_end = url_start
+ srcset[url_start..]
.chars()
.take_while(|&c| !WHITESPACES.contains(&c))
.count();
while (url_end - 1) > url_start && srcset.chars().nth(url_end - 1).unwrap() == ',' {
has_descriptor = false;
url_end -= 1;
}
offset = url_end;
// If the URL wasn't terminated by comma there may also be a descriptor
if has_descriptor {
offset += srcset[url_end..].chars().take_while(|&c| c != ',').count();
}
// Collect SrcSetItem
if url_end > url_start {
let path = &srcset[url_start..url_end];
let descriptor = &srcset[url_end..offset].trim();
let srcset_real_item = SrcSetItem { path, descriptor };
array.push(srcset_real_item);
}
@@ -182,14 +217,7 @@ pub fn embed_srcset(
result.push_str(EMPTY_IMAGE_DATA_URL);
} else {
let image_full_url: Url = resolve_url(&document_url, part.path);
match retrieve_asset(
cache,
client,
&document_url,
&image_full_url,
options,
depth + 1,
) {
match retrieve_asset(cache, client, &document_url, &image_full_url, options) {
Ok((image_data, image_final_url, image_media_type, image_charset)) => {
let mut image_data_url = create_data_url(
&image_media_type,
@@ -197,7 +225,7 @@ pub fn embed_srcset(
&image_data,
&image_final_url,
);
// Append retreved asset as a data URL
// Append retrieved asset as a data URL
image_data_url.set_fragment(image_full_url.fragment());
result.push_str(image_data_url.as_ref());
}
@@ -428,8 +456,12 @@ pub fn is_icon(attr_value: &str) -> bool {
pub fn set_base_url(document: &Handle, desired_base_href: String) -> RcDom {
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, document, SerializeOpts::default())
.expect("unable to serialize DOM into buffer");
serialize(
&mut buf,
&SerializableHandle::from(document.clone()),
SerializeOpts::default(),
)
.expect("unable to serialize DOM into buffer");
let mut dom = html_to_dom(&buf, "utf-8".to_string());
let doc = dom.get_document();
@@ -534,7 +566,7 @@ pub fn set_node_attr(node: &Handle, attr_name: &str, attr_value: Option<String>)
pub fn serialize_document(mut dom: RcDom, document_encoding: String, options: &Options) -> Vec<u8> {
let mut buf: Vec<u8> = Vec::new();
let doc = dom.get_document();
let document = dom.get_document();
if options.isolate
|| options.no_css
@@ -544,7 +576,7 @@ pub fn serialize_document(mut dom: RcDom, document_encoding: String, options: &O
|| options.no_images
{
// Take care of CSP
if let Some(html) = get_child_node_by_name(&doc, "html") {
if let Some(html) = get_child_node_by_name(&document, "html") {
if let Some(head) = get_child_node_by_name(&html, "head") {
let meta = dom.create_element(
QualName::new(None, ns!(), local_name!("meta")),
@@ -570,8 +602,12 @@ pub fn serialize_document(mut dom: RcDom, document_encoding: String, options: &O
}
}
serialize(&mut buf, &doc, SerializeOpts::default())
.expect("Unable to serialize DOM into buffer");
serialize(
&mut buf,
&SerializableHandle::from(document.clone()),
SerializeOpts::default(),
)
.expect("Unable to serialize DOM into buffer");
// Unwrap NOSCRIPT elements
if options.unwrap_noscript {
@@ -599,18 +635,10 @@ pub fn retrieve_and_embed_asset(
attr_name: &str,
attr_value: &str,
options: &Options,
depth: u32,
) {
let resolved_url: Url = resolve_url(document_url, attr_value);
match retrieve_asset(
cache,
client,
&document_url.clone(),
&resolved_url,
options,
depth + 1,
) {
match retrieve_asset(cache, client, &document_url.clone(), &resolved_url, options) {
Ok((data, final_url, mut media_type, charset)) => {
let node_name: &str = get_node_name(&node).unwrap();
@@ -639,7 +667,7 @@ pub fn retrieve_and_embed_asset(
if node_name == "link" && determine_link_node_type(node) == "stylesheet" {
// Stylesheet LINK elements require special treatment
let css: String = embed_css(cache, client, &final_url, &s, options, depth + 1);
let css: String = embed_css(cache, client, &final_url, &s, options);
// Create and embed data URL
let css_data_url =
@@ -648,19 +676,12 @@ pub fn retrieve_and_embed_asset(
} else if node_name == "frame" || node_name == "iframe" {
// (I)FRAMEs are also quite different from conventional resources
let frame_dom = html_to_dom(&data, charset.clone());
walk_and_embed_assets(
cache,
client,
&final_url,
&frame_dom.document,
&options,
depth + 1,
);
walk_and_embed_assets(cache, client, &final_url, &frame_dom.document, &options);
let mut frame_data: Vec<u8> = Vec::new();
serialize(
&mut frame_data,
&frame_dom.document,
&SerializableHandle::from(frame_dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
@@ -710,13 +731,12 @@ pub fn walk_and_embed_assets(
document_url: &Url,
node: &Handle,
options: &Options,
depth: u32,
) {
match node.data {
NodeData::Document => {
// Dig deeper
for child in node.children.borrow().iter() {
walk_and_embed_assets(cache, client, &document_url, child, options, depth);
walk_and_embed_assets(cache, client, &document_url, child, options);
}
}
NodeData::Element {
@@ -751,7 +771,6 @@ pub fn walk_and_embed_assets(
"href",
&link_attr_href_value,
options,
depth,
);
} else {
set_node_attr(node, "href", None);
@@ -774,7 +793,6 @@ pub fn walk_and_embed_assets(
"href",
&link_attr_href_value,
options,
depth,
);
}
}
@@ -816,7 +834,6 @@ pub fn walk_and_embed_assets(
"background",
&body_attr_background_value,
options,
depth,
);
}
}
@@ -862,7 +879,6 @@ pub fn walk_and_embed_assets(
"src",
&img_full_url,
options,
depth,
);
}
}
@@ -870,14 +886,8 @@ pub fn walk_and_embed_assets(
// Resolve srcset attribute
if let Some(img_srcset) = get_node_attr(node, "srcset") {
if !img_srcset.is_empty() {
let resolved_srcset: String = embed_srcset(
cache,
client,
&document_url,
&img_srcset,
options,
depth,
);
let resolved_srcset: String =
embed_srcset(cache, client, &document_url, &img_srcset, options);
set_node_attr(node, "srcset", Some(resolved_srcset));
}
}
@@ -907,7 +917,6 @@ pub fn walk_and_embed_assets(
"src",
&input_attr_src_value,
options,
depth,
);
}
}
@@ -940,7 +949,6 @@ pub fn walk_and_embed_assets(
"href",
&image_href,
options,
depth,
);
}
}
@@ -961,7 +969,6 @@ pub fn walk_and_embed_assets(
"src",
&source_attr_src_value,
options,
depth,
);
}
} else if parent_node_name == "video" {
@@ -976,7 +983,6 @@ pub fn walk_and_embed_assets(
"src",
&source_attr_src_value,
options,
depth,
);
}
}
@@ -998,7 +1004,6 @@ pub fn walk_and_embed_assets(
&document_url,
&source_attr_srcset_value,
options,
depth,
);
set_node_attr(node, "srcset", Some(resolved_srcset));
}
@@ -1051,7 +1056,6 @@ pub fn walk_and_embed_assets(
"src",
&script_attr_src.unwrap_or_default(),
options,
depth,
);
}
}
@@ -1069,7 +1073,6 @@ pub fn walk_and_embed_assets(
&document_url,
tendril.as_ref(),
options,
depth,
);
tendril.clear();
tendril.push_slice(&replacement);
@@ -1101,7 +1104,6 @@ pub fn walk_and_embed_assets(
"src",
&frame_attr_src_value,
options,
depth,
);
}
}
@@ -1121,7 +1123,6 @@ pub fn walk_and_embed_assets(
"src",
&audio_attr_src_value,
options,
depth,
);
}
}
@@ -1140,7 +1141,6 @@ pub fn walk_and_embed_assets(
"src",
&video_attr_src_value,
options,
depth,
);
}
}
@@ -1164,7 +1164,6 @@ pub fn walk_and_embed_assets(
"poster",
&video_attr_poster_value,
options,
depth,
);
}
}
@@ -1188,7 +1187,6 @@ pub fn walk_and_embed_assets(
&document_url,
&noscript_contents_dom.document,
&options,
depth,
);
// Get rid of original contents
noscript_contents.clear();
@@ -1198,8 +1196,12 @@ pub fn walk_and_embed_assets(
{
if let Some(body) = get_child_node_by_name(&html, "body") {
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &body, SerializeOpts::default())
.expect("Unable to serialize DOM into buffer");
serialize(
&mut buf,
&SerializableHandle::from(body.clone()),
SerializeOpts::default(),
)
.expect("Unable to serialize DOM into buffer");
let result = String::from_utf8_lossy(&buf);
noscript_contents.push_slice(&result);
}
@@ -1225,7 +1227,6 @@ pub fn walk_and_embed_assets(
&document_url,
&node_attr_style_value,
options,
depth,
);
set_node_attr(node, "style", Some(embedded_style));
}
@@ -1249,7 +1250,7 @@ pub fn walk_and_embed_assets(
// Dig deeper
for child in node.children.borrow().iter() {
walk_and_embed_assets(cache, client, &document_url, child, options, depth);
walk_and_embed_assets(cache, client, &document_url, child, options);
}
}
_ => {

View File

@@ -1,5 +1,5 @@
use encoding_rs::Encoding;
use html5ever::rcdom::RcDom;
use markup5ever_rcdom::RcDom;
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use std::collections::HashMap;
@@ -198,7 +198,7 @@ fn main() {
|| (target_url.scheme() == "http" || target_url.scheme() == "https")
|| target_url.scheme() == "data"
{
match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options, 0) {
match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options) {
Ok((retrieved_data, final_url, media_type, charset)) => {
// Provide output as text without processing it, the way browsers do
if !media_type.eq_ignore_ascii_case("text/html")
@@ -306,7 +306,7 @@ fn main() {
}
// Traverse through the document and embed remote assets
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options);
// Update or add new BASE element to reroute network requests and hash-links
if let Some(new_base_url) = options.base_url.clone() {
@@ -320,14 +320,7 @@ fn main() {
{
let favicon_ico_url: Url = resolve_url(&base_url, "/favicon.ico");
match retrieve_asset(
&mut cache,
&client,
&target_url,
&favicon_ico_url,
&options,
0,
) {
match retrieve_asset(&mut cache, &client, &target_url, &favicon_ico_url, &options) {
Ok((data, final_url, media_type, charset)) => {
let favicon_data_url: Url =
create_data_url(&media_type, &charset, &data, &final_url);

View File

@@ -79,6 +79,17 @@ pub fn parse_data_url(url: &Url) -> (String, String, Vec<u8>) {
(media_type, charset, blob)
}
/// Derives the value for an HTTP `Referer` request header from a document URL.
///
/// Spec: https://httpwg.org/specs/rfc9110.html#field.referer
/// The Referer field must not include the fragment and userinfo components
/// of the URI, so both are stripped from the returned copy.
pub fn get_referer_url(mut url: Url) -> Url {
    // The function already owns `url`; cloning it again was redundant.
    // Drop the fragment (e.g. "#section").
    url.set_fragment(None);
    // Drop username/password credentials.
    // NOTE(review): set_username/set_password return Err for URLs that cannot
    // carry credentials (e.g. cannot-be-a-base URLs such as data:); callers
    // appear to pass only document URLs here — confirm before relying on it.
    url.set_username("").unwrap();
    url.set_password(None).unwrap();
    url
}
pub fn resolve_url(from: &Url, to: &str) -> Url {
match Url::parse(&to) {
Ok(parsed_url) => parsed_url,

View File

@@ -1,12 +1,12 @@
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE, COOKIE};
use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE, COOKIE, REFERER};
use std::collections::HashMap;
use std::fs;
use std::path::{Path, PathBuf};
use url::Url;
use crate::opts::Options;
use crate::url::{clean_url, parse_data_url};
use crate::url::{clean_url, get_referer_url, parse_data_url};
const ANSI_COLOR_RED: &'static str = "\x1b[31m";
const ANSI_COLOR_RESET: &'static str = "\x1b[0m";
@@ -148,18 +148,6 @@ pub fn domain_is_within_domain(domain: &str, domain_to_match_against: &str) -> b
ok
}
/// Returns a whitespace prefix used to indent console log lines by nesting
/// `level` — one space character per level, empty string for level 0.
pub fn indent(level: u32) -> String {
    // str::repeat allocates the result in a single pass instead of growing
    // the string once per level as the old while-loop did.
    " ".repeat(level as usize)
}
pub fn is_plaintext_media_type(media_type: &str) -> bool {
media_type.to_lowercase().as_str().starts_with("text/")
|| PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
@@ -198,7 +186,6 @@ pub fn retrieve_asset(
parent_url: &Url,
url: &Url,
options: &Options,
depth: u32,
) -> Result<(Vec<u8>, Url, String, String), reqwest::Error> {
if url.scheme() == "data" {
let (media_type, charset, data) = parse_data_url(url);
@@ -208,8 +195,7 @@ pub fn retrieve_asset(
if parent_url.scheme() != "file" {
if !options.silent {
eprintln!(
"{}{}{} ({}){}",
indent(depth).as_str(),
"{}{} ({}){}",
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
"Security Error",
@@ -230,8 +216,7 @@ pub fn retrieve_asset(
if path.is_dir() {
if !options.silent {
eprintln!(
"{}{}{} (is a directory){}",
indent(depth).as_str(),
"{}{} (is a directory){}",
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
if options.no_color {
@@ -246,7 +231,7 @@ pub fn retrieve_asset(
Err(client.get("").send().unwrap_err())
} else {
if !options.silent {
eprintln!("{}{}", indent(depth).as_str(), &url);
eprintln!("{}", &url);
}
let file_blob: Vec<u8> = fs::read(&path).expect("Unable to read file");
@@ -261,8 +246,7 @@ pub fn retrieve_asset(
} else {
if !options.silent {
eprintln!(
"{}{}{} (not found){}",
indent(depth).as_str(),
"{}{} (not found){}",
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
if options.no_color {
@@ -282,7 +266,7 @@ pub fn retrieve_asset(
if cache.contains_key(&cache_key) {
// URL is in cache, we get and return it
if !options.silent {
eprintln!("{}{} (from cache)", indent(depth).as_str(), &url);
eprintln!("{} (from cache)", &url);
}
Ok((
@@ -314,13 +298,19 @@ pub fn retrieve_asset(
}
}
}
// Add referer header for page resource requests
if parent_url != url {
headers.insert(
REFERER,
HeaderValue::from_str(get_referer_url(parent_url.clone()).as_str()).unwrap(),
);
}
match client.get(url.as_str()).headers(headers).send() {
Ok(response) => {
if !options.ignore_errors && response.status() != reqwest::StatusCode::OK {
if !options.silent {
eprintln!(
"{}{}{} ({}){}",
indent(depth).as_str(),
"{}{} ({}){}",
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
response.status(),
@@ -339,9 +329,9 @@ pub fn retrieve_asset(
if !options.silent {
if url.as_str() == response_url.as_str() {
eprintln!("{}{}", indent(depth).as_str(), &url);
eprintln!("{}", &url);
} else {
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &response_url);
eprintln!("{} -> {}", &url, &response_url);
}
}
@@ -365,8 +355,7 @@ pub fn retrieve_asset(
Err(error) => {
if !options.silent {
eprintln!(
"{}{}{}{}",
indent(depth).as_str(),
"{}{}{}",
if options.no_color { "" } else { ANSI_COLOR_RED },
error,
if options.no_color {
@@ -388,8 +377,7 @@ pub fn retrieve_asset(
Err(error) => {
if !options.silent {
eprintln!(
"{}{}{} ({}){}",
indent(depth).as_str(),
"{}{} ({}){}",
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
error,

View File

@@ -90,9 +90,9 @@ mod passing {
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file_url_html}\n \
{file_url_css}\n \
{file_url_css}\n \
{file_url_html}\n\
{file_url_css}\n\
{file_url_css}\n\
{file_url_css}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),

View File

@@ -39,10 +39,10 @@ mod passing {
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file}{cwd}/tests/_data_/basic/local-file.html\n \
{file}{cwd}/tests/_data_/basic/local-style.css\n \
{file}{cwd}/tests/_data_/basic/local-style-does-not-exist.css (not found)\n \
{file}{cwd}/tests/_data_/basic/monolith.png (not found)\n \
{file}{cwd}/tests/_data_/basic/local-file.html\n\
{file}{cwd}/tests/_data_/basic/local-style.css\n\
{file}{cwd}/tests/_data_/basic/local-style-does-not-exist.css (not found)\n\
{file}{cwd}/tests/_data_/basic/monolith.png (not found)\n\
{file}{cwd}/tests/_data_/basic/local-script.js\n\
",
file = file_url_protocol,
@@ -185,7 +185,7 @@ mod passing {
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file_url_html}\n \
{file_url_html}\n\
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
@@ -236,10 +236,10 @@ mod passing {
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file}{cwd}/tests/_data_/integrity/index.html\n \
{file}{cwd}/tests/_data_/integrity/style.css\n \
{file}{cwd}/tests/_data_/integrity/style.css\n \
{file}{cwd}/tests/_data_/integrity/script.js\n \
{file}{cwd}/tests/_data_/integrity/index.html\n\
{file}{cwd}/tests/_data_/integrity/style.css\n\
{file}{cwd}/tests/_data_/integrity/style.css\n\
{file}{cwd}/tests/_data_/integrity/script.js\n\
{file}{cwd}/tests/_data_/integrity/script.js\n\
",
file = file_url_protocol,

View File

@@ -27,7 +27,7 @@ mod passing {
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file_url_html}\n \
{file_url_html}\n\
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
@@ -58,7 +58,7 @@ mod passing {
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file_url_html}\n \
{file_url_html}\n\
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
@@ -89,7 +89,7 @@ mod passing {
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file_url_html}\n \
{file_url_html}\n\
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
@@ -120,7 +120,7 @@ mod passing {
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file_url_html}\n \
{file_url_html}\n\
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),

View File

@@ -38,7 +38,7 @@ mod passing {
)
);
// STDOUT should contain original document without any modificatons
// STDOUT should contain original document without any modifications
let s: String;
if let Some(encoding) = Encoding::for_label(b"gb2312") {
let (string, _, _) = encoding.decode(&out.stdout);
@@ -135,7 +135,7 @@ mod passing {
)
);
// STDOUT should contain original document without any modificatons
// STDOUT should contain original document without any modifications
assert_eq!(
String::from_utf8_lossy(&out.stdout).to_string(),
"<html>\

View File

@@ -23,7 +23,7 @@ mod passing {
let options = Options::default();
assert_eq!(
css::embed_css(cache, &client, &document_url, "", &options, 0),
css::embed_css(cache, &client, &document_url, "", &options),
""
);
}
@@ -36,7 +36,7 @@ mod passing {
let options = Options::default();
assert_eq!(
css::embed_css(cache, &client, &document_url, "\t \t ", &options, 0,),
css::embed_css(cache, &client, &document_url, "\t \t ", &options),
""
);
}
@@ -59,7 +59,7 @@ mod passing {
height: calc(100vh - 10pt)";
assert_eq!(
css::embed_css(cache, &client, &document_url, &STYLE, &options, 0,),
css::embed_css(cache, &client, &document_url, &STYLE, &options),
format!(
"/* border: none;*/\
background-image: url(\"{empty_image}\"); \
@@ -91,7 +91,7 @@ mod passing {
height: calc(100vh - 10pt)";
assert_eq!(
css::embed_css(cache, &client, &document_url, &STYLE, &options, 0),
css::embed_css(cache, &client, &document_url, &STYLE, &options),
format!(
"/* border: none;*/\
background-image: url(\"{empty_image}\"); \
@@ -122,7 +122,7 @@ mod passing {
html > body {}";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0),
css::embed_css(cache, &client, &document_url, &CSS, &options),
CSS
);
}
@@ -166,7 +166,7 @@ mod passing {
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0),
css::embed_css(cache, &client, &document_url, &CSS, &options),
CSS
);
}
@@ -188,7 +188,7 @@ mod passing {
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
css::embed_css(cache, &client, &document_url, &CSS, &options),
"\
@charset \"UTF-8\";\n\
\n\
@@ -218,7 +218,7 @@ mod passing {
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
css::embed_css(cache, &client, &document_url, &CSS, &options),
CSS
);
}
@@ -240,7 +240,7 @@ mod passing {
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
css::embed_css(cache, &client, &document_url, &CSS, &options),
CSS
);
}
@@ -264,7 +264,7 @@ mod passing {
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
css::embed_css(cache, &client, &document_url, &CSS, &options),
CSS
);
}
@@ -312,7 +312,7 @@ mod passing {
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
css::embed_css(cache, &client, &document_url, &CSS, &options),
CSS_OUT
);
}
@@ -337,7 +337,7 @@ mod passing {
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
css::embed_css(cache, &client, &document_url, &CSS, &options),
CSS_OUT
);
}
@@ -364,7 +364,7 @@ mod passing {
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
css::embed_css(cache, &client, &document_url, &CSS, &options),
CSS_OUT
);
}

View File

@@ -10,17 +10,17 @@ mod passing {
use monolith::css;
#[test]
fn backrgound() {
fn background() {
assert!(css::is_image_url_prop("background"));
}
#[test]
fn backrgound_image() {
fn background_image() {
assert!(css::is_image_url_prop("background-image"));
}
#[test]
fn backrgound_image_uppercase() {
fn background_image_uppercase() {
assert!(css::is_image_url_prop("BACKGROUND-IMAGE"));
}

View File

@@ -8,6 +8,7 @@
#[cfg(test)]
mod passing {
use html5ever::serialize::{serialize, SerializeOpts};
use markup5ever_rcdom::SerializableHandle;
use monolith::html;
@@ -19,7 +20,12 @@ mod passing {
dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string());
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),

View File

@@ -29,7 +29,6 @@ mod passing {
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!(
@@ -55,7 +54,6 @@ mod passing {
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!(
@@ -78,7 +76,6 @@ mod passing {
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!(
@@ -101,7 +98,6 @@ mod passing {
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!(
@@ -112,6 +108,56 @@ mod passing {
),
);
}
#[test]
fn no_whitespace_after_commas() {
    // srcset entries separated by commas with no trailing whitespace must
    // still be recognized as three distinct image candidates.
    let srcset_value = "small,s.png 1x,medium,m.png 2x,large,l.png 3x";
    let document_url = Url::parse("data:,").unwrap();
    let client = Client::new();
    let cache = &mut HashMap::new();
    let mut options = Options::default();
    options.no_images = true;
    options.silent = true;

    let result = html::embed_srcset(cache, &client, &document_url, &srcset_value, &options);

    let expected = format!(
        "{} 1x, {} 2x, {} 3x",
        EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL
    );
    assert_eq!(result, expected);
}
#[test]
fn last_without_descriptor() {
    // The final srcset candidate may legally omit its width/density
    // descriptor; it must still be embedded.
    let srcset_value = "small,s.png 1x, medium,m.png 2x, large,l.png";
    let document_url = Url::parse("data:,").unwrap();
    let client = Client::new();
    let cache = &mut HashMap::new();
    let mut options = Options::default();
    options.no_images = true;
    options.silent = true;

    let result = html::embed_srcset(cache, &client, &document_url, &srcset_value, &options);

    let expected = format!(
        "{} 1x, {} 2x, {}",
        EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL
    );
    assert_eq!(result, expected);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@@ -145,12 +191,11 @@ mod failing {
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!(
embedded_css,
format!("{} 1x, {} 2x,", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL),
format!("{} 1x, {} 2x", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL),
);
}
}

View File

@@ -7,7 +7,7 @@
#[cfg(test)]
mod passing {
use html5ever::rcdom::{Handle, NodeData};
use markup5ever_rcdom::{Handle, NodeData};
use monolith::html;

View File

@@ -7,7 +7,7 @@
#[cfg(test)]
mod passing {
use html5ever::rcdom::{Handle, NodeData};
use markup5ever_rcdom::{Handle, NodeData};
use monolith::html;

View File

@@ -7,7 +7,7 @@
#[cfg(test)]
mod passing {
use html5ever::rcdom::{Handle, NodeData};
use markup5ever_rcdom::{Handle, NodeData};
use monolith::html;

View File

@@ -8,6 +8,7 @@
#[cfg(test)]
mod passing {
use html5ever::serialize::{serialize, SerializeOpts};
use markup5ever_rcdom::SerializableHandle;
use reqwest::blocking::Client;
use std::collections::HashMap;
use url::Url;
@@ -29,10 +30,15 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
@@ -52,10 +58,15 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
@@ -75,10 +86,15 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
@@ -104,10 +120,15 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
@@ -140,10 +161,15 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
@@ -177,10 +203,15 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
@@ -201,10 +232,15 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
@@ -233,10 +269,15 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
@@ -269,10 +310,15 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
@@ -303,10 +349,15 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
@@ -340,10 +391,15 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
@@ -378,10 +434,15 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
@@ -424,10 +485,15 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
@@ -463,10 +529,15 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
@@ -498,10 +569,15 @@ mod passing {
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),

View File

@@ -46,7 +46,7 @@ mod passing {
}
#[test]
fn removesempty_fragment_and_keeps_empty_query() {
fn removes_empty_fragment_and_keeps_query() {
assert_eq!(
url::clean_url(Url::parse("https://somewhere.com/font.eot?a=b&#").unwrap()).as_str(),
"https://somewhere.com/font.eot?a=b&"

View File

@@ -0,0 +1,91 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use reqwest::Url;

    use monolith::url;

    // Parses `input`, runs it through get_referer_url, and returns the
    // cleaned result as an owned String for easy comparison.
    fn referer_of(input: &str) -> String {
        url::get_referer_url(Url::parse(input).unwrap())
            .as_str()
            .to_string()
    }

    #[test]
    fn preserve_original() {
        // The input URL must remain untouched; only the returned copy
        // has its fragment stripped.
        let original_url: Url = Url::parse("https://somewhere.com/font.eot#iefix").unwrap();
        let referer_url: Url = url::get_referer_url(original_url.clone());

        assert_eq!(referer_url.as_str(), "https://somewhere.com/font.eot");
        assert_eq!(
            original_url.as_str(),
            "https://somewhere.com/font.eot#iefix"
        );
    }

    #[test]
    fn removes_fragment() {
        assert_eq!(
            referer_of("https://somewhere.com/font.eot#iefix"),
            "https://somewhere.com/font.eot"
        );
    }

    #[test]
    fn removes_empty_fragment() {
        assert_eq!(
            referer_of("https://somewhere.com/font.eot#"),
            "https://somewhere.com/font.eot"
        );
    }

    #[test]
    fn removes_empty_fragment_and_keeps_empty_query() {
        assert_eq!(
            referer_of("https://somewhere.com/font.eot?#"),
            "https://somewhere.com/font.eot?"
        );
    }

    #[test]
    fn removes_empty_fragment_and_keeps_query() {
        assert_eq!(
            referer_of("https://somewhere.com/font.eot?a=b&#"),
            "https://somewhere.com/font.eot?a=b&"
        );
    }

    #[test]
    fn removes_credentials() {
        assert_eq!(
            referer_of("https://cookie:monster@gibson.lan/path"),
            "https://gibson.lan/path"
        );
    }

    #[test]
    fn removes_empty_credentials() {
        assert_eq!(
            referer_of("https://@gibson.lan/path"),
            "https://gibson.lan/path"
        );
    }

    #[test]
    fn removes_empty_username_credentials() {
        assert_eq!(
            referer_of("https://:monster@gibson.lan/path"),
            "https://gibson.lan/path"
        );
    }

    #[test]
    fn removes_empty_password_credentials() {
        assert_eq!(
            referer_of("https://cookie@gibson.lan/path"),
            "https://gibson.lan/path"
        );
    }
}

View File

@@ -1,5 +1,6 @@
mod clean_url;
mod create_data_url;
mod get_referer_url;
mod is_url_and_has_protocol;
mod parse_data_url;
mod resolve_url;

View File

@@ -1,36 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
// Tests for utils::indent(): level 0 yields an empty string, and each
// additional level appends one fixed run of spaces to the prefix.
#[cfg(test)]
mod passing {
use monolith::utils;
// Zero nesting: no padding at all.
#[test]
fn zero() {
assert_eq!(utils::indent(0), "");
}
// One level of nesting produces the base padding unit.
#[test]
fn one() {
assert_eq!(utils::indent(1), " ");
}
#[test]
fn two() {
assert_eq!(utils::indent(2), "  ");
}
#[test]
fn three() {
assert_eq!(utils::indent(3), "   ");
}
#[test]
fn four() {
assert_eq!(utils::indent(4), "    ");
}
}

View File

@@ -1,5 +1,4 @@
mod detect_media_type;
mod domain_is_within_domain;
mod indent;
mod parse_content_type;
mod retrieve_asset;

View File

@@ -32,7 +32,6 @@ mod passing {
&Url::parse("data:text/html;base64,c291cmNl").unwrap(),
&Url::parse("data:text/html;base64,dGFyZ2V0").unwrap(),
&options,
0,
)
.unwrap();
assert_eq!(&media_type, "text/html");
@@ -75,7 +74,6 @@ mod passing {
))
.unwrap(),
&options,
0,
)
.unwrap();
assert_eq!(&media_type, "application/javascript");
@@ -124,7 +122,6 @@ mod failing {
&Url::parse("data:text/html;base64,SoUrCe").unwrap(),
&Url::parse("file:///etc/passwd").unwrap(),
&options,
0,
) {
Ok((..)) => {
assert!(false);
@@ -150,7 +147,6 @@ mod failing {
&Url::parse("https://kernel.org/").unwrap(),
&Url::parse("file:///etc/passwd").unwrap(),
&options,
0,
) {
Ok((..)) => {
assert!(false);