Compare commits
91 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
95040173fc | ||
|
|
b10d41f82e | ||
|
|
4c2c55d166 | ||
|
|
2dd1c465e4 | ||
|
|
a5afda9c80 | ||
|
|
ab6fed6d1f | ||
|
|
f8dcb335e7 | ||
|
|
913051870a | ||
|
|
614a518475 | ||
|
|
870a4b150e | ||
|
|
0533b287b7 | ||
|
|
4ba4285b6b | ||
|
|
2b9caf9840 | ||
|
|
8adf059980 | ||
|
|
8ad252868e | ||
|
|
e145df372f | ||
|
|
816b6175ac | ||
|
|
d89b4d5f5b | ||
|
|
15d98a7269 | ||
|
|
36e82cb511 | ||
|
|
1b1befd7b0 | ||
|
|
a2f59b4418 | ||
|
|
124a62920f | ||
|
|
f557504bed | ||
|
|
5ac520b4da | ||
|
|
7a97291498 | ||
|
|
38a6f963ad | ||
|
|
052f8f49ec | ||
|
|
08de486382 | ||
|
|
c0e0a69773 | ||
|
|
1636540693 | ||
|
|
3e80cb02ce | ||
|
|
a296531b3f | ||
|
|
8462b6bc31 | ||
|
|
92f38556b6 | ||
|
|
c0bdeab2e3 | ||
|
|
5a502eab4b | ||
|
|
19f08265a2 | ||
|
|
1d6392cb28 | ||
|
|
03cdc0e0b2 | ||
|
|
b98b7af0b4 | ||
|
|
73c35eaccb | ||
|
|
2c5d1e930b | ||
|
|
90f7c3a0d0 | ||
|
|
c1fec5967d | ||
|
|
09d41d2cf1 | ||
|
|
8f1da3c792 | ||
|
|
a8449a2b32 | ||
|
|
164e728ad3 | ||
|
|
8883bd6aca | ||
|
|
eae5d4dc6b | ||
|
|
ec85121d28 | ||
|
|
a8a85a4191 | ||
|
|
decd5b2119 | ||
|
|
bef6d848e9 | ||
|
|
4263e42cd1 | ||
|
|
23de5ced21 | ||
|
|
bc98aca2a2 | ||
|
|
438ebd520a | ||
|
|
ddb97009e9 | ||
|
|
6e67545b92 | ||
|
|
9e5d8ec691 | ||
|
|
fb835fae28 | ||
|
|
29bf042da0 | ||
|
|
d67483cf8e | ||
|
|
4140d8ebad | ||
|
|
2ac964fae5 | ||
|
|
ae5d6d2df4 | ||
|
|
2ed151d883 | ||
|
|
3cdfdc45d3 | ||
|
|
ac04af2cfc | ||
|
|
769953d7bd | ||
|
|
136dcc31cf | ||
|
|
44cac65a83 | ||
|
|
c3ca2ad1d5 | ||
|
|
0347fd3985 | ||
|
|
95d0083b3c | ||
|
|
3ce26b5fdd | ||
|
|
7f9458adfe | ||
|
|
5c229c51da | ||
|
|
f6ea16b3ad | ||
|
|
877b11d52c | ||
|
|
f9aac6f41b | ||
|
|
0a30c286fe | ||
|
|
ea56b9b4c1 | ||
|
|
e821591efe | ||
|
|
4e5d2fdc8d | ||
|
|
7c2ed2c9ca | ||
|
|
60d21ae071 | ||
|
|
bfdcd459e1 | ||
|
|
6c020dfa88 |
15
.github/workflows/cd.yml
vendored
15
.github/workflows/cd.yml
vendored
@@ -15,7 +15,7 @@ jobs:
|
||||
- run: git config --global core.autocrlf false
|
||||
- name: Checkout the repository
|
||||
uses: actions/checkout@v2
|
||||
- name: Build and install the executable
|
||||
- name: Build the executable
|
||||
run: cargo build --release
|
||||
- uses: Shopify/upload-to-release@1.0.0
|
||||
with:
|
||||
@@ -52,3 +52,16 @@ jobs:
|
||||
name: monolith-gnu-linux-armhf
|
||||
path: target/arm-unknown-linux-gnueabihf/release/monolith
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
gnu_linux_x86_64:
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
- name: Checkout the repository
|
||||
uses: actions/checkout@v2
|
||||
- name: Build the executable
|
||||
run: cargo build --release
|
||||
- uses: Shopify/upload-to-release@1.0.0
|
||||
with:
|
||||
name: monolith-gnu-linux-x86_64
|
||||
path: target/release/monolith
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
1177
Cargo.lock
generated
1177
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
30
Cargo.toml
30
Cargo.toml
@@ -1,7 +1,6 @@
|
||||
[package]
|
||||
name = "monolith"
|
||||
version = "2.2.7"
|
||||
edition = "2018"
|
||||
version = "2.4.0"
|
||||
authors = [
|
||||
"Sunshine <sunshine@uberspace.net>",
|
||||
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
|
||||
@@ -9,22 +8,35 @@ authors = [
|
||||
"Emi Simpson <emi@alchemi.dev>",
|
||||
"rhysd <lin90162@yahoo.co.jp>",
|
||||
]
|
||||
edition = "2018"
|
||||
description = "CLI tool for saving web pages as a single HTML file"
|
||||
homepage = "https://github.com/Y2Z/monolith"
|
||||
repository = "https://github.com/Y2Z/monolith"
|
||||
readme = "README.md"
|
||||
keywords = ["web", "http", "html", "download", "command-line"]
|
||||
categories = ["command-line-utilities", "web-programming"]
|
||||
include = [
|
||||
"src/*.rs",
|
||||
"Cargo.toml",
|
||||
]
|
||||
license = "Unlicense"
|
||||
license-file = "LICENSE"
|
||||
|
||||
[dependencies]
|
||||
base64 = "0.12.0"
|
||||
clap = "2.33.0"
|
||||
base64 = "0.13.0"
|
||||
chrono = "0.4.19" # Used for formatting creation timestamp
|
||||
clap = "2.33.3"
|
||||
cssparser = "0.27.2"
|
||||
html5ever = "0.24.1"
|
||||
sha2 = "0.8.1" # Used in calculating checksums during integrity checks
|
||||
time = "0.1.42" # Used to render comments indicating the time the page was saved
|
||||
url = "2.1.1"
|
||||
regex = "1.4.2" # Used for parsing srcset
|
||||
sha2 = "0.9.2" # Used for calculating checksums during integrity checks
|
||||
url = "2.2.0"
|
||||
|
||||
[dependencies.reqwest]
|
||||
version = "0.10.*"
|
||||
version = "0.10.9"
|
||||
default-features = false
|
||||
features = ["default-tls", "blocking", "gzip"]
|
||||
|
||||
[dev-dependencies]
|
||||
assert_cmd = "0.12.0"
|
||||
assert_cmd = "1.0.1"
|
||||
tempfile = "3.1.0"
|
||||
|
||||
4
Makefile
4
Makefile
@@ -23,3 +23,7 @@ install:
|
||||
uninstall:
|
||||
@cargo uninstall
|
||||
.PHONY: uninstall
|
||||
|
||||
clean:
|
||||
@cargo clean
|
||||
.PHONY: clean
|
||||
|
||||
42
README.md
42
README.md
@@ -3,13 +3,13 @@
|
||||
[](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows)
|
||||
|
||||
```
|
||||
___ ___________ __________ ___________________ ___
|
||||
| \ / \ | | | | | |
|
||||
| \_/ __ \_| __ | | ___ ___ |__| |
|
||||
| | | | | | | | | | | |
|
||||
| |__| _ |__| |____| | | | | __ |
|
||||
| |\_/| | \ | | | | | | |
|
||||
|___| |__________| \____________________| |___| |___| |___|
|
||||
_____ ______________ __________ ___________________ ___
|
||||
| \ / \ | | | | | |
|
||||
| \_/ __ \_| __ | | ___ ___ |__| |
|
||||
| | | | | | | | | | | |
|
||||
| |\ /| |__| _ |__| |____| | | | | __ |
|
||||
| | \___/ | | \ | | | | | | |
|
||||
|___| |__________| \_____________________| |___| |___| |___|
|
||||
```
|
||||
|
||||
A data hoarder’s dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
|
||||
@@ -22,12 +22,22 @@ If compared to saving websites with `wget -mpk`, this tool embeds all assets as
|
||||
|
||||
## Installation
|
||||
|
||||
### Using Cargo
|
||||
$ cargo install monolith
|
||||
|
||||
#### Via Homebrew (on macOS and GNU/Linux)
|
||||
$ brew install monolith
|
||||
|
||||
#### Using Snapcraft (on GNU/Linux)
|
||||
$ snap install monolith
|
||||
|
||||
#### Using Ports collection (on FreeBSD and TrueOS)
|
||||
$ cd /usr/ports/www/monolith/
|
||||
$ make install clean
|
||||
|
||||
#### Using pre-built binaries (Windows, ARM-based devices, etc)
|
||||
Every [release](https://github.com/Y2Z/monolith/releases) contains pre-built binaries for Windows, GNU/Linux, as well as platforms with non-standard CPU architecture.
|
||||
|
||||
#### From source
|
||||
|
||||
Dependency: `libssl-dev`
|
||||
@@ -36,7 +46,7 @@ Dependency: `libssl-dev`
|
||||
$ cd monolith
|
||||
$ make install
|
||||
|
||||
#### With Docker
|
||||
#### Using Containers
|
||||
The guide can be found [here](docs/containers.md)
|
||||
|
||||
---------------------------------------------------
|
||||
@@ -47,21 +57,26 @@ The guide can be found [here](docs/containers.md)
|
||||
---------------------------------------------------
|
||||
|
||||
## Options
|
||||
- `-c`: Ignore styles
|
||||
- `-f`: Exclude frames and iframes
|
||||
- `-F`: Omit web fonts
|
||||
- `-a`: Exclude audio sources
|
||||
- `-b`: Use custom base URL
|
||||
- `-c`: Exclude CSS
|
||||
- `-e`: Ignore network errors
|
||||
- `-f`: Omit frames
|
||||
- `-F`: Exclude web fonts
|
||||
- `-i`: Remove images
|
||||
- `-I`: Isolate the document
|
||||
- `-j`: Exclude JavaScript
|
||||
- `-k`: Accept invalid X.509 (TLS) certificates
|
||||
- `-M`: Don't add timestamp and URL information
|
||||
- `-o`: Write output to file
|
||||
- `-s`: Enable silent mode
|
||||
- `-s`: Be quiet
|
||||
- `-t`: Adjust network request timeout
|
||||
- `-u`: Provide custom User-Agent
|
||||
- `-v`: Exclude videos
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
## HTTPS and HTTP proxies
|
||||
## Proxies
|
||||
Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables.
|
||||
|
||||
---------------------------------------------------
|
||||
@@ -75,6 +90,7 @@ Please open an issue if something is wrong, that helps make this project better.
|
||||
- `Monolith Chrome Extension`: https://github.com/rhysd/monolith-of-web
|
||||
- `Pagesaver`: https://github.com/distributed-mind/pagesaver
|
||||
- `Personal WayBack Machine`: https://github.com/popey/pwbm
|
||||
- `Hako`: https://github.com/dmpop/hako
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
|
||||
BIN
assets/icon/icon.blend
Normal file
BIN
assets/icon/icon.blend
Normal file
Binary file not shown.
BIN
assets/icon/icon.png
Normal file
BIN
assets/icon/icon.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.2 MiB |
@@ -1,4 +1,4 @@
|
||||
# 2. Network request timeout
|
||||
# 3. Network request timeout
|
||||
|
||||
Date: 2020-02-15
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# 4. Asset Minimization
|
||||
# 5. Asset Minimization
|
||||
|
||||
Date: 2020-03-14
|
||||
|
||||
|
||||
19
docs/arch/0006-reload-and-location-meta-tags.md
Normal file
19
docs/arch/0006-reload-and-location-meta-tags.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# 6. Reload and location `meta` tags
|
||||
|
||||
Date: 2020-06-25
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
HTML documents may contain `meta` tags capable of automatically refreshing the page or redirecting to another location.
|
||||
|
||||
## Decision
|
||||
|
||||
Since the resulting document is saved to disk and generally not intended to be served over the network, it only makes sense to remove `meta` tags that have an `http-equiv` attribute equal to "Refresh" or "Location", in order to prevent them from reloading the page or redirecting to another location.
|
||||
|
||||
## Consequences
|
||||
|
||||
Monolith will ensure that saved documents do not contain `meta` tags capable of changing location or reloading the page.
|
||||
19
docs/arch/0007-network-errors.md
Normal file
19
docs/arch/0007-network-errors.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# 7. Network errors
|
||||
|
||||
Date: 2020-11-22
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
Servers may return information with HTTP response codes other than `200`; however, those responses may still contain useful data.
|
||||
|
||||
## Decision
|
||||
|
||||
Fail by default, notifying of the network error. Add option to continue retrieving assets by treating all response codes as `200`.
|
||||
|
||||
## Consequences
|
||||
|
||||
Monolith will fail to obtain resources with status other than `200`, unless told to ignore network errors.
|
||||
40
docs/arch/0008-base-tag.md
Normal file
40
docs/arch/0008-base-tag.md
Normal file
@@ -0,0 +1,40 @@
|
||||
# 8. Base Tag
|
||||
|
||||
Date: 2020-12-25
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
HTML documents may contain a `base` tag, which influences resolution of anchor links and relative URLs as well as dynamically loaded resources.
|
||||
|
||||
Sometimes, in order to make certain saved documents function closer to how they operate while being served from a remote server, the `base` tag specifying the source page's URL may need to be added to the document.
|
||||
|
||||
There can be only one such tag. If multiple `base` tags are present, only the first encountered tag ends up being used.
|
||||
|
||||
## Decision
|
||||
|
||||
Adding the `base` tag should be optional — saved documents should not contain the `base` tag unless it was specified by the user, or the document originally had the `base` tag in it.
|
||||
|
||||
Existing `href` attribute's value of the original `base` tag should be used for resolving the document's relative links instead of document's own URL (precisely the way browsers do it).
|
||||
|
||||
## Consequences
|
||||
|
||||
#### If the base tag does not exist in the source document
|
||||
|
||||
- If the base tag does not exist in the source document
|
||||
- With base URL option provided
|
||||
- use the specified base URL value to retrieve assets, keep original base URL value in the document
|
||||
- Without base URL option provided
|
||||
- download document as usual, do not add base tag
|
||||
- If the base tag already exists in the source document
|
||||
- With base URL option provided
|
||||
- we overwrite the original base URL before retrieving assets, keep new base URL value in the document
|
||||
- Without base URL option provided
|
||||
- use the base URL from the original document to retrieve assets, keep original base URL value in the document
|
||||
|
||||
The program will obtain the ability to retrieve remote assets for non-remote sources (such as data URLs and local files).
|
||||
|
||||
The program will obtain the ability to get rid of existing base tag values (by providing an empty one).
|
||||
3
docs/references.md
Normal file
3
docs/references.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# References
|
||||
|
||||
- https://content-security-policy.com/
|
||||
10
docs/web-apps.md
Normal file
10
docs/web-apps.md
Normal file
@@ -0,0 +1,10 @@
|
||||
# Web apps that can be saved with Monolith
|
||||
|
||||
These apps retain most or all of their functionality when saved with Monolith
|
||||
|
||||
|Converse|https://conversejs.org|
|
||||
|:--|:--|
|
||||
|Description|An XMPP client built using web technologies|
|
||||
|Functionality retained|**full**|
|
||||
|Command to use|`monolith https://conversejs.org/fullscreen.html > conversejs.html`|
|
||||
|Monolith version used|2.2.7|
|
||||
@@ -18,11 +18,11 @@ description: |
|
||||
|
||||
confinement: strict
|
||||
|
||||
# Building on armhf fails, so we specify all supported non-armhf architectures
|
||||
architectures:
|
||||
- build-on: amd64
|
||||
- build-on: i386
|
||||
- build-on: arm64
|
||||
- build-on: armhf
|
||||
- build-on: i386
|
||||
- build-on: ppc64el
|
||||
- build-on: s390x
|
||||
|
||||
|
||||
79
src/args.rs
79
src/args.rs
@@ -1,79 +0,0 @@
|
||||
use clap::{App, Arg};
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct AppArgs {
|
||||
pub target: String,
|
||||
pub no_css: bool,
|
||||
pub no_fonts: bool,
|
||||
pub no_frames: bool,
|
||||
pub no_images: bool,
|
||||
pub no_js: bool,
|
||||
pub insecure: bool,
|
||||
pub isolate: bool,
|
||||
pub output: String,
|
||||
pub silent: bool,
|
||||
pub timeout: u64,
|
||||
pub user_agent: String,
|
||||
pub no_metadata: bool,
|
||||
}
|
||||
|
||||
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
|
||||
const DEFAULT_USER_AGENT: &str =
|
||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0";
|
||||
|
||||
impl AppArgs {
|
||||
pub fn get() -> AppArgs {
|
||||
let app = App::new(env!("CARGO_PKG_NAME"))
|
||||
.version(crate_version!())
|
||||
.author(crate_authors!("\n"))
|
||||
.about(crate_description!())
|
||||
.arg(
|
||||
Arg::with_name("target")
|
||||
.required(true)
|
||||
.takes_value(true)
|
||||
.index(1)
|
||||
.help("URL or file path"),
|
||||
)
|
||||
// .args_from_usage("-a, --include-audio 'Removes audio sources'")
|
||||
.args_from_usage("-c, --no-css 'Removes CSS'")
|
||||
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
|
||||
.args_from_usage("-F, --no-fonts 'Removes fonts'")
|
||||
.args_from_usage("-i, --no-images 'Removes images'")
|
||||
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
|
||||
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
||||
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
|
||||
.args_from_usage("-M, --no-metadata 'Excludes metadata information from the document'")
|
||||
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
|
||||
.args_from_usage("-s, --silent 'Suppresses verbosity'")
|
||||
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
|
||||
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
|
||||
// .args_from_usage("-v, --include-video 'Removes video sources'")
|
||||
.get_matches();
|
||||
let mut app_args = AppArgs::default();
|
||||
// Process the command
|
||||
app_args.target = app
|
||||
.value_of("target")
|
||||
.expect("please set target")
|
||||
.to_string();
|
||||
app_args.no_css = app.is_present("no-css");
|
||||
app_args.no_fonts = app.is_present("no-fonts");
|
||||
app_args.no_frames = app.is_present("no-frames");
|
||||
app_args.no_images = app.is_present("no-images");
|
||||
app_args.no_js = app.is_present("no-js");
|
||||
app_args.insecure = app.is_present("insecure");
|
||||
app_args.no_metadata = app.is_present("no-metadata");
|
||||
app_args.isolate = app.is_present("isolate");
|
||||
app_args.silent = app.is_present("silent");
|
||||
app_args.timeout = app
|
||||
.value_of("timeout")
|
||||
.unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string())
|
||||
.parse::<u64>()
|
||||
.unwrap();
|
||||
app_args.output = app.value_of("output").unwrap_or("").to_string();
|
||||
app_args.user_agent = app
|
||||
.value_of("user-agent")
|
||||
.unwrap_or(DEFAULT_USER_AGENT)
|
||||
.to_string();
|
||||
app_args
|
||||
}
|
||||
}
|
||||
151
src/css.rs
151
src/css.rs
@@ -2,7 +2,9 @@ use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::utils::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, retrieve_asset};
|
||||
use crate::opts::Options;
|
||||
use crate::url::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, url_with_fragment};
|
||||
use crate::utils::retrieve_asset;
|
||||
|
||||
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
|
||||
// Universal
|
||||
@@ -58,12 +60,11 @@ pub fn process_css<'a>(
|
||||
client: &Client,
|
||||
parent_url: &str,
|
||||
parser: &mut Parser,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
rule_name: &str,
|
||||
prop_name: &str,
|
||||
func_name: &str,
|
||||
opt_no_fonts: bool,
|
||||
opt_no_images: bool,
|
||||
opt_silent: bool,
|
||||
) -> Result<String, ParseError<'a, String>> {
|
||||
let mut result: String = str!();
|
||||
|
||||
@@ -90,7 +91,7 @@ pub fn process_css<'a>(
|
||||
Token::Colon => result.push_str(":"),
|
||||
Token::Comma => result.push_str(","),
|
||||
Token::ParenthesisBlock | Token::SquareBracketBlock | Token::CurlyBracketBlock => {
|
||||
if opt_no_fonts && curr_rule == "font-face" {
|
||||
if options.no_fonts && curr_rule == "font-face" {
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -113,12 +114,11 @@ pub fn process_css<'a>(
|
||||
client,
|
||||
parent_url,
|
||||
parser,
|
||||
options,
|
||||
depth,
|
||||
rule_name,
|
||||
curr_prop.as_str(),
|
||||
func_name,
|
||||
opt_no_fonts,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
)
|
||||
})
|
||||
.unwrap();
|
||||
@@ -148,7 +148,7 @@ pub fn process_css<'a>(
|
||||
// @import, @font-face, @charset, @media...
|
||||
Token::AtKeyword(ref value) => {
|
||||
curr_rule = str!(value);
|
||||
if opt_no_fonts && curr_rule == "font-face" {
|
||||
if options.no_fonts && curr_rule == "font-face" {
|
||||
continue;
|
||||
}
|
||||
result.push_str("@");
|
||||
@@ -171,34 +171,42 @@ pub fn process_css<'a>(
|
||||
|
||||
let import_full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
||||
let import_url_fragment = get_url_fragment(import_full_url.clone());
|
||||
match retrieve_asset(cache, client, &parent_url, &import_full_url, opt_silent) {
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&parent_url,
|
||||
&import_full_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((import_contents, import_final_url, _import_media_type)) => {
|
||||
result.push_str(
|
||||
enquote(
|
||||
data_to_data_url(
|
||||
"text/css",
|
||||
embed_css(
|
||||
cache,
|
||||
client,
|
||||
&import_final_url,
|
||||
&String::from_utf8_lossy(&import_contents),
|
||||
opt_no_fonts,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
)
|
||||
.as_bytes(),
|
||||
&import_final_url,
|
||||
&import_url_fragment,
|
||||
),
|
||||
false,
|
||||
let import_data_url = data_to_data_url(
|
||||
"text/css",
|
||||
embed_css(
|
||||
cache,
|
||||
client,
|
||||
&import_final_url,
|
||||
&String::from_utf8_lossy(&import_contents),
|
||||
options,
|
||||
depth + 1,
|
||||
)
|
||||
.as_str(),
|
||||
.as_bytes(),
|
||||
&import_final_url,
|
||||
);
|
||||
let assembled_url: String = url_with_fragment(
|
||||
import_data_url.as_str(),
|
||||
import_url_fragment.as_str(),
|
||||
);
|
||||
result.push_str(enquote(assembled_url, false).as_str());
|
||||
}
|
||||
Err(_) => {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
if is_http_url(import_full_url.clone()) {
|
||||
result.push_str(enquote(import_full_url, false).as_str());
|
||||
let assembled_url: String = url_with_fragment(
|
||||
import_full_url.as_str(),
|
||||
import_url_fragment.as_str(),
|
||||
);
|
||||
result.push_str(enquote(assembled_url, false).as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -209,7 +217,7 @@ pub fn process_css<'a>(
|
||||
continue;
|
||||
}
|
||||
|
||||
if opt_no_images && is_image_url_prop(curr_prop.as_str()) {
|
||||
if options.no_images && is_image_url_prop(curr_prop.as_str()) {
|
||||
result.push_str(enquote(str!(empty_image!()), false).as_str());
|
||||
} else {
|
||||
let resolved_url = resolve_url(&parent_url, value).unwrap_or_default();
|
||||
@@ -219,21 +227,23 @@ pub fn process_css<'a>(
|
||||
client,
|
||||
&parent_url,
|
||||
&resolved_url,
|
||||
opt_silent,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((data, final_url, media_type)) => {
|
||||
let data_url = data_to_data_url(
|
||||
&media_type,
|
||||
&data,
|
||||
&final_url,
|
||||
&url_fragment,
|
||||
);
|
||||
result.push_str(enquote(data_url, false).as_str());
|
||||
let data_url = data_to_data_url(&media_type, &data, &final_url);
|
||||
let assembled_url: String =
|
||||
url_with_fragment(data_url.as_str(), url_fragment.as_str());
|
||||
result.push_str(enquote(assembled_url, false).as_str());
|
||||
}
|
||||
Err(_) => {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
if is_http_url(resolved_url.clone()) {
|
||||
result.push_str(enquote(resolved_url, false).as_str());
|
||||
let assembled_url: String = url_with_fragment(
|
||||
resolved_url.as_str(),
|
||||
url_fragment.as_str(),
|
||||
);
|
||||
result.push_str(enquote(assembled_url, false).as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -261,7 +271,7 @@ pub fn process_css<'a>(
|
||||
if *has_sign && *unit_value >= 0. {
|
||||
result.push_str("+");
|
||||
}
|
||||
result.push_str(str!(unit_value * 100.).as_str());
|
||||
result.push_str(str!(unit_value * 100.0).as_str());
|
||||
result.push_str("%");
|
||||
}
|
||||
Token::Dimension {
|
||||
@@ -305,7 +315,8 @@ pub fn process_css<'a>(
|
||||
if is_import {
|
||||
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
||||
let url_fragment = get_url_fragment(full_url.clone());
|
||||
match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) {
|
||||
match retrieve_asset(cache, client, &parent_url, &full_url, options, depth + 1)
|
||||
{
|
||||
Ok((css, final_url, _media_type)) => {
|
||||
let data_url = data_to_data_url(
|
||||
"text/css",
|
||||
@@ -314,39 +325,51 @@ pub fn process_css<'a>(
|
||||
client,
|
||||
&final_url,
|
||||
&String::from_utf8_lossy(&css),
|
||||
opt_no_fonts,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
options,
|
||||
depth + 1,
|
||||
)
|
||||
.as_bytes(),
|
||||
&final_url,
|
||||
&url_fragment,
|
||||
);
|
||||
result.push_str(enquote(data_url, false).as_str());
|
||||
let assembled_url: String =
|
||||
url_with_fragment(data_url.as_str(), url_fragment.as_str());
|
||||
result.push_str(enquote(assembled_url, false).as_str());
|
||||
}
|
||||
Err(_) => {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
if is_http_url(full_url.clone()) {
|
||||
result.push_str(enquote(full_url, false).as_str());
|
||||
let assembled_url: String =
|
||||
url_with_fragment(full_url.as_str(), url_fragment.as_str());
|
||||
result.push_str(enquote(assembled_url, false).as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if opt_no_images && is_image_url_prop(curr_prop.as_str()) {
|
||||
if is_image_url_prop(curr_prop.as_str()) && options.no_images {
|
||||
result.push_str(enquote(str!(empty_image!()), false).as_str());
|
||||
} else {
|
||||
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
||||
let url_fragment = get_url_fragment(full_url.clone());
|
||||
match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) {
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&parent_url,
|
||||
&full_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((data, final_url, media_type)) => {
|
||||
let data_url =
|
||||
data_to_data_url(&media_type, &data, &final_url, &url_fragment);
|
||||
result.push_str(enquote(data_url, false).as_str());
|
||||
let data_url = data_to_data_url(&media_type, &data, &final_url);
|
||||
let assembled_url: String =
|
||||
url_with_fragment(data_url.as_str(), url_fragment.as_str());
|
||||
result.push_str(enquote(assembled_url, false).as_str());
|
||||
}
|
||||
Err(_) => {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
if is_http_url(full_url.clone()) {
|
||||
result.push_str(enquote(full_url, false).as_str());
|
||||
let assembled_url: String =
|
||||
url_with_fragment(full_url.as_str(), url_fragment.as_str());
|
||||
result.push_str(enquote(assembled_url, false).as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -367,12 +390,11 @@ pub fn process_css<'a>(
|
||||
client,
|
||||
parent_url,
|
||||
parser,
|
||||
options,
|
||||
depth,
|
||||
curr_rule.as_str(),
|
||||
curr_prop.as_str(),
|
||||
function_name,
|
||||
opt_no_fonts,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
)
|
||||
})
|
||||
.unwrap();
|
||||
@@ -384,6 +406,11 @@ pub fn process_css<'a>(
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure empty CSS is really empty
|
||||
if result.len() > 0 && result.trim().len() == 0 {
|
||||
result = result.trim().to_string()
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
@@ -392,9 +419,8 @@ pub fn embed_css(
|
||||
client: &Client,
|
||||
parent_url: &str,
|
||||
css: &str,
|
||||
opt_no_fonts: bool,
|
||||
opt_no_images: bool,
|
||||
opt_silent: bool,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
) -> String {
|
||||
let mut input = ParserInput::new(&css);
|
||||
let mut parser = Parser::new(&mut input);
|
||||
@@ -404,12 +430,11 @@ pub fn embed_css(
|
||||
client,
|
||||
parent_url,
|
||||
&mut parser,
|
||||
options,
|
||||
depth,
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
opt_no_fonts,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
1957
src/html.rs
1957
src/html.rs
File diff suppressed because it is too large
Load Diff
@@ -1,9 +1,14 @@
|
||||
#[macro_use]
|
||||
extern crate clap;
|
||||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
|
||||
pub mod css;
|
||||
pub mod html;
|
||||
pub mod js;
|
||||
pub mod opts;
|
||||
pub mod url;
|
||||
pub mod utils;
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
200
src/main.rs
200
src/main.rs
@@ -1,22 +1,24 @@
|
||||
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
|
||||
use monolith::utils::{data_url_to_data, is_data_url, is_file_url, is_http_url, retrieve_asset};
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
||||
use reqwest::Url;
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::io::{self, Error, Write};
|
||||
use std::io::{self, prelude::*, Error, Write};
|
||||
use std::path::Path;
|
||||
use std::process;
|
||||
use std::time::Duration;
|
||||
|
||||
mod args;
|
||||
mod macros;
|
||||
use monolith::html::{
|
||||
add_favicon, create_metadata_tag, get_base_url, has_favicon, html_to_dom, set_base_url,
|
||||
stringify_document, walk_and_embed_assets,
|
||||
};
|
||||
use monolith::opts::Options;
|
||||
use monolith::url::{
|
||||
data_to_data_url, is_data_url, is_file_url, is_http_url, parse_data_url, resolve_url,
|
||||
};
|
||||
use monolith::utils::retrieve_asset;
|
||||
|
||||
#[macro_use]
|
||||
extern crate clap;
|
||||
use crate::args::AppArgs;
|
||||
mod macros;
|
||||
|
||||
enum Output {
|
||||
Stdout(io::Stdout),
|
||||
@@ -25,7 +27,7 @@ enum Output {
|
||||
|
||||
impl Output {
|
||||
fn new(file_path: &str) -> Result<Output, Error> {
|
||||
if file_path.is_empty() {
|
||||
if file_path.is_empty() || file_path.eq("-") {
|
||||
Ok(Output::Stdout(io::stdout()))
|
||||
} else {
|
||||
Ok(Output::File(fs::File::create(file_path)?))
|
||||
@@ -46,12 +48,22 @@ impl Output {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn read_stdin() -> String {
|
||||
let mut buffer = String::new();
|
||||
for line in io::stdin().lock().lines() {
|
||||
buffer += line.unwrap_or_default().as_str();
|
||||
buffer += "\n";
|
||||
}
|
||||
buffer
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let app_args = AppArgs::get();
|
||||
let original_target: &str = &app_args.target;
|
||||
let options = Options::from_args();
|
||||
let original_target: &str = &options.target;
|
||||
let target_url: &str;
|
||||
let base_url;
|
||||
let dom;
|
||||
let mut base_url: String;
|
||||
let mut dom;
|
||||
let mut use_stdin: bool = false;
|
||||
|
||||
// Pre-process the input
|
||||
let cwd_normalized: String =
|
||||
@@ -60,16 +72,26 @@ fn main() {
|
||||
let mut target: String = str!(original_target.clone()).replace("\\", "/");
|
||||
let path_is_relative: bool = path.is_relative();
|
||||
|
||||
// Determine exact target URL
|
||||
if target.clone().len() == 0 {
|
||||
eprintln!("No target specified");
|
||||
if !options.silent {
|
||||
eprintln!("No target specified");
|
||||
}
|
||||
process::exit(1);
|
||||
} else if target.clone() == "-" {
|
||||
// Read from pipe (stdin)
|
||||
use_stdin = true;
|
||||
// Default target URL to empty data URL; the user can control it via --base-url
|
||||
target_url = "data:text/html,"
|
||||
} else if is_http_url(target.clone()) || is_data_url(target.clone()) {
|
||||
target_url = target.as_str();
|
||||
} else if is_file_url(target.clone()) {
|
||||
target_url = target.as_str();
|
||||
} else if path.exists() {
|
||||
if !path.is_file() {
|
||||
eprintln!("Local target is not a file: {}", original_target);
|
||||
if !options.silent {
|
||||
eprintln!("Local target is not a file: {}", original_target);
|
||||
}
|
||||
process::exit(1);
|
||||
}
|
||||
target.insert_str(0, if cfg!(windows) { "file:///" } else { "file://" });
|
||||
@@ -86,106 +108,122 @@ fn main() {
|
||||
target_url = target.as_str();
|
||||
}
|
||||
|
||||
let mut output = Output::new(&app_args.output).expect("Could not prepare output");
|
||||
// Define output
|
||||
let mut output = Output::new(&options.output).expect("Could not prepare output");
|
||||
|
||||
// Initialize client
|
||||
let mut cache = HashMap::new();
|
||||
let mut header_map = HeaderMap::new();
|
||||
header_map.insert(
|
||||
USER_AGENT,
|
||||
HeaderValue::from_str(&app_args.user_agent).expect("Invalid User-Agent header specified"),
|
||||
HeaderValue::from_str(&options.user_agent).expect("Invalid User-Agent header specified"),
|
||||
);
|
||||
|
||||
let timeout: u64 = if app_args.timeout > 0 {
|
||||
app_args.timeout
|
||||
let timeout: u64 = if options.timeout > 0 {
|
||||
options.timeout
|
||||
} else {
|
||||
std::u64::MAX / 4
|
||||
};
|
||||
let client = Client::builder()
|
||||
.timeout(Duration::from_secs(timeout))
|
||||
.danger_accept_invalid_certs(app_args.insecure)
|
||||
.danger_accept_invalid_certs(options.insecure)
|
||||
.default_headers(header_map)
|
||||
.build()
|
||||
.expect("Failed to initialize HTTP client");
|
||||
|
||||
// Retrieve root document
|
||||
if is_file_url(target_url) || is_http_url(target_url) {
|
||||
match retrieve_asset(&mut cache, &client, target_url, target_url, app_args.silent) {
|
||||
// At this stage we assume that the base URL is the same as the target URL
|
||||
base_url = str!(target_url);
|
||||
|
||||
// Retrieve target document
|
||||
if use_stdin {
|
||||
dom = html_to_dom(&read_stdin());
|
||||
} else if is_file_url(target_url) || is_http_url(target_url) {
|
||||
match retrieve_asset(&mut cache, &client, target_url, target_url, &options, 0) {
|
||||
Ok((data, final_url, _media_type)) => {
|
||||
base_url = final_url;
|
||||
if options.base_url.clone().unwrap_or(str!()).is_empty() {
|
||||
base_url = final_url
|
||||
}
|
||||
dom = html_to_dom(&String::from_utf8_lossy(&data));
|
||||
}
|
||||
Err(_) => {
|
||||
eprintln!("Could not retrieve target document");
|
||||
if !options.silent {
|
||||
eprintln!("Could not retrieve target document");
|
||||
}
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
} else if is_data_url(target_url) {
|
||||
let (media_type, data): (String, Vec<u8>) = data_url_to_data(target_url);
|
||||
let (media_type, data): (String, Vec<u8>) = parse_data_url(target_url);
|
||||
if !media_type.eq_ignore_ascii_case("text/html") {
|
||||
eprintln!("Unsupported data URL media type");
|
||||
if !options.silent {
|
||||
eprintln!("Unsupported data URL media type");
|
||||
}
|
||||
process::exit(1);
|
||||
}
|
||||
base_url = str!(target_url);
|
||||
dom = html_to_dom(&String::from_utf8_lossy(&data));
|
||||
} else {
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
let time_saved = time::now_utc();
|
||||
|
||||
walk_and_embed_assets(
|
||||
&mut cache,
|
||||
&client,
|
||||
&base_url,
|
||||
&dom.document,
|
||||
app_args.no_css,
|
||||
app_args.no_fonts,
|
||||
app_args.no_frames,
|
||||
app_args.no_js,
|
||||
app_args.no_images,
|
||||
app_args.silent,
|
||||
);
|
||||
|
||||
let mut html: String = stringify_document(
|
||||
&dom.document,
|
||||
app_args.no_css,
|
||||
app_args.no_frames,
|
||||
app_args.no_js,
|
||||
app_args.no_images,
|
||||
app_args.isolate,
|
||||
);
|
||||
|
||||
if !app_args.no_metadata {
|
||||
// Safe to unwrap (we just put this through an HTTP request)
|
||||
let mut clean_url = Url::parse(&base_url).unwrap();
|
||||
clean_url.set_fragment(None);
|
||||
// Prevent credentials from getting into metadata
|
||||
if is_http_url(&base_url) {
|
||||
// Only HTTP(S) URLs may feature credentials
|
||||
clean_url.set_username("").unwrap();
|
||||
clean_url.set_password(None).unwrap();
|
||||
}
|
||||
let metadata_comment = if is_http_url(&base_url) {
|
||||
format!(
|
||||
"<!-- Saved from {} at {} using {} v{} -->\n",
|
||||
&clean_url,
|
||||
time_saved.rfc3339(),
|
||||
env!("CARGO_PKG_NAME"),
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
)
|
||||
// Use custom base URL if specified, read and use what's in the DOM otherwise
|
||||
if !options.base_url.clone().unwrap_or(str!()).is_empty() {
|
||||
if is_data_url(options.base_url.clone().unwrap()) {
|
||||
if !options.silent {
|
||||
eprintln!("Data URLs cannot be used as base URL");
|
||||
}
|
||||
process::exit(1);
|
||||
} else {
|
||||
format!(
|
||||
"<!-- Saved from local source at {} using {} v{} -->\n",
|
||||
time_saved.rfc3339(),
|
||||
env!("CARGO_PKG_NAME"),
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
)
|
||||
};
|
||||
html.insert_str(0, &metadata_comment);
|
||||
base_url = options.base_url.clone().unwrap();
|
||||
}
|
||||
} else {
|
||||
if let Some(existing_base_url) = get_base_url(&dom.document) {
|
||||
base_url = resolve_url(target_url, existing_base_url).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
// Embed remote assets
|
||||
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
|
||||
|
||||
// Update or add new BASE tag to reroute network requests and hash-links in the final document
|
||||
if let Some(new_base_url) = options.base_url.clone() {
|
||||
dom = set_base_url(&dom.document, new_base_url);
|
||||
}
|
||||
|
||||
// Request and embed /favicon.ico (unless it's already linked in the document)
|
||||
if !options.no_images && is_http_url(target_url) && !has_favicon(&dom.document) {
|
||||
let favicon_ico_url: String = resolve_url(&base_url, "/favicon.ico").unwrap();
|
||||
|
||||
match retrieve_asset(
|
||||
&mut cache,
|
||||
&client,
|
||||
&base_url,
|
||||
&favicon_ico_url,
|
||||
&options,
|
||||
0,
|
||||
) {
|
||||
Ok((data, final_url, media_type)) => {
|
||||
let favicon_data_url: String = data_to_data_url(&media_type, &data, &final_url);
|
||||
dom = add_favicon(&dom.document, favicon_data_url);
|
||||
}
|
||||
Err(_) => {
|
||||
// Failed to retrieve favicon.ico
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Serialize DOM tree
|
||||
let mut result: String = stringify_document(&dom.document, &options);
|
||||
|
||||
// Add metadata tag
|
||||
if !options.no_metadata {
|
||||
let metadata_comment: String = create_metadata_tag(&target_url);
|
||||
result.insert_str(0, &metadata_comment);
|
||||
if metadata_comment.len() > 0 {
|
||||
result.insert_str(metadata_comment.len(), "\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Write result into stdout or file
|
||||
output
|
||||
.writeln_str(&html)
|
||||
.writeln_str(&result)
|
||||
.expect("Could not write HTML output");
|
||||
}
|
||||
|
||||
102
src/opts.rs
Normal file
102
src/opts.rs
Normal file
@@ -0,0 +1,102 @@
|
||||
use clap::{App, Arg};
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Options {
|
||||
pub no_audio: bool,
|
||||
pub base_url: Option<String>,
|
||||
pub no_css: bool,
|
||||
pub ignore_errors: bool,
|
||||
pub no_frames: bool,
|
||||
pub no_fonts: bool,
|
||||
pub no_images: bool,
|
||||
pub isolate: bool,
|
||||
pub no_js: bool,
|
||||
pub insecure: bool,
|
||||
pub no_metadata: bool,
|
||||
pub output: String,
|
||||
pub silent: bool,
|
||||
pub timeout: u64,
|
||||
pub user_agent: String,
|
||||
pub no_video: bool,
|
||||
pub target: String,
|
||||
}
|
||||
|
||||
const ASCII: &str = " \
|
||||
_____ ______________ __________ ___________________ ___
|
||||
| \\ / \\ | | | | | |
|
||||
| \\_/ __ \\_| __ | | ___ ___ |__| |
|
||||
| | | | | | | | | | | |
|
||||
| |\\ /| |__| _ |__| |____| | | | | __ |
|
||||
| | \\___/ | | \\ | | | | | | |
|
||||
|___| |__________| \\_____________________| |___| |___| |___|
|
||||
";
|
||||
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
|
||||
const DEFAULT_USER_AGENT: &str =
|
||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0";
|
||||
|
||||
impl Options {
|
||||
pub fn from_args() -> Options {
|
||||
let app = App::new(env!("CARGO_PKG_NAME"))
|
||||
.version(crate_version!())
|
||||
.author(crate_authors!("\n"))
|
||||
.about(format!("{}\n{}", ASCII, crate_description!()).as_str())
|
||||
.args_from_usage("-a, --no-audio 'Removes audio sources'")
|
||||
.args_from_usage("-b, --base-url=[http://localhost/] 'Sets custom base URL'")
|
||||
.args_from_usage("-c, --no-css 'Removes CSS'")
|
||||
.args_from_usage("-e, --ignore-errors 'Ignore network errors'")
|
||||
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
|
||||
.args_from_usage("-F, --no-fonts 'Removes fonts'")
|
||||
.args_from_usage("-i, --no-images 'Removes images'")
|
||||
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
|
||||
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
||||
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
|
||||
.args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
|
||||
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
|
||||
.args_from_usage("-s, --silent 'Suppresses verbosity'")
|
||||
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
|
||||
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
|
||||
.args_from_usage("-v, --no-video 'Removes video sources'")
|
||||
.arg(
|
||||
Arg::with_name("target")
|
||||
.required(true)
|
||||
.takes_value(true)
|
||||
.index(1)
|
||||
.help("URL or file path"),
|
||||
)
|
||||
.get_matches();
|
||||
let mut options: Options = Options::default();
|
||||
|
||||
// Process the command
|
||||
options.target = app
|
||||
.value_of("target")
|
||||
.expect("please set target")
|
||||
.to_string();
|
||||
options.no_audio = app.is_present("no-audio");
|
||||
if let Some(base_url) = app.value_of("base-url") {
|
||||
options.base_url = Some(str!(base_url));
|
||||
}
|
||||
options.no_css = app.is_present("no-css");
|
||||
options.ignore_errors = app.is_present("ignore-errors");
|
||||
options.no_frames = app.is_present("no-frames");
|
||||
options.no_fonts = app.is_present("no-fonts");
|
||||
options.no_images = app.is_present("no-images");
|
||||
options.isolate = app.is_present("isolate");
|
||||
options.no_js = app.is_present("no-js");
|
||||
options.insecure = app.is_present("insecure");
|
||||
options.no_metadata = app.is_present("no-metadata");
|
||||
options.output = app.value_of("output").unwrap_or("").to_string();
|
||||
options.silent = app.is_present("silent");
|
||||
options.timeout = app
|
||||
.value_of("timeout")
|
||||
.unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string())
|
||||
.parse::<u64>()
|
||||
.unwrap();
|
||||
options.user_agent = app
|
||||
.value_of("user-agent")
|
||||
.unwrap_or(DEFAULT_USER_AGENT)
|
||||
.to_string();
|
||||
options.no_video = app.is_present("no-video");
|
||||
|
||||
options
|
||||
}
|
||||
}
|
||||
123
src/tests/cli/base_url.rs
Normal file
123
src/tests/cli/base_url.rs
Normal file
@@ -0,0 +1,123 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use assert_cmd::prelude::*;
|
||||
use std::env;
|
||||
use std::process::Command;
|
||||
|
||||
#[test]
|
||||
fn add_new_when_provided() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-b")
|
||||
.arg("http://localhost:8000/")
|
||||
.arg("data:text/html,Hello%2C%20World!")
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDOUT should contain newly added base URL
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head>\
|
||||
<base href=\"http://localhost:8000/\"></base>\
|
||||
</head><body>Hello, World!</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn keep_existing_when_none_provided() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDOUT should contain the document's existing base URL, left unchanged
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head>\
|
||||
<base href=\"http://localhost:8000/\">\
|
||||
</head><body>Hello, World!</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn override_existing_when_provided() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-b")
|
||||
.arg("http://localhost/")
|
||||
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDOUT should contain the provided base URL in place of the existing one
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head>\
|
||||
<base href=\"http://localhost/\">\
|
||||
</head><body>Hello, World!</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remove_existing_when_empty_provided() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-b")
|
||||
.arg("")
|
||||
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDOUT should contain the existing BASE tag with its URL emptied
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head>\
|
||||
<base href=\"\">\
|
||||
</head><body>Hello, World!</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -128,13 +128,41 @@ mod passing {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remove_fonts_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-F")
|
||||
.arg("data:text/html,<style>@font-face { font-family: myFont; src: url(font.woff); }</style>Hi")
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDOUT should contain HTML with no web fonts
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"font-src 'none';\"></meta>\
|
||||
<style></style>\
|
||||
</head><body>Hi</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-f")
|
||||
.arg("data:text/html,<iframe src=\"https://google.com\"></iframe>Hi")
|
||||
.arg("data:text/html,<iframe src=\"https://duckduckgo.com\"></iframe>Hi")
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
@@ -142,7 +170,7 @@ mod passing {
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>\
|
||||
</head><body><iframe src=\"\"></iframe>Hi</body></html>\n"
|
||||
);
|
||||
|
||||
@@ -259,8 +287,8 @@ mod passing {
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
format!(
|
||||
"\
|
||||
{file}{cwd}/src/tests/data/basic/local-file.html\n\
|
||||
{file}{cwd}/src/tests/data/basic/local-style.css\n\
|
||||
{file}{cwd}/src/tests/data/basic/local-file.html\n \
|
||||
{file}{cwd}/src/tests/data/basic/local-style.css\n \
|
||||
{file}{cwd}/src/tests/data/basic/local-script.js\n\
|
||||
",
|
||||
file = file_url_protocol,
|
||||
@@ -309,7 +337,7 @@ mod passing {
|
||||
<title>Local HTML file</title>\n \
|
||||
<link rel=\"stylesheet\" type=\"text/css\">\n \
|
||||
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
|
||||
<img alt=\"\" src=\"{empty_image}\">\n \
|
||||
<img src=\"{empty_image}\" alt=\"\">\n \
|
||||
<a href=\"file://local-file.html/\">Tricky href</a>\n \
|
||||
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
|
||||
<script></script>\n\n\n\n\
|
||||
@@ -371,7 +399,7 @@ mod passing {
|
||||
<title>Local HTML file</title>\n \
|
||||
<link rel=\"stylesheet\" type=\"text/css\">\n \
|
||||
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
|
||||
<img alt=\"\" src=\"{empty_image}\">\n \
|
||||
<img src=\"{empty_image}\" alt=\"\">\n \
|
||||
<a href=\"file://local-file.html/\">Tricky href</a>\n \
|
||||
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
|
||||
<script></script>\n\n\n\n\
|
||||
@@ -461,7 +489,7 @@ mod passing {
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
format!(
|
||||
"\
|
||||
{file}{html_path}\n\
|
||||
{file}{html_path}\n \
|
||||
{file}{svg_path}\n\
|
||||
",
|
||||
file = file_url_prefix,
|
||||
@@ -512,9 +540,9 @@ mod passing {
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
format!(
|
||||
"\
|
||||
{file}{html_path}\n\
|
||||
{file}{css_path}\n\
|
||||
{file}{css_path}\n\
|
||||
{file}{html_path}\n \
|
||||
{file}{css_path}\n \
|
||||
{file}{css_path}\n \
|
||||
{file}{css_path}\n\
|
||||
",
|
||||
file = file_url_prefix,
|
||||
2
src/tests/cli/mod.rs
Normal file
2
src/tests/cli/mod.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
mod base_url;
|
||||
mod basic;
|
||||
@@ -7,17 +7,36 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::css;
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::css;
|
||||
use crate::opts::Options;
|
||||
|
||||
#[test]
|
||||
fn empty_input() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let options = Options::default();
|
||||
|
||||
assert_eq!(css::embed_css(cache, &client, "", "", &options, 0), "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn trim_if_empty() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let options = Options::default();
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, "", "", false, false, false,),
|
||||
css::embed_css(
|
||||
cache,
|
||||
&client,
|
||||
"https://doesntmatter.local/",
|
||||
"\t \t ",
|
||||
&options,
|
||||
0,
|
||||
),
|
||||
""
|
||||
);
|
||||
}
|
||||
@@ -26,6 +45,9 @@ mod passing {
|
||||
fn style_exclude_unquoted_images() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
|
||||
const STYLE: &str = "/* border: none;*/\
|
||||
background-image: url(https://somewhere.com/bg.png); \
|
||||
@@ -41,9 +63,8 @@ mod passing {
|
||||
&client,
|
||||
"https://doesntmatter.local/",
|
||||
&STYLE,
|
||||
false,
|
||||
true,
|
||||
true,
|
||||
&options,
|
||||
0,
|
||||
),
|
||||
format!(
|
||||
"/* border: none;*/\
|
||||
@@ -62,6 +83,9 @@ mod passing {
|
||||
fn style_exclude_single_quoted_images() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
|
||||
const STYLE: &str = "/* border: none;*/\
|
||||
background-image: url('https://somewhere.com/bg.png'); \
|
||||
@@ -72,7 +96,7 @@ mod passing {
|
||||
height: calc(100vh - 10pt)";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, "", &STYLE, false, true, true,),
|
||||
css::embed_css(cache, &client, "", &STYLE, &options, 0),
|
||||
format!(
|
||||
"/* border: none;*/\
|
||||
background-image: url('{empty_image}'); \
|
||||
@@ -90,6 +114,8 @@ mod passing {
|
||||
fn style_block() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
const CSS: &str = "\
|
||||
#id.class-name:not(:nth-child(3n+0)) {\n \
|
||||
@@ -100,7 +126,7 @@ mod passing {
|
||||
html > body {}";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, "file:///", &CSS, false, false, true,),
|
||||
css::embed_css(cache, &client, "file:///", &CSS, &options, 0),
|
||||
CSS
|
||||
);
|
||||
}
|
||||
@@ -109,6 +135,8 @@ mod passing {
|
||||
fn attribute_selectors() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
const CSS: &str = "\
|
||||
[data-value] {
|
||||
@@ -140,16 +168,15 @@ mod passing {
|
||||
}
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, "", &CSS, false, false, false,),
|
||||
CSS
|
||||
);
|
||||
assert_eq!(css::embed_css(cache, &client, "", &CSS, &options, 0), CSS);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn import_string() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
const CSS: &str = "\
|
||||
@charset 'UTF-8';\n\
|
||||
@@ -165,9 +192,8 @@ mod passing {
|
||||
&client,
|
||||
"https://doesntmatter.local/",
|
||||
&CSS,
|
||||
false,
|
||||
false,
|
||||
true,
|
||||
&options,
|
||||
0,
|
||||
),
|
||||
"\
|
||||
@charset 'UTF-8';\n\
|
||||
@@ -183,6 +209,8 @@ mod passing {
|
||||
fn hash_urls() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
const CSS: &str = "\
|
||||
body {\n \
|
||||
@@ -200,9 +228,8 @@ mod passing {
|
||||
&client,
|
||||
"https://doesntmatter.local/",
|
||||
&CSS,
|
||||
false,
|
||||
false,
|
||||
true,
|
||||
&options,
|
||||
0,
|
||||
),
|
||||
CSS
|
||||
);
|
||||
@@ -212,6 +239,8 @@ mod passing {
|
||||
fn transform_percentages_and_degrees() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
const CSS: &str = "\
|
||||
div {\n \
|
||||
@@ -227,9 +256,8 @@ mod passing {
|
||||
&client,
|
||||
"https://doesntmatter.local/",
|
||||
&CSS,
|
||||
false,
|
||||
false,
|
||||
true,
|
||||
&options,
|
||||
0,
|
||||
),
|
||||
CSS
|
||||
);
|
||||
@@ -239,6 +267,8 @@ mod passing {
|
||||
fn unusual_indents() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
const CSS: &str = "\
|
||||
.is\\:good:hover {\n \
|
||||
@@ -256,9 +286,8 @@ mod passing {
|
||||
&client,
|
||||
"https://doesntmatter.local/",
|
||||
&CSS,
|
||||
false,
|
||||
false,
|
||||
true,
|
||||
&options,
|
||||
0,
|
||||
),
|
||||
CSS
|
||||
);
|
||||
@@ -268,6 +297,9 @@ mod passing {
|
||||
fn exclude_fonts() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.no_fonts = true;
|
||||
options.silent = true;
|
||||
|
||||
const CSS: &str = "\
|
||||
@font-face {\n \
|
||||
@@ -309,9 +341,8 @@ mod passing {
|
||||
&client,
|
||||
"https://doesntmatter.local/",
|
||||
&CSS,
|
||||
true,
|
||||
false,
|
||||
true,
|
||||
&options,
|
||||
0,
|
||||
),
|
||||
CSS_OUT
|
||||
);
|
||||
|
||||
23
src/tests/data/import-css-via-data-url/index.html
Normal file
23
src/tests/data/import-css-via-data-url/index.html
Normal file
@@ -0,0 +1,23 @@
|
||||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<title>Attempt to import CSS via data URL asset</title>
|
||||
<style>
|
||||
|
||||
body {
|
||||
background-color: white;
|
||||
color: black;
|
||||
}
|
||||
|
||||
</style>
|
||||
<link href="data:text/css;base64,QGltcG9ydCAic3R5bGUuY3NzIjsK" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<p>If you see pink background with white foreground then we’re in trouble</p>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
4
src/tests/data/import-css-via-data-url/style.css
Normal file
4
src/tests/data/import-css-via-data-url/style.css
Normal file
@@ -0,0 +1,4 @@
|
||||
body {
|
||||
background-color: pink;
|
||||
color: white;
|
||||
}
|
||||
19
src/tests/data/integrity/index.html
Normal file
19
src/tests/data/integrity/index.html
Normal file
@@ -0,0 +1,19 @@
|
||||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<meta http-equiv="Content-Security-Policy" content="default-src 'unsafe-inline' file:;" />
|
||||
<title>Local HTML file</title>
|
||||
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-IWaCTORHkRhOWzcZeILSVmV6V6gPTHgNem6o6rsFAyaKTieDFkeeMrWjtO0DuWrX3bqZY46CVTZXUu0mia0qXQ==" crossorigin="anonymous" />
|
||||
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-vWBzl4NE9oIg8NFOPAyOZbaam0UXWr6aDHPaY2kodSzAFl+mKoj/RMNc6C31NDqK4mE2i68IWxYWqWJPLCgPOw==" crossorigin="anonymous" />
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<p>This page should have black background and white foreground, but only when served via http: (not via file:)</p>
|
||||
<script src="script.js" integrity="sha256-ecrEsYh3+ICCX8BCrNSotXgI5534282JwJjx8Q9ZWLc="></script>
|
||||
<script src="script.js" integrity="sha256-6idk9dK0bOkVdG7Oz4/0YLXSJya8xZHqbRZKMhYrt6o="></script>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
3
src/tests/data/integrity/script.js
Normal file
3
src/tests/data/integrity/script.js
Normal file
@@ -0,0 +1,3 @@
|
||||
function noop() {
|
||||
console.log("monolith");
|
||||
}
|
||||
4
src/tests/data/integrity/style.css
Normal file
4
src/tests/data/integrity/style.css
Normal file
@@ -0,0 +1,4 @@
|
||||
body {
|
||||
background-color: #000;
|
||||
color: #FFF;
|
||||
}
|
||||
29
src/tests/html/add_favicon.rs
Normal file
29
src/tests/html/add_favicon.rs
Normal file
@@ -0,0 +1,29 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use html5ever::serialize::{serialize, SerializeOpts};
|
||||
|
||||
use crate::html;
|
||||
|
||||
#[test]
|
||||
fn basic() {
|
||||
let html = "<div>text</div>";
|
||||
let mut dom = html::html_to_dom(&html);
|
||||
|
||||
dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string());
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"<html><head><link rel=\"icon\" href=\"I_AM_A_FAVICON_DATA_URL\"></link></head><body><div>text</div></body></html>"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -11,7 +11,7 @@ mod passing {
|
||||
|
||||
#[test]
|
||||
fn empty_input_sha256() {
|
||||
assert!(html::has_proper_integrity(
|
||||
assert!(html::check_integrity(
|
||||
"".as_bytes(),
|
||||
"sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="
|
||||
));
|
||||
@@ -19,7 +19,7 @@ mod passing {
|
||||
|
||||
#[test]
|
||||
fn sha256() {
|
||||
assert!(html::has_proper_integrity(
|
||||
assert!(html::check_integrity(
|
||||
"abcdef0123456789".as_bytes(),
|
||||
"sha256-9EWAHgy4mSYsm54hmDaIDXPKLRsLnBX7lZyQ6xISNOM="
|
||||
));
|
||||
@@ -27,7 +27,7 @@ mod passing {
|
||||
|
||||
#[test]
|
||||
fn sha384() {
|
||||
assert!(html::has_proper_integrity(
|
||||
assert!(html::check_integrity(
|
||||
"abcdef0123456789".as_bytes(),
|
||||
"sha384-gc9l7omltke8C33bedgh15E12M7RrAQa5t63Yb8APlpe7ZhiqV23+oqiulSJl3Kw"
|
||||
));
|
||||
@@ -35,7 +35,7 @@ mod passing {
|
||||
|
||||
#[test]
|
||||
fn sha512() {
|
||||
assert!(html::has_proper_integrity(
|
||||
assert!(html::check_integrity(
|
||||
"abcdef0123456789".as_bytes(),
|
||||
"sha512-zG5B88cYMqcdiMi9gz0XkOFYw2BpjeYdn5V6+oFrMgSNjRpqL7EF8JEwl17ztZbK3N7I/tTwp3kxQbN1RgFBww=="
|
||||
));
|
||||
@@ -55,20 +55,17 @@ mod failing {
|
||||
|
||||
#[test]
|
||||
fn empty_hash() {
|
||||
assert!(!html::has_proper_integrity(
|
||||
"abcdef0123456789".as_bytes(),
|
||||
""
|
||||
));
|
||||
assert!(!html::check_integrity("abcdef0123456789".as_bytes(), ""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_input_empty_hash() {
|
||||
assert!(!html::has_proper_integrity("".as_bytes(), ""));
|
||||
assert!(!html::check_integrity("".as_bytes(), ""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sha256() {
|
||||
assert!(!html::has_proper_integrity(
|
||||
assert!(!html::check_integrity(
|
||||
"abcdef0123456789".as_bytes(),
|
||||
"sha256-badhash"
|
||||
));
|
||||
@@ -76,7 +73,7 @@ mod failing {
|
||||
|
||||
#[test]
|
||||
fn sha384() {
|
||||
assert!(!html::has_proper_integrity(
|
||||
assert!(!html::check_integrity(
|
||||
"abcdef0123456789".as_bytes(),
|
||||
"sha384-badhash"
|
||||
));
|
||||
@@ -84,7 +81,7 @@ mod failing {
|
||||
|
||||
#[test]
|
||||
fn sha512() {
|
||||
assert!(!html::has_proper_integrity(
|
||||
assert!(!html::check_integrity(
|
||||
"abcdef0123456789".as_bytes(),
|
||||
"sha512-badhash"
|
||||
));
|
||||
80
src/tests/html/compose_csp.rs
Normal file
80
src/tests/html/compose_csp.rs
Normal file
@@ -0,0 +1,80 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::html;
|
||||
use crate::opts::Options;
|
||||
|
||||
#[test]
|
||||
fn isolated() {
|
||||
let mut options = Options::default();
|
||||
options.isolate = true;
|
||||
let csp_content = html::compose_csp(&options);
|
||||
|
||||
assert_eq!(csp_content, "default-src 'unsafe-inline' data:;");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_css() {
|
||||
let mut options = Options::default();
|
||||
options.no_css = true;
|
||||
let csp_content = html::compose_csp(&options);
|
||||
|
||||
assert_eq!(csp_content, "style-src 'none';");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_fonts() {
|
||||
let mut options = Options::default();
|
||||
options.no_fonts = true;
|
||||
let csp_content = html::compose_csp(&options);
|
||||
|
||||
assert_eq!(csp_content, "font-src 'none';");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_frames() {
|
||||
let mut options = Options::default();
|
||||
options.no_frames = true;
|
||||
let csp_content = html::compose_csp(&options);
|
||||
|
||||
assert_eq!(csp_content, "frame-src 'none'; child-src 'none';");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_js() {
|
||||
let mut options = Options::default();
|
||||
options.no_js = true;
|
||||
let csp_content = html::compose_csp(&options);
|
||||
|
||||
assert_eq!(csp_content, "script-src 'none';");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_images() {
|
||||
let mut options = Options::default();
|
||||
options.no_images = true;
|
||||
let csp_content = html::compose_csp(&options);
|
||||
|
||||
assert_eq!(csp_content, "img-src data:;");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn all() {
|
||||
let mut options = Options::default();
|
||||
options.isolate = true;
|
||||
options.no_css = true;
|
||||
options.no_fonts = true;
|
||||
options.no_frames = true;
|
||||
options.no_js = true;
|
||||
options.no_images = true;
|
||||
let csp_content = html::compose_csp(&options);
|
||||
|
||||
assert_eq!(csp_content, "default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;");
|
||||
}
|
||||
}
|
||||
82
src/tests/html/create_metadata_tag.rs
Normal file
82
src/tests/html/create_metadata_tag.rs
Normal file
@@ -0,0 +1,82 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    use chrono::prelude::*;

    use crate::html;

    /// Current UTC time rendered as RFC 3339 with whole-second precision and a
    /// `Z` suffix, which is the timestamp format these tests expect in the tag.
    fn utc_now_secs() -> String {
        Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true)
    }

    /// Calls `html::create_metadata_tag(url)` and asserts that the result
    /// equals `expected(timestamp)`.
    ///
    /// The clock is sampled both before and after the call and either sample
    /// is accepted. Sampling only once made the tests fail spuriously whenever
    /// the wall clock ticked over to the next second between the sample and
    /// the call under test.
    fn assert_metadata_tag<F>(url: &str, expected: F)
    where
        F: Fn(&str) -> String,
    {
        let before = utc_now_secs();
        let metadata_comment: String = html::create_metadata_tag(url);
        let after = utc_now_secs();

        let expected_before = expected(&before);
        let expected_after = expected(&after);

        assert!(
            metadata_comment == expected_before || metadata_comment == expected_after,
            "unexpected metadata tag {:?}; expected {:?} or {:?}",
            metadata_comment,
            expected_before,
            expected_after
        );
    }

    /// A remote (http) URL is expected to appear verbatim in the tag.
    #[test]
    fn http_url() {
        let url = "http://192.168.1.1/";

        assert_metadata_tag(url, |timestamp| {
            format!(
                "<!-- Saved from {} at {} using {} v{} -->",
                &url,
                timestamp,
                env!("CARGO_PKG_NAME"),
                env!("CARGO_PKG_VERSION"),
            )
        });
    }

    /// A `file:` URL is expected to be reported as "local source", not echoed.
    #[test]
    fn file_url() {
        let url = "file:///home/monolith/index.html";

        assert_metadata_tag(url, |timestamp| {
            format!(
                "<!-- Saved from local source at {} using {} v{} -->",
                timestamp,
                env!("CARGO_PKG_NAME"),
                env!("CARGO_PKG_VERSION"),
            )
        });
    }

    /// A `data:` URL is expected to be reported as "local source", not echoed.
    #[test]
    fn data_url() {
        let url = "data:text/html,Hello%2C%20World!";

        assert_metadata_tag(url, |timestamp| {
            format!(
                "<!-- Saved from local source at {} using {} v{} -->",
                timestamp,
                env!("CARGO_PKG_NAME"),
                env!("CARGO_PKG_VERSION"),
            )
        });
    }
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod failing {
    use crate::html;

    /// An empty URL is expected to produce no metadata tag at all.
    #[test]
    fn empty_string() {
        let metadata_comment = html::create_metadata_tag("");

        assert_eq!(metadata_comment, "");
    }
}
|
||||
@@ -7,20 +7,94 @@
|
||||
|
||||
#[cfg(test)]
mod passing {
    use reqwest::blocking::Client;
    use std::collections::HashMap;

    use crate::html;
    use crate::opts::Options;

    /// With images disabled, every candidate in a plain two-entry srcset is
    /// expected to be replaced by the empty-image placeholder while its
    /// descriptor (`1x`, `2x`) is kept.
    #[test]
    fn replace_with_empty_images() {
        let cache = &mut HashMap::new();
        let client = Client::new();
        let srcset_value = "small.png 1x, large.png 2x";
        let mut options = Options::default();
        options.no_images = true;
        options.silent = true;
        let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);

        assert_eq!(
            embedded_css,
            format!("{} 1x, {} 2x", empty_image!(), empty_image!()),
        );
    }

    /// Commas that are part of a file name must not be treated as candidate
    /// separators.
    #[test]
    fn commas_within_file_names() {
        let cache = &mut HashMap::new();
        let client = Client::new();
        let srcset_value = "small,s.png 1x, large,l.png 2x";
        let mut options = Options::default();
        options.no_images = true;
        options.silent = true;
        let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);

        assert_eq!(
            embedded_css,
            format!("{} 1x, {} 2x", empty_image!(), empty_image!()),
        );
    }

    /// Candidates separated by a comma followed by a newline are expected to
    /// come out joined by ", ".
    #[test]
    fn tabs_and_newlines_after_commas() {
        let cache = &mut HashMap::new();
        let client = Client::new();
        let srcset_value = "small,s.png 1x,\nmedium,m.png 2x,\nlarge,l.png 3x";
        let mut options = Options::default();
        options.no_images = true;
        options.silent = true;
        let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);

        assert_eq!(
            embedded_css,
            format!(
                "{} 1x, {} 2x, {} 3x",
                empty_image!(),
                empty_image!(),
                empty_image!()
            ),
        );
    }
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod failing {
    use reqwest::blocking::Client;
    use std::collections::HashMap;

    use crate::html;
    use crate::opts::Options;

    /// A srcset ending in a dangling comma: the expected output keeps that
    /// trailing comma after the last rewritten candidate.
    #[test]
    fn trailing_comma() {
        let srcset_value = "small.png 1x, large.png 2x,";

        let mut opts = Options::default();
        opts.no_images = true;
        opts.silent = true;

        let asset_cache = &mut HashMap::new();
        let http_client = Client::new();
        let rewritten =
            html::embed_srcset(asset_cache, &http_client, "", &srcset_value, &opts, 0);

        let expected = format!("{} 1x, {} 2x,", empty_image!(), empty_image!());
        assert_eq!(rewritten, expected);
    }
}
|
||||
|
||||
104
src/tests/html/get_base_url.rs
Normal file
104
src/tests/html/get_base_url.rs
Normal file
@@ -0,0 +1,104 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    use crate::html;

    /// A document with one `<base>` element: its `href` is expected back
    /// unchanged.
    #[test]
    fn present() {
        // `\n\` keeps the newline in the string and lets the source stay indented.
        let markup = "<!doctype html>\n\
                      <html>\n\
                      <head>\n\
                      <base href=\"https://musicbrainz.org\" />\n\
                      </head>\n\
                      <body>\n\
                      </body>\n\
                      </html>";
        let dom = html::html_to_dom(&markup);

        let base_url = html::get_base_url(&dom.document);

        assert_eq!(base_url, Some(str!("https://musicbrainz.org")));
    }

    /// With several `<base>` elements, the first one in document order is
    /// expected to win.
    #[test]
    fn multiple_tags() {
        let markup = "<!doctype html>\n\
                      <html>\n\
                      <head>\n\
                      <base href=\"https://www.discogs.com/\" />\n\
                      <base href=\"https://musicbrainz.org\" />\n\
                      </head>\n\
                      <body>\n\
                      </body>\n\
                      </html>";
        let dom = html::html_to_dom(&markup);

        let base_url = html::get_base_url(&dom.document);

        assert_eq!(base_url, Some(str!("https://www.discogs.com/")));
    }
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod failing {
    use crate::html;

    /// No `<base>` element at all: no base URL is expected.
    #[test]
    fn absent() {
        // `\n\` keeps the newline in the string and lets the source stay indented.
        let markup = "<!doctype html>\n\
                      <html>\n\
                      <head>\n\
                      </head>\n\
                      <body>\n\
                      </body>\n\
                      </html>";
        let dom = html::html_to_dom(&markup);

        let base_url = html::get_base_url(&dom.document);

        assert_eq!(base_url, None);
    }

    /// A `<base>` element without an `href` attribute: no base URL is expected.
    #[test]
    fn no_href() {
        let markup = "<!doctype html>\n\
                      <html>\n\
                      <head>\n\
                      <base />\n\
                      </head>\n\
                      <body>\n\
                      </body>\n\
                      </html>";
        let dom = html::html_to_dom(&markup);

        let base_url = html::get_base_url(&dom.document);

        assert_eq!(base_url, None);
    }

    /// A `<base>` element whose `href` is empty: an empty string is expected,
    /// not `None`.
    #[test]
    fn empty_href() {
        let markup = "<!doctype html>\n\
                      <html>\n\
                      <head>\n\
                      <base href=\"\" />\n\
                      </head>\n\
                      <body>\n\
                      </body>\n\
                      </html>";
        let dom = html::html_to_dom(&markup);

        let base_url = html::get_base_url(&dom.document);

        assert_eq!(base_url, Some(str!()));
    }
}
|
||||
54
src/tests/html/get_node_attr.rs
Normal file
54
src/tests/html/get_node_attr.rs
Normal file
@@ -0,0 +1,54 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    use html5ever::rcdom::{Handle, NodeData};

    use crate::html;

    /// A `<DIV>` written with two style attributes (`STYLE` and `style`):
    /// `get_node_attr(node, "style")` is expected to return the value of the
    /// first one, and a missing attribute is expected to yield `None`.
    #[test]
    fn div_two_style_attributes() {
        /// Depth-first visitor. Every node reached bumps `visited`; element
        /// nodes of interest get their attributes checked along the way.
        fn visit(node: &Handle, visited: &mut i8) {
            *visited += 1;

            match node.data {
                // The document root has nothing to inspect; just descend.
                NodeData::Document => {}
                NodeData::Element { ref name, .. } => {
                    let tag: &str = name.local.as_ref();

                    match tag {
                        "body" => assert_eq!(html::get_node_attr(node, "class"), None),
                        "div" => assert_eq!(
                            html::get_node_attr(node, "style"),
                            Some(str!("color: blue;"))
                        ),
                        _ => {}
                    }
                }
                // Any other node kind is counted but not descended into.
                _ => return,
            }

            for child in node.children.borrow().iter() {
                visit(child, visited);
            }
        }

        let markup = "<!doctype html><html><head></head><body><DIV STYLE=\"color: blue;\" style=\"display: none;\"></div></body></html>";
        let dom = html::html_to_dom(&markup);

        let mut visited = 0;
        visit(&dom.document, &mut visited);

        assert_eq!(visited, 6);
    }
}
|
||||
@@ -7,11 +7,12 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::html;
|
||||
use html5ever::rcdom::{Handle, NodeData};
|
||||
|
||||
use crate::html;
|
||||
|
||||
#[test]
|
||||
fn get_node_name() {
|
||||
fn parent_node_names() {
|
||||
let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let mut count = 0;
|
||||
|
||||
50
src/tests/html/has_favicon.rs
Normal file
50
src/tests/html/has_favicon.rs
Normal file
@@ -0,0 +1,50 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    use crate::html;

    /// A `<link rel="icon">` counts as a favicon.
    #[test]
    fn icon() {
        let markup = "<link rel=\"icon\" href=\"\" /><div>text</div>";
        let dom = html::html_to_dom(&markup);

        assert!(html::has_favicon(&dom.document));
    }

    /// A `<link rel="shortcut icon">` counts as a favicon too.
    #[test]
    fn shortcut_icon() {
        let markup = "<link rel=\"shortcut icon\" href=\"\" /><div>text</div>";
        let dom = html::html_to_dom(&markup);

        assert!(html::has_favicon(&dom.document));
    }
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod failing {
    use crate::html;

    /// A document without any icon `<link>` must not report a favicon.
    #[test]
    fn absent() {
        let markup = "<div>text</div>";
        let dom = html::html_to_dom(&markup);

        assert!(!html::has_favicon(&dom.document));
    }
}
|
||||
@@ -23,16 +23,6 @@ mod passing {
|
||||
fn icon_uppercase() {
|
||||
assert!(html::is_icon("ICON"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mask_icon() {
|
||||
assert!(html::is_icon("mask-icon"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fluid_icon() {
|
||||
assert!(html::is_icon("fluid-icon"));
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
@@ -46,6 +36,16 @@ mod passing {
|
||||
mod failing {
|
||||
use crate::html;
|
||||
|
||||
#[test]
|
||||
fn mask_icon() {
|
||||
assert!(!html::is_icon("mask-icon"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fluid_icon() {
|
||||
assert!(!html::is_icon("fluid-icon"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stylesheet() {
|
||||
assert!(!html::is_icon("stylesheet"));
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
mod add_favicon;
|
||||
mod check_integrity;
|
||||
mod compose_csp;
|
||||
mod create_metadata_tag;
|
||||
mod embed_srcset;
|
||||
mod get_base_url;
|
||||
mod get_node_attr;
|
||||
mod get_node_name;
|
||||
mod has_proper_integrity;
|
||||
mod has_favicon;
|
||||
mod is_icon;
|
||||
mod set_node_attr;
|
||||
mod stringify_document;
|
||||
mod walk_and_embed_assets;
|
||||
|
||||
105
src/tests/html/set_node_attr.rs
Normal file
105
src/tests/html/set_node_attr.rs
Normal file
@@ -0,0 +1,105 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    use html5ever::rcdom::{Handle, NodeData};

    use crate::html;

    /// Exercises `set_node_attr` on `<html lang>` (overwrite, remove, set to
    /// empty) and on `<body style>` (create an attribute that was absent).
    #[test]
    fn html_lang_and_body_style() {
        /// Depth-first visitor. Every node reached bumps `visited`; the
        /// `<html>` and `<body>` elements get their attributes mutated and
        /// re-read along the way.
        fn visit(node: &Handle, visited: &mut i8) {
            *visited += 1;

            match node.data {
                // The document root has nothing to inspect; just descend.
                NodeData::Document => {}
                NodeData::Element { ref name, .. } => {
                    let tag: &str = name.local.as_ref();

                    match tag {
                        "html" => {
                            // Value parsed from the markup.
                            assert_eq!(html::get_node_attr(node, "lang"), Some(str!("en")));

                            // Overwriting an existing attribute.
                            html::set_node_attr(node, "lang", Some(str!("de")));
                            assert_eq!(html::get_node_attr(node, "lang"), Some(str!("de")));

                            // `None` removes the attribute.
                            html::set_node_attr(node, "lang", None);
                            assert_eq!(html::get_node_attr(node, "lang"), None);

                            // An empty value is kept as an empty string, not dropped.
                            html::set_node_attr(node, "lang", Some(str!("")));
                            assert_eq!(html::get_node_attr(node, "lang"), Some(str!("")));
                        }
                        "body" => {
                            assert_eq!(html::get_node_attr(node, "style"), None);

                            // Setting an attribute that did not exist creates it.
                            html::set_node_attr(node, "style", Some(str!("display: none;")));
                            assert_eq!(
                                html::get_node_attr(node, "style"),
                                Some(str!("display: none;"))
                            );
                        }
                        _ => {}
                    }
                }
                // Any other node kind is counted but not descended into.
                _ => return,
            }

            for child in node.children.borrow().iter() {
                visit(child, visited);
            }
        }

        let markup = "<!doctype html><html lang=\"en\"><head></head><body></body></html>";
        let dom = html::html_to_dom(&markup);

        let mut visited = 0;
        visit(&dom.document, &mut visited);

        assert_eq!(visited, 5);
    }

    /// `<body>` written with a duplicated `background` attribute: the first
    /// value is expected to be the one visible, and removing the attribute is
    /// expected to leave none behind.
    #[test]
    fn body_background() {
        /// Depth-first visitor. Every node reached bumps `visited`; the
        /// `<body>` element gets its `background` attribute checked and removed.
        fn visit(node: &Handle, visited: &mut i8) {
            *visited += 1;

            match node.data {
                // The document root has nothing to inspect; just descend.
                NodeData::Document => {}
                NodeData::Element { ref name, .. } => {
                    let tag: &str = name.local.as_ref();

                    if tag == "body" {
                        assert_eq!(html::get_node_attr(node, "background"), Some(str!("1")));

                        html::set_node_attr(node, "background", None);
                        assert_eq!(html::get_node_attr(node, "background"), None);
                    }
                }
                // Any other node kind is counted but not descended into.
                _ => return,
            }

            for child in node.children.borrow().iter() {
                visit(child, visited);
            }
        }

        let markup = "<!doctype html><html lang=\"en\"><head></head><body background=\"1\" background=\"2\"></body></html>";
        let dom = html::html_to_dom(&markup);

        let mut visited = 0;
        visit(&dom.document, &mut visited);

        assert_eq!(visited, 5);
    }
}
|
||||
@@ -8,27 +8,16 @@
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::html;
|
||||
use crate::opts::Options;
|
||||
|
||||
#[test]
|
||||
fn div_as_root_element() {
|
||||
let html = "<div><script src=\"some.js\"></script></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
|
||||
let opt_no_css: bool = false;
|
||||
let opt_no_frames: bool = false;
|
||||
let opt_no_js: bool = false;
|
||||
let opt_no_images: bool = false;
|
||||
let opt_isolate: bool = false;
|
||||
let options = Options::default();
|
||||
|
||||
assert_eq!(
|
||||
html::stringify_document(
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_frames,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_isolate,
|
||||
),
|
||||
html::stringify_document(&dom.document, &options),
|
||||
"<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
|
||||
);
|
||||
}
|
||||
@@ -40,21 +29,13 @@ mod passing {
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
|
||||
<div><script src=\"some.js\"></script></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
|
||||
let opt_no_css: bool = false;
|
||||
let opt_no_frames: bool = false;
|
||||
let opt_no_js: bool = false;
|
||||
let opt_no_images: bool = false;
|
||||
let opt_isolate: bool = true;
|
||||
let mut options = Options::default();
|
||||
options.isolate = true;
|
||||
|
||||
assert_eq!(
|
||||
html::stringify_document(
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_frames,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_isolate,
|
||||
&options
|
||||
),
|
||||
"<html>\
|
||||
<head>\
|
||||
@@ -79,22 +60,11 @@ mod passing {
|
||||
<link rel=\"stylesheet\" href=\"main.css\"/>\
|
||||
<div style=\"display: none;\"></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
|
||||
let opt_no_css: bool = true;
|
||||
let opt_no_frames: bool = false;
|
||||
let opt_no_js: bool = false;
|
||||
let opt_no_images: bool = false;
|
||||
let opt_isolate: bool = false;
|
||||
let mut options = Options::default();
|
||||
options.no_css = true;
|
||||
|
||||
assert_eq!(
|
||||
html::stringify_document(
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_frames,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_isolate,
|
||||
),
|
||||
html::stringify_document(&dom.document, &options),
|
||||
"<!DOCTYPE html>\
|
||||
<html>\
|
||||
<head>\
|
||||
@@ -114,26 +84,18 @@ mod passing {
|
||||
<link rel=\"something\"/>\
|
||||
<div><script src=\"some.js\"></script></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
|
||||
let opt_no_css: bool = false;
|
||||
let opt_no_frames: bool = true;
|
||||
let opt_no_js: bool = false;
|
||||
let opt_no_images: bool = false;
|
||||
let opt_isolate: bool = false;
|
||||
let mut options = Options::default();
|
||||
options.no_frames = true;
|
||||
|
||||
assert_eq!(
|
||||
html::stringify_document(
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_frames,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_isolate,
|
||||
&options
|
||||
),
|
||||
"<!DOCTYPE html>\
|
||||
<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>\
|
||||
<title>Frameless document</title>\
|
||||
<link rel=\"something\">\
|
||||
</head>\
|
||||
@@ -149,31 +111,28 @@ mod passing {
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
|
||||
<link rel=\"stylesheet\" href=\"some.css\">\
|
||||
<div>\
|
||||
<script src=\"some.js\"></script>\
|
||||
<img style=\"width: 100%;\" src=\"some.png\" />\
|
||||
<iframe src=\"some.html\"></iframe>\
|
||||
<script src=\"some.js\"></script>\
|
||||
<img style=\"width: 100%;\" src=\"some.png\" />\
|
||||
<iframe src=\"some.html\"></iframe>\
|
||||
</div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
|
||||
let opt_isolate: bool = true;
|
||||
let opt_no_css: bool = true;
|
||||
let opt_no_frames: bool = true;
|
||||
let opt_no_js: bool = true;
|
||||
let opt_no_images: bool = true;
|
||||
let mut options = Options::default();
|
||||
options.isolate = true;
|
||||
options.no_css = true;
|
||||
options.no_fonts = true;
|
||||
options.no_frames = true;
|
||||
options.no_js = true;
|
||||
options.no_images = true;
|
||||
|
||||
assert_eq!(
|
||||
html::stringify_document(
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_frames,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_isolate,
|
||||
&options
|
||||
),
|
||||
"<!DOCTYPE html>\
|
||||
<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; frame-src 'none';child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
|
||||
<title>no-frame no-css no-js no-image isolated document</title>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
|
||||
<link rel=\"stylesheet\" href=\"some.css\">\
|
||||
|
||||
@@ -7,11 +7,13 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::html;
|
||||
use html5ever::serialize::{serialize, SerializeOpts};
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::html;
|
||||
use crate::opts::Options;
|
||||
|
||||
#[test]
|
||||
fn basic() {
|
||||
let cache = &mut HashMap::new();
|
||||
@@ -20,27 +22,12 @@ mod passing {
|
||||
let dom = html::html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
|
||||
let opt_no_css: bool = false;
|
||||
let opt_no_fonts: bool = false;
|
||||
let opt_no_frames: bool = false;
|
||||
let opt_no_js: bool = false;
|
||||
let opt_no_images: bool = false;
|
||||
let opt_silent = true;
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(
|
||||
cache,
|
||||
&client,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_fonts,
|
||||
opt_no_frames,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
@@ -58,27 +45,12 @@ mod passing {
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let opt_no_css: bool = false;
|
||||
let opt_no_fonts: bool = false;
|
||||
let opt_no_frames: bool = false;
|
||||
let opt_no_js: bool = false;
|
||||
let opt_no_images: bool = false;
|
||||
let opt_silent = true;
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(
|
||||
cache,
|
||||
&client,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_fonts,
|
||||
opt_no_frames,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
@@ -96,27 +68,12 @@ mod passing {
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let opt_no_css: bool = false;
|
||||
let opt_no_fonts: bool = false;
|
||||
let opt_no_frames: bool = false;
|
||||
let opt_no_js: bool = false;
|
||||
let opt_no_images: bool = false;
|
||||
let opt_silent = true;
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(
|
||||
cache,
|
||||
&client,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_fonts,
|
||||
opt_no_frames,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
@@ -136,26 +93,13 @@ mod passing {
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let opt_no_css: bool = true;
|
||||
let opt_no_fonts: bool = false;
|
||||
let opt_no_frames: bool = false;
|
||||
let opt_no_js: bool = false;
|
||||
let opt_no_images: bool = false;
|
||||
let opt_silent = true;
|
||||
let mut options = Options::default();
|
||||
options.no_css = true;
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(
|
||||
cache,
|
||||
&client,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_fonts,
|
||||
opt_no_frames,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
@@ -182,27 +126,13 @@ mod passing {
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let opt_no_css: bool = false;
|
||||
let opt_no_fonts: bool = false;
|
||||
let opt_no_frames: bool = false;
|
||||
let opt_no_js: bool = false;
|
||||
let opt_no_images: bool = true;
|
||||
let opt_silent = true;
|
||||
let mut options = Options::default();
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(
|
||||
cache,
|
||||
&client,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_fonts,
|
||||
opt_no_frames,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
@@ -211,15 +141,15 @@ mod passing {
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
format!(
|
||||
"<html>\
|
||||
<head>\
|
||||
<link rel=\"icon\">\
|
||||
</head>\
|
||||
<body>\
|
||||
<div>\
|
||||
<img src=\"{empty_image}\">\
|
||||
</div>\
|
||||
</body>\
|
||||
</html>",
|
||||
<head>\
|
||||
<link rel=\"icon\">\
|
||||
</head>\
|
||||
<body>\
|
||||
<div>\
|
||||
<img src=\"{empty_image}\">\
|
||||
</div>\
|
||||
</body>\
|
||||
</html>",
|
||||
empty_image = empty_image!()
|
||||
)
|
||||
);
|
||||
@@ -233,27 +163,13 @@ mod passing {
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let opt_no_css: bool = false;
|
||||
let opt_no_fonts: bool = false;
|
||||
let opt_no_frames: bool = false;
|
||||
let opt_no_js: bool = false;
|
||||
let opt_no_images: bool = true;
|
||||
let opt_silent = true;
|
||||
let mut options = Options::default();
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(
|
||||
cache,
|
||||
&client,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_fonts,
|
||||
opt_no_frames,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
@@ -271,26 +187,13 @@ mod passing {
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let opt_no_css: bool = false;
|
||||
let opt_no_fonts: bool = false;
|
||||
let opt_no_frames: bool = true;
|
||||
let opt_no_js: bool = false;
|
||||
let opt_no_images: bool = false;
|
||||
let opt_silent = true;
|
||||
let mut options = Options::default();
|
||||
options.no_frames = true;
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(
|
||||
cache,
|
||||
&client,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_fonts,
|
||||
opt_no_frames,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
@@ -308,26 +211,13 @@ mod passing {
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let opt_no_css: bool = false;
|
||||
let opt_no_fonts: bool = false;
|
||||
let opt_no_frames: bool = true;
|
||||
let opt_no_js: bool = false;
|
||||
let opt_no_images: bool = false;
|
||||
let opt_silent = true;
|
||||
let mut options = Options::default();
|
||||
options.no_frames = true;
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(
|
||||
cache,
|
||||
&client,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_fonts,
|
||||
opt_no_frames,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
@@ -341,34 +231,20 @@ mod passing {
|
||||
#[test]
|
||||
fn no_js() {
|
||||
let html = "<div onClick=\"void(0)\">\
|
||||
<script src=\"http://localhost/assets/some.js\"></script>\
|
||||
<script>alert(1)</script>\
|
||||
<script src=\"http://localhost/assets/some.js\"></script>\
|
||||
<script>alert(1)</script>\
|
||||
</div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let opt_no_css: bool = false;
|
||||
let opt_no_fonts: bool = false;
|
||||
let opt_no_frames: bool = false;
|
||||
let opt_no_js: bool = true;
|
||||
let opt_no_images: bool = false;
|
||||
let opt_silent = true;
|
||||
let mut options = Options::default();
|
||||
options.no_js = true;
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(
|
||||
cache,
|
||||
&client,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_fonts,
|
||||
opt_no_frames,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
@@ -381,33 +257,24 @@ mod passing {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn with_no_integrity() {
|
||||
fn discards_integrity() {
|
||||
let html = "<title>No integrity</title>\
|
||||
<link integrity=\"sha384-...\" rel=\"something\"/>\
|
||||
<script integrity=\"sha384-...\" src=\"some.js\"></script>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let opt_no_css: bool = true;
|
||||
let opt_no_fonts: bool = false;
|
||||
let opt_no_frames: bool = true;
|
||||
let opt_no_js: bool = true;
|
||||
let opt_no_images: bool = true;
|
||||
let opt_silent = true;
|
||||
|
||||
html::walk_and_embed_assets(
|
||||
cache,
|
||||
&client,
|
||||
&url,
|
||||
&dom.document,
|
||||
opt_no_css,
|
||||
opt_no_fonts,
|
||||
opt_no_frames,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
);
|
||||
let mut options = Options::default();
|
||||
options.no_css = true;
|
||||
options.no_frames = true;
|
||||
options.no_js = true;
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
@@ -415,8 +282,47 @@ mod passing {
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"<html>\
|
||||
<head><title>No integrity</title><link rel=\"something\"><script></script></head>\
|
||||
<head><title>No integrity</title><link rel=\"something\"><script></script></head>\
|
||||
<body></body>\
|
||||
</html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn removes_unwanted_meta_tags() {
|
||||
let html = "<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"Refresh\" value=\"20\"/>\
|
||||
<meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
|
||||
</head>\
|
||||
<body></body>\
|
||||
</html>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.no_css = true;
|
||||
options.no_frames = true;
|
||||
options.no_js = true;
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"disabled by monolith (Refresh)\" value=\"20\">\
|
||||
<meta http-equiv=\"disabled by monolith (Location)\" value=\"https://freebsd.org\">\
|
||||
</head>\
|
||||
<body></body>\
|
||||
</html>"
|
||||
);
|
||||
}
|
||||
|
||||
@@ -3,4 +3,6 @@ mod css;
|
||||
mod html;
|
||||
mod js;
|
||||
mod macros;
|
||||
mod opts;
|
||||
mod url;
|
||||
mod utils;
|
||||
|
||||
32
src/tests/opts.rs
Normal file
32
src/tests/opts.rs
Normal file
@@ -0,0 +1,32 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
// Tests that pin down the field values of a default-constructed `Options`.
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::opts::Options;
|
||||
|
||||
// Every boolean flag checked below is expected to be `false` by default,
// `timeout` is expected to be 0 and `user_agent` to be the empty string.
#[test]
|
||||
fn defaults() {
|
||||
let options: Options = Options::default();
|
||||
|
||||
// NOTE(review): `str!()` is a project macro not visible here; presumably it
// expands to an empty `String` -- confirm against the macros module.
assert_eq!(options.target, str!());
|
||||
assert_eq!(options.no_audio, false);
|
||||
assert_eq!(options.no_css, false);
|
||||
assert_eq!(options.no_frames, false);
|
||||
assert_eq!(options.no_fonts, false);
|
||||
assert_eq!(options.no_images, false);
|
||||
assert_eq!(options.isolate, false);
|
||||
assert_eq!(options.no_js, false);
|
||||
assert_eq!(options.insecure, false);
|
||||
assert_eq!(options.no_metadata, false);
|
||||
assert_eq!(options.output, str!());
|
||||
assert_eq!(options.silent, false);
|
||||
assert_eq!(options.timeout, 0);
|
||||
assert_eq!(options.user_agent, "");
|
||||
assert_eq!(options.no_video, false);
|
||||
}
|
||||
}
|
||||
@@ -7,12 +7,12 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::utils;
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn removes_fragment() {
|
||||
assert_eq!(
|
||||
utils::clean_url("https://somewhere.com/font.eot#iefix"),
|
||||
url::clean_url("https://somewhere.com/font.eot#iefix"),
|
||||
"https://somewhere.com/font.eot"
|
||||
);
|
||||
}
|
||||
@@ -20,7 +20,7 @@ mod passing {
|
||||
#[test]
|
||||
fn removes_empty_fragment() {
|
||||
assert_eq!(
|
||||
utils::clean_url("https://somewhere.com/font.eot#"),
|
||||
url::clean_url("https://somewhere.com/font.eot#"),
|
||||
"https://somewhere.com/font.eot"
|
||||
);
|
||||
}
|
||||
@@ -28,7 +28,7 @@ mod passing {
|
||||
#[test]
|
||||
fn removes_empty_query_and_empty_fragment() {
|
||||
assert_eq!(
|
||||
utils::clean_url("https://somewhere.com/font.eot?#"),
|
||||
url::clean_url("https://somewhere.com/font.eot?#"),
|
||||
"https://somewhere.com/font.eot"
|
||||
);
|
||||
}
|
||||
@@ -36,7 +36,7 @@ mod passing {
|
||||
#[test]
|
||||
fn removes_empty_query_amp_and_empty_fragment() {
|
||||
assert_eq!(
|
||||
utils::clean_url("https://somewhere.com/font.eot?a=b&#"),
|
||||
url::clean_url("https://somewhere.com/font.eot?a=b&#"),
|
||||
"https://somewhere.com/font.eot?a=b"
|
||||
);
|
||||
}
|
||||
@@ -44,7 +44,7 @@ mod passing {
|
||||
#[test]
|
||||
fn keeps_credentials() {
|
||||
assert_eq!(
|
||||
utils::clean_url("https://cookie:monster@gibson.internet/"),
|
||||
url::clean_url("https://cookie:monster@gibson.internet/"),
|
||||
"https://cookie:monster@gibson.internet/"
|
||||
);
|
||||
}
|
||||
@@ -7,13 +7,13 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::utils;
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn encode_string_with_specific_media_type() {
|
||||
let mime = "application/javascript";
|
||||
let data = "var word = 'hello';\nalert(word);\n";
|
||||
let data_url = utils::data_to_data_url(mime, data.as_bytes(), "", "");
|
||||
let data_url = url::data_to_data_url(mime, data.as_bytes(), "");
|
||||
|
||||
assert_eq!(
|
||||
&data_url,
|
||||
@@ -24,8 +24,8 @@ mod passing {
|
||||
#[test]
|
||||
fn encode_append_fragment() {
|
||||
let data = "<svg></svg>\n";
|
||||
let data_url = utils::data_to_data_url("text/css", data.as_bytes(), "", "fragment");
|
||||
let data_url = url::data_to_data_url("image/svg+xml", data.as_bytes(), "");
|
||||
|
||||
assert_eq!(&data_url, "data:text/css;base64,PHN2Zz48L3N2Zz4K#fragment");
|
||||
assert_eq!(&data_url, "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K");
|
||||
}
|
||||
}
|
||||
@@ -7,12 +7,12 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::utils;
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn decode_unicode_characters() {
|
||||
assert_eq!(
|
||||
utils::decode_url(str!(
|
||||
url::decode_url(str!(
|
||||
"%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
|
||||
)),
|
||||
"検ヒム解塗ゃッ = サ"
|
||||
@@ -22,7 +22,7 @@ mod passing {
|
||||
#[test]
|
||||
fn decode_file_url() {
|
||||
assert_eq!(
|
||||
utils::decode_url(str!("file:///tmp/space%20here/test%231.html")),
|
||||
url::decode_url(str!("file:///tmp/space%20here/test%231.html")),
|
||||
"file:///tmp/space here/test#1.html"
|
||||
);
|
||||
}
|
||||
@@ -30,7 +30,7 @@ mod passing {
|
||||
#[test]
|
||||
fn plus_sign() {
|
||||
assert_eq!(
|
||||
utils::decode_url(str!(
|
||||
url::decode_url(str!(
|
||||
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
|
||||
)),
|
||||
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
|
||||
@@ -7,18 +7,18 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::utils;
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn remove_protocl_and_fragment() {
|
||||
if cfg!(windows) {
|
||||
assert_eq!(
|
||||
utils::file_url_to_fs_path("file:///C:/documents/some-path/some-file.svg#fragment"),
|
||||
url::file_url_to_fs_path("file:///C:/documents/some-path/some-file.svg#fragment"),
|
||||
"C:\\documents\\some-path\\some-file.svg"
|
||||
);
|
||||
} else {
|
||||
assert_eq!(
|
||||
utils::file_url_to_fs_path("file:///tmp/some-path/some-file.svg#fragment"),
|
||||
url::file_url_to_fs_path("file:///tmp/some-path/some-file.svg#fragment"),
|
||||
"/tmp/some-path/some-file.svg"
|
||||
);
|
||||
}
|
||||
@@ -28,12 +28,12 @@ mod passing {
|
||||
fn decodes_urls() {
|
||||
if cfg!(windows) {
|
||||
assert_eq!(
|
||||
utils::file_url_to_fs_path("file:///C:/Documents%20and%20Settings/some-file.html"),
|
||||
url::file_url_to_fs_path("file:///C:/Documents%20and%20Settings/some-file.html"),
|
||||
"C:\\Documents and Settings\\some-file.html"
|
||||
);
|
||||
} else {
|
||||
assert_eq!(
|
||||
utils::file_url_to_fs_path("file:///home/user/My%20Documents"),
|
||||
url::file_url_to_fs_path("file:///home/user/My%20Documents"),
|
||||
"/home/user/My Documents"
|
||||
);
|
||||
}
|
||||
48
src/tests/url/get_url_fragment.rs
Normal file
48
src/tests/url/get_url_fragment.rs
Normal file
@@ -0,0 +1,48 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
// Cases in which `url::get_url_fragment` is expected to return a non-empty fragment.
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::url;
|
||||
|
||||
// The text following '#' in a data URL is expected back as the fragment,
// without the leading '#'.
#[test]
|
||||
fn data_url() {
|
||||
assert_eq!(
|
||||
url::get_url_fragment(
|
||||
"data:image/svg+xml;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h#test"
|
||||
),
|
||||
"test"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
// Cases in which the input carries no usable fragment; `url::get_url_fragment`
// is expected to return the empty string for each of them.
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::url;
|
||||
|
||||
// A trailing '#' with nothing after it yields an empty fragment.
#[test]
|
||||
fn https_empty() {
|
||||
assert_eq!(url::get_url_fragment("https://kernel.org#"), "");
|
||||
}
|
||||
|
||||
// A URL without any '#' yields an empty fragment.
#[test]
|
||||
fn no_fragment() {
|
||||
assert_eq!(url::get_url_fragment("https://kernel.org"), "");
|
||||
}
|
||||
|
||||
// A data URL with an empty payload and no '#' yields an empty fragment.
#[test]
|
||||
fn dummy_data_url() {
|
||||
assert_eq!(url::get_url_fragment("data:text/html,"), "");
|
||||
}
|
||||
}
|
||||
@@ -7,18 +7,18 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::utils;
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn data_url_text_html() {
|
||||
assert!(utils::is_data_url(
|
||||
assert!(url::is_data_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn data_url_no_media_type() {
|
||||
assert!(utils::is_data_url(
|
||||
assert!(url::is_data_url(
|
||||
"data:;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
||||
));
|
||||
}
|
||||
@@ -33,20 +33,20 @@ mod passing {
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::utils;
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn https_url() {
|
||||
assert!(!utils::is_data_url("https://kernel.org"));
|
||||
assert!(!url::is_data_url("https://kernel.org"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_protocol_url() {
|
||||
assert!(!utils::is_data_url("//kernel.org"));
|
||||
assert!(!url::is_data_url("//kernel.org"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_string() {
|
||||
assert!(!utils::is_data_url(""));
|
||||
assert!(!url::is_data_url(""));
|
||||
}
|
||||
}
|
||||
@@ -7,32 +7,32 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::utils;
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn unix_file_url() {
|
||||
assert!(utils::is_file_url(
|
||||
assert!(url::is_file_url(
|
||||
"file:///home/user/Websites/my-website/index.html"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn windows_file_url() {
|
||||
assert!(utils::is_file_url(
|
||||
assert!(url::is_file_url(
|
||||
"file:///C:/Documents%20and%20Settings/user/Websites/my-website/assets/images/logo.png"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unix_url_with_backslashes() {
|
||||
assert!(utils::is_file_url(
|
||||
assert!(url::is_file_url(
|
||||
"file:\\\\\\home\\user\\Websites\\my-website\\index.html"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn windows_file_url_with_backslashes() {
|
||||
assert!(utils::is_file_url(
|
||||
assert!(url::is_file_url(
|
||||
"file:\\\\\\C:\\Documents%20and%20Settings\\user\\Websites\\my-website\\assets\\images\\logo.png"
|
||||
));
|
||||
}
|
||||
@@ -47,37 +47,37 @@ mod passing {
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::utils;
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn url_with_no_protocl() {
|
||||
assert!(!utils::is_file_url("//kernel.org"));
|
||||
assert!(!url::is_file_url("//kernel.org"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dot_slash_filename() {
|
||||
assert!(!utils::is_file_url("./index.html"));
|
||||
assert!(!url::is_file_url("./index.html"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn just_filename() {
|
||||
assert!(!utils::is_file_url("some-local-page.htm"));
|
||||
assert!(!url::is_file_url("some-local-page.htm"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn https_ip_port_url() {
|
||||
assert!(!utils::is_file_url("https://1.2.3.4:80/www/index.html"));
|
||||
assert!(!url::is_file_url("https://1.2.3.4:80/www/index.html"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn data_url() {
|
||||
assert!(!utils::is_file_url(
|
||||
assert!(!url::is_file_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn just_word_file() {
|
||||
assert!(!utils::is_file_url("file"));
|
||||
assert!(!url::is_file_url("file"));
|
||||
}
|
||||
}
|
||||
@@ -7,21 +7,21 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::utils;
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn http_url() {
|
||||
assert!(utils::is_http_url("http://kernel.org"));
|
||||
assert!(url::is_http_url("http://kernel.org"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn https_url() {
|
||||
assert!(utils::is_http_url("https://www.rust-lang.org/"));
|
||||
assert!(url::is_http_url("https://www.rust-lang.org/"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn http_url_with_backslashes() {
|
||||
assert!(utils::is_http_url("http:\\\\freebsd.org\\"));
|
||||
assert!(url::is_http_url("http:\\\\freebsd.org\\"));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,31 +34,31 @@ mod passing {
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::utils;
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn url_with_no_protocol() {
|
||||
assert!(!utils::is_http_url("//kernel.org"));
|
||||
assert!(!url::is_http_url("//kernel.org"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dot_slash_filename() {
|
||||
assert!(!utils::is_http_url("./index.html"));
|
||||
assert!(!url::is_http_url("./index.html"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn just_filename() {
|
||||
assert!(!utils::is_http_url("some-local-page.htm"));
|
||||
assert!(!url::is_http_url("some-local-page.htm"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn https_ip_port_url() {
|
||||
assert!(!utils::is_http_url("ftp://1.2.3.4/www/index.html"));
|
||||
assert!(!url::is_http_url("ftp://1.2.3.4/www/index.html"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn data_url() {
|
||||
assert!(!utils::is_http_url(
|
||||
assert!(!url::is_http_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
||||
));
|
||||
}
|
||||
12
src/tests/url/mod.rs
Normal file
12
src/tests/url/mod.rs
Normal file
@@ -0,0 +1,12 @@
|
||||
mod clean_url;
|
||||
mod data_to_data_url;
|
||||
mod decode_url;
|
||||
mod file_url_to_fs_path;
|
||||
mod get_url_fragment;
|
||||
mod is_data_url;
|
||||
mod is_file_url;
|
||||
mod is_http_url;
|
||||
mod parse_data_url;
|
||||
mod resolve_url;
|
||||
mod url_has_protocol;
|
||||
mod url_with_fragment;
|
||||
@@ -7,11 +7,11 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::utils;
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn parse_text_html_base64() {
|
||||
let (media_type, data) = utils::data_url_to_data("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
|
||||
let (media_type, data) = url::parse_data_url("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
|
||||
|
||||
assert_eq!(media_type, "text/html");
|
||||
assert_eq!(
|
||||
@@ -22,7 +22,7 @@ mod passing {
|
||||
|
||||
#[test]
|
||||
fn parse_text_html_utf8() {
|
||||
let (media_type, data) = utils::data_url_to_data(
|
||||
let (media_type, data) = url::parse_data_url(
|
||||
"data:text/html;utf8,Work expands so as to fill the time available for its completion",
|
||||
);
|
||||
|
||||
@@ -35,7 +35,7 @@ mod passing {
|
||||
|
||||
#[test]
|
||||
fn parse_text_html_plaintext() {
|
||||
let (media_type, data) = utils::data_url_to_data(
|
||||
let (media_type, data) = url::parse_data_url(
|
||||
"data:text/html,Work expands so as to fill the time available for its completion",
|
||||
);
|
||||
|
||||
@@ -48,7 +48,7 @@ mod passing {
|
||||
|
||||
#[test]
|
||||
fn parse_text_html_charset_utf_8_between_two_whitespaces() {
|
||||
let (media_type, data) = utils::data_url_to_data(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
|
||||
let (media_type, data) = url::parse_data_url(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
|
||||
|
||||
assert_eq!(media_type, "text/html");
|
||||
assert_eq!(
|
||||
@@ -59,8 +59,7 @@ mod passing {
|
||||
|
||||
#[test]
|
||||
fn parse_text_css_url_encoded() {
|
||||
let (media_type, data) =
|
||||
utils::data_url_to_data("data:text/css,div{background-color:%23000}");
|
||||
let (media_type, data) = url::parse_data_url("data:text/css,div{background-color:%23000}");
|
||||
|
||||
assert_eq!(media_type, "text/css");
|
||||
assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}");
|
||||
@@ -68,7 +67,7 @@ mod passing {
|
||||
|
||||
#[test]
|
||||
fn parse_no_media_type_base64() {
|
||||
let (media_type, data) = utils::data_url_to_data("data:;base64,dGVzdA==");
|
||||
let (media_type, data) = url::parse_data_url("data:;base64,dGVzdA==");
|
||||
|
||||
assert_eq!(media_type, "");
|
||||
assert_eq!(String::from_utf8_lossy(&data), "test");
|
||||
@@ -76,7 +75,7 @@ mod passing {
|
||||
|
||||
#[test]
|
||||
fn parse_no_media_type_no_encoding() {
|
||||
let (media_type, data) = utils::data_url_to_data("data:;,test%20test");
|
||||
let (media_type, data) = url::parse_data_url("data:;,test%20test");
|
||||
|
||||
assert_eq!(media_type, "");
|
||||
assert_eq!(String::from_utf8_lossy(&data), "test test");
|
||||
@@ -92,11 +91,11 @@ mod passing {
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::utils;
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn just_word_data() {
|
||||
let (media_type, data) = utils::data_url_to_data("data");
|
||||
let (media_type, data) = url::parse_data_url("data");
|
||||
|
||||
assert_eq!(media_type, "");
|
||||
assert_eq!(String::from_utf8_lossy(&data), "");
|
||||
194
src/tests/url/resolve_url.rs
Normal file
194
src/tests/url/resolve_url.rs
Normal file
@@ -0,0 +1,194 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
// Successful resolutions through `url::resolve_url(base, target)`.
// Every call is followed by `unwrap_or_default()`, so a failed resolution would
// surface as an assertion mismatch against the default value rather than a panic.
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::url;
|
||||
|
||||
// "../" applied to a base that has no path beyond the host still lands
// directly under the host.
#[test]
|
||||
fn from_https_to_level_up_relative() {
|
||||
assert_eq!(
|
||||
url::resolve_url("https://www.kernel.org", "../category/signatures.html")
|
||||
.unwrap_or_default(),
|
||||
"https://www.kernel.org/category/signatures.html"
|
||||
);
|
||||
}
|
||||
|
||||
// An absolute https target is expected back unchanged even though the base
// is only a bare filename.
#[test]
|
||||
fn from_just_filename_to_full_https_url() {
|
||||
assert_eq!(
|
||||
url::resolve_url(
|
||||
"saved_page.htm",
|
||||
"https://www.kernel.org/category/signatures.html",
|
||||
)
|
||||
.unwrap_or_default(),
|
||||
"https://www.kernel.org/category/signatures.html"
|
||||
);
|
||||
}
|
||||
|
||||
// A scheme-relative target ("//host/...") picks up the scheme of the base.
#[test]
|
||||
fn from_https_url_to_url_with_no_protocol() {
|
||||
assert_eq!(
|
||||
url::resolve_url(
|
||||
"https://www.kernel.org",
|
||||
"//www.kernel.org/theme/images/logos/tux.png",
|
||||
)
|
||||
.unwrap_or_default(),
|
||||
"https://www.kernel.org/theme/images/logos/tux.png"
|
||||
);
|
||||
}
|
||||
|
||||
// Same as above, but the target names another host: only the scheme is
// taken from the base, the host comes from the target.
#[test]
|
||||
fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() {
|
||||
assert_eq!(
|
||||
url::resolve_url(
|
||||
"https://www.kernel.org",
|
||||
"//another-host.org/theme/images/logos/tux.png",
|
||||
)
|
||||
.unwrap_or_default(),
|
||||
"https://another-host.org/theme/images/logos/tux.png"
|
||||
);
|
||||
}
|
||||
|
||||
// A root-relative target replaces the entire path of the base.
#[test]
|
||||
fn from_https_url_to_relative_root_path() {
|
||||
assert_eq!(
|
||||
url::resolve_url(
|
||||
"https://www.kernel.org/category/signatures.html",
|
||||
"/theme/images/logos/tux.png",
|
||||
)
|
||||
.unwrap_or_default(),
|
||||
"https://www.kernel.org/theme/images/logos/tux.png"
|
||||
);
|
||||
}
|
||||
|
||||
// A bare filename replaces only the last path segment of the base.
#[test]
|
||||
fn from_https_to_just_filename() {
|
||||
assert_eq!(
|
||||
url::resolve_url(
|
||||
"https://www.w3schools.com/html/html_iframe.asp",
|
||||
"default.asp",
|
||||
)
|
||||
.unwrap_or_default(),
|
||||
"https://www.w3schools.com/html/default.asp"
|
||||
);
|
||||
}
|
||||
|
||||
// With a data URL as base, an absolute https target is expected back unchanged.
#[test]
|
||||
fn from_data_url_to_https() {
|
||||
assert_eq!(
|
||||
url::resolve_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
|
||||
"https://www.kernel.org/category/signatures.html",
|
||||
)
|
||||
.unwrap_or_default(),
|
||||
"https://www.kernel.org/category/signatures.html"
|
||||
);
|
||||
}
|
||||
|
||||
// With a data URL as base, a data URL target is expected back unchanged.
#[test]
|
||||
fn from_data_url_to_data_url() {
|
||||
assert_eq!(
|
||||
url::resolve_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
|
||||
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K",
|
||||
)
|
||||
.unwrap_or_default(),
|
||||
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K"
|
||||
);
|
||||
}
|
||||
|
||||
// A relative path is resolved against the directory of a file:// base.
#[test]
|
||||
fn from_file_url_to_relative_path() {
|
||||
assert_eq!(
|
||||
url::resolve_url(
|
||||
"file:///home/user/Websites/my-website/index.html",
|
||||
"assets/images/logo.png",
|
||||
)
|
||||
.unwrap_or_default(),
|
||||
"file:///home/user/Websites/my-website/assets/images/logo.png"
|
||||
);
|
||||
}
|
||||
|
||||
// Backslashes in both the base and the target are expected to come out as
// forward slashes in the resolved URL.
#[test]
|
||||
fn from_file_url_to_relative_path_with_backslashes() {
|
||||
assert_eq!(
|
||||
url::resolve_url(
|
||||
"file:\\\\\\home\\user\\Websites\\my-website\\index.html",
|
||||
"assets\\images\\logo.png",
|
||||
)
|
||||
.unwrap_or_default(),
|
||||
"file:///home/user/Websites/my-website/assets/images/logo.png"
|
||||
);
|
||||
}
|
||||
|
||||
// With a data URL as base, an absolute file:// target is expected back unchanged.
#[test]
|
||||
fn from_data_url_to_file_url() {
|
||||
assert_eq!(
|
||||
url::resolve_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
|
||||
"file:///etc/passwd",
|
||||
)
|
||||
.unwrap_or_default(),
|
||||
"file:///etc/passwd"
|
||||
);
|
||||
}
|
||||
|
||||
// The '#fragment' part of the target must survive resolution.
#[test]
|
||||
fn preserve_fragment() {
|
||||
assert_eq!(
|
||||
url::resolve_url(
|
||||
"http://doesnt-matter.local/",
|
||||
"css/fonts/fontmarvelous.svg#fontmarvelous",
|
||||
)
|
||||
.unwrap_or_default(),
|
||||
"http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous"
|
||||
);
|
||||
}
|
||||
|
||||
// An absolute file:// target is expected back unchanged for a file:// base.
// Inputs and expectation are switched by cfg!(windows): drive-letter paths on
// Windows, /tmp paths elsewhere.
#[test]
|
||||
fn resolve_from_file_url_to_file_url() {
|
||||
assert_eq!(
|
||||
if cfg!(windows) {
|
||||
url::resolve_url("file:///c:/index.html", "file:///c:/image.png")
|
||||
.unwrap_or_default()
|
||||
} else {
|
||||
url::resolve_url("file:///tmp/index.html", "file:///tmp/image.png")
|
||||
.unwrap_or_default()
|
||||
},
|
||||
if cfg!(windows) {
|
||||
"file:///c:/image.png"
|
||||
} else {
|
||||
"file:///tmp/image.png"
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
// Resolutions through `url::resolve_url` that are expected not to produce a URL.
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::url;
|
||||
|
||||
// A scheme-relative target has no scheme of its own; with a data URL as base
// the test expects the result, after `unwrap_or_default()`, to be the empty string.
#[test]
|
||||
fn from_data_url_to_url_with_no_protocol() {
|
||||
assert_eq!(
|
||||
url::resolve_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
|
||||
"//www.w3schools.com/html/html_iframe.asp",
|
||||
)
|
||||
.unwrap_or_default(),
|
||||
""
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -7,53 +7,58 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::utils;
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn mailto() {
|
||||
assert!(utils::url_has_protocol(
|
||||
assert!(url::url_has_protocol(
|
||||
"mailto:somebody@somewhere.com?subject=hello"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tel() {
|
||||
assert!(utils::url_has_protocol("tel:5551234567"));
|
||||
assert!(url::url_has_protocol("tel:5551234567"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ftp_no_slashes() {
|
||||
assert!(utils::url_has_protocol("ftp:some-ftp-server.com"));
|
||||
assert!(url::url_has_protocol("ftp:some-ftp-server.com"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ftp_with_credentials() {
|
||||
assert!(utils::url_has_protocol(
|
||||
assert!(url::url_has_protocol(
|
||||
"ftp://user:password@some-ftp-server.com"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn javascript() {
|
||||
assert!(utils::url_has_protocol("javascript:void(0)"));
|
||||
assert!(url::url_has_protocol("javascript:void(0)"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn http() {
|
||||
assert!(utils::url_has_protocol("http://news.ycombinator.com"));
|
||||
assert!(url::url_has_protocol("http://news.ycombinator.com"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn https() {
|
||||
assert!(utils::url_has_protocol("https://github.com"));
|
||||
assert!(url::url_has_protocol("https://github.com"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mailto_uppercase() {
|
||||
assert!(utils::url_has_protocol(
|
||||
assert!(url::url_has_protocol(
|
||||
"MAILTO:somebody@somewhere.com?subject=hello"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_data_url() {
|
||||
assert!(url::url_has_protocol("data:text/html,"));
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
@@ -65,27 +70,25 @@ mod passing {
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::utils;
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn url_with_no_protocol() {
|
||||
assert!(!utils::url_has_protocol(
|
||||
"//some-hostname.com/some-file.html"
|
||||
));
|
||||
assert!(!url::url_has_protocol("//some-hostname.com/some-file.html"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn relative_path() {
|
||||
assert!(!utils::url_has_protocol("some-hostname.com/some-file.html"));
|
||||
assert!(!url::url_has_protocol("some-hostname.com/some-file.html"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn relative_to_root_path() {
|
||||
assert!(!utils::url_has_protocol("/some-file.html"));
|
||||
assert!(!url::url_has_protocol("/some-file.html"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_string() {
|
||||
assert!(!utils::url_has_protocol(""));
|
||||
assert!(!url::url_has_protocol(""));
|
||||
}
|
||||
}
|
||||
40
src/tests/url/url_with_fragment.rs
Normal file
40
src/tests/url/url_with_fragment.rs
Normal file
@@ -0,0 +1,40 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
// Tests for `url::url_with_fragment(url, fragment)`, which assembles a URL string
// from a URL and a fragment.
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::url;
|
||||
|
||||
// A non-empty fragment is expected to be appended after a '#'.
#[test]
|
||||
fn url_with_fragment_url() {
|
||||
let url = "https://localhost.localdomain/path/";
|
||||
let fragment = "test";
|
||||
let assembled_url = url::url_with_fragment(url, fragment);
|
||||
|
||||
assert_eq!(&assembled_url, "https://localhost.localdomain/path/#test");
|
||||
}
|
||||
// NOTE(review): despite the test name, it is the fragment that is empty here,
// not the URL. With an empty fragment the URL is expected back without a '#'.
#[test]
|
||||
fn url_with_fragment_empty_url() {
|
||||
let url = "https://localhost.localdomain/path/";
|
||||
let fragment = "";
|
||||
let assembled_url = url::url_with_fragment(url, fragment);
|
||||
|
||||
assert_eq!(&assembled_url, "https://localhost.localdomain/path/");
|
||||
}
|
||||
|
||||
// A data URL gets the fragment appended after '#' in the same way.
#[test]
|
||||
fn url_with_fragment_data_url() {
|
||||
let url = "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K";
|
||||
let fragment = "fragment";
|
||||
let assembled_url = url::url_with_fragment(url, fragment);
|
||||
|
||||
assert_eq!(
|
||||
&assembled_url,
|
||||
"data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K#fragment"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -10,17 +10,22 @@ mod passing {
|
||||
use crate::utils;
|
||||
|
||||
#[test]
|
||||
fn data_url() {
|
||||
assert_eq!(
|
||||
utils::get_url_fragment(
|
||||
"data:image/svg+xml;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h#test"
|
||||
),
|
||||
"test"
|
||||
);
|
||||
fn zero() {
|
||||
assert_eq!(utils::indent(0), "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn https_empty() {
|
||||
assert_eq!(utils::get_url_fragment("https://kernel.org#"), "");
|
||||
fn one() {
|
||||
assert_eq!(utils::indent(1), " ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn two() {
|
||||
assert_eq!(utils::indent(2), " ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn three() {
|
||||
assert_eq!(utils::indent(3), " ");
|
||||
}
|
||||
}
|
||||
@@ -1,13 +1,3 @@
|
||||
mod clean_url;
|
||||
mod data_to_data_url;
|
||||
mod data_url_to_data;
|
||||
mod decode_url;
|
||||
mod detect_media_type;
|
||||
mod file_url_to_fs_path;
|
||||
mod get_url_fragment;
|
||||
mod is_data_url;
|
||||
mod is_file_url;
|
||||
mod is_http_url;
|
||||
mod resolve_url;
|
||||
mod indent;
|
||||
mod retrieve_asset;
|
||||
mod url_has_protocol;
|
||||
|
||||
@@ -1,238 +0,0 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    use crate::utils;
    use url::ParseError;

    // Each test resolves a target URL against a base URL and checks the serialized result.
    // Failures are propagated with `?` in every test, so a parse error is reported as the
    // error it is instead of showing up as a mismatch against an empty string.

    #[test]
    fn from_https_to_level_up_relative() -> Result<(), ParseError> {
        let resolved_url =
            utils::resolve_url("https://www.kernel.org", "../category/signatures.html")?;

        assert_eq!(
            resolved_url.as_str(),
            "https://www.kernel.org/category/signatures.html"
        );

        Ok(())
    }

    #[test]
    fn from_just_filename_to_full_https_url() -> Result<(), ParseError> {
        // An absolute http(s) target wins even when the base is not a valid URL
        let resolved_url = utils::resolve_url(
            "saved_page.htm",
            "https://www.kernel.org/category/signatures.html",
        )?;

        assert_eq!(
            resolved_url.as_str(),
            "https://www.kernel.org/category/signatures.html"
        );

        Ok(())
    }

    #[test]
    fn from_https_url_to_url_with_no_protocol() -> Result<(), ParseError> {
        let resolved_url = utils::resolve_url(
            "https://www.kernel.org",
            "//www.kernel.org/theme/images/logos/tux.png",
        )?;

        assert_eq!(
            resolved_url.as_str(),
            "https://www.kernel.org/theme/images/logos/tux.png"
        );

        Ok(())
    }

    #[test]
    fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() -> Result<(), ParseError>
    {
        let resolved_url = utils::resolve_url(
            "https://www.kernel.org",
            "//another-host.org/theme/images/logos/tux.png",
        )?;

        assert_eq!(
            resolved_url.as_str(),
            "https://another-host.org/theme/images/logos/tux.png"
        );

        Ok(())
    }

    #[test]
    fn from_https_url_to_relative_root_path() -> Result<(), ParseError> {
        let resolved_url = utils::resolve_url(
            "https://www.kernel.org/category/signatures.html",
            "/theme/images/logos/tux.png",
        )?;

        assert_eq!(
            resolved_url.as_str(),
            "https://www.kernel.org/theme/images/logos/tux.png"
        );

        Ok(())
    }

    #[test]
    fn from_https_to_just_filename() -> Result<(), ParseError> {
        let resolved_url = utils::resolve_url(
            "https://www.w3schools.com/html/html_iframe.asp",
            "default.asp",
        )?;

        assert_eq!(
            resolved_url.as_str(),
            "https://www.w3schools.com/html/default.asp"
        );

        Ok(())
    }

    #[test]
    fn from_data_url_to_https() -> Result<(), ParseError> {
        let resolved_url = utils::resolve_url(
            "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
            "https://www.kernel.org/category/signatures.html",
        )?;

        assert_eq!(
            resolved_url.as_str(),
            "https://www.kernel.org/category/signatures.html"
        );

        Ok(())
    }

    #[test]
    fn from_data_url_to_data_url() -> Result<(), ParseError> {
        let resolved_url = utils::resolve_url(
            "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
            "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K",
        )?;

        assert_eq!(
            resolved_url.as_str(),
            "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K"
        );

        Ok(())
    }

    #[test]
    fn from_file_url_to_relative_path() -> Result<(), ParseError> {
        let resolved_url = utils::resolve_url(
            "file:///home/user/Websites/my-website/index.html",
            "assets/images/logo.png",
        )?;

        assert_eq!(
            resolved_url.as_str(),
            "file:///home/user/Websites/my-website/assets/images/logo.png"
        );

        Ok(())
    }

    #[test]
    fn from_file_url_to_relative_path_with_backslashes() -> Result<(), ParseError> {
        let resolved_url = utils::resolve_url(
            "file:\\\\\\home\\user\\Websites\\my-website\\index.html",
            "assets\\images\\logo.png",
        )?;

        assert_eq!(
            resolved_url.as_str(),
            "file:///home/user/Websites/my-website/assets/images/logo.png"
        );

        Ok(())
    }

    #[test]
    fn from_data_url_to_file_url() -> Result<(), ParseError> {
        let resolved_url = utils::resolve_url(
            "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
            "file:///etc/passwd",
        )?;

        assert_eq!(resolved_url.as_str(), "file:///etc/passwd");

        Ok(())
    }

    #[test]
    fn preserve_fragment() -> Result<(), ParseError> {
        let resolved_url = utils::resolve_url(
            "http://doesnt-matter.local/",
            "css/fonts/fontmarvelous.svg#fontmarvelous",
        )?;

        assert_eq!(
            resolved_url.as_str(),
            "http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous"
        );

        Ok(())
    }

    #[test]
    fn resolve_from_file_url_to_file_url() -> Result<(), ParseError> {
        // File URLs carry a drive letter on Windows, so both the inputs and the
        // expectation differ per platform
        let (base_url, target_url, expected_url) = if cfg!(windows) {
            (
                "file:///c:/index.html",
                "file:///c:/image.png",
                "file:///c:/image.png",
            )
        } else {
            (
                "file:///tmp/index.html",
                "file:///tmp/image.png",
                "file:///tmp/image.png",
            )
        };

        let resolved_url = utils::resolve_url(base_url, target_url)?;

        assert_eq!(resolved_url.as_str(), expected_url);

        Ok(())
    }
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod failing {
    use crate::utils;
    use url::ParseError;

    /// A scheme-relative target is resolved against a `data:` base; the test expects
    /// the fallback (empty) string, i.e. no usable URL comes out of the resolution.
    #[test]
    fn from_data_url_to_url_with_no_protocol() -> Result<(), ParseError> {
        let base_url = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
        let target_url = "//www.w3schools.com/html/html_iframe.asp";

        // An error is replaced by an empty string, which is what gets asserted below
        let resolved_url: String = match utils::resolve_url(base_url, target_url) {
            Ok(resolved) => resolved,
            Err(_) => String::new(),
        };

        assert_eq!(resolved_url.as_str(), "");

        Ok(())
    }
}
|
||||
@@ -7,16 +7,22 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::utils;
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
|
||||
use crate::opts::Options;
|
||||
use crate::url;
|
||||
use crate::utils;
|
||||
|
||||
#[test]
|
||||
fn read_data_url() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
// If both source and target are data URLs,
|
||||
// ensure the result contains target data URL
|
||||
let (data, final_url, media_type) = utils::retrieve_asset(
|
||||
@@ -24,16 +30,17 @@ mod passing {
|
||||
&client,
|
||||
"data:text/html;base64,c291cmNl",
|
||||
"data:text/html;base64,dGFyZ2V0",
|
||||
false,
|
||||
&options,
|
||||
0,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
utils::data_to_data_url(&media_type, &data, &final_url, ""),
|
||||
utils::data_to_data_url("text/html", "target".as_bytes(), "", "")
|
||||
url::data_to_data_url(&media_type, &data, &final_url),
|
||||
url::data_to_data_url("text/html", "target".as_bytes(), "")
|
||||
);
|
||||
assert_eq!(
|
||||
final_url,
|
||||
utils::data_to_data_url("text/html", "target".as_bytes(), "", "")
|
||||
url::data_to_data_url("text/html", "target".as_bytes(), "")
|
||||
);
|
||||
assert_eq!(&media_type, "text/html");
|
||||
}
|
||||
@@ -43,6 +50,9 @@ mod passing {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||
|
||||
// Inclusion of local assets from local sources should be allowed
|
||||
@@ -60,10 +70,11 @@ mod passing {
|
||||
file = file_url_protocol,
|
||||
cwd = cwd.to_str().unwrap()
|
||||
),
|
||||
false,
|
||||
&options,
|
||||
0,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(utils::data_to_data_url("application/javascript", &data, &final_url, ""), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
|
||||
assert_eq!(url::data_to_data_url("application/javascript", &data, &final_url), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
|
||||
assert_eq!(
|
||||
&final_url,
|
||||
&format!(
|
||||
@@ -84,22 +95,28 @@ mod passing {
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::utils;
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::opts::Options;
|
||||
use crate::utils;
|
||||
|
||||
#[test]
|
||||
fn read_local_file_with_data_url_parent() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
// Inclusion of local assets from data URL sources should not be allowed
|
||||
match utils::retrieve_asset(
|
||||
cache,
|
||||
&client,
|
||||
"data:text/html;base64,SoUrCe",
|
||||
"file:///etc/passwd",
|
||||
false,
|
||||
&options,
|
||||
0,
|
||||
) {
|
||||
Ok((..)) => {
|
||||
assert!(false);
|
||||
@@ -115,13 +132,17 @@ mod failing {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
// Inclusion of local assets from remote sources should not be allowed
|
||||
match utils::retrieve_asset(
|
||||
cache,
|
||||
&client,
|
||||
"https://kernel.org/",
|
||||
"file:///etc/passwd",
|
||||
false,
|
||||
&options,
|
||||
0,
|
||||
) {
|
||||
Ok((..)) => {
|
||||
assert!(false);
|
||||
|
||||
167
src/url.rs
Normal file
167
src/url.rs
Normal file
@@ -0,0 +1,167 @@
|
||||
use base64;
|
||||
use url::{form_urlencoded, ParseError, Url};
|
||||
|
||||
use crate::utils::detect_media_type;
|
||||
|
||||
pub fn clean_url<T: AsRef<str>>(input: T) -> String {
|
||||
let mut url = Url::parse(input.as_ref()).unwrap();
|
||||
|
||||
// Clear fragment
|
||||
url.set_fragment(None);
|
||||
|
||||
// Get rid of stray question mark
|
||||
if url.query() == Some("") {
|
||||
url.set_query(None);
|
||||
}
|
||||
|
||||
// Remove empty trailing ampersand(s)
|
||||
let mut result: String = url.to_string();
|
||||
while result.ends_with("&") {
|
||||
result.pop();
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str) -> String {
|
||||
let media_type: String = if media_type.is_empty() {
|
||||
detect_media_type(data, &url)
|
||||
} else {
|
||||
media_type.to_string()
|
||||
};
|
||||
|
||||
format!("data:{};base64,{}", media_type, base64::encode(data))
|
||||
}
|
||||
|
||||
/// Percent-decodes `input` and returns the result as a string.
///
/// Every `%XY` sequence with two hexadecimal digits is replaced by the byte it encodes;
/// all other characters (including `+`, `&`, `=` and malformed `%` sequences) are kept
/// as they are. Decoded bytes that are not valid UTF-8 are replaced with U+FFFD.
///
/// The input is deliberately not treated as form data: splitting it into `key=value`
/// pairs would drop every `&` and a trailing `=` (e.g. base64 padding) from the output.
pub fn decode_url(input: String) -> String {
    // Value of an ASCII hexadecimal digit, or None for any other byte
    fn hex_value(byte: u8) -> Option<u8> {
        char::from(byte).to_digit(16).map(|digit| digit as u8)
    }

    let bytes = input.as_bytes();
    let mut decoded: Vec<u8> = Vec::with_capacity(bytes.len());
    let mut pos = 0;

    while pos < bytes.len() {
        // Only a '%' followed by two hex digits forms an escape sequence
        let escaped_byte = if bytes[pos] == b'%' {
            match (bytes.get(pos + 1), bytes.get(pos + 2)) {
                (Some(&high), Some(&low)) => match (hex_value(high), hex_value(low)) {
                    (Some(high), Some(low)) => Some((high << 4) | low),
                    _ => None,
                },
                _ => None,
            }
        } else {
            None
        };

        match escaped_byte {
            Some(byte) => {
                decoded.push(byte);
                pos += 3;
            }
            None => {
                decoded.push(bytes[pos]);
                pos += 1;
            }
        }
    }

    String::from_utf8_lossy(&decoded).into_owned()
}
|
||||
|
||||
pub fn file_url_to_fs_path(url: &str) -> String {
|
||||
if !is_file_url(url) {
|
||||
return str!();
|
||||
}
|
||||
|
||||
let cutoff_l = if cfg!(windows) { 8 } else { 7 };
|
||||
let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string());
|
||||
let url_fragment = get_url_fragment(url);
|
||||
if url_fragment != "" {
|
||||
let max_len = fs_file_path.len() - 1 - url_fragment.len();
|
||||
fs_file_path = fs_file_path[0..max_len].to_string();
|
||||
}
|
||||
|
||||
if cfg!(windows) {
|
||||
fs_file_path = fs_file_path.replace("/", "\\");
|
||||
}
|
||||
|
||||
// File paths should not be %-encoded
|
||||
decode_url(fs_file_path)
|
||||
}
|
||||
|
||||
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
|
||||
match Url::parse(url.as_ref()) {
|
||||
Ok(parsed_url) => parsed_url.fragment().unwrap_or("").to_string(),
|
||||
Err(_err) => str!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme() == "data"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme() == "file"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn parse_data_url<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
|
||||
let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
|
||||
let path: String = parsed_url.path().to_string();
|
||||
let comma_loc: usize = path.find(',').unwrap_or(path.len());
|
||||
|
||||
let meta_data: String = path.chars().take(comma_loc).collect();
|
||||
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
|
||||
|
||||
let text: String = decode_url(raw_data);
|
||||
|
||||
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
|
||||
let mut media_type: String = str!();
|
||||
let mut encoding: &str = "";
|
||||
|
||||
let mut i: i8 = 0;
|
||||
for item in &meta_data_items {
|
||||
if i == 0 {
|
||||
media_type = str!(item);
|
||||
} else {
|
||||
if item.eq_ignore_ascii_case("base64")
|
||||
|| item.eq_ignore_ascii_case("utf8")
|
||||
|| item.eq_ignore_ascii_case("charset=UTF-8")
|
||||
{
|
||||
encoding = item;
|
||||
}
|
||||
}
|
||||
|
||||
i = i + 1;
|
||||
}
|
||||
|
||||
let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
|
||||
base64::decode(&text).unwrap_or(vec![])
|
||||
} else {
|
||||
text.as_bytes().to_vec()
|
||||
};
|
||||
|
||||
(media_type, data)
|
||||
}
|
||||
|
||||
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
|
||||
let result = if is_http_url(to.as_ref()) {
|
||||
to.as_ref().to_string()
|
||||
} else {
|
||||
Url::parse(from.as_ref())?
|
||||
.join(to.as_ref())?
|
||||
.as_ref()
|
||||
.to_string()
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme().len() > 0))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
/// Appends `#` followed by `fragment` to `url`.
/// An empty `fragment` leaves the URL as it is (no `#` is added).
pub fn url_with_fragment(url: &str, fragment: &str) -> String {
    if fragment.is_empty() {
        return url.to_string();
    }

    [url, "#", fragment].concat()
}
|
||||
247
src/utils.rs
247
src/utils.rs
@@ -1,10 +1,13 @@
|
||||
use base64;
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use url::{form_urlencoded, ParseError, Url};
|
||||
|
||||
use crate::opts::Options;
|
||||
use crate::url::{clean_url, file_url_to_fs_path, is_data_url, is_file_url, parse_data_url};
|
||||
|
||||
const INDENT: &str = " ";
|
||||
|
||||
const MAGIC: [[&[u8]; 2]; 18] = [
|
||||
// Image
|
||||
@@ -38,26 +41,6 @@ const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
|
||||
"text/plain",
|
||||
];
|
||||
|
||||
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String {
|
||||
let media_type: String = if media_type.is_empty() {
|
||||
detect_media_type(data, &url)
|
||||
} else {
|
||||
media_type.to_string()
|
||||
};
|
||||
let hash: String = if fragment != "" {
|
||||
format!("#{}", fragment)
|
||||
} else {
|
||||
str!()
|
||||
};
|
||||
|
||||
format!(
|
||||
"data:{};base64,{}{}",
|
||||
media_type,
|
||||
base64::encode(data),
|
||||
hash
|
||||
)
|
||||
}
|
||||
|
||||
pub fn detect_media_type(data: &[u8], url: &str) -> String {
|
||||
for item in MAGIC.iter() {
|
||||
if data.starts_with(item[0]) {
|
||||
@@ -72,159 +55,27 @@ pub fn detect_media_type(data: &[u8], url: &str) -> String {
|
||||
str!()
|
||||
}
|
||||
|
||||
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme().len() > 0))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme() == "data"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme() == "file"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn is_plaintext_media_type(media_type: &str) -> bool {
|
||||
PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
|
||||
}
|
||||
|
||||
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
|
||||
let result = if is_http_url(to.as_ref()) {
|
||||
to.as_ref().to_string()
|
||||
} else {
|
||||
Url::parse(from.as_ref())?
|
||||
.join(to.as_ref())?
|
||||
.as_ref()
|
||||
.to_string()
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
|
||||
if Url::parse(url.as_ref()).unwrap().fragment() == None {
|
||||
str!()
|
||||
} else {
|
||||
str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap())
|
||||
pub fn indent(level: u32) -> String {
|
||||
let mut result = str!();
|
||||
let mut l: u32 = level;
|
||||
while l > 0 {
|
||||
result += INDENT;
|
||||
l -= 1;
|
||||
}
|
||||
}
|
||||
|
||||
pub fn clean_url<T: AsRef<str>>(input: T) -> String {
|
||||
let mut url = Url::parse(input.as_ref()).unwrap();
|
||||
|
||||
// Clear fragment
|
||||
url.set_fragment(None);
|
||||
|
||||
// Get rid of stray question mark
|
||||
if url.query() == Some("") {
|
||||
url.set_query(None);
|
||||
}
|
||||
|
||||
// Remove empty trailing ampersand(s)
|
||||
let mut result: String = url.to_string();
|
||||
while result.ends_with("&") {
|
||||
result.pop();
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
pub fn data_url_to_data<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
|
||||
let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
|
||||
let path: String = parsed_url.path().to_string();
|
||||
let comma_loc: usize = path.find(',').unwrap_or(path.len());
|
||||
|
||||
let meta_data: String = path.chars().take(comma_loc).collect();
|
||||
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
|
||||
|
||||
let text: String = decode_url(raw_data);
|
||||
|
||||
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
|
||||
let mut media_type: String = str!();
|
||||
let mut encoding: &str = "";
|
||||
|
||||
let mut i: i8 = 0;
|
||||
for item in &meta_data_items {
|
||||
if i == 0 {
|
||||
media_type = str!(item);
|
||||
} else {
|
||||
if item.eq_ignore_ascii_case("base64")
|
||||
|| item.eq_ignore_ascii_case("utf8")
|
||||
|| item.eq_ignore_ascii_case("charset=UTF-8")
|
||||
{
|
||||
encoding = item;
|
||||
}
|
||||
}
|
||||
|
||||
i = i + 1;
|
||||
}
|
||||
|
||||
let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
|
||||
base64::decode(&text).unwrap_or(vec![])
|
||||
} else {
|
||||
text.as_bytes().to_vec()
|
||||
};
|
||||
|
||||
(media_type, data)
|
||||
}
|
||||
|
||||
pub fn decode_url(input: String) -> String {
|
||||
let input: String = input.replace("+", "%2B");
|
||||
|
||||
form_urlencoded::parse(input.as_bytes())
|
||||
.map(|(key, val)| {
|
||||
[
|
||||
key.to_string(),
|
||||
if val.to_string().len() == 0 {
|
||||
str!()
|
||||
} else {
|
||||
str!('=')
|
||||
},
|
||||
val.to_string(),
|
||||
]
|
||||
.concat()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn file_url_to_fs_path(url: &str) -> String {
|
||||
if !is_file_url(url) {
|
||||
return str!();
|
||||
}
|
||||
|
||||
let cutoff_l = if cfg!(windows) { 8 } else { 7 };
|
||||
let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string());
|
||||
let url_fragment = get_url_fragment(url);
|
||||
if url_fragment != "" {
|
||||
let max_len = fs_file_path.len() - 1 - url_fragment.len();
|
||||
fs_file_path = fs_file_path[0..max_len].to_string();
|
||||
}
|
||||
|
||||
if cfg!(windows) {
|
||||
fs_file_path = fs_file_path.replace("/", "\\");
|
||||
}
|
||||
|
||||
// File paths should not be %-encoded
|
||||
decode_url(fs_file_path)
|
||||
}
|
||||
|
||||
pub fn retrieve_asset(
|
||||
cache: &mut HashMap<String, Vec<u8>>,
|
||||
client: &Client,
|
||||
parent_url: &str,
|
||||
url: &str,
|
||||
opt_silent: bool,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
) -> Result<(Vec<u8>, String, String), reqwest::Error> {
|
||||
if url.len() == 0 {
|
||||
// Provoke error
|
||||
@@ -232,7 +83,7 @@ pub fn retrieve_asset(
|
||||
}
|
||||
|
||||
if is_data_url(&url) {
|
||||
let (media_type, data) = data_url_to_data(url);
|
||||
let (media_type, data) = parse_data_url(url);
|
||||
Ok((data, url.to_string(), media_type))
|
||||
} else if is_file_url(&url) {
|
||||
// Check if parent_url is also file:///
|
||||
@@ -245,8 +96,8 @@ pub fn retrieve_asset(
|
||||
let fs_file_path: String = file_url_to_fs_path(url);
|
||||
let path = Path::new(&fs_file_path);
|
||||
if path.exists() {
|
||||
if !opt_silent {
|
||||
eprintln!("{}", &url);
|
||||
if !options.silent {
|
||||
eprintln!("{}{}", indent(depth).as_str(), &url);
|
||||
}
|
||||
|
||||
Ok((fs::read(&fs_file_path).expect(""), url.to_string(), str!()))
|
||||
@@ -259,8 +110,8 @@ pub fn retrieve_asset(
|
||||
|
||||
if cache.contains_key(&cache_key) {
|
||||
// URL is in cache, we get and return it
|
||||
if !opt_silent {
|
||||
eprintln!("{} (from cache)", &url);
|
||||
if !options.silent {
|
||||
eprintln!("{}{} (from cache)", indent(depth).as_str(), &url);
|
||||
}
|
||||
|
||||
Ok((
|
||||
@@ -270,34 +121,46 @@ pub fn retrieve_asset(
|
||||
))
|
||||
} else {
|
||||
// URL not in cache, we retrieve the file
|
||||
let mut response = client.get(url).send()?;
|
||||
let res_url = response.url().to_string();
|
||||
match client.get(url).send() {
|
||||
Ok(mut response) => {
|
||||
if !options.ignore_errors && response.status() != 200 {
|
||||
if !options.silent {
|
||||
eprintln!("Unable to retrieve {} ({})", &url, response.status());
|
||||
}
|
||||
// Provoke error
|
||||
return Err(client.get("").send().unwrap_err());
|
||||
}
|
||||
|
||||
if !opt_silent {
|
||||
if url == res_url {
|
||||
eprintln!("{}", &url);
|
||||
} else {
|
||||
eprintln!("{} -> {}", &url, &res_url);
|
||||
let res_url = response.url().to_string();
|
||||
|
||||
if !options.silent {
|
||||
if url == res_url {
|
||||
eprintln!("{}{}", indent(depth).as_str(), &url);
|
||||
} else {
|
||||
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &res_url);
|
||||
}
|
||||
}
|
||||
|
||||
let new_cache_key: String = clean_url(&res_url);
|
||||
|
||||
// Convert response into a byte array
|
||||
let mut data: Vec<u8> = vec![];
|
||||
response.copy_to(&mut data)?;
|
||||
|
||||
// Attempt to obtain media type by reading the Content-Type header
|
||||
let media_type = response
|
||||
.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.and_then(|header| header.to_str().ok())
|
||||
.unwrap_or("");
|
||||
|
||||
// Add retrieved resource to cache
|
||||
cache.insert(new_cache_key, data.clone());
|
||||
|
||||
Ok((data, res_url, media_type.to_string()))
|
||||
}
|
||||
Err(error) => Err(error),
|
||||
}
|
||||
|
||||
let new_cache_key: String = clean_url(&res_url);
|
||||
|
||||
// Convert response into a byte array
|
||||
let mut data: Vec<u8> = vec![];
|
||||
response.copy_to(&mut data)?;
|
||||
|
||||
// Attempt to obtain media type by reading the Content-Type header
|
||||
let media_type = response
|
||||
.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.and_then(|header| header.to_str().ok())
|
||||
.unwrap_or("");
|
||||
|
||||
// Add to cache
|
||||
cache.insert(new_cache_key, data.clone());
|
||||
|
||||
Ok((data, res_url, media_type.to_string()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user