58 Commits

Author SHA1 Message Date
Sunshine
8462b6bc31 Merge pull request #207 from snshn/bump-version
bump version (2.3.0 -> 2.3.1)
2020-08-01 21:00:26 -04:00
Sunshine
92f38556b6 bump version (2.3.0 -> 2.3.1) 2020-08-01 20:24:38 -04:00
Sunshine
c0bdeab2e3 Merge pull request #206 from snshn/update-crates
Update crates
2020-08-01 19:43:00 -04:00
Sunshine
5a502eab4b update crate versions 2020-08-01 19:20:20 -04:00
Sunshine
19f08265a2 Merge pull request #205 from snshn/base-tag
Implement support for BASE tag
2020-08-01 02:47:33 -04:00
Sunshine
1d6392cb28 implement support for BASE tag 2020-08-01 02:35:07 -04:00
Sunshine
03cdc0e0b2 Merge pull request #201 from snshn/refactor-and-version-bump
Refactor and version bump
2020-07-14 03:51:31 -04:00
Sunshine
b98b7af0b4 Merge pull request #202 from snshn/minus-stdout
Treat - for stdout
2020-07-14 03:51:18 -04:00
Sunshine
73c35eaccb treat minus for output target file path as stdout 2020-07-14 03:35:59 -04:00
Sunshine
2c5d1e930b bump version (2.2.7 -> 2.3.0) 2020-07-14 03:29:08 -04:00
Sunshine
90f7c3a0d0 alphabetical order for function names 2020-07-14 03:27:52 -04:00
Sunshine
c1fec5967d Merge pull request #200 from snshn/favicon
Automatically obtain favicon.ico
2020-07-14 03:24:10 -04:00
Sunshine
09d41d2cf1 automatically obtain favicon.ico 2020-07-14 02:58:29 -04:00
Sunshine
8f1da3c792 Update cd.yml 2020-07-13 19:09:01 -04:00
Sunshine
a8449a2b32 Update README.md 2020-07-13 01:16:38 -04:00
Sunshine
164e728ad3 Merge pull request #197 from snshn/addetional-black-box-test-data
Additional black box test data
2020-07-06 16:51:49 -04:00
Sunshine
8883bd6aca add more black box test data 2020-07-06 16:15:57 -04:00
Sunshine
eae5d4dc6b Merge pull request #196 from snshn/help-message-update
Update help message
2020-07-01 06:41:32 -04:00
Sunshine
ec85121d28 update help message 2020-07-01 06:29:56 -04:00
Sunshine
a8a85a4191 Merge pull request #195 from snshn/logo
Logo
2020-07-01 06:24:28 -04:00
Sunshine
decd5b2119 add ASCII logo atop of help message 2020-07-01 06:13:58 -04:00
Sunshine
bef6d848e9 add raster icon along with its Blender scene 2020-07-01 05:54:48 -04:00
Sunshine
4263e42cd1 Merge pull request #194 from snshn/indented-tree
Indented tree
2020-06-28 16:37:10 -04:00
Sunshine
23de5ced21 add tests for utils::indent() 2020-06-28 16:15:42 -04:00
Sunshine
bc98aca2a2 indent items in retrieval log to form a tree-like structure 2020-06-28 16:11:15 -04:00
Sunshine
438ebd520a Merge pull request #193 from snshn/options-struct
Pass options object instead of using separate parameters
2020-06-28 01:51:05 -04:00
Sunshine
ddb97009e9 pass options object instead of using separate parameters 2020-06-28 01:36:41 -04:00
Sunshine
6e67545b92 Merge pull request #192 from snshn/more-test-data
Add more sample data for blackbox tests
2020-06-27 14:57:07 -04:00
Sunshine
9e5d8ec691 add more sample data for blackbox tests 2020-06-27 14:55:10 -04:00
Sunshine
fb835fae28 Merge pull request #191 from snshn/trim-style
Trim CSS if it contains nothing but whitespaces
2020-06-26 23:41:41 -04:00
Sunshine
29bf042da0 trim CSS if it contains nothing but whitespaces 2020-06-26 23:26:55 -04:00
Sunshine
d67483cf8e Merge pull request #190 from snshn/refactor-csp
Refactor CSP code
2020-06-26 21:42:19 -04:00
Sunshine
4140d8ebad Create references.md 2020-06-26 18:16:18 -04:00
Sunshine
2ac964fae5 include font-src into CSP 2020-06-26 18:14:46 -04:00
Sunshine
ae5d6d2df4 refactor CSP code 2020-06-26 16:19:44 -04:00
Sunshine
2ed151d883 Update web-apps.md 2020-06-26 15:05:47 -04:00
Sunshine
3cdfdc45d3 Update snapcraft.yaml 2020-06-26 14:57:52 -04:00
Sunshine
ac04af2cfc Update ADR-0006 2020-06-26 14:44:54 -04:00
Sunshine
769953d7bd Merge pull request #187 from snshn/arm-snapcraft
Add armhf target to snapcraft.yaml
2020-06-26 14:40:46 -04:00
Sunshine
136dcc31cf Merge pull request #189 from snshn/remove-unwanted-meta-tags
Automatically remove "Refresh" and "Location" META tags
2020-06-26 01:29:41 -04:00
Sunshine
44cac65a83 automatically remove "Refresh" and "Location" META tags 2020-06-26 01:18:52 -04:00
Sunshine
c3ca2ad1d5 Merge pull request #188 from snshn/metadata-tag-function
Move metadata tag code into a function
2020-06-25 18:31:50 -04:00
Sunshine
0347fd3985 move metadata tag code into a function 2020-06-25 18:23:56 -04:00
Sunshine
95d0083b3c Update README.md 2020-06-25 17:38:39 -04:00
Sunshine
3ce26b5fdd Merge pull request #186 from snshn/code-improvements
Code improvements
2020-06-24 03:31:11 -04:00
Sunshine
7f9458adfe add armhf target to snapcraft.yaml 2020-06-24 03:20:36 -04:00
Sunshine
5c229c51da move functions related to URL manipulation into url.rs 2020-06-24 03:16:40 -04:00
Sunshine
f6ea16b3ad create a separate function for appending URL fragments 2020-06-24 02:26:05 -04:00
Sunshine
877b11d52c Merge pull request #185 from snshn/upd-crates
Update crates
2020-06-20 03:41:00 -04:00
Sunshine
f9aac6f41b update crates 2020-06-20 01:05:39 -04:00
Sunshine
0a30c286fe add x86_64 GNU/Linux target to CD 2020-06-19 07:44:57 -04:00
Sunshine
ea56b9b4c1 Update README.md 2020-06-19 03:51:19 -04:00
Sunshine
e821591efe Merge pull request #183 from snshn/update-readme-freebsd-instructions
Add FreeBSD installation instructions to README.md
2020-06-18 22:26:56 -04:00
Sunshine
4e5d2fdc8d Merge pull request #184 from snshn/update-readme-ascii
Update README.md
2020-06-18 00:25:55 -04:00
Sunshine
7c2ed2c9ca Update README.md 2020-06-18 00:19:16 -04:00
Sunshine
60d21ae071 Update README.md 2020-06-17 07:42:10 -04:00
Sunshine
bfdcd459e1 Update web-apps.md 2020-06-04 02:32:01 -04:00
Sunshine
6c020dfa88 Create web-apps.md 2020-06-04 02:31:41 -04:00
53 changed files with 2299 additions and 1553 deletions

View File

@@ -15,7 +15,7 @@ jobs:
- run: git config --global core.autocrlf false
- name: Checkout the repository
uses: actions/checkout@v2
- name: Build and install the executable
- name: Build the executable
run: cargo build --release
- uses: Shopify/upload-to-release@1.0.0
with:
@@ -52,3 +52,16 @@ jobs:
name: monolith-gnu-linux-armhf
path: target/arm-unknown-linux-gnueabihf/release/monolith
repo-token: ${{ secrets.GITHUB_TOKEN }}
gnu_linux_x86_64:
runs-on: ubuntu-18.04
steps:
- name: Checkout the repository
uses: actions/checkout@v2
- name: Build the executable
run: cargo build --release
- uses: Shopify/upload-to-release@1.0.0
with:
name: monolith-gnu-linux-x86_64
path: target/release/monolith
repo-token: ${{ secrets.GITHUB_TOKEN }}

1066
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
[package]
name = "monolith"
version = "2.2.7"
version = "2.3.1"
edition = "2018"
authors = [
"Sunshine <sunshine@uberspace.net>",
@@ -12,19 +12,19 @@ authors = [
description = "CLI tool for saving web pages as a single HTML file"
[dependencies]
base64 = "0.12.0"
clap = "2.33.0"
base64 = "0.12.3"
chrono = "0.4.13" # Used to render comments indicating the time the page was saved
clap = "2.33.1"
cssparser = "0.27.2"
html5ever = "0.24.1"
sha2 = "0.8.1" # Used in calculating checksums during integrity checks
time = "0.1.42" # Used to render comments indicating the time the page was saved
sha2 = "0.9.1" # Used in calculating checksums during integrity checks
url = "2.1.1"
[dependencies.reqwest]
version = "0.10.*"
version = "0.10.6"
default-features = false
features = ["default-tls", "blocking", "gzip"]
[dev-dependencies]
assert_cmd = "0.12.0"
assert_cmd = "1.0.1"
tempfile = "3.1.0"

View File

@@ -23,3 +23,7 @@ install:
uninstall:
@cargo uninstall
.PHONY: uninstall
clean:
@cargo clean
.PHONY: clean

View File

@@ -3,13 +3,13 @@
[![Monolith Build Status for Windows](https://github.com/Y2Z/monolith/workflows/Windows/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows)
```
___ ___________ __________ ___________________ ___
| \ / \ | | | | | |
| \_/ __ \_| __ | | ___ ___ |__| |
| | | | | | | | | | | |
| |__| _ |__| |____| | | | | __ |
| |\_/| | \ | | | | | | |
|___| |__________| \____________________| |___| |___| |___|
_____ ______________ __________ ___________________ ___
| \ / \ | | | | | |
| \_/ __ \_| __ | | ___ ___ |__| |
| | | | | | | | | | | |
| |\ /| |__| _ |__| |____| | | | | __ |
| | \___/ | | \ | | | | | | |
|___| |__________| \_____________________| |___| |___| |___|
```
A data hoarders dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
@@ -28,6 +28,13 @@ If compared to saving websites with `wget -mpk`, this tool embeds all assets as
#### Using Snapcraft (on GNU/Linux)
$ snap install monolith
#### Using Ports collection (on FreeBSD and TrueOS)
$ cd /usr/ports/www/monolith/
$ make install clean
#### Using pre-built binaries (Windows, ARM-based devices, etc)
Every [release](https://github.com/Y2Z/monolith/releases) contains pre-built binaries for Windows, GNU/Linux, as well as platforms with non-standart CPU architecture.
#### From source
Dependency: `libssl-dev`
@@ -36,7 +43,7 @@ Dependency: `libssl-dev`
$ cd monolith
$ make install
#### With Docker
#### Using Containers
The guide can be found [here](docs/containers.md)
---------------------------------------------------
@@ -48,14 +55,14 @@ The guide can be found [here](docs/containers.md)
## Options
- `-c`: Ignore styles
- `-f`: Exclude frames and iframes
- `-f`: Exclude frames
- `-F`: Omit web fonts
- `-i`: Remove images
- `-I`: Isolate the document
- `-j`: Exclude JavaScript
- `-k`: Accept invalid X.509 (TLS) certificates
- `-o`: Write output to file
- `-s`: Enable silent mode
- `-s`: Be quiet
- `-t`: Adjust network request timeout
- `-u`: Provide custom User-Agent

BIN
assets/icon/icon.blend Normal file

Binary file not shown.

BIN
assets/icon/icon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.2 MiB

View File

@@ -0,0 +1,19 @@
# 4. Reload and location `meta` tags
Date: 2020-06-25
## Status
Accepted
## Context
HTML documents may contain `meta` tags capable of automatically refreshing the page or redirecting to another location.
## Decision
Since the resulting document is saved to disk and generally not intended to be served over the network, it only makes sense to remove `meta` tags that have `http-equiv` attribute equal to "Refresh" or "Location", in order to prevent them from reloading the page or redirecting to another location.
## Consequences
Monolith will ensure that saved documents do not contain `meta` tags capable of changing location or reloading the page.

3
docs/references.md Normal file
View File

@@ -0,0 +1,3 @@
# References
- https://content-security-policy.com/

10
docs/web-apps.md Normal file
View File

@@ -0,0 +1,10 @@
# Web apps that can be saved with Monolith
These apps retain most or all of their functionality when saved with Monolith
|Converse|https://conversejs.org|
|:--|:--|
|Description|An XMPP client built using web technologies|
|Functionality retained|**full**|
|Command to use|`monolith https://conversejs.org/fullscreen.html > conversejs.html`|
|Monolith version used|2.2.7|

View File

@@ -18,11 +18,11 @@ description: |
confinement: strict
# Building on armhf fails, so we specify all supported non-armhf architectures
architectures:
- build-on: amd64
- build-on: i386
- build-on: arm64
- build-on: armhf
- build-on: i386
- build-on: ppc64el
- build-on: s390x

View File

@@ -2,7 +2,9 @@ use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::utils::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, retrieve_asset};
use crate::opts::Options;
use crate::url::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, url_with_fragment};
use crate::utils::retrieve_asset;
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
// Universal
@@ -58,12 +60,11 @@ pub fn process_css<'a>(
client: &Client,
parent_url: &str,
parser: &mut Parser,
options: &Options,
depth: u32,
rule_name: &str,
prop_name: &str,
func_name: &str,
opt_no_fonts: bool,
opt_no_images: bool,
opt_silent: bool,
) -> Result<String, ParseError<'a, String>> {
let mut result: String = str!();
@@ -90,7 +91,7 @@ pub fn process_css<'a>(
Token::Colon => result.push_str(":"),
Token::Comma => result.push_str(","),
Token::ParenthesisBlock | Token::SquareBracketBlock | Token::CurlyBracketBlock => {
if opt_no_fonts && curr_rule == "font-face" {
if options.no_fonts && curr_rule == "font-face" {
continue;
}
@@ -113,12 +114,11 @@ pub fn process_css<'a>(
client,
parent_url,
parser,
options,
depth,
rule_name,
curr_prop.as_str(),
func_name,
opt_no_fonts,
opt_no_images,
opt_silent,
)
})
.unwrap();
@@ -148,7 +148,7 @@ pub fn process_css<'a>(
// @import, @font-face, @charset, @media...
Token::AtKeyword(ref value) => {
curr_rule = str!(value);
if opt_no_fonts && curr_rule == "font-face" {
if options.no_fonts && curr_rule == "font-face" {
continue;
}
result.push_str("@");
@@ -171,34 +171,42 @@ pub fn process_css<'a>(
let import_full_url = resolve_url(&parent_url, value).unwrap_or_default();
let import_url_fragment = get_url_fragment(import_full_url.clone());
match retrieve_asset(cache, client, &parent_url, &import_full_url, opt_silent) {
match retrieve_asset(
cache,
client,
&parent_url,
&import_full_url,
options.silent,
depth + 1,
) {
Ok((import_contents, import_final_url, _import_media_type)) => {
result.push_str(
enquote(
data_to_data_url(
"text/css",
embed_css(
cache,
client,
&import_final_url,
&String::from_utf8_lossy(&import_contents),
opt_no_fonts,
opt_no_images,
opt_silent,
)
.as_bytes(),
&import_final_url,
&import_url_fragment,
),
false,
let import_data_url = data_to_data_url(
"text/css",
embed_css(
cache,
client,
&import_final_url,
&String::from_utf8_lossy(&import_contents),
options,
depth + 1,
)
.as_str(),
.as_bytes(),
&import_final_url,
);
let assembled_url: String = url_with_fragment(
import_data_url.as_str(),
import_url_fragment.as_str(),
);
result.push_str(enquote(assembled_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(import_full_url.clone()) {
result.push_str(enquote(import_full_url, false).as_str());
let assembled_url: String = url_with_fragment(
import_full_url.as_str(),
import_url_fragment.as_str(),
);
result.push_str(enquote(assembled_url, false).as_str());
}
}
}
@@ -209,7 +217,7 @@ pub fn process_css<'a>(
continue;
}
if opt_no_images && is_image_url_prop(curr_prop.as_str()) {
if options.no_images && is_image_url_prop(curr_prop.as_str()) {
result.push_str(enquote(str!(empty_image!()), false).as_str());
} else {
let resolved_url = resolve_url(&parent_url, value).unwrap_or_default();
@@ -219,21 +227,23 @@ pub fn process_css<'a>(
client,
&parent_url,
&resolved_url,
opt_silent,
options.silent,
depth + 1,
) {
Ok((data, final_url, media_type)) => {
let data_url = data_to_data_url(
&media_type,
&data,
&final_url,
&url_fragment,
);
result.push_str(enquote(data_url, false).as_str());
let data_url = data_to_data_url(&media_type, &data, &final_url);
let assembled_url: String =
url_with_fragment(data_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(resolved_url.clone()) {
result.push_str(enquote(resolved_url, false).as_str());
let assembled_url: String = url_with_fragment(
resolved_url.as_str(),
url_fragment.as_str(),
);
result.push_str(enquote(assembled_url, false).as_str());
}
}
}
@@ -261,7 +271,7 @@ pub fn process_css<'a>(
if *has_sign && *unit_value >= 0. {
result.push_str("+");
}
result.push_str(str!(unit_value * 100.).as_str());
result.push_str(str!(unit_value * 100.0).as_str());
result.push_str("%");
}
Token::Dimension {
@@ -305,7 +315,14 @@ pub fn process_css<'a>(
if is_import {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) {
match retrieve_asset(
cache,
client,
&parent_url,
&full_url,
options.silent,
depth + 1,
) {
Ok((css, final_url, _media_type)) => {
let data_url = data_to_data_url(
"text/css",
@@ -314,39 +331,51 @@ pub fn process_css<'a>(
client,
&final_url,
&String::from_utf8_lossy(&css),
opt_no_fonts,
opt_no_images,
opt_silent,
options,
depth + 1,
)
.as_bytes(),
&final_url,
&url_fragment,
);
result.push_str(enquote(data_url, false).as_str());
let assembled_url: String =
url_with_fragment(data_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(full_url.clone()) {
result.push_str(enquote(full_url, false).as_str());
let assembled_url: String =
url_with_fragment(full_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
}
}
}
} else {
if opt_no_images && is_image_url_prop(curr_prop.as_str()) {
if is_image_url_prop(curr_prop.as_str()) && options.no_images {
result.push_str(enquote(str!(empty_image!()), false).as_str());
} else {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) {
match retrieve_asset(
cache,
client,
&parent_url,
&full_url,
options.silent,
depth + 1,
) {
Ok((data, final_url, media_type)) => {
let data_url =
data_to_data_url(&media_type, &data, &final_url, &url_fragment);
result.push_str(enquote(data_url, false).as_str());
let data_url = data_to_data_url(&media_type, &data, &final_url);
let assembled_url: String =
url_with_fragment(data_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(full_url.clone()) {
result.push_str(enquote(full_url, false).as_str());
let assembled_url: String =
url_with_fragment(full_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
}
}
}
@@ -367,12 +396,11 @@ pub fn process_css<'a>(
client,
parent_url,
parser,
options,
depth,
curr_rule.as_str(),
curr_prop.as_str(),
function_name,
opt_no_fonts,
opt_no_images,
opt_silent,
)
})
.unwrap();
@@ -384,6 +412,11 @@ pub fn process_css<'a>(
}
}
// Ensure empty CSS is really empty
if result.len() > 0 && result.trim().len() == 0 {
result = result.trim().to_string()
}
Ok(result)
}
@@ -392,9 +425,8 @@ pub fn embed_css(
client: &Client,
parent_url: &str,
css: &str,
opt_no_fonts: bool,
opt_no_images: bool,
opt_silent: bool,
options: &Options,
depth: u32,
) -> String {
let mut input = ParserInput::new(&css);
let mut parser = Parser::new(&mut input);
@@ -404,12 +436,11 @@ pub fn embed_css(
client,
parent_url,
&mut parser,
options,
depth,
"",
"",
"",
opt_no_fonts,
opt_no_images,
opt_silent,
)
.unwrap()
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,9 +1,14 @@
#[macro_use]
extern crate clap;
#[macro_use]
mod macros;
pub mod css;
pub mod html;
pub mod js;
pub mod opts;
pub mod url;
pub mod utils;
#[cfg(test)]

View File

@@ -1,8 +1,5 @@
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
use monolith::utils::{data_url_to_data, is_data_url, is_file_url, is_http_url, retrieve_asset};
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use reqwest::Url;
use std::collections::HashMap;
use std::env;
use std::fs;
@@ -11,12 +8,17 @@ use std::path::Path;
use std::process;
use std::time::Duration;
mod args;
mod macros;
use monolith::html::{
add_base_tag, add_favicon, has_base_tag, has_favicon, html_to_dom, metadata_tag,
stringify_document, walk_and_embed_assets,
};
use monolith::opts::Options;
use monolith::url::{
data_to_data_url, data_url_to_data, is_data_url, is_file_url, is_http_url, resolve_url,
};
use monolith::utils::retrieve_asset;
#[macro_use]
extern crate clap;
use crate::args::AppArgs;
mod macros;
enum Output {
Stdout(io::Stdout),
@@ -25,7 +27,7 @@ enum Output {
impl Output {
fn new(file_path: &str) -> Result<Output, Error> {
if file_path.is_empty() {
if file_path.is_empty() || file_path.eq("-") {
Ok(Output::Stdout(io::stdout()))
} else {
Ok(Output::File(fs::File::create(file_path)?))
@@ -47,11 +49,11 @@ impl Output {
}
fn main() {
let app_args = AppArgs::get();
let original_target: &str = &app_args.target;
let options = Options::from_args();
let original_target: &str = &options.target;
let target_url: &str;
let base_url;
let dom;
let mut dom;
// Pre-process the input
let cwd_normalized: String =
@@ -60,6 +62,7 @@ fn main() {
let mut target: String = str!(original_target.clone()).replace("\\", "/");
let path_is_relative: bool = path.is_relative();
// Determine exact target URL
if target.clone().len() == 0 {
eprintln!("No target specified");
process::exit(1);
@@ -86,31 +89,38 @@ fn main() {
target_url = target.as_str();
}
let mut output = Output::new(&app_args.output).expect("Could not prepare output");
// Define output
let mut output = Output::new(&options.output).expect("Could not prepare output");
// Initialize client
let mut cache = HashMap::new();
let mut header_map = HeaderMap::new();
header_map.insert(
USER_AGENT,
HeaderValue::from_str(&app_args.user_agent).expect("Invalid User-Agent header specified"),
HeaderValue::from_str(&options.user_agent).expect("Invalid User-Agent header specified"),
);
let timeout: u64 = if app_args.timeout > 0 {
app_args.timeout
let timeout: u64 = if options.timeout > 0 {
options.timeout
} else {
std::u64::MAX / 4
};
let client = Client::builder()
.timeout(Duration::from_secs(timeout))
.danger_accept_invalid_certs(app_args.insecure)
.danger_accept_invalid_certs(options.insecure)
.default_headers(header_map)
.build()
.expect("Failed to initialize HTTP client");
// Retrieve root document
// Retrieve target document
if is_file_url(target_url) || is_http_url(target_url) {
match retrieve_asset(&mut cache, &client, target_url, target_url, app_args.silent) {
match retrieve_asset(
&mut cache,
&client,
target_url,
target_url,
options.silent,
0,
) {
Ok((data, final_url, _media_type)) => {
base_url = final_url;
dom = html_to_dom(&String::from_utf8_lossy(&data));
@@ -132,60 +142,50 @@ fn main() {
process::exit(1);
}
let time_saved = time::now_utc();
// Embed remote assets
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
walk_and_embed_assets(
&mut cache,
&client,
&base_url,
&dom.document,
app_args.no_css,
app_args.no_fonts,
app_args.no_frames,
app_args.no_js,
app_args.no_images,
app_args.silent,
);
let mut html: String = stringify_document(
&dom.document,
app_args.no_css,
app_args.no_frames,
app_args.no_js,
app_args.no_images,
app_args.isolate,
);
if !app_args.no_metadata {
// Safe to unwrap (we just put this through an HTTP request)
let mut clean_url = Url::parse(&base_url).unwrap();
clean_url.set_fragment(None);
// Prevent credentials from getting into metadata
if is_http_url(&base_url) {
// Only HTTP(S) URLs may feature credentials
clean_url.set_username("").unwrap();
clean_url.set_password(None).unwrap();
}
let metadata_comment = if is_http_url(&base_url) {
format!(
"<!-- Saved from {} at {} using {} v{} -->\n",
&clean_url,
time_saved.rfc3339(),
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION"),
)
} else {
format!(
"<!-- Saved from local source at {} using {} v{} -->\n",
time_saved.rfc3339(),
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION"),
)
};
html.insert_str(0, &metadata_comment);
// Take care of BASE tag
if is_http_url(base_url.clone()) && !has_base_tag(&dom.document) {
dom = add_base_tag(&dom.document, base_url.clone());
}
// Request and embed /favicon.ico (unless it's already linked in the document)
if !options.no_images && is_http_url(target_url) && !has_favicon(&dom.document) {
let favicon_ico_url: String = resolve_url(&base_url, "/favicon.ico").unwrap();
match retrieve_asset(
&mut cache,
&client,
&base_url,
&favicon_ico_url,
options.silent,
0,
) {
Ok((data, final_url, media_type)) => {
let favicon_data_url: String = data_to_data_url(&media_type, &data, &final_url);
dom = add_favicon(&dom.document, favicon_data_url);
}
Err(_) => {
// Failed to retrieve favicon.ico
}
}
}
// Serialize DOM tree
let mut result: String = stringify_document(&dom.document, &options);
// Add metadata tag
if !options.no_metadata {
let metadata_comment: String = metadata_tag(&base_url);
result.insert_str(0, &metadata_comment);
if metadata_comment.len() > 0 {
result.insert_str(metadata_comment.len(), "\n");
}
}
// Write result into stdout or file
output
.writeln_str(&html)
.writeln_str(&result)
.expect("Could not write HTML output");
}

View File

@@ -1,7 +1,7 @@
use clap::{App, Arg};
#[derive(Default)]
pub struct AppArgs {
pub struct Options {
pub target: String,
pub no_css: bool,
pub no_fonts: bool,
@@ -17,16 +17,25 @@ pub struct AppArgs {
pub no_metadata: bool,
}
const ASCII: &str = " \
_____ ______________ __________ ___________________ ___
| \\ / \\ | | | | | |
| \\_/ __ \\_| __ | | ___ ___ |__| |
| | | | | | | | | | | |
| |\\ /| |__| _ |__| |____| | | | | __ |
| | \\___/ | | \\ | | | | | | |
|___| |__________| \\_____________________| |___| |___| |___|
";
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
const DEFAULT_USER_AGENT: &str =
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0";
impl AppArgs {
pub fn get() -> AppArgs {
impl Options {
pub fn from_args() -> Options {
let app = App::new(env!("CARGO_PKG_NAME"))
.version(crate_version!())
.author(crate_authors!("\n"))
.about(crate_description!())
.about(format!("{}\n{}", ASCII, crate_description!()).as_str())
.arg(
Arg::with_name("target")
.required(true)
@@ -34,7 +43,7 @@ impl AppArgs {
.index(1)
.help("URL or file path"),
)
// .args_from_usage("-a, --include-audio 'Removes audio sources'")
// .args_from_usage("-a, --no-audio 'Removes audio sources'")
.args_from_usage("-c, --no-css 'Removes CSS'")
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
.args_from_usage("-F, --no-fonts 'Removes fonts'")
@@ -43,37 +52,39 @@ impl AppArgs {
.args_from_usage("-j, --no-js 'Removes JavaScript'")
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
.args_from_usage("-M, --no-metadata 'Excludes metadata information from the document'")
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
.args_from_usage("-o, --output=[document.html] 'Write output to <file>'")
.args_from_usage("-s, --silent 'Suppresses verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
// .args_from_usage("-v, --include-video 'Removes video sources'")
.args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'")
// .args_from_usage("-v, --no-video 'Removes video sources'")
.get_matches();
let mut app_args = AppArgs::default();
let mut options: Options = Options::default();
// Process the command
app_args.target = app
options.target = app
.value_of("target")
.expect("please set target")
.to_string();
app_args.no_css = app.is_present("no-css");
app_args.no_fonts = app.is_present("no-fonts");
app_args.no_frames = app.is_present("no-frames");
app_args.no_images = app.is_present("no-images");
app_args.no_js = app.is_present("no-js");
app_args.insecure = app.is_present("insecure");
app_args.no_metadata = app.is_present("no-metadata");
app_args.isolate = app.is_present("isolate");
app_args.silent = app.is_present("silent");
app_args.timeout = app
options.no_css = app.is_present("no-css");
options.no_frames = app.is_present("no-frames");
options.no_fonts = app.is_present("no-fonts");
options.no_images = app.is_present("no-images");
options.isolate = app.is_present("isolate");
options.no_js = app.is_present("no-js");
options.insecure = app.is_present("insecure");
options.no_metadata = app.is_present("no-metadata");
options.output = app.value_of("output").unwrap_or("").to_string();
options.silent = app.is_present("silent");
options.timeout = app
.value_of("timeout")
.unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string())
.parse::<u64>()
.unwrap();
app_args.output = app.value_of("output").unwrap_or("").to_string();
app_args.user_agent = app
options.user_agent = app
.value_of("user-agent")
.unwrap_or(DEFAULT_USER_AGENT)
.to_string();
app_args
options
}
}

View File

@@ -128,13 +128,41 @@ mod passing {
Ok(())
}
#[test]
fn remove_fonts_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-F")
.arg("data:text/html,<style>@font-face { font-family: myFont; src: url(font.woff); }</style>Hi")
.output()
.unwrap();
// STDOUT should contain HTML with no web fonts
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"font-src 'none';\"></meta>\
<style></style>\
</head><body>Hi</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-f")
.arg("data:text/html,<iframe src=\"https://google.com\"></iframe>Hi")
.arg("data:text/html,<iframe src=\"https://duckduckgo.com\"></iframe>Hi")
.output()
.unwrap();
@@ -142,7 +170,7 @@ mod passing {
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>\
</head><body><iframe src=\"\"></iframe>Hi</body></html>\n"
);
@@ -259,8 +287,8 @@ mod passing {
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file}{cwd}/src/tests/data/basic/local-file.html\n\
{file}{cwd}/src/tests/data/basic/local-style.css\n\
{file}{cwd}/src/tests/data/basic/local-file.html\n \
{file}{cwd}/src/tests/data/basic/local-style.css\n \
{file}{cwd}/src/tests/data/basic/local-script.js\n\
",
file = file_url_protocol,
@@ -461,7 +489,7 @@ mod passing {
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file}{html_path}\n\
{file}{html_path}\n \
{file}{svg_path}\n\
",
file = file_url_prefix,
@@ -512,9 +540,9 @@ mod passing {
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file}{html_path}\n\
{file}{css_path}\n\
{file}{css_path}\n\
{file}{html_path}\n \
{file}{css_path}\n \
{file}{css_path}\n \
{file}{css_path}\n\
",
file = file_url_prefix,

View File

@@ -7,17 +7,36 @@
#[cfg(test)]
mod passing {
use crate::css;
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::css;
use crate::opts::Options;
#[test]
fn empty_input() {
let cache = &mut HashMap::new();
let client = Client::new();
let options = Options::default();
assert_eq!(css::embed_css(cache, &client, "", "", &options, 0), "");
}
#[test]
fn trim_if_empty() {
let cache = &mut HashMap::new();
let client = Client::new();
let options = Options::default();
assert_eq!(
css::embed_css(cache, &client, "", "", false, false, false,),
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
"\t \t ",
&options,
0,
),
""
);
}
@@ -26,6 +45,9 @@ mod passing {
fn style_exclude_unquoted_images() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
const STYLE: &str = "/* border: none;*/\
background-image: url(https://somewhere.com/bg.png); \
@@ -41,9 +63,8 @@ mod passing {
&client,
"https://doesntmatter.local/",
&STYLE,
false,
true,
true,
&options,
0,
),
format!(
"/* border: none;*/\
@@ -62,6 +83,9 @@ mod passing {
fn style_exclude_single_quoted_images() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
const STYLE: &str = "/* border: none;*/\
background-image: url('https://somewhere.com/bg.png'); \
@@ -72,7 +96,7 @@ mod passing {
height: calc(100vh - 10pt)";
assert_eq!(
css::embed_css(cache, &client, "", &STYLE, false, true, true,),
css::embed_css(cache, &client, "", &STYLE, &options, 0),
format!(
"/* border: none;*/\
background-image: url('{empty_image}'); \
@@ -90,6 +114,8 @@ mod passing {
fn style_block() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
#id.class-name:not(:nth-child(3n+0)) {\n \
@@ -100,7 +126,7 @@ mod passing {
html > body {}";
assert_eq!(
css::embed_css(cache, &client, "file:///", &CSS, false, false, true,),
css::embed_css(cache, &client, "file:///", &CSS, &options, 0),
CSS
);
}
@@ -109,6 +135,8 @@ mod passing {
fn attribute_selectors() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
[data-value] {
@@ -140,16 +168,15 @@ mod passing {
}
";
assert_eq!(
css::embed_css(cache, &client, "", &CSS, false, false, false,),
CSS
);
assert_eq!(css::embed_css(cache, &client, "", &CSS, &options, 0), CSS);
}
#[test]
fn import_string() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
@charset 'UTF-8';\n\
@@ -165,9 +192,8 @@ mod passing {
&client,
"https://doesntmatter.local/",
&CSS,
false,
false,
true,
&options,
0,
),
"\
@charset 'UTF-8';\n\
@@ -183,6 +209,8 @@ mod passing {
fn hash_urls() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
body {\n \
@@ -200,9 +228,8 @@ mod passing {
&client,
"https://doesntmatter.local/",
&CSS,
false,
false,
true,
&options,
0,
),
CSS
);
@@ -212,6 +239,8 @@ mod passing {
fn transform_percentages_and_degrees() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
div {\n \
@@ -227,9 +256,8 @@ mod passing {
&client,
"https://doesntmatter.local/",
&CSS,
false,
false,
true,
&options,
0,
),
CSS
);
@@ -239,6 +267,8 @@ mod passing {
fn unusual_indents() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
.is\\:good:hover {\n \
@@ -256,9 +286,8 @@ mod passing {
&client,
"https://doesntmatter.local/",
&CSS,
false,
false,
true,
&options,
0,
),
CSS
);
@@ -268,6 +297,9 @@ mod passing {
fn exclude_fonts() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.no_fonts = true;
options.silent = true;
const CSS: &str = "\
@font-face {\n \
@@ -309,9 +341,8 @@ mod passing {
&client,
"https://doesntmatter.local/",
&CSS,
true,
false,
true,
&options,
0,
),
CSS_OUT
);

View File

@@ -0,0 +1,23 @@
<!doctype html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Attempt to import CSS via data URL asset</title>
<style>
body {
background-color: white;
color: black;
}
</style>
<link href="data:text/css;base64,QGltcG9ydCAic3R5bGUuY3NzIjsK" rel="stylesheet" type="text/css" />
</head>
<body>
<p>If you see pink background with white foreground then were in trouble</p>
</body>
</html>

View File

@@ -0,0 +1,4 @@
body {
background-color: pink;
color: white;
}

View File

@@ -0,0 +1,19 @@
<!doctype html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="Content-Security-Policy" content="default-src 'unsafe-inline' file:;" />
<title>Local HTML file</title>
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-IWaCTORHkRhOWzcZeILSVmV6V6gPTHgNem6o6rsFAyaKTieDFkeeMrWjtO0DuWrX3bqZY46CVTZXUu0mia0qXQ==" crossorigin="anonymous" />
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-vWBzl4NE9oIg8NFOPAyOZbaam0UXWr6aDHPaY2kodSzAFl+mKoj/RMNc6C31NDqK4mE2i68IWxYWqWJPLCgPOw==" crossorigin="anonymous" />
</head>
<body>
<p>This page should have black background and white foreground, but only when served via http: (not via file:)</p>
<script src="script.js" integrity="sha256-ecrEsYh3+ICCX8BCrNSotXgI5534282JwJjx8Q9ZWLc="></script>
<script src="script.js" integrity="sha256-6idk9dK0bOkVdG7Oz4/0YLXSJya8xZHqbRZKMhYrt6o="></script>
</body>
</html>

View File

@@ -0,0 +1,3 @@
function noop() {
console.log("monolith");
}

View File

@@ -0,0 +1,4 @@
body {
background-color: #000;
color: #FFF;
}

View File

@@ -0,0 +1,29 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use html5ever::serialize::{serialize, SerializeOpts};
use crate::html;
#[test]
fn basic() {
let html = "<div>text</div>";
let mut dom = html::html_to_dom(&html);
dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string());
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head><link rel=\"icon\" href=\"I_AM_A_FAVICON_DATA_URL\"></link></head><body><div>text</div></body></html>"
);
}
}

80
src/tests/html/csp.rs Normal file
View File

@@ -0,0 +1,80 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use crate::html;
use crate::opts::Options;
#[test]
fn isolated() {
let mut options = Options::default();
options.isolate = true;
let csp_content = html::csp(&options);
assert_eq!(csp_content, "default-src 'unsafe-inline' data:;");
}
#[test]
fn no_css() {
let mut options = Options::default();
options.no_css = true;
let csp_content = html::csp(&options);
assert_eq!(csp_content, "style-src 'none';");
}
#[test]
fn no_fonts() {
let mut options = Options::default();
options.no_fonts = true;
let csp_content = html::csp(&options);
assert_eq!(csp_content, "font-src 'none';");
}
#[test]
fn no_frames() {
let mut options = Options::default();
options.no_frames = true;
let csp_content = html::csp(&options);
assert_eq!(csp_content, "frame-src 'none'; child-src 'none';");
}
#[test]
fn no_js() {
let mut options = Options::default();
options.no_js = true;
let csp_content = html::csp(&options);
assert_eq!(csp_content, "script-src 'none';");
}
#[test]
fn no_images() {
let mut options = Options::default();
options.no_images = true;
let csp_content = html::csp(&options);
assert_eq!(csp_content, "img-src data:;");
}
#[test]
fn all() {
let mut options = Options::default();
options.isolate = true;
options.no_css = true;
options.no_fonts = true;
options.no_frames = true;
options.no_js = true;
options.no_images = true;
let csp_content = html::csp(&options);
assert_eq!(csp_content, "default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;");
}
}

View File

@@ -7,16 +7,21 @@
#[cfg(test)]
mod passing {
use crate::html;
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::html;
use crate::opts::Options;
#[test]
fn replace_with_empty_images() {
let cache = &mut HashMap::new();
let client = Client::new();
let srcset_value = "small.png 1x, large.png 2x";
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, true, true);
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
assert_eq!(
format!("{} 1x, {} 2x", empty_image!(), empty_image!()),

View File

@@ -7,9 +7,10 @@
#[cfg(test)]
mod passing {
use crate::html;
use html5ever::rcdom::{Handle, NodeData};
use crate::html;
#[test]
fn get_node_name() {
let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";

View File

@@ -0,0 +1,52 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use crate::html;
use crate::opts::Options;
#[test]
fn icon() {
let html = "<link rel=\"icon\" href=\"\" /><div>text</div>";
let dom = html::html_to_dom(&html);
let res: bool = html::has_favicon(&dom.document);
assert!(res);
}
#[test]
fn shortcut_icon() {
let html = "<link rel=\"shortcut icon\" href=\"\" /><div>text</div>";
let dom = html::html_to_dom(&html);
let res: bool = html::has_favicon(&dom.document);
assert!(res);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use crate::html;
use crate::opts::Options;
#[test]
fn absent() {
let html = "<div>text</div>";
let dom = html::html_to_dom(&html);
let res: bool = html::has_favicon(&dom.document);
assert!(!res);
}
}

View File

@@ -23,16 +23,6 @@ mod passing {
fn icon_uppercase() {
assert!(html::is_icon("ICON"));
}
#[test]
fn mask_icon() {
assert!(html::is_icon("mask-icon"));
}
#[test]
fn fluid_icon() {
assert!(html::is_icon("fluid-icon"));
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@@ -46,6 +36,16 @@ mod passing {
mod failing {
use crate::html;
#[test]
fn mask_icon() {
assert!(!html::is_icon("mask-icon"));
}
#[test]
fn fluid_icon() {
assert!(!html::is_icon("fluid-icon"));
}
#[test]
fn stylesheet() {
assert!(!html::is_icon("stylesheet"));

View File

@@ -0,0 +1,82 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use chrono::prelude::*;
use crate::html;
#[test]
fn http_url() {
let url = "http://192.168.1.1/";
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::metadata_tag(url);
assert_eq!(
metadata_comment,
format!(
"<!-- Saved from {} at {} using {} v{} -->",
&url,
timestamp,
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION"),
)
);
}
#[test]
fn file_url() {
let url = "file:///home/monolith/index.html";
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::metadata_tag(url);
assert_eq!(
metadata_comment,
format!(
"<!-- Saved from local source at {} using {} v{} -->",
timestamp,
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION"),
)
);
}
#[test]
fn data_url() {
let url = "data:text/html,Hello%2C%20World!";
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::metadata_tag(url);
assert_eq!(
metadata_comment,
format!(
"<!-- Saved from local source at {} using {} v{} -->",
timestamp,
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION"),
)
);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use crate::html;
#[test]
fn empty_string() {
assert_eq!(html::metadata_tag(""), "");
}
}

View File

@@ -1,6 +1,10 @@
mod add_favicon;
mod csp;
mod embed_srcset;
mod get_node_name;
mod has_favicon;
mod has_proper_integrity;
mod is_icon;
mod metadata_tag;
mod stringify_document;
mod walk_and_embed_assets;

View File

@@ -8,27 +8,16 @@
#[cfg(test)]
mod passing {
use crate::html;
use crate::opts::Options;
#[test]
fn div_as_root_element() {
let html = "<div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html);
let opt_no_css: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_isolate: bool = false;
let options = Options::default();
assert_eq!(
html::stringify_document(
&dom.document,
opt_no_css,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
),
html::stringify_document(&dom.document, &options),
"<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
);
}
@@ -40,21 +29,13 @@ mod passing {
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html);
let opt_no_css: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_isolate: bool = true;
let mut options = Options::default();
options.isolate = true;
assert_eq!(
html::stringify_document(
&dom.document,
opt_no_css,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
&options
),
"<html>\
<head>\
@@ -79,22 +60,11 @@ mod passing {
<link rel=\"stylesheet\" href=\"main.css\"/>\
<div style=\"display: none;\"></div>";
let dom = html::html_to_dom(&html);
let opt_no_css: bool = true;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_isolate: bool = false;
let mut options = Options::default();
options.no_css = true;
assert_eq!(
html::stringify_document(
&dom.document,
opt_no_css,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
),
html::stringify_document(&dom.document, &options),
"<!DOCTYPE html>\
<html>\
<head>\
@@ -114,26 +84,18 @@ mod passing {
<link rel=\"something\"/>\
<div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html);
let opt_no_css: bool = false;
let opt_no_frames: bool = true;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_isolate: bool = false;
let mut options = Options::default();
options.no_frames = true;
assert_eq!(
html::stringify_document(
&dom.document,
opt_no_css,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
&options
),
"<!DOCTYPE html>\
<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>\
<title>Frameless document</title>\
<link rel=\"something\">\
</head>\
@@ -149,31 +111,28 @@ mod passing {
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<link rel=\"stylesheet\" href=\"some.css\">\
<div>\
<script src=\"some.js\"></script>\
<img style=\"width: 100%;\" src=\"some.png\" />\
<iframe src=\"some.html\"></iframe>\
<script src=\"some.js\"></script>\
<img style=\"width: 100%;\" src=\"some.png\" />\
<iframe src=\"some.html\"></iframe>\
</div>";
let dom = html::html_to_dom(&html);
let opt_isolate: bool = true;
let opt_no_css: bool = true;
let opt_no_frames: bool = true;
let opt_no_js: bool = true;
let opt_no_images: bool = true;
let mut options = Options::default();
options.isolate = true;
options.no_css = true;
options.no_fonts = true;
options.no_frames = true;
options.no_js = true;
options.no_images = true;
assert_eq!(
html::stringify_document(
&dom.document,
opt_no_css,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
&options
),
"<!DOCTYPE html>\
<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; frame-src 'none';child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
<title>no-frame no-css no-js no-image isolated document</title>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<link rel=\"stylesheet\" href=\"some.css\">\

View File

@@ -7,11 +7,13 @@
#[cfg(test)]
mod passing {
use crate::html;
use html5ever::serialize::{serialize, SerializeOpts};
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::html;
use crate::opts::Options;
#[test]
fn basic() {
let cache = &mut HashMap::new();
@@ -20,27 +22,12 @@ mod passing {
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let mut options = Options::default();
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@@ -58,27 +45,12 @@ mod passing {
let url = "http://localhost";
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let mut options = Options::default();
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@@ -96,27 +68,12 @@ mod passing {
let url = "http://localhost";
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let mut options = Options::default();
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@@ -136,26 +93,13 @@ mod passing {
let url = "http://localhost";
let cache = &mut HashMap::new();
let opt_no_css: bool = true;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let mut options = Options::default();
options.no_css = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@@ -182,27 +126,13 @@ mod passing {
let url = "http://localhost";
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = true;
let opt_silent = true;
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@@ -211,15 +141,15 @@ mod passing {
buf.iter().map(|&c| c as char).collect::<String>(),
format!(
"<html>\
<head>\
<link rel=\"icon\">\
</head>\
<body>\
<div>\
<img src=\"{empty_image}\">\
</div>\
</body>\
</html>",
<head>\
<link rel=\"icon\">\
</head>\
<body>\
<div>\
<img src=\"{empty_image}\">\
</div>\
</body>\
</html>",
empty_image = empty_image!()
)
);
@@ -233,27 +163,13 @@ mod passing {
let url = "http://localhost";
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = true;
let opt_silent = true;
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@@ -271,26 +187,13 @@ mod passing {
let url = "http://localhost";
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = true;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let mut options = Options::default();
options.no_frames = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@@ -308,26 +211,13 @@ mod passing {
let url = "http://localhost";
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = true;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let mut options = Options::default();
options.no_frames = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@@ -341,34 +231,20 @@ mod passing {
#[test]
fn no_js() {
let html = "<div onClick=\"void(0)\">\
<script src=\"http://localhost/assets/some.js\"></script>\
<script>alert(1)</script>\
<script src=\"http://localhost/assets/some.js\"></script>\
<script>alert(1)</script>\
</div>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = true;
let opt_no_images: bool = false;
let opt_silent = true;
let mut options = Options::default();
options.no_js = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@@ -381,33 +257,24 @@ mod passing {
}
#[test]
fn with_no_integrity() {
fn discards_integrity() {
let html = "<title>No integrity</title>\
<link integrity=\"sha384-...\" rel=\"something\"/>\
<script integrity=\"sha384-...\" src=\"some.js\"></script>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let client = Client::new();
let opt_no_css: bool = true;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = true;
let opt_no_js: bool = true;
let opt_no_images: bool = true;
let opt_silent = true;
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut options = Options::default();
options.no_css = true;
options.no_frames = true;
options.no_js = true;
options.no_images = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
@@ -415,8 +282,47 @@ mod passing {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head><title>No integrity</title><link rel=\"something\"><script></script></head>\
<head><title>No integrity</title><link rel=\"something\"><script></script></head>\
<body></body>\
</html>"
);
}
#[test]
fn removes_unwanted_meta_tags() {
let html = "<html>\
<head>\
<meta http-equiv=\"Refresh\" value=\"20\"/>\
<meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
</head>\
<body></body>\
</html>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_css = true;
options.no_frames = true;
options.no_js = true;
options.no_images = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head>\
<meta>\
<meta>\
</head>\
<body></body>\
</html>"
);
}

View File

@@ -3,4 +3,5 @@ mod css;
mod html;
mod js;
mod macros;
mod opts;
mod utils;

30
src/tests/opts.rs Normal file
View File

@@ -0,0 +1,30 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use crate::opts::Options;
#[test]
fn defaults() {
let options: Options = Options::default();
assert_eq!(options.target, str!());
assert_eq!(options.no_css, false);
assert_eq!(options.no_frames, false);
assert_eq!(options.no_fonts, false);
assert_eq!(options.no_images, false);
assert_eq!(options.isolate, false);
assert_eq!(options.no_js, false);
assert_eq!(options.insecure, false);
assert_eq!(options.no_metadata, false);
assert_eq!(options.output, str!());
assert_eq!(options.silent, false);
assert_eq!(options.timeout, 0);
assert_eq!(options.user_agent, "");
}
}

View File

@@ -7,12 +7,12 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn removes_fragment() {
assert_eq!(
utils::clean_url("https://somewhere.com/font.eot#iefix"),
url::clean_url("https://somewhere.com/font.eot#iefix"),
"https://somewhere.com/font.eot"
);
}
@@ -20,7 +20,7 @@ mod passing {
#[test]
fn removes_empty_fragment() {
assert_eq!(
utils::clean_url("https://somewhere.com/font.eot#"),
url::clean_url("https://somewhere.com/font.eot#"),
"https://somewhere.com/font.eot"
);
}
@@ -28,7 +28,7 @@ mod passing {
#[test]
fn removes_empty_query_and_empty_fragment() {
assert_eq!(
utils::clean_url("https://somewhere.com/font.eot?#"),
url::clean_url("https://somewhere.com/font.eot?#"),
"https://somewhere.com/font.eot"
);
}
@@ -36,7 +36,7 @@ mod passing {
#[test]
fn removes_empty_query_amp_and_empty_fragment() {
assert_eq!(
utils::clean_url("https://somewhere.com/font.eot?a=b&#"),
url::clean_url("https://somewhere.com/font.eot?a=b&#"),
"https://somewhere.com/font.eot?a=b"
);
}
@@ -44,7 +44,7 @@ mod passing {
#[test]
fn keeps_credentials() {
assert_eq!(
utils::clean_url("https://cookie:monster@gibson.internet/"),
url::clean_url("https://cookie:monster@gibson.internet/"),
"https://cookie:monster@gibson.internet/"
);
}

View File

@@ -7,13 +7,13 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn encode_string_with_specific_media_type() {
let mime = "application/javascript";
let data = "var word = 'hello';\nalert(word);\n";
let data_url = utils::data_to_data_url(mime, data.as_bytes(), "", "");
let data_url = url::data_to_data_url(mime, data.as_bytes(), "");
assert_eq!(
&data_url,
@@ -24,8 +24,8 @@ mod passing {
#[test]
fn encode_append_fragment() {
let data = "<svg></svg>\n";
let data_url = utils::data_to_data_url("text/css", data.as_bytes(), "", "fragment");
let data_url = url::data_to_data_url("image/svg+xml", data.as_bytes(), "");
assert_eq!(&data_url, "data:text/css;base64,PHN2Zz48L3N2Zz4K#fragment");
assert_eq!(&data_url, "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K");
}
}

View File

@@ -7,11 +7,11 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn parse_text_html_base64() {
let (media_type, data) = utils::data_url_to_data("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
let (media_type, data) = url::data_url_to_data("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
assert_eq!(media_type, "text/html");
assert_eq!(
@@ -22,7 +22,7 @@ mod passing {
#[test]
fn parse_text_html_utf8() {
let (media_type, data) = utils::data_url_to_data(
let (media_type, data) = url::data_url_to_data(
"data:text/html;utf8,Work expands so as to fill the time available for its completion",
);
@@ -35,7 +35,7 @@ mod passing {
#[test]
fn parse_text_html_plaintext() {
let (media_type, data) = utils::data_url_to_data(
let (media_type, data) = url::data_url_to_data(
"data:text/html,Work expands so as to fill the time available for its completion",
);
@@ -48,7 +48,7 @@ mod passing {
#[test]
fn parse_text_html_charset_utf_8_between_two_whitespaces() {
let (media_type, data) = utils::data_url_to_data(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
let (media_type, data) = url::data_url_to_data(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
assert_eq!(media_type, "text/html");
assert_eq!(
@@ -60,7 +60,7 @@ mod passing {
#[test]
fn parse_text_css_url_encoded() {
let (media_type, data) =
utils::data_url_to_data("data:text/css,div{background-color:%23000}");
url::data_url_to_data("data:text/css,div{background-color:%23000}");
assert_eq!(media_type, "text/css");
assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}");
@@ -68,7 +68,7 @@ mod passing {
#[test]
fn parse_no_media_type_base64() {
let (media_type, data) = utils::data_url_to_data("data:;base64,dGVzdA==");
let (media_type, data) = url::data_url_to_data("data:;base64,dGVzdA==");
assert_eq!(media_type, "");
assert_eq!(String::from_utf8_lossy(&data), "test");
@@ -76,7 +76,7 @@ mod passing {
#[test]
fn parse_no_media_type_no_encoding() {
let (media_type, data) = utils::data_url_to_data("data:;,test%20test");
let (media_type, data) = url::data_url_to_data("data:;,test%20test");
assert_eq!(media_type, "");
assert_eq!(String::from_utf8_lossy(&data), "test test");
@@ -92,11 +92,11 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::utils;
use crate::url;
#[test]
fn just_word_data() {
let (media_type, data) = utils::data_url_to_data("data");
let (media_type, data) = url::data_url_to_data("data");
assert_eq!(media_type, "");
assert_eq!(String::from_utf8_lossy(&data), "");

View File

@@ -7,12 +7,12 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn decode_unicode_characters() {
assert_eq!(
utils::decode_url(str!(
url::decode_url(str!(
"%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
)),
"検ヒム解塗ゃッ = サ"
@@ -22,7 +22,7 @@ mod passing {
#[test]
fn decode_file_url() {
assert_eq!(
utils::decode_url(str!("file:///tmp/space%20here/test%231.html")),
url::decode_url(str!("file:///tmp/space%20here/test%231.html")),
"file:///tmp/space here/test#1.html"
);
}
@@ -30,7 +30,7 @@ mod passing {
#[test]
fn plus_sign() {
assert_eq!(
utils::decode_url(str!(
url::decode_url(str!(
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
)),
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"

View File

@@ -7,18 +7,18 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn remove_protocl_and_fragment() {
if cfg!(windows) {
assert_eq!(
utils::file_url_to_fs_path("file:///C:/documents/some-path/some-file.svg#fragment"),
url::file_url_to_fs_path("file:///C:/documents/some-path/some-file.svg#fragment"),
"C:\\documents\\some-path\\some-file.svg"
);
} else {
assert_eq!(
utils::file_url_to_fs_path("file:///tmp/some-path/some-file.svg#fragment"),
url::file_url_to_fs_path("file:///tmp/some-path/some-file.svg#fragment"),
"/tmp/some-path/some-file.svg"
);
}
@@ -28,12 +28,12 @@ mod passing {
fn decodes_urls() {
if cfg!(windows) {
assert_eq!(
utils::file_url_to_fs_path("file:///C:/Documents%20and%20Settings/some-file.html"),
url::file_url_to_fs_path("file:///C:/Documents%20and%20Settings/some-file.html"),
"C:\\Documents and Settings\\some-file.html"
);
} else {
assert_eq!(
utils::file_url_to_fs_path("file:///home/user/My%20Documents"),
url::file_url_to_fs_path("file:///home/user/My%20Documents"),
"/home/user/My Documents"
);
}

View File

@@ -7,12 +7,12 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn data_url() {
assert_eq!(
utils::get_url_fragment(
url::get_url_fragment(
"data:image/svg+xml;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h#test"
),
"test"
@@ -21,6 +21,6 @@ mod passing {
#[test]
fn https_empty() {
assert_eq!(utils::get_url_fragment("https://kernel.org#"), "");
assert_eq!(url::get_url_fragment("https://kernel.org#"), "");
}
}

View File

@@ -7,18 +7,18 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn data_url_text_html() {
assert!(utils::is_data_url(
assert!(url::is_data_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
}
#[test]
fn data_url_no_media_type() {
assert!(utils::is_data_url(
assert!(url::is_data_url(
"data:;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
}
@@ -33,20 +33,20 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::utils;
use crate::url;
#[test]
fn https_url() {
assert!(!utils::is_data_url("https://kernel.org"));
assert!(!url::is_data_url("https://kernel.org"));
}
#[test]
fn no_protocol_url() {
assert!(!utils::is_data_url("//kernel.org"));
assert!(!url::is_data_url("//kernel.org"));
}
#[test]
fn empty_string() {
assert!(!utils::is_data_url(""));
assert!(!url::is_data_url(""));
}
}

View File

@@ -7,32 +7,32 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn unix_file_url() {
assert!(utils::is_file_url(
assert!(url::is_file_url(
"file:///home/user/Websites/my-website/index.html"
));
}
#[test]
fn windows_file_url() {
assert!(utils::is_file_url(
assert!(url::is_file_url(
"file:///C:/Documents%20and%20Settings/user/Websites/my-website/assets/images/logo.png"
));
}
#[test]
fn unix_url_with_backslashes() {
assert!(utils::is_file_url(
assert!(url::is_file_url(
"file:\\\\\\home\\user\\Websites\\my-website\\index.html"
));
}
#[test]
fn windows_file_url_with_backslashes() {
assert!(utils::is_file_url(
assert!(url::is_file_url(
"file:\\\\\\C:\\Documents%20and%20Settings\\user\\Websites\\my-website\\assets\\images\\logo.png"
));
}
@@ -47,37 +47,37 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::utils;
use crate::url;
#[test]
fn url_with_no_protocl() {
assert!(!utils::is_file_url("//kernel.org"));
assert!(!url::is_file_url("//kernel.org"));
}
#[test]
fn dot_slash_filename() {
assert!(!utils::is_file_url("./index.html"));
assert!(!url::is_file_url("./index.html"));
}
#[test]
fn just_filename() {
assert!(!utils::is_file_url("some-local-page.htm"));
assert!(!url::is_file_url("some-local-page.htm"));
}
#[test]
fn https_ip_port_url() {
assert!(!utils::is_file_url("https://1.2.3.4:80/www/index.html"));
assert!(!url::is_file_url("https://1.2.3.4:80/www/index.html"));
}
#[test]
fn data_url() {
assert!(!utils::is_file_url(
assert!(!url::is_file_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
}
#[test]
fn just_word_file() {
assert!(!utils::is_file_url("file"));
assert!(!url::is_file_url("file"));
}
}

View File

@@ -7,21 +7,21 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn http_url() {
assert!(utils::is_http_url("http://kernel.org"));
assert!(url::is_http_url("http://kernel.org"));
}
#[test]
fn https_url() {
assert!(utils::is_http_url("https://www.rust-lang.org/"));
assert!(url::is_http_url("https://www.rust-lang.org/"));
}
#[test]
fn http_url_with_backslashes() {
assert!(utils::is_http_url("http:\\\\freebsd.org\\"));
assert!(url::is_http_url("http:\\\\freebsd.org\\"));
}
}
@@ -34,31 +34,31 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::utils;
use crate::url;
#[test]
fn url_with_no_protocol() {
assert!(!utils::is_http_url("//kernel.org"));
assert!(!url::is_http_url("//kernel.org"));
}
#[test]
fn dot_slash_filename() {
assert!(!utils::is_http_url("./index.html"));
assert!(!url::is_http_url("./index.html"));
}
#[test]
fn just_filename() {
assert!(!utils::is_http_url("some-local-page.htm"));
assert!(!url::is_http_url("some-local-page.htm"));
}
#[test]
fn https_ip_port_url() {
assert!(!utils::is_http_url("ftp://1.2.3.4/www/index.html"));
assert!(!url::is_http_url("ftp://1.2.3.4/www/index.html"));
}
#[test]
fn data_url() {
assert!(!utils::is_http_url(
assert!(!url::is_http_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
}

12
src/tests/url/mod.rs Normal file
View File

@@ -0,0 +1,12 @@
mod clean_url;
mod data_to_data_url;
mod data_url_to_data;
mod decode_url;
mod file_url_to_fs_path;
mod get_url_fragment;
mod is_data_url;
mod is_file_url;
mod is_http_url;
mod resolve_url;
mod url_has_protocol;
mod url_with_fragment;

View File

@@ -7,13 +7,14 @@
#[cfg(test)]
mod passing {
use crate::utils;
use url::ParseError;
use crate::url;
#[test]
fn from_https_to_level_up_relative() -> Result<(), ParseError> {
let resolved_url =
utils::resolve_url("https://www.kernel.org", "../category/signatures.html")?;
url::resolve_url("https://www.kernel.org", "../category/signatures.html")?;
assert_eq!(
resolved_url.as_str(),
@@ -25,7 +26,7 @@ mod passing {
#[test]
fn from_just_filename_to_full_https_url() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"saved_page.htm",
"https://www.kernel.org/category/signatures.html",
)?;
@@ -40,7 +41,7 @@ mod passing {
#[test]
fn from_https_url_to_url_with_no_protocol() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"https://www.kernel.org",
"//www.kernel.org/theme/images/logos/tux.png",
)?;
@@ -56,7 +57,7 @@ mod passing {
#[test]
fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() -> Result<(), ParseError>
{
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"https://www.kernel.org",
"//another-host.org/theme/images/logos/tux.png",
)?;
@@ -71,7 +72,7 @@ mod passing {
#[test]
fn from_https_url_to_relative_root_path() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"https://www.kernel.org/category/signatures.html",
"/theme/images/logos/tux.png",
)?;
@@ -86,7 +87,7 @@ mod passing {
#[test]
fn from_https_to_just_filename() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"https://www.w3schools.com/html/html_iframe.asp",
"default.asp",
)?;
@@ -101,7 +102,7 @@ mod passing {
#[test]
fn from_data_url_to_https() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"https://www.kernel.org/category/signatures.html",
)?;
@@ -116,7 +117,7 @@ mod passing {
#[test]
fn from_data_url_to_data_url() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K",
)?;
@@ -131,7 +132,7 @@ mod passing {
#[test]
fn from_file_url_to_relative_path() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"file:///home/user/Websites/my-website/index.html",
"assets/images/logo.png",
)
@@ -147,7 +148,7 @@ mod passing {
#[test]
fn from_file_url_to_relative_path_with_backslashes() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"file:\\\\\\home\\user\\Websites\\my-website\\index.html",
"assets\\images\\logo.png",
)
@@ -163,7 +164,7 @@ mod passing {
#[test]
fn from_data_url_to_file_url() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"file:///etc/passwd",
)
@@ -176,7 +177,7 @@ mod passing {
#[test]
fn preserve_fragment() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"http://doesnt-matter.local/",
"css/fonts/fontmarvelous.svg#fontmarvelous",
)
@@ -193,9 +194,9 @@ mod passing {
#[test]
fn resolve_from_file_url_to_file_url() -> Result<(), ParseError> {
let resolved_url = if cfg!(windows) {
utils::resolve_url("file:///c:/index.html", "file:///c:/image.png").unwrap_or(str!())
url::resolve_url("file:///c:/index.html", "file:///c:/image.png").unwrap_or(str!())
} else {
utils::resolve_url("file:///tmp/index.html", "file:///tmp/image.png").unwrap_or(str!())
url::resolve_url("file:///tmp/index.html", "file:///tmp/image.png").unwrap_or(str!())
};
assert_eq!(
@@ -220,12 +221,12 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::utils;
use crate::url;
use url::ParseError;
#[test]
fn from_data_url_to_url_with_no_protocol() -> Result<(), ParseError> {
let resolved_url = utils::resolve_url(
let resolved_url = url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"//www.w3schools.com/html/html_iframe.asp",
)

View File

@@ -7,50 +7,50 @@
#[cfg(test)]
mod passing {
use crate::utils;
use crate::url;
#[test]
fn mailto() {
assert!(utils::url_has_protocol(
assert!(url::url_has_protocol(
"mailto:somebody@somewhere.com?subject=hello"
));
}
#[test]
fn tel() {
assert!(utils::url_has_protocol("tel:5551234567"));
assert!(url::url_has_protocol("tel:5551234567"));
}
#[test]
fn ftp_no_slashes() {
assert!(utils::url_has_protocol("ftp:some-ftp-server.com"));
assert!(url::url_has_protocol("ftp:some-ftp-server.com"));
}
#[test]
fn ftp_with_credentials() {
assert!(utils::url_has_protocol(
assert!(url::url_has_protocol(
"ftp://user:password@some-ftp-server.com"
));
}
#[test]
fn javascript() {
assert!(utils::url_has_protocol("javascript:void(0)"));
assert!(url::url_has_protocol("javascript:void(0)"));
}
#[test]
fn http() {
assert!(utils::url_has_protocol("http://news.ycombinator.com"));
assert!(url::url_has_protocol("http://news.ycombinator.com"));
}
#[test]
fn https() {
assert!(utils::url_has_protocol("https://github.com"));
assert!(url::url_has_protocol("https://github.com"));
}
#[test]
fn mailto_uppercase() {
assert!(utils::url_has_protocol(
assert!(url::url_has_protocol(
"MAILTO:somebody@somewhere.com?subject=hello"
));
}
@@ -69,23 +69,23 @@ mod failing {
#[test]
fn url_with_no_protocol() {
assert!(!utils::url_has_protocol(
assert!(!url::url_has_protocol(
"//some-hostname.com/some-file.html"
));
}
#[test]
fn relative_path() {
assert!(!utils::url_has_protocol("some-hostname.com/some-file.html"));
assert!(!url::url_has_protocol("some-hostname.com/some-file.html"));
}
#[test]
fn relative_to_root_path() {
assert!(!utils::url_has_protocol("/some-file.html"));
assert!(!url::url_has_protocol("/some-file.html"));
}
#[test]
fn empty_string() {
assert!(!utils::url_has_protocol(""));
assert!(!url::url_has_protocol(""));
}
}

View File

@@ -0,0 +1,40 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use crate::url;
#[test]
fn url_with_fragment_url() {
let url = "https://localhost.localdomain/path/";
let fragment = "test";
let assembled_url = url::url_with_fragment(url, fragment);
assert_eq!(&assembled_url, "https://localhost.localdomain/path/#test");
}
#[test]
fn url_with_fragment_empty_url() {
let url = "https://localhost.localdomain/path/";
let fragment = "";
let assembled_url = url::url_with_fragment(url, fragment);
assert_eq!(&assembled_url, "https://localhost.localdomain/path/");
}
#[test]
fn url_with_fragment_data_url() {
let url = "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K";
let fragment = "fragment";
let assembled_url = url::url_with_fragment(url, fragment);
assert_eq!(
&assembled_url,
"data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K#fragment"
);
}
}

31
src/tests/utils/indent.rs Normal file
View File

@@ -0,0 +1,31 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use crate::utils;
#[test]
fn zero() {
assert_eq!(utils::indent(0), "");
}
#[test]
fn one() {
assert_eq!(utils::indent(1), " ");
}
#[test]
fn two() {
assert_eq!(utils::indent(2), " ");
}
#[test]
fn three() {
assert_eq!(utils::indent(3), " ");
}
}

View File

@@ -1,13 +1,3 @@
mod clean_url;
mod data_to_data_url;
mod data_url_to_data;
mod decode_url;
mod detect_media_type;
mod file_url_to_fs_path;
mod get_url_fragment;
mod is_data_url;
mod is_file_url;
mod is_http_url;
mod resolve_url;
mod indent;
mod retrieve_asset;
mod url_has_protocol;

View File

@@ -7,11 +7,13 @@
#[cfg(test)]
mod passing {
use crate::utils;
use reqwest::blocking::Client;
use std::collections::HashMap;
use std::env;
use crate::url;
use crate::utils;
#[test]
fn read_data_url() {
let cache = &mut HashMap::new();
@@ -25,15 +27,16 @@ mod passing {
"data:text/html;base64,c291cmNl",
"data:text/html;base64,dGFyZ2V0",
false,
0,
)
.unwrap();
assert_eq!(
utils::data_to_data_url(&media_type, &data, &final_url, ""),
utils::data_to_data_url("text/html", "target".as_bytes(), "", "")
url::data_to_data_url(&media_type, &data, &final_url),
url::data_to_data_url("text/html", "target".as_bytes(), "")
);
assert_eq!(
final_url,
utils::data_to_data_url("text/html", "target".as_bytes(), "", "")
url::data_to_data_url("text/html", "target".as_bytes(), "")
);
assert_eq!(&media_type, "text/html");
}
@@ -61,9 +64,10 @@ mod passing {
cwd = cwd.to_str().unwrap()
),
false,
0,
)
.unwrap();
assert_eq!(utils::data_to_data_url("application/javascript", &data, &final_url, ""), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
assert_eq!(url::data_to_data_url("application/javascript", &data, &final_url), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
assert_eq!(
&final_url,
&format!(
@@ -84,10 +88,11 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::utils;
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::utils;
#[test]
fn read_local_file_with_data_url_parent() {
let cache = &mut HashMap::new();
@@ -100,6 +105,7 @@ mod failing {
"data:text/html;base64,SoUrCe",
"file:///etc/passwd",
false,
0,
) {
Ok((..)) => {
assert!(false);
@@ -122,6 +128,7 @@ mod failing {
"https://kernel.org/",
"file:///etc/passwd",
false,
0,
) {
Ok((..)) => {
assert!(false);

168
src/url.rs Normal file
View File

@@ -0,0 +1,168 @@
use base64;
use url::{form_urlencoded, ParseError, Url};
use crate::utils::detect_media_type;
pub fn clean_url<T: AsRef<str>>(input: T) -> String {
let mut url = Url::parse(input.as_ref()).unwrap();
// Clear fragment
url.set_fragment(None);
// Get rid of stray question mark
if url.query() == Some("") {
url.set_query(None);
}
// Remove empty trailing ampersand(s)
let mut result: String = url.to_string();
while result.ends_with("&") {
result.pop();
}
result
}
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str) -> String {
let media_type: String = if media_type.is_empty() {
detect_media_type(data, &url)
} else {
media_type.to_string()
};
format!("data:{};base64,{}", media_type, base64::encode(data))
}
pub fn data_url_to_data<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
let path: String = parsed_url.path().to_string();
let comma_loc: usize = path.find(',').unwrap_or(path.len());
let meta_data: String = path.chars().take(comma_loc).collect();
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
let text: String = decode_url(raw_data);
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
let mut media_type: String = str!();
let mut encoding: &str = "";
let mut i: i8 = 0;
for item in &meta_data_items {
if i == 0 {
media_type = str!(item);
} else {
if item.eq_ignore_ascii_case("base64")
|| item.eq_ignore_ascii_case("utf8")
|| item.eq_ignore_ascii_case("charset=UTF-8")
{
encoding = item;
}
}
i = i + 1;
}
let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
base64::decode(&text).unwrap_or(vec![])
} else {
text.as_bytes().to_vec()
};
(media_type, data)
}
pub fn decode_url(input: String) -> String {
let input: String = input.replace("+", "%2B");
form_urlencoded::parse(input.as_bytes())
.map(|(key, val)| {
[
key.to_string(),
if val.to_string().len() == 0 {
str!()
} else {
str!('=')
},
val.to_string(),
]
.concat()
})
.collect()
}
pub fn file_url_to_fs_path(url: &str) -> String {
if !is_file_url(url) {
return str!();
}
let cutoff_l = if cfg!(windows) { 8 } else { 7 };
let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string());
let url_fragment = get_url_fragment(url);
if url_fragment != "" {
let max_len = fs_file_path.len() - 1 - url_fragment.len();
fs_file_path = fs_file_path[0..max_len].to_string();
}
if cfg!(windows) {
fs_file_path = fs_file_path.replace("/", "\\");
}
// File paths should not be %-encoded
decode_url(fs_file_path)
}
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
if Url::parse(url.as_ref()).unwrap().fragment() == None {
str!()
} else {
str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap())
}
}
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "data"))
.unwrap_or(false)
}
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "file"))
.unwrap_or(false)
}
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https"))
.unwrap_or(false)
}
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
let result = if is_http_url(to.as_ref()) {
to.as_ref().to_string()
} else {
Url::parse(from.as_ref())?
.join(to.as_ref())?
.as_ref()
.to_string()
};
Ok(result)
}
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme().len() > 0))
.unwrap_or(false)
}
pub fn url_with_fragment(url: &str, fragment: &str) -> String {
let mut result = str!(&url);
if !fragment.is_empty() {
result += "#";
result += fragment;
}
result
}

View File

@@ -1,10 +1,12 @@
use base64;
use reqwest::blocking::Client;
use reqwest::header::CONTENT_TYPE;
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use url::{form_urlencoded, ParseError, Url};
use crate::url::{clean_url, data_url_to_data, file_url_to_fs_path, is_data_url, is_file_url};
const INDENT: &str = " ";
const MAGIC: [[&[u8]; 2]; 18] = [
// Image
@@ -38,26 +40,6 @@ const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
"text/plain",
];
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String {
let media_type: String = if media_type.is_empty() {
detect_media_type(data, &url)
} else {
media_type.to_string()
};
let hash: String = if fragment != "" {
format!("#{}", fragment)
} else {
str!()
};
format!(
"data:{};base64,{}{}",
media_type,
base64::encode(data),
hash
)
}
pub fn detect_media_type(data: &[u8], url: &str) -> String {
for item in MAGIC.iter() {
if data.starts_with(item[0]) {
@@ -72,159 +54,27 @@ pub fn detect_media_type(data: &[u8], url: &str) -> String {
str!()
}
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme().len() > 0))
.unwrap_or(false)
}
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "data"))
.unwrap_or(false)
}
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "file"))
.unwrap_or(false)
}
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https"))
.unwrap_or(false)
}
pub fn is_plaintext_media_type(media_type: &str) -> bool {
PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
}
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
let result = if is_http_url(to.as_ref()) {
to.as_ref().to_string()
} else {
Url::parse(from.as_ref())?
.join(to.as_ref())?
.as_ref()
.to_string()
};
Ok(result)
}
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
if Url::parse(url.as_ref()).unwrap().fragment() == None {
str!()
} else {
str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap())
pub fn indent(level: u32) -> String {
let mut result = str!();
let mut l: u32 = level;
while l > 0 {
result += INDENT;
l -= 1;
}
}
pub fn clean_url<T: AsRef<str>>(input: T) -> String {
let mut url = Url::parse(input.as_ref()).unwrap();
// Clear fragment
url.set_fragment(None);
// Get rid of stray question mark
if url.query() == Some("") {
url.set_query(None);
}
// Remove empty trailing ampersand(s)
let mut result: String = url.to_string();
while result.ends_with("&") {
result.pop();
}
result
}
pub fn data_url_to_data<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
let path: String = parsed_url.path().to_string();
let comma_loc: usize = path.find(',').unwrap_or(path.len());
let meta_data: String = path.chars().take(comma_loc).collect();
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
let text: String = decode_url(raw_data);
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
let mut media_type: String = str!();
let mut encoding: &str = "";
let mut i: i8 = 0;
for item in &meta_data_items {
if i == 0 {
media_type = str!(item);
} else {
if item.eq_ignore_ascii_case("base64")
|| item.eq_ignore_ascii_case("utf8")
|| item.eq_ignore_ascii_case("charset=UTF-8")
{
encoding = item;
}
}
i = i + 1;
}
let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
base64::decode(&text).unwrap_or(vec![])
} else {
text.as_bytes().to_vec()
};
(media_type, data)
}
pub fn decode_url(input: String) -> String {
let input: String = input.replace("+", "%2B");
form_urlencoded::parse(input.as_bytes())
.map(|(key, val)| {
[
key.to_string(),
if val.to_string().len() == 0 {
str!()
} else {
str!('=')
},
val.to_string(),
]
.concat()
})
.collect()
}
pub fn file_url_to_fs_path(url: &str) -> String {
if !is_file_url(url) {
return str!();
}
let cutoff_l = if cfg!(windows) { 8 } else { 7 };
let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string());
let url_fragment = get_url_fragment(url);
if url_fragment != "" {
let max_len = fs_file_path.len() - 1 - url_fragment.len();
fs_file_path = fs_file_path[0..max_len].to_string();
}
if cfg!(windows) {
fs_file_path = fs_file_path.replace("/", "\\");
}
// File paths should not be %-encoded
decode_url(fs_file_path)
}
pub fn retrieve_asset(
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
parent_url: &str,
url: &str,
opt_silent: bool,
depth: u32,
) -> Result<(Vec<u8>, String, String), reqwest::Error> {
if url.len() == 0 {
// Provoke error
@@ -246,7 +96,7 @@ pub fn retrieve_asset(
let path = Path::new(&fs_file_path);
if path.exists() {
if !opt_silent {
eprintln!("{}", &url);
eprintln!("{}{}", indent(depth).as_str(), &url);
}
Ok((fs::read(&fs_file_path).expect(""), url.to_string(), str!()))
@@ -260,7 +110,7 @@ pub fn retrieve_asset(
if cache.contains_key(&cache_key) {
// URL is in cache, we get and return it
if !opt_silent {
eprintln!("{} (from cache)", &url);
eprintln!("{}{} (from cache)", indent(depth).as_str(), &url);
}
Ok((
@@ -275,9 +125,9 @@ pub fn retrieve_asset(
if !opt_silent {
if url == res_url {
eprintln!("{}", &url);
eprintln!("{}{}", indent(depth).as_str(), &url);
} else {
eprintln!("{} -> {}", &url, &res_url);
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &res_url);
}
}