23 Commits

Author SHA1 Message Date
Sunshine
22a031af5d Merge pull request #256 from snshn/more-tests-fixes-and-improvements
More tests, fixes, improvements
2021-06-02 04:06:37 -10:00
Sunshine
6e6a60b305 Merge branch 'master' into more-tests-fixes-and-improvements 2021-06-02 04:01:41 -10:00
Sunshine
77d6022d84 bump version (2.4.1 -> 2.5.0) 2021-06-02 04:00:18 -10:00
Sunshine
5db19d1a3e update dependencies 2021-06-02 03:58:28 -10:00
Sunshine
a6e891b3c5 add more tests 2021-06-02 03:41:41 -10:00
Sunshine
d7a82a008b Merge pull request #260 from snshn/ie-css-hack-fix
Remove optional trailing space from CSS idents
2021-05-28 23:04:34 -10:00
Sunshine
2369a4dd3c remove optional trailing space from CSS idents 2021-05-28 12:03:19 -10:00
Sunshine
d27e53fb36 Merge pull request #259 from snshn/related-project-monk
Add Monk to related projects in README.md
2021-05-24 10:54:11 -10:00
Sunshine
2cb51477d2 add Monk to related projects in README.md 2021-05-24 01:47:19 -10:00
Sunshine
a308a20411 simplify code of CLI tests 2021-03-15 20:10:50 -10:00
Sunshine
a6ddf1c13a simplify code responsible for processing CSS 2021-03-14 19:42:57 -10:00
Sunshine
8256d17efd Merge pull request #253 from snshn/unwrap-noscript
Make possible to unwrap NOSCRIPT nodes
2021-03-11 22:43:28 -10:00
Sunshine
efa12935ba Merge pull request #254 from snshn/no-containers-md
Get rid of containers.md (now part of README.md)
2021-03-11 22:39:49 -10:00
Sunshine
7126a98023 Merge pull request #255 from snshn/pkgsrc
Add installation instructions using pkgsrc
2021-03-11 22:38:33 -10:00
Sunshine
c7ee3ec6e2 get rid of containers.md (now part of README.md) 2021-03-11 22:27:44 -10:00
Sunshine
c4218031e2 add installation instructions using pkgsrc 2021-03-11 22:26:32 -10:00
Sunshine
6f918f6c1c make possible to unwrap NOSCRIPT nodes 2021-03-11 18:18:39 -10:00
Sunshine
6ecda080e8 Merge pull request #252 from snshn/revamp
Revamp codebase
2021-03-11 14:25:10 -10:00
Sunshine
2e86ee67a5 revamp codebase 2021-03-11 14:15:18 -10:00
Sunshine
359616b901 Update README.md 2021-03-09 16:04:32 -10:00
Sunshine
ea2cdab330 Update README.md 2021-03-09 15:52:23 -10:00
Sunshine
4434823c46 Update README.md 2021-03-09 14:49:10 -10:00
Sunshine
e0a78ffc9d Update README.md 2021-03-09 13:31:15 -10:00
47 changed files with 2461 additions and 2637 deletions

1349
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
[package]
name = "monolith"
version = "2.4.1"
version = "2.5.0"
authors = [
"Sunshine <sunshine@uberspace.net>",
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
@@ -22,21 +22,20 @@ include = [
license = "CC0-1.0"
[dependencies]
atty = "0.2" # Used for highlighting network errors
atty = "0.2.14" # Used for highlighting network errors
base64 = "0.13.0"
chrono = "0.4.19" # Used for formatting creation timestamp
clap = "2.33.3"
cssparser = "0.28.1"
html5ever = "0.24.1"
regex = "1.4.3" # Used for parsing srcset
sha2 = "0.9.2" # Used for calculating checksums during integrity checks
url = "2.2.0"
regex = "1.5.4" # Used for parsing srcset and NOSCRIPT
sha2 = "0.9.5" # Used for calculating checksums during integrity checks
url = "2.2.2"
[dependencies.reqwest]
version = "0.11.0"
version = "0.11.3"
default-features = false
features = ["default-tls", "blocking", "gzip"]
[dev-dependencies]
assert_cmd = "1.0.2"
tempfile = "3.2.0"
assert_cmd = "1.0.4"

View File

@@ -22,21 +22,32 @@ If compared to saving websites with `wget -mpk`, this tool embeds all assets as
## Installation
### Using Cargo
#### Using [Cargo](https://crates.io/crates/monolith)
$ cargo install monolith
#### Via Homebrew (on macOS and GNU/Linux)
#### Via [Homebrew](https://formulae.brew.sh/formula/monolith) (macOS and GNU/Linux)
$ brew install monolith
#### Using Snapcraft (on GNU/Linux)
#### Using [Snapcraft](https://snapcraft.io/monolith) (GNU/Linux)
$ snap install monolith
#### Using Ports collection (on FreeBSD and TrueOS)
#### Using [FreeBSD packages](https://svnweb.freebsd.org/ports/head/www/monolith/) (FreeBSD)
$ pkg install monolith
#### Using [FreeBSD ports](https://www.freshports.org/www/monolith/) (FreeBSD)
$ cd /usr/ports/www/monolith/
$ make install clean
#### Using pre-built binaries (Windows, ARM-based devices, etc)
Every [release](https://github.com/Y2Z/monolith/releases) contains pre-built binaries for Windows, GNU/Linux, as well as platforms with non-standard CPU architecture.
#### Using [pkgsrc](https://pkgsrc.se/www/monolith) (NetBSD, OpenBSD, Haiku, etc)
$ cd /usr/pkgsrc/www/monolith
$ make install clean
#### Using [pre-built binaries](https://github.com/Y2Z/monolith/releases) (Windows, ARM-based devices, etc)
Every release contains pre-built binaries for Windows, GNU/Linux, as well as platforms with non-standard CPU architecture.
#### Using [containers](https://www.docker.com/)
$ docker build -t Y2Z/monolith .
$ sudo install -b utils/run-in-container.sh /usr/local/bin/monolith
#### From source
@@ -46,18 +57,13 @@ Dependency: `libssl-dev`
$ cd monolith
$ make install
#### Using Containers
$ docker build -t Y2Z/monolith .
$ sudo install -b utils/run-in-container.sh /usr/local/bin/monolith
---------------------------------------------------
## Usage
$ monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roads-lyrics.html
or
$ cat index.html | monolith -aIiFfcMv - > index-processed.html
$ cat index.html | monolith -aIiFfcMv -b https://original.site/ - > result.html
---------------------------------------------------
@@ -73,10 +79,11 @@ or
- `-j`: Exclude JavaScript
- `-k`: Accept invalid X.509 (TLS) certificates
- `-M`: Don't add timestamp and URL information
- `-n`: Extract contents of NOSCRIPT elements
- `-o`: Write output to `file`
- `-s`: Be quiet
- `-t`: Adjust `network request timeout`
- `-u`: Provide `custom User-Agent`
- `-u`: Provide custom `User-Agent`
- `-v`: Exclude videos
---------------------------------------------------
@@ -92,19 +99,16 @@ Please open an issue if something is wrong, that helps make this project better.
---------------------------------------------------
## Related projects
- `Monolith Chrome Extension`: https://github.com/rhysd/monolith-of-web
- `Pagesaver`: https://github.com/distributed-mind/pagesaver
- `Personal WayBack Machine`: https://github.com/popey/pwbm
- `Hako`: https://github.com/dmpop/hako
- Monolith Chrome Extension: https://github.com/rhysd/monolith-of-web
- Pagesaver: https://github.com/distributed-mind/pagesaver
- Personal WayBack Machine: https://github.com/popey/pwbm
- Hako: https://github.com/dmpop/hako
- Monk: https://gitlab.com/fisherdarling/monk
---------------------------------------------------
## License
<a href="http://creativecommons.org/publicdomain/zero/1.0/">
<img src="http://i.creativecommons.org/p/zero/1.0/88x31.png" alt="CC0-1.0" />
</a>
<br />
To the extent possible under law, the author(s) have dedicated all copyright related and neighboring rights to this software to the public domain worldwide.
This software is distributed without any warranty.

View File

@@ -1,15 +0,0 @@
1. Run `docker build -t y2z/monolith .` to create a Docker image
2. Create a file named `monolith` which contains:
```sh
#!/bin/sh
docker run --rm \
y2z/monolith \
monolith \
"$@"
```
3. Make the file executable (`chmod +x monolith`) and include it into your `$PATH`
4. Now you should be able to run a containerized build of monolith like this:
`monolith -I https://github.com > document.html`

View File

@@ -1,9 +1,12 @@
use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
use cssparser::{
serialize_identifier, serialize_string, ParseError, Parser, ParserInput, SourcePosition, Token,
};
use reqwest::blocking::Client;
use std::collections::HashMap;
use url::Url;
use crate::opts::Options;
use crate::url::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, url_with_fragment};
use crate::url::{create_data_url, resolve_url};
use crate::utils::retrieve_asset;
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
@@ -26,7 +29,44 @@ const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
"suffix",
"symbols",
];
const CSS_SPECIAL_CHARS: &'static str = "~!@$%^&*()+=,./'\";:?><[]{}|`#";
/// Parses `css` and returns it as a string after running it through `process_css`.
///
/// A fresh `cssparser` input/parser pair is built over `css`, and processing
/// starts with three empty-string context arguments (presumably the initial
/// rule / property / function names; confirm against `process_css`).
///
/// # Panics
///
/// Panics if `process_css` returns an error, because the result is unwrapped.
pub fn embed_css(
    cache: &mut HashMap<String, Vec<u8>>,
    client: &Client,
    document_url: &Url,
    css: &str,
    options: &Options,
    depth: u32,
) -> String {
    let mut parser_input = ParserInput::new(css);
    let mut css_parser = Parser::new(&mut parser_input);

    let processed = process_css(
        cache,
        client,
        document_url,
        &mut css_parser,
        options,
        depth,
        "",
        "",
        "",
    );

    processed.unwrap()
}
/// Serializes `ident` as a CSS identifier and strips trailing whitespace.
///
/// The serializer's result is deliberately discarded; whatever was written
/// into the buffer is used as-is. Trailing whitespace is trimmed so that the
/// optional space that may follow a serialized identifier (presumably the one
/// emitted after an escape sequence) does not end up in the output.
pub fn format_ident(ident: &str) -> String {
    let mut serialized = String::new();
    let _ = serialize_identifier(ident, &mut serialized);
    serialized.trim_end().to_string()
}
/// Serializes `string` as a quoted CSS string using `serialize_string`.
///
/// The serializer's result is deliberately discarded; the buffer contents are
/// returned regardless.
pub fn format_quoted_string(string: &str) -> String {
    let mut quoted = String::new();
    let _ = serialize_string(string, &mut quoted);
    quoted
}
pub fn is_image_url_prop(prop_name: &str) -> bool {
CSS_PROPS_WITH_IMAGE_URLS
@@ -35,30 +75,10 @@ pub fn is_image_url_prop(prop_name: &str) -> bool {
.is_some()
}
/// Wraps `input` in quotes, backslash-escaping every occurrence of the
/// chosen quote character inside it.
///
/// Double quotes are used when `double` is `true`, single quotes otherwise.
/// No other characters (including backslashes) are altered.
pub fn enquote(input: String, double: bool) -> String {
    let quote: char = if double { '"' } else { '\'' };

    let mut quoted = String::with_capacity(input.len() + 2);
    quoted.push(quote);
    for ch in input.chars() {
        if ch == quote {
            quoted.push('\\');
        }
        quoted.push(ch);
    }
    quoted.push(quote);

    quoted
}
/// Backslash-escapes `value`.
///
/// Existing backslashes are doubled first, so that the backslashes added in
/// the second step are not themselves doubled. Then every character listed in
/// `CSS_SPECIAL_CHARS` is prefixed with a backslash, one character at a time.
pub fn escape(value: &str) -> String {
    let backslashes_doubled: String = str!(&value).replace("\\", "\\\\");

    CSS_SPECIAL_CHARS
        .chars()
        .fold(backslashes_doubled, |escaped, special| {
            escaped.replace(special, format!("\\{}", special).as_str())
        })
}
pub fn process_css<'a>(
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
parent_url: &str,
document_url: &Url,
parser: &mut Parser,
options: &Options,
depth: u32,
@@ -112,7 +132,7 @@ pub fn process_css<'a>(
process_css(
cache,
client,
parent_url,
document_url,
parser,
options,
depth,
@@ -143,7 +163,7 @@ pub fn process_css<'a>(
Token::Ident(ref value) => {
curr_rule = str!();
curr_prop = str!(value);
result.push_str(&escape(value));
result.push_str(&format_ident(value));
}
// @import, @font-face, @charset, @media...
Token::AtKeyword(ref value) => {
@@ -164,23 +184,22 @@ pub fn process_css<'a>(
curr_rule = str!();
// Skip empty import values
if value.len() < 1 {
if value.len() == 0 {
result.push_str("''");
continue;
}
let import_full_url = resolve_url(&parent_url, value).unwrap_or_default();
let import_url_fragment = get_url_fragment(import_full_url.clone());
let import_full_url: Url = resolve_url(&document_url, value);
match retrieve_asset(
cache,
client,
&parent_url,
&document_url,
&import_full_url,
options,
depth + 1,
) {
Ok((import_contents, import_final_url, _import_media_type)) => {
let import_data_url = data_to_data_url(
let mut import_data_url = create_data_url(
"text/css",
embed_css(
cache,
@@ -193,63 +212,64 @@ pub fn process_css<'a>(
.as_bytes(),
&import_final_url,
);
let assembled_url: String = url_with_fragment(
import_data_url.as_str(),
import_url_fragment.as_str(),
import_data_url.set_fragment(import_full_url.fragment());
result.push_str(
format_quoted_string(&import_data_url.to_string()).as_str(),
);
result.push_str(enquote(assembled_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(import_full_url.clone()) {
let assembled_url: String = url_with_fragment(
import_full_url.as_str(),
import_url_fragment.as_str(),
if import_full_url.scheme() == "http"
|| import_full_url.scheme() == "https"
{
result.push_str(
format_quoted_string(&import_full_url.to_string()).as_str(),
);
result.push_str(enquote(assembled_url, false).as_str());
}
}
}
} else {
if func_name == "url" {
// Skip empty url()'s
if value.len() < 1 {
if value.len() == 0 {
continue;
}
if options.no_images && is_image_url_prop(curr_prop.as_str()) {
result.push_str(enquote(str!(empty_image!()), false).as_str());
result.push_str(format_quoted_string(empty_image!()).as_str());
} else {
let resolved_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(resolved_url.clone());
let resolved_url: Url = resolve_url(&document_url, value);
match retrieve_asset(
cache,
client,
&parent_url,
&document_url,
&resolved_url,
options,
depth + 1,
) {
Ok((data, final_url, media_type)) => {
let data_url = data_to_data_url(&media_type, &data, &final_url);
let assembled_url: String =
url_with_fragment(data_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
let mut data_url =
create_data_url(&media_type, &data, &final_url);
data_url.set_fragment(resolved_url.fragment());
result.push_str(
format_quoted_string(&data_url.to_string()).as_str(),
);
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(resolved_url.clone()) {
let assembled_url: String = url_with_fragment(
resolved_url.as_str(),
url_fragment.as_str(),
if resolved_url.scheme() == "http"
|| resolved_url.scheme() == "https"
{
result.push_str(
format_quoted_string(&resolved_url.to_string())
.as_str(),
);
result.push_str(enquote(assembled_url, false).as_str());
}
}
}
}
} else {
result.push_str(enquote(str!(value), false).as_str());
result.push_str(format_quoted_string(value).as_str());
}
}
}
@@ -290,8 +310,9 @@ pub fn process_css<'a>(
Token::IDHash(ref value) => {
curr_rule = str!();
result.push_str("#");
result.push_str(&escape(value));
result.push_str(&format_ident(value));
}
// url()
Token::UnquotedUrl(ref value) => {
let is_import: bool = curr_rule == "import";
@@ -313,12 +334,17 @@ pub fn process_css<'a>(
result.push_str("url(");
if is_import {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
match retrieve_asset(cache, client, &parent_url, &full_url, options, depth + 1)
{
let full_url: Url = resolve_url(&document_url, value);
match retrieve_asset(
cache,
client,
&document_url,
&full_url,
options,
depth + 1,
) {
Ok((css, final_url, _media_type)) => {
let data_url = data_to_data_url(
let mut data_url = create_data_url(
"text/css",
embed_css(
cache,
@@ -331,45 +357,42 @@ pub fn process_css<'a>(
.as_bytes(),
&final_url,
);
let assembled_url: String =
url_with_fragment(data_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
data_url.set_fragment(full_url.fragment());
result.push_str(format_quoted_string(&data_url.to_string()).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(full_url.clone()) {
let assembled_url: String =
url_with_fragment(full_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
if full_url.scheme() == "http" || full_url.scheme() == "https" {
result
.push_str(format_quoted_string(&full_url.to_string()).as_str());
}
}
}
} else {
if is_image_url_prop(curr_prop.as_str()) && options.no_images {
result.push_str(enquote(str!(empty_image!()), false).as_str());
result.push_str(format_quoted_string(empty_image!()).as_str());
} else {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
let full_url: Url = resolve_url(&document_url, value);
match retrieve_asset(
cache,
client,
&parent_url,
&document_url,
&full_url,
options,
depth + 1,
) {
Ok((data, final_url, media_type)) => {
let data_url = data_to_data_url(&media_type, &data, &final_url);
let assembled_url: String =
url_with_fragment(data_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
let mut data_url = create_data_url(&media_type, &data, &final_url);
data_url.set_fragment(full_url.fragment());
result
.push_str(format_quoted_string(&data_url.to_string()).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(full_url.clone()) {
let assembled_url: String =
url_with_fragment(full_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
if full_url.scheme() == "http" || full_url.scheme() == "https" {
result.push_str(
format_quoted_string(&full_url.to_string()).as_str(),
);
}
}
}
@@ -377,6 +400,7 @@ pub fn process_css<'a>(
}
result.push_str(")");
}
// =
Token::Delim(ref value) => result.push_str(&value.to_string()),
Token::Function(ref name) => {
let function_name: &str = &name.clone();
@@ -388,7 +412,7 @@ pub fn process_css<'a>(
process_css(
cache,
client,
parent_url,
document_url,
parser,
options,
depth,
@@ -413,28 +437,3 @@ pub fn process_css<'a>(
Ok(result)
}
/// Parses `css` and returns it as a string after running it through `process_css`.
///
/// A fresh `cssparser` input/parser pair is built over `css`, and processing
/// starts with three empty-string context arguments (presumably the initial
/// rule / property / function names; confirm against `process_css`).
///
/// # Panics
///
/// Panics if `process_css` returns an error, because the result is unwrapped.
pub fn embed_css(
    cache: &mut HashMap<String, Vec<u8>>,
    client: &Client,
    parent_url: &str,
    css: &str,
    options: &Options,
    depth: u32,
) -> String {
    let mut parser_input = ParserInput::new(css);
    let mut css_parser = Parser::new(&mut parser_input);

    let processed = process_css(
        cache,
        client,
        parent_url,
        &mut css_parser,
        options,
        depth,
        "",
        "",
        "",
    );

    processed.unwrap()
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,21 +1,19 @@
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use std::collections::HashMap;
use std::env;
use std::fs;
use std::io::{self, prelude::*, Error, Write};
use std::path::Path;
use std::process;
use std::time::Duration;
use url::Url;
use monolith::html::{
add_favicon, create_metadata_tag, get_base_url, has_favicon, html_to_dom, set_base_url,
stringify_document, walk_and_embed_assets,
};
use monolith::opts::Options;
use monolith::url::{
data_to_data_url, is_data_url, is_file_url, is_http_url, parse_data_url, resolve_url,
};
use monolith::url::{create_data_url, parse_data_url, resolve_url};
use monolith::utils::retrieve_asset;
mod macros;
@@ -50,62 +48,87 @@ impl Output {
/// Reads standard input line by line and returns everything as one string.
///
/// Each line is re-terminated with a single `\n`. A line that fails to be
/// read contributes an empty string (followed by `\n`) instead of aborting.
pub fn read_stdin() -> String {
    let stdin = io::stdin();
    let mut contents = String::new();

    for maybe_line in stdin.lock().lines() {
        let line: String = maybe_line.unwrap_or_default();
        contents.push_str(&line);
        contents.push('\n');
    }

    contents
}
fn main() {
let options = Options::from_args();
let original_target: &str = &options.target;
let target_url: &str;
let mut base_url: String;
let mut dom;
let mut use_stdin: bool = false;
let mut target: String = str!(&options.target.clone());
// Pre-process the input
let cwd_normalized: String =
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let path = Path::new(original_target);
let mut target: String = str!(original_target.clone()).replace("\\", "/");
let path_is_relative: bool = path.is_relative();
// Determine exact target URL
if target.clone().len() == 0 {
// Check if target was provided
if target.len() == 0 {
if !options.silent {
eprintln!("No target specified");
}
process::exit(1);
} else if target.clone() == "-" {
}
let target_url: Url;
let mut base_url: Url;
let mut use_stdin: bool = false;
// Determine exact target URL
if target.clone() == "-" {
// Read from pipe (stdin)
use_stdin = true;
// Default target URL to empty data URL; the user can control it via --base-url
target_url = "data:text/html,"
} else if is_http_url(target.clone()) || is_data_url(target.clone()) {
target_url = target.as_str();
} else if is_file_url(target.clone()) {
target_url = target.as_str();
} else if path.exists() {
if !path.is_file() {
if !options.silent {
eprintln!("Local target is not a file: {}", original_target);
}
process::exit(1);
}
target.insert_str(0, if cfg!(windows) { "file:///" } else { "file://" });
if path_is_relative {
target.insert_str(if cfg!(windows) { 8 } else { 7 }, &cwd_normalized);
target.insert_str(
if cfg!(windows) { 8 } else { 7 } + &cwd_normalized.len(),
"/",
);
}
target_url = target.as_str();
// Set default target URL to an empty data URL; the user can control it via --base-url
target_url = Url::parse("data:text/html,").unwrap();
} else {
target.insert_str(0, "http://");
target_url = target.as_str();
match Url::parse(&target.clone()) {
Ok(parsed_url) => {
if parsed_url.scheme() == "data"
|| parsed_url.scheme() == "file"
|| (parsed_url.scheme() == "http" || parsed_url.scheme() == "https")
{
target_url = parsed_url;
} else {
if !options.silent {
eprintln!("Unsupported target URL type: {}", &parsed_url.scheme());
}
process::exit(1);
}
}
Err(_err) => {
// Failed to parse given target as a URL,
// perhaps it's a filesystem path?
let path: &Path = Path::new(&target);
if path.exists() {
if path.is_file() {
match Url::from_file_path(fs::canonicalize(&path).unwrap()) {
Ok(file_url) => {
target_url = file_url;
}
Err(_err) => {
if !options.silent {
eprintln!(
"Could not generate file URL out of given path: {}",
"err"
);
}
process::exit(1);
}
}
} else {
if !options.silent {
eprintln!("Local target is not a file: {}", &options.target);
}
process::exit(1);
}
} else {
// Last chance, now we do what browsers do:
// prepend "http://" and hope it points to a website
target.insert_str(0, "http://");
target_url = Url::parse(&target).unwrap();
}
}
}
}
// Define output
@@ -123,7 +146,7 @@ fn main() {
let timeout: u64 = if options.timeout > 0 {
options.timeout
} else {
std::u64::MAX / 4
std::u64::MAX / 4 // This is pretty close to infinity
};
let client = Client::builder()
.timeout(Duration::from_secs(timeout))
@@ -133,13 +156,17 @@ fn main() {
.expect("Failed to initialize HTTP client");
// At this stage we assume that the base URL is the same as the target URL
base_url = str!(target_url);
base_url = target_url.clone();
let mut dom;
// Retrieve target document
if use_stdin {
dom = html_to_dom(&read_stdin());
} else if is_file_url(target_url) || is_http_url(target_url) {
match retrieve_asset(&mut cache, &client, target_url, target_url, &options, 0) {
} else if target_url.scheme() == "file"
|| (target_url.scheme() == "http" || target_url.scheme() == "https")
{
match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options, 0) {
Ok((data, final_url, _media_type)) => {
if options.base_url.clone().unwrap_or(str!()).is_empty() {
base_url = final_url
@@ -153,61 +180,97 @@ fn main() {
process::exit(1);
}
}
} else if is_data_url(target_url) {
let (media_type, data): (String, Vec<u8>) = parse_data_url(target_url);
} else if target_url.scheme() == "data" {
let (media_type, data): (String, Vec<u8>) = parse_data_url(&target_url);
if !media_type.eq_ignore_ascii_case("text/html") {
if !options.silent {
eprintln!("Unsupported data URL media type");
}
process::exit(1);
}
dom = html_to_dom(&String::from_utf8_lossy(&data));
} else {
process::exit(1);
}
// Use custom base URL if specified, read and use what's in the DOM otherwise
if !options.base_url.clone().unwrap_or(str!()).is_empty() {
if is_data_url(options.base_url.clone().unwrap()) {
if !options.silent {
eprintln!("Data URLs cannot be used as base URL");
}
process::exit(1);
} else {
base_url = options.base_url.clone().unwrap();
let b: String = options.base_url.clone().unwrap_or(str!());
if b.is_empty() {
// No custom base URL is specified,
// try to see if the document has BASE tag
if let Some(existing_base_url) = get_base_url(&dom.document) {
base_url = resolve_url(&target_url, &existing_base_url);
}
} else {
if let Some(existing_base_url) = get_base_url(&dom.document) {
base_url = resolve_url(target_url, existing_base_url).unwrap();
// Custom base URL provided
match Url::parse(&b) {
Ok(parsed_url) => {
if parsed_url.scheme() == "file" {
// File base URLs can only work with
// documents saved from filesystem
if target_url.scheme() == "file" {
base_url = parsed_url;
}
} else {
base_url = parsed_url;
}
}
Err(_) => {
// Failed to parse given base URL,
// perhaps it's a filesystem path?
if target_url.scheme() == "file" {
// Relative paths could work for documents saved from filesystem
let path: &Path = Path::new(&b);
if path.exists() {
match Url::from_file_path(fs::canonicalize(&path).unwrap()) {
Ok(file_url) => {
base_url = file_url;
}
Err(_) => {
if !options.silent {
eprintln!("Could not map given path to base URL: {}", b);
}
process::exit(1);
}
}
}
}
}
}
}
// Embed remote assets
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
// Update or add new BASE tag to reroute network requests and hash-links in the final document
// Update or add new BASE tag to reroute network requests
// and hash-links in the final document
if let Some(new_base_url) = options.base_url.clone() {
dom = set_base_url(&dom.document, new_base_url);
}
// Request and embed /favicon.ico (unless it's already linked in the document)
if !options.no_images && is_http_url(target_url) && !has_favicon(&dom.document) {
let favicon_ico_url: String = resolve_url(&base_url, "/favicon.ico").unwrap();
if !options.no_images
&& (target_url.scheme() == "http" || target_url.scheme() == "https")
&& !has_favicon(&dom.document)
{
let favicon_ico_url: Url = resolve_url(&base_url, "/favicon.ico");
match retrieve_asset(
&mut cache,
&client,
&base_url,
&target_url,
&favicon_ico_url,
&options,
0,
) {
Ok((data, final_url, media_type)) => {
let favicon_data_url: String = data_to_data_url(&media_type, &data, &final_url);
dom = add_favicon(&dom.document, favicon_data_url);
let favicon_data_url: Url = create_data_url(&media_type, &data, &final_url);
dom = add_favicon(&dom.document, favicon_data_url.to_string());
}
Err(_) => {
// Failed to retrieve favicon.ico
// Failed to retrieve /favicon.ico
}
}
}

View File

@@ -21,6 +21,7 @@ pub struct Options {
pub no_video: bool,
pub target: String,
pub no_color: bool,
pub unwrap_noscript: bool,
}
const ASCII: &'static str = " \
@@ -55,6 +56,9 @@ impl Options {
.args_from_usage("-j, --no-js 'Removes JavaScript'")
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
.args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
.args_from_usage(
"-n, --unwrap-noscript 'Replaces NOSCRIPT elements with their contents'",
)
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
.args_from_usage("-s, --silent 'Suppresses verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
@@ -100,6 +104,7 @@ impl Options {
} else {
options.user_agent = Some(DEFAULT_USER_AGENT.to_string());
}
options.unwrap_noscript = app.is_present("unwrap-noscript");
options.no_video = app.is_present("no-video");
options.no_color =

View File

@@ -12,8 +12,8 @@ mod passing {
use std::process::Command;
#[test]
fn add_new_when_provided() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn add_new_when_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-b")
@@ -35,13 +35,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn keep_existing_when_none_provided() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn keep_existing_when_none_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
@@ -61,13 +59,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn override_existing_when_provided() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn override_existing_when_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-b")
@@ -89,13 +85,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_existing_when_empty_provided() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn set_existing_to_empty_when_empty_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-b")
@@ -117,7 +111,5 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
}

View File

@@ -9,13 +9,14 @@
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::io::Write;
use std::fs;
use std::path::Path;
use std::process::{Command, Stdio};
use tempfile::NamedTempFile;
use url::Url;
#[test]
fn print_version() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn print_version() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd.arg("-V").output().unwrap();
// STDOUT should contain program name and version
@@ -29,12 +30,10 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn stdin_target_input() -> Result<(), Box<dyn std::error::Error>> {
fn stdin_target_input() {
let mut echo = Command::new("echo")
.arg("Hello from STDIN")
.stdout(Stdio::piped())
@@ -43,70 +42,51 @@ mod passing {
let echo_out = echo.stdout.take().unwrap();
echo.wait().unwrap();
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
cmd.stdin(echo_out);
let out = cmd.arg("-M").arg("-").output().unwrap();
// STDOUT should contain HTML from STDIN
// STDOUT should contain HTML created out of STDIN
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head></head><body>Hello from STDIN\n</body></html>\n"
);
Ok(())
}
#[test]
fn css_import_string() -> Result<(), Box<dyn std::error::Error>> {
let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let mut file_css = NamedTempFile::new()?;
writeln!(file_css, "body{{background-color:#000;color:#fff}}")?;
let mut file_html = NamedTempFile::new()?;
writeln!(
file_html,
"\
<style>\n\
@charset 'UTF-8';\n\
\n\
@import '{file}{css_path}';\n\
\n\
@import url({file}{css_path});\n\
\n\
@import url('{file}{css_path}')\n\
</style>\n\
",
file = file_url_prefix,
css_path = str!(file_css.path().to_str().unwrap()).replace("\\", "/"),
)?;
let out = cmd.arg("-M").arg(file_html.path()).output().unwrap();
fn css_import_string() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("src/tests/data/css/index.html");
let path_css: &Path = Path::new("src/tests/data/css/style.css");
assert!(path_html.is_file());
assert!(path_css.is_file());
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
// STDOUT should contain embedded CSS url()'s
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head><style>\n@charset 'UTF-8';\n\n@import 'data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K';\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K');\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K')\n</style>\n\n</head><body></body></html>\n"
"<html><head><style>\n\n @charset \"UTF-8\";\n\n @import \"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\";\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n</style>\n</head><body></body></html>\n"
);
// STDERR should list temporary files that got retrieved
// STDERR should list files that got retrieved
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file}{html_path}\n \
{file}{css_path}\n \
{file}{css_path}\n \
{file}{css_path}\n\
{file_url_html}\n \
{file_url_css}\n \
{file_url_css}\n \
{file_url_css}\n\
",
file = file_url_prefix,
html_path = str!(file_html.path().to_str().unwrap()).replace("\\", "/"),
css_path = str!(file_css.path().to_str().unwrap()).replace("\\", "/"),
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_css = Url::from_file_path(fs::canonicalize(&path_css).unwrap()).unwrap(),
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
}
@@ -124,8 +104,8 @@ mod failing {
use std::process::Command;
#[test]
fn bad_input_empty_target() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn bad_input_empty_target() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd.arg("").output().unwrap();
// STDOUT should be empty
@@ -139,7 +119,5 @@ mod failing {
// The exit code should be 1
out.assert().code(1);
Ok(())
}
}

View File

@@ -12,28 +12,8 @@ mod passing {
use std::process::Command;
/// Runs the binary against a data URL that declares no media type and checks
/// that it fails: nothing on STDOUT, an error message on STDERR, exit code 1.
#[test]
fn bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
// STDOUT should be empty (no document is produced on failure)
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
// STDERR should contain error description
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
"Unsupported data URL media type\n"
);
// The exit code should be 1
// (`assert()` consumes `out`, so it has to come after the checks above)
out.assert().code(1);
Ok(())
}
#[test]
fn isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn isolate_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-I")
@@ -54,13 +34,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn remove_css_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-c")
@@ -82,13 +60,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_fonts_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn remove_fonts_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-F")
@@ -110,13 +86,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn remove_frames_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-f")
@@ -137,13 +111,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn remove_images_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-i")
@@ -173,13 +145,11 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn remove_js_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-j")
@@ -203,14 +173,43 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
}
}
Ok(())
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use assert_cmd::prelude::*;
use std::env;
use std::process::Command;
/// Runs the binary against a data URL that declares no media type and checks
/// that it fails: nothing on STDOUT, an error message on STDERR, exit code 1.
#[test]
fn bad_input_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
// STDOUT should be empty (no document is produced on failure)
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
// STDERR should contain error description
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
"Unsupported data URL media type\n"
);
// The exit code should be 1
// (`assert()` consumes `out`, so it has to come after the checks above)
out.assert().code(1);
}
#[test]
fn security_disallow_local_assets_within_data_url_targets(
) -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn security_disallow_local_assets_within_data_url_targets() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("data:text/html,%3Cscript%20src=\"src/tests/data/basic/local-script.js\"%3E%3C/script%3E")
@@ -220,7 +219,7 @@ mod passing {
// STDOUT should contain HTML with no JS in it
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head><script></script></head><body></body></html>\n"
"<html><head><script src=\"data:application/javascript;base64,\"></script></head><body></body></html>\n"
);
// STDERR should be empty
@@ -228,7 +227,5 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
}

View File

@@ -9,13 +9,14 @@
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::io::Write;
use std::fs;
use std::path::Path;
use std::process::Command;
use tempfile::NamedTempFile;
use url::Url;
#[test]
fn local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn local_file_target_input_relative_target_path() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let cwd_normalized: String =
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let out = cmd
@@ -36,7 +37,7 @@ mod passing {
<!DOCTYPE html><html lang=\"en\"><head>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \
<link rel=\"stylesheet\" type=\"text/css\" href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\">\n \
<link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\" rel=\"stylesheet\" type=\"text/css\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
@@ -46,13 +47,15 @@ mod passing {
"
);
// STDERR should contain list of retrieved file URLs
// STDERR should contain list of retrieved file URLs, two missing
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file}{cwd}/src/tests/data/basic/local-file.html\n \
{file}{cwd}/src/tests/data/basic/local-style.css\n \
{file}{cwd}/src/tests/data/basic/local-style-does-not-exist.css (not found)\n \
{file}{cwd}/src/tests/data/basic/monolith.png (not found)\n \
{file}{cwd}/src/tests/data/basic/local-script.js\n\
",
file = file_url_protocol,
@@ -62,32 +65,19 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn local_file_target_input_absolute_target_path() -> Result<(), Box<dyn std::error::Error>> {
let cwd = env::current_dir().unwrap();
let cwd_normalized: String = str!(cwd.to_str().unwrap()).replace("\\", "/");
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn local_file_target_input_absolute_target_path() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("src/tests/data/basic/local-file.html");
let out = cmd
.arg("-M")
.arg("-jciI")
.arg(if cfg!(windows) {
format!(
"{cwd}\\src\\tests\\data\\basic\\local-file.html",
cwd = cwd.to_str().unwrap()
)
} else {
format!(
"{cwd}/src/tests/data/basic/local-file.html",
cwd = cwd.to_str().unwrap()
)
})
.arg("-Ijci")
.arg(path_html.as_os_str())
.output()
.unwrap();
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
// STDOUT should contain HTML from the local file
assert_eq!(
@@ -114,21 +104,18 @@ mod passing {
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"{file}{cwd}/src/tests/data/basic/local-file.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
"{file_url_html}\n",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn local_file_url_target_input() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn local_file_url_target_input() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let cwd_normalized: String =
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
@@ -173,72 +160,110 @@ mod passing {
);
// STDERR should contain list of retrieved file URLs
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
if cfg!(windows) {
format!(
"{file}{cwd}/src/tests/data/basic/local-file.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
} else {
format!(
"{file}{cwd}/src/tests/data/basic/local-file.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
}
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn embed_file_url_local_asset_within_style_attribute() -> Result<(), Box<dyn std::error::Error>>
{
let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let mut file_svg = NamedTempFile::new()?;
writeln!(file_svg, "<svg version=\"1.1\" baseProfile=\"full\" width=\"300\" height=\"200\" xmlns=\"http://www.w3.org/2000/svg\">\
<rect width=\"100%\" height=\"100%\" fill=\"red\" />\
<circle cx=\"150\" cy=\"100\" r=\"80\" fill=\"green\" />\
<text x=\"150\" y=\"125\" font-size=\"60\" text-anchor=\"middle\" fill=\"white\">SVG</text>\
</svg>\n")?;
let mut file_html = NamedTempFile::new()?;
writeln!(
file_html,
"<div style='background-image: url(\"{file}{path}\")'></div>\n",
file = file_url_prefix,
path = str!(file_svg.path().to_str().unwrap()).replace("\\", "/"),
)?;
let out = cmd.arg("-M").arg(file_html.path()).output().unwrap();
// STDOUT should contain HTML with date URL for background-image in it
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head></head><body><div style=\"background-image: url('data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiBmaWxsPSJyZWQiIC8+PGNpcmNsZSBjeD0iMTUwIiBjeT0iMTAwIiByPSI4MCIgZmlsbD0iZ3JlZW4iIC8+PHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+PC9zdmc+Cgo=')\"></div>\n\n</body></html>\n"
);
// STDERR should list temporary files that got retrieved
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file}{html_path}\n \
{file}{svg_path}\n\
",
file = file_url_prefix,
html_path = str!(file_html.path().to_str().unwrap()).replace("\\", "/"),
svg_path = str!(file_svg.path().to_str().unwrap()).replace("\\", "/"),
"{file}{cwd}/src/tests/data/basic/local-file.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
);
// The exit code should be 0
out.assert().code(0);
}
Ok(())
/// Saves the SVG fixture page and checks that the image referenced from the
/// `style` attribute's `background-image` is replaced by a base64 data URL.
#[test]
fn embed_file_url_local_asset_within_style_attribute() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("src/tests/data/svg/index.html");
let path_svg: &Path = Path::new("src/tests/data/svg/image.svg");
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
// STDOUT should contain HTML with data URL for background-image in it
// (the quotes around the URL are expected as `&quot;` entities inside the attribute)
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head></head><body><div style=\"background-image: url(&quot;data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=&quot;)\"></div>\n</body></html>\n"
);
// STDERR should list files that got retrieved
// (canonical file URLs: the HTML document first, then the SVG one level deeper)
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file_url_html}\n \
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
)
);
// The exit code should be 0
out.assert().code(0);
}
/// Saves the `integrity` fixture page through a `file:` URL and checks that the
/// expected output carries no `integrity` attributes on its LINK and SCRIPT
/// elements, that STDERR lists every retrieved file URL, and that the exit
/// code is 0.
#[test]
fn discard_integrity_for_local_files() {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
    let cwd_normalized: String =
        str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // The platform difference is already captured by `file_url_protocol`, so a
    // single format call serves both Windows and non-Windows targets.
    let target_url: String = format!(
        "{file}{cwd}/src/tests/data/integrity/index.html",
        file = file_url_protocol,
        cwd = cwd_normalized,
    );

    let out = cmd.arg("-M").arg("-i").arg(target_url).output().unwrap();

    // STDOUT should contain HTML from the local file; integrity attributes should be missing
    // (plain literal: the expected text has no placeholders to format)
    assert_eq!(
        std::str::from_utf8(&out.stdout).unwrap(),
        "\
        <!DOCTYPE html><html lang=\"en\"><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>\n \
        <title>Local HTML file</title>\n \
        <link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNGRkY7Cn0K\" rel=\"stylesheet\" type=\"text/css\" crossorigin=\"anonymous\">\n \
        <link href=\"style.css\" rel=\"stylesheet\" type=\"text/css\" crossorigin=\"anonymous\">\n</head>\n\n<body>\n \
        <p>This page should have black background and white foreground, but only when served via http: (not via file:)</p>\n \
        <script src=\"data:application/javascript;base64,ZnVuY3Rpb24gbm9vcCgpIHsKICAgIGNvbnNvbGUubG9nKCJtb25vbGl0aCIpOwp9Cg==\"></script>\n \
        <script src=\"script.js\"></script>\n\n\n\n\
        </body></html>\n\
        "
    );

    // STDERR should contain list of retrieved file URLs
    // (the stylesheet and the script are each expected to be listed twice)
    assert_eq!(
        std::str::from_utf8(&out.stderr).unwrap(),
        format!(
            "\
            {file}{cwd}/src/tests/data/integrity/index.html\n \
            {file}{cwd}/src/tests/data/integrity/style.css\n \
            {file}{cwd}/src/tests/data/integrity/style.css\n \
            {file}{cwd}/src/tests/data/integrity/script.js\n \
            {file}{cwd}/src/tests/data/integrity/script.js\n\
            ",
            file = file_url_protocol,
            cwd = cwd_normalized,
        )
    );

    // The exit code should be 0
    // (`assert()` consumes `out`, so it has to come last)
    out.assert().code(0);
}
}

View File

@@ -2,4 +2,5 @@ mod base_url;
mod basic;
mod data_url;
mod local_files;
mod noscript;
mod unusual_encodings;

170
src/tests/cli/noscript.rs Normal file
View File

@@ -0,0 +1,170 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use assert_cmd::prelude::*;
    use std::env;
    use std::fs;
    use std::path::Path;
    use std::process::Command;
    use url::Url;

    /// Data URL that the fixtures' `image.svg` reference is expected to turn into.
    const IMAGE_SVG_DATA_URL: &str = "data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=";

    /// Location of the SVG image that every NOSCRIPT fixture page references.
    const PATH_SVG: &str = "src/tests/data/noscript/image.svg";

    /// Runs the binary with `flags` against the fixture page at `fixture` and
    /// checks all three outcomes shared by the file-based tests below:
    /// STDOUT equals `expected_stdout`, STDERR lists the canonical file URLs of
    /// the page and of the SVG image, and the exit code is 0.
    #[track_caller]
    fn assert_fixture_output(flags: &str, fixture: &str, expected_stdout: &str) {
        let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
        let path_html: &Path = Path::new(fixture);
        let path_svg: &Path = Path::new(PATH_SVG);

        let out = cmd.arg(flags).arg(path_html.as_os_str()).output().unwrap();

        // STDOUT should contain the processed document
        assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), expected_stdout);

        // STDERR should contain target HTML and embedded SVG files
        assert_eq!(
            std::str::from_utf8(&out.stderr).unwrap(),
            format!(
                "{file_url_html}\n {file_url_svg}\n",
                file_url_html = Url::from_file_path(fs::canonicalize(path_html).unwrap()).unwrap(),
                file_url_svg = Url::from_file_path(fs::canonicalize(path_svg).unwrap()).unwrap(),
            )
        );

        // The exit code should be 0
        // (`assert()` consumes `out`, so it has to come last)
        out.assert().code(0);
    }

    /// Without `-n`, the NOSCRIPT element is kept and the image inside it is embedded.
    #[test]
    fn parse_noscript_contents() {
        let expected: String = format!(
            "<html><head></head><body><noscript><img src=\"{img}\"></noscript>\n</body></html>\n",
            img = IMAGE_SVG_DATA_URL,
        );

        assert_fixture_output("-M", "src/tests/data/noscript/index.html", &expected);
    }

    /// With `-n`, the NOSCRIPT tags are replaced by comment markers and the
    /// image that was inside them is embedded.
    #[test]
    fn unwrap_noscript_contents() {
        let expected: String = format!(
            "<html><head></head><body><!--noscript--><img src=\"{img}\"><!--/noscript-->\n</body></html>\n",
            img = IMAGE_SVG_DATA_URL,
        );

        assert_fixture_output("-Mn", "src/tests/data/noscript/index.html", &expected);
    }

    /// With `-n`, NOSCRIPT elements nested inside one another are all unwrapped.
    #[test]
    fn unwrap_noscript_contents_nested() {
        let expected: String = format!(
            "<html><head></head><body><!--noscript--><h1>JS is not active</h1><!--noscript--><img src=\"{img}\"><!--/noscript--><!--/noscript-->\n</body></html>\n",
            img = IMAGE_SVG_DATA_URL,
        );

        assert_fixture_output("-Mn", "src/tests/data/noscript/nested.html", &expected);
    }

    /// With `-n`, the expected output for the `script.html` fixture holds only
    /// the embedded image between the comment markers (no SCRIPT element).
    #[test]
    fn unwrap_noscript_contents_with_script() {
        let expected: String = format!(
            "<html>\
            <head></head>\
            <body>\
            <!--noscript-->\
            <img src=\"{img}\">\
            <!--/noscript-->\n\
            </body>\
            </html>\n",
            img = IMAGE_SVG_DATA_URL,
        );

        assert_fixture_output("-Mn", "src/tests/data/noscript/script.html", &expected);
    }

    /// With `-n`, attributes of a NOSCRIPT element are preserved inside the
    /// opening comment marker; the target here is a data URL, not a file.
    #[test]
    fn unwrap_noscript_contents_attr_data_url() {
        let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
        let out = cmd
            .arg("-M")
            .arg("-n")
            .arg("data:text/html,<noscript class=\"\">test</noscript>")
            .output()
            .unwrap();

        // STDOUT should contain unwrapped contents of NOSCRIPT element
        assert_eq!(
            std::str::from_utf8(&out.stdout).unwrap(),
            "<html><head><!--noscript class=\"\"-->test<!--/noscript--></head><body></body></html>\n"
        );

        // STDERR should be empty
        assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");

        // The exit code should be 0
        out.assert().code(0);
    }
}

View File

@@ -12,10 +12,10 @@ mod passing {
use std::process::Command;
#[test]
fn change_encoding_to_utf_8() -> Result<(), Box<dyn std::error::Error>> {
fn change_encoding_to_utf_8() {
let cwd = env::current_dir().unwrap();
let cwd_normalized: String = str!(cwd.to_str().unwrap()).replace("\\", "/");
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg(if cfg!(windows) {
@@ -30,7 +30,14 @@ mod passing {
// STDOUT should contain newly added base URL
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head>\n <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n </head>\n <body>\n © Some Company\n \n\n</body></html>\n"
"<html>\
<head>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
</head>\n \
<body>\n \
© Some Company\n \
\n\n</body>\
</html>\n"
);
// STDERR should contain only the target file
@@ -45,7 +52,5 @@ mod passing {
// The exit code should be 0
out.assert().code(0);
Ok(())
}
}

View File

@@ -8,6 +8,7 @@
#[cfg(test)]
mod passing {
use reqwest::blocking::Client;
use reqwest::Url;
use std::collections::HashMap;
use crate::css;
@@ -17,26 +18,24 @@ mod passing {
fn empty_input() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("data:,").unwrap();
let options = Options::default();
assert_eq!(css::embed_css(cache, &client, "", "", &options, 0), "");
assert_eq!(
css::embed_css(cache, &client, &document_url, "", &options, 0),
""
);
}
#[test]
fn trim_if_empty() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let options = Options::default();
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
"\t \t ",
&options,
0,
),
css::embed_css(cache, &client, &document_url, "\t \t ", &options, 0,),
""
);
}
@@ -45,6 +44,7 @@ mod passing {
fn style_exclude_unquoted_images() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
@@ -58,18 +58,11 @@ mod passing {
height: calc(100vh - 10pt)";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&STYLE,
&options,
0,
),
css::embed_css(cache, &client, &document_url, &STYLE, &options, 0,),
format!(
"/* border: none;*/\
background-image: url('{empty_image}'); \
list-style: url('{empty_image}');\
background-image: url(\"{empty_image}\"); \
list-style: url(\"{empty_image}\");\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
@@ -83,6 +76,7 @@ mod passing {
fn style_exclude_single_quoted_images() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("data:,").unwrap();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
@@ -96,11 +90,11 @@ mod passing {
height: calc(100vh - 10pt)";
assert_eq!(
css::embed_css(cache, &client, "", &STYLE, &options, 0),
css::embed_css(cache, &client, &document_url, &STYLE, &options, 0),
format!(
"/* border: none;*/\
background-image: url('{empty_image}'); \
list-style: url('{empty_image}');\
background-image: url(\"{empty_image}\"); \
list-style: url(\"{empty_image}\");\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
@@ -114,19 +108,20 @@ mod passing {
fn style_block() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("file:///").unwrap();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
#id.class-name:not(:nth-child(3n+0)) {\n \
// border: none;\n \
background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=');\n\
background-image: url(\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\");\n\
}\n\
\n\
html > body {}";
assert_eq!(
css::embed_css(cache, &client, "file:///", &CSS, &options, 0),
css::embed_css(cache, &client, &document_url, &CSS, &options, 0),
CSS
);
}
@@ -135,6 +130,7 @@ mod passing {
fn attribute_selectors() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.silent = true;
@@ -143,38 +139,42 @@ mod passing {
/* Attribute exists */
}
[data-value='foo'] {
[data-value=\"foo\"] {
/* Attribute has this exact value */
}
[data-value*='foo'] {
[data-value*=\"foo\"] {
/* Attribute value contains this value somewhere in it */
}
[data-value~='foo'] {
[data-value~=\"foo\"] {
/* Attribute has this value in a space-separated list somewhere */
}
[data-value^='foo'] {
[data-value^=\"foo\"] {
/* Attribute value starts with this */
}
[data-value|='foo'] {
[data-value|=\"foo\"] {
/* Attribute value starts with this in a dash-separated list */
}
[data-value$='foo'] {
[data-value$=\"foo\"] {
/* Attribute value ends with this */
}
";
assert_eq!(css::embed_css(cache, &client, "", &CSS, &options, 0), CSS);
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0),
CSS
);
}
#[test]
fn import_string() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.silent = true;
@@ -187,20 +187,13 @@ mod passing {
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
"\
@charset 'UTF-8';\n\
@charset \"UTF-8\";\n\
\n\
@import 'data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9';\n\
@import \"data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9\";\n\
\n\
@import url('data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==')\n\
@import url(\"data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==\")\n\
"
);
}
@@ -209,6 +202,7 @@ mod passing {
fn hash_urls() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.silent = true;
@@ -223,14 +217,7 @@ mod passing {
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
CSS
);
}
@@ -239,6 +226,7 @@ mod passing {
fn transform_percentages_and_degrees() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.silent = true;
@@ -251,14 +239,7 @@ mod passing {
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
CSS
);
}
@@ -267,6 +248,7 @@ mod passing {
fn unusual_indents() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.silent = true;
@@ -281,14 +263,7 @@ mod passing {
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
CSS
);
}
@@ -297,6 +272,7 @@ mod passing {
fn exclude_fonts() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.no_fonts = true;
options.silent = true;
@@ -320,30 +296,74 @@ mod passing {
font-family: 'My Font' Verdana\n\
}\n\
";
const CSS_OUT: &str = " \
\n\
\n\
#identifier {\n \
font-family: 'My Font' Arial\n\
font-family: \"My Font\" Arial\n\
}\n\
\n \
\n\
\n\
div {\n \
font-family: 'My Font' Verdana\n\
font-family: \"My Font\" Verdana\n\
}\n\
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
CSS_OUT
);
}
/// Checks how `embed_css` re-serializes a `content` declaration made of a
/// `url()` followed by a string: the unquoted `url(data:,)` is expected back as
/// a double-quoted base64 data URL, and the `\A` escape inside the string is
/// expected back as `\a ` (lowercase, with a trailing space).
#[test]
fn content() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("data:,").unwrap();
let mut options = Options::default();
options.silent = true;
// Input stylesheet; `\\A` is a literal backslash followed by `A` in the CSS text
const CSS: &str = "\
#language a[href=\"#translations\"]:before {\n\
content: url(data:,) \"\\A\";\n\
white-space: pre }\n\
";
// Expected result; only the `content` line differs from the input
const CSS_OUT: &str = "\
#language a[href=\"#translations\"]:before {\n\
content: url(\"data:;base64,\") \"\\a \";\n\
white-space: pre }\n\
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
CSS_OUT
);
}
/// Checks how `embed_css` handles the `\9` hack: the `:not(\9)` selector is
/// expected back unchanged, while the `\9` that trails the `500px` value is
/// expected back as a literal tab character.
#[test]
fn ie_css_hack() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("data:,").unwrap();
let mut options = Options::default();
options.silent = true;
// Input stylesheet; `\\9` is a literal backslash followed by `9` in the CSS text
const CSS: &str = "\
div#p>svg>foreignObject>section:not(\\9) {\n\
width: 300px;\n\
width: 500px\\9;\n\
}\n\
";
// Expected result; `\t` here is a real tab character, not a CSS escape
const CSS_OUT: &str = "\
div#p>svg>foreignObject>section:not(\\9) {\n\
width: 300px;\n\
width: 500px\t;\n\
}\n\
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
CSS_OUT
);
}

View File

@@ -1,53 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::css;

    /// An empty string in single-quote mode comes back as two apostrophes.
    #[test]
    fn empty_input_single_quotes() {
        let quoted = css::enquote(str!(""), false);

        assert_eq!(quoted, "''");
    }

    /// An empty string in double-quote mode comes back as two double quotes.
    #[test]
    fn empty_input_double_quotes() {
        let quoted = css::enquote(str!(""), true);

        assert_eq!(quoted, "\"\"");
    }

    /// Apostrophes are backslash-escaped when the wrapper is a single quote.
    #[test]
    fn apostrophes_single_quotes() {
        let quoted = css::enquote(str!("It's a lovely day, don't you think?"), false);

        assert_eq!(quoted, "'It\\'s a lovely day, don\\'t you think?'");
    }

    /// Apostrophes are left alone when the wrapper is a double quote.
    #[test]
    fn apostrophes_double_quotes() {
        let quoted = css::enquote(str!("It's a lovely day, don't you think?"), true);

        assert_eq!(quoted, "\"It's a lovely day, don't you think?\"");
    }

    /// With both quote kinds present, single-quote mode escapes only the apostrophes.
    #[test]
    fn feet_and_inches_single_quotes() {
        let quoted = css::enquote(str!("5'2\", 6'5\""), false);

        assert_eq!(quoted, "'5\\'2\", 6\\'5\"'");
    }

    /// With both quote kinds present, double-quote mode escapes only the double quotes.
    #[test]
    fn feet_and_inches_double_quotes() {
        let quoted = css::enquote(str!("5'2\", 6'5\""), true);

        assert_eq!(quoted, "\"5'2\\\", 6'5\\\"\"");
    }
}

View File

@@ -1,3 +1,2 @@
mod embed_css;
mod enquote;
mod is_image_url_prop;

View File

@@ -0,0 +1,11 @@
<style>
@charset 'UTF-8';
@import 'style.css';
@import url(style.css);
@import url('style.css');
</style>

View File

@@ -0,0 +1 @@
body{background-color:#000;color:#fff}

View File

@@ -3,8 +3,6 @@
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="Content-Security-Policy" content="default-src 'unsafe-inline' file:;" />
<title>Local HTML file</title>
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-IWaCTORHkRhOWzcZeILSVmV6V6gPTHgNem6o6rsFAyaKTieDFkeeMrWjtO0DuWrX3bqZY46CVTZXUu0mia0qXQ==" crossorigin="anonymous" />
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-vWBzl4NE9oIg8NFOPAyOZbaam0UXWr6aDHPaY2kodSzAFl+mKoj/RMNc6C31NDqK4mE2i68IWxYWqWJPLCgPOw==" crossorigin="anonymous" />

View File

@@ -0,0 +1,5 @@
<svg version="1.1" baseProfile="full" width="300" height="200" xmlns="http://www.w3.org/2000/svg">
<rect width="100%" height="100%" fill="red" />
<circle cx="150" cy="100" r="80" fill="green" />
<text x="150" y="125" font-size="60" text-anchor="middle" fill="white">SVG</text>
</svg>

After

Width:  |  Height:  |  Size: 296 B

View File

@@ -0,0 +1 @@
<body><noscript><img src="image.svg" /></noscript></body>

View File

@@ -0,0 +1 @@
<body><noscript><h1>JS is not active</h1><noscript><img src="image.svg" /></noscript></noscript></body>

View File

@@ -0,0 +1 @@
<body><noscript><script>alert(1);</script><img src="image.svg" /></noscript></body>

View File

@@ -0,0 +1,5 @@
<svg version="1.1" baseProfile="full" width="300" height="200" xmlns="http://www.w3.org/2000/svg">
<rect width="100%" height="100%" fill="red" />
<circle cx="150" cy="100" r="80" fill="green" />
<text x="150" y="125" font-size="60" text-anchor="middle" fill="white">SVG</text>
</svg>

After

Width:  |  Height:  |  Size: 296 B

View File

@@ -0,0 +1 @@
<div style="background-image: url('image.svg')"></div>

View File

@@ -8,14 +8,15 @@
#[cfg(test)]
mod passing {
use chrono::prelude::*;
use reqwest::Url;
use crate::html;
#[test]
fn http_url() {
let url = "http://192.168.1.1/";
let url: Url = Url::parse("http://192.168.1.1/").unwrap();
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::create_metadata_tag(url);
let metadata_comment: String = html::create_metadata_tag(&url);
assert_eq!(
metadata_comment,
@@ -31,9 +32,9 @@ mod passing {
#[test]
fn file_url() {
let url = "file:///home/monolith/index.html";
let url: Url = Url::parse("file:///home/monolith/index.html").unwrap();
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::create_metadata_tag(url);
let metadata_comment: String = html::create_metadata_tag(&url);
assert_eq!(
metadata_comment,
@@ -48,9 +49,9 @@ mod passing {
#[test]
fn data_url() {
let url = "data:text/html,Hello%2C%20World!";
let url: Url = Url::parse("data:text/html,Hello%2C%20World!").unwrap();
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::create_metadata_tag(url);
let metadata_comment: String = html::create_metadata_tag(&url);
assert_eq!(
metadata_comment,
@@ -63,20 +64,3 @@ mod passing {
);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::html;

    /// An empty URL string is expected to produce an empty metadata tag.
    #[test]
    fn empty_string() {
        let metadata_comment = html::create_metadata_tag("");

        assert_eq!(metadata_comment, "");
    }
}

View File

@@ -8,6 +8,7 @@
#[cfg(test)]
mod passing {
use reqwest::blocking::Client;
use reqwest::Url;
use std::collections::HashMap;
use crate::html;
@@ -21,7 +22,14 @@ mod passing {
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!(
embedded_css,
@@ -42,7 +50,14 @@ mod passing {
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!(
embedded_css,
@@ -58,7 +73,14 @@ mod passing {
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!(
embedded_css,
@@ -74,7 +96,14 @@ mod passing {
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!(
embedded_css,
@@ -98,6 +127,7 @@ mod passing {
#[cfg(test)]
mod failing {
use reqwest::blocking::Client;
use reqwest::Url;
use std::collections::HashMap;
use crate::html;
@@ -111,7 +141,14 @@ mod failing {
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
0,
);
assert_eq!(
embedded_css,

View File

@@ -10,6 +10,7 @@ mod passing {
use html5ever::serialize::{serialize, SerializeOpts};
use reqwest::blocking::Client;
use std::collections::HashMap;
use url::Url;
use crate::html;
use crate::opts::Options;
@@ -18,9 +19,9 @@ mod passing {
fn basic() {
let cache = &mut HashMap::new();
let html = "<div><P></P></div>";
let html: &str = "<div><P></P></div>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let url: Url = Url::parse("http://localhost").unwrap();
let mut options = Options::default();
options.silent = true;
@@ -42,7 +43,7 @@ mod passing {
fn ensure_no_recursive_iframe() {
let html = "<div><P></P><iframe src=\"\"></iframe></div>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
@@ -65,7 +66,7 @@ mod passing {
fn ensure_no_recursive_frame() {
let html = "<frameset><frame src=\"\"></frameset>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
@@ -86,12 +87,14 @@ mod passing {
#[test]
fn no_css() {
let html = "<link rel=\"stylesheet\" href=\"main.css\">\
<link rel=\"alternate stylesheet\" href=\"main.css\">\
<style>html{background-color: #000;}</style>\
<div style=\"display: none;\"></div>";
let html = "\
<link rel=\"stylesheet\" href=\"main.css\">\
<link rel=\"alternate stylesheet\" href=\"main.css\">\
<style>html{background-color: #000;}</style>\
<div style=\"display: none;\"></div>\
";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
@@ -107,16 +110,18 @@ mod passing {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head>\
<link rel=\"stylesheet\">\
<link rel=\"alternate stylesheet\">\
<style></style>\
</head>\
<body>\
<div></div>\
</body>\
</html>"
"\
<html>\
<head>\
<link rel=\"stylesheet\">\
<link rel=\"alternate stylesheet\">\
<style></style>\
</head>\
<body>\
<div></div>\
</body>\
</html>\
"
);
}
@@ -125,7 +130,7 @@ mod passing {
let html = "<link rel=\"icon\" href=\"favicon.ico\">\
<div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
@@ -162,7 +167,7 @@ mod passing {
let html =
"<body background=\"no/such/image.png\" background=\"no/such/image2.png\"></body>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
@@ -186,7 +191,7 @@ mod passing {
fn no_frames() {
let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
@@ -202,7 +207,15 @@ mod passing {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
"\
<html>\
<head>\
</head>\
<frameset>\
<frame src=\"\">\
</frameset>\
</html>\
"
);
}
@@ -210,7 +223,7 @@ mod passing {
fn no_iframes() {
let html = "<iframe src=\"http://trackbook.com\"></iframe>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
@@ -226,18 +239,27 @@ mod passing {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><iframe src=\"\"></iframe></body></html>"
"\
<html>\
<head></head>\
<body>\
<iframe src=\"\"></iframe>\
</body>\
</html>\
"
);
}
#[test]
fn no_js() {
let html = "<div onClick=\"void(0)\">\
<script src=\"http://localhost/assets/some.js\"></script>\
<script>alert(1)</script>\
</div>";
let html = "\
<div onClick=\"void(0)\">\
<script src=\"http://localhost/assets/some.js\"></script>\
<script>alert(1)</script>\
</div>\
";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
@@ -253,25 +275,29 @@ mod passing {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><script></script>\
<script></script></div></body></html>"
"\
<html>\
<head></head>\
<body>\
<div>\
<script></script>\
<script></script>\
</div>\
</body>\
</html>\
"
);
}
#[test]
fn discards_integrity() {
let html = "<title>No integrity</title>\
<link integrity=\"sha384-...\" rel=\"something\"/>\
<script integrity=\"sha384-...\" src=\"some.js\"></script>";
fn keeps_integrity_for_linked_assets() {
let html = "<title>Has integrity</title>\
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\" />";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_css = true;
options.no_frames = true;
options.no_js = true;
options.no_images = true;
options.silent = true;
let client = Client::new();
@@ -283,24 +309,109 @@ mod passing {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head><title>No integrity</title><link rel=\"something\"><script></script></head>\
"\
<html>\
<head>\
<title>Has integrity</title>\
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\">\
</head>\
<body></body>\
</html>"
</html>\
"
);
}
/// With both `no_css` and `no_js` set, a stylesheet `<link>` and a `<script>` that carry
/// (empty) `integrity` attributes are expected to be serialized without `integrity`,
/// `href` or `src`.
#[test]
fn discards_integrity_for_linked_assets_nojs_nocss() {
    let html = "\
        <title>No integrity</title>\
        <link integrity=\"\" rel=\"stylesheet\" href=\"data:;\"/>\
        <script integrity=\"\" src=\"some.js\"></script>\
    ";
    let dom = html::html_to_dom(&html);
    let url: Url = Url::parse("http://localhost").unwrap();
    let cache = &mut HashMap::new();

    let mut options = Options::default();
    options.no_css = true;
    options.no_js = true;
    options.silent = true;

    let client = Client::new();

    html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8 rather than casting each byte to a `char`:
    // a per-byte cast would garble any multi-byte sequence and make a failing
    // assertion print a misleading string.
    assert_eq!(
        String::from_utf8_lossy(&buf),
        "\
        <html>\
        <head>\
        <title>No integrity</title>\
        <link rel=\"stylesheet\">\
        <script></script>\
        </head>\
        <body></body>\
        </html>\
        "
    );
}
/// With `no_css` and `no_js` set, the `<script>` is expected to lose both its `integrity`
/// and `src` attributes, while the `<link rel="something">` pointing at a `data:` URL is
/// expected to be serialized with its `integrity` and `href` intact.
#[test]
fn discards_integrity_for_embedded_assets() {
    let html = "\
        <title>No integrity</title>\
        <link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\"/>\
        <script integrity=\"sha384-456\" src=\"some.js\"></script>\
    ";
    let dom = html::html_to_dom(&html);
    let url: Url = Url::parse("http://localhost").unwrap();
    let cache = &mut HashMap::new();

    let mut options = Options::default();
    options.no_css = true;
    options.no_js = true;
    options.silent = true;

    let client = Client::new();

    html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8 rather than casting each byte to a `char`:
    // a per-byte cast would garble any multi-byte sequence and make a failing
    // assertion print a misleading string.
    assert_eq!(
        String::from_utf8_lossy(&buf),
        "\
        <html>\
        <head>\
        <title>No integrity</title>\
        <link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\">\
        <script></script>\
        </head>\
        <body>\
        </body>\
        </html>\
        "
    );
}
#[test]
fn removes_unwanted_meta_tags() {
let html = "<html>\
<head>\
<meta http-equiv=\"Refresh\" value=\"20\"/>\
<meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
</head>\
<body></body>\
</html>";
let html = "\
<html>\
<head>\
<meta http-equiv=\"Refresh\" value=\"20\"/>\
<meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
</head>\
<body>\
</body>\
</html>\
";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
@@ -319,19 +430,22 @@ mod passing {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
"\
<html>\
<head>\
<meta http-equiv=\"disabled by monolith (Refresh)\" value=\"20\">\
<meta http-equiv=\"disabled by monolith (Location)\" value=\"https://freebsd.org\">\
</head>\
<body></body>\
<body>\
</body>\
</html>"
);
}
#[test]
fn processes_noscript_tags() {
let html = "<html>\
let html = "\
<html>\
<body>\
<noscript>\
<img src=\"image.png\" />\
@@ -339,7 +453,7 @@ mod passing {
</body>\
</html>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
@@ -356,7 +470,8 @@ mod passing {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
format!(
"<html>\
"\
<html>\
<head>\
</head>\
<body>\
@@ -369,4 +484,34 @@ mod passing {
)
);
}
/// A `<script type="application/json">` element is expected to come out of
/// `walk_and_embed_assets` with its attributes and JSON payload unchanged.
#[test]
fn preserves_script_type_json() {
    let html = "<script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>";
    let dom = html::html_to_dom(&html);
    let url: Url = Url::parse("http://localhost").unwrap();
    let cache = &mut HashMap::new();

    let mut options = Options::default();
    options.silent = true;

    let client = Client::new();

    html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8 rather than casting each byte to a `char`:
    // a per-byte cast would garble any multi-byte sequence (e.g. non-ASCII JSON text)
    // and make a failing assertion print a misleading string.
    assert_eq!(
        String::from_utf8_lossy(&buf),
        "\
        <html>\
        <head>\
        <script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>\
        </head>\
        <body>\
        </body>\
        </html>"
    );
}
}

View File

@@ -7,12 +7,23 @@
#[cfg(test)]
mod passing {
use reqwest::Url;
use crate::url;
#[test]
fn preserve_original() {
let u: Url = Url::parse("https://somewhere.com/font.eot#iefix").unwrap();
url::clean_url(u.clone());
assert_eq!(u.as_str(), "https://somewhere.com/font.eot#iefix");
}
#[test]
fn removes_fragment() {
assert_eq!(
url::clean_url("https://somewhere.com/font.eot#iefix"),
url::clean_url(Url::parse("https://somewhere.com/font.eot#iefix").unwrap()).as_str(),
"https://somewhere.com/font.eot"
);
}
@@ -20,31 +31,31 @@ mod passing {
#[test]
fn removes_empty_fragment() {
assert_eq!(
url::clean_url("https://somewhere.com/font.eot#"),
url::clean_url(Url::parse("https://somewhere.com/font.eot#").unwrap()).as_str(),
"https://somewhere.com/font.eot"
);
}
#[test]
fn removes_empty_query_and_empty_fragment() {
fn removes_empty_fragment_and_keeps_empty_query() {
assert_eq!(
url::clean_url("https://somewhere.com/font.eot?#"),
"https://somewhere.com/font.eot"
url::clean_url(Url::parse("https://somewhere.com/font.eot?#").unwrap()).as_str(),
"https://somewhere.com/font.eot?"
);
}
#[test]
fn removes_empty_query_amp_and_empty_fragment() {
fn removesempty_fragment_and_keeps_empty_query() {
assert_eq!(
url::clean_url("https://somewhere.com/font.eot?a=b&#"),
"https://somewhere.com/font.eot?a=b"
url::clean_url(Url::parse("https://somewhere.com/font.eot?a=b&#").unwrap()).as_str(),
"https://somewhere.com/font.eot?a=b&"
);
}
#[test]
fn keeps_credentials() {
assert_eq!(
url::clean_url("https://cookie:monster@gibson.internet/"),
url::clean_url(Url::parse("https://cookie:monster@gibson.internet/").unwrap()).as_str(),
"https://cookie:monster@gibson.internet/"
);
}

View File

@@ -7,16 +7,18 @@
#[cfg(test)]
mod passing {
use reqwest::Url;
use crate::url;
#[test]
fn encode_string_with_specific_media_type() {
let mime = "application/javascript";
let data = "var word = 'hello';\nalert(word);\n";
let data_url = url::data_to_data_url(mime, data.as_bytes(), "");
let data_url = url::create_data_url(mime, data.as_bytes(), &Url::parse("data:,").unwrap());
assert_eq!(
&data_url,
data_url.as_str(),
"data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
);
}
@@ -24,8 +26,15 @@ mod passing {
#[test]
fn encode_append_fragment() {
let data = "<svg></svg>\n";
let data_url = url::data_to_data_url("image/svg+xml", data.as_bytes(), "");
let data_url = url::create_data_url(
"image/svg+xml",
data.as_bytes(),
&Url::parse("data:,").unwrap(),
);
assert_eq!(&data_url, "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K");
assert_eq!(
data_url.as_str(),
"data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K"
);
}
}

View File

@@ -1,41 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    /// The `file://` prefix and the `#fragment` suffix are expected to be dropped,
    /// leaving a platform-native filesystem path.
    #[test]
    fn remove_protocl_and_fragment() {
        // Pick the input/expectation pair that matches the host platform.
        let (file_url, fs_path) = if cfg!(windows) {
            (
                "file:///C:/documents/some-path/some-file.svg#fragment",
                "C:\\documents\\some-path\\some-file.svg",
            )
        } else {
            (
                "file:///tmp/some-path/some-file.svg#fragment",
                "/tmp/some-path/some-file.svg",
            )
        };

        assert_eq!(url::file_url_to_fs_path(file_url), fs_path);
    }

    /// Percent-encoded spaces in the URL are expected to appear as literal spaces in the path.
    #[test]
    fn decodes_urls() {
        // Pick the input/expectation pair that matches the host platform.
        let (file_url, fs_path) = if cfg!(windows) {
            (
                "file:///C:/Documents%20and%20Settings/some-file.html",
                "C:\\Documents and Settings\\some-file.html",
            )
        } else {
            (
                "file:///home/user/My%20Documents",
                "/home/user/My Documents",
            )
        };

        assert_eq!(url::file_url_to_fs_path(file_url), fs_path);
    }
}

View File

@@ -1,48 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    /// The text after `#` in a data URL is expected to be returned as the fragment.
    #[test]
    fn data_url() {
        let fragment = url::get_url_fragment(
            "data:image/svg+xml;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h#test",
        );

        assert_eq!(fragment, "test");
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::url;

    /// A trailing `#` with nothing after it is expected to give an empty fragment.
    #[test]
    fn https_empty() {
        let fragment = url::get_url_fragment("https://kernel.org#");

        assert_eq!(fragment, "");
    }

    /// A URL without any `#` is expected to give an empty fragment.
    #[test]
    fn no_fragment() {
        let fragment = url::get_url_fragment("https://kernel.org");

        assert_eq!(fragment, "");
    }

    /// A minimal data URL without `#` is expected to give an empty fragment.
    #[test]
    fn dummy_data_url() {
        let fragment = url::get_url_fragment("data:text/html,");

        assert_eq!(fragment, "");
    }
}

View File

@@ -1,52 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    /// A base64 data URL with an explicit media type is expected to be recognised.
    #[test]
    fn data_url_text_html() {
        let input = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";

        assert!(url::is_data_url(input));
    }

    /// A base64 data URL with an empty media type is expected to be recognised.
    #[test]
    fn data_url_no_media_type() {
        let input = "data:;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";

        assert!(url::is_data_url(input));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::url;

    /// An `https://` URL is expected to be rejected.
    #[test]
    fn https_url() {
        let input = "https://kernel.org";

        assert!(!url::is_data_url(input));
    }

    /// A protocol-relative URL is expected to be rejected.
    #[test]
    fn no_protocol_url() {
        let input = "//kernel.org";

        assert!(!url::is_data_url(input));
    }

    /// The empty string is expected to be rejected.
    #[test]
    fn empty_string() {
        let input = "";

        assert!(!url::is_data_url(input));
    }
}

View File

@@ -1,83 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    /// A Unix-style `file:///` URL is expected to be recognised.
    #[test]
    fn unix_file_url() {
        let input = "file:///home/user/Websites/my-website/index.html";

        assert!(url::is_file_url(input));
    }

    /// A Windows-style `file:///C:/` URL is expected to be recognised.
    #[test]
    fn windows_file_url() {
        let input =
            "file:///C:/Documents%20and%20Settings/user/Websites/my-website/assets/images/logo.png";

        assert!(url::is_file_url(input));
    }

    /// Backslashes in place of forward slashes are expected to be tolerated (Unix-style path).
    #[test]
    fn unix_url_with_backslashes() {
        let input = "file:\\\\\\home\\user\\Websites\\my-website\\index.html";

        assert!(url::is_file_url(input));
    }

    /// Backslashes in place of forward slashes are expected to be tolerated (Windows-style path).
    #[test]
    fn windows_file_url_with_backslashes() {
        let input =
            "file:\\\\\\C:\\Documents%20and%20Settings\\user\\Websites\\my-website\\assets\\images\\logo.png";

        assert!(url::is_file_url(input));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::url;

    /// A protocol-relative URL is expected to be rejected.
    #[test]
    fn url_with_no_protocl() {
        let input = "//kernel.org";

        assert!(!url::is_file_url(input));
    }

    /// A `./`-prefixed relative path is expected to be rejected.
    #[test]
    fn dot_slash_filename() {
        let input = "./index.html";

        assert!(!url::is_file_url(input));
    }

    /// A bare file name is expected to be rejected.
    #[test]
    fn just_filename() {
        let input = "some-local-page.htm";

        assert!(!url::is_file_url(input));
    }

    /// An `https://` URL with an IP address and port is expected to be rejected.
    #[test]
    fn https_ip_port_url() {
        let input = "https://1.2.3.4:80/www/index.html";

        assert!(!url::is_file_url(input));
    }

    /// A data URL is expected to be rejected.
    #[test]
    fn data_url() {
        let input = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";

        assert!(!url::is_file_url(input));
    }

    /// The bare word `file` (no colon, no path) is expected to be rejected.
    #[test]
    fn just_word_file() {
        let input = "file";

        assert!(!url::is_file_url(input));
    }
}

View File

@@ -1,65 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    /// A plain `http://` URL is expected to be recognised.
    #[test]
    fn http_url() {
        let input = "http://kernel.org";

        assert!(url::is_http_url(input));
    }

    /// An `https://` URL is expected to be recognised as well.
    #[test]
    fn https_url() {
        let input = "https://www.rust-lang.org/";

        assert!(url::is_http_url(input));
    }

    /// Backslashes in place of forward slashes are expected to be tolerated.
    #[test]
    fn http_url_with_backslashes() {
        let input = "http:\\\\freebsd.org\\";

        assert!(url::is_http_url(input));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::url;

    /// A protocol-relative URL is expected to be rejected.
    #[test]
    fn url_with_no_protocol() {
        let input = "//kernel.org";

        assert!(!url::is_http_url(input));
    }

    /// A `./`-prefixed relative path is expected to be rejected.
    #[test]
    fn dot_slash_filename() {
        let input = "./index.html";

        assert!(!url::is_http_url(input));
    }

    /// A bare file name is expected to be rejected.
    #[test]
    fn just_filename() {
        let input = "some-local-page.htm";

        assert!(!url::is_http_url(input));
    }

    /// NOTE(review): despite the test's name, the input is an `ftp://` URL with an
    /// IP address and no port; it is expected to be rejected.
    #[test]
    fn https_ip_port_url() {
        let input = "ftp://1.2.3.4/www/index.html";

        assert!(!url::is_http_url(input));
    }

    /// A data URL is expected to be rejected.
    #[test]
    fn data_url() {
        let input = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";

        assert!(!url::is_http_url(input));
    }
}

View File

@@ -11,53 +11,63 @@ mod passing {
#[test]
fn mailto() {
assert!(url::url_has_protocol(
assert!(url::is_url_and_has_protocol(
"mailto:somebody@somewhere.com?subject=hello"
));
}
#[test]
fn tel() {
assert!(url::url_has_protocol("tel:5551234567"));
assert!(url::is_url_and_has_protocol("tel:5551234567"));
}
#[test]
fn ftp_no_slashes() {
assert!(url::url_has_protocol("ftp:some-ftp-server.com"));
assert!(url::is_url_and_has_protocol("ftp:some-ftp-server.com"));
}
#[test]
fn ftp_with_credentials() {
assert!(url::url_has_protocol(
assert!(url::is_url_and_has_protocol(
"ftp://user:password@some-ftp-server.com"
));
}
#[test]
fn javascript() {
assert!(url::url_has_protocol("javascript:void(0)"));
assert!(url::is_url_and_has_protocol("javascript:void(0)"));
}
#[test]
fn http() {
assert!(url::url_has_protocol("http://news.ycombinator.com"));
assert!(url::is_url_and_has_protocol("http://news.ycombinator.com"));
}
#[test]
fn https() {
assert!(url::url_has_protocol("https://github.com"));
assert!(url::is_url_and_has_protocol("https://github.com"));
}
#[test]
fn file() {
assert!(url::is_url_and_has_protocol("file:///tmp/image.png"));
}
#[test]
fn mailto_uppercase() {
assert!(url::url_has_protocol(
assert!(url::is_url_and_has_protocol(
"MAILTO:somebody@somewhere.com?subject=hello"
));
}
#[test]
fn empty_data_url() {
assert!(url::url_has_protocol("data:text/html,"));
assert!(url::is_url_and_has_protocol("data:text/html,"));
}
#[test]
fn empty_data_url_surrounded_by_spaces() {
assert!(url::is_url_and_has_protocol(" data:text/html, "));
}
}
@@ -74,21 +84,27 @@ mod failing {
#[test]
fn url_with_no_protocol() {
assert!(!url::url_has_protocol("//some-hostname.com/some-file.html"));
assert_eq!(
url::is_url_and_has_protocol("//some-hostname.com/some-file.html"),
false
);
}
#[test]
fn relative_path() {
assert!(!url::url_has_protocol("some-hostname.com/some-file.html"));
assert_eq!(
url::is_url_and_has_protocol("some-hostname.com/some-file.html"),
false
);
}
#[test]
fn relative_to_root_path() {
assert!(!url::url_has_protocol("/some-file.html"));
assert_eq!(url::is_url_and_has_protocol("/some-file.html"), false);
}
#[test]
fn empty_string() {
assert!(!url::url_has_protocol(""));
assert_eq!(url::is_url_and_has_protocol(""), false);
}
}

View File

@@ -1,12 +1,7 @@
mod clean_url;
mod data_to_data_url;
mod decode_url;
mod file_url_to_fs_path;
mod get_url_fragment;
mod is_data_url;
mod is_file_url;
mod is_http_url;
mod create_data_url;
mod is_url_and_has_protocol;
mod parse_data_url;
mod percent_decode;
mod percent_encode;
mod resolve_url;
mod url_has_protocol;
mod url_with_fragment;

View File

@@ -7,11 +7,13 @@
#[cfg(test)]
mod passing {
use reqwest::Url;
use crate::url;
#[test]
fn parse_text_html_base64() {
let (media_type, data) = url::parse_data_url("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
let (media_type, data) = url::parse_data_url(&Url::parse("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==").unwrap());
assert_eq!(media_type, "text/html");
assert_eq!(
@@ -23,7 +25,7 @@ mod passing {
#[test]
fn parse_text_html_utf8() {
let (media_type, data) = url::parse_data_url(
"data:text/html;utf8,Work expands so as to fill the time available for its completion",
&Url::parse("data:text/html;utf8,Work expands so as to fill the time available for its completion").unwrap(),
);
assert_eq!(media_type, "text/html");
@@ -36,7 +38,10 @@ mod passing {
#[test]
fn parse_text_html_plaintext() {
let (media_type, data) = url::parse_data_url(
"data:text/html,Work expands so as to fill the time available for its completion",
&Url::parse(
"data:text/html,Work expands so as to fill the time available for its completion",
)
.unwrap(),
);
assert_eq!(media_type, "text/html");
@@ -46,20 +51,10 @@ mod passing {
);
}
#[test]
fn parse_text_html_charset_utf_8_between_two_whitespaces() {
let (media_type, data) = url::parse_data_url(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
assert_eq!(media_type, "text/html");
assert_eq!(
String::from_utf8_lossy(&data),
"Work expands so as to fill the time available for its completion"
);
}
#[test]
fn parse_text_css_url_encoded() {
let (media_type, data) = url::parse_data_url("data:text/css,div{background-color:%23000}");
let (media_type, data) =
url::parse_data_url(&Url::parse("data:text/css,div{background-color:%23000}").unwrap());
assert_eq!(media_type, "text/css");
assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}");
@@ -67,7 +62,7 @@ mod passing {
#[test]
fn parse_no_media_type_base64() {
let (media_type, data) = url::parse_data_url("data:;base64,dGVzdA==");
let (media_type, data) = url::parse_data_url(&Url::parse("data:;base64,dGVzdA==").unwrap());
assert_eq!(media_type, "");
assert_eq!(String::from_utf8_lossy(&data), "test");
@@ -75,7 +70,7 @@ mod passing {
#[test]
fn parse_no_media_type_no_encoding() {
let (media_type, data) = url::parse_data_url("data:;,test%20test");
let (media_type, data) = url::parse_data_url(&Url::parse("data:;,test%20test").unwrap());
assert_eq!(media_type, "");
assert_eq!(String::from_utf8_lossy(&data), "test test");
@@ -91,11 +86,13 @@ mod passing {
#[cfg(test)]
mod failing {
use reqwest::Url;
use crate::url;
#[test]
fn just_word_data() {
let (media_type, data) = url::parse_data_url("data");
fn empty_data_url() {
let (media_type, data) = url::parse_data_url(&Url::parse("data:,").unwrap());
assert_eq!(media_type, "");
assert_eq!(String::from_utf8_lossy(&data), "");

View File

@@ -12,7 +12,7 @@ mod passing {
#[test]
fn decode_unicode_characters() {
assert_eq!(
url::decode_url(str!(
url::percent_decode(str!(
"%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
)),
"検ヒム解塗ゃッ = サ"
@@ -22,7 +22,7 @@ mod passing {
#[test]
fn decode_file_url() {
assert_eq!(
url::decode_url(str!("file:///tmp/space%20here/test%231.html")),
url::percent_decode(str!("file:///tmp/space%20here/test%231.html")),
"file:///tmp/space here/test#1.html"
);
}
@@ -30,7 +30,7 @@ mod passing {
#[test]
fn plus_sign() {
assert_eq!(
url::decode_url(str!(
url::percent_decode(str!(
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
)),
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"

View File

@@ -10,31 +10,7 @@ mod passing {
use crate::url;
#[test]
fn url_with_fragment_url() {
let url = "https://localhost.localdomain/path/";
let fragment = "test";
let assembled_url = url::url_with_fragment(url, fragment);
assert_eq!(&assembled_url, "https://localhost.localdomain/path/#test");
}
#[test]
fn url_with_fragment_empty_url() {
let url = "https://localhost.localdomain/path/";
let fragment = "";
let assembled_url = url::url_with_fragment(url, fragment);
assert_eq!(&assembled_url, "https://localhost.localdomain/path/");
}
#[test]
fn url_with_fragment_data_url() {
let url = "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K";
let fragment = "fragment";
let assembled_url = url::url_with_fragment(url, fragment);
assert_eq!(
&assembled_url,
"data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K#fragment"
);
fn apostrophe() {
assert_eq!(url::percent_encode(str!("'")), "%27");
}
}

View File

@@ -7,26 +7,49 @@
#[cfg(test)]
mod passing {
use reqwest::Url;
use crate::url;
#[test]
fn from_https_to_level_up_relative() {
fn basic_httsp_relative() {
assert_eq!(
url::resolve_url("https://www.kernel.org", "../category/signatures.html")
.unwrap_or_default(),
"https://www.kernel.org/category/signatures.html"
url::resolve_url(
&Url::parse("https://www.kernel.org").unwrap(),
"category/signatures.html"
)
.as_str(),
Url::parse("https://www.kernel.org/category/signatures.html")
.unwrap()
.as_str()
);
}
#[test]
fn from_just_filename_to_full_https_url() {
fn basic_httsp_absolute() {
assert_eq!(
url::resolve_url(
"saved_page.htm",
"https://www.kernel.org/category/signatures.html",
&Url::parse("https://www.kernel.org").unwrap(),
"/category/signatures.html"
)
.unwrap_or_default(),
"https://www.kernel.org/category/signatures.html"
.as_str(),
Url::parse("https://www.kernel.org/category/signatures.html")
.unwrap()
.as_str()
);
}
#[test]
fn from_https_to_level_up_relative() {
    // A "../" reference against a host-only base URL has no parent directory
    // to climb to, so the result is expected to stay rooted at the host.
    let base_url = Url::parse("https://www.kernel.org").unwrap();
    let expected_url = Url::parse("https://www.kernel.org/category/signatures.html").unwrap();

    let resolved_url = url::resolve_url(&base_url, "../category/signatures.html");

    assert_eq!(resolved_url.as_str(), expected_url.as_str());
}
@@ -34,10 +57,10 @@ mod passing {
fn from_https_url_to_url_with_no_protocol() {
assert_eq!(
url::resolve_url(
"https://www.kernel.org",
&Url::parse("https://www.kernel.org").unwrap(),
"//www.kernel.org/theme/images/logos/tux.png",
)
.unwrap_or_default(),
.as_str(),
"https://www.kernel.org/theme/images/logos/tux.png"
);
}
@@ -46,22 +69,22 @@ mod passing {
fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() {
assert_eq!(
url::resolve_url(
"https://www.kernel.org",
&Url::parse("https://www.kernel.org").unwrap(),
"//another-host.org/theme/images/logos/tux.png",
)
.unwrap_or_default(),
.as_str(),
"https://another-host.org/theme/images/logos/tux.png"
);
}
#[test]
fn from_https_url_to_relative_root_path() {
fn from_https_url_to_absolute_path() {
assert_eq!(
url::resolve_url(
"https://www.kernel.org/category/signatures.html",
&Url::parse("https://www.kernel.org/category/signatures.html").unwrap(),
"/theme/images/logos/tux.png",
)
.unwrap_or_default(),
.as_str(),
"https://www.kernel.org/theme/images/logos/tux.png"
);
}
@@ -70,10 +93,10 @@ mod passing {
fn from_https_to_just_filename() {
assert_eq!(
url::resolve_url(
"https://www.w3schools.com/html/html_iframe.asp",
&Url::parse("https://www.w3schools.com/html/html_iframe.asp").unwrap(),
"default.asp",
)
.unwrap_or_default(),
.as_str(),
"https://www.w3schools.com/html/default.asp"
);
}
@@ -82,10 +105,11 @@ mod passing {
fn from_data_url_to_https() {
assert_eq!(
url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
&Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h")
.unwrap(),
"https://www.kernel.org/category/signatures.html",
)
.unwrap_or_default(),
.as_str(),
"https://www.kernel.org/category/signatures.html"
);
}
@@ -94,10 +118,11 @@ mod passing {
fn from_data_url_to_data_url() {
assert_eq!(
url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
&Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h")
.unwrap(),
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K",
)
.unwrap_or_default(),
.as_str(),
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K"
);
}
@@ -106,10 +131,10 @@ mod passing {
fn from_file_url_to_relative_path() {
assert_eq!(
url::resolve_url(
"file:///home/user/Websites/my-website/index.html",
&Url::parse("file:///home/user/Websites/my-website/index.html").unwrap(),
"assets/images/logo.png",
)
.unwrap_or_default(),
.as_str(),
"file:///home/user/Websites/my-website/assets/images/logo.png"
);
}
@@ -118,10 +143,10 @@ mod passing {
fn from_file_url_to_relative_path_with_backslashes() {
assert_eq!(
url::resolve_url(
"file:\\\\\\home\\user\\Websites\\my-website\\index.html",
&Url::parse("file:\\\\\\home\\user\\Websites\\my-website\\index.html").unwrap(),
"assets\\images\\logo.png",
)
.unwrap_or_default(),
.as_str(),
"file:///home/user/Websites/my-website/assets/images/logo.png"
);
}
@@ -130,10 +155,11 @@ mod passing {
fn from_data_url_to_file_url() {
assert_eq!(
url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
&Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h")
.unwrap(),
"file:///etc/passwd",
)
.unwrap_or_default(),
.as_str(),
"file:///etc/passwd"
);
}
@@ -142,30 +168,35 @@ mod passing {
fn preserve_fragment() {
assert_eq!(
url::resolve_url(
"http://doesnt-matter.local/",
&Url::parse("http://doesnt-matter.local/").unwrap(),
"css/fonts/fontmarvelous.svg#fontmarvelous",
)
.unwrap_or_default(),
.as_str(),
"http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous"
);
}
#[test]
fn resolve_from_file_url_to_file_url() {
assert_eq!(
if cfg!(windows) {
url::resolve_url("file:///c:/index.html", "file:///c:/image.png")
.unwrap_or_default()
} else {
url::resolve_url("file:///tmp/index.html", "file:///tmp/image.png")
.unwrap_or_default()
},
if cfg!(windows) {
if cfg!(windows) {
assert_eq!(
url::resolve_url(
&Url::parse("file:///c:/index.html").unwrap(),
"file:///c:/image.png"
)
.as_str(),
"file:///c:/image.png"
} else {
);
} else {
assert_eq!(
url::resolve_url(
&Url::parse("file:///tmp/index.html").unwrap(),
"file:///tmp/image.png"
)
.as_str(),
"file:///tmp/image.png"
}
);
);
}
}
}
@@ -178,17 +209,20 @@ mod passing {
#[cfg(test)]
mod failing {
use reqwest::Url;
use crate::url;
#[test]
fn from_data_url_to_url_with_no_protocol() {
assert_eq!(
url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
&Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h")
.unwrap(),
"//www.w3schools.com/html/html_iframe.asp",
)
.unwrap_or_default(),
""
.as_str(),
"data:,"
);
}
}

View File

@@ -7,131 +7,171 @@
#[cfg(test)]
mod passing {
use reqwest::Url;
use crate::utils;
#[test]
fn image_gif87() {
assert_eq!(utils::detect_media_type(b"GIF87a", ""), "image/gif");
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(utils::detect_media_type(b"GIF87a", &dummy_url), "image/gif");
}
#[test]
fn image_gif89() {
assert_eq!(utils::detect_media_type(b"GIF89a", ""), "image/gif");
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(utils::detect_media_type(b"GIF89a", &dummy_url), "image/gif");
}
#[test]
fn image_jpeg() {
assert_eq!(utils::detect_media_type(b"\xFF\xD8\xFF", ""), "image/jpeg");
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"\xFF\xD8\xFF", &dummy_url),
"image/jpeg"
);
}
#[test]
fn image_png() {
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A", ""),
utils::detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A", &dummy_url),
"image/png"
);
}
#[test]
fn image_svg() {
assert_eq!(utils::detect_media_type(b"<svg ", ""), "image/svg+xml");
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"<svg ", &dummy_url),
"image/svg+xml"
);
}
#[test]
fn image_webp() {
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"RIFF....WEBPVP8 ", ""),
utils::detect_media_type(b"RIFF....WEBPVP8 ", &dummy_url),
"image/webp"
);
}
#[test]
fn image_icon() {
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"\x00\x00\x01\x00", ""),
utils::detect_media_type(b"\x00\x00\x01\x00", &dummy_url),
"image/x-icon"
);
}
#[test]
fn image_svg_filename() {
let file_url: Url = Url::parse("file:///tmp/local-file.svg").unwrap();
assert_eq!(
utils::detect_media_type(b"<?xml ", "local-file.svg"),
utils::detect_media_type(b"<?xml ", &file_url),
"image/svg+xml"
);
}
#[test]
fn image_svg_url_uppercase() {
assert_eq!(
utils::detect_media_type(b"", "https://some-site.com/images/local-file.SVG"),
"image/svg+xml"
);
let https_url: Url = Url::parse("https://some-site.com/images/local-file.SVG").unwrap();
assert_eq!(utils::detect_media_type(b"", &https_url), "image/svg+xml");
}
#[test]
fn audio_mpeg() {
assert_eq!(utils::detect_media_type(b"ID3", ""), "audio/mpeg");
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(utils::detect_media_type(b"ID3", &dummy_url), "audio/mpeg");
}
#[test]
fn audio_mpeg_2() {
assert_eq!(utils::detect_media_type(b"\xFF\x0E", ""), "audio/mpeg");
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"\xFF\x0E", &dummy_url),
"audio/mpeg"
);
}
#[test]
fn audio_mpeg_3() {
assert_eq!(utils::detect_media_type(b"\xFF\x0F", ""), "audio/mpeg");
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"\xFF\x0F", &dummy_url),
"audio/mpeg"
);
}
#[test]
fn audio_ogg() {
assert_eq!(utils::detect_media_type(b"OggS", ""), "audio/ogg");
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(utils::detect_media_type(b"OggS", &dummy_url), "audio/ogg");
}
#[test]
fn audio_wav() {
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"RIFF....WAVEfmt ", ""),
utils::detect_media_type(b"RIFF....WAVEfmt ", &dummy_url),
"audio/wav"
);
}
#[test]
fn audio_flac() {
assert_eq!(utils::detect_media_type(b"fLaC", ""), "audio/x-flac");
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"fLaC", &dummy_url),
"audio/x-flac"
);
}
#[test]
fn video_avi() {
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"RIFF....AVI LIST", ""),
utils::detect_media_type(b"RIFF....AVI LIST", &dummy_url),
"video/avi"
);
}
#[test]
fn video_mp4() {
assert_eq!(utils::detect_media_type(b"....ftyp", ""), "video/mp4");
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"....ftyp", &dummy_url),
"video/mp4"
);
}
#[test]
fn video_mpeg() {
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"\x00\x00\x01\x0B", ""),
utils::detect_media_type(b"\x00\x00\x01\x0B", &dummy_url),
"video/mpeg"
);
}
#[test]
fn video_quicktime() {
assert_eq!(utils::detect_media_type(b"....moov", ""), "video/quicktime");
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"....moov", &dummy_url),
"video/quicktime"
);
}
#[test]
fn video_webm() {
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"\x1A\x45\xDF\xA3", ""),
utils::detect_media_type(b"\x1A\x45\xDF\xA3", &dummy_url),
"video/webm"
);
}
@@ -146,10 +186,16 @@ mod passing {
#[cfg(test)]
mod failing {
use reqwest::Url;
use crate::utils;
#[test]
fn unknown_media_type() {
assert_eq!(utils::detect_media_type(b"abcdef0123456789", ""), "");
let dummy_url: Url = Url::parse("data:,").unwrap();
assert_eq!(
utils::detect_media_type(b"abcdef0123456789", &dummy_url),
""
);
}
}

View File

@@ -8,6 +8,7 @@
#[cfg(test)]
mod passing {
use reqwest::blocking::Client;
use reqwest::Url;
use std::collections::HashMap;
use std::env;
@@ -24,23 +25,31 @@ mod passing {
options.silent = true;
// If both source and target are data URLs,
// ensure the result contains target data URL
// ensure the result contains target data URL
let (data, final_url, media_type) = utils::retrieve_asset(
cache,
&client,
"data:text/html;base64,c291cmNl",
"data:text/html;base64,dGFyZ2V0",
&Url::parse("data:text/html;base64,c291cmNl").unwrap(),
&Url::parse("data:text/html;base64,dGFyZ2V0").unwrap(),
&options,
0,
)
.unwrap();
assert_eq!(
url::data_to_data_url(&media_type, &data, &final_url),
url::data_to_data_url("text/html", "target".as_bytes(), "")
url::create_data_url(&media_type, &data, &final_url),
url::create_data_url(
"text/html",
"target".as_bytes(),
&Url::parse("data:text/html;base64,c291cmNl").unwrap()
)
);
assert_eq!(
final_url,
url::data_to_data_url("text/html", "target".as_bytes(), "")
url::create_data_url(
"text/html",
"target".as_bytes(),
&Url::parse("data:text/html;base64,c291cmNl").unwrap()
)
);
assert_eq!(&media_type, "text/html");
}
@@ -60,28 +69,31 @@ mod passing {
let (data, final_url, _media_type) = utils::retrieve_asset(
cache,
&client,
&format!(
&Url::parse(&format!(
"{file}{cwd}/src/tests/data/basic/local-file.html",
file = file_url_protocol,
cwd = cwd.to_str().unwrap()
),
&format!(
))
.unwrap(),
&Url::parse(&format!(
"{file}{cwd}/src/tests/data/basic/local-script.js",
file = file_url_protocol,
cwd = cwd.to_str().unwrap()
),
))
.unwrap(),
&options,
0,
)
.unwrap();
assert_eq!(url::data_to_data_url("application/javascript", &data, &final_url), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
assert_eq!(url::create_data_url("application/javascript", &data, &final_url), Url::parse("data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==").unwrap());
assert_eq!(
&final_url,
&format!(
final_url,
Url::parse(&format!(
"{file}{cwd}/src/tests/data/basic/local-script.js",
file = file_url_protocol,
cwd = cwd.to_str().unwrap()
)
))
.unwrap()
);
}
}
@@ -96,6 +108,7 @@ mod passing {
#[cfg(test)]
mod failing {
use reqwest::blocking::Client;
use reqwest::Url;
use std::collections::HashMap;
use crate::opts::Options;
@@ -113,8 +126,8 @@ mod failing {
match utils::retrieve_asset(
cache,
&client,
"data:text/html;base64,SoUrCe",
"file:///etc/passwd",
&Url::parse("data:text/html;base64,SoUrCe").unwrap(),
&Url::parse("file:///etc/passwd").unwrap(),
&options,
0,
) {
@@ -139,8 +152,8 @@ mod failing {
match utils::retrieve_asset(
cache,
&client,
"https://kernel.org/",
"file:///etc/passwd",
&Url::parse("https://kernel.org/").unwrap(),
&Url::parse("file:///etc/passwd").unwrap(),
&options,
0,
) {

View File

@@ -1,112 +1,50 @@
use base64;
use url::{form_urlencoded, ParseError, Url};
use url::{form_urlencoded, Url};
use crate::utils::detect_media_type;
pub fn clean_url<T: AsRef<str>>(input: T) -> String {
let mut url = Url::parse(input.as_ref()).unwrap();
pub fn clean_url(url: Url) -> Url {
let mut url = url.clone();
// Clear fragment
// Clear fragment (if any)
url.set_fragment(None);
// Get rid of stray question mark
if url.query() == Some("") {
url.set_query(None);
}
// Remove empty trailing ampersand(s)
let mut result: String = url.to_string();
while result.ends_with("&") {
result.pop();
}
result
url
}
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str) -> String {
pub fn create_data_url(media_type: &str, data: &[u8], final_asset_url: &Url) -> Url {
let media_type: String = if media_type.is_empty() {
detect_media_type(data, &url)
detect_media_type(data, &final_asset_url)
} else {
media_type.to_string()
};
format!("data:{};base64,{}", media_type, base64::encode(data))
let mut data_url: Url = Url::parse("data:,").unwrap();
data_url.set_path(format!("{};base64,{}", media_type, base64::encode(data)).as_str());
data_url
}
/// Decodes every `%XX` escape sequence found in `input`.
///
/// The input is treated as an opaque string rather than as a list of
/// `key=value` pairs, so separators such as `&` and `=` are passed through
/// untouched (splitting the input into form pairs and gluing them back
/// together loses every `&` and any `=` that precedes an empty value).
///
/// * `+` is kept as a literal plus sign; it is never turned into a space.
/// * A `%` that is not followed by two hexadecimal digits is kept as-is.
/// * Decoded bytes that do not form valid UTF-8 are replaced with U+FFFD.
pub fn decode_url(input: String) -> String {
    /// Numeric value of an ASCII hexadecimal digit, or `None` for any other byte.
    fn hex_value(byte: u8) -> Option<u8> {
        (byte as char).to_digit(16).map(|digit| digit as u8)
    }

    let bytes: &[u8] = input.as_bytes();
    let mut decoded: Vec<u8> = Vec::with_capacity(bytes.len());
    let mut pos: usize = 0;

    while pos < bytes.len() {
        if bytes[pos] == b'%' {
            let high = bytes.get(pos + 1).copied().and_then(hex_value);
            let low = bytes.get(pos + 2).copied().and_then(hex_value);

            if let (Some(high), Some(low)) = (high, low) {
                decoded.push((high << 4) | low);
                pos += 3;
                continue;
            }
        }

        // Ordinary byte, or a malformed escape that is copied through verbatim
        decoded.push(bytes[pos]);
        pos += 1;
    }

    // Escapes may decode to arbitrary bytes, hence the lossy conversion
    String::from_utf8_lossy(&decoded).into_owned()
}
/// Converts a `file:` URL string into a file-system path string.
///
/// Returns `str!()` (presumably an empty `String` -- confirm against the
/// macro definition) when `is_file_url(url)` is false. Otherwise the leading
/// scheme prefix and any trailing `#fragment` are cut off, the remainder is
/// percent-decoded via `decode_url`, and on Windows forward slashes are
/// replaced with backslashes.
pub fn file_url_to_fs_path(url: &str) -> String {
if !is_file_url(url) {
return str!();
}
// 8 and 7 are the byte lengths of "file:///" and "file://" respectively;
// presumably the shorter cut keeps the leading "/" of absolute paths on
// non-Windows systems -- confirm
let cutoff_l = if cfg!(windows) { 8 } else { 7 };
// NOTE(review): this slice panics if `url` is shorter than `cutoff_l` bytes
// or if `cutoff_l` does not fall on a character boundary
let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string());
let url_fragment = get_url_fragment(url);
if url_fragment != "" {
// Drops the last `1 + url_fragment.len()` bytes, presumably "#" followed by
// the fragment.
// NOTE(review): the length is taken from the fragment as returned by
// `get_url_fragment`, while the path has already been decoded; if the two
// differ in length the cut lands in the wrong place, and the subtraction
// can underflow when the path is shorter than the fragment -- verify
let max_len = fs_file_path.len() - 1 - url_fragment.len();
fs_file_path = fs_file_path[0..max_len].to_string();
}
if cfg!(windows) {
fs_file_path = fs_file_path.replace("/", "\\");
}
// File paths should not be %-encoded
// (note that this is a second `decode_url` pass over already-decoded text)
decode_url(fs_file_path)
}
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
match Url::parse(url.as_ref()) {
Ok(parsed_url) => parsed_url.fragment().unwrap_or("").to_string(),
Err(_err) => str!(),
pub fn is_url_and_has_protocol(input: &str) -> bool {
match Url::parse(&input) {
Ok(parsed_url) => {
return parsed_url.scheme().len() > 0;
}
Err(_) => {
return false;
}
}
}
/// Reports whether `url` parses as a URL whose scheme is `data`.
///
/// Input that cannot be parsed as a URL yields `false`.
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
    match Url::parse(url.as_ref()) {
        Ok(parsed_url) => parsed_url.scheme() == "data",
        Err(_) => false,
    }
}
/// Reports whether `url` parses as a URL whose scheme is `file`.
///
/// Input that cannot be parsed as a URL yields `false`.
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
    match Url::parse(url.as_ref()) {
        Ok(parsed_url) => parsed_url.scheme() == "file",
        Err(_) => false,
    }
}
/// Reports whether `url` parses as a URL whose scheme is `http` or `https`.
///
/// Input that cannot be parsed as a URL yields `false`.
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
    match Url::parse(url.as_ref()) {
        Ok(parsed_url) => matches!(parsed_url.scheme(), "http" | "https"),
        Err(_) => false,
    }
}
pub fn parse_data_url<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
let path: String = parsed_url.path().to_string();
pub fn parse_data_url(url: &Url) -> (String, Vec<u8>) {
let path: String = url.path().to_string();
let comma_loc: usize = path.find(',').unwrap_or(path.len());
let meta_data: String = path.chars().take(comma_loc).collect();
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
let text: String = decode_url(raw_data);
let text: String = percent_decode(raw_data);
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
let mut media_type: String = str!();
@@ -137,31 +75,35 @@ pub fn parse_data_url<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
(media_type, data)
}
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
let result = if is_http_url(to.as_ref()) {
to.as_ref().to_string()
} else {
Url::parse(from.as_ref())?
.join(to.as_ref())?
.as_ref()
.to_string()
};
Ok(result)
/// Decodes every `%XX` escape sequence found in `input`.
///
/// The input is treated as an opaque string rather than as a list of
/// `key=value` pairs, so separators such as `&` and `=` are passed through
/// untouched (splitting the input into form pairs and gluing them back
/// together loses every `&` and any `=` that precedes an empty value).
///
/// * `+` is kept as a literal plus sign; it is never turned into a space.
/// * A `%` that is not followed by two hexadecimal digits is kept as-is.
/// * Decoded bytes that do not form valid UTF-8 are replaced with U+FFFD.
pub fn percent_decode(input: String) -> String {
    /// Numeric value of an ASCII hexadecimal digit, or `None` for any other byte.
    fn hex_value(byte: u8) -> Option<u8> {
        (byte as char).to_digit(16).map(|digit| digit as u8)
    }

    let bytes: &[u8] = input.as_bytes();
    let mut decoded: Vec<u8> = Vec::with_capacity(bytes.len());
    let mut pos: usize = 0;

    while pos < bytes.len() {
        if bytes[pos] == b'%' {
            let high = bytes.get(pos + 1).copied().and_then(hex_value);
            let low = bytes.get(pos + 2).copied().and_then(hex_value);

            if let (Some(high), Some(low)) = (high, low) {
                decoded.push((high << 4) | low);
                pos += 3;
                continue;
            }
        }

        // Ordinary byte, or a malformed escape that is copied through verbatim
        decoded.push(bytes[pos]);
        pos += 1;
    }

    // Escapes may decode to arbitrary bytes, hence the lossy conversion
    String::from_utf8_lossy(&decoded).into_owned()
}
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme().len() > 0))
.unwrap_or(false)
/// Encodes `input` by running its bytes through
/// `form_urlencoded::byte_serialize` and concatenating the emitted pieces.
///
/// NOTE(review): `byte_serialize` implements form-urlencoding, which is
/// documented to emit `+` for a space rather than `%20` -- confirm this is
/// what callers expect from a function named "percent encode".
pub fn percent_encode(input: String) -> String {
    let mut encoded = String::new();

    for piece in form_urlencoded::byte_serialize(input.as_bytes()) {
        encoded.push_str(piece);
    }

    encoded
}
pub fn url_with_fragment(url: &str, fragment: &str) -> String {
let mut result = str!(&url);
if !fragment.is_empty() {
result += "#";
result += fragment;
pub fn resolve_url(from: &Url, to: &str) -> Url {
match Url::parse(&to) {
Ok(parsed_url) => parsed_url,
Err(_) => match from.join(to) {
Ok(joined) => joined,
Err(_) => Url::parse("data:,").unwrap(),
},
}
result
}

View File

@@ -2,15 +2,14 @@ use reqwest::blocking::Client;
use reqwest::header::CONTENT_TYPE;
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use std::path::{Path, PathBuf};
use url::Url;
use crate::opts::Options;
use crate::url::{clean_url, file_url_to_fs_path, is_data_url, is_file_url, parse_data_url};
use crate::url::{clean_url, parse_data_url};
const ANSI_COLOR_RED: &'static str = "\x1b[31m";
const ANSI_COLOR_RESET: &'static str = "\x1b[0m";
const INDENT: &'static str = " ";
const MAGIC: [[&[u8]; 2]; 18] = [
// Image
[b"GIF87a", b"image/gif"],
@@ -34,24 +33,16 @@ const MAGIC: [[&[u8]; 2]; 18] = [
[b"....moov", b"video/quicktime"],
[b"\x1A\x45\xDF\xA3", b"video/webm"],
];
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
"application/javascript",
"image/svg+xml",
// "text/css",
// "text/csv",
// "text/html",
// "text/javascript",
// "text/plain",
];
const PLAINTEXT_MEDIA_TYPES: &[&str] = &["application/javascript", "image/svg+xml"];
pub fn detect_media_type(data: &[u8], url: &str) -> String {
for item in MAGIC.iter() {
if data.starts_with(item[0]) {
return String::from_utf8(item[1].to_vec()).unwrap();
pub fn detect_media_type(data: &[u8], url: &Url) -> String {
for magic_item in MAGIC.iter() {
if data.starts_with(magic_item[0]) {
return String::from_utf8(magic_item[1].to_vec()).unwrap();
}
}
if url.to_lowercase().ends_with(".svg") {
if url.path().to_lowercase().ends_with(".svg") {
return str!("image/svg+xml");
}
@@ -64,68 +55,109 @@ pub fn is_plaintext_media_type(media_type: &str) -> bool {
}
pub fn indent(level: u32) -> String {
let mut result = str!();
let mut result: String = String::new();
let mut l: u32 = level;
while l > 0 {
result += INDENT;
result += " ";
l -= 1;
}
result
}
pub fn retrieve_asset(
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
parent_url: &str,
url: &str,
parent_url: &Url,
url: &Url,
options: &Options,
depth: u32,
) -> Result<(Vec<u8>, String, String), reqwest::Error> {
if url.len() == 0 {
// Provoke error
client.get("").send()?;
}
if is_data_url(&url) {
) -> Result<(Vec<u8>, Url, String), reqwest::Error> {
if url.scheme() == "data" {
let (media_type, data) = parse_data_url(url);
Ok((data, url.to_string(), media_type))
} else if is_file_url(&url) {
// Check if parent_url is also file:///
// (if not, then we don't embed the asset)
if !is_file_url(&parent_url) {
Ok((data, url.clone(), media_type))
} else if url.scheme() == "file" {
// Check if parent_url is also file:/// (if not, then we don't embed the asset)
if parent_url.scheme() != "file" {
if !options.silent {
eprintln!(
"{}{}{} ({}){}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
"Security Error",
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
// Provoke error
client.get("").send()?;
}
let fs_file_path: String = file_url_to_fs_path(url);
let path = Path::new(&fs_file_path);
let path_buf: PathBuf = url.to_file_path().unwrap().clone();
let path: &Path = path_buf.as_path();
if path.exists() {
if path.is_dir() {
if !options.silent {
eprintln!(
"{}{}{} (is a directory){}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
// Provoke error
Err(client.get("").send().unwrap_err())
} else {
if !options.silent {
eprintln!("{}{}", indent(depth).as_str(), &url);
}
Ok((fs::read(&path).expect(""), url.clone(), str!()))
}
} else {
if !options.silent {
eprintln!("{}{}", indent(depth).as_str(), &url);
eprintln!(
"{}{}{} (not found){}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
Ok((fs::read(&fs_file_path).expect(""), url.to_string(), str!()))
} else {
// Provoke error
Err(client.get("").send().unwrap_err())
}
} else {
let cache_key: String = clean_url(&url);
let cache_key: String = clean_url(url.clone()).as_str().to_string();
if cache.contains_key(&cache_key) {
// URL is in cache, we get and return it
// URL is in cache,
// we get and return it
if !options.silent {
eprintln!("{}{} (from cache)", indent(depth).as_str(), &url);
}
Ok((
cache.get(&cache_key).unwrap().to_vec(),
url.to_string(),
str!(),
))
Ok((cache.get(&cache_key).unwrap().to_vec(), url.clone(), str!()))
} else {
// URL not in cache, we retrieve the file
match client.get(url).send() {
// URL not in cache,
// we retrieve the file
match client.get(url.as_str()).send() {
Ok(mut response) => {
if !options.ignore_errors && response.status() != 200 {
if !options.silent {
@@ -146,24 +178,22 @@ pub fn retrieve_asset(
return Err(client.get("").send().unwrap_err());
}
let res_url = response.url().to_string();
if !options.silent {
if url == res_url {
if url.as_str() == response.url().as_str() {
eprintln!("{}{}", indent(depth).as_str(), &url);
} else {
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &res_url);
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &response.url());
}
}
let new_cache_key: String = clean_url(&res_url);
let new_cache_key: String = clean_url(response.url().clone()).to_string();
// Convert response into a byte array
let mut data: Vec<u8> = vec![];
response.copy_to(&mut data)?;
response.copy_to(&mut data).unwrap();
// Attempt to obtain media type by reading the Content-Type header
let media_type = response
// Attempt to obtain media type by reading Content-Type header
let media_type: &str = response
.headers()
.get(CONTENT_TYPE)
.and_then(|header| header.to_str().ok())
@@ -172,9 +202,27 @@ pub fn retrieve_asset(
// Add retrieved resource to cache
cache.insert(new_cache_key, data.clone());
Ok((data, res_url, media_type.to_string()))
// Return
Ok((data, response.url().clone(), media_type.to_string()))
}
Err(error) => {
if !options.silent {
eprintln!(
"{}{}{} ({}){}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
error,
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
Err(client.get("").send().unwrap_err())
}
Err(error) => Err(error),
}
}
}