Compare commits
22 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
81b304c558 | ||
|
|
a3e82a2ad8 | ||
|
|
a5bf1705db | ||
|
|
78c37958dc | ||
|
|
20c56a5440 | ||
|
|
37416f827b | ||
|
|
7f123e810b | ||
|
|
db04d11d99 | ||
|
|
1c8d4f1830 | ||
|
|
1c71e708e1 | ||
|
|
a1bb9a4b74 | ||
|
|
cf7e368545 | ||
|
|
c1edde9b3e | ||
|
|
7c0504c4cb | ||
|
|
1bff2c22ba | ||
|
|
8113509dcf | ||
|
|
8fc0fc155f | ||
|
|
7c61b462dd | ||
|
|
ef3684025b | ||
|
|
db7ee697b3 | ||
|
|
89ce5029b9 | ||
|
|
54609b10e5 |
3
.github/FUNDING.yml
vendored
Normal file
3
.github/FUNDING.yml
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
# These are supported funding model platforms
|
||||
|
||||
github: snshn
|
||||
738
Cargo.lock
generated
738
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
34
Cargo.toml
34
Cargo.toml
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "monolith"
|
||||
version = "2.6.2"
|
||||
version = "2.8.0"
|
||||
authors = [
|
||||
"Sunshine <sunshine@uberspace.net>",
|
||||
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
|
||||
@@ -22,22 +22,28 @@ include = [
|
||||
license = "CC0-1.0"
|
||||
|
||||
[dependencies]
|
||||
atty = "0.2.14" # Used for highlighting network errors
|
||||
base64 = "0.13.0" # Used for integrity attributes
|
||||
chrono = "0.4.20" # Used for formatting creation timestamp
|
||||
clap = "3.2.16"
|
||||
cssparser = "0.29.6"
|
||||
encoding_rs = "0.8.31"
|
||||
html5ever = "0.24.1"
|
||||
percent-encoding = "2.1.0"
|
||||
regex = "1.6.0" # Used for parsing srcset and NOSCRIPT
|
||||
sha2 = "0.10.2" # Used for calculating checksums during integrity checks
|
||||
url = "2.2.2"
|
||||
atty = "0.2.14" # Used for highlighting network errors
|
||||
base64 = "0.21.7" # Used for integrity attributes
|
||||
chrono = "0.4.31" # Used for formatting creation timestamp
|
||||
clap = "3.2.25" # Used for processing CLI arguments
|
||||
cssparser = "0.33.0" # Used for dealing with CSS
|
||||
encoding_rs = "0.8.33" # Used for parsing and converting document charsets
|
||||
html5ever = "0.24.1" # Used for all things DOM
|
||||
percent-encoding = "2.3.1"
|
||||
sha2 = "0.10.8" # Used for calculating checksums during integrity checks
|
||||
url = "2.5.0"
|
||||
|
||||
# Used for parsing srcset and NOSCRIPT
|
||||
[dependencies.regex]
|
||||
version = "1.10.2"
|
||||
default-features = false
|
||||
features = ["std", "perf-dfa", "unicode-perl"]
|
||||
|
||||
# Used for making network requests
|
||||
[dependencies.reqwest]
|
||||
version = "0.11.11"
|
||||
version = "0.11.23"
|
||||
default-features = false
|
||||
features = ["default-tls", "blocking", "gzip", "brotli", "deflate"]
|
||||
|
||||
[dev-dependencies]
|
||||
assert_cmd = "2.0.4"
|
||||
assert_cmd = "2.0.13"
|
||||
|
||||
60
README.md
60
README.md
@@ -24,7 +24,7 @@ If compared to saving websites with `wget -mpk`, this tool embeds all assets as
|
||||
|
||||
## Installation
|
||||
|
||||
#### Using [Cargo](https://crates.io/crates/monolith)
|
||||
#### Using [Cargo](https://crates.io/crates/monolith) (cross-platform)
|
||||
|
||||
```console
|
||||
cargo install monolith
|
||||
@@ -36,6 +36,12 @@ cargo install monolith
|
||||
brew install monolith
|
||||
```
|
||||
|
||||
#### Via [Chocolatey](https://community.chocolatey.org/packages/monolith) (Windows)
|
||||
|
||||
```console
|
||||
choco install monolith
|
||||
```
|
||||
|
||||
#### Via [MacPorts](https://ports.macports.org/port/monolith/summary) (macOS)
|
||||
|
||||
```console
|
||||
@@ -48,6 +54,24 @@ sudo port install monolith
|
||||
snap install monolith
|
||||
```
|
||||
|
||||
#### Using [Guix](https://packages.guix.gnu.org/packages/monolith) (GNU/Linux)
|
||||
|
||||
```console
|
||||
guix install monolith
|
||||
```
|
||||
|
||||
#### Using [AUR](https://aur.archlinux.org/packages/monolith) (Arch Linux)
|
||||
|
||||
```console
|
||||
yay monolith
|
||||
```
|
||||
|
||||
#### Using [aports](https://pkgs.alpinelinux.org/packages?name=monolith) (Alpine Linux)
|
||||
|
||||
```console
|
||||
apk add monolith
|
||||
```
|
||||
|
||||
#### Using [FreeBSD packages](https://svnweb.freebsd.org/ports/head/www/monolith/) (FreeBSD)
|
||||
|
||||
```console
|
||||
@@ -111,9 +135,12 @@ cat index.html | monolith -aIiFfcMv -b https://original.site/ - > result.html
|
||||
|
||||
- `-a`: Exclude audio sources
|
||||
- `-b`: Use custom `base URL`
|
||||
- `-B`: Forbid retrieving assets from specified domain(s)
|
||||
- `-c`: Exclude CSS
|
||||
- `-C`: Save document using custom `charset`
|
||||
- `-C`: Read cookies from `file`
|
||||
- `-d`: Allow retrieving assets only from specified `domain(s)`
|
||||
- `-e`: Ignore network errors
|
||||
- `-E`: Save document using custom `encoding`
|
||||
- `-f`: Omit frames
|
||||
- `-F`: Exclude web fonts
|
||||
- `-i`: Remove images
|
||||
@@ -132,6 +159,35 @@ cat index.html | monolith -aIiFfcMv -b https://original.site/ - > result.html
|
||||
---------------------------------------------------
|
||||
|
||||
|
||||
## Whitelisting and blacklisting domains
|
||||
|
||||
Options `-d` and `-B` provide control over what domains can be used to retrieve assets from, e.g.:
|
||||
|
||||
```console
|
||||
monolith -I -d example.com -d www.example.com https://example.com -o example-only.html
|
||||
```
|
||||
|
||||
```console
|
||||
monolith -I -B -d .googleusercontent.com -d googleanalytics.com -d .google.com https://example.com -o example-no-ads.html
|
||||
```
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
|
||||
## Dynamic content
|
||||
|
||||
Monolith doesn't feature a JavaScript engine, hence websites that retrieve and display data after initial load may require usage of additional tools.
|
||||
|
||||
For example, Chromium (Chrome) can be used to act as a pre-processor for such pages:
|
||||
|
||||
```console
|
||||
chromium --headless --incognito --dump-dom https://github.com | monolith - -I -b https://github.com -o github.html
|
||||
```
|
||||
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
|
||||
## Proxies
|
||||
|
||||
Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables.
|
||||
|
||||
119
src/cookies.rs
Normal file
119
src/cookies.rs
Normal file
@@ -0,0 +1,119 @@
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use url::Url;
|
||||
|
||||
pub struct Cookie {
|
||||
pub domain: String,
|
||||
pub include_subdomains: bool,
|
||||
pub path: String,
|
||||
pub https_only: bool,
|
||||
pub expires: u64,
|
||||
pub name: String,
|
||||
pub value: String,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum CookieFileContentsParseError {
|
||||
InvalidHeader,
|
||||
}
|
||||
|
||||
impl Cookie {
|
||||
pub fn is_expired(&self) -> bool {
|
||||
if self.expires == 0 {
|
||||
return false; // Session, never expires
|
||||
}
|
||||
|
||||
let start = SystemTime::now();
|
||||
let since_the_epoch = start
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.expect("Time went backwards");
|
||||
|
||||
self.expires < since_the_epoch.as_secs()
|
||||
}
|
||||
|
||||
pub fn matches_url(&self, url: &str) -> bool {
|
||||
match Url::parse(&url) {
|
||||
Ok(url) => {
|
||||
// Check protocol scheme
|
||||
match url.scheme() {
|
||||
"http" => {
|
||||
if self.https_only {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
"https" => {}
|
||||
_ => {
|
||||
// Should never match URLs of protocols other than HTTP(S)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Check host
|
||||
if let Some(url_host) = url.host_str() {
|
||||
if self.domain.starts_with(".") && self.include_subdomains {
|
||||
if !url_host.to_lowercase().ends_with(&self.domain)
|
||||
&& !url_host
|
||||
.eq_ignore_ascii_case(&self.domain[1..self.domain.len() - 1])
|
||||
{
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if !url_host.eq_ignore_ascii_case(&self.domain) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check path
|
||||
if !url.path().eq_ignore_ascii_case(&self.path)
|
||||
&& !url.path().starts_with(&self.path)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_cookie_file_contents(
|
||||
cookie_file_contents: &str,
|
||||
) -> Result<Vec<Cookie>, CookieFileContentsParseError> {
|
||||
let mut cookies: Vec<Cookie> = Vec::new();
|
||||
|
||||
for (i, line) in cookie_file_contents.lines().enumerate() {
|
||||
if i == 0 {
|
||||
// Parsing first line
|
||||
if !line.eq("# HTTP Cookie File") && !line.eq("# Netscape HTTP Cookie File") {
|
||||
return Err(CookieFileContentsParseError::InvalidHeader);
|
||||
}
|
||||
} else {
|
||||
// Ignore comment lines
|
||||
if line.starts_with("#") {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Attempt to parse values
|
||||
let mut fields = line.split("\t");
|
||||
if fields.clone().count() != 7 {
|
||||
continue;
|
||||
}
|
||||
cookies.push(Cookie {
|
||||
domain: fields.next().unwrap().to_string().to_lowercase(),
|
||||
include_subdomains: fields.next().unwrap().to_string() == "TRUE",
|
||||
path: fields.next().unwrap().to_string(),
|
||||
https_only: fields.next().unwrap().to_string() == "TRUE",
|
||||
expires: fields.next().unwrap().parse::<u64>().unwrap(),
|
||||
name: fields.next().unwrap().to_string(),
|
||||
value: fields.next().unwrap().to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(cookies)
|
||||
}
|
||||
@@ -88,8 +88,8 @@ pub fn process_css<'a>(
|
||||
) -> Result<String, ParseError<'a, String>> {
|
||||
let mut result: String = "".to_string();
|
||||
|
||||
let mut curr_rule: String = rule_name.clone().to_string();
|
||||
let mut curr_prop: String = prop_name.clone().to_string();
|
||||
let mut curr_rule: String = rule_name.to_string();
|
||||
let mut curr_prop: String = prop_name.to_string();
|
||||
let mut token: &Token;
|
||||
let mut token_offset: SourcePosition;
|
||||
|
||||
|
||||
@@ -601,7 +601,7 @@ pub fn retrieve_and_embed_asset(
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
) {
|
||||
let resolved_url: Url = resolve_url(document_url, attr_value.clone());
|
||||
let resolved_url: Url = resolve_url(document_url, attr_value);
|
||||
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
pub mod cookies;
|
||||
pub mod css;
|
||||
pub mod html;
|
||||
pub mod js;
|
||||
|
||||
150
src/main.rs
150
src/main.rs
@@ -10,6 +10,7 @@ use std::process;
|
||||
use std::time::Duration;
|
||||
use url::Url;
|
||||
|
||||
use monolith::cookies::parse_cookie_file_contents;
|
||||
use monolith::html::{
|
||||
add_favicon, create_metadata_tag, get_base_url, get_charset, has_favicon, html_to_dom,
|
||||
serialize_document, set_base_url, set_charset, walk_and_embed_assets,
|
||||
@@ -64,82 +65,102 @@ pub fn read_stdin() -> Vec<u8> {
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let options = Options::from_args();
|
||||
let mut target: String = options.target.clone();
|
||||
let mut options = Options::from_args();
|
||||
|
||||
// Check if target was provided
|
||||
if target.len() == 0 {
|
||||
if options.target.len() == 0 {
|
||||
if !options.silent {
|
||||
eprintln!("No target specified");
|
||||
}
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
// Check if custom charset is valid
|
||||
if let Some(custom_charset) = options.charset.clone() {
|
||||
if !Encoding::for_label_no_replacement(custom_charset.as_bytes()).is_some() {
|
||||
eprintln!("Unknown encoding: {}", &custom_charset);
|
||||
// Check if custom encoding is valid
|
||||
if let Some(custom_encoding) = options.encoding.clone() {
|
||||
if !Encoding::for_label_no_replacement(custom_encoding.as_bytes()).is_some() {
|
||||
eprintln!("Unknown encoding: {}", &custom_encoding);
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
let target_url: Url;
|
||||
let mut use_stdin: bool = false;
|
||||
|
||||
// Determine exact target URL
|
||||
if target.clone() == "-" {
|
||||
// Read from pipe (stdin)
|
||||
use_stdin = true;
|
||||
// Set default target URL to an empty data URL; the user can set it via --base-url
|
||||
target_url = Url::parse("data:text/html,").unwrap();
|
||||
} else {
|
||||
match Url::parse(&target.clone()) {
|
||||
Ok(parsed_url) => {
|
||||
if parsed_url.scheme() == "data"
|
||||
|| parsed_url.scheme() == "file"
|
||||
|| (parsed_url.scheme() == "http" || parsed_url.scheme() == "https")
|
||||
{
|
||||
target_url = parsed_url;
|
||||
} else {
|
||||
let target_url = match options.target.as_str() {
|
||||
"-" => {
|
||||
// Read from pipe (stdin)
|
||||
use_stdin = true;
|
||||
// Set default target URL to an empty data URL; the user can set it via --base-url
|
||||
Url::parse("data:text/html,").unwrap()
|
||||
}
|
||||
target => match Url::parse(&target) {
|
||||
Ok(url) => match url.scheme() {
|
||||
"data" | "file" | "http" | "https" => url,
|
||||
unsupported_scheme => {
|
||||
if !options.silent {
|
||||
eprintln!("Unsupported target URL type: {}", &parsed_url.scheme());
|
||||
eprintln!("Unsupported target URL type: {}", unsupported_scheme);
|
||||
}
|
||||
process::exit(1);
|
||||
process::exit(1)
|
||||
}
|
||||
}
|
||||
Err(_err) => {
|
||||
},
|
||||
Err(_) => {
|
||||
// Failed to parse given base URL (perhaps it's a filesystem path?)
|
||||
let path: &Path = Path::new(&target);
|
||||
|
||||
if path.exists() {
|
||||
if path.is_file() {
|
||||
match Url::from_file_path(fs::canonicalize(&path).unwrap()) {
|
||||
Ok(file_url) => {
|
||||
target_url = file_url;
|
||||
}
|
||||
Err(_err) => {
|
||||
if !options.silent {
|
||||
eprintln!(
|
||||
"Could not generate file URL out of given path: {}",
|
||||
"err"
|
||||
);
|
||||
match path.exists() {
|
||||
true => match path.is_file() {
|
||||
true => {
|
||||
let canonical_path = fs::canonicalize(&path).unwrap();
|
||||
match Url::from_file_path(canonical_path) {
|
||||
Ok(url) => url,
|
||||
Err(_) => {
|
||||
if !options.silent {
|
||||
eprintln!(
|
||||
"Could not generate file URL out of given path: {}",
|
||||
&target
|
||||
);
|
||||
}
|
||||
process::exit(1);
|
||||
}
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if !options.silent {
|
||||
eprintln!("Local target is not a file: {}", &options.target);
|
||||
false => {
|
||||
if !options.silent {
|
||||
eprintln!("Local target is not a file: {}", &target);
|
||||
}
|
||||
process::exit(1);
|
||||
}
|
||||
process::exit(1);
|
||||
},
|
||||
false => {
|
||||
// It is not a FS path, now we do what browsers do:
|
||||
// prepend "http://" and hope it points to a website
|
||||
Url::parse(&format!("http://{hopefully_url}", hopefully_url = &target))
|
||||
.unwrap()
|
||||
}
|
||||
} else {
|
||||
// Last chance, now we do what browsers do:
|
||||
// prepend "http://" and hope it points to a website
|
||||
target.insert_str(0, "http://");
|
||||
target_url = Url::parse(&target).unwrap();
|
||||
}
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
// Read and parse cookie file
|
||||
if let Some(opt_cookie_file) = options.cookie_file.clone() {
|
||||
match fs::read_to_string(opt_cookie_file) {
|
||||
Ok(str) => match parse_cookie_file_contents(&str) {
|
||||
Ok(cookies) => {
|
||||
options.cookies = cookies;
|
||||
// for c in &cookies {
|
||||
// // if !cookie.is_expired() {
|
||||
// // options.cookies.append(c);
|
||||
// // }
|
||||
// }
|
||||
}
|
||||
Err(_) => {
|
||||
eprintln!("Could not parse specified cookie file");
|
||||
process::exit(1);
|
||||
}
|
||||
},
|
||||
Err(_) => {
|
||||
eprintln!("Could not read specified cookie file");
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -179,12 +200,21 @@ fn main() {
|
||||
{
|
||||
match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options, 0) {
|
||||
Ok((retrieved_data, final_url, media_type, charset)) => {
|
||||
// Make sure the media type is text/html
|
||||
if !media_type.eq_ignore_ascii_case("text/html") {
|
||||
if !options.silent {
|
||||
eprintln!("Unsupported document media type");
|
||||
}
|
||||
process::exit(1);
|
||||
// Provide output as text without processing it, the way browsers do
|
||||
if !media_type.eq_ignore_ascii_case("text/html")
|
||||
&& !media_type.eq_ignore_ascii_case("application/xhtml+xml")
|
||||
{
|
||||
// Define output
|
||||
let mut output =
|
||||
Output::new(&options.output).expect("Could not prepare output");
|
||||
|
||||
// Write retrieved data into STDOUT or file
|
||||
output
|
||||
.write(&retrieved_data)
|
||||
.expect("Could not write output");
|
||||
|
||||
// Nothing else to do past this point
|
||||
process::exit(0);
|
||||
}
|
||||
|
||||
if options
|
||||
@@ -310,8 +340,8 @@ fn main() {
|
||||
}
|
||||
|
||||
// Save using specified charset, if given
|
||||
if let Some(custom_charset) = options.charset.clone() {
|
||||
document_encoding = custom_charset;
|
||||
if let Some(custom_encoding) = options.encoding.clone() {
|
||||
document_encoding = custom_encoding;
|
||||
dom = set_charset(dom, document_encoding.clone());
|
||||
}
|
||||
|
||||
@@ -328,6 +358,6 @@ fn main() {
|
||||
// Define output
|
||||
let mut output = Output::new(&options.output).expect("Could not prepare output");
|
||||
|
||||
// Write result into stdout or file
|
||||
output.write(&result).expect("Could not write HTML output");
|
||||
// Write result into STDOUT or file
|
||||
output.write(&result).expect("Could not write output");
|
||||
}
|
||||
|
||||
69
src/opts.rs
69
src/opts.rs
@@ -1,13 +1,19 @@
|
||||
use clap::{App, Arg};
|
||||
use clap::{App, Arg, ArgAction};
|
||||
use std::env;
|
||||
|
||||
use crate::cookies::Cookie;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Options {
|
||||
pub no_audio: bool,
|
||||
pub base_url: Option<String>,
|
||||
pub blacklist_domains: bool,
|
||||
pub no_css: bool,
|
||||
pub charset: Option<String>,
|
||||
pub cookie_file: Option<String>,
|
||||
pub cookies: Vec<Cookie>,
|
||||
pub domains: Option<Vec<String>>,
|
||||
pub ignore_errors: bool,
|
||||
pub encoding: Option<String>,
|
||||
pub no_frames: bool,
|
||||
pub no_fonts: bool,
|
||||
pub no_images: bool,
|
||||
@@ -46,28 +52,41 @@ impl Options {
|
||||
.version(env!("CARGO_PKG_VERSION"))
|
||||
.author(format!("\n{}\n\n", env!("CARGO_PKG_AUTHORS").replace(':', "\n")).as_str())
|
||||
.about(format!("{}\n{}", ASCII, env!("CARGO_PKG_DESCRIPTION")).as_str())
|
||||
.args_from_usage("-a, --no-audio 'Removes audio sources'")
|
||||
.args_from_usage("-b, --base-url=[http://localhost/] 'Sets custom base URL'")
|
||||
.args_from_usage("-c, --no-css 'Removes CSS'")
|
||||
.args_from_usage("-C, --charset=[UTF-8] 'Enforces custom encoding'")
|
||||
.args_from_usage("-a, --no-audio 'Remove audio sources'")
|
||||
.args_from_usage("-b, --base-url=[http://localhost/] 'Set custom base URL'")
|
||||
.args_from_usage(
|
||||
"-B, --blacklist-domains 'Treat list of specified domains as blacklist'",
|
||||
)
|
||||
.args_from_usage("-c, --no-css 'Remove CSS'")
|
||||
.args_from_usage("-C, --cookies=[cookies.txt] 'Specify cookie file'")
|
||||
.arg(
|
||||
Arg::with_name("domains")
|
||||
.short('d')
|
||||
.long("domain")
|
||||
.takes_value(true)
|
||||
.value_name("example.com")
|
||||
.action(ArgAction::Append)
|
||||
.help("Specify domains to use for white/black-listing"),
|
||||
)
|
||||
.args_from_usage("-e, --ignore-errors 'Ignore network errors'")
|
||||
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
|
||||
.args_from_usage("-F, --no-fonts 'Removes fonts'")
|
||||
.args_from_usage("-i, --no-images 'Removes images'")
|
||||
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
|
||||
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
||||
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
|
||||
.args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
|
||||
.args_from_usage("-E, --encoding=[UTF-8] 'Enforce custom charset'")
|
||||
.args_from_usage("-f, --no-frames 'Remove frames and iframes'")
|
||||
.args_from_usage("-F, --no-fonts 'Remove fonts'")
|
||||
.args_from_usage("-i, --no-images 'Remove images'")
|
||||
.args_from_usage("-I, --isolate 'Cut off document from the Internet'")
|
||||
.args_from_usage("-j, --no-js 'Remove JavaScript'")
|
||||
.args_from_usage("-k, --insecure 'Allow invalid X.509 (TLS) certificates'")
|
||||
.args_from_usage("-M, --no-metadata 'Exclude timestamp and source information'")
|
||||
.args_from_usage(
|
||||
"-n, --unwrap-noscript 'Replaces NOSCRIPT elements with their contents'",
|
||||
"-n, --unwrap-noscript 'Replace NOSCRIPT elements with their contents'",
|
||||
)
|
||||
.args_from_usage(
|
||||
"-o, --output=[document.html] 'Writes output to <file>, use - for STDOUT'",
|
||||
"-o, --output=[document.html] 'Write output to <file>, use - for STDOUT'",
|
||||
)
|
||||
.args_from_usage("-s, --silent 'Suppresses verbosity'")
|
||||
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
|
||||
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
|
||||
.args_from_usage("-v, --no-video 'Removes video sources'")
|
||||
.args_from_usage("-s, --silent 'Suppress verbosity'")
|
||||
.args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'")
|
||||
.args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'")
|
||||
.args_from_usage("-v, --no-video 'Remove video sources'")
|
||||
.arg(
|
||||
Arg::with_name("target")
|
||||
.required(true)
|
||||
@@ -87,9 +106,17 @@ impl Options {
|
||||
if let Some(base_url) = app.value_of("base-url") {
|
||||
options.base_url = Some(base_url.to_string());
|
||||
}
|
||||
options.blacklist_domains = app.is_present("blacklist-domains");
|
||||
options.no_css = app.is_present("no-css");
|
||||
if let Some(charset) = app.value_of("charset") {
|
||||
options.charset = Some(charset.to_string());
|
||||
if let Some(cookie_file) = app.value_of("cookies") {
|
||||
options.cookie_file = Some(cookie_file.to_string());
|
||||
}
|
||||
if let Some(encoding) = app.value_of("encoding") {
|
||||
options.encoding = Some(encoding.to_string());
|
||||
}
|
||||
if let Some(domains) = app.get_many::<String>("domains") {
|
||||
let list_of_domains: Vec<String> = domains.map(|v| v.clone()).collect::<Vec<_>>();
|
||||
options.domains = Some(list_of_domains);
|
||||
}
|
||||
options.ignore_errors = app.is_present("ignore-errors");
|
||||
options.no_frames = app.is_present("no-frames");
|
||||
|
||||
83
src/utils.rs
83
src/utils.rs
@@ -1,5 +1,5 @@
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE, COOKIE};
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
@@ -92,6 +92,62 @@ pub fn detect_media_type_by_file_name(filename: &str) -> String {
|
||||
mime.to_string()
|
||||
}
|
||||
|
||||
pub fn domain_is_within_domain(domain: &str, domain_to_match_against: &str) -> bool {
|
||||
if domain_to_match_against.len() == 0 {
|
||||
return false;
|
||||
}
|
||||
|
||||
if domain_to_match_against == "." {
|
||||
return true;
|
||||
}
|
||||
|
||||
let domain_partials: Vec<&str> = domain.trim_end_matches(".").rsplit(".").collect();
|
||||
let domain_to_match_against_partials: Vec<&str> = domain_to_match_against
|
||||
.trim_end_matches(".")
|
||||
.rsplit(".")
|
||||
.collect();
|
||||
let domain_to_match_against_starts_with_a_dot = domain_to_match_against.starts_with(".");
|
||||
|
||||
let mut i: usize = 0;
|
||||
let l: usize = std::cmp::max(
|
||||
domain_partials.len(),
|
||||
domain_to_match_against_partials.len(),
|
||||
);
|
||||
let mut ok: bool = true;
|
||||
|
||||
while i < l {
|
||||
// Exit and return false if went out of bounds of domain to match against, and it didn't start with a dot
|
||||
if !domain_to_match_against_starts_with_a_dot
|
||||
&& domain_to_match_against_partials.len() < i + 1
|
||||
{
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
|
||||
let domain_partial = if domain_partials.len() < i + 1 {
|
||||
""
|
||||
} else {
|
||||
domain_partials.get(i).unwrap()
|
||||
};
|
||||
let domain_to_match_against_partial = if domain_to_match_against_partials.len() < i + 1 {
|
||||
""
|
||||
} else {
|
||||
domain_to_match_against_partials.get(i).unwrap()
|
||||
};
|
||||
|
||||
let parts_match = domain_to_match_against_partial.eq_ignore_ascii_case(domain_partial);
|
||||
|
||||
if !parts_match && domain_to_match_against_partial.len() != 0 {
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
|
||||
i += 1;
|
||||
}
|
||||
|
||||
ok
|
||||
}
|
||||
|
||||
pub fn indent(level: u32) -> String {
|
||||
let mut result: String = String::new();
|
||||
let mut l: u32 = level;
|
||||
@@ -148,7 +204,7 @@ pub fn retrieve_asset(
|
||||
let (media_type, charset, data) = parse_data_url(url);
|
||||
Ok((data, url.clone(), media_type, charset))
|
||||
} else if url.scheme() == "file" {
|
||||
// Check if parent_url is also file:/// (if not, then we don't embed the asset)
|
||||
// Check if parent_url is also a file: URL (if not, then we don't embed the asset)
|
||||
if parent_url.scheme() != "file" {
|
||||
if !options.silent {
|
||||
eprintln!(
|
||||
@@ -236,8 +292,29 @@ pub fn retrieve_asset(
|
||||
"".to_string(),
|
||||
))
|
||||
} else {
|
||||
if let Some(domains) = &options.domains {
|
||||
let domain_matches = domains
|
||||
.iter()
|
||||
.any(|d| domain_is_within_domain(url.host_str().unwrap(), &d.trim()));
|
||||
if (options.blacklist_domains && domain_matches)
|
||||
|| (!options.blacklist_domains && !domain_matches)
|
||||
{
|
||||
return Err(client.get("").send().unwrap_err());
|
||||
}
|
||||
}
|
||||
|
||||
// URL not in cache, we retrieve the file
|
||||
match client.get(url.as_str()).send() {
|
||||
let mut headers = HeaderMap::new();
|
||||
if options.cookies.len() > 0 {
|
||||
for cookie in &options.cookies {
|
||||
if !cookie.is_expired() && cookie.matches_url(url.as_str()) {
|
||||
let cookie_header_value: String = cookie.name.clone() + "=" + &cookie.value;
|
||||
headers
|
||||
.insert(COOKIE, HeaderValue::from_str(&cookie_header_value).unwrap());
|
||||
}
|
||||
}
|
||||
}
|
||||
match client.get(url.as_str()).headers(headers).send() {
|
||||
Ok(response) => {
|
||||
if !options.ignore_errors && response.status() != reqwest::StatusCode::OK {
|
||||
if !options.silent {
|
||||
|
||||
@@ -196,17 +196,14 @@ mod failing {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
|
||||
|
||||
// STDERR should contain error description
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
"Unsupported document media type\n"
|
||||
);
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain HTML
|
||||
assert_eq!(String::from_utf8_lossy(&out.stdout), "");
|
||||
// STDOUT should contain text
|
||||
assert_eq!(String::from_utf8_lossy(&out.stdout), "Hello, World!\n");
|
||||
|
||||
// Exit code should be 1
|
||||
out.assert().code(1);
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -221,7 +218,7 @@ mod failing {
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain HTML with no JS in it
|
||||
// STDOUT should contain HTML without contents of local JS file
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html><head><script src=\"data:application/javascript;base64,\"></script></head><body></body></html>\n"
|
||||
|
||||
@@ -115,7 +115,7 @@ mod passing {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-C")
|
||||
.arg("-E")
|
||||
.arg("utf8")
|
||||
.arg(format!(
|
||||
"tests{s}_data_{s}unusual_encodings{s}gb2312.html",
|
||||
@@ -158,7 +158,7 @@ mod passing {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-C")
|
||||
.arg("-E")
|
||||
.arg("utf0")
|
||||
.arg(format!(
|
||||
"tests{s}_data_{s}unusual_encodings{s}gb2312.html",
|
||||
|
||||
68
tests/cookies/cookie/is_expired.rs
Normal file
68
tests/cookies/cookie/is_expired.rs
Normal file
@@ -0,0 +1,68 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use monolith::cookies;
|
||||
|
||||
#[test]
|
||||
fn never_expires() {
|
||||
let cookie = cookies::Cookie {
|
||||
domain: String::from("127.0.0.1"),
|
||||
include_subdomains: true,
|
||||
path: String::from("/"),
|
||||
https_only: false,
|
||||
expires: 0,
|
||||
name: String::from(""),
|
||||
value: String::from(""),
|
||||
};
|
||||
|
||||
assert!(!cookie.is_expired());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn expires_long_from_now() {
|
||||
let cookie = cookies::Cookie {
|
||||
domain: String::from("127.0.0.1"),
|
||||
include_subdomains: true,
|
||||
path: String::from("/"),
|
||||
https_only: false,
|
||||
expires: 9999999999,
|
||||
name: String::from(""),
|
||||
value: String::from(""),
|
||||
};
|
||||
|
||||
assert!(!cookie.is_expired());
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use monolith::cookies;
|
||||
|
||||
#[test]
|
||||
fn expired() {
|
||||
let cookie = cookies::Cookie {
|
||||
domain: String::from("127.0.0.1"),
|
||||
include_subdomains: true,
|
||||
path: String::from("/"),
|
||||
https_only: false,
|
||||
expires: 1,
|
||||
name: String::from(""),
|
||||
value: String::from(""),
|
||||
};
|
||||
|
||||
assert!(cookie.is_expired());
|
||||
}
|
||||
}
|
||||
107
tests/cookies/cookie/matches_url.rs
Normal file
107
tests/cookies/cookie/matches_url.rs
Normal file
@@ -0,0 +1,107 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use monolith::cookies;
|
||||
|
||||
#[test]
|
||||
fn secure_url() {
|
||||
let cookie = cookies::Cookie {
|
||||
domain: String::from("127.0.0.1"),
|
||||
include_subdomains: true,
|
||||
path: String::from("/"),
|
||||
https_only: true,
|
||||
expires: 0,
|
||||
name: String::from(""),
|
||||
value: String::from(""),
|
||||
};
|
||||
assert!(cookie.matches_url("https://127.0.0.1/something"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn non_secure_url() {
|
||||
let cookie = cookies::Cookie {
|
||||
domain: String::from("127.0.0.1"),
|
||||
include_subdomains: true,
|
||||
path: String::from("/"),
|
||||
https_only: false,
|
||||
expires: 0,
|
||||
name: String::from(""),
|
||||
value: String::from(""),
|
||||
};
|
||||
assert!(cookie.matches_url("http://127.0.0.1/something"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn subdomain() {
|
||||
let cookie = cookies::Cookie {
|
||||
domain: String::from(".somethingsomething.com"),
|
||||
include_subdomains: true,
|
||||
path: String::from("/"),
|
||||
https_only: true,
|
||||
expires: 0,
|
||||
name: String::from(""),
|
||||
value: String::from(""),
|
||||
};
|
||||
assert!(cookie.matches_url("https://cdn.somethingsomething.com/something"));
|
||||
}
|
||||
}
|
||||
|
||||
//  ███████╗ █████╗ ██╗██╗     ██╗███╗   ██╗ ██████╗
//  ██╔════╝██╔══██╗██║██║     ██║████╗  ██║██╔════╝
//  █████╗  ███████║██║██║     ██║██╔██╗ ██║██║  ███╗
//  ██╔══╝  ██╔══██║██║██║     ██║██║╚██╗██║██║   ██║
//  ██║     ██║  ██║██║███████╗██║██║ ╚████║╚██████╔╝
//  ╚═╝     ╚═╝  ╚═╝╚═╝╚══════╝╚═╝╚═╝  ╚═══╝ ╚═════╝

#[cfg(test)]
mod failing {
    use monolith::cookies;

    /// Builds a non-HTTPS-only `Cookie` for "/" on the given domain;
    /// only the fields `matches_url` consults vary between these cases.
    fn make_cookie(domain: &str, include_subdomains: bool) -> cookies::Cookie {
        cookies::Cookie {
            domain: domain.to_string(),
            include_subdomains,
            path: "/".to_string(),
            https_only: false,
            expires: 0,
            name: String::new(),
            value: String::new(),
        }
    }

    // An empty string is not a URL, so it can never match.
    #[test]
    fn empty_url() {
        let cookie = make_cookie("127.0.0.1", true);

        assert!(!cookie.matches_url(""));
    }

    // A URL on a different host must not match the cookie's domain.
    #[test]
    fn wrong_hostname() {
        let cookie = make_cookie("127.0.0.1", true);

        assert!(!cookie.matches_url("http://0.0.0.0/"));
    }

    // Different host with subdomain matching disabled — must not match.
    #[test]
    fn wrong_path() {
        let cookie = make_cookie("127.0.0.1", false);

        assert!(!cookie.matches_url("http://0.0.0.0/path"));
    }
}
|
||||
2
tests/cookies/cookie/mod.rs
Normal file
2
tests/cookies/cookie/mod.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
// Per-method test suites for the `cookies::Cookie` struct.
mod is_expired;
mod matches_url;
|
||||
2
tests/cookies/mod.rs
Normal file
2
tests/cookies/mod.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
// Test suites for the cookies module: the `Cookie` struct's methods
// and the Netscape cookie-file parser.
mod cookie;
mod parse_cookie_file_contents;
|
||||
87
tests/cookies/parse_cookie_file_contents.rs
Normal file
87
tests/cookies/parse_cookie_file_contents.rs
Normal file
@@ -0,0 +1,87 @@
|
||||
//  ██████╗  █████╗ ███████╗███████╗██╗███╗   ██╗ ██████╗
//  ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗  ██║██╔════╝
//  ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║  ███╗
//  ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║   ██║
//  ██║     ██║  ██║███████║███████║██║██║ ╚████║╚██████╔╝
//  ╚═╝     ╚═╝  ╚═╝╚══════╝╚══════╝╚═╝╚═╝  ╚═══╝ ╚═════╝

#[cfg(test)]
mod passing {
    use monolith::cookies;

    // One tab-separated record below the header yields exactly one cookie
    // whose fields mirror the record's columns.
    #[test]
    fn parse_file() {
        let file_contents =
            "# Netscape HTTP Cookie File\n127.0.0.1\tFALSE\t/\tFALSE\t0\tUSER_TOKEN\tin";

        let result = cookies::parse_cookie_file_contents(&file_contents).unwrap();
        assert_eq!(result.len(), 1);

        let cookie = &result[0];
        assert_eq!(cookie.domain, "127.0.0.1");
        assert!(!cookie.include_subdomains);
        assert_eq!(cookie.path, "/");
        assert!(!cookie.https_only);
        assert_eq!(cookie.expires, 0);
        assert_eq!(cookie.name, "USER_TOKEN");
        assert_eq!(cookie.value, "in");
    }

    // Two records (followed by trailing blank lines) yield two cookies,
    // preserved in file order.
    #[test]
    fn parse_multiline_file() {
        let file_contents = "# HTTP Cookie File\n127.0.0.1\tFALSE\t/\tFALSE\t0\tUSER_TOKEN\tin\n127.0.0.1\tTRUE\t/\tTRUE\t9\tUSER_TOKEN\tout\n\n";

        let result = cookies::parse_cookie_file_contents(&file_contents).unwrap();
        assert_eq!(result.len(), 2);

        let first = &result[0];
        assert_eq!(first.domain, "127.0.0.1");
        assert!(!first.include_subdomains);
        assert_eq!(first.path, "/");
        assert!(!first.https_only);
        assert_eq!(first.expires, 0);
        assert_eq!(first.name, "USER_TOKEN");
        assert_eq!(first.value, "in");

        let second = &result[1];
        assert_eq!(second.domain, "127.0.0.1");
        assert!(second.include_subdomains);
        assert_eq!(second.path, "/");
        assert!(second.https_only);
        assert_eq!(second.expires, 9);
        assert_eq!(second.name, "USER_TOKEN");
        assert_eq!(second.value, "out");
    }
}
|
||||
|
||||
//  ███████╗ █████╗ ██╗██╗     ██╗███╗   ██╗ ██████╗
//  ██╔════╝██╔══██╗██║██║     ██║████╗  ██║██╔════╝
//  █████╗  ███████║██║██║     ██║██╔██╗ ██║██║  ███╗
//  ██╔══╝  ██╔══██║██║██║     ██║██║╚██╗██║██║   ██║
//  ██║     ██║  ██║██║███████╗██║██║ ╚████║╚██████╔╝
//  ╚═╝     ╚═╝  ╚═╝╚═╝╚══════╝╚═╝╚═╝  ╚═══╝ ╚═════╝

#[cfg(test)]
mod failing {
    use monolith::cookies;

    // An empty file parses successfully but yields no cookies.
    #[test]
    fn empty() {
        let file_contents = "";
        let result = cookies::parse_cookie_file_contents(file_contents).unwrap();
        assert_eq!(result.len(), 0);
    }

    // A file that lacks the "Cookie File" header comment must be rejected.
    // Idiomatic check: assert the Result is Err instead of the old
    // `match { Ok => assert!(false), Err => assert!(true) }` pattern.
    #[test]
    fn no_header() {
        let file_contents = "127.0.0.1 FALSE / FALSE 0 USER_TOKEN in";
        assert!(cookies::parse_cookie_file_contents(file_contents).is_err());
    }

    // Space-separated fields are not valid records (tabs are required),
    // so the parser succeeds but produces no cookies.
    #[test]
    fn spaces_instead_of_tabs() {
        let file_contents =
            "# HTTP Cookie File\n127.0.0.1 FALSE / FALSE 0 USER_TOKEN in";
        let result = cookies::parse_cookie_file_contents(file_contents).unwrap();
        assert_eq!(result.len(), 0);
    }
}
|
||||
@@ -1,4 +1,5 @@
|
||||
// Top-level wiring of the integration-test suites, one module per
// library module under test.
mod cli;
mod cookies;
mod css;
mod html;
mod js;
|
||||
|
||||
@@ -16,7 +16,8 @@ mod passing {
|
||||
assert_eq!(options.no_audio, false);
|
||||
assert_eq!(options.base_url, None);
|
||||
assert_eq!(options.no_css, false);
|
||||
assert_eq!(options.charset, None);
|
||||
assert_eq!(options.cookie_file, None);
|
||||
assert_eq!(options.encoding, None);
|
||||
assert_eq!(options.no_frames, false);
|
||||
assert_eq!(options.no_fonts, false);
|
||||
assert_eq!(options.no_images, false);
|
||||
|
||||
154
tests/utils/domain_is_within_domain.rs
Normal file
154
tests/utils/domain_is_within_domain.rs
Normal file
@@ -0,0 +1,154 @@
|
||||
//  ██████╗  █████╗ ███████╗███████╗██╗███╗   ██╗ ██████╗
//  ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗  ██║██╔════╝
//  ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║  ███╗
//  ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║   ██║
//  ██║     ██║  ██║███████║███████║██║██║ ╚████║╚██████╔╝
//  ╚═╝     ╚═╝  ╚═╝╚══════╝╚══════╝╚═╝╚═╝  ╚═══╝ ╚═════╝

#[cfg(test)]
mod passing {
    use monolith::utils;

    // A host is within its own dot-prefixed form.
    #[test]
    fn sub_domain_is_within_dotted_sub_domain() {
        assert!(utils::domain_is_within_domain("news.ycombinator.com", ".news.ycombinator.com"));
    }

    // A bare domain is within the dot-prefixed version of itself.
    #[test]
    fn domain_is_within_dotted_domain() {
        assert!(utils::domain_is_within_domain("ycombinator.com", ".ycombinator.com"));
    }

    // A subdomain is within its dot-prefixed parent domain.
    #[test]
    fn sub_domain_is_within_dotted_domain() {
        assert!(utils::domain_is_within_domain("news.ycombinator.com", ".ycombinator.com"));
    }

    // A subdomain is within a dot-prefixed top-level domain.
    #[test]
    fn sub_domain_is_within_dotted_top_level_domain() {
        assert!(utils::domain_is_within_domain("news.ycombinator.com", ".com"));
    }

    // A domain trivially matches itself.
    #[test]
    fn domain_is_within_itself() {
        assert!(utils::domain_is_within_domain("ycombinator.com", "ycombinator.com"));
    }

    // A trailing dot on the FQDN does not break the self-match.
    #[test]
    fn domain_with_trailing_dot_is_within_itself() {
        assert!(utils::domain_is_within_domain("ycombinator.com.", "ycombinator.com"));
    }

    // "." acts as a wildcard that admits a trailing-dot FQDN.
    #[test]
    fn domain_with_trailing_dot_is_within_single_dot() {
        assert!(utils::domain_is_within_domain("ycombinator.com.", "."));
    }

    // "." acts as a wildcard that admits any ordinary domain.
    #[test]
    fn domain_matches_single_dot() {
        assert!(utils::domain_is_within_domain("ycombinator.com", "."));
    }

    // Dot-prefixed domains match one another exactly.
    #[test]
    fn dotted_domain_must_be_within_dotted_domain() {
        assert!(utils::domain_is_within_domain(".ycombinator.com", ".ycombinator.com"));
    }

    // Even the empty domain falls under the "." wildcard.
    #[test]
    fn empty_is_within_dot() {
        assert!(utils::domain_is_within_domain("", "."));
    }

    // The wildcard matches itself.
    #[test]
    fn both_dots() {
        assert!(utils::domain_is_within_domain(".", "."));
    }
}
|
||||
|
||||
//  ███████╗ █████╗ ██╗██╗     ██╗███╗   ██╗ ██████╗
//  ██╔════╝██╔══██╗██║██║     ██║████╗  ██║██╔════╝
//  █████╗  ███████║██║██║     ██║██╔██╗ ██║██║  ███╗
//  ██╔══╝  ██╔══██║██║██║     ██║██║╚██╗██║██║   ██║
//  ██║     ██║  ██║██║███████╗██║██║ ╚████║╚██████╔╝
//  ╚═╝     ╚═╝  ╚═╝╚═╝╚══════╝╚═╝╚═╝  ╚═══╝ ╚═════╝

#[cfg(test)]
mod failing {
    use monolith::utils;

    // Without a leading dot, a parent domain does not admit subdomains.
    #[test]
    fn sub_domain_must_not_be_within_domain() {
        assert!(!utils::domain_is_within_domain("news.ycombinator.com", "ycombinator.com"));
    }

    // A bare TLD (no leading dot) does not admit domains under it.
    #[test]
    fn domain_must_not_be_within_top_level_domain() {
        assert!(!utils::domain_is_within_domain("ycombinator.com", "com"));
    }

    // Unrelated domains never match.
    #[test]
    fn different_domains_must_not_be_within_one_another() {
        assert!(!utils::domain_is_within_domain("news.ycombinator.com", "kernel.org"));
    }

    // A domain is not within a TLD it does not belong to.
    #[test]
    fn sub_domain_is_not_within_wrong_top_level_domain() {
        assert!(!utils::domain_is_within_domain("news.ycombinator.com", "org"));
    }

    // A dot-prefixed domain is not within its bare (undotted) form.
    #[test]
    fn dotted_domain_is_not_within_domain() {
        assert!(!utils::domain_is_within_domain(".ycombinator.com", "ycombinator.com"));
    }

    // A dotted domain only admits hosts under that same domain.
    #[test]
    fn different_domain_is_not_within_dotted_domain() {
        assert!(!utils::domain_is_within_domain("www.doodleoptimize.com", ".ycombinator.com"));
    }

    // The empty string is not a wildcard — nothing is within it.
    #[test]
    fn no_domain_can_be_within_empty_domain() {
        assert!(!utils::domain_is_within_domain("ycombinator.com", ""));
    }

    // Two empty strings do not match either.
    #[test]
    fn both_can_not_be_empty() {
        assert!(!utils::domain_is_within_domain("", ""));
    }
}
|
||||
@@ -1,4 +1,5 @@
|
||||
// Per-function test suites for the utils module.
mod detect_media_type;
mod domain_is_within_domain;
mod indent;
mod parse_content_type;
mod retrieve_asset;
|
||||
|
||||
Reference in New Issue
Block a user