33 Commits

Author SHA1 Message Date
Sunshine
95040173fc Merge pull request #233 from snshn/cargo-fix-license-identifier
Fix license identifier in Cargo.toml
2020-12-25 22:42:31 -10:00
Sunshine
b10d41f82e fix license identifier in Cargo.toml 2020-12-25 22:41:31 -10:00
Sunshine
4c2c55d166 Merge pull request #232 from snshn/fix-cargo-toml
Reduce amount of keywords to 5 (max)
2020-12-25 22:34:08 -10:00
Sunshine
2dd1c465e4 reduce amount of keywords to 5 (max) 2020-12-25 22:28:19 -10:00
Sunshine
a5afda9c80 Merge pull request #229 from snshn/cargo-install
Update Cargo.toml for publishing on crates.io
2020-12-25 22:15:13 -10:00
Sunshine
ab6fed6d1f Merge pull request #231 from snshn/fix-srcset
Fix srcset parsing
2020-12-25 22:07:45 -10:00
Sunshine
f8dcb335e7 bump version, make possible to install via cargo 2020-12-25 22:07:19 -10:00
Sunshine
913051870a Merge pull request #230 from snshn/stdin
Make possible to use stdin as input method
2020-12-25 21:58:14 -10:00
Sunshine
614a518475 fix srcset parsing 2020-12-25 21:56:40 -10:00
Sunshine
870a4b150e make possible to use stdin as input method 2020-12-25 21:23:29 -10:00
Sunshine
0533b287b7 Merge pull request #228 from snshn/audio-video-support
Add support for embedding video and audio files
2020-12-25 16:55:42 -10:00
Sunshine
4ba4285b6b add support for embedding video and audio files 2020-12-25 16:49:43 -10:00
Sunshine
2b9caf9840 Merge pull request #227 from snshn/fix-trailing-comma-for-srcset-parsing
Fix crash associated with trailing/repeating commas within srcset
2020-12-25 14:33:20 -10:00
Sunshine
8adf059980 fix crash associated with trailing/repeating commas within srcset 2020-12-25 14:24:52 -10:00
Sunshine
8ad252868e Merge pull request #226 from snshn/base-tag-option
Add base URL option
2020-12-25 13:02:05 -10:00
Sunshine
e145df372f Merge branch 'master' into base-tag-option 2020-12-25 12:10:54 -10:00
Sunshine
816b6175ac rewrite ADR #8 (Base Tag) 2020-12-25 12:06:56 -10:00
Sunshine
d89b4d5f5b refactor code that processes the DOM 2020-12-25 11:09:47 -10:00
Sunshine
15d98a7269 don't modify base url by default, add option for setting it 2020-12-24 18:38:44 -10:00
Sunshine
36e82cb511 Update README.md 2020-12-13 08:42:41 -10:00
Sunshine
1b1befd7b0 Merge pull request #222 from snshn/readme-no-metadata-info
Add description of -M option to README.md
2020-12-09 21:29:33 -05:00
Sunshine
a2f59b4418 Update README.md 2020-12-09 16:23:17 -10:00
Sunshine
124a62920f Merge pull request #221 from snshn/related-project-hako
Add Hako
2020-12-09 07:54:52 -05:00
Sunshine
f557504bed Update README.md
Add Hako to the list of related projects
2020-12-08 17:02:09 -10:00
Sunshine
5ac520b4da Merge pull request #219 from snshn/ignore-network-errors-option
Account for network errors
2020-11-22 17:54:30 -10:00
Sunshine
7a97291498 add ADR 7 (Network errors) 2020-11-22 17:20:37 -10:00
Sunshine
38a6f963ad account for network errors, add option to ignore them 2020-11-22 16:49:26 -10:00
Sunshine
052f8f49ec Merge pull request #218 from snshn/update-crates
Update crates
2020-11-20 00:36:33 -10:00
Sunshine
08de486382 use newer dependencies 2020-11-20 00:30:05 -10:00
Sunshine
c0e0a69773 Merge pull request #214 from zfhrp6/remove_use_unused_opts
remove unused import opts::Options;
2020-11-04 01:04:38 -08:00
Sunshine
1636540693 Merge pull request #216 from zfhrp6/suppress_deprecation_of_dependency
update clap 2.33.1 -> 2.33.3
2020-11-02 19:03:43 -08:00
zfhrp
3e80cb02ce update clap 2.33.1 -> 2.33.3 2020-11-02 00:11:41 +09:00
zfhrp
a296531b3f remove unused import opts::Options; 2020-11-01 23:04:08 +09:00
36 changed files with 1988 additions and 1192 deletions

213
Cargo.lock generated
View File

@@ -5,6 +5,14 @@ name = "adler"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "aho-corasick"
version = "0.7.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ansi_term"
version = "0.11.0"
@@ -59,7 +67,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "base64"
version = "0.12.3"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
@@ -95,19 +103,26 @@ name = "cfg-if"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "chrono"
version = "0.4.13"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.74 (registry+https://github.com/rust-lang/crates.io-index)",
"num-integer 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.2.12 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "clap"
version = "2.33.1"
version = "2.33.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -127,6 +142,15 @@ dependencies = [
"bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "console_error_panic_hook"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "core-foundation"
version = "0.7.0"
@@ -247,6 +271,15 @@ name = "foreign-types-shared"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "form_urlencoded"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
"percent-encoding 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "fuchsia-cprng"
version = "0.1.1"
@@ -484,10 +517,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "js-sys"
version = "0.3.44"
version = "0.3.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"wasm-bindgen 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@@ -601,18 +634,19 @@ dependencies = [
[[package]]
name = "monolith"
version = "2.3.1"
version = "2.4.0"
dependencies = [
"assert_cmd 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"base64 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)",
"base64 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono 0.4.19 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.33.3 (registry+https://github.com/rust-lang/crates.io-index)",
"cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)",
"html5ever 0.24.1 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.10.7 (registry+https://github.com/rust-lang/crates.io-index)",
"sha2 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.10.9 (registry+https://github.com/rust-lang/crates.io-index)",
"sha2 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)",
"tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"url 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@@ -815,6 +849,11 @@ name = "pin-project-lite"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "pin-project-lite"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "pin-utils"
version = "0.1.0"
@@ -1034,6 +1073,22 @@ name = "redox_syscall"
version = "0.1.57"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "regex"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.21 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-syntax"
version = "0.6.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "remove_dir_all"
version = "0.5.3"
@@ -1044,11 +1099,11 @@ dependencies = [
[[package]]
name = "reqwest"
version = "0.10.7"
version = "0.10.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"async-compression 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"base64 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)",
"base64 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)",
"bytes 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.23 (registry+https://github.com/rust-lang/crates.io-index)",
"futures-core 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1058,21 +1113,22 @@ dependencies = [
"hyper 0.13.7 (registry+https://github.com/rust-lang/crates.io-index)",
"hyper-tls 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
"ipnet 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"js-sys 0.3.44 (registry+https://github.com/rust-lang/crates.io-index)",
"js-sys 0.3.45 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.11 (registry+https://github.com/rust-lang/crates.io-index)",
"mime 0.3.16 (registry+https://github.com/rust-lang/crates.io-index)",
"mime_guess 2.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"native-tls 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)",
"percent-encoding 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"pin-project-lite 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
"pin-project-lite 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.114 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_urlencoded 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_urlencoded 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"tokio 0.2.22 (registry+https://github.com/rust-lang/crates.io-index)",
"tokio-tls 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen-futures 0.4.17 (registry+https://github.com/rust-lang/crates.io-index)",
"url 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen-futures 0.4.18 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen-test 0.3.18 (registry+https://github.com/rust-lang/crates.io-index)",
"web-sys 0.3.44 (registry+https://github.com/rust-lang/crates.io-index)",
"winreg 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
@@ -1091,6 +1147,11 @@ dependencies = [
"winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "scoped-tls"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "security-framework"
version = "0.4.4"
@@ -1139,22 +1200,22 @@ dependencies = [
[[package]]
name = "serde_urlencoded"
version = "0.6.1"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"dtoa 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"form_urlencoded 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"itoa 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"ryu 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.114 (registry+https://github.com/rust-lang/crates.io-index)",
"url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "sha2"
version = "0.9.1"
version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"block-buffer 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
"cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
"cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"cpuid-bool 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"digest 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
"opaque-debug 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1268,6 +1329,14 @@ dependencies = [
"unicode-width 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "thread_local"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "time"
version = "0.1.43"
@@ -1395,9 +1464,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "url"
version = "2.1.1"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"form_urlencoded 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"idna 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
"percent-encoding 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1447,18 +1517,18 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "wasm-bindgen"
version = "0.2.67"
version = "0.2.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.114 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.57 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen-macro 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen-macro 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "wasm-bindgen-backend"
version = "0.2.67"
version = "0.2.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"bumpalo 3.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1467,53 +1537,75 @@ dependencies = [
"proc-macro2 1.0.19 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 1.0.36 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen-shared 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen-shared 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "wasm-bindgen-futures"
version = "0.4.17"
version = "0.4.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
"js-sys 0.3.44 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)",
"js-sys 0.3.45 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)",
"web-sys 0.3.44 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "wasm-bindgen-macro"
version = "0.2.67"
version = "0.2.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"quote 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen-macro-support 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen-macro-support 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "wasm-bindgen-macro-support"
version = "0.2.67"
version = "0.2.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 1.0.19 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 1.0.36 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen-backend 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen-shared 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen-backend 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen-shared 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "wasm-bindgen-shared"
version = "0.2.67"
version = "0.2.68"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "wasm-bindgen-test"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"console_error_panic_hook 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"js-sys 0.3.45 (registry+https://github.com/rust-lang/crates.io-index)",
"scoped-tls 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen-futures 0.4.18 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen-test-macro 0.3.18 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "wasm-bindgen-test-macro"
version = "0.3.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"proc-macro2 1.0.19 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "web-sys"
version = "0.3.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"js-sys 0.3.44 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)",
"js-sys 0.3.45 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@@ -1564,22 +1656,25 @@ dependencies = [
[metadata]
"checksum adler 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ee2a4ec343196209d6594e19543ae87a39f96d5534d7174822a3ad825dd6ed7e"
"checksum aho-corasick 0.7.15 (registry+https://github.com/rust-lang/crates.io-index)" = "7404febffaa47dac81aa44dba71523c9d069b1bdc50a77db41195149e17f68e5"
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
"checksum assert_cmd 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c88b9ca26f9c16ec830350d309397e74ee9abdfd8eb1f71cb6ecc71a3fc818da"
"checksum async-compression 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "9021768bcce77296b64648cc7a7460e3df99979b97ed5c925c38d1cc83778d98"
"checksum atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)" = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
"checksum autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2"
"checksum autocfg 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f8aac770f1885fd7e387acedd76065302551364496e46b3dd00860b2f8359b9d"
"checksum base64 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3441f0f7b02788e948e47f457ca01f1d7e6d92c693bc132c22b087d3141c03ff"
"checksum base64 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)" = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
"checksum block-buffer 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4152116fd6e9dadb291ae18fc1ec3575ed6d84c29642d97890f4b4a3417297e4"
"checksum bumpalo 3.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2e8c087f005730276d1096a652e92a8bacee2e2472bcc9715a74d2bec38b5820"
"checksum bytes 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0e4cec68f03f32e44924783795810fa50a7035d8c8ebe78580ad7e6c703fba38"
"checksum cc 1.0.58 (registry+https://github.com/rust-lang/crates.io-index)" = "f9a06fb2e53271d7c279ec1efea6ab691c35a2ae67ec0d91d7acec0caf13b518"
"checksum cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
"checksum chrono 0.4.13 (registry+https://github.com/rust-lang/crates.io-index)" = "c74d84029116787153e02106bf53e66828452a4b325cc8652b788b5967c0a0b6"
"checksum clap 2.33.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bdfa80d47f954d53a35a64987ca1422f495b8d6483c0fe9f7117b36c2a792129"
"checksum cfg-if 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
"checksum chrono 0.4.19 (registry+https://github.com/rust-lang/crates.io-index)" = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
"checksum clap 2.33.3 (registry+https://github.com/rust-lang/crates.io-index)" = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
"checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
"checksum console_error_panic_hook 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "b8d976903543e0c48546a91908f21588a680a8c8f984df9a5d69feccb2b2a211"
"checksum core-foundation 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "57d24c7a13c43e870e37c1556b74555437870a04514f7685f5b354e090567171"
"checksum core-foundation-sys 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b3a71ab494c0b5b860bdc8407ae08978052417070c2ced38573a9157ad75b8ac"
"checksum cpuid-bool 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8aebca1129a03dc6dc2b127edd729435bbc4a37e1d5f4d7513165089ceb02634"
@@ -1596,6 +1691,7 @@ dependencies = [
"checksum fnv 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
"checksum foreign-types 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
"checksum foreign-types-shared 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b"
"checksum form_urlencoded 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ece68d15c92e84fa4f19d3780f1294e5ca82a78a6d515f1efaabcc144688be00"
"checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba"
"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
@@ -1622,7 +1718,7 @@ dependencies = [
"checksum iovec 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "b2b3ea6ff95e175473f8ffe6a7eb7c00d054240321b84c57051175fe3c1e075e"
"checksum ipnet 2.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "47be2f14c678be2fdcab04ab1171db51b2762ce6f0a8ee87c8dd4a04ed216135"
"checksum itoa 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6"
"checksum js-sys 0.3.44 (registry+https://github.com/rust-lang/crates.io-index)" = "85a7e2c92a4804dd459b86c339278d0fe87cf93757fae222c3fa3ae75458bc73"
"checksum js-sys 0.3.45 (registry+https://github.com/rust-lang/crates.io-index)" = "ca059e81d9486668f12d455a4ea6daa600bd408134cd17e3d3fb5a32d1f016f8"
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
"checksum libc 0.2.74 (registry+https://github.com/rust-lang/crates.io-index)" = "a2f02823cf78b754822df5f7f268fb59822e7296276d3e069d8e8cb26a14bd10"
@@ -1659,6 +1755,7 @@ dependencies = [
"checksum pin-project 0.4.23 (registry+https://github.com/rust-lang/crates.io-index)" = "ca4433fff2ae79342e497d9f8ee990d174071408f28f726d6d83af93e58e48aa"
"checksum pin-project-internal 0.4.23 (registry+https://github.com/rust-lang/crates.io-index)" = "2c0e815c3ee9a031fdf5af21c10aa17c573c9c6a566328d99e3936c34e36461f"
"checksum pin-project-lite 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "282adbf10f2698a7a77f8e983a74b2d18176c19a7fd32a45446139ae7b02b715"
"checksum pin-project-lite 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6b063f57ec186e6140e2b8b6921e5f1bd89c7356dda5b33acc5401203ca6131c"
"checksum pin-utils 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
"checksum pkg-config 0.3.18 (registry+https://github.com/rust-lang/crates.io-index)" = "d36492546b6af1463394d46f0c834346f31548646f6ba10849802c9c9a27ac33"
"checksum ppv-lite86 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "237a5ed80e274dbc66f86bd59c1e25edc039660be53194b5fe0a482e0f2612ea"
@@ -1686,17 +1783,20 @@ dependencies = [
"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c"
"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
"checksum redox_syscall 0.1.57 (registry+https://github.com/rust-lang/crates.io-index)" = "41cc0f7e4d5d4544e8861606a285bb08d3e70712ccc7d2b84d7c0ccfaf4b05ce"
"checksum regex 1.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c"
"checksum regex-syntax 0.6.21 (registry+https://github.com/rust-lang/crates.io-index)" = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189"
"checksum remove_dir_all 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
"checksum reqwest 0.10.7 (registry+https://github.com/rust-lang/crates.io-index)" = "12427a5577082c24419c9c417db35cfeb65962efc7675bb6b0d5f1f9d315bfe6"
"checksum reqwest 0.10.9 (registry+https://github.com/rust-lang/crates.io-index)" = "fb15d6255c792356a0f578d8a645c677904dc02e862bebe2ecc18e0c01b9a0ce"
"checksum ryu 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e"
"checksum schannel 0.1.19 (registry+https://github.com/rust-lang/crates.io-index)" = "8f05ba609c234e60bee0d547fe94a4c7e9da733d1c962cf6e59efa4cd9c8bc75"
"checksum scoped-tls 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2"
"checksum security-framework 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "64808902d7d99f78eaddd2b4e2509713babc3dc3c85ad6f4c447680f3c01e535"
"checksum security-framework-sys 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "17bf11d99252f512695eb468de5516e5cf75455521e69dfe343f3b74e4748405"
"checksum serde 1.0.114 (registry+https://github.com/rust-lang/crates.io-index)" = "5317f7588f0a5078ee60ef675ef96735a1442132dc645eb1d12c018620ed8cd3"
"checksum serde_derive 1.0.114 (registry+https://github.com/rust-lang/crates.io-index)" = "2a0be94b04690fbaed37cddffc5c134bf537c8e3329d53e982fe04c374978f8e"
"checksum serde_json 1.0.57 (registry+https://github.com/rust-lang/crates.io-index)" = "164eacbdb13512ec2745fb09d51fd5b22b0d65ed294a1dcf7285a360c80a675c"
"checksum serde_urlencoded 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9ec5d77e2d4c73717816afac02670d5c4f534ea95ed430442cad02e7a6e32c97"
"checksum sha2 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2933378ddfeda7ea26f48c555bdad8bb446bf8a3d17832dc83e380d444cfb8c1"
"checksum serde_urlencoded 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "edfa57a7f8d9c1d260a549e7224100f6c43d43f9103e06dd8b4095a9b2b43ce9"
"checksum sha2 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6e7aab86fe2149bad8c507606bdb3f4ef5e7b2380eb92350f56122cca72a42a8"
"checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac"
"checksum siphasher 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "fa8f3741c7372e75519bd9346068370c9cdaabcc1f9599cbcf2a2719352286b7"
"checksum slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8"
@@ -1710,6 +1810,7 @@ dependencies = [
"checksum tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9"
"checksum tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b"
"checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
"checksum thread_local 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
"checksum time 0.1.43 (registry+https://github.com/rust-lang/crates.io-index)" = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438"
"checksum tinyvec 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "53953d2d3a5ad81d9f844a32f14ebb121f50b650cd59d0ee2a07cf13c617efed"
"checksum tokio 0.2.22 (registry+https://github.com/rust-lang/crates.io-index)" = "5d34ca54d84bf2b5b4d7d31e901a8464f7b60ac145a284fba25ceb801f2ddccd"
@@ -1726,7 +1827,7 @@ dependencies = [
"checksum unicode-normalization 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6fb19cf769fa8c6a80a162df694621ebeb4dafb606470b2b2fce0be40a98a977"
"checksum unicode-width 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
"checksum unicode-xid 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
"checksum url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "829d4a8476c35c9bf0bbce5a3b23f4106f79728039b726d292bb93bc106787cb"
"checksum url 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5909f2b0817350449ed73e8bcd81c8c3c8d9a7a5d8acba4b27db277f1868976e"
"checksum utf-8 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7"
"checksum vcpkg 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "6454029bf181f092ad1b853286f23e2c507d8e8194d01d92da4a55c274a5508c"
"checksum vec_map 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
@@ -1734,12 +1835,14 @@ dependencies = [
"checksum wait-timeout 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9f200f5b12eb75f8c1ed65abd4b2db8a6e1b138a20de009dacee265a2498f3f6"
"checksum want 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1ce8a968cb1cd110d136ff8b819a556d6fb6d919363c61534f6860c7eb172ba0"
"checksum wasi 0.9.0+wasi-snapshot-preview1 (registry+https://github.com/rust-lang/crates.io-index)" = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519"
"checksum wasm-bindgen 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)" = "f0563a9a4b071746dd5aedbc3a28c6fe9be4586fb3fbadb67c400d4f53c6b16c"
"checksum wasm-bindgen-backend 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)" = "bc71e4c5efa60fb9e74160e89b93353bc24059999c0ae0fb03affc39770310b0"
"checksum wasm-bindgen-futures 0.4.17 (registry+https://github.com/rust-lang/crates.io-index)" = "95f8d235a77f880bcef268d379810ea6c0af2eacfa90b1ad5af731776e0c4699"
"checksum wasm-bindgen-macro 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)" = "97c57cefa5fa80e2ba15641578b44d36e7a64279bc5ed43c6dbaf329457a2ed2"
"checksum wasm-bindgen-macro-support 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)" = "841a6d1c35c6f596ccea1f82504a192a60378f64b3bb0261904ad8f2f5657556"
"checksum wasm-bindgen-shared 0.2.67 (registry+https://github.com/rust-lang/crates.io-index)" = "93b162580e34310e5931c4b792560108b10fd14d64915d7fff8ff00180e70092"
"checksum wasm-bindgen 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)" = "1ac64ead5ea5f05873d7c12b545865ca2b8d28adfc50a49b84770a3a97265d42"
"checksum wasm-bindgen-backend 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)" = "f22b422e2a757c35a73774860af8e112bff612ce6cb604224e8e47641a9e4f68"
"checksum wasm-bindgen-futures 0.4.18 (registry+https://github.com/rust-lang/crates.io-index)" = "b7866cab0aa01de1edf8b5d7936938a7e397ee50ce24119aef3e1eaa3b6171da"
"checksum wasm-bindgen-macro 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)" = "6b13312a745c08c469f0b292dd2fcd6411dba5f7160f593da6ef69b64e407038"
"checksum wasm-bindgen-macro-support 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)" = "f249f06ef7ee334cc3b8ff031bfc11ec99d00f34d86da7498396dc1e3b1498fe"
"checksum wasm-bindgen-shared 0.2.68 (registry+https://github.com/rust-lang/crates.io-index)" = "1d649a3145108d7d3fbcde896a468d1bd636791823c9921135218ad89be08307"
"checksum wasm-bindgen-test 0.3.18 (registry+https://github.com/rust-lang/crates.io-index)" = "34d1cdc8b98a557f24733d50a1199c4b0635e465eecba9c45b214544da197f64"
"checksum wasm-bindgen-test-macro 0.3.18 (registry+https://github.com/rust-lang/crates.io-index)" = "e8fb9c67be7439ee8ab1b7db502a49c05e51e2835b66796c705134d9b8e1a585"
"checksum web-sys 0.3.44 (registry+https://github.com/rust-lang/crates.io-index)" = "dda38f4e5ca63eda02c059d243aa25b5f35ab98451e518c51612cd0f1bd19a47"
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
"checksum winapi 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"

View File

@@ -1,7 +1,6 @@
[package]
name = "monolith"
version = "2.3.1"
edition = "2018"
version = "2.4.0"
authors = [
"Sunshine <sunshine@uberspace.net>",
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
@@ -9,19 +8,32 @@ authors = [
"Emi Simpson <emi@alchemi.dev>",
"rhysd <lin90162@yahoo.co.jp>",
]
edition = "2018"
description = "CLI tool for saving web pages as a single HTML file"
homepage = "https://github.com/Y2Z/monolith"
repository = "https://github.com/Y2Z/monolith"
readme = "README.md"
keywords = ["web", "http", "html", "download", "command-line"]
categories = ["command-line-utilities", "web-programming"]
include = [
"src/*.rs",
"Cargo.toml",
]
license = "Unlicense"
license-file = "LICENSE"
[dependencies]
base64 = "0.12.3"
chrono = "0.4.13" # Used to render comments indicating the time the page was saved
clap = "2.33.1"
base64 = "0.13.0"
chrono = "0.4.19" # Used for formatting creation timestamp
clap = "2.33.3"
cssparser = "0.27.2"
html5ever = "0.24.1"
sha2 = "0.9.1" # Used in calculating checksums during integrity checks
url = "2.1.1"
regex = "1.4.2" # Used for parsing srcset
sha2 = "0.9.2" # Used for calculating checksums during integrity checks
url = "2.2.0"
[dependencies.reqwest]
version = "0.10.6"
version = "0.10.9"
default-features = false
features = ["default-tls", "blocking", "gzip"]

View File

@@ -22,6 +22,9 @@ If compared to saving websites with `wget -mpk`, this tool embeds all assets as
## Installation
### Using Cargo
$ cargo install monolith
#### Via Homebrew (on macOS and GNU/Linux)
$ brew install monolith
@@ -54,21 +57,26 @@ The guide can be found [here](docs/containers.md)
---------------------------------------------------
## Options
- `-c`: Ignore styles
- `-f`: Exclude frames
- `-F`: Omit web fonts
- `-a`: Exclude audio sources
- `-b`: Use custom base URL
- `-c`: Exclude CSS
- `-e`: Ignore network errors
- `-f`: Omit frames
- `-F`: Exclude web fonts
- `-i`: Remove images
- `-I`: Isolate the document
- `-j`: Exclude JavaScript
- `-k`: Accept invalid X.509 (TLS) certificates
- `-M`: Don't add timestamp and URL information
- `-o`: Write output to file
- `-s`: Be quiet
- `-t`: Adjust network request timeout
- `-u`: Provide custom User-Agent
- `-v`: Exclude videos
---------------------------------------------------
## HTTPS and HTTP proxies
## Proxies
Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables.
---------------------------------------------------
@@ -82,6 +90,7 @@ Please open an issue if something is wrong, that helps make this project better.
- `Monolith Chrome Extension`: https://github.com/rhysd/monolith-of-web
- `Pagesaver`: https://github.com/distributed-mind/pagesaver
- `Personal WayBack Machine`: https://github.com/popey/pwbm
- `Hako`: https://github.com/dmpop/hako
---------------------------------------------------

View File

@@ -1,4 +1,4 @@
# 2. Network request timeout
# 3. Network request timeout
Date: 2020-02-15

View File

@@ -1,4 +1,4 @@
# 4. Asset Minimization
# 5. Asset Minimization
Date: 2020-03-14

View File

@@ -1,4 +1,4 @@
# 4. Reload and location `meta` tags
# 6. Reload and location `meta` tags
Date: 2020-06-25

View File

@@ -0,0 +1,19 @@
# 7. Network errors
Date: 2020-11-22
## Status
Accepted
## Context
Servers may return information with HTTP response codes other than `200`, however those responses may still contain useful data.
## Decision
Fail by default, notifying of the network error. Add option to continue retrieving assets by treating all response codes as `200`.
## Consequences
Monolith will fail to obtain resources with status other than `200`, unless told to ignore network errors.

View File

@@ -0,0 +1,40 @@
# 8. Base Tag
Date: 2020-12-25
## Status
Accepted
## Context
HTML documents may contain `base` tag, which influences resolution of anchor links and relative URLs as well as dynamically loaded resources.
Sometimes, in order to make certain saved documents function closer to how they operate while being served from a remote server, the `base` tag specifying the source page's URL may need to be added to the document.
There can be only one such tag. If multiple `base` tags are present, only the first encountered tag ends up being used.
## Decision
Adding the `base` tag should be optional — saved documents should not contain the `base` tag unless it was specified by the user, or the document originally had the `base` tag in it.
Existing `href` attribute's value of the original `base` tag should be used for resolving the document's relative links instead of document's own URL (precisely the way browsers do it).
## Consequences
#### If the base tag does not exist in the source document
- If the base tag does not exist in the source document
- With base URL option provided
- use the specified base URL value to retrieve assets, keep original base URL value in the document
- Without base URL option provided
- download document as usual, do not add base tag
- If the base tag already exists in the source document
- With base URL option provided
- we overwrite the original base URL before retrieving assets, keep new base URL value in the document
- Without base URL option provided:
- use the base URL from the original document to retrieve assets, keep original base URL value in the document
The program will obtain ability to retrieve remote assets for non-remote sources (such as data URLs and local files).
The program will obatin ability to get rid of existing base tag values (by provind an empty one).

View File

@@ -176,7 +176,7 @@ pub fn process_css<'a>(
client,
&parent_url,
&import_full_url,
options.silent,
options,
depth + 1,
) {
Ok((import_contents, import_final_url, _import_media_type)) => {
@@ -227,7 +227,7 @@ pub fn process_css<'a>(
client,
&parent_url,
&resolved_url,
options.silent,
options,
depth + 1,
) {
Ok((data, final_url, media_type)) => {
@@ -315,14 +315,8 @@ pub fn process_css<'a>(
if is_import {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
match retrieve_asset(
cache,
client,
&parent_url,
&full_url,
options.silent,
depth + 1,
) {
match retrieve_asset(cache, client, &parent_url, &full_url, options, depth + 1)
{
Ok((css, final_url, _media_type)) => {
let data_url = data_to_data_url(
"text/css",
@@ -361,7 +355,7 @@ pub fn process_css<'a>(
client,
&parent_url,
&full_url,
options.silent,
options,
depth + 1,
) {
Ok((data, final_url, media_type)) => {

File diff suppressed because it is too large Load Diff

View File

@@ -3,18 +3,18 @@ use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use std::collections::HashMap;
use std::env;
use std::fs;
use std::io::{self, Error, Write};
use std::io::{self, prelude::*, Error, Write};
use std::path::Path;
use std::process;
use std::time::Duration;
use monolith::html::{
add_base_tag, add_favicon, has_base_tag, has_favicon, html_to_dom, metadata_tag,
add_favicon, create_metadata_tag, get_base_url, has_favicon, html_to_dom, set_base_url,
stringify_document, walk_and_embed_assets,
};
use monolith::opts::Options;
use monolith::url::{
data_to_data_url, data_url_to_data, is_data_url, is_file_url, is_http_url, resolve_url,
data_to_data_url, is_data_url, is_file_url, is_http_url, parse_data_url, resolve_url,
};
use monolith::utils::retrieve_asset;
@@ -48,12 +48,22 @@ impl Output {
}
}
pub fn read_stdin() -> String {
let mut buffer = String::new();
for line in io::stdin().lock().lines() {
buffer += line.unwrap_or_default().as_str();
buffer += "\n";
}
buffer
}
fn main() {
let options = Options::from_args();
let original_target: &str = &options.target;
let target_url: &str;
let base_url;
let mut base_url: String;
let mut dom;
let mut use_stdin: bool = false;
// Pre-process the input
let cwd_normalized: String =
@@ -64,15 +74,24 @@ fn main() {
// Determine exact target URL
if target.clone().len() == 0 {
eprintln!("No target specified");
if !options.silent {
eprintln!("No target specified");
}
process::exit(1);
} else if target.clone() == "-" {
// Read from pipe (stdin)
use_stdin = true;
// Default target URL to empty data URL; the user can control it via --base-url
target_url = "data:text/html,"
} else if is_http_url(target.clone()) || is_data_url(target.clone()) {
target_url = target.as_str();
} else if is_file_url(target.clone()) {
target_url = target.as_str();
} else if path.exists() {
if !path.is_file() {
eprintln!("Local target is not a file: {}", original_target);
if !options.silent {
eprintln!("Local target is not a file: {}", original_target);
}
process::exit(1);
}
target.insert_str(0, if cfg!(windows) { "file:///" } else { "file://" });
@@ -111,43 +130,62 @@ fn main() {
.build()
.expect("Failed to initialize HTTP client");
// At this stage we assume that the base URL is the same as the target URL
base_url = str!(target_url);
// Retrieve target document
if is_file_url(target_url) || is_http_url(target_url) {
match retrieve_asset(
&mut cache,
&client,
target_url,
target_url,
options.silent,
0,
) {
if use_stdin {
dom = html_to_dom(&read_stdin());
} else if is_file_url(target_url) || is_http_url(target_url) {
match retrieve_asset(&mut cache, &client, target_url, target_url, &options, 0) {
Ok((data, final_url, _media_type)) => {
base_url = final_url;
if options.base_url.clone().unwrap_or(str!()).is_empty() {
base_url = final_url
}
dom = html_to_dom(&String::from_utf8_lossy(&data));
}
Err(_) => {
eprintln!("Could not retrieve target document");
if !options.silent {
eprintln!("Could not retrieve target document");
}
process::exit(1);
}
}
} else if is_data_url(target_url) {
let (media_type, data): (String, Vec<u8>) = data_url_to_data(target_url);
let (media_type, data): (String, Vec<u8>) = parse_data_url(target_url);
if !media_type.eq_ignore_ascii_case("text/html") {
eprintln!("Unsupported data URL media type");
if !options.silent {
eprintln!("Unsupported data URL media type");
}
process::exit(1);
}
base_url = str!(target_url);
dom = html_to_dom(&String::from_utf8_lossy(&data));
} else {
process::exit(1);
}
// Use custom base URL if specified, read and use what's in the DOM otherwise
if !options.base_url.clone().unwrap_or(str!()).is_empty() {
if is_data_url(options.base_url.clone().unwrap()) {
if !options.silent {
eprintln!("Data URLs cannot be used as base URL");
}
process::exit(1);
} else {
base_url = options.base_url.clone().unwrap();
}
} else {
if let Some(existing_base_url) = get_base_url(&dom.document) {
base_url = resolve_url(target_url, existing_base_url).unwrap();
}
}
// Embed remote assets
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
// Take care of BASE tag
if is_http_url(base_url.clone()) && !has_base_tag(&dom.document) {
dom = add_base_tag(&dom.document, base_url.clone());
// Update or add new BASE tag to reroute network requests and hash-links in the final document
if let Some(new_base_url) = options.base_url.clone() {
dom = set_base_url(&dom.document, new_base_url);
}
// Request and embed /favicon.ico (unless it's already linked in the document)
@@ -159,7 +197,7 @@ fn main() {
&client,
&base_url,
&favicon_ico_url,
options.silent,
&options,
0,
) {
Ok((data, final_url, media_type)) => {
@@ -177,7 +215,7 @@ fn main() {
// Add metadata tag
if !options.no_metadata {
let metadata_comment: String = metadata_tag(&base_url);
let metadata_comment: String = create_metadata_tag(&target_url);
result.insert_str(0, &metadata_comment);
if metadata_comment.len() > 0 {
result.insert_str(metadata_comment.len(), "\n");

View File

@@ -2,19 +2,23 @@ use clap::{App, Arg};
#[derive(Default)]
pub struct Options {
pub target: String,
pub no_audio: bool,
pub base_url: Option<String>,
pub no_css: bool,
pub no_fonts: bool,
pub ignore_errors: bool,
pub no_frames: bool,
pub no_fonts: bool,
pub no_images: bool,
pub isolate: bool,
pub no_js: bool,
pub insecure: bool,
pub isolate: bool,
pub no_metadata: bool,
pub output: String,
pub silent: bool,
pub timeout: u64,
pub user_agent: String,
pub no_metadata: bool,
pub no_video: bool,
pub target: String,
}
const ASCII: &str = " \
@@ -36,6 +40,22 @@ impl Options {
.version(crate_version!())
.author(crate_authors!("\n"))
.about(format!("{}\n{}", ASCII, crate_description!()).as_str())
.args_from_usage("-a, --no-audio 'Removes audio sources'")
.args_from_usage("-b, --base-url=[http://localhost/] 'Sets custom base URL'")
.args_from_usage("-c, --no-css 'Removes CSS'")
.args_from_usage("-e, --ignore-errors 'Ignore network errors'")
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
.args_from_usage("-F, --no-fonts 'Removes fonts'")
.args_from_usage("-i, --no-images 'Removes images'")
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
.args_from_usage("-j, --no-js 'Removes JavaScript'")
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
.args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
.args_from_usage("-s, --silent 'Suppresses verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
.args_from_usage("-v, --no-video 'Removes video sources'")
.arg(
Arg::with_name("target")
.required(true)
@@ -43,20 +63,6 @@ impl Options {
.index(1)
.help("URL or file path"),
)
// .args_from_usage("-a, --no-audio 'Removes audio sources'")
.args_from_usage("-c, --no-css 'Removes CSS'")
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
.args_from_usage("-F, --no-fonts 'Removes fonts'")
.args_from_usage("-i, --no-images 'Removes images'")
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
.args_from_usage("-j, --no-js 'Removes JavaScript'")
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
.args_from_usage("-M, --no-metadata 'Excludes metadata information from the document'")
.args_from_usage("-o, --output=[document.html] 'Write output to <file>'")
.args_from_usage("-s, --silent 'Suppresses verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'")
// .args_from_usage("-v, --no-video 'Removes video sources'")
.get_matches();
let mut options: Options = Options::default();
@@ -65,7 +71,12 @@ impl Options {
.value_of("target")
.expect("please set target")
.to_string();
options.no_audio = app.is_present("no-audio");
if let Some(base_url) = app.value_of("base-url") {
options.base_url = Some(str!(base_url));
}
options.no_css = app.is_present("no-css");
options.ignore_errors = app.is_present("ignore-errors");
options.no_frames = app.is_present("no-frames");
options.no_fonts = app.is_present("no-fonts");
options.no_images = app.is_present("no-images");
@@ -84,6 +95,7 @@ impl Options {
.value_of("user-agent")
.unwrap_or(DEFAULT_USER_AGENT)
.to_string();
options.no_video = app.is_present("no-video");
options
}

123
src/tests/cli/base_url.rs Normal file
View File

@@ -0,0 +1,123 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::process::Command;
#[test]
fn add_new_when_provided() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-b")
.arg("http://localhost:8000/")
.arg("data:text/html,Hello%2C%20World!")
.output()
.unwrap();
// STDOUT should contain newly added base URL
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head>\
<base href=\"http://localhost:8000/\"></base>\
</head><body>Hello, World!</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn keep_existing_when_none_provided() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
.output()
.unwrap();
// STDOUT should contain newly added base URL
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head>\
<base href=\"http://localhost:8000/\">\
</head><body>Hello, World!</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn override_existing_when_provided() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-b")
.arg("http://localhost/")
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
.output()
.unwrap();
// STDOUT should contain newly added base URL
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head>\
<base href=\"http://localhost/\">\
</head><body>Hello, World!</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_existing_when_empty_provided() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-b")
.arg("")
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
.output()
.unwrap();
// STDOUT should contain newly added base URL
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head>\
<base href=\"\">\
</head><body>Hello, World!</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
out.assert().code(0);
Ok(())
}
}

View File

@@ -337,7 +337,7 @@ mod passing {
<title>Local HTML file</title>\n \
<link rel=\"stylesheet\" type=\"text/css\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\" src=\"{empty_image}\">\n \
<img src=\"{empty_image}\" alt=\"\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script></script>\n\n\n\n\
@@ -399,7 +399,7 @@ mod passing {
<title>Local HTML file</title>\n \
<link rel=\"stylesheet\" type=\"text/css\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\" src=\"{empty_image}\">\n \
<img src=\"{empty_image}\" alt=\"\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script></script>\n\n\n\n\

2
src/tests/cli/mod.rs Normal file
View File

@@ -0,0 +1,2 @@
mod base_url;
mod basic;

View File

@@ -11,7 +11,7 @@ mod passing {
#[test]
fn empty_input_sha256() {
assert!(html::has_proper_integrity(
assert!(html::check_integrity(
"".as_bytes(),
"sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="
));
@@ -19,7 +19,7 @@ mod passing {
#[test]
fn sha256() {
assert!(html::has_proper_integrity(
assert!(html::check_integrity(
"abcdef0123456789".as_bytes(),
"sha256-9EWAHgy4mSYsm54hmDaIDXPKLRsLnBX7lZyQ6xISNOM="
));
@@ -27,7 +27,7 @@ mod passing {
#[test]
fn sha384() {
assert!(html::has_proper_integrity(
assert!(html::check_integrity(
"abcdef0123456789".as_bytes(),
"sha384-gc9l7omltke8C33bedgh15E12M7RrAQa5t63Yb8APlpe7ZhiqV23+oqiulSJl3Kw"
));
@@ -35,7 +35,7 @@ mod passing {
#[test]
fn sha512() {
assert!(html::has_proper_integrity(
assert!(html::check_integrity(
"abcdef0123456789".as_bytes(),
"sha512-zG5B88cYMqcdiMi9gz0XkOFYw2BpjeYdn5V6+oFrMgSNjRpqL7EF8JEwl17ztZbK3N7I/tTwp3kxQbN1RgFBww=="
));
@@ -55,20 +55,17 @@ mod failing {
#[test]
fn empty_hash() {
assert!(!html::has_proper_integrity(
"abcdef0123456789".as_bytes(),
""
));
assert!(!html::check_integrity("abcdef0123456789".as_bytes(), ""));
}
#[test]
fn empty_input_empty_hash() {
assert!(!html::has_proper_integrity("".as_bytes(), ""));
assert!(!html::check_integrity("".as_bytes(), ""));
}
#[test]
fn sha256() {
assert!(!html::has_proper_integrity(
assert!(!html::check_integrity(
"abcdef0123456789".as_bytes(),
"sha256-badhash"
));
@@ -76,7 +73,7 @@ mod failing {
#[test]
fn sha384() {
assert!(!html::has_proper_integrity(
assert!(!html::check_integrity(
"abcdef0123456789".as_bytes(),
"sha384-badhash"
));
@@ -84,7 +81,7 @@ mod failing {
#[test]
fn sha512() {
assert!(!html::has_proper_integrity(
assert!(!html::check_integrity(
"abcdef0123456789".as_bytes(),
"sha512-badhash"
));

View File

@@ -14,7 +14,7 @@ mod passing {
fn isolated() {
let mut options = Options::default();
options.isolate = true;
let csp_content = html::csp(&options);
let csp_content = html::compose_csp(&options);
assert_eq!(csp_content, "default-src 'unsafe-inline' data:;");
}
@@ -23,7 +23,7 @@ mod passing {
fn no_css() {
let mut options = Options::default();
options.no_css = true;
let csp_content = html::csp(&options);
let csp_content = html::compose_csp(&options);
assert_eq!(csp_content, "style-src 'none';");
}
@@ -32,7 +32,7 @@ mod passing {
fn no_fonts() {
let mut options = Options::default();
options.no_fonts = true;
let csp_content = html::csp(&options);
let csp_content = html::compose_csp(&options);
assert_eq!(csp_content, "font-src 'none';");
}
@@ -41,7 +41,7 @@ mod passing {
fn no_frames() {
let mut options = Options::default();
options.no_frames = true;
let csp_content = html::csp(&options);
let csp_content = html::compose_csp(&options);
assert_eq!(csp_content, "frame-src 'none'; child-src 'none';");
}
@@ -50,7 +50,7 @@ mod passing {
fn no_js() {
let mut options = Options::default();
options.no_js = true;
let csp_content = html::csp(&options);
let csp_content = html::compose_csp(&options);
assert_eq!(csp_content, "script-src 'none';");
}
@@ -59,7 +59,7 @@ mod passing {
fn no_images() {
let mut options = Options::default();
options.no_images = true;
let csp_content = html::csp(&options);
let csp_content = html::compose_csp(&options);
assert_eq!(csp_content, "img-src data:;");
}
@@ -73,7 +73,7 @@ mod passing {
options.no_frames = true;
options.no_js = true;
options.no_images = true;
let csp_content = html::csp(&options);
let csp_content = html::compose_csp(&options);
assert_eq!(csp_content, "default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;");
}

View File

@@ -15,7 +15,7 @@ mod passing {
fn http_url() {
let url = "http://192.168.1.1/";
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::metadata_tag(url);
let metadata_comment: String = html::create_metadata_tag(url);
assert_eq!(
metadata_comment,
@@ -33,7 +33,7 @@ mod passing {
fn file_url() {
let url = "file:///home/monolith/index.html";
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::metadata_tag(url);
let metadata_comment: String = html::create_metadata_tag(url);
assert_eq!(
metadata_comment,
@@ -50,7 +50,7 @@ mod passing {
fn data_url() {
let url = "data:text/html,Hello%2C%20World!";
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::metadata_tag(url);
let metadata_comment: String = html::create_metadata_tag(url);
assert_eq!(
metadata_comment,
@@ -77,6 +77,6 @@ mod failing {
#[test]
fn empty_string() {
assert_eq!(html::metadata_tag(""), "");
assert_eq!(html::create_metadata_tag(""), "");
}
}

View File

@@ -24,8 +24,77 @@ mod passing {
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
assert_eq!(
embedded_css,
format!("{} 1x, {} 2x", empty_image!(), empty_image!()),
embedded_css
);
}
#[test]
fn commas_within_file_names() {
let cache = &mut HashMap::new();
let client = Client::new();
let srcset_value = "small,s.png 1x, large,l.png 2x";
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
assert_eq!(
embedded_css,
format!("{} 1x, {} 2x", empty_image!(), empty_image!()),
);
}
#[test]
fn tabs_and_newlines_after_commas() {
let cache = &mut HashMap::new();
let client = Client::new();
let srcset_value = "small,s.png 1x,\nmedium,m.png 2x,\nlarge,l.png 3x";
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
assert_eq!(
embedded_css,
format!(
"{} 1x, {} 2x, {} 3x",
empty_image!(),
empty_image!(),
empty_image!()
),
);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::html;
use crate::opts::Options;
#[test]
fn trailing_comma() {
let cache = &mut HashMap::new();
let client = Client::new();
let srcset_value = "small.png 1x, large.png 2x,";
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
assert_eq!(
embedded_css,
format!("{} 1x, {} 2x,", empty_image!(), empty_image!()),
);
}
}

View File

@@ -0,0 +1,104 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use crate::html;
#[test]
fn present() {
let html = "<!doctype html>
<html>
<head>
<base href=\"https://musicbrainz.org\" />
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html);
assert_eq!(
html::get_base_url(&dom.document),
Some(str!("https://musicbrainz.org"))
);
}
#[test]
fn multiple_tags() {
let html = "<!doctype html>
<html>
<head>
<base href=\"https://www.discogs.com/\" />
<base href=\"https://musicbrainz.org\" />
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html);
assert_eq!(
html::get_base_url(&dom.document),
Some(str!("https://www.discogs.com/"))
);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use crate::html;
#[test]
fn absent() {
let html = "<!doctype html>
<html>
<head>
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html);
assert_eq!(html::get_base_url(&dom.document), None);
}
#[test]
fn no_href() {
let html = "<!doctype html>
<html>
<head>
<base />
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html);
assert_eq!(html::get_base_url(&dom.document), None);
}
#[test]
fn empty_href() {
let html = "<!doctype html>
<html>
<head>
<base href=\"\" />
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html);
assert_eq!(html::get_base_url(&dom.document), Some(str!()));
}
}

View File

@@ -0,0 +1,54 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use html5ever::rcdom::{Handle, NodeData};
use crate::html;
#[test]
fn div_two_style_attributes() {
let html = "<!doctype html><html><head></head><body><DIV STYLE=\"color: blue;\" style=\"display: none;\"></div></body></html>";
let dom = html::html_to_dom(&html);
let mut count = 0;
fn test_walk(node: &Handle, i: &mut i8) {
*i += 1;
match &node.data {
NodeData::Document => {
// Dig deeper
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
NodeData::Element { ref name, .. } => {
let node_name = name.local.as_ref().to_string();
if node_name == "body" {
assert_eq!(html::get_node_attr(node, "class"), None);
} else if node_name == "div" {
assert_eq!(
html::get_node_attr(node, "style"),
Some(str!("color: blue;"))
);
}
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
_ => (),
};
}
test_walk(&dom.document, &mut count);
assert_eq!(count, 6);
}
}

View File

@@ -12,7 +12,7 @@ mod passing {
use crate::html;
#[test]
fn get_node_name() {
fn parent_node_names() {
let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
let dom = html::html_to_dom(&html);
let mut count = 0;

View File

@@ -8,7 +8,6 @@
#[cfg(test)]
mod passing {
use crate::html;
use crate::opts::Options;
#[test]
fn icon() {
@@ -39,7 +38,6 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::html;
use crate::opts::Options;
#[test]
fn absent() {

View File

@@ -1,10 +1,13 @@
mod add_favicon;
mod csp;
mod check_integrity;
mod compose_csp;
mod create_metadata_tag;
mod embed_srcset;
mod get_base_url;
mod get_node_attr;
mod get_node_name;
mod has_favicon;
mod has_proper_integrity;
mod is_icon;
mod metadata_tag;
mod set_node_attr;
mod stringify_document;
mod walk_and_embed_assets;

View File

@@ -0,0 +1,105 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use html5ever::rcdom::{Handle, NodeData};
use crate::html;
#[test]
fn html_lang_and_body_style() {
let html = "<!doctype html><html lang=\"en\"><head></head><body></body></html>";
let dom = html::html_to_dom(&html);
let mut count = 0;
fn test_walk(node: &Handle, i: &mut i8) {
*i += 1;
match &node.data {
NodeData::Document => {
// Dig deeper
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
NodeData::Element { ref name, .. } => {
let node_name = name.local.as_ref().to_string();
if node_name == "html" {
assert_eq!(html::get_node_attr(node, "lang"), Some(str!("en")));
html::set_node_attr(node, "lang", Some(str!("de")));
assert_eq!(html::get_node_attr(node, "lang"), Some(str!("de")));
html::set_node_attr(node, "lang", None);
assert_eq!(html::get_node_attr(node, "lang"), None);
html::set_node_attr(node, "lang", Some(str!("")));
assert_eq!(html::get_node_attr(node, "lang"), Some(str!("")));
} else if node_name == "body" {
assert_eq!(html::get_node_attr(node, "style"), None);
html::set_node_attr(node, "style", Some(str!("display: none;")));
assert_eq!(
html::get_node_attr(node, "style"),
Some(str!("display: none;"))
);
}
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
_ => (),
};
}
test_walk(&dom.document, &mut count);
assert_eq!(count, 5);
}
#[test]
fn body_background() {
let html = "<!doctype html><html lang=\"en\"><head></head><body background=\"1\" background=\"2\"></body></html>";
let dom = html::html_to_dom(&html);
let mut count = 0;
fn test_walk(node: &Handle, i: &mut i8) {
*i += 1;
match &node.data {
NodeData::Document => {
// Dig deeper
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
NodeData::Element { ref name, .. } => {
let node_name = name.local.as_ref().to_string();
if node_name == "body" {
assert_eq!(html::get_node_attr(node, "background"), Some(str!("1")));
html::set_node_attr(node, "background", None);
assert_eq!(html::get_node_attr(node, "background"), None);
}
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
_ => (),
};
}
test_walk(&dom.document, &mut count);
assert_eq!(count, 5);
}
}

View File

@@ -319,8 +319,8 @@ mod passing {
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head>\
<meta>\
<meta>\
<meta http-equiv=\"disabled by monolith (Refresh)\" value=\"20\">\
<meta http-equiv=\"disabled by monolith (Location)\" value=\"https://freebsd.org\">\
</head>\
<body></body>\
</html>"

View File

@@ -4,4 +4,5 @@ mod html;
mod js;
mod macros;
mod opts;
mod url;
mod utils;

View File

@@ -14,6 +14,7 @@ mod passing {
let options: Options = Options::default();
assert_eq!(options.target, str!());
assert_eq!(options.no_audio, false);
assert_eq!(options.no_css, false);
assert_eq!(options.no_frames, false);
assert_eq!(options.no_fonts, false);
@@ -26,5 +27,6 @@ mod passing {
assert_eq!(options.silent, false);
assert_eq!(options.timeout, 0);
assert_eq!(options.user_agent, "");
assert_eq!(options.no_video, false);
}
}

View File

@@ -18,9 +18,31 @@ mod passing {
"test"
);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use crate::url;
#[test]
fn https_empty() {
assert_eq!(url::get_url_fragment("https://kernel.org#"), "");
}
#[test]
fn no_fragment() {
assert_eq!(url::get_url_fragment("https://kernel.org"), "");
}
#[test]
fn dummy_data_url() {
assert_eq!(url::get_url_fragment("data:text/html,"), "");
}
}

View File

@@ -1,12 +1,12 @@
mod clean_url;
mod data_to_data_url;
mod data_url_to_data;
mod decode_url;
mod file_url_to_fs_path;
mod get_url_fragment;
mod is_data_url;
mod is_file_url;
mod is_http_url;
mod parse_data_url;
mod resolve_url;
mod url_has_protocol;
mod url_with_fragment;

View File

@@ -11,7 +11,7 @@ mod passing {
#[test]
fn parse_text_html_base64() {
let (media_type, data) = url::data_url_to_data("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
let (media_type, data) = url::parse_data_url("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
assert_eq!(media_type, "text/html");
assert_eq!(
@@ -22,7 +22,7 @@ mod passing {
#[test]
fn parse_text_html_utf8() {
let (media_type, data) = url::data_url_to_data(
let (media_type, data) = url::parse_data_url(
"data:text/html;utf8,Work expands so as to fill the time available for its completion",
);
@@ -35,7 +35,7 @@ mod passing {
#[test]
fn parse_text_html_plaintext() {
let (media_type, data) = url::data_url_to_data(
let (media_type, data) = url::parse_data_url(
"data:text/html,Work expands so as to fill the time available for its completion",
);
@@ -48,7 +48,7 @@ mod passing {
#[test]
fn parse_text_html_charset_utf_8_between_two_whitespaces() {
let (media_type, data) = url::data_url_to_data(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
let (media_type, data) = url::parse_data_url(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
assert_eq!(media_type, "text/html");
assert_eq!(
@@ -59,8 +59,7 @@ mod passing {
#[test]
fn parse_text_css_url_encoded() {
let (media_type, data) =
url::data_url_to_data("data:text/css,div{background-color:%23000}");
let (media_type, data) = url::parse_data_url("data:text/css,div{background-color:%23000}");
assert_eq!(media_type, "text/css");
assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}");
@@ -68,7 +67,7 @@ mod passing {
#[test]
fn parse_no_media_type_base64() {
let (media_type, data) = url::data_url_to_data("data:;base64,dGVzdA==");
let (media_type, data) = url::parse_data_url("data:;base64,dGVzdA==");
assert_eq!(media_type, "");
assert_eq!(String::from_utf8_lossy(&data), "test");
@@ -76,7 +75,7 @@ mod passing {
#[test]
fn parse_no_media_type_no_encoding() {
let (media_type, data) = url::data_url_to_data("data:;,test%20test");
let (media_type, data) = url::parse_data_url("data:;,test%20test");
assert_eq!(media_type, "");
assert_eq!(String::from_utf8_lossy(&data), "test test");
@@ -96,7 +95,7 @@ mod failing {
#[test]
fn just_word_data() {
let (media_type, data) = url::data_url_to_data("data");
let (media_type, data) = url::parse_data_url("data");
assert_eq!(media_type, "");
assert_eq!(String::from_utf8_lossy(&data), "");

View File

@@ -7,208 +7,165 @@
#[cfg(test)]
mod passing {
use url::ParseError;
use crate::url;
#[test]
fn from_https_to_level_up_relative() -> Result<(), ParseError> {
let resolved_url =
url::resolve_url("https://www.kernel.org", "../category/signatures.html")?;
fn from_https_to_level_up_relative() {
assert_eq!(
resolved_url.as_str(),
url::resolve_url("https://www.kernel.org", "../category/signatures.html")
.unwrap_or_default(),
"https://www.kernel.org/category/signatures.html"
);
Ok(())
}
#[test]
fn from_just_filename_to_full_https_url() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
"saved_page.htm",
"https://www.kernel.org/category/signatures.html",
)?;
fn from_just_filename_to_full_https_url() {
assert_eq!(
resolved_url.as_str(),
url::resolve_url(
"saved_page.htm",
"https://www.kernel.org/category/signatures.html",
)
.unwrap_or_default(),
"https://www.kernel.org/category/signatures.html"
);
Ok(())
}
#[test]
fn from_https_url_to_url_with_no_protocol() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
"https://www.kernel.org",
"//www.kernel.org/theme/images/logos/tux.png",
)?;
fn from_https_url_to_url_with_no_protocol() {
assert_eq!(
resolved_url.as_str(),
url::resolve_url(
"https://www.kernel.org",
"//www.kernel.org/theme/images/logos/tux.png",
)
.unwrap_or_default(),
"https://www.kernel.org/theme/images/logos/tux.png"
);
Ok(())
}
#[test]
fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() -> Result<(), ParseError>
{
let resolved_url = url::resolve_url(
"https://www.kernel.org",
"//another-host.org/theme/images/logos/tux.png",
)?;
fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() {
assert_eq!(
resolved_url.as_str(),
url::resolve_url(
"https://www.kernel.org",
"//another-host.org/theme/images/logos/tux.png",
)
.unwrap_or_default(),
"https://another-host.org/theme/images/logos/tux.png"
);
Ok(())
}
#[test]
fn from_https_url_to_relative_root_path() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
"https://www.kernel.org/category/signatures.html",
"/theme/images/logos/tux.png",
)?;
fn from_https_url_to_relative_root_path() {
assert_eq!(
resolved_url.as_str(),
url::resolve_url(
"https://www.kernel.org/category/signatures.html",
"/theme/images/logos/tux.png",
)
.unwrap_or_default(),
"https://www.kernel.org/theme/images/logos/tux.png"
);
Ok(())
}
#[test]
fn from_https_to_just_filename() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
"https://www.w3schools.com/html/html_iframe.asp",
"default.asp",
)?;
fn from_https_to_just_filename() {
assert_eq!(
resolved_url.as_str(),
url::resolve_url(
"https://www.w3schools.com/html/html_iframe.asp",
"default.asp",
)
.unwrap_or_default(),
"https://www.w3schools.com/html/default.asp"
);
Ok(())
}
#[test]
fn from_data_url_to_https() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"https://www.kernel.org/category/signatures.html",
)?;
fn from_data_url_to_https() {
assert_eq!(
resolved_url.as_str(),
url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"https://www.kernel.org/category/signatures.html",
)
.unwrap_or_default(),
"https://www.kernel.org/category/signatures.html"
);
Ok(())
}
#[test]
fn from_data_url_to_data_url() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K",
)?;
fn from_data_url_to_data_url() {
assert_eq!(
resolved_url.as_str(),
url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K",
)
.unwrap_or_default(),
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K"
);
Ok(())
}
#[test]
fn from_file_url_to_relative_path() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
"file:///home/user/Websites/my-website/index.html",
"assets/images/logo.png",
)
.unwrap_or(str!());
fn from_file_url_to_relative_path() {
assert_eq!(
resolved_url.as_str(),
url::resolve_url(
"file:///home/user/Websites/my-website/index.html",
"assets/images/logo.png",
)
.unwrap_or_default(),
"file:///home/user/Websites/my-website/assets/images/logo.png"
);
Ok(())
}
#[test]
fn from_file_url_to_relative_path_with_backslashes() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
"file:\\\\\\home\\user\\Websites\\my-website\\index.html",
"assets\\images\\logo.png",
)
.unwrap_or(str!());
fn from_file_url_to_relative_path_with_backslashes() {
assert_eq!(
resolved_url.as_str(),
url::resolve_url(
"file:\\\\\\home\\user\\Websites\\my-website\\index.html",
"assets\\images\\logo.png",
)
.unwrap_or_default(),
"file:///home/user/Websites/my-website/assets/images/logo.png"
);
Ok(())
}
#[test]
fn from_data_url_to_file_url() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"file:///etc/passwd",
)
.unwrap_or(str!());
assert_eq!(resolved_url.as_str(), "file:///etc/passwd");
Ok(())
}
#[test]
fn preserve_fragment() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
"http://doesnt-matter.local/",
"css/fonts/fontmarvelous.svg#fontmarvelous",
)
.unwrap_or(str!());
fn from_data_url_to_file_url() {
assert_eq!(
resolved_url.as_str(),
url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"file:///etc/passwd",
)
.unwrap_or_default(),
"file:///etc/passwd"
);
}
#[test]
fn preserve_fragment() {
assert_eq!(
url::resolve_url(
"http://doesnt-matter.local/",
"css/fonts/fontmarvelous.svg#fontmarvelous",
)
.unwrap_or_default(),
"http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous"
);
Ok(())
}
#[test]
fn resolve_from_file_url_to_file_url() -> Result<(), ParseError> {
let resolved_url = if cfg!(windows) {
url::resolve_url("file:///c:/index.html", "file:///c:/image.png").unwrap_or(str!())
} else {
url::resolve_url("file:///tmp/index.html", "file:///tmp/image.png").unwrap_or(str!())
};
fn resolve_from_file_url_to_file_url() {
assert_eq!(
resolved_url.as_str(),
if cfg!(windows) {
url::resolve_url("file:///c:/index.html", "file:///c:/image.png")
.unwrap_or_default()
} else {
url::resolve_url("file:///tmp/index.html", "file:///tmp/image.png")
.unwrap_or_default()
},
if cfg!(windows) {
"file:///c:/image.png"
} else {
"file:///tmp/image.png"
}
);
Ok(())
}
}
@@ -222,18 +179,16 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::url;
use url::ParseError;
#[test]
fn from_data_url_to_url_with_no_protocol() -> Result<(), ParseError> {
let resolved_url = url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"//www.w3schools.com/html/html_iframe.asp",
)
.unwrap_or(str!());
assert_eq!(resolved_url.as_str(), "");
Ok(())
fn from_data_url_to_url_with_no_protocol() {
assert_eq!(
url::resolve_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
"//www.w3schools.com/html/html_iframe.asp",
)
.unwrap_or_default(),
""
);
}
}

View File

@@ -54,6 +54,11 @@ mod passing {
"MAILTO:somebody@somewhere.com?subject=hello"
));
}
#[test]
fn empty_data_url() {
assert!(url::url_has_protocol("data:text/html,"));
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@@ -65,13 +70,11 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::utils;
use crate::url;
#[test]
fn url_with_no_protocol() {
assert!(!url::url_has_protocol(
"//some-hostname.com/some-file.html"
));
assert!(!url::url_has_protocol("//some-hostname.com/some-file.html"));
}
#[test]

View File

@@ -11,6 +11,7 @@ mod passing {
use std::collections::HashMap;
use std::env;
use crate::opts::Options;
use crate::url;
use crate::utils;
@@ -19,6 +20,9 @@ mod passing {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
// If both source and target are data URLs,
// ensure the result contains target data URL
let (data, final_url, media_type) = utils::retrieve_asset(
@@ -26,7 +30,7 @@ mod passing {
&client,
"data:text/html;base64,c291cmNl",
"data:text/html;base64,dGFyZ2V0",
false,
&options,
0,
)
.unwrap();
@@ -46,6 +50,9 @@ mod passing {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
// Inclusion of local assets from local sources should be allowed
@@ -63,7 +70,7 @@ mod passing {
file = file_url_protocol,
cwd = cwd.to_str().unwrap()
),
false,
&options,
0,
)
.unwrap();
@@ -91,6 +98,7 @@ mod failing {
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::opts::Options;
use crate::utils;
#[test]
@@ -98,13 +106,16 @@ mod failing {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
// Inclusion of local assets from data URL sources should not be allowed
match utils::retrieve_asset(
cache,
&client,
"data:text/html;base64,SoUrCe",
"file:///etc/passwd",
false,
&options,
0,
) {
Ok((..)) => {
@@ -121,13 +132,16 @@ mod failing {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
// Inclusion of local assets from remote sources should not be allowed
match utils::retrieve_asset(
cache,
&client,
"https://kernel.org/",
"file:///etc/passwd",
false,
&options,
0,
) {
Ok((..)) => {

View File

@@ -33,45 +33,6 @@ pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str) -> String {
format!("data:{};base64,{}", media_type, base64::encode(data))
}
pub fn data_url_to_data<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
let path: String = parsed_url.path().to_string();
let comma_loc: usize = path.find(',').unwrap_or(path.len());
let meta_data: String = path.chars().take(comma_loc).collect();
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
let text: String = decode_url(raw_data);
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
let mut media_type: String = str!();
let mut encoding: &str = "";
let mut i: i8 = 0;
for item in &meta_data_items {
if i == 0 {
media_type = str!(item);
} else {
if item.eq_ignore_ascii_case("base64")
|| item.eq_ignore_ascii_case("utf8")
|| item.eq_ignore_ascii_case("charset=UTF-8")
{
encoding = item;
}
}
i = i + 1;
}
let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
base64::decode(&text).unwrap_or(vec![])
} else {
text.as_bytes().to_vec()
};
(media_type, data)
}
pub fn decode_url(input: String) -> String {
let input: String = input.replace("+", "%2B");
@@ -113,10 +74,9 @@ pub fn file_url_to_fs_path(url: &str) -> String {
}
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
if Url::parse(url.as_ref()).unwrap().fragment() == None {
str!()
} else {
str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap())
match Url::parse(url.as_ref()) {
Ok(parsed_url) => parsed_url.fragment().unwrap_or("").to_string(),
Err(_err) => str!(),
}
}
@@ -138,6 +98,45 @@ pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
.unwrap_or(false)
}
pub fn parse_data_url<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
let path: String = parsed_url.path().to_string();
let comma_loc: usize = path.find(',').unwrap_or(path.len());
let meta_data: String = path.chars().take(comma_loc).collect();
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
let text: String = decode_url(raw_data);
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
let mut media_type: String = str!();
let mut encoding: &str = "";
let mut i: i8 = 0;
for item in &meta_data_items {
if i == 0 {
media_type = str!(item);
} else {
if item.eq_ignore_ascii_case("base64")
|| item.eq_ignore_ascii_case("utf8")
|| item.eq_ignore_ascii_case("charset=UTF-8")
{
encoding = item;
}
}
i = i + 1;
}
let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
base64::decode(&text).unwrap_or(vec![])
} else {
text.as_bytes().to_vec()
};
(media_type, data)
}
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
let result = if is_http_url(to.as_ref()) {
to.as_ref().to_string()

View File

@@ -4,7 +4,8 @@ use std::collections::HashMap;
use std::fs;
use std::path::Path;
use crate::url::{clean_url, data_url_to_data, file_url_to_fs_path, is_data_url, is_file_url};
use crate::opts::Options;
use crate::url::{clean_url, file_url_to_fs_path, is_data_url, is_file_url, parse_data_url};
const INDENT: &str = " ";
@@ -73,7 +74,7 @@ pub fn retrieve_asset(
client: &Client,
parent_url: &str,
url: &str,
opt_silent: bool,
options: &Options,
depth: u32,
) -> Result<(Vec<u8>, String, String), reqwest::Error> {
if url.len() == 0 {
@@ -82,7 +83,7 @@ pub fn retrieve_asset(
}
if is_data_url(&url) {
let (media_type, data) = data_url_to_data(url);
let (media_type, data) = parse_data_url(url);
Ok((data, url.to_string(), media_type))
} else if is_file_url(&url) {
// Check if parent_url is also file:///
@@ -95,7 +96,7 @@ pub fn retrieve_asset(
let fs_file_path: String = file_url_to_fs_path(url);
let path = Path::new(&fs_file_path);
if path.exists() {
if !opt_silent {
if !options.silent {
eprintln!("{}{}", indent(depth).as_str(), &url);
}
@@ -109,7 +110,7 @@ pub fn retrieve_asset(
if cache.contains_key(&cache_key) {
// URL is in cache, we get and return it
if !opt_silent {
if !options.silent {
eprintln!("{}{} (from cache)", indent(depth).as_str(), &url);
}
@@ -120,34 +121,46 @@ pub fn retrieve_asset(
))
} else {
// URL not in cache, we retrieve the file
let mut response = client.get(url).send()?;
let res_url = response.url().to_string();
match client.get(url).send() {
Ok(mut response) => {
if !options.ignore_errors && response.status() != 200 {
if !options.silent {
eprintln!("Unable to retrieve {} ({})", &url, response.status());
}
// Provoke error
return Err(client.get("").send().unwrap_err());
}
if !opt_silent {
if url == res_url {
eprintln!("{}{}", indent(depth).as_str(), &url);
} else {
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &res_url);
let res_url = response.url().to_string();
if !options.silent {
if url == res_url {
eprintln!("{}{}", indent(depth).as_str(), &url);
} else {
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &res_url);
}
}
let new_cache_key: String = clean_url(&res_url);
// Convert response into a byte array
let mut data: Vec<u8> = vec![];
response.copy_to(&mut data)?;
// Attempt to obtain media type by reading the Content-Type header
let media_type = response
.headers()
.get(CONTENT_TYPE)
.and_then(|header| header.to_str().ok())
.unwrap_or("");
// Add retrieved resource to cache
cache.insert(new_cache_key, data.clone());
Ok((data, res_url, media_type.to_string()))
}
Err(error) => Err(error),
}
let new_cache_key: String = clean_url(&res_url);
// Convert response into a byte array
let mut data: Vec<u8> = vec![];
response.copy_to(&mut data)?;
// Attempt to obtain media type by reading the Content-Type header
let media_type = response
.headers()
.get(CONTENT_TYPE)
.and_then(|header| header.to_str().ok())
.unwrap_or("");
// Add to cache
cache.insert(new_cache_key, data.clone());
Ok((data, res_url, media_type.to_string()))
}
}
}