157 Commits

Author SHA1 Message Date
Sunshine
cbbb297473 Merge pull request #251 from snshn/bump-version-again
Bump version number to 2.4.1
2021-03-09 02:17:17 -10:00
Sunshine
98ddb821a5 bump version number 2021-03-09 02:07:07 -10:00
Sunshine
be097b1d4e Merge pull request #250 from snshn/alternate-stylesheets
Embed alternate stylesheets
2021-03-09 01:58:08 -10:00
Sunshine
325688acf5 add test for alternate stylesheets 2021-03-09 01:48:41 -10:00
Sunshine
11207d49d2 embed alternate stylesheets 2021-03-09 01:46:15 -10:00
Sunshine
96da64e193 Merge pull request #247 from snshn/cc0
Change project license to CC0 1.0 Universal (CC0 1.0)
2021-03-01 13:28:49 -10:00
Sunshine
8a62a51210 Merge pull request #248 from snshn/update-container-instructions
Running in container instructions update
2021-02-28 23:24:10 -10:00
Sunshine
a6ac1df93d running in container instructions update 2021-02-28 21:46:38 -10:00
Sunshine
49e81149df switch license to CC0-1.0 2021-02-28 19:54:46 -10:00
Sunshine
a3516b2ae9 Merge pull request #245 from snshn/change-meta-charset-to-utf-8
Forcefully set document's charset to UTF-8
2021-02-23 23:48:49 -10:00
Sunshine
385301bf16 clean up unused code 2021-02-23 23:39:51 -10:00
Sunshine
4921a70dda Merge branch 'master' into change-meta-charset-to-utf-8 2021-02-23 23:38:03 -10:00
Sunshine
e0273c664a forcefully set document's charset to UTF-8 2021-02-23 23:35:35 -10:00
Sunshine
6d629bfd4a Merge pull request #244 from snshn/process-noscript
Process contents of NOSCRIPT tags
2021-02-22 20:13:26 -10:00
Sunshine
ae9d78a891 process contents of NOSCRIPT tags 2021-02-22 19:42:39 -10:00
Sunshine
0f55fb3c49 Merge pull request #243 from snshn/fix-embedding-picture-srcset
Fix embedding of srcset assets for PICTURE nodes
2021-02-22 16:27:22 -10:00
Sunshine
e41fd6a1c6 fix embedding of srcset for PICTURE nodes 2021-02-22 16:21:12 -10:00
Sunshine
eaf662bb3b Update README.md 2021-02-15 15:38:06 -10:00
Sunshine
fa71f6a42c Merge pull request #240 from snshn/color
Add color to asset download log
2021-01-30 10:48:35 -10:00
Sunshine
9a27c6c5ee add color to asset download log 2021-01-29 20:24:35 -10:00
Sunshine
4ad07c0519 Merge pull request #239 from snshn/update-crates
Update dependencies
2021-01-29 17:27:43 -10:00
Sunshine
e78405f2ae update dependencies 2021-01-29 17:19:38 -10:00
Sunshine
e81462be41 Merge pull request #237 from snshn/choco
Add Chocolatey spec file
2020-12-31 15:32:27 -10:00
Sunshine
b972d717ce add chocolatey spec 2020-12-31 15:30:41 -10:00
Sunshine
edb679d2b3 Merge pull request #236 from snshn/pipe-in-target-test
Add test for stdin pipe
2020-12-31 14:44:57 -10:00
Sunshine
2e1462a953 add test for stdin pipe 2020-12-31 14:38:31 -10:00
Sunshine
57883b84b2 Merge pull request #235 from snshn/allow-empty-user-agent-string
Make it possible to specify an empty user-agent string
2020-12-31 13:02:35 -10:00
Sunshine
4fa2eda983 make it possible to specify an empty user-agent string 2020-12-31 12:57:22 -10:00
Sunshine
028187a31e Merge pull request #234 from snshn/update-dependencies
Update crates
2020-12-28 12:11:25 -10:00
Sunshine
c469c30cbd update crates 2020-12-28 12:04:27 -10:00
Sunshine
6de36243f9 Fix armhf build in cd.yml 2020-12-27 05:52:47 -10:00
Sunshine
4f162d0cc0 Update README.md 2020-12-25 22:59:24 -10:00
Sunshine
95040173fc Merge pull request #233 from snshn/cargo-fix-license-identifier
Fix license identifier in Cargo.toml
2020-12-25 22:42:31 -10:00
Sunshine
b10d41f82e fix license identifier in Cargo.toml 2020-12-25 22:41:31 -10:00
Sunshine
4c2c55d166 Merge pull request #232 from snshn/fix-cargo-toml
Reduce amount of keywords to 5 (max)
2020-12-25 22:34:08 -10:00
Sunshine
2dd1c465e4 reduce amount of keywords to 5 (max) 2020-12-25 22:28:19 -10:00
Sunshine
a5afda9c80 Merge pull request #229 from snshn/cargo-install
Update Cargo.toml for publishing on crates.io
2020-12-25 22:15:13 -10:00
Sunshine
ab6fed6d1f Merge pull request #231 from snshn/fix-srcset
Fix srcset parsing
2020-12-25 22:07:45 -10:00
Sunshine
f8dcb335e7 bump version, make possible to install via cargo 2020-12-25 22:07:19 -10:00
Sunshine
913051870a Merge pull request #230 from snshn/stdin
Make possible to use stdin as input method
2020-12-25 21:58:14 -10:00
Sunshine
614a518475 fix srcset parsing 2020-12-25 21:56:40 -10:00
Sunshine
870a4b150e make possible to use stdin as input method 2020-12-25 21:23:29 -10:00
Sunshine
0533b287b7 Merge pull request #228 from snshn/audio-video-support
Add support for embedding video and audio files
2020-12-25 16:55:42 -10:00
Sunshine
4ba4285b6b add support for embedding video and audio files 2020-12-25 16:49:43 -10:00
Sunshine
2b9caf9840 Merge pull request #227 from snshn/fix-trailing-comma-for-srcset-parsing
Fix crash associated with trailing/repeating commas within srcset
2020-12-25 14:33:20 -10:00
Sunshine
8adf059980 fix crash associated with trailing/repeating commas within srcset 2020-12-25 14:24:52 -10:00
Sunshine
8ad252868e Merge pull request #226 from snshn/base-tag-option
Add base URL option
2020-12-25 13:02:05 -10:00
Sunshine
e145df372f Merge branch 'master' into base-tag-option 2020-12-25 12:10:54 -10:00
Sunshine
816b6175ac rewrite ADR #8 (Base Tag) 2020-12-25 12:06:56 -10:00
Sunshine
d89b4d5f5b refactor code that processes the DOM 2020-12-25 11:09:47 -10:00
Sunshine
15d98a7269 don't modify base url by default, add option for setting it 2020-12-24 18:38:44 -10:00
Sunshine
36e82cb511 Update README.md 2020-12-13 08:42:41 -10:00
Sunshine
1b1befd7b0 Merge pull request #222 from snshn/readme-no-metadata-info
Add description of -M option to README.md
2020-12-09 21:29:33 -05:00
Sunshine
a2f59b4418 Update README.md 2020-12-09 16:23:17 -10:00
Sunshine
124a62920f Merge pull request #221 from snshn/related-project-hako
Add Hako
2020-12-09 07:54:52 -05:00
Sunshine
f557504bed Update README.md
Add Hako to the list of related projects
2020-12-08 17:02:09 -10:00
Sunshine
5ac520b4da Merge pull request #219 from snshn/ignore-network-errors-option
Account for network errors
2020-11-22 17:54:30 -10:00
Sunshine
7a97291498 add ADR 7 (Network errors) 2020-11-22 17:20:37 -10:00
Sunshine
38a6f963ad account for network errors, add option to ignore them 2020-11-22 16:49:26 -10:00
Sunshine
052f8f49ec Merge pull request #218 from snshn/update-crates
Update crates
2020-11-20 00:36:33 -10:00
Sunshine
08de486382 use newer dependencies 2020-11-20 00:30:05 -10:00
Sunshine
c0e0a69773 Merge pull request #214 from zfhrp6/remove_use_unused_opts
remove unused import opts::Options;
2020-11-04 01:04:38 -08:00
Sunshine
1636540693 Merge pull request #216 from zfhrp6/suppress_deprecation_of_dependency
update clap 2.33.1 -> 2.33.3
2020-11-02 19:03:43 -08:00
zfhrp
3e80cb02ce update clap 2.33.1 -> 2.33.3 2020-11-02 00:11:41 +09:00
zfhrp
a296531b3f remove unused import opts::Options; 2020-11-01 23:04:08 +09:00
Sunshine
8462b6bc31 Merge pull request #207 from snshn/bump-version
bump version (2.3.0 -> 2.3.1)
2020-08-01 21:00:26 -04:00
Sunshine
92f38556b6 bump version (2.3.0 -> 2.3.1) 2020-08-01 20:24:38 -04:00
Sunshine
c0bdeab2e3 Merge pull request #206 from snshn/update-crates
Update crates
2020-08-01 19:43:00 -04:00
Sunshine
5a502eab4b update crate versions 2020-08-01 19:20:20 -04:00
Sunshine
19f08265a2 Merge pull request #205 from snshn/base-tag
Implement support for BASE tag
2020-08-01 02:47:33 -04:00
Sunshine
1d6392cb28 implement support for BASE tag 2020-08-01 02:35:07 -04:00
Sunshine
03cdc0e0b2 Merge pull request #201 from snshn/refactor-and-version-bump
Refactor and version bump
2020-07-14 03:51:31 -04:00
Sunshine
b98b7af0b4 Merge pull request #202 from snshn/minus-stdout
Treat - for stdout
2020-07-14 03:51:18 -04:00
Sunshine
73c35eaccb treat minus for output target file path as stdout 2020-07-14 03:35:59 -04:00
Sunshine
2c5d1e930b bump version (2.2.7 -> 2.3.0) 2020-07-14 03:29:08 -04:00
Sunshine
90f7c3a0d0 alphabetical order for function names 2020-07-14 03:27:52 -04:00
Sunshine
c1fec5967d Merge pull request #200 from snshn/favicon
Automatically obtain favicon.ico
2020-07-14 03:24:10 -04:00
Sunshine
09d41d2cf1 automatically obtain favicon.ico 2020-07-14 02:58:29 -04:00
Sunshine
8f1da3c792 Update cd.yml 2020-07-13 19:09:01 -04:00
Sunshine
a8449a2b32 Update README.md 2020-07-13 01:16:38 -04:00
Sunshine
164e728ad3 Merge pull request #197 from snshn/addetional-black-box-test-data
Additional black box test data
2020-07-06 16:51:49 -04:00
Sunshine
8883bd6aca add more black box test data 2020-07-06 16:15:57 -04:00
Sunshine
eae5d4dc6b Merge pull request #196 from snshn/help-message-update
Update help message
2020-07-01 06:41:32 -04:00
Sunshine
ec85121d28 update help message 2020-07-01 06:29:56 -04:00
Sunshine
a8a85a4191 Merge pull request #195 from snshn/logo
Logo
2020-07-01 06:24:28 -04:00
Sunshine
decd5b2119 add ASCII logo atop of help message 2020-07-01 06:13:58 -04:00
Sunshine
bef6d848e9 add raster icon along with its Blender scene 2020-07-01 05:54:48 -04:00
Sunshine
4263e42cd1 Merge pull request #194 from snshn/indented-tree
Indented tree
2020-06-28 16:37:10 -04:00
Sunshine
23de5ced21 add tests for utils::indent() 2020-06-28 16:15:42 -04:00
Sunshine
bc98aca2a2 indent items in retrieval log to form a tree-like structure 2020-06-28 16:11:15 -04:00
Sunshine
438ebd520a Merge pull request #193 from snshn/options-struct
Pass options object instead of using separate parameters
2020-06-28 01:51:05 -04:00
Sunshine
ddb97009e9 pass options object instead of using separate parameters 2020-06-28 01:36:41 -04:00
Sunshine
6e67545b92 Merge pull request #192 from snshn/more-test-data
Add more sample data for blackbox tests
2020-06-27 14:57:07 -04:00
Sunshine
9e5d8ec691 add more sample data for blackbox tests 2020-06-27 14:55:10 -04:00
Sunshine
fb835fae28 Merge pull request #191 from snshn/trim-style
Trim CSS if it contains nothing but whitespaces
2020-06-26 23:41:41 -04:00
Sunshine
29bf042da0 trim CSS if it contains nothing but whitespaces 2020-06-26 23:26:55 -04:00
Sunshine
d67483cf8e Merge pull request #190 from snshn/refactor-csp
Refactor CSP code
2020-06-26 21:42:19 -04:00
Sunshine
4140d8ebad Create references.md 2020-06-26 18:16:18 -04:00
Sunshine
2ac964fae5 include font-src into CSP 2020-06-26 18:14:46 -04:00
Sunshine
ae5d6d2df4 refactor CSP code 2020-06-26 16:19:44 -04:00
Sunshine
2ed151d883 Update web-apps.md 2020-06-26 15:05:47 -04:00
Sunshine
3cdfdc45d3 Update snapcraft.yaml 2020-06-26 14:57:52 -04:00
Sunshine
ac04af2cfc Update ADR-0006 2020-06-26 14:44:54 -04:00
Sunshine
769953d7bd Merge pull request #187 from snshn/arm-snapcraft
Add armhf target to snapcraft.yaml
2020-06-26 14:40:46 -04:00
Sunshine
136dcc31cf Merge pull request #189 from snshn/remove-unwanted-meta-tags
Automatically remove "Refresh" and "Location" META tags
2020-06-26 01:29:41 -04:00
Sunshine
44cac65a83 automatically remove "Refresh" and "Location" META tags 2020-06-26 01:18:52 -04:00
Sunshine
c3ca2ad1d5 Merge pull request #188 from snshn/metadata-tag-function
Move metadata tag code into a function
2020-06-25 18:31:50 -04:00
Sunshine
0347fd3985 move metadata tag code into a function 2020-06-25 18:23:56 -04:00
Sunshine
95d0083b3c Update README.md 2020-06-25 17:38:39 -04:00
Sunshine
3ce26b5fdd Merge pull request #186 from snshn/code-improvements
Code improvements
2020-06-24 03:31:11 -04:00
Sunshine
7f9458adfe add armhf target to snapcraft.yaml 2020-06-24 03:20:36 -04:00
Sunshine
5c229c51da move functions related to URL manipulation into url.rs 2020-06-24 03:16:40 -04:00
Sunshine
f6ea16b3ad create a separate function for appending URL fragments 2020-06-24 02:26:05 -04:00
Sunshine
877b11d52c Merge pull request #185 from snshn/upd-crates
Update crates
2020-06-20 03:41:00 -04:00
Sunshine
f9aac6f41b update crates 2020-06-20 01:05:39 -04:00
Sunshine
0a30c286fe add x86_64 GNU/Linux target to CD 2020-06-19 07:44:57 -04:00
Sunshine
ea56b9b4c1 Update README.md 2020-06-19 03:51:19 -04:00
Sunshine
e821591efe Merge pull request #183 from snshn/update-readme-freebsd-instructions
Add FreeBSD installation instructions to README.md
2020-06-18 22:26:56 -04:00
Sunshine
4e5d2fdc8d Merge pull request #184 from snshn/update-readme-ascii
Update README.md
2020-06-18 00:25:55 -04:00
Sunshine
7c2ed2c9ca Update README.md 2020-06-18 00:19:16 -04:00
Sunshine
60d21ae071 Update README.md 2020-06-17 07:42:10 -04:00
Sunshine
bfdcd459e1 Update web-apps.md 2020-06-04 02:32:01 -04:00
Sunshine
6c020dfa88 Create web-apps.md 2020-06-04 02:31:41 -04:00
Sunshine
9894213393 Merge pull request #182 from snshn/version-bump
Version bump
2020-06-01 05:48:08 -04:00
Sunshine
80523c5a59 version bump 2020-06-01 05:41:42 -04:00
Sunshine
65b5ff4ec0 Merge pull request #181 from snshn/only-remove-credentals-from-http-urls
Only attempt to remove credentals from HTTP(S) URLs
2020-06-01 05:36:01 -04:00
Sunshine
4e31d0433e only attempt to remove credentals from HTTP(S) URLs 2020-06-01 05:28:02 -04:00
Sunshine
ed82b96152 Merge pull request #179 from snshn/refine-adrs
Refine ADRs
2020-05-25 00:41:38 -04:00
Sunshine
f16a2a9ed5 refine ADRs 2020-05-24 21:21:52 -04:00
Sunshine
38d7873d6e Update 0004-asset-integrity-check.md 2020-05-24 21:10:35 -04:00
Sunshine
d848179a43 Merge pull request #124 from snshn/adr-integrity
Propose ADR 0004: Asset integrity check
2020-05-24 06:24:26 -04:00
Sunshine
399f515eeb Merge pull request #178 from snshn/tests-code-refactor
Group all tests into either passing or failing groups
2020-05-24 03:46:34 -04:00
Sunshine
46616f327b Merge pull request #177 from snshn/update-readme
Update README.md
2020-05-24 03:46:21 -04:00
Sunshine
090d647390 group all tests into either passing or failing groups 2020-05-23 03:49:04 -04:00
Sunshine
4fa88b7aba update README.md 2020-05-23 03:16:08 -04:00
Sunshine
3d678d80ee Merge pull request #176 from snshn/img-srcset
IMG srcset
2020-05-17 14:26:30 -04:00
Sunshine
19a87f426e version bump 2020-05-17 14:06:55 -04:00
Sunshine
cbe3f9f554 implement support for embedding images within srcset 2020-05-17 14:06:44 -04:00
Sunshine
b6a44c64cf Merge pull request #174 from snshn/armhf-cd
Improve CD for compiling ARM binary asset
2020-05-12 03:31:37 -04:00
Sunshine
84e2dd789c improve CD for compiling ARM binary asset 2020-05-12 03:29:32 -04:00
Sunshine
ac4945ca97 Merge pull request #173 from snshn/sha2-integrity
Add asset integrity validation
2020-05-12 03:15:02 -04:00
Sunshine
2ca2c7aff8 version bump 2020-05-12 03:10:43 -04:00
Sunshine
a18df74946 refactor code and implement integrity validation 2020-05-12 02:51:37 -04:00
Sunshine
2bc8414cc1 Merge pull request #172 from snshn/update-metadata-comment
improve metadata comments
2020-04-30 22:39:25 -04:00
Sunshine
c4569343a4 improve metadata comments 2020-04-30 20:23:09 -04:00
Sunshine
5f5820c71a Merge pull request #168 from snshn/context-comment
Metadata comment tag
2020-04-30 20:06:40 -04:00
Sunshine
4719a6fecf Merge pull request #170 from snshn/svg-image-href
Embed SVG IMAGE assets
2020-04-30 20:00:59 -04:00
Sunshine
c999359b9f Merge branch 'context-comment' of github.com:Alch-Emi/monolith into context-comment 2020-04-30 19:54:13 -04:00
Sunshine
f22e2b6e68 embed SVG IMAGE assets 2020-04-30 19:51:30 -04:00
Sunshine
31a9550f5b Merge pull request #171 from snshn/improve-ci-cd
Add rustfmt installation step to CI
2020-04-30 19:51:04 -04:00
Sunshine
201f2d61b9 add rustfmt installation step to CI 2020-04-30 19:45:44 -04:00
Sunshine
3ae4dfae8e Update README.md 2020-04-28 09:07:47 -04:00
Sunshine
8fbae735fa add ADR 0004: Asset integrity check 2020-02-23 23:15:32 -05:00
Emi Simpson
05985583f0 Switch timestamps from rfc822 local time to iso8601 UTC 2020-01-10 14:30:35 -05:00
Emi Simpson
651fa716b4 Clean user, pass, and fragment from URL before writing 2020-01-10 14:18:15 -05:00
Emi Simpson
9be3982dc6 Added --no-context flag to disable adding context comment 2020-01-08 19:00:53 -05:00
Emi Simpson
27c9fb4cd3 Added comment indicating the context under which the page was downloaded 2020-01-08 18:51:18 -05:00
85 changed files with 6651 additions and 4086 deletions

View File

@@ -15,7 +15,7 @@ jobs:
- run: git config --global core.autocrlf false
- name: Checkout the repository
uses: actions/checkout@v2
- name: Build and install the executable
- name: Build the executable
run: cargo build --release
- uses: Shopify/upload-to-release@1.0.0
with:
@@ -31,24 +31,37 @@ jobs:
- name: Prepare cross-platform environment
run: |
sudo mkdir -p /cross-build-arm
sudo echo "deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ bionic main" >> /etc/apt/sources.list
sudo touch /etc/apt/sources.list.d/armhf.list
echo "deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ bionic main" | sudo tee -a /etc/apt/sources.list.d/armhf.list
sudo apt-get update
sudo apt-get install -y gcc-arm-linux-gnueabihf libc6-armhf-cross libc6-dev-armhf-cross
sudo apt-get download libssl1.1:armhf libssl-dev:armhf
sudo dpkg -x libssl1.1*.deb /cross-build-arm
sudo dpkg -x libssl-dev*.deb /cross-build-arm
rustup target add arm-unknown-linux-gnueabihf
echo "::set-env name=C_INCLUDE_PATH::/cross-build-arm/usr/include"
echo "::set-env name=OPENSSL_INCLUDE_DIR::/cross-build-arm/usr/include/arm-linux-gnueabihf"
echo "::set-env name=OPENSSL_LIB_DIR::/cross-build-arm/usr/lib/arm-linux-gnueabihf"
echo "::set-env name=PKG_CONFIG_ALLOW_CROSS::1"
echo "::set-env name=RUSTFLAGS::-C linker=arm-linux-gnueabihf-gcc -L/usr/arm-linux-gnueabihf/lib -L/cross-build-arm/usr/lib/arm-linux-gnueabihf -L/cross-build-arm/lib/arm-linux-gnueabihf"
echo "C_INCLUDE_PATH=/cross-build-arm/usr/include" >> $GITHUB_ENV
echo "OPENSSL_INCLUDE_DIR=/cross-build-arm/usr/include/arm-linux-gnueabihf" >> $GITHUB_ENV
echo "OPENSSL_LIB_DIR=/cross-build-arm/usr/lib/arm-linux-gnueabihf" >> $GITHUB_ENV
echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV
echo "RUSTFLAGS=-C linker=arm-linux-gnueabihf-gcc -L/usr/arm-linux-gnueabihf/lib -L/cross-build-arm/usr/lib/arm-linux-gnueabihf -L/cross-build-arm/lib/arm-linux-gnueabihf" >> $GITHUB_ENV
- name: Build the executable
run: |
cargo build --release --target=arm-unknown-linux-gnueabihf
run: cargo build --release --target=arm-unknown-linux-gnueabihf
- name: Attach artifact to the release
uses: Shopify/upload-to-release@1.0.0
with:
name: monolith-gnu-linux-armhf
path: target/arm-unknown-linux-gnueabihf/release/monolith
repo-token: ${{ secrets.GITHUB_TOKEN }}
gnu_linux_x86_64:
runs-on: ubuntu-18.04
steps:
- name: Checkout the repository
uses: actions/checkout@v2
- name: Build the executable
run: cargo build --release
- uses: Shopify/upload-to-release@1.0.0
with:
name: monolith-gnu-linux-x86_64
path: target/release/monolith
repo-token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -27,4 +27,6 @@ jobs:
- name: Run tests
run: cargo test --all --locked --verbose
- name: Check code formatting
run: cargo fmt --all -- --check
run: |
rustup component add rustfmt
cargo fmt --all -- --check

1404
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,6 @@
[package]
name = "monolith"
version = "2.2.4"
edition = "2018"
version = "2.4.1"
authors = [
"Sunshine <sunshine@uberspace.net>",
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
@@ -9,20 +8,35 @@ authors = [
"Emi Simpson <emi@alchemi.dev>",
"rhysd <lin90162@yahoo.co.jp>",
]
edition = "2018"
description = "CLI tool for saving web pages as a single HTML file"
homepage = "https://github.com/Y2Z/monolith"
repository = "https://github.com/Y2Z/monolith"
readme = "README.md"
keywords = ["web", "http", "html", "download", "command-line"]
categories = ["command-line-utilities", "web-programming"]
include = [
"src/*.rs",
"Cargo.toml",
]
license = "CC0-1.0"
[dependencies]
base64 = "0.12.0"
clap = "2.33.0"
cssparser = "0.27.2"
atty = "0.2" # Used for highlighting network errors
base64 = "0.13.0"
chrono = "0.4.19" # Used for formatting creation timestamp
clap = "2.33.3"
cssparser = "0.28.1"
html5ever = "0.24.1"
url = "2.1.1"
regex = "1.4.3" # Used for parsing srcset
sha2 = "0.9.2" # Used for calculating checksums during integrity checks
url = "2.2.0"
[dependencies.reqwest]
version = "0.10.*"
version = "0.11.0"
default-features = false
features = ["default-tls", "blocking", "gzip"]
[dev-dependencies]
assert_cmd = "0.12.0"
tempfile = "3.1.0"
assert_cmd = "1.0.2"
tempfile = "3.2.0"

View File

@@ -2,13 +2,13 @@ FROM rust
WORKDIR /usr/local/src/
RUN curl -s https://api.github.com/repos/y2z/monolith/releases/latest \
| grep "tarball_url.*\"," \
| cut -d '"' -f 4 \
| wget -qi - -O monolith.tar.gz
| grep "tarball_url.*\"," \
| cut -d '"' -f 4 \
| wget -qi - -O monolith.tar.gz
RUN tar xfz monolith.tar.gz \
&& mv Y2Z-monolith-* monolith \
&& rm monolith.tar.gz
&& mv Y2Z-monolith-* monolith \
&& rm monolith.tar.gz
WORKDIR /usr/local/src/monolith
RUN ls -a

137
LICENSE
View File

@@ -1,24 +1,121 @@
This is free and unencumbered software released into the public domain.
Creative Commons Legal Code
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
CC0 1.0 Universal
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE
LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN
ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS
INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES
REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS
PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM
THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED
HEREUNDER.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
Statement of Purpose
For more information, please refer to <http://unlicense.org>
The laws of most jurisdictions throughout the world automatically confer
exclusive Copyright and Related Rights (defined below) upon the creator
and subsequent owner(s) (each and all, an "owner") of an original work of
authorship and/or a database (each, a "Work").
Certain owners wish to permanently relinquish those rights to a Work for
the purpose of contributing to a commons of creative, cultural and
scientific works ("Commons") that the public can reliably and without fear
of later claims of infringement build upon, modify, incorporate in other
works, reuse and redistribute as freely as possible in any form whatsoever
and for any purposes, including without limitation commercial purposes.
These owners may contribute to the Commons to promote the ideal of a free
culture and the further production of creative, cultural and scientific
works, or to gain reputation or greater distribution for their Work in
part through the use and efforts of others.
For these and/or other purposes and motivations, and without any
expectation of additional consideration or compensation, the person
associating CC0 with a Work (the "Affirmer"), to the extent that he or she
is an owner of Copyright and Related Rights in the Work, voluntarily
elects to apply CC0 to the Work and publicly distribute the Work under its
terms, with knowledge of his or her Copyright and Related Rights in the
Work and the meaning and intended legal effect of CC0 on those rights.
1. Copyright and Related Rights. A Work made available under CC0 may be
protected by copyright and related or neighboring rights ("Copyright and
Related Rights"). Copyright and Related Rights include, but are not
limited to, the following:
i. the right to reproduce, adapt, distribute, perform, display,
communicate, and translate a Work;
ii. moral rights retained by the original author(s) and/or performer(s);
iii. publicity and privacy rights pertaining to a person's image or
likeness depicted in a Work;
iv. rights protecting against unfair competition in regards to a Work,
subject to the limitations in paragraph 4(a), below;
v. rights protecting the extraction, dissemination, use and reuse of data
in a Work;
vi. database rights (such as those arising under Directive 96/9/EC of the
European Parliament and of the Council of 11 March 1996 on the legal
protection of databases, and under any national implementation
thereof, including any amended or successor version of such
directive); and
vii. other similar, equivalent or corresponding rights throughout the
world based on applicable law or treaty, and any national
implementations thereof.
2. Waiver. To the greatest extent permitted by, but not in contravention
of, applicable law, Affirmer hereby overtly, fully, permanently,
irrevocably and unconditionally waives, abandons, and surrenders all of
Affirmer's Copyright and Related Rights and associated claims and causes
of action, whether now known or unknown (including existing as well as
future claims and causes of action), in the Work (i) in all territories
worldwide, (ii) for the maximum duration provided by applicable law or
treaty (including future time extensions), (iii) in any current or future
medium and for any number of copies, and (iv) for any purpose whatsoever,
including without limitation commercial, advertising or promotional
purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each
member of the public at large and to the detriment of Affirmer's heirs and
successors, fully intending that such Waiver shall not be subject to
revocation, rescission, cancellation, termination, or any other legal or
equitable action to disrupt the quiet enjoyment of the Work by the public
as contemplated by Affirmer's express Statement of Purpose.
3. Public License Fallback. Should any part of the Waiver for any reason
be judged legally invalid or ineffective under applicable law, then the
Waiver shall be preserved to the maximum extent permitted taking into
account Affirmer's express Statement of Purpose. In addition, to the
extent the Waiver is so judged Affirmer hereby grants to each affected
person a royalty-free, non transferable, non sublicensable, non exclusive,
irrevocable and unconditional license to exercise Affirmer's Copyright and
Related Rights in the Work (i) in all territories worldwide, (ii) for the
maximum duration provided by applicable law or treaty (including future
time extensions), (iii) in any current or future medium and for any number
of copies, and (iv) for any purpose whatsoever, including without
limitation commercial, advertising or promotional purposes (the
"License"). The License shall be deemed effective as of the date CC0 was
applied by Affirmer to the Work. Should any part of the License for any
reason be judged legally invalid or ineffective under applicable law, such
partial invalidity or ineffectiveness shall not invalidate the remainder
of the License, and in such case Affirmer hereby affirms that he or she
will not (i) exercise any of his or her remaining Copyright and Related
Rights in the Work or (ii) assert any associated claims and causes of
action with respect to the Work, in either case contrary to Affirmer's
express Statement of Purpose.
4. Limitations and Disclaimers.
a. No trademark or patent rights held by Affirmer are waived, abandoned,
surrendered, licensed or otherwise affected by this document.
b. Affirmer offers the Work as-is and makes no representations or
warranties of any kind concerning the Work, express, implied,
statutory or otherwise, including without limitation warranties of
title, merchantability, fitness for a particular purpose, non
infringement, or the absence of latent or other defects, accuracy, or
the present or absence of errors, whether or not discoverable, all to
the greatest extent permissible under applicable law.
c. Affirmer disclaims responsibility for clearing rights of other persons
that may apply to the Work or any use thereof, including without
limitation any person's Copyright and Related Rights in the Work.
Further, Affirmer disclaims responsibility for obtaining any necessary
consents, permissions or other rights required for any use of the
Work.
d. Affirmer understands and acknowledges that Creative Commons is not a
party to this document and has no duty or obligation with respect to
this CC0 or use of the Work.

View File

@@ -10,7 +10,7 @@ build:
test: build
@cargo test --locked
@cargo fmt --all -- --check
.PHONY: test_code_formatting
.PHONY: test
lint:
@cargo fmt --all --
@@ -23,3 +23,7 @@ install:
uninstall:
@cargo uninstall
.PHONY: uninstall
clean:
@cargo clean
.PHONY: clean

View File

@@ -3,13 +3,13 @@
[![Monolith Build Status for Windows](https://github.com/Y2Z/monolith/workflows/Windows/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows)
```
___ ___________ __________ ___________________ ___
| \ / \ | | | | | |
| \_/ __ \_| __ | | ___ ___ |__| |
| | | | | | | | | | | |
| |__| _ |__| |____| | | | | __ |
| |\_/| | \ | | | | | | |
|___| |__________| \____________________| |___| |___| |___|
_____ ______________ __________ ___________________ ___
| \ / \ | | | | | |
| \_/ __ \_| __ | | ___ ___ |__| |
| | | | | | | | | | | |
| |\ /| |__| _ |__| |____| | | | | __ |
| | \___/ | | \ | | | | | | |
|___| |__________| \_____________________| |___| |___| |___|
```
A data hoarder's dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
@@ -22,46 +22,66 @@ If compared to saving websites with `wget -mpk`, this tool embeds all assets as
## Installation
### Using Cargo
$ cargo install monolith
#### Via Homebrew (on macOS and GNU/Linux)
$ brew install monolith
#### Using Snapcraft (on GNU/Linux)
$ snap install monolith
#### Using Ports collection (on FreeBSD and TrueOS)
$ cd /usr/ports/www/monolith/
$ make install clean
#### Using pre-built binaries (Windows, ARM-based devices, etc)
Every [release](https://github.com/Y2Z/monolith/releases) contains pre-built binaries for Windows, GNU/Linux, as well as platforms with non-standard CPU architecture.
#### From source
Dependency: `libssl-dev`
Dependency: `libssl-dev`
$ git clone https://github.com/Y2Z/monolith.git
$ cd monolith
$ make install
#### With Docker
The guide can be found [here](docs/containers.md)
#### Using Containers
$ docker build -t Y2Z/monolith .
$ sudo install -b utils/run-in-container.sh /usr/local/bin/monolith
---------------------------------------------------
## Usage
$ monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roads-lyrics.html
or
$ cat index.html | monolith -aIiFfcMv - > index-processed.html
---------------------------------------------------
## Options
- `-c`: Ignore styles
- `-f`: Exclude frames and iframes
- `-F`: Omit web fonts
- `-a`: Exclude audio sources
- `-b`: Use custom `base URL`
- `-c`: Exclude CSS
- `-e`: Ignore network errors
- `-f`: Omit frames
- `-F`: Exclude web fonts
- `-i`: Remove images
- `-I`: Isolate the document
- `-j`: Exclude JavaScript
- `-k`: Accept invalid X.509 (TLS) certificates
- `-o`: Write output to file
- `-s`: Silent mode
- `-t`: Set custom network request timeout
- `-u`: Provide own User-Agent
- `-M`: Don't add timestamp and URL information
- `-o`: Write output to `file`
- `-s`: Be quiet
- `-t`: Adjust `network request timeout`
- `-u`: Provide `custom User-Agent`
- `-v`: Exclude videos
---------------------------------------------------
## HTTPS and HTTP proxies
## Proxies
Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables.
---------------------------------------------------
@@ -75,11 +95,18 @@ Please open an issue if something is wrong, that helps make this project better.
- `Monolith Chrome Extension`: https://github.com/rhysd/monolith-of-web
- `Pagesaver`: https://github.com/distributed-mind/pagesaver
- `Personal WayBack Machine`: https://github.com/popey/pwbm
- `Hako`: https://github.com/dmpop/hako
---------------------------------------------------
## License
The Unlicense
<a href="http://creativecommons.org/publicdomain/zero/1.0/">
<img src="http://i.creativecommons.org/p/zero/1.0/88x31.png" alt="CC0-1.0" />
</a>
<br />
To the extent possible under law, the author(s) have dedicated all copyright related and neighboring rights to this software to the public domain worldwide.
This software is distributed without any warranty.
---------------------------------------------------

BIN
assets/icon/icon.blend Normal file

Binary file not shown.

BIN
assets/icon/icon.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.2 MiB

View File

@@ -8,12 +8,12 @@ Accepted
## Context
HTML pages sometimes contain NOSCRIPT nodes, which reveal their contents only in case when JavaScript is not available. Most of the time they contain hidden messages that inform about certain JavaScript-dependent features not being operational, however sometimes can also feature media assets or even iframes.
HTML pages can contain `noscript` nodes, which reveal their contents only in case when JavaScript is not available. Most of the time they contain hidden messages that inform about certain JavaScript-dependent features not being operational, however sometimes can also feature media assets or even iframes.
## Decision
When the document is being saved with or without JavaScript, each NOSCRIPT node should be preserved while its children need to be processed exactly the same way as the rest of the document. This approach will ensure that even hidden remote assets are embedded — since those hidden elements may have to be displayed later in a browser that has JavaScript turned off. An option should be available to "unwrap" all NOSCRIPT nodes in order to make their contents always visible in the document, complimenting the "disable JS" function of the program.
When the document is being saved with or without JavaScript, each `noscript` node should be preserved while its children need to be processed exactly the same way as the rest of the document. This approach will ensure that even hidden remote assets are embedded — since those hidden elements may have to be displayed later in a browser that has JavaScript turned off. An option should be available to "unwrap" all `noscript` nodes in order to make their contents always visible in the document, complementing the "disable JS" function of the program.
## Consequences
Saved documents will have contents of all NOSCRIPT nodes processed as if they are part of the document's DOM, therefore properly display images encapsulated within NOSCRIPT nodes when being viewed in browsers that have JavaScript turned off (or have no JavaScript support in the first place). The new option to "unwrap" NOSCRIPT elements will help the user ensure that the resulting document always represents what the original web page looked like in a browser that had JavaScript turned off.
Saved documents will have contents of all `noscript` nodes processed as if they are part of the document's DOM, therefore properly display images encapsulated within `noscript` nodes when being viewed in browsers that have JavaScript turned off (or have no JavaScript support in the first place). The new option to "unwrap" `noscript` elements will help the user ensure that the resulting document always represents what the original web page looked like in a browser that had JavaScript turned off.

View File

@@ -1,4 +1,4 @@
# 2. Network request timeout
# 3. Network request timeout
Date: 2020-02-15
@@ -12,14 +12,10 @@ A slow network connection and overloaded server may negatively impact network re
## Decision
Make the program simulate behavior of popular web browsers and CLI tools, where
the default network response timeout is most often set to 120 seconds.
Make the program simulate behavior of popular web browsers and CLI tools, where the default network response timeout is most often set to 120 seconds.
Instead of featuring retries for timed out network requests, the program
should have an option to adjust the timeout length, along with making it
indefinite when given "0" as its value.
Instead of featuring retries for timed out network requests, the program should have an option to adjust the timeout length, along with making it indefinite when given "0" as its value.
## Consequences
The user is able to retrieve resources that have long response time, as well as obtain
full control over how soon, and if at all, network requests should time out.
The user is able to retrieve resources that have long response time, as well as obtain full control over how soon, and if at all, network requests should time out.

View File

@@ -0,0 +1,21 @@
# 4. Asset integrity check
Date: 2020-02-23
## Status
Accepted
## Context
In HTML5, `link` and `script` nodes have an attribute named `integrity`, which lets the browser check if the remote file is valid, mostly for the purpose of enhancing page security.
## Decision
In order to replicate the browser's behavior, the program should perform integrity check the same way it does, excluding the linked asset from the final result if such check fails.
The `integrity` attribute should be removed from nodes, as it bears no benefit for resources embedded as data URLs.
## Consequences
Assets that fail to pass the check get excluded from the saved document. Meanwhile, saved documents no longer contain integrity attributes on all `link` and `script` nodes.

View File

@@ -1,4 +1,4 @@
# 4. Asset Minimization
# 5. Asset Minimization
Date: 2020-03-14
@@ -8,18 +8,12 @@ Accepted
## Context
It may look like a good idea to make monolith compress retrieved assets while
saving the page for the purpose of reducing the resulting document's file size.
It may look like a good idea to make monolith compress retrieved assets while saving the page for the purpose of reducing the resulting document's file size.
## Decision
Given that the main purpose of this program is to save pages in a convenient to store and share manner — it's mostly an archiving tool,
aside from being able to tell monolith to exclude certain types of asests (e.g. images, CSS, JavaScript),
it would be outside of scope of this program to implement code for compressing assets. Minimizing files before embedding them
does not reduce the amount of data that needs to be transferred either. A separate tool can be used later to compress and minimize pages
saved by monolith, if needed.
Given that the main purpose of this program is to save pages in a convenient to store and share manner — it's mostly an archiving tool, aside from being able to tell monolith to exclude certain types of assets (e.g. images, CSS, JavaScript), it would be outside of scope of this program to implement code for compressing assets. Minimizing files before embedding them does not reduce the amount of data that needs to be transferred either. A separate tool can be used later to compress and minimize pages saved by monolith, if needed.
## Consequences
Monolith will not support modification of original document assets for the purpose of reducing their size, sticking to performing only minimal
amount of modifications to the original web page — whatever is needed to provide security or exclude unwanted asset types.
Monolith will not support modification of original document assets for the purpose of reducing their size, sticking to performing only minimal amount of modifications to the original web page — whatever is needed to provide security or exclude unwanted asset types.

View File

@@ -0,0 +1,19 @@
# 6. Reload and location `meta` tags
Date: 2020-06-25
## Status
Accepted
## Context
HTML documents may contain `meta` tags capable of automatically refreshing the page or redirecting to another location.
## Decision
Since the resulting document is saved to disk and generally not intended to be served over the network, it only makes sense to remove `meta` tags that have `http-equiv` attribute equal to "Refresh" or "Location", in order to prevent them from reloading the page or redirecting to another location.
## Consequences
Monolith will ensure that saved documents do not contain `meta` tags capable of changing location or reloading the page.

View File

@@ -0,0 +1,19 @@
# 7. Network errors
Date: 2020-11-22
## Status
Accepted
## Context
Servers may return information with HTTP response codes other than `200`, however those responses may still contain useful data.
## Decision
Fail by default, notifying of the network error. Add option to continue retrieving assets by treating all response codes as `200`.
## Consequences
Monolith will fail to obtain resources with status other than `200`, unless told to ignore network errors.

View File

@@ -0,0 +1,40 @@
# 8. Base Tag
Date: 2020-12-25
## Status
Accepted
## Context
HTML documents may contain `base` tag, which influences resolution of anchor links and relative URLs as well as dynamically loaded resources.
Sometimes, in order to make certain saved documents function closer to how they operate while being served from a remote server, the `base` tag specifying the source page's URL may need to be added to the document.
There can be only one such tag. If multiple `base` tags are present, only the first encountered tag ends up being used.
## Decision
Adding the `base` tag should be optional — saved documents should not contain the `base` tag unless it was specified by the user, or the document originally had the `base` tag in it.
Existing `href` attribute's value of the original `base` tag should be used for resolving the document's relative links instead of document's own URL (precisely the way browsers do it).
## Consequences
#### If the base tag does not exist in the source document
- If the base tag does not exist in the source document
- With base URL option provided
- use the specified base URL value to retrieve assets, keep original base URL value in the document
- Without base URL option provided
- download document as usual, do not add base tag
- If the base tag already exists in the source document
- With base URL option provided
- we overwrite the original base URL before retrieving assets, keep new base URL value in the document
- Without base URL option provided:
- use the base URL from the original document to retrieve assets, keep original base URL value in the document
The program will obtain ability to retrieve remote assets for non-remote sources (such as data URLs and local files).
The program will obtain ability to get rid of existing base tag values (by providing an empty one).

3
docs/references.md Normal file
View File

@@ -0,0 +1,3 @@
# References
- https://content-security-policy.com/

10
docs/web-apps.md Normal file
View File

@@ -0,0 +1,10 @@
# Web apps that can be saved with Monolith
These apps retain most or all of their functionality when saved with Monolith
|Converse|https://conversejs.org|
|:--|:--|
|Description|An XMPP client built using web technologies|
|Functionality retained|**full**|
|Command to use|`monolith https://conversejs.org/fullscreen.html > conversejs.html`|
|Monolith version used|2.2.7|

25
monolith.nuspec Normal file
View File

@@ -0,0 +1,25 @@
<?xml version="1.0" encoding="utf-8"?>
<package xmlns="http://schemas.microsoft.com/packaging/2015/06/nuspec.xsd">
<metadata>
<id>monolith</id>
<version>2.4.0</version>
<title>Monolith</title>
<authors>Sunshine, Mahdi Robatipoor, Emmanuel Delaborde, Emi Simpson, rhysd</authors>
<projectUrl>https://github.com/Y2Z/monolith</projectUrl>
<iconUrl>https://raw.githubusercontent.com/Y2Z/monolith/master/assets/icon/icon.png</iconUrl>
<licenseUrl>https://raw.githubusercontent.com/Y2Z/monolith/master/LICENSE</licenseUrl>
<requireLicenseAcceptance>false</requireLicenseAcceptance>
<description>CLI tool for saving complete web pages as a single HTML file
A data hoarder's dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
Unlike the conventional “Save page as”, monolith not only saves the target document, it embeds CSS, image, and JavaScript assets all at once, producing a single HTML5 document that is a joy to store and share.
If compared to saving websites using wget, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available.
</description>
<copyright>Public Domain</copyright>
<language>en-US</language>
<tags>scraping archiving</tags>
<docsUrl>https://github.com/Y2Z/monolith/blob/master/README.md</docsUrl>
</metadata>
</package>

View File

@@ -18,11 +18,11 @@ description: |
confinement: strict
# Building on armhf fails, so we specify all supported non-armhf architectures
architectures:
- build-on: amd64
- build-on: i386
- build-on: arm64
- build-on: armhf
- build-on: i386
- build-on: ppc64el
- build-on: s390x

View File

@@ -1,76 +0,0 @@
use clap::{App, Arg};
/// Command-line settings collected by `AppArgs::get`.
///
/// Every field starts at its `Default` value and is then overwritten from the
/// parsed command line.
#[derive(Default)]
pub struct AppArgs {
/// Positional argument: URL or file path of the document to save.
pub target: String,
/// True when `-c, --no-css` was given.
pub no_css: bool,
/// True when `-F, --no-fonts` was given.
pub no_fonts: bool,
/// True when `-f, --no-frames` was given.
pub no_frames: bool,
/// True when `-i, --no-images` was given.
pub no_images: bool,
/// True when `-j, --no-js` was given.
pub no_js: bool,
/// True when `-k, --insecure` was given.
pub insecure: bool,
/// True when `-I, --isolate` was given.
pub isolate: bool,
/// Value of `-o, --output`; an empty string when the option is absent.
pub output: String,
/// True when `-s, --silent` was given.
pub silent: bool,
/// Value of `-t, --timeout`; `DEFAULT_NETWORK_TIMEOUT` when the option is absent.
pub timeout: u64,
/// Value of `-u, --user-agent`; `DEFAULT_USER_AGENT` when the option is absent.
pub user_agent: String,
}
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
const DEFAULT_USER_AGENT: &str =
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0";
impl AppArgs {
pub fn get() -> AppArgs {
let app = App::new(env!("CARGO_PKG_NAME"))
.version(crate_version!())
.author(crate_authors!("\n"))
.about(crate_description!())
.arg(
Arg::with_name("target")
.required(true)
.takes_value(true)
.index(1)
.help("URL or file path"),
)
// .args_from_usage("-a, --include-audio 'Removes audio sources'")
.args_from_usage("-c, --no-css 'Removes CSS'")
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
.args_from_usage("-F, --no-fonts 'Removes fonts'")
.args_from_usage("-i, --no-images 'Removes images'")
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
.args_from_usage("-j, --no-js 'Removes JavaScript'")
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
.args_from_usage("-s, --silent 'Suppresses verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
// .args_from_usage("-v, --include-video 'Removes video sources'")
.get_matches();
let mut app_args = AppArgs::default();
// Process the command
app_args.target = app
.value_of("target")
.expect("please set target")
.to_string();
app_args.no_css = app.is_present("no-css");
app_args.no_fonts = app.is_present("no-fonts");
app_args.no_frames = app.is_present("no-frames");
app_args.no_images = app.is_present("no-images");
app_args.no_js = app.is_present("no-js");
app_args.insecure = app.is_present("insecure");
app_args.isolate = app.is_present("isolate");
app_args.silent = app.is_present("silent");
app_args.timeout = app
.value_of("timeout")
.unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string())
.parse::<u64>()
.unwrap();
app_args.output = app.value_of("output").unwrap_or("").to_string();
app_args.user_agent = app
.value_of("user-agent")
.unwrap_or(DEFAULT_USER_AGENT)
.to_string();
app_args
}
}

View File

@@ -2,7 +2,9 @@ use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::utils::{data_to_data_url, get_url_fragment, resolve_url, retrieve_asset};
use crate::opts::Options;
use crate::url::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, url_with_fragment};
use crate::utils::retrieve_asset;
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
// Universal
@@ -24,7 +26,7 @@ const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
"suffix",
"symbols",
];
const CSS_SPECIAL_CHARS: &str = "~!@$%^&*()+=,./'\";:?><[]{}|`#";
const CSS_SPECIAL_CHARS: &'static str = "~!@$%^&*()+=,./'\";:?><[]{}|`#";
pub fn is_image_url_prop(prop_name: &str) -> bool {
CSS_PROPS_WITH_IMAGE_URLS
@@ -58,12 +60,11 @@ pub fn process_css<'a>(
client: &Client,
parent_url: &str,
parser: &mut Parser,
options: &Options,
depth: u32,
rule_name: &str,
prop_name: &str,
func_name: &str,
opt_no_fonts: bool,
opt_no_images: bool,
opt_silent: bool,
) -> Result<String, ParseError<'a, String>> {
let mut result: String = str!();
@@ -90,7 +91,7 @@ pub fn process_css<'a>(
Token::Colon => result.push_str(":"),
Token::Comma => result.push_str(","),
Token::ParenthesisBlock | Token::SquareBracketBlock | Token::CurlyBracketBlock => {
if opt_no_fonts && curr_rule == "font-face" {
if options.no_fonts && curr_rule == "font-face" {
continue;
}
@@ -113,12 +114,11 @@ pub fn process_css<'a>(
client,
parent_url,
parser,
options,
depth,
rule_name,
curr_prop.as_str(),
func_name,
opt_no_fonts,
opt_no_images,
opt_silent,
)
})
.unwrap();
@@ -148,7 +148,7 @@ pub fn process_css<'a>(
// @import, @font-face, @charset, @media...
Token::AtKeyword(ref value) => {
curr_rule = str!(value);
if opt_no_fonts && curr_rule == "font-face" {
if options.no_fonts && curr_rule == "font-face" {
continue;
}
result.push_str("@");
@@ -169,40 +169,47 @@ pub fn process_css<'a>(
continue;
}
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
let (css, final_url) = retrieve_asset(
let import_full_url = resolve_url(&parent_url, value).unwrap_or_default();
let import_url_fragment = get_url_fragment(import_full_url.clone());
match retrieve_asset(
cache,
client,
&parent_url,
&full_url,
false,
"",
opt_silent,
)
.unwrap_or_default();
result.push_str(
enquote(
data_to_data_url(
&import_full_url,
options,
depth + 1,
) {
Ok((import_contents, import_final_url, _import_media_type)) => {
let import_data_url = data_to_data_url(
"text/css",
embed_css(
cache,
client,
final_url.as_str(),
&css,
opt_no_fonts,
opt_no_images,
opt_silent,
&import_final_url,
&String::from_utf8_lossy(&import_contents),
options,
depth + 1,
)
.as_bytes(),
&final_url,
url_fragment.as_str(),
),
false,
)
.as_str(),
);
&import_final_url,
);
let assembled_url: String = url_with_fragment(
import_data_url.as_str(),
import_url_fragment.as_str(),
);
result.push_str(enquote(assembled_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(import_full_url.clone()) {
let assembled_url: String = url_with_fragment(
import_full_url.as_str(),
import_url_fragment.as_str(),
);
result.push_str(enquote(assembled_url, false).as_str());
}
}
}
} else {
if func_name == "url" {
// Skip empty url()'s
@@ -210,21 +217,36 @@ pub fn process_css<'a>(
continue;
}
if opt_no_images && is_image_url_prop(curr_prop.as_str()) {
if options.no_images && is_image_url_prop(curr_prop.as_str()) {
result.push_str(enquote(str!(empty_image!()), false).as_str());
} else {
let resolved_url = resolve_url(&parent_url, value).unwrap_or_default();
let (data_url, _final_url) = retrieve_asset(
let url_fragment = get_url_fragment(resolved_url.clone());
match retrieve_asset(
cache,
client,
&parent_url,
&resolved_url,
true,
"",
opt_silent,
)
.unwrap_or_default();
result.push_str(enquote(data_url, false).as_str());
options,
depth + 1,
) {
Ok((data, final_url, media_type)) => {
let data_url = data_to_data_url(&media_type, &data, &final_url);
let assembled_url: String =
url_with_fragment(data_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(resolved_url.clone()) {
let assembled_url: String = url_with_fragment(
resolved_url.as_str(),
url_fragment.as_str(),
);
result.push_str(enquote(assembled_url, false).as_str());
}
}
}
}
} else {
result.push_str(enquote(str!(value), false).as_str());
@@ -249,7 +271,7 @@ pub fn process_css<'a>(
if *has_sign && *unit_value >= 0. {
result.push_str("+");
}
result.push_str(str!(unit_value * 100.).as_str());
result.push_str(str!(unit_value * 100.0).as_str());
result.push_str("%");
}
Token::Dimension {
@@ -293,54 +315,64 @@ pub fn process_css<'a>(
if is_import {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
let (css, final_url) = retrieve_asset(
cache,
client,
&parent_url,
&full_url,
false,
"",
opt_silent,
)
.unwrap_or_default();
result.push_str(
enquote(
data_to_data_url(
match retrieve_asset(cache, client, &parent_url, &full_url, options, depth + 1)
{
Ok((css, final_url, _media_type)) => {
let data_url = data_to_data_url(
"text/css",
embed_css(
cache,
client,
final_url.as_str(),
&css,
opt_no_fonts,
opt_no_images,
opt_silent,
&final_url,
&String::from_utf8_lossy(&css),
options,
depth + 1,
)
.as_bytes(),
&final_url,
url_fragment.as_str(),
),
false,
)
.as_str(),
);
);
let assembled_url: String =
url_with_fragment(data_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(full_url.clone()) {
let assembled_url: String =
url_with_fragment(full_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
}
}
}
} else {
if opt_no_images && is_image_url_prop(curr_prop.as_str()) {
if is_image_url_prop(curr_prop.as_str()) && options.no_images {
result.push_str(enquote(str!(empty_image!()), false).as_str());
} else {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let (data_url, _final_url) = retrieve_asset(
let url_fragment = get_url_fragment(full_url.clone());
match retrieve_asset(
cache,
client,
&parent_url,
&full_url,
true,
"",
opt_silent,
)
.unwrap_or_default();
result.push_str(enquote(data_url, false).as_str());
options,
depth + 1,
) {
Ok((data, final_url, media_type)) => {
let data_url = data_to_data_url(&media_type, &data, &final_url);
let assembled_url: String =
url_with_fragment(data_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(full_url.clone()) {
let assembled_url: String =
url_with_fragment(full_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
}
}
}
}
}
result.push_str(")");
@@ -358,12 +390,11 @@ pub fn process_css<'a>(
client,
parent_url,
parser,
options,
depth,
curr_rule.as_str(),
curr_prop.as_str(),
function_name,
opt_no_fonts,
opt_no_images,
opt_silent,
)
})
.unwrap();
@@ -375,6 +406,11 @@ pub fn process_css<'a>(
}
}
// Ensure empty CSS is really empty
if result.len() > 0 && result.trim().len() == 0 {
result = result.trim().to_string()
}
Ok(result)
}
@@ -383,9 +419,8 @@ pub fn embed_css(
client: &Client,
parent_url: &str,
css: &str,
opt_no_fonts: bool,
opt_no_images: bool,
opt_silent: bool,
options: &Options,
depth: u32,
) -> String {
let mut input = ParserInput::new(&css);
let mut parser = Parser::new(&mut input);
@@ -395,12 +430,11 @@ pub fn embed_css(
client,
parent_url,
&mut parser,
options,
depth,
"",
"",
"",
opt_no_fonts,
opt_no_images,
opt_silent,
)
.unwrap()
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,4 +1,4 @@
const JS_DOM_EVENT_ATTRS: &[&str] = &[
const JS_DOM_EVENT_ATTRS: &'static [&str] = &[
// From WHATWG HTML spec 8.1.5.2 "Event handlers on elements, Document objects, and Window objects":
// https://html.spec.whatwg.org/#event-handlers-on-elements,-document-objects,-and-window-objects
// https://html.spec.whatwg.org/#attributes-3 (table "List of event handler content attributes")

View File

@@ -1,9 +1,14 @@
#[macro_use]
extern crate clap;
#[macro_use]
mod macros;
pub mod css;
pub mod html;
pub mod js;
pub mod opts;
pub mod url;
pub mod utils;
#[cfg(test)]

View File

@@ -1,21 +1,24 @@
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
use monolith::utils::{data_url_to_text, is_data_url, is_file_url, is_http_url, retrieve_asset};
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use std::collections::HashMap;
use std::env;
use std::fs;
use std::io::{self, Error, Write};
use std::io::{self, prelude::*, Error, Write};
use std::path::Path;
use std::process;
use std::time::Duration;
mod args;
mod macros;
use monolith::html::{
add_favicon, create_metadata_tag, get_base_url, has_favicon, html_to_dom, set_base_url,
stringify_document, walk_and_embed_assets,
};
use monolith::opts::Options;
use monolith::url::{
data_to_data_url, is_data_url, is_file_url, is_http_url, parse_data_url, resolve_url,
};
use monolith::utils::retrieve_asset;
#[macro_use]
extern crate clap;
use crate::args::AppArgs;
mod macros;
enum Output {
Stdout(io::Stdout),
@@ -24,7 +27,7 @@ enum Output {
impl Output {
fn new(file_path: &str) -> Result<Output, Error> {
if file_path.is_empty() {
if file_path.is_empty() || file_path.eq("-") {
Ok(Output::Stdout(io::stdout()))
} else {
Ok(Output::File(fs::File::create(file_path)?))
@@ -45,12 +48,22 @@ impl Output {
}
}
/// Reads standard input line by line and returns the collected text.
///
/// Each line is appended followed by a single `\n`; a line that fails to
/// read contributes an empty string (plus the `\n`) instead of aborting.
pub fn read_stdin() -> String {
    let stdin = io::stdin();
    let handle = stdin.lock();

    handle.lines().fold(String::new(), |mut collected, maybe_line| {
        let text: String = maybe_line.unwrap_or_default();
        collected.push_str(text.as_str());
        collected.push_str("\n");
        collected
    })
}
fn main() {
let app_args = AppArgs::get();
let original_target: &str = &app_args.target;
let options = Options::from_args();
let original_target: &str = &options.target;
let target_url: &str;
let base_url;
let dom;
let mut base_url: String;
let mut dom;
let mut use_stdin: bool = false;
// Pre-process the input
let cwd_normalized: String =
@@ -59,16 +72,26 @@ fn main() {
let mut target: String = str!(original_target.clone()).replace("\\", "/");
let path_is_relative: bool = path.is_relative();
// Determine exact target URL
if target.clone().len() == 0 {
eprintln!("No target specified");
if !options.silent {
eprintln!("No target specified");
}
process::exit(1);
} else if target.clone() == "-" {
// Read from pipe (stdin)
use_stdin = true;
// Default target URL to empty data URL; the user can control it via --base-url
target_url = "data:text/html,"
} else if is_http_url(target.clone()) || is_data_url(target.clone()) {
target_url = target.as_str();
} else if is_file_url(target.clone()) {
target_url = target.as_str();
} else if path.exists() {
if !path.is_file() {
eprintln!("Local target is not a file: {}", original_target);
if !options.silent {
eprintln!("Local target is not a file: {}", original_target);
}
process::exit(1);
}
target.insert_str(0, if cfg!(windows) { "file:///" } else { "file://" });
@@ -85,77 +108,124 @@ fn main() {
target_url = target.as_str();
}
let mut output = Output::new(&app_args.output).expect("Could not prepare output");
// Define output
let mut output = Output::new(&options.output).expect("Could not prepare output");
// Initialize client
let mut cache = HashMap::new();
let mut header_map = HeaderMap::new();
header_map.insert(
USER_AGENT,
HeaderValue::from_str(&app_args.user_agent).expect("Invalid User-Agent header specified"),
);
let timeout: u64 = if app_args.timeout > 0 {
app_args.timeout
if let Some(user_agent) = &options.user_agent {
header_map.insert(
USER_AGENT,
HeaderValue::from_str(&user_agent).expect("Invalid User-Agent header specified"),
);
}
let timeout: u64 = if options.timeout > 0 {
options.timeout
} else {
std::u64::MAX / 4
};
let client = Client::builder()
.timeout(Duration::from_secs(timeout))
.danger_accept_invalid_certs(app_args.insecure)
.danger_accept_invalid_certs(options.insecure)
.default_headers(header_map)
.build()
.expect("Failed to initialize HTTP client");
// Retrieve root document
if is_file_url(target_url) || is_http_url(target_url) {
let (data, final_url) = retrieve_asset(
&mut cache,
&client,
target_url,
target_url,
false,
"",
app_args.silent,
)
.expect("Could not retrieve target document");
base_url = final_url;
dom = html_to_dom(&data);
// At this stage we assume that the base URL is the same as the target URL
base_url = str!(target_url);
// Retrieve target document
if use_stdin {
dom = html_to_dom(&read_stdin());
} else if is_file_url(target_url) || is_http_url(target_url) {
match retrieve_asset(&mut cache, &client, target_url, target_url, &options, 0) {
Ok((data, final_url, _media_type)) => {
if options.base_url.clone().unwrap_or(str!()).is_empty() {
base_url = final_url
}
dom = html_to_dom(&String::from_utf8_lossy(&data));
}
Err(_) => {
if !options.silent {
eprintln!("Could not retrieve target document");
}
process::exit(1);
}
}
} else if is_data_url(target_url) {
let (media_type, text): (String, String) = data_url_to_text(target_url);
let (media_type, data): (String, Vec<u8>) = parse_data_url(target_url);
if !media_type.eq_ignore_ascii_case("text/html") {
eprintln!("Unsupported data URL media type");
if !options.silent {
eprintln!("Unsupported data URL media type");
}
process::exit(1);
}
base_url = str!(target_url);
dom = html_to_dom(&text);
dom = html_to_dom(&String::from_utf8_lossy(&data));
} else {
process::exit(1);
}
walk_and_embed_assets(
&mut cache,
&client,
&base_url,
&dom.document,
app_args.no_css,
app_args.no_fonts,
app_args.no_frames,
app_args.no_js,
app_args.no_images,
app_args.silent,
);
// Use custom base URL if specified, read and use what's in the DOM otherwise
if !options.base_url.clone().unwrap_or(str!()).is_empty() {
if is_data_url(options.base_url.clone().unwrap()) {
if !options.silent {
eprintln!("Data URLs cannot be used as base URL");
}
process::exit(1);
} else {
base_url = options.base_url.clone().unwrap();
}
} else {
if let Some(existing_base_url) = get_base_url(&dom.document) {
base_url = resolve_url(target_url, existing_base_url).unwrap();
}
}
let html: String = stringify_document(
&dom.document,
app_args.no_css,
app_args.no_frames,
app_args.no_js,
app_args.no_images,
app_args.isolate,
);
// Embed remote assets
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
// Update or add new BASE tag to reroute network requests and hash-links in the final document
if let Some(new_base_url) = options.base_url.clone() {
dom = set_base_url(&dom.document, new_base_url);
}
// Request and embed /favicon.ico (unless it's already linked in the document)
if !options.no_images && is_http_url(target_url) && !has_favicon(&dom.document) {
let favicon_ico_url: String = resolve_url(&base_url, "/favicon.ico").unwrap();
match retrieve_asset(
&mut cache,
&client,
&base_url,
&favicon_ico_url,
&options,
0,
) {
Ok((data, final_url, media_type)) => {
let favicon_data_url: String = data_to_data_url(&media_type, &data, &final_url);
dom = add_favicon(&dom.document, favicon_data_url);
}
Err(_) => {
// Failed to retrieve favicon.ico
}
}
}
// Serialize DOM tree
let mut result: String = stringify_document(&dom.document, &options);
// Add metadata tag
if !options.no_metadata {
let metadata_comment: String = create_metadata_tag(&target_url);
result.insert_str(0, &metadata_comment);
if metadata_comment.len() > 0 {
result.insert_str(metadata_comment.len(), "\n");
}
}
// Write result into stdout or file
output
.writeln_str(&html)
.writeln_str(&result)
.expect("Could not write HTML output");
}

115
src/opts.rs Normal file
View File

@@ -0,0 +1,115 @@
use clap::{App, Arg};
use std::env;
/// Run-time settings for one invocation of the program, produced by
/// `Options::from_args`.
///
/// NOTE(review): the code that copies parsed arguments into these fields is not
/// part of this excerpt; notes marked "presumably" are inferred from the flag
/// names declared in `from_args` and should be confirmed.
#[derive(Default)]
pub struct Options {
/// Presumably set by `-a, --no-audio` — confirm.
pub no_audio: bool,
/// Custom base URL, if any (presumably from `-b, --base-url` — confirm).
/// `main` rejects data URLs here and otherwise uses a non-empty value in
/// place of the document's own base URL.
pub base_url: Option<String>,
/// Presumably set by `-c, --no-css` — confirm.
pub no_css: bool,
/// Presumably set by `-e, --ignore-errors` — confirm.
pub ignore_errors: bool,
/// Presumably set by `-f, --no-frames` — confirm.
pub no_frames: bool,
/// When true, CSS processing drops `@font-face` rules.
pub no_fonts: bool,
/// When true, CSS image URLs are replaced with an empty image and `main`
/// does not request `/favicon.ico`.
pub no_images: bool,
/// Presumably set by `-I, --isolate` — confirm.
pub isolate: bool,
/// Presumably set by `-j, --no-js` — confirm.
pub no_js: bool,
/// Passed by `main` to the HTTP client's `danger_accept_invalid_certs`.
pub insecure: bool,
/// When true, `main` does not prepend the metadata comment to the result.
pub no_metadata: bool,
/// Output file path; `main` writes to stdout when it is empty or `-`.
pub output: String,
/// When true, `main` does not print its error messages to stderr.
pub silent: bool,
/// Network timeout in seconds; `main` substitutes `u64::MAX / 4` for `0`.
pub timeout: u64,
/// When `Some`, `main` sends the value as the `User-Agent` request header.
pub user_agent: Option<String>,
/// Presumably set by `-v, --no-video` — confirm.
pub no_video: bool,
/// URL or file path of the document to save; `-` makes `main` read stdin.
pub target: String,
/// NOTE(review): likely derived from the `NO_COLOR` / `TERM` environment
/// variables named by the constants below — confirm against `from_args`.
pub no_color: bool,
}
// ASCII-art banner; `Options::from_args` prepends it to the crate description in the
// "about" section of the help output.
const ASCII: &'static str = " \
_____ ______________ __________ ___________________ ___
| \\ / \\ | | | | | |
| \\_/ __ \\_| __ | | ___ ___ |__| |
| | | | | | | | | | | |
| |\\ /| |__| _ |__| |____| | | | | __ |
| | \\___/ | | \\ | | | | | | |
|___| |__________| \\_____________________| |___| |___| |___|
";
// Timeout stored in `Options::timeout` when `-t/--timeout` is not given
// (presumably seconds — TODO confirm against the code that configures the HTTP client).
// NOTE(review): the usage string in `from_args` advertises `--timeout=[60]`, which does not
// match this value.
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
// User-Agent string stored in `Options::user_agent` when `-u/--user-agent` is not given.
const DEFAULT_USER_AGENT: &'static str =
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0";
// Name of the environment variable whose mere presence (any value) sets `Options::no_color`.
const ENV_VAR_NO_COLOR: &str = "NO_COLOR";
// Name of the environment variable checked for the value "dumb", which also sets
// `Options::no_color`.
const ENV_VAR_TERM: &str = "TERM";
impl Options {
    /// Builds an [`Options`] value from the process's command-line arguments and from the
    /// `NO_COLOR` / `TERM` environment variables.
    ///
    /// Flags map one-to-one onto the boolean fields. Optional values fall back as follows:
    /// `output` becomes an empty string, `timeout` becomes `DEFAULT_NETWORK_TIMEOUT`,
    /// `user_agent` becomes `DEFAULT_USER_AGENT`, and `base_url` stays `None`.
    ///
    /// If `--timeout` is given but is not a valid unsigned integer, an error message is
    /// printed to STDERR and the process exits with code 1 (instead of panicking).
    ///
    /// # Panics
    ///
    /// Panics if the `target` argument is missing from the parsed matches; it is declared as
    /// `required(true)`, so this is expected to be rejected earlier by clap's own argument
    /// validation.
    pub fn from_args() -> Options {
        let matches = App::new(env!("CARGO_PKG_NAME"))
            .version(crate_version!())
            .author(format!("\n{}", crate_authors!("\n")).as_str())
            .about(format!("{}\n{}", ASCII, crate_description!()).as_str())
            .args_from_usage("-a, --no-audio 'Removes audio sources'")
            .args_from_usage("-b, --base-url=[http://localhost/] 'Sets custom base URL'")
            .args_from_usage("-c, --no-css 'Removes CSS'")
            .args_from_usage("-e, --ignore-errors 'Ignore network errors'")
            .args_from_usage("-f, --no-frames 'Removes frames and iframes'")
            .args_from_usage("-F, --no-fonts 'Removes fonts'")
            .args_from_usage("-i, --no-images 'Removes images'")
            .args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
            .args_from_usage("-j, --no-js 'Removes JavaScript'")
            .args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
            .args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
            .args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
            .args_from_usage("-s, --silent 'Suppresses verbosity'")
            // NOTE(review): the placeholder says 60 while DEFAULT_NETWORK_TIMEOUT is 120;
            // left untouched because it is user-visible help text.
            .args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
            .args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
            .args_from_usage("-v, --no-video 'Removes video sources'")
            .arg(
                Arg::with_name("target")
                    .required(true)
                    .takes_value(true)
                    .index(1)
                    .help("URL or file path, use - for stdin"),
            )
            .get_matches();

        let mut options: Options = Options::default();

        // Process the command
        options.target = matches
            .value_of("target")
            .expect("please set target")
            .to_string();
        options.no_audio = matches.is_present("no-audio");
        options.base_url = matches.value_of("base-url").map(|url| url.to_string());
        options.no_css = matches.is_present("no-css");
        options.ignore_errors = matches.is_present("ignore-errors");
        options.no_frames = matches.is_present("no-frames");
        options.no_fonts = matches.is_present("no-fonts");
        options.no_images = matches.is_present("no-images");
        options.isolate = matches.is_present("isolate");
        options.no_js = matches.is_present("no-js");
        options.insecure = matches.is_present("insecure");
        options.no_metadata = matches.is_present("no-metadata");
        options.output = matches.value_of("output").unwrap_or("").to_string();
        options.silent = matches.is_present("silent");

        // The timeout comes straight from the user: report a bad value instead of panicking,
        // and use the numeric default directly rather than round-tripping it through a string.
        options.timeout = match matches.value_of("timeout") {
            Some(raw_timeout) => match raw_timeout.parse::<u64>() {
                Ok(timeout) => timeout,
                Err(_) => {
                    eprintln!("Invalid timeout value: {}", raw_timeout);
                    std::process::exit(1);
                }
            },
            None => DEFAULT_NETWORK_TIMEOUT,
        };

        // A User-Agent is always set; the built-in one is used unless overridden
        options.user_agent = Some(
            matches
                .value_of("user-agent")
                .unwrap_or(DEFAULT_USER_AGENT)
                .to_string(),
        );
        options.no_video = matches.is_present("no-video");

        // Colors are disabled when NO_COLOR is set (to anything), when STDERR is not a
        // terminal, or when the terminal declares itself "dumb"
        options.no_color =
            env::var_os(ENV_VAR_NO_COLOR).is_some() || atty::isnt(atty::Stream::Stderr);
        if let Some(term) = env::var_os(ENV_VAR_TERM) {
            if term == "dumb" {
                options.no_color = true;
            }
        }

        options
    }
}

View File

@@ -1,520 +0,0 @@
use assert_cmd::prelude::*;
use std::env;
use std::io::Write;
use std::process::Command;
use tempfile::NamedTempFile;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_print_version() -> Result<(), Box<dyn std::error::Error>> {
    // Run the compiled binary with nothing but the version flag
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("-V")
        .output()
        .unwrap();

    // STDOUT carries "<name> <version>" followed by a newline
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(
        stdout,
        format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))
    );

    // Nothing is written to STDERR
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "");

    // The process exits with code 0
    output.assert().code(0);

    Ok(())
}
#[test]
fn passing_bad_input_empty_target() -> Result<(), Box<dyn std::error::Error>> {
    // Run the compiled binary with an empty string as the target
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("")
        .output()
        .unwrap();

    // Nothing is written to STDOUT
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(stdout, "");

    // STDERR carries the error description
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "No target specified\n");

    // The process exits with code 1
    output.assert().code(1);

    Ok(())
}
#[test]
fn passing_bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
    // Run the compiled binary against a data URL that has no media type
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("data:,Hello%2C%20World!")
        .output()
        .unwrap();

    // Nothing is written to STDOUT
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(stdout, "");

    // STDERR carries the error description
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "Unsupported data URL media type\n");

    // The process exits with code 1
    output.assert().code(1);

    Ok(())
}
#[test]
fn passing_isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
    // Run the compiled binary in isolation mode against an HTML data URL
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .args(&["-I", "data:text/html,Hello%2C%20World!"])
        .output()
        .unwrap();

    // STDOUT carries the document with the isolating CSP meta tag injected
    let expected_stdout: &str = "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
        </head><body>Hello, World!</body></html>\n";
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(stdout, expected_stdout);

    // Nothing is written to STDERR
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "");

    // The process exits with code 0
    output.assert().code(0);

    Ok(())
}
#[test]
fn passing_remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    // Run the compiled binary with CSS removal against a styled HTML data URL
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .args(&[
            "-c",
            "data:text/html,<style>body{background-color:pink}</style>Hello",
        ])
        .output()
        .unwrap();

    // STDOUT carries the document with an emptied STYLE tag and a style-blocking CSP
    let expected_stdout: &str = "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
        <style></style>\
        </head><body>Hello</body></html>\n";
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(stdout, expected_stdout);

    // Nothing is written to STDERR
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "");

    // The process exits with code 0
    output.assert().code(0);

    Ok(())
}
#[test]
fn passing_remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    // Run the compiled binary with frame removal against a data URL holding an IFRAME
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .args(&[
            "-f",
            "data:text/html,<iframe src=\"https://google.com\"></iframe>Hi",
        ])
        .output()
        .unwrap();

    // STDOUT carries the document with the IFRAME source blanked and a frame-blocking CSP
    let expected_stdout: &str = "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
        </head><body><iframe src=\"\"></iframe>Hi</body></html>\n";
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(stdout, expected_stdout);

    // Nothing is written to STDERR
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "");

    // The process exits with code 0
    output.assert().code(0);

    Ok(())
}
#[test]
fn passing_remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    // Run the compiled binary with image removal against a data URL holding an IMG
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .args(&["-i", "data:text/html,<img src=\"https://google.com\"/>Hi"])
        .output()
        .unwrap();

    // STDOUT carries the document with the image swapped for the empty placeholder
    let expected_stdout: String = format!(
        "<html>\
        <head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>\
        </head>\
        <body>\
        <img src=\"{empty_image}\">\
        Hi\
        </body>\
        </html>\n",
        empty_image = empty_image!()
    );
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(stdout, expected_stdout);

    // Nothing is written to STDERR
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "");

    // The process exits with code 0
    output.assert().code(0);

    Ok(())
}
#[test]
fn passing_remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    // Run the compiled binary with JavaScript removal against a data URL holding a SCRIPT
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .args(&["-j", "data:text/html,<script>alert(2)</script>Hi"])
        .output()
        .unwrap();

    // STDOUT carries the document with an emptied SCRIPT tag and a script-blocking CSP
    let expected_stdout: &str = "<html>\
        <head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta>\
        <script></script></head>\
        <body>Hi</body>\
        </html>\n";
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(stdout, expected_stdout);

    // Nothing is written to STDERR
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "");

    // The process exits with code 0
    output.assert().code(0);

    Ok(())
}
#[test]
fn passing_local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
    // Pieces needed to spell out the file:// URLs the program reports on STDERR
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
    let cwd_normalized: String =
        str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");

    // Relative path of the fixture, using the platform's separator
    let target_path: &str = if cfg!(windows) {
        "src\\tests\\data\\local-file.html"
    } else {
        "src/tests/data/local-file.html"
    };

    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg(target_path)
        .output()
        .unwrap();

    // STDOUT carries the local document with its stylesheet and script embedded as data URLs
    let expected_stdout: &str = "\
        <!DOCTYPE html><html lang=\"en\"><head>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\" rel=\"stylesheet\" type=\"text/css\">\n \
        <link href=\"data:text/css;base64,\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img alt=\"\" src=\"\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\
        </body></html>\n\
        ";
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(stdout, expected_stdout);

    // STDERR lists every file URL that got retrieved, one per line
    let expected_stderr: String = format!(
        "\
        {file}{cwd}/src/tests/data/local-file.html\n\
        {file}{cwd}/src/tests/data/local-style.css\n\
        {file}{cwd}/src/tests/data/local-script.js\n\
        ",
        file = file_url_protocol,
        cwd = cwd_normalized
    );
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, expected_stderr);

    // The process exits with code 0
    output.assert().code(0);

    Ok(())
}
#[test]
fn passing_local_file_target_input_absolute_target_path() -> Result<(), Box<dyn std::error::Error>>
{
    // Pieces needed to build the absolute target path and the expected file:// URL
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
    let cwd = env::current_dir().unwrap();
    let cwd_normalized: String =
        str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");

    // Absolute path of the fixture, using the platform's separator
    let cwd_native: &str = cwd.to_str().unwrap();
    let target_path: String = if cfg!(windows) {
        format!(
            "{cwd}\\src\\tests\\data\\local-file.html",
            cwd = cwd_native
        )
    } else {
        format!("{cwd}/src/tests/data/local-file.html", cwd = cwd_native)
    };

    // No JS, no CSS, no images, isolated
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("-jciI")
        .arg(target_path)
        .output()
        .unwrap();

    // STDOUT carries the local document with all assets stripped and a restrictive CSP
    let expected_stdout: String = format!(
        "\
        <!DOCTYPE html><html lang=\"en\"><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n \
        <link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img alt=\"\" src=\"{empty_image}\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script src=\"\"></script>\n\n\n\n\
        </body></html>\n\
        ",
        empty_image = empty_image!()
    );
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(stdout, expected_stdout);

    // STDERR lists only the target file, since no asset is retrieved
    let expected_stderr: String = format!(
        "{file}{cwd}/src/tests/data/local-file.html\n",
        file = file_url_protocol,
        cwd = cwd_normalized,
    );
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, expected_stderr);

    // The process exits with code 0
    output.assert().code(0);

    Ok(())
}
#[test]
fn passing_local_file_url_target_input() -> Result<(), Box<dyn std::error::Error>> {
    let cwd_normalized: String =
        str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
    // The only platform-specific part of the URL is the protocol prefix
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // file:// URL of the fixture. The former `if cfg!(windows)` around this expression had two
    // byte-identical branches, so a single `format!` is enough.
    let target_url: String = format!(
        "{file}{cwd}/src/tests/data/local-file.html",
        file = file_url_protocol,
        cwd = cwd_normalized,
    );

    // No CSS, no JS, no images
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("-cji")
        .arg(&target_url)
        .output()
        .unwrap();

    // STDOUT carries the local document with all assets stripped and a matching CSP
    let expected_stdout: String = format!(
        "\
        <!DOCTYPE html><html lang=\"en\"><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n \
        <link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img alt=\"\" src=\"{empty_image}\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script src=\"\"></script>\n\n\n\n\
        </body></html>\n\
        ",
        empty_image = empty_image!()
    );
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(stdout, expected_stdout);

    // STDERR lists only the target, i.e. the URL that was passed in followed by a newline
    // (this expectation was also wrapped in an `if cfg!(windows)` with identical branches)
    let expected_stderr: String = format!(
        "{file}{cwd}/src/tests/data/local-file.html\n",
        file = file_url_protocol,
        cwd = cwd_normalized,
    );
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, expected_stderr);

    // The process exits with code 0
    output.assert().code(0);

    Ok(())
}
#[test]
fn passing_security_disallow_local_assets_within_data_url_targets(
) -> Result<(), Box<dyn std::error::Error>> {
    // Run the compiled binary against a data URL whose SCRIPT points at a local file
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("data:text/html,%3Cscript%20src=\"src/tests/data/local-script.js\"%3E%3C/script%3E")
        .output()
        .unwrap();

    // STDOUT carries the document with the script source blanked: the local file is not embedded
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(
        stdout,
        "<html><head><script src=\"\"></script></head><body></body></html>\n"
    );

    // Nothing is written to STDERR
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, "");

    // The process exits with code 0
    output.assert().code(0);

    Ok(())
}
#[test]
fn passing_embed_file_url_local_asset_within_style_attribute(
) -> Result<(), Box<dyn std::error::Error>> {
    let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // Temporary SVG image that the HTML document will reference
    let mut file_svg = NamedTempFile::new()?;
    writeln!(file_svg, "<svg version=\"1.1\" baseProfile=\"full\" width=\"300\" height=\"200\" xmlns=\"http://www.w3.org/2000/svg\">\
        <rect width=\"100%\" height=\"100%\" fill=\"red\" />\
        <circle cx=\"150\" cy=\"100\" r=\"80\" fill=\"green\" />\
        <text x=\"150\" y=\"125\" font-size=\"60\" text-anchor=\"middle\" fill=\"white\">SVG</text>\
        </svg>\n")?;
    let svg_url_path: String = str!(file_svg.path().to_str().unwrap()).replace("\\", "/");

    // Temporary HTML document using the SVG as a background image via a style attribute
    let mut file_html = NamedTempFile::new()?;
    writeln!(
        file_html,
        "<div style='background-image: url(\"{file}{path}\")'></div>\n",
        file = file_url_prefix,
        path = svg_url_path,
    )?;
    let html_url_path: String = str!(file_html.path().to_str().unwrap()).replace("\\", "/");

    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg(file_html.path())
        .output()
        .unwrap();

    // STDOUT carries the document with the background-image replaced by a data URL
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(
        stdout,
        "<html><head></head><body><div style=\"background-image: url('data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiBmaWxsPSJyZWQiIC8+PGNpcmNsZSBjeD0iMTUwIiBjeT0iMTAwIiByPSI4MCIgZmlsbD0iZ3JlZW4iIC8+PHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+PC9zdmc+Cgo=')\"></div>\n\n</body></html>\n"
    );

    // STDERR lists the temporary files that got retrieved: the document, then the image
    let expected_stderr: String = format!(
        "\
        {file}{html_path}\n\
        {file}{svg_path}\n\
        ",
        file = file_url_prefix,
        html_path = html_url_path,
        svg_path = svg_url_path,
    );
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, expected_stderr);

    // The process exits with code 0
    output.assert().code(0);

    Ok(())
}
#[test]
fn passing_css_import_string() -> Result<(), Box<dyn std::error::Error>> {
    let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // Temporary stylesheet that the HTML document will import
    let mut file_css = NamedTempFile::new()?;
    writeln!(file_css, "body{{background-color:#000;color:#fff}}")?;
    let css_url_path: String = str!(file_css.path().to_str().unwrap()).replace("\\", "/");

    // Temporary HTML document importing the stylesheet in three different notations
    let mut file_html = NamedTempFile::new()?;
    writeln!(
        file_html,
        "\
        <style>\n\
        @charset 'UTF-8';\n\
        \n\
        @import '{file}{css_path}';\n\
        \n\
        @import url({file}{css_path});\n\
        \n\
        @import url('{file}{css_path}')\n\
        </style>\n\
        ",
        file = file_url_prefix,
        css_path = css_url_path,
    )?;
    let html_url_path: String = str!(file_html.path().to_str().unwrap()).replace("\\", "/");

    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg(file_html.path())
        .output()
        .unwrap();

    // STDOUT carries the document with every @import replaced by an embedded data URL
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(
        stdout,
        "<html><head><style>\n@charset 'UTF-8';\n\n@import 'data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K';\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K');\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K')\n</style>\n\n</head><body></body></html>\n"
    );

    // STDERR lists the document once and the stylesheet once per @import
    let expected_stderr: String = format!(
        "\
        {file}{html_path}\n\
        {file}{css_path}\n\
        {file}{css_path}\n\
        {file}{css_path}\n\
        ",
        file = file_url_prefix,
        html_path = html_url_path,
        css_path = css_url_path,
    );
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, expected_stderr);

    // The process exits with code 0
    output.assert().code(0);

    Ok(())
}

123
src/tests/cli/base_url.rs Normal file
View File

@@ -0,0 +1,123 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use assert_cmd::prelude::*;
    use std::env;
    use std::process::Command;

    /// `-b <url>` on a document without a BASE tag inserts one.
    #[test]
    fn add_new_when_provided() -> Result<(), Box<dyn std::error::Error>> {
        let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
            .args(&[
                "-M",
                "-b",
                "http://localhost:8000/",
                "data:text/html,Hello%2C%20World!",
            ])
            .output()
            .unwrap();

        // STDOUT carries the document with the newly added BASE tag
        let expected_stdout: &str = "<html><head>\
            <base href=\"http://localhost:8000/\"></base>\
            </head><body>Hello, World!</body></html>\n";
        let stdout = std::str::from_utf8(&output.stdout).unwrap();
        assert_eq!(stdout, expected_stdout);

        // Nothing is written to STDERR
        let stderr = std::str::from_utf8(&output.stderr).unwrap();
        assert_eq!(stderr, "");

        // The process exits with code 0
        output.assert().code(0);

        Ok(())
    }

    /// Without `-b`, the document's own BASE tag is left as it is.
    #[test]
    fn keep_existing_when_none_provided() -> Result<(), Box<dyn std::error::Error>> {
        let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
            .args(&[
                "-M",
                "data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!",
            ])
            .output()
            .unwrap();

        // STDOUT carries the document with its original base URL preserved
        let expected_stdout: &str = "<html><head>\
            <base href=\"http://localhost:8000/\">\
            </head><body>Hello, World!</body></html>\n";
        let stdout = std::str::from_utf8(&output.stdout).unwrap();
        assert_eq!(stdout, expected_stdout);

        // Nothing is written to STDERR
        let stderr = std::str::from_utf8(&output.stderr).unwrap();
        assert_eq!(stderr, "");

        // The process exits with code 0
        output.assert().code(0);

        Ok(())
    }

    /// `-b <url>` replaces the href of a BASE tag that is already in the document.
    #[test]
    fn override_existing_when_provided() -> Result<(), Box<dyn std::error::Error>> {
        let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
            .args(&[
                "-M",
                "-b",
                "http://localhost/",
                "data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!",
            ])
            .output()
            .unwrap();

        // STDOUT carries the document with the base URL overridden by the provided one
        let expected_stdout: &str = "<html><head>\
            <base href=\"http://localhost/\">\
            </head><body>Hello, World!</body></html>\n";
        let stdout = std::str::from_utf8(&output.stdout).unwrap();
        assert_eq!(stdout, expected_stdout);

        // Nothing is written to STDERR
        let stderr = std::str::from_utf8(&output.stderr).unwrap();
        assert_eq!(stderr, "");

        // The process exits with code 0
        output.assert().code(0);

        Ok(())
    }

    /// `-b ""` blanks the href of a BASE tag that is already in the document.
    #[test]
    fn remove_existing_when_empty_provided() -> Result<(), Box<dyn std::error::Error>> {
        let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
            .args(&[
                "-M",
                "-b",
                "",
                "data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!",
            ])
            .output()
            .unwrap();

        // STDOUT carries the document with an empty base URL
        let expected_stdout: &str = "<html><head>\
            <base href=\"\">\
            </head><body>Hello, World!</body></html>\n";
        let stdout = std::str::from_utf8(&output.stdout).unwrap();
        assert_eq!(stdout, expected_stdout);

        // Nothing is written to STDERR
        let stderr = std::str::from_utf8(&output.stderr).unwrap();
        assert_eq!(stderr, "");

        // The process exits with code 0
        output.assert().code(0);

        Ok(())
    }
}

145
src/tests/cli/basic.rs Normal file
View File

@@ -0,0 +1,145 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use assert_cmd::prelude::*;
    use std::env;
    use std::io::Write;
    use std::process::{Command, Stdio};
    use tempfile::NamedTempFile;

    /// `-V` prints the program name and version and exits successfully.
    #[test]
    fn print_version() -> Result<(), Box<dyn std::error::Error>> {
        let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
            .arg("-V")
            .output()
            .unwrap();

        // STDOUT carries "<name> <version>" followed by a newline
        let stdout = std::str::from_utf8(&output.stdout).unwrap();
        assert_eq!(
            stdout,
            format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))
        );

        // Nothing is written to STDERR
        let stderr = std::str::from_utf8(&output.stderr).unwrap();
        assert_eq!(stderr, "");

        // The process exits with code 0
        output.assert().code(0);

        Ok(())
    }

    /// A target of `-` makes the program read the document from STDIN.
    #[test]
    fn stdin_target_input() -> Result<(), Box<dyn std::error::Error>> {
        // Produce the input with `echo` and keep hold of its STDOUT pipe
        let mut echo_process = Command::new("echo")
            .arg("Hello from STDIN")
            .stdout(Stdio::piped())
            .spawn()
            .unwrap();
        let echo_stdout = echo_process.stdout.take().unwrap();
        echo_process.wait().unwrap();

        // Feed that pipe to the program under test
        let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
            .stdin(echo_stdout)
            .args(&["-M", "-"])
            .output()
            .unwrap();

        // STDOUT carries the piped text wrapped into an HTML document
        let stdout = std::str::from_utf8(&output.stdout).unwrap();
        assert_eq!(
            stdout,
            "<html><head></head><body>Hello from STDIN\n</body></html>\n"
        );

        Ok(())
    }

    /// `@import` rules in all three notations get their targets embedded as data URLs.
    #[test]
    fn css_import_string() -> Result<(), Box<dyn std::error::Error>> {
        let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };

        // Temporary stylesheet that the HTML document will import
        let mut file_css = NamedTempFile::new()?;
        writeln!(file_css, "body{{background-color:#000;color:#fff}}")?;
        let css_url_path: String = str!(file_css.path().to_str().unwrap()).replace("\\", "/");

        // Temporary HTML document importing the stylesheet three times
        let mut file_html = NamedTempFile::new()?;
        writeln!(
            file_html,
            "\
            <style>\n\
            @charset 'UTF-8';\n\
            \n\
            @import '{file}{css_path}';\n\
            \n\
            @import url({file}{css_path});\n\
            \n\
            @import url('{file}{css_path}')\n\
            </style>\n\
            ",
            file = file_url_prefix,
            css_path = css_url_path,
        )?;
        let html_url_path: String = str!(file_html.path().to_str().unwrap()).replace("\\", "/");

        let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
            .arg("-M")
            .arg(file_html.path())
            .output()
            .unwrap();

        // STDOUT carries the document with every @import replaced by an embedded data URL
        let stdout = std::str::from_utf8(&output.stdout).unwrap();
        assert_eq!(
            stdout,
            "<html><head><style>\n@charset 'UTF-8';\n\n@import 'data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K';\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K');\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K')\n</style>\n\n</head><body></body></html>\n"
        );

        // STDERR lists the document once and the stylesheet once per @import
        // (every line after the first starts with a single space)
        let expected_stderr: String = format!(
            "\
            {file}{html_path}\n \
            {file}{css_path}\n \
            {file}{css_path}\n \
            {file}{css_path}\n\
            ",
            file = file_url_prefix,
            html_path = html_url_path,
            css_path = css_url_path,
        );
        let stderr = std::str::from_utf8(&output.stderr).unwrap();
        assert_eq!(stderr, expected_stderr);

        // The process exits with code 0
        output.assert().code(0);

        Ok(())
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use assert_cmd::prelude::*;
    use std::env;
    use std::process::Command;

    /// An empty target is rejected with an error message and exit code 1.
    #[test]
    fn bad_input_empty_target() -> Result<(), Box<dyn std::error::Error>> {
        let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
            .arg("")
            .output()
            .unwrap();

        // Nothing is written to STDOUT
        let stdout = std::str::from_utf8(&output.stdout).unwrap();
        assert_eq!(stdout, "");

        // STDERR carries the error description
        let stderr = std::str::from_utf8(&output.stderr).unwrap();
        assert_eq!(stderr, "No target specified\n");

        // The process exits with code 1
        output.assert().code(1);

        Ok(())
    }
}

234
src/tests/cli/data_url.rs Normal file
View File

@@ -0,0 +1,234 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use assert_cmd::prelude::*;
    use std::env;
    use std::process::Command;

    /// A data URL without a media type is rejected with an error message and exit code 1.
    #[test]
    fn bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
        let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
            .arg("data:,Hello%2C%20World!")
            .output()
            .unwrap();

        // Nothing is written to STDOUT
        let stdout = std::str::from_utf8(&output.stdout).unwrap();
        assert_eq!(stdout, "");

        // STDERR carries the error description
        let stderr = std::str::from_utf8(&output.stderr).unwrap();
        assert_eq!(stderr, "Unsupported data URL media type\n");

        // The process exits with code 1
        output.assert().code(1);

        Ok(())
    }

    /// `-I` injects the isolating Content-Security-Policy meta tag.
    #[test]
    fn isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
        let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
            .args(&["-M", "-I", "data:text/html,Hello%2C%20World!"])
            .output()
            .unwrap();

        // STDOUT carries the isolated document
        let expected_stdout: &str = "<html><head>\
            <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
            </head><body>Hello, World!</body></html>\n";
        let stdout = std::str::from_utf8(&output.stdout).unwrap();
        assert_eq!(stdout, expected_stdout);

        // Nothing is written to STDERR
        let stderr = std::str::from_utf8(&output.stderr).unwrap();
        assert_eq!(stderr, "");

        // The process exits with code 0
        output.assert().code(0);

        Ok(())
    }

    /// `-c` empties STYLE tags and adds a style-blocking CSP.
    #[test]
    fn remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
        let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
            .args(&[
                "-M",
                "-c",
                "data:text/html,<style>body{background-color:pink}</style>Hello",
            ])
            .output()
            .unwrap();

        // STDOUT carries the document without any CSS
        let expected_stdout: &str = "<html><head>\
            <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
            <style></style>\
            </head><body>Hello</body></html>\n";
        let stdout = std::str::from_utf8(&output.stdout).unwrap();
        assert_eq!(stdout, expected_stdout);

        // Nothing is written to STDERR
        let stderr = std::str::from_utf8(&output.stderr).unwrap();
        assert_eq!(stderr, "");

        // The process exits with code 0
        output.assert().code(0);

        Ok(())
    }

    /// `-F` drops @font-face rules and adds a font-blocking CSP.
    #[test]
    fn remove_fonts_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
        let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
            .args(&[
                "-M",
                "-F",
                "data:text/html,<style>@font-face { font-family: myFont; src: url(font.woff); }</style>Hi",
            ])
            .output()
            .unwrap();

        // STDOUT carries the document without any web fonts
        let expected_stdout: &str = "<html><head>\
            <meta http-equiv=\"Content-Security-Policy\" content=\"font-src 'none';\"></meta>\
            <style></style>\
            </head><body>Hi</body></html>\n";
        let stdout = std::str::from_utf8(&output.stdout).unwrap();
        assert_eq!(stdout, expected_stdout);

        // Nothing is written to STDERR
        let stderr = std::str::from_utf8(&output.stderr).unwrap();
        assert_eq!(stderr, "");

        // The process exits with code 0
        output.assert().code(0);

        Ok(())
    }

    /// `-f` blanks IFRAME sources and adds a frame-blocking CSP.
    #[test]
    fn remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
        let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
            .args(&[
                "-M",
                "-f",
                "data:text/html,<iframe src=\"https://duckduckgo.com\"></iframe>Hi",
            ])
            .output()
            .unwrap();

        // STDOUT carries the document with the IFRAME source removed
        let expected_stdout: &str = "<html><head>\
            <meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>\
            </head><body><iframe src=\"\"></iframe>Hi</body></html>\n";
        let stdout = std::str::from_utf8(&output.stdout).unwrap();
        assert_eq!(stdout, expected_stdout);

        // Nothing is written to STDERR
        let stderr = std::str::from_utf8(&output.stderr).unwrap();
        assert_eq!(stderr, "");

        // The process exits with code 0
        output.assert().code(0);

        Ok(())
    }

    /// `-i` swaps image sources for the empty placeholder image and restricts img-src.
    #[test]
    fn remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
        let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
            .args(&[
                "-M",
                "-i",
                "data:text/html,<img src=\"https://google.com\"/>Hi",
            ])
            .output()
            .unwrap();

        // STDOUT carries the document with the image replaced by the placeholder
        let expected_stdout: String = format!(
            "<html>\
            <head>\
            <meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>\
            </head>\
            <body>\
            <img src=\"{empty_image}\">\
            Hi\
            </body>\
            </html>\n",
            empty_image = empty_image!()
        );
        let stdout = std::str::from_utf8(&output.stdout).unwrap();
        assert_eq!(stdout, expected_stdout);

        // Nothing is written to STDERR
        let stderr = std::str::from_utf8(&output.stderr).unwrap();
        assert_eq!(stderr, "");

        // The process exits with code 0
        output.assert().code(0);

        Ok(())
    }

    /// `-j` empties SCRIPT tags and adds a script-blocking CSP.
    #[test]
    fn remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
        let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
            .args(&["-M", "-j", "data:text/html,<script>alert(2)</script>Hi"])
            .output()
            .unwrap();

        // STDOUT carries the document without any JavaScript
        let expected_stdout: &str = "<html>\
            <head>\
            <meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta>\
            <script></script></head>\
            <body>Hi</body>\
            </html>\n";
        let stdout = std::str::from_utf8(&output.stdout).unwrap();
        assert_eq!(stdout, expected_stdout);

        // Nothing is written to STDERR
        let stderr = std::str::from_utf8(&output.stderr).unwrap();
        assert_eq!(stderr, "");

        // The process exits with code 0
        output.assert().code(0);

        Ok(())
    }

    /// A data URL document must not be able to pull in files from the local file system.
    #[test]
    fn security_disallow_local_assets_within_data_url_targets(
    ) -> Result<(), Box<dyn std::error::Error>> {
        let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
            .args(&[
                "-M",
                "data:text/html,%3Cscript%20src=\"src/tests/data/basic/local-script.js\"%3E%3C/script%3E",
            ])
            .output()
            .unwrap();

        // STDOUT carries the document with the script source dropped, not embedded
        let stdout = std::str::from_utf8(&output.stdout).unwrap();
        assert_eq!(
            stdout,
            "<html><head><script></script></head><body></body></html>\n"
        );

        // Nothing is written to STDERR
        let stderr = std::str::from_utf8(&output.stderr).unwrap();
        assert_eq!(stderr, "");

        // The process exits with code 0
        output.assert().code(0);

        Ok(())
    }
}

View File

@@ -0,0 +1,244 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::io::Write;
use std::process::Command;
use tempfile::NamedTempFile;
#[test]
fn local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
    // Pieces needed to spell out the file:// URLs the program reports on STDERR
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
    let cwd_normalized: String =
        str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");

    // Relative path of the fixture, using the platform's separator
    let target_path: &str = if cfg!(windows) {
        "src\\tests\\data\\basic\\local-file.html"
    } else {
        "src/tests/data/basic/local-file.html"
    };

    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .args(&["-M", target_path])
        .output()
        .unwrap();

    // STDOUT carries the local document with its stylesheet and script embedded as data URLs
    let expected_stdout: &str = "\
        <!DOCTYPE html><html lang=\"en\"><head>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link rel=\"stylesheet\" type=\"text/css\" href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\">\n \
        <link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img alt=\"\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\
        </body></html>\n\
        ";
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(stdout, expected_stdout);

    // STDERR lists every file URL that got retrieved
    // (every line after the first starts with a single space)
    let expected_stderr: String = format!(
        "\
        {file}{cwd}/src/tests/data/basic/local-file.html\n \
        {file}{cwd}/src/tests/data/basic/local-style.css\n \
        {file}{cwd}/src/tests/data/basic/local-script.js\n\
        ",
        file = file_url_protocol,
        cwd = cwd_normalized
    );
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(stderr, expected_stderr);

    // The process exits with code 0
    output.assert().code(0);

    Ok(())
}
#[test]
fn local_file_target_input_absolute_target_path() -> Result<(), Box<dyn std::error::Error>> {
    let cwd = env::current_dir().unwrap();
    // Forward-slash form of the working directory, used for the expected file URL
    let cwd_normalized: String = str!(cwd.to_str().unwrap()).replace("\\", "/");
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;

    // Absolute path to the target document, spelled with the platform's separator
    let target: String = if cfg!(windows) {
        format!(
            "{cwd}\\src\\tests\\data\\basic\\local-file.html",
            cwd = cwd.to_str().unwrap()
        )
    } else {
        format!(
            "{cwd}/src/tests/data/basic/local-file.html",
            cwd = cwd.to_str().unwrap()
        )
    };
    let output = cmd.arg("-M").arg("-jciI").arg(target).output().unwrap();

    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // STDOUT should contain HTML from the local file
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(
        stdout,
        format!(
            "\
            <!DOCTYPE html><html lang=\"en\"><head>\
            <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
            <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
            <title>Local HTML file</title>\n \
            <link rel=\"stylesheet\" type=\"text/css\">\n \
            <link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
            <img src=\"{empty_image}\" alt=\"\">\n \
            <a href=\"file://local-file.html/\">Tricky href</a>\n \
            <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
            <script></script>\n\n\n\n\
            </body></html>\n\
            ",
            empty_image = empty_image!()
        )
    );

    // STDERR should contain only the target file
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(
        stderr,
        format!(
            "{file}{cwd}/src/tests/data/basic/local-file.html\n",
            file = file_url_protocol,
            cwd = cwd_normalized,
        )
    );

    // The exit code should be 0
    output.assert().code(0);

    Ok(())
}
#[test]
fn local_file_url_target_input() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;

    // Working directory with forward slashes only, so it can be placed inside a file URL
    let cwd_normalized: String =
        str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // The platform difference is already captured by `file_url_protocol` and
    // `cwd_normalized`, so the target URL is built by one expression on every platform.
    let target_url: String = format!(
        "{file}{cwd}/src/tests/data/basic/local-file.html",
        file = file_url_protocol,
        cwd = cwd_normalized,
    );

    let out = cmd
        .arg("-M")
        .arg("-cji")
        .arg(&target_url)
        .output()
        .unwrap();

    // STDOUT should contain HTML from the local file
    assert_eq!(
        std::str::from_utf8(&out.stdout).unwrap(),
        format!(
            "\
            <!DOCTYPE html><html lang=\"en\"><head>\
            <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
            <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
            <title>Local HTML file</title>\n \
            <link rel=\"stylesheet\" type=\"text/css\">\n \
            <link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
            <img src=\"{empty_image}\" alt=\"\">\n \
            <a href=\"file://local-file.html/\">Tricky href</a>\n \
            <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
            <script></script>\n\n\n\n\
            </body></html>\n\
            ",
            empty_image = empty_image!()
        )
    );

    // STDERR should contain only the target file URL, followed by a newline
    assert_eq!(
        std::str::from_utf8(&out.stderr).unwrap(),
        format!("{}\n", target_url)
    );

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
#[test]
fn embed_file_url_local_asset_within_style_attribute() -> Result<(), Box<dyn std::error::Error>>
{
    let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;

    // Temporary SVG image that the HTML document below refers to
    let mut file_svg = NamedTempFile::new()?;
    writeln!(file_svg, "<svg version=\"1.1\" baseProfile=\"full\" width=\"300\" height=\"200\" xmlns=\"http://www.w3.org/2000/svg\">\
        <rect width=\"100%\" height=\"100%\" fill=\"red\" />\
        <circle cx=\"150\" cy=\"100\" r=\"80\" fill=\"green\" />\
        <text x=\"150\" y=\"125\" font-size=\"60\" text-anchor=\"middle\" fill=\"white\">SVG</text>\
        </svg>\n")?;
    // Path of the SVG file with forward slashes only, ready to be appended to the file URL prefix
    let svg_url_path: String = str!(file_svg.path().to_str().unwrap()).replace("\\", "/");

    // Temporary HTML document whose style attribute points at the SVG through a file URL
    let mut file_html = NamedTempFile::new()?;
    writeln!(
        file_html,
        "<div style='background-image: url(\"{file}{path}\")'></div>\n",
        file = file_url_prefix,
        path = svg_url_path,
    )?;

    let output = cmd.arg("-M").arg(file_html.path()).output().unwrap();

    // STDOUT should contain HTML with data URL for background-image in it
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    assert_eq!(
        stdout,
        "<html><head></head><body><div style=\"background-image: url('data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiBmaWxsPSJyZWQiIC8+PGNpcmNsZSBjeD0iMTUwIiBjeT0iMTAwIiByPSI4MCIgZmlsbD0iZ3JlZW4iIC8+PHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+PC9zdmc+Cgo=')\"></div>\n\n</body></html>\n"
    );

    // STDERR should list temporary files that got retrieved
    let stderr = std::str::from_utf8(&output.stderr).unwrap();
    assert_eq!(
        stderr,
        format!(
            "\
            {file}{html_path}\n \
            {file}{svg_path}\n\
            ",
            file = file_url_prefix,
            html_path = str!(file_html.path().to_str().unwrap()).replace("\\", "/"),
            svg_path = svg_url_path,
        )
    );

    // The exit code should be 0
    output.assert().code(0);

    Ok(())
}
}

5
src/tests/cli/mod.rs Normal file
View File

@@ -0,0 +1,5 @@
mod base_url;
mod basic;
mod data_url;
mod local_files;
mod unusual_encodings;

View File

@@ -0,0 +1,51 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use assert_cmd::prelude::*;
    use std::env;
    use std::process::Command;

    /// Runs the binary against a local document that declares the iso-8859-1 charset
    /// and checks that the saved document declares utf-8 instead.
    #[test]
    fn change_encoding_to_utf_8() -> Result<(), Box<dyn std::error::Error>> {
        let cwd = env::current_dir().unwrap();
        // Forward-slash form of the working directory, used for the expected file URL
        let cwd_normalized: String = str!(cwd.to_str().unwrap()).replace("\\", "/");
        let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;

        // Relative path to the target document, spelled with the platform's separator
        let target: &str = if cfg!(windows) {
            "src\\tests\\data\\unusual_encodings\\iso-8859-1.html"
        } else {
            "src/tests/data/unusual_encodings/iso-8859-1.html"
        };
        let output = cmd.arg("-M").arg(target).output().unwrap();

        let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

        // STDOUT should contain the document with its charset META tag set to utf-8
        let stdout = std::str::from_utf8(&output.stdout).unwrap();
        assert_eq!(
            stdout,
            "<html><head>\n <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n </head>\n <body>\n © Some Company\n \n\n</body></html>\n"
        );

        // STDERR should contain only the target file
        let stderr = std::str::from_utf8(&output.stderr).unwrap();
        assert_eq!(
            stderr,
            format!(
                "{file}{cwd}/src/tests/data/unusual_encodings/iso-8859-1.html\n",
                file = file_url_protocol,
                cwd = cwd_normalized,
            )
        );

        // The exit code should be 0
        output.assert().code(0);

        Ok(())
    }
}

View File

@@ -1,8 +1,3 @@
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::css;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -10,308 +5,346 @@ use crate::css;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_empty_input() {
let cache = &mut HashMap::new();
let client = Client::new();
#[cfg(test)]
mod passing {
use reqwest::blocking::Client;
use std::collections::HashMap;
assert_eq!(
css::embed_css(cache, &client, "", "", false, false, false,),
""
);
}
#[test]
fn passing_style_exclude_unquoted_images() {
let cache = &mut HashMap::new();
let client = Client::new();
const STYLE: &str = "/* border: none;*/\
background-image: url(https://somewhere.com/bg.png); \
list-style: url(/assets/images/bullet.svg);\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
height: calc(100vh - 10pt)";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&STYLE,
false,
true,
true,
),
format!(
"/* border: none;*/\
background-image: url('{empty_image}'); \
list-style: url('{empty_image}');\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
height: calc(100vh - 10pt)",
empty_image = empty_image!()
)
);
}
#[test]
fn passing_style_exclude_single_quoted_images() {
let cache = &mut HashMap::new();
let client = Client::new();
const STYLE: &str = "/* border: none;*/\
background-image: url('https://somewhere.com/bg.png'); \
list-style: url('/assets/images/bullet.svg');\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
height: calc(100vh - 10pt)";
assert_eq!(
css::embed_css(cache, &client, "", &STYLE, false, true, true,),
format!(
"/* border: none;*/\
background-image: url('{empty_image}'); \
list-style: url('{empty_image}');\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
height: calc(100vh - 10pt)",
empty_image = empty_image!()
)
);
}
#[test]
fn passing_style_block() {
let cache = &mut HashMap::new();
let client = Client::new();
const CSS: &str = "\
#id.class-name:not(:nth-child(3n+0)) {\n \
// border: none;\n \
background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=');\n\
}\n\
\n\
html > body {}";
assert_eq!(
css::embed_css(cache, &client, "file:///", &CSS, false, false, true,),
CSS
);
}
#[test]
fn passing_attribute_selectors() {
let cache = &mut HashMap::new();
let client = Client::new();
const CSS: &str = "\
[data-value] {
/* Attribute exists */
}
[data-value='foo'] {
/* Attribute has this exact value */
}
[data-value*='foo'] {
/* Attribute value contains this value somewhere in it */
}
[data-value~='foo'] {
/* Attribute has this value in a space-separated list somewhere */
}
[data-value^='foo'] {
/* Attribute value starts with this */
}
[data-value|='foo'] {
/* Attribute value starts with this in a dash-separated list */
}
[data-value$='foo'] {
/* Attribute value ends with this */
}
";
assert_eq!(
css::embed_css(cache, &client, "", &CSS, false, false, false,),
CSS
);
}
#[test]
fn passing_import_string() {
let cache = &mut HashMap::new();
let client = Client::new();
const CSS: &str = "\
@charset 'UTF-8';\n\
\n\
@import 'data:text/css,html{background-color:%23000}';\n\
\n\
@import url('data:text/css,html{color:%23fff}')\n\
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
false,
false,
true,
),
"\
@charset 'UTF-8';\n\
\n\
@import 'data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9';\n\
\n\
@import url('data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==')\n\
"
);
}
#[test]
fn passing_hash_urls() {
let cache = &mut HashMap::new();
let client = Client::new();
const CSS: &str = "\
body {\n \
behavior: url(#default#something);\n\
}\n\
\n\
.scissorHalf {\n \
offset-path: url(#somePath);\n\
}\n\
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
false,
false,
true,
),
CSS
);
}
#[test]
fn passing_transform_percentages_and_degrees() {
let cache = &mut HashMap::new();
let client = Client::new();
const CSS: &str = "\
div {\n \
transform: translate(-50%, -50%) rotate(-45deg);\n\
transform: translate(50%, 50%) rotate(45deg);\n\
transform: translate(+50%, +50%) rotate(+45deg);\n\
}\n\
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
false,
false,
true,
),
CSS
);
}
#[test]
fn passing_unusual_indents() {
let cache = &mut HashMap::new();
let client = Client::new();
const CSS: &str = "\
.is\\:good:hover {\n \
color: green\n\
}\n\
\n\
#\\~\\!\\@\\$\\%\\^\\&\\*\\(\\)\\+\\=\\,\\.\\/\\\\\\'\\\"\\;\\:\\?\\>\\<\\[\\]\\{\\}\\|\\`\\# {\n \
color: black\n\
}\n\
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
false,
false,
true,
),
CSS
);
}
#[test]
fn passing_exclude_fonts() {
let cache = &mut HashMap::new();
let client = Client::new();
const CSS: &str = "\
@font-face {\n \
font-family: 'My Font';\n \
src: url(my_font.woff);\n\
}\n\
\n\
#identifier {\n \
font-family: 'My Font' Arial\n\
}\n\
\n\
@font-face {\n \
font-family: 'My Font';\n \
src: url(my_font.woff);\n\
}\n\
\n\
div {\n \
font-family: 'My Font' Verdana\n\
}\n\
";
const CSS_OUT: &str = " \
\n\
\n\
#identifier {\n \
font-family: 'My Font' Arial\n\
}\n\
\n \
\n\
\n\
div {\n \
font-family: 'My Font' Verdana\n\
}\n\
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
true,
false,
true,
),
CSS_OUT
);
use crate::css;
use crate::opts::Options;
#[test]
fn empty_input() {
let cache = &mut HashMap::new();
let client = Client::new();
let options = Options::default();
assert_eq!(css::embed_css(cache, &client, "", "", &options, 0), "");
}
#[test]
fn trim_if_empty() {
let cache = &mut HashMap::new();
let client = Client::new();
let options = Options::default();
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
"\t \t ",
&options,
0,
),
""
);
}
#[test]
fn style_exclude_unquoted_images() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
const STYLE: &str = "/* border: none;*/\
background-image: url(https://somewhere.com/bg.png); \
list-style: url(/assets/images/bullet.svg);\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
height: calc(100vh - 10pt)";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&STYLE,
&options,
0,
),
format!(
"/* border: none;*/\
background-image: url('{empty_image}'); \
list-style: url('{empty_image}');\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
height: calc(100vh - 10pt)",
empty_image = empty_image!()
)
);
}
#[test]
fn style_exclude_single_quoted_images() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
const STYLE: &str = "/* border: none;*/\
background-image: url('https://somewhere.com/bg.png'); \
list-style: url('/assets/images/bullet.svg');\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
height: calc(100vh - 10pt)";
assert_eq!(
css::embed_css(cache, &client, "", &STYLE, &options, 0),
format!(
"/* border: none;*/\
background-image: url('{empty_image}'); \
list-style: url('{empty_image}');\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
height: calc(100vh - 10pt)",
empty_image = empty_image!()
)
);
}
#[test]
fn style_block() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
#id.class-name:not(:nth-child(3n+0)) {\n \
// border: none;\n \
background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=');\n\
}\n\
\n\
html > body {}";
assert_eq!(
css::embed_css(cache, &client, "file:///", &CSS, &options, 0),
CSS
);
}
#[test]
fn attribute_selectors() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
[data-value] {
/* Attribute exists */
}
[data-value='foo'] {
/* Attribute has this exact value */
}
[data-value*='foo'] {
/* Attribute value contains this value somewhere in it */
}
[data-value~='foo'] {
/* Attribute has this value in a space-separated list somewhere */
}
[data-value^='foo'] {
/* Attribute value starts with this */
}
[data-value|='foo'] {
/* Attribute value starts with this in a dash-separated list */
}
[data-value$='foo'] {
/* Attribute value ends with this */
}
";
assert_eq!(css::embed_css(cache, &client, "", &CSS, &options, 0), CSS);
}
#[test]
fn import_string() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
@charset 'UTF-8';\n\
\n\
@import 'data:text/css,html{background-color:%23000}';\n\
\n\
@import url('data:text/css,html{color:%23fff}')\n\
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
"\
@charset 'UTF-8';\n\
\n\
@import 'data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9';\n\
\n\
@import url('data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==')\n\
"
);
}
#[test]
fn hash_urls() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
body {\n \
behavior: url(#default#something);\n\
}\n\
\n\
.scissorHalf {\n \
offset-path: url(#somePath);\n\
}\n\
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
CSS
);
}
#[test]
fn transform_percentages_and_degrees() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
div {\n \
transform: translate(-50%, -50%) rotate(-45deg);\n\
transform: translate(50%, 50%) rotate(45deg);\n\
transform: translate(+50%, +50%) rotate(+45deg);\n\
}\n\
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
CSS
);
}
#[test]
fn unusual_indents() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
.is\\:good:hover {\n \
color: green\n\
}\n\
\n\
#\\~\\!\\@\\$\\%\\^\\&\\*\\(\\)\\+\\=\\,\\.\\/\\\\\\'\\\"\\;\\:\\?\\>\\<\\[\\]\\{\\}\\|\\`\\# {\n \
color: black\n\
}\n\
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
CSS
);
}
#[test]
fn exclude_fonts() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.no_fonts = true;
options.silent = true;
const CSS: &str = "\
@font-face {\n \
font-family: 'My Font';\n \
src: url(my_font.woff);\n\
}\n\
\n\
#identifier {\n \
font-family: 'My Font' Arial\n\
}\n\
\n\
@font-face {\n \
font-family: 'My Font';\n \
src: url(my_font.woff);\n\
}\n\
\n\
div {\n \
font-family: 'My Font' Verdana\n\
}\n\
";
const CSS_OUT: &str = " \
\n\
\n\
#identifier {\n \
font-family: 'My Font' Arial\n\
}\n\
\n \
\n\
\n\
div {\n \
font-family: 'My Font' Verdana\n\
}\n\
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
CSS_OUT
);
}
}

View File

@@ -1,5 +1,3 @@
use crate::css;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -7,44 +5,49 @@ use crate::css;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_empty_input_single_quotes() {
assert_eq!(css::enquote(str!(""), false), "''");
}
#[cfg(test)]
mod passing {
use crate::css;
#[test]
fn passing_empty_input_double_quotes() {
assert_eq!(css::enquote(str!(""), true), "\"\"");
}
#[test]
fn empty_input_single_quotes() {
assert_eq!(css::enquote(str!(""), false), "''");
}
#[test]
fn passing_apostrophes_single_quotes() {
assert_eq!(
css::enquote(str!("It's a lovely day, don't you think?"), false),
"'It\\'s a lovely day, don\\'t you think?'"
);
}
#[test]
fn empty_input_double_quotes() {
assert_eq!(css::enquote(str!(""), true), "\"\"");
}
#[test]
fn passing_apostrophes_double_quotes() {
assert_eq!(
css::enquote(str!("It's a lovely day, don't you think?"), true),
"\"It's a lovely day, don't you think?\""
);
}
#[test]
fn apostrophes_single_quotes() {
assert_eq!(
css::enquote(str!("It's a lovely day, don't you think?"), false),
"'It\\'s a lovely day, don\\'t you think?'"
);
}
#[test]
fn passing_feet_and_inches_single_quotes() {
assert_eq!(
css::enquote(str!("5'2\", 6'5\""), false),
"'5\\'2\", 6\\'5\"'"
);
}
#[test]
fn apostrophes_double_quotes() {
assert_eq!(
css::enquote(str!("It's a lovely day, don't you think?"), true),
"\"It's a lovely day, don't you think?\""
);
}
#[test]
fn passing_feet_and_inches_double_quotes() {
assert_eq!(
css::enquote(str!("5'2\", 6'5\""), true),
"\"5'2\\\", 6'5\\\"\""
);
#[test]
fn feet_and_inches_single_quotes() {
assert_eq!(
css::enquote(str!("5'2\", 6'5\""), false),
"'5\\'2\", 6\\'5\"'"
);
}
#[test]
fn feet_and_inches_double_quotes() {
assert_eq!(
css::enquote(str!("5'2\", 6'5\""), true),
"\"5'2\\\", 6'5\\\"\""
);
}
}

View File

@@ -0,0 +1,23 @@
<!doctype html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Attempt to import CSS via data URL asset</title>
<style>
body {
background-color: white;
color: black;
}
</style>
<link href="data:text/css;base64,QGltcG9ydCAic3R5bGUuY3NzIjsK" rel="stylesheet" type="text/css" />
</head>
<body>
<p>If you see pink background with white foreground then were in trouble</p>
</body>
</html>

View File

@@ -0,0 +1,4 @@
body {
background-color: pink;
color: white;
}

View File

@@ -0,0 +1,19 @@
<!doctype html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="Content-Security-Policy" content="default-src 'unsafe-inline' file:;" />
<title>Local HTML file</title>
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-IWaCTORHkRhOWzcZeILSVmV6V6gPTHgNem6o6rsFAyaKTieDFkeeMrWjtO0DuWrX3bqZY46CVTZXUu0mia0qXQ==" crossorigin="anonymous" />
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-vWBzl4NE9oIg8NFOPAyOZbaam0UXWr6aDHPaY2kodSzAFl+mKoj/RMNc6C31NDqK4mE2i68IWxYWqWJPLCgPOw==" crossorigin="anonymous" />
</head>
<body>
<p>This page should have black background and white foreground, but only when served via http: (not via file:)</p>
<script src="script.js" integrity="sha256-ecrEsYh3+ICCX8BCrNSotXgI5534282JwJjx8Q9ZWLc="></script>
<script src="script.js" integrity="sha256-6idk9dK0bOkVdG7Oz4/0YLXSJya8xZHqbRZKMhYrt6o="></script>
</body>
</html>

View File

@@ -0,0 +1,3 @@
function noop() {
console.log("monolith");
}

View File

@@ -0,0 +1,4 @@
body {
background-color: #000;
color: #FFF;
}

View File

@@ -0,0 +1,8 @@
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
</head>
<body>
&copy; Some Company
</body>
</html>

View File

@@ -1,5 +1,3 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -7,22 +5,25 @@ use crate::utils;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_encode_string_with_specific_media_type() {
let mime = "application/javascript";
let data = "var word = 'hello';\nalert(word);\n";
let data_url = utils::data_to_data_url(mime, data.as_bytes(), "", "");
#[cfg(test)]
mod passing {
use html5ever::serialize::{serialize, SerializeOpts};
assert_eq!(
&data_url,
"data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
);
}
#[test]
fn passing_encode_append_fragment() {
let data = "<svg></svg>\n";
let data_url = utils::data_to_data_url("text/css", data.as_bytes(), "", "fragment");
assert_eq!(&data_url, "data:text/css;base64,PHN2Zz48L3N2Zz4K#fragment");
use crate::html;
#[test]
fn basic() {
let html = "<div>text</div>";
let mut dom = html::html_to_dom(&html);
dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string());
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head><link rel=\"icon\" href=\"I_AM_A_FAVICON_DATA_URL\"></link></head><body><div>text</div></body></html>"
);
}
}

View File

@@ -0,0 +1,89 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::html;

    /// Payload shared by the tests that verify a digest of non-empty data.
    const PAYLOAD: &[u8] = "abcdef0123456789".as_bytes();

    /// An empty payload is accepted when paired with this sha256 integrity string.
    #[test]
    fn empty_input_sha256() {
        let integrity = "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=";
        let verified = html::check_integrity("".as_bytes(), integrity);
        assert!(verified);
    }

    /// The payload is accepted when paired with this sha256 integrity string.
    #[test]
    fn sha256() {
        let integrity = "sha256-9EWAHgy4mSYsm54hmDaIDXPKLRsLnBX7lZyQ6xISNOM=";
        let verified = html::check_integrity(PAYLOAD, integrity);
        assert!(verified);
    }

    /// The payload is accepted when paired with this sha384 integrity string.
    #[test]
    fn sha384() {
        let integrity = "sha384-gc9l7omltke8C33bedgh15E12M7RrAQa5t63Yb8APlpe7ZhiqV23+oqiulSJl3Kw";
        let verified = html::check_integrity(PAYLOAD, integrity);
        assert!(verified);
    }

    /// The payload is accepted when paired with this sha512 integrity string.
    #[test]
    fn sha512() {
        let integrity = "sha512-zG5B88cYMqcdiMi9gz0XkOFYw2BpjeYdn5V6+oFrMgSNjRpqL7EF8JEwl17ztZbK3N7I/tTwp3kxQbN1RgFBww==";
        let verified = html::check_integrity(PAYLOAD, integrity);
        assert!(verified);
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::html;

    /// Payload shared by the tests that pair non-empty data with an unusable integrity string.
    const PAYLOAD: &[u8] = "abcdef0123456789".as_bytes();

    /// An empty integrity string is rejected for a non-empty payload.
    #[test]
    fn empty_hash() {
        let verified = html::check_integrity(PAYLOAD, "");
        assert!(!verified);
    }

    /// An empty integrity string is rejected for an empty payload as well.
    #[test]
    fn empty_input_empty_hash() {
        let verified = html::check_integrity("".as_bytes(), "");
        assert!(!verified);
    }

    /// A sha256-prefixed integrity string carrying a bogus digest is rejected.
    #[test]
    fn sha256() {
        let verified = html::check_integrity(PAYLOAD, "sha256-badhash");
        assert!(!verified);
    }

    /// A sha384-prefixed integrity string carrying a bogus digest is rejected.
    #[test]
    fn sha384() {
        let verified = html::check_integrity(PAYLOAD, "sha384-badhash");
        assert!(!verified);
    }

    /// A sha512-prefixed integrity string carrying a bogus digest is rejected.
    #[test]
    fn sha512() {
        let verified = html::check_integrity(PAYLOAD, "sha512-badhash");
        assert!(!verified);
    }
}

View File

@@ -0,0 +1,80 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::html;
    use crate::opts::Options;

    /// Starts from default options, lets `configure` adjust them, and asserts
    /// that the composed Content-Security-Policy equals `expected`.
    fn assert_csp(configure: impl FnOnce(&mut Options), expected: &str) {
        let mut options = Options::default();
        configure(&mut options);

        let csp_content = html::compose_csp(&options);

        assert_eq!(csp_content, expected);
    }

    #[test]
    fn isolated() {
        assert_csp(
            |options| options.isolate = true,
            "default-src 'unsafe-inline' data:;",
        );
    }

    #[test]
    fn no_css() {
        assert_csp(|options| options.no_css = true, "style-src 'none';");
    }

    #[test]
    fn no_fonts() {
        assert_csp(|options| options.no_fonts = true, "font-src 'none';");
    }

    #[test]
    fn no_frames() {
        assert_csp(
            |options| options.no_frames = true,
            "frame-src 'none'; child-src 'none';",
        );
    }

    #[test]
    fn no_js() {
        assert_csp(|options| options.no_js = true, "script-src 'none';");
    }

    #[test]
    fn no_images() {
        assert_csp(|options| options.no_images = true, "img-src data:;");
    }

    /// With every restriction switched on, the directives are expected in this exact order.
    #[test]
    fn all() {
        assert_csp(
            |options| {
                options.isolate = true;
                options.no_css = true;
                options.no_fonts = true;
                options.no_frames = true;
                options.no_js = true;
                options.no_images = true;
            },
            "default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;",
        );
    }
}

View File

@@ -0,0 +1,82 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use chrono::prelude::*;

    use crate::html;

    /// Current UTC time in the second-precision RFC 3339 form that the tests
    /// expect to find inside the metadata comment.
    fn current_timestamp() -> String {
        Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true)
    }

    /// Builds the metadata comment for `url` and checks it against `expected_at`.
    ///
    /// The comment embeds a timestamp with one-second precision, so a single
    /// timestamp sampled up front makes the test fail spuriously whenever the
    /// clock crosses a second boundary before the comment is created. The
    /// timestamp is therefore sampled before and after the call, and the
    /// comment may match the expectation rendered with either one.
    fn assert_metadata_comment(url: &str, expected_at: impl Fn(&str) -> String) {
        let before = current_timestamp();
        let metadata_comment: String = html::create_metadata_tag(url);
        let after = current_timestamp();

        let candidates = [expected_at(&before), expected_at(&after)];
        assert!(
            candidates.contains(&metadata_comment),
            "unexpected metadata comment {:?}; expected one of {:?}",
            metadata_comment,
            candidates
        );
    }

    /// An http: URL is quoted verbatim in the metadata comment.
    #[test]
    fn http_url() {
        let url = "http://192.168.1.1/";

        assert_metadata_comment(url, |timestamp| {
            format!(
                "<!-- Saved from {} at {} using {} v{} -->",
                &url,
                timestamp,
                env!("CARGO_PKG_NAME"),
                env!("CARGO_PKG_VERSION"),
            )
        });
    }

    /// A file: URL is reported as "local source" rather than quoted.
    #[test]
    fn file_url() {
        let url = "file:///home/monolith/index.html";

        assert_metadata_comment(url, |timestamp| {
            format!(
                "<!-- Saved from local source at {} using {} v{} -->",
                timestamp,
                env!("CARGO_PKG_NAME"),
                env!("CARGO_PKG_VERSION"),
            )
        });
    }

    /// A data: URL is reported as "local source" rather than quoted.
    #[test]
    fn data_url() {
        let url = "data:text/html,Hello%2C%20World!";

        assert_metadata_comment(url, |timestamp| {
            format!(
                "<!-- Saved from local source at {} using {} v{} -->",
                timestamp,
                env!("CARGO_PKG_NAME"),
                env!("CARGO_PKG_VERSION"),
            )
        });
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::html;

    /// An empty URL yields an empty string instead of a metadata comment.
    #[test]
    fn empty_string() {
        let metadata_comment = html::create_metadata_tag("");

        assert_eq!(metadata_comment, "");
    }
}

View File

@@ -0,0 +1,121 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use reqwest::blocking::Client;
    use std::collections::HashMap;

    use crate::html;
    use crate::opts::Options;

    /// Embeds `srcset_value` against an empty base URL with the `no_images` and
    /// `silent` options set, then asserts that the result equals `expected`.
    fn assert_embedded_srcset(srcset_value: &str, expected: String) {
        let mut cache = HashMap::new();
        let client = Client::new();

        let mut options = Options::default();
        options.no_images = true;
        options.silent = true;

        let embedded_srcset =
            html::embed_srcset(&mut cache, &client, "", srcset_value, &options, 0);

        assert_eq!(embedded_srcset, expected);
    }

    /// Every candidate carries a scale descriptor.
    #[test]
    fn small_medium_large() {
        assert_embedded_srcset(
            "small.png 1x, medium.png 1.5x, large.png 2x",
            format!(
                "{} 1x, {} 1.5x, {} 2x",
                empty_image!(),
                empty_image!(),
                empty_image!(),
            ),
        );
    }

    /// The first candidate has no descriptor at all.
    #[test]
    fn small_medium_only_medium_has_scale() {
        assert_embedded_srcset(
            "small.png, medium.png 1.5x",
            format!("{}, {} 1.5x", empty_image!(), empty_image!()),
        );
    }

    /// Commas that are part of a file name do not split a candidate in two.
    #[test]
    fn commas_within_file_names() {
        assert_embedded_srcset(
            "small,s.png 1x, large,l.png 2x",
            format!("{} 1x, {} 2x", empty_image!(), empty_image!()),
        );
    }

    /// Candidates separated by a comma followed by a newline come out separated by ", ".
    #[test]
    fn tabs_and_newlines_after_commas() {
        assert_embedded_srcset(
            "small,s.png 1x,\nmedium,m.png 2x,\nlarge,l.png 3x",
            format!(
                "{} 1x, {} 2x, {} 3x",
                empty_image!(),
                empty_image!(),
                empty_image!()
            ),
        );
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use reqwest::blocking::Client;
    use std::collections::HashMap;

    use crate::html;
    use crate::opts::Options;

    /// A srcset ending in a dangling comma keeps that comma in the output.
    #[test]
    fn trailing_comma() {
        let options = Options {
            no_images: true,
            silent: true,
            ..Options::default()
        };
        let mut cache = HashMap::new();
        let client = Client::new();

        let embedded = html::embed_srcset(
            &mut cache,
            &client,
            "",
            "small.png 1x, large.png 2x,",
            &options,
            0,
        );
        let expected = format!("{} 1x, {} 2x,", empty_image!(), empty_image!());

        assert_eq!(embedded, expected);
    }
}

View File

@@ -0,0 +1,104 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::html;

    /// Parses `markup` and returns what `html::get_base_url` reports for the document.
    fn base_url_of(markup: &str) -> Option<String> {
        let dom = html::html_to_dom(markup);
        html::get_base_url(&dom.document)
    }

    /// A single BASE tag with an href is reported as the base URL.
    #[test]
    fn present() {
        let markup = "<!doctype html>
<html>
<head>
<base href=\"https://musicbrainz.org\" />
</head>
<body>
</body>
</html>";

        assert_eq!(base_url_of(markup), Some(str!("https://musicbrainz.org")));
    }

    /// With several BASE tags, the href of the first one is reported.
    #[test]
    fn multiple_tags() {
        let markup = "<!doctype html>
<html>
<head>
<base href=\"https://www.discogs.com/\" />
<base href=\"https://musicbrainz.org\" />
</head>
<body>
</body>
</html>";

        assert_eq!(base_url_of(markup), Some(str!("https://www.discogs.com/")));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::html;

    /// Parses `markup` and returns what `html::get_base_url` reports for the document.
    fn base_url_of(markup: &str) -> Option<String> {
        let dom = html::html_to_dom(markup);
        html::get_base_url(&dom.document)
    }

    /// No BASE tag at all: nothing is reported.
    #[test]
    fn absent() {
        let markup = "<!doctype html>
<html>
<head>
</head>
<body>
</body>
</html>";

        assert_eq!(base_url_of(markup), None);
    }

    /// A BASE tag lacking an href attribute: nothing is reported.
    #[test]
    fn no_href() {
        let markup = "<!doctype html>
<html>
<head>
<base />
</head>
<body>
</body>
</html>";

        assert_eq!(base_url_of(markup), None);
    }

    /// A BASE tag with an empty href: an empty string is reported.
    #[test]
    fn empty_href() {
        let markup = "<!doctype html>
<html>
<head>
<base href=\"\" />
</head>
<body>
</body>
</html>";

        assert_eq!(base_url_of(markup), Some(str!()));
    }
}

View File

@@ -0,0 +1,54 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use html5ever::rcdom::{Handle, NodeData};

    use crate::html;

    /// A DIV written with two style attributes (differing only in letter case)
    /// reports the value of the first one; BODY has no class attribute.
    #[test]
    fn div_two_style_attributes() {
        /// Visits `node`, bumps the visit counter, checks attributes on the
        /// elements of interest, and descends into documents and elements only.
        fn test_walk(node: &Handle, visited: &mut i8) {
            *visited += 1;

            let dig_deeper = match &node.data {
                NodeData::Document => true,
                NodeData::Element { ref name, .. } => {
                    let node_name = name.local.as_ref().to_string();
                    match node_name.as_str() {
                        "body" => {
                            assert_eq!(html::get_node_attr(node, "class"), None);
                        }
                        "div" => {
                            assert_eq!(
                                html::get_node_attr(node, "style"),
                                Some(str!("color: blue;"))
                            );
                        }
                        _ => {}
                    }
                    true
                }
                _ => false,
            };

            if dig_deeper {
                for child in node.children.borrow().iter() {
                    test_walk(child, visited);
                }
            }
        }

        let markup = "<!doctype html><html><head></head><body><DIV STYLE=\"color: blue;\" style=\"display: none;\"></div></body></html>";
        let dom = html::html_to_dom(markup);
        let mut count = 0;

        test_walk(&dom.document, &mut count);

        assert_eq!(count, 6);
    }
}

View File

@@ -1,6 +1,3 @@
use crate::html;
use html5ever::rcdom::{Handle, NodeData};
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -8,42 +5,49 @@ use html5ever::rcdom::{Handle, NodeData};
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn get_node_name() {
let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
let dom = html::html_to_dom(&html);
let mut count = 0;
#[cfg(test)]
mod passing {
use html5ever::rcdom::{Handle, NodeData};
fn test_walk(node: &Handle, i: &mut i8) {
*i += 1;
use crate::html;
match &node.data {
NodeData::Document => {
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
NodeData::Element { ref name, .. } => {
let node_name = name.local.as_ref().to_string();
let parent = html::get_parent_node(node);
let parent_node_name = html::get_node_name(&parent);
if node_name == "head" || node_name == "body" {
assert_eq!(parent_node_name, Some("html"));
} else if node_name == "div" {
assert_eq!(parent_node_name, Some("body"));
} else if node_name == "p" {
assert_eq!(parent_node_name, Some("div"));
}
#[test]
fn parent_node_names() {
let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
let dom = html::html_to_dom(&html);
let mut count = 0;
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
fn test_walk(node: &Handle, i: &mut i8) {
*i += 1;
match &node.data {
NodeData::Document => {
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
}
_ => (),
};
NodeData::Element { ref name, .. } => {
let node_name = name.local.as_ref().to_string();
let parent = html::get_parent_node(node);
let parent_node_name = html::get_node_name(&parent);
if node_name == "head" || node_name == "body" {
assert_eq!(parent_node_name, Some("html"));
} else if node_name == "div" {
assert_eq!(parent_node_name, Some("body"));
} else if node_name == "p" {
assert_eq!(parent_node_name, Some("div"));
}
for child in node.children.borrow().iter() {
test_walk(child, &mut *i);
}
}
_ => (),
};
}
test_walk(&dom.document, &mut count);
assert_eq!(count, 7);
}
test_walk(&dom.document, &mut count);
assert_eq!(count, 7);
}

View File

@@ -1,5 +1,3 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -7,19 +5,27 @@ use crate::utils;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_http_url() {
assert!(utils::is_http_url("http://kernel.org"));
}
#[cfg(test)]
mod passing {
use crate::html;
#[test]
fn passing_https_url() {
assert!(utils::is_http_url("https://www.rust-lang.org/"));
}
#[test]
fn icon() {
let html = "<link rel=\"icon\" href=\"\" /><div>text</div>";
let dom = html::html_to_dom(&html);
let res: bool = html::has_favicon(&dom.document);
#[test]
fn passing_http_url_with_backslashes() {
assert!(utils::is_http_url("http:\\\\freebsd.org\\"));
assert!(res);
}
#[test]
fn shortcut_icon() {
let html = "<link rel=\"shortcut icon\" href=\"\" /><div>text</div>";
let dom = html::html_to_dom(&html);
let res: bool = html::has_favicon(&dom.document);
assert!(res);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@@ -29,29 +35,16 @@ fn passing_http_url_with_backslashes() {
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn failing_url_with_no_protocol() {
assert!(!utils::is_http_url("//kernel.org"));
}
#[cfg(test)]
mod failing {
use crate::html;
#[test]
fn failing_dot_slash_filename() {
assert!(!utils::is_http_url("./index.html"));
}
#[test]
fn absent() {
let html = "<div>text</div>";
let dom = html::html_to_dom(&html);
let res: bool = html::has_favicon(&dom.document);
#[test]
fn failing_just_filename() {
assert!(!utils::is_http_url("some-local-page.htm"));
}
#[test]
fn failing_https_ip_port_url() {
assert!(!utils::is_http_url("ftp://1.2.3.4/www/index.html"));
}
#[test]
fn failing_data_url() {
assert!(!utils::is_http_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
assert!(!res);
}
}

View File

@@ -1,5 +1,3 @@
use crate::html;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -7,29 +5,24 @@ use crate::html;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_icon() {
assert!(html::is_icon("icon"));
}
#[cfg(test)]
mod passing {
use crate::html;
#[test]
fn passing_shortcut_icon_capitalized() {
assert!(html::is_icon("Shortcut Icon"));
}
#[test]
fn icon() {
assert!(html::is_icon("icon"));
}
#[test]
fn passing_icon_uppercase() {
assert!(html::is_icon("ICON"));
}
#[test]
fn shortcut_icon_capitalized() {
assert!(html::is_icon("Shortcut Icon"));
}
#[test]
fn passing_mask_icon() {
assert!(html::is_icon("mask-icon"));
}
#[test]
fn passing_fluid_icon() {
assert!(html::is_icon("fluid-icon"));
#[test]
fn icon_uppercase() {
assert!(html::is_icon("ICON"));
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@@ -39,12 +32,27 @@ fn passing_fluid_icon() {
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn failing_stylesheet() {
assert!(!html::is_icon("stylesheet"));
}
#[cfg(test)]
mod failing {
use crate::html;
#[test]
fn failing_empty_string() {
assert!(!html::is_icon(""));
#[test]
fn mask_icon() {
assert!(!html::is_icon("mask-icon"));
}
#[test]
fn fluid_icon() {
assert!(!html::is_icon("fluid-icon"));
}
#[test]
fn stylesheet() {
assert!(!html::is_icon("stylesheet"));
}
#[test]
fn empty_string() {
assert!(!html::is_icon(""));
}
}

View File

@@ -1,4 +1,13 @@
mod add_favicon;
mod check_integrity;
mod compose_csp;
mod create_metadata_tag;
mod embed_srcset;
mod get_base_url;
mod get_node_attr;
mod get_node_name;
mod has_favicon;
mod is_icon;
mod set_node_attr;
mod stringify_document;
mod walk_and_embed_assets;

View File

@@ -0,0 +1,105 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use html5ever::rcdom::{Handle, NodeData};

    use crate::html;

    /// Overwrites, removes, and re-adds the HTML lang attribute, and adds a
    /// style attribute to a BODY that had none, reading each change back.
    #[test]
    fn html_lang_and_body_style() {
        /// Visits `node`, bumps the visit counter, exercises attribute
        /// setting on HTML and BODY, and descends into documents and elements only.
        fn test_walk(node: &Handle, visited: &mut i8) {
            *visited += 1;

            let dig_deeper = match &node.data {
                NodeData::Document => true,
                NodeData::Element { ref name, .. } => {
                    let node_name = name.local.as_ref().to_string();
                    match node_name.as_str() {
                        "html" => {
                            assert_eq!(html::get_node_attr(node, "lang"), Some(str!("en")));

                            // Replace the existing value
                            html::set_node_attr(node, "lang", Some(str!("de")));
                            assert_eq!(html::get_node_attr(node, "lang"), Some(str!("de")));

                            // Passing None drops the attribute
                            html::set_node_attr(node, "lang", None);
                            assert_eq!(html::get_node_attr(node, "lang"), None);

                            // An empty value is still a present attribute
                            html::set_node_attr(node, "lang", Some(str!("")));
                            assert_eq!(html::get_node_attr(node, "lang"), Some(str!("")));
                        }
                        "body" => {
                            assert_eq!(html::get_node_attr(node, "style"), None);

                            html::set_node_attr(node, "style", Some(str!("display: none;")));
                            assert_eq!(
                                html::get_node_attr(node, "style"),
                                Some(str!("display: none;"))
                            );
                        }
                        _ => {}
                    }
                    true
                }
                _ => false,
            };

            if dig_deeper {
                for child in node.children.borrow().iter() {
                    test_walk(child, visited);
                }
            }
        }

        let markup = "<!doctype html><html lang=\"en\"><head></head><body></body></html>";
        let dom = html::html_to_dom(markup);
        let mut count = 0;

        test_walk(&dom.document, &mut count);

        assert_eq!(count, 5);
    }

    /// A BODY written with two background attributes reports the first value,
    /// and removing the attribute leaves none behind.
    #[test]
    fn body_background() {
        /// Visits `node`, bumps the visit counter, exercises attribute removal
        /// on BODY, and descends into documents and elements only.
        fn test_walk(node: &Handle, visited: &mut i8) {
            *visited += 1;

            let dig_deeper = match &node.data {
                NodeData::Document => true,
                NodeData::Element { ref name, .. } => {
                    let node_name = name.local.as_ref().to_string();
                    if node_name.as_str() == "body" {
                        assert_eq!(html::get_node_attr(node, "background"), Some(str!("1")));

                        html::set_node_attr(node, "background", None);
                        assert_eq!(html::get_node_attr(node, "background"), None);
                    }
                    true
                }
                _ => false,
            };

            if dig_deeper {
                for child in node.children.borrow().iter() {
                    test_walk(child, visited);
                }
            }
        }

        let markup = "<!doctype html><html lang=\"en\"><head></head><body background=\"1\" background=\"2\"></body></html>";
        let dom = html::html_to_dom(markup);
        let mut count = 0;

        test_walk(&dom.document, &mut count);

        assert_eq!(count, 5);
    }
}

View File

@@ -1,5 +1,3 @@
use crate::html;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -7,182 +5,146 @@ use crate::html;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_div_as_root_element() {
let html = "<div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html);
#[cfg(test)]
mod passing {
use crate::html;
use crate::opts::Options;
let opt_no_css: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_isolate: bool = false;
#[test]
fn div_as_root_element() {
let html = "<div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html);
let options = Options::default();
assert_eq!(
html::stringify_document(
&dom.document,
opt_no_css,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
),
"<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
);
}
assert_eq!(
html::stringify_document(&dom.document, &options),
"<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
);
}
#[test]
fn passing_full_page_with_no_html_head_or_body() {
let html = "<title>Isolated document</title>\
<link rel=\"something\" href=\"some.css\" />\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html);
let opt_no_css: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_isolate: bool = true;
assert_eq!(
html::stringify_document(
&dom.document,
opt_no_css,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
),
"<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
<title>Isolated document</title>\
<link rel=\"something\" href=\"some.css\">\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
</head>\
<body>\
<div>\
<script src=\"some.js\"></script>\
</div>\
</body>\
</html>"
);
}
#[test]
fn passing_doctype_and_the_rest_no_html_head_or_body() {
let html = "<!doctype html>\
<title>Unstyled document</title>\
<link rel=\"stylesheet\" href=\"main.css\"/>\
<div style=\"display: none;\"></div>";
let dom = html::html_to_dom(&html);
let opt_no_css: bool = true;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_isolate: bool = false;
assert_eq!(
html::stringify_document(
&dom.document,
opt_no_css,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
),
"<!DOCTYPE html>\
<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
<title>Unstyled document</title>\
<link rel=\"stylesheet\" href=\"main.css\">\
</head>\
<body><div style=\"display: none;\"></div></body>\
</html>"
);
}
#[test]
fn passing_doctype_and_the_rest_no_html_head_or_body_forbid_frames() {
let html = "<!doctype html>\
<title>Frameless document</title>\
<link rel=\"something\"/>\
<div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html);
let opt_no_css: bool = false;
let opt_no_frames: bool = true;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_isolate: bool = false;
assert_eq!(
html::stringify_document(
&dom.document,
opt_no_css,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
),
"<!DOCTYPE html>\
<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
<title>Frameless document</title>\
<link rel=\"something\">\
</head>\
<body><div><script src=\"some.js\"></script></div></body>\
</html>"
);
}
#[test]
fn passing_doctype_and_the_rest_all_forbidden() {
let html = "<!doctype html>\
<title>no-frame no-css no-js no-image isolated document</title>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<link rel=\"stylesheet\" href=\"some.css\">\
<div>\
<script src=\"some.js\"></script>\
<img style=\"width: 100%;\" src=\"some.png\" />\
<iframe src=\"some.html\"></iframe>\
</div>";
let dom = html::html_to_dom(&html);
let opt_isolate: bool = true;
let opt_no_css: bool = true;
let opt_no_frames: bool = true;
let opt_no_js: bool = true;
let opt_no_images: bool = true;
assert_eq!(
html::stringify_document(
&dom.document,
opt_no_css,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_isolate,
),
"<!DOCTYPE html>\
<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; frame-src 'none';child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
<title>no-frame no-css no-js no-image isolated document</title>\
#[test]
fn full_page_with_no_html_head_or_body() {
let html = "<title>Isolated document</title>\
<link rel=\"something\" href=\"some.css\" />\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html);
let mut options = Options::default();
options.isolate = true;
assert_eq!(
html::stringify_document(
&dom.document,
&options
),
"<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
<title>Isolated document</title>\
<link rel=\"something\" href=\"some.css\">\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<link rel=\"stylesheet\" href=\"some.css\">\
</head>\
<body>\
<div>\
<script src=\"some.js\"></script>\
<img style=\"width: 100%;\" src=\"some.png\">\
<iframe src=\"some.html\"></iframe>\
</div>\
</body>\
</html>"
);
);
}
#[test]
fn doctype_and_the_rest_no_html_head_or_body() {
let html = "<!doctype html>\
<title>Unstyled document</title>\
<link rel=\"stylesheet\" href=\"main.css\"/>\
<div style=\"display: none;\"></div>";
let dom = html::html_to_dom(&html);
let mut options = Options::default();
options.no_css = true;
assert_eq!(
html::stringify_document(&dom.document, &options),
"<!DOCTYPE html>\
<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
<title>Unstyled document</title>\
<link rel=\"stylesheet\" href=\"main.css\">\
</head>\
<body><div style=\"display: none;\"></div></body>\
</html>"
);
}
#[test]
fn doctype_and_the_rest_no_html_head_or_body_forbid_frames() {
let html = "<!doctype html>\
<title>Frameless document</title>\
<link rel=\"something\"/>\
<div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html);
let mut options = Options::default();
options.no_frames = true;
assert_eq!(
html::stringify_document(
&dom.document,
&options
),
"<!DOCTYPE html>\
<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>\
<title>Frameless document</title>\
<link rel=\"something\">\
</head>\
<body><div><script src=\"some.js\"></script></div></body>\
</html>"
);
}
#[test]
fn doctype_and_the_rest_all_forbidden() {
let html = "<!doctype html>\
<title>no-frame no-css no-js no-image isolated document</title>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<link rel=\"stylesheet\" href=\"some.css\">\
<div>\
<script src=\"some.js\"></script>\
<img style=\"width: 100%;\" src=\"some.png\" />\
<iframe src=\"some.html\"></iframe>\
</div>";
let dom = html::html_to_dom(&html);
let mut options = Options::default();
options.isolate = true;
options.no_css = true;
options.no_fonts = true;
options.no_frames = true;
options.no_js = true;
options.no_images = true;
assert_eq!(
html::stringify_document(
&dom.document,
&options
),
"<!DOCTYPE html>\
<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
<title>no-frame no-css no-js no-image isolated document</title>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<link rel=\"stylesheet\" href=\"some.css\">\
</head>\
<body>\
<div>\
<script src=\"some.js\"></script>\
<img style=\"width: 100%;\" src=\"some.png\">\
<iframe src=\"some.html\"></iframe>\
</div>\
</body>\
</html>"
);
}
}

View File

@@ -1,8 +1,3 @@
use crate::html;
use html5ever::serialize::{serialize, SerializeOpts};
use reqwest::blocking::Client;
use std::collections::HashMap;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -10,410 +5,368 @@ use std::collections::HashMap;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_basic() {
let cache = &mut HashMap::new();
#[cfg(test)]
mod passing {
use html5ever::serialize::{serialize, SerializeOpts};
use reqwest::blocking::Client;
use std::collections::HashMap;
let html = "<div><P></P></div>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
use crate::html;
use crate::opts::Options;
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
#[test]
fn basic() {
let cache = &mut HashMap::new();
let client = Client::new();
let html = "<div><P></P></div>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut options = Options::default();
options.silent = true;
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
let client = Client::new();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><p></p></div></body></html>"
);
}
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
#[test]
fn passing_ensure_no_recursive_iframe() {
let html = "<div><P></P><iframe src=\"\"></iframe></div>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><p></p></div></body></html>"
);
}
let client = Client::new();
#[test]
fn ensure_no_recursive_iframe() {
let html = "<div><P></P><iframe src=\"\"></iframe></div>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut options = Options::default();
options.silent = true;
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
let client = Client::new();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><p></p><iframe src=\"\"></iframe></div></body></html>"
);
}
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
#[test]
fn passing_ensure_no_recursive_frame() {
let html = "<frameset><frame src=\"\"></frameset>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><p></p><iframe src=\"\"></iframe></div></body></html>"
);
}
let client = Client::new();
#[test]
fn ensure_no_recursive_frame() {
let html = "<frameset><frame src=\"\"></frameset>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut options = Options::default();
options.silent = true;
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
let client = Client::new();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
);
}
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
#[test]
fn passing_no_css() {
let html = "<link rel=\"stylesheet\" href=\"main.css\">\
<style>html{background-color: #000;}</style>\
<div style=\"display: none;\"></div>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
let opt_no_css: bool = true;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let client = Client::new();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
);
}
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
#[test]
fn no_css() {
let html = "<link rel=\"stylesheet\" href=\"main.css\">\
<link rel=\"alternate stylesheet\" href=\"main.css\">\
<style>html{background-color: #000;}</style>\
<div style=\"display: none;\"></div>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
let mut options = Options::default();
options.no_css = true;
options.silent = true;
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head>\
<link rel=\"stylesheet\" href=\"\">\
<style></style>\
</head>\
<body>\
<div></div>\
</body>\
</html>"
);
}
let client = Client::new();
#[test]
fn passing_no_images() {
let html = "<link rel=\"icon\" href=\"favicon.ico\">\
<div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = true;
let opt_silent = true;
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
let client = Client::new();
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
format!(
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head>\
<link rel=\"icon\" href=\"\">\
</head>\
<body>\
<div>\
<img src=\"{empty_image}\">\
</div>\
</body>\
</html>",
empty_image = empty_image!()
)
);
}
#[test]
fn passing_no_body_background_images() {
let html = "<body background=\"no/such/image.png\" background=\"no/such/image2.png\"></body>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = true;
let opt_silent = true;
let client = Client::new();
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body></body></html>"
);
}
#[test]
fn passing_no_frames() {
let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = true;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let client = Client::new();
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
);
}
#[test]
fn passing_no_iframes() {
let html = "<iframe src=\"http://trackbook.com\"></iframe>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = true;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
let opt_silent = true;
let client = Client::new();
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><iframe src=\"\"></iframe></body></html>"
);
}
/// With JavaScript disabled, the inline event handler attribute is dropped,
/// the external script's `src` is emptied and the inline script body is removed.
#[test]
fn passing_no_js() {
    let html = "<div onClick=\"void(0)\">\
                <script src=\"http://localhost/assets/some.js\"></script>\
                <script>alert(1)</script>\
                </div>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();

    // Only the "no JS" switch is turned on; output is silenced.
    let opt_no_css = false;
    let opt_no_fonts = false;
    let opt_no_frames = false;
    let opt_no_js = true;
    let opt_no_images = false;
    let opt_silent = true;

    let client = Client::new();

    html::walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_fonts,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_silent,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go: casting each byte to `char`
    // would garble any multi-byte sequence in the serialized markup.
    assert_eq!(
        String::from_utf8(buf).expect("serialized document should be valid UTF-8"),
        "<html><head></head><body><div><script src=\"\"></script>\
         <script></script></div></body></html>"
    );
}
#[test]
fn passing_with_no_integrity() {
let html = "<title>No integrity</title>\
<link integrity=\"sha384-...\" rel=\"something\"/>\
<script integrity=\"sha384-...\" src=\"some.js\"></script>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let client = Client::new();
let opt_no_css: bool = true;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = true;
let opt_no_js: bool = true;
let opt_no_images: bool = true;
let opt_silent = true;
html::walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head><title>No integrity</title><link rel=\"something\"><script src=\"\"></script></head>\
<body></body>\
</html>"
);
<head>\
<link rel=\"stylesheet\">\
<link rel=\"alternate stylesheet\">\
<style></style>\
</head>\
<body>\
<div></div>\
</body>\
</html>"
);
}
/// With images disabled, the favicon LINK loses its `href` and the IMG `src`
/// is replaced with the empty-image placeholder.
#[test]
fn no_images() {
    let html = "<link rel=\"icon\" href=\"favicon.ico\">\
                <div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();

    // Build the options in one expression instead of mutating a default afterwards.
    let options = Options {
        no_images: true,
        silent: true,
        ..Options::default()
    };

    let client = Client::new();

    html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go: casting each byte to `char`
    // would garble any multi-byte sequence in the serialized markup.
    assert_eq!(
        String::from_utf8(buf).expect("serialized document should be valid UTF-8"),
        format!(
            "<html>\
             <head>\
             <link rel=\"icon\">\
             </head>\
             <body>\
             <div>\
             <img src=\"{empty_image}\">\
             </div>\
             </body>\
             </html>",
            empty_image = empty_image!()
        )
    );
}
/// With images disabled, the BODY `background` attribute (even when given twice)
/// is removed from the output entirely.
#[test]
fn no_body_background_images() {
    let html =
        "<body background=\"no/such/image.png\" background=\"no/such/image2.png\"></body>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();

    // Build the options in one expression instead of mutating a default afterwards.
    let options = Options {
        no_images: true,
        silent: true,
        ..Options::default()
    };

    let client = Client::new();

    html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go: casting each byte to `char`
    // would garble any multi-byte sequence in the serialized markup.
    assert_eq!(
        String::from_utf8(buf).expect("serialized document should be valid UTF-8"),
        "<html><head></head><body></body></html>"
    );
}
/// With frames disabled, a FRAME element stays in the document but its `src` is emptied.
#[test]
fn no_frames() {
    let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();

    // Build the options in one expression instead of mutating a default afterwards.
    let options = Options {
        no_frames: true,
        silent: true,
        ..Options::default()
    };

    let client = Client::new();

    html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go: casting each byte to `char`
    // would garble any multi-byte sequence in the serialized markup.
    assert_eq!(
        String::from_utf8(buf).expect("serialized document should be valid UTF-8"),
        "<html><head></head><frameset><frame src=\"\"></frameset></html>"
    );
}
/// With frames disabled, an IFRAME element stays in the document but its `src` is emptied.
#[test]
fn no_iframes() {
    let html = "<iframe src=\"http://trackbook.com\"></iframe>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();

    // Build the options in one expression instead of mutating a default afterwards.
    let options = Options {
        no_frames: true,
        silent: true,
        ..Options::default()
    };

    let client = Client::new();

    html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go: casting each byte to `char`
    // would garble any multi-byte sequence in the serialized markup.
    assert_eq!(
        String::from_utf8(buf).expect("serialized document should be valid UTF-8"),
        "<html><head></head><body><iframe src=\"\"></iframe></body></html>"
    );
}
/// With JavaScript disabled, the inline event handler attribute is dropped and
/// both SCRIPT elements are left empty, with no `src` attribute.
#[test]
fn no_js() {
    let html = "<div onClick=\"void(0)\">\
                <script src=\"http://localhost/assets/some.js\"></script>\
                <script>alert(1)</script>\
                </div>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();

    // Build the options in one expression instead of mutating a default afterwards.
    let options = Options {
        no_js: true,
        silent: true,
        ..Options::default()
    };

    let client = Client::new();

    html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go: casting each byte to `char`
    // would garble any multi-byte sequence in the serialized markup.
    assert_eq!(
        String::from_utf8(buf).expect("serialized document should be valid UTF-8"),
        "<html><head></head><body><div><script></script>\
         <script></script></div></body></html>"
    );
}
/// The `integrity` attribute must be stripped from LINK and SCRIPT elements.
#[test]
fn discards_integrity() {
    let html = "<title>No integrity</title>\
                <link integrity=\"sha384-...\" rel=\"something\"/>\
                <script integrity=\"sha384-...\" src=\"some.js\"></script>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();

    // Build the options in one expression instead of mutating a default afterwards.
    let options = Options {
        no_css: true,
        no_frames: true,
        no_js: true,
        no_images: true,
        silent: true,
        ..Options::default()
    };

    let client = Client::new();

    html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go: casting each byte to `char`
    // would garble any multi-byte sequence in the serialized markup.
    assert_eq!(
        String::from_utf8(buf).expect("serialized document should be valid UTF-8"),
        "<html>\
         <head><title>No integrity</title><link rel=\"something\"><script></script></head>\
         <body></body>\
         </html>"
    );
}
/// META tags with `http-equiv` set to Refresh or Location must be neutralized:
/// the attribute value is rewritten to "disabled by monolith (<original>)".
#[test]
fn removes_unwanted_meta_tags() {
    let html = "<html>\
                <head>\
                <meta http-equiv=\"Refresh\" value=\"20\"/>\
                <meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
                </head>\
                <body></body>\
                </html>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();

    // Build the options in one expression instead of mutating a default afterwards.
    let options = Options {
        no_css: true,
        no_frames: true,
        no_js: true,
        no_images: true,
        silent: true,
        ..Options::default()
    };

    let client = Client::new();

    html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go: casting each byte to `char`
    // would garble any multi-byte sequence in the serialized markup.
    assert_eq!(
        String::from_utf8(buf).expect("serialized document should be valid UTF-8"),
        "<html>\
         <head>\
         <meta http-equiv=\"disabled by monolith (Refresh)\" value=\"20\">\
         <meta http-equiv=\"disabled by monolith (Location)\" value=\"https://freebsd.org\">\
         </head>\
         <body></body>\
         </html>"
    );
}
/// Contents of NOSCRIPT must be processed too: with images disabled, the IMG
/// inside it gets the empty-image placeholder as its `src`.
#[test]
fn processes_noscript_tags() {
    let html = "<html>\
                <body>\
                <noscript>\
                <img src=\"image.png\" />\
                </noscript>\
                </body>\
                </html>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();

    // Build the options in one expression instead of mutating a default afterwards.
    let options = Options {
        no_images: true,
        silent: true,
        ..Options::default()
    };

    let client = Client::new();

    html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the buffer as UTF-8 in one go: casting each byte to `char`
    // would garble any multi-byte sequence in the serialized markup.
    assert_eq!(
        String::from_utf8(buf).expect("serialized document should be valid UTF-8"),
        format!(
            "<html>\
             <head>\
             </head>\
             <body>\
             <noscript>\
             <img src=\"{}\">\
             </noscript>\
             </body>\
             </html>",
            empty_image!(),
        )
    );
}
}

View File

@@ -1,5 +1,3 @@
use crate::js;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -7,19 +5,24 @@ use crate::js;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_onblur_camelcase() {
assert!(js::attr_is_event_handler("onBlur"));
}
#[cfg(test)]
mod passing {
use crate::js;
#[test]
fn passing_onclick_lowercase() {
assert!(js::attr_is_event_handler("onclick"));
}
#[test]
fn onblur_camelcase() {
assert!(js::attr_is_event_handler("onBlur"));
}
#[test]
fn passing_onclick_camelcase() {
assert!(js::attr_is_event_handler("onClick"));
#[test]
fn onclick_lowercase() {
assert!(js::attr_is_event_handler("onclick"));
}
#[test]
fn onclick_camelcase() {
assert!(js::attr_is_event_handler("onClick"));
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@@ -29,17 +32,22 @@ fn passing_onclick_camelcase() {
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn failing_href() {
assert!(!js::attr_is_event_handler("href"));
}
#[cfg(test)]
mod failing {
use crate::js;
#[test]
fn failing_empty_string() {
assert!(!js::attr_is_event_handler(""));
}
#[test]
fn href() {
assert!(!js::attr_is_event_handler("href"));
}
#[test]
fn failing_class() {
assert!(!js::attr_is_event_handler("class"));
#[test]
fn empty_string() {
assert!(!js::attr_is_event_handler(""));
}
#[test]
fn class() {
assert!(!js::attr_is_event_handler("class"));
}
}

View File

@@ -3,4 +3,6 @@ mod css;
mod html;
mod js;
mod macros;
mod opts;
mod url;
mod utils;

34
src/tests/opts.rs Normal file
View File

@@ -0,0 +1,34 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::opts::Options;

    /// `Options::default()` must leave every boolean switch off and every
    /// value either empty, zero or `None`.
    #[test]
    fn defaults() {
        let options: Options = Options::default();

        // Boolean fields are asserted directly rather than compared against
        // a `false` literal.
        assert!(!options.no_audio);
        assert_eq!(options.base_url, None);
        assert!(!options.no_css);
        assert!(!options.no_frames);
        assert!(!options.no_fonts);
        assert!(!options.no_images);
        assert!(!options.isolate);
        assert!(!options.no_js);
        assert!(!options.insecure);
        assert!(!options.no_metadata);
        assert_eq!(options.output, str!());
        assert!(!options.silent);
        assert_eq!(options.timeout, 0);
        assert_eq!(options.user_agent, None);
        assert!(!options.no_video);
        assert_eq!(options.target, str!());
    }
}

View File

@@ -1,5 +1,3 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -7,32 +5,47 @@ use crate::utils;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_remove_protocl_and_fragment() {
if cfg!(windows) {
assert_eq!(
utils::file_url_to_fs_path("file:///C:/documents/some-path/some-file.svg#fragment"),
"C:\\documents\\some-path\\some-file.svg"
);
} else {
assert_eq!(
utils::file_url_to_fs_path("file:///tmp/some-path/some-file.svg#fragment"),
"/tmp/some-path/some-file.svg"
);
}
}
#[cfg(test)]
mod passing {
use crate::url;
#[test]
fn passing_decodes_urls() {
if cfg!(windows) {
#[test]
fn removes_fragment() {
assert_eq!(
utils::file_url_to_fs_path("file:///C:/Documents%20and%20Settings/some-file.html"),
"C:\\Documents and Settings\\some-file.html"
url::clean_url("https://somewhere.com/font.eot#iefix"),
"https://somewhere.com/font.eot"
);
} else {
}
#[test]
fn removes_empty_fragment() {
assert_eq!(
utils::file_url_to_fs_path("file:///home/user/My%20Documents"),
"/home/user/My Documents"
url::clean_url("https://somewhere.com/font.eot#"),
"https://somewhere.com/font.eot"
);
}
#[test]
fn removes_empty_query_and_empty_fragment() {
assert_eq!(
url::clean_url("https://somewhere.com/font.eot?#"),
"https://somewhere.com/font.eot"
);
}
#[test]
fn removes_empty_query_amp_and_empty_fragment() {
assert_eq!(
url::clean_url("https://somewhere.com/font.eot?a=b&#"),
"https://somewhere.com/font.eot?a=b"
);
}
#[test]
fn keeps_credentials() {
assert_eq!(
url::clean_url("https://cookie:monster@gibson.internet/"),
"https://cookie:monster@gibson.internet/"
);
}
}

View File

@@ -1,5 +1,3 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -7,34 +5,27 @@ use crate::utils;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_removes_fragment() {
assert_eq!(
utils::clean_url("https://somewhere.com/font.eot#iefix"),
"https://somewhere.com/font.eot"
);
}
#[cfg(test)]
mod passing {
use crate::url;
#[test]
fn passing_removes_empty_fragment() {
assert_eq!(
utils::clean_url("https://somewhere.com/font.eot#"),
"https://somewhere.com/font.eot"
);
}
#[test]
fn encode_string_with_specific_media_type() {
let mime = "application/javascript";
let data = "var word = 'hello';\nalert(word);\n";
let data_url = url::data_to_data_url(mime, data.as_bytes(), "");
#[test]
fn passing_removes_empty_query_and_empty_fragment() {
assert_eq!(
utils::clean_url("https://somewhere.com/font.eot?#"),
"https://somewhere.com/font.eot"
);
}
assert_eq!(
&data_url,
"data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
);
}
#[test]
fn passing_removes_empty_query_amp_and_empty_fragment() {
assert_eq!(
utils::clean_url("https://somewhere.com/font.eot?a=b&#"),
"https://somewhere.com/font.eot?a=b"
);
#[test]
fn encode_append_fragment() {
let data = "<svg></svg>\n";
let data_url = url::data_to_data_url("image/svg+xml", data.as_bytes(), "");
assert_eq!(&data_url, "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K");
}
}

View File

@@ -1,5 +1,3 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -7,30 +5,35 @@ use crate::utils;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_decode_unicode_characters() {
assert_eq!(
utils::decode_url(str!(
"%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
)),
"検ヒム解塗ゃッ = サ"
);
}
#[cfg(test)]
mod passing {
use crate::url;
#[test]
fn passing_decode_file_url() {
assert_eq!(
utils::decode_url(str!("file:///tmp/space%20here/test%231.html")),
"file:///tmp/space here/test#1.html"
);
}
#[test]
fn decode_unicode_characters() {
assert_eq!(
url::decode_url(str!(
"%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
)),
"検ヒム解塗ゃッ = サ"
);
}
#[test]
fn passing_plus_sign() {
assert_eq!(
utils::decode_url(str!(
#[test]
fn decode_file_url() {
assert_eq!(
url::decode_url(str!("file:///tmp/space%20here/test%231.html")),
"file:///tmp/space here/test#1.html"
);
}
#[test]
fn plus_sign() {
assert_eq!(
url::decode_url(str!(
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
)),
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
)),
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
);
);
}
}

View File

@@ -0,0 +1,41 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    /// The `file://` scheme and the trailing `#fragment` are both dropped,
    /// yielding a path in the host platform's notation.
    #[test]
    fn remove_protocol_and_fragment() {
        if cfg!(windows) {
            assert_eq!(
                url::file_url_to_fs_path("file:///C:/documents/some-path/some-file.svg#fragment"),
                "C:\\documents\\some-path\\some-file.svg"
            );
        } else {
            assert_eq!(
                url::file_url_to_fs_path("file:///tmp/some-path/some-file.svg#fragment"),
                "/tmp/some-path/some-file.svg"
            );
        }
    }

    /// Percent-encoded characters (here `%20`) are decoded in the resulting path.
    #[test]
    fn decodes_urls() {
        if cfg!(windows) {
            assert_eq!(
                url::file_url_to_fs_path("file:///C:/Documents%20and%20Settings/some-file.html"),
                "C:\\Documents and Settings\\some-file.html"
            );
        } else {
            assert_eq!(
                url::file_url_to_fs_path("file:///home/user/My%20Documents"),
                "/home/user/My Documents"
            );
        }
    }
}

View File

@@ -1,5 +1,3 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -7,18 +5,19 @@ use crate::utils;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_data_url_text_html() {
assert!(utils::is_data_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
}
#[cfg(test)]
mod passing {
use crate::url;
#[test]
fn passing_data_url_no_media_type() {
assert!(utils::is_data_url(
"data:;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
#[test]
fn data_url() {
assert_eq!(
url::get_url_fragment(
"data:image/svg+xml;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h#test"
),
"test"
);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@@ -28,17 +27,22 @@ fn passing_data_url_no_media_type() {
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn failing_https_url() {
assert!(!utils::is_data_url("https://kernel.org"));
}
#[cfg(test)]
mod failing {
use crate::url;
#[test]
fn failing_no_protocol_url() {
assert!(!utils::is_data_url("//kernel.org"));
}
#[test]
fn https_empty() {
assert_eq!(url::get_url_fragment("https://kernel.org#"), "");
}
#[test]
fn failing_empty_string() {
assert!(!utils::is_data_url(""));
#[test]
fn no_fragment() {
assert_eq!(url::get_url_fragment("https://kernel.org"), "");
}
#[test]
fn dummy_data_url() {
assert_eq!(url::get_url_fragment("data:text/html,"), "");
}
}

View File

@@ -1,5 +1,3 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -7,50 +5,23 @@ use crate::utils;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_mailto() {
assert!(utils::url_has_protocol(
"mailto:somebody@somewhere.com?subject=hello"
));
}
#[cfg(test)]
mod passing {
use crate::url;
#[test]
fn passing_tel() {
assert!(utils::url_has_protocol("tel:5551234567"));
}
#[test]
fn data_url_text_html() {
assert!(url::is_data_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
}
#[test]
fn passing_ftp_no_slashes() {
assert!(utils::url_has_protocol("ftp:some-ftp-server.com"));
}
#[test]
fn passing_ftp_with_credentials() {
assert!(utils::url_has_protocol(
"ftp://user:password@some-ftp-server.com"
));
}
#[test]
fn passing_javascript() {
assert!(utils::url_has_protocol("javascript:void(0)"));
}
#[test]
fn passing_http() {
assert!(utils::url_has_protocol("http://news.ycombinator.com"));
}
#[test]
fn passing_https() {
assert!(utils::url_has_protocol("https://github.com"));
}
#[test]
fn passing_mailto_uppercase() {
assert!(utils::url_has_protocol(
"MAILTO:somebody@somewhere.com?subject=hello"
));
#[test]
fn data_url_no_media_type() {
assert!(url::is_data_url(
"data:;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@@ -60,24 +31,22 @@ fn passing_mailto_uppercase() {
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn failing_url_with_no_protocol() {
assert!(!utils::url_has_protocol(
"//some-hostname.com/some-file.html"
));
}
#[cfg(test)]
mod failing {
use crate::url;
#[test]
fn failing_relative_path() {
assert!(!utils::url_has_protocol("some-hostname.com/some-file.html"));
}
#[test]
fn https_url() {
assert!(!url::is_data_url("https://kernel.org"));
}
#[test]
fn failing_relative_to_root_path() {
assert!(!utils::url_has_protocol("/some-file.html"));
}
#[test]
fn no_protocol_url() {
assert!(!url::is_data_url("//kernel.org"));
}
#[test]
fn failing_empty_string() {
assert!(!utils::url_has_protocol(""));
#[test]
fn empty_string() {
assert!(!url::is_data_url(""));
}
}

View File

@@ -0,0 +1,83 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    /// A `file://` URL pointing at a Unix-style absolute path is recognized.
    #[test]
    fn unix_file_url() {
        let input = "file:///home/user/Websites/my-website/index.html";
        assert!(url::is_file_url(input));
    }

    /// A `file://` URL carrying a Windows drive letter is recognized.
    #[test]
    fn windows_file_url() {
        let input =
            "file:///C:/Documents%20and%20Settings/user/Websites/my-website/assets/images/logo.png";
        assert!(url::is_file_url(input));
    }

    /// Backslashes in place of forward slashes are accepted (Unix-style path).
    #[test]
    fn unix_url_with_backslashes() {
        let input = "file:\\\\\\home\\user\\Websites\\my-website\\index.html";
        assert!(url::is_file_url(input));
    }

    /// Backslashes in place of forward slashes are accepted (Windows-style path).
    #[test]
    fn windows_file_url_with_backslashes() {
        let input = "file:\\\\\\C:\\Documents%20and%20Settings\\user\\Websites\\my-website\\assets\\images\\logo.png";
        assert!(url::is_file_url(input));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::url;

    /// A protocol-relative URL is not a file URL.
    #[test]
    fn url_with_no_protocol() {
        assert!(!url::is_file_url("//kernel.org"));
    }

    /// A relative path starting with `./` is not a file URL.
    #[test]
    fn dot_slash_filename() {
        assert!(!url::is_file_url("./index.html"));
    }

    /// A bare file name is not a file URL.
    #[test]
    fn just_filename() {
        assert!(!url::is_file_url("some-local-page.htm"));
    }

    /// An HTTPS URL with an IP address and port is not a file URL.
    #[test]
    fn https_ip_port_url() {
        assert!(!url::is_file_url("https://1.2.3.4:80/www/index.html"));
    }

    /// A data URL is not a file URL.
    #[test]
    fn data_url() {
        assert!(!url::is_file_url(
            "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
        ));
    }

    /// The word "file" on its own, without a scheme separator, is not a file URL.
    #[test]
    fn just_word_file() {
        assert!(!url::is_file_url("file"));
    }
}

View File

@@ -1,5 +1,3 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -7,32 +5,24 @@ use crate::utils;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_unix_file_url() {
assert!(utils::is_file_url(
"file:///home/user/Websites/my-website/index.html"
));
}
#[cfg(test)]
mod passing {
use crate::url;
#[test]
fn passing_windows_file_url() {
assert!(utils::is_file_url(
"file:///C:/Documents%20and%20Settings/user/Websites/my-website/assets/images/logo.png"
));
}
#[test]
fn http_url() {
assert!(url::is_http_url("http://kernel.org"));
}
#[test]
fn passing_unix_url_with_backslashes() {
assert!(utils::is_file_url(
"file:\\\\\\home\\user\\Websites\\my-website\\index.html"
));
}
#[test]
fn https_url() {
assert!(url::is_http_url("https://www.rust-lang.org/"));
}
#[test]
fn passing_windows_file_url_with_backslashes() {
assert!(utils::is_file_url(
"file:\\\\\\C:\\Documents%20and%20Settings\\user\\Websites\\my-website\\assets\\images\\logo.png"
));
#[test]
fn http_url_with_backslashes() {
assert!(url::is_http_url("http:\\\\freebsd.org\\"));
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@@ -42,34 +32,34 @@ fn passing_windows_file_url_with_backslashes() {
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn failing_url_with_no_protocl() {
assert!(!utils::is_file_url("//kernel.org"));
}
#[cfg(test)]
mod failing {
use crate::url;
#[test]
fn failing_dot_slash_filename() {
assert!(!utils::is_file_url("./index.html"));
}
#[test]
fn url_with_no_protocol() {
assert!(!url::is_http_url("//kernel.org"));
}
#[test]
fn failing_just_filename() {
assert!(!utils::is_file_url("some-local-page.htm"));
}
#[test]
fn dot_slash_filename() {
assert!(!url::is_http_url("./index.html"));
}
#[test]
fn failing_https_ip_port_url() {
assert!(!utils::is_file_url("https://1.2.3.4:80/www/index.html"));
}
#[test]
fn just_filename() {
assert!(!url::is_http_url("some-local-page.htm"));
}
#[test]
fn failing_data_url() {
assert!(!utils::is_file_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
}
#[test]
fn https_ip_port_url() {
assert!(!url::is_http_url("ftp://1.2.3.4/www/index.html"));
}
#[test]
fn failing_just_word_file() {
assert!(!utils::is_file_url("file"));
#[test]
fn data_url() {
assert!(!url::is_http_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
}
}

12
src/tests/url/mod.rs Normal file
View File

@@ -0,0 +1,12 @@
mod clean_url;
mod data_to_data_url;
mod decode_url;
mod file_url_to_fs_path;
mod get_url_fragment;
mod is_data_url;
mod is_file_url;
mod is_http_url;
mod parse_data_url;
mod resolve_url;
mod url_has_protocol;
mod url_with_fragment;

View File

@@ -0,0 +1,103 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    /// Payload shared by the `text/html` cases below.
    const PARKINSONS_LAW: &str = "Work expands so as to fill the time available for its completion";

    /// Base64-encoded payload is decoded and the media type is extracted.
    #[test]
    fn parse_text_html_base64() {
        let input = "data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==";
        let (mime, bytes) = url::parse_data_url(input);

        assert_eq!(mime, "text/html");
        assert_eq!(String::from_utf8_lossy(&bytes), PARKINSONS_LAW);
    }

    /// A `;utf8` marker is not treated as part of the media type or the data.
    #[test]
    fn parse_text_html_utf8() {
        let input =
            "data:text/html;utf8,Work expands so as to fill the time available for its completion";
        let (mime, bytes) = url::parse_data_url(input);

        assert_eq!(mime, "text/html");
        assert_eq!(String::from_utf8_lossy(&bytes), PARKINSONS_LAW);
    }

    /// Plain-text payload without any encoding marker is returned as is.
    #[test]
    fn parse_text_html_plaintext() {
        let input =
            "data:text/html,Work expands so as to fill the time available for its completion";
        let (mime, bytes) = url::parse_data_url(input);

        assert_eq!(mime, "text/html");
        assert_eq!(String::from_utf8_lossy(&bytes), PARKINSONS_LAW);
    }

    /// Whitespace around the whole URL is ignored and `charset=` is not part of the media type.
    #[test]
    fn parse_text_html_charset_utf_8_between_two_whitespaces() {
        let input = " data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ";
        let (mime, bytes) = url::parse_data_url(input);

        assert_eq!(mime, "text/html");
        assert_eq!(String::from_utf8_lossy(&bytes), PARKINSONS_LAW);
    }

    /// Percent-encoded characters in the payload are decoded.
    #[test]
    fn parse_text_css_url_encoded() {
        let input = "data:text/css,div{background-color:%23000}";
        let (mime, bytes) = url::parse_data_url(input);

        assert_eq!(mime, "text/css");
        assert_eq!(String::from_utf8_lossy(&bytes), "div{background-color:#000}");
    }

    /// An absent media type yields an empty string; base64 data is still decoded.
    #[test]
    fn parse_no_media_type_base64() {
        let input = "data:;base64,dGVzdA==";
        let (mime, bytes) = url::parse_data_url(input);

        assert_eq!(mime, "");
        assert_eq!(String::from_utf8_lossy(&bytes), "test");
    }

    /// An absent media type and absent encoding: payload is percent-decoded.
    #[test]
    fn parse_no_media_type_no_encoding() {
        let input = "data:;,test%20test";
        let (mime, bytes) = url::parse_data_url(input);

        assert_eq!(mime, "");
        assert_eq!(String::from_utf8_lossy(&bytes), "test test");
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::url;

    /// The bare word "data" is not a data URL: both the media type and the
    /// payload come back empty.
    #[test]
    fn just_word_data() {
        let input = "data";
        let (mime, bytes) = url::parse_data_url(input);

        assert_eq!(mime, "");
        assert_eq!(String::from_utf8_lossy(&bytes), "");
    }
}

View File

@@ -0,0 +1,194 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    /// `..` from the site root stays at the root.
    #[test]
    fn from_https_to_level_up_relative() {
        let resolved = url::resolve_url("https://www.kernel.org", "../category/signatures.html")
            .unwrap_or_default();

        assert_eq!(resolved, "https://www.kernel.org/category/signatures.html");
    }

    /// An absolute target wins even when the base is just a file name.
    #[test]
    fn from_just_filename_to_full_https_url() {
        let resolved = url::resolve_url(
            "saved_page.htm",
            "https://www.kernel.org/category/signatures.html",
        )
        .unwrap_or_default();

        assert_eq!(resolved, "https://www.kernel.org/category/signatures.html");
    }

    /// A protocol-relative target inherits the base URL's scheme.
    #[test]
    fn from_https_url_to_url_with_no_protocol() {
        let resolved = url::resolve_url(
            "https://www.kernel.org",
            "//www.kernel.org/theme/images/logos/tux.png",
        )
        .unwrap_or_default();

        assert_eq!(resolved, "https://www.kernel.org/theme/images/logos/tux.png");
    }

    /// A protocol-relative target on another host inherits only the scheme.
    #[test]
    fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() {
        let resolved = url::resolve_url(
            "https://www.kernel.org",
            "//another-host.org/theme/images/logos/tux.png",
        )
        .unwrap_or_default();

        assert_eq!(resolved, "https://another-host.org/theme/images/logos/tux.png");
    }

    /// A root-relative target replaces the whole path of the base URL.
    #[test]
    fn from_https_url_to_relative_root_path() {
        let resolved = url::resolve_url(
            "https://www.kernel.org/category/signatures.html",
            "/theme/images/logos/tux.png",
        )
        .unwrap_or_default();

        assert_eq!(resolved, "https://www.kernel.org/theme/images/logos/tux.png");
    }

    /// A bare file name replaces the last path segment of the base URL.
    #[test]
    fn from_https_to_just_filename() {
        let resolved = url::resolve_url(
            "https://www.w3schools.com/html/html_iframe.asp",
            "default.asp",
        )
        .unwrap_or_default();

        assert_eq!(resolved, "https://www.w3schools.com/html/default.asp");
    }

    /// An absolute HTTPS target is returned unchanged when the base is a data URL.
    #[test]
    fn from_data_url_to_https() {
        let resolved = url::resolve_url(
            "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
            "https://www.kernel.org/category/signatures.html",
        )
        .unwrap_or_default();

        assert_eq!(resolved, "https://www.kernel.org/category/signatures.html");
    }

    /// A data URL target is returned unchanged when the base is a data URL.
    #[test]
    fn from_data_url_to_data_url() {
        let resolved = url::resolve_url(
            "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
            "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K",
        )
        .unwrap_or_default();

        assert_eq!(
            resolved,
            "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K"
        );
    }

    /// A relative path is resolved against the directory of a `file://` base.
    #[test]
    fn from_file_url_to_relative_path() {
        let resolved = url::resolve_url(
            "file:///home/user/Websites/my-website/index.html",
            "assets/images/logo.png",
        )
        .unwrap_or_default();

        assert_eq!(
            resolved,
            "file:///home/user/Websites/my-website/assets/images/logo.png"
        );
    }

    /// Backslashes in both base and target are normalized to forward slashes.
    #[test]
    fn from_file_url_to_relative_path_with_backslashes() {
        let resolved = url::resolve_url(
            "file:\\\\\\home\\user\\Websites\\my-website\\index.html",
            "assets\\images\\logo.png",
        )
        .unwrap_or_default();

        assert_eq!(
            resolved,
            "file:///home/user/Websites/my-website/assets/images/logo.png"
        );
    }

    /// A `file://` target is returned unchanged when the base is a data URL.
    #[test]
    fn from_data_url_to_file_url() {
        let resolved = url::resolve_url(
            "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
            "file:///etc/passwd",
        )
        .unwrap_or_default();

        assert_eq!(resolved, "file:///etc/passwd");
    }

    /// The fragment of the target survives resolution.
    #[test]
    fn preserve_fragment() {
        let resolved = url::resolve_url(
            "http://doesnt-matter.local/",
            "css/fonts/fontmarvelous.svg#fontmarvelous",
        )
        .unwrap_or_default();

        assert_eq!(
            resolved,
            "http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous"
        );
    }

    /// An absolute `file://` target is returned unchanged for a `file://` base;
    /// the fixture paths depend on the host platform.
    #[test]
    fn resolve_from_file_url_to_file_url() {
        let (from, to, expected) = if cfg!(windows) {
            (
                "file:///c:/index.html",
                "file:///c:/image.png",
                "file:///c:/image.png",
            )
        } else {
            (
                "file:///tmp/index.html",
                "file:///tmp/image.png",
                "file:///tmp/image.png",
            )
        };

        let resolved = url::resolve_url(from, to).unwrap_or_default();

        assert_eq!(resolved, expected);
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::url;

    /// A protocol-relative target cannot be resolved against a data URL;
    /// the error collapses to an empty string via `unwrap_or_default`.
    #[test]
    fn from_data_url_to_url_with_no_protocol() {
        let resolved = url::resolve_url(
            "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
            "//www.w3schools.com/html/html_iframe.asp",
        )
        .unwrap_or_default();

        assert_eq!(resolved, "");
    }
}

View File

@@ -0,0 +1,94 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    // Every input below carries an explicit scheme, so url_has_protocol
    // is expected to report true for each of them

    #[test]
    fn mailto() {
        let input = "mailto:somebody@somewhere.com?subject=hello";
        assert!(url::url_has_protocol(input));
    }

    #[test]
    fn tel() {
        let input = "tel:5551234567";
        assert!(url::url_has_protocol(input));
    }

    #[test]
    fn ftp_no_slashes() {
        let input = "ftp:some-ftp-server.com";
        assert!(url::url_has_protocol(input));
    }

    #[test]
    fn ftp_with_credentials() {
        let input = "ftp://user:password@some-ftp-server.com";
        assert!(url::url_has_protocol(input));
    }

    #[test]
    fn javascript() {
        let input = "javascript:void(0)";
        assert!(url::url_has_protocol(input));
    }

    #[test]
    fn http() {
        let input = "http://news.ycombinator.com";
        assert!(url::url_has_protocol(input));
    }

    #[test]
    fn https() {
        let input = "https://github.com";
        assert!(url::url_has_protocol(input));
    }

    #[test]
    fn mailto_uppercase() {
        // Scheme written in upper case
        let input = "MAILTO:somebody@somewhere.com?subject=hello";
        assert!(url::url_has_protocol(input));
    }

    #[test]
    fn empty_data_url() {
        // Data URL with a media type but no payload
        let input = "data:text/html,";
        assert!(url::url_has_protocol(input));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::url;

    // None of the inputs below carries a scheme, so url_has_protocol
    // is expected to report false for each of them

    #[test]
    fn url_with_no_protocol() {
        let input = "//some-hostname.com/some-file.html";
        assert!(!url::url_has_protocol(input));
    }

    #[test]
    fn relative_path() {
        let input = "some-hostname.com/some-file.html";
        assert!(!url::url_has_protocol(input));
    }

    #[test]
    fn relative_to_root_path() {
        let input = "/some-file.html";
        assert!(!url::url_has_protocol(input));
    }

    #[test]
    fn empty_string() {
        let input = "";
        assert!(!url::url_has_protocol(input));
    }
}

View File

@@ -0,0 +1,40 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::url;

    #[test]
    fn url_with_fragment_url() {
        // A non-empty fragment gets appended after a "#"
        assert_eq!(
            url::url_with_fragment("https://localhost.localdomain/path/", "test"),
            "https://localhost.localdomain/path/#test"
        );
    }

    #[test]
    fn url_with_fragment_empty_url() {
        // Despite the test name, it is the fragment that is empty here;
        // the URL must come back without a trailing "#"
        assert_eq!(
            url::url_with_fragment("https://localhost.localdomain/path/", ""),
            "https://localhost.localdomain/path/"
        );
    }

    #[test]
    fn url_with_fragment_data_url() {
        // Data URLs receive fragments the same way as any other URL
        assert_eq!(
            url::url_with_fragment("data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K", "fragment"),
            "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K#fragment"
        );
    }
}

View File

@@ -1,95 +0,0 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_parse_text_html_base64() {
    // Base64-encoded text/html payload
    let input = "data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==";
    let expected_text = "Work expands so as to fill the time available for its completion";

    let (media_type, text) = utils::data_url_to_text(input);

    assert_eq!(media_type, "text/html");
    assert_eq!(text, expected_text);
}
#[test]
fn passing_parse_text_html_utf8() {
    // Payload marked with the ";utf8" parameter
    let input =
        "data:text/html;utf8,Work expands so as to fill the time available for its completion";
    let expected_text = "Work expands so as to fill the time available for its completion";

    let (media_type, text) = utils::data_url_to_text(input);

    assert_eq!(media_type, "text/html");
    assert_eq!(text, expected_text);
}
#[test]
fn passing_parse_text_html_plaintext() {
    // No encoding parameter at all: the payload is taken as plain text
    let input = "data:text/html,Work expands so as to fill the time available for its completion";
    let expected_text = "Work expands so as to fill the time available for its completion";

    let (media_type, text) = utils::data_url_to_text(input);

    assert_eq!(media_type, "text/html");
    assert_eq!(text, expected_text);
}
#[test]
fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() {
    // The data URL is padded with one space on each side;
    // the expected text carries no such padding
    let input = " data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ";
    let expected_text = "Work expands so as to fill the time available for its completion";

    let (media_type, text) = utils::data_url_to_text(input);

    assert_eq!(media_type, "text/html");
    assert_eq!(text, expected_text);
}
#[test]
fn passing_parse_text_css_url_encoded() {
    // "%23" in the payload is expected to be decoded into "#"
    let input = "data:text/css,div{background-color:%23000}";

    let (media_type, text) = utils::data_url_to_text(input);

    assert_eq!(media_type, "text/css");
    assert_eq!(text, "div{background-color:#000}");
}
#[test]
fn passing_parse_no_media_type_base64() {
    // Media type left out, payload base64-encoded
    let input = "data:;base64,dGVzdA==";

    let (media_type, text) = utils::data_url_to_text(input);

    assert_eq!(media_type, "");
    assert_eq!(text, "test");
}
#[test]
fn passing_parse_no_media_type_no_encoding() {
    // Neither media type nor encoding given; "%20" is expected to become a space
    let input = "data:;,test%20test";

    let (media_type, text) = utils::data_url_to_text(input);

    assert_eq!(media_type, "");
    assert_eq!(text, "test test");
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn failing_just_word_data() {
    // The bare word "data" is not a data URL: both parts are expected to be empty
    let input = "data";

    let (media_type, text) = utils::data_url_to_text(input);

    assert_eq!(media_type, "");
    assert_eq!(text, "");
}

View File

@@ -1,5 +1,3 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -7,131 +5,136 @@ use crate::utils;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_image_gif87() {
assert_eq!(utils::detect_media_type(b"GIF87a", ""), "image/gif");
}
#[cfg(test)]
mod passing {
use crate::utils;
#[test]
fn passing_image_gif89() {
assert_eq!(utils::detect_media_type(b"GIF89a", ""), "image/gif");
}
#[test]
fn image_gif87() {
assert_eq!(utils::detect_media_type(b"GIF87a", ""), "image/gif");
}
#[test]
fn passing_image_jpeg() {
assert_eq!(utils::detect_media_type(b"\xFF\xD8\xFF", ""), "image/jpeg");
}
#[test]
fn image_gif89() {
assert_eq!(utils::detect_media_type(b"GIF89a", ""), "image/gif");
}
#[test]
fn passing_image_png() {
assert_eq!(
utils::detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A", ""),
"image/png"
);
}
#[test]
fn image_jpeg() {
assert_eq!(utils::detect_media_type(b"\xFF\xD8\xFF", ""), "image/jpeg");
}
#[test]
fn passing_image_svg() {
assert_eq!(utils::detect_media_type(b"<svg ", ""), "image/svg+xml");
}
#[test]
fn image_png() {
assert_eq!(
utils::detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A", ""),
"image/png"
);
}
#[test]
fn passing_image_webp() {
assert_eq!(
utils::detect_media_type(b"RIFF....WEBPVP8 ", ""),
"image/webp"
);
}
#[test]
fn image_svg() {
assert_eq!(utils::detect_media_type(b"<svg ", ""), "image/svg+xml");
}
#[test]
fn passing_image_icon() {
assert_eq!(
utils::detect_media_type(b"\x00\x00\x01\x00", ""),
"image/x-icon"
);
}
#[test]
fn image_webp() {
assert_eq!(
utils::detect_media_type(b"RIFF....WEBPVP8 ", ""),
"image/webp"
);
}
#[test]
fn passing_image_svg_filename() {
assert_eq!(
utils::detect_media_type(b"<?xml ", "local-file.svg"),
"image/svg+xml"
);
}
#[test]
fn image_icon() {
assert_eq!(
utils::detect_media_type(b"\x00\x00\x01\x00", ""),
"image/x-icon"
);
}
#[test]
fn passing_image_svg_url_uppercase() {
assert_eq!(
utils::detect_media_type(b"", "https://some-site.com/images/local-file.SVG"),
"image/svg+xml"
);
}
#[test]
fn image_svg_filename() {
assert_eq!(
utils::detect_media_type(b"<?xml ", "local-file.svg"),
"image/svg+xml"
);
}
#[test]
fn passing_audio_mpeg() {
assert_eq!(utils::detect_media_type(b"ID3", ""), "audio/mpeg");
}
#[test]
fn image_svg_url_uppercase() {
assert_eq!(
utils::detect_media_type(b"", "https://some-site.com/images/local-file.SVG"),
"image/svg+xml"
);
}
#[test]
fn passing_audio_mpeg_2() {
assert_eq!(utils::detect_media_type(b"\xFF\x0E", ""), "audio/mpeg");
}
#[test]
fn audio_mpeg() {
assert_eq!(utils::detect_media_type(b"ID3", ""), "audio/mpeg");
}
#[test]
fn passing_audio_mpeg_3() {
assert_eq!(utils::detect_media_type(b"\xFF\x0F", ""), "audio/mpeg");
}
#[test]
fn audio_mpeg_2() {
assert_eq!(utils::detect_media_type(b"\xFF\x0E", ""), "audio/mpeg");
}
#[test]
fn passing_audio_ogg() {
assert_eq!(utils::detect_media_type(b"OggS", ""), "audio/ogg");
}
#[test]
fn audio_mpeg_3() {
assert_eq!(utils::detect_media_type(b"\xFF\x0F", ""), "audio/mpeg");
}
#[test]
fn passing_audio_wav() {
assert_eq!(
utils::detect_media_type(b"RIFF....WAVEfmt ", ""),
"audio/wav"
);
}
#[test]
fn audio_ogg() {
assert_eq!(utils::detect_media_type(b"OggS", ""), "audio/ogg");
}
#[test]
fn passing_audio_flac() {
assert_eq!(utils::detect_media_type(b"fLaC", ""), "audio/x-flac");
}
#[test]
fn audio_wav() {
assert_eq!(
utils::detect_media_type(b"RIFF....WAVEfmt ", ""),
"audio/wav"
);
}
#[test]
fn passing_video_avi() {
assert_eq!(
utils::detect_media_type(b"RIFF....AVI LIST", ""),
"video/avi"
);
}
#[test]
fn audio_flac() {
assert_eq!(utils::detect_media_type(b"fLaC", ""), "audio/x-flac");
}
#[test]
fn passing_video_mp4() {
assert_eq!(utils::detect_media_type(b"....ftyp", ""), "video/mp4");
}
#[test]
fn video_avi() {
assert_eq!(
utils::detect_media_type(b"RIFF....AVI LIST", ""),
"video/avi"
);
}
#[test]
fn passing_video_mpeg() {
assert_eq!(
utils::detect_media_type(b"\x00\x00\x01\x0B", ""),
"video/mpeg"
);
}
#[test]
fn video_mp4() {
assert_eq!(utils::detect_media_type(b"....ftyp", ""), "video/mp4");
}
#[test]
fn passing_video_quicktime() {
assert_eq!(utils::detect_media_type(b"....moov", ""), "video/quicktime");
}
#[test]
fn video_mpeg() {
assert_eq!(
utils::detect_media_type(b"\x00\x00\x01\x0B", ""),
"video/mpeg"
);
}
#[test]
fn passing_video_webm() {
assert_eq!(
utils::detect_media_type(b"\x1A\x45\xDF\xA3", ""),
"video/webm"
);
#[test]
fn video_quicktime() {
assert_eq!(utils::detect_media_type(b"....moov", ""), "video/quicktime");
}
#[test]
fn video_webm() {
assert_eq!(
utils::detect_media_type(b"\x1A\x45\xDF\xA3", ""),
"video/webm"
);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@@ -141,7 +144,12 @@ fn passing_video_webm() {
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn failing_unknown_media_type() {
assert_eq!(utils::detect_media_type(b"abcdef0123456789", ""), "");
#[cfg(test)]
mod failing {
use crate::utils;
#[test]
fn unknown_media_type() {
assert_eq!(utils::detect_media_type(b"abcdef0123456789", ""), "");
}
}

View File

@@ -1,5 +1,3 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -7,17 +5,27 @@ use crate::utils;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_data_url() {
assert_eq!(
utils::get_url_fragment(
"data:image/svg+xml;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h#test"
),
"test"
);
}
#[cfg(test)]
mod passing {
use crate::utils;
#[test]
fn passing_https_empty() {
assert_eq!(utils::get_url_fragment("https://kernel.org#"), "");
#[test]
fn zero() {
assert_eq!(utils::indent(0), "");
}
#[test]
fn one() {
assert_eq!(utils::indent(1), " ");
}
#[test]
fn two() {
assert_eq!(utils::indent(2), " ");
}
#[test]
fn three() {
assert_eq!(utils::indent(3), " ");
}
}

View File

@@ -1,13 +1,3 @@
mod clean_url;
mod data_to_data_url;
mod data_url_to_text;
mod decode_url;
mod detect_media_type;
mod file_url_to_fs_path;
mod get_url_fragment;
mod is_data_url;
mod is_file_url;
mod is_http_url;
mod resolve_url;
mod indent;
mod retrieve_asset;
mod url_has_protocol;

View File

@@ -1,229 +0,0 @@
use url::ParseError;
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_from_https_to_level_up_relative() -> Result<(), ParseError> {
    // "../" on top of a host-only base is expected to resolve against the root
    let base = "https://www.kernel.org";
    let target = "../category/signatures.html";

    let resolved = utils::resolve_url(base, target)?;

    assert_eq!(
        resolved.as_str(),
        "https://www.kernel.org/category/signatures.html"
    );

    Ok(())
}
#[test]
fn passing_from_just_filename_to_full_https_url() -> Result<(), ParseError> {
    // The base is a bare file name, the target a complete https URL
    let base = "saved_page.htm";
    let target = "https://www.kernel.org/category/signatures.html";

    let resolved = utils::resolve_url(base, target)?;

    assert_eq!(
        resolved.as_str(),
        "https://www.kernel.org/category/signatures.html"
    );

    Ok(())
}
#[test]
fn passing_from_https_url_to_url_with_no_protocol() -> Result<(), ParseError> {
    // A protocol-relative target is expected to pick up the scheme of the base
    let base = "https://www.kernel.org";
    let target = "//www.kernel.org/theme/images/logos/tux.png";

    let resolved = utils::resolve_url(base, target)?;

    assert_eq!(
        resolved.as_str(),
        "https://www.kernel.org/theme/images/logos/tux.png"
    );

    Ok(())
}
#[test]
fn passing_from_https_url_to_url_with_no_protocol_and_on_different_hostname(
) -> Result<(), ParseError> {
    // Protocol-relative target pointing at another host:
    // scheme comes from the base, host from the target
    let base = "https://www.kernel.org";
    let target = "//another-host.org/theme/images/logos/tux.png";

    let resolved = utils::resolve_url(base, target)?;

    assert_eq!(
        resolved.as_str(),
        "https://another-host.org/theme/images/logos/tux.png"
    );

    Ok(())
}
#[test]
fn passing_from_https_url_to_relative_root_path() -> Result<(), ParseError> {
    // A target starting with "/" is expected to replace the whole path of the base
    let base = "https://www.kernel.org/category/signatures.html";
    let target = "/theme/images/logos/tux.png";

    let resolved = utils::resolve_url(base, target)?;

    assert_eq!(
        resolved.as_str(),
        "https://www.kernel.org/theme/images/logos/tux.png"
    );

    Ok(())
}
#[test]
fn passing_from_https_to_just_filename() -> Result<(), ParseError> {
    // A bare file name is expected to replace the last path segment of the base
    let base = "https://www.w3schools.com/html/html_iframe.asp";
    let target = "default.asp";

    let resolved = utils::resolve_url(base, target)?;

    assert_eq!(
        resolved.as_str(),
        "https://www.w3schools.com/html/default.asp"
    );

    Ok(())
}
#[test]
fn passing_from_data_url_to_https() -> Result<(), ParseError> {
    // A complete https target is expected to come back as-is, data URL base or not
    let base = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let target = "https://www.kernel.org/category/signatures.html";

    let resolved = utils::resolve_url(base, target)?;

    assert_eq!(
        resolved.as_str(),
        "https://www.kernel.org/category/signatures.html"
    );

    Ok(())
}
#[test]
fn passing_from_data_url_to_data_url() -> Result<(), ParseError> {
    // Base and target are both data URLs; the target is expected to come back as-is
    let base = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let target = "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K";

    let resolved = utils::resolve_url(base, target)?;

    assert_eq!(
        resolved.as_str(),
        "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K"
    );

    Ok(())
}
#[test]
fn passing_from_file_url_to_relative_path() -> Result<(), ParseError> {
    // A relative path is expected to land next to the base document
    let base = "file:///home/user/Websites/my-website/index.html";
    let target = "assets/images/logo.png";

    // A resolution error is turned into an empty string rather than propagated
    let resolved = utils::resolve_url(base, target).unwrap_or(str!());

    assert_eq!(
        resolved.as_str(),
        "file:///home/user/Websites/my-website/assets/images/logo.png"
    );

    Ok(())
}
#[test]
fn passing_from_file_url_to_relative_path_with_backslashes() -> Result<(), ParseError> {
    // Both inputs use backslashes; the expected result uses forward slashes only
    let base = "file:\\\\\\home\\user\\Websites\\my-website\\index.html";
    let target = "assets\\images\\logo.png";

    // A resolution error is turned into an empty string rather than propagated
    let resolved = utils::resolve_url(base, target).unwrap_or(str!());

    assert_eq!(
        resolved.as_str(),
        "file:///home/user/Websites/my-website/assets/images/logo.png"
    );

    Ok(())
}
#[test]
fn passing_from_data_url_to_file_url() -> Result<(), ParseError> {
    // An absolute file URL target is expected to win over a data URL base
    let base = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let target = "file:///etc/passwd";

    // A resolution error is turned into an empty string rather than propagated
    let resolved = utils::resolve_url(base, target).unwrap_or(str!());

    assert_eq!(resolved.as_str(), "file:///etc/passwd");

    Ok(())
}
#[test]
fn passing_preserve_fragment() -> Result<(), ParseError> {
    // The "#fontmarvelous" fragment of the target must survive resolution
    let base = "http://doesnt-matter.local/";
    let target = "css/fonts/fontmarvelous.svg#fontmarvelous";

    // A resolution error is turned into an empty string rather than propagated
    let resolved = utils::resolve_url(base, target).unwrap_or(str!());

    assert_eq!(
        resolved.as_str(),
        "http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous"
    );

    Ok(())
}
#[test]
fn passing_resolve_from_file_url_to_file_url() -> Result<(), ParseError> {
    // Pick platform-appropriate file URLs: drive-letter form on Windows,
    // plain absolute paths everywhere else
    let (base, target, expected) = if cfg!(windows) {
        (
            "file:///c:/index.html",
            "file:///c:/image.png",
            "file:///c:/image.png",
        )
    } else {
        (
            "file:///tmp/index.html",
            "file:///tmp/image.png",
            "file:///tmp/image.png",
        )
    };

    // A resolution error is turned into an empty string rather than propagated
    let resolved = utils::resolve_url(base, target).unwrap_or(str!());

    assert_eq!(resolved.as_str(), expected);

    Ok(())
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn failing_from_data_url_to_url_with_no_protocol() -> Result<(), ParseError> {
    // A protocol-relative target on top of a data URL base is expected to
    // yield nothing: unwrap_or(str!()) turns a failure into an empty string
    let base = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let target = "//www.w3schools.com/html/html_iframe.asp";

    let resolved = utils::resolve_url(base, target).unwrap_or(str!());

    assert_eq!(resolved.as_str(), "");

    Ok(())
}

View File

@@ -1,8 +1,3 @@
use crate::utils;
use reqwest::blocking::Client;
use std::collections::HashMap;
use std::env;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -10,83 +5,85 @@ use std::env;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn passing_read_data_url() {
let cache = &mut HashMap::new();
let client = Client::new();
#[cfg(test)]
mod passing {
use reqwest::blocking::Client;
use std::collections::HashMap;
use std::env;
// If both source and target are data URLs,
// ensure the result contains target data URL
let (retrieved_data, final_url) = utils::retrieve_asset(
cache,
&client,
"data:text/html;base64,SoUrCe",
"data:text/html;base64,TaRgEt",
true,
"",
false,
)
.unwrap();
assert_eq!(&retrieved_data, "data:text/html;base64,TaRgEt");
assert_eq!(&final_url, "data:text/html;base64,TaRgEt");
}
use crate::opts::Options;
use crate::url;
use crate::utils;
#[test]
fn passing_read_data_url_ignore_suggested_media_type() {
let cache = &mut HashMap::new();
let client = Client::new();
#[test]
fn read_data_url() {
let cache = &mut HashMap::new();
let client = Client::new();
// Media type parameter should not influence data URLs
let (data, final_url) = utils::retrieve_asset(
cache,
&client,
"data:text/html;base64,SoUrCe",
"data:text/html;base64,TaRgEt",
true,
"image/png",
false,
)
.unwrap();
assert_eq!(&data, "data:text/html;base64,TaRgEt");
assert_eq!(&final_url, "data:text/html;base64,TaRgEt");
}
let mut options = Options::default();
options.silent = true;
#[test]
fn passing_read_local_file_with_file_url_parent() {
let cache = &mut HashMap::new();
let client = Client::new();
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
// Inclusion of local assets from local sources should be allowed
let cwd = env::current_dir().unwrap();
let (data, final_url) = utils::retrieve_asset(
cache,
&client,
&format!(
"{file}{cwd}/src/tests/data/local-file.html",
file = file_url_protocol,
cwd = cwd.to_str().unwrap()
),
&format!(
"{file}{cwd}/src/tests/data/local-script.js",
file = file_url_protocol,
cwd = cwd.to_str().unwrap()
),
true,
"application/javascript",
false,
)
.unwrap();
assert_eq!(&data, "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
assert_eq!(
&final_url,
&format!(
"{file}{cwd}/src/tests/data/local-script.js",
file = file_url_protocol,
cwd = cwd.to_str().unwrap()
// If both source and target are data URLs,
// ensure the result contains target data URL
let (data, final_url, media_type) = utils::retrieve_asset(
cache,
&client,
"data:text/html;base64,c291cmNl",
"data:text/html;base64,dGFyZ2V0",
&options,
0,
)
);
.unwrap();
assert_eq!(
url::data_to_data_url(&media_type, &data, &final_url),
url::data_to_data_url("text/html", "target".as_bytes(), "")
);
assert_eq!(
final_url,
url::data_to_data_url("text/html", "target".as_bytes(), "")
);
assert_eq!(&media_type, "text/html");
}
#[test]
fn read_local_file_with_file_url_parent() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
// Inclusion of local assets from local sources should be allowed
let cwd = env::current_dir().unwrap();
let (data, final_url, _media_type) = utils::retrieve_asset(
cache,
&client,
&format!(
"{file}{cwd}/src/tests/data/basic/local-file.html",
file = file_url_protocol,
cwd = cwd.to_str().unwrap()
),
&format!(
"{file}{cwd}/src/tests/data/basic/local-script.js",
file = file_url_protocol,
cwd = cwd.to_str().unwrap()
),
&options,
0,
)
.unwrap();
assert_eq!(url::data_to_data_url("application/javascript", &data, &final_url), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
assert_eq!(
&final_url,
&format!(
"{file}{cwd}/src/tests/data/basic/local-script.js",
file = file_url_protocol,
cwd = cwd.to_str().unwrap()
)
);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@@ -96,42 +93,63 @@ fn passing_read_local_file_with_file_url_parent() {
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[test]
fn failing_read_local_file_with_data_url_parent() {
let cache = &mut HashMap::new();
let client = Client::new();
#[cfg(test)]
mod failing {
use reqwest::blocking::Client;
use std::collections::HashMap;
// Inclusion of local assets from data URL sources should not be allowed
let (data, final_url) = utils::retrieve_asset(
cache,
&client,
"data:text/html;base64,SoUrCe",
"file:///etc/passwd",
true,
"",
false,
)
.unwrap();
assert_eq!(&data, "");
assert_eq!(&final_url, "");
}
#[test]
fn failing_read_local_file_with_https_parent() {
let cache = &mut HashMap::new();
let client = Client::new();
// Inclusion of local assets from remote sources should not be allowed
let (data, final_url) = utils::retrieve_asset(
cache,
&client,
"https://kernel.org/",
"file:///etc/passwd",
true,
"",
false,
)
.unwrap();
assert_eq!(&data, "");
assert_eq!(&final_url, "");
use crate::opts::Options;
use crate::utils;
#[test]
fn read_local_file_with_data_url_parent() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
// Inclusion of local assets from data URL sources should not be allowed
match utils::retrieve_asset(
cache,
&client,
"data:text/html;base64,SoUrCe",
"file:///etc/passwd",
&options,
0,
) {
Ok((..)) => {
assert!(false);
}
Err(_) => {
assert!(true);
}
}
}
#[test]
fn read_local_file_with_https_parent() {
let cache = &mut HashMap::new();
let client = Client::new();
let mut options = Options::default();
options.silent = true;
// Inclusion of local assets from remote sources should not be allowed
match utils::retrieve_asset(
cache,
&client,
"https://kernel.org/",
"file:///etc/passwd",
&options,
0,
) {
Ok((..)) => {
assert!(false);
}
Err(_) => {
assert!(true);
}
}
}
}

167
src/url.rs Normal file
View File

@@ -0,0 +1,167 @@
use base64;
use url::{form_urlencoded, ParseError, Url};
use crate::utils::detect_media_type;
/// Returns `input` with its fragment, a stray "?" and any trailing "&"s removed.
///
/// # Panics
///
/// Panics if `input` cannot be parsed as a URL.
pub fn clean_url<T: AsRef<str>>(input: T) -> String {
    let mut parsed = Url::parse(input.as_ref()).unwrap();

    // Drop the fragment
    parsed.set_fragment(None);

    // An empty query means the URL ended in a bare "?"; drop that too
    if let Some("") = parsed.query() {
        parsed.set_query(None);
    }

    // Cut off any run of trailing ampersands
    parsed.as_str().trim_end_matches('&').to_string()
}
/// Wraps `data` into a base64-encoded data URL.
///
/// When `media_type` is empty, the media type is taken from
/// `detect_media_type(data, url)` instead.
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str) -> String {
    if media_type.is_empty() {
        // No media type supplied by the caller: fall back to detection
        let detected_media_type: String = detect_media_type(data, url);
        format!(
            "data:{};base64,{}",
            detected_media_type,
            base64::encode(data)
        )
    } else {
        format!("data:{};base64,{}", media_type, base64::encode(data))
    }
}
/// Percent-decodes `input` and returns the result as a string.
///
/// Every `%XX` sequence (two hexadecimal digits) is replaced with the byte it
/// stands for; everything else is copied through untouched, including `+`,
/// `&` and `=`. A `%` that is not followed by two hexadecimal digits is kept
/// literally. Decoded bytes that do not form valid UTF-8 are replaced with
/// U+FFFD.
pub fn decode_url(input: String) -> String {
    // Value of a single ASCII hexadecimal digit, if `byte` is one
    fn hex_value(byte: u8) -> Option<u8> {
        (byte as char).to_digit(16).map(|digit| digit as u8)
    }

    let bytes: &[u8] = input.as_bytes();
    let mut decoded: Vec<u8> = Vec::with_capacity(bytes.len());
    let mut pos: usize = 0;

    while pos < bytes.len() {
        // Only a "%" followed by two hex digits counts as an escape sequence
        let escaped_byte: Option<u8> = if bytes[pos] == b'%' {
            match (bytes.get(pos + 1), bytes.get(pos + 2)) {
                (Some(&high), Some(&low)) => match (hex_value(high), hex_value(low)) {
                    (Some(high), Some(low)) => Some(high * 16 + low),
                    _ => None,
                },
                _ => None,
            }
        } else {
            None
        };

        match escaped_byte {
            Some(byte) => {
                decoded.push(byte);
                pos += 3;
            }
            None => {
                decoded.push(bytes[pos]);
                pos += 1;
            }
        }
    }

    // Decoding happens on raw bytes so that multi-byte UTF-8 sequences
    // split across several escapes are reassembled before conversion
    String::from_utf8_lossy(&decoded).into_owned()
}
pub fn file_url_to_fs_path(url: &str) -> String {
if !is_file_url(url) {
return str!();
}
let cutoff_l = if cfg!(windows) { 8 } else { 7 };
let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string());
let url_fragment = get_url_fragment(url);
if url_fragment != "" {
let max_len = fs_file_path.len() - 1 - url_fragment.len();
fs_file_path = fs_file_path[0..max_len].to_string();
}
if cfg!(windows) {
fs_file_path = fs_file_path.replace("/", "\\");
}
// File paths should not be %-encoded
decode_url(fs_file_path)
}
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
match Url::parse(url.as_ref()) {
Ok(parsed_url) => parsed_url.fragment().unwrap_or("").to_string(),
Err(_err) => str!(),
}
}
/// Tells whether `url` parses as a URL with the `data` scheme.
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
    match Url::parse(url.as_ref()) {
        Ok(parsed_url) => parsed_url.scheme() == "data",
        // Unparsable input is never a data URL
        Err(_) => false,
    }
}
/// Tells whether `url` parses as a URL with the `file` scheme.
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
    match Url::parse(url.as_ref()) {
        Ok(parsed_url) => parsed_url.scheme() == "file",
        // Unparsable input is never a file URL
        Err(_) => false,
    }
}
/// Tells whether `url` parses as a URL with the `http` or `https` scheme.
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
    match Url::parse(url.as_ref()) {
        Ok(parsed_url) => {
            let scheme: &str = parsed_url.scheme();
            scheme == "http" || scheme == "https"
        }
        // Unparsable input is never an HTTP(S) URL
        Err(_) => false,
    }
}
/// Splits a data URL into its media type and its decoded payload.
///
/// The part of the URL path before the first comma is treated as
/// `;`-separated meta data: its first item is the media type, the remaining
/// items are scanned for a known encoding marker (`base64`, `utf8`,
/// `charset=UTF-8`, compared case-insensitively; the last one found wins).
/// The part after the comma is percent-decoded and, if the encoding is
/// `base64`, base64-decoded on top of that (yielding no bytes if it is not
/// valid base64).
///
/// Input that cannot be parsed as a URL is treated like `data:,`, i.e. it
/// results in an empty media type and no data.
pub fn parse_data_url<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
    // The fallback literal is only parsed when it is actually needed
    let parsed_url: Url =
        Url::parse(url.as_ref()).unwrap_or_else(|_| Url::parse("data:,").unwrap());
    let path: &str = parsed_url.path();

    // `find` yields a byte offset, so the path is split by byte slicing;
    // without a comma everything is meta data and the payload is empty
    let (meta_data, raw_data): (&str, &str) = match path.find(',') {
        Some(comma_loc) => (&path[..comma_loc], &path[comma_loc + 1..]),
        None => (path, ""),
    };
    let text: String = decode_url(raw_data.to_string());

    // `split` always produces at least one item, which is the media type
    let mut meta_data_items = meta_data.split(';');
    let media_type: String = meta_data_items.next().unwrap_or("").to_string();

    // Iterating instead of counting items keeps any number of parameters safe
    let encoding: &str = meta_data_items
        .filter(|item| {
            item.eq_ignore_ascii_case("base64")
                || item.eq_ignore_ascii_case("utf8")
                || item.eq_ignore_ascii_case("charset=UTF-8")
        })
        .last()
        .unwrap_or("");

    let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
        base64::decode(&text).unwrap_or_default()
    } else {
        text.into_bytes()
    };

    (media_type, data)
}
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
let result = if is_http_url(to.as_ref()) {
to.as_ref().to_string()
} else {
Url::parse(from.as_ref())?
.join(to.as_ref())?
.as_ref()
.to_string()
};
Ok(result)
}
/// Tells whether `url` parses as a URL that carries a non-empty scheme.
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
    match Url::parse(url.as_ref()) {
        Ok(parsed_url) => !parsed_url.scheme().is_empty(),
        // Input that does not parse as a URL is reported as having no protocol
        Err(_) => false,
    }
}
/// Appends `#fragment` to `url`.
///
/// An empty `fragment` leaves the URL unchanged (no trailing "#" is added).
pub fn url_with_fragment(url: &str, fragment: &str) -> String {
    if fragment.is_empty() {
        url.to_string()
    } else {
        [url, "#", fragment].concat()
    }
}

View File

@@ -1,10 +1,15 @@
use base64;
use reqwest::blocking::Client;
use reqwest::header::CONTENT_TYPE;
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use url::{form_urlencoded, ParseError, Url};
use crate::opts::Options;
use crate::url::{clean_url, file_url_to_fs_path, is_data_url, is_file_url, parse_data_url};
const ANSI_COLOR_RED: &'static str = "\x1b[31m";
const ANSI_COLOR_RESET: &'static str = "\x1b[0m";
const INDENT: &'static str = " ";
const MAGIC: [[&[u8]; 2]; 18] = [
// Image
@@ -29,35 +34,16 @@ const MAGIC: [[&[u8]; 2]; 18] = [
[b"....moov", b"video/quicktime"],
[b"\x1A\x45\xDF\xA3", b"video/webm"],
];
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
"application/javascript",
"image/svg+xml",
"text/css",
"text/html",
"text/javascript",
"text/plain",
// "text/css",
// "text/csv",
// "text/html",
// "text/javascript",
// "text/plain",
];
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String {
let media_type: String = if media_type.is_empty() {
detect_media_type(data, &url)
} else {
media_type.to_string()
};
let hash: String = if fragment != "" {
format!("#{}", fragment)
} else {
str!()
};
format!(
"data:{};base64,{}{}",
media_type,
base64::encode(data),
hash
)
}
pub fn detect_media_type(data: &[u8], url: &str) -> String {
for item in MAGIC.iter() {
if data.starts_with(item[0]) {
@@ -72,272 +58,123 @@ pub fn detect_media_type(data: &[u8], url: &str) -> String {
str!()
}
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme().len() > 0))
.unwrap_or(false)
}
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "data"))
.unwrap_or(false)
}
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "file"))
.unwrap_or(false)
}
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https"))
.unwrap_or(false)
}
pub fn is_plaintext_media_type(media_type: &str) -> bool {
PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
media_type.to_lowercase().as_str().starts_with("text/")
|| PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
}
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
let result = if is_http_url(to.as_ref()) {
to.as_ref().to_string()
} else {
Url::parse(from.as_ref())?
.join(to.as_ref())?
.as_ref()
.to_string()
};
Ok(result)
}
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
if Url::parse(url.as_ref()).unwrap().fragment() == None {
str!()
} else {
str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap())
pub fn indent(level: u32) -> String {
let mut result = str!();
let mut l: u32 = level;
while l > 0 {
result += INDENT;
l -= 1;
}
}
pub fn clean_url<T: AsRef<str>>(input: T) -> String {
let mut url = Url::parse(input.as_ref()).unwrap();
// Clear fragment
url.set_fragment(None);
// Get rid of stray question mark
if url.query() == Some("") {
url.set_query(None);
}
// Remove empty trailing ampersand(s)
let mut result: String = url.to_string();
while result.ends_with("&") {
result.pop();
}
result
}
pub fn data_url_to_text<T: AsRef<str>>(url: T) -> (String, String) {
let parsed_url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
let path: String = parsed_url.path().to_string();
let comma_loc: usize = path.find(',').unwrap_or(path.len());
let meta_data: String = path.chars().take(comma_loc).collect();
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
let data: String = decode_url(raw_data);
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
let mut encoding: &str = "";
let mut media_type: String = str!();
let mut text: String = str!();
let mut i: i8 = 0;
for item in &meta_data_items {
if i == 0 {
media_type = str!(item);
} else {
if item.eq_ignore_ascii_case("base64")
|| item.eq_ignore_ascii_case("utf8")
|| item.eq_ignore_ascii_case("charset=UTF-8")
{
encoding = item;
}
}
i = i + 1;
}
if is_plaintext_media_type(&media_type) || media_type.is_empty() {
if encoding.eq_ignore_ascii_case("base64") {
text = String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
} else {
text = data
}
}
(media_type, text)
}
/// Percent-decodes `input`.
///
/// Every `%XX` sequence with two hexadecimal digits is replaced by the byte it
/// encodes. All other characters, including `+`, `&` and `=`, are kept as
/// they are, and malformed sequences such as `%zz` or a trailing `%` are
/// copied through unchanged. Decoded bytes that are not valid UTF-8 are
/// replaced with U+FFFD.
pub fn decode_url(input: String) -> String {
    let bytes = input.as_bytes();
    let mut decoded: Vec<u8> = Vec::with_capacity(bytes.len());
    let mut pos = 0;

    while pos < bytes.len() {
        let escaped = if bytes[pos] == b'%' {
            match (bytes.get(pos + 1), bytes.get(pos + 2)) {
                (Some(&hi), Some(&lo)) => decode_url_hex_pair(hi, lo),
                _ => None,
            }
        } else {
            None
        };

        match escaped {
            Some(byte) => {
                decoded.push(byte);
                pos += 3;
            }
            None => {
                decoded.push(bytes[pos]);
                pos += 1;
            }
        }
    }

    String::from_utf8_lossy(&decoded).into_owned()
}

/// Combines two ASCII hexadecimal digits into the byte they spell, if both are valid.
fn decode_url_hex_pair(hi: u8, lo: u8) -> Option<u8> {
    let hi = char::from(hi).to_digit(16)?;
    let lo = char::from(lo).to_digit(16)?;
    // Both digits are below 16, so the sum is at most 255 and the cast is lossless
    Some((hi * 16 + lo) as u8)
}
/// Converts a `file:` URL into a file system path.
///
/// Returns an empty string when `is_file_url` rejects `url`. Otherwise the
/// scheme prefix and any fragment are removed, the remainder is decoded with
/// `decode_url` exactly once, and on Windows forward slashes become
/// backslashes.
pub fn file_url_to_fs_path(url: &str) -> String {
    if !is_file_url(url) {
        return String::new();
    }

    // Length of "file://"; one more on Windows to also skip the third slash
    // (presumably so the path starts at the drive letter)
    let cutoff_l = if cfg!(windows) { 8 } else { 7 };
    // `get` instead of indexing: a URL shorter than the prefix yields an empty path
    let after_scheme: &str = url.get(cutoff_l..).unwrap_or("");

    // The fragment starts at the first '#'; cut it off while the text is
    // still encoded so that lengths cannot get out of sync with the decoded path
    let encoded_path: &str = after_scheme.split('#').next().unwrap_or("");

    // File paths should not be %-encoded (decode once; a second pass would
    // corrupt names that contain a literal '%')
    let fs_file_path: String = decode_url(encoded_path.to_string());

    if cfg!(windows) {
        fs_file_path.replace("/", "\\")
    } else {
        fs_file_path
    }
}
pub fn retrieve_asset(
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
parent_url: &str,
url: &str,
as_data_url: bool,
media_type: &str,
opt_silent: bool,
) -> Result<(String, String), reqwest::Error> {
options: &Options,
depth: u32,
) -> Result<(Vec<u8>, String, String), reqwest::Error> {
if url.len() == 0 {
return Ok((str!(), str!()));
// Provoke error
client.get("").send()?;
}
if is_data_url(&url) {
if as_data_url {
Ok((url.to_string(), url.to_string()))
} else {
let (_media_type, text) = data_url_to_text(url);
Ok((text, url.to_string()))
}
let (media_type, data) = parse_data_url(url);
Ok((data, url.to_string(), media_type))
} else if is_file_url(&url) {
// Check if parent_url is also file:///
// (if not, then we don't embed the asset)
if !is_file_url(&parent_url) {
return Ok((str!(), str!()));
// Provoke error
client.get("").send()?;
}
let fs_file_path: String = file_url_to_fs_path(url);
let path = Path::new(&fs_file_path);
let url_fragment = get_url_fragment(url);
if path.exists() {
if !opt_silent {
eprintln!("{}", &url);
if !options.silent {
eprintln!("{}{}", indent(depth).as_str(), &url);
}
if as_data_url {
let data_url: String = data_to_data_url(
&media_type,
&fs::read(&fs_file_path).unwrap(),
&fs_file_path,
&url_fragment,
);
Ok((data_url, url.to_string()))
} else {
let data: String = fs::read_to_string(&fs_file_path).expect(url);
Ok((data, url.to_string()))
}
Ok((fs::read(&fs_file_path).expect(""), url.to_string(), str!()))
} else {
Ok((str!(), url.to_string()))
// Provoke error
Err(client.get("").send().unwrap_err())
}
} else {
let cache_key: String = clean_url(&url);
if cache.contains_key(&cache_key) {
// URL is in cache, we retrieve it
let data = cache.get(&cache_key).unwrap();
if !opt_silent {
eprintln!("{} (from cache)", &url);
// URL is in cache, we get and return it
if !options.silent {
eprintln!("{}{} (from cache)", indent(depth).as_str(), &url);
}
if as_data_url {
let url_fragment = get_url_fragment(url);
Ok((
data_to_data_url(media_type, data, url, &url_fragment),
url.to_string(),
))
} else {
Ok((String::from_utf8_lossy(data).to_string(), url.to_string()))
}
Ok((
cache.get(&cache_key).unwrap().to_vec(),
url.to_string(),
str!(),
))
} else {
// URL not in cache, we request it
let mut response = client.get(url).send()?;
let res_url = response.url().to_string();
// URL not in cache, we retrieve the file
match client.get(url).send() {
Ok(mut response) => {
if !options.ignore_errors && response.status() != 200 {
if !options.silent {
eprintln!(
"{}{}{} ({}){}",
indent(depth).as_str(),
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
response.status(),
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
// Provoke error
return Err(client.get("").send().unwrap_err());
}
if !opt_silent {
if url == res_url {
eprintln!("{}", &url);
} else {
eprintln!("{} -> {}", &url, &res_url);
}
}
let res_url = response.url().to_string();
let new_cache_key: String = clean_url(&res_url);
if !options.silent {
if url == res_url {
eprintln!("{}{}", indent(depth).as_str(), &url);
} else {
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &res_url);
}
}
if as_data_url {
// Convert response into a byte array
let mut data: Vec<u8> = vec![];
response.copy_to(&mut data)?;
let new_cache_key: String = clean_url(&res_url);
// Attempt to obtain media type by reading the Content-Type header
let media_type = if media_type == "" {
response
// Convert response into a byte array
let mut data: Vec<u8> = vec![];
response.copy_to(&mut data)?;
// Attempt to obtain media type by reading the Content-Type header
let media_type = response
.headers()
.get(CONTENT_TYPE)
.and_then(|header| header.to_str().ok())
.unwrap_or(&media_type)
} else {
media_type
};
let url_fragment = get_url_fragment(url);
let data_url = data_to_data_url(&media_type, &data, url, &url_fragment);
.unwrap_or("");
// Add to cache
cache.insert(new_cache_key, data);
// Add retrieved resource to cache
cache.insert(new_cache_key, data.clone());
Ok((data_url, res_url))
} else {
let content = response.text().unwrap();
// Add to cache
cache.insert(new_cache_key, content.as_bytes().to_vec());
Ok((content, res_url))
Ok((data, res_url, media_type.to_string()))
}
Err(error) => Err(error),
}
}
}

10
utils/run-in-container.sh Normal file
View File

@@ -0,0 +1,10 @@
#!/bin/sh

# Runs monolith inside a container, forwarding all command-line arguments.
# podman is used when it is installed, docker otherwise.

DOCKER=docker
PROG_NAME=monolith

# Redirections apply left to right: send stdout to /dev/null first, then
# point stderr at it, so the probe prints nothing when podman is absent
if command -v podman > /dev/null 2>&1; then
    DOCKER=podman
fi

$DOCKER run --rm Y2Z/$PROG_NAME $PROG_NAME "$@"