Compare commits
313 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8462b6bc31 | ||
|
|
92f38556b6 | ||
|
|
c0bdeab2e3 | ||
|
|
5a502eab4b | ||
|
|
19f08265a2 | ||
|
|
1d6392cb28 | ||
|
|
03cdc0e0b2 | ||
|
|
b98b7af0b4 | ||
|
|
73c35eaccb | ||
|
|
2c5d1e930b | ||
|
|
90f7c3a0d0 | ||
|
|
c1fec5967d | ||
|
|
09d41d2cf1 | ||
|
|
8f1da3c792 | ||
|
|
a8449a2b32 | ||
|
|
164e728ad3 | ||
|
|
8883bd6aca | ||
|
|
eae5d4dc6b | ||
|
|
ec85121d28 | ||
|
|
a8a85a4191 | ||
|
|
decd5b2119 | ||
|
|
bef6d848e9 | ||
|
|
4263e42cd1 | ||
|
|
23de5ced21 | ||
|
|
bc98aca2a2 | ||
|
|
438ebd520a | ||
|
|
ddb97009e9 | ||
|
|
6e67545b92 | ||
|
|
9e5d8ec691 | ||
|
|
fb835fae28 | ||
|
|
29bf042da0 | ||
|
|
d67483cf8e | ||
|
|
4140d8ebad | ||
|
|
2ac964fae5 | ||
|
|
ae5d6d2df4 | ||
|
|
2ed151d883 | ||
|
|
3cdfdc45d3 | ||
|
|
ac04af2cfc | ||
|
|
769953d7bd | ||
|
|
136dcc31cf | ||
|
|
44cac65a83 | ||
|
|
c3ca2ad1d5 | ||
|
|
0347fd3985 | ||
|
|
95d0083b3c | ||
|
|
3ce26b5fdd | ||
|
|
7f9458adfe | ||
|
|
5c229c51da | ||
|
|
f6ea16b3ad | ||
|
|
877b11d52c | ||
|
|
f9aac6f41b | ||
|
|
0a30c286fe | ||
|
|
ea56b9b4c1 | ||
|
|
e821591efe | ||
|
|
4e5d2fdc8d | ||
|
|
7c2ed2c9ca | ||
|
|
60d21ae071 | ||
|
|
bfdcd459e1 | ||
|
|
6c020dfa88 | ||
|
|
9894213393 | ||
|
|
80523c5a59 | ||
|
|
65b5ff4ec0 | ||
|
|
4e31d0433e | ||
|
|
ed82b96152 | ||
|
|
f16a2a9ed5 | ||
|
|
38d7873d6e | ||
|
|
d848179a43 | ||
|
|
399f515eeb | ||
|
|
46616f327b | ||
|
|
090d647390 | ||
|
|
4fa88b7aba | ||
|
|
3d678d80ee | ||
|
|
19a87f426e | ||
|
|
cbe3f9f554 | ||
|
|
b6a44c64cf | ||
|
|
84e2dd789c | ||
|
|
ac4945ca97 | ||
|
|
2ca2c7aff8 | ||
|
|
a18df74946 | ||
|
|
2bc8414cc1 | ||
|
|
c4569343a4 | ||
|
|
5f5820c71a | ||
|
|
4719a6fecf | ||
|
|
c999359b9f | ||
|
|
f22e2b6e68 | ||
|
|
31a9550f5b | ||
|
|
201f2d61b9 | ||
|
|
3ae4dfae8e | ||
|
|
7b095fe4ff | ||
|
|
890bcb1bb6 | ||
|
|
aa97ea9f82 | ||
|
|
9b40dbbf27 | ||
|
|
289f3e801b | ||
|
|
edacd09dc8 | ||
|
|
5682863725 | ||
|
|
4304d7a638 | ||
|
|
f56f88da94 | ||
|
|
87c8b361ea | ||
|
|
cd505ddb6c | ||
|
|
eeea617fb1 | ||
|
|
cc6dbddb49 | ||
|
|
9d3df2cdc6 | ||
|
|
ab601c3830 | ||
|
|
3738be2b6d | ||
|
|
53160f01c7 | ||
|
|
594ad55bd8 | ||
|
|
d2615f51dc | ||
|
|
c097733ae7 | ||
|
|
67d4b7dafc | ||
|
|
b1d6bbce0c | ||
|
|
20124f4891 | ||
|
|
0dd540afaf | ||
|
|
df71083359 | ||
|
|
349c7bb3ea | ||
|
|
5a30c6b44b | ||
|
|
929924accd | ||
|
|
812b46960c | ||
|
|
874080dbda | ||
|
|
93dd9d4ed4 | ||
|
|
3f0ced0143 | ||
|
|
8112ab6d04 | ||
|
|
e5fc05f5cd | ||
|
|
1068ff659a | ||
|
|
d4d9bbe424 | ||
|
|
cf3a8c8ede | ||
|
|
920d992459 | ||
|
|
c61b3ba858 | ||
|
|
dc6e564ea2 | ||
|
|
24536b5e18 | ||
|
|
908fd59019 | ||
|
|
a19aa37ea8 | ||
|
|
c46bd5900b | ||
|
|
5f98ed23b3 | ||
|
|
c6b135398a | ||
|
|
791e44796e | ||
|
|
b428dd8471 | ||
|
|
b88479446c | ||
|
|
1d6217ef5a | ||
|
|
746c7f05de | ||
|
|
29836d979a | ||
|
|
5ba6e33fa8 | ||
|
|
643c4ce7ef | ||
|
|
c011f90b76 | ||
|
|
875481b9a2 | ||
|
|
05275d864c | ||
|
|
4951fea730 | ||
|
|
b8315a7bd5 | ||
|
|
be25784297 | ||
|
|
b0f1c39175 | ||
|
|
f27d5fa23e | ||
|
|
4f2944a600 | ||
|
|
479c42e1ce | ||
|
|
933379c798 | ||
|
|
061386ccc2 | ||
|
|
59a8be493d | ||
|
|
a653bbe7d4 | ||
|
|
c7aab235d9 | ||
|
|
60ef631315 | ||
|
|
b800947151 | ||
|
|
808ce3e722 | ||
|
|
a92bba4ec5 | ||
|
|
a445098409 | ||
|
|
224d4fc480 | ||
|
|
d5ee8ae6ab | ||
|
|
c16e80f507 | ||
|
|
1c1f2c7128 | ||
|
|
efba6a048d | ||
|
|
1701425003 | ||
|
|
7654eec7e2 | ||
|
|
00942e0b1d | ||
|
|
8fbae735fa | ||
|
|
0d1e21e9ad | ||
|
|
3d2d40e7cd | ||
|
|
b8b6d8cff6 | ||
|
|
928664dc88 | ||
|
|
5c8d75539b | ||
|
|
ee2055a2a3 | ||
|
|
b4c46c59d4 | ||
|
|
8574b7899b | ||
|
|
969bfbdd59 | ||
|
|
63f3a204a6 | ||
|
|
094be09e90 | ||
|
|
23ceaed493 | ||
|
|
d9602e25eb | ||
|
|
0c50aa223b | ||
|
|
e5425ee9d0 | ||
|
|
f720fe0176 | ||
|
|
727a5a410c | ||
|
|
23af174822 | ||
|
|
5ef2b7c9dc | ||
|
|
1e8348543a | ||
|
|
f9bafe092d | ||
|
|
f876e9243c | ||
|
|
b6896febf1 | ||
|
|
29d2ba5857 | ||
|
|
8b1ebc7871 | ||
|
|
d753c83c76 | ||
|
|
47a825f5ed | ||
|
|
0e12cecd85 | ||
|
|
d8def879b2 | ||
|
|
0420854ed6 | ||
|
|
d47482fcd9 | ||
|
|
b68624f2f3 | ||
|
|
a9d114d04d | ||
|
|
4e4ebe9c98 | ||
|
|
429217d8f7 | ||
|
|
1779f4a374 | ||
|
|
26e89ae6d3 | ||
|
|
b333d19d04 | ||
|
|
c1dc798ded | ||
|
|
69d99b69e8 | ||
|
|
aae53d20f0 | ||
|
|
14cf2ce8a6 | ||
|
|
05985583f0 | ||
|
|
651fa716b4 | ||
|
|
67b79e92f9 | ||
|
|
b51f41fe34 | ||
|
|
6f158dc6db | ||
|
|
8d7052b39c | ||
|
|
660511b8a0 | ||
|
|
9be3982dc6 | ||
|
|
27c9fb4cd3 | ||
|
|
929512f4f5 | ||
|
|
a46d89cefc | ||
|
|
f93646e17a | ||
|
|
9d14b6dfea | ||
|
|
9783b96524 | ||
|
|
106efe58ce | ||
|
|
6e99ad13e7 | ||
|
|
413dd66886 | ||
|
|
dc7ec6e7a8 | ||
|
|
ed879231af | ||
|
|
ddf4b8ac13 | ||
|
|
84c13f0605 | ||
|
|
ce03e0e487 | ||
|
|
63e19998d0 | ||
|
|
e3321bbb07 | ||
|
|
0a38cd0eae | ||
|
|
75fb6961ed | ||
|
|
5ba8931502 | ||
|
|
13d2ea1607 | ||
|
|
88ffde0c3b | ||
|
|
bfb97bd062 | ||
|
|
295931041c | ||
|
|
2e623dd9f8 | ||
|
|
169b9657e5 | ||
|
|
dab4ae6965 | ||
|
|
c7fc121c7c | ||
|
|
9ff9dd0928 | ||
|
|
b0fc24d77f | ||
|
|
d8abfaf25c | ||
|
|
565acdef25 | ||
|
|
4637fed15c | ||
|
|
9a7ea4fdde | ||
|
|
3d4a932ac1 | ||
|
|
cf70383165 | ||
|
|
9fe913d853 | ||
|
|
862489e41b | ||
|
|
919e626b5e | ||
|
|
cf347e0483 | ||
|
|
322ab41b8c | ||
|
|
1a7336e809 | ||
|
|
65d0eab793 | ||
|
|
292221ea28 | ||
|
|
614af44c92 | ||
|
|
feb37f5812 | ||
|
|
028beb821c | ||
|
|
76ccff80f9 | ||
|
|
45335d7507 | ||
|
|
a4743ca92f | ||
|
|
b96a777e8a | ||
|
|
4decea716c | ||
|
|
695a787206 | ||
|
|
90e6cb1c45 | ||
|
|
7412d663e0 | ||
|
|
8646af6e9f | ||
|
|
de383c94b1 | ||
|
|
ab65b44f0d | ||
|
|
13bacb4320 | ||
|
|
d574e9a5da | ||
|
|
1de0fc0961 | ||
|
|
ebbf755e09 | ||
|
|
d3956a7905 | ||
|
|
ef7ddcd434 | ||
|
|
11bbfc0851 | ||
|
|
a2bf7e3345 | ||
|
|
35f5e1353d | ||
|
|
f8040f4d8c | ||
|
|
31d3fee626 | ||
|
|
178abd07bd | ||
|
|
491185e191 | ||
|
|
b0c55d5016 | ||
|
|
1ff5e91087 | ||
|
|
550e4cc83f | ||
|
|
5443c0cc3f | ||
|
|
8add3a8746 | ||
|
|
2f592d5561 | ||
|
|
55fe523a1c | ||
|
|
b5d42bd722 | ||
|
|
cbf3b66f33 | ||
|
|
2e48ea90e1 | ||
|
|
9c006f3258 | ||
|
|
ab24851b5b | ||
|
|
de11559efa | ||
|
|
dbacd76103 | ||
|
|
0896f2e214 | ||
|
|
b6ba22513d | ||
|
|
3948ea3aa0 | ||
|
|
8b3f3f3a6e | ||
|
|
eec05767cf | ||
|
|
c05dc2ae65 | ||
|
|
88a230872c | ||
|
|
ac79a52da0 | ||
|
|
04cbbefafa |
22
.github/workflows/build_gnu_linux.yml
vendored
Normal file
22
.github/workflows/build_gnu_linux.yml
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
name: GNU/Linux
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
os:
|
||||
- ubuntu-latest
|
||||
rust:
|
||||
- stable
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- run: git config --global core.autocrlf false
|
||||
- uses: actions/checkout@v2
|
||||
- name: Build
|
||||
run: cargo build --all --locked --verbose
|
||||
22
.github/workflows/build_macos.yml
vendored
Normal file
22
.github/workflows/build_macos.yml
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
name: macOS
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
os:
|
||||
- macos-latest
|
||||
rust:
|
||||
- stable
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- run: git config --global core.autocrlf false
|
||||
- uses: actions/checkout@v2
|
||||
- name: Build
|
||||
run: cargo build --all --locked --verbose
|
||||
22
.github/workflows/build_windows.yml
vendored
Normal file
22
.github/workflows/build_windows.yml
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
name: Windows
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
os:
|
||||
- windows-latest
|
||||
rust:
|
||||
- stable
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- run: git config --global core.autocrlf false
|
||||
- uses: actions/checkout@v2
|
||||
- name: Build
|
||||
run: cargo build --all --locked --verbose
|
||||
67
.github/workflows/cd.yml
vendored
Normal file
67
.github/workflows/cd.yml
vendored
Normal file
@@ -0,0 +1,67 @@
|
||||
# CD GitHub Actions workflow for Monolith
|
||||
|
||||
name: CD
|
||||
|
||||
on:
|
||||
release:
|
||||
types:
|
||||
- created
|
||||
|
||||
jobs:
|
||||
|
||||
windows:
|
||||
runs-on: windows-2019
|
||||
steps:
|
||||
- run: git config --global core.autocrlf false
|
||||
- name: Checkout the repository
|
||||
uses: actions/checkout@v2
|
||||
- name: Build the executable
|
||||
run: cargo build --release
|
||||
- uses: Shopify/upload-to-release@1.0.0
|
||||
with:
|
||||
name: monolith.exe
|
||||
path: target\release\monolith.exe
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
gnu_linux_armhf:
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
- name: Checkout the repository
|
||||
uses: actions/checkout@v2
|
||||
- name: Prepare cross-platform environment
|
||||
run: |
|
||||
sudo mkdir -p /cross-build-arm
|
||||
sudo touch /etc/apt/sources.list.d/armhf.list
|
||||
echo "deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ bionic main" | sudo tee -a /etc/apt/sources.list.d/armhf.list
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y gcc-arm-linux-gnueabihf libc6-armhf-cross libc6-dev-armhf-cross
|
||||
sudo apt-get download libssl1.1:armhf libssl-dev:armhf
|
||||
sudo dpkg -x libssl1.1*.deb /cross-build-arm
|
||||
sudo dpkg -x libssl-dev*.deb /cross-build-arm
|
||||
rustup target add arm-unknown-linux-gnueabihf
|
||||
echo "::set-env name=C_INCLUDE_PATH::/cross-build-arm/usr/include"
|
||||
echo "::set-env name=OPENSSL_INCLUDE_DIR::/cross-build-arm/usr/include/arm-linux-gnueabihf"
|
||||
echo "::set-env name=OPENSSL_LIB_DIR::/cross-build-arm/usr/lib/arm-linux-gnueabihf"
|
||||
echo "::set-env name=PKG_CONFIG_ALLOW_CROSS::1"
|
||||
echo "::set-env name=RUSTFLAGS::-C linker=arm-linux-gnueabihf-gcc -L/usr/arm-linux-gnueabihf/lib -L/cross-build-arm/usr/lib/arm-linux-gnueabihf -L/cross-build-arm/lib/arm-linux-gnueabihf"
|
||||
- name: Build the executable
|
||||
run: cargo build --release --target=arm-unknown-linux-gnueabihf
|
||||
- name: Attach artifact to the release
|
||||
uses: Shopify/upload-to-release@1.0.0
|
||||
with:
|
||||
name: monolith-gnu-linux-armhf
|
||||
path: target/arm-unknown-linux-gnueabihf/release/monolith
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
gnu_linux_x86_64:
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
- name: Checkout the repository
|
||||
uses: actions/checkout@v2
|
||||
- name: Build the executable
|
||||
run: cargo build --release
|
||||
- uses: Shopify/upload-to-release@1.0.0
|
||||
with:
|
||||
name: monolith-gnu-linux-x86_64
|
||||
path: target/release/monolith
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
32
.github/workflows/ci.yml
vendored
Normal file
32
.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build_and_test:
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
os:
|
||||
- ubuntu-latest
|
||||
- macos-latest
|
||||
- windows-latest
|
||||
rust:
|
||||
- stable
|
||||
- beta
|
||||
- nightly
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- run: git config --global core.autocrlf false
|
||||
- uses: actions/checkout@v2
|
||||
- name: Build
|
||||
run: cargo build --all --locked --verbose
|
||||
- name: Run tests
|
||||
run: cargo test --all --locked --verbose
|
||||
- name: Check code formatting
|
||||
run: |
|
||||
rustup component add rustfmt
|
||||
cargo fmt --all -- --check
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -2,9 +2,5 @@
|
||||
# will have compiled files and executables
|
||||
/target/
|
||||
|
||||
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
|
||||
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
|
||||
Cargo.lock
|
||||
|
||||
# These are backup files generated by rustfmt
|
||||
**/*.rs.bk
|
||||
|
||||
17
.travis.yml
17
.travis.yml
@@ -1,17 +0,0 @@
|
||||
language: rust
|
||||
cache: cargo
|
||||
|
||||
sudo: false
|
||||
|
||||
os:
|
||||
- linux
|
||||
- osx
|
||||
|
||||
rust:
|
||||
- stable
|
||||
- beta
|
||||
- nightly
|
||||
|
||||
script:
|
||||
- cargo build --verbose
|
||||
- cargo test --verbose
|
||||
1750
Cargo.lock
generated
Normal file
1750
Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load Diff
30
Cargo.toml
30
Cargo.toml
@@ -1,18 +1,30 @@
|
||||
[package]
|
||||
name = "monolith"
|
||||
version = "2.0.16"
|
||||
version = "2.3.1"
|
||||
edition = "2018"
|
||||
authors = [
|
||||
"Sunshine <sunshine@uberspace.net>",
|
||||
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
|
||||
"Emmanuel Delaborde <th3rac25@gmail.com>",
|
||||
"Emi Simpson <emi@alchemi.dev>",
|
||||
"rhysd <lin90162@yahoo.co.jp>",
|
||||
]
|
||||
description = "CLI tool for saving web pages as a single HTML file"
|
||||
|
||||
[dependencies]
|
||||
base64 = "0.10.1"
|
||||
clap = "2.33.0"
|
||||
html5ever = "0.24.0"
|
||||
indicatif = "0.11.0"
|
||||
lazy_static = "1.3.0"
|
||||
regex = "1.2.1"
|
||||
reqwest = "0.9.20"
|
||||
url = "2.1.0"
|
||||
base64 = "0.12.3"
|
||||
chrono = "0.4.13" # Used to render comments indicating the time the page was saved
|
||||
clap = "2.33.1"
|
||||
cssparser = "0.27.2"
|
||||
html5ever = "0.24.1"
|
||||
sha2 = "0.9.1" # Used in calculating checksums during integrity checks
|
||||
url = "2.1.1"
|
||||
|
||||
[dependencies.reqwest]
|
||||
version = "0.10.6"
|
||||
default-features = false
|
||||
features = ["default-tls", "blocking", "gzip"]
|
||||
|
||||
[dev-dependencies]
|
||||
assert_cmd = "1.0.1"
|
||||
tempfile = "3.1.0"
|
||||
|
||||
18
Dockerfile
Normal file
18
Dockerfile
Normal file
@@ -0,0 +1,18 @@
|
||||
FROM rust
|
||||
|
||||
WORKDIR /usr/local/src/
|
||||
RUN curl -s https://api.github.com/repos/y2z/monolith/releases/latest \
|
||||
| grep "tarball_url.*\"," \
|
||||
| cut -d '"' -f 4 \
|
||||
| wget -qi - -O monolith.tar.gz
|
||||
|
||||
RUN tar xfz monolith.tar.gz \
|
||||
&& mv Y2Z-monolith-* monolith \
|
||||
&& rm monolith.tar.gz
|
||||
|
||||
WORKDIR /usr/local/src/monolith
|
||||
RUN ls -a
|
||||
RUN make install
|
||||
|
||||
WORKDIR /tmp
|
||||
CMD ["/usr/local/cargo/bin/monolith"]
|
||||
29
Makefile
Normal file
29
Makefile
Normal file
@@ -0,0 +1,29 @@
|
||||
# Makefile for monolith
|
||||
|
||||
all: build
|
||||
.PHONY: all
|
||||
|
||||
build:
|
||||
@cargo build --locked
|
||||
.PHONY: build
|
||||
|
||||
test: build
|
||||
@cargo test --locked
|
||||
@cargo fmt --all -- --check
|
||||
.PHONY: test_code_formatting
|
||||
|
||||
lint:
|
||||
@cargo fmt --all --
|
||||
.PHONY: lint
|
||||
|
||||
install:
|
||||
@cargo install --force --locked --path .
|
||||
.PHONY: install
|
||||
|
||||
uninstall:
|
||||
@cargo uninstall
|
||||
.PHONY: uninstall
|
||||
|
||||
clean:
|
||||
@cargo clean
|
||||
.PHONY: clean
|
||||
92
README.md
92
README.md
@@ -1,34 +1,94 @@
|
||||
[](https://travis-ci.org/Y2Z/monolith)
|
||||
[](https://ci.appveyor.com/project/vflyson/monolith)
|
||||
[](https://github.com/Y2Z/monolith/actions?query=workflow%3AGNU%2FLinux)
|
||||
[](https://github.com/Y2Z/monolith/actions?query=workflow%3AmacOS)
|
||||
[](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows)
|
||||
|
||||
# monolith
|
||||
```
|
||||
_____ ______________ __________ ___________________ ___
|
||||
| \ / \ | | | | | |
|
||||
| \_/ __ \_| __ | | ___ ___ |__| |
|
||||
| | | | | | | | | | | |
|
||||
| |\ /| |__| _ |__| |____| | | | | __ |
|
||||
| | \___/ | | \ | | | | | | |
|
||||
|___| |__________| \_____________________| |___| |___| |___|
|
||||
```
|
||||
|
||||
A data hoarder's dream come true: bundle any web page into a single HTML file.
|
||||
You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
|
||||
A data hoarder’s dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
|
||||
|
||||
Unlike the conventional "Save page as", `monolith` not only saves the target document, it embeds CSS, image, and JavaScript assets **all at once**, producing a single HTML5 document that is a joy to store and share.
|
||||
Unlike the conventional “Save page as”, `monolith` not only saves the target document, it embeds CSS, image, and JavaScript assets **all at once**, producing a single HTML5 document that is a joy to store and share.
|
||||
|
||||
If compared to saving websites with `wget -mpk`, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available.
|
||||
|
||||
<!-- `This program works both on remote and local targets. -->
|
||||
---------------------------------------------------
|
||||
|
||||
## Installation
|
||||
|
||||
#### Via Homebrew (on macOS and GNU/Linux)
|
||||
$ brew install monolith
|
||||
|
||||
#### Using Snapcraft (on GNU/Linux)
|
||||
$ snap install monolith
|
||||
|
||||
#### Using Ports collection (on FreeBSD and TrueOS)
|
||||
$ cd /usr/ports/www/monolith/
|
||||
$ make install clean
|
||||
|
||||
#### Using pre-built binaries (Windows, ARM-based devices, etc)
|
||||
Every [release](https://github.com/Y2Z/monolith/releases) contains pre-built binaries for Windows, GNU/Linux, as well as platforms with non-standart CPU architecture.
|
||||
|
||||
#### From source
|
||||
|
||||
Dependency: `libssl-dev`
|
||||
|
||||
### Installation
|
||||
$ git clone https://github.com/Y2Z/monolith.git
|
||||
$ cd monolith
|
||||
$ cargo install --path .
|
||||
$ make install
|
||||
|
||||
### Usage
|
||||
$ monolith https://lyrics.github.io/db/p/portishead/dummy/roads/ > portishead-roads-lyrics.html
|
||||
#### Using Containers
|
||||
The guide can be found [here](docs/containers.md)
|
||||
|
||||
### Options
|
||||
---------------------------------------------------
|
||||
|
||||
## Usage
|
||||
$ monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roads-lyrics.html
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
## Options
|
||||
- `-c`: Ignore styles
|
||||
- `-f`: Exclude frames
|
||||
- `-F`: Omit web fonts
|
||||
- `-i`: Remove images
|
||||
- `-I`: Isolate the document
|
||||
- `-j`: Exclude JavaScript
|
||||
- `-k`: Accept invalid X.509 (TLS) certificates
|
||||
- `-s`: Silent mode
|
||||
- `-u`: Specify custom User-Agent
|
||||
- `-o`: Write output to file
|
||||
- `-s`: Be quiet
|
||||
- `-t`: Adjust network request timeout
|
||||
- `-u`: Provide custom User-Agent
|
||||
|
||||
### License
|
||||
---------------------------------------------------
|
||||
|
||||
## HTTPS and HTTP proxies
|
||||
Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables.
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
## Contributing
|
||||
Please open an issue if something is wrong, that helps make this project better.
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
## Related projects
|
||||
- `Monolith Chrome Extension`: https://github.com/rhysd/monolith-of-web
|
||||
- `Pagesaver`: https://github.com/distributed-mind/pagesaver
|
||||
- `Personal WayBack Machine`: https://github.com/popey/pwbm
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
## License
|
||||
The Unlicense
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
<!-- Microtext -->
|
||||
<sub>Keep in mind that `monolith` is not aware of your browser's session</sub>
|
||||
<sub>Keep in mind that `monolith` is not aware of your browser’s session</sub>
|
||||
|
||||
128
appveyor.yml
128
appveyor.yml
@@ -1,128 +0,0 @@
|
||||
# Appveyor configuration template for Rust using rustup for Rust installation
|
||||
# https://github.com/starkat99/appveyor-rust
|
||||
|
||||
## Operating System (VM environment) ##
|
||||
|
||||
# Rust needs at least Visual Studio 2013 Appveyor OS for MSVC targets.
|
||||
os: Visual Studio 2015
|
||||
|
||||
## Build Matrix ##
|
||||
|
||||
# This configuration will setup a build for each channel & target combination (12 windows
|
||||
# combinations in all).
|
||||
#
|
||||
# There are 3 channels: stable, beta, and nightly.
|
||||
#
|
||||
# Alternatively, the full version may be specified for the channel to build using that specific
|
||||
# version (e.g. channel: 1.5.0)
|
||||
#
|
||||
# The values for target are the set of windows Rust build targets. Each value is of the form
|
||||
#
|
||||
# ARCH-pc-windows-TOOLCHAIN
|
||||
#
|
||||
# Where ARCH is the target architecture, either x86_64 or i686, and TOOLCHAIN is the linker
|
||||
# toolchain to use, either msvc or gnu. See https://www.rust-lang.org/downloads.html#win-foot for
|
||||
# a description of the toolchain differences.
|
||||
# See https://github.com/rust-lang-nursery/rustup.rs/#toolchain-specification for description of
|
||||
# toolchains and host triples.
|
||||
#
|
||||
# Comment out channel/target combos you do not wish to build in CI.
|
||||
#
|
||||
# You may use the `cargoflags` and `RUSTFLAGS` variables to set additional flags for cargo commands
|
||||
# and rustc, respectively. For instance, you can uncomment the cargoflags lines in the nightly
|
||||
# channels to enable unstable features when building for nightly. Or you could add additional
|
||||
# matrix entries to test different combinations of features.
|
||||
environment:
|
||||
matrix:
|
||||
|
||||
### MSVC Toolchains ###
|
||||
|
||||
# Stable 64-bit MSVC
|
||||
- channel: stable
|
||||
target: x86_64-pc-windows-msvc
|
||||
# Stable 32-bit MSVC
|
||||
- channel: stable
|
||||
target: i686-pc-windows-msvc
|
||||
# Beta 64-bit MSVC
|
||||
- channel: beta
|
||||
target: x86_64-pc-windows-msvc
|
||||
# Beta 32-bit MSVC
|
||||
- channel: beta
|
||||
target: i686-pc-windows-msvc
|
||||
# Nightly 64-bit MSVC
|
||||
- channel: nightly
|
||||
target: x86_64-pc-windows-msvc
|
||||
#cargoflags: --features "unstable"
|
||||
# Nightly 32-bit MSVC
|
||||
- channel: nightly
|
||||
target: i686-pc-windows-msvc
|
||||
#cargoflags: --features "unstable"
|
||||
|
||||
### GNU Toolchains ###
|
||||
|
||||
# Stable 64-bit GNU
|
||||
- channel: stable
|
||||
target: x86_64-pc-windows-gnu
|
||||
MINGW_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin'
|
||||
# Stable 32-bit GNU
|
||||
- channel: stable
|
||||
target: i686-pc-windows-gnu
|
||||
MINGW_PATH: 'C:\MinGW\bin'
|
||||
# Beta 64-bit GNU
|
||||
- channel: beta
|
||||
target: x86_64-pc-windows-gnu
|
||||
MINGW_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin'
|
||||
# Beta 32-bit GNU
|
||||
- channel: beta
|
||||
target: i686-pc-windows-gnu
|
||||
MINGW_PATH: 'C:\MinGW\bin'
|
||||
# Nightly 64-bit GNU
|
||||
- channel: nightly
|
||||
target: x86_64-pc-windows-gnu
|
||||
MINGW_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin'
|
||||
#cargoflags: --features "unstable"
|
||||
# Nightly 32-bit GNU
|
||||
- channel: nightly
|
||||
target: i686-pc-windows-gnu
|
||||
MINGW_PATH: 'C:\MinGW\bin'
|
||||
#cargoflags: --features "unstable"
|
||||
|
||||
### Allowed failures ###
|
||||
|
||||
# See Appveyor documentation for specific details. In short, place any channel or targets you wish
|
||||
# to allow build failures on (usually nightly at least is a wise choice). This will prevent a build
|
||||
# or test failure in the matching channels/targets from failing the entire build.
|
||||
matrix:
|
||||
allow_failures:
|
||||
- channel: nightly
|
||||
|
||||
# If you only care about stable channel build failures, uncomment the following line:
|
||||
#- channel: beta
|
||||
|
||||
## Install Script ##
|
||||
|
||||
# This is the most important part of the Appveyor configuration. This installs the version of Rust
|
||||
# specified by the 'channel' and 'target' environment variables from the build matrix. This uses
|
||||
# rustup to install Rust.
|
||||
#
|
||||
# For simple configurations, instead of using the build matrix, you can simply set the
|
||||
# default-toolchain and default-host manually here.
|
||||
install:
|
||||
- appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
|
||||
- rustup-init -yv --default-toolchain %channel% --default-host %target%
|
||||
- set PATH=%PATH%;%USERPROFILE%\.cargo\bin
|
||||
- if defined MINGW_PATH set PATH=%PATH%;%MINGW_PATH%
|
||||
- rustc -vV
|
||||
- cargo -vV
|
||||
|
||||
## Build Script ##
|
||||
|
||||
# 'cargo test' takes care of building for us, so disable Appveyor's build stage. This prevents
|
||||
# the "directory does not contain a project or solution file" error.
|
||||
build: false
|
||||
|
||||
# Uses 'cargo test' to run tests and build. Alternatively, the project may call compiled programs
|
||||
#directly or perform other testing commands. Rust will automatically be placed in the PATH
|
||||
# environment variable.
|
||||
test_script:
|
||||
- cargo test --verbose %cargoflags%
|
||||
BIN
assets/icon/icon.blend
Normal file
BIN
assets/icon/icon.blend
Normal file
Binary file not shown.
BIN
assets/icon/icon.png
Normal file
BIN
assets/icon/icon.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.2 MiB |
19
docs/arch/0001-record-architecture-decisions.md
Normal file
19
docs/arch/0001-record-architecture-decisions.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# 1. Record architecture decisions
|
||||
|
||||
Date: 2019-12-25
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
We need to record the architectural decisions made on this project.
|
||||
|
||||
## Decision
|
||||
|
||||
We will use Architecture Decision Records, as [described by Michael Nygard](http://thinkrelevance.com/blog/2011/11/15/documenting-architecture-decisions).
|
||||
|
||||
## Consequences
|
||||
|
||||
See Michael Nygard's article, linked above. For a lightweight ADR toolset, see Nat Pryce's [adr-tools](https://github.com/npryce/adr-tools).
|
||||
19
docs/arch/0002-noscript-nodes.md
Normal file
19
docs/arch/0002-noscript-nodes.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# 2. NOSCRIPT nodes
|
||||
|
||||
Date: 2020-04-16
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
HTML pages can contain `noscript` nodes, which reveal their contents only in case when JavaScript is not available. Most of the time they contain hidden messages that inform about certain JavaScript-dependent features not being operational, however sometimes can also feature media assets or even iframes.
|
||||
|
||||
## Decision
|
||||
|
||||
When the document is being saved with or without JavaScript, each `noscript` node should be preserved while its children need to be processed exactly the same way as the rest of the document. This approach will ensure that even hidden remote assets are embedded — since those hidden elements may have to be displayed later in a browser that has JavaScript turned off. An option should be available to "unwrap" all `noscript` nodes in order to make their contents always visible in the document, complimenting the "disable JS" function of the program.
|
||||
|
||||
## Consequences
|
||||
|
||||
Saved documents will have contents of all `noscript` nodes processed as if they are part of the document's DOM, therefore properly display images encapsulated within `noscript` nodes when being viewed in browsers that have JavaScript turned off (or have no JavaScript support in the first place). The new option to "unwrap" `noscript` elements will help the user ensure that the resulting document always represents what the original web page looked like in a browser that had JavaScript turned off.
|
||||
21
docs/arch/0003-network-request-timeout.md
Normal file
21
docs/arch/0003-network-request-timeout.md
Normal file
@@ -0,0 +1,21 @@
|
||||
# 2. Network request timeout
|
||||
|
||||
Date: 2020-02-15
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
A slow network connection and overloaded server may negatively impact network response time.
|
||||
|
||||
## Decision
|
||||
|
||||
Make the program simulate behavior of popular web browsers and CLI tools, where the default network response timeout is most often set to 120 seconds.
|
||||
|
||||
Instead of featuring retries for timed out network requests, the program should have an option to adjust the timeout length, along with making it indefinite when given "0" as its value.
|
||||
|
||||
## Consequences
|
||||
|
||||
The user is able to retrieve resources that have long response time, as well as obtain full control over how soon, and if at all, network requests should time out.
|
||||
21
docs/arch/0004-asset-integrity-check.md
Normal file
21
docs/arch/0004-asset-integrity-check.md
Normal file
@@ -0,0 +1,21 @@
|
||||
# 4. Asset integrity check
|
||||
|
||||
Date: 2020-02-23
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
In HTML5, `link` and `script` nodes have an attribute named `integrity`, which lets the browser check if the remote file is valid, mostly for the purpose of enhancing page security.
|
||||
|
||||
## Decision
|
||||
|
||||
In order to replicate the browser's behavior, the program should perform integrity check the same way it does, excluding the linked asset from the final result if such check fails.
|
||||
|
||||
The `integrity` attribute should be removed from nodes, as it bears no benefit for resources embedded as data URLs.
|
||||
|
||||
## Consequences
|
||||
|
||||
Assets that fail to pass the check get excluded from the saved document. Meanwhile, saved documents no longer contain integrity attributes on all `link` and `script` nodes.
|
||||
19
docs/arch/0005-asset-minimization.md
Normal file
19
docs/arch/0005-asset-minimization.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# 4. Asset Minimization
|
||||
|
||||
Date: 2020-03-14
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
It may look like a good idea to make monolith compress retrieved assets while saving the page for the purpose of reducing the resulting document's file size.
|
||||
|
||||
## Decision
|
||||
|
||||
Given that the main purpose of this program is to save pages in a convenient to store and share manner — it's mostly an archiving tool, aside from being able to tell monolith to exclude certain types of asests (e.g. images, CSS, JavaScript), it would be outside of scope of this program to implement code for compressing assets. Minimizing files before embedding them does not reduce the amount of data that needs to be transferred either. A separate tool can be used later to compress and minimize pages saved by monolith, if needed.
|
||||
|
||||
## Consequences
|
||||
|
||||
Monolith will not support modification of original document assets for the purpose of reducing their size, sticking to performing only minimal amount of modifications to the original web page — whatever is needed to provide security or exclude unwanted asset types.
|
||||
19
docs/arch/0006-reload-and-location-meta-tags.md
Normal file
19
docs/arch/0006-reload-and-location-meta-tags.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# 4. Reload and location `meta` tags
|
||||
|
||||
Date: 2020-06-25
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
HTML documents may contain `meta` tags capable of automatically refreshing the page or redirecting to another location.
|
||||
|
||||
## Decision
|
||||
|
||||
Since the resulting document is saved to disk and generally not intended to be served over the network, it only makes sense to remove `meta` tags that have `http-equiv` attribute equal to "Refresh" or "Location", in order to prevent them from reloading the page or redirecting to another location.
|
||||
|
||||
## Consequences
|
||||
|
||||
Monolith will ensure that saved documents do not contain `meta` tags capable of changing location or reloading the page.
|
||||
15
docs/containers.md
Normal file
15
docs/containers.md
Normal file
@@ -0,0 +1,15 @@
|
||||
1. Run `docker build -t y2z/monolith .` to create a Docker image
|
||||
|
||||
2. Create a file named `monolith` which contains:
|
||||
```sh
|
||||
#!/bin/sh
|
||||
|
||||
docker run --rm \
|
||||
y2z/monolith \
|
||||
monolith \
|
||||
"$@"
|
||||
```
|
||||
3. Make the file executable (`chmod +x monolith`) and include it into your `$PATH`
|
||||
4. Now you should be able to run a containerized build of monolith like this:
|
||||
`monolith -I https://github.com > document.html`
|
||||
|
||||
3
docs/references.md
Normal file
3
docs/references.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# References
|
||||
|
||||
- https://content-security-policy.com/
|
||||
10
docs/web-apps.md
Normal file
10
docs/web-apps.md
Normal file
@@ -0,0 +1,10 @@
|
||||
# Web apps that can be saved with Monolith
|
||||
|
||||
These apps retain most or all of their functionality when saved with Monolith
|
||||
|
||||
|Converse|https://conversejs.org|
|
||||
|:--|:--|
|
||||
|Description|An XMPP client built using web technologies|
|
||||
|Functionality retained|**full**|
|
||||
|Command to use|`monolith https://conversejs.org/fullscreen.html > conversejs.html`|
|
||||
|Monolith version used|2.2.7|
|
||||
@@ -1,6 +1,7 @@
|
||||
name: monolith
|
||||
base: core18
|
||||
version: git
|
||||
# Version data defined inside the monolith part below
|
||||
adopt-info: monolith
|
||||
summary: Monolith - Save HTML pages with ease
|
||||
description: |
|
||||
A data hoarder's dream come true: bundle any web page into a single
|
||||
@@ -17,6 +18,14 @@ description: |
|
||||
|
||||
confinement: strict
|
||||
|
||||
architectures:
|
||||
- build-on: amd64
|
||||
- build-on: arm64
|
||||
- build-on: armhf
|
||||
- build-on: i386
|
||||
- build-on: ppc64el
|
||||
- build-on: s390x
|
||||
|
||||
parts:
|
||||
monolith:
|
||||
plugin: rust
|
||||
@@ -24,6 +33,21 @@ parts:
|
||||
build-packages:
|
||||
- libssl-dev
|
||||
- pkg-config
|
||||
override-pull: |
|
||||
snapcraftctl pull
|
||||
# Determine the current tag
|
||||
last_committed_tag="$(git describe --tags --abbrev=0)"
|
||||
last_committed_tag_ver="$(echo ${last_committed_tag} | sed 's/v//')"
|
||||
# Determine the most recent version in the beta channel in the Snap Store
|
||||
last_released_tag="$(snap info $SNAPCRAFT_PROJECT_NAME | awk '$1 == "beta:" { print $2 }')"
|
||||
# If the latest tag from the upstream project has not been released to
|
||||
# beta, build that tag instead of master.
|
||||
if [ "${last_committed_tag_ver}" != "${last_released_tag}" ]; then
|
||||
git fetch
|
||||
git checkout "${last_committed_tag}"
|
||||
fi
|
||||
# set version number of the snap based on what we did above
|
||||
snapcraftctl set-version $(git describe --tags --abbrev=0)
|
||||
|
||||
apps:
|
||||
monolith:
|
||||
|
||||
446
src/css.rs
Normal file
446
src/css.rs
Normal file
@@ -0,0 +1,446 @@
|
||||
use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::opts::Options;
|
||||
use crate::url::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, url_with_fragment};
|
||||
use crate::utils::retrieve_asset;
|
||||
|
||||
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
|
||||
// Universal
|
||||
"background",
|
||||
"background-image",
|
||||
"border-image",
|
||||
"border-image-source",
|
||||
"content",
|
||||
"cursor",
|
||||
"list-style",
|
||||
"list-style-image",
|
||||
"mask",
|
||||
"mask-image",
|
||||
// Specific to @counter-style
|
||||
"additive-symbols",
|
||||
"negative",
|
||||
"pad",
|
||||
"prefix",
|
||||
"suffix",
|
||||
"symbols",
|
||||
];
|
||||
const CSS_SPECIAL_CHARS: &str = "~!@$%^&*()+=,./'\";:?><[]{}|`#";
|
||||
|
||||
pub fn is_image_url_prop(prop_name: &str) -> bool {
|
||||
CSS_PROPS_WITH_IMAGE_URLS
|
||||
.iter()
|
||||
.find(|p| prop_name.eq_ignore_ascii_case(p))
|
||||
.is_some()
|
||||
}
|
||||
|
||||
pub fn enquote(input: String, double: bool) -> String {
|
||||
if double {
|
||||
format!("\"{}\"", input.replace("\"", "\\\""))
|
||||
} else {
|
||||
format!("'{}'", input.replace("'", "\\'"))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn escape(value: &str) -> String {
|
||||
let mut res = str!(&value);
|
||||
|
||||
res = res.replace("\\", "\\\\");
|
||||
|
||||
for c in CSS_SPECIAL_CHARS.chars() {
|
||||
res = res.replace(c, format!("\\{}", c).as_str());
|
||||
}
|
||||
|
||||
res
|
||||
}
|
||||
|
||||
pub fn process_css<'a>(
|
||||
cache: &mut HashMap<String, Vec<u8>>,
|
||||
client: &Client,
|
||||
parent_url: &str,
|
||||
parser: &mut Parser,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
rule_name: &str,
|
||||
prop_name: &str,
|
||||
func_name: &str,
|
||||
) -> Result<String, ParseError<'a, String>> {
|
||||
let mut result: String = str!();
|
||||
|
||||
let mut curr_rule: String = str!(rule_name.clone());
|
||||
let mut curr_prop: String = str!(prop_name.clone());
|
||||
let mut token: &Token;
|
||||
let mut token_offset: SourcePosition;
|
||||
|
||||
loop {
|
||||
token_offset = parser.position();
|
||||
token = match parser.next_including_whitespace_and_comments() {
|
||||
Ok(token) => token,
|
||||
Err(_) => {
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
match *token {
|
||||
Token::Comment(_) => {
|
||||
let token_slice = parser.slice_from(token_offset);
|
||||
result.push_str(str!(token_slice).as_str());
|
||||
}
|
||||
Token::Semicolon => result.push_str(";"),
|
||||
Token::Colon => result.push_str(":"),
|
||||
Token::Comma => result.push_str(","),
|
||||
Token::ParenthesisBlock | Token::SquareBracketBlock | Token::CurlyBracketBlock => {
|
||||
if options.no_fonts && curr_rule == "font-face" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let closure: &str;
|
||||
if token == &Token::ParenthesisBlock {
|
||||
result.push_str("(");
|
||||
closure = ")";
|
||||
} else if token == &Token::SquareBracketBlock {
|
||||
result.push_str("[");
|
||||
closure = "]";
|
||||
} else {
|
||||
result.push_str("{");
|
||||
closure = "}";
|
||||
}
|
||||
|
||||
let block_css: String = parser
|
||||
.parse_nested_block(|parser| {
|
||||
process_css(
|
||||
cache,
|
||||
client,
|
||||
parent_url,
|
||||
parser,
|
||||
options,
|
||||
depth,
|
||||
rule_name,
|
||||
curr_prop.as_str(),
|
||||
func_name,
|
||||
)
|
||||
})
|
||||
.unwrap();
|
||||
result.push_str(block_css.as_str());
|
||||
|
||||
result.push_str(closure);
|
||||
}
|
||||
Token::CloseParenthesis => result.push_str(")"),
|
||||
Token::CloseSquareBracket => result.push_str("]"),
|
||||
Token::CloseCurlyBracket => result.push_str("}"),
|
||||
Token::IncludeMatch => result.push_str("~="),
|
||||
Token::DashMatch => result.push_str("|="),
|
||||
Token::PrefixMatch => result.push_str("^="),
|
||||
Token::SuffixMatch => result.push_str("$="),
|
||||
Token::SubstringMatch => result.push_str("*="),
|
||||
Token::CDO => result.push_str("<!--"),
|
||||
Token::CDC => result.push_str("-->"),
|
||||
Token::WhiteSpace(ref value) => {
|
||||
result.push_str(value);
|
||||
}
|
||||
// div...
|
||||
Token::Ident(ref value) => {
|
||||
curr_rule = str!();
|
||||
curr_prop = str!(value);
|
||||
result.push_str(&escape(value));
|
||||
}
|
||||
// @import, @font-face, @charset, @media...
|
||||
Token::AtKeyword(ref value) => {
|
||||
curr_rule = str!(value);
|
||||
if options.no_fonts && curr_rule == "font-face" {
|
||||
continue;
|
||||
}
|
||||
result.push_str("@");
|
||||
result.push_str(value);
|
||||
}
|
||||
Token::Hash(ref value) => {
|
||||
result.push_str("#");
|
||||
result.push_str(value);
|
||||
}
|
||||
Token::QuotedString(ref value) => {
|
||||
if curr_rule == "import" {
|
||||
// Reset current at-rule value
|
||||
curr_rule = str!();
|
||||
|
||||
// Skip empty import values
|
||||
if value.len() < 1 {
|
||||
result.push_str("''");
|
||||
continue;
|
||||
}
|
||||
|
||||
let import_full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
||||
let import_url_fragment = get_url_fragment(import_full_url.clone());
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&parent_url,
|
||||
&import_full_url,
|
||||
options.silent,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((import_contents, import_final_url, _import_media_type)) => {
|
||||
let import_data_url = data_to_data_url(
|
||||
"text/css",
|
||||
embed_css(
|
||||
cache,
|
||||
client,
|
||||
&import_final_url,
|
||||
&String::from_utf8_lossy(&import_contents),
|
||||
options,
|
||||
depth + 1,
|
||||
)
|
||||
.as_bytes(),
|
||||
&import_final_url,
|
||||
);
|
||||
let assembled_url: String = url_with_fragment(
|
||||
import_data_url.as_str(),
|
||||
import_url_fragment.as_str(),
|
||||
);
|
||||
result.push_str(enquote(assembled_url, false).as_str());
|
||||
}
|
||||
Err(_) => {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
if is_http_url(import_full_url.clone()) {
|
||||
let assembled_url: String = url_with_fragment(
|
||||
import_full_url.as_str(),
|
||||
import_url_fragment.as_str(),
|
||||
);
|
||||
result.push_str(enquote(assembled_url, false).as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if func_name == "url" {
|
||||
// Skip empty url()'s
|
||||
if value.len() < 1 {
|
||||
continue;
|
||||
}
|
||||
|
||||
if options.no_images && is_image_url_prop(curr_prop.as_str()) {
|
||||
result.push_str(enquote(str!(empty_image!()), false).as_str());
|
||||
} else {
|
||||
let resolved_url = resolve_url(&parent_url, value).unwrap_or_default();
|
||||
let url_fragment = get_url_fragment(resolved_url.clone());
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&parent_url,
|
||||
&resolved_url,
|
||||
options.silent,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((data, final_url, media_type)) => {
|
||||
let data_url = data_to_data_url(&media_type, &data, &final_url);
|
||||
let assembled_url: String =
|
||||
url_with_fragment(data_url.as_str(), url_fragment.as_str());
|
||||
result.push_str(enquote(assembled_url, false).as_str());
|
||||
}
|
||||
Err(_) => {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
if is_http_url(resolved_url.clone()) {
|
||||
let assembled_url: String = url_with_fragment(
|
||||
resolved_url.as_str(),
|
||||
url_fragment.as_str(),
|
||||
);
|
||||
result.push_str(enquote(assembled_url, false).as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
result.push_str(enquote(str!(value), false).as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
Token::Number {
|
||||
ref has_sign,
|
||||
ref value,
|
||||
..
|
||||
} => {
|
||||
if *has_sign && *value >= 0. {
|
||||
result.push_str("+");
|
||||
}
|
||||
result.push_str(&value.to_string())
|
||||
}
|
||||
Token::Percentage {
|
||||
ref has_sign,
|
||||
ref unit_value,
|
||||
..
|
||||
} => {
|
||||
if *has_sign && *unit_value >= 0. {
|
||||
result.push_str("+");
|
||||
}
|
||||
result.push_str(str!(unit_value * 100.0).as_str());
|
||||
result.push_str("%");
|
||||
}
|
||||
Token::Dimension {
|
||||
ref has_sign,
|
||||
ref value,
|
||||
ref unit,
|
||||
..
|
||||
} => {
|
||||
if *has_sign && *value >= 0. {
|
||||
result.push_str("+");
|
||||
}
|
||||
result.push_str(str!(value).as_str());
|
||||
result.push_str(str!(unit).as_str());
|
||||
}
|
||||
// #selector, #id...
|
||||
Token::IDHash(ref value) => {
|
||||
curr_rule = str!();
|
||||
result.push_str("#");
|
||||
result.push_str(&escape(value));
|
||||
}
|
||||
Token::UnquotedUrl(ref value) => {
|
||||
let is_import: bool = curr_rule == "import";
|
||||
|
||||
if is_import {
|
||||
// Reset current at-rule value
|
||||
curr_rule = str!();
|
||||
}
|
||||
|
||||
// Skip empty url()'s
|
||||
if value.len() < 1 {
|
||||
result.push_str("url()");
|
||||
continue;
|
||||
} else if value.starts_with("#") {
|
||||
result.push_str("url(");
|
||||
result.push_str(value);
|
||||
result.push_str(")");
|
||||
continue;
|
||||
}
|
||||
|
||||
result.push_str("url(");
|
||||
if is_import {
|
||||
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
||||
let url_fragment = get_url_fragment(full_url.clone());
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&parent_url,
|
||||
&full_url,
|
||||
options.silent,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((css, final_url, _media_type)) => {
|
||||
let data_url = data_to_data_url(
|
||||
"text/css",
|
||||
embed_css(
|
||||
cache,
|
||||
client,
|
||||
&final_url,
|
||||
&String::from_utf8_lossy(&css),
|
||||
options,
|
||||
depth + 1,
|
||||
)
|
||||
.as_bytes(),
|
||||
&final_url,
|
||||
);
|
||||
let assembled_url: String =
|
||||
url_with_fragment(data_url.as_str(), url_fragment.as_str());
|
||||
result.push_str(enquote(assembled_url, false).as_str());
|
||||
}
|
||||
Err(_) => {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
if is_http_url(full_url.clone()) {
|
||||
let assembled_url: String =
|
||||
url_with_fragment(full_url.as_str(), url_fragment.as_str());
|
||||
result.push_str(enquote(assembled_url, false).as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if is_image_url_prop(curr_prop.as_str()) && options.no_images {
|
||||
result.push_str(enquote(str!(empty_image!()), false).as_str());
|
||||
} else {
|
||||
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
||||
let url_fragment = get_url_fragment(full_url.clone());
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&parent_url,
|
||||
&full_url,
|
||||
options.silent,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((data, final_url, media_type)) => {
|
||||
let data_url = data_to_data_url(&media_type, &data, &final_url);
|
||||
let assembled_url: String =
|
||||
url_with_fragment(data_url.as_str(), url_fragment.as_str());
|
||||
result.push_str(enquote(assembled_url, false).as_str());
|
||||
}
|
||||
Err(_) => {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
if is_http_url(full_url.clone()) {
|
||||
let assembled_url: String =
|
||||
url_with_fragment(full_url.as_str(), url_fragment.as_str());
|
||||
result.push_str(enquote(assembled_url, false).as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
result.push_str(")");
|
||||
}
|
||||
Token::Delim(ref value) => result.push_str(&value.to_string()),
|
||||
Token::Function(ref name) => {
|
||||
let function_name: &str = &name.clone();
|
||||
result.push_str(function_name);
|
||||
result.push_str("(");
|
||||
|
||||
let block_css: String = parser
|
||||
.parse_nested_block(|parser| {
|
||||
process_css(
|
||||
cache,
|
||||
client,
|
||||
parent_url,
|
||||
parser,
|
||||
options,
|
||||
depth,
|
||||
curr_rule.as_str(),
|
||||
curr_prop.as_str(),
|
||||
function_name,
|
||||
)
|
||||
})
|
||||
.unwrap();
|
||||
result.push_str(block_css.as_str());
|
||||
|
||||
result.push_str(")");
|
||||
}
|
||||
Token::BadUrl(_) | Token::BadString(_) => {}
|
||||
}
|
||||
}
|
||||
|
||||
// Ensure empty CSS is really empty
|
||||
if result.len() > 0 && result.trim().len() == 0 {
|
||||
result = result.trim().to_string()
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn embed_css(
|
||||
cache: &mut HashMap<String, Vec<u8>>,
|
||||
client: &Client,
|
||||
parent_url: &str,
|
||||
css: &str,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
) -> String {
|
||||
let mut input = ParserInput::new(&css);
|
||||
let mut parser = Parser::new(&mut input);
|
||||
|
||||
process_css(
|
||||
cache,
|
||||
client,
|
||||
parent_url,
|
||||
&mut parser,
|
||||
options,
|
||||
depth,
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
1669
src/html.rs
1669
src/html.rs
File diff suppressed because it is too large
Load Diff
179
src/http.rs
179
src/http.rs
@@ -1,179 +0,0 @@
|
||||
use regex::Regex;
|
||||
use reqwest::Client;
|
||||
use reqwest::header::{CONTENT_TYPE, USER_AGENT};
|
||||
use std::time::Duration;
|
||||
use url::{ParseError, Url};
|
||||
use utils::data_to_dataurl;
|
||||
|
||||
lazy_static! {
|
||||
static ref REGEX_URL: Regex = Regex::new(r"^https?://").unwrap();
|
||||
}
|
||||
|
||||
pub fn is_data_url(url: &str) -> Result<bool, ParseError> {
|
||||
match Url::parse(url) {
|
||||
Ok(parsed_url) => Ok(parsed_url.scheme() == "data"),
|
||||
Err(err) => Err(err),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_valid_url(path: &str) -> bool {
|
||||
REGEX_URL.is_match(path)
|
||||
}
|
||||
|
||||
pub fn resolve_url(from: &str, to: &str) -> Result<String, ParseError> {
|
||||
let result = if is_valid_url(to) {
|
||||
// (anything, http://site.com/css/main.css)
|
||||
to.to_string()
|
||||
} else {
|
||||
Url::parse(from)?.join(to)?.to_string()
|
||||
};
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn retrieve_asset(
|
||||
url: &str,
|
||||
as_dataurl: bool,
|
||||
as_mime: &str,
|
||||
opt_user_agent: &str,
|
||||
opt_silent: bool,
|
||||
opt_insecure: bool,
|
||||
) -> Result<String, reqwest::Error> {
|
||||
if is_data_url(&url).unwrap() {
|
||||
Ok(url.to_string())
|
||||
} else {
|
||||
let client = Client::builder()
|
||||
.timeout(Duration::from_secs(10))
|
||||
.danger_accept_invalid_certs(opt_insecure)
|
||||
.build()?;
|
||||
let mut response = client
|
||||
.get(url)
|
||||
.header(USER_AGENT, opt_user_agent)
|
||||
.send()?;
|
||||
let final_url = response.url().as_str();
|
||||
|
||||
if !opt_silent {
|
||||
if url == final_url {
|
||||
eprintln!("[ {} ]", &url);
|
||||
} else {
|
||||
eprintln!("[ {} -> {} ]", &url, &final_url);
|
||||
}
|
||||
}
|
||||
|
||||
if as_dataurl {
|
||||
// Convert response into a byte array
|
||||
let mut data: Vec<u8> = vec![];
|
||||
response.copy_to(&mut data)?;
|
||||
|
||||
// Attempt to obtain MIME type by reading the Content-Type header
|
||||
let mimetype = if as_mime == "" {
|
||||
response
|
||||
.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.and_then(|header| header.to_str().ok())
|
||||
.unwrap_or(&as_mime)
|
||||
} else {
|
||||
as_mime
|
||||
};
|
||||
|
||||
Ok(data_to_dataurl(&mimetype, &data))
|
||||
} else {
|
||||
Ok(response.text().unwrap())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_is_valid_url() {
|
||||
assert!(is_valid_url("https://www.rust-lang.org/"));
|
||||
assert!(is_valid_url("http://kernel.org"));
|
||||
assert!(!is_valid_url("./index.html"));
|
||||
assert!(!is_valid_url("some-local-page.htm"));
|
||||
assert!(!is_valid_url("ftp://1.2.3.4/www/index.html"));
|
||||
assert!(!is_valid_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_resolve_url() -> Result<(), ParseError> {
|
||||
let resolved_url = resolve_url(
|
||||
"https://www.kernel.org",
|
||||
"../category/signatures.html",
|
||||
)?;
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"https://www.kernel.org/category/signatures.html"
|
||||
);
|
||||
|
||||
let resolved_url = resolve_url(
|
||||
"https://www.kernel.org",
|
||||
"category/signatures.html",
|
||||
)?;
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"https://www.kernel.org/category/signatures.html"
|
||||
);
|
||||
|
||||
let resolved_url = resolve_url(
|
||||
"saved_page.htm",
|
||||
"https://www.kernel.org/category/signatures.html",
|
||||
)?;
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"https://www.kernel.org/category/signatures.html"
|
||||
);
|
||||
|
||||
let resolved_url = resolve_url(
|
||||
"https://www.kernel.org",
|
||||
"//www.kernel.org/theme/images/logos/tux.png",
|
||||
)?;
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"https://www.kernel.org/theme/images/logos/tux.png"
|
||||
);
|
||||
|
||||
let resolved_url = resolve_url(
|
||||
"https://www.kernel.org",
|
||||
"//another-host.org/theme/images/logos/tux.png",
|
||||
)?;
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"https://another-host.org/theme/images/logos/tux.png"
|
||||
);
|
||||
|
||||
let resolved_url = resolve_url(
|
||||
"https://www.kernel.org/category/signatures.html",
|
||||
"/theme/images/logos/tux.png",
|
||||
)?;
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"https://www.kernel.org/theme/images/logos/tux.png"
|
||||
);
|
||||
|
||||
let resolved_url = resolve_url(
|
||||
"https://www.w3schools.com/html/html_iframe.asp",
|
||||
"default.asp",
|
||||
)?;
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"https://www.w3schools.com/html/default.asp"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_is_data_url() {
|
||||
assert!(
|
||||
is_data_url("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h")
|
||||
.unwrap_or(false)
|
||||
);
|
||||
assert!(!is_data_url("https://kernel.org").unwrap_or(false));
|
||||
assert!(!is_data_url("//kernel.org").unwrap_or(false));
|
||||
}
|
||||
}
|
||||
103
src/js.rs
Normal file
103
src/js.rs
Normal file
@@ -0,0 +1,103 @@
|
||||
const JS_DOM_EVENT_ATTRS: &[&str] = &[
|
||||
// From WHATWG HTML spec 8.1.5.2 "Event handlers on elements, Document objects, and Window objects":
|
||||
// https://html.spec.whatwg.org/#event-handlers-on-elements,-document-objects,-and-window-objects
|
||||
// https://html.spec.whatwg.org/#attributes-3 (table "List of event handler content attributes")
|
||||
|
||||
// Global event handlers
|
||||
"onabort",
|
||||
"onauxclick",
|
||||
"onblur",
|
||||
"oncancel",
|
||||
"oncanplay",
|
||||
"oncanplaythrough",
|
||||
"onchange",
|
||||
"onclick",
|
||||
"onclose",
|
||||
"oncontextmenu",
|
||||
"oncuechange",
|
||||
"ondblclick",
|
||||
"ondrag",
|
||||
"ondragend",
|
||||
"ondragenter",
|
||||
"ondragexit",
|
||||
"ondragleave",
|
||||
"ondragover",
|
||||
"ondragstart",
|
||||
"ondrop",
|
||||
"ondurationchange",
|
||||
"onemptied",
|
||||
"onended",
|
||||
"onerror",
|
||||
"onfocus",
|
||||
"onformdata",
|
||||
"oninput",
|
||||
"oninvalid",
|
||||
"onkeydown",
|
||||
"onkeypress",
|
||||
"onkeyup",
|
||||
"onload",
|
||||
"onloadeddata",
|
||||
"onloadedmetadata",
|
||||
"onloadstart",
|
||||
"onmousedown",
|
||||
"onmouseenter",
|
||||
"onmouseleave",
|
||||
"onmousemove",
|
||||
"onmouseout",
|
||||
"onmouseover",
|
||||
"onmouseup",
|
||||
"onwheel",
|
||||
"onpause",
|
||||
"onplay",
|
||||
"onplaying",
|
||||
"onprogress",
|
||||
"onratechange",
|
||||
"onreset",
|
||||
"onresize",
|
||||
"onscroll",
|
||||
"onsecuritypolicyviolation",
|
||||
"onseeked",
|
||||
"onseeking",
|
||||
"onselect",
|
||||
"onslotchange",
|
||||
"onstalled",
|
||||
"onsubmit",
|
||||
"onsuspend",
|
||||
"ontimeupdate",
|
||||
"ontoggle",
|
||||
"onvolumechange",
|
||||
"onwaiting",
|
||||
"onwebkitanimationend",
|
||||
"onwebkitanimationiteration",
|
||||
"onwebkitanimationstart",
|
||||
"onwebkittransitionend",
|
||||
// Event handlers for <body/> and <frameset/> elements
|
||||
"onafterprint",
|
||||
"onbeforeprint",
|
||||
"onbeforeunload",
|
||||
"onhashchange",
|
||||
"onlanguagechange",
|
||||
"onmessage",
|
||||
"onmessageerror",
|
||||
"onoffline",
|
||||
"ononline",
|
||||
"onpagehide",
|
||||
"onpageshow",
|
||||
"onpopstate",
|
||||
"onrejectionhandled",
|
||||
"onstorage",
|
||||
"onunhandledrejection",
|
||||
"onunload",
|
||||
// Event handlers for <html/> element
|
||||
"oncut",
|
||||
"oncopy",
|
||||
"onpaste",
|
||||
];
|
||||
|
||||
// Returns true if DOM attribute name matches a native JavaScript event handler
|
||||
pub fn attr_is_event_handler(attr_name: &str) -> bool {
|
||||
JS_DOM_EVENT_ATTRS
|
||||
.iter()
|
||||
.find(|a| attr_name.eq_ignore_ascii_case(a))
|
||||
.is_some()
|
||||
}
|
||||
17
src/lib.rs
17
src/lib.rs
@@ -1,10 +1,15 @@
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
extern crate html5ever;
|
||||
extern crate regex;
|
||||
extern crate reqwest;
|
||||
extern crate url;
|
||||
extern crate clap;
|
||||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
|
||||
pub mod css;
|
||||
pub mod html;
|
||||
pub mod http;
|
||||
pub mod js;
|
||||
pub mod opts;
|
||||
pub mod url;
|
||||
pub mod utils;
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests;
|
||||
|
||||
17
src/macros.rs
Normal file
17
src/macros.rs
Normal file
@@ -0,0 +1,17 @@
|
||||
#[macro_export]
|
||||
macro_rules! str {
|
||||
() => {
|
||||
String::new()
|
||||
};
|
||||
($val: expr) => {
|
||||
ToString::to_string(&$val)
|
||||
};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! empty_image {
|
||||
() => {
|
||||
"data:image/png;base64,\
|
||||
iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII="
|
||||
};
|
||||
}
|
||||
240
src/main.rs
240
src/main.rs
@@ -1,63 +1,191 @@
|
||||
#[macro_use]
|
||||
extern crate clap;
|
||||
extern crate monolith;
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::io::{self, Error, Write};
|
||||
use std::path::Path;
|
||||
use std::process;
|
||||
use std::time::Duration;
|
||||
|
||||
use clap::{App, Arg};
|
||||
use monolith::html::{html_to_dom, print_dom, walk_and_embed_assets};
|
||||
use monolith::http::{is_valid_url, retrieve_asset};
|
||||
use monolith::html::{
|
||||
add_base_tag, add_favicon, has_base_tag, has_favicon, html_to_dom, metadata_tag,
|
||||
stringify_document, walk_and_embed_assets,
|
||||
};
|
||||
use monolith::opts::Options;
|
||||
use monolith::url::{
|
||||
data_to_data_url, data_url_to_data, is_data_url, is_file_url, is_http_url, resolve_url,
|
||||
};
|
||||
use monolith::utils::retrieve_asset;
|
||||
|
||||
static DEFAULT_USER_AGENT: &str =
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0";
|
||||
mod macros;
|
||||
|
||||
fn main() {
|
||||
let command = App::new("monolith")
|
||||
.version(crate_version!())
|
||||
.author(crate_authors!("\n"))
|
||||
.about(crate_description!())
|
||||
.arg(
|
||||
Arg::with_name("url")
|
||||
.required(true)
|
||||
.takes_value(true)
|
||||
.index(1)
|
||||
.help("URL to download"),
|
||||
)
|
||||
.args_from_usage("-i, --no-images 'Removes images'")
|
||||
.args_from_usage("-j, --no-js 'Excludes JavaScript'")
|
||||
.args_from_usage("-k, --insecure 'Accept invalid X.509 (TLS) certificates'")
|
||||
.args_from_usage("-s, --silent 'Suppress verbosity'")
|
||||
.args_from_usage("-u, --user-agent=[Iceweasel] 'Custom User-Agent string'")
|
||||
.get_matches();
|
||||
enum Output {
|
||||
Stdout(io::Stdout),
|
||||
File(fs::File),
|
||||
}
|
||||
|
||||
// Process the command
|
||||
let arg_target = command.value_of("url").unwrap();
|
||||
let opt_no_images = command.is_present("no-images");
|
||||
let opt_no_js = command.is_present("no-js");
|
||||
let opt_insecure = command.is_present("insecure");
|
||||
let opt_silent = command.is_present("silent");
|
||||
let opt_user_agent = command.value_of("user-agent").unwrap_or(DEFAULT_USER_AGENT);
|
||||
impl Output {
|
||||
fn new(file_path: &str) -> Result<Output, Error> {
|
||||
if file_path.is_empty() || file_path.eq("-") {
|
||||
Ok(Output::Stdout(io::stdout()))
|
||||
} else {
|
||||
Ok(Output::File(fs::File::create(file_path)?))
|
||||
}
|
||||
}
|
||||
|
||||
if is_valid_url(arg_target) {
|
||||
let data = retrieve_asset(
|
||||
&arg_target,
|
||||
false,
|
||||
"",
|
||||
opt_user_agent,
|
||||
opt_silent,
|
||||
opt_insecure,
|
||||
).unwrap();
|
||||
let dom = html_to_dom(&data);
|
||||
|
||||
walk_and_embed_assets(
|
||||
&arg_target,
|
||||
&dom.document,
|
||||
opt_no_js,
|
||||
opt_no_images,
|
||||
opt_user_agent,
|
||||
opt_silent,
|
||||
opt_insecure,
|
||||
);
|
||||
|
||||
print_dom(&dom.document);
|
||||
println!(); // Ensure newline at end of output
|
||||
fn writeln_str(&mut self, s: &str) -> Result<(), Error> {
|
||||
match self {
|
||||
Output::Stdout(stdout) => {
|
||||
writeln!(stdout, "{}", s)?;
|
||||
stdout.flush()
|
||||
}
|
||||
Output::File(f) => {
|
||||
writeln!(f, "{}", s)?;
|
||||
f.flush()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let options = Options::from_args();
|
||||
let original_target: &str = &options.target;
|
||||
let target_url: &str;
|
||||
let base_url;
|
||||
let mut dom;
|
||||
|
||||
// Pre-process the input
|
||||
let cwd_normalized: String =
|
||||
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
||||
let path = Path::new(original_target);
|
||||
let mut target: String = str!(original_target.clone()).replace("\\", "/");
|
||||
let path_is_relative: bool = path.is_relative();
|
||||
|
||||
// Determine exact target URL
|
||||
if target.clone().len() == 0 {
|
||||
eprintln!("No target specified");
|
||||
process::exit(1);
|
||||
} else if is_http_url(target.clone()) || is_data_url(target.clone()) {
|
||||
target_url = target.as_str();
|
||||
} else if is_file_url(target.clone()) {
|
||||
target_url = target.as_str();
|
||||
} else if path.exists() {
|
||||
if !path.is_file() {
|
||||
eprintln!("Local target is not a file: {}", original_target);
|
||||
process::exit(1);
|
||||
}
|
||||
target.insert_str(0, if cfg!(windows) { "file:///" } else { "file://" });
|
||||
if path_is_relative {
|
||||
target.insert_str(if cfg!(windows) { 8 } else { 7 }, &cwd_normalized);
|
||||
target.insert_str(
|
||||
if cfg!(windows) { 8 } else { 7 } + &cwd_normalized.len(),
|
||||
"/",
|
||||
);
|
||||
}
|
||||
target_url = target.as_str();
|
||||
} else {
|
||||
target.insert_str(0, "http://");
|
||||
target_url = target.as_str();
|
||||
}
|
||||
|
||||
// Define output
|
||||
let mut output = Output::new(&options.output).expect("Could not prepare output");
|
||||
|
||||
// Initialize client
|
||||
let mut cache = HashMap::new();
|
||||
let mut header_map = HeaderMap::new();
|
||||
header_map.insert(
|
||||
USER_AGENT,
|
||||
HeaderValue::from_str(&options.user_agent).expect("Invalid User-Agent header specified"),
|
||||
);
|
||||
let timeout: u64 = if options.timeout > 0 {
|
||||
options.timeout
|
||||
} else {
|
||||
std::u64::MAX / 4
|
||||
};
|
||||
let client = Client::builder()
|
||||
.timeout(Duration::from_secs(timeout))
|
||||
.danger_accept_invalid_certs(options.insecure)
|
||||
.default_headers(header_map)
|
||||
.build()
|
||||
.expect("Failed to initialize HTTP client");
|
||||
|
||||
// Retrieve target document
|
||||
if is_file_url(target_url) || is_http_url(target_url) {
|
||||
match retrieve_asset(
|
||||
&mut cache,
|
||||
&client,
|
||||
target_url,
|
||||
target_url,
|
||||
options.silent,
|
||||
0,
|
||||
) {
|
||||
Ok((data, final_url, _media_type)) => {
|
||||
base_url = final_url;
|
||||
dom = html_to_dom(&String::from_utf8_lossy(&data));
|
||||
}
|
||||
Err(_) => {
|
||||
eprintln!("Could not retrieve target document");
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
} else if is_data_url(target_url) {
|
||||
let (media_type, data): (String, Vec<u8>) = data_url_to_data(target_url);
|
||||
if !media_type.eq_ignore_ascii_case("text/html") {
|
||||
eprintln!("Unsupported data URL media type");
|
||||
process::exit(1);
|
||||
}
|
||||
base_url = str!(target_url);
|
||||
dom = html_to_dom(&String::from_utf8_lossy(&data));
|
||||
} else {
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
// Embed remote assets
|
||||
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
|
||||
|
||||
// Take care of BASE tag
|
||||
if is_http_url(base_url.clone()) && !has_base_tag(&dom.document) {
|
||||
dom = add_base_tag(&dom.document, base_url.clone());
|
||||
}
|
||||
|
||||
// Request and embed /favicon.ico (unless it's already linked in the document)
|
||||
if !options.no_images && is_http_url(target_url) && !has_favicon(&dom.document) {
|
||||
let favicon_ico_url: String = resolve_url(&base_url, "/favicon.ico").unwrap();
|
||||
|
||||
match retrieve_asset(
|
||||
&mut cache,
|
||||
&client,
|
||||
&base_url,
|
||||
&favicon_ico_url,
|
||||
options.silent,
|
||||
0,
|
||||
) {
|
||||
Ok((data, final_url, media_type)) => {
|
||||
let favicon_data_url: String = data_to_data_url(&media_type, &data, &final_url);
|
||||
dom = add_favicon(&dom.document, favicon_data_url);
|
||||
}
|
||||
Err(_) => {
|
||||
// Failed to retrieve favicon.ico
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Serialize DOM tree
|
||||
let mut result: String = stringify_document(&dom.document, &options);
|
||||
|
||||
// Add metadata tag
|
||||
if !options.no_metadata {
|
||||
let metadata_comment: String = metadata_tag(&base_url);
|
||||
result.insert_str(0, &metadata_comment);
|
||||
if metadata_comment.len() > 0 {
|
||||
result.insert_str(metadata_comment.len(), "\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Write result into stdout or file
|
||||
output
|
||||
.writeln_str(&result)
|
||||
.expect("Could not write HTML output");
|
||||
}
|
||||
|
||||
90
src/opts.rs
Normal file
90
src/opts.rs
Normal file
@@ -0,0 +1,90 @@
|
||||
use clap::{App, Arg};
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Options {
|
||||
pub target: String,
|
||||
pub no_css: bool,
|
||||
pub no_fonts: bool,
|
||||
pub no_frames: bool,
|
||||
pub no_images: bool,
|
||||
pub no_js: bool,
|
||||
pub insecure: bool,
|
||||
pub isolate: bool,
|
||||
pub output: String,
|
||||
pub silent: bool,
|
||||
pub timeout: u64,
|
||||
pub user_agent: String,
|
||||
pub no_metadata: bool,
|
||||
}
|
||||
|
||||
const ASCII: &str = " \
|
||||
_____ ______________ __________ ___________________ ___
|
||||
| \\ / \\ | | | | | |
|
||||
| \\_/ __ \\_| __ | | ___ ___ |__| |
|
||||
| | | | | | | | | | | |
|
||||
| |\\ /| |__| _ |__| |____| | | | | __ |
|
||||
| | \\___/ | | \\ | | | | | | |
|
||||
|___| |__________| \\_____________________| |___| |___| |___|
|
||||
";
|
||||
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
|
||||
const DEFAULT_USER_AGENT: &str =
|
||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0";
|
||||
|
||||
impl Options {
|
||||
pub fn from_args() -> Options {
|
||||
let app = App::new(env!("CARGO_PKG_NAME"))
|
||||
.version(crate_version!())
|
||||
.author(crate_authors!("\n"))
|
||||
.about(format!("{}\n{}", ASCII, crate_description!()).as_str())
|
||||
.arg(
|
||||
Arg::with_name("target")
|
||||
.required(true)
|
||||
.takes_value(true)
|
||||
.index(1)
|
||||
.help("URL or file path"),
|
||||
)
|
||||
// .args_from_usage("-a, --no-audio 'Removes audio sources'")
|
||||
.args_from_usage("-c, --no-css 'Removes CSS'")
|
||||
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
|
||||
.args_from_usage("-F, --no-fonts 'Removes fonts'")
|
||||
.args_from_usage("-i, --no-images 'Removes images'")
|
||||
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
|
||||
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
||||
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
|
||||
.args_from_usage("-M, --no-metadata 'Excludes metadata information from the document'")
|
||||
.args_from_usage("-o, --output=[document.html] 'Write output to <file>'")
|
||||
.args_from_usage("-s, --silent 'Suppresses verbosity'")
|
||||
.args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'")
|
||||
.args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'")
|
||||
// .args_from_usage("-v, --no-video 'Removes video sources'")
|
||||
.get_matches();
|
||||
let mut options: Options = Options::default();
|
||||
|
||||
// Process the command
|
||||
options.target = app
|
||||
.value_of("target")
|
||||
.expect("please set target")
|
||||
.to_string();
|
||||
options.no_css = app.is_present("no-css");
|
||||
options.no_frames = app.is_present("no-frames");
|
||||
options.no_fonts = app.is_present("no-fonts");
|
||||
options.no_images = app.is_present("no-images");
|
||||
options.isolate = app.is_present("isolate");
|
||||
options.no_js = app.is_present("no-js");
|
||||
options.insecure = app.is_present("insecure");
|
||||
options.no_metadata = app.is_present("no-metadata");
|
||||
options.output = app.value_of("output").unwrap_or("").to_string();
|
||||
options.silent = app.is_present("silent");
|
||||
options.timeout = app
|
||||
.value_of("timeout")
|
||||
.unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string())
|
||||
.parse::<u64>()
|
||||
.unwrap();
|
||||
options.user_agent = app
|
||||
.value_of("user-agent")
|
||||
.unwrap_or(DEFAULT_USER_AGENT)
|
||||
.to_string();
|
||||
|
||||
options
|
||||
}
|
||||
}
|
||||
559
src/tests/cli.rs
Normal file
559
src/tests/cli.rs
Normal file
@@ -0,0 +1,559 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use assert_cmd::prelude::*;
|
||||
use std::env;
|
||||
use std::io::Write;
|
||||
use std::process::Command;
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
#[test]
|
||||
fn print_version() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd.arg("-V").output().unwrap();
|
||||
|
||||
// STDOUT should contain program name and version
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bad_input_empty_target() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd.arg("").output().unwrap();
|
||||
|
||||
// STDOUT should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
|
||||
|
||||
// STDERR should contain error description
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
"No target specified\n"
|
||||
);
|
||||
|
||||
// The exit code should be 1
|
||||
out.assert().code(1);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
|
||||
|
||||
// STDOUT should contain HTML
|
||||
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
|
||||
|
||||
// STDERR should contain error description
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
"Unsupported data URL media type\n"
|
||||
);
|
||||
|
||||
// The exit code should be 1
|
||||
out.assert().code(1);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-I")
|
||||
.arg("data:text/html,Hello%2C%20World!")
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDOUT should contain isolated HTML
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
|
||||
</head><body>Hello, World!</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-c")
|
||||
.arg("data:text/html,<style>body{background-color:pink}</style>Hello")
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDOUT should contain HTML with no CSS
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
|
||||
<style></style>\
|
||||
</head><body>Hello</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remove_fonts_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-F")
|
||||
.arg("data:text/html,<style>@font-face { font-family: myFont; src: url(font.woff); }</style>Hi")
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDOUT should contain HTML with no web fonts
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"font-src 'none';\"></meta>\
|
||||
<style></style>\
|
||||
</head><body>Hi</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-f")
|
||||
.arg("data:text/html,<iframe src=\"https://duckduckgo.com\"></iframe>Hi")
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDOUT should contain HTML with no iframes
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>\
|
||||
</head><body><iframe src=\"\"></iframe>Hi</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-i")
|
||||
.arg("data:text/html,<img src=\"https://google.com\"/>Hi")
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDOUT should contain HTML with no images
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
format!(
|
||||
"<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>\
|
||||
</head>\
|
||||
<body>\
|
||||
<img src=\"{empty_image}\">\
|
||||
Hi\
|
||||
</body>\
|
||||
</html>\n",
|
||||
empty_image = empty_image!()
|
||||
)
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-j")
|
||||
.arg("data:text/html,<script>alert(2)</script>Hi")
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDOUT should contain HTML with no JS
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta>\
|
||||
<script></script></head>\
|
||||
<body>Hi</body>\
|
||||
</html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let cwd_normalized: String =
|
||||
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg(if cfg!(windows) {
|
||||
"src\\tests\\data\\basic\\local-file.html"
|
||||
} else {
|
||||
"src/tests/data/basic/local-file.html"
|
||||
})
|
||||
.output()
|
||||
.unwrap();
|
||||
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||
|
||||
// STDOUT should contain HTML from the local file
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"\
|
||||
<!DOCTYPE html><html lang=\"en\"><head>\n \
|
||||
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
|
||||
<title>Local HTML file</title>\n \
|
||||
<link rel=\"stylesheet\" type=\"text/css\" href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\">\n \
|
||||
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
|
||||
<img alt=\"\">\n \
|
||||
<a href=\"file://local-file.html/\">Tricky href</a>\n \
|
||||
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
|
||||
<script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\
|
||||
</body></html>\n\
|
||||
"
|
||||
);
|
||||
|
||||
// STDERR should contain list of retrieved file URLs
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
format!(
|
||||
"\
|
||||
{file}{cwd}/src/tests/data/basic/local-file.html\n \
|
||||
{file}{cwd}/src/tests/data/basic/local-style.css\n \
|
||||
{file}{cwd}/src/tests/data/basic/local-script.js\n\
|
||||
",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn local_file_target_input_absolute_target_path() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let cwd = env::current_dir().unwrap();
|
||||
let cwd_normalized: String =
|
||||
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-jciI")
|
||||
.arg(if cfg!(windows) {
|
||||
format!(
|
||||
"{cwd}\\src\\tests\\data\\basic\\local-file.html",
|
||||
cwd = cwd.to_str().unwrap()
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"{cwd}/src/tests/data/basic/local-file.html",
|
||||
cwd = cwd.to_str().unwrap()
|
||||
)
|
||||
})
|
||||
.output()
|
||||
.unwrap();
|
||||
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||
|
||||
// STDOUT should contain HTML from the local file
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
format!(
|
||||
"\
|
||||
<!DOCTYPE html><html lang=\"en\"><head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
|
||||
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
|
||||
<title>Local HTML file</title>\n \
|
||||
<link rel=\"stylesheet\" type=\"text/css\">\n \
|
||||
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
|
||||
<img alt=\"\" src=\"{empty_image}\">\n \
|
||||
<a href=\"file://local-file.html/\">Tricky href</a>\n \
|
||||
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
|
||||
<script></script>\n\n\n\n\
|
||||
</body></html>\n\
|
||||
",
|
||||
empty_image = empty_image!()
|
||||
)
|
||||
);
|
||||
|
||||
// STDERR should contain only the target file
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
format!(
|
||||
"{file}{cwd}/src/tests/data/basic/local-file.html\n",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn local_file_url_target_input() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let cwd_normalized: String =
|
||||
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
||||
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-cji")
|
||||
.arg(if cfg!(windows) {
|
||||
format!(
|
||||
"{file}{cwd}/src/tests/data/basic/local-file.html",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"{file}{cwd}/src/tests/data/basic/local-file.html",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
)
|
||||
})
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDOUT should contain HTML from the local file
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
format!(
|
||||
"\
|
||||
<!DOCTYPE html><html lang=\"en\"><head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
|
||||
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
|
||||
<title>Local HTML file</title>\n \
|
||||
<link rel=\"stylesheet\" type=\"text/css\">\n \
|
||||
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
|
||||
<img alt=\"\" src=\"{empty_image}\">\n \
|
||||
<a href=\"file://local-file.html/\">Tricky href</a>\n \
|
||||
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
|
||||
<script></script>\n\n\n\n\
|
||||
</body></html>\n\
|
||||
",
|
||||
empty_image = empty_image!()
|
||||
)
|
||||
);
|
||||
|
||||
// STDERR should contain list of retrieved file URLs
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
if cfg!(windows) {
|
||||
format!(
|
||||
"{file}{cwd}/src/tests/data/basic/local-file.html\n",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"{file}{cwd}/src/tests/data/basic/local-file.html\n",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
)
|
||||
}
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn security_disallow_local_assets_within_data_url_targets(
|
||||
) -> Result<(), Box<dyn std::error::Error>> {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("data:text/html,%3Cscript%20src=\"src/tests/data/basic/local-script.js\"%3E%3C/script%3E")
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDOUT should contain HTML with no JS in it
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head><script></script></head><body></body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn embed_file_url_local_asset_within_style_attribute() -> Result<(), Box<dyn std::error::Error>>
|
||||
{
|
||||
let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let mut file_svg = NamedTempFile::new()?;
|
||||
writeln!(file_svg, "<svg version=\"1.1\" baseProfile=\"full\" width=\"300\" height=\"200\" xmlns=\"http://www.w3.org/2000/svg\">\
|
||||
<rect width=\"100%\" height=\"100%\" fill=\"red\" />\
|
||||
<circle cx=\"150\" cy=\"100\" r=\"80\" fill=\"green\" />\
|
||||
<text x=\"150\" y=\"125\" font-size=\"60\" text-anchor=\"middle\" fill=\"white\">SVG</text>\
|
||||
</svg>\n")?;
|
||||
let mut file_html = NamedTempFile::new()?;
|
||||
writeln!(
|
||||
file_html,
|
||||
"<div style='background-image: url(\"{file}{path}\")'></div>\n",
|
||||
file = file_url_prefix,
|
||||
path = str!(file_svg.path().to_str().unwrap()).replace("\\", "/"),
|
||||
)?;
|
||||
let out = cmd.arg("-M").arg(file_html.path()).output().unwrap();
|
||||
|
||||
// STDOUT should contain HTML with date URL for background-image in it
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head></head><body><div style=\"background-image: url('data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiBmaWxsPSJyZWQiIC8+PGNpcmNsZSBjeD0iMTUwIiBjeT0iMTAwIiByPSI4MCIgZmlsbD0iZ3JlZW4iIC8+PHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+PC9zdmc+Cgo=')\"></div>\n\n</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should list temporary files that got retrieved
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
format!(
|
||||
"\
|
||||
{file}{html_path}\n \
|
||||
{file}{svg_path}\n\
|
||||
",
|
||||
file = file_url_prefix,
|
||||
html_path = str!(file_html.path().to_str().unwrap()).replace("\\", "/"),
|
||||
svg_path = str!(file_svg.path().to_str().unwrap()).replace("\\", "/"),
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn css_import_string() -> Result<(), Box<dyn std::error::Error>> {
|
||||
let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
|
||||
let mut file_css = NamedTempFile::new()?;
|
||||
writeln!(file_css, "body{{background-color:#000;color:#fff}}")?;
|
||||
let mut file_html = NamedTempFile::new()?;
|
||||
writeln!(
|
||||
file_html,
|
||||
"\
|
||||
<style>\n\
|
||||
@charset 'UTF-8';\n\
|
||||
\n\
|
||||
@import '{file}{css_path}';\n\
|
||||
\n\
|
||||
@import url({file}{css_path});\n\
|
||||
\n\
|
||||
@import url('{file}{css_path}')\n\
|
||||
</style>\n\
|
||||
",
|
||||
file = file_url_prefix,
|
||||
css_path = str!(file_css.path().to_str().unwrap()).replace("\\", "/"),
|
||||
)?;
|
||||
let out = cmd.arg("-M").arg(file_html.path()).output().unwrap();
|
||||
|
||||
// STDOUT should contain embedded CSS url()'s
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head><style>\n@charset 'UTF-8';\n\n@import 'data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K';\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K');\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K')\n</style>\n\n</head><body></body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should list temporary files that got retrieved
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
format!(
|
||||
"\
|
||||
{file}{html_path}\n \
|
||||
{file}{css_path}\n \
|
||||
{file}{css_path}\n \
|
||||
{file}{css_path}\n\
|
||||
",
|
||||
file = file_url_prefix,
|
||||
html_path = str!(file_html.path().to_str().unwrap()).replace("\\", "/"),
|
||||
css_path = str!(file_css.path().to_str().unwrap()).replace("\\", "/"),
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
350
src/tests/css/embed_css.rs
Normal file
350
src/tests/css/embed_css.rs
Normal file
@@ -0,0 +1,350 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::css;
|
||||
use crate::opts::Options;
|
||||
|
||||
#[test]
|
||||
fn empty_input() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let options = Options::default();
|
||||
|
||||
assert_eq!(css::embed_css(cache, &client, "", "", &options, 0), "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn trim_if_empty() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let options = Options::default();
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(
|
||||
cache,
|
||||
&client,
|
||||
"https://doesntmatter.local/",
|
||||
"\t \t ",
|
||||
&options,
|
||||
0,
|
||||
),
|
||||
""
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn style_exclude_unquoted_images() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
|
||||
const STYLE: &str = "/* border: none;*/\
|
||||
background-image: url(https://somewhere.com/bg.png); \
|
||||
list-style: url(/assets/images/bullet.svg);\
|
||||
width:99.998%; \
|
||||
margin-top: -20px; \
|
||||
line-height: -1; \
|
||||
height: calc(100vh - 10pt)";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(
|
||||
cache,
|
||||
&client,
|
||||
"https://doesntmatter.local/",
|
||||
&STYLE,
|
||||
&options,
|
||||
0,
|
||||
),
|
||||
format!(
|
||||
"/* border: none;*/\
|
||||
background-image: url('{empty_image}'); \
|
||||
list-style: url('{empty_image}');\
|
||||
width:99.998%; \
|
||||
margin-top: -20px; \
|
||||
line-height: -1; \
|
||||
height: calc(100vh - 10pt)",
|
||||
empty_image = empty_image!()
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn style_exclude_single_quoted_images() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
|
||||
const STYLE: &str = "/* border: none;*/\
|
||||
background-image: url('https://somewhere.com/bg.png'); \
|
||||
list-style: url('/assets/images/bullet.svg');\
|
||||
width:99.998%; \
|
||||
margin-top: -20px; \
|
||||
line-height: -1; \
|
||||
height: calc(100vh - 10pt)";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, "", &STYLE, &options, 0),
|
||||
format!(
|
||||
"/* border: none;*/\
|
||||
background-image: url('{empty_image}'); \
|
||||
list-style: url('{empty_image}');\
|
||||
width:99.998%; \
|
||||
margin-top: -20px; \
|
||||
line-height: -1; \
|
||||
height: calc(100vh - 10pt)",
|
||||
empty_image = empty_image!()
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn style_block() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
const CSS: &str = "\
|
||||
#id.class-name:not(:nth-child(3n+0)) {\n \
|
||||
// border: none;\n \
|
||||
background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=');\n\
|
||||
}\n\
|
||||
\n\
|
||||
html > body {}";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, "file:///", &CSS, &options, 0),
|
||||
CSS
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn attribute_selectors() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
const CSS: &str = "\
|
||||
[data-value] {
|
||||
/* Attribute exists */
|
||||
}
|
||||
|
||||
[data-value='foo'] {
|
||||
/* Attribute has this exact value */
|
||||
}
|
||||
|
||||
[data-value*='foo'] {
|
||||
/* Attribute value contains this value somewhere in it */
|
||||
}
|
||||
|
||||
[data-value~='foo'] {
|
||||
/* Attribute has this value in a space-separated list somewhere */
|
||||
}
|
||||
|
||||
[data-value^='foo'] {
|
||||
/* Attribute value starts with this */
|
||||
}
|
||||
|
||||
[data-value|='foo'] {
|
||||
/* Attribute value starts with this in a dash-separated list */
|
||||
}
|
||||
|
||||
[data-value$='foo'] {
|
||||
/* Attribute value ends with this */
|
||||
}
|
||||
";
|
||||
|
||||
assert_eq!(css::embed_css(cache, &client, "", &CSS, &options, 0), CSS);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn import_string() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
const CSS: &str = "\
|
||||
@charset 'UTF-8';\n\
|
||||
\n\
|
||||
@import 'data:text/css,html{background-color:%23000}';\n\
|
||||
\n\
|
||||
@import url('data:text/css,html{color:%23fff}')\n\
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(
|
||||
cache,
|
||||
&client,
|
||||
"https://doesntmatter.local/",
|
||||
&CSS,
|
||||
&options,
|
||||
0,
|
||||
),
|
||||
"\
|
||||
@charset 'UTF-8';\n\
|
||||
\n\
|
||||
@import 'data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9';\n\
|
||||
\n\
|
||||
@import url('data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==')\n\
|
||||
"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn hash_urls() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
const CSS: &str = "\
|
||||
body {\n \
|
||||
behavior: url(#default#something);\n\
|
||||
}\n\
|
||||
\n\
|
||||
.scissorHalf {\n \
|
||||
offset-path: url(#somePath);\n\
|
||||
}\n\
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(
|
||||
cache,
|
||||
&client,
|
||||
"https://doesntmatter.local/",
|
||||
&CSS,
|
||||
&options,
|
||||
0,
|
||||
),
|
||||
CSS
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn transform_percentages_and_degrees() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
const CSS: &str = "\
|
||||
div {\n \
|
||||
transform: translate(-50%, -50%) rotate(-45deg);\n\
|
||||
transform: translate(50%, 50%) rotate(45deg);\n\
|
||||
transform: translate(+50%, +50%) rotate(+45deg);\n\
|
||||
}\n\
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(
|
||||
cache,
|
||||
&client,
|
||||
"https://doesntmatter.local/",
|
||||
&CSS,
|
||||
&options,
|
||||
0,
|
||||
),
|
||||
CSS
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unusual_indents() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
const CSS: &str = "\
|
||||
.is\\:good:hover {\n \
|
||||
color: green\n\
|
||||
}\n\
|
||||
\n\
|
||||
#\\~\\!\\@\\$\\%\\^\\&\\*\\(\\)\\+\\=\\,\\.\\/\\\\\\'\\\"\\;\\:\\?\\>\\<\\[\\]\\{\\}\\|\\`\\# {\n \
|
||||
color: black\n\
|
||||
}\n\
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(
|
||||
cache,
|
||||
&client,
|
||||
"https://doesntmatter.local/",
|
||||
&CSS,
|
||||
&options,
|
||||
0,
|
||||
),
|
||||
CSS
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn exclude_fonts() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let mut options = Options::default();
|
||||
options.no_fonts = true;
|
||||
options.silent = true;
|
||||
|
||||
const CSS: &str = "\
|
||||
@font-face {\n \
|
||||
font-family: 'My Font';\n \
|
||||
src: url(my_font.woff);\n\
|
||||
}\n\
|
||||
\n\
|
||||
#identifier {\n \
|
||||
font-family: 'My Font' Arial\n\
|
||||
}\n\
|
||||
\n\
|
||||
@font-face {\n \
|
||||
font-family: 'My Font';\n \
|
||||
src: url(my_font.woff);\n\
|
||||
}\n\
|
||||
\n\
|
||||
div {\n \
|
||||
font-family: 'My Font' Verdana\n\
|
||||
}\n\
|
||||
";
|
||||
|
||||
const CSS_OUT: &str = " \
|
||||
\n\
|
||||
\n\
|
||||
#identifier {\n \
|
||||
font-family: 'My Font' Arial\n\
|
||||
}\n\
|
||||
\n \
|
||||
\n\
|
||||
\n\
|
||||
div {\n \
|
||||
font-family: 'My Font' Verdana\n\
|
||||
}\n\
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(
|
||||
cache,
|
||||
&client,
|
||||
"https://doesntmatter.local/",
|
||||
&CSS,
|
||||
&options,
|
||||
0,
|
||||
),
|
||||
CSS_OUT
|
||||
);
|
||||
}
|
||||
}
|
||||
53
src/tests/css/enquote.rs
Normal file
53
src/tests/css/enquote.rs
Normal file
@@ -0,0 +1,53 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::css;
|
||||
|
||||
#[test]
|
||||
fn empty_input_single_quotes() {
|
||||
assert_eq!(css::enquote(str!(""), false), "''");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_input_double_quotes() {
|
||||
assert_eq!(css::enquote(str!(""), true), "\"\"");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn apostrophes_single_quotes() {
|
||||
assert_eq!(
|
||||
css::enquote(str!("It's a lovely day, don't you think?"), false),
|
||||
"'It\\'s a lovely day, don\\'t you think?'"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn apostrophes_double_quotes() {
|
||||
assert_eq!(
|
||||
css::enquote(str!("It's a lovely day, don't you think?"), true),
|
||||
"\"It's a lovely day, don't you think?\""
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn feet_and_inches_single_quotes() {
|
||||
assert_eq!(
|
||||
css::enquote(str!("5'2\", 6'5\""), false),
|
||||
"'5\\'2\", 6\\'5\"'"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn feet_and_inches_double_quotes() {
|
||||
assert_eq!(
|
||||
css::enquote(str!("5'2\", 6'5\""), true),
|
||||
"\"5'2\\\", 6'5\\\"\""
|
||||
);
|
||||
}
|
||||
}
|
||||
88
src/tests/css/is_image_url_prop.rs
Normal file
88
src/tests/css/is_image_url_prop.rs
Normal file
@@ -0,0 +1,88 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::css;
|
||||
|
||||
#[test]
|
||||
fn backrgound() {
|
||||
assert!(css::is_image_url_prop("background"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn backrgound_image() {
|
||||
assert!(css::is_image_url_prop("background-image"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn backrgound_image_uppercase() {
|
||||
assert!(css::is_image_url_prop("BACKGROUND-IMAGE"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn border_image() {
|
||||
assert!(css::is_image_url_prop("border-image"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn content() {
|
||||
assert!(css::is_image_url_prop("content"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn cursor() {
|
||||
assert!(css::is_image_url_prop("cursor"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn list_style() {
|
||||
assert!(css::is_image_url_prop("list-style"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn list_style_image() {
|
||||
assert!(css::is_image_url_prop("list-style-image"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mask_image() {
|
||||
assert!(css::is_image_url_prop("mask-image"));
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::css;
|
||||
|
||||
#[test]
|
||||
fn empty() {
|
||||
assert!(!css::is_image_url_prop(""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn width() {
|
||||
assert!(!css::is_image_url_prop("width"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn color() {
|
||||
assert!(!css::is_image_url_prop("color"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn z_index() {
|
||||
assert!(!css::is_image_url_prop("z-index"));
|
||||
}
|
||||
}
|
||||
3
src/tests/css/mod.rs
Normal file
3
src/tests/css/mod.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
mod embed_css;
|
||||
mod enquote;
|
||||
mod is_image_url_prop;
|
||||
19
src/tests/data/basic/local-file.html
Normal file
19
src/tests/data/basic/local-file.html
Normal file
@@ -0,0 +1,19 @@
|
||||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<title>Local HTML file</title>
|
||||
<link href="local-style.css" rel="stylesheet" type="text/css" />
|
||||
<link href="local-style-does-not-exist.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<img src="monolith.png" alt="" />
|
||||
<a href="//local-file.html">Tricky href</a>
|
||||
<a href="https://github.com/Y2Z/monolith">Remote URL</a>
|
||||
<script src="local-script.js"></script>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
2
src/tests/data/basic/local-script.js
Normal file
2
src/tests/data/basic/local-script.js
Normal file
@@ -0,0 +1,2 @@
|
||||
document.body.style.backgroundColor = "green";
|
||||
document.body.style.color = "red";
|
||||
4
src/tests/data/basic/local-style.css
Normal file
4
src/tests/data/basic/local-style.css
Normal file
@@ -0,0 +1,4 @@
|
||||
body {
|
||||
background-color: #000;
|
||||
color: #fff;
|
||||
}
|
||||
23
src/tests/data/import-css-via-data-url/index.html
Normal file
23
src/tests/data/import-css-via-data-url/index.html
Normal file
@@ -0,0 +1,23 @@
|
||||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<title>Attempt to import CSS via data URL asset</title>
|
||||
<style>
|
||||
|
||||
body {
|
||||
background-color: white;
|
||||
color: black;
|
||||
}
|
||||
|
||||
</style>
|
||||
<link href="data:text/css;base64,QGltcG9ydCAic3R5bGUuY3NzIjsK" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<p>If you see pink background with white foreground then we’re in trouble</p>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
4
src/tests/data/import-css-via-data-url/style.css
Normal file
4
src/tests/data/import-css-via-data-url/style.css
Normal file
@@ -0,0 +1,4 @@
|
||||
body {
|
||||
background-color: pink;
|
||||
color: white;
|
||||
}
|
||||
19
src/tests/data/integrity/index.html
Normal file
19
src/tests/data/integrity/index.html
Normal file
@@ -0,0 +1,19 @@
|
||||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<meta http-equiv="Content-Security-Policy" content="default-src 'unsafe-inline' file:;" />
|
||||
<title>Local HTML file</title>
|
||||
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-IWaCTORHkRhOWzcZeILSVmV6V6gPTHgNem6o6rsFAyaKTieDFkeeMrWjtO0DuWrX3bqZY46CVTZXUu0mia0qXQ==" crossorigin="anonymous" />
|
||||
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-vWBzl4NE9oIg8NFOPAyOZbaam0UXWr6aDHPaY2kodSzAFl+mKoj/RMNc6C31NDqK4mE2i68IWxYWqWJPLCgPOw==" crossorigin="anonymous" />
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<p>This page should have black background and white foreground, but only when served via http: (not via file:)</p>
|
||||
<script src="script.js" integrity="sha256-ecrEsYh3+ICCX8BCrNSotXgI5534282JwJjx8Q9ZWLc="></script>
|
||||
<script src="script.js" integrity="sha256-6idk9dK0bOkVdG7Oz4/0YLXSJya8xZHqbRZKMhYrt6o="></script>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
3
src/tests/data/integrity/script.js
Normal file
3
src/tests/data/integrity/script.js
Normal file
@@ -0,0 +1,3 @@
|
||||
function noop() {
|
||||
console.log("monolith");
|
||||
}
|
||||
4
src/tests/data/integrity/style.css
Normal file
4
src/tests/data/integrity/style.css
Normal file
@@ -0,0 +1,4 @@
|
||||
body {
|
||||
background-color: #000;
|
||||
color: #FFF;
|
||||
}
|
||||
29
src/tests/html/add_favicon.rs
Normal file
29
src/tests/html/add_favicon.rs
Normal file
@@ -0,0 +1,29 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use html5ever::serialize::{serialize, SerializeOpts};
|
||||
|
||||
use crate::html;
|
||||
|
||||
#[test]
|
||||
fn basic() {
|
||||
let html = "<div>text</div>";
|
||||
let mut dom = html::html_to_dom(&html);
|
||||
|
||||
dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string());
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"<html><head><link rel=\"icon\" href=\"I_AM_A_FAVICON_DATA_URL\"></link></head><body><div>text</div></body></html>"
|
||||
);
|
||||
}
|
||||
}
|
||||
80
src/tests/html/csp.rs
Normal file
80
src/tests/html/csp.rs
Normal file
@@ -0,0 +1,80 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::html;
|
||||
use crate::opts::Options;
|
||||
|
||||
#[test]
|
||||
fn isolated() {
|
||||
let mut options = Options::default();
|
||||
options.isolate = true;
|
||||
let csp_content = html::csp(&options);
|
||||
|
||||
assert_eq!(csp_content, "default-src 'unsafe-inline' data:;");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_css() {
|
||||
let mut options = Options::default();
|
||||
options.no_css = true;
|
||||
let csp_content = html::csp(&options);
|
||||
|
||||
assert_eq!(csp_content, "style-src 'none';");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_fonts() {
|
||||
let mut options = Options::default();
|
||||
options.no_fonts = true;
|
||||
let csp_content = html::csp(&options);
|
||||
|
||||
assert_eq!(csp_content, "font-src 'none';");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_frames() {
|
||||
let mut options = Options::default();
|
||||
options.no_frames = true;
|
||||
let csp_content = html::csp(&options);
|
||||
|
||||
assert_eq!(csp_content, "frame-src 'none'; child-src 'none';");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_js() {
|
||||
let mut options = Options::default();
|
||||
options.no_js = true;
|
||||
let csp_content = html::csp(&options);
|
||||
|
||||
assert_eq!(csp_content, "script-src 'none';");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_images() {
|
||||
let mut options = Options::default();
|
||||
options.no_images = true;
|
||||
let csp_content = html::csp(&options);
|
||||
|
||||
assert_eq!(csp_content, "img-src data:;");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn all() {
|
||||
let mut options = Options::default();
|
||||
options.isolate = true;
|
||||
options.no_css = true;
|
||||
options.no_fonts = true;
|
||||
options.no_frames = true;
|
||||
options.no_js = true;
|
||||
options.no_images = true;
|
||||
let csp_content = html::csp(&options);
|
||||
|
||||
assert_eq!(csp_content, "default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;");
|
||||
}
|
||||
}
|
||||
31
src/tests/html/embed_srcset.rs
Normal file
31
src/tests/html/embed_srcset.rs
Normal file
@@ -0,0 +1,31 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::html;
|
||||
use crate::opts::Options;
|
||||
|
||||
#[test]
|
||||
fn replace_with_empty_images() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let srcset_value = "small.png 1x, large.png 2x";
|
||||
let mut options = Options::default();
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
|
||||
|
||||
assert_eq!(
|
||||
format!("{} 1x, {} 2x", empty_image!(), empty_image!()),
|
||||
embedded_css
|
||||
);
|
||||
}
|
||||
}
|
||||
53
src/tests/html/get_node_name.rs
Normal file
53
src/tests/html/get_node_name.rs
Normal file
@@ -0,0 +1,53 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use html5ever::rcdom::{Handle, NodeData};
|
||||
|
||||
use crate::html;
|
||||
|
||||
#[test]
|
||||
fn get_node_name() {
|
||||
let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let mut count = 0;
|
||||
|
||||
fn test_walk(node: &Handle, i: &mut i8) {
|
||||
*i += 1;
|
||||
|
||||
match &node.data {
|
||||
NodeData::Document => {
|
||||
for child in node.children.borrow().iter() {
|
||||
test_walk(child, &mut *i);
|
||||
}
|
||||
}
|
||||
NodeData::Element { ref name, .. } => {
|
||||
let node_name = name.local.as_ref().to_string();
|
||||
let parent = html::get_parent_node(node);
|
||||
let parent_node_name = html::get_node_name(&parent);
|
||||
if node_name == "head" || node_name == "body" {
|
||||
assert_eq!(parent_node_name, Some("html"));
|
||||
} else if node_name == "div" {
|
||||
assert_eq!(parent_node_name, Some("body"));
|
||||
} else if node_name == "p" {
|
||||
assert_eq!(parent_node_name, Some("div"));
|
||||
}
|
||||
|
||||
for child in node.children.borrow().iter() {
|
||||
test_walk(child, &mut *i);
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
};
|
||||
}
|
||||
|
||||
test_walk(&dom.document, &mut count);
|
||||
|
||||
assert_eq!(count, 7);
|
||||
}
|
||||
}
|
||||
52
src/tests/html/has_favicon.rs
Normal file
52
src/tests/html/has_favicon.rs
Normal file
@@ -0,0 +1,52 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::html;
|
||||
use crate::opts::Options;
|
||||
|
||||
#[test]
|
||||
fn icon() {
|
||||
let html = "<link rel=\"icon\" href=\"\" /><div>text</div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let res: bool = html::has_favicon(&dom.document);
|
||||
|
||||
assert!(res);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shortcut_icon() {
|
||||
let html = "<link rel=\"shortcut icon\" href=\"\" /><div>text</div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let res: bool = html::has_favicon(&dom.document);
|
||||
|
||||
assert!(res);
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::html;
|
||||
use crate::opts::Options;
|
||||
|
||||
#[test]
|
||||
fn absent() {
|
||||
let html = "<div>text</div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let res: bool = html::has_favicon(&dom.document);
|
||||
|
||||
assert!(!res);
|
||||
}
|
||||
}
|
||||
92
src/tests/html/has_proper_integrity.rs
Normal file
92
src/tests/html/has_proper_integrity.rs
Normal file
@@ -0,0 +1,92 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::html;
|
||||
|
||||
#[test]
|
||||
fn empty_input_sha256() {
|
||||
assert!(html::has_proper_integrity(
|
||||
"".as_bytes(),
|
||||
"sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU="
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sha256() {
|
||||
assert!(html::has_proper_integrity(
|
||||
"abcdef0123456789".as_bytes(),
|
||||
"sha256-9EWAHgy4mSYsm54hmDaIDXPKLRsLnBX7lZyQ6xISNOM="
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sha384() {
|
||||
assert!(html::has_proper_integrity(
|
||||
"abcdef0123456789".as_bytes(),
|
||||
"sha384-gc9l7omltke8C33bedgh15E12M7RrAQa5t63Yb8APlpe7ZhiqV23+oqiulSJl3Kw"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sha512() {
|
||||
assert!(html::has_proper_integrity(
|
||||
"abcdef0123456789".as_bytes(),
|
||||
"sha512-zG5B88cYMqcdiMi9gz0XkOFYw2BpjeYdn5V6+oFrMgSNjRpqL7EF8JEwl17ztZbK3N7I/tTwp3kxQbN1RgFBww=="
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::html;
|
||||
|
||||
#[test]
|
||||
fn empty_hash() {
|
||||
assert!(!html::has_proper_integrity(
|
||||
"abcdef0123456789".as_bytes(),
|
||||
""
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_input_empty_hash() {
|
||||
assert!(!html::has_proper_integrity("".as_bytes(), ""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sha256() {
|
||||
assert!(!html::has_proper_integrity(
|
||||
"abcdef0123456789".as_bytes(),
|
||||
"sha256-badhash"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sha384() {
|
||||
assert!(!html::has_proper_integrity(
|
||||
"abcdef0123456789".as_bytes(),
|
||||
"sha384-badhash"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn sha512() {
|
||||
assert!(!html::has_proper_integrity(
|
||||
"abcdef0123456789".as_bytes(),
|
||||
"sha512-badhash"
|
||||
));
|
||||
}
|
||||
}
|
||||
58
src/tests/html/is_icon.rs
Normal file
58
src/tests/html/is_icon.rs
Normal file
@@ -0,0 +1,58 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::html;
|
||||
|
||||
#[test]
|
||||
fn icon() {
|
||||
assert!(html::is_icon("icon"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shortcut_icon_capitalized() {
|
||||
assert!(html::is_icon("Shortcut Icon"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn icon_uppercase() {
|
||||
assert!(html::is_icon("ICON"));
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::html;
|
||||
|
||||
#[test]
|
||||
fn mask_icon() {
|
||||
assert!(!html::is_icon("mask-icon"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fluid_icon() {
|
||||
assert!(!html::is_icon("fluid-icon"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stylesheet() {
|
||||
assert!(!html::is_icon("stylesheet"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_string() {
|
||||
assert!(!html::is_icon(""));
|
||||
}
|
||||
}
|
||||
82
src/tests/html/metadata_tag.rs
Normal file
82
src/tests/html/metadata_tag.rs
Normal file
@@ -0,0 +1,82 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use chrono::prelude::*;
|
||||
|
||||
use crate::html;
|
||||
|
||||
#[test]
|
||||
fn http_url() {
|
||||
let url = "http://192.168.1.1/";
|
||||
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
|
||||
let metadata_comment: String = html::metadata_tag(url);
|
||||
|
||||
assert_eq!(
|
||||
metadata_comment,
|
||||
format!(
|
||||
"<!-- Saved from {} at {} using {} v{} -->",
|
||||
&url,
|
||||
timestamp,
|
||||
env!("CARGO_PKG_NAME"),
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn file_url() {
|
||||
let url = "file:///home/monolith/index.html";
|
||||
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
|
||||
let metadata_comment: String = html::metadata_tag(url);
|
||||
|
||||
assert_eq!(
|
||||
metadata_comment,
|
||||
format!(
|
||||
"<!-- Saved from local source at {} using {} v{} -->",
|
||||
timestamp,
|
||||
env!("CARGO_PKG_NAME"),
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn data_url() {
|
||||
let url = "data:text/html,Hello%2C%20World!";
|
||||
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
|
||||
let metadata_comment: String = html::metadata_tag(url);
|
||||
|
||||
assert_eq!(
|
||||
metadata_comment,
|
||||
format!(
|
||||
"<!-- Saved from local source at {} using {} v{} -->",
|
||||
timestamp,
|
||||
env!("CARGO_PKG_NAME"),
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::html;
|
||||
|
||||
#[test]
|
||||
fn empty_string() {
|
||||
assert_eq!(html::metadata_tag(""), "");
|
||||
}
|
||||
}
|
||||
10
src/tests/html/mod.rs
Normal file
10
src/tests/html/mod.rs
Normal file
@@ -0,0 +1,10 @@
|
||||
mod add_favicon;
|
||||
mod csp;
|
||||
mod embed_srcset;
|
||||
mod get_node_name;
|
||||
mod has_favicon;
|
||||
mod has_proper_integrity;
|
||||
mod is_icon;
|
||||
mod metadata_tag;
|
||||
mod stringify_document;
|
||||
mod walk_and_embed_assets;
|
||||
150
src/tests/html/stringify_document.rs
Normal file
150
src/tests/html/stringify_document.rs
Normal file
@@ -0,0 +1,150 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::html;
|
||||
use crate::opts::Options;
|
||||
|
||||
#[test]
|
||||
fn div_as_root_element() {
|
||||
let html = "<div><script src=\"some.js\"></script></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let options = Options::default();
|
||||
|
||||
assert_eq!(
|
||||
html::stringify_document(&dom.document, &options),
|
||||
"<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn full_page_with_no_html_head_or_body() {
|
||||
let html = "<title>Isolated document</title>\
|
||||
<link rel=\"something\" href=\"some.css\" />\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
|
||||
<div><script src=\"some.js\"></script></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let mut options = Options::default();
|
||||
options.isolate = true;
|
||||
|
||||
assert_eq!(
|
||||
html::stringify_document(
|
||||
&dom.document,
|
||||
&options
|
||||
),
|
||||
"<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
|
||||
<title>Isolated document</title>\
|
||||
<link rel=\"something\" href=\"some.css\">\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
|
||||
</head>\
|
||||
<body>\
|
||||
<div>\
|
||||
<script src=\"some.js\"></script>\
|
||||
</div>\
|
||||
</body>\
|
||||
</html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn doctype_and_the_rest_no_html_head_or_body() {
|
||||
let html = "<!doctype html>\
|
||||
<title>Unstyled document</title>\
|
||||
<link rel=\"stylesheet\" href=\"main.css\"/>\
|
||||
<div style=\"display: none;\"></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let mut options = Options::default();
|
||||
options.no_css = true;
|
||||
|
||||
assert_eq!(
|
||||
html::stringify_document(&dom.document, &options),
|
||||
"<!DOCTYPE html>\
|
||||
<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
|
||||
<title>Unstyled document</title>\
|
||||
<link rel=\"stylesheet\" href=\"main.css\">\
|
||||
</head>\
|
||||
<body><div style=\"display: none;\"></div></body>\
|
||||
</html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn doctype_and_the_rest_no_html_head_or_body_forbid_frames() {
|
||||
let html = "<!doctype html>\
|
||||
<title>Frameless document</title>\
|
||||
<link rel=\"something\"/>\
|
||||
<div><script src=\"some.js\"></script></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let mut options = Options::default();
|
||||
options.no_frames = true;
|
||||
|
||||
assert_eq!(
|
||||
html::stringify_document(
|
||||
&dom.document,
|
||||
&options
|
||||
),
|
||||
"<!DOCTYPE html>\
|
||||
<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>\
|
||||
<title>Frameless document</title>\
|
||||
<link rel=\"something\">\
|
||||
</head>\
|
||||
<body><div><script src=\"some.js\"></script></div></body>\
|
||||
</html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn doctype_and_the_rest_all_forbidden() {
|
||||
let html = "<!doctype html>\
|
||||
<title>no-frame no-css no-js no-image isolated document</title>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
|
||||
<link rel=\"stylesheet\" href=\"some.css\">\
|
||||
<div>\
|
||||
<script src=\"some.js\"></script>\
|
||||
<img style=\"width: 100%;\" src=\"some.png\" />\
|
||||
<iframe src=\"some.html\"></iframe>\
|
||||
</div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let mut options = Options::default();
|
||||
options.isolate = true;
|
||||
options.no_css = true;
|
||||
options.no_fonts = true;
|
||||
options.no_frames = true;
|
||||
options.no_js = true;
|
||||
options.no_images = true;
|
||||
|
||||
assert_eq!(
|
||||
html::stringify_document(
|
||||
&dom.document,
|
||||
&options
|
||||
),
|
||||
"<!DOCTYPE html>\
|
||||
<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
|
||||
<title>no-frame no-css no-js no-image isolated document</title>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
|
||||
<link rel=\"stylesheet\" href=\"some.css\">\
|
||||
</head>\
|
||||
<body>\
|
||||
<div>\
|
||||
<script src=\"some.js\"></script>\
|
||||
<img style=\"width: 100%;\" src=\"some.png\">\
|
||||
<iframe src=\"some.html\"></iframe>\
|
||||
</div>\
|
||||
</body>\
|
||||
</html>"
|
||||
);
|
||||
}
|
||||
}
|
||||
329
src/tests/html/walk_and_embed_assets.rs
Normal file
329
src/tests/html/walk_and_embed_assets.rs
Normal file
@@ -0,0 +1,329 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use html5ever::serialize::{serialize, SerializeOpts};
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::html;
|
||||
use crate::opts::Options;
|
||||
|
||||
#[test]
|
||||
fn basic() {
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let html = "<div><P></P></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"<html><head></head><body><div><p></p></div></body></html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ensure_no_recursive_iframe() {
|
||||
let html = "<div><P></P><iframe src=\"\"></iframe></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"<html><head></head><body><div><p></p><iframe src=\"\"></iframe></div></body></html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ensure_no_recursive_frame() {
|
||||
let html = "<frameset><frame src=\"\"></frameset>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_css() {
|
||||
let html = "<link rel=\"stylesheet\" href=\"main.css\">\
|
||||
<style>html{background-color: #000;}</style>\
|
||||
<div style=\"display: none;\"></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.no_css = true;
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"<html>\
|
||||
<head>\
|
||||
<link rel=\"stylesheet\">\
|
||||
<style></style>\
|
||||
</head>\
|
||||
<body>\
|
||||
<div></div>\
|
||||
</body>\
|
||||
</html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_images() {
|
||||
let html = "<link rel=\"icon\" href=\"favicon.ico\">\
|
||||
<div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
format!(
|
||||
"<html>\
|
||||
<head>\
|
||||
<link rel=\"icon\">\
|
||||
</head>\
|
||||
<body>\
|
||||
<div>\
|
||||
<img src=\"{empty_image}\">\
|
||||
</div>\
|
||||
</body>\
|
||||
</html>",
|
||||
empty_image = empty_image!()
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_body_background_images() {
|
||||
let html =
|
||||
"<body background=\"no/such/image.png\" background=\"no/such/image2.png\"></body>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"<html><head></head><body></body></html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_frames() {
|
||||
let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.no_frames = true;
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_iframes() {
|
||||
let html = "<iframe src=\"http://trackbook.com\"></iframe>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.no_frames = true;
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"<html><head></head><body><iframe src=\"\"></iframe></body></html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_js() {
|
||||
let html = "<div onClick=\"void(0)\">\
|
||||
<script src=\"http://localhost/assets/some.js\"></script>\
|
||||
<script>alert(1)</script>\
|
||||
</div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.no_js = true;
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"<html><head></head><body><div><script></script>\
|
||||
<script></script></div></body></html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn discards_integrity() {
|
||||
let html = "<title>No integrity</title>\
|
||||
<link integrity=\"sha384-...\" rel=\"something\"/>\
|
||||
<script integrity=\"sha384-...\" src=\"some.js\"></script>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.no_css = true;
|
||||
options.no_frames = true;
|
||||
options.no_js = true;
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"<html>\
|
||||
<head><title>No integrity</title><link rel=\"something\"><script></script></head>\
|
||||
<body></body>\
|
||||
</html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn removes_unwanted_meta_tags() {
|
||||
let html = "<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"Refresh\" value=\"20\"/>\
|
||||
<meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
|
||||
</head>\
|
||||
<body></body>\
|
||||
</html>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.no_css = true;
|
||||
options.no_frames = true;
|
||||
options.no_js = true;
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"<html>\
|
||||
<head>\
|
||||
<meta>\
|
||||
<meta>\
|
||||
</head>\
|
||||
<body></body>\
|
||||
</html>"
|
||||
);
|
||||
}
|
||||
}
|
||||
53
src/tests/js/attr_is_event_handler.rs
Normal file
53
src/tests/js/attr_is_event_handler.rs
Normal file
@@ -0,0 +1,53 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::js;
|
||||
|
||||
#[test]
|
||||
fn onblur_camelcase() {
|
||||
assert!(js::attr_is_event_handler("onBlur"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn onclick_lowercase() {
|
||||
assert!(js::attr_is_event_handler("onclick"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn onclick_camelcase() {
|
||||
assert!(js::attr_is_event_handler("onClick"));
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::js;
|
||||
|
||||
#[test]
|
||||
fn href() {
|
||||
assert!(!js::attr_is_event_handler("href"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_string() {
|
||||
assert!(!js::attr_is_event_handler(""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn class() {
|
||||
assert!(!js::attr_is_event_handler("class"));
|
||||
}
|
||||
}
|
||||
1
src/tests/js/mod.rs
Normal file
1
src/tests/js/mod.rs
Normal file
@@ -0,0 +1 @@
|
||||
mod attr_is_event_handler;
|
||||
14
src/tests/macros/empty_image.rs
Normal file
14
src/tests/macros/empty_image.rs
Normal file
@@ -0,0 +1,14 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
#[test]
|
||||
fn contains_correct_image_data() {
|
||||
assert_eq!(empty_image!(), "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII=");
|
||||
}
|
||||
}
|
||||
2
src/tests/macros/mod.rs
Normal file
2
src/tests/macros/mod.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
mod empty_image;
|
||||
mod str;
|
||||
24
src/tests/macros/str.rs
Normal file
24
src/tests/macros/str.rs
Normal file
@@ -0,0 +1,24 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
#[test]
|
||||
fn returns_empty_string() {
|
||||
assert_eq!(str!(), "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn converts_integer_into_string() {
|
||||
assert_eq!(str!(123), "123");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn converts_str_into_string() {
|
||||
assert_eq!(str!("abc"), "abc");
|
||||
}
|
||||
}
|
||||
7
src/tests/mod.rs
Normal file
7
src/tests/mod.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
mod cli;
|
||||
mod css;
|
||||
mod html;
|
||||
mod js;
|
||||
mod macros;
|
||||
mod opts;
|
||||
mod utils;
|
||||
30
src/tests/opts.rs
Normal file
30
src/tests/opts.rs
Normal file
@@ -0,0 +1,30 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::opts::Options;
|
||||
|
||||
#[test]
|
||||
fn defaults() {
|
||||
let options: Options = Options::default();
|
||||
|
||||
assert_eq!(options.target, str!());
|
||||
assert_eq!(options.no_css, false);
|
||||
assert_eq!(options.no_frames, false);
|
||||
assert_eq!(options.no_fonts, false);
|
||||
assert_eq!(options.no_images, false);
|
||||
assert_eq!(options.isolate, false);
|
||||
assert_eq!(options.no_js, false);
|
||||
assert_eq!(options.insecure, false);
|
||||
assert_eq!(options.no_metadata, false);
|
||||
assert_eq!(options.output, str!());
|
||||
assert_eq!(options.silent, false);
|
||||
assert_eq!(options.timeout, 0);
|
||||
assert_eq!(options.user_agent, "");
|
||||
}
|
||||
}
|
||||
51
src/tests/url/clean_url.rs
Normal file
51
src/tests/url/clean_url.rs
Normal file
@@ -0,0 +1,51 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn removes_fragment() {
|
||||
assert_eq!(
|
||||
url::clean_url("https://somewhere.com/font.eot#iefix"),
|
||||
"https://somewhere.com/font.eot"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn removes_empty_fragment() {
|
||||
assert_eq!(
|
||||
url::clean_url("https://somewhere.com/font.eot#"),
|
||||
"https://somewhere.com/font.eot"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn removes_empty_query_and_empty_fragment() {
|
||||
assert_eq!(
|
||||
url::clean_url("https://somewhere.com/font.eot?#"),
|
||||
"https://somewhere.com/font.eot"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn removes_empty_query_amp_and_empty_fragment() {
|
||||
assert_eq!(
|
||||
url::clean_url("https://somewhere.com/font.eot?a=b&#"),
|
||||
"https://somewhere.com/font.eot?a=b"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn keeps_credentials() {
|
||||
assert_eq!(
|
||||
url::clean_url("https://cookie:monster@gibson.internet/"),
|
||||
"https://cookie:monster@gibson.internet/"
|
||||
);
|
||||
}
|
||||
}
|
||||
31
src/tests/url/data_to_data_url.rs
Normal file
31
src/tests/url/data_to_data_url.rs
Normal file
@@ -0,0 +1,31 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn encode_string_with_specific_media_type() {
|
||||
let mime = "application/javascript";
|
||||
let data = "var word = 'hello';\nalert(word);\n";
|
||||
let data_url = url::data_to_data_url(mime, data.as_bytes(), "");
|
||||
|
||||
assert_eq!(
|
||||
&data_url,
|
||||
"data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_append_fragment() {
|
||||
let data = "<svg></svg>\n";
|
||||
let data_url = url::data_to_data_url("image/svg+xml", data.as_bytes(), "");
|
||||
|
||||
assert_eq!(&data_url, "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K");
|
||||
}
|
||||
}
|
||||
104
src/tests/url/data_url_to_data.rs
Normal file
104
src/tests/url/data_url_to_data.rs
Normal file
@@ -0,0 +1,104 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn parse_text_html_base64() {
|
||||
let (media_type, data) = url::data_url_to_data("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
|
||||
|
||||
assert_eq!(media_type, "text/html");
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&data),
|
||||
"Work expands so as to fill the time available for its completion"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_text_html_utf8() {
|
||||
let (media_type, data) = url::data_url_to_data(
|
||||
"data:text/html;utf8,Work expands so as to fill the time available for its completion",
|
||||
);
|
||||
|
||||
assert_eq!(media_type, "text/html");
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&data),
|
||||
"Work expands so as to fill the time available for its completion"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_text_html_plaintext() {
|
||||
let (media_type, data) = url::data_url_to_data(
|
||||
"data:text/html,Work expands so as to fill the time available for its completion",
|
||||
);
|
||||
|
||||
assert_eq!(media_type, "text/html");
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&data),
|
||||
"Work expands so as to fill the time available for its completion"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_text_html_charset_utf_8_between_two_whitespaces() {
|
||||
let (media_type, data) = url::data_url_to_data(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
|
||||
|
||||
assert_eq!(media_type, "text/html");
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&data),
|
||||
"Work expands so as to fill the time available for its completion"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_text_css_url_encoded() {
|
||||
let (media_type, data) =
|
||||
url::data_url_to_data("data:text/css,div{background-color:%23000}");
|
||||
|
||||
assert_eq!(media_type, "text/css");
|
||||
assert_eq!(String::from_utf8_lossy(&data), "div{background-color:#000}");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_no_media_type_base64() {
|
||||
let (media_type, data) = url::data_url_to_data("data:;base64,dGVzdA==");
|
||||
|
||||
assert_eq!(media_type, "");
|
||||
assert_eq!(String::from_utf8_lossy(&data), "test");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_no_media_type_no_encoding() {
|
||||
let (media_type, data) = url::data_url_to_data("data:;,test%20test");
|
||||
|
||||
assert_eq!(media_type, "");
|
||||
assert_eq!(String::from_utf8_lossy(&data), "test test");
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn just_word_data() {
|
||||
let (media_type, data) = url::data_url_to_data("data");
|
||||
|
||||
assert_eq!(media_type, "");
|
||||
assert_eq!(String::from_utf8_lossy(&data), "");
|
||||
}
|
||||
}
|
||||
39
src/tests/url/decode_url.rs
Normal file
39
src/tests/url/decode_url.rs
Normal file
@@ -0,0 +1,39 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn decode_unicode_characters() {
|
||||
assert_eq!(
|
||||
url::decode_url(str!(
|
||||
"%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
|
||||
)),
|
||||
"検ヒム解塗ゃッ = サ"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_file_url() {
|
||||
assert_eq!(
|
||||
url::decode_url(str!("file:///tmp/space%20here/test%231.html")),
|
||||
"file:///tmp/space here/test#1.html"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn plus_sign() {
|
||||
assert_eq!(
|
||||
url::decode_url(str!(
|
||||
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
|
||||
)),
|
||||
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
|
||||
);
|
||||
}
|
||||
}
|
||||
41
src/tests/url/file_url_to_fs_path.rs
Normal file
41
src/tests/url/file_url_to_fs_path.rs
Normal file
@@ -0,0 +1,41 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn remove_protocl_and_fragment() {
|
||||
if cfg!(windows) {
|
||||
assert_eq!(
|
||||
url::file_url_to_fs_path("file:///C:/documents/some-path/some-file.svg#fragment"),
|
||||
"C:\\documents\\some-path\\some-file.svg"
|
||||
);
|
||||
} else {
|
||||
assert_eq!(
|
||||
url::file_url_to_fs_path("file:///tmp/some-path/some-file.svg#fragment"),
|
||||
"/tmp/some-path/some-file.svg"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decodes_urls() {
|
||||
if cfg!(windows) {
|
||||
assert_eq!(
|
||||
url::file_url_to_fs_path("file:///C:/Documents%20and%20Settings/some-file.html"),
|
||||
"C:\\Documents and Settings\\some-file.html"
|
||||
);
|
||||
} else {
|
||||
assert_eq!(
|
||||
url::file_url_to_fs_path("file:///home/user/My%20Documents"),
|
||||
"/home/user/My Documents"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
26
src/tests/url/get_url_fragment.rs
Normal file
26
src/tests/url/get_url_fragment.rs
Normal file
@@ -0,0 +1,26 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn data_url() {
|
||||
assert_eq!(
|
||||
url::get_url_fragment(
|
||||
"data:image/svg+xml;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h#test"
|
||||
),
|
||||
"test"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn https_empty() {
|
||||
assert_eq!(url::get_url_fragment("https://kernel.org#"), "");
|
||||
}
|
||||
}
|
||||
52
src/tests/url/is_data_url.rs
Normal file
52
src/tests/url/is_data_url.rs
Normal file
@@ -0,0 +1,52 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn data_url_text_html() {
|
||||
assert!(url::is_data_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn data_url_no_media_type() {
|
||||
assert!(url::is_data_url(
|
||||
"data:;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn https_url() {
|
||||
assert!(!url::is_data_url("https://kernel.org"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_protocol_url() {
|
||||
assert!(!url::is_data_url("//kernel.org"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_string() {
|
||||
assert!(!url::is_data_url(""));
|
||||
}
|
||||
}
|
||||
83
src/tests/url/is_file_url.rs
Normal file
83
src/tests/url/is_file_url.rs
Normal file
@@ -0,0 +1,83 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn unix_file_url() {
|
||||
assert!(url::is_file_url(
|
||||
"file:///home/user/Websites/my-website/index.html"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn windows_file_url() {
|
||||
assert!(url::is_file_url(
|
||||
"file:///C:/Documents%20and%20Settings/user/Websites/my-website/assets/images/logo.png"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unix_url_with_backslashes() {
|
||||
assert!(url::is_file_url(
|
||||
"file:\\\\\\home\\user\\Websites\\my-website\\index.html"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn windows_file_url_with_backslashes() {
|
||||
assert!(url::is_file_url(
|
||||
"file:\\\\\\C:\\Documents%20and%20Settings\\user\\Websites\\my-website\\assets\\images\\logo.png"
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn url_with_no_protocl() {
|
||||
assert!(!url::is_file_url("//kernel.org"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dot_slash_filename() {
|
||||
assert!(!url::is_file_url("./index.html"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn just_filename() {
|
||||
assert!(!url::is_file_url("some-local-page.htm"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn https_ip_port_url() {
|
||||
assert!(!url::is_file_url("https://1.2.3.4:80/www/index.html"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn data_url() {
|
||||
assert!(!url::is_file_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn just_word_file() {
|
||||
assert!(!url::is_file_url("file"));
|
||||
}
|
||||
}
|
||||
65
src/tests/url/is_http_url.rs
Normal file
65
src/tests/url/is_http_url.rs
Normal file
@@ -0,0 +1,65 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn http_url() {
|
||||
assert!(url::is_http_url("http://kernel.org"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn https_url() {
|
||||
assert!(url::is_http_url("https://www.rust-lang.org/"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn http_url_with_backslashes() {
|
||||
assert!(url::is_http_url("http:\\\\freebsd.org\\"));
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn url_with_no_protocol() {
|
||||
assert!(!url::is_http_url("//kernel.org"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn dot_slash_filename() {
|
||||
assert!(!url::is_http_url("./index.html"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn just_filename() {
|
||||
assert!(!url::is_http_url("some-local-page.htm"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn https_ip_port_url() {
|
||||
assert!(!url::is_http_url("ftp://1.2.3.4/www/index.html"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn data_url() {
|
||||
assert!(!url::is_http_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
|
||||
));
|
||||
}
|
||||
}
|
||||
12
src/tests/url/mod.rs
Normal file
12
src/tests/url/mod.rs
Normal file
@@ -0,0 +1,12 @@
|
||||
mod clean_url;
|
||||
mod data_to_data_url;
|
||||
mod data_url_to_data;
|
||||
mod decode_url;
|
||||
mod file_url_to_fs_path;
|
||||
mod get_url_fragment;
|
||||
mod is_data_url;
|
||||
mod is_file_url;
|
||||
mod is_http_url;
|
||||
mod resolve_url;
|
||||
mod url_has_protocol;
|
||||
mod url_with_fragment;
|
||||
239
src/tests/url/resolve_url.rs
Normal file
239
src/tests/url/resolve_url.rs
Normal file
@@ -0,0 +1,239 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use url::ParseError;
|
||||
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn from_https_to_level_up_relative() -> Result<(), ParseError> {
|
||||
let resolved_url =
|
||||
url::resolve_url("https://www.kernel.org", "../category/signatures.html")?;
|
||||
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"https://www.kernel.org/category/signatures.html"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_just_filename_to_full_https_url() -> Result<(), ParseError> {
|
||||
let resolved_url = url::resolve_url(
|
||||
"saved_page.htm",
|
||||
"https://www.kernel.org/category/signatures.html",
|
||||
)?;
|
||||
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"https://www.kernel.org/category/signatures.html"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_https_url_to_url_with_no_protocol() -> Result<(), ParseError> {
|
||||
let resolved_url = url::resolve_url(
|
||||
"https://www.kernel.org",
|
||||
"//www.kernel.org/theme/images/logos/tux.png",
|
||||
)?;
|
||||
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"https://www.kernel.org/theme/images/logos/tux.png"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_https_url_to_url_with_no_protocol_and_on_different_hostname() -> Result<(), ParseError>
|
||||
{
|
||||
let resolved_url = url::resolve_url(
|
||||
"https://www.kernel.org",
|
||||
"//another-host.org/theme/images/logos/tux.png",
|
||||
)?;
|
||||
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"https://another-host.org/theme/images/logos/tux.png"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_https_url_to_relative_root_path() -> Result<(), ParseError> {
|
||||
let resolved_url = url::resolve_url(
|
||||
"https://www.kernel.org/category/signatures.html",
|
||||
"/theme/images/logos/tux.png",
|
||||
)?;
|
||||
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"https://www.kernel.org/theme/images/logos/tux.png"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_https_to_just_filename() -> Result<(), ParseError> {
|
||||
let resolved_url = url::resolve_url(
|
||||
"https://www.w3schools.com/html/html_iframe.asp",
|
||||
"default.asp",
|
||||
)?;
|
||||
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"https://www.w3schools.com/html/default.asp"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_data_url_to_https() -> Result<(), ParseError> {
|
||||
let resolved_url = url::resolve_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
|
||||
"https://www.kernel.org/category/signatures.html",
|
||||
)?;
|
||||
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"https://www.kernel.org/category/signatures.html"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_data_url_to_data_url() -> Result<(), ParseError> {
|
||||
let resolved_url = url::resolve_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
|
||||
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K",
|
||||
)?;
|
||||
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_file_url_to_relative_path() -> Result<(), ParseError> {
|
||||
let resolved_url = url::resolve_url(
|
||||
"file:///home/user/Websites/my-website/index.html",
|
||||
"assets/images/logo.png",
|
||||
)
|
||||
.unwrap_or(str!());
|
||||
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"file:///home/user/Websites/my-website/assets/images/logo.png"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_file_url_to_relative_path_with_backslashes() -> Result<(), ParseError> {
|
||||
let resolved_url = url::resolve_url(
|
||||
"file:\\\\\\home\\user\\Websites\\my-website\\index.html",
|
||||
"assets\\images\\logo.png",
|
||||
)
|
||||
.unwrap_or(str!());
|
||||
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"file:///home/user/Websites/my-website/assets/images/logo.png"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn from_data_url_to_file_url() -> Result<(), ParseError> {
|
||||
let resolved_url = url::resolve_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
|
||||
"file:///etc/passwd",
|
||||
)
|
||||
.unwrap_or(str!());
|
||||
|
||||
assert_eq!(resolved_url.as_str(), "file:///etc/passwd");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn preserve_fragment() -> Result<(), ParseError> {
|
||||
let resolved_url = url::resolve_url(
|
||||
"http://doesnt-matter.local/",
|
||||
"css/fonts/fontmarvelous.svg#fontmarvelous",
|
||||
)
|
||||
.unwrap_or(str!());
|
||||
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
"http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous"
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn resolve_from_file_url_to_file_url() -> Result<(), ParseError> {
|
||||
let resolved_url = if cfg!(windows) {
|
||||
url::resolve_url("file:///c:/index.html", "file:///c:/image.png").unwrap_or(str!())
|
||||
} else {
|
||||
url::resolve_url("file:///tmp/index.html", "file:///tmp/image.png").unwrap_or(str!())
|
||||
};
|
||||
|
||||
assert_eq!(
|
||||
resolved_url.as_str(),
|
||||
if cfg!(windows) {
|
||||
"file:///c:/image.png"
|
||||
} else {
|
||||
"file:///tmp/image.png"
|
||||
}
|
||||
);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::url;
|
||||
use url::ParseError;
|
||||
|
||||
#[test]
|
||||
fn from_data_url_to_url_with_no_protocol() -> Result<(), ParseError> {
|
||||
let resolved_url = url::resolve_url(
|
||||
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
|
||||
"//www.w3schools.com/html/html_iframe.asp",
|
||||
)
|
||||
.unwrap_or(str!());
|
||||
|
||||
assert_eq!(resolved_url.as_str(), "");
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
91
src/tests/url/url_has_protocol.rs
Normal file
91
src/tests/url/url_has_protocol.rs
Normal file
@@ -0,0 +1,91 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn mailto() {
|
||||
assert!(url::url_has_protocol(
|
||||
"mailto:somebody@somewhere.com?subject=hello"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tel() {
|
||||
assert!(url::url_has_protocol("tel:5551234567"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ftp_no_slashes() {
|
||||
assert!(url::url_has_protocol("ftp:some-ftp-server.com"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ftp_with_credentials() {
|
||||
assert!(url::url_has_protocol(
|
||||
"ftp://user:password@some-ftp-server.com"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn javascript() {
|
||||
assert!(url::url_has_protocol("javascript:void(0)"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn http() {
|
||||
assert!(url::url_has_protocol("http://news.ycombinator.com"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn https() {
|
||||
assert!(url::url_has_protocol("https://github.com"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn mailto_uppercase() {
|
||||
assert!(url::url_has_protocol(
|
||||
"MAILTO:somebody@somewhere.com?subject=hello"
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::utils;
|
||||
|
||||
#[test]
|
||||
fn url_with_no_protocol() {
|
||||
assert!(!url::url_has_protocol(
|
||||
"//some-hostname.com/some-file.html"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn relative_path() {
|
||||
assert!(!url::url_has_protocol("some-hostname.com/some-file.html"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn relative_to_root_path() {
|
||||
assert!(!url::url_has_protocol("/some-file.html"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_string() {
|
||||
assert!(!url::url_has_protocol(""));
|
||||
}
|
||||
}
|
||||
40
src/tests/url/url_with_fragment.rs
Normal file
40
src/tests/url/url_with_fragment.rs
Normal file
@@ -0,0 +1,40 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn url_with_fragment_url() {
|
||||
let url = "https://localhost.localdomain/path/";
|
||||
let fragment = "test";
|
||||
let assembled_url = url::url_with_fragment(url, fragment);
|
||||
|
||||
assert_eq!(&assembled_url, "https://localhost.localdomain/path/#test");
|
||||
}
|
||||
#[test]
|
||||
fn url_with_fragment_empty_url() {
|
||||
let url = "https://localhost.localdomain/path/";
|
||||
let fragment = "";
|
||||
let assembled_url = url::url_with_fragment(url, fragment);
|
||||
|
||||
assert_eq!(&assembled_url, "https://localhost.localdomain/path/");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn url_with_fragment_data_url() {
|
||||
let url = "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K";
|
||||
let fragment = "fragment";
|
||||
let assembled_url = url::url_with_fragment(url, fragment);
|
||||
|
||||
assert_eq!(
|
||||
&assembled_url,
|
||||
"data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K#fragment"
|
||||
);
|
||||
}
|
||||
}
|
||||
155
src/tests/utils/detect_media_type.rs
Normal file
155
src/tests/utils/detect_media_type.rs
Normal file
@@ -0,0 +1,155 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::utils;
|
||||
|
||||
#[test]
|
||||
fn image_gif87() {
|
||||
assert_eq!(utils::detect_media_type(b"GIF87a", ""), "image/gif");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn image_gif89() {
|
||||
assert_eq!(utils::detect_media_type(b"GIF89a", ""), "image/gif");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn image_jpeg() {
|
||||
assert_eq!(utils::detect_media_type(b"\xFF\xD8\xFF", ""), "image/jpeg");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn image_png() {
|
||||
assert_eq!(
|
||||
utils::detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A", ""),
|
||||
"image/png"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn image_svg() {
|
||||
assert_eq!(utils::detect_media_type(b"<svg ", ""), "image/svg+xml");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn image_webp() {
|
||||
assert_eq!(
|
||||
utils::detect_media_type(b"RIFF....WEBPVP8 ", ""),
|
||||
"image/webp"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn image_icon() {
|
||||
assert_eq!(
|
||||
utils::detect_media_type(b"\x00\x00\x01\x00", ""),
|
||||
"image/x-icon"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn image_svg_filename() {
|
||||
assert_eq!(
|
||||
utils::detect_media_type(b"<?xml ", "local-file.svg"),
|
||||
"image/svg+xml"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn image_svg_url_uppercase() {
|
||||
assert_eq!(
|
||||
utils::detect_media_type(b"", "https://some-site.com/images/local-file.SVG"),
|
||||
"image/svg+xml"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn audio_mpeg() {
|
||||
assert_eq!(utils::detect_media_type(b"ID3", ""), "audio/mpeg");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn audio_mpeg_2() {
|
||||
assert_eq!(utils::detect_media_type(b"\xFF\x0E", ""), "audio/mpeg");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn audio_mpeg_3() {
|
||||
assert_eq!(utils::detect_media_type(b"\xFF\x0F", ""), "audio/mpeg");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn audio_ogg() {
|
||||
assert_eq!(utils::detect_media_type(b"OggS", ""), "audio/ogg");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn audio_wav() {
|
||||
assert_eq!(
|
||||
utils::detect_media_type(b"RIFF....WAVEfmt ", ""),
|
||||
"audio/wav"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn audio_flac() {
|
||||
assert_eq!(utils::detect_media_type(b"fLaC", ""), "audio/x-flac");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn video_avi() {
|
||||
assert_eq!(
|
||||
utils::detect_media_type(b"RIFF....AVI LIST", ""),
|
||||
"video/avi"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn video_mp4() {
|
||||
assert_eq!(utils::detect_media_type(b"....ftyp", ""), "video/mp4");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn video_mpeg() {
|
||||
assert_eq!(
|
||||
utils::detect_media_type(b"\x00\x00\x01\x0B", ""),
|
||||
"video/mpeg"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn video_quicktime() {
|
||||
assert_eq!(utils::detect_media_type(b"....moov", ""), "video/quicktime");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn video_webm() {
|
||||
assert_eq!(
|
||||
utils::detect_media_type(b"\x1A\x45\xDF\xA3", ""),
|
||||
"video/webm"
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::utils;
|
||||
|
||||
#[test]
|
||||
fn unknown_media_type() {
|
||||
assert_eq!(utils::detect_media_type(b"abcdef0123456789", ""), "");
|
||||
}
|
||||
}
|
||||
31
src/tests/utils/indent.rs
Normal file
31
src/tests/utils/indent.rs
Normal file
@@ -0,0 +1,31 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::utils;
|
||||
|
||||
#[test]
|
||||
fn zero() {
|
||||
assert_eq!(utils::indent(0), "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn one() {
|
||||
assert_eq!(utils::indent(1), " ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn two() {
|
||||
assert_eq!(utils::indent(2), " ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn three() {
|
||||
assert_eq!(utils::indent(3), " ");
|
||||
}
|
||||
}
|
||||
3
src/tests/utils/mod.rs
Normal file
3
src/tests/utils/mod.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
mod detect_media_type;
|
||||
mod indent;
|
||||
mod retrieve_asset;
|
||||
141
src/tests/utils/retrieve_asset.rs
Normal file
141
src/tests/utils/retrieve_asset.rs
Normal file
@@ -0,0 +1,141 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
|
||||
use crate::url;
|
||||
use crate::utils;
|
||||
|
||||
#[test]
|
||||
fn read_data_url() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
|
||||
// If both source and target are data URLs,
|
||||
// ensure the result contains target data URL
|
||||
let (data, final_url, media_type) = utils::retrieve_asset(
|
||||
cache,
|
||||
&client,
|
||||
"data:text/html;base64,c291cmNl",
|
||||
"data:text/html;base64,dGFyZ2V0",
|
||||
false,
|
||||
0,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
url::data_to_data_url(&media_type, &data, &final_url),
|
||||
url::data_to_data_url("text/html", "target".as_bytes(), "")
|
||||
);
|
||||
assert_eq!(
|
||||
final_url,
|
||||
url::data_to_data_url("text/html", "target".as_bytes(), "")
|
||||
);
|
||||
assert_eq!(&media_type, "text/html");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn read_local_file_with_file_url_parent() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
|
||||
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||
|
||||
// Inclusion of local assets from local sources should be allowed
|
||||
let cwd = env::current_dir().unwrap();
|
||||
let (data, final_url, _media_type) = utils::retrieve_asset(
|
||||
cache,
|
||||
&client,
|
||||
&format!(
|
||||
"{file}{cwd}/src/tests/data/basic/local-file.html",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd.to_str().unwrap()
|
||||
),
|
||||
&format!(
|
||||
"{file}{cwd}/src/tests/data/basic/local-script.js",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd.to_str().unwrap()
|
||||
),
|
||||
false,
|
||||
0,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(url::data_to_data_url("application/javascript", &data, &final_url), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
|
||||
assert_eq!(
|
||||
&final_url,
|
||||
&format!(
|
||||
"{file}{cwd}/src/tests/data/basic/local-script.js",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd.to_str().unwrap()
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::utils;
|
||||
|
||||
#[test]
|
||||
fn read_local_file_with_data_url_parent() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
|
||||
// Inclusion of local assets from data URL sources should not be allowed
|
||||
match utils::retrieve_asset(
|
||||
cache,
|
||||
&client,
|
||||
"data:text/html;base64,SoUrCe",
|
||||
"file:///etc/passwd",
|
||||
false,
|
||||
0,
|
||||
) {
|
||||
Ok((..)) => {
|
||||
assert!(false);
|
||||
}
|
||||
Err(_) => {
|
||||
assert!(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn read_local_file_with_https_parent() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
|
||||
// Inclusion of local assets from remote sources should not be allowed
|
||||
match utils::retrieve_asset(
|
||||
cache,
|
||||
&client,
|
||||
"https://kernel.org/",
|
||||
"file:///etc/passwd",
|
||||
false,
|
||||
0,
|
||||
) {
|
||||
Ok((..)) => {
|
||||
assert!(false);
|
||||
}
|
||||
Err(_) => {
|
||||
assert!(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
168
src/url.rs
Normal file
168
src/url.rs
Normal file
@@ -0,0 +1,168 @@
|
||||
use base64;
|
||||
use url::{form_urlencoded, ParseError, Url};
|
||||
|
||||
use crate::utils::detect_media_type;
|
||||
|
||||
pub fn clean_url<T: AsRef<str>>(input: T) -> String {
|
||||
let mut url = Url::parse(input.as_ref()).unwrap();
|
||||
|
||||
// Clear fragment
|
||||
url.set_fragment(None);
|
||||
|
||||
// Get rid of stray question mark
|
||||
if url.query() == Some("") {
|
||||
url.set_query(None);
|
||||
}
|
||||
|
||||
// Remove empty trailing ampersand(s)
|
||||
let mut result: String = url.to_string();
|
||||
while result.ends_with("&") {
|
||||
result.pop();
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str) -> String {
|
||||
let media_type: String = if media_type.is_empty() {
|
||||
detect_media_type(data, &url)
|
||||
} else {
|
||||
media_type.to_string()
|
||||
};
|
||||
|
||||
format!("data:{};base64,{}", media_type, base64::encode(data))
|
||||
}
|
||||
|
||||
pub fn data_url_to_data<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
|
||||
let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
|
||||
let path: String = parsed_url.path().to_string();
|
||||
let comma_loc: usize = path.find(',').unwrap_or(path.len());
|
||||
|
||||
let meta_data: String = path.chars().take(comma_loc).collect();
|
||||
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
|
||||
|
||||
let text: String = decode_url(raw_data);
|
||||
|
||||
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
|
||||
let mut media_type: String = str!();
|
||||
let mut encoding: &str = "";
|
||||
|
||||
let mut i: i8 = 0;
|
||||
for item in &meta_data_items {
|
||||
if i == 0 {
|
||||
media_type = str!(item);
|
||||
} else {
|
||||
if item.eq_ignore_ascii_case("base64")
|
||||
|| item.eq_ignore_ascii_case("utf8")
|
||||
|| item.eq_ignore_ascii_case("charset=UTF-8")
|
||||
{
|
||||
encoding = item;
|
||||
}
|
||||
}
|
||||
|
||||
i = i + 1;
|
||||
}
|
||||
|
||||
let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
|
||||
base64::decode(&text).unwrap_or(vec![])
|
||||
} else {
|
||||
text.as_bytes().to_vec()
|
||||
};
|
||||
|
||||
(media_type, data)
|
||||
}
|
||||
|
||||
pub fn decode_url(input: String) -> String {
|
||||
let input: String = input.replace("+", "%2B");
|
||||
|
||||
form_urlencoded::parse(input.as_bytes())
|
||||
.map(|(key, val)| {
|
||||
[
|
||||
key.to_string(),
|
||||
if val.to_string().len() == 0 {
|
||||
str!()
|
||||
} else {
|
||||
str!('=')
|
||||
},
|
||||
val.to_string(),
|
||||
]
|
||||
.concat()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn file_url_to_fs_path(url: &str) -> String {
|
||||
if !is_file_url(url) {
|
||||
return str!();
|
||||
}
|
||||
|
||||
let cutoff_l = if cfg!(windows) { 8 } else { 7 };
|
||||
let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string());
|
||||
let url_fragment = get_url_fragment(url);
|
||||
if url_fragment != "" {
|
||||
let max_len = fs_file_path.len() - 1 - url_fragment.len();
|
||||
fs_file_path = fs_file_path[0..max_len].to_string();
|
||||
}
|
||||
|
||||
if cfg!(windows) {
|
||||
fs_file_path = fs_file_path.replace("/", "\\");
|
||||
}
|
||||
|
||||
// File paths should not be %-encoded
|
||||
decode_url(fs_file_path)
|
||||
}
|
||||
|
||||
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
|
||||
if Url::parse(url.as_ref()).unwrap().fragment() == None {
|
||||
str!()
|
||||
} else {
|
||||
str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme() == "data"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme() == "file"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
|
||||
let result = if is_http_url(to.as_ref()) {
|
||||
to.as_ref().to_string()
|
||||
} else {
|
||||
Url::parse(from.as_ref())?
|
||||
.join(to.as_ref())?
|
||||
.as_ref()
|
||||
.to_string()
|
||||
};
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme().len() > 0))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn url_with_fragment(url: &str, fragment: &str) -> String {
|
||||
let mut result = str!(&url);
|
||||
|
||||
if !fragment.is_empty() {
|
||||
result += "#";
|
||||
result += fragment;
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
177
src/utils.rs
177
src/utils.rs
@@ -1,14 +1,19 @@
|
||||
extern crate base64;
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
|
||||
use self::base64::encode;
|
||||
use crate::url::{clean_url, data_url_to_data, file_url_to_fs_path, is_data_url, is_file_url};
|
||||
|
||||
static MAGIC: [[&[u8]; 2]; 19] = [
|
||||
const INDENT: &str = " ";
|
||||
|
||||
const MAGIC: [[&[u8]; 2]; 18] = [
|
||||
// Image
|
||||
[b"GIF87a", b"image/gif"],
|
||||
[b"GIF89a", b"image/gif"],
|
||||
[b"\xFF\xD8\xFF", b"image/jpeg"],
|
||||
[b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"],
|
||||
[b"<?xml ", b"image/svg+xml"],
|
||||
[b"<svg ", b"image/svg+xml"],
|
||||
[b"RIFF....WEBPVP8 ", b"image/webp"],
|
||||
[b"\x00\x00\x01\x00", b"image/x-icon"],
|
||||
@@ -27,66 +32,122 @@ static MAGIC: [[&[u8]; 2]; 19] = [
|
||||
[b"\x1A\x45\xDF\xA3", b"video/webm"],
|
||||
];
|
||||
|
||||
pub fn data_to_dataurl(mime: &str, data: &[u8]) -> String {
|
||||
let mimetype = if mime == "" {
|
||||
detect_mimetype(data)
|
||||
} else {
|
||||
mime.to_string()
|
||||
};
|
||||
format!("data:{};base64,{}", mimetype, encode(data))
|
||||
}
|
||||
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
|
||||
"image/svg+xml",
|
||||
"text/css",
|
||||
"text/html",
|
||||
"text/javascript",
|
||||
"text/plain",
|
||||
];
|
||||
|
||||
fn detect_mimetype(data: &[u8]) -> String {
|
||||
let mut re = String::new();
|
||||
|
||||
for item in MAGIC.iter() {
|
||||
pub fn detect_media_type(data: &[u8], url: &str) -> String {
|
||||
for item in MAGIC.iter() {
|
||||
if data.starts_with(item[0]) {
|
||||
re = String::from_utf8(item[1].to_vec()).unwrap();
|
||||
break;
|
||||
return String::from_utf8(item[1].to_vec()).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
re
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn test_data_to_dataurl() {
|
||||
let mime = "application/javascript";
|
||||
let data = "var word = 'hello';\nalert(word);\n";
|
||||
let datauri = data_to_dataurl(mime, data.as_bytes());
|
||||
assert_eq!(
|
||||
&datauri,
|
||||
"data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
|
||||
);
|
||||
if url.to_lowercase().ends_with(".svg") {
|
||||
return str!("image/svg+xml");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_detect_mimetype() {
|
||||
// Image
|
||||
assert_eq!(detect_mimetype(b"GIF87a"), "image/gif");
|
||||
assert_eq!(detect_mimetype(b"GIF89a"), "image/gif");
|
||||
assert_eq!(detect_mimetype(b"\xFF\xD8\xFF"), "image/jpeg");
|
||||
assert_eq!(detect_mimetype(b"\x89PNG\x0D\x0A\x1A\x0A"), "image/png");
|
||||
assert_eq!(detect_mimetype(b"<?xml "), "image/svg+xml");
|
||||
assert_eq!(detect_mimetype(b"<svg "), "image/svg+xml");
|
||||
assert_eq!(detect_mimetype(b"RIFF....WEBPVP8 "), "image/webp");
|
||||
assert_eq!(detect_mimetype(b"\x00\x00\x01\x00"), "image/x-icon");
|
||||
// Audio
|
||||
assert_eq!(detect_mimetype(b"ID3"), "audio/mpeg");
|
||||
assert_eq!(detect_mimetype(b"\xFF\x0E"), "audio/mpeg");
|
||||
assert_eq!(detect_mimetype(b"\xFF\x0F"), "audio/mpeg");
|
||||
assert_eq!(detect_mimetype(b"OggS"), "audio/ogg");
|
||||
assert_eq!(detect_mimetype(b"RIFF....WAVEfmt "), "audio/wav");
|
||||
assert_eq!(detect_mimetype(b"fLaC"), "audio/x-flac");
|
||||
// Video
|
||||
assert_eq!(detect_mimetype(b"RIFF....AVI LIST"), "video/avi");
|
||||
assert_eq!(detect_mimetype(b"....ftyp"), "video/mp4");
|
||||
assert_eq!(detect_mimetype(b"\x00\x00\x01\x0B"), "video/mpeg");
|
||||
assert_eq!(detect_mimetype(b"....moov"), "video/quicktime");
|
||||
assert_eq!(detect_mimetype(b"\x1A\x45\xDF\xA3"), "video/webm");
|
||||
str!()
|
||||
}
|
||||
|
||||
pub fn is_plaintext_media_type(media_type: &str) -> bool {
|
||||
PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
|
||||
}
|
||||
|
||||
pub fn indent(level: u32) -> String {
|
||||
let mut result = str!();
|
||||
let mut l: u32 = level;
|
||||
while l > 0 {
|
||||
result += INDENT;
|
||||
l -= 1;
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub fn retrieve_asset(
|
||||
cache: &mut HashMap<String, Vec<u8>>,
|
||||
client: &Client,
|
||||
parent_url: &str,
|
||||
url: &str,
|
||||
opt_silent: bool,
|
||||
depth: u32,
|
||||
) -> Result<(Vec<u8>, String, String), reqwest::Error> {
|
||||
if url.len() == 0 {
|
||||
// Provoke error
|
||||
client.get("").send()?;
|
||||
}
|
||||
|
||||
if is_data_url(&url) {
|
||||
let (media_type, data) = data_url_to_data(url);
|
||||
Ok((data, url.to_string(), media_type))
|
||||
} else if is_file_url(&url) {
|
||||
// Check if parent_url is also file:///
|
||||
// (if not, then we don't embed the asset)
|
||||
if !is_file_url(&parent_url) {
|
||||
// Provoke error
|
||||
client.get("").send()?;
|
||||
}
|
||||
|
||||
let fs_file_path: String = file_url_to_fs_path(url);
|
||||
let path = Path::new(&fs_file_path);
|
||||
if path.exists() {
|
||||
if !opt_silent {
|
||||
eprintln!("{}{}", indent(depth).as_str(), &url);
|
||||
}
|
||||
|
||||
Ok((fs::read(&fs_file_path).expect(""), url.to_string(), str!()))
|
||||
} else {
|
||||
// Provoke error
|
||||
Err(client.get("").send().unwrap_err())
|
||||
}
|
||||
} else {
|
||||
let cache_key: String = clean_url(&url);
|
||||
|
||||
if cache.contains_key(&cache_key) {
|
||||
// URL is in cache, we get and return it
|
||||
if !opt_silent {
|
||||
eprintln!("{}{} (from cache)", indent(depth).as_str(), &url);
|
||||
}
|
||||
|
||||
Ok((
|
||||
cache.get(&cache_key).unwrap().to_vec(),
|
||||
url.to_string(),
|
||||
str!(),
|
||||
))
|
||||
} else {
|
||||
// URL not in cache, we retrieve the file
|
||||
let mut response = client.get(url).send()?;
|
||||
let res_url = response.url().to_string();
|
||||
|
||||
if !opt_silent {
|
||||
if url == res_url {
|
||||
eprintln!("{}{}", indent(depth).as_str(), &url);
|
||||
} else {
|
||||
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &res_url);
|
||||
}
|
||||
}
|
||||
|
||||
let new_cache_key: String = clean_url(&res_url);
|
||||
|
||||
// Convert response into a byte array
|
||||
let mut data: Vec<u8> = vec![];
|
||||
response.copy_to(&mut data)?;
|
||||
|
||||
// Attempt to obtain media type by reading the Content-Type header
|
||||
let media_type = response
|
||||
.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.and_then(|header| header.to_str().ok())
|
||||
.unwrap_or("");
|
||||
|
||||
// Add to cache
|
||||
cache.insert(new_cache_key, data.clone());
|
||||
|
||||
Ok((data, res_url, media_type.to_string()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user