Compare commits
178 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
3d678d80ee | ||
|
|
19a87f426e | ||
|
|
cbe3f9f554 | ||
|
|
b6a44c64cf | ||
|
|
84e2dd789c | ||
|
|
ac4945ca97 | ||
|
|
2ca2c7aff8 | ||
|
|
a18df74946 | ||
|
|
2bc8414cc1 | ||
|
|
c4569343a4 | ||
|
|
5f5820c71a | ||
|
|
4719a6fecf | ||
|
|
c999359b9f | ||
|
|
f22e2b6e68 | ||
|
|
31a9550f5b | ||
|
|
201f2d61b9 | ||
|
|
3ae4dfae8e | ||
|
|
7b095fe4ff | ||
|
|
890bcb1bb6 | ||
|
|
aa97ea9f82 | ||
|
|
9b40dbbf27 | ||
|
|
289f3e801b | ||
|
|
edacd09dc8 | ||
|
|
5682863725 | ||
|
|
4304d7a638 | ||
|
|
f56f88da94 | ||
|
|
87c8b361ea | ||
|
|
cd505ddb6c | ||
|
|
eeea617fb1 | ||
|
|
cc6dbddb49 | ||
|
|
9d3df2cdc6 | ||
|
|
ab601c3830 | ||
|
|
3738be2b6d | ||
|
|
53160f01c7 | ||
|
|
594ad55bd8 | ||
|
|
d2615f51dc | ||
|
|
c097733ae7 | ||
|
|
67d4b7dafc | ||
|
|
b1d6bbce0c | ||
|
|
20124f4891 | ||
|
|
0dd540afaf | ||
|
|
df71083359 | ||
|
|
349c7bb3ea | ||
|
|
5a30c6b44b | ||
|
|
929924accd | ||
|
|
812b46960c | ||
|
|
874080dbda | ||
|
|
93dd9d4ed4 | ||
|
|
3f0ced0143 | ||
|
|
8112ab6d04 | ||
|
|
e5fc05f5cd | ||
|
|
1068ff659a | ||
|
|
d4d9bbe424 | ||
|
|
cf3a8c8ede | ||
|
|
920d992459 | ||
|
|
c61b3ba858 | ||
|
|
dc6e564ea2 | ||
|
|
24536b5e18 | ||
|
|
908fd59019 | ||
|
|
a19aa37ea8 | ||
|
|
c46bd5900b | ||
|
|
5f98ed23b3 | ||
|
|
c6b135398a | ||
|
|
791e44796e | ||
|
|
b428dd8471 | ||
|
|
b88479446c | ||
|
|
1d6217ef5a | ||
|
|
746c7f05de | ||
|
|
29836d979a | ||
|
|
5ba6e33fa8 | ||
|
|
643c4ce7ef | ||
|
|
c011f90b76 | ||
|
|
875481b9a2 | ||
|
|
05275d864c | ||
|
|
4951fea730 | ||
|
|
b8315a7bd5 | ||
|
|
be25784297 | ||
|
|
b0f1c39175 | ||
|
|
f27d5fa23e | ||
|
|
4f2944a600 | ||
|
|
479c42e1ce | ||
|
|
933379c798 | ||
|
|
061386ccc2 | ||
|
|
59a8be493d | ||
|
|
a653bbe7d4 | ||
|
|
c7aab235d9 | ||
|
|
60ef631315 | ||
|
|
b800947151 | ||
|
|
808ce3e722 | ||
|
|
a92bba4ec5 | ||
|
|
a445098409 | ||
|
|
224d4fc480 | ||
|
|
d5ee8ae6ab | ||
|
|
c16e80f507 | ||
|
|
1c1f2c7128 | ||
|
|
efba6a048d | ||
|
|
1701425003 | ||
|
|
7654eec7e2 | ||
|
|
00942e0b1d | ||
|
|
0d1e21e9ad | ||
|
|
3d2d40e7cd | ||
|
|
b8b6d8cff6 | ||
|
|
928664dc88 | ||
|
|
5c8d75539b | ||
|
|
ee2055a2a3 | ||
|
|
b4c46c59d4 | ||
|
|
8574b7899b | ||
|
|
969bfbdd59 | ||
|
|
63f3a204a6 | ||
|
|
094be09e90 | ||
|
|
23ceaed493 | ||
|
|
d9602e25eb | ||
|
|
0c50aa223b | ||
|
|
e5425ee9d0 | ||
|
|
f720fe0176 | ||
|
|
727a5a410c | ||
|
|
23af174822 | ||
|
|
5ef2b7c9dc | ||
|
|
1e8348543a | ||
|
|
f9bafe092d | ||
|
|
f876e9243c | ||
|
|
b6896febf1 | ||
|
|
29d2ba5857 | ||
|
|
8b1ebc7871 | ||
|
|
d753c83c76 | ||
|
|
47a825f5ed | ||
|
|
0e12cecd85 | ||
|
|
d8def879b2 | ||
|
|
0420854ed6 | ||
|
|
d47482fcd9 | ||
|
|
b68624f2f3 | ||
|
|
a9d114d04d | ||
|
|
4e4ebe9c98 | ||
|
|
429217d8f7 | ||
|
|
1779f4a374 | ||
|
|
26e89ae6d3 | ||
|
|
b333d19d04 | ||
|
|
c1dc798ded | ||
|
|
69d99b69e8 | ||
|
|
aae53d20f0 | ||
|
|
14cf2ce8a6 | ||
|
|
05985583f0 | ||
|
|
651fa716b4 | ||
|
|
67b79e92f9 | ||
|
|
b51f41fe34 | ||
|
|
6f158dc6db | ||
|
|
8d7052b39c | ||
|
|
660511b8a0 | ||
|
|
9be3982dc6 | ||
|
|
27c9fb4cd3 | ||
|
|
929512f4f5 | ||
|
|
a46d89cefc | ||
|
|
f93646e17a | ||
|
|
9d14b6dfea | ||
|
|
9783b96524 | ||
|
|
106efe58ce | ||
|
|
6e99ad13e7 | ||
|
|
413dd66886 | ||
|
|
dc7ec6e7a8 | ||
|
|
ed879231af | ||
|
|
ddf4b8ac13 | ||
|
|
84c13f0605 | ||
|
|
ce03e0e487 | ||
|
|
63e19998d0 | ||
|
|
e3321bbb07 | ||
|
|
0a38cd0eae | ||
|
|
75fb6961ed | ||
|
|
5ba8931502 | ||
|
|
13d2ea1607 | ||
|
|
88ffde0c3b | ||
|
|
bfb97bd062 | ||
|
|
295931041c | ||
|
|
2e623dd9f8 | ||
|
|
169b9657e5 | ||
|
|
dab4ae6965 | ||
|
|
c7fc121c7c | ||
|
|
292221ea28 | ||
|
|
feb37f5812 |
22
.github/workflows/build_gnu_linux.yml
vendored
Normal file
22
.github/workflows/build_gnu_linux.yml
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
name: GNU/Linux
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
os:
|
||||
- ubuntu-latest
|
||||
rust:
|
||||
- stable
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- run: git config --global core.autocrlf false
|
||||
- uses: actions/checkout@v2
|
||||
- name: Build
|
||||
run: cargo build --all --locked --verbose
|
||||
22
.github/workflows/build_macos.yml
vendored
Normal file
22
.github/workflows/build_macos.yml
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
name: macOS
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
os:
|
||||
- macos-latest
|
||||
rust:
|
||||
- stable
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- run: git config --global core.autocrlf false
|
||||
- uses: actions/checkout@v2
|
||||
- name: Build
|
||||
run: cargo build --all --locked --verbose
|
||||
22
.github/workflows/build_windows.yml
vendored
Normal file
22
.github/workflows/build_windows.yml
vendored
Normal file
@@ -0,0 +1,22 @@
|
||||
name: Windows
|
||||
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
os:
|
||||
- windows-latest
|
||||
rust:
|
||||
- stable
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- run: git config --global core.autocrlf false
|
||||
- uses: actions/checkout@v2
|
||||
- name: Build
|
||||
run: cargo build --all --locked --verbose
|
||||
54
.github/workflows/cd.yml
vendored
Normal file
54
.github/workflows/cd.yml
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
# CD GitHub Actions workflow for Monolith
|
||||
|
||||
name: CD
|
||||
|
||||
on:
|
||||
release:
|
||||
types:
|
||||
- created
|
||||
|
||||
jobs:
|
||||
|
||||
windows:
|
||||
runs-on: windows-2019
|
||||
steps:
|
||||
- run: git config --global core.autocrlf false
|
||||
- name: Checkout the repository
|
||||
uses: actions/checkout@v2
|
||||
- name: Build and install the executable
|
||||
run: cargo build --release
|
||||
- uses: Shopify/upload-to-release@1.0.0
|
||||
with:
|
||||
name: monolith.exe
|
||||
path: target\release\monolith.exe
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
gnu_linux_armhf:
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
- name: Checkout the repository
|
||||
uses: actions/checkout@v2
|
||||
- name: Prepare cross-platform environment
|
||||
run: |
|
||||
sudo mkdir -p /cross-build-arm
|
||||
sudo touch /etc/apt/sources.list.d/armhf.list
|
||||
echo "deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ bionic main" | sudo tee -a /etc/apt/sources.list.d/armhf.list
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y gcc-arm-linux-gnueabihf libc6-armhf-cross libc6-dev-armhf-cross
|
||||
sudo apt-get download libssl1.1:armhf libssl-dev:armhf
|
||||
sudo dpkg -x libssl1.1*.deb /cross-build-arm
|
||||
sudo dpkg -x libssl-dev*.deb /cross-build-arm
|
||||
rustup target add arm-unknown-linux-gnueabihf
|
||||
echo "::set-env name=C_INCLUDE_PATH::/cross-build-arm/usr/include"
|
||||
echo "::set-env name=OPENSSL_INCLUDE_DIR::/cross-build-arm/usr/include/arm-linux-gnueabihf"
|
||||
echo "::set-env name=OPENSSL_LIB_DIR::/cross-build-arm/usr/lib/arm-linux-gnueabihf"
|
||||
echo "::set-env name=PKG_CONFIG_ALLOW_CROSS::1"
|
||||
echo "::set-env name=RUSTFLAGS::-C linker=arm-linux-gnueabihf-gcc -L/usr/arm-linux-gnueabihf/lib -L/cross-build-arm/usr/lib/arm-linux-gnueabihf -L/cross-build-arm/lib/arm-linux-gnueabihf"
|
||||
- name: Build the executable
|
||||
run: cargo build --release --target=arm-unknown-linux-gnueabihf
|
||||
- name: Attach artifact to the release
|
||||
uses: Shopify/upload-to-release@1.0.0
|
||||
with:
|
||||
name: monolith-gnu-linux-armhf
|
||||
path: target/arm-unknown-linux-gnueabihf/release/monolith
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
32
.github/workflows/ci.yml
vendored
Normal file
32
.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,32 @@
|
||||
name: CI
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
|
||||
jobs:
|
||||
build_and_test:
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
os:
|
||||
- ubuntu-latest
|
||||
- macos-latest
|
||||
- windows-latest
|
||||
rust:
|
||||
- stable
|
||||
- beta
|
||||
- nightly
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- run: git config --global core.autocrlf false
|
||||
- uses: actions/checkout@v2
|
||||
- name: Build
|
||||
run: cargo build --all --locked --verbose
|
||||
- name: Run tests
|
||||
run: cargo test --all --locked --verbose
|
||||
- name: Check code formatting
|
||||
run: |
|
||||
rustup component add rustfmt
|
||||
cargo fmt --all -- --check
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -4,6 +4,3 @@
|
||||
|
||||
# These are backup files generated by rustfmt
|
||||
**/*.rs.bk
|
||||
|
||||
# Exclude accidental HTML files
|
||||
*.html
|
||||
|
||||
26
.travis.yml
26
.travis.yml
@@ -1,26 +0,0 @@
|
||||
language: rust
|
||||
cache: cargo
|
||||
|
||||
sudo: false
|
||||
|
||||
os:
|
||||
- linux
|
||||
- osx
|
||||
|
||||
rust:
|
||||
- stable
|
||||
- beta
|
||||
- nightly
|
||||
|
||||
before_script:
|
||||
- rustup component add rustfmt
|
||||
|
||||
script:
|
||||
- cargo build --all --locked --verbose
|
||||
- cargo test --all --locked --verbose
|
||||
- cargo fmt --all -- --check
|
||||
|
||||
jobs:
|
||||
allow_failures:
|
||||
- rust: nightly
|
||||
fast_finish: true
|
||||
1100
Cargo.lock
generated
1100
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
23
Cargo.toml
23
Cargo.toml
@@ -1,19 +1,30 @@
|
||||
[package]
|
||||
name = "monolith"
|
||||
version = "2.1.0"
|
||||
version = "2.2.6"
|
||||
edition = "2018"
|
||||
authors = [
|
||||
"Sunshine <sunshine@uberspace.net>",
|
||||
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
|
||||
"Emmanuel Delaborde <th3rac25@gmail.com>",
|
||||
"Emi Simpson <emi@alchemi.dev>",
|
||||
"rhysd <lin90162@yahoo.co.jp>",
|
||||
]
|
||||
description = "CLI tool for saving web pages as a single HTML file"
|
||||
|
||||
[dependencies]
|
||||
base64 = "0.10.1"
|
||||
base64 = "0.12.0"
|
||||
clap = "2.33.0"
|
||||
cssparser = "0.27.2"
|
||||
html5ever = "0.24.1"
|
||||
lazy_static = "1.4.0"
|
||||
regex = "1.3.1"
|
||||
reqwest = "0.9.20"
|
||||
url = "2.1.0"
|
||||
sha2 = "0.8.1" # Used in calculating checksums during integrity checks
|
||||
time = "0.1.42" # Used to render comments indicating the time the page was saved
|
||||
url = "2.1.1"
|
||||
|
||||
[dependencies.reqwest]
|
||||
version = "0.10.*"
|
||||
default-features = false
|
||||
features = ["default-tls", "blocking", "gzip"]
|
||||
|
||||
[dev-dependencies]
|
||||
assert_cmd = "0.12.0"
|
||||
tempfile = "3.1.0"
|
||||
|
||||
18
Dockerfile
Normal file
18
Dockerfile
Normal file
@@ -0,0 +1,18 @@
|
||||
FROM rust
|
||||
|
||||
WORKDIR /usr/local/src/
|
||||
RUN curl -s https://api.github.com/repos/y2z/monolith/releases/latest \
|
||||
| grep "tarball_url.*\"," \
|
||||
| cut -d '"' -f 4 \
|
||||
| wget -qi - -O monolith.tar.gz
|
||||
|
||||
RUN tar xfz monolith.tar.gz \
|
||||
&& mv Y2Z-monolith-* monolith \
|
||||
&& rm monolith.tar.gz
|
||||
|
||||
WORKDIR /usr/local/src/monolith
|
||||
RUN ls -a
|
||||
RUN make install
|
||||
|
||||
WORKDIR /tmp
|
||||
CMD ["/usr/local/cargo/bin/monolith"]
|
||||
21
Makefile
21
Makefile
@@ -1,16 +1,25 @@
|
||||
.PHONY: all build install run test lint
|
||||
# Makefile for monolith
|
||||
|
||||
all: test build
|
||||
all: build
|
||||
.PHONY: all
|
||||
|
||||
build:
|
||||
@cargo build --locked
|
||||
.PHONY: build
|
||||
|
||||
install:
|
||||
@cargo install --force --locked --path .
|
||||
|
||||
test:
|
||||
test: build
|
||||
@cargo test --locked
|
||||
@cargo fmt --all -- --check
|
||||
.PHONY: test_code_formatting
|
||||
|
||||
lint:
|
||||
@cargo fmt --all --
|
||||
.PHONY: lint
|
||||
|
||||
install:
|
||||
@cargo install --force --locked --path .
|
||||
.PHONY: install
|
||||
|
||||
uninstall:
|
||||
@cargo uninstall
|
||||
.PHONY: uninstall
|
||||
|
||||
69
README.md
69
README.md
@@ -1,5 +1,6 @@
|
||||
[](https://travis-ci.org/Y2Z/monolith)
|
||||
[](https://ci.appveyor.com/project/snshn/monolith/branch/master)
|
||||
[](https://github.com/Y2Z/monolith/actions?query=workflow%3AGNU%2FLinux)
|
||||
[](https://github.com/Y2Z/monolith/actions?query=workflow%3AmacOS)
|
||||
[](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows)
|
||||
|
||||
```
|
||||
___ ___________ __________ ___________________ ___
|
||||
@@ -11,42 +12,76 @@
|
||||
|___| |__________| \____________________| |___| |___| |___|
|
||||
```
|
||||
|
||||
A data hoarder's dream come true: bundle any web page into a single HTML file.
|
||||
You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
|
||||
A data hoarder’s dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
|
||||
|
||||
Unlike the conventional "Save page as", `monolith` not only saves the target document, it embeds CSS, image, and JavaScript assets **all at once**, producing a single HTML5 document that is a joy to store and share.
|
||||
Unlike the conventional “Save page as”, `monolith` not only saves the target document, it embeds CSS, image, and JavaScript assets **all at once**, producing a single HTML5 document that is a joy to store and share.
|
||||
|
||||
If compared to saving websites with `wget -mpk`, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available.
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
## Installation
|
||||
|
||||
### From source
|
||||
$ git clone https://github.com/Y2Z/monolith.git
|
||||
$ cd monolith
|
||||
$ cargo install --path .
|
||||
|
||||
### On macOS (via Homebrew)
|
||||
#### Via Homebrew (on macOS and GNU/Linux)
|
||||
$ brew install monolith
|
||||
|
||||
#### Using Snapcraft (on GNU/Linux)
|
||||
$ snap install monolith
|
||||
|
||||
#### From source
|
||||
|
||||
Dependency: `libssl-dev`
|
||||
|
||||
$ git clone https://github.com/Y2Z/monolith.git
|
||||
$ cd monolith
|
||||
$ make install
|
||||
|
||||
#### With Docker
|
||||
The guide can be found [here](docs/containers.md)
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
## Usage
|
||||
$ monolith https://lyrics.github.io/db/p/portishead/dummy/roads/ > portishead-roads-lyrics.html
|
||||
$ monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roads-lyrics.html
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
## Options
|
||||
- `-c`: Ignore styles
|
||||
- `-f`: Exclude iframes
|
||||
- `-f`: Exclude frames and iframes
|
||||
- `-F`: Omit web fonts
|
||||
- `-i`: Remove images
|
||||
- `-I`: Isolate document
|
||||
- `-I`: Isolate the document
|
||||
- `-j`: Exclude JavaScript
|
||||
- `-k`: Accept invalid X.509 (TLS) certificates
|
||||
- `-o`: Write output to file
|
||||
- `-s`: Silent mode
|
||||
- `-u`: Specify custom User-Agent
|
||||
- `-t`: Set custom network request timeout
|
||||
- `-u`: Provide own User-Agent
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
## HTTPS and HTTP proxies
|
||||
Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables.
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
## Contributing
|
||||
Please open an issue if something is wrong; that helps make this project better.
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
## Related projects
|
||||
- `Monolith Chrome Extension`: https://github.com/rhysd/monolith-of-web
|
||||
- `Pagesaver`: https://github.com/distributed-mind/pagesaver
|
||||
- `SingleFile`: https://github.com/gildas-lormeau/SingleFile
|
||||
- `Personal WayBack Machine`: https://github.com/popey/pwbm
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
## License
|
||||
The Unlicense
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
<!-- Microtext -->
|
||||
<sub>Keep in mind that `monolith` is not aware of your browser's session</sub>
|
||||
<sub>Keep in mind that `monolith` is not aware of your browser’s session</sub>
|
||||
|
||||
130
appveyor.yml
130
appveyor.yml
@@ -1,130 +0,0 @@
|
||||
# Appveyor configuration template for Rust using rustup for Rust installation
|
||||
# https://github.com/starkat99/appveyor-rust
|
||||
|
||||
## Operating System (VM environment) ##
|
||||
|
||||
# Rust needs at least Visual Studio 2013 Appveyor OS for MSVC targets.
|
||||
os: Visual Studio 2015
|
||||
|
||||
## Build Matrix ##
|
||||
|
||||
# This configuration will setup a build for each channel & target combination (12 windows
|
||||
# combinations in all).
|
||||
#
|
||||
# There are 3 channels: stable, beta, and nightly.
|
||||
#
|
||||
# Alternatively, the full version may be specified for the channel to build using that specific
|
||||
# version (e.g. channel: 1.5.0)
|
||||
#
|
||||
# The values for target are the set of windows Rust build targets. Each value is of the form
|
||||
#
|
||||
# ARCH-pc-windows-TOOLCHAIN
|
||||
#
|
||||
# Where ARCH is the target architecture, either x86_64 or i686, and TOOLCHAIN is the linker
|
||||
# toolchain to use, either msvc or gnu. See https://www.rust-lang.org/downloads.html#win-foot for
|
||||
# a description of the toolchain differences.
|
||||
# See https://github.com/rust-lang-nursery/rustup.rs/#toolchain-specification for description of
|
||||
# toolchains and host triples.
|
||||
#
|
||||
# Comment out channel/target combos you do not wish to build in CI.
|
||||
#
|
||||
# You may use the `cargoflags` and `RUSTFLAGS` variables to set additional flags for cargo commands
|
||||
# and rustc, respectively. For instance, you can uncomment the cargoflags lines in the nightly
|
||||
# channels to enable unstable features when building for nightly. Or you could add additional
|
||||
# matrix entries to test different combinations of features.
|
||||
environment:
|
||||
matrix:
|
||||
|
||||
### MSVC Toolchains ###
|
||||
|
||||
# Stable 64-bit MSVC
|
||||
- channel: stable
|
||||
target: x86_64-pc-windows-msvc
|
||||
# Stable 32-bit MSVC
|
||||
- channel: stable
|
||||
target: i686-pc-windows-msvc
|
||||
# Beta 64-bit MSVC
|
||||
- channel: beta
|
||||
target: x86_64-pc-windows-msvc
|
||||
# Beta 32-bit MSVC
|
||||
- channel: beta
|
||||
target: i686-pc-windows-msvc
|
||||
# Nightly 64-bit MSVC
|
||||
- channel: nightly
|
||||
target: x86_64-pc-windows-msvc
|
||||
#cargoflags: --features "unstable"
|
||||
# Nightly 32-bit MSVC
|
||||
- channel: nightly
|
||||
target: i686-pc-windows-msvc
|
||||
#cargoflags: --features "unstable"
|
||||
|
||||
### GNU Toolchains ###
|
||||
|
||||
# Stable 64-bit GNU
|
||||
- channel: stable
|
||||
target: x86_64-pc-windows-gnu
|
||||
MINGW_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin'
|
||||
# Stable 32-bit GNU
|
||||
- channel: stable
|
||||
target: i686-pc-windows-gnu
|
||||
MINGW_PATH: 'C:\MinGW\bin'
|
||||
# Beta 64-bit GNU
|
||||
- channel: beta
|
||||
target: x86_64-pc-windows-gnu
|
||||
MINGW_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin'
|
||||
# Beta 32-bit GNU
|
||||
- channel: beta
|
||||
target: i686-pc-windows-gnu
|
||||
MINGW_PATH: 'C:\MinGW\bin'
|
||||
# Nightly 64-bit GNU
|
||||
- channel: nightly
|
||||
target: x86_64-pc-windows-gnu
|
||||
MINGW_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin'
|
||||
#cargoflags: --features "unstable"
|
||||
# Nightly 32-bit GNU
|
||||
- channel: nightly
|
||||
target: i686-pc-windows-gnu
|
||||
MINGW_PATH: 'C:\MinGW\bin'
|
||||
#cargoflags: --features "unstable"
|
||||
|
||||
### Allowed failures ###
|
||||
|
||||
# See Appveyor documentation for specific details. In short, place any channel or targets you wish
|
||||
# to allow build failures on (usually nightly at least is a wise choice). This will prevent a build
|
||||
# or test failure in the matching channels/targets from failing the entire build.
|
||||
matrix:
|
||||
allow_failures:
|
||||
- channel: nightly
|
||||
|
||||
# If you only care about stable channel build failures, uncomment the following line:
|
||||
#- channel: beta
|
||||
|
||||
## Install Script ##
|
||||
|
||||
# This is the most important part of the Appveyor configuration. This installs the version of Rust
|
||||
# specified by the 'channel' and 'target' environment variables from the build matrix. This uses
|
||||
# rustup to install Rust.
|
||||
#
|
||||
# For simple configurations, instead of using the build matrix, you can simply set the
|
||||
# default-toolchain and default-host manually here.
|
||||
install:
|
||||
- appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
|
||||
- rustup-init -yv --default-toolchain %channel% --default-host %target%
|
||||
- set PATH=%PATH%;%USERPROFILE%\.cargo\bin
|
||||
- if defined MINGW_PATH set PATH=%PATH%;%MINGW_PATH%
|
||||
- rustc -vV
|
||||
- cargo -vV
|
||||
- rustup component add rustfmt
|
||||
|
||||
## Build Script ##
|
||||
|
||||
# 'cargo test' takes care of building for us, so disable Appveyor's build stage. This prevents
|
||||
# the "directory does not contain a project or solution file" error.
|
||||
build: false
|
||||
|
||||
# Uses 'cargo test' to run tests and build. Alternatively, the project may call compiled programs
|
||||
#directly or perform other testing commands. Rust will automatically be placed in the PATH
|
||||
# environment variable.
|
||||
test_script:
|
||||
- cargo test --verbose %cargoflags%
|
||||
- cargo fmt --all -- --check
|
||||
19
docs/arch/0001-record-architecture-decisions.md
Normal file
19
docs/arch/0001-record-architecture-decisions.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# 1. Record architecture decisions
|
||||
|
||||
Date: 2019-12-25
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
We need to record the architectural decisions made on this project.
|
||||
|
||||
## Decision
|
||||
|
||||
We will use Architecture Decision Records, as [described by Michael Nygard](http://thinkrelevance.com/blog/2011/11/15/documenting-architecture-decisions).
|
||||
|
||||
## Consequences
|
||||
|
||||
See Michael Nygard's article, linked above. For a lightweight ADR toolset, see Nat Pryce's [adr-tools](https://github.com/npryce/adr-tools).
|
||||
19
docs/arch/0002-noscript-nodes.md
Normal file
19
docs/arch/0002-noscript-nodes.md
Normal file
@@ -0,0 +1,19 @@
|
||||
# 2. NOSCRIPT nodes
|
||||
|
||||
Date: 2020-04-16
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
HTML pages sometimes contain NOSCRIPT nodes, which reveal their contents only when JavaScript is not available. Most of the time they contain hidden messages that inform about certain JavaScript-dependent features not being operational; however, they can sometimes also feature media assets or even iframes.
|
||||
|
||||
## Decision
|
||||
|
||||
When the document is being saved with or without JavaScript, each NOSCRIPT node should be preserved while its children need to be processed exactly the same way as the rest of the document. This approach will ensure that even hidden remote assets are embedded — since those hidden elements may have to be displayed later in a browser that has JavaScript turned off. An option should be available to "unwrap" all NOSCRIPT nodes in order to make their contents always visible in the document, complementing the "disable JS" function of the program.
|
||||
|
||||
## Consequences
|
||||
|
||||
Saved documents will have contents of all NOSCRIPT nodes processed as if they are part of the document's DOM, and will therefore properly display images encapsulated within NOSCRIPT nodes when being viewed in browsers that have JavaScript turned off (or have no JavaScript support in the first place). The new option to "unwrap" NOSCRIPT elements will help the user ensure that the resulting document always represents what the original web page looked like in a browser that had JavaScript turned off.
|
||||
25
docs/arch/0003-network-request-timeout.md
Normal file
25
docs/arch/0003-network-request-timeout.md
Normal file
@@ -0,0 +1,25 @@
|
||||
# 3. Network request timeout
|
||||
|
||||
Date: 2020-02-15
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
A slow network connection and overloaded server may negatively impact network response time.
|
||||
|
||||
## Decision
|
||||
|
||||
Make the program simulate behavior of popular web browsers and CLI tools, where
|
||||
the default network response timeout is most often set to 120 seconds.
|
||||
|
||||
Instead of featuring retries for timed out network requests, the program
|
||||
should have an option to adjust the timeout length, along with making it
|
||||
indefinite when given "0" as its value.
|
||||
|
||||
## Consequences
|
||||
|
||||
The user is able to retrieve resources that have long response time, as well as obtain
|
||||
full control over how soon, and if at all, network requests should time out.
|
||||
25
docs/arch/0005-asset-minimization.md
Normal file
25
docs/arch/0005-asset-minimization.md
Normal file
@@ -0,0 +1,25 @@
|
||||
# 4. Asset Minimization
|
||||
|
||||
Date: 2020-03-14
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
It may look like a good idea to make monolith compress retrieved assets while
|
||||
saving the page for the purpose of reducing the resulting document's file size.
|
||||
|
||||
## Decision
|
||||
|
||||
Given that the main purpose of this program is to save pages in a convenient to store and share manner — it's mostly an archiving tool,
|
||||
aside from being able to tell monolith to exclude certain types of assets (e.g. images, CSS, JavaScript),
|
||||
it would be outside of scope of this program to implement code for compressing assets. Minimizing files before embedding them
|
||||
does not reduce the amount of data that needs to be transferred either. A separate tool can be used later to compress and minimize pages
|
||||
saved by monolith, if needed.
|
||||
|
||||
## Consequences
|
||||
|
||||
Monolith will not support modification of original document assets for the purpose of reducing their size, sticking to performing only minimal
|
||||
amount of modifications to the original web page — whatever is needed to provide security or exclude unwanted asset types.
|
||||
15
docs/containers.md
Normal file
15
docs/containers.md
Normal file
@@ -0,0 +1,15 @@
|
||||
1. Run `docker build -t y2z/monolith .` to create a Docker image
|
||||
|
||||
2. Create a file named `monolith` which contains:
|
||||
```sh
|
||||
#!/bin/sh
|
||||
|
||||
docker run --rm \
|
||||
y2z/monolith \
|
||||
monolith \
|
||||
"$@"
|
||||
```
|
||||
3. Make the file executable (`chmod +x monolith`) and include it into your `$PATH`
|
||||
4. Now you should be able to run a containerized build of monolith like this:
|
||||
`monolith -I https://github.com > document.html`
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
name: monolith
|
||||
base: core18
|
||||
version: git
|
||||
# Version data defined inside the monolith part below
|
||||
adopt-info: monolith
|
||||
summary: Monolith - Save HTML pages with ease
|
||||
description: |
|
||||
A data hoarder's dream come true: bundle any web page into a single
|
||||
@@ -17,6 +18,14 @@ description: |
|
||||
|
||||
confinement: strict
|
||||
|
||||
# Building on armhf fails, so we specify all supported non-armhf architectures
|
||||
architectures:
|
||||
- build-on: amd64
|
||||
- build-on: i386
|
||||
- build-on: arm64
|
||||
- build-on: ppc64el
|
||||
- build-on: s390x
|
||||
|
||||
parts:
|
||||
monolith:
|
||||
plugin: rust
|
||||
@@ -24,6 +33,21 @@ parts:
|
||||
build-packages:
|
||||
- libssl-dev
|
||||
- pkg-config
|
||||
override-pull: |
|
||||
snapcraftctl pull
|
||||
# Determine the current tag
|
||||
last_committed_tag="$(git describe --tags --abbrev=0)"
|
||||
last_committed_tag_ver="$(echo ${last_committed_tag} | sed 's/v//')"
|
||||
# Determine the most recent version in the beta channel in the Snap Store
|
||||
last_released_tag="$(snap info $SNAPCRAFT_PROJECT_NAME | awk '$1 == "beta:" { print $2 }')"
|
||||
# If the latest tag from the upstream project has not been released to
|
||||
# beta, build that tag instead of master.
|
||||
if [ "${last_committed_tag_ver}" != "${last_released_tag}" ]; then
|
||||
git fetch
|
||||
git checkout "${last_committed_tag}"
|
||||
fi
|
||||
# set version number of the snap based on what we did above
|
||||
snapcraftctl set-version $(git describe --tags --abbrev=0)
|
||||
|
||||
apps:
|
||||
monolith:
|
||||
|
||||
55
src/args.rs
55
src/args.rs
@@ -2,60 +2,77 @@ use clap::{App, Arg};
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct AppArgs {
|
||||
pub url_target: String,
|
||||
pub target: String,
|
||||
pub no_css: bool,
|
||||
pub no_fonts: bool,
|
||||
pub no_frames: bool,
|
||||
pub no_images: bool,
|
||||
pub no_js: bool,
|
||||
pub insecure: bool,
|
||||
pub isolate: bool,
|
||||
pub output: String,
|
||||
pub silent: bool,
|
||||
pub timeout: u64,
|
||||
pub user_agent: String,
|
||||
pub no_metadata: bool,
|
||||
}
|
||||
|
||||
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
|
||||
const DEFAULT_USER_AGENT: &str =
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0";
|
||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0";
|
||||
|
||||
impl AppArgs {
|
||||
pub fn get() -> AppArgs {
|
||||
let app = App::new("monolith")
|
||||
let app = App::new(env!("CARGO_PKG_NAME"))
|
||||
.version(crate_version!())
|
||||
.author(crate_authors!("\n"))
|
||||
.about(crate_description!())
|
||||
.arg(
|
||||
Arg::with_name("url")
|
||||
Arg::with_name("target")
|
||||
.required(true)
|
||||
.takes_value(true)
|
||||
.index(1)
|
||||
.help("URL to download"),
|
||||
.help("URL or file path"),
|
||||
)
|
||||
// .args_from_usage("-a, --include-audio 'Embed audio sources'")
|
||||
.args_from_usage("-c, --no-css 'Ignore styles'")
|
||||
.args_from_usage("-f, --no-frames 'Exclude iframes'")
|
||||
.args_from_usage("-i, --no-images 'Remove images'")
|
||||
.args_from_usage("-I, --isolate 'Cut off from the Internet'")
|
||||
.args_from_usage("-j, --no-js 'Exclude JavaScript'")
|
||||
.args_from_usage("-k, --insecure 'Accept invalid X.509 (TLS) certificates'")
|
||||
.args_from_usage("-s, --silent 'Suppress verbosity'")
|
||||
.args_from_usage("-u, --user-agent=[Iceweasel] 'Custom User-Agent string'")
|
||||
// .args_from_usage("-v, --include-video 'Embed video sources'")
|
||||
// .args_from_usage("-a, --include-audio 'Removes audio sources'")
|
||||
.args_from_usage("-c, --no-css 'Removes CSS'")
|
||||
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
|
||||
.args_from_usage("-F, --no-fonts 'Removes fonts'")
|
||||
.args_from_usage("-i, --no-images 'Removes images'")
|
||||
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
|
||||
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
||||
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
|
||||
.args_from_usage("-M, --no-metadata 'Excludes metadata information from the document'")
|
||||
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
|
||||
.args_from_usage("-s, --silent 'Suppresses verbosity'")
|
||||
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
|
||||
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
|
||||
// .args_from_usage("-v, --include-video 'Removes video sources'")
|
||||
.get_matches();
|
||||
let mut app_args = AppArgs::default();
|
||||
// Process the command
|
||||
app_args.url_target = app
|
||||
.value_of("url")
|
||||
.expect("please set target url")
|
||||
app_args.target = app
|
||||
.value_of("target")
|
||||
.expect("please set target")
|
||||
.to_string();
|
||||
app_args.no_css = app.is_present("no-css");
|
||||
app_args.no_fonts = app.is_present("no-fonts");
|
||||
app_args.no_frames = app.is_present("no-frames");
|
||||
app_args.no_images = app.is_present("no-images");
|
||||
app_args.no_js = app.is_present("no-js");
|
||||
app_args.insecure = app.is_present("insecure");
|
||||
app_args.no_metadata = app.is_present("no-metadata");
|
||||
app_args.isolate = app.is_present("isolate");
|
||||
app_args.silent = app.is_present("silent");
|
||||
app_args.timeout = app
|
||||
.value_of("timeout")
|
||||
.unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string())
|
||||
.parse::<u64>()
|
||||
.unwrap();
|
||||
app_args.output = app.value_of("output").unwrap_or("").to_string();
|
||||
app_args.user_agent = app
|
||||
.value_of("user-agent")
|
||||
.unwrap_or_else(|| DEFAULT_USER_AGENT)
|
||||
.unwrap_or(DEFAULT_USER_AGENT)
|
||||
.to_string();
|
||||
app_args
|
||||
}
|
||||
|
||||
415
src/css.rs
Normal file
415
src/css.rs
Normal file
@@ -0,0 +1,415 @@
|
||||
use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::utils::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, retrieve_asset};
|
||||
|
||||
/// CSS property names that `is_image_url_prop` matches against (ASCII case-insensitively).
///
/// `process_css` consults this list when `opt_no_images` is set: a `url()` found under one
/// of these properties is replaced with `empty_image!()` instead of being fetched.
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
|
||||
// Universal
|
||||
"background",
|
||||
"background-image",
|
||||
"border-image",
|
||||
"border-image-source",
|
||||
"content",
|
||||
"cursor",
|
||||
"list-style",
|
||||
"list-style-image",
|
||||
"mask",
|
||||
"mask-image",
|
||||
// Specific to @counter-style
|
||||
"additive-symbols",
|
||||
"negative",
|
||||
"pad",
|
||||
"prefix",
|
||||
"suffix",
|
||||
"symbols",
|
||||
];
|
||||
/// Characters that `escape` prefixes with a backslash (the backslash itself is handled
/// separately by `escape` and is therefore not part of this set).
const CSS_SPECIAL_CHARS: &str = "~!@$%^&*()+=,./'\";:?><[]{}|`#";
|
||||
|
||||
pub fn is_image_url_prop(prop_name: &str) -> bool {
|
||||
CSS_PROPS_WITH_IMAGE_URLS
|
||||
.iter()
|
||||
.find(|p| prop_name.eq_ignore_ascii_case(p))
|
||||
.is_some()
|
||||
}
|
||||
|
||||
/// Wraps `input` in CSS string quotes.
///
/// `double` selects `"` as the delimiter, otherwise `'` is used. Occurrences of the chosen
/// delimiter inside `input` are escaped with a backslash. Backslashes are escaped as well:
/// left as they are, a value ending in `\` would escape the closing quote and yield an
/// unterminated CSS string.
pub fn enquote(input: String, double: bool) -> String {
    let quote = if double { '"' } else { '\'' };

    // Two delimiters plus the payload; escapes may grow it further
    let mut quoted = String::with_capacity(input.len() + 2);
    quoted.push(quote);
    for ch in input.chars() {
        if ch == quote || ch == '\\' {
            quoted.push('\\');
        }
        quoted.push(ch);
    }
    quoted.push(quote);

    quoted
}
|
||||
|
||||
pub fn escape(value: &str) -> String {
|
||||
let mut res = str!(&value);
|
||||
|
||||
res = res.replace("\\", "\\\\");
|
||||
|
||||
for c in CSS_SPECIAL_CHARS.chars() {
|
||||
res = res.replace(c, format!("\\{}", c).as_str());
|
||||
}
|
||||
|
||||
res
|
||||
}
|
||||
|
||||
pub fn process_css<'a>(
|
||||
cache: &mut HashMap<String, Vec<u8>>,
|
||||
client: &Client,
|
||||
parent_url: &str,
|
||||
parser: &mut Parser,
|
||||
rule_name: &str,
|
||||
prop_name: &str,
|
||||
func_name: &str,
|
||||
opt_no_fonts: bool,
|
||||
opt_no_images: bool,
|
||||
opt_silent: bool,
|
||||
) -> Result<String, ParseError<'a, String>> {
|
||||
let mut result: String = str!();
|
||||
|
||||
let mut curr_rule: String = str!(rule_name.clone());
|
||||
let mut curr_prop: String = str!(prop_name.clone());
|
||||
let mut token: &Token;
|
||||
let mut token_offset: SourcePosition;
|
||||
|
||||
loop {
|
||||
token_offset = parser.position();
|
||||
token = match parser.next_including_whitespace_and_comments() {
|
||||
Ok(token) => token,
|
||||
Err(_) => {
|
||||
break;
|
||||
}
|
||||
};
|
||||
|
||||
match *token {
|
||||
Token::Comment(_) => {
|
||||
let token_slice = parser.slice_from(token_offset);
|
||||
result.push_str(str!(token_slice).as_str());
|
||||
}
|
||||
Token::Semicolon => result.push_str(";"),
|
||||
Token::Colon => result.push_str(":"),
|
||||
Token::Comma => result.push_str(","),
|
||||
Token::ParenthesisBlock | Token::SquareBracketBlock | Token::CurlyBracketBlock => {
|
||||
if opt_no_fonts && curr_rule == "font-face" {
|
||||
continue;
|
||||
}
|
||||
|
||||
let closure: &str;
|
||||
if token == &Token::ParenthesisBlock {
|
||||
result.push_str("(");
|
||||
closure = ")";
|
||||
} else if token == &Token::SquareBracketBlock {
|
||||
result.push_str("[");
|
||||
closure = "]";
|
||||
} else {
|
||||
result.push_str("{");
|
||||
closure = "}";
|
||||
}
|
||||
|
||||
let block_css: String = parser
|
||||
.parse_nested_block(|parser| {
|
||||
process_css(
|
||||
cache,
|
||||
client,
|
||||
parent_url,
|
||||
parser,
|
||||
rule_name,
|
||||
curr_prop.as_str(),
|
||||
func_name,
|
||||
opt_no_fonts,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
)
|
||||
})
|
||||
.unwrap();
|
||||
result.push_str(block_css.as_str());
|
||||
|
||||
result.push_str(closure);
|
||||
}
|
||||
Token::CloseParenthesis => result.push_str(")"),
|
||||
Token::CloseSquareBracket => result.push_str("]"),
|
||||
Token::CloseCurlyBracket => result.push_str("}"),
|
||||
Token::IncludeMatch => result.push_str("~="),
|
||||
Token::DashMatch => result.push_str("|="),
|
||||
Token::PrefixMatch => result.push_str("^="),
|
||||
Token::SuffixMatch => result.push_str("$="),
|
||||
Token::SubstringMatch => result.push_str("*="),
|
||||
Token::CDO => result.push_str("<!--"),
|
||||
Token::CDC => result.push_str("-->"),
|
||||
Token::WhiteSpace(ref value) => {
|
||||
result.push_str(value);
|
||||
}
|
||||
// div...
|
||||
Token::Ident(ref value) => {
|
||||
curr_rule = str!();
|
||||
curr_prop = str!(value);
|
||||
result.push_str(&escape(value));
|
||||
}
|
||||
// @import, @font-face, @charset, @media...
|
||||
Token::AtKeyword(ref value) => {
|
||||
curr_rule = str!(value);
|
||||
if opt_no_fonts && curr_rule == "font-face" {
|
||||
continue;
|
||||
}
|
||||
result.push_str("@");
|
||||
result.push_str(value);
|
||||
}
|
||||
Token::Hash(ref value) => {
|
||||
result.push_str("#");
|
||||
result.push_str(value);
|
||||
}
|
||||
Token::QuotedString(ref value) => {
|
||||
if curr_rule == "import" {
|
||||
// Reset current at-rule value
|
||||
curr_rule = str!();
|
||||
|
||||
// Skip empty import values
|
||||
if value.len() < 1 {
|
||||
result.push_str("''");
|
||||
continue;
|
||||
}
|
||||
|
||||
let import_full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
||||
let import_url_fragment = get_url_fragment(import_full_url.clone());
|
||||
match retrieve_asset(cache, client, &parent_url, &import_full_url, opt_silent) {
|
||||
Ok((import_contents, import_final_url, _import_media_type)) => {
|
||||
result.push_str(
|
||||
enquote(
|
||||
data_to_data_url(
|
||||
"text/css",
|
||||
embed_css(
|
||||
cache,
|
||||
client,
|
||||
&import_final_url,
|
||||
&String::from_utf8_lossy(&import_contents),
|
||||
opt_no_fonts,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
)
|
||||
.as_bytes(),
|
||||
&import_final_url,
|
||||
&import_url_fragment,
|
||||
),
|
||||
false,
|
||||
)
|
||||
.as_str(),
|
||||
);
|
||||
}
|
||||
Err(_) => {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
if is_http_url(import_full_url.clone()) {
|
||||
result.push_str(enquote(import_full_url, false).as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if func_name == "url" {
|
||||
// Skip empty url()'s
|
||||
if value.len() < 1 {
|
||||
continue;
|
||||
}
|
||||
|
||||
if opt_no_images && is_image_url_prop(curr_prop.as_str()) {
|
||||
result.push_str(enquote(str!(empty_image!()), false).as_str());
|
||||
} else {
|
||||
let resolved_url = resolve_url(&parent_url, value).unwrap_or_default();
|
||||
let url_fragment = get_url_fragment(resolved_url.clone());
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&parent_url,
|
||||
&resolved_url,
|
||||
opt_silent,
|
||||
) {
|
||||
Ok((data, final_url, media_type)) => {
|
||||
let data_url = data_to_data_url(
|
||||
&media_type,
|
||||
&data,
|
||||
&final_url,
|
||||
&url_fragment,
|
||||
);
|
||||
result.push_str(enquote(data_url, false).as_str());
|
||||
}
|
||||
Err(_) => {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
if is_http_url(resolved_url.clone()) {
|
||||
result.push_str(enquote(resolved_url, false).as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
result.push_str(enquote(str!(value), false).as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
Token::Number {
|
||||
ref has_sign,
|
||||
ref value,
|
||||
..
|
||||
} => {
|
||||
if *has_sign && *value >= 0. {
|
||||
result.push_str("+");
|
||||
}
|
||||
result.push_str(&value.to_string())
|
||||
}
|
||||
Token::Percentage {
|
||||
ref has_sign,
|
||||
ref unit_value,
|
||||
..
|
||||
} => {
|
||||
if *has_sign && *unit_value >= 0. {
|
||||
result.push_str("+");
|
||||
}
|
||||
result.push_str(str!(unit_value * 100.).as_str());
|
||||
result.push_str("%");
|
||||
}
|
||||
Token::Dimension {
|
||||
ref has_sign,
|
||||
ref value,
|
||||
ref unit,
|
||||
..
|
||||
} => {
|
||||
if *has_sign && *value >= 0. {
|
||||
result.push_str("+");
|
||||
}
|
||||
result.push_str(str!(value).as_str());
|
||||
result.push_str(str!(unit).as_str());
|
||||
}
|
||||
// #selector, #id...
|
||||
Token::IDHash(ref value) => {
|
||||
curr_rule = str!();
|
||||
result.push_str("#");
|
||||
result.push_str(&escape(value));
|
||||
}
|
||||
Token::UnquotedUrl(ref value) => {
|
||||
let is_import: bool = curr_rule == "import";
|
||||
|
||||
if is_import {
|
||||
// Reset current at-rule value
|
||||
curr_rule = str!();
|
||||
}
|
||||
|
||||
// Skip empty url()'s
|
||||
if value.len() < 1 {
|
||||
result.push_str("url()");
|
||||
continue;
|
||||
} else if value.starts_with("#") {
|
||||
result.push_str("url(");
|
||||
result.push_str(value);
|
||||
result.push_str(")");
|
||||
continue;
|
||||
}
|
||||
|
||||
result.push_str("url(");
|
||||
if is_import {
|
||||
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
||||
let url_fragment = get_url_fragment(full_url.clone());
|
||||
match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) {
|
||||
Ok((css, final_url, _media_type)) => {
|
||||
let data_url = data_to_data_url(
|
||||
"text/css",
|
||||
embed_css(
|
||||
cache,
|
||||
client,
|
||||
&final_url,
|
||||
&String::from_utf8_lossy(&css),
|
||||
opt_no_fonts,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
)
|
||||
.as_bytes(),
|
||||
&final_url,
|
||||
&url_fragment,
|
||||
);
|
||||
result.push_str(enquote(data_url, false).as_str());
|
||||
}
|
||||
Err(_) => {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
if is_http_url(full_url.clone()) {
|
||||
result.push_str(enquote(full_url, false).as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if opt_no_images && is_image_url_prop(curr_prop.as_str()) {
|
||||
result.push_str(enquote(str!(empty_image!()), false).as_str());
|
||||
} else {
|
||||
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
|
||||
let url_fragment = get_url_fragment(full_url.clone());
|
||||
match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) {
|
||||
Ok((data, final_url, media_type)) => {
|
||||
let data_url =
|
||||
data_to_data_url(&media_type, &data, &final_url, &url_fragment);
|
||||
result.push_str(enquote(data_url, false).as_str());
|
||||
}
|
||||
Err(_) => {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
if is_http_url(full_url.clone()) {
|
||||
result.push_str(enquote(full_url, false).as_str());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
result.push_str(")");
|
||||
}
|
||||
Token::Delim(ref value) => result.push_str(&value.to_string()),
|
||||
Token::Function(ref name) => {
|
||||
let function_name: &str = &name.clone();
|
||||
result.push_str(function_name);
|
||||
result.push_str("(");
|
||||
|
||||
let block_css: String = parser
|
||||
.parse_nested_block(|parser| {
|
||||
process_css(
|
||||
cache,
|
||||
client,
|
||||
parent_url,
|
||||
parser,
|
||||
curr_rule.as_str(),
|
||||
curr_prop.as_str(),
|
||||
function_name,
|
||||
opt_no_fonts,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
)
|
||||
})
|
||||
.unwrap();
|
||||
result.push_str(block_css.as_str());
|
||||
|
||||
result.push_str(")");
|
||||
}
|
||||
Token::BadUrl(_) | Token::BadString(_) => {}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn embed_css(
|
||||
cache: &mut HashMap<String, Vec<u8>>,
|
||||
client: &Client,
|
||||
parent_url: &str,
|
||||
css: &str,
|
||||
opt_no_fonts: bool,
|
||||
opt_no_images: bool,
|
||||
opt_silent: bool,
|
||||
) -> String {
|
||||
let mut input = ParserInput::new(&css);
|
||||
let mut parser = Parser::new(&mut input);
|
||||
|
||||
process_css(
|
||||
cache,
|
||||
client,
|
||||
parent_url,
|
||||
&mut parser,
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
opt_no_fonts,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
)
|
||||
.unwrap()
|
||||
}
|
||||
975
src/html.rs
975
src/html.rs
File diff suppressed because it is too large
Load Diff
63
src/http.rs
63
src/http.rs
@@ -1,63 +0,0 @@
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use reqwest::Client;
|
||||
use std::collections::HashMap;
|
||||
use utils::{data_to_dataurl, is_data_url};
|
||||
|
||||
pub fn retrieve_asset(
|
||||
cache: &mut HashMap<String, String>,
|
||||
client: &Client,
|
||||
url: &str,
|
||||
as_dataurl: bool,
|
||||
mime: &str,
|
||||
opt_silent: bool,
|
||||
) -> Result<(String, String), reqwest::Error> {
|
||||
if is_data_url(&url).unwrap() {
|
||||
Ok((url.to_string(), url.to_string()))
|
||||
} else {
|
||||
if cache.contains_key(&url.to_string()) {
|
||||
// url is in cache
|
||||
if !opt_silent {
|
||||
eprintln!("{} (from cache)", &url);
|
||||
}
|
||||
let data = cache.get(&url.to_string()).unwrap();
|
||||
Ok((data.to_string(), url.to_string()))
|
||||
} else {
|
||||
// url not in cache, we request it
|
||||
let mut response = client.get(url).send()?;
|
||||
|
||||
if !opt_silent {
|
||||
if url == response.url().as_str() {
|
||||
eprintln!("{}", &url);
|
||||
} else {
|
||||
eprintln!("{} -> {}", &url, &response.url().as_str());
|
||||
}
|
||||
}
|
||||
|
||||
if as_dataurl {
|
||||
// Convert response into a byte array
|
||||
let mut data: Vec<u8> = vec![];
|
||||
response.copy_to(&mut data)?;
|
||||
|
||||
// Attempt to obtain MIME type by reading the Content-Type header
|
||||
let mimetype = if mime == "" {
|
||||
response
|
||||
.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.and_then(|header| header.to_str().ok())
|
||||
.unwrap_or(&mime)
|
||||
} else {
|
||||
mime
|
||||
};
|
||||
let dataurl = data_to_dataurl(&mimetype, &data);
|
||||
// insert in cache
|
||||
cache.insert(response.url().to_string(), dataurl.to_string());
|
||||
Ok((dataurl, response.url().to_string()))
|
||||
} else {
|
||||
let content = response.text().unwrap();
|
||||
// insert in cache
|
||||
cache.insert(response.url().to_string(), content.clone());
|
||||
Ok((content, response.url().to_string()))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
111
src/js.rs
111
src/js.rs
@@ -1,32 +1,103 @@
|
||||
const JS_DOM_EVENT_ATTRS: [&str; 21] = [
|
||||
// Input
|
||||
"onfocus",
|
||||
const JS_DOM_EVENT_ATTRS: &[&str] = &[
|
||||
// From WHATWG HTML spec 8.1.5.2 "Event handlers on elements, Document objects, and Window objects":
|
||||
// https://html.spec.whatwg.org/#event-handlers-on-elements,-document-objects,-and-window-objects
|
||||
// https://html.spec.whatwg.org/#attributes-3 (table "List of event handler content attributes")
|
||||
|
||||
// Global event handlers
|
||||
"onabort",
|
||||
"onauxclick",
|
||||
"onblur",
|
||||
"onselect",
|
||||
"oncancel",
|
||||
"oncanplay",
|
||||
"oncanplaythrough",
|
||||
"onchange",
|
||||
"onsubmit",
|
||||
"onreset",
|
||||
"onclick",
|
||||
"onclose",
|
||||
"oncontextmenu",
|
||||
"oncuechange",
|
||||
"ondblclick",
|
||||
"ondrag",
|
||||
"ondragend",
|
||||
"ondragenter",
|
||||
"ondragexit",
|
||||
"ondragleave",
|
||||
"ondragover",
|
||||
"ondragstart",
|
||||
"ondrop",
|
||||
"ondurationchange",
|
||||
"onemptied",
|
||||
"onended",
|
||||
"onerror",
|
||||
"onfocus",
|
||||
"onformdata",
|
||||
"oninput",
|
||||
"oninvalid",
|
||||
"onkeydown",
|
||||
"onkeypress",
|
||||
"onkeyup",
|
||||
// Mouse
|
||||
"onmouseover",
|
||||
"onmouseout",
|
||||
"onmousedown",
|
||||
"onmouseup",
|
||||
"onmousemove",
|
||||
// Click
|
||||
"onclick",
|
||||
"ondblclick",
|
||||
// Load
|
||||
"onload",
|
||||
"onunload",
|
||||
"onabort",
|
||||
"onerror",
|
||||
"onloadeddata",
|
||||
"onloadedmetadata",
|
||||
"onloadstart",
|
||||
"onmousedown",
|
||||
"onmouseenter",
|
||||
"onmouseleave",
|
||||
"onmousemove",
|
||||
"onmouseout",
|
||||
"onmouseover",
|
||||
"onmouseup",
|
||||
"onwheel",
|
||||
"onpause",
|
||||
"onplay",
|
||||
"onplaying",
|
||||
"onprogress",
|
||||
"onratechange",
|
||||
"onreset",
|
||||
"onresize",
|
||||
"onscroll",
|
||||
"onsecuritypolicyviolation",
|
||||
"onseeked",
|
||||
"onseeking",
|
||||
"onselect",
|
||||
"onslotchange",
|
||||
"onstalled",
|
||||
"onsubmit",
|
||||
"onsuspend",
|
||||
"ontimeupdate",
|
||||
"ontoggle",
|
||||
"onvolumechange",
|
||||
"onwaiting",
|
||||
"onwebkitanimationend",
|
||||
"onwebkitanimationiteration",
|
||||
"onwebkitanimationstart",
|
||||
"onwebkittransitionend",
|
||||
// Event handlers for <body/> and <frameset/> elements
|
||||
"onafterprint",
|
||||
"onbeforeprint",
|
||||
"onbeforeunload",
|
||||
"onhashchange",
|
||||
"onlanguagechange",
|
||||
"onmessage",
|
||||
"onmessageerror",
|
||||
"onoffline",
|
||||
"ononline",
|
||||
"onpagehide",
|
||||
"onpageshow",
|
||||
"onpopstate",
|
||||
"onrejectionhandled",
|
||||
"onstorage",
|
||||
"onunhandledrejection",
|
||||
"onunload",
|
||||
// Event handlers for <html/> element
|
||||
"oncut",
|
||||
"oncopy",
|
||||
"onpaste",
|
||||
];
|
||||
|
||||
// Returns true if DOM attribute name matches a native JavaScript event handler
|
||||
pub fn attr_is_event_handler(attr_name: &str) -> bool {
|
||||
JS_DOM_EVENT_ATTRS.contains(&attr_name.to_lowercase().as_str())
|
||||
JS_DOM_EVENT_ATTRS
|
||||
.iter()
|
||||
.find(|a| attr_name.eq_ignore_ascii_case(a))
|
||||
.is_some()
|
||||
}
|
||||
|
||||
@@ -1,15 +1,8 @@
|
||||
extern crate html5ever;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
extern crate regex;
|
||||
extern crate reqwest;
|
||||
extern crate url;
|
||||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
|
||||
pub mod css;
|
||||
pub mod html;
|
||||
pub mod http;
|
||||
pub mod js;
|
||||
pub mod utils;
|
||||
|
||||
|
||||
@@ -7,3 +7,11 @@ macro_rules! str {
|
||||
ToString::to_string(&$val)
|
||||
};
|
||||
}
|
||||
|
||||
/// Expands to a string literal holding a base64-encoded `image/png` data URL.
///
/// Used as the stand-in written in place of image URLs when images are removed.
#[macro_export]
|
||||
macro_rules! empty_image {
|
||||
() => {
|
||||
"data:image/png;base64,\
|
||||
iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII="
|
||||
};
|
||||
}
|
||||
|
||||
238
src/main.rs
238
src/main.rs
@@ -1,70 +1,188 @@
|
||||
#[macro_use]
|
||||
extern crate clap;
|
||||
extern crate monolith;
|
||||
extern crate reqwest;
|
||||
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
|
||||
use monolith::utils::{data_url_to_data, is_data_url, is_file_url, is_http_url, retrieve_asset};
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
||||
use reqwest::Url;
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::io::{self, Error, Write};
|
||||
use std::path::Path;
|
||||
use std::process;
|
||||
use std::time::Duration;
|
||||
|
||||
mod args;
|
||||
mod macros;
|
||||
|
||||
use args::AppArgs;
|
||||
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
|
||||
use monolith::http::retrieve_asset;
|
||||
use monolith::utils::is_valid_url;
|
||||
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
||||
use std::collections::HashMap;
|
||||
use std::time::Duration;
|
||||
#[macro_use]
|
||||
extern crate clap;
|
||||
use crate::args::AppArgs;
|
||||
|
||||
/// Destination the generated document is written to.
enum Output {
    /// Standard output of the process
    Stdout(io::Stdout),
    /// A file created by `Output::new`
    File(fs::File),
}

impl Output {
    /// Opens the destination: STDOUT for an empty `file_path`, otherwise a file created
    /// at that path. Fails with the I/O error of the file creation.
    fn new(file_path: &str) -> Result<Output, Error> {
        if !file_path.is_empty() {
            return fs::File::create(file_path).map(Output::File);
        }

        Ok(Output::Stdout(io::stdout()))
    }

    /// Writes `s` followed by a newline, then flushes the destination.
    fn writeln_str(&mut self, s: &str) -> Result<(), Error> {
        let sink: &mut dyn Write = match self {
            Output::Stdout(stdout) => stdout,
            Output::File(file) => file,
        };

        writeln!(sink, "{}", s)?;
        sink.flush()
    }
}
|
||||
|
||||
fn main() {
|
||||
let app_args = AppArgs::get();
|
||||
let cache = &mut HashMap::new();
|
||||
if is_valid_url(app_args.url_target.as_str()) {
|
||||
// Initialize client
|
||||
let mut header_map = HeaderMap::new();
|
||||
match HeaderValue::from_str(&app_args.user_agent) {
|
||||
Ok(header) => header_map.insert(USER_AGENT, header),
|
||||
Err(err) => {
|
||||
eprintln!("Invalid user agent! {}", err);
|
||||
return;
|
||||
}
|
||||
};
|
||||
let client = reqwest::Client::builder()
|
||||
.timeout(Duration::from_secs(10))
|
||||
.danger_accept_invalid_certs(app_args.insecure)
|
||||
.default_headers(header_map)
|
||||
.build()
|
||||
.expect("Failed to initialize HTTP client");
|
||||
let original_target: &str = &app_args.target;
|
||||
let target_url: &str;
|
||||
let base_url;
|
||||
let dom;
|
||||
|
||||
let (data, final_url) = retrieve_asset(
|
||||
cache,
|
||||
&client,
|
||||
app_args.url_target.as_str(),
|
||||
false,
|
||||
"",
|
||||
app_args.silent,
|
||||
)
|
||||
.unwrap();
|
||||
let dom = html_to_dom(&data);
|
||||
// Pre-process the input
|
||||
let cwd_normalized: String =
|
||||
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
||||
let path = Path::new(original_target);
|
||||
let mut target: String = str!(original_target.clone()).replace("\\", "/");
|
||||
let path_is_relative: bool = path.is_relative();
|
||||
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
&client,
|
||||
&final_url,
|
||||
&dom.document,
|
||||
app_args.no_css,
|
||||
app_args.no_js,
|
||||
app_args.no_images,
|
||||
app_args.silent,
|
||||
app_args.no_frames,
|
||||
);
|
||||
|
||||
let html: String = stringify_document(
|
||||
&dom.document,
|
||||
app_args.no_css,
|
||||
app_args.no_frames,
|
||||
app_args.no_js,
|
||||
app_args.no_images,
|
||||
app_args.isolate,
|
||||
);
|
||||
|
||||
println!("{}", html);
|
||||
if target.clone().len() == 0 {
|
||||
eprintln!("No target specified");
|
||||
process::exit(1);
|
||||
} else if is_http_url(target.clone()) || is_data_url(target.clone()) {
|
||||
target_url = target.as_str();
|
||||
} else if is_file_url(target.clone()) {
|
||||
target_url = target.as_str();
|
||||
} else if path.exists() {
|
||||
if !path.is_file() {
|
||||
eprintln!("Local target is not a file: {}", original_target);
|
||||
process::exit(1);
|
||||
}
|
||||
target.insert_str(0, if cfg!(windows) { "file:///" } else { "file://" });
|
||||
if path_is_relative {
|
||||
target.insert_str(if cfg!(windows) { 8 } else { 7 }, &cwd_normalized);
|
||||
target.insert_str(
|
||||
if cfg!(windows) { 8 } else { 7 } + &cwd_normalized.len(),
|
||||
"/",
|
||||
);
|
||||
}
|
||||
target_url = target.as_str();
|
||||
} else {
|
||||
target.insert_str(0, "http://");
|
||||
target_url = target.as_str();
|
||||
}
|
||||
|
||||
let mut output = Output::new(&app_args.output).expect("Could not prepare output");
|
||||
|
||||
// Initialize client
|
||||
let mut cache = HashMap::new();
|
||||
let mut header_map = HeaderMap::new();
|
||||
header_map.insert(
|
||||
USER_AGENT,
|
||||
HeaderValue::from_str(&app_args.user_agent).expect("Invalid User-Agent header specified"),
|
||||
);
|
||||
|
||||
let timeout: u64 = if app_args.timeout > 0 {
|
||||
app_args.timeout
|
||||
} else {
|
||||
std::u64::MAX / 4
|
||||
};
|
||||
let client = Client::builder()
|
||||
.timeout(Duration::from_secs(timeout))
|
||||
.danger_accept_invalid_certs(app_args.insecure)
|
||||
.default_headers(header_map)
|
||||
.build()
|
||||
.expect("Failed to initialize HTTP client");
|
||||
|
||||
// Retrieve root document
|
||||
if is_file_url(target_url) || is_http_url(target_url) {
|
||||
match retrieve_asset(&mut cache, &client, target_url, target_url, app_args.silent) {
|
||||
Ok((data, final_url, _media_type)) => {
|
||||
base_url = final_url;
|
||||
dom = html_to_dom(&String::from_utf8_lossy(&data));
|
||||
}
|
||||
Err(_) => {
|
||||
eprintln!("Could not retrieve target document");
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
} else if is_data_url(target_url) {
|
||||
let (media_type, data): (String, Vec<u8>) = data_url_to_data(target_url);
|
||||
if !media_type.eq_ignore_ascii_case("text/html") {
|
||||
eprintln!("Unsupported data URL media type");
|
||||
process::exit(1);
|
||||
}
|
||||
base_url = str!(target_url);
|
||||
dom = html_to_dom(&String::from_utf8_lossy(&data));
|
||||
} else {
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
let time_saved = time::now_utc();
|
||||
|
||||
walk_and_embed_assets(
|
||||
&mut cache,
|
||||
&client,
|
||||
&base_url,
|
||||
&dom.document,
|
||||
app_args.no_css,
|
||||
app_args.no_fonts,
|
||||
app_args.no_frames,
|
||||
app_args.no_js,
|
||||
app_args.no_images,
|
||||
app_args.silent,
|
||||
);
|
||||
|
||||
let mut html: String = stringify_document(
|
||||
&dom.document,
|
||||
app_args.no_css,
|
||||
app_args.no_frames,
|
||||
app_args.no_js,
|
||||
app_args.no_images,
|
||||
app_args.isolate,
|
||||
);
|
||||
|
||||
if !app_args.no_metadata {
|
||||
// Safe to unwrap (we just put this through an HTTP request)
|
||||
let mut clean_url = Url::parse(&base_url).unwrap();
|
||||
clean_url.set_fragment(None);
|
||||
// Don't include credentials
|
||||
clean_url.set_username("").unwrap();
|
||||
clean_url.set_password(None).unwrap();
|
||||
let metadata_comment = if is_http_url(&base_url) {
|
||||
format!(
|
||||
"<!-- Saved from {} at {} using {} v{} -->\n",
|
||||
&clean_url,
|
||||
time_saved.rfc3339(),
|
||||
env!("CARGO_PKG_NAME"),
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"<!-- Saved from local source at {} using {} v{} -->\n",
|
||||
time_saved.rfc3339(),
|
||||
env!("CARGO_PKG_NAME"),
|
||||
env!("CARGO_PKG_VERSION"),
|
||||
)
|
||||
};
|
||||
html.insert_str(0, &metadata_comment);
|
||||
}
|
||||
|
||||
output
|
||||
.writeln_str(&html)
|
||||
.expect("Could not write HTML output");
|
||||
}
|
||||
|
||||
529
src/tests/cli.rs
Normal file
529
src/tests/cli.rs
Normal file
@@ -0,0 +1,529 @@
|
||||
use assert_cmd::prelude::*;
|
||||
use std::env;
|
||||
use std::io::Write;
|
||||
use std::process::Command;
|
||||
use tempfile::NamedTempFile;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// `-V` prints the program name and version on STDOUT and exits with code 0.
#[test]
fn passing_print_version() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let out = cmd.arg("-V").output()?;

    // STDOUT should contain program name and version
    assert_eq!(
        std::str::from_utf8(&out.stdout)?,
        format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))
    );

    // STDERR should be empty
    assert_eq!(std::str::from_utf8(&out.stderr)?, "");

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
|
||||
|
||||
/// An empty target is rejected with an error message on STDERR and exit code 1.
#[test]
fn passing_bad_input_empty_target() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let out = cmd.arg("").output()?;

    // STDOUT should be empty
    assert_eq!(std::str::from_utf8(&out.stdout)?, "");

    // STDERR should contain error description
    assert_eq!(
        std::str::from_utf8(&out.stderr)?,
        "No target specified\n"
    );

    // The exit code should be 1
    out.assert().code(1);

    Ok(())
}
|
||||
|
||||
/// A data URL without the `text/html` media type is rejected with exit code 1.
#[test]
fn passing_bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let out = cmd.arg("data:,Hello%2C%20World!").output()?;

    // STDOUT should be empty
    assert_eq!(std::str::from_utf8(&out.stdout)?, "");

    // STDERR should contain error description
    assert_eq!(
        std::str::from_utf8(&out.stderr)?,
        "Unsupported data URL media type\n"
    );

    // The exit code should be 1
    out.assert().code(1);

    Ok(())
}
|
||||
|
||||
/// `-I` adds a Content-Security-Policy meta tag that isolates the saved document.
#[test]
fn passing_isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let out = cmd
        .arg("-M")
        .arg("-I")
        .arg("data:text/html,Hello%2C%20World!")
        .output()?;

    // STDOUT should contain isolated HTML
    assert_eq!(
        std::str::from_utf8(&out.stdout)?,
        "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
        </head><body>Hello, World!</body></html>\n"
    );

    // STDERR should be empty
    assert_eq!(std::str::from_utf8(&out.stderr)?, "");

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
|
||||
|
||||
/// `-c` empties `<style>` elements and adds a `style-src 'none'` policy.
#[test]
fn passing_remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let out = cmd
        .arg("-M")
        .arg("-c")
        .arg("data:text/html,<style>body{background-color:pink}</style>Hello")
        .output()?;

    // STDOUT should contain HTML with no CSS
    assert_eq!(
        std::str::from_utf8(&out.stdout)?,
        "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
        <style></style>\
        </head><body>Hello</body></html>\n"
    );

    // STDERR should be empty
    assert_eq!(std::str::from_utf8(&out.stderr)?, "");

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
|
||||
|
||||
/// `-f` blanks the `src` of iframes and adds `frame-src`/`child-src` policies.
#[test]
fn passing_remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let out = cmd
        .arg("-M")
        .arg("-f")
        .arg("data:text/html,<iframe src=\"https://google.com\"></iframe>Hi")
        .output()?;

    // STDOUT should contain HTML with no iframes
    assert_eq!(
        std::str::from_utf8(&out.stdout)?,
        "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
        </head><body><iframe src=\"\"></iframe>Hi</body></html>\n"
    );

    // STDERR should be empty
    assert_eq!(std::str::from_utf8(&out.stderr)?, "");

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
|
||||
|
||||
/// `-i` replaces image sources with `empty_image!()` and adds an `img-src data:` policy.
#[test]
fn passing_remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let out = cmd
        .arg("-M")
        .arg("-i")
        .arg("data:text/html,<img src=\"https://google.com\"/>Hi")
        .output()?;

    // STDOUT should contain HTML with no images
    assert_eq!(
        std::str::from_utf8(&out.stdout)?,
        format!(
            "<html>\
            <head>\
            <meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>\
            </head>\
            <body>\
            <img src=\"{empty_image}\">\
            Hi\
            </body>\
            </html>\n",
            empty_image = empty_image!()
        )
    );

    // STDERR should be empty
    assert_eq!(std::str::from_utf8(&out.stderr)?, "");

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
|
||||
|
||||
/// Runs the binary with `-M -j` on a data URL containing an inline script and
/// checks that the script body is emptied in the output.
#[test]
fn passing_remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    const TARGET: &str = "data:text/html,<script>alert(2)</script>Hi";
    const EXPECTED_STDOUT: &str = "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta>\
        <script></script></head><body>Hi</body></html>\n";

    let mut command = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let out = command.arg("-M").arg("-j").arg(TARGET).output().unwrap();

    // The script element must remain, but without any code in it
    let stdout = std::str::from_utf8(&out.stdout).unwrap();
    assert_eq!(stdout, EXPECTED_STDOUT);

    // Nothing is expected on STDERR
    let stderr = std::str::from_utf8(&out.stderr).unwrap();
    assert_eq!(stderr, "");

    // The process must exit successfully
    out.assert().code(0);

    Ok(())
}
|
||||
|
||||
/// Runs the binary with `-M` on the fixture `local-file.html`, addressed by a
/// relative path, and checks the embedded output plus the list of file URLs
/// reported on STDERR.
#[test]
fn passing_local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
    let mut command = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;

    // Working directory with forward slashes, as it appears in the file URLs
    let cwd_normalized: String =
        str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // Relative path to the fixture, using the platform's separator
    let target: &str = if cfg!(windows) {
        "src\\tests\\data\\basic\\local-file.html"
    } else {
        "src/tests/data/basic/local-file.html"
    };
    let out = command.arg("-M").arg(target).output().unwrap();

    // STDOUT must hold the fixture with its stylesheet and script embedded
    let stdout = std::str::from_utf8(&out.stdout).unwrap();
    let expected_stdout = "\
        <!DOCTYPE html><html lang=\"en\"><head>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link rel=\"stylesheet\" type=\"text/css\" href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\">\n \
        <link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img alt=\"\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\
        </body></html>\n\
        ";
    assert_eq!(stdout, expected_stdout);

    // STDERR must list the HTML, CSS and JS files, one URL per line
    let stderr = std::str::from_utf8(&out.stderr).unwrap();
    let expected_stderr = format!(
        "\
        {file}{cwd}/src/tests/data/basic/local-file.html\n\
        {file}{cwd}/src/tests/data/basic/local-style.css\n\
        {file}{cwd}/src/tests/data/basic/local-script.js\n\
        ",
        file = file_url_protocol,
        cwd = cwd_normalized
    );
    assert_eq!(stderr, expected_stderr);

    // The process must exit successfully
    out.assert().code(0);

    Ok(())
}
|
||||
|
||||
/// Runs the binary with `-M -jciI` on the fixture `local-file.html`, addressed
/// by an absolute path, and checks the output, the single URL reported on
/// STDERR, and the exit code.
#[test]
fn passing_local_file_target_input_absolute_target_path() -> Result<(), Box<dyn std::error::Error>>
{
    // Query the working directory once and derive both representations from it
    let cwd = env::current_dir().unwrap();
    let cwd_normalized: String = str!(cwd.to_str().unwrap()).replace("\\", "/");

    // `Path::join` inserts the platform's separator, so no `cfg!` branching is
    // needed to build the absolute path to the fixture
    let target_path = cwd
        .join("src")
        .join("tests")
        .join("data")
        .join("basic")
        .join("local-file.html");

    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let out = cmd
        .arg("-M")
        .arg("-jciI")
        .arg(&target_path)
        .output()
        .unwrap();
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // STDOUT should contain HTML from the local file
    assert_eq!(
        std::str::from_utf8(&out.stdout).unwrap(),
        format!(
            "\
            <!DOCTYPE html><html lang=\"en\"><head>\
            <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
            <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
            <title>Local HTML file</title>\n \
            <link rel=\"stylesheet\" type=\"text/css\">\n \
            <link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
            <img alt=\"\" src=\"{empty_image}\">\n \
            <a href=\"file://local-file.html/\">Tricky href</a>\n \
            <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
            <script></script>\n\n\n\n\
            </body></html>\n\
            ",
            empty_image = empty_image!()
        )
    );

    // STDERR should contain only the target file
    assert_eq!(
        std::str::from_utf8(&out.stderr).unwrap(),
        format!(
            "{file}{cwd}/src/tests/data/basic/local-file.html\n",
            file = file_url_protocol,
            cwd = cwd_normalized,
        )
    );

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
|
||||
|
||||
/// Runs the binary with `-M -cji` on the fixture `local-file.html`, addressed
/// by a `file://` URL, and checks the output, the single URL reported on
/// STDERR, and the exit code.
#[test]
fn passing_local_file_url_target_input() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let cwd_normalized: String =
        str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // The platform difference is already captured by `file_url_protocol` and
    // `cwd_normalized`, so the target URL is built the same way everywhere
    let target_url = format!(
        "{file}{cwd}/src/tests/data/basic/local-file.html",
        file = file_url_protocol,
        cwd = cwd_normalized,
    );
    let out = cmd
        .arg("-M")
        .arg("-cji")
        .arg(&target_url)
        .output()
        .unwrap();

    // STDOUT should contain HTML from the local file
    assert_eq!(
        std::str::from_utf8(&out.stdout).unwrap(),
        format!(
            "\
            <!DOCTYPE html><html lang=\"en\"><head>\
            <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
            <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
            <title>Local HTML file</title>\n \
            <link rel=\"stylesheet\" type=\"text/css\">\n \
            <link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
            <img alt=\"\" src=\"{empty_image}\">\n \
            <a href=\"file://local-file.html/\">Tricky href</a>\n \
            <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
            <script></script>\n\n\n\n\
            </body></html>\n\
            ",
            empty_image = empty_image!()
        )
    );

    // STDERR should contain only the target URL, followed by a newline
    assert_eq!(
        std::str::from_utf8(&out.stderr).unwrap(),
        format!("{}\n", target_url)
    );

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
|
||||
|
||||
/// Runs the binary with `-M` on a data URL whose script points at a local
/// file path and checks that the script comes back without a source.
#[test]
fn passing_security_disallow_local_assets_within_data_url_targets(
) -> Result<(), Box<dyn std::error::Error>> {
    const TARGET: &str =
        "data:text/html,%3Cscript%20src=\"src/tests/data/basic/local-script.js\"%3E%3C/script%3E";
    const EXPECTED_STDOUT: &str = "<html><head><script></script></head><body></body></html>\n";

    let mut command = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    let out = command.arg("-M").arg(TARGET).output().unwrap();

    // The script element must be empty: no JS may be pulled in
    let stdout = std::str::from_utf8(&out.stdout).unwrap();
    assert_eq!(stdout, EXPECTED_STDOUT);

    // Nothing is expected on STDERR
    let stderr = std::str::from_utf8(&out.stderr).unwrap();
    assert_eq!(stderr, "");

    // The process must exit successfully
    out.assert().code(0);

    Ok(())
}
|
||||
|
||||
/// Writes an SVG and an HTML document to temporary files, where the HTML
/// references the SVG through a `file://` URL inside a `style` attribute, then
/// runs the binary with `-M` on the HTML file and checks that the SVG is
/// embedded as a data URL.
#[test]
fn passing_embed_file_url_local_asset_within_style_attribute(
) -> Result<(), Box<dyn std::error::Error>> {
    let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;

    let mut file_svg = NamedTempFile::new()?;
    writeln!(file_svg, "<svg version=\"1.1\" baseProfile=\"full\" width=\"300\" height=\"200\" xmlns=\"http://www.w3.org/2000/svg\">\
        <rect width=\"100%\" height=\"100%\" fill=\"red\" />\
        <circle cx=\"150\" cy=\"100\" r=\"80\" fill=\"green\" />\
        <text x=\"150\" y=\"125\" font-size=\"60\" text-anchor=\"middle\" fill=\"white\">SVG</text>\
        </svg>\n")?;
    // Forward-slash form of the SVG path; used both inside the HTML document
    // and in the expected STDERR listing
    let svg_path: String = str!(file_svg.path().to_str().unwrap()).replace("\\", "/");

    let mut file_html = NamedTempFile::new()?;
    writeln!(
        file_html,
        "<div style='background-image: url(\"{file}{path}\")'></div>\n",
        file = file_url_prefix,
        path = svg_path,
    )?;
    let html_path: String = str!(file_html.path().to_str().unwrap()).replace("\\", "/");

    let out = cmd.arg("-M").arg(file_html.path()).output().unwrap();

    // STDOUT should contain HTML with data URL for background-image in it
    assert_eq!(
        std::str::from_utf8(&out.stdout).unwrap(),
        "<html><head></head><body><div style=\"background-image: url('data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiBmaWxsPSJyZWQiIC8+PGNpcmNsZSBjeD0iMTUwIiBjeT0iMTAwIiByPSI4MCIgZmlsbD0iZ3JlZW4iIC8+PHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+PC9zdmc+Cgo=')\"></div>\n\n</body></html>\n"
    );

    // STDERR should list temporary files that got retrieved
    assert_eq!(
        std::str::from_utf8(&out.stderr).unwrap(),
        format!(
            "\
            {file}{html_path}\n\
            {file}{svg_path}\n\
            ",
            file = file_url_prefix,
            html_path = html_path,
            svg_path = svg_path,
        )
    );

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
|
||||
|
||||
/// Writes a CSS file and an HTML document that `@import`s it three different
/// ways to temporary files, runs the binary with `-M` on the HTML file, and
/// checks that every import is replaced with a data URL.
#[test]
fn passing_css_import_string() -> Result<(), Box<dyn std::error::Error>> {
    let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };
    let mut command = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;

    // Stylesheet that gets imported
    let mut file_css = NamedTempFile::new()?;
    writeln!(file_css, "body{{background-color:#000;color:#fff}}")?;
    let css_path: String = str!(file_css.path().to_str().unwrap()).replace("\\", "/");

    // Document importing the stylesheet as a bare string, an unquoted url()
    // and a quoted url()
    let mut file_html = NamedTempFile::new()?;
    writeln!(
        file_html,
        "\
        <style>\n\
        @charset 'UTF-8';\n\
        \n\
        @import '{file}{css_path}';\n\
        \n\
        @import url({file}{css_path});\n\
        \n\
        @import url('{file}{css_path}')\n\
        </style>\n\
        ",
        file = file_url_prefix,
        css_path = css_path,
    )?;
    let html_path: String = str!(file_html.path().to_str().unwrap()).replace("\\", "/");

    let out = command.arg("-M").arg(file_html.path()).output().unwrap();

    // Every @import in STDOUT must point at the embedded stylesheet
    let stdout = std::str::from_utf8(&out.stdout).unwrap();
    assert_eq!(
        stdout,
        "<html><head><style>\n@charset 'UTF-8';\n\n@import 'data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K';\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K');\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K')\n</style>\n\n</head><body></body></html>\n"
    );

    // STDERR must list the HTML file once and the CSS file once per import
    let stderr = std::str::from_utf8(&out.stderr).unwrap();
    let expected_stderr = format!(
        "\
        {file}{html_path}\n\
        {file}{css_path}\n\
        {file}{css_path}\n\
        {file}{css_path}\n\
        ",
        file = file_url_prefix,
        html_path = html_path,
        css_path = css_path,
    );
    assert_eq!(stderr, expected_stderr);

    // The process must exit successfully
    out.assert().code(0);

    Ok(())
}
|
||||
317
src/tests/css/embed_css.rs
Normal file
317
src/tests/css/embed_css.rs
Normal file
@@ -0,0 +1,317 @@
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::css;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// `css::embed_css` given an empty base URL and empty CSS must return an
/// empty string.
#[test]
fn passing_empty_input() {
    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(&mut cache, &client, "", "", false, false, false);

    assert_eq!(embedded, "");
}
|
||||
|
||||
/// With the flag combination used here, unquoted `url(...)` values in
/// `background-image` and `list-style` must be replaced with the
/// `empty_image!()` placeholder (single-quoted), while the rest of the
/// declaration list is returned unchanged.
#[test]
fn passing_style_exclude_unquoted_images() {
    const STYLE: &str = "/* border: none;*/\
        background-image: url(https://somewhere.com/bg.png); \
        list-style: url(/assets/images/bullet.svg);\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)";

    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(
        &mut cache,
        &client,
        "https://doesntmatter.local/",
        STYLE,
        false,
        true,
        true,
    );
    let expected = format!(
        "/* border: none;*/\
        background-image: url('{empty_image}'); \
        list-style: url('{empty_image}');\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)",
        empty_image = empty_image!()
    );

    assert_eq!(embedded, expected);
}
|
||||
|
||||
/// Same scenario as the unquoted variant, but the `url(...)` values are
/// single-quoted in the input and the base URL is empty; both images must be
/// replaced with the `empty_image!()` placeholder.
#[test]
fn passing_style_exclude_single_quoted_images() {
    const STYLE: &str = "/* border: none;*/\
        background-image: url('https://somewhere.com/bg.png'); \
        list-style: url('/assets/images/bullet.svg');\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)";

    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(&mut cache, &client, "", STYLE, false, true, true);
    let expected = format!(
        "/* border: none;*/\
        background-image: url('{empty_image}'); \
        list-style: url('{empty_image}');\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)",
        empty_image = empty_image!()
    );

    assert_eq!(embedded, expected);
}
|
||||
|
||||
/// A style block whose only URL is already a data URL must pass through
/// `css::embed_css` unchanged.
#[test]
fn passing_style_block() {
    const CSS: &str = "\
        #id.class-name:not(:nth-child(3n+0)) {\n \
        // border: none;\n \
        background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=');\n\
        }\n\
        \n\
        html > body {}";

    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(&mut cache, &client, "file:///", CSS, false, false, true);

    assert_eq!(embedded, CSS);
}
|
||||
|
||||
/// CSS consisting solely of attribute selectors (every matching operator)
/// and comments must pass through `css::embed_css` unchanged.
#[test]
fn passing_attribute_selectors() {
    // The literal is multi-line without continuation escapes, so its line
    // breaks and leading whitespace are part of the input
    const CSS: &str = "\
[data-value] {
/* Attribute exists */
}

[data-value='foo'] {
/* Attribute has this exact value */
}

[data-value*='foo'] {
/* Attribute value contains this value somewhere in it */
}

[data-value~='foo'] {
/* Attribute has this value in a space-separated list somewhere */
}

[data-value^='foo'] {
/* Attribute value starts with this */
}

[data-value|='foo'] {
/* Attribute value starts with this in a dash-separated list */
}

[data-value$='foo'] {
/* Attribute value ends with this */
}
";

    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(&mut cache, &client, "", CSS, false, false, false);

    assert_eq!(embedded, CSS);
}
|
||||
|
||||
/// `@import` rules that point at plain-text data URLs (both the bare-string
/// and the `url()` form) must come back as base64 data URLs.
#[test]
fn passing_import_string() {
    const CSS: &str = "\
        @charset 'UTF-8';\n\
        \n\
        @import 'data:text/css,html{background-color:%23000}';\n\
        \n\
        @import url('data:text/css,html{color:%23fff}')\n\
        ";
    const EXPECTED: &str = "\
        @charset 'UTF-8';\n\
        \n\
        @import 'data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9';\n\
        \n\
        @import url('data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==')\n\
        ";

    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(
        &mut cache,
        &client,
        "https://doesntmatter.local/",
        CSS,
        false,
        false,
        true,
    );

    assert_eq!(embedded, EXPECTED);
}
|
||||
|
||||
/// `url(#...)` values that consist only of a fragment must be left untouched
/// by `css::embed_css`.
#[test]
fn passing_hash_urls() {
    const CSS: &str = "\
        body {\n \
        behavior: url(#default#something);\n\
        }\n\
        \n\
        .scissorHalf {\n \
        offset-path: url(#somePath);\n\
        }\n\
        ";

    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(
        &mut cache,
        &client,
        "https://doesntmatter.local/",
        CSS,
        false,
        false,
        true,
    );

    assert_eq!(embedded, CSS);
}
|
||||
|
||||
/// Signed and unsigned percentage and degree values inside `transform`
/// functions must survive `css::embed_css` unchanged.
#[test]
fn passing_transform_percentages_and_degrees() {
    const CSS: &str = "\
        div {\n \
        transform: translate(-50%, -50%) rotate(-45deg);\n\
        transform: translate(50%, 50%) rotate(45deg);\n\
        transform: translate(+50%, +50%) rotate(+45deg);\n\
        }\n\
        ";

    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(
        &mut cache,
        &client,
        "https://doesntmatter.local/",
        CSS,
        false,
        false,
        true,
    );

    assert_eq!(embedded, CSS);
}
|
||||
|
||||
/// Selectors containing backslash-escaped punctuation must survive
/// `css::embed_css` unchanged.
#[test]
fn passing_unusual_indents() {
    const CSS: &str = "\
        .is\\:good:hover {\n \
        color: green\n\
        }\n\
        \n\
        #\\~\\!\\@\\$\\%\\^\\&\\*\\(\\)\\+\\=\\,\\.\\/\\\\\\'\\\"\\;\\:\\?\\>\\<\\[\\]\\{\\}\\|\\`\\# {\n \
        color: black\n\
        }\n\
        ";

    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(
        &mut cache,
        &client,
        "https://doesntmatter.local/",
        CSS,
        false,
        false,
        true,
    );

    assert_eq!(embedded, CSS);
}
|
||||
|
||||
/// With the first boolean flag of `css::embed_css` set, both `@font-face`
/// rules must vanish from the output while the ordinary rules that merely use
/// the font family stay in place.
#[test]
fn passing_exclude_fonts() {
    const CSS: &str = "\
        @font-face {\n \
        font-family: 'My Font';\n \
        src: url(my_font.woff);\n\
        }\n\
        \n\
        #identifier {\n \
        font-family: 'My Font' Arial\n\
        }\n\
        \n\
        @font-face {\n \
        font-family: 'My Font';\n \
        src: url(my_font.woff);\n\
        }\n\
        \n\
        div {\n \
        font-family: 'My Font' Verdana\n\
        }\n\
        ";

    // Input minus the two @font-face rules; the whitespace around them remains
    const CSS_OUT: &str = " \
        \n\
        \n\
        #identifier {\n \
        font-family: 'My Font' Arial\n\
        }\n\
        \n \
        \n\
        \n\
        div {\n \
        font-family: 'My Font' Verdana\n\
        }\n\
        ";

    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(
        &mut cache,
        &client,
        "https://doesntmatter.local/",
        CSS,
        true,
        false,
        true,
    );

    assert_eq!(embedded, CSS_OUT);
}
|
||||
50
src/tests/css/enquote.rs
Normal file
50
src/tests/css/enquote.rs
Normal file
@@ -0,0 +1,50 @@
|
||||
use crate::css;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// Enquoting an empty string with the flag unset yields a pair of single
/// quotes.
#[test]
fn passing_empty_input_single_quotes() {
    let quoted = css::enquote(str!(""), false);

    assert_eq!(quoted, "''");
}
|
||||
|
||||
/// Enquoting an empty string with the flag set yields a pair of double
/// quotes.
#[test]
fn passing_empty_input_double_quotes() {
    let quoted = css::enquote(str!(""), true);

    assert_eq!(quoted, "\"\"");
}
|
||||
|
||||
/// When wrapping in single quotes, apostrophes inside the text must be
/// backslash-escaped.
#[test]
fn passing_apostrophes_single_quotes() {
    let quoted = css::enquote(str!("It's a lovely day, don't you think?"), false);

    assert_eq!(quoted, "'It\\'s a lovely day, don\\'t you think?'");
}
|
||||
|
||||
/// When wrapping in double quotes, apostrophes inside the text are left
/// as they are.
#[test]
fn passing_apostrophes_double_quotes() {
    let quoted = css::enquote(str!("It's a lovely day, don't you think?"), true);

    assert_eq!(quoted, "\"It's a lovely day, don't you think?\"");
}
|
||||
|
||||
/// When wrapping in single quotes, only the apostrophes are escaped; the
/// double quotes in the text are left as they are.
#[test]
fn passing_feet_and_inches_single_quotes() {
    let quoted = css::enquote(str!("5'2\", 6'5\""), false);

    assert_eq!(quoted, "'5\\'2\", 6\\'5\"'");
}
|
||||
|
||||
/// When wrapping in double quotes, only the double quotes are escaped; the
/// apostrophes in the text are left as they are.
#[test]
fn passing_feet_and_inches_double_quotes() {
    let quoted = css::enquote(str!("5'2\", 6'5\""), true);

    assert_eq!(quoted, "\"5'2\\\", 6'5\\\"\"");
}
|
||||
88
src/tests/css/is_image_url_prop.rs
Normal file
88
src/tests/css/is_image_url_prop.rs
Normal file
@@ -0,0 +1,88 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// Property names for which `css::is_image_url_prop` must return `true`.
#[cfg(test)]
mod passing {
    use crate::css;

    #[test]
    fn background() {
        assert!(css::is_image_url_prop("background"));
    }

    #[test]
    fn background_image() {
        assert!(css::is_image_url_prop("background-image"));
    }

    // The check is expected to ignore letter case
    #[test]
    fn background_image_uppercase() {
        assert!(css::is_image_url_prop("BACKGROUND-IMAGE"));
    }

    #[test]
    fn border_image() {
        assert!(css::is_image_url_prop("border-image"));
    }

    #[test]
    fn content() {
        assert!(css::is_image_url_prop("content"));
    }

    #[test]
    fn cursor() {
        assert!(css::is_image_url_prop("cursor"));
    }

    #[test]
    fn list_style() {
        assert!(css::is_image_url_prop("list-style"));
    }

    #[test]
    fn list_style_image() {
        assert!(css::is_image_url_prop("list-style-image"));
    }

    #[test]
    fn mask_image() {
        assert!(css::is_image_url_prop("mask-image"));
    }
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// Property names for which `css::is_image_url_prop` must return `false`.
#[cfg(test)]
mod failing {
    use crate::css;

    #[test]
    fn empty() {
        let is_image_prop = css::is_image_url_prop("");
        assert!(!is_image_prop);
    }

    #[test]
    fn width() {
        let is_image_prop = css::is_image_url_prop("width");
        assert!(!is_image_prop);
    }

    #[test]
    fn color() {
        let is_image_prop = css::is_image_url_prop("color");
        assert!(!is_image_prop);
    }

    #[test]
    fn z_index() {
        let is_image_prop = css::is_image_url_prop("z-index");
        assert!(!is_image_prop);
    }
}
|
||||
3
src/tests/css/mod.rs
Normal file
3
src/tests/css/mod.rs
Normal file
@@ -0,0 +1,3 @@
|
||||
mod embed_css;
|
||||
mod enquote;
|
||||
mod is_image_url_prop;
|
||||
19
src/tests/data/basic/local-file.html
Normal file
19
src/tests/data/basic/local-file.html
Normal file
@@ -0,0 +1,19 @@
|
||||
<!doctype html>
|
||||
|
||||
<html lang="en">
|
||||
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<title>Local HTML file</title>
|
||||
<link href="local-style.css" rel="stylesheet" type="text/css" />
|
||||
<link href="local-style-does-not-exist.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<img src="monolith.png" alt="" />
|
||||
<a href="//local-file.html">Tricky href</a>
|
||||
<a href="https://github.com/Y2Z/monolith">Remote URL</a>
|
||||
<script src="local-script.js"></script>
|
||||
</body>
|
||||
|
||||
</html>
|
||||
2
src/tests/data/basic/local-script.js
Normal file
2
src/tests/data/basic/local-script.js
Normal file
@@ -0,0 +1,2 @@
|
||||
document.body.style.backgroundColor = "green";
|
||||
document.body.style.color = "red";
|
||||
4
src/tests/data/basic/local-style.css
Normal file
4
src/tests/data/basic/local-style.css
Normal file
@@ -0,0 +1,4 @@
|
||||
body {
|
||||
background-color: #000;
|
||||
color: #fff;
|
||||
}
|
||||
@@ -1,479 +0,0 @@
|
||||
use crate::html::{
|
||||
get_node_name, get_parent_node, html_to_dom, is_icon, stringify_document, walk_and_embed_assets,
|
||||
};
|
||||
use html5ever::rcdom::{Handle, NodeData};
|
||||
use html5ever::serialize::{serialize, SerializeOpts};
|
||||
use std::collections::HashMap;
|
||||
|
||||
/// Checks which `rel` attribute values `is_icon` recognises as icons.
#[test]
fn test_is_icon() {
    // Recognised regardless of letter case or extra words
    assert!(is_icon("icon"));
    assert!(is_icon("Shortcut Icon"));
    assert!(is_icon("ICON"));
    assert!(is_icon("mask-icon"));
    assert!(is_icon("fluid-icon"));

    // Anything else, including the empty string, is not an icon
    assert!(!is_icon("stylesheet"));
    assert!(!is_icon(""));
}
|
||||
|
||||
/// Walks a small parsed document and checks that `get_parent_node` combined
/// with `get_node_name` reports the expected parent for each known element,
/// and that the walk visits the expected number of nodes.
#[test]
fn test_get_parent_node_name() {
    let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
    let dom = html_to_dom(&html);
    let mut count = 0;

    /// Recursively visits `node`, incrementing `i` once per visited node and
    /// asserting the parent name of every recognised element.
    fn test_walk(node: &Handle, i: &mut i8) {
        *i += 1;

        match &node.data {
            NodeData::Document => {
                for child in node.children.borrow().iter() {
                    test_walk(child, i);
                }
            }
            NodeData::Element { name, .. } => {
                let node_name = name.local.as_ref().to_string();
                let parent_node_name = get_node_name(&get_parent_node(node));

                // Expected parent for each element of the test document
                let expected_parent = match node_name.as_str() {
                    "head" | "body" => Some("html"),
                    "div" => Some("body"),
                    "p" => Some("div"),
                    _ => None,
                };
                if let Some(expected) = expected_parent {
                    assert_eq!(parent_node_name, expected);
                }

                for child in node.children.borrow().iter() {
                    test_walk(child, i);
                }
            }
            // Other node kinds are counted but not descended into
            _ => (),
        };
    }

    test_walk(&dom.document, &mut count);

    assert_eq!(count, 7);
}
|
||||
|
||||
/// With every option disabled, `walk_and_embed_assets` must leave a document
/// without assets untouched apart from the parser's normalisation.
#[test]
fn test_walk_and_embed_assets() {
    let cache = &mut HashMap::new();

    let html = "<div><P></P></div>";
    let dom = html_to_dom(&html);
    let url = "http://localhost";

    let opt_no_css: bool = false;
    let opt_no_frames: bool = false;
    let opt_no_js: bool = false;
    let opt_no_images: bool = false;
    let opt_silent = true;

    let client = reqwest::Client::new();

    walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode as UTF-8; a per-byte `as char` cast would garble non-ASCII output
    let serialized = String::from_utf8(buf).expect("serialized HTML should be valid UTF-8");
    assert_eq!(
        serialized,
        "<html><head></head><body><div><p></p></div></body></html>"
    );
}
|
||||
|
||||
/// An iframe with an empty `src` must be kept as-is by
/// `walk_and_embed_assets` when every option is disabled.
#[test]
fn test_walk_and_embed_assets_ensure_no_recursive_iframe() {
    let html = "<div><P></P><iframe src=\"\"></iframe></div>";
    let dom = html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();

    let opt_no_css: bool = false;
    let opt_no_frames: bool = false;
    let opt_no_js: bool = false;
    let opt_no_images: bool = false;
    let opt_silent = true;

    let client = reqwest::Client::new();

    walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode as UTF-8; a per-byte `as char` cast would garble non-ASCII output
    let serialized = String::from_utf8(buf).expect("serialized HTML should be valid UTF-8");
    assert_eq!(
        serialized,
        "<html><head></head><body><div><p></p><iframe src=\"\"></iframe></div></body></html>"
    );
}
|
||||
|
||||
/// With `opt_no_css` set, `walk_and_embed_assets` must blank the stylesheet
/// link's `href`, empty the `<style>` element and drop the inline `style`
/// attribute.
#[test]
fn test_walk_and_embed_assets_no_css() {
    let html = "<link rel=\"stylesheet\" href=\"main.css\">\
                <style>html{background-color: #000;}</style>\
                <div style=\"display: none;\"></div>";
    let dom = html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();

    let opt_no_css: bool = true;
    let opt_no_frames: bool = false;
    let opt_no_js: bool = false;
    let opt_no_images: bool = false;
    let opt_silent = true;
    let client = reqwest::Client::new();

    walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode as UTF-8; a per-byte `as char` cast would garble non-ASCII output
    let serialized = String::from_utf8(buf).expect("serialized HTML should be valid UTF-8");
    assert_eq!(
        serialized,
        "<html>\
            <head>\
                <link rel=\"stylesheet\" href=\"\">\
                <style></style>\
            </head>\
            <body>\
                <div></div>\
            </body>\
        </html>"
    );
}
|
||||
|
||||
/// With `opt_no_images` set, `walk_and_embed_assets` must blank the icon
/// link's `href` and replace the `<img>` source with a placeholder PNG data
/// URL.
#[test]
fn test_walk_and_embed_assets_no_images() {
    let html = "<link rel=\"icon\" href=\"favicon.ico\">\
                <div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
    let dom = html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();

    let opt_no_css: bool = false;
    let opt_no_frames: bool = false;
    let opt_no_js: bool = false;
    let opt_no_images: bool = true;
    let opt_silent = true;

    let client = reqwest::Client::new();

    walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode as UTF-8; a per-byte `as char` cast would garble non-ASCII output
    let serialized = String::from_utf8(buf).expect("serialized HTML should be valid UTF-8");
    assert_eq!(
        serialized,
        "<html>\
            <head>\
                <link rel=\"icon\" href=\"\">\
            </head>\
            <body>\
                <div>\
                    <img src=\"data:image/png;base64,\
                    iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0\
                    lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\">\
                </div>\
            </body>\
        </html>"
    );
}
|
||||
|
||||
/// With only the no-frames flag set, the `<iframe>` is expected to be kept but
/// with an empty `src` attribute.
#[test]
fn test_walk_and_embed_assets_no_frames() {
    let markup = r#"<iframe src="http://trackbook.com"></iframe>"#;
    let tree = html_to_dom(&markup);
    let base_url = "http://localhost";
    let asset_cache = &mut HashMap::new();
    let http_client = reqwest::Client::new();

    let no_css = false;
    let no_frames = true;
    let no_js = false;
    let no_images = false;
    let silent = true;

    // Argument order follows the callee's positional signature.
    walk_and_embed_assets(
        asset_cache,
        &http_client,
        &base_url,
        &tree.document,
        no_css,
        no_js,
        no_images,
        silent,
        no_frames,
    );

    let mut output = Vec::<u8>::new();
    serialize(&mut output, &tree.document, SerializeOpts::default()).unwrap();
    let rendered: String = output.iter().map(|&byte| char::from(byte)).collect();

    assert_eq!(
        rendered,
        r#"<html><head></head><body><iframe src=""></iframe></body></html>"#
    );
}
|
||||
|
||||
/// With only the no-JS flag set, the `onClick` attribute is expected to be
/// gone, the external script's `src` emptied and the inline script body removed.
#[test]
fn test_walk_and_embed_assets_no_js() {
    let markup = concat!(
        r#"<div onClick="void(0)">"#,
        r#"<script src="http://localhost/assets/some.js"></script>"#,
        "<script>alert(1)</script>",
        "</div>"
    );
    let tree = html_to_dom(&markup);
    let base_url = "http://localhost";
    let asset_cache = &mut HashMap::new();
    let http_client = reqwest::Client::new();

    let no_css = false;
    let no_frames = false;
    let no_js = true;
    let no_images = false;
    let silent = true;

    // Argument order follows the callee's positional signature.
    walk_and_embed_assets(
        asset_cache,
        &http_client,
        &base_url,
        &tree.document,
        no_css,
        no_js,
        no_images,
        silent,
        no_frames,
    );

    let mut output = Vec::<u8>::new();
    serialize(&mut output, &tree.document, SerializeOpts::default()).unwrap();
    let rendered: String = output.iter().map(|&byte| char::from(byte)).collect();

    assert_eq!(
        rendered,
        concat!(
            r#"<html><head></head><body><div><script src=""></script>"#,
            "<script></script></div></body></html>"
        )
    );
}
|
||||
|
||||
/// With every option disabled, a bare `<div>` fragment is expected to be
/// rendered wrapped in `<html>`, `<head>` and `<body>`, with no extra markup.
#[test]
fn test_stringify_document() {
    let markup = r#"<div><script src="some.js"></script></div>"#;
    let tree = html_to_dom(&markup);

    let no_css = false;
    let no_frames = false;
    let no_js = false;
    let no_images = false;
    let isolate = false;

    let rendered = stringify_document(
        &tree.document,
        no_css,
        no_frames,
        no_js,
        no_images,
        isolate,
    );

    assert_eq!(
        rendered,
        r#"<html><head></head><body><div><script src="some.js"></script></div></body></html>"#
    );
}
|
||||
|
||||
/// With only the isolate option set, a Content-Security-Policy `<meta>` tag is
/// expected as the first child of `<head>`, ahead of the document's own tags.
#[test]
fn test_stringify_document_isolate() {
    let markup = concat!(
        "<title>Isolated document</title>",
        r#"<link rel="something" href="some.css" />"#,
        r#"<meta http-equiv="Content-Security-Policy" content="default-src https:">"#,
        r#"<div><script src="some.js"></script></div>"#
    );
    let tree = html_to_dom(&markup);

    let no_css = false;
    let no_frames = false;
    let no_js = false;
    let no_images = false;
    let isolate = true;

    let rendered = stringify_document(
        &tree.document,
        no_css,
        no_frames,
        no_js,
        no_images,
        isolate,
    );

    assert_eq!(
        rendered,
        concat!(
            "<html>",
            "<head>",
            r#"<meta http-equiv="Content-Security-Policy" content="default-src 'unsafe-inline' data:;"></meta>"#,
            "<title>Isolated document</title>",
            r#"<link rel="something" href="some.css">"#,
            r#"<meta http-equiv="Content-Security-Policy" content="default-src https:">"#,
            "</head>",
            "<body>",
            "<div>",
            r#"<script src="some.js"></script>"#,
            "</div>",
            "</body>",
            "</html>"
        )
    );
}
|
||||
|
||||
/// With only the no-CSS option set, a `style-src 'none';` CSP `<meta>` tag is
/// expected at the top of `<head>`; the rest of the markup is kept as parsed.
#[test]
fn test_stringify_document_no_css() {
    let markup = concat!(
        "<!doctype html>",
        "<title>Unstyled document</title>",
        r#"<link rel="stylesheet" href="main.css"/>"#,
        r#"<div style="display: none;"></div>"#
    );
    let tree = html_to_dom(&markup);

    let no_css = true;
    let no_frames = false;
    let no_js = false;
    let no_images = false;
    let isolate = false;

    let rendered = stringify_document(
        &tree.document,
        no_css,
        no_frames,
        no_js,
        no_images,
        isolate,
    );

    assert_eq!(
        rendered,
        concat!(
            "<!DOCTYPE html>",
            "<html>",
            "<head>",
            r#"<meta http-equiv="Content-Security-Policy" content="style-src 'none';"></meta>"#,
            "<title>Unstyled document</title>",
            r#"<link rel="stylesheet" href="main.css">"#,
            "</head>",
            r#"<body><div style="display: none;"></div></body>"#,
            "</html>"
        )
    );
}
|
||||
|
||||
/// With only the no-frames option set, a `frame-src 'none';child-src 'none';`
/// CSP `<meta>` tag is expected at the top of `<head>`.
#[test]
fn test_stringify_document_no_frames() {
    let markup = concat!(
        "<!doctype html>",
        "<title>Frameless document</title>",
        r#"<link rel="something"/>"#,
        r#"<div><script src="some.js"></script></div>"#
    );
    let tree = html_to_dom(&markup);

    let no_css = false;
    let no_frames = true;
    let no_js = false;
    let no_images = false;
    let isolate = false;

    let rendered = stringify_document(
        &tree.document,
        no_css,
        no_frames,
        no_js,
        no_images,
        isolate,
    );

    assert_eq!(
        rendered,
        concat!(
            "<!DOCTYPE html>",
            "<html>",
            "<head>",
            r#"<meta http-equiv="Content-Security-Policy" content="frame-src 'none';child-src 'none';"></meta>"#,
            "<title>Frameless document</title>",
            r#"<link rel="something">"#,
            "</head>",
            r#"<body><div><script src="some.js"></script></div></body>"#,
            "</html>"
        )
    );
}
|
||||
|
||||
/// With every option set, a single CSP `<meta>` tag combining all the
/// restrictions is expected at the top of `<head>`; the body markup is kept.
#[test]
fn test_stringify_document_isolate_no_frames_no_js_no_css_no_images() {
    let markup = concat!(
        "<!doctype html>",
        "<title>no-frame no-css no-js no-image isolated document</title>",
        r#"<meta http-equiv="Content-Security-Policy" content="default-src https:">"#,
        r#"<link rel="stylesheet" href="some.css">"#,
        "<div>",
        r#"<script src="some.js"></script>"#,
        r#"<img style="width: 100%;" src="some.png" />"#,
        r#"<iframe src="some.html"></iframe>"#,
        "</div>"
    );
    let tree = html_to_dom(&markup);

    let no_css = true;
    let no_frames = true;
    let no_js = true;
    let no_images = true;
    let isolate = true;

    let rendered = stringify_document(
        &tree.document,
        no_css,
        no_frames,
        no_js,
        no_images,
        isolate,
    );

    assert_eq!(
        rendered,
        concat!(
            "<!DOCTYPE html>",
            "<html>",
            "<head>",
            r#"<meta http-equiv="Content-Security-Policy" content="default-src 'unsafe-inline' data:; style-src 'none'; frame-src 'none';child-src 'none'; script-src 'none'; img-src data:;"></meta>"#,
            "<title>no-frame no-css no-js no-image isolated document</title>",
            r#"<meta http-equiv="Content-Security-Policy" content="default-src https:">"#,
            r#"<link rel="stylesheet" href="some.css">"#,
            "</head>",
            "<body>",
            "<div>",
            r#"<script src="some.js"></script>"#,
            r#"<img style="width: 100%;" src="some.png">"#,
            r#"<iframe src="some.html"></iframe>"#,
            "</div>",
            "</body>",
            "</html>"
        )
    );
}
|
||||
26
src/tests/html/embed_srcset.rs
Normal file
26
src/tests/html/embed_srcset.rs
Normal file
@@ -0,0 +1,26 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    use crate::html;
    use reqwest::blocking::Client;
    use std::collections::HashMap;

    /// Both candidates of a two-entry `srcset` are expected to be replaced by
    /// the `empty_image!()` placeholder, with the `1x` / `2x` descriptors kept.
    #[test]
    fn replace_with_empty_images() {
        let http_client = Client::new();
        let asset_cache = &mut HashMap::new();
        let srcset_value = "small.png 1x, large.png 2x";

        let expected = format!("{img} 1x, {img} 2x", img = empty_image!());
        let embedded_srcset =
            html::embed_srcset(asset_cache, &http_client, "", &srcset_value, true, true);

        assert_eq!(expected, embedded_srcset);
    }
}
|
||||
49
src/tests/html/get_node_name.rs
Normal file
49
src/tests/html/get_node_name.rs
Normal file
@@ -0,0 +1,49 @@
|
||||
use crate::html;
|
||||
use html5ever::rcdom::{Handle, NodeData};
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// Walks the parsed DOM and checks, for the `head`, `body`, `div` and `p`
/// elements, that `html::get_node_name` on their parent yields the expected
/// tag name; also checks that exactly seven nodes are visited.
#[test]
fn get_node_name() {
    let markup = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
    let tree = html::html_to_dom(&markup);
    let mut visited = 0;

    // Counts every node handed to it, but only descends into document and
    // element nodes.
    fn visit(node: &Handle, counter: &mut i8) {
        *counter += 1;

        match &node.data {
            NodeData::Element { name, .. } => {
                let tag = name.local.as_ref().to_string();
                let parent = html::get_parent_node(node);
                let parent_name = html::get_node_name(&parent);

                match tag.as_str() {
                    "head" | "body" => assert_eq!(parent_name, Some("html")),
                    "div" => assert_eq!(parent_name, Some("body")),
                    "p" => assert_eq!(parent_name, Some("div")),
                    _ => {}
                }

                for child in node.children.borrow().iter() {
                    visit(child, counter);
                }
            }
            NodeData::Document => {
                for child in node.children.borrow().iter() {
                    visit(child, counter);
                }
            }
            _ => {}
        }
    }

    visit(&tree.document, &mut visited);

    assert_eq!(visited, 7);
}
|
||||
92
src/tests/html/has_proper_integrity.rs
Normal file
92
src/tests/html/has_proper_integrity.rs
Normal file
@@ -0,0 +1,92 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    use crate::html;

    /// Empty input is accepted against its `sha256-` integrity value.
    #[test]
    fn empty_input_sha256() {
        let data = "".as_bytes();
        let integrity = "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=";

        assert!(html::has_proper_integrity(data, integrity));
    }

    /// Sample input is accepted against its `sha256-` integrity value.
    #[test]
    fn sha256() {
        let data = "abcdef0123456789".as_bytes();
        let integrity = "sha256-9EWAHgy4mSYsm54hmDaIDXPKLRsLnBX7lZyQ6xISNOM=";

        assert!(html::has_proper_integrity(data, integrity));
    }

    /// Sample input is accepted against its `sha384-` integrity value.
    #[test]
    fn sha384() {
        let data = "abcdef0123456789".as_bytes();
        let integrity =
            "sha384-gc9l7omltke8C33bedgh15E12M7RrAQa5t63Yb8APlpe7ZhiqV23+oqiulSJl3Kw";

        assert!(html::has_proper_integrity(data, integrity));
    }

    /// Sample input is accepted against its `sha512-` integrity value.
    #[test]
    fn sha512() {
        let data = "abcdef0123456789".as_bytes();
        let integrity = "sha512-zG5B88cYMqcdiMi9gz0XkOFYw2BpjeYdn5V6+oFrMgSNjRpqL7EF8JEwl17ztZbK3N7I/tTwp3kxQbN1RgFBww==";

        assert!(html::has_proper_integrity(data, integrity));
    }
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod failing {
    use crate::html;

    /// An empty integrity string is rejected for non-empty input.
    #[test]
    fn empty_hash() {
        let data = "abcdef0123456789".as_bytes();
        let accepted = html::has_proper_integrity(data, "");

        assert!(!accepted);
    }

    /// An empty integrity string is rejected for empty input as well.
    #[test]
    fn empty_input_empty_hash() {
        let data = "".as_bytes();
        let accepted = html::has_proper_integrity(data, "");

        assert!(!accepted);
    }

    /// A `sha256-` value with a bogus digest is rejected.
    #[test]
    fn sha256() {
        let data = "abcdef0123456789".as_bytes();
        let accepted = html::has_proper_integrity(data, "sha256-badhash");

        assert!(!accepted);
    }

    /// A `sha384-` value with a bogus digest is rejected.
    #[test]
    fn sha384() {
        let data = "abcdef0123456789".as_bytes();
        let accepted = html::has_proper_integrity(data, "sha384-badhash");

        assert!(!accepted);
    }

    /// A `sha512-` value with a bogus digest is rejected.
    #[test]
    fn sha512() {
        let data = "abcdef0123456789".as_bytes();
        let accepted = html::has_proper_integrity(data, "sha512-badhash");

        assert!(!accepted);
    }
}
|
||||
50
src/tests/html/is_icon.rs
Normal file
50
src/tests/html/is_icon.rs
Normal file
@@ -0,0 +1,50 @@
|
||||
use crate::html;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// The plain `icon` value is recognized by `html::is_icon`.
#[test]
fn passing_icon() {
    let rel_value = "icon";
    assert!(html::is_icon(rel_value));
}
|
||||
|
||||
/// The capitalized `Shortcut Icon` value is recognized by `html::is_icon`.
#[test]
fn passing_shortcut_icon_capitalized() {
    let rel_value = "Shortcut Icon";
    assert!(html::is_icon(rel_value));
}
|
||||
|
||||
/// The upper-case `ICON` value is recognized by `html::is_icon`.
#[test]
fn passing_icon_uppercase() {
    let rel_value = "ICON";
    assert!(html::is_icon(rel_value));
}
|
||||
|
||||
/// The `mask-icon` value is recognized by `html::is_icon`.
#[test]
fn passing_mask_icon() {
    let rel_value = "mask-icon";
    assert!(html::is_icon(rel_value));
}
|
||||
|
||||
/// The `fluid-icon` value is recognized by `html::is_icon`.
#[test]
fn passing_fluid_icon() {
    let rel_value = "fluid-icon";
    assert!(html::is_icon(rel_value));
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// The `stylesheet` value is not treated as an icon by `html::is_icon`.
#[test]
fn failing_stylesheet() {
    let recognized = html::is_icon("stylesheet");
    assert!(!recognized);
}
|
||||
|
||||
/// The empty string is not treated as an icon by `html::is_icon`.
#[test]
fn failing_empty_string() {
    let recognized = html::is_icon("");
    assert!(!recognized);
}
|
||||
6
src/tests/html/mod.rs
Normal file
6
src/tests/html/mod.rs
Normal file
@@ -0,0 +1,6 @@
|
||||
mod embed_srcset;
|
||||
mod get_node_name;
|
||||
mod has_proper_integrity;
|
||||
mod is_icon;
|
||||
mod stringify_document;
|
||||
mod walk_and_embed_assets;
|
||||
188
src/tests/html/stringify_document.rs
Normal file
188
src/tests/html/stringify_document.rs
Normal file
@@ -0,0 +1,188 @@
|
||||
use crate::html;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// With every option disabled, a bare `<div>` fragment is expected to be
/// rendered wrapped in `<html>`, `<head>` and `<body>`, with no extra markup.
#[test]
fn passing_div_as_root_element() {
    let markup = r#"<div><script src="some.js"></script></div>"#;
    let tree = html::html_to_dom(&markup);

    let no_css = false;
    let no_frames = false;
    let no_js = false;
    let no_images = false;
    let isolate = false;

    let rendered = html::stringify_document(
        &tree.document,
        no_css,
        no_frames,
        no_js,
        no_images,
        isolate,
    );

    assert_eq!(
        rendered,
        r#"<html><head></head><body><div><script src="some.js"></script></div></body></html>"#
    );
}
|
||||
|
||||
/// With only the isolate option set, a Content-Security-Policy `<meta>` tag is
/// expected as the first child of `<head>`, ahead of the document's own tags.
#[test]
fn passing_full_page_with_no_html_head_or_body() {
    let markup = concat!(
        "<title>Isolated document</title>",
        r#"<link rel="something" href="some.css" />"#,
        r#"<meta http-equiv="Content-Security-Policy" content="default-src https:">"#,
        r#"<div><script src="some.js"></script></div>"#
    );
    let tree = html::html_to_dom(&markup);

    let no_css = false;
    let no_frames = false;
    let no_js = false;
    let no_images = false;
    let isolate = true;

    let rendered = html::stringify_document(
        &tree.document,
        no_css,
        no_frames,
        no_js,
        no_images,
        isolate,
    );

    assert_eq!(
        rendered,
        concat!(
            "<html>",
            "<head>",
            r#"<meta http-equiv="Content-Security-Policy" content="default-src 'unsafe-inline' data:;"></meta>"#,
            "<title>Isolated document</title>",
            r#"<link rel="something" href="some.css">"#,
            r#"<meta http-equiv="Content-Security-Policy" content="default-src https:">"#,
            "</head>",
            "<body>",
            "<div>",
            r#"<script src="some.js"></script>"#,
            "</div>",
            "</body>",
            "</html>"
        )
    );
}
|
||||
|
||||
/// With only the no-CSS option set, a `style-src 'none';` CSP `<meta>` tag is
/// expected at the top of `<head>`; the rest of the markup is kept as parsed.
#[test]
fn passing_doctype_and_the_rest_no_html_head_or_body() {
    let markup = concat!(
        "<!doctype html>",
        "<title>Unstyled document</title>",
        r#"<link rel="stylesheet" href="main.css"/>"#,
        r#"<div style="display: none;"></div>"#
    );
    let tree = html::html_to_dom(&markup);

    let no_css = true;
    let no_frames = false;
    let no_js = false;
    let no_images = false;
    let isolate = false;

    let rendered = html::stringify_document(
        &tree.document,
        no_css,
        no_frames,
        no_js,
        no_images,
        isolate,
    );

    assert_eq!(
        rendered,
        concat!(
            "<!DOCTYPE html>",
            "<html>",
            "<head>",
            r#"<meta http-equiv="Content-Security-Policy" content="style-src 'none';"></meta>"#,
            "<title>Unstyled document</title>",
            r#"<link rel="stylesheet" href="main.css">"#,
            "</head>",
            r#"<body><div style="display: none;"></div></body>"#,
            "</html>"
        )
    );
}
|
||||
|
||||
/// With only the no-frames option set, a `frame-src 'none';child-src 'none';`
/// CSP `<meta>` tag is expected at the top of `<head>`.
#[test]
fn passing_doctype_and_the_rest_no_html_head_or_body_forbid_frames() {
    let markup = concat!(
        "<!doctype html>",
        "<title>Frameless document</title>",
        r#"<link rel="something"/>"#,
        r#"<div><script src="some.js"></script></div>"#
    );
    let tree = html::html_to_dom(&markup);

    let no_css = false;
    let no_frames = true;
    let no_js = false;
    let no_images = false;
    let isolate = false;

    let rendered = html::stringify_document(
        &tree.document,
        no_css,
        no_frames,
        no_js,
        no_images,
        isolate,
    );

    assert_eq!(
        rendered,
        concat!(
            "<!DOCTYPE html>",
            "<html>",
            "<head>",
            r#"<meta http-equiv="Content-Security-Policy" content="frame-src 'none';child-src 'none';"></meta>"#,
            "<title>Frameless document</title>",
            r#"<link rel="something">"#,
            "</head>",
            r#"<body><div><script src="some.js"></script></div></body>"#,
            "</html>"
        )
    );
}
|
||||
|
||||
/// With every option set, a single CSP `<meta>` tag combining all the
/// restrictions is expected at the top of `<head>`; the body markup is kept.
#[test]
fn passing_doctype_and_the_rest_all_forbidden() {
    let markup = concat!(
        "<!doctype html>",
        "<title>no-frame no-css no-js no-image isolated document</title>",
        r#"<meta http-equiv="Content-Security-Policy" content="default-src https:">"#,
        r#"<link rel="stylesheet" href="some.css">"#,
        "<div>",
        r#"<script src="some.js"></script>"#,
        r#"<img style="width: 100%;" src="some.png" />"#,
        r#"<iframe src="some.html"></iframe>"#,
        "</div>"
    );
    let tree = html::html_to_dom(&markup);

    let no_css = true;
    let no_frames = true;
    let no_js = true;
    let no_images = true;
    let isolate = true;

    let rendered = html::stringify_document(
        &tree.document,
        no_css,
        no_frames,
        no_js,
        no_images,
        isolate,
    );

    assert_eq!(
        rendered,
        concat!(
            "<!DOCTYPE html>",
            "<html>",
            "<head>",
            r#"<meta http-equiv="Content-Security-Policy" content="default-src 'unsafe-inline' data:; style-src 'none'; frame-src 'none';child-src 'none'; script-src 'none'; img-src data:;"></meta>"#,
            "<title>no-frame no-css no-js no-image isolated document</title>",
            r#"<meta http-equiv="Content-Security-Policy" content="default-src https:">"#,
            r#"<link rel="stylesheet" href="some.css">"#,
            "</head>",
            "<body>",
            "<div>",
            r#"<script src="some.js"></script>"#,
            r#"<img style="width: 100%;" src="some.png">"#,
            r#"<iframe src="some.html"></iframe>"#,
            "</div>",
            "</body>",
            "</html>"
        )
    );
}
|
||||
419
src/tests/html/walk_and_embed_assets.rs
Normal file
419
src/tests/html/walk_and_embed_assets.rs
Normal file
@@ -0,0 +1,419 @@
|
||||
use crate::html;
|
||||
use html5ever::serialize::{serialize, SerializeOpts};
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// With no restriction flags set, a plain `<div><P></P></div>` fragment is
/// expected to serialize as a full document with the tag name lower-cased.
#[test]
fn passing_basic() {
    let markup = "<div><P></P></div>";
    let tree = html::html_to_dom(&markup);
    let base_url = "http://localhost";
    let asset_cache = &mut HashMap::new();
    let http_client = Client::new();

    let no_css = false;
    let no_fonts = false;
    let no_frames = false;
    let no_js = false;
    let no_images = false;
    let silent = true;

    html::walk_and_embed_assets(
        asset_cache,
        &http_client,
        &base_url,
        &tree.document,
        no_css,
        no_fonts,
        no_frames,
        no_js,
        no_images,
        silent,
    );

    let mut output = Vec::<u8>::new();
    serialize(&mut output, &tree.document, SerializeOpts::default()).unwrap();
    let rendered: String = output.iter().map(|&byte| char::from(byte)).collect();

    assert_eq!(
        rendered,
        "<html><head></head><body><div><p></p></div></body></html>"
    );
}
|
||||
|
||||
/// An `<iframe>` with an empty `src` is expected to come through unchanged
/// when no restriction flags are set.
#[test]
fn passing_ensure_no_recursive_iframe() {
    let markup = r#"<div><P></P><iframe src=""></iframe></div>"#;
    let tree = html::html_to_dom(&markup);
    let base_url = "http://localhost";
    let asset_cache = &mut HashMap::new();
    let http_client = Client::new();

    let no_css = false;
    let no_fonts = false;
    let no_frames = false;
    let no_js = false;
    let no_images = false;
    let silent = true;

    html::walk_and_embed_assets(
        asset_cache,
        &http_client,
        &base_url,
        &tree.document,
        no_css,
        no_fonts,
        no_frames,
        no_js,
        no_images,
        silent,
    );

    let mut output = Vec::<u8>::new();
    serialize(&mut output, &tree.document, SerializeOpts::default()).unwrap();
    let rendered: String = output.iter().map(|&byte| char::from(byte)).collect();

    assert_eq!(
        rendered,
        r#"<html><head></head><body><div><p></p><iframe src=""></iframe></div></body></html>"#
    );
}
|
||||
|
||||
/// A `<frame>` with an empty `src` inside a `<frameset>` is expected to come
/// through unchanged when no restriction flags are set.
#[test]
fn passing_ensure_no_recursive_frame() {
    let markup = r#"<frameset><frame src=""></frameset>"#;
    let tree = html::html_to_dom(&markup);
    let base_url = "http://localhost";
    let asset_cache = &mut HashMap::new();
    let http_client = Client::new();

    let no_css = false;
    let no_fonts = false;
    let no_frames = false;
    let no_js = false;
    let no_images = false;
    let silent = true;

    html::walk_and_embed_assets(
        asset_cache,
        &http_client,
        &base_url,
        &tree.document,
        no_css,
        no_fonts,
        no_frames,
        no_js,
        no_images,
        silent,
    );

    let mut output = Vec::<u8>::new();
    serialize(&mut output, &tree.document, SerializeOpts::default()).unwrap();
    let rendered: String = output.iter().map(|&byte| char::from(byte)).collect();

    assert_eq!(
        rendered,
        r#"<html><head></head><frameset><frame src=""></frameset></html>"#
    );
}
|
||||
|
||||
/// With only the no-CSS flag set, the stylesheet link is expected to lose its
/// `href`, the `<style>` element its contents and the `<div>` its inline style.
#[test]
fn passing_no_css() {
    let markup = concat!(
        r#"<link rel="stylesheet" href="main.css">"#,
        "<style>html{background-color: #000;}</style>",
        r#"<div style="display: none;"></div>"#
    );
    let tree = html::html_to_dom(&markup);
    let base_url = "http://localhost";
    let asset_cache = &mut HashMap::new();
    let http_client = Client::new();

    let no_css = true;
    let no_fonts = false;
    let no_frames = false;
    let no_js = false;
    let no_images = false;
    let silent = true;

    html::walk_and_embed_assets(
        asset_cache,
        &http_client,
        &base_url,
        &tree.document,
        no_css,
        no_fonts,
        no_frames,
        no_js,
        no_images,
        silent,
    );

    let mut output = Vec::<u8>::new();
    serialize(&mut output, &tree.document, SerializeOpts::default()).unwrap();
    let rendered: String = output.iter().map(|&byte| char::from(byte)).collect();

    assert_eq!(
        rendered,
        concat!(
            "<html>",
            "<head>",
            r#"<link rel="stylesheet">"#,
            "<style></style>",
            "</head>",
            "<body>",
            "<div></div>",
            "</body>",
            "</html>"
        )
    );
}
|
||||
|
||||
/// With only the no-images flag set, the icon link is expected to lose its
/// `href` and the `<img>` source to become the `empty_image!()` placeholder.
#[test]
fn passing_no_images() {
    let markup = concat!(
        r#"<link rel="icon" href="favicon.ico">"#,
        r#"<div><img src="http://localhost/assets/mono_lisa.png" /></div>"#
    );
    let tree = html::html_to_dom(&markup);
    let base_url = "http://localhost";
    let asset_cache = &mut HashMap::new();
    let http_client = Client::new();

    let no_css = false;
    let no_fonts = false;
    let no_frames = false;
    let no_js = false;
    let no_images = true;
    let silent = true;

    html::walk_and_embed_assets(
        asset_cache,
        &http_client,
        &base_url,
        &tree.document,
        no_css,
        no_fonts,
        no_frames,
        no_js,
        no_images,
        silent,
    );

    let mut output = Vec::<u8>::new();
    serialize(&mut output, &tree.document, SerializeOpts::default()).unwrap();
    let rendered: String = output.iter().map(|&byte| char::from(byte)).collect();

    let expected = format!(
        r#"<html><head><link rel="icon"></head><body><div><img src="{}"></div></body></html>"#,
        empty_image!()
    );

    assert_eq!(rendered, expected);
}
|
||||
|
||||
/// With only the no-images flag set, a `<body>` carrying (duplicated)
/// `background` attributes is expected to serialize with no attributes at all.
#[test]
fn passing_no_body_background_images() {
    let markup =
        r#"<body background="no/such/image.png" background="no/such/image2.png"></body>"#;
    let tree = html::html_to_dom(&markup);
    let base_url = "http://localhost";
    let asset_cache = &mut HashMap::new();
    let http_client = Client::new();

    let no_css = false;
    let no_fonts = false;
    let no_frames = false;
    let no_js = false;
    let no_images = true;
    let silent = true;

    html::walk_and_embed_assets(
        asset_cache,
        &http_client,
        &base_url,
        &tree.document,
        no_css,
        no_fonts,
        no_frames,
        no_js,
        no_images,
        silent,
    );

    let mut output = Vec::<u8>::new();
    serialize(&mut output, &tree.document, SerializeOpts::default()).unwrap();
    let rendered: String = output.iter().map(|&byte| char::from(byte)).collect();

    assert_eq!(rendered, "<html><head></head><body></body></html>");
}
|
||||
|
||||
/// With only the no-frames flag set, the `<frame>` is expected to be kept but
/// with an empty `src` attribute.
#[test]
fn passing_no_frames() {
    let markup = r#"<frameset><frame src="http://trackbook.com"></frameset>"#;
    let tree = html::html_to_dom(&markup);
    let base_url = "http://localhost";
    let asset_cache = &mut HashMap::new();
    let http_client = Client::new();

    let no_css = false;
    let no_fonts = false;
    let no_frames = true;
    let no_js = false;
    let no_images = false;
    let silent = true;

    html::walk_and_embed_assets(
        asset_cache,
        &http_client,
        &base_url,
        &tree.document,
        no_css,
        no_fonts,
        no_frames,
        no_js,
        no_images,
        silent,
    );

    let mut output = Vec::<u8>::new();
    serialize(&mut output, &tree.document, SerializeOpts::default()).unwrap();
    let rendered: String = output.iter().map(|&byte| char::from(byte)).collect();

    assert_eq!(
        rendered,
        r#"<html><head></head><frameset><frame src=""></frameset></html>"#
    );
}
|
||||
|
||||
/// With only the no-frames flag set, the `<iframe>` is expected to be kept but
/// with an empty `src` attribute.
#[test]
fn passing_no_iframes() {
    let markup = r#"<iframe src="http://trackbook.com"></iframe>"#;
    let tree = html::html_to_dom(&markup);
    let base_url = "http://localhost";
    let asset_cache = &mut HashMap::new();
    let http_client = Client::new();

    let no_css = false;
    let no_fonts = false;
    let no_frames = true;
    let no_js = false;
    let no_images = false;
    let silent = true;

    html::walk_and_embed_assets(
        asset_cache,
        &http_client,
        &base_url,
        &tree.document,
        no_css,
        no_fonts,
        no_frames,
        no_js,
        no_images,
        silent,
    );

    let mut output = Vec::<u8>::new();
    serialize(&mut output, &tree.document, SerializeOpts::default()).unwrap();
    let rendered: String = output.iter().map(|&byte| char::from(byte)).collect();

    assert_eq!(
        rendered,
        r#"<html><head></head><body><iframe src=""></iframe></body></html>"#
    );
}
|
||||
|
||||
/// With only the no-JS flag set, the `onClick` attribute and the script `src`
/// attribute are expected to be gone and both `<script>` elements left empty.
#[test]
fn passing_no_js() {
    let markup = concat!(
        r#"<div onClick="void(0)">"#,
        r#"<script src="http://localhost/assets/some.js"></script>"#,
        "<script>alert(1)</script>",
        "</div>"
    );
    let tree = html::html_to_dom(&markup);
    let base_url = "http://localhost";
    let asset_cache = &mut HashMap::new();
    let http_client = Client::new();

    let no_css = false;
    let no_fonts = false;
    let no_frames = false;
    let no_js = true;
    let no_images = false;
    let silent = true;

    html::walk_and_embed_assets(
        asset_cache,
        &http_client,
        &base_url,
        &tree.document,
        no_css,
        no_fonts,
        no_frames,
        no_js,
        no_images,
        silent,
    );

    let mut output = Vec::<u8>::new();
    serialize(&mut output, &tree.document, SerializeOpts::default()).unwrap();
    let rendered: String = output.iter().map(|&byte| char::from(byte)).collect();

    assert_eq!(
        rendered,
        concat!(
            "<html><head></head><body><div><script></script>",
            "<script></script></div></body></html>"
        )
    );
}
|
||||
|
||||
/// The expected output carries no `integrity` attribute on either the `<link>`
/// or the `<script>` element; with `opt_no_js` set the script also loses its `src`.
#[test]
fn passing_with_no_integrity() {
    let html = "<title>No integrity</title>\
                <link integrity=\"sha384-...\" rel=\"something\"/>\
                <script integrity=\"sha384-...\" src=\"some.js\"></script>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();
    let client = Client::new();
    let opt_no_css: bool = true;
    let opt_no_fonts: bool = false;
    let opt_no_frames: bool = true;
    let opt_no_js: bool = true;
    let opt_no_images: bool = true;
    let opt_silent = true;

    html::walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_fonts,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_silent,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // garble any multi-byte character in the output.
    assert_eq!(
        String::from_utf8(buf).unwrap(),
        "<html>\
         <head><title>No integrity</title><link rel=\"something\"><script></script></head>\
         <body></body>\
         </html>"
    );
}
|
||||
@@ -1,23 +0,0 @@
|
||||
use crate::http::retrieve_asset;
|
||||
use std::collections::HashMap;
|
||||
/// A data URL passed to `retrieve_asset` is expected to come back unchanged, both
/// as the data and as the final URL, with or without a MIME type argument.
#[test]
fn test_retrieve_asset() {
    let cache = &mut HashMap::new();
    let client = reqwest::Client::new();
    let data_url = "data:text/html;base64,...";

    // First call passes an empty MIME string, second one an explicit "image/png".
    for &mime in &["", "image/png"] {
        let (data, final_url) =
            retrieve_asset(cache, &client, data_url, true, mime, false).unwrap();
        assert_eq!(&data, "data:text/html;base64,...");
        assert_eq!(&final_url, "data:text/html;base64,...");
    }
}
|
||||
@@ -1,13 +0,0 @@
|
||||
use crate::js::attr_is_event_handler;
|
||||
|
||||
/// Checks `attr_is_event_handler` against attribute names that must be accepted
/// and names that must be rejected.
#[test]
fn test_attr_is_event_handler() {
    // succeeding
    for name in &["onBlur", "onclick", "onClick"] {
        assert!(attr_is_event_handler(name));
    }
    // failing
    for name in &["href", "", "class"] {
        assert!(!attr_is_event_handler(name));
    }
}
|
||||
45
src/tests/js/attr_is_event_handler.rs
Normal file
45
src/tests/js/attr_is_event_handler.rs
Normal file
@@ -0,0 +1,45 @@
|
||||
use crate::js;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// `onBlur` must be reported as an event-handler attribute.
#[test]
fn passing_onblur_camelcase() {
    let is_handler = js::attr_is_event_handler("onBlur");
    assert!(is_handler);
}
|
||||
|
||||
/// All-lowercase `onclick` must be reported as an event-handler attribute.
#[test]
fn passing_onclick_lowercase() {
    let is_handler = js::attr_is_event_handler("onclick");
    assert!(is_handler);
}
|
||||
|
||||
/// `onClick` must be reported as an event-handler attribute.
#[test]
fn passing_onclick_camelcase() {
    let is_handler = js::attr_is_event_handler("onClick");
    assert!(is_handler);
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// `href` must not be reported as an event-handler attribute.
#[test]
fn failing_href() {
    let is_handler = js::attr_is_event_handler("href");
    assert!(!is_handler);
}
|
||||
|
||||
/// An empty attribute name must not be reported as an event handler.
#[test]
fn failing_empty_string() {
    let is_handler = js::attr_is_event_handler("");
    assert!(!is_handler);
}
|
||||
|
||||
/// `class` must not be reported as an event-handler attribute.
#[test]
fn failing_class() {
    let is_handler = js::attr_is_event_handler("class");
    assert!(!is_handler);
}
|
||||
1
src/tests/js/mod.rs
Normal file
1
src/tests/js/mod.rs
Normal file
@@ -0,0 +1 @@
|
||||
mod attr_is_event_handler;
|
||||
14
src/tests/macros/empty_image.rs
Normal file
14
src/tests/macros/empty_image.rs
Normal file
@@ -0,0 +1,14 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    /// `empty_image!()` must expand to this exact PNG data URL.
    #[test]
    fn contains_correct_image_data() {
        let expected = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII=";
        assert_eq!(empty_image!(), expected);
    }
}
|
||||
2
src/tests/macros/mod.rs
Normal file
2
src/tests/macros/mod.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
mod empty_image;
|
||||
mod str;
|
||||
24
src/tests/macros/str.rs
Normal file
24
src/tests/macros/str.rs
Normal file
@@ -0,0 +1,24 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    /// `str!()` with no argument compares equal to the empty string.
    #[test]
    fn returns_empty_string() {
        let produced = str!();
        assert_eq!(produced, "");
    }

    /// `str!(123)` compares equal to `"123"`.
    #[test]
    fn converts_integer_into_string() {
        let produced = str!(123);
        assert_eq!(produced, "123");
    }

    /// `str!("abc")` compares equal to `"abc"`.
    #[test]
    fn converts_str_into_string() {
        let produced = str!("abc");
        assert_eq!(produced, "abc");
    }
}
|
||||
@@ -1,4 +1,6 @@
|
||||
mod cli;
|
||||
mod css;
|
||||
mod html;
|
||||
mod http;
|
||||
mod js;
|
||||
mod macros;
|
||||
mod utils;
|
||||
|
||||
@@ -1,160 +0,0 @@
|
||||
use crate::utils::{
|
||||
data_to_dataurl, detect_mimetype, is_data_url, is_valid_url, resolve_url, url_has_protocol,
|
||||
};
|
||||
use url::ParseError;
|
||||
|
||||
/// Encoding a small JavaScript snippet must yield the expected base64 data URL.
#[test]
fn test_data_to_dataurl() {
    let script = "var word = 'hello';\nalert(word);\n";
    let expected =
        "data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK";

    let datauri = data_to_dataurl("application/javascript", script.as_bytes());

    assert_eq!(&datauri, expected);
}
|
||||
|
||||
/// Feeds `detect_mimetype` a table of leading byte sequences and checks the
/// MIME type reported for each.
#[test]
fn test_detect_mimetype() {
    let cases = [
        // image
        (&b"GIF87a"[..], "image/gif"),
        (&b"GIF89a"[..], "image/gif"),
        (&b"\xFF\xD8\xFF"[..], "image/jpeg"),
        (&b"\x89PNG\x0D\x0A\x1A\x0A"[..], "image/png"),
        (&b"<?xml "[..], "image/svg+xml"),
        (&b"<svg "[..], "image/svg+xml"),
        (&b"RIFF....WEBPVP8 "[..], "image/webp"),
        (&b"\x00\x00\x01\x00"[..], "image/x-icon"),
        // audio
        (&b"ID3"[..], "audio/mpeg"),
        (&b"\xFF\x0E"[..], "audio/mpeg"),
        (&b"\xFF\x0F"[..], "audio/mpeg"),
        (&b"OggS"[..], "audio/ogg"),
        (&b"RIFF....WAVEfmt "[..], "audio/wav"),
        (&b"fLaC"[..], "audio/x-flac"),
        // video
        (&b"RIFF....AVI LIST"[..], "video/avi"),
        (&b"....ftyp"[..], "video/mp4"),
        (&b"\x00\x00\x01\x0B"[..], "video/mpeg"),
        (&b"....moov"[..], "video/quicktime"),
        (&b"\x1A\x45\xDF\xA3"[..], "video/webm"),
    ];

    for &(bytes, expected) in cases.iter() {
        assert_eq!(detect_mimetype(bytes), expected);
    }
}
|
||||
|
||||
/// Checks `url_has_protocol` against URLs that carry a scheme and strings that
/// do not. Each assertion names the offending input on failure.
#[test]
fn test_url_has_protocol() {
    // succeeding
    let with_protocol = [
        "mailto:somebody@somewhere.com?subject=hello",
        "tel:5551234567",
        "ftp:user:password@some-ftp-server.com",
        "javascript:void(0)",
        "http://news.ycombinator.com",
        "https://github.com",
        "MAILTO:somebody@somewhere.com?subject=hello",
    ];
    for url in with_protocol.iter() {
        assert!(url_has_protocol(url), "expected a protocol in {:?}", url);
    }

    // failing
    let without_protocol = [
        "//some-hostname.com/some-file.html",
        "some-hostname.com/some-file.html",
        "/some-file.html",
        "",
    ];
    for url in without_protocol.iter() {
        assert!(!url_has_protocol(url), "expected no protocol in {:?}", url);
    }
}
|
||||
|
||||
/// Checks `is_valid_url` against URLs that must be accepted and strings that
/// must be rejected.
#[test]
fn test_is_valid_url() {
    // succeeding
    for url in &["https://www.rust-lang.org/", "http://kernel.org"] {
        assert!(is_valid_url(url));
    }

    // failing
    let rejected = [
        "//kernel.org",
        "./index.html",
        "some-local-page.htm",
        "ftp://1.2.3.4/www/index.html",
        "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
    ];
    for url in rejected.iter() {
        assert!(!is_valid_url(url));
    }
}
|
||||
|
||||
/// Resolves a table of `(base, target)` pairs with `resolve_url` and compares each
/// result against the expected absolute URL. A parse error fails the test via `?`.
#[test]
fn test_resolve_url() -> Result<(), ParseError> {
    // (base URL, URL to resolve, expected result)
    let cases = [
        (
            "https://www.kernel.org",
            "../category/signatures.html",
            "https://www.kernel.org/category/signatures.html",
        ),
        (
            "https://www.kernel.org",
            "category/signatures.html",
            "https://www.kernel.org/category/signatures.html",
        ),
        (
            "saved_page.htm",
            "https://www.kernel.org/category/signatures.html",
            "https://www.kernel.org/category/signatures.html",
        ),
        (
            "https://www.kernel.org",
            "//www.kernel.org/theme/images/logos/tux.png",
            "https://www.kernel.org/theme/images/logos/tux.png",
        ),
        (
            "https://www.kernel.org",
            "//another-host.org/theme/images/logos/tux.png",
            "https://another-host.org/theme/images/logos/tux.png",
        ),
        (
            "https://www.kernel.org/category/signatures.html",
            "/theme/images/logos/tux.png",
            "https://www.kernel.org/theme/images/logos/tux.png",
        ),
        (
            "https://www.w3schools.com/html/html_iframe.asp",
            "default.asp",
            "https://www.w3schools.com/html/default.asp",
        ),
    ];

    for &(from, to, expected) in cases.iter() {
        let resolved_url = resolve_url(from, to)?;
        assert_eq!(resolved_url.as_str(), expected);
    }

    Ok(())
}
|
||||
|
||||
/// Checks `is_data_url` with one data URL and several non-data strings; a missing
/// or failed result is treated as `false` through `unwrap_or(false)`.
#[test]
fn test_is_data_url() {
    // succeeding
    let data_url = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    assert!(is_data_url(data_url).unwrap_or(false));

    // failing
    for url in &["https://kernel.org", "//kernel.org", ""] {
        assert!(!is_data_url(url).unwrap_or(false));
    }
}
|
||||
48
src/tests/utils/clean_url.rs
Normal file
48
src/tests/utils/clean_url.rs
Normal file
@@ -0,0 +1,48 @@
|
||||
use crate::utils;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// A `#iefix` fragment is expected to be stripped by `clean_url`.
#[test]
fn passing_removes_fragment() {
    let cleaned = utils::clean_url("https://somewhere.com/font.eot#iefix");
    assert_eq!(cleaned, "https://somewhere.com/font.eot");
}
|
||||
|
||||
/// A trailing bare `#` is expected to be stripped by `clean_url`.
#[test]
fn passing_removes_empty_fragment() {
    let cleaned = utils::clean_url("https://somewhere.com/font.eot#");
    assert_eq!(cleaned, "https://somewhere.com/font.eot");
}
|
||||
|
||||
/// A trailing `?#` (empty query plus empty fragment) is expected to be stripped.
#[test]
fn passing_removes_empty_query_and_empty_fragment() {
    let cleaned = utils::clean_url("https://somewhere.com/font.eot?#");
    assert_eq!(cleaned, "https://somewhere.com/font.eot");
}
|
||||
|
||||
/// A dangling `&` after the query and an empty fragment are expected to be
/// stripped while the `a=b` pair is kept.
#[test]
fn passing_removes_empty_query_amp_and_empty_fragment() {
    let cleaned = utils::clean_url("https://somewhere.com/font.eot?a=b&#");
    assert_eq!(cleaned, "https://somewhere.com/font.eot?a=b");
}
|
||||
|
||||
/// A URL with `user:password@` credentials is expected to pass through unchanged.
#[test]
fn passing_keeps_credentials() {
    let url = "https://cookie:monster@gibson.internet/";
    assert_eq!(utils::clean_url(url), "https://cookie:monster@gibson.internet/");
}
|
||||
28
src/tests/utils/data_to_data_url.rs
Normal file
28
src/tests/utils/data_to_data_url.rs
Normal file
@@ -0,0 +1,28 @@
|
||||
use crate::utils;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// Encoding a JavaScript snippet with an explicit media type must yield the
/// expected base64 data URL.
#[test]
fn passing_encode_string_with_specific_media_type() {
    let script = "var word = 'hello';\nalert(word);\n";
    let expected =
        "data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK";

    let data_url =
        utils::data_to_data_url("application/javascript", script.as_bytes(), "", "");

    assert_eq!(&data_url, expected);
}
|
||||
|
||||
/// A non-empty last argument is expected to be appended as `#fragment`.
#[test]
fn passing_encode_append_fragment() {
    let payload = "<svg></svg>\n".as_bytes();
    let data_url = utils::data_to_data_url("text/css", payload, "", "fragment");

    assert_eq!(&data_url, "data:text/css;base64,PHN2Zz48L3N2Zz4K#fragment");
}
|
||||
95
src/tests/utils/data_url_to_data.rs
Normal file
95
src/tests/utils/data_url_to_data.rs
Normal file
@@ -0,0 +1,95 @@
|
||||
use crate::utils;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// A base64 `text/html` data URL is split into its media type and decoded text.
#[test]
fn passing_parse_text_html_base64() {
    let input = "data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==";
    let (media_type, payload) = utils::data_url_to_data(input);
    let text = String::from_utf8_lossy(&payload);

    assert_eq!(media_type, "text/html");
    assert_eq!(
        text,
        "Work expands so as to fill the time available for its completion"
    );
}
|
||||
|
||||
/// A `;utf8` `text/html` data URL is split into its media type and text.
#[test]
fn passing_parse_text_html_utf8() {
    let input =
        "data:text/html;utf8,Work expands so as to fill the time available for its completion";
    let (media_type, payload) = utils::data_url_to_data(input);
    let text = String::from_utf8_lossy(&payload);

    assert_eq!(media_type, "text/html");
    assert_eq!(
        text,
        "Work expands so as to fill the time available for its completion"
    );
}
|
||||
|
||||
/// A `text/html` data URL with no encoding marker is split into media type and text.
#[test]
fn passing_parse_text_html_plaintext() {
    let input = "data:text/html,Work expands so as to fill the time available for its completion";
    let (media_type, payload) = utils::data_url_to_data(input);
    let text = String::from_utf8_lossy(&payload);

    assert_eq!(media_type, "text/html");
    assert_eq!(
        text,
        "Work expands so as to fill the time available for its completion"
    );
}
|
||||
|
||||
/// A data URL wrapped in a leading and a trailing space still yields the media
/// type and the text without the surrounding whitespace.
#[test]
fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() {
    let input = " data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ";
    let (media_type, payload) = utils::data_url_to_data(input);
    let text = String::from_utf8_lossy(&payload);

    assert_eq!(media_type, "text/html");
    assert_eq!(
        text,
        "Work expands so as to fill the time available for its completion"
    );
}
|
||||
|
||||
/// A percent-encoded `%23` in a `text/css` data URL is expected to decode to `#`.
#[test]
fn passing_parse_text_css_url_encoded() {
    let input = "data:text/css,div{background-color:%23000}";
    let (media_type, payload) = utils::data_url_to_data(input);
    let text = String::from_utf8_lossy(&payload);

    assert_eq!(media_type, "text/css");
    assert_eq!(text, "div{background-color:#000}");
}
|
||||
|
||||
/// A base64 data URL with no media type yields an empty media type and the
/// decoded text.
#[test]
fn passing_parse_no_media_type_base64() {
    let input = "data:;base64,dGVzdA==";
    let (media_type, payload) = utils::data_url_to_data(input);
    let text = String::from_utf8_lossy(&payload);

    assert_eq!(media_type, "");
    assert_eq!(text, "test");
}
|
||||
|
||||
/// A data URL with neither media type nor encoding yields an empty media type
/// and percent-decoded text.
#[test]
fn passing_parse_no_media_type_no_encoding() {
    let input = "data:;,test%20test";
    let (media_type, payload) = utils::data_url_to_data(input);
    let text = String::from_utf8_lossy(&payload);

    assert_eq!(media_type, "");
    assert_eq!(text, "test test");
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// The bare word `data` is expected to yield an empty media type and empty data.
#[test]
fn failing_just_word_data() {
    let input = "data";
    let (media_type, payload) = utils::data_url_to_data(input);
    let text = String::from_utf8_lossy(&payload);

    assert_eq!(media_type, "");
    assert_eq!(text, "");
}
|
||||
39
src/tests/utils/decode_url.rs
Normal file
39
src/tests/utils/decode_url.rs
Normal file
@@ -0,0 +1,39 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    use crate::utils;

    /// Percent-encoded multi-byte sequences are expected to decode to the
    /// corresponding Unicode text.
    #[test]
    fn decode_unicode_characters() {
        let encoded = str!(
            "%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
        );
        let decoded = utils::decode_url(encoded);

        assert_eq!(decoded, "検ヒム解塗ゃッ = サ");
    }

    /// `%20` and `%23` inside a `file://` URL are expected to decode to a space and `#`.
    #[test]
    fn decode_file_url() {
        let encoded = str!("file:///tmp/space%20here/test%231.html");
        let decoded = utils::decode_url(encoded);

        assert_eq!(decoded, "file:///tmp/space here/test#1.html");
    }

    /// A literal `+` is expected to be left untouched.
    #[test]
    fn plus_sign() {
        let encoded = str!(
            "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
        );
        let decoded = utils::decode_url(encoded);

        assert_eq!(
            decoded,
            "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
        );
    }
}
|
||||
147
src/tests/utils/detect_media_type.rs
Normal file
147
src/tests/utils/detect_media_type.rs
Normal file
@@ -0,0 +1,147 @@
|
||||
use crate::utils;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// The `GIF87a` signature is expected to be detected as `image/gif`.
#[test]
fn passing_image_gif87() {
    let detected = utils::detect_media_type(b"GIF87a", "");
    assert_eq!(detected, "image/gif");
}
|
||||
|
||||
/// The `GIF89a` signature is expected to be detected as `image/gif`.
#[test]
fn passing_image_gif89() {
    let detected = utils::detect_media_type(b"GIF89a", "");
    assert_eq!(detected, "image/gif");
}
|
||||
|
||||
/// The bytes `FF D8 FF` are expected to be detected as `image/jpeg`.
#[test]
fn passing_image_jpeg() {
    let detected = utils::detect_media_type(b"\xFF\xD8\xFF", "");
    assert_eq!(detected, "image/jpeg");
}
|
||||
|
||||
/// The eight-byte PNG signature is expected to be detected as `image/png`.
#[test]
fn passing_image_png() {
    let detected = utils::detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A", "");
    assert_eq!(detected, "image/png");
}
|
||||
|
||||
/// Data starting with `<svg ` is expected to be detected as `image/svg+xml`.
#[test]
fn passing_image_svg() {
    let detected = utils::detect_media_type(b"<svg ", "");
    assert_eq!(detected, "image/svg+xml");
}
|
||||
|
||||
/// A `RIFF....WEBPVP8 ` header is expected to be detected as `image/webp`.
#[test]
fn passing_image_webp() {
    let detected = utils::detect_media_type(b"RIFF....WEBPVP8 ", "");
    assert_eq!(detected, "image/webp");
}
|
||||
|
||||
/// The bytes `00 00 01 00` are expected to be detected as `image/x-icon`.
#[test]
fn passing_image_icon() {
    let detected = utils::detect_media_type(b"\x00\x00\x01\x00", "");
    assert_eq!(detected, "image/x-icon");
}
|
||||
|
||||
/// `<?xml ` data combined with a `.svg` file name is expected to be detected as
/// `image/svg+xml`.
#[test]
fn passing_image_svg_filename() {
    let detected = utils::detect_media_type(b"<?xml ", "local-file.svg");
    assert_eq!(detected, "image/svg+xml");
}
|
||||
|
||||
/// Empty data with a URL ending in upper-case `.SVG` is expected to be detected
/// as `image/svg+xml`.
#[test]
fn passing_image_svg_url_uppercase() {
    let url = "https://some-site.com/images/local-file.SVG";
    let detected = utils::detect_media_type(b"", url);
    assert_eq!(detected, "image/svg+xml");
}
|
||||
|
||||
/// An `ID3` prefix is expected to be detected as `audio/mpeg`.
#[test]
fn passing_audio_mpeg() {
    let detected = utils::detect_media_type(b"ID3", "");
    assert_eq!(detected, "audio/mpeg");
}
|
||||
|
||||
/// The bytes `FF 0E` are expected to be detected as `audio/mpeg`.
#[test]
fn passing_audio_mpeg_2() {
    let detected = utils::detect_media_type(b"\xFF\x0E", "");
    assert_eq!(detected, "audio/mpeg");
}
|
||||
|
||||
/// The bytes `FF 0F` are expected to be detected as `audio/mpeg`.
#[test]
fn passing_audio_mpeg_3() {
    let detected = utils::detect_media_type(b"\xFF\x0F", "");
    assert_eq!(detected, "audio/mpeg");
}
|
||||
|
||||
/// An `OggS` prefix is expected to be detected as `audio/ogg`.
#[test]
fn passing_audio_ogg() {
    let detected = utils::detect_media_type(b"OggS", "");
    assert_eq!(detected, "audio/ogg");
}
|
||||
|
||||
/// A `RIFF....WAVEfmt ` header is expected to be detected as `audio/wav`.
#[test]
fn passing_audio_wav() {
    let detected = utils::detect_media_type(b"RIFF....WAVEfmt ", "");
    assert_eq!(detected, "audio/wav");
}
|
||||
|
||||
/// An `fLaC` prefix is expected to be detected as `audio/x-flac`.
#[test]
fn passing_audio_flac() {
    let detected = utils::detect_media_type(b"fLaC", "");
    assert_eq!(detected, "audio/x-flac");
}
|
||||
|
||||
/// A `RIFF....AVI LIST` header is expected to be detected as `video/avi`.
#[test]
fn passing_video_avi() {
    let detected = utils::detect_media_type(b"RIFF....AVI LIST", "");
    assert_eq!(detected, "video/avi");
}
|
||||
|
||||
/// Data of the form `....ftyp` is expected to be detected as `video/mp4`.
#[test]
fn passing_video_mp4() {
    let detected = utils::detect_media_type(b"....ftyp", "");
    assert_eq!(detected, "video/mp4");
}
|
||||
|
||||
/// The bytes `00 00 01 0B` are expected to be detected as `video/mpeg`.
#[test]
fn passing_video_mpeg() {
    let detected = utils::detect_media_type(b"\x00\x00\x01\x0B", "");
    assert_eq!(detected, "video/mpeg");
}
|
||||
|
||||
/// Data of the form `....moov` is expected to be detected as `video/quicktime`.
#[test]
fn passing_video_quicktime() {
    let detected = utils::detect_media_type(b"....moov", "");
    assert_eq!(detected, "video/quicktime");
}
|
||||
|
||||
/// The bytes `1A 45 DF A3` are expected to be detected as `video/webm`.
#[test]
fn passing_video_webm() {
    let detected = utils::detect_media_type(b"\x1A\x45\xDF\xA3", "");
    assert_eq!(detected, "video/webm");
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// Unrecognised data with no URL hint is expected to yield an empty media type.
#[test]
fn failing_unknown_media_type() {
    let detected = utils::detect_media_type(b"abcdef0123456789", "");
    assert_eq!(detected, "");
}
|
||||
38
src/tests/utils/file_url_to_fs_path.rs
Normal file
38
src/tests/utils/file_url_to_fs_path.rs
Normal file
@@ -0,0 +1,38 @@
|
||||
use crate::utils;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// The `file://` prefix and the `#fragment` suffix are expected to be dropped;
/// the Windows variant additionally expects backslash separators.
#[test]
fn passing_remove_protocl_and_fragment() {
    // Pick the platform-specific input and expected path, then assert once.
    let (file_url, expected_path) = if cfg!(windows) {
        (
            "file:///C:/documents/some-path/some-file.svg#fragment",
            "C:\\documents\\some-path\\some-file.svg",
        )
    } else {
        (
            "file:///tmp/some-path/some-file.svg#fragment",
            "/tmp/some-path/some-file.svg",
        )
    };

    assert_eq!(utils::file_url_to_fs_path(file_url), expected_path);
}
|
||||
|
||||
/// `%20` in a `file://` URL is expected to become a space in the resulting path.
#[test]
fn passing_decodes_urls() {
    // Pick the platform-specific input and expected path, then assert once.
    let (file_url, expected_path) = if cfg!(windows) {
        (
            "file:///C:/Documents%20and%20Settings/some-file.html",
            "C:\\Documents and Settings\\some-file.html",
        )
    } else {
        ("file:///home/user/My%20Documents", "/home/user/My Documents")
    };

    assert_eq!(utils::file_url_to_fs_path(file_url), expected_path);
}
|
||||
23
src/tests/utils/get_url_fragment.rs
Normal file
23
src/tests/utils/get_url_fragment.rs
Normal file
@@ -0,0 +1,23 @@
|
||||
use crate::utils;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// The text after `#` in a data URL is expected to be returned as the fragment.
#[test]
fn passing_data_url() {
    let url = "data:image/svg+xml;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h#test";
    let fragment = utils::get_url_fragment(url);

    assert_eq!(fragment, "test");
}
|
||||
|
||||
/// A URL ending in a bare `#` is expected to yield an empty fragment.
#[test]
fn passing_https_empty() {
    let fragment = utils::get_url_fragment("https://kernel.org#");
    assert_eq!(fragment, "");
}
|
||||
44
src/tests/utils/is_data_url.rs
Normal file
44
src/tests/utils/is_data_url.rs
Normal file
@@ -0,0 +1,44 @@
|
||||
use crate::utils;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// A base64 `text/html` data URL must be recognised as a data URL.
#[test]
fn passing_data_url_text_html() {
    let url = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    assert!(utils::is_data_url(url));
}
|
||||
|
||||
/// A base64 data URL with no media type must still be recognised as a data URL.
#[test]
fn passing_data_url_no_media_type() {
    let url = "data:;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    assert!(utils::is_data_url(url));
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// An `https://` URL must not be recognised as a data URL.
#[test]
fn failing_https_url() {
    let url = "https://kernel.org";
    assert!(!utils::is_data_url(url));
}
|
||||
|
||||
/// A protocol-relative `//host` URL must not be recognised as a data URL.
#[test]
fn failing_no_protocol_url() {
    let url = "//kernel.org";
    assert!(!utils::is_data_url(url));
}
|
||||
|
||||
/// The empty string must not be recognised as a data URL.
#[test]
fn failing_empty_string() {
    let url = "";
    assert!(!utils::is_data_url(url));
}
|
||||
75
src/tests/utils/is_file_url.rs
Normal file
75
src/tests/utils/is_file_url.rs
Normal file
@@ -0,0 +1,75 @@
|
||||
use crate::utils;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// A Unix-style `file:///` URL must be recognised as a file URL.
#[test]
fn passing_unix_file_url() {
    let url = "file:///home/user/Websites/my-website/index.html";
    assert!(utils::is_file_url(url));
}
|
||||
|
||||
/// A Windows-style `file:///C:/` URL must be recognised as a file URL.
#[test]
fn passing_windows_file_url() {
    let url =
        "file:///C:/Documents%20and%20Settings/user/Websites/my-website/assets/images/logo.png";
    assert!(utils::is_file_url(url));
}
|
||||
|
||||
/// A Unix path written with backslashes after `file:` must still be recognised
/// as a file URL.
#[test]
fn passing_unix_url_with_backslashes() {
    let url = "file:\\\\\\home\\user\\Websites\\my-website\\index.html";
    assert!(utils::is_file_url(url));
}
|
||||
|
||||
/// A Windows path written with backslashes after `file:` must still be recognised
/// as a file URL.
#[test]
fn passing_windows_file_url_with_backslashes() {
    let url = "file:\\\\\\C:\\Documents%20and%20Settings\\user\\Websites\\my-website\\assets\\images\\logo.png";
    assert!(utils::is_file_url(url));
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// A protocol-relative `//host` URL must not be recognised as a file URL.
#[test]
fn failing_url_with_no_protocl() {
    let url = "//kernel.org";
    assert!(!utils::is_file_url(url));
}
|
||||
|
||||
/// A relative `./` path must not be recognised as a file URL.
#[test]
fn failing_dot_slash_filename() {
    let url = "./index.html";
    assert!(!utils::is_file_url(url));
}
|
||||
|
||||
/// A bare file name must not be recognised as a file URL.
#[test]
fn failing_just_filename() {
    let url = "some-local-page.htm";
    assert!(!utils::is_file_url(url));
}
|
||||
|
||||
/// An `https://` URL with an IP address and port must not be recognised as a
/// file URL.
#[test]
fn failing_https_ip_port_url() {
    let url = "https://1.2.3.4:80/www/index.html";
    assert!(!utils::is_file_url(url));
}
|
||||
|
||||
/// A data URL must not be recognised as a file URL.
#[test]
fn failing_data_url() {
    let url = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    assert!(!utils::is_file_url(url));
}
|
||||
|
||||
/// The bare word `file` must not be recognised as a file URL.
#[test]
fn failing_just_word_file() {
    let url = "file";
    assert!(!utils::is_file_url(url));
}
|
||||
57
src/tests/utils/is_http_url.rs
Normal file
57
src/tests/utils/is_http_url.rs
Normal file
@@ -0,0 +1,57 @@
|
||||
use crate::utils;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
/// An `http://` URL must be recognised as an HTTP URL.
#[test]
fn passing_http_url() {
    let url = "http://kernel.org";
    assert!(utils::is_http_url(url));
}
|
||||
|
||||
/// An `https://` URL must be recognised as an HTTP URL.
#[test]
fn passing_https_url() {
    let url = "https://www.rust-lang.org/";
    assert!(utils::is_http_url(url));
}
|
||||
|
||||
/// An `http:` URL written with backslashes must still be recognised as an HTTP URL.
#[test]
fn passing_http_url_with_backslashes() {
    let url = "http:\\\\freebsd.org\\";
    assert!(utils::is_http_url(url));
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[test]
fn failing_url_with_no_protocol() {
    // A scheme-relative reference must not be reported as an http(s) URL.
    let is_http = utils::is_http_url("//kernel.org");

    assert!(!is_http);
}
|
||||
|
||||
#[test]
fn failing_dot_slash_filename() {
    // A "./"-prefixed relative path must not be reported as an http(s) URL.
    let is_http = utils::is_http_url("./index.html");

    assert!(!is_http);
}
|
||||
|
||||
#[test]
fn failing_just_filename() {
    // A bare file name must not be reported as an http(s) URL.
    let is_http = utils::is_http_url("some-local-page.htm");

    assert!(!is_http);
}
|
||||
|
||||
#[test]
fn failing_https_ip_port_url() {
    // NOTE(review): despite the test name, the input uses the ftp scheme and
    // carries no port; confirm whether the name or the input is the intended one.
    let is_http = utils::is_http_url("ftp://1.2.3.4/www/index.html");

    assert!(!is_http);
}
|
||||
|
||||
#[test]
fn failing_data_url() {
    // A data URL is not an http(s) URL.
    let data_url = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";

    assert!(!utils::is_http_url(data_url));
}
|
||||
13
src/tests/utils/mod.rs
Normal file
13
src/tests/utils/mod.rs
Normal file
@@ -0,0 +1,13 @@
|
||||
mod clean_url;
|
||||
mod data_to_data_url;
|
||||
mod data_url_to_data;
|
||||
mod decode_url;
|
||||
mod detect_media_type;
|
||||
mod file_url_to_fs_path;
|
||||
mod get_url_fragment;
|
||||
mod is_data_url;
|
||||
mod is_file_url;
|
||||
mod is_http_url;
|
||||
mod resolve_url;
|
||||
mod retrieve_asset;
|
||||
mod url_has_protocol;
|
||||
229
src/tests/utils/resolve_url.rs
Normal file
229
src/tests/utils/resolve_url.rs
Normal file
@@ -0,0 +1,229 @@
|
||||
use url::ParseError;
|
||||
|
||||
use crate::utils;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[test]
fn passing_from_https_to_level_up_relative() -> Result<(), ParseError> {
    // "../" applied to a host-only base is expected to land directly under the host.
    let expected = "https://www.kernel.org/category/signatures.html";
    let actual = utils::resolve_url("https://www.kernel.org", "../category/signatures.html")?;

    assert_eq!(actual.as_str(), expected);

    Ok(())
}
|
||||
|
||||
#[test]
fn passing_from_just_filename_to_full_https_url() -> Result<(), ParseError> {
    // An absolute https target is expected back unchanged, even though the
    // base is only a bare file name.
    let target = "https://www.kernel.org/category/signatures.html";

    utils::resolve_url("saved_page.htm", target).map(|resolved| {
        assert_eq!(
            resolved.as_str(),
            "https://www.kernel.org/category/signatures.html"
        )
    })
}
|
||||
|
||||
#[test]
fn passing_from_https_url_to_url_with_no_protocol() -> Result<(), ParseError> {
    // A scheme-relative target is expected to inherit "https" from the base.
    let base = "https://www.kernel.org";
    let target = "//www.kernel.org/theme/images/logos/tux.png";

    let actual = utils::resolve_url(base, target)?;

    assert_eq!(
        actual.as_str(),
        "https://www.kernel.org/theme/images/logos/tux.png"
    );

    Ok(())
}
|
||||
|
||||
#[test]
fn passing_from_https_url_to_url_with_no_protocol_and_on_different_hostname(
) -> Result<(), ParseError> {
    // The scheme comes from the base while the host comes from the target.
    let base = "https://www.kernel.org";
    let target = "//another-host.org/theme/images/logos/tux.png";

    let actual = utils::resolve_url(base, target)?;

    assert_eq!(
        actual.as_str(),
        "https://another-host.org/theme/images/logos/tux.png"
    );

    Ok(())
}
|
||||
|
||||
#[test]
fn passing_from_https_url_to_relative_root_path() -> Result<(), ParseError> {
    // A root-relative target is expected to replace the whole path of the base.
    let base = "https://www.kernel.org/category/signatures.html";

    utils::resolve_url(base, "/theme/images/logos/tux.png").map(|resolved| {
        assert_eq!(
            resolved.as_str(),
            "https://www.kernel.org/theme/images/logos/tux.png"
        )
    })
}
|
||||
|
||||
#[test]
fn passing_from_https_to_just_filename() -> Result<(), ParseError> {
    // A bare file name is expected to replace the last path segment of the base.
    let base = "https://www.w3schools.com/html/html_iframe.asp";

    let actual = utils::resolve_url(base, "default.asp")?;

    assert_eq!(actual.as_str(), "https://www.w3schools.com/html/default.asp");

    Ok(())
}
|
||||
|
||||
#[test]
fn passing_from_data_url_to_https() -> Result<(), ParseError> {
    // An absolute https target is expected back unchanged when the base is a data URL.
    let base = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let target = "https://www.kernel.org/category/signatures.html";

    let actual = utils::resolve_url(base, target)?;

    assert_eq!(
        actual.as_str(),
        "https://www.kernel.org/category/signatures.html"
    );

    Ok(())
}
|
||||
|
||||
#[test]
fn passing_from_data_url_to_data_url() -> Result<(), ParseError> {
    // When both sides are data URLs, the target is expected back unchanged.
    let base = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let target = "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K";

    utils::resolve_url(base, target).map(|resolved| {
        assert_eq!(
            resolved.as_str(),
            "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K"
        )
    })
}
|
||||
|
||||
#[test]
fn passing_from_file_url_to_relative_path() -> Result<(), ParseError> {
    // Propagate a resolution error with `?` (as the sibling tests do) so that a
    // failure reports the actual `ParseError` instead of an empty-string mismatch.
    let resolved_url = utils::resolve_url(
        "file:///home/user/Websites/my-website/index.html",
        "assets/images/logo.png",
    )?;

    assert_eq!(
        resolved_url.as_str(),
        "file:///home/user/Websites/my-website/assets/images/logo.png"
    );

    Ok(())
}
|
||||
|
||||
#[test]
fn passing_from_file_url_to_relative_path_with_backslashes() -> Result<(), ParseError> {
    // Backslash-separated input is expected to resolve to the same URL as its
    // forward-slash counterpart. Errors are propagated with `?` so a failure
    // reports the actual `ParseError` instead of an empty-string mismatch.
    let resolved_url = utils::resolve_url(
        "file:\\\\\\home\\user\\Websites\\my-website\\index.html",
        "assets\\images\\logo.png",
    )?;

    assert_eq!(
        resolved_url.as_str(),
        "file:///home/user/Websites/my-website/assets/images/logo.png"
    );

    Ok(())
}
|
||||
|
||||
#[test]
fn passing_from_data_url_to_file_url() -> Result<(), ParseError> {
    // An absolute file URL target is expected back unchanged. Errors are
    // propagated with `?` so a failure reports the actual `ParseError`.
    let resolved_url = utils::resolve_url(
        "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
        "file:///etc/passwd",
    )?;

    assert_eq!(resolved_url.as_str(), "file:///etc/passwd");

    Ok(())
}
|
||||
|
||||
#[test]
fn passing_preserve_fragment() -> Result<(), ParseError> {
    // The "#fontmarvelous" fragment of the target must survive resolution.
    // Errors are propagated with `?` so a failure reports the actual `ParseError`.
    let resolved_url = utils::resolve_url(
        "http://doesnt-matter.local/",
        "css/fonts/fontmarvelous.svg#fontmarvelous",
    )?;

    assert_eq!(
        resolved_url.as_str(),
        "http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous"
    );

    Ok(())
}
|
||||
|
||||
#[test]
fn passing_resolve_from_file_url_to_file_url() -> Result<(), ParseError> {
    // Choose platform-appropriate file URLs once; the expected result is the
    // (absolute) target itself on both platforms.
    let (from, to) = if cfg!(windows) {
        ("file:///c:/index.html", "file:///c:/image.png")
    } else {
        ("file:///tmp/index.html", "file:///tmp/image.png")
    };

    // Propagate errors with `?` so a failure reports the actual `ParseError`
    // instead of an empty-string mismatch.
    let resolved_url = utils::resolve_url(from, to)?;

    assert_eq!(resolved_url.as_str(), to);

    Ok(())
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[test]
fn failing_from_data_url_to_url_with_no_protocol() -> Result<(), ParseError> {
    // A scheme-relative target cannot be resolved against a data URL base;
    // the failure is mapped to an empty string, which is what gets asserted.
    let base = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let target = "//www.w3schools.com/html/html_iframe.asp";

    let actual = match utils::resolve_url(base, target) {
        Ok(resolved) => resolved,
        Err(_) => str!(),
    };

    assert_eq!(actual.as_str(), "");

    Ok(())
}
|
||||
124
src/tests/utils/retrieve_asset.rs
Normal file
124
src/tests/utils/retrieve_asset.rs
Normal file
@@ -0,0 +1,124 @@
|
||||
use crate::utils;
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
use std::env;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[test]
fn passing_read_data_url() {
    let mut cache = HashMap::new();
    let client = Client::new();

    // What the target data URL ("target" as text/html) should look like once re-encoded
    let expected_data_url = utils::data_to_data_url("text/html", "target".as_bytes(), "", "");

    // With a data URL as both parent and target, the target's payload must be returned
    let (data, final_url, media_type) = utils::retrieve_asset(
        &mut cache,
        &client,
        "data:text/html;base64,c291cmNl",
        "data:text/html;base64,dGFyZ2V0",
        false,
    )
    .unwrap();

    assert_eq!(
        utils::data_to_data_url(&media_type, &data, &final_url, ""),
        expected_data_url
    );
    assert_eq!(final_url, expected_data_url);
    assert_eq!(media_type.as_str(), "text/html");
}
|
||||
|
||||
#[test]
fn passing_read_local_file_with_file_url_parent() {
    let mut cache = HashMap::new();
    let client = Client::new();

    let scheme_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // Both URLs point into the test data directory under the current working directory
    let cwd = env::current_dir().unwrap();
    let cwd_text = cwd.to_str().unwrap();
    let parent_url = format!(
        "{file}{cwd}/src/tests/data/basic/local-file.html",
        file = scheme_prefix,
        cwd = cwd_text
    );
    let script_url = format!(
        "{file}{cwd}/src/tests/data/basic/local-script.js",
        file = scheme_prefix,
        cwd = cwd_text
    );

    // A local asset referenced from a local document should be retrievable
    let (data, final_url, _media_type) =
        utils::retrieve_asset(&mut cache, &client, &parent_url, &script_url, false).unwrap();

    assert_eq!(
        utils::data_to_data_url("application/javascript", &data, &final_url, ""),
        "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg=="
    );
    assert_eq!(final_url, script_url);
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[test]
fn failing_read_local_file_with_data_url_parent() {
    let cache = &mut HashMap::new();
    let client = Client::new();

    // Inclusion of local assets from data URL sources should not be allowed
    let result = utils::retrieve_asset(
        cache,
        &client,
        "data:text/html;base64,SoUrCe",
        "file:///etc/passwd",
        false,
    );

    // Assert on the outcome directly rather than via constant `assert!(false)` / `assert!(true)`
    assert!(result.is_err());
}
|
||||
|
||||
#[test]
fn failing_read_local_file_with_https_parent() {
    let cache = &mut HashMap::new();
    let client = Client::new();

    // Inclusion of local assets from remote sources should not be allowed
    let result = utils::retrieve_asset(
        cache,
        &client,
        "https://kernel.org/",
        "file:///etc/passwd",
        false,
    );

    // Assert on the outcome directly rather than via constant `assert!(false)` / `assert!(true)`
    assert!(result.is_err());
}
|
||||
83
src/tests/utils/url_has_protocol.rs
Normal file
83
src/tests/utils/url_has_protocol.rs
Normal file
@@ -0,0 +1,83 @@
|
||||
use crate::utils;
|
||||
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[test]
fn passing_mailto() {
    // "mailto:" counts as a protocol even with a query string attached.
    let link = "mailto:somebody@somewhere.com?subject=hello";

    assert!(utils::url_has_protocol(link));
}
|
||||
|
||||
#[test]
fn passing_tel() {
    // "tel:" counts as a protocol.
    let has_protocol = utils::url_has_protocol("tel:5551234567");

    assert!(has_protocol);
}
|
||||
|
||||
#[test]
fn passing_ftp_no_slashes() {
    // The protocol is detected even when "//" does not follow the colon.
    let has_protocol = utils::url_has_protocol("ftp:some-ftp-server.com");

    assert!(has_protocol);
}
|
||||
|
||||
#[test]
fn passing_ftp_with_credentials() {
    // Embedded "user:password@" credentials must not hide the protocol.
    let link = "ftp://user:password@some-ftp-server.com";

    assert!(utils::url_has_protocol(link));
}
|
||||
|
||||
#[test]
fn passing_javascript() {
    // "javascript:" counts as a protocol.
    let has_protocol = utils::url_has_protocol("javascript:void(0)");

    assert!(has_protocol);
}
|
||||
|
||||
#[test]
fn passing_http() {
    // "http://" counts as a protocol.
    let has_protocol = utils::url_has_protocol("http://news.ycombinator.com");

    assert!(has_protocol);
}
|
||||
|
||||
#[test]
fn passing_https() {
    // "https://" counts as a protocol.
    let has_protocol = utils::url_has_protocol("https://github.com");

    assert!(has_protocol);
}
|
||||
|
||||
#[test]
fn passing_mailto_uppercase() {
    // Protocol detection must not depend on letter case.
    let link = "MAILTO:somebody@somewhere.com?subject=hello";

    assert!(utils::url_has_protocol(link));
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[test]
fn failing_url_with_no_protocol() {
    // A scheme-relative reference has no protocol of its own.
    let link = "//some-hostname.com/some-file.html";

    assert!(!utils::url_has_protocol(link));
}
|
||||
|
||||
#[test]
fn failing_relative_path() {
    // A host-looking relative path has no protocol.
    let has_protocol = utils::url_has_protocol("some-hostname.com/some-file.html");

    assert!(!has_protocol);
}
|
||||
|
||||
#[test]
fn failing_relative_to_root_path() {
    // A root-relative path has no protocol.
    let has_protocol = utils::url_has_protocol("/some-file.html");

    assert!(!has_protocol);
}
|
||||
|
||||
#[test]
fn failing_empty_string() {
    // The empty string has no protocol.
    let has_protocol = utils::url_has_protocol("");

    assert!(!has_protocol);
}
|
||||
387
src/utils.rs
387
src/utils.rs
@@ -1,56 +1,17 @@
|
||||
extern crate base64;
|
||||
|
||||
use self::base64::encode;
|
||||
use http::retrieve_asset;
|
||||
use regex::Regex;
|
||||
use reqwest::Client;
|
||||
use base64;
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use std::collections::HashMap;
|
||||
use url::{ParseError, Url};
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use url::{form_urlencoded, ParseError, Url};
|
||||
|
||||
/// This monster of a regex is used to match any kind of URL found in CSS.
|
||||
///
|
||||
/// There are roughly three different categories that a found URL could fit
|
||||
/// into:
|
||||
/// - Font [found after a src: property in an @font-face rule]
|
||||
/// - Stylesheet [denoted by an @import before the url]
|
||||
/// - Image [covers all other uses of the url() function]
|
||||
///
|
||||
/// This regex aims to extract the following information:
|
||||
/// - What type of URL is it (font/image/css)
|
||||
/// - Where is the part that needs to be replaced (incl any wrapping quotes)
|
||||
/// - What is the URL (excl any wrapping quotes)
|
||||
///
|
||||
/// Essentially, the regex can be broken down into two parts:
|
||||
///
|
||||
/// `(?:(?:(?P<stylesheet>@import)|(?P<font>src\s*:))\s+)?`
|
||||
/// This matches the precursor to a font or CSS URL, and fills in a match under
|
||||
/// either `<stylesheet>` (if it's a CSS URL) or `<font>` (if it's a font).
|
||||
/// Determining whether or not it's an image can be done by the negation of both
|
||||
/// of these. Either zero or one of these can match.
|
||||
///
|
||||
/// `url\((?P<to_repl>['"]?(?P<url>[^"'\)]+)['"]?)\)`
|
||||
/// This matches the actual URL part of the url(), and must always match. It also
|
||||
/// sets `<to_repl>` and `<url>` which correspond to everything within
|
||||
/// `url(...)` and a usable URL, respectively.
|
||||
///
|
||||
/// Note, however, that this does not perform any validation of the found URL.
|
||||
/// Malformed CSS could lead to an invalid URL being present. It is therefore
|
||||
/// recommended that the URL gets manually validated.
|
||||
const CSS_URL_REGEX_STR: &str = r###"(?:(?:(?P<stylesheet>@import)|(?P<font>src\s*:))\s+)?url\((?P<to_repl>['"]?(?P<url>[^"'\)]+)['"]?)\)"###;
|
||||
|
||||
lazy_static! {
|
||||
static ref HAS_PROTOCOL: Regex = Regex::new(r"^[a-z0-9]+:").unwrap();
|
||||
static ref REGEX_URL: Regex = Regex::new(r"^https?://").unwrap();
|
||||
static ref REGEX_CSS_URL: Regex = Regex::new(CSS_URL_REGEX_STR).unwrap();
|
||||
}
|
||||
|
||||
const MAGIC: [[&[u8]; 2]; 19] = [
|
||||
const MAGIC: [[&[u8]; 2]; 18] = [
|
||||
// Image
|
||||
[b"GIF87a", b"image/gif"],
|
||||
[b"GIF89a", b"image/gif"],
|
||||
[b"\xFF\xD8\xFF", b"image/jpeg"],
|
||||
[b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"],
|
||||
[b"<?xml ", b"image/svg+xml"],
|
||||
[b"<svg ", b"image/svg+xml"],
|
||||
[b"RIFF....WEBPVP8 ", b"image/webp"],
|
||||
[b"\x00\x00\x01\x00", b"image/x-icon"],
|
||||
@@ -69,38 +30,78 @@ const MAGIC: [[&[u8]; 2]; 19] = [
|
||||
[b"\x1A\x45\xDF\xA3", b"video/webm"],
|
||||
];
|
||||
|
||||
pub fn data_to_dataurl(mime: &str, data: &[u8]) -> String {
|
||||
let mimetype = if mime.is_empty() {
|
||||
detect_mimetype(data)
|
||||
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
|
||||
"image/svg+xml",
|
||||
"text/css",
|
||||
"text/html",
|
||||
"text/javascript",
|
||||
"text/plain",
|
||||
];
|
||||
|
||||
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String {
|
||||
let media_type: String = if media_type.is_empty() {
|
||||
detect_media_type(data, &url)
|
||||
} else {
|
||||
mime.to_string()
|
||||
media_type.to_string()
|
||||
};
|
||||
format!("data:{};base64,{}", mimetype, encode(data))
|
||||
let hash: String = if fragment != "" {
|
||||
format!("#{}", fragment)
|
||||
} else {
|
||||
str!()
|
||||
};
|
||||
|
||||
format!(
|
||||
"data:{};base64,{}{}",
|
||||
media_type,
|
||||
base64::encode(data),
|
||||
hash
|
||||
)
|
||||
}
|
||||
|
||||
pub fn detect_mimetype(data: &[u8]) -> String {
|
||||
pub fn detect_media_type(data: &[u8], url: &str) -> String {
|
||||
for item in MAGIC.iter() {
|
||||
if data.starts_with(item[0]) {
|
||||
return String::from_utf8(item[1].to_vec()).unwrap();
|
||||
}
|
||||
}
|
||||
"".to_owned()
|
||||
|
||||
if url.to_lowercase().ends_with(".svg") {
|
||||
return str!("image/svg+xml");
|
||||
}
|
||||
|
||||
str!()
|
||||
}
|
||||
|
||||
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
|
||||
HAS_PROTOCOL.is_match(url.as_ref().to_lowercase().as_str())
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme().len() > 0))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn is_data_url<T: AsRef<str>>(url: T) -> Result<bool, ParseError> {
|
||||
Url::parse(url.as_ref()).and_then(|u| Ok(u.scheme() == "data"))
|
||||
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme() == "data"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn is_valid_url<T: AsRef<str>>(path: T) -> bool {
|
||||
REGEX_URL.is_match(path.as_ref())
|
||||
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme() == "file"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
|
||||
Url::parse(url.as_ref())
|
||||
.and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https"))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
pub fn is_plaintext_media_type(media_type: &str) -> bool {
|
||||
PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
|
||||
}
|
||||
|
||||
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
|
||||
let result = if is_valid_url(to.as_ref()) {
|
||||
let result = if is_http_url(to.as_ref()) {
|
||||
to.as_ref().to_string()
|
||||
} else {
|
||||
Url::parse(from.as_ref())?
|
||||
@@ -111,88 +112,192 @@ pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<Strin
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
pub fn resolve_css_imports(
|
||||
cache: &mut HashMap<String, String>,
|
||||
client: &Client,
|
||||
css_string: &str,
|
||||
as_dataurl: bool,
|
||||
href: &str,
|
||||
opt_no_images: bool,
|
||||
opt_silent: bool,
|
||||
) -> String {
|
||||
let mut resolved_css = String::from(css_string);
|
||||
|
||||
for link in REGEX_CSS_URL.captures_iter(&css_string) {
|
||||
let target_link = link.name("url").unwrap().as_str();
|
||||
|
||||
// Determine the type of link
|
||||
let is_stylesheet = link.name("stylesheet").is_some();
|
||||
let is_font = link.name("font").is_some();
|
||||
let is_image = !is_stylesheet && !is_font;
|
||||
|
||||
// Generate absolute URL for content
|
||||
let embedded_url = match resolve_url(href, target_link) {
|
||||
Ok(url) => url,
|
||||
Err(_) => continue, // Malformed URL
|
||||
};
|
||||
|
||||
// Download the asset. If it's more CSS, resolve that too
|
||||
let content = if is_stylesheet {
|
||||
// The link is an @import link
|
||||
retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&embedded_url,
|
||||
false, // Formating as data URL will be done later
|
||||
"text/css", // Expect CSS
|
||||
opt_silent,
|
||||
)
|
||||
.map(|(content, _)| {
|
||||
resolve_css_imports(
|
||||
cache,
|
||||
client,
|
||||
&content,
|
||||
true, // Finally, convert to a dataurl
|
||||
&embedded_url,
|
||||
opt_no_images,
|
||||
opt_silent,
|
||||
)
|
||||
})
|
||||
} else if (is_image && !opt_no_images) || is_font {
|
||||
// The link is some other, non-@import link
|
||||
retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&embedded_url,
|
||||
true, // Format as data URL
|
||||
"", // Unknown MIME type
|
||||
opt_silent,
|
||||
)
|
||||
.map(|(a, _)| a)
|
||||
} else {
|
||||
// If it's a datatype that has been opt_no'd out of, replace with
|
||||
// absolute URL
|
||||
|
||||
Ok(embedded_url.clone())
|
||||
}
|
||||
.unwrap_or_else(|e| {
|
||||
eprintln!("Warning: {}", e);
|
||||
|
||||
// If failed to resolve, replace with absolute URL
|
||||
embedded_url
|
||||
});
|
||||
|
||||
let replacement = format!("\"{}\"", &content);
|
||||
let dest = link.name("to_repl").unwrap();
|
||||
let offset = resolved_css.len() - css_string.len();
|
||||
let target_range = (dest.start() + offset)..(dest.end() + offset);
|
||||
|
||||
resolved_css.replace_range(target_range, &replacement);
|
||||
}
|
||||
|
||||
if as_dataurl {
|
||||
data_to_dataurl("text/css", resolved_css.as_bytes())
|
||||
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
|
||||
if Url::parse(url.as_ref()).unwrap().fragment() == None {
|
||||
str!()
|
||||
} else {
|
||||
resolved_css
|
||||
str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
pub fn clean_url<T: AsRef<str>>(input: T) -> String {
|
||||
let mut url = Url::parse(input.as_ref()).unwrap();
|
||||
|
||||
// Clear fragment
|
||||
url.set_fragment(None);
|
||||
|
||||
// Get rid of stray question mark
|
||||
if url.query() == Some("") {
|
||||
url.set_query(None);
|
||||
}
|
||||
|
||||
// Remove empty trailing ampersand(s)
|
||||
let mut result: String = url.to_string();
|
||||
while result.ends_with("&") {
|
||||
result.pop();
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
pub fn data_url_to_data<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
|
||||
let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
|
||||
let path: String = parsed_url.path().to_string();
|
||||
let comma_loc: usize = path.find(',').unwrap_or(path.len());
|
||||
|
||||
let meta_data: String = path.chars().take(comma_loc).collect();
|
||||
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
|
||||
|
||||
let text: String = decode_url(raw_data);
|
||||
|
||||
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
|
||||
let mut media_type: String = str!();
|
||||
let mut encoding: &str = "";
|
||||
|
||||
let mut i: i8 = 0;
|
||||
for item in &meta_data_items {
|
||||
if i == 0 {
|
||||
media_type = str!(item);
|
||||
} else {
|
||||
if item.eq_ignore_ascii_case("base64")
|
||||
|| item.eq_ignore_ascii_case("utf8")
|
||||
|| item.eq_ignore_ascii_case("charset=UTF-8")
|
||||
{
|
||||
encoding = item;
|
||||
}
|
||||
}
|
||||
|
||||
i = i + 1;
|
||||
}
|
||||
|
||||
let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
|
||||
base64::decode(&text).unwrap_or(vec![])
|
||||
} else {
|
||||
text.as_bytes().to_vec()
|
||||
};
|
||||
|
||||
(media_type, data)
|
||||
}
|
||||
|
||||
/// Percent-decodes `input`.
///
/// Every `%XX` sequence with two hexadecimal digits is replaced by the byte it
/// encodes; all other characters (including `+`, `&`, `=` and malformed `%`
/// sequences) are copied through unchanged. The decoded bytes are interpreted
/// as UTF-8, with invalid sequences replaced by U+FFFD.
pub fn decode_url(input: String) -> String {
    // Value of a single hexadecimal digit, if `byte` is one
    fn hex_value(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    let bytes: &[u8] = input.as_bytes();
    let mut decoded: Vec<u8> = Vec::with_capacity(bytes.len());
    let mut pos: usize = 0;

    while pos < bytes.len() {
        let escaped: Option<u8> = if bytes[pos] == b'%' {
            let high = bytes.get(pos + 1).copied().and_then(hex_value);
            let low = bytes.get(pos + 2).copied().and_then(hex_value);
            match (high, low) {
                (Some(high), Some(low)) => Some((high << 4) | low),
                _ => None,
            }
        } else {
            None
        };

        match escaped {
            Some(byte) => {
                decoded.push(byte);
                pos += 3;
            }
            None => {
                decoded.push(bytes[pos]);
                pos += 1;
            }
        }
    }

    String::from_utf8_lossy(&decoded).into_owned()
}
|
||||
|
||||
pub fn file_url_to_fs_path(url: &str) -> String {
|
||||
if !is_file_url(url) {
|
||||
return str!();
|
||||
}
|
||||
|
||||
let cutoff_l = if cfg!(windows) { 8 } else { 7 };
|
||||
let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string());
|
||||
let url_fragment = get_url_fragment(url);
|
||||
if url_fragment != "" {
|
||||
let max_len = fs_file_path.len() - 1 - url_fragment.len();
|
||||
fs_file_path = fs_file_path[0..max_len].to_string();
|
||||
}
|
||||
|
||||
if cfg!(windows) {
|
||||
fs_file_path = fs_file_path.replace("/", "\\");
|
||||
}
|
||||
|
||||
// File paths should not be %-encoded
|
||||
decode_url(fs_file_path)
|
||||
}
|
||||
|
||||
pub fn retrieve_asset(
|
||||
cache: &mut HashMap<String, Vec<u8>>,
|
||||
client: &Client,
|
||||
parent_url: &str,
|
||||
url: &str,
|
||||
opt_silent: bool,
|
||||
) -> Result<(Vec<u8>, String, String), reqwest::Error> {
|
||||
if url.len() == 0 {
|
||||
// Provoke error
|
||||
client.get("").send()?;
|
||||
}
|
||||
|
||||
if is_data_url(&url) {
|
||||
let (media_type, data) = data_url_to_data(url);
|
||||
Ok((data, url.to_string(), media_type))
|
||||
} else if is_file_url(&url) {
|
||||
// Check if parent_url is also file:///
|
||||
// (if not, then we don't embed the asset)
|
||||
if !is_file_url(&parent_url) {
|
||||
// Provoke error
|
||||
client.get("").send()?;
|
||||
}
|
||||
|
||||
let fs_file_path: String = file_url_to_fs_path(url);
|
||||
let path = Path::new(&fs_file_path);
|
||||
if path.exists() {
|
||||
if !opt_silent {
|
||||
eprintln!("{}", &url);
|
||||
}
|
||||
|
||||
Ok((fs::read(&fs_file_path).expect(""), url.to_string(), str!()))
|
||||
} else {
|
||||
// Provoke error
|
||||
Err(client.get("").send().unwrap_err())
|
||||
}
|
||||
} else {
|
||||
let cache_key: String = clean_url(&url);
|
||||
|
||||
if cache.contains_key(&cache_key) {
|
||||
// URL is in cache, we get and return it
|
||||
if !opt_silent {
|
||||
eprintln!("{} (from cache)", &url);
|
||||
}
|
||||
|
||||
Ok((
|
||||
cache.get(&cache_key).unwrap().to_vec(),
|
||||
url.to_string(),
|
||||
str!(),
|
||||
))
|
||||
} else {
|
||||
// URL not in cache, we retrieve the file
|
||||
let mut response = client.get(url).send()?;
|
||||
let res_url = response.url().to_string();
|
||||
|
||||
if !opt_silent {
|
||||
if url == res_url {
|
||||
eprintln!("{}", &url);
|
||||
} else {
|
||||
eprintln!("{} -> {}", &url, &res_url);
|
||||
}
|
||||
}
|
||||
|
||||
let new_cache_key: String = clean_url(&res_url);
|
||||
|
||||
// Convert response into a byte array
|
||||
let mut data: Vec<u8> = vec![];
|
||||
response.copy_to(&mut data)?;
|
||||
|
||||
// Attempt to obtain media type by reading the Content-Type header
|
||||
let media_type = response
|
||||
.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.and_then(|header| header.to_str().ok())
|
||||
.unwrap_or("");
|
||||
|
||||
// Add to cache
|
||||
cache.insert(new_cache_key, data.clone());
|
||||
|
||||
Ok((data, res_url, media_type.to_string()))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user