71 Commits

Author SHA1 Message Date
Sunshine
3f0ced0143 Merge pull request #151 from snshn/2-2-2
version bump (2.2.1 → 2.2.2)
2020-04-05 14:44:48 -04:00
Sunshine
8112ab6d04 version bump (2.2.1 → 2.2.2) 2020-04-05 14:38:40 -04:00
Sunshine
e5fc05f5cd Merge pull request #150 from snshn/cd-windows-executable
Make the pipeline upload windows build to every new release
2020-04-05 14:35:44 -04:00
Sunshine
1068ff659a make the pipeline upload windows build to every new release 2020-04-05 14:29:06 -04:00
Sunshine
cf3a8c8ede Merge pull request #149 from snshn/remove-travis-ci-and-appveyor
Remove TravisCI and AppVeyor from the project
2020-04-04 19:38:03 -04:00
Sunshine
920d992459 remove TravisCI and AppVeyor from the project 2020-04-04 19:26:58 -04:00
Sunshine
c61b3ba858 Merge pull request #148 from snshn/github-actions-build
Improve GitHub Actions integration
2020-04-04 19:14:52 -04:00
Sunshine
dc6e564ea2 integrate GitHub Actions CI further 2020-04-04 19:05:49 -04:00
Sunshine
24536b5e18 Merge pull request #147 from Y2Z/github-actions-ci
Implement CI using GitHub Actions
2020-04-04 17:51:28 -04:00
Sunshine
908fd59019 Update ci.yml 2020-04-04 17:08:19 -04:00
Sunshine
a19aa37ea8 Merge pull request #145 from snshn/no-images-svg
Empty SVG nodes when excluding images
2020-04-04 15:55:26 -04:00
Sunshine
c46bd5900b Merge pull request #146 from snshn/image-map-area-href
Resolve hrefs of <area> image-map tags
2020-04-04 15:51:45 -04:00
Sunshine
5f98ed23b3 set autocrlf to false to let windows builds pass 2020-04-04 15:42:53 -04:00
Sunshine
c6b135398a Implement CI using GitHub Actions 2020-04-04 15:30:13 -04:00
Sunshine
791e44796e resolve hrefs of <area> image-map tags 2020-04-04 14:55:45 -04:00
Sunshine
b428dd8471 Merge pull request #144 from snshn/macros-unit-test
Implement unit tests for macros
2020-04-04 13:11:19 -04:00
Sunshine
b88479446c implement unit tests for macros 2020-04-04 08:21:41 -04:00
Sunshine
1d6217ef5a empty SVG nodes if --no-images 2020-04-03 21:56:46 -04:00
Sunshine
746c7f05de Merge pull request #143 from snshn/embed-input-images
Add support for image inputs
2020-04-03 04:12:06 -04:00
Sunshine
29836d979a add support for image inputs 2020-04-03 03:30:52 -04:00
Sunshine
5ba6e33fa8 Merge pull request #142 from snshn/robatipoors-improvements
Revamp is_icon() and get_node_name()
2020-04-03 01:39:45 -04:00
Sunshine
643c4ce7ef implement improvements suggested by @robatipoor 2020-04-03 00:00:08 -04:00
Sunshine
c011f90b76 Merge pull request #141 from snshn/update-help-dialog
Update help dialog
2020-04-02 22:49:59 -04:00
Sunshine
875481b9a2 update help dialog 2020-04-02 03:04:21 -04:00
Sunshine
05275d864c Merge pull request #140 from snshn/cssparser
Switch to token-based CSS parser
2020-04-02 02:28:58 -04:00
Sunshine
4951fea730 implement full CSS parsing 2020-04-02 01:09:32 -04:00
Sunshine
b8315a7bd5 Merge pull request #138 from snshn/improved-media-type-detection
Improve SVG media type detection
2020-03-24 18:39:33 -04:00
Sunshine
be25784297 improve SVG media type detection 2020-03-24 08:50:39 -04:00
Sunshine
b0f1c39175 Merge pull request #137 from snshn/master
Bump version to 2.2.0
2020-03-24 08:23:56 -04:00
Sunshine
f27d5fa23e bump version number (2.1.2 → 2.2.0) 2020-03-22 23:30:31 -04:00
Sunshine
4f2944a600 Merge pull request #136 from snshn/restructure-tests
Restructure tests
2020-03-22 23:28:04 -04:00
Sunshine
479c42e1ce improve test code structure 2020-03-22 22:08:41 -04:00
Sunshine
933379c798 ensure consistent naming across all tests 2020-03-22 19:03:33 -04:00
Sunshine
061386ccc2 Merge pull request #135 from snshn/local-file-support
Add support for working with local assets
2020-03-22 17:18:43 -04:00
Sunshine
59a8be493d add support for working with local assets 2020-03-22 15:48:23 -04:00
Sunshine
a653bbe7d4 Merge pull request #133 from Y2Z/docker-instructions
Move Docker instructions under docs/
2020-03-18 00:42:40 -04:00
Sunshine
c7aab235d9 Merge pull request #134 from Y2Z/adr-asset-minimization
Add ADR describing asset minimization
2020-03-16 00:46:28 -04:00
Sunshine
60ef631315 add ADR describing asset minimization 2020-03-15 23:04:03 -04:00
Sunshine
b800947151 move Docker instructions into docs/ 2020-03-14 12:51:05 -04:00
Sunshine
808ce3e722 Merge pull request #130 from snshn/body-background
Account for legacy BODY background="" attribute
2020-03-05 08:32:06 -05:00
Sunshine
a92bba4ec5 Update README.md 2020-03-05 05:15:13 -05:00
Sunshine
a445098409 Update README.md 2020-03-05 05:11:54 -05:00
Sunshine
224d4fc480 Merge pull request #129 from snshn/dockerfile
add Dockerfile
2020-03-05 05:08:13 -05:00
Sunshine
d5ee8ae6ab account for legacy BODY background="" attribute 2020-03-05 04:56:09 -05:00
Sunshine
c16e80f507 add Dockerfile 2020-03-05 04:14:37 -05:00
Sunshine
1c1f2c7128 Merge pull request #127 from snshn/win-travis
add windows target OS to TravisCI
2020-02-27 18:38:50 -05:00
Sunshine
efba6a048d add windows target OS to TravisCI 2020-02-27 01:25:22 -05:00
Sunshine
1701425003 Merge pull request #125 from snshn/frames
Treat frames the same way as iframes
2020-02-24 21:35:29 -05:00
Sunshine
7654eec7e2 treat frames the same way as iframes 2020-02-24 20:18:13 -05:00
Sunshine
00942e0b1d Merge pull request #119 from snshn/data-url-input
Data URL input
2020-02-23 23:33:25 -05:00
Sunshine
0d1e21e9ad add black box tests 2020-02-23 22:48:14 -05:00
Sunshine
3d2d40e7cd add support for data URL targets 2020-02-23 22:25:37 -05:00
Sunshine
b8b6d8cff6 fix "succeeding" to "passing" in tests 2020-02-23 22:24:33 -05:00
Sunshine
928664dc88 correct is_valid_url to is_http_url 2020-02-23 22:24:33 -05:00
Sunshine
5c8d75539b rename dataurl to data_url 2020-02-23 22:24:32 -05:00
Sunshine
ee2055a2a3 Merge pull request #123 from snshn/adr-arch-dir
Move ADRs under docs/arch
2020-02-21 19:16:40 -05:00
Sunshine
b4c46c59d4 move ADRs to docs/arch 2020-02-21 07:58:23 -05:00
Sunshine
8574b7899b Merge pull request #121 from snshn/improve-help
Update help dialog and README.md
2020-02-20 08:07:01 -05:00
Sunshine
969bfbdd59 Merge pull request #120 from snshn/update-crates
Update crates
2020-02-15 12:41:29 -05:00
Sunshine
63f3a204a6 Merge pull request #122 from snshn/adr-timeout
Introduce ADR 0003-network-request-timeout.md
2020-02-15 12:40:02 -05:00
Sunshine
094be09e90 add ADR 0003-network-request-timeout.md 2020-02-15 09:09:12 -05:00
Sunshine
23ceaed493 update crates 2020-02-15 01:47:08 -05:00
Sunshine
d9602e25eb update help dialog and README.md 2020-02-15 01:33:20 -05:00
Sunshine
0c50aa223b Update README.md 2020-02-13 23:47:30 -05:00
Sunshine
e5425ee9d0 Update README.md 2020-02-12 08:38:08 -05:00
Sunshine
f720fe0176 Merge pull request #114 from snshn/custom-network-timeout-option
Add option for custom network request timeout
2020-02-10 21:13:17 -05:00
Sunshine
727a5a410c add option for custom network request timeout 2020-02-10 20:08:06 -05:00
Sunshine
23af174822 Merge pull request #115 from snshn/remove-javascript-anchors
Nullify JS within As' href attributes when needed
2020-02-05 22:57:48 -05:00
Sunshine
5ef2b7c9dc nullify JS within As' href attributes when needed 2020-02-03 01:47:35 -05:00
Sunshine
1e8348543a Merge pull request #111 from snshn/adr
Introduce ADRs
2020-01-22 23:57:25 -05:00
Sunshine
f9bafe092d Introduce ADRs 2020-01-22 01:03:31 -05:00
62 changed files with 3950 additions and 1309 deletions

1
.adr-dir Normal file
View File

@@ -0,0 +1 @@
docs/arch

View File

@@ -1,130 +0,0 @@
# Appveyor configuration template for Rust using rustup for Rust installation
# https://github.com/starkat99/appveyor-rust
## Operating System (VM environment) ##
# Rust needs at least Visual Studio 2013 Appveyor OS for MSVC targets.
os: Visual Studio 2015
## Build Matrix ##
# This configuration will setup a build for each channel & target combination (12 windows
# combinations in all).
#
# There are 3 channels: stable, beta, and nightly.
#
# Alternatively, the full version may be specified for the channel to build using that specific
# version (e.g. channel: 1.5.0)
#
# The values for target are the set of windows Rust build targets. Each value is of the form
#
# ARCH-pc-windows-TOOLCHAIN
#
# Where ARCH is the target architecture, either x86_64 or i686, and TOOLCHAIN is the linker
# toolchain to use, either msvc or gnu. See https://www.rust-lang.org/downloads.html#win-foot for
# a description of the toolchain differences.
# See https://github.com/rust-lang-nursery/rustup.rs/#toolchain-specification for description of
# toolchains and host triples.
#
# Comment out channel/target combos you do not wish to build in CI.
#
# You may use the `cargoflags` and `RUSTFLAGS` variables to set additional flags for cargo commands
# and rustc, respectively. For instance, you can uncomment the cargoflags lines in the nightly
# channels to enable unstable features when building for nightly. Or you could add additional
# matrix entries to test different combinations of features.
environment:
matrix:
### MSVC Toolchains ###
# Stable 64-bit MSVC
- channel: stable
target: x86_64-pc-windows-msvc
# Stable 32-bit MSVC
- channel: stable
target: i686-pc-windows-msvc
# Beta 64-bit MSVC
- channel: beta
target: x86_64-pc-windows-msvc
# Beta 32-bit MSVC
- channel: beta
target: i686-pc-windows-msvc
# Nightly 64-bit MSVC
- channel: nightly
target: x86_64-pc-windows-msvc
#cargoflags: --features "unstable"
# Nightly 32-bit MSVC
- channel: nightly
target: i686-pc-windows-msvc
#cargoflags: --features "unstable"
### GNU Toolchains ###
# Stable 64-bit GNU
- channel: stable
target: x86_64-pc-windows-gnu
MINGW_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin'
# Stable 32-bit GNU
- channel: stable
target: i686-pc-windows-gnu
MINGW_PATH: 'C:\MinGW\bin'
# Beta 64-bit GNU
- channel: beta
target: x86_64-pc-windows-gnu
MINGW_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin'
# Beta 32-bit GNU
- channel: beta
target: i686-pc-windows-gnu
MINGW_PATH: 'C:\MinGW\bin'
# Nightly 64-bit GNU
- channel: nightly
target: x86_64-pc-windows-gnu
MINGW_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin'
#cargoflags: --features "unstable"
# Nightly 32-bit GNU
- channel: nightly
target: i686-pc-windows-gnu
MINGW_PATH: 'C:\MinGW\bin'
#cargoflags: --features "unstable"
### Allowed failures ###
# See Appveyor documentation for specific details. In short, place any channel or targets you wish
# to allow build failures on (usually nightly at least is a wise choice). This will prevent a build
# or test failure in the matching channels/targets from failing the entire build.
matrix:
allow_failures:
- channel: nightly
- channel: beta
# If you only care about stable channel build failures, uncomment the following line:
#- channel: beta
## Install Script ##
# This is the most important part of the Appveyor configuration. This installs the version of Rust
# specified by the 'channel' and 'target' environment variables from the build matrix. This uses
# rustup to install Rust.
#
# For simple configurations, instead of using the build matrix, you can simply set the
# default-toolchain and default-host manually here.
install:
- appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
- rustup-init -yv --default-toolchain %channel% --default-host %target%
- set PATH=%PATH%;%USERPROFILE%\.cargo\bin
- if defined MINGW_PATH set PATH=%PATH%;%MINGW_PATH%
- rustc -vV
- cargo -vV
- rustup component add rustfmt
## Build Script ##
# 'cargo test' takes care of building for us, so disable Appveyor's build stage. This prevents
# the "directory does not contain a project or solution file" error.
build: false
# Uses 'cargo test' to run tests and build. Alternatively, the project may call compiled programs
#directly or perform other testing commands. Rust will automatically be placed in the PATH
# environment variable.
test_script:
- cargo test --verbose %cargoflags%

24
.github/workflows/build.yml vendored Normal file
View File

@@ -0,0 +1,24 @@
name: Build
on:
push:
branches: [ master ]
jobs:
build:
strategy:
matrix:
os:
- ubuntu-latest
- macos-latest
- windows-latest
rust:
- stable
runs-on: ${{ matrix.os }}
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- name: Build
run: cargo build --all --locked --verbose

23
.github/workflows/cd.yml vendored Normal file
View File

@@ -0,0 +1,23 @@
name: CD
on:
release:
types: [created]
jobs:
windows:
runs-on: windows-latest
steps:
- run: git config --global core.autocrlf false
- name: Checkout the repository
uses: actions/checkout@master
- name: Build the executable
run: cargo build --all --locked
- name: Perform local installation
run: cargo install --force --locked --path .
- uses: Shopify/upload-to-release@1.0.0
with:
name: monolith.exe
path: C:\Users\runneradmin\.cargo\bin\monolith.exe
repo-token: ${{ secrets.GITHUB_TOKEN }}

30
.github/workflows/ci.yml vendored Normal file
View File

@@ -0,0 +1,30 @@
name: CI
on:
pull_request:
branches: [ master ]
jobs:
build_and_test:
strategy:
matrix:
os:
- ubuntu-latest
- macos-latest
- windows-latest
rust:
- stable
- beta
- nightly
runs-on: ${{ matrix.os }}
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- name: Build
run: cargo build --all --locked --verbose
- name: Run tests
run: cargo test --all --locked --verbose
- name: Check code formatting
run: cargo fmt --all -- --check

3
.gitignore vendored
View File

@@ -4,6 +4,3 @@
# These are backup files generated by rustfmt
**/*.rs.bk
# Exclude accidental HTML files
*.html

View File

@@ -1,29 +0,0 @@
language: rust
cache: cargo
sudo: false
os:
- linux
- osx
rust:
- stable
- beta
- nightly
before_script:
- rustup component add rustfmt
script:
- cargo build --all --locked --verbose
- cargo test --all --locked --verbose
- |
if [[ "$TRAVIS_RUST_VERSION" == "stable" ]]; then
cargo fmt --all -- --check
fi
jobs:
allow_failures:
- rust: nightly
fast_finish: true

244
Cargo.lock generated
View File

@@ -5,14 +5,6 @@ name = "adler32"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "aho-corasick"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "ansi_term"
version = "0.11.0"
@@ -26,6 +18,18 @@ name = "anyhow"
version = "1.0.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "assert_cmd"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"escargot 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"predicates 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"predicates-core 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"predicates-tree 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "async-compression"
version = "0.2.0"
@@ -52,14 +56,6 @@ name = "autocfg"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "base64"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "base64"
version = "0.11.0"
@@ -75,11 +71,6 @@ name = "bumpalo"
version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byteorder"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bytes"
version = "0.5.3"
@@ -147,11 +138,54 @@ dependencies = [
"cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "cssparser"
version = "0.27.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"cssparser-macros 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)",
"dtoa-short 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
"matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
"phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"smallvec 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "cssparser-macros"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "difference"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "doc-comment"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "dtoa"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "dtoa-short"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "encoding_rs"
version = "0.8.20"
@@ -160,6 +194,17 @@ dependencies = [
"cfg-if 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "escargot"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "flate2"
version = "1.0.13"
@@ -523,15 +568,16 @@ dependencies = [
[[package]]
name = "monolith"
version = "2.1.2"
version = "2.2.2"
dependencies = [
"base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
"assert_cmd 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
"base64 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
"cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)",
"html5ever 0.24.1 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
"url 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@@ -627,6 +673,16 @@ dependencies = [
"phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "phf"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"phf_macros 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_shared 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"proc-macro-hack 0.5.14 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "phf_codegen"
version = "0.7.24"
@@ -645,6 +701,28 @@ dependencies = [
"rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "phf_generator"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"phf_shared 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "phf_macros"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"phf_generator 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"phf_shared 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"proc-macro-hack 0.5.14 (registry+https://github.com/rust-lang/crates.io-index)",
"proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"syn 1.0.11 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "phf_shared"
version = "0.7.24"
@@ -653,6 +731,14 @@ dependencies = [
"siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "phf_shared"
version = "0.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"siphasher 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "pin-project"
version = "0.4.6"
@@ -696,6 +782,34 @@ name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "predicates"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"predicates-core 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "predicates-core"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "predicates-tree"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"predicates-core 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "proc-macro-hack"
version = "0.5.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "proc-macro2"
version = "1.0.6"
@@ -740,6 +854,7 @@ dependencies = [
"rand_chacha 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_hc 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"rand_pcg 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@@ -837,6 +952,14 @@ dependencies = [
"rand_core 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_pcg"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand_core 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand_xorshift"
version = "0.1.1"
@@ -858,22 +981,6 @@ name = "redox_syscall"
version = "0.1.56"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "regex"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "regex-syntax"
version = "0.6.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "remove_dir_all"
version = "0.5.2"
@@ -910,7 +1017,7 @@ dependencies = [
"time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)",
"tokio 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
"tokio-tls 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"url 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen 0.2.57 (registry+https://github.com/rust-lang/crates.io-index)",
"wasm-bindgen-futures 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)",
"web-sys 0.3.34 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -954,6 +1061,9 @@ dependencies = [
name = "serde"
version = "1.0.103"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "serde_derive"
@@ -983,7 +1093,7 @@ dependencies = [
"dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
"itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)",
"url 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@@ -991,6 +1101,11 @@ name = "siphasher"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "siphasher"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "slab"
version = "0.4.2"
@@ -1083,14 +1198,6 @@ dependencies = [
"unicode-width 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "thread_local"
version = "0.3.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "time"
version = "0.1.42"
@@ -1144,6 +1251,11 @@ name = "tower-service"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "treeline"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "try-lock"
version = "0.2.2"
@@ -1190,7 +1302,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "url"
version = "2.1.0"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"idna 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -1382,17 +1494,15 @@ dependencies = [
[metadata]
"checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2"
"checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d"
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
"checksum anyhow 1.0.26 (registry+https://github.com/rust-lang/crates.io-index)" = "7825f6833612eb2414095684fcf6c635becf3ce97fe48cf6421321e93bfbd53c"
"checksum assert_cmd 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6283bac8dd7226470d491bc4737816fea4ca1fba7a2847f2e9097fd6bfb4624c"
"checksum async-compression 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2c5c52622726d68ec35fec88edfb4ccb862d4f3b3bfa4af2f45142e69ef9b220"
"checksum atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)" = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90"
"checksum autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2"
"checksum base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e"
"checksum base64 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7"
"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
"checksum bumpalo 3.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8fe2567a8d8a3aedb4e39aa39e186d5673acfd56393c6ac83b2bc5bd82f4369c"
"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
"checksum bytes 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "10004c15deb332055f7a4a208190aed362cf9a7c2f6ab70a305fba50e1105f38"
"checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb"
"checksum cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)" = "aa87058dce70a3ff5621797f1506cb837edd02ac4c0ae642b4542dce802908b8"
@@ -1402,8 +1512,14 @@ dependencies = [
"checksum core-foundation 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "25b9e03f145fd4f2bf705e07b900cd41fc636598fe5dc452fd0db1441c3f496d"
"checksum core-foundation-sys 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e7ca8a5221364ef15ce201e8ed2f609fc312682a8f4e0e3d4aa5879764e0fa3b"
"checksum crc32fast 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ba125de2af0df55319f41944744ad91c71113bf74a4646efff39afe1f6842db1"
"checksum cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)" = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a"
"checksum cssparser-macros 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e"
"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
"checksum doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "923dea538cea0aa3025e8685b20d6ee21ef99c4f77e954a30febbaac5ec73a97"
"checksum dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "ea57b42383d091c85abcc2706240b94ab2a8fa1fc81c10ff23c4de06e2a90b5e"
"checksum dtoa-short 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "59020b8513b76630c49d918c33db9f4c91638e7d3404a28084083b87e33f76f2"
"checksum encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)" = "87240518927716f79692c2ed85bfe6e98196d18c6401ec75355760233a7e12e9"
"checksum escargot 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "74cf96bec282dcdb07099f7e31d9fed323bca9435a09aba7b6d99b7617bca96d"
"checksum flate2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6bd6d6f4752952feb71363cffc9ebac9411b75b87c6ab6058c40c8900cf43c0f"
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
"checksum foreign-types 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
@@ -1456,9 +1572,13 @@ dependencies = [
"checksum openssl-sys 0.9.53 (registry+https://github.com/rust-lang/crates.io-index)" = "465d16ae7fc0e313318f7de5cecf57b2fbe7511fd213978b457e1c96ff46736f"
"checksum percent-encoding 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e"
"checksum phf 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)" = "b3da44b85f8e8dfaec21adae67f95d93244b2ecf6ad2a692320598dcc8e6dd18"
"checksum phf 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3dfb61232e34fcb633f43d12c58f83c1df82962dcdfa565a4e866ffc17dafe12"
"checksum phf_codegen 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)" = "b03e85129e324ad4166b06b2c7491ae27fe3ec353af72e72cd1654c7225d517e"
"checksum phf_generator 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)" = "09364cc93c159b8b06b1f4dd8a4398984503483891b0c26b867cf431fb132662"
"checksum phf_generator 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "17367f0cc86f2d25802b2c26ee58a7b23faeccf78a396094c13dced0d0182526"
"checksum phf_macros 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7f6fde18ff429ffc8fe78e2bf7f8b7a5a5a6e2a8b58bc5a9ac69198bbda9189c"
"checksum phf_shared 0.7.24 (registry+https://github.com/rust-lang/crates.io-index)" = "234f71a15de2288bcb7e3b6515828d22af7ec8598ee6d24c3b526fa0a80b67a0"
"checksum phf_shared 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c00cf8b9eafe68dde5e9eaa2cef8ee84a9336a47d566ec55ca16589633b65af7"
"checksum pin-project 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "94b90146c7216e4cb534069fb91366de4ea0ea353105ee45ed297e2d1619e469"
"checksum pin-project-internal 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "44ca92f893f0656d3cba8158dd0f2b99b94de256a4a54e870bd6922fcc6c8355"
"checksum pin-project-lite 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e8822eb8bb72452f038ebf6048efa02c3fe22bf83f76519c9583e47fc194a422"
@@ -1466,6 +1586,10 @@ dependencies = [
"checksum pkg-config 0.3.17 (registry+https://github.com/rust-lang/crates.io-index)" = "05da548ad6865900e60eaba7f589cc0783590a92e940c26953ff81ddbab2d677"
"checksum ppv-lite86 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "74490b50b9fbe561ac330df47c08f3f33073d2d00c150f719147d7c54522fa1b"
"checksum precomputed-hash 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
"checksum predicates 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a9bfe52247e5cc9b2f943682a85a5549fb9662245caf094504e69a2f03fe64d4"
"checksum predicates-core 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "06075c3a3e92559ff8929e7a280684489ea27fe44805174c3ebd9328dcb37178"
"checksum predicates-tree 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8e63c4859013b38a76eca2414c64911fba30def9e3202ac461a2d22831220124"
"checksum proc-macro-hack 0.5.14 (registry+https://github.com/rust-lang/crates.io-index)" = "fcfdefadc3d57ca21cf17990a28ef4c0f7c61383a28cb7604cf4a18e6ede1420"
"checksum proc-macro2 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9c9e470a8dc4aeae2dee2f335e8f533e2d4b347e1434e5671afc49b054592f27"
"checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe"
"checksum rand 0.6.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6d71dacdc3c88c1fde3885a3be3fbab9f35724e6ce99467f7d9c5026132184ca"
@@ -1481,11 +1605,10 @@ dependencies = [
"checksum rand_jitter 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1166d5c91dc97b88d1decc3285bb0a99ed84b05cfd0bc2341bdf2d43fc41e39b"
"checksum rand_os 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "7b75f676a1e053fc562eafbb47838d67c84801e38fc1ba459e8f180deabd5071"
"checksum rand_pcg 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "abf9b09b01790cfe0364f52bf32995ea3c39f4d2dd011eac241d2914146d0b44"
"checksum rand_pcg 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "16abd0c1b639e9eb4d7c50c0b8100b0d0f849be2349829c740fe8e6eb4816429"
"checksum rand_xorshift 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cbf7e9e623549b0e21f6e97cf8ecf247c1a8fd2e8a992ae265314300b2455d5c"
"checksum rdrand 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2"
"checksum redox_syscall 0.1.56 (registry+https://github.com/rust-lang/crates.io-index)" = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"
"checksum regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "dc220bd33bdce8f093101afe22a037b8eb0e5af33592e6a9caafff0d4cb81cbd"
"checksum regex-syntax 0.6.12 (registry+https://github.com/rust-lang/crates.io-index)" = "11a7e20d1cce64ef2fed88b66d347f88bd9babb82845b2b858f3edbf59a4f716"
"checksum remove_dir_all 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e"
"checksum reqwest 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "03c6cbd2bc1c1cb7052dbe30f4a70cf65811967c800f2dfbb2e6036dc9ee2553"
"checksum ryu 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "bfa8506c1de11c9c4e4c38863ccbe02a305c8188e85a05a784c9e11e1c3910c8"
@@ -1497,6 +1620,7 @@ dependencies = [
"checksum serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)" = "1a3351dcbc1f067e2c92ab7c3c1f288ad1a4cffc470b5aaddb4c2e0a3ae80043"
"checksum serde_urlencoded 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9ec5d77e2d4c73717816afac02670d5c4f534ea95ed430442cad02e7a6e32c97"
"checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac"
"checksum siphasher 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8e88f89a550c01e4cd809f3df4f52dc9e939f3273a2017eabd5c6d12fd98bb23"
"checksum slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8"
"checksum smallvec 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4ecf3b85f68e8abaa7555aa5abdb1153079387e60b718283d732f03897fcfc86"
"checksum sourcefile 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4bf77cb82ba8453b42b6ae1d692e4cdc92f9a47beaf89a847c8be83f4e328ad3"
@@ -1508,12 +1632,12 @@ dependencies = [
"checksum tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7a6e24d9338a0a5be79593e2fa15a648add6138caa803e2d5bc782c371732ca9"
"checksum tendril 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "707feda9f2582d5d680d733e38755547a3e8fb471e7ba11452ecfd9ce93a5d3b"
"checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
"checksum time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)" = "db8dcfca086c1143c9270ac42a2bbd8a7ee477b78ac8e45b19abfb0cbede4b6f"
"checksum tokio 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0e1bef565a52394086ecac0a6fa3b8ace4cb3a138ee1d96bd2b93283b56824e3"
"checksum tokio-tls 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7bde02a3a5291395f59b06ec6945a3077602fac2b07eeeaf0dee2122f3619828"
"checksum tokio-util 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "571da51182ec208780505a32528fc5512a8fe1443ab960b3f2f3ef093cd16930"
"checksum tower-service 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e987b6bf443f4b5b3b6f38704195592cca41c5bb7aedd3c3693c7081f8289860"
"checksum treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41"
"checksum try-lock 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e604eb7b43c06650e854be16a2a03155743d3752dd1c943f6829e26b7a36e382"
"checksum unicase 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
@@ -1521,7 +1645,7 @@ dependencies = [
"checksum unicode-segmentation 1.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e83e153d1053cbb5a118eeff7fd5be06ed99153f00dbcd8ae310c5fb2b22edc0"
"checksum unicode-width 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7007dbd421b92cc6e28410fe7362e2e0a2503394908f417b68ec8d1c364c4e20"
"checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
"checksum url 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "75b414f6c464c879d7f9babf951f23bc3743fb7313c081b2e6ca719067ea9d61"
"checksum url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "829d4a8476c35c9bf0bbce5a3b23f4106f79728039b726d292bb93bc106787cb"
"checksum utf-8 0.7.5 (registry+https://github.com/rust-lang/crates.io-index)" = "05e42f7c18b8f902290b009cde6d651262f956c98bc51bca4cd1d511c9cd85c7"
"checksum vcpkg 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "3fc439f2794e98976c88a2a2dafce96b930fe8010b0a256b3c2199a773933168"
"checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a"

View File

@@ -1,6 +1,6 @@
[package]
name = "monolith"
version = "2.1.2"
version = "2.2.2"
edition = "2018"
authors = [
"Sunshine <sunshine@uberspace.net>",
@@ -12,14 +12,17 @@ authors = [
description = "CLI tool for saving web pages as a single HTML file"
[dependencies]
base64 = "0.10.1"
base64 = "0.11.0"
clap = "2.33.0"
cssparser = "0.27.2"
html5ever = "0.24.1"
lazy_static = "1.4.0"
regex = "1.3.1"
url = "2.1.0"
url = "2.1.1"
[dependencies.reqwest]
version = "0.10.*"
default-features = false
features = ["default-tls", "blocking", "gzip"]
[dev-dependencies]
assert_cmd = "0.12.0"
tempfile = "3.1.0"

18
Dockerfile Normal file
View File

@@ -0,0 +1,18 @@
FROM rust
WORKDIR /usr/local/src/
RUN curl -s https://api.github.com/repos/y2z/monolith/releases/latest \
| grep "tarball_url.*\"," \
| cut -d '"' -f 4 \
| wget -qi - -O monolith.tar.gz
RUN tar xfz monolith.tar.gz \
&& mv Y2Z-monolith-* monolith \
&& rm monolith.tar.gz
WORKDIR /usr/local/src/monolith
RUN ls -a
RUN make install
WORKDIR /tmp
CMD ["/usr/local/cargo/bin/monolith"]

View File

@@ -1,16 +1,25 @@
.PHONY: all build install run test lint
# Makefile for monolith
all: test build
all: build
.PHONY: all
build:
@cargo build --locked
.PHONY: build
install:
@cargo install --force --locked --path .
test:
test: build
@cargo test --locked
@cargo fmt --all -- --check
.PHONY: test_code_formatting
lint:
@cargo fmt --all --
.PHONY: lint
install:
@cargo install --force --locked --path .
.PHONY: install
uninstall:
@cargo uninstall
.PHONY: uninstall

View File

@@ -1,5 +1,4 @@
[![Travis CI Build Status](https://travis-ci.org/Y2Z/monolith.svg?branch=master)](https://travis-ci.org/Y2Z/monolith)
[![AppVeyor Build status](https://ci.appveyor.com/api/projects/status/ae7soyjih8jg2bv7/branch/master?svg=true)](https://ci.appveyor.com/project/snshn/monolith/branch/master)
[![GitHub Actions Build Status](https://github.com/Y2Z/monolith/workflows/Build/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3ABuild)
```
___ ___________ __________ ___________________ ___
@@ -17,47 +16,66 @@ Unlike the conventional "Save page as", `monolith` not only saves the target doc
If compared to saving websites with `wget -mpk`, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available.
---------------------------------------------------
## Installation
### From source
$ git clone https://github.com/Y2Z/monolith.git
$ cd monolith
$ cargo install --path .
### With Homebrew (on macOS and GNU/Linux)
#### With Homebrew (on macOS and GNU/Linux)
$ brew install monolith
### Using Snapcraft (on GNU/Linux)
#### Using Snapcraft (on GNU/Linux)
$ snap install monolith
#### Via Docker
The guide can be found [here](docs/containers.md)
#### From source
$ git clone https://github.com/Y2Z/monolith.git
$ cd monolith
$ make install
---------------------------------------------------
## Usage
$ monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roads-lyrics.html
---------------------------------------------------
## Options
- `-c`: Ignore styles
- `-f`: Exclude iframes
- `-f`: Exclude frames and iframes
- `-i`: Remove images
- `-I`: Isolate document
- `-I`: Isolate the document
- `-j`: Exclude JavaScript
- `-k`: Accept invalid X.509 (TLS) certificates
- `-o`: Write output to file
- `-s`: Silent mode
- `-u`: Specify custom User-Agent
- `-t`: Set custom network request timeout
- `-u`: Provide own User-Agent
---------------------------------------------------
## HTTPS and HTTP proxies
Please set `https_proxy`, `http_proxy` and `no_proxy` environment variables.
Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables.
---------------------------------------------------
## Contributing
Please open an issue if something is wrong; that helps make this project better.
---------------------------------------------------
## Related projects
- `Monolith Chrome Extension`: https://github.com/rhysd/monolith-of-web
- `Pagesaver`: https://github.com/distributed-mind/pagesaver
- `Personal WayBack Machine`: https://github.com/popey/pwbm
- `SingleFile`: https://github.com/gildas-lormeau/SingleFile
---------------------------------------------------
## License
The Unlicense
---------------------------------------------------
<!-- Microtext -->
<sub>Keep in mind that `monolith` is not aware of your browser's session</sub>

View File

@@ -0,0 +1,19 @@
# 1. Record architecture decisions
Date: 2019-12-25
## Status
Accepted
## Context
We need to record the architectural decisions made on this project.
## Decision
We will use Architecture Decision Records, as [described by Michael Nygard](http://thinkrelevance.com/blog/2011/11/15/documenting-architecture-decisions).
## Consequences
See Michael Nygard's article, linked above. For a lightweight ADR toolset, see Nat Pryce's [adr-tools](https://github.com/npryce/adr-tools).

View File

@@ -0,0 +1,25 @@
# 2. Network request timeout
Date: 2020-02-15
## Status
Accepted
## Context
A slow network connection and overloaded server may negatively impact network response time.
## Decision
Make the program simulate behavior of popular web browsers and CLI tools, where
the default network response timeout is most often set to 120 seconds.
Instead of featuring retries for timed out network requests, the program
should have an option to adjust the timeout length, along with making it
indefinite when given "0" as its value.
## Consequences
The user is able to retrieve resources that have long response time, as well as obtain
full control over how soon, and if at all, network requests should time out.

View File

@@ -0,0 +1,25 @@
# 4. Asset Minimization
Date: 2020-03-14
## Status
Accepted
## Context
It may look like a good idea to make monolith compress retrieved assets while
saving the page for the purpose of reducing the resulting document's file size.
## Decision
Given that the main purpose of this program is to save pages in a convenient to store and share manner — it's mostly an archiving tool,
aside from being able to tell monolith to exclude certain types of assets (e.g. images, CSS, JavaScript),
it would be outside of scope of this program to implement code for compressing assets. Minimizing files before embedding them
does not reduce the amount of data that needs to be transferred either. A separate tool can be used later to compress and minimize pages
saved by monolith, if needed.
## Consequences
Monolith will not support modification of original document assets for the purpose of reducing their size, sticking to performing only a minimal
amount of modifications to the original web page — whatever is needed to provide security or exclude unwanted asset types.

15
docs/containers.md Normal file
View File

@@ -0,0 +1,15 @@
1. Run `docker build -t y2z/monolith .` to create a Docker image
2. Create a file named `monolith` which contains:
```sh
#!/bin/sh
docker run --rm \
y2z/monolith \
monolith \
"$@"
```
3. Make the file executable (`chmod +x monolith`) and include it into your `$PATH`
4. Now you should be able to run a containerized build of monolith like this:
`monolith -I https://github.com > document.html`

View File

@@ -2,7 +2,7 @@ use clap::{App, Arg};
#[derive(Default)]
pub struct AppArgs {
pub url_target: String,
pub target: String,
pub no_css: bool,
pub no_frames: bool,
pub no_images: bool,
@@ -11,42 +11,45 @@ pub struct AppArgs {
pub isolate: bool,
pub output: String,
pub silent: bool,
pub timeout: u64,
pub user_agent: String,
}
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
const DEFAULT_USER_AGENT: &str =
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0";
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0";
impl AppArgs {
pub fn get() -> AppArgs {
let app = App::new("monolith")
let app = App::new(env!("CARGO_PKG_NAME"))
.version(crate_version!())
.author(crate_authors!("\n"))
.about(crate_description!())
.arg(
Arg::with_name("url")
Arg::with_name("target")
.required(true)
.takes_value(true)
.index(1)
.help("URL to download"),
.help("URL or file path"),
)
// .args_from_usage("-a, --include-audio 'Embed audio sources'")
.args_from_usage("-c, --no-css 'Ignore styles'")
.args_from_usage("-f, --no-frames 'Exclude iframes'")
.args_from_usage("-i, --no-images 'Remove images'")
.args_from_usage("-I, --isolate 'Cut off from the Internet'")
.args_from_usage("-j, --no-js 'Exclude JavaScript'")
.args_from_usage("-k, --insecure 'Accept invalid X.509 (TLS) certificates'")
.args_from_usage("-o, --output=[document.html] 'Write output to <file>'")
.args_from_usage("-s, --silent 'Suppress verbosity'")
.args_from_usage("-u, --user-agent=[Iceweasel] 'Custom User-Agent string'")
// .args_from_usage("-v, --include-video 'Embed video sources'")
// .args_from_usage("-a, --include-audio 'Removes audio sources'")
.args_from_usage("-c, --no-css 'Removes CSS'")
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
.args_from_usage("-i, --no-images 'Removes images'")
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
.args_from_usage("-j, --no-js 'Removes JavaScript'")
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
.args_from_usage("-s, --silent 'Suppresses verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
// .args_from_usage("-v, --include-video 'Removes video sources'")
.get_matches();
let mut app_args = AppArgs::default();
// Process the command
app_args.url_target = app
.value_of("url")
.expect("please set target url")
app_args.target = app
.value_of("target")
.expect("please set target")
.to_string();
app_args.no_css = app.is_present("no-css");
app_args.no_frames = app.is_present("no-frames");
@@ -55,6 +58,11 @@ impl AppArgs {
app_args.insecure = app.is_present("insecure");
app_args.isolate = app.is_present("isolate");
app_args.silent = app.is_present("silent");
app_args.timeout = app
.value_of("timeout")
.unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string())
.parse::<u64>()
.unwrap();
app_args.output = app.value_of("output").unwrap_or("").to_string();
app_args.user_agent = app
.value_of("user-agent")

367
src/css.rs Normal file
View File

@@ -0,0 +1,367 @@
use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::utils::{data_to_data_url, decode_url, get_url_fragment, resolve_url, retrieve_asset};
/// CSS property names whose `url()` values are treated as image references.
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
    "background",
    "background-image",
    "border",
    "border-image",
    "border-image-source",
    "content",
    "cursor",
    "list-style",
    "list-style-image",
    "mask",
    "mask-image",
];

/// Returns `true` when `prop_name` is one of [`CSS_PROPS_WITH_IMAGE_URLS`].
///
/// The comparison ignores ASCII case, so `"Background-Image"` matches
/// `"background-image"`.
pub fn is_image_url_prop(prop_name: &str) -> bool {
    CSS_PROPS_WITH_IMAGE_URLS
        .iter()
        .any(|p| prop_name.eq_ignore_ascii_case(p))
}
/// Wraps `input` in quotes so that it can be emitted as a CSS string token.
///
/// `double` selects the delimiter: `"` when `true`, `'` when `false`.
///
/// Characters that would otherwise change the meaning of the string are escaped:
/// - the chosen delimiter is prefixed with a backslash;
/// - a backslash is doubled, because CSS treats `\` as the start of an escape;
/// - line feed, carriage return and form feed are written as hex escapes
///   (`\a `, `\d `, `\c `), because a raw line break terminates a CSS string.
///
/// The other quote character is left untouched.
pub fn enquote(input: String, double: bool) -> String {
    let quote: char = if double { '"' } else { '\'' };

    let mut quoted = String::with_capacity(input.len() + 2);
    quoted.push(quote);
    for ch in input.chars() {
        match ch {
            '\\' => quoted.push_str("\\\\"),
            '\n' | '\r' | '\x0C' => {
                // Hex escape followed by a space that terminates the escape sequence
                quoted.push_str(&format!("\\{:x} ", ch as u32));
            }
            c if c == quote => {
                quoted.push('\\');
                quoted.push(c);
            }
            c => quoted.push(c),
        }
    }
    quoted.push(quote);

    quoted
}
/// Re-serializes the CSS token stream read from `parser`, embedding referenced assets.
///
/// Tokens are consumed one at a time (whitespace and comments included) until the
/// parser reports an error or the end of its input, and are appended to the returned
/// string. Along the way:
/// - `@import` targets (quoted strings or `url()` tokens following an `import`
///   at-keyword) are retrieved, run through [`embed_css`] and replaced with a
///   `text/css` data URL;
/// - other `url()` values are retrieved via `retrieve_asset` and replaced with the
///   data URL it returns, unless `opt_no_images` is set and the current property is an
///   image property (see [`is_image_url_prop`]), in which case `empty_image!()` is used;
/// - `url(#fragment)` and empty `url()` tokens are passed through unchanged;
/// - nested blocks and functions are processed recursively.
///
/// # Arguments
/// * `cache` / `client` - forwarded to `retrieve_asset` and [`embed_css`].
/// * `parent_url` - URL that relative references are resolved against.
/// * `parser` - token source; consumed until exhausted.
/// * `rule_name` - at-rule name in effect when processing starts (e.g. `"import"`).
/// * `prop_name` - property name in effect when processing starts.
/// * `func_name` - name of the enclosing function; quoted strings are treated as
///   URLs only when this is `"url"`.
/// * `opt_no_images` - replace image URLs with an empty image instead of fetching them.
/// * `opt_silent` - forwarded to `retrieve_asset`.
///
/// # Returns
/// Always `Ok` with the rewritten CSS; retrieval and URL resolution failures fall back
/// to default (empty) values rather than producing an error.
///
/// # Panics
/// Unwraps the result of parsing nested blocks.
pub fn process_css<'a>(
    cache: &mut HashMap<String, String>,
    client: &Client,
    parent_url: &str,
    parser: &mut Parser,
    rule_name: &str,
    prop_name: &str,
    func_name: &str,
    opt_no_images: bool,
    opt_silent: bool,
) -> Result<String, ParseError<'a, String>> {
    let mut result: String = str!();

    // Most recently seen at-rule and identifier; they give context to URL tokens
    let mut curr_rule: String = str!(rule_name);
    let mut curr_prop: String = str!(prop_name);
    let mut token: &Token;
    let mut token_offset: SourcePosition;

    loop {
        token_offset = parser.position();
        token = match parser.next_including_whitespace_and_comments() {
            Ok(token) => token,
            Err(_) => {
                break;
            }
        };

        match *token {
            Token::Comment(_) => {
                // Copy the comment verbatim, delimiters included
                let token_slice = parser.slice_from(token_offset);
                result.push_str(token_slice);
            }
            Token::Semicolon => result.push_str(";"),
            Token::Colon => result.push_str(":"),
            Token::Comma => result.push_str(","),
            Token::ParenthesisBlock | Token::SquareBracketBlock | Token::CurlyBracketBlock => {
                let closure: &str;
                if token == &Token::ParenthesisBlock {
                    result.push_str("(");
                    closure = ")";
                } else if token == &Token::SquareBracketBlock {
                    result.push_str("[");
                    closure = "]";
                } else {
                    result.push_str("{");
                    closure = "}";
                }

                let block_css: String = parser
                    .parse_nested_block(|parser| {
                        process_css(
                            cache,
                            client,
                            parent_url,
                            parser,
                            rule_name,
                            curr_prop.as_str(),
                            func_name,
                            opt_no_images,
                            opt_silent,
                        )
                    })
                    .unwrap();
                result.push_str(block_css.as_str());

                result.push_str(closure);
            }
            Token::CloseParenthesis => result.push_str(")"),
            Token::CloseSquareBracket => result.push_str("]"),
            Token::CloseCurlyBracket => result.push_str("}"),
            Token::IncludeMatch => result.push_str("~="),
            Token::DashMatch => result.push_str("|="),
            Token::PrefixMatch => result.push_str("^="),
            Token::SuffixMatch => result.push_str("$="),
            Token::SubstringMatch => result.push_str("*="),
            Token::CDO => result.push_str("<!--"),
            Token::CDC => result.push_str("-->"),
            Token::WhiteSpace(ref value) => {
                result.push_str(value);
            }
            Token::Ident(ref value) => {
                curr_prop = str!(value);
                result.push_str(value);
            }
            Token::AtKeyword(ref value) => {
                curr_rule = str!(value);
                result.push_str("@");
                result.push_str(value);
            }
            Token::Hash(ref value) => {
                result.push_str("#");
                result.push_str(value);
            }
            Token::QuotedString(ref value) => {
                let is_import: bool = curr_rule == "import";

                if is_import {
                    // Reset current at-rule value
                    curr_rule = str!();

                    // Skip empty import values
                    if value.is_empty() {
                        result.push_str("''");
                        continue;
                    }

                    let full_url = resolve_url(&parent_url, value).unwrap_or_default();
                    let url_fragment = get_url_fragment(full_url.clone());
                    let full_url_decoded = decode_url(full_url);
                    let (css, final_url) = retrieve_asset(
                        cache,
                        client,
                        &parent_url,
                        &full_url_decoded,
                        false,
                        "",
                        opt_silent,
                    )
                    .unwrap_or_default();

                    // Imported stylesheet is processed recursively, then inlined
                    let embedded_css = embed_css(
                        cache,
                        client,
                        final_url.as_str(),
                        &css,
                        opt_no_images,
                        opt_silent,
                    );
                    let import_data_url = data_to_data_url(
                        "text/css",
                        embedded_css.as_bytes(),
                        &final_url,
                        url_fragment.as_str(),
                    );
                    result.push_str(enquote(import_data_url, false).as_str());
                } else if func_name == "url" {
                    // Skip empty url()'s
                    if value.is_empty() {
                        continue;
                    }

                    if opt_no_images && is_image_url_prop(curr_prop.as_str()) {
                        result.push_str(enquote(str!(empty_image!()), false).as_str());
                    } else {
                        let resolved_url = resolve_url(&parent_url, value).unwrap_or_default();
                        let (data_url, _final_url) = retrieve_asset(
                            cache,
                            client,
                            &parent_url,
                            &resolved_url,
                            true,
                            "",
                            opt_silent,
                        )
                        .unwrap_or_default();
                        result.push_str(enquote(data_url, false).as_str());
                    }
                } else {
                    // Plain string: re-quote it and leave the contents alone
                    result.push_str(enquote(str!(value), false).as_str());
                }
            }
            Token::Number {
                ref has_sign,
                ref value,
                ..
            } => {
                // A negative value already prints its own "-"; only "+" must be restored
                if *has_sign && *value >= 0. {
                    result.push_str("+");
                }
                result.push_str(&value.to_string())
            }
            Token::Percentage {
                ref has_sign,
                ref unit_value,
                ..
            } => {
                // A negative value already prints its own "-"; only "+" must be restored
                if *has_sign && *unit_value >= 0. {
                    result.push_str("+");
                }
                result.push_str(str!(unit_value * 100.).as_str());
                result.push_str("%");
            }
            Token::Dimension {
                ref has_sign,
                ref value,
                ref unit,
                ..
            } => {
                // A negative value already prints its own "-"; only "+" must be restored
                if *has_sign && *value >= 0. {
                    result.push_str("+");
                }
                result.push_str(str!(value).as_str());
                result.push_str(str!(unit).as_str());
            }
            Token::IDHash(ref value) => {
                result.push_str("#");
                result.push_str(value);
            }
            Token::UnquotedUrl(ref value) => {
                let is_import: bool = curr_rule == "import";
                if is_import {
                    // Reset current at-rule value
                    curr_rule = str!();
                }

                // Skip empty url()'s
                if value.is_empty() {
                    result.push_str("url()");
                    continue;
                } else if value.starts_with('#') {
                    // Fragment-only references are kept as they are
                    result.push_str("url(");
                    result.push_str(value);
                    result.push_str(")");
                    continue;
                }

                result.push_str("url(");
                if is_import {
                    let full_url = resolve_url(&parent_url, value).unwrap_or_default();
                    let url_fragment = get_url_fragment(full_url.clone());
                    let full_url_decoded = decode_url(full_url);
                    let (css, final_url) = retrieve_asset(
                        cache,
                        client,
                        &parent_url,
                        &full_url_decoded,
                        false,
                        "",
                        opt_silent,
                    )
                    .unwrap_or_default();

                    // Imported stylesheet is processed recursively, then inlined
                    let embedded_css = embed_css(
                        cache,
                        client,
                        final_url.as_str(),
                        &css,
                        opt_no_images,
                        opt_silent,
                    );
                    let import_data_url = data_to_data_url(
                        "text/css",
                        embedded_css.as_bytes(),
                        &final_url,
                        url_fragment.as_str(),
                    );
                    result.push_str(enquote(import_data_url, false).as_str());
                } else if opt_no_images && is_image_url_prop(curr_prop.as_str()) {
                    result.push_str(enquote(str!(empty_image!()), false).as_str());
                } else {
                    let full_url = resolve_url(&parent_url, value).unwrap_or_default();
                    let (data_url, _final_url) = retrieve_asset(
                        cache,
                        client,
                        &parent_url,
                        &full_url,
                        true,
                        "",
                        opt_silent,
                    )
                    .unwrap_or_default();
                    result.push_str(enquote(data_url, false).as_str());
                }
                result.push_str(")");
            }
            Token::Delim(ref value) => result.push_str(&value.to_string()),
            Token::Function(ref name) => {
                let function_name: &str = &name.clone();
                result.push_str(function_name);
                result.push_str("(");

                let block_css: String = parser
                    .parse_nested_block(|parser| {
                        process_css(
                            cache,
                            client,
                            parent_url,
                            parser,
                            curr_rule.as_str(),
                            curr_prop.as_str(),
                            function_name,
                            opt_no_images,
                            opt_silent,
                        )
                    })
                    .unwrap();
                result.push_str(block_css.as_str());

                result.push_str(")");
            }
            // Malformed URLs and strings are dropped from the output
            Token::BadUrl(_) | Token::BadString(_) => {}
        }
    }

    Ok(result)
}
/// Parses `css` and returns it with its external references embedded.
///
/// This is the top-level entry point around [`process_css`]: it builds a parser over
/// `css` and starts processing with no at-rule, property or function context.
/// `parent_url`, `cache`, `client` and the `opt_*` flags are handed to
/// [`process_css`] unchanged.
///
/// # Panics
/// Unwraps the result returned by [`process_css`].
pub fn embed_css(
    cache: &mut HashMap<String, String>,
    client: &Client,
    parent_url: &str,
    css: &str,
    opt_no_images: bool,
    opt_silent: bool,
) -> String {
    let mut parser_input = ParserInput::new(css);
    let mut css_parser = Parser::new(&mut parser_input);

    // Empty strings: no enclosing rule, property or function at the top level
    let processed = process_css(
        cache,
        client,
        parent_url,
        &mut css_parser,
        "",
        "",
        "",
        opt_no_images,
        opt_silent,
    );

    processed.unwrap()
}

View File

@@ -1,8 +1,6 @@
use crate::http::retrieve_asset;
use crate::css::embed_css;
use crate::js::attr_is_event_handler;
use crate::utils::{
data_to_dataurl, is_valid_url, resolve_css_imports, resolve_url, url_has_protocol,
};
use crate::utils::{data_to_data_url, is_http_url, resolve_url, retrieve_asset, url_has_protocol};
use html5ever::interface::QualName;
use html5ever::parse_document;
use html5ever::rcdom::{Handle, NodeData, RcDom};
@@ -22,27 +20,20 @@ const ICON_VALUES: &[&str] = &[
"fluid-icon",
];
const TRANSPARENT_PIXEL: &str =
"data:image/png;base64,\
iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=";
/// Returns a strong handle to the parent of `node`.
///
/// The weak parent reference is taken out of `node.parent` only for the duration of
/// the upgrade and is then put back, so calling this function does not detach the
/// node from its parent.
///
/// # Panics
/// Panics if `node` has no parent or if the parent has already been dropped.
pub fn get_parent_node(node: &Handle) -> Handle {
    let weak_parent = node.parent.take();
    let parent = weak_parent.as_ref().and_then(|weak| weak.upgrade());
    // Restore the reference before a possible panic below
    node.parent.set(weak_parent);
    parent.unwrap()
}
pub fn get_node_name(node: &Handle) -> &'_ str {
pub fn get_node_name(node: &Handle) -> Option<&'_ str> {
match &node.data {
NodeData::Element { ref name, .. } => name.local.as_ref(),
_ => "",
NodeData::Element { ref name, .. } => Some(name.local.as_ref()),
_ => None,
}
}
pub fn is_icon(attr_value: &str) -> bool {
ICON_VALUES
.iter()
.find(|a| attr_value.eq_ignore_ascii_case(a))
.is_some()
ICON_VALUES.contains(&attr_value.to_lowercase().as_str())
}
pub fn walk_and_embed_assets(
@@ -131,9 +122,10 @@ pub fn walk_and_embed_assets(
} else {
let href_full_url = resolve_url(&url, attr.value.as_ref())
.unwrap_or_default();
let (favicon_dataurl, _) = retrieve_asset(
let (favicon_data_url, _) = retrieve_asset(
cache,
client,
&url,
&href_full_url,
true,
"",
@@ -141,7 +133,7 @@ pub fn walk_and_embed_assets(
)
.unwrap_or_default();
attr.value.clear();
attr.value.push_slice(favicon_dataurl.as_str());
attr.value.push_slice(favicon_data_url.as_str());
}
}
}
@@ -157,21 +149,29 @@ pub fn walk_and_embed_assets(
let replacement_text = match retrieve_asset(
cache,
client,
&url,
&href_full_url,
false,
"text/css",
opt_silent,
) {
// On successful retrieval, traverse CSS
Ok((css_data, _)) => resolve_css_imports(
cache,
client,
&css_data,
true,
&href_full_url,
opt_no_images,
opt_silent,
),
Ok((css_data, final_url)) => {
let x: String = embed_css(
cache,
client,
&final_url,
&css_data,
opt_no_images,
opt_silent,
);
data_to_data_url(
"text/css",
x.as_bytes(),
&final_url,
"",
)
}
// If a network error occurred, warn
Err(e) => {
@@ -208,16 +208,55 @@ pub fn walk_and_embed_assets(
}
}
}
"body" => {
// Find and remove background attribute(s), keep reference to the last one
let mut found_background: Option<Attribute> = None;
let mut i = 0;
while i < attrs_mut.len() {
let attr_name = attrs_mut[i].name.local.as_ref();
if attr_name.eq_ignore_ascii_case("background") {
found_background = Some(attrs_mut.remove(i));
} else {
i += 1;
}
}
if !opt_no_images {
if let Some((data_url, _)) = found_background
.iter()
.map(|attr| attr.value.trim())
.filter(|background| !background.is_empty()) // Skip if empty
.next()
.and_then(|background| resolve_url(&url, background).ok()) // Make absolute
.and_then(|abs_src| // Download and convert to data_url
retrieve_asset(
cache,
client,
&url,
&abs_src,
true,
"",
opt_silent,
).ok())
{
// Add new data_url background attribute
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("background")),
value: Tendril::from_slice(data_url.as_ref()),
});
}
}
}
"img" => {
// Find source tags
// Find source attribute(s)
let mut found_src: Option<Attribute> = None;
let mut found_datasrc: Option<Attribute> = None;
let mut i = 0;
while i < attrs_mut.len() {
let name = attrs_mut[i].name.local.as_ref();
if name.eq_ignore_ascii_case("src") {
let attr_name = attrs_mut[i].name.local.as_ref();
if attr_name.eq_ignore_ascii_case("src") {
found_src = Some(attrs_mut.remove(i));
} else if name.eq_ignore_ascii_case("data-src") {
} else if attr_name.eq_ignore_ascii_case("data-src") {
found_datasrc = Some(attrs_mut.remove(i));
} else {
i += 1;
@@ -228,32 +267,90 @@ pub fn walk_and_embed_assets(
if opt_no_images {
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(TRANSPARENT_PIXEL),
value: Tendril::from_slice(empty_image!()),
});
} else if let Some((dataurl, _)) = found_datasrc
} else if let Some((data_url, _)) = found_datasrc
.iter()
.chain(&found_src) // Give dataurl priority
.chain(&found_src) // Give data_url priority
.map(|attr| attr.value.trim())
.filter(|src| !src.is_empty()) // Ignore empty srcs
.filter(|src| !src.is_empty()) // Skip if empty
.next()
.and_then(|src| resolve_url(&url, src).ok()) // Make absolute
.and_then(|abs_src| // Download and convert to dataurl
.and_then(|abs_src| // Download and convert to data_url
retrieve_asset(
cache,
client,
&url,
&abs_src,
true,
"",
opt_silent,
).ok())
{
// Add the new dataurl src attribute
// Add new data_url src attribute
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(dataurl.as_ref()),
value: Tendril::from_slice(data_url.as_ref()),
});
}
}
"input" => {
let mut is_image: bool = false;
for attr in attrs_mut.iter_mut() {
let attr_name: &str = &attr.name.local;
if attr_name == "type" {
is_image = attr.value.to_string().eq_ignore_ascii_case("image");
}
}
if is_image {
let mut found_src: Option<Attribute> = None;
let mut i = 0;
while i < attrs_mut.len() {
let attr_name = attrs_mut[i].name.local.as_ref();
if attr_name.eq_ignore_ascii_case("src") {
found_src = Some(attrs_mut.remove(i));
} else {
i += 1;
}
}
// If images are disabled, replace the source with an empty image
if opt_no_images {
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(empty_image!()),
});
} else if let Some((data_url, _)) = found_src
.iter()
.map(|attr| attr.value.trim())
.filter(|src| !src.is_empty()) // Skip if empty
.next()
.and_then(|src| resolve_url(&url, src).ok()) // Make absolute
.and_then(|abs_src| // Download and convert to data_url
retrieve_asset(
cache,
client,
&url,
&abs_src,
true,
"",
opt_silent,
).ok())
{
// Add new data_url src attribute
attrs_mut.push(Attribute {
name: QualName::new(None, ns!(), local_name!("src")),
value: Tendril::from_slice(data_url.as_ref()),
});
}
}
}
"svg" => {
if opt_no_images {
node.children.borrow_mut().clear();
}
}
"source" => {
for attr in attrs_mut.iter_mut() {
let attr_name: &str = &attr.name.local;
@@ -264,16 +361,17 @@ pub fn walk_and_embed_assets(
attr.value.clear();
attr.value.push_slice(src_full_url.as_str());
} else if attr_name == "srcset" {
if get_node_name(&get_parent_node(&node)) == "picture" {
if get_node_name(&get_parent_node(&node)) == Some("picture") {
if opt_no_images {
attr.value.clear();
attr.value.push_slice(TRANSPARENT_PIXEL);
attr.value.push_slice(empty_image!());
} else {
let srcset_full_url =
resolve_url(&url, attr.value.trim()).unwrap_or_default();
let (source_dataurl, _) = retrieve_asset(
let (source_data_url, _) = retrieve_asset(
cache,
client,
&url,
&srcset_full_url,
true,
"",
@@ -281,16 +379,24 @@ pub fn walk_and_embed_assets(
)
.unwrap_or((str!(), str!()));
attr.value.clear();
attr.value.push_slice(source_dataurl.as_str());
attr.value.push_slice(source_data_url.as_str());
}
}
}
}
}
"a" => {
"a" | "area" => {
for attr in attrs_mut.iter_mut() {
if &attr.name.local == "href" {
let attr_value = attr.value.trim();
if opt_no_js && attr_value.starts_with("javascript:") {
attr.value.clear();
// Replace with empty JS call to preserve original behavior
attr.value.push_slice("javascript:;");
continue;
}
// Don't touch email links or hrefs which begin with a hash sign
if attr_value.starts_with('#') || url_has_protocol(attr_value) {
continue;
@@ -327,9 +433,10 @@ pub fn walk_and_embed_assets(
if &attr.name.local == "src" {
let src_full_url =
resolve_url(&url, attr.value.trim()).unwrap_or_default();
let (js_dataurl, _) = retrieve_asset(
let (js_data_url, _) = retrieve_asset(
cache,
client,
&url,
&src_full_url,
true,
"application/javascript",
@@ -337,7 +444,7 @@ pub fn walk_and_embed_assets(
)
.unwrap_or((str!(), str!()));
attr.value.clear();
attr.value.push_slice(js_dataurl.as_str());
attr.value.push_slice(js_data_url.as_str());
}
}
}
@@ -350,12 +457,11 @@ pub fn walk_and_embed_assets(
for node in node.children.borrow_mut().iter_mut() {
if let NodeData::Text { ref contents } = node.data {
let mut tendril = contents.borrow_mut();
let replacement = resolve_css_imports(
let replacement = embed_css(
cache,
client,
tendril.as_ref(),
false,
&url,
tendril.as_ref(),
opt_no_images,
opt_silent,
);
@@ -370,7 +476,7 @@ pub fn walk_and_embed_assets(
if &attr.name.local == "action" {
let attr_value = attr.value.trim();
// Modify action to be a full URL
if !is_valid_url(attr_value) {
if !is_http_url(attr_value) {
let href_full_url =
resolve_url(&url, attr_value).unwrap_or_default();
attr.value.clear();
@@ -379,7 +485,7 @@ pub fn walk_and_embed_assets(
}
}
}
"iframe" => {
"frame" | "iframe" => {
for attr in attrs_mut.iter_mut() {
if &attr.name.local == "src" {
if opt_no_frames {
@@ -388,28 +494,29 @@ pub fn walk_and_embed_assets(
continue;
}
let iframe_src = attr.value.trim();
let frame_src = attr.value.trim();
// Ignore iframes with empty source (they cause infinite loops)
if iframe_src.is_empty() {
// Ignore (i)frames with empty source they cause infinite loops
if frame_src.is_empty() {
continue;
}
let src_full_url = resolve_url(&url, iframe_src).unwrap_or_default();
let (iframe_data, iframe_final_url) = retrieve_asset(
let src_full_url = resolve_url(&url, frame_src).unwrap_or_default();
let (frame_data, frame_final_url) = retrieve_asset(
cache,
client,
&url,
&src_full_url,
false,
"text/html",
opt_silent,
)
.unwrap_or((str!(), src_full_url));
let dom = html_to_dom(&iframe_data);
let dom = html_to_dom(&frame_data);
walk_and_embed_assets(
cache,
client,
&iframe_final_url,
&frame_final_url,
&dom.document,
opt_no_css,
opt_no_js,
@@ -419,9 +526,9 @@ pub fn walk_and_embed_assets(
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
let iframe_dataurl = data_to_dataurl("text/html", &buf);
let iframe_data_url = data_to_data_url("text/html", &buf, "", "");
attr.value.clear();
attr.value.push_slice(iframe_dataurl.as_str());
attr.value.push_slice(iframe_data_url.as_str());
}
}
}
@@ -440,9 +547,10 @@ pub fn walk_and_embed_assets(
} else {
let poster_full_url =
resolve_url(&url, video_poster).unwrap_or_default();
let (poster_dataurl, _) = retrieve_asset(
let (poster_data_url, _) = retrieve_asset(
cache,
client,
&url,
&poster_full_url,
true,
"",
@@ -450,7 +558,7 @@ pub fn walk_and_embed_assets(
)
.unwrap_or((poster_full_url, str!()));
attr.value.clear();
attr.value.push_slice(poster_dataurl.as_str());
attr.value.push_slice(poster_data_url.as_str());
}
}
}
@@ -463,7 +571,7 @@ pub fn walk_and_embed_assets(
// Get rid of style attributes
let mut style_attr_indexes = Vec::new();
for (i, attr) in attrs_mut.iter_mut().enumerate() {
if attr.name.local.to_lowercase() == "style" {
if attr.name.local.as_ref().eq_ignore_ascii_case("style") {
style_attr_indexes.push(i);
}
}
@@ -477,15 +585,15 @@ pub fn walk_and_embed_assets(
.iter_mut()
.filter(|a| a.name.local.as_ref().eq_ignore_ascii_case("style"))
{
let replacement = resolve_css_imports(
let replacement = embed_css(
cache,
client,
attribute.value.as_ref(),
false,
&url,
attribute.value.as_ref(),
opt_no_images,
opt_silent,
);
// let replacement = str!();
attribute.value.clear();
attribute.value.push_slice(&replacement);
}

View File

@@ -1,68 +0,0 @@
use crate::utils::{clean_url, data_to_dataurl, is_data_url};
use reqwest::blocking::Client;
use reqwest::header::CONTENT_TYPE;
use std::collections::HashMap;
/// Retrieves the asset located at `url`, using `cache` to avoid repeated requests.
///
/// Returns a `(data, final_url)` pair:
/// - a data URL is handed back untouched as both elements;
/// - a cache hit yields the cached payload together with the requested `url`;
/// - otherwise the asset is requested through `client`, stored in `cache` under the
///   cleaned *response* URL, and returned together with that response URL.
///
/// When `as_dataurl` is `true` the response body is converted with `data_to_dataurl`,
/// using `mime` as the media type or, if `mime` is empty, the `Content-Type` response
/// header (an empty media type is used when the header is missing or unreadable).
/// When `as_dataurl` is `false` the response body is returned as text.
///
/// Unless `opt_silent` is set, every retrieved URL is reported on STDERR.
///
/// # Errors
///
/// Returns the underlying `reqwest::Error` if the request cannot be sent or the
/// response body cannot be read.
pub fn retrieve_asset(
    cache: &mut HashMap<String, String>,
    client: &Client,
    url: &str,
    as_dataurl: bool,
    mime: &str,
    opt_silent: bool,
) -> Result<(String, String), reqwest::Error> {
    let cache_key = clean_url(&url);

    // NOTE(review): a URL that `is_data_url` cannot classify is treated as a regular
    // URL instead of aborting the whole program; the request below then reports the
    // problem through the returned `Result`. Confirm against `is_data_url`'s contract.
    if is_data_url(&url).unwrap_or(false) {
        return Ok((url.to_string(), url.to_string()));
    }

    // Single lookup: the URL is already in cache
    if let Some(data) = cache.get(&cache_key) {
        if !opt_silent {
            eprintln!("{} (from cache)", &url);
        }
        return Ok((data.to_string(), url.to_string()));
    }

    // The URL is not in cache, request it
    let mut response = client.get(url).send()?;
    let res_url = response.url().to_string();

    if !opt_silent {
        if url == res_url {
            eprintln!("{}", &url);
        } else {
            // The request got redirected
            eprintln!("{} -> {}", &url, &res_url);
        }
    }

    let new_cache_key = clean_url(&res_url);

    let payload = if as_dataurl {
        // Convert response into a byte array
        let mut data: Vec<u8> = vec![];
        response.copy_to(&mut data)?;

        // Attempt to obtain MIME type by reading the Content-Type header
        let mimetype = if mime.is_empty() {
            response
                .headers()
                .get(CONTENT_TYPE)
                .and_then(|header| header.to_str().ok())
                .unwrap_or(mime)
        } else {
            mime
        };

        data_to_dataurl(&mimetype, &data)
    } else {
        // Propagate body read failures to the caller instead of panicking
        response.text()?
    };

    // Insert in cache
    cache.insert(new_cache_key, payload.clone());

    Ok((payload, res_url))
}

View File

@@ -1,7 +1,7 @@
const JS_DOM_EVENT_ATTRS: &[&str] = &[
// From WHATWG HTML spec 8.1.5.2 'Event handlers on elements, Document objects, and Window objects':
// From WHATWG HTML spec 8.1.5.2 "Event handlers on elements, Document objects, and Window objects":
// https://html.spec.whatwg.org/#event-handlers-on-elements,-document-objects,-and-window-objects
// https://html.spec.whatwg.org/#attributes-3 (table 'List of event handler content attributes')
// https://html.spec.whatwg.org/#attributes-3 (table "List of event handler content attributes")
// Global event handlers
"onabort",

View File

@@ -1,11 +1,8 @@
#[macro_use]
extern crate lazy_static;
#[macro_use]
mod macros;
pub mod css;
pub mod html;
pub mod http;
pub mod js;
pub mod utils;

View File

@@ -7,3 +7,11 @@ macro_rules! str {
ToString::to_string(&$val)
};
}
/// Expands to a string literal holding a base64-encoded PNG data URL,
/// used as the placeholder source wherever images get excluded.
#[macro_export]
macro_rules! empty_image {
    () => {
        concat!(
            "data:image/png;base64,",
            "iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII="
        )
    };
}

View File

@@ -1,24 +1,25 @@
#[macro_use]
extern crate clap;
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
use monolith::utils::{data_url_to_text, is_data_url, is_file_url, is_http_url, retrieve_asset};
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use std::collections::HashMap;
use std::env;
use std::fs;
use std::io::{self, Error, Write};
use std::path::Path;
use std::process;
use std::time::Duration;
mod args;
mod macros;
#[macro_use]
extern crate clap;
use crate::args::AppArgs;
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
use monolith::http::retrieve_asset;
use monolith::utils::is_valid_url;
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use std::collections::HashMap;
use std::fs::File;
use std::io::{self, Error, Write};
use std::process;
use std::time::Duration;
enum Output {
Stdout(io::Stdout),
File(File),
File(fs::File),
}
impl Output {
@@ -26,7 +27,7 @@ impl Output {
if file_path.is_empty() {
Ok(Output::Stdout(io::stdout()))
} else {
Ok(Output::File(File::create(file_path)?))
Ok(Output::File(fs::File::create(file_path)?))
}
}
@@ -46,13 +47,42 @@ impl Output {
fn main() {
let app_args = AppArgs::get();
let original_target: &str = &app_args.target;
let target_url: &str;
let base_url;
let dom;
if !is_valid_url(app_args.url_target.as_str()) {
eprintln!(
"Only HTTP and HTTPS URLs are allowed but got: {}",
&app_args.url_target
);
// Pre-process the input
let cwd_normalized: String =
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let path = Path::new(original_target);
let mut target: String = str!(original_target.clone()).replace("\\", "/");
let path_is_relative: bool = path.is_relative();
if target.clone().len() == 0 {
eprintln!("No target specified");
process::exit(1);
} else if is_http_url(target.clone()) || is_data_url(target.clone()) {
target_url = target.as_str();
} else if is_file_url(target.clone()) {
target_url = target.as_str();
} else if path.exists() {
if !path.is_file() {
eprintln!("Local target is not a file: {}", original_target);
process::exit(1);
}
target.insert_str(0, if cfg!(windows) { "file:///" } else { "file://" });
if path_is_relative {
target.insert_str(if cfg!(windows) { 8 } else { 7 }, &cwd_normalized);
target.insert_str(
if cfg!(windows) { 8 } else { 7 } + &cwd_normalized.len(),
"/",
);
}
target_url = target.as_str();
} else {
target.insert_str(0, "http://");
target_url = target.as_str();
}
let mut output = Output::new(&app_args.output).expect("Could not prepare output");
@@ -65,29 +95,48 @@ fn main() {
HeaderValue::from_str(&app_args.user_agent).expect("Invalid User-Agent header specified"),
);
let timeout: u64 = if app_args.timeout > 0 {
app_args.timeout
} else {
std::u64::MAX / 4
};
let client = Client::builder()
.timeout(Duration::from_secs(10))
.timeout(Duration::from_secs(timeout))
.danger_accept_invalid_certs(app_args.insecure)
.default_headers(header_map)
.build()
.expect("Failed to initialize HTTP client");
// Retrieve root document
let (data, final_url) = retrieve_asset(
&mut cache,
&client,
app_args.url_target.as_str(),
false,
"",
app_args.silent,
)
.expect("Could not retrieve assets in HTML");
let dom = html_to_dom(&data);
if is_file_url(target_url) || is_http_url(target_url) {
let (data, final_url) = retrieve_asset(
&mut cache,
&client,
target_url,
target_url,
false,
"",
app_args.silent,
)
.expect("Could not retrieve target document");
base_url = final_url;
dom = html_to_dom(&data);
} else if is_data_url(target_url) {
let text: String = data_url_to_text(target_url);
if text.len() == 0 {
eprintln!("Unsupported data URL input");
process::exit(1);
}
base_url = str!(target_url);
dom = html_to_dom(&text);
} else {
process::exit(1);
}
walk_and_embed_assets(
&mut cache,
&client,
&final_url,
&base_url,
&dom.document,
app_args.no_css,
app_args.no_js,

520
src/tests/cli.rs Normal file
View File

@@ -0,0 +1,520 @@
use assert_cmd::prelude::*;
use std::env;
use std::io::Write;
use std::process::Command;
use tempfile::NamedTempFile;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
// Running with `-V` prints "<package name> <package version>" and exits cleanly.
#[test]
fn passing_print_version() -> Result<(), Box<dyn std::error::Error>> {
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("-V")
        .output()
        .unwrap();
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    let stderr = std::str::from_utf8(&output.stderr).unwrap();

    // Program name and version are expected on STDOUT
    assert_eq!(
        stdout,
        format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))
    );

    // Nothing is expected on STDERR
    assert_eq!(stderr, "");

    // Exit code must be 0
    output.assert().code(0);

    Ok(())
}
// An empty target argument is rejected with an error message and exit code 1.
#[test]
fn passing_bad_input_empty_target() -> Result<(), Box<dyn std::error::Error>> {
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("")
        .output()
        .unwrap();
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    let stderr = std::str::from_utf8(&output.stderr).unwrap();

    // Nothing is expected on STDOUT
    assert_eq!(stdout, "");

    // The error description is expected on STDERR
    assert_eq!(stderr, "No target specified\n");

    // Exit code must be 1
    output.assert().code(1);

    Ok(())
}
// A data URL target without a supported media type is rejected with exit code 1.
#[test]
fn passing_bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("data:,Hello%2C%20World!")
        .output()
        .unwrap();
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    let stderr = std::str::from_utf8(&output.stderr).unwrap();

    // Nothing is expected on STDOUT since the input gets rejected
    assert_eq!(stdout, "");

    // The error description is expected on STDERR
    assert_eq!(stderr, "Unsupported data URL input\n");

    // Exit code must be 1
    output.assert().code(1);

    Ok(())
}
// `-I` adds an isolating Content-Security-Policy meta tag to the document.
#[test]
fn passing_isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let expected_stdout = "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
        </head><body>Hello, World!</body></html>\n";

    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("-I")
        .arg("data:text/html,Hello%2C%20World!")
        .output()
        .unwrap();
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    let stderr = std::str::from_utf8(&output.stderr).unwrap();

    // Isolated HTML is expected on STDOUT
    assert_eq!(stdout, expected_stdout);

    // Nothing is expected on STDERR
    assert_eq!(stderr, "");

    // Exit code must be 0
    output.assert().code(0);

    Ok(())
}
// `-c` empties <style> nodes and adds a CSP meta tag that forbids styles.
#[test]
fn passing_remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let expected_stdout = "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
        <style></style>\
        </head><body>Hello</body></html>\n";

    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("-c")
        .arg("data:text/html,<style>body{background-color:pink}</style>Hello")
        .output()
        .unwrap();
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    let stderr = std::str::from_utf8(&output.stderr).unwrap();

    // HTML without any CSS is expected on STDOUT
    assert_eq!(stdout, expected_stdout);

    // Nothing is expected on STDERR
    assert_eq!(stderr, "");

    // Exit code must be 0
    output.assert().code(0);

    Ok(())
}
// `-f` blanks the source of iframes and adds a CSP meta tag that forbids frames.
#[test]
fn passing_remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let expected_stdout = "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
        </head><body><iframe src=\"\"></iframe>Hi</body></html>\n";

    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("-f")
        .arg("data:text/html,<iframe src=\"https://google.com\"></iframe>Hi")
        .output()
        .unwrap();
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    let stderr = std::str::from_utf8(&output.stderr).unwrap();

    // HTML with an emptied iframe is expected on STDOUT
    assert_eq!(stdout, expected_stdout);

    // Nothing is expected on STDERR
    assert_eq!(stderr, "");

    // Exit code must be 0
    output.assert().code(0);

    Ok(())
}
// `-i` swaps image sources for the empty image and adds a matching CSP meta tag.
#[test]
fn passing_remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let expected_stdout = format!(
        "<html>\
        <head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>\
        </head>\
        <body>\
        <img src=\"{empty_image}\">\
        Hi\
        </body>\
        </html>\n",
        empty_image = empty_image!()
    );

    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("-i")
        .arg("data:text/html,<img src=\"https://google.com\"/>Hi")
        .output()
        .unwrap();
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    let stderr = std::str::from_utf8(&output.stderr).unwrap();

    // HTML with the image replaced by the empty image is expected on STDOUT
    assert_eq!(stdout, expected_stdout);

    // Nothing is expected on STDERR
    assert_eq!(stderr, "");

    // Exit code must be 0
    output.assert().code(0);

    Ok(())
}
// `-j` empties <script> nodes and adds a CSP meta tag that forbids scripts.
#[test]
fn passing_remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let expected_stdout = "<html>\
        <head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta>\
        <script></script></head>\
        <body>Hi</body>\
        </html>\n";

    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("-j")
        .arg("data:text/html,<script>alert(2)</script>Hi")
        .output()
        .unwrap();
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    let stderr = std::str::from_utf8(&output.stderr).unwrap();

    // HTML without any JS is expected on STDOUT
    assert_eq!(stdout, expected_stdout);

    // Nothing is expected on STDERR
    assert_eq!(stderr, "");

    // Exit code must be 0
    output.assert().code(0);

    Ok(())
}
// A relative path to a local HTML file is accepted as target and its local
// stylesheet and script get embedded.
#[test]
fn passing_local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
    // Current working directory with forward slashes only, as it shows up in file URLs
    let cwd_normalized: String = env::current_dir()
        .unwrap()
        .to_str()
        .unwrap()
        .replace("\\", "/");
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // Relative path to the fixture, written with the platform's separator
    let target = if cfg!(windows) {
        "src\\tests\\data\\local-file.html"
    } else {
        "src/tests/data/local-file.html"
    };

    let expected_stdout = "\
        <!DOCTYPE html><html lang=\"en\"><head>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\" rel=\"stylesheet\" type=\"text/css\">\n \
        <link href=\"data:text/css;base64,\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img alt=\"\" src=\"\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\
        </body></html>\n\
        ";
    let expected_stderr = format!(
        "\
        {file}{cwd}/src/tests/data/local-file.html\n\
        {file}{cwd}/src/tests/data/local-style.css\n\
        {file}{cwd}/src/tests/data/local-script.js\n\
        ",
        file = file_url_protocol,
        cwd = cwd_normalized
    );

    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg(target)
        .output()
        .unwrap();
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    let stderr = std::str::from_utf8(&output.stderr).unwrap();

    // HTML from the local file is expected on STDOUT
    assert_eq!(stdout, expected_stdout);

    // The list of retrieved file URLs is expected on STDERR
    assert_eq!(stderr, expected_stderr);

    // Exit code must be 0
    output.assert().code(0);

    Ok(())
}
// An absolute path to a local HTML file is accepted as target; with `-jciI`
// no asset besides the target itself gets retrieved.
#[test]
fn passing_local_file_target_input_absolute_target_path() -> Result<(), Box<dyn std::error::Error>>
{
    let cwd = env::current_dir().unwrap();
    let cwd_str = cwd.to_str().unwrap();
    // Same directory with forward slashes only, as it shows up in file URLs
    let cwd_normalized: String = cwd_str.replace("\\", "/");
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // Absolute path to the fixture, written with the platform's separator
    let fixture_suffix = if cfg!(windows) {
        "\\src\\tests\\data\\local-file.html"
    } else {
        "/src/tests/data/local-file.html"
    };
    let target = format!("{}{}", cwd_str, fixture_suffix);

    let expected_stdout = format!(
        "\
        <!DOCTYPE html><html lang=\"en\"><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n \
        <link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img alt=\"\" src=\"{empty_image}\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script src=\"\"></script>\n\n\n\n\
        </body></html>\n\
        ",
        empty_image = empty_image!()
    );
    let expected_stderr = format!(
        "{file}{cwd}/src/tests/data/local-file.html\n",
        file = file_url_protocol,
        cwd = cwd_normalized,
    );

    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("-jciI")
        .arg(target)
        .output()
        .unwrap();
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    let stderr = std::str::from_utf8(&output.stderr).unwrap();

    // HTML from the local file is expected on STDOUT
    assert_eq!(stdout, expected_stdout);

    // Only the target file is expected to be listed on STDERR
    assert_eq!(stderr, expected_stderr);

    // Exit code must be 0
    output.assert().code(0);

    Ok(())
}
// A `file://` URL pointing at a local HTML file is accepted as target; with
// `-cji` no asset besides the target itself gets retrieved.
#[test]
fn passing_local_file_url_target_input() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    // Current working directory with forward slashes only, as it shows up in file URLs
    let cwd_normalized: String =
        str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
    // The only platform-dependent part of the URL is the protocol prefix
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
    // The target URL is identical on every platform once the prefix is chosen,
    // and it is also exactly what gets reported on STDERR
    let target_url: String = format!(
        "{file}{cwd}/src/tests/data/local-file.html",
        file = file_url_protocol,
        cwd = cwd_normalized,
    );

    let out = cmd.arg("-cji").arg(&target_url).output().unwrap();

    // STDOUT should contain HTML from the local file
    assert_eq!(
        std::str::from_utf8(&out.stdout).unwrap(),
        format!(
            "\
            <!DOCTYPE html><html lang=\"en\"><head>\
            <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
            <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
            <title>Local HTML file</title>\n \
            <link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n \
            <link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
            <img alt=\"\" src=\"{empty_image}\">\n \
            <a href=\"file://local-file.html/\">Tricky href</a>\n \
            <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
            <script src=\"\"></script>\n\n\n\n\
            </body></html>\n\
            ",
            empty_image = empty_image!()
        )
    );

    // STDERR should list the target file URL only
    assert_eq!(
        std::str::from_utf8(&out.stderr).unwrap(),
        format!("{}\n", target_url)
    );

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
// Documents supplied as data URLs must not be able to pull in local files.
#[test]
fn passing_security_disallow_local_assets_within_data_url_targets(
) -> Result<(), Box<dyn std::error::Error>> {
    let output = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("data:text/html,%3Cscript%20src=\"src/tests/data/local-script.js\"%3E%3C/script%3E")
        .output()
        .unwrap();
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    let stderr = std::str::from_utf8(&output.stderr).unwrap();

    // HTML with an emptied script source is expected on STDOUT
    assert_eq!(
        stdout,
        "<html><head><script src=\"\"></script></head><body></body></html>\n"
    );

    // Nothing is expected on STDERR
    assert_eq!(stderr, "");

    // Exit code must be 0
    output.assert().code(0);

    Ok(())
}
// A `file://` image referenced from within a style attribute of a local
// document gets embedded as a data URL.
#[test]
fn passing_embed_file_url_local_asset_within_style_attribute(
) -> Result<(), Box<dyn std::error::Error>> {
    let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;

    // Temporary SVG image that the HTML document refers to
    let mut file_svg = NamedTempFile::new()?;
    writeln!(file_svg, "<svg version=\"1.1\" baseProfile=\"full\" width=\"300\" height=\"200\" xmlns=\"http://www.w3.org/2000/svg\">\
        <rect width=\"100%\" height=\"100%\" fill=\"red\" />\
        <circle cx=\"150\" cy=\"100\" r=\"80\" fill=\"green\" />\
        <text x=\"150\" y=\"125\" font-size=\"60\" text-anchor=\"middle\" fill=\"white\">SVG</text>\
        </svg>\n")?;
    let svg_path: String = file_svg.path().to_str().unwrap().replace("\\", "/");

    // Temporary HTML document which uses the SVG as background image
    let mut file_html = NamedTempFile::new()?;
    writeln!(
        file_html,
        "<div style='background-image: url(\"{file}{path}\")'></div>\n",
        file = file_url_prefix,
        path = svg_path,
    )?;
    let html_path: String = file_html.path().to_str().unwrap().replace("\\", "/");

    let output = cmd.arg(file_html.path()).output().unwrap();
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    let stderr = std::str::from_utf8(&output.stderr).unwrap();

    // HTML with a data URL for background-image is expected on STDOUT
    assert_eq!(
        stdout,
        "<html><head></head><body><div style=\"background-image: url('data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiBmaWxsPSJyZWQiIC8+PGNpcmNsZSBjeD0iMTUwIiBjeT0iMTAwIiByPSI4MCIgZmlsbD0iZ3JlZW4iIC8+PHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+PC9zdmc+Cgo=')\"></div>\n\n</body></html>\n"
    );

    // The temporary files that got retrieved are expected to be listed on STDERR
    assert_eq!(
        stderr,
        format!(
            "\
            {file}{html_path}\n\
            {file}{svg_path}\n\
            ",
            file = file_url_prefix,
            html_path = html_path,
            svg_path = svg_path,
        )
    );

    // Exit code must be 0
    output.assert().code(0);

    Ok(())
}
// Every form of CSS @import (bare string, unquoted url(), quoted url())
// within a <style> node gets its target embedded as a data URL.
#[test]
fn passing_css_import_string() -> Result<(), Box<dyn std::error::Error>> {
    let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;

    // Temporary stylesheet that gets imported three times
    let mut file_css = NamedTempFile::new()?;
    writeln!(file_css, "body{{background-color:#000;color:#fff}}")?;
    let css_path: String = file_css.path().to_str().unwrap().replace("\\", "/");

    // Temporary HTML document containing the @import rules
    let mut file_html = NamedTempFile::new()?;
    writeln!(
        file_html,
        "\
        <style>\n\
        @charset 'UTF-8';\n\
        \n\
        @import '{file}{css_path}';\n\
        \n\
        @import url({file}{css_path});\n\
        \n\
        @import url('{file}{css_path}')\n\
        </style>\n\
        ",
        file = file_url_prefix,
        css_path = css_path,
    )?;
    let html_path: String = file_html.path().to_str().unwrap().replace("\\", "/");

    let output = cmd.arg(file_html.path()).output().unwrap();
    let stdout = std::str::from_utf8(&output.stdout).unwrap();
    let stderr = std::str::from_utf8(&output.stderr).unwrap();

    // HTML with embedded CSS imports is expected on STDOUT
    assert_eq!(
        stdout,
        "<html><head><style>\n@charset 'UTF-8';\n\n@import 'data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K';\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K');\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K')\n</style>\n\n</head><body></body></html>\n"
    );

    // The temporary files that got retrieved are expected to be listed on STDERR
    assert_eq!(
        stderr,
        format!(
            "\
            {file}{html_path}\n\
            {file}{css_path}\n\
            {file}{css_path}\n\
            {file}{css_path}\n\
            ",
            file = file_url_prefix,
            html_path = html_path,
            css_path = css_path,
        )
    );

    // Exit code must be 0
    output.assert().code(0);

    Ok(())
}

199
src/tests/css/embed_css.rs Normal file
View File

@@ -0,0 +1,199 @@
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::css;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
// Empty CSS stays empty.
#[test]
fn passing_empty_input() {
    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(&mut cache, &client, "", "", false, false);

    assert_eq!(embedded, "");
}
// With images excluded, unquoted url() values of image properties are
// replaced by the empty image while everything else is left intact.
#[test]
fn passing_style_exclude_unquoted_images() {
    const STYLE: &str = "/* border: none;*/\
        background-image: url(https://somewhere.com/bg.png); \
        list-style: url(/assets/images/bullet.svg);\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)";

    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(
        &mut cache,
        &client,
        "https://doesntmatter.local/",
        STYLE,
        true,
        true,
    );
    let expected = format!(
        "/* border: none;*/\
        background-image: url('{empty_image}'); \
        list-style: url('{empty_image}');\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)",
        empty_image = empty_image!()
    );

    assert_eq!(embedded, expected);
}
// With images excluded, single-quoted url() values of image properties are
// replaced by the empty image while everything else is left intact.
#[test]
fn passing_style_exclude_single_quoted_images() {
    const STYLE: &str = "/* border: none;*/\
        background-image: url('https://somewhere.com/bg.png'); \
        list-style: url('/assets/images/bullet.svg');\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)";

    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(&mut cache, &client, "", STYLE, true, true);
    let expected = format!(
        "/* border: none;*/\
        background-image: url('{empty_image}'); \
        list-style: url('{empty_image}');\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)",
        empty_image = empty_image!()
    );

    assert_eq!(embedded, expected);
}
// A style block whose only URL is already a data URL comes out unchanged.
#[test]
fn passing_style_block() {
    const CSS: &str = "\
        #id.class-name:not(:nth-child(3n+0)) {\n \
        // border: none;\n \
        background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=');\n\
        }\n\
        \n\
        html > body {}";

    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(&mut cache, &client, "file:///", CSS, false, true);

    assert_eq!(embedded, CSS);
}
// CSS made of attribute selectors and comments only comes out unchanged.
#[test]
fn passing_attribute_selectors() {
    // The literal below spans multiple lines without continuation markers,
    // hence every line break and leading character is part of the CSS itself
    const CSS: &str = "\
[data-value] {
/* Attribute exists */
}
[data-value='foo'] {
/* Attribute has this exact value */
}
[data-value*='foo'] {
/* Attribute value contains this value somewhere in it */
}
[data-value~='foo'] {
/* Attribute has this value in a space-separated list somewhere */
}
[data-value^='foo'] {
/* Attribute value starts with this */
}
[data-value|='foo'] {
/* Attribute value starts with this in a dash-separated list */
}
[data-value$='foo'] {
/* Attribute value ends with this */
}
";

    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(&mut cache, &client, "", CSS, false, false);

    assert_eq!(embedded, CSS);
}
// Targets of @import rules (bare string and url() forms) are rewritten as
// base64 data URLs; the @charset rule is left alone.
#[test]
fn passing_import_string() {
    const CSS: &str = "\
        @charset 'UTF-8';\n\
        \n\
        @import 'data:text/css,html{background-color:%23000}';\n\
        \n\
        @import url('data:text/css,html{color:%23fff}')\n\
        ";
    const EXPECTED: &str = "\
        @charset 'UTF-8';\n\
        \n\
        @import 'data:text/css;base64,ZGF0YTp0ZXh0L2NzcyxodG1se2JhY2tncm91bmQtY29sb3I6IzAwMH0=';\n\
        \n\
        @import url('data:text/css;base64,ZGF0YTp0ZXh0L2NzcyxodG1se2NvbG9yOiNmZmZ9')\n\
        ";

    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(
        &mut cache,
        &client,
        "https://doesntmatter.local/",
        CSS,
        false,
        true,
    );

    assert_eq!(embedded, EXPECTED);
}
// url() values which begin with a hash sign are left untouched.
#[test]
fn passing_hash_urls() {
    const CSS: &str = "\
        body {\n \
        behavior: url(#default#something);\n\
        }\n\
        \n\
        .scissorHalf {\n \
        offset-path: url(#somePath);\n\
        }\n\
        ";

    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(
        &mut cache,
        &client,
        "https://doesntmatter.local/",
        CSS,
        false,
        true,
    );

    assert_eq!(embedded, CSS);
}

50
src/tests/css/enquote.rs Normal file
View File

@@ -0,0 +1,50 @@
use crate::css;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
// An empty string wrapped in single quotes is a pair of apostrophes.
#[test]
fn passing_empty_input_single_quotes() {
    let quoted = css::enquote(str!(""), false);

    assert_eq!(quoted, "''");
}
// An empty string wrapped in double quotes is a pair of quotation marks.
#[test]
fn passing_empty_input_double_quotes() {
    let quoted = css::enquote(str!(""), true);

    assert_eq!(quoted, "\"\"");
}
// Apostrophes get escaped when the string is wrapped in single quotes.
#[test]
fn passing_apostrophes_single_quotes() {
    let quoted = css::enquote(str!("It's a lovely day, don't you think?"), false);

    assert_eq!(quoted, "'It\\'s a lovely day, don\\'t you think?'");
}
// Apostrophes are kept as they are when the string is wrapped in double quotes.
#[test]
fn passing_apostrophes_double_quotes() {
    let quoted = css::enquote(str!("It's a lovely day, don't you think?"), true);

    assert_eq!(quoted, "\"It's a lovely day, don't you think?\"");
}
// With single quotes only the apostrophes get escaped, quotation marks do not.
#[test]
fn passing_feet_and_inches_single_quotes() {
    let quoted = css::enquote(str!("5'2\", 6'5\""), false);

    assert_eq!(quoted, "'5\\'2\", 6\\'5\"'");
}
// With double quotes only the quotation marks get escaped, apostrophes do not.
#[test]
fn passing_feet_and_inches_double_quotes() {
    let quoted = css::enquote(str!("5'2\", 6'5\""), true);

    assert_eq!(quoted, "\"5'2\\\", 6'5\\\"\"");
}

View File

@@ -0,0 +1,88 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
// Property names for which `css::is_image_url_prop` is expected to return true.
#[cfg(test)]
mod passing {
    use crate::css;

    #[test]
    fn background() {
        assert!(css::is_image_url_prop("background"));
    }

    #[test]
    fn background_image() {
        assert!(css::is_image_url_prop("background-image"));
    }

    // Property names are expected to be matched regardless of letter case
    #[test]
    fn background_image_uppercase() {
        assert!(css::is_image_url_prop("BACKGROUND-IMAGE"));
    }

    #[test]
    fn border_image() {
        assert!(css::is_image_url_prop("border-image"));
    }

    #[test]
    fn content() {
        assert!(css::is_image_url_prop("content"));
    }

    #[test]
    fn cursor() {
        assert!(css::is_image_url_prop("cursor"));
    }

    #[test]
    fn list_style() {
        assert!(css::is_image_url_prop("list-style"));
    }

    #[test]
    fn list_style_image() {
        assert!(css::is_image_url_prop("list-style-image"));
    }

    #[test]
    fn mask_image() {
        assert!(css::is_image_url_prop("mask-image"));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    //! Property names for which `css::is_image_url_prop` is expected to return `false`.
    use crate::css;

    /// An empty property name is rejected.
    #[test]
    fn empty() {
        let recognised = css::is_image_url_prop("");
        assert!(!recognised);
    }

    /// `width` is rejected.
    #[test]
    fn width() {
        let recognised = css::is_image_url_prop("width");
        assert!(!recognised);
    }

    /// `color` is rejected.
    #[test]
    fn color() {
        let recognised = css::is_image_url_prop("color");
        assert!(!recognised);
    }

    /// `z-index` is rejected.
    #[test]
    fn z_index() {
        let recognised = css::is_image_url_prop("z-index");
        assert!(!recognised);
    }
}

3
src/tests/css/mod.rs Normal file
View File

@@ -0,0 +1,3 @@
mod embed_css;
mod enquote;
mod is_image_url_prop;

View File

@@ -0,0 +1,19 @@
<!doctype html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Local HTML file</title>
<link href="local-style.css" rel="stylesheet" type="text/css" />
<link href="local-style-does-not-exist.css" rel="stylesheet" type="text/css" />
</head>
<body>
<img src="monolith.png" alt="" />
<a href="//local-file.html">Tricky href</a>
<a href="https://github.com/Y2Z/monolith">Remote URL</a>
<script src="local-script.js"></script>
</body>
</html>

View File

@@ -0,0 +1,2 @@
document.body.style.backgroundColor = "green";
document.body.style.color = "red";

View File

@@ -0,0 +1,4 @@
body {
background-color: #000;
color: #fff;
}

View File

@@ -1,520 +0,0 @@
use crate::html::{
get_node_name, get_parent_node, html_to_dom, is_icon, stringify_document, walk_and_embed_assets,
};
use html5ever::rcdom::{Handle, NodeData};
use html5ever::serialize::{serialize, SerializeOpts};
use reqwest::blocking::Client;
use std::collections::HashMap;
/// Checks `is_icon` against strings it should accept and strings it should reject.
#[test]
fn test_is_icon() {
    // Expected to be accepted, whatever the letter case.
    assert!(is_icon("icon"));
    assert!(is_icon("Shortcut Icon"));
    assert!(is_icon("ICON"));
    assert!(is_icon("mask-icon"));
    assert!(is_icon("fluid-icon"));

    // Expected to be rejected.
    assert!(!is_icon("stylesheet"));
    assert!(!is_icon(""));
}
/// Walks a small parsed document and checks that every known element reports the
/// expected parent name through `get_parent_node` + `get_node_name`.
#[test]
fn test_get_parent_node_name() {
    let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
    let dom = html_to_dom(&html);
    let mut count = 0;

    /// Recursively visits `node`, bumping `i` once per visited node (of any kind).
    fn test_walk(node: &Handle, i: &mut i8) {
        *i += 1;

        match &node.data {
            NodeData::Document => {
                for child in node.children.borrow().iter() {
                    test_walk(child, i);
                }
            }
            NodeData::Element { ref name, .. } => {
                let node_name = name.local.as_ref().to_string();
                let parent = get_parent_node(node);
                let parent_node_name = get_node_name(&parent);

                // Element names are compared in lowercase even though the input
                // markup spells some tags in uppercase.
                match node_name.as_str() {
                    "head" | "body" => assert_eq!(parent_node_name, "html"),
                    "div" => assert_eq!(parent_node_name, "body"),
                    "p" => assert_eq!(parent_node_name, "div"),
                    _ => (),
                }

                for child in node.children.borrow().iter() {
                    test_walk(child, i);
                }
            }
            _ => (),
        };
    }

    test_walk(&dom.document, &mut count);

    // Every visited node is counted, not only elements.
    assert_eq!(count, 7);
}
/// Runs `walk_and_embed_assets` over a bare `<div><P></P></div>` fragment with no
/// exclusion option enabled and checks the serialized document.
#[test]
fn test_walk_and_embed_assets() {
    let html = "<div><P></P></div>";
    let dom = html_to_dom(&html);
    let url = "http://localhost";
    let mut cache = HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).unwrap();
    assert_eq!(
        rendered,
        "<html><head></head><body><div><p></p></div></body></html>"
    );
}
/// An `<iframe>` with an empty `src` must come out of `walk_and_embed_assets`
/// unchanged (per the test name, this guards against recursive embedding).
#[test]
fn test_walk_and_embed_assets_ensure_no_recursive_iframe() {
    let html = "<div><P></P><iframe src=\"\"></iframe></div>";
    let dom = html_to_dom(&html);
    let url = "http://localhost";
    let mut cache = HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).unwrap();
    assert_eq!(
        rendered,
        "<html><head></head><body><div><p></p><iframe src=\"\"></iframe></div></body></html>"
    );
}
/// With `opt_no_css` set, the stylesheet `href`, the `<style>` body and the inline
/// `style` attribute are all expected to be emptied or removed.
#[test]
fn test_walk_and_embed_assets_no_css() {
    let html = "<link rel=\"stylesheet\" href=\"main.css\">\
                <style>html{background-color: #000;}</style>\
                <div style=\"display: none;\"></div>";
    let dom = html_to_dom(&html);
    let url = "http://localhost";
    let mut cache = HashMap::new();
    let client = Client::new();

    let opt_no_css = true;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).unwrap();
    assert_eq!(
        rendered,
        "<html>\
         <head>\
         <link rel=\"stylesheet\" href=\"\">\
         <style></style>\
         </head>\
         <body>\
         <div></div>\
         </body>\
         </html>"
    );
}
/// With `opt_no_images` set, the icon `href` is expected to be blanked and the
/// `<img>` source replaced by a fixed placeholder data URL.
#[test]
fn test_walk_and_embed_assets_no_images() {
    let html = "<link rel=\"icon\" href=\"favicon.ico\">\
                <div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
    let dom = html_to_dom(&html);
    let url = "http://localhost";
    let mut cache = HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = true;
    let opt_silent = true;

    walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).unwrap();
    assert_eq!(
        rendered,
        "<html>\
         <head>\
         <link rel=\"icon\" href=\"\">\
         </head>\
         <body>\
         <div>\
         <img src=\"data:image/png;base64,\
         iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0\
         lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\">\
         </div>\
         </body>\
         </html>"
    );
}
/// With `opt_no_frames` set, the `<iframe>` source is expected to be blanked.
#[test]
fn test_walk_and_embed_assets_no_frames() {
    let html = "<iframe src=\"http://trackbook.com\"></iframe>";
    let dom = html_to_dom(&html);
    let url = "http://localhost";
    let mut cache = HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_frames = true;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).unwrap();
    assert_eq!(
        rendered,
        "<html><head></head><body><iframe src=\"\"></iframe></body></html>"
    );
}
/// With `opt_no_js` set, the `onClick` handler, the external script source and
/// the inline script body are all expected to be stripped.
#[test]
fn test_walk_and_embed_assets_no_js() {
    let html = "<div onClick=\"void(0)\">\
                <script src=\"http://localhost/assets/some.js\"></script>\
                <script>alert(1)</script>\
                </div>";
    let dom = html_to_dom(&html);
    let url = "http://localhost";
    let mut cache = HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_frames = false;
    let opt_no_js = true;
    let opt_no_images = false;
    let opt_silent = true;

    walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).unwrap();
    assert_eq!(
        rendered,
        "<html><head></head><body><div><script src=\"\"></script>\
         <script></script></div></body></html>"
    );
}
/// `integrity` attributes on `<link>` and `<script>` are expected to be dropped
/// from the output (run here with every exclusion option enabled).
#[test]
fn test_walk_and_embed_with_no_integrity() {
    let html = "<title>No integrity</title>\
                <link integrity=\"sha384-...\" rel=\"something\"/>\
                <script integrity=\"sha384-...\" src=\"some.js\"></script>";
    let dom = html_to_dom(&html);
    let url = "http://localhost";
    let mut cache = HashMap::new();
    let client = Client::new();

    let opt_no_css = true;
    let opt_no_frames = true;
    let opt_no_js = true;
    let opt_no_images = true;
    let opt_silent = true;

    walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).unwrap();
    assert_eq!(
        rendered,
        "<html>\
         <head><title>No integrity</title><link rel=\"something\"><script src=\"\"></script></head>\
         <body></body>\
         </html>"
    );
}
/// Stringifies a fragment rooted at a `<div>` with every option switched off.
#[test]
fn test_stringify_document() {
    let markup = "<div><script src=\"some.js\"></script></div>";
    let dom = html_to_dom(&markup);

    let rendered = stringify_document(
        &dom.document,
        false, // opt_no_css
        false, // opt_no_frames
        false, // opt_no_js
        false, // opt_no_images
        false, // opt_isolate
    );

    assert_eq!(
        rendered,
        "<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
    );
}
/// With only `opt_isolate` set, a restrictive CSP `<meta>` is expected as the
/// first child of `<head>`, ahead of the document's own tags.
#[test]
fn test_stringify_document_isolate() {
    let markup = "<title>Isolated document</title>\
                  <link rel=\"something\" href=\"some.css\" />\
                  <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
                  <div><script src=\"some.js\"></script></div>";
    let dom = html_to_dom(&markup);

    let rendered = stringify_document(
        &dom.document,
        false, // opt_no_css
        false, // opt_no_frames
        false, // opt_no_js
        false, // opt_no_images
        true,  // opt_isolate
    );

    assert_eq!(
        rendered,
        "<html>\
         <head>\
         <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
         <title>Isolated document</title>\
         <link rel=\"something\" href=\"some.css\">\
         <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
         </head>\
         <body>\
         <div>\
         <script src=\"some.js\"></script>\
         </div>\
         </body>\
         </html>"
    );
}
/// With only `opt_no_css` set, a `style-src 'none'` CSP `<meta>` is expected at
/// the top of `<head>`; the rest of the markup is left as parsed.
#[test]
fn test_stringify_document_no_css() {
    let markup = "<!doctype html>\
                  <title>Unstyled document</title>\
                  <link rel=\"stylesheet\" href=\"main.css\"/>\
                  <div style=\"display: none;\"></div>";
    let dom = html_to_dom(&markup);

    let rendered = stringify_document(
        &dom.document,
        true,  // opt_no_css
        false, // opt_no_frames
        false, // opt_no_js
        false, // opt_no_images
        false, // opt_isolate
    );

    assert_eq!(
        rendered,
        "<!DOCTYPE html>\
         <html>\
         <head>\
         <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
         <title>Unstyled document</title>\
         <link rel=\"stylesheet\" href=\"main.css\">\
         </head>\
         <body><div style=\"display: none;\"></div></body>\
         </html>"
    );
}
/// With only `opt_no_frames` set, a `frame-src`/`child-src` CSP `<meta>` is
/// expected at the top of `<head>`.
#[test]
fn test_stringify_document_no_frames() {
    let markup = "<!doctype html>\
                  <title>Frameless document</title>\
                  <link rel=\"something\"/>\
                  <div><script src=\"some.js\"></script></div>";
    let dom = html_to_dom(&markup);

    let rendered = stringify_document(
        &dom.document,
        false, // opt_no_css
        true,  // opt_no_frames
        false, // opt_no_js
        false, // opt_no_images
        false, // opt_isolate
    );

    assert_eq!(
        rendered,
        "<!DOCTYPE html>\
         <html>\
         <head>\
         <meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
         <title>Frameless document</title>\
         <link rel=\"something\">\
         </head>\
         <body><div><script src=\"some.js\"></script></div></body>\
         </html>"
    );
}
/// With every option set, a single CSP `<meta>` carrying all the corresponding
/// directives is expected at the top of `<head>`.
#[test]
fn test_stringify_document_isolate_no_frames_no_js_no_css_no_images() {
    let markup = "<!doctype html>\
                  <title>no-frame no-css no-js no-image isolated document</title>\
                  <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
                  <link rel=\"stylesheet\" href=\"some.css\">\
                  <div>\
                  <script src=\"some.js\"></script>\
                  <img style=\"width: 100%;\" src=\"some.png\" />\
                  <iframe src=\"some.html\"></iframe>\
                  </div>";
    let dom = html_to_dom(&markup);

    let rendered = stringify_document(
        &dom.document,
        true, // opt_no_css
        true, // opt_no_frames
        true, // opt_no_js
        true, // opt_no_images
        true, // opt_isolate
    );

    assert_eq!(
        rendered,
        "<!DOCTYPE html>\
         <html>\
         <head>\
         <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; frame-src 'none';child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
         <title>no-frame no-css no-js no-image isolated document</title>\
         <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
         <link rel=\"stylesheet\" href=\"some.css\">\
         </head>\
         <body>\
         <div>\
         <script src=\"some.js\"></script>\
         <img style=\"width: 100%;\" src=\"some.png\">\
         <iframe src=\"some.html\"></iframe>\
         </div>\
         </body>\
         </html>"
    );
}

View File

@@ -0,0 +1,49 @@
use crate::html;
use html5ever::rcdom::{Handle, NodeData};
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Walks a small parsed document and checks, for each known element, the name
/// that `html::get_node_name` reports for its parent.
#[test]
fn get_node_name() {
    let markup = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
    let dom = html::html_to_dom(&markup);
    let mut visited = 0;

    /// Recursively visits `node`, bumping `i` once per visited node (of any kind).
    fn test_walk(node: &Handle, i: &mut i8) {
        *i += 1;

        match &node.data {
            NodeData::Document => {
                for child in node.children.borrow().iter() {
                    test_walk(child, i);
                }
            }
            NodeData::Element { ref name, .. } => {
                let node_name = name.local.as_ref().to_string();
                let parent = html::get_parent_node(node);
                let parent_node_name = html::get_node_name(&parent);

                match node_name.as_str() {
                    "head" | "body" => assert_eq!(parent_node_name, Some("html")),
                    "div" => assert_eq!(parent_node_name, Some("body")),
                    "p" => assert_eq!(parent_node_name, Some("div")),
                    _ => (),
                }

                for child in node.children.borrow().iter() {
                    test_walk(child, i);
                }
            }
            _ => (),
        };
    }

    test_walk(&dom.document, &mut visited);
    assert_eq!(visited, 7);
}

50
src/tests/html/is_icon.rs Normal file
View File

@@ -0,0 +1,50 @@
use crate::html;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// The plain lowercase value `icon` is accepted.
#[test]
fn passing_icon() {
    let recognised = html::is_icon("icon");
    assert!(recognised);
}
/// The capitalised two-word value `Shortcut Icon` is accepted.
#[test]
fn passing_shortcut_icon_capitalized() {
    let recognised = html::is_icon("Shortcut Icon");
    assert!(recognised);
}
/// The all-uppercase value `ICON` is accepted.
#[test]
fn passing_icon_uppercase() {
    let recognised = html::is_icon("ICON");
    assert!(recognised);
}
/// The value `mask-icon` is accepted.
#[test]
fn passing_mask_icon() {
    let recognised = html::is_icon("mask-icon");
    assert!(recognised);
}
/// The value `fluid-icon` is accepted.
#[test]
fn passing_fluid_icon() {
    let recognised = html::is_icon("fluid-icon");
    assert!(recognised);
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// The value `stylesheet` is rejected.
#[test]
fn failing_stylesheet() {
    let recognised = html::is_icon("stylesheet");
    assert!(!recognised);
}
/// An empty string is rejected.
#[test]
fn failing_empty_string() {
    let recognised = html::is_icon("");
    assert!(!recognised);
}

4
src/tests/html/mod.rs Normal file
View File

@@ -0,0 +1,4 @@
mod get_node_name;
mod is_icon;
mod stringify_document;
mod walk_and_embed_assets;

View File

@@ -0,0 +1,188 @@
use crate::html;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Stringifies a fragment rooted at a `<div>` with every option switched off.
#[test]
fn passing_div_as_root_element() {
    let markup = "<div><script src=\"some.js\"></script></div>";
    let dom = html::html_to_dom(&markup);

    let rendered = html::stringify_document(
        &dom.document,
        false, // opt_no_css
        false, // opt_no_frames
        false, // opt_no_js
        false, // opt_no_images
        false, // opt_isolate
    );

    assert_eq!(
        rendered,
        "<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
    );
}
/// Input without explicit `<html>`, `<head>` or `<body>` tags, stringified with
/// only `opt_isolate` set: a restrictive CSP `<meta>` is expected first in `<head>`.
#[test]
fn passing_full_page_with_no_html_head_or_body() {
    let markup = "<title>Isolated document</title>\
                  <link rel=\"something\" href=\"some.css\" />\
                  <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
                  <div><script src=\"some.js\"></script></div>";
    let dom = html::html_to_dom(&markup);

    let rendered = html::stringify_document(
        &dom.document,
        false, // opt_no_css
        false, // opt_no_frames
        false, // opt_no_js
        false, // opt_no_images
        true,  // opt_isolate
    );

    assert_eq!(
        rendered,
        "<html>\
         <head>\
         <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
         <title>Isolated document</title>\
         <link rel=\"something\" href=\"some.css\">\
         <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
         </head>\
         <body>\
         <div>\
         <script src=\"some.js\"></script>\
         </div>\
         </body>\
         </html>"
    );
}
/// Doctype-led input without explicit `<html>`, `<head>` or `<body>` tags,
/// stringified with only `opt_no_css` set: a `style-src 'none'` CSP `<meta>` is
/// expected first in `<head>`.
#[test]
fn passing_doctype_and_the_rest_no_html_head_or_body() {
    let markup = "<!doctype html>\
                  <title>Unstyled document</title>\
                  <link rel=\"stylesheet\" href=\"main.css\"/>\
                  <div style=\"display: none;\"></div>";
    let dom = html::html_to_dom(&markup);

    let rendered = html::stringify_document(
        &dom.document,
        true,  // opt_no_css
        false, // opt_no_frames
        false, // opt_no_js
        false, // opt_no_images
        false, // opt_isolate
    );

    assert_eq!(
        rendered,
        "<!DOCTYPE html>\
         <html>\
         <head>\
         <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
         <title>Unstyled document</title>\
         <link rel=\"stylesheet\" href=\"main.css\">\
         </head>\
         <body><div style=\"display: none;\"></div></body>\
         </html>"
    );
}
/// Doctype-led input without explicit `<html>`, `<head>` or `<body>` tags,
/// stringified with only `opt_no_frames` set: a `frame-src`/`child-src` CSP
/// `<meta>` is expected first in `<head>`.
#[test]
fn passing_doctype_and_the_rest_no_html_head_or_body_forbid_frames() {
    let markup = "<!doctype html>\
                  <title>Frameless document</title>\
                  <link rel=\"something\"/>\
                  <div><script src=\"some.js\"></script></div>";
    let dom = html::html_to_dom(&markup);

    let rendered = html::stringify_document(
        &dom.document,
        false, // opt_no_css
        true,  // opt_no_frames
        false, // opt_no_js
        false, // opt_no_images
        false, // opt_isolate
    );

    assert_eq!(
        rendered,
        "<!DOCTYPE html>\
         <html>\
         <head>\
         <meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
         <title>Frameless document</title>\
         <link rel=\"something\">\
         </head>\
         <body><div><script src=\"some.js\"></script></div></body>\
         </html>"
    );
}
/// With every option set, a single CSP `<meta>` carrying all the corresponding
/// directives is expected first in `<head>`.
#[test]
fn passing_doctype_and_the_rest_all_forbidden() {
    let markup = "<!doctype html>\
                  <title>no-frame no-css no-js no-image isolated document</title>\
                  <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
                  <link rel=\"stylesheet\" href=\"some.css\">\
                  <div>\
                  <script src=\"some.js\"></script>\
                  <img style=\"width: 100%;\" src=\"some.png\" />\
                  <iframe src=\"some.html\"></iframe>\
                  </div>";
    let dom = html::html_to_dom(&markup);

    let rendered = html::stringify_document(
        &dom.document,
        true, // opt_no_css
        true, // opt_no_frames
        true, // opt_no_js
        true, // opt_no_images
        true, // opt_isolate
    );

    assert_eq!(
        rendered,
        "<!DOCTYPE html>\
         <html>\
         <head>\
         <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; frame-src 'none';child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
         <title>no-frame no-css no-js no-image isolated document</title>\
         <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
         <link rel=\"stylesheet\" href=\"some.css\">\
         </head>\
         <body>\
         <div>\
         <script src=\"some.js\"></script>\
         <img style=\"width: 100%;\" src=\"some.png\">\
         <iframe src=\"some.html\"></iframe>\
         </div>\
         </body>\
         </html>"
    );
}

View File

@@ -0,0 +1,399 @@
use crate::html;
use html5ever::serialize::{serialize, SerializeOpts};
use reqwest::blocking::Client;
use std::collections::HashMap;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Runs `html::walk_and_embed_assets` over a bare `<div><P></P></div>` fragment
/// with no exclusion option enabled and checks the serialized document.
#[test]
fn passing_basic() {
    let markup = "<div><P></P></div>";
    let dom = html::html_to_dom(&markup);
    let url = "http://localhost";
    let mut cache = HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    html::walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).unwrap();
    assert_eq!(
        rendered,
        "<html><head></head><body><div><p></p></div></body></html>"
    );
}
/// An `<iframe>` with an empty `src` must come out unchanged (per the test name,
/// this guards against recursive embedding).
#[test]
fn passing_ensure_no_recursive_iframe() {
    let markup = "<div><P></P><iframe src=\"\"></iframe></div>";
    let dom = html::html_to_dom(&markup);
    let url = "http://localhost";
    let mut cache = HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    html::walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).unwrap();
    assert_eq!(
        rendered,
        "<html><head></head><body><div><p></p><iframe src=\"\"></iframe></div></body></html>"
    );
}
/// A `<frame>` with an empty `src` must come out unchanged (per the test name,
/// this guards against recursive embedding).
#[test]
fn passing_ensure_no_recursive_frame() {
    let markup = "<frameset><frame src=\"\"></frameset>";
    let dom = html::html_to_dom(&markup);
    let url = "http://localhost";
    let mut cache = HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    html::walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).unwrap();
    assert_eq!(
        rendered,
        "<html><head></head><frameset><frame src=\"\"></frameset></html>"
    );
}
/// With `opt_no_css` set, the stylesheet `href`, the `<style>` body and the inline
/// `style` attribute are all expected to be emptied or removed.
#[test]
fn passing_no_css() {
    let markup = "<link rel=\"stylesheet\" href=\"main.css\">\
                  <style>html{background-color: #000;}</style>\
                  <div style=\"display: none;\"></div>";
    let dom = html::html_to_dom(&markup);
    let url = "http://localhost";
    let mut cache = HashMap::new();
    let client = Client::new();

    let opt_no_css = true;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    html::walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).unwrap();
    assert_eq!(
        rendered,
        "<html>\
         <head>\
         <link rel=\"stylesheet\" href=\"\">\
         <style></style>\
         </head>\
         <body>\
         <div></div>\
         </body>\
         </html>"
    );
}
/// With `opt_no_images` set, the icon `href` is expected to be blanked and the
/// `<img>` source replaced by the `empty_image!()` placeholder.
#[test]
fn passing_no_images() {
    let markup = "<link rel=\"icon\" href=\"favicon.ico\">\
                  <div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
    let dom = html::html_to_dom(&markup);
    let url = "http://localhost";
    let mut cache = HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = true;
    let opt_silent = true;

    html::walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).unwrap();
    assert_eq!(
        rendered,
        format!(
            "<html>\
             <head>\
             <link rel=\"icon\" href=\"\">\
             </head>\
             <body>\
             <div>\
             <img src=\"{empty_image}\">\
             </div>\
             </body>\
             </html>",
            empty_image = empty_image!()
        )
    );
}
/// With `opt_no_images` set, the `background` attribute on `<body>` is expected
/// to be removed entirely. The input deliberately repeats the attribute.
#[test]
fn passing_no_body_background_images() {
    let markup =
        "<body background=\"no/such/image.png\" background=\"no/such/image2.png\"></body>";
    let dom = html::html_to_dom(&markup);
    let url = "http://localhost";
    let mut cache = HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = true;
    let opt_silent = true;

    html::walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).unwrap();
    assert_eq!(rendered, "<html><head></head><body></body></html>");
}
/// With `opt_no_frames` set, the `<frame>` source is expected to be blanked.
#[test]
fn passing_no_frames() {
    let markup = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
    let dom = html::html_to_dom(&markup);
    let url = "http://localhost";
    let mut cache = HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_frames = true;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    html::walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).unwrap();
    assert_eq!(
        rendered,
        "<html><head></head><frameset><frame src=\"\"></frameset></html>"
    );
}
/// With `opt_no_frames` set, the `<iframe>` source is expected to be blanked.
#[test]
fn passing_no_iframes() {
    let markup = "<iframe src=\"http://trackbook.com\"></iframe>";
    let dom = html::html_to_dom(&markup);
    let url = "http://localhost";
    let mut cache = HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_frames = true;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    html::walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).unwrap();
    assert_eq!(
        rendered,
        "<html><head></head><body><iframe src=\"\"></iframe></body></html>"
    );
}
/// With `opt_no_js` set, the `onClick` handler, the external script source and
/// the inline script body are all expected to be stripped.
#[test]
fn passing_no_js() {
    let markup = "<div onClick=\"void(0)\">\
                  <script src=\"http://localhost/assets/some.js\"></script>\
                  <script>alert(1)</script>\
                  </div>";
    let dom = html::html_to_dom(&markup);
    let url = "http://localhost";
    let mut cache = HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_frames = false;
    let opt_no_js = true;
    let opt_no_images = false;
    let opt_silent = true;

    html::walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).unwrap();
    assert_eq!(
        rendered,
        "<html><head></head><body><div><script src=\"\"></script>\
         <script></script></div></body></html>"
    );
}
/// `integrity` attributes on `<link>` and `<script>` are expected to be dropped
/// from the output (run here with every exclusion option enabled).
#[test]
fn passing_with_no_integrity() {
    let markup = "<title>No integrity</title>\
                  <link integrity=\"sha384-...\" rel=\"something\"/>\
                  <script integrity=\"sha384-...\" src=\"some.js\"></script>";
    let dom = html::html_to_dom(&markup);
    let url = "http://localhost";
    let mut cache = HashMap::new();
    let client = Client::new();

    let opt_no_css = true;
    let opt_no_frames = true;
    let opt_no_js = true;
    let opt_no_images = true;
    let opt_silent = true;

    html::walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serialized bytes as UTF-8; casting each byte to `char` would
    // mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).unwrap();
    assert_eq!(
        rendered,
        "<html>\
         <head><title>No integrity</title><link rel=\"something\"><script src=\"\"></script></head>\
         <body></body>\
         </html>"
    );
}

View File

@@ -1,25 +0,0 @@
use crate::http::retrieve_asset;
use reqwest::blocking::Client;
use std::collections::HashMap;
/// A `data:` URL passed to `retrieve_asset` is expected to come back untouched,
/// both as the data and as the final URL, whatever MIME type is supplied.
#[test]
fn test_retrieve_asset() {
    let mut cache = HashMap::new();
    let client = Client::new();

    // First with an empty MIME type, then with an explicit one.
    for &mime in ["", "image/png"].iter() {
        let (data, final_url) = retrieve_asset(
            &mut cache,
            &client,
            "data:text/html;base64,...",
            true,
            mime,
            false,
        )
        .unwrap();

        assert_eq!(&data, "data:text/html;base64,...");
        assert_eq!(&final_url, "data:text/html;base64,...");
    }
}

View File

@@ -1,13 +0,0 @@
use crate::js::attr_is_event_handler;
/// Checks `attr_is_event_handler` against attribute names it should accept and
/// names it should reject.
#[test]
fn test_attr_is_event_handler() {
    // succeeding
    for &attr in ["onBlur", "onclick", "onClick"].iter() {
        assert!(attr_is_event_handler(attr));
    }

    // failing
    for &attr in ["href", "", "class"].iter() {
        assert!(!attr_is_event_handler(attr));
    }
}

View File

@@ -0,0 +1,45 @@
use crate::js;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// The camel-cased name `onBlur` is accepted.
#[test]
fn passing_onblur_camelcase() {
    let is_handler = js::attr_is_event_handler("onBlur");
    assert!(is_handler);
}
/// The lowercase name `onclick` is accepted.
#[test]
fn passing_onclick_lowercase() {
    let is_handler = js::attr_is_event_handler("onclick");
    assert!(is_handler);
}
/// The camel-cased name `onClick` is accepted.
#[test]
fn passing_onclick_camelcase() {
    let is_handler = js::attr_is_event_handler("onClick");
    assert!(is_handler);
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// The name `href` is rejected.
#[test]
fn failing_href() {
    let is_handler = js::attr_is_event_handler("href");
    assert!(!is_handler);
}
/// An empty attribute name is rejected.
#[test]
fn failing_empty_string() {
    let is_handler = js::attr_is_event_handler("");
    assert!(!is_handler);
}
/// The name `class` is rejected.
#[test]
fn failing_class() {
    let is_handler = js::attr_is_event_handler("class");
    assert!(!is_handler);
}

1
src/tests/js/mod.rs Normal file
View File

@@ -0,0 +1 @@
mod attr_is_event_handler;

View File

@@ -0,0 +1,14 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    /// `empty_image!()` must expand to exactly this PNG data URL.
    #[test]
    fn contains_correct_image_data() {
        let expected = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII=";
        assert_eq!(empty_image!(), expected);
    }
}

2
src/tests/macros/mod.rs Normal file
View File

@@ -0,0 +1,2 @@
mod empty_image;
mod str;

24
src/tests/macros/str.rs Normal file
View File

@@ -0,0 +1,24 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    /// `str!()` with no arguments compares equal to the empty string.
    #[test]
    fn returns_empty_string() {
        let empty = str!();
        assert_eq!(empty, "");
    }

    /// `str!` applied to an integer yields its decimal text.
    #[test]
    fn converts_integer_into_string() {
        let converted = str!(123);
        assert_eq!(converted, "123");
    }

    /// `str!` applied to a string slice yields the same text.
    #[test]
    fn converts_str_into_string() {
        let converted = str!("abc");
        assert_eq!(converted, "abc");
    }
}

View File

@@ -1,4 +1,6 @@
mod cli;
mod css;
mod html;
mod http;
mod js;
mod macros;
mod utils;

View File

@@ -1,177 +0,0 @@
use crate::utils::{
clean_url, data_to_dataurl, detect_mimetype, is_data_url, is_valid_url, resolve_url,
url_has_protocol,
};
use url::ParseError;
/// Encodes a small JavaScript snippet with an explicit MIME type and checks
/// the resulting base64 data URL.
#[test]
fn test_data_to_dataurl() {
    let payload = "var word = 'hello';\nalert(word);\n".as_bytes();
    let expected =
        "data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK";

    let datauri = data_to_dataurl("application/javascript", payload);

    assert_eq!(&datauri, expected);
}
/// Feeds known magic-byte prefixes to `detect_mimetype` and checks the MIME
/// type reported for each of them.
#[test]
fn test_detect_mimetype() {
    // (leading bytes, expected MIME type), checked in order
    let cases: &[(&[u8], &str)] = &[
        // image
        (b"GIF87a", "image/gif"),
        (b"GIF89a", "image/gif"),
        (b"\xFF\xD8\xFF", "image/jpeg"),
        (b"\x89PNG\x0D\x0A\x1A\x0A", "image/png"),
        (b"<?xml ", "image/svg+xml"),
        (b"<svg ", "image/svg+xml"),
        (b"RIFF....WEBPVP8 ", "image/webp"),
        (b"\x00\x00\x01\x00", "image/x-icon"),
        // audio
        (b"ID3", "audio/mpeg"),
        (b"\xFF\x0E", "audio/mpeg"),
        (b"\xFF\x0F", "audio/mpeg"),
        (b"OggS", "audio/ogg"),
        (b"RIFF....WAVEfmt ", "audio/wav"),
        (b"fLaC", "audio/x-flac"),
        // video
        (b"RIFF....AVI LIST", "video/avi"),
        (b"....ftyp", "video/mp4"),
        (b"\x00\x00\x01\x0B", "video/mpeg"),
        (b"....moov", "video/quicktime"),
        (b"\x1A\x45\xDF\xA3", "video/webm"),
    ];

    for &(magic, expected) in cases.iter() {
        assert_eq!(detect_mimetype(magic), expected);
    }
}
/// Checks `url_has_protocol` against URLs that do and do not start with a
/// scheme.
#[test]
fn test_url_has_protocol() {
    // succeeding
    let with_protocol = [
        "mailto:somebody@somewhere.com?subject=hello",
        "tel:5551234567",
        "ftp:user:password@some-ftp-server.com",
        "javascript:void(0)",
        "http://news.ycombinator.com",
        "https://github.com",
        "MAILTO:somebody@somewhere.com?subject=hello",
    ];
    for &url in with_protocol.iter() {
        assert!(url_has_protocol(url));
    }

    // failing
    let without_protocol = [
        "//some-hostname.com/some-file.html",
        "some-hostname.com/some-file.html",
        "/some-file.html",
        "",
    ];
    for &url in without_protocol.iter() {
        assert!(!url_has_protocol(url));
    }
}
/// Checks which inputs `is_valid_url` accepts and which it rejects.
#[test]
fn test_is_valid_url() {
    // succeeding
    for &url in ["https://www.rust-lang.org/", "http://kernel.org"].iter() {
        assert!(is_valid_url(url));
    }

    // failing
    let rejected = [
        "//kernel.org",
        "./index.html",
        "some-local-page.htm",
        "ftp://1.2.3.4/www/index.html",
        "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
    ];
    for &url in rejected.iter() {
        assert!(!is_valid_url(url));
    }
}
/// Resolves a series of targets against base URLs and checks each result.
/// Any resolution error is propagated and fails the test.
#[test]
fn test_resolve_url() -> Result<(), ParseError> {
    // (base URL, target, expected resolved URL), checked in order
    let cases = [
        (
            "https://www.kernel.org",
            "../category/signatures.html",
            "https://www.kernel.org/category/signatures.html",
        ),
        (
            "https://www.kernel.org",
            "category/signatures.html",
            "https://www.kernel.org/category/signatures.html",
        ),
        (
            "saved_page.htm",
            "https://www.kernel.org/category/signatures.html",
            "https://www.kernel.org/category/signatures.html",
        ),
        (
            "https://www.kernel.org",
            "//www.kernel.org/theme/images/logos/tux.png",
            "https://www.kernel.org/theme/images/logos/tux.png",
        ),
        (
            "https://www.kernel.org",
            "//another-host.org/theme/images/logos/tux.png",
            "https://another-host.org/theme/images/logos/tux.png",
        ),
        (
            "https://www.kernel.org/category/signatures.html",
            "/theme/images/logos/tux.png",
            "https://www.kernel.org/theme/images/logos/tux.png",
        ),
        (
            "https://www.w3schools.com/html/html_iframe.asp",
            "default.asp",
            "https://www.w3schools.com/html/default.asp",
        ),
    ];

    for &(base, target, expected) in cases.iter() {
        let resolved_url = resolve_url(base, target)?;
        assert_eq!(resolved_url.as_str(), expected);
    }

    Ok(())
}
/// Checks `is_data_url` on one data URL and several non-data inputs; a parse
/// error is treated as "not a data URL".
#[test]
fn test_is_data_url() {
    // succeeding
    let data_url = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    assert!(is_data_url(data_url).unwrap_or(false));

    // failing
    for &url in ["https://kernel.org", "//kernel.org", ""].iter() {
        assert!(!is_data_url(url).unwrap_or(false));
    }
}
/// `clean_url` strips a fragment, an empty fragment, and an empty query
/// followed by an empty fragment, all down to the same bare URL.
#[test]
fn test_clean_url() {
    let expected = "https://somewhere.com/font.eot";
    let inputs = [
        "https://somewhere.com/font.eot#iefix",
        "https://somewhere.com/font.eot#",
        "https://somewhere.com/font.eot?#",
    ];

    for &input in inputs.iter() {
        assert_eq!(clean_url(input), expected);
    }
}

View File

@@ -0,0 +1,32 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A non-empty fragment is stripped from the URL.
#[test]
fn passing_removes_fragment() {
    let cleaned = utils::clean_url("https://somewhere.com/font.eot#iefix");
    assert_eq!(cleaned, "https://somewhere.com/font.eot");
}
/// A trailing `#` with no fragment text is stripped from the URL.
#[test]
fn passing_removes_empty_fragment() {
    let cleaned = utils::clean_url("https://somewhere.com/font.eot#");
    assert_eq!(cleaned, "https://somewhere.com/font.eot");
}
/// A trailing `?#` (empty query plus empty fragment) is stripped from the URL.
#[test]
fn passing_removes_empty_query_and_empty_fragment() {
    let cleaned = utils::clean_url("https://somewhere.com/font.eot?#");
    assert_eq!(cleaned, "https://somewhere.com/font.eot");
}

View File

@@ -0,0 +1,28 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Encoding with an explicit media type and no fragment yields a plain
/// base64 data URL carrying that media type.
#[test]
fn passing_encode_string_with_specific_media_type() {
    let payload = "var word = 'hello';\nalert(word);\n".as_bytes();
    let expected =
        "data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK";

    let data_url = utils::data_to_data_url("application/javascript", payload, "", "");

    assert_eq!(&data_url, expected);
}
/// A non-empty fragment argument is appended to the data URL after a `#`.
#[test]
fn passing_encode_append_fragment() {
    let payload = "<svg></svg>\n".as_bytes();
    let data_url = utils::data_to_data_url("text/css", payload, "", "fragment");
    let expected = "data:text/css;base64,PHN2Zz48L3N2Zz4K#fragment";
    assert_eq!(&data_url, expected);
}

View File

@@ -0,0 +1,58 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A base64-encoded `text/html` data URL is decoded back to its text.
#[test]
fn passing_parse_text_html_base64() {
    let data_url = "data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==";
    let text = utils::data_url_to_text(data_url);
    assert_eq!(
        text,
        "Work expands so as to fill the time available for its completion"
    );
}
/// A `text/html;utf8` data URL yields the text after the comma.
#[test]
fn passing_parse_text_html_utf8() {
    let data_url =
        "data:text/html;utf8,Work expands so as to fill the time available for its completion";
    let text = utils::data_url_to_text(data_url);
    assert_eq!(
        text,
        "Work expands so as to fill the time available for its completion"
    );
}
/// A `text/html` data URL with no encoding parameter yields the text after
/// the comma.
#[test]
fn passing_parse_text_html_plaintext() {
    let data_url =
        "data:text/html,Work expands so as to fill the time available for its completion";
    let text = utils::data_url_to_text(data_url);
    assert_eq!(
        text,
        "Work expands so as to fill the time available for its completion"
    );
}
/// A data URL surrounded by a leading and a trailing space still yields the
/// text without those spaces.
#[test]
fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() {
    let padded_data_url = " data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ";
    let text = utils::data_url_to_text(padded_data_url);
    assert_eq!(
        text,
        "Work expands so as to fill the time available for its completion"
    );
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// The bare word `data` is not a data URL and yields an empty string.
#[test]
fn failing_just_word_data() {
    let text = utils::data_url_to_text("data");
    assert_eq!(text, "");
}

View File

@@ -0,0 +1,26 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Percent-encoded UTF-8 sequences are decoded into the original characters.
#[test]
fn passing_decode_unicode_characters() {
    let encoded = str!(
        "%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
    );
    let decoded = utils::decode_url(encoded);
    assert_eq!(decoded, "検ヒム解塗ゃッ = サ");
}
/// `%20` and `%23` inside a `file://` URL are decoded to a space and `#`.
#[test]
fn passing_decode_file_url() {
    let encoded = str!("file:///tmp/space%20here/test%231.html");
    let decoded = utils::decode_url(encoded);
    assert_eq!(decoded, "file:///tmp/space here/test#1.html");
}

View File

@@ -0,0 +1,147 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Data starting with `GIF87a` is detected as `image/gif`.
#[test]
fn passing_image_gif87() {
    let data: &[u8] = b"GIF87a";
    assert_eq!(utils::detect_media_type(data, ""), "image/gif");
}
/// Data starting with `GIF89a` is detected as `image/gif`.
#[test]
fn passing_image_gif89() {
    let data: &[u8] = b"GIF89a";
    assert_eq!(utils::detect_media_type(data, ""), "image/gif");
}
/// Data starting with bytes `FF D8 FF` is detected as `image/jpeg`.
#[test]
fn passing_image_jpeg() {
    let data: &[u8] = b"\xFF\xD8\xFF";
    assert_eq!(utils::detect_media_type(data, ""), "image/jpeg");
}
/// Data starting with the 8-byte PNG signature is detected as `image/png`.
#[test]
fn passing_image_png() {
    let data: &[u8] = b"\x89PNG\x0D\x0A\x1A\x0A";
    assert_eq!(utils::detect_media_type(data, ""), "image/png");
}
/// Data starting with `<svg ` is detected as `image/svg+xml`.
#[test]
fn passing_image_svg() {
    let data: &[u8] = b"<svg ";
    assert_eq!(utils::detect_media_type(data, ""), "image/svg+xml");
}
/// Data starting with `RIFF....WEBPVP8 ` is detected as `image/webp`.
#[test]
fn passing_image_webp() {
    let data: &[u8] = b"RIFF....WEBPVP8 ";
    assert_eq!(utils::detect_media_type(data, ""), "image/webp");
}
/// Data starting with bytes `00 00 01 00` is detected as `image/x-icon`.
#[test]
fn passing_image_icon() {
    let data: &[u8] = b"\x00\x00\x01\x00";
    assert_eq!(utils::detect_media_type(data, ""), "image/x-icon");
}
/// Data starting with `<?xml ` paired with a `.svg` file name is detected as
/// `image/svg+xml`.
#[test]
fn passing_image_svg_filename() {
    let data: &[u8] = b"<?xml ";
    let media_type = utils::detect_media_type(data, "local-file.svg");
    assert_eq!(media_type, "image/svg+xml");
}
/// With empty data, a URL ending in upper-case `.SVG` is still detected as
/// `image/svg+xml`.
#[test]
fn passing_image_svg_url_uppercase() {
    let data: &[u8] = b"";
    let url = "https://some-site.com/images/local-file.SVG";
    assert_eq!(utils::detect_media_type(data, url), "image/svg+xml");
}
/// Data starting with `ID3` is detected as `audio/mpeg`.
#[test]
fn passing_audio_mpeg() {
    let data: &[u8] = b"ID3";
    assert_eq!(utils::detect_media_type(data, ""), "audio/mpeg");
}
/// Data starting with bytes `FF 0E` is detected as `audio/mpeg`.
#[test]
fn passing_audio_mpeg_2() {
    let data: &[u8] = b"\xFF\x0E";
    assert_eq!(utils::detect_media_type(data, ""), "audio/mpeg");
}
/// Data starting with bytes `FF 0F` is detected as `audio/mpeg`.
#[test]
fn passing_audio_mpeg_3() {
    let data: &[u8] = b"\xFF\x0F";
    assert_eq!(utils::detect_media_type(data, ""), "audio/mpeg");
}
/// Data starting with `OggS` is detected as `audio/ogg`.
#[test]
fn passing_audio_ogg() {
    let data: &[u8] = b"OggS";
    assert_eq!(utils::detect_media_type(data, ""), "audio/ogg");
}
/// Data starting with `RIFF....WAVEfmt ` is detected as `audio/wav`.
#[test]
fn passing_audio_wav() {
    let data: &[u8] = b"RIFF....WAVEfmt ";
    assert_eq!(utils::detect_media_type(data, ""), "audio/wav");
}
/// Data starting with `fLaC` is detected as `audio/x-flac`.
#[test]
fn passing_audio_flac() {
    let data: &[u8] = b"fLaC";
    assert_eq!(utils::detect_media_type(data, ""), "audio/x-flac");
}
/// Data starting with `RIFF....AVI LIST` is detected as `video/avi`.
#[test]
fn passing_video_avi() {
    let data: &[u8] = b"RIFF....AVI LIST";
    assert_eq!(utils::detect_media_type(data, ""), "video/avi");
}
/// Data starting with `....ftyp` is detected as `video/mp4`.
#[test]
fn passing_video_mp4() {
    let data: &[u8] = b"....ftyp";
    assert_eq!(utils::detect_media_type(data, ""), "video/mp4");
}
/// Data starting with bytes `00 00 01 0B` is detected as `video/mpeg`.
#[test]
fn passing_video_mpeg() {
    let data: &[u8] = b"\x00\x00\x01\x0B";
    assert_eq!(utils::detect_media_type(data, ""), "video/mpeg");
}
/// Data starting with `....moov` is detected as `video/quicktime`.
#[test]
fn passing_video_quicktime() {
    let data: &[u8] = b"....moov";
    assert_eq!(utils::detect_media_type(data, ""), "video/quicktime");
}
/// Data starting with bytes `1A 45 DF A3` is detected as `video/webm`.
#[test]
fn passing_video_webm() {
    let data: &[u8] = b"\x1A\x45\xDF\xA3";
    assert_eq!(utils::detect_media_type(data, ""), "video/webm");
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Data with no recognised prefix and no URL yields an empty media type.
#[test]
fn failing_unknown_media_type() {
    let data: &[u8] = b"abcdef0123456789";
    assert_eq!(utils::detect_media_type(data, ""), "");
}

View File

@@ -0,0 +1,23 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Converting a `file://` URL to a filesystem path drops both the protocol
/// and the fragment; the expected path shape depends on the target OS.
// NOTE(review): "protocl" in the name is a typo for "protocol"; the
// identifier is kept unchanged here.
#[test]
fn passing_remove_protocl_and_fragment() {
    let (file_url, expected_path) = if cfg!(windows) {
        (
            "file:///C:/documents/some-path/some-file.svg#fragment",
            "C:\\documents\\some-path\\some-file.svg",
        )
    } else {
        (
            "file:///tmp/some-path/some-file.svg#fragment",
            "/tmp/some-path/some-file.svg",
        )
    };

    assert_eq!(utils::file_url_to_fs_path(file_url), expected_path);
}

View File

@@ -0,0 +1,23 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// The fragment following `#` in a data URL is returned without the `#`.
#[test]
fn passing_data_url() {
    let url = "data:image/svg+xml;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h#test";
    let fragment = utils::get_url_fragment(url);
    assert_eq!(fragment, "test");
}
/// An HTTPS URL ending in a bare `#` has an empty fragment.
#[test]
fn passing_https_empty() {
    let fragment = utils::get_url_fragment("https://kernel.org#");
    assert_eq!(fragment, "");
}

View File

@@ -0,0 +1,44 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A base64 `text/html` data URL is recognised as a data URL.
#[test]
fn passing_data_url_text_html() {
    let url = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    assert!(utils::is_data_url(url));
}
/// A data URL that omits the media type is still recognised as a data URL.
#[test]
fn passing_data_url_no_media_type() {
    let url = "data:;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    assert!(utils::is_data_url(url));
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// An HTTPS URL is not a data URL.
#[test]
fn failing_https_url() {
    let url = "https://kernel.org";
    assert!(!utils::is_data_url(url));
}
/// A protocol-relative URL is not a data URL.
#[test]
fn failing_no_protocol_url() {
    let url = "//kernel.org";
    assert!(!utils::is_data_url(url));
}
/// An empty string is not a data URL.
#[test]
fn failing_empty_string() {
    let url = "";
    assert!(!utils::is_data_url(url));
}

View File

@@ -0,0 +1,75 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A Unix-style `file:///` URL is recognised as a file URL.
#[test]
fn passing_unix_file_url() {
    let url = "file:///home/user/Websites/my-website/index.html";
    assert!(utils::is_file_url(url));
}
/// A Windows-style `file:///C:/…` URL is recognised as a file URL.
#[test]
fn passing_windows_file_url() {
    let url =
        "file:///C:/Documents%20and%20Settings/user/Websites/my-website/assets/images/logo.png";
    assert!(utils::is_file_url(url));
}
/// A Unix file URL written with backslashes in place of forward slashes is
/// still recognised as a file URL.
#[test]
fn passing_unix_url_with_backslashes() {
    let url = "file:\\\\\\home\\user\\Websites\\my-website\\index.html";
    assert!(utils::is_file_url(url));
}
/// A Windows file URL written with backslashes in place of forward slashes is
/// still recognised as a file URL.
#[test]
fn passing_windows_file_url_with_backslashes() {
    let url = "file:\\\\\\C:\\Documents%20and%20Settings\\user\\Websites\\my-website\\assets\\images\\logo.png";
    assert!(utils::is_file_url(url));
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A protocol-relative URL is not a file URL.
// NOTE(review): "protocl" in the name is a typo for "protocol"; the
// identifier is kept unchanged here.
#[test]
fn failing_url_with_no_protocl() {
    let url = "//kernel.org";
    assert!(!utils::is_file_url(url));
}
/// A `./`-relative file name is not a file URL.
#[test]
fn failing_dot_slash_filename() {
    let url = "./index.html";
    assert!(!utils::is_file_url(url));
}
/// A bare file name is not a file URL.
#[test]
fn failing_just_filename() {
    let url = "some-local-page.htm";
    assert!(!utils::is_file_url(url));
}
/// An HTTPS URL with an IP address and explicit port is not a file URL.
#[test]
fn failing_https_ip_port_url() {
    let url = "https://1.2.3.4:80/www/index.html";
    assert!(!utils::is_file_url(url));
}
/// A data URL is not a file URL.
#[test]
fn failing_data_url() {
    let url = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    assert!(!utils::is_file_url(url));
}
/// The bare word `file` is not a file URL.
#[test]
fn failing_just_word_file() {
    let url = "file";
    assert!(!utils::is_file_url(url));
}

View File

@@ -0,0 +1,57 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A plain `http://` URL is recognised as an HTTP URL.
#[test]
fn passing_http_url() {
    let url = "http://kernel.org";
    assert!(utils::is_http_url(url));
}
/// An `https://` URL is recognised as an HTTP URL.
#[test]
fn passing_https_url() {
    let url = "https://www.rust-lang.org/";
    assert!(utils::is_http_url(url));
}
/// An HTTP URL written with backslashes in place of forward slashes is still
/// recognised as an HTTP URL.
#[test]
fn passing_http_url_with_backslashes() {
    let url = "http:\\\\freebsd.org\\";
    assert!(utils::is_http_url(url));
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A protocol-relative URL is not an HTTP URL.
#[test]
fn failing_url_with_no_protocol() {
    let url = "//kernel.org";
    assert!(!utils::is_http_url(url));
}
/// A `./`-relative file name is not an HTTP URL.
#[test]
fn failing_dot_slash_filename() {
    let url = "./index.html";
    assert!(!utils::is_http_url(url));
}
/// A bare file name is not an HTTP URL.
#[test]
fn failing_just_filename() {
    let url = "some-local-page.htm";
    assert!(!utils::is_http_url(url));
}
/// An `ftp://` URL pointing at an IP address is not an HTTP URL.
// NOTE(review): despite the test's name, the input is an FTP URL without a
// port, not an HTTPS URL with one.
#[test]
fn failing_https_ip_port_url() {
    let url = "ftp://1.2.3.4/www/index.html";
    assert!(!utils::is_http_url(url));
}
/// A data URL is not an HTTP URL.
#[test]
fn failing_data_url() {
    let url = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    assert!(!utils::is_http_url(url));
}

13
src/tests/utils/mod.rs Normal file
View File

@@ -0,0 +1,13 @@
mod clean_url;
mod data_to_data_url;
mod data_url_to_text;
mod decode_url;
mod detect_media_type;
mod file_url_to_fs_path;
mod get_url_fragment;
mod is_data_url;
mod is_file_url;
mod is_http_url;
mod resolve_url;
mod retrieve_asset;
mod url_has_protocol;

View File

@@ -0,0 +1,229 @@
use url::ParseError;
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A `../` target resolved against a host-only HTTPS base stays under that
/// host's root.
#[test]
fn passing_from_https_to_level_up_relative() -> Result<(), ParseError> {
    let base = "https://www.kernel.org";
    let target = "../category/signatures.html";

    let resolved_url = utils::resolve_url(base, target)?;

    assert_eq!(
        resolved_url.as_str(),
        "https://www.kernel.org/category/signatures.html"
    );
    Ok(())
}
/// An absolute HTTPS target is returned as-is even when the base is only a
/// file name.
#[test]
fn passing_from_just_filename_to_full_https_url() -> Result<(), ParseError> {
    let base = "saved_page.htm";
    let target = "https://www.kernel.org/category/signatures.html";

    let resolved_url = utils::resolve_url(base, target)?;

    assert_eq!(
        resolved_url.as_str(),
        "https://www.kernel.org/category/signatures.html"
    );
    Ok(())
}
/// A protocol-relative target takes its scheme from the HTTPS base.
#[test]
fn passing_from_https_url_to_url_with_no_protocol() -> Result<(), ParseError> {
    let base = "https://www.kernel.org";
    let target = "//www.kernel.org/theme/images/logos/tux.png";

    let resolved_url = utils::resolve_url(base, target)?;

    assert_eq!(
        resolved_url.as_str(),
        "https://www.kernel.org/theme/images/logos/tux.png"
    );
    Ok(())
}
/// A protocol-relative target on another host takes the base's scheme but
/// keeps its own host name.
#[test]
fn passing_from_https_url_to_url_with_no_protocol_and_on_different_hostname(
) -> Result<(), ParseError> {
    let base = "https://www.kernel.org";
    let target = "//another-host.org/theme/images/logos/tux.png";

    let resolved_url = utils::resolve_url(base, target)?;

    assert_eq!(
        resolved_url.as_str(),
        "https://another-host.org/theme/images/logos/tux.png"
    );
    Ok(())
}
/// A root-relative target replaces the whole path of the HTTPS base.
#[test]
fn passing_from_https_url_to_relative_root_path() -> Result<(), ParseError> {
    let base = "https://www.kernel.org/category/signatures.html";
    let target = "/theme/images/logos/tux.png";

    let resolved_url = utils::resolve_url(base, target)?;

    assert_eq!(
        resolved_url.as_str(),
        "https://www.kernel.org/theme/images/logos/tux.png"
    );
    Ok(())
}
/// A bare file name target replaces only the last path segment of the base.
#[test]
fn passing_from_https_to_just_filename() -> Result<(), ParseError> {
    let base = "https://www.w3schools.com/html/html_iframe.asp";
    let target = "default.asp";

    let resolved_url = utils::resolve_url(base, target)?;

    assert_eq!(
        resolved_url.as_str(),
        "https://www.w3schools.com/html/default.asp"
    );
    Ok(())
}
/// An absolute HTTPS target is returned as-is when the base is a data URL.
#[test]
fn passing_from_data_url_to_https() -> Result<(), ParseError> {
    let base = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let target = "https://www.kernel.org/category/signatures.html";

    let resolved_url = utils::resolve_url(base, target)?;

    assert_eq!(
        resolved_url.as_str(),
        "https://www.kernel.org/category/signatures.html"
    );
    Ok(())
}
/// When both base and target are data URLs, the target data URL is returned.
#[test]
fn passing_from_data_url_to_data_url() -> Result<(), ParseError> {
    let base = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let target = "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K";

    let resolved_url = utils::resolve_url(base, target)?;

    assert_eq!(
        resolved_url.as_str(),
        "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K"
    );
    Ok(())
}
/// A relative path resolved against a `file://` base lands in the base
/// file's directory.
#[test]
fn passing_from_file_url_to_relative_path() -> Result<(), ParseError> {
    // Propagate a resolution failure with `?` rather than masking it as an
    // empty string, so the test reports the actual `ParseError`.
    let resolved_url = utils::resolve_url(
        "file:///home/user/Websites/my-website/index.html",
        "assets/images/logo.png",
    )?;

    assert_eq!(
        resolved_url.as_str(),
        "file:///home/user/Websites/my-website/assets/images/logo.png"
    );

    Ok(())
}
/// Base and target written with backslashes resolve to the same
/// forward-slash `file://` URL as their forward-slash equivalents.
#[test]
fn passing_from_file_url_to_relative_path_with_backslashes() -> Result<(), ParseError> {
    // Propagate a resolution failure with `?` rather than masking it as an
    // empty string, so the test reports the actual `ParseError`.
    let resolved_url = utils::resolve_url(
        "file:\\\\\\home\\user\\Websites\\my-website\\index.html",
        "assets\\images\\logo.png",
    )?;

    assert_eq!(
        resolved_url.as_str(),
        "file:///home/user/Websites/my-website/assets/images/logo.png"
    );

    Ok(())
}
/// An absolute `file://` target is returned as-is when the base is a data URL.
#[test]
fn passing_from_data_url_to_file_url() -> Result<(), ParseError> {
    // Propagate a resolution failure with `?` rather than masking it as an
    // empty string, so the test reports the actual `ParseError`.
    let resolved_url = utils::resolve_url(
        "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
        "file:///etc/passwd",
    )?;

    assert_eq!(resolved_url.as_str(), "file:///etc/passwd");

    Ok(())
}
/// The fragment of a relative target is kept in the resolved URL.
#[test]
fn passing_preserve_fragment() -> Result<(), ParseError> {
    // Propagate a resolution failure with `?` rather than masking it as an
    // empty string, so the test reports the actual `ParseError`.
    let resolved_url = utils::resolve_url(
        "http://doesnt-matter.local/",
        "css/fonts/fontmarvelous.svg#fontmarvelous",
    )?;

    assert_eq!(
        resolved_url.as_str(),
        "http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous"
    );

    Ok(())
}
/// An absolute `file://` target resolved against a `file://` base is
/// returned unchanged; the paths used depend on the target OS.
#[test]
fn passing_resolve_from_file_url_to_file_url() -> Result<(), ParseError> {
    // Pick the platform-specific inputs once; the expected result is the
    // target URL itself.
    let (base, target) = if cfg!(windows) {
        ("file:///c:/index.html", "file:///c:/image.png")
    } else {
        ("file:///tmp/index.html", "file:///tmp/image.png")
    };

    // Propagate a resolution failure with `?` rather than masking it as an
    // empty string, so the test reports the actual `ParseError`.
    let resolved_url = utils::resolve_url(base, target)?;

    assert_eq!(resolved_url.as_str(), target);

    Ok(())
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A protocol-relative target cannot be resolved against a data URL base;
/// the failure is mapped to an empty string and asserted as such.
#[test]
fn failing_from_data_url_to_url_with_no_protocol() -> Result<(), ParseError> {
    let base = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let target = "//www.w3schools.com/html/html_iframe.asp";

    let resolved_url = utils::resolve_url(base, target).unwrap_or(str!());

    assert_eq!(resolved_url.as_str(), "");
    Ok(())
}

View File

@@ -0,0 +1,137 @@
use crate::utils;
use reqwest::blocking::Client;
use std::collections::HashMap;
use std::env;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// If both source and target are data URLs, the result contains the target
/// data URL, both as data and as final URL.
#[test]
fn passing_read_data_url() {
    let mut cache = HashMap::new();
    let client = Client::new();
    let parent_url = "data:text/html;base64,SoUrCe";
    let target_url = "data:text/html;base64,TaRgEt";

    let (retrieved_data, final_url) = utils::retrieve_asset(
        &mut cache,
        &client,
        parent_url,
        target_url,
        true,
        "",
        false,
    )
    .unwrap();

    assert_eq!(&retrieved_data, "data:text/html;base64,TaRgEt");
    assert_eq!(&final_url, "data:text/html;base64,TaRgEt");
}
/// The media type parameter should not influence data URLs: the target data
/// URL comes back unchanged even though `image/png` is suggested.
#[test]
fn passing_read_data_url_ignore_suggested_media_type() {
    let mut cache = HashMap::new();
    let client = Client::new();
    let parent_url = "data:text/html;base64,SoUrCe";
    let target_url = "data:text/html;base64,TaRgEt";

    let (data, final_url) = utils::retrieve_asset(
        &mut cache,
        &client,
        parent_url,
        target_url,
        true,
        "image/png",
        false,
    )
    .unwrap();

    assert_eq!(&data, "data:text/html;base64,TaRgEt");
    assert_eq!(&final_url, "data:text/html;base64,TaRgEt");
}
/// Inclusion of local assets from local sources should be allowed: a script
/// referenced from a `file://` page is read and returned as a data URL.
#[test]
fn passing_read_local_file_with_file_url_parent() {
    let mut cache = HashMap::new();
    let client = Client::new();

    // Both URLs are built from the current working directory.
    let cwd = env::current_dir().unwrap();
    let cwd_str = cwd.to_str().unwrap();
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    let parent_url = format!(
        "{file}{cwd}/src/tests/data/local-file.html",
        file = file_url_protocol,
        cwd = cwd_str
    );
    let asset_url = format!(
        "{file}{cwd}/src/tests/data/local-script.js",
        file = file_url_protocol,
        cwd = cwd_str
    );

    let (data, final_url) = utils::retrieve_asset(
        &mut cache,
        &client,
        &parent_url,
        &asset_url,
        true,
        "application/javascript",
        false,
    )
    .unwrap();

    assert_eq!(&data, "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
    assert_eq!(&final_url, &asset_url);
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Inclusion of local assets from data URL sources should not be allowed:
/// both the returned data and the final URL are empty.
#[test]
fn failing_read_local_file_with_data_url_parent() {
    let mut cache = HashMap::new();
    let client = Client::new();
    let parent_url = "data:text/html;base64,SoUrCe";
    let target_url = "file:///etc/passwd";

    let (data, final_url) = utils::retrieve_asset(
        &mut cache,
        &client,
        parent_url,
        target_url,
        true,
        "",
        false,
    )
    .unwrap();

    assert_eq!(&data, "");
    assert_eq!(&final_url, "");
}
/// Inclusion of local assets from remote sources should not be allowed:
/// both the returned data and the final URL are empty.
#[test]
fn failing_read_local_file_with_https_parent() {
    let mut cache = HashMap::new();
    let client = Client::new();
    let parent_url = "https://kernel.org/";
    let target_url = "file:///etc/passwd";

    let (data, final_url) = utils::retrieve_asset(
        &mut cache,
        &client,
        parent_url,
        target_url,
        true,
        "",
        false,
    )
    .unwrap();

    assert_eq!(&data, "");
    assert_eq!(&final_url, "");
}

View File

@@ -0,0 +1,83 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A `mailto:` URL has a protocol.
#[test]
fn passing_mailto() {
    let url = "mailto:somebody@somewhere.com?subject=hello";
    assert!(utils::url_has_protocol(url));
}
/// A `tel:` URL has a protocol.
#[test]
fn passing_tel() {
    let url = "tel:5551234567";
    assert!(utils::url_has_protocol(url));
}
/// An `ftp:` URL written without `//` still has a protocol.
#[test]
fn passing_ftp_no_slashes() {
    let url = "ftp:some-ftp-server.com";
    assert!(utils::url_has_protocol(url));
}
/// An `ftp://` URL carrying user credentials has a protocol.
#[test]
fn passing_ftp_with_credentials() {
    let url = "ftp://user:password@some-ftp-server.com";
    assert!(utils::url_has_protocol(url));
}
/// A `javascript:` URL has a protocol.
#[test]
fn passing_javascript() {
    let url = "javascript:void(0)";
    assert!(utils::url_has_protocol(url));
}
/// An `http://` URL has a protocol.
#[test]
fn passing_http() {
    let url = "http://news.ycombinator.com";
    assert!(utils::url_has_protocol(url));
}
/// An `https://` URL has a protocol.
#[test]
fn passing_https() {
    let url = "https://github.com";
    assert!(utils::url_has_protocol(url));
}
/// An upper-case `MAILTO:` scheme is still recognised as a protocol.
#[test]
fn passing_mailto_uppercase() {
    let url = "MAILTO:somebody@somewhere.com?subject=hello";
    assert!(utils::url_has_protocol(url));
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A protocol-relative URL has no protocol.
#[test]
fn failing_url_with_no_protocol() {
    let url = "//some-hostname.com/some-file.html";
    assert!(!utils::url_has_protocol(url));
}
/// A host-like relative path has no protocol.
#[test]
fn failing_relative_path() {
    let url = "some-hostname.com/some-file.html";
    assert!(!utils::url_has_protocol(url));
}
/// A root-relative path has no protocol.
#[test]
fn failing_relative_to_root_path() {
    let url = "/some-file.html";
    assert!(!utils::url_has_protocol(url));
}
/// An empty string has no protocol.
#[test]
fn failing_empty_string() {
    let url = "";
    assert!(!utils::url_has_protocol(url));
}

View File

@@ -1,54 +1,17 @@
use crate::http::retrieve_asset;
use base64::encode;
use regex::Regex;
use base64;
use reqwest::blocking::Client;
use reqwest::header::CONTENT_TYPE;
use std::collections::HashMap;
use url::{ParseError, Url};
use std::fs;
use std::path::Path;
use url::{form_urlencoded, ParseError, Url};
/// This monster of a regex is used to match any kind of URL found in CSS.
///
/// There are roughly three different categories that a found URL could fit
/// into:
/// - Font [found after a src: property in an @font-face rule]
/// - Stylesheet [denoted by an @import before the url]
/// - Image [covers all other uses of the url() function]
///
/// This regex aims to extract the following information:
/// - What type of URL is it (font/image/css)
/// - Where is the part that needs to be replaced (incl any wrapping quotes)
/// - What is the URL (excl any wrapping quotes)
///
/// Essentially, the regex can be broken down into two parts:
///
/// `(?:(?:(?P<stylesheet>@import)|(?P<font>src\s*:))\s+)?`
/// This matches the precursor to a font or CSS URL, and fills in a match under
/// either `<stylesheet>` (if it's a CSS URL) or `<font>` (if it's a font).
/// Determining whether or not it's an image can be done by the negation of both
/// of these. Either zero or one of these can match.
///
/// `url\((?P<to_repl>['"]?(?P<url>[^"'\)]+)['"]?)\)`
/// This matches the actual URL part of the url(), and must always match. It also
/// sets `<to_repl>` and `<url>` which correspond to everything within
/// `url(...)` and a usable URL, respectively.
///
/// Note, however, that this does not perform any validation of the found URL.
/// Malformed CSS could lead to an invalid URL being present. It is therefore
/// recommended that the URL gets manually validated.
const CSS_URL_REGEX_STR: &str = r###"(?:(?:(?P<stylesheet>@import)|(?P<font>src\s*:))\s+)?url\((?P<to_repl>['"]?(?P<url>[^"'\)]+)['"]?)\)"###;
// Regular expressions compiled once on first use and shared by the URL helpers below.
lazy_static! {
// Matches a leading `scheme:` prefix made of lowercase letters and digits;
// url_has_protocol lowercases its input before testing against it.
static ref HAS_PROTOCOL: Regex = Regex::new(r"^[a-z0-9]+:").unwrap();
// Matches strings that begin with `http://` or `https://` (used by is_valid_url).
static ref REGEX_URL: Regex = Regex::new(r"^https?://").unwrap();
// Compiled form of CSS_URL_REGEX_STR; resolve_css_imports iterates over its captures.
static ref REGEX_CSS_URL: Regex = Regex::new(CSS_URL_REGEX_STR).unwrap();
}
const MAGIC: [[&[u8]; 2]; 19] = [
const MAGIC: [[&[u8]; 2]; 18] = [
// Image
[b"GIF87a", b"image/gif"],
[b"GIF89a", b"image/gif"],
[b"\xFF\xD8\xFF", b"image/jpeg"],
[b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"],
[b"<?xml ", b"image/svg+xml"],
[b"<svg ", b"image/svg+xml"],
[b"RIFF....WEBPVP8 ", b"image/webp"],
[b"\x00\x00\x01\x00", b"image/x-icon"],
@@ -67,38 +30,66 @@ const MAGIC: [[&[u8]; 2]; 19] = [
[b"\x1A\x45\xDF\xA3", b"video/webm"],
];
pub fn data_to_dataurl(mime: &str, data: &[u8]) -> String {
let mimetype = if mime.is_empty() {
detect_mimetype(data)
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String {
let media_type: String = if media_type.is_empty() {
detect_media_type(data, &url)
} else {
mime.to_string()
media_type.to_string()
};
format!("data:{};base64,{}", mimetype, encode(data))
let hash: String = if fragment != "" {
format!("#{}", fragment)
} else {
str!()
};
format!(
"data:{};base64,{}{}",
media_type,
base64::encode(data),
hash
)
}
pub fn detect_mimetype(data: &[u8]) -> String {
pub fn detect_media_type(data: &[u8], url: &str) -> String {
for item in MAGIC.iter() {
if data.starts_with(item[0]) {
return String::from_utf8(item[1].to_vec()).unwrap();
}
}
"".to_owned()
if url.to_lowercase().ends_with(".svg") {
return str!("image/svg+xml");
}
str!()
}
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
HAS_PROTOCOL.is_match(url.as_ref().to_lowercase().as_str())
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme().len() > 0))
.unwrap_or(false)
}
pub fn is_data_url<T: AsRef<str>>(url: T) -> Result<bool, ParseError> {
Url::parse(url.as_ref()).and_then(|u| Ok(u.scheme() == "data"))
/// Tells whether `url` parses successfully and carries the `data` scheme.
///
/// Anything `Url::parse` rejects is reported as `false`.
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
    match Url::parse(url.as_ref()) {
        Ok(parsed) => parsed.scheme() == "data",
        Err(_) => false,
    }
}
pub fn is_valid_url<T: AsRef<str>>(path: T) -> bool {
REGEX_URL.is_match(path.as_ref())
/// Tells whether `url` parses successfully and carries the `file` scheme.
///
/// Anything `Url::parse` rejects is reported as `false`.
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
    let parsed = Url::parse(url.as_ref());
    parsed
        .map(|candidate| candidate.scheme() == "file")
        .unwrap_or(false)
}
/// Tells whether `url` parses successfully and uses either the `http` or the
/// `https` scheme.
///
/// Anything `Url::parse` rejects is reported as `false`.
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
    match Url::parse(url.as_ref()) {
        Ok(parsed) => {
            let scheme = parsed.scheme();
            scheme == "http" || scheme == "https"
        }
        Err(_) => false,
    }
}
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
let result = if is_valid_url(to.as_ref()) {
let result = if is_http_url(to.as_ref()) {
to.as_ref().to_string()
} else {
Url::parse(from.as_ref())?
@@ -109,99 +100,205 @@ pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<Strin
Ok(result)
}
pub fn resolve_css_imports(
cache: &mut HashMap<String, String>,
client: &Client,
css_string: &str,
as_dataurl: bool,
href: &str,
opt_no_images: bool,
opt_silent: bool,
) -> String {
let mut resolved_css = String::from(css_string);
for link in REGEX_CSS_URL.captures_iter(&css_string) {
let target_link = link.name("url").unwrap().as_str();
// Determine the type of link
let is_stylesheet = link.name("stylesheet").is_some();
let is_font = link.name("font").is_some();
let is_image = !is_stylesheet && !is_font;
// Generate absolute URL for content
let embedded_url = match resolve_url(href, target_link) {
Ok(url) => url,
Err(_) => continue, // Malformed URL
};
// Download the asset. If it's more CSS, resolve that too
let content = if is_stylesheet {
// The link is an @import link
retrieve_asset(
cache,
client,
&embedded_url,
false, // Formatting as data URL will be done later
"text/css", // Expect CSS
opt_silent,
)
.map(|(content, _)| {
resolve_css_imports(
cache,
client,
&content,
true, // Finally, convert to a dataurl
&embedded_url,
opt_no_images,
opt_silent,
)
})
} else if (is_image && !opt_no_images) || is_font {
// The link is some other, non-@import link
retrieve_asset(
cache,
client,
&embedded_url,
true, // Format as data URL
"", // Unknown MIME type
opt_silent,
)
.map(|(a, _)| a)
} else {
// If it's a datatype that has been opt_no'd out of, replace with
// absolute URL
Ok(embedded_url.clone())
}
.unwrap_or_else(|e| {
eprintln!("Warning: {}", e);
// If failed to resolve, replace with absolute URL
embedded_url
});
let replacement = format!("\"{}\"", &content);
let dest = link.name("to_repl").unwrap();
let offset = resolved_css.len() - css_string.len();
let target_range = (dest.start() + offset)..(dest.end() + offset);
resolved_css.replace_range(target_range, &replacement);
}
if as_dataurl {
data_to_dataurl("text/css", resolved_css.as_bytes())
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
if Url::parse(url.as_ref()).unwrap().fragment() == None {
str!()
} else {
resolved_css
str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap())
}
}
/// Normalizes a URL so that it can be used as a cache key.
///
/// The fragment is removed, and an empty query (a stray trailing `?`) is
/// dropped. The result is the re-serialized URL.
///
/// If `url` cannot be parsed (for example because it is relative), it is
/// returned unchanged rather than panicking, so callers can still go on to
/// report a proper error for it.
pub fn clean_url<T: AsRef<str>>(url: T) -> String {
    let mut result = match Url::parse(url.as_ref()) {
        Ok(parsed) => parsed,
        // Nothing sensible to normalize: hand the input back untouched
        Err(_) => return url.as_ref().to_string(),
    };

    // Clear fragment
    result.set_fragment(None);

    // Get rid of stray question mark
    if result.query() == Some("") {
        result.set_query(None);
    }

    result.to_string()
}
/// Extracts the textual payload of a `text/html` data URL.
///
/// The URL path is split at its first comma into metadata (`;`-separated
/// items) and data. The data is passed through `decode_url`, and additionally
/// base64-decoded when the last `base64`/`utf8` metadata item is `base64`.
///
/// An empty string is returned when:
/// - the input cannot be parsed as a URL,
/// - the path contains no comma,
/// - the first metadata item is not `text/html` (compared case-insensitively),
/// - the payload is marked base64 but is not valid base64 or not valid UTF-8.
pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
    let parsed_url = match Url::parse(url.as_ref()) {
        Ok(parsed) => parsed,
        Err(_) => return str!(),
    };
    let path: &str = parsed_url.path();

    // `find` yields a byte offset; slice by bytes (a comma is always a char boundary)
    let comma_loc: usize = match path.find(',') {
        Some(loc) => loc,
        None => return str!(),
    };
    let meta_data: &str = &path[..comma_loc];
    let raw_data: &str = &path[comma_loc + 1..];

    // The media type, when given, is always the first metadata item
    let mut meta_data_items = meta_data.split(';');
    let is_html: bool = meta_data_items
        .next()
        .map_or(false, |item| item.eq_ignore_ascii_case("text/html"));
    if !is_html {
        return str!();
    }

    // When several encodings are listed, the last one wins
    let is_base64: bool = meta_data_items
        .filter(|item| item.eq_ignore_ascii_case("base64") || item.eq_ignore_ascii_case("utf8"))
        .last()
        .map_or(false, |item| item.eq_ignore_ascii_case("base64"));

    let data: String = decode_url(raw_data.to_string());

    if is_base64 {
        base64::decode(&data)
            .ok()
            .and_then(|bytes| String::from_utf8(bytes).ok())
            .unwrap_or_else(|| str!())
    } else {
        data
    }
}
/// Percent-decodes `input`.
///
/// Every `%XX` sequence with two hexadecimal digits is replaced by the byte it
/// encodes; all other bytes (including `+`, `&` and `=`) are copied through
/// unchanged. A `%` that is not followed by two hex digits is kept literally.
/// The decoded bytes are interpreted as UTF-8, with invalid sequences replaced
/// by U+FFFD.
///
/// This is deliberately not a form/query-string decoder: the inputs are data
/// URL payloads and file paths, where `+` must stay a plus sign and `&` must
/// not be treated as a separator.
pub fn decode_url(input: String) -> String {
    // Numeric value of a single ASCII hex digit, if it is one
    fn hex_value(byte: u8) -> Option<u8> {
        match byte {
            b'0'..=b'9' => Some(byte - b'0'),
            b'a'..=b'f' => Some(byte - b'a' + 10),
            b'A'..=b'F' => Some(byte - b'A' + 10),
            _ => None,
        }
    }

    let bytes: &[u8] = input.as_bytes();
    let mut decoded: Vec<u8> = Vec::with_capacity(bytes.len());
    let mut pos: usize = 0;

    while pos < bytes.len() {
        if bytes[pos] == b'%' {
            let hi = bytes.get(pos + 1).copied().and_then(hex_value);
            let lo = bytes.get(pos + 2).copied().and_then(hex_value);

            if let (Some(hi), Some(lo)) = (hi, lo) {
                decoded.push((hi << 4) | lo);
                pos += 3;
                continue;
            }
        }

        decoded.push(bytes[pos]);
        pos += 1;
    }

    String::from_utf8_lossy(&decoded).into_owned()
}
/// Converts a `file:` URL into a local filesystem path string.
///
/// Returns an empty string when `url` is not a file URL, or when it is too
/// short to contain anything after the scheme prefix.
///
/// The fragment (everything from the first `#`) is removed from the raw URL
/// before decoding, so the cut can never land at the wrong offset or
/// underflow when the fragment itself contains percent-escapes. The leading
/// 7 bytes (`file://`) are then skipped; on Windows 8 bytes are skipped
/// instead, presumably to also drop the slash in front of the drive letter,
/// and forward slashes are turned into backslashes.
pub fn file_url_to_fs_path(url: &str) -> String {
    if !is_file_url(url) {
        return str!();
    }

    // Strip the fragment while the URL is still in its encoded form
    let without_fragment: &str = match url.find('#') {
        Some(hash_loc) => &url[..hash_loc],
        None => url,
    };

    let cutoff_l = if cfg!(windows) { 8 } else { 7 };

    // `get` avoids a panic on short inputs such as "file:a"
    let raw_path: &str = match without_fragment.get(cutoff_l..) {
        Some(raw_path) => raw_path,
        None => return str!(),
    };

    let fs_file_path: String = decode_url(raw_path.to_string());

    if cfg!(windows) {
        fs_file_path.replace("/", "\\")
    } else {
        fs_file_path
    }
}
/// Obtains the asset that `url` points to and returns `(content, final_url)`.
///
/// - Empty `url`: both strings are empty.
/// - Data URL: returned as is, for both content and final URL.
/// - File URL: only read when `parent_url` is a file URL as well (otherwise
///   both strings are empty). A path that does not exist, or that cannot be
///   read, yields empty content.
/// - Anything else: looked up in `cache` under its cleaned URL, and requested
///   through `client` on a miss. The response is stored in `cache` under the
///   cleaned final (post-redirect) URL.
///
/// When `as_data_url` is true the content is a data URL built from the raw
/// bytes; otherwise it is the asset's text. `media_type` may be empty, in
/// which case the response's Content-Type header is used for network assets
/// and `data_to_data_url` is left to work it out.
///
/// Unless `opt_silent` is set, each retrieved URL is logged to stderr.
///
/// # Errors
///
/// Returns the `reqwest::Error` raised while sending the request or reading
/// the response body.
pub fn retrieve_asset(
    cache: &mut HashMap<String, String>,
    client: &Client,
    parent_url: &str,
    url: &str,
    as_data_url: bool,
    media_type: &str,
    opt_silent: bool,
) -> Result<(String, String), reqwest::Error> {
    if url.is_empty() {
        return Ok((str!(), str!()));
    }

    if is_data_url(url) {
        return Ok((url.to_string(), url.to_string()));
    }

    if is_file_url(url) {
        // Check if parent_url is also file:///
        // (if not, then we don't embed the asset)
        if !is_file_url(parent_url) {
            return Ok((str!(), str!()));
        }

        let fs_file_path: String = file_url_to_fs_path(url);
        if !Path::new(&fs_file_path).exists() {
            return Ok((str!(), url.to_string()));
        }

        if !opt_silent {
            eprintln!("{}", url);
        }

        let loaded = if as_data_url {
            fs::read(&fs_file_path).map(|data| {
                data_to_data_url(media_type, &data, &fs_file_path, &get_url_fragment(url))
            })
        } else {
            fs::read_to_string(&fs_file_path)
        };

        // An unreadable file is treated like a missing one instead of aborting
        return Ok(match loaded {
            Ok(content) => (content, url.to_string()),
            Err(e) => {
                if !opt_silent {
                    eprintln!("Warning: unable to read {}: {}", fs_file_path, e);
                }
                (str!(), url.to_string())
            }
        });
    }

    // Only network assets are cached, so the key is computed here
    let cache_key = clean_url(url);

    if let Some(data) = cache.get(&cache_key) {
        // URL is in cache
        if !opt_silent {
            eprintln!("{} (from cache)", url);
        }
        return Ok((data.to_string(), url.to_string()));
    }

    // URL not in cache, we request it
    let mut response = client.get(url).send()?;
    let res_url = response.url().to_string();

    if !opt_silent {
        if url == res_url {
            eprintln!("{}", url);
        } else {
            eprintln!("{} -> {}", url, res_url);
        }
    }

    let new_cache_key = clean_url(&res_url);

    if as_data_url {
        // Convert response into a byte array
        let mut data: Vec<u8> = vec![];
        response.copy_to(&mut data)?;

        // Attempt to obtain media type by reading the Content-Type header
        let media_type = if media_type.is_empty() {
            response
                .headers()
                .get(CONTENT_TYPE)
                .and_then(|header| header.to_str().ok())
                .unwrap_or(media_type)
        } else {
            media_type
        };

        let url_fragment = get_url_fragment(url);
        let data_url = data_to_data_url(media_type, &data, url, &url_fragment);

        // Add to cache
        cache.insert(new_cache_key, data_url.clone());
        Ok((data_url, res_url))
    } else {
        // Propagate body-read failures instead of panicking
        let content = response.text()?;

        // Add to cache
        cache.insert(new_cache_key, content.clone());
        Ok((content, res_url))
    }
}