53 Commits

Author SHA1 Message Date
Sunshine
3d678d80ee Merge pull request #176 from snshn/img-srcset
IMG srcset
2020-05-17 14:26:30 -04:00
Sunshine
19a87f426e version bump 2020-05-17 14:06:55 -04:00
Sunshine
cbe3f9f554 implement support for embedding images within srcset 2020-05-17 14:06:44 -04:00
Sunshine
b6a44c64cf Merge pull request #174 from snshn/armhf-cd
Improve CD for compiling ARM binary asset
2020-05-12 03:31:37 -04:00
Sunshine
84e2dd789c improve CD for compiling ARM binary asset 2020-05-12 03:29:32 -04:00
Sunshine
ac4945ca97 Merge pull request #173 from snshn/sha2-integrity
Add asset integrity validation
2020-05-12 03:15:02 -04:00
Sunshine
2ca2c7aff8 version bump 2020-05-12 03:10:43 -04:00
Sunshine
a18df74946 refactor code and implement integrity validation 2020-05-12 02:51:37 -04:00
Sunshine
2bc8414cc1 Merge pull request #172 from snshn/update-metadata-comment
improve metadata comments
2020-04-30 22:39:25 -04:00
Sunshine
c4569343a4 improve metadata comments 2020-04-30 20:23:09 -04:00
Sunshine
5f5820c71a Merge pull request #168 from snshn/context-comment
Metadata comment tag
2020-04-30 20:06:40 -04:00
Sunshine
4719a6fecf Merge pull request #170 from snshn/svg-image-href
Embed SVG IMAGE assets
2020-04-30 20:00:59 -04:00
Sunshine
c999359b9f Merge branch 'context-comment' of github.com:Alch-Emi/monolith into context-comment 2020-04-30 19:54:13 -04:00
Sunshine
f22e2b6e68 embed SVG IMAGE assets 2020-04-30 19:51:30 -04:00
Sunshine
31a9550f5b Merge pull request #171 from snshn/improve-ci-cd
Add rustfmt installation step to CI
2020-04-30 19:51:04 -04:00
Sunshine
201f2d61b9 add rustfmt installation step to CI 2020-04-30 19:45:44 -04:00
Sunshine
3ae4dfae8e Update README.md 2020-04-28 09:07:47 -04:00
Sunshine
7b095fe4ff Merge pull request #167 from snshn/version-bump
version bump
2020-04-25 03:50:10 -04:00
Sunshine
890bcb1bb6 version bump 2020-04-25 01:03:49 -04:00
Sunshine
aa97ea9f82 Merge pull request #165 from snshn/no-fonts
Add flag for excluding web fonts
2020-04-22 09:16:30 -04:00
Sunshine
9b40dbbf27 add option to exclude web fonts 2020-04-22 09:11:20 -04:00
Sunshine
289f3e801b Merge pull request #161 from snshn/cache-blob
Store blobs instead of data URLs in cache
2020-04-19 13:33:03 -04:00
Sunshine
edacd09dc8 store blobs instead of data URLs in cache 2020-04-19 13:26:14 -04:00
Sunshine
5682863725 Merge pull request #164 from snshn/raspberry-pi-artifact-update
Update GitHub Action for assembling ARM artifacts
2020-04-18 13:46:44 -04:00
Sunshine
4304d7a638 update GitHub Action for assembling ARM artifacts 2020-04-18 13:44:26 -04:00
Sunshine
f56f88da94 Merge pull request #91 from snshn/unwrap-noscript-if-no-js
Propose ADR-0002 (NOSCRIPT nodes)
2020-04-16 23:24:30 -04:00
Sunshine
87c8b361ea add ADR-0002 (NOSCRIPT nodes) 2020-04-16 23:24:03 -04:00
Sunshine
cd505ddb6c Merge pull request #163 from snshn/proper-css-ident-escaping
Escape all special chars within #id and .class CSS selectors
2020-04-11 18:33:41 -04:00
Sunshine
eeea617fb1 escape all special chars within #id and .class CSS selectors 2020-04-11 17:50:23 -04:00
Sunshine
cc6dbddb49 Merge pull request #162 from snshn/colons-in-css-class-names
Escape colons within CSS idents
2020-04-10 21:20:37 -04:00
Sunshine
9d3df2cdc6 escape colons within CSS idents 2020-04-10 20:59:56 -04:00
Sunshine
ab601c3830 Merge pull request #160 from snshn/more-css-image-url-detection-props
Treat url()'s found in @counter-style rules as images
2020-04-10 07:28:55 -04:00
Sunshine
3738be2b6d treat url()'s found in @counter-style rules as images 2020-04-10 07:22:02 -04:00
Sunshine
53160f01c7 Merge pull request #159 from snshn/implement-data-url-media-type-detection
Improve data URL media type detection
2020-04-10 06:04:49 -04:00
Sunshine
594ad55bd8 improve data URL media type detection 2020-04-10 05:50:33 -04:00
Sunshine
d2615f51dc Merge pull request #158 from snshn/improve-data-url-support
Improve parsing of data URLs
2020-04-10 01:49:34 -04:00
Sunshine
c097733ae7 improve parsing of data URLs 2020-04-09 20:27:07 -04:00
Sunshine
67d4b7dafc Merge pull request #157 from snshn/2-2-3
Upgrade base64 crate & version bump (2.2.2 → 2.2.3)
2020-04-08 19:56:24 -04:00
Sunshine
b1d6bbce0c upgrade base64 crate & version bump (2.2.2 → 2.2.3) 2020-04-08 19:49:46 -04:00
Sunshine
20124f4891 Merge pull request #156 from snshn/raspberry-pi-artifact
Make the pipeline build and upload armhf executable with every new release
2020-04-08 19:40:41 -04:00
Sunshine
0dd540afaf make the pipeline build and upload armhf executable with every new release 2020-04-08 19:29:17 -04:00
Sunshine
df71083359 Merge pull request #155 from snshn/fix-css-unit-sign-bug
Fix css unit sign bug
2020-04-08 18:19:32 -04:00
Sunshine
349c7bb3ea properly parse negative units in CSS 2020-04-08 18:07:39 -04:00
Sunshine
5a30c6b44b Merge branch 'master' of github.com:snshn/monolith 2020-04-08 10:53:29 -04:00
Sunshine
929924accd Merge pull request #153 from snshn/proper-quotation-marks
use proper quotation marks in the README
2020-04-05 16:25:40 -04:00
Sunshine
812b46960c use proper quotation marks in the README 2020-04-05 16:24:18 -04:00
Sunshine
874080dbda Merge pull request #152 from snshn/separate-ci-build-jobs
Separate OS build jobs
2020-04-05 15:34:21 -04:00
Sunshine
93dd9d4ed4 separate build job per OS 2020-04-05 15:32:25 -04:00
Sunshine
d4d9bbe424 update cd.yml 2020-04-04 22:12:35 -04:00
Emi Simpson
05985583f0 Switch timestamps from rfc822 local time to iso8601 UTC 2020-01-10 14:30:35 -05:00
Emi Simpson
651fa716b4 Clean user, pass, and fragment from URL before writing 2020-01-10 14:18:15 -05:00
Emi Simpson
9be3982dc6 Added --no-context flag to disable adding context comment 2020-01-08 19:00:53 -05:00
Emi Simpson
27c9fb4cd3 Added comment indicating the context under which the page was downloaded 2020-01-08 18:51:18 -05:00
30 changed files with 1571 additions and 614 deletions

22
.github/workflows/build_gnu_linux.yml vendored Normal file
View File

@@ -0,0 +1,22 @@
name: GNU/Linux
on:
push:
branches: [ master ]
jobs:
build:
strategy:
matrix:
os:
- ubuntu-latest
rust:
- stable
runs-on: ${{ matrix.os }}
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- name: Build
run: cargo build --all --locked --verbose

22
.github/workflows/build_macos.yml vendored Normal file
View File

@@ -0,0 +1,22 @@
name: macOS
on:
push:
branches: [ master ]
jobs:
build:
strategy:
matrix:
os:
- macos-latest
rust:
- stable
runs-on: ${{ matrix.os }}
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- name: Build
run: cargo build --all --locked --verbose

View File

@@ -1,4 +1,4 @@
name: Build
name: Windows
on:
push:
@@ -10,8 +10,6 @@ jobs:
strategy:
matrix:
os:
- ubuntu-latest
- macos-latest
- windows-latest
rust:
- stable

View File

@@ -1,23 +1,54 @@
# CD GitHub Actions workflow for Monolith
name: CD
on:
release:
types: [created]
types:
- created
jobs:
windows:
runs-on: windows-latest
windows:
runs-on: windows-2019
steps:
- run: git config --global core.autocrlf false
- name: Checkout the repository
uses: actions/checkout@master
- name: Build the executable
run: cargo build --all --locked
- name: Perform local installation
run: cargo install --force --locked --path .
uses: actions/checkout@v2
- name: Build and install the executable
run: cargo build --release
- uses: Shopify/upload-to-release@1.0.0
with:
name: monolith.exe
path: C:\Users\runneradmin\.cargo\bin\monolith.exe
path: target\release\monolith.exe
repo-token: ${{ secrets.GITHUB_TOKEN }}
gnu_linux_armhf:
runs-on: ubuntu-18.04
steps:
- name: Checkout the repository
uses: actions/checkout@v2
- name: Prepare cross-platform environment
run: |
sudo mkdir -p /cross-build-arm
sudo touch /etc/apt/sources.list.d/armhf.list
echo "deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ bionic main" | sudo tee -a /etc/apt/sources.list.d/armhf.list
sudo apt-get update
sudo apt-get install -y gcc-arm-linux-gnueabihf libc6-armhf-cross libc6-dev-armhf-cross
sudo apt-get download libssl1.1:armhf libssl-dev:armhf
sudo dpkg -x libssl1.1*.deb /cross-build-arm
sudo dpkg -x libssl-dev*.deb /cross-build-arm
rustup target add arm-unknown-linux-gnueabihf
echo "::set-env name=C_INCLUDE_PATH::/cross-build-arm/usr/include"
echo "::set-env name=OPENSSL_INCLUDE_DIR::/cross-build-arm/usr/include/arm-linux-gnueabihf"
echo "::set-env name=OPENSSL_LIB_DIR::/cross-build-arm/usr/lib/arm-linux-gnueabihf"
echo "::set-env name=PKG_CONFIG_ALLOW_CROSS::1"
echo "::set-env name=RUSTFLAGS::-C linker=arm-linux-gnueabihf-gcc -L/usr/arm-linux-gnueabihf/lib -L/cross-build-arm/usr/lib/arm-linux-gnueabihf -L/cross-build-arm/lib/arm-linux-gnueabihf"
- name: Build the executable
run: cargo build --release --target=arm-unknown-linux-gnueabihf
- name: Attach artifact to the release
uses: Shopify/upload-to-release@1.0.0
with:
name: monolith-gnu-linux-armhf
path: target/arm-unknown-linux-gnueabihf/release/monolith
repo-token: ${{ secrets.GITHUB_TOKEN }}

View File

@@ -27,4 +27,6 @@ jobs:
- name: Run tests
run: cargo test --all --locked --verbose
- name: Check code formatting
run: cargo fmt --all -- --check
run: |
rustup component add rustfmt
cargo fmt --all -- --check

93
Cargo.lock generated
View File

@@ -61,16 +61,50 @@ name = "base64"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "base64"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bitflags"
version = "1.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "block-buffer"
version = "0.7.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"block-padding 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
"byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
"generic-array 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "block-padding"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "bumpalo"
version = "3.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byte-tools"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "byteorder"
version = "1.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bytes"
version = "0.5.3"
@@ -168,6 +202,14 @@ name = "difference"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "digest"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"generic-array 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "doc-comment"
version = "0.3.1"
@@ -205,6 +247,11 @@ dependencies = [
"serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "fake-simd"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "flate2"
version = "1.0.13"
@@ -303,6 +350,14 @@ dependencies = [
"slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "generic-array"
version = "0.12.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"typenum 1.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "getrandom"
version = "0.1.13"
@@ -568,15 +623,17 @@ dependencies = [
[[package]]
name = "monolith"
version = "2.2.2"
version = "2.2.6"
dependencies = [
"assert_cmd 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
"base64 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
"base64 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
"cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)",
"html5ever 0.24.1 (registry+https://github.com/rust-lang/crates.io-index)",
"reqwest 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
"sha2 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"tempfile 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)",
"url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
@@ -630,6 +687,11 @@ dependencies = [
"libc 0.2.66 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "opaque-debug"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "openssl"
version = "0.10.26"
@@ -1096,6 +1158,17 @@ dependencies = [
"url 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "sha2"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"block-buffer 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)",
"digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
"fake-simd 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
"opaque-debug 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "siphasher"
version = "0.2.3"
@@ -1261,6 +1334,11 @@ name = "try-lock"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "typenum"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "unicase"
version = "2.6.0"
@@ -1501,8 +1579,13 @@ dependencies = [
"checksum atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)" = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90"
"checksum autocfg 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "1d49d90015b3c36167a20fe2810c5cd875ad504b39cff3d4eae7977e6b7c1cb2"
"checksum base64 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7"
"checksum base64 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "7d5ca2cd0adc3f48f9e9ea5a6bbdf9ccc0bfade884847e484d452414c7ccffb3"
"checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
"checksum block-buffer 0.7.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b"
"checksum block-padding 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5"
"checksum bumpalo 3.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "8fe2567a8d8a3aedb4e39aa39e186d5673acfd56393c6ac83b2bc5bd82f4369c"
"checksum byte-tools 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7"
"checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
"checksum bytes 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "10004c15deb332055f7a4a208190aed362cf9a7c2f6ab70a305fba50e1105f38"
"checksum c2-chacha 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "214238caa1bf3a496ec3392968969cab8549f96ff30652c9e56885329315f6bb"
"checksum cc 1.0.47 (registry+https://github.com/rust-lang/crates.io-index)" = "aa87058dce70a3ff5621797f1506cb837edd02ac4c0ae642b4542dce802908b8"
@@ -1515,11 +1598,13 @@ dependencies = [
"checksum cssparser 0.27.2 (registry+https://github.com/rust-lang/crates.io-index)" = "754b69d351cdc2d8ee09ae203db831e005560fc6030da058f86ad60c92a9cb0a"
"checksum cssparser-macros 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "dfae75de57f2b2e85e8768c3ea840fd159c8f33e2b6522c7835b7abac81be16e"
"checksum difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "524cbf6897b527295dff137cec09ecf3a05f4fddffd7dfcd1585403449e74198"
"checksum digest 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5"
"checksum doc-comment 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "923dea538cea0aa3025e8685b20d6ee21ef99c4f77e954a30febbaac5ec73a97"
"checksum dtoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "ea57b42383d091c85abcc2706240b94ab2a8fa1fc81c10ff23c4de06e2a90b5e"
"checksum dtoa-short 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "59020b8513b76630c49d918c33db9f4c91638e7d3404a28084083b87e33f76f2"
"checksum encoding_rs 0.8.20 (registry+https://github.com/rust-lang/crates.io-index)" = "87240518927716f79692c2ed85bfe6e98196d18c6401ec75355760233a7e12e9"
"checksum escargot 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "74cf96bec282dcdb07099f7e31d9fed323bca9435a09aba7b6d99b7617bca96d"
"checksum fake-simd 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed"
"checksum flate2 1.0.13 (registry+https://github.com/rust-lang/crates.io-index)" = "6bd6d6f4752952feb71363cffc9ebac9411b75b87c6ab6058c40c8900cf43c0f"
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
"checksum foreign-types 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f6f339eb8adc052cd2ca78910fda869aefa38d22d5cb648e6485e4d3fc06f3b1"
@@ -1534,6 +1619,7 @@ dependencies = [
"checksum futures-sink 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "171be33efae63c2d59e6dbba34186fe0d6394fb378069a76dfd80fdcffd43c16"
"checksum futures-task 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0bae52d6b29cf440e298856fec3965ee6fa71b06aa7495178615953fd669e5f9"
"checksum futures-util 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c0d66274fb76985d3c62c886d1da7ac4c0903a8c9f754e8fe0f35a6a6cc39e76"
"checksum generic-array 0.12.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c68f0274ae0e023facc3c97b2e00f076be70e254bc851d972503b328db79b2ec"
"checksum getrandom 0.1.13 (registry+https://github.com/rust-lang/crates.io-index)" = "e7db7ca94ed4cd01190ceee0d8a8052f08a247aa1b469a7f68c6a3b71afcf407"
"checksum h2 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b9433d71e471c1736fd5a61b671fc0b148d7a2992f666c958d03cd8feb3b88d1"
"checksum heck 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "20564e78d53d2bb135c343b3f47714a56af2061f1c928fdb541dc7b9fdd94205"
@@ -1567,6 +1653,7 @@ dependencies = [
"checksum new_debug_unreachable 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f40f005c60db6e03bae699e414c58bf9aa7ea02a2d0b9bfbcf19286cc4c82b30"
"checksum nom 4.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2ad2a91a8e869eeb30b9cb3119ae87773a8f4ae617f41b1eb9c154b2905f7bd6"
"checksum num_cpus 1.11.1 (registry+https://github.com/rust-lang/crates.io-index)" = "76dac5ed2a876980778b8b85f75a71b6cbf0db0b1232ee12f826bccb00d09d72"
"checksum opaque-debug 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c"
"checksum openssl 0.10.26 (registry+https://github.com/rust-lang/crates.io-index)" = "3a3cc5799d98e1088141b8e01ff760112bbd9f19d850c124500566ca6901a585"
"checksum openssl-probe 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "77af24da69f9d9341038eba93a073b1fdaaa1b788221b00a69bce9e762cb32de"
"checksum openssl-sys 0.9.53 (registry+https://github.com/rust-lang/crates.io-index)" = "465d16ae7fc0e313318f7de5cecf57b2fbe7511fd213978b457e1c96ff46736f"
@@ -1619,6 +1706,7 @@ dependencies = [
"checksum serde_derive 1.0.103 (registry+https://github.com/rust-lang/crates.io-index)" = "a8c6faef9a2e64b0064f48570289b4bf8823b7581f1d6157c1b52152306651d0"
"checksum serde_json 1.0.42 (registry+https://github.com/rust-lang/crates.io-index)" = "1a3351dcbc1f067e2c92ab7c3c1f288ad1a4cffc470b5aaddb4c2e0a3ae80043"
"checksum serde_urlencoded 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9ec5d77e2d4c73717816afac02670d5c4f534ea95ed430442cad02e7a6e32c97"
"checksum sha2 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "27044adfd2e1f077f649f59deb9490d3941d674002f7d062870a60ebe9bd47a0"
"checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac"
"checksum siphasher 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "8e88f89a550c01e4cd809f3df4f52dc9e939f3273a2017eabd5c6d12fd98bb23"
"checksum slab 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8"
@@ -1639,6 +1727,7 @@ dependencies = [
"checksum tower-service 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e987b6bf443f4b5b3b6f38704195592cca41c5bb7aedd3c3693c7081f8289860"
"checksum treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a7f741b240f1a48843f9b8e0444fb55fb2a4ff67293b50a9179dfd5ea67f8d41"
"checksum try-lock 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e604eb7b43c06650e854be16a2a03155743d3752dd1c943f6829e26b7a36e382"
"checksum typenum 1.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "373c8a200f9e67a0c95e62a4f52fbf80c23b4381c05a17845531982fa99e6b33"
"checksum unicase 2.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6"
"checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
"checksum unicode-normalization 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "b561e267b2326bb4cebfc0ef9e68355c7abe6c6f522aeac2f5bf95d56c59bdcf"

View File

@@ -1,6 +1,6 @@
[package]
name = "monolith"
version = "2.2.2"
version = "2.2.6"
edition = "2018"
authors = [
"Sunshine <sunshine@uberspace.net>",
@@ -12,10 +12,12 @@ authors = [
description = "CLI tool for saving web pages as a single HTML file"
[dependencies]
base64 = "0.11.0"
base64 = "0.12.0"
clap = "2.33.0"
cssparser = "0.27.2"
html5ever = "0.24.1"
sha2 = "0.8.1" # Used in calculating checksums during integrity checks
time = "0.1.42" # Used to render comments indicating the time the page was saved
url = "2.1.1"
[dependencies.reqwest]

View File

@@ -1,4 +1,6 @@
[![GitHub Actions Build Status](https://github.com/Y2Z/monolith/workflows/Build/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3ABuild)
[![Monolith Build Status for GNU/Linux](https://github.com/Y2Z/monolith/workflows/GNU%2FLinux/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AGNU%2FLinux)
[![Monolith Build Status for macOS](https://github.com/Y2Z/monolith/workflows/macOS/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AmacOS)
[![Monolith Build Status for Windows](https://github.com/Y2Z/monolith/workflows/Windows/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows)
```
___ ___________ __________ ___________________ ___
@@ -10,9 +12,9 @@
|___| |__________| \____________________| |___| |___| |___|
```
A data hoarder's dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
A data hoarder’s dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
Unlike the conventional "Save page as", `monolith` not only saves the target document, it embeds CSS, image, and JavaScript assets **all at once**, producing a single HTML5 document that is a joy to store and share.
Unlike the conventional “Save page as”, `monolith` not only saves the target document, it embeds CSS, image, and JavaScript assets **all at once**, producing a single HTML5 document that is a joy to store and share.
If compared to saving websites with `wget -mpk`, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available.
@@ -20,20 +22,23 @@ If compared to saving websites with `wget -mpk`, this tool embeds all assets as
## Installation
#### With Homebrew (on macOS and GNU/Linux)
#### Via Homebrew (on macOS and GNU/Linux)
$ brew install monolith
#### Using Snapcraft (on GNU/Linux)
$ snap install monolith
#### Via Docker
The guide can be found [here](docs/containers.md)
#### From source
Dependency: `libssl-dev`
$ git clone https://github.com/Y2Z/monolith.git
$ cd monolith
$ make install
#### With Docker
The guide can be found [here](docs/containers.md)
---------------------------------------------------
## Usage
@@ -44,6 +49,7 @@ The guide can be found [here](docs/containers.md)
## Options
- `-c`: Ignore styles
- `-f`: Exclude frames and iframes
- `-F`: Omit web fonts
- `-i`: Remove images
- `-I`: Isolate the document
- `-j`: Exclude JavaScript
@@ -78,4 +84,4 @@ The Unlicense
---------------------------------------------------
<!-- Microtext -->
<sub>Keep in mind that `monolith` is not aware of your browser's session</sub>
<sub>Keep in mind that `monolith` is not aware of your browser’s session</sub>

View File

@@ -0,0 +1,19 @@
# 2. NOSCRIPT nodes
Date: 2020-04-16
## Status
Accepted
## Context
HTML pages sometimes contain NOSCRIPT nodes, which reveal their contents only when JavaScript is not available. Most of the time they contain hidden messages that inform about certain JavaScript-dependent features not being operational; however, they can sometimes also feature media assets or even iframes.
## Decision
When the document is being saved with or without JavaScript, each NOSCRIPT node should be preserved while its children need to be processed exactly the same way as the rest of the document. This approach will ensure that even hidden remote assets are embedded — since those hidden elements may have to be displayed later in a browser that has JavaScript turned off. An option should be available to "unwrap" all NOSCRIPT nodes in order to make their contents always visible in the document, complementing the "disable JS" function of the program.
## Consequences
Saved documents will have contents of all NOSCRIPT nodes processed as if they are part of the document's DOM, and will therefore properly display images encapsulated within NOSCRIPT nodes when being viewed in browsers that have JavaScript turned off (or have no JavaScript support in the first place). The new option to "unwrap" NOSCRIPT elements will help the user ensure that the resulting document always represents what the original web page looked like in a browser that had JavaScript turned off.

View File

@@ -21,5 +21,5 @@ saved by monolith, if needed.
## Consequences
Monolith will not support modification of original document assets for the purpose of reducing their size, sticking to performing only a minimal
Monolith will not support modification of original document assets for the purpose of reducing their size, sticking to performing only minimal
amount of modifications to the original web page — whatever is needed to provide security or exclude unwanted asset types.

View File

@@ -4,6 +4,7 @@ use clap::{App, Arg};
pub struct AppArgs {
pub target: String,
pub no_css: bool,
pub no_fonts: bool,
pub no_frames: bool,
pub no_images: bool,
pub no_js: bool,
@@ -13,6 +14,7 @@ pub struct AppArgs {
pub silent: bool,
pub timeout: u64,
pub user_agent: String,
pub no_metadata: bool,
}
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
@@ -35,10 +37,12 @@ impl AppArgs {
// .args_from_usage("-a, --include-audio 'Removes audio sources'")
.args_from_usage("-c, --no-css 'Removes CSS'")
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
.args_from_usage("-F, --no-fonts 'Removes fonts'")
.args_from_usage("-i, --no-images 'Removes images'")
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
.args_from_usage("-j, --no-js 'Removes JavaScript'")
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
.args_from_usage("-M, --no-metadata 'Excludes metadata information from the document'")
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
.args_from_usage("-s, --silent 'Suppresses verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
@@ -52,10 +56,12 @@ impl AppArgs {
.expect("please set target")
.to_string();
app_args.no_css = app.is_present("no-css");
app_args.no_fonts = app.is_present("no-fonts");
app_args.no_frames = app.is_present("no-frames");
app_args.no_images = app.is_present("no-images");
app_args.no_js = app.is_present("no-js");
app_args.insecure = app.is_present("insecure");
app_args.no_metadata = app.is_present("no-metadata");
app_args.isolate = app.is_present("isolate");
app_args.silent = app.is_present("silent");
app_args.timeout = app

View File

@@ -2,12 +2,12 @@ use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::utils::{data_to_data_url, decode_url, get_url_fragment, resolve_url, retrieve_asset};
use crate::utils::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, retrieve_asset};
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
// Universal
"background",
"background-image",
"border",
"border-image",
"border-image-source",
"content",
@@ -16,7 +16,15 @@ const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
"list-style-image",
"mask",
"mask-image",
// Specific to @counter-style
"additive-symbols",
"negative",
"pad",
"prefix",
"suffix",
"symbols",
];
const CSS_SPECIAL_CHARS: &str = "~!@$%^&*()+=,./'\";:?><[]{}|`#";
pub fn is_image_url_prop(prop_name: &str) -> bool {
CSS_PROPS_WITH_IMAGE_URLS
@@ -33,14 +41,27 @@ pub fn enquote(input: String, double: bool) -> String {
}
}
pub fn escape(value: &str) -> String {
let mut res = str!(&value);
res = res.replace("\\", "\\\\");
for c in CSS_SPECIAL_CHARS.chars() {
res = res.replace(c, format!("\\{}", c).as_str());
}
res
}
pub fn process_css<'a>(
cache: &mut HashMap<String, String>,
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
parent_url: &str,
parser: &mut Parser,
rule_name: &str,
prop_name: &str,
func_name: &str,
opt_no_fonts: bool,
opt_no_images: bool,
opt_silent: bool,
) -> Result<String, ParseError<'a, String>> {
@@ -69,6 +90,10 @@ pub fn process_css<'a>(
Token::Colon => result.push_str(":"),
Token::Comma => result.push_str(","),
Token::ParenthesisBlock | Token::SquareBracketBlock | Token::CurlyBracketBlock => {
if opt_no_fonts && curr_rule == "font-face" {
continue;
}
let closure: &str;
if token == &Token::ParenthesisBlock {
result.push_str("(");
@@ -91,6 +116,7 @@ pub fn process_css<'a>(
rule_name,
curr_prop.as_str(),
func_name,
opt_no_fonts,
opt_no_images,
opt_silent,
)
@@ -113,12 +139,18 @@ pub fn process_css<'a>(
Token::WhiteSpace(ref value) => {
result.push_str(value);
}
// div...
Token::Ident(ref value) => {
curr_rule = str!();
curr_prop = str!(value);
result.push_str(value);
result.push_str(&escape(value));
}
// @import, @font-face, @charset, @media...
Token::AtKeyword(ref value) => {
curr_rule = str!(value);
if opt_no_fonts && curr_rule == "font-face" {
continue;
}
result.push_str("@");
result.push_str(value);
}
@@ -127,53 +159,49 @@ pub fn process_css<'a>(
result.push_str(value);
}
Token::QuotedString(ref value) => {
let is_import: bool = curr_rule == "import";
if is_import {
if curr_rule == "import" {
// Reset current at-rule value
curr_rule = str!();
}
if is_import {
// Skip empty import values
if value.len() < 1 {
result.push_str("''");
continue;
}
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
let full_url_decoded = decode_url(full_url);
let (css, final_url) = retrieve_asset(
cache,
client,
&parent_url,
&full_url_decoded,
false,
"",
opt_silent,
)
.unwrap_or_default();
result.push_str(
enquote(
data_to_data_url(
"text/css",
embed_css(
cache,
client,
final_url.as_str(),
&css,
opt_no_images,
opt_silent,
let import_full_url = resolve_url(&parent_url, value).unwrap_or_default();
let import_url_fragment = get_url_fragment(import_full_url.clone());
match retrieve_asset(cache, client, &parent_url, &import_full_url, opt_silent) {
Ok((import_contents, import_final_url, _import_media_type)) => {
result.push_str(
enquote(
data_to_data_url(
"text/css",
embed_css(
cache,
client,
&import_final_url,
&String::from_utf8_lossy(&import_contents),
opt_no_fonts,
opt_no_images,
opt_silent,
)
.as_bytes(),
&import_final_url,
&import_url_fragment,
),
false,
)
.as_bytes(),
&final_url,
url_fragment.as_str(),
),
false,
)
.as_str(),
);
.as_str(),
);
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(import_full_url.clone()) {
result.push_str(enquote(import_full_url, false).as_str());
}
}
}
} else {
if func_name == "url" {
// Skip empty url()'s
@@ -185,17 +213,30 @@ pub fn process_css<'a>(
result.push_str(enquote(str!(empty_image!()), false).as_str());
} else {
let resolved_url = resolve_url(&parent_url, value).unwrap_or_default();
let (data_url, _final_url) = retrieve_asset(
let url_fragment = get_url_fragment(resolved_url.clone());
match retrieve_asset(
cache,
client,
&parent_url,
&resolved_url,
true,
"",
opt_silent,
)
.unwrap_or_default();
result.push_str(enquote(data_url, false).as_str());
) {
Ok((data, final_url, media_type)) => {
let data_url = data_to_data_url(
&media_type,
&data,
&final_url,
&url_fragment,
);
result.push_str(enquote(data_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(resolved_url.clone()) {
result.push_str(enquote(resolved_url, false).as_str());
}
}
}
}
} else {
result.push_str(enquote(str!(value), false).as_str());
@@ -217,26 +258,33 @@ pub fn process_css<'a>(
ref unit_value,
..
} => {
if *has_sign {
result.push_str("-");
if *has_sign && *unit_value >= 0. {
result.push_str("+");
}
result.push_str(str!(unit_value * 100.).as_str());
result.push_str("%");
}
Token::Dimension {
ref has_sign,
ref value,
ref unit,
..
} => {
if *has_sign && *value >= 0. {
result.push_str("+");
}
result.push_str(str!(value).as_str());
result.push_str(str!(unit).as_str());
}
// #selector, #id...
Token::IDHash(ref value) => {
curr_rule = str!();
result.push_str("#");
result.push_str(value);
result.push_str(&escape(value));
}
Token::UnquotedUrl(ref value) => {
let is_import: bool = curr_rule == "import";
if is_import {
// Reset current at-rule value
curr_rule = str!();
@@ -257,54 +305,51 @@ pub fn process_css<'a>(
if is_import {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
let full_url_decoded = decode_url(full_url);
let (css, final_url) = retrieve_asset(
cache,
client,
&parent_url,
&full_url_decoded,
false,
"",
opt_silent,
)
.unwrap_or_default();
result.push_str(
enquote(
data_to_data_url(
match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) {
Ok((css, final_url, _media_type)) => {
let data_url = data_to_data_url(
"text/css",
embed_css(
cache,
client,
final_url.as_str(),
&css,
&final_url,
&String::from_utf8_lossy(&css),
opt_no_fonts,
opt_no_images,
opt_silent,
)
.as_bytes(),
&final_url,
url_fragment.as_str(),
),
false,
)
.as_str(),
);
&url_fragment,
);
result.push_str(enquote(data_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(full_url.clone()) {
result.push_str(enquote(full_url, false).as_str());
}
}
}
} else {
if opt_no_images && is_image_url_prop(curr_prop.as_str()) {
result.push_str(enquote(str!(empty_image!()), false).as_str());
} else {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let (data_url, _final_url) = retrieve_asset(
cache,
client,
&parent_url,
&full_url,
true,
"",
opt_silent,
)
.unwrap_or_default();
result.push_str(enquote(data_url, false).as_str());
let url_fragment = get_url_fragment(full_url.clone());
match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) {
Ok((data, final_url, media_type)) => {
let data_url =
data_to_data_url(&media_type, &data, &final_url, &url_fragment);
result.push_str(enquote(data_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(full_url.clone()) {
result.push_str(enquote(full_url, false).as_str());
}
}
}
}
}
result.push_str(")");
@@ -325,6 +370,7 @@ pub fn process_css<'a>(
curr_rule.as_str(),
curr_prop.as_str(),
function_name,
opt_no_fonts,
opt_no_images,
opt_silent,
)
@@ -342,10 +388,11 @@ pub fn process_css<'a>(
}
pub fn embed_css(
cache: &mut HashMap<String, String>,
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
parent_url: &str,
css: &str,
opt_no_fonts: bool,
opt_no_images: bool,
opt_silent: bool,
) -> String {
@@ -360,6 +407,7 @@ pub fn embed_css(
"",
"",
"",
opt_no_fonts,
opt_no_images,
opt_silent,
)

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,8 @@
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
use monolith::utils::{data_url_to_text, is_data_url, is_file_url, is_http_url, retrieve_asset};
use monolith::utils::{data_url_to_data, is_data_url, is_file_url, is_http_url, retrieve_asset};
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use reqwest::Url;
use std::collections::HashMap;
use std::env;
use std::fs;
@@ -109,43 +110,44 @@ fn main() {
// Retrieve root document
if is_file_url(target_url) || is_http_url(target_url) {
let (data, final_url) = retrieve_asset(
&mut cache,
&client,
target_url,
target_url,
false,
"",
app_args.silent,
)
.expect("Could not retrieve target document");
base_url = final_url;
dom = html_to_dom(&data);
match retrieve_asset(&mut cache, &client, target_url, target_url, app_args.silent) {
Ok((data, final_url, _media_type)) => {
base_url = final_url;
dom = html_to_dom(&String::from_utf8_lossy(&data));
}
Err(_) => {
eprintln!("Could not retrieve target document");
process::exit(1);
}
}
} else if is_data_url(target_url) {
let text: String = data_url_to_text(target_url);
if text.len() == 0 {
eprintln!("Unsupported data URL input");
let (media_type, data): (String, Vec<u8>) = data_url_to_data(target_url);
if !media_type.eq_ignore_ascii_case("text/html") {
eprintln!("Unsupported data URL media type");
process::exit(1);
}
base_url = str!(target_url);
dom = html_to_dom(&text);
dom = html_to_dom(&String::from_utf8_lossy(&data));
} else {
process::exit(1);
}
let time_saved = time::now_utc();
walk_and_embed_assets(
&mut cache,
&client,
&base_url,
&dom.document,
app_args.no_css,
app_args.no_fonts,
app_args.no_frames,
app_args.no_js,
app_args.no_images,
app_args.silent,
app_args.no_frames,
);
let html: String = stringify_document(
let mut html: String = stringify_document(
&dom.document,
app_args.no_css,
app_args.no_frames,
@@ -154,6 +156,32 @@ fn main() {
app_args.isolate,
);
if !app_args.no_metadata {
// Safe to unwrap (we just put this through an HTTP request)
let mut clean_url = Url::parse(&base_url).unwrap();
clean_url.set_fragment(None);
// Don't include credentials
clean_url.set_username("").unwrap();
clean_url.set_password(None).unwrap();
let metadata_comment = if is_http_url(&base_url) {
format!(
"<!-- Saved from {} at {} using {} v{} -->\n",
&clean_url,
time_saved.rfc3339(),
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION"),
)
} else {
format!(
"<!-- Saved from local source at {} using {} v{} -->\n",
time_saved.rfc3339(),
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION"),
)
};
html.insert_str(0, &metadata_comment);
}
output
.writeln_str(&html)
.expect("Could not write HTML output");

View File

@@ -62,7 +62,7 @@ fn passing_bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
// STDERR should contain error description
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
"Unsupported data URL input\n"
"Unsupported data URL media type\n"
);
// The exit code should be 1
@@ -75,6 +75,7 @@ fn passing_bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
fn passing_isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-I")
.arg("data:text/html,Hello%2C%20World!")
.output()
@@ -101,6 +102,7 @@ fn passing_isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
fn passing_remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-c")
.arg("data:text/html,<style>body{background-color:pink}</style>Hello")
.output()
@@ -128,6 +130,7 @@ fn passing_remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>>
fn passing_remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-f")
.arg("data:text/html,<iframe src=\"https://google.com\"></iframe>Hi")
.output()
@@ -154,6 +157,7 @@ fn passing_remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error
fn passing_remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-i")
.arg("data:text/html,<img src=\"https://google.com\"/>Hi")
.output()
@@ -189,6 +193,7 @@ Hi\
fn passing_remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-j")
.arg("data:text/html,<script>alert(2)</script>Hi")
.output()
@@ -220,10 +225,11 @@ fn passing_local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
let cwd_normalized: String =
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let out = cmd
.arg("-M")
.arg(if cfg!(windows) {
"src\\tests\\data\\local-file.html"
"src\\tests\\data\\basic\\local-file.html"
} else {
"src/tests/data/local-file.html"
"src/tests/data/basic/local-file.html"
})
.output()
.unwrap();
@@ -236,9 +242,9 @@ fn passing_local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
<!DOCTYPE html><html lang=\"en\"><head>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \
<link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\" rel=\"stylesheet\" type=\"text/css\">\n \
<link href=\"data:text/css;base64,\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\" src=\"\">\n \
<link rel=\"stylesheet\" type=\"text/css\" href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\
@@ -251,9 +257,9 @@ fn passing_local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file}{cwd}/src/tests/data/local-file.html\n\
{file}{cwd}/src/tests/data/local-style.css\n\
{file}{cwd}/src/tests/data/local-script.js\n\
{file}{cwd}/src/tests/data/basic/local-file.html\n\
{file}{cwd}/src/tests/data/basic/local-style.css\n\
{file}{cwd}/src/tests/data/basic/local-script.js\n\
",
file = file_url_protocol,
cwd = cwd_normalized
@@ -274,15 +280,16 @@ fn passing_local_file_target_input_absolute_target_path() -> Result<(), Box<dyn
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-jciI")
.arg(if cfg!(windows) {
format!(
"{cwd}\\src\\tests\\data\\local-file.html",
"{cwd}\\src\\tests\\data\\basic\\local-file.html",
cwd = cwd.to_str().unwrap()
)
} else {
format!(
"{cwd}/src/tests/data/local-file.html",
"{cwd}/src/tests/data/basic/local-file.html",
cwd = cwd.to_str().unwrap()
)
})
@@ -299,12 +306,12 @@ fn passing_local_file_target_input_absolute_target_path() -> Result<(), Box<dyn
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \
<link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n \
<link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<link rel=\"stylesheet\" type=\"text/css\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\" src=\"{empty_image}\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script src=\"\"></script>\n\n\n\n\
<script></script>\n\n\n\n\
</body></html>\n\
",
empty_image = empty_image!()
@@ -315,7 +322,7 @@ fn passing_local_file_target_input_absolute_target_path() -> Result<(), Box<dyn
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"{file}{cwd}/src/tests/data/local-file.html\n",
"{file}{cwd}/src/tests/data/basic/local-file.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
@@ -334,16 +341,17 @@ fn passing_local_file_url_target_input() -> Result<(), Box<dyn std::error::Error
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
let out = cmd
.arg("-M")
.arg("-cji")
.arg(if cfg!(windows) {
format!(
"{file}{cwd}/src/tests/data/local-file.html",
"{file}{cwd}/src/tests/data/basic/local-file.html",
file = file_url_protocol,
cwd = cwd_normalized,
)
} else {
format!(
"{file}{cwd}/src/tests/data/local-file.html",
"{file}{cwd}/src/tests/data/basic/local-file.html",
file = file_url_protocol,
cwd = cwd_normalized,
)
@@ -360,12 +368,12 @@ fn passing_local_file_url_target_input() -> Result<(), Box<dyn std::error::Error
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \
<link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n \
<link href=\"\" rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<link rel=\"stylesheet\" type=\"text/css\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\" src=\"{empty_image}\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script src=\"\"></script>\n\n\n\n\
<script></script>\n\n\n\n\
</body></html>\n\
",
empty_image = empty_image!()
@@ -377,13 +385,13 @@ fn passing_local_file_url_target_input() -> Result<(), Box<dyn std::error::Error
std::str::from_utf8(&out.stderr).unwrap(),
if cfg!(windows) {
format!(
"{file}{cwd}/src/tests/data/local-file.html\n",
"{file}{cwd}/src/tests/data/basic/local-file.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
} else {
format!(
"{file}{cwd}/src/tests/data/local-file.html\n",
"{file}{cwd}/src/tests/data/basic/local-file.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
@@ -401,14 +409,15 @@ fn passing_security_disallow_local_assets_within_data_url_targets(
) -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("data:text/html,%3Cscript%20src=\"src/tests/data/local-script.js\"%3E%3C/script%3E")
.arg("-M")
.arg("data:text/html,%3Cscript%20src=\"src/tests/data/basic/local-script.js\"%3E%3C/script%3E")
.output()
.unwrap();
// STDOUT should contain HTML with no JS in it
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head><script src=\"\"></script></head><body></body></html>\n"
"<html><head><script></script></head><body></body></html>\n"
);
// STDERR should be empty
@@ -438,7 +447,7 @@ fn passing_embed_file_url_local_asset_within_style_attribute(
file = file_url_prefix,
path = str!(file_svg.path().to_str().unwrap()).replace("\\", "/"),
)?;
let out = cmd.arg(file_html.path()).output().unwrap();
let out = cmd.arg("-M").arg(file_html.path()).output().unwrap();
// STDOUT should contain HTML with data URL for background-image in it
assert_eq!(
@@ -489,7 +498,7 @@ fn passing_css_import_string() -> Result<(), Box<dyn std::error::Error>> {
file = file_url_prefix,
css_path = str!(file_css.path().to_str().unwrap()).replace("\\", "/"),
)?;
let out = cmd.arg(file_html.path()).output().unwrap();
let out = cmd.arg("-M").arg(file_html.path()).output().unwrap();
// STDOUT should contain embedded CSS url()'s
assert_eq!(

View File

@@ -15,7 +15,10 @@ fn passing_empty_input() {
let cache = &mut HashMap::new();
let client = Client::new();
assert_eq!(css::embed_css(cache, &client, "", "", false, false,), "");
assert_eq!(
css::embed_css(cache, &client, "", "", false, false, false,),
""
);
}
#[test]
@@ -37,6 +40,7 @@ height: calc(100vh - 10pt)";
&client,
"https://doesntmatter.local/",
&STYLE,
false,
true,
true,
),
@@ -67,7 +71,7 @@ line-height: -1; \
height: calc(100vh - 10pt)";
assert_eq!(
css::embed_css(cache, &client, "", &STYLE, true, true,),
css::embed_css(cache, &client, "", &STYLE, false, true, true,),
format!(
"/* border: none;*/\
background-image: url('{empty_image}'); \
@@ -95,7 +99,7 @@ fn passing_style_block() {
html > body {}";
assert_eq!(
css::embed_css(cache, &client, "file:///", &CSS, false, true,),
css::embed_css(cache, &client, "file:///", &CSS, false, false, true,),
CSS
);
}
@@ -135,7 +139,10 @@ fn passing_attribute_selectors() {
}
";
assert_eq!(css::embed_css(cache, &client, "", &CSS, false, false,), CSS);
assert_eq!(
css::embed_css(cache, &client, "", &CSS, false, false, false,),
CSS
);
}
#[test]
@@ -158,14 +165,15 @@ fn passing_import_string() {
"https://doesntmatter.local/",
&CSS,
false,
false,
true,
),
"\
@charset 'UTF-8';\n\
\n\
@import 'data:text/css;base64,ZGF0YTp0ZXh0L2NzcyxodG1se2JhY2tncm91bmQtY29sb3I6IzAwMH0=';\n\
@import 'data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9';\n\
\n\
@import url('data:text/css;base64,ZGF0YTp0ZXh0L2NzcyxodG1se2NvbG9yOiNmZmZ9')\n\
@import url('data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==')\n\
"
);
}
@@ -192,8 +200,118 @@ body {\n \
"https://doesntmatter.local/",
&CSS,
false,
false,
true,
),
CSS
);
}
// Signed percentages and angle dimensions must survive embed_css untouched:
// the fixture covers negative values, unsigned values and values written with
// an explicit leading '+', and the expected output is the input itself.
#[test]
fn passing_transform_percentages_and_degrees() {
let cache = &mut HashMap::new();
let client = Client::new();
const CSS: &str = "\
div {\n \
transform: translate(-50%, -50%) rotate(-45deg);\n\
transform: translate(50%, 50%) rotate(45deg);\n\
transform: translate(+50%, +50%) rotate(+45deg);\n\
}\n\
";
// The three trailing flags are opt_no_fonts, opt_no_images and opt_silent.
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
false,
false,
true,
),
CSS
);
}
// Selectors whose identifiers contain backslash-escaped special characters
// (a class with an escaped ':' and an ID built from a long run of escaped
// punctuation) must come out of embed_css exactly as they went in.
// NOTE(review): the name says "indents" but the fixture exercises escaped
// identifiers; presumably "idents" was meant. Confirm before renaming.
#[test]
fn passing_unusual_indents() {
let cache = &mut HashMap::new();
let client = Client::new();
const CSS: &str = "\
.is\\:good:hover {\n \
color: green\n\
}\n\
\n\
#\\~\\!\\@\\$\\%\\^\\&\\*\\(\\)\\+\\=\\,\\.\\/\\\\\\'\\\"\\;\\:\\?\\>\\<\\[\\]\\{\\}\\|\\`\\# {\n \
color: black\n\
}\n\
";
// The three trailing flags are opt_no_fonts, opt_no_images and opt_silent.
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
false,
false,
true,
),
CSS
);
}
// With opt_no_fonts enabled, both @font-face rules (at-keyword and block) are
// dropped from the output, while the surrounding whitespace and the ordinary
// rules that merely reference the font family by name are kept.
#[test]
fn passing_exclude_fonts() {
let cache = &mut HashMap::new();
let client = Client::new();
// Input: two @font-face rules interleaved with two regular rules.
const CSS: &str = "\
@font-face {\n \
font-family: 'My Font';\n \
src: url(my_font.woff);\n\
}\n\
\n\
#identifier {\n \
font-family: 'My Font' Arial\n\
}\n\
\n\
@font-face {\n \
font-family: 'My Font';\n \
src: url(my_font.woff);\n\
}\n\
\n\
div {\n \
font-family: 'My Font' Verdana\n\
}\n\
";
// Expected: only the whitespace that surrounded each @font-face rule remains
// in its place.
const CSS_OUT: &str = " \
\n\
\n\
#identifier {\n \
font-family: 'My Font' Arial\n\
}\n\
\n \
\n\
\n\
div {\n \
font-family: 'My Font' Verdana\n\
}\n\
";
// Flags in order: opt_no_fonts = true, opt_no_images = false, opt_silent = true.
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
true,
false,
true,
),
CSS_OUT
);
}

View File

@@ -0,0 +1,26 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::html;
    use reqwest::blocking::Client;
    use std::collections::HashMap;

    // Each srcset candidate URL is expected to be replaced by the empty-image
    // placeholder while its descriptor (`1x`, `2x`) is preserved.
    // NOTE(review): presumably the two `true` flags are opt_no_images and
    // opt_silent; confirm against html::embed_srcset.
    #[test]
    fn replace_with_empty_images() {
        let mut asset_cache = HashMap::new();
        let http_client = Client::new();
        let srcset = "small.png 1x, large.png 2x";

        let actual = html::embed_srcset(&mut asset_cache, &http_client, "", &srcset, true, true);
        let expected = format!("{} 1x, {} 2x", empty_image!(), empty_image!());

        assert_eq!(expected, actual);
    }
}

View File

@@ -0,0 +1,92 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::html;

    // Payload shared by the non-empty digest checks.
    const DATA: &str = "abcdef0123456789";

    // The SHA-256 integrity value for empty input must be accepted.
    #[test]
    fn empty_input_sha256() {
        let integrity = "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=";
        assert!(html::has_proper_integrity("".as_bytes(), integrity));
    }

    // A matching `sha256-` integrity value is accepted.
    #[test]
    fn sha256() {
        let integrity = "sha256-9EWAHgy4mSYsm54hmDaIDXPKLRsLnBX7lZyQ6xISNOM=";
        assert!(html::has_proper_integrity(DATA.as_bytes(), integrity));
    }

    // A matching `sha384-` integrity value is accepted.
    #[test]
    fn sha384() {
        let integrity = "sha384-gc9l7omltke8C33bedgh15E12M7RrAQa5t63Yb8APlpe7ZhiqV23+oqiulSJl3Kw";
        assert!(html::has_proper_integrity(DATA.as_bytes(), integrity));
    }

    // A matching `sha512-` integrity value is accepted.
    #[test]
    fn sha512() {
        let integrity = "sha512-zG5B88cYMqcdiMi9gz0XkOFYw2BpjeYdn5V6+oFrMgSNjRpqL7EF8JEwl17ztZbK3N7I/tTwp3kxQbN1RgFBww==";
        assert!(html::has_proper_integrity(DATA.as_bytes(), integrity));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::html;

    // Payload shared by the non-empty rejection checks.
    const DATA: &str = "abcdef0123456789";

    // An empty integrity string never validates non-empty data.
    #[test]
    fn empty_hash() {
        let accepted = html::has_proper_integrity(DATA.as_bytes(), "");
        assert!(!accepted);
    }

    // An empty integrity string does not validate empty data either.
    #[test]
    fn empty_input_empty_hash() {
        let accepted = html::has_proper_integrity("".as_bytes(), "");
        assert!(!accepted);
    }

    // A bogus digest behind the `sha256-` prefix is rejected.
    #[test]
    fn sha256() {
        let accepted = html::has_proper_integrity(DATA.as_bytes(), "sha256-badhash");
        assert!(!accepted);
    }

    // A bogus digest behind the `sha384-` prefix is rejected.
    #[test]
    fn sha384() {
        let accepted = html::has_proper_integrity(DATA.as_bytes(), "sha384-badhash");
        assert!(!accepted);
    }

    // A bogus digest behind the `sha512-` prefix is rejected.
    #[test]
    fn sha512() {
        let accepted = html::has_proper_integrity(DATA.as_bytes(), "sha512-badhash");
        assert!(!accepted);
    }
}

View File

@@ -1,4 +1,6 @@
mod embed_srcset;
mod get_node_name;
mod has_proper_integrity;
mod is_icon;
mod stringify_document;
mod walk_and_embed_assets;

View File

@@ -19,6 +19,7 @@ fn passing_basic() {
let url = "http://localhost";
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
@@ -32,10 +33,11 @@ fn passing_basic() {
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
@@ -55,6 +57,7 @@ fn passing_ensure_no_recursive_iframe() {
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
@@ -68,10 +71,11 @@ fn passing_ensure_no_recursive_iframe() {
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
@@ -91,6 +95,7 @@ fn passing_ensure_no_recursive_frame() {
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
@@ -104,10 +109,11 @@ fn passing_ensure_no_recursive_frame() {
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
@@ -129,6 +135,7 @@ fn passing_no_css() {
let cache = &mut HashMap::new();
let opt_no_css: bool = true;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
@@ -141,10 +148,11 @@ fn passing_no_css() {
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
@@ -154,7 +162,7 @@ fn passing_no_css() {
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head>\
<link rel=\"stylesheet\" href=\"\">\
<link rel=\"stylesheet\">\
<style></style>\
</head>\
<body>\
@@ -173,6 +181,7 @@ fn passing_no_images() {
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = true;
@@ -186,10 +195,11 @@ fn passing_no_images() {
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
@@ -200,7 +210,7 @@ fn passing_no_images() {
format!(
"<html>\
<head>\
<link rel=\"icon\" href=\"\">\
<link rel=\"icon\">\
</head>\
<body>\
<div>\
@@ -221,6 +231,7 @@ fn passing_no_body_background_images() {
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = false;
let opt_no_images: bool = true;
@@ -234,10 +245,11 @@ fn passing_no_body_background_images() {
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
@@ -257,6 +269,7 @@ fn passing_no_frames() {
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = true;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
@@ -269,10 +282,11 @@ fn passing_no_frames() {
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
@@ -292,6 +306,7 @@ fn passing_no_iframes() {
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = true;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
@@ -304,10 +319,11 @@ fn passing_no_iframes() {
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
@@ -330,6 +346,7 @@ fn passing_no_js() {
let cache = &mut HashMap::new();
let opt_no_css: bool = false;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = false;
let opt_no_js: bool = true;
let opt_no_images: bool = false;
@@ -343,10 +360,11 @@ fn passing_no_js() {
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
@@ -354,7 +372,7 @@ fn passing_no_js() {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><script src=\"\"></script>\
"<html><head></head><body><div><script></script>\
<script></script></div></body></html>"
);
}
@@ -369,6 +387,7 @@ fn passing_with_no_integrity() {
let cache = &mut HashMap::new();
let client = Client::new();
let opt_no_css: bool = true;
let opt_no_fonts: bool = false;
let opt_no_frames: bool = true;
let opt_no_js: bool = true;
let opt_no_images: bool = true;
@@ -380,10 +399,11 @@ fn passing_with_no_integrity() {
&url,
&dom.document,
opt_no_css,
opt_no_fonts,
opt_no_frames,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
@@ -392,7 +412,7 @@ fn passing_with_no_integrity() {
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head><title>No integrity</title><link rel=\"something\"><script src=\"\"></script></head>\
<head><title>No integrity</title><link rel=\"something\"><script></script></head>\
<body></body>\
</html>"
);

View File

@@ -30,3 +30,19 @@ fn passing_removes_empty_query_and_empty_fragment() {
"https://somewhere.com/font.eot"
);
}
// A dangling `&` after the last query pair and an empty `#` fragment are both
// expected to be stripped by clean_url.
#[test]
fn passing_removes_empty_query_amp_and_empty_fragment() {
    let cleaned = utils::clean_url("https://somewhere.com/font.eot?a=b&#");

    assert_eq!(cleaned, "https://somewhere.com/font.eot?a=b");
}
// clean_url is expected to leave the user:password part of the URL in place.
#[test]
fn passing_keeps_credentials() {
    let cleaned = utils::clean_url("https://cookie:monster@gibson.internet/");

    assert_eq!(cleaned, "https://cookie:monster@gibson.internet/");
}

View File

@@ -9,42 +9,76 @@ use crate::utils;
#[test]
fn passing_parse_text_html_base64() {
let (media_type, data) = utils::data_url_to_data("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
assert_eq!(media_type, "text/html");
assert_eq!(
utils::data_url_to_text("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg=="),
String::from_utf8_lossy(&data),
"Work expands so as to fill the time available for its completion"
);
}
#[test]
fn passing_parse_text_html_utf8() {
let (media_type, data) = utils::data_url_to_data(
"data:text/html;utf8,Work expands so as to fill the time available for its completion",
);
assert_eq!(media_type, "text/html");
assert_eq!(
utils::data_url_to_text(
"data:text/html;utf8,Work expands so as to fill the time available for its completion"
),
String::from_utf8_lossy(&data),
"Work expands so as to fill the time available for its completion"
);
}
#[test]
fn passing_parse_text_html_plaintext() {
let (media_type, data) = utils::data_url_to_data(
"data:text/html,Work expands so as to fill the time available for its completion",
);
assert_eq!(media_type, "text/html");
assert_eq!(
utils::data_url_to_text(
"data:text/html,Work expands so as to fill the time available for its completion"
),
String::from_utf8_lossy(&data),
"Work expands so as to fill the time available for its completion"
);
}
#[test]
fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() {
let (media_type, data) = utils::data_url_to_data(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
assert_eq!(media_type, "text/html");
assert_eq!(
utils::data_url_to_text(
" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion "
),
String::from_utf8_lossy(&data),
"Work expands so as to fill the time available for its completion"
);
}
// A percent-encoded `#` (%23) in a text/css data URL payload is decoded.
#[test]
fn passing_parse_text_css_url_encoded() {
    let (mime, payload) = utils::data_url_to_data("data:text/css,div{background-color:%23000}");
    let decoded = String::from_utf8_lossy(&payload);

    assert_eq!(mime, "text/css");
    assert_eq!(decoded, "div{background-color:#000}");
}
// A base64 data URL without a media type yields an empty media type and the
// decoded payload.
#[test]
fn passing_parse_no_media_type_base64() {
    let (mime, payload) = utils::data_url_to_data("data:;base64,dGVzdA==");
    let decoded = String::from_utf8_lossy(&payload);

    assert_eq!(mime, "");
    assert_eq!(decoded, "test");
}
// With neither media type nor encoding given, the payload is still
// percent-decoded (%20 becomes a space) and the media type is empty.
#[test]
fn passing_parse_no_media_type_no_encoding() {
    let (mime, payload) = utils::data_url_to_data("data:;,test%20test");
    let decoded = String::from_utf8_lossy(&payload);

    assert_eq!(mime, "");
    assert_eq!(decoded, "test test");
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
@@ -54,5 +88,8 @@ fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() {
// Input is the bare word "data" (no colon, no comma, no payload).
// The assertions expect an empty string from `data_url_to_text`, and an empty
// media type together with empty data from `data_url_to_data`.
// NOTE(review): both helpers are called here; confirm that both still exist
// in the revision this test is compiled against.
#[test]
fn failing_just_word_data() {
assert_eq!(utils::data_url_to_text("data"), "");
let (media_type, data) = utils::data_url_to_data("data");
assert_eq!(media_type, "");
assert_eq!(String::from_utf8_lossy(&data), "");
}

View File

@@ -1,5 +1,3 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
@@ -7,20 +5,35 @@ use crate::utils;
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
// Percent-encoded multi-byte UTF-8 sequences, together with `%20` and `%3D`,
// are expected to decode to the Japanese text with " = " in the middle.
#[test]
fn passing_decode_unicode_characters() {
    let encoded = str!(
        "%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
    );
    let decoded = utils::decode_url(encoded);

    assert_eq!(decoded, "検ヒム解塗ゃッ = サ");
}
#[cfg(test)]
mod passing {
use crate::utils;
#[test]
fn passing_decode_file_url() {
assert_eq!(
utils::decode_url(str!("file:///tmp/space%20here/test%231.html")),
"file:///tmp/space here/test#1.html"
);
#[test]
fn decode_unicode_characters() {
assert_eq!(
utils::decode_url(str!(
"%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
)),
"検ヒム解塗ゃッ = サ"
);
}
#[test]
fn decode_file_url() {
assert_eq!(
utils::decode_url(str!("file:///tmp/space%20here/test%231.html")),
"file:///tmp/space here/test#1.html"
);
}
#[test]
fn plus_sign() {
assert_eq!(
utils::decode_url(str!(
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
)),
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
);
}
}

View File

@@ -21,3 +21,18 @@ fn passing_remove_protocl_and_fragment() {
);
}
}
// File URLs may carry %-encoded characters; the filesystem path produced from
// them is expected to have those decoded (`%20` becomes a space).
// On Windows the expected path also uses backslashes and starts at the drive
// letter; elsewhere it is a plain absolute path.
#[test]
fn passing_decodes_urls() {
    let (file_url, expected_path) = if cfg!(windows) {
        (
            "file:///C:/Documents%20and%20Settings/some-file.html",
            "C:\\Documents and Settings\\some-file.html",
        )
    } else {
        (
            "file:///home/user/My%20Documents",
            "/home/user/My Documents",
        )
    };

    assert_eq!(utils::file_url_to_fs_path(file_url), expected_path);
}

View File

@@ -1,6 +1,6 @@
mod clean_url;
mod data_to_data_url;
mod data_url_to_text;
mod data_url_to_data;
mod decode_url;
mod detect_media_type;
mod file_url_to_fs_path;

View File

@@ -17,38 +17,23 @@ fn passing_read_data_url() {
// If both source and target are data URLs,
// ensure the result contains target data URL
let (retrieved_data, final_url) = utils::retrieve_asset(
let (data, final_url, media_type) = utils::retrieve_asset(
cache,
&client,
"data:text/html;base64,SoUrCe",
"data:text/html;base64,TaRgEt",
true,
"",
"data:text/html;base64,c291cmNl",
"data:text/html;base64,dGFyZ2V0",
false,
)
.unwrap();
assert_eq!(&retrieved_data, "data:text/html;base64,TaRgEt");
assert_eq!(&final_url, "data:text/html;base64,TaRgEt");
}
#[test]
fn passing_read_data_url_ignore_suggested_media_type() {
let cache = &mut HashMap::new();
let client = Client::new();
// Media type parameter should not influence data URLs
let (data, final_url) = utils::retrieve_asset(
cache,
&client,
"data:text/html;base64,SoUrCe",
"data:text/html;base64,TaRgEt",
true,
"image/png",
false,
)
.unwrap();
assert_eq!(&data, "data:text/html;base64,TaRgEt");
assert_eq!(&final_url, "data:text/html;base64,TaRgEt");
assert_eq!(
utils::data_to_data_url(&media_type, &data, &final_url, ""),
utils::data_to_data_url("text/html", "target".as_bytes(), "", "")
);
assert_eq!(
final_url,
utils::data_to_data_url("text/html", "target".as_bytes(), "", "")
);
assert_eq!(&media_type, "text/html");
}
#[test]
@@ -60,29 +45,27 @@ fn passing_read_local_file_with_file_url_parent() {
// Inclusion of local assets from local sources should be allowed
let cwd = env::current_dir().unwrap();
let (data, final_url) = utils::retrieve_asset(
let (data, final_url, _media_type) = utils::retrieve_asset(
cache,
&client,
&format!(
"{file}{cwd}/src/tests/data/local-file.html",
"{file}{cwd}/src/tests/data/basic/local-file.html",
file = file_url_protocol,
cwd = cwd.to_str().unwrap()
),
&format!(
"{file}{cwd}/src/tests/data/local-script.js",
"{file}{cwd}/src/tests/data/basic/local-script.js",
file = file_url_protocol,
cwd = cwd.to_str().unwrap()
),
true,
"application/javascript",
false,
)
.unwrap();
assert_eq!(&data, "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
assert_eq!(utils::data_to_data_url("application/javascript", &data, &final_url, ""), "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==");
assert_eq!(
&final_url,
&format!(
"{file}{cwd}/src/tests/data/local-script.js",
"{file}{cwd}/src/tests/data/basic/local-script.js",
file = file_url_protocol,
cwd = cwd.to_str().unwrap()
)
@@ -102,18 +85,20 @@ fn failing_read_local_file_with_data_url_parent() {
let client = Client::new();
// Inclusion of local assets from data URL sources should not be allowed
let (data, final_url) = utils::retrieve_asset(
match utils::retrieve_asset(
cache,
&client,
"data:text/html;base64,SoUrCe",
"file:///etc/passwd",
true,
"",
false,
)
.unwrap();
assert_eq!(&data, "");
assert_eq!(&final_url, "");
) {
Ok((..)) => {
assert!(false);
}
Err(_) => {
assert!(true);
}
}
}
#[test]
@@ -122,16 +107,18 @@ fn failing_read_local_file_with_https_parent() {
let client = Client::new();
// Inclusion of local assets from remote sources should not be allowed
let (data, final_url) = utils::retrieve_asset(
match utils::retrieve_asset(
cache,
&client,
"https://kernel.org/",
"file:///etc/passwd",
true,
"",
false,
)
.unwrap();
assert_eq!(&data, "");
assert_eq!(&final_url, "");
) {
Ok((..)) => {
assert!(false);
}
Err(_) => {
assert!(true);
}
}
}

View File

@@ -30,6 +30,14 @@ const MAGIC: [[&[u8]; 2]; 18] = [
[b"\x1A\x45\xDF\xA3", b"video/webm"],
];
// Media types that `is_plaintext_media_type` reports as plaintext.
// Entries are written in lowercase because that lookup lowercases its
// argument before comparing against this list.
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
"image/svg+xml",
"text/css",
"text/html",
"text/javascript",
"text/plain",
];
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String {
let media_type: String = if media_type.is_empty() {
detect_media_type(data, &url)
@@ -88,6 +96,10 @@ pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
.unwrap_or(false)
}
/// Returns `true` when `media_type`, after lowercasing, equals one of the
/// entries in `PLAINTEXT_MEDIA_TYPES`.
///
/// The comparison is against the whole string, so a value that carries
/// parameters (for example `text/html; charset=utf-8`) is not recognized.
pub fn is_plaintext_media_type(media_type: &str) -> bool {
    let normalized = media_type.to_lowercase();
    PLAINTEXT_MEDIA_TYPES
        .iter()
        .any(|&known| known == normalized.as_str())
}
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
let result = if is_http_url(to.as_ref()) {
to.as_ref().to_string()
@@ -108,65 +120,68 @@ pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
}
}
pub fn clean_url<T: AsRef<str>>(url: T) -> String {
let mut result = Url::parse(url.as_ref()).unwrap();
pub fn clean_url<T: AsRef<str>>(input: T) -> String {
let mut url = Url::parse(input.as_ref()).unwrap();
// Clear fragment
result.set_fragment(None);
url.set_fragment(None);
// Get rid of stray question mark
if result.query() == Some("") {
result.set_query(None);
if url.query() == Some("") {
url.set_query(None);
}
result.to_string()
// Remove empty trailing ampersand(s)
let mut result: String = url.to_string();
while result.ends_with("&") {
result.pop();
}
result
}
pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String {
let parsed_url = Url::parse(url.as_ref()).unwrap_or(Url::parse("http://[::1]").unwrap());
pub fn data_url_to_data<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
let path: String = parsed_url.path().to_string();
let comma_loc: usize = path.find(',').unwrap_or(path.len());
if comma_loc == path.len() {
return str!();
}
let meta_data: String = path.chars().take(comma_loc).collect();
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
let data: String = decode_url(raw_data);
let text: String = decode_url(raw_data);
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
let mut media_type: &str = "";
let mut media_type: String = str!();
let mut encoding: &str = "";
let mut i: i8 = 0;
for item in &meta_data_items {
if i == 0 {
if item.eq_ignore_ascii_case("text/html") {
media_type = item;
continue;
media_type = str!(item);
} else {
if item.eq_ignore_ascii_case("base64")
|| item.eq_ignore_ascii_case("utf8")
|| item.eq_ignore_ascii_case("charset=UTF-8")
{
encoding = item;
}
}
if item.eq_ignore_ascii_case("base64") || item.eq_ignore_ascii_case("utf8") {
encoding = item;
}
i = i + 1;
}
if media_type.eq_ignore_ascii_case("text/html") {
if encoding.eq_ignore_ascii_case("base64") {
String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
} else {
data
}
let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
base64::decode(&text).unwrap_or(vec![])
} else {
str!()
}
text.as_bytes().to_vec()
};
(media_type, data)
}
pub fn decode_url(input: String) -> String {
let input: String = input.replace("+", "%2B");
form_urlencoded::parse(input.as_bytes())
.map(|(key, val)| {
[
@@ -200,66 +215,61 @@ pub fn file_url_to_fs_path(url: &str) -> String {
fs_file_path = fs_file_path.replace("/", "\\");
}
fs_file_path
// File paths should not be %-encoded
decode_url(fs_file_path)
}
pub fn retrieve_asset(
cache: &mut HashMap<String, String>,
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
parent_url: &str,
url: &str,
as_data_url: bool,
media_type: &str,
opt_silent: bool,
) -> Result<(String, String), reqwest::Error> {
) -> Result<(Vec<u8>, String, String), reqwest::Error> {
if url.len() == 0 {
return Ok((str!(), str!()));
// Provoke error
client.get("").send()?;
}
let cache_key = clean_url(&url);
if is_data_url(&url) {
Ok((url.to_string(), url.to_string()))
let (media_type, data) = data_url_to_data(url);
Ok((data, url.to_string(), media_type))
} else if is_file_url(&url) {
// Check if parent_url is also file:///
// (if not, then we don't embed the asset)
if !is_file_url(&parent_url) {
return Ok((str!(), str!()));
// Provoke error
client.get("").send()?;
}
let fs_file_path: String = file_url_to_fs_path(url);
let path = Path::new(&fs_file_path);
let url_fragment = get_url_fragment(url);
if path.exists() {
if !opt_silent {
eprintln!("{}", &url);
}
if as_data_url {
let data_url: String = data_to_data_url(
&media_type,
&fs::read(&fs_file_path).unwrap(),
&fs_file_path,
&url_fragment,
);
Ok((data_url, url.to_string()))
} else {
let data: String = fs::read_to_string(&fs_file_path).expect(url);
Ok((data, url.to_string()))
}
Ok((fs::read(&fs_file_path).expect(""), url.to_string(), str!()))
} else {
Ok((str!(), url.to_string()))
// Provoke error
Err(client.get("").send().unwrap_err())
}
} else {
let cache_key: String = clean_url(&url);
if cache.contains_key(&cache_key) {
// URL is in cache
// URL is in cache, we get and return it
if !opt_silent {
eprintln!("{} (from cache)", &url);
}
let data = cache.get(&cache_key).unwrap();
Ok((data.to_string(), url.to_string()))
Ok((
cache.get(&cache_key).unwrap().to_vec(),
url.to_string(),
str!(),
))
} else {
// URL not in cache, we request it
// URL not in cache, we retrieve the file
let mut response = client.get(url).send()?;
let res_url = response.url().to_string();
@@ -271,34 +281,23 @@ pub fn retrieve_asset(
}
}
let new_cache_key = clean_url(&res_url);
let new_cache_key: String = clean_url(&res_url);
if as_data_url {
// Convert response into a byte array
let mut data: Vec<u8> = vec![];
response.copy_to(&mut data)?;
// Convert response into a byte array
let mut data: Vec<u8> = vec![];
response.copy_to(&mut data)?;
// Attempt to obtain media type by reading the Content-Type header
let media_type = if media_type == "" {
response
.headers()
.get(CONTENT_TYPE)
.and_then(|header| header.to_str().ok())
.unwrap_or(&media_type)
} else {
media_type
};
let url_fragment = get_url_fragment(url);
let data_url = data_to_data_url(&media_type, &data, url, &url_fragment);
// Add to cache
cache.insert(new_cache_key, data_url.clone());
Ok((data_url, res_url))
} else {
let content = response.text().unwrap();
// Add to cache
cache.insert(new_cache_key, content.clone());
Ok((content, res_url))
}
// Attempt to obtain media type by reading the Content-Type header
let media_type = response
.headers()
.get(CONTENT_TYPE)
.and_then(|header| header.to_str().ok())
.unwrap_or("");
// Add to cache
cache.insert(new_cache_key, data.clone());
Ok((data, res_url, media_type.to_string()))
}
}
}