148 Commits

Author SHA1 Message Date
Sunshine
4a5f85ba97 add support for parsing multiple link type (rel) attribute values 2024-12-02 11:34:44 -10:00
Sunshine
2a8d5d7916 add NetBSD CI job 2024-09-25 19:15:45 -04:00
Sunshine
95645fcff4 clean up and format code 2024-09-25 19:15:45 -04:00
Sunshine
b6b358b3bc correct distro codename in apt sources list 2024-09-05 13:56:49 -10:00
Sunshine
8b0635bd84 bump version number (2.8.2 -> 2.8.3) 2024-09-05 19:00:24 -04:00
Sunshine
b685d3a46c update dependencies 2024-09-05 19:00:24 -04:00
Sunshine
82d05fc0f1 only send Referer HTTP header if the parent URL's scheme is http(s) 2024-09-05 18:38:52 -04:00
Orhun Parmaksız
65ac5c36b1 Support disabling vendored OpenSSL while building 2024-09-05 17:36:13 -04:00
Sunshine
1f60a76fcd fix version of shopify/upload-to-release 2024-09-02 18:11:48 -10:00
Sunshine
f067fc2324 bump version number (2.8.1 -> 2.8.2) 2024-09-03 00:01:37 -04:00
Sunshine
49d7585e02 get rid of useless docs 2024-09-02 23:50:09 -04:00
Caleb Hattingh
e25b7bc470 Link openssl statically - fixes #340 2024-09-02 23:40:15 -04:00
Sunshine
329c0568a4 update README.md 2024-08-19 07:21:10 -10:00
Andriy Rakhnin
f151a33c48 update authors 2024-08-16 10:52:58 -04:00
Andriy Rakhnin
ebf96bf1e5 referer_url refactoring 2024-08-16 10:52:58 -04:00
Andriy Rakhnin
c10c78a27d add HTTP referer header for resource requests 2024-08-16 10:52:58 -04:00
Andriy Rakhnin
64e84e4983 couple of cosmetic changes 2024-08-15 14:14:51 -04:00
Andriy Rakhnin
674d4085c7 fix srcset attribute parsing according to the specifications 2024-08-15 14:14:51 -04:00
Bryan Honof
e0fd5d4bb9 Update README.md 2024-07-30 14:54:01 -04:00
Bryan Honof
f4e360f09d Add flox installation option 2024-07-30 14:54:01 -04:00
Sunshine
0b4116f48a bump version in nuget manifest file 2024-06-24 12:06:22 -04:00
Orhun Parmaksız
084981a2ae Update instructions for installing on Arch Linux 2024-06-13 14:20:15 -04:00
Sunshine
a3feb7b721 update dependencies 2024-05-17 11:05:35 -04:00
Sunshine
87eb197e33 do not indent links based on depth in the output 2024-05-17 10:47:25 -04:00
Viktor Szépe
6798cad2b2 Fix typos 2024-03-29 06:24:14 -10:00
Waldir Pimenta
174cb50877 Add installation instructions for NixPkgs 2024-03-27 01:32:17 -10:00
Sunshine
e397a7532d Update README.md 2024-03-26 03:00:12 -10:00
Sunshine
91d8c146a9 Update README.md 2024-03-26 02:52:59 -10:00
Sunshine
67e07b91af provide more installation instructions 2024-03-26 02:51:44 -10:00
Sunshine
f797b8c999 get rid of outdated related projects 2024-03-26 02:05:07 -10:00
Sunshine
60251c6878 switch to new builder for Docker 2024-03-25 03:44:15 -10:00
Sunshine
b70801d55b ignore /target/ when building Docker image 2024-03-25 02:55:06 -10:00
Sunshine
2a50936990 update one of the author's email 2024-03-25 02:32:32 -10:00
Fred Weitendorf
f9e961f088 Add cargo setup to README
I did not have cargo installed on my system when I first tried to build the repo from a fresh git clone (I had not noticed it was a rust project), and it wasn't called out as a dependency for installation from source. I've added it as a noted dependency with some quick, collapsible installation instructions I followed to get it installed.
2024-03-25 01:33:23 -10:00
Thomas Kraxner
d8c3620d00 Scoop as install source 2024-03-25 01:30:17 -10:00
Thomas Merz
4aab0a64ee 🩹 fix README to prevent 'ERROR: failed to solve: invalid reference format: repository name must be lowercase'
and 'docker: invalid reference format: repository name must be lowercase.'
2024-03-25 01:06:43 -10:00
Sunshine
a2155e0af6 switch Rust edition from 2018 to 2021 2024-01-14 07:04:32 -10:00
Sunshine
f7e5527432 upgrade html5ever to 0.26.0 2024-01-14 06:41:46 -10:00
Sunshine
f7dd09d481 get rid of warnings from old version of Shopify/upload-to-release 2024-01-14 06:11:51 -10:00
Sunshine
73c0ceebd4 bump version number (2.8.0 -> 2.8.1) 2024-01-13 16:02:39 -10:00
Sunshine
727eae2e35 get rid of warnings related to new API of base64 crate 2024-01-13 15:57:26 -10:00
Sunshine
b7a38c9f4a Update README.md 2024-01-13 15:44:39 -10:00
vladislav doster
aa556094a4 fix: bump CD workflow runner to ubuntu-20.04
ubuntu-18.04 runners have been deprecated

https://docs.github.com/en/actions/using-jobs/choosing-the-runner-for-a-job#choosing-github-hosted-runners
2024-01-13 15:10:23 -10:00
Sunshine
81b304c558 optimize code 2024-01-13 05:33:54 -10:00
Sunshine
a3e82a2ad8 update dependencies 2024-01-13 05:33:54 -10:00
Sunshine
a5bf1705db bump version number (2.7.0 -> 2.8.0) 2024-01-13 05:33:54 -10:00
Sunshine
78c37958dc add support for using cookie file 2024-01-13 04:50:51 -10:00
Sunshine
20c56a5440 Create FUNDING.yml 2023-09-23 03:29:40 -10:00
Sunshine
37416f827b add installation instruction for Chocolatey 2022-12-05 10:11:24 -10:00
Sunshine
7f123e810b add installation instructions for Arch Linux and Alpine Linux 2022-12-05 10:11:24 -10:00
Sunshine
db04d11d99 add installation instructions for GUIX 2022-11-23 16:39:15 -10:00
Sunshine
1c8d4f1830 Update README.md 2022-11-13 11:16:28 -05:00
Sunshine
1c71e708e1 bump version number (2.6.2 -> 2.7.0), update dependencies 2022-11-10 06:36:32 -10:00
Sunshine
a1bb9a4b74 Update README.md 2022-11-10 06:03:30 -10:00
Sunshine
cf7e368545 Update README.md 2022-11-10 06:02:46 -10:00
Sunshine
c1edde9b3e refine CLI API for white/black-listing of domains 2022-11-10 05:37:36 -10:00
Sunshine
7c0504c4cb Update README.md 2022-11-10 04:45:37 -10:00
Sunshine
1bff2c22ba Update README.md 2022-11-10 04:41:13 -10:00
Sunshine
8113509dcf fix tests 2022-11-10 04:12:31 -10:00
Sunshine
8fc0fc155f parse XML documents, save non-HTML and non-XML targets unparsed 2022-11-10 04:12:31 -10:00
Jakub Jirutka
7c61b462dd disable unnecessary/unused regex features to reduce binary size
This will reduce the monolith binary size by ~15%.
2022-09-20 11:46:26 -04:00
Simone Mosciatti
ef3684025b move to use http instead of https 2022-09-11 14:30:44 -04:00
Simone Mosciatti
db7ee697b3 rewrite small part of the input argument handling
the commit rewrite a small part of the input argument handling, trying
to follow besr rust practices.
We get rid of a variable and of a mutable reference while keeping the
code a bit more coincise.
2022-09-11 14:30:44 -04:00
Sunshine
89ce5029b9 add option to blacklist/whitelist domains 2022-09-01 13:35:52 -10:00
dependabot[bot]
54609b10e5 Bump iana-time-zone from 0.1.44 to 0.1.46 (#316)
Bumps [iana-time-zone](https://github.com/strawlab/iana-time-zone) from 0.1.44 to 0.1.46.
- [Release notes](https://github.com/strawlab/iana-time-zone/releases)
- [Changelog](https://github.com/strawlab/iana-time-zone/blob/main/CHANGELOG.md)
- [Commits](https://github.com/strawlab/iana-time-zone/compare/0.1.44...v0.1.46)

---
updated-dependencies:
- dependency-name: iana-time-zone
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2022-08-31 11:35:38 -10:00
Sunshine
013d93bacc update 3rd-party dependencies and bump version number 2022-08-14 05:12:39 -10:00
Sunshine
0df8613789 Rewrite part of function retrieve_asset, include support for brotli and deflate (#312)
do not crash the app if reqwest throws, add support for deflate & brotli
2022-08-06 19:07:39 -10:00
Sunshine
68a1531a11 Update packages (#313)
update dependencies
2022-08-06 18:21:53 -10:00
Sunshine
99c3be1804 Merge pull request #308 from Y2Z/dependabot/cargo/tokio-1.16.1
Bump tokio from 1.12.0 to 1.16.1
2022-08-06 17:07:18 -10:00
Sunshine
80559e7224 Merge pull request #309 from Y2Z/dependabot/cargo/regex-1.5.5
Bump regex from 1.5.4 to 1.5.5
2022-08-06 16:56:18 -10:00
dependabot[bot]
c5c5f1ca44 Bump regex from 1.5.4 to 1.5.5
Bumps [regex](https://github.com/rust-lang/regex) from 1.5.4 to 1.5.5.
- [Release notes](https://github.com/rust-lang/regex/releases)
- [Changelog](https://github.com/rust-lang/regex/blob/master/CHANGELOG.md)
- [Commits](https://github.com/rust-lang/regex/compare/1.5.4...1.5.5)

---
updated-dependencies:
- dependency-name: regex
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-06-06 21:07:06 +00:00
dependabot[bot]
de6a13a884 Bump tokio from 1.12.0 to 1.16.1
Bumps [tokio](https://github.com/tokio-rs/tokio) from 1.12.0 to 1.16.1.
- [Release notes](https://github.com/tokio-rs/tokio/releases)
- [Commits](https://github.com/tokio-rs/tokio/compare/tokio-1.12.0...tokio-1.16.1)

---
updated-dependencies:
- dependency-name: tokio
  dependency-type: indirect
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-06-06 19:44:19 +00:00
Sunshine
ef16355f9f Merge pull request #303 from timoteostewart/master
fix typo 'non-standart' to 'non-standard'
2022-03-17 04:21:16 -04:00
Tim Stewart
a4dc0ed9b4 fix typo 'non-standart' to 'non-standard' 2022-03-16 17:54:48 -05:00
Sunshine
cd0e366979 Merge pull request #301 from liamwarfield/patch-1
Updated monk project link
2022-02-22 15:22:33 -10:00
Liam Warfield
d4c6c458f9 Updated monk project link
The monk project has recently moved to Github! Just changing the link here to the new repo.
2022-02-22 14:17:40 -07:00
Sunshine
c9970b3a8e Merge pull request #292 from snshn/include-unsafe-eval-origin-for-isolated-documents
Include unsafe-eval origin for isolated documents
2021-12-05 20:26:44 -10:00
Sunshine
404d322b99 make tests pass for newly added 'unsafe-eval' origin addition 2021-12-05 20:16:37 -10:00
Sunshine
1b353d0b46 include unsafe-eval origin for isolated documents 2021-12-05 20:09:26 -10:00
Sunshine
f920a5e4d6 Merge pull request #290 from matildepark/patch-1
README: remove duplicate macports instructions
2021-11-10 20:33:35 -10:00
matildepark
d3ca1ecad3 README: remove duplicate macports instructions 2021-11-10 23:10:31 -05:00
Sunshine
9e057472c6 Update README.md 2021-10-20 16:21:55 -10:00
Sunshine
d453145bf8 Merge pull request #288 from snshn/update-markdown-files
Update Markdown files
2021-10-20 15:54:07 -10:00
Sunshine
8c131d649f update Markdown files 2021-10-20 15:46:08 -10:00
Sunshine
a221fdb368 Merge pull request #287 from snshn/ci-ignore-some-files
Update README files and set CI to ignore irrelevant paths
2021-10-20 15:40:43 -10:00
Sunshine
15dd82e300 update README files, set CI to ignore irrelevant paths 2021-10-20 15:31:54 -10:00
Sunshine
de492caaa5 Merge pull request #286 from snshn/move-test-data
Move test data files under _data_
2021-10-17 22:51:22 -10:00
Sunshine
9096447c70 move test data files under _data_ 2021-10-17 22:46:06 -10:00
Sunshine
354340db86 Merge pull request #285 from snshn/use-percent-encoding-crate
Offload percent decoding to percent-encoding crate
2021-10-17 22:32:10 -10:00
Sunshine
900dd8d163 offload percent decoding to percent-encoding crate 2021-10-17 22:26:11 -10:00
Sunshine
a11c4496b0 Merge pull request #284 from snshn/move-tests-to-upper-level
Get rid of macros, move tests out of src
2021-10-16 21:39:53 -10:00
Sunshine
dd33b16876 Merge pull request #283 from snshn/formatting
Format README.md and annotate workflows
2021-10-16 21:16:53 -10:00
Sunshine
2cc1870033 get rid of macros, move tests out of src 2021-10-16 21:16:37 -10:00
Sunshine
d41e6c041b format README.md and annotate workflows 2021-10-16 18:48:32 -10:00
Sunshine
460a461373 Update README.md 2021-07-14 00:09:41 -10:00
Sunshine
1e6e87b6aa Merge pull request #277 from Oliver-Hanikel/master
Reduce size of Docker image
2021-07-11 11:45:18 -10:00
Oliver Hanikel
54094270b3 Update run-in-container.sh 2021-07-11 20:07:48 +02:00
Oliver Hanikel
e6cf367e23 reduce size of docker image 2021-07-11 20:00:39 +02:00
Sunshine
e8437ecb28 Update README.md 2021-07-10 16:41:30 -10:00
Sunshine
543bebbd8d Merge pull request #275 from snshn/improve-readme-code-snippets
Remove dollar signs from code snippets
2021-07-10 16:40:20 -10:00
Sunshine
dc6c0200bc remove dollar sign from code snippets 2021-07-10 16:32:56 -10:00
Sunshine
04bdb3072f Update README.md 2021-07-08 13:14:37 -10:00
Sunshine
a9228f0522 Merge pull request #274 from snshn/arm64-cd-job
Downgrade AArch64 CD job from Ubuntu 20.04 to Ubuntu 18.04
2021-07-06 15:29:55 -10:00
Sunshine
aae68c4c82 downgrade AArch64 CD job from Ubuntu 20.04 to Ubuntu 18.04 2021-07-06 14:41:56 -10:00
Sunshine
dd23826205 Merge pull request #273 from herbygillot/patch-1
README: add MacPorts install instructions
2021-07-04 21:16:18 -10:00
Herby Gillot
781f4cd3b5 README: add MacPorts install instructions 2021-07-05 03:07:55 -04:00
Sunshine
6826b59ab9 Merge pull request #272 from snshn/new-release
New release (2.6.1)
2021-07-03 19:39:32 -10:00
Sunshine
2be725eeb5 bump version number (2.6.0 -> 2.6.1) 2021-07-03 19:33:09 -10:00
Sunshine
dd2e9ca2e5 update crates 2021-07-03 19:31:55 -10:00
Sunshine
50bccae476 Merge pull request #267 from snshn/aarch64-binary
Add GNU/Linux AArch64 CD job
2021-07-03 00:15:04 -10:00
Sunshine
b3bcb1d85b add GNU/Linux AArch64 CD job 2021-07-03 00:10:14 -10:00
Sunshine
c58d044459 Merge pull request #271 from snshn/fix-charset-detection-mechanism
Fix charset detection logic
2021-07-02 21:47:56 -10:00
Sunshine
eeaea0df16 fix use of wrong charset 2021-07-02 21:35:06 -10:00
Sunshine
2539aac4c0 Merge pull request #265 from snshn/version-bump
Bump version (2.5.0 -> 2.6.0)
2021-06-08 13:16:40 -10:00
Sunshine
03b9af543a bump version (2.5.0 -> 2.6.0) 2021-06-08 13:09:50 -10:00
Sunshine
1bb8141021 Merge pull request #264 from snshn/fixes
Fixes
2021-06-08 13:04:57 -10:00
Sunshine
4bc8043f0f account for charset when creating data URLs 2021-06-08 12:54:16 -10:00
Sunshine
5effa38392 use proper charset detection for linked assets 2021-06-08 12:25:19 -10:00
Sunshine
125aeeec3b improve validation of charset found in HTML, use genuinely infinite timeout 2021-06-08 11:50:46 -10:00
Sunshine
c938ba6a2f modify proper attribute for (i)frame elements 2021-06-08 04:49:14 -10:00
Sunshine
f354affc36 Merge pull request #263 from snshn/save-with-custom-charset
Add option for saving document using custom encoding
2021-06-08 04:15:49 -10:00
Sunshine
7686b2ea64 avoid excessive parsing of HTML into DOM 2021-06-08 03:57:28 -10:00
Sunshine
b29b9a6a7c add option for saving document using custom encoding 2021-06-08 03:39:27 -10:00
Sunshine
cbda57cfa8 Merge pull request #262 from snshn/support-more-encodings
Add support for wider range of charsets
2021-06-08 02:39:24 -10:00
Sunshine
b8aa545e8c add support for wider range of charsets 2021-06-08 02:30:15 -10:00
Sunshine
22a031af5d Merge pull request #256 from snshn/more-tests-fixes-and-improvements
More tests, fixes, improvements
2021-06-02 04:06:37 -10:00
Sunshine
6e6a60b305 Merge branch 'master' into more-tests-fixes-and-improvements 2021-06-02 04:01:41 -10:00
Sunshine
77d6022d84 bump version (2.4.1 -> 2.5.0) 2021-06-02 04:00:18 -10:00
Sunshine
5db19d1a3e update dependencies 2021-06-02 03:58:28 -10:00
Sunshine
a6e891b3c5 add more tests 2021-06-02 03:41:41 -10:00
Sunshine
d7a82a008b Merge pull request #260 from snshn/ie-css-hack-fix
Remove optional trailing space from CSS idents
2021-05-28 23:04:34 -10:00
Sunshine
2369a4dd3c remove optional trailing space from CSS idents 2021-05-28 12:03:19 -10:00
Sunshine
d27e53fb36 Merge pull request #259 from snshn/related-project-monk
Add Monk to related projects in README.md
2021-05-24 10:54:11 -10:00
Sunshine
2cb51477d2 add Monk to related projects in README.md 2021-05-24 01:47:19 -10:00
Sunshine
a308a20411 simplify code of CLI tests 2021-03-15 20:10:50 -10:00
Sunshine
a6ddf1c13a simplify code responsible for processing CSS 2021-03-14 19:42:57 -10:00
Sunshine
8256d17efd Merge pull request #253 from snshn/unwrap-noscript
Make possible to unwrap NOSCRIPT nodes
2021-03-11 22:43:28 -10:00
Sunshine
efa12935ba Merge pull request #254 from snshn/no-containers-md
Get rid of containers.md (now part of README.md)
2021-03-11 22:39:49 -10:00
Sunshine
7126a98023 Merge pull request #255 from snshn/pkgsrc
Add installation instructions using pkgsrc
2021-03-11 22:38:33 -10:00
Sunshine
c7ee3ec6e2 get rid of containers.md (now part of README.md) 2021-03-11 22:27:44 -10:00
Sunshine
c4218031e2 add installation instructions using pkgsrc 2021-03-11 22:26:32 -10:00
Sunshine
6f918f6c1c make possible to unwrap NOSCRIPT nodes 2021-03-11 18:18:39 -10:00
Sunshine
6ecda080e8 Merge pull request #252 from snshn/revamp
Revamp codebase
2021-03-11 14:25:10 -10:00
Sunshine
2e86ee67a5 revamp codebase 2021-03-11 14:15:18 -10:00
Sunshine
359616b901 Update README.md 2021-03-09 16:04:32 -10:00
Sunshine
ea2cdab330 Update README.md 2021-03-09 15:52:23 -10:00
Sunshine
4434823c46 Update README.md 2021-03-09 14:49:10 -10:00
Sunshine
e0a78ffc9d Update README.md 2021-03-09 13:31:15 -10:00
117 changed files with 5875 additions and 4265 deletions

View File

@@ -1 +0,0 @@
docs/arch

1
.dockerignore Normal file
View File

@@ -0,0 +1 @@
/target/

3
.github/FUNDING.yml vendored Normal file
View File

@@ -0,0 +1,3 @@
# These are supported funding model platforms
github: snshn

View File

@@ -3,6 +3,15 @@ name: GNU/Linux
on:
push:
branches: [ master ]
paths-ignore:
- 'assets/'
- 'dist/'
- 'snap/'
- 'Dockerfile'
- 'LICENSE'
- 'Makefile'
- 'monolith.nuspec'
- 'README.md'
jobs:
build:
@@ -17,6 +26,8 @@ jobs:
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- name: Build
run: cargo build --all --locked --verbose

View File

@@ -3,6 +3,15 @@ name: macOS
on:
push:
branches: [ master ]
paths-ignore:
- 'assets/'
- 'dist/'
- 'snap/'
- 'Dockerfile'
- 'LICENSE'
- 'Makefile'
- 'monolith.nuspec'
- 'README.md'
jobs:
build:
@@ -17,6 +26,8 @@ jobs:
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- name: Build
run: cargo build --all --locked --verbose

View File

@@ -3,6 +3,15 @@ name: Windows
on:
push:
branches: [ master ]
paths-ignore:
- 'assets/'
- 'dist/'
- 'snap/'
- 'Dockerfile'
- 'LICENSE'
- 'Makefile'
- 'monolith.nuspec'
- 'README.md'
jobs:
build:
@@ -17,6 +26,8 @@ jobs:
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- name: Build
run: cargo build --all --locked --verbose

View File

@@ -1,4 +1,4 @@
# CD GitHub Actions workflow for Monolith
# CD GitHub Actions workflow for monolith
name: CD
@@ -13,54 +13,95 @@ jobs:
runs-on: windows-2019
steps:
- run: git config --global core.autocrlf false
- name: Checkout the repository
uses: actions/checkout@v2
- name: Build the executable
run: cargo build --release
- uses: Shopify/upload-to-release@1.0.0
- uses: Shopify/upload-to-release@v2.0.0
with:
name: monolith.exe
path: target\release\monolith.exe
repo-token: ${{ secrets.GITHUB_TOKEN }}
gnu_linux_armhf:
runs-on: ubuntu-18.04
runs-on: ubuntu-20.04
steps:
- name: Checkout the repository
uses: actions/checkout@v2
- name: Prepare cross-platform environment
run: |
sudo mkdir -p /cross-build-arm
sudo mkdir /cross-build
sudo touch /etc/apt/sources.list.d/armhf.list
echo "deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ bionic main" | sudo tee -a /etc/apt/sources.list.d/armhf.list
echo "deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ focal main" | sudo tee -a /etc/apt/sources.list.d/armhf.list
sudo apt-get update
sudo apt-get install -y gcc-arm-linux-gnueabihf libc6-armhf-cross libc6-dev-armhf-cross
sudo apt-get download libssl1.1:armhf libssl-dev:armhf
sudo dpkg -x libssl1.1*.deb /cross-build-arm
sudo dpkg -x libssl-dev*.deb /cross-build-arm
sudo dpkg -x libssl1.1*.deb /cross-build
sudo dpkg -x libssl-dev*.deb /cross-build
rustup target add arm-unknown-linux-gnueabihf
echo "C_INCLUDE_PATH=/cross-build-arm/usr/include" >> $GITHUB_ENV
echo "OPENSSL_INCLUDE_DIR=/cross-build-arm/usr/include/arm-linux-gnueabihf" >> $GITHUB_ENV
echo "OPENSSL_LIB_DIR=/cross-build-arm/usr/lib/arm-linux-gnueabihf" >> $GITHUB_ENV
echo "C_INCLUDE_PATH=/cross-build/usr/include" >> $GITHUB_ENV
echo "OPENSSL_INCLUDE_DIR=/cross-build/usr/include/arm-linux-gnueabihf" >> $GITHUB_ENV
echo "OPENSSL_LIB_DIR=/cross-build/usr/lib/arm-linux-gnueabihf" >> $GITHUB_ENV
echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV
echo "RUSTFLAGS=-C linker=arm-linux-gnueabihf-gcc -L/usr/arm-linux-gnueabihf/lib -L/cross-build-arm/usr/lib/arm-linux-gnueabihf -L/cross-build-arm/lib/arm-linux-gnueabihf" >> $GITHUB_ENV
echo "RUSTFLAGS=-C linker=arm-linux-gnueabihf-gcc -L/usr/arm-linux-gnueabihf/lib -L/cross-build/usr/lib/arm-linux-gnueabihf -L/cross-build/lib/arm-linux-gnueabihf" >> $GITHUB_ENV
- name: Build the executable
run: cargo build --release --target=arm-unknown-linux-gnueabihf
- name: Attach artifact to the release
uses: Shopify/upload-to-release@1.0.0
uses: Shopify/upload-to-release@v2.0.0
with:
name: monolith-gnu-linux-armhf
path: target/arm-unknown-linux-gnueabihf/release/monolith
repo-token: ${{ secrets.GITHUB_TOKEN }}
gnu_linux_x86_64:
runs-on: ubuntu-18.04
gnu_linux_aarch64:
runs-on: ubuntu-20.04
steps:
- name: Checkout the repository
uses: actions/checkout@v2
- name: Prepare cross-platform environment
run: |
sudo mkdir /cross-build
sudo touch /etc/apt/sources.list.d/arm64.list
echo "deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ focal main" | sudo tee -a /etc/apt/sources.list.d/arm64.list
sudo apt-get update
sudo apt-get install -y gcc-aarch64-linux-gnu libc6-arm64-cross libc6-dev-arm64-cross
sudo apt-get download libssl1.1:arm64 libssl-dev:arm64
sudo dpkg -x libssl1.1*.deb /cross-build
sudo dpkg -x libssl-dev*.deb /cross-build
rustup target add aarch64-unknown-linux-gnu
echo "C_INCLUDE_PATH=/cross-build/usr/include" >> $GITHUB_ENV
echo "OPENSSL_INCLUDE_DIR=/cross-build/usr/include/aarch64-linux-gnu" >> $GITHUB_ENV
echo "OPENSSL_LIB_DIR=/cross-build/usr/lib/aarch64-linux-gnu" >> $GITHUB_ENV
echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV
echo "RUSTFLAGS=-C linker=aarch64-linux-gnu-gcc -L/usr/aarch64-linux-gnu/lib -L/cross-build/usr/lib/aarch64-linux-gnu" >> $GITHUB_ENV
- name: Build the executable
run: cargo build --release --target=aarch64-unknown-linux-gnu
- name: Attach artifact to the release
uses: Shopify/upload-to-release@v2.0.0
with:
name: monolith-gnu-linux-aarch64
path: target/aarch64-unknown-linux-gnu/release/monolith
repo-token: ${{ secrets.GITHUB_TOKEN }}
gnu_linux_x86_64:
runs-on: ubuntu-20.04
steps:
- name: Checkout the repository
uses: actions/checkout@v2
- name: Build the executable
run: cargo build --release
- uses: Shopify/upload-to-release@1.0.0
- uses: Shopify/upload-to-release@v2.0.0
with:
name: monolith-gnu-linux-x86_64
path: target/release/monolith

34
.github/workflows/ci-netbsd.yml vendored Normal file
View File

@@ -0,0 +1,34 @@
# CI NetBSD GitHub Actions workflow for monolith
name: CI
on:
pull_request:
branches: [ master ]
paths-ignore:
- 'assets/'
- 'dist/'
- 'snap/'
- 'Dockerfile'
- 'LICENSE'
- 'Makefile'
- 'monolith.nuspec'
- 'README.md'
jobs:
build_and_test:
runs-on: ubuntu-latest
name: Build and test (netbsd)
steps:
- name: "Checkout repository"
uses: actions/checkout@v4
- name: Test in NetBSD
uses: vmactions/netbsd-vm@v1
with:
usesh: true
prepare: |
/usr/sbin/pkg_add rust mktools gmake pkgconf cwrappers
run: |
cargo build --all --locked --verbose --no-default-features
cargo test --all --locked --verbose --no-default-features

View File

@@ -1,31 +1,42 @@
# CI GitHub Actions workflow for monolith
name: CI
on:
pull_request:
branches: [ master ]
paths-ignore:
- 'assets/'
- 'dist/'
- 'snap/'
- 'Dockerfile'
- 'LICENSE'
- 'Makefile'
- 'monolith.nuspec'
- 'README.md'
jobs:
build_and_test:
name: Build and test
strategy:
matrix:
os:
- ubuntu-latest
- macos-latest
- windows-latest
rust:
- stable
- beta
- nightly
runs-on: ${{ matrix.os }}
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- name: "Checkout repository"
uses: actions/checkout@v4
- name: Build
run: cargo build --all --locked --verbose
- name: Run tests
run: cargo test --all --locked --verbose
- name: Check code formatting
run: |
rustup component add rustfmt

2138
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,14 +1,15 @@
[package]
name = "monolith"
version = "2.4.1"
version = "2.8.3"
authors = [
"Sunshine <sunshine@uberspace.net>",
"Sunshine <snshn@tutanota.com>",
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
"Emmanuel Delaborde <th3rac25@gmail.com>",
"Emi Simpson <emi@alchemi.dev>",
"rhysd <lin90162@yahoo.co.jp>",
"Andriy Rakhnin <a@rakhnin.com>",
]
edition = "2018"
edition = "2021"
description = "CLI tool for saving web pages as a single HTML file"
homepage = "https://github.com/Y2Z/monolith"
repository = "https://github.com/Y2Z/monolith"
@@ -21,22 +22,36 @@ include = [
]
license = "CC0-1.0"
[dependencies]
atty = "0.2" # Used for highlighting network errors
base64 = "0.13.0"
chrono = "0.4.19" # Used for formatting creation timestamp
clap = "2.33.3"
cssparser = "0.28.1"
html5ever = "0.24.1"
regex = "1.4.3" # Used for parsing srcset
sha2 = "0.9.2" # Used for calculating checksums during integrity checks
url = "2.2.0"
[features]
default = ["vendored-openssl"]
# Compile and statically link a copy of OpenSSL.
vendored-openssl = ["openssl/vendored"]
[dependencies.reqwest]
version = "0.11.0"
[dependencies]
atty = "0.2.14" # Used for highlighting network errors
base64 = "0.22.1" # Used for integrity attributes
chrono = "0.4.38" # Used for formatting output timestamp
clap = "3.2.25" # Used for processing CLI arguments
cssparser = "0.34.0" # Used for dealing with CSS
encoding_rs = "0.8.34" # Used for parsing and converting document charsets
html5ever = "0.27.0" # Used for all things DOM
markup5ever_rcdom = "0.3.0" # Used for manipulating DOM
percent-encoding = "2.3.1" # Used for encoding URLs
sha2 = "0.10.8" # Used for calculating checksums during integrity checks
url = "2.5.2" # Used for parsing URLs
openssl = "0.10.64" # Used for static linking of the OpenSSL library
# Used for parsing srcset and NOSCRIPT
[dependencies.regex]
version = "1.10.6"
default-features = false
features = ["default-tls", "blocking", "gzip"]
features = ["std", "perf-dfa", "unicode-perl"]
# Used for making network requests
[dependencies.reqwest]
version = "0.12.7"
default-features = false
features = ["default-tls", "blocking", "gzip", "brotli", "deflate"]
[dev-dependencies]
assert_cmd = "1.0.2"
tempfile = "3.2.0"
assert_cmd = "2.0.16"

View File

@@ -1,18 +1,22 @@
FROM rust
WORKDIR /usr/local/src/
RUN curl -s https://api.github.com/repos/y2z/monolith/releases/latest \
| grep "tarball_url.*\"," \
| cut -d '"' -f 4 \
| wget -qi - -O monolith.tar.gz
FROM clux/muslrust:stable as builder
RUN curl -L -o monolith.tar.gz $(curl -s https://api.github.com/repos/y2z/monolith/releases/latest \
| grep "tarball_url.*\"," \
| cut -d '"' -f 4)
RUN tar xfz monolith.tar.gz \
&& mv Y2Z-monolith-* monolith \
&& rm monolith.tar.gz
WORKDIR /usr/local/src/monolith
RUN ls -a
WORKDIR monolith/
RUN make install
FROM alpine
RUN apk update && \
apk add --no-cache openssl && \
rm -rf "/var/cache/apk/*"
COPY --from=builder /root/.cargo/bin/monolith /usr/bin/monolith
WORKDIR /tmp
CMD ["/usr/local/cargo/bin/monolith"]
ENTRYPOINT ["/usr/bin/monolith"]

View File

@@ -7,23 +7,30 @@ build:
@cargo build --locked
.PHONY: build
test: build
@cargo test --locked
@cargo fmt --all -- --check
.PHONY: test
lint:
@cargo fmt --all --
.PHONY: lint
clean:
@cargo clean
.PHONY: clean
install:
@cargo install --force --locked --path .
.PHONY: install
lint:
@cargo fmt --all --
.PHONY: lint
lint_check:
@cargo fmt --all -- --check
.PHONY: lint_check
test: build
@cargo test --locked
.PHONY: test
uninstall:
@cargo uninstall
.PHONY: uninstall
clean:
@cargo clean
update-lock-file:
@cargo update
.PHONY: clean

231
README.md
View File

@@ -1,6 +1,6 @@
[![Monolith Build Status for GNU/Linux](https://github.com/Y2Z/monolith/workflows/GNU%2FLinux/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AGNU%2FLinux)
[![Monolith Build Status for macOS](https://github.com/Y2Z/monolith/workflows/macOS/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AmacOS)
[![Monolith Build Status for Windows](https://github.com/Y2Z/monolith/workflows/Windows/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows)
[![monolith build status on GNU/Linux](https://github.com/Y2Z/monolith/workflows/GNU%2FLinux/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AGNU%2FLinux)
[![monolith build status on macOS](https://github.com/Y2Z/monolith/workflows/macOS/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AmacOS)
[![monolith build status on Windows](https://github.com/Y2Z/monolith/workflows/Windows/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows)
```
_____ ______________ __________ ___________________ ___
@@ -18,97 +18,242 @@ Unlike the conventional “Save page as”, `monolith` not only saves the target
If compared to saving websites with `wget -mpk`, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available.
---------------------------------------------------
## Installation
### Using Cargo
$ cargo install monolith
#### Using [Cargo](https://crates.io/crates/monolith) (cross-platform)
#### Via Homebrew (on macOS and GNU/Linux)
$ brew install monolith
```console
cargo install monolith
```
#### Using Snapcraft (on GNU/Linux)
$ snap install monolith
#### Via [Homebrew](https://formulae.brew.sh/formula/monolith) (macOS and GNU/Linux)
#### Using Ports collection (on FreeBSD and TrueOS)
$ cd /usr/ports/www/monolith/
$ make install clean
```console
brew install monolith
```
#### Using pre-built binaries (Windows, ARM-based devices, etc)
Every [release](https://github.com/Y2Z/monolith/releases) contains pre-built binaries for Windows, GNU/Linux, as well as platforms with non-standart CPU architecture.
#### Via [Chocolatey](https://community.chocolatey.org/packages/monolith) (Windows)
#### From source
```console
choco install monolith
```
Dependency: `libssl-dev`
#### Via [Scoop](https://scoop.sh/#/apps?q=monolith) (Windows)
$ git clone https://github.com/Y2Z/monolith.git
$ cd monolith
$ make install
```console
scoop install main/monolith
```
#### Using Containers
#### Via [Winget](https://winstall.app/apps/Y2Z.Monolith) (Windows)
```console
winget install --id=Y2Z.Monolith -e
```
#### Via [MacPorts](https://ports.macports.org/port/monolith/summary) (macOS)
```console
sudo port install monolith
```
#### Using [Snapcraft](https://snapcraft.io/monolith) (GNU/Linux)
```console
snap install monolith
```
#### Using [Guix](https://packages.guix.gnu.org/packages/monolith) (GNU/Linux)
```console
guix install monolith
```
#### Using [NixPkgs](https://search.nixos.org/packages?channel=unstable&show=monolith&query=monolith)
```console
nix-env -iA nixpkgs.monolith
```
#### Using [Flox](https://flox.dev)
```console
flox install monolith
```
#### Using [Pacman](https://archlinux.org/packages/extra/x86_64/monolith) (Arch Linux)
```console
pacman -S monolith
```
#### Using [aports](https://pkgs.alpinelinux.org/packages?name=monolith) (Alpine Linux)
```console
apk add monolith
```
#### Using [XBPS Package Manager](https://voidlinux.org/packages/?q=monolith) (Void Linux)
```console
xbps-install -S monolith
```
#### Using [FreeBSD packages](https://svnweb.freebsd.org/ports/head/www/monolith/) (FreeBSD)
```console
pkg install monolith
```
#### Using [FreeBSD ports](https://www.freshports.org/www/monolith/) (FreeBSD)
```console
cd /usr/ports/www/monolith/
make install clean
```
#### Using [pkgsrc](https://pkgsrc.se/www/monolith) (NetBSD, OpenBSD, Haiku, etc)
```console
cd /usr/pkgsrc/www/monolith
make install clean
```
#### Using [containers](https://www.docker.com/)
```console
docker build -t y2z/monolith .
sudo install -b dist/run-in-container.sh /usr/local/bin/monolith
```
#### From [source](https://github.com/Y2Z/monolith)
Dependencies: `libssl`, `cargo`
<details>
<summary>Install cargo (GNU/Linux)</summary>
Check if cargo is installed
```console
cargo -v
```
If cargo is not already installed, install and add it to your existing ```$PATH``` (paraphrasing the [official installation instructions](https://doc.rust-lang.org/cargo/getting-started/installation.html)):
```console
curl https://sh.rustup.rs -sSf | sh
. "$HOME/.cargo/env"
```
Proceed with installing from source:
</details>
```console
git clone https://github.com/Y2Z/monolith.git
cd monolith
make install
```
#### Using [pre-built binaries](https://github.com/Y2Z/monolith/releases) (Windows, ARM-based devices, etc)
Every release contains pre-built binaries for Windows, GNU/Linux, as well as platforms with non-standard CPU architecture.
$ docker build -t Y2Z/monolith .
$ sudo install -b utils/run-in-container.sh /usr/local/bin/monolith
---------------------------------------------------
## Usage
$ monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roads-lyrics.html
or
$ cat index.html | monolith -aIiFfcMv - > index-processed.html
```console
monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roads-lyrics.html
```
```console
cat some-site-page.html | monolith -aIiFfcMv -b https://some.site/ - > some-site-page-with-assets.html
```
---------------------------------------------------
## Options
- `-a`: Exclude audio sources
- `-b`: Use custom `base URL`
- `-b`: Use `custom base URL`
- `-B`: Forbid retrieving assets from specified domain(s)
- `-c`: Exclude CSS
- `-C`: Read cookies from `file`
- `-d`: Allow retrieving assets only from specified `domain(s)`
- `-e`: Ignore network errors
- `-E`: Save document using `custom encoding`
- `-f`: Omit frames
- `-F`: Exclude web fonts
- `-h`: Print help information
- `-i`: Remove images
- `-I`: Isolate the document
- `-j`: Exclude JavaScript
- `-k`: Accept invalid X.509 (TLS) certificates
- `-M`: Don't add timestamp and URL information
- `-o`: Write output to `file`
- `-n`: Extract contents of NOSCRIPT elements
- `-o`: Write output to `file` (use “-” for STDOUT)
- `-s`: Be quiet
- `-t`: Adjust `network request timeout`
- `-u`: Provide `custom User-Agent`
- `-v`: Exclude videos
---------------------------------------------------
## Whitelisting and blacklisting domains
Options `-d` and `-B` provide control over what domains can be used to retrieve assets from, e.g.:
```console
monolith -I -d example.com -d www.example.com https://example.com -o example-only.html
```
```console
monolith -I -B -d .googleusercontent.com -d googleanalytics.com -d .google.com https://example.com -o example-no-ads.html
```
---------------------------------------------------
## Dynamic content
Monolith doesn't feature a JavaScript engine, hence websites that retrieve and display data after initial load may require usage of additional tools.
For example, Chromium (Chrome) can be used to act as a pre-processor for such pages:
```console
chromium --headless --window-size=1920,1080 --run-all-compositor-stages-before-draw --virtual-time-budget=9000 --incognito --dump-dom https://github.com | monolith - -I -b https://github.com -o github.html
```
---------------------------------------------------
## Proxies
Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables.
---------------------------------------------------
## Contributing
Please open an issue if something is wrong, that helps make this project better.
---------------------------------------------------
## Related projects
- `Monolith Chrome Extension`: https://github.com/rhysd/monolith-of-web
- `Pagesaver`: https://github.com/distributed-mind/pagesaver
- `Personal WayBack Machine`: https://github.com/popey/pwbm
- `Hako`: https://github.com/dmpop/hako
---------------------------------------------------
## License
<a href="http://creativecommons.org/publicdomain/zero/1.0/">
<img src="http://i.creativecommons.org/p/zero/1.0/88x31.png" alt="CC0-1.0" />
</a>
<br />
To the extent possible under law, the author(s) have dedicated all copyright related and neighboring rights to this software to the public domain worldwide.
This software is distributed without any warranty.
---------------------------------------------------
<!-- Microtext -->
<sub>Keep in mind that `monolith` is not aware of your browsers session</sub>

6
utils/run-in-container.sh → dist/run-in-container.sh vendored Normal file → Executable file
View File

@@ -1,10 +1,10 @@
#!/bin/sh
DOCKER=docker
PROG_NAME=monolith
if which podman 2>&1 > /dev/null; then
DOCKER=podman
fi
ORG_NAME=y2z
PROG_NAME=monolith
$DOCKER run --rm Y2Z/$PROG_NAME $PROG_NAME "$@"
$DOCKER run --rm $ORG_NAME/$PROG_NAME "$@"

View File

@@ -1,19 +0,0 @@
# 1. Record architecture decisions
Date: 2019-12-25
## Status
Accepted
## Context
We need to record the architectural decisions made on this project.
## Decision
We will use Architecture Decision Records, as [described by Michael Nygard](http://thinkrelevance.com/blog/2011/11/15/documenting-architecture-decisions).
## Consequences
See Michael Nygard's article, linked above. For a lightweight ADR toolset, see Nat Pryce's [adr-tools](https://github.com/npryce/adr-tools).

View File

@@ -1,19 +0,0 @@
# 2. NOSCRIPT nodes
Date: 2020-04-16
## Status
Accepted
## Context
HTML pages can contain `noscript` nodes, which reveal their contents only in case when JavaScript is not available. Most of the time they contain hidden messages that inform about certain JavaScript-dependent features not being operational, however sometimes can also feature media assets or even iframes.
## Decision
When the document is being saved with or without JavaScript, each `noscript` node should be preserved while its children need to be processed exactly the same way as the rest of the document. This approach will ensure that even hidden remote assets are embedded — since those hidden elements may have to be displayed later in a browser that has JavaScript turned off. An option should be available to "unwrap" all `noscript` nodes in order to make their contents always visible in the document, complimenting the "disable JS" function of the program.
## Consequences
Saved documents will have contents of all `noscript` nodes processed as if they are part of the document's DOM, therefore properly display images encapsulated within `noscript` nodes when being viewed in browsers that have JavaScript turned off (or have no JavaScript support in the first place). The new option to "unwrap" `noscript` elements will help the user ensure that the resulting document always represents what the original web page looked like in a browser that had JavaScript turned off.

View File

@@ -1,21 +0,0 @@
# 3. Network request timeout
Date: 2020-02-15
## Status
Accepted
## Context
A slow network connection and overloaded server may negatively impact network response time.
## Decision
Make the program simulate behavior of popular web browsers and CLI tools, where the default network response timeout is most often set to 120 seconds.
Instead of featuring retries for timed out network requests, the program should have an option to adjust the timeout length, along with making it indefinite when given "0" as its value.
## Consequences
The user is able to retrieve resources that have long response time, as well as obtain full control over how soon, and if at all, network requests should time out.

View File

@@ -1,21 +0,0 @@
# 4. Asset integrity check
Date: 2020-02-23
## Status
Accepted
## Context
In HTML5, `link` and `script` nodes have an attribute named `integrity`, which lets the browser check if the remote file is valid, mostly for the purpose of enhancing page security.
## Decision
In order to replicate the browser's behavior, the program should perform integrity check the same way it does, excluding the linked asset from the final result if such check fails.
The `integrity` attribute should be removed from nodes, as it bears no benefit for resources embedded as data URLs.
## Consequences
Assets that fail to pass the check get excluded from the saved document. Meanwhile, saved documents no longer contain integrity attributes on all `link` and `script` nodes.

View File

@@ -1,19 +0,0 @@
# 5. Asset Minimization
Date: 2020-03-14
## Status
Accepted
## Context
It may look like a good idea to make monolith compress retrieved assets while saving the page for the purpose of reducing the resulting document's file size.
## Decision
Given that the main purpose of this program is to save pages in a convenient to store and share manner — it's mostly an archiving tool, aside from being able to tell monolith to exclude certain types of asests (e.g. images, CSS, JavaScript), it would be outside of scope of this program to implement code for compressing assets. Minimizing files before embedding them does not reduce the amount of data that needs to be transferred either. A separate tool can be used later to compress and minimize pages saved by monolith, if needed.
## Consequences
Monolith will not support modification of original document assets for the purpose of reducing their size, sticking to performing only minimal amount of modifications to the original web page — whatever is needed to provide security or exclude unwanted asset types.

View File

@@ -1,19 +0,0 @@
# 6. Reload and location `meta` tags
Date: 2020-06-25
## Status
Accepted
## Context
HTML documents may contain `meta` tags capable of automatically refreshing the page or redirecting to another location.
## Decision
Since the resulting document is saved to disk and generally not intended to be served over the network, it only makes sense to remove `meta` tags that have `http-equiv` attribute equal to "Refresh" or "Location", in order to prevent them from reloading the page or redirecting to another location.
## Consequences
Monolith will ensure that saved documents do not contain `meta` tags capable of changing location or reloading the page.

View File

@@ -1,19 +0,0 @@
# 7. Network errors
Date: 2020-11-22
## Status
Accepted
## Context
Servers may return information with HTTP response codes other than `200`, however those responses may still contain useful data.
## Decision
Fail by default, notifying of the network error. Add option to continue retrieving assets by treating all response codes as `200`.
## Consequences
Monolith will fail to obtain resources with status other than `200`, unless told to ignore network errors.

View File

@@ -1,40 +0,0 @@
# 8. Base Tag
Date: 2020-12-25
## Status
Accepted
## Context
HTML documents may contain `base` tag, which influences resolution of anchor links and relative URLs as well as dynamically loaded resources.
Sometimes, in order to make certain saved documents function closer to how they operate while being served from a remote server, the `base` tag specifying the source page's URL may need to be added to the document.
There can be only one such tag. If multiple `base` tags are present, only the first encountered tag ends up being used.
## Decision
Adding the `base` tag should be optional — saved documents should not contain the `base` tag unless it was specified by the user, or the document originally had the `base` tag in it.
Existing `href` attribute's value of the original `base` tag should be used for resolving the document's relative links instead of document's own URL (precisely the way browsers do it).
## Consequences
#### If the base tag does not exist in the source document
- If the base tag does not exist in the source document
- With base URL option provided
- use the specified base URL value to retrieve assets, keep original base URL value in the document
- Without base URL option provided
- download document as usual, do not add base tag
- If the base tag already exists in the source document
- With base URL option provided
- we overwrite the original base URL before retrieving assets, keep new base URL value in the document
- Without base URL option provided:
- use the base URL from the original document to retrieve assets, keep original base URL value in the document
The program will obtain ability to retrieve remote assets for non-remote sources (such as data URLs and local files).
The program will obatin ability to get rid of existing base tag values (by provind an empty one).

View File

@@ -1,15 +0,0 @@
1. Run `docker build -t y2z/monolith .` to create a Docker image
2. Create a file named `monolith` which contains:
```sh
#!/bin/sh
docker run --rm \
y2z/monolith \
monolith \
"$@"
```
3. Make the file executable (`chmod +x monolith`) and include it into your `$PATH`
4. Now you should be able to run a containerized build of monolith like this:
`monolith -I https://github.com > document.html`

View File

@@ -1,3 +0,0 @@
# References
- https://content-security-policy.com/

View File

@@ -1,10 +0,0 @@
# Web apps that can be saved with Monolith
These apps retain most or all of their functionality when saved with Monolith
|Converse|https://conversejs.org|
|:--|:--|
|Description|An XMPP client built using web technologies|
|Functionality retained|**full**|
|Command to use|`monolith https://conversejs.org/fullscreen.html > conversejs.html`|
|Monolith version used|2.2.7|

View File

@@ -2,7 +2,7 @@
<package xmlns="http://schemas.microsoft.com/packaging/2015/06/nuspec.xsd">
<metadata>
<id>monolith</id>
<version>2.4.0</version>
<version>2.8.1</version>
<title>Monolith</title>
<authors>Sunshine, Mahdi Robatipoor, Emmanuel Delaborde, Emi Simpson, rhysd</authors>
<projectUrl>https://github.com/Y2Z/monolith</projectUrl>

119
src/cookies.rs Normal file
View File

@@ -0,0 +1,119 @@
use std::time::{SystemTime, UNIX_EPOCH};
use url::Url;
pub struct Cookie {
pub domain: String,
pub include_subdomains: bool,
pub path: String,
pub https_only: bool,
pub expires: u64,
pub name: String,
pub value: String,
}
#[derive(Debug)]
pub enum CookieFileContentsParseError {
InvalidHeader,
}
impl Cookie {
pub fn is_expired(&self) -> bool {
if self.expires == 0 {
return false; // Session, never expires
}
let start = SystemTime::now();
let since_the_epoch = start
.duration_since(UNIX_EPOCH)
.expect("Time went backwards");
self.expires < since_the_epoch.as_secs()
}
pub fn matches_url(&self, url: &str) -> bool {
match Url::parse(&url) {
Ok(url) => {
// Check protocol scheme
match url.scheme() {
"http" => {
if self.https_only {
return false;
}
}
"https" => {}
_ => {
// Should never match URLs of protocols other than HTTP(S)
return false;
}
}
// Check host
if let Some(url_host) = url.host_str() {
if self.domain.starts_with(".") && self.include_subdomains {
if !url_host.to_lowercase().ends_with(&self.domain)
&& !url_host
.eq_ignore_ascii_case(&self.domain[1..self.domain.len() - 1])
{
return false;
}
} else {
if !url_host.eq_ignore_ascii_case(&self.domain) {
return false;
}
}
} else {
return false;
}
// Check path
if !url.path().eq_ignore_ascii_case(&self.path)
&& !url.path().starts_with(&self.path)
{
return false;
}
}
Err(_) => {
return false;
}
}
true
}
}
pub fn parse_cookie_file_contents(
cookie_file_contents: &str,
) -> Result<Vec<Cookie>, CookieFileContentsParseError> {
let mut cookies: Vec<Cookie> = Vec::new();
for (i, line) in cookie_file_contents.lines().enumerate() {
if i == 0 {
// Parsing first line
if !line.eq("# HTTP Cookie File") && !line.eq("# Netscape HTTP Cookie File") {
return Err(CookieFileContentsParseError::InvalidHeader);
}
} else {
// Ignore comment lines
if line.starts_with("#") {
continue;
}
// Attempt to parse values
let mut fields = line.split("\t");
if fields.clone().count() != 7 {
continue;
}
cookies.push(Cookie {
domain: fields.next().unwrap().to_string().to_lowercase(),
include_subdomains: fields.next().unwrap().to_string() == "TRUE",
path: fields.next().unwrap().to_string(),
https_only: fields.next().unwrap().to_string() == "TRUE",
expires: fields.next().unwrap().parse::<u64>().unwrap(),
name: fields.next().unwrap().to_string(),
value: fields.next().unwrap().to_string(),
});
}
}
Ok(cookies)
}

View File

@@ -1,9 +1,12 @@
use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
use cssparser::{
serialize_identifier, serialize_string, ParseError, Parser, ParserInput, SourcePosition, Token,
};
use reqwest::blocking::Client;
use std::collections::HashMap;
use url::Url;
use crate::opts::Options;
use crate::url::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, url_with_fragment};
use crate::url::{create_data_url, resolve_url, EMPTY_IMAGE_DATA_URL};
use crate::utils::retrieve_asset;
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
@@ -26,7 +29,42 @@ const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
"suffix",
"symbols",
];
const CSS_SPECIAL_CHARS: &'static str = "~!@$%^&*()+=,./'\";:?><[]{}|`#";
pub fn embed_css(
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
document_url: &Url,
css: &str,
options: &Options,
) -> String {
let mut input = ParserInput::new(&css);
let mut parser = Parser::new(&mut input);
process_css(
cache,
client,
document_url,
&mut parser,
options,
"",
"",
"",
)
.unwrap()
}
pub fn format_ident(ident: &str) -> String {
let mut res: String = "".to_string();
let _ = serialize_identifier(ident, &mut res);
res = res.trim_end().to_string();
res
}
pub fn format_quoted_string(string: &str) -> String {
let mut res: String = "".to_string();
let _ = serialize_string(string, &mut res);
res
}
pub fn is_image_url_prop(prop_name: &str) -> bool {
CSS_PROPS_WITH_IMAGE_URLS
@@ -35,41 +73,20 @@ pub fn is_image_url_prop(prop_name: &str) -> bool {
.is_some()
}
pub fn enquote(input: String, double: bool) -> String {
if double {
format!("\"{}\"", input.replace("\"", "\\\""))
} else {
format!("'{}'", input.replace("'", "\\'"))
}
}
pub fn escape(value: &str) -> String {
let mut res = str!(&value);
res = res.replace("\\", "\\\\");
for c in CSS_SPECIAL_CHARS.chars() {
res = res.replace(c, format!("\\{}", c).as_str());
}
res
}
pub fn process_css<'a>(
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
parent_url: &str,
document_url: &Url,
parser: &mut Parser,
options: &Options,
depth: u32,
rule_name: &str,
prop_name: &str,
func_name: &str,
) -> Result<String, ParseError<'a, String>> {
let mut result: String = str!();
let mut result: String = "".to_string();
let mut curr_rule: String = str!(rule_name.clone());
let mut curr_prop: String = str!(prop_name.clone());
let mut curr_rule: String = rule_name.to_string();
let mut curr_prop: String = prop_name.to_string();
let mut token: &Token;
let mut token_offset: SourcePosition;
@@ -85,7 +102,7 @@ pub fn process_css<'a>(
match *token {
Token::Comment(_) => {
let token_slice = parser.slice_from(token_offset);
result.push_str(str!(token_slice).as_str());
result.push_str(token_slice);
}
Token::Semicolon => result.push_str(";"),
Token::Colon => result.push_str(":"),
@@ -112,10 +129,9 @@ pub fn process_css<'a>(
process_css(
cache,
client,
parent_url,
document_url,
parser,
options,
depth,
rule_name,
curr_prop.as_str(),
func_name,
@@ -141,13 +157,13 @@ pub fn process_css<'a>(
}
// div...
Token::Ident(ref value) => {
curr_rule = str!();
curr_prop = str!(value);
result.push_str(&escape(value));
curr_rule = "".to_string();
curr_prop = value.to_string();
result.push_str(&format_ident(value));
}
// @import, @font-face, @charset, @media...
Token::AtKeyword(ref value) => {
curr_rule = str!(value);
curr_rule = value.to_string();
if options.no_fonts && curr_rule == "font-face" {
continue;
}
@@ -161,95 +177,92 @@ pub fn process_css<'a>(
Token::QuotedString(ref value) => {
if curr_rule == "import" {
// Reset current at-rule value
curr_rule = str!();
curr_rule = "".to_string();
// Skip empty import values
if value.len() < 1 {
if value.len() == 0 {
result.push_str("''");
continue;
}
let import_full_url = resolve_url(&parent_url, value).unwrap_or_default();
let import_url_fragment = get_url_fragment(import_full_url.clone());
match retrieve_asset(
cache,
client,
&parent_url,
&import_full_url,
options,
depth + 1,
) {
Ok((import_contents, import_final_url, _import_media_type)) => {
let import_data_url = data_to_data_url(
"text/css",
let import_full_url: Url = resolve_url(&document_url, value);
match retrieve_asset(cache, client, &document_url, &import_full_url, options) {
Ok((
import_contents,
import_final_url,
import_media_type,
import_charset,
)) => {
let mut import_data_url = create_data_url(
&import_media_type,
&import_charset,
embed_css(
cache,
client,
&import_final_url,
&String::from_utf8_lossy(&import_contents),
options,
depth + 1,
)
.as_bytes(),
&import_final_url,
);
let assembled_url: String = url_with_fragment(
import_data_url.as_str(),
import_url_fragment.as_str(),
import_data_url.set_fragment(import_full_url.fragment());
result.push_str(
format_quoted_string(&import_data_url.to_string()).as_str(),
);
result.push_str(enquote(assembled_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(import_full_url.clone()) {
let assembled_url: String = url_with_fragment(
import_full_url.as_str(),
import_url_fragment.as_str(),
if import_full_url.scheme() == "http"
|| import_full_url.scheme() == "https"
{
result.push_str(
format_quoted_string(&import_full_url.to_string()).as_str(),
);
result.push_str(enquote(assembled_url, false).as_str());
}
}
}
} else {
if func_name == "url" {
// Skip empty url()'s
if value.len() < 1 {
if value.len() == 0 {
continue;
}
if options.no_images && is_image_url_prop(curr_prop.as_str()) {
result.push_str(enquote(str!(empty_image!()), false).as_str());
result.push_str(format_quoted_string(EMPTY_IMAGE_DATA_URL).as_str());
} else {
let resolved_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(resolved_url.clone());
let resolved_url: Url = resolve_url(&document_url, value);
match retrieve_asset(
cache,
client,
&parent_url,
&document_url,
&resolved_url,
options,
depth + 1,
) {
Ok((data, final_url, media_type)) => {
let data_url = data_to_data_url(&media_type, &data, &final_url);
let assembled_url: String =
url_with_fragment(data_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
Ok((data, final_url, media_type, charset)) => {
let mut data_url =
create_data_url(&media_type, &charset, &data, &final_url);
data_url.set_fragment(resolved_url.fragment());
result.push_str(
format_quoted_string(&data_url.to_string()).as_str(),
);
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(resolved_url.clone()) {
let assembled_url: String = url_with_fragment(
resolved_url.as_str(),
url_fragment.as_str(),
if resolved_url.scheme() == "http"
|| resolved_url.scheme() == "https"
{
result.push_str(
format_quoted_string(&resolved_url.to_string())
.as_str(),
);
result.push_str(enquote(assembled_url, false).as_str());
}
}
}
}
} else {
result.push_str(enquote(str!(value), false).as_str());
result.push_str(format_quoted_string(value).as_str());
}
}
}
@@ -271,7 +284,7 @@ pub fn process_css<'a>(
if *has_sign && *unit_value >= 0. {
result.push_str("+");
}
result.push_str(str!(unit_value * 100.0).as_str());
result.push_str(&(unit_value * 100.0).to_string());
result.push_str("%");
}
Token::Dimension {
@@ -283,21 +296,22 @@ pub fn process_css<'a>(
if *has_sign && *value >= 0. {
result.push_str("+");
}
result.push_str(str!(value).as_str());
result.push_str(str!(unit).as_str());
result.push_str(&value.to_string());
result.push_str(&unit.to_string());
}
// #selector, #id...
Token::IDHash(ref value) => {
curr_rule = str!();
curr_rule = "".to_string();
result.push_str("#");
result.push_str(&escape(value));
result.push_str(&format_ident(value));
}
// url()
Token::UnquotedUrl(ref value) => {
let is_import: bool = curr_rule == "import";
if is_import {
// Reset current at-rule value
curr_rule = str!();
curr_rule = "".to_string();
}
// Skip empty url()'s
@@ -313,63 +327,52 @@ pub fn process_css<'a>(
result.push_str("url(");
if is_import {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
match retrieve_asset(cache, client, &parent_url, &full_url, options, depth + 1)
{
Ok((css, final_url, _media_type)) => {
let data_url = data_to_data_url(
"text/css",
let full_url: Url = resolve_url(&document_url, value);
match retrieve_asset(cache, client, &document_url, &full_url, options) {
Ok((css, final_url, media_type, charset)) => {
let mut data_url = create_data_url(
&media_type,
&charset,
embed_css(
cache,
client,
&final_url,
&String::from_utf8_lossy(&css),
options,
depth + 1,
)
.as_bytes(),
&final_url,
);
let assembled_url: String =
url_with_fragment(data_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
data_url.set_fragment(full_url.fragment());
result.push_str(format_quoted_string(&data_url.to_string()).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(full_url.clone()) {
let assembled_url: String =
url_with_fragment(full_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
if full_url.scheme() == "http" || full_url.scheme() == "https" {
result
.push_str(format_quoted_string(&full_url.to_string()).as_str());
}
}
}
} else {
if is_image_url_prop(curr_prop.as_str()) && options.no_images {
result.push_str(enquote(str!(empty_image!()), false).as_str());
result.push_str(format_quoted_string(EMPTY_IMAGE_DATA_URL).as_str());
} else {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
match retrieve_asset(
cache,
client,
&parent_url,
&full_url,
options,
depth + 1,
) {
Ok((data, final_url, media_type)) => {
let data_url = data_to_data_url(&media_type, &data, &final_url);
let assembled_url: String =
url_with_fragment(data_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
let full_url: Url = resolve_url(&document_url, value);
match retrieve_asset(cache, client, &document_url, &full_url, options) {
Ok((data, final_url, media_type, charset)) => {
let mut data_url =
create_data_url(&media_type, &charset, &data, &final_url);
data_url.set_fragment(full_url.fragment());
result
.push_str(format_quoted_string(&data_url.to_string()).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
if is_http_url(full_url.clone()) {
let assembled_url: String =
url_with_fragment(full_url.as_str(), url_fragment.as_str());
result.push_str(enquote(assembled_url, false).as_str());
if full_url.scheme() == "http" || full_url.scheme() == "https" {
result.push_str(
format_quoted_string(&full_url.to_string()).as_str(),
);
}
}
}
@@ -377,6 +380,7 @@ pub fn process_css<'a>(
}
result.push_str(")");
}
// =
Token::Delim(ref value) => result.push_str(&value.to_string()),
Token::Function(ref name) => {
let function_name: &str = &name.clone();
@@ -388,10 +392,9 @@ pub fn process_css<'a>(
process_css(
cache,
client,
parent_url,
document_url,
parser,
options,
depth,
curr_rule.as_str(),
curr_prop.as_str(),
function_name,
@@ -413,28 +416,3 @@ pub fn process_css<'a>(
Ok(result)
}
pub fn embed_css(
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
parent_url: &str,
css: &str,
options: &Options,
depth: u32,
) -> String {
let mut input = ParserInput::new(&css);
let mut parser = Parser::new(&mut input);
process_css(
cache,
client,
parent_url,
&mut parser,
options,
depth,
"",
"",
"",
)
.unwrap()
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,15 +1,7 @@
#[macro_use]
extern crate clap;
#[macro_use]
mod macros;
pub mod cookies;
pub mod css;
pub mod html;
pub mod js;
pub mod opts;
pub mod url;
pub mod utils;
#[cfg(test)]
pub mod tests;

View File

@@ -1,17 +0,0 @@
#[macro_export]
macro_rules! str {
() => {
String::new()
};
($val: expr) => {
ToString::to_string(&$val)
};
}
#[macro_export]
macro_rules! empty_image {
() => {
"data:image/png;base64,\
iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII="
};
}

View File

@@ -1,25 +1,24 @@
use encoding_rs::Encoding;
use markup5ever_rcdom::RcDom;
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use std::collections::HashMap;
use std::env;
use std::fs;
use std::io::{self, prelude::*, Error, Write};
use std::path::Path;
use std::process;
use std::time::Duration;
use url::Url;
use monolith::cookies::parse_cookie_file_contents;
use monolith::html::{
add_favicon, create_metadata_tag, get_base_url, has_favicon, html_to_dom, set_base_url,
stringify_document, walk_and_embed_assets,
add_favicon, create_metadata_tag, get_base_url, get_charset, has_favicon, html_to_dom,
serialize_document, set_base_url, set_charset, walk_and_embed_assets,
};
use monolith::opts::Options;
use monolith::url::{
data_to_data_url, is_data_url, is_file_url, is_http_url, parse_data_url, resolve_url,
};
use monolith::url::{create_data_url, resolve_url};
use monolith::utils::retrieve_asset;
mod macros;
enum Output {
Stdout(io::Stdout),
File(fs::File),
@@ -34,82 +33,136 @@ impl Output {
}
}
fn writeln_str(&mut self, s: &str) -> Result<(), Error> {
fn write(&mut self, bytes: &Vec<u8>) -> Result<(), Error> {
match self {
Output::Stdout(stdout) => {
writeln!(stdout, "{}", s)?;
stdout.write_all(bytes)?;
// Ensure newline at end of output
if bytes.last() != Some(&b"\n"[0]) {
stdout.write(b"\n")?;
}
stdout.flush()
}
Output::File(f) => {
writeln!(f, "{}", s)?;
f.flush()
Output::File(file) => {
file.write_all(bytes)?;
// Ensure newline at end of output
if bytes.last() != Some(&b"\n"[0]) {
file.write(b"\n")?;
}
file.flush()
}
}
}
}
pub fn read_stdin() -> String {
let mut buffer = String::new();
for line in io::stdin().lock().lines() {
buffer += line.unwrap_or_default().as_str();
buffer += "\n";
pub fn read_stdin() -> Vec<u8> {
let mut buffer: Vec<u8> = vec![];
match io::stdin().lock().read_to_end(&mut buffer) {
Ok(_) => buffer,
Err(_) => buffer,
}
buffer
}
fn main() {
let options = Options::from_args();
let original_target: &str = &options.target;
let target_url: &str;
let mut base_url: String;
let mut dom;
let mut use_stdin: bool = false;
let mut options = Options::from_args();
// Pre-process the input
let cwd_normalized: String =
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let path = Path::new(original_target);
let mut target: String = str!(original_target.clone()).replace("\\", "/");
let path_is_relative: bool = path.is_relative();
// Determine exact target URL
if target.clone().len() == 0 {
// Check if target was provided
if options.target.len() == 0 {
if !options.silent {
eprintln!("No target specified");
}
process::exit(1);
} else if target.clone() == "-" {
// Read from pipe (stdin)
use_stdin = true;
// Default target URL to empty data URL; the user can control it via --base-url
target_url = "data:text/html,"
} else if is_http_url(target.clone()) || is_data_url(target.clone()) {
target_url = target.as_str();
} else if is_file_url(target.clone()) {
target_url = target.as_str();
} else if path.exists() {
if !path.is_file() {
if !options.silent {
eprintln!("Local target is not a file: {}", original_target);
}
process::exit(1);
}
target.insert_str(0, if cfg!(windows) { "file:///" } else { "file://" });
if path_is_relative {
target.insert_str(if cfg!(windows) { 8 } else { 7 }, &cwd_normalized);
target.insert_str(
if cfg!(windows) { 8 } else { 7 } + &cwd_normalized.len(),
"/",
);
}
target_url = target.as_str();
} else {
target.insert_str(0, "http://");
target_url = target.as_str();
}
// Define output
let mut output = Output::new(&options.output).expect("Could not prepare output");
// Check if custom encoding is valid
if let Some(custom_encoding) = options.encoding.clone() {
if !Encoding::for_label_no_replacement(custom_encoding.as_bytes()).is_some() {
eprintln!("Unknown encoding: {}", &custom_encoding);
process::exit(1);
}
}
let mut use_stdin: bool = false;
let target_url = match options.target.as_str() {
"-" => {
// Read from pipe (stdin)
use_stdin = true;
// Set default target URL to an empty data URL; the user can set it via --base-url
Url::parse("data:text/html,").unwrap()
}
target => match Url::parse(&target) {
Ok(url) => match url.scheme() {
"data" | "file" | "http" | "https" => url,
unsupported_scheme => {
if !options.silent {
eprintln!("Unsupported target URL type: {}", unsupported_scheme);
}
process::exit(1)
}
},
Err(_) => {
// Failed to parse given base URL (perhaps it's a filesystem path?)
let path: &Path = Path::new(&target);
match path.exists() {
true => match path.is_file() {
true => {
let canonical_path = fs::canonicalize(&path).unwrap();
match Url::from_file_path(canonical_path) {
Ok(url) => url,
Err(_) => {
if !options.silent {
eprintln!(
"Could not generate file URL out of given path: {}",
&target
);
}
process::exit(1);
}
}
}
false => {
if !options.silent {
eprintln!("Local target is not a file: {}", &target);
}
process::exit(1);
}
},
false => {
// It is not a FS path, now we do what browsers do:
// prepend "http://" and hope it points to a website
Url::parse(&format!("http://{hopefully_url}", hopefully_url = &target))
.unwrap()
}
}
}
},
};
// Read and parse cookie file
if let Some(opt_cookie_file) = options.cookie_file.clone() {
match fs::read_to_string(opt_cookie_file) {
Ok(str) => match parse_cookie_file_contents(&str) {
Ok(cookies) => {
options.cookies = cookies;
// for c in &cookies {
// // if !cookie.is_expired() {
// // options.cookies.append(c);
// // }
// }
}
Err(_) => {
eprintln!("Could not parse specified cookie file");
process::exit(1);
}
},
Err(_) => {
eprintln!("Could not read specified cookie file");
process::exit(1);
}
}
}
// Initialize client
let mut cache = HashMap::new();
@@ -120,31 +173,61 @@ fn main() {
HeaderValue::from_str(&user_agent).expect("Invalid User-Agent header specified"),
);
}
let timeout: u64 = if options.timeout > 0 {
options.timeout
let client = if options.timeout > 0 {
Client::builder().timeout(Duration::from_secs(options.timeout))
} else {
std::u64::MAX / 4
};
let client = Client::builder()
.timeout(Duration::from_secs(timeout))
.danger_accept_invalid_certs(options.insecure)
.default_headers(header_map)
.build()
.expect("Failed to initialize HTTP client");
// No timeout is default
Client::builder()
}
.danger_accept_invalid_certs(options.insecure)
.default_headers(header_map)
.build()
.expect("Failed to initialize HTTP client");
// At this stage we assume that the base URL is the same as the target URL
base_url = str!(target_url);
// At first we assume that base URL is the same as target URL
let mut base_url: Url = target_url.clone();
let data: Vec<u8>;
let mut document_encoding: String = "".to_string();
let mut dom: RcDom;
// Retrieve target document
if use_stdin {
dom = html_to_dom(&read_stdin());
} else if is_file_url(target_url) || is_http_url(target_url) {
match retrieve_asset(&mut cache, &client, target_url, target_url, &options, 0) {
Ok((data, final_url, _media_type)) => {
if options.base_url.clone().unwrap_or(str!()).is_empty() {
base_url = final_url
data = read_stdin();
} else if target_url.scheme() == "file"
|| (target_url.scheme() == "http" || target_url.scheme() == "https")
|| target_url.scheme() == "data"
{
match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options) {
Ok((retrieved_data, final_url, media_type, charset)) => {
// Provide output as text without processing it, the way browsers do
if !media_type.eq_ignore_ascii_case("text/html")
&& !media_type.eq_ignore_ascii_case("application/xhtml+xml")
{
// Define output
let mut output =
Output::new(&options.output).expect("Could not prepare output");
// Write retrieved data into STDOUT or file
output
.write(&retrieved_data)
.expect("Could not write output");
// Nothing else to do past this point
process::exit(0);
}
dom = html_to_dom(&String::from_utf8_lossy(&data));
if options
.base_url
.clone()
.unwrap_or("".to_string())
.is_empty()
{
base_url = final_url;
}
data = retrieved_data;
document_encoding = charset;
}
Err(_) => {
if !options.silent {
@@ -153,79 +236,121 @@ fn main() {
process::exit(1);
}
}
} else if is_data_url(target_url) {
let (media_type, data): (String, Vec<u8>) = parse_data_url(target_url);
if !media_type.eq_ignore_ascii_case("text/html") {
if !options.silent {
eprintln!("Unsupported data URL media type");
}
process::exit(1);
}
dom = html_to_dom(&String::from_utf8_lossy(&data));
} else {
process::exit(1);
}
// Use custom base URL if specified, read and use what's in the DOM otherwise
if !options.base_url.clone().unwrap_or(str!()).is_empty() {
if is_data_url(options.base_url.clone().unwrap()) {
if !options.silent {
eprintln!("Data URLs cannot be used as base URL");
// Initial parse
dom = html_to_dom(&data, document_encoding.clone());
// TODO: investigate if charset from filesystem/data URL/HTTP headers
// has say over what's specified in HTML
// Attempt to determine document's charset
if let Some(html_charset) = get_charset(&dom.document) {
if !html_charset.is_empty() {
// Check if the charset specified inside HTML is valid
if let Some(encoding) = Encoding::for_label_no_replacement(html_charset.as_bytes()) {
document_encoding = html_charset;
dom = html_to_dom(&data, encoding.name().to_string());
}
process::exit(1);
} else {
base_url = options.base_url.clone().unwrap();
}
} else {
if let Some(existing_base_url) = get_base_url(&dom.document) {
base_url = resolve_url(target_url, existing_base_url).unwrap();
}
}
// Embed remote assets
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
// Use custom base URL if specified, read and use what's in the DOM otherwise
let custom_base_url: String = options.base_url.clone().unwrap_or("".to_string());
if custom_base_url.is_empty() {
// No custom base URL is specified
// Try to see if document has BASE element
if let Some(existing_base_url) = get_base_url(&dom.document) {
base_url = resolve_url(&target_url, &existing_base_url);
}
} else {
// Custom base URL provided
match Url::parse(&custom_base_url) {
Ok(parsed_url) => {
if parsed_url.scheme() == "file" {
// File base URLs can only work with
// documents saved from filesystem
if target_url.scheme() == "file" {
base_url = parsed_url;
}
} else {
base_url = parsed_url;
}
}
Err(_) => {
// Failed to parse given base URL, perhaps it's a filesystem path?
if target_url.scheme() == "file" {
// Relative paths could work for documents saved from filesystem
let path: &Path = Path::new(&custom_base_url);
if path.exists() {
match Url::from_file_path(fs::canonicalize(&path).unwrap()) {
Ok(file_url) => {
base_url = file_url;
}
Err(_) => {
if !options.silent {
eprintln!(
"Could not map given path to base URL: {}",
custom_base_url
);
}
process::exit(1);
}
}
}
}
}
}
}
// Update or add new BASE tag to reroute network requests and hash-links in the final document
// Traverse through the document and embed remote assets
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options);
// Update or add new BASE element to reroute network requests and hash-links
if let Some(new_base_url) = options.base_url.clone() {
dom = set_base_url(&dom.document, new_base_url);
}
// Request and embed /favicon.ico (unless it's already linked in the document)
if !options.no_images && is_http_url(target_url) && !has_favicon(&dom.document) {
let favicon_ico_url: String = resolve_url(&base_url, "/favicon.ico").unwrap();
if !options.no_images
&& (target_url.scheme() == "http" || target_url.scheme() == "https")
&& !has_favicon(&dom.document)
{
let favicon_ico_url: Url = resolve_url(&base_url, "/favicon.ico");
match retrieve_asset(
&mut cache,
&client,
&base_url,
&favicon_ico_url,
&options,
0,
) {
Ok((data, final_url, media_type)) => {
let favicon_data_url: String = data_to_data_url(&media_type, &data, &final_url);
dom = add_favicon(&dom.document, favicon_data_url);
match retrieve_asset(&mut cache, &client, &target_url, &favicon_ico_url, &options) {
Ok((data, final_url, media_type, charset)) => {
let favicon_data_url: Url =
create_data_url(&media_type, &charset, &data, &final_url);
dom = add_favicon(&dom.document, favicon_data_url.to_string());
}
Err(_) => {
// Failed to retrieve favicon.ico
// Failed to retrieve /favicon.ico
}
}
}
// Save using specified charset, if given
if let Some(custom_encoding) = options.encoding.clone() {
document_encoding = custom_encoding;
dom = set_charset(dom, document_encoding.clone());
}
// Serialize DOM tree
let mut result: String = stringify_document(&dom.document, &options);
let mut result: Vec<u8> = serialize_document(dom, document_encoding, &options);
// Add metadata tag
// Prepend metadata comment tag
if !options.no_metadata {
let metadata_comment: String = create_metadata_tag(&target_url);
result.insert_str(0, &metadata_comment);
if metadata_comment.len() > 0 {
result.insert_str(metadata_comment.len(), "\n");
}
let mut metadata_comment: String = create_metadata_tag(&target_url);
metadata_comment += "\n";
result.splice(0..0, metadata_comment.as_bytes().to_vec());
}
// Write result into stdout or file
output
.writeln_str(&result)
.expect("Could not write HTML output");
// Define output
let mut output = Output::new(&options.output).expect("Could not prepare output");
// Write result into STDOUT or file
output.write(&result).expect("Could not write output");
}

View File

@@ -1,12 +1,19 @@
use clap::{App, Arg};
use clap::{App, Arg, ArgAction};
use std::env;
use crate::cookies::Cookie;
#[derive(Default)]
pub struct Options {
pub no_audio: bool,
pub base_url: Option<String>,
pub blacklist_domains: bool,
pub no_css: bool,
pub cookie_file: Option<String>,
pub cookies: Vec<Cookie>,
pub domains: Option<Vec<String>>,
pub ignore_errors: bool,
pub encoding: Option<String>,
pub no_frames: bool,
pub no_fonts: bool,
pub no_images: bool,
@@ -21,6 +28,7 @@ pub struct Options {
pub no_video: bool,
pub target: String,
pub no_color: bool,
pub unwrap_noscript: bool,
}
const ASCII: &'static str = " \
@@ -41,31 +49,50 @@ const ENV_VAR_TERM: &str = "TERM";
impl Options {
pub fn from_args() -> Options {
let app = App::new(env!("CARGO_PKG_NAME"))
.version(crate_version!())
.author(format!("\n{}", crate_authors!("\n")).as_str())
.about(format!("{}\n{}", ASCII, crate_description!()).as_str())
.args_from_usage("-a, --no-audio 'Removes audio sources'")
.args_from_usage("-b, --base-url=[http://localhost/] 'Sets custom base URL'")
.args_from_usage("-c, --no-css 'Removes CSS'")
.version(env!("CARGO_PKG_VERSION"))
.author(format!("\n{}\n\n", env!("CARGO_PKG_AUTHORS").replace(':', "\n")).as_str())
.about(format!("{}\n{}", ASCII, env!("CARGO_PKG_DESCRIPTION")).as_str())
.args_from_usage("-a, --no-audio 'Remove audio sources'")
.args_from_usage("-b, --base-url=[http://localhost/] 'Set custom base URL'")
.args_from_usage(
"-B, --blacklist-domains 'Treat list of specified domains as blacklist'",
)
.args_from_usage("-c, --no-css 'Remove CSS'")
.args_from_usage("-C, --cookies=[cookies.txt] 'Specify cookie file'")
.arg(
Arg::with_name("domains")
.short('d')
.long("domain")
.takes_value(true)
.value_name("example.com")
.action(ArgAction::Append)
.help("Specify domains to use for white/black-listing"),
)
.args_from_usage("-e, --ignore-errors 'Ignore network errors'")
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
.args_from_usage("-F, --no-fonts 'Removes fonts'")
.args_from_usage("-i, --no-images 'Removes images'")
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
.args_from_usage("-j, --no-js 'Removes JavaScript'")
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
.args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
.args_from_usage("-s, --silent 'Suppresses verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
.args_from_usage("-v, --no-video 'Removes video sources'")
.args_from_usage("-E, --encoding=[UTF-8] 'Enforce custom charset'")
.args_from_usage("-f, --no-frames 'Remove frames and iframes'")
.args_from_usage("-F, --no-fonts 'Remove fonts'")
.args_from_usage("-i, --no-images 'Remove images'")
.args_from_usage("-I, --isolate 'Cut off document from the Internet'")
.args_from_usage("-j, --no-js 'Remove JavaScript'")
.args_from_usage("-k, --insecure 'Allow invalid X.509 (TLS) certificates'")
.args_from_usage("-M, --no-metadata 'Exclude timestamp and source information'")
.args_from_usage(
"-n, --unwrap-noscript 'Replace NOSCRIPT elements with their contents'",
)
.args_from_usage(
"-o, --output=[document.html] 'Write output to <file>, use - for STDOUT'",
)
.args_from_usage("-s, --silent 'Suppress verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'")
.args_from_usage("-v, --no-video 'Remove video sources'")
.arg(
Arg::with_name("target")
.required(true)
.takes_value(true)
.index(1)
.help("URL or file path, use - for stdin"),
.help("URL or file path, use - for STDIN"),
)
.get_matches();
let mut options: Options = Options::default();
@@ -77,9 +104,20 @@ impl Options {
.to_string();
options.no_audio = app.is_present("no-audio");
if let Some(base_url) = app.value_of("base-url") {
options.base_url = Some(str!(base_url));
options.base_url = Some(base_url.to_string());
}
options.blacklist_domains = app.is_present("blacklist-domains");
options.no_css = app.is_present("no-css");
if let Some(cookie_file) = app.value_of("cookies") {
options.cookie_file = Some(cookie_file.to_string());
}
if let Some(encoding) = app.value_of("encoding") {
options.encoding = Some(encoding.to_string());
}
if let Some(domains) = app.get_many::<String>("domains") {
let list_of_domains: Vec<String> = domains.map(|v| v.clone()).collect::<Vec<_>>();
options.domains = Some(list_of_domains);
}
options.ignore_errors = app.is_present("ignore-errors");
options.no_frames = app.is_present("no-frames");
options.no_fonts = app.is_present("no-fonts");
@@ -96,10 +134,11 @@ impl Options {
.parse::<u64>()
.unwrap();
if let Some(user_agent) = app.value_of("user-agent") {
options.user_agent = Some(str!(user_agent));
options.user_agent = Some(user_agent.to_string());
} else {
options.user_agent = Some(DEFAULT_USER_AGENT.to_string());
}
options.unwrap_noscript = app.is_present("unwrap-noscript");
options.no_video = app.is_present("no-video");
options.no_color =

View File

@@ -1,244 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::io::Write;
use std::process::Command;
use tempfile::NamedTempFile;
#[test]
fn local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let cwd_normalized: String =
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let out = cmd
.arg("-M")
.arg(if cfg!(windows) {
"src\\tests\\data\\basic\\local-file.html"
} else {
"src/tests/data/basic/local-file.html"
})
.output()
.unwrap();
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
// STDOUT should contain HTML from the local file
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"\
<!DOCTYPE html><html lang=\"en\"><head>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \
<link rel=\"stylesheet\" type=\"text/css\" href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\
</body></html>\n\
"
);
// STDERR should contain list of retrieved file URLs
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file}{cwd}/src/tests/data/basic/local-file.html\n \
{file}{cwd}/src/tests/data/basic/local-style.css\n \
{file}{cwd}/src/tests/data/basic/local-script.js\n\
",
file = file_url_protocol,
cwd = cwd_normalized
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn local_file_target_input_absolute_target_path() -> Result<(), Box<dyn std::error::Error>> {
let cwd = env::current_dir().unwrap();
let cwd_normalized: String = str!(cwd.to_str().unwrap()).replace("\\", "/");
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg("-jciI")
.arg(if cfg!(windows) {
format!(
"{cwd}\\src\\tests\\data\\basic\\local-file.html",
cwd = cwd.to_str().unwrap()
)
} else {
format!(
"{cwd}/src/tests/data/basic/local-file.html",
cwd = cwd.to_str().unwrap()
)
})
.output()
.unwrap();
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
// STDOUT should contain HTML from the local file
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
format!(
"\
<!DOCTYPE html><html lang=\"en\"><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \
<link rel=\"stylesheet\" type=\"text/css\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img src=\"{empty_image}\" alt=\"\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script></script>\n\n\n\n\
</body></html>\n\
",
empty_image = empty_image!()
)
);
// STDERR should contain only the target file
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"{file}{cwd}/src/tests/data/basic/local-file.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn local_file_url_target_input() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let cwd_normalized: String =
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
let out = cmd
.arg("-M")
.arg("-cji")
.arg(if cfg!(windows) {
format!(
"{file}{cwd}/src/tests/data/basic/local-file.html",
file = file_url_protocol,
cwd = cwd_normalized,
)
} else {
format!(
"{file}{cwd}/src/tests/data/basic/local-file.html",
file = file_url_protocol,
cwd = cwd_normalized,
)
})
.output()
.unwrap();
// STDOUT should contain HTML from the local file
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
format!(
"\
<!DOCTYPE html><html lang=\"en\"><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \
<link rel=\"stylesheet\" type=\"text/css\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img src=\"{empty_image}\" alt=\"\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script></script>\n\n\n\n\
</body></html>\n\
",
empty_image = empty_image!()
)
);
// STDERR should contain list of retrieved file URLs
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
if cfg!(windows) {
format!(
"{file}{cwd}/src/tests/data/basic/local-file.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
} else {
format!(
"{file}{cwd}/src/tests/data/basic/local-file.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
}
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn embed_file_url_local_asset_within_style_attribute() -> Result<(), Box<dyn std::error::Error>>
{
let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let mut file_svg = NamedTempFile::new()?;
writeln!(file_svg, "<svg version=\"1.1\" baseProfile=\"full\" width=\"300\" height=\"200\" xmlns=\"http://www.w3.org/2000/svg\">\
<rect width=\"100%\" height=\"100%\" fill=\"red\" />\
<circle cx=\"150\" cy=\"100\" r=\"80\" fill=\"green\" />\
<text x=\"150\" y=\"125\" font-size=\"60\" text-anchor=\"middle\" fill=\"white\">SVG</text>\
</svg>\n")?;
let mut file_html = NamedTempFile::new()?;
writeln!(
file_html,
"<div style='background-image: url(\"{file}{path}\")'></div>\n",
file = file_url_prefix,
path = str!(file_svg.path().to_str().unwrap()).replace("\\", "/"),
)?;
let out = cmd.arg("-M").arg(file_html.path()).output().unwrap();
// STDOUT should contain HTML with date URL for background-image in it
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head></head><body><div style=\"background-image: url('data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiBmaWxsPSJyZWQiIC8+PGNpcmNsZSBjeD0iMTUwIiBjeT0iMTAwIiByPSI4MCIgZmlsbD0iZ3JlZW4iIC8+PHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+PC9zdmc+Cgo=')\"></div>\n\n</body></html>\n"
);
// STDERR should list temporary files that got retrieved
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file}{html_path}\n \
{file}{svg_path}\n\
",
file = file_url_prefix,
html_path = str!(file_html.path().to_str().unwrap()).replace("\\", "/"),
svg_path = str!(file_svg.path().to_str().unwrap()).replace("\\", "/"),
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
}

View File

@@ -1,51 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::process::Command;
#[test]
fn change_encoding_to_utf_8() -> Result<(), Box<dyn std::error::Error>> {
let cwd = env::current_dir().unwrap();
let cwd_normalized: String = str!(cwd.to_str().unwrap()).replace("\\", "/");
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd
.arg("-M")
.arg(if cfg!(windows) {
"src\\tests\\data\\unusual_encodings\\iso-8859-1.html"
} else {
"src/tests/data/unusual_encodings/iso-8859-1.html"
})
.output()
.unwrap();
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
// STDOUT should contain newly added base URL
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head>\n <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n </head>\n <body>\n © Some Company\n \n\n</body></html>\n"
);
// STDERR should contain only the target file
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"{file}{cwd}/src/tests/data/unusual_encodings/iso-8859-1.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
}

View File

@@ -1,53 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use crate::css;
#[test]
fn empty_input_single_quotes() {
assert_eq!(css::enquote(str!(""), false), "''");
}
#[test]
fn empty_input_double_quotes() {
assert_eq!(css::enquote(str!(""), true), "\"\"");
}
#[test]
fn apostrophes_single_quotes() {
assert_eq!(
css::enquote(str!("It's a lovely day, don't you think?"), false),
"'It\\'s a lovely day, don\\'t you think?'"
);
}
#[test]
fn apostrophes_double_quotes() {
assert_eq!(
css::enquote(str!("It's a lovely day, don't you think?"), true),
"\"It's a lovely day, don't you think?\""
);
}
#[test]
fn feet_and_inches_single_quotes() {
assert_eq!(
css::enquote(str!("5'2\", 6'5\""), false),
"'5\\'2\", 6\\'5\"'"
);
}
#[test]
fn feet_and_inches_double_quotes() {
assert_eq!(
css::enquote(str!("5'2\", 6'5\""), true),
"\"5'2\\\", 6'5\\\"\""
);
}
}

View File

@@ -1,372 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use html5ever::serialize::{serialize, SerializeOpts};
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::html;
use crate::opts::Options;
#[test]
fn basic() {
let cache = &mut HashMap::new();
let html = "<div><P></P></div>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let mut options = Options::default();
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><p></p></div></body></html>"
);
}
#[test]
fn ensure_no_recursive_iframe() {
let html = "<div><P></P><iframe src=\"\"></iframe></div>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let mut options = Options::default();
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><p></p><iframe src=\"\"></iframe></div></body></html>"
);
}
#[test]
fn ensure_no_recursive_frame() {
let html = "<frameset><frame src=\"\"></frameset>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let mut options = Options::default();
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
);
}
#[test]
fn no_css() {
let html = "<link rel=\"stylesheet\" href=\"main.css\">\
<link rel=\"alternate stylesheet\" href=\"main.css\">\
<style>html{background-color: #000;}</style>\
<div style=\"display: none;\"></div>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_css = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head>\
<link rel=\"stylesheet\">\
<link rel=\"alternate stylesheet\">\
<style></style>\
</head>\
<body>\
<div></div>\
</body>\
</html>"
);
}
#[test]
fn no_images() {
let html = "<link rel=\"icon\" href=\"favicon.ico\">\
<div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
format!(
"<html>\
<head>\
<link rel=\"icon\">\
</head>\
<body>\
<div>\
<img src=\"{empty_image}\">\
</div>\
</body>\
</html>",
empty_image = empty_image!()
)
);
}
#[test]
fn no_body_background_images() {
let html =
"<body background=\"no/such/image.png\" background=\"no/such/image2.png\"></body>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body></body></html>"
);
}
#[test]
fn no_frames() {
let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_frames = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
);
}
#[test]
fn no_iframes() {
let html = "<iframe src=\"http://trackbook.com\"></iframe>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_frames = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><iframe src=\"\"></iframe></body></html>"
);
}
#[test]
fn no_js() {
let html = "<div onClick=\"void(0)\">\
<script src=\"http://localhost/assets/some.js\"></script>\
<script>alert(1)</script>\
</div>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_js = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><script></script>\
<script></script></div></body></html>"
);
}
#[test]
fn discards_integrity() {
let html = "<title>No integrity</title>\
<link integrity=\"sha384-...\" rel=\"something\"/>\
<script integrity=\"sha384-...\" src=\"some.js\"></script>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_css = true;
options.no_frames = true;
options.no_js = true;
options.no_images = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head><title>No integrity</title><link rel=\"something\"><script></script></head>\
<body></body>\
</html>"
);
}
#[test]
fn removes_unwanted_meta_tags() {
let html = "<html>\
<head>\
<meta http-equiv=\"Refresh\" value=\"20\"/>\
<meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
</head>\
<body></body>\
</html>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_css = true;
options.no_frames = true;
options.no_js = true;
options.no_images = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html>\
<head>\
<meta http-equiv=\"disabled by monolith (Refresh)\" value=\"20\">\
<meta http-equiv=\"disabled by monolith (Location)\" value=\"https://freebsd.org\">\
</head>\
<body></body>\
</html>"
);
}
#[test]
fn processes_noscript_tags() {
let html = "<html>\
<body>\
<noscript>\
<img src=\"image.png\" />\
</noscript>\
</body>\
</html>";
let dom = html::html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
format!(
"<html>\
<head>\
</head>\
<body>\
<noscript>\
<img src=\"{}\">\
</noscript>\
</body>\
</html>",
empty_image!(),
)
);
}
}

View File

@@ -1,31 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use crate::url;
#[test]
fn encode_string_with_specific_media_type() {
let mime = "application/javascript";
let data = "var word = 'hello';\nalert(word);\n";
let data_url = url::data_to_data_url(mime, data.as_bytes(), "");
assert_eq!(
&data_url,
"data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
);
}
#[test]
fn encode_append_fragment() {
let data = "<svg></svg>\n";
let data_url = url::data_to_data_url("image/svg+xml", data.as_bytes(), "");
assert_eq!(&data_url, "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K");
}
}

View File

@@ -1,39 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use crate::url;
#[test]
fn decode_unicode_characters() {
assert_eq!(
url::decode_url(str!(
"%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
)),
"検ヒム解塗ゃッ = サ"
);
}
#[test]
fn decode_file_url() {
assert_eq!(
url::decode_url(str!("file:///tmp/space%20here/test%231.html")),
"file:///tmp/space here/test#1.html"
);
}
#[test]
fn plus_sign() {
assert_eq!(
url::decode_url(str!(
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
)),
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
);
}
}

View File

@@ -1,41 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use crate::url;
#[test]
fn remove_protocl_and_fragment() {
if cfg!(windows) {
assert_eq!(
url::file_url_to_fs_path("file:///C:/documents/some-path/some-file.svg#fragment"),
"C:\\documents\\some-path\\some-file.svg"
);
} else {
assert_eq!(
url::file_url_to_fs_path("file:///tmp/some-path/some-file.svg#fragment"),
"/tmp/some-path/some-file.svg"
);
}
}
#[test]
fn decodes_urls() {
if cfg!(windows) {
assert_eq!(
url::file_url_to_fs_path("file:///C:/Documents%20and%20Settings/some-file.html"),
"C:\\Documents and Settings\\some-file.html"
);
} else {
assert_eq!(
url::file_url_to_fs_path("file:///home/user/My%20Documents"),
"/home/user/My Documents"
);
}
}
}

View File

@@ -1,48 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use crate::url;
#[test]
fn data_url() {
assert_eq!(
url::get_url_fragment(
"data:image/svg+xml;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h#test"
),
"test"
);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use crate::url;
#[test]
fn https_empty() {
assert_eq!(url::get_url_fragment("https://kernel.org#"), "");
}
#[test]
fn no_fragment() {
assert_eq!(url::get_url_fragment("https://kernel.org"), "");
}
#[test]
fn dummy_data_url() {
assert_eq!(url::get_url_fragment("data:text/html,"), "");
}
}

View File

@@ -1,83 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use crate::url;
#[test]
fn unix_file_url() {
assert!(url::is_file_url(
"file:///home/user/Websites/my-website/index.html"
));
}
#[test]
fn windows_file_url() {
assert!(url::is_file_url(
"file:///C:/Documents%20and%20Settings/user/Websites/my-website/assets/images/logo.png"
));
}
#[test]
fn unix_url_with_backslashes() {
assert!(url::is_file_url(
"file:\\\\\\home\\user\\Websites\\my-website\\index.html"
));
}
#[test]
fn windows_file_url_with_backslashes() {
assert!(url::is_file_url(
"file:\\\\\\C:\\Documents%20and%20Settings\\user\\Websites\\my-website\\assets\\images\\logo.png"
));
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use crate::url;
#[test]
fn url_with_no_protocl() {
assert!(!url::is_file_url("//kernel.org"));
}
#[test]
fn dot_slash_filename() {
assert!(!url::is_file_url("./index.html"));
}
#[test]
fn just_filename() {
assert!(!url::is_file_url("some-local-page.htm"));
}
#[test]
fn https_ip_port_url() {
assert!(!url::is_file_url("https://1.2.3.4:80/www/index.html"));
}
#[test]
fn data_url() {
assert!(!url::is_file_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
}
#[test]
fn just_word_file() {
assert!(!url::is_file_url("file"));
}
}

View File

@@ -1,12 +0,0 @@
mod clean_url;
mod data_to_data_url;
mod decode_url;
mod file_url_to_fs_path;
mod get_url_fragment;
mod is_data_url;
mod is_file_url;
mod is_http_url;
mod parse_data_url;
mod resolve_url;
mod url_has_protocol;
mod url_with_fragment;

View File

@@ -1,40 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use crate::url;
#[test]
fn url_with_fragment_url() {
let url = "https://localhost.localdomain/path/";
let fragment = "test";
let assembled_url = url::url_with_fragment(url, fragment);
assert_eq!(&assembled_url, "https://localhost.localdomain/path/#test");
}
#[test]
fn url_with_fragment_empty_url() {
let url = "https://localhost.localdomain/path/";
let fragment = "";
let assembled_url = url::url_with_fragment(url, fragment);
assert_eq!(&assembled_url, "https://localhost.localdomain/path/");
}
#[test]
fn url_with_fragment_data_url() {
let url = "data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K";
let fragment = "fragment";
let assembled_url = url::url_with_fragment(url, fragment);
assert_eq!(
&assembled_url,
"data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K#fragment"
);
}
}

View File

@@ -1,31 +0,0 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use crate::utils;
#[test]
fn zero() {
assert_eq!(utils::indent(0), "");
}
#[test]
fn one() {
assert_eq!(utils::indent(1), " ");
}
#[test]
fn two() {
assert_eq!(utils::indent(2), " ");
}
#[test]
fn three() {
assert_eq!(utils::indent(3), " ");
}
}

View File

@@ -1,3 +0,0 @@
mod detect_media_type;
mod indent;
mod retrieve_asset;

View File

@@ -1,167 +1,101 @@
use base64;
use url::{form_urlencoded, ParseError, Url};
use base64::prelude::*;
use percent_encoding::percent_decode_str;
use url::Url;
use crate::utils::detect_media_type;
use crate::utils::{detect_media_type, parse_content_type};
pub fn clean_url<T: AsRef<str>>(input: T) -> String {
let mut url = Url::parse(input.as_ref()).unwrap();
pub const EMPTY_IMAGE_DATA_URL: &'static str = "data:image/png;base64,\
iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII=";
// Clear fragment
pub fn clean_url(url: Url) -> Url {
let mut url = url.clone();
// Clear fragment (if any)
url.set_fragment(None);
// Get rid of stray question mark
if url.query() == Some("") {
url.set_query(None);
}
// Remove empty trailing ampersand(s)
let mut result: String = url.to_string();
while result.ends_with("&") {
result.pop();
}
result
url
}
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str) -> String {
pub fn create_data_url(media_type: &str, charset: &str, data: &[u8], final_asset_url: &Url) -> Url {
// TODO: move this block out of this function
let media_type: String = if media_type.is_empty() {
detect_media_type(data, &url)
detect_media_type(data, &final_asset_url)
} else {
media_type.to_string()
};
format!("data:{};base64,{}", media_type, base64::encode(data))
let mut data_url: Url = Url::parse("data:,").unwrap();
let c: String =
if !charset.trim().is_empty() && !charset.trim().eq_ignore_ascii_case("US-ASCII") {
format!(";charset={}", charset.trim())
} else {
"".to_string()
};
data_url.set_path(
format!(
"{}{};base64,{}",
media_type,
c,
BASE64_STANDARD.encode(data)
)
.as_str(),
);
data_url
}
pub fn decode_url(input: String) -> String {
let input: String = input.replace("+", "%2B");
form_urlencoded::parse(input.as_bytes())
.map(|(key, val)| {
[
key.to_string(),
if val.to_string().len() == 0 {
str!()
} else {
str!('=')
},
val.to_string(),
]
.concat()
})
.collect()
}
pub fn file_url_to_fs_path(url: &str) -> String {
if !is_file_url(url) {
return str!();
}
let cutoff_l = if cfg!(windows) { 8 } else { 7 };
let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string());
let url_fragment = get_url_fragment(url);
if url_fragment != "" {
let max_len = fs_file_path.len() - 1 - url_fragment.len();
fs_file_path = fs_file_path[0..max_len].to_string();
}
if cfg!(windows) {
fs_file_path = fs_file_path.replace("/", "\\");
}
// File paths should not be %-encoded
decode_url(fs_file_path)
}
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
match Url::parse(url.as_ref()) {
Ok(parsed_url) => parsed_url.fragment().unwrap_or("").to_string(),
Err(_err) => str!(),
pub fn is_url_and_has_protocol(input: &str) -> bool {
match Url::parse(&input) {
Ok(parsed_url) => {
return parsed_url.scheme().len() > 0;
}
Err(_) => {
return false;
}
}
}
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "data"))
.unwrap_or(false)
}
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "file"))
.unwrap_or(false)
}
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https"))
.unwrap_or(false)
}
pub fn parse_data_url<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
let parsed_url: Url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
let path: String = parsed_url.path().to_string();
pub fn parse_data_url(url: &Url) -> (String, String, Vec<u8>) {
let path: String = url.path().to_string();
let comma_loc: usize = path.find(',').unwrap_or(path.len());
let meta_data: String = path.chars().take(comma_loc).collect();
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
// Split data URL into meta data and raw data
let content_type: String = path.chars().take(comma_loc).collect();
let data: String = path.chars().skip(comma_loc + 1).collect();
let text: String = decode_url(raw_data);
// Parse meta data
let (media_type, charset, is_base64) = parse_content_type(&content_type);
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
let mut media_type: String = str!();
let mut encoding: &str = "";
let mut i: i8 = 0;
for item in &meta_data_items {
if i == 0 {
media_type = str!(item);
} else {
if item.eq_ignore_ascii_case("base64")
|| item.eq_ignore_ascii_case("utf8")
|| item.eq_ignore_ascii_case("charset=UTF-8")
{
encoding = item;
}
}
i = i + 1;
}
let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
base64::decode(&text).unwrap_or(vec![])
// Parse raw data into vector of bytes
let text: String = percent_decode_str(&data).decode_utf8_lossy().to_string();
let blob: Vec<u8> = if is_base64 {
BASE64_STANDARD.decode(&text).unwrap_or(vec![])
} else {
text.as_bytes().to_vec()
};
(media_type, data)
(media_type, charset, blob)
}
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
let result = if is_http_url(to.as_ref()) {
to.as_ref().to_string()
} else {
Url::parse(from.as_ref())?
.join(to.as_ref())?
.as_ref()
.to_string()
};
Ok(result)
pub fn get_referer_url(url: Url) -> Url {
let mut url = url.clone();
// Spec: https://httpwg.org/specs/rfc9110.html#field.referer
// Must not include the fragment and userinfo components of the URI
url.set_fragment(None);
url.set_username(&"").unwrap();
url.set_password(None).unwrap();
url
}
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme().len() > 0))
.unwrap_or(false)
}
pub fn url_with_fragment(url: &str, fragment: &str) -> String {
let mut result = str!(&url);
if !fragment.is_empty() {
result += "#";
result += fragment;
pub fn resolve_url(from: &Url, to: &str) -> Url {
match Url::parse(&to) {
Ok(parsed_url) => parsed_url,
Err(_) => match from.join(to) {
Ok(joined) => joined,
Err(_) => Url::parse("data:,").unwrap(),
},
}
result
}

View File

@@ -1,16 +1,15 @@
use reqwest::blocking::Client;
use reqwest::header::CONTENT_TYPE;
use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE, COOKIE, REFERER};
use std::collections::HashMap;
use std::fs;
use std::path::Path;
use std::path::{Path, PathBuf};
use url::Url;
use crate::opts::Options;
use crate::url::{clean_url, file_url_to_fs_path, is_data_url, is_file_url, parse_data_url};
use crate::url::{clean_url, get_referer_url, parse_data_url};
const ANSI_COLOR_RED: &'static str = "\x1b[31m";
const ANSI_COLOR_RESET: &'static str = "\x1b[0m";
const INDENT: &'static str = " ";
const MAGIC: [[&[u8]; 2]; 18] = [
// Image
[b"GIF87a", b"image/gif"],
@@ -36,26 +35,117 @@ const MAGIC: [[&[u8]; 2]; 18] = [
];
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
"application/javascript",
"application/json",
"image/svg+xml",
// "text/css",
// "text/csv",
// "text/html",
// "text/javascript",
// "text/plain",
];
pub fn detect_media_type(data: &[u8], url: &str) -> String {
for item in MAGIC.iter() {
if data.starts_with(item[0]) {
return String::from_utf8(item[1].to_vec()).unwrap();
pub fn detect_media_type(data: &[u8], url: &Url) -> String {
// At first attempt to read file's header
for magic_item in MAGIC.iter() {
if data.starts_with(magic_item[0]) {
return String::from_utf8(magic_item[1].to_vec()).unwrap();
}
}
if url.to_lowercase().ends_with(".svg") {
return str!("image/svg+xml");
// If header didn't match any known magic signatures,
// try to guess media type from file name
let parts: Vec<&str> = url.path().split('/').collect();
detect_media_type_by_file_name(parts.last().unwrap())
}
pub fn detect_media_type_by_file_name(filename: &str) -> String {
let filename_lowercased: &str = &filename.to_lowercase();
let parts: Vec<&str> = filename_lowercased.split('.').collect();
let mime: &str = match parts.last() {
Some(v) => match *v {
"avi" => "video/avi",
"bmp" => "image/bmp",
"css" => "text/css",
"flac" => "audio/flac",
"gif" => "image/gif",
"htm" | "html" => "text/html",
"ico" => "image/x-icon",
"jpeg" | "jpg" => "image/jpeg",
"js" => "application/javascript",
"json" => "application/json",
"mp3" => "audio/mpeg",
"mp4" | "m4v" => "video/mp4",
"ogg" => "audio/ogg",
"ogv" => "video/ogg",
"pdf" => "application/pdf",
"png" => "image/png",
"svg" => "image/svg+xml",
"swf" => "application/x-shockwave-flash",
"tif" | "tiff" => "image/tiff",
"txt" => "text/plain",
"wav" => "audio/wav",
"webp" => "image/webp",
"woff" => "font/woff",
"woff2" => "font/woff2",
"xml" => "text/xml",
&_ => "",
},
None => "",
};
mime.to_string()
}
pub fn domain_is_within_domain(domain: &str, domain_to_match_against: &str) -> bool {
if domain_to_match_against.len() == 0 {
return false;
}
str!()
if domain_to_match_against == "." {
return true;
}
let domain_partials: Vec<&str> = domain.trim_end_matches(".").rsplit(".").collect();
let domain_to_match_against_partials: Vec<&str> = domain_to_match_against
.trim_end_matches(".")
.rsplit(".")
.collect();
let domain_to_match_against_starts_with_a_dot = domain_to_match_against.starts_with(".");
let mut i: usize = 0;
let l: usize = std::cmp::max(
domain_partials.len(),
domain_to_match_against_partials.len(),
);
let mut ok: bool = true;
while i < l {
// Exit and return false if went out of bounds of domain to match against, and it didn't start with a dot
if !domain_to_match_against_starts_with_a_dot
&& domain_to_match_against_partials.len() < i + 1
{
ok = false;
break;
}
let domain_partial = if domain_partials.len() < i + 1 {
""
} else {
domain_partials.get(i).unwrap()
};
let domain_to_match_against_partial = if domain_to_match_against_partials.len() < i + 1 {
""
} else {
domain_to_match_against_partials.get(i).unwrap()
};
let parts_match = domain_to_match_against_partial.eq_ignore_ascii_case(domain_partial);
if !parts_match && domain_to_match_against_partial.len() != 0 {
ok = false;
break;
}
i += 1;
}
ok
}
pub fn is_plaintext_media_type(media_type: &str) -> bool {
@@ -63,75 +153,164 @@ pub fn is_plaintext_media_type(media_type: &str) -> bool {
|| PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
}
pub fn indent(level: u32) -> String {
let mut result = str!();
let mut l: u32 = level;
while l > 0 {
result += INDENT;
l -= 1;
pub fn parse_content_type(content_type: &str) -> (String, String, bool) {
let mut media_type: String = "text/plain".to_string();
let mut charset: String = "US-ASCII".to_string();
let mut is_base64: bool = false;
// Parse meta data
let content_type_items: Vec<&str> = content_type.split(';').collect();
let mut i: i8 = 0;
for item in &content_type_items {
if i == 0 {
if item.trim().len() > 0 {
media_type = item.trim().to_string();
}
} else {
if item.trim().eq_ignore_ascii_case("base64") {
is_base64 = true;
} else if item.trim().starts_with("charset=") {
charset = item.trim().chars().skip(8).collect();
}
}
i += 1;
}
result
(media_type, charset, is_base64)
}
pub fn retrieve_asset(
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
parent_url: &str,
url: &str,
parent_url: &Url,
url: &Url,
options: &Options,
depth: u32,
) -> Result<(Vec<u8>, String, String), reqwest::Error> {
if url.len() == 0 {
// Provoke error
client.get("").send()?;
}
if is_data_url(&url) {
let (media_type, data) = parse_data_url(url);
Ok((data, url.to_string(), media_type))
} else if is_file_url(&url) {
// Check if parent_url is also file:///
// (if not, then we don't embed the asset)
if !is_file_url(&parent_url) {
) -> Result<(Vec<u8>, Url, String, String), reqwest::Error> {
if url.scheme() == "data" {
let (media_type, charset, data) = parse_data_url(url);
Ok((data, url.clone(), media_type, charset))
} else if url.scheme() == "file" {
// Check if parent_url is also a file: URL (if not, then we don't embed the asset)
if parent_url.scheme() != "file" {
if !options.silent {
eprintln!(
"{}{} ({}){}",
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
"Security Error",
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
// Provoke error
client.get("").send()?;
}
let fs_file_path: String = file_url_to_fs_path(url);
let path = Path::new(&fs_file_path);
let path_buf: PathBuf = url.to_file_path().unwrap().clone();
let path: &Path = path_buf.as_path();
if path.exists() {
if path.is_dir() {
if !options.silent {
eprintln!(
"{}{} (is a directory){}",
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
// Provoke error
Err(client.get("").send().unwrap_err())
} else {
if !options.silent {
eprintln!("{}", &url);
}
let file_blob: Vec<u8> = fs::read(&path).expect("Unable to read file");
Ok((
file_blob.clone(),
url.clone(),
detect_media_type(&file_blob, url),
"".to_string(),
))
}
} else {
if !options.silent {
eprintln!("{}{}", indent(depth).as_str(), &url);
eprintln!(
"{}{} (not found){}",
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
Ok((fs::read(&fs_file_path).expect(""), url.to_string(), str!()))
} else {
// Provoke error
Err(client.get("").send().unwrap_err())
}
} else {
let cache_key: String = clean_url(&url);
let cache_key: String = clean_url(url.clone()).as_str().to_string();
if cache.contains_key(&cache_key) {
// URL is in cache, we get and return it
if !options.silent {
eprintln!("{}{} (from cache)", indent(depth).as_str(), &url);
eprintln!("{} (from cache)", &url);
}
Ok((
cache.get(&cache_key).unwrap().to_vec(),
url.to_string(),
str!(),
url.clone(),
"".to_string(),
"".to_string(),
))
} else {
if let Some(domains) = &options.domains {
let domain_matches = domains
.iter()
.any(|d| domain_is_within_domain(url.host_str().unwrap(), &d.trim()));
if (options.blacklist_domains && domain_matches)
|| (!options.blacklist_domains && !domain_matches)
{
return Err(client.get("").send().unwrap_err());
}
}
// URL not in cache, we retrieve the file
match client.get(url).send() {
Ok(mut response) => {
if !options.ignore_errors && response.status() != 200 {
let mut headers = HeaderMap::new();
if options.cookies.len() > 0 {
for cookie in &options.cookies {
if !cookie.is_expired() && cookie.matches_url(url.as_str()) {
let cookie_header_value: String = cookie.name.clone() + "=" + &cookie.value;
headers
.insert(COOKIE, HeaderValue::from_str(&cookie_header_value).unwrap());
}
}
}
// Add referer header for page resource requests
if ["https", "http"].contains(&parent_url.scheme()) && parent_url != url {
headers.insert(
REFERER,
HeaderValue::from_str(get_referer_url(parent_url.clone()).as_str()).unwrap(),
);
}
match client.get(url.as_str()).headers(headers).send() {
Ok(response) => {
if !options.ignore_errors && response.status() != reqwest::StatusCode::OK {
if !options.silent {
eprintln!(
"{}{}{} ({}){}",
indent(depth).as_str(),
"{}{} ({}){}",
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
response.status(),
@@ -146,35 +325,72 @@ pub fn retrieve_asset(
return Err(client.get("").send().unwrap_err());
}
let res_url = response.url().to_string();
let response_url: Url = response.url().clone();
if !options.silent {
if url == res_url {
eprintln!("{}{}", indent(depth).as_str(), &url);
if url.as_str() == response_url.as_str() {
eprintln!("{}", &url);
} else {
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &res_url);
eprintln!("{} -> {}", &url, &response_url);
}
}
let new_cache_key: String = clean_url(&res_url);
let new_cache_key: String = clean_url(response_url.clone()).to_string();
// Convert response into a byte array
let mut data: Vec<u8> = vec![];
response.copy_to(&mut data)?;
// Attempt to obtain media type by reading the Content-Type header
let media_type = response
// Attempt to obtain media type and charset by reading Content-Type header
let content_type: &str = response
.headers()
.get(CONTENT_TYPE)
.and_then(|header| header.to_str().ok())
.unwrap_or("");
let (media_type, charset, _is_base64) = parse_content_type(&content_type);
// Convert response into a byte array
let mut data: Vec<u8> = vec![];
match response.bytes() {
Ok(b) => {
data = b.to_vec();
}
Err(error) => {
if !options.silent {
eprintln!(
"{}{}{}",
if options.no_color { "" } else { ANSI_COLOR_RED },
error,
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
}
}
// Add retrieved resource to cache
cache.insert(new_cache_key, data.clone());
Ok((data, res_url, media_type.to_string()))
// Return
Ok((data, response_url, media_type, charset))
}
Err(error) => {
if !options.silent {
eprintln!(
"{}{} ({}){}",
if options.no_color { "" } else { ANSI_COLOR_RED },
&url,
error,
if options.no_color {
""
} else {
ANSI_COLOR_RESET
},
);
}
Err(client.get("").send().unwrap_err())
}
Err(error) => Err(error),
}
}
}

View File

@@ -0,0 +1,11 @@
<style>
@charset 'UTF-8';
@import 'style.css';
@import url(style.css);
@import url('style.css');
</style>

View File

@@ -0,0 +1 @@
body{background-color:#000;color:#fff}

View File

@@ -3,8 +3,6 @@
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="Content-Security-Policy" content="default-src 'unsafe-inline' file:;" />
<title>Local HTML file</title>
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-IWaCTORHkRhOWzcZeILSVmV6V6gPTHgNem6o6rsFAyaKTieDFkeeMrWjtO0DuWrX3bqZY46CVTZXUu0mia0qXQ==" crossorigin="anonymous" />
<link href="style.css" rel="stylesheet" type="text/css" integrity="sha512-vWBzl4NE9oIg8NFOPAyOZbaam0UXWr6aDHPaY2kodSzAFl+mKoj/RMNc6C31NDqK4mE2i68IWxYWqWJPLCgPOw==" crossorigin="anonymous" />

View File

@@ -0,0 +1,5 @@
<svg version="1.1" baseProfile="full" width="300" height="200" xmlns="http://www.w3.org/2000/svg">
<rect width="100%" height="100%" fill="red" />
<circle cx="150" cy="100" r="80" fill="green" />
<text x="150" y="125" font-size="60" text-anchor="middle" fill="white">SVG</text>
</svg>

After

Width:  |  Height:  |  Size: 296 B

View File

@@ -0,0 +1 @@
<body><noscript><img src="image.svg" /></noscript></body>

View File

@@ -0,0 +1 @@
<body><noscript><h1>JS is not active</h1><noscript><img src="image.svg" /></noscript></noscript></body>

View File

@@ -0,0 +1 @@
<body><noscript><script>alert(1);</script><img src="image.svg" /></noscript></body>

View File

@@ -0,0 +1,5 @@
<svg version="1.1" baseProfile="full" width="300" height="200" xmlns="http://www.w3.org/2000/svg">
<rect width="100%" height="100%" fill="red" />
<circle cx="150" cy="100" r="80" fill="green" />
<text x="150" y="125" font-size="60" text-anchor="middle" fill="white">SVG</text>
</svg>

After

Width:  |  Height:  |  Size: 296 B

View File

@@ -0,0 +1 @@
<div style="background-image: url('image.svg')"></div>

View File

@@ -0,0 +1,9 @@
<html>
<head>
<meta http-equiv="content-type" content="text/html;charset=GB2312"/>
<title>近七成人减少线下需求 银行数字化转型提速--经济·科技--人民网 </title>
</head>
<body>
<h1>近七成人减少线下需求 银行数字化转型提速</h1>
</body>
</html>

View File

@@ -12,8 +12,8 @@ mod passing {
use std::process::Command;
#[test]
fn add_new_when_provided() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn add_new_when_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-b")
@@ -22,52 +22,48 @@ mod passing {
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain newly added base URL
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
String::from_utf8_lossy(&out.stdout),
"<html><head>\
<base href=\"http://localhost:8000/\"></base>\
</head><body>Hello, World!</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
// Exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn keep_existing_when_none_provided() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn keep_existing_when_none_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("data:text/html,<base href=\"http://localhost:8000/\" />Hello%2C%20World!")
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain newly added base URL
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
String::from_utf8_lossy(&out.stdout),
"<html><head>\
<base href=\"http://localhost:8000/\">\
</head><body>Hello, World!</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
// Exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn override_existing_when_provided() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn override_existing_when_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-b")
@@ -76,26 +72,24 @@ mod passing {
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain newly added base URL
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
String::from_utf8_lossy(&out.stdout),
"<html><head>\
<base href=\"http://localhost/\">\
</head><body>Hello, World!</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
// Exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_existing_when_empty_provided() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn set_existing_to_empty_when_empty_provided() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-b")
@@ -104,20 +98,18 @@ mod passing {
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain newly added base URL
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
String::from_utf8_lossy(&out.stdout),
"<html><head>\
<base href=\"\">\
</head><body>Hello, World!</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
// Exit code should be 0
out.assert().code(0);
Ok(())
}
}

View File

@@ -9,32 +9,46 @@
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::io::Write;
use std::fs;
use std::path::Path;
use std::process::{Command, Stdio};
use tempfile::NamedTempFile;
use url::Url;
#[test]
fn print_version() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd.arg("-V").output().unwrap();
// STDOUT should contain program name and version
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))
);
fn print_help_information() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd.arg("-h").output().unwrap();
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// The exit code should be 0
// STDOUT should contain program name, version, and usage information
// TODO
// Exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn stdin_target_input() -> Result<(), Box<dyn std::error::Error>> {
fn print_version() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd.arg("-V").output().unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain program name and version
assert_eq!(
String::from_utf8_lossy(&out.stdout),
format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn stdin_target_input() {
let mut echo = Command::new("echo")
.arg("Hello from STDIN")
.stdout(Stdio::piped())
@@ -43,70 +57,57 @@ mod passing {
let echo_out = echo.stdout.take().unwrap();
echo.wait().unwrap();
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
cmd.stdin(echo_out);
let out = cmd.arg("-M").arg("-").output().unwrap();
// STDOUT should contain HTML from STDIN
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain HTML created out of STDIN
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
String::from_utf8_lossy(&out.stdout),
"<html><head></head><body>Hello from STDIN\n</body></html>\n"
);
Ok(())
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn css_import_string() -> Result<(), Box<dyn std::error::Error>> {
let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let mut file_css = NamedTempFile::new()?;
writeln!(file_css, "body{{background-color:#000;color:#fff}}")?;
let mut file_html = NamedTempFile::new()?;
writeln!(
file_html,
"\
<style>\n\
@charset 'UTF-8';\n\
\n\
@import '{file}{css_path}';\n\
\n\
@import url({file}{css_path});\n\
\n\
@import url('{file}{css_path}')\n\
</style>\n\
",
file = file_url_prefix,
css_path = str!(file_css.path().to_str().unwrap()).replace("\\", "/"),
)?;
let out = cmd.arg("-M").arg(file_html.path()).output().unwrap();
fn css_import_string() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("tests/_data_/css/index.html");
let path_css: &Path = Path::new("tests/_data_/css/style.css");
// STDOUT should contain embedded CSS url()'s
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head><style>\n@charset 'UTF-8';\n\n@import 'data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K';\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K');\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K')\n</style>\n\n</head><body></body></html>\n"
);
assert!(path_html.is_file());
assert!(path_css.is_file());
// STDERR should list temporary files that got retrieved
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
// STDERR should list files that got retrieved
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file}{html_path}\n \
{file}{css_path}\n \
{file}{css_path}\n \
{file}{css_path}\n\
{file_url_html}\n\
{file_url_css}\n\
{file_url_css}\n\
{file_url_css}\n\
",
file = file_url_prefix,
html_path = str!(file_html.path().to_str().unwrap()).replace("\\", "/"),
css_path = str!(file_css.path().to_str().unwrap()).replace("\\", "/"),
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_css = Url::from_file_path(fs::canonicalize(&path_css).unwrap()).unwrap(),
)
);
// The exit code should be 0
out.assert().code(0);
// STDOUT should contain embedded CSS url()'s
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head><style>\n\n @charset \"UTF-8\";\n\n @import \"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\";\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n</style>\n</head><body></body></html>\n"
);
Ok(())
// Exit code should be 0
out.assert().code(0);
}
}
@@ -124,22 +125,20 @@ mod failing {
use std::process::Command;
#[test]
fn bad_input_empty_target() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn bad_input_empty_target() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd.arg("").output().unwrap();
// STDOUT should be empty
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
// STDERR should contain error description
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
String::from_utf8_lossy(&out.stderr),
"No target specified\n"
);
// The exit code should be 1
out.assert().code(1);
// STDOUT should be empty
assert_eq!(String::from_utf8_lossy(&out.stdout), "");
Ok(())
// Exit code should be 1
out.assert().code(1);
}
}

View File

@@ -11,29 +11,11 @@ mod passing {
use std::env;
use std::process::Command;
#[test]
fn bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
// STDOUT should contain HTML
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
// STDERR should contain error description
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
"Unsupported data URL media type\n"
);
// The exit code should be 1
out.assert().code(1);
Ok(())
}
use monolith::url::EMPTY_IMAGE_DATA_URL;
#[test]
fn isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn isolate_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-I")
@@ -41,26 +23,24 @@ mod passing {
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain isolated HTML
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
String::from_utf8_lossy(&out.stdout),
"<html><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:;\"></meta>\
</head><body>Hello, World!</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
// Exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn remove_css_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-c")
@@ -68,27 +48,25 @@ mod passing {
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain HTML with no CSS
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
String::from_utf8_lossy(&out.stdout),
"<html><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
<style></style>\
</head><body>Hello</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
// Exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_fonts_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn remove_fonts_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-F")
@@ -96,27 +74,25 @@ mod passing {
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain HTML with no web fonts
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
String::from_utf8_lossy(&out.stdout),
"<html><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"font-src 'none';\"></meta>\
<style></style>\
</head><body>Hi</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
// Exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn remove_frames_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-f")
@@ -124,26 +100,24 @@ mod passing {
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain HTML with no iframes
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
String::from_utf8_lossy(&out.stdout),
"<html><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>\
</head><body><iframe src=\"\"></iframe>Hi</body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
// Exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn remove_images_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-i")
@@ -151,9 +125,12 @@ mod passing {
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain HTML with no images
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
String::from_utf8_lossy(&out.stdout),
format!(
"<html>\
<head>\
@@ -164,22 +141,17 @@ mod passing {
Hi\
</body>\
</html>\n",
empty_image = empty_image!()
empty_image = EMPTY_IMAGE_DATA_URL,
)
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
// Exit code should be 0
out.assert().code(0);
Ok(())
}
#[test]
fn remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn remove_js_from_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-j")
@@ -187,9 +159,12 @@ mod passing {
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain HTML with no JS
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
String::from_utf8_lossy(&out.stdout),
"<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta>\
@@ -198,37 +173,58 @@ mod passing {
</html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
// Exit code should be 0
out.assert().code(0);
}
}
Ok(())
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use assert_cmd::prelude::*;
use std::env;
use std::process::Command;
#[test]
fn bad_input_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain text
assert_eq!(String::from_utf8_lossy(&out.stdout), "Hello, World!\n");
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn security_disallow_local_assets_within_data_url_targets(
) -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
fn security_disallow_local_assets_within_data_url_targets() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("data:text/html,%3Cscript%20src=\"src/tests/data/basic/local-script.js\"%3E%3C/script%3E")
.output()
.unwrap();
// STDOUT should contain HTML with no JS in it
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain HTML without contents of local JS file
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head><script></script></head><body></body></html>\n"
String::from_utf8_lossy(&out.stdout),
"<html><head><script src=\"data:application/javascript;base64,\"></script></head><body></body></html>\n"
);
// STDERR should be empty
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
// The exit code should be 0
// Exit code should be 0
out.assert().code(0);
Ok(())
}
}

271
tests/cli/local_files.rs Normal file
View File

@@ -0,0 +1,271 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::fs;
use std::path::{Path, MAIN_SEPARATOR};
use std::process::Command;
use url::Url;
use monolith::url::EMPTY_IMAGE_DATA_URL;
#[test]
fn local_file_target_input_relative_target_path() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let cwd_normalized: String = env::current_dir()
.unwrap()
.to_str()
.unwrap()
.replace("\\", "/");
let out = cmd
.arg("-M")
.arg(format!(
"tests{s}_data_{s}basic{s}local-file.html",
s = MAIN_SEPARATOR
))
.output()
.unwrap();
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
// STDERR should contain list of retrieved file URLs, two missing
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file}{cwd}/tests/_data_/basic/local-file.html\n\
{file}{cwd}/tests/_data_/basic/local-style.css\n\
{file}{cwd}/tests/_data_/basic/local-style-does-not-exist.css (not found)\n\
{file}{cwd}/tests/_data_/basic/monolith.png (not found)\n\
{file}{cwd}/tests/_data_/basic/local-script.js\n\
",
file = file_url_protocol,
cwd = cwd_normalized
)
);
// STDOUT should contain HTML from the local file
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"\
<!DOCTYPE html><html lang=\"en\"><head>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \
<link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\" rel=\"stylesheet\" type=\"text/css\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\
</body></html>\n\
"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn local_file_target_input_absolute_target_path() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("tests/_data_/basic/local-file.html");
let out = cmd
.arg("-M")
.arg("-Ijci")
.arg(path_html.as_os_str())
.output()
.unwrap();
// STDERR should contain only the target file
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"{file_url_html}\n",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
)
);
// STDOUT should contain HTML from the local file
assert_eq!(
String::from_utf8_lossy(&out.stdout),
format!(
"\
<!DOCTYPE html><html lang=\"en\"><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \
<link rel=\"stylesheet\" type=\"text/css\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img src=\"{empty_image}\" alt=\"\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script></script>\n\n\n\n\
</body></html>\n\
",
empty_image = EMPTY_IMAGE_DATA_URL
)
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn local_file_url_target_input() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let cwd_normalized: String = env::current_dir()
.unwrap()
.to_str()
.unwrap()
.replace("\\", "/");
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
let out = cmd
.arg("-M")
.arg("-cji")
.arg(format!(
"{file}{cwd}/tests/_data_/basic/local-file.html",
file = file_url_protocol,
cwd = cwd_normalized,
))
.output()
.unwrap();
// STDERR should contain list of retrieved file URLs
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"{file}{cwd}/tests/_data_/basic/local-file.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
);
// STDOUT should contain HTML from the local file
assert_eq!(
String::from_utf8_lossy(&out.stdout),
format!(
"\
<!DOCTYPE html><html lang=\"en\"><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \
<link rel=\"stylesheet\" type=\"text/css\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img src=\"{empty_image}\" alt=\"\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script></script>\n\n\n\n\
</body></html>\n\
",
empty_image = EMPTY_IMAGE_DATA_URL
)
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn embed_file_url_local_asset_within_style_attribute() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("tests/_data_/svg/index.html");
let path_svg: &Path = Path::new("tests/_data_/svg/image.svg");
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
// STDERR should list files that got retrieved
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file_url_html}\n\
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
)
);
// STDOUT should contain HTML with date URL for background-image in it
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head></head><body><div style=\"background-image: url(&quot;data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=&quot;)\"></div>\n</body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn discard_integrity_for_local_files() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let cwd_normalized: String = env::current_dir()
.unwrap()
.to_str()
.unwrap()
.replace("\\", "/");
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
let out = cmd
.arg("-M")
.arg("-i")
.arg(if cfg!(windows) {
format!(
"{file}{cwd}/tests/_data_/integrity/index.html",
file = file_url_protocol,
cwd = cwd_normalized,
)
} else {
format!(
"{file}{cwd}/tests/_data_/integrity/index.html",
file = file_url_protocol,
cwd = cwd_normalized,
)
})
.output()
.unwrap();
// STDERR should contain list of retrieved file URLs
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file}{cwd}/tests/_data_/integrity/index.html\n\
{file}{cwd}/tests/_data_/integrity/style.css\n\
{file}{cwd}/tests/_data_/integrity/style.css\n\
{file}{cwd}/tests/_data_/integrity/script.js\n\
{file}{cwd}/tests/_data_/integrity/script.js\n\
",
file = file_url_protocol,
cwd = cwd_normalized,
)
);
// STDOUT should contain HTML from the local file; integrity attributes should be missing
assert_eq!(
String::from_utf8_lossy(&out.stdout),
format!(
"\
<!DOCTYPE html><html lang=\"en\"><head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>\n \
<title>Local HTML file</title>\n \
<link href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNGRkY7Cn0K\" rel=\"stylesheet\" type=\"text/css\" crossorigin=\"anonymous\">\n \
<link href=\"style.css\" rel=\"stylesheet\" type=\"text/css\" crossorigin=\"anonymous\">\n</head>\n\n<body>\n \
<p>This page should have black background and white foreground, but only when served via http: (not via file:)</p>\n \
<script src=\"data:application/javascript;base64,ZnVuY3Rpb24gbm9vcCgpIHsKICAgIGNvbnNvbGUubG9nKCJtb25vbGl0aCIpOwp9Cg==\"></script>\n \
<script src=\"script.js\"></script>\n\n\n\n\
</body></html>\n\
"
)
);
// Exit code should be 0
out.assert().code(0);
}
}

View File

@@ -2,4 +2,5 @@ mod base_url;
mod basic;
mod data_url;
mod local_files;
mod noscript;
mod unusual_encodings;

170
tests/cli/noscript.rs Normal file
View File

@@ -0,0 +1,170 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use std::env;
use std::fs;
use std::path::Path;
use std::process::Command;
use url::Url;
#[test]
fn parse_noscript_contents() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("tests/_data_/noscript/index.html");
let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg");
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
// STDERR should contain target HTML and embedded SVG files
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file_url_html}\n\
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
)
);
// STDOUT should contain HTML with no CSS
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head></head><body><noscript><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"></noscript>\n</body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn unwrap_noscript_contents() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("tests/_data_/noscript/index.html");
let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg");
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
// STDERR should contain target HTML and embedded SVG files
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file_url_html}\n\
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
)
);
// STDOUT should contain HTML with no CSS
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head></head><body><!--noscript--><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"><!--/noscript-->\n</body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn unwrap_noscript_contents_nested() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("tests/_data_/noscript/nested.html");
let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg");
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
// STDERR should contain target HTML and embedded SVG files
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file_url_html}\n\
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
)
);
// STDOUT should contain HTML with no CSS
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head></head><body><!--noscript--><h1>JS is not active</h1><!--noscript--><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"><!--/noscript--><!--/noscript-->\n</body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn unwrap_noscript_contents_with_script() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let path_html: &Path = Path::new("tests/_data_/noscript/script.html");
let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg");
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
// STDERR should contain target HTML and embedded SVG files
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"\
{file_url_html}\n\
{file_url_svg}\n\
",
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
)
);
// STDOUT should contain HTML with no CSS
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html>\
<head></head>\
<body>\
<!--noscript-->\
<img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\">\
<!--/noscript-->\n\
</body>\
</html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn unwrap_noscript_contents_attr_data_url() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-n")
.arg("data:text/html,<noscript class=\"\">test</noscript>")
.output()
.unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain unwrapped contents of NOSCRIPT element
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html><head><!--noscript class=\"\"-->test<!--/noscript--></head><body></body></html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
}

View File

@@ -0,0 +1,239 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use assert_cmd::prelude::*;
use encoding_rs::Encoding;
use std::env;
use std::path::MAIN_SEPARATOR;
use std::process::{Command, Stdio};
#[test]
fn properly_save_document_with_gb2312() {
let cwd = env::current_dir().unwrap();
let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/");
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg(format!(
"tests{s}_data_{s}unusual_encodings{s}gb2312.html",
s = MAIN_SEPARATOR
))
.output()
.unwrap();
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
// STDERR should contain only the target file
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"{file}{cwd}/tests/_data_/unusual_encodings/gb2312.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
);
// STDOUT should contain original document without any modifications
let s: String;
if let Some(encoding) = Encoding::for_label(b"gb2312") {
let (string, _, _) = encoding.decode(&out.stdout);
s = string.to_string();
} else {
s = String::from_utf8_lossy(&out.stdout).to_string();
}
assert_eq!(
s,
"<html>\
<head>\n \
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\">\n \
<title>近七成人减少线下需求\u{3000}银行数字化转型提速--经济·科技--人民网 </title>\n\
</head>\n\
<body>\n \
<h1>近七成人减少线下需求\u{3000}银行数字化转型提速</h1>\n\n\n\
</body>\
</html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn properly_save_document_with_gb2312_from_stdin() {
let mut echo = Command::new("cat")
.arg(format!(
"tests{s}_data_{s}unusual_encodings{s}gb2312.html",
s = MAIN_SEPARATOR
))
.stdout(Stdio::piped())
.spawn()
.unwrap();
let echo_out = echo.stdout.take().unwrap();
echo.wait().unwrap();
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
cmd.stdin(echo_out);
let out = cmd.arg("-M").arg("-").output().unwrap();
// STDERR should be empty
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
// STDOUT should contain HTML created out of STDIN
let s: String;
if let Some(encoding) = Encoding::for_label(b"gb2312") {
let (string, _, _) = encoding.decode(&out.stdout);
s = string.to_string();
} else {
s = String::from_utf8_lossy(&out.stdout).to_string();
}
assert_eq!(
s,
"<html>\
<head>\n \
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\">\n \
<title>近七成人减少线下需求\u{3000}银行数字化转型提速--经济·科技--人民网 </title>\n\
</head>\n\
<body>\n \
<h1>近七成人减少线下需求\u{3000}银行数字化转型提速</h1>\n\n\n\
</body>\
</html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn properly_save_document_with_gb2312_custom_charset() {
let cwd = env::current_dir().unwrap();
let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/");
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-E")
.arg("utf8")
.arg(format!(
"tests{s}_data_{s}unusual_encodings{s}gb2312.html",
s = MAIN_SEPARATOR
))
.output()
.unwrap();
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
// STDERR should contain only the target file
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"{file}{cwd}/tests/_data_/unusual_encodings/gb2312.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
);
// STDOUT should contain original document without any modifications
assert_eq!(
String::from_utf8_lossy(&out.stdout).to_string(),
"<html>\
<head>\n \
<meta http-equiv=\"content-type\" content=\"text/html;charset=utf8\">\n \
<title>近七成人减少线下需求\u{3000}银行数字化转型提速--经济·科技--人民网 </title>\n\
</head>\n\
<body>\n \
<h1>近七成人减少线下需求\u{3000}银行数字化转型提速</h1>\n\n\n\
</body>\
</html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
#[test]
fn properly_save_document_with_gb2312_custom_charset_bad() {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg("-E")
.arg("utf0")
.arg(format!(
"tests{s}_data_{s}unusual_encodings{s}gb2312.html",
s = MAIN_SEPARATOR
))
.output()
.unwrap();
// STDERR should contain error message
assert_eq!(
String::from_utf8_lossy(&out.stderr),
"Unknown encoding: utf0\n"
);
// STDOUT should be empty
assert_eq!(String::from_utf8_lossy(&out.stdout).to_string(), "");
// Exit code should be 1
out.assert().code(1);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use assert_cmd::prelude::*;
use std::env;
use std::path::MAIN_SEPARATOR;
use std::process::Command;
#[test]
fn change_iso88591_to_utf8_to_properly_display_html_entities() {
let cwd = env::current_dir().unwrap();
let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/");
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
let out = cmd
.arg("-M")
.arg(format!(
"tests{s}_data_{s}unusual_encodings{s}iso-8859-1.html",
s = MAIN_SEPARATOR
))
.output()
.unwrap();
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
// STDERR should contain only the target file
assert_eq!(
String::from_utf8_lossy(&out.stderr),
format!(
"{file}{cwd}/tests/_data_/unusual_encodings/iso-8859-1.html\n",
file = file_url_protocol,
cwd = cwd_normalized,
)
);
// STDOUT should contain original document but with UTF-8 charset
assert_eq!(
String::from_utf8_lossy(&out.stdout),
"<html>\
<head>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">\n \
</head>\n \
<body>\n \
<20> Some Company\n \
\n\n</body>\
</html>\n"
);
// Exit code should be 0
out.assert().code(0);
}
}

View File

@@ -7,21 +7,36 @@
#[cfg(test)]
mod passing {
use crate::url;
use monolith::cookies;
#[test]
fn http_url() {
assert!(url::is_http_url("http://kernel.org"));
fn never_expires() {
let cookie = cookies::Cookie {
domain: String::from("127.0.0.1"),
include_subdomains: true,
path: String::from("/"),
https_only: false,
expires: 0,
name: String::from(""),
value: String::from(""),
};
assert!(!cookie.is_expired());
}
#[test]
fn https_url() {
assert!(url::is_http_url("https://www.rust-lang.org/"));
}
fn expires_long_from_now() {
let cookie = cookies::Cookie {
domain: String::from("127.0.0.1"),
include_subdomains: true,
path: String::from("/"),
https_only: false,
expires: 9999999999,
name: String::from(""),
value: String::from(""),
};
#[test]
fn http_url_with_backslashes() {
assert!(url::is_http_url("http:\\\\freebsd.org\\"));
assert!(!cookie.is_expired());
}
}
@@ -34,32 +49,20 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::url;
use monolith::cookies;
#[test]
fn url_with_no_protocol() {
assert!(!url::is_http_url("//kernel.org"));
}
fn expired() {
let cookie = cookies::Cookie {
domain: String::from("127.0.0.1"),
include_subdomains: true,
path: String::from("/"),
https_only: false,
expires: 1,
name: String::from(""),
value: String::from(""),
};
#[test]
fn dot_slash_filename() {
assert!(!url::is_http_url("./index.html"));
}
#[test]
fn just_filename() {
assert!(!url::is_http_url("some-local-page.htm"));
}
#[test]
fn https_ip_port_url() {
assert!(!url::is_http_url("ftp://1.2.3.4/www/index.html"));
}
#[test]
fn data_url() {
assert!(!url::is_http_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
assert!(cookie.is_expired());
}
}

View File

@@ -0,0 +1,107 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use monolith::cookies;
#[test]
fn secure_url() {
let cookie = cookies::Cookie {
domain: String::from("127.0.0.1"),
include_subdomains: true,
path: String::from("/"),
https_only: true,
expires: 0,
name: String::from(""),
value: String::from(""),
};
assert!(cookie.matches_url("https://127.0.0.1/something"));
}
#[test]
fn non_secure_url() {
let cookie = cookies::Cookie {
domain: String::from("127.0.0.1"),
include_subdomains: true,
path: String::from("/"),
https_only: false,
expires: 0,
name: String::from(""),
value: String::from(""),
};
assert!(cookie.matches_url("http://127.0.0.1/something"));
}
#[test]
fn subdomain() {
let cookie = cookies::Cookie {
domain: String::from(".somethingsomething.com"),
include_subdomains: true,
path: String::from("/"),
https_only: true,
expires: 0,
name: String::from(""),
value: String::from(""),
};
assert!(cookie.matches_url("https://cdn.somethingsomething.com/something"));
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use monolith::cookies;
#[test]
fn empty_url() {
let cookie = cookies::Cookie {
domain: String::from("127.0.0.1"),
include_subdomains: true,
path: String::from("/"),
https_only: false,
expires: 0,
name: String::from(""),
value: String::from(""),
};
assert!(!cookie.matches_url(""));
}
#[test]
fn wrong_hostname() {
let cookie = cookies::Cookie {
domain: String::from("127.0.0.1"),
include_subdomains: true,
path: String::from("/"),
https_only: false,
expires: 0,
name: String::from(""),
value: String::from(""),
};
assert!(!cookie.matches_url("http://0.0.0.0/"));
}
#[test]
fn wrong_path() {
let cookie = cookies::Cookie {
domain: String::from("127.0.0.1"),
include_subdomains: false,
path: String::from("/"),
https_only: false,
expires: 0,
name: String::from(""),
value: String::from(""),
};
assert!(!cookie.matches_url("http://0.0.0.0/path"));
}
}

View File

@@ -0,0 +1,2 @@
mod is_expired;
mod matches_url;

2
tests/cookies/mod.rs Normal file
View File

@@ -0,0 +1,2 @@
mod cookie;
mod parse_cookie_file_contents;

View File

@@ -0,0 +1,87 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use monolith::cookies;
#[test]
fn parse_file() {
let file_contents =
"# Netscape HTTP Cookie File\n127.0.0.1\tFALSE\t/\tFALSE\t0\tUSER_TOKEN\tin";
let result = cookies::parse_cookie_file_contents(&file_contents).unwrap();
assert_eq!(result.len(), 1);
assert_eq!(result[0].domain, "127.0.0.1");
assert_eq!(result[0].include_subdomains, false);
assert_eq!(result[0].path, "/");
assert_eq!(result[0].https_only, false);
assert_eq!(result[0].expires, 0);
assert_eq!(result[0].name, "USER_TOKEN");
assert_eq!(result[0].value, "in");
}
#[test]
fn parse_multiline_file() {
let file_contents = "# HTTP Cookie File\n127.0.0.1\tFALSE\t/\tFALSE\t0\tUSER_TOKEN\tin\n127.0.0.1\tTRUE\t/\tTRUE\t9\tUSER_TOKEN\tout\n\n";
let result = cookies::parse_cookie_file_contents(&file_contents).unwrap();
assert_eq!(result.len(), 2);
assert_eq!(result[0].domain, "127.0.0.1");
assert_eq!(result[0].include_subdomains, false);
assert_eq!(result[0].path, "/");
assert_eq!(result[0].https_only, false);
assert_eq!(result[0].expires, 0);
assert_eq!(result[0].name, "USER_TOKEN");
assert_eq!(result[0].value, "in");
assert_eq!(result[1].domain, "127.0.0.1");
assert_eq!(result[1].include_subdomains, true);
assert_eq!(result[1].path, "/");
assert_eq!(result[1].https_only, true);
assert_eq!(result[1].expires, 9);
assert_eq!(result[1].name, "USER_TOKEN");
assert_eq!(result[1].value, "out");
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use monolith::cookies;
#[test]
fn empty() {
let file_contents = "";
let result = cookies::parse_cookie_file_contents(&file_contents).unwrap();
assert_eq!(result.len(), 0);
}
#[test]
fn no_header() {
let file_contents = "127.0.0.1 FALSE / FALSE 0 USER_TOKEN in";
match cookies::parse_cookie_file_contents(&file_contents) {
Ok(_result) => {
assert!(false);
}
Err(_e) => {
assert!(true);
}
}
}
#[test]
fn spaces_instead_of_tabs() {
let file_contents =
"# HTTP Cookie File\n127.0.0.1 FALSE / FALSE 0 USER_TOKEN in";
let result = cookies::parse_cookie_file_contents(&file_contents).unwrap();
assert_eq!(result.len(), 0);
}
}

View File

@@ -8,35 +8,35 @@
#[cfg(test)]
mod passing {
use reqwest::blocking::Client;
use reqwest::Url;
use std::collections::HashMap;
use crate::css;
use crate::opts::Options;
use monolith::css;
use monolith::opts::Options;
use monolith::url::EMPTY_IMAGE_DATA_URL;
#[test]
fn empty_input() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("data:,").unwrap();
let options = Options::default();
assert_eq!(css::embed_css(cache, &client, "", "", &options, 0), "");
assert_eq!(
css::embed_css(cache, &client, &document_url, "", &options),
""
);
}
#[test]
fn trim_if_empty() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let options = Options::default();
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
"\t \t ",
&options,
0,
),
css::embed_css(cache, &client, &document_url, "\t \t ", &options),
""
);
}
@@ -45,6 +45,7 @@ mod passing {
fn style_exclude_unquoted_images() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
@@ -58,23 +59,16 @@ mod passing {
height: calc(100vh - 10pt)";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&STYLE,
&options,
0,
),
css::embed_css(cache, &client, &document_url, &STYLE, &options),
format!(
"/* border: none;*/\
background-image: url('{empty_image}'); \
list-style: url('{empty_image}');\
background-image: url(\"{empty_image}\"); \
list-style: url(\"{empty_image}\");\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
height: calc(100vh - 10pt)",
empty_image = empty_image!()
empty_image = EMPTY_IMAGE_DATA_URL
)
);
}
@@ -83,6 +77,7 @@ mod passing {
fn style_exclude_single_quoted_images() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("data:,").unwrap();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
@@ -96,16 +91,16 @@ mod passing {
height: calc(100vh - 10pt)";
assert_eq!(
css::embed_css(cache, &client, "", &STYLE, &options, 0),
css::embed_css(cache, &client, &document_url, &STYLE, &options),
format!(
"/* border: none;*/\
background-image: url('{empty_image}'); \
list-style: url('{empty_image}');\
background-image: url(\"{empty_image}\"); \
list-style: url(\"{empty_image}\");\
width:99.998%; \
margin-top: -20px; \
line-height: -1; \
height: calc(100vh - 10pt)",
empty_image = empty_image!()
empty_image = EMPTY_IMAGE_DATA_URL
)
);
}
@@ -114,19 +109,20 @@ mod passing {
fn style_block() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("file:///").unwrap();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
#id.class-name:not(:nth-child(3n+0)) {\n \
// border: none;\n \
background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=');\n\
background-image: url(\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\");\n\
}\n\
\n\
html > body {}";
assert_eq!(
css::embed_css(cache, &client, "file:///", &CSS, &options, 0),
css::embed_css(cache, &client, &document_url, &CSS, &options),
CSS
);
}
@@ -135,6 +131,7 @@ mod passing {
fn attribute_selectors() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.silent = true;
@@ -143,38 +140,42 @@ mod passing {
/* Attribute exists */
}
[data-value='foo'] {
[data-value=\"foo\"] {
/* Attribute has this exact value */
}
[data-value*='foo'] {
[data-value*=\"foo\"] {
/* Attribute value contains this value somewhere in it */
}
[data-value~='foo'] {
[data-value~=\"foo\"] {
/* Attribute has this value in a space-separated list somewhere */
}
[data-value^='foo'] {
[data-value^=\"foo\"] {
/* Attribute value starts with this */
}
[data-value|='foo'] {
[data-value|=\"foo\"] {
/* Attribute value starts with this in a dash-separated list */
}
[data-value$='foo'] {
[data-value$=\"foo\"] {
/* Attribute value ends with this */
}
";
assert_eq!(css::embed_css(cache, &client, "", &CSS, &options, 0), CSS);
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options),
CSS
);
}
#[test]
fn import_string() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.silent = true;
@@ -187,20 +188,13 @@ mod passing {
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
css::embed_css(cache, &client, &document_url, &CSS, &options),
"\
@charset 'UTF-8';\n\
@charset \"UTF-8\";\n\
\n\
@import 'data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9';\n\
@import \"data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9\";\n\
\n\
@import url('data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==')\n\
@import url(\"data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==\")\n\
"
);
}
@@ -209,6 +203,7 @@ mod passing {
fn hash_urls() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.silent = true;
@@ -223,14 +218,7 @@ mod passing {
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
css::embed_css(cache, &client, &document_url, &CSS, &options),
CSS
);
}
@@ -239,6 +227,7 @@ mod passing {
fn transform_percentages_and_degrees() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.silent = true;
@@ -251,14 +240,7 @@ mod passing {
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
css::embed_css(cache, &client, &document_url, &CSS, &options),
CSS
);
}
@@ -267,6 +249,7 @@ mod passing {
fn unusual_indents() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.silent = true;
@@ -281,14 +264,7 @@ mod passing {
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
css::embed_css(cache, &client, &document_url, &CSS, &options),
CSS
);
}
@@ -297,6 +273,7 @@ mod passing {
fn exclude_fonts() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("https://doesntmatter.local/").unwrap();
let mut options = Options::default();
options.no_fonts = true;
options.silent = true;
@@ -320,30 +297,74 @@ mod passing {
font-family: 'My Font' Verdana\n\
}\n\
";
const CSS_OUT: &str = " \
\n\
\n\
#identifier {\n \
font-family: 'My Font' Arial\n\
font-family: \"My Font\" Arial\n\
}\n\
\n \
\n\
\n\
div {\n \
font-family: 'My Font' Verdana\n\
font-family: \"My Font\" Verdana\n\
}\n\
";
assert_eq!(
css::embed_css(
cache,
&client,
"https://doesntmatter.local/",
&CSS,
&options,
0,
),
css::embed_css(cache, &client, &document_url, &CSS, &options),
CSS_OUT
);
}
#[test]
fn content() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("data:,").unwrap();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
#language a[href=\"#translations\"]:before {\n\
content: url(data:,) \"\\A\";\n\
white-space: pre }\n\
";
const CSS_OUT: &str = "\
#language a[href=\"#translations\"]:before {\n\
content: url(\"data:text/plain;base64,\") \"\\a \";\n\
white-space: pre }\n\
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options),
CSS_OUT
);
}
#[test]
fn ie_css_hack() {
let cache = &mut HashMap::new();
let client = Client::new();
let document_url: Url = Url::parse("data:,").unwrap();
let mut options = Options::default();
options.silent = true;
const CSS: &str = "\
div#p>svg>foreignObject>section:not(\\9) {\n\
width: 300px;\n\
width: 500px\\9;\n\
}\n\
";
const CSS_OUT: &str = "\
div#p>svg>foreignObject>section:not(\\9) {\n\
width: 300px;\n\
width: 500px\t;\n\
}\n\
";
assert_eq!(
css::embed_css(cache, &client, &document_url, &CSS, &options),
CSS_OUT
);
}

View File

@@ -7,20 +7,20 @@
#[cfg(test)]
mod passing {
use crate::css;
use monolith::css;
#[test]
fn backrgound() {
fn background() {
assert!(css::is_image_url_prop("background"));
}
#[test]
fn backrgound_image() {
fn background_image() {
assert!(css::is_image_url_prop("background-image"));
}
#[test]
fn backrgound_image_uppercase() {
fn background_image_uppercase() {
assert!(css::is_image_url_prop("BACKGROUND-IMAGE"));
}
@@ -64,7 +64,7 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::css;
use monolith::css;
#[test]
fn empty() {

View File

@@ -1,3 +1,2 @@
mod embed_css;
mod enquote;
mod is_image_url_prop;

View File

@@ -8,18 +8,24 @@
#[cfg(test)]
mod passing {
use html5ever::serialize::{serialize, SerializeOpts};
use markup5ever_rcdom::SerializableHandle;
use crate::html;
use monolith::html;
#[test]
fn basic() {
let html = "<div>text</div>";
let mut dom = html::html_to_dom(&html);
let mut dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string());
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),

View File

@@ -7,7 +7,7 @@
#[cfg(test)]
mod passing {
use crate::html;
use monolith::html;
#[test]
fn empty_input_sha256() {
@@ -51,7 +51,7 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::html;
use monolith::html;
#[test]
fn empty_hash() {

View File

@@ -7,8 +7,8 @@
#[cfg(test)]
mod passing {
use crate::html;
use crate::opts::Options;
use monolith::html;
use monolith::opts::Options;
#[test]
fn isolated() {
@@ -16,7 +16,10 @@ mod passing {
options.isolate = true;
let csp_content = html::compose_csp(&options);
assert_eq!(csp_content, "default-src 'unsafe-inline' data:;");
assert_eq!(
csp_content,
"default-src 'unsafe-eval' 'unsafe-inline' data:;"
);
}
#[test]
@@ -75,6 +78,6 @@ mod passing {
options.no_images = true;
let csp_content = html::compose_csp(&options);
assert_eq!(csp_content, "default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;");
assert_eq!(csp_content, "default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;");
}
}

View File

@@ -8,14 +8,15 @@
#[cfg(test)]
mod passing {
use chrono::prelude::*;
use reqwest::Url;
use crate::html;
use monolith::html;
#[test]
fn http_url() {
let url = "http://192.168.1.1/";
let url: Url = Url::parse("http://192.168.1.1/").unwrap();
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::create_metadata_tag(url);
let metadata_comment: String = html::create_metadata_tag(&url);
assert_eq!(
metadata_comment,
@@ -31,9 +32,9 @@ mod passing {
#[test]
fn file_url() {
let url = "file:///home/monolith/index.html";
let url: Url = Url::parse("file:///home/monolith/index.html").unwrap();
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::create_metadata_tag(url);
let metadata_comment: String = html::create_metadata_tag(&url);
assert_eq!(
metadata_comment,
@@ -48,9 +49,9 @@ mod passing {
#[test]
fn data_url() {
let url = "data:text/html,Hello%2C%20World!";
let url: Url = Url::parse("data:text/html,Hello%2C%20World!").unwrap();
let timestamp = Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
let metadata_comment: String = html::create_metadata_tag(url);
let metadata_comment: String = html::create_metadata_tag(&url);
assert_eq!(
metadata_comment,
@@ -63,20 +64,3 @@ mod passing {
);
}
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
use crate::html;
#[test]
fn empty_string() {
assert_eq!(html::create_metadata_tag(""), "");
}
}

View File

@@ -8,10 +8,12 @@
#[cfg(test)]
mod passing {
use reqwest::blocking::Client;
use reqwest::Url;
use std::collections::HashMap;
use crate::html;
use crate::opts::Options;
use monolith::html;
use monolith::opts::Options;
use monolith::url::EMPTY_IMAGE_DATA_URL;
#[test]
fn small_medium_large() {
@@ -21,15 +23,19 @@ mod passing {
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
);
assert_eq!(
embedded_css,
format!(
"{} 1x, {} 1.5x, {} 2x",
empty_image!(),
empty_image!(),
empty_image!(),
EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL,
),
);
}
@@ -42,11 +48,17 @@ mod passing {
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
);
assert_eq!(
embedded_css,
format!("{}, {} 1.5x", empty_image!(), empty_image!()),
format!("{}, {} 1.5x", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL),
);
}
@@ -58,11 +70,17 @@ mod passing {
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
);
assert_eq!(
embedded_css,
format!("{} 1x, {} 2x", empty_image!(), empty_image!()),
format!("{} 1x, {} 2x", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL),
);
}
@@ -74,15 +92,69 @@ mod passing {
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
);
assert_eq!(
embedded_css,
format!(
"{} 1x, {} 2x, {} 3x",
empty_image!(),
empty_image!(),
empty_image!()
EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL
),
);
}
#[test]
fn no_whitespace_after_commas() {
let cache = &mut HashMap::new();
let client = Client::new();
let srcset_value = "small,s.png 1x,medium,m.png 2x,large,l.png 3x";
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
);
assert_eq!(
embedded_css,
format!(
"{} 1x, {} 2x, {} 3x",
EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL
),
);
}
#[test]
fn last_without_descriptor() {
let cache = &mut HashMap::new();
let client = Client::new();
let srcset_value = "small,s.png 1x, medium,m.png 2x, large,l.png";
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
);
assert_eq!(
embedded_css,
format!(
"{} 1x, {} 2x, {}",
EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL
),
);
}
@@ -98,10 +170,12 @@ mod passing {
#[cfg(test)]
mod failing {
use reqwest::blocking::Client;
use reqwest::Url;
use std::collections::HashMap;
use crate::html;
use crate::opts::Options;
use monolith::html;
use monolith::opts::Options;
use monolith::url::EMPTY_IMAGE_DATA_URL;
#[test]
fn trailing_comma() {
@@ -111,11 +185,17 @@ mod failing {
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
let embedded_css = html::embed_srcset(
cache,
&client,
&Url::parse("data:,").unwrap(),
&srcset_value,
&options,
);
assert_eq!(
embedded_css,
format!("{} 1x, {} 2x,", empty_image!(), empty_image!()),
format!("{} 1x, {} 2x", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL),
);
}
}

View File

@@ -7,7 +7,7 @@
#[cfg(test)]
mod passing {
use crate::html;
use monolith::html;
#[test]
fn present() {
@@ -19,11 +19,11 @@ mod passing {
<body>
</body>
</html>";
let dom = html::html_to_dom(&html);
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(
html::get_base_url(&dom.document),
Some(str!("https://musicbrainz.org"))
Some("https://musicbrainz.org".to_string())
);
}
@@ -38,11 +38,11 @@ mod passing {
<body>
</body>
</html>";
let dom = html::html_to_dom(&html);
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(
html::get_base_url(&dom.document),
Some(str!("https://www.discogs.com/"))
Some("https://www.discogs.com/".to_string())
);
}
}
@@ -56,7 +56,7 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::html;
use monolith::html;
#[test]
fn absent() {
@@ -67,7 +67,7 @@ mod failing {
<body>
</body>
</html>";
let dom = html::html_to_dom(&html);
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(html::get_base_url(&dom.document), None);
}
@@ -82,7 +82,7 @@ mod failing {
<body>
</body>
</html>";
let dom = html::html_to_dom(&html);
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(html::get_base_url(&dom.document), None);
}
@@ -97,8 +97,8 @@ mod failing {
<body>
</body>
</html>";
let dom = html::html_to_dom(&html);
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(html::get_base_url(&dom.document), Some(str!()));
assert_eq!(html::get_base_url(&dom.document), Some("".to_string()));
}
}

72
tests/html/get_charset.rs Normal file
View File

@@ -0,0 +1,72 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use monolith::html;
#[test]
fn meta_content_type() {
let html = "<!doctype html>
<html>
<head>
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(html::get_charset(&dom.document), Some("GB2312".to_string()));
}
#[test]
fn meta_charset() {
let html = "<!doctype html>
<html>
<head>
<meta charset=\"GB2312\" />
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(html::get_charset(&dom.document), Some("GB2312".to_string()));
}
#[test]
fn multiple_conflicting_meta_charset_first() {
let html = "<!doctype html>
<html>
<head>
<meta charset=\"utf-8\" />
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(html::get_charset(&dom.document), Some("utf-8".to_string()));
}
#[test]
fn multiple_conflicting_meta_content_type_first() {
let html = "<!doctype html>
<html>
<head>
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
<meta charset=\"utf-8\" />
</head>
<body>
</body>
</html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
assert_eq!(html::get_charset(&dom.document), Some("GB2312".to_string()));
}
}

View File

@@ -7,14 +7,14 @@
#[cfg(test)]
mod passing {
use html5ever::rcdom::{Handle, NodeData};
use markup5ever_rcdom::{Handle, NodeData};
use crate::html;
use monolith::html;
#[test]
fn div_two_style_attributes() {
let html = "<!doctype html><html><head></head><body><DIV STYLE=\"color: blue;\" style=\"display: none;\"></div></body></html>";
let dom = html::html_to_dom(&html);
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut count = 0;
fn test_walk(node: &Handle, i: &mut i8) {
@@ -35,7 +35,7 @@ mod passing {
} else if node_name == "div" {
assert_eq!(
html::get_node_attr(node, "style"),
Some(str!("color: blue;"))
Some("color: blue;".to_string())
);
}

View File

@@ -7,14 +7,14 @@
#[cfg(test)]
mod passing {
use html5ever::rcdom::{Handle, NodeData};
use markup5ever_rcdom::{Handle, NodeData};
use crate::html;
use monolith::html;
#[test]
fn parent_node_names() {
let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
let dom = html::html_to_dom(&html);
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut count = 0;
fn test_walk(node: &Handle, i: &mut i8) {

View File

@@ -7,12 +7,12 @@
#[cfg(test)]
mod passing {
use crate::html;
use monolith::html;
#[test]
fn icon() {
let html = "<link rel=\"icon\" href=\"\" /><div>text</div>";
let dom = html::html_to_dom(&html);
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let res: bool = html::has_favicon(&dom.document);
assert!(res);
@@ -21,7 +21,7 @@ mod passing {
#[test]
fn shortcut_icon() {
let html = "<link rel=\"shortcut icon\" href=\"\" /><div>text</div>";
let dom = html::html_to_dom(&html);
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let res: bool = html::has_favicon(&dom.document);
assert!(res);
@@ -37,12 +37,12 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::html;
use monolith::html;
#[test]
fn absent() {
let html = "<div>text</div>";
let dom = html::html_to_dom(&html);
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let res: bool = html::has_favicon(&dom.document);
assert!(!res);

View File

@@ -7,7 +7,7 @@
#[cfg(test)]
mod passing {
use crate::html;
use monolith::html;
#[test]
fn icon() {
@@ -34,7 +34,7 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::html;
use monolith::html;
#[test]
fn mask_icon() {

View File

@@ -4,10 +4,12 @@ mod compose_csp;
mod create_metadata_tag;
mod embed_srcset;
mod get_base_url;
mod get_charset;
mod get_node_attr;
mod get_node_name;
mod has_favicon;
mod is_icon;
mod parse_link_type;
mod serialize_document;
mod set_node_attr;
mod stringify_document;
mod walk_and_embed_assets;

View File

@@ -7,20 +7,26 @@
#[cfg(test)]
mod passing {
use crate::url;
use monolith::html;
#[test]
fn data_url_text_html() {
assert!(url::is_data_url(
"data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
fn icon() {
assert!(html::parse_link_type("icon").contains(&html::LinkType::Icon));
}
#[test]
fn data_url_no_media_type() {
assert!(url::is_data_url(
"data:;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h"
));
fn shortcut_icon_capitalized() {
assert!(html::parse_link_type("Shortcut Icon").contains(&html::LinkType::Icon));
}
#[test]
fn stylesheet() {
assert!(html::parse_link_type("stylesheet").contains(&html::LinkType::Stylesheet));
}
#[test]
fn preload_stylesheet() {
assert!(html::parse_link_type("preload stylesheet").contains(&html::LinkType::Stylesheet));
}
}
@@ -33,20 +39,20 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::url;
use monolith::html;
#[test]
fn https_url() {
assert!(!url::is_data_url("https://kernel.org"));
fn mask_icon() {
assert!(html::parse_link_type("mask-icon").is_empty());
}
#[test]
fn no_protocol_url() {
assert!(!url::is_data_url("//kernel.org"));
fn fluid_icon() {
assert!(html::parse_link_type("fluid-icon").is_empty());
}
#[test]
fn empty_string() {
assert!(!url::is_data_url(""));
assert!(html::parse_link_type("").is_empty());
}
}

View File

@@ -7,17 +7,17 @@
#[cfg(test)]
mod passing {
use crate::html;
use crate::opts::Options;
use monolith::html;
use monolith::opts::Options;
#[test]
fn div_as_root_element() {
let html = "<div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html);
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let options = Options::default();
assert_eq!(
html::stringify_document(&dom.document, &options),
String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)),
"<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
);
}
@@ -28,18 +28,19 @@ mod passing {
<link rel=\"something\" href=\"some.css\" />\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html);
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut options = Options::default();
options.isolate = true;
assert_eq!(
html::stringify_document(
&dom.document,
String::from_utf8_lossy(&html::serialize_document(
dom,
"".to_string(),
&options
),
)),
"<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:;\"></meta>\
<title>Isolated document</title>\
<link rel=\"something\" href=\"some.css\">\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
@@ -59,12 +60,12 @@ mod passing {
<title>Unstyled document</title>\
<link rel=\"stylesheet\" href=\"main.css\"/>\
<div style=\"display: none;\"></div>";
let dom = html::html_to_dom(&html);
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut options = Options::default();
options.no_css = true;
assert_eq!(
html::stringify_document(&dom.document, &options),
String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)),
"<!DOCTYPE html>\
<html>\
<head>\
@@ -83,15 +84,16 @@ mod passing {
<title>Frameless document</title>\
<link rel=\"something\"/>\
<div><script src=\"some.js\"></script></div>";
let dom = html::html_to_dom(&html);
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut options = Options::default();
options.no_frames = true;
assert_eq!(
html::stringify_document(
&dom.document,
String::from_utf8_lossy(&html::serialize_document(
dom,
"".to_string(),
&options
),
)),
"<!DOCTYPE html>\
<html>\
<head>\
@@ -115,7 +117,7 @@ mod passing {
<img style=\"width: 100%;\" src=\"some.png\" />\
<iframe src=\"some.html\"></iframe>\
</div>";
let dom = html::html_to_dom(&html);
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut options = Options::default();
options.isolate = true;
options.no_css = true;
@@ -125,14 +127,15 @@ mod passing {
options.no_images = true;
assert_eq!(
html::stringify_document(
&dom.document,
String::from_utf8_lossy(&html::serialize_document(
dom,
"".to_string(),
&options
),
)),
"<!DOCTYPE html>\
<html>\
<head>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
<title>no-frame no-css no-js no-image isolated document</title>\
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
<link rel=\"stylesheet\" href=\"some.css\">\

View File

@@ -7,14 +7,14 @@
#[cfg(test)]
mod passing {
use html5ever::rcdom::{Handle, NodeData};
use markup5ever_rcdom::{Handle, NodeData};
use crate::html;
use monolith::html;
#[test]
fn html_lang_and_body_style() {
let html = "<!doctype html><html lang=\"en\"><head></head><body></body></html>";
let dom = html::html_to_dom(&html);
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut count = 0;
fn test_walk(node: &Handle, i: &mut i8) {
@@ -31,23 +31,23 @@ mod passing {
let node_name = name.local.as_ref().to_string();
if node_name == "html" {
assert_eq!(html::get_node_attr(node, "lang"), Some(str!("en")));
assert_eq!(html::get_node_attr(node, "lang"), Some("en".to_string()));
html::set_node_attr(node, "lang", Some(str!("de")));
assert_eq!(html::get_node_attr(node, "lang"), Some(str!("de")));
html::set_node_attr(node, "lang", Some("de".to_string()));
assert_eq!(html::get_node_attr(node, "lang"), Some("de".to_string()));
html::set_node_attr(node, "lang", None);
assert_eq!(html::get_node_attr(node, "lang"), None);
html::set_node_attr(node, "lang", Some(str!("")));
assert_eq!(html::get_node_attr(node, "lang"), Some(str!("")));
html::set_node_attr(node, "lang", Some("".to_string()));
assert_eq!(html::get_node_attr(node, "lang"), Some("".to_string()));
} else if node_name == "body" {
assert_eq!(html::get_node_attr(node, "style"), None);
html::set_node_attr(node, "style", Some(str!("display: none;")));
html::set_node_attr(node, "style", Some("display: none;".to_string()));
assert_eq!(
html::get_node_attr(node, "style"),
Some(str!("display: none;"))
Some("display: none;".to_string())
);
}
@@ -67,7 +67,7 @@ mod passing {
#[test]
fn body_background() {
let html = "<!doctype html><html lang=\"en\"><head></head><body background=\"1\" background=\"2\"></body></html>";
let dom = html::html_to_dom(&html);
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let mut count = 0;
fn test_walk(node: &Handle, i: &mut i8) {
@@ -84,7 +84,10 @@ mod passing {
let node_name = name.local.as_ref().to_string();
if node_name == "body" {
assert_eq!(html::get_node_attr(node, "background"), Some(str!("1")));
assert_eq!(
html::get_node_attr(node, "background"),
Some("1".to_string())
);
html::set_node_attr(node, "background", None);
assert_eq!(html::get_node_attr(node, "background"), None);

View File

@@ -0,0 +1,594 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
use html5ever::serialize::{serialize, SerializeOpts};
use markup5ever_rcdom::SerializableHandle;
use reqwest::blocking::Client;
use std::collections::HashMap;
use url::Url;
use monolith::html;
use monolith::opts::Options;
use monolith::url::EMPTY_IMAGE_DATA_URL;
#[test]
fn basic() {
let cache = &mut HashMap::new();
let html: &str = "<div><P></P></div>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let mut options = Options::default();
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><p></p></div></body></html>"
);
}
#[test]
fn ensure_no_recursive_iframe() {
let html = "<div><P></P><iframe src=\"\"></iframe></div>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body><div><p></p><iframe src=\"\"></iframe></div></body></html>"
);
}
#[test]
fn ensure_no_recursive_frame() {
let html = "<frameset><frame src=\"\"></frameset>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
);
}
#[test]
fn no_css() {
let html = "\
<link rel=\"stylesheet\" href=\"main.css\">\
<link rel=\"alternate stylesheet\" href=\"main.css\">\
<style>html{background-color: #000;}</style>\
<div style=\"display: none;\"></div>\
";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_css = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
<link rel=\"stylesheet\">\
<link rel=\"alternate stylesheet\">\
<style></style>\
</head>\
<body>\
<div></div>\
</body>\
</html>\
"
);
}
#[test]
fn no_images() {
let html = "<link rel=\"icon\" href=\"favicon.ico\">\
<div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
format!(
"<html>\
<head>\
<link rel=\"icon\">\
</head>\
<body>\
<div>\
<img src=\"{empty_image}\">\
</div>\
</body>\
</html>",
empty_image = EMPTY_IMAGE_DATA_URL
)
);
}
#[test]
fn no_body_background_images() {
let html =
"<body background=\"no/such/image.png\" background=\"no/such/image2.png\"></body>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><body></body></html>"
);
}
#[test]
fn no_frames() {
let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_frames = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
</head>\
<frameset>\
<frame src=\"\">\
</frameset>\
</html>\
"
);
}
#[test]
fn no_iframes() {
let html = "<iframe src=\"http://trackbook.com\"></iframe>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_frames = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head></head>\
<body>\
<iframe src=\"\"></iframe>\
</body>\
</html>\
"
);
}
#[test]
fn no_js() {
let html = "\
<div onClick=\"void(0)\">\
<script src=\"http://localhost/assets/some.js\"></script>\
<script>alert(1)</script>\
</div>\
";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_js = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head></head>\
<body>\
<div>\
<script></script>\
<script></script>\
</div>\
</body>\
</html>\
"
);
}
#[test]
fn keeps_integrity_for_unfamiliar_links() {
let html = "<title>Has integrity</title>\
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\" />";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
<title>Has integrity</title>\
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\">\
</head>\
<body></body>\
</html>\
"
);
}
#[test]
fn discards_integrity_for_known_links_nojs_nocss() {
let html = "\
<title>No integrity</title>\
<link integrity=\"\" rel=\"stylesheet\" href=\"data:;\"/>\
<script integrity=\"\" src=\"some.js\"></script>\
";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_css = true;
options.no_js = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
<title>No integrity</title>\
<link rel=\"stylesheet\">\
<script></script>\
</head>\
<body></body>\
</html>\
"
);
}
#[test]
fn discards_integrity_for_embedded_assets() {
let html = "\
<title>No integrity</title>\
<link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\"/>\
<script integrity=\"sha384-456\" src=\"some.js\"></script>\
";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_css = true;
options.no_js = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
<title>No integrity</title>\
<link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\">\
<script></script>\
</head>\
<body>\
</body>\
</html>\
"
);
}
#[test]
fn removes_unwanted_meta_tags() {
let html = "\
<html>\
<head>\
<meta http-equiv=\"Refresh\" content=\"2\"/>\
<meta http-equiv=\"Location\" content=\"https://freebsd.org\"/>\
</head>\
<body>\
</body>\
</html>\
";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_css = true;
options.no_frames = true;
options.no_js = true;
options.no_images = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
<meta content=\"2\">\
<meta content=\"https://freebsd.org\">\
</head>\
<body>\
</body>\
</html>"
);
}
#[test]
fn processes_noscript_tags() {
let html = "\
<html>\
<body>\
<noscript>\
<img src=\"image.png\" />\
</noscript>\
</body>\
</html>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.no_images = true;
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
format!(
"\
<html>\
<head>\
</head>\
<body>\
<noscript>\
<img src=\"{}\">\
</noscript>\
</body>\
</html>",
EMPTY_IMAGE_DATA_URL,
)
);
}
#[test]
fn preserves_script_type_json() {
let html = "<script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>";
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
let url: Url = Url::parse("http://localhost").unwrap();
let cache = &mut HashMap::new();
let mut options = Options::default();
options.silent = true;
let client = Client::new();
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
let mut buf: Vec<u8> = Vec::new();
serialize(
&mut buf,
&SerializableHandle::from(dom.document.clone()),
SerializeOpts::default(),
)
.unwrap();
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"\
<html>\
<head>\
<script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>\
</head>\
<body>\
</body>\
</html>"
);
}
}

View File

@@ -7,7 +7,7 @@
#[cfg(test)]
mod passing {
use crate::js;
use monolith::js;
#[test]
fn onblur_camelcase() {
@@ -34,7 +34,7 @@ mod passing {
#[cfg(test)]
mod failing {
use crate::js;
use monolith::js;
#[test]
fn href() {

Some files were not shown because too many files have changed in this diff Show More