178 Commits

Author SHA1 Message Date
Sunshine
3d678d80ee Merge pull request #176 from snshn/img-srcset
IMG srcset
2020-05-17 14:26:30 -04:00
Sunshine
19a87f426e version bump 2020-05-17 14:06:55 -04:00
Sunshine
cbe3f9f554 implement support for embedding images within srcset 2020-05-17 14:06:44 -04:00
Sunshine
b6a44c64cf Merge pull request #174 from snshn/armhf-cd
Improve CD for compiling ARM binary asset
2020-05-12 03:31:37 -04:00
Sunshine
84e2dd789c improve CD for compiling ARM binary asset 2020-05-12 03:29:32 -04:00
Sunshine
ac4945ca97 Merge pull request #173 from snshn/sha2-integrity
Add asset integrity validation
2020-05-12 03:15:02 -04:00
Sunshine
2ca2c7aff8 version bump 2020-05-12 03:10:43 -04:00
Sunshine
a18df74946 refactor code and implement integrity validation 2020-05-12 02:51:37 -04:00
Sunshine
2bc8414cc1 Merge pull request #172 from snshn/update-metadata-comment
improve metadata comments
2020-04-30 22:39:25 -04:00
Sunshine
c4569343a4 improve metadata comments 2020-04-30 20:23:09 -04:00
Sunshine
5f5820c71a Merge pull request #168 from snshn/context-comment
Metadata comment tag
2020-04-30 20:06:40 -04:00
Sunshine
4719a6fecf Merge pull request #170 from snshn/svg-image-href
Embed SVG IMAGE assets
2020-04-30 20:00:59 -04:00
Sunshine
c999359b9f Merge branch 'context-comment' of github.com:Alch-Emi/monolith into context-comment 2020-04-30 19:54:13 -04:00
Sunshine
f22e2b6e68 embed SVG IMAGE assets 2020-04-30 19:51:30 -04:00
Sunshine
31a9550f5b Merge pull request #171 from snshn/improve-ci-cd
Add rustfmt installation step to CI
2020-04-30 19:51:04 -04:00
Sunshine
201f2d61b9 add rustfmt installation step to CI 2020-04-30 19:45:44 -04:00
Sunshine
3ae4dfae8e Update README.md 2020-04-28 09:07:47 -04:00
Sunshine
7b095fe4ff Merge pull request #167 from snshn/version-bump
version bump
2020-04-25 03:50:10 -04:00
Sunshine
890bcb1bb6 version bump 2020-04-25 01:03:49 -04:00
Sunshine
aa97ea9f82 Merge pull request #165 from snshn/no-fonts
Add flag for excluding web fonts
2020-04-22 09:16:30 -04:00
Sunshine
9b40dbbf27 add option to exclude web fonts 2020-04-22 09:11:20 -04:00
Sunshine
289f3e801b Merge pull request #161 from snshn/cache-blob
Store blobs instead of data URLs in cache
2020-04-19 13:33:03 -04:00
Sunshine
edacd09dc8 store blobs instead of data URLs in cache 2020-04-19 13:26:14 -04:00
Sunshine
5682863725 Merge pull request #164 from snshn/raspberry-pi-artifact-update
Update GitHub Action for assembling ARM artifacts
2020-04-18 13:46:44 -04:00
Sunshine
4304d7a638 update GitHub Action for assembling ARM artifacts 2020-04-18 13:44:26 -04:00
Sunshine
f56f88da94 Merge pull request #91 from snshn/unwrap-noscript-if-no-js
Propose ADR-0002 (NOSCRIPT nodes)
2020-04-16 23:24:30 -04:00
Sunshine
87c8b361ea add ADR-0002 (NOSCRIPT nodes) 2020-04-16 23:24:03 -04:00
Sunshine
cd505ddb6c Merge pull request #163 from snshn/proper-css-ident-escaping
Escape all special chars within #id and .class CSS selectors
2020-04-11 18:33:41 -04:00
Sunshine
eeea617fb1 escape all special chars within #id and .class CSS selectors 2020-04-11 17:50:23 -04:00
Sunshine
cc6dbddb49 Merge pull request #162 from snshn/colons-in-css-class-names
Escape colons within CSS idents
2020-04-10 21:20:37 -04:00
Sunshine
9d3df2cdc6 escape colons within CSS idents 2020-04-10 20:59:56 -04:00
Sunshine
ab601c3830 Merge pull request #160 from snshn/more-css-image-url-detection-props
Treat url()'s found in @counter-style rules as images
2020-04-10 07:28:55 -04:00
Sunshine
3738be2b6d treat url()'s found in @counter-style rules as images 2020-04-10 07:22:02 -04:00
Sunshine
53160f01c7 Merge pull request #159 from snshn/implement-data-url-media-type-detection
Improve data URL media type detection
2020-04-10 06:04:49 -04:00
Sunshine
594ad55bd8 improve data URL media type detection 2020-04-10 05:50:33 -04:00
Sunshine
d2615f51dc Merge pull request #158 from snshn/improve-data-url-support
Improve parsing of data URLs
2020-04-10 01:49:34 -04:00
Sunshine
c097733ae7 improve parsing of data URLs 2020-04-09 20:27:07 -04:00
Sunshine
67d4b7dafc Merge pull request #157 from snshn/2-2-3
Upgrade base64 crate & version bump (2.2.2 → 2.2.3)
2020-04-08 19:56:24 -04:00
Sunshine
b1d6bbce0c upgrade base64 crate & version bump (2.2.2 → 2.2.3) 2020-04-08 19:49:46 -04:00
Sunshine
20124f4891 Merge pull request #156 from snshn/raspberry-pi-artifact
Make the pipeline build and upload armhf executable with every new release
2020-04-08 19:40:41 -04:00
Sunshine
0dd540afaf make the pipeline build and upload armhf executable with every new release 2020-04-08 19:29:17 -04:00
Sunshine
df71083359 Merge pull request #155 from snshn/fix-css-unit-sign-bug
Fix css unit sign bug
2020-04-08 18:19:32 -04:00
Sunshine
349c7bb3ea properly parse negative units in CSS 2020-04-08 18:07:39 -04:00
Sunshine
5a30c6b44b Merge branch 'master' of github.com:snshn/monolith 2020-04-08 10:53:29 -04:00
Sunshine
929924accd Merge pull request #153 from snshn/proper-quotation-marks
use proper quotation marks in the README
2020-04-05 16:25:40 -04:00
Sunshine
812b46960c use proper quotation marks in the README 2020-04-05 16:24:18 -04:00
Sunshine
874080dbda Merge pull request #152 from snshn/separate-ci-build-jobs
Separate OS build jobs
2020-04-05 15:34:21 -04:00
Sunshine
93dd9d4ed4 separate build job per OS 2020-04-05 15:32:25 -04:00
Sunshine
3f0ced0143 Merge pull request #151 from snshn/2-2-2
version bump (2.2.1 → 2.2.2)
2020-04-05 14:44:48 -04:00
Sunshine
8112ab6d04 version bump (2.2.1 → 2.2.2) 2020-04-05 14:38:40 -04:00
Sunshine
e5fc05f5cd Merge pull request #150 from snshn/cd-windows-executable
Make the pipeline upload windows build to every new release
2020-04-05 14:35:44 -04:00
Sunshine
1068ff659a make the pipeline upload windows build to every new release 2020-04-05 14:29:06 -04:00
Sunshine
d4d9bbe424 update cd.yml 2020-04-04 22:12:35 -04:00
Sunshine
cf3a8c8ede Merge pull request #149 from snshn/remove-travis-ci-and-appveyor
Remove TravisCI and AppVeyor from the project
2020-04-04 19:38:03 -04:00
Sunshine
920d992459 remove TravisCI and AppVeyor from the project 2020-04-04 19:26:58 -04:00
Sunshine
c61b3ba858 Merge pull request #148 from snshn/github-actions-build
Improve GitHub Actions integration
2020-04-04 19:14:52 -04:00
Sunshine
dc6e564ea2 integrate GitHub Actions CI further 2020-04-04 19:05:49 -04:00
Sunshine
24536b5e18 Merge pull request #147 from Y2Z/github-actions-ci
Implement CI using GitHub Actions
2020-04-04 17:51:28 -04:00
Sunshine
908fd59019 Update ci.yml 2020-04-04 17:08:19 -04:00
Sunshine
a19aa37ea8 Merge pull request #145 from snshn/no-images-svg
Empty SVG nodes when excluding images
2020-04-04 15:55:26 -04:00
Sunshine
c46bd5900b Merge pull request #146 from snshn/image-map-area-href
Resolve hrefs of <area> image-map tags
2020-04-04 15:51:45 -04:00
Sunshine
5f98ed23b3 set autocrlf to false to let windows builds pass 2020-04-04 15:42:53 -04:00
Sunshine
c6b135398a Implement CI using GitHub Actions 2020-04-04 15:30:13 -04:00
Sunshine
791e44796e resolve hrefs of <area> image-map tags 2020-04-04 14:55:45 -04:00
Sunshine
b428dd8471 Merge pull request #144 from snshn/macros-unit-test
Implement unit tests for macros
2020-04-04 13:11:19 -04:00
Sunshine
b88479446c implement unit tests for macros 2020-04-04 08:21:41 -04:00
Sunshine
1d6217ef5a empty SVG nodes if --no-images 2020-04-03 21:56:46 -04:00
Sunshine
746c7f05de Merge pull request #143 from snshn/embed-input-images
Add support for image inputs
2020-04-03 04:12:06 -04:00
Sunshine
29836d979a add support for image inputs 2020-04-03 03:30:52 -04:00
Sunshine
5ba6e33fa8 Merge pull request #142 from snshn/robatipoors-improvements
Revamp is_icon() and get_node_name()
2020-04-03 01:39:45 -04:00
Sunshine
643c4ce7ef implement improvements suggested by @robatipoor 2020-04-03 00:00:08 -04:00
Sunshine
c011f90b76 Merge pull request #141 from snshn/update-help-dialog
Update help dialog
2020-04-02 22:49:59 -04:00
Sunshine
875481b9a2 update help dialog 2020-04-02 03:04:21 -04:00
Sunshine
05275d864c Merge pull request #140 from snshn/cssparser
Switch to token-based CSS parser
2020-04-02 02:28:58 -04:00
Sunshine
4951fea730 implement full CSS parsing 2020-04-02 01:09:32 -04:00
Sunshine
b8315a7bd5 Merge pull request #138 from snshn/improved-media-type-detection
Improve SVG media type detection
2020-03-24 18:39:33 -04:00
Sunshine
be25784297 improve SVG media type detection 2020-03-24 08:50:39 -04:00
Sunshine
b0f1c39175 Merge pull request #137 from snshn/master
Bump version to 2.2.0
2020-03-24 08:23:56 -04:00
Sunshine
f27d5fa23e bump version number (2.1.2 → 2.2.0) 2020-03-22 23:30:31 -04:00
Sunshine
4f2944a600 Merge pull request #136 from snshn/restructure-tests
Restructure tests
2020-03-22 23:28:04 -04:00
Sunshine
479c42e1ce improve test code structure 2020-03-22 22:08:41 -04:00
Sunshine
933379c798 ensure consistent naming across all tests 2020-03-22 19:03:33 -04:00
Sunshine
061386ccc2 Merge pull request #135 from snshn/local-file-support
Add support for working with local assets
2020-03-22 17:18:43 -04:00
Sunshine
59a8be493d add support for working with local assets 2020-03-22 15:48:23 -04:00
Sunshine
a653bbe7d4 Merge pull request #133 from Y2Z/docker-instructions
Move Docker instructions under docs/
2020-03-18 00:42:40 -04:00
Sunshine
c7aab235d9 Merge pull request #134 from Y2Z/adr-asset-minimization
Add ADR describing asset minimization
2020-03-16 00:46:28 -04:00
Sunshine
60ef631315 add ADR describing asset minimization 2020-03-15 23:04:03 -04:00
Sunshine
b800947151 move Docker instructions into docs/ 2020-03-14 12:51:05 -04:00
Sunshine
808ce3e722 Merge pull request #130 from snshn/body-background
Account for legacy BODY background="" attribute
2020-03-05 08:32:06 -05:00
Sunshine
a92bba4ec5 Update README.md 2020-03-05 05:15:13 -05:00
Sunshine
a445098409 Update README.md 2020-03-05 05:11:54 -05:00
Sunshine
224d4fc480 Merge pull request #129 from snshn/dockerfile
add Dockerfile
2020-03-05 05:08:13 -05:00
Sunshine
d5ee8ae6ab account for legacy BODY background="" attribute 2020-03-05 04:56:09 -05:00
Sunshine
c16e80f507 add Dockerfile 2020-03-05 04:14:37 -05:00
Sunshine
1c1f2c7128 Merge pull request #127 from snshn/win-travis
add windows target OS to TravisCI
2020-02-27 18:38:50 -05:00
Sunshine
efba6a048d add windows target OS to TravisCI 2020-02-27 01:25:22 -05:00
Sunshine
1701425003 Merge pull request #125 from snshn/frames
Treat frames the same way as iframes
2020-02-24 21:35:29 -05:00
Sunshine
7654eec7e2 treat frames the same way as iframes 2020-02-24 20:18:13 -05:00
Sunshine
00942e0b1d Merge pull request #119 from snshn/data-url-input
Data URL input
2020-02-23 23:33:25 -05:00
Sunshine
0d1e21e9ad add black box tests 2020-02-23 22:48:14 -05:00
Sunshine
3d2d40e7cd add support for data URL targets 2020-02-23 22:25:37 -05:00
Sunshine
b8b6d8cff6 fix "succeeding" to "passing" in tests 2020-02-23 22:24:33 -05:00
Sunshine
928664dc88 correct is_valid_url to is_http_url 2020-02-23 22:24:33 -05:00
Sunshine
5c8d75539b rename dataurl to data_url 2020-02-23 22:24:32 -05:00
Sunshine
ee2055a2a3 Merge pull request #123 from snshn/adr-arch-dir
Move ADRs under docs/arch
2020-02-21 19:16:40 -05:00
Sunshine
b4c46c59d4 move ADRs to docs/arch 2020-02-21 07:58:23 -05:00
Sunshine
8574b7899b Merge pull request #121 from snshn/improve-help
Update help dialog and README.md
2020-02-20 08:07:01 -05:00
Sunshine
969bfbdd59 Merge pull request #120 from snshn/update-crates
Update crates
2020-02-15 12:41:29 -05:00
Sunshine
63f3a204a6 Merge pull request #122 from snshn/adr-timeout
Introduce ADR 0003-network-request-timeout.md
2020-02-15 12:40:02 -05:00
Sunshine
094be09e90 add ADR 0003-network-request-timeout.md 2020-02-15 09:09:12 -05:00
Sunshine
23ceaed493 update crates 2020-02-15 01:47:08 -05:00
Sunshine
d9602e25eb update help dialog and README.md 2020-02-15 01:33:20 -05:00
Sunshine
0c50aa223b Update README.md 2020-02-13 23:47:30 -05:00
Sunshine
e5425ee9d0 Update README.md 2020-02-12 08:38:08 -05:00
Sunshine
f720fe0176 Merge pull request #114 from snshn/custom-network-timeout-option
Add option for custom network request timeout
2020-02-10 21:13:17 -05:00
Sunshine
727a5a410c add option for custom network request timeout 2020-02-10 20:08:06 -05:00
Sunshine
23af174822 Merge pull request #115 from snshn/remove-javascript-anchors
Nullify JS within As' href attributes when needed
2020-02-05 22:57:48 -05:00
Sunshine
5ef2b7c9dc nullify JS within As' href attributes when needed 2020-02-03 01:47:35 -05:00
Sunshine
1e8348543a Merge pull request #111 from snshn/adr
Introduce ADRs
2020-01-22 23:57:25 -05:00
Sunshine
f9bafe092d Introduce ADRs 2020-01-22 01:03:31 -05:00
Sunshine
f876e9243c Merge pull request #109 from snshn/version-bump
version bump (2.1.1 → 2.1.2)
2020-01-21 08:39:10 -05:00
Sunshine
b6896febf1 version bump (2.1.1 → 2.1.2) 2020-01-21 02:32:29 -05:00
Sunshine
29d2ba5857 Merge pull request #107 from snshn/update-readme
Update README.md
2020-01-21 02:18:10 -05:00
Sunshine
8b1ebc7871 Update README.md 2020-01-21 02:16:36 -05:00
Sunshine
d753c83c76 Merge pull request #108 from rhysd/revert-manual-proxy-support
Revert #106 since reqwest supports system proxies by default
2020-01-21 02:15:29 -05:00
rhysd
47a825f5ed add proxies instruction in README.md 2020-01-21 13:02:45 +09:00
rhysd
0e12cecd85 Revert "Merge pull request #106 from rhysd/proxy-support"
This reverts commit d8def879b2, reversing
changes made to a9d114d04d.
2020-01-21 13:01:22 +09:00
Sunshine
d8def879b2 Merge pull request #106 from rhysd/proxy-support
Support HTTP and HTTPS proxies
2020-01-20 18:36:00 -05:00
Linda_pp
0420854ed6 remove '$' from environment variable names in README.md 2020-01-20 23:11:14 +09:00
rhysd
d47482fcd9 fix crash at setting empty values to HTTP proxies
with this patch `https_proxy=` and `http_proxy=` will work well.
2020-01-20 17:17:24 +09:00
rhysd
b68624f2f3 support HTTP and HTTPS proxies (fix #103) 2020-01-20 17:02:43 +09:00
Sunshine
a9d114d04d Merge pull request #105 from rhysd/refactor-main
Refactoring for main.rs to address several issues
2020-01-20 01:10:29 -05:00
rhysd
4e4ebe9c98 refactor main to address several issues
Addressed issues:

- when specified URL is invalid, it exited successfully with doing
  nothing. There was no way why it does not work for users
- it exited successfully even if invalid User-Agent value is specified
- it created file twice on `--output` option specified. It may cause an
  issue when some file watcher (e.g. FsEvents on macOS) is watching

Improvements:
- handle errors with `Result::expect` consistently it correctly exits
  with non-zero status on error
- define `Output` enum for handling both stdout and file outputs
2020-01-15 16:52:20 +09:00
Sunshine
429217d8f7 Merge pull request #104 from rhysd/complete-dom-event-handlers
Use complete list of DOM event handlers for detecting JS attributes
2020-01-15 01:34:01 -05:00
rhysd
1779f4a374 better comments for JS_DOM_EVENT_ATTRS constant 2020-01-15 14:33:27 +09:00
rhysd
26e89ae6d3 use complete list of DOM event handlers 2020-01-15 13:58:09 +09:00
Sunshine
b333d19d04 Update README.md 2020-01-14 03:42:04 -05:00
Sunshine
c1dc798ded Merge pull request #101 from rhysd/ignore-preload
Improve handling preload links and white spaces in attribute values
2020-01-13 17:51:25 -05:00
rhysd
69d99b69e8 remove . in line comment 2020-01-13 23:47:07 +09:00
Sunshine
aae53d20f0 Merge pull request #102 from popey/update-snap-config
Update snapcraft configuration
2020-01-13 08:39:15 -05:00
Alan Pope
14cf2ce8a6 Update snapcraft configuration
This changes the build slightly. If snapcraft is triggered when there is a new tagged release in the project github release page, and it's newer than the version in the Snap Store beta channel, we build that stable release. If however, the latest stable release in github releases is already the same as the Snap Store beta channel, then we build the tip of master.

This gives a couple of advantages. 

  * One yaml can be used to build tip-of-git snaps, and stable releases alike
  * Closing the beta channel in the Snap Store will mean the next triggered build will re-build whatever the last stable release is. This is useful to force a rebuild of the stable version in case a dependency (not that there are many) has a security issue.

We also now set the version dynamically based on the git tags.
2020-01-13 11:14:08 +00:00
Emi Simpson
05985583f0 Switch timestamps from rfc822 local time to iso8601 UTC 2020-01-10 14:30:35 -05:00
Emi Simpson
651fa716b4 Clean user, pass, and fragment from URL before writing 2020-01-10 14:18:15 -05:00
rhysd
67b79e92f9 simplify &x.into_iter() to x.iter() 2020-01-10 14:45:02 +09:00
rhysd
b51f41fe34 trim attribute values 2020-01-10 14:41:05 +09:00
rhysd
6f158dc6db compare value of 'rel' properties in case-insensitive 2020-01-10 13:52:31 +09:00
rhysd
8d7052b39c ignore preload and prefetch sources
since all resources are embedded as data URL.
2020-01-09 18:18:21 +09:00
rhysd
660511b8a0 define link type of <link> element as enum and prefer match statement
since match statement checks exhaustiveness
2020-01-09 16:55:42 +09:00
Emi Simpson
9be3982dc6 Added --no-context flag to disable adding context comment 2020-01-08 19:00:53 -05:00
Emi Simpson
27c9fb4cd3 Added comment indicating the context under which the page was downloaded 2020-01-08 18:51:18 -05:00
Sunshine
929512f4f5 Merge pull request #97 from rhysd/reqwest-0.10.0
Upgrade reqwest to v0.10.0 for better binary size and build time
2020-01-08 01:43:55 -05:00
Sunshine
a46d89cefc Merge pull request #98 from rhysd/fix-ci
Fix nighly and beta CI
2020-01-07 18:14:30 -05:00
rhysd
f93646e17a ignore beta channel again on AppVeyor
since rustc command crashes on combination of
channel=beta & target=i686-pc-windows-gnu
2020-01-07 17:31:36 +09:00
rhysd
9d14b6dfea rename appveyor.yml to .appveyor.yml
align to .travis.yml
2020-01-07 15:28:29 +09:00
rhysd
9783b96524 check beta channel on CI not to break this crate with next Rust version 2020-01-07 15:28:29 +09:00
rhysd
106efe58ce fix nighly and beta on CI are failing
we always use stable rustfmt so checking with nighly/beta rustfmt is not
necessary.
2020-01-07 15:28:29 +09:00
rhysd
6e99ad13e7 upgrade reqwest to v0.10.0
This will improve build time and binary size as follows:

* Before

- **Compile targets**: 220
- **Build time**: `cargo build --release  1264.95s user 39.72s system 335% cpu 6:29.14 total`
- **Binary size**: 6578568 bytes

* After

- **Compile targets**: 170
- **Build time**: `cargo build --release  1130.64s user 32.15s system 359% cpu 5:23.69 total`
- **Binary size**: 6107088 bytes

* Differences

- **Compile targets**: 1.29x smaller
- **Build time**: 1.23x faster
- **Binary size**: 1.07x smaller
2020-01-07 14:22:32 +09:00
Sunshine
413dd66886 Merge pull request #96 from rhysd/refactorings
Refactorings
2020-01-05 18:46:31 -05:00
rhysd
dc7ec6e7a8 remove more redundant type annotations 2020-01-04 16:33:11 +09:00
rhysd
ed879231af fix test code was broken by refactoring 2020-01-04 08:07:19 +09:00
rhysd
ddf4b8ac13 prefer &str to String for reducing allocations 2020-01-04 08:05:02 +09:00
rhysd
84c13f0605 prefer unwrap_or_default to unwrap_or 2020-01-04 07:58:29 +09:00
rhysd
ce03e0e487 reduce allocation on checking DOM attributes and do not hard-code number of elements of array constant
`to_lower` allocates new string but the allocation is not necessary
here.
2020-01-04 07:52:47 +09:00
rhysd
63e19998d0 reduce clones and fix some code styles and redundant code 2020-01-04 07:49:26 +09:00
Sunshine
e3321bbb07 Merge pull request #95 from rhysd/rust2018
Migrate to Rust2018 edition
2020-01-03 02:00:47 -05:00
rhysd
0a38cd0eae add rhysd to authors list 2020-01-03 15:43:25 +09:00
rhysd
75fb6961ed migrate to Rust 2018 2020-01-03 00:33:49 +09:00
Sunshine
5ba8931502 Merge pull request #92 from snshn/output-file-option
Add option for saving output to file
2019-12-26 18:13:15 -05:00
Sunshine
13d2ea1607 Merge pull request #94 from snshn/no-integrity
Get rid of integrity attributes
2019-12-26 10:11:52 -05:00
Sunshine
88ffde0c3b wipe integrity attributes 2019-12-26 09:44:01 -05:00
Sunshine
bfb97bd062 add option for saving output to file 2019-12-26 00:45:20 -05:00
Sunshine
295931041c Merge pull request #80 from Alch-Emi/lazyload
Add support for lazy loaded images
2019-12-24 17:11:21 -05:00
Sunshine
2e623dd9f8 Merge pull request #84 from snshn/ignore-hash-in-cache-url
use clean URLs as hashmap keys
2019-12-24 17:08:57 -05:00
Sunshine
169b9657e5 ignore failures for both beta and nightly in the pipeline 2019-12-24 16:07:15 -05:00
Emi Simpson
dab4ae6965 Merged Y2Z/master with Alch-Emi/lazyload 2019-12-24 10:07:56 -05:00
Sunshine
c7fc121c7c use clean URLs as hashmap keys 2019-12-18 11:49:38 -05:00
Emi Simpson
292221ea28 Lazyloaded images are now loaded at compilation, with placeholders omitted 2019-12-09 19:40:29 -05:00
Emi Simpson
feb37f5812 Added support for lazy loaded images
Note: The way this patch works is by resolving any data-src tags on images in
the same way as normal source tags are resolved.  It is assumed that most
lazy-load libraries will use this tag, and that if this tag is set, then it is a
URL that is in use.
2019-12-06 19:27:41 -05:00
68 changed files with 5648 additions and 1935 deletions

1
.adr-dir Normal file
View File

@@ -0,0 +1 @@
docs/arch

22
.github/workflows/build_gnu_linux.yml vendored Normal file
View File

@@ -0,0 +1,22 @@
name: GNU/Linux
on:
push:
branches: [ master ]
jobs:
build:
strategy:
matrix:
os:
- ubuntu-latest
rust:
- stable
runs-on: ${{ matrix.os }}
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- name: Build
run: cargo build --all --locked --verbose

22
.github/workflows/build_macos.yml vendored Normal file
View File

@@ -0,0 +1,22 @@
name: macOS
on:
push:
branches: [ master ]
jobs:
build:
strategy:
matrix:
os:
- macos-latest
rust:
- stable
runs-on: ${{ matrix.os }}
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- name: Build
run: cargo build --all --locked --verbose

22
.github/workflows/build_windows.yml vendored Normal file
View File

@@ -0,0 +1,22 @@
name: Windows
on:
push:
branches: [ master ]
jobs:
build:
strategy:
matrix:
os:
- windows-latest
rust:
- stable
runs-on: ${{ matrix.os }}
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- name: Build
run: cargo build --all --locked --verbose

54
.github/workflows/cd.yml vendored Normal file
View File

@@ -0,0 +1,54 @@
# CD GitHub Actions workflow for Monolith
name: CD
on:
release:
types:
- created
jobs:
windows:
runs-on: windows-2019
steps:
- run: git config --global core.autocrlf false
- name: Checkout the repository
uses: actions/checkout@v2
- name: Build and install the executable
run: cargo build --release
- uses: Shopify/upload-to-release@1.0.0
with:
name: monolith.exe
path: target\release\monolith.exe
repo-token: ${{ secrets.GITHUB_TOKEN }}
gnu_linux_armhf:
runs-on: ubuntu-18.04
steps:
- name: Checkout the repository
uses: actions/checkout@v2
- name: Prepare cross-platform environment
run: |
sudo mkdir -p /cross-build-arm
sudo touch /etc/apt/sources.list.d/armhf.list
echo "deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ bionic main" | sudo tee -a /etc/apt/sources.list.d/armhf.list
sudo apt-get update
sudo apt-get install -y gcc-arm-linux-gnueabihf libc6-armhf-cross libc6-dev-armhf-cross
sudo apt-get download libssl1.1:armhf libssl-dev:armhf
sudo dpkg -x libssl1.1*.deb /cross-build-arm
sudo dpkg -x libssl-dev*.deb /cross-build-arm
rustup target add arm-unknown-linux-gnueabihf
echo "::set-env name=C_INCLUDE_PATH::/cross-build-arm/usr/include"
echo "::set-env name=OPENSSL_INCLUDE_DIR::/cross-build-arm/usr/include/arm-linux-gnueabihf"
echo "::set-env name=OPENSSL_LIB_DIR::/cross-build-arm/usr/lib/arm-linux-gnueabihf"
echo "::set-env name=PKG_CONFIG_ALLOW_CROSS::1"
echo "::set-env name=RUSTFLAGS::-C linker=arm-linux-gnueabihf-gcc -L/usr/arm-linux-gnueabihf/lib -L/cross-build-arm/usr/lib/arm-linux-gnueabihf -L/cross-build-arm/lib/arm-linux-gnueabihf"
- name: Build the executable
run: cargo build --release --target=arm-unknown-linux-gnueabihf
- name: Attach artifact to the release
uses: Shopify/upload-to-release@1.0.0
with:
name: monolith-gnu-linux-armhf
path: target/arm-unknown-linux-gnueabihf/release/monolith
repo-token: ${{ secrets.GITHUB_TOKEN }}

32
.github/workflows/ci.yml vendored Normal file
View File

@@ -0,0 +1,32 @@
name: CI
on:
pull_request:
branches: [ master ]
jobs:
build_and_test:
strategy:
matrix:
os:
- ubuntu-latest
- macos-latest
- windows-latest
rust:
- stable
- beta
- nightly
runs-on: ${{ matrix.os }}
steps:
- run: git config --global core.autocrlf false
- uses: actions/checkout@v2
- name: Build
run: cargo build --all --locked --verbose
- name: Run tests
run: cargo test --all --locked --verbose
- name: Check code formatting
run: |
rustup component add rustfmt
cargo fmt --all -- --check

3
.gitignore vendored
View File

@@ -4,6 +4,3 @@
# These are backup files generated by rustfmt
**/*.rs.bk
# Exclude accidental HTML files
*.html

View File

@@ -1,26 +0,0 @@
language: rust
cache: cargo
sudo: false
os:
- linux
- osx
rust:
- stable
- beta
- nightly
before_script:
- rustup component add rustfmt
script:
- cargo build --all --locked --verbose
- cargo test --all --locked --verbose
- cargo fmt --all -- --check
jobs:
allow_failures:
- rust: nightly
fast_finish: true

1100
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,19 +1,30 @@
[package]
name = "monolith"
version = "2.1.0"
version = "2.2.6"
edition = "2018"
authors = [
"Sunshine <sunshine@uberspace.net>",
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
"Emmanuel Delaborde <th3rac25@gmail.com>",
"Emi Simpson <emi@alchemi.dev>",
"rhysd <lin90162@yahoo.co.jp>",
]
description = "CLI tool for saving web pages as a single HTML file"
[dependencies]
base64 = "0.10.1"
base64 = "0.12.0"
clap = "2.33.0"
cssparser = "0.27.2"
html5ever = "0.24.1"
lazy_static = "1.4.0"
regex = "1.3.1"
reqwest = "0.9.20"
url = "2.1.0"
sha2 = "0.8.1" # Used in calculating checksums during integrity checks
time = "0.1.42" # Used to render comments indicating the time the page was saved
url = "2.1.1"
[dependencies.reqwest]
version = "0.10.*"
default-features = false
features = ["default-tls", "blocking", "gzip"]
[dev-dependencies]
assert_cmd = "0.12.0"
tempfile = "3.1.0"

18
Dockerfile Normal file
View File

@@ -0,0 +1,18 @@
FROM rust
WORKDIR /usr/local/src/
RUN curl -s https://api.github.com/repos/y2z/monolith/releases/latest \
| grep "tarball_url.*\"," \
| cut -d '"' -f 4 \
| wget -qi - -O monolith.tar.gz
RUN tar xfz monolith.tar.gz \
&& mv Y2Z-monolith-* monolith \
&& rm monolith.tar.gz
WORKDIR /usr/local/src/monolith
RUN ls -a
RUN make install
WORKDIR /tmp
CMD ["/usr/local/cargo/bin/monolith"]

View File

@@ -1,16 +1,25 @@
.PHONY: all build install run test lint
# Makefile for monolith
all: test build
all: build
.PHONY: all
build:
@cargo build --locked
.PHONY: build
install:
@cargo install --force --locked --path .
test:
test: build
@cargo test --locked
@cargo fmt --all -- --check
.PHONY: test_code_formatting
lint:
@cargo fmt --all --
.PHONY: lint
install:
@cargo install --force --locked --path .
.PHONY: install
uninstall:
@cargo uninstall
.PHONY: uninstall

View File

@@ -1,5 +1,6 @@
[![Travis CI Build Status](https://travis-ci.org/Y2Z/monolith.svg?branch=master)](https://travis-ci.org/Y2Z/monolith)
[![AppVeyor Build status](https://ci.appveyor.com/api/projects/status/ae7soyjih8jg2bv7/branch/master?svg=true)](https://ci.appveyor.com/project/snshn/monolith/branch/master)
[![Monolith Build Status for GNU/Linux](https://github.com/Y2Z/monolith/workflows/GNU%2FLinux/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AGNU%2FLinux)
[![Monolith Build Status for macOS](https://github.com/Y2Z/monolith/workflows/macOS/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AmacOS)
[![Monolith Build Status for Windows](https://github.com/Y2Z/monolith/workflows/Windows/badge.svg)](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows)
```
___ ___________ __________ ___________________ ___
@@ -11,42 +12,76 @@
|___| |__________| \____________________| |___| |___| |___|
```
A data hoarder's dream come true: bundle any web page into a single HTML file.
You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
A data hoarder’s dream come true: bundle any web page into a single HTML file. You can finally replace that gazillion of open tabs with a gazillion of .html files stored somewhere on your precious little drive.
Unlike the conventional "Save page as", `monolith` not only saves the target document, it embeds CSS, image, and JavaScript assets **all at once**, producing a single HTML5 document that is a joy to store and share.
Unlike the conventional “Save page as”, `monolith` not only saves the target document, it embeds CSS, image, and JavaScript assets **all at once**, producing a single HTML5 document that is a joy to store and share.
If compared to saving websites with `wget -mpk`, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available.
---------------------------------------------------
## Installation
### From source
$ git clone https://github.com/Y2Z/monolith.git
$ cd monolith
$ cargo install --path .
### On macOS (via Homebrew)
#### Via Homebrew (on macOS and GNU/Linux)
$ brew install monolith
#### Using Snapcraft (on GNU/Linux)
$ snap install monolith
#### From source
Dependency: `libssl-dev`
$ git clone https://github.com/Y2Z/monolith.git
$ cd monolith
$ make install
#### With Docker
The guide can be found [here](docs/containers.md)
---------------------------------------------------
## Usage
$ monolith https://lyrics.github.io/db/p/portishead/dummy/roads/ > portishead-roads-lyrics.html
$ monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roads-lyrics.html
---------------------------------------------------
## Options
- `-c`: Ignore styles
- `-f`: Exclude iframes
- `-f`: Exclude frames and iframes
- `-F`: Omit web fonts
- `-i`: Remove images
- `-I`: Isolate document
- `-I`: Isolate the document
- `-j`: Exclude JavaScript
- `-k`: Accept invalid X.509 (TLS) certificates
- `-o`: Write output to file
- `-s`: Silent mode
- `-u`: Specify custom User-Agent
- `-t`: Set custom network request timeout
- `-u`: Provide own User-Agent
---------------------------------------------------
## HTTPS and HTTP proxies
Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables.
---------------------------------------------------
## Contributing
Please open an issue if something is wrong, that helps make this project better.
---------------------------------------------------
## Related projects
- `Monolith Chrome Extension`: https://github.com/rhysd/monolith-of-web
- `Pagesaver`: https://github.com/distributed-mind/pagesaver
- `SingleFile`: https://github.com/gildas-lormeau/SingleFile
- `Personal WayBack Machine`: https://github.com/popey/pwbm
---------------------------------------------------
## License
The Unlicense
---------------------------------------------------
<!-- Microtext -->
<sub>Keep in mind that `monolith` is not aware of your browser's session</sub>
<sub>Keep in mind that `monolith` is not aware of your browser’s session</sub>

View File

@@ -1,130 +0,0 @@
# Appveyor configuration template for Rust using rustup for Rust installation
# https://github.com/starkat99/appveyor-rust
## Operating System (VM environment) ##
# Rust needs at least Visual Studio 2013 Appveyor OS for MSVC targets.
os: Visual Studio 2015
## Build Matrix ##
# This configuration will setup a build for each channel & target combination (12 windows
# combinations in all).
#
# There are 3 channels: stable, beta, and nightly.
#
# Alternatively, the full version may be specified for the channel to build using that specific
# version (e.g. channel: 1.5.0)
#
# The values for target are the set of windows Rust build targets. Each value is of the form
#
# ARCH-pc-windows-TOOLCHAIN
#
# Where ARCH is the target architecture, either x86_64 or i686, and TOOLCHAIN is the linker
# toolchain to use, either msvc or gnu. See https://www.rust-lang.org/downloads.html#win-foot for
# a description of the toolchain differences.
# See https://github.com/rust-lang-nursery/rustup.rs/#toolchain-specification for description of
# toolchains and host triples.
#
# Comment out channel/target combos you do not wish to build in CI.
#
# You may use the `cargoflags` and `RUSTFLAGS` variables to set additional flags for cargo commands
# and rustc, respectively. For instance, you can uncomment the cargoflags lines in the nightly
# channels to enable unstable features when building for nightly. Or you could add additional
# matrix entries to test different combinations of features.
environment:
matrix:
### MSVC Toolchains ###
# Stable 64-bit MSVC
- channel: stable
target: x86_64-pc-windows-msvc
# Stable 32-bit MSVC
- channel: stable
target: i686-pc-windows-msvc
# Beta 64-bit MSVC
- channel: beta
target: x86_64-pc-windows-msvc
# Beta 32-bit MSVC
- channel: beta
target: i686-pc-windows-msvc
# Nightly 64-bit MSVC
- channel: nightly
target: x86_64-pc-windows-msvc
#cargoflags: --features "unstable"
# Nightly 32-bit MSVC
- channel: nightly
target: i686-pc-windows-msvc
#cargoflags: --features "unstable"
### GNU Toolchains ###
# Stable 64-bit GNU
- channel: stable
target: x86_64-pc-windows-gnu
MINGW_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin'
# Stable 32-bit GNU
- channel: stable
target: i686-pc-windows-gnu
MINGW_PATH: 'C:\MinGW\bin'
# Beta 64-bit GNU
- channel: beta
target: x86_64-pc-windows-gnu
MINGW_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin'
# Beta 32-bit GNU
- channel: beta
target: i686-pc-windows-gnu
MINGW_PATH: 'C:\MinGW\bin'
# Nightly 64-bit GNU
- channel: nightly
target: x86_64-pc-windows-gnu
MINGW_PATH: 'C:\mingw-w64\x86_64-6.3.0-posix-seh-rt_v5-rev1\mingw64\bin'
#cargoflags: --features "unstable"
# Nightly 32-bit GNU
- channel: nightly
target: i686-pc-windows-gnu
MINGW_PATH: 'C:\MinGW\bin'
#cargoflags: --features "unstable"
### Allowed failures ###
# See Appveyor documentation for specific details. In short, place any channel or targets you wish
# to allow build failures on (usually nightly at least is a wise choice). This will prevent a build
# or test failure in the matching channels/targets from failing the entire build.
matrix:
allow_failures:
- channel: nightly
# If you only care about stable channel build failures, uncomment the following line:
#- channel: beta
## Install Script ##
# This is the most important part of the Appveyor configuration. This installs the version of Rust
# specified by the 'channel' and 'target' environment variables from the build matrix. This uses
# rustup to install Rust.
#
# For simple configurations, instead of using the build matrix, you can simply set the
# default-toolchain and default-host manually here.
install:
- appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe
- rustup-init -yv --default-toolchain %channel% --default-host %target%
- set PATH=%PATH%;%USERPROFILE%\.cargo\bin
- if defined MINGW_PATH set PATH=%PATH%;%MINGW_PATH%
- rustc -vV
- cargo -vV
- rustup component add rustfmt
## Build Script ##
# 'cargo test' takes care of building for us, so disable Appveyor's build stage. This prevents
# the "directory does not contain a project or solution file" error.
build: false
# Uses 'cargo test' to run tests and build. Alternatively, the project may call compiled programs
# directly or perform other testing commands. Rust will automatically be placed in the PATH
# environment variable.
test_script:
- cargo test --verbose %cargoflags%
- cargo fmt --all -- --check

View File

@@ -0,0 +1,19 @@
# 1. Record architecture decisions
Date: 2019-12-25
## Status
Accepted
## Context
We need to record the architectural decisions made on this project.
## Decision
We will use Architecture Decision Records, as [described by Michael Nygard](http://thinkrelevance.com/blog/2011/11/15/documenting-architecture-decisions).
## Consequences
See Michael Nygard's article, linked above. For a lightweight ADR toolset, see Nat Pryce's [adr-tools](https://github.com/npryce/adr-tools).

View File

@@ -0,0 +1,19 @@
# 2. NOSCRIPT nodes
Date: 2020-04-16
## Status
Accepted
## Context
HTML pages sometimes contain NOSCRIPT nodes, which reveal their contents only when JavaScript is not available. Most of the time they contain hidden messages that inform about certain JavaScript-dependent features not being operational; however, they can sometimes also feature media assets or even iframes.
## Decision
When the document is being saved with or without JavaScript, each NOSCRIPT node should be preserved while its children need to be processed exactly the same way as the rest of the document. This approach will ensure that even hidden remote assets are embedded — since those hidden elements may have to be displayed later in a browser that has JavaScript turned off. An option should be available to "unwrap" all NOSCRIPT nodes in order to make their contents always visible in the document, complementing the "disable JS" function of the program.
## Consequences
Saved documents will have contents of all NOSCRIPT nodes processed as if they are part of the document's DOM, therefore properly display images encapsulated within NOSCRIPT nodes when being viewed in browsers that have JavaScript turned off (or have no JavaScript support in the first place). The new option to "unwrap" NOSCRIPT elements will help the user ensure that the resulting document always represents what the original web page looked like in a browser that had JavaScript turned off.

View File

@@ -0,0 +1,25 @@
# 2. Network request timeout
Date: 2020-02-15
## Status
Accepted
## Context
A slow network connection and overloaded server may negatively impact network response time.
## Decision
Make the program simulate behavior of popular web browsers and CLI tools, where
the default network response timeout is most often set to 120 seconds.
Instead of featuring retries for timed out network requests, the program
should have an option to adjust the timeout length, along with making it
indefinite when given "0" as its value.
## Consequences
The user is able to retrieve resources that have long response time, as well as obtain
full control over how soon, and if at all, network requests should time out.

View File

@@ -0,0 +1,25 @@
# 4. Asset Minimization
Date: 2020-03-14
## Status
Accepted
## Context
It may look like a good idea to make monolith compress retrieved assets while
saving the page for the purpose of reducing the resulting document's file size.
## Decision
Given that the main purpose of this program is to save pages in a convenient to store and share manner — it's mostly an archiving tool,
aside from being able to tell monolith to exclude certain types of assets (e.g. images, CSS, JavaScript),
it would be outside of scope of this program to implement code for compressing assets. Minimizing files before embedding them
does not reduce the amount of data that needs to be transferred either. A separate tool can be used later to compress and minimize pages
saved by monolith, if needed.
## Consequences
Monolith will not support modification of original document assets for the purpose of reducing their size, sticking to performing only minimal
amount of modifications to the original web page — whatever is needed to provide security or exclude unwanted asset types.

15
docs/containers.md Normal file
View File

@@ -0,0 +1,15 @@
1. Run `docker build -t y2z/monolith .` to create a Docker image
2. Create a file named `monolith` which contains:
```sh
#!/bin/sh
docker run --rm \
y2z/monolith \
monolith \
"$@"
```
3. Make the file executable (`chmod +x monolith`) and include it into your `$PATH`
4. Now you should be able to run a containerized build of monolith like this:
`monolith -I https://github.com > document.html`

View File

@@ -1,6 +1,7 @@
name: monolith
base: core18
version: git
# Version data defined inside the monolith part below
adopt-info: monolith
summary: Monolith - Save HTML pages with ease
description: |
A data hoarder's dream come true: bundle any web page into a single
@@ -17,6 +18,14 @@ description: |
confinement: strict
# Building on armhf fails, so we specify all supported non-armhf architectures
architectures:
- build-on: amd64
- build-on: i386
- build-on: arm64
- build-on: ppc64el
- build-on: s390x
parts:
monolith:
plugin: rust
@@ -24,6 +33,21 @@ parts:
build-packages:
- libssl-dev
- pkg-config
override-pull: |
snapcraftctl pull
# Determine the current tag
last_committed_tag="$(git describe --tags --abbrev=0)"
last_committed_tag_ver="$(echo ${last_committed_tag} | sed 's/v//')"
# Determine the most recent version in the beta channel in the Snap Store
last_released_tag="$(snap info $SNAPCRAFT_PROJECT_NAME | awk '$1 == "beta:" { print $2 }')"
# If the latest tag from the upstream project has not been released to
# beta, build that tag instead of master.
if [ "${last_committed_tag_ver}" != "${last_released_tag}" ]; then
git fetch
git checkout "${last_committed_tag}"
fi
# set version number of the snap based on what we did above
snapcraftctl set-version $(git describe --tags --abbrev=0)
apps:
monolith:

View File

@@ -2,60 +2,77 @@ use clap::{App, Arg};
#[derive(Default)]
pub struct AppArgs {
pub url_target: String,
pub target: String,
pub no_css: bool,
pub no_fonts: bool,
pub no_frames: bool,
pub no_images: bool,
pub no_js: bool,
pub insecure: bool,
pub isolate: bool,
pub output: String,
pub silent: bool,
pub timeout: u64,
pub user_agent: String,
pub no_metadata: bool,
}
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
const DEFAULT_USER_AGENT: &str =
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0";
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0";
impl AppArgs {
pub fn get() -> AppArgs {
let app = App::new("monolith")
let app = App::new(env!("CARGO_PKG_NAME"))
.version(crate_version!())
.author(crate_authors!("\n"))
.about(crate_description!())
.arg(
Arg::with_name("url")
Arg::with_name("target")
.required(true)
.takes_value(true)
.index(1)
.help("URL to download"),
.help("URL or file path"),
)
// .args_from_usage("-a, --include-audio 'Embed audio sources'")
.args_from_usage("-c, --no-css 'Ignore styles'")
.args_from_usage("-f, --no-frames 'Exclude iframes'")
.args_from_usage("-i, --no-images 'Remove images'")
.args_from_usage("-I, --isolate 'Cut off from the Internet'")
.args_from_usage("-j, --no-js 'Exclude JavaScript'")
.args_from_usage("-k, --insecure 'Accept invalid X.509 (TLS) certificates'")
.args_from_usage("-s, --silent 'Suppress verbosity'")
.args_from_usage("-u, --user-agent=[Iceweasel] 'Custom User-Agent string'")
// .args_from_usage("-v, --include-video 'Embed video sources'")
// .args_from_usage("-a, --include-audio 'Removes audio sources'")
.args_from_usage("-c, --no-css 'Removes CSS'")
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
.args_from_usage("-F, --no-fonts 'Removes fonts'")
.args_from_usage("-i, --no-images 'Removes images'")
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
.args_from_usage("-j, --no-js 'Removes JavaScript'")
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
.args_from_usage("-M, --no-metadata 'Excludes metadata information from the document'")
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
.args_from_usage("-s, --silent 'Suppresses verbosity'")
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
// .args_from_usage("-v, --include-video 'Removes video sources'")
.get_matches();
let mut app_args = AppArgs::default();
// Process the command
app_args.url_target = app
.value_of("url")
.expect("please set target url")
app_args.target = app
.value_of("target")
.expect("please set target")
.to_string();
app_args.no_css = app.is_present("no-css");
app_args.no_fonts = app.is_present("no-fonts");
app_args.no_frames = app.is_present("no-frames");
app_args.no_images = app.is_present("no-images");
app_args.no_js = app.is_present("no-js");
app_args.insecure = app.is_present("insecure");
app_args.no_metadata = app.is_present("no-metadata");
app_args.isolate = app.is_present("isolate");
app_args.silent = app.is_present("silent");
app_args.timeout = app
.value_of("timeout")
.unwrap_or(&DEFAULT_NETWORK_TIMEOUT.to_string())
.parse::<u64>()
.unwrap();
app_args.output = app.value_of("output").unwrap_or("").to_string();
app_args.user_agent = app
.value_of("user-agent")
.unwrap_or_else(|| DEFAULT_USER_AGENT)
.unwrap_or(DEFAULT_USER_AGENT)
.to_string();
app_args
}

415
src/css.rs Normal file
View File

@@ -0,0 +1,415 @@
use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token};
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::utils::{data_to_data_url, get_url_fragment, is_http_url, resolve_url, retrieve_asset};
/// Names of CSS properties and descriptors that `is_image_url_prop` treats as
/// able to reference an image. Lookups against this list are ASCII
/// case-insensitive.
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
// Universal
"background",
"background-image",
"border-image",
"border-image-source",
"content",
"cursor",
"list-style",
"list-style-image",
"mask",
"mask-image",
// Specific to @counter-style
"additive-symbols",
"negative",
"pad",
"prefix",
"suffix",
"symbols",
];
/// Characters that `escape` prefixes with a backslash. The backslash itself is
/// not listed here; `escape` doubles it separately.
const CSS_SPECIAL_CHARS: &str = "~!@$%^&*()+=,./'\";:?><[]{}|`#";
/// Returns `true` when `prop_name` matches one of the entries in
/// `CSS_PROPS_WITH_IMAGE_URLS`, ignoring ASCII case.
pub fn is_image_url_prop(prop_name: &str) -> bool {
    // `any` states the membership test directly instead of `find(..).is_some()`
    CSS_PROPS_WITH_IMAGE_URLS
        .iter()
        .any(|known_prop| prop_name.eq_ignore_ascii_case(known_prop))
}
/// Wraps `input` in quotes, backslash-escaping every occurrence of the chosen
/// quote character inside it.
///
/// Double quotes are used when `double` is `true`, single quotes otherwise.
/// No other character (including the backslash) is escaped.
pub fn enquote(input: String, double: bool) -> String {
    let (quote, escaped_quote) = if double {
        ("\"", "\\\"")
    } else {
        ("'", "\\'")
    };

    let mut quoted = String::with_capacity(input.len() + 2);
    quoted.push_str(quote);
    quoted.push_str(&input.replace(quote, escaped_quote));
    quoted.push_str(quote);
    quoted
}
pub fn escape(value: &str) -> String {
let mut res = str!(&value);
res = res.replace("\\", "\\\\");
for c in CSS_SPECIAL_CHARS.chars() {
res = res.replace(c, format!("\\{}", c).as_str());
}
res
}
/// Re-serializes the token stream read from `parser` into a CSS string,
/// replacing assets referenced by `@import` rules and `url()` values with
/// data URLs where they can be retrieved.
///
/// The function calls itself for every nested block and function it meets,
/// handing the nested parser to the recursive call.
///
/// Parameters:
/// - `cache`, `client`: passed through to `retrieve_asset` and `embed_css`.
/// - `parent_url`: base against which referenced URLs are resolved.
/// - `parser`: token source for the current nesting level.
/// - `rule_name`: at-rule context inherited from the caller (e.g. "import").
/// - `prop_name`: property context inherited from the caller.
/// - `func_name`: name of the enclosing function token; quoted strings are
///   treated as asset references only when this is "url".
/// - `opt_no_fonts`: drop `@font-face` keywords and the blocks following them.
/// - `opt_no_images`: replace `url()` values of image properties with the
///   `empty_image!()` placeholder.
/// - `opt_silent`: passed through to `retrieve_asset`.
///
/// Returns `Ok` with the rebuilt CSS text; nothing in this body constructs an
/// `Err`. Panics if a nested call returns `Err`, because nested results are
/// unwrapped.
pub fn process_css<'a>(
cache: &mut HashMap<String, Vec<u8>>,
client: &Client,
parent_url: &str,
parser: &mut Parser,
rule_name: &str,
prop_name: &str,
func_name: &str,
opt_no_fonts: bool,
opt_no_images: bool,
opt_silent: bool,
) -> Result<String, ParseError<'a, String>> {
let mut result: String = str!();
// Running context: the most recent at-rule and the most recent identifier
// seen at this nesting level, seeded from the caller's context.
let mut curr_rule: String = str!(rule_name.clone());
let mut curr_prop: String = str!(prop_name.clone());
let mut token: &Token;
let mut token_offset: SourcePosition;
loop {
// Remember where the token starts so comments can be copied verbatim
token_offset = parser.position();
// Any error from the parser ends the loop — presumably end of input at
// this nesting level (TODO confirm against cssparser docs)
token = match parser.next_including_whitespace_and_comments() {
Ok(token) => token,
Err(_) => {
break;
}
};
match *token {
Token::Comment(_) => {
let token_slice = parser.slice_from(token_offset);
result.push_str(str!(token_slice).as_str());
}
Token::Semicolon => result.push_str(";"),
Token::Colon => result.push_str(":"),
Token::Comma => result.push_str(","),
Token::ParenthesisBlock | Token::SquareBracketBlock | Token::CurlyBracketBlock => {
// With fonts excluded, a block that follows @font-face is neither
// emitted nor processed here
if opt_no_fonts && curr_rule == "font-face" {
continue;
}
let closure: &str;
if token == &Token::ParenthesisBlock {
result.push_str("(");
closure = ")";
} else if token == &Token::SquareBracketBlock {
result.push_str("[");
closure = "]";
} else {
result.push_str("{");
closure = "}";
}
// NOTE(review): the nested call receives `rule_name` (this call's
// inherited value), not `curr_rule`, whereas the Token::Function arm
// below passes `curr_rule` — confirm this difference is intended
let block_css: String = parser
.parse_nested_block(|parser| {
process_css(
cache,
client,
parent_url,
parser,
rule_name,
curr_prop.as_str(),
func_name,
opt_no_fonts,
opt_no_images,
opt_silent,
)
})
.unwrap();
result.push_str(block_css.as_str());
result.push_str(closure);
}
Token::CloseParenthesis => result.push_str(")"),
Token::CloseSquareBracket => result.push_str("]"),
Token::CloseCurlyBracket => result.push_str("}"),
Token::IncludeMatch => result.push_str("~="),
Token::DashMatch => result.push_str("|="),
Token::PrefixMatch => result.push_str("^="),
Token::SuffixMatch => result.push_str("$="),
Token::SubstringMatch => result.push_str("*="),
Token::CDO => result.push_str("<!--"),
Token::CDC => result.push_str("-->"),
Token::WhiteSpace(ref value) => {
result.push_str(value);
}
// div...
Token::Ident(ref value) => {
// An identifier clears the at-rule context and becomes the current
// property name used for later image-property checks
curr_rule = str!();
curr_prop = str!(value);
result.push_str(&escape(value));
}
// @import, @font-face, @charset, @media...
Token::AtKeyword(ref value) => {
curr_rule = str!(value);
// The keyword is not emitted, but curr_rule stays "font-face" so the
// block arm above also skips the block that follows
if opt_no_fonts && curr_rule == "font-face" {
continue;
}
result.push_str("@");
result.push_str(value);
}
Token::Hash(ref value) => {
result.push_str("#");
result.push_str(value);
}
Token::QuotedString(ref value) => {
// A quoted string directly after @import is the imported stylesheet
if curr_rule == "import" {
// Reset current at-rule value
curr_rule = str!();
// Skip empty import values
if value.len() < 1 {
result.push_str("''");
continue;
}
let import_full_url = resolve_url(&parent_url, value).unwrap_or_default();
let import_url_fragment = get_url_fragment(import_full_url.clone());
match retrieve_asset(cache, client, &parent_url, &import_full_url, opt_silent) {
Ok((import_contents, import_final_url, _import_media_type)) => {
// The imported stylesheet is itself run through embed_css before
// being encoded as a text/css data URL
result.push_str(
enquote(
data_to_data_url(
"text/css",
embed_css(
cache,
client,
&import_final_url,
&String::from_utf8_lossy(&import_contents),
opt_no_fonts,
opt_no_images,
opt_silent,
)
.as_bytes(),
&import_final_url,
&import_url_fragment,
),
false,
)
.as_str(),
);
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
// (for non-HTTP URLs nothing is emitted)
if is_http_url(import_full_url.clone()) {
result.push_str(enquote(import_full_url, false).as_str());
}
}
}
} else {
// Quoted string inside url(...): treat it as an asset reference
if func_name == "url" {
// Skip empty url()'s
if value.len() < 1 {
continue;
}
if opt_no_images && is_image_url_prop(curr_prop.as_str()) {
result.push_str(enquote(str!(empty_image!()), false).as_str());
} else {
let resolved_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(resolved_url.clone());
match retrieve_asset(
cache,
client,
&parent_url,
&resolved_url,
opt_silent,
) {
Ok((data, final_url, media_type)) => {
let data_url = data_to_data_url(
&media_type,
&data,
&final_url,
&url_fragment,
);
result.push_str(enquote(data_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
// (for non-HTTP URLs nothing is emitted)
if is_http_url(resolved_url.clone()) {
result.push_str(enquote(resolved_url, false).as_str());
}
}
}
}
} else {
// Any other quoted string is re-emitted in single quotes
result.push_str(enquote(str!(value), false).as_str());
}
}
}
Token::Number {
ref has_sign,
ref value,
..
} => {
// Re-add an explicit "+" sign for non-negative signed numbers
if *has_sign && *value >= 0. {
result.push_str("+");
}
result.push_str(&value.to_string())
}
Token::Percentage {
ref has_sign,
ref unit_value,
..
} => {
if *has_sign && *unit_value >= 0. {
result.push_str("+");
}
// unit_value is multiplied by 100 before being written with "%"
result.push_str(str!(unit_value * 100.).as_str());
result.push_str("%");
}
Token::Dimension {
ref has_sign,
ref value,
ref unit,
..
} => {
if *has_sign && *value >= 0. {
result.push_str("+");
}
result.push_str(str!(value).as_str());
result.push_str(str!(unit).as_str());
}
// #selector, #id...
Token::IDHash(ref value) => {
curr_rule = str!();
result.push_str("#");
result.push_str(&escape(value));
}
Token::UnquotedUrl(ref value) => {
let is_import: bool = curr_rule == "import";
if is_import {
// Reset current at-rule value
curr_rule = str!();
}
// Skip empty url()'s
if value.len() < 1 {
result.push_str("url()");
continue;
} else if value.starts_with("#") {
// Values starting with "#" are written back unchanged
result.push_str("url(");
result.push_str(value);
result.push_str(")");
continue;
}
result.push_str("url(");
if is_import {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) {
Ok((css, final_url, _media_type)) => {
// Imported stylesheet: embed its own assets, then encode as text/css
let data_url = data_to_data_url(
"text/css",
embed_css(
cache,
client,
&final_url,
&String::from_utf8_lossy(&css),
opt_no_fonts,
opt_no_images,
opt_silent,
)
.as_bytes(),
&final_url,
&url_fragment,
);
result.push_str(enquote(data_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
// (for non-HTTP URLs the output is an empty "url()")
if is_http_url(full_url.clone()) {
result.push_str(enquote(full_url, false).as_str());
}
}
}
} else {
if opt_no_images && is_image_url_prop(curr_prop.as_str()) {
result.push_str(enquote(str!(empty_image!()), false).as_str());
} else {
let full_url = resolve_url(&parent_url, value).unwrap_or_default();
let url_fragment = get_url_fragment(full_url.clone());
match retrieve_asset(cache, client, &parent_url, &full_url, opt_silent) {
Ok((data, final_url, media_type)) => {
let data_url =
data_to_data_url(&media_type, &data, &final_url, &url_fragment);
result.push_str(enquote(data_url, false).as_str());
}
Err(_) => {
// Keep remote reference if unable to retrieve the asset
// (for non-HTTP URLs the output is an empty "url()")
if is_http_url(full_url.clone()) {
result.push_str(enquote(full_url, false).as_str());
}
}
}
}
}
result.push_str(")");
}
Token::Delim(ref value) => result.push_str(&value.to_string()),
Token::Function(ref name) => {
// Emit "name(", process the arguments recursively with this function's
// name as `func_name`, then close the parenthesis
let function_name: &str = &name.clone();
result.push_str(function_name);
result.push_str("(");
let block_css: String = parser
.parse_nested_block(|parser| {
process_css(
cache,
client,
parent_url,
parser,
curr_rule.as_str(),
curr_prop.as_str(),
function_name,
opt_no_fonts,
opt_no_images,
opt_silent,
)
})
.unwrap();
result.push_str(block_css.as_str());
result.push_str(")");
}
// Malformed URLs and strings are dropped from the output
Token::BadUrl(_) | Token::BadString(_) => {}
}
}
Ok(result)
}
/// Parses `css` and returns it re-serialized by `process_css`, which embeds
/// the assets the stylesheet references.
///
/// `cache`, `client`, `parent_url` and the `opt_*` flags are handed to
/// `process_css` unchanged; the rule, property and function context all start
/// out empty. Panics if `process_css` returns an error.
pub fn embed_css(
    cache: &mut HashMap<String, Vec<u8>>,
    client: &Client,
    parent_url: &str,
    css: &str,
    opt_no_fonts: bool,
    opt_no_images: bool,
    opt_silent: bool,
) -> String {
    let mut parser_input = ParserInput::new(css);
    let mut css_parser = Parser::new(&mut parser_input);

    // Top-level call: no enclosing at-rule, property or function yet
    let no_context = "";

    let embedded = process_css(
        cache,
        client,
        parent_url,
        &mut css_parser,
        no_context,
        no_context,
        no_context,
        opt_no_fonts,
        opt_no_images,
        opt_silent,
    );

    embedded.unwrap()
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,63 +0,0 @@
use reqwest::header::CONTENT_TYPE;
use reqwest::Client;
use std::collections::HashMap;
use utils::{data_to_dataurl, is_data_url};
/// Retrieves the asset at `url`, consulting `cache` first and storing freshly
/// downloaded content in it.
///
/// Parameters:
/// - `cache`: previously retrieved assets, keyed by URL.
/// - `client`: HTTP client used for the request.
/// - `url`: asset location; data URLs are returned as-is without any request.
/// - `as_dataurl`: when `true` the response body is converted to a data URL,
///   otherwise it is returned as text.
/// - `mime`: media type to use for the data URL; when empty, the response's
///   `Content-Type` header is used instead (falling back to the empty string).
/// - `opt_silent`: suppresses the progress lines written to stderr.
///
/// Returns the asset content (or data URL) together with the URL it was
/// ultimately served from.
///
/// Errors: any `reqwest::Error` raised while sending the request or reading
/// the response body is returned to the caller rather than causing a panic.
///
/// Note: lookups use the requested `url` while entries are stored under the
/// response's final URL, so a redirected request is not served from the cache
/// on a later call with the original URL.
pub fn retrieve_asset(
    cache: &mut HashMap<String, String>,
    client: &Client,
    url: &str,
    as_dataurl: bool,
    mime: &str,
    opt_silent: bool,
) -> Result<(String, String), reqwest::Error> {
    // A URL that cannot be classified is treated as "not a data URL" so the
    // failure surfaces as an error from the request below instead of a panic
    if is_data_url(&url).unwrap_or(false) {
        return Ok((url.to_string(), url.to_string()));
    }

    // Single lookup instead of contains_key() followed by get()
    if let Some(data) = cache.get(url) {
        // url is in cache
        if !opt_silent {
            eprintln!("{} (from cache)", &url);
        }
        return Ok((data.to_string(), url.to_string()));
    }

    // url not in cache, we request it
    let mut response = client.get(url).send()?;
    let final_url = response.url().to_string();

    if !opt_silent {
        if url == final_url.as_str() {
            eprintln!("{}", &url);
        } else {
            eprintln!("{} -> {}", &url, &final_url);
        }
    }

    let content: String = if as_dataurl {
        // Convert response into a byte array
        let mut data: Vec<u8> = vec![];
        response.copy_to(&mut data)?;

        // Attempt to obtain MIME type by reading the Content-Type header
        let mimetype = if mime == "" {
            response
                .headers()
                .get(CONTENT_TYPE)
                .and_then(|header| header.to_str().ok())
                .unwrap_or(mime)
        } else {
            mime
        };

        data_to_dataurl(&mimetype, &data)
    } else {
        // Propagate body read/decoding failures instead of unwrapping
        response.text()?
    };

    // insert in cache
    cache.insert(final_url.clone(), content.clone());

    Ok((content, final_url))
}

111
src/js.rs
View File

@@ -1,32 +1,103 @@
const JS_DOM_EVENT_ATTRS: [&str; 21] = [
// Input
"onfocus",
const JS_DOM_EVENT_ATTRS: &[&str] = &[
// From WHATWG HTML spec 8.1.5.2 "Event handlers on elements, Document objects, and Window objects":
// https://html.spec.whatwg.org/#event-handlers-on-elements,-document-objects,-and-window-objects
// https://html.spec.whatwg.org/#attributes-3 (table "List of event handler content attributes")
// Global event handlers
"onabort",
"onauxclick",
"onblur",
"onselect",
"oncancel",
"oncanplay",
"oncanplaythrough",
"onchange",
"onsubmit",
"onreset",
"onclick",
"onclose",
"oncontextmenu",
"oncuechange",
"ondblclick",
"ondrag",
"ondragend",
"ondragenter",
"ondragexit",
"ondragleave",
"ondragover",
"ondragstart",
"ondrop",
"ondurationchange",
"onemptied",
"onended",
"onerror",
"onfocus",
"onformdata",
"oninput",
"oninvalid",
"onkeydown",
"onkeypress",
"onkeyup",
// Mouse
"onmouseover",
"onmouseout",
"onmousedown",
"onmouseup",
"onmousemove",
// Click
"onclick",
"ondblclick",
// Load
"onload",
"onunload",
"onabort",
"onerror",
"onloadeddata",
"onloadedmetadata",
"onloadstart",
"onmousedown",
"onmouseenter",
"onmouseleave",
"onmousemove",
"onmouseout",
"onmouseover",
"onmouseup",
"onwheel",
"onpause",
"onplay",
"onplaying",
"onprogress",
"onratechange",
"onreset",
"onresize",
"onscroll",
"onsecuritypolicyviolation",
"onseeked",
"onseeking",
"onselect",
"onslotchange",
"onstalled",
"onsubmit",
"onsuspend",
"ontimeupdate",
"ontoggle",
"onvolumechange",
"onwaiting",
"onwebkitanimationend",
"onwebkitanimationiteration",
"onwebkitanimationstart",
"onwebkittransitionend",
// Event handlers for <body/> and <frameset/> elements
"onafterprint",
"onbeforeprint",
"onbeforeunload",
"onhashchange",
"onlanguagechange",
"onmessage",
"onmessageerror",
"onoffline",
"ononline",
"onpagehide",
"onpageshow",
"onpopstate",
"onrejectionhandled",
"onstorage",
"onunhandledrejection",
"onunload",
// Event handlers for <html/> element
"oncut",
"oncopy",
"onpaste",
];
// Returns true if DOM attribute name matches a native JavaScript event handler
pub fn attr_is_event_handler(attr_name: &str) -> bool {
JS_DOM_EVENT_ATTRS.contains(&attr_name.to_lowercase().as_str())
JS_DOM_EVENT_ATTRS
.iter()
.find(|a| attr_name.eq_ignore_ascii_case(a))
.is_some()
}

View File

@@ -1,15 +1,8 @@
extern crate html5ever;
#[macro_use]
extern crate lazy_static;
extern crate regex;
extern crate reqwest;
extern crate url;
#[macro_use]
mod macros;
pub mod css;
pub mod html;
pub mod http;
pub mod js;
pub mod utils;

View File

@@ -7,3 +7,11 @@ macro_rules! str {
ToString::to_string(&$val)
};
}
/// Expands to a string literal containing a base64-encoded `image/png` data
/// URL. It takes no arguments.
#[macro_export]
macro_rules! empty_image {
() => {
"data:image/png;base64,\
iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII="
};
}

View File

@@ -1,70 +1,188 @@
#[macro_use]
extern crate clap;
extern crate monolith;
extern crate reqwest;
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
use monolith::utils::{data_url_to_data, is_data_url, is_file_url, is_http_url, retrieve_asset};
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use reqwest::Url;
use std::collections::HashMap;
use std::env;
use std::fs;
use std::io::{self, Error, Write};
use std::path::Path;
use std::process;
use std::time::Duration;
mod args;
mod macros;
use args::AppArgs;
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
use monolith::http::retrieve_asset;
use monolith::utils::is_valid_url;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use std::collections::HashMap;
use std::time::Duration;
#[macro_use]
extern crate clap;
use crate::args::AppArgs;
/// Destination for the generated document: standard output or a file.
enum Output {
    Stdout(io::Stdout),
    File(fs::File),
}

impl Output {
    /// Selects the destination from `file_path`: an empty path means standard
    /// output, anything else is created (or truncated) as a file.
    ///
    /// Returns the I/O error if the file cannot be created.
    fn new(file_path: &str) -> Result<Output, Error> {
        if file_path.is_empty() {
            return Ok(Output::Stdout(io::stdout()));
        }

        fs::File::create(file_path).map(Output::File)
    }

    /// Writes `s` followed by a newline to the destination and flushes it.
    fn writeln_str(&mut self, s: &str) -> Result<(), Error> {
        // Both variants wrap a writer, so pick it once and share the rest
        let sink: &mut dyn Write = match self {
            Output::Stdout(stdout) => stdout,
            Output::File(file) => file,
        };

        writeln!(sink, "{}", s)?;
        sink.flush()
    }
}
fn main() {
let app_args = AppArgs::get();
let cache = &mut HashMap::new();
if is_valid_url(app_args.url_target.as_str()) {
// Initialize client
let mut header_map = HeaderMap::new();
match HeaderValue::from_str(&app_args.user_agent) {
Ok(header) => header_map.insert(USER_AGENT, header),
Err(err) => {
eprintln!("Invalid user agent! {}", err);
return;
}
};
let client = reqwest::Client::builder()
.timeout(Duration::from_secs(10))
.danger_accept_invalid_certs(app_args.insecure)
.default_headers(header_map)
.build()
.expect("Failed to initialize HTTP client");
let original_target: &str = &app_args.target;
let target_url: &str;
let base_url;
let dom;
let (data, final_url) = retrieve_asset(
cache,
&client,
app_args.url_target.as_str(),
false,
"",
app_args.silent,
)
.unwrap();
let dom = html_to_dom(&data);
// Pre-process the input
let cwd_normalized: String =
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let path = Path::new(original_target);
let mut target: String = str!(original_target.clone()).replace("\\", "/");
let path_is_relative: bool = path.is_relative();
walk_and_embed_assets(
cache,
&client,
&final_url,
&dom.document,
app_args.no_css,
app_args.no_js,
app_args.no_images,
app_args.silent,
app_args.no_frames,
);
let html: String = stringify_document(
&dom.document,
app_args.no_css,
app_args.no_frames,
app_args.no_js,
app_args.no_images,
app_args.isolate,
);
println!("{}", html);
if target.clone().len() == 0 {
eprintln!("No target specified");
process::exit(1);
} else if is_http_url(target.clone()) || is_data_url(target.clone()) {
target_url = target.as_str();
} else if is_file_url(target.clone()) {
target_url = target.as_str();
} else if path.exists() {
if !path.is_file() {
eprintln!("Local target is not a file: {}", original_target);
process::exit(1);
}
target.insert_str(0, if cfg!(windows) { "file:///" } else { "file://" });
if path_is_relative {
target.insert_str(if cfg!(windows) { 8 } else { 7 }, &cwd_normalized);
target.insert_str(
if cfg!(windows) { 8 } else { 7 } + &cwd_normalized.len(),
"/",
);
}
target_url = target.as_str();
} else {
target.insert_str(0, "http://");
target_url = target.as_str();
}
let mut output = Output::new(&app_args.output).expect("Could not prepare output");
// Initialize client
let mut cache = HashMap::new();
let mut header_map = HeaderMap::new();
header_map.insert(
USER_AGENT,
HeaderValue::from_str(&app_args.user_agent).expect("Invalid User-Agent header specified"),
);
let timeout: u64 = if app_args.timeout > 0 {
app_args.timeout
} else {
std::u64::MAX / 4
};
let client = Client::builder()
.timeout(Duration::from_secs(timeout))
.danger_accept_invalid_certs(app_args.insecure)
.default_headers(header_map)
.build()
.expect("Failed to initialize HTTP client");
// Retrieve root document
if is_file_url(target_url) || is_http_url(target_url) {
match retrieve_asset(&mut cache, &client, target_url, target_url, app_args.silent) {
Ok((data, final_url, _media_type)) => {
base_url = final_url;
dom = html_to_dom(&String::from_utf8_lossy(&data));
}
Err(_) => {
eprintln!("Could not retrieve target document");
process::exit(1);
}
}
} else if is_data_url(target_url) {
let (media_type, data): (String, Vec<u8>) = data_url_to_data(target_url);
if !media_type.eq_ignore_ascii_case("text/html") {
eprintln!("Unsupported data URL media type");
process::exit(1);
}
base_url = str!(target_url);
dom = html_to_dom(&String::from_utf8_lossy(&data));
} else {
process::exit(1);
}
let time_saved = time::now_utc();
walk_and_embed_assets(
&mut cache,
&client,
&base_url,
&dom.document,
app_args.no_css,
app_args.no_fonts,
app_args.no_frames,
app_args.no_js,
app_args.no_images,
app_args.silent,
);
let mut html: String = stringify_document(
&dom.document,
app_args.no_css,
app_args.no_frames,
app_args.no_js,
app_args.no_images,
app_args.isolate,
);
if !app_args.no_metadata {
// Safe to unwrap (we just put this through an HTTP request)
let mut clean_url = Url::parse(&base_url).unwrap();
clean_url.set_fragment(None);
// Don't include credentials
clean_url.set_username("").unwrap();
clean_url.set_password(None).unwrap();
let metadata_comment = if is_http_url(&base_url) {
format!(
"<!-- Saved from {} at {} using {} v{} -->\n",
&clean_url,
time_saved.rfc3339(),
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION"),
)
} else {
format!(
"<!-- Saved from local source at {} using {} v{} -->\n",
time_saved.rfc3339(),
env!("CARGO_PKG_NAME"),
env!("CARGO_PKG_VERSION"),
)
};
html.insert_str(0, &metadata_comment);
}
output
.writeln_str(&html)
.expect("Could not write HTML output");
}

529
src/tests/cli.rs Normal file
View File

@@ -0,0 +1,529 @@
use assert_cmd::prelude::*;
use std::env;
use std::io::Write;
use std::process::Command;
use tempfile::NamedTempFile;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Running the binary with `-V` must print "<name> <version>" on STDOUT,
/// leave STDERR empty, and exit with code 0.
#[test]
fn passing_print_version() -> Result<(), Box<dyn std::error::Error>> {
    let out = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("-V")
        .output()
        .unwrap();

    // STDOUT should contain program name and version
    let stdout = std::str::from_utf8(&out.stdout).unwrap();
    let expected_stdout = format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"));
    assert_eq!(stdout, expected_stdout);

    // STDERR should be empty
    let stderr = std::str::from_utf8(&out.stderr).unwrap();
    assert_eq!(stderr, "");

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
/// An empty target argument must produce no output, print
/// "No target specified" on STDERR, and exit with code 1.
#[test]
fn passing_bad_input_empty_target() -> Result<(), Box<dyn std::error::Error>> {
    let out = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("")
        .output()
        .unwrap();

    // STDOUT should be empty
    let stdout = std::str::from_utf8(&out.stdout).unwrap();
    assert_eq!(stdout, "");

    // STDERR should contain error description
    let stderr = std::str::from_utf8(&out.stderr).unwrap();
    assert_eq!(stderr, "No target specified\n");

    // The exit code should be 1
    out.assert().code(1);

    Ok(())
}
/// A data URL whose media type is not `text/html` must be rejected:
/// nothing on STDOUT, an error description on STDERR, exit code 1.
#[test]
fn passing_bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
// STDOUT should be empty (no HTML is produced for an unsupported media type)
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
// STDERR should contain error description
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
"Unsupported data URL media type\n"
);
// The exit code should be 1
out.assert().code(1);
Ok(())
}
/// With `-M -I`, a `text/html` data URL target must be emitted with the
/// isolating Content-Security-Policy meta tag prepended to `<head>`.
#[test]
fn passing_isolate_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let out = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .args(&["-M", "-I", "data:text/html,Hello%2C%20World!"])
        .output()
        .unwrap();

    // STDOUT should contain isolated HTML
    let stdout = std::str::from_utf8(&out.stdout).unwrap();
    let expected_stdout = "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
        </head><body>Hello, World!</body></html>\n";
    assert_eq!(stdout, expected_stdout);

    // STDERR should be empty
    let stderr = std::str::from_utf8(&out.stderr).unwrap();
    assert_eq!(stderr, "");

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
/// With `-M -c`, the `<style>` element of a data URL document must be
/// emptied and a `style-src 'none'` CSP meta tag must be added.
#[test]
fn passing_remove_css_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let out = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .args(&[
            "-M",
            "-c",
            "data:text/html,<style>body{background-color:pink}</style>Hello",
        ])
        .output()
        .unwrap();

    // STDOUT should contain HTML with no CSS
    let stdout = std::str::from_utf8(&out.stdout).unwrap();
    let expected_stdout = "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
        <style></style>\
        </head><body>Hello</body></html>\n";
    assert_eq!(stdout, expected_stdout);

    // STDERR should be empty
    let stderr = std::str::from_utf8(&out.stderr).unwrap();
    assert_eq!(stderr, "");

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
/// With `-M -f`, the iframe `src` of a data URL document must be blanked
/// and a frame-blocking CSP meta tag must be added.
#[test]
fn passing_remove_frames_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let out = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .args(&[
            "-M",
            "-f",
            "data:text/html,<iframe src=\"https://google.com\"></iframe>Hi",
        ])
        .output()
        .unwrap();

    // STDOUT should contain HTML with no iframes
    let stdout = std::str::from_utf8(&out.stdout).unwrap();
    let expected_stdout = "<html><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
        </head><body><iframe src=\"\"></iframe>Hi</body></html>\n";
    assert_eq!(stdout, expected_stdout);

    // STDERR should be empty
    let stderr = std::str::from_utf8(&out.stderr).unwrap();
    assert_eq!(stderr, "");

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
/// With `-M -i`, the image `src` of a data URL document must be replaced by
/// the empty-image placeholder and an `img-src data:` CSP meta tag added.
#[test]
fn passing_remove_images_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let out = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .args(&[
            "-M",
            "-i",
            "data:text/html,<img src=\"https://google.com\"/>Hi",
        ])
        .output()
        .unwrap();

    // STDOUT should contain HTML with no images
    let stdout = std::str::from_utf8(&out.stdout).unwrap();
    let expected_stdout = format!(
        "<html>\
        <head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"img-src data:;\"></meta>\
        </head>\
        <body>\
        <img src=\"{empty_image}\">\
        Hi\
        </body>\
        </html>\n",
        empty_image = empty_image!()
    );
    assert_eq!(stdout, expected_stdout);

    // STDERR should be empty
    let stderr = std::str::from_utf8(&out.stderr).unwrap();
    assert_eq!(stderr, "");

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
/// With `-M -j`, the inline script of a data URL document must be emptied
/// and a `script-src 'none'` CSP meta tag must be added.
#[test]
fn passing_remove_js_from_data_url() -> Result<(), Box<dyn std::error::Error>> {
    let out = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .args(&["-M", "-j", "data:text/html,<script>alert(2)</script>Hi"])
        .output()
        .unwrap();

    // STDOUT should contain HTML with no JS
    let stdout = std::str::from_utf8(&out.stdout).unwrap();
    let expected_stdout = "<html>\
        <head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta>\
        <script></script></head>\
        <body>Hi</body>\
        </html>\n";
    assert_eq!(stdout, expected_stdout);

    // STDERR should be empty
    let stderr = std::str::from_utf8(&out.stderr).unwrap();
    assert_eq!(stderr, "");

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
/// A relative local file path must be accepted as the target.
///
/// Runs the binary with `-M` against the `local-file.html` fixture and checks
/// that the existing stylesheet and script are embedded as data URLs, and that
/// STDERR lists the file URLs that were retrieved.
#[test]
fn passing_local_file_target_input() -> Result<(), Box<dyn std::error::Error>> {
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
// Current working directory with forward slashes, as it appears inside file URLs
let cwd_normalized: String =
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
let out = cmd
.arg("-M")
// Use the platform's native path separator for the target argument
.arg(if cfg!(windows) {
"src\\tests\\data\\basic\\local-file.html"
} else {
"src/tests/data/basic/local-file.html"
})
.output()
.unwrap();
// Windows paths carry no leading slash, hence the extra one in the prefix
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
// STDOUT should contain HTML from the local file
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"\
<!DOCTYPE html><html lang=\"en\"><head>\n \
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
<title>Local HTML file</title>\n \
<link rel=\"stylesheet\" type=\"text/css\" href=\"data:text/css;base64,Ym9keSB7CiAgICBiYWNrZ3JvdW5kLWNvbG9yOiAjMDAwOwogICAgY29sb3I6ICNmZmY7Cn0K\">\n \
<link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
<img alt=\"\">\n \
<a href=\"file://local-file.html/\">Tricky href</a>\n \
<a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
<script src=\"data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==\"></script>\n\n\n\n\
</body></html>\n\
"
);
// STDERR should contain list of retrieved file URLs
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file}{cwd}/src/tests/data/basic/local-file.html\n\
{file}{cwd}/src/tests/data/basic/local-style.css\n\
{file}{cwd}/src/tests/data/basic/local-script.js\n\
",
file = file_url_protocol,
cwd = cwd_normalized
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
/// An absolute local file path must be accepted as the target.
///
/// Runs the binary with `-M -jciI` against the `local-file.html` fixture and
/// checks that no asset gets embedded and only the target is retrieved.
#[test]
fn passing_local_file_target_input_absolute_target_path() -> Result<(), Box<dyn std::error::Error>>
{
    let cwd = env::current_dir().unwrap();
    // Forward-slash form of the working directory, as used inside file URLs
    let cwd_normalized: String =
        str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");

    // Absolute target path spelled with the platform's native separator
    let target_path: String = if cfg!(windows) {
        format!(
            "{cwd}\\src\\tests\\data\\basic\\local-file.html",
            cwd = cwd.to_str().unwrap()
        )
    } else {
        format!(
            "{cwd}/src/tests/data/basic/local-file.html",
            cwd = cwd.to_str().unwrap()
        )
    };

    let out = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .arg("-M")
        .arg("-jciI")
        .arg(target_path)
        .output()
        .unwrap();

    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // STDOUT should contain HTML from the local file
    let stdout = std::str::from_utf8(&out.stdout).unwrap();
    let expected_stdout = format!(
        "\
        <!DOCTYPE html><html lang=\"en\"><head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
        <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
        <title>Local HTML file</title>\n \
        <link rel=\"stylesheet\" type=\"text/css\">\n \
        <link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
        <img alt=\"\" src=\"{empty_image}\">\n \
        <a href=\"file://local-file.html/\">Tricky href</a>\n \
        <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
        <script></script>\n\n\n\n\
        </body></html>\n\
        ",
        empty_image = empty_image!()
    );
    assert_eq!(stdout, expected_stdout);

    // STDERR should contain only the target file
    let stderr = std::str::from_utf8(&out.stderr).unwrap();
    let expected_stderr = format!(
        "{file}{cwd}/src/tests/data/basic/local-file.html\n",
        file = file_url_protocol,
        cwd = cwd_normalized,
    );
    assert_eq!(stderr, expected_stderr);

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
/// A `file://` URL must be accepted as the target.
///
/// Runs the binary with `-M -cji` against the file URL of the
/// `local-file.html` fixture and checks that no asset gets embedded and that
/// STDERR lists only the target URL.
#[test]
fn passing_local_file_url_target_input() -> Result<(), Box<dyn std::error::Error>> {
    let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
    // Forward-slash form of the working directory, as used inside file URLs
    let cwd_normalized: String =
        str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
    // Windows paths carry no leading slash, hence the extra one in the prefix
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
    // The platform difference is fully captured by the prefix and the
    // normalized CWD, so one URL serves as both the target argument and the
    // expected STDERR line (no per-platform branching required)
    let target_url: String = format!(
        "{file}{cwd}/src/tests/data/basic/local-file.html",
        file = file_url_protocol,
        cwd = cwd_normalized,
    );
    let out = cmd
        .arg("-M")
        .arg("-cji")
        .arg(&target_url)
        .output()
        .unwrap();

    // STDOUT should contain HTML from the local file
    assert_eq!(
        std::str::from_utf8(&out.stdout).unwrap(),
        format!(
            "\
            <!DOCTYPE html><html lang=\"en\"><head>\
            <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
            <meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
            <title>Local HTML file</title>\n \
            <link rel=\"stylesheet\" type=\"text/css\">\n \
            <link rel=\"stylesheet\" type=\"text/css\">\n</head>\n\n<body>\n \
            <img alt=\"\" src=\"{empty_image}\">\n \
            <a href=\"file://local-file.html/\">Tricky href</a>\n \
            <a href=\"https://github.com/Y2Z/monolith\">Remote URL</a>\n \
            <script></script>\n\n\n\n\
            </body></html>\n\
            ",
            empty_image = empty_image!()
        )
    );

    // STDERR should contain only the target file URL
    assert_eq!(
        std::str::from_utf8(&out.stderr).unwrap(),
        format!("{}\n", target_url)
    );

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
/// A data URL document must not be able to pull in local files: the script
/// referencing a local path must come out empty and nothing is retrieved.
#[test]
fn passing_security_disallow_local_assets_within_data_url_targets(
) -> Result<(), Box<dyn std::error::Error>> {
    let out = Command::cargo_bin(env!("CARGO_PKG_NAME"))?
        .args(&[
            "-M",
            "data:text/html,%3Cscript%20src=\"src/tests/data/basic/local-script.js\"%3E%3C/script%3E",
        ])
        .output()
        .unwrap();

    // STDOUT should contain HTML with no JS in it
    let stdout = std::str::from_utf8(&out.stdout).unwrap();
    assert_eq!(
        stdout,
        "<html><head><script></script></head><body></body></html>\n"
    );

    // STDERR should be empty
    let stderr = std::str::from_utf8(&out.stderr).unwrap();
    assert_eq!(stderr, "");

    // The exit code should be 0
    out.assert().code(0);

    Ok(())
}
/// A `file://` URL inside a `style` attribute of a local document must be
/// embedded as a data URL.
///
/// Writes an SVG and an HTML document referencing it into temporary files,
/// runs the binary with `-M` on the HTML file, and checks both the produced
/// markup and the list of retrieved files on STDERR.
#[test]
fn passing_embed_file_url_local_asset_within_style_attribute(
) -> Result<(), Box<dyn std::error::Error>> {
// Windows paths carry no leading slash, hence the extra one in the prefix
let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
// Temporary SVG asset to be referenced from the HTML document
let mut file_svg = NamedTempFile::new()?;
writeln!(file_svg, "<svg version=\"1.1\" baseProfile=\"full\" width=\"300\" height=\"200\" xmlns=\"http://www.w3.org/2000/svg\">\
<rect width=\"100%\" height=\"100%\" fill=\"red\" />\
<circle cx=\"150\" cy=\"100\" r=\"80\" fill=\"green\" />\
<text x=\"150\" y=\"125\" font-size=\"60\" text-anchor=\"middle\" fill=\"white\">SVG</text>\
</svg>\n")?;
// Temporary HTML document pointing at the SVG through a file URL
let mut file_html = NamedTempFile::new()?;
writeln!(
file_html,
"<div style='background-image: url(\"{file}{path}\")'></div>\n",
file = file_url_prefix,
path = str!(file_svg.path().to_str().unwrap()).replace("\\", "/"),
)?;
let out = cmd.arg("-M").arg(file_html.path()).output().unwrap();
// STDOUT should contain HTML with data URL for background-image in it
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head></head><body><div style=\"background-image: url('data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cmVjdCB3aWR0aD0iMTAwJSIgaGVpZ2h0PSIxMDAlIiBmaWxsPSJyZWQiIC8+PGNpcmNsZSBjeD0iMTUwIiBjeT0iMTAwIiByPSI4MCIgZmlsbD0iZ3JlZW4iIC8+PHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+PC9zdmc+Cgo=')\"></div>\n\n</body></html>\n"
);
// STDERR should list temporary files that got retrieved
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file}{html_path}\n\
{file}{svg_path}\n\
",
file = file_url_prefix,
html_path = str!(file_html.path().to_str().unwrap()).replace("\\", "/"),
svg_path = str!(file_svg.path().to_str().unwrap()).replace("\\", "/"),
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}
/// `@import` rules inside a `<style>` block must have their targets embedded,
/// whether written as a bare string, an unquoted `url()` or a quoted `url()`.
///
/// Writes a stylesheet and an HTML document importing it three times into
/// temporary files, runs the binary with `-M` on the HTML file, and checks
/// both the produced markup and the list of retrieved files on STDERR.
#[test]
fn passing_css_import_string() -> Result<(), Box<dyn std::error::Error>> {
// Windows paths carry no leading slash, hence the extra one in the prefix
let file_url_prefix: &str = if cfg!(windows) { "file:///" } else { "file://" };
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?;
// Temporary stylesheet that gets imported by the HTML document
let mut file_css = NamedTempFile::new()?;
writeln!(file_css, "body{{background-color:#000;color:#fff}}")?;
// Temporary HTML document importing the stylesheet in three different notations
let mut file_html = NamedTempFile::new()?;
writeln!(
file_html,
"\
<style>\n\
@charset 'UTF-8';\n\
\n\
@import '{file}{css_path}';\n\
\n\
@import url({file}{css_path});\n\
\n\
@import url('{file}{css_path}')\n\
</style>\n\
",
file = file_url_prefix,
css_path = str!(file_css.path().to_str().unwrap()).replace("\\", "/"),
)?;
let out = cmd.arg("-M").arg(file_html.path()).output().unwrap();
// STDOUT should contain embedded CSS url()'s
assert_eq!(
std::str::from_utf8(&out.stdout).unwrap(),
"<html><head><style>\n@charset 'UTF-8';\n\n@import 'data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K';\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K');\n\n@import url('data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K')\n</style>\n\n</head><body></body></html>\n"
);
// STDERR should list temporary files that got retrieved
// (the stylesheet shows up once per @import rule)
assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(),
format!(
"\
{file}{html_path}\n\
{file}{css_path}\n\
{file}{css_path}\n\
{file}{css_path}\n\
",
file = file_url_prefix,
html_path = str!(file_html.path().to_str().unwrap()).replace("\\", "/"),
css_path = str!(file_css.path().to_str().unwrap()).replace("\\", "/"),
)
);
// The exit code should be 0
out.assert().code(0);
Ok(())
}

317
src/tests/css/embed_css.rs Normal file
View File

@@ -0,0 +1,317 @@
use reqwest::blocking::Client;
use std::collections::HashMap;
use crate::css;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Empty CSS input must come back as an empty string.
#[test]
fn passing_empty_input() {
    let mut cache = HashMap::new();
    let client = Client::new();

    let embedded = css::embed_css(&mut cache, &client, "", "", false, false, false);

    assert_eq!(embedded, "");
}
/// Unquoted `url()` values of image properties must be swapped for the
/// quoted empty-image placeholder; all other declarations stay untouched.
#[test]
fn passing_style_exclude_unquoted_images() {
    let mut cache = HashMap::new();
    let client = Client::new();

    let style: &str = "/* border: none;*/\
        background-image: url(https://somewhere.com/bg.png); \
        list-style: url(/assets/images/bullet.svg);\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)";

    let embedded = css::embed_css(
        &mut cache,
        &client,
        "https://doesntmatter.local/",
        style,
        false,
        true,
        true,
    );

    let expected = format!(
        "/* border: none;*/\
        background-image: url('{empty_image}'); \
        list-style: url('{empty_image}');\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)",
        empty_image = empty_image!()
    );
    assert_eq!(embedded, expected);
}
/// Single-quoted `url()` values of image properties must be swapped for the
/// empty-image placeholder; all other declarations stay untouched.
#[test]
fn passing_style_exclude_single_quoted_images() {
    let mut cache = HashMap::new();
    let client = Client::new();

    let style: &str = "/* border: none;*/\
        background-image: url('https://somewhere.com/bg.png'); \
        list-style: url('/assets/images/bullet.svg');\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)";

    let embedded = css::embed_css(&mut cache, &client, "", style, false, true, true);

    let expected = format!(
        "/* border: none;*/\
        background-image: url('{empty_image}'); \
        list-style: url('{empty_image}');\
        width:99.998%; \
        margin-top: -20px; \
        line-height: -1; \
        height: calc(100vh - 10pt)",
        empty_image = empty_image!()
    );
    assert_eq!(embedded, expected);
}
/// A rule block whose only URL is already a data URL must pass through
/// unchanged.
#[test]
fn passing_style_block() {
    let mut cache = HashMap::new();
    let client = Client::new();

    let stylesheet: &str = "\
        #id.class-name:not(:nth-child(3n+0)) {\n \
        // border: none;\n \
        background-image: url('data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=');\n\
        }\n\
        \n\
        html > body {}";

    let embedded = css::embed_css(&mut cache, &client, "file:///", stylesheet, false, false, true);

    assert_eq!(embedded, stylesheet);
}
/// CSS consisting only of attribute selectors (`=`, `*=`, `~=`, `^=`, `|=`,
/// `$=`) and comments must pass through `embed_css` unchanged.
#[test]
fn passing_attribute_selectors() {
let cache = &mut HashMap::new();
let client = Client::new();
// NOTE: this literal contains raw newlines (no `\` continuations), so its
// line layout is part of the value under test
const CSS: &str = "\
[data-value] {
/* Attribute exists */
}
[data-value='foo'] {
/* Attribute has this exact value */
}
[data-value*='foo'] {
/* Attribute value contains this value somewhere in it */
}
[data-value~='foo'] {
/* Attribute has this value in a space-separated list somewhere */
}
[data-value^='foo'] {
/* Attribute value starts with this */
}
[data-value|='foo'] {
/* Attribute value starts with this in a dash-separated list */
}
[data-value$='foo'] {
/* Attribute value ends with this */
}
";
// Output is expected to be identical to the input
assert_eq!(
css::embed_css(cache, &client, "", &CSS, false, false, false,),
CSS
);
}
/// `@import` targets given as data URLs (bare string or `url()`) must be
/// re-emitted as base64-encoded data URLs.
#[test]
fn passing_import_string() {
    let mut cache = HashMap::new();
    let client = Client::new();

    let stylesheet: &str = "\
        @charset 'UTF-8';\n\
        \n\
        @import 'data:text/css,html{background-color:%23000}';\n\
        \n\
        @import url('data:text/css,html{color:%23fff}')\n\
        ";

    let embedded = css::embed_css(
        &mut cache,
        &client,
        "https://doesntmatter.local/",
        stylesheet,
        false,
        false,
        true,
    );

    let expected = "\
        @charset 'UTF-8';\n\
        \n\
        @import 'data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9';\n\
        \n\
        @import url('data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==')\n\
        ";
    assert_eq!(embedded, expected);
}
/// `url()` values that are pure fragment references (`#...`) must be left
/// exactly as written.
#[test]
fn passing_hash_urls() {
    let mut cache = HashMap::new();
    let client = Client::new();

    let stylesheet: &str = "\
        body {\n \
        behavior: url(#default#something);\n\
        }\n\
        \n\
        .scissorHalf {\n \
        offset-path: url(#somePath);\n\
        }\n\
        ";

    let embedded = css::embed_css(
        &mut cache,
        &client,
        "https://doesntmatter.local/",
        stylesheet,
        false,
        false,
        true,
    );

    assert_eq!(embedded, stylesheet);
}
/// Signed and unsigned percentage and degree values inside `transform`
/// functions must survive processing unchanged.
#[test]
fn passing_transform_percentages_and_degrees() {
    let mut cache = HashMap::new();
    let client = Client::new();

    let stylesheet: &str = "\
        div {\n \
        transform: translate(-50%, -50%) rotate(-45deg);\n\
        transform: translate(50%, 50%) rotate(45deg);\n\
        transform: translate(+50%, +50%) rotate(+45deg);\n\
        }\n\
        ";

    let embedded = css::embed_css(
        &mut cache,
        &client,
        "https://doesntmatter.local/",
        stylesheet,
        false,
        false,
        true,
    );

    assert_eq!(embedded, stylesheet);
}
/// Selectors containing backslash-escaped punctuation must survive
/// processing unchanged.
#[test]
fn passing_unusual_indents() {
    let mut cache = HashMap::new();
    let client = Client::new();

    let stylesheet: &str = "\
        .is\\:good:hover {\n \
        color: green\n\
        }\n\
        \n\
        #\\~\\!\\@\\$\\%\\^\\&\\*\\(\\)\\+\\=\\,\\.\\/\\\\\\'\\\"\\;\\:\\?\\>\\<\\[\\]\\{\\}\\|\\`\\# {\n \
        color: black\n\
        }\n\
        ";

    let embedded = css::embed_css(
        &mut cache,
        &client,
        "https://doesntmatter.local/",
        stylesheet,
        false,
        false,
        true,
    );

    assert_eq!(embedded, stylesheet);
}
/// With the first boolean flag set, `@font-face` rules must be dropped while
/// ordinary rules that merely mention the font family are kept.
#[test]
fn passing_exclude_fonts() {
    let mut cache = HashMap::new();
    let client = Client::new();

    let stylesheet: &str = "\
        @font-face {\n \
        font-family: 'My Font';\n \
        src: url(my_font.woff);\n\
        }\n\
        \n\
        #identifier {\n \
        font-family: 'My Font' Arial\n\
        }\n\
        \n\
        @font-face {\n \
        font-family: 'My Font';\n \
        src: url(my_font.woff);\n\
        }\n\
        \n\
        div {\n \
        font-family: 'My Font' Verdana\n\
        }\n\
        ";

    // Same stylesheet with both @font-face blocks removed
    let expected: &str = " \
        \n\
        \n\
        #identifier {\n \
        font-family: 'My Font' Arial\n\
        }\n\
        \n \
        \n\
        \n\
        div {\n \
        font-family: 'My Font' Verdana\n\
        }\n\
        ";

    let embedded = css::embed_css(
        &mut cache,
        &client,
        "https://doesntmatter.local/",
        stylesheet,
        true,
        false,
        true,
    );

    assert_eq!(embedded, expected);
}

50
src/tests/css/enquote.rs Normal file
View File

@@ -0,0 +1,50 @@
use crate::css;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// An empty string enquoted with the flag off yields a pair of single quotes.
#[test]
fn passing_empty_input_single_quotes() {
    let quoted = css::enquote(str!(""), false);

    assert_eq!(quoted, "''");
}
/// An empty string enquoted with the flag on yields a pair of double quotes.
#[test]
fn passing_empty_input_double_quotes() {
    let quoted = css::enquote(str!(""), true);

    assert_eq!(quoted, "\"\"");
}
/// Apostrophes must be backslash-escaped when wrapping in single quotes.
#[test]
fn passing_apostrophes_single_quotes() {
    let quoted = css::enquote(str!("It's a lovely day, don't you think?"), false);

    assert_eq!(quoted, "'It\\'s a lovely day, don\\'t you think?'");
}
/// Apostrophes must be left alone when wrapping in double quotes.
#[test]
fn passing_apostrophes_double_quotes() {
    let quoted = css::enquote(str!("It's a lovely day, don't you think?"), true);

    assert_eq!(quoted, "\"It's a lovely day, don't you think?\"");
}
/// With single-quote wrapping, only the apostrophes get escaped; double
/// quotes inside the text stay as they are.
#[test]
fn passing_feet_and_inches_single_quotes() {
    let quoted = css::enquote(str!("5'2\", 6'5\""), false);

    assert_eq!(quoted, "'5\\'2\", 6\\'5\"'");
}
/// With double-quote wrapping, only the double quotes get escaped;
/// apostrophes inside the text stay as they are.
#[test]
fn passing_feet_and_inches_double_quotes() {
    let quoted = css::enquote(str!("5'2\", 6'5\""), true);

    assert_eq!(quoted, "\"5'2\\\", 6'5\\\"\"");
}

View File

@@ -0,0 +1,88 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Property names that `css::is_image_url_prop` must recognize.
#[cfg(test)]
mod passing {
    use crate::css;

    #[test]
    fn background() {
        assert!(css::is_image_url_prop("background"));
    }

    #[test]
    fn background_image() {
        assert!(css::is_image_url_prop("background-image"));
    }

    /// Recognition must not depend on letter case.
    #[test]
    fn background_image_uppercase() {
        assert!(css::is_image_url_prop("BACKGROUND-IMAGE"));
    }

    #[test]
    fn border_image() {
        assert!(css::is_image_url_prop("border-image"));
    }

    #[test]
    fn content() {
        assert!(css::is_image_url_prop("content"));
    }

    #[test]
    fn cursor() {
        assert!(css::is_image_url_prop("cursor"));
    }

    #[test]
    fn list_style() {
        assert!(css::is_image_url_prop("list-style"));
    }

    #[test]
    fn list_style_image() {
        assert!(css::is_image_url_prop("list-style-image"));
    }

    #[test]
    fn mask_image() {
        assert!(css::is_image_url_prop("mask-image"));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Property names that `css::is_image_url_prop` must reject.
#[cfg(test)]
mod failing {
    use crate::css::is_image_url_prop;

    #[test]
    fn empty() {
        assert_eq!(is_image_url_prop(""), false);
    }

    #[test]
    fn width() {
        assert_eq!(is_image_url_prop("width"), false);
    }

    #[test]
    fn color() {
        assert_eq!(is_image_url_prop("color"), false);
    }

    #[test]
    fn z_index() {
        assert_eq!(is_image_url_prop("z-index"), false);
    }
}

3
src/tests/css/mod.rs Normal file
View File

@@ -0,0 +1,3 @@
mod embed_css;
mod enquote;
mod is_image_url_prop;

View File

@@ -0,0 +1,19 @@
<!doctype html>
<html lang="en">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Local HTML file</title>
<link href="local-style.css" rel="stylesheet" type="text/css" />
<link href="local-style-does-not-exist.css" rel="stylesheet" type="text/css" />
</head>
<body>
<img src="monolith.png" alt="" />
<a href="//local-file.html">Tricky href</a>
<a href="https://github.com/Y2Z/monolith">Remote URL</a>
<script src="local-script.js"></script>
</body>
</html>

View File

@@ -0,0 +1,2 @@
document.body.style.backgroundColor = "green";
document.body.style.color = "red";

View File

@@ -0,0 +1,4 @@
body {
background-color: #000;
color: #fff;
}

View File

@@ -1,479 +0,0 @@
use crate::html::{
get_node_name, get_parent_node, html_to_dom, is_icon, stringify_document, walk_and_embed_assets,
};
use html5ever::rcdom::{Handle, NodeData};
use html5ever::serialize::{serialize, SerializeOpts};
use std::collections::HashMap;
/// `is_icon` must accept icon-like `rel` values in any letter case and
/// reject everything else, including the empty string.
#[test]
fn test_is_icon() {
    // Values that denote an icon
    assert!(is_icon("icon"));
    assert!(is_icon("Shortcut Icon"));
    assert!(is_icon("ICON"));
    assert!(is_icon("mask-icon"));
    assert!(is_icon("fluid-icon"));

    // Values that do not
    assert!(!is_icon("stylesheet"));
    assert!(!is_icon(""));
}
/// Walks a small parsed document and checks that every element reports the
/// expected parent node name; also verifies the total number of visited nodes.
#[test]
fn test_get_parent_node_name() {
    let dom =
        html_to_dom("<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>");
    let mut visited = 0;

    // Recursively visits `node`, counting every node that gets entered
    fn visit(node: &Handle, counter: &mut i8) {
        *counter += 1;

        // Only the document root and elements are descended into
        let descend = match &node.data {
            NodeData::Document => true,
            NodeData::Element { name, .. } => {
                let node_name = name.local.as_ref().to_string();
                let parent_node_name = get_node_name(&get_parent_node(node));

                match node_name.as_str() {
                    "head" | "body" => assert_eq!(parent_node_name, "html"),
                    "div" => assert_eq!(parent_node_name, "body"),
                    "p" => assert_eq!(parent_node_name, "div"),
                    _ => {}
                }

                println!("{}", node_name);
                true
            }
            _ => false,
        };

        if descend {
            for child in node.children.borrow().iter() {
                visit(child, counter);
            }
        }
    }

    visit(&dom.document, &mut visited);

    assert_eq!(visited, 7);
}
/// Markup without any assets must serialize back with only the implied
/// `html`/`head`/`body` wrappers added.
#[test]
fn test_walk_and_embed_assets() {
    let mut cache = HashMap::new();
    let dom = html_to_dom("<div><P></P></div>");
    let (opt_no_css, opt_no_frames, opt_no_js, opt_no_images, opt_silent) =
        (false, false, false, false, true);
    let client = reqwest::Client::new();

    walk_and_embed_assets(
        &mut cache,
        &client,
        "http://localhost",
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut serialized: Vec<u8> = Vec::new();
    serialize(&mut serialized, &dom.document, SerializeOpts::default()).unwrap();
    // Byte-to-char mapping, one char per byte
    let markup: String = serialized.into_iter().map(char::from).collect();

    assert_eq!(
        markup,
        "<html><head></head><body><div><p></p></div></body></html>"
    );
}
/// An iframe with an empty `src` must be left as is rather than being
/// resolved against (and recursively embedding) the document itself.
#[test]
fn test_walk_and_embed_assets_ensure_no_recursive_iframe() {
    let dom = html_to_dom("<div><P></P><iframe src=\"\"></iframe></div>");
    let mut cache = HashMap::new();
    let (opt_no_css, opt_no_frames, opt_no_js, opt_no_images, opt_silent) =
        (false, false, false, false, true);
    let client = reqwest::Client::new();

    walk_and_embed_assets(
        &mut cache,
        &client,
        "http://localhost",
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut serialized: Vec<u8> = Vec::new();
    serialize(&mut serialized, &dom.document, SerializeOpts::default()).unwrap();
    // Byte-to-char mapping, one char per byte
    let markup: String = serialized.into_iter().map(char::from).collect();

    assert_eq!(
        markup,
        "<html><head></head><body><div><p></p><iframe src=\"\"></iframe></div></body></html>"
    );
}
/// With CSS disabled, stylesheet links lose their `href`, `<style>` blocks
/// are emptied and `style` attributes are removed.
#[test]
fn test_walk_and_embed_assets_no_css() {
    let dom = html_to_dom(
        "<link rel=\"stylesheet\" href=\"main.css\">\
        <style>html{background-color: #000;}</style>\
        <div style=\"display: none;\"></div>",
    );
    let mut cache = HashMap::new();
    let (opt_no_css, opt_no_frames, opt_no_js, opt_no_images, opt_silent) =
        (true, false, false, false, true);
    let client = reqwest::Client::new();

    walk_and_embed_assets(
        &mut cache,
        &client,
        "http://localhost",
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut serialized: Vec<u8> = Vec::new();
    serialize(&mut serialized, &dom.document, SerializeOpts::default()).unwrap();
    // Byte-to-char mapping, one char per byte
    let markup: String = serialized.into_iter().map(char::from).collect();

    assert_eq!(
        markup,
        "<html>\
        <head>\
        <link rel=\"stylesheet\" href=\"\">\
        <style></style>\
        </head>\
        <body>\
        <div></div>\
        </body>\
        </html>"
    );
}
/// With images disabled, icon links lose their `href` and `<img>` sources
/// are replaced by the transparent-pixel placeholder.
#[test]
fn test_walk_and_embed_assets_no_images() {
    let dom = html_to_dom(
        "<link rel=\"icon\" href=\"favicon.ico\">\
        <div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>",
    );
    let mut cache = HashMap::new();
    let (opt_no_css, opt_no_frames, opt_no_js, opt_no_images, opt_silent) =
        (false, false, false, true, true);
    let client = reqwest::Client::new();

    walk_and_embed_assets(
        &mut cache,
        &client,
        "http://localhost",
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut serialized: Vec<u8> = Vec::new();
    serialize(&mut serialized, &dom.document, SerializeOpts::default()).unwrap();
    // Byte-to-char mapping, one char per byte
    let markup: String = serialized.into_iter().map(char::from).collect();

    assert_eq!(
        markup,
        "<html>\
        <head>\
        <link rel=\"icon\" href=\"\">\
        </head>\
        <body>\
        <div>\
        <img src=\"data:image/png;base64,\
        iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0\
        lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=\">\
        </div>\
        </body>\
        </html>"
    );
}
/// With frames disabled, the iframe `src` must be blanked.
#[test]
fn test_walk_and_embed_assets_no_frames() {
    let dom = html_to_dom("<iframe src=\"http://trackbook.com\"></iframe>");
    let mut cache = HashMap::new();
    let (opt_no_css, opt_no_frames, opt_no_js, opt_no_images, opt_silent) =
        (false, true, false, false, true);
    let client = reqwest::Client::new();

    walk_and_embed_assets(
        &mut cache,
        &client,
        "http://localhost",
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut serialized: Vec<u8> = Vec::new();
    serialize(&mut serialized, &dom.document, SerializeOpts::default()).unwrap();
    // Byte-to-char mapping, one char per byte
    let markup: String = serialized.into_iter().map(char::from).collect();

    assert_eq!(
        markup,
        "<html><head></head><body><iframe src=\"\"></iframe></body></html>"
    );
}
/// With JS disabled, event-handler attributes are removed, script `src`
/// attributes are blanked and inline scripts are emptied.
#[test]
fn test_walk_and_embed_assets_no_js() {
    let dom = html_to_dom(
        "<div onClick=\"void(0)\">\
        <script src=\"http://localhost/assets/some.js\"></script>\
        <script>alert(1)</script>\
        </div>",
    );
    let mut cache = HashMap::new();
    let (opt_no_css, opt_no_frames, opt_no_js, opt_no_images, opt_silent) =
        (false, false, true, false, true);
    let client = reqwest::Client::new();

    walk_and_embed_assets(
        &mut cache,
        &client,
        "http://localhost",
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    let mut serialized: Vec<u8> = Vec::new();
    serialize(&mut serialized, &dom.document, SerializeOpts::default()).unwrap();
    // Byte-to-char mapping, one char per byte
    let markup: String = serialized.into_iter().map(char::from).collect();

    assert_eq!(
        markup,
        "<html><head></head><body><div><script src=\"\"></script>\
        <script></script></div></body></html>"
    );
}
/// With every option off, the document must stringify with only the implied
/// wrappers added and no CSP meta tag.
#[test]
fn test_stringify_document() {
    let dom = html_to_dom("<div><script src=\"some.js\"></script></div>");
    let (opt_no_css, opt_no_frames, opt_no_js, opt_no_images, opt_isolate) =
        (false, false, false, false, false);

    let rendered = stringify_document(
        &dom.document,
        opt_no_css,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_isolate,
    );

    assert_eq!(
        rendered,
        "<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
    );
}
/// With isolation on, the restrictive CSP meta tag must be placed first in
/// `<head>`, ahead of any CSP tag the document already carries.
#[test]
fn test_stringify_document_isolate() {
    let dom = html_to_dom(
        "<title>Isolated document</title>\
        <link rel=\"something\" href=\"some.css\" />\
        <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
        <div><script src=\"some.js\"></script></div>",
    );
    let (opt_no_css, opt_no_frames, opt_no_js, opt_no_images, opt_isolate) =
        (false, false, false, false, true);

    let rendered = stringify_document(
        &dom.document,
        opt_no_css,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_isolate,
    );

    assert_eq!(
        rendered,
        "<html>\
        <head>\
        <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
        <title>Isolated document</title>\
        <link rel=\"something\" href=\"some.css\">\
        <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
        </head>\
        <body>\
        <div>\
        <script src=\"some.js\"></script>\
        </div>\
        </body>\
        </html>"
    );
}
/// With `opt_no_css` set, a `style-src 'none';` policy `<meta>` is expected at the top of
/// `<head>`; the rest of the markup is serialized unchanged.
#[test]
fn test_stringify_document_no_css() {
    let source = "<!doctype html>\
                  <title>Unstyled document</title>\
                  <link rel=\"stylesheet\" href=\"main.css\"/>\
                  <div style=\"display: none;\"></div>";
    let expected = "<!DOCTYPE html>\
                    <html>\
                    <head>\
                    <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
                    <title>Unstyled document</title>\
                    <link rel=\"stylesheet\" href=\"main.css\">\
                    </head>\
                    <body><div style=\"display: none;\"></div></body>\
                    </html>";
    let dom = html_to_dom(&source);

    let opt_no_css = true;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_isolate = false;

    let rendered = stringify_document(
        &dom.document,
        opt_no_css,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_isolate,
    );

    assert_eq!(rendered, expected);
}
/// With `opt_no_frames` set, a `frame-src 'none';child-src 'none';` policy `<meta>` is
/// expected at the top of `<head>`.
#[test]
fn test_stringify_document_no_frames() {
    let source = "<!doctype html>\
                  <title>Frameless document</title>\
                  <link rel=\"something\"/>\
                  <div><script src=\"some.js\"></script></div>";
    let expected = "<!DOCTYPE html>\
                    <html>\
                    <head>\
                    <meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
                    <title>Frameless document</title>\
                    <link rel=\"something\">\
                    </head>\
                    <body><div><script src=\"some.js\"></script></div></body>\
                    </html>";
    let dom = html_to_dom(&source);

    let opt_no_css = false;
    let opt_no_frames = true;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_isolate = false;

    let rendered = stringify_document(
        &dom.document,
        opt_no_css,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_isolate,
    );

    assert_eq!(rendered, expected);
}
/// With every restriction enabled at once, a single policy `<meta>` combining all
/// directives is expected at the top of `<head>`.
#[test]
fn test_stringify_document_isolate_no_frames_no_js_no_css_no_images() {
    let source = "<!doctype html>\
                  <title>no-frame no-css no-js no-image isolated document</title>\
                  <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
                  <link rel=\"stylesheet\" href=\"some.css\">\
                  <div>\
                  <script src=\"some.js\"></script>\
                  <img style=\"width: 100%;\" src=\"some.png\" />\
                  <iframe src=\"some.html\"></iframe>\
                  </div>";
    let expected = "<!DOCTYPE html>\
                    <html>\
                    <head>\
                    <meta http-equiv=\"Content-Security-Policy\" content=\"default-src \'unsafe-inline\' data:; style-src \'none\'; frame-src \'none\';child-src \'none\'; script-src \'none\'; img-src data:;\"></meta>\
                    <title>no-frame no-css no-js no-image isolated document</title>\
                    <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
                    <link rel=\"stylesheet\" href=\"some.css\">\
                    </head>\
                    <body>\
                    <div>\
                    <script src=\"some.js\"></script>\
                    <img style=\"width: 100%;\" src=\"some.png\">\
                    <iframe src=\"some.html\"></iframe>\
                    </div>\
                    </body>\
                    </html>";
    let dom = html_to_dom(&source);

    let opt_isolate = true;
    let opt_no_css = true;
    let opt_no_frames = true;
    let opt_no_js = true;
    let opt_no_images = true;

    let rendered = stringify_document(
        &dom.document,
        opt_no_css,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_isolate,
    );

    assert_eq!(rendered, expected);
}

View File

@@ -0,0 +1,26 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::html;
    use reqwest::blocking::Client;
    use std::collections::HashMap;

    /// Every candidate of a `srcset` value is expected to be replaced by the empty
    /// placeholder image while its descriptor (`1x`, `2x`) is kept.
    #[test]
    fn replace_with_empty_images() {
        let cache = &mut HashMap::new();
        let client = Client::new();
        let srcset_value = "small.png 1x, large.png 2x";

        let embedded_srcset = html::embed_srcset(cache, &client, "", &srcset_value, true, true);
        let expected = format!("{} 1x, {} 2x", empty_image!(), empty_image!());

        assert_eq!(expected, embedded_srcset);
    }
}

View File

@@ -0,0 +1,49 @@
use crate::html;
use html5ever::rcdom::{Handle, NodeData};
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Walks the parsed DOM, checks the parent name reported for `head`, `body`, `div` and `p`
/// elements, and verifies the number of visited nodes.
#[test]
fn get_node_name() {
    let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
    let dom = html::html_to_dom(&html);
    let mut count = 0;

    // Recursive visitor; `visited` is bumped once per call, whatever the node kind.
    fn test_walk(node: &Handle, visited: &mut i8) {
        *visited += 1;

        match &node.data {
            NodeData::Document => {
                for child in node.children.borrow().iter() {
                    test_walk(child, visited);
                }
            }
            NodeData::Element { ref name, .. } => {
                let node_name = name.local.as_ref().to_string();
                let parent = html::get_parent_node(node);
                let parent_node_name = html::get_node_name(&parent);

                match node_name.as_str() {
                    "head" | "body" => assert_eq!(parent_node_name, Some("html")),
                    "div" => assert_eq!(parent_node_name, Some("body")),
                    "p" => assert_eq!(parent_node_name, Some("div")),
                    _ => {}
                }

                for child in node.children.borrow().iter() {
                    test_walk(child, visited);
                }
            }
            // Other node kinds are counted but not descended into.
            _ => (),
        };
    }

    test_walk(&dom.document, &mut count);
    assert_eq!(count, 7);
}

View File

@@ -0,0 +1,92 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    use crate::html;

    /// Empty input validates against its SHA-256 integrity value.
    #[test]
    fn empty_input_sha256() {
        let data = "".as_bytes();
        let integrity = "sha256-47DEQpj8HBSa+/TImW+5JCeuQeRkm5NMpJWZG3hSuFU=";
        assert!(html::has_proper_integrity(data, integrity));
    }

    /// Non-empty input validates against its SHA-256 integrity value.
    #[test]
    fn sha256() {
        let data = "abcdef0123456789".as_bytes();
        let integrity = "sha256-9EWAHgy4mSYsm54hmDaIDXPKLRsLnBX7lZyQ6xISNOM=";
        assert!(html::has_proper_integrity(data, integrity));
    }

    /// Non-empty input validates against its SHA-384 integrity value.
    #[test]
    fn sha384() {
        let data = "abcdef0123456789".as_bytes();
        let integrity =
            "sha384-gc9l7omltke8C33bedgh15E12M7RrAQa5t63Yb8APlpe7ZhiqV23+oqiulSJl3Kw";
        assert!(html::has_proper_integrity(data, integrity));
    }

    /// Non-empty input validates against its SHA-512 integrity value.
    #[test]
    fn sha512() {
        let data = "abcdef0123456789".as_bytes();
        let integrity = "sha512-zG5B88cYMqcdiMi9gz0XkOFYw2BpjeYdn5V6+oFrMgSNjRpqL7EF8JEwl17ztZbK3N7I/tTwp3kxQbN1RgFBww==";
        assert!(html::has_proper_integrity(data, integrity));
    }
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod failing {
    use crate::html;

    /// An empty integrity value never validates non-empty data.
    #[test]
    fn empty_hash() {
        let data = "abcdef0123456789".as_bytes();
        let is_valid = html::has_proper_integrity(data, "");
        assert!(!is_valid);
    }

    /// An empty integrity value does not validate empty data either.
    #[test]
    fn empty_input_empty_hash() {
        let is_valid = html::has_proper_integrity("".as_bytes(), "");
        assert!(!is_valid);
    }

    /// A bogus SHA-256 value is rejected.
    #[test]
    fn sha256() {
        let data = "abcdef0123456789".as_bytes();
        let is_valid = html::has_proper_integrity(data, "sha256-badhash");
        assert!(!is_valid);
    }

    /// A bogus SHA-384 value is rejected.
    #[test]
    fn sha384() {
        let data = "abcdef0123456789".as_bytes();
        let is_valid = html::has_proper_integrity(data, "sha384-badhash");
        assert!(!is_valid);
    }

    /// A bogus SHA-512 value is rejected.
    #[test]
    fn sha512() {
        let data = "abcdef0123456789".as_bytes();
        let is_valid = html::has_proper_integrity(data, "sha512-badhash");
        assert!(!is_valid);
    }
}

50
src/tests/html/is_icon.rs Normal file
View File

@@ -0,0 +1,50 @@
use crate::html;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// The plain `icon` value is treated as an icon.
#[test]
fn passing_icon() {
    let attr_value = "icon";
    assert!(html::is_icon(attr_value));
}
/// The capitalized `Shortcut Icon` value is treated as an icon.
#[test]
fn passing_shortcut_icon_capitalized() {
    let attr_value = "Shortcut Icon";
    assert!(html::is_icon(attr_value));
}
/// The upper-case `ICON` value is treated as an icon.
#[test]
fn passing_icon_uppercase() {
    let attr_value = "ICON";
    assert!(html::is_icon(attr_value));
}
/// The `mask-icon` value is treated as an icon.
#[test]
fn passing_mask_icon() {
    let attr_value = "mask-icon";
    assert!(html::is_icon(attr_value));
}
/// The `fluid-icon` value is treated as an icon.
#[test]
fn passing_fluid_icon() {
    let attr_value = "fluid-icon";
    assert!(html::is_icon(attr_value));
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// The `stylesheet` value is not an icon.
#[test]
fn failing_stylesheet() {
    let attr_value = "stylesheet";
    assert!(!html::is_icon(attr_value));
}
/// An empty value is not an icon.
#[test]
fn failing_empty_string() {
    let attr_value = "";
    assert!(!html::is_icon(attr_value));
}

6
src/tests/html/mod.rs Normal file
View File

@@ -0,0 +1,6 @@
mod embed_srcset;
mod get_node_name;
mod has_proper_integrity;
mod is_icon;
mod stringify_document;
mod walk_and_embed_assets;

View File

@@ -0,0 +1,188 @@
use crate::html;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A bare `<div>` fragment is serialized wrapped in `<html>`, `<head>` and `<body>`,
/// with no extra markup when every option is off.
#[test]
fn passing_div_as_root_element() {
    let dom = html::html_to_dom(&"<div><script src=\"some.js\"></script></div>");

    let opt_no_css = false;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_isolate = false;

    let rendered = html::stringify_document(
        &dom.document,
        opt_no_css,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_isolate,
    );

    assert_eq!(
        rendered,
        "<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
    );
}
/// With `opt_isolate` set, a Content-Security-Policy `<meta>` is expected as the first
/// child of `<head>`, ahead of the document's own elements.
#[test]
fn passing_full_page_with_no_html_head_or_body() {
    let source = "<title>Isolated document</title>\
                  <link rel=\"something\" href=\"some.css\" />\
                  <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
                  <div><script src=\"some.js\"></script></div>";
    let expected = "<html>\
                    <head>\
                    <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
                    <title>Isolated document</title>\
                    <link rel=\"something\" href=\"some.css\">\
                    <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
                    </head>\
                    <body>\
                    <div>\
                    <script src=\"some.js\"></script>\
                    </div>\
                    </body>\
                    </html>";
    let dom = html::html_to_dom(&source);

    let opt_no_css = false;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_isolate = true;

    let rendered = html::stringify_document(
        &dom.document,
        opt_no_css,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_isolate,
    );

    assert_eq!(rendered, expected);
}
/// With `opt_no_css` set, a `style-src 'none';` policy `<meta>` is expected at the top of
/// `<head>`; the rest of the markup is serialized unchanged.
#[test]
fn passing_doctype_and_the_rest_no_html_head_or_body() {
    let source = "<!doctype html>\
                  <title>Unstyled document</title>\
                  <link rel=\"stylesheet\" href=\"main.css\"/>\
                  <div style=\"display: none;\"></div>";
    let expected = "<!DOCTYPE html>\
                    <html>\
                    <head>\
                    <meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
                    <title>Unstyled document</title>\
                    <link rel=\"stylesheet\" href=\"main.css\">\
                    </head>\
                    <body><div style=\"display: none;\"></div></body>\
                    </html>";
    let dom = html::html_to_dom(&source);

    let opt_no_css = true;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_isolate = false;

    let rendered = html::stringify_document(
        &dom.document,
        opt_no_css,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_isolate,
    );

    assert_eq!(rendered, expected);
}
/// With `opt_no_frames` set, a `frame-src 'none';child-src 'none';` policy `<meta>` is
/// expected at the top of `<head>`.
#[test]
fn passing_doctype_and_the_rest_no_html_head_or_body_forbid_frames() {
    let source = "<!doctype html>\
                  <title>Frameless document</title>\
                  <link rel=\"something\"/>\
                  <div><script src=\"some.js\"></script></div>";
    let expected = "<!DOCTYPE html>\
                    <html>\
                    <head>\
                    <meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none';child-src 'none';\"></meta>\
                    <title>Frameless document</title>\
                    <link rel=\"something\">\
                    </head>\
                    <body><div><script src=\"some.js\"></script></div></body>\
                    </html>";
    let dom = html::html_to_dom(&source);

    let opt_no_css = false;
    let opt_no_frames = true;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_isolate = false;

    let rendered = html::stringify_document(
        &dom.document,
        opt_no_css,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_isolate,
    );

    assert_eq!(rendered, expected);
}
/// With every restriction enabled at once, a single policy `<meta>` combining all
/// directives is expected at the top of `<head>`.
#[test]
fn passing_doctype_and_the_rest_all_forbidden() {
    let source = "<!doctype html>\
                  <title>no-frame no-css no-js no-image isolated document</title>\
                  <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
                  <link rel=\"stylesheet\" href=\"some.css\">\
                  <div>\
                  <script src=\"some.js\"></script>\
                  <img style=\"width: 100%;\" src=\"some.png\" />\
                  <iframe src=\"some.html\"></iframe>\
                  </div>";
    let expected = "<!DOCTYPE html>\
                    <html>\
                    <head>\
                    <meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; frame-src 'none';child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
                    <title>no-frame no-css no-js no-image isolated document</title>\
                    <meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
                    <link rel=\"stylesheet\" href=\"some.css\">\
                    </head>\
                    <body>\
                    <div>\
                    <script src=\"some.js\"></script>\
                    <img style=\"width: 100%;\" src=\"some.png\">\
                    <iframe src=\"some.html\"></iframe>\
                    </div>\
                    </body>\
                    </html>";
    let dom = html::html_to_dom(&source);

    let opt_isolate = true;
    let opt_no_css = true;
    let opt_no_frames = true;
    let opt_no_js = true;
    let opt_no_images = true;

    let rendered = html::stringify_document(
        &dom.document,
        opt_no_css,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_isolate,
    );

    assert_eq!(rendered, expected);
}

View File

@@ -0,0 +1,419 @@
use crate::html;
use html5ever::serialize::{serialize, SerializeOpts};
use reqwest::blocking::Client;
use std::collections::HashMap;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// With every option off, a simple fragment is expected to come back normalized
/// (lower-cased tag names, wrapped in `<html>`/`<head>`/`<body>`).
#[test]
fn passing_basic() {
    let cache = &mut HashMap::new();
    let html = "<div><P></P></div>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_fonts = false;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    html::walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_fonts,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_silent,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serializer output as UTF-8 instead of casting each byte to a `char`,
    // which would mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).expect("serialized document should be valid UTF-8");

    assert_eq!(
        rendered,
        "<html><head></head><body><div><p></p></div></body></html>"
    );
}
/// An `<iframe>` with an empty `src` is expected to be left as is in the output.
#[test]
fn passing_ensure_no_recursive_iframe() {
    let html = "<div><P></P><iframe src=\"\"></iframe></div>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_fonts = false;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    html::walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_fonts,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_silent,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serializer output as UTF-8 instead of casting each byte to a `char`,
    // which would mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).expect("serialized document should be valid UTF-8");

    assert_eq!(
        rendered,
        "<html><head></head><body><div><p></p><iframe src=\"\"></iframe></div></body></html>"
    );
}
/// A `<frame>` with an empty `src` is expected to be left as is in the output.
#[test]
fn passing_ensure_no_recursive_frame() {
    let html = "<frameset><frame src=\"\"></frameset>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_fonts = false;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    html::walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_fonts,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_silent,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serializer output as UTF-8 instead of casting each byte to a `char`,
    // which would mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).expect("serialized document should be valid UTF-8");

    assert_eq!(
        rendered,
        "<html><head></head><frameset><frame src=\"\"></frameset></html>"
    );
}
/// With `opt_no_css` set, the stylesheet link is expected to lose its `href`, the `<style>`
/// body to be emptied and the inline `style` attribute to be removed.
#[test]
fn passing_no_css() {
    let html = "<link rel=\"stylesheet\" href=\"main.css\">\
                <style>html{background-color: #000;}</style>\
                <div style=\"display: none;\"></div>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();
    let client = Client::new();

    let opt_no_css = true;
    let opt_no_fonts = false;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    html::walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_fonts,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_silent,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serializer output as UTF-8 instead of casting each byte to a `char`,
    // which would mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).expect("serialized document should be valid UTF-8");

    assert_eq!(
        rendered,
        "<html>\
         <head>\
         <link rel=\"stylesheet\">\
         <style></style>\
         </head>\
         <body>\
         <div></div>\
         </body>\
         </html>"
    );
}
/// With `opt_no_images` set, the icon link is expected to lose its `href` and the `<img>`
/// source to be replaced by the empty placeholder image.
#[test]
fn passing_no_images() {
    let html = "<link rel=\"icon\" href=\"favicon.ico\">\
                <div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_fonts = false;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = true;
    let opt_silent = true;

    html::walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_fonts,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_silent,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serializer output as UTF-8 instead of casting each byte to a `char`,
    // which would mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).expect("serialized document should be valid UTF-8");

    assert_eq!(
        rendered,
        format!(
            "<html>\
             <head>\
             <link rel=\"icon\">\
             </head>\
             <body>\
             <div>\
             <img src=\"{empty_image}\">\
             </div>\
             </body>\
             </html>",
            empty_image = empty_image!()
        )
    );
}
/// With `opt_no_images` set, the `background` attribute of `<body>` is expected to be
/// removed from the output.
#[test]
fn passing_no_body_background_images() {
    let html =
        "<body background=\"no/such/image.png\" background=\"no/such/image2.png\"></body>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_fonts = false;
    let opt_no_frames = false;
    let opt_no_js = false;
    let opt_no_images = true;
    let opt_silent = true;

    html::walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_fonts,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_silent,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serializer output as UTF-8 instead of casting each byte to a `char`,
    // which would mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).expect("serialized document should be valid UTF-8");

    assert_eq!(rendered, "<html><head></head><body></body></html>");
}
/// With `opt_no_frames` set, the `src` of a `<frame>` is expected to be emptied.
#[test]
fn passing_no_frames() {
    let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_fonts = false;
    let opt_no_frames = true;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    html::walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_fonts,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_silent,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serializer output as UTF-8 instead of casting each byte to a `char`,
    // which would mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).expect("serialized document should be valid UTF-8");

    assert_eq!(
        rendered,
        "<html><head></head><frameset><frame src=\"\"></frameset></html>"
    );
}
/// With `opt_no_frames` set, the `src` of an `<iframe>` is expected to be emptied.
#[test]
fn passing_no_iframes() {
    let html = "<iframe src=\"http://trackbook.com\"></iframe>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_fonts = false;
    let opt_no_frames = true;
    let opt_no_js = false;
    let opt_no_images = false;
    let opt_silent = true;

    html::walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_fonts,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_silent,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serializer output as UTF-8 instead of casting each byte to a `char`,
    // which would mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).expect("serialized document should be valid UTF-8");

    assert_eq!(
        rendered,
        "<html><head></head><body><iframe src=\"\"></iframe></body></html>"
    );
}
/// With `opt_no_js` set, both scripts are expected to be emptied (no `src`, no body) and
/// the `onClick` attribute to be removed.
#[test]
fn passing_no_js() {
    let html = "<div onClick=\"void(0)\">\
                <script src=\"http://localhost/assets/some.js\"></script>\
                <script>alert(1)</script>\
                </div>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();
    let client = Client::new();

    let opt_no_css = false;
    let opt_no_fonts = false;
    let opt_no_frames = false;
    let opt_no_js = true;
    let opt_no_images = false;
    let opt_silent = true;

    html::walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_fonts,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_silent,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serializer output as UTF-8 instead of casting each byte to a `char`,
    // which would mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).expect("serialized document should be valid UTF-8");

    assert_eq!(
        rendered,
        "<html><head></head><body><div><script></script>\
         <script></script></div></body></html>"
    );
}
/// `integrity` attributes on `<link>` and `<script>` are expected to be absent from the
/// output; with `opt_no_js` set the script's `src` is gone as well.
#[test]
fn passing_with_no_integrity() {
    let html = "<title>No integrity</title>\
                <link integrity=\"sha384-...\" rel=\"something\"/>\
                <script integrity=\"sha384-...\" src=\"some.js\"></script>";
    let dom = html::html_to_dom(&html);
    let url = "http://localhost";
    let cache = &mut HashMap::new();
    let client = Client::new();

    let opt_no_css = true;
    let opt_no_fonts = false;
    let opt_no_frames = true;
    let opt_no_js = true;
    let opt_no_images = true;
    let opt_silent = true;

    html::walk_and_embed_assets(
        cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_fonts,
        opt_no_frames,
        opt_no_js,
        opt_no_images,
        opt_silent,
    );

    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Decode the serializer output as UTF-8 instead of casting each byte to a `char`,
    // which would mangle any multi-byte sequence.
    let rendered = String::from_utf8(buf).expect("serialized document should be valid UTF-8");

    assert_eq!(
        rendered,
        "<html>\
         <head><title>No integrity</title><link rel=\"something\"><script></script></head>\
         <body></body>\
         </html>"
    );
}

View File

@@ -1,23 +0,0 @@
use crate::http::retrieve_asset;
use std::collections::HashMap;
/// A data URL is expected to come back unchanged as both the data and the final URL,
/// whether or not a MIME type is passed.
#[test]
fn test_retrieve_asset() {
    let cache = &mut HashMap::new();
    let client = reqwest::Client::new();

    // First without a MIME type, then with an explicit one.
    for &mime in ["", "image/png"].iter() {
        let (data, final_url) = retrieve_asset(
            cache,
            &client,
            "data:text/html;base64,...",
            true,
            mime,
            false,
        )
        .unwrap();

        assert_eq!(&data, "data:text/html;base64,...");
        assert_eq!(&final_url, "data:text/html;base64,...");
    }
}

View File

@@ -1,13 +0,0 @@
use crate::js::attr_is_event_handler;
/// Checks a few attribute names that should and should not count as event handlers.
#[test]
fn test_attr_is_event_handler() {
    // succeeding
    for &attr_name in ["onBlur", "onclick", "onClick"].iter() {
        assert!(attr_is_event_handler(attr_name));
    }

    // failing
    for &attr_name in ["href", "", "class"].iter() {
        assert!(!attr_is_event_handler(attr_name));
    }
}

View File

@@ -0,0 +1,45 @@
use crate::js;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Camel-case `onBlur` counts as an event handler attribute.
#[test]
fn passing_onblur_camelcase() {
    let attr_name = "onBlur";
    assert!(js::attr_is_event_handler(attr_name));
}
/// Lower-case `onclick` counts as an event handler attribute.
#[test]
fn passing_onclick_lowercase() {
    let attr_name = "onclick";
    assert!(js::attr_is_event_handler(attr_name));
}
/// Camel-case `onClick` counts as an event handler attribute.
#[test]
fn passing_onclick_camelcase() {
    let attr_name = "onClick";
    assert!(js::attr_is_event_handler(attr_name));
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// `href` is not an event handler attribute.
#[test]
fn failing_href() {
    let attr_name = "href";
    assert!(!js::attr_is_event_handler(attr_name));
}
/// An empty attribute name is not an event handler attribute.
#[test]
fn failing_empty_string() {
    let attr_name = "";
    assert!(!js::attr_is_event_handler(attr_name));
}
/// `class` is not an event handler attribute.
#[test]
fn failing_class() {
    let attr_name = "class";
    assert!(!js::attr_is_event_handler(attr_name));
}

1
src/tests/js/mod.rs Normal file
View File

@@ -0,0 +1 @@
mod attr_is_event_handler;

View File

@@ -0,0 +1,14 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    /// `empty_image!()` yields the expected PNG data URL.
    #[test]
    fn contains_correct_image_data() {
        let expected = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII=";
        assert_eq!(empty_image!(), expected);
    }
}

2
src/tests/macros/mod.rs Normal file
View File

@@ -0,0 +1,2 @@
mod empty_image;
mod str;

24
src/tests/macros/str.rs Normal file
View File

@@ -0,0 +1,24 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
#[cfg(test)]
mod passing {
    /// `str!()` without arguments compares equal to the empty string.
    #[test]
    fn returns_empty_string() {
        let produced = str!();
        assert_eq!(produced, "");
    }

    /// `str!` applied to an integer compares equal to its decimal text.
    #[test]
    fn converts_integer_into_string() {
        let produced = str!(123);
        assert_eq!(produced, "123");
    }

    /// `str!` applied to a string slice compares equal to the same text.
    #[test]
    fn converts_str_into_string() {
        let produced = str!("abc");
        assert_eq!(produced, "abc");
    }
}

View File

@@ -1,4 +1,6 @@
mod cli;
mod css;
mod html;
mod http;
mod js;
mod macros;
mod utils;

View File

@@ -1,160 +0,0 @@
use crate::utils::{
data_to_dataurl, detect_mimetype, is_data_url, is_valid_url, resolve_url, url_has_protocol,
};
use url::ParseError;
/// A small JavaScript snippet is encoded into the expected base64 data URL.
#[test]
fn test_data_to_dataurl() {
    let script = "var word = 'hello';\nalert(word);\n";
    let encoded = data_to_dataurl("application/javascript", script.as_bytes());

    assert_eq!(
        &encoded,
        "data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
    );
}
/// Runs a table of leading byte sequences through `detect_mimetype` and checks the
/// media type reported for each.
#[test]
fn test_detect_mimetype() {
    let cases: &[(&[u8], &str)] = &[
        // image
        (b"GIF87a", "image/gif"),
        (b"GIF89a", "image/gif"),
        (b"\xFF\xD8\xFF", "image/jpeg"),
        (b"\x89PNG\x0D\x0A\x1A\x0A", "image/png"),
        (b"<?xml ", "image/svg+xml"),
        (b"<svg ", "image/svg+xml"),
        (b"RIFF....WEBPVP8 ", "image/webp"),
        (b"\x00\x00\x01\x00", "image/x-icon"),
        // audio
        (b"ID3", "audio/mpeg"),
        (b"\xFF\x0E", "audio/mpeg"),
        (b"\xFF\x0F", "audio/mpeg"),
        (b"OggS", "audio/ogg"),
        (b"RIFF....WAVEfmt ", "audio/wav"),
        (b"fLaC", "audio/x-flac"),
        // video
        (b"RIFF....AVI LIST", "video/avi"),
        (b"....ftyp", "video/mp4"),
        (b"\x00\x00\x01\x0B", "video/mpeg"),
        (b"....moov", "video/quicktime"),
        (b"\x1A\x45\xDF\xA3", "video/webm"),
    ];

    for &(signature, expected) in cases.iter() {
        assert_eq!(detect_mimetype(signature), expected);
    }
}
/// Checks which inputs `url_has_protocol` reports as carrying a scheme.
///
/// Uses `assert!` rather than comparing the result against boolean literals.
#[test]
fn test_url_has_protocol() {
    // succeeding
    assert!(url_has_protocol(
        "mailto:somebody@somewhere.com?subject=hello"
    ));
    assert!(url_has_protocol("tel:5551234567"));
    assert!(url_has_protocol("ftp:user:password@some-ftp-server.com"));
    assert!(url_has_protocol("javascript:void(0)"));
    assert!(url_has_protocol("http://news.ycombinator.com"));
    assert!(url_has_protocol("https://github.com"));
    // the scheme is matched case-insensitively
    assert!(url_has_protocol(
        "MAILTO:somebody@somewhere.com?subject=hello"
    ));

    // failing
    assert!(!url_has_protocol("//some-hostname.com/some-file.html"));
    assert!(!url_has_protocol("some-hostname.com/some-file.html"));
    assert!(!url_has_protocol("/some-file.html"));
    assert!(!url_has_protocol(""));
}
/// Only the absolute `http`/`https` URLs in this list are expected to be accepted.
#[test]
fn test_is_valid_url() {
    // succeeding
    for &url in ["https://www.rust-lang.org/", "http://kernel.org"].iter() {
        assert!(is_valid_url(url));
    }

    // failing
    let rejected = [
        "//kernel.org",
        "./index.html",
        "some-local-page.htm",
        "ftp://1.2.3.4/www/index.html",
        "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
    ];
    for &url in rejected.iter() {
        assert!(!is_valid_url(url));
    }
}
/// Resolves a table of `(base, target)` pairs and checks each resulting absolute URL.
///
/// Any `ParseError` from `resolve_url` is propagated and fails the test.
#[test]
fn test_resolve_url() -> Result<(), ParseError> {
    // (base, target, expected)
    let cases = [
        (
            "https://www.kernel.org",
            "../category/signatures.html",
            "https://www.kernel.org/category/signatures.html",
        ),
        (
            "https://www.kernel.org",
            "category/signatures.html",
            "https://www.kernel.org/category/signatures.html",
        ),
        (
            "saved_page.htm",
            "https://www.kernel.org/category/signatures.html",
            "https://www.kernel.org/category/signatures.html",
        ),
        (
            "https://www.kernel.org",
            "//www.kernel.org/theme/images/logos/tux.png",
            "https://www.kernel.org/theme/images/logos/tux.png",
        ),
        (
            "https://www.kernel.org",
            "//another-host.org/theme/images/logos/tux.png",
            "https://another-host.org/theme/images/logos/tux.png",
        ),
        (
            "https://www.kernel.org/category/signatures.html",
            "/theme/images/logos/tux.png",
            "https://www.kernel.org/theme/images/logos/tux.png",
        ),
        (
            "https://www.w3schools.com/html/html_iframe.asp",
            "default.asp",
            "https://www.w3schools.com/html/default.asp",
        ),
    ];

    for &(base, target, expected) in cases.iter() {
        let resolved_url = resolve_url(base, target)?;
        assert_eq!(resolved_url.as_str(), expected);
    }

    Ok(())
}
/// Only the `data:` URL is expected to be recognised; an `Err` from `is_data_url` counts
/// as "not a data URL".
#[test]
fn test_is_data_url() {
    // succeeding
    let data_url = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    assert!(is_data_url(data_url).unwrap_or(false));

    // failing
    for &url in ["https://kernel.org", "//kernel.org", ""].iter() {
        assert!(!is_data_url(url).unwrap_or(false));
    }
}

View File

@@ -0,0 +1,48 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A non-empty fragment is dropped from the URL.
#[test]
fn passing_removes_fragment() {
    let cleaned = utils::clean_url("https://somewhere.com/font.eot#iefix");
    assert_eq!(cleaned, "https://somewhere.com/font.eot");
}
/// A trailing `#` with nothing after it is dropped from the URL.
#[test]
fn passing_removes_empty_fragment() {
    let cleaned = utils::clean_url("https://somewhere.com/font.eot#");
    assert_eq!(cleaned, "https://somewhere.com/font.eot");
}
/// A trailing `?#` (empty query plus empty fragment) is dropped from the URL.
#[test]
fn passing_removes_empty_query_and_empty_fragment() {
    let cleaned = utils::clean_url("https://somewhere.com/font.eot?#");
    assert_eq!(cleaned, "https://somewhere.com/font.eot");
}
/// A dangling `&` and empty fragment are dropped while the real query pair is kept.
#[test]
fn passing_removes_empty_query_amp_and_empty_fragment() {
    let cleaned = utils::clean_url("https://somewhere.com/font.eot?a=b&#");
    assert_eq!(cleaned, "https://somewhere.com/font.eot?a=b");
}
/// Credentials embedded in the URL are preserved.
#[test]
fn passing_keeps_credentials() {
    let cleaned = utils::clean_url("https://cookie:monster@gibson.internet/");
    assert_eq!(cleaned, "https://cookie:monster@gibson.internet/");
}

View File

@@ -0,0 +1,28 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// An explicitly supplied media type is used verbatim and the payload is base64-encoded.
#[test]
fn passing_encode_string_with_specific_media_type() {
    let script = "var word = 'hello';\nalert(word);\n";
    let encoded = utils::data_to_data_url("application/javascript", script.as_bytes(), "", "");
    assert_eq!(
        encoded.as_str(),
        "data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
    );
}
/// A non-empty fragment argument is appended to the data URL after a `#`.
#[test]
fn passing_encode_append_fragment() {
    let payload = "<svg></svg>\n";
    let encoded = utils::data_to_data_url("text/css", payload.as_bytes(), "", "fragment");
    assert_eq!(
        encoded.as_str(),
        "data:text/css;base64,PHN2Zz48L3N2Zz4K#fragment"
    );
}

View File

@@ -0,0 +1,95 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A base64-encoded `text/html` data URL yields its media type and decoded payload.
#[test]
fn passing_parse_text_html_base64() {
    let source = "data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==";
    let (kind, payload) = utils::data_url_to_data(source);
    assert_eq!(kind, "text/html");
    assert_eq!(
        String::from_utf8_lossy(&payload),
        "Work expands so as to fill the time available for its completion"
    );
}
/// A `;utf8` data URL is returned as plain text.
#[test]
fn passing_parse_text_html_utf8() {
    let source =
        "data:text/html;utf8,Work expands so as to fill the time available for its completion";
    let (kind, payload) = utils::data_url_to_data(source);
    assert_eq!(kind, "text/html");
    assert_eq!(
        String::from_utf8_lossy(&payload),
        "Work expands so as to fill the time available for its completion"
    );
}
/// A data URL without any encoding marker is returned as plain text.
#[test]
fn passing_parse_text_html_plaintext() {
    let source = "data:text/html,Work expands so as to fill the time available for its completion";
    let (kind, payload) = utils::data_url_to_data(source);
    assert_eq!(kind, "text/html");
    assert_eq!(
        String::from_utf8_lossy(&payload),
        "Work expands so as to fill the time available for its completion"
    );
}
/// Whitespace surrounding the data URL does not end up in the media type or payload.
#[test]
fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() {
    let source = " data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ";
    let (kind, payload) = utils::data_url_to_data(source);
    assert_eq!(kind, "text/html");
    assert_eq!(
        String::from_utf8_lossy(&payload),
        "Work expands so as to fill the time available for its completion"
    );
}
/// Percent-escapes inside the payload (`%23`) are decoded (`#`).
#[test]
fn passing_parse_text_css_url_encoded() {
    let source = "data:text/css,div{background-color:%23000}";
    let (kind, payload) = utils::data_url_to_data(source);
    assert_eq!(kind, "text/css");
    assert_eq!(
        String::from_utf8_lossy(&payload),
        "div{background-color:#000}"
    );
}
/// A base64 data URL with no media type yields an empty media type and the decoded payload.
#[test]
fn passing_parse_no_media_type_base64() {
    let source = "data:;base64,dGVzdA==";
    let (kind, payload) = utils::data_url_to_data(source);
    assert_eq!(kind, "");
    assert_eq!(String::from_utf8_lossy(&payload), "test");
}
/// With neither media type nor encoding, the payload is only percent-decoded.
#[test]
fn passing_parse_no_media_type_no_encoding() {
    let source = "data:;,test%20test";
    let (kind, payload) = utils::data_url_to_data(source);
    assert_eq!(kind, "");
    assert_eq!(String::from_utf8_lossy(&payload), "test test");
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// The bare word `data` is not a data URL: both media type and payload come back empty.
#[test]
fn failing_just_word_data() {
    let (kind, payload) = utils::data_url_to_data("data");
    assert_eq!(kind, "");
    assert_eq!(String::from_utf8_lossy(&payload), "");
}

View File

@@ -0,0 +1,39 @@
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Inputs that `utils::decode_url` is expected to decode successfully.
#[cfg(test)]
mod passing {
    use crate::utils;

    /// Percent-encoded multi-byte UTF-8 sequences decode to the original characters.
    #[test]
    fn decode_unicode_characters() {
        let encoded = str!(
            "%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
        );
        assert_eq!(utils::decode_url(encoded), "検ヒム解塗ゃッ = サ");
    }

    /// Escaped space (`%20`) and hash (`%23`) inside a file URL are decoded.
    #[test]
    fn decode_file_url() {
        let encoded = str!("file:///tmp/space%20here/test%231.html");
        assert_eq!(
            utils::decode_url(encoded),
            "file:///tmp/space here/test#1.html"
        );
    }

    /// A literal `+` is kept as-is rather than being turned into a space.
    #[test]
    fn plus_sign() {
        let encoded =
            str!("fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic");
        assert_eq!(
            utils::decode_url(encoded),
            "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
        );
    }
}

View File

@@ -0,0 +1,147 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// The `GIF87a` signature is detected as `image/gif`.
#[test]
fn passing_image_gif87() {
    let detected = utils::detect_media_type(b"GIF87a", "");
    assert_eq!(detected, "image/gif");
}
/// The `GIF89a` signature is detected as `image/gif`.
#[test]
fn passing_image_gif89() {
    let detected = utils::detect_media_type(b"GIF89a", "");
    assert_eq!(detected, "image/gif");
}
/// The `FF D8 FF` signature is detected as `image/jpeg`.
#[test]
fn passing_image_jpeg() {
    let detected = utils::detect_media_type(b"\xFF\xD8\xFF", "");
    assert_eq!(detected, "image/jpeg");
}
/// The eight-byte PNG signature is detected as `image/png`.
#[test]
fn passing_image_png() {
    let detected = utils::detect_media_type(b"\x89PNG\x0D\x0A\x1A\x0A", "");
    assert_eq!(detected, "image/png");
}
/// Data beginning with `<svg ` is detected as `image/svg+xml`.
#[test]
fn passing_image_svg() {
    let detected = utils::detect_media_type(b"<svg ", "");
    assert_eq!(detected, "image/svg+xml");
}
/// The `RIFF....WEBPVP8 ` signature is detected as `image/webp`.
#[test]
fn passing_image_webp() {
    let detected = utils::detect_media_type(b"RIFF....WEBPVP8 ", "");
    assert_eq!(detected, "image/webp");
}
/// The `00 00 01 00` signature is detected as `image/x-icon`.
#[test]
fn passing_image_icon() {
    let detected = utils::detect_media_type(b"\x00\x00\x01\x00", "");
    assert_eq!(detected, "image/x-icon");
}
/// XML-prefixed data together with a `.svg` file name is detected as `image/svg+xml`.
#[test]
fn passing_image_svg_filename() {
    let detected = utils::detect_media_type(b"<?xml ", "local-file.svg");
    assert_eq!(detected, "image/svg+xml");
}
/// With empty data, an upper-case `.SVG` extension in the URL still yields `image/svg+xml`.
#[test]
fn passing_image_svg_url_uppercase() {
    let url = "https://some-site.com/images/local-file.SVG";
    let detected = utils::detect_media_type(b"", url);
    assert_eq!(detected, "image/svg+xml");
}
/// The `ID3` signature is detected as `audio/mpeg`.
#[test]
fn passing_audio_mpeg() {
    let detected = utils::detect_media_type(b"ID3", "");
    assert_eq!(detected, "audio/mpeg");
}
/// The `FF 0E` signature is detected as `audio/mpeg`.
#[test]
fn passing_audio_mpeg_2() {
    let detected = utils::detect_media_type(b"\xFF\x0E", "");
    assert_eq!(detected, "audio/mpeg");
}
/// The `FF 0F` signature is detected as `audio/mpeg`.
#[test]
fn passing_audio_mpeg_3() {
    let detected = utils::detect_media_type(b"\xFF\x0F", "");
    assert_eq!(detected, "audio/mpeg");
}
/// The `OggS` signature is detected as `audio/ogg`.
#[test]
fn passing_audio_ogg() {
    let detected = utils::detect_media_type(b"OggS", "");
    assert_eq!(detected, "audio/ogg");
}
/// The `RIFF....WAVEfmt ` signature is detected as `audio/wav`.
#[test]
fn passing_audio_wav() {
    let detected = utils::detect_media_type(b"RIFF....WAVEfmt ", "");
    assert_eq!(detected, "audio/wav");
}
/// The `fLaC` signature is detected as `audio/x-flac`.
#[test]
fn passing_audio_flac() {
    let detected = utils::detect_media_type(b"fLaC", "");
    assert_eq!(detected, "audio/x-flac");
}
/// The `RIFF....AVI LIST` signature is detected as `video/avi`.
#[test]
fn passing_video_avi() {
    let detected = utils::detect_media_type(b"RIFF....AVI LIST", "");
    assert_eq!(detected, "video/avi");
}
/// The `....ftyp` signature is detected as `video/mp4`.
#[test]
fn passing_video_mp4() {
    let detected = utils::detect_media_type(b"....ftyp", "");
    assert_eq!(detected, "video/mp4");
}
/// The `00 00 01 0B` signature is detected as `video/mpeg`.
#[test]
fn passing_video_mpeg() {
    let detected = utils::detect_media_type(b"\x00\x00\x01\x0B", "");
    assert_eq!(detected, "video/mpeg");
}
/// The `....moov` signature is detected as `video/quicktime`.
#[test]
fn passing_video_quicktime() {
    let detected = utils::detect_media_type(b"....moov", "");
    assert_eq!(detected, "video/quicktime");
}
/// The `1A 45 DF A3` signature is detected as `video/webm`.
#[test]
fn passing_video_webm() {
    let detected = utils::detect_media_type(b"\x1A\x45\xDF\xA3", "");
    assert_eq!(detected, "video/webm");
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Data matching no known signature, with no URL hint, yields an empty media type.
#[test]
fn failing_unknown_media_type() {
    let detected = utils::detect_media_type(b"abcdef0123456789", "");
    assert_eq!(detected, "");
}

View File

@@ -0,0 +1,38 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// The `file://` prefix and the fragment are both removed from the resulting path.
/// A platform-appropriate URL and expected path are used on Windows and elsewhere.
#[test]
fn passing_remove_protocl_and_fragment() {
    let (file_url, expected_path) = if cfg!(windows) {
        (
            "file:///C:/documents/some-path/some-file.svg#fragment",
            "C:\\documents\\some-path\\some-file.svg",
        )
    } else {
        (
            "file:///tmp/some-path/some-file.svg#fragment",
            "/tmp/some-path/some-file.svg",
        )
    };
    assert_eq!(utils::file_url_to_fs_path(file_url), expected_path);
}
/// Percent-escapes in the URL (`%20`) are decoded in the resulting filesystem path.
#[test]
fn passing_decodes_urls() {
    let (file_url, expected_path) = if cfg!(windows) {
        (
            "file:///C:/Documents%20and%20Settings/some-file.html",
            "C:\\Documents and Settings\\some-file.html",
        )
    } else {
        (
            "file:///home/user/My%20Documents",
            "/home/user/My Documents",
        )
    };
    assert_eq!(utils::file_url_to_fs_path(file_url), expected_path);
}

View File

@@ -0,0 +1,23 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// The fragment of a data URL is everything after the `#`.
#[test]
fn passing_data_url() {
    let url = "data:image/svg+xml;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h#test";
    let fragment = utils::get_url_fragment(url);
    assert_eq!(fragment, "test");
}
/// A URL that ends in a bare `#` has an empty fragment.
#[test]
fn passing_https_empty() {
    let fragment = utils::get_url_fragment("https://kernel.org#");
    assert_eq!(fragment, "");
}

View File

@@ -0,0 +1,44 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A base64 `text/html` data URL is recognised as a data URL.
#[test]
fn passing_data_url_text_html() {
    let url = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    assert!(utils::is_data_url(url));
}
/// A data URL that omits the media type is still recognised as a data URL.
#[test]
fn passing_data_url_no_media_type() {
    let url = "data:;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    assert!(utils::is_data_url(url));
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// An `https:` URL is not a data URL.
#[test]
fn failing_https_url() {
    let url = "https://kernel.org";
    assert!(!utils::is_data_url(url));
}
/// A protocol-relative URL is not a data URL.
#[test]
fn failing_no_protocol_url() {
    let url = "//kernel.org";
    assert!(!utils::is_data_url(url));
}
/// The empty string is not a data URL.
#[test]
fn failing_empty_string() {
    let url = "";
    assert!(!utils::is_data_url(url));
}

View File

@@ -0,0 +1,75 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A Unix-style `file:///` URL is recognised as a file URL.
#[test]
fn passing_unix_file_url() {
    let url = "file:///home/user/Websites/my-website/index.html";
    assert!(utils::is_file_url(url));
}
/// A Windows-style `file:///C:/...` URL is recognised as a file URL.
#[test]
fn passing_windows_file_url() {
    let url =
        "file:///C:/Documents%20and%20Settings/user/Websites/my-website/assets/images/logo.png";
    assert!(utils::is_file_url(url));
}
/// Backslashes in place of forward slashes do not prevent recognition of a file URL.
#[test]
fn passing_unix_url_with_backslashes() {
    let url = "file:\\\\\\home\\user\\Websites\\my-website\\index.html";
    assert!(utils::is_file_url(url));
}
/// A Windows file URL written with backslashes is recognised as a file URL.
#[test]
fn passing_windows_file_url_with_backslashes() {
    let url = "file:\\\\\\C:\\Documents%20and%20Settings\\user\\Websites\\my-website\\assets\\images\\logo.png";
    assert!(utils::is_file_url(url));
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A protocol-relative URL is not a file URL.
#[test]
fn failing_url_with_no_protocl() {
    let url = "//kernel.org";
    assert!(!utils::is_file_url(url));
}
/// A `./`-relative path is not a file URL.
#[test]
fn failing_dot_slash_filename() {
    let path = "./index.html";
    assert!(!utils::is_file_url(path));
}
/// A bare file name is not a file URL.
#[test]
fn failing_just_filename() {
    let path = "some-local-page.htm";
    assert!(!utils::is_file_url(path));
}
/// An `https:` URL with an IP address and port is not a file URL.
#[test]
fn failing_https_ip_port_url() {
    let url = "https://1.2.3.4:80/www/index.html";
    assert!(!utils::is_file_url(url));
}
/// A data URL is not a file URL.
#[test]
fn failing_data_url() {
    let url = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    assert!(!utils::is_file_url(url));
}
/// The bare word `file` (no colon) is not a file URL.
#[test]
fn failing_just_word_file() {
    let word = "file";
    assert!(!utils::is_file_url(word));
}

View File

@@ -0,0 +1,57 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A plain `http:` URL is recognised as an HTTP URL.
#[test]
fn passing_http_url() {
    let url = "http://kernel.org";
    assert!(utils::is_http_url(url));
}
/// An `https:` URL also counts as an HTTP URL.
#[test]
fn passing_https_url() {
    let url = "https://www.rust-lang.org/";
    assert!(utils::is_http_url(url));
}
/// Backslashes in place of forward slashes do not prevent recognition of an HTTP URL.
#[test]
fn passing_http_url_with_backslashes() {
    let url = "http:\\\\freebsd.org\\";
    assert!(utils::is_http_url(url));
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A protocol-relative URL is not an HTTP URL.
#[test]
fn failing_url_with_no_protocol() {
    let url = "//kernel.org";
    assert!(!utils::is_http_url(url));
}
/// A `./`-relative path is not an HTTP URL.
#[test]
fn failing_dot_slash_filename() {
    let path = "./index.html";
    assert!(!utils::is_http_url(path));
}
/// A bare file name is not an HTTP URL.
#[test]
fn failing_just_filename() {
    let path = "some-local-page.htm";
    assert!(!utils::is_http_url(path));
}
/// An `ftp:` URL pointing at an IP address is not an HTTP URL.
/// NOTE(review): despite the test's name, the URL under test uses the `ftp` scheme.
#[test]
fn failing_https_ip_port_url() {
    let url = "ftp://1.2.3.4/www/index.html";
    assert!(!utils::is_http_url(url));
}
/// A data URL is not an HTTP URL.
#[test]
fn failing_data_url() {
    let url = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    assert!(!utils::is_http_url(url));
}

13
src/tests/utils/mod.rs Normal file
View File

@@ -0,0 +1,13 @@
mod clean_url;
mod data_to_data_url;
mod data_url_to_data;
mod decode_url;
mod detect_media_type;
mod file_url_to_fs_path;
mod get_url_fragment;
mod is_data_url;
mod is_file_url;
mod is_http_url;
mod resolve_url;
mod retrieve_asset;
mod url_has_protocol;

View File

@@ -0,0 +1,229 @@
use url::ParseError;
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A `../` reference against a host-only URL resolves directly under the host root.
#[test]
fn passing_from_https_to_level_up_relative() -> Result<(), ParseError> {
    let base = "https://www.kernel.org";
    let reference = "../category/signatures.html";
    let resolved = utils::resolve_url(base, reference)?;
    assert_eq!(resolved, "https://www.kernel.org/category/signatures.html");
    Ok(())
}
/// An absolute `https:` target is returned as-is even when the base is just a file name.
#[test]
fn passing_from_just_filename_to_full_https_url() -> Result<(), ParseError> {
    let base = "saved_page.htm";
    let reference = "https://www.kernel.org/category/signatures.html";
    let resolved = utils::resolve_url(base, reference)?;
    assert_eq!(resolved, "https://www.kernel.org/category/signatures.html");
    Ok(())
}
/// A protocol-relative target inherits the scheme of the base URL.
#[test]
fn passing_from_https_url_to_url_with_no_protocol() -> Result<(), ParseError> {
    let base = "https://www.kernel.org";
    let reference = "//www.kernel.org/theme/images/logos/tux.png";
    let resolved = utils::resolve_url(base, reference)?;
    assert_eq!(
        resolved,
        "https://www.kernel.org/theme/images/logos/tux.png"
    );
    Ok(())
}
/// A protocol-relative target on another host keeps its own host and takes the base scheme.
#[test]
fn passing_from_https_url_to_url_with_no_protocol_and_on_different_hostname(
) -> Result<(), ParseError> {
    let base = "https://www.kernel.org";
    let reference = "//another-host.org/theme/images/logos/tux.png";
    let resolved = utils::resolve_url(base, reference)?;
    assert_eq!(
        resolved,
        "https://another-host.org/theme/images/logos/tux.png"
    );
    Ok(())
}
/// A root-relative target replaces the whole path of the base URL.
#[test]
fn passing_from_https_url_to_relative_root_path() -> Result<(), ParseError> {
    let base = "https://www.kernel.org/category/signatures.html";
    let reference = "/theme/images/logos/tux.png";
    let resolved = utils::resolve_url(base, reference)?;
    assert_eq!(
        resolved,
        "https://www.kernel.org/theme/images/logos/tux.png"
    );
    Ok(())
}
/// A bare file name replaces the last path segment of the base URL.
#[test]
fn passing_from_https_to_just_filename() -> Result<(), ParseError> {
    let base = "https://www.w3schools.com/html/html_iframe.asp";
    let reference = "default.asp";
    let resolved = utils::resolve_url(base, reference)?;
    assert_eq!(resolved, "https://www.w3schools.com/html/default.asp");
    Ok(())
}
/// An absolute `https:` target is returned unchanged when the base is a data URL.
#[test]
fn passing_from_data_url_to_https() -> Result<(), ParseError> {
    let base = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let reference = "https://www.kernel.org/category/signatures.html";
    let resolved = utils::resolve_url(base, reference)?;
    assert_eq!(resolved, "https://www.kernel.org/category/signatures.html");
    Ok(())
}
/// A data URL target is returned unchanged when the base is another data URL.
#[test]
fn passing_from_data_url_to_data_url() -> Result<(), ParseError> {
    let base = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let reference = "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K";
    let resolved = utils::resolve_url(base, reference)?;
    assert_eq!(
        resolved,
        "data:text/html;base64,PGEgaHJlZj0iaW5kZXguaHRtbCI+SG9tZTwvYT4K"
    );
    Ok(())
}
/// A relative path resolves against the directory of a `file:` base URL.
///
/// A resolution error is propagated with `?`, so a failure reports the
/// `ParseError` itself rather than a mismatch against an empty string.
#[test]
fn passing_from_file_url_to_relative_path() -> Result<(), ParseError> {
    let resolved_url = utils::resolve_url(
        "file:///home/user/Websites/my-website/index.html",
        "assets/images/logo.png",
    )?;
    assert_eq!(
        resolved_url.as_str(),
        "file:///home/user/Websites/my-website/assets/images/logo.png"
    );
    Ok(())
}
/// Backslashes in both the `file:` base URL and the relative path are treated
/// like forward slashes; the result is expected with forward slashes.
///
/// A resolution error is propagated with `?`, so a failure reports the
/// `ParseError` itself rather than a mismatch against an empty string.
#[test]
fn passing_from_file_url_to_relative_path_with_backslashes() -> Result<(), ParseError> {
    let resolved_url = utils::resolve_url(
        "file:\\\\\\home\\user\\Websites\\my-website\\index.html",
        "assets\\images\\logo.png",
    )?;
    assert_eq!(
        resolved_url.as_str(),
        "file:///home/user/Websites/my-website/assets/images/logo.png"
    );
    Ok(())
}
/// An absolute `file:` target is returned unchanged when the base is a data URL.
///
/// A resolution error is propagated with `?`, so a failure reports the
/// `ParseError` itself rather than a mismatch against an empty string.
#[test]
fn passing_from_data_url_to_file_url() -> Result<(), ParseError> {
    let resolved_url = utils::resolve_url(
        "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h",
        "file:///etc/passwd",
    )?;
    assert_eq!(resolved_url.as_str(), "file:///etc/passwd");
    Ok(())
}
/// The fragment of the relative reference is kept in the resolved URL.
///
/// A resolution error is propagated with `?`, so a failure reports the
/// `ParseError` itself rather than a mismatch against an empty string.
#[test]
fn passing_preserve_fragment() -> Result<(), ParseError> {
    let resolved_url = utils::resolve_url(
        "http://doesnt-matter.local/",
        "css/fonts/fontmarvelous.svg#fontmarvelous",
    )?;
    assert_eq!(
        resolved_url.as_str(),
        "http://doesnt-matter.local/css/fonts/fontmarvelous.svg#fontmarvelous"
    );
    Ok(())
}
/// An absolute `file:` target is returned unchanged when the base is another
/// `file:` URL. Platform-appropriate URLs are used on Windows and elsewhere.
///
/// A resolution error is propagated with `?`, so a failure reports the
/// `ParseError` itself rather than a mismatch against an empty string.
#[test]
fn passing_resolve_from_file_url_to_file_url() -> Result<(), ParseError> {
    let (from, to, expected) = if cfg!(windows) {
        (
            "file:///c:/index.html",
            "file:///c:/image.png",
            "file:///c:/image.png",
        )
    } else {
        (
            "file:///tmp/index.html",
            "file:///tmp/image.png",
            "file:///tmp/image.png",
        )
    };
    let resolved_url = utils::resolve_url(from, to)?;
    assert_eq!(resolved_url.as_str(), expected);
    Ok(())
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Resolving a protocol-relative target against a data URL must not produce a
/// usable URL: the result (or the empty-string fallback on error) is empty.
#[test]
fn failing_from_data_url_to_url_with_no_protocol() -> Result<(), ParseError> {
    let base = "data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h";
    let reference = "//www.w3schools.com/html/html_iframe.asp";
    let outcome = utils::resolve_url(base, reference);
    let resolved = outcome.unwrap_or(str!());
    assert_eq!(resolved.as_str(), "");
    Ok(())
}

View File

@@ -0,0 +1,124 @@
use crate::utils;
use reqwest::blocking::Client;
use std::collections::HashMap;
use std::env;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// When both parent and target are data URLs, the retrieved asset, its final
/// URL and its media type must all describe the target.
#[test]
fn passing_read_data_url() {
    let mut cache = HashMap::new();
    let client = Client::new();

    // If both source and target are data URLs,
    // ensure the result contains target data URL
    let retrieved = utils::retrieve_asset(
        &mut cache,
        &client,
        "data:text/html;base64,c291cmNl",
        "data:text/html;base64,dGFyZ2V0",
        false,
    );
    let (data, final_url, media_type) = retrieved.unwrap();

    let expected_data_url = utils::data_to_data_url("text/html", "target".as_bytes(), "", "");
    let rebuilt_data_url = utils::data_to_data_url(&media_type, &data, &final_url, "");
    assert_eq!(rebuilt_data_url, expected_data_url);
    assert_eq!(final_url, expected_data_url);
    assert_eq!(&media_type, "text/html");
}
/// A local script referenced from a local HTML file can be retrieved; the
/// final URL is the script's own `file:` URL.
#[test]
fn passing_read_local_file_with_file_url_parent() {
    let mut cache = HashMap::new();
    let client = Client::new();
    let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };

    // Both URLs are built from the current working directory
    let cwd = env::current_dir().unwrap();
    let cwd_str = cwd.to_str().unwrap();
    let parent_url = format!(
        "{file}{cwd}/src/tests/data/basic/local-file.html",
        file = file_url_protocol,
        cwd = cwd_str
    );
    let asset_url = format!(
        "{file}{cwd}/src/tests/data/basic/local-script.js",
        file = file_url_protocol,
        cwd = cwd_str
    );

    // Inclusion of local assets from local sources should be allowed
    let (data, final_url, _media_type) =
        utils::retrieve_asset(&mut cache, &client, &parent_url, &asset_url, false).unwrap();

    assert_eq!(
        utils::data_to_data_url("application/javascript", &data, &final_url, ""),
        "data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg=="
    );
    assert_eq!(final_url, asset_url);
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// Retrieving a local file on behalf of a data URL parent must fail.
#[test]
fn failing_read_local_file_with_data_url_parent() {
    let cache = &mut HashMap::new();
    let client = Client::new();

    // Inclusion of local assets from data URL sources should not be allowed
    let result = utils::retrieve_asset(
        cache,
        &client,
        "data:text/html;base64,SoUrCe",
        "file:///etc/passwd",
        false,
    );
    assert!(
        result.is_err(),
        "local file was retrieved for a data URL parent"
    );
}
/// Retrieving a local file on behalf of a remote (`https:`) parent must fail.
#[test]
fn failing_read_local_file_with_https_parent() {
    let cache = &mut HashMap::new();
    let client = Client::new();

    // Inclusion of local assets from remote sources should not be allowed
    let result = utils::retrieve_asset(
        cache,
        &client,
        "https://kernel.org/",
        "file:///etc/passwd",
        false,
    );
    assert!(
        result.is_err(),
        "local file was retrieved for a remote parent"
    );
}

View File

@@ -0,0 +1,83 @@
use crate::utils;
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A `mailto:` URL has a protocol.
#[test]
fn passing_mailto() {
    let url = "mailto:somebody@somewhere.com?subject=hello";
    assert!(utils::url_has_protocol(url));
}
/// A `tel:` URL has a protocol.
#[test]
fn passing_tel() {
    let url = "tel:5551234567";
    assert!(utils::url_has_protocol(url));
}
/// An `ftp:` URL written without `//` still has a protocol.
#[test]
fn passing_ftp_no_slashes() {
    let url = "ftp:some-ftp-server.com";
    assert!(utils::url_has_protocol(url));
}
/// An `ftp://` URL carrying credentials has a protocol.
#[test]
fn passing_ftp_with_credentials() {
    let url = "ftp://user:password@some-ftp-server.com";
    assert!(utils::url_has_protocol(url));
}
/// A `javascript:` URL has a protocol.
#[test]
fn passing_javascript() {
    let url = "javascript:void(0)";
    assert!(utils::url_has_protocol(url));
}
/// An `http://` URL has a protocol.
#[test]
fn passing_http() {
    let url = "http://news.ycombinator.com";
    assert!(utils::url_has_protocol(url));
}
/// An `https://` URL has a protocol.
#[test]
fn passing_https() {
    let url = "https://github.com";
    assert!(utils::url_has_protocol(url));
}
/// The protocol is recognised even when written in upper case.
#[test]
fn passing_mailto_uppercase() {
    let url = "MAILTO:somebody@somewhere.com?subject=hello";
    assert!(utils::url_has_protocol(url));
}
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
/// A protocol-relative URL has no protocol.
#[test]
fn failing_url_with_no_protocol() {
    let url = "//some-hostname.com/some-file.html";
    assert!(!utils::url_has_protocol(url));
}
/// A host-like relative path has no protocol.
#[test]
fn failing_relative_path() {
    let path = "some-hostname.com/some-file.html";
    assert!(!utils::url_has_protocol(path));
}
/// A root-relative path has no protocol.
#[test]
fn failing_relative_to_root_path() {
    let path = "/some-file.html";
    assert!(!utils::url_has_protocol(path));
}
/// The empty string has no protocol.
#[test]
fn failing_empty_string() {
    let url = "";
    assert!(!utils::url_has_protocol(url));
}

View File

@@ -1,56 +1,17 @@
extern crate base64;
use self::base64::encode;
use http::retrieve_asset;
use regex::Regex;
use reqwest::Client;
use base64;
use reqwest::blocking::Client;
use reqwest::header::CONTENT_TYPE;
use std::collections::HashMap;
use url::{ParseError, Url};
use std::fs;
use std::path::Path;
use url::{form_urlencoded, ParseError, Url};
/// This monster of a regex is used to match any kind of URL found in CSS.
///
/// There are roughly three different categories that a found URL could fit
/// into:
/// - Font [found after a src: property in an @font-face rule]
/// - Stylesheet [denoted by an @import before the url]
/// - Image [covers all other uses of the url() function]
///
/// This regex aims to extract the following information:
/// - What type of URL is it (font/image/css)
/// - Where is the part that needs to be replaced (incl any wrapping quotes)
/// - What is the URL (excl any wrapping quotes)
///
/// Essentially, the regex can be broken down into two parts:
///
/// `(?:(?:(?P<stylesheet>@import)|(?P<font>src\s*:))\s+)?`
/// This matches the precursor to a font or CSS URL, and fills in a match under
/// either `<stylesheet>` (if it's a CSS URL) or `<font>` (if it's a font).
/// Determining whether or not it's an image can be done by the negation of both
/// of these. Either zero or one of these can match.
///
/// `url\((?P<to_repl>['"]?(?P<url>[^"'\)]+)['"]?)\)`
/// This matches the actual URL part of the url(), and must always match. It also
/// sets `<to_repl>` and `<url>` which correspond to everything within
/// `url(...)` and a usable URL, respectively.
///
/// Note, however, that this does not perform any validation of the found URL.
/// Malformed CSS could lead to an invalid URL being present. It is therefore
/// recommended that the URL be manually validated.
const CSS_URL_REGEX_STR: &str = r###"(?:(?:(?P<stylesheet>@import)|(?P<font>src\s*:))\s+)?url\((?P<to_repl>['"]?(?P<url>[^"'\)]+)['"]?)\)"###;
lazy_static! {
static ref HAS_PROTOCOL: Regex = Regex::new(r"^[a-z0-9]+:").unwrap();
static ref REGEX_URL: Regex = Regex::new(r"^https?://").unwrap();
static ref REGEX_CSS_URL: Regex = Regex::new(CSS_URL_REGEX_STR).unwrap();
}
const MAGIC: [[&[u8]; 2]; 19] = [
const MAGIC: [[&[u8]; 2]; 18] = [
// Image
[b"GIF87a", b"image/gif"],
[b"GIF89a", b"image/gif"],
[b"\xFF\xD8\xFF", b"image/jpeg"],
[b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"],
[b"<?xml ", b"image/svg+xml"],
[b"<svg ", b"image/svg+xml"],
[b"RIFF....WEBPVP8 ", b"image/webp"],
[b"\x00\x00\x01\x00", b"image/x-icon"],
@@ -69,38 +30,78 @@ const MAGIC: [[&[u8]; 2]; 19] = [
[b"\x1A\x45\xDF\xA3", b"video/webm"],
];
pub fn data_to_dataurl(mime: &str, data: &[u8]) -> String {
let mimetype = if mime.is_empty() {
detect_mimetype(data)
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
"image/svg+xml",
"text/css",
"text/html",
"text/javascript",
"text/plain",
];
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String {
let media_type: String = if media_type.is_empty() {
detect_media_type(data, &url)
} else {
mime.to_string()
media_type.to_string()
};
format!("data:{};base64,{}", mimetype, encode(data))
let hash: String = if fragment != "" {
format!("#{}", fragment)
} else {
str!()
};
format!(
"data:{};base64,{}{}",
media_type,
base64::encode(data),
hash
)
}
pub fn detect_mimetype(data: &[u8]) -> String {
pub fn detect_media_type(data: &[u8], url: &str) -> String {
for item in MAGIC.iter() {
if data.starts_with(item[0]) {
return String::from_utf8(item[1].to_vec()).unwrap();
}
}
"".to_owned()
if url.to_lowercase().ends_with(".svg") {
return str!("image/svg+xml");
}
str!()
}
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
HAS_PROTOCOL.is_match(url.as_ref().to_lowercase().as_str())
Url::parse(url.as_ref())
.and_then(|u| Ok(u.scheme().len() > 0))
.unwrap_or(false)
}
pub fn is_data_url<T: AsRef<str>>(url: T) -> Result<bool, ParseError> {
Url::parse(url.as_ref()).and_then(|u| Ok(u.scheme() == "data"))
/// Returns `true` when `url` parses as a URL whose scheme is `data`.
///
/// Anything that fails to parse is reported as `false`.
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
    match Url::parse(url.as_ref()) {
        Ok(parsed) => parsed.scheme() == "data",
        Err(_) => false,
    }
}
pub fn is_valid_url<T: AsRef<str>>(path: T) -> bool {
REGEX_URL.is_match(path.as_ref())
/// Returns `true` when `url` parses as a URL whose scheme is `file`.
///
/// Anything that fails to parse is reported as `false`.
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
    match Url::parse(url.as_ref()) {
        Ok(parsed) => parsed.scheme() == "file",
        Err(_) => false,
    }
}
/// Returns `true` when `url` parses as a URL whose scheme is `http` or `https`.
///
/// Anything that fails to parse is reported as `false`.
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
    match Url::parse(url.as_ref()) {
        Ok(parsed) => {
            let scheme = parsed.scheme();
            scheme == "http" || scheme == "https"
        }
        Err(_) => false,
    }
}
/// Returns `true` when `media_type`, compared case-insensitively, is one of
/// the entries of `PLAINTEXT_MEDIA_TYPES`.
pub fn is_plaintext_media_type(media_type: &str) -> bool {
    let lowered: String = media_type.to_lowercase();
    PLAINTEXT_MEDIA_TYPES
        .iter()
        .any(|known| *known == lowered.as_str())
}
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
let result = if is_valid_url(to.as_ref()) {
let result = if is_http_url(to.as_ref()) {
to.as_ref().to_string()
} else {
Url::parse(from.as_ref())?
@@ -111,88 +112,192 @@ pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<Strin
Ok(result)
}
pub fn resolve_css_imports(
cache: &mut HashMap<String, String>,
client: &Client,
css_string: &str,
as_dataurl: bool,
href: &str,
opt_no_images: bool,
opt_silent: bool,
) -> String {
let mut resolved_css = String::from(css_string);
for link in REGEX_CSS_URL.captures_iter(&css_string) {
let target_link = link.name("url").unwrap().as_str();
// Determine the type of link
let is_stylesheet = link.name("stylesheet").is_some();
let is_font = link.name("font").is_some();
let is_image = !is_stylesheet && !is_font;
// Generate absolute URL for content
let embedded_url = match resolve_url(href, target_link) {
Ok(url) => url,
Err(_) => continue, // Malformed URL
};
// Download the asset. If it's more CSS, resolve that too
let content = if is_stylesheet {
// The link is an @import link
retrieve_asset(
cache,
client,
&embedded_url,
false, // Formating as data URL will be done later
"text/css", // Expect CSS
opt_silent,
)
.map(|(content, _)| {
resolve_css_imports(
cache,
client,
&content,
true, // Finally, convert to a dataurl
&embedded_url,
opt_no_images,
opt_silent,
)
})
} else if (is_image && !opt_no_images) || is_font {
// The link is some other, non-@import link
retrieve_asset(
cache,
client,
&embedded_url,
true, // Format as data URL
"", // Unknown MIME type
opt_silent,
)
.map(|(a, _)| a)
} else {
// If it's a datatype that has been opt_no'd out of, replace with
// absolute URL
Ok(embedded_url.clone())
}
.unwrap_or_else(|e| {
eprintln!("Warning: {}", e);
// If failed to resolve, replace with absolute URL
embedded_url
});
let replacement = format!("\"{}\"", &content);
let dest = link.name("to_repl").unwrap();
let offset = resolved_css.len() - css_string.len();
let target_range = (dest.start() + offset)..(dest.end() + offset);
resolved_css.replace_range(target_range, &replacement);
}
if as_dataurl {
data_to_dataurl("text/css", resolved_css.as_bytes())
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
if Url::parse(url.as_ref()).unwrap().fragment() == None {
str!()
} else {
resolved_css
str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap())
}
}
/// Strips the fragment, an empty query string and any trailing `&` from a URL.
///
/// Input that cannot be parsed as an absolute URL is returned unchanged
/// instead of causing a panic.
pub fn clean_url<T: AsRef<str>>(input: T) -> String {
    let mut url = match Url::parse(input.as_ref()) {
        Ok(url) => url,
        // Nothing can be cleaned reliably; hand the input back untouched
        Err(_) => return input.as_ref().to_string(),
    };

    // Clear fragment
    url.set_fragment(None);

    // Get rid of stray question mark
    if url.query() == Some("") {
        url.set_query(None);
    }

    // Remove empty trailing ampersand(s)
    url.as_str().trim_end_matches('&').to_string()
}
/// Splits a data URL into its media type and its decoded payload.
///
/// Everything before the first comma is metadata: the first `;`-separated
/// item is the media type, and the last item equal (ignoring ASCII case) to
/// `base64`, `utf8` or `charset=UTF-8` selects the encoding. The part after
/// the comma is percent-decoded, then base64-decoded if the encoding is
/// `base64`.
///
/// Input that cannot be parsed as a URL is treated like the empty data URL
/// `data:,`. A payload that is not valid base64 yields empty data.
pub fn data_url_to_data<T: AsRef<str>>(url: T) -> (String, Vec<u8>) {
    let parsed_url: Url = Url::parse(url.as_ref())
        .unwrap_or_else(|_| Url::parse("data:,").expect("\"data:,\" is a valid URL"));
    let path: &str = parsed_url.path();

    // `find` returns a byte offset, so split by slicing rather than by counting chars
    let (meta_data, raw_data) = match path.find(',') {
        Some(comma_loc) => (&path[..comma_loc], &path[comma_loc + 1..]),
        None => (path, ""),
    };
    let text: String = decode_url(raw_data.to_string());

    // `split` always yields at least one (possibly empty) item: the media type
    let mut meta_data_items = meta_data.split(';');
    let media_type: String = meta_data_items.next().unwrap_or("").to_string();

    // The last recognised item wins
    let encoding: &str = meta_data_items
        .filter(|item| {
            item.eq_ignore_ascii_case("base64")
                || item.eq_ignore_ascii_case("utf8")
                || item.eq_ignore_ascii_case("charset=UTF-8")
        })
        .last()
        .unwrap_or("");

    let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
        base64::decode(&text).unwrap_or_default()
    } else {
        text.into_bytes()
    };

    (media_type, data)
}
pub fn decode_url(input: String) -> String {
let input: String = input.replace("+", "%2B");
form_urlencoded::parse(input.as_bytes())
.map(|(key, val)| {
[
key.to_string(),
if val.to_string().len() == 0 {
str!()
} else {
str!('=')
},
val.to_string(),
]
.concat()
})
.collect()
}
/// Converts a `file://` URL into a filesystem path string.
///
/// Returns an empty string when `url` is not a file URL (per `is_file_url`).
/// Otherwise the scheme prefix and the fragment (everything from the first
/// `#`) are removed, the remainder is decoded with `decode_url`, and on
/// Windows forward slashes are replaced with backslashes.
pub fn file_url_to_fs_path(url: &str) -> String {
    if !is_file_url(url) {
        return str!();
    }

    // Skip the leading "file://" (7 bytes); on Windows one more byte is
    // skipped, presumably the slash in front of the drive letter.
    let cutoff_l = if cfg!(windows) { 8 } else { 7 };
    // `get` instead of indexing: a URL shorter than the cutoff yields an
    // empty path rather than a panic
    let without_scheme: &str = url.get(cutoff_l..).unwrap_or("");

    // Cut the fragment off BEFORE decoding. Measuring the fragment in its
    // encoded form and subtracting that length from the decoded path removes
    // the wrong number of bytes whenever the fragment contains escapes.
    let raw_path: &str = match without_scheme.find('#') {
        Some(hash_loc) => &without_scheme[..hash_loc],
        None => without_scheme,
    };

    let mut fs_file_path: String = decode_url(raw_path.to_string());

    if cfg!(windows) {
        fs_file_path = fs_file_path.replace("/", "\\");
    }

    // File paths should not be %-encoded
    decode_url(fs_file_path)
}
/// Obtains the bytes of the asset at `url`.
///
/// On success returns `(data, final_url, media_type)`:
///
/// * `data:` URLs are decoded in place via `data_url_to_data`.
/// * `file:` URLs are read from disk, but only when `parent_url` is a file
///   URL as well; the media type is left empty.
/// * Any other URL is served from `cache` when its cleaned form is present
///   (media type left empty). Otherwise it is fetched with `client`, stored
///   in `cache` under the cleaned response URL, and returned together with
///   the response URL and the value of the `Content-Type` header (empty when
///   the header is missing or not valid text).
///
/// Unless `opt_silent` is set, the URL being processed is printed to stderr
/// for file, cached and fetched assets.
///
/// # Errors
///
/// Returns a `reqwest::Error` when `url` is empty, when a file URL is
/// referenced from a non-file parent, when the local file cannot be read, or
/// when the request or reading its body fails. The non-network cases obtain
/// their error value by sending a request to an empty URL.
pub fn retrieve_asset(
    cache: &mut HashMap<String, Vec<u8>>,
    client: &Client,
    parent_url: &str,
    url: &str,
    opt_silent: bool,
) -> Result<(Vec<u8>, String, String), reqwest::Error> {
    if url.is_empty() {
        // Provoke error
        client.get("").send()?;
    }

    if is_data_url(&url) {
        let (media_type, data) = data_url_to_data(url);
        Ok((data, url.to_string(), media_type))
    } else if is_file_url(&url) {
        // Check if parent_url is also file:///
        // (if not, then we don't embed the asset)
        if !is_file_url(&parent_url) {
            // Provoke error
            client.get("").send()?;
        }

        let fs_file_path: String = file_url_to_fs_path(url);

        // Read directly instead of checking for existence first: a missing
        // file, a directory and an unreadable file all become an `Err`
        // rather than a panic.
        match fs::read(Path::new(&fs_file_path)) {
            Ok(data) => {
                if !opt_silent {
                    eprintln!("{}", &url);
                }

                Ok((data, url.to_string(), str!()))
            }
            // Provoke error
            Err(_) => Err(client.get("").send().unwrap_err()),
        }
    } else {
        let cache_key: String = clean_url(&url);

        if let Some(cached_data) = cache.get(&cache_key) {
            // URL is in cache, we get and return it
            if !opt_silent {
                eprintln!("{} (from cache)", &url);
            }

            Ok((cached_data.to_vec(), url.to_string(), str!()))
        } else {
            // URL not in cache, we retrieve the file
            let mut response = client.get(url).send()?;
            let res_url = response.url().to_string();

            if !opt_silent {
                if url == res_url {
                    eprintln!("{}", &url);
                } else {
                    eprintln!("{} -> {}", &url, &res_url);
                }
            }

            // The cache is keyed by the URL the response actually came from
            let new_cache_key: String = clean_url(&res_url);

            // Convert response into a byte array
            let mut data: Vec<u8> = vec![];
            response.copy_to(&mut data)?;

            // Attempt to obtain media type by reading the Content-Type header
            let media_type = response
                .headers()
                .get(CONTENT_TYPE)
                .and_then(|header| header.to_str().ok())
                .unwrap_or("");

            // Add to cache
            cache.insert(new_cache_key, data.clone());

            Ok((data, res_url, media_type.to_string()))
        }
    }
}