Compare commits
48 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8b0635bd84 | ||
|
|
b685d3a46c | ||
|
|
82d05fc0f1 | ||
|
|
65ac5c36b1 | ||
|
|
1f60a76fcd | ||
|
|
f067fc2324 | ||
|
|
49d7585e02 | ||
|
|
e25b7bc470 | ||
|
|
329c0568a4 | ||
|
|
f151a33c48 | ||
|
|
ebf96bf1e5 | ||
|
|
c10c78a27d | ||
|
|
64e84e4983 | ||
|
|
674d4085c7 | ||
|
|
e0fd5d4bb9 | ||
|
|
f4e360f09d | ||
|
|
0b4116f48a | ||
|
|
084981a2ae | ||
|
|
a3feb7b721 | ||
|
|
87eb197e33 | ||
|
|
6798cad2b2 | ||
|
|
174cb50877 | ||
|
|
e397a7532d | ||
|
|
91d8c146a9 | ||
|
|
67e07b91af | ||
|
|
f797b8c999 | ||
|
|
60251c6878 | ||
|
|
b70801d55b | ||
|
|
2a50936990 | ||
|
|
f9e961f088 | ||
|
|
d8c3620d00 | ||
|
|
4aab0a64ee | ||
|
|
a2155e0af6 | ||
|
|
f7e5527432 | ||
|
|
f7dd09d481 | ||
|
|
73c0ceebd4 | ||
|
|
727eae2e35 | ||
|
|
b7a38c9f4a | ||
|
|
aa556094a4 | ||
|
|
81b304c558 | ||
|
|
a3e82a2ad8 | ||
|
|
a5bf1705db | ||
|
|
78c37958dc | ||
|
|
20c56a5440 | ||
|
|
37416f827b | ||
|
|
7f123e810b | ||
|
|
db04d11d99 | ||
|
|
1c8d4f1830 |
1
.dockerignore
Normal file
1
.dockerignore
Normal file
@@ -0,0 +1 @@
|
||||
/target/
|
||||
3
.github/FUNDING.yml
vendored
Normal file
3
.github/FUNDING.yml
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
# These are supported funding model platforms
|
||||
|
||||
github: snshn
|
||||
14
.github/workflows/cd.yml
vendored
14
.github/workflows/cd.yml
vendored
@@ -20,14 +20,14 @@ jobs:
|
||||
- name: Build the executable
|
||||
run: cargo build --release
|
||||
|
||||
- uses: Shopify/upload-to-release@1.0.0
|
||||
- uses: Shopify/upload-to-release@v2.0.0
|
||||
with:
|
||||
name: monolith.exe
|
||||
path: target\release\monolith.exe
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
gnu_linux_armhf:
|
||||
runs-on: ubuntu-18.04
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: Checkout the repository
|
||||
uses: actions/checkout@v2
|
||||
@@ -53,14 +53,14 @@ jobs:
|
||||
run: cargo build --release --target=arm-unknown-linux-gnueabihf
|
||||
|
||||
- name: Attach artifact to the release
|
||||
uses: Shopify/upload-to-release@1.0.0
|
||||
uses: Shopify/upload-to-release@v2.0.0
|
||||
with:
|
||||
name: monolith-gnu-linux-armhf
|
||||
path: target/arm-unknown-linux-gnueabihf/release/monolith
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
gnu_linux_aarch64:
|
||||
runs-on: ubuntu-18.04
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: Checkout the repository
|
||||
uses: actions/checkout@v2
|
||||
@@ -86,14 +86,14 @@ jobs:
|
||||
run: cargo build --release --target=aarch64-unknown-linux-gnu
|
||||
|
||||
- name: Attach artifact to the release
|
||||
uses: Shopify/upload-to-release@1.0.0
|
||||
uses: Shopify/upload-to-release@v2.0.0
|
||||
with:
|
||||
name: monolith-gnu-linux-aarch64
|
||||
path: target/aarch64-unknown-linux-gnu/release/monolith
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
gnu_linux_x86_64:
|
||||
runs-on: ubuntu-18.04
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: Checkout the repository
|
||||
uses: actions/checkout@v2
|
||||
@@ -101,7 +101,7 @@ jobs:
|
||||
- name: Build the executable
|
||||
run: cargo build --release
|
||||
|
||||
- uses: Shopify/upload-to-release@1.0.0
|
||||
- uses: Shopify/upload-to-release@v2.0.0
|
||||
with:
|
||||
name: monolith-gnu-linux-x86_64
|
||||
path: target/release/monolith
|
||||
|
||||
1392
Cargo.lock
generated
1392
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
41
Cargo.toml
41
Cargo.toml
@@ -1,14 +1,15 @@
|
||||
[package]
|
||||
name = "monolith"
|
||||
version = "2.7.0"
|
||||
version = "2.8.3"
|
||||
authors = [
|
||||
"Sunshine <sunshine@uberspace.net>",
|
||||
"Sunshine <snshn@tutanota.com>",
|
||||
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
|
||||
"Emmanuel Delaborde <th3rac25@gmail.com>",
|
||||
"Emi Simpson <emi@alchemi.dev>",
|
||||
"rhysd <lin90162@yahoo.co.jp>",
|
||||
"Andriy Rakhnin <a@rakhnin.com>",
|
||||
]
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
description = "CLI tool for saving web pages as a single HTML file"
|
||||
homepage = "https://github.com/Y2Z/monolith"
|
||||
repository = "https://github.com/Y2Z/monolith"
|
||||
@@ -21,28 +22,36 @@ include = [
|
||||
]
|
||||
license = "CC0-1.0"
|
||||
|
||||
[features]
|
||||
default = ["vendored-openssl"]
|
||||
# Compile and statically link a copy of OpenSSL.
|
||||
vendored-openssl = ["openssl/vendored"]
|
||||
|
||||
[dependencies]
|
||||
atty = "0.2.14" # Used for highlighting network errors
|
||||
base64 = "0.13.0" # Used for integrity attributes
|
||||
chrono = "0.4.20" # Used for formatting creation timestamp
|
||||
clap = "3.2.16"
|
||||
cssparser = "0.29.6"
|
||||
encoding_rs = "0.8.31"
|
||||
html5ever = "0.24.1"
|
||||
percent-encoding = "2.1.0"
|
||||
sha2 = "0.10.2" # Used for calculating checksums during integrity checks
|
||||
url = "2.2.2"
|
||||
atty = "0.2.14" # Used for highlighting network errors
|
||||
base64 = "0.22.1" # Used for integrity attributes
|
||||
chrono = "0.4.38" # Used for formatting output timestamp
|
||||
clap = "3.2.25" # Used for processing CLI arguments
|
||||
cssparser = "0.34.0" # Used for dealing with CSS
|
||||
encoding_rs = "0.8.34" # Used for parsing and converting document charsets
|
||||
html5ever = "0.27.0" # Used for all things DOM
|
||||
markup5ever_rcdom = "0.3.0" # Used for manipulating DOM
|
||||
percent-encoding = "2.3.1" # Used for encoding URLs
|
||||
sha2 = "0.10.8" # Used for calculating checksums during integrity checks
|
||||
url = "2.5.2" # Used for parsing URLs
|
||||
openssl = "0.10.64" # Used for static linking of the OpenSSL library
|
||||
|
||||
# Used for parsing srcset and NOSCRIPT
|
||||
[dependencies.regex]
|
||||
version = "1.6.0"
|
||||
version = "1.10.6"
|
||||
default-features = false
|
||||
features = ["std", "perf-dfa", "unicode-perl"]
|
||||
|
||||
# Used for making network requests
|
||||
[dependencies.reqwest]
|
||||
version = "0.11.11"
|
||||
version = "0.12.7"
|
||||
default-features = false
|
||||
features = ["default-tls", "blocking", "gzip", "brotli", "deflate"]
|
||||
|
||||
[dev-dependencies]
|
||||
assert_cmd = "2.0.4"
|
||||
assert_cmd = "2.0.16"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
FROM ekidd/rust-musl-builder as builder
|
||||
FROM clux/muslrust:stable as builder
|
||||
|
||||
RUN curl -L -o monolith.tar.gz $(curl -s https://api.github.com/repos/y2z/monolith/releases/latest \
|
||||
| grep "tarball_url.*\"," \
|
||||
@@ -17,6 +17,6 @@ RUN apk update && \
|
||||
apk add --no-cache openssl && \
|
||||
rm -rf "/var/cache/apk/*"
|
||||
|
||||
COPY --from=builder /home/rust/.cargo/bin/monolith /usr/bin/monolith
|
||||
COPY --from=builder /root/.cargo/bin/monolith /usr/bin/monolith
|
||||
WORKDIR /tmp
|
||||
ENTRYPOINT ["/usr/bin/monolith"]
|
||||
|
||||
27
Makefile
27
Makefile
@@ -7,23 +7,30 @@ build:
|
||||
@cargo build --locked
|
||||
.PHONY: build
|
||||
|
||||
test: build
|
||||
@cargo test --locked
|
||||
@cargo fmt --all -- --check
|
||||
.PHONY: test
|
||||
|
||||
lint:
|
||||
@cargo fmt --all --
|
||||
.PHONY: lint
|
||||
clean:
|
||||
@cargo clean
|
||||
.PHONY: clean
|
||||
|
||||
install:
|
||||
@cargo install --force --locked --path .
|
||||
.PHONY: install
|
||||
|
||||
lint:
|
||||
@cargo fmt --all --
|
||||
.PHONY: lint
|
||||
|
||||
lint_check:
|
||||
@cargo fmt --all -- --check
|
||||
.PHONY: lint_check
|
||||
|
||||
test: build
|
||||
@cargo test --locked
|
||||
.PHONY: test
|
||||
|
||||
uninstall:
|
||||
@cargo uninstall
|
||||
.PHONY: uninstall
|
||||
|
||||
clean:
|
||||
@cargo clean
|
||||
update-lock-file:
|
||||
@cargo update
|
||||
.PHONY: clean
|
||||
|
||||
109
README.md
109
README.md
@@ -36,6 +36,24 @@ cargo install monolith
|
||||
brew install monolith
|
||||
```
|
||||
|
||||
#### Via [Chocolatey](https://community.chocolatey.org/packages/monolith) (Windows)
|
||||
|
||||
```console
|
||||
choco install monolith
|
||||
```
|
||||
|
||||
#### Via [Scoop](https://scoop.sh/#/apps?q=monolith) (Windows)
|
||||
|
||||
```console
|
||||
scoop install main/monolith
|
||||
```
|
||||
|
||||
#### Via [Winget](https://winstall.app/apps/Y2Z.Monolith) (Windows)
|
||||
|
||||
```console
|
||||
winget install --id=Y2Z.Monolith -e
|
||||
```
|
||||
|
||||
#### Via [MacPorts](https://ports.macports.org/port/monolith/summary) (macOS)
|
||||
|
||||
```console
|
||||
@@ -48,6 +66,42 @@ sudo port install monolith
|
||||
snap install monolith
|
||||
```
|
||||
|
||||
#### Using [Guix](https://packages.guix.gnu.org/packages/monolith) (GNU/Linux)
|
||||
|
||||
```console
|
||||
guix install monolith
|
||||
```
|
||||
|
||||
#### Using [NixPkgs](https://search.nixos.org/packages?channel=unstable&show=monolith&query=monolith)
|
||||
|
||||
```console
|
||||
nix-env -iA nixpkgs.monolith
|
||||
```
|
||||
|
||||
#### Using [Flox](https://flox.dev)
|
||||
|
||||
```console
|
||||
flox install monolith
|
||||
```
|
||||
|
||||
#### Using [Pacman](https://archlinux.org/packages/extra/x86_64/monolith) (Arch Linux)
|
||||
|
||||
```console
|
||||
pacman -S monolith
|
||||
```
|
||||
|
||||
#### Using [aports](https://pkgs.alpinelinux.org/packages?name=monolith) (Alpine Linux)
|
||||
|
||||
```console
|
||||
apk add monolith
|
||||
```
|
||||
|
||||
#### Using [XBPS Package Manager](https://voidlinux.org/packages/?q=monolith) (Void Linux)
|
||||
|
||||
```console
|
||||
xbps-install -S monolith
|
||||
```
|
||||
|
||||
#### Using [FreeBSD packages](https://svnweb.freebsd.org/ports/head/www/monolith/) (FreeBSD)
|
||||
|
||||
```console
|
||||
@@ -71,13 +125,31 @@ make install clean
|
||||
#### Using [containers](https://www.docker.com/)
|
||||
|
||||
```console
|
||||
docker build -t Y2Z/monolith .
|
||||
docker build -t y2z/monolith .
|
||||
sudo install -b dist/run-in-container.sh /usr/local/bin/monolith
|
||||
```
|
||||
|
||||
#### From [source](https://github.com/Y2Z/monolith)
|
||||
|
||||
Dependency: `libssl`
|
||||
Dependencies: `libssl`, `cargo`
|
||||
|
||||
<details>
|
||||
<summary>Install cargo (GNU/Linux)</summary>
|
||||
Check if cargo is installed
|
||||
|
||||
```console
|
||||
cargo -v
|
||||
```
|
||||
|
||||
If cargo is not already installed, install and add it to your existing ```$PATH``` (paraphrasing the [official installation instructions](https://doc.rust-lang.org/cargo/getting-started/installation.html)):
|
||||
|
||||
```console
|
||||
curl https://sh.rustup.rs -sSf | sh
|
||||
. "$HOME/.cargo/env"
|
||||
```
|
||||
|
||||
Proceed with installing from source:
|
||||
</details>
|
||||
|
||||
```console
|
||||
git clone https://github.com/Y2Z/monolith.git
|
||||
@@ -100,7 +172,7 @@ monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roa
|
||||
```
|
||||
|
||||
```console
|
||||
cat index.html | monolith -aIiFfcMv -b https://original.site/ - > result.html
|
||||
cat some-site-page.html | monolith -aIiFfcMv -b https://some.site/ - > some-site-page-with-assets.html
|
||||
```
|
||||
|
||||
|
||||
@@ -110,14 +182,16 @@ cat index.html | monolith -aIiFfcMv -b https://original.site/ - > result.html
|
||||
## Options
|
||||
|
||||
- `-a`: Exclude audio sources
|
||||
- `-b`: Use custom `base URL`
|
||||
- `-b`: Use `custom base URL`
|
||||
- `-B`: Forbid retrieving assets from specified domain(s)
|
||||
- `-c`: Exclude CSS
|
||||
- `-C`: Save document using custom `charset`
|
||||
- `-C`: Read cookies from `file`
|
||||
- `-d`: Allow retrieving assets only from specified `domain(s)`
|
||||
- `-e`: Ignore network errors
|
||||
- `-E`: Save document using `custom encoding`
|
||||
- `-f`: Omit frames
|
||||
- `-F`: Exclude web fonts
|
||||
- `-h`: Print help information
|
||||
- `-i`: Remove images
|
||||
- `-I`: Isolate the document
|
||||
- `-j`: Exclude JavaScript
|
||||
@@ -127,7 +201,7 @@ cat index.html | monolith -aIiFfcMv -b https://original.site/ - > result.html
|
||||
- `-o`: Write output to `file` (use “-” for STDOUT)
|
||||
- `-s`: Be quiet
|
||||
- `-t`: Adjust `network request timeout`
|
||||
- `-u`: Provide custom `User-Agent`
|
||||
- `-u`: Provide `custom User-Agent`
|
||||
- `-v`: Exclude videos
|
||||
|
||||
|
||||
@@ -136,7 +210,7 @@ cat index.html | monolith -aIiFfcMv -b https://original.site/ - > result.html
|
||||
|
||||
## Whitelisting and blacklisting domains
|
||||
|
||||
Options `-d` and `-B` provide control over what domains can be used to retrieve assets from. E.g.:
|
||||
Options `-d` and `-B` provide control over what domains can be used to retrieve assets from, e.g.:
|
||||
|
||||
```console
|
||||
monolith -I -d example.com -d www.example.com https://example.com -o example-only.html
|
||||
@@ -156,7 +230,7 @@ Monolith doesn't feature a JavaScript engine, hence websites that retrieve and d
|
||||
For example, Chromium (Chrome) can be used to act as a pre-processor for such pages:
|
||||
|
||||
```console
|
||||
chromium --headless --incognito --dump-dom https://github.com | monolith - -I -b https://github.com -o github.html
|
||||
chromium --headless --window-size=1920,1080 --run-all-compositor-stages-before-draw --virtual-time-budget=9000 --incognito --dump-dom https://github.com | monolith - -I -b https://github.com -o github.html
|
||||
```
|
||||
|
||||
|
||||
@@ -179,26 +253,7 @@ Please open an issue if something is wrong, that helps make this project better.
|
||||
---------------------------------------------------
|
||||
|
||||
|
||||
## Related projects
|
||||
|
||||
- Monolith Chrome Extension: https://github.com/rhysd/monolith-of-web
|
||||
- Pagesaver: https://github.com/distributed-mind/pagesaver
|
||||
- Personal WayBack Machine: https://github.com/popey/pwbm
|
||||
- Hako: https://github.com/dmpop/hako
|
||||
- Monk: https://github.com/monk-dev/monk
|
||||
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
|
||||
## License
|
||||
|
||||
To the extent possible under law, the author(s) have dedicated all copyright related and neighboring rights to this software to the public domain worldwide.
|
||||
This software is distributed without any warranty.
|
||||
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
|
||||
<!-- Microtext -->
|
||||
<sub>Keep in mind that `monolith` is not aware of your browser’s session</sub>
|
||||
|
||||
2
dist/run-in-container.sh
vendored
Normal file → Executable file
2
dist/run-in-container.sh
vendored
Normal file → Executable file
@@ -7,4 +7,4 @@ if which podman 2>&1 > /dev/null; then
|
||||
DOCKER=podman
|
||||
fi
|
||||
|
||||
$DOCKER run --rm Y2Z/$PROG_NAME "$@"
|
||||
$DOCKER run --rm y2z/$PROG_NAME "$@"
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
# 1. Record architecture decisions
|
||||
|
||||
Date: 2019-12-25
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
We need to record the architectural decisions made on this project.
|
||||
|
||||
## Decision
|
||||
|
||||
We will use Architecture Decision Records, as [described by Michael Nygard](http://thinkrelevance.com/blog/2011/11/15/documenting-architecture-decisions).
|
||||
|
||||
## Consequences
|
||||
|
||||
See Michael Nygard's article, linked above. For a lightweight ADR toolset, see Nat Pryce's [adr-tools](https://github.com/npryce/adr-tools).
|
||||
@@ -1,19 +0,0 @@
|
||||
# 2. NOSCRIPT nodes
|
||||
|
||||
Date: 2020-04-16
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
HTML pages can contain `noscript` nodes, which reveal their contents only in case when JavaScript is not available. Most of the time they contain hidden messages that inform about certain JavaScript-dependent features not being operational, however sometimes can also feature media assets or even iframes.
|
||||
|
||||
## Decision
|
||||
|
||||
When the document is being saved with or without JavaScript, each `noscript` node should be preserved while its children need to be processed exactly the same way as the rest of the document. This approach will ensure that even hidden remote assets are embedded — since those hidden elements may have to be displayed later in a browser that has JavaScript turned off. An option should be available to "unwrap" all `noscript` nodes in order to make their contents always visible in the document, complementing the "disable JS" function of the program.
|
||||
|
||||
## Consequences
|
||||
|
||||
Saved documents will have contents of all `noscript` nodes processed as if they are part of the document's DOM, therefore properly display images encapsulated within `noscript` nodes when being viewed in browsers that have JavaScript turned off (or have no JavaScript support in the first place). The new option to "unwrap" `noscript` elements will help the user ensure that the resulting document always represents what the original web page looked like in a browser that had JavaScript turned off.
|
||||
@@ -1,21 +0,0 @@
|
||||
# 3. Network request timeout
|
||||
|
||||
Date: 2020-02-15
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
A slow network connection and overloaded server may negatively impact network response time.
|
||||
|
||||
## Decision
|
||||
|
||||
Make the program simulate behavior of popular web browsers and CLI tools, where the default network response timeout is most often set to 120 seconds.
|
||||
|
||||
Instead of featuring retries for timed out network requests, the program should have an option to adjust the timeout length, along with making it indefinite when given "0" as its value.
|
||||
|
||||
## Consequences
|
||||
|
||||
The user is able to retrieve resources that have long response time, as well as obtain full control over how soon, and if at all, network requests should time out.
|
||||
@@ -1,21 +0,0 @@
|
||||
# 4. Asset integrity check
|
||||
|
||||
Date: 2020-02-23
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
In HTML5, `link` and `script` nodes have an attribute named `integrity`, which lets the browser check if the remote file is valid, mostly for the purpose of enhancing page security.
|
||||
|
||||
## Decision
|
||||
|
||||
In order to replicate the browser's behavior, the program should perform integrity check the same way it does, excluding the linked asset from the final result if such check fails.
|
||||
|
||||
The `integrity` attribute should be removed from nodes, as it bears no benefit for resources embedded as data URLs.
|
||||
|
||||
## Consequences
|
||||
|
||||
Assets that fail to pass the check get excluded from the saved document. Meanwhile, saved documents no longer contain integrity attributes on all `link` and `script` nodes.
|
||||
@@ -1,19 +0,0 @@
|
||||
# 5. Asset Minimization
|
||||
|
||||
Date: 2020-03-14
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
It may look like a good idea to make monolith compress retrieved assets while saving the page for the purpose of reducing the resulting document's file size.
|
||||
|
||||
## Decision
|
||||
|
||||
Given that the main purpose of this program is to save pages in a convenient to store and share manner — it's mostly an archiving tool, aside from being able to tell monolith to exclude certain types of assets (e.g. images, CSS, JavaScript), it would be outside of scope of this program to implement code for compressing assets. Minimizing files before embedding them does not reduce the amount of data that needs to be transferred either. A separate tool can be used later to compress and minimize pages saved by monolith, if needed.
|
||||
|
||||
## Consequences
|
||||
|
||||
Monolith will not support modification of original document assets for the purpose of reducing their size, sticking to performing only minimal amount of modifications to the original web page — whatever is needed to provide security or exclude unwanted asset types.
|
||||
@@ -1,19 +0,0 @@
|
||||
# 6. Reload and location `meta` tags
|
||||
|
||||
Date: 2020-06-25
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
HTML documents may contain `meta` tags capable of automatically refreshing the page or redirecting to another location.
|
||||
|
||||
## Decision
|
||||
|
||||
Since the resulting document is saved to disk and generally not intended to be served over the network, it only makes sense to remove `meta` tags that have `http-equiv` attribute equal to "Refresh" or "Location", in order to prevent them from reloading the page or redirecting to another location.
|
||||
|
||||
## Consequences
|
||||
|
||||
Monolith will ensure that saved documents do not contain `meta` tags capable of changing location or reloading the page.
|
||||
@@ -1,19 +0,0 @@
|
||||
# 7. Network errors
|
||||
|
||||
Date: 2020-11-22
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
Servers may return information with HTTP response codes other than `200`, however those responses may still contain useful data.
|
||||
|
||||
## Decision
|
||||
|
||||
Fail by default, notifying of the network error. Add option to continue retrieving assets by treating all response codes as `200`.
|
||||
|
||||
## Consequences
|
||||
|
||||
Monolith will fail to obtain resources with status other than `200`, unless told to ignore network errors.
|
||||
@@ -1,40 +0,0 @@
|
||||
# 8. Base Tag
|
||||
|
||||
Date: 2020-12-25
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
HTML documents may contain `base` tag, which influences resolution of anchor links and relative URLs as well as dynamically loaded resources.
|
||||
|
||||
Sometimes, in order to make certain saved documents function closer to how they operate while being served from a remote server, the `base` tag specifying the source page's URL may need to be added to the document.
|
||||
|
||||
There can be only one such tag. If multiple `base` tags are present, only the first encountered tag ends up being used.
|
||||
|
||||
## Decision
|
||||
|
||||
Adding the `base` tag should be optional — saved documents should not contain the `base` tag unless it was specified by the user, or the document originally had the `base` tag in it.
|
||||
|
||||
Existing `href` attribute's value of the original `base` tag should be used for resolving the document's relative links instead of document's own URL (precisely the way browsers do it).
|
||||
|
||||
## Consequences
|
||||
|
||||
#### If the base tag does not exist in the source document
|
||||
|
||||
- If the base tag does not exist in the source document
|
||||
- With base URL option provided
|
||||
- use the specified base URL value to retrieve assets, keep original base URL value in the document
|
||||
- Without base URL option provided
|
||||
- download document as usual, do not add base tag
|
||||
- If the base tag already exists in the source document
|
||||
- With base URL option provided
|
||||
- we overwrite the original base URL before retrieving assets, keep new base URL value in the document
|
||||
- Without base URL option provided:
|
||||
- use the base URL from the original document to retrieve assets, keep original base URL value in the document
|
||||
|
||||
The program will obtain ability to retrieve remote assets for non-remote sources (such as data URLs and local files).
|
||||
|
||||
The program will obtain the ability to get rid of existing base tag values (by providing an empty one).
|
||||
@@ -1,3 +0,0 @@
|
||||
# References
|
||||
|
||||
- https://content-security-policy.com/
|
||||
@@ -1,23 +0,0 @@
|
||||
# Web apps that can be saved with Monolith
|
||||
|
||||
These apps retain all or most of their functionality when saved with Monolith:
|
||||
|
||||
|
||||
## Converse
|
||||
|
||||
| Website | https://conversejs.org |
|
||||
|:-----------------------|:--------------------------------------------------------------------|
|
||||
| Description | An XMPP client built using web technologies |
|
||||
| Functionality retained | **full** |
|
||||
| Command to use | `monolith https://conversejs.org/fullscreen.html > conversejs.html` |
|
||||
| Monolith version used | 2.2.7 |
|
||||
|
||||
|
||||
## Markdown Tables generator
|
||||
|
||||
| Website | https://www.tablesgenerator.com |
|
||||
|:--------------------------|:-----------------------------------------------------------------------------------------------|
|
||||
| Description | Tool for creating tables in extended Markdown format |
|
||||
| Functionality retained | **full** |
|
||||
| Command to use | `monolith -I https://www.tablesgenerator.com/markdown_tables -o markdown-table-generator.html` |
|
||||
| Monolith version used | 2.6.1 |
|
||||
@@ -2,7 +2,7 @@
|
||||
<package xmlns="http://schemas.microsoft.com/packaging/2015/06/nuspec.xsd">
|
||||
<metadata>
|
||||
<id>monolith</id>
|
||||
<version>2.4.0</version>
|
||||
<version>2.8.1</version>
|
||||
<title>Monolith</title>
|
||||
<authors>Sunshine, Mahdi Robatipoor, Emmanuel Delaborde, Emi Simpson, rhysd</authors>
|
||||
<projectUrl>https://github.com/Y2Z/monolith</projectUrl>
|
||||
|
||||
119
src/cookies.rs
Normal file
119
src/cookies.rs
Normal file
@@ -0,0 +1,119 @@
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use url::Url;
|
||||
|
||||
/// A single cookie record, as read from one line of a cookie file.
///
/// Each record is built from seven tab-separated fields, in the order the
/// fields are declared here.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Cookie {
    /// Host the cookie belongs to; the parser stores it lowercased.
    /// A leading `.` combined with `include_subdomains` enables matching
    /// of subdomains.
    pub domain: String,
    /// Whether hosts below `domain` are matched as well (`TRUE` in the file).
    pub include_subdomains: bool,
    /// Path prefix a URL's path has to start with for the cookie to match.
    pub path: String,
    /// When set, the cookie never matches plain `http` URLs (`TRUE` in the file).
    pub https_only: bool,
    /// Expiry time in seconds since the Unix epoch; `0` marks a session
    /// cookie that never expires.
    pub expires: u64,
    /// Cookie name.
    pub name: String,
    /// Cookie value.
    pub value: String,
}
|
||||
|
||||
/// Errors that can occur while parsing the contents of a cookie file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CookieFileContentsParseError {
    /// The first line of the file is not one of the recognised header lines.
    InvalidHeader,
}

impl std::fmt::Display for CookieFileContentsParseError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            CookieFileContentsParseError::InvalidHeader => {
                f.write_str("cookie file does not start with a recognised header line")
            }
        }
    }
}

// Lets callers propagate the error with `?` into `Box<dyn Error>` and friends.
impl std::error::Error for CookieFileContentsParseError {}
|
||||
|
||||
impl Cookie {
|
||||
pub fn is_expired(&self) -> bool {
|
||||
if self.expires == 0 {
|
||||
return false; // Session, never expires
|
||||
}
|
||||
|
||||
let start = SystemTime::now();
|
||||
let since_the_epoch = start
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.expect("Time went backwards");
|
||||
|
||||
self.expires < since_the_epoch.as_secs()
|
||||
}
|
||||
|
||||
pub fn matches_url(&self, url: &str) -> bool {
|
||||
match Url::parse(&url) {
|
||||
Ok(url) => {
|
||||
// Check protocol scheme
|
||||
match url.scheme() {
|
||||
"http" => {
|
||||
if self.https_only {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
"https" => {}
|
||||
_ => {
|
||||
// Should never match URLs of protocols other than HTTP(S)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Check host
|
||||
if let Some(url_host) = url.host_str() {
|
||||
if self.domain.starts_with(".") && self.include_subdomains {
|
||||
if !url_host.to_lowercase().ends_with(&self.domain)
|
||||
&& !url_host
|
||||
.eq_ignore_ascii_case(&self.domain[1..self.domain.len() - 1])
|
||||
{
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if !url_host.eq_ignore_ascii_case(&self.domain) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check path
|
||||
if !url.path().eq_ignore_ascii_case(&self.path)
|
||||
&& !url.path().starts_with(&self.path)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_cookie_file_contents(
|
||||
cookie_file_contents: &str,
|
||||
) -> Result<Vec<Cookie>, CookieFileContentsParseError> {
|
||||
let mut cookies: Vec<Cookie> = Vec::new();
|
||||
|
||||
for (i, line) in cookie_file_contents.lines().enumerate() {
|
||||
if i == 0 {
|
||||
// Parsing first line
|
||||
if !line.eq("# HTTP Cookie File") && !line.eq("# Netscape HTTP Cookie File") {
|
||||
return Err(CookieFileContentsParseError::InvalidHeader);
|
||||
}
|
||||
} else {
|
||||
// Ignore comment lines
|
||||
if line.starts_with("#") {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Attempt to parse values
|
||||
let mut fields = line.split("\t");
|
||||
if fields.clone().count() != 7 {
|
||||
continue;
|
||||
}
|
||||
cookies.push(Cookie {
|
||||
domain: fields.next().unwrap().to_string().to_lowercase(),
|
||||
include_subdomains: fields.next().unwrap().to_string() == "TRUE",
|
||||
path: fields.next().unwrap().to_string(),
|
||||
https_only: fields.next().unwrap().to_string() == "TRUE",
|
||||
expires: fields.next().unwrap().parse::<u64>().unwrap(),
|
||||
name: fields.next().unwrap().to_string(),
|
||||
value: fields.next().unwrap().to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(cookies)
|
||||
}
|
||||
39
src/css.rs
39
src/css.rs
@@ -36,7 +36,6 @@ pub fn embed_css(
|
||||
document_url: &Url,
|
||||
css: &str,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
) -> String {
|
||||
let mut input = ParserInput::new(&css);
|
||||
let mut parser = Parser::new(&mut input);
|
||||
@@ -47,7 +46,6 @@ pub fn embed_css(
|
||||
document_url,
|
||||
&mut parser,
|
||||
options,
|
||||
depth,
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
@@ -81,15 +79,14 @@ pub fn process_css<'a>(
|
||||
document_url: &Url,
|
||||
parser: &mut Parser,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
rule_name: &str,
|
||||
prop_name: &str,
|
||||
func_name: &str,
|
||||
) -> Result<String, ParseError<'a, String>> {
|
||||
let mut result: String = "".to_string();
|
||||
|
||||
let mut curr_rule: String = rule_name.clone().to_string();
|
||||
let mut curr_prop: String = prop_name.clone().to_string();
|
||||
let mut curr_rule: String = rule_name.to_string();
|
||||
let mut curr_prop: String = prop_name.to_string();
|
||||
let mut token: &Token;
|
||||
let mut token_offset: SourcePosition;
|
||||
|
||||
@@ -135,7 +132,6 @@ pub fn process_css<'a>(
|
||||
document_url,
|
||||
parser,
|
||||
options,
|
||||
depth,
|
||||
rule_name,
|
||||
curr_prop.as_str(),
|
||||
func_name,
|
||||
@@ -190,14 +186,7 @@ pub fn process_css<'a>(
|
||||
}
|
||||
|
||||
let import_full_url: Url = resolve_url(&document_url, value);
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&document_url,
|
||||
&import_full_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
match retrieve_asset(cache, client, &document_url, &import_full_url, options) {
|
||||
Ok((
|
||||
import_contents,
|
||||
import_final_url,
|
||||
@@ -213,7 +202,6 @@ pub fn process_css<'a>(
|
||||
&import_final_url,
|
||||
&String::from_utf8_lossy(&import_contents),
|
||||
options,
|
||||
depth + 1,
|
||||
)
|
||||
.as_bytes(),
|
||||
&import_final_url,
|
||||
@@ -251,7 +239,6 @@ pub fn process_css<'a>(
|
||||
&document_url,
|
||||
&resolved_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((data, final_url, media_type, charset)) => {
|
||||
let mut data_url =
|
||||
@@ -341,14 +328,7 @@ pub fn process_css<'a>(
|
||||
result.push_str("url(");
|
||||
if is_import {
|
||||
let full_url: Url = resolve_url(&document_url, value);
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&document_url,
|
||||
&full_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
match retrieve_asset(cache, client, &document_url, &full_url, options) {
|
||||
Ok((css, final_url, media_type, charset)) => {
|
||||
let mut data_url = create_data_url(
|
||||
&media_type,
|
||||
@@ -359,7 +339,6 @@ pub fn process_css<'a>(
|
||||
&final_url,
|
||||
&String::from_utf8_lossy(&css),
|
||||
options,
|
||||
depth + 1,
|
||||
)
|
||||
.as_bytes(),
|
||||
&final_url,
|
||||
@@ -380,14 +359,7 @@ pub fn process_css<'a>(
|
||||
result.push_str(format_quoted_string(EMPTY_IMAGE_DATA_URL).as_str());
|
||||
} else {
|
||||
let full_url: Url = resolve_url(&document_url, value);
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&document_url,
|
||||
&full_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
match retrieve_asset(cache, client, &document_url, &full_url, options) {
|
||||
Ok((data, final_url, media_type, charset)) => {
|
||||
let mut data_url =
|
||||
create_data_url(&media_type, &charset, &data, &final_url);
|
||||
@@ -423,7 +395,6 @@ pub fn process_css<'a>(
|
||||
document_url,
|
||||
parser,
|
||||
options,
|
||||
depth,
|
||||
curr_rule.as_str(),
|
||||
curr_prop.as_str(),
|
||||
function_name,
|
||||
|
||||
159
src/html.rs
159
src/html.rs
@@ -1,13 +1,13 @@
|
||||
use base64;
|
||||
use base64::prelude::*;
|
||||
use chrono::prelude::*;
|
||||
use encoding_rs::Encoding;
|
||||
use html5ever::interface::QualName;
|
||||
use html5ever::parse_document;
|
||||
use html5ever::rcdom::{Handle, NodeData, RcDom};
|
||||
use html5ever::serialize::{serialize, SerializeOpts};
|
||||
use html5ever::tendril::{format_tendril, TendrilSink};
|
||||
use html5ever::tree_builder::{Attribute, TreeSink};
|
||||
use html5ever::{local_name, namespace_url, ns, LocalName};
|
||||
use markup5ever_rcdom::{Handle, NodeData, RcDom, SerializableHandle};
|
||||
use regex::Regex;
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::Url;
|
||||
@@ -30,10 +30,16 @@ struct SrcSetItem<'a> {
|
||||
|
||||
const ICON_VALUES: &'static [&str] = &["icon", "shortcut icon"];
|
||||
|
||||
const WHITESPACES: &'static [char] = &['\t', '\n', '\x0c', '\r', ' '];
|
||||
|
||||
pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom {
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, document, SerializeOpts::default())
|
||||
.expect("unable to serialize DOM into buffer");
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.expect("unable to serialize DOM into buffer");
|
||||
|
||||
let mut dom = html_to_dom(&buf, "utf-8".to_string());
|
||||
let doc = dom.get_document();
|
||||
@@ -65,15 +71,15 @@ pub fn check_integrity(data: &[u8], integrity: &str) -> bool {
|
||||
if integrity.starts_with("sha256-") {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(data);
|
||||
base64::encode(hasher.finalize()) == integrity[7..]
|
||||
BASE64_STANDARD.encode(hasher.finalize()) == integrity[7..]
|
||||
} else if integrity.starts_with("sha384-") {
|
||||
let mut hasher = Sha384::new();
|
||||
hasher.update(data);
|
||||
base64::encode(hasher.finalize()) == integrity[7..]
|
||||
BASE64_STANDARD.encode(hasher.finalize()) == integrity[7..]
|
||||
} else if integrity.starts_with("sha512-") {
|
||||
let mut hasher = Sha512::new();
|
||||
hasher.update(data);
|
||||
base64::encode(hasher.finalize()) == integrity[7..]
|
||||
BASE64_STANDARD.encode(hasher.finalize()) == integrity[7..]
|
||||
} else {
|
||||
false
|
||||
}
|
||||
@@ -161,15 +167,44 @@ pub fn embed_srcset(
|
||||
document_url: &Url,
|
||||
srcset: &str,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
) -> String {
|
||||
let mut array: Vec<SrcSetItem> = vec![];
|
||||
let re = Regex::new(r",\s+").unwrap();
|
||||
for srcset_item in re.split(srcset) {
|
||||
let parts: Vec<&str> = srcset_item.trim().split_whitespace().collect();
|
||||
if parts.len() > 0 {
|
||||
let path = parts[0].trim();
|
||||
let descriptor = if parts.len() > 1 { parts[1].trim() } else { "" };
|
||||
|
||||
// Parse srcset attribute according to the specs
|
||||
// https://html.spec.whatwg.org/multipage/images.html#srcset-attribute
|
||||
let mut offset = 0;
|
||||
let size = srcset.chars().count();
|
||||
|
||||
while offset < size {
|
||||
let mut has_descriptor = true;
|
||||
// Zero or more whitespaces + skip leading comma
|
||||
let url_start = offset
|
||||
+ srcset[offset..]
|
||||
.chars()
|
||||
.take_while(|&c| WHITESPACES.contains(&c) || c == ',')
|
||||
.count();
|
||||
if url_start >= size {
|
||||
break;
|
||||
}
|
||||
// A valid non-empty URL that does not start or end with comma
|
||||
let mut url_end = url_start
|
||||
+ srcset[url_start..]
|
||||
.chars()
|
||||
.take_while(|&c| !WHITESPACES.contains(&c))
|
||||
.count();
|
||||
while (url_end - 1) > url_start && srcset.chars().nth(url_end - 1).unwrap() == ',' {
|
||||
has_descriptor = false;
|
||||
url_end -= 1;
|
||||
}
|
||||
offset = url_end;
|
||||
// If the URL wasn't terminated by comma there may also be a descriptor
|
||||
if has_descriptor {
|
||||
offset += srcset[url_end..].chars().take_while(|&c| c != ',').count();
|
||||
}
|
||||
// Collect SrcSetItem
|
||||
if url_end > url_start {
|
||||
let path = &srcset[url_start..url_end];
|
||||
let descriptor = &srcset[url_end..offset].trim();
|
||||
let srcset_real_item = SrcSetItem { path, descriptor };
|
||||
array.push(srcset_real_item);
|
||||
}
|
||||
@@ -182,14 +217,7 @@ pub fn embed_srcset(
|
||||
result.push_str(EMPTY_IMAGE_DATA_URL);
|
||||
} else {
|
||||
let image_full_url: Url = resolve_url(&document_url, part.path);
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&document_url,
|
||||
&image_full_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
match retrieve_asset(cache, client, &document_url, &image_full_url, options) {
|
||||
Ok((image_data, image_final_url, image_media_type, image_charset)) => {
|
||||
let mut image_data_url = create_data_url(
|
||||
&image_media_type,
|
||||
@@ -197,7 +225,7 @@ pub fn embed_srcset(
|
||||
&image_data,
|
||||
&image_final_url,
|
||||
);
|
||||
// Append retreved asset as a data URL
|
||||
// Append retrieved asset as a data URL
|
||||
image_data_url.set_fragment(image_full_url.fragment());
|
||||
result.push_str(image_data_url.as_ref());
|
||||
}
|
||||
@@ -428,8 +456,12 @@ pub fn is_icon(attr_value: &str) -> bool {
|
||||
|
||||
pub fn set_base_url(document: &Handle, desired_base_href: String) -> RcDom {
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, document, SerializeOpts::default())
|
||||
.expect("unable to serialize DOM into buffer");
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.expect("unable to serialize DOM into buffer");
|
||||
|
||||
let mut dom = html_to_dom(&buf, "utf-8".to_string());
|
||||
let doc = dom.get_document();
|
||||
@@ -534,7 +566,7 @@ pub fn set_node_attr(node: &Handle, attr_name: &str, attr_value: Option<String>)
|
||||
|
||||
pub fn serialize_document(mut dom: RcDom, document_encoding: String, options: &Options) -> Vec<u8> {
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
let doc = dom.get_document();
|
||||
let document = dom.get_document();
|
||||
|
||||
if options.isolate
|
||||
|| options.no_css
|
||||
@@ -544,7 +576,7 @@ pub fn serialize_document(mut dom: RcDom, document_encoding: String, options: &O
|
||||
|| options.no_images
|
||||
{
|
||||
// Take care of CSP
|
||||
if let Some(html) = get_child_node_by_name(&doc, "html") {
|
||||
if let Some(html) = get_child_node_by_name(&document, "html") {
|
||||
if let Some(head) = get_child_node_by_name(&html, "head") {
|
||||
let meta = dom.create_element(
|
||||
QualName::new(None, ns!(), local_name!("meta")),
|
||||
@@ -570,8 +602,12 @@ pub fn serialize_document(mut dom: RcDom, document_encoding: String, options: &O
|
||||
}
|
||||
}
|
||||
|
||||
serialize(&mut buf, &doc, SerializeOpts::default())
|
||||
.expect("Unable to serialize DOM into buffer");
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.expect("Unable to serialize DOM into buffer");
|
||||
|
||||
// Unwrap NOSCRIPT elements
|
||||
if options.unwrap_noscript {
|
||||
@@ -599,18 +635,10 @@ pub fn retrieve_and_embed_asset(
|
||||
attr_name: &str,
|
||||
attr_value: &str,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
) {
|
||||
let resolved_url: Url = resolve_url(document_url, attr_value.clone());
|
||||
let resolved_url: Url = resolve_url(document_url, attr_value);
|
||||
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&document_url.clone(),
|
||||
&resolved_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
match retrieve_asset(cache, client, &document_url.clone(), &resolved_url, options) {
|
||||
Ok((data, final_url, mut media_type, charset)) => {
|
||||
let node_name: &str = get_node_name(&node).unwrap();
|
||||
|
||||
@@ -639,7 +667,7 @@ pub fn retrieve_and_embed_asset(
|
||||
|
||||
if node_name == "link" && determine_link_node_type(node) == "stylesheet" {
|
||||
// Stylesheet LINK elements require special treatment
|
||||
let css: String = embed_css(cache, client, &final_url, &s, options, depth + 1);
|
||||
let css: String = embed_css(cache, client, &final_url, &s, options);
|
||||
|
||||
// Create and embed data URL
|
||||
let css_data_url =
|
||||
@@ -648,19 +676,12 @@ pub fn retrieve_and_embed_asset(
|
||||
} else if node_name == "frame" || node_name == "iframe" {
|
||||
// (I)FRAMEs are also quite different from conventional resources
|
||||
let frame_dom = html_to_dom(&data, charset.clone());
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
client,
|
||||
&final_url,
|
||||
&frame_dom.document,
|
||||
&options,
|
||||
depth + 1,
|
||||
);
|
||||
walk_and_embed_assets(cache, client, &final_url, &frame_dom.document, &options);
|
||||
|
||||
let mut frame_data: Vec<u8> = Vec::new();
|
||||
serialize(
|
||||
&mut frame_data,
|
||||
&frame_dom.document,
|
||||
&SerializableHandle::from(frame_dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
@@ -710,13 +731,12 @@ pub fn walk_and_embed_assets(
|
||||
document_url: &Url,
|
||||
node: &Handle,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
) {
|
||||
match node.data {
|
||||
NodeData::Document => {
|
||||
// Dig deeper
|
||||
for child in node.children.borrow().iter() {
|
||||
walk_and_embed_assets(cache, client, &document_url, child, options, depth);
|
||||
walk_and_embed_assets(cache, client, &document_url, child, options);
|
||||
}
|
||||
}
|
||||
NodeData::Element {
|
||||
@@ -751,7 +771,6 @@ pub fn walk_and_embed_assets(
|
||||
"href",
|
||||
&link_attr_href_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
} else {
|
||||
set_node_attr(node, "href", None);
|
||||
@@ -774,7 +793,6 @@ pub fn walk_and_embed_assets(
|
||||
"href",
|
||||
&link_attr_href_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -816,7 +834,6 @@ pub fn walk_and_embed_assets(
|
||||
"background",
|
||||
&body_attr_background_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -862,7 +879,6 @@ pub fn walk_and_embed_assets(
|
||||
"src",
|
||||
&img_full_url,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -870,14 +886,8 @@ pub fn walk_and_embed_assets(
|
||||
// Resolve srcset attribute
|
||||
if let Some(img_srcset) = get_node_attr(node, "srcset") {
|
||||
if !img_srcset.is_empty() {
|
||||
let resolved_srcset: String = embed_srcset(
|
||||
cache,
|
||||
client,
|
||||
&document_url,
|
||||
&img_srcset,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
let resolved_srcset: String =
|
||||
embed_srcset(cache, client, &document_url, &img_srcset, options);
|
||||
set_node_attr(node, "srcset", Some(resolved_srcset));
|
||||
}
|
||||
}
|
||||
@@ -907,7 +917,6 @@ pub fn walk_and_embed_assets(
|
||||
"src",
|
||||
&input_attr_src_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -940,7 +949,6 @@ pub fn walk_and_embed_assets(
|
||||
"href",
|
||||
&image_href,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -961,7 +969,6 @@ pub fn walk_and_embed_assets(
|
||||
"src",
|
||||
&source_attr_src_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
} else if parent_node_name == "video" {
|
||||
@@ -976,7 +983,6 @@ pub fn walk_and_embed_assets(
|
||||
"src",
|
||||
&source_attr_src_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -998,7 +1004,6 @@ pub fn walk_and_embed_assets(
|
||||
&document_url,
|
||||
&source_attr_srcset_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
set_node_attr(node, "srcset", Some(resolved_srcset));
|
||||
}
|
||||
@@ -1051,7 +1056,6 @@ pub fn walk_and_embed_assets(
|
||||
"src",
|
||||
&script_attr_src.unwrap_or_default(),
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1069,7 +1073,6 @@ pub fn walk_and_embed_assets(
|
||||
&document_url,
|
||||
tendril.as_ref(),
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
tendril.clear();
|
||||
tendril.push_slice(&replacement);
|
||||
@@ -1101,7 +1104,6 @@ pub fn walk_and_embed_assets(
|
||||
"src",
|
||||
&frame_attr_src_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1121,7 +1123,6 @@ pub fn walk_and_embed_assets(
|
||||
"src",
|
||||
&audio_attr_src_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1140,7 +1141,6 @@ pub fn walk_and_embed_assets(
|
||||
"src",
|
||||
&video_attr_src_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1164,7 +1164,6 @@ pub fn walk_and_embed_assets(
|
||||
"poster",
|
||||
&video_attr_poster_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1188,7 +1187,6 @@ pub fn walk_and_embed_assets(
|
||||
&document_url,
|
||||
&noscript_contents_dom.document,
|
||||
&options,
|
||||
depth,
|
||||
);
|
||||
// Get rid of original contents
|
||||
noscript_contents.clear();
|
||||
@@ -1198,8 +1196,12 @@ pub fn walk_and_embed_assets(
|
||||
{
|
||||
if let Some(body) = get_child_node_by_name(&html, "body") {
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &body, SerializeOpts::default())
|
||||
.expect("Unable to serialize DOM into buffer");
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(body.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.expect("Unable to serialize DOM into buffer");
|
||||
let result = String::from_utf8_lossy(&buf);
|
||||
noscript_contents.push_slice(&result);
|
||||
}
|
||||
@@ -1225,7 +1227,6 @@ pub fn walk_and_embed_assets(
|
||||
&document_url,
|
||||
&node_attr_style_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
set_node_attr(node, "style", Some(embedded_style));
|
||||
}
|
||||
@@ -1249,7 +1250,7 @@ pub fn walk_and_embed_assets(
|
||||
|
||||
// Dig deeper
|
||||
for child in node.children.borrow().iter() {
|
||||
walk_and_embed_assets(cache, client, &document_url, child, options, depth);
|
||||
walk_and_embed_assets(cache, client, &document_url, child, options);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
pub mod cookies;
|
||||
pub mod css;
|
||||
pub mod html;
|
||||
pub mod js;
|
||||
|
||||
54
src/main.rs
54
src/main.rs
@@ -1,5 +1,5 @@
|
||||
use encoding_rs::Encoding;
|
||||
use html5ever::rcdom::RcDom;
|
||||
use markup5ever_rcdom::RcDom;
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
||||
use std::collections::HashMap;
|
||||
@@ -10,6 +10,7 @@ use std::process;
|
||||
use std::time::Duration;
|
||||
use url::Url;
|
||||
|
||||
use monolith::cookies::parse_cookie_file_contents;
|
||||
use monolith::html::{
|
||||
add_favicon, create_metadata_tag, get_base_url, get_charset, has_favicon, html_to_dom,
|
||||
serialize_document, set_base_url, set_charset, walk_and_embed_assets,
|
||||
@@ -64,7 +65,7 @@ pub fn read_stdin() -> Vec<u8> {
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let options = Options::from_args();
|
||||
let mut options = Options::from_args();
|
||||
|
||||
// Check if target was provided
|
||||
if options.target.len() == 0 {
|
||||
@@ -74,10 +75,10 @@ fn main() {
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
// Check if custom charset is valid
|
||||
if let Some(custom_charset) = options.charset.clone() {
|
||||
if !Encoding::for_label_no_replacement(custom_charset.as_bytes()).is_some() {
|
||||
eprintln!("Unknown encoding: {}", &custom_charset);
|
||||
// Check if custom encoding is valid
|
||||
if let Some(custom_encoding) = options.encoding.clone() {
|
||||
if !Encoding::for_label_no_replacement(custom_encoding.as_bytes()).is_some() {
|
||||
eprintln!("Unknown encoding: {}", &custom_encoding);
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
@@ -139,6 +140,30 @@ fn main() {
|
||||
},
|
||||
};
|
||||
|
||||
// Read and parse cookie file
|
||||
if let Some(opt_cookie_file) = options.cookie_file.clone() {
|
||||
match fs::read_to_string(opt_cookie_file) {
|
||||
Ok(str) => match parse_cookie_file_contents(&str) {
|
||||
Ok(cookies) => {
|
||||
options.cookies = cookies;
|
||||
// for c in &cookies {
|
||||
// // if !cookie.is_expired() {
|
||||
// // options.cookies.append(c);
|
||||
// // }
|
||||
// }
|
||||
}
|
||||
Err(_) => {
|
||||
eprintln!("Could not parse specified cookie file");
|
||||
process::exit(1);
|
||||
}
|
||||
},
|
||||
Err(_) => {
|
||||
eprintln!("Could not read specified cookie file");
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize client
|
||||
let mut cache = HashMap::new();
|
||||
let mut header_map = HeaderMap::new();
|
||||
@@ -173,7 +198,7 @@ fn main() {
|
||||
|| (target_url.scheme() == "http" || target_url.scheme() == "https")
|
||||
|| target_url.scheme() == "data"
|
||||
{
|
||||
match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options, 0) {
|
||||
match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options) {
|
||||
Ok((retrieved_data, final_url, media_type, charset)) => {
|
||||
// Provide output as text without processing it, the way browsers do
|
||||
if !media_type.eq_ignore_ascii_case("text/html")
|
||||
@@ -281,7 +306,7 @@ fn main() {
|
||||
}
|
||||
|
||||
// Traverse through the document and embed remote assets
|
||||
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
|
||||
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options);
|
||||
|
||||
// Update or add new BASE element to reroute network requests and hash-links
|
||||
if let Some(new_base_url) = options.base_url.clone() {
|
||||
@@ -295,14 +320,7 @@ fn main() {
|
||||
{
|
||||
let favicon_ico_url: Url = resolve_url(&base_url, "/favicon.ico");
|
||||
|
||||
match retrieve_asset(
|
||||
&mut cache,
|
||||
&client,
|
||||
&target_url,
|
||||
&favicon_ico_url,
|
||||
&options,
|
||||
0,
|
||||
) {
|
||||
match retrieve_asset(&mut cache, &client, &target_url, &favicon_ico_url, &options) {
|
||||
Ok((data, final_url, media_type, charset)) => {
|
||||
let favicon_data_url: Url =
|
||||
create_data_url(&media_type, &charset, &data, &final_url);
|
||||
@@ -315,8 +333,8 @@ fn main() {
|
||||
}
|
||||
|
||||
// Save using specified charset, if given
|
||||
if let Some(custom_charset) = options.charset.clone() {
|
||||
document_encoding = custom_charset;
|
||||
if let Some(custom_encoding) = options.encoding.clone() {
|
||||
document_encoding = custom_encoding;
|
||||
dom = set_charset(dom, document_encoding.clone());
|
||||
}
|
||||
|
||||
|
||||
48
src/opts.rs
48
src/opts.rs
@@ -1,15 +1,19 @@
|
||||
use clap::{App, Arg, ArgAction};
|
||||
use std::env;
|
||||
|
||||
use crate::cookies::Cookie;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Options {
|
||||
pub no_audio: bool,
|
||||
pub base_url: Option<String>,
|
||||
pub blacklist_domains: bool,
|
||||
pub no_css: bool,
|
||||
pub charset: Option<String>,
|
||||
pub cookie_file: Option<String>,
|
||||
pub cookies: Vec<Cookie>,
|
||||
pub domains: Option<Vec<String>>,
|
||||
pub ignore_errors: bool,
|
||||
pub encoding: Option<String>,
|
||||
pub no_frames: bool,
|
||||
pub no_fonts: bool,
|
||||
pub no_images: bool,
|
||||
@@ -48,13 +52,13 @@ impl Options {
|
||||
.version(env!("CARGO_PKG_VERSION"))
|
||||
.author(format!("\n{}\n\n", env!("CARGO_PKG_AUTHORS").replace(':', "\n")).as_str())
|
||||
.about(format!("{}\n{}", ASCII, env!("CARGO_PKG_DESCRIPTION")).as_str())
|
||||
.args_from_usage("-a, --no-audio 'Removes audio sources'")
|
||||
.args_from_usage("-b, --base-url=[http://localhost/] 'Sets custom base URL'")
|
||||
.args_from_usage("-a, --no-audio 'Remove audio sources'")
|
||||
.args_from_usage("-b, --base-url=[http://localhost/] 'Set custom base URL'")
|
||||
.args_from_usage(
|
||||
"-B, --blacklist-domains 'Treat list of specified domains as blacklist'",
|
||||
)
|
||||
.args_from_usage("-c, --no-css 'Removes CSS'")
|
||||
.args_from_usage("-C, --charset=[UTF-8] 'Enforces custom encoding'")
|
||||
.args_from_usage("-c, --no-css 'Remove CSS'")
|
||||
.args_from_usage("-C, --cookies=[cookies.txt] 'Specify cookie file'")
|
||||
.arg(
|
||||
Arg::with_name("domains")
|
||||
.short('d')
|
||||
@@ -65,23 +69,24 @@ impl Options {
|
||||
.help("Specify domains to use for white/black-listing"),
|
||||
)
|
||||
.args_from_usage("-e, --ignore-errors 'Ignore network errors'")
|
||||
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
|
||||
.args_from_usage("-F, --no-fonts 'Removes fonts'")
|
||||
.args_from_usage("-i, --no-images 'Removes images'")
|
||||
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
|
||||
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
||||
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
|
||||
.args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
|
||||
.args_from_usage("-E, --encoding=[UTF-8] 'Enforce custom charset'")
|
||||
.args_from_usage("-f, --no-frames 'Remove frames and iframes'")
|
||||
.args_from_usage("-F, --no-fonts 'Remove fonts'")
|
||||
.args_from_usage("-i, --no-images 'Remove images'")
|
||||
.args_from_usage("-I, --isolate 'Cut off document from the Internet'")
|
||||
.args_from_usage("-j, --no-js 'Remove JavaScript'")
|
||||
.args_from_usage("-k, --insecure 'Allow invalid X.509 (TLS) certificates'")
|
||||
.args_from_usage("-M, --no-metadata 'Exclude timestamp and source information'")
|
||||
.args_from_usage(
|
||||
"-n, --unwrap-noscript 'Replaces NOSCRIPT elements with their contents'",
|
||||
"-n, --unwrap-noscript 'Replace NOSCRIPT elements with their contents'",
|
||||
)
|
||||
.args_from_usage(
|
||||
"-o, --output=[document.html] 'Writes output to <file>, use - for STDOUT'",
|
||||
"-o, --output=[document.html] 'Write output to <file>, use - for STDOUT'",
|
||||
)
|
||||
.args_from_usage("-s, --silent 'Suppresses verbosity'")
|
||||
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
|
||||
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
|
||||
.args_from_usage("-v, --no-video 'Removes video sources'")
|
||||
.args_from_usage("-s, --silent 'Suppress verbosity'")
|
||||
.args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'")
|
||||
.args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'")
|
||||
.args_from_usage("-v, --no-video 'Remove video sources'")
|
||||
.arg(
|
||||
Arg::with_name("target")
|
||||
.required(true)
|
||||
@@ -103,8 +108,11 @@ impl Options {
|
||||
}
|
||||
options.blacklist_domains = app.is_present("blacklist-domains");
|
||||
options.no_css = app.is_present("no-css");
|
||||
if let Some(charset) = app.value_of("charset") {
|
||||
options.charset = Some(charset.to_string());
|
||||
if let Some(cookie_file) = app.value_of("cookies") {
|
||||
options.cookie_file = Some(cookie_file.to_string());
|
||||
}
|
||||
if let Some(encoding) = app.value_of("encoding") {
|
||||
options.encoding = Some(encoding.to_string());
|
||||
}
|
||||
if let Some(domains) = app.get_many::<String>("domains") {
|
||||
let list_of_domains: Vec<String> = domains.map(|v| v.clone()).collect::<Vec<_>>();
|
||||
|
||||
25
src/url.rs
25
src/url.rs
@@ -1,4 +1,4 @@
|
||||
use base64;
|
||||
use base64::prelude::*;
|
||||
use percent_encoding::percent_decode_str;
|
||||
use url::Url;
|
||||
|
||||
@@ -33,7 +33,15 @@ pub fn create_data_url(media_type: &str, charset: &str, data: &[u8], final_asset
|
||||
"".to_string()
|
||||
};
|
||||
|
||||
data_url.set_path(format!("{}{};base64,{}", media_type, c, base64::encode(data)).as_str());
|
||||
data_url.set_path(
|
||||
format!(
|
||||
"{}{};base64,{}",
|
||||
media_type,
|
||||
c,
|
||||
BASE64_STANDARD.encode(data)
|
||||
)
|
||||
.as_str(),
|
||||
);
|
||||
|
||||
data_url
|
||||
}
|
||||
@@ -63,7 +71,7 @@ pub fn parse_data_url(url: &Url) -> (String, String, Vec<u8>) {
|
||||
// Parse raw data into vector of bytes
|
||||
let text: String = percent_decode_str(&data).decode_utf8_lossy().to_string();
|
||||
let blob: Vec<u8> = if is_base64 {
|
||||
base64::decode(&text).unwrap_or(vec![])
|
||||
BASE64_STANDARD.decode(&text).unwrap_or(vec![])
|
||||
} else {
|
||||
text.as_bytes().to_vec()
|
||||
};
|
||||
@@ -71,6 +79,17 @@ pub fn parse_data_url(url: &Url) -> (String, String, Vec<u8>) {
|
||||
(media_type, charset, blob)
|
||||
}
|
||||
|
||||
/// Builds the URL to send in the `Referer` header for requests made on behalf of `url`.
///
/// Spec: https://httpwg.org/specs/rfc9110.html#field.referer
/// The fragment and userinfo (username and password) components must not be
/// included, so they are removed; every other component is returned as is.
pub fn get_referer_url(url: Url) -> Url {
    // The argument is already owned, so it can be modified without cloning it first
    let mut referer_url = url;

    referer_url.set_fragment(None);

    // Per the url crate documentation the userinfo setters return Err for URLs
    // that cannot carry credentials (cannot-be-a-base or host-less URLs); in that
    // case there is nothing to strip, so the error is ignored instead of panicking
    let _ = referer_url.set_username("");
    let _ = referer_url.set_password(None);

    referer_url
}
|
||||
|
||||
pub fn resolve_url(from: &Url, to: &str) -> Url {
|
||||
match Url::parse(&to) {
|
||||
Ok(parsed_url) => parsed_url,
|
||||
|
||||
62
src/utils.rs
62
src/utils.rs
@@ -1,12 +1,12 @@
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE, COOKIE, REFERER};
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use url::Url;
|
||||
|
||||
use crate::opts::Options;
|
||||
use crate::url::{clean_url, parse_data_url};
|
||||
use crate::url::{clean_url, get_referer_url, parse_data_url};
|
||||
|
||||
const ANSI_COLOR_RED: &'static str = "\x1b[31m";
|
||||
const ANSI_COLOR_RESET: &'static str = "\x1b[0m";
|
||||
@@ -148,18 +148,6 @@ pub fn domain_is_within_domain(domain: &str, domain_to_match_against: &str) -> b
|
||||
ok
|
||||
}
|
||||
|
||||
/// Returns the indentation prefix for log output at nesting depth `level`.
///
/// The result consists of exactly `level` space characters; a level of zero
/// yields an empty string.
pub fn indent(level: u32) -> String {
    // One space per level; widening u32 to usize is lossless on 32- and 64-bit targets
    " ".repeat(level as usize)
}
|
||||
|
||||
pub fn is_plaintext_media_type(media_type: &str) -> bool {
|
||||
media_type.to_lowercase().as_str().starts_with("text/")
|
||||
|| PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
|
||||
@@ -198,7 +186,6 @@ pub fn retrieve_asset(
|
||||
parent_url: &Url,
|
||||
url: &Url,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
) -> Result<(Vec<u8>, Url, String, String), reqwest::Error> {
|
||||
if url.scheme() == "data" {
|
||||
let (media_type, charset, data) = parse_data_url(url);
|
||||
@@ -208,8 +195,7 @@ pub fn retrieve_asset(
|
||||
if parent_url.scheme() != "file" {
|
||||
if !options.silent {
|
||||
eprintln!(
|
||||
"{}{}{} ({}){}",
|
||||
indent(depth).as_str(),
|
||||
"{}{} ({}){}",
|
||||
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||
&url,
|
||||
"Security Error",
|
||||
@@ -230,8 +216,7 @@ pub fn retrieve_asset(
|
||||
if path.is_dir() {
|
||||
if !options.silent {
|
||||
eprintln!(
|
||||
"{}{}{} (is a directory){}",
|
||||
indent(depth).as_str(),
|
||||
"{}{} (is a directory){}",
|
||||
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||
&url,
|
||||
if options.no_color {
|
||||
@@ -246,7 +231,7 @@ pub fn retrieve_asset(
|
||||
Err(client.get("").send().unwrap_err())
|
||||
} else {
|
||||
if !options.silent {
|
||||
eprintln!("{}{}", indent(depth).as_str(), &url);
|
||||
eprintln!("{}", &url);
|
||||
}
|
||||
|
||||
let file_blob: Vec<u8> = fs::read(&path).expect("Unable to read file");
|
||||
@@ -261,8 +246,7 @@ pub fn retrieve_asset(
|
||||
} else {
|
||||
if !options.silent {
|
||||
eprintln!(
|
||||
"{}{}{} (not found){}",
|
||||
indent(depth).as_str(),
|
||||
"{}{} (not found){}",
|
||||
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||
&url,
|
||||
if options.no_color {
|
||||
@@ -282,7 +266,7 @@ pub fn retrieve_asset(
|
||||
if cache.contains_key(&cache_key) {
|
||||
// URL is in cache, we get and return it
|
||||
if !options.silent {
|
||||
eprintln!("{}{} (from cache)", indent(depth).as_str(), &url);
|
||||
eprintln!("{} (from cache)", &url);
|
||||
}
|
||||
|
||||
Ok((
|
||||
@@ -304,13 +288,29 @@ pub fn retrieve_asset(
|
||||
}
|
||||
|
||||
// URL not in cache, we retrieve the file
|
||||
match client.get(url.as_str()).send() {
|
||||
let mut headers = HeaderMap::new();
|
||||
if options.cookies.len() > 0 {
|
||||
for cookie in &options.cookies {
|
||||
if !cookie.is_expired() && cookie.matches_url(url.as_str()) {
|
||||
let cookie_header_value: String = cookie.name.clone() + "=" + &cookie.value;
|
||||
headers
|
||||
.insert(COOKIE, HeaderValue::from_str(&cookie_header_value).unwrap());
|
||||
}
|
||||
}
|
||||
}
|
||||
// Add referer header for page resource requests
|
||||
if ["https", "http"].contains(&parent_url.scheme()) && parent_url != url {
|
||||
headers.insert(
|
||||
REFERER,
|
||||
HeaderValue::from_str(get_referer_url(parent_url.clone()).as_str()).unwrap(),
|
||||
);
|
||||
}
|
||||
match client.get(url.as_str()).headers(headers).send() {
|
||||
Ok(response) => {
|
||||
if !options.ignore_errors && response.status() != reqwest::StatusCode::OK {
|
||||
if !options.silent {
|
||||
eprintln!(
|
||||
"{}{}{} ({}){}",
|
||||
indent(depth).as_str(),
|
||||
"{}{} ({}){}",
|
||||
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||
&url,
|
||||
response.status(),
|
||||
@@ -329,9 +329,9 @@ pub fn retrieve_asset(
|
||||
|
||||
if !options.silent {
|
||||
if url.as_str() == response_url.as_str() {
|
||||
eprintln!("{}{}", indent(depth).as_str(), &url);
|
||||
eprintln!("{}", &url);
|
||||
} else {
|
||||
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &response_url);
|
||||
eprintln!("{} -> {}", &url, &response_url);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -355,8 +355,7 @@ pub fn retrieve_asset(
|
||||
Err(error) => {
|
||||
if !options.silent {
|
||||
eprintln!(
|
||||
"{}{}{}{}",
|
||||
indent(depth).as_str(),
|
||||
"{}{}{}",
|
||||
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||
error,
|
||||
if options.no_color {
|
||||
@@ -378,8 +377,7 @@ pub fn retrieve_asset(
|
||||
Err(error) => {
|
||||
if !options.silent {
|
||||
eprintln!(
|
||||
"{}{}{} ({}){}",
|
||||
indent(depth).as_str(),
|
||||
"{}{} ({}){}",
|
||||
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||
&url,
|
||||
error,
|
||||
|
||||
@@ -90,9 +90,9 @@ mod passing {
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"\
|
||||
{file_url_html}\n \
|
||||
{file_url_css}\n \
|
||||
{file_url_css}\n \
|
||||
{file_url_html}\n\
|
||||
{file_url_css}\n\
|
||||
{file_url_css}\n\
|
||||
{file_url_css}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
||||
|
||||
@@ -39,10 +39,10 @@ mod passing {
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"\
|
||||
{file}{cwd}/tests/_data_/basic/local-file.html\n \
|
||||
{file}{cwd}/tests/_data_/basic/local-style.css\n \
|
||||
{file}{cwd}/tests/_data_/basic/local-style-does-not-exist.css (not found)\n \
|
||||
{file}{cwd}/tests/_data_/basic/monolith.png (not found)\n \
|
||||
{file}{cwd}/tests/_data_/basic/local-file.html\n\
|
||||
{file}{cwd}/tests/_data_/basic/local-style.css\n\
|
||||
{file}{cwd}/tests/_data_/basic/local-style-does-not-exist.css (not found)\n\
|
||||
{file}{cwd}/tests/_data_/basic/monolith.png (not found)\n\
|
||||
{file}{cwd}/tests/_data_/basic/local-script.js\n\
|
||||
",
|
||||
file = file_url_protocol,
|
||||
@@ -185,7 +185,7 @@ mod passing {
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"\
|
||||
{file_url_html}\n \
|
||||
{file_url_html}\n\
|
||||
{file_url_svg}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
||||
@@ -236,10 +236,10 @@ mod passing {
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"\
|
||||
{file}{cwd}/tests/_data_/integrity/index.html\n \
|
||||
{file}{cwd}/tests/_data_/integrity/style.css\n \
|
||||
{file}{cwd}/tests/_data_/integrity/style.css\n \
|
||||
{file}{cwd}/tests/_data_/integrity/script.js\n \
|
||||
{file}{cwd}/tests/_data_/integrity/index.html\n\
|
||||
{file}{cwd}/tests/_data_/integrity/style.css\n\
|
||||
{file}{cwd}/tests/_data_/integrity/style.css\n\
|
||||
{file}{cwd}/tests/_data_/integrity/script.js\n\
|
||||
{file}{cwd}/tests/_data_/integrity/script.js\n\
|
||||
",
|
||||
file = file_url_protocol,
|
||||
|
||||
@@ -27,7 +27,7 @@ mod passing {
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"\
|
||||
{file_url_html}\n \
|
||||
{file_url_html}\n\
|
||||
{file_url_svg}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
||||
@@ -58,7 +58,7 @@ mod passing {
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"\
|
||||
{file_url_html}\n \
|
||||
{file_url_html}\n\
|
||||
{file_url_svg}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
||||
@@ -89,7 +89,7 @@ mod passing {
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"\
|
||||
{file_url_html}\n \
|
||||
{file_url_html}\n\
|
||||
{file_url_svg}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
||||
@@ -120,7 +120,7 @@ mod passing {
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"\
|
||||
{file_url_html}\n \
|
||||
{file_url_html}\n\
|
||||
{file_url_svg}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
||||
|
||||
@@ -38,7 +38,7 @@ mod passing {
|
||||
)
|
||||
);
|
||||
|
||||
// STDOUT should contain original document without any modificatons
|
||||
// STDOUT should contain original document without any modifications
|
||||
let s: String;
|
||||
if let Some(encoding) = Encoding::for_label(b"gb2312") {
|
||||
let (string, _, _) = encoding.decode(&out.stdout);
|
||||
@@ -115,7 +115,7 @@ mod passing {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-C")
|
||||
.arg("-E")
|
||||
.arg("utf8")
|
||||
.arg(format!(
|
||||
"tests{s}_data_{s}unusual_encodings{s}gb2312.html",
|
||||
@@ -135,7 +135,7 @@ mod passing {
|
||||
)
|
||||
);
|
||||
|
||||
// STDOUT should contain original document without any modificatons
|
||||
// STDOUT should contain original document without any modifications
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stdout).to_string(),
|
||||
"<html>\
|
||||
@@ -158,7 +158,7 @@ mod passing {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-C")
|
||||
.arg("-E")
|
||||
.arg("utf0")
|
||||
.arg(format!(
|
||||
"tests{s}_data_{s}unusual_encodings{s}gb2312.html",
|
||||
|
||||
68
tests/cookies/cookie/is_expired.rs
Normal file
68
tests/cookies/cookie/is_expired.rs
Normal file
@@ -0,0 +1,68 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    use monolith::cookies;

    /// A cookie whose `expires` field is 0 must not be reported as expired.
    #[test]
    fn never_expires() {
        let session_cookie = cookies::Cookie {
            name: String::new(),
            value: String::new(),
            domain: "127.0.0.1".to_string(),
            path: "/".to_string(),
            include_subdomains: true,
            https_only: false,
            expires: 0,
        };

        let expired = session_cookie.is_expired();
        assert!(!expired);
    }

    /// A cookie with a very large `expires` value must not be reported as expired.
    #[test]
    fn expires_long_from_now() {
        let long_lived_cookie = cookies::Cookie {
            name: String::new(),
            value: String::new(),
            domain: "127.0.0.1".to_string(),
            path: "/".to_string(),
            include_subdomains: true,
            https_only: false,
            expires: 9999999999,
        };

        let expired = long_lived_cookie.is_expired();
        assert!(!expired);
    }
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod failing {
    use monolith::cookies;

    /// A cookie whose `expires` value is 1 must be reported as expired.
    #[test]
    fn expired() {
        let stale_cookie = cookies::Cookie {
            name: String::new(),
            value: String::new(),
            domain: "127.0.0.1".to_string(),
            path: "/".to_string(),
            include_subdomains: true,
            https_only: false,
            expires: 1,
        };

        let expired = stale_cookie.is_expired();
        assert!(expired);
    }
}
|
||||
107
tests/cookies/cookie/matches_url.rs
Normal file
107
tests/cookies/cookie/matches_url.rs
Normal file
@@ -0,0 +1,107 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    use monolith::cookies;

    /// An HTTPS-only cookie matches an `https://` URL on the same host.
    #[test]
    fn secure_url() {
        let secure_cookie = cookies::Cookie {
            name: String::new(),
            value: String::new(),
            domain: "127.0.0.1".to_string(),
            path: "/".to_string(),
            include_subdomains: true,
            https_only: true,
            expires: 0,
        };

        let matched = secure_cookie.matches_url("https://127.0.0.1/something");
        assert!(matched);
    }

    /// A cookie that is not HTTPS-only matches a plain `http://` URL on the same host.
    #[test]
    fn non_secure_url() {
        let plain_cookie = cookies::Cookie {
            name: String::new(),
            value: String::new(),
            domain: "127.0.0.1".to_string(),
            path: "/".to_string(),
            include_subdomains: true,
            https_only: false,
            expires: 0,
        };

        let matched = plain_cookie.matches_url("http://127.0.0.1/something");
        assert!(matched);
    }

    /// A dot-prefixed domain with `include_subdomains` set matches a URL on a subdomain.
    #[test]
    fn subdomain() {
        let wildcard_cookie = cookies::Cookie {
            name: String::new(),
            value: String::new(),
            domain: ".somethingsomething.com".to_string(),
            path: "/".to_string(),
            include_subdomains: true,
            https_only: true,
            expires: 0,
        };

        let matched = wildcard_cookie.matches_url("https://cdn.somethingsomething.com/something");
        assert!(matched);
    }
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod failing {
    use monolith::cookies;

    /// An empty string is not a URL, so no cookie may match it.
    #[test]
    fn empty_url() {
        let cookie = cookies::Cookie {
            domain: String::from("127.0.0.1"),
            include_subdomains: true,
            path: String::from("/"),
            https_only: false,
            expires: 0,
            name: String::from(""),
            value: String::from(""),
        };
        assert!(!cookie.matches_url(""));
    }

    /// A URL whose host differs from the cookie's domain must not match.
    #[test]
    fn wrong_hostname() {
        let cookie = cookies::Cookie {
            domain: String::from("127.0.0.1"),
            include_subdomains: true,
            path: String::from("/"),
            https_only: false,
            expires: 0,
            name: String::from(""),
            value: String::from(""),
        };
        assert!(!cookie.matches_url("http://0.0.0.0/"));
    }

    /// A URL whose path lies outside the cookie's path must not match.
    #[test]
    fn wrong_path() {
        let cookie = cookies::Cookie {
            domain: String::from("127.0.0.1"),
            include_subdomains: false,
            // A root path ("/") would cover every URL path, so a narrower one is
            // needed for the path comparison to be able to reject anything.
            path: String::from("/admin"),
            https_only: false,
            expires: 0,
            name: String::from(""),
            value: String::from(""),
        };
        // Scheme and host agree with the cookie, leaving the path as the only
        // possible reason for the mismatch (unlike `wrong_hostname` above).
        assert!(!cookie.matches_url("http://127.0.0.1/public/page"));
    }
}
|
||||
2
tests/cookies/cookie/mod.rs
Normal file
2
tests/cookies/cookie/mod.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
mod is_expired;
|
||||
mod matches_url;
|
||||
2
tests/cookies/mod.rs
Normal file
2
tests/cookies/mod.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
mod cookie;
|
||||
mod parse_cookie_file_contents;
|
||||
87
tests/cookies/parse_cookie_file_contents.rs
Normal file
87
tests/cookies/parse_cookie_file_contents.rs
Normal file
@@ -0,0 +1,87 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    use monolith::cookies;

    /// A header line followed by one tab-separated record yields exactly one cookie
    /// whose fields mirror the record's columns.
    #[test]
    fn parse_file() {
        let file_contents =
            "# Netscape HTTP Cookie File\n127.0.0.1\tFALSE\t/\tFALSE\t0\tUSER_TOKEN\tin";
        let parsed = cookies::parse_cookie_file_contents(&file_contents).unwrap();

        assert_eq!(parsed.len(), 1);

        let only = &parsed[0];
        assert_eq!(only.domain, "127.0.0.1");
        assert_eq!(only.include_subdomains, false);
        assert_eq!(only.path, "/");
        assert_eq!(only.https_only, false);
        assert_eq!(only.expires, 0);
        assert_eq!(only.name, "USER_TOKEN");
        assert_eq!(only.value, "in");
    }

    /// Two records (followed by blank lines) are returned as two cookies, in file order.
    #[test]
    fn parse_multiline_file() {
        let file_contents = "# HTTP Cookie File\n127.0.0.1\tFALSE\t/\tFALSE\t0\tUSER_TOKEN\tin\n127.0.0.1\tTRUE\t/\tTRUE\t9\tUSER_TOKEN\tout\n\n";
        let parsed = cookies::parse_cookie_file_contents(&file_contents).unwrap();

        assert_eq!(parsed.len(), 2);

        let first = &parsed[0];
        assert_eq!(first.domain, "127.0.0.1");
        assert_eq!(first.include_subdomains, false);
        assert_eq!(first.path, "/");
        assert_eq!(first.https_only, false);
        assert_eq!(first.expires, 0);
        assert_eq!(first.name, "USER_TOKEN");
        assert_eq!(first.value, "in");

        let second = &parsed[1];
        assert_eq!(second.domain, "127.0.0.1");
        assert_eq!(second.include_subdomains, true);
        assert_eq!(second.path, "/");
        assert_eq!(second.https_only, true);
        assert_eq!(second.expires, 9);
        assert_eq!(second.name, "USER_TOKEN");
        assert_eq!(second.value, "out");
    }
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod failing {
    use monolith::cookies;

    /// Empty input is expected to parse successfully and produce no cookies.
    #[test]
    fn empty() {
        let file_contents = "";
        let result = cookies::parse_cookie_file_contents(&file_contents).unwrap();
        assert_eq!(result.len(), 0);
    }

    /// Input that starts with a record instead of a header line is expected to be
    /// rejected with an error.
    #[test]
    fn no_header() {
        let file_contents = "127.0.0.1 FALSE / FALSE 0 USER_TOKEN in";
        // Assert on the result directly; asserting on the constants `true`/`false`
        // inside a `match` hides the condition that is actually being checked.
        assert!(cookies::parse_cookie_file_contents(&file_contents).is_err());
    }

    /// A record whose columns are separated by spaces rather than tabs is expected
    /// to be skipped, leaving no cookies.
    #[test]
    fn spaces_instead_of_tabs() {
        let file_contents =
            "# HTTP Cookie File\n127.0.0.1 FALSE / FALSE 0 USER_TOKEN in";
        let result = cookies::parse_cookie_file_contents(&file_contents).unwrap();
        assert_eq!(result.len(), 0);
    }
}
|
||||
@@ -23,7 +23,7 @@ mod passing {
|
||||
let options = Options::default();
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, "", &options, 0),
|
||||
css::embed_css(cache, &client, &document_url, "", &options),
|
||||
""
|
||||
);
|
||||
}
|
||||
@@ -36,7 +36,7 @@ mod passing {
|
||||
let options = Options::default();
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, "\t \t ", &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, "\t \t ", &options),
|
||||
""
|
||||
);
|
||||
}
|
||||
@@ -59,7 +59,7 @@ mod passing {
|
||||
height: calc(100vh - 10pt)";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &STYLE, &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, &STYLE, &options),
|
||||
format!(
|
||||
"/* border: none;*/\
|
||||
background-image: url(\"{empty_image}\"); \
|
||||
@@ -91,7 +91,7 @@ mod passing {
|
||||
height: calc(100vh - 10pt)";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &STYLE, &options, 0),
|
||||
css::embed_css(cache, &client, &document_url, &STYLE, &options),
|
||||
format!(
|
||||
"/* border: none;*/\
|
||||
background-image: url(\"{empty_image}\"); \
|
||||
@@ -122,7 +122,7 @@ mod passing {
|
||||
html > body {}";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
CSS
|
||||
);
|
||||
}
|
||||
@@ -166,7 +166,7 @@ mod passing {
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
CSS
|
||||
);
|
||||
}
|
||||
@@ -188,7 +188,7 @@ mod passing {
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
"\
|
||||
@charset \"UTF-8\";\n\
|
||||
\n\
|
||||
@@ -218,7 +218,7 @@ mod passing {
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
CSS
|
||||
);
|
||||
}
|
||||
@@ -240,7 +240,7 @@ mod passing {
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
CSS
|
||||
);
|
||||
}
|
||||
@@ -264,7 +264,7 @@ mod passing {
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
CSS
|
||||
);
|
||||
}
|
||||
@@ -312,7 +312,7 @@ mod passing {
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
CSS_OUT
|
||||
);
|
||||
}
|
||||
@@ -337,7 +337,7 @@ mod passing {
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
CSS_OUT
|
||||
);
|
||||
}
|
||||
@@ -364,7 +364,7 @@ mod passing {
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
CSS_OUT
|
||||
);
|
||||
}
|
||||
|
||||
@@ -10,17 +10,17 @@ mod passing {
|
||||
use monolith::css;
|
||||
|
||||
#[test]
|
||||
fn backrgound() {
|
||||
fn background() {
|
||||
assert!(css::is_image_url_prop("background"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn backrgound_image() {
|
||||
fn background_image() {
|
||||
assert!(css::is_image_url_prop("background-image"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn backrgound_image_uppercase() {
|
||||
fn background_image_uppercase() {
|
||||
assert!(css::is_image_url_prop("BACKGROUND-IMAGE"));
|
||||
}
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use html5ever::serialize::{serialize, SerializeOpts};
|
||||
use markup5ever_rcdom::SerializableHandle;
|
||||
|
||||
use monolith::html;
|
||||
|
||||
@@ -19,7 +20,12 @@ mod passing {
|
||||
dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string());
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
|
||||
@@ -29,7 +29,6 @@ mod passing {
|
||||
&Url::parse("data:,").unwrap(),
|
||||
&srcset_value,
|
||||
&options,
|
||||
0,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
@@ -55,7 +54,6 @@ mod passing {
|
||||
&Url::parse("data:,").unwrap(),
|
||||
&srcset_value,
|
||||
&options,
|
||||
0,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
@@ -78,7 +76,6 @@ mod passing {
|
||||
&Url::parse("data:,").unwrap(),
|
||||
&srcset_value,
|
||||
&options,
|
||||
0,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
@@ -101,7 +98,6 @@ mod passing {
|
||||
&Url::parse("data:,").unwrap(),
|
||||
&srcset_value,
|
||||
&options,
|
||||
0,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
@@ -112,6 +108,56 @@ mod passing {
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
/// Candidates packed together without a space after the separating commas (and
/// with commas inside the URLs themselves) are still expected to come out as
/// three ", "-separated entries.
#[test]
fn no_whitespace_after_commas() {
    let cache = &mut HashMap::new();
    let client = Client::new();
    let srcset_value = "small,s.png 1x,medium,m.png 2x,large,l.png 3x";

    // With images disabled every candidate URL is replaced by the empty-image data URL.
    let mut options = Options::default();
    options.no_images = true;
    options.silent = true;

    let document_url = Url::parse("data:,").unwrap();
    let embedded_srcset =
        html::embed_srcset(cache, &client, &document_url, &srcset_value, &options);

    let expected = format!(
        "{} 1x, {} 2x, {} 3x",
        EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL
    );
    assert_eq!(embedded_srcset, expected);
}
|
||||
|
||||
/// A final candidate that carries no descriptor is expected to be emitted as a
/// bare URL after the two descriptor-bearing entries.
#[test]
fn last_without_descriptor() {
    let cache = &mut HashMap::new();
    let client = Client::new();
    let srcset_value = "small,s.png 1x, medium,m.png 2x, large,l.png";

    // With images disabled every candidate URL is replaced by the empty-image data URL.
    let mut options = Options::default();
    options.no_images = true;
    options.silent = true;

    let document_url = Url::parse("data:,").unwrap();
    let embedded_srcset =
        html::embed_srcset(cache, &client, &document_url, &srcset_value, &options);

    let expected = format!(
        "{} 1x, {} 2x, {}",
        EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL
    );
    assert_eq!(embedded_srcset, expected);
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
@@ -145,12 +191,11 @@ mod failing {
|
||||
&Url::parse("data:,").unwrap(),
|
||||
&srcset_value,
|
||||
&options,
|
||||
0,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
embedded_css,
|
||||
format!("{} 1x, {} 2x,", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL),
|
||||
format!("{} 1x, {} 2x", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use html5ever::rcdom::{Handle, NodeData};
|
||||
use markup5ever_rcdom::{Handle, NodeData};
|
||||
|
||||
use monolith::html;
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use html5ever::rcdom::{Handle, NodeData};
|
||||
use markup5ever_rcdom::{Handle, NodeData};
|
||||
|
||||
use monolith::html;
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use html5ever::rcdom::{Handle, NodeData};
|
||||
use markup5ever_rcdom::{Handle, NodeData};
|
||||
|
||||
use monolith::html;
|
||||
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use html5ever::serialize::{serialize, SerializeOpts};
|
||||
use markup5ever_rcdom::SerializableHandle;
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
use url::Url;
|
||||
@@ -29,10 +30,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -52,10 +58,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -75,10 +86,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -104,10 +120,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -140,10 +161,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -177,10 +203,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -201,10 +232,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -233,10 +269,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -269,10 +310,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -303,10 +349,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -340,10 +391,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -378,10 +434,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -424,10 +485,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -463,10 +529,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -498,10 +569,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
mod cli;
|
||||
mod cookies;
|
||||
mod css;
|
||||
mod html;
|
||||
mod js;
|
||||
|
||||
@@ -16,7 +16,8 @@ mod passing {
|
||||
assert_eq!(options.no_audio, false);
|
||||
assert_eq!(options.base_url, None);
|
||||
assert_eq!(options.no_css, false);
|
||||
assert_eq!(options.charset, None);
|
||||
assert_eq!(options.cookie_file, None);
|
||||
assert_eq!(options.encoding, None);
|
||||
assert_eq!(options.no_frames, false);
|
||||
assert_eq!(options.no_fonts, false);
|
||||
assert_eq!(options.no_images, false);
|
||||
|
||||
@@ -46,7 +46,7 @@ mod passing {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn removesempty_fragment_and_keeps_empty_query() {
|
||||
fn removes_empty_fragment_and_keeps_query() {
|
||||
assert_eq!(
|
||||
url::clean_url(Url::parse("https://somewhere.com/font.eot?a=b&#").unwrap()).as_str(),
|
||||
"https://somewhere.com/font.eot?a=b&"
|
||||
|
||||
91
tests/url/get_referer_url.rs
Normal file
91
tests/url/get_referer_url.rs
Normal file
@@ -0,0 +1,91 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
mod passing {
    use reqwest::Url;

    use monolith::url;

    /// Parses `input`, passes it through `url::get_referer_url` and returns the
    /// resulting URL in its string form.
    fn referer_of(input: &str) -> String {
        let parsed = Url::parse(input).unwrap();
        url::get_referer_url(parsed).as_str().to_string()
    }

    /// The URL handed in as a clone keeps its fragment; only the returned URL loses it.
    #[test]
    fn preserve_original() {
        let source = "https://somewhere.com/font.eot#iefix";
        let original_url: Url = Url::parse(source).unwrap();
        let referer_url: Url = url::get_referer_url(original_url.clone());

        assert_eq!(referer_url.as_str(), "https://somewhere.com/font.eot");
        assert_eq!(original_url.as_str(), source);
    }

    /// A non-empty fragment is stripped.
    #[test]
    fn removes_fragment() {
        assert_eq!(
            referer_of("https://somewhere.com/font.eot#iefix"),
            "https://somewhere.com/font.eot"
        );
    }

    /// A bare `#` is stripped as well.
    #[test]
    fn removes_empty_fragment() {
        assert_eq!(
            referer_of("https://somewhere.com/font.eot#"),
            "https://somewhere.com/font.eot"
        );
    }

    /// The fragment goes, while an empty query (`?`) stays.
    #[test]
    fn removes_empty_fragment_and_keeps_empty_query() {
        assert_eq!(
            referer_of("https://somewhere.com/font.eot?#"),
            "https://somewhere.com/font.eot?"
        );
    }

    /// The fragment goes, while a non-empty query stays untouched.
    #[test]
    fn removes_empty_fragment_and_keeps_query() {
        assert_eq!(
            referer_of("https://somewhere.com/font.eot?a=b&#"),
            "https://somewhere.com/font.eot?a=b&"
        );
    }

    /// Both username and password are dropped.
    #[test]
    fn removes_credentials() {
        assert_eq!(
            referer_of("https://cookie:monster@gibson.lan/path"),
            "https://gibson.lan/path"
        );
    }

    /// A lone `@` with no credentials before it is dropped.
    #[test]
    fn removes_empty_credentials() {
        assert_eq!(
            referer_of("https://@gibson.lan/path"),
            "https://gibson.lan/path"
        );
    }

    /// A password with an empty username is dropped.
    #[test]
    fn removes_empty_username_credentials() {
        assert_eq!(
            referer_of("https://:monster@gibson.lan/path"),
            "https://gibson.lan/path"
        );
    }

    /// A username with no password is dropped.
    #[test]
    fn removes_empty_password_credentials() {
        assert_eq!(
            referer_of("https://cookie@gibson.lan/path"),
            "https://gibson.lan/path"
        );
    }
}
|
||||
@@ -1,5 +1,6 @@
|
||||
mod clean_url;
|
||||
mod create_data_url;
|
||||
mod get_referer_url;
|
||||
mod is_url_and_has_protocol;
|
||||
mod parse_data_url;
|
||||
mod resolve_url;
|
||||
|
||||
@@ -1,36 +0,0 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
// Passing cases for `utils::indent`, one test per argument value 0 through 4.
// NOTE(review): the preceding hunk header reads `-1,36 +0,0`, which suggests
// these lines are being removed by the change shown here — confirm.
// NOTE(review): the expected literals for 1 through 4 all appear as a single
// space in this rendering; whitespace may have been collapsed, so verify the
// real expected strings against the original file.
#[cfg(test)]
|
||||
mod passing {
|
||||
use monolith::utils;
|
||||
|
||||
// An argument of 0 is compared against the empty string.
#[test]
|
||||
fn zero() {
|
||||
assert_eq!(utils::indent(0), "");
|
||||
}
|
||||
|
||||
// An argument of 1 is compared against the literal shown below.
#[test]
|
||||
fn one() {
|
||||
assert_eq!(utils::indent(1), " ");
|
||||
}
|
||||
|
||||
// An argument of 2 is compared against the literal shown below.
#[test]
|
||||
fn two() {
|
||||
assert_eq!(utils::indent(2), " ");
|
||||
}
|
||||
|
||||
// An argument of 3 is compared against the literal shown below.
#[test]
|
||||
fn three() {
|
||||
assert_eq!(utils::indent(3), " ");
|
||||
}
|
||||
|
||||
// An argument of 4 is compared against the literal shown below.
#[test]
|
||||
fn four() {
|
||||
assert_eq!(utils::indent(4), " ");
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,4 @@
|
||||
mod detect_media_type;
|
||||
mod domain_is_within_domain;
|
||||
mod indent;
|
||||
mod parse_content_type;
|
||||
mod retrieve_asset;
|
||||
|
||||
@@ -32,7 +32,6 @@ mod passing {
|
||||
&Url::parse("data:text/html;base64,c291cmNl").unwrap(),
|
||||
&Url::parse("data:text/html;base64,dGFyZ2V0").unwrap(),
|
||||
&options,
|
||||
0,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(&media_type, "text/html");
|
||||
@@ -75,7 +74,6 @@ mod passing {
|
||||
))
|
||||
.unwrap(),
|
||||
&options,
|
||||
0,
|
||||
)
|
||||
.unwrap();
|
||||
assert_eq!(&media_type, "application/javascript");
|
||||
@@ -124,7 +122,6 @@ mod failing {
|
||||
&Url::parse("data:text/html;base64,SoUrCe").unwrap(),
|
||||
&Url::parse("file:///etc/passwd").unwrap(),
|
||||
&options,
|
||||
0,
|
||||
) {
|
||||
Ok((..)) => {
|
||||
assert!(false);
|
||||
@@ -150,7 +147,6 @@ mod failing {
|
||||
&Url::parse("https://kernel.org/").unwrap(),
|
||||
&Url::parse("file:///etc/passwd").unwrap(),
|
||||
&options,
|
||||
0,
|
||||
) {
|
||||
Ok((..)) => {
|
||||
assert!(false);
|
||||
|
||||
Reference in New Issue
Block a user