Compare commits
125 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4a5f85ba97 | ||
|
|
2a8d5d7916 | ||
|
|
95645fcff4 | ||
|
|
b6b358b3bc | ||
|
|
8b0635bd84 | ||
|
|
b685d3a46c | ||
|
|
82d05fc0f1 | ||
|
|
65ac5c36b1 | ||
|
|
1f60a76fcd | ||
|
|
f067fc2324 | ||
|
|
49d7585e02 | ||
|
|
e25b7bc470 | ||
|
|
329c0568a4 | ||
|
|
f151a33c48 | ||
|
|
ebf96bf1e5 | ||
|
|
c10c78a27d | ||
|
|
64e84e4983 | ||
|
|
674d4085c7 | ||
|
|
e0fd5d4bb9 | ||
|
|
f4e360f09d | ||
|
|
0b4116f48a | ||
|
|
084981a2ae | ||
|
|
a3feb7b721 | ||
|
|
87eb197e33 | ||
|
|
6798cad2b2 | ||
|
|
174cb50877 | ||
|
|
e397a7532d | ||
|
|
91d8c146a9 | ||
|
|
67e07b91af | ||
|
|
f797b8c999 | ||
|
|
60251c6878 | ||
|
|
b70801d55b | ||
|
|
2a50936990 | ||
|
|
f9e961f088 | ||
|
|
d8c3620d00 | ||
|
|
4aab0a64ee | ||
|
|
a2155e0af6 | ||
|
|
f7e5527432 | ||
|
|
f7dd09d481 | ||
|
|
73c0ceebd4 | ||
|
|
727eae2e35 | ||
|
|
b7a38c9f4a | ||
|
|
aa556094a4 | ||
|
|
81b304c558 | ||
|
|
a3e82a2ad8 | ||
|
|
a5bf1705db | ||
|
|
78c37958dc | ||
|
|
20c56a5440 | ||
|
|
37416f827b | ||
|
|
7f123e810b | ||
|
|
db04d11d99 | ||
|
|
1c8d4f1830 | ||
|
|
1c71e708e1 | ||
|
|
a1bb9a4b74 | ||
|
|
cf7e368545 | ||
|
|
c1edde9b3e | ||
|
|
7c0504c4cb | ||
|
|
1bff2c22ba | ||
|
|
8113509dcf | ||
|
|
8fc0fc155f | ||
|
|
7c61b462dd | ||
|
|
ef3684025b | ||
|
|
db7ee697b3 | ||
|
|
89ce5029b9 | ||
|
|
54609b10e5 | ||
|
|
013d93bacc | ||
|
|
0df8613789 | ||
|
|
68a1531a11 | ||
|
|
99c3be1804 | ||
|
|
80559e7224 | ||
|
|
c5c5f1ca44 | ||
|
|
de6a13a884 | ||
|
|
ef16355f9f | ||
|
|
a4dc0ed9b4 | ||
|
|
cd0e366979 | ||
|
|
d4c6c458f9 | ||
|
|
c9970b3a8e | ||
|
|
404d322b99 | ||
|
|
1b353d0b46 | ||
|
|
f920a5e4d6 | ||
|
|
d3ca1ecad3 | ||
|
|
9e057472c6 | ||
|
|
d453145bf8 | ||
|
|
8c131d649f | ||
|
|
a221fdb368 | ||
|
|
15dd82e300 | ||
|
|
de492caaa5 | ||
|
|
9096447c70 | ||
|
|
354340db86 | ||
|
|
900dd8d163 | ||
|
|
a11c4496b0 | ||
|
|
dd33b16876 | ||
|
|
2cc1870033 | ||
|
|
d41e6c041b | ||
|
|
460a461373 | ||
|
|
1e6e87b6aa | ||
|
|
54094270b3 | ||
|
|
e6cf367e23 | ||
|
|
e8437ecb28 | ||
|
|
543bebbd8d | ||
|
|
dc6c0200bc | ||
|
|
04bdb3072f | ||
|
|
a9228f0522 | ||
|
|
aae68c4c82 | ||
|
|
dd23826205 | ||
|
|
781f4cd3b5 | ||
|
|
6826b59ab9 | ||
|
|
2be725eeb5 | ||
|
|
dd2e9ca2e5 | ||
|
|
50bccae476 | ||
|
|
b3bcb1d85b | ||
|
|
c58d044459 | ||
|
|
eeaea0df16 | ||
|
|
2539aac4c0 | ||
|
|
03b9af543a | ||
|
|
1bb8141021 | ||
|
|
4bc8043f0f | ||
|
|
5effa38392 | ||
|
|
125aeeec3b | ||
|
|
c938ba6a2f | ||
|
|
f354affc36 | ||
|
|
7686b2ea64 | ||
|
|
b29b9a6a7c | ||
|
|
cbda57cfa8 | ||
|
|
b8aa545e8c |
1
.dockerignore
Normal file
1
.dockerignore
Normal file
@@ -0,0 +1 @@
|
||||
/target/
|
||||
3
.github/FUNDING.yml
vendored
Normal file
3
.github/FUNDING.yml
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
# These are supported funding model platforms
|
||||
|
||||
github: snshn
|
||||
11
.github/workflows/build_gnu_linux.yml
vendored
11
.github/workflows/build_gnu_linux.yml
vendored
@@ -3,6 +3,15 @@ name: GNU/Linux
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
paths-ignore:
|
||||
- 'assets/'
|
||||
- 'dist/'
|
||||
- 'snap/'
|
||||
- 'Dockerfile'
|
||||
- 'LICENSE'
|
||||
- 'Makefile'
|
||||
- 'monolith.nuspec'
|
||||
- 'README.md'
|
||||
|
||||
jobs:
|
||||
build:
|
||||
@@ -17,6 +26,8 @@ jobs:
|
||||
|
||||
steps:
|
||||
- run: git config --global core.autocrlf false
|
||||
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Build
|
||||
run: cargo build --all --locked --verbose
|
||||
|
||||
11
.github/workflows/build_macos.yml
vendored
11
.github/workflows/build_macos.yml
vendored
@@ -3,6 +3,15 @@ name: macOS
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
paths-ignore:
|
||||
- 'assets/'
|
||||
- 'dist/'
|
||||
- 'snap/'
|
||||
- 'Dockerfile'
|
||||
- 'LICENSE'
|
||||
- 'Makefile'
|
||||
- 'monolith.nuspec'
|
||||
- 'README.md'
|
||||
|
||||
jobs:
|
||||
build:
|
||||
@@ -17,6 +26,8 @@ jobs:
|
||||
|
||||
steps:
|
||||
- run: git config --global core.autocrlf false
|
||||
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Build
|
||||
run: cargo build --all --locked --verbose
|
||||
|
||||
11
.github/workflows/build_windows.yml
vendored
11
.github/workflows/build_windows.yml
vendored
@@ -3,6 +3,15 @@ name: Windows
|
||||
on:
|
||||
push:
|
||||
branches: [ master ]
|
||||
paths-ignore:
|
||||
- 'assets/'
|
||||
- 'dist/'
|
||||
- 'snap/'
|
||||
- 'Dockerfile'
|
||||
- 'LICENSE'
|
||||
- 'Makefile'
|
||||
- 'monolith.nuspec'
|
||||
- 'README.md'
|
||||
|
||||
jobs:
|
||||
build:
|
||||
@@ -17,6 +26,8 @@ jobs:
|
||||
|
||||
steps:
|
||||
- run: git config --global core.autocrlf false
|
||||
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Build
|
||||
run: cargo build --all --locked --verbose
|
||||
|
||||
71
.github/workflows/cd.yml
vendored
71
.github/workflows/cd.yml
vendored
@@ -1,4 +1,4 @@
|
||||
# CD GitHub Actions workflow for Monolith
|
||||
# CD GitHub Actions workflow for monolith
|
||||
|
||||
name: CD
|
||||
|
||||
@@ -13,54 +13,95 @@ jobs:
|
||||
runs-on: windows-2019
|
||||
steps:
|
||||
- run: git config --global core.autocrlf false
|
||||
|
||||
- name: Checkout the repository
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Build the executable
|
||||
run: cargo build --release
|
||||
- uses: Shopify/upload-to-release@1.0.0
|
||||
|
||||
- uses: Shopify/upload-to-release@v2.0.0
|
||||
with:
|
||||
name: monolith.exe
|
||||
path: target\release\monolith.exe
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
gnu_linux_armhf:
|
||||
runs-on: ubuntu-18.04
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: Checkout the repository
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Prepare cross-platform environment
|
||||
run: |
|
||||
sudo mkdir -p /cross-build-arm
|
||||
sudo mkdir /cross-build
|
||||
sudo touch /etc/apt/sources.list.d/armhf.list
|
||||
echo "deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ bionic main" | sudo tee -a /etc/apt/sources.list.d/armhf.list
|
||||
echo "deb [arch=armhf] http://ports.ubuntu.com/ubuntu-ports/ focal main" | sudo tee -a /etc/apt/sources.list.d/armhf.list
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y gcc-arm-linux-gnueabihf libc6-armhf-cross libc6-dev-armhf-cross
|
||||
sudo apt-get download libssl1.1:armhf libssl-dev:armhf
|
||||
sudo dpkg -x libssl1.1*.deb /cross-build-arm
|
||||
sudo dpkg -x libssl-dev*.deb /cross-build-arm
|
||||
sudo dpkg -x libssl1.1*.deb /cross-build
|
||||
sudo dpkg -x libssl-dev*.deb /cross-build
|
||||
rustup target add arm-unknown-linux-gnueabihf
|
||||
echo "C_INCLUDE_PATH=/cross-build-arm/usr/include" >> $GITHUB_ENV
|
||||
echo "OPENSSL_INCLUDE_DIR=/cross-build-arm/usr/include/arm-linux-gnueabihf" >> $GITHUB_ENV
|
||||
echo "OPENSSL_LIB_DIR=/cross-build-arm/usr/lib/arm-linux-gnueabihf" >> $GITHUB_ENV
|
||||
echo "C_INCLUDE_PATH=/cross-build/usr/include" >> $GITHUB_ENV
|
||||
echo "OPENSSL_INCLUDE_DIR=/cross-build/usr/include/arm-linux-gnueabihf" >> $GITHUB_ENV
|
||||
echo "OPENSSL_LIB_DIR=/cross-build/usr/lib/arm-linux-gnueabihf" >> $GITHUB_ENV
|
||||
echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV
|
||||
echo "RUSTFLAGS=-C linker=arm-linux-gnueabihf-gcc -L/usr/arm-linux-gnueabihf/lib -L/cross-build-arm/usr/lib/arm-linux-gnueabihf -L/cross-build-arm/lib/arm-linux-gnueabihf" >> $GITHUB_ENV
|
||||
echo "RUSTFLAGS=-C linker=arm-linux-gnueabihf-gcc -L/usr/arm-linux-gnueabihf/lib -L/cross-build/usr/lib/arm-linux-gnueabihf -L/cross-build/lib/arm-linux-gnueabihf" >> $GITHUB_ENV
|
||||
|
||||
- name: Build the executable
|
||||
run: cargo build --release --target=arm-unknown-linux-gnueabihf
|
||||
|
||||
- name: Attach artifact to the release
|
||||
uses: Shopify/upload-to-release@1.0.0
|
||||
uses: Shopify/upload-to-release@v2.0.0
|
||||
with:
|
||||
name: monolith-gnu-linux-armhf
|
||||
path: target/arm-unknown-linux-gnueabihf/release/monolith
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
gnu_linux_x86_64:
|
||||
runs-on: ubuntu-18.04
|
||||
gnu_linux_aarch64:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: Checkout the repository
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Prepare cross-platform environment
|
||||
run: |
|
||||
sudo mkdir /cross-build
|
||||
sudo touch /etc/apt/sources.list.d/arm64.list
|
||||
echo "deb [arch=arm64] http://ports.ubuntu.com/ubuntu-ports/ focal main" | sudo tee -a /etc/apt/sources.list.d/arm64.list
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y gcc-aarch64-linux-gnu libc6-arm64-cross libc6-dev-arm64-cross
|
||||
sudo apt-get download libssl1.1:arm64 libssl-dev:arm64
|
||||
sudo dpkg -x libssl1.1*.deb /cross-build
|
||||
sudo dpkg -x libssl-dev*.deb /cross-build
|
||||
rustup target add aarch64-unknown-linux-gnu
|
||||
echo "C_INCLUDE_PATH=/cross-build/usr/include" >> $GITHUB_ENV
|
||||
echo "OPENSSL_INCLUDE_DIR=/cross-build/usr/include/aarch64-linux-gnu" >> $GITHUB_ENV
|
||||
echo "OPENSSL_LIB_DIR=/cross-build/usr/lib/aarch64-linux-gnu" >> $GITHUB_ENV
|
||||
echo "PKG_CONFIG_ALLOW_CROSS=1" >> $GITHUB_ENV
|
||||
echo "RUSTFLAGS=-C linker=aarch64-linux-gnu-gcc -L/usr/aarch64-linux-gnu/lib -L/cross-build/usr/lib/aarch64-linux-gnu" >> $GITHUB_ENV
|
||||
|
||||
- name: Build the executable
|
||||
run: cargo build --release --target=aarch64-unknown-linux-gnu
|
||||
|
||||
- name: Attach artifact to the release
|
||||
uses: Shopify/upload-to-release@v2.0.0
|
||||
with:
|
||||
name: monolith-gnu-linux-aarch64
|
||||
path: target/aarch64-unknown-linux-gnu/release/monolith
|
||||
repo-token: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
gnu_linux_x86_64:
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: Checkout the repository
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Build the executable
|
||||
run: cargo build --release
|
||||
- uses: Shopify/upload-to-release@1.0.0
|
||||
|
||||
- uses: Shopify/upload-to-release@v2.0.0
|
||||
with:
|
||||
name: monolith-gnu-linux-x86_64
|
||||
path: target/release/monolith
|
||||
|
||||
34
.github/workflows/ci-netbsd.yml
vendored
Normal file
34
.github/workflows/ci-netbsd.yml
vendored
Normal file
@@ -0,0 +1,34 @@
|
||||
# CI NetBSD GitHub Actions workflow for monolith
|
||||
|
||||
name: CI
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
paths-ignore:
|
||||
- 'assets/'
|
||||
- 'dist/'
|
||||
- 'snap/'
|
||||
- 'Dockerfile'
|
||||
- 'LICENSE'
|
||||
- 'Makefile'
|
||||
- 'monolith.nuspec'
|
||||
- 'README.md'
|
||||
|
||||
jobs:
|
||||
build_and_test:
|
||||
runs-on: ubuntu-latest
|
||||
name: Build and test (netbsd)
|
||||
steps:
|
||||
- name: "Checkout repository"
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Test in NetBSD
|
||||
uses: vmactions/netbsd-vm@v1
|
||||
with:
|
||||
usesh: true
|
||||
prepare: |
|
||||
/usr/sbin/pkg_add rust mktools gmake pkgconf cwrappers
|
||||
run: |
|
||||
cargo build --all --locked --verbose --no-default-features
|
||||
cargo test --all --locked --verbose --no-default-features
|
||||
25
.github/workflows/ci.yml
vendored
25
.github/workflows/ci.yml
vendored
@@ -1,31 +1,42 @@
|
||||
# CI GitHub Actions workflow for monolith
|
||||
|
||||
name: CI
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches: [ master ]
|
||||
paths-ignore:
|
||||
- 'assets/'
|
||||
- 'dist/'
|
||||
- 'snap/'
|
||||
- 'Dockerfile'
|
||||
- 'LICENSE'
|
||||
- 'Makefile'
|
||||
- 'monolith.nuspec'
|
||||
- 'README.md'
|
||||
|
||||
jobs:
|
||||
build_and_test:
|
||||
|
||||
name: Build and test
|
||||
strategy:
|
||||
matrix:
|
||||
os:
|
||||
- ubuntu-latest
|
||||
- macos-latest
|
||||
- windows-latest
|
||||
rust:
|
||||
- stable
|
||||
- beta
|
||||
- nightly
|
||||
runs-on: ${{ matrix.os }}
|
||||
|
||||
steps:
|
||||
- run: git config --global core.autocrlf false
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: "Checkout repository"
|
||||
uses: actions/checkout@v4
|
||||
|
||||
- name: Build
|
||||
run: cargo build --all --locked --verbose
|
||||
|
||||
- name: Run tests
|
||||
run: cargo test --all --locked --verbose
|
||||
|
||||
- name: Check code formatting
|
||||
run: |
|
||||
rustup component add rustfmt
|
||||
|
||||
1619
Cargo.lock
generated
1619
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
50
Cargo.toml
50
Cargo.toml
@@ -1,14 +1,15 @@
|
||||
[package]
|
||||
name = "monolith"
|
||||
version = "2.5.0"
|
||||
version = "2.8.3"
|
||||
authors = [
|
||||
"Sunshine <sunshine@uberspace.net>",
|
||||
"Sunshine <snshn@tutanota.com>",
|
||||
"Mahdi Robatipoor <mahdi.robatipoor@gmail.com>",
|
||||
"Emmanuel Delaborde <th3rac25@gmail.com>",
|
||||
"Emi Simpson <emi@alchemi.dev>",
|
||||
"rhysd <lin90162@yahoo.co.jp>",
|
||||
"Andriy Rakhnin <a@rakhnin.com>",
|
||||
]
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
description = "CLI tool for saving web pages as a single HTML file"
|
||||
homepage = "https://github.com/Y2Z/monolith"
|
||||
repository = "https://github.com/Y2Z/monolith"
|
||||
@@ -21,21 +22,36 @@ include = [
|
||||
]
|
||||
license = "CC0-1.0"
|
||||
|
||||
[dependencies]
|
||||
atty = "0.2.14" # Used for highlighting network errors
|
||||
base64 = "0.13.0"
|
||||
chrono = "0.4.19" # Used for formatting creation timestamp
|
||||
clap = "2.33.3"
|
||||
cssparser = "0.28.1"
|
||||
html5ever = "0.24.1"
|
||||
regex = "1.5.4" # Used for parsing srcset and NOSCRIPT
|
||||
sha2 = "0.9.5" # Used for calculating checksums during integrity checks
|
||||
url = "2.2.2"
|
||||
[features]
|
||||
default = ["vendored-openssl"]
|
||||
# Compile and statically link a copy of OpenSSL.
|
||||
vendored-openssl = ["openssl/vendored"]
|
||||
|
||||
[dependencies.reqwest]
|
||||
version = "0.11.3"
|
||||
[dependencies]
|
||||
atty = "0.2.14" # Used for highlighting network errors
|
||||
base64 = "0.22.1" # Used for integrity attributes
|
||||
chrono = "0.4.38" # Used for formatting output timestamp
|
||||
clap = "3.2.25" # Used for processing CLI arguments
|
||||
cssparser = "0.34.0" # Used for dealing with CSS
|
||||
encoding_rs = "0.8.34" # Used for parsing and converting document charsets
|
||||
html5ever = "0.27.0" # Used for all things DOM
|
||||
markup5ever_rcdom = "0.3.0" # Used for manipulating DOM
|
||||
percent-encoding = "2.3.1" # Used for encoding URLs
|
||||
sha2 = "0.10.8" # Used for calculating checksums during integrity checks
|
||||
url = "2.5.2" # Used for parsing URLs
|
||||
openssl = "0.10.64" # Used for static linking of the OpenSSL library
|
||||
|
||||
# Used for parsing srcset and NOSCRIPT
|
||||
[dependencies.regex]
|
||||
version = "1.10.6"
|
||||
default-features = false
|
||||
features = ["default-tls", "blocking", "gzip"]
|
||||
features = ["std", "perf-dfa", "unicode-perl"]
|
||||
|
||||
# Used for making network requests
|
||||
[dependencies.reqwest]
|
||||
version = "0.12.7"
|
||||
default-features = false
|
||||
features = ["default-tls", "blocking", "gzip", "brotli", "deflate"]
|
||||
|
||||
[dev-dependencies]
|
||||
assert_cmd = "1.0.4"
|
||||
assert_cmd = "2.0.16"
|
||||
|
||||
24
Dockerfile
24
Dockerfile
@@ -1,18 +1,22 @@
|
||||
FROM rust
|
||||
|
||||
WORKDIR /usr/local/src/
|
||||
RUN curl -s https://api.github.com/repos/y2z/monolith/releases/latest \
|
||||
| grep "tarball_url.*\"," \
|
||||
| cut -d '"' -f 4 \
|
||||
| wget -qi - -O monolith.tar.gz
|
||||
FROM clux/muslrust:stable as builder
|
||||
|
||||
RUN curl -L -o monolith.tar.gz $(curl -s https://api.github.com/repos/y2z/monolith/releases/latest \
|
||||
| grep "tarball_url.*\"," \
|
||||
| cut -d '"' -f 4)
|
||||
RUN tar xfz monolith.tar.gz \
|
||||
&& mv Y2Z-monolith-* monolith \
|
||||
&& rm monolith.tar.gz
|
||||
|
||||
WORKDIR /usr/local/src/monolith
|
||||
RUN ls -a
|
||||
WORKDIR monolith/
|
||||
RUN make install
|
||||
|
||||
|
||||
FROM alpine
|
||||
|
||||
RUN apk update && \
|
||||
apk add --no-cache openssl && \
|
||||
rm -rf "/var/cache/apk/*"
|
||||
|
||||
COPY --from=builder /root/.cargo/bin/monolith /usr/bin/monolith
|
||||
WORKDIR /tmp
|
||||
CMD ["/usr/local/cargo/bin/monolith"]
|
||||
ENTRYPOINT ["/usr/bin/monolith"]
|
||||
|
||||
27
Makefile
27
Makefile
@@ -7,23 +7,30 @@ build:
|
||||
@cargo build --locked
|
||||
.PHONY: build
|
||||
|
||||
test: build
|
||||
@cargo test --locked
|
||||
@cargo fmt --all -- --check
|
||||
.PHONY: test
|
||||
|
||||
lint:
|
||||
@cargo fmt --all --
|
||||
.PHONY: lint
|
||||
clean:
|
||||
@cargo clean
|
||||
.PHONY: clean
|
||||
|
||||
install:
|
||||
@cargo install --force --locked --path .
|
||||
.PHONY: install
|
||||
|
||||
lint:
|
||||
@cargo fmt --all --
|
||||
.PHONY: lint
|
||||
|
||||
lint_check:
|
||||
@cargo fmt --all -- --check
|
||||
.PHONY: lint_check
|
||||
|
||||
test: build
|
||||
@cargo test --locked
|
||||
.PHONY: test
|
||||
|
||||
uninstall:
|
||||
@cargo uninstall
|
||||
.PHONY: uninstall
|
||||
|
||||
clean:
|
||||
@cargo clean
|
||||
update-lock-file:
|
||||
@cargo update
|
||||
.PHONY: clean
|
||||
|
||||
221
README.md
221
README.md
@@ -1,6 +1,6 @@
|
||||
[](https://github.com/Y2Z/monolith/actions?query=workflow%3AGNU%2FLinux)
|
||||
[](https://github.com/Y2Z/monolith/actions?query=workflow%3AmacOS)
|
||||
[](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows)
|
||||
[](https://github.com/Y2Z/monolith/actions?query=workflow%3AGNU%2FLinux)
|
||||
[](https://github.com/Y2Z/monolith/actions?query=workflow%3AmacOS)
|
||||
[](https://github.com/Y2Z/monolith/actions?query=workflow%3AWindows)
|
||||
|
||||
```
|
||||
_____ ______________ __________ ___________________ ___
|
||||
@@ -18,101 +18,242 @@ Unlike the conventional “Save page as”, `monolith` not only saves the target
|
||||
|
||||
If compared to saving websites with `wget -mpk`, this tool embeds all assets as data URLs and therefore lets browsers render the saved page exactly the way it was on the Internet, even when no network connection is available.
|
||||
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
|
||||
## Installation
|
||||
|
||||
#### Using [Cargo](https://crates.io/crates/monolith)
|
||||
$ cargo install monolith
|
||||
#### Using [Cargo](https://crates.io/crates/monolith) (cross-platform)
|
||||
|
||||
```console
|
||||
cargo install monolith
|
||||
```
|
||||
|
||||
#### Via [Homebrew](https://formulae.brew.sh/formula/monolith) (macOS and GNU/Linux)
|
||||
$ brew install monolith
|
||||
|
||||
```console
|
||||
brew install monolith
|
||||
```
|
||||
|
||||
#### Via [Chocolatey](https://community.chocolatey.org/packages/monolith) (Windows)
|
||||
|
||||
```console
|
||||
choco install monolith
|
||||
```
|
||||
|
||||
#### Via [Scoop](https://scoop.sh/#/apps?q=monolith) (Windows)
|
||||
|
||||
```console
|
||||
scoop install main/monolith
|
||||
```
|
||||
|
||||
#### Via [Winget](https://winstall.app/apps/Y2Z.Monolith) (Windows)
|
||||
|
||||
```console
|
||||
winget install --id=Y2Z.Monolith -e
|
||||
```
|
||||
|
||||
#### Via [MacPorts](https://ports.macports.org/port/monolith/summary) (macOS)
|
||||
|
||||
```console
|
||||
sudo port install monolith
|
||||
```
|
||||
|
||||
#### Using [Snapcraft](https://snapcraft.io/monolith) (GNU/Linux)
|
||||
$ snap install monolith
|
||||
|
||||
```console
|
||||
snap install monolith
|
||||
```
|
||||
|
||||
#### Using [Guix](https://packages.guix.gnu.org/packages/monolith) (GNU/Linux)
|
||||
|
||||
```console
|
||||
guix install monolith
|
||||
```
|
||||
|
||||
#### Using [NixPkgs](https://search.nixos.org/packages?channel=unstable&show=monolith&query=monolith)
|
||||
|
||||
```console
|
||||
nix-env -iA nixpkgs.monolith
|
||||
```
|
||||
|
||||
#### Using [Flox](https://flox.dev)
|
||||
|
||||
```console
|
||||
flox install monolith
|
||||
```
|
||||
|
||||
#### Using [Pacman](https://archlinux.org/packages/extra/x86_64/monolith) (Arch Linux)
|
||||
|
||||
```console
|
||||
pacman -S monolith
|
||||
```
|
||||
|
||||
#### Using [aports](https://pkgs.alpinelinux.org/packages?name=monolith) (Alpine Linux)
|
||||
|
||||
```console
|
||||
apk add monolith
|
||||
```
|
||||
|
||||
#### Using [XBPS Package Manager](https://voidlinux.org/packages/?q=monolith) (Void Linux)
|
||||
|
||||
```console
|
||||
xbps-install -S monolith
|
||||
```
|
||||
|
||||
#### Using [FreeBSD packages](https://svnweb.freebsd.org/ports/head/www/monolith/) (FreeBSD)
|
||||
$ pkg install monolith
|
||||
|
||||
```console
|
||||
pkg install monolith
|
||||
```
|
||||
|
||||
#### Using [FreeBSD ports](https://www.freshports.org/www/monolith/) (FreeBSD)
|
||||
$ cd /usr/ports/www/monolith/
|
||||
$ make install clean
|
||||
|
||||
```console
|
||||
cd /usr/ports/www/monolith/
|
||||
make install clean
|
||||
```
|
||||
|
||||
#### Using [pkgsrc](https://pkgsrc.se/www/monolith) (NetBSD, OpenBSD, Haiku, etc)
|
||||
$ cd /usr/pkgsrc/www/monolith
|
||||
$ make install clean
|
||||
|
||||
#### Using [pre-built binaries](https://github.com/Y2Z/monolith/releases) (Windows, ARM-based devices, etc)
|
||||
Every release contains pre-built binaries for Windows, GNU/Linux, as well as platforms with non-standart CPU architecture.
|
||||
```console
|
||||
cd /usr/pkgsrc/www/monolith
|
||||
make install clean
|
||||
```
|
||||
|
||||
#### Using [containers](https://www.docker.com/)
|
||||
$ docker build -t Y2Z/monolith .
|
||||
$ sudo install -b utils/run-in-container.sh /usr/local/bin/monolith
|
||||
|
||||
#### From source
|
||||
```console
|
||||
docker build -t y2z/monolith .
|
||||
sudo install -b dist/run-in-container.sh /usr/local/bin/monolith
|
||||
```
|
||||
|
||||
Dependency: `libssl-dev`
|
||||
#### From [source](https://github.com/Y2Z/monolith)
|
||||
|
||||
Dependencies: `libssl`, `cargo`
|
||||
|
||||
<details>
|
||||
<summary>Install cargo (GNU/Linux)</summary>
|
||||
Check if cargo is installed
|
||||
|
||||
```console
|
||||
cargo -v
|
||||
```
|
||||
|
||||
If cargo is not already installed, install and add it to your existing ```$PATH``` (paraphrasing the [official installation instructions](https://doc.rust-lang.org/cargo/getting-started/installation.html)):
|
||||
|
||||
```console
|
||||
curl https://sh.rustup.rs -sSf | sh
|
||||
. "$HOME/.cargo/env"
|
||||
```
|
||||
|
||||
Proceed with installing from source:
|
||||
</details>
|
||||
|
||||
```console
|
||||
git clone https://github.com/Y2Z/monolith.git
|
||||
cd monolith
|
||||
make install
|
||||
```
|
||||
|
||||
#### Using [pre-built binaries](https://github.com/Y2Z/monolith/releases) (Windows, ARM-based devices, etc)
|
||||
|
||||
Every release contains pre-built binaries for Windows, GNU/Linux, as well as platforms with non-standard CPU architecture.
|
||||
|
||||
$ git clone https://github.com/Y2Z/monolith.git
|
||||
$ cd monolith
|
||||
$ make install
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
|
||||
## Usage
|
||||
$ monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roads-lyrics.html
|
||||
or
|
||||
|
||||
$ cat index.html | monolith -aIiFfcMv -b https://original.site/ - > result.html
|
||||
```console
|
||||
monolith https://lyrics.github.io/db/P/Portishead/Dummy/Roads/ -o portishead-roads-lyrics.html
|
||||
```
|
||||
|
||||
```console
|
||||
cat some-site-page.html | monolith -aIiFfcMv -b https://some.site/ - > some-site-page-with-assets.html
|
||||
```
|
||||
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
|
||||
## Options
|
||||
|
||||
- `-a`: Exclude audio sources
|
||||
- `-b`: Use custom `base URL`
|
||||
- `-b`: Use `custom base URL`
|
||||
- `-B`: Forbid retrieving assets from specified domain(s)
|
||||
- `-c`: Exclude CSS
|
||||
- `-C`: Read cookies from `file`
|
||||
- `-d`: Allow retrieving assets only from specified `domain(s)`
|
||||
- `-e`: Ignore network errors
|
||||
- `-E`: Save document using `custom encoding`
|
||||
- `-f`: Omit frames
|
||||
- `-F`: Exclude web fonts
|
||||
- `-h`: Print help information
|
||||
- `-i`: Remove images
|
||||
- `-I`: Isolate the document
|
||||
- `-j`: Exclude JavaScript
|
||||
- `-k`: Accept invalid X.509 (TLS) certificates
|
||||
- `-M`: Don't add timestamp and URL information
|
||||
- `-n`: Extract contents of NOSCRIPT elements
|
||||
- `-o`: Write output to `file`
|
||||
- `-o`: Write output to `file` (use “-” for STDOUT)
|
||||
- `-s`: Be quiet
|
||||
- `-t`: Adjust `network request timeout`
|
||||
- `-u`: Provide custom `User-Agent`
|
||||
- `-u`: Provide `custom User-Agent`
|
||||
- `-v`: Exclude videos
|
||||
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
|
||||
## Whitelisting and blacklisting domains
|
||||
|
||||
Options `-d` and `-B` provide control over what domains can be used to retrieve assets from, e.g.:
|
||||
|
||||
```console
|
||||
monolith -I -d example.com -d www.example.com https://example.com -o example-only.html
|
||||
```
|
||||
|
||||
```console
|
||||
monolith -I -B -d .googleusercontent.com -d googleanalytics.com -d .google.com https://example.com -o example-no-ads.html
|
||||
```
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
|
||||
## Dynamic content
|
||||
|
||||
Monolith doesn't feature a JavaScript engine, hence websites that retrieve and display data after initial load may require usage of additional tools.
|
||||
|
||||
For example, Chromium (Chrome) can be used to act as a pre-processor for such pages:
|
||||
|
||||
```console
|
||||
chromium --headless --window-size=1920,1080 --run-all-compositor-stages-before-draw --virtual-time-budget=9000 --incognito --dump-dom https://github.com | monolith - -I -b https://github.com -o github.html
|
||||
```
|
||||
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
|
||||
## Proxies
|
||||
|
||||
Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables.
|
||||
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
|
||||
## Contributing
|
||||
|
||||
Please open an issue if something is wrong, that helps make this project better.
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
## Related projects
|
||||
- Monolith Chrome Extension: https://github.com/rhysd/monolith-of-web
|
||||
- Pagesaver: https://github.com/distributed-mind/pagesaver
|
||||
- Personal WayBack Machine: https://github.com/popey/pwbm
|
||||
- Hako: https://github.com/dmpop/hako
|
||||
- Monk: https://gitlab.com/fisherdarling/monk
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
|
||||
## License
|
||||
|
||||
To the extent possible under law, the author(s) have dedicated all copyright related and neighboring rights to this software to the public domain worldwide.
|
||||
This software is distributed without any warranty.
|
||||
|
||||
---------------------------------------------------
|
||||
|
||||
<!-- Microtext -->
|
||||
<sub>Keep in mind that `monolith` is not aware of your browser’s session</sub>
|
||||
|
||||
6
utils/run-in-container.sh → dist/run-in-container.sh
vendored
Normal file → Executable file
6
utils/run-in-container.sh → dist/run-in-container.sh
vendored
Normal file → Executable file
@@ -1,10 +1,10 @@
|
||||
#!/bin/sh
|
||||
|
||||
DOCKER=docker
|
||||
PROG_NAME=monolith
|
||||
|
||||
if which podman 2>&1 > /dev/null; then
|
||||
DOCKER=podman
|
||||
fi
|
||||
ORG_NAME=y2z
|
||||
PROG_NAME=monolith
|
||||
|
||||
$DOCKER run --rm Y2Z/$PROG_NAME $PROG_NAME "$@"
|
||||
$DOCKER run --rm $ORG_NAME/$PROG_NAME "$@"
|
||||
@@ -1,19 +0,0 @@
|
||||
# 1. Record architecture decisions
|
||||
|
||||
Date: 2019-12-25
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
We need to record the architectural decisions made on this project.
|
||||
|
||||
## Decision
|
||||
|
||||
We will use Architecture Decision Records, as [described by Michael Nygard](http://thinkrelevance.com/blog/2011/11/15/documenting-architecture-decisions).
|
||||
|
||||
## Consequences
|
||||
|
||||
See Michael Nygard's article, linked above. For a lightweight ADR toolset, see Nat Pryce's [adr-tools](https://github.com/npryce/adr-tools).
|
||||
@@ -1,19 +0,0 @@
|
||||
# 2. NOSCRIPT nodes
|
||||
|
||||
Date: 2020-04-16
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
HTML pages can contain `noscript` nodes, which reveal their contents only in case when JavaScript is not available. Most of the time they contain hidden messages that inform about certain JavaScript-dependent features not being operational, however sometimes can also feature media assets or even iframes.
|
||||
|
||||
## Decision
|
||||
|
||||
When the document is being saved with or without JavaScript, each `noscript` node should be preserved while its children need to be processed exactly the same way as the rest of the document. This approach will ensure that even hidden remote assets are embedded — since those hidden elements may have to be displayed later in a browser that has JavaScript turned off. An option should be available to "unwrap" all `noscript` nodes in order to make their contents always visible in the document, complimenting the "disable JS" function of the program.
|
||||
|
||||
## Consequences
|
||||
|
||||
Saved documents will have contents of all `noscript` nodes processed as if they are part of the document's DOM, therefore properly display images encapsulated within `noscript` nodes when being viewed in browsers that have JavaScript turned off (or have no JavaScript support in the first place). The new option to "unwrap" `noscript` elements will help the user ensure that the resulting document always represents what the original web page looked like in a browser that had JavaScript turned off.
|
||||
@@ -1,21 +0,0 @@
|
||||
# 3. Network request timeout
|
||||
|
||||
Date: 2020-02-15
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
A slow network connection and overloaded server may negatively impact network response time.
|
||||
|
||||
## Decision
|
||||
|
||||
Make the program simulate behavior of popular web browsers and CLI tools, where the default network response timeout is most often set to 120 seconds.
|
||||
|
||||
Instead of featuring retries for timed out network requests, the program should have an option to adjust the timeout length, along with making it indefinite when given "0" as its value.
|
||||
|
||||
## Consequences
|
||||
|
||||
The user is able to retrieve resources that have long response time, as well as obtain full control over how soon, and if at all, network requests should time out.
|
||||
@@ -1,21 +0,0 @@
|
||||
# 4. Asset integrity check
|
||||
|
||||
Date: 2020-02-23
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
In HTML5, `link` and `script` nodes have an attribute named `integrity`, which lets the browser check if the remote file is valid, mostly for the purpose of enhancing page security.
|
||||
|
||||
## Decision
|
||||
|
||||
In order to replicate the browser's behavior, the program should perform integrity check the same way it does, excluding the linked asset from the final result if such check fails.
|
||||
|
||||
The `integrity` attribute should be removed from nodes, as it bears no benefit for resources embedded as data URLs.
|
||||
|
||||
## Consequences
|
||||
|
||||
Assets that fail to pass the check get excluded from the saved document. Meanwhile, saved documents no longer contain integrity attributes on all `link` and `script` nodes.
|
||||
@@ -1,19 +0,0 @@
|
||||
# 5. Asset Minimization
|
||||
|
||||
Date: 2020-03-14
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
It may look like a good idea to make monolith compress retrieved assets while saving the page for the purpose of reducing the resulting document's file size.
|
||||
|
||||
## Decision
|
||||
|
||||
Given that the main purpose of this program is to save pages in a convenient to store and share manner — it's mostly an archiving tool, aside from being able to tell monolith to exclude certain types of asests (e.g. images, CSS, JavaScript), it would be outside of scope of this program to implement code for compressing assets. Minimizing files before embedding them does not reduce the amount of data that needs to be transferred either. A separate tool can be used later to compress and minimize pages saved by monolith, if needed.
|
||||
|
||||
## Consequences
|
||||
|
||||
Monolith will not support modification of original document assets for the purpose of reducing their size, sticking to performing only minimal amount of modifications to the original web page — whatever is needed to provide security or exclude unwanted asset types.
|
||||
@@ -1,19 +0,0 @@
|
||||
# 6. Reload and location `meta` tags
|
||||
|
||||
Date: 2020-06-25
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
HTML documents may contain `meta` tags capable of automatically refreshing the page or redirecting to another location.
|
||||
|
||||
## Decision
|
||||
|
||||
Since the resulting document is saved to disk and generally not intended to be served over the network, it only makes sense to remove `meta` tags that have `http-equiv` attribute equal to "Refresh" or "Location", in order to prevent them from reloading the page or redirecting to another location.
|
||||
|
||||
## Consequences
|
||||
|
||||
Monolith will ensure that saved documents do not contain `meta` tags capable of changing location or reloading the page.
|
||||
@@ -1,19 +0,0 @@
|
||||
# 7. Network errors
|
||||
|
||||
Date: 2020-11-22
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
Servers may return information with HTTP response codes other than `200`, however those responses may still contain useful data.
|
||||
|
||||
## Decision
|
||||
|
||||
Fail by default, notifying of the network error. Add option to continue retrieving assets by treating all response codes as `200`.
|
||||
|
||||
## Consequences
|
||||
|
||||
Monolith will fail to obtain resources with status other than `200`, unless told to ignore network errors.
|
||||
@@ -1,40 +0,0 @@
|
||||
# 8. Base Tag
|
||||
|
||||
Date: 2020-12-25
|
||||
|
||||
## Status
|
||||
|
||||
Accepted
|
||||
|
||||
## Context
|
||||
|
||||
HTML documents may contain `base` tag, which influences resolution of anchor links and relative URLs as well as dynamically loaded resources.
|
||||
|
||||
Sometimes, in order to make certain saved documents function closer to how they operate while being served from a remote server, the `base` tag specifying the source page's URL may need to be added to the document.
|
||||
|
||||
There can be only one such tag. If multiple `base` tags are present, only the first encountered tag ends up being used.
|
||||
|
||||
## Decision
|
||||
|
||||
Adding the `base` tag should be optional — saved documents should not contain the `base` tag unless it was specified by the user, or the document originally had the `base` tag in it.
|
||||
|
||||
Existing `href` attribute's value of the original `base` tag should be used for resolving the document's relative links instead of document's own URL (precisely the way browsers do it).
|
||||
|
||||
## Consequences
|
||||
|
||||
#### If the base tag does not exist in the source document
|
||||
|
||||
- If the base tag does not exist in the source document
|
||||
- With base URL option provided
|
||||
- use the specified base URL value to retrieve assets, keep original base URL value in the document
|
||||
- Without base URL option provided
|
||||
- download document as usual, do not add base tag
|
||||
- If the base tag already exists in the source document
|
||||
- With base URL option provided
|
||||
- we overwrite the original base URL before retrieving assets, keep new base URL value in the document
|
||||
- Without base URL option provided:
|
||||
- use the base URL from the original document to retrieve assets, keep original base URL value in the document
|
||||
|
||||
The program will obtain the ability to retrieve remote assets for non-remote sources (such as data URLs and local files).
|
||||
|
||||
The program will obtain the ability to get rid of existing base tag values (by providing an empty one).
|
||||
@@ -1,3 +0,0 @@
|
||||
# References
|
||||
|
||||
- https://content-security-policy.com/
|
||||
@@ -1,10 +0,0 @@
|
||||
# Web apps that can be saved with Monolith
|
||||
|
||||
These apps retain most or all of their functionality when saved with Monolith
|
||||
|
||||
|Converse|https://conversejs.org|
|
||||
|:--|:--|
|
||||
|Description|An XMPP client built using web technologies|
|
||||
|Functionality retained|**full**|
|
||||
|Command to use|`monolith https://conversejs.org/fullscreen.html > conversejs.html`|
|
||||
|Monolith version used|2.2.7|
|
||||
@@ -2,7 +2,7 @@
|
||||
<package xmlns="http://schemas.microsoft.com/packaging/2015/06/nuspec.xsd">
|
||||
<metadata>
|
||||
<id>monolith</id>
|
||||
<version>2.4.0</version>
|
||||
<version>2.8.1</version>
|
||||
<title>Monolith</title>
|
||||
<authors>Sunshine, Mahdi Robatipoor, Emmanuel Delaborde, Emi Simpson, rhysd</authors>
|
||||
<projectUrl>https://github.com/Y2Z/monolith</projectUrl>
|
||||
|
||||
119
src/cookies.rs
Normal file
119
src/cookies.rs
Normal file
@@ -0,0 +1,119 @@
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
use url::Url;
|
||||
|
||||
/// A single HTTP cookie, as read from a Netscape-format cookie file.
pub struct Cookie {
    /// Host (or, when prefixed with a dot, domain) the cookie belongs to.
    pub domain: String,
    /// Whether the cookie also applies to subdomains of `domain`.
    pub include_subdomains: bool,
    /// URL path prefix the cookie is scoped to.
    pub path: String,
    /// Whether the cookie may only be sent over HTTPS.
    pub https_only: bool,
    /// Expiration time in seconds since the UNIX epoch; `0` denotes a session cookie.
    pub expires: u64,
    /// Cookie name.
    pub name: String,
    /// Cookie value.
    pub value: String,
}
|
||||
|
||||
/// Errors that can occur while parsing a cookie file.
#[derive(Debug)]
pub enum CookieFileContentsParseError {
    /// The file's first line is not a recognized cookie-file header.
    InvalidHeader,
}
|
||||
|
||||
impl Cookie {
|
||||
pub fn is_expired(&self) -> bool {
|
||||
if self.expires == 0 {
|
||||
return false; // Session, never expires
|
||||
}
|
||||
|
||||
let start = SystemTime::now();
|
||||
let since_the_epoch = start
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.expect("Time went backwards");
|
||||
|
||||
self.expires < since_the_epoch.as_secs()
|
||||
}
|
||||
|
||||
pub fn matches_url(&self, url: &str) -> bool {
|
||||
match Url::parse(&url) {
|
||||
Ok(url) => {
|
||||
// Check protocol scheme
|
||||
match url.scheme() {
|
||||
"http" => {
|
||||
if self.https_only {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
"https" => {}
|
||||
_ => {
|
||||
// Should never match URLs of protocols other than HTTP(S)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Check host
|
||||
if let Some(url_host) = url.host_str() {
|
||||
if self.domain.starts_with(".") && self.include_subdomains {
|
||||
if !url_host.to_lowercase().ends_with(&self.domain)
|
||||
&& !url_host
|
||||
.eq_ignore_ascii_case(&self.domain[1..self.domain.len() - 1])
|
||||
{
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
if !url_host.eq_ignore_ascii_case(&self.domain) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check path
|
||||
if !url.path().eq_ignore_ascii_case(&self.path)
|
||||
&& !url.path().starts_with(&self.path)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_cookie_file_contents(
|
||||
cookie_file_contents: &str,
|
||||
) -> Result<Vec<Cookie>, CookieFileContentsParseError> {
|
||||
let mut cookies: Vec<Cookie> = Vec::new();
|
||||
|
||||
for (i, line) in cookie_file_contents.lines().enumerate() {
|
||||
if i == 0 {
|
||||
// Parsing first line
|
||||
if !line.eq("# HTTP Cookie File") && !line.eq("# Netscape HTTP Cookie File") {
|
||||
return Err(CookieFileContentsParseError::InvalidHeader);
|
||||
}
|
||||
} else {
|
||||
// Ignore comment lines
|
||||
if line.starts_with("#") {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Attempt to parse values
|
||||
let mut fields = line.split("\t");
|
||||
if fields.clone().count() != 7 {
|
||||
continue;
|
||||
}
|
||||
cookies.push(Cookie {
|
||||
domain: fields.next().unwrap().to_string().to_lowercase(),
|
||||
include_subdomains: fields.next().unwrap().to_string() == "TRUE",
|
||||
path: fields.next().unwrap().to_string(),
|
||||
https_only: fields.next().unwrap().to_string() == "TRUE",
|
||||
expires: fields.next().unwrap().parse::<u64>().unwrap(),
|
||||
name: fields.next().unwrap().to_string(),
|
||||
value: fields.next().unwrap().to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
Ok(cookies)
|
||||
}
|
||||
95
src/css.rs
95
src/css.rs
@@ -6,7 +6,7 @@ use std::collections::HashMap;
|
||||
use url::Url;
|
||||
|
||||
use crate::opts::Options;
|
||||
use crate::url::{create_data_url, resolve_url};
|
||||
use crate::url::{create_data_url, resolve_url, EMPTY_IMAGE_DATA_URL};
|
||||
use crate::utils::retrieve_asset;
|
||||
|
||||
const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[
|
||||
@@ -36,7 +36,6 @@ pub fn embed_css(
|
||||
document_url: &Url,
|
||||
css: &str,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
) -> String {
|
||||
let mut input = ParserInput::new(&css);
|
||||
let mut parser = Parser::new(&mut input);
|
||||
@@ -47,7 +46,6 @@ pub fn embed_css(
|
||||
document_url,
|
||||
&mut parser,
|
||||
options,
|
||||
depth,
|
||||
"",
|
||||
"",
|
||||
"",
|
||||
@@ -56,14 +54,14 @@ pub fn embed_css(
|
||||
}
|
||||
|
||||
pub fn format_ident(ident: &str) -> String {
|
||||
let mut res: String = String::new();
|
||||
let mut res: String = "".to_string();
|
||||
let _ = serialize_identifier(ident, &mut res);
|
||||
res = res.trim_end().to_string();
|
||||
res
|
||||
}
|
||||
|
||||
pub fn format_quoted_string(string: &str) -> String {
|
||||
let mut res: String = String::new();
|
||||
let mut res: String = "".to_string();
|
||||
let _ = serialize_string(string, &mut res);
|
||||
res
|
||||
}
|
||||
@@ -81,15 +79,14 @@ pub fn process_css<'a>(
|
||||
document_url: &Url,
|
||||
parser: &mut Parser,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
rule_name: &str,
|
||||
prop_name: &str,
|
||||
func_name: &str,
|
||||
) -> Result<String, ParseError<'a, String>> {
|
||||
let mut result: String = str!();
|
||||
let mut result: String = "".to_string();
|
||||
|
||||
let mut curr_rule: String = str!(rule_name.clone());
|
||||
let mut curr_prop: String = str!(prop_name.clone());
|
||||
let mut curr_rule: String = rule_name.to_string();
|
||||
let mut curr_prop: String = prop_name.to_string();
|
||||
let mut token: &Token;
|
||||
let mut token_offset: SourcePosition;
|
||||
|
||||
@@ -105,7 +102,7 @@ pub fn process_css<'a>(
|
||||
match *token {
|
||||
Token::Comment(_) => {
|
||||
let token_slice = parser.slice_from(token_offset);
|
||||
result.push_str(str!(token_slice).as_str());
|
||||
result.push_str(token_slice);
|
||||
}
|
||||
Token::Semicolon => result.push_str(";"),
|
||||
Token::Colon => result.push_str(":"),
|
||||
@@ -135,7 +132,6 @@ pub fn process_css<'a>(
|
||||
document_url,
|
||||
parser,
|
||||
options,
|
||||
depth,
|
||||
rule_name,
|
||||
curr_prop.as_str(),
|
||||
func_name,
|
||||
@@ -161,13 +157,13 @@ pub fn process_css<'a>(
|
||||
}
|
||||
// div...
|
||||
Token::Ident(ref value) => {
|
||||
curr_rule = str!();
|
||||
curr_prop = str!(value);
|
||||
curr_rule = "".to_string();
|
||||
curr_prop = value.to_string();
|
||||
result.push_str(&format_ident(value));
|
||||
}
|
||||
// @import, @font-face, @charset, @media...
|
||||
Token::AtKeyword(ref value) => {
|
||||
curr_rule = str!(value);
|
||||
curr_rule = value.to_string();
|
||||
if options.no_fonts && curr_rule == "font-face" {
|
||||
continue;
|
||||
}
|
||||
@@ -181,7 +177,7 @@ pub fn process_css<'a>(
|
||||
Token::QuotedString(ref value) => {
|
||||
if curr_rule == "import" {
|
||||
// Reset current at-rule value
|
||||
curr_rule = str!();
|
||||
curr_rule = "".to_string();
|
||||
|
||||
// Skip empty import values
|
||||
if value.len() == 0 {
|
||||
@@ -190,24 +186,22 @@ pub fn process_css<'a>(
|
||||
}
|
||||
|
||||
let import_full_url: Url = resolve_url(&document_url, value);
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&document_url,
|
||||
&import_full_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((import_contents, import_final_url, _import_media_type)) => {
|
||||
match retrieve_asset(cache, client, &document_url, &import_full_url, options) {
|
||||
Ok((
|
||||
import_contents,
|
||||
import_final_url,
|
||||
import_media_type,
|
||||
import_charset,
|
||||
)) => {
|
||||
let mut import_data_url = create_data_url(
|
||||
"text/css",
|
||||
&import_media_type,
|
||||
&import_charset,
|
||||
embed_css(
|
||||
cache,
|
||||
client,
|
||||
&import_final_url,
|
||||
&String::from_utf8_lossy(&import_contents),
|
||||
options,
|
||||
depth + 1,
|
||||
)
|
||||
.as_bytes(),
|
||||
&import_final_url,
|
||||
@@ -236,7 +230,7 @@ pub fn process_css<'a>(
|
||||
}
|
||||
|
||||
if options.no_images && is_image_url_prop(curr_prop.as_str()) {
|
||||
result.push_str(format_quoted_string(empty_image!()).as_str());
|
||||
result.push_str(format_quoted_string(EMPTY_IMAGE_DATA_URL).as_str());
|
||||
} else {
|
||||
let resolved_url: Url = resolve_url(&document_url, value);
|
||||
match retrieve_asset(
|
||||
@@ -245,11 +239,10 @@ pub fn process_css<'a>(
|
||||
&document_url,
|
||||
&resolved_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((data, final_url, media_type)) => {
|
||||
Ok((data, final_url, media_type, charset)) => {
|
||||
let mut data_url =
|
||||
create_data_url(&media_type, &data, &final_url);
|
||||
create_data_url(&media_type, &charset, &data, &final_url);
|
||||
data_url.set_fragment(resolved_url.fragment());
|
||||
result.push_str(
|
||||
format_quoted_string(&data_url.to_string()).as_str(),
|
||||
@@ -291,7 +284,7 @@ pub fn process_css<'a>(
|
||||
if *has_sign && *unit_value >= 0. {
|
||||
result.push_str("+");
|
||||
}
|
||||
result.push_str(str!(unit_value * 100.0).as_str());
|
||||
result.push_str(&(unit_value * 100.0).to_string());
|
||||
result.push_str("%");
|
||||
}
|
||||
Token::Dimension {
|
||||
@@ -303,12 +296,12 @@ pub fn process_css<'a>(
|
||||
if *has_sign && *value >= 0. {
|
||||
result.push_str("+");
|
||||
}
|
||||
result.push_str(str!(value).as_str());
|
||||
result.push_str(str!(unit).as_str());
|
||||
result.push_str(&value.to_string());
|
||||
result.push_str(&unit.to_string());
|
||||
}
|
||||
// #selector, #id...
|
||||
Token::IDHash(ref value) => {
|
||||
curr_rule = str!();
|
||||
curr_rule = "".to_string();
|
||||
result.push_str("#");
|
||||
result.push_str(&format_ident(value));
|
||||
}
|
||||
@@ -318,7 +311,7 @@ pub fn process_css<'a>(
|
||||
|
||||
if is_import {
|
||||
// Reset current at-rule value
|
||||
curr_rule = str!();
|
||||
curr_rule = "".to_string();
|
||||
}
|
||||
|
||||
// Skip empty url()'s
|
||||
@@ -335,24 +328,17 @@ pub fn process_css<'a>(
|
||||
result.push_str("url(");
|
||||
if is_import {
|
||||
let full_url: Url = resolve_url(&document_url, value);
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&document_url,
|
||||
&full_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((css, final_url, _media_type)) => {
|
||||
match retrieve_asset(cache, client, &document_url, &full_url, options) {
|
||||
Ok((css, final_url, media_type, charset)) => {
|
||||
let mut data_url = create_data_url(
|
||||
"text/css",
|
||||
&media_type,
|
||||
&charset,
|
||||
embed_css(
|
||||
cache,
|
||||
client,
|
||||
&final_url,
|
||||
&String::from_utf8_lossy(&css),
|
||||
options,
|
||||
depth + 1,
|
||||
)
|
||||
.as_bytes(),
|
||||
&final_url,
|
||||
@@ -370,19 +356,13 @@ pub fn process_css<'a>(
|
||||
}
|
||||
} else {
|
||||
if is_image_url_prop(curr_prop.as_str()) && options.no_images {
|
||||
result.push_str(format_quoted_string(empty_image!()).as_str());
|
||||
result.push_str(format_quoted_string(EMPTY_IMAGE_DATA_URL).as_str());
|
||||
} else {
|
||||
let full_url: Url = resolve_url(&document_url, value);
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&document_url,
|
||||
&full_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((data, final_url, media_type)) => {
|
||||
let mut data_url = create_data_url(&media_type, &data, &final_url);
|
||||
match retrieve_asset(cache, client, &document_url, &full_url, options) {
|
||||
Ok((data, final_url, media_type, charset)) => {
|
||||
let mut data_url =
|
||||
create_data_url(&media_type, &charset, &data, &final_url);
|
||||
data_url.set_fragment(full_url.fragment());
|
||||
result
|
||||
.push_str(format_quoted_string(&data_url.to_string()).as_str());
|
||||
@@ -415,7 +395,6 @@ pub fn process_css<'a>(
|
||||
document_url,
|
||||
parser,
|
||||
options,
|
||||
depth,
|
||||
curr_rule.as_str(),
|
||||
curr_prop.as_str(),
|
||||
function_name,
|
||||
|
||||
500
src/html.rs
500
src/html.rs
@@ -1,12 +1,13 @@
|
||||
use base64;
|
||||
use base64::prelude::*;
|
||||
use chrono::prelude::*;
|
||||
use encoding_rs::Encoding;
|
||||
use html5ever::interface::QualName;
|
||||
use html5ever::parse_document;
|
||||
use html5ever::rcdom::{Handle, NodeData, RcDom};
|
||||
use html5ever::serialize::{serialize, SerializeOpts};
|
||||
use html5ever::tendril::{format_tendril, TendrilSink};
|
||||
use html5ever::tree_builder::{Attribute, TreeSink};
|
||||
use html5ever::{local_name, namespace_url, ns, LocalName};
|
||||
use markup5ever_rcdom::{Handle, NodeData, RcDom, SerializableHandle};
|
||||
use regex::Regex;
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::Url;
|
||||
@@ -17,8 +18,19 @@ use std::default::Default;
|
||||
use crate::css::embed_css;
|
||||
use crate::js::attr_is_event_handler;
|
||||
use crate::opts::Options;
|
||||
use crate::url::{clean_url, create_data_url, is_url_and_has_protocol, resolve_url};
|
||||
use crate::utils::retrieve_asset;
|
||||
use crate::url::{
|
||||
clean_url, create_data_url, is_url_and_has_protocol, resolve_url, EMPTY_IMAGE_DATA_URL,
|
||||
};
|
||||
use crate::utils::{parse_content_type, retrieve_asset};
|
||||
|
||||
#[derive(PartialEq, Eq)]
|
||||
pub enum LinkType {
|
||||
Alternate,
|
||||
DnsPrefetch,
|
||||
Icon,
|
||||
Preload,
|
||||
Stylesheet,
|
||||
}
|
||||
|
||||
struct SrcSetItem<'a> {
|
||||
path: &'a str,
|
||||
@@ -27,13 +39,18 @@ struct SrcSetItem<'a> {
|
||||
|
||||
const ICON_VALUES: &'static [&str] = &["icon", "shortcut icon"];
|
||||
|
||||
const WHITESPACES: &'static [char] = &['\t', '\n', '\x0c', '\r', ' '];
|
||||
|
||||
pub fn add_favicon(document: &Handle, favicon_data_url: String) -> RcDom {
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, document, SerializeOpts::default())
|
||||
.expect("unable to serialize DOM into buffer");
|
||||
let result = String::from_utf8(buf).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.expect("unable to serialize DOM into buffer");
|
||||
|
||||
let mut dom = html_to_dom(&result);
|
||||
let mut dom = html_to_dom(&buf, "utf-8".to_string());
|
||||
let doc = dom.get_document();
|
||||
if let Some(html) = get_child_node_by_name(&doc, "html") {
|
||||
if let Some(head) = get_child_node_by_name(&html, "head") {
|
||||
@@ -63,15 +80,15 @@ pub fn check_integrity(data: &[u8], integrity: &str) -> bool {
|
||||
if integrity.starts_with("sha256-") {
|
||||
let mut hasher = Sha256::new();
|
||||
hasher.update(data);
|
||||
base64::encode(hasher.finalize()) == integrity[7..]
|
||||
BASE64_STANDARD.encode(hasher.finalize()) == integrity[7..]
|
||||
} else if integrity.starts_with("sha384-") {
|
||||
let mut hasher = Sha384::new();
|
||||
hasher.update(data);
|
||||
base64::encode(hasher.finalize()) == integrity[7..]
|
||||
BASE64_STANDARD.encode(hasher.finalize()) == integrity[7..]
|
||||
} else if integrity.starts_with("sha512-") {
|
||||
let mut hasher = Sha512::new();
|
||||
hasher.update(data);
|
||||
base64::encode(hasher.finalize()) == integrity[7..]
|
||||
BASE64_STANDARD.encode(hasher.finalize()) == integrity[7..]
|
||||
} else {
|
||||
false
|
||||
}
|
||||
@@ -81,7 +98,7 @@ pub fn compose_csp(options: &Options) -> String {
|
||||
let mut string_list = vec![];
|
||||
|
||||
if options.isolate {
|
||||
string_list.push("default-src 'unsafe-inline' data:;");
|
||||
string_list.push("default-src 'unsafe-eval' 'unsafe-inline' data:;");
|
||||
}
|
||||
|
||||
if options.no_css {
|
||||
@@ -115,7 +132,7 @@ pub fn create_metadata_tag(url: &Url) -> String {
|
||||
|
||||
// Prevent credentials from getting into metadata
|
||||
if clean_url.scheme() == "http" || clean_url.scheme() == "https" {
|
||||
// Only HTTP(S) URLs may feature credentials
|
||||
// Only HTTP(S) URLs can contain credentials
|
||||
clean_url.set_username("").unwrap();
|
||||
clean_url.set_password(None).unwrap();
|
||||
}
|
||||
@@ -133,65 +150,71 @@ pub fn create_metadata_tag(url: &Url) -> String {
|
||||
)
|
||||
}
|
||||
|
||||
pub fn determine_link_node_type(node: &Handle) -> &str {
|
||||
let mut link_type: &str = "unknown";
|
||||
|
||||
if let Some(link_attr_rel_value) = get_node_attr(node, "rel") {
|
||||
if is_icon(&link_attr_rel_value) {
|
||||
link_type = "icon";
|
||||
} else if link_attr_rel_value.eq_ignore_ascii_case("stylesheet")
|
||||
|| link_attr_rel_value.eq_ignore_ascii_case("alternate stylesheet")
|
||||
{
|
||||
link_type = "stylesheet";
|
||||
} else if link_attr_rel_value.eq_ignore_ascii_case("preload") {
|
||||
link_type = "preload";
|
||||
} else if link_attr_rel_value.eq_ignore_ascii_case("dns-prefetch") {
|
||||
link_type = "dns-prefetch";
|
||||
}
|
||||
}
|
||||
|
||||
link_type
|
||||
}
|
||||
|
||||
pub fn embed_srcset(
|
||||
cache: &mut HashMap<String, Vec<u8>>,
|
||||
client: &Client,
|
||||
document_url: &Url,
|
||||
srcset: &str,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
) -> String {
|
||||
let mut array: Vec<SrcSetItem> = vec![];
|
||||
let re = Regex::new(r",\s+").unwrap();
|
||||
for srcset_item in re.split(srcset) {
|
||||
let parts: Vec<&str> = srcset_item.trim().split_whitespace().collect();
|
||||
if parts.len() > 0 {
|
||||
let path = parts[0].trim();
|
||||
let descriptor = if parts.len() > 1 { parts[1].trim() } else { "" };
|
||||
|
||||
// Parse srcset attribute according to the specs
|
||||
// https://html.spec.whatwg.org/multipage/images.html#srcset-attribute
|
||||
let mut offset = 0;
|
||||
let size = srcset.chars().count();
|
||||
|
||||
while offset < size {
|
||||
let mut has_descriptor = true;
|
||||
// Zero or more whitespaces + skip leading comma
|
||||
let url_start = offset
|
||||
+ srcset[offset..]
|
||||
.chars()
|
||||
.take_while(|&c| WHITESPACES.contains(&c) || c == ',')
|
||||
.count();
|
||||
if url_start >= size {
|
||||
break;
|
||||
}
|
||||
// A valid non-empty URL that does not start or end with comma
|
||||
let mut url_end = url_start
|
||||
+ srcset[url_start..]
|
||||
.chars()
|
||||
.take_while(|&c| !WHITESPACES.contains(&c))
|
||||
.count();
|
||||
while (url_end - 1) > url_start && srcset.chars().nth(url_end - 1).unwrap() == ',' {
|
||||
has_descriptor = false;
|
||||
url_end -= 1;
|
||||
}
|
||||
offset = url_end;
|
||||
// If the URL wasn't terminated by comma there may also be a descriptor
|
||||
if has_descriptor {
|
||||
offset += srcset[url_end..].chars().take_while(|&c| c != ',').count();
|
||||
}
|
||||
// Collect SrcSetItem
|
||||
if url_end > url_start {
|
||||
let path = &srcset[url_start..url_end];
|
||||
let descriptor = &srcset[url_end..offset].trim();
|
||||
let srcset_real_item = SrcSetItem { path, descriptor };
|
||||
array.push(srcset_real_item);
|
||||
}
|
||||
}
|
||||
|
||||
let mut result: String = str!();
|
||||
let mut result: String = "".to_string();
|
||||
let mut i: usize = array.len();
|
||||
for part in array {
|
||||
if options.no_images {
|
||||
result.push_str(empty_image!());
|
||||
result.push_str(EMPTY_IMAGE_DATA_URL);
|
||||
} else {
|
||||
let image_full_url: Url = resolve_url(&document_url, part.path);
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&document_url,
|
||||
&image_full_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((image_data, image_final_url, image_media_type)) => {
|
||||
let mut image_data_url =
|
||||
create_data_url(&image_media_type, &image_data, &image_final_url);
|
||||
// Append retreved asset as a data URL
|
||||
match retrieve_asset(cache, client, &document_url, &image_full_url, options) {
|
||||
Ok((image_data, image_final_url, image_media_type, image_charset)) => {
|
||||
let mut image_data_url = create_data_url(
|
||||
&image_media_type,
|
||||
&image_charset,
|
||||
&image_data,
|
||||
&image_final_url,
|
||||
);
|
||||
// Append retrieved asset as a data URL
|
||||
image_data_url.set_fragment(image_full_url.fragment());
|
||||
result.push_str(image_data_url.as_ref());
|
||||
}
|
||||
@@ -201,7 +224,7 @@ pub fn embed_srcset(
|
||||
result.push_str(image_full_url.as_ref());
|
||||
} else {
|
||||
// Avoid breaking the structure in case if not an HTTP(S) URL
|
||||
result.push_str(empty_image!());
|
||||
result.push_str(EMPTY_IMAGE_DATA_URL);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -253,6 +276,48 @@ pub fn find_base_node(node: &Handle) -> Option<Handle> {
|
||||
None
|
||||
}
|
||||
|
||||
pub fn find_meta_charset_or_content_type_node(node: &Handle) -> Option<Handle> {
|
||||
match node.data {
|
||||
NodeData::Document => {
|
||||
// Dig deeper
|
||||
for child in node.children.borrow().iter() {
|
||||
if let Some(meta_charset_node) = find_meta_charset_or_content_type_node(child) {
|
||||
return Some(meta_charset_node);
|
||||
}
|
||||
}
|
||||
}
|
||||
NodeData::Element { ref name, .. } => {
|
||||
match name.local.as_ref() {
|
||||
"head" => {
|
||||
if let Some(meta_node) = get_child_node_by_name(node, "meta") {
|
||||
if let Some(_) = get_node_attr(&meta_node, "charset") {
|
||||
return Some(meta_node);
|
||||
} else if let Some(meta_node_http_equiv_attr_value) =
|
||||
get_node_attr(&meta_node, "http-equiv")
|
||||
{
|
||||
if meta_node_http_equiv_attr_value.eq_ignore_ascii_case("content-type")
|
||||
{
|
||||
return Some(meta_node);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Dig deeper
|
||||
for child in node.children.borrow().iter() {
|
||||
if let Some(meta_charset_node) = find_meta_charset_or_content_type_node(child) {
|
||||
return Some(meta_charset_node);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
pub fn get_base_url(handle: &Handle) -> Option<String> {
|
||||
if let Some(base_node) = find_base_node(handle) {
|
||||
get_node_attr(&base_node, "href")
|
||||
@@ -261,6 +326,24 @@ pub fn get_base_url(handle: &Handle) -> Option<String> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_charset(node: &Handle) -> Option<String> {
|
||||
if let Some(meta_charset_node) = find_meta_charset_or_content_type_node(node) {
|
||||
if let Some(meta_charset_node_attr_value) = get_node_attr(&meta_charset_node, "charset") {
|
||||
// Processing <meta charset="..." />
|
||||
return Some(meta_charset_node_attr_value);
|
||||
} else if let Some(meta_content_type_node_attr_value) =
|
||||
get_node_attr(&meta_charset_node, "content")
|
||||
{
|
||||
// Processing <meta http-equiv="content-type" content="text/html; charset=..." />
|
||||
let (_media_type, charset, _is_base64) =
|
||||
parse_content_type(&meta_content_type_node_attr_value);
|
||||
return Some(charset);
|
||||
}
|
||||
}
|
||||
|
||||
return None;
|
||||
}
|
||||
|
||||
pub fn get_child_node_by_name(parent: &Handle, node_name: &str) -> Option<Handle> {
|
||||
let children = parent.children.borrow();
|
||||
let matching_children = children.iter().find(|child| match child.data {
|
||||
@@ -273,19 +356,12 @@ pub fn get_child_node_by_name(parent: &Handle, node_name: &str) -> Option<Handle
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_node_name(node: &Handle) -> Option<&'_ str> {
|
||||
match &node.data {
|
||||
NodeData::Element { ref name, .. } => Some(name.local.as_ref()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_node_attr(node: &Handle, attr_name: &str) -> Option<String> {
|
||||
match &node.data {
|
||||
NodeData::Element { ref attrs, .. } => {
|
||||
for attr in attrs.borrow().iter() {
|
||||
if &*attr.name.local == attr_name {
|
||||
return Some(str!(&*attr.value));
|
||||
return Some(attr.value.to_string());
|
||||
}
|
||||
}
|
||||
None
|
||||
@@ -294,6 +370,13 @@ pub fn get_node_attr(node: &Handle, attr_name: &str) -> Option<String> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_node_name(node: &Handle) -> Option<&'_ str> {
|
||||
match &node.data {
|
||||
NodeData::Element { ref name, .. } => Some(name.local.as_ref()),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn get_parent_node(child: &Handle) -> Handle {
|
||||
let parent = child.parent.take().clone();
|
||||
parent.and_then(|node| node.upgrade()).unwrap()
|
||||
@@ -340,10 +423,19 @@ pub fn has_favicon(handle: &Handle) -> bool {
|
||||
found_favicon
|
||||
}
|
||||
|
||||
pub fn html_to_dom(data: &str) -> RcDom {
|
||||
pub fn html_to_dom(data: &Vec<u8>, document_encoding: String) -> RcDom {
|
||||
let s: String;
|
||||
|
||||
if let Some(encoding) = Encoding::for_label(document_encoding.as_bytes()) {
|
||||
let (string, _, _) = encoding.decode(&data);
|
||||
s = string.to_string();
|
||||
} else {
|
||||
s = String::from_utf8_lossy(&data).to_string();
|
||||
}
|
||||
|
||||
parse_document(RcDom::default(), Default::default())
|
||||
.from_utf8()
|
||||
.read_from(&mut data.as_bytes())
|
||||
.read_from(&mut s.as_bytes())
|
||||
.unwrap()
|
||||
}
|
||||
|
||||
@@ -351,13 +443,36 @@ pub fn is_icon(attr_value: &str) -> bool {
|
||||
ICON_VALUES.contains(&attr_value.to_lowercase().as_str())
|
||||
}
|
||||
|
||||
pub fn parse_link_type(link_attr_rel_value: &str) -> Vec<LinkType> {
|
||||
let mut types: Vec<LinkType> = vec![];
|
||||
|
||||
for link_attr_rel_type in link_attr_rel_value.split_whitespace() {
|
||||
if link_attr_rel_type.eq_ignore_ascii_case("alternate") {
|
||||
types.push(LinkType::Alternate);
|
||||
} else if link_attr_rel_type.eq_ignore_ascii_case("dns-prefetch") {
|
||||
types.push(LinkType::DnsPrefetch);
|
||||
} else if link_attr_rel_type.eq_ignore_ascii_case("preload") {
|
||||
types.push(LinkType::Preload);
|
||||
} else if link_attr_rel_type.eq_ignore_ascii_case("stylesheet") {
|
||||
types.push(LinkType::Stylesheet);
|
||||
} else if is_icon(&link_attr_rel_type) {
|
||||
types.push(LinkType::Icon);
|
||||
}
|
||||
}
|
||||
|
||||
types
|
||||
}
|
||||
|
||||
pub fn set_base_url(document: &Handle, desired_base_href: String) -> RcDom {
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, document, SerializeOpts::default())
|
||||
.expect("unable to serialize DOM into buffer");
|
||||
let result = String::from_utf8(buf).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.expect("unable to serialize DOM into buffer");
|
||||
|
||||
let mut dom = html_to_dom(&result);
|
||||
let mut dom = html_to_dom(&buf, "utf-8".to_string());
|
||||
let doc = dom.get_document();
|
||||
if let Some(html_node) = get_child_node_by_name(&doc, "html") {
|
||||
if let Some(head_node) = get_child_node_by_name(&html_node, "head") {
|
||||
@@ -383,6 +498,41 @@ pub fn set_base_url(document: &Handle, desired_base_href: String) -> RcDom {
|
||||
dom
|
||||
}
|
||||
|
||||
pub fn set_charset(mut dom: RcDom, desired_charset: String) -> RcDom {
|
||||
if let Some(meta_charset_node) = find_meta_charset_or_content_type_node(&dom.document) {
|
||||
if let Some(_) = get_node_attr(&meta_charset_node, "charset") {
|
||||
set_node_attr(&meta_charset_node, "charset", Some(desired_charset));
|
||||
} else if let Some(_) = get_node_attr(&meta_charset_node, "content") {
|
||||
set_node_attr(
|
||||
&meta_charset_node,
|
||||
"content",
|
||||
Some(format!("text/html;charset={}", desired_charset)),
|
||||
);
|
||||
}
|
||||
} else {
|
||||
let meta_charset_node = dom.create_element(
|
||||
QualName::new(None, ns!(), local_name!("meta")),
|
||||
vec![Attribute {
|
||||
name: QualName::new(None, ns!(), local_name!("charset")),
|
||||
value: format_tendril!("{}", desired_charset),
|
||||
}],
|
||||
Default::default(),
|
||||
);
|
||||
|
||||
// Insert newly created META charset node into HEAD
|
||||
if let Some(html_node) = get_child_node_by_name(&dom.document, "html") {
|
||||
if let Some(head_node) = get_child_node_by_name(&html_node, "head") {
|
||||
head_node
|
||||
.children
|
||||
.borrow_mut()
|
||||
.push(meta_charset_node.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
dom
|
||||
}
|
||||
|
||||
pub fn set_node_attr(node: &Handle, attr_name: &str, attr_value: Option<String>) {
|
||||
match &node.data {
|
||||
NodeData::Element { ref attrs, .. } => {
|
||||
@@ -395,8 +545,8 @@ pub fn set_node_attr(node: &Handle, attr_name: &str, attr_value: Option<String>)
|
||||
found_existing_attr = true;
|
||||
|
||||
if let Some(attr_value) = attr_value.clone() {
|
||||
&attrs_mut[i].value.clear();
|
||||
&attrs_mut[i].value.push_slice(&attr_value.as_str());
|
||||
let _ = &attrs_mut[i].value.clear();
|
||||
let _ = &attrs_mut[i].value.push_slice(&attr_value.as_str());
|
||||
} else {
|
||||
// Remove attr completely if attr_value is not defined
|
||||
attrs_mut.remove(i);
|
||||
@@ -423,16 +573,10 @@ pub fn set_node_attr(node: &Handle, attr_name: &str, attr_value: Option<String>)
|
||||
};
|
||||
}
|
||||
|
||||
pub fn stringify_document(handle: &Handle, options: &Options) -> String {
|
||||
pub fn serialize_document(mut dom: RcDom, document_encoding: String, options: &Options) -> Vec<u8> {
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, handle, SerializeOpts::default())
|
||||
.expect("Unable to serialize DOM into buffer");
|
||||
let document = dom.get_document();
|
||||
|
||||
let mut result = String::from_utf8(buf).unwrap();
|
||||
|
||||
// We can't make it isolate the page right away since it may have no HEAD element,
|
||||
// ergo we have to serialize, parse the DOM again, insert the CSP meta tag, and then
|
||||
// finally serialize and return the resulting string
|
||||
if options.isolate
|
||||
|| options.no_css
|
||||
|| options.no_fonts
|
||||
@@ -441,10 +585,7 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String {
|
||||
|| options.no_images
|
||||
{
|
||||
// Take care of CSP
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
let mut dom = html_to_dom(&result);
|
||||
let doc = dom.get_document();
|
||||
if let Some(html) = get_child_node_by_name(&doc, "html") {
|
||||
if let Some(html) = get_child_node_by_name(&document, "html") {
|
||||
if let Some(head) = get_child_node_by_name(&html, "head") {
|
||||
let meta = dom.create_element(
|
||||
QualName::new(None, ns!(), local_name!("meta")),
|
||||
@@ -468,19 +609,31 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String {
|
||||
head.children.borrow_mut().reverse();
|
||||
}
|
||||
}
|
||||
|
||||
serialize(&mut buf, &doc, SerializeOpts::default())
|
||||
.expect("Unable to serialize DOM into buffer");
|
||||
result = String::from_utf8(buf).unwrap();
|
||||
}
|
||||
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.expect("Unable to serialize DOM into buffer");
|
||||
|
||||
// Unwrap NOSCRIPT elements
|
||||
if options.unwrap_noscript {
|
||||
let s: &str = &String::from_utf8_lossy(&buf);
|
||||
let noscript_re = Regex::new(r"<(?P<c>/?noscript[^>]*)>").unwrap();
|
||||
result = noscript_re.replace_all(&result, "<!--$c-->").to_string();
|
||||
buf = noscript_re.replace_all(&s, "<!--$c-->").as_bytes().to_vec();
|
||||
}
|
||||
|
||||
result
|
||||
if !document_encoding.is_empty() {
|
||||
if let Some(encoding) = Encoding::for_label(document_encoding.as_bytes()) {
|
||||
let s: &str = &String::from_utf8_lossy(&buf);
|
||||
let (data, _, _) = encoding.encode(s);
|
||||
buf = data.to_vec();
|
||||
}
|
||||
}
|
||||
|
||||
buf
|
||||
}
|
||||
|
||||
pub fn retrieve_and_embed_asset(
|
||||
@@ -491,19 +644,11 @@ pub fn retrieve_and_embed_asset(
|
||||
attr_name: &str,
|
||||
attr_value: &str,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
) {
|
||||
let resolved_url: Url = resolve_url(document_url, attr_value.clone());
|
||||
let resolved_url: Url = resolve_url(document_url, attr_value);
|
||||
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&document_url.clone(),
|
||||
&resolved_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((data, final_url, mut media_type)) => {
|
||||
match retrieve_asset(cache, client, &document_url.clone(), &resolved_url, options) {
|
||||
Ok((data, final_url, mut media_type, charset)) => {
|
||||
let node_name: &str = get_node_name(&node).unwrap();
|
||||
|
||||
// Check integrity if it's a LINK or SCRIPT element
|
||||
@@ -521,42 +666,41 @@ pub fn retrieve_and_embed_asset(
|
||||
}
|
||||
|
||||
if ok_to_include {
|
||||
if node_name == "link" && determine_link_node_type(node) == "stylesheet" {
|
||||
let s: String;
|
||||
if let Some(encoding) = Encoding::for_label(charset.as_bytes()) {
|
||||
let (string, _, _) = encoding.decode(&data);
|
||||
s = string.to_string();
|
||||
} else {
|
||||
s = String::from_utf8_lossy(&data).to_string();
|
||||
}
|
||||
|
||||
if node_name == "link"
|
||||
&& parse_link_type(&get_node_attr(node, "rel").unwrap_or(String::from("")))
|
||||
.contains(&LinkType::Stylesheet)
|
||||
{
|
||||
// Stylesheet LINK elements require special treatment
|
||||
let css: String = embed_css(
|
||||
cache,
|
||||
client,
|
||||
&final_url,
|
||||
&String::from_utf8_lossy(&data),
|
||||
options,
|
||||
depth + 1,
|
||||
);
|
||||
let css: String = embed_css(cache, client, &final_url, &s, options);
|
||||
|
||||
// Create and embed data URL
|
||||
let css_data_url = create_data_url("text/css", css.as_bytes(), &final_url);
|
||||
let css_data_url =
|
||||
create_data_url(&media_type, &charset, css.as_bytes(), &final_url);
|
||||
set_node_attr(&node, attr_name, Some(css_data_url.to_string()));
|
||||
} else if node_name == "frame" || node_name == "iframe" {
|
||||
// (I)FRAMEs are also quite different from conventional resources
|
||||
let frame_dom = html_to_dom(&String::from_utf8_lossy(&data));
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
client,
|
||||
&final_url,
|
||||
&frame_dom.document,
|
||||
&options,
|
||||
depth + 1,
|
||||
);
|
||||
let frame_dom = html_to_dom(&data, charset.clone());
|
||||
walk_and_embed_assets(cache, client, &final_url, &frame_dom.document, &options);
|
||||
|
||||
let mut frame_data: Vec<u8> = Vec::new();
|
||||
serialize(
|
||||
&mut frame_data,
|
||||
&frame_dom.document,
|
||||
&SerializableHandle::from(frame_dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
// Create and embed data URL
|
||||
let mut frame_data_url = create_data_url(&media_type, &frame_data, &final_url);
|
||||
let mut frame_data_url =
|
||||
create_data_url(&media_type, &charset, &frame_data, &final_url);
|
||||
frame_data_url.set_fragment(resolved_url.fragment());
|
||||
set_node_attr(node, attr_name, Some(frame_data_url.to_string()));
|
||||
} else {
|
||||
@@ -575,7 +719,7 @@ pub fn retrieve_and_embed_asset(
|
||||
}
|
||||
|
||||
// Create and embed data URL
|
||||
let mut data_url = create_data_url(&media_type, &data, &final_url);
|
||||
let mut data_url = create_data_url(&media_type, &charset, &data, &final_url);
|
||||
data_url.set_fragment(resolved_url.fragment());
|
||||
set_node_attr(node, attr_name, Some(data_url.to_string()));
|
||||
}
|
||||
@@ -599,13 +743,12 @@ pub fn walk_and_embed_assets(
|
||||
document_url: &Url,
|
||||
node: &Handle,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
) {
|
||||
match node.data {
|
||||
NodeData::Document => {
|
||||
// Dig deeper
|
||||
for child in node.children.borrow().iter() {
|
||||
walk_and_embed_assets(cache, client, &document_url, child, options, depth);
|
||||
walk_and_embed_assets(cache, client, &document_url, child, options);
|
||||
}
|
||||
}
|
||||
NodeData::Element {
|
||||
@@ -621,34 +764,15 @@ pub fn walk_and_embed_assets(
|
||||
|| meta_attr_http_equiv_value.eq_ignore_ascii_case("location")
|
||||
{
|
||||
// Remove http-equiv attributes from META nodes if they're able to control the page
|
||||
set_node_attr(
|
||||
&node,
|
||||
"http-equiv",
|
||||
Some(format!(
|
||||
"disabled by monolith ({})",
|
||||
meta_attr_http_equiv_value
|
||||
)),
|
||||
);
|
||||
} else if meta_attr_http_equiv_value.eq_ignore_ascii_case("Content-Type") {
|
||||
// Enforce charset to be set to UTF-8
|
||||
if let Some(_attr_value) = get_node_attr(node, "content") {
|
||||
set_node_attr(
|
||||
&node,
|
||||
"content",
|
||||
Some(str!("text/html; charset=utf-8")),
|
||||
);
|
||||
}
|
||||
set_node_attr(&node, "http-equiv", None);
|
||||
}
|
||||
} else if let Some(_meta_attr_http_equiv_value) = get_node_attr(node, "charset")
|
||||
{
|
||||
// Enforce charset to be set to UTF-8
|
||||
set_node_attr(&node, "charset", Some(str!("utf-8")));
|
||||
}
|
||||
}
|
||||
"link" => {
|
||||
let link_type: &str = determine_link_node_type(node);
|
||||
let link_node_types: Vec<LinkType> =
|
||||
parse_link_type(&get_node_attr(node, "rel").unwrap_or(String::from("")));
|
||||
|
||||
if link_type == "icon" {
|
||||
if link_node_types.contains(&LinkType::Icon) {
|
||||
// Find and resolve LINK's href attribute
|
||||
if let Some(link_attr_href_value) = get_node_attr(node, "href") {
|
||||
if !options.no_images && !link_attr_href_value.is_empty() {
|
||||
@@ -660,13 +784,12 @@ pub fn walk_and_embed_assets(
|
||||
"href",
|
||||
&link_attr_href_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
} else {
|
||||
set_node_attr(node, "href", None);
|
||||
}
|
||||
}
|
||||
} else if link_type == "stylesheet" {
|
||||
} else if link_node_types.contains(&LinkType::Stylesheet) {
|
||||
// Resolve LINK's href attribute
|
||||
if let Some(link_attr_href_value) = get_node_attr(node, "href") {
|
||||
if options.no_css {
|
||||
@@ -683,12 +806,13 @@ pub fn walk_and_embed_assets(
|
||||
"href",
|
||||
&link_attr_href_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if link_type == "preload" || link_type == "dns-prefetch" {
|
||||
} else if link_node_types.contains(&LinkType::Preload)
|
||||
|| link_node_types.contains(&LinkType::DnsPrefetch)
|
||||
{
|
||||
// Since all resources are embedded as data URLs, preloading and prefetching are not necessary
|
||||
set_node_attr(node, "rel", None);
|
||||
} else {
|
||||
@@ -725,7 +849,6 @@ pub fn walk_and_embed_assets(
|
||||
"background",
|
||||
&body_attr_background_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -738,10 +861,10 @@ pub fn walk_and_embed_assets(
|
||||
if options.no_images {
|
||||
// Put empty images into src and data-src attributes
|
||||
if img_attr_src_value != None {
|
||||
set_node_attr(node, "src", Some(str!(empty_image!())));
|
||||
set_node_attr(node, "src", Some(EMPTY_IMAGE_DATA_URL.to_string()));
|
||||
}
|
||||
if img_attr_data_src_value != None {
|
||||
set_node_attr(node, "data-src", Some(str!(empty_image!())));
|
||||
set_node_attr(node, "data-src", Some(EMPTY_IMAGE_DATA_URL.to_string()));
|
||||
}
|
||||
} else {
|
||||
if img_attr_src_value.clone().unwrap_or_default().is_empty()
|
||||
@@ -751,7 +874,7 @@ pub fn walk_and_embed_assets(
|
||||
.is_empty()
|
||||
{
|
||||
// Add empty src attribute
|
||||
set_node_attr(node, "src", Some(str!()));
|
||||
set_node_attr(node, "src", Some("".to_string()));
|
||||
} else {
|
||||
// Add data URL src attribute
|
||||
let img_full_url: String = if !img_attr_data_src_value
|
||||
@@ -771,7 +894,6 @@ pub fn walk_and_embed_assets(
|
||||
"src",
|
||||
&img_full_url,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -779,14 +901,8 @@ pub fn walk_and_embed_assets(
|
||||
// Resolve srcset attribute
|
||||
if let Some(img_srcset) = get_node_attr(node, "srcset") {
|
||||
if !img_srcset.is_empty() {
|
||||
let resolved_srcset: String = embed_srcset(
|
||||
cache,
|
||||
client,
|
||||
&document_url,
|
||||
&img_srcset,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
let resolved_srcset: String =
|
||||
embed_srcset(cache, client, &document_url, &img_srcset, options);
|
||||
set_node_attr(node, "srcset", Some(resolved_srcset));
|
||||
}
|
||||
}
|
||||
@@ -802,11 +918,11 @@ pub fn walk_and_embed_assets(
|
||||
if let Some(input_attr_src_value) = get_node_attr(node, "src") {
|
||||
if options.no_images || input_attr_src_value.is_empty() {
|
||||
let value = if input_attr_src_value.is_empty() {
|
||||
str!()
|
||||
""
|
||||
} else {
|
||||
str!(empty_image!())
|
||||
EMPTY_IMAGE_DATA_URL
|
||||
};
|
||||
set_node_attr(node, "src", Some(value));
|
||||
set_node_attr(node, "src", Some(value.to_string()));
|
||||
} else {
|
||||
retrieve_and_embed_asset(
|
||||
cache,
|
||||
@@ -816,7 +932,6 @@ pub fn walk_and_embed_assets(
|
||||
"src",
|
||||
&input_attr_src_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -824,7 +939,7 @@ pub fn walk_and_embed_assets(
|
||||
}
|
||||
}
|
||||
"image" => {
|
||||
let mut image_href: String = str!();
|
||||
let mut image_href: String = "".to_string();
|
||||
|
||||
if let Some(image_attr_href_value) = get_node_attr(node, "href") {
|
||||
image_href = image_attr_href_value;
|
||||
@@ -849,7 +964,6 @@ pub fn walk_and_embed_assets(
|
||||
"href",
|
||||
&image_href,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -870,7 +984,6 @@ pub fn walk_and_embed_assets(
|
||||
"src",
|
||||
&source_attr_src_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
} else if parent_node_name == "video" {
|
||||
@@ -885,7 +998,6 @@ pub fn walk_and_embed_assets(
|
||||
"src",
|
||||
&source_attr_src_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -895,7 +1007,11 @@ pub fn walk_and_embed_assets(
|
||||
if parent_node_name == "picture" {
|
||||
if !source_attr_srcset_value.is_empty() {
|
||||
if options.no_images {
|
||||
set_node_attr(node, "srcset", Some(str!(empty_image!())));
|
||||
set_node_attr(
|
||||
node,
|
||||
"srcset",
|
||||
Some(EMPTY_IMAGE_DATA_URL.to_string()),
|
||||
);
|
||||
} else {
|
||||
let resolved_srcset: String = embed_srcset(
|
||||
cache,
|
||||
@@ -903,7 +1019,6 @@ pub fn walk_and_embed_assets(
|
||||
&document_url,
|
||||
&source_attr_srcset_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
set_node_attr(node, "srcset", Some(resolved_srcset));
|
||||
}
|
||||
@@ -920,7 +1035,7 @@ pub fn walk_and_embed_assets(
|
||||
{
|
||||
if options.no_js {
|
||||
// Replace with empty JS call to preserve original behavior
|
||||
set_node_attr(node, "href", Some(str!("javascript:;")));
|
||||
set_node_attr(node, "href", Some("javascript:;".to_string()));
|
||||
}
|
||||
} else {
|
||||
// Don't touch mailto: links or hrefs which begin with a hash sign
|
||||
@@ -956,7 +1071,6 @@ pub fn walk_and_embed_assets(
|
||||
"src",
|
||||
&script_attr_src.unwrap_or_default(),
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -974,7 +1088,6 @@ pub fn walk_and_embed_assets(
|
||||
&document_url,
|
||||
tendril.as_ref(),
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
tendril.clear();
|
||||
tendril.push_slice(&replacement);
|
||||
@@ -994,7 +1107,7 @@ pub fn walk_and_embed_assets(
|
||||
if let Some(frame_attr_src_value) = get_node_attr(node, "src") {
|
||||
if options.no_frames {
|
||||
// Empty the src attribute
|
||||
set_node_attr(node, "src", Some(str!()));
|
||||
set_node_attr(node, "src", Some("".to_string()));
|
||||
} else {
|
||||
// Ignore (i)frames with empty source (they cause infinite loops)
|
||||
if !frame_attr_src_value.trim().is_empty() {
|
||||
@@ -1003,10 +1116,9 @@ pub fn walk_and_embed_assets(
|
||||
client,
|
||||
&document_url,
|
||||
node,
|
||||
"href",
|
||||
"src",
|
||||
&frame_attr_src_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1026,7 +1138,6 @@ pub fn walk_and_embed_assets(
|
||||
"src",
|
||||
&audio_attr_src_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1045,7 +1156,6 @@ pub fn walk_and_embed_assets(
|
||||
"src",
|
||||
&video_attr_src_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1055,7 +1165,11 @@ pub fn walk_and_embed_assets(
|
||||
// Skip posters with empty source
|
||||
if !video_attr_poster_value.is_empty() {
|
||||
if options.no_images {
|
||||
set_node_attr(node, "poster", Some(str!(empty_image!())));
|
||||
set_node_attr(
|
||||
node,
|
||||
"poster",
|
||||
Some(EMPTY_IMAGE_DATA_URL.to_string()),
|
||||
);
|
||||
} else {
|
||||
retrieve_and_embed_asset(
|
||||
cache,
|
||||
@@ -1065,7 +1179,6 @@ pub fn walk_and_embed_assets(
|
||||
"poster",
|
||||
&video_attr_poster_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1078,7 +1191,10 @@ pub fn walk_and_embed_assets(
|
||||
// Get contents of NOSCRIPT node
|
||||
let mut noscript_contents = contents.borrow_mut();
|
||||
// Parse contents of NOSCRIPT node as DOM
|
||||
let noscript_contents_dom: RcDom = html_to_dom(&noscript_contents);
|
||||
let noscript_contents_dom: RcDom = html_to_dom(
|
||||
&noscript_contents.as_bytes().to_vec(),
|
||||
"".to_string(),
|
||||
);
|
||||
// Embed assets of NOSCRIPT node contents
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
@@ -1086,7 +1202,6 @@ pub fn walk_and_embed_assets(
|
||||
&document_url,
|
||||
&noscript_contents_dom.document,
|
||||
&options,
|
||||
depth,
|
||||
);
|
||||
// Get rid of original contents
|
||||
noscript_contents.clear();
|
||||
@@ -1096,9 +1211,13 @@ pub fn walk_and_embed_assets(
|
||||
{
|
||||
if let Some(body) = get_child_node_by_name(&html, "body") {
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &body, SerializeOpts::default())
|
||||
.expect("Unable to serialize DOM into buffer");
|
||||
let result = String::from_utf8(buf).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(body.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.expect("Unable to serialize DOM into buffer");
|
||||
let result = String::from_utf8_lossy(&buf);
|
||||
noscript_contents.push_slice(&result);
|
||||
}
|
||||
}
|
||||
@@ -1123,7 +1242,6 @@ pub fn walk_and_embed_assets(
|
||||
&document_url,
|
||||
&node_attr_style_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
set_node_attr(node, "style", Some(embedded_style));
|
||||
}
|
||||
@@ -1147,7 +1265,7 @@ pub fn walk_and_embed_assets(
|
||||
|
||||
// Dig deeper
|
||||
for child in node.children.borrow().iter() {
|
||||
walk_and_embed_assets(cache, client, &document_url, child, options, depth);
|
||||
walk_and_embed_assets(cache, client, &document_url, child, options);
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
|
||||
10
src/lib.rs
10
src/lib.rs
@@ -1,15 +1,7 @@
|
||||
#[macro_use]
|
||||
extern crate clap;
|
||||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
|
||||
pub mod cookies;
|
||||
pub mod css;
|
||||
pub mod html;
|
||||
pub mod js;
|
||||
pub mod opts;
|
||||
pub mod url;
|
||||
pub mod utils;
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests;
|
||||
|
||||
@@ -1,17 +0,0 @@
|
||||
#[macro_export]
|
||||
macro_rules! str {
|
||||
() => {
|
||||
String::new()
|
||||
};
|
||||
($val: expr) => {
|
||||
ToString::to_string(&$val)
|
||||
};
|
||||
}
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! empty_image {
|
||||
() => {
|
||||
"data:image/png;base64,\
|
||||
iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII="
|
||||
};
|
||||
}
|
||||
360
src/main.rs
360
src/main.rs
@@ -1,3 +1,5 @@
|
||||
use encoding_rs::Encoding;
|
||||
use markup5ever_rcdom::RcDom;
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
|
||||
use std::collections::HashMap;
|
||||
@@ -8,16 +10,15 @@ use std::process;
|
||||
use std::time::Duration;
|
||||
use url::Url;
|
||||
|
||||
use monolith::cookies::parse_cookie_file_contents;
|
||||
use monolith::html::{
|
||||
add_favicon, create_metadata_tag, get_base_url, has_favicon, html_to_dom, set_base_url,
|
||||
stringify_document, walk_and_embed_assets,
|
||||
add_favicon, create_metadata_tag, get_base_url, get_charset, has_favicon, html_to_dom,
|
||||
serialize_document, set_base_url, set_charset, walk_and_embed_assets,
|
||||
};
|
||||
use monolith::opts::Options;
|
||||
use monolith::url::{create_data_url, parse_data_url, resolve_url};
|
||||
use monolith::url::{create_data_url, resolve_url};
|
||||
use monolith::utils::retrieve_asset;
|
||||
|
||||
mod macros;
|
||||
|
||||
enum Output {
|
||||
Stdout(io::Stdout),
|
||||
File(fs::File),
|
||||
@@ -32,107 +33,136 @@ impl Output {
|
||||
}
|
||||
}
|
||||
|
||||
fn writeln_str(&mut self, s: &str) -> Result<(), Error> {
|
||||
fn write(&mut self, bytes: &Vec<u8>) -> Result<(), Error> {
|
||||
match self {
|
||||
Output::Stdout(stdout) => {
|
||||
writeln!(stdout, "{}", s)?;
|
||||
stdout.write_all(bytes)?;
|
||||
// Ensure newline at end of output
|
||||
if bytes.last() != Some(&b"\n"[0]) {
|
||||
stdout.write(b"\n")?;
|
||||
}
|
||||
stdout.flush()
|
||||
}
|
||||
Output::File(f) => {
|
||||
writeln!(f, "{}", s)?;
|
||||
f.flush()
|
||||
Output::File(file) => {
|
||||
file.write_all(bytes)?;
|
||||
// Ensure newline at end of output
|
||||
if bytes.last() != Some(&b"\n"[0]) {
|
||||
file.write(b"\n")?;
|
||||
}
|
||||
file.flush()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn read_stdin() -> String {
|
||||
let mut buffer = String::new();
|
||||
pub fn read_stdin() -> Vec<u8> {
|
||||
let mut buffer: Vec<u8> = vec![];
|
||||
|
||||
for line in io::stdin().lock().lines() {
|
||||
buffer += line.unwrap_or_default().as_str();
|
||||
buffer += "\n";
|
||||
match io::stdin().lock().read_to_end(&mut buffer) {
|
||||
Ok(_) => buffer,
|
||||
Err(_) => buffer,
|
||||
}
|
||||
|
||||
buffer
|
||||
}
|
||||
|
||||
fn main() {
|
||||
let options = Options::from_args();
|
||||
let mut target: String = str!(&options.target.clone());
|
||||
let mut options = Options::from_args();
|
||||
|
||||
// Check if target was provided
|
||||
if target.len() == 0 {
|
||||
if options.target.len() == 0 {
|
||||
if !options.silent {
|
||||
eprintln!("No target specified");
|
||||
}
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
let target_url: Url;
|
||||
let mut base_url: Url;
|
||||
let mut use_stdin: bool = false;
|
||||
|
||||
// Determine exact target URL
|
||||
if target.clone() == "-" {
|
||||
// Read from pipe (stdin)
|
||||
use_stdin = true;
|
||||
// Set default target URL to an empty data URL; the user can control it via --base-url
|
||||
target_url = Url::parse("data:text/html,").unwrap();
|
||||
} else {
|
||||
match Url::parse(&target.clone()) {
|
||||
Ok(parsed_url) => {
|
||||
if parsed_url.scheme() == "data"
|
||||
|| parsed_url.scheme() == "file"
|
||||
|| (parsed_url.scheme() == "http" || parsed_url.scheme() == "https")
|
||||
{
|
||||
target_url = parsed_url;
|
||||
} else {
|
||||
if !options.silent {
|
||||
eprintln!("Unsupported target URL type: {}", &parsed_url.scheme());
|
||||
}
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
Err(_err) => {
|
||||
// Failed to parse given base URL,
|
||||
// perhaps it's a filesystem path?
|
||||
let path: &Path = Path::new(&target);
|
||||
|
||||
if path.exists() {
|
||||
if path.is_file() {
|
||||
match Url::from_file_path(fs::canonicalize(&path).unwrap()) {
|
||||
Ok(file_url) => {
|
||||
target_url = file_url;
|
||||
}
|
||||
Err(_err) => {
|
||||
if !options.silent {
|
||||
eprintln!(
|
||||
"Could not generate file URL out of given path: {}",
|
||||
"err"
|
||||
);
|
||||
}
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if !options.silent {
|
||||
eprintln!("Local target is not a file: {}", &options.target);
|
||||
}
|
||||
process::exit(1);
|
||||
}
|
||||
} else {
|
||||
// Last chance, now we do what browsers do:
|
||||
// prepend "http://" and hope it points to a website
|
||||
target.insert_str(0, "http://");
|
||||
target_url = Url::parse(&target).unwrap();
|
||||
}
|
||||
}
|
||||
// Check if custom encoding is valid
|
||||
if let Some(custom_encoding) = options.encoding.clone() {
|
||||
if !Encoding::for_label_no_replacement(custom_encoding.as_bytes()).is_some() {
|
||||
eprintln!("Unknown encoding: {}", &custom_encoding);
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Define output
|
||||
let mut output = Output::new(&options.output).expect("Could not prepare output");
|
||||
let mut use_stdin: bool = false;
|
||||
|
||||
let target_url = match options.target.as_str() {
|
||||
"-" => {
|
||||
// Read from pipe (stdin)
|
||||
use_stdin = true;
|
||||
// Set default target URL to an empty data URL; the user can set it via --base-url
|
||||
Url::parse("data:text/html,").unwrap()
|
||||
}
|
||||
target => match Url::parse(&target) {
|
||||
Ok(url) => match url.scheme() {
|
||||
"data" | "file" | "http" | "https" => url,
|
||||
unsupported_scheme => {
|
||||
if !options.silent {
|
||||
eprintln!("Unsupported target URL type: {}", unsupported_scheme);
|
||||
}
|
||||
process::exit(1)
|
||||
}
|
||||
},
|
||||
Err(_) => {
|
||||
// Failed to parse given base URL (perhaps it's a filesystem path?)
|
||||
let path: &Path = Path::new(&target);
|
||||
match path.exists() {
|
||||
true => match path.is_file() {
|
||||
true => {
|
||||
let canonical_path = fs::canonicalize(&path).unwrap();
|
||||
match Url::from_file_path(canonical_path) {
|
||||
Ok(url) => url,
|
||||
Err(_) => {
|
||||
if !options.silent {
|
||||
eprintln!(
|
||||
"Could not generate file URL out of given path: {}",
|
||||
&target
|
||||
);
|
||||
}
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
false => {
|
||||
if !options.silent {
|
||||
eprintln!("Local target is not a file: {}", &target);
|
||||
}
|
||||
process::exit(1);
|
||||
}
|
||||
},
|
||||
false => {
|
||||
// It is not a FS path, now we do what browsers do:
|
||||
// prepend "http://" and hope it points to a website
|
||||
Url::parse(&format!("http://{hopefully_url}", hopefully_url = &target))
|
||||
.unwrap()
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
};
|
||||
|
||||
// Read and parse cookie file
|
||||
if let Some(opt_cookie_file) = options.cookie_file.clone() {
|
||||
match fs::read_to_string(opt_cookie_file) {
|
||||
Ok(str) => match parse_cookie_file_contents(&str) {
|
||||
Ok(cookies) => {
|
||||
options.cookies = cookies;
|
||||
// for c in &cookies {
|
||||
// // if !cookie.is_expired() {
|
||||
// // options.cookies.append(c);
|
||||
// // }
|
||||
// }
|
||||
}
|
||||
Err(_) => {
|
||||
eprintln!("Could not parse specified cookie file");
|
||||
process::exit(1);
|
||||
}
|
||||
},
|
||||
Err(_) => {
|
||||
eprintln!("Could not read specified cookie file");
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize client
|
||||
let mut cache = HashMap::new();
|
||||
@@ -143,35 +173,61 @@ fn main() {
|
||||
HeaderValue::from_str(&user_agent).expect("Invalid User-Agent header specified"),
|
||||
);
|
||||
}
|
||||
let timeout: u64 = if options.timeout > 0 {
|
||||
options.timeout
|
||||
let client = if options.timeout > 0 {
|
||||
Client::builder().timeout(Duration::from_secs(options.timeout))
|
||||
} else {
|
||||
std::u64::MAX / 4 // This is pretty close to infinity
|
||||
};
|
||||
let client = Client::builder()
|
||||
.timeout(Duration::from_secs(timeout))
|
||||
.danger_accept_invalid_certs(options.insecure)
|
||||
.default_headers(header_map)
|
||||
.build()
|
||||
.expect("Failed to initialize HTTP client");
|
||||
// No timeout is default
|
||||
Client::builder()
|
||||
}
|
||||
.danger_accept_invalid_certs(options.insecure)
|
||||
.default_headers(header_map)
|
||||
.build()
|
||||
.expect("Failed to initialize HTTP client");
|
||||
|
||||
// At this stage we assume that the base URL is the same as the target URL
|
||||
base_url = target_url.clone();
|
||||
// At first we assume that base URL is the same as target URL
|
||||
let mut base_url: Url = target_url.clone();
|
||||
|
||||
let mut dom;
|
||||
let data: Vec<u8>;
|
||||
let mut document_encoding: String = "".to_string();
|
||||
let mut dom: RcDom;
|
||||
|
||||
// Retrieve target document
|
||||
if use_stdin {
|
||||
dom = html_to_dom(&read_stdin());
|
||||
data = read_stdin();
|
||||
} else if target_url.scheme() == "file"
|
||||
|| (target_url.scheme() == "http" || target_url.scheme() == "https")
|
||||
|| target_url.scheme() == "data"
|
||||
{
|
||||
match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options, 0) {
|
||||
Ok((data, final_url, _media_type)) => {
|
||||
if options.base_url.clone().unwrap_or(str!()).is_empty() {
|
||||
base_url = final_url
|
||||
match retrieve_asset(&mut cache, &client, &target_url, &target_url, &options) {
|
||||
Ok((retrieved_data, final_url, media_type, charset)) => {
|
||||
// Provide output as text without processing it, the way browsers do
|
||||
if !media_type.eq_ignore_ascii_case("text/html")
|
||||
&& !media_type.eq_ignore_ascii_case("application/xhtml+xml")
|
||||
{
|
||||
// Define output
|
||||
let mut output =
|
||||
Output::new(&options.output).expect("Could not prepare output");
|
||||
|
||||
// Write retrieved data into STDOUT or file
|
||||
output
|
||||
.write(&retrieved_data)
|
||||
.expect("Could not write output");
|
||||
|
||||
// Nothing else to do past this point
|
||||
process::exit(0);
|
||||
}
|
||||
dom = html_to_dom(&String::from_utf8_lossy(&data));
|
||||
|
||||
if options
|
||||
.base_url
|
||||
.clone()
|
||||
.unwrap_or("".to_string())
|
||||
.is_empty()
|
||||
{
|
||||
base_url = final_url;
|
||||
}
|
||||
|
||||
data = retrieved_data;
|
||||
document_encoding = charset;
|
||||
}
|
||||
Err(_) => {
|
||||
if !options.silent {
|
||||
@@ -180,36 +236,42 @@ fn main() {
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
} else if target_url.scheme() == "data" {
|
||||
let (media_type, data): (String, Vec<u8>) = parse_data_url(&target_url);
|
||||
|
||||
if !media_type.eq_ignore_ascii_case("text/html") {
|
||||
if !options.silent {
|
||||
eprintln!("Unsupported data URL media type");
|
||||
}
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
dom = html_to_dom(&String::from_utf8_lossy(&data));
|
||||
} else {
|
||||
process::exit(1);
|
||||
}
|
||||
|
||||
// Initial parse
|
||||
dom = html_to_dom(&data, document_encoding.clone());
|
||||
|
||||
// TODO: investigate if charset from filesystem/data URL/HTTP headers
|
||||
// has say over what's specified in HTML
|
||||
|
||||
// Attempt to determine document's charset
|
||||
if let Some(html_charset) = get_charset(&dom.document) {
|
||||
if !html_charset.is_empty() {
|
||||
// Check if the charset specified inside HTML is valid
|
||||
if let Some(encoding) = Encoding::for_label_no_replacement(html_charset.as_bytes()) {
|
||||
document_encoding = html_charset;
|
||||
dom = html_to_dom(&data, encoding.name().to_string());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Use custom base URL if specified, read and use what's in the DOM otherwise
|
||||
let b: String = options.base_url.clone().unwrap_or(str!());
|
||||
if b.is_empty() {
|
||||
// No custom base URL is specified,
|
||||
// try to see if the document has BASE tag
|
||||
let custom_base_url: String = options.base_url.clone().unwrap_or("".to_string());
|
||||
if custom_base_url.is_empty() {
|
||||
// No custom base URL is specified
|
||||
// Try to see if document has BASE element
|
||||
if let Some(existing_base_url) = get_base_url(&dom.document) {
|
||||
base_url = resolve_url(&target_url, &existing_base_url);
|
||||
}
|
||||
} else {
|
||||
// Custom base URL provided
|
||||
match Url::parse(&b) {
|
||||
match Url::parse(&custom_base_url) {
|
||||
Ok(parsed_url) => {
|
||||
if parsed_url.scheme() == "file" {
|
||||
// File base URLs can only work with
|
||||
// documents saved from filesystem
|
||||
// documents saved from filesystem
|
||||
if target_url.scheme() == "file" {
|
||||
base_url = parsed_url;
|
||||
}
|
||||
@@ -218,11 +280,10 @@ fn main() {
|
||||
}
|
||||
}
|
||||
Err(_) => {
|
||||
// Failed to parse given base URL,
|
||||
// perhaps it's a filesystem path?
|
||||
// Failed to parse given base URL, perhaps it's a filesystem path?
|
||||
if target_url.scheme() == "file" {
|
||||
// Relative paths could work for documents saved from filesystem
|
||||
let path: &Path = Path::new(&b);
|
||||
let path: &Path = Path::new(&custom_base_url);
|
||||
if path.exists() {
|
||||
match Url::from_file_path(fs::canonicalize(&path).unwrap()) {
|
||||
Ok(file_url) => {
|
||||
@@ -230,7 +291,10 @@ fn main() {
|
||||
}
|
||||
Err(_) => {
|
||||
if !options.silent {
|
||||
eprintln!("Could not map given path to base URL: {}", b);
|
||||
eprintln!(
|
||||
"Could not map given path to base URL: {}",
|
||||
custom_base_url
|
||||
);
|
||||
}
|
||||
process::exit(1);
|
||||
}
|
||||
@@ -241,11 +305,10 @@ fn main() {
|
||||
}
|
||||
}
|
||||
|
||||
// Embed remote assets
|
||||
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options, 0);
|
||||
// Traverse through the document and embed remote assets
|
||||
walk_and_embed_assets(&mut cache, &client, &base_url, &dom.document, &options);
|
||||
|
||||
// Update or add new BASE tag to reroute network requests
|
||||
// and hash-links in the final document
|
||||
// Update or add new BASE element to reroute network requests and hash-links
|
||||
if let Some(new_base_url) = options.base_url.clone() {
|
||||
dom = set_base_url(&dom.document, new_base_url);
|
||||
}
|
||||
@@ -257,16 +320,10 @@ fn main() {
|
||||
{
|
||||
let favicon_ico_url: Url = resolve_url(&base_url, "/favicon.ico");
|
||||
|
||||
match retrieve_asset(
|
||||
&mut cache,
|
||||
&client,
|
||||
&target_url,
|
||||
&favicon_ico_url,
|
||||
&options,
|
||||
0,
|
||||
) {
|
||||
Ok((data, final_url, media_type)) => {
|
||||
let favicon_data_url: Url = create_data_url(&media_type, &data, &final_url);
|
||||
match retrieve_asset(&mut cache, &client, &target_url, &favicon_ico_url, &options) {
|
||||
Ok((data, final_url, media_type, charset)) => {
|
||||
let favicon_data_url: Url =
|
||||
create_data_url(&media_type, &charset, &data, &final_url);
|
||||
dom = add_favicon(&dom.document, favicon_data_url.to_string());
|
||||
}
|
||||
Err(_) => {
|
||||
@@ -275,20 +332,25 @@ fn main() {
|
||||
}
|
||||
}
|
||||
|
||||
// Serialize DOM tree
|
||||
let mut result: String = stringify_document(&dom.document, &options);
|
||||
|
||||
// Add metadata tag
|
||||
if !options.no_metadata {
|
||||
let metadata_comment: String = create_metadata_tag(&target_url);
|
||||
result.insert_str(0, &metadata_comment);
|
||||
if metadata_comment.len() > 0 {
|
||||
result.insert_str(metadata_comment.len(), "\n");
|
||||
}
|
||||
// Save using specified charset, if given
|
||||
if let Some(custom_encoding) = options.encoding.clone() {
|
||||
document_encoding = custom_encoding;
|
||||
dom = set_charset(dom, document_encoding.clone());
|
||||
}
|
||||
|
||||
// Write result into stdout or file
|
||||
output
|
||||
.writeln_str(&result)
|
||||
.expect("Could not write HTML output");
|
||||
// Serialize DOM tree
|
||||
let mut result: Vec<u8> = serialize_document(dom, document_encoding, &options);
|
||||
|
||||
// Prepend metadata comment tag
|
||||
if !options.no_metadata {
|
||||
let mut metadata_comment: String = create_metadata_tag(&target_url);
|
||||
metadata_comment += "\n";
|
||||
result.splice(0..0, metadata_comment.as_bytes().to_vec());
|
||||
}
|
||||
|
||||
// Define output
|
||||
let mut output = Output::new(&options.output).expect("Could not prepare output");
|
||||
|
||||
// Write result into STDOUT or file
|
||||
output.write(&result).expect("Could not write output");
|
||||
}
|
||||
|
||||
82
src/opts.rs
82
src/opts.rs
@@ -1,12 +1,19 @@
|
||||
use clap::{App, Arg};
|
||||
use clap::{App, Arg, ArgAction};
|
||||
use std::env;
|
||||
|
||||
use crate::cookies::Cookie;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Options {
|
||||
pub no_audio: bool,
|
||||
pub base_url: Option<String>,
|
||||
pub blacklist_domains: bool,
|
||||
pub no_css: bool,
|
||||
pub cookie_file: Option<String>,
|
||||
pub cookies: Vec<Cookie>,
|
||||
pub domains: Option<Vec<String>>,
|
||||
pub ignore_errors: bool,
|
||||
pub encoding: Option<String>,
|
||||
pub no_frames: bool,
|
||||
pub no_fonts: bool,
|
||||
pub no_images: bool,
|
||||
@@ -42,34 +49,50 @@ const ENV_VAR_TERM: &str = "TERM";
|
||||
impl Options {
|
||||
pub fn from_args() -> Options {
|
||||
let app = App::new(env!("CARGO_PKG_NAME"))
|
||||
.version(crate_version!())
|
||||
.author(format!("\n{}", crate_authors!("\n")).as_str())
|
||||
.about(format!("{}\n{}", ASCII, crate_description!()).as_str())
|
||||
.args_from_usage("-a, --no-audio 'Removes audio sources'")
|
||||
.args_from_usage("-b, --base-url=[http://localhost/] 'Sets custom base URL'")
|
||||
.args_from_usage("-c, --no-css 'Removes CSS'")
|
||||
.args_from_usage("-e, --ignore-errors 'Ignore network errors'")
|
||||
.args_from_usage("-f, --no-frames 'Removes frames and iframes'")
|
||||
.args_from_usage("-F, --no-fonts 'Removes fonts'")
|
||||
.args_from_usage("-i, --no-images 'Removes images'")
|
||||
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
|
||||
.args_from_usage("-j, --no-js 'Removes JavaScript'")
|
||||
.args_from_usage("-k, --insecure 'Allows invalid X.509 (TLS) certificates'")
|
||||
.args_from_usage("-M, --no-metadata 'Excludes timestamp and source information'")
|
||||
.version(env!("CARGO_PKG_VERSION"))
|
||||
.author(format!("\n{}\n\n", env!("CARGO_PKG_AUTHORS").replace(':', "\n")).as_str())
|
||||
.about(format!("{}\n{}", ASCII, env!("CARGO_PKG_DESCRIPTION")).as_str())
|
||||
.args_from_usage("-a, --no-audio 'Remove audio sources'")
|
||||
.args_from_usage("-b, --base-url=[http://localhost/] 'Set custom base URL'")
|
||||
.args_from_usage(
|
||||
"-n, --unwrap-noscript 'Replaces NOSCRIPT elements with their contents'",
|
||||
"-B, --blacklist-domains 'Treat list of specified domains as blacklist'",
|
||||
)
|
||||
.args_from_usage("-o, --output=[document.html] 'Writes output to <file>'")
|
||||
.args_from_usage("-s, --silent 'Suppresses verbosity'")
|
||||
.args_from_usage("-t, --timeout=[60] 'Adjusts network request timeout'")
|
||||
.args_from_usage("-u, --user-agent=[Firefox] 'Sets custom User-Agent string'")
|
||||
.args_from_usage("-v, --no-video 'Removes video sources'")
|
||||
.args_from_usage("-c, --no-css 'Remove CSS'")
|
||||
.args_from_usage("-C, --cookies=[cookies.txt] 'Specify cookie file'")
|
||||
.arg(
|
||||
Arg::with_name("domains")
|
||||
.short('d')
|
||||
.long("domain")
|
||||
.takes_value(true)
|
||||
.value_name("example.com")
|
||||
.action(ArgAction::Append)
|
||||
.help("Specify domains to use for white/black-listing"),
|
||||
)
|
||||
.args_from_usage("-e, --ignore-errors 'Ignore network errors'")
|
||||
.args_from_usage("-E, --encoding=[UTF-8] 'Enforce custom charset'")
|
||||
.args_from_usage("-f, --no-frames 'Remove frames and iframes'")
|
||||
.args_from_usage("-F, --no-fonts 'Remove fonts'")
|
||||
.args_from_usage("-i, --no-images 'Remove images'")
|
||||
.args_from_usage("-I, --isolate 'Cut off document from the Internet'")
|
||||
.args_from_usage("-j, --no-js 'Remove JavaScript'")
|
||||
.args_from_usage("-k, --insecure 'Allow invalid X.509 (TLS) certificates'")
|
||||
.args_from_usage("-M, --no-metadata 'Exclude timestamp and source information'")
|
||||
.args_from_usage(
|
||||
"-n, --unwrap-noscript 'Replace NOSCRIPT elements with their contents'",
|
||||
)
|
||||
.args_from_usage(
|
||||
"-o, --output=[document.html] 'Write output to <file>, use - for STDOUT'",
|
||||
)
|
||||
.args_from_usage("-s, --silent 'Suppress verbosity'")
|
||||
.args_from_usage("-t, --timeout=[60] 'Adjust network request timeout'")
|
||||
.args_from_usage("-u, --user-agent=[Firefox] 'Set custom User-Agent string'")
|
||||
.args_from_usage("-v, --no-video 'Remove video sources'")
|
||||
.arg(
|
||||
Arg::with_name("target")
|
||||
.required(true)
|
||||
.takes_value(true)
|
||||
.index(1)
|
||||
.help("URL or file path, use - for stdin"),
|
||||
.help("URL or file path, use - for STDIN"),
|
||||
)
|
||||
.get_matches();
|
||||
let mut options: Options = Options::default();
|
||||
@@ -81,9 +104,20 @@ impl Options {
|
||||
.to_string();
|
||||
options.no_audio = app.is_present("no-audio");
|
||||
if let Some(base_url) = app.value_of("base-url") {
|
||||
options.base_url = Some(str!(base_url));
|
||||
options.base_url = Some(base_url.to_string());
|
||||
}
|
||||
options.blacklist_domains = app.is_present("blacklist-domains");
|
||||
options.no_css = app.is_present("no-css");
|
||||
if let Some(cookie_file) = app.value_of("cookies") {
|
||||
options.cookie_file = Some(cookie_file.to_string());
|
||||
}
|
||||
if let Some(encoding) = app.value_of("encoding") {
|
||||
options.encoding = Some(encoding.to_string());
|
||||
}
|
||||
if let Some(domains) = app.get_many::<String>("domains") {
|
||||
let list_of_domains: Vec<String> = domains.map(|v| v.clone()).collect::<Vec<_>>();
|
||||
options.domains = Some(list_of_domains);
|
||||
}
|
||||
options.ignore_errors = app.is_present("ignore-errors");
|
||||
options.no_frames = app.is_present("no-frames");
|
||||
options.no_fonts = app.is_present("no-fonts");
|
||||
@@ -100,7 +134,7 @@ impl Options {
|
||||
.parse::<u64>()
|
||||
.unwrap();
|
||||
if let Some(user_agent) = app.value_of("user-agent") {
|
||||
options.user_agent = Some(str!(user_agent));
|
||||
options.user_agent = Some(user_agent.to_string());
|
||||
} else {
|
||||
options.user_agent = Some(DEFAULT_USER_AGENT.to_string());
|
||||
}
|
||||
|
||||
@@ -1,56 +0,0 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use assert_cmd::prelude::*;
|
||||
use std::env;
|
||||
use std::process::Command;
|
||||
|
||||
#[test]
|
||||
fn change_encoding_to_utf_8() {
|
||||
let cwd = env::current_dir().unwrap();
|
||||
let cwd_normalized: String = str!(cwd.to_str().unwrap()).replace("\\", "/");
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg(if cfg!(windows) {
|
||||
"src\\tests\\data\\unusual_encodings\\iso-8859-1.html"
|
||||
} else {
|
||||
"src/tests/data/unusual_encodings/iso-8859-1.html"
|
||||
})
|
||||
.output()
|
||||
.unwrap();
|
||||
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||
|
||||
// STDOUT should contain newly added base URL
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html>\
|
||||
<head>\n \
|
||||
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
|
||||
</head>\n \
|
||||
<body>\n \
|
||||
© Some Company\n \
|
||||
\n\n</body>\
|
||||
</html>\n"
|
||||
);
|
||||
|
||||
// STDERR should contain only the target file
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
format!(
|
||||
"{file}{cwd}/src/tests/data/unusual_encodings/iso-8859-1.html\n",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
}
|
||||
@@ -1,40 +0,0 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use reqwest::Url;
|
||||
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn encode_string_with_specific_media_type() {
|
||||
let mime = "application/javascript";
|
||||
let data = "var word = 'hello';\nalert(word);\n";
|
||||
let data_url = url::create_data_url(mime, data.as_bytes(), &Url::parse("data:,").unwrap());
|
||||
|
||||
assert_eq!(
|
||||
data_url.as_str(),
|
||||
"data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_append_fragment() {
|
||||
let data = "<svg></svg>\n";
|
||||
let data_url = url::create_data_url(
|
||||
"image/svg+xml",
|
||||
data.as_bytes(),
|
||||
&Url::parse("data:,").unwrap(),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
data_url.as_str(),
|
||||
"data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,39 +0,0 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn decode_unicode_characters() {
|
||||
assert_eq!(
|
||||
url::percent_decode(str!(
|
||||
"%E6%A4%9C%E3%83%92%E3%83%A0%E8%A7%A3%E5%A1%97%E3%82%83%E3%83%83%20%3D%20%E3%82%B5"
|
||||
)),
|
||||
"検ヒム解塗ゃッ = サ"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_file_url() {
|
||||
assert_eq!(
|
||||
url::percent_decode(str!("file:///tmp/space%20here/test%231.html")),
|
||||
"file:///tmp/space here/test#1.html"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn plus_sign() {
|
||||
assert_eq!(
|
||||
url::percent_decode(str!(
|
||||
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
|
||||
)),
|
||||
"fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic"
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::url;
|
||||
|
||||
#[test]
|
||||
fn apostrophe() {
|
||||
assert_eq!(url::percent_encode(str!("'")), "%27");
|
||||
}
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::utils;
|
||||
|
||||
#[test]
|
||||
fn zero() {
|
||||
assert_eq!(utils::indent(0), "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn one() {
|
||||
assert_eq!(utils::indent(1), " ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn two() {
|
||||
assert_eq!(utils::indent(2), " ");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn three() {
|
||||
assert_eq!(utils::indent(3), " ");
|
||||
}
|
||||
}
|
||||
@@ -1,3 +0,0 @@
|
||||
mod detect_media_type;
|
||||
mod indent;
|
||||
mod retrieve_asset;
|
||||
96
src/url.rs
96
src/url.rs
@@ -1,7 +1,11 @@
|
||||
use base64;
|
||||
use url::{form_urlencoded, Url};
|
||||
use base64::prelude::*;
|
||||
use percent_encoding::percent_decode_str;
|
||||
use url::Url;
|
||||
|
||||
use crate::utils::detect_media_type;
|
||||
use crate::utils::{detect_media_type, parse_content_type};
|
||||
|
||||
pub const EMPTY_IMAGE_DATA_URL: &'static str = "data:image/png;base64,\
|
||||
iVBORw0KGgoAAAANSUhEUgAAAA0AAAANCAQAAADY4iz3AAAAEUlEQVR42mNkwAkYR6UolgIACvgADsuK6xYAAAAASUVORK5CYII=";
|
||||
|
||||
pub fn clean_url(url: Url) -> Url {
|
||||
let mut url = url.clone();
|
||||
@@ -12,7 +16,8 @@ pub fn clean_url(url: Url) -> Url {
|
||||
url
|
||||
}
|
||||
|
||||
pub fn create_data_url(media_type: &str, data: &[u8], final_asset_url: &Url) -> Url {
|
||||
pub fn create_data_url(media_type: &str, charset: &str, data: &[u8], final_asset_url: &Url) -> Url {
|
||||
// TODO: move this block out of this function
|
||||
let media_type: String = if media_type.is_empty() {
|
||||
detect_media_type(data, &final_asset_url)
|
||||
} else {
|
||||
@@ -21,7 +26,22 @@ pub fn create_data_url(media_type: &str, data: &[u8], final_asset_url: &Url) ->
|
||||
|
||||
let mut data_url: Url = Url::parse("data:,").unwrap();
|
||||
|
||||
data_url.set_path(format!("{};base64,{}", media_type, base64::encode(data)).as_str());
|
||||
let c: String =
|
||||
if !charset.trim().is_empty() && !charset.trim().eq_ignore_ascii_case("US-ASCII") {
|
||||
format!(";charset={}", charset.trim())
|
||||
} else {
|
||||
"".to_string()
|
||||
};
|
||||
|
||||
data_url.set_path(
|
||||
format!(
|
||||
"{}{};base64,{}",
|
||||
media_type,
|
||||
c,
|
||||
BASE64_STANDARD.encode(data)
|
||||
)
|
||||
.as_str(),
|
||||
);
|
||||
|
||||
data_url
|
||||
}
|
||||
@@ -37,65 +57,37 @@ pub fn is_url_and_has_protocol(input: &str) -> bool {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_data_url(url: &Url) -> (String, Vec<u8>) {
|
||||
pub fn parse_data_url(url: &Url) -> (String, String, Vec<u8>) {
|
||||
let path: String = url.path().to_string();
|
||||
let comma_loc: usize = path.find(',').unwrap_or(path.len());
|
||||
|
||||
let meta_data: String = path.chars().take(comma_loc).collect();
|
||||
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
|
||||
// Split data URL into meta data and raw data
|
||||
let content_type: String = path.chars().take(comma_loc).collect();
|
||||
let data: String = path.chars().skip(comma_loc + 1).collect();
|
||||
|
||||
let text: String = percent_decode(raw_data);
|
||||
// Parse meta data
|
||||
let (media_type, charset, is_base64) = parse_content_type(&content_type);
|
||||
|
||||
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
|
||||
let mut media_type: String = str!();
|
||||
let mut encoding: &str = "";
|
||||
|
||||
let mut i: i8 = 0;
|
||||
for item in &meta_data_items {
|
||||
if i == 0 {
|
||||
media_type = str!(item);
|
||||
} else {
|
||||
if item.eq_ignore_ascii_case("base64")
|
||||
|| item.eq_ignore_ascii_case("utf8")
|
||||
|| item.eq_ignore_ascii_case("charset=UTF-8")
|
||||
{
|
||||
encoding = item;
|
||||
}
|
||||
}
|
||||
|
||||
i = i + 1;
|
||||
}
|
||||
|
||||
let data: Vec<u8> = if encoding.eq_ignore_ascii_case("base64") {
|
||||
base64::decode(&text).unwrap_or(vec![])
|
||||
// Parse raw data into vector of bytes
|
||||
let text: String = percent_decode_str(&data).decode_utf8_lossy().to_string();
|
||||
let blob: Vec<u8> = if is_base64 {
|
||||
BASE64_STANDARD.decode(&text).unwrap_or(vec![])
|
||||
} else {
|
||||
text.as_bytes().to_vec()
|
||||
};
|
||||
|
||||
(media_type, data)
|
||||
(media_type, charset, blob)
|
||||
}
|
||||
|
||||
pub fn percent_decode(input: String) -> String {
|
||||
let input: String = input.replace("+", "%2B");
|
||||
pub fn get_referer_url(url: Url) -> Url {
|
||||
let mut url = url.clone();
|
||||
// Spec: https://httpwg.org/specs/rfc9110.html#field.referer
|
||||
// Must not include the fragment and userinfo components of the URI
|
||||
url.set_fragment(None);
|
||||
url.set_username(&"").unwrap();
|
||||
url.set_password(None).unwrap();
|
||||
|
||||
form_urlencoded::parse(input.as_bytes())
|
||||
.map(|(key, val)| {
|
||||
[
|
||||
key.to_string(),
|
||||
if val.to_string().len() == 0 {
|
||||
str!()
|
||||
} else {
|
||||
str!('=')
|
||||
},
|
||||
val.to_string(),
|
||||
]
|
||||
.concat()
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
pub fn percent_encode(input: String) -> String {
|
||||
form_urlencoded::byte_serialize(input.as_bytes()).collect()
|
||||
url
|
||||
}
|
||||
|
||||
pub fn resolve_url(from: &Url, to: &str) -> Url {
|
||||
|
||||
268
src/utils.rs
268
src/utils.rs
@@ -1,12 +1,12 @@
|
||||
use reqwest::blocking::Client;
|
||||
use reqwest::header::CONTENT_TYPE;
|
||||
use reqwest::header::{HeaderMap, HeaderValue, CONTENT_TYPE, COOKIE, REFERER};
|
||||
use std::collections::HashMap;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use url::Url;
|
||||
|
||||
use crate::opts::Options;
|
||||
use crate::url::{clean_url, parse_data_url};
|
||||
use crate::url::{clean_url, get_referer_url, parse_data_url};
|
||||
|
||||
const ANSI_COLOR_RED: &'static str = "\x1b[31m";
|
||||
const ANSI_COLOR_RESET: &'static str = "\x1b[0m";
|
||||
@@ -33,20 +33,119 @@ const MAGIC: [[&[u8]; 2]; 18] = [
|
||||
[b"....moov", b"video/quicktime"],
|
||||
[b"\x1A\x45\xDF\xA3", b"video/webm"],
|
||||
];
|
||||
const PLAINTEXT_MEDIA_TYPES: &[&str] = &["application/javascript", "image/svg+xml"];
|
||||
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
|
||||
"application/javascript",
|
||||
"application/json",
|
||||
"image/svg+xml",
|
||||
];
|
||||
|
||||
pub fn detect_media_type(data: &[u8], url: &Url) -> String {
|
||||
// At first attempt to read file's header
|
||||
for magic_item in MAGIC.iter() {
|
||||
if data.starts_with(magic_item[0]) {
|
||||
return String::from_utf8(magic_item[1].to_vec()).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
if url.path().to_lowercase().ends_with(".svg") {
|
||||
return str!("image/svg+xml");
|
||||
// If header didn't match any known magic signatures,
|
||||
// try to guess media type from file name
|
||||
let parts: Vec<&str> = url.path().split('/').collect();
|
||||
detect_media_type_by_file_name(parts.last().unwrap())
|
||||
}
|
||||
|
||||
pub fn detect_media_type_by_file_name(filename: &str) -> String {
|
||||
let filename_lowercased: &str = &filename.to_lowercase();
|
||||
let parts: Vec<&str> = filename_lowercased.split('.').collect();
|
||||
|
||||
let mime: &str = match parts.last() {
|
||||
Some(v) => match *v {
|
||||
"avi" => "video/avi",
|
||||
"bmp" => "image/bmp",
|
||||
"css" => "text/css",
|
||||
"flac" => "audio/flac",
|
||||
"gif" => "image/gif",
|
||||
"htm" | "html" => "text/html",
|
||||
"ico" => "image/x-icon",
|
||||
"jpeg" | "jpg" => "image/jpeg",
|
||||
"js" => "application/javascript",
|
||||
"json" => "application/json",
|
||||
"mp3" => "audio/mpeg",
|
||||
"mp4" | "m4v" => "video/mp4",
|
||||
"ogg" => "audio/ogg",
|
||||
"ogv" => "video/ogg",
|
||||
"pdf" => "application/pdf",
|
||||
"png" => "image/png",
|
||||
"svg" => "image/svg+xml",
|
||||
"swf" => "application/x-shockwave-flash",
|
||||
"tif" | "tiff" => "image/tiff",
|
||||
"txt" => "text/plain",
|
||||
"wav" => "audio/wav",
|
||||
"webp" => "image/webp",
|
||||
"woff" => "font/woff",
|
||||
"woff2" => "font/woff2",
|
||||
"xml" => "text/xml",
|
||||
&_ => "",
|
||||
},
|
||||
None => "",
|
||||
};
|
||||
|
||||
mime.to_string()
|
||||
}
|
||||
|
||||
pub fn domain_is_within_domain(domain: &str, domain_to_match_against: &str) -> bool {
|
||||
if domain_to_match_against.len() == 0 {
|
||||
return false;
|
||||
}
|
||||
|
||||
str!()
|
||||
if domain_to_match_against == "." {
|
||||
return true;
|
||||
}
|
||||
|
||||
let domain_partials: Vec<&str> = domain.trim_end_matches(".").rsplit(".").collect();
|
||||
let domain_to_match_against_partials: Vec<&str> = domain_to_match_against
|
||||
.trim_end_matches(".")
|
||||
.rsplit(".")
|
||||
.collect();
|
||||
let domain_to_match_against_starts_with_a_dot = domain_to_match_against.starts_with(".");
|
||||
|
||||
let mut i: usize = 0;
|
||||
let l: usize = std::cmp::max(
|
||||
domain_partials.len(),
|
||||
domain_to_match_against_partials.len(),
|
||||
);
|
||||
let mut ok: bool = true;
|
||||
|
||||
while i < l {
|
||||
// Exit and return false if went out of bounds of domain to match against, and it didn't start with a dot
|
||||
if !domain_to_match_against_starts_with_a_dot
|
||||
&& domain_to_match_against_partials.len() < i + 1
|
||||
{
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
|
||||
let domain_partial = if domain_partials.len() < i + 1 {
|
||||
""
|
||||
} else {
|
||||
domain_partials.get(i).unwrap()
|
||||
};
|
||||
let domain_to_match_against_partial = if domain_to_match_against_partials.len() < i + 1 {
|
||||
""
|
||||
} else {
|
||||
domain_to_match_against_partials.get(i).unwrap()
|
||||
};
|
||||
|
||||
let parts_match = domain_to_match_against_partial.eq_ignore_ascii_case(domain_partial);
|
||||
|
||||
if !parts_match && domain_to_match_against_partial.len() != 0 {
|
||||
ok = false;
|
||||
break;
|
||||
}
|
||||
|
||||
i += 1;
|
||||
}
|
||||
|
||||
ok
|
||||
}
|
||||
|
||||
pub fn is_plaintext_media_type(media_type: &str) -> bool {
|
||||
@@ -54,16 +153,31 @@ pub fn is_plaintext_media_type(media_type: &str) -> bool {
|
||||
|| PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
|
||||
}
|
||||
|
||||
pub fn indent(level: u32) -> String {
|
||||
let mut result: String = String::new();
|
||||
let mut l: u32 = level;
|
||||
pub fn parse_content_type(content_type: &str) -> (String, String, bool) {
|
||||
let mut media_type: String = "text/plain".to_string();
|
||||
let mut charset: String = "US-ASCII".to_string();
|
||||
let mut is_base64: bool = false;
|
||||
|
||||
while l > 0 {
|
||||
result += " ";
|
||||
l -= 1;
|
||||
// Parse meta data
|
||||
let content_type_items: Vec<&str> = content_type.split(';').collect();
|
||||
let mut i: i8 = 0;
|
||||
for item in &content_type_items {
|
||||
if i == 0 {
|
||||
if item.trim().len() > 0 {
|
||||
media_type = item.trim().to_string();
|
||||
}
|
||||
} else {
|
||||
if item.trim().eq_ignore_ascii_case("base64") {
|
||||
is_base64 = true;
|
||||
} else if item.trim().starts_with("charset=") {
|
||||
charset = item.trim().chars().skip(8).collect();
|
||||
}
|
||||
}
|
||||
|
||||
i += 1;
|
||||
}
|
||||
|
||||
result
|
||||
(media_type, charset, is_base64)
|
||||
}
|
||||
|
||||
pub fn retrieve_asset(
|
||||
@@ -72,18 +186,16 @@ pub fn retrieve_asset(
|
||||
parent_url: &Url,
|
||||
url: &Url,
|
||||
options: &Options,
|
||||
depth: u32,
|
||||
) -> Result<(Vec<u8>, Url, String), reqwest::Error> {
|
||||
) -> Result<(Vec<u8>, Url, String, String), reqwest::Error> {
|
||||
if url.scheme() == "data" {
|
||||
let (media_type, data) = parse_data_url(url);
|
||||
Ok((data, url.clone(), media_type))
|
||||
let (media_type, charset, data) = parse_data_url(url);
|
||||
Ok((data, url.clone(), media_type, charset))
|
||||
} else if url.scheme() == "file" {
|
||||
// Check if parent_url is also file:/// (if not, then we don't embed the asset)
|
||||
// Check if parent_url is also a file: URL (if not, then we don't embed the asset)
|
||||
if parent_url.scheme() != "file" {
|
||||
if !options.silent {
|
||||
eprintln!(
|
||||
"{}{}{} ({}){}",
|
||||
indent(depth).as_str(),
|
||||
"{}{} ({}){}",
|
||||
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||
&url,
|
||||
"Security Error",
|
||||
@@ -104,8 +216,7 @@ pub fn retrieve_asset(
|
||||
if path.is_dir() {
|
||||
if !options.silent {
|
||||
eprintln!(
|
||||
"{}{}{} (is a directory){}",
|
||||
indent(depth).as_str(),
|
||||
"{}{} (is a directory){}",
|
||||
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||
&url,
|
||||
if options.no_color {
|
||||
@@ -120,16 +231,22 @@ pub fn retrieve_asset(
|
||||
Err(client.get("").send().unwrap_err())
|
||||
} else {
|
||||
if !options.silent {
|
||||
eprintln!("{}{}", indent(depth).as_str(), &url);
|
||||
eprintln!("{}", &url);
|
||||
}
|
||||
|
||||
Ok((fs::read(&path).expect(""), url.clone(), str!()))
|
||||
let file_blob: Vec<u8> = fs::read(&path).expect("Unable to read file");
|
||||
|
||||
Ok((
|
||||
file_blob.clone(),
|
||||
url.clone(),
|
||||
detect_media_type(&file_blob, url),
|
||||
"".to_string(),
|
||||
))
|
||||
}
|
||||
} else {
|
||||
if !options.silent {
|
||||
eprintln!(
|
||||
"{}{}{} (not found){}",
|
||||
indent(depth).as_str(),
|
||||
"{}{} (not found){}",
|
||||
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||
&url,
|
||||
if options.no_color {
|
||||
@@ -147,23 +264,53 @@ pub fn retrieve_asset(
|
||||
let cache_key: String = clean_url(url.clone()).as_str().to_string();
|
||||
|
||||
if cache.contains_key(&cache_key) {
|
||||
// URL is in cache,
|
||||
// we get and return it
|
||||
// URL is in cache, we get and return it
|
||||
if !options.silent {
|
||||
eprintln!("{}{} (from cache)", indent(depth).as_str(), &url);
|
||||
eprintln!("{} (from cache)", &url);
|
||||
}
|
||||
|
||||
Ok((cache.get(&cache_key).unwrap().to_vec(), url.clone(), str!()))
|
||||
Ok((
|
||||
cache.get(&cache_key).unwrap().to_vec(),
|
||||
url.clone(),
|
||||
"".to_string(),
|
||||
"".to_string(),
|
||||
))
|
||||
} else {
|
||||
// URL not in cache,
|
||||
// we retrieve the file
|
||||
match client.get(url.as_str()).send() {
|
||||
Ok(mut response) => {
|
||||
if !options.ignore_errors && response.status() != 200 {
|
||||
if let Some(domains) = &options.domains {
|
||||
let domain_matches = domains
|
||||
.iter()
|
||||
.any(|d| domain_is_within_domain(url.host_str().unwrap(), &d.trim()));
|
||||
if (options.blacklist_domains && domain_matches)
|
||||
|| (!options.blacklist_domains && !domain_matches)
|
||||
{
|
||||
return Err(client.get("").send().unwrap_err());
|
||||
}
|
||||
}
|
||||
|
||||
// URL not in cache, we retrieve the file
|
||||
let mut headers = HeaderMap::new();
|
||||
if options.cookies.len() > 0 {
|
||||
for cookie in &options.cookies {
|
||||
if !cookie.is_expired() && cookie.matches_url(url.as_str()) {
|
||||
let cookie_header_value: String = cookie.name.clone() + "=" + &cookie.value;
|
||||
headers
|
||||
.insert(COOKIE, HeaderValue::from_str(&cookie_header_value).unwrap());
|
||||
}
|
||||
}
|
||||
}
|
||||
// Add referer header for page resource requests
|
||||
if ["https", "http"].contains(&parent_url.scheme()) && parent_url != url {
|
||||
headers.insert(
|
||||
REFERER,
|
||||
HeaderValue::from_str(get_referer_url(parent_url.clone()).as_str()).unwrap(),
|
||||
);
|
||||
}
|
||||
match client.get(url.as_str()).headers(headers).send() {
|
||||
Ok(response) => {
|
||||
if !options.ignore_errors && response.status() != reqwest::StatusCode::OK {
|
||||
if !options.silent {
|
||||
eprintln!(
|
||||
"{}{}{} ({}){}",
|
||||
indent(depth).as_str(),
|
||||
"{}{} ({}){}",
|
||||
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||
&url,
|
||||
response.status(),
|
||||
@@ -178,38 +325,59 @@ pub fn retrieve_asset(
|
||||
return Err(client.get("").send().unwrap_err());
|
||||
}
|
||||
|
||||
let response_url: Url = response.url().clone();
|
||||
|
||||
if !options.silent {
|
||||
if url.as_str() == response.url().as_str() {
|
||||
eprintln!("{}{}", indent(depth).as_str(), &url);
|
||||
if url.as_str() == response_url.as_str() {
|
||||
eprintln!("{}", &url);
|
||||
} else {
|
||||
eprintln!("{}{} -> {}", indent(depth).as_str(), &url, &response.url());
|
||||
eprintln!("{} -> {}", &url, &response_url);
|
||||
}
|
||||
}
|
||||
|
||||
let new_cache_key: String = clean_url(response.url().clone()).to_string();
|
||||
let new_cache_key: String = clean_url(response_url.clone()).to_string();
|
||||
|
||||
// Convert response into a byte array
|
||||
let mut data: Vec<u8> = vec![];
|
||||
response.copy_to(&mut data).unwrap();
|
||||
|
||||
// Attempt to obtain media type by reading Content-Type header
|
||||
let media_type: &str = response
|
||||
// Attempt to obtain media type and charset by reading Content-Type header
|
||||
let content_type: &str = response
|
||||
.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.and_then(|header| header.to_str().ok())
|
||||
.unwrap_or("");
|
||||
|
||||
let (media_type, charset, _is_base64) = parse_content_type(&content_type);
|
||||
|
||||
// Convert response into a byte array
|
||||
let mut data: Vec<u8> = vec![];
|
||||
match response.bytes() {
|
||||
Ok(b) => {
|
||||
data = b.to_vec();
|
||||
}
|
||||
Err(error) => {
|
||||
if !options.silent {
|
||||
eprintln!(
|
||||
"{}{}{}",
|
||||
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||
error,
|
||||
if options.no_color {
|
||||
""
|
||||
} else {
|
||||
ANSI_COLOR_RESET
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add retrieved resource to cache
|
||||
cache.insert(new_cache_key, data.clone());
|
||||
|
||||
// Return
|
||||
Ok((data, response.url().clone(), media_type.to_string()))
|
||||
Ok((data, response_url, media_type, charset))
|
||||
}
|
||||
Err(error) => {
|
||||
if !options.silent {
|
||||
eprintln!(
|
||||
"{}{}{} ({}){}",
|
||||
indent(depth).as_str(),
|
||||
"{}{} ({}){}",
|
||||
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||
&url,
|
||||
error,
|
||||
|
||||
|
Before Width: | Height: | Size: 296 B After Width: | Height: | Size: 296 B |
|
Before Width: | Height: | Size: 296 B After Width: | Height: | Size: 296 B |
9
tests/_data_/unusual_encodings/gb2312.html
Normal file
9
tests/_data_/unusual_encodings/gb2312.html
Normal file
@@ -0,0 +1,9 @@
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="content-type" content="text/html;charset=GB2312"/>
|
||||
<title>近七成人减少线下需求 银行数字化转型提速--经济·科技--人民网 </title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>近七成人减少线下需求 银行数字化转型提速</h1>
|
||||
</body>
|
||||
</html>
|
||||
@@ -22,18 +22,18 @@ mod passing {
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain newly added base URL
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html><head>\
|
||||
<base href=\"http://localhost:8000/\"></base>\
|
||||
</head><body>Hello, World!</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
@@ -46,18 +46,18 @@ mod passing {
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain newly added base URL
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html><head>\
|
||||
<base href=\"http://localhost:8000/\">\
|
||||
</head><body>Hello, World!</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
@@ -72,18 +72,18 @@ mod passing {
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain newly added base URL
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html><head>\
|
||||
<base href=\"http://localhost/\">\
|
||||
</head><body>Hello, World!</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
@@ -98,18 +98,18 @@ mod passing {
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain newly added base URL
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html><head>\
|
||||
<base href=\"\">\
|
||||
</head><body>Hello, World!</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
}
|
||||
@@ -14,21 +14,36 @@ mod passing {
|
||||
use std::process::{Command, Stdio};
|
||||
use url::Url;
|
||||
|
||||
#[test]
|
||||
fn print_help_information() {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let out = cmd.arg("-h").output().unwrap();
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain program name, version, and usage information
|
||||
// TODO
|
||||
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn print_version() {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let out = cmd.arg("-V").output().unwrap();
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain program name and version
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
format!("{} {}\n", env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"))
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
@@ -46,38 +61,38 @@ mod passing {
|
||||
cmd.stdin(echo_out);
|
||||
let out = cmd.arg("-M").arg("-").output().unwrap();
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain HTML created out of STDIN
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html><head></head><body>Hello from STDIN\n</body></html>\n"
|
||||
);
|
||||
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn css_import_string() {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let path_html: &Path = Path::new("src/tests/data/css/index.html");
|
||||
let path_css: &Path = Path::new("src/tests/data/css/style.css");
|
||||
let path_html: &Path = Path::new("tests/_data_/css/index.html");
|
||||
let path_css: &Path = Path::new("tests/_data_/css/style.css");
|
||||
|
||||
assert!(path_html.is_file());
|
||||
assert!(path_css.is_file());
|
||||
|
||||
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
|
||||
|
||||
// STDOUT should contain embedded CSS url()'s
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head><style>\n\n @charset \"UTF-8\";\n\n @import \"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\";\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n</style>\n</head><body></body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should list files that got retrieved
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"\
|
||||
{file_url_html}\n \
|
||||
{file_url_css}\n \
|
||||
{file_url_css}\n \
|
||||
{file_url_html}\n\
|
||||
{file_url_css}\n\
|
||||
{file_url_css}\n\
|
||||
{file_url_css}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
||||
@@ -85,7 +100,13 @@ mod passing {
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
// STDOUT should contain embedded CSS url()'s
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html><head><style>\n\n @charset \"UTF-8\";\n\n @import \"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\";\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n @import url(\"data:text/css;base64,Ym9keXtiYWNrZ3JvdW5kLWNvbG9yOiMwMDA7Y29sb3I6I2ZmZn0K\");\n\n</style>\n</head><body></body></html>\n"
|
||||
);
|
||||
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
}
|
||||
@@ -108,16 +129,16 @@ mod failing {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let out = cmd.arg("").output().unwrap();
|
||||
|
||||
// STDOUT should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
|
||||
|
||||
// STDERR should contain error description
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
"No target specified\n"
|
||||
);
|
||||
|
||||
// The exit code should be 1
|
||||
// STDOUT should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stdout), "");
|
||||
|
||||
// Exit code should be 1
|
||||
out.assert().code(1);
|
||||
}
|
||||
}
|
||||
@@ -11,6 +11,8 @@ mod passing {
|
||||
use std::env;
|
||||
use std::process::Command;
|
||||
|
||||
use monolith::url::EMPTY_IMAGE_DATA_URL;
|
||||
|
||||
#[test]
|
||||
fn isolate_data_url() {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
@@ -21,18 +23,18 @@ mod passing {
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain isolated HTML
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html><head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:;\"></meta>\
|
||||
</head><body>Hello, World!</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
@@ -46,19 +48,19 @@ mod passing {
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain HTML with no CSS
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html><head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"style-src 'none';\"></meta>\
|
||||
<style></style>\
|
||||
</head><body>Hello</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
@@ -72,19 +74,19 @@ mod passing {
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain HTML with no web fonts
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html><head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"font-src 'none';\"></meta>\
|
||||
<style></style>\
|
||||
</head><body>Hi</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
@@ -98,18 +100,18 @@ mod passing {
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain HTML with no iframes
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html><head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"frame-src 'none'; child-src 'none';\"></meta>\
|
||||
</head><body><iframe src=\"\"></iframe>Hi</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
@@ -123,9 +125,12 @@ mod passing {
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain HTML with no images
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
format!(
|
||||
"<html>\
|
||||
<head>\
|
||||
@@ -136,14 +141,11 @@ mod passing {
|
||||
Hi\
|
||||
</body>\
|
||||
</html>\n",
|
||||
empty_image = empty_image!()
|
||||
empty_image = EMPTY_IMAGE_DATA_URL,
|
||||
)
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
@@ -157,9 +159,12 @@ mod passing {
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain HTML with no JS
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"script-src 'none';\"></meta>\
|
||||
@@ -168,10 +173,7 @@ mod passing {
|
||||
</html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
}
|
||||
@@ -194,17 +196,14 @@ mod failing {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap();
|
||||
|
||||
// STDOUT should contain HTML
|
||||
assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), "");
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDERR should contain error description
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
"Unsupported data URL media type\n"
|
||||
);
|
||||
// STDOUT should contain text
|
||||
assert_eq!(String::from_utf8_lossy(&out.stdout), "Hello, World!\n");
|
||||
|
||||
// The exit code should be 1
|
||||
out.assert().code(1);
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -216,16 +215,16 @@ mod failing {
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDOUT should contain HTML with no JS in it
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain HTML without contents of local JS file
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html><head><script src=\"data:application/javascript;base64,\"></script></head><body></body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
}
|
||||
@@ -10,29 +10,49 @@ mod passing {
|
||||
use assert_cmd::prelude::*;
|
||||
use std::env;
|
||||
use std::fs;
|
||||
use std::path::Path;
|
||||
use std::path::{Path, MAIN_SEPARATOR};
|
||||
use std::process::Command;
|
||||
use url::Url;
|
||||
|
||||
use monolith::url::EMPTY_IMAGE_DATA_URL;
|
||||
|
||||
#[test]
|
||||
fn local_file_target_input_relative_target_path() {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let cwd_normalized: String =
|
||||
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
||||
let cwd_normalized: String = env::current_dir()
|
||||
.unwrap()
|
||||
.to_str()
|
||||
.unwrap()
|
||||
.replace("\\", "/");
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg(if cfg!(windows) {
|
||||
"src\\tests\\data\\basic\\local-file.html"
|
||||
} else {
|
||||
"src/tests/data/basic/local-file.html"
|
||||
})
|
||||
.arg(format!(
|
||||
"tests{s}_data_{s}basic{s}local-file.html",
|
||||
s = MAIN_SEPARATOR
|
||||
))
|
||||
.output()
|
||||
.unwrap();
|
||||
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||
|
||||
// STDERR should contain list of retrieved file URLs, two missing
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"\
|
||||
{file}{cwd}/tests/_data_/basic/local-file.html\n\
|
||||
{file}{cwd}/tests/_data_/basic/local-style.css\n\
|
||||
{file}{cwd}/tests/_data_/basic/local-style-does-not-exist.css (not found)\n\
|
||||
{file}{cwd}/tests/_data_/basic/monolith.png (not found)\n\
|
||||
{file}{cwd}/tests/_data_/basic/local-script.js\n\
|
||||
",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized
|
||||
)
|
||||
);
|
||||
|
||||
// STDOUT should contain HTML from the local file
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"\
|
||||
<!DOCTYPE html><html lang=\"en\"><head>\n \
|
||||
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
|
||||
@@ -47,30 +67,14 @@ mod passing {
|
||||
"
|
||||
);
|
||||
|
||||
// STDERR should contain list of retrieved file URLs, two missing
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
format!(
|
||||
"\
|
||||
{file}{cwd}/src/tests/data/basic/local-file.html\n \
|
||||
{file}{cwd}/src/tests/data/basic/local-style.css\n \
|
||||
{file}{cwd}/src/tests/data/basic/local-style-does-not-exist.css (not found)\n \
|
||||
{file}{cwd}/src/tests/data/basic/monolith.png (not found)\n \
|
||||
{file}{cwd}/src/tests/data/basic/local-script.js\n\
|
||||
",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn local_file_target_input_absolute_target_path() {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let path_html: &Path = Path::new("src/tests/data/basic/local-file.html");
|
||||
let path_html: &Path = Path::new("tests/_data_/basic/local-file.html");
|
||||
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
@@ -79,13 +83,22 @@ mod passing {
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDERR should contain only the target file
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"{file_url_html}\n",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
||||
)
|
||||
);
|
||||
|
||||
// STDOUT should contain HTML from the local file
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
format!(
|
||||
"\
|
||||
<!DOCTYPE html><html lang=\"en\"><head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; script-src 'none'; img-src data:;\"></meta>\n \
|
||||
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">\n \
|
||||
<title>Local HTML file</title>\n \
|
||||
<link rel=\"stylesheet\" type=\"text/css\">\n \
|
||||
@@ -96,51 +109,47 @@ mod passing {
|
||||
<script></script>\n\n\n\n\
|
||||
</body></html>\n\
|
||||
",
|
||||
empty_image = empty_image!()
|
||||
empty_image = EMPTY_IMAGE_DATA_URL
|
||||
)
|
||||
);
|
||||
|
||||
// STDERR should contain only the target file
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
format!(
|
||||
"{file_url_html}\n",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn local_file_url_target_input() {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let cwd_normalized: String =
|
||||
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
||||
let cwd_normalized: String = env::current_dir()
|
||||
.unwrap()
|
||||
.to_str()
|
||||
.unwrap()
|
||||
.replace("\\", "/");
|
||||
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-cji")
|
||||
.arg(if cfg!(windows) {
|
||||
format!(
|
||||
"{file}{cwd}/src/tests/data/basic/local-file.html",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"{file}{cwd}/src/tests/data/basic/local-file.html",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
)
|
||||
})
|
||||
.arg(format!(
|
||||
"{file}{cwd}/tests/_data_/basic/local-file.html",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
))
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDERR should contain list of retrieved file URLs
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"{file}{cwd}/tests/_data_/basic/local-file.html\n",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
)
|
||||
);
|
||||
|
||||
// STDOUT should contain HTML from the local file
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
format!(
|
||||
"\
|
||||
<!DOCTYPE html><html lang=\"en\"><head>\
|
||||
@@ -155,44 +164,28 @@ mod passing {
|
||||
<script></script>\n\n\n\n\
|
||||
</body></html>\n\
|
||||
",
|
||||
empty_image = empty_image!()
|
||||
empty_image = EMPTY_IMAGE_DATA_URL
|
||||
)
|
||||
);
|
||||
|
||||
// STDERR should contain list of retrieved file URLs
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
format!(
|
||||
"{file}{cwd}/src/tests/data/basic/local-file.html\n",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn embed_file_url_local_asset_within_style_attribute() {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let path_html: &Path = Path::new("src/tests/data/svg/index.html");
|
||||
let path_svg: &Path = Path::new("src/tests/data/svg/image.svg");
|
||||
let path_html: &Path = Path::new("tests/_data_/svg/index.html");
|
||||
let path_svg: &Path = Path::new("tests/_data_/svg/image.svg");
|
||||
|
||||
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
|
||||
|
||||
// STDOUT should contain HTML with date URL for background-image in it
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head></head><body><div style=\"background-image: url("data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=")\"></div>\n</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should list files that got retrieved
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"\
|
||||
{file_url_html}\n \
|
||||
{file_url_html}\n\
|
||||
{file_url_svg}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
||||
@@ -200,28 +193,37 @@ mod passing {
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
// STDOUT should contain HTML with date URL for background-image in it
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html><head></head><body><div style=\"background-image: url("data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=")\"></div>\n</body></html>\n"
|
||||
);
|
||||
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn discard_integrity_for_local_files() {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let cwd_normalized: String =
|
||||
str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/");
|
||||
let cwd_normalized: String = env::current_dir()
|
||||
.unwrap()
|
||||
.to_str()
|
||||
.unwrap()
|
||||
.replace("\\", "/");
|
||||
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-i")
|
||||
.arg(if cfg!(windows) {
|
||||
format!(
|
||||
"{file}{cwd}/src/tests/data/integrity/index.html",
|
||||
"{file}{cwd}/tests/_data_/integrity/index.html",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
)
|
||||
} else {
|
||||
format!(
|
||||
"{file}{cwd}/src/tests/data/integrity/index.html",
|
||||
"{file}{cwd}/tests/_data_/integrity/index.html",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
)
|
||||
@@ -229,9 +231,25 @@ mod passing {
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDERR should contain list of retrieved file URLs
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"\
|
||||
{file}{cwd}/tests/_data_/integrity/index.html\n\
|
||||
{file}{cwd}/tests/_data_/integrity/style.css\n\
|
||||
{file}{cwd}/tests/_data_/integrity/style.css\n\
|
||||
{file}{cwd}/tests/_data_/integrity/script.js\n\
|
||||
{file}{cwd}/tests/_data_/integrity/script.js\n\
|
||||
",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
)
|
||||
);
|
||||
|
||||
// STDOUT should contain HTML from the local file; integrity attributes should be missing
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
format!(
|
||||
"\
|
||||
<!DOCTYPE html><html lang=\"en\"><head>\
|
||||
@@ -247,23 +265,7 @@ mod passing {
|
||||
)
|
||||
);
|
||||
|
||||
// STDERR should contain list of retrieved file URLs
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
format!(
|
||||
"\
|
||||
{file}{cwd}/src/tests/data/integrity/index.html\n \
|
||||
{file}{cwd}/src/tests/data/integrity/style.css\n \
|
||||
{file}{cwd}/src/tests/data/integrity/style.css\n \
|
||||
{file}{cwd}/src/tests/data/integrity/script.js\n \
|
||||
{file}{cwd}/src/tests/data/integrity/script.js\n\
|
||||
",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
}
|
||||
@@ -17,23 +17,17 @@ mod passing {
|
||||
#[test]
|
||||
fn parse_noscript_contents() {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let path_html: &Path = Path::new("src/tests/data/noscript/index.html");
|
||||
let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg");
|
||||
let path_html: &Path = Path::new("tests/_data_/noscript/index.html");
|
||||
let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg");
|
||||
|
||||
let out = cmd.arg("-M").arg(path_html.as_os_str()).output().unwrap();
|
||||
|
||||
// STDOUT should contain HTML with no CSS
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head></head><body><noscript><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"></noscript>\n</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should contain target HTML and embedded SVG files
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"\
|
||||
{file_url_html}\n \
|
||||
{file_url_html}\n\
|
||||
{file_url_svg}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
||||
@@ -41,30 +35,30 @@ mod passing {
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
// STDOUT should contain HTML with no CSS
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html><head></head><body><noscript><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"></noscript>\n</body></html>\n"
|
||||
);
|
||||
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unwrap_noscript_contents() {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let path_html: &Path = Path::new("src/tests/data/noscript/index.html");
|
||||
let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg");
|
||||
let path_html: &Path = Path::new("tests/_data_/noscript/index.html");
|
||||
let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg");
|
||||
|
||||
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
|
||||
|
||||
// STDOUT should contain HTML with no CSS
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head></head><body><!--noscript--><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"><!--/noscript-->\n</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should contain target HTML and embedded SVG files
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"\
|
||||
{file_url_html}\n \
|
||||
{file_url_html}\n\
|
||||
{file_url_svg}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
||||
@@ -72,30 +66,30 @@ mod passing {
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
// STDOUT should contain HTML with no CSS
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html><head></head><body><!--noscript--><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"><!--/noscript-->\n</body></html>\n"
|
||||
);
|
||||
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unwrap_noscript_contents_nested() {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let path_html: &Path = Path::new("src/tests/data/noscript/nested.html");
|
||||
let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg");
|
||||
let path_html: &Path = Path::new("tests/_data_/noscript/nested.html");
|
||||
let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg");
|
||||
|
||||
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
|
||||
|
||||
// STDOUT should contain HTML with no CSS
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
"<html><head></head><body><!--noscript--><h1>JS is not active</h1><!--noscript--><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"><!--/noscript--><!--/noscript-->\n</body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should contain target HTML and embedded SVG files
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"\
|
||||
{file_url_html}\n \
|
||||
{file_url_html}\n\
|
||||
{file_url_svg}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
||||
@@ -103,21 +97,40 @@ mod passing {
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
// STDOUT should contain HTML with no CSS
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html><head></head><body><!--noscript--><h1>JS is not active</h1><!--noscript--><img src=\"data:image/svg+xml;base64,PHN2ZyB2ZXJzaW9uPSIxLjEiIGJhc2VQcm9maWxlPSJmdWxsIiB3aWR0aD0iMzAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj4KICAgIDxyZWN0IHdpZHRoPSIxMDAlIiBoZWlnaHQ9IjEwMCUiIGZpbGw9InJlZCIgLz4KICAgIDxjaXJjbGUgY3g9IjE1MCIgY3k9IjEwMCIgcj0iODAiIGZpbGw9ImdyZWVuIiAvPgogICAgPHRleHQgeD0iMTUwIiB5PSIxMjUiIGZvbnQtc2l6ZT0iNjAiIHRleHQtYW5jaG9yPSJtaWRkbGUiIGZpbGw9IndoaXRlIj5TVkc8L3RleHQ+Cjwvc3ZnPgo=\"><!--/noscript--><!--/noscript-->\n</body></html>\n"
|
||||
);
|
||||
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unwrap_noscript_contents_with_script() {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let path_html: &Path = Path::new("src/tests/data/noscript/script.html");
|
||||
let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg");
|
||||
let path_html: &Path = Path::new("tests/_data_/noscript/script.html");
|
||||
let path_svg: &Path = Path::new("tests/_data_/noscript/image.svg");
|
||||
|
||||
let out = cmd.arg("-Mn").arg(path_html.as_os_str()).output().unwrap();
|
||||
|
||||
// STDERR should contain target HTML and embedded SVG files
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"\
|
||||
{file_url_html}\n\
|
||||
{file_url_svg}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
||||
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
|
||||
)
|
||||
);
|
||||
|
||||
// STDOUT should contain HTML with no CSS
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html>\
|
||||
<head></head>\
|
||||
<body>\
|
||||
@@ -128,20 +141,7 @@ mod passing {
|
||||
</html>\n"
|
||||
);
|
||||
|
||||
// STDERR should contain target HTML and embedded SVG files
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stderr).unwrap(),
|
||||
format!(
|
||||
"\
|
||||
{file_url_html}\n \
|
||||
{file_url_svg}\n\
|
||||
",
|
||||
file_url_html = Url::from_file_path(fs::canonicalize(&path_html).unwrap()).unwrap(),
|
||||
file_url_svg = Url::from_file_path(fs::canonicalize(&path_svg).unwrap()).unwrap(),
|
||||
)
|
||||
);
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
@@ -155,16 +155,16 @@ mod passing {
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain unwrapped contents of NOSCRIPT element
|
||||
assert_eq!(
|
||||
std::str::from_utf8(&out.stdout).unwrap(),
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html><head><!--noscript class=\"\"-->test<!--/noscript--></head><body></body></html>\n"
|
||||
);
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), "");
|
||||
|
||||
// The exit code should be 0
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
}
|
||||
239
tests/cli/unusual_encodings.rs
Normal file
239
tests/cli/unusual_encodings.rs
Normal file
@@ -0,0 +1,239 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use assert_cmd::prelude::*;
|
||||
use encoding_rs::Encoding;
|
||||
use std::env;
|
||||
use std::path::MAIN_SEPARATOR;
|
||||
use std::process::{Command, Stdio};
|
||||
|
||||
#[test]
|
||||
fn properly_save_document_with_gb2312() {
|
||||
let cwd = env::current_dir().unwrap();
|
||||
let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/");
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg(format!(
|
||||
"tests{s}_data_{s}unusual_encodings{s}gb2312.html",
|
||||
s = MAIN_SEPARATOR
|
||||
))
|
||||
.output()
|
||||
.unwrap();
|
||||
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||
|
||||
// STDERR should contain only the target file
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"{file}{cwd}/tests/_data_/unusual_encodings/gb2312.html\n",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
)
|
||||
);
|
||||
|
||||
// STDOUT should contain original document without any modifications
|
||||
let s: String;
|
||||
if let Some(encoding) = Encoding::for_label(b"gb2312") {
|
||||
let (string, _, _) = encoding.decode(&out.stdout);
|
||||
s = string.to_string();
|
||||
} else {
|
||||
s = String::from_utf8_lossy(&out.stdout).to_string();
|
||||
}
|
||||
assert_eq!(
|
||||
s,
|
||||
"<html>\
|
||||
<head>\n \
|
||||
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\">\n \
|
||||
<title>近七成人减少线下需求\u{3000}银行数字化转型提速--经济·科技--人民网 </title>\n\
|
||||
</head>\n\
|
||||
<body>\n \
|
||||
<h1>近七成人减少线下需求\u{3000}银行数字化转型提速</h1>\n\n\n\
|
||||
</body>\
|
||||
</html>\n"
|
||||
);
|
||||
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn properly_save_document_with_gb2312_from_stdin() {
|
||||
let mut echo = Command::new("cat")
|
||||
.arg(format!(
|
||||
"tests{s}_data_{s}unusual_encodings{s}gb2312.html",
|
||||
s = MAIN_SEPARATOR
|
||||
))
|
||||
.stdout(Stdio::piped())
|
||||
.spawn()
|
||||
.unwrap();
|
||||
let echo_out = echo.stdout.take().unwrap();
|
||||
echo.wait().unwrap();
|
||||
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
cmd.stdin(echo_out);
|
||||
let out = cmd.arg("-M").arg("-").output().unwrap();
|
||||
|
||||
// STDERR should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stderr), "");
|
||||
|
||||
// STDOUT should contain HTML created out of STDIN
|
||||
let s: String;
|
||||
if let Some(encoding) = Encoding::for_label(b"gb2312") {
|
||||
let (string, _, _) = encoding.decode(&out.stdout);
|
||||
s = string.to_string();
|
||||
} else {
|
||||
s = String::from_utf8_lossy(&out.stdout).to_string();
|
||||
}
|
||||
assert_eq!(
|
||||
s,
|
||||
"<html>\
|
||||
<head>\n \
|
||||
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\">\n \
|
||||
<title>近七成人减少线下需求\u{3000}银行数字化转型提速--经济·科技--人民网 </title>\n\
|
||||
</head>\n\
|
||||
<body>\n \
|
||||
<h1>近七成人减少线下需求\u{3000}银行数字化转型提速</h1>\n\n\n\
|
||||
</body>\
|
||||
</html>\n"
|
||||
);
|
||||
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn properly_save_document_with_gb2312_custom_charset() {
|
||||
let cwd = env::current_dir().unwrap();
|
||||
let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/");
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-E")
|
||||
.arg("utf8")
|
||||
.arg(format!(
|
||||
"tests{s}_data_{s}unusual_encodings{s}gb2312.html",
|
||||
s = MAIN_SEPARATOR
|
||||
))
|
||||
.output()
|
||||
.unwrap();
|
||||
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||
|
||||
// STDERR should contain only the target file
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"{file}{cwd}/tests/_data_/unusual_encodings/gb2312.html\n",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
)
|
||||
);
|
||||
|
||||
// STDOUT should contain original document without any modifications
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stdout).to_string(),
|
||||
"<html>\
|
||||
<head>\n \
|
||||
<meta http-equiv=\"content-type\" content=\"text/html;charset=utf8\">\n \
|
||||
<title>近七成人减少线下需求\u{3000}银行数字化转型提速--经济·科技--人民网 </title>\n\
|
||||
</head>\n\
|
||||
<body>\n \
|
||||
<h1>近七成人减少线下需求\u{3000}银行数字化转型提速</h1>\n\n\n\
|
||||
</body>\
|
||||
</html>\n"
|
||||
);
|
||||
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn properly_save_document_with_gb2312_custom_charset_bad() {
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg("-E")
|
||||
.arg("utf0")
|
||||
.arg(format!(
|
||||
"tests{s}_data_{s}unusual_encodings{s}gb2312.html",
|
||||
s = MAIN_SEPARATOR
|
||||
))
|
||||
.output()
|
||||
.unwrap();
|
||||
|
||||
// STDERR should contain error message
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
"Unknown encoding: utf0\n"
|
||||
);
|
||||
|
||||
// STDOUT should be empty
|
||||
assert_eq!(String::from_utf8_lossy(&out.stdout).to_string(), "");
|
||||
|
||||
// Exit code should be 1
|
||||
out.assert().code(1);
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use assert_cmd::prelude::*;
|
||||
use std::env;
|
||||
use std::path::MAIN_SEPARATOR;
|
||||
use std::process::Command;
|
||||
|
||||
#[test]
|
||||
fn change_iso88591_to_utf8_to_properly_display_html_entities() {
|
||||
let cwd = env::current_dir().unwrap();
|
||||
let cwd_normalized: String = cwd.to_str().unwrap().replace("\\", "/");
|
||||
let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap();
|
||||
let out = cmd
|
||||
.arg("-M")
|
||||
.arg(format!(
|
||||
"tests{s}_data_{s}unusual_encodings{s}iso-8859-1.html",
|
||||
s = MAIN_SEPARATOR
|
||||
))
|
||||
.output()
|
||||
.unwrap();
|
||||
let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" };
|
||||
|
||||
// STDERR should contain only the target file
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stderr),
|
||||
format!(
|
||||
"{file}{cwd}/tests/_data_/unusual_encodings/iso-8859-1.html\n",
|
||||
file = file_url_protocol,
|
||||
cwd = cwd_normalized,
|
||||
)
|
||||
);
|
||||
|
||||
// STDOUT should contain original document but with UTF-8 charset
|
||||
assert_eq!(
|
||||
String::from_utf8_lossy(&out.stdout),
|
||||
"<html>\
|
||||
<head>\n \
|
||||
<meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">\n \
|
||||
</head>\n \
|
||||
<body>\n \
|
||||
<20> Some Company\n \
|
||||
\n\n</body>\
|
||||
</html>\n"
|
||||
);
|
||||
|
||||
// Exit code should be 0
|
||||
out.assert().code(0);
|
||||
}
|
||||
}
|
||||
68
tests/cookies/cookie/is_expired.rs
Normal file
68
tests/cookies/cookie/is_expired.rs
Normal file
@@ -0,0 +1,68 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use monolith::cookies;
|
||||
|
||||
#[test]
|
||||
fn never_expires() {
|
||||
let cookie = cookies::Cookie {
|
||||
domain: String::from("127.0.0.1"),
|
||||
include_subdomains: true,
|
||||
path: String::from("/"),
|
||||
https_only: false,
|
||||
expires: 0,
|
||||
name: String::from(""),
|
||||
value: String::from(""),
|
||||
};
|
||||
|
||||
assert!(!cookie.is_expired());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn expires_long_from_now() {
|
||||
let cookie = cookies::Cookie {
|
||||
domain: String::from("127.0.0.1"),
|
||||
include_subdomains: true,
|
||||
path: String::from("/"),
|
||||
https_only: false,
|
||||
expires: 9999999999,
|
||||
name: String::from(""),
|
||||
value: String::from(""),
|
||||
};
|
||||
|
||||
assert!(!cookie.is_expired());
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use monolith::cookies;
|
||||
|
||||
#[test]
|
||||
fn expired() {
|
||||
let cookie = cookies::Cookie {
|
||||
domain: String::from("127.0.0.1"),
|
||||
include_subdomains: true,
|
||||
path: String::from("/"),
|
||||
https_only: false,
|
||||
expires: 1,
|
||||
name: String::from(""),
|
||||
value: String::from(""),
|
||||
};
|
||||
|
||||
assert!(cookie.is_expired());
|
||||
}
|
||||
}
|
||||
107
tests/cookies/cookie/matches_url.rs
Normal file
107
tests/cookies/cookie/matches_url.rs
Normal file
@@ -0,0 +1,107 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use monolith::cookies;
|
||||
|
||||
#[test]
|
||||
fn secure_url() {
|
||||
let cookie = cookies::Cookie {
|
||||
domain: String::from("127.0.0.1"),
|
||||
include_subdomains: true,
|
||||
path: String::from("/"),
|
||||
https_only: true,
|
||||
expires: 0,
|
||||
name: String::from(""),
|
||||
value: String::from(""),
|
||||
};
|
||||
assert!(cookie.matches_url("https://127.0.0.1/something"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn non_secure_url() {
|
||||
let cookie = cookies::Cookie {
|
||||
domain: String::from("127.0.0.1"),
|
||||
include_subdomains: true,
|
||||
path: String::from("/"),
|
||||
https_only: false,
|
||||
expires: 0,
|
||||
name: String::from(""),
|
||||
value: String::from(""),
|
||||
};
|
||||
assert!(cookie.matches_url("http://127.0.0.1/something"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn subdomain() {
|
||||
let cookie = cookies::Cookie {
|
||||
domain: String::from(".somethingsomething.com"),
|
||||
include_subdomains: true,
|
||||
path: String::from("/"),
|
||||
https_only: true,
|
||||
expires: 0,
|
||||
name: String::from(""),
|
||||
value: String::from(""),
|
||||
};
|
||||
assert!(cookie.matches_url("https://cdn.somethingsomething.com/something"));
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use monolith::cookies;
|
||||
|
||||
#[test]
|
||||
fn empty_url() {
|
||||
let cookie = cookies::Cookie {
|
||||
domain: String::from("127.0.0.1"),
|
||||
include_subdomains: true,
|
||||
path: String::from("/"),
|
||||
https_only: false,
|
||||
expires: 0,
|
||||
name: String::from(""),
|
||||
value: String::from(""),
|
||||
};
|
||||
assert!(!cookie.matches_url(""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn wrong_hostname() {
|
||||
let cookie = cookies::Cookie {
|
||||
domain: String::from("127.0.0.1"),
|
||||
include_subdomains: true,
|
||||
path: String::from("/"),
|
||||
https_only: false,
|
||||
expires: 0,
|
||||
name: String::from(""),
|
||||
value: String::from(""),
|
||||
};
|
||||
assert!(!cookie.matches_url("http://0.0.0.0/"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn wrong_path() {
|
||||
let cookie = cookies::Cookie {
|
||||
domain: String::from("127.0.0.1"),
|
||||
include_subdomains: false,
|
||||
path: String::from("/"),
|
||||
https_only: false,
|
||||
expires: 0,
|
||||
name: String::from(""),
|
||||
value: String::from(""),
|
||||
};
|
||||
assert!(!cookie.matches_url("http://0.0.0.0/path"));
|
||||
}
|
||||
}
|
||||
2
tests/cookies/cookie/mod.rs
Normal file
2
tests/cookies/cookie/mod.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
mod is_expired;
|
||||
mod matches_url;
|
||||
2
tests/cookies/mod.rs
Normal file
2
tests/cookies/mod.rs
Normal file
@@ -0,0 +1,2 @@
|
||||
mod cookie;
|
||||
mod parse_cookie_file_contents;
|
||||
87
tests/cookies/parse_cookie_file_contents.rs
Normal file
87
tests/cookies/parse_cookie_file_contents.rs
Normal file
@@ -0,0 +1,87 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use monolith::cookies;
|
||||
|
||||
#[test]
|
||||
fn parse_file() {
|
||||
let file_contents =
|
||||
"# Netscape HTTP Cookie File\n127.0.0.1\tFALSE\t/\tFALSE\t0\tUSER_TOKEN\tin";
|
||||
let result = cookies::parse_cookie_file_contents(&file_contents).unwrap();
|
||||
assert_eq!(result.len(), 1);
|
||||
assert_eq!(result[0].domain, "127.0.0.1");
|
||||
assert_eq!(result[0].include_subdomains, false);
|
||||
assert_eq!(result[0].path, "/");
|
||||
assert_eq!(result[0].https_only, false);
|
||||
assert_eq!(result[0].expires, 0);
|
||||
assert_eq!(result[0].name, "USER_TOKEN");
|
||||
assert_eq!(result[0].value, "in");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_multiline_file() {
|
||||
let file_contents = "# HTTP Cookie File\n127.0.0.1\tFALSE\t/\tFALSE\t0\tUSER_TOKEN\tin\n127.0.0.1\tTRUE\t/\tTRUE\t9\tUSER_TOKEN\tout\n\n";
|
||||
let result = cookies::parse_cookie_file_contents(&file_contents).unwrap();
|
||||
assert_eq!(result.len(), 2);
|
||||
assert_eq!(result[0].domain, "127.0.0.1");
|
||||
assert_eq!(result[0].include_subdomains, false);
|
||||
assert_eq!(result[0].path, "/");
|
||||
assert_eq!(result[0].https_only, false);
|
||||
assert_eq!(result[0].expires, 0);
|
||||
assert_eq!(result[0].name, "USER_TOKEN");
|
||||
assert_eq!(result[0].value, "in");
|
||||
assert_eq!(result[1].domain, "127.0.0.1");
|
||||
assert_eq!(result[1].include_subdomains, true);
|
||||
assert_eq!(result[1].path, "/");
|
||||
assert_eq!(result[1].https_only, true);
|
||||
assert_eq!(result[1].expires, 9);
|
||||
assert_eq!(result[1].name, "USER_TOKEN");
|
||||
assert_eq!(result[1].value, "out");
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use monolith::cookies;
|
||||
|
||||
#[test]
|
||||
fn empty() {
|
||||
let file_contents = "";
|
||||
let result = cookies::parse_cookie_file_contents(&file_contents).unwrap();
|
||||
assert_eq!(result.len(), 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_header() {
|
||||
let file_contents = "127.0.0.1 FALSE / FALSE 0 USER_TOKEN in";
|
||||
match cookies::parse_cookie_file_contents(&file_contents) {
|
||||
Ok(_result) => {
|
||||
assert!(false);
|
||||
}
|
||||
Err(_e) => {
|
||||
assert!(true);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn spaces_instead_of_tabs() {
|
||||
let file_contents =
|
||||
"# HTTP Cookie File\n127.0.0.1 FALSE / FALSE 0 USER_TOKEN in";
|
||||
let result = cookies::parse_cookie_file_contents(&file_contents).unwrap();
|
||||
assert_eq!(result.len(), 0);
|
||||
}
|
||||
}
|
||||
@@ -11,8 +11,9 @@ mod passing {
|
||||
use reqwest::Url;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::css;
|
||||
use crate::opts::Options;
|
||||
use monolith::css;
|
||||
use monolith::opts::Options;
|
||||
use monolith::url::EMPTY_IMAGE_DATA_URL;
|
||||
|
||||
#[test]
|
||||
fn empty_input() {
|
||||
@@ -22,7 +23,7 @@ mod passing {
|
||||
let options = Options::default();
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, "", &options, 0),
|
||||
css::embed_css(cache, &client, &document_url, "", &options),
|
||||
""
|
||||
);
|
||||
}
|
||||
@@ -35,7 +36,7 @@ mod passing {
|
||||
let options = Options::default();
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, "\t \t ", &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, "\t \t ", &options),
|
||||
""
|
||||
);
|
||||
}
|
||||
@@ -58,7 +59,7 @@ mod passing {
|
||||
height: calc(100vh - 10pt)";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &STYLE, &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, &STYLE, &options),
|
||||
format!(
|
||||
"/* border: none;*/\
|
||||
background-image: url(\"{empty_image}\"); \
|
||||
@@ -67,7 +68,7 @@ mod passing {
|
||||
margin-top: -20px; \
|
||||
line-height: -1; \
|
||||
height: calc(100vh - 10pt)",
|
||||
empty_image = empty_image!()
|
||||
empty_image = EMPTY_IMAGE_DATA_URL
|
||||
)
|
||||
);
|
||||
}
|
||||
@@ -90,7 +91,7 @@ mod passing {
|
||||
height: calc(100vh - 10pt)";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &STYLE, &options, 0),
|
||||
css::embed_css(cache, &client, &document_url, &STYLE, &options),
|
||||
format!(
|
||||
"/* border: none;*/\
|
||||
background-image: url(\"{empty_image}\"); \
|
||||
@@ -99,7 +100,7 @@ mod passing {
|
||||
margin-top: -20px; \
|
||||
line-height: -1; \
|
||||
height: calc(100vh - 10pt)",
|
||||
empty_image = empty_image!()
|
||||
empty_image = EMPTY_IMAGE_DATA_URL
|
||||
)
|
||||
);
|
||||
}
|
||||
@@ -121,7 +122,7 @@ mod passing {
|
||||
html > body {}";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
CSS
|
||||
);
|
||||
}
|
||||
@@ -165,7 +166,7 @@ mod passing {
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
CSS
|
||||
);
|
||||
}
|
||||
@@ -187,7 +188,7 @@ mod passing {
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
"\
|
||||
@charset \"UTF-8\";\n\
|
||||
\n\
|
||||
@@ -217,7 +218,7 @@ mod passing {
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
CSS
|
||||
);
|
||||
}
|
||||
@@ -239,7 +240,7 @@ mod passing {
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
CSS
|
||||
);
|
||||
}
|
||||
@@ -263,7 +264,7 @@ mod passing {
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
CSS
|
||||
);
|
||||
}
|
||||
@@ -311,7 +312,7 @@ mod passing {
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
CSS_OUT
|
||||
);
|
||||
}
|
||||
@@ -331,12 +332,12 @@ mod passing {
|
||||
";
|
||||
const CSS_OUT: &str = "\
|
||||
#language a[href=\"#translations\"]:before {\n\
|
||||
content: url(\"data:;base64,\") \"\\a \";\n\
|
||||
content: url(\"data:text/plain;base64,\") \"\\a \";\n\
|
||||
white-space: pre }\n\
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
CSS_OUT
|
||||
);
|
||||
}
|
||||
@@ -363,7 +364,7 @@ mod passing {
|
||||
";
|
||||
|
||||
assert_eq!(
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options, 0,),
|
||||
css::embed_css(cache, &client, &document_url, &CSS, &options),
|
||||
CSS_OUT
|
||||
);
|
||||
}
|
||||
@@ -7,20 +7,20 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::css;
|
||||
use monolith::css;
|
||||
|
||||
#[test]
|
||||
fn backrgound() {
|
||||
fn background() {
|
||||
assert!(css::is_image_url_prop("background"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn backrgound_image() {
|
||||
fn background_image() {
|
||||
assert!(css::is_image_url_prop("background-image"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn backrgound_image_uppercase() {
|
||||
fn background_image_uppercase() {
|
||||
assert!(css::is_image_url_prop("BACKGROUND-IMAGE"));
|
||||
}
|
||||
|
||||
@@ -64,7 +64,7 @@ mod passing {
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::css;
|
||||
use monolith::css;
|
||||
|
||||
#[test]
|
||||
fn empty() {
|
||||
@@ -8,18 +8,24 @@
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use html5ever::serialize::{serialize, SerializeOpts};
|
||||
use markup5ever_rcdom::SerializableHandle;
|
||||
|
||||
use crate::html;
|
||||
use monolith::html;
|
||||
|
||||
#[test]
|
||||
fn basic() {
|
||||
let html = "<div>text</div>";
|
||||
let mut dom = html::html_to_dom(&html);
|
||||
let mut dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
|
||||
dom = html::add_favicon(&dom.document, "I_AM_A_FAVICON_DATA_URL".to_string());
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::html;
|
||||
use monolith::html;
|
||||
|
||||
#[test]
|
||||
fn empty_input_sha256() {
|
||||
@@ -51,7 +51,7 @@ mod passing {
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::html;
|
||||
use monolith::html;
|
||||
|
||||
#[test]
|
||||
fn empty_hash() {
|
||||
@@ -7,8 +7,8 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::html;
|
||||
use crate::opts::Options;
|
||||
use monolith::html;
|
||||
use monolith::opts::Options;
|
||||
|
||||
#[test]
|
||||
fn isolated() {
|
||||
@@ -16,7 +16,10 @@ mod passing {
|
||||
options.isolate = true;
|
||||
let csp_content = html::compose_csp(&options);
|
||||
|
||||
assert_eq!(csp_content, "default-src 'unsafe-inline' data:;");
|
||||
assert_eq!(
|
||||
csp_content,
|
||||
"default-src 'unsafe-eval' 'unsafe-inline' data:;"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -75,6 +78,6 @@ mod passing {
|
||||
options.no_images = true;
|
||||
let csp_content = html::compose_csp(&options);
|
||||
|
||||
assert_eq!(csp_content, "default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;");
|
||||
assert_eq!(csp_content, "default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;");
|
||||
}
|
||||
}
|
||||
@@ -10,7 +10,7 @@ mod passing {
|
||||
use chrono::prelude::*;
|
||||
use reqwest::Url;
|
||||
|
||||
use crate::html;
|
||||
use monolith::html;
|
||||
|
||||
#[test]
|
||||
fn http_url() {
|
||||
@@ -11,8 +11,9 @@ mod passing {
|
||||
use reqwest::Url;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::html;
|
||||
use crate::opts::Options;
|
||||
use monolith::html;
|
||||
use monolith::opts::Options;
|
||||
use monolith::url::EMPTY_IMAGE_DATA_URL;
|
||||
|
||||
#[test]
|
||||
fn small_medium_large() {
|
||||
@@ -28,16 +29,13 @@ mod passing {
|
||||
&Url::parse("data:,").unwrap(),
|
||||
&srcset_value,
|
||||
&options,
|
||||
0,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
embedded_css,
|
||||
format!(
|
||||
"{} 1x, {} 1.5x, {} 2x",
|
||||
empty_image!(),
|
||||
empty_image!(),
|
||||
empty_image!(),
|
||||
EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL,
|
||||
),
|
||||
);
|
||||
}
|
||||
@@ -56,12 +54,11 @@ mod passing {
|
||||
&Url::parse("data:,").unwrap(),
|
||||
&srcset_value,
|
||||
&options,
|
||||
0,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
embedded_css,
|
||||
format!("{}, {} 1.5x", empty_image!(), empty_image!()),
|
||||
format!("{}, {} 1.5x", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -79,12 +76,11 @@ mod passing {
|
||||
&Url::parse("data:,").unwrap(),
|
||||
&srcset_value,
|
||||
&options,
|
||||
0,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
embedded_css,
|
||||
format!("{} 1x, {} 2x", empty_image!(), empty_image!()),
|
||||
format!("{} 1x, {} 2x", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL),
|
||||
);
|
||||
}
|
||||
|
||||
@@ -102,16 +98,63 @@ mod passing {
|
||||
&Url::parse("data:,").unwrap(),
|
||||
&srcset_value,
|
||||
&options,
|
||||
0,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
embedded_css,
|
||||
format!(
|
||||
"{} 1x, {} 2x, {} 3x",
|
||||
empty_image!(),
|
||||
empty_image!(),
|
||||
empty_image!()
|
||||
EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_whitespace_after_commas() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let srcset_value = "small,s.png 1x,medium,m.png 2x,large,l.png 3x";
|
||||
let mut options = Options::default();
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
let embedded_css = html::embed_srcset(
|
||||
cache,
|
||||
&client,
|
||||
&Url::parse("data:,").unwrap(),
|
||||
&srcset_value,
|
||||
&options,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
embedded_css,
|
||||
format!(
|
||||
"{} 1x, {} 2x, {} 3x",
|
||||
EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn last_without_descriptor() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let srcset_value = "small,s.png 1x, medium,m.png 2x, large,l.png";
|
||||
let mut options = Options::default();
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
let embedded_css = html::embed_srcset(
|
||||
cache,
|
||||
&client,
|
||||
&Url::parse("data:,").unwrap(),
|
||||
&srcset_value,
|
||||
&options,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
embedded_css,
|
||||
format!(
|
||||
"{} 1x, {} 2x, {}",
|
||||
EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL
|
||||
),
|
||||
);
|
||||
}
|
||||
@@ -130,8 +173,9 @@ mod failing {
|
||||
use reqwest::Url;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::html;
|
||||
use crate::opts::Options;
|
||||
use monolith::html;
|
||||
use monolith::opts::Options;
|
||||
use monolith::url::EMPTY_IMAGE_DATA_URL;
|
||||
|
||||
#[test]
|
||||
fn trailing_comma() {
|
||||
@@ -147,12 +191,11 @@ mod failing {
|
||||
&Url::parse("data:,").unwrap(),
|
||||
&srcset_value,
|
||||
&options,
|
||||
0,
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
embedded_css,
|
||||
format!("{} 1x, {} 2x,", empty_image!(), empty_image!()),
|
||||
format!("{} 1x, {} 2x", EMPTY_IMAGE_DATA_URL, EMPTY_IMAGE_DATA_URL),
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::html;
|
||||
use monolith::html;
|
||||
|
||||
#[test]
|
||||
fn present() {
|
||||
@@ -19,11 +19,11 @@ mod passing {
|
||||
<body>
|
||||
</body>
|
||||
</html>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
|
||||
assert_eq!(
|
||||
html::get_base_url(&dom.document),
|
||||
Some(str!("https://musicbrainz.org"))
|
||||
Some("https://musicbrainz.org".to_string())
|
||||
);
|
||||
}
|
||||
|
||||
@@ -38,11 +38,11 @@ mod passing {
|
||||
<body>
|
||||
</body>
|
||||
</html>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
|
||||
assert_eq!(
|
||||
html::get_base_url(&dom.document),
|
||||
Some(str!("https://www.discogs.com/"))
|
||||
Some("https://www.discogs.com/".to_string())
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -56,7 +56,7 @@ mod passing {
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::html;
|
||||
use monolith::html;
|
||||
|
||||
#[test]
|
||||
fn absent() {
|
||||
@@ -67,7 +67,7 @@ mod failing {
|
||||
<body>
|
||||
</body>
|
||||
</html>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
|
||||
assert_eq!(html::get_base_url(&dom.document), None);
|
||||
}
|
||||
@@ -82,7 +82,7 @@ mod failing {
|
||||
<body>
|
||||
</body>
|
||||
</html>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
|
||||
assert_eq!(html::get_base_url(&dom.document), None);
|
||||
}
|
||||
@@ -97,8 +97,8 @@ mod failing {
|
||||
<body>
|
||||
</body>
|
||||
</html>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
|
||||
assert_eq!(html::get_base_url(&dom.document), Some(str!()));
|
||||
assert_eq!(html::get_base_url(&dom.document), Some("".to_string()));
|
||||
}
|
||||
}
|
||||
72
tests/html/get_charset.rs
Normal file
72
tests/html/get_charset.rs
Normal file
@@ -0,0 +1,72 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use monolith::html;
|
||||
|
||||
#[test]
|
||||
fn meta_content_type() {
|
||||
let html = "<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>";
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
|
||||
assert_eq!(html::get_charset(&dom.document), Some("GB2312".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn meta_charset() {
|
||||
let html = "<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset=\"GB2312\" />
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>";
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
|
||||
assert_eq!(html::get_charset(&dom.document), Some("GB2312".to_string()));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multiple_conflicting_meta_charset_first() {
|
||||
let html = "<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset=\"utf-8\" />
|
||||
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>";
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
|
||||
assert_eq!(html::get_charset(&dom.document), Some("utf-8".to_string()));
|
||||
}
|
||||
#[test]
|
||||
fn multiple_conflicting_meta_content_type_first() {
|
||||
let html = "<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv=\"content-type\" content=\"text/html;charset=GB2312\" />
|
||||
<meta charset=\"utf-8\" />
|
||||
</head>
|
||||
<body>
|
||||
</body>
|
||||
</html>";
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
|
||||
assert_eq!(html::get_charset(&dom.document), Some("GB2312".to_string()));
|
||||
}
|
||||
}
|
||||
@@ -7,14 +7,14 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use html5ever::rcdom::{Handle, NodeData};
|
||||
use markup5ever_rcdom::{Handle, NodeData};
|
||||
|
||||
use crate::html;
|
||||
use monolith::html;
|
||||
|
||||
#[test]
|
||||
fn div_two_style_attributes() {
|
||||
let html = "<!doctype html><html><head></head><body><DIV STYLE=\"color: blue;\" style=\"display: none;\"></div></body></html>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let mut count = 0;
|
||||
|
||||
fn test_walk(node: &Handle, i: &mut i8) {
|
||||
@@ -35,7 +35,7 @@ mod passing {
|
||||
} else if node_name == "div" {
|
||||
assert_eq!(
|
||||
html::get_node_attr(node, "style"),
|
||||
Some(str!("color: blue;"))
|
||||
Some("color: blue;".to_string())
|
||||
);
|
||||
}
|
||||
|
||||
@@ -7,14 +7,14 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use html5ever::rcdom::{Handle, NodeData};
|
||||
use markup5ever_rcdom::{Handle, NodeData};
|
||||
|
||||
use crate::html;
|
||||
use monolith::html;
|
||||
|
||||
#[test]
|
||||
fn parent_node_names() {
|
||||
let html = "<!doctype html><html><HEAD></HEAD><body><div><P></P></div></body></html>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let mut count = 0;
|
||||
|
||||
fn test_walk(node: &Handle, i: &mut i8) {
|
||||
@@ -7,12 +7,12 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::html;
|
||||
use monolith::html;
|
||||
|
||||
#[test]
|
||||
fn icon() {
|
||||
let html = "<link rel=\"icon\" href=\"\" /><div>text</div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let res: bool = html::has_favicon(&dom.document);
|
||||
|
||||
assert!(res);
|
||||
@@ -21,7 +21,7 @@ mod passing {
|
||||
#[test]
|
||||
fn shortcut_icon() {
|
||||
let html = "<link rel=\"shortcut icon\" href=\"\" /><div>text</div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let res: bool = html::has_favicon(&dom.document);
|
||||
|
||||
assert!(res);
|
||||
@@ -37,12 +37,12 @@ mod passing {
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::html;
|
||||
use monolith::html;
|
||||
|
||||
#[test]
|
||||
fn absent() {
|
||||
let html = "<div>text</div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let res: bool = html::has_favicon(&dom.document);
|
||||
|
||||
assert!(!res);
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::html;
|
||||
use monolith::html;
|
||||
|
||||
#[test]
|
||||
fn icon() {
|
||||
@@ -34,7 +34,7 @@ mod passing {
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::html;
|
||||
use monolith::html;
|
||||
|
||||
#[test]
|
||||
fn mask_icon() {
|
||||
@@ -4,10 +4,12 @@ mod compose_csp;
|
||||
mod create_metadata_tag;
|
||||
mod embed_srcset;
|
||||
mod get_base_url;
|
||||
mod get_charset;
|
||||
mod get_node_attr;
|
||||
mod get_node_name;
|
||||
mod has_favicon;
|
||||
mod is_icon;
|
||||
mod parse_link_type;
|
||||
mod serialize_document;
|
||||
mod set_node_attr;
|
||||
mod stringify_document;
|
||||
mod walk_and_embed_assets;
|
||||
58
tests/html/parse_link_type.rs
Normal file
58
tests/html/parse_link_type.rs
Normal file
@@ -0,0 +1,58 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use monolith::html;
|
||||
|
||||
#[test]
|
||||
fn icon() {
|
||||
assert!(html::parse_link_type("icon").contains(&html::LinkType::Icon));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn shortcut_icon_capitalized() {
|
||||
assert!(html::parse_link_type("Shortcut Icon").contains(&html::LinkType::Icon));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stylesheet() {
|
||||
assert!(html::parse_link_type("stylesheet").contains(&html::LinkType::Stylesheet));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn preload_stylesheet() {
|
||||
assert!(html::parse_link_type("preload stylesheet").contains(&html::LinkType::Stylesheet));
|
||||
}
|
||||
}
|
||||
|
||||
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
|
||||
// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝
|
||||
// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use monolith::html;
|
||||
|
||||
#[test]
|
||||
fn mask_icon() {
|
||||
assert!(html::parse_link_type("mask-icon").is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fluid_icon() {
|
||||
assert!(html::parse_link_type("fluid-icon").is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_string() {
|
||||
assert!(html::parse_link_type("").is_empty());
|
||||
}
|
||||
}
|
||||
@@ -7,17 +7,17 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::html;
|
||||
use crate::opts::Options;
|
||||
use monolith::html;
|
||||
use monolith::opts::Options;
|
||||
|
||||
#[test]
|
||||
fn div_as_root_element() {
|
||||
let html = "<div><script src=\"some.js\"></script></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let options = Options::default();
|
||||
|
||||
assert_eq!(
|
||||
html::stringify_document(&dom.document, &options),
|
||||
String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)),
|
||||
"<html><head></head><body><div><script src=\"some.js\"></script></div></body></html>"
|
||||
);
|
||||
}
|
||||
@@ -28,18 +28,19 @@ mod passing {
|
||||
<link rel=\"something\" href=\"some.css\" />\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
|
||||
<div><script src=\"some.js\"></script></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let mut options = Options::default();
|
||||
options.isolate = true;
|
||||
|
||||
assert_eq!(
|
||||
html::stringify_document(
|
||||
&dom.document,
|
||||
String::from_utf8_lossy(&html::serialize_document(
|
||||
dom,
|
||||
"".to_string(),
|
||||
&options
|
||||
),
|
||||
)),
|
||||
"<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:;\"></meta>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:;\"></meta>\
|
||||
<title>Isolated document</title>\
|
||||
<link rel=\"something\" href=\"some.css\">\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
|
||||
@@ -59,12 +60,12 @@ mod passing {
|
||||
<title>Unstyled document</title>\
|
||||
<link rel=\"stylesheet\" href=\"main.css\"/>\
|
||||
<div style=\"display: none;\"></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let mut options = Options::default();
|
||||
options.no_css = true;
|
||||
|
||||
assert_eq!(
|
||||
html::stringify_document(&dom.document, &options),
|
||||
String::from_utf8_lossy(&html::serialize_document(dom, "".to_string(), &options)),
|
||||
"<!DOCTYPE html>\
|
||||
<html>\
|
||||
<head>\
|
||||
@@ -83,15 +84,16 @@ mod passing {
|
||||
<title>Frameless document</title>\
|
||||
<link rel=\"something\"/>\
|
||||
<div><script src=\"some.js\"></script></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let mut options = Options::default();
|
||||
options.no_frames = true;
|
||||
|
||||
assert_eq!(
|
||||
html::stringify_document(
|
||||
&dom.document,
|
||||
String::from_utf8_lossy(&html::serialize_document(
|
||||
dom,
|
||||
"".to_string(),
|
||||
&options
|
||||
),
|
||||
)),
|
||||
"<!DOCTYPE html>\
|
||||
<html>\
|
||||
<head>\
|
||||
@@ -115,7 +117,7 @@ mod passing {
|
||||
<img style=\"width: 100%;\" src=\"some.png\" />\
|
||||
<iframe src=\"some.html\"></iframe>\
|
||||
</div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let mut options = Options::default();
|
||||
options.isolate = true;
|
||||
options.no_css = true;
|
||||
@@ -125,14 +127,15 @@ mod passing {
|
||||
options.no_images = true;
|
||||
|
||||
assert_eq!(
|
||||
html::stringify_document(
|
||||
&dom.document,
|
||||
String::from_utf8_lossy(&html::serialize_document(
|
||||
dom,
|
||||
"".to_string(),
|
||||
&options
|
||||
),
|
||||
)),
|
||||
"<!DOCTYPE html>\
|
||||
<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src 'unsafe-eval' 'unsafe-inline' data:; style-src 'none'; font-src 'none'; frame-src 'none'; child-src 'none'; script-src 'none'; img-src data:;\"></meta>\
|
||||
<title>no-frame no-css no-js no-image isolated document</title>\
|
||||
<meta http-equiv=\"Content-Security-Policy\" content=\"default-src https:\">\
|
||||
<link rel=\"stylesheet\" href=\"some.css\">\
|
||||
@@ -7,14 +7,14 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use html5ever::rcdom::{Handle, NodeData};
|
||||
use markup5ever_rcdom::{Handle, NodeData};
|
||||
|
||||
use crate::html;
|
||||
use monolith::html;
|
||||
|
||||
#[test]
|
||||
fn html_lang_and_body_style() {
|
||||
let html = "<!doctype html><html lang=\"en\"><head></head><body></body></html>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let mut count = 0;
|
||||
|
||||
fn test_walk(node: &Handle, i: &mut i8) {
|
||||
@@ -31,23 +31,23 @@ mod passing {
|
||||
let node_name = name.local.as_ref().to_string();
|
||||
|
||||
if node_name == "html" {
|
||||
assert_eq!(html::get_node_attr(node, "lang"), Some(str!("en")));
|
||||
assert_eq!(html::get_node_attr(node, "lang"), Some("en".to_string()));
|
||||
|
||||
html::set_node_attr(node, "lang", Some(str!("de")));
|
||||
assert_eq!(html::get_node_attr(node, "lang"), Some(str!("de")));
|
||||
html::set_node_attr(node, "lang", Some("de".to_string()));
|
||||
assert_eq!(html::get_node_attr(node, "lang"), Some("de".to_string()));
|
||||
|
||||
html::set_node_attr(node, "lang", None);
|
||||
assert_eq!(html::get_node_attr(node, "lang"), None);
|
||||
|
||||
html::set_node_attr(node, "lang", Some(str!("")));
|
||||
assert_eq!(html::get_node_attr(node, "lang"), Some(str!("")));
|
||||
html::set_node_attr(node, "lang", Some("".to_string()));
|
||||
assert_eq!(html::get_node_attr(node, "lang"), Some("".to_string()));
|
||||
} else if node_name == "body" {
|
||||
assert_eq!(html::get_node_attr(node, "style"), None);
|
||||
|
||||
html::set_node_attr(node, "style", Some(str!("display: none;")));
|
||||
html::set_node_attr(node, "style", Some("display: none;".to_string()));
|
||||
assert_eq!(
|
||||
html::get_node_attr(node, "style"),
|
||||
Some(str!("display: none;"))
|
||||
Some("display: none;".to_string())
|
||||
);
|
||||
}
|
||||
|
||||
@@ -67,7 +67,7 @@ mod passing {
|
||||
#[test]
|
||||
fn body_background() {
|
||||
let html = "<!doctype html><html lang=\"en\"><head></head><body background=\"1\" background=\"2\"></body></html>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let mut count = 0;
|
||||
|
||||
fn test_walk(node: &Handle, i: &mut i8) {
|
||||
@@ -84,7 +84,10 @@ mod passing {
|
||||
let node_name = name.local.as_ref().to_string();
|
||||
|
||||
if node_name == "body" {
|
||||
assert_eq!(html::get_node_attr(node, "background"), Some(str!("1")));
|
||||
assert_eq!(
|
||||
html::get_node_attr(node, "background"),
|
||||
Some("1".to_string())
|
||||
);
|
||||
|
||||
html::set_node_attr(node, "background", None);
|
||||
assert_eq!(html::get_node_attr(node, "background"), None);
|
||||
@@ -8,19 +8,21 @@
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use html5ever::serialize::{serialize, SerializeOpts};
|
||||
use markup5ever_rcdom::SerializableHandle;
|
||||
use reqwest::blocking::Client;
|
||||
use std::collections::HashMap;
|
||||
use url::Url;
|
||||
|
||||
use crate::html;
|
||||
use crate::opts::Options;
|
||||
use monolith::html;
|
||||
use monolith::opts::Options;
|
||||
use monolith::url::EMPTY_IMAGE_DATA_URL;
|
||||
|
||||
#[test]
|
||||
fn basic() {
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let html: &str = "<div><P></P></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let url: Url = Url::parse("http://localhost").unwrap();
|
||||
|
||||
let mut options = Options::default();
|
||||
@@ -28,10 +30,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -42,7 +49,7 @@ mod passing {
|
||||
#[test]
|
||||
fn ensure_no_recursive_iframe() {
|
||||
let html = "<div><P></P><iframe src=\"\"></iframe></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let url: Url = Url::parse("http://localhost").unwrap();
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
@@ -51,10 +58,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -65,7 +77,7 @@ mod passing {
|
||||
#[test]
|
||||
fn ensure_no_recursive_frame() {
|
||||
let html = "<frameset><frame src=\"\"></frameset>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let url: Url = Url::parse("http://localhost").unwrap();
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
@@ -74,10 +86,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -93,7 +110,7 @@ mod passing {
|
||||
<style>html{background-color: #000;}</style>\
|
||||
<div style=\"display: none;\"></div>\
|
||||
";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let url: Url = Url::parse("http://localhost").unwrap();
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
@@ -103,10 +120,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -129,7 +151,7 @@ mod passing {
|
||||
fn no_images() {
|
||||
let html = "<link rel=\"icon\" href=\"favicon.ico\">\
|
||||
<div><img src=\"http://localhost/assets/mono_lisa.png\" /></div>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let url: Url = Url::parse("http://localhost").unwrap();
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
@@ -139,10 +161,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -157,7 +184,7 @@ mod passing {
|
||||
</div>\
|
||||
</body>\
|
||||
</html>",
|
||||
empty_image = empty_image!()
|
||||
empty_image = EMPTY_IMAGE_DATA_URL
|
||||
)
|
||||
);
|
||||
}
|
||||
@@ -166,7 +193,7 @@ mod passing {
|
||||
fn no_body_background_images() {
|
||||
let html =
|
||||
"<body background=\"no/such/image.png\" background=\"no/such/image2.png\"></body>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let url: Url = Url::parse("http://localhost").unwrap();
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
@@ -176,10 +203,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -190,7 +222,7 @@ mod passing {
|
||||
#[test]
|
||||
fn no_frames() {
|
||||
let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let url: Url = Url::parse("http://localhost").unwrap();
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
@@ -200,10 +232,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -222,7 +259,7 @@ mod passing {
|
||||
#[test]
|
||||
fn no_iframes() {
|
||||
let html = "<iframe src=\"http://trackbook.com\"></iframe>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let url: Url = Url::parse("http://localhost").unwrap();
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
@@ -232,10 +269,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -258,7 +300,7 @@ mod passing {
|
||||
<script>alert(1)</script>\
|
||||
</div>\
|
||||
";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let url: Url = Url::parse("http://localhost").unwrap();
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
@@ -268,10 +310,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -290,10 +337,10 @@ mod passing {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn keeps_integrity_for_linked_assets() {
|
||||
fn keeps_integrity_for_unfamiliar_links() {
|
||||
let html = "<title>Has integrity</title>\
|
||||
<link integrity=\"sha384-12345\" rel=\"something\" href=\"https://some-site.com/some-file.ext\" />";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let url: Url = Url::parse("http://localhost").unwrap();
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
@@ -302,10 +349,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -322,13 +374,13 @@ mod passing {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn discards_integrity_for_linked_assets_nojs_nocss() {
|
||||
fn discards_integrity_for_known_links_nojs_nocss() {
|
||||
let html = "\
|
||||
<title>No integrity</title>\
|
||||
<link integrity=\"\" rel=\"stylesheet\" href=\"data:;\"/>\
|
||||
<script integrity=\"\" src=\"some.js\"></script>\
|
||||
";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let url: Url = Url::parse("http://localhost").unwrap();
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
@@ -339,10 +391,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -366,7 +423,7 @@ mod passing {
|
||||
<link integrity=\"sha384-123\" rel=\"something\" href=\"data:;\"/>\
|
||||
<script integrity=\"sha384-456\" src=\"some.js\"></script>\
|
||||
";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let url: Url = Url::parse("http://localhost").unwrap();
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
@@ -377,10 +434,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -403,14 +465,14 @@ mod passing {
|
||||
let html = "\
|
||||
<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"Refresh\" value=\"20\"/>\
|
||||
<meta http-equiv=\"Location\" value=\"https://freebsd.org\"/>\
|
||||
<meta http-equiv=\"Refresh\" content=\"2\"/>\
|
||||
<meta http-equiv=\"Location\" content=\"https://freebsd.org\"/>\
|
||||
</head>\
|
||||
<body>\
|
||||
</body>\
|
||||
</html>\
|
||||
";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let url: Url = Url::parse("http://localhost").unwrap();
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
@@ -423,18 +485,23 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
"\
|
||||
<html>\
|
||||
<head>\
|
||||
<meta http-equiv=\"disabled by monolith (Refresh)\" value=\"20\">\
|
||||
<meta http-equiv=\"disabled by monolith (Location)\" value=\"https://freebsd.org\">\
|
||||
<meta content=\"2\">\
|
||||
<meta content=\"https://freebsd.org\">\
|
||||
</head>\
|
||||
<body>\
|
||||
</body>\
|
||||
@@ -452,7 +519,7 @@ mod passing {
|
||||
</noscript>\
|
||||
</body>\
|
||||
</html>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let url: Url = Url::parse("http://localhost").unwrap();
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
@@ -462,10 +529,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -480,7 +552,7 @@ mod passing {
|
||||
</noscript>\
|
||||
</body>\
|
||||
</html>",
|
||||
empty_image!(),
|
||||
EMPTY_IMAGE_DATA_URL,
|
||||
)
|
||||
);
|
||||
}
|
||||
@@ -488,7 +560,7 @@ mod passing {
|
||||
#[test]
|
||||
fn preserves_script_type_json() {
|
||||
let html = "<script id=\"data\" type=\"application/json\">{\"mono\":\"lith\"}</script>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let dom = html::html_to_dom(&html.as_bytes().to_vec(), "".to_string());
|
||||
let url: Url = Url::parse("http://localhost").unwrap();
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
@@ -497,10 +569,15 @@ mod passing {
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
serialize(
|
||||
&mut buf,
|
||||
&SerializableHandle::from(dom.document.clone()),
|
||||
SerializeOpts::default(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::js;
|
||||
use monolith::js;
|
||||
|
||||
#[test]
|
||||
fn onblur_camelcase() {
|
||||
@@ -34,7 +34,7 @@ mod passing {
|
||||
|
||||
#[cfg(test)]
|
||||
mod failing {
|
||||
use crate::js;
|
||||
use monolith::js;
|
||||
|
||||
#[test]
|
||||
fn href() {
|
||||
@@ -1,8 +1,9 @@
|
||||
mod cli;
|
||||
mod cookies;
|
||||
mod css;
|
||||
mod html;
|
||||
mod js;
|
||||
mod macros;
|
||||
// mod macros;
|
||||
mod opts;
|
||||
mod url;
|
||||
mod utils;
|
||||
@@ -7,7 +7,7 @@
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use crate::opts::Options;
|
||||
use monolith::opts::Options;
|
||||
|
||||
#[test]
|
||||
fn defaults() {
|
||||
@@ -16,6 +16,8 @@ mod passing {
|
||||
assert_eq!(options.no_audio, false);
|
||||
assert_eq!(options.base_url, None);
|
||||
assert_eq!(options.no_css, false);
|
||||
assert_eq!(options.cookie_file, None);
|
||||
assert_eq!(options.encoding, None);
|
||||
assert_eq!(options.no_frames, false);
|
||||
assert_eq!(options.no_fonts, false);
|
||||
assert_eq!(options.no_images, false);
|
||||
@@ -23,12 +25,12 @@ mod passing {
|
||||
assert_eq!(options.no_js, false);
|
||||
assert_eq!(options.insecure, false);
|
||||
assert_eq!(options.no_metadata, false);
|
||||
assert_eq!(options.output, str!());
|
||||
assert_eq!(options.output, "".to_string());
|
||||
assert_eq!(options.silent, false);
|
||||
assert_eq!(options.timeout, 0);
|
||||
assert_eq!(options.user_agent, None);
|
||||
assert_eq!(options.no_video, false);
|
||||
|
||||
assert_eq!(options.target, str!());
|
||||
assert_eq!(options.target, "".to_string());
|
||||
}
|
||||
}
|
||||
@@ -9,14 +9,15 @@
|
||||
mod passing {
|
||||
use reqwest::Url;
|
||||
|
||||
use crate::url;
|
||||
use monolith::url;
|
||||
|
||||
#[test]
|
||||
fn preserve_original() {
|
||||
let u: Url = Url::parse("https://somewhere.com/font.eot#iefix").unwrap();
|
||||
|
||||
url::clean_url(u.clone());
|
||||
let clean_u: Url = url::clean_url(u.clone());
|
||||
|
||||
assert_eq!(clean_u.as_str(), "https://somewhere.com/font.eot");
|
||||
assert_eq!(u.as_str(), "https://somewhere.com/font.eot#iefix");
|
||||
}
|
||||
|
||||
@@ -45,7 +46,7 @@ mod passing {
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn removesempty_fragment_and_keeps_empty_query() {
|
||||
fn removes_empty_fragment_and_keeps_query() {
|
||||
assert_eq!(
|
||||
url::clean_url(Url::parse("https://somewhere.com/font.eot?a=b&#").unwrap()).as_str(),
|
||||
"https://somewhere.com/font.eot?a=b&"
|
||||
109
tests/url/create_data_url.rs
Normal file
109
tests/url/create_data_url.rs
Normal file
@@ -0,0 +1,109 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use reqwest::Url;
|
||||
|
||||
use monolith::url;
|
||||
|
||||
#[test]
|
||||
fn encode_string_with_specific_media_type() {
|
||||
let media_type = "application/javascript";
|
||||
let data = "var word = 'hello';\nalert(word);\n";
|
||||
let data_url = url::create_data_url(
|
||||
media_type,
|
||||
"",
|
||||
data.as_bytes(),
|
||||
&Url::parse("data:,").unwrap(),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
data_url.as_str(),
|
||||
"data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_append_fragment() {
|
||||
let data = "<svg></svg>\n";
|
||||
let data_url = url::create_data_url(
|
||||
"image/svg+xml",
|
||||
"",
|
||||
data.as_bytes(),
|
||||
&Url::parse("data:,").unwrap(),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
data_url.as_str(),
|
||||
"data:image/svg+xml;base64,PHN2Zz48L3N2Zz4K"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn encode_string_with_specific_media_type_and_charset() {
|
||||
let media_type = "application/javascript";
|
||||
let charset = "utf8";
|
||||
let data = "var word = 'hello';\nalert(word);\n";
|
||||
let data_url = url::create_data_url(
|
||||
media_type,
|
||||
charset,
|
||||
data.as_bytes(),
|
||||
&Url::parse("data:,").unwrap(),
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
data_url.as_str(),
|
||||
"data:application/javascript;charset=utf8;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn create_data_url_with_us_ascii_charset() {
|
||||
let media_type = "";
|
||||
let charset = "us-ascii";
|
||||
let data = "";
|
||||
let data_url = url::create_data_url(
|
||||
media_type,
|
||||
charset,
|
||||
data.as_bytes(),
|
||||
&Url::parse("data:,").unwrap(),
|
||||
);
|
||||
|
||||
assert_eq!(data_url.as_str(), "data:;base64,");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn create_data_url_with_utf8_charset() {
|
||||
let media_type = "";
|
||||
let charset = "utf8";
|
||||
let data = "";
|
||||
let data_url = url::create_data_url(
|
||||
media_type,
|
||||
charset,
|
||||
data.as_bytes(),
|
||||
&Url::parse("data:,").unwrap(),
|
||||
);
|
||||
|
||||
assert_eq!(data_url.as_str(), "data:;charset=utf8;base64,");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn create_data_url_with_media_type_text_plain_and_utf8_charset() {
|
||||
let media_type = "text/plain";
|
||||
let charset = "utf8";
|
||||
let data = "";
|
||||
let data_url = url::create_data_url(
|
||||
media_type,
|
||||
charset,
|
||||
data.as_bytes(),
|
||||
&Url::parse("data:,").unwrap(),
|
||||
);
|
||||
|
||||
assert_eq!(data_url.as_str(), "data:text/plain;charset=utf8;base64,");
|
||||
}
|
||||
}
|
||||
91
tests/url/get_referer_url.rs
Normal file
91
tests/url/get_referer_url.rs
Normal file
@@ -0,0 +1,91 @@
|
||||
// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗
|
||||
// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝
|
||||
// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗
|
||||
// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║
|
||||
// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝
|
||||
// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝
|
||||
|
||||
#[cfg(test)]
|
||||
mod passing {
|
||||
use reqwest::Url;
|
||||
|
||||
use monolith::url;
|
||||
|
||||
#[test]
|
||||
fn preserve_original() {
|
||||
let original_url: Url = Url::parse("https://somewhere.com/font.eot#iefix").unwrap();
|
||||
let referer_url: Url = url::get_referer_url(original_url.clone());
|
||||
assert_eq!(referer_url.as_str(), "https://somewhere.com/font.eot");
|
||||
assert_eq!(
|
||||
original_url.as_str(),
|
||||
"https://somewhere.com/font.eot#iefix"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn removes_fragment() {
|
||||
assert_eq!(
|
||||
url::get_referer_url(Url::parse("https://somewhere.com/font.eot#iefix").unwrap())
|
||||
.as_str(),
|
||||
"https://somewhere.com/font.eot"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn removes_empty_fragment() {
|
||||
assert_eq!(
|
||||
url::get_referer_url(Url::parse("https://somewhere.com/font.eot#").unwrap()).as_str(),
|
||||
"https://somewhere.com/font.eot"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn removes_empty_fragment_and_keeps_empty_query() {
|
||||
assert_eq!(
|
||||
url::get_referer_url(Url::parse("https://somewhere.com/font.eot?#").unwrap()).as_str(),
|
||||
"https://somewhere.com/font.eot?"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn removes_empty_fragment_and_keeps_query() {
|
||||
assert_eq!(
|
||||
url::get_referer_url(Url::parse("https://somewhere.com/font.eot?a=b&#").unwrap())
|
||||
.as_str(),
|
||||
"https://somewhere.com/font.eot?a=b&"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn removes_credentials() {
|
||||
assert_eq!(
|
||||
url::get_referer_url(Url::parse("https://cookie:monster@gibson.lan/path").unwrap())
|
||||
.as_str(),
|
||||
"https://gibson.lan/path"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn removes_empty_credentials() {
|
||||
assert_eq!(
|
||||
url::get_referer_url(Url::parse("https://@gibson.lan/path").unwrap()).as_str(),
|
||||
"https://gibson.lan/path"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn removes_empty_username_credentials() {
|
||||
assert_eq!(
|
||||
url::get_referer_url(Url::parse("https://:monster@gibson.lan/path").unwrap()).as_str(),
|
||||
"https://gibson.lan/path"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn removes_empty_password_credentials() {
|
||||
assert_eq!(
|
||||
url::get_referer_url(Url::parse("https://cookie@gibson.lan/path").unwrap()).as_str(),
|
||||
"https://gibson.lan/path"
|
||||
);
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user