ScriptV2: Self hosted tracker script cache (#5502)

* Allow caching tracker script on CE

Open questions with this approach:
- `ingestion_url`: Using `PlausibleWeb.Endpoint.url()` requires that the endpoint has started, but we
  want to pre-warm the cache _before_ the endpoint starts. To work around this, a different approach
  is used to get the right URL.
- caching: Other caches currently cache database models, whereas this one caches a string. Will this cause issues?

* Slightly better workaround

* Lazier timers
This commit is contained in:
Karl-Aksel Puulmann 2025-06-20 09:59:18 +03:00 committed by GitHub
parent 4387d42409
commit 6ade93bf86
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 176 additions and 12 deletions

View File

@ -357,7 +357,8 @@ config :plausible, PlausibleWeb.Endpoint,
http: [port: http_port, ip: listen_ip] ++ default_http_opts,
secret_key_base: secret_key_base,
websocket_url: websocket_url,
secure_cookie: secure_cookie
secure_cookie: secure_cookie,
base_url: base_url
# maybe enable HTTPS in CE
if config_env() in [:ce, :ce_dev, :ce_test] do

View File

@ -128,6 +128,22 @@ defmodule Plausible.Application do
]
)
end,
# Child entry for the tracker script cache, started through `warmed_cache/2`
# with two periodic warmers.
# NOTE(review): `on_ce` presumably compiles this entry only into Community
# Edition builds — confirm against the macro definition.
on_ce do
warmed_cache(PlausibleWeb.TrackerScriptCache,
adapter_opts: [
n_lock_partitions: 1,
ttl_check_interval: false,
ets_options: [:bag, read_concurrency: true]
],
warmers: [
# Full refresh: every 180 minutes plus a random offset of up to 10 seconds.
refresh_all:
{PlausibleWeb.TrackerScriptCache.All,
interval: :timer.minutes(180) + Enum.random(1..:timer.seconds(10))},
# Refresh of recently updated entries: every 120 seconds.
refresh_updated_recently:
{PlausibleWeb.TrackerScriptCache.RecentlyUpdated, interval: :timer.seconds(120)}
]
)
end,
Plausible.Ingestion.Counters,
Plausible.Session.Salts,
Supervisor.child_spec(Plausible.Event.WriteBuffer, id: Plausible.Event.WriteBuffer),

View File

@ -107,7 +107,7 @@ defmodule Plausible.Cache do
@spec refresh_updated_recently(Keyword.t()) :: :ok
def refresh_updated_recently(opts \\ []) do
recently_updated_query =
from [s, _rg] in base_db_query(),
from [s, ...] in base_db_query(),
order_by: [asc: s.updated_at],
where: s.updated_at > ago(^15, "minute")

View File

@ -4,8 +4,8 @@ defmodule PlausibleWeb.TrackerPlug do
"""
import Plug.Conn
import Ecto.Query
use Agent
use Plausible
base_variants = [
"hash",
@ -62,14 +62,9 @@ defmodule PlausibleWeb.TrackerPlug do
# Builds the telemetry event path for tracker script requests; `name` becomes
# the last segment of the event name list.
def telemetry_event(name) do
  [:plausible, :tracker_script, :request, name]
end
defp request_tracker_script(tag, conn) do
tracker_script_configuration =
Plausible.Repo.one(
from s in Plausible.Site.TrackerScriptConfiguration, where: s.id == ^tag, preload: [:site]
)
if tracker_script_configuration do
script_tag = PlausibleWeb.Tracker.plausible_main_script_tag(tracker_script_configuration)
script_tag = get_plausible_web_script_tag(tag)
if script_tag do
:telemetry.execute(
telemetry_event(:v2),
%{},
@ -84,7 +79,7 @@ defmodule PlausibleWeb.TrackerPlug do
|> put_resp_header("cache-control", "public, max-age=60, no-transform")
# CDN-Tag is used by BunnyCDN to tag cached resources. This allows us to purge
# specific tracker scripts from the CDN cache.
|> put_resp_header("cdn-tag", "tracker_script::#{tracker_script_configuration.id}")
|> put_resp_header("cdn-tag", "tracker_script::#{tag}")
|> send_resp(200, script_tag)
|> halt()
else
@ -100,6 +95,16 @@ defmodule PlausibleWeb.TrackerPlug do
end
end
# Resolves the tracker script for `tag`. The lookup strategy is picked by the
# `on_ee` macro: the EE branch builds the script from the source of truth on
# every call, the other branch reads it from `PlausibleWeb.TrackerScriptCache`.
# The caller treats a falsy result as "no script for this tag".
defp get_plausible_web_script_tag(tag) do
on_ee do
# On cloud, we generate the script always on the fly relying on CDN caching
PlausibleWeb.TrackerScriptCache.get_from_source(tag)
else
# On self-hosted, we have a pre-warmed cache for the script
PlausibleWeb.TrackerScriptCache.get(tag)
end
end
defp legacy_request_file(filename, files_available, conn) do
if filename && MapSet.member?(files_available, filename) do
location = Application.app_dir(:plausible, "priv/tracker/js/" <> filename)

View File

@ -35,7 +35,7 @@ defmodule PlausibleWeb.Tracker do
def plausible_main_config(tracker_script_configuration) do
%{
domain: tracker_script_configuration.site.domain,
endpoint: "#{PlausibleWeb.Endpoint.url()}/api/event",
endpoint: tracker_ingestion_endpoint(),
hashBasedRouting: tracker_script_configuration.hash_based_routing,
outboundLinks: tracker_script_configuration.outbound_links,
fileDownloads: tracker_script_configuration.file_downloads,
@ -119,4 +119,14 @@ defmodule PlausibleWeb.Tracker do
# Changeset clause for updates tagged `:plugins_api`; delegates to
# `TrackerScriptConfiguration.plugins_api_changeset/2`.
defp changeset(config, update, :plugins_api),
  do: TrackerScriptConfiguration.plugins_api_changeset(config, update)
# Returns the event ingestion URL that gets embedded in the tracker script.
#
# :TRICKY: Normally we would use PlausibleWeb.Endpoint.url() here, but
# that requires the endpoint to be started. We start the TrackerScriptCache
# before the endpoint is started, so the URL is built from the `:base_url`
# entry of the endpoint's application config instead.
defp tracker_ingestion_endpoint() do
  base_url =
    :plausible
    |> Application.fetch_env!(PlausibleWeb.Endpoint)
    |> Keyword.get(:base_url)

  "#{base_url}/api/event"
end
end

View File

@ -0,0 +1,58 @@
defmodule PlausibleWeb.TrackerScriptCache do
  @moduledoc """
  Cache for tracker script(s) for self-hosted Plausible instances.

  Entries are keyed by the `TrackerScriptConfiguration` id and hold the result of
  `PlausibleWeb.Tracker.plausible_main_script_tag/1` for that configuration.
  """
  alias Plausible.Site.TrackerScriptConfiguration
  import Ecto.Query
  use Plausible.Cache

  @cache_name :tracker_script_cache

  # Default name of the cache.
  @impl true
  def name, do: @cache_name

  # Child id used for this cache.
  @impl true
  def child_id, do: :cache_tracker_script

  # Number of tracker script configurations stored in the database.
  @impl true
  def count_all, do: Plausible.Repo.aggregate(TrackerScriptConfiguration, :count)

  # Base query: every configuration joined with its site, with the site
  # preloaded from that join.
  @impl true
  def base_db_query do
    TrackerScriptConfiguration
    |> join(:inner, [t], s in assoc(t, :site))
    |> preload([_t, s], site: s)
  end

  # Looks up a single configuration by id and renders its script;
  # returns `nil` when no configuration matches.
  @impl true
  def get_from_source(id) do
    lookup = from(t in base_db_query(), where: t.id == ^id)

    with %TrackerScriptConfiguration{} = config <- Plausible.Repo.one(lookup) do
      PlausibleWeb.Tracker.plausible_main_script_tag(config)
    else
      _ -> nil
    end
  end

  # Turns loaded configurations into `{id, script}` cache entries. Entries are
  # prepended to the accumulator, so they come out in reverse input order.
  @impl true
  def unwrap_cache_keys(items) do
    for config <- items, reduce: [] do
      entries ->
        [{config.id, PlausibleWeb.Tracker.plausible_main_script_tag(config)} | entries]
    end
  end
end

View File

@ -0,0 +1,74 @@
defmodule PlausibleWeb.TrackerScriptCacheTest do
  use Plausible.DataCase, async: true
  use Plausible.Teams.Test

  alias Plausible.Site.TrackerScriptConfiguration
  alias PlausibleWeb.TrackerScriptCache

  describe "public cache interface" do
    test "cache caches tracker script configurations", %{test: test} do
      cache_child =
        {TrackerScriptCache, [cache_name: test, child_id: :test_cache_tracker_script]}

      {:ok, _} =
        Supervisor.start_link([cache_child],
          strategy: :one_for_one,
          name: :"cache_supervisor_#{test}"
        )

      config =
        [domain: "site1.example.com"]
        |> new_site()
        |> create_config()

      :ok = TrackerScriptCache.refresh_all(cache_name: test)

      # Delete the row after warming so the lookups below can only be served
      # by what the cache already holds.
      {:ok, _} = Plausible.Repo.delete(config)

      assert TrackerScriptCache.size(test) == 1

      assert cached_script = TrackerScriptCache.get(config.id, force?: true, cache_name: test)
      assert is_binary(cached_script)

      refute TrackerScriptCache.get("nonexistent", cache_name: test, force?: true)
    end

    test "refreshes only recently added configurations", %{test: test} do
      {:ok, _} = start_test_cache(test)

      [site1, site2] = [new_site(), new_site()]

      long_ago = ~N[2021-01-01 00:00:00]
      old_config = create_config(site1, inserted_at: long_ago, updated_at: long_ago)
      new_config = create_config(site2)

      cache_opts = [cache_name: test, force?: true]

      # Nothing is cached before the refresh runs.
      assert is_nil(TrackerScriptCache.get(old_config.id, cache_opts))
      assert is_nil(TrackerScriptCache.get(new_config.id, cache_opts))

      assert :ok = TrackerScriptCache.refresh_updated_recently(cache_opts)

      # Only the configuration with a recent timestamp is picked up.
      refute TrackerScriptCache.get(old_config.id, cache_opts)
      assert TrackerScriptCache.get(new_config.id, cache_opts)
    end
  end

  # Starts the cache directly from the start tuple of its child spec.
  defp start_test_cache(cache_name) do
    %{start: {mod, fun, args}} = TrackerScriptCache.child_spec(cache_name: cache_name)
    apply(mod, fun, args)
  end

  # Inserts a configuration for `site`; `opts` override fields on the struct
  # before insertion. The returned record has `:site` preloaded.
  defp create_config(site, opts \\ []) do
    %TrackerScriptConfiguration{
      site_id: site.id,
      installation_type: :manual,
      hash_based_routing: true,
      outbound_links: true,
      file_downloads: true,
      form_submissions: true
    }
    |> Ecto.Changeset.change(opts)
    |> Repo.insert!()
    |> Repo.preload(:site)
  end
end