Stop Cache.Stats + Revert "Temporarily disable ingest metrics (#5369)" (#5370)

* Revert "Temporarily disable ingest metrics (#5369)"

This reverts commit b96e96a7f6.

* Add :tools to MIX_ENV=dev

* Stop tracking caches hit ratio in favour of raw counters
This commit is contained in:
hq1 2025-04-30 10:11:51 +02:00 committed by GitHub
parent b96e96a7f6
commit ffae16f7b9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 32 additions and 183 deletions

View File

@ -18,7 +18,6 @@ defmodule Plausible.Application do
{PartitionSupervisor,
child_spec: Task.Supervisor, name: Plausible.UserAgentParseTaskSupervisor},
Plausible.Session.BalancerSupervisor,
Plausible.Cache.Stats,
Plausible.PromEx,
{Plausible.Auth.TOTP.Vault, key: totp_vault_key()},
Plausible.Repo,

View File

@ -170,7 +170,6 @@ defmodule Plausible.Cache do
end
defdelegate size(cache_name \\ name()), to: Plausible.Cache.Adapter
defdelegate hit_rate(cache_name \\ name()), to: Plausible.Cache.Stats
@spec telemetry_event_refresh(atom(), atom()) :: list(atom())
def telemetry_event_refresh(cache_name \\ name(), mode) when mode in @modes do

View File

@ -1,92 +0,0 @@
defmodule Plausible.Cache.Stats do
@moduledoc """
Keeps track of hit/miss ratio for various caches.
"""
use GenServer
@hit :hit
@miss :miss
@telemetry_hit ConCache.Operations.telemetry_hit()
@telemetry_miss ConCache.Operations.telemetry_miss()
@telemetry_events [@telemetry_hit, @telemetry_miss]
def start_link(_opts) do
GenServer.start_link(__MODULE__, nil)
end
def init(nil) do
__MODULE__ =
:ets.new(__MODULE__, [
:public,
:named_table,
:set,
read_concurrency: true,
write_concurrency: true
])
:telemetry.attach_many(
"plausible-cache-stats",
@telemetry_events,
&__MODULE__.handle_telemetry_event/4,
nil
)
{:ok, nil}
end
def handle_telemetry_event(@telemetry_hit, _measurments, %{cache: %{name: cache_name}}, _) do
bump(cache_name, @hit)
end
def handle_telemetry_event(@telemetry_miss, _measurments, %{cache: %{name: cache_name}}, _) do
bump(cache_name, @miss)
end
def gather(cache_name) do
{:ok,
%{
hit_rate: hit_rate(cache_name),
count: size(cache_name) || 0
}}
end
defdelegate size(cache_name), to: Plausible.Cache.Adapter
def bump(cache_name, type) do
:ets.update_counter(
__MODULE__,
{cache_name, type},
1,
{{cache_name, type}, 0}
)
end
def hit_rate(cache_name) do
cache_name
|> Plausible.Cache.Adapter.get_names()
|> Enum.reduce(
%{hit: 0, miss: 0, hit_miss: 0.0},
fn name, acc ->
hit =
acc.hit + :ets.lookup_element(__MODULE__, {name, @hit}, 2, 0)
miss =
acc.miss + :ets.lookup_element(__MODULE__, {name, @miss}, 2, 0)
hit_miss = hit + miss
hit_miss = if(hit_miss == 0, do: 0.0, else: hit / hit_miss * 100)
acc
|> Map.put(:hit, hit)
|> Map.put(:miss, miss)
|> Map.put(
:hit_miss,
hit_miss
)
end
)
|> Map.fetch!(:hit_miss)
end
end

View File

@ -103,13 +103,26 @@ defmodule Plausible.Ingestion.Event do
end
@spec emit_telemetry_buffered(t()) :: :ok
def emit_telemetry_buffered(_event) do
:ok
def emit_telemetry_buffered(event) do
:telemetry.execute(telemetry_event_buffered(), %{}, %{
domain: event.domain,
request_timestamp: event.request.timestamp,
tracker_script_version: event.request.tracker_script_version
})
end
@spec emit_telemetry_dropped(t(), drop_reason()) :: :ok
def emit_telemetry_dropped(_event, _reason) do
:ok
def emit_telemetry_dropped(event, reason) do
:telemetry.execute(
telemetry_event_dropped(),
%{},
%{
domain: event.domain,
reason: reason,
request_timestamp: event.request.timestamp,
tracker_script_version: event.request.tracker_script_version
}
)
end
defp pipeline() do

View File

@ -79,6 +79,16 @@ defmodule Plausible.PromEx.Plugins.PlausibleMetrics do
metric_prefix ++ [:ingest, :user_agent_parse, :timeout, :total],
event_name: Ingestion.Event.telemetry_ua_parse_timeout()
),
counter(
metric_prefix ++ [:plausible_cache, :hit],
event_name: ConCache.Operations.telemetry_hit(),
tags: [:name]
),
counter(
metric_prefix ++ [:plausible_cache, :miss],
event_name: ConCache.Operations.telemetry_miss(),
tags: [:name]
),
distribution(
metric_prefix ++ [:sessions, :transfer, :duration],
event_name: Plausible.Session.Transfer.telemetry_event(),
@ -133,22 +143,16 @@ defmodule Plausible.PromEx.Plugins.PlausibleMetrics do
Fire telemetry events for various caches
"""
def execute_cache_metrics do
{:ok, user_agents_stats} = Plausible.Cache.Stats.gather(:user_agents)
{:ok, sessions_stats} = Plausible.Cache.Stats.gather(:sessions)
:telemetry.execute([:prom_ex, :plugin, :cache, :user_agents], %{
count: user_agents_stats.count,
hit_rate: user_agents_stats.hit_rate
count: Plausible.Cache.Adapter.size(:user_agents)
})
:telemetry.execute([:prom_ex, :plugin, :cache, :sessions], %{
count: sessions_stats.count,
hit_rate: sessions_stats.hit_rate
count: Plausible.Cache.Adapter.size(:sessions)
})
:telemetry.execute([:prom_ex, :plugin, :cache, :sites], %{
count: Site.Cache.size(),
hit_rate: Site.Cache.hit_rate()
count: Site.Cache.size()
})
end
@ -201,25 +205,10 @@ defmodule Plausible.PromEx.Plugins.PlausibleMetrics do
event_name: [:prom_ex, :plugin, :cache, :user_agents],
measurement: :count
),
last_value(
metric_prefix ++ [:cache, :user_agents, :hit_ratio],
event_name: [:prom_ex, :plugin, :cache, :user_agents],
measurement: :hit_rate
),
last_value(
metric_prefix ++ [:cache, :sessions, :hit_ratio],
event_name: [:prom_ex, :plugin, :cache, :sessions],
measurement: :hit_rate
),
last_value(
metric_prefix ++ [:cache, :sites, :size],
event_name: [:prom_ex, :plugin, :cache, :sites],
measurement: :count
),
last_value(
metric_prefix ++ [:cache, :sites, :hit_ratio],
event_name: [:prom_ex, :plugin, :cache, :sites],
measurement: :hit_rate
)
]
)

View File

@ -44,7 +44,7 @@ defmodule Plausible.MixProject do
:runtime_tools,
:tls_certificate_check,
:opentelemetry_exporter
] ++ if(Mix.env() == :dev, do: [:observer, :wx], else: [])
] ++ if(Mix.env() == :dev, do: [:tools, :observer, :wx], else: [])
]
end

View File

@ -34,7 +34,7 @@ defmodule Plausible.Cache.AdapterTest do
assert ConCache.size(:"#{name}_#{i}") < iterations
end
assert {:ok, %{hit_rate: 100.0, count: ^iterations}} = Plausible.Cache.Stats.gather(name)
assert ^iterations = Plausible.Cache.Adapter.size(name)
end
end
end

View File

@ -58,47 +58,6 @@ defmodule Plausible.CacheTest do
end
end
describe "stats tracking" do
test "get affects hit rate", %{test: test} do
{:ok, _} = start_test_cache(test)
:ok = ExampleCache.merge_items([{"item1", :item1}], cache_name: test)
assert ExampleCache.get("item1", cache_name: test, force?: true)
assert {:ok, %{hit_rate: 100.0}} = Plausible.Cache.Stats.gather(test)
refute ExampleCache.get("item2", cache_name: test, force?: true)
assert {:ok, %{hit_rate: 50.0}} = Plausible.Cache.Stats.gather(test)
end
test "get_or_store affects hit rate", %{test: test} do
{:ok, _} = start_test_cache(test)
:ok = ExampleCache.merge_items([{"item1", :item1}], cache_name: test)
assert ExampleCache.get("item1", cache_name: test, force?: true)
# first read of item2 is evaluated from a function and stored
assert "value" ==
ExampleCache.get_or_store("item2", fn -> "value" end,
cache_name: test,
force?: true
)
# subsequent gets are read from cache and the function is disregarded
assert "value" ==
ExampleCache.get_or_store("item2", fn -> "disregard" end,
cache_name: test,
force?: true
)
assert "value" ==
ExampleCache.get_or_store("item2", fn -> "disregard" end,
cache_name: test,
force?: true
)
# 3 hits, 1 miss
assert {:ok, %{hit_rate: 75.0}} = Plausible.Cache.Stats.gather(test)
end
end
describe "merging cache items" do
test "merging adds new items", %{test: test} do
{:ok, _} = start_test_cache(test)

View File

@ -26,7 +26,6 @@ defmodule Plausible.Ingestion.Counters.TelemetryHandlerTest do
end)
end
@tag :skip
test "handles ingest events by aggregating the counts", %{test: test} do
on_exit(:detach, fn ->
:telemetry.detach("ingest-counters-#{test}")

View File

@ -13,7 +13,6 @@ defmodule Plausible.Ingestion.CountersTest do
@ts ~N[2023-02-14 01:00:03]
describe "integration" do
@tag :skip
test "periodically flushes buffer aggregates to the database", %{test: test} do
on_exit(:detach, fn ->
:telemetry.detach("ingest-counters-#{test}")
@ -47,7 +46,6 @@ defmodule Plausible.Ingestion.CountersTest do
verify_record_written(buffered.domain, "buffered", 1, site_id)
end
@tag :skip
test "the database eventually sums the records within 1-minute buckets", %{test: test} do
# Testing if the database works is an unfunny way of integration testing,
# but on the upside it's quite straight-forward way of testing if the
@ -83,7 +81,6 @@ defmodule Plausible.Ingestion.CountersTest do
verify_record_written(event1.domain, "dropped_not_found", 3)
end
@tag :skip
test "dumps the buffer on shutdown", %{test: test} do
on_exit(:detach, fn ->
:telemetry.detach("ingest-counters-#{test}")

View File

@ -7,7 +7,6 @@ defmodule Plausible.Ingestion.EventTelemetryTest do
use Plausible.DataCase, async: false
@tag :skip
test "telemetry is emitted for all events", %{test: test} do
test_pid = self()

View File

@ -146,19 +146,6 @@ defmodule Plausible.Site.CacheTest do
assert Cache.get("new.example.com", force?: true, cache_name: test)
end
test "cache exposes hit rate", %{test: test} do
{:ok, _} = start_test_cache(test)
new_site(domain: "site1.example.com")
:ok = Cache.refresh_all(cache_name: test)
assert Cache.hit_rate(test) == 0
assert Cache.get("site1.example.com", force?: true, cache_name: test)
assert Cache.hit_rate(test) == 100
refute Cache.get("nonexisting.example.com", force?: true, cache_name: test)
assert Cache.hit_rate(test) == 50
end
test "only recently updated sites can be refreshed", %{test: test} do
{:ok, _} = start_test_cache(test)