Delete stats improvements (#2318)

* Move clear stats functions to Plausible.Purge

* Delete both native and imported stats when deleting a site

This commit moves the delete site function to the Plausible.Purge
module, and fixes a bug where deleted sites could leave dangling
imported stats.

* Clear sites.stats_start_date after clearing stats

This commit fixes a bug where resetting stats left an invalid state of
the stats_start_date field, used for GA imports, for example.
This commit is contained in:
Vinicius Brasil 2022-10-10 08:55:58 -03:00 committed by GitHub
parent aadf892a71
commit 0733efa89e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 152 additions and 37 deletions

View File

@ -42,7 +42,8 @@ defmodule ObanErrorReporter do
args: %{"site_id" => site_id},
state: "executing"
}) do
Plausible.ClickhouseRepo.clear_imported_stats_for(site_id)
site = Plausible.Repo.get(Plausible.Site, site_id)
Plausible.Purge.delete_imported_stats!(site)
end
defp on_job_exception(_job), do: :ignore

View File

@ -10,29 +10,4 @@ defmodule Plausible.ClickhouseRepo do
import Ecto.Query, only: [from: 1, from: 2]
end
end
def clear_stats_for(domain) do
events_sql = "ALTER TABLE events DELETE WHERE domain = ?"
sessions_sql = "ALTER TABLE sessions DELETE WHERE domain = ?"
Ecto.Adapters.SQL.query!(__MODULE__, events_sql, [domain])
Ecto.Adapters.SQL.query!(__MODULE__, sessions_sql, [domain])
end
def clear_imported_stats_for(site_id) do
[
"imported_visitors",
"imported_sources",
"imported_pages",
"imported_entry_pages",
"imported_exit_pages",
"imported_locations",
"imported_devices",
"imported_browsers",
"imported_operating_systems"
]
|> Enum.map(fn table ->
sql = "ALTER TABLE #{table} DELETE WHERE site_id = ?"
Ecto.Adapters.SQL.query!(__MODULE__, sql, [site_id])
end)
end
end

View File

@ -3,8 +3,16 @@ defmodule Plausible.Imported do
use Timex
require Logger
@tables ~w(
imported_visitors imported_sources imported_pages imported_entry_pages
imported_exit_pages imported_locations imported_devices imported_browsers
imported_operating_systems
)
@spec tables() :: [String.t()]
def tables, do: @tables
def forget(site) do
Plausible.ClickhouseRepo.clear_imported_stats_for(site.id)
Plausible.Purge.delete_imported_stats!(site)
end
def from_google_analytics(nil, _site_id, _metric), do: nil

57
lib/plausible/purge.ex Normal file
View File

@ -0,0 +1,57 @@
defmodule Plausible.Purge do
@moduledoc """
Deletes data from a site.
Stats are stored on Clickhouse, and unlike other databases data deletion is
done asynchronously.
- [Clickhouse `ALTER TABLE ... DELETE` Statement`](https://clickhouse.com/docs/en/sql-reference/statements/alter/delete)
- [Synchronicity of `ALTER` Queries](https://clickhouse.com/docs/en/sql-reference/statements/alter/#synchronicity-of-alter-queries)
"""
@doc """
Deletes a site and all associated stats.
"""
@spec delete_site!(Plausible.Site.t()) :: :ok
def delete_site!(site) do
delete_native_stats!(site)
delete_imported_stats!(site)
Plausible.Repo.delete!(site)
:ok
end
@spec delete_imported_stats!(Plausible.Site.t()) :: :ok
@doc """
Deletes imported stats from Google Analytics.
"""
def delete_imported_stats!(site) do
Enum.each(Plausible.Imported.tables(), fn table ->
sql = "ALTER TABLE #{table} DELETE WHERE site_id = ?"
Ecto.Adapters.SQL.query!(Plausible.ClickhouseRepo, sql, [site.id])
end)
:ok
end
@spec delete_native_stats!(Plausible.Site.t()) :: :ok
@doc """
Deletes native stats for a site, and clears the `stats_start_date` field.
"""
def delete_native_stats!(site) do
events_sql = "ALTER TABLE events DELETE WHERE domain = ?"
sessions_sql = "ALTER TABLE sessions DELETE WHERE domain = ?"
Ecto.Adapters.SQL.query!(Plausible.ClickhouseRepo, events_sql, [site.domain])
Ecto.Adapters.SQL.query!(Plausible.ClickhouseRepo, sessions_sql, [site.domain])
clear_stats_start_date!(site)
:ok
end
defp clear_stats_start_date!(site) do
site
|> Ecto.Changeset.change(stats_start_date: nil)
|> Plausible.Repo.update!()
end
end

View File

@ -162,9 +162,4 @@ defmodule Plausible.Sites do
where: sm.role == :owner
)
end
def delete!(site) do
Repo.delete!(site)
Plausible.ClickhouseRepo.clear_stats_for(site.domain)
end
end

View File

@ -42,7 +42,7 @@ defmodule PlausibleWeb.Api.ExternalSitesController do
site = Sites.get_for_user(conn.assigns[:current_user].id, site_id, [:owner])
if site do
Sites.delete!(site)
Plausible.Purge.delete_site!(site)
json(conn, %{"deleted" => true})
else
H.not_found(conn, "Site could not be found")

View File

@ -531,7 +531,7 @@ defmodule PlausibleWeb.AuthController do
Repo.delete!(membership)
if membership.role == :owner do
Plausible.Sites.delete!(membership.site)
Plausible.Purge.delete_site!(membership.site)
end
end

View File

@ -337,7 +337,7 @@ defmodule PlausibleWeb.SiteController do
def reset_stats(conn, _params) do
site = conn.assigns[:site]
Plausible.ClickhouseRepo.clear_stats_for(site.domain)
Plausible.Purge.delete_native_stats!(site)
conn
|> put_flash(:success, "#{site.domain} stats will be reset in a few minutes")
@ -347,7 +347,7 @@ defmodule PlausibleWeb.SiteController do
def delete_site(conn, _params) do
site = conn.assigns[:site]
Plausible.Sites.delete!(site)
Plausible.Purge.delete_site!(site)
conn
|> put_flash(:success, "Site deleted successfully along with all pageviews")

View File

@ -57,7 +57,7 @@ defmodule Plausible.Workers.ImportGoogleAnalytics do
site = Repo.preload(site, memberships: :user)
Plausible.Site.import_failure(site) |> Repo.update!()
Plausible.ClickhouseRepo.clear_imported_stats_for(site.id)
Plausible.Purge.delete_imported_stats!(site)
Enum.each(site.memberships, fn membership ->
if membership.role in [:owner, :admin] do

View File

@ -0,0 +1,79 @@
defmodule Plausible.PurgeTest do
use Plausible.DataCase
import Plausible.TestUtils
setup do
site = insert(:site, stats_start_date: ~D[2020-01-01])
populate_stats(site, [
build(:pageview, domain: site.domain),
build(:imported_visitors, site_id: site.id),
build(:imported_sources, site_id: site.id),
build(:imported_pages, site_id: site.id),
build(:imported_entry_pages, site_id: site.id),
build(:imported_exit_pages, site_id: site.id),
build(:imported_locations, site_id: site.id),
build(:imported_devices, site_id: site.id),
build(:imported_browsers, site_id: site.id),
build(:imported_operating_systems, site_id: site.id)
])
{:ok, %{site: site}}
end
defp assert_count(query, expected) do
assert eventually(
fn ->
count = Plausible.ClickhouseRepo.aggregate(query, :count)
{count == expected, count}
end,
200,
10
)
end
test "delete_imported_stats!/1 deletes imported data", %{site: site} do
assert :ok == Plausible.Purge.delete_imported_stats!(site)
Enum.each(Plausible.Imported.tables(), fn table ->
query = from(imported in table, where: imported.site_id == ^site.id)
assert_count(query, 0)
end)
end
test "delete_imported_stats!/1 does not reset stats_start_date", %{site: site} do
assert :ok == Plausible.Purge.delete_imported_stats!(site)
assert %Date{} = site.stats_start_date
end
test "delete_native_stats!/1 deletes native stats", %{site: site} do
assert :ok == Plausible.Purge.delete_native_stats!(site)
events_query = from(s in Plausible.ClickhouseSession, where: s.domain == ^site.domain)
assert_count(events_query, 0)
sessions_query = from(s in Plausible.ClickhouseSession, where: s.domain == ^site.domain)
assert_count(sessions_query, 0)
end
test "delete_native_stats!/1 resets stats_start_date", %{site: site} do
assert :ok == Plausible.Purge.delete_native_stats!(site)
assert %Plausible.Site{stats_start_date: nil} = Plausible.Repo.reload(site)
end
test "delete_site!/1 deletes the site and all stats", %{site: site} do
assert :ok == Plausible.Purge.delete_site!(site)
assert nil == Plausible.Repo.reload(site)
events_query = from(s in Plausible.ClickhouseSession, where: s.domain == ^site.domain)
assert_count(events_query, 0)
sessions_query = from(s in Plausible.ClickhouseSession, where: s.domain == ^site.domain)
assert_count(sessions_query, 0)
Enum.each(Plausible.Imported.tables(), fn table ->
query = from(imported in table, where: imported.site_id == ^site.id)
assert_count(query, 0)
end)
end
end