Add custom props to full export (#5666)

* Add custom props to full export

* Pass full `site` struct to `export_queries`

* Export only internal props if plan lacks custom props

* Add changelog entry

* Add spot check test for custom props

* Do not generate cartesian product of prop/value pairs 🤦
This commit is contained in:
Adrian Gruntkowski 2025-08-28 15:13:01 +02:00 committed by GitHub
parent 4548e3acc5
commit 70c9a55bf8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 94 additions and 42 deletions

View File

@ -8,6 +8,7 @@ All notable changes to this project will be documented in this file.
- Custom events can now be marked as non-interactive in events API and tracker script. Events marked as non-interactive are not counted towards bounce rate. - Custom events can now be marked as non-interactive in events API and tracker script. Events marked as non-interactive are not counted towards bounce rate.
- Ability to leave team via Team Settings > Leave Team - Ability to leave team via Team Settings > Leave Team
- Stats APIv2 now supports `include.trim_relative_date_range`. This option allows trimming empty values after current time for `day`, `month` and `year` date_range values. - Stats APIv2 now supports `include.trim_relative_date_range`. This option allows trimming empty values after current time for `day`, `month` and `year` date_range values.
- Properties are now included in full site exports done via Site Settings > Imports & Exports
### Removed ### Removed

View File

@ -208,13 +208,13 @@ defmodule Plausible.Exports do
Builds Ecto queries to export data from `events_v2` and `sessions_v2` Builds Ecto queries to export data from `events_v2` and `sessions_v2`
tables into the format of `imported_*` tables for a website. tables into the format of `imported_*` tables for a website.
""" """
@spec export_queries(pos_integer, @spec export_queries(Plausible.Site.t(),
extname: String.t(), extname: String.t(),
date_range: Date.Range.t(), date_range: Date.Range.t(),
timezone: String.t() timezone: String.t()
) :: ) ::
%{String.t() => Ecto.Query.t()} %{String.t() => Ecto.Query.t()}
def export_queries(site_id, opts \\ []) do def export_queries(site, opts \\ []) do
extname = opts[:extname] || ".csv" extname = opts[:extname] || ".csv"
date_range = opts[:date_range] date_range = opts[:date_range]
timezone = opts[:timezone] || "UTC" timezone = opts[:timezone] || "UTC"
@ -231,18 +231,18 @@ defmodule Plausible.Exports do
filename = fn name -> name <> suffix end filename = fn name -> name <> suffix end
%{ %{
filename.("imported_visitors") => export_visitors_q(site_id, timezone, date_range), filename.("imported_visitors") => export_visitors_q(site, timezone, date_range),
filename.("imported_sources") => export_sources_q(site_id, timezone, date_range), filename.("imported_sources") => export_sources_q(site, timezone, date_range),
filename.("imported_pages") => export_pages_q(site_id, timezone, date_range), filename.("imported_pages") => export_pages_q(site, timezone, date_range),
filename.("imported_entry_pages") => export_entry_pages_q(site_id, timezone, date_range), filename.("imported_entry_pages") => export_entry_pages_q(site, timezone, date_range),
filename.("imported_exit_pages") => export_exit_pages_q(site_id, timezone, date_range), filename.("imported_exit_pages") => export_exit_pages_q(site, timezone, date_range),
filename.("imported_custom_events") => filename.("imported_custom_events") => export_custom_events_q(site, timezone, date_range),
export_custom_events_q(site_id, timezone, date_range), filename.("imported_locations") => export_locations_q(site, timezone, date_range),
filename.("imported_locations") => export_locations_q(site_id, timezone, date_range), filename.("imported_devices") => export_devices_q(site, timezone, date_range),
filename.("imported_devices") => export_devices_q(site_id, timezone, date_range), filename.("imported_browsers") => export_browsers_q(site, timezone, date_range),
filename.("imported_browsers") => export_browsers_q(site_id, timezone, date_range),
filename.("imported_operating_systems") => filename.("imported_operating_systems") =>
export_operating_systems_q(site_id, timezone, date_range) export_operating_systems_q(site, timezone, date_range),
filename.("imported_custom_props") => export_custom_props_q(site, timezone, date_range)
} }
end end
@ -336,10 +336,10 @@ defmodule Plausible.Exports do
end end
end end
defp export_visitors_q(site_id, timezone, date_range) do defp export_visitors_q(site, timezone, date_range) do
visitors_sessions_q = visitors_sessions_q =
from s in sampled("sessions_v2"), from s in sampled("sessions_v2"),
where: ^export_filter(site_id, date_range), where: ^export_filter(site.id, date_range),
group_by: selected_as(:date), group_by: selected_as(:date),
select: %{ select: %{
date: date(s.timestamp, ^timezone), date: date(s.timestamp, ^timezone),
@ -351,7 +351,7 @@ defmodule Plausible.Exports do
visitors_events_q = visitors_events_q =
from e in sampled("events_v2"), from e in sampled("events_v2"),
where: ^export_filter(site_id, date_range), where: ^export_filter(site.id, date_range),
group_by: selected_as(:date), group_by: selected_as(:date),
select: %{ select: %{
date: date(e.timestamp, ^timezone), date: date(e.timestamp, ^timezone),
@ -381,9 +381,9 @@ defmodule Plausible.Exports do
] ]
end end
defp export_sources_q(site_id, timezone, date_range) do defp export_sources_q(site, timezone, date_range) do
from s in sampled("sessions_v2"), from s in sampled("sessions_v2"),
where: ^export_filter(site_id, date_range), where: ^export_filter(site.id, date_range),
group_by: [ group_by: [
selected_as(:date), selected_as(:date),
selected_as(:source), selected_as(:source),
@ -412,10 +412,10 @@ defmodule Plausible.Exports do
] ]
end end
defp export_pages_q(site_id, timezone, date_range) do defp export_pages_q(site, timezone, date_range) do
base_q = base_q =
from(e in sampled("events_v2"), from(e in sampled("events_v2"),
where: ^export_filter(site_id, date_range), where: ^export_filter(site.id, date_range),
where: [name: "pageview"], where: [name: "pageview"],
group_by: [selected_as(:date), selected_as(:page)], group_by: [selected_as(:date), selected_as(:page)],
order_by: selected_as(:date) order_by: selected_as(:date)
@ -423,7 +423,7 @@ defmodule Plausible.Exports do
max_scroll_depth_per_session_q = max_scroll_depth_per_session_q =
from(e in "events_v2", from(e in "events_v2",
where: ^export_filter(site_id, date_range), where: ^export_filter(site.id, date_range),
where: e.name == "engagement" and e.scroll_depth <= 100, where: e.name == "engagement" and e.scroll_depth <= 100,
select: %{ select: %{
date: date(e.timestamp, ^timezone), date: date(e.timestamp, ^timezone),
@ -465,7 +465,7 @@ defmodule Plausible.Exports do
selected_as(fragment("any(?)", s.total_scroll_depth_visits), :total_scroll_depth_visits) selected_as(fragment("any(?)", s.total_scroll_depth_visits), :total_scroll_depth_visits)
} }
) )
|> add_time_on_page_columns(site_id, timezone, date_range) |> add_time_on_page_columns(site.id, timezone, date_range)
end end
defp add_time_on_page_columns(q, site_id, timezone, date_range) do defp add_time_on_page_columns(q, site_id, timezone, date_range) do
@ -508,9 +508,9 @@ defmodule Plausible.Exports do
end end
end end
defp export_entry_pages_q(site_id, timezone, date_range) do defp export_entry_pages_q(site, timezone, date_range) do
from s in sampled("sessions_v2"), from s in sampled("sessions_v2"),
where: ^export_filter(site_id, date_range), where: ^export_filter(site.id, date_range),
group_by: [selected_as(:date), s.entry_page], group_by: [selected_as(:date), s.entry_page],
order_by: selected_as(:date), order_by: selected_as(:date),
select: [ select: [
@ -527,9 +527,9 @@ defmodule Plausible.Exports do
] ]
end end
defp export_exit_pages_q(site_id, timezone, date_range) do defp export_exit_pages_q(site, timezone, date_range) do
from s in sampled("sessions_v2"), from s in sampled("sessions_v2"),
where: ^export_filter(site_id, date_range), where: ^export_filter(site.id, date_range),
group_by: [selected_as(:date), s.exit_page], group_by: [selected_as(:date), s.exit_page],
order_by: selected_as(:date), order_by: selected_as(:date),
select: [ select: [
@ -546,9 +546,9 @@ defmodule Plausible.Exports do
] ]
end end
defp export_custom_events_q(site_id, timezone, date_range) do defp export_custom_events_q(site, timezone, date_range) do
from e in sampled("events_v2"), from e in sampled("events_v2"),
where: ^export_filter(site_id, date_range), where: ^export_filter(site.id, date_range),
where: e.name != "pageview", where: e.name != "pageview",
group_by: [ group_by: [
selected_as(:date), selected_as(:date),
@ -583,9 +583,37 @@ defmodule Plausible.Exports do
] ]
end end
defp export_locations_q(site_id, timezone, date_range) do defp export_custom_props_q(site, timezone, date_range) do
# Builds the query for the `imported_custom_props` export file: rows grouped by
# (date, property, value) with visitor and event counts read from `events_v2`.
query =
from e in sampled("events_v2"),
# Zip `meta.key` with `meta.value` so each joined element is one matching
# (key, value) tuple instead of every key paired with every value.
# NOTE(review): the "ARRAY" hint presumably turns this into a ClickHouse
# ARRAY JOIN over the zipped tuples — confirm against the adapter.
join: pv in fragment("arrayZip(`meta.key`, `meta.value`)"),
on: true,
hints: "ARRAY",
where: ^export_filter(site.id, date_range),
group_by: [
selected_as(:date),
selected_as(:property),
selected_as(:value)
],
order_by: selected_as(:date),
select: [
date(e.timestamp, ^timezone),
# First tuple element is the property key, second is its value.
selected_as(fragment("tupleElement(?, 1)", pv), :property),
selected_as(fragment("tupleElement(?, 2)", pv), :value),
visitors(e),
selected_as(scale_sample(fragment("count()")), :events)
]
# When the Props feature is enabled for the site, every property is exported;
# otherwise the query is narrowed to the keys in `Plausible.Props.internal_keys/0`.
if Plausible.Billing.Feature.Props.enabled?(site) do
query
else
where(query, [], selected_as(:property) in ^Plausible.Props.internal_keys())
end
end
defp export_locations_q(site, timezone, date_range) do
from s in sampled("sessions_v2"), from s in sampled("sessions_v2"),
where: ^export_filter(site_id, date_range), where: ^export_filter(site.id, date_range),
where: s.country_code != "\0\0" and s.country_code != "ZZ", where: s.country_code != "\0\0" and s.country_code != "ZZ",
group_by: [selected_as(:date), s.country_code, s.subdivision1_code, s.city_geoname_id], group_by: [selected_as(:date), s.country_code, s.subdivision1_code, s.city_geoname_id],
order_by: selected_as(:date), order_by: selected_as(:date),
@ -602,9 +630,9 @@ defmodule Plausible.Exports do
] ]
end end
defp export_devices_q(site_id, timezone, date_range) do defp export_devices_q(site, timezone, date_range) do
from s in sampled("sessions_v2"), from s in sampled("sessions_v2"),
where: ^export_filter(site_id, date_range), where: ^export_filter(site.id, date_range),
group_by: [selected_as(:date), s.screen_size], group_by: [selected_as(:date), s.screen_size],
order_by: selected_as(:date), order_by: selected_as(:date),
select: [ select: [
@ -618,9 +646,9 @@ defmodule Plausible.Exports do
] ]
end end
defp export_browsers_q(site_id, timezone, date_range) do defp export_browsers_q(site, timezone, date_range) do
from s in sampled("sessions_v2"), from s in sampled("sessions_v2"),
where: ^export_filter(site_id, date_range), where: ^export_filter(site.id, date_range),
group_by: [selected_as(:date), s.browser, s.browser_version], group_by: [selected_as(:date), s.browser, s.browser_version],
order_by: selected_as(:date), order_by: selected_as(:date),
select: [ select: [
@ -635,9 +663,9 @@ defmodule Plausible.Exports do
] ]
end end
defp export_operating_systems_q(site_id, timezone, date_range) do defp export_operating_systems_q(site, timezone, date_range) do
from s in sampled("sessions_v2"), from s in sampled("sessions_v2"),
where: ^export_filter(site_id, date_range), where: ^export_filter(site.id, date_range),
group_by: [selected_as(:date), s.operating_system, s.operating_system_version], group_by: [selected_as(:date), s.operating_system, s.operating_system_version],
order_by: selected_as(:date), order_by: selected_as(:date),
select: [ select: [
@ -661,7 +689,7 @@ defmodule Plausible.Exports do
DBConnection.run(pool, fn conn -> DBConnection.run(pool, fn conn ->
conn conn
|> stream_archive(export_queries(_site_id = 1), format: "CSVWithNames") |> stream_archive(export_queries(site), format: "CSVWithNames")
|> Stream.into(File.stream!("export.zip")) |> Stream.into(File.stream!("export.zip"))
|> Stream.run() |> Stream.run()
end) end)

View File

@ -18,6 +18,8 @@ defmodule Plausible.Imported.CSVImporter do
@impl true @impl true
def parse_args(%{"uploads" => uploads, "storage" => storage}) do def parse_args(%{"uploads" => uploads, "storage" => storage}) do
# Drop every upload whose filename starts with "imported_custom_props_" so it
# is not part of the returned import args.
# NOTE(review): assumes each upload map has a "filename" key; if it is missing,
# `&1["filename"]` is nil and String.starts_with?/2 would raise — confirm
# against callers.
uploads = Enum.reject(uploads, &String.starts_with?(&1["filename"], "imported_custom_props_"))
[uploads: uploads, storage: storage] [uploads: uploads, storage: storage]
end end
@ -208,6 +210,14 @@ defmodule Plausible.Imported.CSVImporter do
def date_range([_ | _] = uploads), do: date_range(uploads, _start_date = nil, _end_date = nil) def date_range([_ | _] = uploads), do: date_range(uploads, _start_date = nil, _end_date = nil)
def date_range([]), do: nil def date_range([]), do: nil
# Skips an upload whose filename starts with "imported_custom_props_": recurses
# on the remaining uploads with the accumulated start/end dates passed through
# unchanged, so this file does not contribute to the computed date range.
defp date_range(
[%{"filename" => "imported_custom_props_" <> _} | uploads],
prev_start_date,
prev_end_date
) do
date_range(uploads, prev_start_date, prev_end_date)
end
defp date_range([upload | uploads], prev_start_date, prev_end_date) do defp date_range([upload | uploads], prev_start_date, prev_end_date) do
filename = filename =
case upload do case upload do

View File

@ -33,7 +33,7 @@ defmodule Plausible.Workers.ExportAnalytics do
%Date.Range{} = date_range = Exports.date_range(site.id, site.timezone) %Date.Range{} = date_range = Exports.date_range(site.id, site.timezone)
queries = queries =
Exports.export_queries(site_id, Exports.export_queries(site,
date_range: date_range, date_range: date_range,
timezone: site.timezone, timezone: site.timezone,
extname: ".csv" extname: ".csv"

View File

@ -9,12 +9,13 @@ defmodule Plausible.ExportsTest do
setup [:create_user, :create_site] setup [:create_user, :create_site]
test "returns named ecto queries", %{site: site} do test "returns named ecto queries", %{site: site} do
queries = Plausible.Exports.export_queries(site.id) queries = Plausible.Exports.export_queries(site)
assert queries |> Map.values() |> Enum.all?(&match?(%Ecto.Query{}, &1)) assert queries |> Map.values() |> Enum.all?(&match?(%Ecto.Query{}, &1))
assert Map.keys(queries) == [ assert Map.keys(queries) == [
"imported_browsers.csv", "imported_browsers.csv",
"imported_custom_events.csv", "imported_custom_events.csv",
"imported_custom_props.csv",
"imported_devices.csv", "imported_devices.csv",
"imported_entry_pages.csv", "imported_entry_pages.csv",
"imported_exit_pages.csv", "imported_exit_pages.csv",
@ -28,13 +29,14 @@ defmodule Plausible.ExportsTest do
test "with date range", %{site: site} do test "with date range", %{site: site} do
queries = queries =
Plausible.Exports.export_queries(site.id, Plausible.Exports.export_queries(site,
date_range: Date.range(~D[2023-01-01], ~D[2024-03-12]) date_range: Date.range(~D[2023-01-01], ~D[2024-03-12])
) )
assert Map.keys(queries) == [ assert Map.keys(queries) == [
"imported_browsers_20230101_20240312.csv", "imported_browsers_20230101_20240312.csv",
"imported_custom_events_20230101_20240312.csv", "imported_custom_events_20230101_20240312.csv",
"imported_custom_props_20230101_20240312.csv",
"imported_devices_20230101_20240312.csv", "imported_devices_20230101_20240312.csv",
"imported_entry_pages_20230101_20240312.csv", "imported_entry_pages_20230101_20240312.csv",
"imported_exit_pages_20230101_20240312.csv", "imported_exit_pages_20230101_20240312.csv",
@ -47,11 +49,12 @@ defmodule Plausible.ExportsTest do
end end
test "with custom extension", %{site: site} do test "with custom extension", %{site: site} do
queries = Plausible.Exports.export_queries(site.id, extname: ".ch") queries = Plausible.Exports.export_queries(site, extname: ".ch")
assert Map.keys(queries) == [ assert Map.keys(queries) == [
"imported_browsers.ch", "imported_browsers.ch",
"imported_custom_events.ch", "imported_custom_events.ch",
"imported_custom_props.ch",
"imported_devices.ch", "imported_devices.ch",
"imported_entry_pages.ch", "imported_entry_pages.ch",
"imported_exit_pages.ch", "imported_exit_pages.ch",

View File

@ -608,7 +608,7 @@ defmodule Plausible.Imported.CSVImporterTest do
imported_site: imported_site imported_site: imported_site
} }
%{site_import: site_import} = %{site_import: site_import, exported_files: exported_files} =
initial_context initial_context
|> export_archive() |> export_archive()
|> assert_email_notification() |> assert_email_notification()
@ -617,6 +617,16 @@ defmodule Plausible.Imported.CSVImporterTest do
|> upload_csvs() |> upload_csvs()
|> run_import() |> run_import()
assert custom_props_export =
exported_files
|> Enum.find(&String.contains?(&1, "imported_custom_props_"))
|> File.read!()
|> String.split("\n")
assert ~s|"date","property","value","visitors","events"| in custom_props_export
assert ~s|"2024-04-01","author","Marko Saric",43,57| in custom_props_export
assert ~s|"2024-04-01","category","Posts",43,56| in custom_props_export
assert %SiteImport{ assert %SiteImport{
start_date: ~D[2024-04-01], start_date: ~D[2024-04-01],
end_date: ~D[2024-04-30], end_date: ~D[2024-04-30],