Stats: Rebuild session smearing for timeseries (#5694)
* Refactor table_decider#partition_metrics * Refactor query pipeline to return a list of subqueries after splitting * Move order_by out of join logic * Refactor joining logic in query_builder 1. JOIN type is now set in QueryOptimizer 2. JOIN logic is now table and list-size agnostic * Comment an edge case * Rebuild session/visit smearing Previously, whenever graphing any visit metric hourly/realtime, visit_duration and other visit metrics would be way higher than expected, due to long sessions dragging each bucket up and up. Now visits/visitors metrics are still smeared and other visit metrics are counted under last bucket user was active in. visits metric was also overcounted (see new tests). * Remove unneeded case * Unit test for smearing in tabledecider
This commit is contained in:
parent
bf24ae0cd2
commit
db448d7404
|
|
@ -27,6 +27,9 @@ All notable changes to this project will be documented in this file.
|
|||
- Make clicking Compare / Disable Comparison in period picker menu close the menu
|
||||
- Do not log page views for hidden pages (prerendered pages and new tabs), until pages are viewed
|
||||
- Password-authenticated shared links now carry over dashboard params properly
|
||||
- Realtime and hourly graphs of visit duration, views per visit no longer overcount due to long-lasting sessions, instead showing each visit
|
||||
when it occurred.
|
||||
- Fixed realtime and hourly graphs of visits overcounting
|
||||
|
||||
## v3.0.0 - 2025-04-11
|
||||
|
||||
|
|
|
|||
|
|
@ -28,7 +28,9 @@ defmodule Plausible.Stats.Query do
|
|||
site_id: nil,
|
||||
site_native_stats_start_at: nil,
|
||||
# Contains information to determine how to combine legacy and new time on page metrics
|
||||
time_on_page_data: %{}
|
||||
time_on_page_data: %{},
|
||||
sql_join_type: :left,
|
||||
smear_session_metrics: false
|
||||
|
||||
require OpenTelemetry.Tracer, as: Tracer
|
||||
alias Plausible.Stats.{DateTimeRange, Filters, Imported, Legacy, Comparisons}
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ defmodule Plausible.Stats.QueryOptimizer do
|
|||
4. Updates event:hostname filters to also apply on visit level for sane results.
|
||||
5. Removes revenue metrics from dashboard queries if not requested, present or unavailable for the site.
|
||||
6. Trims the date range to the current time if query.include.trim_relative_date_range is true.
|
||||
7. Sets the join_type for the query based on the query.
|
||||
|
||||
"""
|
||||
def optimize(query) do
|
||||
|
|
@ -40,18 +41,12 @@ defmodule Plausible.Stats.QueryOptimizer do
|
|||
for sessions.
|
||||
"""
|
||||
def split(query) do
|
||||
{event_metrics, sessions_metrics, _other_metrics} =
|
||||
query.metrics
|
||||
|> Util.maybe_add_visitors_metric()
|
||||
|> TableDecider.partition_metrics(query)
|
||||
|
||||
{
|
||||
Query.set(query,
|
||||
metrics: event_metrics,
|
||||
include_imported: query.include_imported
|
||||
),
|
||||
split_sessions_query(query, sessions_metrics)
|
||||
}
|
||||
|> Enum.map(fn {table_type, metrics} ->
|
||||
build_split_query(table_type, metrics, query)
|
||||
end)
|
||||
end
|
||||
|
||||
defp pipeline() do
|
||||
|
|
@ -62,7 +57,8 @@ defmodule Plausible.Stats.QueryOptimizer do
|
|||
&extend_hostname_filters_to_visit/1,
|
||||
&remove_revenue_metrics_if_unavailable/1,
|
||||
&set_time_on_page_data/1,
|
||||
&trim_relative_date_range/1
|
||||
&trim_relative_date_range/1,
|
||||
&set_sql_join_type/1
|
||||
]
|
||||
end
|
||||
|
||||
|
|
@ -162,7 +158,17 @@ defmodule Plausible.Stats.QueryOptimizer do
|
|||
Enum.find(query.dimensions, &Time.time_dimension?/1)
|
||||
end
|
||||
|
||||
defp split_sessions_query(query, session_metrics) do
|
||||
defp build_split_query(:events, metrics, query) do
|
||||
{
|
||||
:events,
|
||||
Query.set(query,
|
||||
metrics: metrics,
|
||||
include_imported: query.include_imported
|
||||
)
|
||||
}
|
||||
end
|
||||
|
||||
defp build_split_query(:sessions, metrics, query) do
|
||||
dimensions =
|
||||
query.dimensions
|
||||
|> Enum.map(fn
|
||||
|
|
@ -179,12 +185,21 @@ defmodule Plausible.Stats.QueryOptimizer do
|
|||
query.filters
|
||||
end
|
||||
|
||||
{
|
||||
:sessions,
|
||||
Query.set(query,
|
||||
filters: filters,
|
||||
metrics: session_metrics,
|
||||
metrics: metrics,
|
||||
dimensions: dimensions,
|
||||
include_imported: query.include_imported
|
||||
)
|
||||
}
|
||||
end
|
||||
|
||||
defp build_split_query(:sessions_smeared, metrics, query) do
|
||||
{_, query} = build_split_query(:sessions, metrics, query)
|
||||
|
||||
{:sessions, Query.set(query, smear_session_metrics: true)}
|
||||
end
|
||||
|
||||
on_ee do
|
||||
|
|
@ -299,4 +314,19 @@ defmodule Plausible.Stats.QueryOptimizer do
|
|||
|> DateTimeRange.to_timezone("Etc/UTC")
|
||||
end
|
||||
end
|
||||
|
||||
# Normally we can always LEFT JOIN as this is more performant and tables
|
||||
# are expected to contain the same dimensions.
|
||||
|
||||
# The only exception is using the "time:minute" dimension where the sessions
|
||||
# subquery might return more rows than the events one. That's because we're
|
||||
# counting sessions in all time buckets they were active in even if no event
|
||||
# occurred during that particular minute.
|
||||
defp set_sql_join_type(query) do
|
||||
if "time:minute" in query.dimensions do
|
||||
Query.set(query, sql_join_type: :full)
|
||||
else
|
||||
query
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ defmodule Plausible.Stats.SQL.Expression do
|
|||
})
|
||||
end
|
||||
|
||||
def select_dimension(q, key, "time:hour", :sessions, query) do
|
||||
def select_dimension(q, key, "time:hour", :sessions, query) when query.smear_session_metrics do
|
||||
# :TRICKY: ClickHouse timeSlots works off of unix epoch and is not
|
||||
# timezone-aware. This means that for e.g. Asia/Katmandu (GMT+5:45)
|
||||
# to work, we divide time into 15-minute buckets and later combine these
|
||||
|
|
@ -87,7 +87,8 @@ defmodule Plausible.Stats.SQL.Expression do
|
|||
end
|
||||
|
||||
# :NOTE: This is not exposed in Query APIv2
|
||||
def select_dimension(q, key, "time:minute", :sessions, query) do
|
||||
def select_dimension(q, key, "time:minute", :sessions, query)
|
||||
when query.smear_session_metrics do
|
||||
q
|
||||
|> join(:inner, [s], time_slot in time_slots(query, 60),
|
||||
as: :time_slot,
|
||||
|
|
@ -338,6 +339,12 @@ defmodule Plausible.Stats.SQL.Expression do
|
|||
})
|
||||
end
|
||||
|
||||
def session_metric(:visits, query) when query.smear_session_metrics do
|
||||
wrap_alias([s], %{
|
||||
visits: scale_sample(fragment("uniq(?)", s.session_id))
|
||||
})
|
||||
end
|
||||
|
||||
def session_metric(:visits, _query) do
|
||||
wrap_alias([s], %{
|
||||
visits: scale_sample(fragment("greatest(sum(?), 0)", s.sign))
|
||||
|
|
|
|||
|
|
@ -15,15 +15,14 @@ defmodule Plausible.Stats.SQL.QueryBuilder do
|
|||
require Plausible.Stats.SQL.Expression
|
||||
|
||||
def build(query, site) do
|
||||
{event_query, sessions_query} = QueryOptimizer.split(query)
|
||||
|
||||
event_q = build_events_query(site, event_query)
|
||||
sessions_q = build_sessions_query(site, sessions_query)
|
||||
|
||||
join_query_results(
|
||||
{event_q, event_query},
|
||||
{sessions_q, sessions_query}
|
||||
)
|
||||
query
|
||||
|> QueryOptimizer.split()
|
||||
|> Enum.map(fn {table_type, table_query} ->
|
||||
q = build_table_query(table_type, site, table_query)
|
||||
{table_type, table_query, q}
|
||||
end)
|
||||
|> join_query_results(query)
|
||||
|> build_order_by(query)
|
||||
|> paginate(query.pagination)
|
||||
|> select_total_rows(query.include.total_rows)
|
||||
end
|
||||
|
|
@ -32,9 +31,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do
|
|||
Enum.reduce(query.order_by || [], q, &build_order_by(&2, query, &1))
|
||||
end
|
||||
|
||||
defp build_events_query(_site, %Query{metrics: []}), do: nil
|
||||
|
||||
defp build_events_query(site, events_query) do
|
||||
defp build_table_query(:events, site, events_query) do
|
||||
q =
|
||||
from(
|
||||
e in "events_v2",
|
||||
|
|
@ -54,6 +51,25 @@ defmodule Plausible.Stats.SQL.QueryBuilder do
|
|||
|> TimeOnPage.merge_legacy_time_on_page(events_query)
|
||||
end
|
||||
|
||||
defp build_table_query(:sessions, site, sessions_query) do
|
||||
q =
|
||||
from(
|
||||
e in "sessions_v2",
|
||||
where: ^SQL.WhereBuilder.build(:sessions, sessions_query),
|
||||
select: ^select_session_metrics(sessions_query)
|
||||
)
|
||||
|
||||
on_ee do
|
||||
q = Plausible.Stats.Sampling.add_query_hint(q, sessions_query)
|
||||
end
|
||||
|
||||
q
|
||||
|> join_events_if_needed(sessions_query)
|
||||
|> build_group_by(:sessions, sessions_query)
|
||||
|> merge_imported(site, sessions_query)
|
||||
|> SQL.SpecialMetrics.add(site, sessions_query)
|
||||
end
|
||||
|
||||
defp join_sessions_if_needed(q, query) do
|
||||
if TableDecider.events_join_sessions?(query) do
|
||||
sessions_q =
|
||||
|
|
@ -79,27 +95,6 @@ defmodule Plausible.Stats.SQL.QueryBuilder do
|
|||
end
|
||||
end
|
||||
|
||||
defp build_sessions_query(_site, %Query{metrics: []}), do: nil
|
||||
|
||||
defp build_sessions_query(site, sessions_query) do
|
||||
q =
|
||||
from(
|
||||
e in "sessions_v2",
|
||||
where: ^SQL.WhereBuilder.build(:sessions, sessions_query),
|
||||
select: ^select_session_metrics(sessions_query)
|
||||
)
|
||||
|
||||
on_ee do
|
||||
q = Plausible.Stats.Sampling.add_query_hint(q, sessions_query)
|
||||
end
|
||||
|
||||
q
|
||||
|> join_events_if_needed(sessions_query)
|
||||
|> build_group_by(:sessions, sessions_query)
|
||||
|> merge_imported(site, sessions_query)
|
||||
|> SQL.SpecialMetrics.add(site, sessions_query)
|
||||
end
|
||||
|
||||
def join_events_if_needed(q, query) do
|
||||
if TableDecider.sessions_join_events?(query) do
|
||||
events_q =
|
||||
|
|
@ -173,24 +168,24 @@ defmodule Plausible.Stats.SQL.QueryBuilder do
|
|||
)
|
||||
end
|
||||
|
||||
defp join_query_results({nil, _}, {nil, _}), do: nil
|
||||
# Only one table is being queried - skip joining!
|
||||
defp join_query_results([{_table_type, _query, q}], _main_query), do: q
|
||||
|
||||
defp join_query_results({events_q, events_query}, {nil, _}),
|
||||
do: events_q |> build_order_by(events_query)
|
||||
# Multiple tables: join results based on dimensions, select metrics from each and the appropriate dimensions.
|
||||
defp join_query_results(queries, main_query) do
|
||||
queries
|
||||
|> Enum.reduce(nil, fn
|
||||
{_table_type, query, q}, nil ->
|
||||
from(e in subquery(q))
|
||||
|> select_join_metrics(query, query.metrics)
|
||||
|
||||
defp join_query_results({nil, events_query}, {sessions_q, _}),
|
||||
do: sessions_q |> build_order_by(events_query)
|
||||
|
||||
defp join_query_results({events_q, events_query}, {sessions_q, sessions_query}) do
|
||||
{join_type, events_q_fields, sessions_q_fields} =
|
||||
TableDecider.join_options(events_query, sessions_query)
|
||||
|
||||
join(subquery(events_q), join_type, [e], s in subquery(sessions_q),
|
||||
on: ^build_group_by_join(events_query)
|
||||
{_table_type, query, q}, acc ->
|
||||
join(acc, main_query.sql_join_type, [], s in subquery(q),
|
||||
on: ^build_group_by_join(main_query)
|
||||
)
|
||||
|> select_join_fields(events_query, events_q_fields, e)
|
||||
|> select_join_fields(sessions_query, sessions_q_fields, s)
|
||||
|> build_order_by(events_query)
|
||||
|> select_join_metrics(query, query.metrics -- [:sample_percent])
|
||||
end)
|
||||
|> select_dimensions(main_query)
|
||||
end
|
||||
|
||||
# NOTE: Old queries do their own pagination
|
||||
|
|
@ -214,8 +209,33 @@ defmodule Plausible.Stats.SQL.QueryBuilder do
|
|||
def build_group_by_join(query) do
|
||||
query.dimensions
|
||||
|> Enum.map(fn dim ->
|
||||
dynamic([e, s], field(e, ^shortname(query, dim)) == field(s, ^shortname(query, dim)))
|
||||
dynamic([a, ..., b], field(a, ^shortname(query, dim)) == field(b, ^shortname(query, dim)))
|
||||
end)
|
||||
|> Enum.reduce(fn condition, acc -> dynamic([], ^acc and ^condition) end)
|
||||
end
|
||||
|
||||
defp select_join_metrics(q, query, metrics) do
|
||||
Enum.reduce(metrics, q, fn
|
||||
metric, q ->
|
||||
select_merge_as(q, [..., x], %{
|
||||
shortname(query, metric) => field(x, ^shortname(query, metric))
|
||||
})
|
||||
end)
|
||||
end
|
||||
|
||||
defp select_dimensions(q, query) do
|
||||
Enum.reduce(query.dimensions, q, fn dimension, q ->
|
||||
# We generally select dimensions from the left-most table. Only exception is time:minute where
|
||||
# we use sessions table as sessions are considered on-going during the whole period.
|
||||
if query.sql_join_type == :full and "time:minute" == dimension do
|
||||
select_merge_as(q, [..., x], %{
|
||||
shortname(query, dimension) => field(x, ^shortname(query, dimension))
|
||||
})
|
||||
else
|
||||
select_merge_as(q, [x], %{
|
||||
shortname(query, dimension) => field(x, ^shortname(query, dimension))
|
||||
})
|
||||
end
|
||||
end)
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -54,84 +54,73 @@ defmodule Plausible.Stats.TableDecider do
|
|||
end
|
||||
end
|
||||
|
||||
@doc """
|
||||
Returns a three-element tuple with instructions on how to join two Ecto
|
||||
queries. The arguments (`events_query` and `sessions_query`) are `%Query{}`
|
||||
structs that have been split by TableDecider already.
|
||||
@type table_type() :: :events | :sessions
|
||||
@type metric() :: String.t()
|
||||
|
||||
Normally we can always LEFT JOIN sessions to events, selecting `dimensions`
|
||||
only from the events subquery. That's because:
|
||||
@spec partition_metrics(list(metric()), Query.t()) :: list({table_type(), list(metric())})
|
||||
def partition_metrics(requested_metrics, query) do
|
||||
metrics = partition(requested_metrics, query, &metric_partitioner/2)
|
||||
|
||||
1) session dimensions (e.g. entry_page) cannot be queried alongside event
|
||||
metrics/dimensions, or
|
||||
|
||||
2) session dimensions (e.g. operating_system) are also available in the
|
||||
events table.
|
||||
|
||||
The only exception is using the "time:minute" dimension where the sessions
|
||||
subquery might return more rows than the events one. That's because we're
|
||||
counting sessions in all time buckets they were active in.
|
||||
"""
|
||||
def join_options(events_query, sessions_query) do
|
||||
events_q_select_fields = events_query.metrics ++ events_query.dimensions
|
||||
sessions_q_select_fields = sessions_query.metrics -- [:sample_percent]
|
||||
|
||||
if "time:minute" in events_query.dimensions do
|
||||
{
|
||||
:full,
|
||||
events_q_select_fields -- ["time:minute"],
|
||||
sessions_q_select_fields ++ ["time:minute"]
|
||||
}
|
||||
else
|
||||
{:left, events_q_select_fields, sessions_q_select_fields}
|
||||
end
|
||||
end
|
||||
|
||||
def partition_metrics(metrics, query) do
|
||||
%{
|
||||
event: event_only_metrics,
|
||||
session: session_only_metrics,
|
||||
either: either_metrics,
|
||||
other: other_metrics,
|
||||
sample_percent: sample_percent
|
||||
} =
|
||||
partition(metrics, query, &metric_partitioner/2)
|
||||
|
||||
%{event: event_only_filters, session: session_only_filters} =
|
||||
filters =
|
||||
query.filters
|
||||
|> dimensions_used_in_filters()
|
||||
|> partition(query, &dimension_partitioner/2)
|
||||
|
||||
%{event: event_only_dimensions, session: session_only_dimensions} =
|
||||
partition(query.dimensions, query, &dimension_partitioner/2)
|
||||
dimensions = partition(query.dimensions, query, &dimension_partitioner/2)
|
||||
|
||||
cond do
|
||||
# Only one table needs to be queried
|
||||
empty?(event_only_metrics) && empty?(event_only_filters) && empty?(event_only_dimensions) ->
|
||||
{[], session_only_metrics ++ either_metrics ++ sample_percent, other_metrics}
|
||||
empty?(metrics.event) && empty?(filters.event) && empty?(dimensions.event) ->
|
||||
[sessions: metrics.session ++ metrics.either ++ metrics.sample_percent]
|
||||
|
||||
empty?(session_only_metrics) && empty?(session_only_filters) &&
|
||||
empty?(session_only_dimensions) ->
|
||||
{event_only_metrics ++ either_metrics ++ sample_percent, [], other_metrics}
|
||||
empty?(metrics.session) && empty?(filters.session) && empty?(dimensions.session) ->
|
||||
[events: metrics.event ++ metrics.either ++ metrics.sample_percent]
|
||||
|
||||
# Filters and/or dimensions on both events and sessions, but only one kind of metric
|
||||
empty?(event_only_metrics) && empty?(event_only_dimensions) ->
|
||||
{[], session_only_metrics ++ either_metrics ++ sample_percent, other_metrics}
|
||||
empty?(metrics.event) && empty?(dimensions.event) ->
|
||||
[sessions: metrics.session ++ metrics.either ++ metrics.sample_percent]
|
||||
|
||||
empty?(session_only_metrics) && empty?(session_only_dimensions) ->
|
||||
{event_only_metrics ++ either_metrics ++ sample_percent, [], other_metrics}
|
||||
empty?(metrics.session) && empty?(dimensions.session) ->
|
||||
[events: metrics.event ++ metrics.either ++ metrics.sample_percent]
|
||||
|
||||
# Default: prefer events
|
||||
true ->
|
||||
{event_only_metrics ++ either_metrics ++ sample_percent,
|
||||
session_only_metrics ++ sample_percent, other_metrics}
|
||||
[
|
||||
events: metrics.event ++ metrics.either ++ metrics.sample_percent,
|
||||
sessions: metrics.session ++ metrics.sample_percent
|
||||
]
|
||||
end
|
||||
|> Enum.flat_map(&smear_session_metrics(&1, query))
|
||||
|> Enum.reject(fn {_table_type, metrics} -> empty?(metrics) end)
|
||||
end
|
||||
|
||||
# :TRICKY: When counting session metrics, we want to count each visit/visitor across
|
||||
# the length of the session, not just when events occurred or when session started.
|
||||
# For this reason, we smear the session metrics across the length of the session.
|
||||
# See `time_slots` usage in `Plausible.Stats.SQL.Expression` to understand how this is done.
|
||||
@smearable_metrics [:visitors, :visits]
|
||||
defp smear_session_metrics({:sessions, metrics} = value, query) do
|
||||
if "time:minute" in query.dimensions or "time:hour" in query.dimensions do
|
||||
# Split metrics into two groups: one with visitors and visits, and the remaining ones
|
||||
{smearable_metrics, session_metrics} = Enum.split_with(metrics, &(&1 in @smearable_metrics))
|
||||
|
||||
[
|
||||
{:sessions, session_metrics},
|
||||
{:sessions_smeared, smearable_metrics}
|
||||
]
|
||||
else
|
||||
[value]
|
||||
end
|
||||
end
|
||||
|
||||
defp smear_session_metrics(value, _query), do: [value]
|
||||
|
||||
# Note: This is inaccurate when filtering but required for old backwards compatibility
|
||||
defp metric_partitioner(%Query{legacy_breakdown: true}, :pageviews), do: :either
|
||||
defp metric_partitioner(%Query{legacy_breakdown: true}, :events), do: :either
|
||||
|
||||
# :TRICKY: For time:minute dimension we prefer sessions over events as there
|
||||
# might be minutes where no events occurred but the session was active.
|
||||
defp metric_partitioner(query, metric) when metric in [:visitors, :visits] do
|
||||
if "time:minute" in query.dimensions, do: :session, else: :either
|
||||
end
|
||||
|
|
|
|||
|
|
@ -363,4 +363,14 @@ defmodule Plausible.Stats.QueryOptimizerTest do
|
|||
assert result.utc_time_range.last == nyc_mar_15_end
|
||||
end
|
||||
end
|
||||
|
||||
describe "set_sql_join_type" do
|
||||
test "updates sql_join_type to :full if time:minute dimension is present" do
|
||||
assert perform(%{dimensions: ["time:minute"]}).sql_join_type == :full
|
||||
end
|
||||
|
||||
test "keeps default sql_join_type otherwise" do
|
||||
assert perform(%{dimensions: ["time:hour"]}).sql_join_type == :left
|
||||
end
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -28,127 +28,164 @@ defmodule Plausible.Stats.TableDeciderTest do
|
|||
test "with no metrics or filters" do
|
||||
query = make_query([])
|
||||
|
||||
assert partition_metrics([], query) == {[], [], []}
|
||||
assert partition_metrics([], query) == []
|
||||
end
|
||||
|
||||
test "session-only metrics accordingly" do
|
||||
query = make_query([])
|
||||
|
||||
assert partition_metrics([:bounce_rate, :views_per_visit], query) ==
|
||||
{[], [:bounce_rate, :views_per_visit], []}
|
||||
assert partition_metrics([:bounce_rate, :views_per_visit], query) == [
|
||||
sessions: [:bounce_rate, :views_per_visit]
|
||||
]
|
||||
end
|
||||
|
||||
test "event-only metrics accordingly" do
|
||||
query = make_query([])
|
||||
|
||||
assert partition_metrics([:total_revenue, :visitors], query) ==
|
||||
{[:total_revenue, :visitors], [], []}
|
||||
assert partition_metrics([:total_revenue, :visitors], query) == [
|
||||
events: [:total_revenue, :visitors]
|
||||
]
|
||||
end
|
||||
|
||||
test "filters from both, event-only metrics" do
|
||||
query = make_query(["event:name", "visit:source"])
|
||||
|
||||
assert partition_metrics([:total_revenue], query) == {[:total_revenue], [], []}
|
||||
assert partition_metrics([:total_revenue], query) == [events: [:total_revenue]]
|
||||
end
|
||||
|
||||
test "filters from both, session-only metrics" do
|
||||
query = make_query(["event:name", "visit:source"])
|
||||
|
||||
assert partition_metrics([:bounce_rate], query) == {[], [:bounce_rate], []}
|
||||
assert partition_metrics([:bounce_rate], query) == [sessions: [:bounce_rate]]
|
||||
end
|
||||
|
||||
test "session filters but no session metrics" do
|
||||
query = make_query(["visit:source"])
|
||||
|
||||
assert partition_metrics([:total_revenue], query) == {[:total_revenue], [], []}
|
||||
assert partition_metrics([:total_revenue], query) == [events: [:total_revenue]]
|
||||
end
|
||||
|
||||
test "sample_percent is added to both types of metrics" do
|
||||
query = make_query([])
|
||||
|
||||
assert partition_metrics([:total_revenue, :sample_percent], query) ==
|
||||
{[:total_revenue, :sample_percent], [], []}
|
||||
assert partition_metrics([:total_revenue, :sample_percent], query) == [
|
||||
events: [:total_revenue, :sample_percent]
|
||||
]
|
||||
|
||||
assert partition_metrics([:bounce_rate, :sample_percent], query) ==
|
||||
{[], [:bounce_rate, :sample_percent], []}
|
||||
assert partition_metrics([:bounce_rate, :sample_percent], query) == [
|
||||
sessions: [:bounce_rate, :sample_percent]
|
||||
]
|
||||
|
||||
assert partition_metrics([:total_revenue, :bounce_rate, :sample_percent], query) ==
|
||||
{[:total_revenue, :sample_percent], [:bounce_rate, :sample_percent], []}
|
||||
assert partition_metrics([:total_revenue, :bounce_rate, :sample_percent], query) == [
|
||||
events: [:total_revenue, :sample_percent],
|
||||
sessions: [:bounce_rate, :sample_percent]
|
||||
]
|
||||
end
|
||||
|
||||
test "other metrics put in its own result" do
|
||||
test "other metrics get ignored" do
|
||||
query = make_query([])
|
||||
|
||||
assert partition_metrics([:percentage, :total_visitors], query) ==
|
||||
{[], [:percentage], [:total_visitors]}
|
||||
assert partition_metrics([:percentage, :total_visitors], query) == [sessions: [:percentage]]
|
||||
end
|
||||
|
||||
test "metrics that can be calculated on either when event-only metrics" do
|
||||
query = make_query([])
|
||||
|
||||
assert partition_metrics([:total_revenue, :visitors], query) ==
|
||||
{[:total_revenue, :visitors], [], []}
|
||||
assert partition_metrics([:total_revenue, :visitors], query) == [
|
||||
events: [:total_revenue, :visitors]
|
||||
]
|
||||
|
||||
assert partition_metrics([:pageviews, :visits], query) == {[:pageviews, :visits], [], []}
|
||||
assert partition_metrics([:pageviews, :visits], query) == [events: [:pageviews, :visits]]
|
||||
end
|
||||
|
||||
test "metrics that can be calculated on either when session-only metrics" do
|
||||
query = make_query([])
|
||||
|
||||
assert partition_metrics([:bounce_rate, :visitors], query) ==
|
||||
{[], [:bounce_rate, :visitors], []}
|
||||
assert partition_metrics([:bounce_rate, :visitors], query) == [
|
||||
sessions: [:bounce_rate, :visitors]
|
||||
]
|
||||
|
||||
assert partition_metrics([:visit_duration, :visits], query) ==
|
||||
{[], [:visit_duration, :visits], []}
|
||||
assert partition_metrics([:visit_duration, :visits], query) == [
|
||||
sessions: [:visit_duration, :visits]
|
||||
]
|
||||
end
|
||||
|
||||
test "metrics that can be calculated on either are biased to events" do
|
||||
query = make_query([])
|
||||
|
||||
assert partition_metrics([:bounce_rate, :total_revenue, :visitors], query) ==
|
||||
{[:total_revenue, :visitors], [:bounce_rate], []}
|
||||
assert partition_metrics([:bounce_rate, :total_revenue, :visitors], query) == [
|
||||
events: [:total_revenue, :visitors],
|
||||
sessions: [:bounce_rate]
|
||||
]
|
||||
end
|
||||
|
||||
test "sample_percent is handled with either metrics" do
|
||||
query = make_query([])
|
||||
|
||||
assert partition_metrics([:visitors, :sample_percent], query) ==
|
||||
{[], [:visitors, :sample_percent], []}
|
||||
assert partition_metrics([:visitors, :sample_percent], query) == [
|
||||
sessions: [:visitors, :sample_percent]
|
||||
]
|
||||
end
|
||||
|
||||
test "metric can be calculated on either, but filtering on events" do
|
||||
query = make_query(["event:name"])
|
||||
|
||||
assert partition_metrics([:visitors], query) == {[:visitors], [], []}
|
||||
assert partition_metrics([:visitors], query) == [events: [:visitors]]
|
||||
end
|
||||
|
||||
test "metric can be calculated on either, but filtering on events and sessions" do
|
||||
query = make_query(["event:name", "visit:exit_page"])
|
||||
|
||||
assert partition_metrics([:visitors], query) == {[], [:visitors], []}
|
||||
assert partition_metrics([:visitors], query) == [sessions: [:visitors]]
|
||||
end
|
||||
|
||||
test "metric can be calculated on either, filtering on either" do
|
||||
query = make_query(["visit:source"])
|
||||
|
||||
assert partition_metrics([:visitors], query) == {[], [:visitors], []}
|
||||
assert partition_metrics([:visitors], query) == [sessions: [:visitors]]
|
||||
end
|
||||
|
||||
test "metric can be calculated on either, filtering on sessions" do
|
||||
query = make_query(["visit:exit_page"])
|
||||
|
||||
assert partition_metrics([:visitors], query) == {[], [:visitors], []}
|
||||
assert partition_metrics([:visitors], query) == [sessions: [:visitors]]
|
||||
end
|
||||
|
||||
test "query dimensions lean metric" do
|
||||
assert partition_metrics([:visitors], make_query([], ["event:name"])) ==
|
||||
{[:visitors], [], []}
|
||||
assert partition_metrics([:visitors], make_query([], ["event:name"])) == [
|
||||
events: [:visitors]
|
||||
]
|
||||
|
||||
assert partition_metrics([:visitors], make_query([], ["visit:source"])) ==
|
||||
{[], [:visitors], []}
|
||||
assert partition_metrics([:visitors], make_query([], ["visit:source"])) == [
|
||||
sessions: [:visitors]
|
||||
]
|
||||
|
||||
assert partition_metrics([:visitors], make_query([], ["visit:exit_page"])) ==
|
||||
{[], [:visitors], []}
|
||||
assert partition_metrics([:visitors], make_query([], ["visit:exit_page"])) == [
|
||||
sessions: [:visitors]
|
||||
]
|
||||
end
|
||||
|
||||
test "smearable metrics" do
|
||||
assert partition_metrics(
|
||||
[:visitors, :visits, :visit_duration, :pageviews],
|
||||
make_query([], ["time:minute"])
|
||||
) == [
|
||||
events: [:pageviews],
|
||||
sessions: [:visit_duration],
|
||||
sessions_smeared: [:visitors, :visits]
|
||||
]
|
||||
|
||||
assert partition_metrics([:visitors], make_query([], ["time:hour"])) == [
|
||||
sessions_smeared: [:visitors]
|
||||
]
|
||||
|
||||
assert partition_metrics([:visitors], make_query([], ["time:day"])) == [
|
||||
sessions: [:visitors]
|
||||
]
|
||||
|
||||
assert partition_metrics([:visitors], make_query([], [])) == [
|
||||
sessions: [:visitors]
|
||||
]
|
||||
end
|
||||
end
|
||||
|
||||
|
|
|
|||
|
|
@ -1681,6 +1681,76 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do
|
|||
%{"dimensions" => ["2021-01-02 12:00:00"], "metrics" => [2]}
|
||||
]
|
||||
end
|
||||
|
||||
test "visitors and visits are smeared across time:minute buckets but visit_duration is not",
|
||||
%{conn: conn, site: site} do
|
||||
populate_stats(site, [
|
||||
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 00:00:00]),
|
||||
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 00:10:00]),
|
||||
build(:pageview, user_id: 2, timestamp: ~N[2021-01-01 00:05:00]),
|
||||
build(:pageview, user_id: 2, timestamp: ~N[2021-01-01 00:08:00])
|
||||
])
|
||||
|
||||
conn =
|
||||
post(conn, "/api/v2/query-internal-test", %{
|
||||
"site_id" => site.domain,
|
||||
"metrics" => ["visitors", "visits", "visit_duration", "pageviews"],
|
||||
"date_range" => ["2021-01-01T00:00:00Z", "2021-01-01T00:30:00Z"],
|
||||
"dimensions" => ["time:minute"]
|
||||
})
|
||||
|
||||
assert json_response(conn, 200)["results"] == [
|
||||
%{"dimensions" => ["2021-01-01 00:00:00"], "metrics" => [1, 1, 0, 1]},
|
||||
%{"dimensions" => ["2021-01-01 00:01:00"], "metrics" => [1, 1, 0, 0]},
|
||||
%{"dimensions" => ["2021-01-01 00:02:00"], "metrics" => [1, 1, 0, 0]},
|
||||
%{"dimensions" => ["2021-01-01 00:03:00"], "metrics" => [1, 1, 0, 0]},
|
||||
%{"dimensions" => ["2021-01-01 00:04:00"], "metrics" => [1, 1, 0, 0]},
|
||||
%{"dimensions" => ["2021-01-01 00:05:00"], "metrics" => [2, 2, 0, 1]},
|
||||
%{"dimensions" => ["2021-01-01 00:06:00"], "metrics" => [2, 2, 0, 0]},
|
||||
%{"dimensions" => ["2021-01-01 00:07:00"], "metrics" => [2, 2, 0, 0]},
|
||||
%{"dimensions" => ["2021-01-01 00:08:00"], "metrics" => [2, 2, 180, 1]},
|
||||
%{"dimensions" => ["2021-01-01 00:09:00"], "metrics" => [1, 1, 0, 0]},
|
||||
%{"dimensions" => ["2021-01-01 00:10:00"], "metrics" => [1, 1, 600, 1]}
|
||||
]
|
||||
end
|
||||
|
||||
test "visitors and visits are smeared across time:hour buckets but visit_duration is not", %{
|
||||
conn: conn,
|
||||
site: site
|
||||
} do
|
||||
populate_stats(site, [
|
||||
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 00:00:00]),
|
||||
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 00:20:00]),
|
||||
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 00:40:00]),
|
||||
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 01:00:00]),
|
||||
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 01:20:00]),
|
||||
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 01:40:00]),
|
||||
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 02:00:00]),
|
||||
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 02:20:00]),
|
||||
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 02:40:00]),
|
||||
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 03:00:00]),
|
||||
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 03:20:00]),
|
||||
build(:pageview, user_id: 2, timestamp: ~N[2021-01-01 01:05:00]),
|
||||
build(:pageview, user_id: 2, timestamp: ~N[2021-01-01 01:10:00]),
|
||||
build(:pageview, user_id: 3, timestamp: ~N[2021-01-01 02:10:00]),
|
||||
build(:pageview, user_id: 3, timestamp: ~N[2021-01-01 02:20:00])
|
||||
])
|
||||
|
||||
conn =
|
||||
post(conn, "/api/v2/query", %{
|
||||
"site_id" => site.domain,
|
||||
"metrics" => ["visits", "visitors", "visit_duration", "pageviews"],
|
||||
"date_range" => ["2021-01-01", "2021-01-01"],
|
||||
"dimensions" => ["time:hour"]
|
||||
})
|
||||
|
||||
assert json_response(conn, 200)["results"] == [
|
||||
%{"dimensions" => ["2021-01-01 00:00:00"], "metrics" => [1, 1, 0, 3]},
|
||||
%{"dimensions" => ["2021-01-01 01:00:00"], "metrics" => [2, 2, 300, 5]},
|
||||
%{"dimensions" => ["2021-01-01 02:00:00"], "metrics" => [2, 2, 600, 5]},
|
||||
%{"dimensions" => ["2021-01-01 03:00:00"], "metrics" => [1, 1, 12_000, 2]}
|
||||
]
|
||||
end
|
||||
end
|
||||
|
||||
test "breakdown by visit:source", %{conn: conn, site: site} do
|
||||
|
|
|
|||
Loading…
Reference in New Issue