Stats: Rebuild session smearing for timeseries (#5694)

* Refactor table_decider#partition_metrics

* Refactor query pipeline to return a list of subqueries after splitting

* Move order_by out of join logic

* Refactor joining logic in query_builder

1. JOIN type is now set in QueryOptimizer
2. JOIN logic is now table and list-size agnostic

* Comment an edge case

* Rebuild session/visit smearing

Previously, whenever graphing any visit metric hourly/realtime, visit_duration and other
visit metrics would be way higher than expected, due to long sessions
dragging each bucket up and up. Now visits/visitors metrics are still
smeared and other visit metrics are counted under the last bucket the user
was active in.

visits metric was also overcounted (see new tests).

* Remove unneeded case

* Unit test for smearing in tabledecider
This commit is contained in:
Karl-Aksel Puulmann 2025-09-08 09:21:12 +03:00 committed by GitHub
parent bf24ae0cd2
commit db448d7404
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 334 additions and 166 deletions

View File

@ -27,6 +27,9 @@ All notable changes to this project will be documented in this file.
- Make clicking Compare / Disable Comparison in period picker menu close the menu
- Do not log page views for hidden pages (prerendered pages and new tabs), until pages are viewed
- Password-authenticated shared links now carry over dashboard params properly
- Realtime and hourly graphs of visit duration and views per visit no longer overcount due to long-lasting sessions, instead showing each visit
when it occurred.
- Fixed realtime and hourly graphs of visits overcounting
## v3.0.0 - 2025-04-11

View File

@ -28,7 +28,9 @@ defmodule Plausible.Stats.Query do
site_id: nil,
site_native_stats_start_at: nil,
# Contains information to determine how to combine legacy and new time on page metrics
time_on_page_data: %{}
time_on_page_data: %{},
sql_join_type: :left,
smear_session_metrics: false
require OpenTelemetry.Tracer, as: Tracer
alias Plausible.Stats.{DateTimeRange, Filters, Imported, Legacy, Comparisons}

View File

@ -26,6 +26,7 @@ defmodule Plausible.Stats.QueryOptimizer do
4. Updates event:hostname filters to also apply on visit level for sane results.
5. Removes revenue metrics from dashboard queries if not requested, present or unavailable for the site.
6. Trims the date range to the current time if query.include.trim_relative_date_range is true.
7. Sets `sql_join_type` for the query: `:full` when grouping by `time:minute`, otherwise the default `:left`.
"""
def optimize(query) do
@ -40,18 +41,12 @@ defmodule Plausible.Stats.QueryOptimizer do
for sessions.
"""
def split(query) do
{event_metrics, sessions_metrics, _other_metrics} =
query.metrics
|> Util.maybe_add_visitors_metric()
|> TableDecider.partition_metrics(query)
{
Query.set(query,
metrics: event_metrics,
include_imported: query.include_imported
),
split_sessions_query(query, sessions_metrics)
}
query.metrics
|> Util.maybe_add_visitors_metric()
|> TableDecider.partition_metrics(query)
|> Enum.map(fn {table_type, metrics} ->
build_split_query(table_type, metrics, query)
end)
end
defp pipeline() do
@ -62,7 +57,8 @@ defmodule Plausible.Stats.QueryOptimizer do
&extend_hostname_filters_to_visit/1,
&remove_revenue_metrics_if_unavailable/1,
&set_time_on_page_data/1,
&trim_relative_date_range/1
&trim_relative_date_range/1,
&set_sql_join_type/1
]
end
@ -162,7 +158,17 @@ defmodule Plausible.Stats.QueryOptimizer do
Enum.find(query.dimensions, &Time.time_dimension?/1)
end
defp split_sessions_query(query, session_metrics) do
# Builds the `{:events, query}` pair for the events table: the incoming query
# restricted to the metrics that were assigned to events.
defp build_split_query(:events, metrics, query) do
  events_query =
    Query.set(query,
      metrics: metrics,
      include_imported: query.include_imported
    )

  {:events, events_query}
end
defp build_split_query(:sessions, metrics, query) do
dimensions =
query.dimensions
|> Enum.map(fn
@ -179,12 +185,21 @@ defmodule Plausible.Stats.QueryOptimizer do
query.filters
end
Query.set(query,
filters: filters,
metrics: session_metrics,
dimensions: dimensions,
include_imported: query.include_imported
)
{
:sessions,
Query.set(query,
filters: filters,
metrics: metrics,
dimensions: dimensions,
include_imported: query.include_imported
)
}
end
# Smeared session metrics reuse the regular `:sessions` split and are still
# tagged `:sessions`; the only difference is that the resulting query has
# `smear_session_metrics` switched on.
defp build_split_query(:sessions_smeared, metrics, query) do
  {_table_type, sessions_query} = build_split_query(:sessions, metrics, query)

  {:sessions, Query.set(sessions_query, smear_session_metrics: true)}
end
on_ee do
@ -299,4 +314,19 @@ defmodule Plausible.Stats.QueryOptimizer do
|> DateTimeRange.to_timezone("Etc/UTC")
end
end
# LEFT JOIN is the default: it is more performant and the joined tables are
# expected to contain the same dimensions.
# "time:minute" is the one exception. Sessions are counted in every time bucket
# they were active in, even when no event occurred during that particular
# minute, so the sessions subquery might return more rows than the events one.
# In that case the join type is switched to :full.
defp set_sql_join_type(query) do
  case "time:minute" in query.dimensions do
    true -> Query.set(query, sql_join_type: :full)
    false -> query
  end
end
end

View File

@ -64,7 +64,7 @@ defmodule Plausible.Stats.SQL.Expression do
})
end
def select_dimension(q, key, "time:hour", :sessions, query) do
def select_dimension(q, key, "time:hour", :sessions, query) when query.smear_session_metrics do
# :TRICKY: ClickHouse timeSlots works off of unix epoch and is not
# timezone-aware. This means that for e.g. Asia/Katmandu (GMT+5:45)
# to work, we divide time into 15-minute buckets and later combine these
@ -87,7 +87,8 @@ defmodule Plausible.Stats.SQL.Expression do
end
# :NOTE: This is not exposed in Query APIv2
def select_dimension(q, key, "time:minute", :sessions, query) do
def select_dimension(q, key, "time:minute", :sessions, query)
when query.smear_session_metrics do
q
|> join(:inner, [s], time_slot in time_slots(query, 60),
as: :time_slot,
@ -338,6 +339,12 @@ defmodule Plausible.Stats.SQL.Expression do
})
end
# Smeared variant of the `:visits` metric, selected only when
# `query.smear_session_metrics` is `true`. It counts distinct `session_id`s
# with `uniq` and passes the result through `scale_sample`, whereas the
# unguarded `:visits` clause sums `sign`.
# NOTE(review): presumably required because smearing joins each session row
# against several time slots - confirm against the `time_slots` join.
def session_metric(:visits, query) when query.smear_session_metrics do
wrap_alias([s], %{
visits: scale_sample(fragment("uniq(?)", s.session_id))
})
end
def session_metric(:visits, _query) do
wrap_alias([s], %{
visits: scale_sample(fragment("greatest(sum(?), 0)", s.sign))

View File

@ -15,15 +15,14 @@ defmodule Plausible.Stats.SQL.QueryBuilder do
require Plausible.Stats.SQL.Expression
def build(query, site) do
{event_query, sessions_query} = QueryOptimizer.split(query)
event_q = build_events_query(site, event_query)
sessions_q = build_sessions_query(site, sessions_query)
join_query_results(
{event_q, event_query},
{sessions_q, sessions_query}
)
query
|> QueryOptimizer.split()
|> Enum.map(fn {table_type, table_query} ->
q = build_table_query(table_type, site, table_query)
{table_type, table_query, q}
end)
|> join_query_results(query)
|> build_order_by(query)
|> paginate(query.pagination)
|> select_total_rows(query.include.total_rows)
end
@ -32,9 +31,7 @@ defmodule Plausible.Stats.SQL.QueryBuilder do
Enum.reduce(query.order_by || [], q, &build_order_by(&2, query, &1))
end
defp build_events_query(_site, %Query{metrics: []}), do: nil
defp build_events_query(site, events_query) do
defp build_table_query(:events, site, events_query) do
q =
from(
e in "events_v2",
@ -54,6 +51,25 @@ defmodule Plausible.Stats.SQL.QueryBuilder do
|> TimeOnPage.merge_legacy_time_on_page(events_query)
end
# Builds the Ecto query that computes the metrics of `sessions_query` from the
# "sessions_v2" table.
#
# The WHERE clause comes from `SQL.WhereBuilder.build/2` and the SELECT from
# `select_session_metrics/1`; the remaining steps are applied in pipeline order.
defp build_table_query(:sessions, site, sessions_query) do
q =
from(
e in "sessions_v2",
where: ^SQL.WhereBuilder.build(:sessions, sessions_query),
select: ^select_session_metrics(sessions_query)
)
# The sampling hint is only added inside `on_ee`
# (NOTE(review): presumably enterprise-edition builds only - confirm).
on_ee do
q = Plausible.Stats.Sampling.add_query_hint(q, sessions_query)
end
q
|> join_events_if_needed(sessions_query)
|> build_group_by(:sessions, sessions_query)
|> merge_imported(site, sessions_query)
|> SQL.SpecialMetrics.add(site, sessions_query)
end
defp join_sessions_if_needed(q, query) do
if TableDecider.events_join_sessions?(query) do
sessions_q =
@ -79,27 +95,6 @@ defmodule Plausible.Stats.SQL.QueryBuilder do
end
end
defp build_sessions_query(_site, %Query{metrics: []}), do: nil
defp build_sessions_query(site, sessions_query) do
q =
from(
e in "sessions_v2",
where: ^SQL.WhereBuilder.build(:sessions, sessions_query),
select: ^select_session_metrics(sessions_query)
)
on_ee do
q = Plausible.Stats.Sampling.add_query_hint(q, sessions_query)
end
q
|> join_events_if_needed(sessions_query)
|> build_group_by(:sessions, sessions_query)
|> merge_imported(site, sessions_query)
|> SQL.SpecialMetrics.add(site, sessions_query)
end
def join_events_if_needed(q, query) do
if TableDecider.sessions_join_events?(query) do
events_q =
@ -173,24 +168,24 @@ defmodule Plausible.Stats.SQL.QueryBuilder do
)
end
defp join_query_results({nil, _}, {nil, _}), do: nil
# Only one table is being queried - skip joining!
defp join_query_results([{_table_type, _query, q}], _main_query), do: q
defp join_query_results({events_q, events_query}, {nil, _}),
do: events_q |> build_order_by(events_query)
# Multiple tables: join results based on dimensions, select metrics from each and the appropriate dimensions.
defp join_query_results(queries, main_query) do
queries
|> Enum.reduce(nil, fn
{_table_type, query, q}, nil ->
from(e in subquery(q))
|> select_join_metrics(query, query.metrics)
defp join_query_results({nil, events_query}, {sessions_q, _}),
do: sessions_q |> build_order_by(events_query)
defp join_query_results({events_q, events_query}, {sessions_q, sessions_query}) do
{join_type, events_q_fields, sessions_q_fields} =
TableDecider.join_options(events_query, sessions_query)
join(subquery(events_q), join_type, [e], s in subquery(sessions_q),
on: ^build_group_by_join(events_query)
)
|> select_join_fields(events_query, events_q_fields, e)
|> select_join_fields(sessions_query, sessions_q_fields, s)
|> build_order_by(events_query)
{_table_type, query, q}, acc ->
join(acc, main_query.sql_join_type, [], s in subquery(q),
on: ^build_group_by_join(main_query)
)
|> select_join_metrics(query, query.metrics -- [:sample_percent])
end)
|> select_dimensions(main_query)
end
# NOTE: Old queries do their own pagination
@ -214,8 +209,33 @@ defmodule Plausible.Stats.SQL.QueryBuilder do
def build_group_by_join(query) do
query.dimensions
|> Enum.map(fn dim ->
dynamic([e, s], field(e, ^shortname(query, dim)) == field(s, ^shortname(query, dim)))
dynamic([a, ..., b], field(a, ^shortname(query, dim)) == field(b, ^shortname(query, dim)))
end)
|> Enum.reduce(fn condition, acc -> dynamic([], ^acc and ^condition) end)
end
# Adds one aliased select field per entry in `metrics` to `q`.
#
# The `[..., x]` binding refers to the last binding of `q`, so each metric is
# read from the most recently joined subquery (or the only source when nothing
# has been joined yet). Field names and aliases both come from
# `shortname(query, metric)`.
defp select_join_metrics(q, query, metrics) do
Enum.reduce(metrics, q, fn
metric, q ->
select_merge_as(q, [..., x], %{
shortname(query, metric) => field(x, ^shortname(query, metric))
})
end)
end
# Adds one aliased select field per dimension in `query.dimensions` to `q`.
#
# `[x]` binds the first (left-most) source of the join, `[..., x]` the last one.
defp select_dimensions(q, query) do
Enum.reduce(query.dimensions, q, fn dimension, q ->
# We generally select dimensions from the left-most table. Only exception is time:minute where
# we use sessions table as sessions are considered on-going during the whole period.
# With a :full join the left-most side may have no row for a given minute, so
# the value is read from the last joined subquery instead.
if query.sql_join_type == :full and "time:minute" == dimension do
select_merge_as(q, [..., x], %{
shortname(query, dimension) => field(x, ^shortname(query, dimension))
})
else
select_merge_as(q, [x], %{
shortname(query, dimension) => field(x, ^shortname(query, dimension))
})
end
end)
end
end

View File

@ -54,84 +54,73 @@ defmodule Plausible.Stats.TableDecider do
end
end
@doc """
Returns a three-element tuple with instructions on how to join two Ecto
queries. The arguments (`events_query` and `sessions_query`) are `%Query{}`
structs that have been split by TableDecider already.
# Table tags returned by `partition_metrics/2`. `:sessions_smeared` marks session
# metrics that are smeared across time buckets.
@type table_type() :: :events | :sessions | :sessions_smeared
# Metrics are atoms such as `:visitors` or `:bounce_rate`.
@type metric() :: atom()
Normally we can always LEFT JOIN sessions to events, selecting `dimensions`
only from the events subquery. That's because:
@spec partition_metrics(list(metric()), Query.t()) :: list({table_type(), list(metric())})
def partition_metrics(requested_metrics, query) do
metrics = partition(requested_metrics, query, &metric_partitioner/2)
1) session dimensions (e.g. entry_page) cannot be queried alongside event
metrics/dimensions, or
2) session dimensions (e.g. operating_system) are also available in the
events table.
The only exception is using the "time:minute" dimension where the sessions
subquery might return more rows than the events one. That's because we're
counting sessions in all time buckets they were active in.
"""
def join_options(events_query, sessions_query) do
events_q_select_fields = events_query.metrics ++ events_query.dimensions
sessions_q_select_fields = sessions_query.metrics -- [:sample_percent]
if "time:minute" in events_query.dimensions do
{
:full,
events_q_select_fields -- ["time:minute"],
sessions_q_select_fields ++ ["time:minute"]
}
else
{:left, events_q_select_fields, sessions_q_select_fields}
end
end
def partition_metrics(metrics, query) do
%{
event: event_only_metrics,
session: session_only_metrics,
either: either_metrics,
other: other_metrics,
sample_percent: sample_percent
} =
partition(metrics, query, &metric_partitioner/2)
%{event: event_only_filters, session: session_only_filters} =
filters =
query.filters
|> dimensions_used_in_filters()
|> partition(query, &dimension_partitioner/2)
%{event: event_only_dimensions, session: session_only_dimensions} =
partition(query.dimensions, query, &dimension_partitioner/2)
dimensions = partition(query.dimensions, query, &dimension_partitioner/2)
cond do
# Only one table needs to be queried
empty?(event_only_metrics) && empty?(event_only_filters) && empty?(event_only_dimensions) ->
{[], session_only_metrics ++ either_metrics ++ sample_percent, other_metrics}
empty?(metrics.event) && empty?(filters.event) && empty?(dimensions.event) ->
[sessions: metrics.session ++ metrics.either ++ metrics.sample_percent]
empty?(session_only_metrics) && empty?(session_only_filters) &&
empty?(session_only_dimensions) ->
{event_only_metrics ++ either_metrics ++ sample_percent, [], other_metrics}
empty?(metrics.session) && empty?(filters.session) && empty?(dimensions.session) ->
[events: metrics.event ++ metrics.either ++ metrics.sample_percent]
# Filters and/or dimensions on both events and sessions, but only one kind of metric
empty?(event_only_metrics) && empty?(event_only_dimensions) ->
{[], session_only_metrics ++ either_metrics ++ sample_percent, other_metrics}
empty?(metrics.event) && empty?(dimensions.event) ->
[sessions: metrics.session ++ metrics.either ++ metrics.sample_percent]
empty?(session_only_metrics) && empty?(session_only_dimensions) ->
{event_only_metrics ++ either_metrics ++ sample_percent, [], other_metrics}
empty?(metrics.session) && empty?(dimensions.session) ->
[events: metrics.event ++ metrics.either ++ metrics.sample_percent]
# Default: prefer events
true ->
{event_only_metrics ++ either_metrics ++ sample_percent,
session_only_metrics ++ sample_percent, other_metrics}
[
events: metrics.event ++ metrics.either ++ metrics.sample_percent,
sessions: metrics.session ++ metrics.sample_percent
]
end
|> Enum.flat_map(&smear_session_metrics(&1, query))
|> Enum.reject(fn {_table_type, metrics} -> empty?(metrics) end)
end
# :TRICKY: Visits and visitors should be counted in every bucket the session
# spans, not only where an event occurred or where the session started. Those
# metrics are therefore "smeared" across the length of the session.
# See `time_slots` usage in `Plausible.Stats.SQL.Expression` to understand how this is done.
@smearable_metrics [:visitors, :visits]

# For a `:sessions` entry on a query grouped by "time:minute" or "time:hour",
# returns two entries: the remaining session metrics and the smearable ones.
# Either list may be empty. Every other entry is returned unchanged, wrapped
# in a list.
defp smear_session_metrics({:sessions, metrics} = value, query) do
  if Enum.any?(["time:minute", "time:hour"], &(&1 in query.dimensions)) do
    {smeared, regular} = Enum.split_with(metrics, &(&1 in @smearable_metrics))

    [sessions: regular, sessions_smeared: smeared]
  else
    [value]
  end
end

defp smear_session_metrics(value, _query), do: [value]
# Note: This is inaccurate when filtering but required for old backwards compatibility
defp metric_partitioner(%Query{legacy_breakdown: true}, :pageviews), do: :either
defp metric_partitioner(%Query{legacy_breakdown: true}, :events), do: :either
# :TRICKY: For time:minute dimension we prefer sessions over events as there
# might be minutes where no events occurred but the session was active.
defp metric_partitioner(query, metric) when metric in [:visitors, :visits] do
if "time:minute" in query.dimensions, do: :session, else: :either
end

View File

@ -363,4 +363,14 @@ defmodule Plausible.Stats.QueryOptimizerTest do
assert result.utc_time_range.last == nyc_mar_15_end
end
end
describe "set_sql_join_type" do
test "updates sql_join_type to :full if time:minute dimension is present" do
assert perform(%{dimensions: ["time:minute"]}).sql_join_type == :full
end
test "keeps default sql_join_type otherwise" do
assert perform(%{dimensions: ["time:hour"]}).sql_join_type == :left
end
end
end

View File

@ -28,127 +28,164 @@ defmodule Plausible.Stats.TableDeciderTest do
test "with no metrics or filters" do
query = make_query([])
assert partition_metrics([], query) == {[], [], []}
assert partition_metrics([], query) == []
end
test "session-only metrics accordingly" do
query = make_query([])
assert partition_metrics([:bounce_rate, :views_per_visit], query) ==
{[], [:bounce_rate, :views_per_visit], []}
assert partition_metrics([:bounce_rate, :views_per_visit], query) == [
sessions: [:bounce_rate, :views_per_visit]
]
end
test "event-only metrics accordingly" do
query = make_query([])
assert partition_metrics([:total_revenue, :visitors], query) ==
{[:total_revenue, :visitors], [], []}
assert partition_metrics([:total_revenue, :visitors], query) == [
events: [:total_revenue, :visitors]
]
end
test "filters from both, event-only metrics" do
query = make_query(["event:name", "visit:source"])
assert partition_metrics([:total_revenue], query) == {[:total_revenue], [], []}
assert partition_metrics([:total_revenue], query) == [events: [:total_revenue]]
end
test "filters from both, session-only metrics" do
query = make_query(["event:name", "visit:source"])
assert partition_metrics([:bounce_rate], query) == {[], [:bounce_rate], []}
assert partition_metrics([:bounce_rate], query) == [sessions: [:bounce_rate]]
end
test "session filters but no session metrics" do
query = make_query(["visit:source"])
assert partition_metrics([:total_revenue], query) == {[:total_revenue], [], []}
assert partition_metrics([:total_revenue], query) == [events: [:total_revenue]]
end
test "sample_percent is added to both types of metrics" do
query = make_query([])
assert partition_metrics([:total_revenue, :sample_percent], query) ==
{[:total_revenue, :sample_percent], [], []}
assert partition_metrics([:total_revenue, :sample_percent], query) == [
events: [:total_revenue, :sample_percent]
]
assert partition_metrics([:bounce_rate, :sample_percent], query) ==
{[], [:bounce_rate, :sample_percent], []}
assert partition_metrics([:bounce_rate, :sample_percent], query) == [
sessions: [:bounce_rate, :sample_percent]
]
assert partition_metrics([:total_revenue, :bounce_rate, :sample_percent], query) ==
{[:total_revenue, :sample_percent], [:bounce_rate, :sample_percent], []}
assert partition_metrics([:total_revenue, :bounce_rate, :sample_percent], query) == [
events: [:total_revenue, :sample_percent],
sessions: [:bounce_rate, :sample_percent]
]
end
test "other metrics put in its own result" do
test "other metrics get ignored" do
query = make_query([])
assert partition_metrics([:percentage, :total_visitors], query) ==
{[], [:percentage], [:total_visitors]}
assert partition_metrics([:percentage, :total_visitors], query) == [sessions: [:percentage]]
end
test "metrics that can be calculated on either when event-only metrics" do
query = make_query([])
assert partition_metrics([:total_revenue, :visitors], query) ==
{[:total_revenue, :visitors], [], []}
assert partition_metrics([:total_revenue, :visitors], query) == [
events: [:total_revenue, :visitors]
]
assert partition_metrics([:pageviews, :visits], query) == {[:pageviews, :visits], [], []}
assert partition_metrics([:pageviews, :visits], query) == [events: [:pageviews, :visits]]
end
test "metrics that can be calculated on either when session-only metrics" do
query = make_query([])
assert partition_metrics([:bounce_rate, :visitors], query) ==
{[], [:bounce_rate, :visitors], []}
assert partition_metrics([:bounce_rate, :visitors], query) == [
sessions: [:bounce_rate, :visitors]
]
assert partition_metrics([:visit_duration, :visits], query) ==
{[], [:visit_duration, :visits], []}
assert partition_metrics([:visit_duration, :visits], query) == [
sessions: [:visit_duration, :visits]
]
end
test "metrics that can be calculated on either are biased to events" do
query = make_query([])
assert partition_metrics([:bounce_rate, :total_revenue, :visitors], query) ==
{[:total_revenue, :visitors], [:bounce_rate], []}
assert partition_metrics([:bounce_rate, :total_revenue, :visitors], query) == [
events: [:total_revenue, :visitors],
sessions: [:bounce_rate]
]
end
test "sample_percent is handled with either metrics" do
query = make_query([])
assert partition_metrics([:visitors, :sample_percent], query) ==
{[], [:visitors, :sample_percent], []}
assert partition_metrics([:visitors, :sample_percent], query) == [
sessions: [:visitors, :sample_percent]
]
end
test "metric can be calculated on either, but filtering on events" do
query = make_query(["event:name"])
assert partition_metrics([:visitors], query) == {[:visitors], [], []}
assert partition_metrics([:visitors], query) == [events: [:visitors]]
end
test "metric can be calculated on either, but filtering on events and sessions" do
query = make_query(["event:name", "visit:exit_page"])
assert partition_metrics([:visitors], query) == {[], [:visitors], []}
assert partition_metrics([:visitors], query) == [sessions: [:visitors]]
end
test "metric can be calculated on either, filtering on either" do
query = make_query(["visit:source"])
assert partition_metrics([:visitors], query) == {[], [:visitors], []}
assert partition_metrics([:visitors], query) == [sessions: [:visitors]]
end
test "metric can be calculated on either, filtering on sessions" do
query = make_query(["visit:exit_page"])
assert partition_metrics([:visitors], query) == {[], [:visitors], []}
assert partition_metrics([:visitors], query) == [sessions: [:visitors]]
end
test "query dimensions lean metric" do
assert partition_metrics([:visitors], make_query([], ["event:name"])) ==
{[:visitors], [], []}
assert partition_metrics([:visitors], make_query([], ["event:name"])) == [
events: [:visitors]
]
assert partition_metrics([:visitors], make_query([], ["visit:source"])) ==
{[], [:visitors], []}
assert partition_metrics([:visitors], make_query([], ["visit:source"])) == [
sessions: [:visitors]
]
assert partition_metrics([:visitors], make_query([], ["visit:exit_page"])) ==
{[], [:visitors], []}
assert partition_metrics([:visitors], make_query([], ["visit:exit_page"])) == [
sessions: [:visitors]
]
end
test "smearable metrics" do
assert partition_metrics(
[:visitors, :visits, :visit_duration, :pageviews],
make_query([], ["time:minute"])
) == [
events: [:pageviews],
sessions: [:visit_duration],
sessions_smeared: [:visitors, :visits]
]
assert partition_metrics([:visitors], make_query([], ["time:hour"])) == [
sessions_smeared: [:visitors]
]
assert partition_metrics([:visitors], make_query([], ["time:day"])) == [
sessions: [:visitors]
]
assert partition_metrics([:visitors], make_query([], [])) == [
sessions: [:visitors]
]
end
end

View File

@ -1681,6 +1681,76 @@ defmodule PlausibleWeb.Api.ExternalStatsController.QueryTest do
%{"dimensions" => ["2021-01-02 12:00:00"], "metrics" => [2]}
]
end
test "visitors and visits are smeared across time:minute buckets but visit_duration is not",
%{conn: conn, site: site} do
populate_stats(site, [
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 00:10:00]),
build(:pageview, user_id: 2, timestamp: ~N[2021-01-01 00:05:00]),
build(:pageview, user_id: 2, timestamp: ~N[2021-01-01 00:08:00])
])
conn =
post(conn, "/api/v2/query-internal-test", %{
"site_id" => site.domain,
"metrics" => ["visitors", "visits", "visit_duration", "pageviews"],
"date_range" => ["2021-01-01T00:00:00Z", "2021-01-01T00:30:00Z"],
"dimensions" => ["time:minute"]
})
assert json_response(conn, 200)["results"] == [
%{"dimensions" => ["2021-01-01 00:00:00"], "metrics" => [1, 1, 0, 1]},
%{"dimensions" => ["2021-01-01 00:01:00"], "metrics" => [1, 1, 0, 0]},
%{"dimensions" => ["2021-01-01 00:02:00"], "metrics" => [1, 1, 0, 0]},
%{"dimensions" => ["2021-01-01 00:03:00"], "metrics" => [1, 1, 0, 0]},
%{"dimensions" => ["2021-01-01 00:04:00"], "metrics" => [1, 1, 0, 0]},
%{"dimensions" => ["2021-01-01 00:05:00"], "metrics" => [2, 2, 0, 1]},
%{"dimensions" => ["2021-01-01 00:06:00"], "metrics" => [2, 2, 0, 0]},
%{"dimensions" => ["2021-01-01 00:07:00"], "metrics" => [2, 2, 0, 0]},
%{"dimensions" => ["2021-01-01 00:08:00"], "metrics" => [2, 2, 180, 1]},
%{"dimensions" => ["2021-01-01 00:09:00"], "metrics" => [1, 1, 0, 0]},
%{"dimensions" => ["2021-01-01 00:10:00"], "metrics" => [1, 1, 600, 1]}
]
end
test "visitors and visits are smeared across time:hour buckets but visit_duration is not", %{
conn: conn,
site: site
} do
populate_stats(site, [
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 00:00:00]),
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 00:20:00]),
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 00:40:00]),
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 01:00:00]),
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 01:20:00]),
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 01:40:00]),
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 02:00:00]),
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 02:20:00]),
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 02:40:00]),
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 03:00:00]),
build(:pageview, user_id: 1, timestamp: ~N[2021-01-01 03:20:00]),
build(:pageview, user_id: 2, timestamp: ~N[2021-01-01 01:05:00]),
build(:pageview, user_id: 2, timestamp: ~N[2021-01-01 01:10:00]),
build(:pageview, user_id: 3, timestamp: ~N[2021-01-01 02:10:00]),
build(:pageview, user_id: 3, timestamp: ~N[2021-01-01 02:20:00])
])
conn =
post(conn, "/api/v2/query", %{
"site_id" => site.domain,
"metrics" => ["visits", "visitors", "visit_duration", "pageviews"],
"date_range" => ["2021-01-01", "2021-01-01"],
"dimensions" => ["time:hour"]
})
assert json_response(conn, 200)["results"] == [
%{"dimensions" => ["2021-01-01 00:00:00"], "metrics" => [1, 1, 0, 3]},
%{"dimensions" => ["2021-01-01 01:00:00"], "metrics" => [2, 2, 300, 5]},
%{"dimensions" => ["2021-01-01 02:00:00"], "metrics" => [2, 2, 600, 5]},
%{"dimensions" => ["2021-01-01 03:00:00"], "metrics" => [1, 1, 12_000, 2]}
]
end
end
test "breakdown by visit:source", %{conn: conn, site: site} do