490 lines
16 KiB
Elixir
490 lines
16 KiB
Elixir
defmodule Plausible.Stats.SQL.Expression do
|
|
@moduledoc """
|
|
This module is responsible for generating SQL/Ecto expressions
|
|
for dimensions and metrics used in query SELECT statement.
|
|
|
|
Each dimension and metric is tagged with with selected_as for easier
|
|
usage down the line.
|
|
"""
|
|
|
|
use Plausible
|
|
use Plausible.Stats.SQL.Fragments
|
|
|
|
import Ecto.Query
|
|
|
|
alias Plausible.Stats.{Query, Filters, SQL}
|
|
|
|
@no_ref "Direct / None"
|
|
@no_channel "Direct"
|
|
@not_set "(not set)"
|
|
|
|
defmacrop field_or_blank_value(q, key, expr, empty_value) do
|
|
quote do
|
|
select_merge_as(unquote(q), [t], %{
|
|
unquote(key) =>
|
|
fragment("if(empty(?), ?, ?)", unquote(expr), unquote(empty_value), unquote(expr))
|
|
})
|
|
end
|
|
end
|
|
|
|
defmacrop time_slots(query, period_in_seconds) do
|
|
quote do
|
|
fragment(
|
|
"timeSlots(toTimeZone(?, ?), toUInt32(timeDiff(?, ?)), toUInt32(?))",
|
|
s.start,
|
|
^unquote(query).timezone,
|
|
s.start,
|
|
s.timestamp,
|
|
^unquote(period_in_seconds)
|
|
)
|
|
end
|
|
end
|
|
|
|
def select_dimension(q, key, "time:month", _table, query) do
|
|
select_merge_as(q, [t], %{
|
|
key => fragment("toStartOfMonth(toTimeZone(?, ?))", t.timestamp, ^query.timezone)
|
|
})
|
|
end
|
|
|
|
def select_dimension(q, key, "time:week", _table, query) do
|
|
date_range = Query.date_range(query)
|
|
|
|
select_merge_as(q, [t], %{
|
|
key =>
|
|
weekstart_not_before(
|
|
to_timezone(t.timestamp, ^query.timezone),
|
|
^date_range.first
|
|
)
|
|
})
|
|
end
|
|
|
|
def select_dimension(q, key, "time:day", _table, query) do
|
|
select_merge_as(q, [t], %{
|
|
key => fragment("toDate(toTimeZone(?, ?))", t.timestamp, ^query.timezone)
|
|
})
|
|
end
|
|
|
|
def select_dimension(q, key, "time:hour", :sessions, query) when query.smear_session_metrics do
|
|
# :TRICKY: ClickHouse timeSlots works off of unix epoch and is not
|
|
# timezone-aware. This means that for e.g. Asia/Katmandu (GMT+5:45)
|
|
# to work, we divide time into 15-minute buckets and later combine these
|
|
# via toStartOfHour
|
|
q
|
|
|> join(:inner, [s], time_slot in time_slots(query, 15 * 60),
|
|
as: :time_slot,
|
|
hints: "ARRAY",
|
|
on: true
|
|
)
|
|
|> select_merge_as([s, time_slot: time_slot], %{
|
|
key => fragment("toStartOfHour(?)", time_slot)
|
|
})
|
|
end
|
|
|
|
def select_dimension(q, key, "time:hour", _table, query) do
|
|
select_merge_as(q, [t], %{
|
|
key => fragment("toStartOfHour(toTimeZone(?, ?))", t.timestamp, ^query.timezone)
|
|
})
|
|
end
|
|
|
|
# :NOTE: This is not exposed in Query APIv2
|
|
def select_dimension(q, key, "time:minute", :sessions, query)
|
|
when query.smear_session_metrics do
|
|
q
|
|
|> join(:inner, [s], time_slot in time_slots(query, 60),
|
|
as: :time_slot,
|
|
hints: "ARRAY",
|
|
on: true
|
|
)
|
|
|> select_merge_as([s, time_slot: time_slot], %{
|
|
key => fragment("?", time_slot)
|
|
})
|
|
end
|
|
|
|
# :NOTE: This is not exposed in Query APIv2
|
|
def select_dimension(q, key, "time:minute", _table, query) do
|
|
select_merge_as(q, [t], %{
|
|
key => fragment("toStartOfMinute(toTimeZone(?, ?))", t.timestamp, ^query.timezone)
|
|
})
|
|
end
|
|
|
|
def select_dimension(q, key, "event:name", _table, _query),
|
|
do: select_merge_as(q, [t], %{key => t.name})
|
|
|
|
def select_dimension(q, key, "event:page", _table, _query),
|
|
do: select_merge_as(q, [t], %{key => t.pathname})
|
|
|
|
def select_dimension(q, key, "event:hostname", _table, _query),
|
|
do: select_merge_as(q, [t], %{key => t.hostname})
|
|
|
|
def select_dimension(q, key, "event:props:" <> property_name, _table, _query) do
|
|
select_merge_as(q, [t], %{
|
|
key =>
|
|
fragment(
|
|
"if(not empty(?), ?, '(none)')",
|
|
get_by_key(t, :meta, ^property_name),
|
|
get_by_key(t, :meta, ^property_name)
|
|
)
|
|
})
|
|
end
|
|
|
|
def select_dimension(q, key, "visit:entry_page", _table, _query),
|
|
do: select_merge_as(q, [t], %{key => t.entry_page})
|
|
|
|
def select_dimension(q, key, "visit:exit_page", _table, _query),
|
|
do: select_merge_as(q, [t], %{key => t.exit_page})
|
|
|
|
def select_dimension(q, key, "visit:utm_medium", _table, _query),
|
|
do: field_or_blank_value(q, key, t.utm_medium, @not_set)
|
|
|
|
def select_dimension(q, key, "visit:utm_source", _table, _query),
|
|
do: field_or_blank_value(q, key, t.utm_source, @not_set)
|
|
|
|
def select_dimension(q, key, "visit:utm_campaign", _table, _query),
|
|
do: field_or_blank_value(q, key, t.utm_campaign, @not_set)
|
|
|
|
def select_dimension(q, key, "visit:utm_content", _table, _query),
|
|
do: field_or_blank_value(q, key, t.utm_content, @not_set)
|
|
|
|
def select_dimension(q, key, "visit:utm_term", _table, _query),
|
|
do: field_or_blank_value(q, key, t.utm_term, @not_set)
|
|
|
|
def select_dimension(q, key, "visit:source", _table, _query),
|
|
do: field_or_blank_value(q, key, t.source, @no_ref)
|
|
|
|
def select_dimension(q, key, "visit:channel", _table, _query),
|
|
do: field_or_blank_value(q, key, t.acquisition_channel, @no_channel)
|
|
|
|
def select_dimension(q, key, "visit:referrer", _table, _query),
|
|
do: field_or_blank_value(q, key, t.referrer, @no_ref)
|
|
|
|
def select_dimension(q, key, "visit:device", _table, _query),
|
|
do: field_or_blank_value(q, key, t.device, @not_set)
|
|
|
|
def select_dimension(q, key, "visit:os", _table, _query),
|
|
do: field_or_blank_value(q, key, t.os, @not_set)
|
|
|
|
def select_dimension(q, key, "visit:os_version", _table, _query),
|
|
do: field_or_blank_value(q, key, t.os_version, @not_set)
|
|
|
|
def select_dimension(q, key, "visit:browser", _table, _query),
|
|
do: field_or_blank_value(q, key, t.browser, @not_set)
|
|
|
|
def select_dimension(q, key, "visit:browser_version", _table, _query),
|
|
do: field_or_blank_value(q, key, t.browser_version, @not_set)
|
|
|
|
def select_dimension(q, key, "visit:country", _table, _query),
|
|
do: select_merge_as(q, [t], %{key => t.country})
|
|
|
|
def select_dimension(q, key, "visit:region", _table, _query),
|
|
do: select_merge_as(q, [t], %{key => t.region})
|
|
|
|
def select_dimension(q, key, "visit:city", _table, _query),
|
|
do: select_merge_as(q, [t], %{key => t.city})
|
|
|
|
def select_dimension(q, key, "visit:country_name", _table, _query),
|
|
do: select_merge_as(q, [t], %{key => t.country_name})
|
|
|
|
def select_dimension(q, key, "visit:region_name", _table, _query),
|
|
do: select_merge_as(q, [t], %{key => t.region_name})
|
|
|
|
def select_dimension(q, key, "visit:city_name", _table, _query),
|
|
do: select_merge_as(q, [t], %{key => t.city_name})
|
|
|
|
def select_dimension_internal(q, "visit:entry_page") do
|
|
select_merge_as(q, [t], %{
|
|
entry_page: fragment("any(?)", field(t, :entry_page))
|
|
})
|
|
end
|
|
|
|
def select_dimension_internal(q, "visit:entry_page_hostname") do
|
|
select_merge_as(q, [t], %{
|
|
entry_page_hostname: fragment("any(?)", field(t, :entry_page_hostname))
|
|
})
|
|
end
|
|
|
|
def select_dimension_internal(q, "visit:exit_page") do
|
|
# As exit page changes with every pageview event over the lifetime
|
|
# of a session, only the most recent value must be considered.
|
|
select_merge_as(q, [t], %{
|
|
exit_page: fragment("argMax(?, ?)", field(t, :exit_page), field(t, :events))
|
|
})
|
|
end
|
|
|
|
def select_dimension_internal(q, "visit:exit_page_hostname") do
|
|
select_merge_as(q, [t], %{
|
|
exit_page_hostname:
|
|
fragment("argMax(?, ?)", field(t, :exit_page_hostname), field(t, :events))
|
|
})
|
|
end
|
|
|
|
def select_dimension_internal(q, _dimension), do: q
|
|
|
|
def select_dimension_from_join(q, key, "visit:entry_page"),
|
|
do: select_merge_as(q, [..., t], %{key => t.entry_page})
|
|
|
|
def select_dimension_from_join(q, key, "visit:entry_page_hostname"),
|
|
do: select_merge_as(q, [..., t], %{key => t.entry_page_hostaname})
|
|
|
|
def select_dimension_from_join(q, key, "visit:exit_page"),
|
|
do: select_merge_as(q, [..., t], %{key => t.exit_page})
|
|
|
|
def select_dimension_from_join(q, key, "visit:exit_page_hostname"),
|
|
do: select_merge_as(q, [..., t], %{key => t.exit_page_hostname})
|
|
|
|
def select_dimension_from_join(q, _key, _dimension), do: q
|
|
|
|
def event_metric(:pageviews, _query) do
|
|
wrap_alias([e], %{
|
|
pageviews: scale_sample(fragment("countIf(? = 'pageview')", e.name))
|
|
})
|
|
end
|
|
|
|
def event_metric(:events, _query) do
|
|
wrap_alias([e], %{
|
|
events: scale_sample(fragment("countIf(? != 'engagement')", e.name))
|
|
})
|
|
end
|
|
|
|
def event_metric(:visitors, _query) do
|
|
wrap_alias([e], %{
|
|
visitors: scale_sample(fragment("uniq(?)", e.user_id))
|
|
})
|
|
end
|
|
|
|
def event_metric(:visits, _query) do
|
|
wrap_alias([e], %{
|
|
visits: scale_sample(fragment("uniq(?)", e.session_id))
|
|
})
|
|
end
|
|
|
|
on_ee do
|
|
def event_metric(:total_revenue, _query) do
|
|
wrap_alias(
|
|
[e],
|
|
%{
|
|
total_revenue:
|
|
fragment("toDecimal64(sum(?) * any(_sample_factor), 3)", e.revenue_reporting_amount)
|
|
}
|
|
)
|
|
end
|
|
|
|
def event_metric(:average_revenue, _query) do
|
|
wrap_alias(
|
|
[e],
|
|
%{
|
|
average_revenue: fragment("toDecimal64(avg(?), 3)", e.revenue_reporting_amount)
|
|
}
|
|
)
|
|
end
|
|
end
|
|
|
|
def event_metric(:sample_percent, _query) do
|
|
wrap_alias([], %{
|
|
sample_percent:
|
|
fragment("if(any(_sample_factor) > 1, round(100 / any(_sample_factor)), 100)")
|
|
})
|
|
end
|
|
|
|
def event_metric(:percentage, _query), do: %{}
|
|
def event_metric(:conversion_rate, _query), do: %{}
|
|
def event_metric(:scroll_depth, _query), do: %{}
|
|
def event_metric(:group_conversion_rate, _query), do: %{}
|
|
def event_metric(:total_visitors, _query), do: %{}
|
|
|
|
def event_metric(:time_on_page, query) do
|
|
selected =
|
|
case query.time_on_page_data do
|
|
%{include_new_metric: false} ->
|
|
wrap_alias(
|
|
[e],
|
|
%{
|
|
# :KLUDGE: We would like to but can't use constant 0 here as it leads to cyclic aliases if there's no other
|
|
# metrics selected other than time_on_page.
|
|
__internal_total_time_on_page: fragment("sumArray([0])"),
|
|
__internal_total_time_on_page_visits: fragment("sumArray([0])")
|
|
}
|
|
)
|
|
|
|
%{include_new_metric: true, cutoff: nil} ->
|
|
wrap_alias(
|
|
[e],
|
|
%{
|
|
__internal_total_time_on_page: fragment("sum(?) / 1000", e.engagement_time),
|
|
__internal_total_time_on_page_visits:
|
|
fragment("uniqIf(?, ? = 'engagement')", e.session_id, e.name)
|
|
}
|
|
)
|
|
|
|
%{include_new_metric: true, cutoff: cutoff} ->
|
|
wrap_alias(
|
|
[e],
|
|
%{
|
|
__internal_total_time_on_page:
|
|
fragment(
|
|
"sumIf(?, ? >= ?) / 1000",
|
|
e.engagement_time,
|
|
e.timestamp,
|
|
^cutoff
|
|
),
|
|
__internal_total_time_on_page_visits:
|
|
fragment(
|
|
"uniqIf(?, ? = 'engagement' and ? >= ?)",
|
|
e.session_id,
|
|
e.name,
|
|
e.timestamp,
|
|
^cutoff
|
|
)
|
|
}
|
|
)
|
|
end
|
|
|
|
if query.time_on_page_data.include_legacy_metric do
|
|
selected
|
|
else
|
|
Map.merge(
|
|
selected,
|
|
wrap_alias([e], %{
|
|
time_on_page:
|
|
time_on_page(
|
|
selected_as(:__internal_total_time_on_page),
|
|
selected_as(:__internal_total_time_on_page_visits)
|
|
)
|
|
})
|
|
)
|
|
end
|
|
end
|
|
|
|
def event_metric(unknown, _query), do: raise("Unknown metric: #{unknown}")
|
|
|
|
def session_metric(:bounce_rate, query) do
|
|
# :TRICKY: If page is passed to query, we only count bounce rate where users _entered_ at page.
|
|
event_page_filter = Filters.get_toplevel_filter(query, "event:page")
|
|
condition = SQL.WhereBuilder.build_condition(:entry_page, event_page_filter)
|
|
|
|
wrap_alias([], %{
|
|
bounce_rate:
|
|
fragment(
|
|
# :TRICKY: Before PR #4493, we could have sessions where `sum(is_bounce * sign)`
|
|
# is negative, leading to an underflow and >100% bounce rate. This works around
|
|
# that issue.
|
|
"toUInt32(greatest(ifNotFinite(round(sumIf(is_bounce * sign, ?) / sumIf(sign, ?) * 100), 0), 0))",
|
|
^condition,
|
|
^condition
|
|
),
|
|
__internal_visits: fragment("toUInt32(greatest(sum(sign), 0))")
|
|
})
|
|
end
|
|
|
|
def session_metric(:exit_rate, _query) do
|
|
wrap_alias([s], %{
|
|
__internal_visits: fragment("toUInt32(greatest(sum(sign), 0))")
|
|
})
|
|
end
|
|
|
|
def session_metric(:visits, query) when query.smear_session_metrics do
|
|
wrap_alias([s], %{
|
|
visits: scale_sample(fragment("uniq(?)", s.session_id))
|
|
})
|
|
end
|
|
|
|
def session_metric(:visits, _query) do
|
|
wrap_alias([s], %{
|
|
visits: scale_sample(fragment("greatest(sum(?), 0)", s.sign))
|
|
})
|
|
end
|
|
|
|
def session_metric(:pageviews, _query) do
|
|
wrap_alias([s], %{
|
|
pageviews: scale_sample(fragment("greatest(sum(? * ?), 0)", s.sign, s.pageviews))
|
|
})
|
|
end
|
|
|
|
def session_metric(:events, _query) do
|
|
wrap_alias([s], %{
|
|
events: scale_sample(fragment("greatest(sum(? * ?), 0)", s.sign, s.events))
|
|
})
|
|
end
|
|
|
|
def session_metric(:visitors, _query) do
|
|
wrap_alias([s], %{
|
|
visitors: scale_sample(fragment("uniq(?)", s.user_id))
|
|
})
|
|
end
|
|
|
|
def session_metric(:visit_duration, _query) do
|
|
wrap_alias([], %{
|
|
visit_duration:
|
|
fragment("toUInt32(greatest(ifNotFinite(round(sum(duration * sign) / sum(sign)), 0), 0))"),
|
|
__internal_visits: fragment("toUInt32(greatest(sum(sign), 0))")
|
|
})
|
|
end
|
|
|
|
def session_metric(:views_per_visit, _query) do
|
|
wrap_alias([s], %{
|
|
views_per_visit:
|
|
fragment(
|
|
"greatest(ifNotFinite(round(sum(? * ?) / sum(?), 2), 0), 0)",
|
|
s.sign,
|
|
s.pageviews,
|
|
s.sign
|
|
),
|
|
__internal_visits: fragment("toUInt32(greatest(sum(sign), 0))")
|
|
})
|
|
end
|
|
|
|
def session_metric(:sample_percent, _query) do
|
|
wrap_alias([], %{
|
|
sample_percent:
|
|
fragment("if(any(_sample_factor) > 1, round(100 / any(_sample_factor)), 100)")
|
|
})
|
|
end
|
|
|
|
def session_metric(:percentage, _query), do: %{}
|
|
def session_metric(:conversion_rate, _query), do: %{}
|
|
def session_metric(:group_conversion_rate, _query), do: %{}
|
|
|
|
@doc """
|
|
The fragment matches events to goals by:
|
|
1. Checking if the pathname matches the goal's page regex pattern
|
|
2. Verifying the event name matches the expected name for the goal type
|
|
3. Validating scroll depth is within threshold (for scroll goals)
|
|
4. Ensuring all custom properties match (if any are defined on the goal)
|
|
|
|
Returns an array of goal indices that the event matches.
|
|
"""
|
|
defmacro event_goal_join(goal_join_data) do
|
|
quote do
|
|
fragment(
|
|
"""
|
|
arrayIntersect(
|
|
multiMatchAllIndices(?, ?),
|
|
arrayMap(
|
|
(expected_name, threshold, index, custom_props_keys, custom_props_values) -> if(
|
|
expected_name = ? and ? between threshold and 100 and
|
|
(empty(custom_props_keys) OR arrayAll((k, v) -> ?[indexOf(?, k)] = v, custom_props_keys, custom_props_values)),
|
|
index, -1
|
|
),
|
|
?,
|
|
?,
|
|
?,
|
|
?,
|
|
?
|
|
)
|
|
)
|
|
""",
|
|
e.pathname,
|
|
type(^unquote(goal_join_data).page_regexes, {:array, :string}),
|
|
e.name,
|
|
e.scroll_depth,
|
|
field(e, :"meta.value"),
|
|
field(e, :"meta.key"),
|
|
type(^unquote(goal_join_data).event_names_by_type, {:array, :string}),
|
|
type(^unquote(goal_join_data).scroll_thresholds, {:array, :integer}),
|
|
type(^unquote(goal_join_data).indices, {:array, :integer}),
|
|
type(^unquote(goal_join_data).custom_props_keys, {:array, {:array, :string}}),
|
|
type(^unquote(goal_join_data).custom_props_values, {:array, {:array, :string}})
|
|
)
|
|
end
|
|
end
|
|
end
|