189 lines
5.4 KiB
Elixir
189 lines
5.4 KiB
Elixir
defmodule Plausible.Stats.QueryOptimizer do
|
|
@moduledoc """
|
|
Functions that rewrite a `Query` for business-logic reasons before an Ecto query is built from it.
|
|
"""
|
|
|
|
use Plausible
|
|
alias Plausible.Stats.{DateTimeRange, Filters, Query, TableDecider, Util, Time}
|
|
|
|
@doc """
Runs `query` through every step returned by `pipeline/0` and returns the result.

Each step is a one-argument function that receives the query produced by the
previous step. In order, the steps:

1. Replace a generic "time" dimension with a concrete granularity picked from
   the query's UTC time range
2. Add a default `order_by` when the query has none
3. Rewrite "time" entries in `order_by` to the query's time dimension
4. Copy `event:hostname` filters onto the matching visit-level hostname
   dimension of the query's dimensions
5. Remove revenue metrics when the query asks for unavailable ones to be
   removed and it has no revenue currencies
"""
def optimize(query) do
  for step <- pipeline(), reduce: query do
    optimized -> step.(optimized)
  end
end
|
|
|
|
@doc """
Splits `query` into an `{events_query, sessions_query}` tuple, since not all
metrics can be queried from a single table.

Metrics are partitioned by `TableDecider.partition_metrics/2` after
`Util.maybe_add_visitors_metric/1` has been applied; the third partition it
returns is ignored here.

The `event:page` dimension is special-cased for the sessions query: it is
replaced with `visit:entry_page`, both in the dimensions and in the filters.
"""
def split(query) do
  metrics = Util.maybe_add_visitors_metric(query.metrics)

  {event_metrics, sessions_metrics, _other_metrics} =
    TableDecider.partition_metrics(metrics, query)

  events_query =
    Query.set(query, metrics: event_metrics, include_imported: query.include_imported)

  sessions_query = split_sessions_query(query, sessions_metrics)

  {events_query, sessions_query}
end
|
|
|
|
# Ordered list of the one-argument steps that `optimize/1` folds the query
# through.
#
# The order is significant:
# - `add_missing_order_by/1` and `update_time_in_order_by/1` read the time
#   dimension from `query.dimensions`, so they come after
#   `update_group_by_time/1` has rewritten it.
# - `update_time_in_order_by/1` maps over `query.order_by`, so it comes after
#   `add_missing_order_by/1` has replaced a `nil` order_by.
defp pipeline do
  [
    &update_group_by_time/1,
    &add_missing_order_by/1,
    &update_time_in_order_by/1,
    &extend_hostname_filters_to_visit/1,
    &remove_revenue_metrics_if_unavailable/1
  ]
end
|
|
|
|
# Fills in a default ordering when the query has no `order_by`.
#
# The default sorts by the first metric, descending. When the query has a time
# dimension, that dimension (ascending) takes precedence over the metric.
# `hd/1` raises if `query.metrics` is empty.
defp add_missing_order_by(%Query{order_by: nil} = query) do
  by_first_metric = {hd(query.metrics), :desc}

  default_order =
    case time_dimension(query) do
      nil -> [by_first_metric]
      dimension -> [{dimension, :asc}, by_first_metric]
    end

  %Query{query | order_by: default_order}
end

# An explicit `order_by` is left untouched.
defp add_missing_order_by(query), do: query
|
|
|
|
# Replaces every generic "time" dimension with the concrete granularity that
# `resolve_time_dimension/2` picks for the query's UTC time range. All other
# dimensions are kept as they are, in the same order.
defp update_group_by_time(
       %Query{
         utc_time_range: %DateTimeRange{first: first, last: last}
       } = query
     ) do
  resolved =
    for dimension <- query.dimensions do
      if dimension == "time" do
        resolve_time_dimension(first, last)
      else
        dimension
      end
    end

  %Query{query | dimensions: resolved}
end

# Anything that does not carry a `DateTimeRange` is passed through unchanged.
defp update_group_by_time(query), do: query
|
|
|
|
# Picks the time granularity for the range `first..last`. The first matching
# rule wins:
#
#   difference of at most 48 hours -> "time:hour"
#   difference of at most 40 days  -> "time:day"
#   difference of at most 52 weeks -> "time:week"
#   anything longer                -> "time:month"
defp resolve_time_dimension(first, last) do
  hours = DateTime.diff(last, first, :hour)
  days = DateTime.diff(last, first, :day)

  cond do
    hours <= 48 -> "time:hour"
    days <= 40 -> "time:day"
    Timex.diff(last, first, :weeks) <= 52 -> "time:week"
    true -> "time:month"
  end
end
|
|
|
|
# Rewrites `{"time", direction}` entries of `order_by` so they refer to the
# query's time dimension (as found by `time_dimension/1`) instead of the
# generic "time" name. The direction and all other entries are preserved.
defp update_time_in_order_by(query) do
  rewritten =
    for entry <- query.order_by do
      case entry do
        {"time", direction} -> {time_dimension(query), direction}
        other -> other
      end
    end

  %Query{query | order_by: rewritten}
end
|
|
|
|
# Maps a visit-level dimension to the hostname dimension that an
# `event:hostname` filter is copied onto when the query is broken down by that
# dimension.
@dimensions_hostname_map %{
  # Keyed to the entry page hostname
  "visit:entry_page" => "visit:entry_page_hostname",
  "visit:source" => "visit:entry_page_hostname",
  "visit:referrer" => "visit:entry_page_hostname",
  "visit:utm_medium" => "visit:entry_page_hostname",
  "visit:utm_source" => "visit:entry_page_hostname",
  "visit:utm_campaign" => "visit:entry_page_hostname",
  "visit:utm_content" => "visit:entry_page_hostname",
  "visit:utm_term" => "visit:entry_page_hostname",
  # Keyed to the exit page hostname
  "visit:exit_page" => "visit:exit_page_hostname"
}
|
|
|
|
# To avoid showing referrers across hostnames when an event:hostname filter is
# present for breakdowns, add matching entry/exit page hostname filters.
#
# For every dimension of the query that has an entry in
# `@dimensions_hostname_map`, each top-level `event:hostname` filter is
# duplicated with its dimension swapped for the mapped hostname dimension, and
# the copies are appended to `query.filters`. Queries without an
# `event:hostname` filter are returned unchanged.
defp extend_hostname_filters_to_visit(query) do
  # Note: Only works since event:hostname is only allowed as a top level filter
  hostname_filters =
    Enum.filter(query.filters, fn [_operation, dimension | _rest] ->
      dimension == "event:hostname"
    end)

  # Match on the list shape rather than calling `length/1`, which walks the
  # whole list just to learn whether it is empty.
  case hostname_filters do
    [] ->
      query

    [_ | _] ->
      extra_filters =
        Enum.flat_map(
          query.dimensions,
          &hostname_filters_for_dimension(&1, hostname_filters)
        )

      %Query{query | filters: query.filters ++ extra_filters}
  end
end
|
|
|
|
# Returns copies of `hostname_filters` retargeted at the hostname dimension
# that `@dimensions_hostname_map` associates with `dimension`. Only the
# dimension (second element) of each filter is swapped; the operation and the
# remaining elements are kept. Returns `[]` for dimensions without a mapping.
defp hostname_filters_for_dimension(dimension, hostname_filters) do
  case Map.fetch(@dimensions_hostname_map, dimension) do
    {:ok, hostname_dimension} ->
      Enum.map(hostname_filters, fn [operation, _event_hostname | rest] ->
        [operation, hostname_dimension | rest]
      end)

    :error ->
      []
  end
end
|
|
|
|
# Returns the first dimension of the query for which `Time.time_dimension?/1`
# holds, or `nil` when there is none.
defp time_dimension(query),
  do: Enum.find(query.dimensions, fn dimension -> Time.time_dimension?(dimension) end)
|
|
|
|
# Builds the sessions half of `split/1`.
#
# The `event:page` dimension is replaced with `visit:entry_page`. When the
# query is broken down by `event:page`, filters on it are renamed the same way
# via `Filters.rename_dimensions_used_in_filter/2`; otherwise the filters are
# kept as they are.
defp split_sessions_query(query, session_metrics) do
  renames = %{"event:page" => "visit:entry_page"}

  session_dimensions =
    Enum.map(query.dimensions, fn dimension -> Map.get(renames, dimension, dimension) end)

  session_filters =
    if "event:page" in query.dimensions do
      Filters.rename_dimensions_used_in_filter(query.filters, renames)
    else
      query.filters
    end

  Query.set(query,
    filters: session_filters,
    metrics: session_metrics,
    dimensions: session_dimensions,
    include_imported: query.include_imported
  )
end
|
|
|
|
# NOTE(review): `on_ee` is presumably a macro brought in by `use Plausible`
# that keeps only one of the two branches below at compile time — confirm.
on_ee do
  # Strips every metric listed by `Plausible.Stats.Goal.Revenue.revenue_metrics/0`
  # from the query when it asks for unavailable revenue metrics to be removed
  # and has no revenue currencies. Otherwise the query is returned unchanged.
  defp remove_revenue_metrics_if_unavailable(query) do
    strip_revenue? =
      query.remove_unavailable_revenue_metrics and map_size(query.revenue_currencies) == 0

    case strip_revenue? do
      true ->
        remaining_metrics = query.metrics -- Plausible.Stats.Goal.Revenue.revenue_metrics()
        Query.set(query, metrics: remaining_metrics)

      false ->
        query
    end
  end
else
  # In the other branch this step is a no-op.
  defp remove_revenue_metrics_if_unavailable(query), do: query
end
|
|
end
|