defmodule Plausible.Stats.SQL.QueryBuilder do @moduledoc false use Plausible use Plausible.Stats.SQL.Fragments import Ecto.Query import Plausible.Stats.Imported import Plausible.Stats.Util alias Plausible.Stats.{Query, QueryOptimizer, TableDecider, SQL} alias Plausible.Stats.SQL.Expression alias Plausible.Stats.Legacy.TimeOnPage require Plausible.Stats.SQL.Expression def build(query, site) do query |> QueryOptimizer.split() |> Enum.map(fn {table_type, table_query} -> q = build_table_query(table_type, site, table_query) {table_type, table_query, q} end) |> join_query_results(query) |> build_order_by(query) |> paginate(query.pagination) |> select_total_rows(query.include.total_rows) end def build_order_by(q, query) do Enum.reduce(query.order_by || [], q, &build_order_by(&2, query, &1)) end defp build_table_query(:events, site, events_query) do q = from( e in "events_v2", where: ^SQL.WhereBuilder.build(:events, events_query), select: ^select_event_metrics(events_query) ) on_ee do q = Plausible.Stats.Sampling.add_query_hint(q, events_query) end q |> join_sessions_if_needed(events_query) |> build_group_by(:events, events_query) |> merge_imported(site, events_query) |> SQL.SpecialMetrics.add(site, events_query) |> TimeOnPage.merge_legacy_time_on_page(events_query) end defp build_table_query(:sessions, site, sessions_query) do q = from( e in "sessions_v2", where: ^SQL.WhereBuilder.build(:sessions, sessions_query), select: ^select_session_metrics(sessions_query) ) on_ee do q = Plausible.Stats.Sampling.add_query_hint(q, sessions_query) end q |> join_events_if_needed(sessions_query) |> build_group_by(:sessions, sessions_query) |> merge_imported(site, sessions_query) |> SQL.SpecialMetrics.add(site, sessions_query) end defp join_sessions_if_needed(q, query) do if TableDecider.events_join_sessions?(query) do %{session: dimensions} = TableDecider.partition_dimensions(query) sessions_q = from( s in "sessions_v2", where: ^SQL.WhereBuilder.build(:sessions, query), where: s.sign == 1, select: %{session_id: s.session_id}, group_by: s.session_id ) # The session-only dimension columns are explicitly selected in joined # sessions table. This enables combining session-only dimensions (entry # and exit pages) with event-only metrics, like revenue. sessions_q = Enum.reduce(dimensions, sessions_q, fn dimension, acc -> Plausible.Stats.SQL.Expression.select_dimension_internal(acc, dimension) end) on_ee do sessions_q = Plausible.Stats.Sampling.add_query_hint(sessions_q, query) end from( e in q, join: sq in subquery(sessions_q), on: e.session_id == sq.session_id ) else q end end def join_events_if_needed(q, query) do if TableDecider.sessions_join_events?(query) do events_q = from(e in "events_v2", where: ^SQL.WhereBuilder.build(:events, query), select: %{ session_id: fragment("DISTINCT ?", e.session_id), _sample_factor: fragment("_sample_factor") } ) on_ee do events_q = Plausible.Stats.Sampling.add_query_hint(events_q, query) end from(s in q, join: e in subquery(events_q), on: s.session_id == e.session_id ) else q end end defp select_event_metrics(query) do query.metrics |> Enum.map(&SQL.Expression.event_metric(&1, query)) |> Enum.reduce(%{}, &Map.merge/2) end defp select_session_metrics(query) do query.metrics |> Enum.map(&SQL.Expression.session_metric(&1, query)) |> Enum.reduce(%{}, &Map.merge/2) end def build_group_by(q, :events, query) do # Session-only dimensions are extracted from joined sessions table %{session: session_only_dimensions} = TableDecider.partition_dimensions(query) event_dimensions = query.dimensions -- session_only_dimensions q = Enum.reduce(event_dimensions, q, &dimension_group_by(&2, :events, query, &1)) Enum.reduce( session_only_dimensions, q, &dimension_group_by_join(&2, query, &1) ) end def build_group_by(q, :sessions, query) do Enum.reduce(query.dimensions, q, &dimension_group_by(&2, :sessions, query, &1)) end defp dimension_group_by_join(q, query, dimension) do key = shortname(query, dimension) q |> Expression.select_dimension_from_join(key, dimension) |> group_by([], selected_as(^key)) end defp dimension_group_by(q, :events, query, "event:goal" = dimension) do goal_join_data = Plausible.Stats.Goals.goal_join_data(query) from(e in q, join: goal in Expression.event_goal_join(goal_join_data), hints: "ARRAY", on: true, select_merge: %{ ^shortname(query, dimension) => fragment("?", goal) }, group_by: goal ) end defp dimension_group_by(q, table, query, dimension) do key = shortname(query, dimension) q |> Expression.select_dimension(key, dimension, table, query) |> group_by([], selected_as(^key)) end defp build_order_by(q, query, {metric_or_dimension, order_direction}) do order_by( q, [t], { ^order_direction, selected_as(^shortname(query, metric_or_dimension)) } ) end # Only one table is being queried - skip joining! defp join_query_results([{_table_type, _query, q}], _main_query), do: q # Multiple tables: join results based on dimensions, select metrics from each and the appropriate dimensions. defp join_query_results(queries, main_query) do queries |> Enum.reduce(nil, fn {_table_type, query, q}, nil -> from(e in subquery(q)) |> select_join_metrics(query, query.metrics) {_table_type, query, q}, acc -> join(acc, main_query.sql_join_type, [], s in subquery(q), on: ^build_group_by_join(main_query) ) |> select_join_metrics(query, query.metrics -- [:sample_percent]) end) |> select_dimensions(main_query, queries) end # NOTE: Old queries do their own pagination defp paginate(q, nil = _pagination), do: q defp paginate(q, pagination) do q |> limit(^pagination.limit) |> offset(^pagination.offset) end defp select_total_rows(q, false = _include_total_rows), do: q defp select_total_rows(q, true = _include_total_rows) do q |> select_merge([], %{total_rows: fragment("count() over ()")}) end def build_group_by_join(%Query{dimensions: []}), do: true def build_group_by_join(query) do query.dimensions |> Enum.map(fn dim -> dynamic([a, ..., b], field(a, ^shortname(query, dim)) == field(b, ^shortname(query, dim))) end) |> Enum.reduce(fn condition, acc -> dynamic([], ^acc and ^condition) end) end defp select_join_metrics(q, query, metrics) do Enum.reduce(metrics, q, fn metric, q -> select_merge_as(q, [..., x], %{ shortname(query, metric) => field(x, ^shortname(query, metric)) }) end) end defp select_dimensions(q, query, queries) do Enum.reduce(query.dimensions, q, fn dimension, q -> case select_from(dimension, query, queries) do :leftmost_table -> select_merge_as(q, [x], %{ shortname(query, dimension) => field(x, ^shortname(query, dimension)) }) :rightmost_table -> select_merge_as(q, [..., x], %{ shortname(query, dimension) => field(x, ^shortname(query, dimension)) }) end end) end defp select_from(dimension, query, queries) do smeared? = Enum.any?(queries, fn {_table_type, query, _q} -> query.smear_session_metrics end) cond do query.sql_join_type == :left -> :leftmost_table # We generally select dimensions from the left-most table. Only exception is time:minute/time:hour where # we use sessions table as smeared sessions are considered on-going during the whole period. dimension in ["time:minute", "time:hour"] and smeared? -> :rightmost_table true -> :leftmost_table end end end