406 lines
12 KiB
Elixir
406 lines
12 KiB
Elixir
defmodule Plausible.Stats.SQL.WhereBuilder do
|
|
@moduledoc """
|
|
A module for building am ecto where clause of a query out of a query.
|
|
"""
|
|
|
|
import Ecto.Query
|
|
import Plausible.Stats.Time, only: [utc_boundaries: 1]
|
|
import Plausible.Stats.Filters.Utils, only: [page_regex: 1]
|
|
|
|
use Plausible.Stats.SQL.Fragments
|
|
|
|
require Logger
|
|
|
|
@sessions_only_visit_fields [
|
|
:entry_page,
|
|
:exit_page,
|
|
:entry_page_hostname,
|
|
:exit_page_hostname
|
|
]
|
|
|
|
@doc "Builds WHERE clause for a given Query against sessions or events table"
|
|
def build(table, query) do
|
|
base_condition = filter_site_time_range(table, query)
|
|
|
|
query.filters
|
|
|> Enum.map(&add_filter(table, query, &1))
|
|
|> Enum.reduce(base_condition, fn condition, acc -> dynamic([], ^acc and ^condition) end)
|
|
end
|
|
|
|
@doc """
|
|
Builds WHERE clause condition based off of a filter and a custom column name
|
|
Used for special business logic cases
|
|
|
|
Accepts nil as the `filter` parameter, in which case the condition is a no-op (WHERE TRUE).
|
|
"""
|
|
def build_condition(db_field, filter) do
|
|
if filter do
|
|
filter_field(db_field, filter)
|
|
else
|
|
true
|
|
end
|
|
end
|
|
|
|
defp filter_site_time_range(table, query) do
|
|
dynamic([], ^filter_site_id(query) and ^filter_time_range(table, query))
|
|
end
|
|
|
|
defp filter_site_id(query) do
|
|
case query.consolidated_site_ids do
|
|
nil ->
|
|
dynamic([x], x.site_id == ^query.site_id)
|
|
|
|
[_ | _] = ids ->
|
|
dynamic([x], fragment("? in ?", x.site_id, ^ids))
|
|
end
|
|
end
|
|
|
|
defp filter_time_range(:events, query) do
|
|
{first_datetime, last_datetime} = utc_boundaries(query)
|
|
dynamic([e], e.timestamp >= ^first_datetime and e.timestamp <= ^last_datetime)
|
|
end
|
|
|
|
defp filter_time_range(:sessions, query) do
|
|
{first_datetime, last_datetime} = utc_boundaries(query)
|
|
|
|
# Counts each _active_ session in time range even if they started before
|
|
dynamic(
|
|
[s],
|
|
# Currently, the sessions table in ClickHouse only has `start` column
|
|
# in its primary key. This means that filtering by `timestamp` is not
|
|
# considered when estimating number of returned rows from index
|
|
# for sample factor calculation. The redundant lower bound `start` condition
|
|
# ensures the lower bound time filter is still present as primary key
|
|
# condition and the sample factor estimation has minimal skew.
|
|
#
|
|
# Without it, the sample factor would be greatly overestimated for large sites,
|
|
# as query would be estimated to return _all_ rows matching other conditions
|
|
# before `start == last_datetime`.
|
|
s.start >= ^NaiveDateTime.add(first_datetime, -7, :day) and
|
|
s.timestamp >= ^first_datetime and
|
|
s.start <= ^last_datetime
|
|
)
|
|
end
|
|
|
|
defp add_filter(table, query, [:ignore_in_totals_query, filter]) do
|
|
add_filter(table, query, filter)
|
|
end
|
|
|
|
defp add_filter(table, query, [:not, filter]) do
|
|
dynamic([e], not (^add_filter(table, query, filter)))
|
|
end
|
|
|
|
defp add_filter(table, query, [:and, filters]) do
|
|
filters
|
|
|> Enum.map(&add_filter(table, query, &1))
|
|
|> Enum.reduce(fn condition, acc -> dynamic([], ^acc and ^condition) end)
|
|
end
|
|
|
|
defp add_filter(table, query, [:or, filters]) do
|
|
filters
|
|
|> Enum.map(&add_filter(table, query, &1))
|
|
|> Enum.reduce(fn condition, acc -> dynamic([], ^acc or ^condition) end)
|
|
end
|
|
|
|
defp add_filter(_table, query, [:has_done, filter]) do
|
|
condition =
|
|
dynamic([], ^filter_site_time_range(:events, query) and ^add_filter(:events, query, filter))
|
|
|
|
dynamic(
|
|
[t],
|
|
t.session_id in subquery(from(e in "events_v2", where: ^condition, select: e.session_id))
|
|
)
|
|
end
|
|
|
|
defp add_filter(table, query, [:has_not_done, filter]) do
|
|
dynamic([], not (^add_filter(table, query, [:has_done, filter])))
|
|
end
|
|
|
|
defp add_filter(:events, _query, [:is, "event:name" | _rest] = filter) do
|
|
in_clause(col_value(:name), filter)
|
|
end
|
|
|
|
defp add_filter(:events, query, [_, "event:goal" | _rest] = filter) do
|
|
Plausible.Stats.Goals.add_filter(query, filter)
|
|
end
|
|
|
|
defp add_filter(:events, _query, [_, "event:page" | _rest] = filter) do
|
|
filter_field(:pathname, filter)
|
|
end
|
|
|
|
defp add_filter(:events, _query, [_, "event:hostname" | _rest] = filter) do
|
|
filter_field(:hostname, filter)
|
|
end
|
|
|
|
defp add_filter(:events, _query, [_, "event:props:" <> prop_name | _rest] = filter) do
|
|
filter_custom_prop(prop_name, :meta, filter)
|
|
end
|
|
|
|
defp add_filter(:events, _query, [_, "visit:entry_props:" <> _prop_name | _rest]) do
|
|
true
|
|
end
|
|
|
|
defp add_filter(
|
|
:events,
|
|
_query,
|
|
[_, "visit:" <> key | _rest] = filter
|
|
) do
|
|
# Filter events query with visit dimension if possible
|
|
field_name = db_field_name(key)
|
|
|
|
if Enum.member?(@sessions_only_visit_fields, field_name) do
|
|
true
|
|
else
|
|
filter_field(field_name, filter)
|
|
end
|
|
end
|
|
|
|
defp add_filter(:sessions, _query, [_, "visit:entry_props:" <> prop_name | _rest] = filter) do
|
|
filter_custom_prop(prop_name, :entry_meta, filter)
|
|
end
|
|
|
|
defp add_filter(:sessions, _query, [_, "visit:" <> key | _rest] = filter) do
|
|
filter_field(db_field_name(key), filter)
|
|
end
|
|
|
|
defp add_filter(:sessions, _query, [_, "event:" <> _ | _rest]) do
|
|
# Cannot apply sessions filters directly on session query where clause.
|
|
true
|
|
end
|
|
|
|
defp add_filter(table, _query, filter) do
|
|
Logger.notice("Unable to process garbage filter. No results are returned",
|
|
table: table,
|
|
filter: filter
|
|
)
|
|
|
|
false
|
|
end
|
|
|
|
defp filter_custom_prop(prop_name, column_name, [:is, _, clauses | _rest] = filter) do
|
|
none_value_included = Enum.member?(clauses, "(none)")
|
|
prop_value_expr = custom_prop_value(column_name, prop_name)
|
|
|
|
dynamic(
|
|
[t],
|
|
(has_key(t, column_name, ^prop_name) and ^in_clause(prop_value_expr, filter)) or
|
|
(^none_value_included and not has_key(t, column_name, ^prop_name))
|
|
)
|
|
end
|
|
|
|
defp filter_custom_prop(prop_name, column_name, [:is_not, _, clauses | _rest] = filter) do
|
|
none_value_included = Enum.member?(clauses, "(none)")
|
|
prop_value_expr = custom_prop_value(column_name, prop_name)
|
|
|
|
dynamic(
|
|
[t],
|
|
(has_key(t, column_name, ^prop_name) and
|
|
not (^in_clause(prop_value_expr, filter))) or
|
|
(^none_value_included and
|
|
has_key(t, column_name, ^prop_name) and
|
|
not (^in_clause(prop_value_expr, filter))) or
|
|
(not (^none_value_included) and not has_key(t, column_name, ^prop_name))
|
|
)
|
|
end
|
|
|
|
defp filter_custom_prop(prop_name, column_name, [:matches_wildcard, dimension, clauses | rest]) do
|
|
regexes = Enum.map(clauses, &page_regex/1)
|
|
|
|
filter_custom_prop(prop_name, column_name, [:matches, dimension, regexes | rest])
|
|
end
|
|
|
|
defp filter_custom_prop(prop_name, column_name, [
|
|
:matches_wildcard_not,
|
|
dimension,
|
|
clauses | rest
|
|
]) do
|
|
regexes = Enum.map(clauses, &page_regex/1)
|
|
|
|
filter_custom_prop(prop_name, column_name, [:matches_not, dimension, regexes | rest])
|
|
end
|
|
|
|
defp filter_custom_prop(prop_name, column_name, [:matches, _dimension, clauses | _rest]) do
|
|
dynamic(
|
|
[t],
|
|
has_key(t, column_name, ^prop_name) and
|
|
fragment(
|
|
"arrayExists(k -> match(?, k), ?)",
|
|
get_by_key(t, column_name, ^prop_name),
|
|
^clauses
|
|
)
|
|
)
|
|
end
|
|
|
|
defp filter_custom_prop(prop_name, column_name, [:matches_not, _dimension, clauses | _rest]) do
|
|
dynamic(
|
|
[t],
|
|
has_key(t, column_name, ^prop_name) and
|
|
fragment(
|
|
"not(arrayExists(k -> match(?, k), ?))",
|
|
get_by_key(t, column_name, ^prop_name),
|
|
^clauses
|
|
)
|
|
)
|
|
end
|
|
|
|
defp filter_custom_prop(prop_name, column_name, [:contains | _rest] = filter) do
|
|
dynamic(
|
|
[t],
|
|
has_key(t, column_name, ^prop_name) and
|
|
^contains_clause(custom_prop_value(column_name, prop_name), filter)
|
|
)
|
|
end
|
|
|
|
defp filter_custom_prop(prop_name, column_name, [:contains_not | _] = filter) do
|
|
dynamic(
|
|
[t],
|
|
has_key(t, column_name, ^prop_name) and
|
|
not (^contains_clause(custom_prop_value(column_name, prop_name), filter))
|
|
)
|
|
end
|
|
|
|
defp filter_field(db_field, [:matches_wildcard, _dimension, glob_exprs | _rest]) do
|
|
page_regexes = Enum.map(glob_exprs, &page_regex/1)
|
|
|
|
dynamic(
|
|
[x],
|
|
fragment("multiMatchAny(?, ?)", type(field(x, ^db_field), :string), ^page_regexes)
|
|
)
|
|
end
|
|
|
|
defp filter_field(db_field, [:matches_wildcard_not | rest]) do
|
|
dynamic([], not (^filter_field(db_field, [:matches_wildcard | rest])))
|
|
end
|
|
|
|
defp filter_field(db_field, [:contains | _rest] = filter) do
|
|
if no_ref_field?(db_field) do
|
|
contains_clause_no_ref(col_value_string(db_field), filter)
|
|
else
|
|
contains_clause(col_value_string(db_field), filter)
|
|
end
|
|
end
|
|
|
|
defp filter_field(db_field, [:contains_not | rest]) do
|
|
dynamic([], not (^filter_field(db_field, [:contains | rest])))
|
|
end
|
|
|
|
defp filter_field(db_field, [:matches, _dimension, clauses | _rest]) do
|
|
dynamic(
|
|
[x],
|
|
fragment("multiMatchAny(?, ?)", type(field(x, ^db_field), :string), ^clauses)
|
|
)
|
|
end
|
|
|
|
defp filter_field(db_field, [:matches_not | rest]) do
|
|
dynamic([], not (^filter_field(db_field, [:matches | rest])))
|
|
end
|
|
|
|
defp filter_field(db_field, [:is, _dimension, clauses | _rest] = filter) do
|
|
list = clauses |> Enum.map(&db_field_val(db_field, &1))
|
|
in_clause(col_value(db_field), filter, list)
|
|
end
|
|
|
|
defp filter_field(db_field, [:is_not | rest]) do
|
|
dynamic([], not (^filter_field(db_field, [:is | rest])))
|
|
end
|
|
|
|
@no_ref "Direct / None"
|
|
@not_set "(not set)"
|
|
|
|
defp db_field_name("channel"), do: :acquisition_channel
|
|
defp db_field_name(name), do: String.to_existing_atom(name)
|
|
|
|
defp no_ref_field?(:source), do: true
|
|
defp no_ref_field?(:referrer), do: true
|
|
defp no_ref_field?(:utm_medium), do: true
|
|
defp no_ref_field?(:utm_source), do: true
|
|
defp no_ref_field?(:utm_campaign), do: true
|
|
defp no_ref_field?(:utm_content), do: true
|
|
defp no_ref_field?(:utm_term), do: true
|
|
defp no_ref_field?(_), do: false
|
|
|
|
defp db_field_val(:source, @no_ref), do: ""
|
|
defp db_field_val(:referrer, @no_ref), do: ""
|
|
defp db_field_val(:utm_medium, @no_ref), do: ""
|
|
defp db_field_val(:utm_source, @no_ref), do: ""
|
|
defp db_field_val(:utm_campaign, @no_ref), do: ""
|
|
defp db_field_val(:utm_content, @no_ref), do: ""
|
|
defp db_field_val(:utm_term, @no_ref), do: ""
|
|
defp db_field_val(_, @not_set), do: ""
|
|
defp db_field_val(_, val), do: val
|
|
|
|
defp col_value(column_name) do
|
|
dynamic([t], field(t, ^column_name))
|
|
end
|
|
|
|
# Needed for string functions to work properly
|
|
defp col_value_string(column_name) do
|
|
dynamic([t], type(field(t, ^column_name), :string))
|
|
end
|
|
|
|
defp custom_prop_value(column_name, prop_name) do
|
|
dynamic([t], get_by_key(t, column_name, ^prop_name))
|
|
end
|
|
|
|
defp in_clause(value_expression, [_, _, clauses | _] = filter, values \\ nil) do
|
|
values = values || clauses
|
|
|
|
if case_sensitive?(filter) do
|
|
dynamic([t], ^value_expression in ^values)
|
|
else
|
|
values = values |> Enum.map(&String.downcase/1)
|
|
dynamic([t], fragment("lower(?)", ^value_expression) in ^values)
|
|
end
|
|
end
|
|
|
|
defp contains_clause(value_expression, [_, _, clauses | _] = filter) do
|
|
if case_sensitive?(filter) do
|
|
dynamic(
|
|
[x],
|
|
fragment("multiSearchAny(?, ?)", ^value_expression, ^clauses)
|
|
)
|
|
else
|
|
dynamic(
|
|
[x],
|
|
fragment("multiSearchAnyCaseInsensitive(?, ?)", ^value_expression, ^clauses)
|
|
)
|
|
end
|
|
end
|
|
|
|
defp contains_clause_no_ref(value_expression, [_, _, clauses | _] = filter) do
|
|
case_sensitive? = case_sensitive?(filter)
|
|
|
|
expression =
|
|
if case_sensitive? do
|
|
dynamic(
|
|
[x],
|
|
fragment("multiSearchAny(?, ?)", ^value_expression, ^clauses)
|
|
)
|
|
else
|
|
dynamic(
|
|
[x],
|
|
fragment("multiSearchAnyCaseInsensitive(?, ?)", ^value_expression, ^clauses)
|
|
)
|
|
end
|
|
|
|
if Enum.any?(clauses, &matches_no_ref?(&1, case_sensitive?)) do
|
|
dynamic([x], ^expression or fragment("? = ?", ^value_expression, ""))
|
|
else
|
|
expression
|
|
end
|
|
end
|
|
|
|
defp case_sensitive?([_, _, _, %{case_sensitive: false}]), do: false
|
|
defp case_sensitive?(_), do: true
|
|
|
|
@no_ref_downcase String.downcase(@no_ref)
|
|
|
|
defp matches_no_ref?(input, false) do
|
|
String.contains?(@no_ref_downcase, String.downcase(input))
|
|
end
|
|
|
|
defp matches_no_ref?(input, true) do
|
|
String.contains?(@no_ref, input)
|
|
end
|
|
end
|