analytics/lib/plausible/stats/imported.ex

609 lines
17 KiB
Elixir

defmodule Plausible.Stats.Imported do
use Plausible.ClickhouseRepo
alias Plausible.Stats.{Query, Base}
import Ecto.Query
import Plausible.Stats.Fragments
# Label substituted for an empty `source`/`referrer` value when grouping
# imported rows (see group_imported_by/2).
@no_ref "Direct / None"
# Label substituted for an empty device, browser or operating system value
# (and their versions) when grouping imported rows (see group_imported_by/2).
@not_set "(not set)"
# Maps every breakdown property that merge_imported/4 can serve from imported
# data to the imported_* table that is queried for it.
@property_to_table_mappings %{
"visit:source" => "imported_sources",
"visit:referrer" => "imported_sources",
"visit:utm_source" => "imported_sources",
"visit:utm_medium" => "imported_sources",
"visit:utm_campaign" => "imported_sources",
"visit:utm_term" => "imported_sources",
"visit:utm_content" => "imported_sources",
"visit:entry_page" => "imported_entry_pages",
"visit:exit_page" => "imported_exit_pages",
"visit:country" => "imported_locations",
"visit:region" => "imported_locations",
"visit:city" => "imported_locations",
"visit:device" => "imported_devices",
"visit:browser" => "imported_browsers",
"visit:browser_version" => "imported_browsers",
"visit:os" => "imported_operating_systems",
"visit:os_version" => "imported_operating_systems",
"event:page" => "imported_pages",
"event:name" => "imported_custom_events",
"event:props:url" => "imported_custom_events"
}
# Property names accepted by the guard on merge_imported/4; any other non-nil
# property falls through to the clause that returns the native query untouched.
@imported_properties Map.keys(@property_to_table_mappings)
@doc """
Returns whether imported data can be used to answer `query`.

The decision depends on the number of filters and on the breakdown property:

  * no filters - supported, unless the property is a custom property
    (`"event:props:" <> _`);
  * exactly one filter - delegated to `supports_single_filter?/1`;
  * two or more filters - never supported.
"""
def schema_supports_query?(query) do
  # map_size/1 is O(1); there is no need to build the key list just to count it.
  case {map_size(query.filters), query.property} do
    {0, "event:props:" <> _} -> false
    {0, _} -> true
    {1, _} -> supports_single_filter?(query)
    {_, _} -> false
  end
end
# The only single-filter query that imported data can answer is a breakdown by
# `event:props:url` filtered on one of the two link-click goals.
defp supports_single_filter?(%Query{property: "event:props:url", filters: filters}) do
  case filters do
    %{"event:goal" => {:is, {:event, goal_name}}} ->
      goal_name in ["Outbound Link: Click", "File Download"]

    _other ->
      false
  end
end

defp supports_single_filter?(_query), do: false
@doc """
Merges imported visitor data into the native timeseries query `native_q`.

When the query has `include_imported: false` the native query is returned
unchanged. Otherwise rows from `imported_visitors` belonging to the site's
completed imports and falling inside the query date range are aggregated per
interval bucket and full-joined with the native buckets on `date`.
"""
def merge_imported_timeseries(native_q, _site, %Query{include_imported: false}, _metrics),
  do: native_q

def merge_imported_timeseries(native_q, site, query, metrics) do
  imported_buckets =
    "imported_visitors"
    |> where([v], v.site_id == ^site.id)
    |> where([v], v.import_id in ^site.complete_import_ids)
    |> where([v], v.date >= ^query.date_range.first and v.date <= ^query.date_range.last)
    |> select([v], %{})
    |> select_imported_metrics(metrics)
    |> apply_interval(query, site)

  merged =
    from(native in subquery(native_q),
      full_join: imported in subquery(imported_buckets),
      on: native.date == imported.date,
      # A bucket may exist on one side only, so take whichever date is set.
      select: %{date: fragment("greatest(?, ?)", native.date, imported.date)}
    )

  select_joined_metrics(merged, metrics)
end
# Groups the imported rows by the query interval and selects the bucket as
# `date`: `toStartOfMonth` for "month", `weekstart_not_before/2` anchored at the
# first day of the date range for "week", and the plain date for anything else.
defp apply_interval(imported_q, query, _site) do
  case query do
    %Plausible.Stats.Query{interval: "month"} ->
      from(i in imported_q,
        group_by: fragment("toStartOfMonth(?)", i.date),
        select_merge: %{date: fragment("toStartOfMonth(?)", i.date)}
      )

    %Plausible.Stats.Query{interval: "week"} ->
      from(i in imported_q,
        group_by: weekstart_not_before(i.date, ^query.date_range.first),
        select_merge: %{date: weekstart_not_before(i.date, ^query.date_range.first)}
      )

    _other ->
      from(i in imported_q,
        group_by: i.date,
        select_merge: %{date: i.date}
      )
  end
end
@doc """
Merges imported data into the native stats query `q`.

  * `include_imported: false` - `q` is returned as is.
  * Breakdown by one of the imported properties - the matching `imported_*`
    table is filtered, grouped by the dimension and full-joined with `q`.
  * No breakdown property (`nil`) - aggregate totals from `imported_visitors`
    are cross-joined with `q`.
  * Any other property - `q` is returned as is.
"""
def merge_imported(q, _site, %Query{include_imported: false}, _metrics), do: q

def merge_imported(q, site, %Query{property: property} = query, metrics)
    when property in @imported_properties do
  source_table = Map.fetch!(@property_to_table_mappings, property)
  dim = Plausible.Stats.Filters.without_prefix(property)

  imported_breakdown =
    source_table
    |> where([i], i.site_id == ^site.id)
    |> where([i], i.import_id in ^site.complete_import_ids)
    |> where([i], i.date >= ^query.date_range.first and i.date <= ^query.date_range.last)
    |> where([i], i.visitors > 0)
    |> select([i], %{})
    |> maybe_apply_filter(query.filters, property, dim)
    |> group_imported_by(dim)
    |> select_imported_metrics(metrics)

  from(s in subquery(q),
    full_join: i in subquery(imported_breakdown),
    on: ^imported_join_condition(dim),
    select: %{}
  )
  |> select_joined_dimension(dim)
  |> select_joined_metrics(metrics)
  |> apply_order_by(metrics)
end

def merge_imported(q, site, %Query{property: nil} = query, metrics) do
  imported_totals =
    site
    |> imported_visitors(query)
    |> select_imported_metrics(metrics)

  from(s in subquery(q),
    cross_join: i in subquery(imported_totals),
    select: %{}
  )
  |> select_joined_metrics(metrics)
end

def merge_imported(q, _site, _query, _metrics), do: q

# Join condition between the native (s) and imported (i) breakdown rows.
# Version dimensions are matched together with their parent dimension.
defp imported_join_condition(:url),
  do: dynamic([s, i], s.breakdown_prop_value == i.breakdown_prop_value)

defp imported_join_condition(:os_version),
  do: dynamic([s, i], s.os == i.os and s.os_version == i.os_version)

defp imported_join_condition(:browser_version),
  do: dynamic([s, i], s.browser == i.browser and s.browser_version == i.browser_version)

defp imported_join_condition(dim),
  do: dynamic([s, i], field(s, ^dim) == field(i, ^dim))
# Merges imported pageview-goal data into the native goal breakdown `q`.
#
# `page_exprs` is the list of page expressions of the pageview goals. Each one
# is converted with `Base.page_regex/1`; rows of `imported_pages` (restricted to
# the site's completed imports, the query date range and `visitors > 0`) are
# kept when their `page` matches at least one of the resulting patterns. The
# rows are array-joined over the matched pattern indices and grouped by index,
# producing one aggregate row per goal named "Visit <page expression>". That
# result is full-joined with `q` on `name`.
#
# Returns `q` untouched when the query has `include_imported: false`.
def merge_imported_pageview_goals(q, _, %Query{include_imported: false}, _, _), do: q
def merge_imported_pageview_goals(q, site, query, page_exprs, metrics) do
page_regexes = Enum.map(page_exprs, &Base.page_regex/1)
imported_q =
from(
i in "imported_pages",
where: i.site_id == ^site.id,
where: i.import_id in ^site.complete_import_ids,
where: i.date >= ^query.date_range.first and i.date <= ^query.date_range.last,
where: i.visitors > 0,
# The match result is aliased as `indices` inside the fragment so that the
# array join below can refer to it by name.
where:
fragment(
"notEmpty(multiMatchAllIndices(?, ?) as indices)",
i.page,
^page_regexes
),
array_join: index in fragment("indices"),
group_by: index,
select: %{
# NOTE(review): assumes the indices returned by multiMatchAllIndices line up
# with ClickHouse's array indexing of `page_exprs` - confirm.
name: fragment("concat('Visit ', ?[?])", ^page_exprs, index)
}
)
|> select_imported_metrics(metrics)
from(s in Ecto.Query.subquery(q),
full_join: i in subquery(imported_q),
on: s.name == i.name,
select: %{}
)
|> select_joined_dimension(:name)
|> select_joined_metrics(metrics)
end
@doc """
Builds a query selecting `total_visitors`, the sum of `visitors` over the
site's imported rows that fall inside the query date range.
"""
def total_imported_visitors(site, query) do
  from(i in imported_visitors(site, query),
    select_merge: %{total_visitors: fragment("sum(?)", i.visitors)}
  )
end
# Base query over `imported_visitors` limited to the site, its completed
# imports and the query date range. The select starts out as an empty map so
# callers can add columns with `select_merge`.
defp imported_visitors(site, query) do
  "imported_visitors"
  |> where([i], i.site_id == ^site.id)
  |> where([i], i.import_id in ^site.complete_import_ids)
  |> where([i], i.date >= ^query.date_range.first and i.date <= ^query.date_range.last)
  |> select([i], %{})
end
# Narrows the imported query according to the query filters.
#
# For an `event:props:url` breakdown filtered by one of the link-click goals,
# only rows of that event name are kept. Otherwise a `{:member, list}` filter
# on the breakdown property itself restricts the dimension column to `list`;
# any other filter shape leaves the query untouched.
defp maybe_apply_filter(
       q,
       %{"event:goal" => {:is, {:event, event_name}}},
       "event:props:url",
       _dim
     )
     when event_name in ["Outbound Link: Click", "File Download"] do
  from(i in q, where: i.name == ^event_name)
end

defp maybe_apply_filter(q, filters, property, dim) do
  with {:member, allowed_values} <- filters[property] do
    from(i in q, where: field(i, ^dim) in ^allowed_values)
  else
    _no_member_filter -> q
  end
end
# Adds the aggregate columns needed for each requested metric to the imported
# query, processing the metrics from left to right. Metrics without an imported
# counterpart are skipped.
defp select_imported_metrics(q, metrics) do
  Enum.reduce(metrics, q, fn metric, acc -> put_imported_metric(acc, metric) end)
end

# Adds the column(s) for a single metric. Clause order matters: table-specific
# clauses must come before the generic clause for the same metric.
defp put_imported_metric(q, :visitors) do
  from(i in q, select_merge: %{visitors: sum(i.visitors)})
end

# Custom events store their own `events` counter ...
defp put_imported_metric(
       %Ecto.Query{from: %Ecto.Query.FromExpr{source: {"imported_custom_events", _}}} = q,
       :events
     ) do
  from(i in q, select_merge: %{events: sum(i.events)})
end

# ... every other table reports pageviews as events.
defp put_imported_metric(q, :events) do
  from(i in q, select_merge: %{events: sum(i.pageviews)})
end

defp put_imported_metric(
       %Ecto.Query{from: %Ecto.Query.FromExpr{source: {"imported_exit_pages", _}}} = q,
       :visits
     ) do
  from(i in q, select_merge: %{visits: sum(i.exits)})
end

defp put_imported_metric(
       %Ecto.Query{from: %Ecto.Query.FromExpr{source: {"imported_entry_pages", _}}} = q,
       :visits
     ) do
  from(i in q, select_merge: %{visits: sum(i.entrances)})
end

defp put_imported_metric(q, :visits) do
  from(i in q, select_merge: %{visits: sum(i.visits)})
end

# Custom events contribute a constant zero for pageviews.
defp put_imported_metric(
       %Ecto.Query{from: %Ecto.Query.FromExpr{source: {"imported_custom_events", _}}} = q,
       :pageviews
     ) do
  from(i in q, select_merge: %{pageviews: 0})
end

defp put_imported_metric(q, :pageviews) do
  from(i in q,
    where: i.pageviews > 0,
    select_merge: %{pageviews: sum(i.pageviews)}
  )
end

# For entry pages the visit count backing the rate is `entrances`.
defp put_imported_metric(
       %Ecto.Query{from: %Ecto.Query.FromExpr{source: {"imported_entry_pages", _}}} = q,
       :bounce_rate
     ) do
  from(i in q,
    select_merge: %{
      bounces: sum(i.bounces),
      __internal_visits: sum(i.entrances)
    }
  )
end

defp put_imported_metric(q, :bounce_rate) do
  from(i in q,
    select_merge: %{
      bounces: sum(i.bounces),
      __internal_visits: sum(i.visits)
    }
  )
end

defp put_imported_metric(
       %Ecto.Query{from: %Ecto.Query.FromExpr{source: {"imported_entry_pages", _}}} = q,
       :visit_duration
     ) do
  from(i in q,
    select_merge: %{
      visit_duration: sum(i.visit_duration),
      __internal_visits: sum(i.entrances)
    }
  )
end

defp put_imported_metric(q, :visit_duration) do
  from(i in q,
    select_merge: %{
      visit_duration: sum(i.visit_duration),
      __internal_visits: sum(i.visits)
    }
  )
end

defp put_imported_metric(q, :views_per_visit) do
  from(i in q,
    where: i.pageviews > 0,
    select_merge: %{
      pageviews: sum(i.pageviews),
      __internal_visits: sum(i.visits)
    }
  )
end

defp put_imported_metric(q, _unsupported_metric), do: q
# Groups the imported query by the breakdown dimension and adds the dimension
# value(s) to the select. Depending on the dimension, empty values are either
# replaced by a placeholder label or filtered out.

# Empty source/referrer is reported under the @no_ref label.
defp group_imported_by(q, dim) when dim in [:source, :referrer] do
  from(i in q,
    group_by: field(i, ^dim),
    select_merge: %{
      ^dim => fragment("if(empty(?), ?, ?)", field(i, ^dim), @no_ref, field(i, ^dim))
    }
  )
end

# Rows without a value for the UTM tag are dropped.
defp group_imported_by(q, dim)
     when dim in [:utm_source, :utm_medium, :utm_campaign, :utm_term, :utm_content] do
  from(i in q,
    where: fragment("not empty(?)", field(i, ^dim)),
    group_by: field(i, ^dim),
    select_merge: %{^dim => field(i, ^dim)}
  )
end

defp group_imported_by(q, :page) do
  from(i in q,
    group_by: i.page,
    select_merge: %{page: i.page, time_on_page: sum(i.time_on_page)}
  )
end

defp group_imported_by(q, :country) do
  from(i in q,
    where: i.country != "ZZ",
    group_by: i.country,
    select_merge: %{country: i.country}
  )
end

defp group_imported_by(q, :region) do
  from(i in q,
    where: i.region != "",
    group_by: i.region,
    select_merge: %{region: i.region}
  )
end

defp group_imported_by(q, :city) do
  from(i in q,
    where: i.city != 0 and not is_nil(i.city),
    group_by: i.city,
    select_merge: %{city: i.city}
  )
end

# Empty device/browser is reported under the @not_set label.
defp group_imported_by(q, dim) when dim in [:device, :browser] do
  from(i in q,
    group_by: field(i, ^dim),
    select_merge: %{
      ^dim => fragment("if(empty(?), ?, ?)", field(i, ^dim), @not_set, field(i, ^dim))
    }
  )
end

defp group_imported_by(q, :browser_version) do
  from(i in q,
    group_by: [i.browser, i.browser_version],
    select_merge: %{
      browser: fragment("if(empty(?), ?, ?)", i.browser, @not_set, i.browser),
      browser_version:
        fragment(
          "if(empty(?), ?, ?)",
          i.browser_version,
          @not_set,
          i.browser_version
        )
    }
  )
end

# The imported table calls the column `operating_system`; it is exposed as `os`.
defp group_imported_by(q, :os) do
  from(i in q,
    group_by: i.operating_system,
    select_merge: %{
      os: fragment("if(empty(?), ?, ?)", i.operating_system, @not_set, i.operating_system)
    }
  )
end

defp group_imported_by(q, :os_version) do
  from(i in q,
    group_by: [i.operating_system, i.operating_system_version],
    select_merge: %{
      os: fragment("if(empty(?), ?, ?)", i.operating_system, @not_set, i.operating_system),
      os_version:
        fragment(
          "if(empty(?), ?, ?)",
          i.operating_system_version,
          @not_set,
          i.operating_system_version
        )
    }
  )
end

defp group_imported_by(q, dim) when dim in [:entry_page, :exit_page] do
  from(i in q,
    group_by: field(i, ^dim),
    select_merge: %{^dim => field(i, ^dim)}
  )
end

defp group_imported_by(q, :name) do
  from(i in q,
    group_by: i.name,
    select_merge: %{name: i.name}
  )
end

# The link URL of custom events is exposed as `breakdown_prop_value`.
defp group_imported_by(q, :url) do
  from(i in q,
    group_by: i.link_url,
    select_merge: %{breakdown_prop_value: i.link_url}
  )
end
# Adds the breakdown dimension to the joined select. For each dimension column
# the native value (s) is used unless it is empty, in which case the imported
# value (i) is taken; `city` instead takes the greater of the two values.
defp select_joined_dimension(q, :city) do
  from([s, i] in q,
    select_merge: %{city: fragment("greatest(?,?)", i.city, s.city)}
  )
end

defp select_joined_dimension(q, :os_version) do
  from([s, i] in q,
    select_merge: %{
      os: fragment("if(empty(?), ?, ?)", s.os, i.os, s.os),
      os_version: fragment("if(empty(?), ?, ?)", s.os_version, i.os_version, s.os_version)
    }
  )
end

defp select_joined_dimension(q, :browser_version) do
  from([s, i] in q,
    select_merge: %{
      browser: fragment("if(empty(?), ?, ?)", s.browser, i.browser, s.browser),
      browser_version:
        fragment("if(empty(?), ?, ?)", s.browser_version, i.browser_version, s.browser_version)
    }
  )
end

defp select_joined_dimension(q, :url) do
  from([s, i] in q,
    select_merge: %{
      breakdown_prop_value:
        fragment(
          "if(empty(?), ?, ?)",
          s.breakdown_prop_value,
          i.breakdown_prop_value,
          s.breakdown_prop_value
        )
    }
  )
end

defp select_joined_dimension(q, dim) do
  from([s, i] in q,
    select_merge: %{
      ^dim => fragment("if(empty(?), ?, ?)", field(s, ^dim), field(i, ^dim), field(s, ^dim))
    }
  )
end
# Adds the combined value of every requested metric to the select of the joined
# query, where `s` is the native subquery and `i` the imported subquery.
# Metrics are processed recursively from the head of the list; metrics without
# a dedicated clause are skipped.
defp select_joined_metrics(q, []), do: q
# TODO: Reverse-engineering the native bounces and total visit durations in
# order to combine them with imported data is inefficient. Instead, both
# queries should fetch bounces/total_visit_duration and visits, and be
# used as subqueries of a main query that then computes the bounce rate and
# the average visit_duration.
# Counter metrics are combined by adding the native and imported values.
defp select_joined_metrics(q, [:visits | rest]) do
q
|> select_merge([s, i], %{visits: s.visits + i.visits})
|> select_joined_metrics(rest)
end
# The sum is tagged with selected_as/2 under the alias :visitors.
defp select_joined_metrics(q, [:visitors | rest]) do
q
|> select_merge([s, i], %{visitors: selected_as(s.visitors + i.visitors, :visitors)})
|> select_joined_metrics(rest)
end
defp select_joined_metrics(q, [:events | rest]) do
q
|> select_merge([s, i], %{events: s.events + i.events})
|> select_joined_metrics(rest)
end
defp select_joined_metrics(q, [:pageviews | rest]) do
q
|> select_merge([s, i], %{pageviews: s.pageviews + i.pageviews})
|> select_joined_metrics(rest)
end
# Placeholders, in order:
#   guard:       s.__internal_visits + i.__internal_visits > 0
#   numerator:   i.pageviews + s.views_per_visit * s.__internal_visits
#   denominator: i.__internal_visits + s.__internal_visits
# The native pageview total is rebuilt from the native ratio and visit count.
# Yields 0 when there are no visits on either side.
defp select_joined_metrics(q, [:views_per_visit | rest]) do
q
|> select_merge([s, i], %{
views_per_visit:
fragment(
"if(? + ? > 0, round((? + ? * ?) / (? + ?), 2), 0)",
s.__internal_visits,
i.__internal_visits,
i.pageviews,
s.views_per_visit,
s.__internal_visits,
i.__internal_visits,
s.__internal_visits
)
})
|> select_joined_metrics(rest)
end
# Placeholders, in order:
#   guard:       s.__internal_visits + i.__internal_visits > 0
#   numerator:   100 * (i.bounces + s.bounce_rate * s.__internal_visits / 100)
#   denominator: i.__internal_visits + s.__internal_visits
# The native bounce count is rebuilt from the native percentage and visit count.
# Yields 0 when there are no visits on either side.
defp select_joined_metrics(q, [:bounce_rate | rest]) do
q
|> select_merge([s, i], %{
bounce_rate:
fragment(
"if(? + ? > 0, round(100 * (? + (? * ? / 100)) / (? + ?)), 0)",
s.__internal_visits,
i.__internal_visits,
i.bounces,
s.bounce_rate,
s.__internal_visits,
i.__internal_visits,
s.__internal_visits
)
})
|> select_joined_metrics(rest)
end
# Placeholders, in order:
#   guard:       s.__internal_visits + i.__internal_visits > 0
#   numerator:   i.visit_duration + s.visit_duration * s.__internal_visits
#   denominator: s.__internal_visits + i.__internal_visits
# `i.visit_duration` is a total while `s.visit_duration` is multiplied by the
# native visit count to obtain a total. Yields 0 when there are no visits on
# either side.
defp select_joined_metrics(q, [:visit_duration | rest]) do
q
|> select_merge([s, i], %{
visit_duration:
fragment(
"""
if(
? + ? > 0,
round((? + ? * ?) / (? + ?), 1),
0
)
""",
s.__internal_visits,
i.__internal_visits,
i.visit_duration,
s.visit_duration,
s.__internal_visits,
s.__internal_visits,
i.__internal_visits
)
})
|> select_joined_metrics(rest)
end
# Only the native side carries a sample percentage; it is passed through as is.
defp select_joined_metrics(q, [:sample_percent | rest]) do
q
|> select_merge([s, i], %{sample_percent: s.sample_percent})
|> select_joined_metrics(rest)
end
# Any other metric adds nothing to the select.
defp select_joined_metrics(q, [_ | rest]) do
q
|> select_joined_metrics(rest)
end
# Orders the joined rows by combined visitors (descending) for as long as the
# metric list starts with :visitors; stops at the first other metric.
defp apply_order_by(q, [:visitors | remaining]) do
  q
  |> order_by([s, i], desc: s.visitors + i.visitors)
  |> apply_order_by(remaining)
end

defp apply_order_by(q, _metrics), do: q
end