Refactor building the Query struct (#5893)

* rename Query.build -> Query.parse_and_build

* rename two test files and move four %Query{}-building functions into a subfolder

* rename StatsAPIFilterParser to LegacyStatsAPIFilterParser

* rename Filters.QueryParser to QueryParser

* turn QueryParserTest into QueryParseAndBuildTest

* move query_parser.ex out of filters directory

* separate build from parse

* disable sample_threshold in the new intermediate build function, for now

* remove now redundant test util functions

* remove unused import

* address todo from earlier

* credo

* bring module names in sync with test file paths

---------

Co-authored-by: Adrian Gruntkowski <adrian.gruntkowski@gmail.com>
Authored by RobertJoonas on 2025-11-24 09:16:05 +00:00, committed by GitHub
parent 6d5951fffd
commit 7a11f5ec40
25 changed files with 3616 additions and 3187 deletions
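
The rename that drives most of these diffs is mechanical for callers. A minimal before/after sketch (arguments follow the call sites below; the "all" date range and "visitors" metric are just illustrative values):

    # Before this commit
    {:ok, query} =
      Plausible.Stats.Query.build(site, :internal, %{
        "site_id" => site.domain,
        "metrics" => ["visitors"],
        "date_range" => "all"
      })

    # After this commit: same arguments and result, new name reflecting the
    # split into a parse step (QueryParser) and a build step (QueryBuilder)
    {:ok, query} =
      Plausible.Stats.Query.parse_and_build(site, :internal, %{
        "site_id" => site.domain,
        "metrics" => ["visitors"],
        "date_range" => "all"
      })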


@ -56,7 +56,7 @@ defmodule Plausible.Stats.ConsolidatedView do
|> DateTime.to_iso8601()
stats_query =
Stats.Query.build!(view, :internal, %{
Stats.Query.parse_and_build!(view, :internal, %{
"site_id" => view.domain,
"metrics" => ["visitors", "visits", "pageviews", "views_per_visit"],
"include" => %{"comparisons" => %{"mode" => "custom", "date_range" => [c_from, c_to]}},
@ -91,7 +91,7 @@ defmodule Plausible.Stats.ConsolidatedView do
defp query_24h_intervals(view, now) do
graph_query =
Stats.Query.build!(
Stats.Query.parse_and_build!(
view,
:internal,
%{


@ -349,7 +349,7 @@ defmodule PlausibleWeb.Live.FunnelSettings.Form do
)
query =
Plausible.Stats.Query.build!(
Plausible.Stats.Query.parse_and_build!(
site,
:internal,
%{


@ -3,7 +3,7 @@ defmodule Plausible.Segments.Filters do
This module contains functions that enable resolving segments in filters.
"""
alias Plausible.Segments
alias Plausible.Stats.Filters
alias Plausible.Stats.{Filters, QueryParser}
@max_segment_filters_count 10
@ -48,7 +48,7 @@ defmodule Plausible.Segments.Filters do
segments,
%{},
fn %Segments.Segment{id: id, segment_data: segment_data} ->
case Filters.QueryParser.parse_filters(segment_data["filters"]) do
case QueryParser.parse_filters(segment_data["filters"]) do
{:ok, filters} -> {id, filters}
_ -> {id, nil}
end


@ -131,7 +131,7 @@ defmodule Plausible.Segments.Segment do
"""
def build_naive_query_from_segment_data(%Plausible.Site{} = site, filters),
do:
Plausible.Stats.Query.build(
Plausible.Stats.Query.parse_and_build(
site,
:internal,
%{


@ -4,8 +4,8 @@ defmodule Plausible.Stats.Filters do
"""
alias Plausible.Stats.Query
alias Plausible.Stats.Filters.QueryParser
alias Plausible.Stats.Filters.StatsAPIFilterParser
alias Plausible.Stats.QueryParser
alias Plausible.Stats.Filters.LegacyStatsAPIFilterParser
@visit_props [
:source,
@ -70,7 +70,7 @@ defmodule Plausible.Stats.Filters do
case Jason.decode(filters) do
{:ok, filters} when is_list(filters) -> parse(filters)
{:ok, _} -> []
{:error, err} -> StatsAPIFilterParser.parse_filter_expression(err.data)
{:error, err} -> LegacyStatsAPIFilterParser.parse_filter_expression(err.data)
end
end


@ -1,5 +1,7 @@
defmodule Plausible.Stats.Filters.StatsAPIFilterParser do
@moduledoc false
defmodule Plausible.Stats.Filters.LegacyStatsAPIFilterParser do
@moduledoc """
Parser for legacy filter format used in Stats API v1.
"""
@non_escaped_pipe_regex ~r/(?<!\\)\|/
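
For context, this module parses the old Stats API v1 filter strings, which (per the public v1 docs, not this diff) look roughly like visit:country==US;visit:browser==Chrome|Firefox. The @non_escaped_pipe_regex above is what splits OR-ed values while leaving escaped pipes alone; a tiny sketch:

    # Splits on "|" only when it is not preceded by a backslash
    Regex.split(~r/(?<!\\)\|/, "Chrome|Firefox")
    #=> ["Chrome", "Firefox"]

    Regex.split(~r/(?<!\\)\|/, "A\\|B")
    #=> ["A\\|B"]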


@ -41,7 +41,7 @@ defmodule Plausible.Stats.GoalSuggestions do
from_date = Date.shift(to_date, month: -6)
query =
Plausible.Stats.Query.build!(
Plausible.Stats.Query.parse_and_build!(
site,
:internal,
%{


@ -8,7 +8,7 @@ defmodule Plausible.Stats.Legacy.QueryBuilder do
use Plausible
alias Plausible.Stats.{Filters, Interval, Query, DateTimeRange}
alias Plausible.Stats.{Filters, Interval, Query, QueryParser, QueryBuilder, DateTimeRange}
def from(site, params, debug_metadata, now \\ nil) do
now = now || Plausible.Stats.Query.Test.get_fixed_now()
@ -31,9 +31,9 @@ defmodule Plausible.Stats.Legacy.QueryBuilder do
|> put_consolidated_site_ids(site)
|> put_order_by(params)
|> put_include(site, params)
|> Query.put_comparison_utc_time_range()
|> QueryBuilder.put_comparison_utc_time_range()
|> Query.put_imported_opts(site)
|> Query.set_time_on_page_data(site)
|> QueryBuilder.set_time_on_page_data(site)
on_ee do
query = Plausible.Stats.Sampling.put_threshold(query, site, params)
@ -68,7 +68,7 @@ defmodule Plausible.Stats.Legacy.QueryBuilder do
defp preload_goals_and_revenue(query, site) do
{preloaded_goals, revenue_warning, revenue_currencies} =
Plausible.Stats.Filters.QueryParser.preload_goals_and_revenue(
Plausible.Stats.QueryBuilder.preload_goals_and_revenue(
site,
query.metrics,
query.filters,
@ -269,36 +269,37 @@ defmodule Plausible.Stats.Legacy.QueryBuilder do
[{:visitors, :asc}, {"visit:source", :desc}]
"""
def parse_order_by(order_by) do
json_decode(order_by)
|> unwrap([])
|> Filters.QueryParser.parse_order_by()
|> unwrap([])
with true <- is_binary(order_by),
{:ok, order_by} <- JSON.decode(order_by),
{:ok, order_by} <- QueryParser.parse_order_by(order_by) do
order_by
else
_ -> []
end
end
@doc """
### Examples:
iex> QueryBuilder.parse_include(%{}, nil)
QueryParser.default_include()
Plausible.Stats.ParsedQueryParams.default_include()
iex> QueryBuilder.parse_include(%{}, ~s({"total_rows": true}))
Map.merge(QueryParser.default_include(), %{total_rows: true})
Map.merge(Plausible.Stats.ParsedQueryParams.default_include(), %{total_rows: true})
"""
def parse_include(site, include) do
json_decode(include)
|> unwrap(%{})
|> Filters.QueryParser.parse_include(site)
|> unwrap(Filters.QueryParser.default_include())
include =
with true <- is_binary(include),
{:ok, include} <- JSON.decode(include),
{:ok, include} <- QueryParser.parse_include(include, site) do
include
else
_ -> %{}
end
Plausible.Stats.ParsedQueryParams.default_include()
|> Map.merge(include)
end
defp json_decode(string) when is_binary(string) do
Jason.decode(string)
end
defp json_decode(_other), do: :error
defp unwrap({:ok, result}, _default), do: result
defp unwrap(_, default), do: default
defp put_order_by(query, %{} = params) do
struct!(query, order_by: parse_order_by(params["order_by"]))
end
@ -342,7 +343,7 @@ defmodule Plausible.Stats.Legacy.QueryBuilder do
def parse_comparison_params(site, %{"comparison" => "custom"} = params) do
{:ok, date_range} =
Filters.QueryParser.parse_date_range_pair(site, [
QueryParser.parse_date_range_pair(site, [
params["compare_from"],
params["compare_to"]
])


@ -0,0 +1,55 @@
defmodule Plausible.Stats.ParsedQueryParams do
@moduledoc false
defstruct [
:now,
:utc_time_range,
:metrics,
:filters,
:dimensions,
:order_by,
:pagination,
:include
]
alias Plausible.Stats.DateTimeRange
@default_include %{
imports: false,
# `include.imports_meta` can be true even when `include.imports`
# is false. Even if we don't want to include imported data, we
# might still want to know whether imported data can be toggled
# on/off on the dashboard.
imports_meta: false,
time_labels: false,
total_rows: false,
trim_relative_date_range: false,
comparisons: nil,
legacy_time_on_page_cutoff: nil
}
def default_include(), do: @default_include
@default_pagination %{
limit: 10_000,
offset: 0
}
def default_pagination(), do: @default_pagination
def new!(params) when is_map(params) do
%DateTimeRange{} = utc_time_range = Map.fetch!(params, :utc_time_range)
[_ | _] = metrics = Map.fetch!(params, :metrics)
%__MODULE__{
now: params[:now],
utc_time_range: utc_time_range,
metrics: metrics,
filters: params[:filters] || [],
dimensions: params[:dimensions] || [],
order_by: params[:order_by],
pagination: Map.merge(@default_pagination, params[:pagination] || %{}),
include: Map.merge(@default_include, params[:include] || %{})
}
end
end
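
A quick usage sketch of the struct introduced above. The utc_time_range value is assumed to be a %DateTimeRange{} produced elsewhere (normally by QueryParser); only :utc_time_range and :metrics are required, and everything else falls back to the defaults defined in the module:

    parsed =
      Plausible.Stats.ParsedQueryParams.new!(%{
        utc_time_range: utc_time_range,
        metrics: [:visitors, :pageviews]
      })

    parsed.filters             #=> []
    parsed.pagination          #=> %{limit: 10_000, offset: 0}
    parsed.include.total_rows  #=> false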


@ -18,7 +18,7 @@ defmodule Plausible.Stats.Query do
timezone: nil,
legacy_breakdown: false,
preloaded_goals: [],
include: Plausible.Stats.Filters.QueryParser.default_include(),
include: Plausible.Stats.ParsedQueryParams.default_include(),
debug_metadata: %{},
pagination: nil,
# Revenue metric specific metadata
@ -34,45 +34,40 @@ defmodule Plausible.Stats.Query do
smear_session_metrics: false
require OpenTelemetry.Tracer, as: Tracer
alias Plausible.Stats.{DateTimeRange, Filters, Imported, Legacy, Comparisons}
alias Plausible.Stats.{
DateTimeRange,
Imported,
Legacy,
Comparisons,
QueryParser,
ParsedQueryParams,
QueryBuilder
}
@type t :: %__MODULE__{}
def build(
def parse_and_build(
%Plausible.Site{domain: domain} = site,
schema_type,
%{"site_id" => domain} = params,
debug_metadata \\ %{}
) do
with {:ok, query_data} <- Filters.QueryParser.parse(site, schema_type, params) do
query =
%__MODULE__{
debug_metadata: debug_metadata,
site_id: site.id,
site_native_stats_start_at: site.native_stats_start_at
}
|> struct!(Map.to_list(query_data))
|> set_time_on_page_data(site)
|> put_comparison_utc_time_range()
|> put_imported_opts(site)
on_ee do
query = Plausible.Stats.Sampling.put_threshold(query, site, params)
end
{:ok, query}
with {:ok, %ParsedQueryParams{} = parsed_query_params} <-
QueryParser.parse(site, schema_type, params) do
QueryBuilder.build(site, parsed_query_params, params, debug_metadata)
end
end
def build!(site, schema_type, params, debug_metadata \\ %{}) do
case build(site, schema_type, params, debug_metadata) do
def parse_and_build!(site, schema_type, params, debug_metadata \\ %{}) do
case parse_and_build(site, schema_type, params, debug_metadata) do
{:ok, query} -> query
{:error, reason} -> raise "Failed to build query: #{inspect(reason)}"
end
end
@doc """
Builds query from old-style stats APIv1 params. New code should use `Query.build`.
Builds query from old-style stats APIv1 params. New code should use `Query.parse_and_build`.
"""
def from(site, params, debug_metadata \\ %{}, now \\ nil) do
Legacy.QueryBuilder.from(site, params, debug_metadata, now)
@ -143,13 +138,6 @@ defmodule Plausible.Stats.Query do
put_imported_opts(query, nil)
end
def put_comparison_utc_time_range(%__MODULE__{include: %{comparisons: nil}} = query), do: query
def put_comparison_utc_time_range(%__MODULE__{include: %{comparisons: comparison_opts}} = query) do
datetime_range = Comparisons.get_comparison_utc_time_range(query, comparison_opts)
struct!(query, comparison_utc_time_range: datetime_range)
end
def put_imported_opts(query, site) do
requested? = query.include.imports
@ -190,15 +178,6 @@ defmodule Plausible.Stats.Query do
in_comparison_range ++ in_range
end
def set_time_on_page_data(query, site) do
struct!(query,
time_on_page_data: %{
new_metric_visible: Plausible.Stats.TimeOnPage.new_time_on_page_visible?(site),
cutoff_date: site.legacy_time_on_page_cutoff
}
)
end
@spec get_skip_imported_reason(t()) ::
nil | :no_imported_data | :out_of_range | :unsupported_query
def get_skip_imported_reason(query) do


@ -0,0 +1,363 @@
defmodule Plausible.Stats.QueryBuilder do
@moduledoc """
A module used for building the Query struct from already parsed params.
"""
use Plausible
alias Plausible.Segments
alias Plausible.Stats.{Query, ParsedQueryParams, Comparisons, Filters, Time, TableDecider}
def build(site, parsed_query_params, params, debug_metadata \\ %{}) do
with {:ok, parsed_query_params} <- resolve_segments_in_filters(parsed_query_params, site),
query = do_build(parsed_query_params, site, params, debug_metadata),
:ok <- validate_order_by(query),
:ok <- validate_custom_props_access(site, query),
:ok <- validate_toplevel_only_filter_dimension(query),
:ok <- validate_special_metrics_filters(query),
:ok <- validate_behavioral_filters(query),
:ok <- validate_filtered_goals_exist(query),
:ok <- validate_revenue_metrics_access(site, query),
:ok <- validate_metrics(query),
:ok <- validate_include(query) do
query =
query
|> set_time_on_page_data(site)
|> put_comparison_utc_time_range()
|> Query.put_imported_opts(site)
on_ee do
# NOTE: The Query API schema does not allow the sample_threshold param
# and it looks like it's not used as a parameter anymore. We might want
# to clean this up.
query = Plausible.Stats.Sampling.put_threshold(query, site, %{})
end
{:ok, query}
end
end
defp resolve_segments_in_filters(%ParsedQueryParams{} = parsed_query_params, site) do
with {:ok, preloaded_segments} <-
Segments.Filters.preload_needed_segments(site, parsed_query_params.filters),
{:ok, filters} <-
Segments.Filters.resolve_segments(parsed_query_params.filters, preloaded_segments) do
{:ok, struct!(parsed_query_params, filters: filters)}
end
end
defp do_build(parsed_query_params, site, params, debug_metadata) do
%ParsedQueryParams{metrics: metrics, filters: filters, dimensions: dimensions} =
parsed_query_params
{preloaded_goals, revenue_warning, revenue_currencies} =
preload_goals_and_revenue(site, metrics, filters, dimensions)
consolidated_site_ids = get_consolidated_site_ids(site)
all_params =
parsed_query_params
|> Map.to_list()
|> Keyword.merge(
site_id: site.id,
site_native_stats_start_at: site.native_stats_start_at,
consolidated_site_ids: consolidated_site_ids,
timezone: site.timezone,
preloaded_goals: preloaded_goals,
revenue_warning: revenue_warning,
revenue_currencies: revenue_currencies,
input_date_range: Map.get(params, "date_range"),
debug_metadata: debug_metadata
)
struct!(%Query{}, all_params)
end
on_ee do
def get_consolidated_site_ids(%Plausible.Site{} = site) do
if Plausible.Sites.consolidated?(site) do
Plausible.ConsolidatedView.Cache.get(site.domain)
end
end
else
def get_consolidated_site_ids(_site), do: nil
end
def set_time_on_page_data(query, site) do
struct!(query,
time_on_page_data: %{
new_metric_visible: Plausible.Stats.TimeOnPage.new_time_on_page_visible?(site),
cutoff_date: site.legacy_time_on_page_cutoff
}
)
end
def put_comparison_utc_time_range(%Query{include: %{comparisons: nil}} = query), do: query
def put_comparison_utc_time_range(%Query{include: %{comparisons: comparison_opts}} = query) do
datetime_range = Comparisons.get_comparison_utc_time_range(query, comparison_opts)
struct!(query, comparison_utc_time_range: datetime_range)
end
def preload_goals_and_revenue(site, metrics, filters, dimensions) do
preloaded_goals =
Plausible.Stats.Goals.preload_needed_goals(site, dimensions, filters)
{revenue_warning, revenue_currencies} =
preload_revenue(site, preloaded_goals, metrics, dimensions)
{
preloaded_goals,
revenue_warning,
revenue_currencies
}
end
on_ee do
alias Plausible.Stats.Goal.Revenue
def preload_revenue(site, preloaded_goals, metrics, dimensions) do
Revenue.preload(site, preloaded_goals, metrics, dimensions)
end
defp validate_revenue_metrics_access(site, query) do
if Revenue.requested?(query.metrics) and not Revenue.available?(site) do
{:error, "The owner of this site does not have access to the revenue metrics feature."}
else
:ok
end
end
else
defp preload_revenue(_site, _preloaded_goals, _metrics, _dimensions), do: {nil, %{}}
defp validate_revenue_metrics_access(_site, _query), do: :ok
end
defp validate_order_by(query) do
if query.order_by do
valid_values = query.metrics ++ query.dimensions
invalid_entry =
Enum.find(query.order_by, fn {value, _direction} ->
not Enum.member?(valid_values, value)
end)
case invalid_entry do
nil ->
:ok
_ ->
{:error,
"Invalid order_by entry '#{i(invalid_entry)}'. Entry is not a queried metric or dimension."}
end
else
:ok
end
end
@only_toplevel ["event:goal", "event:hostname"]
defp validate_toplevel_only_filter_dimension(query) do
not_toplevel =
query.filters
|> Filters.dimensions_used_in_filters(min_depth: 1, behavioral_filters: :ignore)
|> Enum.filter(&(&1 in @only_toplevel))
if Enum.count(not_toplevel) > 0 do
{:error,
"Invalid filters. Dimension `#{List.first(not_toplevel)}` can only be filtered at the top level."}
else
:ok
end
end
@special_metrics [:conversion_rate, :group_conversion_rate]
defp validate_special_metrics_filters(query) do
special_metric? = Enum.any?(@special_metrics, &(&1 in query.metrics))
deep_custom_property? =
query.filters
|> Filters.dimensions_used_in_filters(min_depth: 1)
|> Enum.any?(fn dimension -> String.starts_with?(dimension, "event:props:") end)
if special_metric? and deep_custom_property? do
{:error,
"Invalid filters. When `conversion_rate` or `group_conversion_rate` metrics are used, custom property filters can only be used on top level."}
else
:ok
end
end
defp validate_behavioral_filters(query) do
query.filters
|> Filters.traverse(0, fn behavioral_depth, operator ->
if operator in [:has_done, :has_not_done] do
behavioral_depth + 1
else
behavioral_depth
end
end)
|> Enum.reduce_while(:ok, fn {[_operator, dimension | _rest], behavioral_depth}, :ok ->
cond do
behavioral_depth == 0 ->
# ignore non-behavioral filters
{:cont, :ok}
behavioral_depth > 1 ->
{:halt,
{:error,
"Invalid filters. Behavioral filters (has_done, has_not_done) cannot be nested."}}
not String.starts_with?(dimension, "event:") ->
{:halt,
{:error,
"Invalid filters. Behavioral filters (has_done, has_not_done) can only be used with event dimension filters."}}
true ->
{:cont, :ok}
end
end)
end
defp validate_filtered_goals_exist(query) do
# Note: We don't check :contains goal filters since it's acceptable if they match nothing.
goal_filter_clauses =
query.filters
|> Filters.all_leaf_filters()
|> Enum.flat_map(fn
[:is, "event:goal", clauses] -> clauses
_ -> []
end)
if length(goal_filter_clauses) > 0 do
configured_goal_names =
query.preloaded_goals.all
|> Enum.map(&Plausible.Goal.display_name/1)
validate_list(goal_filter_clauses, &validate_goal_filter(&1, configured_goal_names))
else
:ok
end
end
defp validate_goal_filter(clause, configured_goal_names) do
if Enum.member?(configured_goal_names, clause) do
:ok
else
{:error,
"Invalid filters. The goal `#{clause}` is not configured for this site. Find out how to configure goals here: https://plausible.io/docs/stats-api#filtering-by-goals"}
end
end
defp validate_custom_props_access(site, query) do
allowed_props = Plausible.Props.allowed_for(site, bypass_setup?: true)
validate_custom_props_access(site, query, allowed_props)
end
defp validate_custom_props_access(_site, _query, :all), do: :ok
defp validate_custom_props_access(_site, query, allowed_props) do
valid? =
query.filters
|> Filters.dimensions_used_in_filters()
|> Enum.concat(query.dimensions)
|> Enum.all?(fn
"event:props:" <> prop -> prop in allowed_props
_ -> true
end)
if valid? do
:ok
else
{:error, "The owner of this site does not have access to the custom properties feature."}
end
end
defp validate_metrics(query) do
with :ok <- validate_list(query.metrics, &validate_metric(&1, query)) do
TableDecider.validate_no_metrics_dimensions_conflict(query)
end
end
defp validate_metric(metric, query) when metric in [:conversion_rate, :group_conversion_rate] do
if Enum.member?(query.dimensions, "event:goal") or
Filters.filtering_on_dimension?(query, "event:goal", behavioral_filters: :ignore) do
:ok
else
{:error, "Metric `#{metric}` can only be queried with event:goal filters or dimensions."}
end
end
defp validate_metric(:scroll_depth = metric, query) do
page_dimension? = Enum.member?(query.dimensions, "event:page")
toplevel_page_filter? = not is_nil(Filters.get_toplevel_filter(query, "event:page"))
if page_dimension? or toplevel_page_filter? do
:ok
else
{:error, "Metric `#{metric}` can only be queried with event:page filters or dimensions."}
end
end
defp validate_metric(:exit_rate = metric, query) do
case {query.dimensions, TableDecider.sessions_join_events?(query)} do
{["visit:exit_page"], false} ->
:ok
{["visit:exit_page"], true} ->
{:error, "Metric `#{metric}` cannot be queried when filtering on event dimensions."}
_ ->
{:error,
"Metric `#{metric}` requires a `\"visit:exit_page\"` dimension. No other dimensions are allowed."}
end
end
defp validate_metric(:views_per_visit = metric, query) do
cond do
Filters.filtering_on_dimension?(query, "event:page", behavioral_filters: :ignore) ->
{:error, "Metric `#{metric}` cannot be queried with a filter on `event:page`."}
length(query.dimensions) > 0 ->
{:error, "Metric `#{metric}` cannot be queried with `dimensions`."}
true ->
:ok
end
end
defp validate_metric(:time_on_page = metric, query) do
cond do
Enum.member?(query.dimensions, "event:page") ->
:ok
Filters.filtering_on_dimension?(query, "event:page", behavioral_filters: :ignore) ->
:ok
true ->
{:error, "Metric `#{metric}` can only be queried with event:page filters or dimensions."}
end
end
defp validate_metric(_, _), do: :ok
defp validate_include(query) do
time_dimension? = Enum.any?(query.dimensions, &Time.time_dimension?/1)
if query.include.time_labels and not time_dimension? do
{:error, "Invalid include.time_labels: requires a time dimension."}
else
:ok
end
end
defp i(value), do: inspect(value, charlists: :as_lists)
defp validate_list(list, parser_function) do
Enum.reduce_while(list, :ok, fn value, :ok ->
case parser_function.(value) do
:ok -> {:cont, :ok}
{:error, _} = error -> {:halt, error}
end
end)
end
end
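
Put together, the new two-step flow mirrors the body of Query.parse_and_build/4 shown earlier: QueryParser turns the raw request params into a %ParsedQueryParams{}, and QueryBuilder then resolves segments, runs the validations above, and assembles the %Query{}. A sketch, with params being the raw request map:

    with {:ok, %Plausible.Stats.ParsedQueryParams{} = parsed} <-
           Plausible.Stats.QueryParser.parse(site, :internal, params),
         {:ok, query} <- Plausible.Stats.QueryBuilder.build(site, parsed, params) do
      # query is a fully validated %Plausible.Stats.Query{}
      {:ok, query}
    end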


@ -1,30 +1,9 @@
defmodule Plausible.Stats.Filters.QueryParser do
defmodule Plausible.Stats.QueryParser do
@moduledoc false
use Plausible
alias Plausible.Stats.{TableDecider, Filters, Metrics, DateTimeRange, JSONSchema, Time}
@default_include %{
imports: false,
# `include.imports_meta` can be true even when `include.imports`
# is false. Even if we don't want to include imported data, we
# might still want to know whether imported data can be toggled
# on/off on the dashboard.
imports_meta: false,
time_labels: false,
total_rows: false,
trim_relative_date_range: false,
comparisons: nil,
legacy_time_on_page_cutoff: nil
}
@default_pagination %{
limit: 10_000,
offset: 0
}
def default_include(), do: @default_include
alias Plausible.Stats.{Filters, Metrics, DateTimeRange, JSONSchema}
def parse(site, schema_type, params, now \\ nil) when is_map(params) do
now = now || Plausible.Stats.Query.Test.get_fixed_now()
@ -35,58 +14,26 @@ defmodule Plausible.Stats.Filters.QueryParser do
{:ok, raw_time_range} <-
parse_time_range(site, Map.get(params, "date_range"), date, now),
utc_time_range = raw_time_range |> DateTimeRange.to_timezone("Etc/UTC"),
{:ok, metrics} <- parse_metrics(Map.get(params, "metrics", [])),
{:ok, filters} <- parse_filters(Map.get(params, "filters", [])),
{:ok, preloaded_segments} <-
Plausible.Segments.Filters.preload_needed_segments(site, filters),
{:ok, filters} <-
Plausible.Segments.Filters.resolve_segments(filters, preloaded_segments),
{:ok, dimensions} <- parse_dimensions(Map.get(params, "dimensions", [])),
{:ok, order_by} <- parse_order_by(Map.get(params, "order_by")),
{:ok, include} <- parse_include(Map.get(params, "include", %{}), site),
{:ok, pagination} <- parse_pagination(Map.get(params, "pagination", %{})),
{preloaded_goals, revenue_warning, revenue_currencies} <-
preload_goals_and_revenue(site, metrics, filters, dimensions),
consolidated_site_ids = get_consolidated_site_ids(site),
query = %{
now: now,
consolidated_site_ids: consolidated_site_ids,
input_date_range: Map.get(params, "date_range"),
metrics: metrics,
filters: filters,
utc_time_range: utc_time_range,
dimensions: dimensions,
order_by: order_by,
timezone: site.timezone,
include: include,
pagination: pagination,
preloaded_goals: preloaded_goals,
revenue_warning: revenue_warning,
revenue_currencies: revenue_currencies
},
:ok <- validate_order_by(query),
:ok <- validate_custom_props_access(site, query),
:ok <- validate_toplevel_only_filter_dimension(query),
:ok <- validate_special_metrics_filters(query),
:ok <- validate_behavioral_filters(query),
:ok <- validate_filtered_goals_exist(query),
:ok <- validate_revenue_metrics_access(site, query),
:ok <- validate_metrics(query),
:ok <- validate_include(query) do
{:ok, query}
{:ok, metrics} <- parse_metrics(Map.fetch!(params, "metrics")),
{:ok, filters} <- parse_filters(params["filters"]),
{:ok, dimensions} <- parse_dimensions(params["dimensions"]),
{:ok, order_by} <- parse_order_by(params["order_by"]),
{:ok, pagination} <- parse_pagination(params["pagination"]),
{:ok, include} <- parse_include(params["include"], site) do
{:ok,
Plausible.Stats.ParsedQueryParams.new!(%{
now: now,
utc_time_range: utc_time_range,
metrics: metrics,
filters: filters,
dimensions: dimensions,
order_by: order_by,
pagination: pagination,
include: include
})}
end
end
on_ee do
def get_consolidated_site_ids(%Plausible.Site{} = site) do
if Plausible.Sites.consolidated?(site) do
Plausible.ConsolidatedView.Cache.get(site.domain)
end
end
else
def get_consolidated_site_ids(_site), do: nil
end
def parse_date_range_pair(site, [from, to]) when is_binary(from) and is_binary(to) do
with {:ok, date_range} <- date_range_from_date_strings(site, from, to) do
{:ok, date_range |> DateTimeRange.to_timezone("Etc/UTC")}
@ -110,7 +57,7 @@ defmodule Plausible.Stats.Filters.QueryParser do
parse_list(filters, &parse_filter/1)
end
def parse_filters(_invalid_metrics), do: {:error, "Invalid filters passed."}
def parse_filters(nil), do: {:ok, nil}
defp parse_filter(filter) do
with {:ok, operator} <- parse_operator(filter),
@ -308,6 +255,8 @@ defmodule Plausible.Stats.Filters.QueryParser do
)
end
defp parse_dimensions(nil), do: {:ok, nil}
def parse_order_by(order_by) when is_list(order_by) do
parse_list(order_by, &parse_order_by_entry/1)
end
@ -359,16 +308,19 @@ defmodule Plausible.Stats.Filters.QueryParser do
defp parse_order_direction(entry), do: {:error, "Invalid order_by entry '#{i(entry)}'."}
def parse_include(include, site) when is_map(include) do
with {:ok, include} <- atomize_include_keys(include),
{:ok, include} <- update_comparisons_date_range(include, site) do
{:ok, Map.merge(@default_include, include)}
with {:ok, include} <- atomize_include_keys(include) do
update_comparisons_date_range(include, site)
end
end
def parse_include(nil, _site), do: {:ok, nil}
def parse_include(include, _site), do: {:error, "Invalid include '#{i(include)}'."}
defp atomize_include_keys(map) do
expected_keys = @default_include |> Map.keys() |> Enum.map(&Atom.to_string/1)
expected_keys =
Plausible.Stats.ParsedQueryParams.default_include()
|> Map.keys()
|> Enum.map(&Atom.to_string/1)
if Map.keys(map) |> Enum.all?(&(&1 in expected_keys)) do
{:ok, atomize_keys(map)}
@ -386,9 +338,12 @@ defmodule Plausible.Stats.Filters.QueryParser do
defp update_comparisons_date_range(include, _site), do: {:ok, include}
defp parse_pagination(pagination) when is_map(pagination) do
{:ok, Map.merge(@default_pagination, atomize_keys(pagination))}
{:ok,
Map.merge(Plausible.Stats.ParsedQueryParams.default_pagination(), atomize_keys(pagination))}
end
defp parse_pagination(nil), do: {:ok, nil}
defp atomize_keys(map) when is_map(map) do
Map.new(map, fn {key, value} ->
key = String.to_existing_atom(key)
@ -429,258 +384,6 @@ defmodule Plausible.Stats.Filters.QueryParser do
end
end
defp validate_order_by(query) do
if query.order_by do
valid_values = query.metrics ++ query.dimensions
invalid_entry =
Enum.find(query.order_by, fn {value, _direction} ->
not Enum.member?(valid_values, value)
end)
case invalid_entry do
nil ->
:ok
_ ->
{:error,
"Invalid order_by entry '#{i(invalid_entry)}'. Entry is not a queried metric or dimension."}
end
else
:ok
end
end
def preload_goals_and_revenue(site, metrics, filters, dimensions) do
preloaded_goals =
Plausible.Stats.Goals.preload_needed_goals(site, dimensions, filters)
{revenue_warning, revenue_currencies} =
preload_revenue(site, preloaded_goals, metrics, dimensions)
{
preloaded_goals,
revenue_warning,
revenue_currencies
}
end
@only_toplevel ["event:goal", "event:hostname"]
defp validate_toplevel_only_filter_dimension(query) do
not_toplevel =
query.filters
|> Filters.dimensions_used_in_filters(min_depth: 1, behavioral_filters: :ignore)
|> Enum.filter(&(&1 in @only_toplevel))
if Enum.count(not_toplevel) > 0 do
{:error,
"Invalid filters. Dimension `#{List.first(not_toplevel)}` can only be filtered at the top level."}
else
:ok
end
end
@special_metrics [:conversion_rate, :group_conversion_rate]
defp validate_special_metrics_filters(query) do
special_metric? = Enum.any?(@special_metrics, &(&1 in query.metrics))
deep_custom_property? =
query.filters
|> Filters.dimensions_used_in_filters(min_depth: 1)
|> Enum.any?(fn dimension -> String.starts_with?(dimension, "event:props:") end)
if special_metric? and deep_custom_property? do
{:error,
"Invalid filters. When `conversion_rate` or `group_conversion_rate` metrics are used, custom property filters can only be used on top level."}
else
:ok
end
end
defp validate_behavioral_filters(query) do
query.filters
|> Filters.traverse(0, fn behavioral_depth, operator ->
if operator in [:has_done, :has_not_done] do
behavioral_depth + 1
else
behavioral_depth
end
end)
|> Enum.reduce_while(:ok, fn {[_operator, dimension | _rest], behavioral_depth}, :ok ->
cond do
behavioral_depth == 0 ->
# ignore non-behavioral filters
{:cont, :ok}
behavioral_depth > 1 ->
{:halt,
{:error,
"Invalid filters. Behavioral filters (has_done, has_not_done) cannot be nested."}}
not String.starts_with?(dimension, "event:") ->
{:halt,
{:error,
"Invalid filters. Behavioral filters (has_done, has_not_done) can only be used with event dimension filters."}}
true ->
{:cont, :ok}
end
end)
end
defp validate_filtered_goals_exist(query) do
# Note: We don't check :contains goal filters since it's acceptable if they match nothing.
goal_filter_clauses =
query.filters
|> Filters.all_leaf_filters()
|> Enum.flat_map(fn
[:is, "event:goal", clauses] -> clauses
_ -> []
end)
if length(goal_filter_clauses) > 0 do
configured_goal_names =
query.preloaded_goals.all
|> Enum.map(&Plausible.Goal.display_name/1)
validate_list(goal_filter_clauses, &validate_goal_filter(&1, configured_goal_names))
else
:ok
end
end
on_ee do
alias Plausible.Stats.Goal.Revenue
def preload_revenue(site, preloaded_goals, metrics, dimensions) do
Revenue.preload(site, preloaded_goals, metrics, dimensions)
end
defp validate_revenue_metrics_access(site, query) do
if Revenue.requested?(query.metrics) and not Revenue.available?(site) do
{:error, "The owner of this site does not have access to the revenue metrics feature."}
else
:ok
end
end
else
defp preload_revenue(_site, _preloaded_goals, _metrics, _dimensions), do: {nil, %{}}
defp validate_revenue_metrics_access(_site, _query), do: :ok
end
defp validate_goal_filter(clause, configured_goal_names) do
if Enum.member?(configured_goal_names, clause) do
:ok
else
{:error,
"Invalid filters. The goal `#{clause}` is not configured for this site. Find out how to configure goals here: https://plausible.io/docs/stats-api#filtering-by-goals"}
end
end
defp validate_custom_props_access(site, query) do
allowed_props = Plausible.Props.allowed_for(site, bypass_setup?: true)
validate_custom_props_access(site, query, allowed_props)
end
defp validate_custom_props_access(_site, _query, :all), do: :ok
defp validate_custom_props_access(_site, query, allowed_props) do
valid? =
query.filters
|> Filters.dimensions_used_in_filters()
|> Enum.concat(query.dimensions)
|> Enum.all?(fn
"event:props:" <> prop -> prop in allowed_props
_ -> true
end)
if valid? do
:ok
else
{:error, "The owner of this site does not have access to the custom properties feature."}
end
end
defp validate_metrics(query) do
with :ok <- validate_list(query.metrics, &validate_metric(&1, query)) do
TableDecider.validate_no_metrics_dimensions_conflict(query)
end
end
defp validate_metric(metric, query) when metric in [:conversion_rate, :group_conversion_rate] do
if Enum.member?(query.dimensions, "event:goal") or
Filters.filtering_on_dimension?(query, "event:goal", behavioral_filters: :ignore) do
:ok
else
{:error, "Metric `#{metric}` can only be queried with event:goal filters or dimensions."}
end
end
defp validate_metric(:scroll_depth = metric, query) do
page_dimension? = Enum.member?(query.dimensions, "event:page")
toplevel_page_filter? = not is_nil(Filters.get_toplevel_filter(query, "event:page"))
if page_dimension? or toplevel_page_filter? do
:ok
else
{:error, "Metric `#{metric}` can only be queried with event:page filters or dimensions."}
end
end
defp validate_metric(:exit_rate = metric, query) do
case {query.dimensions, TableDecider.sessions_join_events?(query)} do
{["visit:exit_page"], false} ->
:ok
{["visit:exit_page"], true} ->
{:error, "Metric `#{metric}` cannot be queried when filtering on event dimensions."}
_ ->
{:error,
"Metric `#{metric}` requires a `\"visit:exit_page\"` dimension. No other dimensions are allowed."}
end
end
defp validate_metric(:views_per_visit = metric, query) do
cond do
Filters.filtering_on_dimension?(query, "event:page", behavioral_filters: :ignore) ->
{:error, "Metric `#{metric}` cannot be queried with a filter on `event:page`."}
length(query.dimensions) > 0 ->
{:error, "Metric `#{metric}` cannot be queried with `dimensions`."}
true ->
:ok
end
end
defp validate_metric(:time_on_page = metric, query) do
cond do
Enum.member?(query.dimensions, "event:page") ->
:ok
Filters.filtering_on_dimension?(query, "event:page", behavioral_filters: :ignore) ->
:ok
true ->
{:error, "Metric `#{metric}` can only be queried with event:page filters or dimensions."}
end
end
defp validate_metric(_, _), do: :ok
defp validate_include(query) do
time_dimension? = Enum.any?(query.dimensions, &Time.time_dimension?/1)
if query.include.time_labels and not time_dimension? do
{:error, "Invalid include.time_labels: requires a time dimension."}
else
:ok
end
end
defp i(value), do: inspect(value, charlists: :as_lists)
defp parse_list(list, parser_function) do
@ -691,13 +394,4 @@ defmodule Plausible.Stats.Filters.QueryParser do
end
end)
end
defp validate_list(list, parser_function) do
Enum.reduce_while(list, :ok, fn value, :ok ->
case parser_function.(value) do
:ok -> {:cont, :ok}
{:error, _} = error -> {:halt, error}
end
end)
end
end


@ -9,7 +9,7 @@ defmodule PlausibleWeb.Api.ExternalQueryApiController do
def query(conn, params) do
site = Repo.preload(conn.assigns.site, :owners)
case Query.build(site, conn.assigns.schema_type, params, debug_metadata(conn)) do
case Query.parse_and_build(site, conn.assigns.schema_type, params, debug_metadata(conn)) do
{:ok, query} ->
results = Plausible.Stats.query(site, query)
json(conn, results)


@ -545,7 +545,7 @@ defmodule PlausibleWeb.Live.GoalSettings.Form do
def suggest_page_paths(input, site) do
query =
Plausible.Stats.Query.build!(
Plausible.Stats.Query.parse_and_build!(
site,
:internal,
%{


@ -252,7 +252,7 @@ defmodule PlausibleWeb.Live.Shields.HostnameRules do
def suggest_hostnames(input, _options, site) do
query =
Plausible.Stats.Query.build!(
Plausible.Stats.Query.parse_and_build!(
site,
:internal,
%{


@ -248,7 +248,7 @@ defmodule PlausibleWeb.Live.Shields.PageRules do
def suggest_page_paths(input, _options, site, page_rules) do
query =
Plausible.Stats.Query.build!(
Plausible.Stats.Query.parse_and_build!(
site,
:internal,
%{


@ -88,7 +88,7 @@ defmodule Plausible.Workers.SendEmailReport do
defp stats_aggregates(site, date_range) do
query =
Query.build!(
Query.parse_and_build!(
site,
:internal,
%{
@ -120,7 +120,7 @@ defmodule Plausible.Workers.SendEmailReport do
defp pages(site, date_range) do
query =
Query.build!(
Query.parse_and_build!(
site,
:internal,
%{
@ -145,7 +145,7 @@ defmodule Plausible.Workers.SendEmailReport do
defp sources(site, date_range) do
query =
Query.build!(
Query.parse_and_build!(
site,
:internal,
%{
@ -171,7 +171,7 @@ defmodule Plausible.Workers.SendEmailReport do
defp goals(site, date_range) do
query =
Query.build!(
Query.parse_and_build!(
site,
:internal,
%{


@ -137,7 +137,7 @@ defmodule Plausible.Workers.TrafficChangeNotifier do
defp put_sources(stats, site) do
query =
Query.build!(
Query.parse_and_build!(
site,
:internal,
Map.merge(@base_query_params, %{
@ -154,7 +154,7 @@ defmodule Plausible.Workers.TrafficChangeNotifier do
defp put_pages(stats, site) do
query =
Query.build!(
Query.parse_and_build!(
site,
:internal,
Map.merge(@base_query_params, %{


@ -428,7 +428,7 @@ defmodule Plausible.Stats.ComparisonsTest do
defp build_comparison_query(site, params) do
query =
Query.build!(
Query.parse_and_build!(
site,
:internal,
Map.merge(


@ -1,9 +1,8 @@
defmodule Plausible.Stats.QueryTest do
defmodule Plausible.Stats.Query.QueryFromTest do
use Plausible.DataCase, async: true
use Plausible.Teams.Test
alias Plausible.Stats.Query
alias Plausible.Stats.Legacy.QueryBuilder
alias Plausible.Stats.Filters.QueryParser
alias Plausible.Stats.DateTimeRange
doctest Plausible.Stats.Legacy.QueryBuilder


@ -1,7 +1,7 @@
defmodule Plausible.Stats.QueryOptimizerTest do
defmodule Plausible.Stats.Query.QueryOptimizerTest do
use Plausible.DataCase, async: true
alias Plausible.Stats.{Query, QueryOptimizer, DateTimeRange}
alias Plausible.Stats.{Query, QueryOptimizer, DateTimeRange, ParsedQueryParams}
@default_params %{metrics: [:visitors]}
@ -154,8 +154,6 @@ defmodule Plausible.Stats.QueryOptimizerTest do
end
describe "trim_relative_date_range" do
alias Plausible.Stats.Filters.QueryParser
test "trims current month period when flag is set" do
now = DateTime.new!(~D[2024-01-15], ~T[12:00:00], "UTC")
@ -165,7 +163,7 @@ defmodule Plausible.Stats.QueryOptimizerTest do
input_date_range: "month",
now: now,
timezone: "UTC",
include: Map.put(QueryParser.default_include(), :trim_relative_date_range, true)
include: Map.put(ParsedQueryParams.default_include(), :trim_relative_date_range, true)
})
assert result.utc_time_range.first == ~U[2024-01-01 00:00:00Z]
@ -181,7 +179,7 @@ defmodule Plausible.Stats.QueryOptimizerTest do
input_date_range: "year",
now: now,
timezone: "UTC",
include: Map.put(QueryParser.default_include(), :trim_relative_date_range, true)
include: Map.put(ParsedQueryParams.default_include(), :trim_relative_date_range, true)
})
assert result.utc_time_range.first == ~U[2024-01-01 00:00:00Z]
@ -197,7 +195,7 @@ defmodule Plausible.Stats.QueryOptimizerTest do
input_date_range: "day",
now: now,
timezone: "UTC",
include: Map.put(QueryParser.default_include(), :trim_relative_date_range, true)
include: Map.put(ParsedQueryParams.default_include(), :trim_relative_date_range, true)
})
assert result.utc_time_range.first == ~U[2024-01-15 00:00:00Z]
@ -214,7 +212,7 @@ defmodule Plausible.Stats.QueryOptimizerTest do
input_date_range: "month",
now: now,
timezone: "UTC",
include: Map.put(QueryParser.default_include(), :trim_relative_date_range, true)
include: Map.put(ParsedQueryParams.default_include(), :trim_relative_date_range, true)
})
assert result.utc_time_range == original_range
@ -230,7 +228,7 @@ defmodule Plausible.Stats.QueryOptimizerTest do
input_date_range: "year",
now: now,
timezone: "UTC",
include: Map.put(QueryParser.default_include(), :trim_relative_date_range, true)
include: Map.put(ParsedQueryParams.default_include(), :trim_relative_date_range, true)
})
assert result.utc_time_range == original_range
@ -246,7 +244,7 @@ defmodule Plausible.Stats.QueryOptimizerTest do
input_date_range: "day",
now: now,
timezone: "UTC",
include: Map.put(QueryParser.default_include(), :trim_relative_date_range, true)
include: Map.put(ParsedQueryParams.default_include(), :trim_relative_date_range, true)
})
assert result.utc_time_range == original_range
@ -264,7 +262,7 @@ defmodule Plausible.Stats.QueryOptimizerTest do
timezone: "UTC",
include:
Map.merge(
QueryParser.default_include(),
ParsedQueryParams.default_include(),
%{comparisons: %{mode: "previous_period"}, trim_relative_date_range: true}
)
})
@ -283,7 +281,7 @@ defmodule Plausible.Stats.QueryOptimizerTest do
timezone: "UTC",
include:
Map.merge(
QueryParser.default_include(),
ParsedQueryParams.default_include(),
%{comparisons: %{mode: "previous_period"}, trim_relative_date_range: true}
)
})
@ -302,7 +300,7 @@ defmodule Plausible.Stats.QueryOptimizerTest do
input_date_range: "month",
now: now,
timezone: "UTC",
include: Map.put(QueryParser.default_include(), :trim_relative_date_range, false)
include: Map.put(ParsedQueryParams.default_include(), :trim_relative_date_range, false)
})
assert result.utc_time_range == original_range
@ -318,7 +316,7 @@ defmodule Plausible.Stats.QueryOptimizerTest do
input_date_range: "month",
now: now,
timezone: "UTC",
include: QueryParser.default_include()
include: ParsedQueryParams.default_include()
})
assert result.utc_time_range == original_range
@ -335,7 +333,7 @@ defmodule Plausible.Stats.QueryOptimizerTest do
input_date_range: "7d",
now: now,
timezone: "UTC",
include: Map.put(QueryParser.default_include(), :trim_relative_date_range, true)
include: Map.put(ParsedQueryParams.default_include(), :trim_relative_date_range, true)
})
assert result.utc_time_range == original_range
@ -352,7 +350,7 @@ defmodule Plausible.Stats.QueryOptimizerTest do
input_date_range: "year",
now: now,
timezone: "America/New_York",
include: Map.put(QueryParser.default_include(), :trim_relative_date_range, true)
include: Map.put(ParsedQueryParams.default_include(), :trim_relative_date_range, true)
})
nyc_mar_15_end =

File diff suppressed because it is too large.


@ -0,0 +1,22 @@
defmodule Plausible.Stats.Query.QueryParserTest do
use Plausible.DataCase
import Plausible.Stats.QueryParser
setup [:create_user, :create_site]
test "parsing empty map fails", %{site: site} do
assert {:error, "#: Required properties site_id, metrics, date_range were not present."} =
parse(site, :public, %{})
end
test "invalid metric passed", %{site: site} do
params = %{
"site_id" => site.domain,
"metrics" => ["visitors", "event:name"],
"date_range" => "all"
}
assert {:error, "#/metrics/1: Invalid metric \"event:name\""} =
parse(site, :public, params)
end
end


@ -1,4 +1,4 @@
defmodule Plausible.Stats.QueryResultTest do
defmodule Plausible.Stats.Query.QueryResultTest do
use Plausible.DataCase, async: true
use Plausible.Teams.Test
alias Plausible.Stats.{Query, QueryRunner, QueryResult, QueryOptimizer}
@ -18,7 +18,7 @@ defmodule Plausible.Stats.QueryResultTest do
test "query!/3 raises on error on site_id mismatch", %{site: site} do
assert_raise FunctionClauseError, fn ->
Query.build!(
Query.parse_and_build!(
site,
:public,
%{
@ -32,7 +32,7 @@ defmodule Plausible.Stats.QueryResultTest do
assert_raise RuntimeError,
~s/Failed to build query: "#: Required properties metrics, date_range were not present."/,
fn ->
Query.build!(
Query.parse_and_build!(
site,
:public,
%{
@ -44,7 +44,7 @@ defmodule Plausible.Stats.QueryResultTest do
test "serializing query to JSON keeps keys ordered", %{site: site} do
query =
Query.build!(
Query.parse_and_build!(
site,
:public,
%{

File diff suppressed because it is too large.