analytics/lib/plausible/ingestion/request.ex

431 lines
12 KiB
Elixir

defmodule Plausible.Ecto.EventName do
  @moduledoc """
  Ecto type used for event names.

  Binaries are accepted unchanged and integers are converted to their string
  representation (integer names matter for 404 tracking). Any other value is
  rejected with a cast error. The underlying type is `:string`.
  """

  use Ecto.Type

  # Underlying Ecto type.
  def type do
    :string
  end

  # Accepts strings and integers; everything else is a cast error.
  def cast(name) do
    cond do
      is_binary(name) -> {:ok, name}
      is_integer(name) -> {:ok, Integer.to_string(name)}
      true -> :error
    end
  end

  # Loading and dumping pass the value through untouched.
  def load(stored) do
    {:ok, stored}
  end

  def dump(name) do
    {:ok, name}
  end
end
defmodule Plausible.Ingestion.Request do
@moduledoc """
The %Plausible.Ingestion.Request{} struct stores all needed fields
to create an event downstream. Preliminary validation is performed
to detect user errors early.
"""
use Ecto.Schema
use Plausible
alias Ecto.Changeset
# Size limit applied to the request URL (in bytes) and used as the truncation
# length for the referrer.
@max_url_size 2_000

# Sentinels stored when the corresponding engagement value is absent or invalid.
@missing_scroll_depth 255
@missing_engagement_time 0

# :KLUDGE: Old version of tracker script sent huge values for engagement time. Ignore
# these while users might still have the old script cached.
@too_large_engagement_time :timer.hours(30 * 24)

@blank_engagement_error_message "engagement event requires a valid integer value for at least one of 'sd' or 'e' fields"

@doc """
Returns the exclusive upper bound (in milliseconds, 30 days) for accepted engagement times.
"""
def too_large_engagement_time() do
  @too_large_engagement_time
end

@doc """
Returns the error message used when an engagement event has neither a usable 'sd' nor 'e' value.
"""
def blank_engagement_error_message() do
  @blank_engagement_error_message
end
# The request is an in-memory struct only, so no primary key field is defined.
@primary_key false
# Fields are filled in by the put_*/map_domains steps of build/2.
embedded_schema do
# Value returned by PlausibleWeb.RemoteIP.get/1 for the connection.
field :remote_ip, :string
# First "user-agent" request header, if any.
field :user_agent, :string
# Taken from the "n" / "name" body keys; required, at most 120 characters.
field :event_name, Plausible.Ecto.EventName
# Parsed %URI{} of the "u" / "url" body key.
field :uri, :map
# Host of :uri with a leading "www." removed, or "(none)" when there is no host.
field :hostname, :string
# "r" / "referrer" body key, truncated to @max_url_size characters.
field :referrer, :string
# Comma-separated "d" / "domain" body key, falling back to the host of :uri.
field :domains, {:array, :string}
# First "x-plausible-ip-type" request header, if any.
field :ip_classification, :string
# "h" / "hashMode" body key; a value of 1 appends the URL fragment to :pathname.
field :hash_mode, :integer
# Decoded path of :uri ("/" when missing).
field :pathname, :string
# Custom properties from the "m" / "meta" / "p" / "props" body keys (at most 30).
field :props, :map
# Only set for "engagement" events, from the "sd" and "e" body keys.
field :scroll_depth, :integer
field :engagement_time, :integer
# "v" body key when it is an integer.
field :tracker_script_version, :integer, default: 0
# "i" / "interactive" body key when it is a boolean.
field :interactive?, :boolean, default: true
# NOTE(review): on_ee presumably comes from `use Plausible` and compiles this
# field only in the enterprise edition - confirm.
on_ee do
field :revenue_source, :map
end
# Decoded query string of :uri.
field :query_params, :map
# `now` passed to build/2, truncated to seconds.
field :timestamp, :naive_datetime
end
@type t() :: %__MODULE__{}
@doc """
Builds a `%Plausible.Ingestion.Request{}` from a `%Plug.Conn{}` and runs the initial validation.

The request body is obtained via `parse_body/1`. When it cannot be parsed as a JSON
object, `{:error, changeset}` is returned with an error on `:request`. Otherwise every
field is populated from the body and the connection, the required fields are checked,
and the result is `{:ok, request, conn}` or `{:error, changeset}`.

`now` is truncated to seconds and stored as the request timestamp.
"""
@spec build(Plug.Conn.t(), NaiveDateTime.t()) ::
        {:ok, t(), Plug.Conn.t()} | {:error, Changeset.t()}
def build(%Plug.Conn{} = conn, now \\ NaiveDateTime.utc_now()) do
  timestamp = NaiveDateTime.truncate(now, :second)

  base_changeset =
    %__MODULE__{}
    |> Changeset.change()
    |> Changeset.put_change(:timestamp, timestamp)

  case parse_body(conn) do
    {:error, :invalid_json} ->
      {:error,
       Changeset.add_error(base_changeset, :request, "Unable to parse request body as json")}

    {:ok, request_body, conn} ->
      # An {:error, changeset} result falls through unchanged.
      with {:ok, request} <- populate_and_validate(base_changeset, request_body, conn) do
        {:ok, request, conn}
      end
  end
end

# Runs every population step in order, then validates and applies the changeset.
defp populate_and_validate(changeset, request_body, conn) do
  changeset
  |> put_ip_classification(conn)
  |> put_remote_ip(conn)
  |> put_uri(request_body)
  |> put_hostname()
  |> put_user_agent(conn)
  |> put_request_params(request_body)
  |> put_referrer(request_body)
  |> put_pathname()
  |> put_props(request_body)
  |> put_engagement_fields(request_body)
  |> put_query_params()
  |> put_revenue_source(request_body)
  |> put_interactive(request_body)
  |> put_tracker_script_version(request_body)
  |> map_domains(request_body)
  |> Changeset.validate_required([:event_name, :hostname, :pathname, :timestamp])
  |> Changeset.validate_length(:event_name, max: 120)
  |> Changeset.apply_action(nil)
end
# NOTE(review): on_ee presumably selects the first branch only in the enterprise
# edition - confirm against the macro's definition.
on_ee do
  # Delegates revenue handling to the dedicated Revenue module.
  defp put_revenue_source(changeset, request_body),
    do: Plausible.Ingestion.Request.Revenue.put_revenue_source(changeset, request_body)
else
  # Without the revenue module the changeset is returned untouched.
  defp put_revenue_source(changeset, _request_body) do
    changeset
  end
end
# Stores the remote IP resolved by PlausibleWeb.RemoteIP for this connection.
defp put_remote_ip(changeset, conn) do
  remote_ip = PlausibleWeb.RemoteIP.get(conn)

  Changeset.put_change(changeset, :remote_ip, remote_ip)
end
# Returns {:ok, params, conn} or {:error, :invalid_json}.
#
# When the body has not been fetched yet it is read in one go (limited by
# conn.assigns[:read_body_limit], defaulting to 1_000_000) and decoded with Jason.
# Anything other than a successfully read body that decodes to a map is reported
# as invalid JSON.
defp parse_body(%{body_params: %Plug.Conn.Unfetched{}} = conn) do
  limit = conn.assigns[:read_body_limit] || 1_000_000

  with {:ok, raw_body, conn} <- Plug.Conn.read_body(conn, length: limit, read_length: limit),
       {:ok, %{} = decoded} <- Jason.decode(raw_body) do
    {:ok, decoded, conn}
  else
    _ -> {:error, :invalid_json}
  end
end

# Body params that are already present are used as they are.
defp parse_body(conn) do
  {:ok, conn.body_params, conn}
end
# Casts the event name ("n" / "name") and hash mode ("h" / "hashMode") from the body.
defp put_request_params(changeset, %{} = request_body) do
  event_name = request_body["n"] || request_body["name"]
  hash_mode = request_body["h"] || request_body["hashMode"]
  attrs = %{event_name: event_name, hash_mode: hash_mode}

  Changeset.cast(changeset, attrs, [:event_name, :hash_mode])
end
# Stores the referrer ("r" / "referrer") truncated to @max_url_size characters.
# Non-string values are ignored.
defp put_referrer(changeset, %{} = request_body) do
  case request_body["r"] || request_body["referrer"] do
    referrer when is_binary(referrer) ->
      truncated = String.slice(referrer, 0, @max_url_size)
      Changeset.put_change(changeset, :referrer, truncated)

    _other ->
      changeset
  end
end
# Derives :pathname from the already stored :uri and :hash_mode.
defp put_pathname(changeset) do
  pathname =
    changeset
    |> Changeset.get_field(:uri)
    |> get_pathname(Changeset.get_field(changeset, :hash_mode))

  Changeset.put_change(changeset, :pathname, pathname)
end
# Adds a "path" prop equal to the request pathname when
# Plausible.Goals.SystemGoals.sync_props_path_with_pathname?/2 says so for this event.
defp maybe_set_props_path_to_pathname(props_in_request, changeset) do
  event_name = Changeset.get_field(changeset, :event_name)

  if Plausible.Goals.SystemGoals.sync_props_path_with_pathname?(event_name, props_in_request) do
    # The "path" prop goes to the head of the props so it is not cut off later.
    [{"path", Changeset.get_field(changeset, :pathname)} | props_in_request]
  else
    props_in_request
  end
end
# Fills :domains from the comma-separated "d" / "domain" body key. When that key is
# missing or not a string, the host of the request URI is used instead. A blank
# value or a missing host adds a "can't be blank" error on :domain.
defp map_domains(changeset, %{} = request_body) do
  domain_param = request_body["d"] || request_body["domain"]
  trimmed = is_binary(domain_param) && String.trim(domain_param)

  cond do
    trimmed == "" ->
      Changeset.add_error(changeset, :domain, "can't be blank")

    is_binary(trimmed) ->
      domains = for part <- String.split(trimmed, ","), do: sanitize_hostname(part)
      Changeset.put_change(changeset, :domains, domains)

    true ->
      case sanitize_hostname(Changeset.get_field(changeset, :uri)) do
        nil -> Changeset.add_error(changeset, :domain, "can't be blank")
        host_from_uri -> Changeset.put_change(changeset, :domains, [host_from_uri])
      end
  end
end
@disallowed_schemes ~w(data)

# Parses the "u" / "url" body key into a %URI{} and stores it in :uri.
#
# Errors are added on :url when the value is missing ("is required"), is not a
# string or exceeds @max_url_size bytes ("must be a valid url"), or uses a
# disallowed scheme ("scheme is not allowed").
defp put_uri(changeset, %{} = request_body) do
  case request_body["u"] || request_body["url"] do
    nil ->
      Changeset.add_error(changeset, :url, "is required")

    url when is_binary(url) and byte_size(url) <= @max_url_size ->
      case URI.parse(url) do
        %URI{scheme: scheme} when scheme in @disallowed_schemes ->
          Changeset.add_error(changeset, :url, "scheme is not allowed")

        %URI{} = parsed ->
          Changeset.put_change(changeset, :uri, parsed)
      end

    _invalid ->
      Changeset.add_error(changeset, :url, "must be a valid url")
  end
end
# Stores the sanitized host of :uri, or "(none)" when the URI has no usable host.
defp put_hostname(changeset) do
  hostname = hostname_for(Changeset.get_field(changeset, :uri))

  Changeset.put_change(changeset, :hostname, hostname)
end

# Picks the hostname to store for the given (possibly missing) URI.
defp hostname_for(%{host: host}) when is_binary(host) and host != "" do
  sanitize_hostname(host)
end

defp hostname_for(_uri), do: "(none)"
@max_props 30

# Collects custom props from the first present body key among "m", "meta", "p" and
# "props", drops unusable entries, optionally adds the "path" prop, keeps at most
# @max_props entries and validates key and value sizes.
defp put_props(changeset, %{} = request_body) do
  raw_props =
    request_body["m"] || request_body["meta"] || request_body["p"] || request_body["props"]

  accepted =
    raw_props
    |> Plausible.Helpers.JSON.decode_or_fallback()
    |> Enum.reduce([], &filter_bad_props/2)

  props =
    accepted
    |> maybe_set_props_path_to_pathname(changeset)
    |> Enum.take(@max_props)
    |> Map.new()

  validate_props(Changeset.put_change(changeset, :props, props))
end
# Stores the "i" / "interactive" flag when it is a boolean; otherwise the schema
# default stays in place.
defp put_interactive(changeset, %{} = request_body) do
  flag = request_body["i"] || request_body["interactive"]

  if is_boolean(flag) do
    Changeset.put_change(changeset, :interactive?, flag)
  else
    changeset
  end
end
# Reducer that keeps a prop only when neither key nor value is a list or map and
# neither is blank once converted to a string. Kept props are prepended to the
# accumulator as {string_key, string_value}.
defp filter_bad_props({k, v}, acc)
     when is_list(k) or is_map(k) or is_list(v) or is_map(v) do
  acc
end

defp filter_bad_props({k, v}, acc) do
  blank? = fn term -> String.trim_leading(to_string(term)) == "" end

  if blank?.(k) or blank?.(v) do
    acc
  else
    [{to_string(k), to_string(v)} | acc]
  end
end
@max_prop_key_length Plausible.Props.max_prop_key_length()
@max_prop_value_length Plausible.Props.max_prop_value_length()

# Adds a single error on :props when any key or value exceeds its byte limit.
defp validate_props(changeset) do
  oversized? =
    changeset
    |> Changeset.get_field(:props)
    |> Enum.any?(fn
      {key, value}
      when byte_size(key) > @max_prop_key_length or
             byte_size(value) > @max_prop_value_length ->
        true

      _ ->
        false
    end)

  if oversized? do
    Changeset.add_error(
      changeset,
      :props,
      "keys should have at most #{@max_prop_key_length} bytes and values #{@max_prop_value_length} bytes"
    )
  else
    changeset
  end
end
# Populates :scroll_depth and :engagement_time for "engagement" events; any other
# event is returned unchanged.
#
# "sd" and "e" are parsed by parse_scroll_depth/1 and parse_engagement_time/1, which
# fall back to @missing_scroll_depth / @missing_engagement_time for absent or invalid
# values. When both fall back, the event is rejected with
# @blank_engagement_error_message, the same text that is exposed through
# blank_engagement_error_message/0, so the two cannot drift apart.
defp put_engagement_fields(changeset, %{} = request_body) do
  if Changeset.get_field(changeset, :event_name) == "engagement" do
    scroll_depth = parse_scroll_depth(request_body["sd"])
    engagement_time = parse_engagement_time(request_body["e"])

    case {scroll_depth, engagement_time} do
      {@missing_scroll_depth, @missing_engagement_time} ->
        Changeset.add_error(changeset, :event_name, @blank_engagement_error_message)

      _ ->
        changeset
        |> Changeset.put_change(:scroll_depth, scroll_depth)
        |> Changeset.put_change(:engagement_time, engagement_time)
    end
  else
    changeset
  end
end
# Stores the tracker script version from the "v" body key when it is an integer;
# otherwise the schema default stays in place.
defp put_tracker_script_version(changeset, %{} = request_body) do
  version = request_body["v"]

  if is_integer(version) do
    Changeset.put_change(changeset, :tracker_script_version, version)
  else
    changeset
  end
end
# Decodes the query string of :uri into :query_params. Nothing is stored when
# there is no URI or it has no query string.
defp put_query_params(changeset) do
  with %{query: query} when is_binary(query) <- Changeset.get_field(changeset, :uri) do
    decoded = URI.decode_query(query)
    Changeset.put_change(changeset, :query_params, decoded)
  else
    _no_query -> changeset
  end
end
# Stores the first "x-plausible-ip-type" request header (nil when absent).
defp put_ip_classification(changeset, %Plug.Conn{} = conn) do
  classification =
    case Plug.Conn.get_req_header(conn, "x-plausible-ip-type") do
      [first | _rest] -> first
      [] -> nil
    end

  Changeset.put_change(changeset, :ip_classification, classification)
end
# Stores the first "user-agent" request header (nil when absent).
defp put_user_agent(changeset, %Plug.Conn{} = conn) do
  header_value =
    case Plug.Conn.get_req_header(conn, "user-agent") do
      [first | _rest] -> first
      [] -> nil
    end

  Changeset.put_change(changeset, :user_agent, header_value)
end
# Computes the pathname for a URI: the percent-decoded path ("/" when missing) with
# trailing whitespace removed. With hash_mode == 1 and a fragment present, the
# decoded fragment is appended after "#". A missing URI yields "/".
defp get_pathname(nil, _hash_mode) do
  "/"
end

defp get_pathname(uri, hash_mode) do
  base_path = String.trim_trailing(URI.decode(uri.path || "/"))

  cond do
    hash_mode != 1 -> base_path
    !uri.fragment -> base_path
    true -> base_path <> "#" <> URI.decode(uri.fragment)
  end
end
@doc """
Normalizes a hostname by trimming surrounding whitespace and removing one leading "www.".

Accepts a binary, a `%URI{}` (its host is used) or `nil`, which is returned as `nil`.
"""
def sanitize_hostname(nil), do: nil

def sanitize_hostname(%URI{host: host}), do: sanitize_hostname(host)

def sanitize_hostname(hostname) when is_binary(hostname) do
  case String.trim(hostname) do
    "www." <> without_www -> without_www
    trimmed -> trimmed
  end
end
# Normalizes a scroll depth value. Non-negative integers are capped at 100, strings
# must be a complete integer literal, and anything else (including negative numbers)
# becomes @missing_scroll_depth.
defp parse_scroll_depth(sd) when is_integer(sd) and sd >= 0 do
  min(sd, 100)
end

defp parse_scroll_depth(sd) when is_binary(sd) do
  with {parsed, ""} <- Integer.parse(sd) do
    parse_scroll_depth(parsed)
  else
    _ -> @missing_scroll_depth
  end
end

defp parse_scroll_depth(_other) do
  @missing_scroll_depth
end
# Normalizes an engagement time value. Integers in 0..(@too_large_engagement_time - 1)
# are kept, strings must be a complete integer literal, and anything else becomes
# @missing_engagement_time.
defp parse_engagement_time(et)
     when is_integer(et) and et >= 0 and et < @too_large_engagement_time do
  et
end

defp parse_engagement_time(et) when is_binary(et) do
  with {parsed, ""} <- Integer.parse(et) do
    parse_engagement_time(parsed)
  else
    _ -> @missing_engagement_time
  end
end

defp parse_engagement_time(_other) do
  @missing_engagement_time
end
end
defimpl Jason.Encoder, for: URI do
  # Encodes a URI as a JSON string.
  #
  # The rendered URI goes through Jason.Encode.string/2 so that double quotes,
  # backslashes and control characters are escaped (honouring the escape mode in
  # `opts`). Wrapping the raw text in quote characters would emit invalid JSON
  # for URIs that contain such characters.
  def encode(uri, opts) do
    uri
    |> URI.to_string()
    |> Jason.Encode.string(opts)
  end
end
defimpl Jason.Encoder, for: Plausible.Ingestion.Request do
  # Field names of the schema, resolved once at compile time.
  @schema_fields Plausible.Ingestion.Request.__schema__(:fields)

  # Encodes only the schema fields of the request, as a plain JSON object.
  def encode(request, opts) do
    schema_values = Map.take(request, @schema_fields)

    Jason.Encode.map(schema_values, opts)
  end
end