431 lines
12 KiB
Elixir
431 lines
12 KiB
Elixir
defmodule Plausible.Ecto.EventName do
  @moduledoc """
  Ecto type for event names.

  Binaries are accepted unchanged and integers are converted to their string
  representation, so the stored value is always a string. Any other input
  results in a cast error. Integer support is important for 404 tracking.
  """

  use Ecto.Type

  def type, do: :string

  def cast(name) do
    cond do
      is_binary(name) -> {:ok, name}
      is_integer(name) -> {:ok, Integer.to_string(name)}
      true -> :error
    end
  end

  # Values are passed through untouched in both directions.
  def load(stored), do: {:ok, stored}
  def dump(name), do: {:ok, name}
end
|
|
|
|
defmodule Plausible.Ingestion.Request do
|
|
@moduledoc """
|
|
The %Plausible.Ingestion.Request{} struct stores all needed fields
|
|
to create an event downstream. Preliminary validation is made
|
|
to detect user errors early.
|
|
"""
|
|
|
|
use Ecto.Schema
|
|
use Plausible
|
|
alias Ecto.Changeset
|
|
|
|
# Size cap applied to the incoming URL and to the referrer.
@max_url_size 2_000

# Sentinel values used when "sd" / "e" are absent or cannot be parsed.
@missing_scroll_depth 255
@missing_engagement_time 0

# :KLUDGE: An old version of the tracker script sent huge engagement time values.
# They are ignored for as long as users might still have that script cached.
@too_large_engagement_time :timer.hours(30 * 24)

@blank_engagement_error_message "engagement event requires a valid integer value for at least one of 'sd' or 'e' fields"

@doc """
Returns the exclusive upper bound (in milliseconds) for accepted engagement times.
"""
def too_large_engagement_time() do
  @too_large_engagement_time
end

@doc """
Returns the error message reported for engagement events that carry neither a
usable scroll depth nor a usable engagement time.
"""
def blank_engagement_error_message() do
  @blank_engagement_error_message
end
|
|
|
|
# Embedded schema without a primary key.
@primary_key false
embedded_schema do
  field :remote_ip, :string
  field :user_agent, :string
  # Accepts strings and integers, see Plausible.Ecto.EventName.
  field :event_name, Plausible.Ecto.EventName
  # Holds the parsed %URI{} of the tracked page.
  field :uri, :map
  field :hostname, :string
  field :referrer, :string
  field :domains, {:array, :string}
  field :ip_classification, :string
  field :hash_mode, :integer
  field :pathname, :string
  field :props, :map
  # The two fields below are only populated for "engagement" events.
  field :scroll_depth, :integer
  field :engagement_time, :integer
  field :tracker_script_version, :integer, default: 0
  field :interactive?, :boolean, default: true

  # Only declared when the `on_ee` block is compiled in.
  on_ee do
    field :revenue_source, :map
  end

  field :query_params, :map

  # Truncated to second precision by build/2.
  field :timestamp, :naive_datetime
end

@type t() :: %__MODULE__{}
|
|
|
|
@doc """
Builds a `%Plausible.Ingestion.Request{}` from a `%Plug.Conn{}` and runs the
initial validation on it.

`now` is stored as the request timestamp after being truncated to seconds.

Returns `{:ok, request, conn}` on success. Returns `{:error, changeset}` when
the body cannot be parsed as a JSON object or when validation fails.
"""
@spec build(Plug.Conn.t(), NaiveDateTime.t()) ::
        {:ok, t(), Plug.Conn.t()} | {:error, Changeset.t()}
def build(%Plug.Conn{} = conn, now \\ NaiveDateTime.utc_now()) do
  timestamp = NaiveDateTime.truncate(now, :second)

  base_changeset =
    Changeset.put_change(Changeset.change(%__MODULE__{}), :timestamp, timestamp)

  case parse_body(conn) do
    {:error, :invalid_json} ->
      {:error,
       Changeset.add_error(base_changeset, :request, "Unable to parse request body as json")}

    {:ok, request_body, conn} ->
      # The order of the steps matters: later steps read fields set by earlier
      # ones (e.g. the pathname is derived from :uri and :hash_mode).
      validated =
        base_changeset
        |> put_ip_classification(conn)
        |> put_remote_ip(conn)
        |> put_uri(request_body)
        |> put_hostname()
        |> put_user_agent(conn)
        |> put_request_params(request_body)
        |> put_referrer(request_body)
        |> put_pathname()
        |> put_props(request_body)
        |> put_engagement_fields(request_body)
        |> put_query_params()
        |> put_revenue_source(request_body)
        |> put_interactive(request_body)
        |> put_tracker_script_version(request_body)
        |> map_domains(request_body)
        |> Changeset.validate_required([:event_name, :hostname, :pathname, :timestamp])
        |> Changeset.validate_length(:event_name, max: 120)

      # An `{:error, changeset}` result is returned to the caller unchanged.
      with {:ok, request} <- Changeset.apply_action(validated, nil) do
        {:ok, request, conn}
      end
  end
end
|
|
|
|
# Revenue handling is delegated to the Revenue submodule inside the `on_ee`
# branch; the alternative branch leaves the changeset untouched.
on_ee do
  defp put_revenue_source(changeset, request_body) do
    __MODULE__.Revenue.put_revenue_source(changeset, request_body)
  end
else
  defp put_revenue_source(changeset, _request_body) do
    changeset
  end
end
|
|
|
|
# Stores the client IP, as resolved by PlausibleWeb.RemoteIP, on the changeset.
defp put_remote_ip(changeset, conn) do
  remote_ip = PlausibleWeb.RemoteIP.get(conn)
  Changeset.put_change(changeset, :remote_ip, remote_ip)
end
|
|
|
|
# Returns `{:ok, params, conn}` with the request parameters.
#
# When the body params have not been fetched yet, the raw body is read (up to
# `conn.assigns[:read_body_limit]`, 1_000_000 by default) and decoded as JSON.
# Anything other than a fully read body that decodes to a map yields
# `{:error, :invalid_json}`. Already-fetched params are returned as they are.
defp parse_body(conn) do
  case conn.body_params do
    %Plug.Conn.Unfetched{} ->
      limit = conn.assigns[:read_body_limit] || 1_000_000

      with {:ok, raw_body, conn} <-
             Plug.Conn.read_body(conn, length: limit, read_length: limit),
           {:ok, %{} = decoded} <- Jason.decode(raw_body) do
        {:ok, decoded, conn}
      else
        _ -> {:error, :invalid_json}
      end

    already_parsed ->
      {:ok, already_parsed, conn}
  end
end
|
|
|
|
# Casts the event name ("n" / "name") and hash mode ("h" / "hashMode") from
# the request body onto the changeset.
defp put_request_params(changeset, %{} = request_body) do
  event_name = request_body["n"] || request_body["name"]
  hash_mode = request_body["h"] || request_body["hashMode"]

  attrs = %{event_name: event_name, hash_mode: hash_mode}

  Changeset.cast(changeset, attrs, [:event_name, :hash_mode])
end
|
|
|
|
# Stores the referrer ("r" / "referrer") truncated to @max_url_size.
# Non-string referrers are ignored.
defp put_referrer(changeset, %{} = request_body) do
  case request_body["r"] || request_body["referrer"] do
    referrer when is_binary(referrer) ->
      truncated = String.slice(referrer, 0..(@max_url_size - 1))
      Changeset.put_change(changeset, :referrer, truncated)

    _ ->
      changeset
  end
end
|
|
|
|
# Derives :pathname from the :uri and :hash_mode fields already on the changeset.
defp put_pathname(changeset) do
  pathname =
    get_pathname(
      Changeset.get_field(changeset, :uri),
      Changeset.get_field(changeset, :hash_mode)
    )

  Changeset.put_change(changeset, :pathname, pathname)
end
|
|
|
|
# Prepends a "path" prop holding the request pathname when
# Plausible.Goals.SystemGoals asks for it for this event name and props.
defp maybe_set_props_path_to_pathname(props_in_request, changeset) do
  event_name = Changeset.get_field(changeset, :event_name)

  if Plausible.Goals.SystemGoals.sync_props_path_with_pathname?(event_name, props_in_request) do
    # "path" goes to the head of the props so that it is not cut off later.
    [{"path", Changeset.get_field(changeset, :pathname)} | props_in_request]
  else
    props_in_request
  end
end
|
|
|
|
# Fills :domains from the comma-separated "d" / "domain" value.
#
# A blank string is an error. When no string value is given, the host of the
# already parsed :uri is used instead; if that is missing too, an error is added.
defp map_domains(changeset, %{} = request_body) do
  case request_body["d"] || request_body["domain"] do
    given when is_binary(given) ->
      case String.trim(given) do
        "" ->
          Changeset.add_error(changeset, :domain, "can't be blank")

        trimmed ->
          domains = for part <- String.split(trimmed, ","), do: sanitize_hostname(part)
          Changeset.put_change(changeset, :domains, domains)
      end

    _not_a_string ->
      case sanitize_hostname(Changeset.get_field(changeset, :uri)) do
        nil -> Changeset.add_error(changeset, :domain, "can't be blank")
        from_uri -> Changeset.put_change(changeset, :domains, [from_uri])
      end
  end
end
|
|
|
|
@disallowed_schemes ~w(data)
# Parses the page URL ("u" / "url") into a %URI{} stored under :uri.
#
# Errors added under :url:
#   * "is required"           - no URL given
#   * "must be a valid url"   - not a string, or larger than @max_url_size bytes
#   * "scheme is not allowed" - the scheme is listed in @disallowed_schemes
defp put_uri(changeset, %{} = request_body) do
  case request_body["u"] || request_body["url"] do
    nil ->
      Changeset.add_error(changeset, :url, "is required")

    url when is_binary(url) and byte_size(url) <= @max_url_size ->
      parsed = URI.parse(url)

      if parsed.scheme in @disallowed_schemes do
        Changeset.add_error(changeset, :url, "scheme is not allowed")
      else
        Changeset.put_change(changeset, :uri, parsed)
      end

    _invalid ->
      Changeset.add_error(changeset, :url, "must be a valid url")
  end
end
|
|
|
|
# Sets :hostname to the sanitized URI host, or to "(none)" when the :uri field
# has no non-empty host.
defp put_hostname(changeset) do
  hostname =
    with %{host: host} when is_binary(host) and host != "" <-
           Changeset.get_field(changeset, :uri) do
      sanitize_hostname(host)
    else
      _ -> "(none)"
    end

  Changeset.put_change(changeset, :hostname, hostname)
end
|
|
|
|
@max_props 30
# Collects custom props from "m", "meta", "p" or "props" (first one present),
# drops unusable entries, keeps at most @max_props of them and validates sizes.
defp put_props(changeset, %{} = request_body) do
  raw_props =
    request_body["m"] || request_body["meta"] || request_body["p"] || request_body["props"]

  usable_props =
    Enum.reduce(
      Plausible.Helpers.JSON.decode_or_fallback(raw_props),
      [],
      &filter_bad_props/2
    )

  props =
    usable_props
    |> maybe_set_props_path_to_pathname(changeset)
    |> Enum.take(@max_props)
    |> Map.new()

  validate_props(Changeset.put_change(changeset, :props, props))
end
|
|
|
|
# Applies the optional interactivity flag sent as "i" (or "interactive").
#
# The two keys must not be combined with `||`: `false` is the only value that
# differs from the schema default, and `false || other` would throw away an
# explicit `"i": false`. The long key is therefore consulted only when the
# short key is absent (nil). Non-boolean values leave the changeset untouched.
defp put_interactive(changeset, %{} = request_body) do
  flag =
    case request_body["i"] do
      nil -> request_body["interactive"]
      short_value -> short_value
    end

  if is_boolean(flag) do
    Changeset.put_change(changeset, :interactive?, flag)
  else
    changeset
  end
end
|
|
|
|
# Reducer that keeps a prop (as a pair of strings) unless its key or value is a
# list, a map, or blank once converted to a string.
defp filter_bad_props({k, v}, acc) do
  pair = [k, v]

  # The nested check comes first and short-circuits, so `to_string/1` is never
  # called on a list or a map.
  rejected? =
    Enum.any?(pair, fn item -> is_list(item) or is_map(item) end) or
      Enum.any?(pair, fn item -> String.trim_leading(to_string(item)) == "" end)

  if rejected? do
    acc
  else
    [{to_string(k), to_string(v)} | acc]
  end
end
|
|
|
|
@max_prop_key_length Plausible.Props.max_prop_key_length()
@max_prop_value_length Plausible.Props.max_prop_value_length()
# Adds a single :props error when any key or value exceeds its byte limit.
defp validate_props(changeset) do
  props = Changeset.get_field(changeset, :props)

  oversized? =
    Enum.any?(props, fn
      {key, value}
      when byte_size(key) > @max_prop_key_length or
             byte_size(value) > @max_prop_value_length ->
        true

      _ ->
        false
    end)

  if oversized? do
    Changeset.add_error(
      changeset,
      :props,
      "keys should have at most #{@max_prop_key_length} bytes and values #{@max_prop_value_length} bytes"
    )
  else
    changeset
  end
end
|
|
|
|
# For "engagement" events, parses scroll depth ("sd") and engagement time ("e")
# and stores them on the changeset. Other events are returned unchanged.
#
# When both values come back as their "missing" sentinels, the event carries no
# usable data and an error is added under :event_name. The message comes from
# @blank_engagement_error_message so it always matches what
# blank_engagement_error_message/0 returns.
defp put_engagement_fields(changeset, %{} = request_body) do
  if Changeset.get_field(changeset, :event_name) == "engagement" do
    scroll_depth = parse_scroll_depth(request_body["sd"])
    engagement_time = parse_engagement_time(request_body["e"])

    case {scroll_depth, engagement_time} do
      {@missing_scroll_depth, @missing_engagement_time} ->
        Changeset.add_error(changeset, :event_name, @blank_engagement_error_message)

      _ ->
        changeset
        |> Changeset.put_change(:scroll_depth, scroll_depth)
        |> Changeset.put_change(:engagement_time, engagement_time)
    end
  else
    changeset
  end
end
|
|
|
|
# Stores the tracker script version ("v") when it is an integer; any other
# value keeps the schema default.
defp put_tracker_script_version(changeset, %{} = request_body) do
  version = request_body["v"]

  if is_integer(version) do
    Changeset.put_change(changeset, :tracker_script_version, version)
  else
    changeset
  end
end
|
|
|
|
# Decodes the query string of the :uri field into :query_params, if there is one.
defp put_query_params(changeset) do
  with %{query: query} when is_binary(query) <- Changeset.get_field(changeset, :uri) do
    Changeset.put_change(changeset, :query_params, URI.decode_query(query))
  else
    _ -> changeset
  end
end
|
|
|
|
# Copies the first "x-plausible-ip-type" request header value (nil when the
# header is absent) into :ip_classification.
defp put_ip_classification(changeset, %Plug.Conn{} = conn) do
  header_values = Plug.Conn.get_req_header(conn, "x-plausible-ip-type")

  Changeset.put_change(changeset, :ip_classification, List.first(header_values))
end
|
|
|
|
# Copies the first "user-agent" request header value (nil when the header is
# absent) into :user_agent.
defp put_user_agent(changeset, %Plug.Conn{} = conn) do
  user_agent =
    case Plug.Conn.get_req_header(conn, "user-agent") do
      [first | _rest] -> first
      [] -> nil
    end

  Changeset.put_change(changeset, :user_agent, user_agent)
end
|
|
|
|
# Computes the pathname for a parsed URI.
#
# Without a URI the pathname is "/". Otherwise the path (or "/" when the URI
# has none) is percent-decoded and stripped of trailing whitespace. In hash
# mode (hash_mode == 1) a present fragment is appended after a "#".
defp get_pathname(nil, _hash_mode), do: "/"

defp get_pathname(uri, hash_mode) do
  decoded_path = URI.decode(uri.path || "/")
  pathname = String.trim_trailing(decoded_path)

  fragment_suffix =
    if hash_mode == 1 && uri.fragment do
      "#" <> URI.decode(uri.fragment)
    else
      ""
    end

  pathname <> fragment_suffix
end
|
|
|
|
@doc """
Normalizes a hostname by trimming surrounding whitespace and removing a
leading "www." prefix.

Accepts a hostname string, a `%URI{}` (its `host` is used) or `nil`, which is
returned as `nil`.
"""
@spec sanitize_hostname(URI.t() | String.t() | nil) :: String.t() | nil
def sanitize_hostname(nil), do: nil

def sanitize_hostname(%URI{host: host}), do: sanitize_hostname(host)

def sanitize_hostname(hostname) when is_binary(hostname) do
  case String.trim(hostname) do
    "www." <> without_www -> without_www
    trimmed -> trimmed
  end
end
|
|
|
|
# Normalizes a scroll depth value.
#
# Non-negative integers are capped at 100. Strings must consist of an integer
# only and are then handled like integers. Everything else (including negative
# numbers) yields @missing_scroll_depth.
defp parse_scroll_depth(sd) when is_integer(sd) and sd >= 0, do: min(sd, 100)

defp parse_scroll_depth(sd) when is_binary(sd) do
  with {parsed, ""} <- Integer.parse(sd) do
    parse_scroll_depth(parsed)
  else
    _ -> @missing_scroll_depth
  end
end

defp parse_scroll_depth(_other), do: @missing_scroll_depth
|
|
|
|
# Normalizes an engagement time value.
#
# Integers from 0 up to (but excluding) @too_large_engagement_time are kept.
# Strings must consist of an integer only and are then handled like integers.
# Everything else yields @missing_engagement_time.
defp parse_engagement_time(et)
     when is_integer(et) and et >= 0 and et < @too_large_engagement_time do
  et
end

defp parse_engagement_time(et) when is_binary(et) do
  with {parsed, ""} <- Integer.parse(et) do
    parse_engagement_time(parsed)
  else
    _ -> @missing_engagement_time
  end
end

defp parse_engagement_time(_other), do: @missing_engagement_time
|
|
end
|
|
|
|
defimpl Jason.Encoder, for: URI do
  # Encodes a URI as a JSON string.
  #
  # The string form goes through Jason's own string encoder so that characters
  # such as `"` and `\` are escaped. Wrapping the raw text in quotes by hand
  # produces invalid JSON for URLs that contain them.
  def encode(uri, opts) do
    uri
    |> URI.to_string()
    |> Jason.Encode.string(opts)
  end
end
|
|
|
|
defimpl Jason.Encoder, for: Plausible.Ingestion.Request do
  # Schema field names, resolved once at compile time.
  @schema_fields Plausible.Ingestion.Request.__schema__(:fields)

  # Encodes only the schema fields of the request as a JSON object.
  def encode(request, opts) do
    encodable = Map.take(request, @schema_fields)
    Jason.Encode.map(encodable, opts)
  end
end
|