diff --git a/extra/lib/plausible/installation_support/detection/checks.ex b/extra/lib/plausible/installation_support/detection/checks.ex index ffcf1df434..a078201593 100644 --- a/extra/lib/plausible/installation_support/detection/checks.ex +++ b/extra/lib/plausible/installation_support/detection/checks.ex @@ -37,8 +37,8 @@ defmodule Plausible.InstallationSupport.Detection.Checks do ) end - def telemetry_event_handled(), do: [:plausible, :detection, :handled] - def telemetry_event_unhandled(), do: [:plausible, :detection, :unhandled] + def telemetry_event_success(), do: [:plausible, :detection, :success] + def telemetry_event_failure(), do: [:plausible, :detection, :failure] def interpret_diagnostics(%State{ diagnostics: diagnostics, @@ -47,24 +47,36 @@ defmodule Plausible.InstallationSupport.Detection.Checks do }) do result = Detection.Diagnostics.interpret(diagnostics, url) - case result.data do - %{unhandled: true} -> - Sentry.capture_message("Unhandled case for detection", - extra: %{ - message: inspect(diagnostics), - url: url, - hash: :erlang.phash2(diagnostics) - } - ) + {failed?, trigger_sentry?, msg} = + case result do + %{ok?: true} -> + {false, false, nil} - Logger.warning( - "[DETECTION] Unhandled case (data_domain='#{data_domain}'): #{inspect(diagnostics)}" - ) + %{data: %{failure: :customer_website_issue}} -> + {true, false, "Failed due to an issue with the customer website"} - :telemetry.execute(telemetry_event_unhandled(), %{}) + %{data: %{failure: :browserless_issue}} -> + {true, true, "Failed due to a Browserless issue"} - _ -> - :telemetry.execute(telemetry_event_handled(), %{}) + _unknown_failure -> + {true, true, "Unknown failure"} + end + + if failed? do + :telemetry.execute(telemetry_event_failure(), %{}) + Logger.warning("[DETECTION] #{msg} (data_domain='#{data_domain}'): #{inspect(diagnostics)}") + else + :telemetry.execute(telemetry_event_success(), %{}) + end + + if trigger_sentry? do + Sentry.capture_message("[DETECTION] #{msg}", + extra: %{ + message: inspect(diagnostics), + url: url, + hash: :erlang.phash2(diagnostics) + } + ) end result diff --git a/extra/lib/plausible/installation_support/detection/diagnostics.ex b/extra/lib/plausible/installation_support/detection/diagnostics.ex index 40d00a8424..bff98fa57b 100644 --- a/extra/lib/plausible/installation_support/detection/diagnostics.ex +++ b/extra/lib/plausible/installation_support/detection/diagnostics.ex @@ -24,7 +24,7 @@ defmodule Plausible.InstallationSupport.Detection.Diagnostics do } = diagnostics, _url ) do - get_result("gtm", diagnostics) + success("gtm", diagnostics) end def interpret( @@ -34,7 +34,7 @@ defmodule Plausible.InstallationSupport.Detection.Diagnostics do } = diagnostics, _url ) do - get_result( + success( "wordpress", diagnostics ) @@ -47,7 +47,7 @@ defmodule Plausible.InstallationSupport.Detection.Diagnostics do } = diagnostics, _url ) do - get_result("npm", diagnostics) + success("npm", diagnostics) end def interpret( @@ -56,29 +56,46 @@ defmodule Plausible.InstallationSupport.Detection.Diagnostics do } = diagnostics, _url ) do - get_result(PlausibleWeb.Tracker.fallback_installation_type(), diagnostics) + success(PlausibleWeb.Tracker.fallback_installation_type(), diagnostics) + end + + def interpret(%__MODULE__{service_error: %{code: code}}, _url) + when code in [:domain_not_found, :invalid_url] do + failure(:customer_website_issue) + end + + def interpret(%__MODULE__{service_error: %{code: code}}, _url) + when code in [:bad_browserless_response, :browserless_timeout] do + failure(:browserless_issue) end def interpret( - %__MODULE__{ - service_error: service_error - }, + %__MODULE__{service_error: %{code: :browserless_client_error, extra: extra}}, _url - ) - when service_error in [:domain_not_found, :invalid_url], - do: %Result{ok?: false, data: nil, errors: [Atom.to_string(service_error)]} + ) do + cond do + String.contains?(extra, "net::") -> + failure(:customer_website_issue) - def interpret(%__MODULE__{} = _diagnostics, _url), do: unhandled_case() + String.contains?(String.downcase(extra), "execution context") -> + failure(:customer_website_issue) - defp unhandled_case() do + true -> + failure(:unknown_issue) + end + end + + def interpret(%__MODULE__{} = _diagnostics, _url), do: failure(:unknown_issue) + + defp failure(reason) do %Result{ ok?: false, - data: %{unhandled: true}, - errors: ["Unhandled detection case"] + data: %{failure: reason}, + errors: [reason] } end - defp get_result(suggested_technology, diagnostics) do + defp success(suggested_technology, diagnostics) do %Result{ ok?: true, data: %{ diff --git a/lib/plausible/telemetry/plausible_metrics.ex b/lib/plausible/telemetry/plausible_metrics.ex index 6e99e96e71..08f1eb3d40 100644 --- a/lib/plausible/telemetry/plausible_metrics.ex +++ b/lib/plausible/telemetry/plausible_metrics.ex @@ -185,15 +185,15 @@ defmodule Plausible.PromEx.Plugins.PlausibleMetrics do on_ee( do: counter( - metric_prefix ++ [:detection, :handled], - event_name: InstallationSupport.Detection.Checks.telemetry_event_handled() + metric_prefix ++ [:detection, :success], + event_name: InstallationSupport.Detection.Checks.telemetry_event_success() ) ), on_ee( do: counter( - metric_prefix ++ [:detection, :unhandled], - event_name: InstallationSupport.Detection.Checks.telemetry_event_unhandled() + metric_prefix ++ [:detection, :failure], + event_name: InstallationSupport.Detection.Checks.telemetry_event_failure() ) ), on_ee( diff --git a/test/plausible/installation_support/detection/checks_test.exs b/test/plausible/installation_support/detection/checks_test.exs index 2cfefedbc3..3134cf3201 100644 --- a/test/plausible/installation_support/detection/checks_test.exs +++ b/test/plausible/installation_support/detection/checks_test.exs @@ -10,14 +10,17 @@ defmodule Plausible.InstallationSupport.Detection.ChecksTest do alias Plausible.InstallationSupport.Result use Plausible.Test.Support.DNS import Plug.Conn + import ExUnit.CaptureLog @moduletag :capture_log + @expected_domain "example.com" + @working_url "https://#{@expected_domain}" + describe "running detection" do test "handles wordpress detection, retrying on 429" do - expected_domain = "example.com" url_to_verify = nil test = self() - stub_lookup_a_records(expected_domain) + stub_lookup_a_records(@expected_domain) get_context_from_body = fn conn -> {:ok, body, _conn} = Plug.Conn.read_body(conn) @@ -25,7 +28,7 @@ defmodule Plausible.InstallationSupport.Detection.ChecksTest do end detection_counter = - stub_detection_result(fn conn, request_i -> + stub_detection_result_with_counter(fn conn, request_i -> send(test, get_context_from_body.(conn)) case request_i do @@ -60,7 +63,7 @@ defmodule Plausible.InstallationSupport.Detection.ChecksTest do suggested_technology: "wordpress" } } == - Checks.run(url_to_verify, expected_domain, + Checks.run(url_to_verify, @expected_domain, report_to: nil, async?: false, slowdown: 0 @@ -76,7 +79,7 @@ defmodule Plausible.InstallationSupport.Detection.ChecksTest do "url" => ^any( :string, - ~r/https:\/\/#{expected_domain}\?plausible_verification=\d+$/ + ~r/https:\/\/#{@expected_domain}\?plausible_verification=\d+$/ ), "userAgent" => "Plausible Verification Agent - if abused, contact support@plausible.io" @@ -86,7 +89,192 @@ defmodule Plausible.InstallationSupport.Detection.ChecksTest do end end - defp stub_detection_result(handler) do + describe "observability" do + setup %{test: test, test_pid: test_pid} do + :telemetry.attach_many( + "#{test}-telemetry-handler", + [ + Checks.telemetry_event_success(), + Checks.telemetry_event_failure() + ], + fn event, %{}, _, _ -> + send(test_pid, {:telemetry_event, event}) + end, + %{} + ) + end + + test "successful detection -> no logs, telemetry :success" do + stub_lookup_a_records(@expected_domain) + + detection_stub = + json_response_detection_stub(%{ + "completed" => true, + "v1Detected" => nil, + "gtmLikely" => false, + "npm" => false, + "wordpressLikely" => true, + "wordpressPlugin" => false + }) + + state = run_checks(detection_stub) + + log = capture_log(fn -> Checks.interpret_diagnostics(state) end) + + assert log == "" + + assert_receive {:telemetry_event, telemetry_event} + assert telemetry_event == Checks.telemetry_event_success() + end + + test "domain not found -> customer website issue" do + stub_lookup_a_records(@expected_domain, []) + + detection_counter = + Req.Test.stub(Plausible.InstallationSupport.Checks.Detection, fn _conn -> + raise "This check should've been skipped" + end) + + state = + Checks.run(@working_url, @expected_domain, + report_to: nil, + async?: false, + slowdown: 0 + ) + + log = capture_log(fn -> Checks.interpret_diagnostics(state) end) + + assert detection_counter + + assert log =~ "[DETECTION] Failed due to an issue with the customer website" + assert log =~ "service_error: %{code: :domain_not_found}" + + assert_receive {:telemetry_event, telemetry_event} + assert telemetry_event == Checks.telemetry_event_failure() + end + + for msg <- ["Execution context destroyed", "net::ERR_CONNECTION_REFUSED"] do + test "failure due to a known :browserless_client_error (#{msg}) -> customer website issue" do + stub_lookup_a_records(@expected_domain) + + detection_stub = + json_response_detection_stub(%{ + "completed" => false, + "error" => %{"message" => unquote(msg)} + }) + + state = run_checks(detection_stub) + + log = capture_log(fn -> Checks.interpret_diagnostics(state) end) + + assert log =~ "[DETECTION] Failed due to an issue with the customer website" + assert log =~ "code: :browserless_client_error" + assert log =~ ~s|extra: "#{unquote(msg)}"| + + assert_receive {:telemetry_event, telemetry_event} + assert telemetry_event == Checks.telemetry_event_failure() + end + end + + test "failure due to an unknown :browserless_client_error -> unknown failure" do + stub_lookup_a_records(@expected_domain) + + detection_stub = + json_response_detection_stub(%{ + "completed" => false, + "error" => %{"message" => "something unexpected"} + }) + + state = run_checks(detection_stub) + + log = capture_log(fn -> Checks.interpret_diagnostics(state) end) + + assert log =~ "[DETECTION] Unknown failure" + assert log =~ "code: :browserless_client_error" + assert log =~ ~s|extra: "something unexpected"| + + assert_receive {:telemetry_event, telemetry_event} + assert telemetry_event == Checks.telemetry_event_failure() + end + + test "failure hitting the catch-all interpret clause -> unknown failure" do + stub_lookup_a_records(@expected_domain) + + detection_stub = fn conn -> + Req.Test.transport_error(conn, :econnrefused) + end + + state = run_checks(detection_stub) + + log = capture_log(fn -> Checks.interpret_diagnostics(state) end) + + assert log =~ "[DETECTION] Unknown failure" + assert log =~ "code: :req_error" + assert log =~ ~s|extra: :econnrefused| + + assert_receive {:telemetry_event, telemetry_event} + assert telemetry_event == Checks.telemetry_event_failure() + end + + test "failure due to a flaky browserless issue -> browserless issue" do + stub_lookup_a_records(@expected_domain) + + detection_stub = fn conn -> + conn + |> put_resp_content_type("text/html") + |> send_resp(400, "some error message") + end + + state = run_checks(detection_stub) + + log = capture_log(fn -> Checks.interpret_diagnostics(state) end) + + assert log =~ "[DETECTION] Failed due to a Browserless issue" + assert log =~ "code: :bad_browserless_response" + assert log =~ "extra: 400" + + assert_receive {:telemetry_event, telemetry_event} + assert telemetry_event == Checks.telemetry_event_failure() + end + + test "failure due to a browserless timeout -> browserless issue" do + stub_lookup_a_records(@expected_domain) + + detection_stub = fn conn -> + Req.Test.transport_error(conn, :timeout) + end + + state = run_checks(detection_stub) + + log = capture_log(fn -> Checks.interpret_diagnostics(state) end) + + assert log =~ "[DETECTION] Failed due to a Browserless issue" + assert log =~ "code: :browserless_timeout" + + assert_receive {:telemetry_event, telemetry_event} + assert telemetry_event == Checks.telemetry_event_failure() + end + end + + defp json_response_detection_stub(js_data) do + fn conn -> + conn + |> put_resp_content_type("application/json") + |> send_resp(200, Jason.encode!(%{"data" => js_data})) + end + end + + defp run_checks(detection_stub) do + Req.Test.stub(Plausible.InstallationSupport.Checks.Detection, detection_stub) + + Checks.run(@working_url, @expected_domain, + report_to: nil, + async?: false, + slowdown: 0 + ) + end + + defp stub_detection_result_with_counter(handler) do counter = :atomics.new(1, []) Req.Test.stub(Plausible.InstallationSupport.Checks.Detection, fn conn ->