diff --git a/.github/workflows/elixir.yml b/.github/workflows/elixir.yml index 58ee24ecb9..549299a364 100644 --- a/.github/workflows/elixir.yml +++ b/.github/workflows/elixir.yml @@ -74,7 +74,7 @@ jobs: - name: Check Credo Warnings run: mix credo diff --from-git-merge-base origin/master - name: Run tests - run: mix test --include slow --max-failures 1 --warnings-as-errors + run: mix test --include slow --include minio --max-failures 1 --warnings-as-errors - name: Run tests (small build) run: MIX_ENV=small_test mix test --include slow --max-failures 1 --warnings-as-errors - name: Check Dialyzer diff --git a/Makefile b/Makefile index 0cf03b0727..c2c7dc722a 100644 --- a/Makefile +++ b/Makefile @@ -36,3 +36,9 @@ postgres-prod: ## Start a container with the same version of postgres as the one postgres-stop: ## Stop and remove the postgres container docker stop plausible_db && docker rm plausible_db + +minio: ## Start a transient container with a recent version of minio (s3) + docker run -d --rm -p 6000:6000 -p 6001:6001 --name plausible_minio minio/minio server /data --address ":6000" --console-address ":6001" + +minio-stop: ## Stop and remove the minio container + docker stop plausible_minio diff --git a/config/.env.dev b/config/.env.dev index c4bc28dee1..22f3c85bdd 100644 --- a/config/.env.dev +++ b/config/.env.dev @@ -20,3 +20,9 @@ GOOGLE_CLIENT_SECRET=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 PROMEX_DISABLED=false SITE_DEFAULT_INGEST_THRESHOLD=1000000 + +S3_DISABLED=false +S3_ACCESS_KEY_ID=minioadmin +S3_SECRET_ACCESS_KEY=minioadmin +S3_REGION=us-east-1 +S3_ENDPOINT=http://localhost:6000 diff --git a/config/.env.test b/config/.env.test index 050d6f8799..56092d916c 100644 --- a/config/.env.test +++ b/config/.env.test @@ -15,3 +15,9 @@ IP_GEOLOCATION_DB=test/priv/GeoLite2-City-Test.mmdb SITE_DEFAULT_INGEST_THRESHOLD=1000000 GOOGLE_CLIENT_ID=fake_client_id GOOGLE_CLIENT_SECRET=fake_client_secret + +S3_DISABLED=false +S3_ACCESS_KEY_ID=minioadmin +S3_SECRET_ACCESS_KEY=minioadmin +S3_REGION=us-east-1 +S3_ENDPOINT=http://localhost:6000 diff --git a/config/runtime.exs b/config/runtime.exs index 8798b374a1..30c2100da5 100644 --- a/config/runtime.exs +++ b/config/runtime.exs @@ -718,3 +718,61 @@ if not is_selfhost do config :plausible, Plausible.Site, default_ingest_threshold: site_default_ingest_threshold end + +s3_disabled? = + config_dir + |> get_var_from_path_or_env("S3_DISABLED", "true") + |> String.to_existing_atom() + +unless s3_disabled? do + s3_env = [ + %{ + name: "S3_ACCESS_KEY_ID", + example: "AKIAIOSFODNN7EXAMPLE" + }, + %{ + name: "S3_SECRET_ACCESS_KEY", + example: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY" + }, + %{ + name: "S3_REGION", + example: "us-east-1" + }, + %{ + name: "S3_ENDPOINT", + example: "https://.r2.cloudflarestorage.com" + } + ] + + s3_env = + Enum.map(s3_env, fn var -> + Map.put(var, :value, get_var_from_path_or_env(config_dir, var.name)) + end) + + s3_missing_env = Enum.filter(s3_env, &is_nil(&1.value)) + + unless s3_missing_env == [] do + raise ArgumentError, """ + Missing S3 configuration. Please set #{s3_missing_env |> Enum.map(& &1.name) |> Enum.join(", ")} environment variable(s): + + #{s3_missing_env |> Enum.map(fn %{name: name, example: example} -> "\t#{name}=#{example}" end) |> Enum.join("\n")} + """ + end + + s3_env_value = fn name -> + s3_env |> Enum.find(&(&1.name == name)) |> Map.fetch!(:value) + end + + config :ex_aws, + http_client: Plausible.S3.Client, + access_key_id: s3_env_value.("S3_ACCESS_KEY_ID"), + secret_access_key: s3_env_value.("S3_SECRET_ACCESS_KEY"), + region: s3_env_value.("S3_REGION") + + %URI{scheme: s3_scheme, host: s3_host, port: s3_port} = URI.parse(s3_env_value.("S3_ENDPOINT")) + + config :ex_aws, :s3, + scheme: s3_scheme <> "://", + host: s3_host, + port: s3_port +end diff --git a/lib/plausible/imported.ex b/lib/plausible/imported.ex index 82623fbd2d..f141780288 100644 --- a/lib/plausible/imported.ex +++ b/lib/plausible/imported.ex @@ -7,8 +7,7 @@ defmodule Plausible.Imported do * `Plausible.Imported.UniversalAnalytics` - existing mechanism, for legacy Google analytics formerly known as "Google Analytics" * `Plausible.Imported.NoopImporter` - importer stub, used mainly for testing purposes - * `Plausible.Imported.CSVImporter` - a placeholder stub for CSV importer that will - be added soon + * `Plausible.Imported.CSVImporter` - CSV importer from S3 For more information on implementing importers, see `Plausible.Imported.Importer`. """ diff --git a/lib/plausible/imported/csv_importer.ex b/lib/plausible/imported/csv_importer.ex index 8c7844fe30..d2d02e5426 100644 --- a/lib/plausible/imported/csv_importer.ex +++ b/lib/plausible/imported/csv_importer.ex @@ -1,6 +1,6 @@ defmodule Plausible.Imported.CSVImporter do @moduledoc """ - CSV importer stub. + CSV importer from S3 that uses ClickHouse [s3 table function.](https://clickhouse.com/docs/en/sql-reference/table-functions/s3) """ use Plausible.Imported.Importer @@ -16,10 +16,99 @@ defmodule Plausible.Imported.CSVImporter do def email_template(), do: "google_analytics_import.html" @impl true - def parse_args(%{"s3_path" => s3_path}), do: [s3_path: s3_path] + def parse_args(%{"uploads" => uploads}), do: [uploads: uploads] @impl true - def import_data(_site_import, _opts) do - :ok + def import_data(site_import, opts) do + %{id: import_id, site_id: site_id} = site_import + uploads = Keyword.fetch!(opts, :uploads) + + %{access_key_id: s3_access_key_id, secret_access_key: s3_secret_access_key} = + Plausible.S3.import_clickhouse_credentials() + + {:ok, ch} = + Plausible.IngestRepo.config() + |> Keyword.replace!(:pool_size, 1) + |> Ch.start_link() + + ranges = + Enum.map(uploads, fn upload -> + %{"filename" => filename, "s3_url" => s3_url} = upload + + ".csv" = Path.extname(filename) + table = Path.rootname(filename) + ensure_importable_table!(table) + + s3_structure = input_structure!(table) + + statement = + """ + INSERT INTO {table:Identifier} \ + SELECT {site_id:UInt64} AS site_id, *, {import_id:UInt64} AS import_id \ + FROM s3({s3_url:String},{s3_access_key_id:String},{s3_secret_access_key:String},{s3_format:String},{s3_structure:String})\ + """ + + params = + %{ + "table" => table, + "site_id" => site_id, + "import_id" => import_id, + "s3_url" => s3_url, + "s3_access_key_id" => s3_access_key_id, + "s3_secret_access_key" => s3_secret_access_key, + "s3_format" => "CSVWithNames", + "s3_structure" => s3_structure + } + + Ch.query!(ch, statement, params, timeout: :infinity) + + %Ch.Result{rows: [[min_date, max_date]]} = + Ch.query!( + ch, + "SELECT min(date), max(date) FROM {table:Identifier} WHERE site_id = {site_id:UInt64} AND import_id = {import_id:UInt64}", + %{"table" => table, "site_id" => site_id, "import_id" => import_id} + ) + + Date.range(min_date, max_date) + end) + + {:ok, + %{ + start_date: Enum.min_by(ranges, & &1.first, Date).first, + end_date: Enum.max_by(ranges, & &1.last, Date).last + }} + rescue + # we are cancelling on any argument or ClickHouse errors + e in [ArgumentError, Ch.Error] -> + {:error, Exception.message(e)} + end + + input_structures = %{ + "imported_browsers" => + "date Date, browser String, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32", + "imported_devices" => + "date Date, device String, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32", + "imported_entry_pages" => + "date Date, entry_page String, visitors UInt64, entrances UInt64, visit_duration UInt64, bounces UInt32", + "imported_exit_pages" => "date Date, exit_page String, visitors UInt64, exits UInt64", + "imported_locations" => + "date Date, country String, region String, city UInt64, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32", + "imported_operating_systems" => + "date Date, operating_system String, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32", + "imported_pages" => + "date Date, hostname String, page String, visitors UInt64, pageviews UInt64, exits UInt64, time_on_page UInt64", + "imported_sources" => + "date Date, source String, utm_medium String, utm_campaign String, utm_content String, utm_term String, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32", + "imported_visitors" => + "date Date, visitors UInt64, pageviews UInt64, bounces UInt64, visits UInt64, visit_duration UInt64" + } + + for {table, input_structure} <- input_structures do + defp input_structure!(unquote(table)), do: unquote(input_structure) + defp ensure_importable_table!(unquote(table)), do: :ok + end + + defp ensure_importable_table!(table) do + raise ArgumentError, "table #{table} is not supported for data import" end end diff --git a/lib/plausible/s3.ex b/lib/plausible/s3.ex new file mode 100644 index 0000000000..d8f8ad65f3 --- /dev/null +++ b/lib/plausible/s3.ex @@ -0,0 +1,15 @@ +defmodule Plausible.S3 do + @moduledoc """ + Helper functions for S3 exports/imports. + """ + + @doc """ + Returns `access_key_id` and `secret_access_key` to be used by ClickHouse during imports from S3. + """ + @spec import_clickhouse_credentials :: + %{access_key_id: String.t(), secret_access_key: String.t()} + def import_clickhouse_credentials do + %{access_key_id: access_key_id, secret_access_key: secret_access_key} = ExAws.Config.new(:s3) + %{access_key_id: access_key_id, secret_access_key: secret_access_key} + end +end diff --git a/lib/plausible/s3/client.ex b/lib/plausible/s3/client.ex new file mode 100644 index 0000000000..54817c8da4 --- /dev/null +++ b/lib/plausible/s3/client.ex @@ -0,0 +1,17 @@ +defmodule Plausible.S3.Client do + @moduledoc false + @behaviour ExAws.Request.HttpClient + + @impl true + def request(method, url, body, headers, opts) do + req = Finch.build(method, url, headers, body) + + case Finch.request(req, Plausible.Finch, opts) do + {:ok, %Finch.Response{status: status, headers: headers, body: body}} -> + {:ok, %{status_code: status, headers: headers, body: body}} + + {:error, reason} -> + {:error, %{reason: reason}} + end + end +end diff --git a/mix.exs b/mix.exs index d480e93216..95c76fdf54 100644 --- a/mix.exs +++ b/mix.exs @@ -136,7 +136,11 @@ defmodule Plausible.MixProject do {:esbuild, "~> 0.7", runtime: Mix.env() in [:dev, :small_dev]}, {:tailwind, "~> 0.2.0", runtime: Mix.env() in [:dev, :small_dev]}, {:ex_json_logger, "~> 1.4.0"}, - {:ecto_network, "~> 1.5.0"} + {:ecto_network, "~> 1.5.0"}, + {:ex_aws, "~> 2.5"}, + {:ex_aws_s3, "~> 2.5"}, + {:sweet_xml, "~> 0.7.4"}, + {:testcontainers, "~> 1.6", only: [:test, :small_test]} ] end diff --git a/mix.lock b/mix.lock index 338b04257f..a985822b31 100644 --- a/mix.lock +++ b/mix.lock @@ -42,10 +42,13 @@ "erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"}, "esbuild": {:hex, :esbuild, "0.8.1", "0cbf919f0eccb136d2eeef0df49c4acf55336de864e63594adcea3814f3edf41", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "25fc876a67c13cb0a776e7b5d7974851556baeda2085296c14ab48555ea7560f"}, "eternal": {:hex, :eternal, "1.2.2", "d1641c86368de99375b98d183042dd6c2b234262b8d08dfd72b9eeaafc2a1abd", [:mix], [], "hexpm", "2c9fe32b9c3726703ba5e1d43a1d255a4f3f2d8f8f9bc19f094c7cb1a7a9e782"}, + "ex_aws": {:hex, :ex_aws, "2.5.1", "7418917974ea42e9e84b25e88b9f3d21a861d5f953ad453e212f48e593d8d39f", [:mix], [{:configparser_ex, "~> 4.0", [hex: :configparser_ex, repo: "hexpm", optional: true]}, {:hackney, "~> 1.16", [hex: :hackney, repo: "hexpm", optional: true]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: true]}, {:jsx, "~> 2.8 or ~> 3.0", [hex: :jsx, repo: "hexpm", optional: true]}, {:mime, "~> 1.2 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:sweet_xml, "~> 0.7", [hex: :sweet_xml, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "1b95431f70c446fa1871f0eb9b183043c5a625f75f9948a42d25f43ae2eff12b"}, + "ex_aws_s3": {:hex, :ex_aws_s3, "2.5.3", "422468e5c3e1a4da5298e66c3468b465cfd354b842e512cb1f6fbbe4e2f5bdaf", [:mix], [{:ex_aws, "~> 2.0", [hex: :ex_aws, repo: "hexpm", optional: false]}, {:sweet_xml, ">= 0.0.0", [hex: :sweet_xml, repo: "hexpm", optional: true]}], "hexpm", "4f09dd372cc386550e484808c5ac5027766c8d0cd8271ccc578b82ee6ef4f3b8"}, "ex_cldr": {:hex, :ex_cldr, "2.37.5", "9da6d97334035b961d2c2de167dc6af8cd3e09859301a5b8f49f90bd8b034593", [:mix], [{:cldr_utils, "~> 2.21", [hex: :cldr_utils, repo: "hexpm", optional: false]}, {:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:gettext, "~> 0.19", [hex: :gettext, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:nimble_parsec, "~> 0.5 or ~> 1.0", [hex: :nimble_parsec, repo: "hexpm", optional: true]}], "hexpm", "74ad5ddff791112ce4156382e171a5f5d3766af9d5c4675e0571f081fe136479"}, "ex_cldr_currencies": {:hex, :ex_cldr_currencies, "2.15.1", "e92ba17c41e7405b7784e0e65f406b5f17cfe313e0e70de9befd653e12854822", [:mix], [{:ex_cldr, "~> 2.34", [hex: :ex_cldr, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "31df8bd37688340f8819bdd770eb17d659652078d34db632b85d4a32864d6a25"}, "ex_cldr_numbers": {:hex, :ex_cldr_numbers, "2.32.3", "b631ff94c982ec518e46bf4736000a30a33d6b58facc085d5f240305f512ad4a", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:digital_token, "~> 0.3 or ~> 1.0", [hex: :digital_token, repo: "hexpm", optional: false]}, {:ex_cldr, "~> 2.37", [hex: :ex_cldr, repo: "hexpm", optional: false]}, {:ex_cldr_currencies, ">= 2.14.2", [hex: :ex_cldr_currencies, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "7b626ff1e59a0ec9c3c5db5ce9ca91a6995e2ab56426b71f3cbf67181ea225f5"}, "ex_doc": {:hex, :ex_doc, "0.31.1", "8a2355ac42b1cc7b2379da9e40243f2670143721dd50748bf6c3b1184dae2089", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.1", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "3178c3a407c557d8343479e1ff117a96fd31bafe52a039079593fb0524ef61b0"}, + "ex_docker_engine_api": {:hex, :ex_docker_engine_api, "1.43.1", "1161e34b6bea5cef84d8fdc1d5d510fcb0c463941ce84c36f4a0f44a9096eb96", [:mix], [{:hackney, "~> 1.20", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}, {:tesla, "~> 1.7", [hex: :tesla, repo: "hexpm", optional: false]}], "hexpm", "ec8fc499389aeef56ddca67e89e9e98098cff50587b56e8b4613279f382793b1"}, "ex_json_logger": {:hex, :ex_json_logger, "1.4.0", "ad1dcc1cfe6940ee1d9d489b20757c89769626ce34c4957548d6fbe155cd96f1", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "7548a1ecba290746e06214d2b3d8783c76760c779a8903a8e44bfd23a7340444"}, "ex_machina": {:hex, :ex_machina, "2.7.0", "b792cc3127fd0680fecdb6299235b4727a4944a09ff0fa904cc639272cd92dc7", [:mix], [{:ecto, "~> 2.2 or ~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}, {:ecto_sql, "~> 3.0", [hex: :ecto_sql, repo: "hexpm", optional: true]}], "hexpm", "419aa7a39bde11894c87a615c4ecaa52d8f107bbdd81d810465186f783245bf8"}, "ex_money": {:hex, :ex_money, "5.15.3", "ea070eb1eefd22258aa288921ba482f1fa5f870d229069dc3d12458b7b8bf66d", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:ex_cldr_numbers, "~> 2.31", [hex: :ex_cldr_numbers, repo: "hexpm", optional: false]}, {:gringotts, "~> 1.1", [hex: :gringotts, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:nimble_parsec, "~> 0.5 or ~> 1.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 2.0 or ~> 3.0 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:poison, "~> 3.0 or ~> 4.0 or ~> 5.0", [hex: :poison, repo: "hexpm", optional: true]}], "hexpm", "3671f1808c428b7c4688650d43dc1af0b64c0eea822429a28c55cef15fb4fdc1"}, @@ -134,18 +137,22 @@ "siphash": {:hex, :siphash, "3.2.0", "ec03fd4066259218c85e2a4b8eec4bb9663bc02b127ea8a0836db376ba73f2ed", [:make, :mix], [], "hexpm", "ba3810701c6e95637a745e186e8a4899087c3b079ba88fb8f33df054c3b0b7c3"}, "sleeplocks": {:hex, :sleeplocks, "1.1.2", "d45aa1c5513da48c888715e3381211c859af34bee9b8290490e10c90bb6ff0ca", [:rebar3], [], "hexpm", "9fe5d048c5b781d6305c1a3a0f40bb3dfc06f49bf40571f3d2d0c57eaa7f59a5"}, "ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.7", "354c321cf377240c7b8716899e182ce4890c5938111a1296add3ec74cf1715df", [:make, :mix, :rebar3], [], "hexpm", "fe4c190e8f37401d30167c8c405eda19469f34577987c76dde613e838bbc67f8"}, + "sweet_xml": {:hex, :sweet_xml, "0.7.4", "a8b7e1ce7ecd775c7e8a65d501bc2cd933bff3a9c41ab763f5105688ef485d08", [:mix], [], "hexpm", "e7c4b0bdbf460c928234951def54fe87edf1a170f6896675443279e2dbeba167"}, "tailwind": {:hex, :tailwind, "0.2.2", "9e27288b568ede1d88517e8c61259bc214a12d7eed271e102db4c93fcca9b2cd", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}], "hexpm", "ccfb5025179ea307f7f899d1bb3905cd0ac9f687ed77feebc8f67bdca78565c4"}, "telemetry": {:hex, :telemetry, "1.2.1", "68fdfe8d8f05a8428483a97d7aab2f268aaff24b49e0f599faa091f1d4e7f61c", [:rebar3], [], "hexpm", "dad9ce9d8effc621708f99eac538ef1cbe05d6a874dd741de2e689c47feafed5"}, "telemetry_metrics": {:hex, :telemetry_metrics, "0.6.1", "315d9163a1d4660aedc3fee73f33f1d355dcc76c5c3ab3d59e76e3edf80eef1f", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "7be9e0871c41732c233be71e4be11b96e56177bf15dde64a8ac9ce72ac9834c6"}, "telemetry_metrics_prometheus_core": {:hex, :telemetry_metrics_prometheus_core, "1.1.0", "4e15f6d7dbedb3a4e3aed2262b7e1407f166fcb9c30ca3f96635dfbbef99965c", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:telemetry_metrics, "~> 0.6", [hex: :telemetry_metrics, repo: "hexpm", optional: false]}], "hexpm", "0dd10e7fe8070095df063798f82709b0a1224c31b8baf6278b423898d591a069"}, "telemetry_poller": {:hex, :telemetry_poller, "1.0.0", "db91bb424e07f2bb6e73926fcafbfcbcb295f0193e0a00e825e589a0a47e8453", [:rebar3], [{:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "b3a24eafd66c3f42da30fc3ca7dda1e9d546c12250a2d60d7b81d264fbec4f6e"}, "telemetry_registry": {:hex, :telemetry_registry, "0.3.1", "14a3319a7d9027bdbff7ebcacf1a438f5f5c903057b93aee484cca26f05bdcba", [:mix, :rebar3], [{:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "6d0ca77b691cf854ed074b459a93b87f4c7f5512f8f7743c635ca83da81f939e"}, + "tesla": {:hex, :tesla, "1.8.0", "d511a4f5c5e42538d97eef7c40ec4f3e44effdc5068206f42ed859e09e51d1fd", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:exjsx, ">= 3.0.0", [hex: :exjsx, repo: "hexpm", optional: true]}, {:finch, "~> 0.13", [hex: :finch, repo: "hexpm", optional: true]}, {:fuse, "~> 2.4", [hex: :fuse, repo: "hexpm", optional: true]}, {:gun, ">= 1.0.0", [hex: :gun, repo: "hexpm", optional: true]}, {:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: true]}, {:ibrowse, "4.4.2", [hex: :ibrowse, repo: "hexpm", optional: true]}, {:jason, ">= 1.0.0", [hex: :jason, repo: "hexpm", optional: true]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.0", [hex: :mint, repo: "hexpm", optional: true]}, {:msgpax, "~> 2.3", [hex: :msgpax, repo: "hexpm", optional: true]}, {:poison, ">= 1.0.0", [hex: :poison, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm", "10501f360cd926a309501287470372af1a6e1cbed0f43949203a4c13300bc79f"}, + "testcontainers": {:hex, :testcontainers, "1.6.0", "14b3251f01ce0b1ada716130d371ba0b6cb1ce2904aa38bd58e5ff4194f4d88f", [:mix], [{:ecto, "~> 3.3", [hex: :ecto, repo: "hexpm", optional: true]}, {:ecto_sql, "~> 3.3", [hex: :ecto_sql, repo: "hexpm", optional: true]}, {:ex_docker_engine_api, "~> 1.43.1", [hex: :ex_docker_engine_api, repo: "hexpm", optional: false]}, {:uuid, "~> 1.1", [hex: :uuid, repo: "hexpm", optional: false]}], "hexpm", "3f812407f232954999a3a2e05b2802e1d8d1afba120533c42b32c7cc91d35daf"}, "timex": {:hex, :timex, "3.7.11", "bb95cb4eb1d06e27346325de506bcc6c30f9c6dea40d1ebe390b262fad1862d1", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.20", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 1.1", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm", "8b9024f7efbabaf9bd7aa04f65cf8dcd7c9818ca5737677c7b76acbc6a94d1aa"}, "tls_certificate_check": {:hex, :tls_certificate_check, "1.21.0", "042ab2c0c860652bc5cf69c94e3a31f96676d14682e22ec7813bd173ceff1788", [:rebar3], [{:ssl_verify_fun, "~> 1.1", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "6cee6cffc35a390840d48d463541d50746a7b0e421acaadb833cfc7961e490e7"}, "tzdata": {:hex, :tzdata, "1.1.1", "20c8043476dfda8504952d00adac41c6eda23912278add38edc140ae0c5bcc46", [:mix], [{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "a69cec8352eafcd2e198dea28a34113b60fdc6cb57eb5ad65c10292a6ba89787"}, "ua_inspector": {:hex, :ua_inspector, "3.8.0", "c0b0d13200a9bd509225f15ea8cf275c0bec27390a21c355746ff8b8a88c3e4d", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}, {:yamerl, "~> 0.7", [hex: :yamerl, repo: "hexpm", optional: false]}], "hexpm", "7c980bae82a4754075b933e0f383935a681e5a2628856ad3ecf6eb80d8139539"}, "unicode_util_compat": {:hex, :unicode_util_compat, "0.7.0", "bc84380c9ab48177092f43ac89e4dfa2c6d62b40b8bd132b1059ecc7232f9a78", [:rebar3], [], "hexpm", "25eee6d67df61960cf6a794239566599b09e17e668d3700247bc498638152521"}, "unsafe": {:hex, :unsafe, "1.0.2", "23c6be12f6c1605364801f4b47007c0c159497d0446ad378b5cf05f1855c0581", [:mix], [], "hexpm", "b485231683c3ab01a9cd44cb4a79f152c6f3bb87358439c6f68791b85c2df675"}, + "uuid": {:hex, :uuid, "1.1.8", "e22fc04499de0de3ed1116b770c7737779f226ceefa0badb3592e64d5cfb4eb9", [:mix], [], "hexpm", "c790593b4c3b601f5dc2378baae7efaf5b3d73c4c6456ba85759905be792f2ac"}, "websock": {:hex, :websock, "0.5.3", "2f69a6ebe810328555b6fe5c831a851f485e303a7c8ce6c5f675abeb20ebdadc", [:mix], [], "hexpm", "6105453d7fac22c712ad66fab1d45abdf049868f253cf719b625151460b8b453"}, "websock_adapter": {:hex, :websock_adapter, "0.5.5", "9dfeee8269b27e958a65b3e235b7e447769f66b5b5925385f5a569269164a210", [:mix], [{:bandit, ">= 0.6.0", [hex: :bandit, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.6", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "4b977ba4a01918acbf77045ff88de7f6972c2a009213c515a445c48f224ffce9"}, "yamerl": {:hex, :yamerl, "0.10.0", "4ff81fee2f1f6a46f1700c0d880b24d193ddb74bd14ef42cb0bcf46e81ef2f8e", [:rebar3], [], "hexpm", "346adb2963f1051dc837a2364e4acf6eb7d80097c0f53cbdc3046ec8ec4b4e6e"}, diff --git a/test/plausible/config_test.exs b/test/plausible/config_test.exs index 3145a1f750..28e6cad4c8 100644 --- a/test/plausible/config_test.exs +++ b/test/plausible/config_test.exs @@ -191,6 +191,79 @@ defmodule Plausible.ConfigTest do end end + describe "s3" do + test "has required env vars" do + env = [ + {"S3_ACCESS_KEY_ID", nil}, + {"S3_SECRET_ACCESS_KEY", nil}, + {"S3_REGION", nil}, + {"S3_ENDPOINT", nil} + ] + + result = + try do + runtime_config(env) + rescue + e -> e + end + + assert %ArgumentError{} = result + + assert Exception.message(result) == """ + Missing S3 configuration. Please set S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_REGION, S3_ENDPOINT environment variable(s): + + \tS3_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE + \tS3_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY + \tS3_REGION=us-east-1 + \tS3_ENDPOINT=https://.r2.cloudflarestorage.com + """ + end + + test "renders only missing env vars" do + env = [ + {"S3_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE"}, + {"S3_SECRET_ACCESS_KEY", nil}, + {"S3_REGION", "eu-north-1"}, + {"S3_ENDPOINT", nil} + ] + + result = + try do + runtime_config(env) + rescue + e -> e + end + + assert %ArgumentError{} = result + + assert Exception.message(result) == """ + Missing S3 configuration. Please set S3_SECRET_ACCESS_KEY, S3_ENDPOINT environment variable(s): + + \tS3_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY + \tS3_ENDPOINT=https://.r2.cloudflarestorage.com + """ + end + + test "works when everything is set" do + env = [ + {"S3_ACCESS_KEY_ID", "minioadmin"}, + {"S3_SECRET_ACCESS_KEY", "minioadmin"}, + {"S3_REGION", "us-east-1"}, + {"S3_ENDPOINT", "http://localhost:6000"} + ] + + config = runtime_config(env) + + assert config[:ex_aws] == [ + http_client: Plausible.S3.Client, + access_key_id: "minioadmin", + secret_access_key: "minioadmin", + region: "us-east-1", + s3: [scheme: "http://", host: "localhost", port: 6000] + ] + end + end + defp runtime_config(env) do put_system_env_undo(env) Config.Reader.read!("config/runtime.exs", env: :prod) diff --git a/test/plausible/imported/csv_importer_test.exs b/test/plausible/imported/csv_importer_test.exs new file mode 100644 index 0000000000..9cf4c09854 --- /dev/null +++ b/test/plausible/imported/csv_importer_test.exs @@ -0,0 +1,408 @@ +defmodule Plausible.Imported.CSVImporterTest do + use Plausible.DataCase, async: true + alias Plausible.Imported.{CSVImporter, SiteImport} + alias Testcontainers.MinioContainer + require SiteImport + + @moduletag :minio + + setup_all do + Testcontainers.start_link() + + {:ok, minio} = Testcontainers.start_container(MinioContainer.new()) + on_exit(fn -> :ok = Testcontainers.stop_container(minio.container_id) end) + connection_opts = MinioContainer.connection_opts(minio) + + bucket = "imports" + ExAws.request!(ExAws.S3.put_bucket(bucket, "us-east-1"), connection_opts) + on_exit(fn -> ExAws.request!(ExAws.S3.delete_bucket(bucket), connection_opts) end) + + {:ok, container: minio, bucket: bucket} + end + + describe "new_import/3 and parse_args/1" do + setup [:create_user, :create_new_site] + + test "parses job args properly", %{user: user, site: site} do + tables = [ + "imported_browsers", + "imported_devices", + "imported_entry_pages", + "imported_exit_pages", + "imported_locations", + "imported_operating_systems", + "imported_pages", + "imported_sources", + "imported_visitors" + ] + + uploads = + Enum.map(tables, fn table -> + filename = "#{table}.csv" + + %{ + "filename" => filename, + "s3_url" => "https://bucket-name.s3.eu-north-1.amazonaws.com/#{site.id}/#{filename}" + } + end) + + assert {:ok, job} = + CSVImporter.new_import(site, user, + # to satisfy the non null constraints on the table I'm providing "0" dates (according to ClickHouse) + start_date: ~D[1970-01-01], + end_date: ~D[1970-01-01], + uploads: uploads + ) + + assert %Oban.Job{args: %{"import_id" => import_id, "uploads" => ^uploads} = args} = + Repo.reload!(job) + + assert [ + %{ + id: ^import_id, + source: :csv, + start_date: ~D[1970-01-01], + end_date: ~D[1970-01-01], + status: SiteImport.pending() + } + ] = Plausible.Imported.list_all_imports(site) + + assert %{imported_data: nil} = Repo.reload!(site) + assert CSVImporter.parse_args(args) == [uploads: uploads] + end + end + + describe "import_data/2" do + setup [:create_user, :create_new_site] + + test "imports tables from S3", %{site: site, user: user, bucket: bucket, container: minio} do + csvs = [ + %{ + name: "imported_browsers.csv", + body: """ + "date","browser","visitors","visits","visit_duration","bounces" + "2021-12-30","Amazon Silk",2,2,0,2 + "2021-12-30","Chrome",31,32,329,29 + "2021-12-30","Edge",3,3,0,3 + "2021-12-30","Firefox",1,1,0,1 + "2021-12-30","Internet Explorer",1,1,0,1 + "2021-12-30","Mobile App",2,2,0,2 + "2021-12-30","Mobile App",4,4,0,4 + "2021-12-30","Mobile App",1,1,0,1 + "2021-12-30","Safari",32,36,0,36 + "2021-12-30","Samsung Internet",2,2,0,2 + "2021-12-30","UC Browser",1,1,0,1 + "2021-12-31","Chrome",24,25,75,23 + "2021-12-31","Edge",3,3,0,3 + "2021-12-31","Firefox",1,1,466,0 + "2021-12-31","Mobile App",4,5,0,5 + "2021-12-31","Mobile App",4,4,0,4 + "2021-12-31","Mobile App",1,1,85,0 + "2021-12-31","Safari",37,45,1957,42 + "2021-12-31","Samsung Internet",1,1,199,0 + """ + }, + %{ + name: "imported_devices.csv", + body: """ + "date","device","visitors","visits","visit_duration","bounces" + "2021-12-30","Desktop",25,28,75,27 + "2021-12-30","Mobile",49,51,254,49 + "2021-12-30","Tablet",6,6,0,6 + "2021-12-31","Desktop",20,26,496,24 + "2021-12-31","Mobile",50,54,1842,49 + "2021-12-31","Tablet",5,5,444,4 + "2022-01-01","Desktop",33,34,1117,32 + "2022-01-01","Mobile",55,60,306,54 + "2022-01-01","Tablet",8,8,419,7 + "2022-01-02","Desktop",28,28,86,26 + "2022-01-02","Mobile",66,73,2450,65 + "2022-01-02","Tablet",9,9,0,9 + """ + }, + %{ + name: "imported_entry_pages.csv", + body: """ + "date","visitors","entrances","visit_duration","bounces","entry_page" + "2021-12-30",6,6,0,6,"/14776416252794997127" + "2021-12-30",1,1,0,1,"/15455127321321119046" + "2021-12-30",1,1,43,0,"/10399835914295020763" + "2021-12-30",1,1,0,1,"/9102354072466236765" + "2021-12-30",1,1,0,1,"/1586391735863371077" + "2021-12-30",1,1,0,1,"/3457026921000639206" + "2021-12-30",2,3,0,3,"/6077502147861556415" + "2021-12-30",1,1,0,1,"/14280570555317344651" + "2021-12-30",3,3,0,3,"/5284268072698982201" + "2021-12-30",1,1,0,1,"/7478911940502018071" + "2021-12-30",1,1,0,1,"/6402607186523575652" + "2021-12-30",2,2,0,2,"/9962503789684934900" + "2021-12-30",8,10,0,10,"/13595620304963848161" + "2021-12-30",2,2,0,2,"/17019199732013993436" + "2021-12-30",31,31,211,30,"/9874837495456455794" + "2021-12-31",4,4,0,4,"/14776416252794997127" + "2021-12-31",1,1,0,1,"/8738789417178304429" + "2021-12-31",1,1,0,1,"/7445073500314667742" + "2021-12-31",1,1,0,1,"/4897404798407749335" + "2021-12-31",1,1,45,0,"/11263893625781431659" + "2021-12-31",1,1,0,1,"/16478773157730928089" + "2021-12-31",1,1,0,1,"/1710995203264225236" + "2021-12-31",1,1,0,1,"/14280570555317344651" + "2021-12-31",4,5,444,4,"/5284268072698982201" + "2021-12-31",2,2,466,1,"/7478911940502018071" + "2021-12-31",9,16,1455,15,"/13595620304963848161" + "2021-12-31",25,25,88,23,"/9874837495456455794" + """ + }, + %{ + name: "imported_exit_pages.csv", + body: """ + "date","visitors","exits","exit_page" + "2021-12-30",6,6,"/14776416252794997127" + "2021-12-30",1,1,"/15455127321321119046" + "2021-12-30",1,1,"/9102354072466236765" + "2021-12-30",1,1,"/4457889102355683190" + "2021-12-30",1,1,"/12105301321223776356" + "2021-12-30",1,2,"/1526239929864936398" + "2021-12-30",1,1,"/7478911940502018071" + "2021-12-30",1,1,"/6402607186523575652" + "2021-12-30",2,2,"/9962503789684934900" + "2021-12-30",8,10,"/13595620304963848161" + "2021-12-30",2,2,"/17019199732013993436" + "2021-12-30",32,32,"/9874837495456455794" + "2021-12-31",4,4,"/14776416252794997127" + "2021-12-31",1,1,"/8738789417178304429" + "2021-12-31",1,1,"/7445073500314667742" + "2021-12-31",1,1,"/4897404798407749335" + "2021-12-31",1,1,"/11263893625781431659" + "2021-12-31",1,1,"/16478773157730928089" + "2021-12-31",1,1,"/1710995203264225236" + """ + }, + %{ + name: "imported_locations.csv", + body: """ + "date","country","region","city","visitors","visits","visit_duration","bounces" + "2021-12-30","AU","",0,1,1,43,0 + "2021-12-30","AU","",2078025,3,4,211,3 + "2021-12-30","AU","",2147714,2,2,0,2 + "2021-12-30","AU","",2158177,2,2,0,2 + "2021-12-30","AU","",2174003,1,1,0,1 + "2021-12-30","BE","",0,1,1,0,1 + "2021-12-30","BE","",2792196,1,1,0,1 + "2021-12-30","BR","",0,1,1,0,1 + "2021-12-30","CA","",0,1,1,0,1 + "2021-12-30","PL","",0,1,1,0,1 + "2021-12-30","PL","",756135,1,1,0,1 + "2021-12-30","US","",0,1,1,0,1 + "2021-12-30","US","",0,1,1,0,1 + "2021-12-30","US","",0,1,1,0,1 + "2021-12-30","US","",0,1,1,0,1 + "2021-12-30","US","",4063926,1,1,0,1 + "2021-12-30","US","",4074013,1,3,0,3 + "2021-12-30","US","",5089478,1,1,0,1 + "2021-12-31","AU","",2147714,3,3,0,3 + "2021-12-31","AU","",2158177,2,2,0,2 + "2021-12-31","CA","",0,1,1,0,1 + "2021-12-31","IT","",3176959,1,1,85,0 + "2021-12-31","KR","",1835848,1,1,0,1 + "2021-12-31","LV","",456172,1,1,0,1 + "2021-12-31","MX","",3530757,2,3,0,3 + "2021-12-31","NL","",0,1,1,0,1 + "2021-12-31","NL","",0,1,2,0,2 + "2021-12-31","NL","",2745321,1,1,0,1 + "2021-12-31","NO","",0,1,1,199,0 + "2021-12-31","SE","",0,1,1,0,1 + "2021-12-31","SG","",1880252,1,1,0,1 + """ + }, + %{ + name: "imported_operating_systems.csv", + body: """ + "date","operating_system","visitors","visits","visit_duration","bounces" + "2021-12-30","Android",25,26,254,24 + "2021-12-30","Mac",13,16,0,16 + "2021-12-30","Windows",12,12,75,11 + "2021-12-30","iOS",30,31,0,31 + "2021-12-31","Android",15,16,329,13 + "2021-12-31","Mac",13,19,0,19 + "2021-12-31","Windows",7,7,496,5 + "2021-12-31","iOS",40,43,1957,40 + "2022-01-01","",17,18,0,18 + "2022-01-01","Android",25,28,32,26 + "2022-01-01","Chrome OS",1,1,0,1 + "2022-01-01","Mac",6,6,0,6 + "2022-01-01","Windows",9,9,1117,7 + "2022-01-01","iOS",38,40,693,35 + """ + }, + %{ + name: "imported_pages.csv", + body: """ + "date","visitors","pageviews","exits","time_on_page","hostname","page" + "2021-12-30",1,1,0,43,"lucky.numbers.com","/14776416252794997127" + "2021-12-30",1,1,1,0,"lucky.numbers.com","/14776416252794997127" + "2021-12-30",6,6,6,0,"lucky.numbers.com","/14776416252794997127" + "2021-12-30",1,1,1,0,"lucky.numbers.com","/9102354072466236765" + "2021-12-30",1,1,1,0,"lucky.numbers.com","/7478911940502018071" + "2021-12-30",1,1,1,0,"lucky.numbers.com","/6402607186523575652" + "2021-12-30",2,2,2,0,"lucky.numbers.com","/9962503789684934900" + "2021-12-30",8,10,10,0,"lucky.numbers.com","/13595620304963848161" + "2021-12-30",2,2,2,0,"lucky.numbers.com","/17019199732013993436" + "2021-12-30",32,33,32,211,"lucky.numbers.com","/9874837495456455794" + "2021-12-31",4,4,4,0,"lucky.numbers.com","/14776416252794997127" + "2021-12-31",1,1,1,0,"lucky.numbers.com","/8738789417178304429" + "2021-12-31",1,1,1,0,"lucky.numbers.com","/7445073500314667742" + "2021-12-31",1,1,1,0,"lucky.numbers.com","/4897404798407749335" + "2021-12-31",1,2,1,29,"lucky.numbers.com","/11263893625781431659" + "2022-01-01",2,2,2,0,"lucky.numbers.com","/5878724061840196349" + """ + }, + %{ + name: "imported_sources.csv", + body: """ + "date","source","utm_medium","utm_campaign","utm_content","utm_term","visitors","visits","visit_duration","bounces" + "2021-12-30","","","","","",25,26,254,24 + "2021-12-30","Hacker News","referral","","","",2,2,0,2 + "2021-12-30","Google","organic","","","",20,22,75,21 + "2021-12-30","Pinterest","referral","","","",25,26,0,26 + "2021-12-30","baidu","organic","","","",1,1,0,1 + "2021-12-30","yahoo","organic","","","",3,3,0,3 + "2021-12-31","","","","","",16,16,199,15 + "2021-12-31","Bing","organic","","","",1,1,0,1 + "2021-12-31","DuckDuckGo","organic","","","",1,1,0,1 + "2021-12-31","Hacker News","referral","","","",1,1,466,0 + "2021-12-31","Google","organic","","","",25,32,85,31 + "2021-12-31","Pinterest","referral","","","",22,24,88,22 + "2021-12-31","yahoo","organic","","","",3,3,1899,1 + "2022-01-01","","","","","",37,38,1137,35 + "2022-01-01","Bing","organic","","","",2,2,171,1 + "2022-01-01","DuckDuckGo","organic","","","",2,3,0,3 + "2022-01-01","Hacker News","referral","","","",1,1,0,1 + "2022-01-01","Google","referral","","","",1,1,0,1 + "2022-01-01","Google","organic","","","",21,23,115,19 + "2022-01-01","Pinterest","referral","","","",29,30,0,30 + "2022-01-01","yahoo","organic","","","",3,3,419,2 + "2022-01-06","","","","","",37,38,430,36 + "2022-01-06","Bing","organic","","","how lucky am I as UInt64",1,1,0,1 + "2022-01-06","Bing","organic","","","",3,3,10,2 + """ + }, + %{ + name: "imported_visitors.csv", + body: """ + "date","visitors","pageviews","bounces","visits","visit_duration" + "2011-12-25",5,50,2,7,8640 + "2011-12-26",3,4,2,3,43 + "2011-12-27",3,6,2,4,2313 + "2011-12-28",6,30,4,8,2264 + "2011-12-29",4,8,5,6,136 + "2011-12-30",1,1,1,1,0 + """ + } + ] + + connection_opts = MinioContainer.connection_opts(minio) + + on_exit(fn -> + keys = Enum.map(csvs, fn csv -> "#{site.id}/#{csv.name}" end) + ExAws.request!(ExAws.S3.delete_all_objects(bucket, keys), connection_opts) + end) + + uploads = + for %{name: name, body: body} <- csvs do + key = "#{site.id}/#{name}" + ExAws.request!(ExAws.S3.put_object(bucket, key, body), connection_opts) + %{"filename" => name, "s3_url" => s3_url(minio, bucket, key)} + end + + {:ok, job} = + CSVImporter.new_import( + site, + user, + # to satisfy the non null constraints on the table I'm providing "0" dates (according to ClickHouse) + start_date: ~D[1970-01-01], + end_date: ~D[1970-01-01], + uploads: uploads + ) + + job = Repo.reload!(job) + + assert :ok = Plausible.Workers.ImportAnalytics.perform(job) + + # on successfull import the start and end dates are updated + assert %SiteImport{ + start_date: ~D[2011-12-25], + end_date: ~D[2022-01-06], + source: :csv, + status: :completed + } = Repo.get_by!(SiteImport, site_id: site.id) + + assert Plausible.Stats.Clickhouse.imported_pageview_count(site) == 99 + end + + test "fails on invalid CSV", %{site: site, user: user, bucket: bucket, container: minio} do + csvs = [ + %{ + name: "imported_browsers.csv", + body: """ + "date","browser","visitors","visits","visit_duration","bounces" + "2021-12-30","Amazon Silk",2,2,0,2 + "2021-12-30","Chrome",31,32,329,29 + "2021-12-30","Edge",3,3,0,3 + "2021-12-30","Firefox",1,1,0,1 + "2021-12-30","Internet Explorer",1,1,0,1 + "2021-12-30","Mobile App",2,2,0,2 + "2021-12-31","Mobile App",4,4,0,4 + """ + }, + %{ + name: "imported_devices.csv", + body: """ + "date","device","visitors","visit_duration","bounces" + "2021-12-30","Desktop",28,ehhhh.... + """ + } + ] + + connection_opts = MinioContainer.connection_opts(minio) + + on_exit(fn -> + keys = Enum.map(csvs, fn csv -> "#{site.id}/#{csv.name}" end) + ExAws.request!(ExAws.S3.delete_all_objects(bucket, keys), connection_opts) + end) + + uploads = + for %{name: name, body: body} <- csvs do + key = "#{site.id}/#{name}" + ExAws.request!(ExAws.S3.put_object(bucket, key, body), connection_opts) + %{"filename" => name, "s3_url" => s3_url(minio, bucket, key)} + end + + {:ok, job} = + CSVImporter.new_import( + site, + user, + start_date: ~D[1970-01-01], + end_date: ~D[1970-01-01], + uploads: uploads + ) + + job = Repo.reload!(job) + + assert {:discard, message} = Plausible.Workers.ImportAnalytics.perform(job) + assert message =~ "CANNOT_PARSE_INPUT_ASSERTION_FAILED" + + assert %SiteImport{id: import_id, source: :csv, status: :failed} = + Repo.get_by!(SiteImport, site_id: site.id) + + # ensure no browser left behind + imported_browsers_q = from b in "imported_browsers", where: b.import_id == ^import_id + assert await_clickhouse_count(imported_browsers_q, 0) + end + end + + defp s3_url(minio, bucket, key) do + port = minio |> MinioContainer.connection_opts() |> Keyword.fetch!(:port) + Path.join(["http://172.17.0.1:#{port}", bucket, key]) + end +end diff --git a/test/test_helper.exs b/test/test_helper.exs index 30f0ebbd59..8d436f5a91 100644 --- a/test/test_helper.exs +++ b/test/test_helper.exs @@ -6,8 +6,8 @@ Ecto.Adapters.SQL.Sandbox.mode(Plausible.Repo, :manual) if Mix.env() == :small_test do IO.puts("Test mode: SMALL") - ExUnit.configure(exclude: [:slow, :full_build_only]) + ExUnit.configure(exclude: [:slow, :minio, :full_build_only]) else IO.puts("Test mode: FULL") - ExUnit.configure(exclude: [:slow, :small_build_only]) + ExUnit.configure(exclude: [:slow, :minio, :small_build_only]) end