Add csv importer (#3795)
* add csv importer * make table validation explicit * update some docs * improve docs * add minio container to ci * more tests * eh * continue * add passing test * add failing test * add config test * add minio to Makefile * testcontainers * remove extra whitespace * explain the implementation a bit * account for async deletes in tests * bounces is UInt32 Co-authored-by: Adrian Gruntkowski <adrian.gruntkowski@gmail.com> --------- Co-authored-by: Adrian Gruntkowski <adrian.gruntkowski@gmail.com>
This commit is contained in:
parent
31cf3e54f8
commit
f3423aefec
|
|
@ -74,7 +74,7 @@ jobs:
|
|||
- name: Check Credo Warnings
|
||||
run: mix credo diff --from-git-merge-base origin/master
|
||||
- name: Run tests
|
||||
run: mix test --include slow --max-failures 1 --warnings-as-errors
|
||||
run: mix test --include slow --include minio --max-failures 1 --warnings-as-errors
|
||||
- name: Run tests (small build)
|
||||
run: MIX_ENV=small_test mix test --include slow --max-failures 1 --warnings-as-errors
|
||||
- name: Check Dialyzer
|
||||
|
|
|
|||
6
Makefile
6
Makefile
|
|
@ -36,3 +36,9 @@ postgres-prod: ## Start a container with the same version of postgres as the one
|
|||
|
||||
postgres-stop: ## Stop and remove the postgres container
|
||||
docker stop plausible_db && docker rm plausible_db
|
||||
|
||||
minio: ## Start a transient container with a recent version of minio (s3)
|
||||
docker run -d --rm -p 6000:6000 -p 6001:6001 --name plausible_minio minio/minio server /data --address ":6000" --console-address ":6001"
|
||||
|
||||
minio-stop: ## Stop and remove the minio container
|
||||
docker stop plausible_minio
|
||||
|
|
|
|||
|
|
@ -20,3 +20,9 @@ GOOGLE_CLIENT_SECRET=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
|
|||
|
||||
PROMEX_DISABLED=false
|
||||
SITE_DEFAULT_INGEST_THRESHOLD=1000000
|
||||
|
||||
S3_DISABLED=false
|
||||
S3_ACCESS_KEY_ID=minioadmin
|
||||
S3_SECRET_ACCESS_KEY=minioadmin
|
||||
S3_REGION=us-east-1
|
||||
S3_ENDPOINT=http://localhost:6000
|
||||
|
|
|
|||
|
|
@ -15,3 +15,9 @@ IP_GEOLOCATION_DB=test/priv/GeoLite2-City-Test.mmdb
|
|||
SITE_DEFAULT_INGEST_THRESHOLD=1000000
|
||||
GOOGLE_CLIENT_ID=fake_client_id
|
||||
GOOGLE_CLIENT_SECRET=fake_client_secret
|
||||
|
||||
S3_DISABLED=false
|
||||
S3_ACCESS_KEY_ID=minioadmin
|
||||
S3_SECRET_ACCESS_KEY=minioadmin
|
||||
S3_REGION=us-east-1
|
||||
S3_ENDPOINT=http://localhost:6000
|
||||
|
|
|
|||
|
|
@ -718,3 +718,61 @@ if not is_selfhost do
|
|||
|
||||
config :plausible, Plausible.Site, default_ingest_threshold: site_default_ingest_threshold
|
||||
end
|
||||
|
||||
s3_disabled? =
|
||||
config_dir
|
||||
|> get_var_from_path_or_env("S3_DISABLED", "true")
|
||||
|> String.to_existing_atom()
|
||||
|
||||
unless s3_disabled? do
|
||||
s3_env = [
|
||||
%{
|
||||
name: "S3_ACCESS_KEY_ID",
|
||||
example: "AKIAIOSFODNN7EXAMPLE"
|
||||
},
|
||||
%{
|
||||
name: "S3_SECRET_ACCESS_KEY",
|
||||
example: "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
|
||||
},
|
||||
%{
|
||||
name: "S3_REGION",
|
||||
example: "us-east-1"
|
||||
},
|
||||
%{
|
||||
name: "S3_ENDPOINT",
|
||||
example: "https://<ACCOUNT_ID>.r2.cloudflarestorage.com"
|
||||
}
|
||||
]
|
||||
|
||||
s3_env =
|
||||
Enum.map(s3_env, fn var ->
|
||||
Map.put(var, :value, get_var_from_path_or_env(config_dir, var.name))
|
||||
end)
|
||||
|
||||
s3_missing_env = Enum.filter(s3_env, &is_nil(&1.value))
|
||||
|
||||
unless s3_missing_env == [] do
|
||||
raise ArgumentError, """
|
||||
Missing S3 configuration. Please set #{s3_missing_env |> Enum.map(& &1.name) |> Enum.join(", ")} environment variable(s):
|
||||
|
||||
#{s3_missing_env |> Enum.map(fn %{name: name, example: example} -> "\t#{name}=#{example}" end) |> Enum.join("\n")}
|
||||
"""
|
||||
end
|
||||
|
||||
s3_env_value = fn name ->
|
||||
s3_env |> Enum.find(&(&1.name == name)) |> Map.fetch!(:value)
|
||||
end
|
||||
|
||||
config :ex_aws,
|
||||
http_client: Plausible.S3.Client,
|
||||
access_key_id: s3_env_value.("S3_ACCESS_KEY_ID"),
|
||||
secret_access_key: s3_env_value.("S3_SECRET_ACCESS_KEY"),
|
||||
region: s3_env_value.("S3_REGION")
|
||||
|
||||
%URI{scheme: s3_scheme, host: s3_host, port: s3_port} = URI.parse(s3_env_value.("S3_ENDPOINT"))
|
||||
|
||||
config :ex_aws, :s3,
|
||||
scheme: s3_scheme <> "://",
|
||||
host: s3_host,
|
||||
port: s3_port
|
||||
end
|
||||
|
|
|
|||
|
|
@ -7,8 +7,7 @@ defmodule Plausible.Imported do
|
|||
* `Plausible.Imported.UniversalAnalytics` - existing mechanism, for legacy Google
|
||||
analytics formerly known as "Google Analytics"
|
||||
* `Plausible.Imported.NoopImporter` - importer stub, used mainly for testing purposes
|
||||
* `Plausible.Imported.CSVImporter` - a placeholder stub for CSV importer that will
|
||||
be added soon
|
||||
* `Plausible.Imported.CSVImporter` - CSV importer from S3
|
||||
|
||||
For more information on implementing importers, see `Plausible.Imported.Importer`.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
defmodule Plausible.Imported.CSVImporter do
|
||||
@moduledoc """
|
||||
CSV importer stub.
|
||||
CSV importer from S3 that uses ClickHouse [s3 table function.](https://clickhouse.com/docs/en/sql-reference/table-functions/s3)
|
||||
"""
|
||||
|
||||
use Plausible.Imported.Importer
|
||||
|
|
@ -16,10 +16,99 @@ defmodule Plausible.Imported.CSVImporter do
|
|||
def email_template(), do: "google_analytics_import.html"
|
||||
|
||||
@impl true
|
||||
def parse_args(%{"s3_path" => s3_path}), do: [s3_path: s3_path]
|
||||
def parse_args(%{"uploads" => uploads}), do: [uploads: uploads]
|
||||
|
||||
@impl true
|
||||
def import_data(_site_import, _opts) do
|
||||
:ok
|
||||
def import_data(site_import, opts) do
|
||||
%{id: import_id, site_id: site_id} = site_import
|
||||
uploads = Keyword.fetch!(opts, :uploads)
|
||||
|
||||
%{access_key_id: s3_access_key_id, secret_access_key: s3_secret_access_key} =
|
||||
Plausible.S3.import_clickhouse_credentials()
|
||||
|
||||
{:ok, ch} =
|
||||
Plausible.IngestRepo.config()
|
||||
|> Keyword.replace!(:pool_size, 1)
|
||||
|> Ch.start_link()
|
||||
|
||||
ranges =
|
||||
Enum.map(uploads, fn upload ->
|
||||
%{"filename" => filename, "s3_url" => s3_url} = upload
|
||||
|
||||
".csv" = Path.extname(filename)
|
||||
table = Path.rootname(filename)
|
||||
ensure_importable_table!(table)
|
||||
|
||||
s3_structure = input_structure!(table)
|
||||
|
||||
statement =
|
||||
"""
|
||||
INSERT INTO {table:Identifier} \
|
||||
SELECT {site_id:UInt64} AS site_id, *, {import_id:UInt64} AS import_id \
|
||||
FROM s3({s3_url:String},{s3_access_key_id:String},{s3_secret_access_key:String},{s3_format:String},{s3_structure:String})\
|
||||
"""
|
||||
|
||||
params =
|
||||
%{
|
||||
"table" => table,
|
||||
"site_id" => site_id,
|
||||
"import_id" => import_id,
|
||||
"s3_url" => s3_url,
|
||||
"s3_access_key_id" => s3_access_key_id,
|
||||
"s3_secret_access_key" => s3_secret_access_key,
|
||||
"s3_format" => "CSVWithNames",
|
||||
"s3_structure" => s3_structure
|
||||
}
|
||||
|
||||
Ch.query!(ch, statement, params, timeout: :infinity)
|
||||
|
||||
%Ch.Result{rows: [[min_date, max_date]]} =
|
||||
Ch.query!(
|
||||
ch,
|
||||
"SELECT min(date), max(date) FROM {table:Identifier} WHERE site_id = {site_id:UInt64} AND import_id = {import_id:UInt64}",
|
||||
%{"table" => table, "site_id" => site_id, "import_id" => import_id}
|
||||
)
|
||||
|
||||
Date.range(min_date, max_date)
|
||||
end)
|
||||
|
||||
{:ok,
|
||||
%{
|
||||
start_date: Enum.min_by(ranges, & &1.first, Date).first,
|
||||
end_date: Enum.max_by(ranges, & &1.last, Date).last
|
||||
}}
|
||||
rescue
|
||||
# we are cancelling on any argument or ClickHouse errors
|
||||
e in [ArgumentError, Ch.Error] ->
|
||||
{:error, Exception.message(e)}
|
||||
end
|
||||
|
||||
input_structures = %{
|
||||
"imported_browsers" =>
|
||||
"date Date, browser String, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32",
|
||||
"imported_devices" =>
|
||||
"date Date, device String, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32",
|
||||
"imported_entry_pages" =>
|
||||
"date Date, entry_page String, visitors UInt64, entrances UInt64, visit_duration UInt64, bounces UInt32",
|
||||
"imported_exit_pages" => "date Date, exit_page String, visitors UInt64, exits UInt64",
|
||||
"imported_locations" =>
|
||||
"date Date, country String, region String, city UInt64, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32",
|
||||
"imported_operating_systems" =>
|
||||
"date Date, operating_system String, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32",
|
||||
"imported_pages" =>
|
||||
"date Date, hostname String, page String, visitors UInt64, pageviews UInt64, exits UInt64, time_on_page UInt64",
|
||||
"imported_sources" =>
|
||||
"date Date, source String, utm_medium String, utm_campaign String, utm_content String, utm_term String, visitors UInt64, visits UInt64, visit_duration UInt64, bounces UInt32",
|
||||
"imported_visitors" =>
|
||||
"date Date, visitors UInt64, pageviews UInt64, bounces UInt64, visits UInt64, visit_duration UInt64"
|
||||
}
|
||||
|
||||
for {table, input_structure} <- input_structures do
|
||||
defp input_structure!(unquote(table)), do: unquote(input_structure)
|
||||
defp ensure_importable_table!(unquote(table)), do: :ok
|
||||
end
|
||||
|
||||
defp ensure_importable_table!(table) do
|
||||
raise ArgumentError, "table #{table} is not supported for data import"
|
||||
end
|
||||
end
|
||||
|
|
|
|||
|
|
@ -0,0 +1,15 @@
|
|||
defmodule Plausible.S3 do
|
||||
@moduledoc """
|
||||
Helper functions for S3 exports/imports.
|
||||
"""
|
||||
|
||||
@doc """
|
||||
Returns `access_key_id` and `secret_access_key` to be used by ClickHouse during imports from S3.
|
||||
"""
|
||||
@spec import_clickhouse_credentials ::
|
||||
%{access_key_id: String.t(), secret_access_key: String.t()}
|
||||
def import_clickhouse_credentials do
|
||||
%{access_key_id: access_key_id, secret_access_key: secret_access_key} = ExAws.Config.new(:s3)
|
||||
%{access_key_id: access_key_id, secret_access_key: secret_access_key}
|
||||
end
|
||||
end
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
defmodule Plausible.S3.Client do
|
||||
@moduledoc false
|
||||
@behaviour ExAws.Request.HttpClient
|
||||
|
||||
@impl true
|
||||
def request(method, url, body, headers, opts) do
|
||||
req = Finch.build(method, url, headers, body)
|
||||
|
||||
case Finch.request(req, Plausible.Finch, opts) do
|
||||
{:ok, %Finch.Response{status: status, headers: headers, body: body}} ->
|
||||
{:ok, %{status_code: status, headers: headers, body: body}}
|
||||
|
||||
{:error, reason} ->
|
||||
{:error, %{reason: reason}}
|
||||
end
|
||||
end
|
||||
end
|
||||
6
mix.exs
6
mix.exs
|
|
@ -136,7 +136,11 @@ defmodule Plausible.MixProject do
|
|||
{:esbuild, "~> 0.7", runtime: Mix.env() in [:dev, :small_dev]},
|
||||
{:tailwind, "~> 0.2.0", runtime: Mix.env() in [:dev, :small_dev]},
|
||||
{:ex_json_logger, "~> 1.4.0"},
|
||||
{:ecto_network, "~> 1.5.0"}
|
||||
{:ecto_network, "~> 1.5.0"},
|
||||
{:ex_aws, "~> 2.5"},
|
||||
{:ex_aws_s3, "~> 2.5"},
|
||||
{:sweet_xml, "~> 0.7.4"},
|
||||
{:testcontainers, "~> 1.6", only: [:test, :small_test]}
|
||||
]
|
||||
end
|
||||
|
||||
|
|
|
|||
7
mix.lock
7
mix.lock
|
|
@ -42,10 +42,13 @@
|
|||
"erlex": {:hex, :erlex, "0.2.6", "c7987d15e899c7a2f34f5420d2a2ea0d659682c06ac607572df55a43753aa12e", [:mix], [], "hexpm", "2ed2e25711feb44d52b17d2780eabf998452f6efda104877a3881c2f8c0c0c75"},
|
||||
"esbuild": {:hex, :esbuild, "0.8.1", "0cbf919f0eccb136d2eeef0df49c4acf55336de864e63594adcea3814f3edf41", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "25fc876a67c13cb0a776e7b5d7974851556baeda2085296c14ab48555ea7560f"},
|
||||
"eternal": {:hex, :eternal, "1.2.2", "d1641c86368de99375b98d183042dd6c2b234262b8d08dfd72b9eeaafc2a1abd", [:mix], [], "hexpm", "2c9fe32b9c3726703ba5e1d43a1d255a4f3f2d8f8f9bc19f094c7cb1a7a9e782"},
|
||||
"ex_aws": {:hex, :ex_aws, "2.5.1", "7418917974ea42e9e84b25e88b9f3d21a861d5f953ad453e212f48e593d8d39f", [:mix], [{:configparser_ex, "~> 4.0", [hex: :configparser_ex, repo: "hexpm", optional: true]}, {:hackney, "~> 1.16", [hex: :hackney, repo: "hexpm", optional: true]}, {:jason, "~> 1.1", [hex: :jason, repo: "hexpm", optional: true]}, {:jsx, "~> 2.8 or ~> 3.0", [hex: :jsx, repo: "hexpm", optional: true]}, {:mime, "~> 1.2 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:sweet_xml, "~> 0.7", [hex: :sweet_xml, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "1b95431f70c446fa1871f0eb9b183043c5a625f75f9948a42d25f43ae2eff12b"},
|
||||
"ex_aws_s3": {:hex, :ex_aws_s3, "2.5.3", "422468e5c3e1a4da5298e66c3468b465cfd354b842e512cb1f6fbbe4e2f5bdaf", [:mix], [{:ex_aws, "~> 2.0", [hex: :ex_aws, repo: "hexpm", optional: false]}, {:sweet_xml, ">= 0.0.0", [hex: :sweet_xml, repo: "hexpm", optional: true]}], "hexpm", "4f09dd372cc386550e484808c5ac5027766c8d0cd8271ccc578b82ee6ef4f3b8"},
|
||||
"ex_cldr": {:hex, :ex_cldr, "2.37.5", "9da6d97334035b961d2c2de167dc6af8cd3e09859301a5b8f49f90bd8b034593", [:mix], [{:cldr_utils, "~> 2.21", [hex: :cldr_utils, repo: "hexpm", optional: false]}, {:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:gettext, "~> 0.19", [hex: :gettext, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:nimble_parsec, "~> 0.5 or ~> 1.0", [hex: :nimble_parsec, repo: "hexpm", optional: true]}], "hexpm", "74ad5ddff791112ce4156382e171a5f5d3766af9d5c4675e0571f081fe136479"},
|
||||
"ex_cldr_currencies": {:hex, :ex_cldr_currencies, "2.15.1", "e92ba17c41e7405b7784e0e65f406b5f17cfe313e0e70de9befd653e12854822", [:mix], [{:ex_cldr, "~> 2.34", [hex: :ex_cldr, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "31df8bd37688340f8819bdd770eb17d659652078d34db632b85d4a32864d6a25"},
|
||||
"ex_cldr_numbers": {:hex, :ex_cldr_numbers, "2.32.3", "b631ff94c982ec518e46bf4736000a30a33d6b58facc085d5f240305f512ad4a", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:digital_token, "~> 0.3 or ~> 1.0", [hex: :digital_token, repo: "hexpm", optional: false]}, {:ex_cldr, "~> 2.37", [hex: :ex_cldr, repo: "hexpm", optional: false]}, {:ex_cldr_currencies, ">= 2.14.2", [hex: :ex_cldr_currencies, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}], "hexpm", "7b626ff1e59a0ec9c3c5db5ce9ca91a6995e2ab56426b71f3cbf67181ea225f5"},
|
||||
"ex_doc": {:hex, :ex_doc, "0.31.1", "8a2355ac42b1cc7b2379da9e40243f2670143721dd50748bf6c3b1184dae2089", [:mix], [{:earmark_parser, "~> 1.4.39", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_c, ">= 0.1.1", [hex: :makeup_c, repo: "hexpm", optional: true]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "3178c3a407c557d8343479e1ff117a96fd31bafe52a039079593fb0524ef61b0"},
|
||||
"ex_docker_engine_api": {:hex, :ex_docker_engine_api, "1.43.1", "1161e34b6bea5cef84d8fdc1d5d510fcb0c463941ce84c36f4a0f44a9096eb96", [:mix], [{:hackney, "~> 1.20", [hex: :hackney, repo: "hexpm", optional: false]}, {:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}, {:tesla, "~> 1.7", [hex: :tesla, repo: "hexpm", optional: false]}], "hexpm", "ec8fc499389aeef56ddca67e89e9e98098cff50587b56e8b4613279f382793b1"},
|
||||
"ex_json_logger": {:hex, :ex_json_logger, "1.4.0", "ad1dcc1cfe6940ee1d9d489b20757c89769626ce34c4957548d6fbe155cd96f1", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}], "hexpm", "7548a1ecba290746e06214d2b3d8783c76760c779a8903a8e44bfd23a7340444"},
|
||||
"ex_machina": {:hex, :ex_machina, "2.7.0", "b792cc3127fd0680fecdb6299235b4727a4944a09ff0fa904cc639272cd92dc7", [:mix], [{:ecto, "~> 2.2 or ~> 3.0", [hex: :ecto, repo: "hexpm", optional: true]}, {:ecto_sql, "~> 3.0", [hex: :ecto_sql, repo: "hexpm", optional: true]}], "hexpm", "419aa7a39bde11894c87a615c4ecaa52d8f107bbdd81d810465186f783245bf8"},
|
||||
"ex_money": {:hex, :ex_money, "5.15.3", "ea070eb1eefd22258aa288921ba482f1fa5f870d229069dc3d12458b7b8bf66d", [:mix], [{:decimal, "~> 1.6 or ~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:ex_cldr_numbers, "~> 2.31", [hex: :ex_cldr_numbers, repo: "hexpm", optional: false]}, {:gringotts, "~> 1.1", [hex: :gringotts, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:nimble_parsec, "~> 0.5 or ~> 1.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 2.0 or ~> 3.0 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}, {:poison, "~> 3.0 or ~> 4.0 or ~> 5.0", [hex: :poison, repo: "hexpm", optional: true]}], "hexpm", "3671f1808c428b7c4688650d43dc1af0b64c0eea822429a28c55cef15fb4fdc1"},
|
||||
|
|
@ -134,18 +137,22 @@
|
|||
"siphash": {:hex, :siphash, "3.2.0", "ec03fd4066259218c85e2a4b8eec4bb9663bc02b127ea8a0836db376ba73f2ed", [:make, :mix], [], "hexpm", "ba3810701c6e95637a745e186e8a4899087c3b079ba88fb8f33df054c3b0b7c3"},
|
||||
"sleeplocks": {:hex, :sleeplocks, "1.1.2", "d45aa1c5513da48c888715e3381211c859af34bee9b8290490e10c90bb6ff0ca", [:rebar3], [], "hexpm", "9fe5d048c5b781d6305c1a3a0f40bb3dfc06f49bf40571f3d2d0c57eaa7f59a5"},
|
||||
"ssl_verify_fun": {:hex, :ssl_verify_fun, "1.1.7", "354c321cf377240c7b8716899e182ce4890c5938111a1296add3ec74cf1715df", [:make, :mix, :rebar3], [], "hexpm", "fe4c190e8f37401d30167c8c405eda19469f34577987c76dde613e838bbc67f8"},
|
||||
"sweet_xml": {:hex, :sweet_xml, "0.7.4", "a8b7e1ce7ecd775c7e8a65d501bc2cd933bff3a9c41ab763f5105688ef485d08", [:mix], [], "hexpm", "e7c4b0bdbf460c928234951def54fe87edf1a170f6896675443279e2dbeba167"},
|
||||
"tailwind": {:hex, :tailwind, "0.2.2", "9e27288b568ede1d88517e8c61259bc214a12d7eed271e102db4c93fcca9b2cd", [:mix], [{:castore, ">= 0.0.0", [hex: :castore, repo: "hexpm", optional: false]}], "hexpm", "ccfb5025179ea307f7f899d1bb3905cd0ac9f687ed77feebc8f67bdca78565c4"},
|
||||
"telemetry": {:hex, :telemetry, "1.2.1", "68fdfe8d8f05a8428483a97d7aab2f268aaff24b49e0f599faa091f1d4e7f61c", [:rebar3], [], "hexpm", "dad9ce9d8effc621708f99eac538ef1cbe05d6a874dd741de2e689c47feafed5"},
|
||||
"telemetry_metrics": {:hex, :telemetry_metrics, "0.6.1", "315d9163a1d4660aedc3fee73f33f1d355dcc76c5c3ab3d59e76e3edf80eef1f", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "7be9e0871c41732c233be71e4be11b96e56177bf15dde64a8ac9ce72ac9834c6"},
|
||||
"telemetry_metrics_prometheus_core": {:hex, :telemetry_metrics_prometheus_core, "1.1.0", "4e15f6d7dbedb3a4e3aed2262b7e1407f166fcb9c30ca3f96635dfbbef99965c", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:telemetry_metrics, "~> 0.6", [hex: :telemetry_metrics, repo: "hexpm", optional: false]}], "hexpm", "0dd10e7fe8070095df063798f82709b0a1224c31b8baf6278b423898d591a069"},
|
||||
"telemetry_poller": {:hex, :telemetry_poller, "1.0.0", "db91bb424e07f2bb6e73926fcafbfcbcb295f0193e0a00e825e589a0a47e8453", [:rebar3], [{:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "b3a24eafd66c3f42da30fc3ca7dda1e9d546c12250a2d60d7b81d264fbec4f6e"},
|
||||
"telemetry_registry": {:hex, :telemetry_registry, "0.3.1", "14a3319a7d9027bdbff7ebcacf1a438f5f5c903057b93aee484cca26f05bdcba", [:mix, :rebar3], [{:telemetry, "~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "6d0ca77b691cf854ed074b459a93b87f4c7f5512f8f7743c635ca83da81f939e"},
|
||||
"tesla": {:hex, :tesla, "1.8.0", "d511a4f5c5e42538d97eef7c40ec4f3e44effdc5068206f42ed859e09e51d1fd", [:mix], [{:castore, "~> 0.1 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:exjsx, ">= 3.0.0", [hex: :exjsx, repo: "hexpm", optional: true]}, {:finch, "~> 0.13", [hex: :finch, repo: "hexpm", optional: true]}, {:fuse, "~> 2.4", [hex: :fuse, repo: "hexpm", optional: true]}, {:gun, ">= 1.0.0", [hex: :gun, repo: "hexpm", optional: true]}, {:hackney, "~> 1.6", [hex: :hackney, repo: "hexpm", optional: true]}, {:ibrowse, "4.4.2", [hex: :ibrowse, repo: "hexpm", optional: true]}, {:jason, ">= 1.0.0", [hex: :jason, repo: "hexpm", optional: true]}, {:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.0", [hex: :mint, repo: "hexpm", optional: true]}, {:msgpax, "~> 2.3", [hex: :msgpax, repo: "hexpm", optional: true]}, {:poison, ">= 1.0.0", [hex: :poison, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: true]}], "hexpm", "10501f360cd926a309501287470372af1a6e1cbed0f43949203a4c13300bc79f"},
|
||||
"testcontainers": {:hex, :testcontainers, "1.6.0", "14b3251f01ce0b1ada716130d371ba0b6cb1ce2904aa38bd58e5ff4194f4d88f", [:mix], [{:ecto, "~> 3.3", [hex: :ecto, repo: "hexpm", optional: true]}, {:ecto_sql, "~> 3.3", [hex: :ecto_sql, repo: "hexpm", optional: true]}, {:ex_docker_engine_api, "~> 1.43.1", [hex: :ex_docker_engine_api, repo: "hexpm", optional: false]}, {:uuid, "~> 1.1", [hex: :uuid, repo: "hexpm", optional: false]}], "hexpm", "3f812407f232954999a3a2e05b2802e1d8d1afba120533c42b32c7cc91d35daf"},
|
||||
"timex": {:hex, :timex, "3.7.11", "bb95cb4eb1d06e27346325de506bcc6c30f9c6dea40d1ebe390b262fad1862d1", [:mix], [{:combine, "~> 0.10", [hex: :combine, repo: "hexpm", optional: false]}, {:gettext, "~> 0.20", [hex: :gettext, repo: "hexpm", optional: false]}, {:tzdata, "~> 1.1", [hex: :tzdata, repo: "hexpm", optional: false]}], "hexpm", "8b9024f7efbabaf9bd7aa04f65cf8dcd7c9818ca5737677c7b76acbc6a94d1aa"},
|
||||
"tls_certificate_check": {:hex, :tls_certificate_check, "1.21.0", "042ab2c0c860652bc5cf69c94e3a31f96676d14682e22ec7813bd173ceff1788", [:rebar3], [{:ssl_verify_fun, "~> 1.1", [hex: :ssl_verify_fun, repo: "hexpm", optional: false]}], "hexpm", "6cee6cffc35a390840d48d463541d50746a7b0e421acaadb833cfc7961e490e7"},
|
||||
"tzdata": {:hex, :tzdata, "1.1.1", "20c8043476dfda8504952d00adac41c6eda23912278add38edc140ae0c5bcc46", [:mix], [{:hackney, "~> 1.17", [hex: :hackney, repo: "hexpm", optional: false]}], "hexpm", "a69cec8352eafcd2e198dea28a34113b60fdc6cb57eb5ad65c10292a6ba89787"},
|
||||
"ua_inspector": {:hex, :ua_inspector, "3.8.0", "c0b0d13200a9bd509225f15ea8cf275c0bec27390a21c355746ff8b8a88c3e4d", [:mix], [{:hackney, "~> 1.0", [hex: :hackney, repo: "hexpm", optional: false]}, {:yamerl, "~> 0.7", [hex: :yamerl, repo: "hexpm", optional: false]}], "hexpm", "7c980bae82a4754075b933e0f383935a681e5a2628856ad3ecf6eb80d8139539"},
|
||||
"unicode_util_compat": {:hex, :unicode_util_compat, "0.7.0", "bc84380c9ab48177092f43ac89e4dfa2c6d62b40b8bd132b1059ecc7232f9a78", [:rebar3], [], "hexpm", "25eee6d67df61960cf6a794239566599b09e17e668d3700247bc498638152521"},
|
||||
"unsafe": {:hex, :unsafe, "1.0.2", "23c6be12f6c1605364801f4b47007c0c159497d0446ad378b5cf05f1855c0581", [:mix], [], "hexpm", "b485231683c3ab01a9cd44cb4a79f152c6f3bb87358439c6f68791b85c2df675"},
|
||||
"uuid": {:hex, :uuid, "1.1.8", "e22fc04499de0de3ed1116b770c7737779f226ceefa0badb3592e64d5cfb4eb9", [:mix], [], "hexpm", "c790593b4c3b601f5dc2378baae7efaf5b3d73c4c6456ba85759905be792f2ac"},
|
||||
"websock": {:hex, :websock, "0.5.3", "2f69a6ebe810328555b6fe5c831a851f485e303a7c8ce6c5f675abeb20ebdadc", [:mix], [], "hexpm", "6105453d7fac22c712ad66fab1d45abdf049868f253cf719b625151460b8b453"},
|
||||
"websock_adapter": {:hex, :websock_adapter, "0.5.5", "9dfeee8269b27e958a65b3e235b7e447769f66b5b5925385f5a569269164a210", [:mix], [{:bandit, ">= 0.6.0", [hex: :bandit, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.6", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "4b977ba4a01918acbf77045ff88de7f6972c2a009213c515a445c48f224ffce9"},
|
||||
"yamerl": {:hex, :yamerl, "0.10.0", "4ff81fee2f1f6a46f1700c0d880b24d193ddb74bd14ef42cb0bcf46e81ef2f8e", [:rebar3], [], "hexpm", "346adb2963f1051dc837a2364e4acf6eb7d80097c0f53cbdc3046ec8ec4b4e6e"},
|
||||
|
|
|
|||
|
|
@ -191,6 +191,79 @@ defmodule Plausible.ConfigTest do
|
|||
end
|
||||
end
|
||||
|
||||
describe "s3" do
|
||||
test "has required env vars" do
|
||||
env = [
|
||||
{"S3_ACCESS_KEY_ID", nil},
|
||||
{"S3_SECRET_ACCESS_KEY", nil},
|
||||
{"S3_REGION", nil},
|
||||
{"S3_ENDPOINT", nil}
|
||||
]
|
||||
|
||||
result =
|
||||
try do
|
||||
runtime_config(env)
|
||||
rescue
|
||||
e -> e
|
||||
end
|
||||
|
||||
assert %ArgumentError{} = result
|
||||
|
||||
assert Exception.message(result) == """
|
||||
Missing S3 configuration. Please set S3_ACCESS_KEY_ID, S3_SECRET_ACCESS_KEY, S3_REGION, S3_ENDPOINT environment variable(s):
|
||||
|
||||
\tS3_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE
|
||||
\tS3_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
|
||||
\tS3_REGION=us-east-1
|
||||
\tS3_ENDPOINT=https://<ACCOUNT_ID>.r2.cloudflarestorage.com
|
||||
"""
|
||||
end
|
||||
|
||||
test "renders only missing env vars" do
|
||||
env = [
|
||||
{"S3_ACCESS_KEY_ID", "AKIAIOSFODNN7EXAMPLE"},
|
||||
{"S3_SECRET_ACCESS_KEY", nil},
|
||||
{"S3_REGION", "eu-north-1"},
|
||||
{"S3_ENDPOINT", nil}
|
||||
]
|
||||
|
||||
result =
|
||||
try do
|
||||
runtime_config(env)
|
||||
rescue
|
||||
e -> e
|
||||
end
|
||||
|
||||
assert %ArgumentError{} = result
|
||||
|
||||
assert Exception.message(result) == """
|
||||
Missing S3 configuration. Please set S3_SECRET_ACCESS_KEY, S3_ENDPOINT environment variable(s):
|
||||
|
||||
\tS3_SECRET_ACCESS_KEY=wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY
|
||||
\tS3_ENDPOINT=https://<ACCOUNT_ID>.r2.cloudflarestorage.com
|
||||
"""
|
||||
end
|
||||
|
||||
test "works when everything is set" do
|
||||
env = [
|
||||
{"S3_ACCESS_KEY_ID", "minioadmin"},
|
||||
{"S3_SECRET_ACCESS_KEY", "minioadmin"},
|
||||
{"S3_REGION", "us-east-1"},
|
||||
{"S3_ENDPOINT", "http://localhost:6000"}
|
||||
]
|
||||
|
||||
config = runtime_config(env)
|
||||
|
||||
assert config[:ex_aws] == [
|
||||
http_client: Plausible.S3.Client,
|
||||
access_key_id: "minioadmin",
|
||||
secret_access_key: "minioadmin",
|
||||
region: "us-east-1",
|
||||
s3: [scheme: "http://", host: "localhost", port: 6000]
|
||||
]
|
||||
end
|
||||
end
|
||||
|
||||
defp runtime_config(env) do
|
||||
put_system_env_undo(env)
|
||||
Config.Reader.read!("config/runtime.exs", env: :prod)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,408 @@
|
|||
defmodule Plausible.Imported.CSVImporterTest do
|
||||
use Plausible.DataCase, async: true
|
||||
alias Plausible.Imported.{CSVImporter, SiteImport}
|
||||
alias Testcontainers.MinioContainer
|
||||
require SiteImport
|
||||
|
||||
@moduletag :minio
|
||||
|
||||
setup_all do
|
||||
Testcontainers.start_link()
|
||||
|
||||
{:ok, minio} = Testcontainers.start_container(MinioContainer.new())
|
||||
on_exit(fn -> :ok = Testcontainers.stop_container(minio.container_id) end)
|
||||
connection_opts = MinioContainer.connection_opts(minio)
|
||||
|
||||
bucket = "imports"
|
||||
ExAws.request!(ExAws.S3.put_bucket(bucket, "us-east-1"), connection_opts)
|
||||
on_exit(fn -> ExAws.request!(ExAws.S3.delete_bucket(bucket), connection_opts) end)
|
||||
|
||||
{:ok, container: minio, bucket: bucket}
|
||||
end
|
||||
|
||||
describe "new_import/3 and parse_args/1" do
|
||||
setup [:create_user, :create_new_site]
|
||||
|
||||
test "parses job args properly", %{user: user, site: site} do
|
||||
tables = [
|
||||
"imported_browsers",
|
||||
"imported_devices",
|
||||
"imported_entry_pages",
|
||||
"imported_exit_pages",
|
||||
"imported_locations",
|
||||
"imported_operating_systems",
|
||||
"imported_pages",
|
||||
"imported_sources",
|
||||
"imported_visitors"
|
||||
]
|
||||
|
||||
uploads =
|
||||
Enum.map(tables, fn table ->
|
||||
filename = "#{table}.csv"
|
||||
|
||||
%{
|
||||
"filename" => filename,
|
||||
"s3_url" => "https://bucket-name.s3.eu-north-1.amazonaws.com/#{site.id}/#{filename}"
|
||||
}
|
||||
end)
|
||||
|
||||
assert {:ok, job} =
|
||||
CSVImporter.new_import(site, user,
|
||||
# to satisfy the non null constraints on the table I'm providing "0" dates (according to ClickHouse)
|
||||
start_date: ~D[1970-01-01],
|
||||
end_date: ~D[1970-01-01],
|
||||
uploads: uploads
|
||||
)
|
||||
|
||||
assert %Oban.Job{args: %{"import_id" => import_id, "uploads" => ^uploads} = args} =
|
||||
Repo.reload!(job)
|
||||
|
||||
assert [
|
||||
%{
|
||||
id: ^import_id,
|
||||
source: :csv,
|
||||
start_date: ~D[1970-01-01],
|
||||
end_date: ~D[1970-01-01],
|
||||
status: SiteImport.pending()
|
||||
}
|
||||
] = Plausible.Imported.list_all_imports(site)
|
||||
|
||||
assert %{imported_data: nil} = Repo.reload!(site)
|
||||
assert CSVImporter.parse_args(args) == [uploads: uploads]
|
||||
end
|
||||
end
|
||||
|
||||
describe "import_data/2" do
|
||||
setup [:create_user, :create_new_site]
|
||||
|
||||
test "imports tables from S3", %{site: site, user: user, bucket: bucket, container: minio} do
|
||||
csvs = [
|
||||
%{
|
||||
name: "imported_browsers.csv",
|
||||
body: """
|
||||
"date","browser","visitors","visits","visit_duration","bounces"
|
||||
"2021-12-30","Amazon Silk",2,2,0,2
|
||||
"2021-12-30","Chrome",31,32,329,29
|
||||
"2021-12-30","Edge",3,3,0,3
|
||||
"2021-12-30","Firefox",1,1,0,1
|
||||
"2021-12-30","Internet Explorer",1,1,0,1
|
||||
"2021-12-30","Mobile App",2,2,0,2
|
||||
"2021-12-30","Mobile App",4,4,0,4
|
||||
"2021-12-30","Mobile App",1,1,0,1
|
||||
"2021-12-30","Safari",32,36,0,36
|
||||
"2021-12-30","Samsung Internet",2,2,0,2
|
||||
"2021-12-30","UC Browser",1,1,0,1
|
||||
"2021-12-31","Chrome",24,25,75,23
|
||||
"2021-12-31","Edge",3,3,0,3
|
||||
"2021-12-31","Firefox",1,1,466,0
|
||||
"2021-12-31","Mobile App",4,5,0,5
|
||||
"2021-12-31","Mobile App",4,4,0,4
|
||||
"2021-12-31","Mobile App",1,1,85,0
|
||||
"2021-12-31","Safari",37,45,1957,42
|
||||
"2021-12-31","Samsung Internet",1,1,199,0
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_devices.csv",
|
||||
body: """
|
||||
"date","device","visitors","visits","visit_duration","bounces"
|
||||
"2021-12-30","Desktop",25,28,75,27
|
||||
"2021-12-30","Mobile",49,51,254,49
|
||||
"2021-12-30","Tablet",6,6,0,6
|
||||
"2021-12-31","Desktop",20,26,496,24
|
||||
"2021-12-31","Mobile",50,54,1842,49
|
||||
"2021-12-31","Tablet",5,5,444,4
|
||||
"2022-01-01","Desktop",33,34,1117,32
|
||||
"2022-01-01","Mobile",55,60,306,54
|
||||
"2022-01-01","Tablet",8,8,419,7
|
||||
"2022-01-02","Desktop",28,28,86,26
|
||||
"2022-01-02","Mobile",66,73,2450,65
|
||||
"2022-01-02","Tablet",9,9,0,9
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_entry_pages.csv",
|
||||
body: """
|
||||
"date","visitors","entrances","visit_duration","bounces","entry_page"
|
||||
"2021-12-30",6,6,0,6,"/14776416252794997127"
|
||||
"2021-12-30",1,1,0,1,"/15455127321321119046"
|
||||
"2021-12-30",1,1,43,0,"/10399835914295020763"
|
||||
"2021-12-30",1,1,0,1,"/9102354072466236765"
|
||||
"2021-12-30",1,1,0,1,"/1586391735863371077"
|
||||
"2021-12-30",1,1,0,1,"/3457026921000639206"
|
||||
"2021-12-30",2,3,0,3,"/6077502147861556415"
|
||||
"2021-12-30",1,1,0,1,"/14280570555317344651"
|
||||
"2021-12-30",3,3,0,3,"/5284268072698982201"
|
||||
"2021-12-30",1,1,0,1,"/7478911940502018071"
|
||||
"2021-12-30",1,1,0,1,"/6402607186523575652"
|
||||
"2021-12-30",2,2,0,2,"/9962503789684934900"
|
||||
"2021-12-30",8,10,0,10,"/13595620304963848161"
|
||||
"2021-12-30",2,2,0,2,"/17019199732013993436"
|
||||
"2021-12-30",31,31,211,30,"/9874837495456455794"
|
||||
"2021-12-31",4,4,0,4,"/14776416252794997127"
|
||||
"2021-12-31",1,1,0,1,"/8738789417178304429"
|
||||
"2021-12-31",1,1,0,1,"/7445073500314667742"
|
||||
"2021-12-31",1,1,0,1,"/4897404798407749335"
|
||||
"2021-12-31",1,1,45,0,"/11263893625781431659"
|
||||
"2021-12-31",1,1,0,1,"/16478773157730928089"
|
||||
"2021-12-31",1,1,0,1,"/1710995203264225236"
|
||||
"2021-12-31",1,1,0,1,"/14280570555317344651"
|
||||
"2021-12-31",4,5,444,4,"/5284268072698982201"
|
||||
"2021-12-31",2,2,466,1,"/7478911940502018071"
|
||||
"2021-12-31",9,16,1455,15,"/13595620304963848161"
|
||||
"2021-12-31",25,25,88,23,"/9874837495456455794"
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_exit_pages.csv",
|
||||
body: """
|
||||
"date","visitors","exits","exit_page"
|
||||
"2021-12-30",6,6,"/14776416252794997127"
|
||||
"2021-12-30",1,1,"/15455127321321119046"
|
||||
"2021-12-30",1,1,"/9102354072466236765"
|
||||
"2021-12-30",1,1,"/4457889102355683190"
|
||||
"2021-12-30",1,1,"/12105301321223776356"
|
||||
"2021-12-30",1,2,"/1526239929864936398"
|
||||
"2021-12-30",1,1,"/7478911940502018071"
|
||||
"2021-12-30",1,1,"/6402607186523575652"
|
||||
"2021-12-30",2,2,"/9962503789684934900"
|
||||
"2021-12-30",8,10,"/13595620304963848161"
|
||||
"2021-12-30",2,2,"/17019199732013993436"
|
||||
"2021-12-30",32,32,"/9874837495456455794"
|
||||
"2021-12-31",4,4,"/14776416252794997127"
|
||||
"2021-12-31",1,1,"/8738789417178304429"
|
||||
"2021-12-31",1,1,"/7445073500314667742"
|
||||
"2021-12-31",1,1,"/4897404798407749335"
|
||||
"2021-12-31",1,1,"/11263893625781431659"
|
||||
"2021-12-31",1,1,"/16478773157730928089"
|
||||
"2021-12-31",1,1,"/1710995203264225236"
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_locations.csv",
|
||||
body: """
|
||||
"date","country","region","city","visitors","visits","visit_duration","bounces"
|
||||
"2021-12-30","AU","",0,1,1,43,0
|
||||
"2021-12-30","AU","",2078025,3,4,211,3
|
||||
"2021-12-30","AU","",2147714,2,2,0,2
|
||||
"2021-12-30","AU","",2158177,2,2,0,2
|
||||
"2021-12-30","AU","",2174003,1,1,0,1
|
||||
"2021-12-30","BE","",0,1,1,0,1
|
||||
"2021-12-30","BE","",2792196,1,1,0,1
|
||||
"2021-12-30","BR","",0,1,1,0,1
|
||||
"2021-12-30","CA","",0,1,1,0,1
|
||||
"2021-12-30","PL","",0,1,1,0,1
|
||||
"2021-12-30","PL","",756135,1,1,0,1
|
||||
"2021-12-30","US","",0,1,1,0,1
|
||||
"2021-12-30","US","",0,1,1,0,1
|
||||
"2021-12-30","US","",0,1,1,0,1
|
||||
"2021-12-30","US","",0,1,1,0,1
|
||||
"2021-12-30","US","",4063926,1,1,0,1
|
||||
"2021-12-30","US","",4074013,1,3,0,3
|
||||
"2021-12-30","US","",5089478,1,1,0,1
|
||||
"2021-12-31","AU","",2147714,3,3,0,3
|
||||
"2021-12-31","AU","",2158177,2,2,0,2
|
||||
"2021-12-31","CA","",0,1,1,0,1
|
||||
"2021-12-31","IT","",3176959,1,1,85,0
|
||||
"2021-12-31","KR","",1835848,1,1,0,1
|
||||
"2021-12-31","LV","",456172,1,1,0,1
|
||||
"2021-12-31","MX","",3530757,2,3,0,3
|
||||
"2021-12-31","NL","",0,1,1,0,1
|
||||
"2021-12-31","NL","",0,1,2,0,2
|
||||
"2021-12-31","NL","",2745321,1,1,0,1
|
||||
"2021-12-31","NO","",0,1,1,199,0
|
||||
"2021-12-31","SE","",0,1,1,0,1
|
||||
"2021-12-31","SG","",1880252,1,1,0,1
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_operating_systems.csv",
|
||||
body: """
|
||||
"date","operating_system","visitors","visits","visit_duration","bounces"
|
||||
"2021-12-30","Android",25,26,254,24
|
||||
"2021-12-30","Mac",13,16,0,16
|
||||
"2021-12-30","Windows",12,12,75,11
|
||||
"2021-12-30","iOS",30,31,0,31
|
||||
"2021-12-31","Android",15,16,329,13
|
||||
"2021-12-31","Mac",13,19,0,19
|
||||
"2021-12-31","Windows",7,7,496,5
|
||||
"2021-12-31","iOS",40,43,1957,40
|
||||
"2022-01-01","",17,18,0,18
|
||||
"2022-01-01","Android",25,28,32,26
|
||||
"2022-01-01","Chrome OS",1,1,0,1
|
||||
"2022-01-01","Mac",6,6,0,6
|
||||
"2022-01-01","Windows",9,9,1117,7
|
||||
"2022-01-01","iOS",38,40,693,35
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_pages.csv",
|
||||
body: """
|
||||
"date","visitors","pageviews","exits","time_on_page","hostname","page"
|
||||
"2021-12-30",1,1,0,43,"lucky.numbers.com","/14776416252794997127"
|
||||
"2021-12-30",1,1,1,0,"lucky.numbers.com","/14776416252794997127"
|
||||
"2021-12-30",6,6,6,0,"lucky.numbers.com","/14776416252794997127"
|
||||
"2021-12-30",1,1,1,0,"lucky.numbers.com","/9102354072466236765"
|
||||
"2021-12-30",1,1,1,0,"lucky.numbers.com","/7478911940502018071"
|
||||
"2021-12-30",1,1,1,0,"lucky.numbers.com","/6402607186523575652"
|
||||
"2021-12-30",2,2,2,0,"lucky.numbers.com","/9962503789684934900"
|
||||
"2021-12-30",8,10,10,0,"lucky.numbers.com","/13595620304963848161"
|
||||
"2021-12-30",2,2,2,0,"lucky.numbers.com","/17019199732013993436"
|
||||
"2021-12-30",32,33,32,211,"lucky.numbers.com","/9874837495456455794"
|
||||
"2021-12-31",4,4,4,0,"lucky.numbers.com","/14776416252794997127"
|
||||
"2021-12-31",1,1,1,0,"lucky.numbers.com","/8738789417178304429"
|
||||
"2021-12-31",1,1,1,0,"lucky.numbers.com","/7445073500314667742"
|
||||
"2021-12-31",1,1,1,0,"lucky.numbers.com","/4897404798407749335"
|
||||
"2021-12-31",1,2,1,29,"lucky.numbers.com","/11263893625781431659"
|
||||
"2022-01-01",2,2,2,0,"lucky.numbers.com","/5878724061840196349"
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_sources.csv",
|
||||
body: """
|
||||
"date","source","utm_medium","utm_campaign","utm_content","utm_term","visitors","visits","visit_duration","bounces"
|
||||
"2021-12-30","","","","","",25,26,254,24
|
||||
"2021-12-30","Hacker News","referral","","","",2,2,0,2
|
||||
"2021-12-30","Google","organic","","","",20,22,75,21
|
||||
"2021-12-30","Pinterest","referral","","","",25,26,0,26
|
||||
"2021-12-30","baidu","organic","","","",1,1,0,1
|
||||
"2021-12-30","yahoo","organic","","","",3,3,0,3
|
||||
"2021-12-31","","","","","",16,16,199,15
|
||||
"2021-12-31","Bing","organic","","","",1,1,0,1
|
||||
"2021-12-31","DuckDuckGo","organic","","","",1,1,0,1
|
||||
"2021-12-31","Hacker News","referral","","","",1,1,466,0
|
||||
"2021-12-31","Google","organic","","","",25,32,85,31
|
||||
"2021-12-31","Pinterest","referral","","","",22,24,88,22
|
||||
"2021-12-31","yahoo","organic","","","",3,3,1899,1
|
||||
"2022-01-01","","","","","",37,38,1137,35
|
||||
"2022-01-01","Bing","organic","","","",2,2,171,1
|
||||
"2022-01-01","DuckDuckGo","organic","","","",2,3,0,3
|
||||
"2022-01-01","Hacker News","referral","","","",1,1,0,1
|
||||
"2022-01-01","Google","referral","","","",1,1,0,1
|
||||
"2022-01-01","Google","organic","","","",21,23,115,19
|
||||
"2022-01-01","Pinterest","referral","","","",29,30,0,30
|
||||
"2022-01-01","yahoo","organic","","","",3,3,419,2
|
||||
"2022-01-06","","","","","",37,38,430,36
|
||||
"2022-01-06","Bing","organic","","","how lucky am I as UInt64",1,1,0,1
|
||||
"2022-01-06","Bing","organic","","","",3,3,10,2
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_visitors.csv",
|
||||
body: """
|
||||
"date","visitors","pageviews","bounces","visits","visit_duration"
|
||||
"2011-12-25",5,50,2,7,8640
|
||||
"2011-12-26",3,4,2,3,43
|
||||
"2011-12-27",3,6,2,4,2313
|
||||
"2011-12-28",6,30,4,8,2264
|
||||
"2011-12-29",4,8,5,6,136
|
||||
"2011-12-30",1,1,1,1,0
|
||||
"""
|
||||
}
|
||||
]
|
||||
|
||||
connection_opts = MinioContainer.connection_opts(minio)
|
||||
|
||||
on_exit(fn ->
|
||||
keys = Enum.map(csvs, fn csv -> "#{site.id}/#{csv.name}" end)
|
||||
ExAws.request!(ExAws.S3.delete_all_objects(bucket, keys), connection_opts)
|
||||
end)
|
||||
|
||||
uploads =
|
||||
for %{name: name, body: body} <- csvs do
|
||||
key = "#{site.id}/#{name}"
|
||||
ExAws.request!(ExAws.S3.put_object(bucket, key, body), connection_opts)
|
||||
%{"filename" => name, "s3_url" => s3_url(minio, bucket, key)}
|
||||
end
|
||||
|
||||
{:ok, job} =
|
||||
CSVImporter.new_import(
|
||||
site,
|
||||
user,
|
||||
# to satisfy the non null constraints on the table I'm providing "0" dates (according to ClickHouse)
|
||||
start_date: ~D[1970-01-01],
|
||||
end_date: ~D[1970-01-01],
|
||||
uploads: uploads
|
||||
)
|
||||
|
||||
job = Repo.reload!(job)
|
||||
|
||||
assert :ok = Plausible.Workers.ImportAnalytics.perform(job)
|
||||
|
||||
# on successfull import the start and end dates are updated
|
||||
assert %SiteImport{
|
||||
start_date: ~D[2011-12-25],
|
||||
end_date: ~D[2022-01-06],
|
||||
source: :csv,
|
||||
status: :completed
|
||||
} = Repo.get_by!(SiteImport, site_id: site.id)
|
||||
|
||||
assert Plausible.Stats.Clickhouse.imported_pageview_count(site) == 99
|
||||
end
|
||||
|
||||
test "fails on invalid CSV", %{site: site, user: user, bucket: bucket, container: minio} do
|
||||
csvs = [
|
||||
%{
|
||||
name: "imported_browsers.csv",
|
||||
body: """
|
||||
"date","browser","visitors","visits","visit_duration","bounces"
|
||||
"2021-12-30","Amazon Silk",2,2,0,2
|
||||
"2021-12-30","Chrome",31,32,329,29
|
||||
"2021-12-30","Edge",3,3,0,3
|
||||
"2021-12-30","Firefox",1,1,0,1
|
||||
"2021-12-30","Internet Explorer",1,1,0,1
|
||||
"2021-12-30","Mobile App",2,2,0,2
|
||||
"2021-12-31","Mobile App",4,4,0,4
|
||||
"""
|
||||
},
|
||||
%{
|
||||
name: "imported_devices.csv",
|
||||
body: """
|
||||
"date","device","visitors","visit_duration","bounces"
|
||||
"2021-12-30","Desktop",28,ehhhh....
|
||||
"""
|
||||
}
|
||||
]
|
||||
|
||||
connection_opts = MinioContainer.connection_opts(minio)
|
||||
|
||||
on_exit(fn ->
|
||||
keys = Enum.map(csvs, fn csv -> "#{site.id}/#{csv.name}" end)
|
||||
ExAws.request!(ExAws.S3.delete_all_objects(bucket, keys), connection_opts)
|
||||
end)
|
||||
|
||||
uploads =
|
||||
for %{name: name, body: body} <- csvs do
|
||||
key = "#{site.id}/#{name}"
|
||||
ExAws.request!(ExAws.S3.put_object(bucket, key, body), connection_opts)
|
||||
%{"filename" => name, "s3_url" => s3_url(minio, bucket, key)}
|
||||
end
|
||||
|
||||
{:ok, job} =
|
||||
CSVImporter.new_import(
|
||||
site,
|
||||
user,
|
||||
start_date: ~D[1970-01-01],
|
||||
end_date: ~D[1970-01-01],
|
||||
uploads: uploads
|
||||
)
|
||||
|
||||
job = Repo.reload!(job)
|
||||
|
||||
assert {:discard, message} = Plausible.Workers.ImportAnalytics.perform(job)
|
||||
assert message =~ "CANNOT_PARSE_INPUT_ASSERTION_FAILED"
|
||||
|
||||
assert %SiteImport{id: import_id, source: :csv, status: :failed} =
|
||||
Repo.get_by!(SiteImport, site_id: site.id)
|
||||
|
||||
# ensure no browser left behind
|
||||
imported_browsers_q = from b in "imported_browsers", where: b.import_id == ^import_id
|
||||
assert await_clickhouse_count(imported_browsers_q, 0)
|
||||
end
|
||||
end
|
||||
|
||||
defp s3_url(minio, bucket, key) do
|
||||
port = minio |> MinioContainer.connection_opts() |> Keyword.fetch!(:port)
|
||||
Path.join(["http://172.17.0.1:#{port}", bucket, key])
|
||||
end
|
||||
end
|
||||
|
|
@ -6,8 +6,8 @@ Ecto.Adapters.SQL.Sandbox.mode(Plausible.Repo, :manual)
|
|||
|
||||
if Mix.env() == :small_test do
|
||||
IO.puts("Test mode: SMALL")
|
||||
ExUnit.configure(exclude: [:slow, :full_build_only])
|
||||
ExUnit.configure(exclude: [:slow, :minio, :full_build_only])
|
||||
else
|
||||
IO.puts("Test mode: FULL")
|
||||
ExUnit.configure(exclude: [:slow, :small_build_only])
|
||||
ExUnit.configure(exclude: [:slow, :minio, :small_build_only])
|
||||
end
|
||||
|
|
|
|||
Loading…
Reference in New Issue