CLICKHOUSE_DEFAULT_STORAGE_POLICY env (#4851)

In production we use `storage_policy = 'tiered'` by default but this is
not reflected in any migrations.

This change fixes that by introducing a new environment variable and
the plumbing needed to use it in new (and old) migrations.

Tested via setting env, doing `mix ecto.drop; mix ecto.create; mix ecto.migrate` and
checking resulting table schemas.
This commit is contained in:
Karl-Aksel Puulmann 2024-11-26 12:54:17 +02:00 committed by GitHub
parent 95471c0085
commit 2112feee88
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 95 additions and 30 deletions

View File

@ -560,6 +560,9 @@ config :plausible, Plausible.IngestRepo,
pool_size: ingest_pool_size, pool_size: ingest_pool_size,
settings: [ settings: [
materialized_views_ignore_errors: 1 materialized_views_ignore_errors: 1
],
table_settings: [
storage_policy: get_var_from_path_or_env(config_dir, "CLICKHOUSE_DEFAULT_STORAGE_POLICY")
] ]
config :plausible, Plausible.AsyncInsertRepo, config :plausible, Plausible.AsyncInsertRepo,

View File

@ -25,6 +25,7 @@ defmodule Plausible.DataMigration.AcquisitionChannel do
"acquisition_channel_functions", "acquisition_channel_functions",
[ [
on_cluster_statement: on_cluster_statement, on_cluster_statement: on_cluster_statement,
table_settings: Plausible.MigrationUtils.table_settings_expr(),
dictionary_connection_params: Plausible.MigrationUtils.dictionary_connection_params(), dictionary_connection_params: Plausible.MigrationUtils.dictionary_connection_params(),
insert_quorum: insert_quorum insert_quorum: insert_quorum
], ],

View File

@ -91,7 +91,12 @@ defmodule Plausible.DataMigration.LocationsSync do
cluster? = Plausible.IngestRepo.clustered_table?("sessions_v2") cluster? = Plausible.IngestRepo.clustered_table?("sessions_v2")
{:ok, _} = run_sql("truncate-location-data-table", cluster?: cluster?) {:ok, _} = run_sql("truncate-location-data-table", cluster?: cluster?)
{:ok, _} = run_sql("create-location-data-table", cluster?: cluster?)
{:ok, _} =
run_sql("create-location-data-table",
cluster?: cluster?,
table_settings: Plausible.MigrationUtils.table_settings_expr(:suffix)
)
countries = countries =
Location.Country.all() Location.Country.all()

View File

@ -18,8 +18,6 @@ defmodule Plausible.DataMigration.NumericIDs do
end end
end end
@table_settings "SETTINGS index_granularity = 8192"
def run(opts \\ []) do def run(opts \\ []) do
interactive? = Keyword.get(opts, :interactive?, true) interactive? = Keyword.get(opts, :interactive?, true)
@ -34,7 +32,7 @@ defmodule Plausible.DataMigration.NumericIDs do
table_settings = table_settings =
Keyword.get(opts, :table_settings) || System.get_env("NUMERIC_IDS_TABLE_SETTINGS") || Keyword.get(opts, :table_settings) || System.get_env("NUMERIC_IDS_TABLE_SETTINGS") ||
@table_settings Plausible.MigrationUtils.table_settings_expr()
start_from = start_from =
Keyword.get(opts, :start_from) || System.get_env("NUMERIC_IDS_PARTITION_START_FROM") Keyword.get(opts, :start_from) || System.get_env("NUMERIC_IDS_PARTITION_START_FROM")

View File

@ -21,4 +21,23 @@ defmodule Plausible.MigrationUtils do
|> Enum.reject(&is_nil/1) |> Enum.reject(&is_nil/1)
|> Enum.join(" ") |> Enum.join(" ")
end end
@doc """
Returns the `:table_settings` entries of the `IngestRepo` configuration,
dropping every entry whose value is `nil`.

Returns an empty list when the repo configuration has no `:table_settings`
key (or the key is set to `nil`) instead of raising.
"""
@spec table_settings() :: keyword()
def table_settings() do
  # `Keyword.get/2` yields nil for a missing key; fall back to an empty list
  # so the enumeration below cannot crash on nil.
  settings = Keyword.get(IngestRepo.config(), :table_settings) || []

  Enum.reject(settings, fn {_key, value} -> is_nil(value) end)
end
@doc """
Renders the configured table settings as a SQL fragment.

`type` selects how the fragment is attached to a statement:

  * `:prefix` (default) - a standalone clause, `"SETTINGS k = v, ..."`
  * `:suffix` - a continuation of an existing `SETTINGS` clause, `", k = v, ..."`

Returns an empty string when `table_settings/0` yields no entries,
regardless of `type`.
"""
@spec table_settings_expr(:prefix | :suffix) :: String.t()
def table_settings_expr(type \\ :prefix) do
  # Read the settings once so the rendered expression and the emptiness
  # check below are based on the same value.
  settings = table_settings()
  expr = Enum.map_join(settings, ", ", fn {key, value} -> "#{key} = #{encode(value)}" end)

  case {settings, type} do
    {[], _} -> ""
    {_, :prefix} -> "SETTINGS #{expr}"
    {_, :suffix} -> ", #{expr}"
  end
end
# Encodes a setting value for interpolation into a SQL `SETTINGS` clause.
# Numbers are returned unchanged; binaries are wrapped in single quotes.
defp encode(value) when is_number(value), do: value

defp encode(value) when is_binary(value) do
  # Backslash-escape `\` and `'` so a value containing either cannot
  # terminate the quoted literal early and break the statement.
  escaped = String.replace(value, ["\\", "'"], fn char -> "\\" <> char end)

  "'#{escaped}'"
end
end end

View File

@ -9,7 +9,8 @@ ENGINE = ReplicatedMergeTree('/clickhouse/{cluster}/tables/{shard}/plausible_pro
<% else %> <% else %>
ENGINE = MergeTree() ENGINE = MergeTree()
<% end %> <% end %>
ORDER BY referrer_source; ORDER BY referrer_source
<%= @table_settings %>;
TRUNCATE TABLE acquisition_channel_source_category SETTINGS alter_sync=2; TRUNCATE TABLE acquisition_channel_source_category SETTINGS alter_sync=2;
@ -41,7 +42,8 @@ ENGINE = ReplicatedMergeTree('/clickhouse/{cluster}/tables/{shard}/plausible_pro
<% else %> <% else %>
ENGINE = MergeTree() ENGINE = MergeTree()
<% end %> <% end %>
ORDER BY referrer_source; ORDER BY referrer_source
<%= @table_settings %>;
TRUNCATE TABLE acquisition_channel_paid_sources SETTINGS alter_sync=2; TRUNCATE TABLE acquisition_channel_paid_sources SETTINGS alter_sync=2;

View File

@ -10,4 +10,4 @@ ENGINE = ReplicatedMergeTree('/clickhouse/{cluster}/tables/{shard}/plausible_pro
ENGINE = MergeTree() ENGINE = MergeTree()
<% end %> <% end %>
ORDER BY (type, id) ORDER BY (type, id)
SETTINGS index_granularity = 128 SETTINGS index_granularity = 128 <%= @table_settings %>

View File

@ -10,8 +10,11 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateEventsAndSessions do
create_if_not_exists table(:events, create_if_not_exists table(:events,
primary_key: false, primary_key: false,
engine: "MergeTree", engine: "MergeTree",
options: options: """
"PARTITION BY toYYYYMM(timestamp) ORDER BY (domain, toDate(timestamp), user_id) SETTINGS index_granularity = 8192" PARTITION BY toYYYYMM(timestamp)
ORDER BY (domain, toDate(timestamp), user_id)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do ) do
add(:name, :string) add(:name, :string)
add(:domain, :string) add(:domain, :string)
@ -34,8 +37,11 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateEventsAndSessions do
create_if_not_exists table(:sessions, create_if_not_exists table(:sessions,
primary_key: false, primary_key: false,
engine: "CollapsingMergeTree(sign)", engine: "CollapsingMergeTree(sign)",
options: options: """
"PARTITION BY toYYYYMM(start) ORDER BY (domain, toDate(start), user_id, session_id) SETTINGS index_granularity = 8192" PARTITION BY toYYYYMM(start)
ORDER BY (domain, toDate(start), user_id, session_id)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do ) do
add(:session_id, :UInt64) add(:session_id, :UInt64)
add(:sign, :Int8) add(:sign, :Int8)

View File

@ -5,7 +5,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_visitors, create_if_not_exists table(:imported_visitors,
primary_key: false, primary_key: false,
engine: "MergeTree", engine: "MergeTree",
options: "ORDER BY (site_id, date)" options: """
ORDER BY (site_id, date)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do ) do
add(:site_id, :UInt64) add(:site_id, :UInt64)
add(:date, :date) add(:date, :date)
@ -19,7 +22,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_sources, create_if_not_exists table(:imported_sources,
primary_key: false, primary_key: false,
engine: "MergeTree", engine: "MergeTree",
options: "ORDER BY (site_id, date, source)" options: """
ORDER BY (site_id, date, source)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do ) do
add(:site_id, :UInt64) add(:site_id, :UInt64)
add(:date, :date) add(:date, :date)
@ -37,7 +43,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_pages, create_if_not_exists table(:imported_pages,
primary_key: false, primary_key: false,
engine: "MergeTree", engine: "MergeTree",
options: "ORDER BY (site_id, date, hostname, page)" options: """
ORDER BY (site_id, date, hostname, page)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do ) do
add(:site_id, :UInt64) add(:site_id, :UInt64)
add(:date, :date) add(:date, :date)
@ -52,7 +61,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_entry_pages, create_if_not_exists table(:imported_entry_pages,
primary_key: false, primary_key: false,
engine: "MergeTree", engine: "MergeTree",
options: "ORDER BY (site_id, date, entry_page)" options: """
ORDER BY (site_id, date, entry_page)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do ) do
add(:site_id, :UInt64) add(:site_id, :UInt64)
add(:date, :date) add(:date, :date)
@ -66,7 +78,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_exit_pages, create_if_not_exists table(:imported_exit_pages,
primary_key: false, primary_key: false,
engine: "MergeTree", engine: "MergeTree",
options: "ORDER BY (site_id, date, exit_page)" options: """
ORDER BY (site_id, date, exit_page)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do ) do
add(:site_id, :UInt64) add(:site_id, :UInt64)
add(:date, :date) add(:date, :date)
@ -78,7 +93,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_locations, create_if_not_exists table(:imported_locations,
primary_key: false, primary_key: false,
engine: "MergeTree", engine: "MergeTree",
options: "ORDER BY (site_id, date, country, region, city)" options: """
ORDER BY (site_id, date, country, region, city)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do ) do
add(:site_id, :UInt64) add(:site_id, :UInt64)
add(:date, :date) add(:date, :date)
@ -94,7 +112,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_devices, create_if_not_exists table(:imported_devices,
primary_key: false, primary_key: false,
engine: "MergeTree", engine: "MergeTree",
options: "ORDER BY (site_id, date, device)" options: """
ORDER BY (site_id, date, device)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do ) do
add(:site_id, :UInt64) add(:site_id, :UInt64)
add(:date, :date) add(:date, :date)
@ -108,7 +129,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_browsers, create_if_not_exists table(:imported_browsers,
primary_key: false, primary_key: false,
engine: "MergeTree", engine: "MergeTree",
options: "ORDER BY (site_id, date, browser)" options: """
ORDER BY (site_id, date, browser)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do ) do
add(:site_id, :UInt64) add(:site_id, :UInt64)
add(:date, :date) add(:date, :date)
@ -122,7 +146,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_operating_systems, create_if_not_exists table(:imported_operating_systems,
primary_key: false, primary_key: false,
engine: "MergeTree", engine: "MergeTree",
options: "ORDER BY (site_id, date, operating_system)" options: """
ORDER BY (site_id, date, operating_system)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do ) do
add(:site_id, :UInt64) add(:site_id, :UInt64)
add(:date, :date) add(:date, :date)

View File

@ -5,8 +5,10 @@ defmodule Plausible.IngestRepo.Migrations.CreateIngestCountersTable do
create_if_not_exists table(:ingest_counters, create_if_not_exists table(:ingest_counters,
primary_key: false, primary_key: false,
engine: "SummingMergeTree(value)", engine: "SummingMergeTree(value)",
options: options: """
"ORDER BY (domain, toDate(event_timebucket), metric, toStartOfMinute(event_timebucket))" ORDER BY (domain, toDate(event_timebucket), metric, toStartOfMinute(event_timebucket))
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do ) do
add(:event_timebucket, :utc_datetime) add(:event_timebucket, :utc_datetime)
add(:domain, :"LowCardinality(String)") add(:domain, :"LowCardinality(String)")

View File

@ -1,10 +1,10 @@
defmodule Plausible.IngestRepo.Migrations.CreateV2Schemas do defmodule Plausible.IngestRepo.Migrations.CreateV2Schemas do
@moduledoc """ @moduledoc """
Normally, for live environments the migration will be done via Normally, for live environments the migration will be done via
`DataMigration.NumericIDs` module. `DataMigration.NumericIDs` module.
For tests, and entirely new small, self-hosted instances however, For tests, and entirely new small, self-hosted instances however,
we want to keep the ability of preparing the database without enforcing we want to keep the ability of preparing the database without enforcing
any _data_ migration. any _data_ migration.
""" """
@ -13,11 +13,12 @@ defmodule Plausible.IngestRepo.Migrations.CreateV2Schemas do
use Plausible.DataMigration, dir: "NumericIDs" use Plausible.DataMigration, dir: "NumericIDs"
@cluster? false @cluster? false
@settings "SETTINGS index_granularity = 8192"
def up do def up do
execute unwrap("create-events-v2", table_settings: @settings, cluster?: @cluster?) table_settings = Plausible.MigrationUtils.table_settings_expr()
execute unwrap("create-sessions-v2", table_settings: @settings, cluster?: @cluster?)
execute unwrap("create-events-v2", table_settings: table_settings, cluster?: @cluster?)
execute unwrap("create-sessions-v2", table_settings: table_settings, cluster?: @cluster?)
end end
def down do def down do

View File

@ -4,19 +4,20 @@ defmodule Plausible.IngestRepo.Migrations.AddImportedCustomEvents do
def change do def change do
# NOTE: Using another table for determining cluster presence # NOTE: Using another table for determining cluster presence
on_cluster = Plausible.MigrationUtils.on_cluster_statement("imported_pages") on_cluster = Plausible.MigrationUtils.on_cluster_statement("imported_pages")
table_settings = Plausible.MigrationUtils.table_settings_expr(:suffix)
settings = settings =
if Plausible.IngestRepo.clustered_table?("imported_pages") do if Plausible.IngestRepo.clustered_table?("imported_pages") do
""" """
ENGINE = ReplicatedMergeTree('/clickhouse/{cluster}/tables/{shard}/{database}/imported_custom_events', '{replica}') ENGINE = ReplicatedMergeTree('/clickhouse/{cluster}/tables/{shard}/{database}/imported_custom_events', '{replica}')
ORDER BY (site_id, import_id, date, name) ORDER BY (site_id, import_id, date, name)
SETTINGS replicated_deduplication_window = 0, storage_policy = 'tiered' SETTINGS replicated_deduplication_window = 0 #{table_settings}
""" """
else else
""" """
ENGINE = MergeTree() ENGINE = MergeTree()
ORDER BY (site_id, import_id, date, name) ORDER BY (site_id, import_id, date, name)
SETTINGS replicated_deduplication_window = 0 SETTINGS replicated_deduplication_window = 0 #{table_settings}
""" """
end end