CLICKHOUSE_DEFAULT_STORAGE_POLICY env (#4851)

In production we use `storage_policy = 'tiered'` by default but this is
not reflected in any migrations.

This change fixes that by introducing a new environment variable,
`CLICKHOUSE_DEFAULT_STORAGE_POLICY`, and the plumbing to use it in new (and old) migrations.

Tested by setting the env variable, running `mix ecto.drop; mix ecto.create; mix ecto.migrate`, and
checking the resulting table schemas.
This commit is contained in:
Karl-Aksel Puulmann 2024-11-26 12:54:17 +02:00 committed by GitHub
parent 95471c0085
commit 2112feee88
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 95 additions and 30 deletions

View File

@ -560,6 +560,9 @@ config :plausible, Plausible.IngestRepo,
pool_size: ingest_pool_size,
settings: [
materialized_views_ignore_errors: 1
],
table_settings: [
storage_policy: get_var_from_path_or_env(config_dir, "CLICKHOUSE_DEFAULT_STORAGE_POLICY")
]
config :plausible, Plausible.AsyncInsertRepo,

View File

@ -25,6 +25,7 @@ defmodule Plausible.DataMigration.AcquisitionChannel do
"acquisition_channel_functions",
[
on_cluster_statement: on_cluster_statement,
table_settings: Plausible.MigrationUtils.table_settings_expr(),
dictionary_connection_params: Plausible.MigrationUtils.dictionary_connection_params(),
insert_quorum: insert_quorum
],

View File

@ -91,7 +91,12 @@ defmodule Plausible.DataMigration.LocationsSync do
cluster? = Plausible.IngestRepo.clustered_table?("sessions_v2")
{:ok, _} = run_sql("truncate-location-data-table", cluster?: cluster?)
{:ok, _} = run_sql("create-location-data-table", cluster?: cluster?)
{:ok, _} =
run_sql("create-location-data-table",
cluster?: cluster?,
table_settings: Plausible.MigrationUtils.table_settings_expr(:suffix)
)
countries =
Location.Country.all()

View File

@ -18,8 +18,6 @@ defmodule Plausible.DataMigration.NumericIDs do
end
end
@table_settings "SETTINGS index_granularity = 8192"
def run(opts \\ []) do
interactive? = Keyword.get(opts, :interactive?, true)
@ -34,7 +32,7 @@ defmodule Plausible.DataMigration.NumericIDs do
table_settings =
Keyword.get(opts, :table_settings) || System.get_env("NUMERIC_IDS_TABLE_SETTINGS") ||
@table_settings
Plausible.MigrationUtils.table_settings_expr()
start_from =
Keyword.get(opts, :start_from) || System.get_env("NUMERIC_IDS_PARTITION_START_FROM")

View File

@ -21,4 +21,23 @@ defmodule Plausible.MigrationUtils do
|> Enum.reject(&is_nil/1)
|> Enum.join(" ")
end
@doc """
Returns the default table settings configured for `IngestRepo`.

Reads the `:table_settings` key from `IngestRepo.config()` and drops every
entry whose value is `nil`, so unset settings never end up in generated SQL.
Returns an empty list when the key is not configured at all.
"""
def table_settings() do
  IngestRepo.config()
  # Default to [] so a config without :table_settings does not crash Enum.reject/2 on nil.
  |> Keyword.get(:table_settings, [])
  |> Enum.reject(fn {_, v} -> is_nil(v) end)
end
@doc """
Renders the configured table settings as a SQL fragment.

`type` selects how the fragment is attached to the surrounding statement:

  * `:prefix` (default) - returns `"SETTINGS key = value, ..."`
  * `:suffix` - returns `", key = value, ..."`, i.e. a continuation of a
    settings list that the caller has already started

Returns an empty string when `table_settings/0` yields no entries,
regardless of `type`.
"""
def table_settings_expr(type \\ :prefix) do
  # Read the settings once so the emptiness check and the rendered text
  # are guaranteed to be based on the same value.
  settings = table_settings()
  expr = Enum.map_join(settings, ", ", fn {k, v} -> "#{k} = #{encode(v)}" end)

  case {settings, type} do
    {[], _} -> ""
    {_, :prefix} -> "SETTINGS #{expr}"
    {_, :suffix} -> ", #{expr}"
  end
end
# Encodes a setting value as a SQL literal for interpolation into a SETTINGS list.
# Numbers are returned unchanged. Strings are wrapped in single quotes, with
# backslashes and single quotes backslash-escaped so that the value cannot
# terminate the literal early and produce malformed SQL.
defp encode(value) when is_number(value), do: value

defp encode(value) when is_binary(value) do
  escaped =
    value
    # Escape backslashes first so the ones added for quotes are not doubled.
    |> String.replace("\\", "\\\\")
    |> String.replace("'", "\\'")

  "'#{escaped}'"
end
end

View File

@ -9,7 +9,8 @@ ENGINE = ReplicatedMergeTree('/clickhouse/{cluster}/tables/{shard}/plausible_pro
<% else %>
ENGINE = MergeTree()
<% end %>
ORDER BY referrer_source;
ORDER BY referrer_source
<%= @table_settings %>;
TRUNCATE TABLE acquisition_channel_source_category SETTINGS alter_sync=2;
@ -41,7 +42,8 @@ ENGINE = ReplicatedMergeTree('/clickhouse/{cluster}/tables/{shard}/plausible_pro
<% else %>
ENGINE = MergeTree()
<% end %>
ORDER BY referrer_source;
ORDER BY referrer_source
<%= @table_settings %>;
TRUNCATE TABLE acquisition_channel_paid_sources SETTINGS alter_sync=2;

View File

@ -10,4 +10,4 @@ ENGINE = ReplicatedMergeTree('/clickhouse/{cluster}/tables/{shard}/plausible_pro
ENGINE = MergeTree()
<% end %>
ORDER BY (type, id)
SETTINGS index_granularity = 128
SETTINGS index_granularity = 128 <%= @table_settings %>

View File

@ -10,8 +10,11 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateEventsAndSessions do
create_if_not_exists table(:events,
primary_key: false,
engine: "MergeTree",
options:
"PARTITION BY toYYYYMM(timestamp) ORDER BY (domain, toDate(timestamp), user_id) SETTINGS index_granularity = 8192"
options: """
PARTITION BY toYYYYMM(timestamp)
ORDER BY (domain, toDate(timestamp), user_id)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do
add(:name, :string)
add(:domain, :string)
@ -34,8 +37,11 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateEventsAndSessions do
create_if_not_exists table(:sessions,
primary_key: false,
engine: "CollapsingMergeTree(sign)",
options:
"PARTITION BY toYYYYMM(start) ORDER BY (domain, toDate(start), user_id, session_id) SETTINGS index_granularity = 8192"
options: """
PARTITION BY toYYYYMM(start)
ORDER BY (domain, toDate(start), user_id, session_id)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do
add(:session_id, :UInt64)
add(:sign, :Int8)

View File

@ -5,7 +5,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_visitors,
primary_key: false,
engine: "MergeTree",
options: "ORDER BY (site_id, date)"
options: """
ORDER BY (site_id, date)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do
add(:site_id, :UInt64)
add(:date, :date)
@ -19,7 +22,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_sources,
primary_key: false,
engine: "MergeTree",
options: "ORDER BY (site_id, date, source)"
options: """
ORDER BY (site_id, date, source)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do
add(:site_id, :UInt64)
add(:date, :date)
@ -37,7 +43,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_pages,
primary_key: false,
engine: "MergeTree",
options: "ORDER BY (site_id, date, hostname, page)"
options: """
ORDER BY (site_id, date, hostname, page)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do
add(:site_id, :UInt64)
add(:date, :date)
@ -52,7 +61,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_entry_pages,
primary_key: false,
engine: "MergeTree",
options: "ORDER BY (site_id, date, entry_page)"
options: """
ORDER BY (site_id, date, entry_page)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do
add(:site_id, :UInt64)
add(:date, :date)
@ -66,7 +78,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_exit_pages,
primary_key: false,
engine: "MergeTree",
options: "ORDER BY (site_id, date, exit_page)"
options: """
ORDER BY (site_id, date, exit_page)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do
add(:site_id, :UInt64)
add(:date, :date)
@ -78,7 +93,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_locations,
primary_key: false,
engine: "MergeTree",
options: "ORDER BY (site_id, date, country, region, city)"
options: """
ORDER BY (site_id, date, country, region, city)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do
add(:site_id, :UInt64)
add(:date, :date)
@ -94,7 +112,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_devices,
primary_key: false,
engine: "MergeTree",
options: "ORDER BY (site_id, date, device)"
options: """
ORDER BY (site_id, date, device)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do
add(:site_id, :UInt64)
add(:date, :date)
@ -108,7 +129,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_browsers,
primary_key: false,
engine: "MergeTree",
options: "ORDER BY (site_id, date, browser)"
options: """
ORDER BY (site_id, date, browser)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do
add(:site_id, :UInt64)
add(:date, :date)
@ -122,7 +146,10 @@ defmodule Plausible.ClickhouseRepo.Migrations.CreateImportedVisitors do
create_if_not_exists table(:imported_operating_systems,
primary_key: false,
engine: "MergeTree",
options: "ORDER BY (site_id, date, operating_system)"
options: """
ORDER BY (site_id, date, operating_system)
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do
add(:site_id, :UInt64)
add(:date, :date)

View File

@ -5,8 +5,10 @@ defmodule Plausible.IngestRepo.Migrations.CreateIngestCountersTable do
create_if_not_exists table(:ingest_counters,
primary_key: false,
engine: "SummingMergeTree(value)",
options:
"ORDER BY (domain, toDate(event_timebucket), metric, toStartOfMinute(event_timebucket))"
options: """
ORDER BY (domain, toDate(event_timebucket), metric, toStartOfMinute(event_timebucket))
#{Plausible.MigrationUtils.table_settings_expr()}
"""
) do
add(:event_timebucket, :utc_datetime)
add(:domain, :"LowCardinality(String)")

View File

@ -1,10 +1,10 @@
defmodule Plausible.IngestRepo.Migrations.CreateV2Schemas do
@moduledoc """
Normally, for live environments the migration will be done via
`DataMigration.NumericIDs` module.
`DataMigration.NumericIDs` module.
For tests, and entirely new small, self-hosted instances however,
we want to keep the ability of preparing the database without enforcing
For tests, and entirely new small, self-hosted instances however,
we want to keep the ability of preparing the database without enforcing
any _data_ migration.
"""
@ -13,11 +13,12 @@ defmodule Plausible.IngestRepo.Migrations.CreateV2Schemas do
use Plausible.DataMigration, dir: "NumericIDs"
@cluster? false
@settings "SETTINGS index_granularity = 8192"
def up do
execute unwrap("create-events-v2", table_settings: @settings, cluster?: @cluster?)
execute unwrap("create-sessions-v2", table_settings: @settings, cluster?: @cluster?)
table_settings = Plausible.MigrationUtils.table_settings_expr()
execute unwrap("create-events-v2", table_settings: table_settings, cluster?: @cluster?)
execute unwrap("create-sessions-v2", table_settings: table_settings, cluster?: @cluster?)
end
def down do

View File

@ -4,19 +4,20 @@ defmodule Plausible.IngestRepo.Migrations.AddImportedCustomEvents do
def change do
# NOTE: Using another table for determining cluster presence
on_cluster = Plausible.MigrationUtils.on_cluster_statement("imported_pages")
table_settings = Plausible.MigrationUtils.table_settings_expr(:suffix)
settings =
if Plausible.IngestRepo.clustered_table?("imported_pages") do
"""
ENGINE = ReplicatedMergeTree('/clickhouse/{cluster}/tables/{shard}/{database}/imported_custom_events', '{replica}')
ORDER BY (site_id, import_id, date, name)
SETTINGS replicated_deduplication_window = 0, storage_policy = 'tiered'
SETTINGS replicated_deduplication_window = 0 #{table_settings}
"""
else
"""
ENGINE = MergeTree()
ORDER BY (site_id, import_id, date, name)
SETTINGS replicated_deduplication_window = 0
SETTINGS replicated_deduplication_window = 0 #{table_settings}
"""
end