Save city name when importing from GA (#2608)

This commit adds city data to imported records from Google Analytics. The
current implementation sets city to 0 because GA does not use the GeoNames
database.

Google Analytics Reporting API uses [Geographical IDs](https://developers.google.com/analytics/devguides/collection/protocol/v1/geoid)
to identify cities and countries. Plausible uses
[GeoNames](https://geonames.org/) and I couldn't find databases correlating the
two.

Fortunately, GA also returns the city name and this commit uses the city name
and the country ISO code to find the Geoname ID. To avoid making expensive ETS
searches, I created another ETS table in the Location library that uses
{country, city} as a key.

Related PR: https://github.com/plausible/location/pull/3
This commit is contained in:
Vini Brasil 2023-02-14 09:32:18 -03:00 committed by GitHub
parent 8fcf4d3304
commit 1cb07efe6d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 94 additions and 9 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@ -57,7 +57,7 @@ defmodule Plausible.Google.ReportRequest do
},
%__MODULE__{
dataset: "imported_locations",
dimensions: ["ga:date", "ga:countryIsoCode", "ga:regionIsoCode"],
dimensions: ["ga:date", "ga:countryIsoCode", "ga:regionIsoCode", "ga:city"],
metrics: ["ga:users", "ga:sessions", "ga:bounces", "ga:sessionDuration"]
},
%__MODULE__{

View File

@ -98,12 +98,16 @@ defmodule Plausible.Imported do
end
defp new_from_google_analytics(site_id, "imported_locations", row) do
country_code = row.dimensions |> Map.fetch!("ga:countryIsoCode") |> default_if_missing("")
city_name = row.dimensions |> Map.fetch!("ga:city") |> default_if_missing("")
city_data = Location.get_city(city_name, country_code)
%{
site_id: site_id,
date: get_date(row),
country: row.dimensions |> Map.fetch!("ga:countryIsoCode") |> default_if_missing(""),
country: country_code,
region: row.dimensions |> Map.fetch!("ga:regionIsoCode") |> default_if_missing(""),
city: 0,
city: city_data && city_data.id,
visitors: row.metrics |> Map.fetch!("ga:users") |> parse_number(),
visits: row.metrics |> Map.fetch!("ga:sessions") |> parse_number(),
bounces: row.metrics |> Map.fetch!("ga:bounces") |> parse_number(),

View File

@ -1,5 +1,6 @@
defmodule Plausible.Workers.ImportGoogleAnalytics do
use Plausible.Repo
require Logger
use Oban.Worker,
queue: :google_analytics_imports,
@ -41,6 +42,7 @@ defmodule Plausible.Workers.ImportGoogleAnalytics do
:ok
{:error, error} ->
Logger.error("Import: Failed to import from GA. Reason: #{inspect(error)}")
import_failed(site)
{:error, error}

View File

@ -64,7 +64,7 @@
"jsx": {:hex, :jsx, "2.8.3", "a05252d381885240744d955fbe3cf810504eb2567164824e19303ea59eef62cf", [:mix, :rebar3], [], "hexpm", "fc3499fed7a726995aa659143a248534adc754ebd16ccd437cd93b649a95091f"},
"jumper": {:hex, :jumper, "1.0.1", "3c00542ef1a83532b72269fab9f0f0c82bf23a35e27d278bfd9ed0865cecabff", [:mix], [], "hexpm", "318c59078ac220e966d27af3646026db9b5a5e6703cb2aa3e26bcfaba65b7433"},
"kaffy": {:hex, :kaffy, "0.9.0", "bef34c9729f6a3af4d0dea8eede8bcb9e11371a83ac9a8b393991bce81839517", [:mix], [{:ecto, "~> 3.0", [hex: :ecto, repo: "hexpm", optional: false]}, {:phoenix, "~> 1.4", [hex: :phoenix, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 2.11", [hex: :phoenix_html, repo: "hexpm", optional: false]}], "hexpm", "d18ff57b8e68feb433aed11e71510cd357abc7034e75358af5deff7d0d4c6ed3"},
"location": {:git, "https://github.com/plausible/location.git", "8faf4f08b06905adde43554dc1d9d35675654816", []},
"location": {:git, "https://github.com/plausible/location.git", "b74d65e06b4613cc43362d16b532f27774a8bbce", []},
"locus": {:hex, :locus, "2.3.6", "c9f53fd5df872fca66a54dc0aa2f8b2d3640388e56a0c39a741be0df6d8854bf", [:rebar3], [{:tls_certificate_check, "~> 1.9", [hex: :tls_certificate_check, repo: "hexpm", optional: false]}], "hexpm", "6087aa9a69673e7011837fb4b3d7f756560adde76892c32f5f93904ee30064e2"},
"makeup": {:hex, :makeup, "1.1.0", "6b67c8bc2882a6b6a445859952a602afc1a41c2e08379ca057c0f525366fc3ca", [:mix], [{:nimble_parsec, "~> 1.2.2 or ~> 1.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "0a45ed501f4a8897f580eabf99a2e5234ea3e75a4373c8a52824f6e873be57a6"},
"makeup_elixir": {:hex, :makeup_elixir, "0.16.0", "f8c570a0d33f8039513fbccaf7108c5d750f47d8defd44088371191b76492b0b", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.2.3", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "28b2cbdc13960a46ae9a8858c4bebdec3c9a6d7b4b9e7f4ed1502f8159f338e7"},

View File

@ -0,0 +1,9 @@
# Migration (namespaced under `Plausible.ClickhouseRepo`) that removes the
# `city_name` column from the `imported_locations` table.
defmodule Plausible.ClickhouseRepo.Migrations.RemoveCityNameToImportedLocations do
use Ecto.Migration
# Single `change/0` callback: alters `imported_locations` and drops one column.
def change do
alter table(:imported_locations) do
# NOTE(review): `remove/1` is called without a column type, so Ecto has no
# way to re-create the column; rolling this migration back is expected to
# raise. Confirm that a one-way migration is intended here.
remove :city_name
end
end
end

View File

@ -14,7 +14,7 @@ user = Plausible.Factory.insert(:user, email: "user@plausible.test", password: "
site = Plausible.Factory.insert(:site, domain: "dummy.site")
membership = Plausible.Factory.insert(:site_membership, user: user, site: site, role: :owner)
_membership = Plausible.Factory.insert(:site_membership, user: user, site: site, role: :owner)
put_random_time = fn date ->
random_time = Time.new!(:rand.uniform(23), :rand.uniform(59), 0)

View File

@ -756,7 +756,7 @@ defmodule Plausible.ImportedTest do
]
end
test "Location data imported from Google Analytics", %{conn: conn, site: site} do
test "imports city data from Google Analytics", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview,
country_code: "EE",
@ -777,6 +777,7 @@ defmodule Plausible.ImportedTest do
%{
dimensions: %{
"ga:countryIsoCode" => "EE",
"ga:city" => "Tartu",
"ga:date" => "20210101",
"ga:regionIsoCode" => "Tartumaa"
},
@ -790,6 +791,75 @@ defmodule Plausible.ImportedTest do
%{
dimensions: %{
"ga:countryIsoCode" => "GB",
"ga:city" => "Edinburgh",
"ga:date" => "20210101",
"ga:regionIsoCode" => "Midlothian"
},
metrics: %{
"ga:bounces" => "0",
"ga:sessionDuration" => "10",
"ga:sessions" => "1",
"ga:users" => "1"
}
}
],
site.id,
"imported_locations"
)
conn =
get(
conn,
"/api/stats/#{site.domain}/cities?period=day&date=2021-01-01&with_imported=true"
)
assert json_response(conn, 200) == [
%{"code" => 588_335, "name" => "Tartu", "visitors" => 1, "country_flag" => "🇪🇪"},
%{
"code" => 2_650_225,
"name" => "Edinburgh",
"visitors" => 1,
"country_flag" => "🇬🇧"
}
]
end
test "imports country data from Google Analytics", %{conn: conn, site: site} do
populate_stats(site, [
build(:pageview,
country_code: "EE",
timestamp: ~N[2021-01-01 00:15:00]
),
build(:pageview,
country_code: "EE",
timestamp: ~N[2021-01-01 00:15:00]
),
build(:pageview,
country_code: "GB",
timestamp: ~N[2021-01-01 00:15:00]
)
])
import_data(
[
%{
dimensions: %{
"ga:countryIsoCode" => "EE",
"ga:city" => "Tartu",
"ga:date" => "20210101",
"ga:regionIsoCode" => "Tartumaa"
},
metrics: %{
"ga:bounces" => "0",
"ga:sessionDuration" => "10",
"ga:sessions" => "1",
"ga:users" => "1"
}
},
%{
dimensions: %{
"ga:countryIsoCode" => "GB",
"ga:city" => "Edinburgh",
"ga:date" => "20210101",
"ga:regionIsoCode" => "Midlothian"
},